FST  openfst-1.8.3
OpenFst Library
mapped-file.cc
Go to the documentation of this file.
1 // Copyright 2005-2024 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the 'License');
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an 'AS IS' BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // See www.openfst.org for extensive documentation on this weighted
16 // finite-state transducer library.
17 //
18 
19 #include <fst/mapped-file.h>
20 
21 #include <fcntl.h>
22 
23 #include <cstddef>
24 #include <cstdint>
25 
26 #ifdef _WIN32
27 #include <io.h> // for _get_osfhandle, _open
28 #include <memoryapi.h> // for CreateFileMapping, UnmapViewOfFile
29 #include <windows.h>
30 #else
31 #include <sys/mman.h>
32 #include <unistd.h>
33 #endif // _WIN32
34 
35 #include <algorithm>
36 #include <cerrno>
37 #include <cstring>
38 #include <ios>
39 #include <istream>
40 #include <memory>
41 #include <string>
42 
43 #include <fst/log.h>
44 
45 namespace fst {
46 
#ifdef _WIN32
namespace {
// Largest DWORD value; used below as a mask that keeps only the low-order
// DWORD of a wider size or offset.
constexpr DWORD DWORD_MAX = std::numeric_limits<DWORD>::max();
}  // namespace
#endif  // _WIN32
52 
53 MappedFile::MappedFile(const MemoryRegion &region) : region_(region) {}
54 
56  if (region_.size != 0) {
57  if (region_.mmap) {
58  VLOG(2) << "munmap'ed " << region_.size << " bytes at " << region_.mmap;
59 #ifdef _WIN32
60  if (UnmapViewOfFile(region_.mmap) != 0) {
61  LOG(ERROR) << "Failed to unmap region: " << GetLastError();
62  }
63  CloseHandle(region_.file_mapping);
64 #else
65  if (munmap(region_.mmap, region_.size) != 0) {
66  LOG(ERROR) << "Failed to unmap region: " << strerror(errno);
67  }
68 #endif
69  } else {
70  if (region_.data) {
71  operator delete(static_cast<char *>(region_.data) - region_.offset);
72  }
73  }
74  }
75 }
76 
77 MappedFile * MappedFile::Map(std::istream &istrm,
78  bool memorymap,
79  const std::string &source,
80  size_t size) {
81  const auto spos = istrm.tellg();
82  VLOG(2) << "memorymap: " << (memorymap ? "true" : "false") << " source: \""
83  << source << "\""
84  << " size: " << size << " offset: " << spos;
85  if (memorymap && spos >= 0 && spos % kArchAlignment == 0) {
86  const size_t pos = static_cast<size_t>(spos);
87 #ifdef _WIN32
88  const int fd = _open(source.c_str(), _O_RDONLY);
89 #else
90  const int fd = open(source.c_str(), O_RDONLY);
91 #endif
92  if (fd != -1) {
93  std::unique_ptr<MappedFile> mmf(MapFromFileDescriptor(fd, pos, size));
94  if (close(fd) == 0 && mmf != nullptr) {
95  istrm.seekg(pos + size, std::ios::beg);
96  if (istrm) {
97  VLOG(2) << "mmap'ed region of " << size << " at offset " << pos
98  << " from " << source << " to addr " << mmf->region_.mmap;
99  return mmf.release();
100  }
101  } else {
102  LOG(WARNING) << "Mapping of file failed: " << strerror(errno);
103  }
104  }
105  }
106 
107  // If all else fails, reads from the file into the allocated buffer.
108  if (memorymap) {
109  LOG(WARNING) << "File mapping at offset " << spos << " of file " << source
110  << " could not be honored, reading instead";
111  }
112  // Reads the file into the buffer in chunks not larger than kMaxReadChunk.
113  std::unique_ptr<MappedFile> mf(Allocate(size));
114  auto *buffer = static_cast<char *>(mf->mutable_data());
115  while (size > 0) {
116  const auto next_size = std::min(size, kMaxReadChunk);
117  const auto current_pos = istrm.tellg();
118  if (!istrm.read(buffer, next_size)) {
119  LOG(ERROR) << "Failed to read " << next_size << " bytes at offset "
120  << current_pos << "from \"" << source << "\"";
121  return nullptr;
122  }
123  size -= next_size;
124  buffer += next_size;
125  VLOG(2) << "Read " << next_size << " bytes. " << size << " remaining";
126  }
127  return mf.release();
128 }
129 
131  size_t pos,
132  size_t size) {
133 #ifdef _WIN32
134  SYSTEM_INFO sysInfo;
135  GetSystemInfo(&sysInfo);
136  const DWORD pagesize = sysInfo.dwAllocationGranularity;
137 #else
138  const int pagesize = sysconf(_SC_PAGESIZE);
139 #endif // _WIN32
140 
141  const size_t offset = pos % pagesize;
142  const size_t offset_pos = pos - offset;
143  const size_t upsize = size + offset;
144 
145 #ifdef _WIN32
146  if (fd == -1) {
147  LOG(ERROR) << "Invalid file descriptor fd=" << fd;
148  return nullptr;
149  }
150  HANDLE file = reinterpret_cast<HANDLE>(_get_osfhandle(fd));
151  if (file == INVALID_HANDLE_VALUE) {
152  LOG(ERROR) << "Invalid file descriptor fd=" << fd;
153  return nullptr;
154  }
155  const DWORD max_size_hi =
156  sizeof(size_t) > sizeof(DWORD) ? upsize >> (CHAR_BIT * sizeof(DWORD)) : 0;
157  const DWORD max_size_lo = upsize & DWORD_MAX;
158  HANDLE file_mapping = CreateFileMappingA(file, nullptr, PAGE_READONLY,
159  max_size_hi, max_size_lo, nullptr);
160  if (file_mapping == INVALID_HANDLE_VALUE) {
161  LOG(ERROR) << "Can't create mapping for fd=" << fd << " size=" << upsize
162  << ": " << GetLastError();
163  return nullptr;
164  }
165 
166  const DWORD offset_pos_hi =
167  sizeof(size_t) > sizeof(DWORD) ? offset_pos >> (CHAR_BIT * sizeof(DWORD))
168  : 0;
169  const DWORD offset_pos_lo = offset_pos & DWORD_MAX;
170  void *map = MapViewOfFile(file_mapping, FILE_MAP_READ,
171  offset_pos_hi, offset_pos_lo, upsize);
172  if (!map) {
173  LOG(ERROR) << "mmap failed for fd=" << fd << " size=" << upsize
174  << " offset=" << offset_pos << ": " << GetLastError();
175  CloseHandle(file_mapping);
176  return nullptr;
177  }
178 #else
179  void *map = mmap(nullptr, upsize, PROT_READ, MAP_SHARED, fd, offset_pos);
180  if (map == MAP_FAILED) {
181  LOG(ERROR) << "mmap failed for fd=" << fd << " size=" << upsize
182  << " offset=" << offset_pos;
183  return nullptr;
184  }
185 #endif
186  MemoryRegion region;
187  region.mmap = map;
188  region.size = upsize;
189  region.data = static_cast<void *>(static_cast<char *>(map) + offset);
190  region.offset = offset;
191 #ifdef _WIN32
192  region.file_mapping = file_mapping;
193 #endif // _WIN32
194  return new MappedFile(region);
195 }
196 
197 MappedFile *MappedFile::Allocate(size_t size, size_t align) {
198  MemoryRegion region;
199  region.data = nullptr;
200  region.offset = 0;
201  if (size > 0) {
202  // TODO(jrosenstock,sorenj): Use std::align() when that is no longer banned.
203  // Use std::aligned_alloc() when C++17 is allowed.
204  char *buffer = static_cast<char *>(operator new(size + align));
205  uintptr_t address = reinterpret_cast<uintptr_t>(buffer);
206  region.offset = align - (address % align);
207  region.data = buffer + region.offset;
208  }
209  region.mmap = nullptr;
210  region.size = size;
211  return new MappedFile(region);
212 }
213 
215  MemoryRegion region;
216  region.data = data;
217  region.mmap = data;
218  region.size = 0;
219  region.offset = 0;
220  return new MappedFile(region);
221 }
222 
223 } // namespace fst
std::string source
Definition: fst.h:73
static MappedFile * Map(std::istream &istrm, bool memorymap, const std::string &source, size_t size)
Definition: mapped-file.cc:77
#define LOG(type)
Definition: log.h:53
static MappedFile * Allocate(size_t size, size_t align=kArchAlignment)
Definition: mapped-file.cc:197
#define VLOG(level)
Definition: log.h:54
static MappedFile * Borrow(void *data)
Definition: mapped-file.cc:214
static MappedFile * MapFromFileDescriptor(int fd, size_t pos, size_t size)
Definition: mapped-file.cc:130