FST  openfst-1.8.4
OpenFst Library
mapped-file.cc
Go to the documentation of this file.
1 // Copyright 2005-2024 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the 'License');
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an 'AS IS' BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // See www.openfst.org for extensive documentation on this weighted
16 // finite-state transducer library.
17 //
18 
#include <fst/mapped-file.h>

#include <fcntl.h>

#include <cstddef>
#include <cstdint>
#include <new>

#ifdef _WIN32
#include <io.h>  // for _get_osfhandle, _open
#include <memoryapi.h>  // for CreateFileMapping, UnmapViewOfFile
#include <windows.h>
#else
#include <sys/mman.h>
#include <unistd.h>
#endif  // _WIN32

#include <algorithm>
#include <cerrno>
#include <climits>
#include <cstring>
#include <ios>
#include <istream>
#include <limits>
#include <memory>
#include <string>

#include <fst/log.h>
45 
46 namespace fst {
47 
#ifdef _WIN32
namespace {
// Largest value a Win32 DWORD can hold. Used in this file as a bit mask to
// extract the low-order DWORD of a size_t.
constexpr DWORD DWORD_MAX = std::numeric_limits<DWORD>::max();
}  // namespace
#endif  // _WIN32
53 
54 MappedFile::MappedFile(const MemoryRegion &region) : region_(region) {}
55 
57  if (region_.size != 0) {
58  if (region_.mmap) {
59  VLOG(2) << "munmap'ed " << region_.size << " bytes at " << region_.mmap;
60 #ifdef _WIN32
61  if (UnmapViewOfFile(region_.mmap) != 0) {
62  LOG(ERROR) << "Failed to unmap region: " << GetLastError();
63  }
64  CloseHandle(region_.file_mapping);
65 #else
66  if (munmap(region_.mmap, region_.size) != 0) {
67  LOG(ERROR) << "Failed to unmap region: " << strerror(errno);
68  }
69 #endif
70  } else {
71  if (region_.data) {
72  operator delete(region_.data, region_.size,
73  std::align_val_t{region_.offset});
74  }
75  }
76  }
77 }
78 
79 MappedFile * MappedFile::Map(std::istream &istrm,
80  bool memorymap,
81  const std::string &source,
82  size_t size) {
83  const auto spos = istrm.tellg();
84  VLOG(2) << "memorymap: " << (memorymap ? "true" : "false") << " source: \""
85  << source << "\""
86  << " size: " << size << " offset: " << spos;
87  if (memorymap && spos >= 0 && spos % kArchAlignment == 0) {
88  const size_t pos = static_cast<size_t>(spos);
89 #ifdef _WIN32
90  const int fd = _open(source.c_str(), _O_RDONLY);
91 #else
92  const int fd = open(source.c_str(), O_RDONLY);
93 #endif
94  if (fd != -1) {
95  std::unique_ptr<MappedFile> mmf(MapFromFileDescriptor(fd, pos, size));
96  if (close(fd) == 0 && mmf != nullptr) {
97  istrm.seekg(pos + size, std::ios::beg);
98  if (istrm) {
99  VLOG(2) << "mmap'ed region of " << size << " at offset " << pos
100  << " from " << source << " to addr " << mmf->region_.mmap;
101  return mmf.release();
102  }
103  } else {
104  LOG(WARNING) << "Mapping of file failed: " << strerror(errno);
105  }
106  }
107  }
108 
109  // If all else fails, reads from the file into the allocated buffer.
110  if (memorymap) {
111  LOG(WARNING) << "File mapping at offset " << spos << " of file " << source
112  << " could not be honored, reading instead";
113  }
114  // Reads the file into the buffer in chunks not larger than kMaxReadChunk.
115  std::unique_ptr<MappedFile> mf(Allocate(size));
116  auto *buffer = static_cast<char *>(mf->mutable_data());
117  while (size > 0) {
118  const auto next_size = std::min(size, kMaxReadChunk);
119  const auto current_pos = istrm.tellg();
120  if (!istrm.read(buffer, next_size)) {
121  LOG(ERROR) << "Failed to read " << next_size << " bytes at offset "
122  << current_pos << "from \"" << source << "\"";
123  return nullptr;
124  }
125  size -= next_size;
126  buffer += next_size;
127  VLOG(2) << "Read " << next_size << " bytes. " << size << " remaining";
128  }
129  return mf.release();
130 }
131 
133  size_t pos,
134  size_t size) {
135 #ifdef _WIN32
136  SYSTEM_INFO sysInfo;
137  GetSystemInfo(&sysInfo);
138  const DWORD pagesize = sysInfo.dwAllocationGranularity;
139 #else
140  const int pagesize = sysconf(_SC_PAGESIZE);
141 #endif // _WIN32
142 
143  const size_t offset = pos % pagesize;
144  const size_t offset_pos = pos - offset;
145  const size_t upsize = size + offset;
146 
147 #ifdef _WIN32
148  if (fd == -1) {
149  LOG(ERROR) << "Invalid file descriptor fd=" << fd;
150  return nullptr;
151  }
152  HANDLE file = reinterpret_cast<HANDLE>(_get_osfhandle(fd));
153  if (file == INVALID_HANDLE_VALUE) {
154  LOG(ERROR) << "Invalid file descriptor fd=" << fd;
155  return nullptr;
156  }
157  const DWORD max_size_hi =
158  sizeof(size_t) > sizeof(DWORD) ? upsize >> (CHAR_BIT * sizeof(DWORD)) : 0;
159  const DWORD max_size_lo = upsize & DWORD_MAX;
160  HANDLE file_mapping = CreateFileMappingA(file, nullptr, PAGE_READONLY,
161  max_size_hi, max_size_lo, nullptr);
162  if (file_mapping == INVALID_HANDLE_VALUE) {
163  LOG(ERROR) << "Can't create mapping for fd=" << fd << " size=" << upsize
164  << ": " << GetLastError();
165  return nullptr;
166  }
167 
168  const DWORD offset_pos_hi =
169  sizeof(size_t) > sizeof(DWORD) ? offset_pos >> (CHAR_BIT * sizeof(DWORD))
170  : 0;
171  const DWORD offset_pos_lo = offset_pos & DWORD_MAX;
172  void *map = MapViewOfFile(file_mapping, FILE_MAP_READ,
173  offset_pos_hi, offset_pos_lo, upsize);
174  if (!map) {
175  LOG(ERROR) << "mmap failed for fd=" << fd << " size=" << upsize
176  << " offset=" << offset_pos << ": " << GetLastError();
177  CloseHandle(file_mapping);
178  return nullptr;
179  }
180 #else
181  void *map = mmap(nullptr, upsize, PROT_READ, MAP_SHARED, fd, offset_pos);
182  if (map == MAP_FAILED) {
183  LOG(ERROR) << "mmap failed for fd=" << fd << " size=" << upsize
184  << " offset=" << offset_pos;
185  return nullptr;
186  }
187 #endif
188  MemoryRegion region;
189  region.mmap = map;
190  region.size = upsize;
191  region.data = static_cast<void *>(static_cast<char *>(map) + offset);
192  region.offset = offset;
193 #ifdef _WIN32
194  region.file_mapping = file_mapping;
195 #endif // _WIN32
196  return new MappedFile(region);
197 }
198 
199 MappedFile *MappedFile::Allocate(size_t size, size_t align) {
200  MemoryRegion region;
201  region.data = nullptr;
202  region.offset = 0;
203  if (size > 0) {
204  region.offset = align;
205  region.data = static_cast<char *>(operator new(
206  size, std::align_val_t{align}
207  ));
208  }
209  region.mmap = nullptr;
210  region.size = size;
211  return new MappedFile(region);
212 }
213 
215  MemoryRegion region;
216  region.data = data;
217  region.mmap = data;
218  region.size = 0;
219  region.offset = 0;
220  return new MappedFile(region);
221 }
222 
223 } // namespace fst
std::string source
Definition: fst.h:73
static MappedFile * Map(std::istream &istrm, bool memorymap, const std::string &source, size_t size)
Definition: mapped-file.cc:79
#define LOG(type)
Definition: log.h:53
static MappedFile * Allocate(size_t size, size_t align=kArchAlignment)
Definition: mapped-file.cc:199
#define VLOG(level)
Definition: log.h:54
static MappedFile * Borrow(void *data)
Definition: mapped-file.cc:214
static MappedFile * MapFromFileDescriptor(int fd, size_t pos, size_t size)
Definition: mapped-file.cc:132