FST  openfst-1.8.3
OpenFst Library
util.h
Go to the documentation of this file.
1 // Copyright 2005-2024 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the 'License');
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an 'AS IS' BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // See www.openfst.org for extensive documentation on this weighted
16 // finite-state transducer library.
17 //
18 // FST utility inline definitions.
19 
20 #ifndef FST_UTIL_H_
21 #define FST_UTIL_H_
22 
23 #include <array>
24 #include <cstddef>
25 #include <cstdint>
26 #include <ios>
27 #include <iostream>
28 #include <istream>
29 #include <iterator>
30 #include <list>
31 #include <map>
32 #include <optional>
33 #include <ostream>
34 #include <set>
35 #include <sstream>
36 #include <string>
37 #include <type_traits>
38 #include <unordered_map>
39 #include <unordered_set>
40 #include <utility>
41 #include <vector>
42 
43 #include <fst/compat.h>
44 #include <fst/flags.h>
45 #include <fst/log.h>
46 #include <fstream>
47 #include <fst/mapped-file.h>
48 #include <unordered_map>
49 #include <string_view>
50 #include <optional>
51 
// Utility for error handling.

// Declares the boolean flag `fst_error_fatal`, which `FSTERROR()` consults.
DECLARE_bool(fst_error_fatal);

// Expands to a log stream expression: `LOG(FATAL)` when the `fst_error_fatal`
// flag is set, `LOG(ERROR)` otherwise. Use as `FSTERROR() << "message";`.
#define FSTERROR() \
 (FST_FLAGS_fst_error_fatal ? LOG(FATAL) : LOG(ERROR))
58 
59 namespace fst {
60 
61 // Utility for type I/O. For portability of serialized objects across
62 // architectures, care must be taken so that only fixed-size types (like
63 // `int32_t`) are used with `WriteType`/`ReadType`, not types that may differ in
64 // size depending on the architecture, such as `int`. For `enum` types, a
65 // fixed-size base (like `enum E : int32_t`) should be used. Objects are
66 // written and read in the host byte order, so will not be portable across
67 // different endiannesses.
68 
namespace internal {
// True exactly for the scalar types accepted by `ReadType`/`WriteType`:
// arithmetic types (boolean, integral, floating-point) and enums.
template <class T>
inline constexpr bool IsScalarIOTypeV =
    std::disjunction_v<std::is_arithmetic<T>, std::is_enum<T>>;
}  // namespace internal
75 
76 // Reads types from an input stream.
77 
// Generic case: any class type is read by delegating to its own `Read`
// member, whose result is returned.
template <class T, typename std::enable_if_t<std::is_class_v<T>, T> * = nullptr>
inline std::istream &ReadType(std::istream &strm, T *t) {
  T &object = *t;
  return object.Read(strm);
}
83 
84 // Numeric (boolean, integral, floating-point) or enum case.
85 template <class T, typename std::enable_if_t<internal::IsScalarIOTypeV<T>, T>
86  * = nullptr>
87 inline std::istream &ReadType(std::istream &strm, T *t) {
88  return strm.read(reinterpret_cast<char *>(t), sizeof(T));
89 }
90 
91 // Numeric (boolean, integral, floating-point) or enum case only.
92 template <class T>
93 inline std::istream &ReadType(std::istream &strm, size_t n, T *t) {
94  static_assert(internal::IsScalarIOTypeV<T>,
95  "Type not supported for batch read.");
96  return strm.read(reinterpret_cast<char *>(t), sizeof(T) * n);
97 }
98 
99 // String case.
100 inline std::istream &ReadType(std::istream &strm, std::string *s) {
101  s->clear();
102  int32_t ns = 0;
103  ReadType(strm, &ns);
104  if (ns <= 0) return strm;
105  s->resize(ns);
106  ReadType(strm, ns, s->data());
107  return strm;
108 }
109 
// Declares types that can be read from an input stream. The definitions of
// these container overloads appear later in this file; presumably they are
// declared up front so that the pair and container readers can refer to them
// for nested element types.
template <class... T>
std::istream &ReadType(std::istream &strm, std::vector<T...> *c);
template <class... T>
std::istream &ReadType(std::istream &strm, std::list<T...> *c);
template <class... T>
std::istream &ReadType(std::istream &strm, std::set<T...> *c);
template <class... T>
std::istream &ReadType(std::istream &strm, std::map<T...> *c);
template <class... T>
std::istream &ReadType(std::istream &strm, std::unordered_map<T...> *c);
template <class... T>
std::istream &ReadType(std::istream &strm, std::unordered_set<T...> *c);
123 
// Pair case: reads `first`, then `second`, each with the `ReadType` overload
// matching its type.
template <typename S, typename T>
inline std::istream &ReadType(std::istream &strm, std::pair<S, T> *p) {
  S *const first = &p->first;
  T *const second = &p->second;
  ReadType(strm, first);
  ReadType(strm, second);
  return strm;
}
131 
// Pair case with a const first member: constness of `first` is cast away so
// that it can be filled in from the stream before `second` is read.
template <typename S, typename T>
inline std::istream &ReadType(std::istream &strm, std::pair<const S, T> *p) {
  S *const mutable_first = const_cast<S *>(&p->first);
  T *const second = &p->second;
  ReadType(strm, mutable_first);
  ReadType(strm, second);
  return strm;
}
138 
139 namespace internal {
140 template <class C, class ReserveFn>
141 std::istream &ReadContainerType(std::istream &strm, C *c, ReserveFn reserve) {
142  c->clear();
143  int64_t n = 0;
144  ReadType(strm, &n);
145  reserve(c, n);
146  auto insert = std::inserter(*c, c->begin());
147  for (int64_t i = 0; i < n; ++i) {
148  typename C::value_type value;
149  ReadType(strm, &value);
150  *insert = value;
151  }
152  return strm;
153 }
154 
155 // Generic vector case.
156 template <typename T, class A,
157  typename std::enable_if_t<std::is_class_v<T>, T> * = nullptr>
158 inline std::istream &ReadVectorType(std::istream &strm, std::vector<T, A> *c) {
160  strm, c, [](decltype(c) v, int n) { v->reserve(n); });
161 }
162 
163 // Vector of numerics (boolean, integral, floating-point, char) or enum case.
164 template <
165  typename T, class A,
166  typename std::enable_if_t<internal::IsScalarIOTypeV<T>, T> * = nullptr>
167 inline std::istream &ReadVectorType(std::istream &strm, std::vector<T, A> *c) {
168  c->clear();
169  int64_t n = 0;
170  ReadType(strm, &n);
171  if (n == 0) return strm;
172  c->resize(n);
173  ReadType(strm, n, c->data());
174  return strm;
175 }
176 } // namespace internal
177 
178 template <class T, size_t N>
179 std::istream &ReadType(std::istream &strm, std::array<T, N> *c) {
180  if constexpr (internal::IsScalarIOTypeV<T>) {
181  ReadType(strm, c->size(), c->data());
182  } else {
183  for (auto &v : *c) ReadType(strm, &v);
184  }
185  return strm;
186 }
187 
188 template <class... T>
189 std::istream &ReadType(std::istream &strm, std::vector<T...> *c) {
190  return internal::ReadVectorType(strm, c);
191 }
192 
193 template <class... T>
194 std::istream &ReadType(std::istream &strm, std::list<T...> *c) {
195  return internal::ReadContainerType(strm, c, [](decltype(c) v, int n) {});
196 }
197 
198 template <class... T>
199 std::istream &ReadType(std::istream &strm, std::set<T...> *c) {
200  return internal::ReadContainerType(strm, c, [](decltype(c) v, int n) {});
201 }
202 
203 template <class... T>
204 std::istream &ReadType(std::istream &strm, std::map<T...> *c) {
205  return internal::ReadContainerType(strm, c, [](decltype(c) v, int n) {});
206 }
207 
208 template <class... T>
209 std::istream &ReadType(std::istream &strm, std::unordered_set<T...> *c) {
211  strm, c, [](decltype(c) v, int n) { v->reserve(n); });
212 }
213 
214 template <class... T>
215 std::istream &ReadType(std::istream &strm, std::unordered_map<T...> *c) {
217  strm, c, [](decltype(c) v, int n) { v->reserve(n); });
218 }
219 
220 // Writes types to an output stream.
221 
// Generic case: a class type is written by calling its own `Write` member.
// Types convertible to `std::string_view` are excluded here because they are
// handled by the dedicated string overload.
template <class T,
          std::enable_if_t<std::is_class_v<T> &&
                               !std::is_convertible_v<T, std::string_view>,
                           T> * = nullptr>
inline std::ostream &WriteType(std::ostream &strm, const T t) {
  t.Write(strm);
  return strm;
}
232 
233 // Numeric (boolean, integral, floating-point) or enum case.
234 template <class T, typename std::enable_if_t<internal::IsScalarIOTypeV<T>, T>
235  * = nullptr>
236 inline std::ostream &WriteType(std::ostream &strm, const T t) {
237  return strm.write(reinterpret_cast<const char *>(&t), sizeof(T));
238 }
239 
240 // Numeric (boolean, integral, floating-point) or enum case only.
241 template <class T>
242 inline std::ostream &WriteType(std::ostream &strm, size_t n, const T *t) {
243  static_assert(internal::IsScalarIOTypeV<T>,
244  "Type not supported for batch write.");
245  return strm.write(reinterpret_cast<const char *>(t), sizeof(T) * n);
246 }
247 
248 inline std::ostream &WriteType(std::ostream &strm, std::string_view s) {
249  int32_t ns = s.size();
250  WriteType(strm, ns);
251  return strm.write(s.data(), ns);
252 }
253 
// Declares types that can be written to an output stream. The definitions of
// these container overloads appear later in this file; presumably they are
// declared up front so that the pair and container writers can refer to them
// for nested element types.

template <typename... T>
std::ostream &WriteType(std::ostream &strm, const std::vector<T...> &c);

template <typename... T>
std::ostream &WriteType(std::ostream &strm, const std::list<T...> &c);

template <typename... T>
std::ostream &WriteType(std::ostream &strm, const std::set<T...> &c);

template <typename... T>
std::ostream &WriteType(std::ostream &strm, const std::map<T...> &c);

template <typename... T>
std::ostream &WriteType(std::ostream &strm, const std::unordered_map<T...> &c);

template <typename... T>
std::ostream &WriteType(std::ostream &strm, const std::unordered_set<T...> &c);
273 
// Pair case: writes `first`, then `second`, each with the `WriteType`
// overload matching its type.
template <typename S, typename T>
inline std::ostream &WriteType(std::ostream &strm, const std::pair<S, T> &p) {
  const auto &[first, second] = p;
  WriteType(strm, first);
  WriteType(strm, second);
  return strm;
}
281 
282 namespace internal {
// Writes every element of `c` in iteration order using `WriteType`. No
// element count is written.
template <class C>
std::ostream &WriteSequence(std::ostream &strm, const C &c) {
  for (auto it = c.begin(); it != c.end(); ++it) {
    WriteType(strm, *it);
  }
  return strm;
}
290 
291 template <class C>
292 std::ostream &WriteContainer(std::ostream &strm, const C &c) {
293  const int64_t n = c.size();
294  WriteType(strm, n);
295  WriteSequence(strm, c);
296  return strm;
297 }
298 } // namespace internal
299 
300 template <class T, size_t N>
301 std::ostream &WriteType(std::ostream &strm, const std::array<T, N> &c) {
302  return internal::WriteSequence(strm, c);
303 }
304 
305 template <typename... T>
306 std::ostream &WriteType(std::ostream &strm, const std::vector<T...> &c) {
307  return internal::WriteContainer(strm, c);
308 }
309 
310 template <typename... T>
311 std::ostream &WriteType(std::ostream &strm, const std::list<T...> &c) {
312  return internal::WriteContainer(strm, c);
313 }
314 
315 template <typename... T>
316 std::ostream &WriteType(std::ostream &strm, const std::set<T...> &c) {
317  return internal::WriteContainer(strm, c);
318 }
319 
320 template <typename... T>
321 std::ostream &WriteType(std::ostream &strm, const std::map<T...> &c) {
322  return internal::WriteContainer(strm, c);
323 }
324 
325 template <typename... T>
326 std::ostream &WriteType(std::ostream &strm, const std::unordered_map<T...> &c) {
327  return internal::WriteContainer(strm, c);
328 }
329 
330 template <typename... T>
331 std::ostream &WriteType(std::ostream &strm, const std::unordered_set<T...> &c) {
332  return internal::WriteContainer(strm, c);
333 }
334 
335 // Utilities for converting between int64_t or Weight and string.
336 
// Parses a 64-bit signed integer in some base out of an input string. The
// string should consist only of digits (no prefixes such as "0x") and an
// optionally preceding minus. Returns a value iff the entirety of the string is
// consumed during integer parsing, otherwise returns `std::nullopt`.
std::optional<int64_t> ParseInt64(std::string_view s, int base = 10);

// Converts `s` to a 64-bit signed integer. Defined outside this header.
// NOTE(review): presumably `source` and `nline` name the file and line being
// parsed (for error messages), and `*error`, when non-null, is set to report
// whether the conversion failed - confirm against the definition.
int64_t StrToInt64(std::string_view s, std::string_view source, size_t nline,
 bool * error = nullptr);
345 
346 template <typename Weight>
347 Weight StrToWeight(std::string_view s) {
348  Weight w;
349  std::istringstream strm(std::string{s});
350  strm >> w;
351  if (!strm) {
352  FSTERROR() << "StrToWeight: Bad weight: " << s;
353  return Weight::NoWeight();
354  }
355  return w;
356 }
357 
// Formats `w` with its stream insertion operator, using a stream precision of
// 9, and returns the resulting text.
template <typename Weight>
std::string WeightToStr(Weight w) {
  std::ostringstream buffer;
  buffer.precision(9);
  buffer << w;
  std::string text = buffer.str();
  return text;
}
365 
366 // Utilities for reading/writing integer pairs (typically labels).
367 
368 template <typename I>
369 bool ReadIntPairs(std::string_view source,
370  std::vector<std::pair<I, I>> *pairs) {
371  std::ifstream strm(std::string(source), std::ios_base::in);
372  if (!strm) {
373  LOG(ERROR) << "ReadIntPairs: Can't open file: " << source;
374  return false;
375  }
376  const int kLineLen = 8096;
377  char line[kLineLen];
378  size_t nline = 0;
379  pairs->clear();
380  while (strm.getline(line, kLineLen)) {
381  ++nline;
382  std::vector<std::string_view> col =
383  StrSplit(line, ByAnyChar("\n\t "), SkipEmpty());
384  // empty line or comment?
385  if (col.empty() || col[0].empty() || col[0][0] == '#') continue;
386  if (col.size() != 2) {
387  LOG(ERROR) << "ReadIntPairs: Bad number of columns, "
388  << "file = " << source << ", line = " << nline;
389  return false;
390  }
391  bool err;
392  I i1 = StrToInt64(col[0], source, nline, &err);
393  if (err) return false;
394  I i2 = StrToInt64(col[1], source, nline, &err);
395  if (err) return false;
396  pairs->emplace_back(i1, i2);
397  }
398  return true;
399 }
400 
401 template <typename I>
402 bool WriteIntPairs(std::string_view source,
403  const std::vector<std::pair<I, I>> &pairs) {
404  std::ofstream fstrm;
405  if (!source.empty()) {
406  fstrm.open(std::string(source));
407  if (!fstrm) {
408  LOG(ERROR) << "WriteIntPairs: Can't open file: " << source;
409  return false;
410  }
411  }
412  std::ostream &ostrm = fstrm.is_open() ? fstrm : std::cout;
413  for (const auto &pair : pairs) {
414  ostrm << pair.first << "\t" << pair.second << "\n";
415  }
416  return !!ostrm;
417 }
418 
419 // Utilities for reading/writing label pairs.
420 
// Label-typed convenience wrapper: forwards to `ReadIntPairs` and returns its
// result.
template <typename Label>
bool ReadLabelPairs(std::string_view source,
                    std::vector<std::pair<Label, Label>> *pairs) {
  const bool ok = ReadIntPairs(source, pairs);
  return ok;
}
426 
// Label-typed convenience wrapper: forwards to `WriteIntPairs` and returns
// its result.
template <typename Label>
bool WriteLabelPairs(std::string_view source,
                     const std::vector<std::pair<Label, Label>> &pairs) {
  const bool ok = WriteIntPairs(source, pairs);
  return ok;
}
432 
// Utilities for converting a type name to a legal C symbol.

// Defined outside this header. NOTE(review): presumably rewrites `*s` in
// place; the exact character mapping is not visible here - confirm against
// the definition.
void ConvertToLegalCSymbol(std::string *s);

// Utilities for stream I/O.

// Alignment helpers for input and output streams, defined outside this
// header; `align` defaults to `MappedFile::kArchAlignment`.
// NOTE(review): presumably these move the stream to the next multiple of
// `align` and return false on failure - confirm against the definitions.
bool AlignInput(std::istream &strm, size_t align = MappedFile::kArchAlignment);
bool AlignOutput(std::ostream &strm, size_t align = MappedFile::kArchAlignment);
441 
// A set of keys for which membership tests are faster than with a plain STL
// set when most queried non-members fall outside the interval spanned by the
// stored keys. A Key must provide ==, != and <; Erase() and Member()
// additionally apply ++, -- and + to keys. NoKey marks an uninitialized bound
// and should not otherwise be used as a key. Find() returns an STL
// const_iterator to the match, or End() when there is none.
template <class Key, Key NoKey>
class CompactSet {
 public:
  using const_iterator = typename std::set<Key>::const_iterator;

  CompactSet() : min_key_(NoKey), max_key_(NoKey) {}

  CompactSet(const CompactSet &) = default;

  // Adds `key` and widens the tracked bounds to include it.
  void Insert(Key key) {
    set_.insert(key);
    const bool lowers_min = min_key_ == NoKey || key < min_key_;
    const bool raises_max = max_key_ == NoKey || max_key_ < key;
    if (lowers_min) min_key_ = key;
    if (raises_max) max_key_ = key;
  }

  // Removes `key`. The bounds are reset when the set becomes empty; otherwise
  // a bound equal to `key` is moved one step inwards, so the bounds remain
  // valid but are not necessarily stored keys themselves.
  void Erase(Key key) {
    set_.erase(key);
    if (set_.empty()) {
      min_key_ = NoKey;
      max_key_ = NoKey;
      return;
    }
    if (key == min_key_) {
      ++min_key_;
    } else if (key == max_key_) {
      --max_key_;
    }
  }

  // Removes all keys and resets the bounds.
  void Clear() {
    set_.clear();
    min_key_ = NoKey;
    max_key_ = NoKey;
  }

  // Looks `key` up, skipping the set search when it lies outside the bounds.
  const_iterator Find(Key key) const {
    const bool within_bounds =
        min_key_ != NoKey && !(key < min_key_) && !(max_key_ < key);
    return within_bounds ? set_.find(key) : set_.end();
  }

  // Returns whether `key` is stored.
  bool Member(Key key) const {
    // Out of range (this also covers the empty set).
    if (min_key_ == NoKey || key < min_key_ || max_key_ < key) return false;
    // Dense range: as many keys as the bounds can hold, so every in-range key
    // is present. The bounds are known to be initialized at this point.
    if (max_key_ + 1 == min_key_ + set_.size()) return true;
    return set_.count(key) != 0;
  }

  const_iterator Begin() const { return set_.begin(); }

  const_iterator End() const { return set_.end(); }

  // All stored keys are greater than or equal to this value.
  Key LowerBound() const { return min_key_; }

  // All stored keys are less than or equal to this value.
  Key UpperBound() const { return max_key_; }

 private:
  std::set<Key> set_;
  Key min_key_;
  Key max_key_;

  void operator=(const CompactSet &) = delete;
};
513 
514 } // namespace fst
515 
516 #endif // FST_UTIL_H_
bool AlignInput(std::istream &strm, size_t align=MappedFile::kArchAlignment)
Definition: util.cc:85
void ConvertToLegalCSymbol(std::string *s)
Definition: util.cc:75
std::ostream & WriteSequence(std::ostream &strm, const C &c)
Definition: util.h:284
Key LowerBound() const
Definition: util.h:501
bool WriteLabelPairs(std::string_view source, const std::vector< std::pair< Label, Label >> &pairs)
Definition: util.h:428
#define LOG(type)
Definition: log.h:53
bool AlignOutput(std::ostream &strm, size_t align=MappedFile::kArchAlignment)
Definition: util.cc:101
static constexpr size_t kArchAlignment
Definition: mapped-file.h:100
void Erase(Key key)
Definition: util.h:462
constexpr int kLineLen
Definition: symbol-table.h:62
Key UpperBound() const
Definition: util.h:504
internal::StringSplitter StrSplit(std::string_view full, ByAnyChar delim)
Definition: compat.cc:77
typename std::set< Label >::const_iterator const_iterator
Definition: util.h:450
std::ostream & WriteType(std::ostream &strm, const T t)
Definition: util.h:228
#define FSTERROR()
Definition: util.h:56
std::optional< int64_t > ParseInt64(std::string_view s, int base=10)
Definition: util.cc:46
DECLARE_bool(fst_error_fatal)
const_iterator Find(Key key) const
Definition: util.h:478
void Insert(Key key)
Definition: util.h:456
void Clear()
Definition: util.h:473
std::string WeightToStr(Weight w)
Definition: util.h:359
constexpr bool IsScalarIOTypeV
Definition: util.h:72
bool Member(Key key) const
Definition: util.h:486
std::ostream & WriteContainer(std::ostream &strm, const C &c)
Definition: util.h:292
std::istream & ReadType(std::istream &strm, T *t)
Definition: util.h:80
const_iterator Begin() const
Definition: util.h:496
std::istream & ReadContainerType(std::istream &strm, C *c, ReserveFn reserve)
Definition: util.h:141
bool ReadLabelPairs(std::string_view source, std::vector< std::pair< Label, Label >> *pairs)
Definition: util.h:422
int64_t StrToInt64(std::string_view s, std::string_view source, size_t nline, bool *error=nullptr)
Definition: util.cc:62
bool ReadIntPairs(std::string_view source, std::vector< std::pair< I, I >> *pairs)
Definition: util.h:369
std::istream & ReadVectorType(std::istream &strm, std::vector< T, A > *c)
Definition: util.h:158
const_iterator End() const
Definition: util.h:498
Weight StrToWeight(std::string_view s)
Definition: util.h:347
bool WriteIntPairs(std::string_view source, const std::vector< std::pair< I, I >> &pairs)
Definition: util.h:402