FST  openfst-1.8.2
OpenFst Library
util.h
Go to the documentation of this file.
1 // Copyright 2005-2020 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the 'License');
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an 'AS IS' BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // See www.openfst.org for extensive documentation on this weighted
16 // finite-state transducer library.
17 //
18 // FST utility inline definitions.
19 
20 #ifndef FST_UTIL_H_
21 #define FST_UTIL_H_
22 
23 #include <array>
24 #include <cstdint>
25 #include <iostream>
26 #include <iterator>
27 #include <list>
28 #include <map>
29 #include <set>
30 #include <sstream>
31 #include <string>
32 #include <type_traits>
33 #include <unordered_map>
34 #include <unordered_set>
35 #include <utility>
36 #include <vector>
37 
38 #include <fst/compat.h>
39 #include <fst/log.h>
40 #include <fstream>
41 #include <fst/mapped-file.h>
42 
43 #include <fst/flags.h>
44 #include <unordered_map>
45 #include <string_view>
46 #include <optional>
47 
48 
49 // Utility for error handling.
50 
51 DECLARE_bool(fst_error_fatal);
52 
53 #define FSTERROR() \
54  LOG(LEVEL(FST_FLAGS_fst_error_fatal ? base_logging::FATAL \
55  : base_logging::ERROR))
56 
57 namespace fst {
58 
59 // Utility for type I/O.
60 
61 // Reads types from an input stream.
62 
63 // Generic case.
64 template <class T, typename std::enable_if_t<std::is_class_v<T>, T> * = nullptr>
65 inline std::istream &ReadType(std::istream &strm, T *t) {
66  return t->Read(strm);
67 }
68 
69 // Numeric (boolean, integral, floating-point) case.
70 template <class T,
71  typename std::enable_if_t<std::is_arithmetic_v<T>, T> * = nullptr>
72 inline std::istream &ReadType(std::istream &strm, T *t) {
73  return strm.read(reinterpret_cast<char *>(t), sizeof(T));
74 }
75 
76 // Numeric (boolean, integral, floating-point) case only.
77 template <class T>
78 inline std::istream &ReadTypeN(std::istream &strm, size_t n, T *t) {
79  static_assert(std::is_arithmetic_v<T>, "Type not supported for batch read.");
80  return strm.read(reinterpret_cast<char *>(t), sizeof(T) * n);
81 }
82 
83 // String case.
84 inline std::istream &ReadType(std::istream &strm, std::string *s) {
85  s->clear();
86  int32_t ns = 0;
87  ReadType(strm, &ns);
88  if (ns <= 0) return strm;
89  s->resize(ns);
90  ReadTypeN(strm, ns, s->data());
91  return strm;
92 }
93 
94 // Declares types that can be read from an input stream.
95 template <class... T>
96 std::istream &ReadType(std::istream &strm, std::vector<T...> *c);
97 template <class... T>
98 std::istream &ReadType(std::istream &strm, std::list<T...> *c);
99 template <class... T>
100 std::istream &ReadType(std::istream &strm, std::set<T...> *c);
101 template <class... T>
102 std::istream &ReadType(std::istream &strm, std::map<T...> *c);
103 template <class... T>
104 std::istream &ReadType(std::istream &strm, std::unordered_map<T...> *c);
105 template <class... T>
106 std::istream &ReadType(std::istream &strm, std::unordered_set<T...> *c);
107 
108 // Pair case.
109 template <typename S, typename T>
110 inline std::istream &ReadType(std::istream &strm, std::pair<S, T> *p) {
111  ReadType(strm, &p->first);
112  ReadType(strm, &p->second);
113  return strm;
114 }
115 
116 template <typename S, typename T>
117 inline std::istream &ReadType(std::istream &strm, std::pair<const S, T> *p) {
118  ReadType(strm, const_cast<S *>(&p->first));
119  ReadType(strm, &p->second);
120  return strm;
121 }
122 
123 namespace internal {
124 template <class C, class ReserveFn>
125 std::istream &ReadContainerType(std::istream &strm, C *c, ReserveFn reserve) {
126  c->clear();
127  int64_t n = 0;
128  ReadType(strm, &n);
129  reserve(c, n);
130  auto insert = std::inserter(*c, c->begin());
131  for (int64_t i = 0; i < n; ++i) {
132  typename C::value_type value;
133  ReadType(strm, &value);
134  *insert = value;
135  }
136  return strm;
137 }
138 
139 // Generic vector case.
140 template <typename T, class A,
141  typename std::enable_if_t<std::is_class_v<T>, T> * = nullptr>
142 inline std::istream &ReadVectorType(std::istream &strm, std::vector<T, A> *c) {
144  strm, c, [](decltype(c) v, int n) { v->reserve(n); });
145 }
146 
147 // Vector of numerics (boolean, integral, floating-point, char) case.
148 template <typename T, class A,
149  typename std::enable_if_t<std::is_arithmetic_v<T>, T> * = nullptr>
150 inline std::istream &ReadVectorType(std::istream &strm, std::vector<T, A> *c) {
151  c->clear();
152  int64_t n = 0;
153  ReadType(strm, &n);
154  if (n == 0) return strm;
155  c->resize(n);
156  ReadTypeN(strm, n, c->data());
157  return strm;
158 }
159 } // namespace internal
160 
161 template <class T, size_t N>
162 std::istream &ReadType(std::istream &strm, std::array<T, N> *c) {
163  if (std::is_arithmetic_v<T>) {
164  ReadTypeN(strm, c->size(), c->data());
165  } else {
166  for (auto &v : *c) ReadType(strm, &v);
167  }
168  return strm;
169 }
170 
171 template <class... T>
172 std::istream &ReadType(std::istream &strm, std::vector<T...> *c) {
173  return internal::ReadVectorType(strm, c);
174 }
175 
176 template <class... T>
177 std::istream &ReadType(std::istream &strm, std::list<T...> *c) {
178  return internal::ReadContainerType(strm, c, [](decltype(c) v, int n) {});
179 }
180 
181 template <class... T>
182 std::istream &ReadType(std::istream &strm, std::set<T...> *c) {
183  return internal::ReadContainerType(strm, c, [](decltype(c) v, int n) {});
184 }
185 
186 template <class... T>
187 std::istream &ReadType(std::istream &strm, std::map<T...> *c) {
188  return internal::ReadContainerType(strm, c, [](decltype(c) v, int n) {});
189 }
190 
191 template <class... T>
192 std::istream &ReadType(std::istream &strm, std::unordered_set<T...> *c) {
194  strm, c, [](decltype(c) v, int n) { v->reserve(n); });
195 }
196 
197 template <class... T>
198 std::istream &ReadType(std::istream &strm, std::unordered_map<T...> *c) {
200  strm, c, [](decltype(c) v, int n) { v->reserve(n); });
201 }
202 
203 // Writes types to an output stream.
204 
205 // Generic case.
206 template <class T, typename std::enable_if<
207  std::is_class<T>::value &&
208  // `string_view` is handled separately below.
209  !std::is_convertible<T, std::string_view>::value,
210  T>::type * = nullptr>
211 inline std::ostream &WriteType(std::ostream &strm, const T t) {
212  t.Write(strm);
213  return strm;
214 }
215 
216 // Numeric (boolean, integral, floating-point) case.
217 template <class T,
218  typename std::enable_if_t<std::is_arithmetic_v<T>, T> * = nullptr>
219 inline std::ostream &WriteType(std::ostream &strm, const T t) {
220  return strm.write(reinterpret_cast<const char *>(&t), sizeof(T));
221 }
222 
223 inline std::ostream &WriteType(std::ostream &strm, std::string_view s) {
224  int32_t ns = s.size();
225  WriteType(strm, ns);
226  return strm.write(s.data(), ns);
227 }
228 
229 // Declares types that can be written to an output stream.
230 
231 template <typename... T>
232 std::ostream &WriteType(std::ostream &strm, const std::vector<T...> &c);
233 
234 template <typename... T>
235 std::ostream &WriteType(std::ostream &strm, const std::list<T...> &c);
236 
237 template <typename... T>
238 std::ostream &WriteType(std::ostream &strm, const std::set<T...> &c);
239 
240 template <typename... T>
241 std::ostream &WriteType(std::ostream &strm, const std::map<T...> &c);
242 
243 template <typename... T>
244 std::ostream &WriteType(std::ostream &strm, const std::unordered_map<T...> &c);
245 
246 template <typename... T>
247 std::ostream &WriteType(std::ostream &strm, const std::unordered_set<T...> &c);
248 
249 // Pair case.
250 template <typename S, typename T>
251 inline std::ostream &WriteType(std::ostream &strm,
252  const std::pair<S, T> &p) {
253  WriteType(strm, p.first);
254  WriteType(strm, p.second);
255  return strm;
256 }
257 
258 namespace internal {
259 template <class C>
260 std::ostream &WriteSequence(std::ostream &strm, const C &c) {
261  for (const auto &e : c) {
262  WriteType(strm, e);
263  }
264  return strm;
265 }
266 
267 template <class C>
268 std::ostream &WriteContainer(std::ostream &strm, const C &c) {
269  const int64_t n = c.size();
270  WriteType(strm, n);
271  WriteSequence(strm, c);
272  return strm;
273 }
274 } // namespace internal
275 
276 template <class T, size_t N>
277 std::ostream &WriteType(std::ostream &strm, const std::array<T, N> &c) {
278  return internal::WriteSequence(strm, c);
279 }
280 
281 template <typename... T>
282 std::ostream &WriteType(std::ostream &strm, const std::vector<T...> &c) {
283  return internal::WriteContainer(strm, c);
284 }
285 
286 template <typename... T>
287 std::ostream &WriteType(std::ostream &strm, const std::list<T...> &c) {
288  return internal::WriteContainer(strm, c);
289 }
290 
291 template <typename... T>
292 std::ostream &WriteType(std::ostream &strm, const std::set<T...> &c) {
293  return internal::WriteContainer(strm, c);
294 }
295 
296 template <typename... T>
297 std::ostream &WriteType(std::ostream &strm, const std::map<T...> &c) {
298  return internal::WriteContainer(strm, c);
299 }
300 
301 template <typename... T>
302 std::ostream &WriteType(std::ostream &strm, const std::unordered_map<T...> &c) {
303  return internal::WriteContainer(strm, c);
304 }
305 
306 template <typename... T>
307 std::ostream &WriteType(std::ostream &strm, const std::unordered_set<T...> &c) {
308  return internal::WriteContainer(strm, c);
309 }
310 
311 // Utilities for converting between int64_t or Weight and string.
312 
313 // Parses a 64-bit signed integer in some base out of an input string. The
314 // string should consist only of digits (no prefixes such as "0x") and an
315 // optionally preceding minus. Returns a value iff the entirety of the string is
316 // consumed during integer parsing, otherwise returns `std::nullopt`.
317 std::optional<int64_t> ParseInt64(std::string_view s, int base = 10);
318 
319 int64_t StrToInt64(std::string_view s, std::string_view source, size_t nline,
320  bool allow_negative, bool *error = nullptr);
321 
322 template <typename Weight>
323 Weight StrToWeight(std::string_view s) {
324  Weight w;
325  std::istringstream strm(std::string{s});
326  strm >> w;
327  if (!strm) {
328  FSTERROR() << "StrToWeight: Bad weight: " << s;
329  return Weight::NoWeight();
330  }
331  return w;
332 }
333 
334 template <typename Weight>
335 std::string WeightToStr(Weight w) {
336  std::ostringstream strm;
337  strm.precision(9);
338  strm << w;
339  return strm.str();
340 }
341 
342 // Utilities for reading/writing integer pairs (typically labels).
343 
344 template <typename I>
345 bool ReadIntPairs(const std::string &source,
346  std::vector<std::pair<I, I>> *pairs,
347  bool allow_negative = false) {
348  std::ifstream strm(source, std::ios_base::in);
349  if (!strm) {
350  LOG(ERROR) << "ReadIntPairs: Can't open file: " << source;
351  return false;
352  }
353  const int kLineLen = 8096;
354  char line[kLineLen];
355  size_t nline = 0;
356  pairs->clear();
357  while (strm.getline(line, kLineLen)) {
358  ++nline;
359  std::vector<std::string_view> col =
360  StrSplit(line, ByAnyChar("\n\t "), SkipEmpty());
361  // empty line or comment?
362  if (col.empty() || col[0].empty() || col[0][0] == '#') continue;
363  if (col.size() != 2) {
364  LOG(ERROR) << "ReadIntPairs: Bad number of columns, "
365  << "file = " << source << ", line = " << nline;
366  return false;
367  }
368  bool err;
369  I i1 = StrToInt64(col[0], source, nline, allow_negative, &err);
370  if (err) return false;
371  I i2 = StrToInt64(col[1], source, nline, allow_negative, &err);
372  if (err) return false;
373  pairs->emplace_back(i1, i2);
374  }
375  return true;
376 }
377 
378 template <typename I>
379 bool WriteIntPairs(const std::string &source,
380  const std::vector<std::pair<I, I>> &pairs) {
381  std::ofstream fstrm;
382  if (!source.empty()) {
383  fstrm.open(source);
384  if (!fstrm) {
385  LOG(ERROR) << "WriteIntPairs: Can't open file: " << source;
386  return false;
387  }
388  }
389  std::ostream &ostrm = fstrm.is_open() ? fstrm : std::cout;
390  for (const auto &pair : pairs) {
391  ostrm << pair.first << "\t" << pair.second << "\n";
392  }
393  return !!ostrm;
394 }
395 
396 // Utilities for reading/writing label pairs.
397 
398 template <typename Label>
399 bool ReadLabelPairs(const std::string &source,
400  std::vector<std::pair<Label, Label>> *pairs,
401  bool allow_negative = false) {
402  return ReadIntPairs(source, pairs, allow_negative);
403 }
404 
405 template <typename Label>
406 bool WriteLabelPairs(const std::string &source,
407  const std::vector<std::pair<Label, Label>> &pairs) {
408  return WriteIntPairs(source, pairs);
409 }
410 
411 // Utilities for converting a type name to a legal C symbol.
412 
413 void ConvertToLegalCSymbol(std::string *s);
414 
415 // Utilities for stream I/O.
416 
417 bool AlignInput(std::istream &strm, size_t align = MappedFile::kArchAlignment);
418 bool AlignOutput(std::ostream &strm, size_t align = MappedFile::kArchAlignment);
419 
420 // An associative container for which testing membership is faster than an STL
421 // set if members are restricted to an interval that excludes most non-members.
422 // A Key must have ==, !=, and < operators defined. Element NoKey should be a
423 // key that marks an uninitialized key and is otherwise unused. Find() returns
424 // an STL const_iterator to the match found, otherwise it equals End().
425 template <class Key, Key NoKey>
426 class CompactSet {
427  public:
428  using const_iterator = typename std::set<Key>::const_iterator;
429 
430  CompactSet() : min_key_(NoKey), max_key_(NoKey) {}
431 
432  CompactSet(const CompactSet &) = default;
433 
434  void Insert(Key key) {
435  set_.insert(key);
436  if (min_key_ == NoKey || key < min_key_) min_key_ = key;
437  if (max_key_ == NoKey || max_key_ < key) max_key_ = key;
438  }
439 
440  void Erase(Key key) {
441  set_.erase(key);
442  if (set_.empty()) {
443  min_key_ = max_key_ = NoKey;
444  } else if (key == min_key_) {
445  ++min_key_;
446  } else if (key == max_key_) {
447  --max_key_;
448  }
449  }
450 
451  void Clear() {
452  set_.clear();
453  min_key_ = max_key_ = NoKey;
454  }
455 
456  const_iterator Find(Key key) const {
457  if (min_key_ == NoKey || key < min_key_ || max_key_ < key) {
458  return set_.end();
459  } else {
460  return set_.find(key);
461  }
462  }
463 
464  bool Member(Key key) const {
465  if (min_key_ == NoKey || key < min_key_ || max_key_ < key) {
466  return false; // out of range
467  } else if (min_key_ != NoKey && max_key_ + 1 == min_key_ + set_.size()) {
468  return true; // dense range
469  } else {
470  return set_.count(key);
471  }
472  }
473 
474  const_iterator Begin() const { return set_.begin(); }
475 
476  const_iterator End() const { return set_.end(); }
477 
478  // All stored keys are greater than or equal to this value.
479  Key LowerBound() const { return min_key_; }
480 
481  // All stored keys are less than or equal to this value.
482  Key UpperBound() const { return max_key_; }
483 
484  private:
485  std::set<Key> set_;
486  Key min_key_;
487  Key max_key_;
488 
489  void operator=(const CompactSet &) = delete;
490 };
491 
492 } // namespace fst
493 
494 #endif // FST_UTIL_H_
bool AlignInput(std::istream &strm, size_t align=MappedFile::kArchAlignment)
Definition: util.cc:81
void ConvertToLegalCSymbol(std::string *s)
Definition: util.cc:71
bool WriteIntPairs(const std::string &source, const std::vector< std::pair< I, I >> &pairs)
Definition: util.h:379
std::ostream & WriteSequence(std::ostream &strm, const C &c)
Definition: util.h:260
Key LowerBound() const
Definition: util.h:479
#define LOG(type)
Definition: log.h:49
bool AlignOutput(std::ostream &strm, size_t align=MappedFile::kArchAlignment)
Definition: util.cc:97
static constexpr size_t kArchAlignment
Definition: mapped-file.h:98
void Erase(Key key)
Definition: util.h:440
constexpr int kLineLen
Definition: symbol-table.h:63
Key UpperBound() const
Definition: util.h:482
internal::StringSplitter StrSplit(std::string_view full, ByAnyChar delim)
Definition: compat.cc:81
typename std::set< Label >::const_iterator const_iterator
Definition: util.h:428
std::ostream & WriteType(std::ostream &strm, const T t)
Definition: util.h:211
#define FSTERROR()
Definition: util.h:53
std::optional< int64_t > ParseInt64(std::string_view s, int base=10)
Definition: util.cc:42
bool ReadLabelPairs(const std::string &source, std::vector< std::pair< Label, Label >> *pairs, bool allow_negative=false)
Definition: util.h:399
DECLARE_bool(fst_error_fatal)
const_iterator Find(Key key) const
Definition: util.h:456
void Insert(Key key)
Definition: util.h:434
void Clear()
Definition: util.h:451
std::string WeightToStr(Weight w)
Definition: util.h:335
bool Member(Key key) const
Definition: util.h:464
bool ReadIntPairs(const std::string &source, std::vector< std::pair< I, I >> *pairs, bool allow_negative=false)
Definition: util.h:345
std::ostream & WriteContainer(std::ostream &strm, const C &c)
Definition: util.h:268
std::istream & ReadType(std::istream &strm, T *t)
Definition: util.h:65
const_iterator Begin() const
Definition: util.h:474
std::istream & ReadContainerType(std::istream &strm, C *c, ReserveFn reserve)
Definition: util.h:125
int64_t StrToInt64(std::string_view s, std::string_view source, size_t nline, bool allow_negative, bool *error=nullptr)
Definition: util.cc:58
bool WriteLabelPairs(const std::string &source, const std::vector< std::pair< Label, Label >> &pairs)
Definition: util.h:406
std::istream & ReadVectorType(std::istream &strm, std::vector< T, A > *c)
Definition: util.h:142
const_iterator End() const
Definition: util.h:476
Weight StrToWeight(std::string_view s)
Definition: util.h:323
std::istream & ReadTypeN(std::istream &strm, size_t n, T *t)
Definition: util.h:78