FST  openfst-1.8.1
OpenFst Library
util.h
Go to the documentation of this file.
1 // Copyright 2005-2020 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the 'License');
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an 'AS IS' BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // See www.openfst.org for extensive documentation on this weighted
16 // finite-state transducer library.
17 //
18 // FST utility inline definitions.
19 
20 #ifndef FST_UTIL_H_
21 #define FST_UTIL_H_
22 
23 #include <array>
24 #include <iostream>
25 #include <iterator>
26 #include <list>
27 #include <map>
28 #include <set>
29 #include <sstream>
30 #include <string>
31 #include <type_traits>
32 #include <unordered_map>
33 #include <unordered_set>
34 #include <utility>
35 #include <vector>
36 
37 #include <fst/compat.h>
38 #include <fst/types.h>
39 #include <fst/log.h>
40 #include <fstream>
41 #include <fst/mapped-file.h>
42 
43 #include <fst/flags.h>
44 #include <unordered_map>
45 #include <string_view>
46 #include <optional>
47 
48 
49 // Utility for error handling.
50 
51 DECLARE_bool(fst_error_fatal);
52 
53 #define FSTERROR() \
54  (FST_FLAGS_fst_error_fatal ? LOG(FATAL) : LOG(ERROR))
55 
56 namespace fst {
57 
58 // Utility for type I/O.
59 
60 // Reads types from an input stream.
61 
// Generic case: a class type deserializes itself through its member
// `Read(std::istream &)`, whose result is returned unchanged.
template <class T,
          typename std::enable_if<std::is_class<T>::value, T>::type * = nullptr>
inline std::istream &ReadType(std::istream &strm, T *t) {
  return t->Read(strm);
}
68 
// Numeric (boolean, integral, floating-point) case: copies sizeof(T) raw
// bytes from the stream straight into `*t`, with no byte-order conversion.
template <class T, typename std::enable_if<std::is_arithmetic<T>::value,
                                           T>::type * = nullptr>
inline std::istream &ReadType(std::istream &strm, T *t) {
  return strm.read(reinterpret_cast<char *>(t), sizeof(T));
}
75 
76 // String case.
77 inline std::istream &ReadType(std::istream &strm, std::string *s) {
78  s->clear();
79  int32 ns = 0;
80  ReadType(strm, &ns);
81  for (int32 i = 0; i < ns; ++i) {
82  char c;
83  strm.read(&c, 1);
84  *s += c;
85  }
86  return strm;
87 }
88 
// Declares types that can be read from an input stream. The container
// overloads are declared ahead of the pair and container helpers that follow,
// which call ReadType on their element types.
template <class... T>
std::istream &ReadType(std::istream &strm, std::vector<T...> *c);
template <class... T>
std::istream &ReadType(std::istream &strm, std::list<T...> *c);
template <class... T>
std::istream &ReadType(std::istream &strm, std::set<T...> *c);
template <class... T>
std::istream &ReadType(std::istream &strm, std::map<T...> *c);
template <class... T>
std::istream &ReadType(std::istream &strm, std::unordered_map<T...> *c);
template <class... T>
std::istream &ReadType(std::istream &strm, std::unordered_set<T...> *c);
102 
// Pair case: reads `first`, then `second`, each through the ReadType overload
// for its own type.
template <typename S, typename T>
inline std::istream &ReadType(std::istream &strm, std::pair<S, T> *p) {
  ReadType(strm, &p->first);
  ReadType(strm, &p->second);
  return strm;
}
110 
// Pair case with a const `first` member (the value_type of std::map and
// std::unordered_map). The const is cast away so the key can be filled in
// place; ReadContainerType relies on this when it reads into a freshly
// declared `C::value_type`.
template <typename S, typename T>
inline std::istream &ReadType(std::istream &strm, std::pair<const S, T> *p) {
  ReadType(strm, const_cast<S *>(&p->first));
  ReadType(strm, &p->second);
  return strm;
}
117 
118 namespace internal {
119 template <class C, class ReserveFn>
120 std::istream &ReadContainerType(std::istream &strm, C *c, ReserveFn reserve) {
121  c->clear();
122  int64 n = 0;
123  ReadType(strm, &n);
124  reserve(c, n);
125  auto insert = std::inserter(*c, c->begin());
126  for (int64 i = 0; i < n; ++i) {
127  typename C::value_type value;
128  ReadType(strm, &value);
129  *insert = value;
130  }
131  return strm;
132 }
133 } // namespace internal
134 
// Array case: reads exactly N elements in order. No length prefix is read,
// since the size is part of the type.
template <class T, size_t N>
std::istream &ReadType(std::istream &strm, std::array<T, N> *c) {
  for (auto &v : *c) ReadType(strm, &v);
  return strm;
}
140 
141 template <class... T>
142 std::istream &ReadType(std::istream &strm, std::vector<T...> *c) {
144  strm, c, [](decltype(c) v, int n) { v->reserve(n); });
145 }
146 
147 template <class... T>
148 std::istream &ReadType(std::istream &strm, std::list<T...> *c) {
149  return internal::ReadContainerType(strm, c, [](decltype(c) v, int n) {});
150 }
151 
152 template <class... T>
153 std::istream &ReadType(std::istream &strm, std::set<T...> *c) {
154  return internal::ReadContainerType(strm, c, [](decltype(c) v, int n) {});
155 }
156 
157 template <class... T>
158 std::istream &ReadType(std::istream &strm, std::map<T...> *c) {
159  return internal::ReadContainerType(strm, c, [](decltype(c) v, int n) {});
160 }
161 
162 template <class... T>
163 std::istream &ReadType(std::istream &strm, std::unordered_set<T...> *c) {
165  strm, c, [](decltype(c) v, int n) { v->reserve(n); });
166 }
167 
168 template <class... T>
169 std::istream &ReadType(std::istream &strm, std::unordered_map<T...> *c) {
171  strm, c, [](decltype(c) v, int n) { v->reserve(n); });
172 }
173 
174 // Writes types to an output stream.
175 
// Generic case: a class type serializes itself through its member
// `Write(std::ostream &)`. Whatever `Write` returns is ignored; the stream is
// returned so callers can test its state. Note that `t` is taken by value, so
// the argument is copied on each call.
template <class T,
          typename std::enable_if<std::is_class<T>::value, T>::type * = nullptr>
inline std::ostream &WriteType(std::ostream &strm, const T t) {
  t.Write(strm);
  return strm;
}
183 
// Numeric (boolean, integral, floating-point) case: emits the sizeof(T) raw
// bytes of `t` exactly as they sit in memory, with no byte-order conversion.
template <class T, typename std::enable_if<std::is_arithmetic<T>::value,
                                           T>::type * = nullptr>
inline std::ostream &WriteType(std::ostream &strm, const T t) {
  return strm.write(reinterpret_cast<const char *>(&t), sizeof(T));
}
190 
191 // String case.
192 inline std::ostream &WriteType(std::ostream &strm, const std::string &s) {
193  int32 ns = s.size();
194  WriteType(strm, ns);
195  return strm.write(s.data(), ns);
196 }
197 
// Declares types that can be written to an output stream. The container
// overloads are declared ahead of the pair and sequence helpers that follow,
// which call WriteType on their element types.

template <typename... T>
std::ostream &WriteType(std::ostream &strm, const std::vector<T...> &c);

template <typename... T>
std::ostream &WriteType(std::ostream &strm, const std::list<T...> &c);

template <typename... T>
std::ostream &WriteType(std::ostream &strm, const std::set<T...> &c);

template <typename... T>
std::ostream &WriteType(std::ostream &strm, const std::map<T...> &c);

template <typename... T>
std::ostream &WriteType(std::ostream &strm, const std::unordered_map<T...> &c);

template <typename... T>
std::ostream &WriteType(std::ostream &strm, const std::unordered_set<T...> &c);
217 
// Pair case: writes `first`, then `second`, each through the WriteType
// overload for its own type.
template <typename S, typename T>
inline std::ostream &WriteType(std::ostream &strm,
                               const std::pair<S, T> &p) {
  WriteType(strm, p.first);
  WriteType(strm, p.second);
  return strm;
}
226 
227 namespace internal {
228 template <class C>
229 std::ostream &WriteSequence(std::ostream &strm, const C &c) {
230  for (const auto &e : c) {
231  WriteType(strm, e);
232  }
233  return strm;
234 }
235 
236 template <class C>
237 std::ostream &WriteContainer(std::ostream &strm, const C &c) {
238  const int64 n = c.size();
239  WriteType(strm, n);
240  WriteSequence(strm, c);
241  return strm;
242 }
243 } // namespace internal
244 
245 template <class T, size_t N>
246 std::ostream &WriteType(std::ostream &strm, const std::array<T, N> &c) {
247  return internal::WriteSequence(strm, c);
248 }
249 
250 template <typename... T>
251 std::ostream &WriteType(std::ostream &strm, const std::vector<T...> &c) {
252  return internal::WriteContainer(strm, c);
253 }
254 
255 template <typename... T>
256 std::ostream &WriteType(std::ostream &strm, const std::list<T...> &c) {
257  return internal::WriteContainer(strm, c);
258 }
259 
260 template <typename... T>
261 std::ostream &WriteType(std::ostream &strm, const std::set<T...> &c) {
262  return internal::WriteContainer(strm, c);
263 }
264 
265 template <typename... T>
266 std::ostream &WriteType(std::ostream &strm, const std::map<T...> &c) {
267  return internal::WriteContainer(strm, c);
268 }
269 
270 template <typename... T>
271 std::ostream &WriteType(std::ostream &strm, const std::unordered_map<T...> &c) {
272  return internal::WriteContainer(strm, c);
273 }
274 
275 template <typename... T>
276 std::ostream &WriteType(std::ostream &strm, const std::unordered_set<T...> &c) {
277  return internal::WriteContainer(strm, c);
278 }
279 
280 // Utilities for converting between int64 or Weight and string.
281 
282 // Parses a 64-bit signed integer out of an input string. Returns a value iff
283 // the entirety of the string is consumed during integer parsing, otherwise
284 // returning `std::nullopt`.
285 std::optional<int64> ParseInt64(std::string_view s);
286 
287 int64 StrToInt64(std::string_view s, std::string_view source, size_t nline,
288  bool allow_negative, bool *error = nullptr);
289 
290 template <typename Weight>
291 Weight StrToWeight(std::string_view s) {
292  Weight w;
293  std::istringstream strm(std::string{s});
294  strm >> w;
295  if (!strm) {
296  FSTERROR() << "StrToWeight: Bad weight: " << s;
297  return Weight::NoWeight();
298  }
299  return w;
300 }
301 
// Formats `w` with its stream insertion operator at precision 9 and appends
// the text to `*s`. Existing contents of `*s` are kept.
template <typename Weight>
void WeightToStr(Weight w, std::string *s) {
  std::ostringstream strm;
  strm.precision(9);
  strm << w;
  // Materialize the buffer once; calling str() twice built two temporaries.
  s->append(strm.str());
}
309 
310 // Utilities for reading/writing integer pairs (typically labels).
311 
// Splits `line` on any of the chars in `delim`, dropping empty spans if
// `omit_empty_strings` is true.
std::vector<std::string_view> SplitString(std::string_view line,
                                          std::string_view delim,
                                          bool omit_empty_strings);
317 
318 template <typename I>
319 bool ReadIntPairs(const std::string &source,
320  std::vector<std::pair<I, I>> *pairs,
321  bool allow_negative = false) {
322  std::ifstream strm(source, std::ios_base::in);
323  if (!strm) {
324  LOG(ERROR) << "ReadIntPairs: Can't open file: " << source;
325  return false;
326  }
327  const int kLineLen = 8096;
328  char line[kLineLen];
329  size_t nline = 0;
330  pairs->clear();
331  while (strm.getline(line, kLineLen)) {
332  ++nline;
333  std::vector<std::string_view> col = SplitString(line, "\n\t ", true);
334  // empty line or comment?
335  if (col.empty() || col[0].empty() || col[0][0] == '#') continue;
336  if (col.size() != 2) {
337  LOG(ERROR) << "ReadIntPairs: Bad number of columns, "
338  << "file = " << source << ", line = " << nline;
339  return false;
340  }
341  bool err;
342  I i1 = StrToInt64(col[0], source, nline, allow_negative, &err);
343  if (err) return false;
344  I i2 = StrToInt64(col[1], source, nline, allow_negative, &err);
345  if (err) return false;
346  pairs->emplace_back(i1, i2);
347  }
348  return true;
349 }
350 
351 template <typename I>
352 bool WriteIntPairs(const std::string &source,
353  const std::vector<std::pair<I, I>> &pairs) {
354  std::ofstream fstrm;
355  if (!source.empty()) {
356  fstrm.open(source);
357  if (!fstrm) {
358  LOG(ERROR) << "WriteIntPairs: Can't open file: " << source;
359  return false;
360  }
361  }
362  std::ostream &ostrm = fstrm.is_open() ? fstrm : std::cout;
363  for (const auto &pair : pairs) {
364  ostrm << pair.first << "\t" << pair.second << "\n";
365  }
366  return !!ostrm;
367 }
368 
369 // Utilities for reading/writing label pairs.
370 
// Reads label pairs from the file `source`. Forwards all arguments to
// ReadIntPairs and returns its result.
template <typename Label>
bool ReadLabelPairs(const std::string &source,
                    std::vector<std::pair<Label, Label>> *pairs,
                    bool allow_negative = false) {
  return ReadIntPairs(source, pairs, allow_negative);
}
377 
// Writes label pairs to the file `source`. Forwards all arguments to
// WriteIntPairs and returns its result.
template <typename Label>
bool WriteLabelPairs(const std::string &source,
                     const std::vector<std::pair<Label, Label>> &pairs) {
  return WriteIntPairs(source, pairs);
}
383 
// Utilities for converting a type name to a legal C symbol.

// Rewrites `*s` in place.
// NOTE(review): defined out of line; the exact character mapping is not
// visible from this header.
void ConvertToLegalCSymbol(std::string *s);
387 
388 // Utilities for stream I/O.
389 
390 bool AlignInput(std::istream &strm, size_t align = MappedFile::kArchAlignment);
391 bool AlignOutput(std::ostream &strm, size_t align = MappedFile::kArchAlignment);
392 
// An associative container for which testing membership is faster than an STL
// set if members are restricted to an interval that excludes most non-members.
// A Key must have ==, !=, and < operators defined. Element NoKey should be a
// key that marks an uninitialized key and is otherwise unused. Find() returns
// an STL const_iterator to the match found, otherwise it equals End().
//
// The cached bounds are kept exact: while the set is non-empty they are the
// smallest and largest stored keys, and they are NoKey when it is empty.
template <class Key, Key NoKey>
class CompactSet {
 public:
  using const_iterator = typename std::set<Key>::const_iterator;

  CompactSet() : min_key_(NoKey), max_key_(NoKey) {}

  CompactSet(const CompactSet &) = default;

  // Adds `key` and widens the cached bounds if necessary.
  void Insert(Key key) {
    set_.insert(key);
    if (min_key_ == NoKey || key < min_key_) min_key_ = key;
    if (max_key_ == NoKey || max_key_ < key) max_key_ = key;
  }

  // Removes `key` if present. The bounds are then re-read from the set rather
  // than nudged by one, so they stay exact and the dense-range shortcut in
  // Member() keeps working after an endpoint is erased. Erasing a key that is
  // not stored changes nothing.
  void Erase(Key key) {
    if (set_.erase(key) == 0) return;
    if (set_.empty()) {
      min_key_ = max_key_ = NoKey;
    } else {
      min_key_ = *set_.begin();
      max_key_ = *set_.rbegin();
    }
  }

  // Removes every key and resets the bounds.
  void Clear() {
    set_.clear();
    min_key_ = max_key_ = NoKey;
  }

  // Returns an iterator to `key`, or End() if it is absent. Keys outside the
  // cached bounds are rejected without searching the set.
  const_iterator Find(Key key) const {
    if (min_key_ == NoKey || key < min_key_ || max_key_ < key) {
      return set_.end();
    } else {
      return set_.find(key);
    }
  }

  // Returns true iff `key` is stored.
  bool Member(Key key) const {
    if (min_key_ == NoKey || key < min_key_ || max_key_ < key) {
      return false;  // out of range
    } else if (max_key_ + 1 == min_key_ + set_.size()) {
      return true;  // dense range
    } else {
      return set_.count(key) != 0;
    }
  }

  const_iterator Begin() const { return set_.begin(); }

  const_iterator End() const { return set_.end(); }

  // All stored keys are greater than or equal to this value.
  Key LowerBound() const { return min_key_; }

  // All stored keys are less than or equal to this value.
  Key UpperBound() const { return max_key_; }

 private:
  std::set<Key> set_;
  Key min_key_;
  Key max_key_;

  void operator=(const CompactSet &) = delete;
};
464 
465 } // namespace fst
466 
467 #endif // FST_UTIL_H_
bool AlignInput(std::istream &strm, size_t align=MappedFile::kArchAlignment)
Definition: util.cc:91
void ConvertToLegalCSymbol(std::string *s)
Definition: util.cc:81
std::vector< std::string_view > SplitString(std::string_view line, std::string_view delim, bool omit_empty_strings)
Definition: util.cc:42
bool WriteIntPairs(const std::string &source, const std::vector< std::pair< I, I >> &pairs)
Definition: util.h:352
std::ostream & WriteSequence(std::ostream &strm, const C &c)
Definition: util.h:229
Key LowerBound() const
Definition: util.h:452
#define LOG(type)
Definition: log.h:46
bool AlignOutput(std::ostream &strm, size_t align=MappedFile::kArchAlignment)
Definition: util.cc:107
static constexpr size_t kArchAlignment
Definition: mapped-file.h:98
void Erase(Key key)
Definition: util.h:413
Key UpperBound() const
Definition: util.h:455
typename std::set< Label >::const_iterator const_iterator
Definition: util.h:401
int64_t int64
Definition: types.h:27
std::ostream & WriteType(std::ostream &strm, const T t)
Definition: util.h:179
#define FSTERROR()
Definition: util.h:53
bool ReadLabelPairs(const std::string &source, std::vector< std::pair< Label, Label >> *pairs, bool allow_negative=false)
Definition: util.h:372
std::optional< int64 > ParseInt64(std::string_view s)
Definition: util.cc:59
void WeightToStr(Weight w, std::string *s)
Definition: util.h:303
DECLARE_bool(fst_error_fatal)
const_iterator Find(Key key) const
Definition: util.h:429
int64 StrToInt64(std::string_view s, std::string_view source, size_t nline, bool allow_negative, bool *error=nullptr)
Definition: util.cc:68
void Insert(Key key)
Definition: util.h:407
void Clear()
Definition: util.h:424
const int kLineLen
Definition: symbol-table.cc:44
bool Member(Key key) const
Definition: util.h:437
bool ReadIntPairs(const std::string &source, std::vector< std::pair< I, I >> *pairs, bool allow_negative=false)
Definition: util.h:319
int32_t int32
Definition: types.h:26
std::ostream & WriteContainer(std::ostream &strm, const C &c)
Definition: util.h:237
std::istream & ReadType(std::istream &strm, T *t)
Definition: util.h:65
const_iterator Begin() const
Definition: util.h:447
std::istream & ReadContainerType(std::istream &strm, C *c, ReserveFn reserve)
Definition: util.h:120
bool WriteLabelPairs(const std::string &source, const std::vector< std::pair< Label, Label >> &pairs)
Definition: util.h:379
const_iterator End() const
Definition: util.h:449
Weight StrToWeight(std::string_view s)
Definition: util.h:291