FST  openfst-1.7.1
OpenFst Library
util.h
Go to the documentation of this file.
1 // See www.openfst.org for extensive documentation on this weighted
2 // finite-state transducer library.
3 //
4 // FST utility inline definitions.
5 
6 #ifndef FST_UTIL_H_
7 #define FST_UTIL_H_
8 
9 #include <iostream>
10 #include <iterator>
11 #include <list>
12 #include <map>
13 #include <set>
14 #include <sstream>
15 #include <string>
16 #include <type_traits>
17 #include <unordered_map>
18 #include <unordered_set>
19 #include <utility>
20 #include <vector>
21 
22 #include <fst/compat.h>
23 #include <fst/types.h>
24 #include <fst/log.h>
25 #include <fstream>
26 
27 #include <fst/flags.h>
28 #include <unordered_map>
29 
30 
31 // Utility for error handling.
32 
33 DECLARE_bool(fst_error_fatal);
34 
35 #define FSTERROR() \
36  (FLAGS_fst_error_fatal ? LOG(FATAL) : LOG(ERROR))
37 
38 namespace fst {
39 
40 // Utility for type I/O.
41 
42 // Reads types from an input stream.
43 
// Class-type case: the object deserializes itself through its own
// Read(std::istream &) member, whose result is returned to the caller.
template <class T,
    typename std::enable_if<std::is_class<T>::value, T>::type* = nullptr>
inline std::istream &ReadType(std::istream &strm, T *t) {
  return (*t).Read(strm);
}
50 
// Numeric (boolean, integral, floating-point) case: fills *t with sizeof(T)
// raw bytes taken from the stream; no byte-order conversion is applied.
// Returns the stream so the caller can test it for failure.
template <class T,
    typename std::enable_if<std::is_arithmetic<T>::value, T>::type* = nullptr>
inline std::istream &ReadType(std::istream &strm, T *t) {
  return strm.read(reinterpret_cast<char *>(t), sizeof(T));
}
57 
// String case: reads an int32 byte count followed by that many characters.
// *s is cleared first. If the stream fails part-way, reading stops at once and
// *s holds only the characters that were actually read; the stream is left in
// its failed state for the caller to detect.
inline std::istream &ReadType(std::istream &strm, string *s) {  // NOLINT
  s->clear();
  int32 ns = 0;
  strm.read(reinterpret_cast<char *>(&ns), sizeof(ns));
  for (int32 i = 0; i < ns; ++i) {
    char c = '\0';
    // Never append a character that was not successfully read: a truncated
    // stream or a corrupt length would otherwise fill *s with garbage.
    if (!strm.read(&c, 1)) break;
    s->push_back(c);
  }
  return strm;
}
70 
// Forward declarations of the container overloads of ReadType (defined further
// down), so that the pair and container readers can refer to one another
// regardless of definition order.
template <typename... Args>
std::istream &ReadType(std::istream &strm, std::vector<Args...> *c);
template <typename... Args>
std::istream &ReadType(std::istream &strm, std::list<Args...> *c);
template <typename... Args>
std::istream &ReadType(std::istream &strm, std::set<Args...> *c);
template <typename... Args>
std::istream &ReadType(std::istream &strm, std::map<Args...> *c);
template <typename... Args>
std::istream &ReadType(std::istream &strm, std::unordered_map<Args...> *c);
template <typename... Args>
std::istream &ReadType(std::istream &strm, std::unordered_set<Args...> *c);
84 
// Pair case: reads the first member, then the second, each through the
// ReadType overload matching its type.
template <typename S, typename T>
inline std::istream &ReadType(std::istream &strm, std::pair<S, T> *p) {
  S &first = p->first;
  T &second = p->second;
  ReadType(strm, &first);
  ReadType(strm, &second);
  return strm;
}
92 
// Pair case with a const first member (the element type of the map
// containers): constness of the key is cast away so it can be filled in place.
template <typename S, typename T>
inline std::istream &ReadType(std::istream &strm, std::pair<const S, T> *p) {
  S *key = const_cast<S *>(&p->first);
  ReadType(strm, key);
  T *mapped = &p->second;
  ReadType(strm, mapped);
  return strm;
}
99 
100 namespace internal {
101 template <class C, class ReserveFn>
102 std::istream &ReadContainerType(std::istream &strm, C *c, ReserveFn reserve) {
103  c->clear();
104  int64 n = 0;
105  ReadType(strm, &n);
106  reserve(c, n);
107  auto insert = std::inserter(*c, c->begin());
108  for (int64 i = 0; i < n; ++i) {
109  typename C::value_type value;
110  ReadType(strm, &value);
111  *insert = value;
112  }
113  return strm;
114 }
115 } // namespace internal
116 
117 template <class... T>
118 std::istream &ReadType(std::istream &strm, std::vector<T...> *c) {
120  strm, c, [](decltype(c) v, int n) { v->reserve(n); });
121 }
122 
123 template <class... T>
124 std::istream &ReadType(std::istream &strm, std::list<T...> *c) {
125  return internal::ReadContainerType(strm, c, [](decltype(c) v, int n) {});
126 }
127 
128 template <class... T>
129 std::istream &ReadType(std::istream &strm, std::set<T...> *c) {
130  return internal::ReadContainerType(strm, c, [](decltype(c) v, int n) {});
131 }
132 
133 template <class... T>
134 std::istream &ReadType(std::istream &strm, std::map<T...> *c) {
135  return internal::ReadContainerType(strm, c, [](decltype(c) v, int n) {});
136 }
137 
138 template <class... T>
139 std::istream &ReadType(std::istream &strm, std::unordered_set<T...> *c) {
141  strm, c, [](decltype(c) v, int n) { v->reserve(n); });
142 }
143 
144 template <class... T>
145 std::istream &ReadType(std::istream &strm, std::unordered_map<T...> *c) {
147  strm, c, [](decltype(c) v, int n) { v->reserve(n); });
148 }
149 
150 // Writes types to an output stream.
151 
// Class-type case: the object serializes itself through its own
// Write(std::ostream &) member. Whatever Write returns is discarded; the
// stream itself is handed back to the caller.
template <class T,
    typename std::enable_if<std::is_class<T>::value, T>::type* = nullptr>
inline std::ostream &WriteType(std::ostream &strm, const T t) {
  static_cast<void>(t.Write(strm));
  return strm;
}
159 
// Numeric (boolean, integral, floating-point) case: emits the sizeof(T) raw
// bytes of the value, with no byte-order conversion.
template <class T,
    typename std::enable_if<std::is_arithmetic<T>::value, T>::type* = nullptr>
inline std::ostream &WriteType(std::ostream &strm, const T t) {
  const char *bytes = reinterpret_cast<const char *>(&t);
  strm.write(bytes, sizeof(t));
  return strm;
}
166 
// String case: emits the length as an int32, followed by the characters.
inline std::ostream &WriteType(std::ostream &strm, const string &s) {  // NOLINT
  const int32 length = s.size();
  const char *length_bytes = reinterpret_cast<const char *>(&length);
  strm.write(length_bytes, sizeof(length));
  strm.write(s.data(), length);
  return strm;
}
173 
// Forward declarations of the container overloads of WriteType (defined
// further down), so that the pair and container writers can refer to one
// another regardless of definition order.

template <class... Args>
std::ostream &WriteType(std::ostream &strm, const std::vector<Args...> &c);
template <class... Args>
std::ostream &WriteType(std::ostream &strm, const std::list<Args...> &c);
template <class... Args>
std::ostream &WriteType(std::ostream &strm, const std::set<Args...> &c);
template <class... Args>
std::ostream &WriteType(std::ostream &strm, const std::map<Args...> &c);
template <class... Args>
std::ostream &WriteType(std::ostream &strm,
                        const std::unordered_map<Args...> &c);
template <class... Args>
std::ostream &WriteType(std::ostream &strm,
                        const std::unordered_set<Args...> &c);
188 
// Pair case: writes the first member, then the second, each through the
// WriteType overload matching its type.
template <typename S, typename T>
inline std::ostream &WriteType(std::ostream &strm,
                               const std::pair<S, T> &p) {  // NOLINT
  const S &first = p.first;
  const T &second = p.second;
  WriteType(strm, first);
  WriteType(strm, second);
  return strm;
}
197 
198 namespace internal {
199 template <class C>
200 std::ostream &WriteContainer(std::ostream &strm, const C &c) {
201  const int64 n = c.size();
202  WriteType(strm, n);
203  for (const auto &e : c) {
204  WriteType(strm, e);
205  }
206  return strm;
207 }
208 } // namespace internal
209 
210 template <typename... T>
211 std::ostream &WriteType(std::ostream &strm, const std::vector<T...> &c) {
212  return internal::WriteContainer(strm, c);
213 }
214 
215 template <typename... T>
216 std::ostream &WriteType(std::ostream &strm, const std::list<T...> &c) {
217  return internal::WriteContainer(strm, c);
218 }
219 
220 template <typename... T>
221 std::ostream &WriteType(std::ostream &strm, const std::set<T...> &c) {
222  return internal::WriteContainer(strm, c);
223 }
224 
225 template <typename... T>
226 std::ostream &WriteType(std::ostream &strm, const std::map<T...> &c) {
227  return internal::WriteContainer(strm, c);
228 }
229 
230 template <typename... T>
231 std::ostream &WriteType(std::ostream &strm, const std::unordered_map<T...> &c) {
232  return internal::WriteContainer(strm, c);
233 }
234 
235 template <typename... T>
236 std::ostream &WriteType(std::ostream &strm, const std::unordered_set<T...> &c) {
237  return internal::WriteContainer(strm, c);
238 }
239 
240 // Utilities for converting between int64 or Weight and string.
241 
// Converts the string s to an int64. Declared here only; the implementation
// lives in util.cc.
// NOTE(review): src and nline presumably identify the input's origin for
// error messages, and *error (when non-null) presumably receives the failure
// status - confirm against util.cc.
int64 StrToInt64(const string &s,
                 const string &src,
                 size_t nline,
                 bool allow_negative,
                 bool *error = nullptr);
244 
245 template <typename Weight>
246 Weight StrToWeight(const string &s, const string &src, size_t nline) {
247  Weight w;
248  std::istringstream strm(s);
249  strm >> w;
250  if (!strm) {
251  FSTERROR() << "StrToWeight: Bad weight = \"" << s << "\", source = " << src
252  << ", line = " << nline;
253  return Weight::NoWeight();
254  }
255  return w;
256 }
257 
// Appends the textual form of w to *s, formatted with Weight's stream
// insertion operator at a precision of 9. Existing contents of *s are kept.
template <typename Weight>
void WeightToStr(Weight w, string *s) {
  std::ostringstream strm;
  strm.precision(9);
  strm << w;
  // str() returns a copy of the buffer, so take it exactly once.
  const std::string text = strm.str();
  s->append(text.data(), text.size());
}
265 
266 // Utilities for reading/writing integer pairs (typically labels)
267 
// Splits the buffer starting at line on the characters in delim. The buffer
// is modified, and *vec receives pointers into it. Declared here only; the
// implementation lives in util.cc.
// NOTE(review): omit_empty_strings presumably drops zero-length fields -
// confirm against util.cc.
void SplitString(char *line,
                 const char *delim,
                 std::vector<char *> *vec,
                 bool omit_empty_strings);
271 
272 template <typename I>
273 bool ReadIntPairs(const string &filename, std::vector<std::pair<I, I>> *pairs,
274  bool allow_negative = false) {
275  std::ifstream strm(filename, std::ios_base::in);
276  if (!strm) {
277  LOG(ERROR) << "ReadIntPairs: Can't open file: " << filename;
278  return false;
279  }
280  const int kLineLen = 8096;
281  char line[kLineLen];
282  size_t nline = 0;
283  pairs->clear();
284  while (strm.getline(line, kLineLen)) {
285  ++nline;
286  std::vector<char *> col;
287  SplitString(line, "\n\t ", &col, true);
288  // empty line or comment?
289  if (col.empty() || col[0][0] == '\0' || col[0][0] == '#') continue;
290  if (col.size() != 2) {
291  LOG(ERROR) << "ReadIntPairs: Bad number of columns, "
292  << "file = " << filename << ", line = " << nline;
293  return false;
294  }
295  bool err;
296  I i1 = StrToInt64(col[0], filename, nline, allow_negative, &err);
297  if (err) return false;
298  I i2 = StrToInt64(col[1], filename, nline, allow_negative, &err);
299  if (err) return false;
300  pairs->push_back(std::make_pair(i1, i2));
301  }
302  return true;
303 }
304 
305 template <typename I>
306 bool WriteIntPairs(const string &filename,
307  const std::vector<std::pair<I, I>> &pairs) {
308  std::ostream *strm = &std::cout;
309  if (!filename.empty()) {
310  strm = new std::ofstream(filename);
311  if (!*strm) {
312  LOG(ERROR) << "WriteIntPairs: Can't open file: " << filename;
313  return false;
314  }
315  }
316  for (ssize_t n = 0; n < pairs.size(); ++n) {
317  *strm << pairs[n].first << "\t" << pairs[n].second << "\n";
318  }
319  if (!*strm) {
320  LOG(ERROR) << "WriteIntPairs: Write failed: "
321  << (filename.empty() ? "standard output" : filename);
322  return false;
323  }
324  if (strm != &std::cout) delete strm;
325  return true;
326 }
327 
328 // Utilities for reading/writing label pairs.
329 
330 template <typename Label>
331 bool ReadLabelPairs(const string &filename,
332  std::vector<std::pair<Label, Label>> *pairs,
333  bool allow_negative = false) {
334  return ReadIntPairs(filename, pairs, allow_negative);
335 }
336 
337 template <typename Label>
338 bool WriteLabelPairs(const string &filename,
339  const std::vector<std::pair<Label, Label>> &pairs) {
340  return WriteIntPairs(filename, pairs);
341 }
342 
343 // Utilities for converting a type name to a legal C symbol.
344 
// Declared here only; the implementation lives in util.cc.
// NOTE(review): presumably rewrites *s in place into a legal C symbol -
// confirm against util.cc.
void ConvertToLegalCSymbol(string *s);
346 
347 // Utilities for stream I/O.
348 
// Declared here only; the implementations live in util.cc.
// NOTE(review): presumably these move the stream position to an alignment
// boundary and report success - confirm against util.cc.
bool AlignInput(std::istream &strm);

bool AlignOutput(std::ostream &strm);
351 
// A set of keys that also tracks a lower and an upper bound on its contents.
// Membership queries for keys outside [LowerBound(), UpperBound()] are
// answered from the bounds alone, without searching the underlying std::set,
// which pays off when most non-members fall outside that interval.
//
// Key must provide ==, != and <. NoKey marks "no bound yet" and must not be
// used as a real key. Find() returns a const_iterator to the match, or End()
// when there is none.
template <class Key, Key NoKey>
class CompactSet {
 public:
  using const_iterator = typename std::set<Key>::const_iterator;

  // Starts empty, with both bounds unset.
  CompactSet() : min_key_(NoKey), max_key_(NoKey) {}

  // Copies the keys and both bounds.
  CompactSet(const CompactSet<Key, NoKey> &compact_set) = default;

  // Adds key and widens the bounds to include it.
  void Insert(Key key) {
    set_.insert(key);
    if (min_key_ == NoKey || key < min_key_) {
      min_key_ = key;
    }
    if (max_key_ == NoKey || max_key_ < key) {
      max_key_ = key;
    }
  }

  // Removes key. The bounds are reset when the set becomes empty; otherwise a
  // bound equal to key is tightened by one step, which keeps it a valid (not
  // necessarily exact) bound.
  void Erase(Key key) {
    set_.erase(key);
    if (set_.empty()) {
      min_key_ = max_key_ = NoKey;
      return;
    }
    if (key == min_key_) {
      ++min_key_;
    } else if (key == max_key_) {
      --max_key_;
    }
  }

  // Removes every key and unsets both bounds.
  void Clear() {
    set_.clear();
    min_key_ = max_key_ = NoKey;
  }

  // Returns an iterator to key, or End() if it is absent.
  const_iterator Find(Key key) const {
    return OutOfRange(key) ? set_.end() : set_.find(key);
  }

  // Returns true iff key is in the set.
  bool Member(Key key) const {
    if (OutOfRange(key)) return false;
    // When the bounds span exactly as many values as there are keys, every
    // value between the bounds is present and no lookup is needed.
    if (max_key_ + 1 == min_key_ + set_.size()) return true;
    return set_.count(key) != 0;
  }

  const_iterator Begin() const { return set_.begin(); }

  const_iterator End() const { return set_.end(); }

  // All stored keys are greater than or equal to this value.
  Key LowerBound() const { return min_key_; }

  // All stored keys are less than or equal to this value.
  Key UpperBound() const { return max_key_; }

 private:
  // True when the bounds alone prove key is absent: either no bound has been
  // set yet, or key lies outside [min_key_, max_key_].
  bool OutOfRange(Key key) const {
    return min_key_ == NoKey || key < min_key_ || max_key_ < key;
  }

  std::set<Key> set_;
  Key min_key_;
  Key max_key_;

  void operator=(const CompactSet &) = delete;
};
426 
427 } // namespace fst
428 
429 #endif // FST_UTIL_H_
void ConvertToLegalCSymbol(string *s)
Definition: util.cc:50
CompactSet(const CompactSet< Key, NoKey > &compact_set)
Definition: util.h:364
bool ReadLabelPairs(const string &filename, std::vector< std::pair< Label, Label >> *pairs, bool allow_negative=false)
Definition: util.h:331
Key LowerBound() const
Definition: util.h:414
#define LOG(type)
Definition: log.h:48
void Erase(Key key)
Definition: util.h:375
Key UpperBound() const
Definition: util.h:417
typename std::set< Label >::const_iterator const_iterator
Definition: util.h:360
int64_t int64
Definition: types.h:27
std::ostream & WriteType(std::ostream &strm, const T t)
Definition: util.h:155
#define FSTERROR()
Definition: util.h:35
void SplitString(char *line, const char *delim, std::vector< char * > *vec, bool omit_empty_strings)
Definition: util.cc:22
bool WriteIntPairs(const string &filename, const std::vector< std::pair< I, I >> &pairs)
Definition: util.h:306
bool AlignOutput(std::ostream &strm)
Definition: util.cc:76
DECLARE_bool(fst_error_fatal)
Weight StrToWeight(const string &s, const string &src, size_t nline)
Definition: util.h:246
const_iterator Find(Key key) const
Definition: util.h:391
bool ReadIntPairs(const string &filename, std::vector< std::pair< I, I >> *pairs, bool allow_negative=false)
Definition: util.h:273
void Insert(Key key)
Definition: util.h:369
void Clear()
Definition: util.h:386
void WeightToStr(Weight w, string *s)
Definition: util.h:259
bool Member(Key key) const
Definition: util.h:399
int32_t int32
Definition: types.h:26
std::ostream & WriteContainer(std::ostream &strm, const C &c)
Definition: util.h:200
std::istream & ReadType(std::istream &strm, T *t)
Definition: util.h:47
const_iterator Begin() const
Definition: util.h:409
std::istream & ReadContainerType(std::istream &strm, C *c, ReserveFn reserve)
Definition: util.h:102
bool WriteLabelPairs(const string &filename, const std::vector< std::pair< Label, Label >> &pairs)
Definition: util.h:338
const_iterator End() const
Definition: util.h:411
bool AlignInput(std::istream &strm)
Definition: util.cc:60
int64 StrToInt64(const string &s, const string &src, size_t nline, bool allow_negative, bool *error=nullptr)
Definition: util.cc:34