FST  openfst-1.7.9
OpenFst Library
far.h
Go to the documentation of this file.
1 // See www.openfst.org for extensive documentation on this weighted
2 // finite-state transducer library.
3 //
4 // Finite-State Transducer (FST) archive classes.
5 
6 #ifndef FST_EXTENSIONS_FAR_FAR_H_
7 #define FST_EXTENSIONS_FAR_FAR_H_
8 
#include <algorithm>
#include <fstream>
#include <iostream>
#include <memory>
#include <sstream>
#include <string>
#include <vector>

#include <fst/log.h>
#include <fst/fst.h>
#include <fst/vector-fst.h>
18 
19 namespace fst {
20 
// Kind of archive input entry.
// NOTE(review): presumably distinguishes one-entry-per-line from
// one-entry-per-file input; no user is visible in this header - confirm.
enum class FarEntryType { LINE, FILE };
22 
23 inline bool IsFst(const std::string &source) {
24  std::ifstream strm(source, std::ios_base::in | std::ios_base::binary);
25  if (!strm) return false;
26  return IsFstHeader(strm, source);
27 }
28 
29 // FST archive header class
30 class FarHeader {
31  public:
32  const std::string &ArcType() const { return arctype_; }
33 
34  const std::string &FarType() const { return fartype_; }
35 
36  bool Read(const std::string &source) {
37  FstHeader fsthdr;
38  if (source.empty()) {
39  // Header reading unsupported on stdin. Assumes STList and StdArc.
40  fartype_ = "stlist";
41  arctype_ = "standard";
42  return true;
43  } else if (IsSTTable(source)) { // Checks if STTable.
44  ReadSTTableHeader(source, &fsthdr);
45  fartype_ = "sttable";
46  arctype_ = fsthdr.ArcType().empty() ? "unknown" : fsthdr.ArcType();
47  return true;
48  } else if (IsSTList(source)) { // Checks if STList.
49  ReadSTListHeader(source, &fsthdr);
50  fartype_ = "stlist";
51  arctype_ = fsthdr.ArcType().empty() ? "unknown" : fsthdr.ArcType();
52  return true;
53  } else if (IsFst(source)) { // Checks if FST.
54  std::ifstream istrm(source,
55  std::ios_base::in | std::ios_base::binary);
56  fsthdr.Read(istrm, source);
57  fartype_ = "fst";
58  arctype_ = fsthdr.ArcType().empty() ? "unknown" : fsthdr.ArcType();
59  return true;
60  }
61  return false;
62  }
63 
64  private:
65  std::string fartype_;
66  std::string arctype_;
67 };
68 
// On-disk archive formats understood by the FAR readers and writers.
enum class FarType {
  // Let FarWriter::Create choose: STLIST for an empty source name, STTABLE
  // otherwise.
  DEFAULT = 0,
  STTABLE = 1,
  STLIST = 2,
  // A single plain FST file treated as a one-entry archive.
  FST = 3,
};
75 
76 // This class creates an archive of FSTs.
77 template <class A>
78 class FarWriter {
79  public:
80  using Arc = A;
81 
82  // Creates a new (empty) FST archive; returns null on error.
83  static FarWriter *Create(const std::string &source,
84  FarType type = FarType::DEFAULT);
85 
86  // Adds an FST to the end of an archive. Keys must be non-empty and
87  // in lexicographic order. FSTs must have a suitable write method.
88  virtual void Add(const std::string &key, const Fst<Arc> &fst) = 0;
89 
90  virtual FarType Type() const = 0;
91 
92  virtual bool Error() const = 0;
93 
94  virtual ~FarWriter() {}
95 
96  protected:
97  FarWriter() {}
98 };
99 
100 // This class iterates through an existing archive of FSTs.
101 template <class A>
102 class FarReader {
103  public:
104  using Arc = A;
105 
106  // Opens an existing FST archive in a single file; returns null on error.
107  // Sets current position to the beginning of the achive.
108  static FarReader *Open(const std::string &source);
109 
110  // Opens an existing FST archive in multiple files; returns null on error.
111  // Sets current position to the beginning of the achive.
112  static FarReader *Open(const std::vector<std::string> &sources);
113 
114  // Resets current position to beginning of archive.
115  virtual void Reset() = 0;
116 
117  // Sets current position to first entry >= key. Returns true if a match.
118  virtual bool Find(const std::string &key) = 0;
119 
120  // Current position at end of archive?
121  virtual bool Done() const = 0;
122 
123  // Move current position to next FST.
124  virtual void Next() = 0;
125 
126  // Returns key at the current position. This reference is invalidated if
127  // the current position in the archive is changed.
128  virtual const std::string &GetKey() const = 0;
129 
130  // Returns pointer to FST at the current position. This is invalidated if
131  // the current position in the archive is changed.
132  virtual const Fst<Arc> *GetFst() const = 0;
133 
134  virtual FarType Type() const = 0;
135 
136  virtual bool Error() const = 0;
137 
138  virtual ~FarReader() {}
139 
140  protected:
142 };
143 
144 template <class Arc>
145 class FstWriter {
146  public:
147  void operator()(std::ostream &strm, const Fst<Arc> &fst) const {
148  fst.Write(strm, FstWriteOptions());
149  }
150 };
151 
152 template <class A>
153 class STTableFarWriter : public FarWriter<A> {
154  public:
155  using Arc = A;
156 
157  static STTableFarWriter *Create(const std::string &source) {
158  auto *writer = STTableWriter<Fst<Arc>, FstWriter<Arc>>::Create(source);
159  return new STTableFarWriter(writer);
160  }
161 
162  void Add(const std::string &key, const Fst<Arc> &fst) final {
163  writer_->Add(key, fst);
164  }
165 
166  FarType Type() const final { return FarType::STTABLE; }
167 
168  bool Error() const final { return writer_->Error(); }
169 
170  private:
172  : writer_(writer) {}
173 
174  std::unique_ptr<STTableWriter<Fst<Arc>, FstWriter<Arc>>> writer_;
175 };
176 
177 template <class A>
178 class STListFarWriter : public FarWriter<A> {
179  public:
180  using Arc = A;
181 
182  static STListFarWriter *Create(const std::string &source) {
183  auto *writer = STListWriter<Fst<Arc>, FstWriter<Arc>>::Create(source);
184  return new STListFarWriter(writer);
185  }
186 
187  void Add(const std::string &key, const Fst<Arc> &fst) final {
188  writer_->Add(key, fst);
189  }
190 
191  FarType Type() const final { return FarType::STLIST; }
192 
193  bool Error() const final { return writer_->Error(); }
194 
195  private:
197  : writer_(writer) {}
198 
199  std::unique_ptr<STListWriter<Fst<Arc>, FstWriter<Arc>>> writer_;
200 };
201 
202 template <class A>
203 class FstFarWriter final : public FarWriter<A> {
204  public:
205  using Arc = A;
206 
207  explicit FstFarWriter(const std::string &source)
208  : source_(source), error_(false), written_(false) {}
209 
210  static FstFarWriter *Create(const std::string &source) {
211  return new FstFarWriter(source);
212  }
213 
214  void Add(const std::string &key, const Fst<A> &fst) final {
215  if (written_) {
216  LOG(WARNING) << "FstFarWriter::Add: only one FST supported,"
217  << " subsequent entries discarded.";
218  } else {
219  error_ = !fst.Write(source_);
220  written_ = true;
221  }
222  }
223 
224  FarType Type() const final { return FarType::FST; }
225 
226  bool Error() const final { return error_; }
227 
228  ~FstFarWriter() final {}
229 
230  private:
231  std::string source_;
232  bool error_;
233  bool written_;
234 };
235 
236 template <class Arc>
237 FarWriter<Arc> *FarWriter<Arc>::Create(const std::string &source,
238  FarType type) {
239  switch (type) {
240  case FarType::DEFAULT:
241  if (source.empty()) return STListFarWriter<Arc>::Create(source);
242  case FarType::STTABLE:
243  return STTableFarWriter<Arc>::Create(source);
244  case FarType::STLIST:
245  return STListFarWriter<Arc>::Create(source);
246  case FarType::FST:
247  return FstFarWriter<Arc>::Create(source);
248  default:
249  LOG(ERROR) << "FarWriter::Create: Unknown FAR type";
250  return nullptr;
251  }
252 }
253 
254 template <class Arc>
255 class FstReader {
256  public:
257  Fst<Arc> *operator()(std::istream &strm,
258  const FstReadOptions &options = FstReadOptions()) const {
259  return Fst<Arc>::Read(strm, options);
260  }
261 };
262 
263 template <class A>
264 class STTableFarReader : public FarReader<A> {
265  public:
266  using Arc = A;
267 
268  static STTableFarReader *Open(const std::string &source) {
269  auto *reader = STTableReader<Fst<Arc>, FstReader<Arc>>::Open(source);
270  if (!reader || reader->Error()) return nullptr;
271  return new STTableFarReader(reader);
272  }
273 
274  static STTableFarReader *Open(const std::vector<std::string> &sources) {
275  auto *reader = STTableReader<Fst<Arc>, FstReader<Arc>>::Open(sources);
276  if (!reader || reader->Error()) return nullptr;
277  return new STTableFarReader(reader);
278  }
279 
280  void Reset() final { reader_->Reset(); }
281 
282  bool Find(const std::string &key) final { return reader_->Find(key); }
283 
284  bool Done() const final { return reader_->Done(); }
285 
286  void Next() final { return reader_->Next(); }
287 
288  const std::string &GetKey() const final { return reader_->GetKey(); }
289 
290  const Fst<Arc> *GetFst() const final { return reader_->GetEntry(); }
291 
292  FarType Type() const final { return FarType::STTABLE; }
293 
294  bool Error() const final { return reader_->Error(); }
295 
296  private:
298  : reader_(reader) {}
299 
300  std::unique_ptr<STTableReader<Fst<Arc>, FstReader<Arc>>> reader_;
301 };
302 
303 template <class A>
304 class STListFarReader : public FarReader<A> {
305  public:
306  using Arc = A;
307 
308  static STListFarReader *Open(const std::string &source) {
309  auto *reader = STListReader<Fst<Arc>, FstReader<Arc>>::Open(source);
310  if (!reader || reader->Error()) return nullptr;
311  return new STListFarReader(reader);
312  }
313 
314  static STListFarReader *Open(const std::vector<std::string> &sources) {
315  auto *reader = STListReader<Fst<Arc>, FstReader<Arc>>::Open(sources);
316  if (!reader || reader->Error()) return nullptr;
317  return new STListFarReader(reader);
318  }
319 
320  void Reset() final { reader_->Reset(); }
321 
322  bool Find(const std::string &key) final { return reader_->Find(key); }
323 
324  bool Done() const final { return reader_->Done(); }
325 
326  void Next() final { return reader_->Next(); }
327 
328  const std::string &GetKey() const final { return reader_->GetKey(); }
329 
330  const Fst<Arc> *GetFst() const final { return reader_->GetEntry(); }
331 
332  FarType Type() const final { return FarType::STLIST; }
333 
334  bool Error() const final { return reader_->Error(); }
335 
336  private:
338  : reader_(reader) {}
339 
340  std::unique_ptr<STListReader<Fst<Arc>, FstReader<Arc>>> reader_;
341 };
342 
343 template <class A>
344 class FstFarReader final : public FarReader<A> {
345  public:
346  using Arc = A;
347 
348  static FstFarReader *Open(const std::string &source) {
349  std::vector<std::string> sources;
350  sources.push_back(source);
351  return new FstFarReader<Arc>(sources);
352  }
353 
354  static FstFarReader *Open(const std::vector<std::string> &sources) {
355  return new FstFarReader<Arc>(sources);
356  }
357 
358  explicit FstFarReader(const std::vector<std::string> &sources)
359  : keys_(sources), has_stdin_(false), pos_(0), error_(false) {
360  std::sort(keys_.begin(), keys_.end());
361  streams_.resize(keys_.size(), nullptr);
362  for (size_t i = 0; i < keys_.size(); ++i) {
363  if (keys_[i].empty()) {
364  if (!has_stdin_) {
365  streams_[i] = &std::cin;
366  has_stdin_ = true;
367  } else {
368  FSTERROR() << "FstFarReader::FstFarReader: standard input should "
369  "only appear once in the input file list";
370  error_ = true;
371  return;
372  }
373  } else {
374  streams_[i] = new std::ifstream(
375  keys_[i], std::ios_base::in | std::ios_base::binary);
376  if (streams_[i]->fail()) {
377  FSTERROR() << "FstFarReader::FstFarReader: Error reading file: "
378  << sources[i];
379  error_ = true;
380  return;
381  }
382  }
383  }
384  if (pos_ >= keys_.size()) return;
385  ReadFst();
386  }
387 
388  void Reset() final {
389  if (has_stdin_) {
390  FSTERROR()
391  << "FstFarReader::Reset: Operation not supported on standard input";
392  error_ = true;
393  return;
394  }
395  pos_ = 0;
396  ReadFst();
397  }
398 
399  bool Find(const std::string &key) final {
400  if (has_stdin_) {
401  FSTERROR()
402  << "FstFarReader::Find: Operation not supported on standard input";
403  error_ = true;
404  return false;
405  }
406  pos_ = 0; // TODO
407  ReadFst();
408  return true;
409  }
410 
411  bool Done() const final { return error_ || pos_ >= keys_.size(); }
412 
413  void Next() final {
414  ++pos_;
415  ReadFst();
416  }
417 
418  const std::string &GetKey() const final { return keys_[pos_]; }
419 
420  const Fst<Arc> *GetFst() const final { return fst_.get(); }
421 
422  FarType Type() const final { return FarType::FST; }
423 
424  bool Error() const final { return error_; }
425 
426  ~FstFarReader() final {
427  for (size_t i = 0; i < keys_.size(); ++i) {
428  if (streams_[i] != &std::cin) {
429  delete streams_[i];
430  }
431  }
432  }
433 
434  private:
435  void ReadFst() {
436  fst_.reset();
437  if (pos_ >= keys_.size()) return;
438  streams_[pos_]->seekg(0);
439  fst_.reset(Fst<Arc>::Read(*streams_[pos_], FstReadOptions()));
440  if (!fst_) {
441  FSTERROR() << "FstFarReader: Error reading Fst from: " << keys_[pos_];
442  error_ = true;
443  }
444  }
445 
446  std::vector<std::string> keys_;
447  std::vector<std::istream *> streams_;
448  bool has_stdin_;
449  size_t pos_;
450  mutable std::unique_ptr<Fst<Arc>> fst_;
451  mutable bool error_;
452 };
453 
454 template <class Arc>
455 FarReader<Arc> *FarReader<Arc>::Open(const std::string &source) {
456  if (source.empty())
457  return STListFarReader<Arc>::Open(source);
458  else if (IsSTTable(source))
459  return STTableFarReader<Arc>::Open(source);
460  else if (IsSTList(source))
461  return STListFarReader<Arc>::Open(source);
462  else if (IsFst(source))
463  return FstFarReader<Arc>::Open(source);
464  return nullptr;
465 }
466 
467 template <class Arc>
468 FarReader<Arc> *FarReader<Arc>::Open(const std::vector<std::string> &sources) {
469  if (!sources.empty() && sources[0].empty())
470  return STListFarReader<Arc>::Open(sources);
471  else if (!sources.empty() && IsSTTable(sources[0]))
472  return STTableFarReader<Arc>::Open(sources);
473  else if (!sources.empty() && IsSTList(sources[0]))
474  return STListFarReader<Arc>::Open(sources);
475  else if (!sources.empty() && IsFst(sources[0]))
476  return FstFarReader<Arc>::Open(sources);
477  return nullptr;
478 }
479 
480 } // namespace fst
481 
482 #endif // FST_EXTENSIONS_FAR_FAR_H_
bool Read(std::istream &strm, const std::string &source, bool rewind=false)
Definition: fst.cc:60
void Reset() final
Definition: far.h:320
virtual ~FarReader()
Definition: far.h:138
const std::string & FarType() const
Definition: far.h:34
static STListFarReader * Open(const std::vector< std::string > &sources)
Definition: far.h:314
void Reset() final
Definition: far.h:388
bool Error() const final
Definition: far.h:193
bool IsFst(const std::string &source)
Definition: far.h:23
bool Error() const final
Definition: far.h:168
FarType Type() const final
Definition: far.h:422
const Fst< Arc > * GetFst() const final
Definition: far.h:290
void Reset() final
Definition: far.h:280
static Fst * Read(std::istream &strm, const FstReadOptions &opts)
Definition: fst.h:241
bool Error() const final
Definition: far.h:226
static FarReader * Open(const std::string &source)
Definition: far.h:455
const std::string & GetKey() const final
Definition: far.h:328
bool Error() const final
Definition: far.h:334
virtual bool Write(std::ostream &strm, const FstWriteOptions &opts) const
Definition: fst.h:277
void Next() final
Definition: far.h:326
#define LOG(type)
Definition: log.h:46
void Add(const std::string &key, const Fst< A > &fst) final
Definition: far.h:214
static STTableFarReader * Open(const std::vector< std::string > &sources)
Definition: far.h:274
static STListFarWriter * Create(const std::string &source)
Definition: far.h:182
bool Error() const final
Definition: far.h:424
FarWriter()
Definition: far.h:97
~FstFarWriter() final
Definition: far.h:228
FarType Type() const final
Definition: far.h:332
static FstFarReader * Open(const std::string &source)
Definition: far.h:348
virtual ~FarWriter()
Definition: far.h:94
#define FSTERROR()
Definition: util.h:36
bool Read(const std::string &source)
Definition: far.h:36
bool Done() const final
Definition: far.h:324
bool ReadSTListHeader(const std::string &source, Header *header)
Definition: stlist.h:239
bool Done() const final
Definition: far.h:411
FarReader()
Definition: far.h:141
FarType
Definition: far.h:69
const Fst< Arc > * GetFst() const final
Definition: far.h:330
Fst< Arc > * operator()(std::istream &strm, const FstReadOptions &options=FstReadOptions()) const
Definition: far.h:257
void Next() final
Definition: far.h:413
bool IsFstHeader(std::istream &, const std::string &)
Definition: fst.cc:45
static STListFarReader * Open(const std::string &source)
Definition: far.h:308
FarType Type() const final
Definition: far.h:166
static FstFarWriter * Create(const std::string &source)
Definition: far.h:210
void Next() final
Definition: far.h:286
bool ReadSTTableHeader(const std::string &source, Header *header)
Definition: sttable.h:307
~FstFarReader() final
Definition: far.h:426
static STTableFarReader * Open(const std::string &source)
Definition: far.h:268
FstFarWriter(const std::string &source)
Definition: far.h:207
const std::string & ArcType() const
Definition: far.h:32
FarType Type() const final
Definition: far.h:191
bool IsSTList(const std::string &source)
Definition: stlist.cc:12
bool Find(const std::string &key) final
Definition: far.h:399
bool IsSTTable(const std::string &source)
Definition: sttable.cc:10
FarType Type() const final
Definition: far.h:224
static FstFarReader * Open(const std::vector< std::string > &sources)
Definition: far.h:354
static FarWriter * Create(const std::string &source, FarType type=FarType::DEFAULT)
Definition: far.h:237
const Fst< Arc > * GetFst() const final
Definition: far.h:420
bool Find(const std::string &key) final
Definition: far.h:282
bool Error() const final
Definition: far.h:294
FstFarReader(const std::vector< std::string > &sources)
Definition: far.h:358
static STTableFarWriter * Create(const std::string &source)
Definition: far.h:157
const std::string & GetKey() const final
Definition: far.h:288
void operator()(std::ostream &strm, const Fst< Arc > &fst) const
Definition: far.h:147
const std::string & GetKey() const final
Definition: far.h:418
void Add(const std::string &key, const Fst< Arc > &fst) final
Definition: far.h:187
FarEntryType
Definition: far.h:21
void Add(const std::string &key, const Fst< Arc > &fst) final
Definition: far.h:162
bool Find(const std::string &key) final
Definition: far.h:322
const std::string & ArcType() const
Definition: fst.h:128
FarType Type() const final
Definition: far.h:292
bool Done() const final
Definition: far.h:284