FST  openfst-1.7.1
OpenFst Library
far.h
Go to the documentation of this file.
1 // See www.openfst.org for extensive documentation on this weighted
2 // finite-state transducer library.
3 //
4 // Finite-State Transducer (FST) archive classes.
5 
6 #ifndef FST_EXTENSIONS_FAR_FAR_H_
7 #define FST_EXTENSIONS_FAR_FAR_H_
8 
9 #include <iostream>
10 #include <sstream>
11 
12 #include <fst/log.h>
15 #include <fst/fst.h>
16 #include <fst/vector-fst.h>
17 #include <fstream>
18 
19 namespace fst {
20 
22 
24 
25 inline bool IsFst(const string &filename) {
26  std::ifstream strm(filename, std::ios_base::in | std::ios_base::binary);
27  if (!strm) return false;
28  return IsFstHeader(strm, filename);
29 }
30 
31 // FST archive header class
32 class FarHeader {
33  public:
34  const string &ArcType() const { return arctype_; }
35 
36  const string &FarType() const { return fartype_; }
37 
38  bool Read(const string &filename) {
39  FstHeader fsthdr;
40  if (filename.empty()) {
41  // Header reading unsupported on stdin. Assumes STList and StdArc.
42  fartype_ = "stlist";
43  arctype_ = "standard";
44  return true;
45  } else if (IsSTTable(filename)) { // Checks if STTable.
46  ReadSTTableHeader(filename, &fsthdr);
47  fartype_ = "sttable";
48  arctype_ = fsthdr.ArcType().empty() ? "unknown" : fsthdr.ArcType();
49  return true;
50  } else if (IsSTList(filename)) { // Checks if STList.
51  ReadSTListHeader(filename, &fsthdr);
52  fartype_ = "stlist";
53  arctype_ = fsthdr.ArcType().empty() ? "unknown" : fsthdr.ArcType();
54  return true;
55  } else if (IsFst(filename)) { // Checks if FST.
56  std::ifstream istrm(filename,
57  std::ios_base::in | std::ios_base::binary);
58  fsthdr.Read(istrm, filename);
59  fartype_ = "fst";
60  arctype_ = fsthdr.ArcType().empty() ? "unknown" : fsthdr.ArcType();
61  return true;
62  }
63  return false;
64  }
65 
66  private:
67  string fartype_;
68  string arctype_;
69 };
70 
// Storage formats of an FST archive.
enum FarType {
  // Lets FarWriter::Create choose: STList when the filename is empty,
  // STTable otherwise.
  FAR_DEFAULT = 0,
  FAR_STTABLE = 1,
  FAR_STLIST = 2,
  FAR_FST = 3,
};
77 
78 // This class creates an archive of FSTs.
79 template <class A>
80 class FarWriter {
81  public:
82  using Arc = A;
83 
84  // Creates a new (empty) FST archive; returns null on error.
85  static FarWriter *Create(const string &filename, FarType type = FAR_DEFAULT);
86 
87  // Adds an FST to the end of an archive. Keys must be non-empty and
88  // in lexicographic order. FSTs must have a suitable write method.
89  virtual void Add(const string &key, const Fst<Arc> &fst) = 0;
90 
91  virtual FarType Type() const = 0;
92 
93  virtual bool Error() const = 0;
94 
95  virtual ~FarWriter() {}
96 
97  protected:
98  FarWriter() {}
99 };
100 
101 // This class iterates through an existing archive of FSTs.
102 template <class A>
103 class FarReader {
104  public:
105  using Arc = A;
106 
107  // Opens an existing FST archive in a single file; returns null on error.
108  // Sets current position to the beginning of the achive.
109  static FarReader *Open(const string &filename);
110 
111  // Opens an existing FST archive in multiple files; returns null on error.
112  // Sets current position to the beginning of the achive.
113  static FarReader *Open(const std::vector<string> &filenames);
114 
115  // Resets current position to beginning of archive.
116  virtual void Reset() = 0;
117 
118  // Sets current position to first entry >= key. Returns true if a match.
119  virtual bool Find(const string &key) = 0;
120 
121  // Current position at end of archive?
122  virtual bool Done() const = 0;
123 
124  // Move current position to next FST.
125  virtual void Next() = 0;
126 
127  // Returns key at the current position. This reference is invalidated if
128  // the current position in the archive is changed.
129  virtual const string &GetKey() const = 0;
130 
131  // Returns pointer to FST at the current position. This is invalidated if
132  // the current position in the archive is changed.
133  virtual const Fst<Arc> *GetFst() const = 0;
134 
135  virtual FarType Type() const = 0;
136 
137  virtual bool Error() const = 0;
138 
139  virtual ~FarReader() {}
140 
141  protected:
143 };
144 
145 template <class Arc>
146 class FstWriter {
147  public:
148  void operator()(std::ostream &strm, const Fst<Arc> &fst) const {
149  fst.Write(strm, FstWriteOptions());
150  }
151 };
152 
153 template <class A>
154 class STTableFarWriter : public FarWriter<A> {
155  public:
156  using Arc = A;
157 
158  static STTableFarWriter *Create(const string &filename) {
159  auto *writer = STTableWriter<Fst<Arc>, FstWriter<Arc>>::Create(filename);
160  return new STTableFarWriter(writer);
161  }
162 
163  void Add(const string &key, const Fst<Arc> &fst) final {
164  writer_->Add(key, fst);
165  }
166 
167  FarType Type() const final { return FAR_STTABLE; }
168 
169  bool Error() const final { return writer_->Error(); }
170 
171  private:
173  : writer_(writer) {}
174 
175  std::unique_ptr<STTableWriter<Fst<Arc>, FstWriter<Arc>>> writer_;
176 };
177 
178 template <class A>
179 class STListFarWriter : public FarWriter<A> {
180  public:
181  using Arc = A;
182 
183  static STListFarWriter *Create(const string &filename) {
184  auto *writer = STListWriter<Fst<Arc>, FstWriter<Arc>>::Create(filename);
185  return new STListFarWriter(writer);
186  }
187 
188  void Add(const string &key, const Fst<Arc> &fst) final {
189  writer_->Add(key, fst);
190  }
191 
192  constexpr FarType Type() const final { return FAR_STLIST; }
193 
194  bool Error() const final { return writer_->Error(); }
195 
196  private:
198  : writer_(writer) {}
199 
200  std::unique_ptr<STListWriter<Fst<Arc>, FstWriter<Arc>>> writer_;
201 };
202 
203 template <class A>
204 class FstFarWriter : public FarWriter<A> {
205  public:
206  using Arc = A;
207 
208  explicit FstFarWriter(const string &filename)
209  : filename_(filename), error_(false), written_(false) {}
210 
211  static FstFarWriter *Create(const string &filename) {
212  return new FstFarWriter(filename);
213  }
214 
215  void Add(const string &key, const Fst<A> &fst) final {
216  if (written_) {
217  LOG(WARNING) << "FstFarWriter::Add: only one FST supported,"
218  << " subsequent entries discarded.";
219  } else {
220  error_ = !fst.Write(filename_);
221  written_ = true;
222  }
223  }
224 
225  constexpr FarType Type() const final { return FAR_FST; }
226 
227  bool Error() const final { return error_; }
228 
229  ~FstFarWriter() final {}
230 
231  private:
232  string filename_;
233  bool error_;
234  bool written_;
235 };
236 
237 template <class Arc>
238 FarWriter<Arc> *FarWriter<Arc>::Create(const string &filename, FarType type) {
239  switch (type) {
240  case FAR_DEFAULT:
241  if (filename.empty()) return STListFarWriter<Arc>::Create(filename);
242  case FAR_STTABLE:
243  return STTableFarWriter<Arc>::Create(filename);
244  case FAR_STLIST:
245  return STListFarWriter<Arc>::Create(filename);
246  case FAR_FST:
247  return FstFarWriter<Arc>::Create(filename);
248  default:
249  LOG(ERROR) << "FarWriter::Create: Unknown FAR type";
250  return nullptr;
251  }
252 }
253 
254 template <class Arc>
255 class FstReader {
256  public:
257  Fst<Arc> *operator()(std::istream &strm) const {
258  return Fst<Arc>::Read(strm, FstReadOptions());
259  }
260 };
261 
262 template <class A>
263 class STTableFarReader : public FarReader<A> {
264  public:
265  using Arc = A;
266 
267  static STTableFarReader *Open(const string &filename) {
268  auto *reader = STTableReader<Fst<Arc>, FstReader<Arc>>::Open(filename);
269  if (!reader || reader->Error()) return nullptr;
270  return new STTableFarReader(reader);
271  }
272 
273  static STTableFarReader *Open(const std::vector<string> &filenames) {
274  auto *reader = STTableReader<Fst<Arc>, FstReader<Arc>>::Open(filenames);
275  if (!reader || reader->Error()) return nullptr;
276  return new STTableFarReader(reader);
277  }
278 
279  void Reset() final { reader_->Reset(); }
280 
281  bool Find(const string &key) final { return reader_->Find(key); }
282 
283  bool Done() const final { return reader_->Done(); }
284 
285  void Next() final { return reader_->Next(); }
286 
287  const string &GetKey() const final { return reader_->GetKey(); }
288 
289  const Fst<Arc> *GetFst() const final { return reader_->GetEntry(); }
290 
291  constexpr FarType Type() const final { return FAR_STTABLE; }
292 
293  bool Error() const final { return reader_->Error(); }
294 
295  private:
297  : reader_(reader) {}
298 
299  std::unique_ptr<STTableReader<Fst<Arc>, FstReader<Arc>>> reader_;
300 };
301 
302 template <class A>
303 class STListFarReader : public FarReader<A> {
304  public:
305  using Arc = A;
306 
307  static STListFarReader *Open(const string &filename) {
308  auto *reader = STListReader<Fst<Arc>, FstReader<Arc>>::Open(filename);
309  if (!reader || reader->Error()) return nullptr;
310  return new STListFarReader(reader);
311  }
312 
313  static STListFarReader *Open(const std::vector<string> &filenames) {
314  auto *reader = STListReader<Fst<Arc>, FstReader<Arc>>::Open(filenames);
315  if (!reader || reader->Error()) return nullptr;
316  return new STListFarReader(reader);
317  }
318 
319  void Reset() final { reader_->Reset(); }
320 
321  bool Find(const string &key) final { return reader_->Find(key); }
322 
323  bool Done() const final { return reader_->Done(); }
324 
325  void Next() final { return reader_->Next(); }
326 
327  const string &GetKey() const final { return reader_->GetKey(); }
328 
329  const Fst<Arc> *GetFst() const final { return reader_->GetEntry(); }
330 
331  constexpr FarType Type() const final { return FAR_STLIST; }
332 
333  bool Error() const final { return reader_->Error(); }
334 
335  private:
337  : reader_(reader) {}
338 
339  std::unique_ptr<STListReader<Fst<Arc>, FstReader<Arc>>> reader_;
340 };
341 
342 template <class A>
343 class FstFarReader : public FarReader<A> {
344  public:
345  using Arc = A;
346 
347  static FstFarReader *Open(const string &filename) {
348  std::vector<string> filenames;
349  filenames.push_back(filename);
350  return new FstFarReader<Arc>(filenames);
351  }
352 
353  static FstFarReader *Open(const std::vector<string> &filenames) {
354  return new FstFarReader<Arc>(filenames);
355  }
356 
357  explicit FstFarReader(const std::vector<string> &filenames)
358  : keys_(filenames), has_stdin_(false), pos_(0), error_(false) {
359  std::sort(keys_.begin(), keys_.end());
360  streams_.resize(keys_.size(), 0);
361  for (size_t i = 0; i < keys_.size(); ++i) {
362  if (keys_[i].empty()) {
363  if (!has_stdin_) {
364  streams_[i] = &std::cin;
365  has_stdin_ = true;
366  } else {
367  FSTERROR() << "FstFarReader::FstFarReader: standard input should "
368  "only appear once in the input file list";
369  error_ = true;
370  return;
371  }
372  } else {
373  streams_[i] = new std::ifstream(
374  keys_[i], std::ios_base::in | std::ios_base::binary);
375  }
376  }
377  if (pos_ >= keys_.size()) return;
378  ReadFst();
379  }
380 
381  void Reset() final {
382  if (has_stdin_) {
383  FSTERROR()
384  << "FstFarReader::Reset: Operation not supported on standard input";
385  error_ = true;
386  return;
387  }
388  pos_ = 0;
389  ReadFst();
390  }
391 
392  bool Find(const string &key) final {
393  if (has_stdin_) {
394  FSTERROR()
395  << "FstFarReader::Find: Operation not supported on standard input";
396  error_ = true;
397  return false;
398  }
399  pos_ = 0; // TODO
400  ReadFst();
401  return true;
402  }
403 
404  bool Done() const final { return error_ || pos_ >= keys_.size(); }
405 
406  void Next() final {
407  ++pos_;
408  ReadFst();
409  }
410 
411  const string &GetKey() const final { return keys_[pos_]; }
412 
413  const Fst<Arc> *GetFst() const final { return fst_.get(); }
414 
415  constexpr FarType Type() const final { return FAR_FST; }
416 
417  bool Error() const final { return error_; }
418 
419  ~FstFarReader() final {
420  for (size_t i = 0; i < keys_.size(); ++i) {
421  if (streams_[i] != &std::cin) {
422  delete streams_[i];
423  }
424  }
425  }
426 
427  private:
428  void ReadFst() {
429  fst_.reset();
430  if (pos_ >= keys_.size()) return;
431  streams_[pos_]->seekg(0);
432  fst_.reset(Fst<Arc>::Read(*streams_[pos_], FstReadOptions()));
433  if (!fst_) {
434  FSTERROR() << "FstFarReader: Error reading Fst from: " << keys_[pos_];
435  error_ = true;
436  }
437  }
438 
439  std::vector<string> keys_;
440  std::vector<std::istream *> streams_;
441  bool has_stdin_;
442  size_t pos_;
443  mutable std::unique_ptr<Fst<Arc>> fst_;
444  mutable bool error_;
445 };
446 
447 template <class Arc>
448 FarReader<Arc> *FarReader<Arc>::Open(const string &filename) {
449  if (filename.empty())
450  return STListFarReader<Arc>::Open(filename);
451  else if (IsSTTable(filename))
452  return STTableFarReader<Arc>::Open(filename);
453  else if (IsSTList(filename))
454  return STListFarReader<Arc>::Open(filename);
455  else if (IsFst(filename))
456  return FstFarReader<Arc>::Open(filename);
457  return nullptr;
458 }
459 
460 template <class Arc>
461 FarReader<Arc> *FarReader<Arc>::Open(const std::vector<string> &filenames) {
462  if (!filenames.empty() && filenames[0].empty())
463  return STListFarReader<Arc>::Open(filenames);
464  else if (!filenames.empty() && IsSTTable(filenames[0]))
465  return STTableFarReader<Arc>::Open(filenames);
466  else if (!filenames.empty() && IsSTList(filenames[0]))
467  return STListFarReader<Arc>::Open(filenames);
468  else if (!filenames.empty() && IsFst(filenames[0]))
469  return FstFarReader<Arc>::Open(filenames);
470  return nullptr;
471 }
472 
473 } // namespace fst
474 
475 #endif // FST_EXTENSIONS_FAR_FAR_H_
static STListFarReader * Open(const std::vector< string > &filenames)
Definition: far.h:313
bool Find(const string &key) final
Definition: far.h:281
void Reset() final
Definition: far.h:319
constexpr FarType Type() const final
Definition: far.h:291
virtual ~FarReader()
Definition: far.h:139
static FstFarWriter * Create(const string &filename)
Definition: far.h:211
const string & FarType() const
Definition: far.h:36
void Reset() final
Definition: far.h:381
bool Error() const final
Definition: far.h:194
bool Find(const string &key) final
Definition: far.h:392
bool Read(const string &filename)
Definition: far.h:38
bool Error() const final
Definition: far.h:169
const Fst< Arc > * GetFst() const final
Definition: far.h:289
bool IsSTTable(const string &filename)
Definition: sttable.cc:9
void Reset() final
Definition: far.h:279
bool ReadSTTableHeader(const string &filename, Header *header)
Definition: sttable.h:300
static FarWriter * Create(const string &filename, FarType type=FAR_DEFAULT)
Definition: far.h:238
bool Error() const final
Definition: far.h:227
FstFarReader(const std::vector< string > &filenames)
Definition: far.h:357
Fst< Arc > * operator()(std::istream &strm) const
Definition: far.h:257
bool IsFstHeader(std::istream &, const string &)
Definition: fst.cc:44
bool Error() const final
Definition: far.h:333
void Add(const string &key, const Fst< A > &fst) final
Definition: far.h:215
virtual bool Write(std::ostream &strm, const FstWriteOptions &opts) const
Definition: fst.h:270
void Next() final
Definition: far.h:325
static STListFarWriter * Create(const string &filename)
Definition: far.h:183
#define LOG(type)
Definition: log.h:48
constexpr FarType Type() const final
Definition: far.h:192
bool Error() const final
Definition: far.h:417
bool Find(const string &key) final
Definition: far.h:321
FarWriter()
Definition: far.h:98
~FstFarWriter() final
Definition: far.h:229
bool IsSTList(const string &filename)
Definition: stlist.cc:11
const string & GetKey() const final
Definition: far.h:287
static STTableFarWriter * Create(const string &filename)
Definition: far.h:158
static Fst< Arc > * Read(std::istream &strm, const FstReadOptions &opts)
Definition: fst.h:234
FarTokenType
Definition: far.h:23
const string & GetKey() const final
Definition: far.h:411
virtual ~FarWriter()
Definition: far.h:95
#define FSTERROR()
Definition: util.h:35
static FarReader * Open(const string &filename)
Definition: far.h:448
bool Done() const final
Definition: far.h:323
static STTableFarReader * Open(const string &filename)
Definition: far.h:267
const string & ArcType() const
Definition: fst.h:121
bool Done() const final
Definition: far.h:404
FarReader()
Definition: far.h:142
void Add(const string &key, const Fst< Arc > &fst) final
Definition: far.h:188
FarType
Definition: far.h:71
constexpr FarType Type() const final
Definition: far.h:225
bool Read(std::istream &strm, const string &source, bool rewind=false)
Definition: fst.cc:58
const Fst< Arc > * GetFst() const final
Definition: far.h:329
static STTableFarReader * Open(const std::vector< string > &filenames)
Definition: far.h:273
void Next() final
Definition: far.h:406
const string & GetKey() const final
Definition: far.h:327
static FstFarReader * Open(const std::vector< string > &filenames)
Definition: far.h:353
void Add(const string &key, const Fst< Arc > &fst) final
Definition: far.h:163
static STListFarReader * Open(const string &filename)
Definition: far.h:307
FarType Type() const final
Definition: far.h:167
static FstFarReader * Open(const string &filename)
Definition: far.h:347
void Next() final
Definition: far.h:285
~FstFarReader() final
Definition: far.h:419
bool ReadSTListHeader(const string &filename, Header *header)
Definition: stlist.h:231
bool IsFst(const string &filename)
Definition: far.h:25
const string & ArcType() const
Definition: far.h:34
FstFarWriter(const string &filename)
Definition: far.h:208
const Fst< Arc > * GetFst() const final
Definition: far.h:413
bool Error() const final
Definition: far.h:293
constexpr FarType Type() const final
Definition: far.h:415
void operator()(std::ostream &strm, const Fst< Arc > &fst) const
Definition: far.h:148
FarEntryType
Definition: far.h:21
constexpr FarType Type() const final
Definition: far.h:331
bool Done() const final
Definition: far.h:283