FST  openfst-1.8.2
OpenFst Library
far.h
Go to the documentation of this file.
1 // Copyright 2005-2020 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the 'License');
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an 'AS IS' BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // See www.openfst.org for extensive documentation on this weighted
16 // finite-state transducer library.
17 //
18 // Finite-State Transducer (FST) archive classes.
19 
20 #ifndef FST_EXTENSIONS_FAR_FAR_H_
21 #define FST_EXTENSIONS_FAR_FAR_H_
22 
23 #include <algorithm>
24 #include <cstdint>
25 #include <iostream>
26 #include <sstream>
27 #include <string>
28 #include <utility>
29 
30 #include <fst/log.h>
33 #include <fst/arc.h>
34 #include <fstream>
35 #include <fst/fst.h>
36 #include <fst/vector-fst.h>
37 #include <string_view>
38 
39 namespace fst {
40 
// Kind of input entry. Not used within this header; presumably selects
// whether one line or one whole file of input becomes an archive entry --
// confirm against the callers.
enum class FarEntryType {
  LINE,
  FILE,
};
42 
// On-disk format of an FST archive.
enum class FarType {
  // Lets FarWriter::Create choose: STLIST for an empty source name, STTABLE
  // otherwise.
  DEFAULT = 0,
  // Archive handled by STTableFarWriter / STTableFarReader.
  STTABLE = 1,
  // Archive handled by STListFarWriter / STListFarReader.
  STLIST = 2,
  // Plain FST file(s) handled by FstFarWriter / FstFarReader.
  FST = 3,
};
49 
50 // Checks for FST magic number in an input stream (to be opened given the source
51 // name), to indicate to the caller function that the stream content is an FST
52 // header.
53 inline bool IsFst(const std::string &source) {
54  std::ifstream strm(source, std::ios_base::in | std::ios_base::binary);
55  if (!strm) return false;
56  int32_t magic_number = 0;
57  ReadType(strm, &magic_number);
58  bool match = magic_number == kFstMagicNumber;
59  return match;
60 }
61 
62 // FST archive header class
63 class FarHeader {
64  public:
65  const std::string &ArcType() const { return arctype_; }
66 
67  enum FarType FarType() const { return fartype_; }
68 
69  bool Read(const std::string &source) {
70  FstHeader fsthdr;
71  arctype_ = "unknown";
72  if (source.empty()) {
73  // Header reading unsupported on stdin. Assumes STList and StdArc.
74  fartype_ = FarType::STLIST;
75  arctype_ = "standard";
76  return true;
77  } else if (IsSTTable(source)) { // Checks if STTable.
78  fartype_ = FarType::STTABLE;
79  if (!ReadSTTableHeader(source, &fsthdr)) return false;
80  arctype_ = fsthdr.ArcType().empty() ? ErrorArc::Type() : fsthdr.ArcType();
81  return true;
82  } else if (IsSTList(source)) { // Checks if STList.
83  fartype_ = FarType::STLIST;
84  if (!ReadSTListHeader(source, &fsthdr)) return false;
85  arctype_ = fsthdr.ArcType().empty() ? ErrorArc::Type() : fsthdr.ArcType();
86  return true;
87  } else if (IsFst(source)) { // Checks if FST.
88  fartype_ = FarType::FST;
89  std::ifstream istrm(source,
90  std::ios_base::in | std::ios_base::binary);
91  if (!fsthdr.Read(istrm, source)) return false;
92  arctype_ = fsthdr.ArcType().empty() ? ErrorArc::Type() : fsthdr.ArcType();
93  return true;
94  }
95  return false;
96  }
97 
98  private:
99  enum FarType fartype_;
100  std::string arctype_;
101 };
102 
103 // This class creates an archive of FSTs.
104 template <class A>
105 class FarWriter {
106  public:
107  using Arc = A;
108 
109  // Creates a new (empty) FST archive; returns null on error.
110  static FarWriter *Create(const std::string &source,
111  FarType type = FarType::DEFAULT);
112 
113  // Adds an FST to the end of an archive. Keys must be non-empty and
114  // in lexicographic order. FSTs must have a suitable write method.
115  virtual void Add(std::string_view key, const Fst<Arc> &fst) = 0;
116 
117  virtual FarType Type() const = 0;
118 
119  virtual bool Error() const = 0;
120 
121  virtual ~FarWriter() {}
122 
123  protected:
125 };
126 
127 // This class iterates through an existing archive of FSTs.
128 template <class A>
129 class FarReader {
130  public:
131  using Arc = A;
132 
133  // Opens an existing FST archive in a single file; returns null on error.
134  // Sets current position to the beginning of the achive.
135  static FarReader *Open(const std::string &source);
136 
137  // Opens an existing FST archive in multiple files; returns null on error.
138  // Sets current position to the beginning of the achive.
139  static FarReader *Open(const std::vector<std::string> &sources);
140 
141  // Resets current position to beginning of archive.
142  virtual void Reset() = 0;
143 
144  // Sets current position to first entry >= key. Returns true if a match.
145  virtual bool Find(std::string_view key) = 0;
146 
147  // Current position at end of archive?
148  virtual bool Done() const = 0;
149 
150  // Move current position to next FST.
151  virtual void Next() = 0;
152 
153  // Returns key at the current position. This reference is invalidated if
154  // the current position in the archive is changed.
155  virtual const std::string &GetKey() const = 0;
156 
157  // Returns pointer to FST at the current position. This is invalidated if
158  // the current position in the archive is changed.
159  virtual const Fst<Arc> *GetFst() const = 0;
160 
161  virtual FarType Type() const = 0;
162 
163  virtual bool Error() const = 0;
164 
165  virtual ~FarReader() {}
166 
167  protected:
169 };
170 
171 template <class Arc>
172 class FstWriter {
173  public:
174  void operator()(std::ostream &strm, const Fst<Arc> &fst) const {
175  fst.Write(strm, FstWriteOptions());
176  }
177 };
178 
179 template <class A>
180 class STTableFarWriter : public FarWriter<A> {
181  public:
182  using Arc = A;
183 
184  static STTableFarWriter *Create(const std::string &source) {
185  auto *writer = STTableWriter<Fst<Arc>, FstWriter<Arc>>::Create(source);
186  return new STTableFarWriter(writer);
187  }
188 
189  void Add(std::string_view key, const Fst<Arc> &fst) final {
190  writer_->Add(key, fst);
191  }
192 
193  FarType Type() const final { return FarType::STTABLE; }
194 
195  bool Error() const final { return writer_->Error(); }
196 
197  private:
199  : writer_(writer) {}
200 
201  std::unique_ptr<STTableWriter<Fst<Arc>, FstWriter<Arc>>> writer_;
202 };
203 
204 template <class A>
205 class STListFarWriter : public FarWriter<A> {
206  public:
207  using Arc = A;
208 
209  static STListFarWriter *Create(const std::string &source) {
210  auto *writer = STListWriter<Fst<Arc>, FstWriter<Arc>>::Create(source);
211  return new STListFarWriter(writer);
212  }
213 
214  void Add(std::string_view key, const Fst<Arc> &fst) final {
215  writer_->Add(key, fst);
216  }
217 
218  FarType Type() const final { return FarType::STLIST; }
219 
220  bool Error() const final { return writer_->Error(); }
221 
222  private:
224  : writer_(writer) {}
225 
226  std::unique_ptr<STListWriter<Fst<Arc>, FstWriter<Arc>>> writer_;
227 };
228 
229 template <class A>
230 class FstFarWriter final : public FarWriter<A> {
231  public:
232  using Arc = A;
233 
234  explicit FstFarWriter(const std::string &source)
235  : source_(source), error_(false), written_(false) {}
236 
237  static FstFarWriter *Create(const std::string &source) {
238  return new FstFarWriter(source);
239  }
240 
241  void Add(std::string_view key, const Fst<A> &fst) final {
242  if (written_) {
243  LOG(WARNING) << "FstFarWriter::Add: only one FST supported,"
244  << " subsequent entries discarded.";
245  } else {
246  error_ = !fst.Write(source_);
247  written_ = true;
248  }
249  }
250 
251  FarType Type() const final { return FarType::FST; }
252 
253  bool Error() const final { return error_; }
254 
255  ~FstFarWriter() final {}
256 
257  private:
258  std::string source_;
259  bool error_;
260  bool written_;
261 };
262 
263 template <class Arc>
264 FarWriter<Arc> *FarWriter<Arc>::Create(const std::string &source,
265  FarType type) {
266  switch (type) {
267  case FarType::DEFAULT:
268  if (source.empty()) return STListFarWriter<Arc>::Create(source);
269  case FarType::STTABLE:
270  return STTableFarWriter<Arc>::Create(source);
271  case FarType::STLIST:
272  return STListFarWriter<Arc>::Create(source);
273  case FarType::FST:
274  return FstFarWriter<Arc>::Create(source);
275  default:
276  LOG(ERROR) << "FarWriter::Create: Unknown FAR type";
277  return nullptr;
278  }
279 }
280 
281 template <class Arc>
282 class FstReader {
283  public:
284  Fst<Arc> *operator()(std::istream &strm,
285  const FstReadOptions &options = FstReadOptions()) const {
286  return Fst<Arc>::Read(strm, options);
287  }
288 };
289 
290 template <class A>
291 class STTableFarReader : public FarReader<A> {
292  public:
293  using Arc = A;
294 
295  static STTableFarReader *Open(const std::string &source) {
296  auto reader =
298  if (!reader || reader->Error()) return nullptr;
299  return new STTableFarReader(std::move(reader));
300  }
301 
302  static STTableFarReader *Open(const std::vector<std::string> &sources) {
303  auto reader = fst::WrapUnique(
304  STTableReader<Fst<Arc>, FstReader<Arc>>::Open(sources));
305  if (!reader || reader->Error()) return nullptr;
306  return new STTableFarReader(std::move(reader));
307  }
308 
309  void Reset() final { reader_->Reset(); }
310 
311  bool Find(std::string_view key) final { return reader_->Find(key); }
312 
313  bool Done() const final { return reader_->Done(); }
314 
315  void Next() final { return reader_->Next(); }
316 
317  const std::string &GetKey() const final { return reader_->GetKey(); }
318 
319  const Fst<Arc> *GetFst() const final { return reader_->GetEntry(); }
320 
321  FarType Type() const final { return FarType::STTABLE; }
322 
323  bool Error() const final { return reader_->Error(); }
324 
325  private:
326  explicit STTableFarReader(
327  std::unique_ptr<STTableReader<Fst<Arc>, FstReader<Arc>>> reader)
328  : reader_(std::move(reader)) {}
329 
330  std::unique_ptr<STTableReader<Fst<Arc>, FstReader<Arc>>> reader_;
331 };
332 
333 template <class A>
334 class STListFarReader : public FarReader<A> {
335  public:
336  using Arc = A;
337 
338  static STListFarReader *Open(const std::string &source) {
339  auto reader =
341  if (!reader || reader->Error()) return nullptr;
342  return new STListFarReader(std::move(reader));
343  }
344 
345  static STListFarReader *Open(const std::vector<std::string> &sources) {
346  auto reader =
348  if (!reader || reader->Error()) return nullptr;
349  return new STListFarReader(std::move(reader));
350  }
351 
352  void Reset() final { reader_->Reset(); }
353 
354  bool Find(std::string_view key) final { return reader_->Find(key); }
355 
356  bool Done() const final { return reader_->Done(); }
357 
358  void Next() final { return reader_->Next(); }
359 
360  const std::string &GetKey() const final { return reader_->GetKey(); }
361 
362  const Fst<Arc> *GetFst() const final { return reader_->GetEntry(); }
363 
364  FarType Type() const final { return FarType::STLIST; }
365 
366  bool Error() const final { return reader_->Error(); }
367 
368  private:
369  explicit STListFarReader(
370  std::unique_ptr<STListReader<Fst<Arc>, FstReader<Arc>>> reader)
371  : reader_(std::move(reader)) {}
372 
373  std::unique_ptr<STListReader<Fst<Arc>, FstReader<Arc>>> reader_;
374 };
375 
376 template <class A>
377 class FstFarReader final : public FarReader<A> {
378  public:
379  using Arc = A;
380 
381  static FstFarReader *Open(const std::string &source) {
382  std::vector<std::string> sources;
383  sources.push_back(source);
384  return new FstFarReader<Arc>(sources);
385  }
386 
387  static FstFarReader *Open(const std::vector<std::string> &sources) {
388  return new FstFarReader<Arc>(sources);
389  }
390 
391  explicit FstFarReader(const std::vector<std::string> &sources)
392  : keys_(sources), has_stdin_(false), pos_(0), error_(false) {
393  std::sort(keys_.begin(), keys_.end());
394  streams_.resize(keys_.size(), nullptr);
395  for (size_t i = 0; i < keys_.size(); ++i) {
396  if (keys_[i].empty()) {
397  if (!has_stdin_) {
398  streams_[i] = &std::cin;
399  has_stdin_ = true;
400  } else {
401  FSTERROR() << "FstFarReader::FstFarReader: standard input should "
402  "only appear once in the input file list";
403  error_ = true;
404  return;
405  }
406  } else {
407  streams_[i] = new std::ifstream(
408  keys_[i], std::ios_base::in | std::ios_base::binary);
409  if (streams_[i]->fail()) {
410  FSTERROR() << "FstFarReader::FstFarReader: Error reading file: "
411  << sources[i];
412  error_ = true;
413  return;
414  }
415  }
416  }
417  if (pos_ >= keys_.size()) return;
418  ReadFst();
419  }
420 
421  void Reset() final {
422  if (has_stdin_) {
423  FSTERROR()
424  << "FstFarReader::Reset: Operation not supported on standard input";
425  error_ = true;
426  return;
427  }
428  pos_ = 0;
429  ReadFst();
430  }
431 
432  bool Find(std::string_view key) final {
433  if (has_stdin_) {
434  FSTERROR()
435  << "FstFarReader::Find: Operation not supported on standard input";
436  error_ = true;
437  return false;
438  }
439  pos_ = 0; // TODO
440  ReadFst();
441  return true;
442  }
443 
444  bool Done() const final { return error_ || pos_ >= keys_.size(); }
445 
446  void Next() final {
447  ++pos_;
448  ReadFst();
449  }
450 
451  const std::string &GetKey() const final { return keys_[pos_]; }
452 
453  const Fst<Arc> *GetFst() const final { return fst_.get(); }
454 
455  FarType Type() const final { return FarType::FST; }
456 
457  bool Error() const final { return error_; }
458 
459  ~FstFarReader() final {
460  for (size_t i = 0; i < keys_.size(); ++i) {
461  if (streams_[i] != &std::cin) {
462  delete streams_[i];
463  }
464  }
465  }
466 
467  private:
468  void ReadFst() {
469  fst_.reset();
470  if (pos_ >= keys_.size()) return;
471  streams_[pos_]->seekg(0);
472  fst_.reset(Fst<Arc>::Read(*streams_[pos_], FstReadOptions()));
473  if (!fst_) {
474  FSTERROR() << "FstFarReader: Error reading Fst from: " << keys_[pos_];
475  error_ = true;
476  }
477  }
478 
479  std::vector<std::string> keys_;
480  std::vector<std::istream *> streams_;
481  bool has_stdin_;
482  size_t pos_;
483  mutable std::unique_ptr<Fst<Arc>> fst_;
484  mutable bool error_;
485 };
486 
487 template <class Arc>
488 FarReader<Arc> *FarReader<Arc>::Open(const std::string &source) {
489  if (source.empty())
490  return STListFarReader<Arc>::Open(source);
491  else if (IsSTTable(source))
492  return STTableFarReader<Arc>::Open(source);
493  else if (IsSTList(source))
494  return STListFarReader<Arc>::Open(source);
495  else if (IsFst(source))
496  return FstFarReader<Arc>::Open(source);
497  return nullptr;
498 }
499 
500 template <class Arc>
501 FarReader<Arc> *FarReader<Arc>::Open(const std::vector<std::string> &sources) {
502  if (!sources.empty() && sources[0].empty())
503  return STListFarReader<Arc>::Open(sources);
504  else if (!sources.empty() && IsSTTable(sources[0]))
505  return STTableFarReader<Arc>::Open(sources);
506  else if (!sources.empty() && IsSTList(sources[0]))
507  return STListFarReader<Arc>::Open(sources);
508  else if (!sources.empty() && IsFst(sources[0]))
509  return FstFarReader<Arc>::Open(sources);
510  return nullptr;
511 }
512 
513 } // namespace fst
514 
515 #endif // FST_EXTENSIONS_FAR_FAR_H_
bool Read(std::istream &strm, const std::string &source, bool rewind=false)
Definition: fst.cc:50
void Reset() final
Definition: far.h:352
virtual ~FarReader()
Definition: far.h:165
constexpr int32_t kFstMagicNumber
Definition: fst.h:55
static STListFarReader * Open(const std::vector< std::string > &sources)
Definition: far.h:345
bool Find(std::string_view key) final
Definition: far.h:311
void Reset() final
Definition: far.h:421
bool Error() const final
Definition: far.h:220
bool IsFst(const std::string &source)
Definition: far.h:53
bool Error() const final
Definition: far.h:195
FarType Type() const final
Definition: far.h:455
const Fst< Arc > * GetFst() const final
Definition: far.h:319
void Create(const std::vector< std::string > &sources, FarWriter< Arc > &writer, int32_t generate_keys, const std::string &key_prefix, const std::string &key_suffix)
Definition: create.h:35
void Reset() final
Definition: far.h:309
static Fst * Read(std::istream &strm, const FstReadOptions &opts)
Definition: fst.h:257
bool Error() const final
Definition: far.h:253
static FarReader * Open(const std::string &source)
Definition: far.h:488
const std::string & GetKey() const final
Definition: far.h:360
bool Error() const final
Definition: far.h:366
virtual bool Write(std::ostream &strm, const FstWriteOptions &opts) const
Definition: fst.h:293
void Next() final
Definition: far.h:358
#define LOG(type)
Definition: log.h:49
static STTableFarReader * Open(const std::vector< std::string > &sources)
Definition: far.h:302
static const std::string & Type()
Definition: arc.h:68
static STListFarWriter * Create(const std::string &source)
Definition: far.h:209
bool Error() const final
Definition: far.h:457
FarWriter()
Definition: far.h:124
std::unique_ptr< T > WrapUnique(T *ptr)
Definition: compat.h:125
~FstFarWriter() final
Definition: far.h:255
FarType Type() const final
Definition: far.h:364
static FstFarReader * Open(const std::string &source)
Definition: far.h:381
virtual ~FarWriter()
Definition: far.h:121
#define FSTERROR()
Definition: util.h:53
bool Read(const std::string &source)
Definition: far.h:69
bool Done() const final
Definition: far.h:356
bool ReadSTListHeader(const std::string &source, Header *header)
Definition: stlist.h:256
bool Done() const final
Definition: far.h:444
FarReader()
Definition: far.h:168
FarType
Definition: far.h:43
bool Find(std::string_view key) final
Definition: far.h:354
const Fst< Arc > * GetFst() const final
Definition: far.h:362
void Add(std::string_view key, const Fst< Arc > &fst) final
Definition: far.h:214
Fst< Arc > * operator()(std::istream &strm, const FstReadOptions &options=FstReadOptions()) const
Definition: far.h:284
void Next() final
Definition: far.h:446
static STListFarReader * Open(const std::string &source)
Definition: far.h:338
FarType Type() const final
Definition: far.h:193
static FstFarWriter * Create(const std::string &source)
Definition: far.h:237
bool Find(std::string_view key) final
Definition: far.h:432
void Next() final
Definition: far.h:315
bool ReadSTTableHeader(const std::string &source, Header *header)
Definition: sttable.h:330
~FstFarReader() final
Definition: far.h:459
static STTableFarReader * Open(const std::string &source)
Definition: far.h:295
FstFarWriter(const std::string &source)
Definition: far.h:234
const std::string & ArcType() const
Definition: far.h:65
FarType Type() const final
Definition: far.h:218
bool IsSTList(const std::string &source)
Definition: stlist.cc:28
bool IsSTTable(const std::string &source)
Definition: sttable.cc:27
FarType Type() const final
Definition: far.h:251
static FstFarReader * Open(const std::vector< std::string > &sources)
Definition: far.h:387
static FarWriter * Create(const std::string &source, FarType type=FarType::DEFAULT)
Definition: far.h:264
const Fst< Arc > * GetFst() const final
Definition: far.h:453
std::istream & ReadType(std::istream &strm, T *t)
Definition: util.h:65
bool Error() const final
Definition: far.h:323
FstFarReader(const std::vector< std::string > &sources)
Definition: far.h:391
void Add(std::string_view key, const Fst< A > &fst) final
Definition: far.h:241
static STTableFarWriter * Create(const std::string &source)
Definition: far.h:184
const std::string & GetKey() const final
Definition: far.h:317
void operator()(std::ostream &strm, const Fst< Arc > &fst) const
Definition: far.h:174
const std::string & GetKey() const final
Definition: far.h:451
void Add(std::string_view key, const Fst< Arc > &fst) final
Definition: far.h:189
FarEntryType
Definition: far.h:41
const std::string & ArcType() const
Definition: fst.h:144
FarType Type() const final
Definition: far.h:321
bool Done() const final
Definition: far.h:313