FST  openfst-1.8.3
OpenFst Library
far.h
Go to the documentation of this file.
1 // Copyright 2005-2024 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the 'License');
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an 'AS IS' BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // See www.openfst.org for extensive documentation on this weighted
16 // finite-state transducer library.
17 //
18 // Finite-State Transducer (FST) archive classes.
19 
20 #ifndef FST_EXTENSIONS_FAR_FAR_H_
21 #define FST_EXTENSIONS_FAR_FAR_H_
22 
23 #include <algorithm>
24 #include <cstddef>
25 #include <cstdint>
26 #include <ios>
27 #include <iostream>
28 #include <istream>
29 #include <memory>
30 #include <ostream>
31 #include <sstream>
32 #include <string>
33 #include <utility>
34 #include <vector>
35 
36 #include <fst/log.h>
39 #include <fst/arc.h>
40 #include <fstream>
41 #include <fst/fst.h>
42 #include <fst/properties.h>
43 #include <fst/util.h>
44 #include <fst/vector-fst.h>
45 #include <string_view>
46 
47 namespace fst {
48 
// Kind of input unit that is treated as one archive entry.
// NOTE(review): presumably LINE means one entry per input line and FILE means
// one entry per input file; the consumers are not visible in this header, so
// confirm against the callers.
enum class FarEntryType {
  LINE,
  FILE,
};
50 
// Container format of an FST archive.
enum class FarType {
  // No explicit choice. FarWriter::Create() maps this to STLIST when the
  // source name is empty and to STTABLE otherwise.
  DEFAULT = 0,
  // Archive handled by STTableFarWriter / STTableFarReader.
  STTABLE = 1,
  // Archive handled by STListFarWriter / STListFarReader.
  STLIST = 2,
  // Plain FST file(s) handled by FstFarWriter / FstFarReader.
  FST = 3,
};
57 
58 // Checks for FST magic number in an input stream (to be opened given the source
59 // name), to indicate to the caller function that the stream content is an FST
60 // header.
61 inline bool IsFst(std::string_view source) {
62  std::ifstream strm(std::string(source),
63  std::ios_base::in | std::ios_base::binary);
64  if (!strm) return false;
65  int32_t magic_number = 0;
66  ReadType(strm, &magic_number);
67  bool match = magic_number == kFstMagicNumber;
68  return match;
69 }
70 
71 // FST archive header class
72 class FarHeader {
73  public:
74  const std::string &ArcType() const { return arctype_; }
75 
76  enum FarType FarType() const { return fartype_; }
77 
78  bool Read(const std::string &source) {
79  FstHeader fsthdr;
80  arctype_ = "unknown";
81  if (source.empty()) {
82  // Header reading unsupported on stdin. Assumes STList and StdArc.
83  fartype_ = FarType::STLIST;
84  arctype_ = "standard";
85  return true;
86  } else if (IsSTTable(source)) { // Checks if STTable.
87  fartype_ = FarType::STTABLE;
88  if (!ReadSTTableHeader(source, &fsthdr)) return false;
89  arctype_ = fsthdr.ArcType().empty() ? ErrorArc::Type() : fsthdr.ArcType();
90  return true;
91  } else if (IsSTList(source)) { // Checks if STList.
92  fartype_ = FarType::STLIST;
93  if (!ReadSTListHeader(source, &fsthdr)) return false;
94  arctype_ = fsthdr.ArcType().empty() ? ErrorArc::Type() : fsthdr.ArcType();
95  return true;
96  } else if (IsFst(source)) { // Checks if FST.
97  fartype_ = FarType::FST;
98  std::ifstream istrm(source,
99  std::ios_base::in | std::ios_base::binary);
100  if (!fsthdr.Read(istrm, source)) return false;
101  arctype_ = fsthdr.ArcType().empty() ? ErrorArc::Type() : fsthdr.ArcType();
102  return true;
103  }
104  return false;
105  }
106 
107  private:
108  enum FarType fartype_;
109  std::string arctype_;
110 };
111 
112 // This class creates an archive of FSTs.
113 template <class A>
114 class FarWriter {
115  public:
116  using Arc = A;
117 
118  // Creates a new (empty) FST archive; returns null on error.
119  static FarWriter *Create(std::string_view source,
120  FarType type = FarType::DEFAULT);
121 
122  // Adds an FST to the end of an archive. Keys must be non-empty and
123  // in lexicographic order. FSTs must have a suitable write method.
124  virtual void Add(std::string_view key, const Fst<Arc> &fst) = 0;
125 
126  virtual FarType Type() const = 0;
127 
128  virtual bool Error() const = 0;
129 
130  virtual ~FarWriter() = default;
131 
132  protected:
133  FarWriter() = default;
134 };
135 
136 // This class iterates through an existing archive of FSTs.
137 template <class A>
138 class FarReader {
139  public:
140  using Arc = A;
141 
142  // Opens an existing FST archive in a single file; returns null on error.
143  // Sets current position to the beginning of the achive.
144  static FarReader *Open(const std::string &source);
145 
146  // Opens an existing FST archive in multiple files; returns null on error.
147  // Sets current position to the beginning of the achive.
148  static FarReader *Open(const std::vector<std::string> &sources);
149 
150  // Resets current position to beginning of archive.
151  virtual void Reset() = 0;
152 
153  // Sets current position to first entry >= key. Returns true if a match.
154  virtual bool Find(std::string_view key) = 0;
155 
156  // Current position at end of archive?
157  virtual bool Done() const = 0;
158 
159  // Move current position to next FST.
160  virtual void Next() = 0;
161 
162  // Returns key at the current position. This reference is invalidated if
163  // the current position in the archive is changed.
164  virtual const std::string &GetKey() const = 0;
165 
166  // Returns pointer to FST at the current position. This is invalidated if
167  // the current position in the archive is changed.
168  virtual const Fst<Arc> *GetFst() const = 0;
169 
170  virtual FarType Type() const = 0;
171 
172  virtual bool Error() const = 0;
173 
174  virtual ~FarReader() = default;
175 
176  protected:
177  FarReader() = default;
178 };
179 
180 template <class Arc>
181 class FstWriter {
182  public:
183  void operator()(std::ostream &strm, const Fst<Arc> &fst) const {
184  fst.Write(strm, FstWriteOptions());
185  }
186 };
187 
188 template <class A>
189 class STTableFarWriter : public FarWriter<A> {
190  public:
191  using Arc = A;
192 
193  static STTableFarWriter *Create(std::string_view source) {
194  auto *writer = STTableWriter<Fst<Arc>, FstWriter<Arc>>::Create(source);
195  return new STTableFarWriter(writer);
196  }
197 
198  void Add(std::string_view key, const Fst<Arc> &fst) final {
199  writer_->Add(key, fst);
200  }
201 
202  FarType Type() const final { return FarType::STTABLE; }
203 
204  bool Error() const final { return writer_->Error(); }
205 
206  private:
208  : writer_(writer) {}
209 
210  std::unique_ptr<STTableWriter<Fst<Arc>, FstWriter<Arc>>> writer_;
211 };
212 
213 template <class A>
214 class STListFarWriter : public FarWriter<A> {
215  public:
216  using Arc = A;
217 
218  static STListFarWriter *Create(std::string_view source) {
219  auto *writer = STListWriter<Fst<Arc>, FstWriter<Arc>>::Create(source);
220  return new STListFarWriter(writer);
221  }
222 
223  void Add(std::string_view key, const Fst<Arc> &fst) final {
224  writer_->Add(key, fst);
225  }
226 
227  FarType Type() const final { return FarType::STLIST; }
228 
229  bool Error() const final { return writer_->Error(); }
230 
231  private:
233  : writer_(writer) {}
234 
235  std::unique_ptr<STListWriter<Fst<Arc>, FstWriter<Arc>>> writer_;
236 };
237 
238 template <class A>
239 class FstFarWriter final : public FarWriter<A> {
240  public:
241  using Arc = A;
242 
243  explicit FstFarWriter(std::string_view source)
244  : source_(source), error_(false), written_(false) {}
245 
246  static FstFarWriter *Create(std::string_view source) {
247  return new FstFarWriter(source);
248  }
249 
250  void Add(std::string_view key, const Fst<A> &fst) final {
251  if (written_) {
252  LOG(WARNING) << "FstFarWriter::Add: only one FST supported,"
253  << " subsequent entries discarded.";
254  } else {
255  error_ = !fst.Write(source_);
256  written_ = true;
257  }
258  }
259 
260  FarType Type() const final { return FarType::FST; }
261 
262  bool Error() const final { return error_; }
263 
264  ~FstFarWriter() final = default;
265 
266  private:
267  std::string source_;
268  bool error_;
269  bool written_;
270 };
271 
272 template <class Arc>
273 FarWriter<Arc> *FarWriter<Arc>::Create(std::string_view source, FarType type) {
274  switch (type) {
275  case FarType::DEFAULT:
276  if (source.empty()) return STListFarWriter<Arc>::Create(source);
277  [[fallthrough]];
278  case FarType::STTABLE:
279  return STTableFarWriter<Arc>::Create(source);
280  case FarType::STLIST:
281  return STListFarWriter<Arc>::Create(source);
282  case FarType::FST:
283  return FstFarWriter<Arc>::Create(source);
284  default:
285  LOG(ERROR) << "FarWriter::Create: Unknown FAR type";
286  return nullptr;
287  }
288 }
289 
290 template <class Arc>
291 class FstReader {
292  public:
293  Fst<Arc> *operator()(std::istream &strm,
294  const FstReadOptions &options = FstReadOptions()) const {
295  return Fst<Arc>::Read(strm, options);
296  }
297 };
298 
299 template <class A>
300 class STTableFarReader : public FarReader<A> {
301  public:
302  using Arc = A;
303 
304  static STTableFarReader *Open(std::string_view source) {
305  auto reader =
307  if (!reader || reader->Error()) return nullptr;
308  return new STTableFarReader(std::move(reader));
309  }
310 
311  static STTableFarReader *Open(const std::vector<std::string> &sources) {
312  auto reader = fst::WrapUnique(
313  STTableReader<Fst<Arc>, FstReader<Arc>>::Open(sources));
314  if (!reader || reader->Error()) return nullptr;
315  return new STTableFarReader(std::move(reader));
316  }
317 
318  void Reset() final { reader_->Reset(); }
319 
320  bool Find(std::string_view key) final { return reader_->Find(key); }
321 
322  bool Done() const final { return reader_->Done(); }
323 
324  void Next() final { return reader_->Next(); }
325 
326  const std::string &GetKey() const final { return reader_->GetKey(); }
327 
328  const Fst<Arc> *GetFst() const final { return reader_->GetEntry(); }
329 
330  FarType Type() const final { return FarType::STTABLE; }
331 
332  bool Error() const final { return reader_->Error(); }
333 
334  private:
335  explicit STTableFarReader(
336  std::unique_ptr<STTableReader<Fst<Arc>, FstReader<Arc>>> reader)
337  : reader_(std::move(reader)) {}
338 
339  std::unique_ptr<STTableReader<Fst<Arc>, FstReader<Arc>>> reader_;
340 };
341 
342 template <class A>
343 class STListFarReader : public FarReader<A> {
344  public:
345  using Arc = A;
346 
347  static STListFarReader *Open(std::string_view source) {
348  auto reader =
350  if (!reader || reader->Error()) return nullptr;
351  return new STListFarReader(std::move(reader));
352  }
353 
354  static STListFarReader *Open(const std::vector<std::string> &sources) {
355  auto reader =
357  if (!reader || reader->Error()) return nullptr;
358  return new STListFarReader(std::move(reader));
359  }
360 
361  void Reset() final { reader_->Reset(); }
362 
363  bool Find(std::string_view key) final { return reader_->Find(key); }
364 
365  bool Done() const final { return reader_->Done(); }
366 
367  void Next() final { return reader_->Next(); }
368 
369  const std::string &GetKey() const final { return reader_->GetKey(); }
370 
371  const Fst<Arc> *GetFst() const final { return reader_->GetEntry(); }
372 
373  FarType Type() const final { return FarType::STLIST; }
374 
375  bool Error() const final { return reader_->Error(); }
376 
377  private:
378  explicit STListFarReader(
379  std::unique_ptr<STListReader<Fst<Arc>, FstReader<Arc>>> reader)
380  : reader_(std::move(reader)) {}
381 
382  std::unique_ptr<STListReader<Fst<Arc>, FstReader<Arc>>> reader_;
383 };
384 
385 template <class A>
386 class FstFarReader final : public FarReader<A> {
387  public:
388  using Arc = A;
389 
390  static FstFarReader *Open(std::string_view source) {
391  std::vector<std::string> sources;
392  sources.push_back(std::string(source));
393  return new FstFarReader<Arc>(sources);
394  }
395 
396  static FstFarReader *Open(const std::vector<std::string> &sources) {
397  return new FstFarReader<Arc>(sources);
398  }
399 
400  explicit FstFarReader(const std::vector<std::string> &sources)
401  : keys_(sources), has_stdin_(false), pos_(0), error_(false) {
402  std::sort(keys_.begin(), keys_.end());
403  streams_.resize(keys_.size(), nullptr);
404  for (size_t i = 0; i < keys_.size(); ++i) {
405  if (keys_[i].empty()) {
406  if (!has_stdin_) {
407  streams_[i] = &std::cin;
408  has_stdin_ = true;
409  } else {
410  FSTERROR() << "FstFarReader::FstFarReader: standard input should "
411  "only appear once in the input file list";
412  error_ = true;
413  return;
414  }
415  } else {
416  streams_[i] = new std::ifstream(
417  keys_[i], std::ios_base::in | std::ios_base::binary);
418  if (streams_[i]->fail()) {
419  FSTERROR() << "FstFarReader::FstFarReader: Error reading file: "
420  << sources[i];
421  error_ = true;
422  return;
423  }
424  }
425  }
426  if (pos_ >= keys_.size()) return;
427  ReadFst();
428  }
429 
430  void Reset() final {
431  if (has_stdin_) {
432  FSTERROR()
433  << "FstFarReader::Reset: Operation not supported on standard input";
434  error_ = true;
435  return;
436  }
437  pos_ = 0;
438  ReadFst();
439  }
440 
441  bool Find(std::string_view key) final {
442  if (has_stdin_) {
443  FSTERROR()
444  << "FstFarReader::Find: Operation not supported on standard input";
445  error_ = true;
446  return false;
447  }
448  pos_ = 0; // TODO
449  ReadFst();
450  return true;
451  }
452 
453  bool Done() const final { return error_ || pos_ >= keys_.size(); }
454 
455  void Next() final {
456  ++pos_;
457  ReadFst();
458  }
459 
460  const std::string &GetKey() const final { return keys_[pos_]; }
461 
462  const Fst<Arc> *GetFst() const final { return fst_.get(); }
463 
464  FarType Type() const final { return FarType::FST; }
465 
466  bool Error() const final { return error_; }
467 
468  ~FstFarReader() final {
469  for (size_t i = 0; i < keys_.size(); ++i) {
470  if (streams_[i] != &std::cin) {
471  delete streams_[i];
472  }
473  }
474  }
475 
476  private:
477  void ReadFst() {
478  fst_.reset();
479  if (pos_ >= keys_.size()) return;
480  streams_[pos_]->seekg(0);
481  fst_.reset(Fst<Arc>::Read(*streams_[pos_], FstReadOptions()));
482  if (!fst_) {
483  FSTERROR() << "FstFarReader: Error reading Fst from: " << keys_[pos_];
484  error_ = true;
485  }
486  }
487 
488  std::vector<std::string> keys_;
489  std::vector<std::istream *> streams_;
490  bool has_stdin_;
491  size_t pos_;
492  mutable std::unique_ptr<Fst<Arc>> fst_;
493  mutable bool error_;
494 };
495 
496 template <class Arc>
497 FarReader<Arc> *FarReader<Arc>::Open(const std::string &source) {
498  if (source.empty())
499  return STListFarReader<Arc>::Open(source);
500  else if (IsSTTable(source))
501  return STTableFarReader<Arc>::Open(source);
502  else if (IsSTList(source))
503  return STListFarReader<Arc>::Open(source);
504  else if (IsFst(source))
505  return FstFarReader<Arc>::Open(source);
506  return nullptr;
507 }
508 
509 template <class Arc>
510 FarReader<Arc> *FarReader<Arc>::Open(const std::vector<std::string> &sources) {
511  if (!sources.empty() && sources[0].empty())
512  return STListFarReader<Arc>::Open(sources);
513  else if (!sources.empty() && IsSTTable(sources[0]))
514  return STTableFarReader<Arc>::Open(sources);
515  else if (!sources.empty() && IsSTList(sources[0]))
516  return STListFarReader<Arc>::Open(sources);
517  else if (!sources.empty() && IsFst(sources[0]))
518  return FstFarReader<Arc>::Open(sources);
519  return nullptr;
520 }
521 
522 } // namespace fst
523 
524 #endif // FST_EXTENSIONS_FAR_FAR_H_
bool Read(std::istream &strm, const std::string &source, bool rewind=false)
Definition: fst.cc:56
void Reset() final
Definition: far.h:361
constexpr int32_t kFstMagicNumber
Definition: fst.h:57
static STListFarReader * Open(const std::vector< std::string > &sources)
Definition: far.h:354
bool Find(std::string_view key) final
Definition: far.h:320
void Reset() final
Definition: far.h:430
bool Error() const final
Definition: far.h:229
bool Error() const final
Definition: far.h:204
FarType Type() const final
Definition: far.h:464
const Fst< Arc > * GetFst() const final
Definition: far.h:328
void Create(const std::vector< std::string > &sources, FarWriter< Arc > &writer, int32_t generate_keys, const std::string &key_prefix, const std::string &key_suffix)
Definition: create.h:39
void Reset() final
Definition: far.h:318
static Fst * Read(std::istream &strm, const FstReadOptions &opts)
Definition: fst.h:257
bool Error() const final
Definition: far.h:262
static FarReader * Open(const std::string &source)
Definition: far.h:497
const std::string & GetKey() const final
Definition: far.h:369
bool Error() const final
Definition: far.h:375
static STTableFarWriter * Create(std::string_view source)
Definition: far.h:193
virtual bool Write(std::ostream &strm, const FstWriteOptions &opts) const
Definition: fst.h:293
void Next() final
Definition: far.h:367
#define LOG(type)
Definition: log.h:53
static const std::string & Type()
Definition: arc.h:68
static STTableFarReader * Open(const std::vector< std::string > &sources)
Definition: far.h:311
bool Error() const final
Definition: far.h:466
std::unique_ptr< T > WrapUnique(T *ptr)
Definition: compat.h:132
FarType Type() const final
Definition: far.h:373
#define FSTERROR()
Definition: util.h:56
bool Read(const std::string &source)
Definition: far.h:78
bool Done() const final
Definition: far.h:365
bool ReadSTListHeader(const std::string &source, Header *header)
Definition: stlist.h:262
bool Done() const final
Definition: far.h:453
FarType
Definition: far.h:51
bool Find(std::string_view key) final
Definition: far.h:363
const Fst< Arc > * GetFst() const final
Definition: far.h:371
void Add(std::string_view key, const Fst< Arc > &fst) final
Definition: far.h:223
Fst< Arc > * operator()(std::istream &strm, const FstReadOptions &options=FstReadOptions()) const
Definition: far.h:293
void Next() final
Definition: far.h:455
FarType Type() const final
Definition: far.h:202
bool Find(std::string_view key) final
Definition: far.h:441
static STListFarWriter * Create(std::string_view source)
Definition: far.h:218
void Next() final
Definition: far.h:324
bool ReadSTTableHeader(const std::string &source, Header *header)
Definition: sttable.h:335
~FstFarReader() final
Definition: far.h:468
static STTableFarReader * Open(std::string_view source)
Definition: far.h:304
const std::string & ArcType() const
Definition: far.h:74
FarType Type() const final
Definition: far.h:227
static FstFarReader * Open(std::string_view source)
Definition: far.h:390
bool IsSTList(std::string_view source)
Definition: stlist.cc:30
FarType Type() const final
Definition: far.h:260
static FstFarReader * Open(const std::vector< std::string > &sources)
Definition: far.h:396
const Fst< Arc > * GetFst() const final
Definition: far.h:462
std::istream & ReadType(std::istream &strm, T *t)
Definition: util.h:80
static STListFarReader * Open(std::string_view source)
Definition: far.h:347
bool Error() const final
Definition: far.h:332
static FarWriter * Create(std::string_view source, FarType type=FarType::DEFAULT)
Definition: far.h:273
bool IsSTTable(std::string_view source)
Definition: sttable.cc:29
FstFarReader(const std::vector< std::string > &sources)
Definition: far.h:400
void Add(std::string_view key, const Fst< A > &fst) final
Definition: far.h:250
const std::string & GetKey() const final
Definition: far.h:326
void operator()(std::ostream &strm, const Fst< Arc > &fst) const
Definition: far.h:183
const std::string & GetKey() const final
Definition: far.h:460
void Add(std::string_view key, const Fst< Arc > &fst) final
Definition: far.h:198
bool IsFst(std::string_view source)
Definition: far.h:61
FarEntryType
Definition: far.h:49
FstFarWriter(std::string_view source)
Definition: far.h:243
const std::string & ArcType() const
Definition: fst.h:138
static FstFarWriter * Create(std::string_view source)
Definition: far.h:246
FarType Type() const final
Definition: far.h:330
bool Done() const final
Definition: far.h:322