FST  openfst-1.7.1
OpenFst Library
fst.h
Go to the documentation of this file.
1 // See www.openfst.org for extensive documentation on this weighted
2 // finite-state transducer library.
3 //
4 // FST abstract base class definition, state and arc iterator interface, and
5 // suggested base implementation.
6 
7 #ifndef FST_FST_H_
8 #define FST_FST_H_
9 
10 #include <sys/types.h>
11 
12 #include <cmath>
13 #include <cstddef>
14 
15 #include <iostream>
16 #include <memory>
17 #include <sstream>
18 #include <string>
19 #include <utility>
20 
21 #include <fst/compat.h>
22 #include <fst/flags.h>
23 #include <fst/log.h>
24 #include <fstream>
25 
26 #include <fst/arc.h>
27 #include <fst/memory.h>
28 #include <fst/properties.h>
29 #include <fst/register.h>
30 #include <fst/symbol-table.h>
31 #include <fst/util.h>
32 
33 
34 DECLARE_bool(fst_align);
35 
36 namespace fst {
37 
38 bool IsFstHeader(std::istream &, const string &);
39 
40 class FstHeader;
41 
42 template <class Arc>
44 
45 template <class Arc>
47 
48 template <class Arc>
50 
52  // FileReadMode(s) are advisory, there are many conditions that prevent a
53  // file from being mapped, READ mode will be selected in these cases with
54  // a warning indicating why it was chosen.
55  enum FileReadMode { READ, MAP };
56 
57  string source; // Where you're reading from.
58  const FstHeader *header; // Pointer to FST header; if non-zero, use
59  // this info (don't read a stream header).
60  const SymbolTable *isymbols; // Pointer to input symbols; if non-zero, use
61  // this info (read and skip stream isymbols)
62  const SymbolTable *osymbols; // Pointer to output symbols; if non-zero, use
63  // this info (read and skip stream osymbols)
64  FileReadMode mode; // Read or map files (advisory, if possible)
65  bool read_isymbols; // Read isymbols, if any (default: true).
66  bool read_osymbols; // Read osymbols, if any (default: true).
67 
68  explicit FstReadOptions(const string &source = "<unspecified>",
69  const FstHeader *header = nullptr,
70  const SymbolTable *isymbols = nullptr,
71  const SymbolTable *osymbols = nullptr);
72 
73  explicit FstReadOptions(const string &source, const SymbolTable *isymbols,
74  const SymbolTable *osymbols = nullptr);
75 
76  // Helper function to convert strings FileReadModes into their enum value.
77  static FileReadMode ReadMode(const string &mode);
78 
79  // Outputs a debug string for the FstReadOptions object.
80  string DebugString() const;
81 };
82 
84  string source; // Where you're writing to.
85  bool write_header; // Write the header?
86  bool write_isymbols; // Write input symbols?
87  bool write_osymbols; // Write output symbols?
88  bool align; // Write data aligned (may fail on pipes)?
89  bool stream_write; // Avoid seek operations in writing.
90 
91  explicit FstWriteOptions(const string &source = "<unspecifed>",
92  bool write_header = true, bool write_isymbols = true,
93  bool write_osymbols = true,
94  bool align = FLAGS_fst_align,
95  bool stream_write = false)
96  : source(source),
97  write_header(write_header),
98  write_isymbols(write_isymbols),
99  write_osymbols(write_osymbols),
100  align(align),
101  stream_write(stream_write) {}
102 };
103 
104 // Header class.
105 //
106 // This is the recommended file header representation.
107 
108 class FstHeader {
109  public:
110  enum {
111  HAS_ISYMBOLS = 0x1, // Has input symbol table.
112  HAS_OSYMBOLS = 0x2, // Has output symbol table.
113  IS_ALIGNED = 0x4, // Memory-aligned (where appropriate).
114  } Flags;
115 
116  FstHeader() : version_(0), flags_(0), properties_(0), start_(-1),
117  numstates_(0), numarcs_(0) {}
118 
119  const string &FstType() const { return fsttype_; }
120 
121  const string &ArcType() const { return arctype_; }
122 
123  int32 Version() const { return version_; }
124 
125  int32 GetFlags() const { return flags_; }
126 
127  uint64 Properties() const { return properties_; }
128 
129  int64 Start() const { return start_; }
130 
131  int64 NumStates() const { return numstates_; }
132 
133  int64 NumArcs() const { return numarcs_; }
134 
135  void SetFstType(const string &type) { fsttype_ = type; }
136 
137  void SetArcType(const string &type) { arctype_ = type; }
138 
139  void SetVersion(int32 version) { version_ = version; }
140 
141  void SetFlags(int32 flags) { flags_ = flags; }
142 
143  void SetProperties(uint64 properties) { properties_ = properties; }
144 
145  void SetStart(int64 start) { start_ = start; }
146 
147  void SetNumStates(int64 numstates) { numstates_ = numstates; }
148 
149  void SetNumArcs(int64 numarcs) { numarcs_ = numarcs; }
150 
151  bool Read(std::istream &strm, const string &source,
152  bool rewind = false);
153 
154  bool Write(std::ostream &strm, const string &source) const;
155 
156  // Outputs a debug string for the FstHeader object.
157  string DebugString() const;
158 
159  private:
160  string fsttype_; // E.g. "vector".
161  string arctype_; // E.g. "standard".
162  int32 version_; // Type version number.
163  int32 flags_; // File format bits.
164  uint64 properties_; // FST property bits.
165  int64 start_; // Start state.
166  int64 numstates_; // # of states.
167  int64 numarcs_; // # of arcs.
168 };
169 
170 // Specifies matcher action.
171 enum MatchType {
172  MATCH_INPUT = 1, // Match input label.
173  MATCH_OUTPUT = 2, // Match output label.
174  MATCH_BOTH = 3, // Match input or output label.
175  MATCH_NONE = 4, // Match nothing.
177 }; // Otherwise, match type unknown.
178 
179 constexpr int kNoLabel = -1; // Not a valid label.
180 constexpr int kNoStateId = -1; // Not a valid state ID.
181 
182 // A generic FST, templated on the arc definition, with common-denominator
183 // methods (use StateIterator and ArcIterator to iterate over its states and
184 // arcs).
185 template <class A>
186 class Fst {
187  public:
188  using Arc = A;
189  using StateId = typename Arc::StateId;
190  using Weight = typename Arc::Weight;
191 
192  virtual ~Fst() {}
193 
194  // Initial state.
195  virtual StateId Start() const = 0;
196 
197  // State's final weight.
198  virtual Weight Final(StateId) const = 0;
199 
200  // State's arc count.
201  virtual size_t NumArcs(StateId) const = 0;
202 
203  // State's input epsilon count.
204  virtual size_t NumInputEpsilons(StateId) const = 0;
205 
206  // State's output epsilon count.
207  virtual size_t NumOutputEpsilons(StateId) const = 0;
208 
209  // Property bits. If test = false, return stored properties bits for mask
210  // (some possibly unknown); if test = true, return property bits for mask
211  // (computing o.w. unknown).
212  virtual uint64 Properties(uint64 mask, bool test) const = 0;
213 
214  // FST type name.
215  virtual const string &Type() const = 0;
216 
217  // Gets a copy of this Fst. The copying behaves as follows:
218  //
219  // (1) The copying is constant time if safe = false or if safe = true
220  // and is on an otherwise unaccessed FST.
221  //
222  // (2) If safe = true, the copy is thread-safe in that the original
223  // and copy can be safely accessed (but not necessarily mutated) by
224  // separate threads. For some FST types, 'Copy(true)' should only be
225  // called on an FST that has not otherwise been accessed. Behavior is
226  // otherwise undefined.
227  //
228  // (3) If a MutableFst is copied and then mutated, then the original is
229  // unmodified and vice versa (often by a copy-on-write on the initial
230  // mutation, which may not be constant time).
231  virtual Fst<Arc> *Copy(bool safe = false) const = 0;
232 
233  // Reads an FST from an input stream; returns nullptr on error.
234  static Fst<Arc> *Read(std::istream &strm, const FstReadOptions &opts) {
235  FstReadOptions ropts(opts);
236  FstHeader hdr;
237  if (ropts.header) {
238  hdr = *opts.header;
239  } else {
240  if (!hdr.Read(strm, opts.source)) return nullptr;
241  ropts.header = &hdr;
242  }
243  const auto &fst_type = hdr.FstType();
244  const auto reader = FstRegister<Arc>::GetRegister()->GetReader(fst_type);
245  if (!reader) {
246  LOG(ERROR) << "Fst::Read: Unknown FST type " << fst_type
247  << " (arc type = " << Arc::Type() << "): " << ropts.source;
248  return nullptr;
249  }
250  return reader(strm, ropts);
251  }
252 
253  // Reads an FST from a file; returns nullptr on error. An empty filename
254  // results in reading from standard input.
255  static Fst<Arc> *Read(const string &filename) {
256  if (!filename.empty()) {
257  std::ifstream strm(filename,
258  std::ios_base::in | std::ios_base::binary);
259  if (!strm) {
260  LOG(ERROR) << "Fst::Read: Can't open file: " << filename;
261  return nullptr;
262  }
263  return Read(strm, FstReadOptions(filename));
264  } else {
265  return Read(std::cin, FstReadOptions("standard input"));
266  }
267  }
268 
269  // Writes an FST to an output stream; returns false on error.
270  virtual bool Write(std::ostream &strm, const FstWriteOptions &opts) const {
271  LOG(ERROR) << "Fst::Write: No write stream method for " << Type()
272  << " FST type";
273  return false;
274  }
275 
276  // Writes an FST to a file; returns false on error; an empty filename
277  // results in writing to standard output.
278  virtual bool Write(const string &filename) const {
279  LOG(ERROR) << "Fst::Write: No write filename method for " << Type()
280  << " FST type";
281  return false;
282  }
283 
284  // Returns input label symbol table; return nullptr if not specified.
285  virtual const SymbolTable *InputSymbols() const = 0;
286 
287  // Return output label symbol table; return nullptr if not specified.
288  virtual const SymbolTable *OutputSymbols() const = 0;
289 
290  // For generic state iterator construction (not normally called directly by
291  // users). Does not copy the FST.
292  virtual void InitStateIterator(StateIteratorData<Arc> *data) const = 0;
293 
294  // For generic arc iterator construction (not normally called directly by
295  // users). Does not copy the FST.
296  virtual void InitArcIterator(StateId s, ArcIteratorData<Arc> *data) const = 0;
297 
298  // For generic matcher construction (not normally called directly by users).
299  // Does not copy the FST.
300  virtual MatcherBase<Arc> *InitMatcher(MatchType match_type) const;
301 
302  protected:
303  bool WriteFile(const string &filename) const {
304  if (!filename.empty()) {
305  std::ofstream strm(filename,
306  std::ios_base::out | std::ios_base::binary);
307  if (!strm) {
308  LOG(ERROR) << "Fst::Write: Can't open file: " << filename;
309  return false;
310  }
311  bool val = Write(strm, FstWriteOptions(filename));
312  if (!val) LOG(ERROR) << "Fst::Write failed: " << filename;
313  return val;
314  } else {
315  return Write(std::cout, FstWriteOptions("standard output"));
316  }
317  }
318 };
319 
320 // A useful alias when using StdArc.
321 using StdFst = Fst<StdArc>;
322 
323 // State and arc iterator definitions.
324 //
325 // State iterator interface templated on the Arc definition; used for
326 // StateIterator specializations returned by the InitStateIterator FST method.
327 template <class Arc>
329  public:
330  using StateId = typename Arc::StateId;
331 
332  virtual ~StateIteratorBase() {}
333 
334  // End of iterator?
335  virtual bool Done() const = 0;
336  // Returns current state (when !Done()).
337  virtual StateId Value() const = 0;
338  // Advances to next state (when !Done()).
339  virtual void Next() = 0;
340  // Resets to initial condition.
341  virtual void Reset() = 0;
342 };
343 
344 // StateIterator initialization data.
345 
346 template <class Arc>
347 struct StateIteratorData {
348  using StateId = typename Arc::StateId;
349 
350  // Specialized iterator if non-zero.
352  // Otherwise, the total number of states.
354 
355  StateIteratorData() : base(nullptr), nstates(0) {}
356 
357  StateIteratorData(const StateIteratorData &) = delete;
358  StateIteratorData &operator=(const StateIteratorData &) = delete;
359 };
360 
361 // Generic state iterator, templated on the FST definition (a wrapper
362 // around a pointer to a specific one). Here is a typical use:
363 //
364 // for (StateIterator<StdFst> siter(fst);
365 // !siter.Done();
366 // siter.Next()) {
367 // StateId s = siter.Value();
368 // ...
369 // }
370 // There is no copying of the FST.
371 template <class FST>
373  public:
374  using Arc = typename FST::Arc;
375  using StateId = typename Arc::StateId;
376 
377  explicit StateIterator(const FST &fst) : s_(0) {
378  fst.InitStateIterator(&data_);
379  }
380 
381  ~StateIterator() { delete data_.base; }
382 
383  bool Done() const {
384  return data_.base ? data_.base->Done() : s_ >= data_.nstates;
385  }
386 
387  StateId Value() const { return data_.base ? data_.base->Value() : s_; }
388 
389  void Next() {
390  if (data_.base) {
391  data_.base->Next();
392  } else {
393  ++s_;
394  }
395  }
396 
397  void Reset() {
398  if (data_.base) {
399  data_.base->Reset();
400  } else {
401  s_ = 0;
402  }
403  }
404 
405  private:
407  StateId s_;
408 };
409 
410 // Flags to control the behavior on an arc iterator.
411 static constexpr uint32 kArcILabelValue =
412  0x0001; // Value() gives valid ilabel.
413 static constexpr uint32 kArcOLabelValue = 0x0002; // " " " olabel.
414 static constexpr uint32 kArcWeightValue = 0x0004; // " " " weight.
415 static constexpr uint32 kArcNextStateValue =
416  0x0008; // " " " nextstate.
417 static constexpr uint32 kArcNoCache = 0x0010; // No need to cache arcs.
418 
419 static constexpr uint32 kArcValueFlags =
420  kArcILabelValue | kArcOLabelValue | kArcWeightValue | kArcNextStateValue;
421 
422 static constexpr uint32 kArcFlags = kArcValueFlags | kArcNoCache;
423 
424 // Arc iterator interface, templated on the arc definition; used for arc
425 // iterator specializations that are returned by the InitArcIterator FST method.
426 template <class Arc>
428  public:
429  using StateId = typename Arc::StateId;
430 
431  virtual ~ArcIteratorBase() {}
432 
433  // End of iterator?
434  virtual bool Done() const = 0;
435  // Returns current arc (when !Done()).
436  virtual const Arc &Value() const = 0;
437  // Advances to next arc (when !Done()).
438  virtual void Next() = 0;
439  // Returns current position.
440  virtual size_t Position() const = 0;
441  // Returns to initial condition.
442  virtual void Reset() = 0;
443  // Advances to arbitrary arc by position.
444  virtual void Seek(size_t) = 0;
445  // Returns current behavioral flags.
446  virtual uint32 Flags() const = 0;
447  // Sets behavioral flags.
448  virtual void SetFlags(uint32, uint32) = 0;
449 };
450 
451 // ArcIterator initialization data.
452 template <class Arc>
453 struct ArcIteratorData {
455  : base(nullptr), arcs(nullptr), narcs(0), ref_count(nullptr) {}
456 
457  ArcIteratorData(const ArcIteratorData &) = delete;
458 
459  ArcIteratorData &operator=(const ArcIteratorData &) = delete;
460 
461  ArcIteratorBase<Arc> *base; // Specialized iterator if non-zero.
462  const Arc *arcs; // O.w. arcs pointer
463  size_t narcs; // ... and arc count.
464  int *ref_count; // ... and reference count if non-zero.
465 };
466 
467 // Generic arc iterator, templated on the FST definition (a wrapper around a
468 // pointer to a specific one). Here is a typical use:
469 //
470 // for (ArcIterator<StdFst> aiter(fst, s);
471 // !aiter.Done();
472 // aiter.Next()) {
473 // const StdArc &arc = aiter.Value();
474 // ...
475 // }
476 // There is no copying of the FST.
477 template <class FST>
478 class ArcIterator {
479  public:
480  using Arc = typename FST::Arc;
481  using StateId = typename Arc::StateId;
482 
483  ArcIterator(const FST &fst, StateId s) : i_(0) {
484  fst.InitArcIterator(s, &data_);
485  }
486 
487  explicit ArcIterator(const ArcIteratorData<Arc> &data) : data_(data), i_(0) {
488  if (data_.ref_count) ++(*data_.ref_count);
489  }
490 
492  if (data_.base) {
493  delete data_.base;
494  } else if (data_.ref_count) {
495  --(*data_.ref_count);
496  }
497  }
498 
499  bool Done() const {
500  return data_.base ? data_.base->Done() : i_ >= data_.narcs;
501  }
502 
503  const Arc &Value() const {
504  return data_.base ? data_.base->Value() : data_.arcs[i_];
505  }
506 
507  void Next() {
508  if (data_.base) {
509  data_.base->Next();
510  } else {
511  ++i_;
512  }
513  }
514 
515  void Reset() {
516  if (data_.base) {
517  data_.base->Reset();
518  } else {
519  i_ = 0;
520  }
521  }
522 
523  void Seek(size_t a) {
524  if (data_.base) {
525  data_.base->Seek(a);
526  } else {
527  i_ = a;
528  }
529  }
530 
531  size_t Position() const { return data_.base ? data_.base->Position() : i_; }
532 
533  uint32 Flags() const {
534  if (data_.base) {
535  return data_.base->Flags();
536  } else {
537  return kArcValueFlags;
538  }
539  }
540 
541  void SetFlags(uint32 flags, uint32 mask) {
542  if (data_.base) data_.base->SetFlags(flags, mask);
543  }
544 
545  private:
546  ArcIteratorData<Arc> data_;
547  size_t i_;
548 };
549 
550 } // namespace fst
551 
552 // ArcIterator placement operator new and destroy function; new needs to be in
553 // the global namespace.
554 
555 template <class FST>
556 void *operator new(size_t size,
558  return pool->Allocate();
559 }
560 
561 namespace fst {
562 
563 template <class FST>
565  if (aiter) {
566  aiter->~ArcIterator<FST>();
567  pool->Free(aiter);
568  }
569 }
570 
571 // Matcher definitions.
572 
573 template <class Arc>
575  return nullptr; // One should just use the default matcher.
576 }
577 
578 // FST accessors, useful in high-performance applications.
579 
580 namespace internal {
581 
582 // General case, requires non-abstract, 'final' methods. Use for inlining.
583 
// Returns the final weight of state `s`. The call names F::Final explicitly,
// so it resolves to F's own definition rather than going through virtual
// dispatch.
template <class F>
inline auto Final(const F &fst, typename F::Arc::StateId s) ->
    typename F::Arc::Weight {
  return fst.F::Final(s);
}
588 
// Returns the arc count of state `s` as a signed size. The call names
// F::NumArcs explicitly, so it resolves to F's own definition rather than
// going through virtual dispatch.
template <class F>
inline auto NumArcs(const F &fst, typename F::Arc::StateId s) -> ssize_t {
  return fst.F::NumArcs(s);
}
593 
// Returns the input-epsilon count of state `s` as a signed size. The call
// names F::NumInputEpsilons explicitly, so it resolves to F's own definition
// rather than going through virtual dispatch.
template <class F>
inline auto NumInputEpsilons(const F &fst, typename F::Arc::StateId s)
    -> ssize_t {
  return fst.F::NumInputEpsilons(s);
}
598 
// Returns the output-epsilon count of state `s` as a signed size. The call
// names F::NumOutputEpsilons explicitly, so it resolves to F's own definition
// rather than going through virtual dispatch.
template <class F>
inline auto NumOutputEpsilons(const F &fst, typename F::Arc::StateId s)
    -> ssize_t {
  return fst.F::NumOutputEpsilons(s);
}
603 
604 // Fst<Arc> case, abstract methods.
605 
606 template <class Arc>
607 inline typename Arc::Weight Final(const Fst<Arc> &fst,
608  typename Arc::StateId s) {
609  return fst.Final(s);
610 }
611 
612 template <class Arc>
613 inline size_t NumArcs(const Fst<Arc> &fst, typename Arc::StateId s) {
614  return fst.NumArcs(s);
615 }
616 
617 template <class Arc>
618 inline size_t NumInputEpsilons(const Fst<Arc> &fst, typename Arc::StateId s) {
619  return fst.NumInputEpsilons(s);
620 }
621 
622 template <class Arc>
623 inline size_t NumOutputEpsilons(const Fst<Arc> &fst, typename Arc::StateId s) {
624  return fst.NumOutputEpsilons(s);
625 }
626 
627 // FST implementation base.
628 //
629 // This is the recommended FST implementation base class. It will handle
630 // reference counts, property bits, type information and symbols.
631 //
632 // Users are discouraged, but not prohibited, from subclassing this outside the
633 // FST library.
634 template <class Arc>
635 class FstImpl {
636  public:
637  using StateId = typename Arc::StateId;
638  using Weight = typename Arc::Weight;
639 
640  FstImpl() : properties_(0), type_("null") {}
641 
642  FstImpl(const FstImpl<Arc> &impl)
643  : properties_(impl.properties_),
644  type_(impl.type_),
645  isymbols_(impl.isymbols_ ? impl.isymbols_->Copy() : nullptr),
646  osymbols_(impl.osymbols_ ? impl.osymbols_->Copy() : nullptr) {}
647 
648  FstImpl(FstImpl<Arc> &&impl) noexcept;
649 
650  virtual ~FstImpl() {}
651 
653  properties_ = impl.properties_;
654  type_ = impl.type_;
655  isymbols_ = impl.isymbols_ ? impl.isymbols_->Copy() : nullptr;
656  osymbols_ = impl.osymbols_ ? impl.osymbols_->Copy() : nullptr;
657  return *this;
658  }
659 
660  FstImpl &operator=(FstImpl<Arc> &&impl) noexcept;
661 
662  const string &Type() const { return type_; }
663 
664  void SetType(const string &type) { type_ = type; }
665 
666  virtual uint64 Properties() const { return properties_; }
667 
668  virtual uint64 Properties(uint64 mask) const { return properties_ & mask; }
669 
670  void SetProperties(uint64 props) {
671  properties_ &= kError; // kError can't be cleared.
672  properties_ |= props;
673  }
674 
675  void SetProperties(uint64 props, uint64 mask) {
676  properties_ &= ~mask | kError; // kError can't be cleared.
677  properties_ |= props & mask;
678  }
679 
680  // Allows (only) setting error bit on const FST implementations.
681  void SetProperties(uint64 props, uint64 mask) const {
682  if (mask != kError) {
683  FSTERROR() << "FstImpl::SetProperties() const: Can only set kError";
684  }
685  properties_ |= kError;
686  }
687 
688  const SymbolTable *InputSymbols() const { return isymbols_.get(); }
689 
690  const SymbolTable *OutputSymbols() const { return osymbols_.get(); }
691 
692  SymbolTable *InputSymbols() { return isymbols_.get(); }
693 
694  SymbolTable *OutputSymbols() { return osymbols_.get(); }
695 
696  void SetInputSymbols(const SymbolTable *isyms) {
697  isymbols_.reset(isyms ? isyms->Copy() : nullptr);
698  }
699 
700  void SetOutputSymbols(const SymbolTable *osyms) {
701  osymbols_.reset(osyms ? osyms->Copy() : nullptr);
702  }
703 
704  // Reads header and symbols from input stream, initializes FST, and returns
705  // the header. If opts.header is non-null, skips reading and uses the option
706  // value instead. If opts.[io]symbols is non-null, reads in (if present), but
707  // uses the option value.
708  bool ReadHeader(std::istream &strm, const FstReadOptions &opts,
709  int min_version, FstHeader *hdr);
710 
711  // Writes header and symbols to output stream. If opts.header is false, skips
712  // writing header. If opts.[io]symbols is false, skips writing those symbols.
713  // This method is needed for implementations that implement Write methods.
714  void WriteHeader(std::ostream &strm, const FstWriteOptions &opts,
715  int version, FstHeader *hdr) const {
716  if (opts.write_header) {
717  hdr->SetFstType(type_);
718  hdr->SetArcType(Arc::Type());
719  hdr->SetVersion(version);
720  hdr->SetProperties(properties_);
721  int32 file_flags = 0;
722  if (isymbols_ && opts.write_isymbols) {
723  file_flags |= FstHeader::HAS_ISYMBOLS;
724  }
725  if (osymbols_ && opts.write_osymbols) {
726  file_flags |= FstHeader::HAS_OSYMBOLS;
727  }
728  if (opts.align) file_flags |= FstHeader::IS_ALIGNED;
729  hdr->SetFlags(file_flags);
730  hdr->Write(strm, opts.source);
731  }
732  if (isymbols_ && opts.write_isymbols) isymbols_->Write(strm);
733  if (osymbols_ && opts.write_osymbols) osymbols_->Write(strm);
734  }
735 
736  // Writes out header and symbols to output stream. If opts.header is false,
737  // skips writing header. If opts.[io]symbols is false, skips writing those
738  // symbols. `type` is the FST type being written. This method is used in the
739  // cross-type serialization methods Fst::WriteFst.
740  static void WriteFstHeader(const Fst<Arc> &fst, std::ostream &strm,
741  const FstWriteOptions &opts, int version,
742  const string &type, uint64 properties,
743  FstHeader *hdr) {
744  if (opts.write_header) {
745  hdr->SetFstType(type);
746  hdr->SetArcType(Arc::Type());
747  hdr->SetVersion(version);
748  hdr->SetProperties(properties);
749  int32 file_flags = 0;
750  if (fst.InputSymbols() && opts.write_isymbols) {
751  file_flags |= FstHeader::HAS_ISYMBOLS;
752  }
753  if (fst.OutputSymbols() && opts.write_osymbols) {
754  file_flags |= FstHeader::HAS_OSYMBOLS;
755  }
756  if (opts.align) file_flags |= FstHeader::IS_ALIGNED;
757  hdr->SetFlags(file_flags);
758  hdr->Write(strm, opts.source);
759  }
760  if (fst.InputSymbols() && opts.write_isymbols) {
761  fst.InputSymbols()->Write(strm);
762  }
763  if (fst.OutputSymbols() && opts.write_osymbols) {
764  fst.OutputSymbols()->Write(strm);
765  }
766  }
767 
768  // In serialization routines where the header cannot be written until after
769  // the machine has been serialized, this routine can be called to seek to the
770  // beginning of the file an rewrite the header with updated fields. It
771  // repositions the file pointer back at the end of the file. Returns true on
772  // success, false on failure.
773  static bool UpdateFstHeader(const Fst<Arc> &fst, std::ostream &strm,
774  const FstWriteOptions &opts, int version,
775  const string &type, uint64 properties,
776  FstHeader *hdr, size_t header_offset) {
777  strm.seekp(header_offset);
778  if (!strm) {
779  LOG(ERROR) << "Fst::UpdateFstHeader: Write failed: " << opts.source;
780  return false;
781  }
782  WriteFstHeader(fst, strm, opts, version, type, properties, hdr);
783  if (!strm) {
784  LOG(ERROR) << "Fst::UpdateFstHeader: Write failed: " << opts.source;
785  return false;
786  }
787  strm.seekp(0, std::ios_base::end);
788  if (!strm) {
789  LOG(ERROR) << "Fst::UpdateFstHeader: Write failed: " << opts.source;
790  return false;
791  }
792  return true;
793  }
794 
795  protected:
796  mutable uint64 properties_; // Property bits.
797 
798  private:
799  string type_; // Unique name of FST class.
800  std::unique_ptr<SymbolTable> isymbols_;
801  std::unique_ptr<SymbolTable> osymbols_;
802 };
803 
804 template <class Arc>
805 inline FstImpl<Arc>::FstImpl(FstImpl<Arc> &&) noexcept = default;
806 
807 template <class Arc>
808 inline FstImpl<Arc> &FstImpl<Arc>::operator=(
809  FstImpl<Arc> &&) noexcept = default;
810 
811 template <class Arc>
812 bool FstImpl<Arc>::ReadHeader(std::istream &strm, const FstReadOptions &opts,
813  int min_version, FstHeader *hdr) {
814  if (opts.header) {
815  *hdr = *opts.header;
816  } else if (!hdr->Read(strm, opts.source)) {
817  return false;
818  }
819  if (FLAGS_v >= 2) {
820  LOG(INFO) << "FstImpl::ReadHeader: source: " << opts.source
821  << ", fst_type: " << hdr->FstType()
822  << ", arc_type: " << Arc::Type()
823  << ", version: " << hdr->Version()
824  << ", flags: " << hdr->GetFlags();
825  }
826  if (hdr->FstType() != type_) {
827  LOG(ERROR) << "FstImpl::ReadHeader: FST not of type " << type_
828  << ": " << opts.source;
829  return false;
830  }
831  if (hdr->ArcType() != Arc::Type()) {
832  LOG(ERROR) << "FstImpl::ReadHeader: Arc not of type " << Arc::Type()
833  << ": " << opts.source;
834  return false;
835  }
836  if (hdr->Version() < min_version) {
837  LOG(ERROR) << "FstImpl::ReadHeader: Obsolete " << type_
838  << " FST version: " << opts.source;
839  return false;
840  }
841  properties_ = hdr->Properties();
842  if (hdr->GetFlags() & FstHeader::HAS_ISYMBOLS) {
843  isymbols_.reset(SymbolTable::Read(strm, opts.source));
844  }
845  // Deletes input symbol table.
846  if (!opts.read_isymbols) SetInputSymbols(nullptr);
847  if (hdr->GetFlags() & FstHeader::HAS_OSYMBOLS) {
848  osymbols_.reset(SymbolTable::Read(strm, opts.source));
849  }
850  // Deletes output symbol table.
851  if (!opts.read_osymbols) SetOutputSymbols(nullptr);
852  if (opts.isymbols) {
853  isymbols_.reset(opts.isymbols->Copy());
854  }
855  if (opts.osymbols) {
856  osymbols_.reset(opts.osymbols->Copy());
857  }
858  return true;
859 }
860 
861 } // namespace internal
862 
863 template <class Arc>
864 uint64 TestProperties(const Fst<Arc> &fst, uint64 mask, uint64 *known);
865 
866 // This is a helper class template useful for attaching an FST interface to
867 // its implementation, handling reference counting.
868 template <class Impl, class FST = Fst<typename Impl::Arc>>
869 class ImplToFst : public FST {
870  public:
871  using Arc = typename Impl::Arc;
872  using StateId = typename Arc::StateId;
873  using Weight = typename Arc::Weight;
874 
875  StateId Start() const override { return impl_->Start(); }
876 
877  Weight Final(StateId s) const override { return impl_->Final(s); }
878 
879  size_t NumArcs(StateId s) const override { return impl_->NumArcs(s); }
880 
881  size_t NumInputEpsilons(StateId s) const override {
882  return impl_->NumInputEpsilons(s);
883  }
884 
885  size_t NumOutputEpsilons(StateId s) const override {
886  return impl_->NumOutputEpsilons(s);
887  }
888 
889  uint64 Properties(uint64 mask, bool test) const override {
890  if (test) {
891  uint64 knownprops, testprops = TestProperties(*this, mask, &knownprops);
892  impl_->SetProperties(testprops, knownprops);
893  return testprops & mask;
894  } else {
895  return impl_->Properties(mask);
896  }
897  }
898 
899  const string &Type() const override { return impl_->Type(); }
900 
901  const SymbolTable *InputSymbols() const override {
902  return impl_->InputSymbols();
903  }
904 
905  const SymbolTable *OutputSymbols() const override {
906  return impl_->OutputSymbols();
907  }
908 
909  protected:
910  explicit ImplToFst(std::shared_ptr<Impl> impl) : impl_(std::move(impl)) {}
911 
912  // This constructor presumes there is a copy constructor for the
913  // implementation.
914  ImplToFst(const ImplToFst<Impl, FST> &fst, bool safe) {
915  if (safe) {
916  impl_ = std::make_shared<Impl>(*(fst.impl_));
917  } else {
918  impl_ = fst.impl_;
919  }
920  }
921 
922  ImplToFst() = delete;
923 
924  ImplToFst(const ImplToFst<Impl, FST> &fst) : impl_(fst.impl_) {}
925 
927  : impl_(std::move(fst.impl_)) {
928  fst.impl_ = std::make_shared<Impl>();
929  }
930 
932  impl_ = fst.impl_;
933  return *this;
934  }
935 
937  if (this != &fst) {
938  impl_ = std::move(fst.impl_);
939  fst.impl_ = std::make_shared<Impl>();
940  }
941  return *this;
942  }
943 
944  // Returns raw pointers to the shared object.
945  const Impl *GetImpl() const { return impl_.get(); }
946 
947  Impl *GetMutableImpl() const { return impl_.get(); }
948 
949  // Returns a ref-counted smart pointer to the implementation.
950  std::shared_ptr<Impl> GetSharedImpl() const { return impl_; }
951 
952  bool Unique() const { return impl_.unique(); }
953 
954  void SetImpl(std::shared_ptr<Impl> impl) { impl_ = std::move(impl); }
955 
956  private:
957  template <class IFST, class OFST>
958  friend void Cast(const IFST &ifst, OFST *ofst);
959 
960  std::shared_ptr<Impl> impl_;
961 };
962 
963 // Converts FSTs by casting their implementations, where this makes sense
964 // (which excludes implementations with weight-dependent virtual methods).
965 // Must be a friend of the FST classes involved (currently the concrete FSTs:
966 // ConstFst, CompactFst, and VectorFst). This can only be safely used for arc
967 // types that have identical storage characteristics. As with an FST
968 // copy constructor and Copy() method, this is a constant time operation
969 // (but subject to copy-on-write if it is a MutableFst and modified).
// Points ofst->impl_ at ifst's implementation object, reinterpreted as
// OFST::Impl. The shared_ptr aliasing constructor is used, so the result
// shares ownership with ifst.impl_ while holding the retyped raw pointer.
template <class IFST, class OFST>
void Cast(const IFST &ifst, OFST *ofst) {
  using OImpl = typename OFST::Impl;
  auto *const retyped = reinterpret_cast<OImpl *>(ifst.impl_.get());
  ofst->impl_ = std::shared_ptr<OImpl>(ifst.impl_, retyped);
}
976 
977 // FST serialization.
978 
979 template <class Arc>
980 string FstToString(const Fst<Arc> &fst,
981  const FstWriteOptions &options =
982  FstWriteOptions("FstToString")) {
983  std::ostringstream ostrm;
984  fst.Write(ostrm, options);
985  return ostrm.str();
986 }
987 
988 template <class Arc>
989 void FstToString(const Fst<Arc> &fst, string *result) {
990  *result = FstToString(fst);
991 }
992 
993 template <class Arc>
994 void FstToString(const Fst<Arc> &fst, string *result,
995  const FstWriteOptions &options) {
996  *result = FstToString(fst, options);
997 }
998 
999 template <class Arc>
1000 Fst<Arc> *StringToFst(const string &s) {
1001  std::istringstream istrm(s);
1002  return Fst<Arc>::Read(istrm, FstReadOptions("StringToFst"));
1003 }
1004 
1005 } // namespace fst
1006 
1007 #endif // FST_FST_H_
size_t Position() const
Definition: fst.h:531
int32 Version() const
Definition: fst.h:123
ImplToFst< Impl, FST > & operator=(ImplToFst< Impl, FST > &&fst) noexcept
Definition: fst.h:936
void SetProperties(uint64 props, uint64 mask) const
Definition: fst.h:681
const SymbolTable * OutputSymbols() const override
Definition: fst.h:905
string source
Definition: fst.h:84
typename ArcMapFst< Arc, Arc, EncodeMapper< Arc > >::Arc Arc
Definition: fst.h:480
constexpr int kNoLabel
Definition: fst.h:179
const FstHeader * header
Definition: fst.h:58
void Cast(const F &, G *)
uint64_t uint64
Definition: types.h:32
void Reset()
Definition: fst.h:515
std::shared_ptr< Impl > GetSharedImpl() const
Definition: fst.h:950
virtual bool Write(std::ostream &strm) const
Definition: symbol-table.h:334
virtual size_t NumArcs(StateId) const =0
FstHeader()
Definition: fst.h:116
void SetNumArcs(int64 numarcs)
Definition: fst.h:149
Fst< Arc > * StringToFst(const string &s)
Definition: fst.h:1000
int64 NumStates() const
Definition: fst.h:131
FstImpl & operator=(const FstImpl< Arc > &impl)
Definition: fst.h:652
bool IsFstHeader(std::istream &, const string &)
Definition: fst.cc:44
uint64 TestProperties(const Fst< Arc > &fst, uint64 mask, uint64 *known)
const SymbolTable * OutputSymbols() const
Definition: fst.h:690
MatchType
Definition: fst.h:171
void SetFlags(const char *usage, int *argc, char ***argv, bool remove_flags, const char *src="")
Definition: flags.cc:46
virtual SymbolTable * Copy() const
Definition: symbol-table.h:264
int64 Start() const
Definition: fst.h:129
virtual bool Write(std::ostream &strm, const FstWriteOptions &opts) const
Definition: fst.h:270
size_t NumArcs(const Fst< Arc > &fst, typename Arc::StateId s)
Definition: fst.h:613
#define LOG(type)
Definition: log.h:48
StateId Start() const override
Definition: fst.h:875
virtual Weight Final(StateId) const =0
void SetOutputSymbols(const SymbolTable *osyms)
Definition: fst.h:700
typename RandGenFst< FromArc, ToArc, Sampler >::Arc::StateId StateId
Definition: fst.h:330
typename ArcMapFst< Arc, Arc, EncodeMapper< Arc > >::Arc Arc
Definition: fst.h:374
typename FST::Arc::StateId StateId
Definition: fst.h:429
virtual uint64 Properties(uint64 mask) const
Definition: fst.h:668
bool read_isymbols
Definition: fst.h:65
DECLARE_bool(fst_align)
~ArcIterator()
Definition: fst.h:491
static bool UpdateFstHeader(const Fst< Arc > &fst, std::ostream &strm, const FstWriteOptions &opts, int version, const string &type, uint64 properties, FstHeader *hdr, size_t header_offset)
Definition: fst.h:773
size_t NumArcs(StateId s) const override
Definition: fst.h:879
bool stream_write
Definition: fst.h:89
const SymbolTable * osymbols
Definition: fst.h:62
virtual uint64 Properties() const
Definition: fst.h:666
constexpr int kNoStateId
Definition: fst.h:180
SymbolTable * InputSymbols()
Definition: fst.h:692
const Arc & Value() const
Definition: fst.h:503
static Fst< Arc > * Read(std::istream &strm, const FstReadOptions &opts)
Definition: fst.h:234
virtual size_t NumInputEpsilons(StateId) const =0
size_t NumInputEpsilons(StateId s) const override
Definition: fst.h:881
void SetType(const string &type)
Definition: fst.h:664
ArcIterator(const FST &fst, StateId s)
Definition: fst.h:483
int64_t int64
Definition: types.h:27
const string & Type() const override
Definition: fst.h:899
FstReadOptions(const string &source="<unspecified>", const FstHeader *header=nullptr, const SymbolTable *isymbols=nullptr, const SymbolTable *osymbols=nullptr)
Definition: fst.cc:108
#define FSTERROR()
Definition: util.h:35
size_t NumOutputEpsilons(const Fst< Arc > &fst, typename Arc::StateId s)
Definition: fst.h:623
SymbolTable * OutputSymbols()
Definition: fst.h:694
void SetProperties(uint64 props, uint64 mask)
Definition: fst.h:675
bool WriteFile(const string &filename) const
Definition: fst.h:303
bool write_osymbols
Definition: fst.h:87
StateIteratorBase< Arc > * base
Definition: fst.h:351
static SymbolTable * Read(std::istream &strm, const SymbolTableReadOptions &opts)
Definition: symbol-table.h:236
ImplToFst(const ImplToFst< Impl, FST > &fst, bool safe)
Definition: fst.h:914
virtual ~FstImpl()
Definition: fst.h:650
const string & ArcType() const
Definition: fst.h:121
virtual ~StateIteratorBase()
Definition: fst.h:332
typename Arc::StateId StateId
Definition: fst.h:348
static FileReadMode ReadMode(const string &mode)
Definition: fst.cc:132
void Seek(size_t a)
Definition: fst.h:523
ImplToFst(ImplToFst< Impl, FST > &&fst) noexcept
Definition: fst.h:926
void SetFstType(const string &type)
Definition: fst.h:135
bool write_isymbols
Definition: fst.h:86
bool Read(std::istream &strm, const string &source, bool rewind=false)
Definition: fst.cc:58
void SetVersion(int32 version)
Definition: fst.h:139
uint32 Flags() const
Definition: fst.h:533
int32 GetFlags() const
Definition: fst.h:125
void SetFlags(uint32 flags, uint32 mask)
Definition: fst.h:541
void SetNumStates(int64 numstates)
Definition: fst.h:147
StateId nstates
Definition: fst.h:353
size_t NumOutputEpsilons(StateId s) const override
Definition: fst.h:885
static void WriteFstHeader(const Fst< Arc > &fst, std::ostream &strm, const FstWriteOptions &opts, int version, const string &type, uint64 properties, FstHeader *hdr)
Definition: fst.h:740
string DebugString() const
Definition: fst.cc:139
void WriteHeader(std::ostream &strm, const FstWriteOptions &opts, int version, FstHeader *hdr) const
Definition: fst.h:714
const string & Type() const
Definition: fst.h:662
StateId Value() const
Definition: fst.h:387
bool Done() const
Definition: fst.h:499
typename S::Arc::Weight Weight
Definition: fst.h:638
FstWriteOptions(const string &source="<unspecifed>", bool write_header=true, bool write_isymbols=true, bool write_osymbols=true, bool align=FLAGS_fst_align, bool stream_write=false)
Definition: fst.h:91
static Fst< Arc > * Read(const string &filename)
Definition: fst.h:255
string source
Definition: fst.h:57
const SymbolTable * InputSymbols() const override
Definition: fst.h:901
void SetInputSymbols(const SymbolTable *isyms)
Definition: fst.h:696
void SetImpl(std::shared_ptr< Impl > impl)
Definition: fst.h:954
bool write_header
Definition: fst.h:85
virtual bool Write(const string &filename) const
Definition: fst.h:278
virtual ~ArcIteratorBase()
Definition: fst.h:431
ImplToFst(const ImplToFst< Impl, FST > &fst)
Definition: fst.h:924
const string & FstType() const
Definition: fst.h:119
FstImpl(const FstImpl< Arc > &impl)
Definition: fst.h:642
void SetFlags(int32 flags)
Definition: fst.h:141
virtual ~Fst()
Definition: fst.h:192
uint64 properties_
Definition: fst.h:796
uint32_t uint32
Definition: types.h:31
Arc::Weight Final(const Fst< Arc > &fst, typename Arc::StateId s)
Definition: fst.h:607
typename internal::SynchronizeFstImpl< A >::Arc Arc
Definition: fst.h:188
void SetStart(int64 start)
Definition: fst.h:145
virtual const SymbolTable * InputSymbols() const =0
const SymbolTable * InputSymbols() const
Definition: fst.h:688
void Next()
Definition: fst.h:389
Weight Final(StateId s) const override
Definition: fst.h:877
void SetProperties(uint64 props)
Definition: fst.h:670
bool Done() const
Definition: fst.h:383
int32_t int32
Definition: types.h:26
const Arc * arcs
Definition: fst.h:462
constexpr uint64 kError
Definition: properties.h:33
ImplToFst< Impl, FST > & operator=(const ImplToFst< Impl, FST > &fst)
Definition: fst.h:931
void SetProperties(uint64 properties)
Definition: fst.h:143
ImplToFst(std::shared_ptr< Impl > impl)
Definition: fst.h:910
virtual MatcherBase< Arc > * InitMatcher(MatchType match_type) const
Definition: fst.h:574
FileReadMode mode
Definition: fst.h:64
ArcIteratorBase< Arc > * base
Definition: fst.h:461
const SymbolTable * isymbols
Definition: fst.h:60
string FstToString(const Fst< Arc > &fst, const FstWriteOptions &options=FstWriteOptions("FstToString"))
Definition: fst.h:980
void SetArcType(const string &type)
Definition: fst.h:137
Impl * GetMutableImpl() const
Definition: fst.h:947
void Reset()
Definition: fst.h:397
size_t NumInputEpsilons(const Fst< Arc > &fst, typename Arc::StateId s)
Definition: fst.h:618
uint64 Properties(uint64 mask, bool test) const override
Definition: fst.h:889
bool Unique() const
Definition: fst.h:952
virtual size_t NumOutputEpsilons(StateId) const =0
bool read_osymbols
Definition: fst.h:66
int * ref_count
Definition: fst.h:464
void Destroy(ArcIterator< FST > *aiter, MemoryPool< ArcIterator< FST >> *pool)
Definition: fst.h:564
size_t narcs
Definition: fst.h:463
uint64 Properties() const
Definition: fst.h:127
int64 NumArcs() const
Definition: fst.h:133
typename S::Arc::StateId StateId
Definition: fst.h:637
bool Write(std::ostream &strm, const string &source) const
Definition: fst.cc:85
const Impl * GetImpl() const
Definition: fst.h:945
ArcIterator(const ArcIteratorData< Arc > &data)
Definition: fst.h:487
virtual const SymbolTable * OutputSymbols() const =0
void Next()
Definition: fst.h:507
StateIterator(const FST &fst)
Definition: fst.h:377