FST  openfst-1.8.3
OpenFst Library
fst.h
Go to the documentation of this file.
1 // Copyright 2005-2024 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the 'License');
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an 'AS IS' BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // See www.openfst.org for extensive documentation on this weighted
16 // finite-state transducer library.
17 //
18 // FST abstract base class definition, state and arc iterator interface, and
19 // suggested base implementation.
20 
21 #ifndef FST_FST_H_
22 #define FST_FST_H_
23 
24 #include <sys/types.h>
25 
26 #include <atomic>
27 #include <cmath>
28 #include <cstddef>
29 #include <cstdint>
30 #include <ios>
31 #include <iostream>
32 #include <istream>
33 #include <memory>
34 #include <optional>
35 #include <ostream>
36 #include <sstream>
37 #include <string>
38 #include <utility>
39 
40 #include <fst/compat.h>
41 #include <fst/flags.h>
42 #include <fst/log.h>
43 #include <fst/arc.h>
44 #include <fstream>
45 #include <fst/memory.h>
46 #include <fst/properties.h>
47 #include <fst/register.h>
48 #include <fst/symbol-table.h>
49 #include <fst/util.h>
50 #include <string_view>
51 
52 DECLARE_bool(fst_align);
53 
54 namespace fst {
55 
// Magic number that identifies stream data as an FST (and, through its byte
// order, the endianness of that data).
inline constexpr int32_t kFstMagicNumber = 0x7EB2FDD6;
58 
59 class FstHeader;
60 template <class Arc>
62 template <class Arc>
64 template <class Arc>
66 
68  // FileReadMode(s) are advisory; there are many conditions that prevent a
69  // file from being mapped, READ mode will be selected in these cases with
70  // a warning indicating why it was chosen.
71  enum FileReadMode { READ, MAP };
72 
73  std::string source; // Where you're reading from.
74  const FstHeader *header; // Pointer to FST header; if non-zero, use
75  // this info (don't read a stream header).
76  const SymbolTable *isymbols; // Pointer to input symbols; if non-zero, use
77  // this info (read and skip stream isymbols)
78  const SymbolTable *osymbols; // Pointer to output symbols; if non-zero, use
79  // this info (read and skip stream osymbols)
80  FileReadMode mode; // Read or map files (advisory, if possible)
81  bool read_isymbols; // Read isymbols, if any (default: true).
82  bool read_osymbols; // Read osymbols, if any (default: true).
83 
84  explicit FstReadOptions(
85  const std::string_view source = "<unspecified>",
86  const FstHeader * header = nullptr,
87  const SymbolTable * isymbols = nullptr,
88  const SymbolTable * osymbols = nullptr);
89 
90  explicit FstReadOptions(const std::string_view source,
91  const SymbolTable *isymbols,
92  const SymbolTable *osymbols = nullptr);
93 
94  // Helper function to convert strings FileReadModes into their enum value.
95  static FileReadMode ReadMode(std::string_view mode);
96 
97  // Outputs a debug string for the FstReadOptions object.
98  std::string DebugString() const;
99 };
100 
102  std::string source; // Where you're writing to.
103  bool write_header; // Write the header?
104  bool write_isymbols; // Write input symbols?
105  bool write_osymbols; // Write output symbols?
106  bool align; // Write data aligned (may fail on pipes)?
107  bool stream_write; // Avoid seek operations in writing.
108 
109  explicit FstWriteOptions(std::string_view source = "<unspecified>",
110  bool write_header = true, bool write_isymbols = true,
111  bool write_osymbols = true,
112  bool align = FST_FLAGS_fst_align,
113  bool stream_write = false)
114  : source(source),
115  write_header(write_header),
116  write_isymbols(write_isymbols),
117  write_osymbols(write_osymbols),
118  align(align),
119  stream_write(stream_write) {}
120 };
121 
122 // Header class.
123 //
124 // This is the recommended file header representation.
125 
class FstHeader {
 public:
  // Bits that may be combined in the value passed to SetFlags() and returned
  // by GetFlags().
  enum Flags {
    // Has input symbol table.
    HAS_ISYMBOLS = 1 << 0,
    // Has output symbol table.
    HAS_OSYMBOLS = 1 << 1,
    // Memory-aligned (where appropriate).
    IS_ALIGNED = 1 << 2,
  };

  FstHeader() = default;

  // ---- Read accessors -----------------------------------------------------

  // FST type name, e.g. "vector".
  const std::string &FstType() const { return fsttype_; }

  // Arc type name, e.g. "standard".
  const std::string &ArcType() const { return arctype_; }

  // Type version number.
  int32_t Version() const { return version_; }

  // File format bits (see Flags).
  uint32_t GetFlags() const { return flags_; }

  // FST property bits.
  uint64_t Properties() const { return properties_; }

  // Start state (-1 until set).
  int64_t Start() const { return start_; }

  // Number of states.
  int64_t NumStates() const { return numstates_; }

  // Number of arcs.
  int64_t NumArcs() const { return numarcs_; }

  // ---- Write accessors ----------------------------------------------------

  // Stores a copy of `type` as the FST type name.
  void SetFstType(std::string_view type) {
    std::string copy(type);
    fsttype_ = std::move(copy);
  }

  // Stores a copy of `type` as the arc type name.
  void SetArcType(std::string_view type) {
    std::string copy(type);
    arctype_ = std::move(copy);
  }

  void SetVersion(int32_t version) { version_ = version; }

  void SetFlags(uint32_t flags) { flags_ = flags; }

  void SetProperties(uint64_t properties) { properties_ = properties; }

  void SetStart(int64_t start) { start_ = start; }

  void SetNumStates(int64_t numstates) { numstates_ = numstates; }

  void SetNumArcs(int64_t numarcs) { numarcs_ = numarcs; }

  // ---- Serialization (defined out of line) --------------------------------

  // Reads the header from `strm`; `source` names the input.
  bool Read(std::istream &strm, const std::string &source, bool rewind = false);

  // Writes the header to `strm`; `source` names the output.
  bool Write(std::ostream &strm, std::string_view source) const;

  // Outputs a debug string for the FstHeader object.
  std::string DebugString() const;

 private:
  // E.g. "vector".
  std::string fsttype_;
  // E.g. "standard".
  std::string arctype_;
  // Type version number.
  int32_t version_{0};
  // File format bits.
  uint32_t flags_{0};
  // FST property bits.
  uint64_t properties_{0};
  // Start state.
  int64_t start_{-1};
  // # of states.
  int64_t numstates_{0};
  // # of arcs.
  int64_t numarcs_{0};
};
185 
186 // Specifies matcher action.
187 enum MatchType {
188  MATCH_INPUT = 1, // Match input label.
189  MATCH_OUTPUT = 2, // Match output label.
190  MATCH_BOTH = 3, // Match input or output label.
191  MATCH_NONE = 4, // Match nothing.
193 }; // Otherwise, match type unknown.
194 
// Sentinel meaning "not a valid label".
inline constexpr int kNoLabel = -1;
// Sentinel meaning "not a valid state ID".
inline constexpr int kNoStateId = -1;
197 
198 // A generic FST, templated on the arc definition, with common-demoninator
199 // methods (use StateIterator and ArcIterator to iterate over its states and
200 // arcs). Derived classes should be assumed to be thread-unsafe unless
201 // otherwise specified.
202 template <class A>
203 class Fst {
204  public:
205  using Arc = A;
206  using StateId = typename Arc::StateId;
207  using Weight = typename Arc::Weight;
208 
209  virtual ~Fst() = default;
210 
211  // Initial state.
212  virtual StateId Start() const = 0;
213 
214  // State's final weight.
215  virtual Weight Final(StateId) const = 0;
216 
217  // State's arc count.
218  virtual size_t NumArcs(StateId) const = 0;
219 
220  // State's input epsilon count.
221  virtual size_t NumInputEpsilons(StateId) const = 0;
222 
223  // State's output epsilon count.
224  virtual size_t NumOutputEpsilons(StateId) const = 0;
225 
226  // Returns the number of states if it is finite and can be computed in O(1)
227  // time. Otherwise returns nullopt.
228  virtual std::optional<StateId> NumStatesIfKnown() const {
229  return std::nullopt;
230  }
231 
232  // Property bits. If test = false, return stored properties bits for mask
233  // (some possibly unknown); if test = true, return property bits for mask
234  // (computing o.w. unknown).
235  virtual uint64_t Properties(uint64_t mask, bool test) const = 0;
236 
237  // FST type name.
238  virtual const std::string &Type() const = 0;
239 
240  // Gets a copy of this Fst. The copying behaves as follows:
241  //
242  // (1) The copying is constant time if safe = false or if safe = true
243  // and is on an otherwise unaccessed FST.
244  //
245  // (2) If safe = true, the copy is thread-safe in that the original
246  // and copy can be safely accessed (but not necessarily mutated) by
247  // separate threads. For some FST types, 'Copy(true)' should only be
248  // called on an FST that has not otherwise been accessed. Behavior is
249  // otherwise undefined.
250  //
251  // (3) If a MutableFst is copied and then mutated, then the original is
252  // unmodified and vice versa (often by a copy-on-write on the initial
253  // mutation, which may not be constant time).
254  virtual Fst *Copy(bool safe = false) const = 0;
255 
256  // Reads an FST from an input stream; returns nullptr on error.
257  static Fst *Read(std::istream &strm, const FstReadOptions &opts) {
258  FstReadOptions ropts(opts);
259  FstHeader hdr;
260  if (ropts.header) {
261  hdr = *opts.header;
262  } else {
263  if (!hdr.Read(strm, opts.source)) return nullptr;
264  ropts.header = &hdr;
265  }
266  const auto &fst_type = hdr.FstType();
267  const auto reader = FstRegister<Arc>::GetRegister()->GetReader(fst_type);
268  if (!reader) {
269  LOG(ERROR) << "Fst::Read: Unknown FST type " << fst_type
270  << " (arc type = " << Arc::Type() << "): " << ropts.source;
271  return nullptr;
272  }
273  return reader(strm, ropts);
274  }
275 
276  // Reads an FST from a file; returns nullptr on error. An empty source
277  // results in reading from standard input.
278  static Fst *Read(const std::string &source) {
279  if (!source.empty()) {
280  std::ifstream strm(source,
281  std::ios_base::in | std::ios_base::binary);
282  if (!strm) {
283  LOG(ERROR) << "Fst::Read: Can't open file: " << source;
284  return nullptr;
285  }
286  return Read(strm, FstReadOptions(source));
287  } else {
288  return Read(std::cin, FstReadOptions("standard input"));
289  }
290  }
291 
292  // Writes an FST to an output stream; returns false on error.
293  virtual bool Write(std::ostream &strm, const FstWriteOptions &opts) const {
294  LOG(ERROR) << "Fst::Write: No write stream method for " << Type()
295  << " FST type";
296  return false;
297  }
298 
299  // Writes an FST to a file; returns false on error; an empty source
300  // results in writing to standard output.
301  virtual bool Write(const std::string &source) const {
302  LOG(ERROR) << "Fst::Write: No write source method for " << Type()
303  << " FST type";
304  return false;
305  }
306 
307  // Some Fst implementations support
308  // template <class Fst2>
309  // static bool Fst1::WriteFst(const Fst2 &fst2, ...);
310  // which is equivalent to Fst1(fst2).Write(...), but uses less memory.
311  // WriteFst is not part of the general Fst interface.
312 
313  // Returns input label symbol table; return nullptr if not specified.
314  virtual const SymbolTable *InputSymbols() const = 0;
315 
316  // Return output label symbol table; return nullptr if not specified.
317  virtual const SymbolTable *OutputSymbols() const = 0;
318 
319  // For generic state iterator construction (not normally called directly by
320  // users). Does not copy the FST.
321  virtual void InitStateIterator(StateIteratorData<Arc> *data) const = 0;
322 
323  // For generic arc iterator construction (not normally called directly by
324  // users). Does not copy the FST.
325  virtual void InitArcIterator(StateId s, ArcIteratorData<Arc> *data) const = 0;
326 
327  // For generic matcher construction (not normally called directly by users).
328  // Does not copy the FST.
329  virtual MatcherBase<Arc> *InitMatcher(MatchType match_type) const;
330 
331  protected:
332  bool WriteFile(const std::string &source) const {
333  if (!source.empty()) {
334  std::ofstream strm(source,
335  std::ios_base::out | std::ios_base::binary);
336  if (!strm) {
337  LOG(ERROR) << "Fst::WriteFile: Can't open file: " << source;
338  return false;
339  }
340  if (!Write(strm, FstWriteOptions(source))) {
341  LOG(ERROR) << "Fst::WriteFile: Write failed: " << source;
342  return false;
343  }
344  return true;
345  } else {
346  return Write(std::cout, FstWriteOptions("standard output"));
347  }
348  }
349 };
350 
351 // A useful alias when using StdArc.
352 using StdFst = Fst<StdArc>;
353 
354 // State and arc iterator definitions.
355 //
356 // State iterator interface templated on the Arc definition; used for
357 // StateIterator specializations returned by the InitStateIterator FST method.
358 template <class Arc>
360  public:
361  using StateId = typename Arc::StateId;
362 
363  virtual ~StateIteratorBase() = default;
364 
365  // End of iterator?
366  virtual bool Done() const = 0;
367  // Returns current state (when !Done()).
368  virtual StateId Value() const = 0;
369  // Advances to next state (when !Done()).
370  virtual void Next() = 0;
371  // Resets to initial condition.
372  virtual void Reset() = 0;
373 };
374 
375 // StateIterator initialization data.
376 
377 template <class Arc>
378 struct StateIteratorData {
379  using StateId = typename Arc::StateId;
380 
381  // Specialized iterator if non-null.
382  std::unique_ptr<StateIteratorBase<Arc>> base;
383  // Otherwise, the total number of states.
384  StateId nstates = 0;
385 
386  StateIteratorData() = default;
387 
388  StateIteratorData(const StateIteratorData &) = delete;
389  StateIteratorData &operator=(const StateIteratorData &) = delete;
390 };
391 
392 // Generic state iterator, templated on the FST definition (a wrapper
393 // around a pointer to a specific one). Here is a typical use:
394 //
395 // for (StateIterator<StdFst> siter(fst);
396 // !siter.Done();
397 // siter.Next()) {
398 // StateId s = siter.Value();
399 // ...
400 // }
401 // There is no copying of the FST.
402 //
403 // Specializations may exist for some FST types.
404 // StateIterators are thread-unsafe unless otherwise specified.
405 template <class FST>
407  public:
408  using Arc = typename FST::Arc;
409  using StateId = typename Arc::StateId;
410 
411  explicit StateIterator(const FST &fst) {
412  fst.InitStateIterator(&data_);
413  }
414 
415  bool Done() const {
416  return data_.base ? data_.base->Done() : s_ >= data_.nstates;
417  }
418 
419  StateId Value() const { return data_.base ? data_.base->Value() : s_; }
420 
421  void Next() {
422  if (data_.base) {
423  data_.base->Next();
424  } else {
425  ++s_;
426  }
427  }
428 
429  void Reset() {
430  if (data_.base) {
431  data_.base->Reset();
432  } else {
433  s_ = 0;
434  }
435  }
436 
437  private:
439  StateId s_ = 0;
440 };
441 
// Flags to control the behavior on an arc iterator via SetFlags().

// Value() gives valid ilabel.
inline constexpr uint8_t kArcILabelValue = 1 << 0;
// Value() call gives valid olabel.
inline constexpr uint8_t kArcOLabelValue = 1 << 1;
// Value() call gives valid weight.
inline constexpr uint8_t kArcWeightValue = 1 << 2;
// Value() call gives valid nextstate.
inline constexpr uint8_t kArcNextStateValue = 1 << 3;
// Arcs need not be cached.
inline constexpr uint8_t kArcNoCache = 1 << 4;

// Union of the four "Value() gives valid ..." bits.
inline constexpr uint8_t kArcValueFlags = kArcILabelValue | kArcOLabelValue |
                                          kArcWeightValue |
                                          kArcNextStateValue;
// Union of every arc iterator flag defined above.
inline constexpr uint8_t kArcFlags = kArcValueFlags | kArcNoCache;
456 
457 // Arc iterator interface, templated on the arc definition; used for arc
458 // iterator specializations that are returned by the InitArcIterator FST method.
459 template <class Arc>
461  public:
462  using StateId = typename Arc::StateId;
463 
464  virtual ~ArcIteratorBase() = default;
465 
466  // End of iterator?
467  virtual bool Done() const = 0;
468  // Returns current arc (when !Done()).
469  virtual const Arc &Value() const = 0;
470  // Advances to next arc (when !Done()).
471  virtual void Next() = 0;
472  // Returns current position.
473  virtual size_t Position() const = 0;
474  // Returns to initial condition.
475  virtual void Reset() = 0;
476  // Advances to arbitrary arc by position.
477  virtual void Seek(size_t) = 0;
478  // Returns current behavioral flags, a bitmask of kArcFlags.
479  virtual uint8_t Flags() const = 0;
480  // Sets behavioral flags, a bitmask of kArcFlags.
481  virtual void SetFlags(uint8_t, uint8_t) = 0;
482 };
483 
484 // ArcIterator initialization data.
485 template <class Arc>
486 struct ArcIteratorData {
487  ArcIteratorData() = default;
488 
489  ArcIteratorData(const ArcIteratorData &) = delete;
490 
491  ArcIteratorData &operator=(const ArcIteratorData &) = delete;
492 
493  std::unique_ptr<ArcIteratorBase<Arc>>
494  base; // Specialized iterator if non-null.
495  const Arc *arcs = nullptr; // O.w. arcs pointer
496  size_t narcs = 0; // ... and arc count.
497  int *ref_count = nullptr; // ... and a reference count of the
498  // `narcs`-length `arcs` array if non-null.
499 };
500 
501 // Generic arc iterator, templated on the FST definition (a wrapper around a
502 // pointer to a specific one). Here is a typical use:
503 //
504 // for (ArcIterator<StdFst> aiter(fst, s);
505 // !aiter.Done();
506 // aiter.Next()) {
507 // const StdArc &arc = aiter.Value();
508 // ...
509 // }
510 // There is no copying of the FST.
511 //
512 // Specializations may exist for some FST types.
513 // ArcIterators are thread-unsafe unless otherwise specified.
514 template <class FST>
515 class ArcIterator {
516  public:
517  using Arc = typename FST::Arc;
518  using StateId = typename Arc::StateId;
519 
520  ArcIterator(const FST &fst, StateId s) {
521  fst.InitArcIterator(s, &data_);
522  }
523 
524  explicit ArcIterator(const ArcIteratorData<Arc> &data) = delete;
525 
527  if (data_.ref_count) {
528  --(*data_.ref_count);
529  }
530  }
531 
532  bool Done() const {
533  return data_.base ? data_.base->Done() : i_ >= data_.narcs;
534  }
535 
536  const Arc &Value() const {
537  return data_.base ? data_.base->Value() : data_.arcs[i_];
538  }
539 
540  void Next() {
541  if (data_.base) {
542  data_.base->Next();
543  } else {
544  ++i_;
545  }
546  }
547 
548  void Reset() {
549  if (data_.base) {
550  data_.base->Reset();
551  } else {
552  i_ = 0;
553  }
554  }
555 
556  void Seek(size_t a) {
557  if (data_.base) {
558  data_.base->Seek(a);
559  } else {
560  i_ = a;
561  }
562  }
563 
564  size_t Position() const { return data_.base ? data_.base->Position() : i_; }
565 
566  uint8_t Flags() const {
567  return data_.base ? data_.base->Flags() : kArcValueFlags;
568  }
569 
570  void SetFlags(uint8_t flags, uint8_t mask) {
571  if (data_.base) data_.base->SetFlags(flags, mask);
572  }
573 
574  private:
575  ArcIteratorData<Arc> data_;
576  size_t i_ = 0;
577 };
578 
579 } // namespace fst
580 
581 // ArcIterator placement operator new and destroy function; new needs to be in
582 // the global namespace.
583 
584 template <class FST>
585 void *operator new(size_t size,
587  return pool->Allocate();
588 }
589 
590 namespace fst {
591 
592 template <class FST>
594  if (aiter) {
595  aiter->~ArcIterator<FST>();
596  pool->Free(aiter);
597  }
598 }
599 
600 // Matcher definitions.
601 
602 template <class Arc>
604  return nullptr; // One should just use the default matcher.
605 }
606 
607 // FST accessors, useful in high-performance applications.
608 
609 namespace internal {
610 
611 // General case, requires non-abstract, 'final' methods. Use for inlining.
612 
// Returns the final weight of state `s` through a class-qualified call to
// F::Final, which bypasses virtual dispatch.
template <class F>
inline auto Final(const F &fst, typename F::Arc::StateId s) ->
    typename F::Arc::Weight {
  return fst.F::Final(s);
}

// Returns F::NumArcs(s) through a class-qualified (non-virtual) call.
template <class F>
inline auto NumArcs(const F &fst, typename F::Arc::StateId s) -> ssize_t {
  return fst.F::NumArcs(s);
}

// Returns F::NumInputEpsilons(s) through a class-qualified (non-virtual)
// call.
template <class F>
inline auto NumInputEpsilons(const F &fst, typename F::Arc::StateId s)
    -> ssize_t {
  return fst.F::NumInputEpsilons(s);
}

// Returns F::NumOutputEpsilons(s) through a class-qualified (non-virtual)
// call.
template <class F>
inline auto NumOutputEpsilons(const F &fst, typename F::Arc::StateId s)
    -> ssize_t {
  return fst.F::NumOutputEpsilons(s);
}
632 
633 // Fst<Arc> case, abstract methods.
634 
635 template <class Arc>
636 inline typename Arc::Weight Final(const Fst<Arc> &fst,
637  typename Arc::StateId s) {
638  return fst.Final(s);
639 }
640 
641 template <class Arc>
642 inline size_t NumArcs(const Fst<Arc> &fst, typename Arc::StateId s) {
643  return fst.NumArcs(s);
644 }
645 
646 template <class Arc>
647 inline size_t NumInputEpsilons(const Fst<Arc> &fst, typename Arc::StateId s) {
648  return fst.NumInputEpsilons(s);
649 }
650 
651 template <class Arc>
652 inline size_t NumOutputEpsilons(const Fst<Arc> &fst, typename Arc::StateId s) {
653  return fst.NumOutputEpsilons(s);
654 }
655 
656 // FST implementation base.
657 //
658 // This is the recommended FST implementation base class. It will handle
659 // reference counts, property bits, type information and symbols.
660 //
661 // Users are discouraged, but not prohibited, from subclassing this outside the
662 // FST library.
663 //
664 // This class is thread-compatible except for the const SetProperties
665 // overload. Derived classes should be assumed to be thread-unsafe unless
666 // otherwise specified. Derived-class copy constructors must produce a
667 // thread-safe copy.
668 template <class Arc>
669 class FstImpl {
670  public:
671  using StateId = typename Arc::StateId;
672  using Weight = typename Arc::Weight;
673 
674  FstImpl() = default;
675 
676  FstImpl(const FstImpl<Arc> &impl)
677  : properties_(impl.properties_.load(std::memory_order_relaxed)),
678  type_(impl.type_),
679  isymbols_(impl.isymbols_ ? impl.isymbols_->Copy() : nullptr),
680  osymbols_(impl.osymbols_ ? impl.osymbols_->Copy() : nullptr) {}
681 
682  FstImpl(FstImpl<Arc> &&impl) noexcept;
683 
684  virtual ~FstImpl() = default;
685 
686  FstImpl &operator=(const FstImpl &impl) {
687  properties_.store(impl.properties_.load(std::memory_order_relaxed),
688  std::memory_order_relaxed);
689  type_ = impl.type_;
690  isymbols_ = impl.isymbols_ ? impl.isymbols_->Copy() : nullptr;
691  osymbols_ = impl.osymbols_ ? impl.osymbols_->Copy() : nullptr;
692  return *this;
693  }
694 
695  FstImpl &operator=(FstImpl &&impl) noexcept;
696 
697  const std::string &Type() const { return type_; }
698 
699  void SetType(std::string_view type) { type_ = std::string(type); }
700 
701  virtual uint64_t Properties() const {
702  return properties_.load(std::memory_order_relaxed);
703  }
704 
705  virtual uint64_t Properties(uint64_t mask) const {
706  return properties_.load(std::memory_order_relaxed) & mask;
707  }
708 
709  void SetProperties(uint64_t props) {
710  uint64_t properties = properties_.load(std::memory_order_relaxed);
711  properties &= kError; // kError can't be cleared.
712  properties |= props;
713  properties_.store(properties, std::memory_order_relaxed);
714  }
715 
716  void SetProperties(uint64_t props, uint64_t mask) {
717  // Unlike UpdateProperties, does not require compatibility between props
718  // and properties_, since it may be used to update properties after
719  // a mutation.
720  uint64_t properties = properties_.load(std::memory_order_relaxed);
721  properties &= ~mask | kError; // kError can't be cleared.
722  properties |= props & mask;
723  properties_.store(properties, std::memory_order_relaxed);
724  }
725 
726  // Allows (only) setting error bit on const FST implementations.
727  void SetProperties(uint64_t props, uint64_t mask) const {
728  if (mask != kError) {
729  FSTERROR() << "FstImpl::SetProperties() const: Can only set kError";
730  }
731  properties_.fetch_or(kError, std::memory_order_relaxed);
732  }
733 
734  // Sets the subset of the properties that have changed, in a thread-safe
735  // manner via atomic bitwise-or..
736  void UpdateProperties(uint64_t props, uint64_t mask) {
737  // If properties_ and props are compatible (for example kAcceptor and
738  // kNoAcceptor cannot both be set), the props can be or-ed in.
739  // Compatibility is ensured if props comes from ComputeProperties
740  // and properties_ is set correctly initially. However
741  // relying on properties to be set correctly is too large an
742  // assumption, as many places set them incorrectly.
743  // Therefore, we or in only the newly discovered properties.
744  // These cannot become inconsistent, but this means that
745  // incorrectly set properties will remain incorrect.
746  const uint64_t properties = properties_.load(std::memory_order_relaxed);
747  DCHECK(internal::CompatProperties(properties, props));
748  const uint64_t old_props = properties & mask;
749  const uint64_t old_mask = internal::KnownProperties(old_props);
750  const uint64_t discovered_mask = mask & ~old_mask;
751  const uint64_t discovered_props = props & discovered_mask;
752  // It is always correct to or these bits in, but do this only when
753  // necessary to avoid extra stores and possible cache flushes.
754  if (discovered_props != 0) {
755  properties_.fetch_or(discovered_props, std::memory_order_relaxed);
756  }
757  }
758 
759  const SymbolTable *InputSymbols() const { return isymbols_.get(); }
760 
761  const SymbolTable *OutputSymbols() const { return osymbols_.get(); }
762 
763  SymbolTable *InputSymbols() { return isymbols_.get(); }
764 
765  SymbolTable *OutputSymbols() { return osymbols_.get(); }
766 
767  void SetInputSymbols(const SymbolTable *isyms) {
768  isymbols_.reset(isyms ? isyms->Copy() : nullptr);
769  }
770 
771  void SetOutputSymbols(const SymbolTable *osyms) {
772  osymbols_.reset(osyms ? osyms->Copy() : nullptr);
773  }
774 
775  // Reads header and symbols from input stream, initializes FST, and returns
776  // the header. If opts.header is non-null, skips reading and uses the option
777  // value instead. If opts.[io]symbols is non-null, reads in (if present), but
778  // uses the option value.
779  bool ReadHeader(std::istream &strm, const FstReadOptions &opts,
780  int min_version, FstHeader *hdr);
781 
782  // Writes header and symbols to output stream. If opts.header is false, skips
783  // writing header. If opts.[io]symbols is false, skips writing those symbols.
784  // This method is needed for implementations that implement Write methods.
785  void WriteHeader(std::ostream &strm, const FstWriteOptions &opts, int version,
786  FstHeader *hdr) const {
787  if (opts.write_header) {
788  hdr->SetFstType(type_);
789  hdr->SetArcType(Arc::Type());
790  hdr->SetVersion(version);
791  hdr->SetProperties(properties_.load(std::memory_order_relaxed));
792  int32_t file_flags = 0;
793  if (isymbols_ && opts.write_isymbols) {
794  file_flags |= FstHeader::HAS_ISYMBOLS;
795  }
796  if (osymbols_ && opts.write_osymbols) {
797  file_flags |= FstHeader::HAS_OSYMBOLS;
798  }
799  if (opts.align) file_flags |= FstHeader::IS_ALIGNED;
800  hdr->SetFlags(file_flags);
801  hdr->Write(strm, opts.source);
802  }
803  if (isymbols_ && opts.write_isymbols) isymbols_->Write(strm);
804  if (osymbols_ && opts.write_osymbols) osymbols_->Write(strm);
805  }
806 
807  // Writes out header and symbols to output stream. If opts.header is false,
808  // skips writing header. If opts.[io]symbols is false, skips writing those
809  // symbols. `type` is the FST type being written. This method is used in the
810  // cross-type serialization methods Fst::WriteFst.
811  static void WriteFstHeader(const Fst<Arc> &fst, std::ostream &strm,
812  const FstWriteOptions &opts, int version,
813  std::string_view type, uint64_t properties,
814  FstHeader *hdr) {
815  if (opts.write_header) {
816  hdr->SetFstType(type);
817  hdr->SetArcType(Arc::Type());
818  hdr->SetVersion(version);
819  hdr->SetProperties(properties);
820  int32_t file_flags = 0;
821  if (fst.InputSymbols() && opts.write_isymbols) {
822  file_flags |= FstHeader::HAS_ISYMBOLS;
823  }
824  if (fst.OutputSymbols() && opts.write_osymbols) {
825  file_flags |= FstHeader::HAS_OSYMBOLS;
826  }
827  if (opts.align) file_flags |= FstHeader::IS_ALIGNED;
828  hdr->SetFlags(file_flags);
829  hdr->Write(strm, opts.source);
830  }
831  if (fst.InputSymbols() && opts.write_isymbols) {
832  fst.InputSymbols()->Write(strm);
833  }
834  if (fst.OutputSymbols() && opts.write_osymbols) {
835  fst.OutputSymbols()->Write(strm);
836  }
837  }
838 
839  // In serialization routines where the header cannot be written until after
840  // the machine has been serialized, this routine can be called to seek to the
841  // beginning of the file an rewrite the header with updated fields. It
842  // repositions the file pointer back at the end of the file. Returns true on
843  // success, false on failure.
844  static bool UpdateFstHeader(const Fst<Arc> &fst, std::ostream &strm,
845  const FstWriteOptions &opts, int version,
846  std::string_view type, uint64_t properties,
847  FstHeader *hdr, size_t header_offset) {
848  strm.seekp(header_offset);
849  if (!strm) {
850  LOG(ERROR) << "Fst::UpdateFstHeader: Write failed: " << opts.source;
851  return false;
852  }
853  WriteFstHeader(fst, strm, opts, version, type, properties, hdr);
854  if (!strm) {
855  LOG(ERROR) << "Fst::UpdateFstHeader: Write failed: " << opts.source;
856  return false;
857  }
858  strm.seekp(0, std::ios_base::end);
859  if (!strm) {
860  LOG(ERROR) << "Fst::UpdateFstHeader: Write failed: " << opts.source;
861  return false;
862  }
863  return true;
864  }
865 
866  protected:
867  // Use atomic so that UpdateProperties() can be thread-safe.
868  // This is always used with memory_order_relaxed because it's only used
869  // as a cache and not used to synchronize other operations.
870  mutable std::atomic<uint64_t> properties_ = 0; // Property bits.
871 
872  private:
873  std::string type_ = "null"; // Unique name of FST class.
874  std::unique_ptr<SymbolTable> isymbols_;
875  std::unique_ptr<SymbolTable> osymbols_;
876 };
877 
878 template <class Arc>
879 inline FstImpl<Arc>::FstImpl(FstImpl<Arc> &&) noexcept = default;
880 
881 template <class Arc>
882 inline FstImpl<Arc> &FstImpl<Arc>::operator=(FstImpl<Arc> &&) noexcept =
883  default;
884 
885 template <class Arc>
886 bool FstImpl<Arc>::ReadHeader(std::istream &strm, const FstReadOptions &opts,
887  int min_version, FstHeader *hdr) {
888  if (opts.header) {
889  *hdr = *opts.header;
890  } else if (!hdr->Read(strm, opts.source)) {
891  return false;
892  }
893  VLOG(2) << "FstImpl::ReadHeader: source: " << opts.source
894  << ", fst_type: " << hdr->FstType() << ", arc_type: " << Arc::Type()
895  << ", version: " << hdr->Version() << ", flags: " << hdr->GetFlags();
896  if (hdr->FstType() != type_) {
897  LOG(ERROR) << "FstImpl::ReadHeader: FST not of type " << type_ << ", found "
898  << hdr->FstType() << ": " << opts.source;
899  return false;
900  }
901  if (hdr->ArcType() != Arc::Type()) {
902  LOG(ERROR) << "FstImpl::ReadHeader: Arc not of type " << Arc::Type()
903  << ", found " << hdr->ArcType() << ": " << opts.source;
904  return false;
905  }
906  if (hdr->Version() < min_version) {
907  LOG(ERROR) << "FstImpl::ReadHeader: Obsolete " << type_ << " FST version "
908  << hdr->Version() << ", min_version=" << min_version << ": "
909  << opts.source;
910  return false;
911  }
912  properties_.store(hdr->Properties(), std::memory_order_relaxed);
913  if (hdr->GetFlags() & FstHeader::HAS_ISYMBOLS) {
914  isymbols_.reset(SymbolTable::Read(strm, opts.source));
915  }
916  // Deletes input symbol table.
917  if (!opts.read_isymbols) SetInputSymbols(nullptr);
918  if (hdr->GetFlags() & FstHeader::HAS_OSYMBOLS) {
919  osymbols_.reset(SymbolTable::Read(strm, opts.source));
920  }
921  // Deletes output symbol table.
922  if (!opts.read_osymbols) SetOutputSymbols(nullptr);
923  if (opts.isymbols) {
924  isymbols_.reset(opts.isymbols->Copy());
925  }
926  if (opts.osymbols) {
927  osymbols_.reset(opts.osymbols->Copy());
928  }
929  return true;
930 }
931 
932 } // namespace internal
933 
934 // Converts FSTs by casting their implementations, where this makes sense
935 // (which excludes implementations with weight-dependent virtual methods).
936 // Must be a friend of the FST classes involved (currently the concrete FSTs:
937 // ConstFst, CompactFst, and VectorFst). This can only be safely used for arc
938 // types that have identical storage characteristics. As with an FST
939 // copy constructor and Copy() method, this is a constant time operation
940 // (but subject to copy-on-write if it is a MutableFst and modified).
941 template <class IFST, class OFST>
942 void Cast(const IFST &ifst, OFST *ofst) {
943  using OImpl = typename OFST::Impl;
944  ofst->impl_ = std::shared_ptr<OImpl>(
945  ifst.impl_, reinterpret_cast<OImpl *>(ifst.impl_.get()));
946 }
947 
948 // FST serialization.
949 
950 template <class Arc>
951 std::string FstToString(
952  const Fst<Arc> &fst,
953  const FstWriteOptions &options = FstWriteOptions("FstToString")) {
954  std::ostringstream ostrm;
955  fst.Write(ostrm, options);
956  return ostrm.str();
957 }
958 
959 template <class Arc>
960 void FstToString(const Fst<Arc> &fst, std::string *result) {
961  *result = FstToString(fst);
962 }
963 
964 template <class Arc>
965 void FstToString(const Fst<Arc> &fst, std::string *result,
966  const FstWriteOptions &options) {
967  *result = FstToString(fst, options);
968 }
969 
970 template <class Arc>
971 Fst<Arc> *StringToFst(std::string_view s) {
972  std::istringstream istrm((std::string(s)));
973  return Fst<Arc>::Read(istrm, FstReadOptions("StringToFst"));
974 }
975 
976 } // namespace fst
977 
978 #endif // FST_FST_H_
size_t Position() const
Definition: fst.h:564
bool Write(std::ostream &strm) const
Definition: symbol-table.h:483
bool Read(std::istream &strm, const std::string &source, bool rewind=false)
Definition: fst.cc:56
void SetProperties(uint64_t props, uint64_t mask) const
Definition: fst.h:727
void SetProperties(uint64_t props)
Definition: fst.h:709
constexpr int32_t kFstMagicNumber
Definition: fst.h:57
void SetArcType(std::string_view type)
Definition: fst.h:154
constexpr uint8_t kArcValueFlags
Definition: fst.h:453
typename ArcMapFst< Arc, Arc, EncodeMapper< Arc > >::Arc Arc
Definition: fst.h:517
constexpr int kNoLabel
Definition: fst.h:195
const FstHeader * header
Definition: fst.h:74
void Cast(const F &, G *)
constexpr uint8_t kArcNoCache
Definition: fst.h:452
void Reset()
Definition: fst.h:548
static Fst * Read(std::istream &strm, const FstReadOptions &opts)
Definition: fst.h:257
virtual size_t NumArcs(StateId) const =0
bool CompatProperties(uint64_t props1, uint64_t props2)
Definition: properties.h:507
std::string source
Definition: fst.h:73
const SymbolTable * OutputSymbols() const
Definition: fst.h:761
Fst< Arc > * StringToFst(std::string_view s)
Definition: fst.h:971
MatchType
Definition: fst.h:187
void SetFlags(const char *usage, int *argc, char ***argv, bool remove_flags, const char *src="")
Definition: flags.cc:57
virtual SymbolTable * Copy() const
Definition: symbol-table.h:411
constexpr uint8_t kArcFlags
Definition: fst.h:455
bool Write(std::ostream &strm, std::string_view source) const
Definition: fst.cc:85
constexpr uint64_t kError
Definition: properties.h:52
virtual bool Write(std::ostream &strm, const FstWriteOptions &opts) const
Definition: fst.h:293
size_t NumArcs(const Fst< Arc > &fst, typename Arc::StateId s)
Definition: fst.h:642
#define LOG(type)
Definition: log.h:53
virtual Weight Final(StateId) const =0
void SetOutputSymbols(const SymbolTable *osyms)
Definition: fst.h:771
void SetNumArcs(int64_t numarcs)
Definition: fst.h:166
typename RandGenFst< FromArc, ToArc, Sampler >::Arc::StateId StateId
Definition: fst.h:361
static Fst * Read(const std::string &source)
Definition: fst.h:278
typename ArcMapFst< Arc, Arc, EncodeMapper< Arc > >::Arc Arc
Definition: fst.h:408
typename FST::Arc::StateId StateId
Definition: fst.h:462
void SetProperties(uint64_t properties)
Definition: fst.h:160
constexpr uint8_t kArcILabelValue
Definition: fst.h:444
bool read_isymbols
Definition: fst.h:81
DECLARE_bool(fst_align)
~ArcIterator()
Definition: fst.h:526
bool stream_write
Definition: fst.h:107
const SymbolTable * osymbols
Definition: fst.h:78
FstReadOptions(const std::string_view source="<unspecified>", const FstHeader *header=nullptr, const SymbolTable *isymbols=nullptr, const SymbolTable *osymbols=nullptr)
Definition: fst.cc:108
constexpr int kNoStateId
Definition: fst.h:196
SymbolTable * InputSymbols()
Definition: fst.h:763
const Arc & Value() const
Definition: fst.h:536
uint32_t GetFlags() const
Definition: fst.h:142
virtual size_t NumInputEpsilons(StateId) const =0
ArcIterator(const FST &fst, StateId s)
Definition: fst.h:520
std::string source
Definition: fst.h:102
#define FSTERROR()
Definition: util.h:56
const std::string & FstType() const
Definition: fst.h:136
static void WriteFstHeader(const Fst< Arc > &fst, std::ostream &strm, const FstWriteOptions &opts, int version, std::string_view type, uint64_t properties, FstHeader *hdr)
Definition: fst.h:811
void SetFlags(uint8_t flags, uint8_t mask)
Definition: fst.h:570
size_t NumOutputEpsilons(const Fst< Arc > &fst, typename Arc::StateId s)
Definition: fst.h:652
SymbolTable * OutputSymbols()
Definition: fst.h:765
bool write_osymbols
Definition: fst.h:105
typename Arc::StateId StateId
Definition: fst.h:379
virtual uint64_t Properties() const
Definition: fst.h:701
void Seek(size_t a)
Definition: fst.h:556
bool write_isymbols
Definition: fst.h:104
uint8_t Flags() const
Definition: fst.h:566
static bool UpdateFstHeader(const Fst< Arc > &fst, std::ostream &strm, const FstWriteOptions &opts, int version, std::string_view type, uint64_t properties, FstHeader *hdr, size_t header_offset)
Definition: fst.h:844
std::string DebugString() const
Definition: fst.cc:133
uint64_t KnownProperties(uint64_t props)
Definition: properties.h:500
constexpr uint8_t kArcOLabelValue
Definition: fst.h:446
#define VLOG(level)
Definition: log.h:54
void WriteHeader(std::ostream &strm, const FstWriteOptions &opts, int version, FstHeader *hdr) const
Definition: fst.h:785
StateId Value() const
Definition: fst.h:419
bool Done() const
Definition: fst.h:532
typename S::Arc::Weight Weight
Definition: fst.h:672
std::unique_ptr< StateIteratorBase< Arc > > base
Definition: fst.h:382
std::unique_ptr< ArcIteratorBase< Arc > > base
Definition: fst.h:494
void SetInputSymbols(const SymbolTable *isyms)
Definition: fst.h:767
bool write_header
Definition: fst.h:103
FstWriteOptions(std::string_view source="<unspecified>", bool write_header=true, bool write_isymbols=true, bool write_osymbols=true, bool align=FST_FLAGS_fst_align, bool stream_write=false)
Definition: fst.h:109
const std::string & Type() const
Definition: fst.h:697
virtual std::optional< StateId > NumStatesIfKnown() const
Definition: fst.h:228
bool WriteFile(const std::string &source) const
Definition: fst.h:332
static FileReadMode ReadMode(std::string_view mode)
Definition: fst.cc:126
constexpr uint8_t kArcWeightValue
Definition: fst.h:448
FstImpl(const FstImpl< Arc > &impl)
Definition: fst.h:676
int64_t NumArcs() const
Definition: fst.h:150
void SetNumStates(int64_t numstates)
Definition: fst.h:164
Arc::Weight Final(const Fst< Arc > &fst, typename Arc::StateId s)
Definition: fst.h:636
typename internal::SynchronizeFstImpl< A >::Arc Arc
Definition: fst.h:205
virtual bool Write(const std::string &source) const
Definition: fst.h:301
virtual const SymbolTable * InputSymbols() const =0
const SymbolTable * InputSymbols() const
Definition: fst.h:759
void Next()
Definition: fst.h:421
constexpr uint8_t kArcNextStateValue
Definition: fst.h:450
int64_t NumStates() const
Definition: fst.h:148
void SetType(std::string_view type)
Definition: fst.h:699
bool Done() const
Definition: fst.h:415
void SetFstType(std::string_view type)
Definition: fst.h:152
void SetStart(int64_t start)
Definition: fst.h:162
virtual MatcherBase< Arc > * InitMatcher(MatchType match_type) const
Definition: fst.h:603
FileReadMode mode
Definition: fst.h:80
int64_t Start() const
Definition: fst.h:146
#define DCHECK(x)
Definition: log.h:74
const SymbolTable * isymbols
Definition: fst.h:76
int32_t Version() const
Definition: fst.h:140
void Reset()
Definition: fst.h:429
size_t NumInputEpsilons(const Fst< Arc > &fst, typename Arc::StateId s)
Definition: fst.h:647
void SetVersion(int32_t version)
Definition: fst.h:156
virtual size_t NumOutputEpsilons(StateId) const =0
void SetFlags(uint32_t flags)
Definition: fst.h:158
bool read_osymbols
Definition: fst.h:82
std::string FstToString(const Fst< Arc > &fst, const FstWriteOptions &options=FstWriteOptions("FstToString"))
Definition: fst.h:951
static SymbolTable * Read(std::istream &strm, std::string_view source)
Definition: symbol-table.h:395
const std::string & ArcType() const
Definition: fst.h:138
virtual uint64_t Properties(uint64_t mask) const
Definition: fst.h:705
void Destroy(ArcIterator< FST > *aiter, MemoryPool< ArcIterator< FST >> *pool)
Definition: fst.h:593
FstImpl & operator=(const FstImpl &impl)
Definition: fst.h:686
void UpdateProperties(uint64_t props, uint64_t mask)
Definition: fst.h:736
typename S::Arc::StateId StateId
Definition: fst.h:671
void SetProperties(uint64_t props, uint64_t mask)
Definition: fst.h:716
uint64_t Properties() const
Definition: fst.h:144
std::atomic< uint64_t > properties_
Definition: fst.h:870
virtual const SymbolTable * OutputSymbols() const =0
void Next()
Definition: fst.h:540
StateIterator(const FST &fst)
Definition: fst.h:411