FST  openfst-1.7.2
OpenFst Library
compact-fst.h
Go to the documentation of this file.
1 // See www.openfst.org for extensive documentation on this weighted
2 // finite-state transducer library.
3 //
4 // FST Class for memory-efficient representation of common types of
5 // FSTs: linear automata, acceptors, unweighted FSTs, ...
6 
7 #ifndef FST_COMPACT_FST_H_
8 #define FST_COMPACT_FST_H_
9 
10 #include <climits>
11 #include <iterator>
12 #include <memory>
13 #include <tuple>
14 #include <utility>
15 #include <vector>
16 
17 #include <fst/log.h>
18 
19 #include <fst/cache.h>
20 #include <fst/expanded-fst.h>
21 #include <fst/fst-decl.h> // For optional argument declarations
22 #include <fst/mapped-file.h>
23 #include <fst/matcher.h>
24 #include <fst/test-properties.h>
25 #include <fst/util.h>
26 
27 
28 namespace fst {
29 
31  // The default caching behaviour is to do no caching. Most compactors are
32  // cheap and therefore we save memory by not doing caching.
34 
35  explicit CompactFstOptions(const CacheOptions &opts) : CacheOptions(opts) {}
36 };
37 
38 // New upcoming (Fst) Compactor interface - currently used internally
39 // by CompactFstImpl.
40 //
41 // class Compactor {
42 // public:
43 // // Constructor from the Fst to be compacted.
44 // Compactor(const Fst<Arc> &fst, ...);
45 // // Copy constructor
46 // Compactor(const Compactor &compactor, bool safe = false)
47 // // Default constructor (optional, see comment below).
48 // Compactor();
49 //
50 // // Returns the start state, number of states, and total number of arcs
51 // // of the compacted Fst
52 // StateId Start() const;
53 // StateId NumStates() const;
54 // size_t NumArcs() const;
55 //
56 // // Accessor class for state attributes.
57 // class State {
58 // public:
59 // State(); // Required, corresponds to kNoStateId.
60 // State(const Compactor *c, StateId); // Accessor for StateId 's'.
61 // StateId GetStateId() const;
62 // Weight Final() const;
63 // size_t NumArcs() const;
64 // Arc GetArc(size_t i, uint32 f) const;
65 // };
66 //
67 // // Modifies 'state' accessor to provide access to state id 's'.
68 // void SetState(StateId s, State *state);
69 // // Tests whether 'fst' can be compacted by this compactor.
70 // bool IsCompatible(const Fst<A> &fst) const;
71 // // Return the properties that are always true for an fst
72 // // compacted using this compactor
73 // uint64 Properties() const;
74 // // Return a string identifying the type of compactor.
75 // static const string &Type();
76 // // Return true if an error has occured.
77 // bool Error() const;
78 // // Writes a compactor to a file.
79 // bool Write(std::ostream &strm, const FstWriteOptions &opts) const;
80 // // Reads a compactor from a file.
81 // static Compactor*Read(std::istream &strm, const FstReadOptions &opts,
82 // const FstHeader &hdr);
83 // };
84 //
85 
86 // Old (Arc) Compactor Interface:
87 //
88 // The ArcCompactor class determines how arcs and final weights are compacted
89 // and expanded.
90 //
91 // Final weights are treated as transitions to the superfinal state, i.e.,
92 // ilabel = olabel = kNoLabel and nextstate = kNoStateId.
93 //
94 // There are two types of compactors:
95 //
96 // * Fixed out-degree compactors: 'compactor.Size()' returns a positive integer
97 // 's'. An FST can be compacted by this compactor only if each state has
98 // exactly 's' outgoing transitions (counting a non-Zero() final weight as a
99 // transition). A typical example is a compactor for string FSTs, i.e.,
100 // 's == 1'.
101 //
102 // * Variable out-degree compactors: 'compactor.Size() == -1'. There are no
103 // out-degree restrictions for these compactors.
104 //
105 // Interface:
106 //
107 // class ArcCompactor {
108 // public:
109 // // Element is the type of the compacted transitions.
110 // using Element = ...
111 //
112 // // Returns the compacted representation of a transition 'arc'
113 // // at a state 's'.
114 // Element Compact(StateId s, const Arc &arc);
115 //
116 // // Returns the transition at state 's' represented by the compacted
117 // // transition 'e'.
118 // Arc Expand(StateId s, const Element &e) const;
119 //
120 // // Returns -1 for variable out-degree compactors, and the mandatory
121 // // out-degree otherwise.
122 // ssize_t Size() const;
123 //
124 // // Tests whether an FST can be compacted by this compactor.
125 // bool Compatible(const Fst<A> &fst) const;
126 //
127 // // Returns the properties that are always true for an FST compacted using
128 // // this compactor
129 // uint64 Properties() const;
130 //
131 // // Returns a string identifying the type of compactor.
132 // static const string &Type();
133 //
134 // // Writes a compactor to a file.
135 // bool Write(std::ostream &strm) const;
136 //
137 // // Reads a compactor from a file.
138 // static ArcCompactor *Read(std::istream &strm);
139 //
140 // // Default constructor (optional, see comment below).
141 // ArcCompactor();
142 // };
143 //
144 // The default constructor is only required for FST_REGISTER to work (i.e.,
145 // enabling Convert() and the command-line utilities to work with this new
146 // compactor). However, a default constructor always needs to be specified for
147 // this code to compile, but one can have it simply raise an error when called,
148 // like so:
149 //
150 // Compactor::Compactor() {
151 // FSTERROR() << "Compactor: No default constructor";
152 // }
153 
154 // Default implementation data for CompactFst, which can shared between
155 // otherwise independent copies.
156 //
157 // The implementation contains two arrays: 'states_' and 'compacts_'.
158 //
159 // For fixed out-degree compactors, the 'states_' array is unallocated. The
160 // 'compacts_' contains the compacted transitions. Its size is 'ncompacts_'.
161 // The outgoing transitions at a given state are stored consecutively. For a
162 // given state 's', its 'compactor.Size()' outgoing transitions (including
163 // superfinal transition when 's' is final), are stored in position
164 // ['s*compactor.Size()', '(s+1)*compactor.Size()').
165 //
166 // For variable out-degree compactors, the states_ array has size
167 // 'nstates_ + 1' and contains pointers to positions into 'compacts_'. For a
168 // given state 's', the compacted transitions of 's' are stored in positions
169 // ['states_[s]', 'states_[s + 1]') in 'compacts_'. By convention,
170 // 'states_[nstates_] == ncompacts_'.
171 //
172 // In both cases, the superfinal transitions (when 's' is final, i.e.,
173 // 'Final(s) != Weight::Zero()') are stored first.
174 //
175 // The unsigned type U is used to represent indices into the compacts_ array.
176 template <class Element, class Unsigned>
178  public:
180  : states_(nullptr),
181  compacts_(nullptr),
182  nstates_(0),
183  ncompacts_(0),
184  narcs_(0),
185  start_(kNoStateId),
186  error_(false) {}
187 
188  template <class Arc, class Compactor>
189  DefaultCompactStore(const Fst<Arc> &fst, const Compactor &compactor);
190 
191  template <class Iterator, class Compactor>
192  DefaultCompactStore(const Iterator &begin, const Iterator &end,
193  const Compactor &compactor);
194 
196  if (!states_region_) delete[] states_;
197  if (!compacts_region_) delete[] compacts_;
198  }
199 
200  template <class Compactor>
202  std::istream &strm, const FstReadOptions &opts, const FstHeader &hdr,
203  const Compactor &compactor);
204 
205  bool Write(std::ostream &strm, const FstWriteOptions &opts) const;
206 
207  Unsigned States(ssize_t i) const { return states_[i]; }
208 
209  const Element &Compacts(size_t i) const { return compacts_[i]; }
210 
211  size_t NumStates() const { return nstates_; }
212 
213  size_t NumCompacts() const { return ncompacts_; }
214 
215  size_t NumArcs() const { return narcs_; }
216 
217  ssize_t Start() const { return start_; }
218 
219  bool Error() const { return error_; }
220 
221  // Returns a string identifying the type of data storage container.
222  static const string &Type();
223 
224  private:
225  std::unique_ptr<MappedFile> states_region_;
226  std::unique_ptr<MappedFile> compacts_region_;
227  Unsigned *states_;
228  Element *compacts_;
229  size_t nstates_;
230  size_t ncompacts_;
231  size_t narcs_;
232  ssize_t start_;
233  bool error_;
234 };
235 
236 template <class Element, class Unsigned>
237 template <class Arc, class Compactor>
239  const Fst<Arc> &fst, const Compactor &compactor)
240  : states_(nullptr),
241  compacts_(nullptr),
242  nstates_(0),
243  ncompacts_(0),
244  narcs_(0),
245  start_(kNoStateId),
246  error_(false) {
247  using StateId = typename Arc::StateId;
248  using Weight = typename Arc::Weight;
249  start_ = fst.Start();
250  // Counts # of states and arcs.
251  StateId nfinals = 0;
252  for (StateIterator<Fst<Arc>> siter(fst); !siter.Done(); siter.Next()) {
253  ++nstates_;
254  const auto s = siter.Value();
255  narcs_ += fst.NumArcs(s);
256  if (fst.Final(s) != Weight::Zero()) ++nfinals;
257  }
258  if (compactor.Size() == -1) {
259  states_ = new Unsigned[nstates_ + 1];
260  ncompacts_ = narcs_ + nfinals;
261  compacts_ = new Element[ncompacts_];
262  states_[nstates_] = ncompacts_;
263  } else {
264  states_ = nullptr;
265  ncompacts_ = nstates_ * compactor.Size();
266  if ((narcs_ + nfinals) != ncompacts_) {
267  FSTERROR() << "DefaultCompactStore: Compactor incompatible with FST";
268  error_ = true;
269  return;
270  }
271  compacts_ = new Element[ncompacts_];
272  }
273  size_t pos = 0;
274  size_t fpos = 0;
275  for (size_t s = 0; s < nstates_; ++s) {
276  fpos = pos;
277  if (compactor.Size() == -1) states_[s] = pos;
278  if (fst.Final(s) != Weight::Zero()) {
279  compacts_[pos++] = compactor.Compact(
280  s, Arc(kNoLabel, kNoLabel, fst.Final(s), kNoStateId));
281  }
282  for (ArcIterator<Fst<Arc>> aiter(fst, s); !aiter.Done(); aiter.Next()) {
283  compacts_[pos++] = compactor.Compact(s, aiter.Value());
284  }
285  if ((compactor.Size() != -1) && (pos != fpos + compactor.Size())) {
286  FSTERROR() << "DefaultCompactStore: Compactor incompatible with FST";
287  error_ = true;
288  return;
289  }
290  }
291  if (pos != ncompacts_) {
292  FSTERROR() << "DefaultCompactStore: Compactor incompatible with FST";
293  error_ = true;
294  return;
295  }
296 }
297 
298 template <class Element, class Unsigned>
299 template <class Iterator, class Compactor>
301  const Iterator &begin, const Iterator &end, const Compactor &compactor)
302  : states_(nullptr),
303  compacts_(nullptr),
304  nstates_(0),
305  ncompacts_(0),
306  narcs_(0),
307  start_(kNoStateId),
308  error_(false) {
309  using Arc = typename Compactor::Arc;
310  using Weight = typename Arc::Weight;
311  if (compactor.Size() != -1) {
312  ncompacts_ = std::distance(begin, end);
313  if (compactor.Size() == 1) {
314  // For strings, allows implicit final weight. Empty input is the empty
315  // string.
316  if (ncompacts_ == 0) {
317  ++ncompacts_;
318  } else {
319  const auto arc =
320  compactor.Expand(ncompacts_ - 1, *(begin + (ncompacts_ - 1)));
321  if (arc.ilabel != kNoLabel) ++ncompacts_;
322  }
323  }
324  if (ncompacts_ % compactor.Size()) {
325  FSTERROR() << "DefaultCompactStore: Size of input container incompatible"
326  << " with compactor";
327  error_ = true;
328  return;
329  }
330  if (ncompacts_ == 0) return;
331  start_ = 0;
332  nstates_ = ncompacts_ / compactor.Size();
333  compacts_ = new Element[ncompacts_];
334  size_t i = 0;
335  Iterator it = begin;
336  for (; it != end; ++it, ++i) {
337  compacts_[i] = *it;
338  if (compactor.Expand(i, *it).ilabel != kNoLabel) ++narcs_;
339  }
340  if (i < ncompacts_) {
341  compacts_[i] = compactor.Compact(
342  i, Arc(kNoLabel, kNoLabel, Weight::One(), kNoStateId));
343  }
344  } else {
345  if (std::distance(begin, end) == 0) return;
346  // Count # of states, arcs and compacts.
347  auto it = begin;
348  for (size_t i = 0; it != end; ++it, ++i) {
349  const auto arc = compactor.Expand(i, *it);
350  if (arc.ilabel != kNoLabel) {
351  ++narcs_;
352  ++ncompacts_;
353  } else {
354  ++nstates_;
355  if (arc.weight != Weight::Zero()) ++ncompacts_;
356  }
357  }
358  start_ = 0;
359  compacts_ = new Element[ncompacts_];
360  states_ = new Unsigned[nstates_ + 1];
361  states_[nstates_] = ncompacts_;
362  size_t i = 0;
363  size_t s = 0;
364  for (it = begin; it != end; ++it) {
365  const auto arc = compactor.Expand(i, *it);
366  if (arc.ilabel != kNoLabel) {
367  compacts_[i++] = *it;
368  } else {
369  states_[s++] = i;
370  if (arc.weight != Weight::Zero()) compacts_[i++] = *it;
371  }
372  }
373  if ((s != nstates_) || (i != ncompacts_)) {
374  FSTERROR() << "DefaultCompactStore: Ill-formed input container";
375  error_ = true;
376  return;
377  }
378  }
379 }
380 
381 template <class Element, class Unsigned>
382 template <class Compactor>
385  const FstReadOptions &opts,
386  const FstHeader &hdr,
387  const Compactor &compactor) {
388  std::unique_ptr<DefaultCompactStore<Element, Unsigned>> data(
390  data->start_ = hdr.Start();
391  data->nstates_ = hdr.NumStates();
392  data->narcs_ = hdr.NumArcs();
393  if (compactor.Size() == -1) {
394  if ((hdr.GetFlags() & FstHeader::IS_ALIGNED) && !AlignInput(strm)) {
395  LOG(ERROR) << "DefaultCompactStore::Read: Alignment failed: "
396  << opts.source;
397  return nullptr;
398  }
399  auto b = (data->nstates_ + 1) * sizeof(Unsigned);
400  data->states_region_.reset(MappedFile::Map(
401  &strm, opts.mode == FstReadOptions::MAP, opts.source, b));
402  if (!strm || !data->states_region_) {
403  LOG(ERROR) << "DefaultCompactStore::Read: Read failed: " << opts.source;
404  return nullptr;
405  }
406  data->states_ =
407  static_cast<Unsigned *>(data->states_region_->mutable_data());
408  } else {
409  data->states_ = nullptr;
410  }
411  data->ncompacts_ = compactor.Size() == -1 ? data->states_[data->nstates_]
412  : data->nstates_ * compactor.Size();
413  if ((hdr.GetFlags() & FstHeader::IS_ALIGNED) && !AlignInput(strm)) {
414  LOG(ERROR) << "DefaultCompactStore::Read: Alignment failed: "
415  << opts.source;
416  return nullptr;
417  }
418  size_t b = data->ncompacts_ * sizeof(Element);
419  data->compacts_region_.reset(
420  MappedFile::Map(&strm, opts.mode == FstReadOptions::MAP, opts.source, b));
421  if (!strm || !data->compacts_region_) {
422  LOG(ERROR) << "DefaultCompactStore::Read: Read failed: " << opts.source;
423  return nullptr;
424  }
425  data->compacts_ =
426  static_cast<Element *>(data->compacts_region_->mutable_data());
427  return data.release();
428 }
429 
430 template <class Element, class Unsigned>
432  std::ostream &strm, const FstWriteOptions &opts) const {
433  if (states_) {
434  if (opts.align && !AlignOutput(strm)) {
435  LOG(ERROR) << "DefaultCompactStore::Write: Alignment failed: "
436  << opts.source;
437  return false;
438  }
439  strm.write(reinterpret_cast<char *>(states_),
440  (nstates_ + 1) * sizeof(Unsigned));
441  }
442  if (opts.align && !AlignOutput(strm)) {
443  LOG(ERROR) << "DefaultCompactStore::Write: Alignment failed: "
444  << opts.source;
445  return false;
446  }
447  strm.write(reinterpret_cast<char *>(compacts_), ncompacts_ * sizeof(Element));
448  strm.flush();
449  if (!strm) {
450  LOG(ERROR) << "DefaultCompactStore::Write: Write failed: " << opts.source;
451  return false;
452  }
453  return true;
454 }
455 
456 template <class Element, class Unsigned>
458  static const string *const type = new string("compact");
459  return *type;
460 }
461 
462 template <class C, class U, class S> class DefaultCompactState;
463 
464 // Wraps an arc compactor and a compact store as a new Fst compactor.
465 template <class C, class U,
468  public:
469  using ArcCompactor = C;
470  using Unsigned = U;
471  using CompactStore = S;
472  using Element = typename C::Element;
473  using Arc = typename C::Arc;
474  using StateId = typename Arc::StateId;
475  using Weight = typename Arc::Weight;
477  friend State;
478 
480  : arc_compactor_(nullptr), compact_store_(nullptr) {}
481 
482  // Constructs from Fst.
484  std::shared_ptr<ArcCompactor> arc_compactor)
485  : arc_compactor_(std::move(arc_compactor)),
486  compact_store_(std::make_shared<S>(fst, *arc_compactor_)) {}
487 
489  std::shared_ptr<DefaultCompactor<C, U, S>> compactor)
490  : arc_compactor_(compactor->arc_compactor_),
491  compact_store_(compactor->compact_store_ == nullptr ?
492  std::make_shared<S>(fst, *arc_compactor_) :
493  compactor->compact_store_) {}
494 
495  // Constructs from CompactStore.
496  DefaultCompactor(std::shared_ptr<ArcCompactor> arc_compactor,
497  std::shared_ptr<CompactStore> compact_store)
498  : arc_compactor_(std::move(arc_compactor)),
499  compact_store_(std::move(compact_store)) {}
500 
501  // Constructs from set of compact elements (when arc_compactor.Size() != -1).
502  template <class Iterator>
503  DefaultCompactor(const Iterator &b, const Iterator &e,
504  std::shared_ptr<C> arc_compactor)
505  : arc_compactor_(std::move(arc_compactor)),
506  compact_store_(std::make_shared<S>(b, e, *arc_compactor_)) {}
507 
508  // Copy constructor.
510  : arc_compactor_(std::make_shared<C>(*compactor.GetArcCompactor())),
511  compact_store_(compactor.SharedCompactStore()) {}
512 
513  template <class OtherC>
515  : arc_compactor_(std::make_shared<C>(*compactor.GetArcCompactor())),
516  compact_store_(compactor.SharedCompactStore()) {}
517 
518  StateId Start() const { return compact_store_->Start(); }
519  StateId NumStates() const { return compact_store_->NumStates(); }
520  size_t NumArcs() const { return compact_store_->NumArcs(); }
521 
522  void SetState(StateId s, State *state) const {
523  if (state->GetStateId() != s) state->Set(this, s);
524  }
525 
526  static DefaultCompactor<C, U, S> *Read(std::istream &strm,
527  const FstReadOptions &opts,
528  const FstHeader &hdr) {
529  std::shared_ptr<C> arc_compactor(C::Read(strm));
530  if (arc_compactor == nullptr) return nullptr;
531  std::shared_ptr<S> compact_store(S::Read(strm, opts, hdr, *arc_compactor));
532  if (compact_store == nullptr) return nullptr;
533  return new DefaultCompactor<C, U, S>(arc_compactor, compact_store);
534  }
535 
536  bool Write(std::ostream &strm, const FstWriteOptions &opts) const {
537  return arc_compactor_->Write(strm) && compact_store_->Write(strm, opts);
538  }
539 
540  uint64 Properties() const { return arc_compactor_->Properties(); }
541 
542  bool IsCompatible(const Fst<Arc> &fst) const {
543  return arc_compactor_->Compatible(fst);
544  }
545 
546  bool Error() const { return compact_store_->Error(); }
547 
548  bool HasFixedOutdegree() const { return arc_compactor_->Size() != -1; }
549 
550  static const string &Type() {
551  static const string *const type = [] {
552  string type = "compact";
553  if (sizeof(U) != sizeof(uint32)) type += std::to_string(8 * sizeof(U));
554  type += "_";
555  type += C::Type();
556  if (CompactStore::Type() != "compact") {
557  type += "_";
558  type += CompactStore::Type();
559  }
560  return new string(type);
561  }();
562  return *type;
563  }
564 
565  const ArcCompactor *GetArcCompactor() const { return arc_compactor_.get(); }
566  CompactStore *GetCompactStore() const { return compact_store_.get(); }
567 
568  std::shared_ptr<ArcCompactor> SharedArcCompactor() const {
569  return arc_compactor_;
570  }
571 
572  std::shared_ptr<CompactStore> SharedCompactStore() const {
573  return compact_store_;
574  }
575 
576  // TODO(allauzen): remove dependencies on this method and make private.
578  return arc_compactor_->Expand(s, compact_store_->Compacts(i), f);
579  }
580 
581  private:
582  std::pair<Unsigned, Unsigned> CompactsRange(StateId s) const {
583  std::pair<size_t, size_t> range;
584  if (HasFixedOutdegree()) {
585  range.first = s * arc_compactor_->Size();
586  range.second = arc_compactor_->Size();
587  } else {
588  range.first = compact_store_->States(s);
589  range.second = compact_store_->States(s + 1) - range.first;
590  }
591  return range;
592  }
593 
594  private:
595  std::shared_ptr<ArcCompactor> arc_compactor_;
596  std::shared_ptr<CompactStore> compact_store_;
597 };
598 
599 // Default implementation of state attributes accessor class for
600 // DefaultCompactor. Use of efficient specialization strongly encouraged.
601 template <class C, class U, class S>
602 class DefaultCompactState {
603  public:
604  using Arc = typename C::Arc;
605  using StateId = typename Arc::StateId;
606  using Weight = typename Arc::Weight;
607 
608  DefaultCompactState() = default;
609 
611  : compactor_(compactor),
612  s_(s),
613  range_(compactor->CompactsRange(s)),
614  has_final_(
615  range_.second != 0 &&
616  compactor->ComputeArc(s, range_.first,
617  kArcILabelValue).ilabel == kNoLabel) {
618  if (has_final_) {
619  ++range_.first;
620  --range_.second;
621  }
622  }
623 
624  void Set(const DefaultCompactor<C, U, S> *compactor, StateId s) {
625  compactor_ = compactor;
626  s_ = s;
627  range_ = compactor->CompactsRange(s);
628  if (range_.second != 0 &&
629  compactor->ComputeArc(s, range_.first, kArcILabelValue).ilabel
630  == kNoLabel) {
631  has_final_ = true;
632  ++range_.first;
633  --range_.second;
634  } else {
635  has_final_ = false;
636  }
637  }
638 
639  StateId GetStateId() const { return s_; }
640 
641  Weight Final() const {
642  if (!has_final_) return Weight::Zero();
643  return compactor_->ComputeArc(s_, range_.first - 1, kArcWeightValue).weight;
644  }
645 
646  size_t NumArcs() const { return range_.second; }
647 
648  Arc GetArc(size_t i, uint32 f) const {
649  return compactor_->ComputeArc(s_, range_.first + i, f);
650  }
651 
652  private:
653  const DefaultCompactor<C, U, S> *compactor_ = nullptr; // borrowed ref.
654  StateId s_ = kNoStateId;
655  std::pair<U, U> range_ = {0, 0};
656  bool has_final_ = false;
657 };
658 
659 // Specialization for DefaultCompactStore.
660 template <class C, class U>
661 class DefaultCompactState<C, U, DefaultCompactStore<typename C::Element, U>> {
662  public:
663  using Arc = typename C::Arc;
664  using StateId = typename Arc::StateId;
665  using Weight = typename Arc::Weight;
667 
668  DefaultCompactState() = default;
669 
671  const DefaultCompactor<C, U, CompactStore> *compactor, StateId s)
672  : arc_compactor_(compactor->GetArcCompactor()), s_(s) {
673  Init(compactor);
674  }
675 
677  arc_compactor_ = compactor->GetArcCompactor();
678  s_ = s;
679  has_final_ = false;
680  Init(compactor);
681  }
682 
683  StateId GetStateId() const { return s_; }
684 
685  Weight Final() const {
686  if (!has_final_) return Weight::Zero();
687  return arc_compactor_->Expand(s_, *(compacts_ - 1), kArcWeightValue).weight;
688  }
689 
690  size_t NumArcs() const { return num_arcs_; }
691 
692  Arc GetArc(size_t i, uint32 f) const {
693  return arc_compactor_->Expand(s_, compacts_[i], f);
694  }
695 
696  private:
697  void Init(const DefaultCompactor<C, U, CompactStore> *compactor) {
698  const auto *store = compactor->GetCompactStore();
699  U offset;
700  if (!compactor->HasFixedOutdegree()) { // Variable out-degree compactor.
701  offset = store->States(s_);
702  num_arcs_ = store->States(s_ + 1) - offset;
703  } else { // Fixed out-degree compactor.
704  offset = s_ * arc_compactor_->Size();
705  num_arcs_ = arc_compactor_->Size();
706  }
707  if (num_arcs_ > 0) {
708  compacts_ = &(store->Compacts(offset));
709  if (arc_compactor_->Expand(s_, *compacts_, kArcILabelValue).ilabel
710  == kNoStateId) {
711  ++compacts_;
712  --num_arcs_;
713  has_final_ = true;
714  }
715  }
716  }
717 
718  private:
719  const C *arc_compactor_ = nullptr; // Borrowed reference.
720  const typename C::Element *compacts_ = nullptr; // Borrowed reference.
721  StateId s_ = kNoStateId;
722  U num_arcs_ = 0;
723  bool has_final_ = false;
724 };
725 
726 template <class Arc, class ArcCompactor, class Unsigned, class CompactStore,
727  class CacheStore>
729 
730 template <class F, class G>
731 void Cast(const F &, G *);
732 
733 namespace internal {
734 
735 // Implementation class for CompactFst, which contains parametrizeable
736 // Fst data storage (DefaultCompactStore by default) and Fst cache.
737 template <class Arc, class C, class CacheStore = DefaultCacheStore<Arc>>
739  : public CacheBaseImpl<typename CacheStore::State, CacheStore> {
740  public:
741  using Weight = typename Arc::Weight;
742  using StateId = typename Arc::StateId;
743  using Compactor = C;
744 
745  using FstImpl<Arc>::SetType;
751 
753  using ImplBase::PushArc;
754  using ImplBase::HasArcs;
755  using ImplBase::HasFinal;
756  using ImplBase::HasStart;
757  using ImplBase::SetArcs;
758  using ImplBase::SetFinal;
759  using ImplBase::SetStart;
760 
763  compactor_() {
764  SetType(Compactor::Type());
765  SetProperties(kNullProperties | kStaticProperties);
766  }
767 
768  CompactFstImpl(const Fst<Arc> &fst, std::shared_ptr<Compactor> compactor,
769  const CompactFstOptions &opts)
770  : ImplBase(opts),
771  compactor_(std::make_shared<Compactor>(fst, compactor)) {
772  SetType(Compactor::Type());
773  SetInputSymbols(fst.InputSymbols());
774  SetOutputSymbols(fst.OutputSymbols());
775  if (compactor_->Error()) SetProperties(kError, kError);
776  uint64 copy_properties = fst.Properties(kMutable, false) ?
777  fst.Properties(kCopyProperties, true):
778  CheckProperties(fst,
781  if ((copy_properties & kError) || !compactor_->IsCompatible(fst)) {
782  FSTERROR() << "CompactFstImpl: Input Fst incompatible with compactor";
783  SetProperties(kError, kError);
784  return;
785  }
786  SetProperties(copy_properties | kStaticProperties);
787  }
788 
789  CompactFstImpl(std::shared_ptr<Compactor> compactor,
790  const CompactFstOptions &opts)
791  : ImplBase(opts),
792  compactor_(compactor) {
793  SetType(Compactor::Type());
794  SetProperties(kStaticProperties | compactor_->Properties());
795  if (compactor_->Error()) SetProperties(kError, kError);
796  }
797 
799  : ImplBase(impl),
800  compactor_(impl.compactor_ == nullptr ?
801  std::make_shared<Compactor>() :
802  std::make_shared<Compactor>(*impl.compactor_)) {
803  SetType(impl.Type());
804  SetProperties(impl.Properties());
805  SetInputSymbols(impl.InputSymbols());
806  SetOutputSymbols(impl.OutputSymbols());
807  }
808 
809  // Allows to change the cache store from OtherI to I.
810  template <class OtherCacheStore>
812  : ImplBase(CacheOptions(impl.GetCacheGc(), impl.GetCacheLimit())),
813  compactor_(impl.compactor_ == nullptr ?
814  std::make_shared<Compactor>() :
815  std::make_shared<Compactor>(*impl.compactor_)) {
816  SetType(impl.Type());
817  SetProperties(impl.Properties());
818  SetInputSymbols(impl.InputSymbols());
819  SetOutputSymbols(impl.OutputSymbols());
820  }
821 
823  if (!HasStart()) SetStart(compactor_->Start());
824  return ImplBase::Start();
825  }
826 
828  if (HasFinal(s)) return ImplBase::Final(s);
829  compactor_->SetState(s, &state_);
830  return state_.Final();
831  }
832 
833  StateId NumStates() const {
834  if (Properties(kError)) return 0;
835  return compactor_->NumStates();
836  }
837 
838  size_t NumArcs(StateId s) {
839  if (HasArcs(s)) return ImplBase::NumArcs(s);
840  compactor_->SetState(s, &state_);
841  return state_.NumArcs();
842  }
843 
845  if (!HasArcs(s) && !Properties(kILabelSorted)) Expand(s);
846  if (HasArcs(s)) return ImplBase::NumInputEpsilons(s);
847  return CountEpsilons(s, false);
848  }
849 
851  if (!HasArcs(s) && !Properties(kOLabelSorted)) Expand(s);
852  if (HasArcs(s)) return ImplBase::NumOutputEpsilons(s);
853  return CountEpsilons(s, true);
854  }
855 
856  size_t CountEpsilons(StateId s, bool output_epsilons) {
857  compactor_->SetState(s, &state_);
858  const uint32 f = output_epsilons ? kArcOLabelValue : kArcILabelValue;
859  size_t num_eps = 0;
860  for (size_t i = 0; i < state_.NumArcs(); ++i) {
861  const auto& arc = state_.GetArc(i, f);
862  const auto label = output_epsilons ? arc.olabel : arc.ilabel;
863  if (label == 0)
864  ++num_eps;
865  else if (label > 0)
866  break;
867  }
868  return num_eps;
869  }
870 
872  std::istream &strm, const FstReadOptions &opts) {
873  std::unique_ptr<CompactFstImpl<Arc, Compactor, CacheStore>> impl(
875  FstHeader hdr;
876  if (!impl->ReadHeader(strm, opts, kMinFileVersion, &hdr)) {
877  return nullptr;
878  }
879  // Ensures compatibility.
880  if (hdr.Version() == kAlignedFileVersion) {
882  }
883  impl->compactor_ = std::shared_ptr<Compactor>(
884  Compactor::Read(strm, opts, hdr));
885  if (!impl->compactor_) {
886  return nullptr;
887  }
888  return impl.release();
889  }
890 
891  bool Write(std::ostream &strm, const FstWriteOptions &opts) const {
892  FstHeader hdr;
893  hdr.SetStart(compactor_->Start());
894  hdr.SetNumStates(compactor_->NumStates());
895  hdr.SetNumArcs(compactor_->NumArcs());
896  // Ensures compatibility.
897  const auto file_version = opts.align ? kAlignedFileVersion : kFileVersion;
898  WriteHeader(strm, opts, file_version, &hdr);
899  return compactor_->Write(strm, opts);
900  }
901 
902  // Provides information needed for generic state iterator.
904  data->base = nullptr;
905  data->nstates = compactor_->NumStates();
906  }
907 
909  if (!HasArcs(s)) Expand(s);
910  ImplBase::InitArcIterator(s, data);
911  }
912 
913  void Expand(StateId s) {
914  compactor_->SetState(s, &state_);
915  for (size_t i = 0; i < state_.NumArcs(); ++i)
916  PushArc(s, state_.GetArc(i, kArcValueFlags));
917  SetArcs(s);
918  if (!HasFinal(s)) SetFinal(s, state_.Final());
919  }
920 
921  const Compactor *GetCompactor() const { return compactor_.get(); }
922  std::shared_ptr<Compactor> SharedCompactor() const { return compactor_; }
923  void SetCompactor(std::shared_ptr<Compactor> compactor) {
924  // TODO(allauzen): is this correct? is this needed?
925  // TODO(allauzen): consider removing and forcing this through direct calls
926  // to compactor.
927  compactor_ = compactor;
928  }
929 
930  // Properties always true of this FST class.
931  static constexpr uint64 kStaticProperties = kExpanded;
932 
933  protected:
934  template <class OtherArc, class OtherCompactor, class OtherCacheStore>
935  explicit CompactFstImpl(
937  : compactor_(std::make_shared<Compactor>(*impl.GetCompactor())) {
938  SetType(impl.Type());
939  SetProperties(impl.Properties());
940  SetInputSymbols(impl.InputSymbols());
941  SetOutputSymbols(impl.OutputSymbols());
942  }
943 
944  private:
945  // Allows access during write.
946  template <class AnyArc, class ArcCompactor, class Unsigned,
947  class CompactStore, class AnyCacheStore>
948  friend class ::fst::CompactFst; // allow access during write.
949 
950  // Current unaligned file format version.
951  static constexpr int kFileVersion = 2;
952  // Current aligned file format version.
953  static constexpr int kAlignedFileVersion = 1;
954  // Minimum file format version supported.
955  static constexpr int kMinFileVersion = 1;
956 
957  std::shared_ptr<Compactor> compactor_;
958  typename Compactor::State state_;
959 };
960 
961 template <class Arc, class Compactor, class CacheStore>
963 
964 template <class Arc, class Compactor, class CacheStore>
966 
967 template <class Arc, class Compactor, class CacheStore>
969 
970 template <class Arc, class Compactor, class CacheStore>
972 
973 } // namespace internal
974 
975 // This class attaches interface to implementation and handles reference
976 // counting, delegating most methods to ImplToExpandedFst. The Unsigned type
977 // is used to represent indices into the compact arc array. (Template
978 // argument defaults are declared in fst-decl.h.)
979 template <class A, class ArcCompactor, class Unsigned, class CompactStore,
980  class CacheStore>
981 class CompactFst
982  : public ImplToExpandedFst<internal::CompactFstImpl<
983  A,
984  DefaultCompactor<ArcCompactor, Unsigned, CompactStore>,
985  CacheStore>> {
986  public:
987  template <class F, class G>
988  void friend Cast(const F &, G *);
989 
990  using Arc = A;
991  using StateId = typename A::StateId;
994  using Store = CacheStore; // for CacheArcIterator
995 
996  friend class StateIterator<
997  CompactFst<A, ArcCompactor, Unsigned, CompactStore, CacheStore>>;
998  friend class ArcIterator<
999  CompactFst<A, ArcCompactor, Unsigned, CompactStore, CacheStore>>;
1000 
1001  CompactFst() : ImplToExpandedFst<Impl>(std::make_shared<Impl>()) {}
1002 
1003  // If data is not nullptr, it is assumed to be already initialized.
1004  explicit CompactFst(
1005  const Fst<A> &fst,
1006  const ArcCompactor &compactor = ArcCompactor(),
1007  const CompactFstOptions &opts = CompactFstOptions(),
1008  std::shared_ptr<CompactStore> data = std::shared_ptr<CompactStore>())
1010  std::make_shared<Impl>(
1011  fst,
1012  std::make_shared<Compactor>(
1013  std::make_shared<ArcCompactor>(compactor), data),
1014  opts)) {}
1015 
1016  // If data is not nullptr, it is assumed to be already initialized.
1018  const Fst<Arc> &fst,
1019  std::shared_ptr<ArcCompactor> compactor,
1020  const CompactFstOptions &opts = CompactFstOptions(),
1021  std::shared_ptr<CompactStore> data = std::shared_ptr<CompactStore>())
1023  std::make_shared<Impl>(fst,
1024  std::make_shared<Compactor>(compactor, data),
1025  opts)) {}
1026 
1027  // The following 2 constructors take as input two iterators delimiting a set
1028  // of (already) compacted transitions, starting with the transitions out of
1029  // the initial state. The format of the input differs for fixed out-degree
1030  // and variable out-degree compactors.
1031  //
1032  // - For fixed out-degree compactors, the final weight (encoded as a
1033  // compacted transition) needs to be given only for final states. All strings
1034  // (compactor of size 1) will be assume to be terminated by a final state
1035  // even when the final state is not implicitely given.
1036  //
1037  // - For variable out-degree compactors, the final weight (encoded as a
1038  // compacted transition) needs to be given for all states and must appeared
1039  // first in the list (for state s, final weight of s, followed by outgoing
1040  // transitons in s).
1041  //
1042  // These 2 constructors allows the direct construction of a CompactFst
1043  // without first creating a more memory-hungry regular FST. This is useful
1044  // when memory usage is severely constrained.
1045  template <class Iterator>
1046  explicit CompactFst(const Iterator &begin, const Iterator &end,
1047  const ArcCompactor &compactor = ArcCompactor(),
1048  const CompactFstOptions &opts = CompactFstOptions())
1050  std::make_shared<Impl>(
1051  std::make_shared<Compactor>(
1052  begin, end, std::make_shared<ArcCompactor>(compactor)),
1053  opts)) {}
1054 
1055  template <class Iterator>
1056  CompactFst(const Iterator &begin, const Iterator &end,
1057  std::shared_ptr<ArcCompactor> compactor,
1058  const CompactFstOptions &opts = CompactFstOptions())
1060  std::make_shared<Impl>(
1061  std::make_shared<Compactor>(begin, end, compactor), opts)) {}
1062 
1063  // See Fst<>::Copy() for doc.
1066  &fst,
1067  bool safe = false)
1068  : ImplToExpandedFst<Impl>(fst, safe) {}
1069 
1070  // Get a copy of this CompactFst. See Fst<>::Copy() for further doc.
1072  bool safe = false) const override {
1074  *this, safe);
1075  }
1076 
1077  // Read a CompactFst from an input stream; return nullptr on error
1079  std::istream &strm, const FstReadOptions &opts) {
1080  auto *impl = Impl::Read(strm, opts);
1081  return impl ? new CompactFst<A, ArcCompactor, Unsigned, CompactStore,
1082  CacheStore>(std::shared_ptr<Impl>(impl))
1083  : nullptr;
1084  }
1085 
1086  // Read a CompactFst from a file; return nullptr on error
1087  // Empty filename reads from standard input
1089  const string &filename) {
1090  auto *impl = ImplToExpandedFst<Impl>::Read(filename);
1091  return impl ? new CompactFst<A, ArcCompactor, Unsigned, CompactStore,
1092  CacheStore>(std::shared_ptr<Impl>(impl))
1093  : nullptr;
1094  }
1095 
1096  bool Write(std::ostream &strm, const FstWriteOptions &opts) const override {
1097  return GetImpl()->Write(strm, opts);
1098  }
1099 
1100  bool Write(const string &filename) const override {
1101  return Fst<Arc>::WriteFile(filename);
1102  }
1103 
1104  template <class FST>
1105  static bool WriteFst(const FST &fst, const ArcCompactor &compactor,
1106  std::ostream &strm, const FstWriteOptions &opts);
1107 
1108  void InitStateIterator(StateIteratorData<Arc> *data) const override {
1109  GetImpl()->InitStateIterator(data);
1110  }
1111 
1112  void InitArcIterator(StateId s, ArcIteratorData<Arc> *data) const override {
1113  GetMutableImpl()->InitArcIterator(s, data);
1114  }
1115 
1116  MatcherBase<Arc> *InitMatcher(MatchType match_type) const override {
1117  return new SortedMatcher<
1119  *this, match_type);
1120  }
1121 
1122  template <class Iterator>
1123  void SetCompactElements(const Iterator &b, const Iterator &e) {
1124  GetMutableImpl()->SetCompactor(std::make_shared<Compactor>(
1125  b, e, std::make_shared<ArcCompactor>()));
1126  }
1127 
1128  private:
1129  using ImplToFst<Impl, ExpandedFst<Arc>>::GetImpl;
1130  using ImplToFst<Impl, ExpandedFst<Arc>>::GetMutableImpl;
1131 
1132  explicit CompactFst(std::shared_ptr<Impl> impl)
1133  : ImplToExpandedFst<Impl>(impl) {}
1134 
1135  // Use overloading to extract the type of the argument.
1136  static Impl *GetImplIfCompactFst(
1138  &compact_fst) {
1139  return compact_fst.GetImpl();
1140  }
1141 
1142  // This does not give privileged treatment to subclasses of CompactFst.
1143  template <typename NonCompactFst>
1144  static Impl *GetImplIfCompactFst(const NonCompactFst &fst) {
1145  return nullptr;
1146  }
1147 
1148  CompactFst &operator=(const CompactFst &fst) = delete;
1149 };
1150 
1151 // Writes FST in Compact format, with a possible pass over the machine before
1152 // writing to compute the number of states and arcs.
1153 template <class A, class ArcCompactor, class Unsigned, class CompactStore,
1154  class CacheStore>
1155 template <class FST>
1157  const FST &fst, const ArcCompactor &compactor, std::ostream &strm,
1158  const FstWriteOptions &opts) {
1159  using Arc = A;
1160  using Weight = typename A::Weight;
1161  using Element = typename ArcCompactor::Element;
1162  const auto file_version =
1163  opts.align ? Impl::kAlignedFileVersion : Impl::kFileVersion;
1164  size_t num_arcs = -1;
1165  size_t num_states = -1;
1166  auto first_pass_compactor = compactor;
1167  if (auto *impl = GetImplIfCompactFst(fst)) {
1168  num_arcs = impl->GetCompactor()->GetCompactStore()->NumArcs();
1169  num_states = impl->GetCompactor()->GetCompactStore()->NumStates();
1170  first_pass_compactor = *impl->GetCompactor()->GetArcCompactor();
1171  } else {
1172  // A first pass is needed to compute the state of the compactor, which
1173  // is saved ahead of the rest of the data structures. This unfortunately
1174  // means forcing a complete double compaction when writing in this format.
1175  // TODO(allauzen): eliminate mutable state from compactors.
1176  num_arcs = 0;
1177  num_states = 0;
1178  for (StateIterator<FST> siter(fst); !siter.Done(); siter.Next()) {
1179  const auto s = siter.Value();
1180  ++num_states;
1181  if (fst.Final(s) != Weight::Zero()) {
1182  first_pass_compactor.Compact(
1183  s, Arc(kNoLabel, kNoLabel, fst.Final(s), kNoStateId));
1184  }
1185  for (ArcIterator<FST> aiter(fst, s); !aiter.Done(); aiter.Next()) {
1186  ++num_arcs;
1187  first_pass_compactor.Compact(s, aiter.Value());
1188  }
1189  }
1190  }
1191  FstHeader hdr;
1192  hdr.SetStart(fst.Start());
1193  hdr.SetNumStates(num_states);
1194  hdr.SetNumArcs(num_arcs);
1195  string type = "compact";
1196  if (sizeof(Unsigned) != sizeof(uint32)) {
1197  type += std::to_string(CHAR_BIT * sizeof(Unsigned));
1198  }
1199  type += "_";
1200  type += ArcCompactor::Type();
1201  if (CompactStore::Type() != "compact") {
1202  type += "_";
1203  type += CompactStore::Type();
1204  }
1205  const auto copy_properties = fst.Properties(kCopyProperties, true);
1206  if ((copy_properties & kError) || !compactor.Compatible(fst)) {
1207  FSTERROR() << "Fst incompatible with compactor";
1208  return false;
1209  }
1210  uint64 properties = copy_properties | Impl::kStaticProperties;
1211  internal::FstImpl<Arc>::WriteFstHeader(fst, strm, opts, file_version, type,
1212  properties, &hdr);
1213  first_pass_compactor.Write(strm);
1214  if (first_pass_compactor.Size() == -1) {
1215  if (opts.align && !AlignOutput(strm)) {
1216  LOG(ERROR) << "CompactFst::Write: Alignment failed: " << opts.source;
1217  return false;
1218  }
1219  Unsigned compacts = 0;
1220  for (StateIterator<FST> siter(fst); !siter.Done(); siter.Next()) {
1221  const auto s = siter.Value();
1222  strm.write(reinterpret_cast<const char *>(&compacts), sizeof(compacts));
1223  if (fst.Final(s) != Weight::Zero()) {
1224  ++compacts;
1225  }
1226  compacts += fst.NumArcs(s);
1227  }
1228  strm.write(reinterpret_cast<const char *>(&compacts), sizeof(compacts));
1229  }
1230  if (opts.align && !AlignOutput(strm)) {
1231  LOG(ERROR) << "Could not align file during write after writing states";
1232  }
1233  const auto &second_pass_compactor = compactor;
1234  Element element;
1235  for (StateIterator<FST> siter(fst); !siter.Done(); siter.Next()) {
1236  const auto s = siter.Value();
1237  if (fst.Final(s) != Weight::Zero()) {
1238  element = second_pass_compactor.Compact(
1239  s, A(kNoLabel, kNoLabel, fst.Final(s), kNoStateId));
1240  strm.write(reinterpret_cast<const char *>(&element), sizeof(element));
1241  }
1242  for (ArcIterator<FST> aiter(fst, s); !aiter.Done(); aiter.Next()) {
1243  element = second_pass_compactor.Compact(s, aiter.Value());
1244  strm.write(reinterpret_cast<const char *>(&element), sizeof(element));
1245  }
1246  }
1247  strm.flush();
1248  if (!strm) {
1249  LOG(ERROR) << "CompactFst write failed: " << opts.source;
1250  return false;
1251  }
1252  return true;
1253 }
1254 
1255 // Specialization for CompactFst; see generic version in fst.h for sample
1256 // usage (but use the CompactFst type!). This version should inline.
1257 template <class Arc, class ArcCompactor, class Unsigned, class CompactStore,
1258  class CacheStore>
1260  CompactFst<Arc, ArcCompactor, Unsigned, CompactStore, CacheStore>> {
1261  public:
1262  using StateId = typename Arc::StateId;
1263 
1264  explicit StateIterator(
1265  const CompactFst<Arc, ArcCompactor, Unsigned, CompactStore,
1266  CacheStore> &fst)
1267  : nstates_(fst.GetImpl()->NumStates()), s_(0) {}
1268 
1269  bool Done() const { return s_ >= nstates_; }
1270 
1271  StateId Value() const { return s_; }
1272 
1273  void Next() { ++s_; }
1274 
1275  void Reset() { s_ = 0; }
1276 
1277  private:
1278  StateId nstates_;
1279  StateId s_;
1280 };
1281 
1282 // Specialization for CompactFst. Never caches,
1283 // always iterates over the underlying compact elements.
1284 template <class Arc, class ArcCompactor, class Unsigned,
1285  class CompactStore, class CacheStore>
1287  Arc, ArcCompactor, Unsigned, CompactStore, CacheStore>> {
1288  public:
1289  using StateId = typename Arc::StateId;
1290  using Element = typename ArcCompactor::Element;
1292  using State = typename Compactor::State;
1293 
1294  ArcIterator(const CompactFst<Arc, ArcCompactor, Unsigned, CompactStore,
1295  CacheStore> &fst,
1296  StateId s)
1297  : state_(fst.GetImpl()->GetCompactor(), s),
1298  pos_(0),
1299  flags_(kArcValueFlags) {}
1300 
1301  bool Done() const { return pos_ >= state_.NumArcs(); }
1302 
1303  const Arc &Value() const {
1304  arc_ = state_.GetArc(pos_, flags_);
1305  return arc_;
1306  }
1307 
1308  void Next() { ++pos_; }
1309 
1310  size_t Position() const { return pos_; }
1311 
1312  void Reset() { pos_ = 0; }
1313 
1314  void Seek(size_t pos) { pos_ = pos; }
1315 
1316  uint32 Flags() const { return flags_; }
1317 
1318  void SetFlags(uint32 f, uint32 m) {
1319  flags_ &= ~m;
1320  flags_ |= (f & kArcValueFlags);
1321  }
1322 
1323  private:
1324  State state_;
1325  size_t pos_;
1326  mutable Arc arc_;
1327  uint32 flags_;
1328 };
1329 
1330 // ArcCompactor for unweighted string FSTs.
1331 template <class A>
1333  public:
1334  using Arc = A;
1335  using Label = typename Arc::Label;
1336  using StateId = typename Arc::StateId;
1337  using Weight = typename Arc::Weight;
1338 
1339  using Element = Label;
1340 
1341  Element Compact(StateId s, const Arc &arc) const { return arc.ilabel; }
1342 
1343  Arc Expand(StateId s, const Element &p, uint32 f = kArcValueFlags) const {
1344  return Arc(p, p, Weight::One(), p != kNoLabel ? s + 1 : kNoStateId);
1345  }
1346 
1347  constexpr ssize_t Size() const { return 1; }
1348 
1349  constexpr uint64 Properties() const {
1350  return kString | kAcceptor | kUnweighted;
1351  }
1352 
1353  bool Compatible(const Fst<Arc> &fst) const {
1354  const auto props = Properties();
1355  return fst.Properties(props, true) == props;
1356  }
1357 
1358  static const string &Type() {
1359  static const string *const type = new string("string");
1360  return *type;
1361  }
1362 
1363  bool Write(std::ostream &strm) const { return true; }
1364 
1365  static StringCompactor *Read(std::istream &strm) {
1366  return new StringCompactor;
1367  }
1368 };
1369 
1370 // ArcCompactor for weighted string FSTs.
1371 template <class A>
1373  public:
1374  using Arc = A;
1375  using Label = typename Arc::Label;
1376  using StateId = typename Arc::StateId;
1377  using Weight = typename Arc::Weight;
1378 
1379  using Element = std::pair<Label, Weight>;
1380 
1381  Element Compact(StateId s, const Arc &arc) const {
1382  return std::make_pair(arc.ilabel, arc.weight);
1383  }
1384 
1385  Arc Expand(StateId s, const Element &p, uint32 f = kArcValueFlags) const {
1386  return Arc(p.first, p.first, p.second,
1387  p.first != kNoLabel ? s + 1 : kNoStateId);
1388  }
1389 
1390  constexpr ssize_t Size() const { return 1; }
1391 
1392  constexpr uint64 Properties() const { return kString | kAcceptor; }
1393 
1394  bool Compatible(const Fst<Arc> &fst) const {
1395  const auto props = Properties();
1396  return fst.Properties(props, true) == props;
1397  }
1398 
1399  static const string &Type() {
1400  static const string *const type = new string("weighted_string");
1401  return *type;
1402  }
1403 
1404  bool Write(std::ostream &strm) const { return true; }
1405 
1406  static WeightedStringCompactor *Read(std::istream &strm) {
1407  return new WeightedStringCompactor;
1408  }
1409 };
1410 
1411 // ArcCompactor for unweighted acceptor FSTs.
1412 template <class A>
1414  public:
1415  using Arc = A;
1416  using Label = typename Arc::Label;
1417  using StateId = typename Arc::StateId;
1418  using Weight = typename Arc::Weight;
1419 
1420  using Element = std::pair<Label, StateId>;
1421 
1422  Element Compact(StateId s, const Arc &arc) const {
1423  return std::make_pair(arc.ilabel, arc.nextstate);
1424  }
1425 
1426  Arc Expand(StateId s, const Element &p, uint32 f = kArcValueFlags) const {
1427  return Arc(p.first, p.first, Weight::One(), p.second);
1428  }
1429 
1430  constexpr ssize_t Size() const { return -1; }
1431 
1432  constexpr uint64 Properties() const { return kAcceptor | kUnweighted; }
1433 
1434  bool Compatible(const Fst<Arc> &fst) const {
1435  const auto props = Properties();
1436  return fst.Properties(props, true) == props;
1437  }
1438 
1439  static const string &Type() {
1440  static const string *const type = new string("unweighted_acceptor");
1441  return *type;
1442  }
1443 
1444  bool Write(std::ostream &strm) const { return true; }
1445 
1446  static UnweightedAcceptorCompactor *Read(std::istream &istrm) {
1447  return new UnweightedAcceptorCompactor;
1448  }
1449 };
1450 
1451 // ArcCompactor for weighted acceptor FSTs.
1452 template <class A>
1454  public:
1455  using Arc = A;
1456  using Label = typename Arc::Label;
1457  using StateId = typename Arc::StateId;
1458  using Weight = typename Arc::Weight;
1459 
1460  using Element = std::pair<std::pair<Label, Weight>, StateId>;
1461 
1462  Element Compact(StateId s, const Arc &arc) const {
1463  return std::make_pair(std::make_pair(arc.ilabel, arc.weight),
1464  arc.nextstate);
1465  }
1466 
1467  Arc Expand(StateId s, const Element &p, uint32 f = kArcValueFlags) const {
1468  return Arc(p.first.first, p.first.first, p.first.second, p.second);
1469  }
1470 
1471  constexpr ssize_t Size() const { return -1; }
1472 
1473  constexpr uint64 Properties() const { return kAcceptor; }
1474 
1475  bool Compatible(const Fst<Arc> &fst) const {
1476  const auto props = Properties();
1477  return fst.Properties(props, true) == props;
1478  }
1479 
1480  static const string &Type() {
1481  static const string *const type = new string("acceptor");
1482  return *type;
1483  }
1484 
1485  bool Write(std::ostream &strm) const { return true; }
1486 
1487  static AcceptorCompactor *Read(std::istream &strm) {
1488  return new AcceptorCompactor;
1489  }
1490 };
1491 
1492 // ArcCompactor for unweighted FSTs.
1493 template <class A>
1495  public:
1496  using Arc = A;
1497  using Label = typename Arc::Label;
1498  using StateId = typename Arc::StateId;
1499  using Weight = typename Arc::Weight;
1500 
1501  using Element = std::pair<std::pair<Label, Label>, StateId>;
1502 
1503  Element Compact(StateId s, const Arc &arc) const {
1504  return std::make_pair(std::make_pair(arc.ilabel, arc.olabel),
1505  arc.nextstate);
1506  }
1507 
1508  Arc Expand(StateId s, const Element &p, uint32 f = kArcValueFlags) const {
1509  return Arc(p.first.first, p.first.second, Weight::One(), p.second);
1510  }
1511 
1512  constexpr ssize_t Size() const { return -1; }
1513 
1514  constexpr uint64 Properties() const { return kUnweighted; }
1515 
1516  bool Compatible(const Fst<Arc> &fst) const {
1517  const auto props = Properties();
1518  return fst.Properties(props, true) == props;
1519  }
1520 
1521  static const string &Type() {
1522  static const string *const type = new string("unweighted");
1523  return *type;
1524  }
1525 
1526  bool Write(std::ostream &strm) const { return true; }
1527 
1528  static UnweightedCompactor *Read(std::istream &strm) {
1529  return new UnweightedCompactor;
1530  }
1531 };
1532 
1533 template <class Arc, class Unsigned /* = uint32 */>
1535 
1536 template <class Arc, class Unsigned /* = uint32 */>
1539 
1540 template <class Arc, class Unsigned /* = uint32 */>
1542 
1543 template <class Arc, class Unsigned /* = uint32 */>
1544 using CompactUnweightedFst =
1546 
1547 template <class Arc, class Unsigned /* = uint32 */>
1550 
1552 
1554 
1556 
1558 
1561 
1562 } // namespace fst
1563 
1564 #endif // FST_COMPACT_FST_H_
int32 Version() const
Definition: fst.h:123
static Impl * Read(std::istream &strm, const FstReadOptions &opts)
Definition: expanded-fst.h:130
constexpr uint64 Properties() const
Definition: compact-fst.h:1349
ssize_t NumOutputEpsilons(const ExpandedFst< Arc > &fst, typename Arc::StateId s)
Definition: expanded-fst.h:99
CompactFst(const Fst< Arc > &fst, std::shared_ptr< ArcCompactor > compactor, const CompactFstOptions &opts=CompactFstOptions(), std::shared_ptr< CompactStore > data=std::shared_ptr< CompactStore >())
Definition: compact-fst.h:1017
CompactFstImpl(const Fst< Arc > &fst, std::shared_ptr< Compactor > compactor, const CompactFstOptions &opts)
Definition: compact-fst.h:768
CompactFst< A, ArcCompactor, Unsigned, CompactStore, CacheStore > * Copy(bool safe=false) const override
Definition: compact-fst.h:1071
CompactFstImpl(const CompactFstImpl< Arc, Compactor, CacheStore > &impl)
Definition: compact-fst.h:798
std::shared_ptr< ArcCompactor > SharedArcCompactor() const
Definition: compact-fst.h:568
Weight Final() const
Definition: compact-fst.h:641
constexpr ssize_t Size() const
Definition: compact-fst.h:1347
void Set(const DefaultCompactor< C, U, CompactStore > *compactor, StateId s)
Definition: compact-fst.h:676
DefaultCompactor(const Fst< Arc > &fst, std::shared_ptr< ArcCompactor > arc_compactor)
Definition: compact-fst.h:483
std::pair< std::pair< Label, Label >, StateId > Element
Definition: compact-fst.h:1501
string source
Definition: fst.h:84
typename Arc::Weight Weight
Definition: compact-fst.h:1499
constexpr int kNoLabel
Definition: fst.h:179
static UnweightedCompactor * Read(std::istream &strm)
Definition: compact-fst.h:1528
StateId GetStateId() const
Definition: compact-fst.h:639
DefaultCompactState(const DefaultCompactor< C, U, S > *compactor, StateId s)
Definition: compact-fst.h:610
void Cast(const F &, G *)
bool Compatible(const Fst< Arc > &fst) const
Definition: compact-fst.h:1434
uint64_t uint64
Definition: types.h:32
const Compactor * GetCompactor() const
Definition: compact-fst.h:921
std::shared_ptr< CompactStore > SharedCompactStore() const
Definition: compact-fst.h:572
Unsigned States(ssize_t i) const
Definition: compact-fst.h:207
typename Arc::Label Label
Definition: compact-fst.h:1497
CompactFstOptions(const CacheOptions &opts)
Definition: compact-fst.h:35
virtual size_t NumArcs(StateId) const =0
Element Compact(StateId s, const Arc &arc) const
Definition: compact-fst.h:1503
CompactFstImpl(std::shared_ptr< Compactor > compactor, const CompactFstOptions &opts)
Definition: compact-fst.h:789
void SetNumArcs(int64 numarcs)
Definition: fst.h:149
bool HasFixedOutdegree() const
Definition: compact-fst.h:548
int64 NumStates() const
Definition: fst.h:131
bool Write(std::ostream &strm) const
Definition: compact-fst.h:1526
Element Compact(StateId s, const Arc &arc) const
Definition: compact-fst.h:1341
bool Write(std::ostream &strm) const
Definition: compact-fst.h:1404
constexpr uint64 kUnweightedCycles
Definition: properties.h:126
static DefaultCompactor< C, U, S > * Read(std::istream &strm, const FstReadOptions &opts, const FstHeader &hdr)
Definition: compact-fst.h:526
DefaultCompactor(const Fst< Arc > &fst, std::shared_ptr< DefaultCompactor< C, U, S >> compactor)
Definition: compact-fst.h:488
Arc::Weight Final(const ExpandedFst< Arc > &fst, typename Arc::StateId s)
Definition: expanded-fst.h:82
const SymbolTable * OutputSymbols() const
Definition: fst.h:690
bool Write(std::ostream &strm) const
Definition: compact-fst.h:1363
MatchType
Definition: fst.h:171
typename Arc::Weight Weight
Definition: fst.h:190
int64 Start() const
Definition: fst.h:129
constexpr uint64 kILabelSorted
Definition: properties.h:75
std::shared_ptr< Compactor > SharedCompactor() const
Definition: compact-fst.h:922
static const string & Type()
Definition: compact-fst.h:457
void InitArcIterator(StateId s, ArcIteratorData< Arc > *data)
Definition: compact-fst.h:908
Arc GetArc(size_t i, uint32 f) const
Definition: compact-fst.h:648
#define LOG(type)
Definition: log.h:48
virtual Weight Final(StateId) const =0
SetType
Definition: set-weight.h:37
typename Arc::StateId StateId
Definition: compact-fst.h:1498
constexpr uint64 Properties() const
Definition: compact-fst.h:1514
static AcceptorCompactor * Read(std::istream &strm)
Definition: compact-fst.h:1487
constexpr uint64 kWeightedCycles
Definition: properties.h:123
static WeightedStringCompactor * Read(std::istream &strm)
Definition: compact-fst.h:1406
Arc Expand(StateId s, const Element &p, uint32 f=kArcValueFlags) const
Definition: compact-fst.h:1426
ssize_t NumArcs(const ExpandedFst< Arc > &fst, typename Arc::StateId s)
Definition: expanded-fst.h:88
CompactFst(const Fst< A > &fst, const ArcCompactor &compactor=ArcCompactor(), const CompactFstOptions &opts=CompactFstOptions(), std::shared_ptr< CompactStore > data=std::shared_ptr< CompactStore >())
Definition: compact-fst.h:1004
CompactFst(const Iterator &begin, const Iterator &end, std::shared_ptr< ArcCompactor > compactor, const CompactFstOptions &opts=CompactFstOptions())
Definition: compact-fst.h:1056
static DefaultCompactStore< Element, Unsigned > * Read(std::istream &strm, const FstReadOptions &opts, const FstHeader &hdr, const Compactor &compactor)
Definition: compact-fst.h:384
CacheStore Store
Definition: compact-fst.h:994
static const string & Type()
Definition: compact-fst.h:1358
constexpr uint64 kCopyProperties
Definition: properties.h:138
typename Arc::Weight Weight
Definition: compact-fst.h:741
CompactFstImpl(const CompactFstImpl< OtherArc, OtherCompactor, OtherCacheStore > &impl)
Definition: compact-fst.h:935
constexpr int kNoStateId
Definition: fst.h:180
size_t NumStates() const
Definition: compact-fst.h:211
constexpr uint64 kExpanded
Definition: properties.h:27
typename Arc::Weight Weight
Definition: compact-fst.h:1458
virtual uint64 Properties(uint64 mask, bool test) const =0
std::pair< Label, StateId > Element
Definition: compact-fst.h:1420
bool IsCompatible(const Fst< Arc > &fst) const
Definition: compact-fst.h:542
ArcIterator(const CompactFst< Arc, ArcCompactor, Unsigned, CompactStore, CacheStore > &fst, StateId s)
Definition: compact-fst.h:1294
size_t NumOutputEpsilons(StateId s)
Definition: compact-fst.h:850
#define FSTERROR()
Definition: util.h:35
typename C::Arc Arc
Definition: compact-fst.h:473
static UnweightedAcceptorCompactor * Read(std::istream &istrm)
Definition: compact-fst.h:1446
Arc Expand(StateId s, const Element &p, uint32 f=kArcValueFlags) const
Definition: compact-fst.h:1508
size_t NumArcs() const
Definition: compact-fst.h:215
constexpr uint64 kUnweighted
Definition: properties.h:87
bool WriteFile(const string &filename) const
Definition: fst.h:303
typename C::Arc Arc
Definition: compact-fst.h:604
StateIteratorBase< Arc > * base
Definition: fst.h:351
bool Write(const string &filename) const override
Definition: compact-fst.h:1100
ssize_t NumInputEpsilons(const ExpandedFst< Arc > &fst, typename Arc::StateId s)
Definition: expanded-fst.h:93
std::pair< Label, Weight > Element
Definition: compact-fst.h:1379
DefaultCompactor(const Iterator &b, const Iterator &e, std::shared_ptr< C > arc_compactor)
Definition: compact-fst.h:503
static const string & Type()
Definition: compact-fst.h:1480
bool AlignOutput(std::ostream &strm)
Definition: util.cc:76
constexpr uint64 Properties() const
Definition: compact-fst.h:1392
typename Arc::Label Label
Definition: compact-fst.h:1375
bool Compatible(const Fst< Arc > &fst) const
Definition: compact-fst.h:1394
StateIterator(const CompactFst< Arc, ArcCompactor, Unsigned, CompactStore, CacheStore > &fst)
Definition: compact-fst.h:1264
uint64 CheckProperties(const Fst< Arc > &fst, uint64 check_mask, uint64 test_mask)
typename Arc::Weight Weight
Definition: compact-fst.h:1337
typename Arc::Weight Weight
Definition: compact-fst.h:475
CompactStore * GetCompactStore() const
Definition: compact-fst.h:566
constexpr uint64 Properties() const
Definition: compact-fst.h:1473
CompactFst(const CompactFst< A, ArcCompactor, Unsigned, CompactStore, CacheStore > &fst, bool safe=false)
Definition: compact-fst.h:1064
bool Write(std::ostream &strm, const FstWriteOptions &opts) const override
Definition: compact-fst.h:1096
int32 GetFlags() const
Definition: fst.h:125
constexpr uint64 kOLabelSorted
Definition: properties.h:80
bool Compatible(const Fst< Arc > &fst) const
Definition: compact-fst.h:1475
typename Arc::StateId StateId
Definition: compact-fst.h:605
void SetNumStates(int64 numstates)
Definition: fst.h:147
StateId nstates
Definition: fst.h:353
constexpr ssize_t Size() const
Definition: compact-fst.h:1390
bool Write(std::ostream &strm, const FstWriteOptions &opts) const
Definition: compact-fst.h:431
static void WriteFstHeader(const Fst< Arc > &fst, std::ostream &strm, const FstWriteOptions &opts, int version, const string &type, uint64 properties, FstHeader *hdr)
Definition: fst.h:740
typename Arc::StateId StateId
Definition: compact-fst.h:474
StateId Start() const
Definition: compact-fst.h:518
virtual StateId Start() const =0
Arc ComputeArc(StateId s, Unsigned i, uint32 f) const
Definition: compact-fst.h:577
typename Arc::StateId StateId
Definition: compact-fst.h:742
bool Done() const
Definition: fst.h:499
constexpr ssize_t Size() const
Definition: compact-fst.h:1471
static StringCompactor * Read(std::istream &strm)
Definition: compact-fst.h:1365
static MappedFile * Map(std::istream *istrm, bool memorymap, const string &source, size_t size)
Definition: mapped-file.cc:38
typename Arc::Weight Weight
Definition: compact-fst.h:606
typename Arc::Weight Weight
Definition: compact-fst.h:1377
string source
Definition: fst.h:57
uint64 Properties() const
Definition: compact-fst.h:540
size_t NumArcs() const
Definition: compact-fst.h:520
ssize_t Start() const
Definition: compact-fst.h:217
constexpr uint64 kAcceptor
Definition: properties.h:45
MatcherBase< Arc > * InitMatcher(MatchType match_type) const override
Definition: compact-fst.h:1116
typename Arc::Label Label
Definition: compact-fst.h:1456
size_t NumInputEpsilons(StateId s)
Definition: compact-fst.h:844
bool Write(std::ostream &strm, const FstWriteOptions &opts) const
Definition: compact-fst.h:891
Element Compact(StateId s, const Arc &arc) const
Definition: compact-fst.h:1381
DefaultCompactor(const DefaultCompactor< C, U, S > &compactor)
Definition: compact-fst.h:509
void Set(const DefaultCompactor< C, U, S > *compactor, StateId s)
Definition: compact-fst.h:624
typename Arc::Weight Weight
Definition: compact-fst.h:1418
void InitArcIterator(StateId s, ArcIteratorData< Arc > *data) const override
Definition: compact-fst.h:1112
DefaultCompactor(const DefaultCompactor< OtherC, U, S > &compactor)
Definition: compact-fst.h:514
static const string & Type()
Definition: compact-fst.h:1521
CompactFstImpl(const CompactFstImpl< Arc, Compactor, OtherCacheStore > &impl)
Definition: compact-fst.h:811
std::pair< std::pair< Label, Weight >, StateId > Element
Definition: compact-fst.h:1460
constexpr uint64 Properties() const
Definition: compact-fst.h:1432
void SetFlags(int32 flags)
Definition: fst.h:141
constexpr uint64 kString
Definition: properties.h:117
uint32_t uint32
Definition: types.h:31
void SetStart(int64 start)
Definition: fst.h:145
DefaultCompactor(std::shared_ptr< ArcCompactor > arc_compactor, std::shared_ptr< CompactStore > compact_store)
Definition: compact-fst.h:496
size_t NumCompacts() const
Definition: compact-fst.h:213
virtual const SymbolTable * InputSymbols() const =0
const ArcCompactor * GetArcCompactor() const
Definition: compact-fst.h:565
const SymbolTable * InputSymbols() const
Definition: fst.h:688
static CompactFst< A, ArcCompactor, Unsigned, CompactStore, CacheStore > * Read(std::istream &strm, const FstReadOptions &opts)
Definition: compact-fst.h:1078
static const string & Type()
Definition: compact-fst.h:1399
bool Done() const
Definition: fst.h:383
Element Compact(StateId s, const Arc &arc) const
Definition: compact-fst.h:1422
constexpr uint64 kError
Definition: properties.h:33
bool Write(std::ostream &strm, const FstWriteOptions &opts) const
Definition: compact-fst.h:536
void SetCompactor(std::shared_ptr< Compactor > compactor)
Definition: compact-fst.h:923
typename Arc::StateId StateId
Definition: compact-fst.h:1457
DefaultCompactState(const DefaultCompactor< C, U, CompactStore > *compactor, StateId s)
Definition: compact-fst.h:670
FileReadMode mode
Definition: fst.h:64
constexpr uint64 kNullProperties
Definition: properties.h:131
static const string & Type()
Definition: compact-fst.h:1439
typename Arc::StateId StateId
Definition: compact-fst.h:1336
size_t NumArcs(StateId s)
Definition: compact-fst.h:838
void InitStateIterator(StateIteratorData< Arc > *data) const override
Definition: compact-fst.h:1108
void Expand(const Fst< Arc > &ifst, const std::vector< std::pair< typename Arc::Label, typename Arc::Label >> &parens, const std::vector< typename Arc::Label > &assignments, MutableFst< Arc > *ofst, const MPdtExpandOptions &opts)
Definition: expand.h:302
Arc Expand(StateId s, const Element &p, uint32 f=kArcValueFlags) const
Definition: compact-fst.h:1467
void InitStateIterator(StateIteratorData< Arc > *data) const
Definition: compact-fst.h:903
typename A::StateId StateId
Definition: compact-fst.h:991
void SetState(StateId s, State *state) const
Definition: compact-fst.h:522
static bool WriteFst(const FST &fst, const ArcCompactor &compactor, std::ostream &strm, const FstWriteOptions &opts)
Definition: compact-fst.h:1156
typename C::Element Element
Definition: compact-fst.h:472
static CompactFst< A, ArcCompactor, Unsigned, CompactStore, CacheStore > * Read(const string &filename)
Definition: compact-fst.h:1088
bool Write(std::ostream &strm) const
Definition: compact-fst.h:1485
typename Arc::StateId StateId
Definition: compact-fst.h:1417
size_t NumArcs() const
Definition: compact-fst.h:646
Element Compact(StateId s, const Arc &arc) const
Definition: compact-fst.h:1462
Arc Expand(StateId s, const Element &p, uint32 f=kArcValueFlags) const
Definition: compact-fst.h:1343
constexpr ssize_t Size() const
Definition: compact-fst.h:1512
bool Compatible(const Fst< Arc > &fst) const
Definition: compact-fst.h:1516
constexpr uint64 kMutable
Definition: properties.h:30
constexpr ssize_t Size() const
Definition: compact-fst.h:1430
StateId NumStates() const
Definition: compact-fst.h:833
StateId NumStates() const
Definition: compact-fst.h:519
typename Arc::Label Label
Definition: compact-fst.h:1335
static const string & Type()
Definition: compact-fst.h:550
CompactFst(const Iterator &begin, const Iterator &end, const ArcCompactor &compactor=ArcCompactor(), const CompactFstOptions &opts=CompactFstOptions())
Definition: compact-fst.h:1046
static CompactFstImpl< Arc, Compactor, CacheStore > * Read(std::istream &strm, const FstReadOptions &opts)
Definition: compact-fst.h:871
bool Write(std::ostream &strm) const
Definition: compact-fst.h:1444
bool Error() const
Definition: compact-fst.h:546
int64 NumArcs() const
Definition: fst.h:133
size_t CountEpsilons(StateId s, bool output_epsilons)
Definition: compact-fst.h:856
const Element & Compacts(size_t i) const
Definition: compact-fst.h:209
bool Compatible(const Fst< Arc > &fst) const
Definition: compact-fst.h:1353
void SetCompactElements(const Iterator &b, const Iterator &e)
Definition: compact-fst.h:1123
bool AlignInput(std::istream &strm)
Definition: util.cc:60
Arc Expand(StateId s, const Element &p, uint32 f=kArcValueFlags) const
Definition: compact-fst.h:1385
typename Arc::StateId StateId
Definition: compact-fst.h:1376
virtual const SymbolTable * OutputSymbols() const =0