FST  openfst-1.7.2
OpenFst Library
const-fst.h
Go to the documentation of this file.
1 // See www.openfst.org for extensive documentation on this weighted
2 // finite-state transducer library.
3 //
4 // Simple concrete immutable FST whose states and arcs are each stored in
5 // single arrays.
6 
7 #ifndef FST_CONST_FST_H_
8 #define FST_CONST_FST_H_
9 
10 #include <climits>
11 #include <string>
12 #include <vector>
13 
14 // Google-only...
15 // ...Google-only
16 #include <fst/log.h>
17 
18 #include <fst/expanded-fst.h>
19 #include <fst/fst-decl.h>
20 #include <fst/mapped-file.h>
21 #include <fst/test-properties.h>
22 #include <fst/util.h>
23 
24 
25 namespace fst {
26 
27 template <class A, class Unsigned>
28 class ConstFst;
29 
30 template <class F, class G>
31 void Cast(const F &, G *);
32 
33 namespace internal {
34 
35 // States and arcs each implemented by single arrays, templated on the
36 // Arc definition. Unsigned is used to represent indices into the arc array.
37 template <class A, class Unsigned>
38 class ConstFstImpl : public FstImpl<A> {
39  public:
40  using Arc = A;
41  using StateId = typename Arc::StateId;
42  using Weight = typename Arc::Weight;
43 
    // NOTE(review): listing lines 44-45 and 47-48 are absent from this extract
    // (the gutter jumps 43 -> 46 -> 49); only this using-declaration survives.
46  using FstImpl<A>::SetType;
49 
    // NOTE(review): listing line 50 is absent; what follows is the
    // member-initializer list and body of a constructor that leaves the FST
    // empty: null state/arc arrays, zero counts and kNoStateId as start state.
51  : states_(nullptr),
52  arcs_(nullptr),
53  narcs_(0),
54  nstates_(0),
55  start_(kNoStateId) {
    // The type name is "const", with the bit width of Unsigned appended
    // whenever Unsigned does not have the same size as uint32.
56  string type = "const";
57  if (sizeof(Unsigned) != sizeof(uint32)) {
58  type += std::to_string(CHAR_BIT * sizeof(Unsigned));
59  }
60  SetType(type);
61  SetProperties(kNullProperties | kStaticProperties);
62  }
63 
    // Builds the implementation from another FST (defined out of line).
64  explicit ConstFstImpl(const Fst<Arc> &fst);
65 
    // Returns the stored start state.
66  StateId Start() const { return start_; }
67 
    // The per-state accessors below index states_ directly with s; no range
    // check is performed here.
68  Weight Final(StateId s) const { return states_[s].weight; }
69 
    // Returns the stored number of states.
70  StateId NumStates() const { return nstates_; }
71 
72  size_t NumArcs(StateId s) const { return states_[s].narcs; }
73 
74  size_t NumInputEpsilons(StateId s) const { return states_[s].niepsilons; }
75 
76  size_t NumOutputEpsilons(StateId s) const { return states_[s].noepsilons; }
77 
    // Reads an implementation from a stream (defined out of line).
78  static ConstFstImpl<Arc, Unsigned> *Read(std::istream &strm,
79  const FstReadOptions &opts);
80 
    // Returns a pointer to the first arc of state s inside the single arc
    // array, i.e. arcs_ offset by the state's recorded start position.
81  const Arc *Arcs(StateId s) const { return arcs_ + states_[s].pos; }
82 
83  // Provide information needed for generic state iterator.
    // NOTE(review): listing line 84 (the declarator) is absent; the
    // cross-reference list at the end of this listing records
    // `void InitStateIterator(StateIteratorData< Arc > *data) const` there.
    // The body sets no iterator base object and reports the state count.
85  data->base = nullptr;
86  data->nstates = nstates_;
87  }
88 
89  // Provide information needed for the generic arc iterator.
    // NOTE(review): listing line 90 (the declarator) is absent; the
    // cross-reference list records
    // `void InitArcIterator(StateId s, ArcIteratorData< Arc > *data) const`.
    // The body exposes state s's slice of the arc array, with no iterator base
    // object and no reference count.
91  data->base = nullptr;
92  data->arcs = arcs_ + states_[s].pos;
93  data->narcs = states_[s].narcs;
94  data->ref_count = nullptr;
95  }
96 
97  private:
98  // Used to find narcs_ and nstates_ in Write.
99  friend class ConstFst<Arc, Unsigned>;
100 
101  // States implemented by array *states_ below, arcs by (single) *arcs_.
102  struct ConstState {
103  Weight weight; // Final weight.
104  Unsigned pos; // Start of state's arcs in *arcs_.
105  Unsigned narcs; // Number of arcs (per state).
106  Unsigned niepsilons; // Number of input epsilons.
107  Unsigned noepsilons; // Number of output epsilons.
108 
    // Only the weight is initialized (to Weight::Zero()); the Unsigned fields
    // are left for the code that fills in the state to assign.
109  ConstState() : weight(Weight::Zero()) {}
110  };
111 
112  // Properties always true of this FST class.
113  static constexpr uint64 kStaticProperties = kExpanded;
114  // Current unaligned file format version. The unaligned version was added and
115  // made the default since the aligned version does not work on pipes.
116  static constexpr int kFileVersion = 2;
117  // Current aligned file format version.
118  static constexpr int kAlignedFileVersion = 1;
119  // Minimum file format version supported.
120  static constexpr int kMinFileVersion = 1;
121 
    // states_ and arcs_ are plain pointers obtained from the mutable_data() of
    // the two regions below (see the out-of-line constructor and Read).
122  std::unique_ptr<MappedFile> states_region_; // Mapped file for states.
123  std::unique_ptr<MappedFile> arcs_region_; // Mapped file for arcs.
124  ConstState *states_; // States representation.
125  Arc *arcs_; // Arcs representation.
126  size_t narcs_; // Number of arcs.
127  StateId nstates_; // Number of states.
128  StateId start_; // Initial state.
129 
    // Copying is disabled.
130  ConstFstImpl(const ConstFstImpl &) = delete;
131  ConstFstImpl &operator=(const ConstFstImpl &) = delete;
132 };
133 
134 template <class Arc, class Unsigned>
136 
137 template <class Arc, class Unsigned>
139 
140 template <class Arc, class Unsigned>
142 
143 template <class Arc, class Unsigned>
145 
146 template <class Arc, class Unsigned>
    // NOTE(review): listing line 147 (the declarator) is absent from this
    // extract. The body reads a parameter named `fst`, which matches the
    // in-class declaration `explicit ConstFstImpl(const Fst<Arc> &fst)`.
    //
    // Copies `fst` into two flat arrays: one ConstState per state and one Arc
    // per arc, with each state's arcs stored contiguously.
148  : narcs_(0), nstates_(0) {
    // Type name is "const", plus the bit width of Unsigned when its size
    // differs from that of uint32.
149  string type = "const";
150  if (sizeof(Unsigned) != sizeof(uint32)) {
151  type += std::to_string(CHAR_BIT * sizeof(Unsigned));
152  }
153  SetType(type);
    // NOTE(review): listing lines 154-155 are absent from this extract.
156  start_ = fst.Start();
157  // Counts states and arcs.
158  for (StateIterator<Fst<Arc>> siter(fst); !siter.Done(); siter.Next()) {
159  ++nstates_;
160  narcs_ += fst.NumArcs(siter.Value());
161  }
    // Allocates one region per array, sized from the counts above.
    // NOTE(review): the Allocate() results are dereferenced without a null
    // check — confirm Allocate cannot return nullptr.
162  states_region_.reset(MappedFile::Allocate(nstates_ * sizeof(*states_)));
163  arcs_region_.reset(MappedFile::Allocate(narcs_ * sizeof(*arcs_)));
164  states_ = reinterpret_cast<ConstState *>(states_region_->mutable_data());
165  arcs_ = reinterpret_cast<Arc *>(arcs_region_->mutable_data());
    // Second pass: fills in each state record and appends its arcs at `pos`.
    // This loop addresses states as 0..nstates_-1 rather than through the state
    // iterator, so it assumes the source FST's state ids are exactly that range
    // — TODO confirm.
    // NOTE(review): `pos` is a size_t stored into an Unsigned field; a narrower
    // Unsigned would truncate large offsets.
166  size_t pos = 0;
167  for (StateId s = 0; s < nstates_; ++s) {
168  states_[s].weight = fst.Final(s);
169  states_[s].pos = pos;
170  states_[s].narcs = 0;
171  states_[s].niepsilons = 0;
172  states_[s].noepsilons = 0;
173  for (ArcIterator<Fst<Arc>> aiter(fst, s); !aiter.Done(); aiter.Next()) {
174  const auto &arc = aiter.Value();
175  ++states_[s].narcs;
    // Label 0 is counted as epsilon on the input and output side respectively.
176  if (arc.ilabel == 0) ++states_[s].niepsilons;
177  if (arc.olabel == 0) ++states_[s].noepsilons;
178  arcs_[pos] = arc;
179  ++pos;
180  }
181  }
    // If the source reports kMutable (queried without testing), its copyable
    // properties are taken with testing enabled; otherwise CheckProperties is
    // used. NOTE(review): listing lines 186-187 (the remaining CheckProperties
    // arguments) are absent from this extract.
182  const auto props =
183  fst.Properties(kMutable, false)
184  ? fst.Properties(kCopyProperties, true)
185  : CheckProperties(
188  SetProperties(props | kStaticProperties);
189 }
190 
191 template <class Arc, class Unsigned>
    // NOTE(review): listing line 192 (start of the declarator) is absent from
    // this extract; the cross-reference list at the end of this listing records
    // `static ConstFstImpl< Arc, Unsigned > * Read(std::istream &strm,
    // const FstReadOptions &opts)` at that line.
    //
    // Reads the header, then the state array, then the arc array from `strm`.
    // Returns a raw pointer released from the local unique_ptr on success and
    // nullptr on any failure (the unique_ptr frees the partial object).
193  std::istream &strm, const FstReadOptions &opts) {
194  using ConstState = typename ConstFstImpl<Arc, Unsigned>::ConstState;
    // NOTE(review): listing line 196 (the argument that initializes `impl`) is
    // absent from this extract.
195  std::unique_ptr<ConstFstImpl<Arc, Unsigned>> impl(
197  FstHeader hdr;
198  if (!impl->ReadHeader(strm, opts, kMinFileVersion, &hdr)) return nullptr;
    // Start state and both counts come straight from the header.
199  impl->start_ = hdr.Start();
200  impl->nstates_ = hdr.NumStates();
201  impl->narcs_ = hdr.NumArcs();
202  // Ensures compatibility.
    // NOTE(review): listing line 204 (the body of this `if`) is absent from this
    // extract.
203  if (hdr.Version() == kAlignedFileVersion) {
205  }
    // For headers flagged IS_ALIGNED, the stream is aligned before each array.
206  if ((hdr.GetFlags() & FstHeader::IS_ALIGNED) && !AlignInput(strm)) {
207  LOG(ERROR) << "ConstFst::Read: Alignment failed: " << opts.source;
208  return nullptr;
209  }
    // State array: nstates_ records of sizeof(ConstState) bytes. Memory mapping
    // is requested only when opts.mode is FstReadOptions::MAP.
210  size_t b = impl->nstates_ * sizeof(ConstState);
211  impl->states_region_.reset(
212  MappedFile::Map(&strm, opts.mode == FstReadOptions::MAP, opts.source, b));
213  if (!strm || !impl->states_region_) {
214  LOG(ERROR) << "ConstFst::Read: Read failed: " << opts.source;
215  return nullptr;
216  }
217  impl->states_ =
218  reinterpret_cast<ConstState *>(impl->states_region_->mutable_data());
219  if ((hdr.GetFlags() & FstHeader::IS_ALIGNED) && !AlignInput(strm)) {
220  LOG(ERROR) << "ConstFst::Read: Alignment failed: " << opts.source;
221  return nullptr;
222  }
    // Arc array: narcs_ records of sizeof(Arc) bytes, obtained the same way.
223  b = impl->narcs_ * sizeof(Arc);
224  impl->arcs_region_.reset(
225  MappedFile::Map(&strm, opts.mode == FstReadOptions::MAP, opts.source, b));
226  if (!strm || !impl->arcs_region_) {
227  LOG(ERROR) << "ConstFst::Read: Read failed: " << opts.source;
228  return nullptr;
229  }
230  impl->arcs_ = reinterpret_cast<Arc *>(impl->arcs_region_->mutable_data());
    // Ownership passes to the caller.
231  return impl.release();
232 }
233 
234 } // namespace internal
235 
236 // Simple concrete immutable FST. This class attaches interface to
237 // implementation and handles reference counting, delegating most methods to
238 // ImplToExpandedFst. The unsigned type Unsigned is used to represent indices
239 // into the arc array (default declared in fst-decl.h).
240 template <class A, class Unsigned>
241 class ConstFst : public ImplToExpandedFst<internal::ConstFstImpl<A, Unsigned>> {
242  public:
243  using Arc = A;
244  using StateId = typename Arc::StateId;
245 
    // NOTE(review): listing line 246 is absent from this extract; `Impl`, used
    // throughout this class, is presumably introduced there — confirm.
247  using ConstState = typename Impl::ConstState;
248 
    // The iterator specializations call GetImpl(), which is private here.
249  friend class StateIterator<ConstFst<Arc, Unsigned>>;
250  friend class ArcIterator<ConstFst<Arc, Unsigned>>;
251 
252  template <class F, class G>
253  void friend Cast(const F &, G *);
254 
    // Creates a ConstFst around a default-constructed implementation.
255  ConstFst() : ImplToExpandedFst<Impl>(std::make_shared<Impl>()) {}
256 
    // Creates a ConstFst whose implementation is built from `fst`.
257  explicit ConstFst(const Fst<Arc> &fst)
258  : ImplToExpandedFst<Impl>(std::make_shared<Impl>(fst)) {}
259 
    // Copy constructor. `safe` is accepted but not used: only `fst` is forwarded
    // to the base class.
260  ConstFst(const ConstFst<A, Unsigned> &fst, bool safe = false)
261  : ImplToExpandedFst<Impl>(fst) {}
262 
263  // Gets a copy of this ConstFst. See Fst<>::Copy() for further doc.
    // The caller owns the returned heap-allocated object.
264  ConstFst<A, Unsigned> *Copy(bool safe = false) const override {
265  return new ConstFst<A, Unsigned>(*this, safe);
266  }
267 
268  // Reads a ConstFst from an input stream, returning nullptr on error.
    // On success the implementation returned by Impl::Read is handed to a
    // shared_ptr and the caller owns the new ConstFst.
269  static ConstFst<A, Unsigned> *Read(std::istream &strm,
270  const FstReadOptions &opts) {
271  auto *impl = Impl::Read(strm, opts);
272  return impl ? new ConstFst<A, Unsigned>(std::shared_ptr<Impl>(impl))
273  : nullptr;
274  }
275 
276  // Read a ConstFst from a file; return nullptr on error; empty filename reads
277  // from standard input.
278  static ConstFst<A, Unsigned> *Read(const string &filename) {
279  auto *impl = ImplToExpandedFst<Impl>::Read(filename);
280  return impl ? new ConstFst<A, Unsigned>(std::shared_ptr<Impl>(impl))
281  : nullptr;
282  }
283 
    // Writes this FST to `strm` by delegating to the static WriteFst.
284  bool Write(std::ostream &strm, const FstWriteOptions &opts) const override {
285  return WriteFst(*this, strm, opts);
286  }
287 
    // Writes this FST to the named file by delegating to Fst<Arc>::WriteFile.
288  bool Write(const string &filename) const override {
289  return Fst<Arc>::WriteFile(filename);
290  }
291 
    // Writes any FST type in Const format (defined out of line).
292  template <class FST>
293  static bool WriteFst(const FST &fst, std::ostream &strm,
294  const FstWriteOptions &opts);
295 
    // Both iterator initializers forward to the implementation.
296  void InitStateIterator(StateIteratorData<Arc> *data) const override {
297  GetImpl()->InitStateIterator(data);
298  }
299 
300  void InitArcIterator(StateId s, ArcIteratorData<Arc> *data) const override {
301  GetImpl()->InitArcIterator(s, data);
302  }
303 
304  private:
    // Wraps an existing implementation; used by the Read overloads above.
305  explicit ConstFst(std::shared_ptr<Impl> impl)
306  : ImplToExpandedFst<Impl>(impl) {}
307 
308  using ImplToFst<Impl, ExpandedFst<Arc>>::GetImpl;
309 
310  // Uses overloading to extract the type of the argument.
    // This overload is chosen for a ConstFst argument and yields its
    // implementation; the template below handles every other type and yields
    // nullptr. WriteFst uses the result to decide whether counts are known.
311  static const Impl *GetImplIfConstFst(const ConstFst &const_fst) {
312  return const_fst.GetImpl();
313  }
314 
315  // NB: this does not give privileged treatment to subtypes of ConstFst.
316  template <typename FST>
317  static Impl *GetImplIfConstFst(const FST &fst) {
318  return nullptr;
319  }
320 
    // Assignment is disabled.
321  ConstFst &operator=(const ConstFst &) = delete;
322 };
323 
324 // Writes FST in Const format, potentially with a pass over the machine before
325 // writing to compute number of states and arcs.
326 template <class Arc, class Unsigned>
327 template <class FST>
328 bool ConstFst<Arc, Unsigned>::WriteFst(const FST &fst, std::ostream &strm,
329  const FstWriteOptions &opts) {
330  const auto file_version =
333  size_t num_arcs = 0; // To silence -Wsometimes-uninitialized warnings.
334  size_t num_states = 0; // Ditto.
335  std::streamoff start_offset = 0;
336  bool update_header = true;
337  if (const auto *impl = GetImplIfConstFst(fst)) {
338  num_arcs = impl->narcs_;
339  num_states = impl->nstates_;
340  update_header = false;
341  } else if (opts.stream_write || (start_offset = strm.tellp()) == -1) {
342  // precompute values needed for header when we cannot seek to rewrite it.
343  num_arcs = 0;
344  num_states = 0;
345  for (StateIterator<FST> siter(fst); !siter.Done(); siter.Next()) {
346  num_arcs += fst.NumArcs(siter.Value());
347  ++num_states;
348  }
349  update_header = false;
350  }
351  FstHeader hdr;
352  hdr.SetStart(fst.Start());
353  hdr.SetNumStates(num_states);
354  hdr.SetNumArcs(num_arcs);
355  string type = "const";
356  if (sizeof(Unsigned) != sizeof(uint32)) {
357  type += std::to_string(CHAR_BIT * sizeof(Unsigned));
358  }
359  const auto properties =
360  fst.Properties(kCopyProperties, true) |
362  internal::FstImpl<Arc>::WriteFstHeader(fst, strm, opts, file_version, type,
363  properties, &hdr);
364  if (opts.align && !AlignOutput(strm)) {
365  LOG(ERROR) << "Could not align file during write after header";
366  return false;
367  }
368  size_t pos = 0;
369  size_t states = 0;
371  for (StateIterator<FST> siter(fst); !siter.Done(); siter.Next()) {
372  const auto s = siter.Value();
373  state.weight = fst.Final(s);
374  state.pos = pos;
375  state.narcs = fst.NumArcs(s);
376  state.niepsilons = fst.NumInputEpsilons(s);
377  state.noepsilons = fst.NumOutputEpsilons(s);
378  strm.write(reinterpret_cast<const char *>(&state), sizeof(state));
379  pos += state.narcs;
380  ++states;
381  }
382  hdr.SetNumStates(states);
383  hdr.SetNumArcs(pos);
384  if (opts.align && !AlignOutput(strm)) {
385  LOG(ERROR) << "Could not align file during write after writing states";
386  }
387  for (StateIterator<FST> siter(fst); !siter.Done(); siter.Next()) {
388  for (ArcIterator<FST> aiter(fst, siter.Value()); !aiter.Done();
389  aiter.Next()) {
390  const auto &arc = aiter.Value();
391 // Google-only...
392 #ifdef MEMORY_SANITIZER
393  // arc may contain padding which has unspecified contents. Tell MSAN to
394  // not complain about it when writing it to a file.
395  ANNOTATE_MEMORY_IS_INITIALIZED(reinterpret_cast<const char *>(&arc),
396  sizeof(arc));
397 #endif
398  // ...Google-only
399  strm.write(reinterpret_cast<const char *>(&arc), sizeof(arc));
400  }
401  }
402  strm.flush();
403  if (!strm) {
404  LOG(ERROR) << "ConstFst::WriteFst: write failed: " << opts.source;
405  return false;
406  }
407  if (update_header) {
409  fst, strm, opts, file_version, type, properties, &hdr, start_offset);
410  } else {
411  if (hdr.NumStates() != num_states) {
412  LOG(ERROR) << "Inconsistent number of states observed during write";
413  return false;
414  }
415  if (hdr.NumArcs() != num_arcs) {
416  LOG(ERROR) << "Inconsistent number of arcs observed during write";
417  return false;
418  }
419  }
420  return true;
421 }
422 
423 // Specialization for ConstFst; see generic version in fst.h for sample usage
424 // (but use the ConstFst type instead). This version should inline.
425 template <class Arc, class Unsigned>
426 class StateIterator<ConstFst<Arc, Unsigned>> {
427  public:
428  using StateId = typename Arc::StateId;
429 
    // NOTE(review): listing line 430 (the constructor declarator) is absent
    // from this extract; the cross-reference list at the end of this listing
    // records `StateIterator(const ConstFst< Arc, Unsigned > &fst)` there.
    // The state count is read once from the implementation and iteration
    // starts at state 0.
431  : nstates_(fst.GetImpl()->NumStates()), s_(0) {}
432 
    // True once the current id has reached the captured state count.
433  bool Done() const { return s_ >= nstates_; }
434 
    // The current state id is the iteration counter itself.
435  StateId Value() const { return s_; }
436 
437  void Next() { ++s_; }
438 
    // Restarts iteration at state 0.
439  void Reset() { s_ = 0; }
440 
441  private:
442  const StateId nstates_;
443  StateId s_;
444 };
445 
446 // Specialization for ConstFst; see generic version in fst.h for sample usage
447 // (but use the ConstFst type instead). This version should inline.
448 template <class Arc, class Unsigned>
449 class ArcIterator<ConstFst<Arc, Unsigned>> {
450  public:
451  using StateId = typename Arc::StateId;
452 
    // NOTE(review): listing line 453 (the constructor declarator) is absent
    // from this extract; the cross-reference list at the end of this listing
    // records `ArcIterator(const ConstFst< Arc, Unsigned > &fst, StateId s)`.
    // Captures the pointer to state s's first arc and its arc count from the
    // implementation, and starts at position 0.
454  : arcs_(fst.GetImpl()->Arcs(s)),
455  narcs_(fst.GetImpl()->NumArcs(s)),
456  i_(0) {}
457 
    // True once the position has reached the captured arc count.
458  bool Done() const { return i_ >= narcs_; }
459 
    // Returns the arc at the current position; the index is not range-checked.
460  const Arc &Value() const { return arcs_[i_]; }
461 
462  void Next() { ++i_; }
463 
464  size_t Position() const { return i_; }
465 
    // Restarts iteration at the state's first arc.
466  void Reset() { i_ = 0; }
467 
    // Jumps to position a; the value is stored as-is, without a range check.
468  void Seek(size_t a) { i_ = a; }
469 
    // Always reports kArcValueFlags.
470  constexpr uint32 Flags() const { return kArcValueFlags; }
471 
    // NOTE(review): listing line 472 is absent from this extract.
473 
474  private:
475  const Arc *arcs_;
476  size_t narcs_;
477  size_t i_;
478 };
479 
480 // A useful alias when using StdArc.
482 
483 } // namespace fst
484 
485 #endif // FST_CONST_FST_H_
int32 Version() const
Definition: fst.h:123
static Impl * Read(std::istream &strm, const FstReadOptions &opts)
Definition: expanded-fst.h:130
typename Arc::Weight Weight
Definition: const-fst.h:42
StateId Start() const
Definition: const-fst.h:66
string source
Definition: fst.h:84
void Cast(const F &, G *)
uint64_t uint64
Definition: types.h:32
virtual size_t NumArcs(StateId) const =0
void SetNumArcs(int64 numarcs)
Definition: fst.h:149
int64 NumStates() const
Definition: fst.h:131
constexpr uint64 kUnweightedCycles
Definition: properties.h:126
int64 Start() const
Definition: fst.h:129
#define LOG(type)
Definition: log.h:48
virtual Weight Final(StateId) const =0
void SetOutputSymbols(const SymbolTable *osyms)
Definition: fst.h:700
constexpr uint64 kWeightedCycles
Definition: properties.h:123
static ConstFst< A, Unsigned > * Read(std::istream &strm, const FstReadOptions &opts)
Definition: const-fst.h:269
static bool UpdateFstHeader(const Fst< Arc > &fst, std::ostream &strm, const FstWriteOptions &opts, int version, const string &type, uint64 properties, FstHeader *hdr, size_t header_offset)
Definition: fst.h:773
bool stream_write
Definition: fst.h:89
static bool WriteFst(const FST &fst, std::ostream &strm, const FstWriteOptions &opts)
Definition: const-fst.h:328
constexpr uint64 kCopyProperties
Definition: properties.h:138
constexpr int kNoStateId
Definition: fst.h:180
constexpr uint64 kExpanded
Definition: properties.h:27
virtual uint64 Properties(uint64 mask, bool test) const =0
void SetType(const string &type)
Definition: fst.h:664
ConstFst(const Fst< Arc > &fst)
Definition: const-fst.h:257
ArcIterator(const ConstFst< Arc, Unsigned > &fst, StateId s)
Definition: const-fst.h:453
void InitArcIterator(StateId s, ArcIteratorData< Arc > *data) const override
Definition: const-fst.h:300
bool WriteFile(const string &filename) const
Definition: fst.h:303
StateIteratorBase< Arc > * base
Definition: fst.h:351
const Arc * Arcs(StateId s) const
Definition: const-fst.h:81
bool AlignOutput(std::ostream &strm)
Definition: util.cc:76
uint64 CheckProperties(const Fst< Arc > &fst, uint64 check_mask, uint64 test_mask)
typename Arc::StateId StateId
Definition: const-fst.h:41
typename Impl::ConstState ConstState
Definition: const-fst.h:247
int32 GetFlags() const
Definition: fst.h:125
size_t NumArcs(StateId s) const
Definition: const-fst.h:72
ConstFst< A, Unsigned > * Copy(bool safe=false) const override
Definition: const-fst.h:264
void SetNumStates(int64 numstates)
Definition: fst.h:147
StateId nstates
Definition: fst.h:353
static void WriteFstHeader(const Fst< Arc > &fst, std::ostream &strm, const FstWriteOptions &opts, int version, const string &type, uint64 properties, FstHeader *hdr)
Definition: fst.h:740
size_t NumInputEpsilons(StateId s) const
Definition: const-fst.h:74
virtual StateId Start() const =0
bool Done() const
Definition: fst.h:499
void InitArcIterator(StateId s, ArcIteratorData< Arc > *data) const
Definition: const-fst.h:90
static MappedFile * Map(std::istream *istrm, bool memorymap, const string &source, size_t size)
Definition: mapped-file.cc:38
string source
Definition: fst.h:57
StateIterator(const ConstFst< Arc, Unsigned > &fst)
Definition: const-fst.h:430
void SetInputSymbols(const SymbolTable *isyms)
Definition: fst.h:696
void SetFlags(int32 flags)
Definition: fst.h:141
typename Arc::StateId StateId
Definition: const-fst.h:244
uint32_t uint32
Definition: types.h:31
void SetStart(int64 start)
Definition: fst.h:145
bool Write(std::ostream &strm, const FstWriteOptions &opts) const override
Definition: const-fst.h:284
static ConstFst< A, Unsigned > * Read(const string &filename)
Definition: const-fst.h:278
virtual const SymbolTable * InputSymbols() const =0
void SetProperties(uint64 props)
Definition: fst.h:670
bool Done() const
Definition: fst.h:383
const Arc * arcs
Definition: fst.h:462
static MappedFile * Allocate(size_t size, int align=kArchAlignment)
Definition: mapped-file.cc:104
size_t NumOutputEpsilons(StateId s) const
Definition: const-fst.h:76
bool Write(const string &filename) const override
Definition: const-fst.h:288
ConstFst(const ConstFst< A, Unsigned > &fst, bool safe=false)
Definition: const-fst.h:260
FileReadMode mode
Definition: fst.h:64
constexpr uint64 kNullProperties
Definition: properties.h:131
ArcIteratorBase< Arc > * base
Definition: fst.h:461
void InitStateIterator(StateIteratorData< Arc > *data) const
Definition: const-fst.h:84
StateId NumStates() const
Definition: const-fst.h:70
static ConstFstImpl< Arc, Unsigned > * Read(std::istream &strm, const FstReadOptions &opts)
Definition: const-fst.h:192
constexpr uint64 kMutable
Definition: properties.h:30
Weight Final(StateId s) const
Definition: const-fst.h:68
int * ref_count
Definition: fst.h:464
void InitStateIterator(StateIteratorData< Arc > *data) const override
Definition: const-fst.h:296
size_t narcs
Definition: fst.h:463
int64 NumArcs() const
Definition: fst.h:133
bool AlignInput(std::istream &strm)
Definition: util.cc:60
const Impl * GetImpl() const
Definition: fst.h:945
virtual const SymbolTable * OutputSymbols() const =0