FST  openfst-1.8.2
OpenFst Library
const-fst.h
Go to the documentation of this file.
1 // Copyright 2005-2020 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the 'License');
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an 'AS IS' BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // See www.openfst.org for extensive documentation on this weighted
16 // finite-state transducer library.
17 //
18 // Simple concrete immutable FST whose states and arcs are each stored in
19 // single arrays.
20 
21 #ifndef FST_CONST_FST_H_
22 #define FST_CONST_FST_H_
23 
24 #include <climits>
25 #include <cstdint>
26 #include <string>
27 #include <vector>
28 
29 #include <fst/log.h>
30 
31 #include <fst/expanded-fst.h>
32 #include <fst/fst-decl.h>
33 #include <fst/mapped-file.h>
34 #include <fst/test-properties.h>
35 #include <fst/util.h>
36 
37 namespace fst {
38 
39 template <class A, class Unsigned>
40 class ConstFst;
41 
42 template <class F, class G>
43 void Cast(const F &, G *);
44 
45 namespace internal {
46 
47 // States and arcs each implemented by single arrays, templated on the
48 // Arc definition. Unsigned is used to represent indices into the arc array.
49 template <class A, class Unsigned>
50 class ConstFstImpl : public FstImpl<A> {
51  public:
52  using Arc = A;
53  using StateId = typename Arc::StateId;
54  using Weight = typename Arc::Weight;
55 
58  using FstImpl<A>::SetType;
61 
63  std::string type = "const";
64  if (sizeof(Unsigned) != sizeof(uint32_t)) {
65  type += std::to_string(CHAR_BIT * sizeof(Unsigned));
66  }
67  SetType(type);
68  SetProperties(kNullProperties | kStaticProperties);
69  }
70 
71  explicit ConstFstImpl(const Fst<Arc> &fst);
72 
73  StateId Start() const { return start_; }
74 
75  Weight Final(StateId s) const { return states_[s].final_weight; }
76 
77  StateId NumStates() const { return nstates_; }
78 
79  size_t NumArcs(StateId s) const { return states_[s].narcs; }
80 
81  size_t NumInputEpsilons(StateId s) const { return states_[s].niepsilons; }
82 
83  size_t NumOutputEpsilons(StateId s) const { return states_[s].noepsilons; }
84 
85  static ConstFstImpl *Read(std::istream &strm, const FstReadOptions &opts);
86 
87  const Arc *Arcs(StateId s) const { return arcs_ + states_[s].pos; }
88 
89  // Provide information needed for generic state iterator.
91  data->base = nullptr;
92  data->nstates = nstates_;
93  }
94 
95  // Provide information needed for the generic arc iterator.
97  data->base = nullptr;
98  data->arcs = arcs_ + states_[s].pos;
99  data->narcs = states_[s].narcs;
100  data->ref_count = nullptr;
101  }
102 
103  private:
104  // Used to find narcs_ and nstates_ in Write.
105  friend class ConstFst<Arc, Unsigned>;
106 
107  // States implemented by array *states_ below, arcs by (single) *arcs_.
108  struct ConstState {
109  Weight final_weight; // Final weight.
110  Unsigned pos; // Start of state's arcs in *arcs_.
111  Unsigned narcs; // Number of arcs (per state).
112  Unsigned niepsilons; // Number of input epsilons.
113  Unsigned noepsilons; // Number of output epsilons.
114 
115  ConstState() : final_weight(Weight::Zero()) {}
116  };
117 
118  // Properties always true of this FST class.
119  static constexpr uint64_t kStaticProperties = kExpanded;
120  // Current unaligned file format version. The unaligned version was added and
121  // made the default since the aligned version does not work on pipes.
122  static constexpr int kFileVersion = 2;
123  // Current aligned file format version.
124  static constexpr int kAlignedFileVersion = 1;
125  // Minimum file format version supported.
126  static constexpr int kMinFileVersion = 1;
127 
128  std::unique_ptr<MappedFile> states_region_; // Mapped file for states.
129  std::unique_ptr<MappedFile> arcs_region_; // Mapped file for arcs.
130  ConstState *states_ = nullptr; // States representation.
131  Arc *arcs_ = nullptr; // Arcs representation.
132  size_t narcs_ = 0; // Number of arcs.
133  StateId nstates_ = 0; // Number of states.
134  StateId start_ = kNoStateId; // Initial state.
135 
136  ConstFstImpl(const ConstFstImpl &) = delete;
137  ConstFstImpl &operator=(const ConstFstImpl &) = delete;
138 };
139 
140 template <class Arc, class Unsigned>
142  std::string type = "const";
143  if (sizeof(Unsigned) != sizeof(uint32_t)) {
144  type += std::to_string(CHAR_BIT * sizeof(Unsigned));
145  }
146  SetType(type);
149  start_ = fst.Start();
150  // Counts states and arcs.
151  for (StateIterator<Fst<Arc>> siter(fst); !siter.Done(); siter.Next()) {
152  ++nstates_;
153  narcs_ += fst.NumArcs(siter.Value());
154  }
155  states_region_.reset(MappedFile::AllocateType<ConstState>(nstates_));
156  arcs_region_.reset(MappedFile::AllocateType<Arc>(narcs_));
157  states_ = static_cast<ConstState *>(states_region_->mutable_data());
158  arcs_ = static_cast<Arc *>(arcs_region_->mutable_data());
159  size_t pos = 0;
160  for (StateId s = 0; s < nstates_; ++s) {
161  states_[s].final_weight = fst.Final(s);
162  states_[s].pos = pos;
163  states_[s].narcs = 0;
164  states_[s].niepsilons = 0;
165  states_[s].noepsilons = 0;
166  for (ArcIterator<Fst<Arc>> aiter(fst, s); !aiter.Done(); aiter.Next()) {
167  const auto &arc = aiter.Value();
168  ++states_[s].narcs;
169  if (arc.ilabel == 0) ++states_[s].niepsilons;
170  if (arc.olabel == 0) ++states_[s].noepsilons;
171  arcs_[pos] = arc;
172  ++pos;
173  }
174  }
175  const auto props =
176  fst.Properties(kMutable, false)
177  ? fst.Properties(kCopyProperties, true)
178  : CheckProperties(
181  SetProperties(props | kStaticProperties);
182 }
183 
184 template <class Arc, class Unsigned>
186  std::istream &strm, const FstReadOptions &opts) {
187  auto impl = std::make_unique<ConstFstImpl>();
188  FstHeader hdr;
189  if (!impl->ReadHeader(strm, opts, kMinFileVersion, &hdr)) return nullptr;
190  impl->start_ = hdr.Start();
191  impl->nstates_ = hdr.NumStates();
192  impl->narcs_ = hdr.NumArcs();
193  // Ensures compatibility.
194  if (hdr.Version() == kAlignedFileVersion) {
195  hdr.SetFlags(hdr.GetFlags() | FstHeader::IS_ALIGNED);
196  }
197  if ((hdr.GetFlags() & FstHeader::IS_ALIGNED) && !AlignInput(strm)) {
198  LOG(ERROR) << "ConstFst::Read: Alignment failed: " << opts.source;
199  return nullptr;
200  }
201  size_t b = impl->nstates_ * sizeof(ConstState);
202  impl->states_region_.reset(
203  MappedFile::Map(strm, opts.mode == FstReadOptions::MAP, opts.source, b));
204  if (!strm || !impl->states_region_) {
205  LOG(ERROR) << "ConstFst::Read: Read failed: " << opts.source;
206  return nullptr;
207  }
208  impl->states_ =
209  static_cast<ConstState *>(impl->states_region_->mutable_data());
210  if ((hdr.GetFlags() & FstHeader::IS_ALIGNED) && !AlignInput(strm)) {
211  LOG(ERROR) << "ConstFst::Read: Alignment failed: " << opts.source;
212  return nullptr;
213  }
214  b = impl->narcs_ * sizeof(Arc);
215  impl->arcs_region_.reset(
216  MappedFile::Map(strm, opts.mode == FstReadOptions::MAP, opts.source, b));
217  if (!strm || !impl->arcs_region_) {
218  LOG(ERROR) << "ConstFst::Read: Read failed: " << opts.source;
219  return nullptr;
220  }
221  impl->arcs_ = static_cast<Arc *>(impl->arcs_region_->mutable_data());
222  return impl.release();
223 }
224 
225 } // namespace internal
226 
227 // Simple concrete immutable FST. This class attaches interface to
228 // implementation and handles reference counting, delegating most methods to
229 // ImplToExpandedFst. The unsigned type U is used to represent indices into the
230 // arc array (default declared in fst-decl.h).
231 //
232 // ConstFst is thread-safe.
233 template <class A, class Unsigned>
234 class ConstFst : public ImplToExpandedFst<internal::ConstFstImpl<A, Unsigned>> {
235  public:
236  using Arc = A;
237  using StateId = typename Arc::StateId;
238 
240  using ConstState = typename Impl::ConstState;
241 
242  friend class StateIterator<ConstFst<Arc, Unsigned>>;
243  friend class ArcIterator<ConstFst<Arc, Unsigned>>;
244 
245  template <class F, class G>
246  void friend Cast(const F &, G *);
247 
248  ConstFst() : ImplToExpandedFst<Impl>(std::make_shared<Impl>()) {}
249 
250  explicit ConstFst(const Fst<Arc> &fst)
251  : ImplToExpandedFst<Impl>(std::make_shared<Impl>(fst)) {}
252 
253  ConstFst(const ConstFst &fst, bool unused_safe = false)
254  : ImplToExpandedFst<Impl>(fst.GetSharedImpl()) {}
255 
256  // Gets a copy of this ConstFst. See Fst<>::Copy() for further doc.
257  ConstFst *Copy(bool safe = false) const override {
258  return new ConstFst(*this, safe);
259  }
260 
261  // Reads a ConstFst from an input stream, returning nullptr on error.
262  static ConstFst *Read(std::istream &strm, const FstReadOptions &opts) {
263  auto *impl = Impl::Read(strm, opts);
264  return impl ? new ConstFst(std::shared_ptr<Impl>(impl)) : nullptr;
265  }
266 
267  // Read a ConstFst from a file; return nullptr on error; empty source reads
268  // from standard input.
269  static ConstFst *Read(const std::string &source) {
270  auto *impl = ImplToExpandedFst<Impl>::Read(source);
271  return impl ? new ConstFst(std::shared_ptr<Impl>(impl)) : nullptr;
272  }
273 
274  bool Write(std::ostream &strm, const FstWriteOptions &opts) const override {
275  return WriteFst(*this, strm, opts);
276  }
277 
278  bool Write(const std::string &source) const override {
279  return Fst<Arc>::WriteFile(source);
280  }
281 
282  template <class FST>
283  static bool WriteFst(const FST &fst, std::ostream &strm,
284  const FstWriteOptions &opts);
285 
286  void InitStateIterator(StateIteratorData<Arc> *data) const override {
287  GetImpl()->InitStateIterator(data);
288  }
289 
290  void InitArcIterator(StateId s, ArcIteratorData<Arc> *data) const override {
291  GetImpl()->InitArcIterator(s, data);
292  }
293 
294  private:
295  explicit ConstFst(std::shared_ptr<Impl> impl)
296  : ImplToExpandedFst<Impl>(impl) {}
297 
298  using ImplToFst<Impl, ExpandedFst<Arc>>::GetImpl;
299 
300  // Uses overloading to extract the type of the argument.
301  static const Impl *GetImplIfConstFst(const ConstFst &const_fst) {
302  return const_fst.GetImpl();
303  }
304 
305  // NB: this does not give privileged treatment to subtypes of ConstFst.
306  template <typename FST>
307  static Impl *GetImplIfConstFst(const FST &fst) {
308  return nullptr;
309  }
310 
311  ConstFst &operator=(const ConstFst &) = delete;
312 };
313 
314 // Writes FST in Const format, potentially with a pass over the machine before
315 // writing to compute number of states and arcs.
316 template <class Arc, class Unsigned>
317 template <class FST>
318 bool ConstFst<Arc, Unsigned>::WriteFst(const FST &fst, std::ostream &strm,
319  const FstWriteOptions &opts) {
320  const auto file_version =
323  size_t num_arcs = 0; // To silence -Wsometimes-uninitialized warnings.
324  size_t num_states = 0; // Ditto.
325  std::streamoff start_offset = 0;
326  bool update_header = true;
327  if (const auto *impl = GetImplIfConstFst(fst)) {
328  num_arcs = impl->narcs_;
329  num_states = impl->nstates_;
330  update_header = false;
331  } else if (opts.stream_write || (start_offset = strm.tellp()) == -1) {
332  // precompute values needed for header when we cannot seek to rewrite it.
333  num_arcs = 0;
334  num_states = 0;
335  for (StateIterator<FST> siter(fst); !siter.Done(); siter.Next()) {
336  num_arcs += fst.NumArcs(siter.Value());
337  ++num_states;
338  }
339  update_header = false;
340  }
341  FstHeader hdr;
342  hdr.SetStart(fst.Start());
343  hdr.SetNumStates(num_states);
344  hdr.SetNumArcs(num_arcs);
345  std::string type = "const";
346  if (sizeof(Unsigned) != sizeof(uint32_t)) {
347  type += std::to_string(CHAR_BIT * sizeof(Unsigned));
348  }
349  const auto properties =
350  fst.Properties(kCopyProperties, true) |
352  internal::FstImpl<Arc>::WriteFstHeader(fst, strm, opts, file_version, type,
353  properties, &hdr);
354  if (opts.align && !AlignOutput(strm)) {
355  LOG(ERROR) << "Could not align file during write after header";
356  return false;
357  }
358  size_t pos = 0;
359  size_t states = 0;
360  ConstState state;
361  for (StateIterator<FST> siter(fst); !siter.Done(); siter.Next()) {
362  const auto s = siter.Value();
363  state.final_weight = fst.Final(s);
364  state.pos = pos;
365  state.narcs = fst.NumArcs(s);
366  state.niepsilons = fst.NumInputEpsilons(s);
367  state.noepsilons = fst.NumOutputEpsilons(s);
368  strm.write(reinterpret_cast<const char *>(&state), sizeof(state));
369  pos += state.narcs;
370  ++states;
371  }
372  hdr.SetNumStates(states);
373  hdr.SetNumArcs(pos);
374  if (opts.align && !AlignOutput(strm)) {
375  LOG(ERROR) << "Could not align file during write after writing states";
376  }
377  for (StateIterator<FST> siter(fst); !siter.Done(); siter.Next()) {
378  for (ArcIterator<FST> aiter(fst, siter.Value()); !aiter.Done();
379  aiter.Next()) {
380  const auto &arc = aiter.Value();
381  strm.write(reinterpret_cast<const char *>(&arc), sizeof(arc));
382  }
383  }
384  strm.flush();
385  if (!strm) {
386  LOG(ERROR) << "ConstFst::WriteFst: Write failed: " << opts.source;
387  return false;
388  }
389  if (update_header) {
391  fst, strm, opts, file_version, type, properties, &hdr, start_offset);
392  } else {
393  if (hdr.NumStates() != num_states) {
394  LOG(ERROR) << "Inconsistent number of states observed during write";
395  return false;
396  }
397  if (hdr.NumArcs() != num_arcs) {
398  LOG(ERROR) << "Inconsistent number of arcs observed during write";
399  return false;
400  }
401  }
402  return true;
403 }
404 
405 // Specialization for ConstFst; see generic version in fst.h for sample usage
406 // (but use the ConstFst type instead). This version should inline.
407 template <class Arc, class Unsigned>
408 class StateIterator<ConstFst<Arc, Unsigned>> {
409  public:
410  using StateId = typename Arc::StateId;
411 
413  : nstates_(fst.GetImpl()->NumStates()), s_(0) {}
414 
415  bool Done() const { return s_ >= nstates_; }
416 
417  StateId Value() const { return s_; }
418 
419  void Next() { ++s_; }
420 
421  void Reset() { s_ = 0; }
422 
423  private:
424  const StateId nstates_;
425  StateId s_;
426 };
427 
428 // Specialization for ConstFst; see generic version in fst.h for sample usage
429 // (but use the ConstFst type instead). This version should inline.
430 template <class Arc, class Unsigned>
431 class ArcIterator<ConstFst<Arc, Unsigned>> {
432  public:
433  using StateId = typename Arc::StateId;
434 
436  : arcs_(fst.GetImpl()->Arcs(s)),
437  narcs_(fst.GetImpl()->NumArcs(s)),
438  i_(0) {}
439 
440  bool Done() const { return i_ >= narcs_; }
441 
442  const Arc &Value() const { return arcs_[i_]; }
443 
444  void Next() { ++i_; }
445 
446  size_t Position() const { return i_; }
447 
448  void Reset() { i_ = 0; }
449 
450  void Seek(size_t a) { i_ = a; }
451 
452  constexpr uint8_t Flags() const { return kArcValueFlags; }
453 
454  void SetFlags(uint8_t, uint8_t) {}
455 
456  private:
457  const Arc *arcs_;
458  size_t narcs_;
459  size_t i_;
460 };
461 
462 // A useful alias when using StdArc.
464 
465 } // namespace fst
466 
467 #endif // FST_CONST_FST_H_
static Impl * Read(std::istream &strm, const FstReadOptions &opts)
Definition: expanded-fst.h:145
typename Arc::Weight Weight
Definition: const-fst.h:54
StateId Start() const
Definition: const-fst.h:73
void SetProperties(uint64_t props)
Definition: fst.h:710
bool AlignInput(std::istream &strm, size_t align=MappedFile::kArchAlignment)
Definition: util.cc:81
constexpr uint64_t kMutable
Definition: properties.h:48
constexpr uint64_t kWeightedCycles
Definition: properties.h:141
constexpr uint8_t kArcValueFlags
Definition: fst.h:453
void Cast(const F &, G *)
virtual uint64_t Properties(uint64_t mask, bool test) const =0
virtual size_t NumArcs(StateId) const =0
std::string source
Definition: fst.h:74
ConstFst * Copy(bool safe=false) const override
Definition: const-fst.h:257
static MappedFile * Map(std::istream &istrm, bool memorymap, const std::string &source, size_t size)
Definition: mapped-file.cc:74
#define LOG(type)
Definition: log.h:49
virtual Weight Final(StateId) const =0
void SetOutputSymbols(const SymbolTable *osyms)
Definition: fst.h:772
void SetNumArcs(int64_t numarcs)
Definition: fst.h:172
constexpr uint64_t kUnweightedCycles
Definition: properties.h:144
bool AlignOutput(std::ostream &strm, size_t align=MappedFile::kArchAlignment)
Definition: util.cc:97
static ConstFst * Read(std::istream &strm, const FstReadOptions &opts)
Definition: const-fst.h:262
bool stream_write
Definition: fst.h:107
static bool WriteFst(const FST &fst, std::ostream &strm, const FstWriteOptions &opts)
Definition: const-fst.h:318
constexpr int kNoStateId
Definition: fst.h:202
std::string source
Definition: fst.h:102
ConstFst(const Fst< Arc > &fst)
Definition: const-fst.h:250
ArcIterator(const ConstFst< Arc, Unsigned > &fst, StateId s)
Definition: const-fst.h:435
void InitArcIterator(StateId s, ArcIteratorData< Arc > *data) const override
Definition: const-fst.h:290
static void WriteFstHeader(const Fst< Arc > &fst, std::ostream &strm, const FstWriteOptions &opts, int version, std::string_view type, uint64_t properties, FstHeader *hdr)
Definition: fst.h:812
const Arc * Arcs(StateId s) const
Definition: const-fst.h:87
constexpr uint64_t kCopyProperties
Definition: properties.h:162
typename Arc::StateId StateId
Definition: const-fst.h:53
typename Impl::ConstState ConstState
Definition: const-fst.h:240
bool Write(const std::string &source) const override
Definition: const-fst.h:278
uint64_t CheckProperties(const Fst< Arc > &fst, uint64_t check_mask, uint64_t test_mask)
static bool UpdateFstHeader(const Fst< Arc > &fst, std::ostream &strm, const FstWriteOptions &opts, int version, std::string_view type, uint64_t properties, FstHeader *hdr, size_t header_offset)
Definition: fst.h:845
size_t NumArcs(StateId s) const
Definition: const-fst.h:79
StateId nstates
Definition: fst.h:384
size_t NumInputEpsilons(StateId s) const
Definition: const-fst.h:81
virtual StateId Start() const =0
bool Done() const
Definition: fst.h:533
void InitArcIterator(StateId s, ArcIteratorData< Arc > *data) const
Definition: const-fst.h:96
std::unique_ptr< StateIteratorBase< Arc > > base
Definition: fst.h:382
StateIterator(const ConstFst< Arc, Unsigned > &fst)
Definition: const-fst.h:412
constexpr uint64_t kNullProperties
Definition: properties.h:149
std::unique_ptr< ArcIteratorBase< Arc > > base
Definition: fst.h:495
void SetInputSymbols(const SymbolTable *isyms)
Definition: fst.h:768
bool WriteFile(const std::string &source) const
Definition: fst.h:332
int64_t NumArcs() const
Definition: fst.h:156
ConstFst(const ConstFst &fst, bool unused_safe=false)
Definition: const-fst.h:253
typename Arc::StateId StateId
Definition: const-fst.h:237
void SetNumStates(int64_t numstates)
Definition: fst.h:170
bool Write(std::ostream &strm, const FstWriteOptions &opts) const override
Definition: const-fst.h:274
virtual const SymbolTable * InputSymbols() const =0
int64_t NumStates() const
Definition: fst.h:154
void SetType(std::string_view type)
Definition: fst.h:700
bool Done() const
Definition: fst.h:415
void SetStart(int64_t start)
Definition: fst.h:168
const Arc * arcs
Definition: fst.h:496
size_t NumOutputEpsilons(StateId s) const
Definition: const-fst.h:83
FileReadMode mode
Definition: fst.h:81
void InitStateIterator(StateIteratorData< Arc > *data) const
Definition: const-fst.h:90
StateId NumStates() const
Definition: const-fst.h:77
static ConstFst * Read(const std::string &source)
Definition: const-fst.h:269
constexpr uint64_t kExpanded
Definition: properties.h:45
static ConstFstImpl * Read(std::istream &strm, const FstReadOptions &opts)
Definition: const-fst.h:185
Weight Final(StateId s) const
Definition: const-fst.h:75
int * ref_count
Definition: fst.h:498
void InitStateIterator(StateIteratorData< Arc > *data) const override
Definition: const-fst.h:286
size_t narcs
Definition: fst.h:497
const Impl * GetImpl() const
Definition: fst.h:1026
virtual const SymbolTable * OutputSymbols() const =0