FST  openfst-1.8.3
OpenFst Library
const-fst.h
Go to the documentation of this file.
1 // Copyright 2005-2024 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the 'License');
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an 'AS IS' BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // See www.openfst.org for extensive documentation on this weighted
16 // finite-state transducer library.
17 //
18 // Simple concrete immutable FST whose states and arcs are each stored in
19 // single arrays.
20 
21 #ifndef FST_CONST_FST_H_
22 #define FST_CONST_FST_H_
23 
24 #include <climits>
25 #include <cstddef>
26 #include <cstdint>
27 #include <ios>
28 #include <istream>
29 #include <memory>
30 #include <ostream>
31 #include <string>
32 #include <vector>
33 
34 #include <fst/log.h>
35 #include <fst/arc.h>
36 #include <fst/expanded-fst.h>
37 #include <fst/float-weight.h>
38 #include <fst/fst-decl.h>
39 #include <fst/fst.h>
40 #include <fst/impl-to-fst.h>
41 #include <fst/mapped-file.h>
42 #include <fst/properties.h>
43 #include <fst/test-properties.h>
44 #include <fst/util.h>
45 #include <string_view>
46 
47 namespace fst {
48 
49 template <class A, class Unsigned>
50 class ConstFst;
51 
52 template <class F, class G>
53 void Cast(const F &, G *);
54 
55 namespace internal {
56 
57 // States and arcs each implemented by single arrays, templated on the
58 // Arc definition. Unsigned is used to represent indices into the arc array.
59 template <class A, class Unsigned>
60 class ConstFstImpl : public FstImpl<A> {
61  public:
62  using Arc = A;
63  using StateId = typename Arc::StateId;
64  using Weight = typename Arc::Weight;
65 
68  using FstImpl<A>::SetType;
71 
73  std::string type = "const";
74  if (sizeof(Unsigned) != sizeof(uint32_t)) {
75  type += std::to_string(CHAR_BIT * sizeof(Unsigned));
76  }
77  SetType(type);
78  SetProperties(kNullProperties | kStaticProperties);
79  }
80 
81  explicit ConstFstImpl(const Fst<Arc> &fst);
82 
83  StateId Start() const { return start_; }
84 
85  Weight Final(StateId s) const { return states_[s].final_weight; }
86 
87  StateId NumStates() const { return nstates_; }
88 
89  size_t NumArcs(StateId s) const { return states_[s].narcs; }
90 
91  size_t NumInputEpsilons(StateId s) const { return states_[s].niepsilons; }
92 
93  size_t NumOutputEpsilons(StateId s) const { return states_[s].noepsilons; }
94 
95  static ConstFstImpl *Read(std::istream &strm, const FstReadOptions &opts);
96 
97  const Arc *Arcs(StateId s) const { return arcs_ + states_[s].pos; }
98 
99  // Provide information needed for generic state iterator.
101  data->base = nullptr;
102  data->nstates = nstates_;
103  }
104 
105  // Provide information needed for the generic arc iterator.
107  data->base = nullptr;
108  data->arcs = arcs_ + states_[s].pos;
109  data->narcs = states_[s].narcs;
110  data->ref_count = nullptr;
111  }
112 
113  private:
114  // Used to find narcs_ and nstates_ in Write.
115  friend class ConstFst<Arc, Unsigned>;
116 
117  // States implemented by array *states_ below, arcs by (single) *arcs_.
118  struct ConstState {
119  Weight final_weight; // Final weight.
120  Unsigned pos; // Start of state's arcs in *arcs_.
121  Unsigned narcs; // Number of arcs (per state).
122  Unsigned niepsilons; // Number of input epsilons.
123  Unsigned noepsilons; // Number of output epsilons.
124 
125  ConstState() : final_weight(Weight::Zero()) {}
126  };
127 
128  // Properties always true of this FST class.
129  static constexpr uint64_t kStaticProperties = kExpanded;
130  // Current unaligned file format version. The unaligned version was added and
131  // made the default since the aligned version does not work on pipes.
132  static constexpr int kFileVersion = 2;
133  // Current aligned file format version.
134  static constexpr int kAlignedFileVersion = 1;
135  // Minimum file format version supported.
136  static constexpr int kMinFileVersion = 1;
137 
138  std::unique_ptr<MappedFile> states_region_; // Mapped file for states.
139  std::unique_ptr<MappedFile> arcs_region_; // Mapped file for arcs.
140  ConstState *states_ = nullptr; // States representation.
141  Arc *arcs_ = nullptr; // Arcs representation.
142  size_t narcs_ = 0; // Number of arcs.
143  StateId nstates_ = 0; // Number of states.
144  StateId start_ = kNoStateId; // Initial state.
145 
146  ConstFstImpl(const ConstFstImpl &) = delete;
147  ConstFstImpl &operator=(const ConstFstImpl &) = delete;
148 };
149 
150 template <class Arc, class Unsigned>
152  std::string type = "const";
153  if (sizeof(Unsigned) != sizeof(uint32_t)) {
154  type += std::to_string(CHAR_BIT * sizeof(Unsigned));
155  }
156  SetType(type);
159  start_ = fst.Start();
160  // Counts states and arcs.
161  for (StateIterator<Fst<Arc>> siter(fst); !siter.Done(); siter.Next()) {
162  ++nstates_;
163  narcs_ += fst.NumArcs(siter.Value());
164  }
165  states_region_.reset(MappedFile::AllocateType<ConstState>(nstates_));
166  arcs_region_.reset(MappedFile::AllocateType<Arc>(narcs_));
167  states_ = static_cast<ConstState *>(states_region_->mutable_data());
168  arcs_ = static_cast<Arc *>(arcs_region_->mutable_data());
169  size_t pos = 0;
170  for (StateId s = 0; s < nstates_; ++s) {
171  states_[s].final_weight = fst.Final(s);
172  states_[s].pos = pos;
173  states_[s].narcs = 0;
174  states_[s].niepsilons = 0;
175  states_[s].noepsilons = 0;
176  for (ArcIterator<Fst<Arc>> aiter(fst, s); !aiter.Done(); aiter.Next()) {
177  const auto &arc = aiter.Value();
178  ++states_[s].narcs;
179  if (arc.ilabel == 0) ++states_[s].niepsilons;
180  if (arc.olabel == 0) ++states_[s].noepsilons;
181  arcs_[pos] = arc;
182  ++pos;
183  }
184  }
185  const auto props =
186  fst.Properties(kMutable, false)
187  ? fst.Properties(kCopyProperties, true)
188  : CheckProperties(
191  SetProperties(props | kStaticProperties);
192 }
193 
194 template <class Arc, class Unsigned>
196  std::istream &strm, const FstReadOptions &opts) {
197  auto impl = std::make_unique<ConstFstImpl>();
198  FstHeader hdr;
199  if (!impl->ReadHeader(strm, opts, kMinFileVersion, &hdr)) return nullptr;
200  impl->start_ = hdr.Start();
201  impl->nstates_ = hdr.NumStates();
202  impl->narcs_ = hdr.NumArcs();
203  // Ensures compatibility.
204  if (hdr.Version() == kAlignedFileVersion) {
205  hdr.SetFlags(hdr.GetFlags() | FstHeader::IS_ALIGNED);
206  }
207  if ((hdr.GetFlags() & FstHeader::IS_ALIGNED) && !AlignInput(strm)) {
208  LOG(ERROR) << "ConstFst::Read: Alignment failed: " << opts.source;
209  return nullptr;
210  }
211  size_t b = impl->nstates_ * sizeof(ConstState);
212  impl->states_region_.reset(
213  MappedFile::Map(strm, opts.mode == FstReadOptions::MAP, opts.source, b));
214  if (!strm || !impl->states_region_) {
215  LOG(ERROR) << "ConstFst::Read: Read failed: " << opts.source;
216  return nullptr;
217  }
218  impl->states_ =
219  static_cast<ConstState *>(impl->states_region_->mutable_data());
220  if ((hdr.GetFlags() & FstHeader::IS_ALIGNED) && !AlignInput(strm)) {
221  LOG(ERROR) << "ConstFst::Read: Alignment failed: " << opts.source;
222  return nullptr;
223  }
224  b = impl->narcs_ * sizeof(Arc);
225  impl->arcs_region_.reset(
226  MappedFile::Map(strm, opts.mode == FstReadOptions::MAP, opts.source, b));
227  if (!strm || !impl->arcs_region_) {
228  LOG(ERROR) << "ConstFst::Read: Read failed: " << opts.source;
229  return nullptr;
230  }
231  impl->arcs_ = static_cast<Arc *>(impl->arcs_region_->mutable_data());
232  return impl.release();
233 }
234 
235 } // namespace internal
236 
237 // Simple concrete immutable FST. This class attaches interface to
238 // implementation and handles reference counting, delegating most methods to
239 // ImplToExpandedFst. The unsigned type U is used to represent indices into the
240 // arc array (default declared in fst-decl.h).
241 //
242 // ConstFst is thread-safe.
243 template <class A, class Unsigned>
244 class ConstFst : public ImplToExpandedFst<internal::ConstFstImpl<A, Unsigned>> {
245  public:
246  using Arc = A;
247  using StateId = typename Arc::StateId;
248 
250  using ConstState = typename Impl::ConstState;
251 
252  friend class StateIterator<ConstFst<Arc, Unsigned>>;
253  friend class ArcIterator<ConstFst<Arc, Unsigned>>;
254 
255  template <class F, class G>
256  void friend Cast(const F &, G *);
257 
258  ConstFst() : ImplToExpandedFst<Impl>(std::make_shared<Impl>()) {}
259 
260  explicit ConstFst(const Fst<Arc> &fst)
261  : ImplToExpandedFst<Impl>(std::make_shared<Impl>(fst)) {}
262 
263  ConstFst(const ConstFst &fst, bool unused_safe = false)
264  : ImplToExpandedFst<Impl>(fst.GetSharedImpl()) {}
265 
266  // Gets a copy of this ConstFst. See Fst<>::Copy() for further doc.
267  ConstFst *Copy(bool safe = false) const override {
268  return new ConstFst(*this, safe);
269  }
270 
271  // Reads a ConstFst from an input stream, returning nullptr on error.
272  static ConstFst *Read(std::istream &strm, const FstReadOptions &opts) {
273  auto *impl = Impl::Read(strm, opts);
274  return impl ? new ConstFst(std::shared_ptr<Impl>(impl)) : nullptr;
275  }
276 
277  // Read a ConstFst from a file; return nullptr on error; empty source reads
278  // from standard input.
279  static ConstFst *Read(std::string_view source) {
280  auto *impl = ImplToExpandedFst<Impl>::Read(source);
281  return impl ? new ConstFst(std::shared_ptr<Impl>(impl)) : nullptr;
282  }
283 
284  bool Write(std::ostream &strm, const FstWriteOptions &opts) const override {
285  return WriteFst(*this, strm, opts);
286  }
287 
288  bool Write(const std::string &source) const override {
289  return Fst<Arc>::WriteFile(source);
290  }
291 
292  template <class FST>
293  static bool WriteFst(const FST &fst, std::ostream &strm,
294  const FstWriteOptions &opts);
295 
296  void InitStateIterator(StateIteratorData<Arc> *data) const override {
297  GetImpl()->InitStateIterator(data);
298  }
299 
300  void InitArcIterator(StateId s, ArcIteratorData<Arc> *data) const override {
301  GetImpl()->InitArcIterator(s, data);
302  }
303 
304  private:
305  explicit ConstFst(std::shared_ptr<Impl> impl)
306  : ImplToExpandedFst<Impl>(impl) {}
307 
308  using ImplToFst<Impl, ExpandedFst<Arc>>::GetImpl;
309 
310  // Uses overloading to extract the type of the argument.
311  static const Impl *GetImplIfConstFst(const ConstFst &const_fst) {
312  return const_fst.GetImpl();
313  }
314 
315  // NB: this does not give privileged treatment to subtypes of ConstFst.
316  template <typename FST>
317  static Impl *GetImplIfConstFst(const FST &fst) {
318  return nullptr;
319  }
320 
321  ConstFst &operator=(const ConstFst &) = delete;
322 };
323 
324 // Writes FST in Const format, potentially with a pass over the machine before
325 // writing to compute number of states and arcs.
326 template <class Arc, class Unsigned>
327 template <class FST>
328 bool ConstFst<Arc, Unsigned>::WriteFst(const FST &fst, std::ostream &strm,
329  const FstWriteOptions &opts) {
330  const auto file_version =
333  size_t num_arcs = 0; // To silence -Wsometimes-uninitialized warnings.
334  size_t num_states = 0; // Ditto.
335  std::streamoff start_offset = 0;
336  bool update_header = true;
337  if (const auto *impl = GetImplIfConstFst(fst)) {
338  num_arcs = impl->narcs_;
339  num_states = impl->nstates_;
340  update_header = false;
341  } else if (opts.stream_write || (start_offset = strm.tellp()) == -1) {
342  // precompute values needed for header when we cannot seek to rewrite it.
343  num_arcs = 0;
344  num_states = 0;
345  for (StateIterator<FST> siter(fst); !siter.Done(); siter.Next()) {
346  num_arcs += fst.NumArcs(siter.Value());
347  ++num_states;
348  }
349  update_header = false;
350  }
351  FstHeader hdr;
352  hdr.SetStart(fst.Start());
353  hdr.SetNumStates(num_states);
354  hdr.SetNumArcs(num_arcs);
355  std::string type = "const";
356  if (sizeof(Unsigned) != sizeof(uint32_t)) {
357  type += std::to_string(CHAR_BIT * sizeof(Unsigned));
358  }
359  const auto properties =
360  fst.Properties(kCopyProperties, true) |
362  internal::FstImpl<Arc>::WriteFstHeader(fst, strm, opts, file_version, type,
363  properties, &hdr);
364  if (opts.align && !AlignOutput(strm)) {
365  LOG(ERROR) << "Could not align file during write after header";
366  return false;
367  }
368  size_t pos = 0;
369  size_t states = 0;
370  ConstState state;
371  for (StateIterator<FST> siter(fst); !siter.Done(); siter.Next()) {
372  const auto s = siter.Value();
373  state.final_weight = fst.Final(s);
374  state.pos = pos;
375  state.narcs = fst.NumArcs(s);
376  state.niepsilons = fst.NumInputEpsilons(s);
377  state.noepsilons = fst.NumOutputEpsilons(s);
378  strm.write(reinterpret_cast<const char *>(&state), sizeof(state));
379  pos += state.narcs;
380  ++states;
381  }
382  hdr.SetNumStates(states);
383  hdr.SetNumArcs(pos);
384  if (opts.align && !AlignOutput(strm)) {
385  LOG(ERROR) << "Could not align file during write after writing states";
386  }
387  for (StateIterator<FST> siter(fst); !siter.Done(); siter.Next()) {
388  for (ArcIterator<FST> aiter(fst, siter.Value()); !aiter.Done();
389  aiter.Next()) {
390  const auto &arc = aiter.Value();
391  strm.write(reinterpret_cast<const char *>(&arc), sizeof(arc));
392  }
393  }
394  strm.flush();
395  if (!strm) {
396  LOG(ERROR) << "ConstFst::WriteFst: Write failed: " << opts.source;
397  return false;
398  }
399  if (update_header) {
401  fst, strm, opts, file_version, type, properties, &hdr, start_offset);
402  } else {
403  if (hdr.NumStates() != num_states) {
404  LOG(ERROR) << "Inconsistent number of states observed during write";
405  return false;
406  }
407  if (hdr.NumArcs() != num_arcs) {
408  LOG(ERROR) << "Inconsistent number of arcs observed during write";
409  return false;
410  }
411  }
412  return true;
413 }
414 
415 // Specialization for ConstFst; see generic version in fst.h for sample usage
416 // (but use the ConstFst type instead). This version should inline.
417 template <class Arc, class Unsigned>
418 class StateIterator<ConstFst<Arc, Unsigned>> {
419  public:
420  using StateId = typename Arc::StateId;
421 
423  : nstates_(fst.GetImpl()->NumStates()), s_(0) {}
424 
425  bool Done() const { return s_ >= nstates_; }
426 
427  StateId Value() const { return s_; }
428 
429  void Next() { ++s_; }
430 
431  void Reset() { s_ = 0; }
432 
433  private:
434  const StateId nstates_;
435  StateId s_;
436 };
437 
438 // Specialization for ConstFst; see generic version in fst.h for sample usage
439 // (but use the ConstFst type instead). This version should inline.
440 template <class Arc, class Unsigned>
441 class ArcIterator<ConstFst<Arc, Unsigned>> {
442  public:
443  using StateId = typename Arc::StateId;
444 
446  : arcs_(fst.GetImpl()->Arcs(s)),
447  narcs_(fst.GetImpl()->NumArcs(s)),
448  i_(0) {}
449 
450  bool Done() const { return i_ >= narcs_; }
451 
452  const Arc &Value() const { return arcs_[i_]; }
453 
454  void Next() { ++i_; }
455 
456  size_t Position() const { return i_; }
457 
458  void Reset() { i_ = 0; }
459 
460  void Seek(size_t a) { i_ = a; }
461 
462  constexpr uint8_t Flags() const { return kArcValueFlags; }
463 
464  void SetFlags(uint8_t, uint8_t) {}
465 
466  private:
467  const Arc *arcs_;
468  size_t narcs_;
469  size_t i_;
470 };
471 
472 // A useful alias when using StdArc.
474 
475 } // namespace fst
476 
477 #endif // FST_CONST_FST_H_
static Impl * Read(std::istream &strm, const FstReadOptions &opts)
Definition: expanded-fst.h:155
typename Arc::Weight Weight
Definition: const-fst.h:64
StateId Start() const
Definition: const-fst.h:83
void SetProperties(uint64_t props)
Definition: fst.h:709
bool AlignInput(std::istream &strm, size_t align=MappedFile::kArchAlignment)
Definition: util.cc:85
constexpr uint64_t kMutable
Definition: properties.h:49
constexpr uint64_t kWeightedCycles
Definition: properties.h:142
constexpr uint8_t kArcValueFlags
Definition: fst.h:453
void Cast(const F &, G *)
virtual uint64_t Properties(uint64_t mask, bool test) const =0
virtual size_t NumArcs(StateId) const =0
std::string source
Definition: fst.h:73
ConstFst * Copy(bool safe=false) const override
Definition: const-fst.h:267
static MappedFile * Map(std::istream &istrm, bool memorymap, const std::string &source, size_t size)
Definition: mapped-file.cc:77
#define LOG(type)
Definition: log.h:53
virtual Weight Final(StateId) const =0
void SetOutputSymbols(const SymbolTable *osyms)
Definition: fst.h:771
void SetNumArcs(int64_t numarcs)
Definition: fst.h:166
constexpr uint64_t kUnweightedCycles
Definition: properties.h:145
bool AlignOutput(std::ostream &strm, size_t align=MappedFile::kArchAlignment)
Definition: util.cc:101
static ConstFst * Read(std::istream &strm, const FstReadOptions &opts)
Definition: const-fst.h:272
bool stream_write
Definition: fst.h:107
static bool WriteFst(const FST &fst, std::ostream &strm, const FstWriteOptions &opts)
Definition: const-fst.h:328
constexpr int kNoStateId
Definition: fst.h:196
static ConstFst * Read(std::string_view source)
Definition: const-fst.h:279
std::string source
Definition: fst.h:102
ConstFst(const Fst< Arc > &fst)
Definition: const-fst.h:260
ArcIterator(const ConstFst< Arc, Unsigned > &fst, StateId s)
Definition: const-fst.h:445
void InitArcIterator(StateId s, ArcIteratorData< Arc > *data) const override
Definition: const-fst.h:300
static void WriteFstHeader(const Fst< Arc > &fst, std::ostream &strm, const FstWriteOptions &opts, int version, std::string_view type, uint64_t properties, FstHeader *hdr)
Definition: fst.h:811
const Arc * Arcs(StateId s) const
Definition: const-fst.h:97
constexpr uint64_t kCopyProperties
Definition: properties.h:163
typename Arc::StateId StateId
Definition: const-fst.h:63
typename Impl::ConstState ConstState
Definition: const-fst.h:250
bool Write(const std::string &source) const override
Definition: const-fst.h:288
uint64_t CheckProperties(const Fst< Arc > &fst, uint64_t check_mask, uint64_t test_mask)
static bool UpdateFstHeader(const Fst< Arc > &fst, std::ostream &strm, const FstWriteOptions &opts, int version, std::string_view type, uint64_t properties, FstHeader *hdr, size_t header_offset)
Definition: fst.h:844
size_t NumArcs(StateId s) const
Definition: const-fst.h:89
StateId nstates
Definition: fst.h:384
size_t NumInputEpsilons(StateId s) const
Definition: const-fst.h:91
virtual StateId Start() const =0
bool Done() const
Definition: fst.h:532
void InitArcIterator(StateId s, ArcIteratorData< Arc > *data) const
Definition: const-fst.h:106
std::unique_ptr< StateIteratorBase< Arc > > base
Definition: fst.h:382
StateIterator(const ConstFst< Arc, Unsigned > &fst)
Definition: const-fst.h:422
constexpr uint64_t kNullProperties
Definition: properties.h:150
std::unique_ptr< ArcIteratorBase< Arc > > base
Definition: fst.h:494
void SetInputSymbols(const SymbolTable *isyms)
Definition: fst.h:767
bool WriteFile(const std::string &source) const
Definition: fst.h:332
int64_t NumArcs() const
Definition: fst.h:150
ConstFst(const ConstFst &fst, bool unused_safe=false)
Definition: const-fst.h:263
typename Arc::StateId StateId
Definition: const-fst.h:247
void SetNumStates(int64_t numstates)
Definition: fst.h:164
bool Write(std::ostream &strm, const FstWriteOptions &opts) const override
Definition: const-fst.h:284
virtual const SymbolTable * InputSymbols() const =0
int64_t NumStates() const
Definition: fst.h:148
void SetType(std::string_view type)
Definition: fst.h:699
bool Done() const
Definition: fst.h:415
void SetStart(int64_t start)
Definition: fst.h:162
const Arc * arcs
Definition: fst.h:495
size_t NumOutputEpsilons(StateId s) const
Definition: const-fst.h:93
FileReadMode mode
Definition: fst.h:80
void InitStateIterator(StateIteratorData< Arc > *data) const
Definition: const-fst.h:100
StateId NumStates() const
Definition: const-fst.h:87
constexpr uint64_t kExpanded
Definition: properties.h:46
static ConstFstImpl * Read(std::istream &strm, const FstReadOptions &opts)
Definition: const-fst.h:195
Weight Final(StateId s) const
Definition: const-fst.h:85
int * ref_count
Definition: fst.h:497
void InitStateIterator(StateIteratorData< Arc > *data) const override
Definition: const-fst.h:296
size_t narcs
Definition: fst.h:496
const Impl * GetImpl() const
Definition: impl-to-fst.h:123
virtual const SymbolTable * OutputSymbols() const =0