FST  openfst-1.8.3
OpenFst Library
state-table.h
Go to the documentation of this file.
1 // Copyright 2005-2024 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the 'License');
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an 'AS IS' BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // See www.openfst.org for extensive documentation on this weighted
16 // finite-state transducer library.
17 //
18 // Classes for representing the mapping between state tuples and state IDs.
19 
20 #ifndef FST_STATE_TABLE_H_
21 #define FST_STATE_TABLE_H_
22 
23 #include <sys/types.h>
24 
25 #include <cstddef>
26 #include <deque>
27 #include <utility>
28 #include <vector>
29 
30 #include <fst/log.h>
31 #include <fst/bi-table.h>
32 #include <fst/expanded-fst.h>
33 #include <fst/filter-state.h>
34 #include <fst/fst.h>
35 #include <fst/properties.h>
36 #include <fst/util.h>
37 
38 namespace fst {
39 
40 // State tables determine the bijective mapping between state tuples (e.g., in
41 // composition, triples of two FST states and a composition filter state) and
42 // their corresponding state IDs. They are classes, templated on state tuples,
43 // with the following interface:
44 //
45 // template <class T>
46 // class StateTable {
47 // public:
48 // using StateTuple = T;
49 //
50 // // Required constructors.
51 // StateTable();
52 //
53 // StateTable(const StateTable &);
54 //
55 // // Looks up state ID by tuple. If it doesn't exist, then add it.
56 // StateId FindState(const StateTuple &tuple);
57 //
58 // // Looks up state tuple by state ID.
59 // const StateTuple<StateId> &Tuple(StateId s) const;
60 //
61 // // # of stored tuples.
62 // StateId Size() const;
63 // };
64 //
65 // A state tuple has the form:
66 //
67 // template <class S>
68 // struct StateTuple {
69 // using StateId = S;
70 //
71 // // Required constructors.
72 //
73 // StateTuple();
74 //
75 // StateTuple(const StateTuple &tuple);
76 // };
77 
78 // An implementation using a hash map for the tuple to state ID mapping. The
79 // state tuple T must support operator==.
80 template <class T, class H>
81 class HashStateTable : public HashBiTable<typename T::StateId, T, H> {
82  public:
83  using StateTuple = T;
84  using StateId = typename StateTuple::StateId;
85 
89 
91 
92  explicit HashStateTable(size_t table_size)
93  : HashBiTable<StateId, StateTuple, H>(table_size) {}
94 
95  StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
96 
97  const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
98 };
99 
100 // An implementation using a hash map for the tuple to state ID mapping. The
101 // state tuple T must support operator==.
102 template <class T, class H>
104  : public CompactHashBiTable<typename T::StateId, T, H> {
105  public:
106  using StateTuple = T;
107  using StateId = typename StateTuple::StateId;
108 
112 
114 
115  explicit CompactHashStateTable(size_t table_size)
116  : CompactHashBiTable<StateId, StateTuple, H>(table_size) {}
117 
118  StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
119 
120  const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
121 };
122 
123 // An implementation using a vector for the tuple to state mapping. It is
124 // passed a fingerprint functor that should fingerprint tuples uniquely to an
125 // integer that can used as a vector index. Normally, VectorStateTable
126 // constructs the fingerprint functor. Alternately, the user can pass this
127 // object, in which case the table takes ownership.
128 template <class T, class FP>
129 class VectorStateTable : public VectorBiTable<typename T::StateId, T, FP> {
130  public:
131  using StateTuple = T;
132  using StateId = typename StateTuple::StateId;
133 
138 
139  explicit VectorStateTable(const FP &fingerprint = FP(), size_t table_size = 0)
140  : VectorBiTable<StateId, StateTuple, FP>(fingerprint, table_size) {}
141 
142  StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
143 
144  const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
145 };
146 
147 // An implementation using a vector and a compact hash table. The selection
148 // functor returns true for tuples to be hashed in the vector. The fingerprint
149 // functor should fingerprint tuples uniquely to an integer that can be used as
150 // a vector index. A hash functor is used when hashing tuples into the compact
151 // hash table.
152 template <class T, class Select, class FP, class H>
154  : public VectorHashBiTable<typename T::StateId, T, Select, FP, H> {
155  public:
156  using StateTuple = T;
157  using StateId = typename StateTuple::StateId;
158 
165 
166  VectorHashStateTable(const Select &select, const FP &fingerprint,
167  const H &hash, size_t vector_size = 0,
168  size_t tuple_size = 0)
169  : VectorHashBiTable<StateId, StateTuple, Select, FP, H>(
170  select, fingerprint, hash, vector_size, tuple_size) {}
171 
172  StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
173 
174  const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
175 };
176 
177 // An implementation using a hash map to map from tuples to state IDs. This
178 // version permits erasing of states. The state tuple's default constructor
179 // must produce a tuple that will never be seen and the table must suppor
180 // operator==.
181 template <class T, class H>
182 class ErasableStateTable : public ErasableBiTable<typename T::StateId, T, H> {
183  public:
184  using StateTuple = T;
185  using StateId = typename StateTuple::StateId;
186 
191 
193 
194  StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
195 
196  const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
197 };
198 
199 // The composition state table has the form:
200 //
201 // template <class Arc, class FilterState>
202 // class ComposeStateTable {
203 // public:
204 // using StateId = typename Arc::StateId;
205 //
206 // // Required constructors.
207 //
208 // ComposeStateTable(const Fst<Arc> &fst1, const Fst<Arc> &fst2);
209 // ComposeStateTable(const ComposeStateTable<Arc, FilterState> &table);
210 //
211 // // Looks up a state ID by tuple, adding it if doesn't exist.
212 // StateId FindState(const StateTuple &tuple);
213 //
214 // // Looks up a tuple by state ID.
215 // const ComposeStateTuple<StateId> &Tuple(StateId s) const;
216 //
217 // // The number of stored tuples.
218 // StateId Size() const;
219 //
220 // // Return true if error was encountered.
221 // bool Error() const;
222 // };
223 //
224 // The following interface is used to represent the composition state.
225 //
226 // template <class S, class FS>
227 // class CompositionStateTuple {
228 // public:
229 // using StateId = typename StateId;
230 // using FS = FilterState;
231 //
232 // // Required constructors.
233 // StateTuple();
234 // StateTuple(StateId s1, StateId s2, const FilterState &fs);
235 //
236 // StateId StateId1() const;
237 // StateId StateId2() const;
238 //
239 // FilterState GetFilterState() const;
240 //
241 // std::pair<StateId, StateId> StatePair() const;
242 //
243 // size_t Hash() const;
244 //
245 // friend bool operator==(const StateTuple& x, const StateTuple &y);
246 // }
247 //
248 template <typename S, typename FS>
250  public:
251  using StateId = S;
252  using FilterState = FS;
253 
255  : state_pair_(kNoStateId, kNoStateId), fs_(FilterState::NoState()) {}
256 
258  : state_pair_(s1, s2), fs_(fs) {}
259 
260  StateId StateId1() const { return state_pair_.first; }
261 
262  StateId StateId2() const { return state_pair_.second; }
263 
264  FilterState GetFilterState() const { return fs_; }
265 
266  const std::pair<StateId, StateId> &StatePair() const { return state_pair_; }
267 
268  friend bool operator==(const DefaultComposeStateTuple &x,
269  const DefaultComposeStateTuple &y) {
270  return (&x == &y) || (x.state_pair_ == y.state_pair_ && x.fs_ == y.fs_);
271  }
272 
273  size_t Hash() const {
274  return static_cast<size_t>(StateId1()) +
275  static_cast<size_t>(StateId2()) * 7853u +
276  GetFilterState().Hash() * 7867u;
277  }
278 
279  private:
280  std::pair<StateId, StateId> state_pair_;
281  FilterState fs_; // State of composition filter.
282 };
283 
284 // Specialization for TrivialFilterState that does not explicitly store the
285 // filter state since it is always the unique non-blocking state.
286 template <typename S>
288  public:
289  using StateId = S;
291 
293 
295  : state_pair_(s1, s2) {}
296 
297  StateId StateId1() const { return state_pair_.first; }
298 
299  StateId StateId2() const { return state_pair_.second; }
300 
301  FilterState GetFilterState() const { return FilterState(true); }
302 
303  const std::pair<StateId, StateId> &StatePair() const { return state_pair_; }
304 
305  friend bool operator==(const DefaultComposeStateTuple &x,
306  const DefaultComposeStateTuple &y) {
307  return (&x == &y) || (x.state_pair_ == y.state_pair_);
308  }
309 
310  size_t Hash() const { return StateId1() + StateId2() * size_t{7853}; }
311 
312  private:
313  std::pair<StateId, StateId> state_pair_;
314 };
315 
316 // Hashing of composition state tuples.
317 template <typename T>
318 class ComposeHash {
319  public:
320  size_t operator()(const T &t) const { return t.Hash(); }
321 };
322 
323 // A HashStateTable over composition tuples.
324 template <typename Arc, typename FilterState,
325  typename StateTuple =
327  typename StateTable =
329 class GenericComposeStateTable : public StateTable {
330  public:
331  using StateId = typename Arc::StateId;
332 
333  GenericComposeStateTable(const Fst<Arc> &fst1, const Fst<Arc> &fst2) {}
334 
335  GenericComposeStateTable(const Fst<Arc> &fst1, const Fst<Arc> &fst2,
336  size_t table_size)
337  : StateTable(table_size) {}
338 
339  constexpr bool Error() const { return false; }
340 
341  private:
342  GenericComposeStateTable &operator=(const GenericComposeStateTable &table) =
343  delete;
344 };
345 
346 // Fingerprint for general composition tuples.
347 template <typename StateTuple>
349  public:
350  using StateId = typename StateTuple::StateId;
351 
352  // Required but suboptimal constructor.
353  ComposeFingerprint() : mult1_(8192), mult2_(8192) {
354  LOG(WARNING) << "TupleFingerprint: # of FST states should be provided.";
355  }
356 
357  // Constructor is provided the sizes of the input FSTs.
358  ComposeFingerprint(StateId nstates1, StateId nstates2)
359  : mult1_(nstates1), mult2_(nstates1 * nstates2) {}
360 
361  size_t operator()(const StateTuple &tuple) const {
362  return tuple.StateId1() + tuple.StateId2() * mult1_ +
363  tuple.GetFilterState().Hash() * mult2_;
364  }
365 
366  private:
367  const ssize_t mult1_;
368  const ssize_t mult2_;
369 };
370 
371 // Useful when the first composition state determines the tuple.
372 template <typename StateTuple>
374  public:
375  size_t operator()(const StateTuple &tuple) { return tuple.StateId1(); }
376 };
377 
378 // Useful when the second composition state determines the tuple.
379 template <typename StateTuple>
381  public:
382  size_t operator()(const StateTuple &tuple) { return tuple.StateId2(); }
383 };
384 
385 // A VectorStateTable over composition tuples. This can be used when the
386 // product of number of states in FST1 and FST2 (and the composition filter
387 // state hash) is manageable. If the FSTs are not expanded FSTs, they will
388 // first have their states counted.
389 template <typename Arc, typename StateTuple>
391  : public VectorStateTable<StateTuple, ComposeFingerprint<StateTuple>> {
392  public:
393  using StateId = typename Arc::StateId;
394  using StateTable =
396 
397  ProductComposeStateTable(const Fst<Arc> &fst1, const Fst<Arc> &fst2,
398  size_t table_size = 0)
399  : StateTable(ComposeFingerprint<StateTuple>(CountStates(fst1),
400  CountStates(fst2)),
401  table_size) {}
402 
405  : StateTable(ComposeFingerprint<StateTuple>(table.Fingerprint())) {}
406 
407  constexpr bool Error() const { return false; }
408 
409  private:
410  ProductComposeStateTable &operator=(const ProductComposeStateTable &table) =
411  delete;
412 };
413 
414 // A vector-backed table over composition tuples which can be used when the
415 // first FST is a string (i.e., satisfies kString property) and the second is
416 // deterministic and epsilon-free. It should be used with a composition filter
417 // that creates at most one filter state per tuple under these conditions (e.g.,
418 // SequenceComposeFilter or MatchComposeFilter).
419 template <typename Arc, typename StateTuple>
421  : public VectorStateTable<StateTuple,
422  ComposeState1Fingerprint<StateTuple>> {
423  public:
424  using StateId = typename Arc::StateId;
425  using StateTable =
427 
428  StringDetComposeStateTable(const Fst<Arc> &fst1, const Fst<Arc> &fst2)
429  : error_(false) {
430  static constexpr auto props2 = kIDeterministic | kNoIEpsilons;
431  if (fst1.Properties(kString, true) != kString) {
432  FSTERROR() << "StringDetComposeStateTable: 1st FST is not a string";
433  error_ = true;
434  } else if (fst2.Properties(props2, true) != props2) {
435  FSTERROR() << "StringDetComposeStateTable: 2nd FST is not deterministic "
436  "and epsilon-free";
437  error_ = true;
438  }
439  }
440 
443  : StateTable(table), error_(table.error_) {}
444 
445  bool Error() const { return error_; }
446 
447  private:
448  bool error_;
449 
451  delete;
452 };
453 
454 // A vector-backed table over composition tuples which can be used when the
455 // first FST is deterministic and epsilon-free and the second is a string (i.e.,
456 // satisfies kString). It should be used with a composition filter that creates
457 // at most one filter state per tuple under these conditions (e.g.,
458 // SequenceComposeFilter or MatchComposeFilter).
459 template <typename Arc, typename StateTuple>
461  : public VectorStateTable<StateTuple,
462  ComposeState2Fingerprint<StateTuple>> {
463  public:
464  using StateId = typename Arc::StateId;
465  using StateTable =
467 
468  DetStringComposeStateTable(const Fst<Arc> &fst1, const Fst<Arc> &fst2)
469  : error_(false) {
470  static constexpr auto props = kODeterministic | kNoOEpsilons;
471  if (fst1.Properties(props, true) != props) {
472  FSTERROR() << "StringDetComposeStateTable: 1st FST is not "
473  << "input-deterministic and epsilon-free";
474  error_ = true;
475  } else if (fst2.Properties(kString, true) != kString) {
476  FSTERROR() << "DetStringComposeStateTable: 2nd FST is not a string";
477  error_ = true;
478  }
479  }
480 
483  : StateTable(table), error_(table.error_) {}
484 
485  bool Error() const { return error_; }
486 
487  private:
488  bool error_;
489 
491  delete;
492 };
493 
494 // An erasable table over composition tuples. The Erase(StateId) method can be
495 // called if the user either is sure that composition will never return to that
496 // tuple or doesn't care that if it does, it is assigned a new state ID.
497 template <typename Arc, typename StateTuple>
499  : public ErasableStateTable<StateTuple, ComposeHash<StateTuple>> {
500  public:
501  ErasableComposeStateTable(const Fst<Arc> &fst1, const Fst<Arc> &fst2) {}
502 
503  constexpr bool Error() const { return false; }
504 
505  private:
506  ErasableComposeStateTable &operator=(const ErasableComposeStateTable &table) =
507  delete;
508 };
509 
510 } // namespace fst
511 
512 #endif // FST_STATE_TABLE_H_
const StateTuple & Tuple(StateId s) const
Definition: state-table.h:196
ProductComposeStateTable(const Fst< Arc > &fst1, const Fst< Arc > &fst2, size_t table_size=0)
Definition: state-table.h:397
virtual uint64_t Properties(uint64_t mask, bool test) const =0
FilterState GetFilterState() const
Definition: state-table.h:264
VectorStateTable(const FP &fingerprint=FP(), size_t table_size=0)
Definition: state-table.h:139
constexpr bool Error() const
Definition: state-table.h:407
StateId FindState(const StateTuple &tuple)
Definition: state-table.h:194
typename Arc::StateId StateId
Definition: state-table.h:424
const StateTuple & Tuple(StateId s) const
Definition: state-table.h:97
typename StateTuple::StateId StateId
Definition: state-table.h:84
DetStringComposeStateTable(const DetStringComposeStateTable< Arc, StateTuple > &table)
Definition: state-table.h:481
StringDetComposeStateTable(const StringDetComposeStateTable< Arc, StateTuple > &table)
Definition: state-table.h:441
size_t operator()(const StateTuple &tuple) const
Definition: state-table.h:361
size_t operator()(const StateTuple &tuple)
Definition: state-table.h:382
typename StateTuple::StateId StateId
Definition: state-table.h:350
const T & FindEntry(T::StateIds) const
Definition: bi-table.h:100
size_t operator()(const StateTuple &tuple)
Definition: state-table.h:375
#define LOG(type)
Definition: log.h:53
StateId FindState(const StateTuple &tuple)
Definition: state-table.h:142
const StateTuple & Tuple(StateId s) const
Definition: state-table.h:144
HashStateTable(size_t table_size)
Definition: state-table.h:92
ComposeFingerprint(StateId nstates1, StateId nstates2)
Definition: state-table.h:358
constexpr uint64_t kODeterministic
Definition: properties.h:74
constexpr int kNoStateId
Definition: fst.h:196
GenericComposeStateTable(const Fst< Arc > &fst1, const Fst< Arc > &fst2, size_t table_size)
Definition: state-table.h:335
DetStringComposeStateTable(const Fst< Arc > &fst1, const Fst< Arc > &fst2)
Definition: state-table.h:468
#define FSTERROR()
Definition: util.h:56
StringDetComposeStateTable(const Fst< Arc > &fst1, const Fst< Arc > &fst2)
Definition: state-table.h:428
const StateTuple & Tuple(StateId s) const
Definition: state-table.h:174
constexpr uint64_t kNoOEpsilons
Definition: properties.h:91
StateId FindState(const StateTuple &tuple)
Definition: state-table.h:118
typename StateTuple::StateId StateId
Definition: state-table.h:157
const StateTuple & Tuple(StateId s) const
Definition: state-table.h:120
T::StateId FindId(const T &entry, bool insert=true)
Definition: bi-table.h:87
constexpr uint64_t kIDeterministic
Definition: properties.h:69
size_t operator()(const T &t) const
Definition: state-table.h:320
friend bool operator==(const DefaultComposeStateTuple &x, const DefaultComposeStateTuple &y)
Definition: state-table.h:268
GenericComposeStateTable(const Fst< Arc > &fst1, const Fst< Arc > &fst2)
Definition: state-table.h:333
constexpr bool Error() const
Definition: state-table.h:339
Arc::StateId CountStates(const Fst< Arc > &fst)
Definition: expanded-fst.h:179
friend bool operator==(const DefaultComposeStateTuple &x, const DefaultComposeStateTuple &y)
Definition: state-table.h:305
CompactHashStateTable(size_t table_size)
Definition: state-table.h:115
DefaultComposeStateTuple(StateId s1, StateId s2, const FilterState &)
Definition: state-table.h:294
constexpr uint64_t kString
Definition: properties.h:136
DefaultComposeStateTuple(StateId s1, StateId s2, const FilterState &fs)
Definition: state-table.h:257
typename Arc::StateId StateId
Definition: state-table.h:464
StateId FindState(const StateTuple &tuple)
Definition: state-table.h:172
ErasableComposeStateTable(const Fst< Arc > &fst1, const Fst< Arc > &fst2)
Definition: state-table.h:501
constexpr uint64_t kNoIEpsilons
Definition: properties.h:86
const std::pair< StateId, StateId > & StatePair() const
Definition: state-table.h:303
const std::pair< StateId, StateId > & StatePair() const
Definition: state-table.h:266
typename Arc::StateId StateId
Definition: state-table.h:393
ProductComposeStateTable(const ProductComposeStateTable< Arc, StateTuple > &table)
Definition: state-table.h:403
constexpr bool Error() const
Definition: state-table.h:503
StateId FindState(const StateTuple &tuple)
Definition: state-table.h:95
VectorHashStateTable(const Select &select, const FP &fingerprint, const H &hash, size_t vector_size=0, size_t tuple_size=0)
Definition: state-table.h:166