FST  openfst-1.7.2
OpenFst Library
state-table.h
Go to the documentation of this file.
1 // See www.openfst.org for extensive documentation on this weighted
2 // finite-state transducer library.
3 //
4 // Classes for representing the mapping between state tuples and state IDs.
5 
6 #ifndef FST_STATE_TABLE_H_
7 #define FST_STATE_TABLE_H_
8 
9 #include <deque>
10 #include <utility>
11 #include <vector>
12 
13 #include <fst/log.h>
14 
15 #include <fst/bi-table.h>
16 #include <fst/expanded-fst.h>
17 #include <fst/filter-state.h>
18 
19 
20 namespace fst {
21 
22 // State tables determine the bijective mapping between state tuples (e.g., in
23 // composition, triples of two FST states and a composition filter state) and
24 // their corresponding state IDs. They are classes, templated on state tuples,
25 // with the following interface:
26 //
27 // template <class T>
28 // class StateTable {
29 // public:
30 // using StateTuple = T;
31 //
32 // // Required constructors.
33 // StateTable();
34 //
35 // StateTable(const StateTable &);
36 //
37 // // Looks up state ID by tuple. If it doesn't exist, then add it.
38 // StateId FindState(const StateTuple &tuple);
39 //
40 // // Looks up state tuple by state ID.
41 // const StateTuple<StateId> &Tuple(StateId s) const;
42 //
43 // // # of stored tuples.
44 // StateId Size() const;
45 // };
46 //
47 // A state tuple has the form:
48 //
49 // template <class S>
50 // struct StateTuple {
51 // using StateId = S;
52 //
53 // // Required constructors.
54 //
55 // StateTuple();
56 //
57 // StateTuple(const StateTuple &tuple);
58 // };
59 
60 // An implementation using a hash map for the tuple to state ID mapping. The
61 // state tuple T must support operator==.
62 template <class T, class H>
63 class HashStateTable : public HashBiTable<typename T::StateId, T, H> {
64  public:
65  using StateTuple = T;
66  using StateId = typename StateTuple::StateId;
67 
71 
73 
74  explicit HashStateTable(size_t table_size)
75  : HashBiTable<StateId, StateTuple, H>(table_size) {}
76 
77  StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
78 
79  const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
80 };
81 
82 // An implementation using a hash map for the tuple to state ID mapping. The
83 // state tuple T must support operator==.
84 template <class T, class H>
86  : public CompactHashBiTable<typename T::StateId, T, H> {
87  public:
88  using StateTuple = T;
89  using StateId = typename StateTuple::StateId;
90 
94 
96 
97  explicit CompactHashStateTable(size_t table_size)
98  : CompactHashBiTable<StateId, StateTuple, H>(table_size) {}
99 
100  StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
101 
102  const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
103 };
104 
105 // An implementation using a vector for the tuple to state mapping. It is
106 // passed a fingerprint functor that should fingerprint tuples uniquely to an
107 // integer that can used as a vector index. Normally, VectorStateTable
108 // constructs the fingerprint functor. Alternately, the user can pass this
109 // object, in which case the table takes ownership.
110 template <class T, class FP>
111 class VectorStateTable : public VectorBiTable<typename T::StateId, T, FP> {
112  public:
113  using StateTuple = T;
114  using StateId = typename StateTuple::StateId;
115 
120 
121  explicit VectorStateTable(FP *fingerprint = nullptr, size_t table_size = 0)
122  : VectorBiTable<StateId, StateTuple, FP>(fingerprint, table_size) {}
123 
124  StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
125 
126  const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
127 };
128 
129 // An implementation using a vector and a compact hash table. The selection
130 // functor returns true for tuples to be hashed in the vector. The fingerprint
131 // functor should fingerprint tuples uniquely to an integer that can be used as
132 // a vector index. A hash functor is used when hashing tuples into the compact
133 // hash table.
134 template <class T, class Select, class FP, class H>
136  : public VectorHashBiTable<typename T::StateId, T, Select, FP, H> {
137  public:
138  using StateTuple = T;
139  using StateId = typename StateTuple::StateId;
140 
147 
148  VectorHashStateTable(Select *select, FP *fingerprint, H *hash,
149  size_t vector_size = 0, size_t tuple_size = 0)
150  : VectorHashBiTable<StateId, StateTuple, Select, FP, H>(
151  select, fingerprint, hash, vector_size, tuple_size) {}
152 
153  StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
154 
155  const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
156 };
157 
158 // An implementation using a hash map to map from tuples to state IDs. This
159 // version permits erasing of states. The state tuple's default constructor
160 // must produce a tuple that will never be seen and the table must suppor
161 // operator==.
162 template <class T, class H>
163 class ErasableStateTable : public ErasableBiTable<typename T::StateId, T, H> {
164  public:
165  using StateTuple = T;
166  using StateId = typename StateTuple::StateId;
167 
172 
174 
175  StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
176 
177  const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
178 };
179 
180 // The composition state table has the form:
181 //
182 // template <class Arc, class FilterState>
183 // class ComposeStateTable {
184 // public:
185 // using StateId = typename Arc::StateId;
186 //
187 // // Required constructors.
188 //
189 // ComposeStateTable(const Fst<Arc> &fst1, const Fst<Arc> &fst2);
190 // ComposeStateTable(const ComposeStateTable<Arc, FilterState> &table);
191 //
192 // // Looks up a state ID by tuple, adding it if doesn't exist.
193 // StateId FindState(const StateTuple &tuple);
194 //
195 // // Looks up a tuple by state ID.
196 // const ComposeStateTuple<StateId> &Tuple(StateId s) const;
197 //
198 // // The number of of stored tuples.
199 // StateId Size() const;
200 //
201 // // Return true if error was encountered.
202 // bool Error() const;
203 // };
204 //
205 // The following interface is used to represent the composition state.
206 //
207 // template <class S, class FS>
208 // class CompositionStateTuple {
209 // public:
210 // using StateId = typename StateId;
211 // using FS = FilterState;
212 //
213 // // Required constructors.
214 // StateTuple();
215 // StateTuple(StateId s1, StateId s2, const FilterState &fs);
216 //
217 // StateId StateId1() const;
218 // StateId StateId2() const;
219 //
220 // FilterState GetFilterState() const;
221 //
222 // std::pair<StateId, StateId> StatePair() const;
223 //
224 // size_t Hash() const;
225 //
226 // friend bool operator==(const StateTuple& x, const StateTuple &y);
227 // }
228 //
229 template <typename S, typename FS>
231  public:
232  using StateId = S;
233  using FilterState = FS;
234 
236  : state_pair_(kNoStateId, kNoStateId), fs_(FilterState::NoState()) {}
237 
239  : state_pair_(s1, s2), fs_(fs) {}
240 
241  StateId StateId1() const { return state_pair_.first; }
242 
243  StateId StateId2() const { return state_pair_.second; }
244 
245  FilterState GetFilterState() const { return fs_; }
246 
247  const std::pair<StateId, StateId> &StatePair() const { return state_pair_; }
248 
249  friend bool operator==(const DefaultComposeStateTuple &x,
250  const DefaultComposeStateTuple &y) {
251  return (&x == &y) || (x.state_pair_ == y.state_pair_ && x.fs_ == y.fs_);
252  }
253 
254  size_t Hash() const {
255  return static_cast<size_t>(StateId1()) +
256  static_cast<size_t>(StateId2()) * 7853u +
257  GetFilterState().Hash() * 7867u;
258  }
259 
260  private:
261  std::pair<StateId, StateId> state_pair_;
262  FilterState fs_; // State of composition filter.
263 };
264 
265 // Specialization for TrivialFilterState that does not explicitely store the
266 // filter state since it is always the unique non-blocking state.
267 template <typename S>
269  public:
270  using StateId = S;
272 
274  : state_pair_(kNoStateId, kNoStateId) {}
275 
277  : state_pair_(s1, s2) {}
278 
279  StateId StateId1() const { return state_pair_.first; }
280 
281  StateId StateId2() const { return state_pair_.second; }
282 
283  FilterState GetFilterState() const { return FilterState(true); }
284 
285  const std::pair<StateId, StateId> &StatePair() const { return state_pair_; }
286 
287  friend bool operator==(const DefaultComposeStateTuple &x,
288  const DefaultComposeStateTuple &y) {
289  return (&x == &y) || (x.state_pair_ == y.state_pair_);
290  }
291 
292  size_t Hash() const { return StateId1() + StateId2() * 7853; }
293 
294  private:
295  std::pair<StateId, StateId> state_pair_;
296 };
297 
298 // Hashing of composition state tuples.
299 template <typename T>
300 class ComposeHash {
301  public:
302  size_t operator()(const T &t) const { return t.Hash(); }
303 };
304 
305 // A HashStateTable over composition tuples.
306 template <typename Arc, typename FilterState,
307  typename StateTuple =
309  typename StateTable =
311 class GenericComposeStateTable : public StateTable {
312  public:
313  using StateId = typename Arc::StateId;
314 
315  GenericComposeStateTable(const Fst<Arc> &fst1, const Fst<Arc> &fst2) {}
316 
317  GenericComposeStateTable(const Fst<Arc> &fst1, const Fst<Arc> &fst2,
318  size_t table_size)
319  : StateTable(table_size) {}
320 
321  constexpr bool Error() const { return false; }
322 
323  private:
324  GenericComposeStateTable &operator=(const GenericComposeStateTable &table) =
325  delete;
326 };
327 
328 // Fingerprint for general composition tuples.
329 template <typename StateTuple>
331  public:
332  using StateId = typename StateTuple::StateId;
333 
334  // Required but suboptimal constructor.
335  ComposeFingerprint() : mult1_(8192), mult2_(8192) {
336  LOG(WARNING) << "TupleFingerprint: # of FST states should be provided.";
337  }
338 
339  // Constructor is provided the sizes of the input FSTs.
340  ComposeFingerprint(StateId nstates1, StateId nstates2)
341  : mult1_(nstates1), mult2_(nstates1 * nstates2) {}
342 
343  size_t operator()(const StateTuple &tuple) {
344  return tuple.StateId1() + tuple.StateId2() * mult1_ +
345  tuple.GetFilterState().Hash() * mult2_;
346  }
347 
348  private:
349  const ssize_t mult1_;
350  const ssize_t mult2_;
351 };
352 
353 // Useful when the first composition state determines the tuple.
354 template <typename StateTuple>
356  public:
357  size_t operator()(const StateTuple &tuple) { return tuple.StateId1(); }
358 };
359 
360 // Useful when the second composition state determines the tuple.
361 template <typename StateTuple>
363  public:
364  size_t operator()(const StateTuple &tuple) { return tuple.StateId2(); }
365 };
366 
367 // A VectorStateTable over composition tuples. This can be used when the
368 // product of number of states in FST1 and FST2 (and the composition filter
369 // state hash) is manageable. If the FSTs are not expanded FSTs, they will
370 // first have their states counted.
371 template <typename Arc, typename StateTuple>
373  : public VectorStateTable<StateTuple, ComposeFingerprint<StateTuple>> {
374  public:
375  using StateId = typename Arc::StateId;
376  using StateTable =
378 
379  ProductComposeStateTable(const Fst<Arc> &fst1, const Fst<Arc> &fst2,
380  size_t table_size = 0)
381  : StateTable(new ComposeFingerprint<StateTuple>(CountStates(fst1),
382  CountStates(fst2)),
383  table_size) {}
384 
387  : StateTable(new ComposeFingerprint<StateTuple>(table.Fingerprint())) {}
388 
389  constexpr bool Error() const { return false; }
390 
391  private:
392  ProductComposeStateTable &operator=(const ProductComposeStateTable &table) =
393  delete;
394 };
395 
396 // A vector-backed table over composition tuples which can be used when the
397 // first FST is a string (i.e., satisfies kString property) and the second is
398 // deterministic and epsilon-free. It should be used with a composition filter
399 // that creates at most one filter state per tuple under these conditions (e.g.,
400 // SequenceComposeFilter or MatchComposeFilter).
401 template <typename Arc, typename StateTuple>
403  : public VectorStateTable<StateTuple,
404  ComposeState1Fingerprint<StateTuple>> {
405  public:
406  using StateId = typename Arc::StateId;
407  using StateTable =
409 
410  StringDetComposeStateTable(const Fst<Arc> &fst1, const Fst<Arc> &fst2)
411  : error_(false) {
412  static constexpr auto props2 = kIDeterministic | kNoIEpsilons;
413  if (fst1.Properties(kString, true) != kString) {
414  FSTERROR() << "StringDetComposeStateTable: 1st FST is not a string";
415  error_ = true;
416  } else if (fst2.Properties(props2, true) != props2) {
417  FSTERROR() << "StringDetComposeStateTable: 2nd FST is not deterministic "
418  "and epsilon-free";
419  error_ = true;
420  }
421  }
422 
425  : StateTable(table), error_(table.error_) {}
426 
427  bool Error() const { return error_; }
428 
429  private:
430  bool error_;
431 
433  delete;
434 };
435 
436 // A vector-backed table over composition tuples which can be used when the
437 // first FST is deterministic and epsilon-free and the second is a string (i.e.,
438 // satisfies kString). It should be used with a composition filter that creates
439 // at most one filter state per tuple under these conditions (e.g.,
440 // SequenceComposeFilter or MatchComposeFilter).
441 template <typename Arc, typename StateTuple>
443  : public VectorStateTable<StateTuple,
444  ComposeState2Fingerprint<StateTuple>> {
445  public:
446  using StateId = typename Arc::StateId;
447  using StateTable =
449 
450  DetStringComposeStateTable(const Fst<Arc> &fst1, const Fst<Arc> &fst2)
451  : error_(false) {
452  static constexpr auto props = kODeterministic | kNoOEpsilons;
453  if (fst1.Properties(props, true) != props) {
454  FSTERROR() << "StringDetComposeStateTable: 1st FST is not "
455  << "input-deterministic and epsilon-free";
456  error_ = true;
457  } else if (fst2.Properties(kString, true) != kString) {
458  FSTERROR() << "DetStringComposeStateTable: 2nd FST is not a string";
459  error_ = true;
460  }
461  }
462 
465  : StateTable(table), error_(table.error_) {}
466 
467  bool Error() const { return error_; }
468 
469  private:
470  bool error_;
471 
473  delete;
474 };
475 
476 // An erasable table over composition tuples. The Erase(StateId) method can be
477 // called if the user either is sure that composition will never return to that
478 // tuple or doesn't care that if it does, it is assigned a new state ID.
479 template <typename Arc, typename StateTuple>
481  : public ErasableStateTable<StateTuple, ComposeHash<StateTuple>> {
482  public:
483  ErasableComposeStateTable(const Fst<Arc> &fst1, const Fst<Arc> &fst2) {}
484 
485  constexpr bool Error() const { return false; }
486 
487  private:
488  ErasableComposeStateTable &operator=(const ErasableComposeStateTable &table) =
489  delete;
490 };
491 
492 } // namespace fst
493 
494 #endif // FST_STATE_TABLE_H_
const StateTuple & Tuple(StateId s) const
Definition: state-table.h:177
ProductComposeStateTable(const Fst< Arc > &fst1, const Fst< Arc > &fst2, size_t table_size=0)
Definition: state-table.h:379
FilterState GetFilterState() const
Definition: state-table.h:245
constexpr bool Error() const
Definition: state-table.h:389
StateId FindState(const StateTuple &tuple)
Definition: state-table.h:175
typename Arc::StateId StateId
Definition: state-table.h:406
const StateTuple & Tuple(StateId s) const
Definition: state-table.h:79
typename StateTuple::StateId StateId
Definition: state-table.h:66
DetStringComposeStateTable(const DetStringComposeStateTable< Arc, StateTuple > &table)
Definition: state-table.h:463
StringDetComposeStateTable(const StringDetComposeStateTable< Arc, StateTuple > &table)
Definition: state-table.h:423
size_t operator()(const StateTuple &tuple)
Definition: state-table.h:364
typename StateTuple::StateId StateId
Definition: state-table.h:332
const T & FindEntry(T::StateIds) const
Definition: bi-table.h:80
size_t operator()(const StateTuple &tuple)
Definition: state-table.h:357
#define LOG(type)
Definition: log.h:48
StateId FindState(const StateTuple &tuple)
Definition: state-table.h:124
const StateTuple & Tuple(StateId s) const
Definition: state-table.h:126
HashStateTable(size_t table_size)
Definition: state-table.h:74
VectorHashStateTable(Select *select, FP *fingerprint, H *hash, size_t vector_size=0, size_t tuple_size=0)
Definition: state-table.h:148
ComposeFingerprint(StateId nstates1, StateId nstates2)
Definition: state-table.h:340
constexpr int kNoStateId
Definition: fst.h:180
GenericComposeStateTable(const Fst< Arc > &fst1, const Fst< Arc > &fst2, size_t table_size)
Definition: state-table.h:317
DetStringComposeStateTable(const Fst< Arc > &fst1, const Fst< Arc > &fst2)
Definition: state-table.h:450
virtual uint64 Properties(uint64 mask, bool test) const =0
#define FSTERROR()
Definition: util.h:35
StringDetComposeStateTable(const Fst< Arc > &fst1, const Fst< Arc > &fst2)
Definition: state-table.h:410
const StateTuple & Tuple(StateId s) const
Definition: state-table.h:155
StateId FindState(const StateTuple &tuple)
Definition: state-table.h:100
constexpr uint64 kIDeterministic
Definition: properties.h:50
typename StateTuple::StateId StateId
Definition: state-table.h:139
const StateTuple & Tuple(StateId s) const
Definition: state-table.h:102
T::StateId FindId(const T &entry, bool insert=true)
Definition: bi-table.h:67
constexpr uint64 kNoOEpsilons
Definition: properties.h:72
size_t operator()(const StateTuple &tuple)
Definition: state-table.h:343
size_t operator()(const T &t) const
Definition: state-table.h:302
constexpr uint64 kString
Definition: properties.h:117
friend bool operator==(const DefaultComposeStateTuple &x, const DefaultComposeStateTuple &y)
Definition: state-table.h:249
GenericComposeStateTable(const Fst< Arc > &fst1, const Fst< Arc > &fst2)
Definition: state-table.h:315
VectorStateTable(FP *fingerprint=nullptr, size_t table_size=0)
Definition: state-table.h:121
constexpr bool Error() const
Definition: state-table.h:321
Arc::StateId CountStates(const Fst< Arc > &fst)
Definition: expanded-fst.h:154
friend bool operator==(const DefaultComposeStateTuple &x, const DefaultComposeStateTuple &y)
Definition: state-table.h:287
CompactHashStateTable(size_t table_size)
Definition: state-table.h:97
DefaultComposeStateTuple(StateId s1, StateId s2, const FilterState &)
Definition: state-table.h:276
DefaultComposeStateTuple(StateId s1, StateId s2, const FilterState &fs)
Definition: state-table.h:238
typename Arc::StateId StateId
Definition: state-table.h:446
constexpr uint64 kODeterministic
Definition: properties.h:55
StateId FindState(const StateTuple &tuple)
Definition: state-table.h:153
ErasableComposeStateTable(const Fst< Arc > &fst1, const Fst< Arc > &fst2)
Definition: state-table.h:483
const std::pair< StateId, StateId > & StatePair() const
Definition: state-table.h:285
const std::pair< StateId, StateId > & StatePair() const
Definition: state-table.h:247
typename Arc::StateId StateId
Definition: state-table.h:375
constexpr uint64 kNoIEpsilons
Definition: properties.h:67
ProductComposeStateTable(const ProductComposeStateTable< Arc, StateTuple > &table)
Definition: state-table.h:385
constexpr bool Error() const
Definition: state-table.h:485
StateId FindState(const StateTuple &tuple)
Definition: state-table.h:77