FST  openfst-1.8.2
OpenFst Library
state-table.h
Go to the documentation of this file.
1 // Copyright 2005-2020 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the 'License');
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an 'AS IS' BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // See www.openfst.org for extensive documentation on this weighted
16 // finite-state transducer library.
17 //
18 // Classes for representing the mapping between state tuples and state IDs.
19 
20 #ifndef FST_STATE_TABLE_H_
21 #define FST_STATE_TABLE_H_
22 
23 #include <deque>
24 #include <utility>
25 #include <vector>
26 
27 #include <fst/log.h>
28 
29 #include <fst/bi-table.h>
30 #include <fst/expanded-fst.h>
31 #include <fst/filter-state.h>
32 
33 
34 namespace fst {
35 
36 // State tables determine the bijective mapping between state tuples (e.g., in
37 // composition, triples of two FST states and a composition filter state) and
38 // their corresponding state IDs. They are classes, templated on state tuples,
39 // with the following interface:
40 //
41 // template <class T>
42 // class StateTable {
43 // public:
44 // using StateTuple = T;
45 //
46 // // Required constructors.
47 // StateTable();
48 //
49 // StateTable(const StateTable &);
50 //
51 // // Looks up state ID by tuple. If it doesn't exist, then add it.
52 // StateId FindState(const StateTuple &tuple);
53 //
54 // // Looks up state tuple by state ID.
55 // const StateTuple<StateId> &Tuple(StateId s) const;
56 //
57 // // # of stored tuples.
58 // StateId Size() const;
59 // };
60 //
61 // A state tuple has the form:
62 //
63 // template <class S>
64 // struct StateTuple {
65 // using StateId = S;
66 //
67 // // Required constructors.
68 //
69 // StateTuple();
70 //
71 // StateTuple(const StateTuple &tuple);
72 // };
73 
74 // An implementation using a hash map for the tuple to state ID mapping. The
75 // state tuple T must support operator==.
76 template <class T, class H>
77 class HashStateTable : public HashBiTable<typename T::StateId, T, H> {
78  public:
79  using StateTuple = T;
80  using StateId = typename StateTuple::StateId;
81 
85 
87 
88  explicit HashStateTable(size_t table_size)
89  : HashBiTable<StateId, StateTuple, H>(table_size) {}
90 
91  StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
92 
93  const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
94 };
95 
96 // An implementation using a hash map for the tuple to state ID mapping. The
97 // state tuple T must support operator==.
98 template <class T, class H>
100  : public CompactHashBiTable<typename T::StateId, T, H> {
101  public:
102  using StateTuple = T;
103  using StateId = typename StateTuple::StateId;
104 
108 
110 
111  explicit CompactHashStateTable(size_t table_size)
112  : CompactHashBiTable<StateId, StateTuple, H>(table_size) {}
113 
114  StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
115 
116  const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
117 };
118 
119 // An implementation using a vector for the tuple to state mapping. It is
120 // passed a fingerprint functor that should fingerprint tuples uniquely to an
121 // integer that can used as a vector index. Normally, VectorStateTable
122 // constructs the fingerprint functor. Alternately, the user can pass this
123 // object, in which case the table takes ownership.
124 template <class T, class FP>
125 class VectorStateTable : public VectorBiTable<typename T::StateId, T, FP> {
126  public:
127  using StateTuple = T;
128  using StateId = typename StateTuple::StateId;
129 
134 
135  explicit VectorStateTable(const FP &fingerprint = FP(), size_t table_size = 0)
136  : VectorBiTable<StateId, StateTuple, FP>(fingerprint, table_size) {}
137 
138  StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
139 
140  const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
141 };
142 
143 // An implementation using a vector and a compact hash table. The selection
144 // functor returns true for tuples to be hashed in the vector. The fingerprint
145 // functor should fingerprint tuples uniquely to an integer that can be used as
146 // a vector index. A hash functor is used when hashing tuples into the compact
147 // hash table.
148 template <class T, class Select, class FP, class H>
150  : public VectorHashBiTable<typename T::StateId, T, Select, FP, H> {
151  public:
152  using StateTuple = T;
153  using StateId = typename StateTuple::StateId;
154 
161 
162  VectorHashStateTable(const Select &select, const FP &fingerprint,
163  const H &hash, size_t vector_size = 0,
164  size_t tuple_size = 0)
165  : VectorHashBiTable<StateId, StateTuple, Select, FP, H>(
166  select, fingerprint, hash, vector_size, tuple_size) {}
167 
168  StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
169 
170  const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
171 };
172 
173 // An implementation using a hash map to map from tuples to state IDs. This
174 // version permits erasing of states. The state tuple's default constructor
175 // must produce a tuple that will never be seen and the table must suppor
176 // operator==.
177 template <class T, class H>
178 class ErasableStateTable : public ErasableBiTable<typename T::StateId, T, H> {
179  public:
180  using StateTuple = T;
181  using StateId = typename StateTuple::StateId;
182 
187 
189 
190  StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
191 
192  const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
193 };
194 
195 // The composition state table has the form:
196 //
197 // template <class Arc, class FilterState>
198 // class ComposeStateTable {
199 // public:
200 // using StateId = typename Arc::StateId;
201 //
202 // // Required constructors.
203 //
204 // ComposeStateTable(const Fst<Arc> &fst1, const Fst<Arc> &fst2);
205 // ComposeStateTable(const ComposeStateTable<Arc, FilterState> &table);
206 //
207 // // Looks up a state ID by tuple, adding it if doesn't exist.
208 // StateId FindState(const StateTuple &tuple);
209 //
210 // // Looks up a tuple by state ID.
211 // const ComposeStateTuple<StateId> &Tuple(StateId s) const;
212 //
213 // // The number of stored tuples.
214 // StateId Size() const;
215 //
216 // // Return true if error was encountered.
217 // bool Error() const;
218 // };
219 //
220 // The following interface is used to represent the composition state.
221 //
222 // template <class S, class FS>
223 // class CompositionStateTuple {
224 // public:
225 // using StateId = typename StateId;
226 // using FS = FilterState;
227 //
228 // // Required constructors.
229 // StateTuple();
230 // StateTuple(StateId s1, StateId s2, const FilterState &fs);
231 //
232 // StateId StateId1() const;
233 // StateId StateId2() const;
234 //
235 // FilterState GetFilterState() const;
236 //
237 // std::pair<StateId, StateId> StatePair() const;
238 //
239 // size_t Hash() const;
240 //
241 // friend bool operator==(const StateTuple& x, const StateTuple &y);
242 // }
243 //
244 template <typename S, typename FS>
246  public:
247  using StateId = S;
248  using FilterState = FS;
249 
251  : state_pair_(kNoStateId, kNoStateId), fs_(FilterState::NoState()) {}
252 
254  : state_pair_(s1, s2), fs_(fs) {}
255 
256  StateId StateId1() const { return state_pair_.first; }
257 
258  StateId StateId2() const { return state_pair_.second; }
259 
260  FilterState GetFilterState() const { return fs_; }
261 
262  const std::pair<StateId, StateId> &StatePair() const { return state_pair_; }
263 
264  friend bool operator==(const DefaultComposeStateTuple &x,
265  const DefaultComposeStateTuple &y) {
266  return (&x == &y) || (x.state_pair_ == y.state_pair_ && x.fs_ == y.fs_);
267  }
268 
269  size_t Hash() const {
270  return static_cast<size_t>(StateId1()) +
271  static_cast<size_t>(StateId2()) * 7853u +
272  GetFilterState().Hash() * 7867u;
273  }
274 
275  private:
276  std::pair<StateId, StateId> state_pair_;
277  FilterState fs_; // State of composition filter.
278 };
279 
280 // Specialization for TrivialFilterState that does not explicitly store the
281 // filter state since it is always the unique non-blocking state.
282 template <typename S>
284  public:
285  using StateId = S;
287 
289 
291  : state_pair_(s1, s2) {}
292 
293  StateId StateId1() const { return state_pair_.first; }
294 
295  StateId StateId2() const { return state_pair_.second; }
296 
297  FilterState GetFilterState() const { return FilterState(true); }
298 
299  const std::pair<StateId, StateId> &StatePair() const { return state_pair_; }
300 
301  friend bool operator==(const DefaultComposeStateTuple &x,
302  const DefaultComposeStateTuple &y) {
303  return (&x == &y) || (x.state_pair_ == y.state_pair_);
304  }
305 
306  size_t Hash() const { return StateId1() + StateId2() * size_t{7853}; }
307 
308  private:
309  std::pair<StateId, StateId> state_pair_;
310 };
311 
312 // Hashing of composition state tuples.
313 template <typename T>
314 class ComposeHash {
315  public:
316  size_t operator()(const T &t) const { return t.Hash(); }
317 };
318 
319 // A HashStateTable over composition tuples.
320 template <typename Arc, typename FilterState,
321  typename StateTuple =
323  typename StateTable =
325 class GenericComposeStateTable : public StateTable {
326  public:
327  using StateId = typename Arc::StateId;
328 
329  GenericComposeStateTable(const Fst<Arc> &fst1, const Fst<Arc> &fst2) {}
330 
331  GenericComposeStateTable(const Fst<Arc> &fst1, const Fst<Arc> &fst2,
332  size_t table_size)
333  : StateTable(table_size) {}
334 
335  constexpr bool Error() const { return false; }
336 
337  private:
338  GenericComposeStateTable &operator=(const GenericComposeStateTable &table) =
339  delete;
340 };
341 
342 // Fingerprint for general composition tuples.
343 template <typename StateTuple>
345  public:
346  using StateId = typename StateTuple::StateId;
347 
348  // Required but suboptimal constructor.
349  ComposeFingerprint() : mult1_(8192), mult2_(8192) {
350  LOG(WARNING) << "TupleFingerprint: # of FST states should be provided.";
351  }
352 
353  // Constructor is provided the sizes of the input FSTs.
354  ComposeFingerprint(StateId nstates1, StateId nstates2)
355  : mult1_(nstates1), mult2_(nstates1 * nstates2) {}
356 
357  size_t operator()(const StateTuple &tuple) const {
358  return tuple.StateId1() + tuple.StateId2() * mult1_ +
359  tuple.GetFilterState().Hash() * mult2_;
360  }
361 
362  private:
363  const ssize_t mult1_;
364  const ssize_t mult2_;
365 };
366 
367 // Useful when the first composition state determines the tuple.
368 template <typename StateTuple>
370  public:
371  size_t operator()(const StateTuple &tuple) { return tuple.StateId1(); }
372 };
373 
374 // Useful when the second composition state determines the tuple.
375 template <typename StateTuple>
377  public:
378  size_t operator()(const StateTuple &tuple) { return tuple.StateId2(); }
379 };
380 
381 // A VectorStateTable over composition tuples. This can be used when the
382 // product of number of states in FST1 and FST2 (and the composition filter
383 // state hash) is manageable. If the FSTs are not expanded FSTs, they will
384 // first have their states counted.
385 template <typename Arc, typename StateTuple>
387  : public VectorStateTable<StateTuple, ComposeFingerprint<StateTuple>> {
388  public:
389  using StateId = typename Arc::StateId;
390  using StateTable =
392 
393  ProductComposeStateTable(const Fst<Arc> &fst1, const Fst<Arc> &fst2,
394  size_t table_size = 0)
395  : StateTable(ComposeFingerprint<StateTuple>(CountStates(fst1),
396  CountStates(fst2)),
397  table_size) {}
398 
401  : StateTable(ComposeFingerprint<StateTuple>(table.Fingerprint())) {}
402 
403  constexpr bool Error() const { return false; }
404 
405  private:
406  ProductComposeStateTable &operator=(const ProductComposeStateTable &table) =
407  delete;
408 };
409 
410 // A vector-backed table over composition tuples which can be used when the
411 // first FST is a string (i.e., satisfies kString property) and the second is
412 // deterministic and epsilon-free. It should be used with a composition filter
413 // that creates at most one filter state per tuple under these conditions (e.g.,
414 // SequenceComposeFilter or MatchComposeFilter).
415 template <typename Arc, typename StateTuple>
417  : public VectorStateTable<StateTuple,
418  ComposeState1Fingerprint<StateTuple>> {
419  public:
420  using StateId = typename Arc::StateId;
421  using StateTable =
423 
424  StringDetComposeStateTable(const Fst<Arc> &fst1, const Fst<Arc> &fst2)
425  : error_(false) {
426  static constexpr auto props2 = kIDeterministic | kNoIEpsilons;
427  if (fst1.Properties(kString, true) != kString) {
428  FSTERROR() << "StringDetComposeStateTable: 1st FST is not a string";
429  error_ = true;
430  } else if (fst2.Properties(props2, true) != props2) {
431  FSTERROR() << "StringDetComposeStateTable: 2nd FST is not deterministic "
432  "and epsilon-free";
433  error_ = true;
434  }
435  }
436 
439  : StateTable(table), error_(table.error_) {}
440 
441  bool Error() const { return error_; }
442 
443  private:
444  bool error_;
445 
447  delete;
448 };
449 
450 // A vector-backed table over composition tuples which can be used when the
451 // first FST is deterministic and epsilon-free and the second is a string (i.e.,
452 // satisfies kString). It should be used with a composition filter that creates
453 // at most one filter state per tuple under these conditions (e.g.,
454 // SequenceComposeFilter or MatchComposeFilter).
455 template <typename Arc, typename StateTuple>
457  : public VectorStateTable<StateTuple,
458  ComposeState2Fingerprint<StateTuple>> {
459  public:
460  using StateId = typename Arc::StateId;
461  using StateTable =
463 
464  DetStringComposeStateTable(const Fst<Arc> &fst1, const Fst<Arc> &fst2)
465  : error_(false) {
466  static constexpr auto props = kODeterministic | kNoOEpsilons;
467  if (fst1.Properties(props, true) != props) {
468  FSTERROR() << "StringDetComposeStateTable: 1st FST is not "
469  << "input-deterministic and epsilon-free";
470  error_ = true;
471  } else if (fst2.Properties(kString, true) != kString) {
472  FSTERROR() << "DetStringComposeStateTable: 2nd FST is not a string";
473  error_ = true;
474  }
475  }
476 
479  : StateTable(table), error_(table.error_) {}
480 
481  bool Error() const { return error_; }
482 
483  private:
484  bool error_;
485 
487  delete;
488 };
489 
490 // An erasable table over composition tuples. The Erase(StateId) method can be
491 // called if the user either is sure that composition will never return to that
492 // tuple or doesn't care that if it does, it is assigned a new state ID.
493 template <typename Arc, typename StateTuple>
495  : public ErasableStateTable<StateTuple, ComposeHash<StateTuple>> {
496  public:
497  ErasableComposeStateTable(const Fst<Arc> &fst1, const Fst<Arc> &fst2) {}
498 
499  constexpr bool Error() const { return false; }
500 
501  private:
502  ErasableComposeStateTable &operator=(const ErasableComposeStateTable &table) =
503  delete;
504 };
505 
506 } // namespace fst
507 
508 #endif // FST_STATE_TABLE_H_
const StateTuple & Tuple(StateId s) const
Definition: state-table.h:192
ProductComposeStateTable(const Fst< Arc > &fst1, const Fst< Arc > &fst2, size_t table_size=0)
Definition: state-table.h:393
virtual uint64_t Properties(uint64_t mask, bool test) const =0
FilterState GetFilterState() const
Definition: state-table.h:260
VectorStateTable(const FP &fingerprint=FP(), size_t table_size=0)
Definition: state-table.h:135
constexpr bool Error() const
Definition: state-table.h:403
StateId FindState(const StateTuple &tuple)
Definition: state-table.h:190
typename Arc::StateId StateId
Definition: state-table.h:420
const StateTuple & Tuple(StateId s) const
Definition: state-table.h:93
typename StateTuple::StateId StateId
Definition: state-table.h:80
DetStringComposeStateTable(const DetStringComposeStateTable< Arc, StateTuple > &table)
Definition: state-table.h:477
StringDetComposeStateTable(const StringDetComposeStateTable< Arc, StateTuple > &table)
Definition: state-table.h:437
size_t operator()(const StateTuple &tuple) const
Definition: state-table.h:357
size_t operator()(const StateTuple &tuple)
Definition: state-table.h:378
typename StateTuple::StateId StateId
Definition: state-table.h:346
const T & FindEntry(T::StateIds) const
Definition: bi-table.h:96
size_t operator()(const StateTuple &tuple)
Definition: state-table.h:371
#define LOG(type)
Definition: log.h:49
StateId FindState(const StateTuple &tuple)
Definition: state-table.h:138
const StateTuple & Tuple(StateId s) const
Definition: state-table.h:140
HashStateTable(size_t table_size)
Definition: state-table.h:88
ComposeFingerprint(StateId nstates1, StateId nstates2)
Definition: state-table.h:354
constexpr uint64_t kODeterministic
Definition: properties.h:73
constexpr int kNoStateId
Definition: fst.h:202
GenericComposeStateTable(const Fst< Arc > &fst1, const Fst< Arc > &fst2, size_t table_size)
Definition: state-table.h:331
DetStringComposeStateTable(const Fst< Arc > &fst1, const Fst< Arc > &fst2)
Definition: state-table.h:464
#define FSTERROR()
Definition: util.h:53
StringDetComposeStateTable(const Fst< Arc > &fst1, const Fst< Arc > &fst2)
Definition: state-table.h:424
const StateTuple & Tuple(StateId s) const
Definition: state-table.h:170
constexpr uint64_t kNoOEpsilons
Definition: properties.h:90
StateId FindState(const StateTuple &tuple)
Definition: state-table.h:114
typename StateTuple::StateId StateId
Definition: state-table.h:153
const StateTuple & Tuple(StateId s) const
Definition: state-table.h:116
T::StateId FindId(const T &entry, bool insert=true)
Definition: bi-table.h:83
constexpr uint64_t kIDeterministic
Definition: properties.h:68
size_t operator()(const T &t) const
Definition: state-table.h:316
friend bool operator==(const DefaultComposeStateTuple &x, const DefaultComposeStateTuple &y)
Definition: state-table.h:264
GenericComposeStateTable(const Fst< Arc > &fst1, const Fst< Arc > &fst2)
Definition: state-table.h:329
constexpr bool Error() const
Definition: state-table.h:335
Arc::StateId CountStates(const Fst< Arc > &fst)
Definition: expanded-fst.h:169
friend bool operator==(const DefaultComposeStateTuple &x, const DefaultComposeStateTuple &y)
Definition: state-table.h:301
CompactHashStateTable(size_t table_size)
Definition: state-table.h:111
DefaultComposeStateTuple(StateId s1, StateId s2, const FilterState &)
Definition: state-table.h:290
constexpr uint64_t kString
Definition: properties.h:135
DefaultComposeStateTuple(StateId s1, StateId s2, const FilterState &fs)
Definition: state-table.h:253
typename Arc::StateId StateId
Definition: state-table.h:460
StateId FindState(const StateTuple &tuple)
Definition: state-table.h:168
ErasableComposeStateTable(const Fst< Arc > &fst1, const Fst< Arc > &fst2)
Definition: state-table.h:497
constexpr uint64_t kNoIEpsilons
Definition: properties.h:85
const std::pair< StateId, StateId > & StatePair() const
Definition: state-table.h:299
const std::pair< StateId, StateId > & StatePair() const
Definition: state-table.h:262
typename Arc::StateId StateId
Definition: state-table.h:389
ProductComposeStateTable(const ProductComposeStateTable< Arc, StateTuple > &table)
Definition: state-table.h:399
constexpr bool Error() const
Definition: state-table.h:499
StateId FindState(const StateTuple &tuple)
Definition: state-table.h:91
VectorHashStateTable(const Select &select, const FP &fingerprint, const H &hash, size_t vector_size=0, size_t tuple_size=0)
Definition: state-table.h:162