FST  openfst-1.7.1
OpenFst Library
equivalent.h
Go to the documentation of this file.
1 // See www.openfst.org for extensive documentation on this weighted
2 // finite-state transducer library.
3 //
4 // Functions and classes to determine the equivalence of two FSTs.
5 
6 #ifndef FST_EQUIVALENT_H_
7 #define FST_EQUIVALENT_H_
8 
9 #include <algorithm>
10 #include <deque>
11 #include <unordered_map>
12 #include <utility>
13 #include <vector>
14 #include <fst/log.h>
15 
16 #include <fst/encode.h>
17 #include <fst/push.h>
18 #include <fst/union-find.h>
19 #include <fst/vector-fst.h>
20 
21 
22 namespace fst {
23 namespace internal {
24 
25 // Traits-like struct holding utility functions/typedefs/constants for
26 // the equivalence algorithm.
27 //
28 // Encoding device: in order to make the statesets of the two acceptors
29 // disjoint, we map Arc::StateId on the type MappedId. The states of
30 // the first acceptor are mapped on odd numbers (s -> 2s + 1), and
31 // those of the second one on even numbers (s -> 2s + 2). The number 0
32 // is reserved for an implicit (non-final) dead state (required for
33 // the correct treatment of non-coaccessible states; kNoStateId is mapped to
34 // kDeadState for both acceptors). The union-find algorithm operates on the
35 // mapped IDs.
36 template <class Arc>
38  using StateId = typename Arc::StateId;
39  using Weight = typename Arc::Weight;
40 
41  using MappedId = StateId; // ID for an equivalence class.
42 
43  // MappedId for an implicit dead state.
44  static constexpr MappedId kDeadState = 0;
45 
46  // MappedId for lookup failure.
47  static constexpr MappedId kInvalidId = -1;
48 
49  // Maps state ID to the representative of the corresponding
50  // equivalence class. The parameter 'which_fst' takes the values 1
51  // and 2, identifying the input FST.
52  static MappedId MapState(StateId s, int32 which_fst) {
53  return (kNoStateId == s) ? kDeadState
54  : (static_cast<MappedId>(s) << 1) + which_fst;
55  }
56 
57  // Maps set ID to State ID.
59  return static_cast<StateId>((--id) >> 1);
60  }
61 
62  // Convenience function: checks if state with MappedId s is final in
63  // acceptor fa.
64  static bool IsFinal(const Fst<Arc> &fa, MappedId s) {
65  return (kDeadState == s) ? false
66  : (fa.Final(UnMapState(s)) != Weight::Zero());
67  }
68  // Convenience function: returns the representative of ID in sets,
69  // creating a new set if needed.
71  const auto repr = sets->FindSet(id);
72  if (repr != kInvalidId) {
73  return repr;
74  } else {
75  sets->MakeSet(id);
76  return id;
77  }
78  }
79 };
80 
81 template <class Arc>
82 constexpr
84 
85 template <class Arc>
86 constexpr
88 
89 } // namespace internal
90 
91 // Equivalence checking algorithm: determines if the two FSTs fst1 and fst2
92 // are equivalent. The input FSTs must be deterministic input-side epsilon-free
93 // acceptors, unweighted or with weights over a left semiring. Two acceptors are
94 // considered equivalent if they accept exactly the same set of strings (with
95 // the same weights).
96 //
97 // The algorithm (cf. Aho, Hopcroft and Ullman, "The Design and Analysis of
98 // Computer Programs") successively constructs sets of states that can be
99 // reached by the same prefixes, starting with a set containing the start states
100 // of both acceptors. A disjoint tree forest (the union-find algorithm) is used
101 // to represent the sets of states. The algorithm returns false if one of the
102 // constructed sets contains both final and non-final states. Returns an
103 // optional error value (useful when FLAGS_error_fatal = false).
104 //
105 // Complexity:
106 //
107 // Quasi-linear, i.e., O(n G(n)), where
108 //
109 // n = |S1| + |S2| is the number of states in both acceptors
110 //
111 // G(n) is a very slowly growing function that can be approximated
112 // by 4 by all practical purposes.
113 template <class Arc>
114 bool Equivalent(const Fst<Arc> &fst1, const Fst<Arc> &fst2,
115  float delta = kDelta, bool *error = nullptr) {
116  using Weight = typename Arc::Weight;
117  if (error) *error = false;
118  // Check that the symbol table are compatible.
119  if (!CompatSymbols(fst1.InputSymbols(), fst2.InputSymbols()) ||
120  !CompatSymbols(fst1.OutputSymbols(), fst2.OutputSymbols())) {
121  FSTERROR() << "Equivalent: Input/output symbol tables of 1st argument "
122  << "do not match input/output symbol tables of 2nd argument";
123  if (error) *error = true;
124  return false;
125  }
126  // Check properties first.
127  static constexpr auto props = kNoEpsilons | kIDeterministic | kAcceptor;
128  if (fst1.Properties(props, true) != props) {
129  FSTERROR() << "Equivalent: 1st argument not an"
130  << " epsilon-free deterministic acceptor";
131  if (error) *error = true;
132  return false;
133  }
134  if (fst2.Properties(props, true) != props) {
135  FSTERROR() << "Equivalent: 2nd argument not an"
136  << " epsilon-free deterministic acceptor";
137  if (error) *error = true;
138  return false;
139  }
140  if ((fst1.Properties(kUnweighted, true) != kUnweighted) ||
141  (fst2.Properties(kUnweighted, true) != kUnweighted)) {
142  VectorFst<Arc> efst1(fst1);
143  VectorFst<Arc> efst2(fst2);
144  Push(&efst1, REWEIGHT_TO_INITIAL, delta);
145  Push(&efst2, REWEIGHT_TO_INITIAL, delta);
146  ArcMap(&efst1, QuantizeMapper<Arc>(delta));
147  ArcMap(&efst2, QuantizeMapper<Arc>(delta));
148  EncodeMapper<Arc> mapper(kEncodeWeights | kEncodeLabels, ENCODE);
149  ArcMap(&efst1, &mapper);
150  ArcMap(&efst2, &mapper);
151  return Equivalent(efst1, efst2);
152  }
153  using Util = internal::EquivalenceUtil<Arc>;
154  using MappedId = typename Util::MappedId;
155  enum { FST1 = 1, FST2 = 2 }; // Required by Util::MapState(...)
156  auto s1 = Util::MapState(fst1.Start(), FST1);
157  auto s2 = Util::MapState(fst2.Start(), FST2);
158  // The union-find structure.
159  UnionFind<MappedId> eq_classes(1000, Util::kInvalidId);
160  // Initializes the union-find structure.
161  eq_classes.MakeSet(s1);
162  eq_classes.MakeSet(s2);
163  // Data structure for the (partial) acceptor transition function of fst1 and
164  // fst2: input labels mapped to pairs of MappedIds representing destination
165  // states of the corresponding arcs in fst1 and fst2, respectively.
166  using Label2StatePairMap =
167  std::unordered_map<typename Arc::Label, std::pair<MappedId, MappedId>>;
168  Label2StatePairMap arc_pairs;
169  // Pairs of MappedId's to be processed, organized in a queue.
170  std::deque<std::pair<MappedId, MappedId>> q;
171  bool ret = true;
172  // Returns early if the start states differ w.r.t. finality.
173  if (Util::IsFinal(fst1, s1) != Util::IsFinal(fst2, s2)) ret = false;
174  // Main loop: explores the two acceptors in a breadth-first manner, updating
175  // the equivalence relation on the statesets. Loop invariant: each block of
176  // the states contains either final states only or non-final states only.
177  for (q.push_back(std::make_pair(s1, s2)); ret && !q.empty(); q.pop_front()) {
178  s1 = q.front().first;
179  s2 = q.front().second;
180  // Representatives of the equivalence classes of s1/s2.
181  const auto rep1 = Util::FindSet(&eq_classes, s1);
182  const auto rep2 = Util::FindSet(&eq_classes, s2);
183  if (rep1 != rep2) {
184  eq_classes.Union(rep1, rep2);
185  arc_pairs.clear();
186  // Copies outgoing arcs starting at s1 into the hash-table.
187  if (Util::kDeadState != s1) {
188  ArcIterator<Fst<Arc>> arc_iter(fst1, Util::UnMapState(s1));
189  for (; !arc_iter.Done(); arc_iter.Next()) {
190  const auto &arc = arc_iter.Value();
191  // Zero-weight arcs are treated as if they did not exist.
192  if (arc.weight != Weight::Zero()) {
193  arc_pairs[arc.ilabel].first = Util::MapState(arc.nextstate, FST1);
194  }
195  }
196  }
197  // Copies outgoing arcs starting at s2 into the hashtable.
198  if (Util::kDeadState != s2) {
199  ArcIterator<Fst<Arc>> arc_iter(fst2, Util::UnMapState(s2));
200  for (; !arc_iter.Done(); arc_iter.Next()) {
201  const auto &arc = arc_iter.Value();
202  // Zero-weight arcs are treated as if they did not exist.
203  if (arc.weight != Weight::Zero()) {
204  arc_pairs[arc.ilabel].second = Util::MapState(arc.nextstate, FST2);
205  }
206  }
207  }
208  // Iterates through the hashtable and process pairs of target states.
209  for (const auto &arc_iter : arc_pairs) {
210  const auto &pair = arc_iter.second;
211  if (Util::IsFinal(fst1, pair.first) !=
212  Util::IsFinal(fst2, pair.second)) {
213  // Detected inconsistency: return false.
214  ret = false;
215  break;
216  }
217  q.push_back(pair);
218  }
219  }
220  }
221  if (fst1.Properties(kError, false) || fst2.Properties(kError, false)) {
222  if (error) *error = true;
223  return false;
224  }
225  return ret;
226 }
227 
228 } // namespace fst
229 
230 #endif // FST_EQUIVALENT_H_
constexpr uint64 kNoEpsilons
Definition: properties.h:62
void ArcMap(MutableFst< A > *fst, C *mapper)
Definition: arc-map.h:94
static constexpr MappedId kInvalidId
Definition: equivalent.h:47
virtual Weight Final(StateId) const =0
static constexpr MappedId kDeadState
Definition: equivalent.h:44
constexpr int kNoStateId
Definition: fst.h:180
const Arc & Value() const
Definition: fst.h:503
virtual uint64 Properties(uint64 mask, bool test) const =0
#define FSTERROR()
Definition: util.h:35
constexpr uint64 kUnweighted
Definition: properties.h:87
static MappedId FindSet(UnionFind< MappedId > *sets, MappedId id)
Definition: equivalent.h:70
constexpr uint64 kIDeterministic
Definition: properties.h:50
static StateId UnMapState(MappedId id)
Definition: equivalent.h:58
virtual StateId Start() const =0
bool Done() const
Definition: fst.h:499
typename Arc::Weight Weight
Definition: equivalent.h:39
constexpr uint64 kAcceptor
Definition: properties.h:45
virtual const SymbolTable * InputSymbols() const =0
static MappedId MapState(StateId s, int32 which_fst)
Definition: equivalent.h:52
int32_t int32
Definition: types.h:26
T MakeSet(T item)
Definition: union-find.h:41
constexpr uint64 kError
Definition: properties.h:33
bool Equivalent(const Fst< Arc > &fst1, const Fst< Arc > &fst2, float delta=kDelta, bool *error=nullptr)
Definition: equivalent.h:114
typename Arc::StateId StateId
Definition: equivalent.h:38
bool CompatSymbols(const SymbolTable *syms1, const SymbolTable *syms2, bool warning=true)
T FindSet(T item)
Definition: union-find.h:26
static bool IsFinal(const Fst< Arc > &fa, MappedId s)
Definition: equivalent.h:64
void Push(MutableFst< Arc > *fst, ReweightType type, float delta=kDelta, bool remove_total_weight=false)
Definition: push.h:77
constexpr float kDelta
Definition: weight.h:109
virtual const SymbolTable * OutputSymbols() const =0
void Next()
Definition: fst.h:507