FST  openfst-1.8.3
OpenFst Library
equivalent.h
Go to the documentation of this file.
1 // Copyright 2005-2024 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the 'License');
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an 'AS IS' BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // See www.openfst.org for extensive documentation on this weighted
16 // finite-state transducer library.
17 //
18 // Functions and classes to determine the equivalence of two FSTs.
19 
20 #ifndef FST_EQUIVALENT_H_
21 #define FST_EQUIVALENT_H_
22 
23 #include <algorithm>
24 #include <cstdint>
25 #include <queue>
26 #include <utility>
27 #include <vector>
28 
29 #include <fst/log.h>
30 #include <fst/arc-map.h>
31 #include <fst/encode.h>
32 #include <fst/fst.h>
33 #include <fst/properties.h>
34 #include <fst/push.h>
35 #include <fst/reweight.h>
36 #include <fst/symbol-table.h>
37 #include <fst/union-find.h>
38 #include <fst/util.h>
39 #include <fst/vector-fst.h>
40 #include <fst/weight.h>
41 #include <unordered_map>
42 
43 namespace fst {
44 namespace internal {
45 
46 // Traits-like struct holding utility functions/typedefs/constants for
47 // the equivalence algorithm.
48 //
49 // Encoding device: in order to make the statesets of the two acceptors
50 // disjoint, we map Arc::StateId on the type MappedId. The states of
51 // the first acceptor are mapped on odd numbers (s -> 2s + 1), and
52 // those of the second one on even numbers (s -> 2s + 2). The number 0
53 // is reserved for an implicit (non-final) dead state (required for
54 // the correct treatment of non-coaccessible states; kNoStateId is mapped to
55 // kDeadState for both acceptors). The union-find algorithm operates on the
56 // mapped IDs.
57 template <class Arc>
59  using StateId = typename Arc::StateId;
60  using Weight = typename Arc::Weight;
61 
62  using MappedId = StateId; // ID for an equivalence class.
63 
64  // MappedId for an implicit dead state.
65  static constexpr MappedId kDeadState = 0;
66 
67  // MappedId for lookup failure.
68  static constexpr MappedId kInvalidId = -1;
69 
70  // Maps state ID to the representative of the corresponding
71  // equivalence class. The parameter 'which_fst' takes the values 1
72  // and 2, identifying the input FST.
73  static MappedId MapState(StateId s, int32_t which_fst) {
74  return (kNoStateId == s) ? kDeadState
75  : (static_cast<MappedId>(s) << 1) + which_fst;
76  }
77 
78  // Maps set ID to State ID.
80  return static_cast<StateId>((--id) >> 1);
81  }
82 
83  // Convenience function: checks if state with MappedId s is final in
84  // acceptor fa.
85  static bool IsFinal(const Fst<Arc> &fa, MappedId s) {
86  return (kDeadState == s) ? false
87  : (fa.Final(UnMapState(s)) != Weight::Zero());
88  }
89  // Convenience function: returns the representative of ID in sets,
90  // creating a new set if needed.
92  const auto repr = sets->FindSet(id);
93  if (repr != kInvalidId) {
94  return repr;
95  } else {
96  sets->MakeSet(id);
97  return id;
98  }
99  }
100 };
101 
102 } // namespace internal
103 
104 // Equivalence checking algorithm: determines if the two FSTs fst1 and fst2
105 // are equivalent. The input FSTs must be deterministic input-side epsilon-free
106 // acceptors, unweighted or with weights over a left semiring. Two acceptors are
107 // considered equivalent if they accept exactly the same set of strings (with
108 // the same weights).
109 //
110 // The algorithm (cf. Aho, Hopcroft and Ullman, "The Design and Analysis of
111 // Computer Programs") successively constructs sets of states that can be
112 // reached by the same prefixes, starting with a set containing the start states
113 // of both acceptors. A disjoint tree forest (the union-find algorithm) is used
114 // to represent the sets of states. The algorithm returns false if one of the
115 // constructed sets contains both final and non-final states. Returns an
116 // optional error value (useful when FST_FLAGS_error_fatal = false).
117 //
118 // Complexity:
119 //
120 // Quasi-linear, i.e., O(n G(n)), where
121 //
122 // n = |S1| + |S2| is the number of states in both acceptors
123 //
124 // G(n) is a very slowly growing function that can be approximated
125 // by 4 by all practical purposes.
126 template <class Arc>
127 bool Equivalent(const Fst<Arc> &fst1, const Fst<Arc> &fst2,
128  float delta = kDelta, bool *error = nullptr) {
129  using Weight = typename Arc::Weight;
130  if (error) *error = false;
131  // Check that the symbol table are compatible.
132  if (!CompatSymbols(fst1.InputSymbols(), fst2.InputSymbols()) ||
133  !CompatSymbols(fst1.OutputSymbols(), fst2.OutputSymbols())) {
134  FSTERROR() << "Equivalent: Input/output symbol tables of 1st argument "
135  << "do not match input/output symbol tables of 2nd argument";
136  if (error) *error = true;
137  return false;
138  }
139  // Check properties first.
140  static constexpr auto props = kNoEpsilons | kIDeterministic | kAcceptor;
141  if (fst1.Properties(props, true) != props) {
142  FSTERROR() << "Equivalent: 1st argument not an"
143  << " epsilon-free deterministic acceptor";
144  if (error) *error = true;
145  return false;
146  }
147  if (fst2.Properties(props, true) != props) {
148  FSTERROR() << "Equivalent: 2nd argument not an"
149  << " epsilon-free deterministic acceptor";
150  if (error) *error = true;
151  return false;
152  }
153  if ((fst1.Properties(kUnweighted, true) != kUnweighted) ||
154  (fst2.Properties(kUnweighted, true) != kUnweighted)) {
155  VectorFst<Arc> efst1(fst1);
156  VectorFst<Arc> efst2(fst2);
157  Push(&efst1, REWEIGHT_TO_INITIAL, delta);
158  Push(&efst2, REWEIGHT_TO_INITIAL, delta);
159  ArcMap(&efst1, QuantizeMapper<Arc>(delta));
160  ArcMap(&efst2, QuantizeMapper<Arc>(delta));
162  ArcMap(&efst1, &mapper);
163  ArcMap(&efst2, &mapper);
164  return Equivalent(efst1, efst2);
165  }
166  using Util = internal::EquivalenceUtil<Arc>;
167  using MappedId = typename Util::MappedId;
168  enum { FST1 = 1, FST2 = 2 }; // Required by Util::MapState(...)
169  auto s1 = Util::MapState(fst1.Start(), FST1);
170  auto s2 = Util::MapState(fst2.Start(), FST2);
171  // The union-find structure.
172  UnionFind<MappedId> eq_classes(1000, Util::kInvalidId);
173  // Initializes the union-find structure.
174  eq_classes.MakeSet(s1);
175  eq_classes.MakeSet(s2);
176  // Data structure for the (partial) acceptor transition function of fst1 and
177  // fst2: input labels mapped to pairs of MappedIds representing destination
178  // states of the corresponding arcs in fst1 and fst2, respectively.
179  using Label2StatePairMap =
180  std::unordered_map<typename Arc::Label, std::pair<MappedId, MappedId>>;
181  Label2StatePairMap arc_pairs;
182  // Pairs of MappedId's to be processed, organized in a queue.
183  std::queue<std::pair<MappedId, MappedId>> q;
184  bool ret = true;
185  // Returns early if the start states differ w.r.t. finality.
186  if (Util::IsFinal(fst1, s1) != Util::IsFinal(fst2, s2)) ret = false;
187  // Main loop: explores the two acceptors in a breadth-first manner, updating
188  // the equivalence relation on the statesets. Loop invariant: each block of
189  // the states contains either final states only or non-final states only.
190  for (q.emplace(s1, s2); ret && !q.empty(); q.pop()) {
191  s1 = q.front().first;
192  s2 = q.front().second;
193  // Representatives of the equivalence classes of s1/s2.
194  const auto rep1 = Util::FindSet(&eq_classes, s1);
195  const auto rep2 = Util::FindSet(&eq_classes, s2);
196  if (rep1 != rep2) {
197  eq_classes.Union(rep1, rep2);
198  arc_pairs.clear();
199  // Copies outgoing arcs starting at s1 into the hash-table.
200  if (Util::kDeadState != s1) {
201  ArcIterator<Fst<Arc>> arc_iter(fst1, Util::UnMapState(s1));
202  for (; !arc_iter.Done(); arc_iter.Next()) {
203  const auto &arc = arc_iter.Value();
204  // Zero-weight arcs are treated as if they did not exist.
205  if (arc.weight != Weight::Zero()) {
206  arc_pairs[arc.ilabel].first = Util::MapState(arc.nextstate, FST1);
207  }
208  }
209  }
210  // Copies outgoing arcs starting at s2 into the hashtable.
211  if (Util::kDeadState != s2) {
212  ArcIterator<Fst<Arc>> arc_iter(fst2, Util::UnMapState(s2));
213  for (; !arc_iter.Done(); arc_iter.Next()) {
214  const auto &arc = arc_iter.Value();
215  // Zero-weight arcs are treated as if they did not exist.
216  if (arc.weight != Weight::Zero()) {
217  arc_pairs[arc.ilabel].second = Util::MapState(arc.nextstate, FST2);
218  }
219  }
220  }
221  // Iterates through the hashtable and process pairs of target states.
222  for (const auto &arc_iter : arc_pairs) {
223  const auto &pair = arc_iter.second;
224  if (Util::IsFinal(fst1, pair.first) !=
225  Util::IsFinal(fst2, pair.second)) {
226  // Detected inconsistency: return false.
227  ret = false;
228  break;
229  }
230  q.push(pair);
231  }
232  }
233  }
234  if (fst1.Properties(kError, false) || fst2.Properties(kError, false)) {
235  if (error) *error = true;
236  return false;
237  }
238  return ret;
239 }
240 
241 } // namespace fst
242 
243 #endif // FST_EQUIVALENT_H_
void ArcMap(MutableFst< A > *fst, C *mapper)
Definition: arc-map.h:120
static constexpr MappedId kDeadState
Definition: equivalent.h:65
virtual uint64_t Properties(uint64_t mask, bool test) const =0
static MappedId MapState(StateId s, int32_t which_fst)
Definition: equivalent.h:73
constexpr uint64_t kError
Definition: properties.h:52
virtual Weight Final(StateId) const =0
constexpr int kNoStateId
Definition: fst.h:196
const Arc & Value() const
Definition: fst.h:536
#define FSTERROR()
Definition: util.h:56
constexpr uint8_t kEncodeLabels
Definition: encode.h:55
constexpr uint64_t kNoEpsilons
Definition: properties.h:81
static MappedId FindSet(UnionFind< MappedId > *sets, MappedId id)
Definition: equivalent.h:91
static StateId UnMapState(MappedId id)
Definition: equivalent.h:79
virtual StateId Start() const =0
bool Done() const
Definition: fst.h:532
typename Arc::Weight Weight
Definition: equivalent.h:60
constexpr uint64_t kIDeterministic
Definition: properties.h:69
void Push(MutableFst< Arc > *fst, ReweightType type=REWEIGHT_TO_INITIAL, float delta=kShortestDelta, bool remove_total_weight=false)
Definition: push.h:94
constexpr uint8_t kEncodeWeights
Definition: encode.h:56
constexpr uint64_t kUnweighted
Definition: properties.h:106
virtual const SymbolTable * InputSymbols() const =0
T MakeSet(T item)
Definition: union-find.h:60
bool Equivalent(const Fst< Arc > &fst1, const Fst< Arc > &fst2, float delta=kDelta, bool *error=nullptr)
Definition: equivalent.h:127
typename Arc::StateId StateId
Definition: equivalent.h:59
bool CompatSymbols(const SymbolTable *syms1, const SymbolTable *syms2, bool warning=true)
T FindSet(T item)
Definition: union-find.h:39
static bool IsFinal(const Fst< Arc > &fa, MappedId s)
Definition: equivalent.h:85
constexpr float kDelta
Definition: weight.h:133
constexpr uint64_t kAcceptor
Definition: properties.h:64
virtual const SymbolTable * OutputSymbols() const =0
void Next()
Definition: fst.h:540
static constexpr MappedId kInvalidId
Definition: equivalent.h:68