FST  openfst-1.8.2.post1
OpenFst Library
isomorphic.h
Go to the documentation of this file.
1 // Copyright 2005-2020 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the 'License');
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an 'AS IS' BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // See www.openfst.org for extensive documentation on this weighted
16 // finite-state transducer library.
17 //
18 // Function to test two FSTs are isomorphic, i.e., they are equal up to a state
19 // and arc re-ordering. FSTs should be deterministic when viewed as
20 // unweighted automata. False negatives (but not false positives) are possible
21 // when the inputs are nondeterministic (when viewed as unweighted automata).
22 
23 #ifndef FST_ISOMORPHIC_H_
24 #define FST_ISOMORPHIC_H_
25 
26 #include <algorithm>
27 #include <queue>
28 #include <type_traits>
29 #include <vector>
30 
31 #include <fst/log.h>
32 
33 #include <fst/fst.h>
34 
35 
36 namespace fst {
37 namespace internal {
38 
39 // Orders weights for equality checking; delta is ignored.
40 template <class Weight,
41  typename std::enable_if_t<IsIdempotent<Weight>::value> * = nullptr>
42 bool WeightCompare(const Weight &w1, const Weight &w2, float, bool *) {
43  static const NaturalLess<Weight> less;
44  return less(w1, w2);
45 }
46 
47 template <class Weight,
48  typename std::enable_if_t<!IsIdempotent<Weight>::value> * = nullptr>
49 bool WeightCompare(const Weight &w1, const Weight &w2, float delta,
50  bool *error) {
51  // No natural order; use hash.
52  const auto q1 = w1.Quantize(delta);
53  const auto q2 = w2.Quantize(delta);
54  const auto n1 = q1.Hash();
55  const auto n2 = q2.Hash();
56  // Hash not unique; very unlikely to happen.
57  if (n1 == n2 && q1 != q2) {
58  VLOG(1) << "Isomorphic: Weight hash collision";
59  *error = true;
60  }
61  return n1 < n2;
62 }
63 
64 template <class Arc>
65 class Isomorphism {
66  using StateId = typename Arc::StateId;
67 
68  public:
69  Isomorphism(const Fst<Arc> &fst1, const Fst<Arc> &fst2, float delta)
70  : fst1_(fst1.Copy()),
71  fst2_(fst2.Copy()),
72  delta_(delta),
73  error_(false),
74  nondet_(false),
75  comp_(delta, &error_) {}
76 
77  // Checks if input FSTs are isomorphic.
78  bool IsIsomorphic() {
79  if (fst1_->Start() == kNoStateId && fst2_->Start() == kNoStateId) {
80  return true;
81  }
82  if (fst1_->Start() == kNoStateId || fst2_->Start() == kNoStateId) {
83  VLOG(1) << "Isomorphic: Only one of the FSTs is empty.";
84  return false;
85  }
86  PairState(fst1_->Start(), fst2_->Start());
87  while (!queue_.empty()) {
88  const auto &[state1, state2] = queue_.front();
89  if (!IsIsomorphicState(state1, state2)) {
90  if (nondet_) {
91  VLOG(1) << "Isomorphic: Non-determinism as an unweighted automaton. "
92  << "state1: " << state1 << " state2: " << state2;
93  error_ = true;
94  }
95  return false;
96  }
97  queue_.pop();
98  }
99  return true;
100  }
101 
102  bool Error() const { return error_; }
103 
104  private:
105  // Orders arcs for equality checking.
106  class ArcCompare {
107  public:
108  ArcCompare(float delta, bool *error) : delta_(delta), error_(error) {}
109 
110  bool operator()(const Arc &arc1, const Arc &arc2) const {
111  if (arc1.ilabel < arc2.ilabel) return true;
112  if (arc1.ilabel > arc2.ilabel) return false;
113  if (arc1.olabel < arc2.olabel) return true;
114  if (arc1.olabel > arc2.olabel) return false;
115  if (!ApproxEqual(arc1.weight, arc2.weight, delta_)) {
116  return WeightCompare(arc1.weight, arc2.weight, delta_, error_);
117  } else {
118  return arc1.nextstate < arc2.nextstate;
119  }
120  }
121 
122  private:
123  const float delta_;
124  bool *error_;
125  };
126 
127  // Maintains state correspondences and queue.
128  bool PairState(StateId s1, StateId s2) {
129  if (state_pairs_.size() <= s1) state_pairs_.resize(s1 + 1, kNoStateId);
130  if (state_pairs_[s1] == s2) {
131  return true; // Already seen this pair.
132  } else if (state_pairs_[s1] != kNoStateId) {
133  return false; // s1 already paired with another s2.
134  }
135  VLOG(3) << "Pairing states: (" << s1 << ", " << s2 << ")";
136  state_pairs_[s1] = s2;
137  queue_.emplace(s1, s2);
138  return true;
139  }
140 
141  // Checks if state pair is isomorphic.
142  bool IsIsomorphicState(StateId s1, StateId s2);
143 
144  std::unique_ptr<Fst<Arc>> fst1_;
145  std::unique_ptr<Fst<Arc>> fst2_;
146  float delta_; // Weight equality delta.
147  std::vector<Arc> arcs1_; // For sorting arcs on FST1.
148  std::vector<Arc> arcs2_; // For sorting arcs on FST2.
149  std::vector<StateId> state_pairs_; // Maintains state correspondences.
150  std::queue<std::pair<StateId, StateId>> queue_; // Queue of state pairs.
151  bool error_; // Error flag.
152  bool nondet_; // Nondeterminism detected.
153  ArcCompare comp_;
154 };
155 
156 template <class Arc>
157 bool Isomorphism<Arc>::IsIsomorphicState(StateId s1, StateId s2) {
158  if (!ApproxEqual(fst1_->Final(s1), fst2_->Final(s2), delta_)) {
159  VLOG(1) << "Isomorphic: Final weights not equal to within delta=" << delta_
160  << ": "
161  << "fst1.Final(" << s1 << ") = " << fst1_->Final(s1) << ", "
162  << "fst2.Final(" << s2 << ") = " << fst2_->Final(s2);
163  return false;
164  }
165  const auto narcs1 = fst1_->NumArcs(s1);
166  const auto narcs2 = fst2_->NumArcs(s2);
167  if (narcs1 != narcs2) {
168  VLOG(1) << "Isomorphic: NumArcs not equal. "
169  << "fst1.NumArcs(" << s1 << ") = " << narcs1 << ", "
170  << "fst2.NumArcs(" << s2 << ") = " << narcs2;
171  return false;
172  }
173  ArcIterator<Fst<Arc>> aiter1(*fst1_, s1);
174  ArcIterator<Fst<Arc>> aiter2(*fst2_, s2);
175  arcs1_.clear();
176  arcs1_.reserve(narcs1);
177  arcs2_.clear();
178  arcs2_.reserve(narcs2);
179  for (; !aiter1.Done(); aiter1.Next(), aiter2.Next()) {
180  arcs1_.push_back(aiter1.Value());
181  arcs2_.push_back(aiter2.Value());
182  }
183  std::sort(arcs1_.begin(), arcs1_.end(), comp_);
184  std::sort(arcs2_.begin(), arcs2_.end(), comp_);
185  for (size_t i = 0; i < arcs1_.size(); ++i) {
186  const auto &arc1 = arcs1_[i];
187  const auto &arc2 = arcs2_[i];
188  if (arc1.ilabel != arc2.ilabel) {
189  VLOG(1) << "Isomorphic: ilabels not equal. "
190  << "state1: " << s1 << " arc1: *" << arc1.ilabel << "* "
191  << arc1.olabel << " " << arc1.weight << " " << arc1.nextstate
192  << " state2: " << s2 << " arc2: *" << arc2.ilabel << "* "
193  << arc2.olabel << " " << arc2.weight << " " << arc2.nextstate;
194  return false;
195  }
196  if (arc1.olabel != arc2.olabel) {
197  VLOG(1) << "Isomorphic: olabels not equal. "
198  << "state1: " << s1 << " arc1: " << arc1.ilabel << " *"
199  << arc1.olabel << "* " << arc1.weight << " " << arc1.nextstate
200  << " state2: " << s2 << " arc2: " << arc2.ilabel << " *"
201  << arc2.olabel << "* " << arc2.weight << " " << arc2.nextstate;
202  return false;
203  }
204  if (!ApproxEqual(arc1.weight, arc2.weight, delta_)) {
205  VLOG(1) << "Isomorphic: weights not ApproxEqual. "
206  << "state1: " << s1 << " arc1: " << arc1.ilabel << " "
207  << arc1.olabel << " *" << arc1.weight << "* " << arc1.nextstate
208  << " state2: " << s2 << " arc2: " << arc2.ilabel << " "
209  << arc2.olabel << " *" << arc2.weight << "* " << arc2.nextstate;
210  return false;
211  }
212  if (!PairState(arc1.nextstate, arc2.nextstate)) {
213  VLOG(1) << "Isomorphic: nextstates could not be paired. "
214  << "state1: " << s1 << " arc1: " << arc1.ilabel << " "
215  << arc1.olabel << " " << arc1.weight << " *" << arc1.nextstate
216  << "* "
217  << "state2: " << s2 << " arc2: " << arc2.ilabel << " "
218  << arc2.olabel << " " << arc2.weight << " *" << arc2.nextstate
219  << "*";
220  return false;
221  }
222  if (i > 0) { // Checks for non-determinism.
223  const auto &arc0 = arcs1_[i - 1];
224  if (arc1.ilabel == arc0.ilabel && arc1.olabel == arc0.olabel &&
225  ApproxEqual(arc1.weight, arc0.weight, delta_)) {
226  // Any subsequent matching failure maybe a false negative
227  // since we only consider one permutation when pairing destination
228  // states of nondeterministic transitions.
229  VLOG(1) << "Isomorphic: Detected non-determinism as an unweighted "
230  << "automaton; deferring error. "
231  << "state: " << s1 << " arc1: " << arc1.ilabel << " "
232  << arc1.olabel << " " << arc1.weight << " " << arc1.nextstate
233  << " arc2: " << arc2.ilabel << " " << arc2.olabel << " "
234  << arc2.weight << " " << arc2.nextstate;
235  nondet_ = true;
236  }
237  }
238  }
239  return true;
240 }
241 
242 } // namespace internal
243 
244 // Tests if two FSTs have the same states and arcs up to a reordering.
245 // Inputs should be deterministic when viewed as unweighted automata.
246 // When the inputs are nondeterministic, the algorithm only considers one
247 // permutation for each set of equivalent nondeterministic transitions
248 // (the permutation that preserves state ID ordering) and hence might return
249 // false negatives (but it never returns false positives).
250 template <class Arc>
251 bool Isomorphic(const Fst<Arc> &fst1, const Fst<Arc> &fst2,
252  float delta = kDelta) {
253  internal::Isomorphism<Arc> iso(fst1, fst2, delta);
254  const bool result = iso.IsIsomorphic();
255  if (iso.Error()) {
256  FSTERROR() << "Isomorphic: Cannot determine if inputs are isomorphic";
257  return false;
258  } else {
259  return result;
260  }
261 }
262 
263 } // namespace fst
264 
265 #endif // FST_ISOMORPHIC_H_
Isomorphism(const Fst< Arc > &fst1, const Fst< Arc > &fst2, float delta)
Definition: isomorphic.h:69
bool WeightCompare(const Weight &w1, const Weight &w2, float, bool *)
Definition: isomorphic.h:42
constexpr int kNoStateId
Definition: fst.h:202
const Arc & Value() const
Definition: fst.h:537
#define FSTERROR()
Definition: util.h:53
#define VLOG(level)
Definition: log.h:50
bool Done() const
Definition: fst.h:533
bool Isomorphic(FarReader< Arc > &reader1, FarReader< Arc > &reader2, float delta=kDelta, std::string_view begin_key="", std::string_view end_key="")
Definition: isomorphic.h:29
constexpr float kDelta
Definition: weight.h:130
bool ApproxEqual(const ErrorWeight &, const ErrorWeight &, float)
Definition: error-weight.h:57
void Next()
Definition: fst.h:541