FST  openfst-1.8.3
OpenFst Library
algo_test.h
Go to the documentation of this file.
1 // Copyright 2005-2024 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the 'License');
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an 'AS IS' BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // See www.openfst.org for extensive documentation on this weighted
16 // finite-state transducer library.
17 //
18 // Regression test for various FST algorithms.
19 
20 #ifndef FST_TEST_ALGO_TEST_H_
21 #define FST_TEST_ALGO_TEST_H_
22 
23 #include <sys/types.h>
24 
25 #include <cstddef>
26 #include <cstdint>
27 #include <memory>
28 #include <random>
29 #include <string>
30 #include <utility>
31 #include <vector>
32 
33 #include <fst/log.h>
34 #include <fst/arc-map.h>
35 #include <fst/arc.h>
36 #include <fst/arcfilter.h>
37 #include <fst/arcsort.h>
38 #include <fst/cache.h>
39 #include <fst/closure.h>
40 #include <fst/compose-filter.h>
41 #include <fst/compose.h>
42 #include <fst/concat.h>
43 #include <fst/connect.h>
44 #include <fst/determinize.h>
45 #include <fst/dfs-visit.h>
46 #include <fst/difference.h>
47 #include <fst/disambiguate.h>
48 #include <fst/encode.h>
49 #include <fst/equivalent.h>
50 #include <fst/float-weight.h>
51 #include <fst/fst.h>
52 #include <fst/fstlib.h>
53 #include <fst/intersect.h>
54 #include <fst/invert.h>
55 #include <fst/lookahead-matcher.h>
56 #include <fst/matcher-fst.h>
57 #include <fst/matcher.h>
58 #include <fst/minimize.h>
59 #include <fst/mutable-fst.h>
60 #include <fst/pair-weight.h>
61 #include <fst/project.h>
62 #include <fst/properties.h>
63 #include <fst/prune.h>
64 #include <fst/push.h>
65 #include <fst/randequivalent.h>
66 #include <fst/randgen.h>
67 #include <fst/rational.h>
68 #include <fst/relabel.h>
69 #include <fst/reverse.h>
70 #include <fst/reweight.h>
71 #include <fst/rmepsilon.h>
72 #include <fst/shortest-distance.h>
73 #include <fst/shortest-path.h>
74 #include <fst/string-weight.h>
75 #include <fst/synchronize.h>
76 #include <fst/topsort.h>
77 #include <fst/union-weight.h>
78 #include <fst/union.h>
79 #include <fst/vector-fst.h>
80 #include <fst/verify.h>
81 #include <fst/weight.h>
82 #include <fst/test/rand-fst.h>
83 
84 DECLARE_int32(repeat); // defined in ./algo_test.cc
85 
86 namespace fst {
87 
88 // Mapper to change input and output label of every transition into
89 // epsilons.
90 template <class A>
91 class EpsMapper {
92  public:
93  EpsMapper() = default;
94 
95  A operator()(const A &arc) const {
96  return A(0, 0, arc.weight, arc.nextstate);
97  }
98 
99  uint64_t Properties(uint64_t props) const {
100  props &= ~kNotAcceptor;
101  props |= kAcceptor;
102  props &= ~kNoIEpsilons & ~kNoOEpsilons & ~kNoEpsilons;
103  props |= kIEpsilons | kOEpsilons | kEpsilons;
104  props &= ~kNotILabelSorted & ~kNotOLabelSorted;
105  props |= kILabelSorted | kOLabelSorted;
106  return props;
107  }
108 
110 
112 
114 };
115 
116 // Generic - no lookahead.
117 template <class Arc>
118 void LookAheadCompose(const Fst<Arc> &ifst1, const Fst<Arc> &ifst2,
119  MutableFst<Arc> *ofst) {
120  Compose(ifst1, ifst2, ofst);
121 }
122 
123 // Specialized and epsilon olabel acyclic - lookahead.
124 inline void LookAheadCompose(const Fst<StdArc> &ifst1, const Fst<StdArc> &ifst2,
125  MutableFst<StdArc> *ofst) {
126  std::vector<StdArc::StateId> order;
127  bool acyclic;
128  TopOrderVisitor<StdArc> visitor(&order, &acyclic);
129  DfsVisit(ifst1, &visitor, OutputEpsilonArcFilter<StdArc>());
130  if (acyclic) { // no ifst1 output epsilon cycles?
131  StdOLabelLookAheadFst lfst1(ifst1);
132  StdVectorFst lfst2(ifst2);
133  LabelLookAheadRelabeler<StdArc>::Relabel(&lfst2, lfst1, true);
134  Compose(lfst1, lfst2, ofst);
135  } else {
136  Compose(ifst1, ifst2, ofst);
137  }
138 }
139 
140 // This class tests a variety of identities and properties that must
141 // hold for various algorithms on weighted FSTs.
142 template <class Arc>
144  public:
145  using Label = typename Arc::Label;
146  using StateId = typename Arc::StateId;
147  using Weight = typename Arc::Weight;
149 
150  WeightedTester(uint64_t seed, const Fst<Arc> &zero_fst,
151  const Fst<Arc> &one_fst, const Fst<Arc> &univ_fst,
152  WeightGenerator weight_generator)
153  : seed_(seed),
154  rand_(seed),
155  zero_fst_(zero_fst),
156  one_fst_(one_fst),
157  univ_fst_(univ_fst),
158  generate_(std::move(weight_generator)) {}
159 
160  void Test(const Fst<Arc> &T1, const Fst<Arc> &T2, const Fst<Arc> &T3) {
161  TestRational(T1, T2, T3);
162  TestMap(T1);
163  TestCompose(T1, T2, T3);
164  TestSort(T1);
165  TestOptimize(T1);
166  TestSearch(T1);
167  }
168 
169  private:
170  // Tests rational operations with identities
171  void TestRational(const Fst<Arc> &T1, const Fst<Arc> &T2,
172  const Fst<Arc> &T3) {
173  {
174  VLOG(1) << "Check destructive and delayed union are equivalent.";
175  VectorFst<Arc> U1(T1);
176  Union(&U1, T2);
177  UnionFst<Arc> U2(T1, T2);
178  CHECK(Equiv(U1, U2));
179  }
180 
181  {
182  VLOG(1) << "Check destructive and delayed concatenation are equivalent.";
183  VectorFst<Arc> C1(T1);
184  Concat(&C1, T2);
185  ConcatFst<Arc> C2(T1, T2);
186  CHECK(Equiv(C1, C2));
187  VectorFst<Arc> C3(T2);
188  Concat(T1, &C3);
189  CHECK(Equiv(C3, C2));
190  }
191 
192  {
193  VLOG(1) << "Check destructive and delayed closure* are equivalent.";
194  VectorFst<Arc> C1(T1);
195  Closure(&C1, CLOSURE_STAR);
197  CHECK(Equiv(C1, C2));
198  }
199 
200  {
201  VLOG(1) << "Check destructive and delayed closure+ are equivalent.";
202  VectorFst<Arc> C1(T1);
203  Closure(&C1, CLOSURE_PLUS);
205  CHECK(Equiv(C1, C2));
206  }
207 
208  {
209  VLOG(1) << "Check union is associative (destructive).";
210  VectorFst<Arc> U1(T1);
211  Union(&U1, T2);
212  Union(&U1, T3);
213 
214  VectorFst<Arc> U3(T2);
215  Union(&U3, T3);
216  VectorFst<Arc> U4(T1);
217  Union(&U4, U3);
218 
219  CHECK(Equiv(U1, U4));
220  }
221 
222  {
223  VLOG(1) << "Check union is associative (delayed).";
224  UnionFst<Arc> U1(T1, T2);
225  UnionFst<Arc> U2(U1, T3);
226 
227  UnionFst<Arc> U3(T2, T3);
228  UnionFst<Arc> U4(T1, U3);
229 
230  CHECK(Equiv(U2, U4));
231  }
232 
233  {
234  VLOG(1) << "Check union is associative (destructive delayed).";
235  UnionFst<Arc> U1(T1, T2);
236  Union(&U1, T3);
237 
238  UnionFst<Arc> U3(T2, T3);
239  UnionFst<Arc> U4(T1, U3);
240 
241  CHECK(Equiv(U1, U4));
242  }
243 
244  {
245  VLOG(1) << "Check concatenation is associative (destructive).";
246  VectorFst<Arc> C1(T1);
247  Concat(&C1, T2);
248  Concat(&C1, T3);
249 
250  VectorFst<Arc> C3(T2);
251  Concat(&C3, T3);
252  VectorFst<Arc> C4(T1);
253  Concat(&C4, C3);
254 
255  CHECK(Equiv(C1, C4));
256  }
257 
258  {
259  VLOG(1) << "Check concatenation is associative (delayed).";
260  ConcatFst<Arc> C1(T1, T2);
261  ConcatFst<Arc> C2(C1, T3);
262 
263  ConcatFst<Arc> C3(T2, T3);
264  ConcatFst<Arc> C4(T1, C3);
265 
266  CHECK(Equiv(C2, C4));
267  }
268 
269  {
270  VLOG(1) << "Check concatenation is associative (destructive delayed).";
271  ConcatFst<Arc> C1(T1, T2);
272  Concat(&C1, T3);
273 
274  ConcatFst<Arc> C3(T2, T3);
275  ConcatFst<Arc> C4(T1, C3);
276 
277  CHECK(Equiv(C1, C4));
278  }
279 
280  if (Weight::Properties() & kLeftSemiring) {
281  VLOG(1) << "Check concatenation left distributes"
282  << " over union (destructive).";
283 
284  VectorFst<Arc> U1(T1);
285  Union(&U1, T2);
286  VectorFst<Arc> C1(T3);
287  Concat(&C1, U1);
288 
289  VectorFst<Arc> C2(T3);
290  Concat(&C2, T1);
291  VectorFst<Arc> C3(T3);
292  Concat(&C3, T2);
293  VectorFst<Arc> U2(C2);
294  Union(&U2, C3);
295 
296  CHECK(Equiv(C1, U2));
297  }
298 
299  if (Weight::Properties() & kRightSemiring) {
300  VLOG(1) << "Check concatenation right distributes"
301  << " over union (destructive).";
302  VectorFst<Arc> U1(T1);
303  Union(&U1, T2);
304  VectorFst<Arc> C1(U1);
305  Concat(&C1, T3);
306 
307  VectorFst<Arc> C2(T1);
308  Concat(&C2, T3);
309  VectorFst<Arc> C3(T2);
310  Concat(&C3, T3);
311  VectorFst<Arc> U2(C2);
312  Union(&U2, C3);
313 
314  CHECK(Equiv(C1, U2));
315  }
316 
317  if (Weight::Properties() & kLeftSemiring) {
318  VLOG(1) << "Check concatenation left distributes over union (delayed).";
319  UnionFst<Arc> U1(T1, T2);
320  ConcatFst<Arc> C1(T3, U1);
321 
322  ConcatFst<Arc> C2(T3, T1);
323  ConcatFst<Arc> C3(T3, T2);
324  UnionFst<Arc> U2(C2, C3);
325 
326  CHECK(Equiv(C1, U2));
327  }
328 
329  if (Weight::Properties() & kRightSemiring) {
330  VLOG(1) << "Check concatenation right distributes over union (delayed).";
331  UnionFst<Arc> U1(T1, T2);
332  ConcatFst<Arc> C1(U1, T3);
333 
334  ConcatFst<Arc> C2(T1, T3);
335  ConcatFst<Arc> C3(T2, T3);
336  UnionFst<Arc> U2(C2, C3);
337 
338  CHECK(Equiv(C1, U2));
339  }
340 
341  if (Weight::Properties() & kLeftSemiring) {
342  VLOG(1) << "Check T T* == T+ (destructive).";
343  VectorFst<Arc> S(T1);
344  Closure(&S, CLOSURE_STAR);
345  VectorFst<Arc> C(T1);
346  Concat(&C, S);
347 
348  VectorFst<Arc> P(T1);
349  Closure(&P, CLOSURE_PLUS);
350 
351  CHECK(Equiv(C, P));
352  }
353 
354  if (Weight::Properties() & kRightSemiring) {
355  VLOG(1) << "Check T* T == T+ (destructive).";
356  VectorFst<Arc> S(T1);
357  Closure(&S, CLOSURE_STAR);
358  VectorFst<Arc> C(S);
359  Concat(&C, T1);
360 
361  VectorFst<Arc> P(T1);
362  Closure(&P, CLOSURE_PLUS);
363 
364  CHECK(Equiv(C, P));
365  }
366 
367  if (Weight::Properties() & kLeftSemiring) {
368  VLOG(1) << "Check T T* == T+ (delayed).";
370  ConcatFst<Arc> C(T1, S);
371 
373 
374  CHECK(Equiv(C, P));
375  }
376 
377  if (Weight::Properties() & kRightSemiring) {
378  VLOG(1) << "Check T* T == T+ (delayed).";
380  ConcatFst<Arc> C(S, T1);
381 
383 
384  CHECK(Equiv(C, P));
385  }
386  }
387 
388  // Tests map-based operations.
389  void TestMap(const Fst<Arc> &T) {
390  {
391  VLOG(1) << "Check destructive and delayed projection are equivalent.";
392  VectorFst<Arc> P1(T);
395  CHECK(Equiv(P1, P2));
396  }
397 
398  {
399  VLOG(1) << "Check destructive and delayed inversion are equivalent.";
400  VectorFst<Arc> I1(T);
401  Invert(&I1);
402  InvertFst<Arc> I2(T);
403  CHECK(Equiv(I1, I2));
404  }
405 
406  {
407  VLOG(1) << "Check Pi_1(T) = Pi_2(T^-1) (destructive).";
408  VectorFst<Arc> P1(T);
409  VectorFst<Arc> I1(T);
411  Invert(&I1);
413  CHECK(Equiv(P1, I1));
414  }
415 
416  {
417  VLOG(1) << "Check Pi_2(T) = Pi_1(T^-1) (destructive).";
418  VectorFst<Arc> P1(T);
419  VectorFst<Arc> I1(T);
421  Invert(&I1);
423  CHECK(Equiv(P1, I1));
424  }
425 
426  {
427  VLOG(1) << "Check Pi_1(T) = Pi_2(T^-1) (delayed).";
429  InvertFst<Arc> I1(T);
431  CHECK(Equiv(P1, P2));
432  }
433 
434  {
435  VLOG(1) << "Check Pi_2(T) = Pi_1(T^-1) (delayed).";
437  InvertFst<Arc> I1(T);
439  CHECK(Equiv(P1, P2));
440  }
441 
442  {
443  VLOG(1) << "Check destructive relabeling";
444  static const int kNumLabels = 10;
445  // set up relabeling pairs
446  std::vector<Label> labelset(kNumLabels);
447  for (size_t i = 0; i < kNumLabels; ++i) labelset[i] = i;
448  for (size_t i = 0; i < kNumLabels; ++i) {
449  using std::swap;
450  const auto index =
451  std::uniform_int_distribution<>(0, kNumLabels - 1)(rand_);
452  swap(labelset[i], labelset[index]);
453  }
454 
455  std::vector<std::pair<Label, Label>> ipairs1(kNumLabels);
456  std::vector<std::pair<Label, Label>> opairs1(kNumLabels);
457  for (size_t i = 0; i < kNumLabels; ++i) {
458  ipairs1[i] = std::make_pair(i, labelset[i]);
459  opairs1[i] = std::make_pair(labelset[i], i);
460  }
461  VectorFst<Arc> R(T);
462  Relabel(&R, ipairs1, opairs1);
463 
464  std::vector<std::pair<Label, Label>> ipairs2(kNumLabels);
465  std::vector<std::pair<Label, Label>> opairs2(kNumLabels);
466  for (size_t i = 0; i < kNumLabels; ++i) {
467  ipairs2[i] = std::make_pair(labelset[i], i);
468  opairs2[i] = std::make_pair(i, labelset[i]);
469  }
470  Relabel(&R, ipairs2, opairs2);
471  CHECK(Equiv(R, T));
472 
473  VLOG(1) << "Check on-the-fly relabeling";
474  RelabelFst<Arc> Rdelay(T, ipairs1, opairs1);
475 
476  RelabelFst<Arc> RRdelay(Rdelay, ipairs2, opairs2);
477  CHECK(Equiv(RRdelay, T));
478  }
479 
480  {
481  VLOG(1) << "Check encoding/decoding (destructive).";
482  VectorFst<Arc> D(T);
483  uint8_t encode_props = 0;
484  if (std::bernoulli_distribution(.5)(rand_)) {
485  encode_props |= kEncodeLabels;
486  }
487  if (std::bernoulli_distribution(.5)(rand_)) {
488  encode_props |= kEncodeWeights;
489  }
490  EncodeMapper<Arc> encoder(encode_props, ENCODE);
491  Encode(&D, &encoder);
492  Decode(&D, encoder);
493  CHECK(Equiv(D, T));
494  }
495 
496  {
497  VLOG(1) << "Check encoding/decoding (delayed).";
498  uint8_t encode_props = 0;
499  if (std::bernoulli_distribution(.5)(rand_)) {
500  encode_props |= kEncodeLabels;
501  }
502  if (std::bernoulli_distribution(.5)(rand_)) {
503  encode_props |= kEncodeWeights;
504  }
505  EncodeMapper<Arc> encoder(encode_props, ENCODE);
506  EncodeFst<Arc> E(T, &encoder);
507  VectorFst<Arc> Encoded(E);
508  DecodeFst<Arc> D(Encoded, encoder);
509  CHECK(Equiv(D, T));
510  }
511 
512  {
513  VLOG(1) << "Check gallic mappers (constructive).";
514  ToGallicMapper<Arc> to_mapper;
515  FromGallicMapper<Arc> from_mapper;
517  VectorFst<Arc> F;
518  ArcMap(T, &G, to_mapper);
519  ArcMap(G, &F, from_mapper);
520  CHECK(Equiv(T, F));
521  }
522 
523  {
524  VLOG(1) << "Check gallic mappers (delayed).";
527  CHECK(Equiv(T, F));
528  }
529  }
530 
531  // Tests compose-based operations.
532  void TestCompose(const Fst<Arc> &T1, const Fst<Arc> &T2, const Fst<Arc> &T3) {
533  if (!(Weight::Properties() & kCommutative)) return;
534 
535  VectorFst<Arc> S1(T1);
536  VectorFst<Arc> S2(T2);
537  VectorFst<Arc> S3(T3);
538 
539  ILabelCompare<Arc> icomp;
540  OLabelCompare<Arc> ocomp;
541 
542  ArcSort(&S1, ocomp);
543  ArcSort(&S2, ocomp);
544  ArcSort(&S3, icomp);
545 
546  {
547  VLOG(1) << "Check composition is associative.";
548  ComposeFst<Arc> C1(S1, S2);
549  ComposeFst<Arc> C2(C1, S3);
550  ComposeFst<Arc> C3(S2, S3);
551  ComposeFst<Arc> C4(S1, C3);
552 
553  CHECK(Equiv(C2, C4));
554  }
555 
556  {
557  VLOG(1) << "Check composition left distributes over union.";
558  UnionFst<Arc> U1(S2, S3);
559  ComposeFst<Arc> C1(S1, U1);
560 
561  ComposeFst<Arc> C2(S1, S2);
562  ComposeFst<Arc> C3(S1, S3);
563  UnionFst<Arc> U2(C2, C3);
564 
565  CHECK(Equiv(C1, U2));
566  }
567 
568  {
569  VLOG(1) << "Check composition right distributes over union.";
570  UnionFst<Arc> U1(S1, S2);
571  ComposeFst<Arc> C1(U1, S3);
572 
573  ComposeFst<Arc> C2(S1, S3);
574  ComposeFst<Arc> C3(S2, S3);
575  UnionFst<Arc> U2(C2, C3);
576 
577  CHECK(Equiv(C1, U2));
578  }
579 
580  VectorFst<Arc> A1(S1);
581  VectorFst<Arc> A2(S2);
582  VectorFst<Arc> A3(S3);
586 
587  {
588  VLOG(1) << "Check intersection is commutative.";
589  IntersectFst<Arc> I1(A1, A2);
590  IntersectFst<Arc> I2(A2, A1);
591  CHECK(Equiv(I1, I2));
592  }
593 
594  {
595  VLOG(1) << "Check all epsilon filters leads to equivalent results.";
596  using M = Matcher<Fst<Arc>>;
597  ComposeFst<Arc> C1(S1, S2);
598  ComposeFst<Arc> C2(
600  ComposeFst<Arc> C3(S1, S2,
602 
603  CHECK(Equiv(C1, C2));
604  CHECK(Equiv(C1, C3));
605 
606  if ((Weight::Properties() & kIdempotent) ||
607  S1.Properties(kNoOEpsilons, false) ||
608  S2.Properties(kNoIEpsilons, false)) {
609  ComposeFst<Arc> C4(
610  S1, S2, ComposeFstOptions<Arc, M, TrivialComposeFilter<M>>());
611  CHECK(Equiv(C1, C4));
612  ComposeFst<Arc> C5(
613  S1, S2, ComposeFstOptions<Arc, M, NoMatchComposeFilter<M>>());
614  CHECK(Equiv(C1, C5));
615  }
616 
617  if (S1.Properties(kNoOEpsilons, false) &&
618  S2.Properties(kNoIEpsilons, false)) {
619  ComposeFst<Arc> C6(S1, S2,
621  CHECK(Equiv(C1, C6));
622  }
623  }
624 
625  {
626  VLOG(1) << "Check look-ahead filters lead to equivalent results.";
627  VectorFst<Arc> C1, C2;
628  Compose(S1, S2, &C1);
629  LookAheadCompose(S1, S2, &C2);
630  CHECK(Equiv(C1, C2));
631  }
632  }
633 
634  // Tests sorting operations
635  void TestSort(const Fst<Arc> &T) {
636  ILabelCompare<Arc> icomp;
637  OLabelCompare<Arc> ocomp;
638 
639  {
640  VLOG(1) << "Check arc sorted Fst is equivalent to its input.";
641  VectorFst<Arc> S1(T);
642  ArcSort(&S1, icomp);
643  CHECK(Equiv(T, S1));
644  }
645 
646  {
647  VLOG(1) << "Check destructive and delayed arcsort are equivalent.";
648  VectorFst<Arc> S1(T);
649  ArcSort(&S1, icomp);
651  CHECK(Equiv(S1, S2));
652  }
653 
654  {
655  VLOG(1) << "Check ilabel sorting vs. olabel sorting with inversions.";
656  VectorFst<Arc> S1(T);
657  VectorFst<Arc> S2(T);
658  ArcSort(&S1, icomp);
659  Invert(&S2);
660  ArcSort(&S2, ocomp);
661  Invert(&S2);
662  CHECK(Equiv(S1, S2));
663  }
664 
665  {
666  VLOG(1) << "Check topologically sorted Fst is equivalent to its input.";
667  VectorFst<Arc> S1(T);
668  TopSort(&S1);
669  CHECK(Equiv(T, S1));
670  }
671 
672  {
673  VLOG(1) << "Check reverse(reverse(T)) = T";
674  for (int i = 0; i < 2; ++i) {
676  VectorFst<Arc> R2;
677  bool require_superinitial = i == 1;
678  Reverse(T, &R1, require_superinitial);
679  Reverse(R1, &R2, require_superinitial);
680  CHECK(Equiv(T, R2));
681  }
682  }
683  }
684 
685  // Tests optimization operations
686  void TestOptimize(const Fst<Arc> &T) {
687  uint64_t tprops = T.Properties(kFstProperties, true);
688  uint64_t wprops = Weight::Properties();
689 
690  VectorFst<Arc> A(T);
692  {
693  VLOG(1) << "Check connected FST is equivalent to its input.";
694  VectorFst<Arc> C1(T);
695  Connect(&C1);
696  CHECK(Equiv(T, C1));
697  }
698 
699  if ((wprops & kSemiring) == kSemiring &&
700  (tprops & kAcyclic || wprops & kIdempotent)) {
701  VLOG(1) << "Check epsilon-removed FST is equivalent to its input.";
702  VectorFst<Arc> R1(T);
703  RmEpsilon(&R1);
704  CHECK(Equiv(T, R1));
705 
706  VLOG(1) << "Check destructive and delayed epsilon removal"
707  << "are equivalent.";
708  RmEpsilonFst<Arc> R2(T);
709  CHECK(Equiv(R1, R2));
710 
711  VLOG(1) << "Check an FST with a large proportion"
712  << " of epsilon transitions:";
713  // Maps all transitions of T to epsilon-transitions and append
714  // a non-epsilon transition.
715  VectorFst<Arc> U;
716  ArcMap(T, &U, EpsMapper<Arc>());
717  VectorFst<Arc> V;
718  V.SetStart(V.AddState());
719  Arc arc(1, 1, Weight::One(), V.AddState());
720  V.AddArc(V.Start(), arc);
721  V.SetFinal(arc.nextstate, Weight::One());
722  Concat(&U, V);
723  // Check that epsilon-removal preserves the shortest-distance
724  // from the initial state to the final states.
725  std::vector<Weight> d;
726  ShortestDistance(U, &d, true);
727  Weight w = U.Start() < d.size() ? d[U.Start()] : Weight::Zero();
728  VectorFst<Arc> U1(U);
729  RmEpsilon(&U1);
730  ShortestDistance(U1, &d, true);
731  Weight w1 = U1.Start() < d.size() ? d[U1.Start()] : Weight::Zero();
732  CHECK(ApproxEqual(w, w1, kTestDelta));
733  RmEpsilonFst<Arc> U2(U);
734  ShortestDistance(U2, &d, true);
735  Weight w2 = U2.Start() < d.size() ? d[U2.Start()] : Weight::Zero();
736  CHECK(ApproxEqual(w, w2, kTestDelta));
737  }
738 
739  if ((wprops & kSemiring) == kSemiring && tprops & kAcyclic) {
740  VLOG(1) << "Check determinized FSA is equivalent to its input.";
741  DeterminizeFst<Arc> D(A);
742  CHECK(Equiv(A, D));
743 
744  {
745  VLOG(1) << "Check determinized FST is equivalent to its input.";
748  DeterminizeFst<Arc> DT(T, opts);
749  CHECK(Equiv(T, DT));
750  }
751 
752  if ((wprops & (kPath | kCommutative)) == (kPath | kCommutative)) {
753  VLOG(1) << "Check pruning in determinization";
754  VectorFst<Arc> P;
755  const Weight threshold = generate_();
757  opts.weight_threshold = threshold;
758  Determinize(A, &P, opts);
759  CHECK(P.Properties(kIDeterministic, true));
760  CHECK(PruneEquiv(A, P, threshold));
761  }
762 
763  if ((wprops & kPath) == kPath) {
764  VLOG(1) << "Check min-determinization";
765 
766  // Ensures no input epsilons
767  VectorFst<Arc> R(T);
768  std::vector<std::pair<Label, Label>> ipairs, opairs;
769  ipairs.push_back(std::pair<Label, Label>(0, 1));
770  Relabel(&R, ipairs, opairs);
771 
772  VectorFst<Arc> M;
775  Determinize(R, &M, opts);
776  CHECK(M.Properties(kIDeterministic, true));
777  CHECK(MinRelated(M, R));
778  }
779 
780  int n;
781  {
782  VLOG(1) << "Check size(min(det(A))) <= size(det(A))"
783  << " and min(det(A)) equiv det(A)";
784  VectorFst<Arc> M(D);
785  n = M.NumStates();
786  Minimize(&M, static_cast<MutableFst<Arc> *>(nullptr), kDelta);
787  CHECK(Equiv(D, M));
788  CHECK(M.NumStates() <= n);
789  n = M.NumStates();
790  }
791 
792  if (n && (wprops & kIdempotent) == kIdempotent &&
793  A.Properties(kNoEpsilons, true)) {
794  VLOG(1) << "Check that Revuz's algorithm leads to the"
795  << " same number of states as Brozozowski's algorithm";
796 
797  // Skip test if A is the empty machine or contains epsilons or
798  // if the semiring is not idempotent (to avoid floating point
799  // errors)
801  Reverse(A, &R);
802  RmEpsilon(&R);
804  VectorFst<Arc> RD;
805  Reverse(DR, &RD);
806  DeterminizeFst<Arc> DRD(RD);
807  VectorFst<Arc> M(DRD);
808  CHECK_EQ(n + 1, M.NumStates()); // Accounts for the epsilon transition
809  // to the initial state
810  }
811  }
812 
813  if ((wprops & kSemiring) == kSemiring && tprops & kAcyclic) {
814  VLOG(1) << "Check disambiguated FSA is equivalent to its input.";
815  VectorFst<Arc> R(A), D;
816  RmEpsilon(&R);
817  Disambiguate(R, &D);
818  CHECK(Equiv(R, D));
819  VLOG(1) << "Check disambiguated FSA is unambiguous";
820  CHECK(Unambiguous(D));
821 
822  /* TODO(riley): find out why this fails
823  if ((wprops & (kPath | kCommutative)) == (kPath | kCommutative)) {
824  VLOG(1) << "Check pruning in disambiguation";
825  VectorFst<Arc> P;
826  const Weight threshold = generate_();
827  DisambiguateOptions<Arc> opts;
828  opts.weight_threshold = threshold;
829  Disambiguate(R, &P, opts);
830  CHECK(Unambiguous(P));
831  CHECK(PruneEquiv(A, P, threshold));
832  }
833  */
834  }
835 
836  if (Arc::Type() == LogArc::Type() || Arc::Type() == StdArc::Type()) {
837  VLOG(1) << "Check reweight(T) equiv T";
838  std::vector<Weight> potential;
839  VectorFst<Arc> RI(T);
840  VectorFst<Arc> RF(T);
841  while (potential.size() < RI.NumStates()) {
842  potential.push_back(generate_());
843  }
844 
845  Reweight(&RI, potential, REWEIGHT_TO_INITIAL);
846  CHECK(Equiv(T, RI));
847 
848  Reweight(&RF, potential, REWEIGHT_TO_FINAL);
849  CHECK(Equiv(T, RF));
850  }
851 
852  if ((wprops & kIdempotent) || (tprops & kAcyclic)) {
853  VLOG(1) << "Check pushed FST is equivalent to input FST.";
854  // Pushing towards the final state.
855  if (wprops & kRightSemiring) {
856  VectorFst<Arc> P1;
857  Push<Arc, REWEIGHT_TO_FINAL>(T, &P1, kPushLabels);
858  CHECK(Equiv(T, P1));
859 
860  VectorFst<Arc> P2;
861  Push<Arc, REWEIGHT_TO_FINAL>(T, &P2, kPushWeights);
862  CHECK(Equiv(T, P2));
863 
864  VectorFst<Arc> P3;
865  Push<Arc, REWEIGHT_TO_FINAL>(T, &P3, kPushLabels | kPushWeights);
866  CHECK(Equiv(T, P3));
867  }
868 
869  // Pushing towards the initial state.
870  if (wprops & kLeftSemiring) {
871  VectorFst<Arc> P1;
872  Push<Arc, REWEIGHT_TO_INITIAL>(T, &P1, kPushLabels);
873  CHECK(Equiv(T, P1));
874 
875  VectorFst<Arc> P2;
876  Push<Arc, REWEIGHT_TO_INITIAL>(T, &P2, kPushWeights);
877  CHECK(Equiv(T, P2));
878  VectorFst<Arc> P3;
879  Push<Arc, REWEIGHT_TO_INITIAL>(T, &P3, kPushLabels | kPushWeights);
880  CHECK(Equiv(T, P3));
881  }
882  }
883 
884  if constexpr (IsPath<Weight>::value) {
885  if ((wprops & (kPath | kCommutative)) == (kPath | kCommutative)) {
886  VLOG(1) << "Check pruning algorithm";
887  {
888  VLOG(1) << "Check equiv. of constructive and destructive algorithms";
889  const Weight threshold = generate_();
890  VectorFst<Arc> P1(T);
891  Prune(&P1, threshold);
892  VectorFst<Arc> P2;
893  Prune(T, &P2, threshold);
894  CHECK(Equiv(P1, P2));
895  }
896 
897  {
898  VLOG(1) << "Check prune(reverse) equiv reverse(prune)";
899  const Weight threshold = generate_();
901  VectorFst<Arc> P1(T);
902  VectorFst<Arc> P2;
903  Prune(&P1, threshold);
904  Reverse(T, &R);
905  Prune(&R, threshold.Reverse());
906  Reverse(R, &P2);
907  CHECK(Equiv(P1, P2));
908  }
909  {
910  VLOG(1) << "Check: ShortestDistance(A - prune(A))"
911  << " > ShortestDistance(A) times Threshold";
912  const Weight threshold = generate_();
913  VectorFst<Arc> P;
914  Prune(A, &P, threshold);
915  CHECK(PruneEquiv(A, P, threshold));
916  }
917  }
918  }
919  if (tprops & kAcyclic) {
920  VLOG(1) << "Check synchronize(T) equiv T";
921  SynchronizeFst<Arc> S(T);
922  CHECK(Equiv(T, S));
923  }
924  }
925 
926  // Tests search operations
927  void TestSearch(const Fst<Arc> &T) {
928  if constexpr (IsPath<Weight>::value) {
929  uint64_t wprops = Weight::Properties();
930 
931  VectorFst<Arc> A(T);
933 
934  if ((wprops & (kPath | kRightSemiring)) == (kPath | kRightSemiring)) {
935  VLOG(1) << "Check 1-best weight.";
936  VectorFst<Arc> path;
937  ShortestPath(T, &path);
938  Weight tsum = ShortestDistance(T);
939  Weight psum = ShortestDistance(path);
940  CHECK(ApproxEqual(tsum, psum, kTestDelta));
941  }
942 
943  if ((wprops & (kPath | kSemiring)) == (kPath | kSemiring)) {
944  VLOG(1) << "Check n-best weights";
945  VectorFst<Arc> R(A);
946  RmEpsilon(&R, /*connect=*/true, Arc::Weight::Zero(), kNoStateId,
947  kDelta);
948  const int nshortest = std::uniform_int_distribution<>(
949  0, kNumRandomShortestPaths + 1)(rand_);
950  VectorFst<Arc> paths;
951  ShortestPath(R, &paths, nshortest, /*unique=*/true,
952  /*first_path=*/false, Weight::Zero(), kNumShortestStates,
953  kDelta);
954  std::vector<Weight> distance;
955  ShortestDistance(paths, &distance, true, kDelta);
956  StateId pstart = paths.Start();
957  if (pstart != kNoStateId) {
958  ArcIterator<Fst<Arc>> piter(paths, pstart);
959  for (; !piter.Done(); piter.Next()) {
960  StateId s = piter.Value().nextstate;
961  Weight nsum = s < distance.size()
962  ? Times(piter.Value().weight, distance[s])
963  : Weight::Zero();
964  VectorFst<Arc> path;
965  ShortestPath(R, &path, 1, false, false, Weight::Zero(), kNoStateId,
966  kDelta);
967  Weight dsum = ShortestDistance(path, kDelta);
968  CHECK(ApproxEqual(nsum, dsum, kTestDelta));
969  ArcMap(&path, RmWeightMapper<Arc>());
970  VectorFst<Arc> S;
971  Difference(R, path, &S);
972  R = S;
973  }
974  }
975  }
976  }
977  }
978 
979  // Tests if two FSTS are equivalent by checking if random
980  // strings from one FST are transduced the same by both FSTs.
981  template <class A>
982  bool Equiv(const Fst<A> &fst1, const Fst<A> &fst2) {
983  VLOG(1) << "Check FSTs for sanity (including property bits).";
984  CHECK(Verify(fst1));
985  CHECK(Verify(fst2));
986 
987  // Ensures seed used once per instantiation.
988  static const UniformArcSelector<A> uniform_selector(seed_);
989  const RandGenOptions<UniformArcSelector<A>> opts(uniform_selector,
990  kRandomPathLength);
991  return RandEquivalent(fst1, fst2, kNumRandomPaths, opts, kTestDelta, seed_);
992  }
993 
994  // Tests FSA is unambiguous.
995  bool Unambiguous(const Fst<Arc> &fst) {
996  VectorFst<StdArc> sfst, dfst;
997  VectorFst<LogArc> lfst1, lfst2;
998  ArcMap(fst, &sfst, RmWeightMapper<Arc, StdArc>());
999  Determinize(sfst, &dfst);
1000  ArcMap(fst, &lfst1, RmWeightMapper<Arc, LogArc>());
1001  ArcMap(dfst, &lfst2, RmWeightMapper<StdArc, LogArc>());
1002  return Equiv(lfst1, lfst2);
1003  }
1004 
1005  // Ensures input-epsilon free transducers fst1 and fst2 have the
1006  // same domain and that for each string pair '(is, os)' in fst1,
1007  // '(is, os)' is the minimum weight match to 'is' in fst2.
1008  template <class A>
1009  bool MinRelated(const Fst<A> &fst1, const Fst<A> &fst2) {
1010  // Same domain
1011  VectorFst<Arc> P1(fst1), P2(fst2);
1014  if (!Equiv(P1, P2)) {
1015  LOG(ERROR) << "Inputs not equivalent";
1016  return false;
1017  }
1018 
1019  // Ensures seed used once per instantiation.
1020  static const UniformArcSelector<A> uniform_selector(seed_);
1021  const RandGenOptions<UniformArcSelector<A>> opts(uniform_selector,
1022  kRandomPathLength);
1023 
1024  VectorFst<Arc> path, paths1, paths2;
1025  for (ssize_t n = 0; n < kNumRandomPaths; ++n) {
1026  RandGen(fst1, &path, opts);
1027  Invert(&path);
1028  ArcMap(&path, RmWeightMapper<Arc>());
1029  Compose(path, fst2, &paths1);
1030  Weight sum1 = ShortestDistance(paths1);
1031  Compose(paths1, path, &paths2);
1032  Weight sum2 = ShortestDistance(paths2);
1033  if (!ApproxEqual(Plus(sum1, sum2), sum2, kTestDelta)) {
1034  LOG(ERROR) << "Sums not equivalent: " << sum1 << " " << sum2;
1035  return false;
1036  }
1037  }
1038  return true;
1039  }
1040 
1041  // Tests ShortestDistance(A - P) >= ShortestDistance(A) times Threshold.
1042  template <class A>
1043  bool PruneEquiv(const Fst<A> &fst, const Fst<A> &pfst, Weight threshold) {
1044  VLOG(1) << "Check FSTs for sanity (including property bits).";
1045  CHECK(Verify(fst));
1046  CHECK(Verify(pfst));
1047 
1049  ArcMapFst(pfst, RmWeightMapper<Arc>()))));
1050  const Weight sum1 = Times(ShortestDistance(fst), threshold);
1051  const Weight sum2 = ShortestDistance(D);
1052  return ApproxEqual(Plus(sum1, sum2), sum1, kTestDelta);
1053  }
1054 
1055  // Random seed.
1056  uint64_t seed_;
1057  // Random state (for randomness in this class).
1058  std::mt19937_64 rand_;
1059  // FST with no states
1060  VectorFst<Arc> zero_fst_;
1061  // FST with one state that accepts epsilon.
1062  VectorFst<Arc> one_fst_;
1063  // FST with one state that accepts all strings.
1064  VectorFst<Arc> univ_fst_;
1065  // Generates weights used in testing.
1066  WeightGenerator generate_;
1067  // Maximum random path length.
1068  static constexpr int kRandomPathLength = 25;
1069  // Number of random paths to explore.
1070  static constexpr int kNumRandomPaths = 100;
1071  // Maximum number of nshortest paths.
1072  static constexpr int kNumRandomShortestPaths = 100;
1073  // Maximum number of nshortest states.
1074  static constexpr int kNumShortestStates = 10000;
1075  // Delta for equivalence tests.
1076  static constexpr float kTestDelta = .05;
1077 
1078  WeightedTester(const WeightedTester &) = delete;
1079  WeightedTester &operator=(const WeightedTester &) = delete;
1080 };
1081 
1082 // This class tests a variety of identities and properties that must
1083 // hold for various algorithms on unweighted FSAs and that are not tested
1084 // by WeightedTester. Only the specialization does anything interesting.
1085 template <class Arc>
1087  public:
1088  UnweightedTester(const Fst<Arc> &zero_fsa, const Fst<Arc> &one_fsa,
1089  const Fst<Arc> &univ_fsa, uint64_t seed) {}
1090 
1091  void Test(const Fst<Arc> &A1, const Fst<Arc> &A2, const Fst<Arc> &A3) {}
1092 };
1093 
1094 // Specialization for StdArc. This should work for any commutative,
1095 // idempotent semiring when restricted to the unweighted case
1096 // (being isomorphic to the boolean semiring).
1097 template <>
1099  public:
1100  using Arc = StdArc;
1104 
1105  UnweightedTester(const Fst<Arc> &zero_fsa, const Fst<Arc> &one_fsa,
1106  const Fst<Arc> &univ_fsa, uint64_t seed)
1107  : zero_fsa_(zero_fsa),
1108  one_fsa_(one_fsa),
1109  univ_fsa_(univ_fsa),
1110  rand_(seed) {}
1111 
1112  void Test(const Fst<Arc> &A1, const Fst<Arc> &A2, const Fst<Arc> &A3) {
1113  TestRational(A1, A2, A3);
1114  TestIntersect(A1, A2, A3);
1115  TestOptimize(A1);
1116  }
1117 
1118  private:
1119  // Tests rational operations with identities.
1120  void TestRational(const Fst<Arc> &A1, const Fst<Arc> &A2,
1121  const Fst<Arc> &A3) {
1122  {
1123  VLOG(1) << "Check the union contains its arguments (destructive).";
1124  VectorFst<Arc> U(A1);
1125  Union(&U, A2);
1126 
1127  CHECK(Subset(A1, U));
1128  CHECK(Subset(A2, U));
1129  }
1130 
1131  {
1132  VLOG(1) << "Check the union contains its arguments (delayed).";
1133  UnionFst<Arc> U(A1, A2);
1134 
1135  CHECK(Subset(A1, U));
1136  CHECK(Subset(A2, U));
1137  }
1138 
1139  {
1140  VLOG(1) << "Check if A^n c A* (destructive).";
1141  VectorFst<Arc> C(one_fsa_);
1142  const int n = std::uniform_int_distribution<>(0, 4)(rand_);
1143  for (int i = 0; i < n; ++i) Concat(&C, A1);
1144 
1145  VectorFst<Arc> S(A1);
1146  Closure(&S, CLOSURE_STAR);
1147  CHECK(Subset(C, S));
1148  }
1149 
1150  {
1151  VLOG(1) << "Check if A^n c A* (delayed).";
1152  const int n = std::uniform_int_distribution<>(0, 4)(rand_);
1153  std::unique_ptr<Fst<Arc>> C = std::make_unique<VectorFst<Arc>>(one_fsa_);
1154  for (int i = 0; i < n; ++i) {
1155  C = std::make_unique<ConcatFst<Arc>>(*C, A1);
1156  }
1158  CHECK(Subset(*C, S));
1159  }
1160  }
1161 
1162  // Tests intersect-based operations.
1163  void TestIntersect(const Fst<Arc> &A1, const Fst<Arc> &A2,
1164  const Fst<Arc> &A3) {
1165  VectorFst<Arc> S1(A1);
1166  VectorFst<Arc> S2(A2);
1167  VectorFst<Arc> S3(A3);
1168 
1169  ILabelCompare<Arc> comp;
1170 
1171  ArcSort(&S1, comp);
1172  ArcSort(&S2, comp);
1173  ArcSort(&S3, comp);
1174 
1175  {
1176  VLOG(1) << "Check the intersection is contained in its arguments.";
1177  IntersectFst<Arc> I1(S1, S2);
1178  CHECK(Subset(I1, S1));
1179  CHECK(Subset(I1, S2));
1180  }
1181 
1182  {
1183  VLOG(1) << "Check union distributes over intersection.";
1184  IntersectFst<Arc> I1(S1, S2);
1185  UnionFst<Arc> U1(I1, S3);
1186 
1187  UnionFst<Arc> U2(S1, S3);
1188  UnionFst<Arc> U3(S2, S3);
1189  ArcSortFst<Arc, ILabelCompare<Arc>> S4(U3, comp);
1190  IntersectFst<Arc> I2(U2, S4);
1191 
1192  CHECK(Equiv(U1, I2));
1193  }
1194 
1195  VectorFst<Arc> C1;
1196  VectorFst<Arc> C2;
1197  Complement(S1, &C1);
1198  Complement(S2, &C2);
1199  ArcSort(&C1, comp);
1200  ArcSort(&C2, comp);
1201 
1202  {
1203  VLOG(1) << "Check S U S' = Sigma*";
1204  UnionFst<Arc> U(S1, C1);
1205  CHECK(Equiv(U, univ_fsa_));
1206  }
1207 
1208  {
1209  VLOG(1) << "Check S n S' = {}";
1210  IntersectFst<Arc> I(S1, C1);
1211  CHECK(Equiv(I, zero_fsa_));
1212  }
1213 
1214  {
1215  VLOG(1) << "Check (S1' U S2') == (S1 n S2)'";
1216  UnionFst<Arc> U(C1, C2);
1217 
1218  IntersectFst<Arc> I(S1, S2);
1219  VectorFst<Arc> C3;
1220  Complement(I, &C3);
1221  CHECK(Equiv(U, C3));
1222  }
1223 
1224  {
1225  VLOG(1) << "Check (S1' n S2') == (S1 U S2)'";
1226  IntersectFst<Arc> I(C1, C2);
1227 
1228  UnionFst<Arc> U(S1, S2);
1229  VectorFst<Arc> C3;
1230  Complement(U, &C3);
1231  CHECK(Equiv(I, C3));
1232  }
1233  }
1234 
1235  // Tests optimization operations.
1236  void TestOptimize(const Fst<Arc> &A) {
1237  {
1238  VLOG(1) << "Check determinized FSA is equivalent to its input.";
1239  DeterminizeFst<Arc> D(A);
1240  CHECK(Equiv(A, D));
1241  }
1242 
1243  {
1244  VLOG(1) << "Check disambiguated FSA is equivalent to its input.";
1245  VectorFst<Arc> R(A), D;
1246  RmEpsilon(&R);
1247 
1248  Disambiguate(R, &D);
1249  CHECK(Equiv(R, D));
1250  }
1251 
1252  {
1253  VLOG(1) << "Check minimized FSA is equivalent to its input.";
1254  int n;
1255  {
1256  RmEpsilonFst<Arc> R(A);
1257  DeterminizeFst<Arc> D(R);
1258  VectorFst<Arc> M(D);
1259  Minimize(&M, static_cast<MutableFst<Arc> *>(nullptr), kDelta);
1260  CHECK(Equiv(A, M));
1261  n = M.NumStates();
1262  }
1263 
1264  if (n) { // Skips test if A is the empty machine.
1265  VLOG(1) << "Check that Hopcroft's and Revuz's algorithms lead to the"
1266  << " same number of states as Brozozowski's algorithm";
1267  VectorFst<Arc> R;
1268  Reverse(A, &R);
1269  RmEpsilon(&R);
1270  DeterminizeFst<Arc> DR(R);
1271  VectorFst<Arc> RD;
1272  Reverse(DR, &RD);
1273  DeterminizeFst<Arc> DRD(RD);
1274  VectorFst<Arc> M(DRD);
1275  CHECK_EQ(n + 1, M.NumStates()); // Accounts for the epsilon transition
1276  // to the initial state.
1277  }
1278  }
1279  }
1280 
1281  // Tests if two FSAS are equivalent.
1282  bool Equiv(const Fst<Arc> &fsa1, const Fst<Arc> &fsa2) {
1283  VLOG(1) << "Check FSAs for sanity (including property bits).";
1284  CHECK(Verify(fsa1));
1285  CHECK(Verify(fsa2));
1286 
1287  VectorFst<Arc> vfsa1(fsa1);
1288  VectorFst<Arc> vfsa2(fsa2);
1289  RmEpsilon(&vfsa1);
1290  RmEpsilon(&vfsa2);
1291  DeterminizeFst<Arc> dfa1(vfsa1);
1292  DeterminizeFst<Arc> dfa2(vfsa2);
1293 
1294  // Test equivalence using union-find algorithm
1295  bool equiv1 = Equivalent(dfa1, dfa2);
1296 
1297  // Test equivalence by checking if (S1 - S2) U (S2 - S1) is empty
1298  ILabelCompare<Arc> comp;
1299  VectorFst<Arc> sdfa1(dfa1);
1300  ArcSort(&sdfa1, comp);
1301  VectorFst<Arc> sdfa2(dfa2);
1302  ArcSort(&sdfa2, comp);
1303 
1304  DifferenceFst<Arc> dfsa1(sdfa1, sdfa2);
1305  DifferenceFst<Arc> dfsa2(sdfa2, sdfa1);
1306 
1307  VectorFst<Arc> ufsa(dfsa1);
1308  Union(&ufsa, dfsa2);
1309  Connect(&ufsa);
1310  bool equiv2 = ufsa.NumStates() == 0;
1311 
1312  // Checks both equivalence tests match.
1313  CHECK((equiv1 && equiv2) || (!equiv1 && !equiv2));
1314 
1315  return equiv1;
1316  }
1317 
1318  // Tests if FSA1 is a subset of FSA2 (disregarding weights).
1319  bool Subset(const Fst<Arc> &fsa1, const Fst<Arc> &fsa2) {
1320  VLOG(1) << "Check FSAs (incl. property bits) for sanity";
1321  CHECK(Verify(fsa1));
1322  CHECK(Verify(fsa2));
1323 
1324  VectorFst<StdArc> vfsa1;
1325  VectorFst<StdArc> vfsa2;
1326  RmEpsilon(&vfsa1);
1327  RmEpsilon(&vfsa2);
1328  ILabelCompare<StdArc> comp;
1329  ArcSort(&vfsa1, comp);
1330  ArcSort(&vfsa2, comp);
1331  IntersectFst<StdArc> ifsa(vfsa1, vfsa2);
1332  DeterminizeFst<StdArc> dfa1(vfsa1);
1333  DeterminizeFst<StdArc> dfa2(ifsa);
1334  return Equivalent(dfa1, dfa2);
1335  }
1336 
1337  // Returns complement FSA.
1338  void Complement(const Fst<Arc> &ifsa, MutableFst<Arc> *ofsa) {
1339  RmEpsilonFst<Arc> rfsa(ifsa);
1340  DeterminizeFst<Arc> dfa(rfsa);
1341  DifferenceFst<Arc> cfsa(univ_fsa_, dfa);
1342  *ofsa = cfsa;
1343  }
1344 
1345  // FSA with no states.
1346  VectorFst<Arc> zero_fsa_;
1347  // FSA with one state that accepts epsilon.
1348  VectorFst<Arc> one_fsa_;
1349  // FSA with one state that accepts all strings.
1350  VectorFst<Arc> univ_fsa_;
1351  // Random state.
1352  std::mt19937_64 rand_;
1353 };
1354 
1355 // This class tests a variety of identities and properties that must
1356 // hold for various FST algorithms. It randomly generates FSTs, using
1357 // function object 'weight_generator' to select weights. 'WeightTester'
1358 // and 'UnweightedTester' are then called.
1359 template <class Arc>
1360 class AlgoTester {
1361  public:
1362  using Label = typename Arc::Label;
1363  using StateId = typename Arc::StateId;
1364  using Weight = typename Arc::Weight;
1366 
1367  AlgoTester(WeightGenerator generator, uint64_t seed)
1368  : generate_(std::move(generator)), rand_(seed) {
1369  one_fst_.AddState();
1370  one_fst_.SetStart(0);
1371  one_fst_.SetFinal(0);
1372 
1373  univ_fst_.AddState();
1374  univ_fst_.SetStart(0);
1375  univ_fst_.SetFinal(0);
1376  for (int i = 0; i < kNumRandomLabels; ++i) univ_fst_.EmplaceArc(0, i, i, 0);
1377 
1378  weighted_tester_.reset(new WeightedTester<Arc>(seed, zero_fst_, one_fst_,
1379  univ_fst_, generate_));
1380 
1381  unweighted_tester_.reset(
1382  new UnweightedTester<Arc>(zero_fst_, one_fst_, univ_fst_, seed));
1383  }
1384 
1386  RandFst<Arc, WeightGenerator>(kNumRandomStates, kNumRandomArcs,
1387  kNumRandomLabels, kAcyclicProb, generate_,
1388  rand_(), fst);
1389  }
1390 
1391  void Test() {
1392  VLOG(1) << "weight type = " << Weight::Type();
1393 
1394  for (int i = 0; i < FST_FLAGS_repeat; ++i) {
1395  // Random transducers
1396  VectorFst<Arc> T1;
1397  VectorFst<Arc> T2;
1398  VectorFst<Arc> T3;
1399  MakeRandFst(&T1);
1400  MakeRandFst(&T2);
1401  MakeRandFst(&T3);
1402  weighted_tester_->Test(T1, T2, T3);
1403 
1404  VectorFst<Arc> A1(T1);
1405  VectorFst<Arc> A2(T2);
1406  VectorFst<Arc> A3(T3);
1410  ArcMap(&A1, rm_weight_mapper_);
1411  ArcMap(&A2, rm_weight_mapper_);
1412  ArcMap(&A3, rm_weight_mapper_);
1413  unweighted_tester_->Test(A1, A2, A3);
1414  }
1415  }
1416 
1417  private:
1418  // Generates weights used in testing.
1419  WeightGenerator generate_;
1420  // Random state used to seed RandFst.
1421  std::mt19937_64 rand_;
1422  // FST with no states
1423  VectorFst<Arc> zero_fst_;
1424  // FST with one state that accepts epsilon.
1425  VectorFst<Arc> one_fst_;
1426  // FST with one state that accepts all strings.
1427  VectorFst<Arc> univ_fst_;
1428  // Tests weighted FSTs
1429  std::unique_ptr<WeightedTester<Arc>> weighted_tester_;
1430  // Tests unweighted FSTs
1431  std::unique_ptr<UnweightedTester<Arc>> unweighted_tester_;
1432  // Mapper to remove weights from an Fst
1433  RmWeightMapper<Arc> rm_weight_mapper_;
1434  // Maximum number of states in random test Fst.
1435  static constexpr int kNumRandomStates = 10;
1436  // Maximum number of arcs in random test Fst.
1437  static constexpr int kNumRandomArcs = 25;
1438  // Number of alternative random labels.
1439  static constexpr int kNumRandomLabels = 5;
1440  // Probability to force an acyclic Fst
1441  static constexpr float kAcyclicProb = .25;
1442  // Maximum random path length.
1443  static constexpr int kRandomPathLength = 25;
1444  // Number of random paths to explore.
1445  static constexpr int kNumRandomPaths = 100;
1446 
1447  AlgoTester(const AlgoTester &) = delete;
1448  AlgoTester &operator=(const AlgoTester &) = delete;
1449 };
1450 } // namespace fst
1451 
1452 #endif // FST_TEST_ALGO_TEST_H_
MapSymbolsAction
Definition: arc-map.h:65
constexpr uint64_t kSemiring
Definition: weight.h:141
void ArcMap(MutableFst< A > *fst, C *mapper)
Definition: arc-map.h:120
void AddArc(StateId s, const Arc &arc) override
Definition: mutable-fst.h:331
bool RandEquivalent(const Fst< Arc > &fst1, const Fst< Arc > &fst2, int32_t npath, const RandGenOptions< ArcSelector > &opts, float delta=kDelta, uint64_t seed=std::random_device()(), bool *error=nullptr)
void SetStart(StateId s) override
Definition: mutable-fst.h:303
void RmEpsilon(MutableFst< Arc > *fst, std::vector< typename Arc::Weight > *distance, const RmEpsilonOptions< Arc, Queue > &opts)
Definition: rmepsilon.h:217
void Invert(const Fst< Arc > &ifst, MutableFst< Arc > *ofst)
Definition: invert.h:73
S StateId
Definition: arc.h:48
virtual uint64_t Properties(uint64_t mask, bool test) const =0
constexpr uint64_t kOEpsilons
Definition: properties.h:89
void Encode(MutableFst< Arc > *fst, EncodeMapper< Arc > *mapper)
Definition: encode.h:500
constexpr uint8_t kPushLabels
Definition: push.h:110
void MakeRandFst(MutableFst< Arc > *fst)
Definition: algo_test.h:1385
void SetFinal(StateId s, Weight weight=Weight::One()) override
Definition: mutable-fst.h:308
ErrorWeight Plus(const ErrorWeight &, const ErrorWeight &)
Definition: error-weight.h:61
ArcMapFst(const Fst< typename ArcMapper::FromArc > &, const ArcMapper &) -> ArcMapFst< typename ArcMapper::FromArc, typename ArcMapper::ToArc, ArcMapper >
void Closure(MutableFst< Arc > *fst, ClosureType closure_type)
Definition: closure.h:51
static void Relabel(MutableFst< Arc > *fst, const LFST &mfst, bool relabel_input)
DECLARE_int32(repeat)
void Determinize(const Fst< Arc > &ifst, MutableFst< Arc > *ofst, const DeterminizeOptions< Arc > &opts=DeterminizeOptions< Arc >())
Definition: determinize.h:1095
ErrorWeight Times(const ErrorWeight &, const ErrorWeight &)
Definition: error-weight.h:64
typename Arc::Label Label
Definition: algo_test.h:145
void DfsVisit(const FST &fst, Visitor *visitor, ArcFilter filter, bool access_only=false)
Definition: dfs-visit.h:112
#define LOG(type)
Definition: log.h:53
StateId Start() const override
Definition: impl-to-fst.h:47
static const std::string & Type()
Definition: arc.h:68
typename Arc::StateId StateId
Definition: algo_test.h:1363
bool TopSort(MutableFst< Arc > *fst)
Definition: topsort.h:90
ArcTpl< TropicalWeight > StdArc
Definition: arc.h:75
constexpr uint64_t kIdempotent
Definition: weight.h:147
void Connect(MutableFst< Arc > *fst)
Definition: connect.h:47
constexpr uint64_t kEpsilons
Definition: properties.h:79
DeterminizeType type
Definition: determinize.h:1060
constexpr uint64_t kNotOLabelSorted
Definition: properties.h:101
L Label
Definition: arc.h:47
constexpr int kNoStateId
Definition: fst.h:196
typename Arc::StateId StateId
Definition: algo_test.h:146
const Arc & Value() const
Definition: fst.h:536
void Relabel(MutableFst< Arc > *fst, const std::vector< std::pair< typename Arc::Label, typename Arc::Label >> &ipairs, const std::vector< std::pair< typename Arc::Label, typename Arc::Label >> &opairs)
Definition: relabel.h:49
void Reverse(const Fst< Arc > &ifst, const std::vector< std::pair< typename Arc::Label, typename Arc::Label >> &parens, std::vector< typename Arc::Label > *assignments, MutableFst< RevArc > *ofst)
Definition: reverse.h:36
void Prune(MutableFst< Arc > *fst, const PruneOptions< Arc, ArcFilter > &opts=PruneOptions< Arc, ArcFilter >())
Definition: prune.h:114
constexpr uint64_t kRightSemiring
Definition: weight.h:139
void Difference(const Fst< Arc > &ifst1, const Fst< Arc > &ifst2, MutableFst< Arc > *ofst, const DifferenceOptions &opts=DifferenceOptions())
Definition: difference.h:175
StateId AddState() override
Definition: mutable-fst.h:321
void Test(const Fst< Arc > &A1, const Fst< Arc > &A2, const Fst< Arc > &A3)
Definition: algo_test.h:1091
MapFinalAction
Definition: arc-map.h:50
AlgoTester(WeightGenerator generator, uint64_t seed)
Definition: algo_test.h:1367
constexpr uint8_t kPushWeights
Definition: push.h:109
constexpr uint64_t kNotILabelSorted
Definition: properties.h:96
void RandGen(const Fst< FromArc > &ifst, MutableFst< ToArc > *ofst, const RandGenOptions< Selector > &opts)
Definition: randgen.h:751
MapSymbolsAction OutputSymbolsAction() const
Definition: algo_test.h:113
constexpr uint8_t kEncodeLabels
Definition: encode.h:55
constexpr uint64_t kNoOEpsilons
Definition: properties.h:91
void Union(RationalFst< Arc > *fst1, const Fst< Arc > &fst2)
Definition: union.h:117
void ArcSort(MutableFst< Arc > *fst, Compare comp)
Definition: arcsort.h:109
constexpr uint64_t kOLabelSorted
Definition: properties.h:99
constexpr uint64_t kNotAcceptor
Definition: properties.h:66
constexpr uint64_t kAcyclic
Definition: properties.h:111
constexpr uint64_t kCommutative
Definition: weight.h:144
constexpr uint64_t kNoEpsilons
Definition: properties.h:81
void Compose(const Fst< Arc > &ifst1, const Fst< Arc > &ifst2, MutableFst< Arc > *ofst, const ComposeOptions &opts=ComposeOptions())
Definition: compose.h:1005
UnweightedTester(const Fst< Arc > &zero_fsa, const Fst< Arc > &one_fsa, const Fst< Arc > &univ_fsa, uint64_t seed)
Definition: algo_test.h:1088
StateId NumStates() const override
Definition: expanded-fst.h:144
MapFinalAction FinalAction() const
Definition: algo_test.h:109
void LookAheadCompose(const Fst< Arc > &ifst1, const Fst< Arc > &ifst2, MutableFst< Arc > *ofst)
Definition: algo_test.h:118
A operator()(const A &arc) const
Definition: algo_test.h:95
#define VLOG(level)
Definition: log.h:54
bool Done() const
Definition: fst.h:532
constexpr uint64_t kIDeterministic
Definition: properties.h:69
void ShortestPath(const Fst< Arc > &ifst, const std::vector< std::pair< typename Arc::Label, typename Arc::Label >> &parens, MutableFst< Arc > *ofst, const PdtShortestPathOptions< Arc, Queue > &opts)
constexpr uint64_t kIEpsilons
Definition: properties.h:84
MapSymbolsAction InputSymbolsAction() const
Definition: algo_test.h:111
void Concat(MutableFst< Arc > *fst1, const Fst< Arc > &fst2)
Definition: concat.h:55
void Project(const Fst< Arc > &ifst, MutableFst< Arc > *ofst, ProjectType project_type)
Definition: project.h:89
bool Verify(const Fst< Arc > &fst, bool allow_negative_labels=false)
Definition: verify.h:36
constexpr uint64_t kPath
Definition: weight.h:150
constexpr uint8_t kEncodeWeights
Definition: encode.h:56
constexpr uint64_t kILabelSorted
Definition: properties.h:94
constexpr uint64_t kFstProperties
Definition: properties.h:326
void Minimize(MutableFst< Arc > *fst, MutableFst< Arc > *sfst=nullptr, float delta=kShortestDelta, bool allow_nondet=false)
Definition: minimize.h:512
uint64_t Properties(uint64_t props) const
Definition: algo_test.h:99
EpsMapper()=default
WeightedTester(uint64_t seed, const Fst< Arc > &zero_fst, const Fst< Arc > &one_fst, const Fst< Arc > &univ_fst, WeightGenerator weight_generator)
Definition: algo_test.h:150
typename Arc::Label Label
Definition: algo_test.h:1362
typename Arc::Weight Weight
Definition: algo_test.h:1364
#define CHECK_EQ(x, y)
Definition: log.h:66
void Test(const Fst< Arc > &A1, const Fst< Arc > &A2, const Fst< Arc > &A3)
Definition: algo_test.h:1112
bool Equivalent(const Fst< Arc > &fst1, const Fst< Arc > &fst2, float delta=kDelta, bool *error=nullptr)
Definition: equivalent.h:127
void Test(const Fst< Arc > &T1, const Fst< Arc > &T2, const Fst< Arc > &T3)
Definition: algo_test.h:160
void ShortestDistance(const Fst< Arc > &fst, std::vector< typename Arc::Weight > *distance, const ShortestDistanceOptions< Arc, Queue, ArcFilter > &opts)
#define CHECK(x)
Definition: log.h:65
W Weight
Definition: arc.h:46
UnweightedTester(const Fst< Arc > &zero_fsa, const Fst< Arc > &one_fsa, const Fst< Arc > &univ_fsa, uint64_t seed)
Definition: algo_test.h:1105
constexpr uint64_t kLeftSemiring
Definition: weight.h:136
constexpr float kDelta
Definition: weight.h:133
void Disambiguate(const Fst< Arc > &ifst, MutableFst< Arc > *ofst, const DisambiguateOptions< Arc > &opts=DisambiguateOptions< Arc >())
Definition: disambiguate.h:586
typename Arc::Weight Weight
Definition: algo_test.h:147
constexpr uint64_t kNoIEpsilons
Definition: properties.h:86
void Decode(MutableFst< Arc > *fst, const EncodeMapper< Arc > &mapper)
Definition: encode.h:507
std::bool_constant<(W::Properties()&kPath)!=0 > IsPath
Definition: weight.h:162
void Reweight(MutableFst< Arc > *fst, const std::vector< typename Arc::Weight > &potential, ReweightType type)
Definition: reweight.h:46
bool ApproxEqual(const ErrorWeight &, const ErrorWeight &, float)
Definition: error-weight.h:58
uint64_t Properties(uint64_t mask, bool test) const override
Definition: impl-to-fst.h:63
constexpr uint64_t kAcceptor
Definition: properties.h:64
void Next()
Definition: fst.h:540