FST  openfst-1.7.2
OpenFst Library
matcher.h
Go to the documentation of this file.
1 // See www.openfst.org for extensive documentation on this weighted
2 // finite-state transducer library.
3 //
4 // Classes to allow matching labels leaving FST states.
5 
6 #ifndef FST_MATCHER_H_
7 #define FST_MATCHER_H_
8 
9 #include <algorithm>
10 #include <memory>
11 #include <unordered_map>
12 #include <utility>
13 
14 #include <fst/log.h>
15 
16 #include <fst/mutable-fst.h> // for all internal FST accessors.
17 
18 
19 namespace fst {
20 
21 // Matchers find and iterate through requested labels at FST states. In the
22 // simplest form, these are just some associative map or search keyed on labels.
23 // More generally, they may implement matching special labels that represent
24 // sets of labels such as sigma (all), rho (rest), or phi (fail). The Matcher
25 // interface is:
26 //
27 // template <class F>
28 // class Matcher {
29 // public:
30 // using FST = F;
31 // using Arc = typename FST::Arc;
32 // using Label = typename Arc::Label;
33 // using StateId = typename Arc::StateId;
34 // using Weight = typename Arc::Weight;
35 //
36 // // Required constructors. Note:
37 // // -- the constructors that copy the FST arg are useful for
38 // // letting the matcher manage the FST through copies
39 // // (esp with 'safe' copies); e.g. ComposeFst depends on this.
40 // // -- the constructor that does not copy is useful when the
41 // // the FST is mutated during the lifetime of the matcher
42 // // (o.w. the matcher would have its own unmutated deep copy).
43 //
44 // // This makes a copy of the FST.
45 // Matcher(const FST &fst, MatchType type);
46 // // This doesn't copy the FST.
47 // Matcher(const FST *fst, MatchType type);
48 // // This makes a copy of the FST.
49 // // See Copy() below.
50 // Matcher(const Matcher &matcher, bool safe = false);
51 //
52 // // If safe = true, the copy is thread-safe. See Fst<>::Copy() for
53 // // further doc.
54 // Matcher<FST> *Copy(bool safe = false) const override;
55 //
56 // // Returns the match type that can be provided (depending on compatibility
57 // of the input FST). It is either the requested match type, MATCH_NONE, or
58 // MATCH_UNKNOWN. If test is false, a costly testing is avoided, but
59 // MATCH_UNKNOWN may be returned. If test is true, a definite answer is
60 // returned, but may involve more costly computation (e.g., visiting the FST).
61 // MatchType Type(bool test) const override;
62 //
63 // // Specifies the current state.
64 // void SetState(StateId s) final;
65 //
66 // // Finds matches to a label at the current state, returning true if a match
67 // // found. kNoLabel matches any non-consuming transitions, e.g., epsilon
68 // // transitions, which do not require a matching symbol.
69 // bool Find(Label label) final;
70 //
71 // // Iterator methods. Note that initially and after SetState() these have
72 // undefined behavior until Find() is called.
73 //
74 // bool Done() const final;
75 //
76 // const Arc &Value() const final;
77 //
78 // void Next() final;
79 //
80 // // Returns final weight of a state.
81 // Weight Final(StateId) const final;
82 //
83 // // Indicates preference for being the side used for matching in
84 // // composition. If the value is kRequirePriority, then it is
85 // // mandatory that it be used. Calling this method without passing the
86 // // current state of the matcher invalidates the state of the matcher.
87 // ssize_t Priority(StateId s) final;
88 //
89 // // This specifies the known FST properties as viewed from this matcher. It
90 // // takes as argument the input FST's known properties.
91 // uint64 Properties(uint64 props) const override;
92 //
93 // // Returns matcher flags.
94 // uint32 Flags() const override;
95 //
96 // // Returns matcher FST.
97 // const FST &GetFst() const override;
98 // };
99 
100 // Basic matcher flags.
101 
102 // Matcher needs to be used as the matching side in composition for
103 // at least one state (has kRequirePriority).
104 constexpr uint32 kRequireMatch = 0x00000001;
105 
106 // Flags used for basic matchers (see also lookahead.h).
108 
109 // Matcher priority that is mandatory.
110 constexpr ssize_t kRequirePriority = -1;
111 
112 // Matcher interface, templated on the Arc definition; used for matcher
113 // specializations that are returned by the InitMatcher FST method.
114 template <class A>
115 class MatcherBase {
116  public:
117  using Arc = A;
118  using Label = typename Arc::Label;
119  using StateId = typename Arc::StateId;
120  using Weight = typename Arc::Weight;
121 
122  virtual ~MatcherBase() {}
123 
124  // Virtual interface.
125 
126  virtual MatcherBase<Arc> *Copy(bool safe = false) const = 0;
127  virtual MatchType Type(bool) const = 0;
128  virtual void SetState(StateId) = 0;
129  virtual bool Find(Label) = 0;
130  virtual bool Done() const = 0;
131  virtual const Arc &Value() const = 0;
132  virtual void Next() = 0;
133  virtual const Fst<Arc> &GetFst() const = 0;
134  virtual uint64 Properties(uint64) const = 0;
135 
136  // Trivial implementations that can be used by derived classes. Full
137  // devirtualization is expected for any derived class marked final.
138  virtual uint32 Flags() const { return 0; }
139 
140  virtual Weight Final(StateId s) const { return internal::Final(GetFst(), s); }
141 
142  virtual ssize_t Priority(StateId s) { return internal::NumArcs(GetFst(), s); }
143 };
144 
145 // A matcher that expects sorted labels on the side to be matched.
146 // If match_type == MATCH_INPUT, epsilons match the implicit self-loop
147 // Arc(kNoLabel, 0, Weight::One(), current_state) as well as any
148 // actual epsilon transitions. If match_type == MATCH_OUTPUT, then
149 // Arc(0, kNoLabel, Weight::One(), current_state) is instead matched.
150 template <class F>
151 class SortedMatcher : public MatcherBase<typename F::Arc> {
152  public:
153  using FST = F;
154  using Arc = typename FST::Arc;
155  using Label = typename Arc::Label;
156  using StateId = typename Arc::StateId;
157  using Weight = typename Arc::Weight;
158 
161 
162  // Labels >= binary_label will be searched for by binary search;
163  // o.w. linear search is used.
164  // This makes a copy of the FST.
165  SortedMatcher(const FST &fst, MatchType match_type, Label binary_label = 1)
166  : SortedMatcher(fst.Copy(), match_type, binary_label) {
167  owned_fst_.reset(&fst_);
168  }
169 
170  // Labels >= binary_label will be searched for by binary search;
171  // o.w. linear search is used.
172  // This doesn't copy the FST.
173  SortedMatcher(const FST *fst, MatchType match_type, Label binary_label = 1)
174  : fst_(*fst),
175  state_(kNoStateId),
176  aiter_(nullptr),
177  match_type_(match_type),
178  binary_label_(binary_label),
179  match_label_(kNoLabel),
180  narcs_(0),
181  loop_(kNoLabel, 0, Weight::One(), kNoStateId),
182  error_(false),
183  aiter_pool_(1) {
184  switch (match_type_) {
185  case MATCH_INPUT:
186  case MATCH_NONE:
187  break;
188  case MATCH_OUTPUT:
189  std::swap(loop_.ilabel, loop_.olabel);
190  break;
191  default:
192  FSTERROR() << "SortedMatcher: Bad match type";
193  match_type_ = MATCH_NONE;
194  error_ = true;
195  }
196  }
197 
198  // This makes a copy of the FST.
199  SortedMatcher(const SortedMatcher<FST> &matcher, bool safe = false)
200  : owned_fst_(matcher.fst_.Copy(safe)),
201  fst_(*owned_fst_),
202  state_(kNoStateId),
203  aiter_(nullptr),
204  match_type_(matcher.match_type_),
205  binary_label_(matcher.binary_label_),
206  match_label_(kNoLabel),
207  narcs_(0),
208  loop_(matcher.loop_),
209  error_(matcher.error_),
210  aiter_pool_(1) {}
211 
212  ~SortedMatcher() override { Destroy(aiter_, &aiter_pool_); }
213 
214  SortedMatcher<FST> *Copy(bool safe = false) const override {
215  return new SortedMatcher<FST>(*this, safe);
216  }
217 
218  MatchType Type(bool test) const override {
219  if (match_type_ == MATCH_NONE) return match_type_;
220  const auto true_prop =
221  match_type_ == MATCH_INPUT ? kILabelSorted : kOLabelSorted;
222  const auto false_prop =
223  match_type_ == MATCH_INPUT ? kNotILabelSorted : kNotOLabelSorted;
224  const auto props = fst_.Properties(true_prop | false_prop, test);
225  if (props & true_prop) {
226  return match_type_;
227  } else if (props & false_prop) {
228  return MATCH_NONE;
229  } else {
230  return MATCH_UNKNOWN;
231  }
232  }
233 
234  void SetState(StateId s) final {
235  if (state_ == s) return;
236  state_ = s;
237  if (match_type_ == MATCH_NONE) {
238  FSTERROR() << "SortedMatcher: Bad match type";
239  error_ = true;
240  }
241  Destroy(aiter_, &aiter_pool_);
242  aiter_ = new (&aiter_pool_) ArcIterator<FST>(fst_, s);
243  aiter_->SetFlags(kArcNoCache, kArcNoCache);
244  narcs_ = internal::NumArcs(fst_, s);
245  loop_.nextstate = s;
246  }
247 
248  bool Find(Label match_label) final {
249  exact_match_ = true;
250  if (error_) {
251  current_loop_ = false;
252  match_label_ = kNoLabel;
253  return false;
254  }
255  current_loop_ = match_label == 0;
256  match_label_ = match_label == kNoLabel ? 0 : match_label;
257  if (Search()) {
258  return true;
259  } else {
260  return current_loop_;
261  }
262  }
263 
264  // Positions matcher to the first position where inserting match_label would
265  // maintain the sort order.
266  void LowerBound(Label label) {
267  exact_match_ = false;
268  current_loop_ = false;
269  if (error_) {
270  match_label_ = kNoLabel;
271  return;
272  }
273  match_label_ = label;
274  Search();
275  }
276 
277  // After Find(), returns false if no more exact matches.
278  // After LowerBound(), returns false if no more arcs.
279  bool Done() const final {
280  if (current_loop_) return false;
281  if (aiter_->Done()) return true;
282  if (!exact_match_) return false;
283  aiter_->SetFlags(match_type_ == MATCH_INPUT ?
284  kArcILabelValue : kArcOLabelValue,
285  kArcValueFlags);
286  return GetLabel() != match_label_;
287  }
288 
289  const Arc &Value() const final {
290  if (current_loop_) return loop_;
291  aiter_->SetFlags(kArcValueFlags, kArcValueFlags);
292  return aiter_->Value();
293  }
294 
295  void Next() final {
296  if (current_loop_) {
297  current_loop_ = false;
298  } else {
299  aiter_->Next();
300  }
301  }
302 
303  Weight Final(StateId s) const final {
304  return MatcherBase<Arc>::Final(s);
305  }
306 
307  ssize_t Priority(StateId s) final {
308  return MatcherBase<Arc>::Priority(s);
309  }
310 
311  const FST &GetFst() const override { return fst_; }
312 
313  uint64 Properties(uint64 inprops) const override {
314  return inprops | (error_ ? kError : 0);
315  }
316 
317  size_t Position() const { return aiter_ ? aiter_->Position() : 0; }
318 
319  private:
320  Label GetLabel() const {
321  const auto &arc = aiter_->Value();
322  return match_type_ == MATCH_INPUT ? arc.ilabel : arc.olabel;
323  }
324 
325  bool BinarySearch();
326  bool LinearSearch();
327  bool Search();
328 
329  std::unique_ptr<const FST> owned_fst_; // FST ptr if owned.
330  const FST &fst_; // FST for matching.
331  StateId state_; // Matcher state.
332  ArcIterator<FST> *aiter_; // Iterator for current state.
333  MatchType match_type_; // Type of match to perform.
334  Label binary_label_; // Least label for binary search.
335  Label match_label_; // Current label to be matched.
336  size_t narcs_; // Current state arc count.
337  Arc loop_; // For non-consuming symbols.
338  bool current_loop_; // Current arc is the implicit loop.
339  bool exact_match_; // Exact match or lower bound?
340  bool error_; // Error encountered?
341  MemoryPool<ArcIterator<FST>> aiter_pool_; // Pool of arc iterators.
342 };
343 
344 // Returns true iff match to match_label_. The arc iterator is positioned at the
345 // lower bound, that is, the first element greater than or equal to
346 // match_label_, or the end if all elements are less than match_label_.
347 // If multiple elements are equal to the `match_label_`, returns the rightmost
348 // one.
349 template <class FST>
350 inline bool SortedMatcher<FST>::BinarySearch() {
351  size_t size = narcs_;
352  if (size == 0) {
353  return false;
354  }
355  size_t high = size - 1;
356  while (size > 1) {
357  const size_t half = size / 2;
358  const size_t mid = high - half;
359  aiter_->Seek(mid);
360  if (GetLabel() >= match_label_) {
361  high = mid;
362  }
363  size -= half;
364  }
365  aiter_->Seek(high);
366  const auto label = GetLabel();
367  if (label == match_label_) {
368  return true;
369  }
370  if (label < match_label_) {
371  aiter_->Next();
372  }
373  return false;
374 }
375 
376 // Returns true iff match to match_label_, positioning arc iterator at lower
377 // bound.
378 template <class FST>
379 inline bool SortedMatcher<FST>::LinearSearch() {
380  for (aiter_->Reset(); !aiter_->Done(); aiter_->Next()) {
381  const auto label = GetLabel();
382  if (label == match_label_) return true;
383  if (label > match_label_) break;
384  }
385  return false;
386 }
387 
388 // Returns true iff match to match_label_, positioning arc iterator at lower
389 // bound.
390 template <class FST>
391 inline bool SortedMatcher<FST>::Search() {
392  aiter_->SetFlags(match_type_ == MATCH_INPUT ?
393  kArcILabelValue : kArcOLabelValue,
394  kArcValueFlags);
395  if (match_label_ >= binary_label_) {
396  return BinarySearch();
397  } else {
398  return LinearSearch();
399  }
400 }
401 
402 // A matcher that stores labels in a per-state hash table populated upon the
403 // first visit to that state. Sorting is not required. Treatment of
404 // epsilons are the same as with SortedMatcher.
405 template <class F>
406 class HashMatcher : public MatcherBase<typename F::Arc> {
407  public:
408  using FST = F;
409  using Arc = typename FST::Arc;
410  using Label = typename Arc::Label;
411  using StateId = typename Arc::StateId;
412  using Weight = typename Arc::Weight;
413 
417 
418  // This makes a copy of the FST.
419  HashMatcher(const FST &fst, MatchType match_type)
420  : HashMatcher(fst.Copy(), match_type) {
421  owned_fst_.reset(&fst_);
422  }
423 
424  // This doesn't copy the FST.
425  HashMatcher(const FST *fst, MatchType match_type)
426  : fst_(*fst),
427  state_(kNoStateId),
428  match_type_(match_type),
429  loop_(kNoLabel, 0, Weight::One(), kNoStateId),
430  error_(false),
431  state_table_(std::make_shared<StateTable>()) {
432  switch (match_type_) {
433  case MATCH_INPUT:
434  case MATCH_NONE:
435  break;
436  case MATCH_OUTPUT:
437  std::swap(loop_.ilabel, loop_.olabel);
438  break;
439  default:
440  FSTERROR() << "HashMatcher: Bad match type";
441  match_type_ = MATCH_NONE;
442  error_ = true;
443  }
444  }
445 
446  // This makes a copy of the FST.
447  HashMatcher(const HashMatcher<FST> &matcher, bool safe = false)
448  : owned_fst_(matcher.fst_.Copy(safe)),
449  fst_(*owned_fst_),
450  state_(kNoStateId),
451  match_type_(matcher.match_type_),
452  loop_(matcher.loop_),
453  error_(matcher.error_),
454  state_table_(
455  safe ? std::make_shared<StateTable>() : matcher.state_table_) {}
456 
457  HashMatcher<FST> *Copy(bool safe = false) const override {
458  return new HashMatcher<FST>(*this, safe);
459  }
460 
461  // The argument is ignored as there are no relevant properties to test.
462  MatchType Type(bool test) const override { return match_type_; }
463 
464  void SetState(StateId s) final;
465 
466  bool Find(Label label) final {
467  current_loop_ = label == 0;
468  if (label == 0) {
469  Search(label);
470  return true;
471  }
472  if (label == kNoLabel) label = 0;
473  return Search(label);
474  }
475 
476  bool Done() const final {
477  if (current_loop_) return false;
478  return label_it_ == label_end_;
479  }
480 
481  const Arc &Value() const final {
482  if (current_loop_) return loop_;
483  aiter_->Seek(label_it_->second);
484  return aiter_->Value();
485  }
486 
487  void Next() final {
488  if (current_loop_) {
489  current_loop_ = false;
490  } else {
491  ++label_it_;
492  }
493  }
494 
495  const FST &GetFst() const override { return fst_; }
496 
497  uint64 Properties(uint64 inprops) const override {
498  return inprops | (error_ ? kError : 0);
499  }
500 
501  private:
502  Label GetLabel() const {
503  const auto &arc = aiter_->Value();
504  return match_type_ == MATCH_INPUT ? arc.ilabel : arc.olabel;
505  }
506 
507  bool Search(Label match_label);
508 
509  using LabelTable = std::unordered_multimap<Label, size_t>;
510  using StateTable = std::unordered_map<StateId, std::unique_ptr<LabelTable>>;
511 
512  std::unique_ptr<const FST> owned_fst_; // ptr to FST if owned.
513  const FST &fst_; // FST for matching.
514  StateId state_; // Matcher state.
515  MatchType match_type_;
516  Arc loop_; // The implicit loop itself.
517  bool current_loop_; // Is the current arc the implicit loop?
518  bool error_; // Error encountered?
519  std::unique_ptr<ArcIterator<FST>> aiter_;
520  std::shared_ptr<StateTable> state_table_; // Table from state to label table.
521  LabelTable *label_table_; // Pointer to current state's label table.
522  typename LabelTable::iterator label_it_; // Position for label.
523  typename LabelTable::iterator label_end_; // Position for last label + 1.
524 };
525 
526 template <class FST>
527 void HashMatcher<FST>::SetState(typename FST::Arc::StateId s) {
528  if (state_ == s) return;
529  // Resets everything for the state.
530  state_ = s;
531  loop_.nextstate = state_;
532  aiter_.reset(new ArcIterator<FST>(fst_, state_));
533  if (match_type_ == MATCH_NONE) {
534  FSTERROR() << "HashMatcher: Bad match type";
535  error_ = true;
536  }
537  // Attempts to insert a new label table.
538  auto it_and_success = state_table_->emplace(
539  state_, std::unique_ptr<LabelTable>(new LabelTable()));
540  // Sets instance's pointer to the label table for this state.
541  label_table_ = it_and_success.first->second.get();
542  // If it already exists, no additional work is done and we simply return.
543  if (!it_and_success.second) return;
544  // Otherwise, populate this new table.
545  // Populates the label table.
546  label_table_->reserve(internal::NumArcs(fst_, state_));
547  const auto aiter_flags =
548  (match_type_ == MATCH_INPUT ? kArcILabelValue : kArcOLabelValue) |
549  kArcNoCache;
550  aiter_->SetFlags(aiter_flags, kArcFlags);
551  for (; !aiter_->Done(); aiter_->Next()) {
552  label_table_->emplace(GetLabel(), aiter_->Position());
553  }
554  aiter_->SetFlags(kArcValueFlags, kArcValueFlags);
555 }
556 
557 template <class FST>
558 inline bool HashMatcher<FST>::Search(typename FST::Arc::Label match_label) {
559  auto range = label_table_->equal_range(match_label);
560  label_it_ = range.first;
561  label_end_ = range.second;
562  if (label_it_ == label_end_) return false;
563  aiter_->Seek(label_it_->second);
564  return true;
565 }
566 
567 // Specifies whether we rewrite both the input and output sides during matching.
569  MATCHER_REWRITE_AUTO = 0, // Rewrites both sides iff acceptor.
572 };
573 
574 // For any requested label that doesn't match at a state, this matcher
575 // considers the *unique* transition that matches the label 'phi_label'
576 // (phi = 'fail'), and recursively looks for a match at its
577 // destination. When 'phi_loop' is true, if no match is found but a
578 // phi self-loop is found, then the phi transition found is returned
579 // with the phi_label rewritten as the requested label (both sides if
580 // an acceptor, or if 'rewrite_both' is true and both input and output
581 // labels of the found transition are 'phi_label'). If 'phi_label' is
582 // kNoLabel, this special matching is not done. PhiMatcher is
583 // templated itself on a matcher, which is used to perform the
584 // underlying matching. By default, the underlying matcher is
585 // constructed by PhiMatcher. The user can instead pass in this
586 // object; in that case, PhiMatcher takes its ownership.
587 // Phi non-determinism not supported. No non-consuming symbols other
588 // than epsilon supported with the underlying template argument matcher.
589 template <class M>
590 class PhiMatcher : public MatcherBase<typename M::Arc> {
591  public:
592  using FST = typename M::FST;
593  using Arc = typename FST::Arc;
594  using Label = typename Arc::Label;
595  using StateId = typename Arc::StateId;
596  using Weight = typename Arc::Weight;
597 
598  // This makes a copy of the FST (w/o 'matcher' arg).
599  PhiMatcher(const FST &fst, MatchType match_type, Label phi_label = kNoLabel,
600  bool phi_loop = true,
602  M *matcher = nullptr)
603  : matcher_(matcher ? matcher : new M(fst, match_type)),
604  match_type_(match_type),
605  phi_label_(phi_label),
606  state_(kNoStateId),
607  phi_loop_(phi_loop),
608  error_(false) {
609  if (match_type == MATCH_BOTH) {
610  FSTERROR() << "PhiMatcher: Bad match type";
611  match_type_ = MATCH_NONE;
612  error_ = true;
613  }
614  if (rewrite_mode == MATCHER_REWRITE_AUTO) {
615  rewrite_both_ = fst.Properties(kAcceptor, true);
616  } else if (rewrite_mode == MATCHER_REWRITE_ALWAYS) {
617  rewrite_both_ = true;
618  } else {
619  rewrite_both_ = false;
620  }
621  }
622 
623  // This doesn't copy the FST.
624  PhiMatcher(const FST *fst, MatchType match_type, Label phi_label = kNoLabel,
625  bool phi_loop = true,
627  M *matcher = nullptr)
628  : PhiMatcher(*fst, match_type, phi_label, phi_loop, rewrite_mode,
629  matcher ? matcher : new M(fst, match_type)) { }
630 
631 
632  // This makes a copy of the FST.
633  PhiMatcher(const PhiMatcher<M> &matcher, bool safe = false)
634  : matcher_(new M(*matcher.matcher_, safe)),
635  match_type_(matcher.match_type_),
636  phi_label_(matcher.phi_label_),
637  rewrite_both_(matcher.rewrite_both_),
638  state_(kNoStateId),
639  phi_loop_(matcher.phi_loop_),
640  error_(matcher.error_) {}
641 
642  PhiMatcher<M> *Copy(bool safe = false) const override {
643  return new PhiMatcher<M>(*this, safe);
644  }
645 
646  MatchType Type(bool test) const override { return matcher_->Type(test); }
647 
648  void SetState(StateId s) final {
649  if (state_ == s) return;
650  matcher_->SetState(s);
651  state_ = s;
652  has_phi_ = phi_label_ != kNoLabel;
653  }
654 
655  bool Find(Label match_label) final;
656 
657  bool Done() const final { return matcher_->Done(); }
658 
659  const Arc &Value() const final {
660  if ((phi_match_ == kNoLabel) && (phi_weight_ == Weight::One())) {
661  return matcher_->Value();
662  } else if (phi_match_ == 0) { // Virtual epsilon loop.
663  phi_arc_ = Arc(kNoLabel, 0, Weight::One(), state_);
664  if (match_type_ == MATCH_OUTPUT) {
665  std::swap(phi_arc_.ilabel, phi_arc_.olabel);
666  }
667  return phi_arc_;
668  } else {
669  phi_arc_ = matcher_->Value();
670  phi_arc_.weight = Times(phi_weight_, phi_arc_.weight);
671  if (phi_match_ != kNoLabel) { // Phi loop match.
672  if (rewrite_both_) {
673  if (phi_arc_.ilabel == phi_label_) phi_arc_.ilabel = phi_match_;
674  if (phi_arc_.olabel == phi_label_) phi_arc_.olabel = phi_match_;
675  } else if (match_type_ == MATCH_INPUT) {
676  phi_arc_.ilabel = phi_match_;
677  } else {
678  phi_arc_.olabel = phi_match_;
679  }
680  }
681  return phi_arc_;
682  }
683  }
684 
685  void Next() final { matcher_->Next(); }
686 
687  Weight Final(StateId s) const final {
688  auto weight = matcher_->Final(s);
689  if (phi_label_ == kNoLabel || weight != Weight::Zero()) {
690  return weight;
691  }
692  weight = Weight::One();
693  matcher_->SetState(s);
694  while (matcher_->Final(s) == Weight::Zero()) {
695  if (!matcher_->Find(phi_label_ == 0 ? -1 : phi_label_)) break;
696  weight = Times(weight, matcher_->Value().weight);
697  if (s == matcher_->Value().nextstate) {
698  return Weight::Zero(); // Does not follow phi self-loops.
699  }
700  s = matcher_->Value().nextstate;
701  matcher_->SetState(s);
702  }
703  weight = Times(weight, matcher_->Final(s));
704  return weight;
705  }
706 
707  ssize_t Priority(StateId s) final {
708  if (phi_label_ != kNoLabel) {
709  matcher_->SetState(s);
710  const bool has_phi = matcher_->Find(phi_label_ == 0 ? -1 : phi_label_);
711  return has_phi ? kRequirePriority : matcher_->Priority(s);
712  } else {
713  return matcher_->Priority(s);
714  }
715  }
716 
717  const FST &GetFst() const override { return matcher_->GetFst(); }
718 
719  uint64 Properties(uint64 props) const override;
720 
721  uint32 Flags() const override {
722  if (phi_label_ == kNoLabel || match_type_ == MATCH_NONE) {
723  return matcher_->Flags();
724  }
725  return matcher_->Flags() | kRequireMatch;
726  }
727 
728  Label PhiLabel() const { return phi_label_; }
729 
730  private:
731  mutable std::unique_ptr<M> matcher_;
732  MatchType match_type_; // Type of match requested.
733  Label phi_label_; // Label that represents the phi transition.
734  bool rewrite_both_; // Rewrite both sides when both are phi_label_?
735  bool has_phi_; // Are there possibly phis at the current state?
736  Label phi_match_; // Current label that matches phi loop.
737  mutable Arc phi_arc_; // Arc to return.
738  StateId state_; // Matcher state.
739  Weight phi_weight_; // Product of the weights of phi transitions taken.
740  bool phi_loop_; // When true, phi self-loop are allowed and treated
741  // as rho (required for Aho-Corasick).
742  bool error_; // Error encountered?
743 
744  PhiMatcher &operator=(const PhiMatcher &) = delete;
745 };
746 
747 template <class M>
748 inline bool PhiMatcher<M>::Find(Label label) {
749  if (label == phi_label_ && phi_label_ != kNoLabel && phi_label_ != 0) {
750  FSTERROR() << "PhiMatcher::Find: bad label (phi): " << phi_label_;
751  error_ = true;
752  return false;
753  }
754  matcher_->SetState(state_);
755  phi_match_ = kNoLabel;
756  phi_weight_ = Weight::One();
757  // If phi_label_ == 0, there are no more true epsilon arcs.
758  if (phi_label_ == 0) {
759  if (label == kNoLabel) {
760  return false;
761  }
762  if (label == 0) { // but a virtual epsilon loop needs to be returned.
763  if (!matcher_->Find(kNoLabel)) {
764  return matcher_->Find(0);
765  } else {
766  phi_match_ = 0;
767  return true;
768  }
769  }
770  }
771  if (!has_phi_ || label == 0 || label == kNoLabel) {
772  return matcher_->Find(label);
773  }
774  auto s = state_;
775  while (!matcher_->Find(label)) {
776  // Look for phi transition (if phi_label_ == 0, we need to look
777  // for -1 to avoid getting the virtual self-loop)
778  if (!matcher_->Find(phi_label_ == 0 ? -1 : phi_label_)) return false;
779  if (phi_loop_ && matcher_->Value().nextstate == s) {
780  phi_match_ = label;
781  return true;
782  }
783  phi_weight_ = Times(phi_weight_, matcher_->Value().weight);
784  s = matcher_->Value().nextstate;
785  matcher_->Next();
786  if (!matcher_->Done()) {
787  FSTERROR() << "PhiMatcher: Phi non-determinism not supported";
788  error_ = true;
789  }
790  matcher_->SetState(s);
791  }
792  return true;
793 }
794 
795 template <class M>
796 inline uint64 PhiMatcher<M>::Properties(uint64 inprops) const {
797  auto outprops = matcher_->Properties(inprops);
798  if (error_) outprops |= kError;
799  if (match_type_ == MATCH_NONE) {
800  return outprops;
801  } else if (match_type_ == MATCH_INPUT) {
802  if (phi_label_ == 0) {
803  outprops &= ~kEpsilons | ~kIEpsilons | ~kOEpsilons;
804  outprops |= kNoEpsilons | kNoIEpsilons;
805  }
806  if (rewrite_both_) {
807  return outprops &
810  } else {
811  return outprops &
814  }
815  } else if (match_type_ == MATCH_OUTPUT) {
816  if (phi_label_ == 0) {
817  outprops &= ~kEpsilons | ~kIEpsilons | ~kOEpsilons;
818  outprops |= kNoEpsilons | kNoOEpsilons;
819  }
820  if (rewrite_both_) {
821  return outprops &
824  } else {
825  return outprops &
828  }
829  } else {
830  // Shouldn't ever get here.
831  FSTERROR() << "PhiMatcher: Bad match type: " << match_type_;
832  return 0;
833  }
834 }
835 
836 // For any requested label that doesn't match at a state, this matcher
837 // considers all transitions that match the label 'rho_label' (rho =
838 // 'rest'). Each such rho transition found is returned with the
839 // rho_label rewritten as the requested label (both sides if an
840 // acceptor, or if 'rewrite_both' is true and both input and output
841 // labels of the found transition are 'rho_label'). If 'rho_label' is
842 // kNoLabel, this special matching is not done. RhoMatcher is
843 // templated itself on a matcher, which is used to perform the
844 // underlying matching. By default, the underlying matcher is
845 // constructed by RhoMatcher. The user can instead pass in this
846 // object; in that case, RhoMatcher takes its ownership.
847 // No non-consuming symbols other than epsilon supported with
848 // the underlying template argument matcher.
849 template <class M>
850 class RhoMatcher : public MatcherBase<typename M::Arc> {
851  public:
852  using FST = typename M::FST;
853  using Arc = typename FST::Arc;
854  using Label = typename Arc::Label;
855  using StateId = typename Arc::StateId;
856  using Weight = typename Arc::Weight;
857 
858  // This makes a copy of the FST (w/o 'matcher' arg).
859  RhoMatcher(const FST &fst, MatchType match_type, Label rho_label = kNoLabel,
861  M *matcher = nullptr)
862  : matcher_(matcher ? matcher : new M(fst, match_type)),
863  match_type_(match_type),
864  rho_label_(rho_label),
865  error_(false),
866  state_(kNoStateId),
867  has_rho_(false) {
868  if (match_type == MATCH_BOTH) {
869  FSTERROR() << "RhoMatcher: Bad match type";
870  match_type_ = MATCH_NONE;
871  error_ = true;
872  }
873  if (rho_label == 0) {
874  FSTERROR() << "RhoMatcher: 0 cannot be used as rho_label";
875  rho_label_ = kNoLabel;
876  error_ = true;
877  }
878  if (rewrite_mode == MATCHER_REWRITE_AUTO) {
879  rewrite_both_ = fst.Properties(kAcceptor, true);
880  } else if (rewrite_mode == MATCHER_REWRITE_ALWAYS) {
881  rewrite_both_ = true;
882  } else {
883  rewrite_both_ = false;
884  }
885  }
886 
887  // This doesn't copy the FST.
888  RhoMatcher(const FST *fst, MatchType match_type, Label rho_label = kNoLabel,
890  M *matcher = nullptr)
891  : RhoMatcher(*fst, match_type, rho_label, rewrite_mode,
892  matcher ? matcher : new M(fst, match_type)) { }
893 
894  // This makes a copy of the FST.
895  RhoMatcher(const RhoMatcher<M> &matcher, bool safe = false)
896  : matcher_(new M(*matcher.matcher_, safe)),
897  match_type_(matcher.match_type_),
898  rho_label_(matcher.rho_label_),
899  rewrite_both_(matcher.rewrite_both_),
900  error_(matcher.error_),
901  state_(kNoStateId),
902  has_rho_(false) {}
903 
904  RhoMatcher<M> *Copy(bool safe = false) const override {
905  return new RhoMatcher<M>(*this, safe);
906  }
907 
908  MatchType Type(bool test) const override { return matcher_->Type(test); }
909 
910  void SetState(StateId s) final {
911  if (state_ == s) return;
912  state_ = s;
913  matcher_->SetState(s);
914  has_rho_ = rho_label_ != kNoLabel;
915  }
916 
917  bool Find(Label label) final {
918  if (label == rho_label_ && rho_label_ != kNoLabel) {
919  FSTERROR() << "RhoMatcher::Find: bad label (rho)";
920  error_ = true;
921  return false;
922  }
923  if (matcher_->Find(label)) {
924  rho_match_ = kNoLabel;
925  return true;
926  } else if (has_rho_ && label != 0 && label != kNoLabel &&
927  (has_rho_ = matcher_->Find(rho_label_))) {
928  rho_match_ = label;
929  return true;
930  } else {
931  return false;
932  }
933  }
934 
935  bool Done() const final { return matcher_->Done(); }
936 
937  const Arc &Value() const final {
938  if (rho_match_ == kNoLabel) {
939  return matcher_->Value();
940  } else {
941  rho_arc_ = matcher_->Value();
942  if (rewrite_both_) {
943  if (rho_arc_.ilabel == rho_label_) rho_arc_.ilabel = rho_match_;
944  if (rho_arc_.olabel == rho_label_) rho_arc_.olabel = rho_match_;
945  } else if (match_type_ == MATCH_INPUT) {
946  rho_arc_.ilabel = rho_match_;
947  } else {
948  rho_arc_.olabel = rho_match_;
949  }
950  return rho_arc_;
951  }
952  }
953 
954  void Next() final { matcher_->Next(); }
955 
956  Weight Final(StateId s) const final { return matcher_->Final(s); }
957 
958  ssize_t Priority(StateId s) final {
959  state_ = s;
960  matcher_->SetState(s);
961  has_rho_ = matcher_->Find(rho_label_);
962  if (has_rho_) {
963  return kRequirePriority;
964  } else {
965  return matcher_->Priority(s);
966  }
967  }
968 
969  const FST &GetFst() const override { return matcher_->GetFst(); }
970 
971  uint64 Properties(uint64 props) const override;
972 
973  uint32 Flags() const override {
974  if (rho_label_ == kNoLabel || match_type_ == MATCH_NONE) {
975  return matcher_->Flags();
976  }
977  return matcher_->Flags() | kRequireMatch;
978  }
979 
980  Label RhoLabel() const { return rho_label_; }
981 
982  private:
983  std::unique_ptr<M> matcher_;
984  MatchType match_type_; // Type of match requested.
985  Label rho_label_; // Label that represents the rho transition
986  bool rewrite_both_; // Rewrite both sides when both are rho_label_?
987  Label rho_match_; // Current label that matches rho transition.
988  mutable Arc rho_arc_; // Arc to return when rho match.
989  bool error_; // Error encountered?
990  StateId state_; // Matcher state.
991  bool has_rho_; // Are there possibly rhos at the current state?
992 };
993 
994 template <class M>
995 inline uint64 RhoMatcher<M>::Properties(uint64 inprops) const {
996  auto outprops = matcher_->Properties(inprops);
997  if (error_) outprops |= kError;
998  if (match_type_ == MATCH_NONE) {
999  return outprops;
1000  } else if (match_type_ == MATCH_INPUT) {
1001  if (rewrite_both_) {
1002  return outprops &
1005  } else {
1006  return outprops &
1009  }
1010  } else if (match_type_ == MATCH_OUTPUT) {
1011  if (rewrite_both_) {
1012  return outprops &
1015  } else {
1016  return outprops &
1019  }
1020  } else {
1021  // Shouldn't ever get here.
1022  FSTERROR() << "RhoMatcher: Bad match type: " << match_type_;
1023  return 0;
1024  }
1025 }
1026 
1027 // For any requested label, this matcher considers all transitions
1028 // that match the label 'sigma_label' (sigma = "any"), and this in
1029 // additions to transitions with the requested label. Each such sigma
1030 // transition found is returned with the sigma_label rewritten as the
1031 // requested label (both sides if an acceptor, or if 'rewrite_both' is
1032 // true and both input and output labels of the found transition are
1033 // 'sigma_label'). If 'sigma_label' is kNoLabel, this special
1034 // matching is not done. SigmaMatcher is templated itself on a
1035 // matcher, which is used to perform the underlying matching. By
1036 // default, the underlying matcher is constructed by SigmaMatcher.
1037 // The user can instead pass in this object; in that case,
1038 // SigmaMatcher takes its ownership. No non-consuming symbols other
1039 // than epsilon supported with the underlying template argument matcher.
1040 template <class M>
1041 class SigmaMatcher : public MatcherBase<typename M::Arc> {
1042  public:
1043  using FST = typename M::FST;
1044  using Arc = typename FST::Arc;
1045  using Label = typename Arc::Label;
1046  using StateId = typename Arc::StateId;
1047  using Weight = typename Arc::Weight;
1048 
1049  // This makes a copy of the FST (w/o 'matcher' arg).
1050  SigmaMatcher(const FST &fst, MatchType match_type,
1051  Label sigma_label = kNoLabel,
1053  M *matcher = nullptr)
1054  : matcher_(matcher ? matcher : new M(fst, match_type)),
1055  match_type_(match_type),
1056  sigma_label_(sigma_label),
1057  error_(false),
1058  state_(kNoStateId) {
1059  if (match_type == MATCH_BOTH) {
1060  FSTERROR() << "SigmaMatcher: Bad match type";
1061  match_type_ = MATCH_NONE;
1062  error_ = true;
1063  }
1064  if (sigma_label == 0) {
1065  FSTERROR() << "SigmaMatcher: 0 cannot be used as sigma_label";
1066  sigma_label_ = kNoLabel;
1067  error_ = true;
1068  }
1069  if (rewrite_mode == MATCHER_REWRITE_AUTO) {
1070  rewrite_both_ = fst.Properties(kAcceptor, true);
1071  } else if (rewrite_mode == MATCHER_REWRITE_ALWAYS) {
1072  rewrite_both_ = true;
1073  } else {
1074  rewrite_both_ = false;
1075  }
1076  }
1077 
1078  // This doesn't copy the FST.
1079  SigmaMatcher(const FST *fst, MatchType match_type,
1080  Label sigma_label = kNoLabel,
1082  M *matcher = nullptr)
1083  : SigmaMatcher(*fst, match_type, sigma_label, rewrite_mode,
1084  matcher ? matcher : new M(fst, match_type)) { }
1085 
1086  // This makes a copy of the FST.
1087  SigmaMatcher(const SigmaMatcher<M> &matcher, bool safe = false)
1088  : matcher_(new M(*matcher.matcher_, safe)),
1089  match_type_(matcher.match_type_),
1090  sigma_label_(matcher.sigma_label_),
1091  rewrite_both_(matcher.rewrite_both_),
1092  error_(matcher.error_),
1093  state_(kNoStateId) {}
1094 
1095  SigmaMatcher<M> *Copy(bool safe = false) const override {
1096  return new SigmaMatcher<M>(*this, safe);
1097  }
1098 
1099  MatchType Type(bool test) const override { return matcher_->Type(test); }
1100 
1101  void SetState(StateId s) final {
1102  if (state_ == s) return;
1103  state_ = s;
1104  matcher_->SetState(s);
1105  has_sigma_ =
1106  (sigma_label_ != kNoLabel) ? matcher_->Find(sigma_label_) : false;
1107  }
1108 
1109  bool Find(Label match_label) final {
1110  match_label_ = match_label;
1111  if (match_label == sigma_label_ && sigma_label_ != kNoLabel) {
1112  FSTERROR() << "SigmaMatcher::Find: bad label (sigma)";
1113  error_ = true;
1114  return false;
1115  }
1116  if (matcher_->Find(match_label)) {
1117  sigma_match_ = kNoLabel;
1118  return true;
1119  } else if (has_sigma_ && match_label != 0 && match_label != kNoLabel &&
1120  matcher_->Find(sigma_label_)) {
1121  sigma_match_ = match_label;
1122  return true;
1123  } else {
1124  return false;
1125  }
1126  }
1127 
1128  bool Done() const final { return matcher_->Done(); }
1129 
1130  const Arc &Value() const final {
1131  if (sigma_match_ == kNoLabel) {
1132  return matcher_->Value();
1133  } else {
1134  sigma_arc_ = matcher_->Value();
1135  if (rewrite_both_) {
1136  if (sigma_arc_.ilabel == sigma_label_) sigma_arc_.ilabel = sigma_match_;
1137  if (sigma_arc_.olabel == sigma_label_) sigma_arc_.olabel = sigma_match_;
1138  } else if (match_type_ == MATCH_INPUT) {
1139  sigma_arc_.ilabel = sigma_match_;
1140  } else {
1141  sigma_arc_.olabel = sigma_match_;
1142  }
1143  return sigma_arc_;
1144  }
1145  }
1146 
1147  void Next() final {
1148  matcher_->Next();
1149  if (matcher_->Done() && has_sigma_ && (sigma_match_ == kNoLabel) &&
1150  (match_label_ > 0)) {
1151  matcher_->Find(sigma_label_);
1152  sigma_match_ = match_label_;
1153  }
1154  }
1155 
1156  Weight Final(StateId s) const final { return matcher_->Final(s); }
1157 
1158  ssize_t Priority(StateId s) final {
1159  if (sigma_label_ != kNoLabel) {
1160  SetState(s);
1161  return has_sigma_ ? kRequirePriority : matcher_->Priority(s);
1162  } else {
1163  return matcher_->Priority(s);
1164  }
1165  }
1166 
1167  const FST &GetFst() const override { return matcher_->GetFst(); }
1168 
1169  uint64 Properties(uint64 props) const override;
1170 
1171  uint32 Flags() const override {
1172  if (sigma_label_ == kNoLabel || match_type_ == MATCH_NONE) {
1173  return matcher_->Flags();
1174  }
1175  return matcher_->Flags() | kRequireMatch;
1176  }
1177 
1178  Label SigmaLabel() const { return sigma_label_; }
1179 
1180  private:
1181  std::unique_ptr<M> matcher_;
1182  MatchType match_type_; // Type of match requested.
1183  Label sigma_label_; // Label that represents the sigma transition.
1184  bool rewrite_both_; // Rewrite both sides when both are sigma_label_?
1185  bool has_sigma_; // Are there sigmas at the current state?
1186  Label sigma_match_; // Current label that matches sigma transition.
1187  mutable Arc sigma_arc_; // Arc to return when sigma match.
1188  Label match_label_; // Label being matched.
1189  bool error_; // Error encountered?
1190  StateId state_; // Matcher state.
1191 };
1192 
1193 template <class M>
1195  auto outprops = matcher_->Properties(inprops);
1196  if (error_) outprops |= kError;
1197  if (match_type_ == MATCH_NONE) {
1198  return outprops;
1199  } else if (rewrite_both_) {
1200  return outprops &
1204  } else if (match_type_ == MATCH_INPUT) {
1205  return outprops &
1208  kAcceptor);
1209  } else if (match_type_ == MATCH_OUTPUT) {
1210  return outprops &
1213  kAcceptor);
1214  } else {
1215  // Shouldn't ever get here.
1216  FSTERROR() << "SigmaMatcher: Bad match type: " << match_type_;
1217  return 0;
1218  }
1219 }
1220 
1221 // Flags for MultiEpsMatcher.
1222 
1223 // Return multi-epsilon arcs for Find(kNoLabel).
1224 const uint32 kMultiEpsList = 0x00000001;
1225 
1226 // Return a kNolabel loop for Find(multi_eps).
1227 const uint32 kMultiEpsLoop = 0x00000002;
1228 
1229 // MultiEpsMatcher: allows treating multiple non-0 labels as
1230 // non-consuming labels in addition to 0 that is always
1231 // non-consuming. Precise behavior controlled by 'flags' argument. By
1232 // default, the underlying matcher is constructed by
1233 // MultiEpsMatcher. The user can instead pass in this object; in that
1234 // case, MultiEpsMatcher takes its ownership iff 'own_matcher' is
1235 // true.
1236 template <class M>
1238  public:
1239  using FST = typename M::FST;
1240  using Arc = typename FST::Arc;
1241  using Label = typename Arc::Label;
1242  using StateId = typename Arc::StateId;
1243  using Weight = typename Arc::Weight;
1244 
1245  // This makes a copy of the FST (w/o 'matcher' arg).
1246  MultiEpsMatcher(const FST &fst, MatchType match_type,
1247  uint32 flags = (kMultiEpsLoop | kMultiEpsList),
1248  M *matcher = nullptr, bool own_matcher = true)
1249  : matcher_(matcher ? matcher : new M(fst, match_type)),
1250  flags_(flags),
1251  own_matcher_(matcher ? own_matcher : true) {
1252  Init(match_type);
1253  }
1254 
1255  // This doesn't copy the FST.
1256  MultiEpsMatcher(const FST *fst, MatchType match_type,
1257  uint32 flags = (kMultiEpsLoop | kMultiEpsList),
1258  M *matcher = nullptr, bool own_matcher = true)
1259  : matcher_(matcher ? matcher : new M(fst, match_type)),
1260  flags_(flags),
1261  own_matcher_(matcher ? own_matcher : true) {
1262  Init(match_type);
1263  }
1264 
1265  // This makes a copy of the FST.
1266  MultiEpsMatcher(const MultiEpsMatcher<M> &matcher, bool safe = false)
1267  : matcher_(new M(*matcher.matcher_, safe)),
1268  flags_(matcher.flags_),
1269  own_matcher_(true),
1270  multi_eps_labels_(matcher.multi_eps_labels_),
1271  loop_(matcher.loop_) {
1272  loop_.nextstate = kNoStateId;
1273  }
1274 
1276  if (own_matcher_) delete matcher_;
1277  }
1278 
1279  MultiEpsMatcher<M> *Copy(bool safe = false) const {
1280  return new MultiEpsMatcher<M>(*this, safe);
1281  }
1282 
1283  MatchType Type(bool test) const { return matcher_->Type(test); }
1284 
1285  void SetState(StateId state) {
1286  matcher_->SetState(state);
1287  loop_.nextstate = state;
1288  }
1289 
1290  bool Find(Label label);
1291 
1292  bool Done() const { return done_; }
1293 
1294  const Arc &Value() const { return current_loop_ ? loop_ : matcher_->Value(); }
1295 
1296  void Next() {
1297  if (!current_loop_) {
1298  matcher_->Next();
1299  done_ = matcher_->Done();
1300  if (done_ && multi_eps_iter_ != multi_eps_labels_.End()) {
1301  ++multi_eps_iter_;
1302  while ((multi_eps_iter_ != multi_eps_labels_.End()) &&
1303  !matcher_->Find(*multi_eps_iter_)) {
1304  ++multi_eps_iter_;
1305  }
1306  if (multi_eps_iter_ != multi_eps_labels_.End()) {
1307  done_ = false;
1308  } else {
1309  done_ = !matcher_->Find(kNoLabel);
1310  }
1311  }
1312  } else {
1313  done_ = true;
1314  }
1315  }
1316 
1317  const FST &GetFst() const { return matcher_->GetFst(); }
1318 
1319  uint64 Properties(uint64 props) const { return matcher_->Properties(props); }
1320 
1321  const M *GetMatcher() const { return matcher_; }
1322 
1323  Weight Final(StateId s) const { return matcher_->Final(s); }
1324 
1325  uint32 Flags() const { return matcher_->Flags(); }
1326 
1327  ssize_t Priority(StateId s) { return matcher_->Priority(s); }
1328 
1329  void AddMultiEpsLabel(Label label) {
1330  if (label == 0) {
1331  FSTERROR() << "MultiEpsMatcher: Bad multi-eps label: 0";
1332  } else {
1333  multi_eps_labels_.Insert(label);
1334  }
1335  }
1336 
1338  if (label == 0) {
1339  FSTERROR() << "MultiEpsMatcher: Bad multi-eps label: 0";
1340  } else {
1341  multi_eps_labels_.Erase(label);
1342  }
1343  }
1344 
1345  void ClearMultiEpsLabels() { multi_eps_labels_.Clear(); }
1346 
1347  private:
1348  void Init(MatchType match_type) {
1349  if (match_type == MATCH_INPUT) {
1350  loop_.ilabel = kNoLabel;
1351  loop_.olabel = 0;
1352  } else {
1353  loop_.ilabel = 0;
1354  loop_.olabel = kNoLabel;
1355  }
1356  loop_.weight = Weight::One();
1357  loop_.nextstate = kNoStateId;
1358  }
1359 
1360  M *matcher_;
1361  uint32 flags_;
1362  bool own_matcher_; // Does this class delete the matcher?
1363 
1364  // Multi-eps label set.
1365  CompactSet<Label, kNoLabel> multi_eps_labels_;
1366  typename CompactSet<Label, kNoLabel>::const_iterator multi_eps_iter_;
1367 
1368  bool current_loop_; // Current arc is the implicit loop?
1369  mutable Arc loop_; // For non-consuming symbols.
1370  bool done_; // Matching done?
1371 
1372  MultiEpsMatcher &operator=(const MultiEpsMatcher &) = delete;
1373 };
1374 
1375 template <class M>
1376 inline bool MultiEpsMatcher<M>::Find(Label label) {
1377  multi_eps_iter_ = multi_eps_labels_.End();
1378  current_loop_ = false;
1379  bool ret;
1380  if (label == 0) {
1381  ret = matcher_->Find(0);
1382  } else if (label == kNoLabel) {
1383  if (flags_ & kMultiEpsList) {
1384  // Returns all non-consuming arcs (including epsilon).
1385  multi_eps_iter_ = multi_eps_labels_.Begin();
1386  while ((multi_eps_iter_ != multi_eps_labels_.End()) &&
1387  !matcher_->Find(*multi_eps_iter_)) {
1388  ++multi_eps_iter_;
1389  }
1390  if (multi_eps_iter_ != multi_eps_labels_.End()) {
1391  ret = true;
1392  } else {
1393  ret = matcher_->Find(kNoLabel);
1394  }
1395  } else {
1396  // Returns all epsilon arcs.
1397  ret = matcher_->Find(kNoLabel);
1398  }
1399  } else if ((flags_ & kMultiEpsLoop) &&
1400  multi_eps_labels_.Find(label) != multi_eps_labels_.End()) {
1401  // Returns implicit loop.
1402  current_loop_ = true;
1403  ret = true;
1404  } else {
1405  ret = matcher_->Find(label);
1406  }
1407  done_ = !ret;
1408  return ret;
1409 }
1410 
1411 // This class discards any implicit matches (e.g., the implicit epsilon
1412 // self-loops in the SortedMatcher). Matchers are most often used in
1413 // composition/intersection where the implicit matches are needed
1414 // e.g. for epsilon processing. However, if a matcher is simply being
1415 // used to look-up explicit label matches, this class saves the user
1416 // from having to check for and discard the unwanted implicit matches
1417 // themselves.
1418 template <class M>
1419 class ExplicitMatcher : public MatcherBase<typename M::Arc> {
1420  public:
1421  using FST = typename M::FST;
1422  using Arc = typename FST::Arc;
1423  using Label = typename Arc::Label;
1424  using StateId = typename Arc::StateId;
1425  using Weight = typename Arc::Weight;
1426 
1427  // This makes a copy of the FST.
1428  ExplicitMatcher(const FST &fst, MatchType match_type, M *matcher = nullptr)
1429  : matcher_(matcher ? matcher : new M(fst, match_type)),
1430  match_type_(match_type),
1431  error_(false) {}
1432 
1433  // This doesn't copy the FST.
1434  ExplicitMatcher(const FST *fst, MatchType match_type, M *matcher = nullptr)
1435  : matcher_(matcher ? matcher : new M(fst, match_type)),
1436  match_type_(match_type),
1437  error_(false) {}
1438 
1439  // This makes a copy of the FST.
1440  ExplicitMatcher(const ExplicitMatcher<M> &matcher, bool safe = false)
1441  : matcher_(new M(*matcher.matcher_, safe)),
1442  match_type_(matcher.match_type_),
1443  error_(matcher.error_) {}
1444 
1445  ExplicitMatcher<M> *Copy(bool safe = false) const override {
1446  return new ExplicitMatcher<M>(*this, safe);
1447  }
1448 
1449  MatchType Type(bool test) const override { return matcher_->Type(test); }
1450 
1451  void SetState(StateId s) final { matcher_->SetState(s); }
1452 
1453  bool Find(Label label) final {
1454  matcher_->Find(label);
1455  CheckArc();
1456  return !Done();
1457  }
1458 
1459  bool Done() const final { return matcher_->Done(); }
1460 
1461  const Arc &Value() const final { return matcher_->Value(); }
1462 
1463  void Next() final {
1464  matcher_->Next();
1465  CheckArc();
1466  }
1467 
1468  Weight Final(StateId s) const final { return matcher_->Final(s); }
1469 
1470  ssize_t Priority(StateId s) final { return matcher_->Priority(s); }
1471 
1472  const FST &GetFst() const final { return matcher_->GetFst(); }
1473 
1474  uint64 Properties(uint64 inprops) const override {
1475  return matcher_->Properties(inprops);
1476  }
1477 
1478  const M *GetMatcher() const { return matcher_.get(); }
1479 
1480  uint32 Flags() const override { return matcher_->Flags(); }
1481 
1482  private:
1483  // Checks current arc if available and explicit. If not available, stops. If
1484  // not explicit, checks next ones.
1485  void CheckArc() {
1486  for (; !matcher_->Done(); matcher_->Next()) {
1487  const auto label = match_type_ == MATCH_INPUT ? matcher_->Value().ilabel
1488  : matcher_->Value().olabel;
1489  if (label != kNoLabel) return;
1490  }
1491  }
1492 
1493  std::unique_ptr<M> matcher_;
1494  MatchType match_type_; // Type of match requested.
1495  bool error_; // Error encountered?
1496 };
1497 
1498 // Generic matcher, templated on the FST definition.
1499 //
1500 // Here is a typical use:
1501 //
1502 // Matcher<StdFst> matcher(fst, MATCH_INPUT);
1503 // matcher.SetState(state);
1504 // if (matcher.Find(label))
1505 // for (; !matcher.Done(); matcher.Next()) {
1506 // auto &arc = matcher.Value();
1507 // ...
1508 // }
1509 template <class F>
1510 class Matcher {
1511  public:
1512  using FST = F;
1513  using Arc = typename F::Arc;
1514  using Label = typename Arc::Label;
1515  using StateId = typename Arc::StateId;
1516  using Weight = typename Arc::Weight;
1517 
1518  // This makes a copy of the FST.
1519  Matcher(const FST &fst, MatchType match_type)
1520  : owned_fst_(fst.Copy()),
1521  base_(owned_fst_->InitMatcher(match_type)) {
1522  if (!base_) base_.reset(new SortedMatcher<FST>(owned_fst_.get(),
1523  match_type));
1524  }
1525 
1526  // This doesn't copy the FST.
1527  Matcher(const FST *fst, MatchType match_type)
1528  : base_(fst->InitMatcher(match_type)) {
1529  if (!base_) base_.reset(new SortedMatcher<FST>(fst, match_type));
1530  }
1531 
1532  // This makes a copy of the FST.
1533  Matcher(const Matcher<FST> &matcher, bool safe = false)
1534  : base_(matcher.base_->Copy(safe)) { }
1535 
1536  // Takes ownership of the provided matcher.
1537  explicit Matcher(MatcherBase<Arc> *base_matcher)
1538  : base_(base_matcher) { }
1539 
1540  Matcher<FST> *Copy(bool safe = false) const {
1541  return new Matcher<FST>(*this, safe);
1542  }
1543 
1544  MatchType Type(bool test) const { return base_->Type(test); }
1545 
1546  void SetState(StateId s) { base_->SetState(s); }
1547 
1548  bool Find(Label label) { return base_->Find(label); }
1549 
1550  bool Done() const { return base_->Done(); }
1551 
1552  const Arc &Value() const { return base_->Value(); }
1553 
1554  void Next() { base_->Next(); }
1555 
1556  const FST &GetFst() const {
1557  return static_cast<const FST &>(base_->GetFst());
1558  }
1559 
1560  uint64 Properties(uint64 props) const { return base_->Properties(props); }
1561 
1562  Weight Final(StateId s) const { return base_->Final(s); }
1563 
1564  uint32 Flags() const { return base_->Flags() & kMatcherFlags; }
1565 
1566  ssize_t Priority(StateId s) { return base_->Priority(s); }
1567 
1568  private:
1569  std::unique_ptr<const FST> owned_fst_;
1570  std::unique_ptr<MatcherBase<Arc>> base_;
1571 };
1572 
1573 } // namespace fst
1574 
1575 #endif // FST_MATCHER_H_
uint64 Properties(uint64 props) const override
Definition: matcher.h:995
bool Find(Label label) final
Definition: matcher.h:917
MatchType Type(bool test) const
Definition: matcher.h:1544
void Next() final
Definition: matcher.h:295
void SetState(StateId s) final
Definition: matcher.h:234
uint64 Properties(uint64 props) const
Definition: matcher.h:1560
constexpr uint64 kNoEpsilons
Definition: properties.h:62
Matcher(const Matcher< FST > &matcher, bool safe=false)
Definition: matcher.h:1533
virtual void Next()=0
Weight Final(StateId s) const final
Definition: matcher.h:687
const Arc & Value() const final
Definition: matcher.h:659
SigmaMatcher< M > * Copy(bool safe=false) const override
Definition: matcher.h:1095
typename FST::Arc Arc
Definition: matcher.h:853
void SetState(StateId s) final
Definition: matcher.h:910
const uint32 kMultiEpsLoop
Definition: matcher.h:1227
constexpr uint32 kRequireMatch
Definition: matcher.h:104
constexpr ssize_t kRequirePriority
Definition: matcher.h:110
typename typename Filter::Matcher2::FST FST
Definition: matcher.h:1239
Matcher(const FST &fst, MatchType match_type)
Definition: matcher.h:1519
const FST & GetFst() const
Definition: matcher.h:1317
void AddMultiEpsLabel(Label label)
Definition: matcher.h:1329
constexpr int kNoLabel
Definition: fst.h:179
size_t Position() const
Definition: matcher.h:317
constexpr uint64 kNotOLabelSorted
Definition: properties.h:82
RhoMatcher< M > * Copy(bool safe=false) const override
Definition: matcher.h:904
uint64_t uint64
Definition: types.h:32
SortedMatcher(const SortedMatcher< FST > &matcher, bool safe=false)
Definition: matcher.h:199
ssize_t Priority(StateId s) final
Definition: matcher.h:1158
const Arc & Value() const final
Definition: matcher.h:937
ExplicitMatcher(const FST *fst, MatchType match_type, M *matcher=nullptr)
Definition: matcher.h:1434
RhoMatcher(const RhoMatcher< M > &matcher, bool safe=false)
Definition: matcher.h:895
bool Done() const final
Definition: matcher.h:476
void LowerBound(Label label)
Definition: matcher.h:266
typename Arc::Label Label
Definition: matcher.h:1423
bool Find(Label label)
Definition: matcher.h:1376
const Arc & Value() const final
Definition: matcher.h:289
MatchType Type(bool test) const override
Definition: matcher.h:1449
ssize_t Priority(StateId s) final
Definition: matcher.h:958
const M * GetMatcher() const
Definition: matcher.h:1321
Arc::Weight Final(const ExpandedFst< Arc > &fst, typename Arc::StateId s)
Definition: expanded-fst.h:82
MatchType
Definition: fst.h:171
constexpr uint64 kILabelSorted
Definition: properties.h:75
uint32 Flags() const
Definition: matcher.h:1325
PhiMatcher(const FST *fst, MatchType match_type, Label phi_label=kNoLabel, bool phi_loop=true, MatcherRewriteMode rewrite_mode=MATCHER_REWRITE_AUTO, M *matcher=nullptr)
Definition: matcher.h:624
Weight Final(StateId s) const
Definition: matcher.h:1562
bool Find(Label label) final
Definition: matcher.h:1453
typename Arc::StateId StateId
Definition: matcher.h:411
virtual bool Find(Label)=0
typename Arc::Label Label
Definition: matcher.h:1514
ExpectationWeight< X1, X2 > Times(const ExpectationWeight< X1, X2 > &w1, const ExpectationWeight< X1, X2 > &w2)
MatchType Type(bool test) const override
Definition: matcher.h:646
Weight Final(StateId s) const final
Definition: matcher.h:1156
SigmaMatcher(const FST &fst, MatchType match_type, Label sigma_label=kNoLabel, MatcherRewriteMode rewrite_mode=MATCHER_REWRITE_AUTO, M *matcher=nullptr)
Definition: matcher.h:1050
void Next() final
Definition: matcher.h:954
uint64 Properties(uint64 inprops) const override
Definition: matcher.h:313
void SetState(StateId s) final
Definition: matcher.h:1101
const FST & GetFst() const override
Definition: matcher.h:969
void SetState(StateId s) final
Definition: matcher.h:648
ssize_t NumArcs(const ExpandedFst< Arc > &fst, typename Arc::StateId s)
Definition: expanded-fst.h:88
constexpr uint64 kNotILabelSorted
Definition: properties.h:77
ExplicitMatcher(const FST &fst, MatchType match_type, M *matcher=nullptr)
Definition: matcher.h:1428
typename M::FST FST
Definition: matcher.h:1421
const uint32 kMultiEpsList
Definition: matcher.h:1224
uint64 Properties(uint64 inprops) const override
Definition: matcher.h:1474
Label SigmaLabel() const
Definition: matcher.h:1178
MatchType Type(bool test) const override
Definition: matcher.h:218
void SetState(StateId state)
Definition: matcher.h:1285
constexpr uint64 kNonIDeterministic
Definition: properties.h:52
RhoMatcher(const FST &fst, MatchType match_type, Label rho_label=kNoLabel, MatcherRewriteMode rewrite_mode=MATCHER_REWRITE_AUTO, M *matcher=nullptr)
Definition: matcher.h:859
PhiMatcher< M > * Copy(bool safe=false) const override
Definition: matcher.h:642
typename Arc::StateId StateId
Definition: matcher.h:1515
virtual uint64 Properties(uint64) const =0
constexpr int kNoStateId
Definition: fst.h:180
PhiMatcher(const PhiMatcher< M > &matcher, bool safe=false)
Definition: matcher.h:633
typename F::Arc Arc
Definition: matcher.h:1513
HashMatcher(const HashMatcher< FST > &matcher, bool safe=false)
Definition: matcher.h:447
typename Arc::Weight Weight
Definition: matcher.h:596
Label RhoLabel() const
Definition: matcher.h:980
typename Arc::StateId StateId
Definition: matcher.h:1424
void RemoveMultiEpsLabel(Label label)
Definition: matcher.h:1337
void Next() final
Definition: matcher.h:487
MatchType Type(bool test) const
Definition: matcher.h:1283
void Next() final
Definition: matcher.h:1147
constexpr uint64 kEpsilons
Definition: properties.h:60
#define FSTERROR()
Definition: util.h:35
ExplicitMatcher< M > * Copy(bool safe=false) const override
Definition: matcher.h:1445
bool Done() const final
Definition: matcher.h:1128
SigmaMatcher(const SigmaMatcher< M > &matcher, bool safe=false)
Definition: matcher.h:1087
bool Done() const final
Definition: matcher.h:279
const FST & GetFst() const override
Definition: matcher.h:495
const Arc & Value() const
Definition: matcher.h:1294
typename Arc::Label Label
Definition: matcher.h:1045
void Next() final
Definition: matcher.h:685
const Arc & Value() const final
Definition: matcher.h:481
virtual ~MatcherBase()
Definition: matcher.h:122
MatchType Type(bool test) const override
Definition: matcher.h:1099
bool Find(Label label) final
Definition: matcher.h:466
bool Find(Label match_label) final
Definition: matcher.h:248
bool Done() const final
Definition: matcher.h:935
virtual ssize_t Priority(StateId s)
Definition: matcher.h:142
uint32 Flags() const override
Definition: matcher.h:1480
virtual Weight Final(StateId s) const
Definition: matcher.h:140
typename FST::Arc Arc
Definition: matcher.h:1044
typename Arc::StateId StateId
Definition: matcher.h:855
typename FST::Arc Arc
Definition: matcher.h:409
constexpr uint64 kNonODeterministic
Definition: properties.h:57
const M * GetMatcher() const
Definition: matcher.h:1478
bool Done() const
Definition: matcher.h:1550
typename M::FST FST
Definition: matcher.h:852
Weight Final(StateId s) const final
Definition: matcher.h:956
virtual void SetState(StateId)=0
ssize_t Priority(StateId s)
Definition: matcher.h:1566
uint32 Flags() const override
Definition: matcher.h:721
const Arc & Value() const final
Definition: matcher.h:1461
Matcher(const FST *fst, MatchType match_type)
Definition: matcher.h:1527
const FST & GetFst() const override
Definition: matcher.h:717
constexpr uint64 kOLabelSorted
Definition: properties.h:80
void SetFlags(uint32 flags, uint32 mask)
Definition: fst.h:541
constexpr uint64 kIEpsilons
Definition: properties.h:65
constexpr uint64 kIDeterministic
Definition: properties.h:50
Weight Final(StateId s) const final
Definition: matcher.h:303
typename FST::Arc Arc
Definition: matcher.h:1422
void Next()
Definition: matcher.h:1554
void SetState(StateId s) final
Definition: matcher.h:1451
ExplicitMatcher(const ExplicitMatcher< M > &matcher, bool safe=false)
Definition: matcher.h:1440
bool Find(Label label)
Definition: matcher.h:1548
MultiEpsMatcher(const FST *fst, MatchType match_type, uint32 flags=(kMultiEpsLoop|kMultiEpsList), M *matcher=nullptr, bool own_matcher=true)
Definition: matcher.h:1256
virtual const Fst< Arc > & GetFst() const =0
Matcher(MatcherBase< Arc > *base_matcher)
Definition: matcher.h:1537
constexpr uint64 kAcceptor
Definition: properties.h:45
~SortedMatcher() override
Definition: matcher.h:212
virtual uint32 Flags() const
Definition: matcher.h:138
bool Find(Label match_label) final
Definition: matcher.h:748
typename M::FST FST
Definition: matcher.h:592
const FST & GetFst() const override
Definition: matcher.h:1167
constexpr uint64 kNoOEpsilons
Definition: properties.h:72
Label PhiLabel() const
Definition: matcher.h:728
void ClearMultiEpsLabels()
Definition: matcher.h:1345
constexpr uint32 kMatcherFlags
Definition: matcher.h:107
void SetState(StateId s)
Definition: matcher.h:1546
ssize_t Priority(StateId s) final
Definition: matcher.h:307
SortedMatcher(const FST &fst, MatchType match_type, Label binary_label=1)
Definition: matcher.h:165
void SetState(StateId s) final
Definition: matcher.h:527
constexpr uint64 kString
Definition: properties.h:117
uint32_t uint32
Definition: types.h:31
MatcherRewriteMode
Definition: matcher.h:568
HashMatcher< FST > * Copy(bool safe=false) const override
Definition: matcher.h:457
bool Done() const final
Definition: matcher.h:657
ssize_t Priority(StateId s) final
Definition: matcher.h:1470
virtual MatcherBase< Arc > * Copy(bool safe=false) const =0
ssize_t Priority(StateId s) final
Definition: matcher.h:707
const FST & GetFst() const final
Definition: matcher.h:1472
typename FST::Arc Arc
Definition: matcher.h:593
constexpr uint64 kError
Definition: properties.h:33
uint64 Properties(uint64 inprops) const override
Definition: matcher.h:497
typename Arc::Weight Weight
Definition: matcher.h:1047
SortedMatcher(const FST *fst, MatchType match_type, Label binary_label=1)
Definition: matcher.h:173
typename Arc::Weight Weight
Definition: matcher.h:412
const FST & GetFst() const override
Definition: matcher.h:311
MultiEpsMatcher< M > * Copy(bool safe=false) const
Definition: matcher.h:1279
uint32 Flags() const
Definition: matcher.h:1564
typename Arc::Weight Weight
Definition: matcher.h:120
typename Arc::Label Label
Definition: matcher.h:118
virtual MatchType Type(bool) const =0
Weight Final(StateId s) const final
Definition: matcher.h:1468
bool Done() const
Definition: matcher.h:1292
void Next() final
Definition: matcher.h:1463
uint64 Properties(uint64 props) const
Definition: matcher.h:1319
uint64 Properties(uint64 props) const override
Definition: matcher.h:1194
SigmaMatcher(const FST *fst, MatchType match_type, Label sigma_label=kNoLabel, MatcherRewriteMode rewrite_mode=MATCHER_REWRITE_AUTO, M *matcher=nullptr)
Definition: matcher.h:1079
HashMatcher(const FST &fst, MatchType match_type)
Definition: matcher.h:419
bool Done() const final
Definition: matcher.h:1459
PhiMatcher(const FST &fst, MatchType match_type, Label phi_label=kNoLabel, bool phi_loop=true, MatcherRewriteMode rewrite_mode=MATCHER_REWRITE_AUTO, M *matcher=nullptr)
Definition: matcher.h:599
MatchType Type(bool test) const override
Definition: matcher.h:462
SortedMatcher< FST > * Copy(bool safe=false) const override
Definition: matcher.h:214
uint32 Flags() const override
Definition: matcher.h:973
ssize_t Priority(StateId s)
Definition: matcher.h:1327
typename Arc::Weight Weight
Definition: matcher.h:856
const Arc & Value() const
Definition: matcher.h:1552
MultiEpsMatcher(const FST &fst, MatchType match_type, uint32 flags=(kMultiEpsLoop|kMultiEpsList), M *matcher=nullptr, bool own_matcher=true)
Definition: matcher.h:1246
typename Arc::Label Label
Definition: matcher.h:594
MatchType Type(bool test) const override
Definition: matcher.h:908
constexpr uint64 kODeterministic
Definition: properties.h:55
typename Arc::Label Label
Definition: matcher.h:854
typename M::FST FST
Definition: matcher.h:1043
Matcher< FST > * Copy(bool safe=false) const
Definition: matcher.h:1540
typename Arc::Weight Weight
Definition: matcher.h:1516
typename Arc::Label Label
Definition: matcher.h:410
constexpr uint64 kOEpsilons
Definition: properties.h:70
typename Arc::StateId StateId
Definition: matcher.h:1046
typename Arc::Weight Weight
Definition: matcher.h:1425
void Destroy(ArcIterator< FST > *aiter, MemoryPool< ArcIterator< FST >> *pool)
Definition: fst.h:564
bool Find(Label match_label) final
Definition: matcher.h:1109
uint64 Properties(uint64 props) const override
Definition: matcher.h:796
MultiEpsMatcher(const MultiEpsMatcher< M > &matcher, bool safe=false)
Definition: matcher.h:1266
uint32 Flags() const override
Definition: matcher.h:1171
RhoMatcher(const FST *fst, MatchType match_type, Label rho_label=kNoLabel, MatcherRewriteMode rewrite_mode=MATCHER_REWRITE_AUTO, M *matcher=nullptr)
Definition: matcher.h:888
HashMatcher(const FST *fst, MatchType match_type)
Definition: matcher.h:425
constexpr uint64 kNoIEpsilons
Definition: properties.h:67
virtual bool Done() const =0
virtual const Arc & Value() const =0
const Arc & Value() const final
Definition: matcher.h:1130
Weight Final(StateId s) const
Definition: matcher.h:1323
const FST & GetFst() const
Definition: matcher.h:1556
typename Arc::StateId StateId
Definition: matcher.h:595
typename Arc::StateId StateId
Definition: matcher.h:119