FST  openfst-1.8.2
OpenFst Library
matcher.h
Go to the documentation of this file.
1 // Copyright 2005-2020 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the 'License');
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an 'AS IS' BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // See www.openfst.org for extensive documentation on this weighted
16 // finite-state transducer library.
17 //
18 // Classes to allow matching labels leaving FST states.
19 
20 #ifndef FST_MATCHER_H_
21 #define FST_MATCHER_H_
22 
23 #include <algorithm>
24 #include <cstdint>
25 #include <memory>
26 #include <tuple>
27 #include <utility>
28 
29 #include <fst/log.h>
30 
31 #include <fst/mutable-fst.h> // for all internal FST accessors.
32 
33 #include <unordered_map>
34 #include <optional>
35 
36 namespace fst {
37 
38 // Matchers find and iterate through requested labels at FST states. In the
39 // simplest form, these are just some associative map or search keyed on labels.
40 // More generally, they may implement matching special labels that represent
41 // sets of labels such as sigma (all), rho (rest), or phi (fail). The Matcher
42 // interface is:
43 //
44 // template <class F>
45 // class Matcher {
46 // public:
47 // using FST = F;
48 // using Arc = typename FST::Arc;
49 // using Label = typename Arc::Label;
50 // using StateId = typename Arc::StateId;
51 // using Weight = typename Arc::Weight;
52 //
53 // // Required constructors. Note:
54 // // -- the constructors that copy the FST arg are useful for
55 // // letting the matcher manage the FST through copies
56 // // (esp with 'safe' copies); e.g. ComposeFst depends on this.
57 // // -- the constructor that does not copy is useful when the
58 // // the FST is mutated during the lifetime of the matcher
59 // // (o.w. the matcher would have its own unmutated deep copy).
60 //
61 // // This makes a copy of the FST.
62 // Matcher(const FST &fst, MatchType type);
63 // // This doesn't copy the FST.
64 // Matcher(const FST *fst, MatchType type);
65 // // This makes a copy of the FST.
66 // // See Copy() below.
67 // Matcher(const Matcher &matcher, bool safe = false);
68 //
69 // // If safe = true, the copy is thread-safe. See Fst<>::Copy() for
70 // // further doc.
71 // Matcher *Copy(bool safe = false) const override;
72 //
73 // // Returns the match type that can be provided (depending on compatibility
74 // // of the input FST). It is either the requested match type, MATCH_NONE,
75 // // or MATCH_UNKNOWN. If test is false, a costly testing is avoided, but
76 // // MATCH_UNKNOWN may be returned. If test is true, a definite answer is
77 // // returned, but may involve more costly computation (e.g., visiting
78 // // the FST).
79 // // MatchType Type(bool test) const override;
80 //
81 // // Specifies the current state.
82 // void SetState(StateId s) final;
83 //
84 // // Finds matches to a label at the current state, returning true if a match
85 // // found. kNoLabel matches any non-consuming transitions, e.g., epsilon
86 // // transitions, which do not require a matching symbol.
87 // bool Find(Label label) final;
88 //
89 // // Iterator methods. Note that initially and after SetState() these have
90 // // undefined behavior until Find() is called.
91 //
92 // bool Done() const final;
93 //
94 // const Arc &Value() const final;
95 //
96 // void Next() final;
97 //
98 // // Returns final weight of a state.
99 // Weight Final(StateId) const final;
100 //
101 // // Indicates preference for being the side used for matching in
102 // // composition. If the value is kRequirePriority, then it is
103 // // mandatory that it be used. Calling this method without passing the
104 // // current state of the matcher invalidates the state of the matcher.
105 // ssize_t Priority(StateId s) final;
106 //
107 // // This specifies the known FST properties as viewed from this matcher. It
108 // // takes as argument the input FST's known properties.
109 // uint64_t Properties(uint64_t props) const override;
110 //
111 // // Returns matcher flags.
112 // uint32_t Flags() const override;
113 //
114 // // Returns matcher FST.
115 // const FST &GetFst() const override;
116 // };
117 
118 // Basic matcher flags.
119 
120 // Matcher needs to be used as the matching side in composition for
121 // at least one state (has kRequirePriority).
122 inline constexpr uint32_t kRequireMatch = 0x00000001;
123 
124 // Flags used for basic matchers (see also lookahead.h).
125 inline constexpr uint32_t kMatcherFlags = kRequireMatch;
126 
127 // Matcher priority that is mandatory.
128 inline constexpr ssize_t kRequirePriority = -1;
129 
130 // Matcher interface, templated on the Arc definition; used for matcher
131 // specializations that are returned by the InitMatcher FST method.
132 template <class A>
133 class MatcherBase {
134  public:
135  using Arc = A;
136  using Label = typename Arc::Label;
137  using StateId = typename Arc::StateId;
138  using Weight = typename Arc::Weight;
139 
140  virtual ~MatcherBase() {}
141 
142  // Virtual interface.
143 
144  virtual MatcherBase *Copy(bool safe = false) const = 0;
145  virtual MatchType Type(bool) const = 0;
146  virtual void SetState(StateId) = 0;
147  virtual bool Find(Label) = 0;
148  virtual bool Done() const = 0;
149  virtual const Arc &Value() const = 0;
150  virtual void Next() = 0;
151  virtual const Fst<Arc> &GetFst() const = 0;
152  virtual uint64_t Properties(uint64_t) const = 0;
153 
154  // Trivial implementations that can be used by derived classes. Full
155  // devirtualization is expected for any derived class marked final.
156  virtual uint32_t Flags() const { return 0; }
157 
158  virtual Weight Final(StateId s) const { return internal::Final(GetFst(), s); }
159 
160  virtual ssize_t Priority(StateId s) { return internal::NumArcs(GetFst(), s); }
161 };
162 
163 // A matcher that expects sorted labels on the side to be matched.
164 // If match_type == MATCH_INPUT, epsilons match the implicit self-loop
165 // Arc(kNoLabel, 0, Weight::One(), current_state) as well as any
166 // actual epsilon transitions. If match_type == MATCH_OUTPUT, then
167 // Arc(0, kNoLabel, Weight::One(), current_state) is instead matched.
168 template <class F>
169 class SortedMatcher : public MatcherBase<typename F::Arc> {
170  public:
171  using FST = F;
172  using Arc = typename FST::Arc;
173  using Label = typename Arc::Label;
174  using StateId = typename Arc::StateId;
175  using Weight = typename Arc::Weight;
176 
179 
180  // Labels >= binary_label will be searched for by binary search;
181  // o.w. linear search is used.
182  // This makes a copy of the FST.
183  SortedMatcher(const FST &fst, MatchType match_type, Label binary_label = 1)
184  : SortedMatcher(fst.Copy(), match_type, binary_label) {
185  owned_fst_.reset(&fst_);
186  }
187 
188  // Labels >= binary_label will be searched for by binary search;
189  // o.w. linear search is used.
190  // This doesn't copy the FST.
191  SortedMatcher(const FST *fst, MatchType match_type, Label binary_label = 1)
192  : fst_(*fst),
193  state_(kNoStateId),
194  aiter_(std::nullopt),
195  match_type_(match_type),
196  binary_label_(binary_label),
197  match_label_(kNoLabel),
198  narcs_(0),
199  loop_(kNoLabel, 0, Weight::One(), kNoStateId),
200  error_(false) {
201  switch (match_type_) {
202  case MATCH_INPUT:
203  case MATCH_NONE:
204  break;
205  case MATCH_OUTPUT:
206  std::swap(loop_.ilabel, loop_.olabel);
207  break;
208  default:
209  FSTERROR() << "SortedMatcher: Bad match type";
210  match_type_ = MATCH_NONE;
211  error_ = true;
212  }
213  }
214 
215  // This makes a copy of the FST.
216  SortedMatcher(const SortedMatcher &matcher, bool safe = false)
217  : owned_fst_(matcher.fst_.Copy(safe)),
218  fst_(*owned_fst_),
219  state_(kNoStateId),
220  aiter_(std::nullopt),
221  match_type_(matcher.match_type_),
222  binary_label_(matcher.binary_label_),
223  match_label_(kNoLabel),
224  narcs_(0),
225  loop_(matcher.loop_),
226  error_(matcher.error_) {}
227 
228  ~SortedMatcher() override = default;
229 
230  SortedMatcher *Copy(bool safe = false) const override {
231  return new SortedMatcher(*this, safe);
232  }
233 
234  MatchType Type(bool test) const override {
235  if (match_type_ == MATCH_NONE) return match_type_;
236  const auto true_prop =
237  match_type_ == MATCH_INPUT ? kILabelSorted : kOLabelSorted;
238  const auto false_prop =
239  match_type_ == MATCH_INPUT ? kNotILabelSorted : kNotOLabelSorted;
240  const auto props = fst_.Properties(true_prop | false_prop, test);
241  if (props & true_prop) {
242  return match_type_;
243  } else if (props & false_prop) {
244  return MATCH_NONE;
245  } else {
246  return MATCH_UNKNOWN;
247  }
248  }
249 
250  void SetState(StateId s) final {
251  if (state_ == s) return;
252  state_ = s;
253  if (match_type_ == MATCH_NONE) {
254  FSTERROR() << "SortedMatcher: Bad match type";
255  error_ = true;
256  }
257  aiter_.emplace(fst_, s);
258  aiter_->SetFlags(kArcNoCache, kArcNoCache);
259  narcs_ = internal::NumArcs(fst_, s);
260  loop_.nextstate = s;
261  }
262 
263  bool Find(Label match_label) final {
264  exact_match_ = true;
265  if (error_) {
266  current_loop_ = false;
267  match_label_ = kNoLabel;
268  return false;
269  }
270  current_loop_ = match_label == 0;
271  match_label_ = match_label == kNoLabel ? 0 : match_label;
272  if (Search()) {
273  return true;
274  } else {
275  return current_loop_;
276  }
277  }
278 
279  // Positions matcher to the first position where inserting match_label would
280  // maintain the sort order.
281  void LowerBound(Label label) {
282  exact_match_ = false;
283  current_loop_ = false;
284  if (error_) {
285  match_label_ = kNoLabel;
286  return;
287  }
288  match_label_ = label;
289  Search();
290  }
291 
292  // After Find(), returns false if no more exact matches.
293  // After LowerBound(), returns false if no more arcs.
294  bool Done() const final {
295  if (current_loop_) return false;
296  if (aiter_->Done()) return true;
297  if (!exact_match_) return false;
298  aiter_->SetFlags(
299  match_type_ == MATCH_INPUT ? kArcILabelValue : kArcOLabelValue,
301  return GetLabel() != match_label_;
302  }
303 
304  const Arc &Value() const final {
305  if (current_loop_) return loop_;
306  aiter_->SetFlags(kArcValueFlags, kArcValueFlags);
307  return aiter_->Value();
308  }
309 
310  void Next() final {
311  if (current_loop_) {
312  current_loop_ = false;
313  } else {
314  aiter_->Next();
315  }
316  }
317 
318  Weight Final(StateId s) const final { return MatcherBase<Arc>::Final(s); }
319 
320  ssize_t Priority(StateId s) final { return MatcherBase<Arc>::Priority(s); }
321 
322  const FST &GetFst() const override { return fst_; }
323 
324  uint64_t Properties(uint64_t inprops) const override {
325  return inprops | (error_ ? kError : 0);
326  }
327 
328  size_t Position() const { return aiter_ ? aiter_->Position() : 0; }
329 
330  private:
331  Label GetLabel() const {
332  const auto &arc = aiter_->Value();
333  return match_type_ == MATCH_INPUT ? arc.ilabel : arc.olabel;
334  }
335 
336  bool BinarySearch();
337  bool LinearSearch();
338  bool Search();
339 
340  std::unique_ptr<const FST> owned_fst_; // FST ptr if owned.
341  const FST &fst_; // FST for matching.
342  StateId state_; // Matcher state.
343  mutable std::optional<ArcIterator<FST>>
344  aiter_; // Iterator for current state.
345  MatchType match_type_; // Type of match to perform.
346  Label binary_label_; // Least label for binary search.
347  Label match_label_; // Current label to be matched.
348  size_t narcs_; // Current state arc count.
349  Arc loop_; // For non-consuming symbols.
350  bool current_loop_; // Current arc is the implicit loop.
351  bool exact_match_; // Exact match or lower bound?
352  bool error_; // Error encountered?
353 };
354 
355 // Returns true iff match to match_label_. The arc iterator is positioned at the
356 // lower bound, that is, the first element greater than or equal to
357 // match_label_, or the end if all elements are less than match_label_.
358 // If multiple elements are equal to the `match_label_`, returns the rightmost
359 // one.
360 template <class FST>
361 inline bool SortedMatcher<FST>::BinarySearch() {
362  size_t size = narcs_;
363  if (size == 0) {
364  return false;
365  }
366  size_t high = size - 1;
367  while (size > 1) {
368  const size_t half = size / 2;
369  const size_t mid = high - half;
370  aiter_->Seek(mid);
371  if (GetLabel() >= match_label_) {
372  high = mid;
373  }
374  size -= half;
375  }
376  aiter_->Seek(high);
377  const auto label = GetLabel();
378  if (label == match_label_) {
379  return true;
380  }
381  if (label < match_label_) {
382  aiter_->Next();
383  }
384  return false;
385 }
386 
387 // Returns true iff match to match_label_, positioning arc iterator at lower
388 // bound.
389 template <class FST>
390 inline bool SortedMatcher<FST>::LinearSearch() {
391  for (aiter_->Reset(); !aiter_->Done(); aiter_->Next()) {
392  const auto label = GetLabel();
393  if (label == match_label_) return true;
394  if (label > match_label_) break;
395  }
396  return false;
397 }
398 
399 // Returns true iff match to match_label_, positioning arc iterator at lower
400 // bound.
401 template <class FST>
402 inline bool SortedMatcher<FST>::Search() {
403  aiter_->SetFlags(
404  match_type_ == MATCH_INPUT ? kArcILabelValue : kArcOLabelValue,
406  if (match_label_ >= binary_label_) {
407  return BinarySearch();
408  } else {
409  return LinearSearch();
410  }
411 }
412 
413 // A matcher that stores labels in a per-state hash table populated upon the
414 // first visit to that state. Sorting is not required. Treatment of
415 // epsilons are the same as with SortedMatcher.
416 template <class F>
417 class HashMatcher : public MatcherBase<typename F::Arc> {
418  public:
419  using FST = F;
420  using Arc = typename FST::Arc;
421  using Label = typename Arc::Label;
422  using StateId = typename Arc::StateId;
423  using Weight = typename Arc::Weight;
424 
428 
429  // This makes a copy of the FST.
430  HashMatcher(const FST &fst, MatchType match_type)
431  : HashMatcher(fst.Copy(), match_type) {
432  owned_fst_.reset(&fst_);
433  }
434 
435  // This doesn't copy the FST.
436  HashMatcher(const FST *fst, MatchType match_type)
437  : fst_(*fst),
438  state_(kNoStateId),
439  match_type_(match_type),
440  loop_(kNoLabel, 0, Weight::One(), kNoStateId),
441  error_(false),
442  state_table_(std::make_shared<StateTable>()) {
443  switch (match_type_) {
444  case MATCH_INPUT:
445  case MATCH_NONE:
446  break;
447  case MATCH_OUTPUT:
448  std::swap(loop_.ilabel, loop_.olabel);
449  break;
450  default:
451  FSTERROR() << "HashMatcher: Bad match type";
452  match_type_ = MATCH_NONE;
453  error_ = true;
454  }
455  }
456 
457  // This makes a copy of the FST.
458  HashMatcher(const HashMatcher &matcher, bool safe = false)
459  : owned_fst_(matcher.fst_.Copy(safe)),
460  fst_(*owned_fst_),
461  state_(kNoStateId),
462  match_type_(matcher.match_type_),
463  loop_(matcher.loop_),
464  error_(matcher.error_),
465  state_table_(safe ? std::make_shared<StateTable>()
466  : matcher.state_table_) {}
467 
468  HashMatcher *Copy(bool safe = false) const override {
469  return new HashMatcher(*this, safe);
470  }
471 
472  // The argument is ignored as there are no relevant properties to test.
473  MatchType Type(bool test) const override { return match_type_; }
474 
475  void SetState(StateId s) final;
476 
477  bool Find(Label label) final {
478  current_loop_ = label == 0;
479  if (label == 0) {
480  Search(label);
481  return true;
482  }
483  if (label == kNoLabel) label = 0;
484  return Search(label);
485  }
486 
487  bool Done() const final {
488  if (current_loop_) return false;
489  return label_it_ == label_end_;
490  }
491 
492  const Arc &Value() const final {
493  if (current_loop_) return loop_;
494  aiter_->Seek(label_it_->second);
495  return aiter_->Value();
496  }
497 
498  void Next() final {
499  if (current_loop_) {
500  current_loop_ = false;
501  } else {
502  ++label_it_;
503  }
504  }
505 
506  const FST &GetFst() const override { return fst_; }
507 
508  uint64_t Properties(uint64_t inprops) const override {
509  return inprops | (error_ ? kError : 0);
510  }
511 
512  private:
513  Label GetLabel() const {
514  const auto &arc = aiter_->Value();
515  return match_type_ == MATCH_INPUT ? arc.ilabel : arc.olabel;
516  }
517 
518  bool Search(Label match_label);
519 
520  using LabelTable = std::unordered_multimap<Label, size_t>;
521  using StateTable = std::unordered_map<StateId, std::unique_ptr<LabelTable>>;
522 
523  std::unique_ptr<const FST> owned_fst_; // ptr to FST if owned.
524  const FST &fst_; // FST for matching.
525  StateId state_; // Matcher state.
526  MatchType match_type_;
527  Arc loop_; // The implicit loop itself.
528  bool current_loop_; // Is the current arc the implicit loop?
529  bool error_; // Error encountered?
530  std::unique_ptr<ArcIterator<FST>> aiter_;
531  std::shared_ptr<StateTable> state_table_; // Table from state to label table.
532  LabelTable *label_table_; // Pointer to current state's label table.
533  typename LabelTable::iterator label_it_; // Position for label.
534  typename LabelTable::iterator label_end_; // Position for last label + 1.
535 };
536 
537 template <class FST>
538 void HashMatcher<FST>::SetState(typename FST::Arc::StateId s) {
539  if (state_ == s) return;
540  // Resets everything for the state.
541  state_ = s;
542  loop_.nextstate = state_;
543  aiter_ = std::make_unique<ArcIterator<FST>>(fst_, state_);
544  if (match_type_ == MATCH_NONE) {
545  FSTERROR() << "HashMatcher: Bad match type";
546  error_ = true;
547  }
548  // Attempts to insert a new label table.
549  const auto &[it, success] =
550  state_table_->emplace(state_, std::make_unique<LabelTable>());
551  // Sets instance's pointer to the label table for this state.
552  label_table_ = it->second.get();
553  // If it already exists, no additional work is done and we simply return.
554  if (!success) return;
555  // Otherwise, populate this new table.
556  // Populates the label table.
557  label_table_->reserve(internal::NumArcs(fst_, state_));
558  const auto aiter_flags =
559  (match_type_ == MATCH_INPUT ? kArcILabelValue : kArcOLabelValue) |
560  kArcNoCache;
561  aiter_->SetFlags(aiter_flags, kArcFlags);
562  for (; !aiter_->Done(); aiter_->Next()) {
563  label_table_->emplace(GetLabel(), aiter_->Position());
564  }
565  aiter_->SetFlags(kArcValueFlags, kArcValueFlags);
566 }
567 
568 template <class FST>
569 inline bool HashMatcher<FST>::Search(typename FST::Arc::Label match_label) {
570  std::tie(label_it_, label_end_) = label_table_->equal_range(match_label);
571  if (label_it_ == label_end_) return false;
572  aiter_->Seek(label_it_->second);
573  return true;
574 }
575 
576 // Specifies whether we rewrite both the input and output sides during matching.
578  MATCHER_REWRITE_AUTO = 0, // Rewrites both sides iff acceptor.
581 };
582 
583 // For any requested label that doesn't match at a state, this matcher
584 // considers the *unique* transition that matches the label 'phi_label'
585 // (phi = 'fail'), and recursively looks for a match at its
586 // destination. When 'phi_loop' is true, if no match is found but a
587 // phi self-loop is found, then the phi transition found is returned
588 // with the phi_label rewritten as the requested label (both sides if
589 // an acceptor, or if 'rewrite_both' is true and both input and output
590 // labels of the found transition are 'phi_label'). If 'phi_label' is
591 // kNoLabel, this special matching is not done. PhiMatcher is
592 // templated itself on a matcher, which is used to perform the
593 // underlying matching. By default, the underlying matcher is
594 // constructed by PhiMatcher. The user can instead pass in this
595 // object; in that case, PhiMatcher takes its ownership.
596 // Phi non-determinism not supported. No non-consuming symbols other
597 // than epsilon supported with the underlying template argument matcher.
598 template <class M>
599 class PhiMatcher : public MatcherBase<typename M::Arc> {
600  public:
601  using FST = typename M::FST;
602  using Arc = typename FST::Arc;
603  using Label = typename Arc::Label;
604  using StateId = typename Arc::StateId;
605  using Weight = typename Arc::Weight;
606 
607  // This makes a copy of the FST (w/o 'matcher' arg).
608  PhiMatcher(const FST &fst, MatchType match_type, Label phi_label = kNoLabel,
609  bool phi_loop = true,
611  M *matcher = nullptr)
612  : matcher_(matcher ? matcher : new M(fst, match_type)),
613  match_type_(match_type),
614  phi_label_(phi_label),
615  state_(kNoStateId),
616  phi_loop_(phi_loop),
617  error_(false) {
618  if (match_type == MATCH_BOTH) {
619  FSTERROR() << "PhiMatcher: Bad match type";
620  match_type_ = MATCH_NONE;
621  error_ = true;
622  }
623  if (rewrite_mode == MATCHER_REWRITE_AUTO) {
624  rewrite_both_ = fst.Properties(kAcceptor, true);
625  } else if (rewrite_mode == MATCHER_REWRITE_ALWAYS) {
626  rewrite_both_ = true;
627  } else {
628  rewrite_both_ = false;
629  }
630  }
631 
632  // This doesn't copy the FST.
633  PhiMatcher(const FST *fst, MatchType match_type, Label phi_label = kNoLabel,
634  bool phi_loop = true,
636  M *matcher = nullptr)
637  : PhiMatcher(*fst, match_type, phi_label, phi_loop, rewrite_mode,
638  matcher ? matcher : new M(fst, match_type)) {}
639 
640  // This makes a copy of the FST.
641  PhiMatcher(const PhiMatcher &matcher, bool safe = false)
642  : matcher_(new M(*matcher.matcher_, safe)),
643  match_type_(matcher.match_type_),
644  phi_label_(matcher.phi_label_),
645  rewrite_both_(matcher.rewrite_both_),
646  state_(kNoStateId),
647  phi_loop_(matcher.phi_loop_),
648  error_(matcher.error_) {}
649 
650  PhiMatcher *Copy(bool safe = false) const override {
651  return new PhiMatcher(*this, safe);
652  }
653 
654  MatchType Type(bool test) const override { return matcher_->Type(test); }
655 
656  void SetState(StateId s) final {
657  if (state_ == s) return;
658  matcher_->SetState(s);
659  state_ = s;
660  has_phi_ = phi_label_ != kNoLabel;
661  }
662 
663  bool Find(Label match_label) final;
664 
665  bool Done() const final { return matcher_->Done(); }
666 
667  const Arc &Value() const final {
668  if ((phi_match_ == kNoLabel) && (phi_weight_ == Weight::One())) {
669  return matcher_->Value();
670  } else if (phi_match_ == 0) { // Virtual epsilon loop.
671  phi_arc_ = Arc(kNoLabel, 0, Weight::One(), state_);
672  if (match_type_ == MATCH_OUTPUT) {
673  std::swap(phi_arc_.ilabel, phi_arc_.olabel);
674  }
675  return phi_arc_;
676  } else {
677  phi_arc_ = matcher_->Value();
678  phi_arc_.weight = Times(phi_weight_, phi_arc_.weight);
679  if (phi_match_ != kNoLabel) { // Phi loop match.
680  if (rewrite_both_) {
681  if (phi_arc_.ilabel == phi_label_) phi_arc_.ilabel = phi_match_;
682  if (phi_arc_.olabel == phi_label_) phi_arc_.olabel = phi_match_;
683  } else if (match_type_ == MATCH_INPUT) {
684  phi_arc_.ilabel = phi_match_;
685  } else {
686  phi_arc_.olabel = phi_match_;
687  }
688  }
689  return phi_arc_;
690  }
691  }
692 
693  void Next() final { matcher_->Next(); }
694 
695  Weight Final(StateId s) const final {
696  auto weight = matcher_->Final(s);
697  if (phi_label_ == kNoLabel || weight != Weight::Zero()) {
698  return weight;
699  }
700  weight = Weight::One();
701  matcher_->SetState(s);
702  while (matcher_->Final(s) == Weight::Zero()) {
703  if (!matcher_->Find(phi_label_ == 0 ? -1 : phi_label_)) break;
704  weight = Times(weight, matcher_->Value().weight);
705  if (s == matcher_->Value().nextstate) {
706  return Weight::Zero(); // Does not follow phi self-loops.
707  }
708  s = matcher_->Value().nextstate;
709  matcher_->SetState(s);
710  }
711  weight = Times(weight, matcher_->Final(s));
712  return weight;
713  }
714 
715  ssize_t Priority(StateId s) final {
716  if (phi_label_ != kNoLabel) {
717  matcher_->SetState(s);
718  const bool has_phi = matcher_->Find(phi_label_ == 0 ? -1 : phi_label_);
719  return has_phi ? kRequirePriority : matcher_->Priority(s);
720  } else {
721  return matcher_->Priority(s);
722  }
723  }
724 
725  const FST &GetFst() const override { return matcher_->GetFst(); }
726 
727  uint64_t Properties(uint64_t props) const override;
728 
729  uint32_t Flags() const override {
730  if (phi_label_ == kNoLabel || match_type_ == MATCH_NONE) {
731  return matcher_->Flags();
732  }
733  return matcher_->Flags() | kRequireMatch;
734  }
735 
736  Label PhiLabel() const { return phi_label_; }
737 
738  private:
739  mutable std::unique_ptr<M> matcher_;
740  MatchType match_type_; // Type of match requested.
741  Label phi_label_; // Label that represents the phi transition.
742  bool rewrite_both_; // Rewrite both sides when both are phi_label_?
743  bool has_phi_; // Are there possibly phis at the current state?
744  Label phi_match_; // Current label that matches phi loop.
745  mutable Arc phi_arc_; // Arc to return.
746  StateId state_; // Matcher state.
747  Weight phi_weight_; // Product of the weights of phi transitions taken.
748  bool phi_loop_; // When true, phi self-loop are allowed and treated
749  // as rho (required for Aho-Corasick).
750  bool error_; // Error encountered?
751 
752  PhiMatcher &operator=(const PhiMatcher &) = delete;
753 };
754 
755 template <class M>
756 inline bool PhiMatcher<M>::Find(Label label) {
757  if (label == phi_label_ && phi_label_ != kNoLabel && phi_label_ != 0) {
758  FSTERROR() << "PhiMatcher::Find: bad label (phi): " << phi_label_;
759  error_ = true;
760  return false;
761  }
762  matcher_->SetState(state_);
763  phi_match_ = kNoLabel;
764  phi_weight_ = Weight::One();
765  // If phi_label_ == 0, there are no more true epsilon arcs.
766  if (phi_label_ == 0) {
767  if (label == kNoLabel) {
768  return false;
769  }
770  if (label == 0) { // but a virtual epsilon loop needs to be returned.
771  if (!matcher_->Find(kNoLabel)) {
772  return matcher_->Find(0);
773  } else {
774  phi_match_ = 0;
775  return true;
776  }
777  }
778  }
779  if (!has_phi_ || label == 0 || label == kNoLabel) {
780  return matcher_->Find(label);
781  }
782  auto s = state_;
783  while (!matcher_->Find(label)) {
784  // Look for phi transition (if phi_label_ == 0, we need to look
785  // for -1 to avoid getting the virtual self-loop)
786  if (!matcher_->Find(phi_label_ == 0 ? -1 : phi_label_)) return false;
787  if (phi_loop_ && matcher_->Value().nextstate == s) {
788  phi_match_ = label;
789  return true;
790  }
791  phi_weight_ = Times(phi_weight_, matcher_->Value().weight);
792  s = matcher_->Value().nextstate;
793  matcher_->Next();
794  if (!matcher_->Done()) {
795  FSTERROR() << "PhiMatcher: Phi non-determinism not supported";
796  error_ = true;
797  }
798  matcher_->SetState(s);
799  }
800  return true;
801 }
802 
803 template <class M>
804 inline uint64_t PhiMatcher<M>::Properties(uint64_t inprops) const {
805  auto outprops = matcher_->Properties(inprops);
806  if (error_) outprops |= kError;
807  if (match_type_ == MATCH_NONE) {
808  return outprops;
809  } else if (match_type_ == MATCH_INPUT) {
810  if (phi_label_ == 0) {
811  outprops &= ~(kEpsilons | kIEpsilons | kOEpsilons);
812  outprops |= kNoEpsilons | kNoIEpsilons;
813  }
814  if (rewrite_both_) {
815  return outprops &
818  } else {
819  return outprops &
822  }
823  } else if (match_type_ == MATCH_OUTPUT) {
824  if (phi_label_ == 0) {
825  outprops &= ~(kEpsilons | kIEpsilons | kOEpsilons);
826  outprops |= kNoEpsilons | kNoOEpsilons;
827  }
828  if (rewrite_both_) {
829  return outprops &
832  } else {
833  return outprops &
836  }
837  } else {
838  // Shouldn't ever get here.
839  FSTERROR() << "PhiMatcher: Bad match type: " << match_type_;
840  return 0;
841  }
842 }
843 
844 // For any requested label that doesn't match at a state, this matcher
845 // considers all transitions that match the label 'rho_label' (rho =
846 // 'rest'). Each such rho transition found is returned with the
847 // rho_label rewritten as the requested label (both sides if an
848 // acceptor, or if 'rewrite_both' is true and both input and output
849 // labels of the found transition are 'rho_label'). If 'rho_label' is
850 // kNoLabel, this special matching is not done. RhoMatcher is
851 // templated itself on a matcher, which is used to perform the
852 // underlying matching. By default, the underlying matcher is
853 // constructed by RhoMatcher. The user can instead pass in this
854 // object; in that case, RhoMatcher takes its ownership.
855 // No non-consuming symbols other than epsilon supported with
856 // the underlying template argument matcher.
857 template <class M>
858 class RhoMatcher : public MatcherBase<typename M::Arc> {
859  public:
860  using FST = typename M::FST;
861  using Arc = typename FST::Arc;
862  using Label = typename Arc::Label;
863  using StateId = typename Arc::StateId;
864  using Weight = typename Arc::Weight;
865 
866  // This makes a copy of the FST (w/o 'matcher' arg).
867  RhoMatcher(const FST &fst, MatchType match_type, Label rho_label = kNoLabel,
869  M *matcher = nullptr)
870  : matcher_(matcher ? matcher : new M(fst, match_type)),
871  match_type_(match_type),
872  rho_label_(rho_label),
873  error_(false),
874  state_(kNoStateId),
875  has_rho_(false) {
876  if (match_type == MATCH_BOTH) {
877  FSTERROR() << "RhoMatcher: Bad match type";
878  match_type_ = MATCH_NONE;
879  error_ = true;
880  }
881  if (rho_label == 0) {
882  FSTERROR() << "RhoMatcher: 0 cannot be used as rho_label";
883  rho_label_ = kNoLabel;
884  error_ = true;
885  }
886  if (rewrite_mode == MATCHER_REWRITE_AUTO) {
887  rewrite_both_ = fst.Properties(kAcceptor, true);
888  } else if (rewrite_mode == MATCHER_REWRITE_ALWAYS) {
889  rewrite_both_ = true;
890  } else {
891  rewrite_both_ = false;
892  }
893  }
894 
895  // This doesn't copy the FST.
896  RhoMatcher(const FST *fst, MatchType match_type, Label rho_label = kNoLabel,
898  M *matcher = nullptr)
899  : RhoMatcher(*fst, match_type, rho_label, rewrite_mode,
900  matcher ? matcher : new M(fst, match_type)) {}
901 
902  // This makes a copy of the FST.
903  RhoMatcher(const RhoMatcher &matcher, bool safe = false)
904  : matcher_(new M(*matcher.matcher_, safe)),
905  match_type_(matcher.match_type_),
906  rho_label_(matcher.rho_label_),
907  rewrite_both_(matcher.rewrite_both_),
908  error_(matcher.error_),
909  state_(kNoStateId),
910  has_rho_(false) {}
911 
912  RhoMatcher *Copy(bool safe = false) const override {
913  return new RhoMatcher(*this, safe);
914  }
915 
916  MatchType Type(bool test) const override { return matcher_->Type(test); }
917 
918  void SetState(StateId s) final {
919  if (state_ == s) return;
920  state_ = s;
921  matcher_->SetState(s);
922  has_rho_ = rho_label_ != kNoLabel;
923  }
924 
925  bool Find(Label label) final {
926  if (label == rho_label_ && rho_label_ != kNoLabel) {
927  FSTERROR() << "RhoMatcher::Find: bad label (rho)";
928  error_ = true;
929  return false;
930  }
931  if (matcher_->Find(label)) {
932  rho_match_ = kNoLabel;
933  return true;
934  } else if (has_rho_ && label != 0 && label != kNoLabel &&
935  (has_rho_ = matcher_->Find(rho_label_))) {
936  rho_match_ = label;
937  return true;
938  } else {
939  return false;
940  }
941  }
942 
943  bool Done() const final { return matcher_->Done(); }
944 
945  const Arc &Value() const final {
946  if (rho_match_ == kNoLabel) {
947  return matcher_->Value();
948  } else {
949  rho_arc_ = matcher_->Value();
950  if (rewrite_both_) {
951  if (rho_arc_.ilabel == rho_label_) rho_arc_.ilabel = rho_match_;
952  if (rho_arc_.olabel == rho_label_) rho_arc_.olabel = rho_match_;
953  } else if (match_type_ == MATCH_INPUT) {
954  rho_arc_.ilabel = rho_match_;
955  } else {
956  rho_arc_.olabel = rho_match_;
957  }
958  return rho_arc_;
959  }
960  }
961 
962  void Next() final { matcher_->Next(); }
963 
964  Weight Final(StateId s) const final { return matcher_->Final(s); }
965 
966  ssize_t Priority(StateId s) final {
967  state_ = s;
968  matcher_->SetState(s);
969  has_rho_ = matcher_->Find(rho_label_);
970  if (has_rho_) {
971  return kRequirePriority;
972  } else {
973  return matcher_->Priority(s);
974  }
975  }
976 
977  const FST &GetFst() const override { return matcher_->GetFst(); }
978 
979  uint64_t Properties(uint64_t props) const override;
980 
981  uint32_t Flags() const override {
982  if (rho_label_ == kNoLabel || match_type_ == MATCH_NONE) {
983  return matcher_->Flags();
984  }
985  return matcher_->Flags() | kRequireMatch;
986  }
987 
988  Label RhoLabel() const { return rho_label_; }
989 
990  private:
991  std::unique_ptr<M> matcher_;
992  MatchType match_type_; // Type of match requested.
993  Label rho_label_; // Label that represents the rho transition
994  bool rewrite_both_; // Rewrite both sides when both are rho_label_?
995  Label rho_match_; // Current label that matches rho transition.
996  mutable Arc rho_arc_; // Arc to return when rho match.
997  bool error_; // Error encountered?
998  StateId state_; // Matcher state.
999  bool has_rho_; // Are there possibly rhos at the current state?
1000 };
1001 
1002 template <class M>
1003 inline uint64_t RhoMatcher<M>::Properties(uint64_t inprops) const {
1004  auto outprops = matcher_->Properties(inprops);
1005  if (error_) outprops |= kError;
1006  if (match_type_ == MATCH_NONE) {
1007  return outprops;
1008  } else if (match_type_ == MATCH_INPUT) {
1009  if (rewrite_both_) {
1010  return outprops &
1013  } else {
1014  return outprops & ~(kODeterministic | kAcceptor | kString |
1016  }
1017  } else if (match_type_ == MATCH_OUTPUT) {
1018  if (rewrite_both_) {
1019  return outprops &
1022  } else {
1023  return outprops & ~(kIDeterministic | kAcceptor | kString |
1025  }
1026  } else {
1027  // Shouldn't ever get here.
1028  FSTERROR() << "RhoMatcher: Bad match type: " << match_type_;
1029  return 0;
1030  }
1031 }
1032 
1033 // For any requested label, this matcher considers all transitions
1034 // that match the label 'sigma_label' (sigma = "any"), and this in
1035 // additions to transitions with the requested label. Each such sigma
1036 // transition found is returned with the sigma_label rewritten as the
1037 // requested label (both sides if an acceptor, or if 'rewrite_both' is
1038 // true and both input and output labels of the found transition are
1039 // 'sigma_label'). If 'sigma_label' is kNoLabel, this special
1040 // matching is not done. SigmaMatcher is templated itself on a
1041 // matcher, which is used to perform the underlying matching. By
1042 // default, the underlying matcher is constructed by SigmaMatcher.
1043 // The user can instead pass in this object; in that case,
1044 // SigmaMatcher takes its ownership. No non-consuming symbols other
1045 // than epsilon supported with the underlying template argument matcher.
1046 template <class M>
1047 class SigmaMatcher : public MatcherBase<typename M::Arc> {
1048  public:
1049  using FST = typename M::FST;
1050  using Arc = typename FST::Arc;
1051  using Label = typename Arc::Label;
1052  using StateId = typename Arc::StateId;
1053  using Weight = typename Arc::Weight;
1054 
1055  // This makes a copy of the FST (w/o 'matcher' arg).
1056  SigmaMatcher(const FST &fst, MatchType match_type,
1057  Label sigma_label = kNoLabel,
1059  M *matcher = nullptr)
1060  : matcher_(matcher ? matcher : new M(fst, match_type)),
1061  match_type_(match_type),
1062  sigma_label_(sigma_label),
1063  error_(false),
1064  state_(kNoStateId) {
1065  if (match_type == MATCH_BOTH) {
1066  FSTERROR() << "SigmaMatcher: Bad match type";
1067  match_type_ = MATCH_NONE;
1068  error_ = true;
1069  }
1070  if (sigma_label == 0) {
1071  FSTERROR() << "SigmaMatcher: 0 cannot be used as sigma_label";
1072  sigma_label_ = kNoLabel;
1073  error_ = true;
1074  }
1075  if (rewrite_mode == MATCHER_REWRITE_AUTO) {
1076  rewrite_both_ = fst.Properties(kAcceptor, true);
1077  } else if (rewrite_mode == MATCHER_REWRITE_ALWAYS) {
1078  rewrite_both_ = true;
1079  } else {
1080  rewrite_both_ = false;
1081  }
1082  }
1083 
1084  // This doesn't copy the FST.
1085  SigmaMatcher(const FST *fst, MatchType match_type,
1086  Label sigma_label = kNoLabel,
1088  M *matcher = nullptr)
1089  : SigmaMatcher(*fst, match_type, sigma_label, rewrite_mode,
1090  matcher ? matcher : new M(fst, match_type)) {}
1091 
1092  // This makes a copy of the FST.
1093  SigmaMatcher(const SigmaMatcher &matcher, bool safe = false)
1094  : matcher_(new M(*matcher.matcher_, safe)),
1095  match_type_(matcher.match_type_),
1096  sigma_label_(matcher.sigma_label_),
1097  rewrite_both_(matcher.rewrite_both_),
1098  error_(matcher.error_),
1099  state_(kNoStateId) {}
1100 
1101  SigmaMatcher *Copy(bool safe = false) const override {
1102  return new SigmaMatcher(*this, safe);
1103  }
1104 
1105  MatchType Type(bool test) const override { return matcher_->Type(test); }
1106 
1107  void SetState(StateId s) final {
1108  if (state_ == s) return;
1109  state_ = s;
1110  matcher_->SetState(s);
1111  has_sigma_ =
1112  (sigma_label_ != kNoLabel) ? matcher_->Find(sigma_label_) : false;
1113  }
1114 
1115  bool Find(Label match_label) final {
1116  match_label_ = match_label;
1117  if (match_label == sigma_label_ && sigma_label_ != kNoLabel) {
1118  FSTERROR() << "SigmaMatcher::Find: bad label (sigma)";
1119  error_ = true;
1120  return false;
1121  }
1122  if (matcher_->Find(match_label)) {
1123  sigma_match_ = kNoLabel;
1124  return true;
1125  } else if (has_sigma_ && match_label != 0 && match_label != kNoLabel &&
1126  matcher_->Find(sigma_label_)) {
1127  sigma_match_ = match_label;
1128  return true;
1129  } else {
1130  return false;
1131  }
1132  }
1133 
1134  bool Done() const final { return matcher_->Done(); }
1135 
1136  const Arc &Value() const final {
1137  if (sigma_match_ == kNoLabel) {
1138  return matcher_->Value();
1139  } else {
1140  sigma_arc_ = matcher_->Value();
1141  if (rewrite_both_) {
1142  if (sigma_arc_.ilabel == sigma_label_) sigma_arc_.ilabel = sigma_match_;
1143  if (sigma_arc_.olabel == sigma_label_) sigma_arc_.olabel = sigma_match_;
1144  } else if (match_type_ == MATCH_INPUT) {
1145  sigma_arc_.ilabel = sigma_match_;
1146  } else {
1147  sigma_arc_.olabel = sigma_match_;
1148  }
1149  return sigma_arc_;
1150  }
1151  }
1152 
1153  void Next() final {
1154  matcher_->Next();
1155  if (matcher_->Done() && has_sigma_ && (sigma_match_ == kNoLabel) &&
1156  (match_label_ > 0)) {
1157  matcher_->Find(sigma_label_);
1158  sigma_match_ = match_label_;
1159  }
1160  }
1161 
1162  Weight Final(StateId s) const final { return matcher_->Final(s); }
1163 
1164  ssize_t Priority(StateId s) final {
1165  if (sigma_label_ != kNoLabel) {
1166  SetState(s);
1167  return has_sigma_ ? kRequirePriority : matcher_->Priority(s);
1168  } else {
1169  return matcher_->Priority(s);
1170  }
1171  }
1172 
1173  const FST &GetFst() const override { return matcher_->GetFst(); }
1174 
1175  uint64_t Properties(uint64_t props) const override;
1176 
1177  uint32_t Flags() const override {
1178  if (sigma_label_ == kNoLabel || match_type_ == MATCH_NONE) {
1179  return matcher_->Flags();
1180  }
1181  return matcher_->Flags() | kRequireMatch;
1182  }
1183 
1184  Label SigmaLabel() const { return sigma_label_; }
1185 
1186  private:
1187  std::unique_ptr<M> matcher_;
1188  MatchType match_type_; // Type of match requested.
1189  Label sigma_label_; // Label that represents the sigma transition.
1190  bool rewrite_both_; // Rewrite both sides when both are sigma_label_?
1191  bool has_sigma_; // Are there sigmas at the current state?
1192  Label sigma_match_; // Current label that matches sigma transition.
1193  mutable Arc sigma_arc_; // Arc to return when sigma match.
1194  Label match_label_; // Label being matched.
1195  bool error_; // Error encountered?
1196  StateId state_; // Matcher state.
1197 };
1198 
1199 template <class M>
1200 inline uint64_t SigmaMatcher<M>::Properties(uint64_t inprops) const {
1201  auto outprops = matcher_->Properties(inprops);
1202  if (error_) outprops |= kError;
1203  if (match_type_ == MATCH_NONE) {
1204  return outprops;
1205  } else if (rewrite_both_) {
1206  return outprops & ~(kIDeterministic | kNonIDeterministic | kODeterministic |
1209  } else if (match_type_ == MATCH_INPUT) {
1210  return outprops & ~(kIDeterministic | kNonIDeterministic | kODeterministic |
1212  kString | kAcceptor);
1213  } else if (match_type_ == MATCH_OUTPUT) {
1214  return outprops & ~(kIDeterministic | kNonIDeterministic | kODeterministic |
1216  kString | kAcceptor);
1217  } else {
1218  // Shouldn't ever get here.
1219  FSTERROR() << "SigmaMatcher: Bad match type: " << match_type_;
1220  return 0;
1221  }
1222 }
1223 
1224 // Flags for MultiEpsMatcher.
1225 
1226 // Return multi-epsilon arcs for Find(kNoLabel).
1227 inline constexpr uint32_t kMultiEpsList = 0x00000001;
1228 
1229 // Return a kNolabel loop for Find(multi_eps).
1230 inline constexpr uint32_t kMultiEpsLoop = 0x00000002;
1231 
1232 // MultiEpsMatcher: allows treating multiple non-0 labels as
1233 // non-consuming labels in addition to 0 that is always
1234 // non-consuming. Precise behavior controlled by 'flags' argument. By
1235 // default, the underlying matcher is constructed by
1236 // MultiEpsMatcher. The user can instead pass in this object; in that
1237 // case, MultiEpsMatcher takes its ownership iff 'own_matcher' is
1238 // true.
1239 template <class M>
1241  public:
1242  using FST = typename M::FST;
1243  using Arc = typename FST::Arc;
1244  using Label = typename Arc::Label;
1245  using StateId = typename Arc::StateId;
1246  using Weight = typename Arc::Weight;
1247 
1248  // This makes a copy of the FST (w/o 'matcher' arg).
1249  MultiEpsMatcher(const FST &fst, MatchType match_type,
1250  uint32_t flags = (kMultiEpsLoop | kMultiEpsList),
1251  M *matcher = nullptr, bool own_matcher = true)
1252  : matcher_(matcher ? matcher : new M(fst, match_type)),
1253  flags_(flags),
1254  own_matcher_(matcher ? own_matcher : true) {
1255  Init(match_type);
1256  }
1257 
1258  // This doesn't copy the FST.
1259  MultiEpsMatcher(const FST *fst, MatchType match_type,
1260  uint32_t flags = (kMultiEpsLoop | kMultiEpsList),
1261  M *matcher = nullptr, bool own_matcher = true)
1262  : matcher_(matcher ? matcher : new M(fst, match_type)),
1263  flags_(flags),
1264  own_matcher_(matcher ? own_matcher : true) {
1265  Init(match_type);
1266  }
1267 
1268  // This makes a copy of the FST.
1269  MultiEpsMatcher(const MultiEpsMatcher &matcher, bool safe = false)
1270  : matcher_(new M(*matcher.matcher_, safe)),
1271  flags_(matcher.flags_),
1272  own_matcher_(true),
1273  multi_eps_labels_(matcher.multi_eps_labels_),
1274  loop_(matcher.loop_) {
1275  loop_.nextstate = kNoStateId;
1276  }
1277 
1279  if (own_matcher_) delete matcher_;
1280  }
1281 
1282  MultiEpsMatcher *Copy(bool safe = false) const {
1283  return new MultiEpsMatcher(*this, safe);
1284  }
1285 
1286  MatchType Type(bool test) const { return matcher_->Type(test); }
1287 
1288  void SetState(StateId state) {
1289  matcher_->SetState(state);
1290  loop_.nextstate = state;
1291  }
1292 
1293  bool Find(Label label);
1294 
1295  bool Done() const { return done_; }
1296 
1297  const Arc &Value() const { return current_loop_ ? loop_ : matcher_->Value(); }
1298 
1299  void Next() {
1300  if (!current_loop_) {
1301  matcher_->Next();
1302  done_ = matcher_->Done();
1303  if (done_ && multi_eps_iter_ != multi_eps_labels_.End()) {
1304  ++multi_eps_iter_;
1305  while ((multi_eps_iter_ != multi_eps_labels_.End()) &&
1306  !matcher_->Find(*multi_eps_iter_)) {
1307  ++multi_eps_iter_;
1308  }
1309  if (multi_eps_iter_ != multi_eps_labels_.End()) {
1310  done_ = false;
1311  } else {
1312  done_ = !matcher_->Find(kNoLabel);
1313  }
1314  }
1315  } else {
1316  done_ = true;
1317  }
1318  }
1319 
1320  const FST &GetFst() const { return matcher_->GetFst(); }
1321 
1322  uint64_t Properties(uint64_t props) const {
1323  return matcher_->Properties(props);
1324  }
1325 
1326  const M *GetMatcher() const { return matcher_; }
1327 
1328  Weight Final(StateId s) const { return matcher_->Final(s); }
1329 
1330  uint32_t Flags() const { return matcher_->Flags(); }
1331 
1332  ssize_t Priority(StateId s) { return matcher_->Priority(s); }
1333 
1334  void AddMultiEpsLabel(Label label) {
1335  if (label == 0) {
1336  FSTERROR() << "MultiEpsMatcher: Bad multi-eps label: 0";
1337  } else {
1338  multi_eps_labels_.Insert(label);
1339  }
1340  }
1341 
1343  if (label == 0) {
1344  FSTERROR() << "MultiEpsMatcher: Bad multi-eps label: 0";
1345  } else {
1346  multi_eps_labels_.Erase(label);
1347  }
1348  }
1349 
1350  void ClearMultiEpsLabels() { multi_eps_labels_.Clear(); }
1351 
1352  private:
1353  void Init(MatchType match_type) {
1354  if (match_type == MATCH_INPUT) {
1355  loop_.ilabel = kNoLabel;
1356  loop_.olabel = 0;
1357  } else {
1358  loop_.ilabel = 0;
1359  loop_.olabel = kNoLabel;
1360  }
1361  loop_.weight = Weight::One();
1362  loop_.nextstate = kNoStateId;
1363  }
1364 
1365  M *matcher_;
1366  uint32_t flags_;
1367  bool own_matcher_; // Does this class delete the matcher?
1368 
1369  // Multi-eps label set.
1370  CompactSet<Label, kNoLabel> multi_eps_labels_;
1371  typename CompactSet<Label, kNoLabel>::const_iterator multi_eps_iter_;
1372 
1373  bool current_loop_; // Current arc is the implicit loop?
1374  mutable Arc loop_; // For non-consuming symbols.
1375  bool done_; // Matching done?
1376 
1377  MultiEpsMatcher &operator=(const MultiEpsMatcher &) = delete;
1378 };
1379 
1380 template <class M>
1381 inline bool MultiEpsMatcher<M>::Find(Label label) {
1382  multi_eps_iter_ = multi_eps_labels_.End();
1383  current_loop_ = false;
1384  bool ret;
1385  if (label == 0) {
1386  ret = matcher_->Find(0);
1387  } else if (label == kNoLabel) {
1388  if (flags_ & kMultiEpsList) {
1389  // Returns all non-consuming arcs (including epsilon).
1390  multi_eps_iter_ = multi_eps_labels_.Begin();
1391  while ((multi_eps_iter_ != multi_eps_labels_.End()) &&
1392  !matcher_->Find(*multi_eps_iter_)) {
1393  ++multi_eps_iter_;
1394  }
1395  if (multi_eps_iter_ != multi_eps_labels_.End()) {
1396  ret = true;
1397  } else {
1398  ret = matcher_->Find(kNoLabel);
1399  }
1400  } else {
1401  // Returns all epsilon arcs.
1402  ret = matcher_->Find(kNoLabel);
1403  }
1404  } else if ((flags_ & kMultiEpsLoop) &&
1405  multi_eps_labels_.Find(label) != multi_eps_labels_.End()) {
1406  // Returns implicit loop.
1407  current_loop_ = true;
1408  ret = true;
1409  } else {
1410  ret = matcher_->Find(label);
1411  }
1412  done_ = !ret;
1413  return ret;
1414 }
1415 
1416 // This class discards any implicit matches (e.g., the implicit epsilon
1417 // self-loops in the SortedMatcher). Matchers are most often used in
1418 // composition/intersection where the implicit matches are needed
1419 // e.g. for epsilon processing. However, if a matcher is simply being
1420 // used to look-up explicit label matches, this class saves the user
1421 // from having to check for and discard the unwanted implicit matches
1422 // themselves.
1423 template <class M>
1424 class ExplicitMatcher : public MatcherBase<typename M::Arc> {
1425  public:
1426  using FST = typename M::FST;
1427  using Arc = typename FST::Arc;
1428  using Label = typename Arc::Label;
1429  using StateId = typename Arc::StateId;
1430  using Weight = typename Arc::Weight;
1431 
1432  // This makes a copy of the FST.
1433  ExplicitMatcher(const FST &fst, MatchType match_type, M *matcher = nullptr)
1434  : matcher_(matcher ? matcher : new M(fst, match_type)),
1435  match_type_(match_type),
1436  error_(false) {}
1437 
1438  // This doesn't copy the FST.
1439  ExplicitMatcher(const FST *fst, MatchType match_type, M *matcher = nullptr)
1440  : matcher_(matcher ? matcher : new M(fst, match_type)),
1441  match_type_(match_type),
1442  error_(false) {}
1443 
1444  // This makes a copy of the FST.
1445  ExplicitMatcher(const ExplicitMatcher &matcher, bool safe = false)
1446  : matcher_(new M(*matcher.matcher_, safe)),
1447  match_type_(matcher.match_type_),
1448  error_(matcher.error_) {}
1449 
1450  ExplicitMatcher *Copy(bool safe = false) const override {
1451  return new ExplicitMatcher(*this, safe);
1452  }
1453 
1454  MatchType Type(bool test) const override { return matcher_->Type(test); }
1455 
1456  void SetState(StateId s) final { matcher_->SetState(s); }
1457 
1458  bool Find(Label label) final {
1459  matcher_->Find(label);
1460  CheckArc();
1461  return !Done();
1462  }
1463 
1464  bool Done() const final { return matcher_->Done(); }
1465 
1466  const Arc &Value() const final { return matcher_->Value(); }
1467 
1468  void Next() final {
1469  matcher_->Next();
1470  CheckArc();
1471  }
1472 
1473  Weight Final(StateId s) const final { return matcher_->Final(s); }
1474 
1475  ssize_t Priority(StateId s) final { return matcher_->Priority(s); }
1476 
1477  const FST &GetFst() const final { return matcher_->GetFst(); }
1478 
1479  uint64_t Properties(uint64_t inprops) const override {
1480  return matcher_->Properties(inprops);
1481  }
1482 
1483  const M *GetMatcher() const { return matcher_.get(); }
1484 
1485  uint32_t Flags() const override { return matcher_->Flags(); }
1486 
1487  private:
1488  // Checks current arc if available and explicit. If not available, stops. If
1489  // not explicit, checks next ones.
1490  void CheckArc() {
1491  for (; !matcher_->Done(); matcher_->Next()) {
1492  const auto label = match_type_ == MATCH_INPUT ? matcher_->Value().ilabel
1493  : matcher_->Value().olabel;
1494  if (label != kNoLabel) return;
1495  }
1496  }
1497 
1498  std::unique_ptr<M> matcher_;
1499  MatchType match_type_; // Type of match requested.
1500  bool error_; // Error encountered?
1501 };
1502 
1503 // Generic matcher, templated on the FST definition.
1504 //
1505 // Here is a typical use:
1506 //
1507 // Matcher<StdFst> matcher(fst, MATCH_INPUT);
1508 // matcher.SetState(state);
1509 // if (matcher.Find(label))
1510 // for (; !matcher.Done(); matcher.Next()) {
1511 // auto &arc = matcher.Value();
1512 // ...
1513 // }
1514 template <class F>
1515 class Matcher {
1516  public:
1517  using FST = F;
1518  using Arc = typename F::Arc;
1519  using Label = typename Arc::Label;
1520  using StateId = typename Arc::StateId;
1521  using Weight = typename Arc::Weight;
1522 
1523  // This makes a copy of the FST.
1524  Matcher(const FST &fst, MatchType match_type)
1525  : owned_fst_(fst.Copy()), base_(owned_fst_->InitMatcher(match_type)) {
1526  if (!base_)
1527  base_ =
1528  std::make_unique<SortedMatcher<FST>>(owned_fst_.get(), match_type);
1529  }
1530 
1531  // This doesn't copy the FST.
1532  Matcher(const FST *fst, MatchType match_type)
1533  : base_(fst->InitMatcher(match_type)) {
1534  if (!base_) base_ = std::make_unique<SortedMatcher<FST>>(fst, match_type);
1535  }
1536 
1537  // This makes a copy of the FST.
1538  Matcher(const Matcher &matcher, bool safe = false)
1539  : base_(matcher.base_->Copy(safe)) {}
1540 
1541  // Takes ownership of the provided matcher.
1542  explicit Matcher(MatcherBase<Arc> *base_matcher) : base_(base_matcher) {}
1543 
1544  Matcher *Copy(bool safe = false) const { return new Matcher(*this, safe); }
1545 
1546  MatchType Type(bool test) const { return base_->Type(test); }
1547 
1548  void SetState(StateId s) { base_->SetState(s); }
1549 
1550  bool Find(Label label) { return base_->Find(label); }
1551 
1552  bool Done() const { return base_->Done(); }
1553 
1554  const Arc &Value() const { return base_->Value(); }
1555 
1556  void Next() { base_->Next(); }
1557 
1558  const FST &GetFst() const { return down_cast<const FST &>(base_->GetFst()); }
1559 
1560  uint64_t Properties(uint64_t props) const { return base_->Properties(props); }
1561 
1562  Weight Final(StateId s) const { return base_->Final(s); }
1563 
1564  uint32_t Flags() const { return base_->Flags() & kMatcherFlags; }
1565 
1566  ssize_t Priority(StateId s) { return base_->Priority(s); }
1567 
1568  private:
1569  std::unique_ptr<const FST> owned_fst_;
1570  std::unique_ptr<MatcherBase<Arc>> base_;
1571 };
1572 
1573 } // namespace fst
1574 
1575 #endif // FST_MATCHER_H_
bool Find(Label label) final
Definition: matcher.h:925
MatchType Type(bool test) const
Definition: matcher.h:1546
void Next() final
Definition: matcher.h:310
void SetState(StateId s) final
Definition: matcher.h:250
virtual void Next()=0
RhoMatcher(const RhoMatcher &matcher, bool safe=false)
Definition: matcher.h:903
Weight Final(StateId s) const final
Definition: matcher.h:695
const Arc & Value() const final
Definition: matcher.h:667
typename FST::Arc Arc
Definition: matcher.h:861
void SetState(StateId s) final
Definition: matcher.h:918
constexpr uint8_t kArcValueFlags
Definition: fst.h:453
virtual uint64_t Properties(uint64_t) const =0
constexpr ssize_t kRequirePriority
Definition: matcher.h:128
typename typename Filter::Matcher2::FST FST
Definition: matcher.h:1242
Matcher(const FST &fst, MatchType match_type)
Definition: matcher.h:1524
const FST & GetFst() const
Definition: matcher.h:1320
void AddMultiEpsLabel(Label label)
Definition: matcher.h:1334
constexpr int kNoLabel
Definition: fst.h:201
size_t Position() const
Definition: matcher.h:328
constexpr uint8_t kArcNoCache
Definition: fst.h:452
SortedMatcher * Copy(bool safe=false) const override
Definition: matcher.h:230
constexpr uint64_t kOEpsilons
Definition: properties.h:88
uint32_t Flags() const override
Definition: matcher.h:1485
uint32_t Flags() const
Definition: matcher.h:1564
ssize_t Priority(StateId s) final
Definition: matcher.h:1164
const Arc & Value() const final
Definition: matcher.h:945
ExplicitMatcher(const FST *fst, MatchType match_type, M *matcher=nullptr)
Definition: matcher.h:1439
bool Done() const final
Definition: matcher.h:487
void LowerBound(Label label)
Definition: matcher.h:281
typename Arc::Label Label
Definition: matcher.h:1428
bool Find(Label label)
Definition: matcher.h:1381
PhiMatcher * Copy(bool safe=false) const override
Definition: matcher.h:650
const Arc & Value() const final
Definition: matcher.h:304
MatchType Type(bool test) const override
Definition: matcher.h:1454
ssize_t Priority(StateId s) final
Definition: matcher.h:966
uint64_t Properties(uint64_t props) const
Definition: matcher.h:1560
const M * GetMatcher() const
Definition: matcher.h:1326
uint64_t Properties(uint64_t props) const override
Definition: matcher.h:1003
SigmaMatcher(const SigmaMatcher &matcher, bool safe=false)
Definition: matcher.h:1093
Arc::Weight Final(const ExpandedFst< Arc > &fst, typename Arc::StateId s)
Definition: expanded-fst.h:97
constexpr uint32_t kMultiEpsList
Definition: matcher.h:1227
MatchType
Definition: fst.h:193
constexpr uint8_t kArcFlags
Definition: fst.h:455
ErrorWeight Times(const ErrorWeight &, const ErrorWeight &)
Definition: error-weight.h:63
constexpr uint64_t kError
Definition: properties.h:51
PhiMatcher(const FST *fst, MatchType match_type, Label phi_label=kNoLabel, bool phi_loop=true, MatcherRewriteMode rewrite_mode=MATCHER_REWRITE_AUTO, M *matcher=nullptr)
Definition: matcher.h:633
Weight Final(StateId s) const
Definition: matcher.h:1562
uint64_t Properties(uint64_t props) const override
Definition: matcher.h:804
bool Find(Label label) final
Definition: matcher.h:1458
typename Arc::StateId StateId
Definition: matcher.h:422
virtual bool Find(Label)=0
typename Arc::Label Label
Definition: matcher.h:1519
uint64_t Properties(uint64_t props) const
Definition: matcher.h:1322
MatchType Type(bool test) const override
Definition: matcher.h:654
Weight Final(StateId s) const final
Definition: matcher.h:1162
Matcher(const Matcher &matcher, bool safe=false)
Definition: matcher.h:1538
SigmaMatcher(const FST &fst, MatchType match_type, Label sigma_label=kNoLabel, MatcherRewriteMode rewrite_mode=MATCHER_REWRITE_AUTO, M *matcher=nullptr)
Definition: matcher.h:1056
void Next() final
Definition: matcher.h:962
ExplicitMatcher(const ExplicitMatcher &matcher, bool safe=false)
Definition: matcher.h:1445
ExplicitMatcher * Copy(bool safe=false) const override
Definition: matcher.h:1450
void SetState(StateId s) final
Definition: matcher.h:1107
const FST & GetFst() const override
Definition: matcher.h:977
void SetState(StateId s) final
Definition: matcher.h:656
constexpr uint8_t kArcILabelValue
Definition: fst.h:444
constexpr uint64_t kEpsilons
Definition: properties.h:78
ssize_t NumArcs(const ExpandedFst< Arc > &fst, typename Arc::StateId s)
Definition: expanded-fst.h:103
To down_cast(From *f)
Definition: compat.h:50
constexpr uint64_t kNotOLabelSorted
Definition: properties.h:100
HashMatcher * Copy(bool safe=false) const override
Definition: matcher.h:468
ExplicitMatcher(const FST &fst, MatchType match_type, M *matcher=nullptr)
Definition: matcher.h:1433
typename M::FST FST
Definition: matcher.h:1426
Label SigmaLabel() const
Definition: matcher.h:1184
uint32_t Flags() const override
Definition: matcher.h:1177
MatchType Type(bool test) const override
Definition: matcher.h:234
void SetState(StateId state)
Definition: matcher.h:1288
PhiMatcher(const PhiMatcher &matcher, bool safe=false)
Definition: matcher.h:641
RhoMatcher(const FST &fst, MatchType match_type, Label rho_label=kNoLabel, MatcherRewriteMode rewrite_mode=MATCHER_REWRITE_AUTO, M *matcher=nullptr)
Definition: matcher.h:867
typename Arc::StateId StateId
Definition: matcher.h:1520
constexpr uint64_t kODeterministic
Definition: properties.h:73
constexpr int kNoStateId
Definition: fst.h:202
MultiEpsMatcher(const FST *fst, MatchType match_type, uint32_t flags=(kMultiEpsLoop|kMultiEpsList), M *matcher=nullptr, bool own_matcher=true)
Definition: matcher.h:1259
typename F::Arc Arc
Definition: matcher.h:1518
typename Arc::Weight Weight
Definition: matcher.h:605
Label RhoLabel() const
Definition: matcher.h:988
typename Arc::StateId StateId
Definition: matcher.h:1429
void RemoveMultiEpsLabel(Label label)
Definition: matcher.h:1342
void Next() final
Definition: matcher.h:498
MatchType Type(bool test) const
Definition: matcher.h:1286
uint32_t Flags() const override
Definition: matcher.h:729
void Next() final
Definition: matcher.h:1153
#define FSTERROR()
Definition: util.h:53
bool Done() const final
Definition: matcher.h:1134
constexpr uint32_t kMultiEpsLoop
Definition: matcher.h:1230
bool Done() const final
Definition: matcher.h:294
const FST & GetFst() const override
Definition: matcher.h:506
constexpr uint64_t kNonIDeterministic
Definition: properties.h:70
const Arc & Value() const
Definition: matcher.h:1297
typename Arc::Label Label
Definition: matcher.h:1051
void Next() final
Definition: matcher.h:693
const Arc & Value() const final
Definition: matcher.h:492
virtual ~MatcherBase()
Definition: matcher.h:140
virtual uint32_t Flags() const
Definition: matcher.h:156
MatchType Type(bool test) const override
Definition: matcher.h:1105
constexpr uint64_t kNotILabelSorted
Definition: properties.h:95
bool Find(Label label) final
Definition: matcher.h:477
bool Find(Label match_label) final
Definition: matcher.h:263
bool Done() const final
Definition: matcher.h:943
virtual ssize_t Priority(StateId s)
Definition: matcher.h:160
virtual Weight Final(StateId s) const
Definition: matcher.h:158
typename FST::Arc Arc
Definition: matcher.h:1050
typename Arc::StateId StateId
Definition: matcher.h:863
typename FST::Arc Arc
Definition: matcher.h:420
constexpr uint64_t kNoOEpsilons
Definition: properties.h:90
const M * GetMatcher() const
Definition: matcher.h:1483
bool Done() const
Definition: matcher.h:1552
typename M::FST FST
Definition: matcher.h:860
constexpr uint64_t kOLabelSorted
Definition: properties.h:98
Weight Final(StateId s) const final
Definition: matcher.h:964
virtual void SetState(StateId)=0
ssize_t Priority(StateId s)
Definition: matcher.h:1566
constexpr uint64_t kNoEpsilons
Definition: properties.h:80
const Arc & Value() const final
Definition: matcher.h:1466
Matcher(const FST *fst, MatchType match_type)
Definition: matcher.h:1532
const FST & GetFst() const override
Definition: matcher.h:725
constexpr uint8_t kArcOLabelValue
Definition: fst.h:446
Weight Final(StateId s) const final
Definition: matcher.h:318
typename FST::Arc Arc
Definition: matcher.h:1427
RhoMatcher * Copy(bool safe=false) const override
Definition: matcher.h:912
Matcher * Copy(bool safe=false) const
Definition: matcher.h:1544
void Next()
Definition: matcher.h:1556
void SetState(StateId s) final
Definition: matcher.h:1456
constexpr uint64_t kIDeterministic
Definition: properties.h:68
bool Find(Label label)
Definition: matcher.h:1550
virtual const Fst< Arc > & GetFst() const =0
Matcher(MatcherBase< Arc > *base_matcher)
Definition: matcher.h:1542
bool Find(Label match_label) final
Definition: matcher.h:756
constexpr uint64_t kIEpsilons
Definition: properties.h:83
typename M::FST FST
Definition: matcher.h:601
const FST & GetFst() const override
Definition: matcher.h:1173
uint32_t Flags() const override
Definition: matcher.h:981
HashMatcher(const HashMatcher &matcher, bool safe=false)
Definition: matcher.h:458
SigmaMatcher * Copy(bool safe=false) const override
Definition: matcher.h:1101
Label PhiLabel() const
Definition: matcher.h:736
void ClearMultiEpsLabels()
Definition: matcher.h:1350
constexpr uint64_t kILabelSorted
Definition: properties.h:93
void SetState(StateId s)
Definition: matcher.h:1548
ssize_t Priority(StateId s) final
Definition: matcher.h:320
SortedMatcher(const FST &fst, MatchType match_type, Label binary_label=1)
Definition: matcher.h:183
void SetState(StateId s) final
Definition: matcher.h:538
MultiEpsMatcher(const MultiEpsMatcher &matcher, bool safe=false)
Definition: matcher.h:1269
MatcherRewriteMode
Definition: matcher.h:577
bool Done() const final
Definition: matcher.h:665
virtual MatcherBase * Copy(bool safe=false) const =0
ssize_t Priority(StateId s) final
Definition: matcher.h:1475
constexpr uint64_t kNonODeterministic
Definition: properties.h:75
ssize_t Priority(StateId s) final
Definition: matcher.h:715
const FST & GetFst() const final
Definition: matcher.h:1477
typename FST::Arc Arc
Definition: matcher.h:602
constexpr uint32_t kMatcherFlags
Definition: matcher.h:125
typename Arc::Weight Weight
Definition: matcher.h:1053
uint32_t Flags() const
Definition: matcher.h:1330
SortedMatcher(const FST *fst, MatchType match_type, Label binary_label=1)
Definition: matcher.h:191
typename Arc::Weight Weight
Definition: matcher.h:423
SortedMatcher(const SortedMatcher &matcher, bool safe=false)
Definition: matcher.h:216
const FST & GetFst() const override
Definition: matcher.h:322
uint64_t Properties(uint64_t props) const override
Definition: matcher.h:1200
typename Arc::Weight Weight
Definition: matcher.h:138
constexpr uint64_t kString
Definition: properties.h:135
typename Arc::Label Label
Definition: matcher.h:136
virtual MatchType Type(bool) const =0
Weight Final(StateId s) const final
Definition: matcher.h:1473
bool Done() const
Definition: matcher.h:1295
void Next() final
Definition: matcher.h:1468
SigmaMatcher(const FST *fst, MatchType match_type, Label sigma_label=kNoLabel, MatcherRewriteMode rewrite_mode=MATCHER_REWRITE_AUTO, M *matcher=nullptr)
Definition: matcher.h:1085
HashMatcher(const FST &fst, MatchType match_type)
Definition: matcher.h:430
bool Done() const final
Definition: matcher.h:1464
PhiMatcher(const FST &fst, MatchType match_type, Label phi_label=kNoLabel, bool phi_loop=true, MatcherRewriteMode rewrite_mode=MATCHER_REWRITE_AUTO, M *matcher=nullptr)
Definition: matcher.h:608
MatchType Type(bool test) const override
Definition: matcher.h:473
ssize_t Priority(StateId s)
Definition: matcher.h:1332
typename Arc::Weight Weight
Definition: matcher.h:864
const Arc & Value() const
Definition: matcher.h:1554
typename Arc::Label Label
Definition: matcher.h:603
MatchType Type(bool test) const override
Definition: matcher.h:916
typename Arc::Label Label
Definition: matcher.h:862
typename M::FST FST
Definition: matcher.h:1049
typename Arc::Weight Weight
Definition: matcher.h:1521
typename Arc::Label Label
Definition: matcher.h:421
typename Arc::StateId StateId
Definition: matcher.h:1052
MultiEpsMatcher(const FST &fst, MatchType match_type, uint32_t flags=(kMultiEpsLoop|kMultiEpsList), M *matcher=nullptr, bool own_matcher=true)
Definition: matcher.h:1249
uint64_t Properties(uint64_t inprops) const override
Definition: matcher.h:1479
typename Arc::Weight Weight
Definition: matcher.h:1430
constexpr uint64_t kNoIEpsilons
Definition: properties.h:85
bool Find(Label match_label) final
Definition: matcher.h:1115
MultiEpsMatcher * Copy(bool safe=false) const
Definition: matcher.h:1282
uint64_t Properties(uint64_t inprops) const override
Definition: matcher.h:508
uint64_t Properties(uint64_t inprops) const override
Definition: matcher.h:324
RhoMatcher(const FST *fst, MatchType match_type, Label rho_label=kNoLabel, MatcherRewriteMode rewrite_mode=MATCHER_REWRITE_AUTO, M *matcher=nullptr)
Definition: matcher.h:896
HashMatcher(const FST *fst, MatchType match_type)
Definition: matcher.h:436
virtual bool Done() const =0
virtual const Arc & Value() const =0
const Arc & Value() const final
Definition: matcher.h:1136
constexpr uint64_t kAcceptor
Definition: properties.h:63
Weight Final(StateId s) const
Definition: matcher.h:1328
const FST & GetFst() const
Definition: matcher.h:1558
typename Arc::StateId StateId
Definition: matcher.h:604
constexpr uint32_t kRequireMatch
Definition: matcher.h:122
typename Arc::StateId StateId
Definition: matcher.h:137