FST  openfst-1.8.3
OpenFst Library
matcher.h
Go to the documentation of this file.
1 // Copyright 2005-2024 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the 'License');
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an 'AS IS' BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // See www.openfst.org for extensive documentation on this weighted
16 // finite-state transducer library.
17 //
18 // Classes to allow matching labels leaving FST states.
19 
20 #ifndef FST_MATCHER_H_
21 #define FST_MATCHER_H_
22 
23 #include <sys/types.h>
24 
25 #include <algorithm>
26 #include <cstddef>
27 #include <cstdint>
28 #include <map>
29 #include <memory>
30 #include <optional>
31 #include <tuple>
32 #include <unordered_map>
33 #include <utility>
34 
35 #include <fst/log.h>
36 #include <fst/fst.h>
37 #include <fst/mutable-fst.h> // for all internal FST accessors.
38 #include <fst/properties.h>
39 #include <fst/util.h>
40 #include <unordered_map>
41 #include <optional>
42 
43 namespace fst {
44 
45 // Matchers find and iterate through requested labels at FST states. In the
46 // simplest form, these are just some associative map or search keyed on labels.
47 // More generally, they may implement matching special labels that represent
48 // sets of labels such as sigma (all), rho (rest), or phi (fail). The Matcher
49 // interface is:
50 //
51 // template <class F>
52 // class Matcher {
53 // public:
54 // using FST = F;
55 // using Arc = typename FST::Arc;
56 // using Label = typename Arc::Label;
57 // using StateId = typename Arc::StateId;
58 // using Weight = typename Arc::Weight;
59 //
60 // // Required constructors. Note:
61 // // -- the constructors that copy the FST arg are useful for
62 // // letting the matcher manage the FST through copies
63 // // (esp with 'safe' copies); e.g. ComposeFst depends on this.
64 // // -- the constructor that does not copy is useful when the
65 // // the FST is mutated during the lifetime of the matcher
66 // // (o.w. the matcher would have its own unmutated deep copy).
67 //
68 // // This makes a copy of the FST.
69 // Matcher(const FST &fst, MatchType type);
70 // // This doesn't copy the FST.
71 // Matcher(const FST *fst, MatchType type);
72 // // This makes a copy of the FST.
73 // // See Copy() below.
74 // Matcher(const Matcher &matcher, bool safe = false);
75 //
76 // // If safe = true, the copy is thread-safe. See Fst<>::Copy() for
77 // // further doc.
78 // Matcher *Copy(bool safe = false) const override;
79 //
80 // // Returns the match type that can be provided (depending on compatibility
81 // // of the input FST). It is either the requested match type, MATCH_NONE,
82 // // or MATCH_UNKNOWN. If test is false, a costly testing is avoided, but
83 // // MATCH_UNKNOWN may be returned. If test is true, a definite answer is
84 // // returned, but may involve more costly computation (e.g., visiting
85 // // the FST).
86 // // MatchType Type(bool test) const override;
87 //
88 // // Specifies the current state.
89 // void SetState(StateId s) final;
90 //
91 // // Finds matches to a label at the current state, returning true if a match
92 // // found. kNoLabel matches any non-consuming transitions, e.g., epsilon
93 // // transitions, which do not require a matching symbol.
94 // bool Find(Label label) final;
95 //
96 // // Iterator methods. Note that initially and after SetState() these have
97 // // undefined behavior until Find() is called.
98 //
99 // bool Done() const final;
100 //
101 // const Arc &Value() const final;
102 //
103 // void Next() final;
104 //
105 // // Returns final weight of a state.
106 // Weight Final(StateId) const final;
107 //
108 // // Indicates preference for being the side used for matching in
109 // // composition. If the value is kRequirePriority, then it is
110 // // mandatory that it be used. Calling this method without passing the
111 // // current state of the matcher invalidates the state of the matcher.
112 // ssize_t Priority(StateId s) final;
113 //
114 // // This specifies the known FST properties as viewed from this matcher. It
115 // // takes as argument the input FST's known properties.
116 // uint64_t Properties(uint64_t props) const override;
117 //
118 // // Returns matcher flags.
119 // uint32_t Flags() const override;
120 //
121 // // Returns matcher FST.
122 // const FST &GetFst() const override;
123 // };
124 
125 // Basic matcher flags.
126 
127 // Matcher needs to be used as the matching side in composition for
128 // at least one state (has kRequirePriority).
129 inline constexpr uint32_t kRequireMatch = 0x00000001;
130 
131 // Flags used for basic matchers (see also lookahead.h).
132 inline constexpr uint32_t kMatcherFlags = kRequireMatch;
133 
134 // Matcher priority that is mandatory.
135 inline constexpr ssize_t kRequirePriority = -1;
136 
137 // Matcher interface, templated on the Arc definition; used for matcher
138 // specializations that are returned by the InitMatcher FST method.
139 template <class A>
140 class MatcherBase {
141  public:
142  using Arc = A;
143  using Label = typename Arc::Label;
144  using StateId = typename Arc::StateId;
145  using Weight = typename Arc::Weight;
146 
147  virtual ~MatcherBase() = default;
148 
149  // Virtual interface.
150 
151  virtual MatcherBase *Copy(bool safe = false) const = 0;
152  virtual MatchType Type(bool) const = 0;
153  virtual void SetState(StateId) = 0;
154  virtual bool Find(Label) = 0;
155  virtual bool Done() const = 0;
156  virtual const Arc &Value() const = 0;
157  virtual void Next() = 0;
158  virtual const Fst<Arc> &GetFst() const = 0;
159  virtual uint64_t Properties(uint64_t) const = 0;
160 
161  // Trivial implementations that can be used by derived classes. Full
162  // devirtualization is expected for any derived class marked final.
163  virtual uint32_t Flags() const { return 0; }
164 
165  virtual Weight Final(StateId s) const { return internal::Final(GetFst(), s); }
166 
167  virtual ssize_t Priority(StateId s) { return internal::NumArcs(GetFst(), s); }
168 };
169 
170 // A matcher that expects sorted labels on the side to be matched.
171 // If match_type == MATCH_INPUT, epsilons match the implicit self-loop
172 // Arc(kNoLabel, 0, Weight::One(), current_state) as well as any
173 // actual epsilon transitions. If match_type == MATCH_OUTPUT, then
174 // Arc(0, kNoLabel, Weight::One(), current_state) is instead matched.
175 template <class F>
176 class SortedMatcher : public MatcherBase<typename F::Arc> {
177  public:
178  using FST = F;
179  using Arc = typename FST::Arc;
180  using Label = typename Arc::Label;
181  using StateId = typename Arc::StateId;
182  using Weight = typename Arc::Weight;
183 
186 
187  // Labels >= binary_label will be searched for by binary search;
188  // o.w. linear search is used.
189  // This makes a copy of the FST.
190  SortedMatcher(const FST &fst, MatchType match_type, Label binary_label = 1)
191  : SortedMatcher(fst.Copy(), match_type, binary_label) {
192  owned_fst_.reset(&fst_);
193  }
194 
195  // Labels >= binary_label will be searched for by binary search;
196  // o.w. linear search is used.
197  // This doesn't copy the FST.
198  SortedMatcher(const FST *fst, MatchType match_type, Label binary_label = 1)
199  : fst_(*fst),
200  state_(kNoStateId),
201  aiter_(std::nullopt),
202  match_type_(match_type),
203  binary_label_(binary_label),
204  match_label_(kNoLabel),
205  narcs_(0),
206  loop_(kNoLabel, 0, Weight::One(), kNoStateId),
207  error_(false) {
208  switch (match_type_) {
209  case MATCH_INPUT:
210  case MATCH_NONE:
211  break;
212  case MATCH_OUTPUT:
213  std::swap(loop_.ilabel, loop_.olabel);
214  break;
215  default:
216  FSTERROR() << "SortedMatcher: Bad match type";
217  match_type_ = MATCH_NONE;
218  error_ = true;
219  }
220  }
221 
222  // This makes a copy of the FST.
223  SortedMatcher(const SortedMatcher &matcher, bool safe = false)
224  : owned_fst_(matcher.fst_.Copy(safe)),
225  fst_(*owned_fst_),
226  state_(kNoStateId),
227  aiter_(std::nullopt),
228  match_type_(matcher.match_type_),
229  binary_label_(matcher.binary_label_),
230  match_label_(kNoLabel),
231  narcs_(0),
232  loop_(matcher.loop_),
233  error_(matcher.error_) {}
234 
235  ~SortedMatcher() override = default;
236 
237  SortedMatcher *Copy(bool safe = false) const override {
238  return new SortedMatcher(*this, safe);
239  }
240 
241  MatchType Type(bool test) const override {
242  if (match_type_ == MATCH_NONE) return match_type_;
243  const auto true_prop =
244  match_type_ == MATCH_INPUT ? kILabelSorted : kOLabelSorted;
245  const auto false_prop =
246  match_type_ == MATCH_INPUT ? kNotILabelSorted : kNotOLabelSorted;
247  const auto props = fst_.Properties(true_prop | false_prop, test);
248  if (props & true_prop) {
249  return match_type_;
250  } else if (props & false_prop) {
251  return MATCH_NONE;
252  } else {
253  return MATCH_UNKNOWN;
254  }
255  }
256 
257  void SetState(StateId s) final {
258  if (state_ == s) return;
259  state_ = s;
260  if (match_type_ == MATCH_NONE) {
261  FSTERROR() << "SortedMatcher: Bad match type";
262  error_ = true;
263  }
264  aiter_.emplace(fst_, s);
265  aiter_->SetFlags(kArcNoCache, kArcNoCache);
266  narcs_ = internal::NumArcs(fst_, s);
267  loop_.nextstate = s;
268  }
269 
270  bool Find(Label match_label) final {
271  exact_match_ = true;
272  if (error_) {
273  current_loop_ = false;
274  match_label_ = kNoLabel;
275  return false;
276  }
277  current_loop_ = match_label == 0;
278  match_label_ = match_label == kNoLabel ? 0 : match_label;
279  if (Search()) {
280  return true;
281  } else {
282  return current_loop_;
283  }
284  }
285 
286  // Positions matcher to the first position where inserting match_label would
287  // maintain the sort order.
288  void LowerBound(Label label) {
289  exact_match_ = false;
290  current_loop_ = false;
291  if (error_) {
292  match_label_ = kNoLabel;
293  return;
294  }
295  match_label_ = label;
296  Search();
297  }
298 
299  // After Find(), returns false if no more exact matches.
300  // After LowerBound(), returns false if no more arcs.
301  bool Done() const final {
302  if (current_loop_) return false;
303  if (aiter_->Done()) return true;
304  if (!exact_match_) return false;
305  aiter_->SetFlags(
306  match_type_ == MATCH_INPUT ? kArcILabelValue : kArcOLabelValue,
308  return GetLabel() != match_label_;
309  }
310 
311  const Arc &Value() const final {
312  if (current_loop_) return loop_;
313  aiter_->SetFlags(kArcValueFlags, kArcValueFlags);
314  return aiter_->Value();
315  }
316 
317  void Next() final {
318  if (current_loop_) {
319  current_loop_ = false;
320  } else {
321  aiter_->Next();
322  }
323  }
324 
325  Weight Final(StateId s) const final { return MatcherBase<Arc>::Final(s); }
326 
327  ssize_t Priority(StateId s) final { return MatcherBase<Arc>::Priority(s); }
328 
329  const FST &GetFst() const override { return fst_; }
330 
331  uint64_t Properties(uint64_t inprops) const override {
332  return inprops | (error_ ? kError : 0);
333  }
334 
335  size_t Position() const { return aiter_ ? aiter_->Position() : 0; }
336 
337  private:
338  Label GetLabel() const {
339  const auto &arc = aiter_->Value();
340  return match_type_ == MATCH_INPUT ? arc.ilabel : arc.olabel;
341  }
342 
343  bool BinarySearch();
344  bool LinearSearch();
345  bool Search();
346 
347  std::unique_ptr<const FST> owned_fst_; // FST ptr if owned.
348  const FST &fst_; // FST for matching.
349  StateId state_; // Matcher state.
350  mutable std::optional<ArcIterator<FST>>
351  aiter_; // Iterator for current state.
352  MatchType match_type_; // Type of match to perform.
353  Label binary_label_; // Least label for binary search.
354  Label match_label_; // Current label to be matched.
355  size_t narcs_; // Current state arc count.
356  Arc loop_; // For non-consuming symbols.
357  bool current_loop_; // Current arc is the implicit loop.
358  bool exact_match_; // Exact match or lower bound?
359  bool error_; // Error encountered?
360 };
361 
362 // Returns true iff match to match_label_. The arc iterator is positioned at the
363 // lower bound, that is, the first element greater than or equal to
364 // match_label_, or the end if all elements are less than match_label_.
365 // If multiple elements are equal to the `match_label_`, returns the rightmost
366 // one.
367 template <class FST>
368 inline bool SortedMatcher<FST>::BinarySearch() {
369  size_t size = narcs_;
370  if (size == 0) {
371  return false;
372  }
373  size_t high = size - 1;
374  while (size > 1) {
375  const size_t half = size / 2;
376  const size_t mid = high - half;
377  aiter_->Seek(mid);
378  if (GetLabel() >= match_label_) {
379  high = mid;
380  }
381  size -= half;
382  }
383  aiter_->Seek(high);
384  const auto label = GetLabel();
385  if (label == match_label_) {
386  return true;
387  }
388  if (label < match_label_) {
389  aiter_->Next();
390  }
391  return false;
392 }
393 
394 // Returns true iff match to match_label_, positioning arc iterator at lower
395 // bound.
396 template <class FST>
397 inline bool SortedMatcher<FST>::LinearSearch() {
398  for (aiter_->Reset(); !aiter_->Done(); aiter_->Next()) {
399  const auto label = GetLabel();
400  if (label == match_label_) return true;
401  if (label > match_label_) break;
402  }
403  return false;
404 }
405 
406 // Returns true iff match to match_label_, positioning arc iterator at lower
407 // bound.
408 template <class FST>
409 inline bool SortedMatcher<FST>::Search() {
410  aiter_->SetFlags(
411  match_type_ == MATCH_INPUT ? kArcILabelValue : kArcOLabelValue,
413  if (match_label_ >= binary_label_) {
414  return BinarySearch();
415  } else {
416  return LinearSearch();
417  }
418 }
419 
420 // A matcher that stores labels in a per-state hash table populated upon the
421 // first visit to that state. Sorting is not required. Treatment of
422 // epsilons are the same as with SortedMatcher.
423 template <class F>
424 class HashMatcher : public MatcherBase<typename F::Arc> {
425  public:
426  using FST = F;
427  using Arc = typename FST::Arc;
428  using Label = typename Arc::Label;
429  using StateId = typename Arc::StateId;
430  using Weight = typename Arc::Weight;
431 
435 
436  // This makes a copy of the FST.
437  HashMatcher(const FST &fst, MatchType match_type)
438  : HashMatcher(fst.Copy(), match_type) {
439  owned_fst_.reset(&fst_);
440  }
441 
442  // This doesn't copy the FST.
443  HashMatcher(const FST *fst, MatchType match_type)
444  : fst_(*fst),
445  state_(kNoStateId),
446  match_type_(match_type),
447  loop_(kNoLabel, 0, Weight::One(), kNoStateId),
448  error_(false),
449  state_table_(std::make_shared<StateTable>()) {
450  switch (match_type_) {
451  case MATCH_INPUT:
452  case MATCH_NONE:
453  break;
454  case MATCH_OUTPUT:
455  std::swap(loop_.ilabel, loop_.olabel);
456  break;
457  default:
458  FSTERROR() << "HashMatcher: Bad match type";
459  match_type_ = MATCH_NONE;
460  error_ = true;
461  }
462  }
463 
464  // This makes a copy of the FST.
465  HashMatcher(const HashMatcher &matcher, bool safe = false)
466  : owned_fst_(matcher.fst_.Copy(safe)),
467  fst_(*owned_fst_),
468  state_(kNoStateId),
469  match_type_(matcher.match_type_),
470  loop_(matcher.loop_),
471  error_(matcher.error_),
472  state_table_(safe ? std::make_shared<StateTable>()
473  : matcher.state_table_) {}
474 
475  HashMatcher *Copy(bool safe = false) const override {
476  return new HashMatcher(*this, safe);
477  }
478 
479  // The argument is ignored as there are no relevant properties to test.
480  MatchType Type(bool test) const override { return match_type_; }
481 
482  void SetState(StateId s) final;
483 
484  bool Find(Label label) final {
485  current_loop_ = label == 0;
486  if (label == 0) {
487  Search(label);
488  return true;
489  }
490  if (label == kNoLabel) label = 0;
491  return Search(label);
492  }
493 
494  bool Done() const final {
495  if (current_loop_) return false;
496  return label_it_ == label_end_;
497  }
498 
499  const Arc &Value() const final {
500  if (current_loop_) return loop_;
501  aiter_->Seek(label_it_->second);
502  return aiter_->Value();
503  }
504 
505  void Next() final {
506  if (current_loop_) {
507  current_loop_ = false;
508  } else {
509  ++label_it_;
510  }
511  }
512 
513  const FST &GetFst() const override { return fst_; }
514 
515  uint64_t Properties(uint64_t inprops) const override {
516  return inprops | (error_ ? kError : 0);
517  }
518 
519  private:
520  Label GetLabel() const {
521  const auto &arc = aiter_->Value();
522  return match_type_ == MATCH_INPUT ? arc.ilabel : arc.olabel;
523  }
524 
525  bool Search(Label match_label);
526 
527  using LabelTable = std::unordered_multimap<Label, size_t>;
528  using StateTable = std::unordered_map<StateId, std::unique_ptr<LabelTable>>;
529 
530  std::unique_ptr<const FST> owned_fst_; // ptr to FST if owned.
531  const FST &fst_; // FST for matching.
532  StateId state_; // Matcher state.
533  MatchType match_type_;
534  Arc loop_; // The implicit loop itself.
535  bool current_loop_; // Is the current arc the implicit loop?
536  bool error_; // Error encountered?
537  std::unique_ptr<ArcIterator<FST>> aiter_;
538  std::shared_ptr<StateTable> state_table_; // Table from state to label table.
539  LabelTable *label_table_; // Pointer to current state's label table.
540  typename LabelTable::iterator label_it_; // Position for label.
541  typename LabelTable::iterator label_end_; // Position for last label + 1.
542 };
543 
544 template <class FST>
545 void HashMatcher<FST>::SetState(typename FST::Arc::StateId s) {
546  if (state_ == s) return;
547  // Resets everything for the state.
548  state_ = s;
549  loop_.nextstate = state_;
550  aiter_ = std::make_unique<ArcIterator<FST>>(fst_, state_);
551  if (match_type_ == MATCH_NONE) {
552  FSTERROR() << "HashMatcher: Bad match type";
553  error_ = true;
554  }
555  // Attempts to insert a new label table.
556  const auto &[it, success] =
557  state_table_->emplace(state_, std::make_unique<LabelTable>());
558  // Sets instance's pointer to the label table for this state.
559  label_table_ = it->second.get();
560  // If it already exists, no additional work is done and we simply return.
561  if (!success) return;
562  // Otherwise, populate this new table.
563  // Populates the label table.
564  label_table_->reserve(internal::NumArcs(fst_, state_));
565  const auto aiter_flags =
566  (match_type_ == MATCH_INPUT ? kArcILabelValue : kArcOLabelValue) |
567  kArcNoCache;
568  aiter_->SetFlags(aiter_flags, kArcFlags);
569  for (; !aiter_->Done(); aiter_->Next()) {
570  label_table_->emplace(GetLabel(), aiter_->Position());
571  }
572  aiter_->SetFlags(kArcValueFlags, kArcValueFlags);
573 }
574 
575 template <class FST>
576 inline bool HashMatcher<FST>::Search(typename FST::Arc::Label match_label) {
577  std::tie(label_it_, label_end_) = label_table_->equal_range(match_label);
578  if (label_it_ == label_end_) return false;
579  aiter_->Seek(label_it_->second);
580  return true;
581 }
582 
583 // Specifies whether we rewrite both the input and output sides during matching.
585  MATCHER_REWRITE_AUTO = 0, // Rewrites both sides iff acceptor.
588 };
589 
590 // For any requested label that doesn't match at a state, this matcher
591 // considers the *unique* transition that matches the label 'phi_label'
592 // (phi = 'fail'), and recursively looks for a match at its
593 // destination. When 'phi_loop' is true, if no match is found but a
594 // phi self-loop is found, then the phi transition found is returned
595 // with the phi_label rewritten as the requested label (both sides if
596 // an acceptor, or if 'rewrite_both' is true and both input and output
597 // labels of the found transition are 'phi_label'). If 'phi_label' is
598 // kNoLabel, this special matching is not done. PhiMatcher is
599 // templated itself on a matcher, which is used to perform the
600 // underlying matching. By default, the underlying matcher is
601 // constructed by PhiMatcher. The user can instead pass in this
602 // object; in that case, PhiMatcher takes its ownership.
603 // Phi non-determinism not supported. No non-consuming symbols other
604 // than epsilon supported with the underlying template argument matcher.
605 template <class M>
606 class PhiMatcher : public MatcherBase<typename M::Arc> {
607  public:
608  using FST = typename M::FST;
609  using Arc = typename FST::Arc;
610  using Label = typename Arc::Label;
611  using StateId = typename Arc::StateId;
612  using Weight = typename Arc::Weight;
613 
614  // This makes a copy of the FST (w/o 'matcher' arg).
615  PhiMatcher(const FST &fst, MatchType match_type, Label phi_label = kNoLabel,
616  bool phi_loop = true,
618  M *matcher = nullptr)
619  : matcher_(matcher ? matcher : new M(fst, match_type)),
620  match_type_(match_type),
621  phi_label_(phi_label),
622  state_(kNoStateId),
623  phi_loop_(phi_loop),
624  error_(false) {
625  if (match_type == MATCH_BOTH) {
626  FSTERROR() << "PhiMatcher: Bad match type";
627  match_type_ = MATCH_NONE;
628  error_ = true;
629  }
630  if (rewrite_mode == MATCHER_REWRITE_AUTO) {
631  rewrite_both_ = fst.Properties(kAcceptor, true);
632  } else if (rewrite_mode == MATCHER_REWRITE_ALWAYS) {
633  rewrite_both_ = true;
634  } else {
635  rewrite_both_ = false;
636  }
637  }
638 
639  // This doesn't copy the FST.
640  PhiMatcher(const FST *fst, MatchType match_type, Label phi_label = kNoLabel,
641  bool phi_loop = true,
643  M *matcher = nullptr)
644  : PhiMatcher(*fst, match_type, phi_label, phi_loop, rewrite_mode,
645  matcher ? matcher : new M(fst, match_type)) {}
646 
647  // This makes a copy of the FST.
648  PhiMatcher(const PhiMatcher &matcher, bool safe = false)
649  : matcher_(new M(*matcher.matcher_, safe)),
650  match_type_(matcher.match_type_),
651  phi_label_(matcher.phi_label_),
652  rewrite_both_(matcher.rewrite_both_),
653  state_(kNoStateId),
654  phi_loop_(matcher.phi_loop_),
655  error_(matcher.error_) {}
656 
657  PhiMatcher *Copy(bool safe = false) const override {
658  return new PhiMatcher(*this, safe);
659  }
660 
661  MatchType Type(bool test) const override { return matcher_->Type(test); }
662 
663  void SetState(StateId s) final {
664  if (state_ == s) return;
665  matcher_->SetState(s);
666  state_ = s;
667  has_phi_ = phi_label_ != kNoLabel;
668  }
669 
670  bool Find(Label match_label) final;
671 
672  bool Done() const final { return matcher_->Done(); }
673 
674  const Arc &Value() const final {
675  if ((phi_match_ == kNoLabel) && (phi_weight_ == Weight::One())) {
676  return matcher_->Value();
677  } else if (phi_match_ == 0) { // Virtual epsilon loop.
678  phi_arc_ = Arc(kNoLabel, 0, Weight::One(), state_);
679  if (match_type_ == MATCH_OUTPUT) {
680  std::swap(phi_arc_.ilabel, phi_arc_.olabel);
681  }
682  return phi_arc_;
683  } else {
684  phi_arc_ = matcher_->Value();
685  phi_arc_.weight = Times(phi_weight_, phi_arc_.weight);
686  if (phi_match_ != kNoLabel) { // Phi loop match.
687  if (rewrite_both_) {
688  if (phi_arc_.ilabel == phi_label_) phi_arc_.ilabel = phi_match_;
689  if (phi_arc_.olabel == phi_label_) phi_arc_.olabel = phi_match_;
690  } else if (match_type_ == MATCH_INPUT) {
691  phi_arc_.ilabel = phi_match_;
692  } else {
693  phi_arc_.olabel = phi_match_;
694  }
695  }
696  return phi_arc_;
697  }
698  }
699 
700  void Next() final { matcher_->Next(); }
701 
702  Weight Final(StateId s) const final {
703  auto weight = matcher_->Final(s);
704  if (phi_label_ == kNoLabel || weight != Weight::Zero()) {
705  return weight;
706  }
707  weight = Weight::One();
708  matcher_->SetState(s);
709  while (matcher_->Final(s) == Weight::Zero()) {
710  if (!matcher_->Find(phi_label_ == 0 ? -1 : phi_label_)) break;
711  weight = Times(weight, matcher_->Value().weight);
712  if (s == matcher_->Value().nextstate) {
713  return Weight::Zero(); // Does not follow phi self-loops.
714  }
715  s = matcher_->Value().nextstate;
716  matcher_->SetState(s);
717  }
718  weight = Times(weight, matcher_->Final(s));
719  return weight;
720  }
721 
722  ssize_t Priority(StateId s) final {
723  if (phi_label_ != kNoLabel) {
724  matcher_->SetState(s);
725  const bool has_phi = matcher_->Find(phi_label_ == 0 ? -1 : phi_label_);
726  return has_phi ? kRequirePriority : matcher_->Priority(s);
727  } else {
728  return matcher_->Priority(s);
729  }
730  }
731 
732  const FST &GetFst() const override { return matcher_->GetFst(); }
733 
734  uint64_t Properties(uint64_t props) const override;
735 
736  uint32_t Flags() const override {
737  if (phi_label_ == kNoLabel || match_type_ == MATCH_NONE) {
738  return matcher_->Flags();
739  }
740  return matcher_->Flags() | kRequireMatch;
741  }
742 
743  Label PhiLabel() const { return phi_label_; }
744 
745  private:
746  mutable std::unique_ptr<M> matcher_;
747  MatchType match_type_; // Type of match requested.
748  Label phi_label_; // Label that represents the phi transition.
749  bool rewrite_both_; // Rewrite both sides when both are phi_label_?
750  bool has_phi_; // Are there possibly phis at the current state?
751  Label phi_match_; // Current label that matches phi loop.
752  mutable Arc phi_arc_; // Arc to return.
753  StateId state_; // Matcher state.
754  Weight phi_weight_; // Product of the weights of phi transitions taken.
755  bool phi_loop_; // When true, phi self-loop are allowed and treated
756  // as rho (required for Aho-Corasick).
757  bool error_; // Error encountered?
758 
759  PhiMatcher &operator=(const PhiMatcher &) = delete;
760 };
761 
762 template <class M>
763 inline bool PhiMatcher<M>::Find(Label label) {
764  if (label == phi_label_ && phi_label_ != kNoLabel && phi_label_ != 0) {
765  FSTERROR() << "PhiMatcher::Find: bad label (phi): " << phi_label_;
766  error_ = true;
767  return false;
768  }
769  matcher_->SetState(state_);
770  phi_match_ = kNoLabel;
771  phi_weight_ = Weight::One();
772  // If phi_label_ == 0, there are no more true epsilon arcs.
773  if (phi_label_ == 0) {
774  if (label == kNoLabel) {
775  return false;
776  }
777  if (label == 0) { // but a virtual epsilon loop needs to be returned.
778  if (!matcher_->Find(kNoLabel)) {
779  return matcher_->Find(0);
780  } else {
781  phi_match_ = 0;
782  return true;
783  }
784  }
785  }
786  if (!has_phi_ || label == 0 || label == kNoLabel) {
787  return matcher_->Find(label);
788  }
789  auto s = state_;
790  while (!matcher_->Find(label)) {
791  // Look for phi transition (if phi_label_ == 0, we need to look
792  // for -1 to avoid getting the virtual self-loop)
793  if (!matcher_->Find(phi_label_ == 0 ? -1 : phi_label_)) return false;
794  if (phi_loop_ && matcher_->Value().nextstate == s) {
795  phi_match_ = label;
796  return true;
797  }
798  phi_weight_ = Times(phi_weight_, matcher_->Value().weight);
799  s = matcher_->Value().nextstate;
800  matcher_->Next();
801  if (!matcher_->Done()) {
802  FSTERROR() << "PhiMatcher: Phi non-determinism not supported";
803  error_ = true;
804  }
805  matcher_->SetState(s);
806  }
807  return true;
808 }
809 
810 template <class M>
811 inline uint64_t PhiMatcher<M>::Properties(uint64_t inprops) const {
812  auto outprops = matcher_->Properties(inprops);
813  if (error_) outprops |= kError;
814  if (match_type_ == MATCH_NONE) {
815  return outprops;
816  } else if (match_type_ == MATCH_INPUT) {
817  if (phi_label_ == 0) {
818  outprops &= ~(kEpsilons | kIEpsilons | kOEpsilons);
819  outprops |= kNoEpsilons | kNoIEpsilons;
820  }
821  if (rewrite_both_) {
822  return outprops &
825  } else {
826  return outprops &
829  }
830  } else if (match_type_ == MATCH_OUTPUT) {
831  if (phi_label_ == 0) {
832  outprops &= ~(kEpsilons | kIEpsilons | kOEpsilons);
833  outprops |= kNoEpsilons | kNoOEpsilons;
834  }
835  if (rewrite_both_) {
836  return outprops &
839  } else {
840  return outprops &
843  }
844  } else {
845  // Shouldn't ever get here.
846  FSTERROR() << "PhiMatcher: Bad match type: " << match_type_;
847  return 0;
848  }
849 }
850 
851 // For any requested label that doesn't match at a state, this matcher
852 // considers all transitions that match the label 'rho_label' (rho =
853 // 'rest'). Each such rho transition found is returned with the
854 // rho_label rewritten as the requested label (both sides if an
855 // acceptor, or if 'rewrite_both' is true and both input and output
856 // labels of the found transition are 'rho_label'). If 'rho_label' is
857 // kNoLabel, this special matching is not done. RhoMatcher is
858 // templated itself on a matcher, which is used to perform the
859 // underlying matching. By default, the underlying matcher is
860 // constructed by RhoMatcher. The user can instead pass in this
861 // object; in that case, RhoMatcher takes its ownership.
862 // No non-consuming symbols other than epsilon supported with
863 // the underlying template argument matcher.
864 template <class M>
865 class RhoMatcher : public MatcherBase<typename M::Arc> {
866  public:
867  using FST = typename M::FST;
868  using Arc = typename FST::Arc;
869  using Label = typename Arc::Label;
870  using StateId = typename Arc::StateId;
871  using Weight = typename Arc::Weight;
872 
873  // This makes a copy of the FST (w/o 'matcher' arg).
874  RhoMatcher(const FST &fst, MatchType match_type, Label rho_label = kNoLabel,
876  M *matcher = nullptr)
877  : matcher_(matcher ? matcher : new M(fst, match_type)),
878  match_type_(match_type),
879  rho_label_(rho_label),
880  error_(false),
881  state_(kNoStateId),
882  has_rho_(false) {
883  if (match_type == MATCH_BOTH) {
884  FSTERROR() << "RhoMatcher: Bad match type";
885  match_type_ = MATCH_NONE;
886  error_ = true;
887  }
888  if (rho_label == 0) {
889  FSTERROR() << "RhoMatcher: 0 cannot be used as rho_label";
890  rho_label_ = kNoLabel;
891  error_ = true;
892  }
893  if (rewrite_mode == MATCHER_REWRITE_AUTO) {
894  rewrite_both_ = fst.Properties(kAcceptor, true);
895  } else if (rewrite_mode == MATCHER_REWRITE_ALWAYS) {
896  rewrite_both_ = true;
897  } else {
898  rewrite_both_ = false;
899  }
900  }
901 
902  // This doesn't copy the FST.
903  RhoMatcher(const FST *fst, MatchType match_type, Label rho_label = kNoLabel,
905  M *matcher = nullptr)
906  : RhoMatcher(*fst, match_type, rho_label, rewrite_mode,
907  matcher ? matcher : new M(fst, match_type)) {}
908 
909  // This makes a copy of the FST.
910  RhoMatcher(const RhoMatcher &matcher, bool safe = false)
911  : matcher_(new M(*matcher.matcher_, safe)),
912  match_type_(matcher.match_type_),
913  rho_label_(matcher.rho_label_),
914  rewrite_both_(matcher.rewrite_both_),
915  error_(matcher.error_),
916  state_(kNoStateId),
917  has_rho_(false) {}
918 
919  RhoMatcher *Copy(bool safe = false) const override {
920  return new RhoMatcher(*this, safe);
921  }
922 
923  MatchType Type(bool test) const override { return matcher_->Type(test); }
924 
925  void SetState(StateId s) final {
926  if (state_ == s) return;
927  state_ = s;
928  matcher_->SetState(s);
929  has_rho_ = rho_label_ != kNoLabel;
930  }
931 
932  bool Find(Label label) final {
933  if (label == rho_label_ && rho_label_ != kNoLabel) {
934  FSTERROR() << "RhoMatcher::Find: bad label (rho)";
935  error_ = true;
936  return false;
937  }
938  if (matcher_->Find(label)) {
939  rho_match_ = kNoLabel;
940  return true;
941  } else if (has_rho_ && label != 0 && label != kNoLabel &&
942  (has_rho_ = matcher_->Find(rho_label_))) {
943  rho_match_ = label;
944  return true;
945  } else {
946  return false;
947  }
948  }
949 
950  bool Done() const final { return matcher_->Done(); }
951 
952  const Arc &Value() const final {
953  if (rho_match_ == kNoLabel) {
954  return matcher_->Value();
955  } else {
956  rho_arc_ = matcher_->Value();
957  if (rewrite_both_) {
958  if (rho_arc_.ilabel == rho_label_) rho_arc_.ilabel = rho_match_;
959  if (rho_arc_.olabel == rho_label_) rho_arc_.olabel = rho_match_;
960  } else if (match_type_ == MATCH_INPUT) {
961  rho_arc_.ilabel = rho_match_;
962  } else {
963  rho_arc_.olabel = rho_match_;
964  }
965  return rho_arc_;
966  }
967  }
968 
969  void Next() final { matcher_->Next(); }
970 
971  Weight Final(StateId s) const final { return matcher_->Final(s); }
972 
973  ssize_t Priority(StateId s) final {
974  state_ = s;
975  matcher_->SetState(s);
976  has_rho_ = matcher_->Find(rho_label_);
977  if (has_rho_) {
978  return kRequirePriority;
979  } else {
980  return matcher_->Priority(s);
981  }
982  }
983 
984  const FST &GetFst() const override { return matcher_->GetFst(); }
985 
986  uint64_t Properties(uint64_t props) const override;
987 
988  uint32_t Flags() const override {
989  if (rho_label_ == kNoLabel || match_type_ == MATCH_NONE) {
990  return matcher_->Flags();
991  }
992  return matcher_->Flags() | kRequireMatch;
993  }
994 
995  Label RhoLabel() const { return rho_label_; }
996 
997  private:
998  std::unique_ptr<M> matcher_;
999  MatchType match_type_; // Type of match requested.
1000  Label rho_label_; // Label that represents the rho transition
1001  bool rewrite_both_; // Rewrite both sides when both are rho_label_?
1002  Label rho_match_; // Current label that matches rho transition.
1003  mutable Arc rho_arc_; // Arc to return when rho match.
1004  bool error_; // Error encountered?
1005  StateId state_; // Matcher state.
1006  bool has_rho_; // Are there possibly rhos at the current state?
1007 };
1008 
1009 template <class M>
1010 inline uint64_t RhoMatcher<M>::Properties(uint64_t inprops) const {
1011  auto outprops = matcher_->Properties(inprops);
1012  if (error_) outprops |= kError;
1013  if (match_type_ == MATCH_NONE) {
1014  return outprops;
1015  } else if (match_type_ == MATCH_INPUT) {
1016  if (rewrite_both_) {
1017  return outprops &
1020  } else {
1021  return outprops & ~(kODeterministic | kAcceptor | kString |
1023  }
1024  } else if (match_type_ == MATCH_OUTPUT) {
1025  if (rewrite_both_) {
1026  return outprops &
1029  } else {
1030  return outprops & ~(kIDeterministic | kAcceptor | kString |
1032  }
1033  } else {
1034  // Shouldn't ever get here.
1035  FSTERROR() << "RhoMatcher: Bad match type: " << match_type_;
1036  return 0;
1037  }
1038 }
1039 
1040 // For any requested label, this matcher considers all transitions
1041 // that match the label 'sigma_label' (sigma = "any"), and this in
1042 // additions to transitions with the requested label. Each such sigma
1043 // transition found is returned with the sigma_label rewritten as the
1044 // requested label (both sides if an acceptor, or if 'rewrite_both' is
1045 // true and both input and output labels of the found transition are
1046 // 'sigma_label'). If 'sigma_label' is kNoLabel, this special
1047 // matching is not done. SigmaMatcher is templated itself on a
1048 // matcher, which is used to perform the underlying matching. By
1049 // default, the underlying matcher is constructed by SigmaMatcher.
1050 // The user can instead pass in this object; in that case,
1051 // SigmaMatcher takes its ownership. No non-consuming symbols other
1052 // than epsilon supported with the underlying template argument matcher.
1053 template <class M>
1054 class SigmaMatcher : public MatcherBase<typename M::Arc> {
1055  public:
1056  using FST = typename M::FST;
1057  using Arc = typename FST::Arc;
1058  using Label = typename Arc::Label;
1059  using StateId = typename Arc::StateId;
1060  using Weight = typename Arc::Weight;
1061 
1062  // This makes a copy of the FST (w/o 'matcher' arg).
1063  SigmaMatcher(const FST &fst, MatchType match_type,
1064  Label sigma_label = kNoLabel,
1066  M *matcher = nullptr)
1067  : matcher_(matcher ? matcher : new M(fst, match_type)),
1068  match_type_(match_type),
1069  sigma_label_(sigma_label),
1070  error_(false),
1071  state_(kNoStateId) {
1072  if (match_type == MATCH_BOTH) {
1073  FSTERROR() << "SigmaMatcher: Bad match type";
1074  match_type_ = MATCH_NONE;
1075  error_ = true;
1076  }
1077  if (sigma_label == 0) {
1078  FSTERROR() << "SigmaMatcher: 0 cannot be used as sigma_label";
1079  sigma_label_ = kNoLabel;
1080  error_ = true;
1081  }
1082  if (rewrite_mode == MATCHER_REWRITE_AUTO) {
1083  rewrite_both_ = fst.Properties(kAcceptor, true);
1084  } else if (rewrite_mode == MATCHER_REWRITE_ALWAYS) {
1085  rewrite_both_ = true;
1086  } else {
1087  rewrite_both_ = false;
1088  }
1089  }
1090 
1091  // This doesn't copy the FST.
1092  SigmaMatcher(const FST *fst, MatchType match_type,
1093  Label sigma_label = kNoLabel,
1095  M *matcher = nullptr)
1096  : SigmaMatcher(*fst, match_type, sigma_label, rewrite_mode,
1097  matcher ? matcher : new M(fst, match_type)) {}
1098 
1099  // This makes a copy of the FST.
1100  SigmaMatcher(const SigmaMatcher &matcher, bool safe = false)
1101  : matcher_(new M(*matcher.matcher_, safe)),
1102  match_type_(matcher.match_type_),
1103  sigma_label_(matcher.sigma_label_),
1104  rewrite_both_(matcher.rewrite_both_),
1105  error_(matcher.error_),
1106  state_(kNoStateId) {}
1107 
1108  SigmaMatcher *Copy(bool safe = false) const override {
1109  return new SigmaMatcher(*this, safe);
1110  }
1111 
1112  MatchType Type(bool test) const override { return matcher_->Type(test); }
1113 
1114  void SetState(StateId s) final {
1115  if (state_ == s) return;
1116  state_ = s;
1117  matcher_->SetState(s);
1118  has_sigma_ =
1119  (sigma_label_ != kNoLabel) ? matcher_->Find(sigma_label_) : false;
1120  }
1121 
1122  bool Find(Label match_label) final {
1123  match_label_ = match_label;
1124  if (match_label == sigma_label_ && sigma_label_ != kNoLabel) {
1125  FSTERROR() << "SigmaMatcher::Find: bad label (sigma)";
1126  error_ = true;
1127  return false;
1128  }
1129  if (matcher_->Find(match_label)) {
1130  sigma_match_ = kNoLabel;
1131  return true;
1132  } else if (has_sigma_ && match_label != 0 && match_label != kNoLabel &&
1133  matcher_->Find(sigma_label_)) {
1134  sigma_match_ = match_label;
1135  return true;
1136  } else {
1137  return false;
1138  }
1139  }
1140 
1141  bool Done() const final { return matcher_->Done(); }
1142 
1143  const Arc &Value() const final {
1144  if (sigma_match_ == kNoLabel) {
1145  return matcher_->Value();
1146  } else {
1147  sigma_arc_ = matcher_->Value();
1148  if (rewrite_both_) {
1149  if (sigma_arc_.ilabel == sigma_label_) sigma_arc_.ilabel = sigma_match_;
1150  if (sigma_arc_.olabel == sigma_label_) sigma_arc_.olabel = sigma_match_;
1151  } else if (match_type_ == MATCH_INPUT) {
1152  sigma_arc_.ilabel = sigma_match_;
1153  } else {
1154  sigma_arc_.olabel = sigma_match_;
1155  }
1156  return sigma_arc_;
1157  }
1158  }
1159 
1160  void Next() final {
1161  matcher_->Next();
1162  if (matcher_->Done() && has_sigma_ && (sigma_match_ == kNoLabel) &&
1163  (match_label_ > 0)) {
1164  matcher_->Find(sigma_label_);
1165  sigma_match_ = match_label_;
1166  }
1167  }
1168 
1169  Weight Final(StateId s) const final { return matcher_->Final(s); }
1170 
1171  ssize_t Priority(StateId s) final {
1172  if (sigma_label_ != kNoLabel) {
1173  SetState(s);
1174  return has_sigma_ ? kRequirePriority : matcher_->Priority(s);
1175  } else {
1176  return matcher_->Priority(s);
1177  }
1178  }
1179 
1180  const FST &GetFst() const override { return matcher_->GetFst(); }
1181 
1182  uint64_t Properties(uint64_t props) const override;
1183 
1184  uint32_t Flags() const override {
1185  if (sigma_label_ == kNoLabel || match_type_ == MATCH_NONE) {
1186  return matcher_->Flags();
1187  }
1188  return matcher_->Flags() | kRequireMatch;
1189  }
1190 
1191  Label SigmaLabel() const { return sigma_label_; }
1192 
1193  private:
1194  std::unique_ptr<M> matcher_;
1195  MatchType match_type_; // Type of match requested.
1196  Label sigma_label_; // Label that represents the sigma transition.
1197  bool rewrite_both_; // Rewrite both sides when both are sigma_label_?
1198  bool has_sigma_; // Are there sigmas at the current state?
1199  Label sigma_match_; // Current label that matches sigma transition.
1200  mutable Arc sigma_arc_; // Arc to return when sigma match.
1201  Label match_label_; // Label being matched.
1202  bool error_; // Error encountered?
1203  StateId state_; // Matcher state.
1204 };
1205 
1206 template <class M>
1207 inline uint64_t SigmaMatcher<M>::Properties(uint64_t inprops) const {
1208  auto outprops = matcher_->Properties(inprops);
1209  if (error_) outprops |= kError;
1210  if (match_type_ == MATCH_NONE) {
1211  return outprops;
1212  } else if (rewrite_both_) {
1213  return outprops & ~(kIDeterministic | kNonIDeterministic | kODeterministic |
1216  } else if (match_type_ == MATCH_INPUT) {
1217  return outprops & ~(kIDeterministic | kNonIDeterministic | kODeterministic |
1219  kString | kAcceptor);
1220  } else if (match_type_ == MATCH_OUTPUT) {
1221  return outprops & ~(kIDeterministic | kNonIDeterministic | kODeterministic |
1223  kString | kAcceptor);
1224  } else {
1225  // Shouldn't ever get here.
1226  FSTERROR() << "SigmaMatcher: Bad match type: " << match_type_;
1227  return 0;
1228  }
1229 }
1230 
1231 // Flags for MultiEpsMatcher.
1232 
1233 // Return multi-epsilon arcs for Find(kNoLabel).
1234 inline constexpr uint32_t kMultiEpsList = 0x00000001;
1235 
1236 // Return a kNolabel loop for Find(multi_eps).
1237 inline constexpr uint32_t kMultiEpsLoop = 0x00000002;
1238 
1239 // MultiEpsMatcher: allows treating multiple non-0 labels as
1240 // non-consuming labels in addition to 0 that is always
1241 // non-consuming. Precise behavior controlled by 'flags' argument. By
1242 // default, the underlying matcher is constructed by
1243 // MultiEpsMatcher. The user can instead pass in this object; in that
1244 // case, MultiEpsMatcher takes its ownership iff 'own_matcher' is
1245 // true.
1246 template <class M>
1248  public:
1249  using FST = typename M::FST;
1250  using Arc = typename FST::Arc;
1251  using Label = typename Arc::Label;
1252  using StateId = typename Arc::StateId;
1253  using Weight = typename Arc::Weight;
1254 
1255  // This makes a copy of the FST (w/o 'matcher' arg).
1256  MultiEpsMatcher(const FST &fst, MatchType match_type,
1257  uint32_t flags = (kMultiEpsLoop | kMultiEpsList),
1258  M *matcher = nullptr, bool own_matcher = true)
1259  : matcher_(matcher ? matcher : new M(fst, match_type)),
1260  flags_(flags),
1261  own_matcher_(matcher ? own_matcher : true) {
1262  Init(match_type);
1263  }
1264 
1265  // This doesn't copy the FST.
1266  MultiEpsMatcher(const FST *fst, MatchType match_type,
1267  uint32_t flags = (kMultiEpsLoop | kMultiEpsList),
1268  M *matcher = nullptr, bool own_matcher = true)
1269  : matcher_(matcher ? matcher : new M(fst, match_type)),
1270  flags_(flags),
1271  own_matcher_(matcher ? own_matcher : true) {
1272  Init(match_type);
1273  }
1274 
1275  // This makes a copy of the FST.
1276  MultiEpsMatcher(const MultiEpsMatcher &matcher, bool safe = false)
1277  : matcher_(new M(*matcher.matcher_, safe)),
1278  flags_(matcher.flags_),
1279  own_matcher_(true),
1280  multi_eps_labels_(matcher.multi_eps_labels_),
1281  loop_(matcher.loop_) {
1282  loop_.nextstate = kNoStateId;
1283  }
1284 
1286  if (own_matcher_) delete matcher_;
1287  }
1288 
1289  MultiEpsMatcher *Copy(bool safe = false) const {
1290  return new MultiEpsMatcher(*this, safe);
1291  }
1292 
1293  MatchType Type(bool test) const { return matcher_->Type(test); }
1294 
1295  void SetState(StateId state) {
1296  matcher_->SetState(state);
1297  loop_.nextstate = state;
1298  }
1299 
1300  bool Find(Label label);
1301 
1302  bool Done() const { return done_; }
1303 
1304  const Arc &Value() const { return current_loop_ ? loop_ : matcher_->Value(); }
1305 
1306  void Next() {
1307  if (!current_loop_) {
1308  matcher_->Next();
1309  done_ = matcher_->Done();
1310  if (done_ && multi_eps_iter_ != multi_eps_labels_.End()) {
1311  ++multi_eps_iter_;
1312  while ((multi_eps_iter_ != multi_eps_labels_.End()) &&
1313  !matcher_->Find(*multi_eps_iter_)) {
1314  ++multi_eps_iter_;
1315  }
1316  if (multi_eps_iter_ != multi_eps_labels_.End()) {
1317  done_ = false;
1318  } else {
1319  done_ = !matcher_->Find(kNoLabel);
1320  }
1321  }
1322  } else {
1323  done_ = true;
1324  }
1325  }
1326 
1327  const FST &GetFst() const { return matcher_->GetFst(); }
1328 
1329  uint64_t Properties(uint64_t props) const {
1330  return matcher_->Properties(props);
1331  }
1332 
1333  const M *GetMatcher() const { return matcher_; }
1334 
1335  Weight Final(StateId s) const { return matcher_->Final(s); }
1336 
1337  uint32_t Flags() const { return matcher_->Flags(); }
1338 
1339  ssize_t Priority(StateId s) { return matcher_->Priority(s); }
1340 
1341  void AddMultiEpsLabel(Label label) {
1342  if (label == 0) {
1343  FSTERROR() << "MultiEpsMatcher: Bad multi-eps label: 0";
1344  } else {
1345  multi_eps_labels_.Insert(label);
1346  }
1347  }
1348 
1350  if (label == 0) {
1351  FSTERROR() << "MultiEpsMatcher: Bad multi-eps label: 0";
1352  } else {
1353  multi_eps_labels_.Erase(label);
1354  }
1355  }
1356 
1357  void ClearMultiEpsLabels() { multi_eps_labels_.Clear(); }
1358 
1359  private:
1360  void Init(MatchType match_type) {
1361  if (match_type == MATCH_INPUT) {
1362  loop_.ilabel = kNoLabel;
1363  loop_.olabel = 0;
1364  } else {
1365  loop_.ilabel = 0;
1366  loop_.olabel = kNoLabel;
1367  }
1368  loop_.weight = Weight::One();
1369  loop_.nextstate = kNoStateId;
1370  }
1371 
1372  M *matcher_;
1373  uint32_t flags_;
1374  bool own_matcher_; // Does this class delete the matcher?
1375 
1376  // Multi-eps label set.
1377  CompactSet<Label, kNoLabel> multi_eps_labels_;
1378  typename CompactSet<Label, kNoLabel>::const_iterator multi_eps_iter_;
1379 
1380  bool current_loop_; // Current arc is the implicit loop?
1381  mutable Arc loop_; // For non-consuming symbols.
1382  bool done_; // Matching done?
1383 
1384  MultiEpsMatcher &operator=(const MultiEpsMatcher &) = delete;
1385 };
1386 
1387 template <class M>
1388 inline bool MultiEpsMatcher<M>::Find(Label label) {
1389  multi_eps_iter_ = multi_eps_labels_.End();
1390  current_loop_ = false;
1391  bool ret;
1392  if (label == 0) {
1393  ret = matcher_->Find(0);
1394  } else if (label == kNoLabel) {
1395  if (flags_ & kMultiEpsList) {
1396  // Returns all non-consuming arcs (including epsilon).
1397  multi_eps_iter_ = multi_eps_labels_.Begin();
1398  while ((multi_eps_iter_ != multi_eps_labels_.End()) &&
1399  !matcher_->Find(*multi_eps_iter_)) {
1400  ++multi_eps_iter_;
1401  }
1402  if (multi_eps_iter_ != multi_eps_labels_.End()) {
1403  ret = true;
1404  } else {
1405  ret = matcher_->Find(kNoLabel);
1406  }
1407  } else {
1408  // Returns all epsilon arcs.
1409  ret = matcher_->Find(kNoLabel);
1410  }
1411  } else if ((flags_ & kMultiEpsLoop) &&
1412  multi_eps_labels_.Find(label) != multi_eps_labels_.End()) {
1413  // Returns implicit loop.
1414  current_loop_ = true;
1415  ret = true;
1416  } else {
1417  ret = matcher_->Find(label);
1418  }
1419  done_ = !ret;
1420  return ret;
1421 }
1422 
1423 // This class discards any implicit matches (e.g., the implicit epsilon
1424 // self-loops in the SortedMatcher). Matchers are most often used in
1425 // composition/intersection where the implicit matches are needed
1426 // e.g. for epsilon processing. However, if a matcher is simply being
1427 // used to look-up explicit label matches, this class saves the user
1428 // from having to check for and discard the unwanted implicit matches
1429 // themselves.
1430 template <class M>
1431 class ExplicitMatcher : public MatcherBase<typename M::Arc> {
1432  public:
1433  using FST = typename M::FST;
1434  using Arc = typename FST::Arc;
1435  using Label = typename Arc::Label;
1436  using StateId = typename Arc::StateId;
1437  using Weight = typename Arc::Weight;
1438 
1439  // This makes a copy of the FST.
1440  ExplicitMatcher(const FST &fst, MatchType match_type, M *matcher = nullptr)
1441  : matcher_(matcher ? matcher : new M(fst, match_type)),
1442  match_type_(match_type),
1443  error_(false) {}
1444 
1445  // This doesn't copy the FST.
1446  ExplicitMatcher(const FST *fst, MatchType match_type, M *matcher = nullptr)
1447  : matcher_(matcher ? matcher : new M(fst, match_type)),
1448  match_type_(match_type),
1449  error_(false) {}
1450 
1451  // This makes a copy of the FST.
1452  ExplicitMatcher(const ExplicitMatcher &matcher, bool safe = false)
1453  : matcher_(new M(*matcher.matcher_, safe)),
1454  match_type_(matcher.match_type_),
1455  error_(matcher.error_) {}
1456 
1457  ExplicitMatcher *Copy(bool safe = false) const override {
1458  return new ExplicitMatcher(*this, safe);
1459  }
1460 
1461  MatchType Type(bool test) const override { return matcher_->Type(test); }
1462 
1463  void SetState(StateId s) final { matcher_->SetState(s); }
1464 
1465  bool Find(Label label) final {
1466  matcher_->Find(label);
1467  CheckArc();
1468  return !Done();
1469  }
1470 
1471  bool Done() const final { return matcher_->Done(); }
1472 
1473  const Arc &Value() const final { return matcher_->Value(); }
1474 
1475  void Next() final {
1476  matcher_->Next();
1477  CheckArc();
1478  }
1479 
1480  Weight Final(StateId s) const final { return matcher_->Final(s); }
1481 
1482  ssize_t Priority(StateId s) final { return matcher_->Priority(s); }
1483 
1484  const FST &GetFst() const final { return matcher_->GetFst(); }
1485 
1486  uint64_t Properties(uint64_t inprops) const override {
1487  return matcher_->Properties(inprops);
1488  }
1489 
1490  const M *GetMatcher() const { return matcher_.get(); }
1491 
1492  uint32_t Flags() const override { return matcher_->Flags(); }
1493 
1494  private:
1495  // Checks current arc if available and explicit. If not available, stops. If
1496  // not explicit, checks next ones.
1497  void CheckArc() {
1498  for (; !matcher_->Done(); matcher_->Next()) {
1499  const auto label = match_type_ == MATCH_INPUT ? matcher_->Value().ilabel
1500  : matcher_->Value().olabel;
1501  if (label != kNoLabel) return;
1502  }
1503  }
1504 
1505  std::unique_ptr<M> matcher_;
1506  MatchType match_type_; // Type of match requested.
1507  bool error_; // Error encountered?
1508 };
1509 
1510 // Generic matcher, templated on the FST definition.
1511 //
1512 // Here is a typical use:
1513 //
1514 // Matcher<StdFst> matcher(fst, MATCH_INPUT);
1515 // matcher.SetState(state);
1516 // if (matcher.Find(label))
1517 // for (; !matcher.Done(); matcher.Next()) {
1518 // auto &arc = matcher.Value();
1519 // ...
1520 // }
1521 template <class F>
1522 class Matcher {
1523  public:
1524  using FST = F;
1525  using Arc = typename F::Arc;
1526  using Label = typename Arc::Label;
1527  using StateId = typename Arc::StateId;
1528  using Weight = typename Arc::Weight;
1529 
1530  // This makes a copy of the FST.
1531  Matcher(const FST &fst, MatchType match_type)
1532  : owned_fst_(fst.Copy()), base_(owned_fst_->InitMatcher(match_type)) {
1533  if (!base_)
1534  base_ =
1535  std::make_unique<SortedMatcher<FST>>(owned_fst_.get(), match_type);
1536  }
1537 
1538  // This doesn't copy the FST.
1539  Matcher(const FST *fst, MatchType match_type)
1540  : base_(fst->InitMatcher(match_type)) {
1541  if (!base_) base_ = std::make_unique<SortedMatcher<FST>>(fst, match_type);
1542  }
1543 
1544  // This makes a copy of the FST.
1545  Matcher(const Matcher &matcher, bool safe = false)
1546  : base_(matcher.base_->Copy(safe)) {}
1547 
1548  // Takes ownership of the provided matcher.
1549  explicit Matcher(MatcherBase<Arc> *base_matcher) : base_(base_matcher) {}
1550 
1551  Matcher *Copy(bool safe = false) const { return new Matcher(*this, safe); }
1552 
1553  MatchType Type(bool test) const { return base_->Type(test); }
1554 
1555  void SetState(StateId s) { base_->SetState(s); }
1556 
1557  bool Find(Label label) { return base_->Find(label); }
1558 
1559  bool Done() const { return base_->Done(); }
1560 
1561  const Arc &Value() const { return base_->Value(); }
1562 
1563  void Next() { base_->Next(); }
1564 
1565  const FST &GetFst() const { return down_cast<const FST &>(base_->GetFst()); }
1566 
1567  uint64_t Properties(uint64_t props) const { return base_->Properties(props); }
1568 
1569  Weight Final(StateId s) const { return base_->Final(s); }
1570 
1571  uint32_t Flags() const { return base_->Flags() & kMatcherFlags; }
1572 
1573  ssize_t Priority(StateId s) { return base_->Priority(s); }
1574 
1575  private:
1576  std::unique_ptr<const FST> owned_fst_;
1577  std::unique_ptr<MatcherBase<Arc>> base_;
1578 };
1579 
1580 } // namespace fst
1581 
1582 #endif // FST_MATCHER_H_
bool Find(Label label) final
Definition: matcher.h:932
MatchType Type(bool test) const
Definition: matcher.h:1553
void Next() final
Definition: matcher.h:317
void SetState(StateId s) final
Definition: matcher.h:257
virtual void Next()=0
RhoMatcher(const RhoMatcher &matcher, bool safe=false)
Definition: matcher.h:910
Weight Final(StateId s) const final
Definition: matcher.h:702
const Arc & Value() const final
Definition: matcher.h:674
typename FST::Arc Arc
Definition: matcher.h:868
void SetState(StateId s) final
Definition: matcher.h:925
constexpr uint8_t kArcValueFlags
Definition: fst.h:453
virtual uint64_t Properties(uint64_t) const =0
constexpr ssize_t kRequirePriority
Definition: matcher.h:135
typename typename Filter::Matcher2::FST FST
Definition: matcher.h:1249
Matcher(const FST &fst, MatchType match_type)
Definition: matcher.h:1531
const FST & GetFst() const
Definition: matcher.h:1327
void AddMultiEpsLabel(Label label)
Definition: matcher.h:1341
constexpr int kNoLabel
Definition: fst.h:195
size_t Position() const
Definition: matcher.h:335
constexpr uint8_t kArcNoCache
Definition: fst.h:452
SortedMatcher * Copy(bool safe=false) const override
Definition: matcher.h:237
constexpr uint64_t kOEpsilons
Definition: properties.h:89
uint32_t Flags() const override
Definition: matcher.h:1492
uint32_t Flags() const
Definition: matcher.h:1571
ssize_t Priority(StateId s) final
Definition: matcher.h:1171
const Arc & Value() const final
Definition: matcher.h:952
ExplicitMatcher(const FST *fst, MatchType match_type, M *matcher=nullptr)
Definition: matcher.h:1446
bool Done() const final
Definition: matcher.h:494
void LowerBound(Label label)
Definition: matcher.h:288
typename Arc::Label Label
Definition: matcher.h:1435
bool Find(Label label)
Definition: matcher.h:1388
PhiMatcher * Copy(bool safe=false) const override
Definition: matcher.h:657
const Arc & Value() const final
Definition: matcher.h:311
MatchType Type(bool test) const override
Definition: matcher.h:1461
ssize_t Priority(StateId s) final
Definition: matcher.h:973
uint64_t Properties(uint64_t props) const
Definition: matcher.h:1567
const M * GetMatcher() const
Definition: matcher.h:1333
uint64_t Properties(uint64_t props) const override
Definition: matcher.h:1010
SigmaMatcher(const SigmaMatcher &matcher, bool safe=false)
Definition: matcher.h:1100
Arc::Weight Final(const ExpandedFst< Arc > &fst, typename Arc::StateId s)
Definition: expanded-fst.h:107
constexpr uint32_t kMultiEpsList
Definition: matcher.h:1234
MatchType
Definition: fst.h:187
constexpr uint8_t kArcFlags
Definition: fst.h:455
ErrorWeight Times(const ErrorWeight &, const ErrorWeight &)
Definition: error-weight.h:64
constexpr uint64_t kError
Definition: properties.h:52
PhiMatcher(const FST *fst, MatchType match_type, Label phi_label=kNoLabel, bool phi_loop=true, MatcherRewriteMode rewrite_mode=MATCHER_REWRITE_AUTO, M *matcher=nullptr)
Definition: matcher.h:640
Weight Final(StateId s) const
Definition: matcher.h:1569
uint64_t Properties(uint64_t props) const override
Definition: matcher.h:811
bool Find(Label label) final
Definition: matcher.h:1465
typename Arc::StateId StateId
Definition: matcher.h:429
virtual bool Find(Label)=0
typename Arc::Label Label
Definition: matcher.h:1526
uint64_t Properties(uint64_t props) const
Definition: matcher.h:1329
MatchType Type(bool test) const override
Definition: matcher.h:661
Weight Final(StateId s) const final
Definition: matcher.h:1169
Matcher(const Matcher &matcher, bool safe=false)
Definition: matcher.h:1545
SigmaMatcher(const FST &fst, MatchType match_type, Label sigma_label=kNoLabel, MatcherRewriteMode rewrite_mode=MATCHER_REWRITE_AUTO, M *matcher=nullptr)
Definition: matcher.h:1063
void Next() final
Definition: matcher.h:969
virtual ~MatcherBase()=default
ExplicitMatcher(const ExplicitMatcher &matcher, bool safe=false)
Definition: matcher.h:1452
ExplicitMatcher * Copy(bool safe=false) const override
Definition: matcher.h:1457
void SetState(StateId s) final
Definition: matcher.h:1114
const FST & GetFst() const override
Definition: matcher.h:984
void SetState(StateId s) final
Definition: matcher.h:663
constexpr uint8_t kArcILabelValue
Definition: fst.h:444
constexpr uint64_t kEpsilons
Definition: properties.h:79
ssize_t NumArcs(const ExpandedFst< Arc > &fst, typename Arc::StateId s)
Definition: expanded-fst.h:113
To down_cast(From *f)
Definition: compat.h:50
constexpr uint64_t kNotOLabelSorted
Definition: properties.h:101
HashMatcher * Copy(bool safe=false) const override
Definition: matcher.h:475
ExplicitMatcher(const FST &fst, MatchType match_type, M *matcher=nullptr)
Definition: matcher.h:1440
typename M::FST FST
Definition: matcher.h:1433
Label SigmaLabel() const
Definition: matcher.h:1191
uint32_t Flags() const override
Definition: matcher.h:1184
MatchType Type(bool test) const override
Definition: matcher.h:241
void SetState(StateId state)
Definition: matcher.h:1295
PhiMatcher(const PhiMatcher &matcher, bool safe=false)
Definition: matcher.h:648
RhoMatcher(const FST &fst, MatchType match_type, Label rho_label=kNoLabel, MatcherRewriteMode rewrite_mode=MATCHER_REWRITE_AUTO, M *matcher=nullptr)
Definition: matcher.h:874
typename Arc::StateId StateId
Definition: matcher.h:1527
constexpr uint64_t kODeterministic
Definition: properties.h:74
constexpr int kNoStateId
Definition: fst.h:196
MultiEpsMatcher(const FST *fst, MatchType match_type, uint32_t flags=(kMultiEpsLoop|kMultiEpsList), M *matcher=nullptr, bool own_matcher=true)
Definition: matcher.h:1266
typename F::Arc Arc
Definition: matcher.h:1525
typename Arc::Weight Weight
Definition: matcher.h:612
Label RhoLabel() const
Definition: matcher.h:995
typename Arc::StateId StateId
Definition: matcher.h:1436
void RemoveMultiEpsLabel(Label label)
Definition: matcher.h:1349
void Next() final
Definition: matcher.h:505
MatchType Type(bool test) const
Definition: matcher.h:1293
uint32_t Flags() const override
Definition: matcher.h:736
void Next() final
Definition: matcher.h:1160
#define FSTERROR()
Definition: util.h:56
bool Done() const final
Definition: matcher.h:1141
constexpr uint32_t kMultiEpsLoop
Definition: matcher.h:1237
bool Done() const final
Definition: matcher.h:301
const FST & GetFst() const override
Definition: matcher.h:513
constexpr uint64_t kNonIDeterministic
Definition: properties.h:71
const Arc & Value() const
Definition: matcher.h:1304
typename Arc::Label Label
Definition: matcher.h:1058
void Next() final
Definition: matcher.h:700
const Arc & Value() const final
Definition: matcher.h:499
virtual uint32_t Flags() const
Definition: matcher.h:163
MatchType Type(bool test) const override
Definition: matcher.h:1112
constexpr uint64_t kNotILabelSorted
Definition: properties.h:96
bool Find(Label label) final
Definition: matcher.h:484
bool Find(Label match_label) final
Definition: matcher.h:270
bool Done() const final
Definition: matcher.h:950
virtual ssize_t Priority(StateId s)
Definition: matcher.h:167
virtual Weight Final(StateId s) const
Definition: matcher.h:165
typename FST::Arc Arc
Definition: matcher.h:1057
typename Arc::StateId StateId
Definition: matcher.h:870
typename FST::Arc Arc
Definition: matcher.h:427
constexpr uint64_t kNoOEpsilons
Definition: properties.h:91
const M * GetMatcher() const
Definition: matcher.h:1490
bool Done() const
Definition: matcher.h:1559
typename M::FST FST
Definition: matcher.h:867
constexpr uint64_t kOLabelSorted
Definition: properties.h:99
Weight Final(StateId s) const final
Definition: matcher.h:971
virtual void SetState(StateId)=0
ssize_t Priority(StateId s)
Definition: matcher.h:1573
constexpr uint64_t kNoEpsilons
Definition: properties.h:81
const Arc & Value() const final
Definition: matcher.h:1473
Matcher(const FST *fst, MatchType match_type)
Definition: matcher.h:1539
const FST & GetFst() const override
Definition: matcher.h:732
constexpr uint8_t kArcOLabelValue
Definition: fst.h:446
Weight Final(StateId s) const final
Definition: matcher.h:325
typename FST::Arc Arc
Definition: matcher.h:1434
RhoMatcher * Copy(bool safe=false) const override
Definition: matcher.h:919
Matcher * Copy(bool safe=false) const
Definition: matcher.h:1551
void Next()
Definition: matcher.h:1563
void SetState(StateId s) final
Definition: matcher.h:1463
constexpr uint64_t kIDeterministic
Definition: properties.h:69
bool Find(Label label)
Definition: matcher.h:1557
virtual const Fst< Arc > & GetFst() const =0
Matcher(MatcherBase< Arc > *base_matcher)
Definition: matcher.h:1549
bool Find(Label match_label) final
Definition: matcher.h:763
constexpr uint64_t kIEpsilons
Definition: properties.h:84
typename M::FST FST
Definition: matcher.h:608
const FST & GetFst() const override
Definition: matcher.h:1180
uint32_t Flags() const override
Definition: matcher.h:988
HashMatcher(const HashMatcher &matcher, bool safe=false)
Definition: matcher.h:465
SigmaMatcher * Copy(bool safe=false) const override
Definition: matcher.h:1108
Label PhiLabel() const
Definition: matcher.h:743
void ClearMultiEpsLabels()
Definition: matcher.h:1357
constexpr uint64_t kILabelSorted
Definition: properties.h:94
void SetState(StateId s)
Definition: matcher.h:1555
ssize_t Priority(StateId s) final
Definition: matcher.h:327
SortedMatcher(const FST &fst, MatchType match_type, Label binary_label=1)
Definition: matcher.h:190
void SetState(StateId s) final
Definition: matcher.h:545
MultiEpsMatcher(const MultiEpsMatcher &matcher, bool safe=false)
Definition: matcher.h:1276
MatcherRewriteMode
Definition: matcher.h:584
bool Done() const final
Definition: matcher.h:672
virtual MatcherBase * Copy(bool safe=false) const =0
ssize_t Priority(StateId s) final
Definition: matcher.h:1482
constexpr uint64_t kNonODeterministic
Definition: properties.h:76
ssize_t Priority(StateId s) final
Definition: matcher.h:722
const FST & GetFst() const final
Definition: matcher.h:1484
typename FST::Arc Arc
Definition: matcher.h:609
constexpr uint32_t kMatcherFlags
Definition: matcher.h:132
typename Arc::Weight Weight
Definition: matcher.h:1060
uint32_t Flags() const
Definition: matcher.h:1337
SortedMatcher(const FST *fst, MatchType match_type, Label binary_label=1)
Definition: matcher.h:198
typename Arc::Weight Weight
Definition: matcher.h:430
SortedMatcher(const SortedMatcher &matcher, bool safe=false)
Definition: matcher.h:223
const FST & GetFst() const override
Definition: matcher.h:329
uint64_t Properties(uint64_t props) const override
Definition: matcher.h:1207
typename Arc::Weight Weight
Definition: matcher.h:145
constexpr uint64_t kString
Definition: properties.h:136
typename Arc::Label Label
Definition: matcher.h:143
virtual MatchType Type(bool) const =0
Weight Final(StateId s) const final
Definition: matcher.h:1480
bool Done() const
Definition: matcher.h:1302
void Next() final
Definition: matcher.h:1475
SigmaMatcher(const FST *fst, MatchType match_type, Label sigma_label=kNoLabel, MatcherRewriteMode rewrite_mode=MATCHER_REWRITE_AUTO, M *matcher=nullptr)
Definition: matcher.h:1092
HashMatcher(const FST &fst, MatchType match_type)
Definition: matcher.h:437
bool Done() const final
Definition: matcher.h:1471
PhiMatcher(const FST &fst, MatchType match_type, Label phi_label=kNoLabel, bool phi_loop=true, MatcherRewriteMode rewrite_mode=MATCHER_REWRITE_AUTO, M *matcher=nullptr)
Definition: matcher.h:615
MatchType Type(bool test) const override
Definition: matcher.h:480
ssize_t Priority(StateId s)
Definition: matcher.h:1339
typename Arc::Weight Weight
Definition: matcher.h:871
const Arc & Value() const
Definition: matcher.h:1561
typename Arc::Label Label
Definition: matcher.h:610
MatchType Type(bool test) const override
Definition: matcher.h:923
typename Arc::Label Label
Definition: matcher.h:869
typename M::FST FST
Definition: matcher.h:1056
typename Arc::Weight Weight
Definition: matcher.h:1528
typename Arc::Label Label
Definition: matcher.h:428
typename Arc::StateId StateId
Definition: matcher.h:1059
MultiEpsMatcher(const FST &fst, MatchType match_type, uint32_t flags=(kMultiEpsLoop|kMultiEpsList), M *matcher=nullptr, bool own_matcher=true)
Definition: matcher.h:1256
uint64_t Properties(uint64_t inprops) const override
Definition: matcher.h:1486
typename Arc::Weight Weight
Definition: matcher.h:1437
constexpr uint64_t kNoIEpsilons
Definition: properties.h:86
bool Find(Label match_label) final
Definition: matcher.h:1122
MultiEpsMatcher * Copy(bool safe=false) const
Definition: matcher.h:1289
uint64_t Properties(uint64_t inprops) const override
Definition: matcher.h:515
uint64_t Properties(uint64_t inprops) const override
Definition: matcher.h:331
RhoMatcher(const FST *fst, MatchType match_type, Label rho_label=kNoLabel, MatcherRewriteMode rewrite_mode=MATCHER_REWRITE_AUTO, M *matcher=nullptr)
Definition: matcher.h:903
HashMatcher(const FST *fst, MatchType match_type)
Definition: matcher.h:443
virtual bool Done() const =0
virtual const Arc & Value() const =0
const Arc & Value() const final
Definition: matcher.h:1143
constexpr uint64_t kAcceptor
Definition: properties.h:64
Weight Final(StateId s) const
Definition: matcher.h:1335
const FST & GetFst() const
Definition: matcher.h:1565
typename Arc::StateId StateId
Definition: matcher.h:611
constexpr uint32_t kRequireMatch
Definition: matcher.h:129
typename Arc::StateId StateId
Definition: matcher.h:144