FST  openfst-1.8.3
OpenFst Library
lookahead-matcher.h
Go to the documentation of this file.
1 // Copyright 2005-2024 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the 'License');
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an 'AS IS' BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // See www.openfst.org for extensive documentation on this weighted
16 // finite-state transducer library.
17 //
18 // Classes to add lookahead to FST matchers, useful for improving composition
19 // efficiency with certain inputs.
20 
21 #ifndef FST_LOOKAHEAD_MATCHER_H_
22 #define FST_LOOKAHEAD_MATCHER_H_
23 
24 #include <sys/types.h>
25 
26 #include <cstdint>
27 #include <memory>
28 #include <string>
29 #include <utility>
30 #include <vector>
31 
32 #include <fst/flags.h>
33 #include <fst/log.h>
34 #include <fst/accumulator.h>
35 #include <fst/add-on.h>
36 #include <fst/const-fst.h>
37 #include <fst/fst.h>
38 #include <fst/label-reachable.h>
39 #include <fst/matcher.h>
40 #include <fst/mutable-fst.h>
41 #include <fst/properties.h>
42 #include <fst/util.h>
43 #include <fst/vector-fst.h>
44 #include <string_view>
45 
46 DECLARE_string(save_relabel_ipairs);
47 DECLARE_string(save_relabel_opairs);
48 
49 namespace fst {
50 
51 // Lookahead matchers extend the matcher interface with the following additional
52 // methods:
53 //
54 // template <class FST>
55 // class LookAheadMatcher {
56 // public:
57 // using Arc = typename FST::Arc;
58 // using Label = typename Arc::Label;
59 // using StateId = typename Arc::StateId;
60 // using Weight = typename Arc::Weight;
61 //
62 // // Required constructors.
63 // // This makes a copy of the FST.
64 // LookAheadMatcher(const FST &fst, MatchType match_type);
65 // // This doesn't copy the FST.
66 // LookAheadMatcher(const FST *fst, MatchType match_type);
67 // // This makes a copy of the FST.
68 // // See Copy() below.
69 // LookAheadMatcher(const LookAheadMatcher &matcher, bool safe = false);
70 //
71 // // If safe = true, the copy is thread-safe (except the lookahead FST is
72 // // preserved). See Fst<>::Copy() for further doc.
73 // LookAheadMatcher *Copy(bool safe = false) const override;
74 
75 // // Below are methods for looking ahead for a match to a label and more
76 // // generally, to a rational set. Each returns false if there is definitely
77 // // not a match and returns true if there possibly is a match.
78 //
79 // // Optionally pre-specifies the lookahead FST that will be passed to
80 // // LookAheadFst() for possible precomputation. If copy is true, then the FST
81 // // argument is a copy of the FST used in the previous call to this method
82 // // (to avoid unnecessary updates).
83 // void InitLookAheadFst(const Fst<Arc> &fst, bool copy = false) override;
84 //
85 // // Are there paths from a state in the lookahead FST that can be read from
86 // // the current matcher state?
87 // bool LookAheadFst(const Fst<Arc> &fst, StateId s) override;
88 //
89 // // Can the label be read from the current matcher state after possibly
90 // // following epsilon transitions?
91 // bool LookAheadLabel(Label label) const override;
92 //
93 // // The following methods allow looking ahead for an arbitrary rational set
94 // // of strings, specified by an FST and a state from which to begin the
95 // // matching. If the lookahead FST is a transducer, this looks on the side
96 // // different from the matcher's match_type (cf. composition).
97 // Is there a single non-epsilon arc found in the lookahead FST that
98 // // begins the path (after possibly following any epsilons) in the last call
99 // // to LookAheadFst? If so, return true and copy it to the arc argument;
100 // // otherwise, return false. Non-trivial implementations are useful for
101 // // label-pushing in composition.
102 // bool LookAheadPrefix(Arc *arc) override;
103 //
104 // // Gives an estimate of the combined weight of the paths in the lookahead
105 // // and matcher FSTs for the last call to LookAheadFst. Non-trivial
106 // // implementations are useful for weight-pushing in composition.
107 // Weight LookAheadWeight() const override;
108 // };
109 
110 // Look-ahead flags.
// Each flag below occupies its own bit (0x10 through 0x800), so flags can be
// combined with bitwise OR and tested individually with bitwise AND.
111 // Matcher is a lookahead matcher when match_type is MATCH_INPUT.
112 inline constexpr uint32_t kInputLookAheadMatcher = 0x00000010;
113 
114 // Matcher is a lookahead matcher when match_type is MATCH_OUTPUT.
115 inline constexpr uint32_t kOutputLookAheadMatcher = 0x00000020;
116 
117 // Is a non-trivial implementation of LookAheadWeight() method defined and
118 // if so, should it be used?
119 inline constexpr uint32_t kLookAheadWeight = 0x00000040;
120 
121 // Is a non-trivial implementation of LookAheadPrefix() method defined and
122 // if so, should it be used?
123 inline constexpr uint32_t kLookAheadPrefix = 0x00000080;
124 
125 // Look-ahead of matcher FST non-epsilon arcs?
126 inline constexpr uint32_t kLookAheadNonEpsilons = 0x00000100;
127 
128 // Look-ahead of matcher FST epsilon arcs?
129 inline constexpr uint32_t kLookAheadEpsilons = 0x00000200;
130 
131 // Ignore epsilon paths for the lookahead prefix? This gives correct results in
132 // composition only with an appropriate composition filter since it depends on
133 // the filter blocking the ignored paths.
134 inline constexpr uint32_t kLookAheadNonEpsilonPrefix = 0x00000400;
135 
136 // For LabelLookAheadMatcher, save relabeling data to file?
137 inline constexpr uint32_t kLookAheadKeepRelabelData = 0x00000800;
138 
139 // Flags used for lookahead matchers.
// This mask equals the bitwise OR of the eight flags defined above.
140 inline constexpr uint32_t kLookAheadFlags = 0x00000ff0;
141 
142 // LookAhead Matcher interface, templated on the Arc definition; used
143 // for lookahead matcher specializations that are returned by the
144 // InitMatcher() Fst method.
145 template <class Arc>
146 class LookAheadMatcherBase : public MatcherBase<Arc> {
147  public:
148  using Label = typename Arc::Label;
149  using StateId = typename Arc::StateId;
150  using Weight = typename Arc::Weight;
151 
152  virtual void InitLookAheadFst(const Fst<Arc> &, bool copy = false) = 0;
153  virtual bool LookAheadFst(const Fst<Arc> &, StateId) = 0;
154  virtual bool LookAheadLabel(Label) const = 0;
155 
156  // Suggested concrete implementation of lookahead methods.
157 
158  bool LookAheadPrefix(Arc *arc) const {
159  if (prefix_arc_.nextstate != kNoStateId) {
160  *arc = prefix_arc_;
161  return true;
162  } else {
163  return false;
164  }
165  }
166 
167  Weight LookAheadWeight() const { return weight_; }
168 
169  protected:
170  // Concrete implementations for lookahead helper methods.
171 
172  void ClearLookAheadWeight() { weight_ = Weight::One(); }
173 
174  void SetLookAheadWeight(Weight weight) { weight_ = std::move(weight); }
175 
176  void ClearLookAheadPrefix() { prefix_arc_.nextstate = kNoStateId; }
177 
178  void SetLookAheadPrefix(Arc arc) { prefix_arc_ = std::move(arc); }
179 
180  private:
181  Arc prefix_arc_;
182  Weight weight_;
183 };
184 
185 // Doesn't actually lookahead, just declares that the future looks good.
186 template <class M>
188  : public LookAheadMatcherBase<typename M::FST::Arc> {
189  public:
190  using FST = typename M::FST;
191  using Arc = typename FST::Arc;
192  using Label = typename Arc::Label;
193  using StateId = typename Arc::StateId;
194  using Weight = typename Arc::Weight;
195 
196  // This makes a copy of the FST.
198  : matcher_(fst, match_type) {}
199 
200  // This doesn't copy the FST.
202  : matcher_(fst, match_type) {}
203 
204  // This makes a copy of the FST.
206  bool safe = false)
207  : matcher_(lmatcher.matcher_, safe) {}
208 
209  TrivialLookAheadMatcher *Copy(bool safe = false) const override {
210  return new TrivialLookAheadMatcher(*this, safe);
211  }
212 
213  MatchType Type(bool test) const override { return matcher_.Type(test); }
214 
215  void SetState(StateId s) final { return matcher_.SetState(s); }
216 
217  bool Find(Label label) final { return matcher_.Find(label); }
218 
219  bool Done() const final { return matcher_.Done(); }
220 
221  const Arc &Value() const final { return matcher_.Value(); }
222 
223  void Next() final { matcher_.Next(); }
224 
225  Weight Final(StateId s) const final { return matcher_.Final(s); }
226 
227  ssize_t Priority(StateId s) final { return matcher_.Priority(s); }
228 
229  const FST &GetFst() const override { return matcher_.GetFst(); }
230 
231  uint64_t Properties(uint64_t props) const override {
232  return matcher_.Properties(props);
233  }
234 
235  uint32_t Flags() const override {
236  return matcher_.Flags() | kInputLookAheadMatcher | kOutputLookAheadMatcher;
237  }
238 
239  // Lookahead methods (all trivial).
240 
241  void InitLookAheadFst(const Fst<Arc> &fst, bool copy = false) override {}
242 
243  bool LookAheadFst(const Fst<Arc> &, StateId) final { return true; }
244 
245  bool LookAheadLabel(Label) const final { return true; }
246 
247  bool LookAheadPrefix(Arc *) const { return false; }
248 
249  Weight LookAheadWeight() const { return Weight::One(); }
250 
251  private:
252  M matcher_;
253 };
254 
255 // Look-ahead of one transition. Template argument flags accepts flags to
256 // control behavior.
257 template <class M, uint32_t flags = kLookAheadNonEpsilons | kLookAheadEpsilons |
258  kLookAheadWeight | kLookAheadPrefix>
259 class ArcLookAheadMatcher : public LookAheadMatcherBase<typename M::FST::Arc> {
260  public:
261  using FST = typename M::FST;
262  using Arc = typename FST::Arc;
263  using Label = typename Arc::Label;
264  using StateId = typename Arc::StateId;
265  using Weight = typename Arc::Weight;
267 
274 
275  static constexpr uint32_t kFlags = flags;
276 
277  // This makes a copy of the FST.
278  ArcLookAheadMatcher(const FST &fst, MatchType match_type,
279  std::shared_ptr<MatcherData> data = nullptr)
280  : matcher_(fst, match_type),
281  fst_(matcher_.GetFst()),
282  lfst_(nullptr),
283  state_(kNoStateId) {}
284 
285  // This doesn't copy the FST.
286  ArcLookAheadMatcher(const FST *fst, MatchType match_type,
287  std::shared_ptr<MatcherData> data = nullptr)
288  : matcher_(fst, match_type),
289  fst_(matcher_.GetFst()),
290  lfst_(nullptr),
291  state_(kNoStateId) {}
292 
293  // This makes a copy of the FST.
294  ArcLookAheadMatcher(const ArcLookAheadMatcher &lmatcher, bool safe = false)
295  : matcher_(lmatcher.matcher_, safe),
296  fst_(matcher_.GetFst()),
297  lfst_(lmatcher.lfst_),
298  state_(kNoStateId) {}
299 
300  // General matcher methods.
301  ArcLookAheadMatcher *Copy(bool safe = false) const override {
302  return new ArcLookAheadMatcher(*this, safe);
303  }
304 
305  MatchType Type(bool test) const override { return matcher_.Type(test); }
306 
307  void SetState(StateId s) final {
308  state_ = s;
309  matcher_.SetState(s);
310  }
311 
312  bool Find(Label label) final { return matcher_.Find(label); }
313 
314  bool Done() const final { return matcher_.Done(); }
315 
316  const Arc &Value() const final { return matcher_.Value(); }
317 
318  void Next() final { matcher_.Next(); }
319 
320  Weight Final(StateId s) const final { return matcher_.Final(s); }
321 
322  ssize_t Priority(StateId s) final { return matcher_.Priority(s); }
323 
324  const FST &GetFst() const override { return fst_; }
325 
326  uint64_t Properties(uint64_t props) const override {
327  return matcher_.Properties(props);
328  }
329 
330  uint32_t Flags() const override {
331  return matcher_.Flags() | kInputLookAheadMatcher | kOutputLookAheadMatcher |
332  kFlags;
333  }
334 
335  const MatcherData *GetData() const { return nullptr; }
336 
337  std::shared_ptr<MatcherData> GetSharedData() const { return nullptr; }
338 
339  // Look-ahead methods.
340 
341  void InitLookAheadFst(const Fst<Arc> &fst, bool copy = false) override {
342  lfst_ = &fst;
343  }
344 
345  // Checks if there is a matching (possibly super-final) transition
346  // at (state_, s).
347  bool LookAheadFst(const Fst<Arc> &, StateId) final;
348 
349  bool LookAheadLabel(Label label) const final { return matcher_.Find(label); }
350 
351  private:
352  mutable M matcher_;
353  const FST &fst_; // Matcher FST.
354  const Fst<Arc> *lfst_; // Look-ahead FST.
355  StateId state_; // Matcher state.
356 };
357 
358 template <class M, uint32_t flags>
360  StateId s) {
361  if (&fst != lfst_) InitLookAheadFst(fst);
362  bool result = false;
363  ssize_t nprefix = 0;
364  if (kFlags & kLookAheadWeight) ClearLookAheadWeight();
365  if (kFlags & kLookAheadPrefix) ClearLookAheadPrefix();
366  if (fst_.Final(state_) != Weight::Zero() &&
367  lfst_->Final(s) != Weight::Zero()) {
368  if (!(kFlags & (kLookAheadWeight | kLookAheadPrefix))) return true;
369  ++nprefix;
370  if (kFlags & kLookAheadWeight) {
372  Plus(LookAheadWeight(), Times(fst_.Final(state_), lfst_->Final(s))));
373  }
374  result = true;
375  }
376  if (matcher_.Find(kNoLabel)) {
377  if (!(kFlags & (kLookAheadWeight | kLookAheadPrefix))) return true;
378  ++nprefix;
379  if (kFlags & kLookAheadWeight) {
380  for (; !matcher_.Done(); matcher_.Next()) {
381  SetLookAheadWeight(Plus(LookAheadWeight(), matcher_.Value().weight));
382  }
383  }
384  result = true;
385  }
386  for (ArcIterator<Fst<Arc>> aiter(*lfst_, s); !aiter.Done(); aiter.Next()) {
387  const auto &arc = aiter.Value();
388  Label label = kNoLabel;
389  switch (matcher_.Type(false)) {
390  case MATCH_INPUT:
391  label = arc.olabel;
392  break;
393  case MATCH_OUTPUT:
394  label = arc.ilabel;
395  break;
396  default:
397  FSTERROR() << "ArcLookAheadMatcher::LookAheadFst: Bad match type";
398  return true;
399  }
400  if (label == 0) {
401  if (!(kFlags & (kLookAheadWeight | kLookAheadPrefix))) return true;
402  if (!(kFlags & kLookAheadNonEpsilonPrefix)) ++nprefix;
403  if (kFlags & kLookAheadWeight) {
404  SetLookAheadWeight(Plus(LookAheadWeight(), arc.weight));
405  }
406  result = true;
407  } else if (matcher_.Find(label)) {
408  if (!(kFlags & (kLookAheadWeight | kLookAheadPrefix))) return true;
409  for (; !matcher_.Done(); matcher_.Next()) {
410  ++nprefix;
411  if (kFlags & kLookAheadWeight) {
413  Times(arc.weight, matcher_.Value().weight)));
414  }
415  if ((kFlags & kLookAheadPrefix) && nprefix == 1)
416  SetLookAheadPrefix(arc);
417  }
418  result = true;
419  }
420  }
421  if (kFlags & kLookAheadPrefix) {
422  if (nprefix == 1) {
423  ClearLookAheadWeight(); // Avoids double counting.
424  } else {
426  }
427  }
428  return result;
429 }
430 
431 // Template argument flags accepts flags to control behavior. It must include
432 // precisely one of kInputLookAheadMatcher or kOutputLookAheadMatcher.
433 template <class M,
434  uint32_t flags = kLookAheadEpsilons | kLookAheadWeight |
435  kLookAheadPrefix | kLookAheadNonEpsilonPrefix |
440  : public LookAheadMatcherBase<typename M::FST::Arc> {
441  public:
442  using Matcher = M;
443  using Accumulator = Accum;
444  using Reachable = R;
445 
446  using FST = typename M::FST;
447  using Arc = typename FST::Arc;
448  using Label = typename Arc::Label;
449  using StateId = typename Arc::StateId;
450  using Weight = typename Arc::Weight;
451  using MatcherData = typename Reachable::Data;
452 
459 
460  static_assert(!(flags & kInputLookAheadMatcher) !=
461  !(flags & kOutputLookAheadMatcher),
462  "Must include precisely one of kInputLookAheadMatcher and "
463  "kOutputLookAheadMatcher");
464  static constexpr uint32_t kFlags = flags;
465 
466  // This makes a copy of the FST.
468  std::shared_ptr<MatcherData> data = nullptr,
469  std::unique_ptr<Accumulator> accumulator = nullptr)
470  : matcher_(fst, match_type),
471  lfst_(nullptr),
472  state_(kNoStateId),
473  error_(false) {
474  Init(fst, match_type, data, std::move(accumulator));
475  }
476 
477  // This doesn't copy the FST.
479  std::shared_ptr<MatcherData> data = nullptr,
480  std::unique_ptr<Accumulator> accumulator = nullptr)
481  : matcher_(fst, match_type),
482  lfst_(nullptr),
483  state_(kNoStateId),
484  error_(false) {
485  Init(*fst, match_type, data, std::move(accumulator));
486  }
487 
488  // This makes a copy of the FST.
490  bool safe = false)
491  : matcher_(lmatcher.matcher_, safe),
492  lfst_(lmatcher.lfst_),
493  label_reachable_(lmatcher.label_reachable_
494  ? new Reachable(*lmatcher.label_reachable_, safe)
495  : nullptr),
496  state_(kNoStateId),
497  error_(lmatcher.error_) {}
498 
499  LabelLookAheadMatcher *Copy(bool safe = false) const override {
500  return new LabelLookAheadMatcher(*this, safe);
501  }
502 
503  MatchType Type(bool test) const override { return matcher_.Type(test); }
504 
505  void SetState(StateId s) final {
506  if (state_ == s) return;
507  state_ = s;
508  match_set_state_ = false;
509  reach_set_state_ = false;
510  }
511 
512  bool Find(Label label) final {
513  if (!match_set_state_) {
514  matcher_.SetState(state_);
515  match_set_state_ = true;
516  }
517  return matcher_.Find(label);
518  }
519 
520  bool Done() const final { return matcher_.Done(); }
521 
522  const Arc &Value() const final { return matcher_.Value(); }
523 
524  void Next() final { matcher_.Next(); }
525 
526  Weight Final(StateId s) const final { return matcher_.Final(s); }
527 
528  ssize_t Priority(StateId s) final { return matcher_.Priority(s); }
529 
530  const FST &GetFst() const override { return matcher_.GetFst(); }
531 
532  uint64_t Properties(uint64_t inprops) const override {
533  auto outprops = matcher_.Properties(inprops);
534  if (error_ || (label_reachable_ && label_reachable_->Error())) {
535  outprops |= kError;
536  }
537  return outprops;
538  }
539 
540  uint32_t Flags() const override {
541  if (label_reachable_ && label_reachable_->GetData()->ReachInput()) {
542  return matcher_.Flags() | kFlags | kInputLookAheadMatcher;
543  } else if (label_reachable_ && !label_reachable_->GetData()->ReachInput()) {
544  return matcher_.Flags() | kFlags | kOutputLookAheadMatcher;
545  } else {
546  return matcher_.Flags();
547  }
548  }
549 
550  const MatcherData *GetData() const {
551  return label_reachable_ ? label_reachable_->GetData() : nullptr;
552  }
553 
554  std::shared_ptr<MatcherData> GetSharedData() const {
555  return label_reachable_ ? label_reachable_->GetSharedData() : nullptr;
556  }
557  // Checks if there is a matching (possibly super-final) transition at
558  // (state_, s).
559  template <class LFST>
560  bool LookAheadFst(const LFST &fst, StateId s);
561 
562  // Required to make class concrete.
563  bool LookAheadFst(const Fst<Arc> &fst, StateId s) final {
564  return LookAheadFst<Fst<Arc>>(fst, s);
565  }
566 
567  void InitLookAheadFst(const Fst<Arc> &fst, bool copy = false) override {
568  lfst_ = &fst;
569  if (label_reachable_) {
570  const bool reach_input = Type(false) == MATCH_OUTPUT;
571  label_reachable_->ReachInit(fst, reach_input, copy);
572  }
573  }
574 
575  template <class LFST>
576  void InitLookAheadFst(const LFST &fst, bool copy = false) {
577  lfst_ = &fst;
578  if (label_reachable_) {
579  const bool reach_input = Type(false) == MATCH_OUTPUT;
580  label_reachable_->ReachInit(fst, reach_input, copy);
581  }
582  }
583 
584  bool LookAheadLabel(Label label) const final {
585  if (label == 0) return true;
586  if (label_reachable_) {
587  if (!reach_set_state_) {
588  label_reachable_->SetState(state_);
589  reach_set_state_ = true;
590  }
591  return label_reachable_->Reach(label);
592  } else {
593  return true;
594  }
595  }
596 
597  private:
598  void Init(const FST &fst, MatchType match_type,
599  std::shared_ptr<MatcherData> data,
600  std::unique_ptr<Accumulator> accumulator) {
601  const bool reach_input = match_type == MATCH_INPUT;
602  if (data) {
603  if (reach_input == data->ReachInput()) {
604  label_reachable_ =
605  std::make_unique<Reachable>(data, std::move(accumulator));
606  }
607  } else if ((reach_input && (kFlags & kInputLookAheadMatcher)) ||
608  (!reach_input && (kFlags & kOutputLookAheadMatcher))) {
609  label_reachable_ =
610  std::make_unique<Reachable>(fst, reach_input, std::move(accumulator),
611  kFlags & kLookAheadKeepRelabelData);
612  }
613  }
614 
615  mutable M matcher_;
616  const Fst<Arc> *lfst_; // Look-ahead FST.
617  std::unique_ptr<Reachable> label_reachable_; // Label reachability info.
618  StateId state_; // Matcher state.
619  bool match_set_state_; // matcher_.SetState called?
620  mutable bool reach_set_state_; // reachable_.SetState called?
621  bool error_; // Error encountered?
622 };
623 
624 template <class M, uint32_t flags, class Accumulator, class Reachable>
625 template <class LFST>
626 inline bool LabelLookAheadMatcher<M, flags, Accumulator,
628  StateId s) {
629  if (&fst != lfst_) InitLookAheadFst(fst);
632  if (!label_reachable_) return true;
633  label_reachable_->SetState(state_, s);
634  reach_set_state_ = true;
635  bool compute_weight = kFlags & kLookAheadWeight;
636  constexpr bool kComputePrefix = kFlags & kLookAheadPrefix;
637  ArcIterator<LFST> aiter(fst, s);
638  aiter.SetFlags(kArcNoCache, kArcNoCache); // Makes caching optional.
639  const bool reach_arc = label_reachable_->Reach(
640  &aiter, 0, internal::NumArcs(*lfst_, s), compute_weight);
641  const auto lfinal = internal::Final(*lfst_, s);
642  const bool reach_final =
643  lfinal != Weight::Zero() && label_reachable_->ReachFinal();
644  if (reach_arc) {
645  const auto begin = label_reachable_->ReachBegin();
646  const auto end = label_reachable_->ReachEnd();
647  if (kComputePrefix && end - begin == 1 && !reach_final) {
648  aiter.Seek(begin);
649  SetLookAheadPrefix(aiter.Value());
650  compute_weight = false;
651  } else if (compute_weight) {
652  SetLookAheadWeight(label_reachable_->ReachWeight());
653  }
654  }
655  if (reach_final && compute_weight) {
656  SetLookAheadWeight(reach_arc ? Plus(LookAheadWeight(), lfinal) : lfinal);
657  }
658  return reach_arc || reach_final;
659 }
660 
// Relabels the fst with Reachable::Reachable. Relabels input
// if data.First() is non-null, otherwise relabels output.
// Optionally saves the input/output label pairs to a file
// if save_relabel_ipairs/opairs is non-empty.
template <class Reachable, class FST, class Data>
void RelabelForReachable(FST *fst, const Data &data,
                         std::string_view save_relabel_ipairs,
                         std::string_view save_relabel_opairs) {
  using Label = typename FST::Arc::Label;
  using LabelPairs = std::vector<std::pair<Label, Label>>;

  const bool reach_input = data.First() != nullptr;
  if (reach_input) {
    Reachable reachable(data.SharedFirst());
    reachable.Relabel(fst, /*relabel_input=*/true);
    if (save_relabel_ipairs.empty()) return;
    LabelPairs pairs;
    reachable.RelabelPairs(&pairs, /*avoid_collisions=*/true);
    WriteLabelPairs(save_relabel_ipairs, pairs);
    return;
  }

  Reachable reachable(data.SharedSecond());
  reachable.Relabel(fst, /*relabel_input=*/false);
  if (save_relabel_opairs.empty()) return;
  LabelPairs pairs;
  reachable.RelabelPairs(&pairs, /*avoid_collisions=*/true);
  WriteLabelPairs(save_relabel_opairs, pairs);
}
688 
689 // Label-lookahead relabeling class.
690 template <class Arc, class Data = LabelReachableData<typename Arc::Label>>
692  public:
693  using Label = typename Arc::Label;
695 
696  // Relabels matcher FST (initialization function object).
697  template <typename Impl>
698  explicit LabelLookAheadRelabeler(std::shared_ptr<Impl> *impl);
699 
700  // Relabels arbitrary FST. Class LFST should be a label-lookahead FST.
701  template <class LFST>
702  static void Relabel(MutableFst<Arc> *fst, const LFST &mfst,
703  bool relabel_input) {
704  const auto *data = mfst.GetAddOn();
705  Reachable reachable(data->First() ? data->SharedFirst()
706  : data->SharedSecond());
707  reachable.Relabel(fst, relabel_input);
708  }
709 
710  // Returns relabeling pairs (cf. relabel.h::Relabel()). Class LFST should be a
711  // label-lookahead FST. If avoid_collisions is true, extra pairs are added to
712  // ensure no collisions when relabeling automata that have labels unseen here.
713  template <class LFST>
714  static void RelabelPairs(const LFST &mfst,
715  std::vector<std::pair<Label, Label>> *pairs,
716  bool avoid_collisions = false) {
717  const auto *data = mfst.GetAddOn();
718  Reachable reachable(data->First() ? data->SharedFirst()
719  : data->SharedSecond());
720  reachable.RelabelPairs(pairs, avoid_collisions);
721  }
722 };
723 
724 template <class Arc, class Data>
725 template <typename Impl>
727  std::shared_ptr<Impl> *impl) {
728  Fst<Arc> &fst = (*impl)->GetFst();
729  auto data = (*impl)->GetSharedAddOn();
730  const auto name = (*impl)->Type();
731  const bool is_mutable = fst.Properties(kMutable, false);
732  std::unique_ptr<MutableFst<Arc>> mfst;
733  if (is_mutable) {
734  // Borrow pointer from fst without increasing ref count; it will
735  // be released below. We do not want to call Copy() since that would
736  // do a deep copy when the Fst is modified.
737  mfst.reset(down_cast<MutableFst<Arc> *>(&fst));
738  } else {
739  mfst = std::make_unique<VectorFst<Arc>>(fst);
740  }
741 
742  RelabelForReachable<Reachable>(mfst.get(), *data,
743  FST_FLAGS_save_relabel_ipairs,
744  FST_FLAGS_save_relabel_opairs);
745 
746  if (is_mutable) {
747  // Pointer was just borrowed, don't delete it.
748  mfst.release();
749  } else {
750  *impl = std::make_shared<Impl>(*mfst, name);
751  (*impl)->SetAddOn(data);
752  }
753 }
754 
755 // Generic lookahead matcher, templated on the FST definition (a wrapper around
756 // a pointer to specific one).
757 template <class F>
759  public:
760  using FST = F;
761  using Arc = typename FST::Arc;
762  using Label = typename Arc::Label;
763  using StateId = typename Arc::StateId;
764  using Weight = typename Arc::Weight;
766 
767  // This makes a copy of the FST.
768  LookAheadMatcher(const FST &fst, MatchType match_type)
769  : owned_fst_(fst.Copy()),
770  base_(owned_fst_->InitMatcher(match_type)),
771  lookahead_(false) {
772  if (!base_)
773  base_ =
774  std::make_unique<SortedMatcher<FST>>(owned_fst_.get(), match_type);
775  }
776 
777  // This doesn't copy the FST.
778  LookAheadMatcher(const FST *fst, MatchType match_type)
779  : base_(fst->InitMatcher(match_type)), lookahead_(false) {
780  if (!base_) base_ = std::make_unique<SortedMatcher<FST>>(fst, match_type);
781  }
782 
783  // This makes a copy of the FST.
784  LookAheadMatcher(const LookAheadMatcher &matcher, bool safe = false)
785  : base_(matcher.base_->Copy(safe)), lookahead_(matcher.lookahead_) {}
786 
787  // Takes ownership of base.
789  : base_(base), lookahead_(false) {}
790 
791  LookAheadMatcher *Copy(bool safe = false) const {
792  return new LookAheadMatcher(*this, safe);
793  }
794 
795  MatchType Type(bool test) const { return base_->Type(test); }
796 
797  void SetState(StateId s) { base_->SetState(s); }
798 
799  bool Find(Label label) { return base_->Find(label); }
800 
801  bool Done() const { return base_->Done(); }
802 
803  const Arc &Value() const { return base_->Value(); }
804 
805  void Next() { base_->Next(); }
806 
807  Weight Final(StateId s) const { return base_->Final(s); }
808 
809  ssize_t Priority(StateId s) { return base_->Priority(s); }
810 
811  const FST &GetFst() const { return down_cast<const FST &>(base_->GetFst()); }
812 
813  uint64_t Properties(uint64_t props) const { return base_->Properties(props); }
814 
815  uint32_t Flags() const { return base_->Flags(); }
816 
817  bool LookAheadLabel(Label label) const {
818  if (LookAheadCheck()) {
819  return down_cast<LBase *>(base_.get())->LookAheadLabel(label);
820  } else {
821  return true;
822  }
823  }
824 
825  bool LookAheadFst(const Fst<Arc> &fst, StateId s) {
826  if (LookAheadCheck()) {
827  return down_cast<LBase *>(base_.get())->LookAheadFst(fst, s);
828  } else {
829  return true;
830  }
831  }
832 
834  if (LookAheadCheck()) {
835  return down_cast<LBase *>(base_.get())->LookAheadWeight();
836  } else {
837  return Weight::One();
838  }
839  }
840 
841  bool LookAheadPrefix(Arc *arc) const {
842  if (LookAheadCheck()) {
843  return down_cast<LBase *>(base_.get())->LookAheadPrefix(arc);
844  } else {
845  return false;
846  }
847  }
848 
849  void InitLookAheadFst(const Fst<Arc> &fst, bool copy = false) {
850  if (LookAheadCheck()) {
851  down_cast<LBase *>(base_.get())->InitLookAheadFst(fst, copy);
852  }
853  }
854 
855  private:
856  bool LookAheadCheck() const {
857  if (!lookahead_) {
858  lookahead_ =
859  base_->Flags() & (kInputLookAheadMatcher | kOutputLookAheadMatcher);
860  if (!lookahead_) {
861  FSTERROR() << "LookAheadMatcher: No look-ahead matcher defined";
862  }
863  }
864  return lookahead_;
865  }
866 
867  std::unique_ptr<const FST> owned_fst_;
868  std::unique_ptr<MatcherBase<Arc>> base_;
869  mutable bool lookahead_;
870 
871  LookAheadMatcher &operator=(const LookAheadMatcher &) = delete;
872 };
873 
874 } // namespace fst
875 
876 #endif // FST_LOOKAHEAD_MATCHER_H_
typename Arc::Label Label
ssize_t Priority(StateId s) final
bool LookAheadFst(const Fst< Arc > &fst, StateId s)
ArcLookAheadMatcher(const FST *fst, MatchType match_type, std::shared_ptr< MatcherData > data=nullptr)
virtual bool LookAheadLabel(Label) const =0
constexpr uint64_t kMutable
Definition: properties.h:49
bool LookAheadLabel(Label label) const final
bool LookAheadPrefix(Arc *arc) const
uint64_t Properties(uint64_t props) const
constexpr int kNoLabel
Definition: fst.h:195
constexpr uint8_t kArcNoCache
Definition: fst.h:452
void RelabelForReachable(FST *fst, const Data &data, std::string_view save_relabel_ipairs, std::string_view save_relabel_opairs)
virtual uint64_t Properties(uint64_t mask, bool test) const =0
void SetState(StateId s)
typename Arc::StateId StateId
bool Find(Label label)
bool LookAheadLabel(Label) const final
typename Arc::Label Label
TrivialLookAheadMatcher * Copy(bool safe=false) const override
const Arc & Value() const final
TrivialLookAheadMatcher(const TrivialLookAheadMatcher &lmatcher, bool safe=false)
ErrorWeight Plus(const ErrorWeight &, const ErrorWeight &)
Definition: error-weight.h:61
typename Arc::StateId StateId
bool LookAheadPrefix(Arc *) const
typename Arc::Weight Weight
static void Relabel(MutableFst< Arc > *fst, const LFST &mfst, bool relabel_input)
bool LookAheadLabel(Label label) const
Arc::Weight Final(const ExpandedFst< Arc > &fst, typename Arc::StateId s)
Definition: expanded-fst.h:107
MatchType
Definition: fst.h:187
std::shared_ptr< MatcherData > GetSharedData() const
ErrorWeight Times(const ErrorWeight &, const ErrorWeight &)
Definition: error-weight.h:64
const FST & GetFst() const override
bool WriteLabelPairs(std::string_view source, const std::vector< std::pair< Label, Label >> &pairs)
Definition: util.h:428
constexpr uint64_t kError
Definition: properties.h:52
const Arc & Value() const
ArcLookAheadMatcher * Copy(bool safe=false) const override
constexpr uint32_t kLookAheadFlags
MatchType Type(bool test) const override
typename Arc::Weight Weight
ssize_t NumArcs(const ExpandedFst< Arc > &fst, typename Arc::StateId s)
Definition: expanded-fst.h:113
To down_cast(From *f)
Definition: compat.h:50
TrivialLookAheadMatcher(const FST &fst, MatchType match_type)
LabelLookAheadMatcher(const FST *fst, MatchType match_type, std::shared_ptr< MatcherData > data=nullptr, std::unique_ptr< Accumulator > accumulator=nullptr)
void InitLookAheadFst(const Fst< Arc > &fst, bool copy=false) override
MatchType Type(bool test) const
void SetState(StateId s) final
virtual bool LookAheadFst(const Fst< Arc > &, StateId)=0
void SetState(StateId s) final
constexpr int kNoStateId
Definition: fst.h:196
bool Find(Label label) final
Label Relabel(Label label)
const Arc & Value() const final
const Arc & Value() const
Definition: fst.h:536
void RelabelPairs(std::vector< std::pair< Label, Label >> *pairs, bool avoid_collisions=false)
ssize_t Priority(StateId s) final
constexpr uint32_t kOutputLookAheadMatcher
#define FSTERROR()
Definition: util.h:56
bool Find(Label label) final
LabelLookAheadRelabeler(std::shared_ptr< Impl > *impl)
void SetFlags(uint8_t flags, uint8_t mask)
Definition: fst.h:570
const FST & GetFst() const override
MatchType Type(bool test) const override
LabelLookAheadMatcher(const LabelLookAheadMatcher &lmatcher, bool safe=false)
constexpr uint32_t kLookAheadEpsilons
constexpr uint32_t kLookAheadNonEpsilonPrefix
constexpr uint32_t kLookAheadKeepRelabelData
LookAheadMatcher(const FST *fst, MatchType match_type)
static void RelabelPairs(const LFST &mfst, std::vector< std::pair< Label, Label >> *pairs, bool avoid_collisions=false)
const FST & GetFst() const
constexpr uint32_t kLookAheadNonEpsilons
const MatcherData * GetData() const
Weight Final(StateId s) const final
void Seek(size_t a)
Definition: fst.h:556
MatchType Type(bool test) const override
DECLARE_string(save_relabel_ipairs)
typename Arc::Label Label
const FST & GetFst() const override
constexpr uint32_t kInputLookAheadMatcher
ssize_t Priority(StateId s)
typename Reachable::Data MatcherData
LookAheadMatcher * Copy(bool safe=false) const
constexpr uint32_t kLookAheadPrefix
void SetState(StateId s) final
bool LookAheadFst(const Fst< Arc > &fst, StateId s) final
bool LookAheadPrefix(Arc *arc) const
virtual const Fst< Arc > & GetFst() const =0
void SetLookAheadWeight(Weight weight)
ArcLookAheadMatcher(const ArcLookAheadMatcher &lmatcher, bool safe=false)
void InitLookAheadFst(const Fst< Arc > &fst, bool copy=false) override
uint64_t Properties(uint64_t inprops) const override
LookAheadMatcher(const LookAheadMatcher &matcher, bool safe=false)
LabelLookAheadMatcher * Copy(bool safe=false) const override
uint32_t Flags() const override
void InitLookAheadFst(const Fst< Arc > &fst, bool copy=false) override
TrivialLookAheadMatcher(const FST *fst, MatchType match_type)
void InitLookAheadFst(const Fst< Arc > &fst, bool copy=false)
uint32_t Flags() const override
const MatcherData * GetData() const
ArcLookAheadMatcher(const FST &fst, MatchType match_type, std::shared_ptr< MatcherData > data=nullptr)
constexpr uint32_t kLookAheadWeight
virtual MatcherBase * Copy(bool safe=false) const =0
typename Arc::StateId StateId
typename Arc::StateId StateId
LookAheadMatcher(const FST &fst, MatchType match_type)
uint64_t Properties(uint64_t props) const override
virtual void InitLookAheadFst(const Fst< Arc > &, bool copy=false)=0
Weight Final(StateId s) const
typename Arc::Weight Weight
Definition: matcher.h:145
typename Arc::Label Label
Definition: matcher.h:143
virtual MatchType Type(bool) const =0
Weight Final(StateId s) const final
Weight LookAheadWeight() const
typename Arc::Weight Weight
Weight Final(StateId s) const final
ssize_t Priority(StateId s) final
std::shared_ptr< MatcherData > GetSharedData() const
uint32_t Flags() const override
bool LookAheadFst(const Fst< Arc > &, StateId) final
const Arc & Value() const final
uint64_t Properties(uint64_t props) const override
uint32_t Flags() const
LookAheadMatcher(MatcherBase< Arc > *base)
typename FST::Arc Arc
void InitLookAheadFst(const LFST &fst, bool copy=false)
LabelLookAheadMatcher(const FST &fst, MatchType match_type, std::shared_ptr< MatcherData > data=nullptr, std::unique_ptr< Accumulator > accumulator=nullptr)
bool Find(Label label) final
bool LookAheadLabel(Label label) const final
bool LookAheadFst(const Fst< Arc > &, StateId) final
typename Arc::StateId StateId
Definition: matcher.h:144