FST  openfst-1.8.2
OpenFst Library
connect.h
Go to the documentation of this file.
1 // Copyright 2005-2020 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the 'License');
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an 'AS IS' BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // See www.openfst.org for extensive documentation on this weighted
16 // finite-state transducer library.
17 //
18 // Classes and functions to remove unsuccessful paths from an FST.
19 
20 #ifndef FST_CONNECT_H_
21 #define FST_CONNECT_H_
22 
23 #include <algorithm>
24 #include <cstdint>
25 #include <vector>
26 
27 
28 #include <fst/dfs-visit.h>
29 #include <fst/mutable-fst.h>
30 #include <fst/union-find.h>
31 
32 
33 namespace fst {
34 
35 // Finds and returns connected components. Use with Visit().
36 template <class Arc>
37 class CcVisitor {
38  public:
39  using Weight = typename Arc::Weight;
40  using StateId = typename Arc::StateId;
41 
42  // cc[i]: connected component number for state i.
43  explicit CcVisitor(std::vector<StateId> *cc)
44  : comps_(new UnionFind<StateId>(0, kNoStateId)), cc_(cc), nstates_(0) {}
45 
46  // comps: connected components equiv classes.
47  explicit CcVisitor(UnionFind<StateId> *comps)
48  : comps_(comps), cc_(nullptr), nstates_(0) {}
49 
51  if (cc_) delete comps_;
52  }
53 
54  void InitVisit(const Fst<Arc> &fst) {}
55 
56  bool InitState(StateId s, StateId root) {
57  ++nstates_;
58  if (comps_->FindSet(s) == kNoStateId) comps_->MakeSet(s);
59  return true;
60  }
61 
62  bool WhiteArc(StateId s, const Arc &arc) {
63  comps_->MakeSet(arc.nextstate);
64  comps_->Union(s, arc.nextstate);
65  return true;
66  }
67 
68  bool GreyArc(StateId s, const Arc &arc) {
69  comps_->Union(s, arc.nextstate);
70  return true;
71  }
72 
73  bool BlackArc(StateId s, const Arc &arc) {
74  comps_->Union(s, arc.nextstate);
75  return true;
76  }
77 
78  void FinishState(StateId s) {}
79 
80  void FinishVisit() {
81  if (cc_) GetCcVector(cc_);
82  }
83 
84  // Returns number of components.
85  // cc[i]: connected component number for state i.
86  int GetCcVector(std::vector<StateId> *cc) {
87  cc->clear();
88  cc->resize(nstates_, kNoStateId);
89  StateId ncomp = 0;
90  for (StateId s = 0; s < nstates_; ++s) {
91  const auto rep = comps_->FindSet(s);
92  auto &comp = (*cc)[rep];
93  if (comp == kNoStateId) {
94  comp = ncomp;
95  ++ncomp;
96  }
97  (*cc)[s] = comp;
98  }
99  return ncomp;
100  }
101 
102  private:
103  UnionFind<StateId> *comps_; // Components.
104  std::vector<StateId> *cc_; // State's cc number.
105  StateId nstates_; // State count.
106 };
107 
108 // Finds and returns strongly-connected components, accessible and
109 // coaccessible states and related properties. Uses Tarjan's single
110 // DFS SCC algorithm (see Aho, et al, "Design and Analysis of Computer
111 // Algorithms", 189pp). Use with DfsVisit();
112 template <class Arc>
113 class SccVisitor {
114  public:
115  using StateId = typename Arc::StateId;
116  using Weight = typename Arc::Weight;
117 
118  // scc[i]: strongly-connected component number for state i.
119  // SCC numbers will be in topological order for acyclic input.
120  // access[i]: accessibility of state i.
121  // coaccess[i]: coaccessibility of state i.
122  // Any of above can be NULL.
123  // props: related property bits (cyclicity, initial cyclicity,
124  // accessibility, coaccessibility) set/cleared (o.w. unchanged).
125  SccVisitor(std::vector<StateId> *scc, std::vector<bool> *access,
126  std::vector<bool> *coaccess, uint64_t *props)
127  : scc_(scc), access_(access), coaccess_(coaccess), props_(props) {}
128  explicit SccVisitor(uint64_t *props)
129  : scc_(nullptr), access_(nullptr), coaccess_(nullptr), props_(props) {}
130 
131  void InitVisit(const Fst<Arc> &fst);
132 
133  bool InitState(StateId s, StateId root);
134 
135  bool TreeArc(StateId s, const Arc &arc) { return true; }
136 
137  bool BackArc(StateId s, const Arc &arc) {
138  const auto t = arc.nextstate;
139  if (dfnumber_[t] < lowlink_[s]) lowlink_[s] = dfnumber_[t];
140  if ((*coaccess_)[t]) (*coaccess_)[s] = true;
141  *props_ |= kCyclic;
142  *props_ &= ~kAcyclic;
143  if (t == start_) {
144  *props_ |= kInitialCyclic;
145  *props_ &= ~kInitialAcyclic;
146  }
147  return true;
148  }
149 
150  bool ForwardOrCrossArc(StateId s, const Arc &arc) {
151  const auto t = arc.nextstate;
152  if (dfnumber_[t] < dfnumber_[s] /* cross edge */ && onstack_[t] &&
153  dfnumber_[t] < lowlink_[s]) {
154  lowlink_[s] = dfnumber_[t];
155  }
156  if ((*coaccess_)[t]) (*coaccess_)[s] = true;
157  return true;
158  }
159 
160  // Last argument always ignored, but required by the interface.
161  void FinishState(StateId state, StateId p, const Arc *);
162 
163  void FinishVisit() {
164  // Numbers SCCs in topological order when acyclic.
165  if (scc_) {
166  for (size_t s = 0; s < scc_->size(); ++s) {
167  (*scc_)[s] = nscc_ - 1 - (*scc_)[s];
168  }
169  }
170  if (coaccess_internal_) delete coaccess_;
171  }
172 
173  private:
174  std::vector<StateId> *scc_; // State's scc number.
175  std::vector<bool> *access_; // State's accessibility.
176  std::vector<bool> *coaccess_; // State's coaccessibility.
177  uint64_t *props_;
178  const Fst<Arc> *fst_;
179  StateId start_;
180  StateId nstates_; // State count.
181  StateId nscc_; // SCC count.
182  bool coaccess_internal_;
183  std::vector<StateId> dfnumber_; // State discovery times.
184  std::vector<StateId>
185  lowlink_; // lowlink[state] == dfnumber[state] => SCC root
186  std::vector<bool> onstack_; // Is a state on the SCC stack?
187  std::vector<StateId> scc_stack_; // SCC stack, with random access.
188 };
189 
190 template <class Arc>
192  if (scc_) scc_->clear();
193  if (access_) access_->clear();
194  if (coaccess_) {
195  coaccess_->clear();
196  coaccess_internal_ = false;
197  } else {
198  coaccess_ = new std::vector<bool>;
199  coaccess_internal_ = true;
200  }
203  fst_ = &fst;
204  start_ = fst.Start();
205  nstates_ = 0;
206  nscc_ = 0;
207  dfnumber_.clear();
208  lowlink_.clear();
209  onstack_.clear();
210  scc_stack_.clear();
211 }
212 
213 template <class Arc>
215  scc_stack_.push_back(s);
216  if (static_cast<StateId>(dfnumber_.size()) <= s) {
217  if (scc_) scc_->resize(s + 1, -1);
218  if (access_) access_->resize(s + 1, false);
219  coaccess_->resize(s + 1, false);
220  dfnumber_.resize(s + 1, -1);
221  lowlink_.resize(s + 1, -1);
222  onstack_.resize(s + 1, false);
223  }
224  dfnumber_[s] = nstates_;
225  lowlink_[s] = nstates_;
226  onstack_[s] = true;
227  if (root == start_) {
228  if (access_) (*access_)[s] = true;
229  } else {
230  if (access_) (*access_)[s] = false;
231  *props_ |= kNotAccessible;
232  *props_ &= ~kAccessible;
233  }
234  ++nstates_;
235  return true;
236 }
237 
238 template <class Arc>
239 inline void SccVisitor<Arc>::FinishState(StateId s, StateId p, const Arc *) {
240  if (fst_->Final(s) != Weight::Zero()) (*coaccess_)[s] = true;
241  if (dfnumber_[s] == lowlink_[s]) { // Root of new SCC.
242  bool scc_coaccess = false;
243  auto i = scc_stack_.size();
244  StateId t;
245  do {
246  t = scc_stack_[--i];
247  if ((*coaccess_)[t]) scc_coaccess = true;
248  } while (s != t);
249  do {
250  t = scc_stack_.back();
251  if (scc_) (*scc_)[t] = nscc_;
252  if (scc_coaccess) (*coaccess_)[t] = true;
253  onstack_[t] = false;
254  scc_stack_.pop_back();
255  } while (s != t);
256  if (!scc_coaccess) {
257  *props_ |= kNotCoAccessible;
258  *props_ &= ~kCoAccessible;
259  }
260  ++nscc_;
261  }
262  if (p != kNoStateId) {
263  if ((*coaccess_)[s]) (*coaccess_)[p] = true;
264  if (lowlink_[s] < lowlink_[p]) lowlink_[p] = lowlink_[s];
265  }
266 }
267 
268 // Trims an FST, removing states and arcs that are not on successful paths.
269 // This version modifies its input.
270 //
271 // Complexity:
272 //
273 // Time: O(V + E)
274 // Space: O(V + E)
275 //
276 // where V = # of states and E = # of arcs.
277 template <class Arc>
279  using StateId = typename Arc::StateId;
280  std::vector<bool> access;
281  std::vector<bool> coaccess;
282  uint64_t props = 0;
283  SccVisitor<Arc> scc_visitor(nullptr, &access, &coaccess, &props);
284  DfsVisit(*fst, &scc_visitor);
285  std::vector<StateId> dstates;
286  dstates.reserve(access.size());
287  for (StateId s = 0; s < access.size(); ++s) {
288  if (!access[s] || !coaccess[s]) dstates.push_back(s);
289  }
290  fst->DeleteStates(dstates);
292 }
293 
294 // Returns an acyclic FST where each SCC in the input FST has been condensed to
295 // a single state with transitions between SCCs retained and within SCCs
296 // dropped. Also populates 'scc' with a mapping from input to output states.
297 template <class Arc>
298 void Condense(const Fst<Arc> &ifst, MutableFst<Arc> *ofst,
299  std::vector<typename Arc::StateId> *scc) {
300  using StateId = typename Arc::StateId;
301  ofst->DeleteStates();
302  uint64_t props = 0;
303  SccVisitor<Arc> scc_visitor(scc, nullptr, nullptr, &props);
304  DfsVisit(ifst, &scc_visitor);
305  const auto iter = std::max_element(scc->cbegin(), scc->cend());
306  if (iter == scc->cend()) return;
307  const StateId num_condensed_states = 1 + *iter;
308  ofst->ReserveStates(num_condensed_states);
309  for (StateId c = 0; c < num_condensed_states; ++c) {
310  ofst->AddState();
311  }
312  for (StateId s = 0; s < scc->size(); ++s) {
313  const auto c = (*scc)[s];
314  if (s == ifst.Start()) ofst->SetStart(c);
315  const auto weight = ifst.Final(s);
316  if (weight != Arc::Weight::Zero())
317  ofst->SetFinal(c, Plus(ofst->Final(c), weight));
318  for (ArcIterator<Fst<Arc>> aiter(ifst, s); !aiter.Done(); aiter.Next()) {
319  const auto &arc = aiter.Value();
320  const auto nextc = (*scc)[arc.nextstate];
321  if (nextc != c) {
322  Arc condensed_arc = arc;
323  condensed_arc.nextstate = nextc;
324  ofst->AddArc(c, std::move(condensed_arc));
325  }
326  }
327  }
329 }
330 
331 } // namespace fst
332 
333 #endif // FST_CONNECT_H_
void FinishVisit()
Definition: connect.h:80
constexpr uint64_t kCyclic
Definition: properties.h:108
SccVisitor(std::vector< StateId > *scc, std::vector< bool > *access, std::vector< bool > *coaccess, uint64_t *props)
Definition: connect.h:125
void InitVisit(const Fst< Arc > &fst)
Definition: connect.h:54
bool InitState(StateId s, StateId root)
Definition: connect.h:56
bool BackArc(StateId s, const Arc &arc)
Definition: connect.h:137
constexpr uint64_t kInitialCyclic
Definition: properties.h:113
typename Arc::Weight Weight
Definition: connect.h:39
ErrorWeight Plus(const ErrorWeight &, const ErrorWeight &)
Definition: error-weight.h:60
constexpr uint64_t kCoAccessible
Definition: properties.h:128
bool TreeArc(StateId s, const Arc &arc)
Definition: connect.h:135
void FinishState(StateId s)
Definition: connect.h:78
constexpr uint64_t kNotAccessible
Definition: properties.h:125
void DfsVisit(const FST &fst, Visitor *visitor, ArcFilter filter, bool access_only=false)
Definition: dfs-visit.h:109
constexpr uint64_t kInitialAcyclic
Definition: properties.h:115
void InitVisit(const Fst< Arc > &fst)
Definition: connect.h:191
bool GreyArc(StateId s, const Arc &arc)
Definition: connect.h:68
SccVisitor(uint64_t *props)
Definition: connect.h:128
virtual Weight Final(StateId) const =0
virtual void SetStart(StateId)=0
void Connect(MutableFst< Arc > *fst)
Definition: connect.h:278
CcVisitor(std::vector< StateId > *cc)
Definition: connect.h:43
constexpr int kNoStateId
Definition: fst.h:202
typename Arc::StateId StateId
Definition: connect.h:115
void FinishState(StateId state, StateId p, const Arc *)
Definition: connect.h:239
bool WhiteArc(StateId s, const Arc &arc)
Definition: connect.h:62
void FinishVisit()
Definition: connect.h:163
constexpr uint64_t kAcyclic
Definition: properties.h:110
virtual void SetProperties(uint64_t props, uint64_t mask)=0
constexpr uint64_t kAccessible
Definition: properties.h:123
int GetCcVector(std::vector< StateId > *cc)
Definition: connect.h:86
virtual StateId Start() const =0
bool ForwardOrCrossArc(StateId s, const Arc &arc)
Definition: connect.h:150
CcVisitor(UnionFind< StateId > *comps)
Definition: connect.h:47
virtual void AddArc(StateId, const Arc &)=0
void Union(T x, T y)
Definition: union-find.h:56
void Condense(const Fst< Arc > &ifst, MutableFst< Arc > *ofst, std::vector< typename Arc::StateId > *scc)
Definition: connect.h:298
T MakeSet(T item)
Definition: union-find.h:60
virtual StateId AddState()=0
virtual void ReserveStates(size_t)
Definition: mutable-fst.h:96
virtual void SetFinal(StateId s, Weight weight=Weight::One())=0
virtual void DeleteStates(const std::vector< StateId > &)=0
constexpr uint64_t kNotCoAccessible
Definition: properties.h:130
bool InitState(StateId s, StateId root)
Definition: connect.h:214
bool BlackArc(StateId s, const Arc &arc)
Definition: connect.h:73
T FindSet(T item)
Definition: union-find.h:39
typename Arc::StateId StateId
Definition: connect.h:40
typename Arc::Weight Weight
Definition: connect.h:116