FST  openfst-1.7.1
OpenFst Library
connect.h
Go to the documentation of this file.
1 // See www.openfst.org for extensive documentation on this weighted
2 // finite-state transducer library.
3 //
4 // Classes and functions to remove unsuccessful paths from an FST.
5 
6 #ifndef FST_CONNECT_H_
7 #define FST_CONNECT_H_
8 
9 #include <algorithm>
10 #include <memory>
11 #include <vector>
12 
13 #include <fst/dfs-visit.h>
14 #include <fst/mutable-fst.h>
15 #include <fst/union-find.h>
16 
17 
18 namespace fst {
19 
20 // Finds and returns connected components. Use with Visit().
21 template <class Arc>
22 class CcVisitor {
23  public:
24  using Weight = typename Arc::Weight;
25  using StateId = typename Arc::StateId;
26 
27  // cc[i]: connected component number for state i.
28  explicit CcVisitor(std::vector<StateId> *cc)
29  : comps_(new UnionFind<StateId>(0, kNoStateId)), cc_(cc), nstates_(0) {}
30 
31  // comps: connected components equiv classes.
32  explicit CcVisitor(UnionFind<StateId> *comps)
33  : comps_(comps), cc_(nullptr), nstates_(0) {}
34 
36  if (cc_) delete comps_;
37  }
38 
39  void InitVisit(const Fst<Arc> &fst) {}
40 
41  bool InitState(StateId s, StateId root) {
42  ++nstates_;
43  if (comps_->FindSet(s) == kNoStateId) comps_->MakeSet(s);
44  return true;
45  }
46 
47  bool WhiteArc(StateId s, const Arc &arc) {
48  comps_->MakeSet(arc.nextstate);
49  comps_->Union(s, arc.nextstate);
50  return true;
51  }
52 
53  bool GreyArc(StateId s, const Arc &arc) {
54  comps_->Union(s, arc.nextstate);
55  return true;
56  }
57 
58  bool BlackArc(StateId s, const Arc &arc) {
59  comps_->Union(s, arc.nextstate);
60  return true;
61  }
62 
63  void FinishState(StateId s) {}
64 
65  void FinishVisit() {
66  if (cc_) GetCcVector(cc_);
67  }
68 
69  // Returns number of components.
70  // cc[i]: connected component number for state i.
71  int GetCcVector(std::vector<StateId> *cc) {
72  cc->clear();
73  cc->resize(nstates_, kNoStateId);
74  StateId ncomp = 0;
75  for (StateId s = 0; s < nstates_; ++s) {
76  const auto rep = comps_->FindSet(s);
77  auto &comp = (*cc)[rep];
78  if (comp == kNoStateId) {
79  comp = ncomp;
80  ++ncomp;
81  }
82  (*cc)[s] = comp;
83  }
84  return ncomp;
85  }
86 
87  private:
88  UnionFind<StateId> *comps_; // Components.
89  std::vector<StateId> *cc_; // State's cc number.
90  StateId nstates_; // State count.
91 };
92 
93 // Finds and returns strongly-connected components, accessible and
94 // coaccessible states and related properties. Uses Tarjan's single
95 // DFS SCC algorithm (see Aho, et al, "Design and Analysis of Computer
96 // Algorithms", 189pp). Use with DfsVisit();
97 template <class Arc>
98 class SccVisitor {
99  public:
100  using StateId = typename Arc::StateId;
101  using Weight = typename Arc::Weight;
102 
103  // scc[i]: strongly-connected component number for state i.
104  // SCC numbers will be in topological order for acyclic input.
105  // access[i]: accessibility of state i.
106  // coaccess[i]: coaccessibility of state i.
107  // Any of above can be NULL.
108  // props: related property bits (cyclicity, initial cyclicity,
109  // accessibility, coaccessibility) set/cleared (o.w. unchanged).
110  SccVisitor(std::vector<StateId> *scc, std::vector<bool> *access,
111  std::vector<bool> *coaccess, uint64 *props)
112  : scc_(scc), access_(access), coaccess_(coaccess), props_(props) {}
113  explicit SccVisitor(uint64 *props)
114  : scc_(nullptr), access_(nullptr), coaccess_(nullptr), props_(props) {}
115 
116  void InitVisit(const Fst<Arc> &fst);
117 
118  bool InitState(StateId s, StateId root);
119 
120  bool TreeArc(StateId s, const Arc &arc) { return true; }
121 
122  bool BackArc(StateId s, const Arc &arc) {
123  const auto t = arc.nextstate;
124  if ((*dfnumber_)[t] < (*lowlink_)[s]) (*lowlink_)[s] = (*dfnumber_)[t];
125  if ((*coaccess_)[t]) (*coaccess_)[s] = true;
126  *props_ |= kCyclic;
127  *props_ &= ~kAcyclic;
128  if (t == start_) {
129  *props_ |= kInitialCyclic;
130  *props_ &= ~kInitialAcyclic;
131  }
132  return true;
133  }
134 
135  bool ForwardOrCrossArc(StateId s, const Arc &arc) {
136  const auto t = arc.nextstate;
137  if ((*dfnumber_)[t] < (*dfnumber_)[s] /* cross edge */ && (*onstack_)[t] &&
138  (*dfnumber_)[t] < (*lowlink_)[s]) {
139  (*lowlink_)[s] = (*dfnumber_)[t];
140  }
141  if ((*coaccess_)[t]) (*coaccess_)[s] = true;
142  return true;
143  }
144 
145  // Last argument always ignored, but required by the interface.
146  void FinishState(StateId state, StateId p, const Arc *);
147 
148  void FinishVisit() {
149  // Numbers SCCs in topological order when acyclic.
150  if (scc_) {
151  for (StateId s = 0; s < scc_->size(); ++s) {
152  (*scc_)[s] = nscc_ - 1 - (*scc_)[s];
153  }
154  }
155  if (coaccess_internal_) delete coaccess_;
156  dfnumber_.reset();
157  lowlink_.reset();
158  onstack_.reset();
159  scc_stack_.reset();
160  }
161 
162  private:
163  std::vector<StateId> *scc_; // State's scc number.
164  std::vector<bool> *access_; // State's accessibility.
165  std::vector<bool> *coaccess_; // State's coaccessibility.
166  uint64 *props_;
167  const Fst<Arc> *fst_;
168  StateId start_;
169  StateId nstates_; // State count.
170  StateId nscc_; // SCC count.
171  bool coaccess_internal_;
172  std::unique_ptr<std::vector<StateId>> dfnumber_; // State discovery times.
173  std::unique_ptr<std::vector<StateId>>
174  lowlink_; // lowlink[state] == dfnumber[state] => SCC root
175  std::unique_ptr<std::vector<bool>> onstack_; // Is a state on the SCC stack?
176  std::unique_ptr<std::vector<StateId>>
177  scc_stack_; // SCC stack, with random access.
178 };
179 
180 template <class Arc>
182  if (scc_) scc_->clear();
183  if (access_) access_->clear();
184  if (coaccess_) {
185  coaccess_->clear();
186  coaccess_internal_ = false;
187  } else {
188  coaccess_ = new std::vector<bool>;
189  coaccess_internal_ = true;
190  }
193  fst_ = &fst;
194  start_ = fst.Start();
195  nstates_ = 0;
196  nscc_ = 0;
197  dfnumber_.reset(new std::vector<StateId>());
198  lowlink_.reset(new std::vector<StateId>());
199  onstack_.reset(new std::vector<bool>());
200  scc_stack_.reset(new std::vector<StateId>());
201 }
202 
203 template <class Arc>
205  scc_stack_->push_back(s);
206  while (dfnumber_->size() <= s) {
207  if (scc_) scc_->push_back(-1);
208  if (access_) access_->push_back(false);
209  coaccess_->push_back(false);
210  dfnumber_->push_back(-1);
211  lowlink_->push_back(-1);
212  onstack_->push_back(false);
213  }
214  (*dfnumber_)[s] = nstates_;
215  (*lowlink_)[s] = nstates_;
216  (*onstack_)[s] = true;
217  if (root == start_) {
218  if (access_) (*access_)[s] = true;
219  } else {
220  if (access_) (*access_)[s] = false;
221  *props_ |= kNotAccessible;
222  *props_ &= ~kAccessible;
223  }
224  ++nstates_;
225  return true;
226 }
227 
228 template <class Arc>
229 inline void SccVisitor<Arc>::FinishState(StateId s, StateId p, const Arc *) {
230  if (fst_->Final(s) != Weight::Zero()) (*coaccess_)[s] = true;
231  if ((*dfnumber_)[s] == (*lowlink_)[s]) { // Root of new SCC.
232  bool scc_coaccess = false;
233  auto i = scc_stack_->size();
234  StateId t;
235  do {
236  t = (*scc_stack_)[--i];
237  if ((*coaccess_)[t]) scc_coaccess = true;
238  } while (s != t);
239  do {
240  t = scc_stack_->back();
241  if (scc_) (*scc_)[t] = nscc_;
242  if (scc_coaccess) (*coaccess_)[t] = true;
243  (*onstack_)[t] = false;
244  scc_stack_->pop_back();
245  } while (s != t);
246  if (!scc_coaccess) {
247  *props_ |= kNotCoAccessible;
248  *props_ &= ~kCoAccessible;
249  }
250  ++nscc_;
251  }
252  if (p != kNoStateId) {
253  if ((*coaccess_)[s]) (*coaccess_)[p] = true;
254  if ((*lowlink_)[s] < (*lowlink_)[p]) (*lowlink_)[p] = (*lowlink_)[s];
255  }
256 }
257 
258 // Trims an FST, removing states and arcs that are not on successful paths.
259 // This version modifies its input.
260 //
261 // Complexity:
262 //
263 // Time: O(V + E)
264 // Space: O(V + E)
265 //
266 // where V = # of states and E = # of arcs.
267 template <class Arc>
269  using StateId = typename Arc::StateId;
270  std::vector<bool> access;
271  std::vector<bool> coaccess;
272  uint64 props = 0;
273  SccVisitor<Arc> scc_visitor(nullptr, &access, &coaccess, &props);
274  DfsVisit(*fst, &scc_visitor);
275  std::vector<StateId> dstates;
276  for (StateId s = 0; s < access.size(); ++s) {
277  if (!access[s] || !coaccess[s]) dstates.push_back(s);
278  }
279  fst->DeleteStates(dstates);
281 }
282 
283 // Returns an acyclic FST where each SCC in the input FST has been condensed to
284 // a single state with transitions between SCCs retained and within SCCs
285 // dropped. Also populates 'scc' with a mapping from input to output states.
286 template <class Arc>
287 void Condense(const Fst<Arc> &ifst, MutableFst<Arc> *ofst,
288  std::vector<typename Arc::StateId> *scc) {
289  using StateId = typename Arc::StateId;
290  ofst->DeleteStates();
291  uint64 props = 0;
292  SccVisitor<Arc> scc_visitor(scc, nullptr, nullptr, &props);
293  DfsVisit(ifst, &scc_visitor);
294  const auto iter = std::max_element(scc->cbegin(), scc->cend());
295  if (iter == scc->cend()) return;
296  const StateId num_condensed_states = 1 + *iter;
297  ofst->ReserveStates(num_condensed_states);
298  for (StateId c = 0; c < num_condensed_states; ++c) {
299  ofst->AddState();
300  }
301  for (StateId s = 0; s < scc->size(); ++s) {
302  const auto c = (*scc)[s];
303  if (s == ifst.Start()) ofst->SetStart(c);
304  const auto weight = ifst.Final(s);
305  if (weight != Arc::Weight::Zero())
306  ofst->SetFinal(c, Plus(ofst->Final(c), weight));
307  for (ArcIterator<Fst<Arc>> aiter(ifst, s); !aiter.Done(); aiter.Next()) {
308  const auto &arc = aiter.Value();
309  const auto nextc = (*scc)[arc.nextstate];
310  if (nextc != c) {
311  Arc condensed_arc = arc;
312  condensed_arc.nextstate = nextc;
313  ofst->AddArc(c, std::move(condensed_arc));
314  }
315  }
316  }
318 }
319 
320 } // namespace fst
321 
322 #endif // FST_CONNECT_H_
void FinishVisit()
Definition: connect.h:65
constexpr uint64 kInitialCyclic
Definition: properties.h:95
void InitVisit(const Fst< Arc > &fst)
Definition: connect.h:39
constexpr uint64 kNotAccessible
Definition: properties.h:107
constexpr uint64 kInitialAcyclic
Definition: properties.h:97
bool InitState(StateId s, StateId root)
Definition: connect.h:41
bool BackArc(StateId s, const Arc &arc)
Definition: connect.h:122
uint64_t uint64
Definition: types.h:32
constexpr uint64 kNotCoAccessible
Definition: properties.h:112
typename Arc::Weight Weight
Definition: connect.h:24
bool TreeArc(StateId s, const Arc &arc)
Definition: connect.h:120
void FinishState(StateId s)
Definition: connect.h:63
virtual void AddArc(StateId, const Arc &arc)=0
void DfsVisit(const FST &fst, Visitor *visitor, ArcFilter filter, bool access_only=false)
Definition: dfs-visit.h:94
void InitVisit(const Fst< Arc > &fst)
Definition: connect.h:181
bool GreyArc(StateId s, const Arc &arc)
Definition: connect.h:53
virtual Weight Final(StateId) const =0
virtual void SetStart(StateId)=0
void Connect(MutableFst< Arc > *fst)
Definition: connect.h:268
CcVisitor(std::vector< StateId > *cc)
Definition: connect.h:28
constexpr int kNoStateId
Definition: fst.h:180
typename Arc::StateId StateId
Definition: connect.h:100
void FinishState(StateId state, StateId p, const Arc *)
Definition: connect.h:229
virtual void SetFinal(StateId, Weight)=0
bool WhiteArc(StateId s, const Arc &arc)
Definition: connect.h:47
void FinishVisit()
Definition: connect.h:148
int GetCcVector(std::vector< StateId > *cc)
Definition: connect.h:71
ExpectationWeight< X1, X2 > Plus(const ExpectationWeight< X1, X2 > &w1, const ExpectationWeight< X1, X2 > &w2)
virtual StateId Start() const =0
bool ForwardOrCrossArc(StateId s, const Arc &arc)
Definition: connect.h:135
virtual void ReserveStates(StateId n)
Definition: mutable-fst.h:76
SccVisitor(std::vector< StateId > *scc, std::vector< bool > *access, std::vector< bool > *coaccess, uint64 *props)
Definition: connect.h:110
CcVisitor(UnionFind< StateId > *comps)
Definition: connect.h:32
constexpr uint64 kCoAccessible
Definition: properties.h:110
SccVisitor(uint64 *props)
Definition: connect.h:113
constexpr uint64 kAcyclic
Definition: properties.h:92
void Union(T x, T y)
Definition: union-find.h:37
void Condense(const Fst< Arc > &ifst, MutableFst< Arc > *ofst, std::vector< typename Arc::StateId > *scc)
Definition: connect.h:287
T MakeSet(T item)
Definition: union-find.h:41
virtual StateId AddState()=0
virtual void DeleteStates(const std::vector< StateId > &)=0
bool InitState(StateId s, StateId root)
Definition: connect.h:204
bool BlackArc(StateId s, const Arc &arc)
Definition: connect.h:58
T FindSet(T item)
Definition: union-find.h:26
typename Arc::StateId StateId
Definition: connect.h:25
constexpr uint64 kAccessible
Definition: properties.h:105
typename Arc::Weight Weight
Definition: connect.h:101
constexpr uint64 kCyclic
Definition: properties.h:90
virtual void SetProperties(uint64 props, uint64 mask)=0