FST  openfst-1.7.2
OpenFst Library
info-impl.h
Go to the documentation of this file.
1 // See www.openfst.org for extensive documentation on this weighted
2 // finite-state transducer library.
3 //
4 // Class to compute various information about FSTs, a helper class for
5 // fstinfo.cc.
6 
7 #ifndef FST_SCRIPT_INFO_IMPL_H_
8 #define FST_SCRIPT_INFO_IMPL_H_
9 
10 #include <map>
11 #include <string>
12 #include <vector>
13 
14 #include <fst/connect.h>
15 #include <fst/dfs-visit.h>
16 #include <fst/fst.h>
17 #include <fst/lookahead-matcher.h>
18 #include <fst/matcher.h>
19 #include <fst/queue.h>
20 #include <fst/test-properties.h>
21 #include <fst/verify.h>
22 #include <fst/visit.h>
23 
24 namespace fst {
25 
26 // Compute various information about FSTs, helper class for fstinfo.cc.
27 // WARNING: Stand-alone use of this class is not recommended, most code
28 // should call directly the relevant library functions: Fst<Arc>::NumStates,
29 // Fst<Arc>::NumArcs, TestProperties, etc.
30 class FstInfo {
31  public:
32  FstInfo() {}
33 
34  // When info_type is "short" (or "auto" and not an ExpandedFst) then only
35  // minimal info is computed and can be requested.
36  template <typename Arc>
37  FstInfo(const Fst<Arc> &fst, bool test_properties,
38  const string &arc_filter_type = "any",
39  const string &info_type = "auto", bool verify = true)
40  : fst_type_(fst.Type()),
41  input_symbols_(fst.InputSymbols() ? fst.InputSymbols()->Name()
42  : "none"),
43  output_symbols_(fst.OutputSymbols() ? fst.OutputSymbols()->Name()
44  : "none"),
45  nstates_(0),
46  narcs_(0),
47  start_(kNoStateId),
48  nfinal_(0),
49  nepsilons_(0),
50  niepsilons_(0),
51  noepsilons_(0),
52  ilabel_mult_(0.0),
53  olabel_mult_(0.0),
54  naccess_(0),
55  ncoaccess_(0),
56  nconnect_(0),
57  ncc_(0),
58  nscc_(0),
59  input_match_type_(MATCH_NONE),
60  output_match_type_(MATCH_NONE),
61  input_lookahead_(false),
62  output_lookahead_(false),
63  properties_(0),
64  arc_filter_type_(arc_filter_type),
65  long_info_(true),
66  arc_type_(Arc::Type()) {
67  using Label = typename Arc::Label;
68  using StateId = typename Arc::StateId;
69  using Weight = typename Arc::Weight;
70  if (info_type == "long") {
71  long_info_ = true;
72  } else if (info_type == "short") {
73  long_info_ = false;
74  } else if (info_type == "auto") {
75  long_info_ = fst.Properties(kExpanded, false);
76  } else {
77  FSTERROR() << "Bad info type: " << info_type;
78  return;
79  }
80  if (!long_info_) return;
81  // If the FST is not sane, we return.
82  if (verify && !Verify(fst)) {
83  FSTERROR() << "FstInfo: Verify: FST not well-formed";
84  return;
85  }
86  start_ = fst.Start();
87  properties_ = fst.Properties(kFstProperties, test_properties);
88  for (StateIterator<Fst<Arc>> siter(fst); !siter.Done(); siter.Next()) {
89  ++nstates_;
90  const auto s = siter.Value();
91  if (fst.Final(s) != Weight::Zero()) ++nfinal_;
92  std::map<Label, size_t> ilabel_count;
93  std::map<Label, size_t> olabel_count;
94  for (ArcIterator<Fst<Arc>> aiter(fst, s); !aiter.Done(); aiter.Next()) {
95  const auto &arc = aiter.Value();
96  ++narcs_;
97  if (arc.ilabel == 0 && arc.olabel == 0) ++nepsilons_;
98  if (arc.ilabel == 0) ++niepsilons_;
99  if (arc.olabel == 0) ++noepsilons_;
100  ++ilabel_count[arc.ilabel];
101  ++olabel_count[arc.olabel];
102  }
103  for (auto it = ilabel_count.begin(); it != ilabel_count.end(); ++it) {
104  ilabel_mult_ += it->second * it->second;
105  }
106  for (auto it = olabel_count.begin(); it != olabel_count.end(); ++it) {
107  olabel_mult_ += it->second * it->second;
108  }
109  }
110  if (narcs_ > 0) {
111  ilabel_mult_ /= narcs_;
112  olabel_mult_ /= narcs_;
113  }
114  {
115  std::vector<StateId> cc;
116  CcVisitor<Arc> cc_visitor(&cc);
117  FifoQueue<StateId> fifo_queue;
118  if (arc_filter_type == "any") {
119  Visit(fst, &cc_visitor, &fifo_queue);
120  } else if (arc_filter_type == "epsilon") {
121  Visit(fst, &cc_visitor, &fifo_queue, EpsilonArcFilter<Arc>());
122  } else if (arc_filter_type == "iepsilon") {
123  Visit(fst, &cc_visitor, &fifo_queue, InputEpsilonArcFilter<Arc>());
124  } else if (arc_filter_type == "oepsilon") {
125  Visit(fst, &cc_visitor, &fifo_queue, OutputEpsilonArcFilter<Arc>());
126  } else {
127  FSTERROR() << "Bad arc filter type: " << arc_filter_type;
128  return;
129  }
130  for (StateId s = 0; s < cc.size(); ++s) {
131  if (cc[s] >= ncc_) ncc_ = cc[s] + 1;
132  }
133  }
134  {
135  std::vector<StateId> scc;
136  std::vector<bool> access, coaccess;
137  uint64 props = 0;
138  SccVisitor<Arc> scc_visitor(&scc, &access, &coaccess, &props);
139  if (arc_filter_type == "any") {
140  DfsVisit(fst, &scc_visitor);
141  } else if (arc_filter_type == "epsilon") {
142  DfsVisit(fst, &scc_visitor, EpsilonArcFilter<Arc>());
143  } else if (arc_filter_type == "iepsilon") {
144  DfsVisit(fst, &scc_visitor, InputEpsilonArcFilter<Arc>());
145  } else if (arc_filter_type == "oepsilon") {
146  DfsVisit(fst, &scc_visitor, OutputEpsilonArcFilter<Arc>());
147  } else {
148  FSTERROR() << "Bad arc filter type: " << arc_filter_type;
149  return;
150  }
151  for (StateId s = 0; s < scc.size(); ++s) {
152  if (access[s]) ++naccess_;
153  if (coaccess[s]) ++ncoaccess_;
154  if (access[s] && coaccess[s]) ++nconnect_;
155  if (scc[s] >= nscc_) nscc_ = scc[s] + 1;
156  }
157  }
158  LookAheadMatcher<Fst<Arc>> imatcher(fst, MATCH_INPUT);
159  input_match_type_ = imatcher.Type(test_properties);
160  input_lookahead_ = imatcher.Flags() & kInputLookAheadMatcher;
161  LookAheadMatcher<Fst<Arc>> omatcher(fst, MATCH_OUTPUT);
162  output_match_type_ = omatcher.Type(test_properties);
163  output_lookahead_ = omatcher.Flags() & kOutputLookAheadMatcher;
164  }
165 
166  // Short info.
167 
168  const string &FstType() const { return fst_type_; }
169 
170  const string &ArcType() const { return arc_type_; }
171 
172  const string &InputSymbols() const { return input_symbols_; }
173 
174  const string &OutputSymbols() const { return output_symbols_; }
175 
176  bool LongInfo() const { return long_info_; }
177 
178  const string &ArcFilterType() const { return arc_filter_type_; }
179 
180  // Long info.
181 
183  CheckLong();
184  return input_match_type_;
185  }
186 
188  CheckLong();
189  return output_match_type_;
190  }
191 
192  bool InputLookAhead() const {
193  CheckLong();
194  return input_lookahead_;
195  }
196 
197  bool OutputLookAhead() const {
198  CheckLong();
199  return output_lookahead_;
200  }
201 
202  int64 NumStates() const {
203  CheckLong();
204  return nstates_;
205  }
206 
207  size_t NumArcs() const {
208  CheckLong();
209  return narcs_;
210  }
211 
212  int64 Start() const {
213  CheckLong();
214  return start_;
215  }
216 
217  size_t NumFinal() const {
218  CheckLong();
219  return nfinal_;
220  }
221 
222  size_t NumEpsilons() const {
223  CheckLong();
224  return nepsilons_;
225  }
226 
227  size_t NumInputEpsilons() const {
228  CheckLong();
229  return niepsilons_;
230  }
231 
232  size_t NumOutputEpsilons() const {
233  CheckLong();
234  return noepsilons_;
235  }
236 
237  double InputLabelMultiplicity() const {
238  CheckLong();
239  return ilabel_mult_;
240  }
241 
242  double OutputLabelMultiplicity() const {
243  CheckLong();
244  return olabel_mult_;
245  }
246 
247  size_t NumAccessible() const {
248  CheckLong();
249  return naccess_;
250  }
251 
252  size_t NumCoAccessible() const {
253  CheckLong();
254  return ncoaccess_;
255  }
256 
257  size_t NumConnected() const {
258  CheckLong();
259  return nconnect_;
260  }
261 
262  size_t NumCc() const {
263  CheckLong();
264  return ncc_;
265  }
266 
267  size_t NumScc() const {
268  CheckLong();
269  return nscc_;
270  }
271 
272  uint64 Properties() const {
273  CheckLong();
274  return properties_;
275  }
276 
277  private:
278  void CheckLong() const {
279  if (!long_info_)
280  FSTERROR() << "FstInfo: Method only available with long info signature";
281  }
282 
283  string fst_type_;
284  string input_symbols_;
285  string output_symbols_;
286  int64 nstates_;
287  size_t narcs_;
288  int64 start_;
289  size_t nfinal_;
290  size_t nepsilons_;
291  size_t niepsilons_;
292  size_t noepsilons_;
293  double ilabel_mult_;
294  double olabel_mult_;
295  size_t naccess_;
296  size_t ncoaccess_;
297  size_t nconnect_;
298  size_t ncc_;
299  size_t nscc_;
300  MatchType input_match_type_;
301  MatchType output_match_type_;
302  bool input_lookahead_;
303  bool output_lookahead_;
304  uint64 properties_;
305  string arc_filter_type_;
306  bool long_info_;
307  string arc_type_;
308 };
309 
310 void PrintFstInfoImpl(const FstInfo &fstinfo, bool pipe = false);
311 
312 } // namespace fst
313 
314 #endif // FST_SCRIPT_INFO_IMPL_H_
const string & InputSymbols() const
Definition: info-impl.h:172
const string & FstType() const
Definition: info-impl.h:168
const string & ArcFilterType() const
Definition: info-impl.h:178
uint64_t uint64
Definition: types.h:32
MatchType InputMatchType() const
Definition: info-impl.h:182
MatchType
Definition: fst.h:171
void Visit(const FST &fst, Visitor *visitor, Queue *queue, ArcFilter filter, bool access_only=false)
Definition: visit.h:58
void DfsVisit(const FST &fst, Visitor *visitor, ArcFilter filter, bool access_only=false)
Definition: dfs-visit.h:94
virtual Weight Final(StateId) const =0
size_t NumFinal() const
Definition: info-impl.h:217
size_t NumConnected() const
Definition: info-impl.h:257
MatchType Type(bool test) const
constexpr uint64 kFstProperties
Definition: properties.h:301
constexpr int kNoStateId
Definition: fst.h:180
constexpr uint64 kExpanded
Definition: properties.h:27
bool InputLookAhead() const
Definition: info-impl.h:192
size_t NumCc() const
Definition: info-impl.h:262
virtual uint64 Properties(uint64 mask, bool test) const =0
int64_t int64
Definition: types.h:27
bool LongInfo() const
Definition: info-impl.h:176
#define FSTERROR()
Definition: util.h:35
size_t NumScc() const
Definition: info-impl.h:267
double OutputLabelMultiplicity() const
Definition: info-impl.h:242
size_t NumOutputEpsilons() const
Definition: info-impl.h:232
size_t NumInputEpsilons() const
Definition: info-impl.h:227
void PrintFstInfoImpl(const FstInfo &fstinfo, bool pipe=false)
Definition: info-impl.cc:5
size_t NumAccessible() const
Definition: info-impl.h:247
double InputLabelMultiplicity() const
Definition: info-impl.h:237
virtual StateId Start() const =0
constexpr uint32 kInputLookAheadMatcher
constexpr uint32 kOutputLookAheadMatcher
size_t NumEpsilons() const
Definition: info-impl.h:222
const string & OutputSymbols() const
Definition: info-impl.h:174
size_t NumCoAccessible() const
Definition: info-impl.h:252
const string & ArcType() const
Definition: info-impl.h:170
uint64 Properties() const
Definition: info-impl.h:272
bool Verify(const Fst< Arc > &fst, bool allow_negative_labels=false)
Definition: verify.h:19
bool OutputLookAhead() const
Definition: info-impl.h:197
int64 Start() const
Definition: info-impl.h:212
FstInfo(const Fst< Arc > &fst, bool test_properties, const string &arc_filter_type="any", const string &info_type="auto", bool verify=true)
Definition: info-impl.h:37
MatchType OutputMatchType() const
Definition: info-impl.h:187
size_t NumArcs() const
Definition: info-impl.h:207
int64 NumStates() const
Definition: info-impl.h:202