FST  openfst-1.8.3
OpenFst Library
info-impl.h
Go to the documentation of this file.
1 // Copyright 2005-2024 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the 'License');
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an 'AS IS' BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // See www.openfst.org for extensive documentation on this weighted
16 // finite-state transducer library.
17 //
18 // Class to compute various information about FSTs, a helper class for
19 // fstinfo.cc.
20 
21 #ifndef FST_SCRIPT_INFO_IMPL_H_
22 #define FST_SCRIPT_INFO_IMPL_H_
23 
24 #include <cstddef>
25 #include <cstdint>
26 #include <map>
27 #include <ostream>
28 #include <string>
29 #include <vector>
30 
31 #include <fst/log.h>
32 #include <fst/arcfilter.h>
33 #include <fst/cc-visitors.h>
34 #include <fst/dfs-visit.h>
35 #include <fst/fst.h>
36 #include <fst/lookahead-matcher.h>
37 #include <fst/matcher.h>
38 #include <fst/properties.h>
39 #include <fst/queue.h>
40 #include <fst/util.h>
41 #include <fst/verify.h>
42 #include <fst/visit.h>
44 #include <string_view>
45 
46 namespace fst {
47 
48 // Compute various information about FSTs, helper class for fstinfo.cc.
49 // WARNING: Stand-alone use of this class is not recommended, most code
50 // should call directly the relevant library functions: Fst<Arc>::NumStates,
51 // Fst<Arc>::NumArcs, TestProperties, etc.
52 class FstInfo {
53  public:
54  // When info_type is "short" (or "auto" and not an ExpandedFst) then only
55  // minimal info is computed and can be requested.
56  template <typename Arc>
57  FstInfo(const Fst<Arc> &fst, bool test_properties,
59  std::string_view info_type = "auto", bool verify = true)
60  : fst_type_(fst.Type()),
61  input_symbols_(fst.InputSymbols() ? fst.InputSymbols()->Name()
62  : "none"),
63  output_symbols_(fst.OutputSymbols() ? fst.OutputSymbols()->Name()
64  : "none"),
65  nstates_(0),
66  narcs_(0),
67  start_(kNoStateId),
68  nfinal_(0),
69  nepsilons_(0),
70  niepsilons_(0),
71  noepsilons_(0),
72  ilabel_mult_(0.0),
73  olabel_mult_(0.0),
74  naccess_(0),
75  ncoaccess_(0),
76  nconnect_(0),
77  ncc_(0),
78  nscc_(0),
79  input_match_type_(MATCH_NONE),
80  output_match_type_(MATCH_NONE),
81  input_lookahead_(false),
82  output_lookahead_(false),
83  properties_(0),
84  arc_filter_type_(arc_filter_type),
85  long_info_(true),
86  arc_type_(Arc::Type()) {
87  using Label = typename Arc::Label;
88  using StateId = typename Arc::StateId;
89  using Weight = typename Arc::Weight;
90  if (info_type == "long") {
91  long_info_ = true;
92  } else if (info_type == "short") {
93  long_info_ = false;
94  } else if (info_type == "auto") {
95  long_info_ = fst.Properties(kExpanded, false);
96  } else {
97  FSTERROR() << "Bad info type: " << info_type;
98  return;
99  }
100  if (!long_info_) return;
101  // If the FST is not sane, we return.
102  if (verify && !Verify(fst)) {
103  FSTERROR() << "FstInfo: Verify: FST not well-formed";
104  return;
105  }
106  start_ = fst.Start();
107  properties_ = fst.Properties(kFstProperties, test_properties);
108  for (StateIterator<Fst<Arc>> siter(fst); !siter.Done(); siter.Next()) {
109  ++nstates_;
110  const auto s = siter.Value();
111  if (fst.Final(s) != Weight::Zero()) ++nfinal_;
112  std::map<Label, size_t> ilabel_count;
113  std::map<Label, size_t> olabel_count;
114  for (ArcIterator<Fst<Arc>> aiter(fst, s); !aiter.Done(); aiter.Next()) {
115  const auto &arc = aiter.Value();
116  ++narcs_;
117  if (arc.ilabel == 0 && arc.olabel == 0) ++nepsilons_;
118  if (arc.ilabel == 0) ++niepsilons_;
119  if (arc.olabel == 0) ++noepsilons_;
120  ++ilabel_count[arc.ilabel];
121  ++olabel_count[arc.olabel];
122  }
123  for (auto it = ilabel_count.begin(); it != ilabel_count.end(); ++it) {
124  ilabel_mult_ += it->second * it->second;
125  }
126  for (auto it = olabel_count.begin(); it != olabel_count.end(); ++it) {
127  olabel_mult_ += it->second * it->second;
128  }
129  }
130  if (narcs_ > 0) {
131  ilabel_mult_ /= narcs_;
132  olabel_mult_ /= narcs_;
133  }
134  {
135  std::vector<StateId> cc;
136  CcVisitor<Arc> cc_visitor(&cc);
137  FifoQueue<StateId> fifo_queue;
138  switch (arc_filter_type) {
140  Visit(fst, &cc_visitor, &fifo_queue);
141  break;
142  }
144  Visit(fst, &cc_visitor, &fifo_queue, EpsilonArcFilter<Arc>());
145  break;
146  }
148  Visit(fst, &cc_visitor, &fifo_queue, InputEpsilonArcFilter<Arc>());
149  break;
150  }
152  Visit(fst, &cc_visitor, &fifo_queue, OutputEpsilonArcFilter<Arc>());
153  break;
154  }
155  }
156  for (StateId s = 0; s < cc.size(); ++s) {
157  if (cc[s] >= ncc_) ncc_ = cc[s] + 1;
158  }
159  }
160  {
161  std::vector<StateId> scc;
162  std::vector<bool> access, coaccess;
163  uint64_t props = 0;
164  SccVisitor<Arc> scc_visitor(&scc, &access, &coaccess, &props);
165  switch (arc_filter_type) {
167  DfsVisit(fst, &scc_visitor);
168  break;
169  }
171  DfsVisit(fst, &scc_visitor, EpsilonArcFilter<Arc>());
172  break;
173  }
175  DfsVisit(fst, &scc_visitor, InputEpsilonArcFilter<Arc>());
176  break;
177  }
179  DfsVisit(fst, &scc_visitor, OutputEpsilonArcFilter<Arc>());
180  break;
181  }
182  }
183  for (StateId s = 0; s < scc.size(); ++s) {
184  if (access[s]) ++naccess_;
185  if (coaccess[s]) ++ncoaccess_;
186  if (access[s] && coaccess[s]) ++nconnect_;
187  if (scc[s] >= nscc_) nscc_ = scc[s] + 1;
188  }
189  }
190  LookAheadMatcher<Fst<Arc>> imatcher(fst, MATCH_INPUT);
191  input_match_type_ = imatcher.Type(test_properties);
192  input_lookahead_ = imatcher.Flags() & kInputLookAheadMatcher;
193  LookAheadMatcher<Fst<Arc>> omatcher(fst, MATCH_OUTPUT);
194  output_match_type_ = omatcher.Type(test_properties);
195  output_lookahead_ = omatcher.Flags() & kOutputLookAheadMatcher;
196  }
197 
198  // Short info.
199 
200  const std::string &FstType() const { return fst_type_; }
201 
202  const std::string &ArcType() const { return arc_type_; }
203 
204  const std::string &InputSymbols() const { return input_symbols_; }
205 
206  const std::string &OutputSymbols() const { return output_symbols_; }
207 
208  bool LongInfo() const { return long_info_; }
209 
210  script::ArcFilterType ArcFilterType() const { return arc_filter_type_; }
211 
212  // Long info.
213 
215  CheckLong();
216  return input_match_type_;
217  }
218 
220  CheckLong();
221  return output_match_type_;
222  }
223 
224  bool InputLookAhead() const {
225  CheckLong();
226  return input_lookahead_;
227  }
228 
229  bool OutputLookAhead() const {
230  CheckLong();
231  return output_lookahead_;
232  }
233 
234  int64_t NumStates() const {
235  CheckLong();
236  return nstates_;
237  }
238 
239  size_t NumArcs() const {
240  CheckLong();
241  return narcs_;
242  }
243 
244  int64_t Start() const {
245  CheckLong();
246  return start_;
247  }
248 
249  size_t NumFinal() const {
250  CheckLong();
251  return nfinal_;
252  }
253 
254  size_t NumEpsilons() const {
255  CheckLong();
256  return nepsilons_;
257  }
258 
259  size_t NumInputEpsilons() const {
260  CheckLong();
261  return niepsilons_;
262  }
263 
264  size_t NumOutputEpsilons() const {
265  CheckLong();
266  return noepsilons_;
267  }
268 
269  double InputLabelMultiplicity() const {
270  CheckLong();
271  return ilabel_mult_;
272  }
273 
274  double OutputLabelMultiplicity() const {
275  CheckLong();
276  return olabel_mult_;
277  }
278 
279  size_t NumAccessible() const {
280  CheckLong();
281  return naccess_;
282  }
283 
284  size_t NumCoAccessible() const {
285  CheckLong();
286  return ncoaccess_;
287  }
288 
289  size_t NumConnected() const {
290  CheckLong();
291  return nconnect_;
292  }
293 
294  size_t NumCc() const {
295  CheckLong();
296  return ncc_;
297  }
298 
299  size_t NumScc() const {
300  CheckLong();
301  return nscc_;
302  }
303 
304  uint64_t Properties() const {
305  CheckLong();
306  return properties_;
307  }
308 
309  void Info() const;
310 
311  private:
312  void CheckLong() const {
313  if (!long_info_)
314  FSTERROR() << "FstInfo: Method only available with long info signature";
315  }
316 
317  std::string fst_type_;
318  std::string input_symbols_;
319  std::string output_symbols_;
320  int64_t nstates_;
321  size_t narcs_;
322  int64_t start_;
323  size_t nfinal_;
324  size_t nepsilons_;
325  size_t niepsilons_;
326  size_t noepsilons_;
327  double ilabel_mult_;
328  double olabel_mult_;
329  size_t naccess_;
330  size_t ncoaccess_;
331  size_t nconnect_;
332  size_t ncc_;
333  size_t nscc_;
334  MatchType input_match_type_;
335  MatchType output_match_type_;
336  bool input_lookahead_;
337  bool output_lookahead_;
338  uint64_t properties_;
339  script::ArcFilterType arc_filter_type_;
340  bool long_info_;
341  std::string arc_type_;
342 };
343 
344 // Prints `properties` to `ostrm` in a user-friendly multi-line format.
345 void PrintProperties(std::ostream &ostrm, uint64_t properties);
346 
347 // Prints `header` to `ostrm` in a user-friendly multi-line format.
348 void PrintHeader(std::ostream &ostrm, const FstHeader &header);
349 
350 } // namespace fst
351 
352 #endif // FST_SCRIPT_INFO_IMPL_H_
const std::string & OutputSymbols() const
Definition: info-impl.h:206
virtual uint64_t Properties(uint64_t mask, bool test) const =0
MatchType InputMatchType() const
Definition: info-impl.h:214
MatchType
Definition: fst.h:187
void Visit(const FST &fst, Visitor *visitor, Queue *queue, ArcFilter filter, bool access_only=false)
Definition: visit.h:77
void DfsVisit(const FST &fst, Visitor *visitor, ArcFilter filter, bool access_only=false)
Definition: dfs-visit.h:112
virtual Weight Final(StateId) const =0
size_t NumFinal() const
Definition: info-impl.h:249
void PrintHeader(std::ostream &ostrm, const FstHeader &header)
Definition: info-impl.cc:143
void PrintProperties(std::ostream &ostrm, uint64_t properties)
Definition: info-impl.cc:123
size_t NumConnected() const
Definition: info-impl.h:289
MatchType Type(bool test) const
constexpr int kNoStateId
Definition: fst.h:196
script::ArcFilterType ArcFilterType() const
Definition: info-impl.h:210
bool InputLookAhead() const
Definition: info-impl.h:224
size_t NumCc() const
Definition: info-impl.h:294
bool LongInfo() const
Definition: info-impl.h:208
constexpr uint32_t kOutputLookAheadMatcher
#define FSTERROR()
Definition: util.h:56
size_t NumScc() const
Definition: info-impl.h:299
double OutputLabelMultiplicity() const
Definition: info-impl.h:274
int64_t NumStates() const
Definition: info-impl.h:234
size_t NumOutputEpsilons() const
Definition: info-impl.h:264
size_t NumInputEpsilons() const
Definition: info-impl.h:259
constexpr uint32_t kInputLookAheadMatcher
size_t NumAccessible() const
Definition: info-impl.h:279
double InputLabelMultiplicity() const
Definition: info-impl.h:269
virtual StateId Start() const =0
uint64_t Properties() const
Definition: info-impl.h:304
const std::string & InputSymbols() const
Definition: info-impl.h:204
size_t NumEpsilons() const
Definition: info-impl.h:254
void Info() const
Definition: info-impl.cc:36
size_t NumCoAccessible() const
Definition: info-impl.h:284
const std::string & FstType() const
Definition: info-impl.h:200
int64_t Start() const
Definition: info-impl.h:244
bool Verify(const Fst< Arc > &fst, bool allow_negative_labels=false)
Definition: verify.h:36
constexpr uint64_t kFstProperties
Definition: properties.h:326
const std::string & ArcType() const
Definition: info-impl.h:202
bool OutputLookAhead() const
Definition: info-impl.h:229
FstInfo(const Fst< Arc > &fst, bool test_properties, script::ArcFilterType arc_filter_type=script::ArcFilterType::ANY, std::string_view info_type="auto", bool verify=true)
Definition: info-impl.h:57
MatchType OutputMatchType() const
Definition: info-impl.h:219
constexpr uint64_t kExpanded
Definition: properties.h:46
size_t NumArcs() const
Definition: info-impl.h:239
uint32_t Flags() const