FST  openfst-1.7.9
OpenFst Library
compile-impl.h
Go to the documentation of this file.
1 // See www.openfst.org for extensive documentation on this weighted
2 // finite-state transducer library.
3 //
4 // Class to compile a binary FST from textual input.
5 
6 #ifndef FST_SCRIPT_COMPILE_IMPL_H_
7 #define FST_SCRIPT_COMPILE_IMPL_H_
8 
9 #include <iostream>
10 #include <memory>
11 #include <sstream>
12 #include <string>
13 #include <vector>
14 
15 #include <fst/fst.h>
16 #include <fst/util.h>
17 #include <fst/vector-fst.h>
18 #include <unordered_map>
19 
// Declares the string flag FLAGS_fst_field_separator. FstCompiler::Init
// appends "\n" to its value and passes the result to SplitString as the
// delimiter argument used to break each input line into columns.
20 DECLARE_string(fst_field_separator);
21 
22 namespace fst {
23 
24 // Compile a binary Fst from textual input, helper class for fstcompile.cc
25 // WARNING: Stand-alone use of this class not recommended, most code should
26 // read/write using the binary format which is much more efficient.
27 template <class Arc>
28 class FstCompiler {
29  public:
30  using Label = typename Arc::Label;
31  using StateId = typename Arc::StateId;
32  using Weight = typename Arc::Weight;
33 
34  // WARNING: use of negative labels not recommended as it may cause conflicts.
35  // If add_symbols_ is true, then the symbols will be dynamically added to the
36  // symbol tables. This is only useful if you set the (i/o)keep flag to attach
37  // the final symbol table, or use the accessors. (The input symbol tables are
38  // const and therefore not changed.)
39  FstCompiler(std::istream &istrm, const std::string &source, // NOLINT
40  const SymbolTable *isyms, const SymbolTable *osyms,
41  const SymbolTable *ssyms, bool accep, bool ikeep, bool okeep,
42  bool nkeep, bool allow_negative_labels = false) {
43  std::unique_ptr<SymbolTable> misyms(isyms ? isyms->Copy() : nullptr);
44  std::unique_ptr<SymbolTable> mosyms(osyms ? osyms->Copy() : nullptr);
45  std::unique_ptr<SymbolTable> mssyms(ssyms ? ssyms->Copy() : nullptr);
46  Init(istrm, source, misyms.get(), mosyms.get(), mssyms.get(), accep, ikeep,
47  okeep, nkeep, allow_negative_labels, false);
48  }
49 
50  FstCompiler(std::istream &istrm, const std::string &source, // NOLINT
51  SymbolTable *isyms, SymbolTable *osyms, SymbolTable *ssyms,
52  bool accep, bool ikeep, bool okeep, bool nkeep,
53  bool allow_negative_labels, bool add_symbols) {
54  Init(istrm, source, isyms, osyms, ssyms, accep, ikeep, okeep, nkeep,
55  allow_negative_labels, add_symbols);
56  }
57 
58  void Init(std::istream &istrm, const std::string &source, // NOLINT
59  SymbolTable *isyms, SymbolTable *osyms, SymbolTable *ssyms,
60  bool accep, bool ikeep, bool okeep, bool nkeep,
61  bool allow_negative_labels, bool add_symbols) {
62  nline_ = 0;
63  source_ = source;
64  isyms_ = isyms;
65  osyms_ = osyms;
66  ssyms_ = ssyms;
67  nstates_ = 0;
68  keep_state_numbering_ = nkeep;
69  allow_negative_labels_ = allow_negative_labels;
70  add_symbols_ = add_symbols;
71  bool start_state_populated = false;
72  char line[kLineLen];
73  const std::string separator = FLAGS_fst_field_separator + "\n";
74  while (istrm.getline(line, kLineLen)) {
75  ++nline_;
76  std::vector<char *> col;
77  SplitString(line, separator.c_str(), &col, true);
78  if (col.empty() || col[0][0] == '\0') continue;
79  if (col.size() > 5 || (col.size() > 4 && accep) ||
80  (col.size() == 3 && !accep)) {
81  FSTERROR() << "FstCompiler: Bad number of columns, source = " << source_
82  << ", line = " << nline_;
84  return;
85  }
86  StateId s = StrToStateId(col[0]);
87  while (s >= fst_.NumStates()) fst_.AddState();
88  if (!start_state_populated) {
89  fst_.SetStart(s);
90  start_state_populated = true;
91  }
92 
93  Arc arc;
94  StateId d = s;
95  switch (col.size()) {
96  case 1:
97  fst_.SetFinal(s, Weight::One());
98  break;
99  case 2:
100  fst_.SetFinal(s, StrToWeight(col[1], true));
101  break;
102  case 3:
103  arc.nextstate = d = StrToStateId(col[1]);
104  arc.ilabel = StrToILabel(col[2]);
105  arc.olabel = arc.ilabel;
106  arc.weight = Weight::One();
107  fst_.AddArc(s, arc);
108  break;
109  case 4:
110  arc.nextstate = d = StrToStateId(col[1]);
111  arc.ilabel = StrToILabel(col[2]);
112  if (accep) {
113  arc.olabel = arc.ilabel;
114  arc.weight = StrToWeight(col[3], true);
115  } else {
116  arc.olabel = StrToOLabel(col[3]);
117  arc.weight = Weight::One();
118  }
119  fst_.AddArc(s, arc);
120  break;
121  case 5:
122  arc.nextstate = d = StrToStateId(col[1]);
123  arc.ilabel = StrToILabel(col[2]);
124  arc.olabel = StrToOLabel(col[3]);
125  arc.weight = StrToWeight(col[4], true);
126  fst_.AddArc(s, arc);
127  }
128  while (d >= fst_.NumStates()) fst_.AddState();
129  }
130  if (ikeep) fst_.SetInputSymbols(isyms);
131  if (okeep) fst_.SetOutputSymbols(osyms);
132  }
133 
134  const VectorFst<Arc> &Fst() const { return fst_; }
135 
136  private:
137  // Maximum line length in text file.
138  static constexpr int kLineLen = 8096;
139 
140  StateId StrToId(const char *s, SymbolTable *syms, const char *name,
141  bool allow_negative = false) const {
142  StateId n = 0;
143  if (syms) {
144  n = (add_symbols_) ? syms->AddSymbol(s) : syms->Find(s);
145  if (n == -1 || (!allow_negative && n < 0)) {
146  FSTERROR() << "FstCompiler: Symbol \"" << s
147  << "\" is not mapped to any integer " << name
148  << ", symbol table = " << syms->Name()
149  << ", source = " << source_ << ", line = " << nline_;
150  fst_.SetProperties(kError, kError);
151  }
152  } else {
153  char *p;
154  n = strtoll(s, &p, 10);
155  if (*p != '\0' || (!allow_negative && n < 0)) {
156  FSTERROR() << "FstCompiler: Bad " << name << " integer = \"" << s
157  << "\", source = " << source_ << ", line = " << nline_;
158  fst_.SetProperties(kError, kError);
159  }
160  }
161  return n;
162  }
163 
164  StateId StrToStateId(const char *s) {
165  StateId n = StrToId(s, ssyms_, "state ID");
166  if (keep_state_numbering_) return n;
167  // Remaps state IDs to make dense set.
168  const auto it = states_.find(n);
169  if (it == states_.end()) {
170  states_[n] = nstates_;
171  return nstates_++;
172  } else {
173  return it->second;
174  }
175  }
176 
177  StateId StrToILabel(const char *s) const {
178  return StrToId(s, isyms_, "arc ilabel", allow_negative_labels_);
179  }
180 
181  StateId StrToOLabel(const char *s) const {
182  return StrToId(s, osyms_, "arc olabel", allow_negative_labels_);
183  }
184 
185  Weight StrToWeight(const char *s, bool allow_zero) const {
186  Weight w;
187  std::istringstream strm(s);
188  strm >> w;
189  if (!strm || (!allow_zero && w == Weight::Zero())) {
190  FSTERROR() << "FstCompiler: Bad weight = \"" << s
191  << "\", source = " << source_ << ", line = " << nline_;
192  fst_.SetProperties(kError, kError);
193  w = Weight::NoWeight();
194  }
195  return w;
196  }
197 
198  mutable VectorFst<Arc> fst_;
199  size_t nline_;
200  std::string source_; // Text FST source name.
201  SymbolTable *isyms_; // ilabel symbol table (not owned).
202  SymbolTable *osyms_; // olabel symbol table (not owned).
203  SymbolTable *ssyms_; // slabel symbol table (not owned).
204  std::unordered_map<StateId, StateId> states_; // State ID map.
205  StateId nstates_; // Number of seen states.
206  bool keep_state_numbering_;
207  bool allow_negative_labels_; // Not recommended; may cause conflicts.
208  bool add_symbols_; // Add to symbol tables on-the fly.
209 
210  FstCompiler(const FstCompiler &) = delete;
211  FstCompiler &operator=(const FstCompiler &) = delete;
212 };
213 
214 } // namespace fst
215 
216 #endif // FST_SCRIPT_COMPILE_IMPL_H_
const std::string & Name() const
Definition: symbol-table.h:480
void AddArc(StateId s, const Arc &arc) override
Definition: mutable-fst.h:312
void SetStart(StateId s) override
Definition: mutable-fst.h:284
void SetProperties(uint64 props, uint64 mask) override
Definition: mutable-fst.h:294
typename Arc::StateId StateId
Definition: compile-impl.h:31
void SetFinal(StateId s, Weight weight=Weight::One()) override
Definition: mutable-fst.h:289
virtual SymbolTable * Copy() const
Definition: symbol-table.h:426
int64 AddSymbol(SymbolType symbol, int64 key)
Definition: symbol-table.h:440
typename Arc::Weight Weight
Definition: compile-impl.h:32
typename Arc::Label Label
Definition: compile-impl.h:30
FstCompiler(std::istream &istrm, const std::string &source, const SymbolTable *isyms, const SymbolTable *osyms, const SymbolTable *ssyms, bool accep, bool ikeep, bool okeep, bool nkeep, bool allow_negative_labels=false)
Definition: compile-impl.h:39
#define FSTERROR()
Definition: util.h:36
void SplitString(char *line, const char *delim, std::vector< char * > *vec, bool omit_empty_strings)
Definition: util.cc:25
StateId AddState() override
Definition: mutable-fst.h:302
void SetOutputSymbols(const SymbolTable *osyms) override
Definition: mutable-fst.h:382
DECLARE_string(fst_field_separator)
std::string Find(int64 key) const
Definition: symbol-table.h:464
void SetInputSymbols(const SymbolTable *isyms) override
Definition: mutable-fst.h:377
StateId NumStates() const override
Definition: expanded-fst.h:120
FstCompiler(std::istream &istrm, const std::string &source, SymbolTable *isyms, SymbolTable *osyms, SymbolTable *ssyms, bool accep, bool ikeep, bool okeep, bool nkeep, bool allow_negative_labels, bool add_symbols)
Definition: compile-impl.h:50
void Init(std::istream &istrm, const std::string &source, SymbolTable *isyms, SymbolTable *osyms, SymbolTable *ssyms, bool accep, bool ikeep, bool okeep, bool nkeep, bool allow_negative_labels, bool add_symbols)
Definition: compile-impl.h:58
constexpr uint64 kError
Definition: properties.h:36
const VectorFst< Arc > & Fst() const
Definition: compile-impl.h:134