FST  openfst-1.8.3
OpenFst Library
compile-impl.h
Go to the documentation of this file.
1 // Copyright 2005-2024 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the 'License');
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an 'AS IS' BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // See www.openfst.org for extensive documentation on this weighted
16 // finite-state transducer library.
17 //
18 // Class to compile a binary FST from textual input.
19 
20 #ifndef FST_SCRIPT_COMPILE_IMPL_H_
21 #define FST_SCRIPT_COMPILE_IMPL_H_
22 
23 #include <cstddef>
24 #include <iostream>
25 #include <istream>
26 #include <memory>
27 #include <optional>
28 #include <sstream>
29 #include <string>
30 #include <vector>
31 
32 #include <fst/log.h>
33 #include <fst/fst.h>
34 #include <fst/properties.h>
35 #include <fst/symbol-table.h>
36 #include <fst/util.h>
37 #include <fst/vector-fst.h>
38 #include <unordered_map>
39 #include <string_view>
40 
41 DECLARE_string(fst_field_separator);
42 
43 namespace fst {
44 
45 // Compile a binary FST from textual input, helper class for fstcompile.cc.
46 // WARNING: Stand-alone use of this class not recommended, most code should
47 // read/write using the binary format which is much more efficient.
48 template <class Arc>
49 class FstCompiler {
50  public:
51  using Label = typename Arc::Label;
52  using StateId = typename Arc::StateId;
53  using Weight = typename Arc::Weight;
54 
55  // If add_symbols_ is true, then the symbols will be dynamically added to the
56  // symbol tables. This is only useful if you set the (i/o)keep flag to attach
57  // the final symbol table, or use the accessors. (The input symbol tables are
58  // const and therefore not changed.)
59  FstCompiler(std::istream &istrm, std::string_view source,
60  const SymbolTable *isyms, const SymbolTable *osyms,
61  const SymbolTable *ssyms, bool accep, bool ikeep, bool okeep,
62  bool nkeep) {
63  std::unique_ptr<SymbolTable> misyms(isyms ? isyms->Copy() : nullptr);
64  std::unique_ptr<SymbolTable> mosyms(osyms ? osyms->Copy() : nullptr);
65  std::unique_ptr<SymbolTable> mssyms(ssyms ? ssyms->Copy() : nullptr);
66  Init(istrm, source, misyms.get(), mosyms.get(), mssyms.get(), accep, ikeep,
67  okeep, nkeep, false);
68  }
69 
70  FstCompiler(std::istream &istrm, std::string_view source, SymbolTable *isyms,
71  SymbolTable *osyms, SymbolTable *ssyms, bool accep, bool ikeep,
72  bool okeep, bool nkeep, bool add_symbols) {
73  Init(istrm, source, isyms, osyms, ssyms, accep, ikeep, okeep, nkeep,
74  add_symbols);
75  }
76 
77  void Init(std::istream &istrm, std::string_view source, SymbolTable *isyms,
78  SymbolTable *osyms, SymbolTable *ssyms, bool accep, bool ikeep,
79  bool okeep, bool nkeep, bool add_symbols) {
80  nline_ = 0;
81  source_ = std::string(source);
82  isyms_ = isyms;
83  osyms_ = osyms;
84  ssyms_ = ssyms;
85  nstates_ = 0;
86  keep_state_numbering_ = nkeep;
87  add_symbols_ = add_symbols;
88  bool start_state_populated = false;
89  char line[kLineLen];
90  const std::string separator =
91  FST_FLAGS_fst_field_separator + "\n";
92  while (istrm.getline(line, kLineLen)) {
93  ++nline_;
94  const std::vector<std::string_view> col =
95  StrSplit(line, ByAnyChar(separator), SkipEmpty());
96  if (col.empty() || col[0].empty()) continue;
97  if (col.size() > 5 || (col.size() > 4 && accep) ||
98  (col.size() == 3 && !accep)) {
99  FSTERROR() << "FstCompiler: Bad number of columns, source = " << source_
100  << ", line = " << nline_;
101  fst_.SetProperties(kError, kError);
102  return;
103  }
104  StateId s = StrToStateId(col[0]);
105  while (s >= fst_.NumStates()) fst_.AddState();
106  if (!start_state_populated) {
107  fst_.SetStart(s);
108  start_state_populated = true;
109  }
110  Arc arc;
111  StateId d = s;
112  switch (col.size()) {
113  case 1:
114  fst_.SetFinal(s, Weight::One());
115  break;
116  case 2:
117  fst_.SetFinal(s, StrToWeight(col[1], true));
118  break;
119  case 3:
120  arc.nextstate = d = StrToStateId(col[1]);
121  arc.ilabel = StrToILabel(col[2]);
122  arc.olabel = arc.ilabel;
123  arc.weight = Weight::One();
124  fst_.AddArc(s, arc);
125  break;
126  case 4:
127  arc.nextstate = d = StrToStateId(col[1]);
128  arc.ilabel = StrToILabel(col[2]);
129  if (accep) {
130  arc.olabel = arc.ilabel;
131  arc.weight = StrToWeight(col[3], true);
132  } else {
133  arc.olabel = StrToOLabel(col[3]);
134  arc.weight = Weight::One();
135  }
136  fst_.AddArc(s, arc);
137  break;
138  case 5:
139  arc.nextstate = d = StrToStateId(col[1]);
140  arc.ilabel = StrToILabel(col[2]);
141  arc.olabel = StrToOLabel(col[3]);
142  arc.weight = StrToWeight(col[4], true);
143  fst_.AddArc(s, arc);
144  }
145  while (d >= fst_.NumStates()) fst_.AddState();
146  }
147  if (ikeep) fst_.SetInputSymbols(isyms);
148  if (okeep) fst_.SetOutputSymbols(osyms);
149  }
150 
151  const VectorFst<Arc> &Fst() const { return fst_; }
152 
153  private:
154  // Maximum line length in text file.
155  static constexpr int kLineLen = 8096;
156 
157  StateId StrToId(std::string_view s, SymbolTable *syms,
158  std::string_view name) const {
159  StateId n = 0;
160  if (syms) {
161  // n = (add_symbols_) ? syms->AddSymbol(s) : syms->Find(s);
162  if (add_symbols_) {
163  n = syms->AddSymbol(s);
164  } else {
165  n = syms->Find(s);
166  }
167  if (n == kNoSymbol) {
168  FSTERROR() << "FstCompiler: Symbol \"" << s
169  << "\" is not mapped to any integer " << name
170  << ", symbol table = " << syms->Name()
171  << ", source = " << source_ << ", line = " << nline_;
172  fst_.SetProperties(kError, kError);
173  }
174  } else {
175  auto maybe_n = ParseInt64(s);
176  if (!maybe_n.has_value()) {
177  FSTERROR() << "FstCompiler: Bad " << name << " integer = \"" << s
178  << "\", source = " << source_ << ", line = " << nline_;
179  fst_.SetProperties(kError, kError);
180  }
181  n = *maybe_n;
182  }
183  return n;
184  }
185 
186  StateId StrToStateId(std::string_view s) {
187  StateId n = StrToId(s, ssyms_, "state ID");
188  if (keep_state_numbering_) return n;
189  // Remaps state IDs to make dense set.
190  const auto it = states_.find(n);
191  if (it == states_.end()) {
192  states_[n] = nstates_;
193  return nstates_++;
194  } else {
195  return it->second;
196  }
197  }
198 
199  StateId StrToILabel(std::string_view s) const {
200  return StrToId(s, isyms_, "arc ilabel");
201  }
202 
203  StateId StrToOLabel(std::string_view s) const {
204  return StrToId(s, osyms_, "arc olabel");
205  }
206 
207  Weight StrToWeight(std::string_view s, bool allow_zero) const {
208  Weight w;
209  std::istringstream strm(std::string{s});
210  strm >> w;
211  if (!strm || (!allow_zero && w == Weight::Zero())) {
212  FSTERROR() << "FstCompiler: Bad weight = \"" << s
213  << "\", source = " << source_ << ", line = " << nline_;
214  fst_.SetProperties(kError, kError);
215  w = Weight::NoWeight();
216  }
217  return w;
218  }
219 
220  mutable VectorFst<Arc> fst_;
221  size_t nline_;
222  std::string source_; // Text FST source name.
223  SymbolTable *isyms_; // ilabel symbol table (not owned).
224  SymbolTable *osyms_; // olabel symbol table (not owned).
225  SymbolTable *ssyms_; // slabel symbol table (not owned).
226  std::unordered_map<StateId, StateId> states_; // State ID map.
227  StateId nstates_; // Number of seen states.
228  bool keep_state_numbering_;
229  bool add_symbols_; // Add to symbol tables on-the fly.
230 
231  FstCompiler(const FstCompiler &) = delete;
232  FstCompiler &operator=(const FstCompiler &) = delete;
233 };
234 
235 } // namespace fst
236 
237 #endif // FST_SCRIPT_COMPILE_IMPL_H_
const std::string & Name() const
Definition: symbol-table.h:466
int64_t AddSymbol(std::string_view symbol, int64_t key)
Definition: symbol-table.h:425
void AddArc(StateId s, const Arc &arc) override
Definition: mutable-fst.h:331
void SetStart(StateId s) override
Definition: mutable-fst.h:303
FstCompiler(std::istream &istrm, std::string_view source, const SymbolTable *isyms, const SymbolTable *osyms, const SymbolTable *ssyms, bool accep, bool ikeep, bool okeep, bool nkeep)
Definition: compile-impl.h:59
typename Arc::StateId StateId
Definition: compile-impl.h:52
void SetFinal(StateId s, Weight weight=Weight::One()) override
Definition: mutable-fst.h:308
void Init(std::istream &istrm, std::string_view source, SymbolTable *isyms, SymbolTable *osyms, SymbolTable *ssyms, bool accep, bool ikeep, bool okeep, bool nkeep, bool add_symbols)
Definition: compile-impl.h:77
virtual SymbolTable * Copy() const
Definition: symbol-table.h:411
constexpr uint64_t kError
Definition: properties.h:52
void SetProperties(uint64_t props, uint64_t mask) override
Definition: mutable-fst.h:313
typename Arc::Weight Weight
Definition: compile-impl.h:53
typename Arc::Label Label
Definition: compile-impl.h:51
FstCompiler(std::istream &istrm, std::string_view source, SymbolTable *isyms, SymbolTable *osyms, SymbolTable *ssyms, bool accep, bool ikeep, bool okeep, bool nkeep, bool add_symbols)
Definition: compile-impl.h:70
constexpr int64_t kNoSymbol
Definition: symbol-table.h:55
internal::StringSplitter StrSplit(std::string_view full, ByAnyChar delim)
Definition: compat.cc:77
#define FSTERROR()
Definition: util.h:56
StateId AddState() override
Definition: mutable-fst.h:321
std::optional< int64_t > ParseInt64(std::string_view s, int base=10)
Definition: util.cc:46
void SetOutputSymbols(const SymbolTable *osyms) override
Definition: mutable-fst.h:401
DECLARE_string(fst_field_separator)
void SetInputSymbols(const SymbolTable *isyms) override
Definition: mutable-fst.h:396
StateId NumStates() const override
Definition: expanded-fst.h:144
std::string Find(int64_t key) const
Definition: symbol-table.h:450
const VectorFst< Arc > & Fst() const
Definition: compile-impl.h:151