18 #ifndef FST_EXTENSIONS_FAR_COMPILE_STRINGS_H_ 19 #define FST_EXTENSIONS_FAR_COMPILE_STRINGS_H_ 48 using Label =
typename Arc::Label;
53 bool allow_negative_labels,
const SymbolTable *syms =
nullptr,
58 entry_type_(entry_type),
59 token_type_(token_type),
62 compiler_(token_type, syms, unknown_label, allow_negative_labels) {
// Returns the current value of done_.
// NOTE(review): the leading "66" looks like a line number fused in from a
// rendered source listing rather than part of the code; confirm against the
// real header before building.
66 bool Done() {
return done_; }
69 VLOG(1) <<
"Processing source " << source_ <<
" at line " << nline_;
75 std::getline(istrm_, content_);
80 while (std::getline(istrm_, line)) {
82 content_.append(line);
83 content_.append(
"\n");
86 if (!istrm_ && content_.empty())
93 fst->SetInputSymbols(symbols_);
94 fst->SetOutputSymbols(symbols_);
96 if (compiler_(content_, fst.get())) {
104 std::unique_ptr<CompactStringFst<Arc>>
fst;
113 if (compiler_(content_, fst.get())) {
114 return fst.release();
122 std::istream &istrm_;
129 std::string content_;
137 int KeySize(
const std::string &source);
145 TokenType token_type,
const std::string &symbols_source,
146 const std::string &unknown_symbol,
bool keep_symbols,
147 bool initial_symbols,
bool allow_negative_labels,
148 const std::string &key_prefix,
149 const std::string &key_suffix) {
151 if (fst_type.empty() || (fst_type ==
"vector")) {
153 }
else if (fst_type ==
"compact") {
156 FSTERROR() <<
"CompileStrings: Unknown FST type: " << fst_type;
159 std::unique_ptr<const SymbolTable> syms;
160 typename Arc::Label unknown_label =
kNoLabel;
161 if (!symbols_source.empty()) {
165 LOG(ERROR) <<
"CompileStrings: Error reading symbol table: " 169 if (!unknown_symbol.empty()) {
170 unknown_label = syms->Find(unknown_symbol);
172 FSTERROR() <<
"CompileStrings: Label \"" << unknown_label
173 <<
"\" missing from symbol table: " << symbols_source;
179 for (
const auto &in_source : sources) {
181 if (generate_keys == 0 && in_source.empty()) {
182 FSTERROR() <<
"CompileStrings: Read from a file instead of stdin or" 183 <<
" set the --generate_keys flag.";
186 const int key_size = generate_keys ? generate_keys
191 FSTERROR() <<
"CompileStrings: " << in_source <<
" is not seekable. " 192 <<
"Read from a file instead or set the --generate_keys flag.";
196 if (!in_source.empty()) {
197 fstrm.open(in_source);
199 FSTERROR() <<
"CompileStrings: Can't open file: " << in_source;
203 std::istream &istrm = fstrm.is_open() ? fstrm : std::cin;
204 bool keep_syms = keep_symbols;
206 istrm, in_source.empty() ?
"stdin" : in_source, entry_type,
207 token_type, allow_negative_labels, syms.get(), unknown_label);
208 !reader.
Done(); reader.Next()) {
210 std::unique_ptr<const Fst<Arc>>
fst;
212 fst.reset(reader.GetCompactFst(keep_syms));
214 fst.reset(reader.GetVectorFst(keep_syms));
216 if (initial_symbols) keep_syms =
false;
218 FSTERROR() <<
"CompileStrings: Compiling string number " << n
219 <<
" in file " << in_source
220 <<
" failed with token_type = " << token_type
221 <<
" and entry_type = " 228 std::ostringstream keybuf;
229 keybuf.width(key_size);
233 if (generate_keys > 0) {
237 fst::make_unique_for_overwrite<char[]>(in_source.size() + 1);
238 strcpy(source.get(), in_source.c_str());
239 key = basename(source.get());
245 writer.
Add(key_prefix + key + key_suffix, *fst);
247 if (generate_keys == 0) n = 0;
253 #endif // FST_EXTENSIONS_FAR_COMPILE_STRINGS_H_
typename Arc::Label Label
VectorFst< Arc > * GetVectorFst(bool keep_symbols=false)
virtual void Add(std::string_view key, const Fst< Arc > &fst)=0
void SetOutputSymbols(const SymbolTable *osyms) override
static SymbolTable * ReadText(std::istream &strm, std::string_view name, const SymbolTableTextOptions &opts=SymbolTableTextOptions())
void SetInputSymbols(const SymbolTable *isyms) override
int KeySize(const std::string &source)
CompactStringFst< Arc > * GetCompactFst(bool keep_symbols=false)
typename Arc::Weight Weight
void CompileStrings(const std::vector< std::string > &sources, FarWriter< Arc > &writer, std::string_view fst_type, int32_t generate_keys, FarEntryType entry_type, TokenType token_type, const std::string &symbols_source, const std::string &unknown_symbol, bool keep_symbols, bool initial_symbols, bool allow_negative_labels, const std::string &key_prefix, const std::string &key_suffix)
StringReader(std::istream &istrm, const std::string &source, FarEntryType entry_type, TokenType token_type, bool allow_negative_labels, const SymbolTable *syms=nullptr, Label unknown_label=kNoStateId)