18 #ifndef FST_EXTENSIONS_FAR_COMPILE_STRINGS_H_ 19 #define FST_EXTENSIONS_FAR_COMPILE_STRINGS_H_ 43 #include <string_view> 60 using Label =
typename Arc::Label;
70 entry_type_(entry_type),
71 token_type_(token_type),
74 compiler_(token_type, syms, unknown_label) {
78 bool Done() {
return done_; }
81 VLOG(1) <<
"Processing source " << source_ <<
" at line " << nline_;
87 std::getline(istrm_, content_);
92 while (std::getline(istrm_, line)) {
94 content_.append(line);
95 content_.append(
"\n");
98 if (!istrm_ && content_.empty())
105 fst->SetInputSymbols(symbols_);
106 fst->SetOutputSymbols(symbols_);
108 if (compiler_(content_, fst.get())) {
109 return fst.release();
116 std::unique_ptr<CompactStringFst<Arc>>
fst;
125 if (compiler_(content_, fst.get())) {
126 return fst.release();
134 std::istream &istrm_;
141 std::string content_;
149 int KeySize(std::string_view source);
157 TokenType token_type,
const std::string &symbols_source,
158 const std::string &unknown_symbol,
bool keep_symbols,
159 bool initial_symbols,
const std::string &key_prefix,
160 const std::string &key_suffix) {
162 if (fst_type.empty() || (fst_type ==
"vector")) {
164 }
else if (fst_type ==
"compact") {
167 FSTERROR() <<
"CompileStrings: Unknown FST type: " << fst_type;
170 std::unique_ptr<const SymbolTable> syms;
171 typename Arc::Label unknown_label =
kNoLabel;
172 if (!symbols_source.empty()) {
174 FST_FLAGS_fst_field_separator));
176 LOG(ERROR) <<
"CompileStrings: Error reading symbol table: " 180 if (!unknown_symbol.empty()) {
181 unknown_label = syms->Find(unknown_symbol);
183 FSTERROR() <<
"CompileStrings: Label \"" << unknown_label
184 <<
"\" missing from symbol table: " << symbols_source;
190 for (
const auto &in_source : sources) {
192 if (generate_keys == 0 && in_source.empty()) {
193 FSTERROR() <<
"CompileStrings: Read from a file instead of stdin or" 194 <<
" set the --generate_keys flag.";
197 const int key_size = generate_keys ? generate_keys
202 FSTERROR() <<
"CompileStrings: " << in_source <<
" is not seekable. " 203 <<
"Read from a file instead or set the --generate_keys flag.";
207 if (!in_source.empty()) {
208 fstrm.open(in_source);
210 FSTERROR() <<
"CompileStrings: Can't open file: " << in_source;
214 std::istream &istrm = fstrm.is_open() ? fstrm : std::cin;
215 bool keep_syms = keep_symbols;
217 istrm, in_source.empty() ?
"stdin" : in_source, entry_type,
218 token_type, syms.get(), unknown_label);
219 !reader.
Done(); reader.Next()) {
221 std::unique_ptr<const Fst<Arc>>
fst;
223 fst.reset(reader.GetCompactFst(keep_syms));
225 fst.reset(reader.GetVectorFst(keep_syms));
227 if (initial_symbols) keep_syms =
false;
229 FSTERROR() <<
"CompileStrings: Compiling string number " << n
230 <<
" in file " << in_source
231 <<
" failed with token_type = " << token_type
232 <<
" and entry_type = " 239 std::ostringstream keybuf;
240 keybuf.width(key_size);
244 if (generate_keys > 0) {
248 fst::make_unique_for_overwrite<char[]>(in_source.size() + 1);
249 strcpy(source.get(), in_source.c_str());
250 key = basename(source.get());
256 writer.
Add(key_prefix + key + key_suffix, *fst);
258 if (generate_keys == 0) n = 0;
264 #endif // FST_EXTENSIONS_FAR_COMPILE_STRINGS_H_
void SetInputSymbols(const SymbolTable *isyms) override
typename Arc::Label Label
VectorFst< Arc > * GetVectorFst(bool keep_symbols=false)
virtual void Add(std::string_view key, const Fst< Arc > &fst)=0
int KeySize(std::string_view source)
StringReader(std::istream &istrm, const std::string &source, FarEntryType entry_type, TokenType token_type, const SymbolTable *syms=nullptr, Label unknown_label=kNoStateId)
void SetOutputSymbols(const SymbolTable *osyms) override
CompactStringFst< Arc > * GetCompactFst(bool keep_symbols=false)
void CompileStrings(const std::vector< std::string > &sources, FarWriter< Arc > &writer, std::string_view fst_type, int32_t generate_keys, FarEntryType entry_type, TokenType token_type, const std::string &symbols_source, const std::string &unknown_symbol, bool keep_symbols, bool initial_symbols, const std::string &key_prefix, const std::string &key_suffix)
static SymbolTable * ReadText(std::istream &strm, std::string_view name, std::string_view sep=FST_FLAGS_fst_field_separator)
typename Arc::Weight Weight