41 #include <string_view> 53 return strm <<
"byte";
55 return strm <<
"utf8";
57 return strm <<
"symbol";
64 template <
class Label>
66 Label unknown_label,
bool allow_negative,
72 if (n ==
kNoSymbol || (!allow_negative && n < 0)) {
73 LOG(ERROR) <<
"ConvertSymbolToLabel: Symbol \"" << str
74 <<
"\" is not mapped to any integer label, symbol table = " 80 if (!maybe_n.has_value() || (!allow_negative && *maybe_n < 0)) {
81 LOG(ERROR) <<
"ConvertSymbolToLabel: Bad label integer " 82 <<
"= \"" << str <<
"\"";
91 template <
class Label>
94 Label unknown_label,
bool allow_negative, std::vector<Label> *labels,
95 const std::string &sep = FST_FLAGS_fst_field_separator) {
99 labels->reserve(str.size());
106 const std::string separator =
fst::StrCat(
"\n", sep);
107 for (std::string_view c :
114 labels->push_back(label);
125 template <
class Label>
129 std::stringstream ostrm;
130 sep.remove_prefix(sep.size() - 1);
131 std::string_view delim =
"";
132 for (
auto label : labels) {
133 if (omit_epsilon && !label)
continue;
135 const std::string &symbol = syms.
Find(label);
136 if (symbol.empty()) {
137 LOG(ERROR) <<
"LabelsToSymbolString: Label " << label
138 <<
" is not mapped onto any textual symbol in symbol table " 152 template <
class Label>
154 std::string_view sep,
bool omit_epsilon) {
155 std::stringstream ostrm;
156 sep.remove_prefix(sep.size() - 1);
157 std::string_view delim =
"";
158 for (
auto label : labels) {
159 if (omit_epsilon && !label)
continue;
181 bool allow_negative =
false)
182 : token_type_(token_type),
184 unknown_label_(unknown_label),
185 allow_negative_(allow_negative) {}
193 std::string_view str,
FST *
fst,
194 const std::string &sep = FST_FLAGS_fst_field_separator)
const {
195 std::vector<Label> labels;
197 unknown_label_, allow_negative_,
209 const std::string &sep = FST_FLAGS_fst_field_separator)
const {
210 std::vector<Label> labels;
212 unknown_label_, allow_negative_,
216 Compile(labels, fst, std::move(weight));
222 Weight weight = Weight::One())
const {
227 for (
auto label : labels) {
228 fst->
AddArc(state, Arc(label, label, state + 1));
231 fst->
SetFinal(state, std::move(weight));
235 template <
class Un
signed>
236 void Compile(
const std::vector<Label> &labels,
240 std::make_shared<Compactor>(labels.begin(), labels.end()));
243 template <
class Un
signed>
244 void Compile(
const std::vector<Label> &labels,
246 Weight weight = Weight::One())
const {
247 std::vector<std::pair<Label, Weight>> compacts;
248 compacts.reserve(labels.size() + 1);
249 for (
StateId i = 0; i < static_cast<StateId>(labels.size()) - 1; ++i) {
250 compacts.emplace_back(labels[i], Weight::One());
252 compacts.emplace_back(!labels.empty() ? labels.back() :
kNoLabel, weight);
256 std::make_shared<Compactor>(compacts.begin(), compacts.end()));
261 const Label unknown_label_;
262 const bool allow_negative_;
278 std::vector<typename Arc::Label> *labels) {
280 auto s = fst.
Start();
282 LOG(ERROR) <<
"StringFstToOutputLabels: Invalid start state";
285 while (fst.
Final(s) == Arc::Weight::Zero()) {
288 LOG(ERROR) <<
"StringFstToOutputLabels: Does not reach final state";
291 const auto &arc = aiter.
Value();
292 labels->push_back(arc.olabel);
296 LOG(ERROR) <<
"StringFstToOutputLabels: State " << s
297 <<
" has multiple outgoing arcs";
302 LOG(ERROR) <<
"StringFstToOutputLabels: Final state " << s
303 <<
" has outgoing arc(s)";
313 template <
class Label>
315 const std::vector<Label> &labels, std::string *str,
317 const std::string &sep = FST_FLAGS_fst_field_separator,
318 bool omit_epsilon =
true) {
344 bool omit_epsilon =
true)
345 : token_type_(token_type), syms_(syms), omit_epsilon_(omit_epsilon) {}
351 const std::string &sep = FST_FLAGS_fst_field_separator)
const {
352 std::vector<Label> labels;
354 LabelsToString(labels, str, token_type_, syms_, sep, omit_epsilon_);
360 const bool omit_epsilon_;
371 #endif // FST_STRING_H_ const std::string & Name() const
void Compile(std::istream &istrm, const std::string &source, const std::string &dest, const std::string &fst_type, const std::string &arc_type, const SymbolTable *isyms, const SymbolTable *osyms, const SymbolTable *ssyms, bool accep, bool ikeep, bool okeep, bool nkeep, bool allow_negative_labels)
bool StringFstToOutputLabels(const Fst< Arc > &fst, std::vector< typename Arc::Label > *labels)
bool LabelsToString(const std::vector< Label > &labels, std::string *str, TokenType ttype=TokenType::BYTE, const SymbolTable *syms=nullptr, const std::string &sep=FST_FLAGS_fst_field_separator, bool omit_epsilon=true)
bool operator()(std::string_view str, FST *fst, const std::string &sep=FST_FLAGS_fst_field_separator) const
virtual size_t NumArcs(StateId) const =0
bool ConvertSymbolToLabel(std::string_view str, const SymbolTable *syms, Label unknown_label, bool allow_negative, Label *output)
typename Arc::StateId StateId
constexpr uint64_t kCompiledStringProperties
virtual Weight Final(StateId) const =0
virtual void SetStart(StateId)=0
constexpr int64_t kNoSymbol
internal::StringSplitter StrSplit(std::string_view full, ByAnyChar delim)
const Arc & Value() const
typename Arc::Weight Weight
std::optional< int64_t > ParseInt64(std::string_view s, int base=10)
typename Arc::Label Label
virtual void SetProperties(uint64_t props, uint64_t mask)=0
StringCompiler(TokenType token_type=TokenType::BYTE, const SymbolTable *syms=nullptr, Label unknown_label=kNoLabel, bool allow_negative=false)
DECLARE_string(fst_field_separator)
std::ostream & operator<<(std::ostream &strm, const ErrorWeight &)
bool UTF8StringToLabels(std::string_view str, std::vector< Label > *labels)
typename Arc::Label Label
bool ConvertStringToLabels(std::string_view str, TokenType token_type, const SymbolTable *syms, Label unknown_label, bool allow_negative, std::vector< Label > *labels, const std::string &sep=FST_FLAGS_fst_field_separator)
bool LabelsToSymbolString(const std::vector< Label > &labels, std::string *str, const SymbolTable &syms, std::string_view sep, bool omit_epsilon)
virtual StateId Start() const =0
bool operator()(std::string_view str, FST *fst, Weight weight, const std::string &sep=FST_FLAGS_fst_field_separator) const
bool LabelsToUTF8String(const std::vector< Label > &labels, std::string *str)
virtual void AddArc(StateId, const Arc &)=0
void SetCompactor(std::shared_ptr< Compactor > compactor)
std::string StrCat(const StringOrInt &s1, const StringOrInt &s2)
virtual StateId AddState()=0
bool LabelsToNumericString(const std::vector< Label > &labels, std::string *str, std::string_view sep, bool omit_epsilon)
virtual void SetFinal(StateId s, Weight weight=Weight::One())=0
virtual void DeleteStates(const std::vector< StateId > &)=0
bool ByteStringToLabels(std::string_view str, std::vector< Label > *labels)
std::string Find(int64_t key) const
bool LabelsToByteString(const std::vector< Label > &labels, std::string *str)
bool operator()(const Fst< Arc > &fst, std::string *str, const std::string &sep=FST_FLAGS_fst_field_separator) const
StringPrinter(TokenType token_type=TokenType::BYTE, const SymbolTable *syms=nullptr, bool omit_epsilon=true)
virtual void AddStates(size_t)=0