43 #include <string_view> 55 return strm <<
"byte";
57 return strm <<
"utf8";
59 return strm <<
"symbol";
66 template <
class Label>
68 Label unknown_label, Label *output) {
74 LOG(ERROR) <<
"ConvertSymbolToLabel: Symbol \"" << str
75 <<
"\" is not mapped to any integer label, symbol table = " 81 if (!maybe_n.has_value()) {
82 LOG(ERROR) <<
"ConvertSymbolToLabel: Bad label integer " 83 <<
"= \"" << str <<
"\"";
92 template <
class Label>
95 Label unknown_label, std::vector<Label> *labels,
96 std::string_view sep = FST_FLAGS_fst_field_separator) {
100 labels->reserve(str.size());
107 const std::string separator =
fst::StrCat(
"\n", sep);
108 for (std::string_view c :
112 labels->push_back(label);
123 template <
class Label>
127 std::stringstream ostrm;
128 sep.remove_prefix(sep.size() - 1);
129 std::string_view delim =
"";
130 for (
auto label : labels) {
131 if (omit_epsilon && !label)
continue;
133 const std::string &symbol = syms.
Find(label);
134 if (symbol.empty()) {
135 LOG(ERROR) <<
"LabelsToSymbolString: Label " << label
136 <<
" is not mapped onto any textual symbol in symbol table " 150 template <
class Label>
152 std::string_view sep,
bool omit_epsilon) {
153 std::stringstream ostrm;
154 sep.remove_prefix(sep.size() - 1);
155 std::string_view delim =
"";
156 for (
auto label : labels) {
157 if (omit_epsilon && !label)
continue;
172 using Label =
typename Arc::Label;
173 using StateId =
typename Arc::StateId;
174 using Weight =
typename Arc::Weight;
179 : token_type_(token_type), syms_(syms), unknown_label_(unknown_label) {}
187 std::string_view str,
FST *
fst,
188 std::string_view sep = FST_FLAGS_fst_field_separator)
const {
189 std::vector<Label> labels;
191 unknown_label_, &labels, sep)) {
201 std::string_view str,
FST *fst, Weight weight,
202 std::string_view sep = FST_FLAGS_fst_field_separator)
const {
203 std::vector<Label> labels;
205 unknown_label_, &labels, sep)) {
208 Compile(labels, fst, std::move(weight));
214 Weight weight = Weight::One())
const {
219 for (
auto label : labels) {
220 fst->
AddArc(state, Arc(label, label, state + 1));
223 fst->
SetFinal(state, std::move(weight));
227 template <
class Un
signed>
228 void Compile(
const std::vector<Label> &labels,
232 std::make_shared<Compactor>(labels.begin(), labels.end()));
235 template <
class Un
signed>
236 void Compile(
const std::vector<Label> &labels,
238 Weight weight = Weight::One())
const {
239 std::vector<std::pair<Label, Weight>> compacts;
240 compacts.reserve(labels.size() + 1);
241 for (StateId i = 0; i < static_cast<StateId>(labels.size()) - 1; ++i) {
242 compacts.emplace_back(labels[i], Weight::One());
244 compacts.emplace_back(!labels.empty() ? labels.back() :
kNoLabel, weight);
248 std::make_shared<Compactor>(compacts.begin(), compacts.end()));
253 const Label unknown_label_;
255 StringCompiler(
const StringCompiler &) =
delete;
256 StringCompiler &operator=(
const StringCompiler &) =
delete;
269 std::vector<typename Arc::Label> *labels) {
271 auto s = fst.
Start();
273 LOG(ERROR) <<
"StringFstToOutputLabels: Invalid start state";
276 while (fst.
Final(s) == Arc::Weight::Zero()) {
279 LOG(ERROR) <<
"StringFstToOutputLabels: Does not reach final state";
282 const auto &arc = aiter.
Value();
283 labels->push_back(arc.olabel);
287 LOG(ERROR) <<
"StringFstToOutputLabels: State " << s
288 <<
" has multiple outgoing arcs";
293 LOG(ERROR) <<
"StringFstToOutputLabels: Final state " << s
294 <<
" has outgoing arc(s)";
304 std::vector<typename Arc::Label> *labels,
305 typename Arc::Weight *weight) {
307 auto path_weight = Arc::Weight::One();
308 auto s = fst.
Start();
310 LOG(ERROR) <<
"StringFstToOutputLabels: Invalid start state";
313 auto final_weight = fst.
Final(s);
314 while (final_weight == Arc::Weight::Zero()) {
317 LOG(ERROR) <<
"StringFstToOutputLabels: Does not reach final state";
320 const auto &arc = aiter.
Value();
321 labels->push_back(arc.olabel);
322 path_weight =
Times(path_weight, arc.weight);
326 LOG(ERROR) <<
"StringFstToOutputLabels: State " << s
327 <<
" has multiple outgoing arcs";
330 final_weight = fst.
Final(s);
333 LOG(ERROR) <<
"StringFstToOutputLabels: Final state " << s
334 <<
" has outgoing arc(s)";
337 *weight =
Times(path_weight, final_weight);
345 template <
class Label>
347 const std::vector<Label> &labels, std::string *str,
349 std::string_view sep = FST_FLAGS_fst_field_separator,
350 bool omit_epsilon =
true) {
377 bool omit_epsilon =
true)
378 : token_type_(token_type), syms_(syms), omit_epsilon_(omit_epsilon) {}
384 std::string_view sep = FST_FLAGS_fst_field_separator)
const {
385 std::vector<Label> labels;
387 LabelsToString(labels, str, token_type_, syms_, sep, omit_epsilon_);
394 std::string_view sep = FST_FLAGS_fst_field_separator)
const {
395 std::vector<Label> labels;
397 LabelsToString(labels, str, token_type_, syms_, sep, omit_epsilon_);
403 const bool omit_epsilon_;
414 #endif // FST_STRING_H_ const std::string & Name() const
bool StringFstToOutputLabels(const Fst< Arc > &fst, std::vector< typename Arc::Label > *labels)
class OPENFST_DEPRECATED("allow_negative is no-op") StringCompiler
StringCompiler< StdArc > StdStringCompiler
virtual size_t NumArcs(StateId) const =0
ErrorWeight Times(const ErrorWeight &, const ErrorWeight &)
constexpr uint64_t kCompiledStringProperties
virtual Weight Final(StateId) const =0
virtual void SetStart(StateId)=0
typename Arc::Weight Weight
constexpr int64_t kNoSymbol
internal::StringSplitter StrSplit(std::string_view full, ByAnyChar delim)
const Arc & Value() const
void Compile(std::istream &istrm, const std::string &source, const std::string &dest, const std::string &fst_type, const std::string &arc_type, const SymbolTable *isyms, const SymbolTable *osyms, const SymbolTable *ssyms, bool accep, bool ikeep, bool okeep, bool nkeep)
std::optional< int64_t > ParseInt64(std::string_view s, int base=10)
bool operator()(const Fst< Arc > &fst, std::string *str, std::string_view sep=FST_FLAGS_fst_field_separator) const
virtual void SetProperties(uint64_t props, uint64_t mask)=0
DECLARE_string(fst_field_separator)
See www.openfst.org for extensive documentation on this weighted.
std::ostream & operator<<(std::ostream &strm, const ErrorWeight &)
bool UTF8StringToLabels(std::string_view str, std::vector< Label > *labels)
typename Arc::Label Label
bool LabelsToSymbolString(const std::vector< Label > &labels, std::string *str, const SymbolTable &syms, std::string_view sep, bool omit_epsilon)
virtual StateId Start() const =0
bool ConvertStringToLabels(std::string_view str, TokenType token_type, const SymbolTable *syms, Label unknown_label, std::vector< Label > *labels, std::string_view sep=FST_FLAGS_fst_field_separator)
bool ConvertSymbolToLabel(std::string_view str, const SymbolTable *syms, Label unknown_label, Label *output)
bool LabelsToUTF8String(const std::vector< Label > &labels, std::string *str)
virtual void AddArc(StateId, const Arc &)=0
void SetCompactor(std::shared_ptr< Compactor > compactor)
bool operator()(const Fst< Arc > &fst, std::string *str, Weight *weight, std::string_view sep=FST_FLAGS_fst_field_separator) const
std::string StrCat(const StringOrInt &s1, const StringOrInt &s2)
bool LabelsToString(const std::vector< Label > &labels, std::string *str, TokenType ttype=TokenType::BYTE, const SymbolTable *syms=nullptr, std::string_view sep=FST_FLAGS_fst_field_separator, bool omit_epsilon=true)
virtual StateId AddState()=0
bool LabelsToNumericString(const std::vector< Label > &labels, std::string *str, std::string_view sep, bool omit_epsilon)
virtual void SetFinal(StateId s, Weight weight=Weight::One())=0
virtual void DeleteStates(const std::vector< StateId > &)=0
bool ByteStringToLabels(std::string_view str, std::vector< Label > *labels)
std::string Find(int64_t key) const
bool LabelsToByteString(const std::vector< Label > &labels, std::string *str)
StringPrinter(TokenType token_type=TokenType::BYTE, const SymbolTable *syms=nullptr, bool omit_epsilon=true)
virtual void AddStates(size_t)=0