FST  openfst-1.7.2
OpenFst Library
linearscript.cc
Go to the documentation of this file.
1 // See www.openfst.org for extensive documentation on this weighted
2 // finite-state transducer library.
3 
4 #include <cctype>
5 #include <cstdio>
6 #include <set>
7 
8 #include <fst/compat.h>
9 #include <fst/flags.h>
11 #include <fst/arc.h>
12 #include <fstream>
13 #include <fst/script/script-impl.h>
14 
15 DEFINE_string(delimiter, "|",
16  "Single non-white-space character delimiter inside sequences of "
17  "feature symbols and output symbols");
18 DEFINE_string(empty_symbol, "<empty>",
19  "Special symbol that designates an empty sequence");
20 
21 DEFINE_string(start_symbol, "<s>", "Start of sentence symbol");
22 DEFINE_string(end_symbol, "</s>", "End of sentence symbol");
23 
24 DEFINE_bool(classifier, false,
25  "Treat input model as a classifier instead of a tagger");
26 
27 namespace fst {
28 namespace script {
29 
31  if (FLAGS_delimiter.size() == 1 && !std::isspace(FLAGS_delimiter[0]))
32  return true;
33  return false;
34 }
35 
37  bool okay = !FLAGS_empty_symbol.empty();
38  for (size_t i = 0; i < FLAGS_empty_symbol.size(); ++i) {
39  char c = FLAGS_empty_symbol[i];
40  if (std::isspace(c)) okay = false;
41  }
42  return okay;
43 }
44 
45 void LinearCompile(const string &arc_type, const string &epsilon_symbol,
46  const string &unknown_symbol, const string &vocab,
47  char **models, int models_len, const string &out,
48  const string &save_isymbols, const string &save_fsymbols,
49  const string &save_osymbols) {
50  LinearCompileArgs args(epsilon_symbol, unknown_symbol, vocab, models,
51  models_len, out, save_isymbols, save_fsymbols,
52  save_osymbols);
53  Apply<Operation<LinearCompileArgs>>("LinearCompileTpl", arc_type, &args);
54 }
55 
56 // Instantiate templates for common arc types
59 
// Breaks `str` into its white-space-separated tokens and stores them, in
// order, in `*out`. Whatever `*out` held before the call is discarded; an
// empty or all-white-space `str` leaves `*out` empty.
void SplitByWhitespace(const string &str, std::vector<string> *out) {
  out->clear();
  std::istringstream tokenizer(str);
  // Formatted extraction skips leading white space and stops at the next run,
  // so each successful read yields exactly one token.
  for (string token; tokenizer >> token;) {
    out->push_back(token);
  }
}
66 
67 int ScanNumClasses(char **models, int models_len) {
68  std::set<string> preds;
69  for (int i = 0; i < models_len; ++i) {
70  std::ifstream in(models[i]);
71  if (!in) LOG(FATAL) << "Failed to open " << models[i];
72 
73  string line;
74  std::getline(in, line);
75 
76  size_t num_line = 1;
77  while (std::getline(in, line)) {
78  ++num_line;
79  std::vector<string> fields;
80  SplitByWhitespace(line, &fields);
81  if (fields.size() != 3)
82  LOG(FATAL) << "Wrong number of fields in source " << models[i]
83  << ", line " << num_line;
84  preds.insert(fields[1]);
85  }
86  }
87  return preds.size();
88 }
89 
90 } // namespace script
91 } // namespace fst
#define LOG(type)
Definition: log.h:48
DEFINE_string(delimiter,"|","Single non-white-space character delimiter inside sequences of ""feature symbols and output symbols")
bool ValidateEmptySymbol()
Definition: linearscript.cc:36
REGISTER_FST_LINEAR_OPERATIONS(StdArc)
std::tuple< const string &, const string &, const string &, char **, int, const string &, const string &, const string &, const string & > LinearCompileArgs
Definition: linearscript.h:31
int ScanNumClasses(char **models, int models_len)
Definition: linearscript.cc:67
void LinearCompile(const string &arc_type, const string &epsilon_symbol, const string &unknown_symbol, const string &vocab, char **models, int models_len, const string &out, const string &save_isymbols, const string &save_fsymbols, const string &save_osymbols)
Definition: linearscript.cc:45
DEFINE_bool(classifier, false,"Treat input model as a classifier instead of a tagger")
bool ValidateDelimiter()
Definition: linearscript.cc:30
void SplitByWhitespace(const string &str, std::vector< string > *out)
Definition: linearscript.cc:60