FST  openfst-1.8.2.post1
OpenFst Library
linearscript.cc
Go to the documentation of this file.
1 // Copyright 2005-2020 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the 'License');
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an 'AS IS' BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // See www.openfst.org for extensive documentation on this weighted
16 // finite-state transducer library.
17 
18 #include <fst/compat.h>
20 
21 #include <cctype>
22 #include <cstdio>
23 #include <set>
24 #include <string>
25 
26 #include <fst/flags.h>
27 #include <fst/arc.h>
28 #include <fstream>
29 #include <fst/script/script-impl.h>
30 
31 DEFINE_string(delimiter, "|",
32  "Single non-white-space character delimiter inside sequences of "
33  "feature symbols and output symbols");
34 DEFINE_string(empty_symbol, "<empty>",
35  "Special symbol that designates an empty sequence");
36 
37 DEFINE_string(start_symbol, "<s>", "Start of sentence symbol");
38 DEFINE_string(end_symbol, "</s>", "End of sentence symbol");
39 
40 DEFINE_bool(classifier, false,
41  "Treat input model as a classifier instead of a tagger");
42 
43 namespace fst {
44 namespace script {
45 
47  return FST_FLAGS_delimiter.size() == 1 &&
48  !std::isspace(FST_FLAGS_delimiter[0]);
49 }
50 
52  bool okay = !FST_FLAGS_empty_symbol.empty();
53  for (size_t i = 0; i < FST_FLAGS_empty_symbol.size(); ++i) {
54  char c = FST_FLAGS_empty_symbol[i];
55  if (std::isspace(c)) okay = false;
56  }
57  return okay;
58 }
59 
60 void LinearCompile(const std::string &arc_type,
61  const std::string &epsilon_symbol,
62  const std::string &unknown_symbol, const std::string &vocab,
63  char **models, int models_len, const std::string &out,
64  const std::string &save_isymbols,
65  const std::string &save_fsymbols,
66  const std::string &save_osymbols) {
67  LinearCompileArgs args(epsilon_symbol, unknown_symbol, vocab, models,
68  models_len, out, save_isymbols, save_fsymbols,
69  save_osymbols);
70  Apply<Operation<LinearCompileArgs>>("LinearCompileTpl", arc_type, &args);
71 }
72 
74 
// Replaces the contents of `*out` with the white-space-separated tokens of
// `str`, in order of appearance. Any previous contents of `*out` are dropped.
void SplitByWhitespace(const std::string &str, std::vector<std::string> *out) {
  out->clear();
  std::istringstream tokens(str);
  // Stream extraction skips leading white space and stops at the next one.
  for (std::string word; tokens >> word;) out->push_back(word);
}
81 
82 int ScanNumClasses(char **models, int models_len) {
83  std::set<std::string> preds;
84  for (int i = 0; i < models_len; ++i) {
85  std::ifstream in(models[i]);
86  if (!in) LOG(FATAL) << "Failed to open " << models[i];
87  std::string line;
88  std::getline(in, line);
89  size_t num_line = 1;
90  while (std::getline(in, line)) {
91  ++num_line;
92  std::vector<std::string> fields;
93  SplitByWhitespace(line, &fields);
94  if (fields.size() != 3)
95  LOG(FATAL) << "Wrong number of fields in source " << models[i]
96  << ", line " << num_line;
97  preds.insert(fields[1]);
98  }
99  }
100  return preds.size();
101 }
102 
103 } // namespace script
104 } // namespace fst
void SplitByWhitespace(const std::string &str, std::vector< std::string > *out)
Definition: linearscript.cc:75
REGISTER_FST_OPERATION_3ARCS(Compress, CompressArgs)
std::tuple< const std::string &, const std::string &, const std::string &, char **, int, const std::string &, const std::string &, const std::string &, const std::string & > LinearCompileArgs
Definition: linearscript.h:46
#define LOG(type)
Definition: log.h:49
DEFINE_string(delimiter,"|","Single non-white-space character delimiter inside sequences of ""feature symbols and output symbols")
bool ValidateEmptySymbol()
Definition: linearscript.cc:51
int ScanNumClasses(char **models, int models_len)
Definition: linearscript.cc:82
void LinearCompile(const std::string &arc_type, const std::string &epsilon_symbol, const std::string &unknown_symbol, const std::string &vocab, char **models, int models_len, const std::string &out, const std::string &save_isymbols, const std::string &save_fsymbols, const std::string &save_osymbols)
Definition: linearscript.cc:60
DEFINE_bool(classifier, false,"Treat input model as a classifier instead of a tagger")
bool ValidateDelimiter()
Definition: linearscript.cc:46
void LinearCompileTpl(LinearCompileArgs *args)
Definition: linearscript.h:296