FST  openfst-1.8.3
OpenFst Library
farcompilestrings-main.cc
Go to the documentation of this file.
1 // Copyright 2005-2024 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the 'License');
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an 'AS IS' BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // See www.openfst.org for extensive documentation on this weighted
16 // finite-state transducer library.
17 //
18 // Compiles a set of strings as FSTs and stores them in a finite-state archive.
19 
20 #include <cstring>
21 #include <istream>
22 #include <memory>
23 #include <string>
24 #include <vector>
25 
26 #include <fst/log.h>
28 #include <fst/extensions/far/far.h>
31 #include <fstream>
32 #include <fst/string.h>
33 #include <fst/util.h>
34 #include <fst/script/arg-packs.h>
35 #include <fst/script/getters.h>
36 
37 DECLARE_string(key_prefix);
38 DECLARE_string(key_suffix);
39 DECLARE_int32(generate_keys);
40 DECLARE_string(far_type);
41 DECLARE_string(arc_type);
42 DECLARE_string(entry_type);
43 DECLARE_string(fst_type);
44 DECLARE_string(token_type);
45 DECLARE_string(symbols);
46 DECLARE_string(unknown_symbol);
47 DECLARE_bool(file_list_input);
48 DECLARE_bool(keep_symbols);
49 DECLARE_bool(initial_symbols);
50 
51 int farcompilestrings_main(int argc, char **argv) {
52  namespace s = fst::script;
54 
55  std::string usage = "Compiles a set of strings as FSTs and stores them in";
56  usage += " an FST archive.\n\n Usage: ";
57  usage += argv[0];
58  usage += " [in1.txt [[in2.txt ...] out.far]]\n";
59 
60  SET_FLAGS(usage.c_str(), &argc, &argv, true);
61  s::ExpandArgs(argc, argv, &argc, &argv);
62 
63  std::vector<std::string> sources;
64  if (FST_FLAGS_file_list_input) {
65  for (int i = 1; i < argc - 1; ++i) {
66  std::ifstream istrm(argv[i]);
67  std::string str;
68  while (std::getline(istrm, str)) sources.push_back(str);
69  }
70  } else {
71  for (int i = 1; i < argc - 1; ++i)
72  sources.push_back(strcmp(argv[i], "-") != 0 ? argv[i] : "");
73  if (sources.empty()) {
74  // argc == 1 || argc == 2. This cleverly handles both the no-file case
75  // and the one (input) file case together.
76  sources.push_back(argc == 2 && strcmp(argv[1], "-") != 0 ? argv[1] : "");
77  }
78  }
79 
80  // argc <= 2 means the file (if any) is an input file, so write to stdout.
81  const std::string out_far =
82  argc > 2 && strcmp(argv[argc - 1], "-") != 0 ? argv[argc - 1] : "";
83 
84  fst::FarEntryType entry_type;
85  if (!s::GetFarEntryType(FST_FLAGS_entry_type, &entry_type)) {
86  LOG(ERROR) << "Unknown or unsupported FAR entry type: "
87  << FST_FLAGS_entry_type;
88  return 1;
89  }
90 
91  fst::TokenType token_type;
92  if (!s::GetTokenType(FST_FLAGS_token_type, &token_type)) {
93  LOG(ERROR) << "Unknown or unsupported FAR token type: "
94  << FST_FLAGS_token_type;
95  return 1;
96  }
97 
98  fst::FarType far_type;
99  if (!s::GetFarType(FST_FLAGS_far_type, &far_type)) {
100  LOG(ERROR) << "Unknown or unsupported FAR type: "
101  << FST_FLAGS_far_type;
102  return 1;
103  }
104 
105  // Empty fst_type means vector for farcompilestrings, but "input FST type"
106  // for farconvert.
107  const std::string fst_type = FST_FLAGS_fst_type.empty()
108  ? "vector"
109  : FST_FLAGS_fst_type;
110 
111  const auto arc_type = FST_FLAGS_arc_type;
112  if (arc_type.empty()) return 1;
113 
114  std::unique_ptr<FarWriterClass> writer(
115  FarWriterClass::Create(out_far, arc_type, far_type));
116  if (!writer) return 1;
117 
119  sources, *writer, fst_type, FST_FLAGS_generate_keys,
120  entry_type, token_type, FST_FLAGS_symbols,
121  FST_FLAGS_unknown_symbol, FST_FLAGS_keep_symbols,
122  FST_FLAGS_initial_symbols,
123  FST_FLAGS_key_prefix, FST_FLAGS_key_suffix);
124 
125  if (writer->Error()) {
126  FSTERROR() << "Error writing FAR: " << out_far;
127  return 1;
128  }
129 
130  return 0;
131 }
bool GetTokenType(std::string_view str, TokenType *token_type)
Definition: getters.cc:234
int farcompilestrings_main(int argc, char **argv)
DECLARE_int32(generate_keys)
void ExpandArgs(int argc, char **argv, int *argcp, char ***argvp)
Definition: getters.cc:60
#define LOG(type)
Definition: log.h:53
bool GetFarEntryType(std::string_view str, FarEntryType *entry_type)
Definition: getters.cc:49
#define SET_FLAGS(usage, argc, argv, rmflags)
Definition: flags.h:226
#define FSTERROR()
Definition: util.h:56
FarType
Definition: far.h:51
bool GetFarType(std::string_view str, FarType *far_type)
Definition: getters.cc:34
TokenType
Definition: string.h:49
DECLARE_bool(file_list_input)
void Create(const std::vector< std::string > &sources, FarWriterClass &writer, const int32_t generate_keys, const std::string &key_prefix, const std::string &key_suffix)
Definition: farscript.cc:79
DECLARE_string(key_prefix)
FarEntryType
Definition: far.h:49
void CompileStrings(const std::vector< std::string > &sources, FarWriterClass &writer, std::string_view fst_type, int32_t generate_keys, FarEntryType fet, TokenType tt, const std::string &symbols_source, const std::string &unknown_symbol, bool keep_symbols, bool initial_symbols, const std::string &key_prefix, const std::string &key_suffix)
Definition: farscript.cc:46