FST  openfst-1.7.2
OpenFst Library
extract.h
Go to the documentation of this file.
1 // See www.openfst.org for extensive documentation on this weighted
2 // finite-state transducer library.
3 //
4 // Extracts component FSTs from a finite-state archive.
5 
6 #ifndef FST_EXTENSIONS_FAR_EXTRACT_H_
7 #define FST_EXTENSIONS_FAR_EXTRACT_H_
8 
9 #include <memory>
10 #include <string>
11 #include <vector>
12 
13 #include <fst/extensions/far/far.h>
14 #include <fst/util.h>
15 
16 namespace fst {
17 
18 template <class Arc>
19 inline void FarWriteFst(const Fst<Arc> *fst, string key, string *okey,
20  int *nrep, int32 generate_filenames, int i,
21  const string &filename_prefix,
22  const string &filename_suffix) {
23  if (key == *okey) {
24  ++*nrep;
25  } else {
26  *nrep = 0;
27  }
28  *okey = key;
29  string ofilename;
30  if (generate_filenames) {
31  std::ostringstream tmp;
32  tmp.width(generate_filenames);
33  tmp.fill('0');
34  tmp << i;
35  ofilename = tmp.str();
36  } else {
37  if (*nrep > 0) {
38  std::ostringstream tmp;
39  tmp << '.' << nrep;
40  key.append(tmp.str().data(), tmp.str().size());
41  }
42  ofilename = key;
43  }
44  fst->Write(filename_prefix + ofilename + filename_suffix);
45 }
46 
47 template <class Arc>
48 void FarExtract(const std::vector<string> &ifilenames, int32 generate_filenames,
49  const string &keys, const string &key_separator,
50  const string &range_delimiter, const string &filename_prefix,
51  const string &filename_suffix) {
52  std::unique_ptr<FarReader<Arc>> far_reader(
53  FarReader<Arc>::Open(ifilenames));
54  if (!far_reader) return;
55  string okey;
56  int nrep = 0;
57  std::vector<char *> key_vector;
58  // User has specified a set of FSTs to extract, where some of these may in
59  // fact be ranges.
60  if (!keys.empty()) {
61  auto *keys_cstr = new char[keys.size() + 1];
62  strcpy(keys_cstr, keys.c_str());
63  SplitString(keys_cstr, key_separator.c_str(), &key_vector, true);
64  int i = 0;
65  for (size_t k = 0; k < key_vector.size(); ++k, ++i) {
66  string key = key_vector[k];
67  auto *key_cstr = new char[key.size() + 1];
68  strcpy(key_cstr, key.c_str());
69  std::vector<char *> range_vector;
70  SplitString(key_cstr, range_delimiter.c_str(), &range_vector, false);
71  if (range_vector.size() == 1) { // Not a range
72  if (!far_reader->Find(key)) {
73  LOG(ERROR) << "FarExtract: Cannot find key " << key;
74  return;
75  }
76  const auto *fst = far_reader->GetFst();
77  FarWriteFst(fst, key, &okey, &nrep, generate_filenames, i,
78  filename_prefix, filename_suffix);
79  } else if (range_vector.size() == 2) { // A legal range
80  string begin_key = range_vector[0];
81  string end_key = range_vector[1];
82  if (begin_key.empty() || end_key.empty()) {
83  LOG(ERROR) << "FarExtract: Illegal range specification " << key;
84  return;
85  }
86  if (!far_reader->Find(begin_key)) {
87  LOG(ERROR) << "FarExtract: Cannot find key " << begin_key;
88  return;
89  }
90  for (; !far_reader->Done(); far_reader->Next(), ++i) {
91  const auto &ikey = far_reader->GetKey();
92  if (end_key < ikey) break;
93  const auto *fst = far_reader->GetFst();
94  FarWriteFst(fst, ikey, &okey, &nrep, generate_filenames, i,
95  filename_prefix, filename_suffix);
96  }
97  } else {
98  LOG(ERROR) << "FarExtract: Illegal range specification " << key;
99  return;
100  }
101  delete[] key_cstr;
102  }
103  delete[] keys_cstr;
104  return;
105  }
106  // Nothing specified, so just extracts everything.
107  for (size_t i = 1; !far_reader->Done(); far_reader->Next(), ++i) {
108  const auto &key = far_reader->GetKey();
109  const auto *fst = far_reader->GetFst();
110  FarWriteFst(fst, key, &okey, &nrep, generate_filenames, i, filename_prefix,
111  filename_suffix);
112  }
113  return;
114 }
115 
116 } // namespace fst
117 
118 #endif // FST_EXTENSIONS_FAR_EXTRACT_H_
virtual bool Write(std::ostream &strm, const FstWriteOptions &opts) const
Definition: fst.h:270
#define LOG(type)
Definition: log.h:48
void SplitString(char *line, const char *delim, std::vector< char * > *vec, bool omit_empty_strings)
Definition: util.cc:22
void FarExtract(const std::vector< string > &ifilenames, int32 generate_filenames, const string &keys, const string &key_separator, const string &range_delimiter, const string &filename_prefix, const string &filename_suffix)
Definition: extract.h:48
int32_t int32
Definition: types.h:26
void FarWriteFst(const Fst< Arc > *fst, string key, string *okey, int *nrep, int32 generate_filenames, int i, const string &filename_prefix, const string &filename_suffix)
Definition: extract.h:19