FST  openfst-1.7.3
OpenFst Library
extract.h
Go to the documentation of this file.
1 // See www.openfst.org for extensive documentation on this weighted
2 // finite-state transducer library.
3 //
4 // Extracts component FSTs from a finite-state archive.
5 
6 #ifndef FST_EXTENSIONS_FAR_EXTRACT_H_
7 #define FST_EXTENSIONS_FAR_EXTRACT_H_
8 
9 #include <memory>
10 #include <string>
11 #include <vector>
12 
13 #include <fst/extensions/far/far.h>
14 #include <fst/util.h>
15 
16 namespace fst {
17 
18 template <class Arc>
19 inline void FarWriteFst(const Fst<Arc> *fst, std::string key, std::string *okey,
20  int *nrep, int32 generate_filenames, int i,
21  const std::string &filename_prefix,
22  const std::string &filename_suffix) {
23  if (key == *okey) {
24  ++*nrep;
25  } else {
26  *nrep = 0;
27  }
28  *okey = key;
29  std::string ofilename;
30  if (generate_filenames) {
31  std::ostringstream tmp;
32  tmp.width(generate_filenames);
33  tmp.fill('0');
34  tmp << i;
35  ofilename = tmp.str();
36  } else {
37  if (*nrep > 0) {
38  std::ostringstream tmp;
39  tmp << '.' << nrep;
40  key.append(tmp.str().data(), tmp.str().size());
41  }
42  ofilename = key;
43  }
44  fst->Write(filename_prefix + ofilename + filename_suffix);
45 }
46 
47 template <class Arc>
48 void FarExtract(const std::vector<std::string> &ifilenames,
49  int32 generate_filenames, const std::string &keys,
50  const std::string &key_separator,
51  const std::string &range_delimiter,
52  const std::string &filename_prefix,
53  const std::string &filename_suffix) {
54  std::unique_ptr<FarReader<Arc>> far_reader(
55  FarReader<Arc>::Open(ifilenames));
56  if (!far_reader) return;
57  std::string okey;
58  int nrep = 0;
59  std::vector<char *> key_vector;
60  // User has specified a set of FSTs to extract, where some of these may in
61  // fact be ranges.
62  if (!keys.empty()) {
63  auto *keys_cstr = new char[keys.size() + 1];
64  strcpy(keys_cstr, keys.c_str());
65  SplitString(keys_cstr, key_separator.c_str(), &key_vector, true);
66  int i = 0;
67  for (size_t k = 0; k < key_vector.size(); ++k, ++i) {
68  std::string key = key_vector[k];
69  auto *key_cstr = new char[key.size() + 1];
70  strcpy(key_cstr, key.c_str());
71  std::vector<char *> range_vector;
72  SplitString(key_cstr, range_delimiter.c_str(), &range_vector, false);
73  if (range_vector.size() == 1) { // Not a range
74  if (!far_reader->Find(key)) {
75  LOG(ERROR) << "FarExtract: Cannot find key " << key;
76  return;
77  }
78  const auto *fst = far_reader->GetFst();
79  FarWriteFst(fst, key, &okey, &nrep, generate_filenames, i,
80  filename_prefix, filename_suffix);
81  } else if (range_vector.size() == 2) { // A legal range
82  std::string begin_key = range_vector[0];
83  std::string end_key = range_vector[1];
84  if (begin_key.empty() || end_key.empty()) {
85  LOG(ERROR) << "FarExtract: Illegal range specification " << key;
86  return;
87  }
88  if (!far_reader->Find(begin_key)) {
89  LOG(ERROR) << "FarExtract: Cannot find key " << begin_key;
90  return;
91  }
92  for (; !far_reader->Done(); far_reader->Next(), ++i) {
93  const auto &ikey = far_reader->GetKey();
94  if (end_key < ikey) break;
95  const auto *fst = far_reader->GetFst();
96  FarWriteFst(fst, ikey, &okey, &nrep, generate_filenames, i,
97  filename_prefix, filename_suffix);
98  }
99  } else {
100  LOG(ERROR) << "FarExtract: Illegal range specification " << key;
101  return;
102  }
103  delete[] key_cstr;
104  }
105  delete[] keys_cstr;
106  return;
107  }
108  // Nothing specified, so just extracts everything.
109  for (size_t i = 1; !far_reader->Done(); far_reader->Next(), ++i) {
110  const auto &key = far_reader->GetKey();
111  const auto *fst = far_reader->GetFst();
112  FarWriteFst(fst, key, &okey, &nrep, generate_filenames, i, filename_prefix,
113  filename_suffix);
114  }
115 }
116 
117 } // namespace fst
118 
119 #endif // FST_EXTENSIONS_FAR_EXTRACT_H_
virtual bool Write(std::ostream &strm, const FstWriteOptions &opts) const
Definition: fst.h:269
#define LOG(type)
Definition: log.h:46
void FarExtract(const std::vector< std::string > &ifilenames, int32 generate_filenames, const std::string &keys, const std::string &key_separator, const std::string &range_delimiter, const std::string &filename_prefix, const std::string &filename_suffix)
Definition: extract.h:48
void SplitString(char *line, const char *delim, std::vector< char * > *vec, bool omit_empty_strings)
Definition: util.cc:24
void FarWriteFst(const Fst< Arc > *fst, std::string key, std::string *okey, int *nrep, int32 generate_filenames, int i, const std::string &filename_prefix, const std::string &filename_suffix)
Definition: extract.h:19
int32_t int32
Definition: types.h:26