FST  openfst-1.8.2
OpenFst Library
extract.h
Go to the documentation of this file.
1 // Copyright 2005-2020 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the 'License');
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an 'AS IS' BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // See www.openfst.org for extensive documentation on this weighted
16 // finite-state transducer library.
17 //
18 // Extracts component FSTs from a finite-state archive.
19 
20 #ifndef FST_EXTENSIONS_FAR_EXTRACT_H_
21 #define FST_EXTENSIONS_FAR_EXTRACT_H_
22 
23 #include <cstdint>
24 #include <iomanip>
25 #include <memory>
26 #include <string>
27 #include <vector>
28 
29 #include <fst/extensions/far/far.h>
30 #include <fst/util.h>
31 #include <string_view>
32 
33 namespace fst {
34 
35 template <class Arc>
36 inline void FarWriteFst(const Fst<Arc> *fst, std::string_view key,
37  std::string *okey, int *nrep, int32_t generate_sources,
38  int i, std::string_view source_prefix,
39  std::string_view source_suffix) {
40  DCHECK_NE(fst, nullptr);
41  DCHECK_NE(okey, nullptr);
42  DCHECK_NE(nrep, nullptr);
43  if (key == *okey) {
44  ++*nrep;
45  } else {
46  *nrep = 0;
47  }
48  okey->assign(key.data(), key.size());
49  std::ostringstream source_path;
50  source_path << source_prefix;
51  if (generate_sources) {
52  source_path << std::setw(generate_sources) << std::setfill('0') << i;
53  } else {
54  source_path << key;
55  if (*nrep > 0) source_path << '.' << *nrep;
56  }
57  source_path << source_suffix;
58  fst->Write(source_path.str());
59 }
60 
61 template <class Arc>
62 void Extract(FarReader<Arc> &reader, int32_t generate_sources,
63  const std::string &keys, const std::string &key_separator,
64  const std::string &range_delimiter,
65  const std::string &source_prefix,
66  const std::string &source_suffix) {
67  std::string okey;
68  int nrep = 0;
69  // User has specified a set of FSTs to extract, where some of these may in
70  // fact be ranges.
71  if (!keys.empty()) {
72  std::vector<std::string_view> key_vector =
73  StrSplit(keys, ByAnyChar(key_separator), SkipEmpty());
74  int i = 0;
75  for (size_t k = 0; k < key_vector.size(); ++k, ++i) {
76  std::string_view key = key_vector[k];
77  std::vector<std::string_view> range_vector =
78  StrSplit(key, ByAnyChar(range_delimiter));
79  if (range_vector.size() == 1) { // Not a range
80  if (!reader.Find(key)) {
81  LOG(ERROR) << "Extract: Cannot find key " << key;
82  return;
83  }
84  const auto *fst = reader.GetFst();
85  FarWriteFst(fst, key, &okey, &nrep, generate_sources, i, source_prefix,
86  source_suffix);
87  } else if (range_vector.size() == 2) { // A legal range
88  std::string_view begin_key = range_vector[0];
89  std::string_view end_key = range_vector[1];
90  if (begin_key.empty() || end_key.empty()) {
91  LOG(ERROR) << "Extract: Illegal range specification " << key;
92  return;
93  }
94  if (!reader.Find(begin_key)) {
95  LOG(ERROR) << "Extract: Cannot find key " << begin_key;
96  return;
97  }
98  for (; !reader.Done(); reader.Next(), ++i) {
99  const auto &ikey = reader.GetKey();
100  if (end_key < ikey) break;
101  const auto *fst = reader.GetFst();
102  FarWriteFst(fst, ikey, &okey, &nrep, generate_sources, i,
103  source_prefix, source_suffix);
104  }
105  } else {
106  LOG(ERROR) << "Extract: Illegal range specification " << key;
107  return;
108  }
109  }
110  return;
111  }
112  // Nothing specified, so just extracts everything.
113  for (size_t i = 1; !reader.Done(); reader.Next(), ++i) {
114  const auto &key = reader.GetKey();
115  const auto *fst = reader.GetFst();
116  FarWriteFst(fst, key, &okey, &nrep, generate_sources, i, source_prefix,
117  source_suffix);
118  }
119 }
120 
121 } // namespace fst
122 
123 #endif // FST_EXTENSIONS_FAR_EXTRACT_H_
virtual const std::string & GetKey() const =0
void FarWriteFst(const Fst< Arc > *fst, std::string_view key, std::string *okey, int *nrep, int32_t generate_sources, int i, std::string_view source_prefix, std::string_view source_suffix)
Definition: extract.h:36
virtual bool Done() const =0
virtual bool Write(std::ostream &strm, const FstWriteOptions &opts) const
Definition: fst.h:293
#define LOG(type)
Definition: log.h:49
virtual const Fst< Arc > * GetFst() const =0
internal::StringSplitter StrSplit(std::string_view full, ByAnyChar delim)
Definition: compat.cc:81
void Extract(FarReader< Arc > &reader, int32_t generate_sources, const std::string &keys, const std::string &key_separator, const std::string &range_delimiter, const std::string &source_prefix, const std::string &source_suffix)
Definition: extract.h:62
virtual void Next()=0
virtual bool Find(std::string_view key)=0
#define DCHECK_NE(x, y)
Definition: log.h:76