FST  openfst-1.8.3
OpenFst Library
extract.h
Go to the documentation of this file.
1 // Copyright 2005-2024 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the 'License');
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an 'AS IS' BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // See www.openfst.org for extensive documentation on this weighted
16 // finite-state transducer library.
17 //
18 // Extracts component FSTs from a finite-state archive.
19 
20 #ifndef FST_EXTENSIONS_FAR_EXTRACT_H_
21 #define FST_EXTENSIONS_FAR_EXTRACT_H_
22 
23 #include <cstddef>
24 #include <cstdint>
25 #include <iomanip>
26 #include <memory>
27 #include <sstream>
28 #include <string>
29 #include <vector>
30 
31 #include <fst/log.h>
32 #include <fst/extensions/far/far.h>
33 #include <fst/fst.h>
34 #include <fst/util.h>
35 #include <string_view>
36 
37 namespace fst {
38 
39 template <class Arc>
40 inline void FarWriteFst(const Fst<Arc> *fst, std::string_view key,
41  std::string *okey, int *nrep, int32_t generate_sources,
42  int i, std::string_view source_prefix,
43  std::string_view source_suffix) {
44  DCHECK_NE(fst, nullptr);
45  DCHECK_NE(okey, nullptr);
46  DCHECK_NE(nrep, nullptr);
47  if (key == *okey) {
48  ++*nrep;
49  } else {
50  *nrep = 0;
51  }
52  okey->assign(key.data(), key.size());
53  std::ostringstream source_path;
54  source_path << source_prefix;
55  if (generate_sources) {
56  source_path << std::setw(generate_sources) << std::setfill('0') << i;
57  } else {
58  source_path << key;
59  if (*nrep > 0) source_path << '.' << *nrep;
60  }
61  source_path << source_suffix;
62  fst->Write(source_path.str());
63 }
64 
65 template <class Arc>
66 void Extract(FarReader<Arc> &reader, int32_t generate_sources,
67  const std::string &keys, std::string_view key_separator,
68  std::string_view range_delimiter, std::string_view source_prefix,
69  std::string_view source_suffix) {
70  std::string okey;
71  int nrep = 0;
72  // User has specified a set of FSTs to extract, where some of these may in
73  // fact be ranges.
74  if (!keys.empty()) {
75  std::vector<std::string_view> key_vector =
76  StrSplit(keys, ByAnyChar(key_separator), SkipEmpty());
77  int i = 0;
78  for (size_t k = 0; k < key_vector.size(); ++k, ++i) {
79  std::string_view key = key_vector[k];
80  std::vector<std::string_view> range_vector =
81  StrSplit(key, ByAnyChar(range_delimiter));
82  if (range_vector.size() == 1) { // Not a range
83  if (!reader.Find(key)) {
84  LOG(ERROR) << "Extract: Cannot find key " << key;
85  return;
86  }
87  const auto *fst = reader.GetFst();
88  FarWriteFst(fst, key, &okey, &nrep, generate_sources, i, source_prefix,
89  source_suffix);
90  } else if (range_vector.size() == 2) { // A legal range
91  std::string_view begin_key = range_vector[0];
92  std::string_view end_key = range_vector[1];
93  if (begin_key.empty() || end_key.empty()) {
94  LOG(ERROR) << "Extract: Illegal range specification " << key;
95  return;
96  }
97  if (!reader.Find(begin_key)) {
98  LOG(ERROR) << "Extract: Cannot find key " << begin_key;
99  return;
100  }
101  for (; !reader.Done(); reader.Next(), ++i) {
102  const auto &ikey = reader.GetKey();
103  if (end_key < ikey) break;
104  const auto *fst = reader.GetFst();
105  FarWriteFst(fst, ikey, &okey, &nrep, generate_sources, i,
106  source_prefix, source_suffix);
107  }
108  } else {
109  LOG(ERROR) << "Extract: Illegal range specification " << key;
110  return;
111  }
112  }
113  return;
114  }
115  // Nothing specified, so just extracts everything.
116  for (size_t i = 1; !reader.Done(); reader.Next(), ++i) {
117  const auto &key = reader.GetKey();
118  const auto *fst = reader.GetFst();
119  FarWriteFst(fst, key, &okey, &nrep, generate_sources, i, source_prefix,
120  source_suffix);
121  }
122 }
123 
124 } // namespace fst
125 
126 #endif // FST_EXTENSIONS_FAR_EXTRACT_H_
virtual const std::string & GetKey() const =0
void FarWriteFst(const Fst< Arc > *fst, std::string_view key, std::string *okey, int *nrep, int32_t generate_sources, int i, std::string_view source_prefix, std::string_view source_suffix)
Definition: extract.h:40
virtual bool Done() const =0
virtual bool Write(std::ostream &strm, const FstWriteOptions &opts) const
Definition: fst.h:293
#define LOG(type)
Definition: log.h:53
virtual const Fst< Arc > * GetFst() const =0
internal::StringSplitter StrSplit(std::string_view full, ByAnyChar delim)
Definition: compat.cc:77
virtual void Next()=0
void Extract(FarReader< Arc > &reader, int32_t generate_sources, const std::string &keys, std::string_view key_separator, std::string_view range_delimiter, std::string_view source_prefix, std::string_view source_suffix)
Definition: extract.h:66
virtual bool Find(std::string_view key)=0
#define DCHECK_NE(x, y)
Definition: log.h:80