FST  openfst-1.8.3
OpenFst Library
print-strings.h
Go to the documentation of this file.
1 // Copyright 2005-2024 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the 'License');
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an 'AS IS' BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // See www.openfst.org for extensive documentation on this weighted
16 // finite-state transducer library.
17 //
18 // Outputs as strings the string FSTs in a finite-state archive.
19 
20 #ifndef FST_EXTENSIONS_FAR_PRINT_STRINGS_H_
21 #define FST_EXTENSIONS_FAR_PRINT_STRINGS_H_
22 
23 #include <cstdint>
24 #include <iomanip>
25 #include <ios>
26 #include <iostream>
27 #include <memory>
28 #include <ostream>
29 #include <sstream>
30 #include <string>
31 #include <vector>
32 
33 #include <fst/flags.h>
34 #include <fst/log.h>
35 #include <fst/extensions/far/far.h>
36 #include <fstream>
37 #include <fst/shortest-distance.h>
38 #include <fst/string.h>
39 #include <fst/symbol-table.h>
40 
41 DECLARE_string(far_field_separator);
42 
43 namespace fst {
44 
45 template <class Arc>
46 void PrintStrings(FarReader<Arc> &reader, FarEntryType entry_type,
47  TokenType token_type, const std::string &begin_key,
48  const std::string &end_key, bool print_key, bool print_weight,
49  const std::string &symbols_source, bool initial_symbols,
50  int32_t generate_sources, const std::string &source_prefix,
51  const std::string &source_suffix) {
52  std::unique_ptr<const SymbolTable> syms;
53  if (!symbols_source.empty()) {
54  syms.reset(SymbolTable::ReadText(symbols_source,
55  FST_FLAGS_fst_field_separator));
56  if (!syms) {
57  LOG(ERROR) << "PrintStrings: Error reading symbol table "
58  << symbols_source;
59  return;
60  }
61  }
62  if (!begin_key.empty()) reader.Find(begin_key);
63  std::string okey;
64  int nrep = 0;
65  for (int i = 1; !reader.Done(); reader.Next(), ++i) {
66  const auto &key = reader.GetKey();
67  if (!end_key.empty() && end_key < key) break;
68  if (okey == key) {
69  ++nrep;
70  } else {
71  nrep = 0;
72  }
73  okey = key;
74  const auto *fst = reader.GetFst();
75  if (i == 1 && initial_symbols && !syms && fst->InputSymbols()) {
76  syms.reset(fst->InputSymbols()->Copy());
77  }
78  std::string str;
79  VLOG(2) << "Handling key: " << key;
80  const StringPrinter<Arc> printer(token_type,
81  syms ? syms.get() : fst->InputSymbols(),
82  /*omit_epsilon=*/false);
83  printer(*fst, &str);
84  if (entry_type == FarEntryType::LINE) {
85  if (print_key)
86  std::cout << key << FST_FLAGS_far_field_separator[0];
87  std::cout << str;
88  if (print_weight) {
89  std::cout << FST_FLAGS_far_field_separator[0]
90  << ShortestDistance(*fst);
91  }
92  std::cout << '\n';
93  } else if (entry_type == FarEntryType::FILE) {
94  std::stringstream sstrm;
95  if (generate_sources) {
96  sstrm.fill('0');
97  sstrm << std::right << std::setw(generate_sources) << i;
98  } else {
99  sstrm << key;
100  if (nrep > 0) sstrm << "." << nrep;
101  }
102  std::string source;
103  source = source_prefix + sstrm.str() + source_suffix;
104  std::ofstream ostrm(source);
105  if (!ostrm) {
106  LOG(ERROR) << "PrintStrings: Can't open file: " << source;
107  return;
108  }
109  ostrm << str;
110  if (token_type == TokenType::SYMBOL) ostrm << "\n";
111  }
112  }
113 }
114 
115 } // namespace fst
116 
117 #endif // FST_EXTENSIONS_FAR_PRINT_STRINGS_H_
virtual const std::string & GetKey() const =0
virtual bool Done() const =0
#define LOG(type)
Definition: log.h:53
virtual const Fst< Arc > * GetFst() const =0
#define VLOG(level)
Definition: log.h:54
static SymbolTable * ReadText(std::istream &strm, std::string_view name, const std::string &sep=FST_FLAGS_fst_field_separator)
Definition: symbol-table.h:381
TokenType
Definition: string.h:49
virtual void Next()=0
void ShortestDistance(const Fst< Arc > &fst, std::vector< typename Arc::Weight > *distance, const ShortestDistanceOptions< Arc, Queue, ArcFilter > &opts)
virtual bool Find(std::string_view key)=0
FarEntryType
Definition: far.h:49
void PrintStrings(FarReader< Arc > &reader, FarEntryType entry_type, TokenType token_type, const std::string &begin_key, const std::string &end_key, bool print_key, bool print_weight, const std::string &symbols_source, bool initial_symbols, int32_t generate_sources, const std::string &source_prefix, const std::string &source_suffix)
Definition: print-strings.h:46