FST  openfst-1.8.3
OpenFst Library
symbol-table-ops.cc
Go to the documentation of this file.
1 // Copyright 2005-2024 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the 'License');
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an 'AS IS' BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // See www.openfst.org for extensive documentation on this weighted
16 // finite-state transducer library.
17 //
18 
19 #include <fst/symbol-table-ops.h>
20 
21 #include <cstdint>
22 #include <functional>
23 #include <ios>
24 #include <map>
25 #include <memory>
26 #include <string>
27 #include <utility>
28 #include <vector>
29 
30 #include <fst/log.h>
31 #include <fstream>
32 #include <fst/fst.h>
33 #include <fst/symbol-table.h>
34 #include <fst/util.h>
35 
36 namespace fst {
37 
39  bool *right_relabel_output) {
40  // MergeSymbolTable detects several special cases It will return a reference
41  // copied version of SymbolTable of left or right if either symbol table is
42  // a superset of the other.
43  std::unique_ptr<SymbolTable> merged(
44  new SymbolTable("merge_" + left.Name() + "_" + right.Name()));
45  // Copies everything from the left symbol table.
46  bool left_has_all = true;
47  bool right_has_all = true;
48  bool relabel = false;
49  for (const auto &litem : left) {
50  merged->AddSymbol(litem.Symbol(), litem.Label());
51  if (right_has_all) {
52  int64_t key = right.Find(litem.Symbol());
53  if (key == kNoSymbol) {
54  right_has_all = false;
55  } else if (!relabel && key != litem.Label()) {
56  relabel = true;
57  }
58  }
59  }
60  if (right_has_all) {
61  if (right_relabel_output) *right_relabel_output = relabel;
62  return right.Copy();
63  }
64  // Adds all symbols we can from right symbol table.
65  std::vector<std::string> conflicts;
66  for (const auto &ritem : right) {
67  int64_t key = merged->Find(ritem.Symbol());
68  if (key != -1) {
69  // Symbol already exists, maybe with different value.
70  if (key != ritem.Label()) relabel = true;
71  continue;
72  }
73  // Symbol doesn't exist from left.
74  left_has_all = false;
75  if (!merged->Find(ritem.Label()).empty()) {
76  // We can't add this where we want to, add it later, in order.
77  conflicts.push_back(ritem.Symbol());
78  continue;
79  }
80  // There is a hole and we can add this symbol with its ID.
81  merged->AddSymbol(ritem.Symbol(), ritem.Label());
82  }
83  if (right_relabel_output) *right_relabel_output = relabel;
84  if (left_has_all) return left.Copy();
85  // Adds all symbols that conflicted, in order.
86  for (const auto &conflict : conflicts) merged->AddSymbol(conflict);
87  return merged.release();
88 }
89 
91  std::map<int64_t, std::string> sorted;
92  for (const auto &stitem : syms) {
93  sorted[stitem.Label()] = stitem.Symbol();
94  }
95  auto compact = std::make_unique<SymbolTable>(syms.Name() + "_compact");
96  int64_t newkey = 0;
97  for (const auto &kv : sorted) compact->AddSymbol(kv.second, newkey++);
98  return compact.release();
99 }
100 
101 SymbolTable * FstReadSymbols(const std::string &source,
102  bool input_symbols) {
103  std::ifstream in(source, std::ios_base::in | std::ios_base::binary);
104  if (!in) {
105  LOG(ERROR) << "FstReadSymbols: Can't open file " << source;
106  return nullptr;
107  }
108  FstHeader hdr;
109  if (!hdr.Read(in, source)) {
110  LOG(ERROR) << "FstReadSymbols: Couldn't read header from " << source;
111  return nullptr;
112  }
113  if (hdr.GetFlags() & FstHeader::HAS_ISYMBOLS) {
114  std::unique_ptr<SymbolTable> isymbols(SymbolTable::Read(in, source));
115  if (isymbols == nullptr) {
116  LOG(ERROR) << "FstReadSymbols: Couldn't read input symbols from "
117  << source;
118  return nullptr;
119  }
120  if (input_symbols) return isymbols.release();
121  }
122  if (hdr.GetFlags() & FstHeader::HAS_OSYMBOLS) {
123  std::unique_ptr<SymbolTable> osymbols(SymbolTable::Read(in, source));
124  if (osymbols == nullptr) {
125  LOG(ERROR) << "FstReadSymbols: Couldn't read output symbols from "
126  << source;
127  return nullptr;
128  }
129  if (!input_symbols) return osymbols.release();
130  }
131  LOG(ERROR) << "FstReadSymbols: The file " << source
132  << " doesn't contain the requested symbols";
133  return nullptr;
134 }
135 
136 bool AddAuxiliarySymbols(const std::string &prefix, int64_t start_label,
137  int64_t nlabels, SymbolTable *syms) {
138  for (int64_t i = 0; i < nlabels; ++i) {
139  auto index = i + start_label;
140  if (index != syms->AddSymbol(prefix + std::to_string(i), index)) {
141  FSTERROR() << "AddAuxiliarySymbols: Symbol table clash";
142  return false;
143  }
144  }
145  return true;
146 }
147 
148 } // namespace fst
const std::string & Name() const
Definition: symbol-table.h:466
bool Read(std::istream &strm, const std::string &source, bool rewind=false)
Definition: fst.cc:56
int64_t AddSymbol(std::string_view symbol, int64_t key)
Definition: symbol-table.h:425
virtual SymbolTable * Copy() const
Definition: symbol-table.h:411
#define LOG(type)
Definition: log.h:53
constexpr int64_t kNoSymbol
Definition: symbol-table.h:55
uint32_t GetFlags() const
Definition: fst.h:142
SymbolTable * CompactSymbolTable(const SymbolTable &syms)
#define FSTERROR()
Definition: util.h:56
SymbolTable * FstReadSymbols(const std::string &source, bool input)
std::string Find(int64_t key) const
Definition: symbol-table.h:450
SymbolTable * MergeSymbolTable(const SymbolTable &left, const SymbolTable &right, bool *right_relabel_output=nullptr)
static SymbolTable * Read(std::istream &strm, std::string_view source)
Definition: symbol-table.h:395
bool AddAuxiliarySymbols(const std::string &prefix, int64_t start_label, int64_t nlabels, SymbolTable *syms)