FST  openfst-1.8.2.post1
OpenFst Library
symbol-table-ops.cc
Go to the documentation of this file.
1 // Copyright 2005-2020 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the 'License');
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an 'AS IS' BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // See www.openfst.org for extensive documentation on this weighted
16 // finite-state transducer library.
17 //
18 
19 #include <fst/symbol-table-ops.h>
20 
21 #include <cstdint>
22 #include <string>
23 
24 namespace fst {
25 
27  bool *right_relabel_output) {
28  // MergeSymbolTable detects several special cases It will return a reference
29  // copied version of SymbolTable of left or right if either symbol table is
30  // a superset of the other.
31  std::unique_ptr<SymbolTable> merged(
32  new SymbolTable("merge_" + left.Name() + "_" + right.Name()));
33  // Copies everything from the left symbol table.
34  bool left_has_all = true;
35  bool right_has_all = true;
36  bool relabel = false;
37  for (const auto &litem : left) {
38  merged->AddSymbol(litem.Symbol(), litem.Label());
39  if (right_has_all) {
40  int64_t key = right.Find(litem.Symbol());
41  if (key == kNoSymbol) {
42  right_has_all = false;
43  } else if (!relabel && key != litem.Label()) {
44  relabel = true;
45  }
46  }
47  }
48  if (right_has_all) {
49  if (right_relabel_output) *right_relabel_output = relabel;
50  return right.Copy();
51  }
52  // Adds all symbols we can from right symbol table.
53  std::vector<std::string> conflicts;
54  for (const auto &ritem : right) {
55  int64_t key = merged->Find(ritem.Symbol());
56  if (key != -1) {
57  // Symbol already exists, maybe with different value.
58  if (key != ritem.Label()) relabel = true;
59  continue;
60  }
61  // Symbol doesn't exist from left.
62  left_has_all = false;
63  if (!merged->Find(ritem.Label()).empty()) {
64  // We can't add this where we want to, add it later, in order.
65  conflicts.push_back(ritem.Symbol());
66  continue;
67  }
68  // There is a hole and we can add this symbol with its ID.
69  merged->AddSymbol(ritem.Symbol(), ritem.Label());
70  }
71  if (right_relabel_output) *right_relabel_output = relabel;
72  if (left_has_all) return left.Copy();
73  // Adds all symbols that conflicted, in order.
74  for (const auto &conflict : conflicts) merged->AddSymbol(conflict);
75  return merged.release();
76 }
77 
79  std::map<int64_t, std::string> sorted;
80  for (const auto &stitem : syms) {
81  sorted[stitem.Label()] = stitem.Symbol();
82  }
83  auto *compact = new SymbolTable(syms.Name() + "_compact");
84  int64_t newkey = 0;
85  for (const auto &kv : sorted) compact->AddSymbol(kv.second, newkey++);
86  return compact;
87 }
88 
89 SymbolTable *FstReadSymbols(const std::string &source, bool input_symbols) {
90  std::ifstream in(source, std::ios_base::in | std::ios_base::binary);
91  if (!in) {
92  LOG(ERROR) << "FstReadSymbols: Can't open file " << source;
93  return nullptr;
94  }
95  FstHeader hdr;
96  if (!hdr.Read(in, source)) {
97  LOG(ERROR) << "FstReadSymbols: Couldn't read header from " << source;
98  return nullptr;
99  }
100  if (hdr.GetFlags() & FstHeader::HAS_ISYMBOLS) {
101  std::unique_ptr<SymbolTable> isymbols(SymbolTable::Read(in, source));
102  if (isymbols == nullptr) {
103  LOG(ERROR) << "FstReadSymbols: Couldn't read input symbols from "
104  << source;
105  return nullptr;
106  }
107  if (input_symbols) return isymbols.release();
108  }
109  if (hdr.GetFlags() & FstHeader::HAS_OSYMBOLS) {
110  std::unique_ptr<SymbolTable> osymbols(SymbolTable::Read(in, source));
111  if (osymbols == nullptr) {
112  LOG(ERROR) << "FstReadSymbols: Couldn't read output symbols from "
113  << source;
114  return nullptr;
115  }
116  if (!input_symbols) return osymbols.release();
117  }
118  LOG(ERROR) << "FstReadSymbols: The file " << source
119  << " doesn't contain the requested symbols";
120  return nullptr;
121 }
122 
123 bool AddAuxiliarySymbols(const std::string &prefix, int64_t start_label,
124  int64_t nlabels, SymbolTable *syms) {
125  for (int64_t i = 0; i < nlabels; ++i) {
126  auto index = i + start_label;
127  if (index != syms->AddSymbol(prefix + std::to_string(i), index)) {
128  FSTERROR() << "AddAuxiliarySymbols: Symbol table clash";
129  return false;
130  }
131  }
132  return true;
133 }
134 
135 } // namespace fst
const std::string & Name() const
Definition: symbol-table.h:462
bool Read(std::istream &strm, const std::string &source, bool rewind=false)
Definition: fst.cc:50
int64_t AddSymbol(std::string_view symbol, int64_t key)
Definition: symbol-table.h:421
virtual SymbolTable * Copy() const
Definition: symbol-table.h:407
#define LOG(type)
Definition: log.h:49
constexpr int64_t kNoSymbol
Definition: symbol-table.h:49
uint32_t GetFlags() const
Definition: fst.h:148
SymbolTable * CompactSymbolTable(const SymbolTable &syms)
#define FSTERROR()
Definition: util.h:53
static SymbolTable * Read(std::istream &strm, const std::string &source)
Definition: symbol-table.h:391
SymbolTable * FstReadSymbols(const std::string &source, bool input)
std::string Find(int64_t key) const
Definition: symbol-table.h:446
SymbolTable * MergeSymbolTable(const SymbolTable &left, const SymbolTable &right, bool *right_relabel_output=nullptr)
bool AddAuxiliarySymbols(const std::string &prefix, int64_t start_label, int64_t nlabels, SymbolTable *syms)