FST  openfst-1.7.9
OpenFst Library
symbol-table-ops.cc
Go to the documentation of this file.
1 // See www.openfst.org for extensive documentation on this weighted
2 // finite-state transducer library.
3 //
4 
5 #include <fst/symbol-table-ops.h>
6 
7 #include <string>
8 
9 namespace fst {
10 
12  bool *right_relabel_output) {
13  // MergeSymbolTable detects several special cases It will return a reference
14  // copied version of SymbolTable of left or right if either symbol table is
15  // a superset of the other.
16  std::unique_ptr<SymbolTable> merged(
17  new SymbolTable("merge_" + left.Name() + "_" + right.Name()));
18  // Copies everything from the left symbol table.
19  bool left_has_all = true;
20  bool right_has_all = true;
21  bool relabel = false;
22  for (const auto &litem : left) {
23  merged->AddSymbol(litem.Symbol(), litem.Label());
24  if (right_has_all) {
25  int64 key = right.Find(litem.Symbol());
26  if (key == -1) {
27  right_has_all = false;
28  } else if (!relabel && key != litem.Label()) {
29  relabel = true;
30  }
31  }
32  }
33  if (right_has_all) {
34  if (right_relabel_output) *right_relabel_output = relabel;
35  return right.Copy();
36  }
37  // Adds all symbols we can from right symbol table.
38  std::vector<std::string> conflicts;
39  for (const auto &ritem : right) {
40  int64 key = merged->Find(ritem.Symbol());
41  if (key != -1) {
42  // Symbol already exists, maybe with different value.
43  if (key != ritem.Label()) relabel = true;
44  continue;
45  }
46  // Symbol doesn't exist from left.
47  left_has_all = false;
48  if (!merged->Find(ritem.Label()).empty()) {
49  // We can't add this where we want to, add it later, in order.
50  conflicts.push_back(ritem.Symbol());
51  continue;
52  }
53  // There is a hole and we can add this symbol with its ID.
54  merged->AddSymbol(ritem.Symbol(), ritem.Label());
55  }
56  if (right_relabel_output) *right_relabel_output = relabel;
57  if (left_has_all) return left.Copy();
58  // Adds all symbols that conflicted, in order.
59  for (const auto &conflict : conflicts) merged->AddSymbol(conflict);
60  return merged.release();
61 }
62 
64  std::map<int64, std::string> sorted;
65  for (const auto &stitem : syms) {
66  sorted[stitem.Label()] = stitem.Symbol();
67  }
68  auto *compact = new SymbolTable(syms.Name() + "_compact");
69  int64 newkey = 0;
70  for (const auto &kv : sorted) compact->AddSymbol(kv.second, newkey++);
71  return compact;
72 }
73 
74 SymbolTable *FstReadSymbols(const std::string &source, bool input_symbols) {
75  std::ifstream in(source, std::ios_base::in | std::ios_base::binary);
76  if (!in) {
77  LOG(ERROR) << "FstReadSymbols: Can't open file " << source;
78  return nullptr;
79  }
80  FstHeader hdr;
81  if (!hdr.Read(in, source)) {
82  LOG(ERROR) << "FstReadSymbols: Couldn't read header from " << source;
83  return nullptr;
84  }
85  if (hdr.GetFlags() & FstHeader::HAS_ISYMBOLS) {
86  std::unique_ptr<SymbolTable> isymbols(SymbolTable::Read(in, source));
87  if (isymbols == nullptr) {
88  LOG(ERROR) << "FstReadSymbols: Couldn't read input symbols from "
89  << source;
90  return nullptr;
91  }
92  if (input_symbols) return isymbols.release();
93  }
94  if (hdr.GetFlags() & FstHeader::HAS_OSYMBOLS) {
95  std::unique_ptr<SymbolTable> osymbols(SymbolTable::Read(in, source));
96  if (osymbols == nullptr) {
97  LOG(ERROR) << "FstReadSymbols: Couldn't read output symbols from "
98  << source;
99  return nullptr;
100  }
101  if (!input_symbols) return osymbols.release();
102  }
103  LOG(ERROR) << "FstReadSymbols: The file " << source
104  << " doesn't contain the requested symbols";
105  return nullptr;
106 }
107 
108 bool AddAuxiliarySymbols(const std::string &prefix, int64 start_label,
109  int64 nlabels, SymbolTable *syms) {
110  for (int64 i = 0; i < nlabels; ++i) {
111  auto index = i + start_label;
112  if (index != syms->AddSymbol(prefix + std::to_string(i), index)) {
113  FSTERROR() << "AddAuxiliarySymbols: Symbol table clash";
114  return false;
115  }
116  }
117  return true;
118 }
119 
120 } // namespace fst
const std::string & Name() const
Definition: symbol-table.h:480
bool Read(std::istream &strm, const std::string &source, bool rewind=false)
Definition: fst.cc:60
virtual SymbolTable * Copy() const
Definition: symbol-table.h:426
int64 AddSymbol(SymbolType symbol, int64 key)
Definition: symbol-table.h:440
#define LOG(type)
Definition: log.h:46
int64_t int64
Definition: types.h:27
SymbolTable * CompactSymbolTable(const SymbolTable &syms)
#define FSTERROR()
Definition: util.h:36
static SymbolTable * Read(std::istream &strm, const SymbolTableReadOptions &opts)
Definition: symbol-table.h:401
std::string Find(int64 key) const
Definition: symbol-table.h:464
bool AddAuxiliarySymbols(const std::string &prefix, int64 start_label, int64 nlabels, SymbolTable *syms)
SymbolTable * FstReadSymbols(const std::string &source, bool input)
uint32 GetFlags() const
Definition: fst.h:132
SymbolTable * MergeSymbolTable(const SymbolTable &left, const SymbolTable &right, bool *right_relabel_output=nullptr)