FST  openfst-1.7.1
OpenFst Library
symbol-table-ops.cc
Go to the documentation of this file.
1 // See www.openfst.org for extensive documentation on this weighted
2 // finite-state transducer library.
3 //
4 
5 #include <fst/symbol-table-ops.h>
6 
7 #include <string>
8 
9 namespace fst {
10 
12  bool *right_relabel_output) {
13  // MergeSymbolTable detects several special cases. It will return a reference
14  // copied version of SymbolTable of left or right if either symbol table is
15  // a superset of the other.
16  std::unique_ptr<SymbolTable> merged(
17  new SymbolTable("merge_" + left.Name() + "_" + right.Name()));
18  // Copies everything from the left symbol table.
19  bool left_has_all = true;
20  bool right_has_all = true;
21  bool relabel = false;
22  for (SymbolTableIterator liter(left); !liter.Done(); liter.Next()) {
23  merged->AddSymbol(liter.Symbol(), liter.Value());
24  if (right_has_all) {
25  int64 key = right.Find(liter.Symbol());
26  if (key == -1) {
27  right_has_all = false;
28  } else if (!relabel && key != liter.Value()) {
29  relabel = true;
30  }
31  }
32  }
33  if (right_has_all) {
34  if (right_relabel_output) *right_relabel_output = relabel;
35  return right.Copy();
36  }
37  // add all symbols we can from right symbol table
38  std::vector<string> conflicts;
39  for (SymbolTableIterator riter(right); !riter.Done(); riter.Next()) {
40  int64 key = merged->Find(riter.Symbol());
41  if (key != -1) {
42  // Symbol already exists, maybe with different value
43  if (key != riter.Value()) relabel = true;
44  continue;
45  }
46  // Symbol doesn't exist from left
47  left_has_all = false;
48  if (!merged->Find(riter.Value()).empty()) {
49  // we can't add this where we want to, add it later, in order
50  conflicts.push_back(riter.Symbol());
51  continue;
52  }
53  // there is a hole and we can add this symbol with its id
54  merged->AddSymbol(riter.Symbol(), riter.Value());
55  }
56  if (right_relabel_output) *right_relabel_output = relabel;
57  if (left_has_all) return left.Copy();
58  // Add all symbols that conflicted, in order
59  for (const auto &conflict : conflicts) merged->AddSymbol(conflict);
60  return merged.release();
61 }
62 
64  std::map<int64, string> sorted;
65  SymbolTableIterator stiter(syms);
66  for (; !stiter.Done(); stiter.Next()) {
67  sorted[stiter.Value()] = stiter.Symbol();
68  }
69  auto *compact = new SymbolTable(syms.Name() + "_compact");
70  int64 newkey = 0;
71  for (const auto &kv : sorted) compact->AddSymbol(kv.second, newkey++);
72  return compact;
73 }
74 
75 SymbolTable *FstReadSymbols(const string &filename, bool input_symbols) {
76  std::ifstream in(filename, std::ios_base::in | std::ios_base::binary);
77  if (!in) {
78  LOG(ERROR) << "FstReadSymbols: Can't open file " << filename;
79  return nullptr;
80  }
81  FstHeader hdr;
82  if (!hdr.Read(in, filename)) {
83  LOG(ERROR) << "FstReadSymbols: Couldn't read header from " << filename;
84  return nullptr;
85  }
86  if (hdr.GetFlags() & FstHeader::HAS_ISYMBOLS) {
87  std::unique_ptr<SymbolTable> isymbols(SymbolTable::Read(in, filename));
88  if (isymbols == nullptr) {
89  LOG(ERROR) << "FstReadSymbols: Couldn't read input symbols from "
90  << filename;
91  return nullptr;
92  }
93  if (input_symbols) return isymbols.release();
94  }
95  if (hdr.GetFlags() & FstHeader::HAS_OSYMBOLS) {
96  std::unique_ptr<SymbolTable> osymbols(SymbolTable::Read(in, filename));
97  if (osymbols == nullptr) {
98  LOG(ERROR) << "FstReadSymbols: Couldn't read output symbols from "
99  << filename;
100  return nullptr;
101  }
102  if (!input_symbols) return osymbols.release();
103  }
104  LOG(ERROR) << "FstReadSymbols: The file " << filename
105  << " doesn't contain the requested symbols";
106  return nullptr;
107 }
108 
109 bool AddAuxiliarySymbols(const string &prefix, int64 start_label,
110  int64 nlabels, SymbolTable *syms) {
111  for (int64 i = 0; i < nlabels; ++i) {
112  auto index = i + start_label;
113  if (index != syms->AddSymbol(prefix + std::to_string(i), index)) {
114  FSTERROR() << "AddAuxiliarySymbols: Symbol table clash";
115  return false;
116  }
117  }
118  return true;
119 }
120 
121 } // namespace fst
virtual SymbolTable * Copy() const
Definition: symbol-table.h:264
#define LOG(type)
Definition: log.h:48
int64_t int64
Definition: types.h:27
SymbolTable * CompactSymbolTable(const SymbolTable &syms)
#define FSTERROR()
Definition: util.h:35
static SymbolTable * Read(std::istream &strm, const SymbolTableReadOptions &opts)
Definition: symbol-table.h:236
SymbolTable * FstReadSymbols(const string &filename, bool input)
bool AddAuxiliarySymbols(const string &prefix, int64 start_label, int64 nlabels, SymbolTable *syms)
virtual int64 AddSymbol(const string &symbol, int64 key)
Definition: symbol-table.h:268
bool Read(std::istream &strm, const string &source, bool rewind=false)
Definition: fst.cc:58
int32 GetFlags() const
Definition: fst.h:125
string Symbol() const
Definition: symbol-table.h:391
virtual string Find(int64 key) const
Definition: symbol-table.h:299
virtual const string & Name() const
Definition: symbol-table.h:317
SymbolTable * MergeSymbolTable(const SymbolTable &left, const SymbolTable &right, bool *right_relabel_output=nullptr)