FST  openfst-1.8.3
OpenFst Library
symbol-table-ops.h
Go to the documentation of this file.
1 // Copyright 2005-2024 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the 'License');
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an 'AS IS' BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // See www.openfst.org for extensive documentation on this weighted
16 // finite-state transducer library.
17 
18 #ifndef FST_SYMBOL_TABLE_OPS_H_
19 #define FST_SYMBOL_TABLE_OPS_H_
20 
21 #include <cstdint>
22 #include <memory>
23 #include <string>
24 #include <vector>
25 
26 #include <fst/fst.h>
27 #include <fst/symbol-table.h>
28 #include <unordered_set>
29 
30 namespace fst {
31 
32 // Returns a minimal symbol table containing only symbols referenced by the
33 // passed fst. Symbols preserve their original numbering, so fst does not
34 // require relabeling.
35 template <class Arc>
36 SymbolTable *PruneSymbolTable(const Fst<Arc> &fst, const SymbolTable &syms,
37  bool input) {
38  std::unordered_set<typename Arc::Label> seen;
39  seen.insert(0); // Always keep epsilon (only copied below if syms has label 0).
40  for (StateIterator<Fst<Arc>> siter(fst); !siter.Done(); siter.Next()) {
41  for (ArcIterator<Fst<Arc>> aiter(fst, siter.Value()); !aiter.Done();
42  aiter.Next()) {
43  const auto sym = (input) ? aiter.Value().ilabel : aiter.Value().olabel; // Side chosen by `input`.
44  seen.insert(sym);
45  }
46  }
47  auto pruned = std::make_unique<SymbolTable>(syms.Name() + "_pruned"); // Named after the source table.
48  for (const auto &stitem : syms) {
49  const auto label = stitem.Label();
50  if (seen.count(label)) pruned->AddSymbol(stitem.Symbol(), label); // Keep the original label.
51  }
52  return pruned.release(); // Raw pointer: the unique_ptr gives up ownership here.
53 }
54 
55 // Relabels a symbol table to make it a contiguous mapping.
56 SymbolTable *CompactSymbolTable(const SymbolTable &syms);
57 
58 // Merges two SymbolTables: all symbols from left will be merged into right
59 // with the same IDs. Symbols in right whose IDs conflict with those in left
60 // are reassigned; the assignments from the left SymbolTable take precedence.
61 // The returned symbol table will never modify symbol assignments from the left
62 // side, but may do so on the right. If right_relabel_output is non-null, it
63 // will be assigned true if the symbols from the right table needed to be
64 // reassigned.
65 //
66 // A potential use case is to compose two FSTs that have different symbol
67 // tables. You can reconcile them in the following way:
68 //
69 // Fst<Arc> a, b;
70 // bool relabel;
71 // std::unique_ptr<SymbolTable> bnew(MergeSymbolTable(*a.OutputSymbols(),
72 // *b.InputSymbols(), &relabel));
73 // if (relabel) Relabel(b, bnew.get(), nullptr);
74 // b.SetInputSymbols(bnew.get());
75 SymbolTable *MergeSymbolTable(const SymbolTable &left, const SymbolTable &right,
76  bool *right_relabel_output = nullptr);
77 
78 // Reads the symbol table from any Fst::Read()able file, without loading the
79 // corresponding FST. Returns nullptr if the FST does not contain a symbol
80 // table or the symbol table cannot be read. NOTE(review): `input` presumably selects the input (vs. output) table; confirm.
81 SymbolTable * FstReadSymbols(const std::string &source,
82  bool input);
83 
84 // Adds a contiguous range of symbols to the symbol table syms, using prefix to
85 // form each symbol string; returns false if an inserted symbol string clashes
86 // with one already present. NOTE(review): the range is presumably nlabels labels starting at start_label; confirm.
87 bool AddAuxiliarySymbols(const std::string &prefix, int64_t start_label,
88  int64_t nlabels, SymbolTable *syms);
89 
90 } // namespace fst
91 
92 #endif // FST_SYMBOL_TABLE_OPS_H_
const std::string & Name() const
Definition: symbol-table.h:466
int64_t AddSymbol(std::string_view symbol, int64_t key)
Definition: symbol-table.h:425
SymbolTable * CompactSymbolTable(const SymbolTable &syms)
SymbolTable * FstReadSymbols(const std::string &source, bool input)
SymbolTable * PruneSymbolTable(const Fst< Arc > &fst, const SymbolTable &syms, bool input)
SymbolTable * MergeSymbolTable(const SymbolTable &left, const SymbolTable &right, bool *right_relabel_output=nullptr)
bool AddAuxiliarySymbols(const std::string &prefix, int64_t start_label, int64_t nlabels, SymbolTable *syms)