FST  openfst-1.8.2.post1
OpenFst Library
pdt.h
Go to the documentation of this file.
1 // Copyright 2005-2020 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the 'License');
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an 'AS IS' BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // See www.openfst.org for extensive documentation on this weighted
16 // finite-state transducer library.
17 //
18 // Common classes for PDT expansion/traversal.
19 
20 #ifndef FST_EXTENSIONS_PDT_PDT_H_
21 #define FST_EXTENSIONS_PDT_PDT_H_
22 
23 #include <map>
24 #include <set>
25 
26 #include <fst/compat.h>
27 #include <fst/log.h>
28 #include <fst/fst.h>
29 #include <fst/state-table.h>
30 #include <unordered_map>
31 
32 namespace fst {
33 
34 // Provides bijection between parenthesis stacks and signed integral stack IDs.
35 // Each stack ID is unique to each distinct stack. The open-close parenthesis
36 // label pairs are passed using the parens argument.
37 template <typename StackId, typename Label>
38 class PdtStack {
39  public:
40  // The stacks are stored in a tree. The nodes are stored in a vector. Each
41  // node represents the top of some stack and is identified by its position in
42  // the vector. Its' parent node represents the stack with the top popped and
43  // its children are stored in child_map_ and accessed by stack_id and label.
44  // The paren_id is
45  // the position in parens of the parenthesis for that node.
46  struct StackNode {
47  StackId parent_id;
48  size_t paren_id;
49 
50  StackNode(StackId p, size_t i) : parent_id(p), paren_id(i) {}
51  };
52 
53  explicit PdtStack(const std::vector<std::pair<Label, Label>> &parens)
54  : parens_(parens), min_paren_(kNoLabel), max_paren_(kNoLabel) {
55  for (size_t i = 0; i < parens.size(); ++i) {
56  const auto &pair = parens[i];
57  paren_map_[pair.first] = i;
58  paren_map_[pair.second] = i;
59  if (min_paren_ == kNoLabel || pair.first < min_paren_) {
60  min_paren_ = pair.first;
61  }
62  if (pair.second < min_paren_) min_paren_ = pair.second;
63  if (max_paren_ == kNoLabel || pair.first > max_paren_) {
64  max_paren_ = pair.first;
65  }
66  if (pair.second > max_paren_) max_paren_ = pair.second;
67  }
68  nodes_.push_back(StackNode(-1, -1)); // Tree root.
69  }
70 
71  // Returns stack ID given the current stack ID (0 if empty) and label read.
72  // Pushes onto the stack if the label is an open parenthesis, returning the
73  // new stack ID. Pops the stack if the label is a close parenthesis that
74  // matches the top of the stack, returning the parent stack ID. Returns -1 if
75  // label is an unmatched close parenthesis. Otherwise, returns the current
76  // stack ID.
77  StackId Find(StackId stack_id, Label label) {
78  if (min_paren_ == kNoLabel || label < min_paren_ || label > max_paren_) {
79  return stack_id; // Non-paren.
80  }
81  const auto it = paren_map_.find(label);
82  // Non-paren.
83  if (it == paren_map_.end()) return stack_id;
84  const auto paren_id = it->second;
85  // Open paren.
86  if (label == parens_[paren_id].first) {
87  auto &child_id = child_map_[std::make_pair(stack_id, label)];
88  if (child_id == 0) { // Child not found; pushes label.
89  child_id = nodes_.size();
90  nodes_.push_back(StackNode(stack_id, paren_id));
91  }
92  return child_id;
93  }
94  const auto &node = nodes_[stack_id];
95  // Matching close paren.
96  if (paren_id == node.paren_id) return node.parent_id;
97  // Non-matching close paren.
98  return -1;
99  }
100 
101  // Returns the stack ID obtained by popping the label at the top of the
102  // current stack ID.
103  StackId Pop(StackId stack_id) const { return nodes_[stack_id].parent_id; }
104 
105  // Returns the paren ID at the top of the stack.
106  ssize_t Top(StackId stack_id) const { return nodes_[stack_id].paren_id; }
107 
108  ssize_t ParenId(Label label) const {
109  const auto it = paren_map_.find(label);
110  if (it == paren_map_.end()) return -1; // Non-paren.
111  return it->second;
112  }
113 
114  private:
115  struct ChildHash {
116  size_t operator()(const std::pair<StackId, Label> &pair) const {
117  static constexpr size_t prime = 7853;
118  return static_cast<size_t>(pair.first) +
119  static_cast<size_t>(pair.second) * prime;
120  }
121  };
122 
123  std::vector<std::pair<Label, Label>> parens_;
124  std::vector<StackNode> nodes_;
125  std::unordered_map<Label, size_t> paren_map_;
126  // Child of stack node w.r.t label
127  std::unordered_map<std::pair<StackId, Label>, StackId, ChildHash> child_map_;
128  Label min_paren_;
129  Label max_paren_;
130 };
131 
132 // State tuple for PDT expansion.
133 template <typename S, typename K>
135  using StateId = S;
136  using StackId = K;
137 
140 
141  explicit PdtStateTuple(StateId state_id = kNoStateId, StackId stack_id = -1)
142  : state_id(state_id), stack_id(stack_id) {}
143 };
144 
145 // Equality of PDT state tuples.
146 template <typename S, typename K>
147 inline bool operator==(const PdtStateTuple<S, K> &x,
148  const PdtStateTuple<S, K> &y) {
149  if (&x == &y) return true;
150  return x.state_id == y.state_id && x.stack_id == y.stack_id;
151 }
152 
153 // Hash function object for PDT state tuples
154 template <class T>
156  public:
157  size_t operator()(const T &tuple) const {
158  static constexpr auto prime = 7853;
159  return tuple.state_id + tuple.stack_id * prime;
160  }
161 };
162 
163 // Tuple to PDT state bijection.
164 template <class StateId, class StackId>
166  PdtStateTuple<StateId, StackId>,
167  PdtStateHash<PdtStateTuple<StateId, StackId>>> {
168  public:
170 
171  PdtStateTable(const PdtStateTable &other) {}
172 
173  private:
174  PdtStateTable &operator=(const PdtStateTable &) = delete;
175 };
176 
177 } // namespace fst
178 
179 #endif // FST_EXTENSIONS_PDT_PDT_H_
PdtStateTuple(StateId state_id=kNoStateId, StackId stack_id=-1)
Definition: pdt.h:141
constexpr int kNoLabel
Definition: fst.h:201
StackId stack_id
Definition: pdt.h:139
StackId parent_id
Definition: pdt.h:47
constexpr int kNoStateId
Definition: fst.h:202
StackNode(StackId p, size_t i)
Definition: pdt.h:50
PdtStateTable(const PdtStateTable &other)
Definition: pdt.h:171
StackId Pop(StackId stack_id) const
Definition: pdt.h:103
ssize_t ParenId(Label label) const
Definition: pdt.h:108
StackId Find(StackId stack_id, Label label)
Definition: pdt.h:77
ssize_t Top(StackId stack_id) const
Definition: pdt.h:106
size_t operator()(const T &tuple) const
Definition: pdt.h:157
bool operator==(const ErrorWeight &, const ErrorWeight &)
Definition: error-weight.h:50
StateId state_id
Definition: pdt.h:138
PdtStack(const std::vector< std::pair< Label, Label >> &parens)
Definition: pdt.h:53