FST  openfst-1.8.3
OpenFst Library
pdt.h
Go to the documentation of this file.
1 // Copyright 2005-2024 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the 'License');
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an 'AS IS' BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // See www.openfst.org for extensive documentation on this weighted
16 // finite-state transducer library.
17 //
18 // Common classes for PDT expansion/traversal.
19 
20 #ifndef FST_EXTENSIONS_PDT_PDT_H_
21 #define FST_EXTENSIONS_PDT_PDT_H_
22 
23 #include <sys/types.h>
24 
25 #include <cstddef>
26 #include <map>
27 #include <set>
28 #include <utility>
29 #include <vector>
30 
31 #include <fst/compat.h>
32 #include <fst/log.h>
33 #include <fst/fst.h>
34 #include <fst/state-table.h>
35 #include <unordered_map>
36 
37 namespace fst {
38 
39 // Provides bijection between parenthesis stacks and signed integral stack IDs.
40 // Each stack ID is unique to each distinct stack. The open-close parenthesis
41 // label pairs are passed using the parens argument.
42 template <typename StackId, typename Label>
43 class PdtStack {
44  public:
45  // The stacks are stored in a tree. The nodes are stored in a vector. Each
46  // node represents the top of some stack and is identified by its position in
47  // the vector. Its' parent node represents the stack with the top popped and
48  // its children are stored in child_map_ and accessed by stack_id and label.
49  // The paren_id is
50  // the position in parens of the parenthesis for that node.
51  struct StackNode {
52  StackId parent_id;
53  size_t paren_id;
54 
55  StackNode(StackId p, size_t i) : parent_id(p), paren_id(i) {}
56  };
57 
58  explicit PdtStack(const std::vector<std::pair<Label, Label>> &parens)
59  : parens_(parens), min_paren_(kNoLabel), max_paren_(kNoLabel) {
60  for (size_t i = 0; i < parens.size(); ++i) {
61  const auto &pair = parens[i];
62  paren_map_[pair.first] = i;
63  paren_map_[pair.second] = i;
64  if (min_paren_ == kNoLabel || pair.first < min_paren_) {
65  min_paren_ = pair.first;
66  }
67  if (pair.second < min_paren_) min_paren_ = pair.second;
68  if (max_paren_ == kNoLabel || pair.first > max_paren_) {
69  max_paren_ = pair.first;
70  }
71  if (pair.second > max_paren_) max_paren_ = pair.second;
72  }
73  nodes_.push_back(StackNode(-1, -1)); // Tree root.
74  }
75 
76  // Returns stack ID given the current stack ID (0 if empty) and label read.
77  // Pushes onto the stack if the label is an open parenthesis, returning the
78  // new stack ID. Pops the stack if the label is a close parenthesis that
79  // matches the top of the stack, returning the parent stack ID. Returns -1 if
80  // label is an unmatched close parenthesis. Otherwise, returns the current
81  // stack ID.
82  StackId Find(StackId stack_id, Label label) {
83  if (min_paren_ == kNoLabel || label < min_paren_ || label > max_paren_) {
84  return stack_id; // Non-paren.
85  }
86  const auto it = paren_map_.find(label);
87  // Non-paren.
88  if (it == paren_map_.end()) return stack_id;
89  const auto paren_id = it->second;
90  // Open paren.
91  if (label == parens_[paren_id].first) {
92  auto &child_id = child_map_[std::make_pair(stack_id, label)];
93  if (child_id == 0) { // Child not found; pushes label.
94  child_id = nodes_.size();
95  nodes_.push_back(StackNode(stack_id, paren_id));
96  }
97  return child_id;
98  }
99  const auto &node = nodes_[stack_id];
100  // Matching close paren.
101  if (paren_id == node.paren_id) return node.parent_id;
102  // Non-matching close paren.
103  return -1;
104  }
105 
106  // Returns the stack ID obtained by popping the label at the top of the
107  // current stack ID.
108  StackId Pop(StackId stack_id) const { return nodes_[stack_id].parent_id; }
109 
110  // Returns the paren ID at the top of the stack.
111  ssize_t Top(StackId stack_id) const { return nodes_[stack_id].paren_id; }
112 
113  ssize_t ParenId(Label label) const {
114  const auto it = paren_map_.find(label);
115  if (it == paren_map_.end()) return -1; // Non-paren.
116  return it->second;
117  }
118 
119  private:
120  struct ChildHash {
121  size_t operator()(const std::pair<StackId, Label> &pair) const {
122  static constexpr size_t prime = 7853;
123  return static_cast<size_t>(pair.first) +
124  static_cast<size_t>(pair.second) * prime;
125  }
126  };
127 
128  std::vector<std::pair<Label, Label>> parens_;
129  std::vector<StackNode> nodes_;
130  std::unordered_map<Label, size_t> paren_map_;
131  // Child of stack node w.r.t label
132  std::unordered_map<std::pair<StackId, Label>, StackId, ChildHash> child_map_;
133  Label min_paren_;
134  Label max_paren_;
135 };
136 
137 // State tuple for PDT expansion.
138 template <typename S, typename K>
140  using StateId = S;
141  using StackId = K;
142 
145 
146  explicit PdtStateTuple(StateId state_id = kNoStateId, StackId stack_id = -1)
147  : state_id(state_id), stack_id(stack_id) {}
148 };
149 
150 // Equality of PDT state tuples.
151 template <typename S, typename K>
152 inline bool operator==(const PdtStateTuple<S, K> &x,
153  const PdtStateTuple<S, K> &y) {
154  if (&x == &y) return true;
155  return x.state_id == y.state_id && x.stack_id == y.stack_id;
156 }
157 
158 // Hash function object for PDT state tuples
159 template <class T>
161  public:
162  size_t operator()(const T &tuple) const {
163  static constexpr auto prime = 7853;
164  return tuple.state_id + tuple.stack_id * prime;
165  }
166 };
167 
168 // Tuple to PDT state bijection.
169 template <class StateId, class StackId>
171  PdtStateTuple<StateId, StackId>,
172  PdtStateHash<PdtStateTuple<StateId, StackId>>> {
173  public:
174  PdtStateTable() = default;
175 
176  PdtStateTable(const PdtStateTable &other) {}
177 
178  private:
179  PdtStateTable &operator=(const PdtStateTable &) = delete;
180 };
181 
182 } // namespace fst
183 
184 #endif // FST_EXTENSIONS_PDT_PDT_H_
PdtStateTuple(StateId state_id=kNoStateId, StackId stack_id=-1)
Definition: pdt.h:146
constexpr int kNoLabel
Definition: fst.h:195
StackId stack_id
Definition: pdt.h:144
StackId parent_id
Definition: pdt.h:52
constexpr int kNoStateId
Definition: fst.h:196
StackNode(StackId p, size_t i)
Definition: pdt.h:55
PdtStateTable(const PdtStateTable &other)
Definition: pdt.h:176
StackId Pop(StackId stack_id) const
Definition: pdt.h:108
ssize_t ParenId(Label label) const
Definition: pdt.h:113
StackId Find(StackId stack_id, Label label)
Definition: pdt.h:82
ssize_t Top(StackId stack_id) const
Definition: pdt.h:111
size_t operator()(const T &tuple) const
Definition: pdt.h:162
bool operator==(const ErrorWeight &, const ErrorWeight &)
Definition: error-weight.h:51
StateId state_id
Definition: pdt.h:143
PdtStack(const std::vector< std::pair< Label, Label >> &parens)
Definition: pdt.h:58