FST  openfst-1.8.3
OpenFst Library
mpdt.h
Go to the documentation of this file.
1 // Copyright 2005-2024 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the 'License');
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an 'AS IS' BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // See www.openfst.org for extensive documentation on this weighted
16 // finite-state transducer library.
17 //
18 // Common classes for Multi Pushdown Transducer (MPDT) expansion/traversal.
19 
20 #ifndef FST_EXTENSIONS_MPDT_MPDT_H_
21 #define FST_EXTENSIONS_MPDT_MPDT_H_
22 
23 #include <sys/types.h>
24 
25 #include <algorithm>
26 #include <array>
27 #include <cstddef>
28 #include <cstdint>
29 #include <functional>
30 #include <map>
31 #include <optional>
32 #include <utility>
33 #include <vector>
34 
35 #include <fst/compat.h>
36 #include <fst/log.h>
37 #include <fst/extensions/pdt/pdt.h>
38 #include <fst/fst.h>
39 #include <fst/util.h>
40 #include <unordered_map>
41 #include <optional>
42 
43 namespace fst {
44 
45 enum class MPdtType : uint8_t {
46  READ_RESTRICT, // Can only read from first empty stack
47  WRITE_RESTRICT, // Can only write to first empty stack
48  NO_RESTRICT, // No read-write restrictions
49 };
50 
51 namespace internal {
52 
53 template <typename StackId, typename Level, Level nlevels>
54 using StackConfig = std::array<StackId, nlevels>;
55 
56 // Forward declaration so `KeyPair` can declare `KeyPairHasher` its friend.
57 // WARNING: Do not use `class` here because that may result in linker errors
58 // under the Microsoft C++ ABI. Please take a look at MS documentation
59 // http://shortn/_TZy5qaXiBe
60 template <typename Level>
62 
63 // Defines the KeyPair type used as the key to MPdtStack.paren_id_map_. The hash
64 // function is provided as a separate struct to match templating syntax.
65 template <typename Level>
66 class KeyPair {
67  public:
68  KeyPair(Level level, size_t id) : level_(level), underlying_id_(id) {}
69 
70  inline bool operator==(const KeyPair &rhs) const {
71  return level_ == rhs.level_ && underlying_id_ == rhs.underlying_id_;
72  }
73 
74  private:
75  friend KeyPairHasher<Level>;
76  Level level_;
77  size_t underlying_id_;
78 };
79 
80 template <typename Level>
81 struct KeyPairHasher {
82  inline size_t operator()(const KeyPair<Level> &keypair) const {
83  return std::hash<Level>()(keypair.level_) ^
84  (std::hash<size_t>()(keypair.underlying_id_) << 1);
85  }
86 };
87 
88 template <typename StackId, typename Level, Level nlevels = 2,
90 class MPdtStack {
91  public:
92  using Label = Level;
94  using ConfigToStackId = std::map<Config, StackId>;
95 
96  MPdtStack(const std::vector<std::pair<Label, Label>> &parens,
97  const std::vector<Level> &assignments);
98 
99  ~MPdtStack() = default;
100 
101  MPdtStack(const MPdtStack &other)
102  : error_(other.error_),
103  min_paren_(other.min_paren_),
104  max_paren_(other.max_paren_),
105  paren_levels_(other.paren_levels_),
106  parens_(other.parens_),
107  paren_map_(other.paren_map_),
108  paren_id_map_(other.paren_id_map_),
109  config_to_stack_id_map_(other.config_to_stack_id_map_),
110  stack_id_to_config_map_(other.stack_id_to_config_map_),
111  next_stack_id_(other.next_stack_id_),
112  stacks_(other.stacks_) {}
113 
114  MPdtStack &operator=(const MPdtStack &other) {
115  *this = MPdtStack(other);
116  return *this;
117  }
118 
119  MPdtStack(MPdtStack &&) = default;
120 
121  MPdtStack &operator=(MPdtStack &&) = default;
122 
123  StackId Find(StackId stack_id, Label label);
124 
125  // For now we do not implement Pop since this is needed only for
126  // ShortestPath().
127 
128  // For Top we find the first non-empty config, and find the paren ID of that
129  // (or -1) if there is none, then map that to the external stack_id to return.
130  ssize_t Top(StackId stack_id) const {
131  if (stack_id == -1) return -1;
132  const auto config = InternalStackIds(stack_id);
133  Level level = 0;
134  StackId underlying_id = -1;
135  for (; level < nlevels; ++level) {
136  if (!Empty(config, level)) {
137  underlying_id = stacks_[level]->Top(config[level]);
138  break;
139  }
140  }
141  if (underlying_id == -1) return -1;
142  const auto it = paren_id_map_.find(KeyPair<Level>(level, underlying_id));
143  if (it == paren_id_map_.end()) return -1; // NB: shouldn't happen.
144  return it->second;
145  }
146 
147  ssize_t ParenId(Label label) const {
148  const auto it = paren_map_.find(label);
149  return it != paren_map_.end() ? it->second : -1;
150  }
151 
152  bool Error() { return error_; }
153 
154  // Each component stack has an internal stack ID for a given configuration and
155  // label.
156  // This function maps a configuration of those to the stack ID the caller
157  // sees.
158  inline StackId ExternalStackId(const Config &config) {
159  const auto it = config_to_stack_id_map_.find(config);
160  StackId result;
161  if (it == config_to_stack_id_map_.end()) {
162  result = next_stack_id_++;
163  config_to_stack_id_map_.insert(
164  std::pair<Config, StackId>(config, result));
165  stack_id_to_config_map_[result] = config;
166  } else {
167  result = it->second;
168  }
169  return result;
170  }
171 
172  // Retrieves the internal stack ID for a corresponding external stack ID.
173  inline const Config InternalStackIds(StackId stack_id) const {
174  auto it = stack_id_to_config_map_.find(stack_id);
175  if (it == stack_id_to_config_map_.end()) {
176  it = stack_id_to_config_map_.find(-1);
177  }
178  return it->second;
179  }
180 
181  inline bool Empty(const Config &config, Level level) const {
182  return config[level] <= 0;
183  }
184 
185  inline bool AllEmpty(const Config &config) {
186  for (Level level = 0; level < nlevels; ++level) {
187  if (!Empty(config, level)) return false;
188  }
189  return true;
190  }
191 
192  private:
193  bool error_;
194  Label min_paren_;
195  Label max_paren_;
196  // Stores level of each paren.
197  std::unordered_map<Label, Label> paren_levels_;
198  std::vector<std::pair<Label, Label>> parens_; // As in pdt.h.
199  std::unordered_map<Label, size_t> paren_map_; // As in pdt.h.
200  // Maps between internal paren_id and external paren_id.
201  std::unordered_map<KeyPair<Level>, size_t, KeyPairHasher<Level>>
202  paren_id_map_;
203  // Maps between internal stack ids and external stack id.
204  ConfigToStackId config_to_stack_id_map_;
205  std::unordered_map<StackId, Config> stack_id_to_config_map_;
206  StackId next_stack_id_;
207  // Array of stacks.
208  std::array<std::optional<PdtStack<StackId, Label>>, nlevels> stacks_;
209 };
210 
211 template <typename StackId, typename Level, Level nlevels, MPdtType restrict>
213  const std::vector<std::pair<Level, Level>> &parens, // NB: Label = Level.
214  const std::vector<Level> &assignments)
215  : error_(false),
216  min_paren_(kNoLabel),
217  max_paren_(kNoLabel),
218  parens_(parens),
219  next_stack_id_(1) {
220  using Label = Level;
221  if (parens.size() != assignments.size()) {
222  FSTERROR() << "MPdtStack: Parens of different size from assignments";
223  error_ = true;
224  return;
225  }
226  std::array<std::vector<std::pair<Label, Label>>, nlevels> vectors;
227  for (Level i = 0; i < assignments.size(); ++i) {
228  // Assignments here start at 0, so assuming the human-readable version has
229  // them starting at 1, we should subtract 1 here
230  const auto level = assignments[i] - 1;
231  if (level < 0 || level >= nlevels) {
232  FSTERROR() << "MPdtStack: Specified level " << level << " out of bounds";
233  error_ = true;
234  return;
235  }
236  const auto &pair = parens[i];
237  vectors[level].push_back(pair);
238  paren_levels_[pair.first] = level;
239  paren_levels_[pair.second] = level;
240  paren_map_[pair.first] = i;
241  paren_map_[pair.second] = i;
242  const KeyPair<Level> key(level, vectors[level].size() - 1);
243  paren_id_map_[key] = i;
244  if (min_paren_ == kNoLabel || pair.first < min_paren_) {
245  min_paren_ = pair.first;
246  }
247  if (pair.second < min_paren_) min_paren_ = pair.second;
248  if (max_paren_ == kNoLabel || pair.first > max_paren_) {
249  max_paren_ = pair.first;
250  }
251  if (pair.second > max_paren_) max_paren_ = pair.second;
252  }
254  Config neg_one;
255  Config zero;
256  for (Level level = 0; level < nlevels; ++level) {
257  stacks_[level].emplace(vectors[level]);
258  neg_one[level] = -1;
259  zero[level] = 0;
260  }
261  config_to_stack_id_map_[neg_one] = -1;
262  config_to_stack_id_map_[zero] = 0;
263  stack_id_to_config_map_[-1] = neg_one;
264  stack_id_to_config_map_[0] = zero;
265 }
266 
267 template <typename StackId, typename Level, Level nlevels, MPdtType restrict>
269  Level label) {
270  // Non-paren.
271  if (min_paren_ == kNoLabel || label < min_paren_ || label > max_paren_) {
272  return stack_id;
273  }
274  const auto it = paren_map_.find(label);
275  // Non-paren.
276  if (it == paren_map_.end()) return stack_id;
277  ssize_t paren_id = it->second;
278  // Gets the configuration associated with this stack_id.
279  const auto config = InternalStackIds(stack_id);
280  // Gets the level.
281  const auto level = paren_levels_.find(label)->second;
282  // If the label is an open paren we push:
283  //
284  // 1) if the restrict type is not MPdtType::WRITE_RESTRICT, or
285  // 2) the restrict type is MPdtType::WRITE_RESTRICT, and all the stacks above
286  // the level are empty.
287  if (label == parens_[paren_id].first) { // Open paren.
288  if (restrict == MPdtType::WRITE_RESTRICT) {
289  for (Level upper_level = 0; upper_level < level; ++upper_level) {
290  if (!Empty(config, upper_level)) return -1; // Non-empty stack blocks.
291  }
292  }
293  // If the label is an close paren we pop:
294  //
295  // 1) if the restrict type is not MPdtType::READ_RESTRICT, or
296  // 2) the restrict type is MPdtType::READ_RESTRICT, and all the stacks above
297  // the level are empty.
298  } else if (restrict == MPdtType::READ_RESTRICT) {
299  for (Level lower_level = 0; lower_level < level; ++lower_level) {
300  if (!Empty(config, lower_level)) return -1; // Non-empty stack blocks.
301  }
302  }
303  const auto nid = stacks_[level]->Find(config[level], label);
304  // If the new ID is -1, that means that there is no valid transition at the
305  // level we want.
306  if (nid == -1) {
307  return -1;
308  } else {
310  Config nconfig(config);
311  nconfig[level] = nid;
312  return ExternalStackId(nconfig);
313  }
314 }
315 
316 } // namespace internal
317 } // namespace fst
318 
319 #endif // FST_EXTENSIONS_MPDT_MPDT_H_
MPdtStack & operator=(const MPdtStack &other)
Definition: mpdt.h:114
size_t operator()(const KeyPair< Level > &keypair) const
Definition: mpdt.h:82
constexpr int kNoLabel
Definition: fst.h:195
MPdtStack(const MPdtStack &other)
Definition: mpdt.h:101
std::array< StackId, nlevels > StackConfig
Definition: mpdt.h:54
ssize_t Top(StackId stack_id) const
Definition: mpdt.h:130
const Config InternalStackIds(StackId stack_id) const
Definition: mpdt.h:173
std::map< Config, StackId > ConfigToStackId
Definition: mpdt.h:94
MPdtType
Definition: mpdt.h:45
StackId ExternalStackId(const Config &config)
Definition: mpdt.h:158
#define FSTERROR()
Definition: util.h:56
bool Empty(const Config &config, Level level) const
Definition: mpdt.h:181
bool operator==(const KeyPair &rhs) const
Definition: mpdt.h:70
KeyPair(Level level, size_t id)
Definition: mpdt.h:68
ssize_t ParenId(Label label) const
Definition: mpdt.h:147
bool AllEmpty(const Config &config)
Definition: mpdt.h:185
StackConfig< StackId, Label, 2 > Config
Definition: mpdt.h:93
StackId Find(StackId stack_id, Label label)
Definition: mpdt.h:268