FST  openfst-1.7.1
OpenFst Library
mpdt.h
Go to the documentation of this file.
1 // See www.openfst.org for extensive documentation on this weighted
2 // finite-state transducer library.
3 //
4 // Common classes for Multi Pushdown Transducer (MPDT) expansion/traversal.
5 
6 #ifndef FST_EXTENSIONS_MPDT_MPDT_H_
7 #define FST_EXTENSIONS_MPDT_MPDT_H_
8 
9 #include <array>
10 #include <functional>
11 #include <map>
12 #include <vector>
13 
14 #include <fst/compat.h>
15 #include <fst/extensions/pdt/pdt.h>
16 
17 namespace fst {
18 
19 enum MPdtType {
20  MPDT_READ_RESTRICT, // Can only read from first empty stack
21  MPDT_WRITE_RESTRICT, // Can only write to first empty stack
22  MPDT_NO_RESTRICT, // No read-write restrictions
23 };
24 
25 namespace internal {
26 
27 // PLEASE READ THIS CAREFULLY:
28 //
29 // When USEVECTOR is set, the stack configurations --- the statewise
30 // representation of the StackId's for each substack --- is stored in a vector.
31 // I would like to do this using an array for efficiency reasons, thus the
32 // definition of StackConfig below. However, while this *works* in that tests
33 // pass, etc. It causes a memory leak in the compose and expand tests, evidently
34 // in the map[] that is being used to store the mapping between these
35 // StackConfigs and the external StackId that the caller sees. There are no
36 // memory leaks when I use a vector, only with this StackId. Why there should be
37 // memory leaks given that I am not mallocing anything is a mystery. In case you
38 // were wondering, clearing the map at the end does not help.
39 
40 template <typename StackId, typename Level, Level nlevels>
41 struct StackConfig {
43 
45  array_ = config.array_;
46  }
47 
48  StackId &operator[](const int index) { return array_[index]; }
49 
50  const StackId &operator[](const int index) const { return array_[index]; }
51 
53  if (this == &config) return *this;
54  array_ = config.array_;
55  return *this;
56  }
57 
58  std::array<StackId, nlevels> array_;
59 };
60 
61 template <typename StackId, typename Level, Level nlevels>
62 class CompConfig {
63  public:
65 
66  bool operator()(const Config &x, const Config &y) const {
67  for (Level level = 0; level < nlevels; ++level) {
68  if (x.array_[level] < y.array_[level]) {
69  return true;
70  } else if (x.array_[level] > y.array_[level]) {
71  return false;
72  }
73  }
74  return false;
75  }
76 };
77 
78 // Defines the KeyPair type used as the key to MPdtStack.paren_id_map_. The hash
79 // function is provided as a separate struct to match templating syntax.
80 template <typename Level>
81 struct KeyPair {
82  Level level;
83  size_t underlying_id;
84 
85  KeyPair(Level level, size_t id) : level(level), underlying_id(id) {}
86 
87  inline bool operator==(const KeyPair<Level> &rhs) const {
88  return level == rhs.level && underlying_id == rhs.underlying_id;
89  }
90 };
91 
92 template <typename Level>
93 struct KeyPairHasher {
94  inline size_t operator()(const KeyPair<Level> &keypair) const {
95  return std::hash<Level>()(keypair.level) ^
96  (std::hash<size_t>()(keypair.underlying_id) << 1);
97  }
98 };
99 
100 template <typename StackId, typename Level, Level nlevels = 2,
101  MPdtType restrict = MPDT_READ_RESTRICT>
102 class MPdtStack {
103  public:
104  using Label = Level;
106  using ConfigToStackId =
107  std::map<Config, StackId, CompConfig<StackId, Level, nlevels>>;
108 
109  MPdtStack(const std::vector<std::pair<Label, Label>> &parens,
110  const std::vector<Level> &assignments);
111 
112  MPdtStack(const MPdtStack &mstack);
113 
115  for (Level level = 0; level < nlevels; ++level) delete stacks_[level];
116  }
117 
118  StackId Find(StackId stack_id, Label label);
119 
120  // For now we do not implement Pop since this is needed only for
121  // ShortestPath().
122 
123  // For Top we find the first non-empty config, and find the paren ID of that
124  // (or -1) if there is none, then map that to the external stack_id to return.
125  ssize_t Top(StackId stack_id) const {
126  if (stack_id == -1) return -1;
127  const auto config = InternalStackIds(stack_id);
128  Level level = 0;
129  StackId underlying_id = -1;
130  for (; level < nlevels; ++level) {
131  if (!Empty(config, level)) {
132  underlying_id = stacks_[level]->Top(config[level]);
133  break;
134  }
135  }
136  if (underlying_id == -1) return -1;
137  const auto it = paren_id_map_.find(KeyPair<Level>(level, underlying_id));
138  if (it == paren_id_map_.end()) return -1; // NB: shouldn't happen.
139  return it->second;
140  }
141 
142  ssize_t ParenId(Label label) const {
143  const auto it = paren_map_.find(label);
144  return it != paren_map_.end() ? it->second : -1;
145  }
146 
147  // TODO(rws): For debugging purposes only: remove later.
148  string PrintConfig(const Config &config) const {
149  string result = "[";
150  for (Level i = 0; i < nlevels; ++i) {
151  char s[128];
152  snprintf(s, sizeof(s), "%d", config[i]);
153  result += string(s);
154  if (i < nlevels - 1) result += ", ";
155  }
156  result += "]";
157  return result;
158  }
159 
160  bool Error() { return error_; }
161 
162  // Each component stack has an internal stack ID for a given configuration and
163  // label.
164  // This function maps a configuration of those to the stack ID the caller
165  // sees.
166  inline StackId ExternalStackId(const Config &config) {
167  const auto it = config_to_stack_id_map_.find(config);
168  StackId result;
169  if (it == config_to_stack_id_map_.end()) {
170  result = next_stack_id_++;
171  config_to_stack_id_map_.insert(
172  std::pair<Config, StackId>(config, result));
173  stack_id_to_config_map_[result] = config;
174  } else {
175  result = it->second;
176  }
177  return result;
178  }
179 
180  // Retrieves the internal stack ID for a corresponding external stack ID.
181  inline const Config InternalStackIds(StackId stack_id) const {
182  auto it = stack_id_to_config_map_.find(stack_id);
183  if (it == stack_id_to_config_map_.end()) {
184  it = stack_id_to_config_map_.find(-1);
185  }
186  return it->second;
187  }
188 
189  inline bool Empty(const Config &config, Level level) const {
190  return config[level] <= 0;
191  }
192 
193  inline bool AllEmpty(const Config &config) {
194  for (Level level = 0; level < nlevels; ++level) {
195  if (!Empty(config, level)) return false;
196  }
197  return true;
198  }
199 
200  bool error_;
203  // Stores level of each paren.
204  std::unordered_map<Label, Label> paren_levels_;
205  std::vector<std::pair<Label, Label>> parens_; // As in pdt.h.
206  std::unordered_map<Label, size_t> paren_map_; // As in pdt.h.
207  // Maps between internal paren_id and external paren_id.
208  std::unordered_map<KeyPair<Level>, size_t, KeyPairHasher<Level>>
210  // Maps between internal stack ids and external stack id.
212  std::unordered_map<StackId, Config> stack_id_to_config_map_;
213  StackId next_stack_id_;
214  // Array of stacks.
215  PdtStack<StackId, Label> *stacks_[nlevels];
216 };
217 
218 template <typename StackId, typename Level, Level nlevels, MPdtType restrict>
220  const std::vector<std::pair<Level, Level>> &parens, // NB: Label = Level.
221  const std::vector<Level> &assignments)
222  : error_(false),
223  min_paren_(kNoLabel),
224  max_paren_(kNoLabel),
225  parens_(parens),
226  next_stack_id_(1) {
227  using Label = Level;
228  if (parens.size() != assignments.size()) {
229  FSTERROR() << "MPdtStack: Parens of different size from assignments";
230  error_ = true;
231  return;
232  }
233  std::vector<std::pair<Label, Label>> vectors[nlevels];
234  for (Level i = 0; i < assignments.size(); ++i) {
235  // Assignments here start at 0, so assuming the human-readable version has
236  // them starting at 1, we should subtract 1 here
237  const auto level = assignments[i] - 1;
238  if (level < 0 || level >= nlevels) {
239  FSTERROR() << "MPdtStack: Specified level " << level << " out of bounds";
240  error_ = true;
241  return;
242  }
243  const auto &pair = parens[i];
244  vectors[level].push_back(pair);
245  paren_levels_[pair.first] = level;
246  paren_levels_[pair.second] = level;
247  paren_map_[pair.first] = i;
248  paren_map_[pair.second] = i;
249  const KeyPair<Level> key(level, vectors[level].size() - 1);
250  paren_id_map_[key] = i;
251  if (min_paren_ == kNoLabel || pair.first < min_paren_) {
252  min_paren_ = pair.first;
253  }
254  if (pair.second < min_paren_) min_paren_ = pair.second;
255  if (max_paren_ == kNoLabel || pair.first > max_paren_) {
256  max_paren_ = pair.first;
257  }
258  if (pair.second > max_paren_) max_paren_ = pair.second;
259  }
261  Config neg_one;
262  Config zero;
263  for (Level level = 0; level < nlevels; ++level) {
264  stacks_[level] = new PdtStack<StackId, Label>(vectors[level]);
265  neg_one[level] = -1;
266  zero[level] = 0;
267  }
268  config_to_stack_id_map_[neg_one] = -1;
269  config_to_stack_id_map_[zero] = 0;
270  stack_id_to_config_map_[-1] = neg_one;
271  stack_id_to_config_map_[0] = zero;
272 }
273 
274 template <typename StackId, typename Level, Level nlevels, MPdtType restrict>
277  : error_(mstack.error_),
278  min_paren_(mstack.min_paren_),
279  max_paren_(mstack.max_paren_),
280  parens_(mstack.parens_),
282  for (const auto &kv : mstack.paren_levels_) {
283  paren_levels_[kv.first] = kv.second;
284  }
285  for (const auto &paren : mstack.parens_) parens_.push_back(paren);
286  for (const auto &kv : mstack.paren_map_) {
287  paren_map_[kv.first] = kv.second;
288  }
289  for (const auto &kv : mstack.paren_id_map_) {
290  paren_id_map_[kv.first] = kv.second;
291  }
292  for (auto it = mstack.config_to_stack_id_map_.begin();
293  it != mstack.config_to_stack_id_map_.end(); ++it) {
294  config_to_stack_id_map_[it->first] = it->second;
295  }
296  for (const auto &kv : mstack.stack_id_to_config_map_) {
298  const Config config(kv.second);
299  stack_id_to_config_map_[kv.first] = config;
300  }
301  for (Level level = 0; level < nlevels; ++level)
302  stacks_[level] = mstack.stacks_[level];
303 }
304 
305 template <typename StackId, typename Level, Level nlevels, MPdtType restrict>
307  Level label) {
308  // Non-paren.
309  if (min_paren_ == kNoLabel || label < min_paren_ || label > max_paren_) {
310  return stack_id;
311  }
312  const auto it = paren_map_.find(label);
313  // Non-paren.
314  if (it == paren_map_.end()) return stack_id;
315  ssize_t paren_id = it->second;
316  // Gets the configuration associated with this stack_id.
317  const auto config = InternalStackIds(stack_id);
318  // Gets the level.
319  const auto level = paren_levels_.find(label)->second;
320  // If the label is an open paren we push:
321  //
322  // 1) if the restrict type is not MPDT_WRITE_RESTRICT, or
323  // 2) the restrict type is MPDT_WRITE_RESTRICT, and all the stacks above the
324  // level are empty.
325  if (label == parens_[paren_id].first) { // Open paren.
326  if (restrict == MPDT_WRITE_RESTRICT) {
327  for (Level upper_level = 0; upper_level < level; ++upper_level) {
328  if (!Empty(config, upper_level)) return -1; // Non-empty stack blocks.
329  }
330  }
331  // If the label is an close paren we pop:
332  //
333  // 1) if the restrict type is not MPDT_READ_RESTRICT, or
334  // 2) the restrict type is MPDT_READ_RESTRICT, and all the stacks above the
335  // level are empty.
336  } else if (restrict == MPDT_READ_RESTRICT) {
337  for (Level lower_level = 0; lower_level < level; ++lower_level) {
338  if (!Empty(config, lower_level)) return -1; // Non-empty stack blocks.
339  }
340  }
341  const auto nid = stacks_[level]->Find(config[level], label);
342  // If the new ID is -1, that means that there is no valid transition at the
343  // level we want.
344  if (nid == -1) {
345  return -1;
346  } else {
348  Config nconfig(config);
349  nconfig[level] = nid;
350  return ExternalStackId(nconfig);
351  }
352 }
353 
354 } // namespace internal
355 } // namespace fst
356 
357 #endif // FST_EXTENSIONS_MPDT_MPDT_H_
ConfigToStackId config_to_stack_id_map_
Definition: mpdt.h:211
PdtStack< StackId, Label > * stacks_[nlevels]
Definition: mpdt.h:215
size_t operator()(const KeyPair< Level > &keypair) const
Definition: mpdt.h:94
const StackId & operator[](const int index) const
Definition: mpdt.h:50
constexpr int kNoLabel
Definition: fst.h:179
bool operator()(const Config &x, const Config &y) const
Definition: mpdt.h:66
StackId & operator[](const int index)
Definition: mpdt.h:48
std::unordered_map< Label, Label > paren_levels_
Definition: mpdt.h:204
ssize_t Top(StackId stack_id) const
Definition: mpdt.h:125
const Config InternalStackIds(StackId stack_id) const
Definition: mpdt.h:181
size_t underlying_id
Definition: mpdt.h:83
std::array< StackId, nlevels > array_
Definition: mpdt.h:58
std::map< Config, StackId, CompConfig< StackId, Label, 2 >> ConfigToStackId
Definition: mpdt.h:107
StackConfig(const StackConfig< StackId, Level, nlevels > &config)
Definition: mpdt.h:44
StackId ExternalStackId(const Config &config)
Definition: mpdt.h:166
#define FSTERROR()
Definition: util.h:35
bool operator==(const KeyPair< Level > &rhs) const
Definition: mpdt.h:87
bool Empty(const Config &config, Level level) const
Definition: mpdt.h:189
MPdtType
Definition: mpdt.h:19
ssize_t ParenId(Label label) const
Definition: mpdt.h:142
bool AllEmpty(const Config &config)
Definition: mpdt.h:193
std::unordered_map< Label, size_t > paren_map_
Definition: mpdt.h:206
StackId next_stack_id_
Definition: mpdt.h:213
string PrintConfig(const Config &config) const
Definition: mpdt.h:148
std::unordered_map< StackId, Config > stack_id_to_config_map_
Definition: mpdt.h:212
std::unordered_map< KeyPair< Level >, size_t, KeyPairHasher< Level > > paren_id_map_
Definition: mpdt.h:209
KeyPair(Level level, size_t id)
Definition: mpdt.h:85
StackConfig & operator=(const StackConfig< StackId, Level, nlevels > &config)
Definition: mpdt.h:52
std::vector< std::pair< Label, Label > > parens_
Definition: mpdt.h:205
MPdtStack(const std::vector< std::pair< Label, Label >> &parens, const std::vector< Level > &assignments)
Definition: mpdt.h:219
StackId Find(StackId stack_id, Label label)
Definition: mpdt.h:306