FST  openfst-1.8.2.post1
OpenFst Library
expand.h
Go to the documentation of this file.
1 // Copyright 2005-2020 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the 'License');
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an 'AS IS' BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // See www.openfst.org for extensive documentation on this weighted
16 // finite-state transducer library.
17 //
18 // Expands an MPDT to an FST.
19 
20 #ifndef FST_EXTENSIONS_MPDT_EXPAND_H_
21 #define FST_EXTENSIONS_MPDT_EXPAND_H_
22 
23 #include <cstdint>
24 #include <vector>
25 
28 #include <fst/cache.h>
29 #include <fst/mutable-fst.h>
30 #include <fst/queue.h>
31 #include <fst/state-table.h>
32 #include <fst/test-properties.h>
33 
34 namespace fst {
35 
36 template <class Arc>
41 
43  const CacheOptions &opts = CacheOptions(), bool kp = false,
45  nullptr,
47  : CacheOptions(opts), keep_parentheses(kp), stack(s), state_table(st) {}
48 };
49 
50 // Properties for an expanded PDT.
51 inline uint64_t MPdtExpandProperties(uint64_t inprops) {
52  return inprops & (kAcceptor | kAcyclic | kInitialAcyclic | kUnweighted);
53 }
54 
55 namespace internal {
56 
57 // Implementation class for ExpandFst
58 template <class Arc>
59 class MPdtExpandFstImpl : public CacheImpl<Arc> {
60  public:
61  using Label = typename Arc::Label;
62  using StateId = typename Arc::StateId;
63  using Weight = typename Arc::Weight;
64 
65  using StackId = StateId;
68 
74 
75  using CacheBaseImpl<CacheState<Arc>>::PushArc;
76  using CacheBaseImpl<CacheState<Arc>>::HasArcs;
77  using CacheBaseImpl<CacheState<Arc>>::HasFinal;
78  using CacheBaseImpl<CacheState<Arc>>::HasStart;
79  using CacheBaseImpl<CacheState<Arc>>::SetArcs;
80  using CacheBaseImpl<CacheState<Arc>>::SetFinal;
81  using CacheBaseImpl<CacheState<Arc>>::SetStart;
82 
84  const std::vector<std::pair<Label, Label>> &parens,
85  const std::vector<Label> &assignments,
86  const MPdtExpandFstOptions<Arc> &opts)
87  : CacheImpl<Arc>(opts),
88  fst_(fst.Copy()),
89  stack_(opts.stack ? opts.stack : new ParenStack(parens, assignments)),
90  state_table_(opts.state_table ? opts.state_table
91  : new PdtStateTable<StateId, StackId>()),
92  own_stack_(!opts.stack),
93  own_state_table_(!opts.state_table),
94  keep_parentheses_(opts.keep_parentheses) {
95  SetType("expand");
96  const auto props = fst.Properties(kFstProperties, false);
97  SetProperties(MPdtExpandProperties(props), kCopyProperties);
98  SetInputSymbols(fst.InputSymbols());
99  SetOutputSymbols(fst.OutputSymbols());
100  }
101 
103  : CacheImpl<Arc>(impl),
104  fst_(impl.fst_->Copy(true)),
105  stack_(new ParenStack(*impl.stack_)),
106  state_table_(new PdtStateTable<StateId, StackId>()),
107  own_stack_(true),
108  own_state_table_(true),
109  keep_parentheses_(impl.keep_parentheses_) {
110  SetType("expand");
111  SetProperties(impl.Properties(), kCopyProperties);
112  SetInputSymbols(impl.InputSymbols());
113  SetOutputSymbols(impl.OutputSymbols());
114  }
115 
116  ~MPdtExpandFstImpl() override {
117  if (own_stack_) delete stack_;
118  if (own_state_table_) delete state_table_;
119  }
120 
122  if (!HasStart()) {
123  const auto s = fst_->Start();
124  if (s == kNoStateId) return kNoStateId;
125  const StateTuple tuple(s, 0);
126  const auto start = state_table_->FindState(tuple);
127  SetStart(start);
128  }
129  return CacheImpl<Arc>::Start();
130  }
131 
133  if (!HasFinal(s)) {
134  const auto &tuple = state_table_->Tuple(s);
135  const auto weight = fst_->Final(tuple.state_id);
136  SetFinal(s, (weight != Weight::Zero() && tuple.stack_id == 0)
137  ? weight
138  : Weight::Zero());
139  }
140  return CacheImpl<Arc>::Final(s);
141  }
142 
143  size_t NumArcs(StateId s) {
144  if (!HasArcs(s)) ExpandState(s);
145  return CacheImpl<Arc>::NumArcs(s);
146  }
147 
149  if (!HasArcs(s)) ExpandState(s);
151  }
152 
154  if (!HasArcs(s)) ExpandState(s);
156  }
157 
159  if (!HasArcs(s)) ExpandState(s);
161  }
162 
163  // Computes the outgoing transitions from a state, creating new destination
164  // states as needed.
166  const auto tuple = state_table_->Tuple(s);
167  for (ArcIterator<Fst<Arc>> aiter(*fst_, tuple.state_id); !aiter.Done();
168  aiter.Next()) {
169  auto arc = aiter.Value();
170  const auto stack_id = stack_->Find(tuple.stack_id, arc.ilabel);
171  if (stack_id == -1) {
172  continue; // Non-matching close parenthesis.
173  } else if ((stack_id != tuple.stack_id) && !keep_parentheses_) {
174  arc.ilabel = arc.olabel = 0; // Stack push/pop.
175  }
176  const StateTuple ntuple(arc.nextstate, stack_id);
177  arc.nextstate = state_table_->FindState(ntuple);
178  PushArc(s, arc);
179  }
180  SetArcs(s);
181  }
182 
183  const ParenStack &GetStack() const { return *stack_; }
184 
186  return *state_table_;
187  }
188 
189  private:
190  std::unique_ptr<const Fst<Arc>> fst_;
191  ParenStack *stack_;
192  PdtStateTable<StateId, StackId> *state_table_;
193  const bool own_stack_;
194  const bool own_state_table_;
195  const bool keep_parentheses_;
196 
197  MPdtExpandFstImpl &operator=(const MPdtExpandFstImpl &) = delete;
198 };
199 
200 } // namespace internal
201 
202 // Expands a multi-pushdown transducer (MPDT) encoded as an FST into an FST.
203 // This version is a delayed FST. In the MPDT, some transitions are labeled with
204 // open or close parentheses. To be interpreted as an MPDT, the parens for each
205 // stack must balance on a path. The open-close parenthesis label
206 // pairs are passed using the parens argument, and the assignment of those pairs
207 // to stacks is passed using the assignments argument. Expansion enforces the
208 // parenthesis constraints. The MPDT must be
209 // expandable as an FST.
210 //
211 // This class attaches interface to implementation and handles
212 // reference counting, delegating most methods to ImplToFst.
213 template <class A>
214 class MPdtExpandFst : public ImplToFst<internal::MPdtExpandFstImpl<A>> {
215  public:
216  using Arc = A;
217  using Label = typename Arc::Label;
218  using StateId = typename Arc::StateId;
219  using Weight = typename Arc::Weight;
220 
221  using StackId = StateId;
224  using State = typename Store::State;
226 
227  friend class ArcIterator<MPdtExpandFst<Arc>>;
229 
231  const std::vector<std::pair<Label, Label>> &parens,
232  const std::vector<Label> &assignments)
233  : ImplToFst<Impl>(std::make_shared<Impl>(fst, parens, assignments,
234  MPdtExpandFstOptions<Arc>())) {}
235 
237  const std::vector<std::pair<Label, Label>> &parens,
238  const std::vector<Label> &assignments,
239  const MPdtExpandFstOptions<Arc> &opts)
240  : ImplToFst<Impl>(
241  std::make_shared<Impl>(fst, parens, assignments, opts)) {}
242 
243  // See Fst<>::Copy() for doc.
244  MPdtExpandFst(const MPdtExpandFst<Arc> &fst, bool safe = false)
245  : ImplToFst<Impl>(fst, safe) {}
246 
247  // Get a copy of this ExpandFst. See Fst<>::Copy() for further doc.
248  MPdtExpandFst<Arc> *Copy(bool safe = false) const override {
249  return new MPdtExpandFst<A>(*this, safe);
250  }
251 
252  inline void InitStateIterator(StateIteratorData<Arc> *data) const override;
253 
254  void InitArcIterator(StateId s, ArcIteratorData<Arc> *data) const override {
255  GetMutableImpl()->InitArcIterator(s, data);
256  }
257 
258  const ParenStack &GetStack() const { return GetImpl()->GetStack(); }
259 
261  return GetImpl()->GetStateTable();
262  }
263 
264  private:
267 
268  void operator=(const MPdtExpandFst &) = delete;
269 };
270 
271 // Specialization for MPdtExpandFst.
272 template <class Arc>
274  : public CacheStateIterator<MPdtExpandFst<Arc>> {
275  public:
277  : CacheStateIterator<MPdtExpandFst<Arc>>(fst, fst.GetMutableImpl()) {}
278 };
279 
280 // Specialization for MPdtExpandFst.
281 template <class Arc>
283  : public CacheArcIterator<MPdtExpandFst<Arc>> {
284  public:
285  using StateId = typename Arc::StateId;
286 
288  : CacheArcIterator<MPdtExpandFst<Arc>>(fst.GetMutableImpl(), s) {
289  if (!fst.GetImpl()->HasArcs(s)) fst.GetMutableImpl()->ExpandState(s);
290  }
291 };
292 
293 template <class Arc>
295  StateIteratorData<Arc> *data) const {
296  data->base = std::make_unique<StateIterator<MPdtExpandFst<Arc>>>(*this);
297 }
298 
300  bool connect;
302 
303  explicit MPdtExpandOptions(bool connect = true, bool keep_parentheses = false)
304  : connect(connect), keep_parentheses(keep_parentheses) {}
305 };
306 
307 // Expands a multi-pushdown transducer (MPDT) encoded as an FST into an FST.
308 // This version writes the expanded PDT to a mutable FST. In the MPDT, some
309 // transitions are labeled with open or close parentheses. To be interpreted as
310 // an MPDT, the parens for each stack must balance on a path. The open-close
311 // parenthesis label pair sets are passed using the parens argument, and the
312 // assignment of those pairs to stacks is passed using the assignments argument.
313 // The expansion enforces the parenthesis constraints. The MPDT must be
314 // expandable as an FST.
315 template <class Arc>
316 void Expand(
317  const Fst<Arc> &ifst,
318  const std::vector<std::pair<typename Arc::Label, typename Arc::Label>>
319  &parens,
320  const std::vector<typename Arc::Label> &assignments, MutableFst<Arc> *ofst,
321  const MPdtExpandOptions &opts) {
323  eopts.gc_limit = 0;
324  eopts.keep_parentheses = opts.keep_parentheses;
325  *ofst = MPdtExpandFst<Arc>(ifst, parens, assignments, eopts);
326  if (opts.connect) Connect(ofst);
327 }
328 
329 // Expands a multi-pushdown transducer (MPDT) encoded as an FST into an FST.
330 // This version writes the expanded PDT to a mutable FST. In the MPDT, some
331 // transitions are labeled with open or close parentheses. To be interpreted as
332 // an MPDT, the parens for each stack must balance on a path. The open-close
333 // parenthesis label pair sets are passed using the parens argument, and the
334 // assignment of those pairs to stacks is passed using the assignments argument.
335 // The expansion enforces the parenthesis constraints. The MPDT must be
336 // expandable as an FST.
337 template <class Arc>
338 void Expand(
339  const Fst<Arc> &ifst,
340  const std::vector<std::pair<typename Arc::Label, typename Arc::Label>>
341  &parens,
342  const std::vector<typename Arc::Label> &assignments, MutableFst<Arc> *ofst,
343  bool connect = true, bool keep_parentheses = false) {
344  const MPdtExpandOptions opts(connect, keep_parentheses);
345  Expand(ifst, parens, assignments, ofst, opts);
346 }
347 
348 } // namespace fst
349 
350 #endif // FST_EXTENSIONS_MPDT_EXPAND_H_
ssize_t NumOutputEpsilons(const ExpandedFst< Arc > &fst, typename Arc::StateId s)
Definition: expanded-fst.h:114
CacheOptions(bool gc=FST_FLAGS_fst_default_cache_gc, size_t gc_limit=FST_FLAGS_fst_default_cache_gc_limit)
Definition: cache.h:47
virtual uint64_t Properties(uint64_t mask, bool test) const =0
typename Store::State State
Definition: expand.h:224
void InitStateIterator(StateIteratorData< Arc > *data) const override
Definition: expand.h:294
const ParenStack & GetStack() const
Definition: expand.h:183
typename Arc::Weight Weight
Definition: expand.h:219
MPdtExpandFstImpl(const Fst< Arc > &fst, const std::vector< std::pair< Label, Label >> &parens, const std::vector< Label > &assignments, const MPdtExpandFstOptions< Arc > &opts)
Definition: expand.h:83
size_t NumInputEpsilons(StateId s)
Definition: expand.h:148
Arc::Weight Final(const ExpandedFst< Arc > &fst, typename Arc::StateId s)
Definition: expanded-fst.h:97
const SymbolTable * OutputSymbols() const
Definition: fst.h:762
constexpr uint64_t kInitialAcyclic
Definition: properties.h:115
SetType
Definition: set-weight.h:52
typename MPdtExpandFst< Arc >::Arc Arc
Definition: cache.h:1149
void Connect(MutableFst< Arc > *fst)
Definition: connect.h:278
ssize_t NumArcs(const ExpandedFst< Arc > &fst, typename Arc::StateId s)
Definition: expanded-fst.h:103
constexpr int kNoStateId
Definition: fst.h:202
typename Arc::Label Label
Definition: expand.h:61
ssize_t NumInputEpsilons(const ExpandedFst< Arc > &fst, typename Arc::StateId s)
Definition: expanded-fst.h:108
StateIterator(const MPdtExpandFst< Arc > &fst)
Definition: expand.h:276
virtual uint64_t Properties() const
Definition: fst.h:702
void InitArcIterator(StateId s, ArcIteratorData< Arc > *data)
Definition: expand.h:158
constexpr uint64_t kCopyProperties
Definition: properties.h:162
constexpr uint64_t kAcyclic
Definition: properties.h:110
uint64_t MPdtExpandProperties(uint64_t inprops)
Definition: expand.h:51
typename FirstCacheStore< VectorCacheStore< CacheState< Arc > > >::State State
Definition: cache.h:666
void InitArcIterator(StateId s, ArcIteratorData< Arc > *data) const override
Definition: expand.h:254
const PdtStateTable< StateId, StackId > & GetStateTable() const
Definition: expand.h:185
PdtStateTable< typename Arc::StateId, typename Arc::StateId > * state_table
Definition: expand.h:40
const ParenStack & GetStack() const
Definition: expand.h:258
std::unique_ptr< StateIteratorBase< Arc > > base
Definition: fst.h:382
typename Arc::StateId StateId
Definition: expand.h:218
constexpr uint64_t kFstProperties
Definition: properties.h:325
constexpr uint64_t kUnweighted
Definition: properties.h:105
void ExpandState(StateId s)
Definition: expand.h:165
MPdtExpandOptions(bool connect=true, bool keep_parentheses=false)
Definition: expand.h:303
size_t NumArcs(StateId s)
Definition: expand.h:143
internal::MPdtStack< typename Arc::StateId, typename Arc::Label > * stack
Definition: expand.h:39
typename MPdtExpandFst< Arc >::Arc Arc
Definition: cache.h:1195
virtual const SymbolTable * InputSymbols() const =0
const SymbolTable * InputSymbols() const
Definition: fst.h:760
MPdtExpandFst(const Fst< Arc > &fst, const std::vector< std::pair< Label, Label >> &parens, const std::vector< Label > &assignments)
Definition: expand.h:230
MPdtExpandFst< Arc > * Copy(bool safe=false) const override
Definition: expand.h:248
ArcIterator(const MPdtExpandFst< Arc > &fst, StateId s)
Definition: expand.h:287
size_t NumOutputEpsilons(StateId s)
Definition: expand.h:153
typename Arc::Weight Weight
Definition: expand.h:63
typename Arc::StateId StateId
Definition: expand.h:285
MPdtExpandFstOptions(const CacheOptions &opts=CacheOptions(), bool kp=false, internal::MPdtStack< typename Arc::StateId, typename Arc::Label > *s=nullptr, PdtStateTable< typename Arc::StateId, typename Arc::StateId > *st=nullptr)
Definition: expand.h:42
void Expand(const Fst< Arc > &ifst, const std::vector< std::pair< typename Arc::Label, typename Arc::Label >> &parens, const std::vector< typename Arc::Label > &assignments, MutableFst< Arc > *ofst, const MPdtExpandOptions &opts)
Definition: expand.h:316
typename CacheState< Arc >::Arc Arc
Definition: cache.h:852
Impl * GetMutableImpl() const
Definition: fst.h:1028
Weight Final(StateId s)
Definition: expand.h:132
MPdtExpandFstImpl(const MPdtExpandFstImpl &impl)
Definition: expand.h:102
typename Arc::StateId StateId
Definition: expand.h:62
size_t gc_limit
Definition: cache.h:45
MPdtExpandFst(const Fst< Arc > &fst, const std::vector< std::pair< Label, Label >> &parens, const std::vector< Label > &assignments, const MPdtExpandFstOptions< Arc > &opts)
Definition: expand.h:236
typename Arc::Label Label
Definition: expand.h:217
const PdtStateTable< StateId, StackId > & GetStateTable() const
Definition: expand.h:260
MPdtExpandFst(const MPdtExpandFst< Arc > &fst, bool safe=false)
Definition: expand.h:244
const Impl * GetImpl() const
Definition: fst.h:1026
constexpr uint64_t kAcceptor
Definition: properties.h:63
virtual const SymbolTable * OutputSymbols() const =0