FST  openfst-1.7.2
OpenFst Library
expand.h
Go to the documentation of this file.
1 // See www.openfst.org for extensive documentation on this weighted
2 // finite-state transducer library.
3 //
4 // Expands an MPDT to an FST.
5 
6 #ifndef FST_EXTENSIONS_MPDT_EXPAND_H_
7 #define FST_EXTENSIONS_MPDT_EXPAND_H_
8 
9 #include <vector>
10 
13 #include <fst/cache.h>
14 #include <fst/mutable-fst.h>
15 #include <fst/queue.h>
16 #include <fst/state-table.h>
17 #include <fst/test-properties.h>
18 
19 namespace fst {
20 
21 template <class Arc>
26 
28  const CacheOptions &opts = CacheOptions(), bool kp = false,
30  nullptr,
32  : CacheOptions(opts), keep_parentheses(kp), stack(s), state_table(st) {}
33 };
34 
35 // Properties for an expanded PDT.
37  return inprops & (kAcceptor | kAcyclic | kInitialAcyclic | kUnweighted);
38 }
39 
40 namespace internal {
41 
42 // Implementation class for ExpandFst
43 template <class Arc>
44 class MPdtExpandFstImpl : public CacheImpl<Arc> {
45  public:
46  using Label = typename Arc::Label;
47  using StateId = typename Arc::StateId;
48  using Weight = typename Arc::Weight;
49 
50  using StackId = StateId;
53 
59 
60  using CacheBaseImpl<CacheState<Arc>>::PushArc;
61  using CacheBaseImpl<CacheState<Arc>>::HasArcs;
62  using CacheBaseImpl<CacheState<Arc>>::HasFinal;
63  using CacheBaseImpl<CacheState<Arc>>::HasStart;
64  using CacheBaseImpl<CacheState<Arc>>::SetArcs;
65  using CacheBaseImpl<CacheState<Arc>>::SetFinal;
66  using CacheBaseImpl<CacheState<Arc>>::SetStart;
67 
69  const std::vector<std::pair<Label, Label>> &parens,
70  const std::vector<Label> &assignments,
71  const MPdtExpandFstOptions<Arc> &opts)
72  : CacheImpl<Arc>(opts),
73  fst_(fst.Copy()),
74  stack_(opts.stack ? opts.stack : new ParenStack(parens, assignments)),
75  state_table_(opts.state_table ? opts.state_table
76  : new PdtStateTable<StateId, StackId>()),
77  own_stack_(!opts.stack),
78  own_state_table_(!opts.state_table),
79  keep_parentheses_(opts.keep_parentheses) {
80  SetType("expand");
81  const auto props = fst.Properties(kFstProperties, false);
82  SetProperties(MPdtExpandProperties(props), kCopyProperties);
83  SetInputSymbols(fst.InputSymbols());
84  SetOutputSymbols(fst.OutputSymbols());
85  }
86 
88  : CacheImpl<Arc>(impl),
89  fst_(impl.fst_->Copy(true)),
90  stack_(new ParenStack(*impl.stack_)),
91  state_table_(new PdtStateTable<StateId, StackId>()),
92  own_stack_(true),
93  own_state_table_(true),
94  keep_parentheses_(impl.keep_parentheses_) {
95  SetType("expand");
96  SetProperties(impl.Properties(), kCopyProperties);
97  SetInputSymbols(impl.InputSymbols());
98  SetOutputSymbols(impl.OutputSymbols());
99  }
100 
101  ~MPdtExpandFstImpl() override {
102  if (own_stack_) delete stack_;
103  if (own_state_table_) delete state_table_;
104  }
105 
107  if (!HasStart()) {
108  const auto s = fst_->Start();
109  if (s == kNoStateId) return kNoStateId;
110  const StateTuple tuple(s, 0);
111  const auto start = state_table_->FindState(tuple);
112  SetStart(start);
113  }
114  return CacheImpl<Arc>::Start();
115  }
116 
118  if (!HasFinal(s)) {
119  const auto &tuple = state_table_->Tuple(s);
120  const auto weight = fst_->Final(tuple.state_id);
121  SetFinal(s,
122  (weight != Weight::Zero() && tuple.stack_id == 0)
123  ? weight
124  : Weight::Zero());
125  }
126  return CacheImpl<Arc>::Final(s);
127  }
128 
129  size_t NumArcs(StateId s) {
130  if (!HasArcs(s)) ExpandState(s);
131  return CacheImpl<Arc>::NumArcs(s);
132  }
133 
135  if (!HasArcs(s)) ExpandState(s);
137  }
138 
140  if (!HasArcs(s)) ExpandState(s);
142  }
143 
145  if (!HasArcs(s)) ExpandState(s);
147  }
148 
149  // Computes the outgoing transitions from a state, creating new destination
150  // states as needed.
152  const auto tuple = state_table_->Tuple(s);
153  for (ArcIterator<Fst<Arc>> aiter(*fst_, tuple.state_id); !aiter.Done();
154  aiter.Next()) {
155  auto arc = aiter.Value();
156  const auto stack_id = stack_->Find(tuple.stack_id, arc.ilabel);
157  if (stack_id == -1) {
158  continue; // Non-matching close parenthesis.
159  } else if ((stack_id != tuple.stack_id) && !keep_parentheses_) {
160  arc.ilabel = arc.olabel = 0; // Stack push/pop.
161  }
162  const StateTuple ntuple(arc.nextstate, stack_id);
163  arc.nextstate = state_table_->FindState(ntuple);
164  PushArc(s, arc);
165  }
166  SetArcs(s);
167  }
168 
169  const ParenStack &GetStack() const { return *stack_; }
170 
172  return *state_table_;
173  }
174 
175  private:
176  std::unique_ptr<const Fst<Arc>> fst_;
177  ParenStack *stack_;
178  PdtStateTable<StateId, StackId> *state_table_;
179  const bool own_stack_;
180  const bool own_state_table_;
181  const bool keep_parentheses_;
182 
183  MPdtExpandFstImpl &operator=(const MPdtExpandFstImpl &) = delete;
184 };
185 
186 } // namespace internal
187 
188 // Expands a multi-pushdown transducer (MPDT) encoded as an FST into an FST.
189 // This version is a delayed FST. In the MPDT, some transitions are labeled with
190 // open or close parentheses. To be interpreted as an MPDT, the parens for each
191 // stack must balance on a path. The open-close parenthesis label
192 // pairs are passed using the parens argument, and the assignment of those pairs
193 // to stacks is passed using the assignments argument. Expansion enforces the
194 // parenthesis constraints. The MPDT must be
195 // expandable as an FST.
196 //
197 // This class attaches interface to implementation and handles
198 // reference counting, delegating most methods to ImplToFst.
199 template <class A>
200 class MPdtExpandFst : public ImplToFst<internal::MPdtExpandFstImpl<A>> {
201  public:
202  using Arc = A;
203  using Label = typename Arc::Label;
204  using StateId = typename Arc::StateId;
205  using Weight = typename Arc::Weight;
206 
207  using StackId = StateId;
210  using State = typename Store::State;
212 
213  friend class ArcIterator<MPdtExpandFst<Arc>>;
215 
217  const std::vector<std::pair<Label, Label>> &parens,
218  const std::vector<Label> &assignments)
219  : ImplToFst<Impl>(std::make_shared<Impl>(fst, parens, assignments,
220  MPdtExpandFstOptions<Arc>())) {}
221 
223  const std::vector<std::pair<Label, Label>> &parens,
224  const std::vector<Label> &assignments,
225  const MPdtExpandFstOptions<Arc> &opts)
226  : ImplToFst<Impl>(
227  std::make_shared<Impl>(fst, parens, assignments, opts)) {}
228 
229  // See Fst<>::Copy() for doc.
230  MPdtExpandFst(const MPdtExpandFst<Arc> &fst, bool safe = false)
231  : ImplToFst<Impl>(fst, safe) {}
232 
233  // Get a copy of this ExpandFst. See Fst<>::Copy() for further doc.
234  MPdtExpandFst<Arc> *Copy(bool safe = false) const override {
235  return new MPdtExpandFst<A>(*this, safe);
236  }
237 
238  inline void InitStateIterator(StateIteratorData<Arc> *data) const override;
239 
240  void InitArcIterator(StateId s, ArcIteratorData<Arc> *data) const override {
241  GetMutableImpl()->InitArcIterator(s, data);
242  }
243 
244  const ParenStack &GetStack() const { return GetImpl()->GetStack(); }
245 
247  return GetImpl()->GetStateTable();
248  }
249 
250  private:
253 
254  void operator=(const MPdtExpandFst &) = delete;
255 };
256 
257 // Specialization for MPdtExpandFst.
258 template <class Arc>
260  : public CacheStateIterator<MPdtExpandFst<Arc>> {
261  public:
263  : CacheStateIterator<MPdtExpandFst<Arc>>(fst, fst.GetMutableImpl()) {}
264 };
265 
266 // Specialization for MPdtExpandFst.
267 template <class Arc>
269  : public CacheArcIterator<MPdtExpandFst<Arc>> {
270  public:
271  using StateId = typename Arc::StateId;
272 
274  : CacheArcIterator<MPdtExpandFst<Arc>>(fst.GetMutableImpl(), s) {
275  if (!fst.GetImpl()->HasArcs(s)) fst.GetMutableImpl()->ExpandState(s);
276  }
277 };
278 
279 template <class Arc>
281  StateIteratorData<Arc> *data) const {
282  data->base = new StateIterator<MPdtExpandFst<Arc>>(*this);
283 }
284 
286  bool connect;
288 
289  explicit MPdtExpandOptions(bool connect = true, bool keep_parentheses = false)
290  : connect(connect), keep_parentheses(keep_parentheses) {}
291 };
292 
293 // Expands a multi-pushdown transducer (MPDT) encoded as an FST into an FST.
294 // This version writes the expanded PDT to a mutable FST. In the MPDT, some
295 // transitions are labeled with open or close parentheses. To be interpreted as
296 // an MPDT, the parens for each stack must balance on a path. The open-close
297 // parenthesis label pair sets are passed using the parens argument, and the
298 // assignment of those pairs to stacks is passed using the assignments argument.
299 // The expansion enforces the parenthesis constraints. The MPDT must be
300 // expandable as an FST.
301 template <class Arc>
302 void Expand(const Fst<Arc> &ifst,
303  const std::vector<
304  std::pair<typename Arc::Label, typename Arc::Label>> &parens,
305  const std::vector<typename Arc::Label> &assignments,
306  MutableFst<Arc> *ofst, const MPdtExpandOptions &opts) {
308  eopts.gc_limit = 0;
309  eopts.keep_parentheses = opts.keep_parentheses;
310  *ofst = MPdtExpandFst<Arc>(ifst, parens, assignments, eopts);
311  if (opts.connect) Connect(ofst);
312 }
313 
314 // Expands a multi-pushdown transducer (MPDT) encoded as an FST into an FST.
315 // This version writes the expanded PDT to a mutable FST. In the MPDT, some
316 // transitions are labeled with open or close parentheses. To be interpreted as
317 // an MPDT, the parens for each stack must balance on a path. The open-close
318 // parenthesis label pair sets are passed using the parens argument, and the
319 // assignment of those pairs to stacks is passed using the assignments argument.
320 // The expansion enforces the parenthesis constraints. The MPDT must be
321 // expandable as an FST.
322 template <class Arc>
323 void Expand(const Fst<Arc> &ifst,
324  const std::vector<std::pair<typename Arc::Label,
325  typename Arc::Label>> &parens,
326  const std::vector<typename Arc::Label> &assignments,
327  MutableFst<Arc> *ofst, bool connect = true,
328  bool keep_parentheses = false) {
329  const MPdtExpandOptions opts(connect, keep_parentheses);
330  Expand(ifst, parens, assignments, ofst, opts);
331 }
332 
333 } // namespace fst
334 
335 #endif // FST_EXTENSIONS_MPDT_EXPAND_H_
ssize_t NumOutputEpsilons(const ExpandedFst< Arc > &fst, typename Arc::StateId s)
Definition: expanded-fst.h:99
constexpr uint64 kInitialAcyclic
Definition: properties.h:97
uint64_t uint64
Definition: types.h:32
typename Store::State State
Definition: expand.h:210
uint64 MPdtExpandProperties(uint64 inprops)
Definition: expand.h:36
void InitStateIterator(StateIteratorData< Arc > *data) const override
Definition: expand.h:280
const ParenStack & GetStack() const
Definition: expand.h:169
typename Arc::Weight Weight
Definition: expand.h:205
MPdtExpandFstImpl(const Fst< Arc > &fst, const std::vector< std::pair< Label, Label >> &parens, const std::vector< Label > &assignments, const MPdtExpandFstOptions< Arc > &opts)
Definition: expand.h:68
size_t NumInputEpsilons(StateId s)
Definition: expand.h:134
Arc::Weight Final(const ExpandedFst< Arc > &fst, typename Arc::StateId s)
Definition: expanded-fst.h:82
const SymbolTable * OutputSymbols() const
Definition: fst.h:690
SetType
Definition: set-weight.h:37
typename MPdtExpandFst< Arc >::Arc Arc
Definition: cache.h:1148
void Connect(MutableFst< Arc > *fst)
Definition: connect.h:268
ssize_t NumArcs(const ExpandedFst< Arc > &fst, typename Arc::StateId s)
Definition: expanded-fst.h:88
constexpr uint64 kFstProperties
Definition: properties.h:301
constexpr uint64 kCopyProperties
Definition: properties.h:138
virtual uint64 Properties() const
Definition: fst.h:666
constexpr int kNoStateId
Definition: fst.h:180
virtual uint64 Properties(uint64 mask, bool test) const =0
constexpr uint64 kUnweighted
Definition: properties.h:87
StateIteratorBase< Arc > * base
Definition: fst.h:351
CacheOptions(bool gc=FLAGS_fst_default_cache_gc, size_t gc_limit=FLAGS_fst_default_cache_gc_limit)
Definition: cache.h:31
typename Arc::Label Label
Definition: expand.h:46
ssize_t NumInputEpsilons(const ExpandedFst< Arc > &fst, typename Arc::StateId s)
Definition: expanded-fst.h:93
StateIterator(const MPdtExpandFst< Arc > &fst)
Definition: expand.h:262
void InitArcIterator(StateId s, ArcIteratorData< Arc > *data)
Definition: expand.h:144
typename FirstCacheStore< VectorCacheStore< CacheState< Arc > > >::State State
Definition: cache.h:648
void InitArcIterator(StateId s, ArcIteratorData< Arc > *data) const override
Definition: expand.h:240
const PdtStateTable< StateId, StackId > & GetStateTable() const
Definition: expand.h:171
PdtStateTable< typename Arc::StateId, typename Arc::StateId > * state_table
Definition: expand.h:25
const ParenStack & GetStack() const
Definition: expand.h:244
constexpr uint64 kAcceptor
Definition: properties.h:45
typename Arc::StateId StateId
Definition: expand.h:204
void ExpandState(StateId s)
Definition: expand.h:151
MPdtExpandOptions(bool connect=true, bool keep_parentheses=false)
Definition: expand.h:289
size_t NumArcs(StateId s)
Definition: expand.h:129
internal::MPdtStack< typename Arc::StateId, typename Arc::Label > * stack
Definition: expand.h:24
typename MPdtExpandFst< Arc >::Arc Arc
Definition: cache.h:1194
virtual const SymbolTable * InputSymbols() const =0
const SymbolTable * InputSymbols() const
Definition: fst.h:688
MPdtExpandFst(const Fst< Arc > &fst, const std::vector< std::pair< Label, Label >> &parens, const std::vector< Label > &assignments)
Definition: expand.h:216
constexpr uint64 kAcyclic
Definition: properties.h:92
MPdtExpandFst< Arc > * Copy(bool safe=false) const override
Definition: expand.h:234
ArcIterator(const MPdtExpandFst< Arc > &fst, StateId s)
Definition: expand.h:273
size_t NumOutputEpsilons(StateId s)
Definition: expand.h:139
typename Arc::Weight Weight
Definition: expand.h:48
typename Arc::StateId StateId
Definition: expand.h:271
MPdtExpandFstOptions(const CacheOptions &opts=CacheOptions(), bool kp=false, internal::MPdtStack< typename Arc::StateId, typename Arc::Label > *s=nullptr, PdtStateTable< typename Arc::StateId, typename Arc::StateId > *st=nullptr)
Definition: expand.h:27
void Expand(const Fst< Arc > &ifst, const std::vector< std::pair< typename Arc::Label, typename Arc::Label >> &parens, const std::vector< typename Arc::Label > &assignments, MutableFst< Arc > *ofst, const MPdtExpandOptions &opts)
Definition: expand.h:302
typename CacheState< Arc >::Arc Arc
Definition: cache.h:837
Impl * GetMutableImpl() const
Definition: fst.h:947
Weight Final(StateId s)
Definition: expand.h:117
MPdtExpandFstImpl(const MPdtExpandFstImpl &impl)
Definition: expand.h:87
typename Arc::StateId StateId
Definition: expand.h:47
size_t gc_limit
Definition: cache.h:29
MPdtExpandFst(const Fst< Arc > &fst, const std::vector< std::pair< Label, Label >> &parens, const std::vector< Label > &assignments, const MPdtExpandFstOptions< Arc > &opts)
Definition: expand.h:222
typename Arc::Label Label
Definition: expand.h:203
const PdtStateTable< StateId, StackId > & GetStateTable() const
Definition: expand.h:246
MPdtExpandFst(const MPdtExpandFst< Arc > &fst, bool safe=false)
Definition: expand.h:230
const Impl * GetImpl() const
Definition: fst.h:945
virtual const SymbolTable * OutputSymbols() const =0