FST  openfst-1.7.2
OpenFst Library
synchronize.h
Go to the documentation of this file.
1 // See www.openfst.org for extensive documentation on this weighted
2 // finite-state transducer library.
3 //
4 // Synchronize an FST with bounded delay.
5 
6 #ifndef FST_SYNCHRONIZE_H_
7 #define FST_SYNCHRONIZE_H_
8 
9 #include <algorithm>
10 #include <string>
11 #include <unordered_map>
12 #include <unordered_set>
13 #include <utility>
14 #include <vector>
15 
16 #include <fst/cache.h>
17 #include <fst/test-properties.h>
18 
19 
20 namespace fst {
21 
23 
24 namespace internal {
25 
26 // Implementation class for SynchronizeFst.
27 // TODO(kbg,sorenj): Refactor to guarantee thread-safety.
28 
29 template <class Arc>
30 class SynchronizeFstImpl : public CacheImpl<Arc> {
31  public:
32  using Label = typename Arc::Label;
33  using StateId = typename Arc::StateId;
34  using Weight = typename Arc::Weight;
35 
40 
48 
49  using String = basic_string<Label>;
50 
51  struct Element {
52  Element() {}
53 
54  Element(StateId state_, const String *i, const String *o)
55  : state(state_), istring(i), ostring(o) {}
56 
57  StateId state; // Input state ID.
58  const String *istring; // Residual input labels.
59  const String *ostring; // Residual output labels.
60  // Residual strings are represented by const pointers to
61  // basic_string<Label> and are stored in a hash_set. The pointed
62  // memory is owned by the hash_set string_set_.
63  };
64 
66  : CacheImpl<Arc>(opts), fst_(fst.Copy()) {
67  SetType("synchronize");
68  const auto props = fst.Properties(kFstProperties, false);
72  }
73 
75  : CacheImpl<Arc>(impl), fst_(impl.fst_->Copy(true)) {
76  SetType("synchronize");
80  }
81 
82  ~SynchronizeFstImpl() override {
83  for (const auto *ptr : string_set_) delete ptr;
84  }
85 
87  if (!HasStart()) {
88  auto start = fst_->Start();
89  if (start == kNoStateId) return kNoStateId;
90  const auto *empty = FindString(new String());
91  start = FindState(Element(fst_->Start(), empty, empty));
92  SetStart(start);
93  }
94  return CacheImpl<Arc>::Start();
95  }
96 
98  if (!HasFinal(s)) {
99  const auto &element = elements_[s];
100  const auto weight = element.state == kNoStateId
101  ? Weight::One()
102  : fst_->Final(element.state);
103  if ((weight != Weight::Zero()) && (element.istring)->empty() &&
104  (element.ostring)->empty()) {
105  SetFinal(s, weight);
106  } else {
107  SetFinal(s, Weight::Zero());
108  }
109  }
110  return CacheImpl<Arc>::Final(s);
111  }
112 
113  size_t NumArcs(StateId s) {
114  if (!HasArcs(s)) Expand(s);
115  return CacheImpl<Arc>::NumArcs(s);
116  }
117 
119  if (!HasArcs(s)) Expand(s);
121  }
122 
124  if (!HasArcs(s)) Expand(s);
126  }
127 
128  uint64 Properties() const override { return Properties(kFstProperties); }
129 
130  // Sets error if found, returning other FST impl properties.
131  uint64 Properties(uint64 mask) const override {
132  if ((mask & kError) && fst_->Properties(kError, false)) {
133  SetProperties(kError, kError);
134  }
135  return FstImpl<Arc>::Properties(mask);
136  }
137 
139  if (!HasArcs(s)) Expand(s);
141  }
142 
143  // Returns the first character of the string obtained by concatenating the
144  // string and the label.
145  Label Car(const String *str, Label label = 0) const {
146  if (!str->empty()) {
147  return (*str)[0];
148  } else {
149  return label;
150  }
151  }
152 
153  // Computes the residual string obtained by removing the first
154  // character in the concatenation of the string and the label.
155  const String *Cdr(const String *str, Label label = 0) {
156  auto *r = new String();
157  for (size_t i = 1; i < str->size(); ++i) r->push_back((*str)[i]);
158  if (label && !(str->empty())) r->push_back(label);
159  return FindString(r);
160  }
161 
162  // Computes the concatenation of the string and the label.
163  const String *Concat(const String *str, Label label = 0) {
164  auto *r = new String();
165  for (size_t i = 0; i < str->size(); ++i) r->push_back((*str)[i]);
166  if (label) r->push_back(label);
167  return FindString(r);
168  }
169 
170  // Tests if the concatenation of the string and label is empty.
171  bool Empty(const String *str, Label label = 0) const {
172  if (str->empty()) {
173  return label == 0;
174  } else {
175  return false;
176  }
177  }
178 
179  // Finds the string pointed by s in the hash set. Transfers the pointer
180  // ownership to the hash set.
181  const String *FindString(const String *str) {
182  const auto insert_result = string_set_.insert(str);
183  if (!insert_result.second) {
184  delete str;
185  }
186  return *insert_result.first;
187  }
188 
189  // Finds state corresponding to an element. Creates new state if element
190  // is not found.
191  StateId FindState(const Element &element) {
192  const auto insert_result =
193  element_map_.insert(std::make_pair(element, elements_.size()));
194  if (insert_result.second) {
195  elements_.push_back(element);
196  }
197  return insert_result.first->second;
198  }
199 
200  // Computes the outgoing transitions from a state, creating new destination
201  // states as needed.
202  void Expand(StateId s) {
203  const auto element = elements_[s];
204  if (element.state != kNoStateId) {
205  for (ArcIterator<Fst<Arc>> aiter(*fst_, element.state); !aiter.Done();
206  aiter.Next()) {
207  const auto &arc = aiter.Value();
208  if (!Empty(element.istring, arc.ilabel) &&
209  !Empty(element.ostring, arc.olabel)) {
210  const auto *istring = Cdr(element.istring, arc.ilabel);
211  const auto *ostring = Cdr(element.ostring, arc.olabel);
212  EmplaceArc(s, Car(element.istring, arc.ilabel),
213  Car(element.ostring, arc.olabel), arc.weight,
214  FindState(Element(arc.nextstate, istring, ostring)));
215  } else {
216  const auto *istring = Concat(element.istring, arc.ilabel);
217  const auto *ostring = Concat(element.ostring, arc.olabel);
218  EmplaceArc(s, 0, 0, arc.weight,
219  FindState(Element(arc.nextstate, istring, ostring)));
220  }
221  }
222  }
223  const auto weight = element.state == kNoStateId
224  ? Weight::One()
225  : fst_->Final(element.state);
226  if ((weight != Weight::Zero()) &&
227  ((element.istring)->size() + (element.ostring)->size() > 0)) {
228  const auto *istring = Cdr(element.istring);
229  const auto *ostring = Cdr(element.ostring);
230  EmplaceArc(s, Car(element.istring), Car(element.ostring), weight,
232  }
233  SetArcs(s);
234  }
235 
236  private:
237  // Equality function for Elements; assumes strings have been hashed.
238  class ElementEqual {
239  public:
240  bool operator()(const Element &x, const Element &y) const {
241  return x.state == y.state && x.istring == y.istring &&
242  x.ostring == y.ostring;
243  }
244  };
245 
246  // Hash function for Elements to FST states.
247  class ElementKey {
248  public:
249  size_t operator()(const Element &x) const {
250  size_t key = x.state;
251  key = (key << 1) ^ (x.istring)->size();
252  for (size_t i = 0; i < (x.istring)->size(); ++i) {
253  key = (key << 1) ^ (*x.istring)[i];
254  }
255  key = (key << 1) ^ (x.ostring)->size();
256  for (size_t i = 0; i < (x.ostring)->size(); ++i) {
257  key = (key << 1) ^ (*x.ostring)[i];
258  }
259  return key;
260  }
261  };
262 
263  // Equality function for strings.
264  class StringEqual {
265  public:
266  bool operator()(const String *const &x, const String *const &y) const {
267  if (x->size() != y->size()) return false;
268  for (size_t i = 0; i < x->size(); ++i) {
269  if ((*x)[i] != (*y)[i]) return false;
270  }
271  return true;
272  }
273  };
274 
275  // Hash function for set of strings
276  class StringKey {
277  public:
278  size_t operator()(const String *const &x) const {
279  size_t key = x->size();
280  for (size_t i = 0; i < x->size(); ++i) key = (key << 1) ^ (*x)[i];
281  return key;
282  }
283  };
284 
285  using ElementMap =
286  std::unordered_map<Element, StateId, ElementKey, ElementEqual>;
287  using StringSet = std::unordered_set<const String *, StringKey, StringEqual>;
288 
289  std::unique_ptr<const Fst<Arc>> fst_;
290  std::vector<Element> elements_; // Maps FST state to Elements.
291  ElementMap element_map_; // Maps Elements to FST state.
292  StringSet string_set_;
293 };
294 
295 } // namespace internal
296 
297 // Synchronizes a transducer. This version is a delayed FST. The result is an
298 // equivalent FST that has the property that during the traversal of a path,
299 // the delay is either zero or strictly increasing, where the delay is the
300 // difference between the number of non-epsilon output labels and input labels
301 // along the path.
302 //
303 // For the algorithm to terminate, the input transducer must have bounded
304 // delay, i.e., the delay of every cycle must be zero.
305 //
306 // Complexity:
307 //
308 // - A has bounded delay: exponential.
309 // - A does not have bounded delay: does not terminate.
310 //
311 // For more information, see:
312 //
313 // Mohri, M. 2003. Edit-distance of weighted automata: General definitions and
314 // algorithms. International Journal of Computer Science 14(6): 957-982.
315 //
316 // This class attaches interface to implementation and handles reference
317 // counting, delegating most methods to ImplToFst.
318 template <class A>
319 class SynchronizeFst : public ImplToFst<internal::SynchronizeFstImpl<A>> {
320  public:
321  using Arc = A;
322  using StateId = typename Arc::StateId;
323  using Weight = typename Arc::Weight;
324 
326  using State = typename Store::State;
328 
329  friend class ArcIterator<SynchronizeFst<A>>;
330  friend class StateIterator<SynchronizeFst<A>>;
331 
332  explicit SynchronizeFst(
333  const Fst<A> &fst,
335  : ImplToFst<Impl>(std::make_shared<Impl>(fst, opts)) {}
336 
337  // See Fst<>::Copy() for doc.
338  SynchronizeFst(const SynchronizeFst<Arc> &fst, bool safe = false)
339  : ImplToFst<Impl>(fst, safe) {}
340 
341  // Gets a copy of this SynchronizeFst. See Fst<>::Copy() for further doc.
342  SynchronizeFst<Arc> *Copy(bool safe = false) const override {
343  return new SynchronizeFst<Arc>(*this, safe);
344  }
345 
346  inline void InitStateIterator(StateIteratorData<Arc> *data) const override;
347 
348  void InitArcIterator(StateId s, ArcIteratorData<Arc> *data) const override {
349  GetMutableImpl()->InitArcIterator(s, data);
350  }
351 
352  private:
355 
356  SynchronizeFst &operator=(const SynchronizeFst &) = delete;
357 };
358 
359 // Specialization for SynchronizeFst.
360 template <class Arc>
362  : public CacheStateIterator<SynchronizeFst<Arc>> {
363  public:
365  : CacheStateIterator<SynchronizeFst<Arc>>(fst, fst.GetMutableImpl()) {}
366 };
367 
368 // Specialization for SynchronizeFst.
369 template <class Arc>
371  : public CacheArcIterator<SynchronizeFst<Arc>> {
372  public:
373  using StateId = typename Arc::StateId;
374 
376  : CacheArcIterator<SynchronizeFst<Arc>>(fst.GetMutableImpl(), s) {
377  if (!fst.GetImpl()->HasArcs(s)) fst.GetMutableImpl()->Expand(s);
378  }
379 };
380 
381 template <class Arc>
383  StateIteratorData<Arc> *data) const {
384  data->base = new StateIterator<SynchronizeFst<Arc>>(*this);
385 }
386 
387 // Synchronizes a transducer. This version writes the synchronized result to a
388 // MutableFst. The result will be an equivalent FST that has the property that
389 // during the traversal of a path, the delay is either zero or strictly
390 // increasing, where the delay is the difference between the number of
391 // non-epsilon output labels and input labels along the path.
392 //
393 // For the algorithm to terminate, the input transducer must have bounded
394 // delay, i.e., the delay of every cycle must be zero.
395 //
396 // Complexity:
397 //
398 // - A has bounded delay: exponential.
399 // - A does not have bounded delay: does not terminate.
400 //
401 // For more information, see:
402 //
403 // Mohri, M. 2003. Edit-distance of weighted automata: General definitions and
404 // algorithms. International Journal of Computer Science 14(6): 957-982.
405 template <class Arc>
406 void Synchronize(const Fst<Arc> &ifst, MutableFst<Arc> *ofst) {
407  // Caches only the last state for fastest copy.
408  const SynchronizeFstOptions opts(FLAGS_fst_default_cache_gc, 0);
409  *ofst = SynchronizeFst<Arc>(ifst, opts);
410 }
411 
412 } // namespace fst
413 
414 #endif // FST_SYNCHRONIZE_H_
void SetFinal(StateId s, Weight weight)
Definition: cache.h:905
StateIterator(const SynchronizeFst< Arc > &fst)
Definition: synchronize.h:364
uint64_t uint64
Definition: types.h:32
bool Empty(const String *str, Label label=0) const
Definition: synchronize.h:171
size_t NumOutputEpsilons(StateId s)
Definition: synchronize.h:123
void InitArcIterator(StateId s, ArcIteratorData< Arc > *data)
Definition: synchronize.h:138
const SymbolTable * OutputSymbols() const
Definition: fst.h:690
basic_string< Label > String
Definition: synchronize.h:49
uint64 Properties(uint64 mask) const override
Definition: synchronize.h:131
void SetOutputSymbols(const SymbolTable *osyms)
Definition: fst.h:700
typename SynchronizeFst< Arc >::Arc Arc
Definition: cache.h:1148
constexpr uint64 kFstProperties
Definition: properties.h:301
constexpr uint64 kCopyProperties
Definition: properties.h:138
virtual uint64 Properties() const
Definition: fst.h:666
constexpr int kNoStateId
Definition: fst.h:180
Label Car(const String *str, Label label=0) const
Definition: synchronize.h:145
virtual uint64 Properties(uint64 mask, bool test) const =0
SynchronizeFst< Arc > * Copy(bool safe=false) const override
Definition: synchronize.h:342
void SetType(const string &type)
Definition: fst.h:664
void InitStateIterator(StateIteratorData< Arc > *data) const override
Definition: synchronize.h:382
StateIteratorBase< Arc > * base
Definition: fst.h:351
Element(StateId state_, const String *i, const String *o)
Definition: synchronize.h:54
SynchronizeFst(const Fst< A > &fst, const SynchronizeFstOptions &opts=SynchronizeFstOptions())
Definition: synchronize.h:332
typename CacheStore::State State
Definition: cache.h:648
void InitArcIterator(StateId s, ArcIteratorData< Arc > *data) const
Definition: cache.h:1041
uint64 SynchronizeProperties(uint64 inprops)
Definition: properties.cc:358
SynchronizeFstImpl(const SynchronizeFstImpl &impl)
Definition: synchronize.h:74
void EmplaceArc(StateId s, T &&...ctor_args)
Definition: cache.h:946
void SetInputSymbols(const SymbolTable *isyms)
Definition: fst.h:696
typename Store::State State
Definition: synchronize.h:326
ArcIterator(const SynchronizeFst< Arc > &fst, StateId s)
Definition: synchronize.h:375
const String * Cdr(const String *str, Label label=0)
Definition: synchronize.h:155
typename internal::SynchronizeFstImpl< Arc >::Arc Arc
Definition: fst.h:188
typename Arc::Weight Weight
Definition: synchronize.h:34
CacheOptions SynchronizeFstOptions
Definition: synchronize.h:22
SynchronizeFst(const SynchronizeFst< Arc > &fst, bool safe=false)
Definition: synchronize.h:338
typename SynchronizeFst< Arc >::Arc Arc
Definition: cache.h:1194
virtual const SymbolTable * InputSymbols() const =0
const SymbolTable * InputSymbols() const
Definition: fst.h:688
size_t NumInputEpsilons(StateId s)
Definition: synchronize.h:118
constexpr uint64 kError
Definition: properties.h:33
void Synchronize(const Fst< Arc > &ifst, MutableFst< Arc > *ofst)
Definition: synchronize.h:406
typename Arc::StateId StateId
Definition: synchronize.h:33
const String * FindString(const String *str)
Definition: synchronize.h:181
uint64 Properties() const override
Definition: synchronize.h:128
typename CacheState< Arc >::Arc Arc
Definition: cache.h:837
Impl * GetMutableImpl() const
Definition: fst.h:947
const String * Concat(const String *str, Label label=0)
Definition: synchronize.h:163
SynchronizeFstImpl(const Fst< Arc > &fst, const SynchronizeFstOptions &opts)
Definition: synchronize.h:65
void InitArcIterator(StateId s, ArcIteratorData< Arc > *data) const override
Definition: synchronize.h:348
const Impl * GetImpl() const
Definition: fst.h:945
virtual const SymbolTable * OutputSymbols() const =0
StateId FindState(const Element &element)
Definition: synchronize.h:191