FST  openfst-1.8.2
OpenFst Library
synchronize.h
Go to the documentation of this file.
1 // Copyright 2005-2020 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the 'License');
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an 'AS IS' BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // See www.openfst.org for extensive documentation on this weighted
16 // finite-state transducer library.
17 //
18 // Synchronize an FST with bounded delay.
19 
20 #ifndef FST_SYNCHRONIZE_H_
21 #define FST_SYNCHRONIZE_H_
22 
23 #include <algorithm>
24 #include <cstdint>
25 #include <functional>
26 #include <memory>
27 #include <string>
28 #include <string_view>
29 #include <vector>
30 
31 #include <fst/cache.h>
32 #include <fst/test-properties.h>
33 
34 #include <unordered_map>
35 #include <unordered_set>
36 
37 namespace fst {
38 
40 
41 namespace internal {
42 
43 // Implementation class for SynchronizeFst.
44 // TODO(kbg,sorenj): Refactor to guarantee thread-safety.
45 
46 template <class Arc>
47 class SynchronizeFstImpl : public CacheImpl<Arc> {
48  public:
49  using Label = typename Arc::Label;
50  using StateId = typename Arc::StateId;
51  using Weight = typename Arc::Weight;
52 
57 
65 
66  using String = std::basic_string<Label>;
67  using StringView = std::basic_string_view<Label>;
68 
69  struct Element {
70  Element() {}
71 
73  : state(state_), istring(i), ostring(o) {}
74 
75  StateId state; // Input state ID.
76  StringView istring; // Residual input labels.
77  StringView ostring; // Residual output labels.
78  // Residual strings are represented by std::basic_string_view<Label> whose
79  // values are owned by the hash set string_set_.
80  };
81 
83  : CacheImpl<Arc>(opts), fst_(fst.Copy()) {
84  SetType("synchronize");
85  const auto props = fst.Properties(kFstProperties, false);
89  }
90 
92  : CacheImpl<Arc>(impl), fst_(impl.fst_->Copy(true)) {
93  SetType("synchronize");
97  }
98 
100  if (!HasStart()) {
101  auto start = fst_->Start();
102  if (start == kNoStateId) return kNoStateId;
103  const StringView empty = FindString(String());
104  start = FindState(Element(fst_->Start(), empty, empty));
105  SetStart(start);
106  }
107  return CacheImpl<Arc>::Start();
108  }
109 
111  if (!HasFinal(s)) {
112  const auto &element = elements_[s];
113  const auto weight = element.state == kNoStateId
114  ? Weight::One()
115  : fst_->Final(element.state);
116  if ((weight != Weight::Zero()) && element.istring.empty() &&
117  element.ostring.empty()) {
118  SetFinal(s, weight);
119  } else {
120  SetFinal(s, Weight::Zero());
121  }
122  }
123  return CacheImpl<Arc>::Final(s);
124  }
125 
126  size_t NumArcs(StateId s) {
127  if (!HasArcs(s)) Expand(s);
128  return CacheImpl<Arc>::NumArcs(s);
129  }
130 
132  if (!HasArcs(s)) Expand(s);
134  }
135 
137  if (!HasArcs(s)) Expand(s);
139  }
140 
141  uint64_t Properties() const override { return Properties(kFstProperties); }
142 
143  // Sets error if found, returning other FST impl properties.
144  uint64_t Properties(uint64_t mask) const override {
145  if ((mask & kError) && fst_->Properties(kError, false)) {
146  SetProperties(kError, kError);
147  }
148  return FstImpl<Arc>::Properties(mask);
149  }
150 
152  if (!HasArcs(s)) Expand(s);
154  }
155 
156  // Returns the first character of the string obtained by concatenating the
157  // string and the label.
158  Label Car(StringView str, Label label = 0) const {
159  if (!str.empty()) {
160  return str[0];
161  } else {
162  return label;
163  }
164  }
165 
166  // Computes the residual string obtained by removing the first
167  // character in the concatenation of the string and the label.
168  StringView Cdr(StringView str, Label label = 0) {
169  if (str.empty()) return FindString(String());
170  return Concat(str.substr(1), label);
171  }
172 
173  // Computes the concatenation of the string and the label.
174  StringView Concat(StringView str, Label label = 0) {
175  String r(str);
176  if (label) r.push_back(label);
177  return FindString(std::move(r));
178  }
179 
180  // Tests if the concatenation of the string and label is empty.
181  bool Empty(StringView str, Label label = 0) const {
182  if (str.empty()) {
183  return label == 0;
184  } else {
185  return false;
186  }
187  }
188 
190  const auto [str_it, unused] = string_set_.insert(std::forward<String>(str));
191  return *str_it;
192  }
193 
194  // Finds state corresponding to an element. Creates new state if element
195  // is not found.
196  StateId FindState(const Element &element) {
197  const auto &[iter, inserted] =
198  element_map_.emplace(element, elements_.size());
199  if (inserted) {
200  elements_.push_back(element);
201  }
202  return iter->second;
203  }
204 
205  // Computes the outgoing transitions from a state, creating new destination
206  // states as needed.
207  void Expand(StateId s) {
208  const auto element = elements_[s];
209  if (element.state != kNoStateId) {
210  for (ArcIterator<Fst<Arc>> aiter(*fst_, element.state); !aiter.Done();
211  aiter.Next()) {
212  const auto &arc = aiter.Value();
213  if (!Empty(element.istring, arc.ilabel) &&
214  !Empty(element.ostring, arc.olabel)) {
215  StringView istring = Cdr(element.istring, arc.ilabel);
216  StringView ostring = Cdr(element.ostring, arc.olabel);
217  EmplaceArc(s, Car(element.istring, arc.ilabel),
218  Car(element.ostring, arc.olabel), arc.weight,
219  FindState(Element(arc.nextstate, istring, ostring)));
220  } else {
221  StringView istring = Concat(element.istring, arc.ilabel);
222  StringView ostring = Concat(element.ostring, arc.olabel);
223  EmplaceArc(s, 0, 0, arc.weight,
224  FindState(Element(arc.nextstate, istring, ostring)));
225  }
226  }
227  }
228  const auto weight = element.state == kNoStateId
229  ? Weight::One()
230  : fst_->Final(element.state);
231  if ((weight != Weight::Zero()) &&
232  (element.istring.size() + element.ostring.size() > 0)) {
233  StringView istring = Cdr(element.istring);
234  StringView ostring = Cdr(element.ostring);
235  EmplaceArc(s, Car(element.istring), Car(element.ostring), weight,
236  FindState(Element(kNoStateId, istring, ostring)));
237  }
238  SetArcs(s);
239  }
240 
241  private:
242  // Equality function for Elements; assumes strings have been hashed.
243  class ElementEqual {
244  public:
245  bool operator()(const Element &x, const Element &y) const {
246  return x.state == y.state && x.istring.data() == y.istring.data() &&
247  x.ostring.data() == y.ostring.data();
248  }
249  };
250 
251  // Hash function for Elements to FST states.
252  class ElementKey {
253  public:
254  size_t operator()(const Element &x) const {
255  size_t key = x.state;
256  key = (key << 1) ^ x.istring.size();
257  for (Label label : x.istring) {
258  key = (key << 1) ^ label;
259  }
260  key = (key << 1) ^ x.ostring.size();
261  for (Label label : x.ostring) {
262  key = (key << 1) ^ label;
263  }
264  return key;
265  }
266  };
267 
268  // Hash function for set of strings. This only has to be specified since
269  // `std::hash<std::basic_string<T>>` is only guaranteed to be defined for
270  // certain values of `T`. Not defining this works fine on clang, but fails
271  // under GCC.
272  class StringKey {
273  public:
274  size_t operator()(StringView x) const {
275  size_t key = x.size();
276  for (Label label : x) key = (key << 1) ^ label;
277  return key;
278  }
279  };
280 
281  using ElementMap =
282  std::unordered_map<Element, StateId, ElementKey, ElementEqual>;
283  using StringSet = std::unordered_set<String, StringKey>;
284 
285  std::unique_ptr<const Fst<Arc>> fst_;
286  std::vector<Element> elements_; // Maps FST state to Elements.
287  ElementMap element_map_; // Maps Elements to FST state.
288  StringSet string_set_;
289 };
290 
291 } // namespace internal
292 
293 // Synchronizes a transducer. This version is a delayed FST. The result is an
294 // equivalent FST that has the property that during the traversal of a path,
295 // the delay is either zero or strictly increasing, where the delay is the
296 // difference between the number of non-epsilon output labels and input labels
297 // along the path.
298 //
299 // For the algorithm to terminate, the input transducer must have bounded
300 // delay, i.e., the delay of every cycle must be zero.
301 //
302 // Complexity:
303 //
304 // - A has bounded delay: exponential.
305 // - A does not have bounded delay: does not terminate.
306 //
307 // For more information, see:
308 //
309 // Mohri, M. 2003. Edit-distance of weighted automata: General definitions and
310 // algorithms. International Journal of Computer Science 14(6): 957-982.
311 //
312 // This class attaches interface to implementation and handles reference
313 // counting, delegating most methods to ImplToFst.
314 template <class A>
315 class SynchronizeFst : public ImplToFst<internal::SynchronizeFstImpl<A>> {
316  public:
317  using Arc = A;
318  using StateId = typename Arc::StateId;
319  using Weight = typename Arc::Weight;
320 
322  using State = typename Store::State;
324 
325  friend class ArcIterator<SynchronizeFst<A>>;
326  friend class StateIterator<SynchronizeFst<A>>;
327 
328  explicit SynchronizeFst(const Fst<A> &fst, const SynchronizeFstOptions &opts =
330  : ImplToFst<Impl>(std::make_shared<Impl>(fst, opts)) {}
331 
332  // See Fst<>::Copy() for doc.
333  SynchronizeFst(const SynchronizeFst &fst, bool safe = false)
334  : ImplToFst<Impl>(fst, safe) {}
335 
336  // Gets a copy of this SynchronizeFst. See Fst<>::Copy() for further doc.
337  SynchronizeFst *Copy(bool safe = false) const override {
338  return new SynchronizeFst(*this, safe);
339  }
340 
341  inline void InitStateIterator(StateIteratorData<Arc> *data) const override;
342 
343  void InitArcIterator(StateId s, ArcIteratorData<Arc> *data) const override {
344  GetMutableImpl()->InitArcIterator(s, data);
345  }
346 
347  private:
350 
351  SynchronizeFst &operator=(const SynchronizeFst &) = delete;
352 };
353 
354 // Specialization for SynchronizeFst.
355 template <class Arc>
357  : public CacheStateIterator<SynchronizeFst<Arc>> {
358  public:
360  : CacheStateIterator<SynchronizeFst<Arc>>(fst, fst.GetMutableImpl()) {}
361 };
362 
363 // Specialization for SynchronizeFst.
364 template <class Arc>
366  : public CacheArcIterator<SynchronizeFst<Arc>> {
367  public:
368  using StateId = typename Arc::StateId;
369 
371  : CacheArcIterator<SynchronizeFst<Arc>>(fst.GetMutableImpl(), s) {
372  if (!fst.GetImpl()->HasArcs(s)) fst.GetMutableImpl()->Expand(s);
373  }
374 };
375 
376 template <class Arc>
378  StateIteratorData<Arc> *data) const {
379  data->base = std::make_unique<StateIterator<SynchronizeFst<Arc>>>(*this);
380 }
381 
382 // Synchronizes a transducer. This version writes the synchronized result to a
383 // MutableFst. The result will be an equivalent FST that has the property that
384 // during the traversal of a path, the delay is either zero or strictly
385 // increasing, where the delay is the difference between the number of
386 // non-epsilon output labels and input labels along the path.
387 //
388 // For the algorithm to terminate, the input transducer must have bounded
389 // delay, i.e., the delay of every cycle must be zero.
390 //
391 // Complexity:
392 //
393 // - A has bounded delay: exponential.
394 // - A does not have bounded delay: does not terminate.
395 //
396 // For more information, see:
397 //
398 // Mohri, M. 2003. Edit-distance of weighted automata: General definitions and
399 // algorithms. International Journal of Computer Science 14(6): 957-982.
400 template <class Arc>
401 void Synchronize(const Fst<Arc> &ifst, MutableFst<Arc> *ofst) {
402  // Caches only the last state for fastest copy.
403  const SynchronizeFstOptions opts(FST_FLAGS_fst_default_cache_gc,
404  0);
405  *ofst = SynchronizeFst<Arc>(ifst, opts);
406 }
407 
408 } // namespace fst
409 
410 #endif // FST_SYNCHRONIZE_H_
SynchronizeFst(const SynchronizeFst &fst, bool safe=false)
Definition: synchronize.h:333
StateIterator(const SynchronizeFst< Arc > &fst)
Definition: synchronize.h:359
virtual uint64_t Properties(uint64_t mask, bool test) const =0
void SetFinal(StateId s, Weight weight=Weight::One())
Definition: cache.h:923
size_t NumOutputEpsilons(StateId s)
Definition: synchronize.h:136
void InitArcIterator(StateId s, ArcIteratorData< Arc > *data)
Definition: synchronize.h:151
const SymbolTable * OutputSymbols() const
Definition: fst.h:762
constexpr uint64_t kError
Definition: properties.h:51
void SetOutputSymbols(const SymbolTable *osyms)
Definition: fst.h:772
typename SynchronizeFst< Arc >::Arc Arc
Definition: cache.h:1149
constexpr int kNoStateId
Definition: fst.h:202
uint64_t Properties() const override
Definition: synchronize.h:141
void InitStateIterator(StateIteratorData< Arc > *data) const override
Definition: synchronize.h:377
StringView FindString(String &&str)
Definition: synchronize.h:189
virtual uint64_t Properties() const
Definition: fst.h:702
SynchronizeFst(const Fst< A > &fst, const SynchronizeFstOptions &opts=SynchronizeFstOptions())
Definition: synchronize.h:328
constexpr uint64_t kCopyProperties
Definition: properties.h:162
typename CacheStore::State State
Definition: cache.h:666
Element(StateId state_, StringView i, StringView o)
Definition: synchronize.h:72
std::unique_ptr< StateIteratorBase< Arc > > base
Definition: fst.h:382
void InitArcIterator(StateId s, ArcIteratorData< Arc > *data) const
Definition: cache.h:1042
Label Car(StringView str, Label label=0) const
Definition: synchronize.h:158
SynchronizeFstImpl(const SynchronizeFstImpl &impl)
Definition: synchronize.h:91
void EmplaceArc(StateId s, T &&...ctor_args)
Definition: cache.h:947
void SetInputSymbols(const SymbolTable *isyms)
Definition: fst.h:768
typename Store::State State
Definition: synchronize.h:322
bool Empty(StringView str, Label label=0) const
Definition: synchronize.h:181
constexpr uint64_t kFstProperties
Definition: properties.h:325
uint64_t Properties(uint64_t mask) const override
Definition: synchronize.h:144
ArcIterator(const SynchronizeFst< Arc > &fst, StateId s)
Definition: synchronize.h:370
typename internal::SynchronizeFstImpl< Arc >::Arc Arc
Definition: fst.h:211
std::basic_string< Label > String
Definition: synchronize.h:66
typename Arc::Weight Weight
Definition: synchronize.h:51
CacheOptions SynchronizeFstOptions
Definition: synchronize.h:39
typename SynchronizeFst< Arc >::Arc Arc
Definition: cache.h:1195
virtual const SymbolTable * InputSymbols() const =0
const SymbolTable * InputSymbols() const
Definition: fst.h:760
void SetType(std::string_view type)
Definition: fst.h:700
size_t NumInputEpsilons(StateId s)
Definition: synchronize.h:131
StringView Cdr(StringView str, Label label=0)
Definition: synchronize.h:168
SynchronizeFst * Copy(bool safe=false) const override
Definition: synchronize.h:337
void Synchronize(const Fst< Arc > &ifst, MutableFst< Arc > *ofst)
Definition: synchronize.h:401
typename Arc::StateId StateId
Definition: synchronize.h:50
std::basic_string_view< Label > StringView
Definition: synchronize.h:67
typename CacheState< Arc >::Arc Arc
Definition: cache.h:852
Impl * GetMutableImpl() const
Definition: fst.h:1028
uint64_t SynchronizeProperties(uint64_t inprops)
Definition: properties.cc:382
SynchronizeFstImpl(const Fst< Arc > &fst, const SynchronizeFstOptions &opts)
Definition: synchronize.h:82
void InitArcIterator(StateId s, ArcIteratorData< Arc > *data) const override
Definition: synchronize.h:343
const Impl * GetImpl() const
Definition: fst.h:1026
StringView Concat(StringView str, Label label=0)
Definition: synchronize.h:174
virtual const SymbolTable * OutputSymbols() const =0
StateId FindState(const Element &element)
Definition: synchronize.h:196