FST  openfst-1.8.3
OpenFst Library
synchronize.h
Go to the documentation of this file.
1 // Copyright 2005-2024 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the 'License');
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an 'AS IS' BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // See www.openfst.org for extensive documentation on this weighted
16 // finite-state transducer library.
17 //
18 // Synchronize an FST with bounded delay.
19 
20 #ifndef FST_SYNCHRONIZE_H_
21 #define FST_SYNCHRONIZE_H_
22 
23 #include <algorithm>
24 #include <cstddef>
25 #include <cstdint>
26 #include <functional>
27 #include <memory>
28 #include <string>
29 #include <string_view>
30 #include <utility>
31 #include <vector>
32 
33 #include <fst/cache.h>
34 #include <fst/fst.h>
35 #include <fst/impl-to-fst.h>
36 #include <fst/mutable-fst.h>
37 #include <fst/properties.h>
38 #include <unordered_map>
39 #include <unordered_set>
40 
41 namespace fst {
42 
44 
45 namespace internal {
46 
47 // Implementation class for SynchronizeFst.
48 // TODO(kbg,sorenj): Refactor to guarantee thread-safety.
49 
50 template <class Arc>
51 class SynchronizeFstImpl : public CacheImpl<Arc> {
52  public:
53  using Label = typename Arc::Label;
54  using StateId = typename Arc::StateId;
55  using Weight = typename Arc::Weight;
56 
61 
69 
70  // To avoid using `std::char_traits<Label>`, which is not guaranteed to exist,
71  // use `char32_t` for the backing strings instead of `Label`. We should
72  // probably use our own traits type instead.
73  static_assert(sizeof(Label) <= sizeof(char32_t),
74  "Label must fit in 32 bits. This is a hack.");
75  using String = std::basic_string<char32_t>;
76  using StringView = std::basic_string_view<char32_t>;
77 
78  struct Element {
79  Element() = default;
80 
82  : state(state_), istring(i), ostring(o) {}
83 
84  StateId state; // Input state ID.
85  StringView istring; // Residual input labels.
86  StringView ostring; // Residual output labels.
87  // Residual strings are represented by std::basic_string_view<Label> whose
88  // values are owned by the hash set string_set_.
89  };
90 
92  : CacheImpl<Arc>(opts), fst_(fst.Copy()) {
93  SetType("synchronize");
94  const auto props = fst.Properties(kFstProperties, false);
98  }
99 
101  : CacheImpl<Arc>(impl), fst_(impl.fst_->Copy(true)) {
102  SetType("synchronize");
106  }
107 
109  if (!HasStart()) {
110  auto start = fst_->Start();
111  if (start == kNoStateId) return kNoStateId;
112  const StringView empty = FindString(String());
113  start = FindState(Element(fst_->Start(), empty, empty));
114  SetStart(start);
115  }
116  return CacheImpl<Arc>::Start();
117  }
118 
120  if (!HasFinal(s)) {
121  const auto &element = elements_[s];
122  const auto weight = element.state == kNoStateId
123  ? Weight::One()
124  : fst_->Final(element.state);
125  if ((weight != Weight::Zero()) && element.istring.empty() &&
126  element.ostring.empty()) {
127  SetFinal(s, weight);
128  } else {
129  SetFinal(s, Weight::Zero());
130  }
131  }
132  return CacheImpl<Arc>::Final(s);
133  }
134 
135  size_t NumArcs(StateId s) {
136  if (!HasArcs(s)) Expand(s);
137  return CacheImpl<Arc>::NumArcs(s);
138  }
139 
141  if (!HasArcs(s)) Expand(s);
143  }
144 
146  if (!HasArcs(s)) Expand(s);
148  }
149 
150  uint64_t Properties() const override { return Properties(kFstProperties); }
151 
152  // Sets error if found, returning other FST impl properties.
153  uint64_t Properties(uint64_t mask) const override {
154  if ((mask & kError) && fst_->Properties(kError, false)) {
155  SetProperties(kError, kError);
156  }
157  return FstImpl<Arc>::Properties(mask);
158  }
159 
161  if (!HasArcs(s)) Expand(s);
163  }
164 
165  // Returns the first character of the string obtained by concatenating the
166  // string and the label.
167  Label Car(StringView str, Label label = 0) const {
168  if (!str.empty()) {
169  return str[0];
170  } else {
171  return label;
172  }
173  }
174 
175  // Computes the residual string obtained by removing the first
176  // character in the concatenation of the string and the label.
177  StringView Cdr(StringView str, Label label = 0) {
178  if (str.empty()) return FindString(String());
179  return Concat(str.substr(1), label);
180  }
181 
182  // Computes the concatenation of the string and the label.
183  StringView Concat(StringView str, Label label = 0) {
184  String r(str);
185  if (label) r.push_back(label);
186  return FindString(std::move(r));
187  }
188 
189  // Tests if the concatenation of the string and label is empty.
190  bool Empty(StringView str, Label label = 0) const {
191  if (str.empty()) {
192  return label == 0;
193  } else {
194  return false;
195  }
196  }
197 
199  const auto [str_it, unused] = string_set_.insert(std::forward<String>(str));
200  return *str_it;
201  }
202 
203  // Finds state corresponding to an element. Creates new state if element
204  // is not found.
205  StateId FindState(const Element &element) {
206  const auto &[iter, inserted] =
207  element_map_.emplace(element, elements_.size());
208  if (inserted) {
209  elements_.push_back(element);
210  }
211  return iter->second;
212  }
213 
214  // Computes the outgoing transitions from a state, creating new destination
215  // states as needed.
216  void Expand(StateId s) {
217  const auto element = elements_[s];
218  if (element.state != kNoStateId) {
219  for (ArcIterator<Fst<Arc>> aiter(*fst_, element.state); !aiter.Done();
220  aiter.Next()) {
221  const auto &arc = aiter.Value();
222  if (!Empty(element.istring, arc.ilabel) &&
223  !Empty(element.ostring, arc.olabel)) {
224  StringView istring = Cdr(element.istring, arc.ilabel);
225  StringView ostring = Cdr(element.ostring, arc.olabel);
226  EmplaceArc(s, Car(element.istring, arc.ilabel),
227  Car(element.ostring, arc.olabel), arc.weight,
228  FindState(Element(arc.nextstate, istring, ostring)));
229  } else {
230  StringView istring = Concat(element.istring, arc.ilabel);
231  StringView ostring = Concat(element.ostring, arc.olabel);
232  EmplaceArc(s, 0, 0, arc.weight,
233  FindState(Element(arc.nextstate, istring, ostring)));
234  }
235  }
236  }
237  const auto weight = element.state == kNoStateId
238  ? Weight::One()
239  : fst_->Final(element.state);
240  if ((weight != Weight::Zero()) &&
241  (element.istring.size() + element.ostring.size() > 0)) {
242  StringView istring = Cdr(element.istring);
243  StringView ostring = Cdr(element.ostring);
244  EmplaceArc(s, Car(element.istring), Car(element.ostring), weight,
245  FindState(Element(kNoStateId, istring, ostring)));
246  }
247  SetArcs(s);
248  }
249 
250  private:
251  // Equality function for Elements; assumes strings have been hashed.
252  class ElementEqual {
253  public:
254  bool operator()(const Element &x, const Element &y) const {
255  return x.state == y.state && x.istring.data() == y.istring.data() &&
256  x.ostring.data() == y.ostring.data();
257  }
258  };
259 
260  // Hash function for Elements to FST states.
261  class ElementKey {
262  public:
263  size_t operator()(const Element &x) const {
264  size_t key = x.state;
265  key = (key << 1) ^ x.istring.size();
266  for (Label label : x.istring) {
267  key = (key << 1) ^ label;
268  }
269  key = (key << 1) ^ x.ostring.size();
270  for (Label label : x.ostring) {
271  key = (key << 1) ^ label;
272  }
273  return key;
274  }
275  };
276 
277  // Hash function for set of strings. This only has to be specified since
278  // `std::hash<std::basic_string<T>>` is only guaranteed to be defined for
279  // certain values of `T`. Not defining this works fine on clang, but fails
280  // under GCC.
281  class StringKey {
282  public:
283  size_t operator()(StringView x) const {
284  size_t key = x.size();
285  for (Label label : x) key = (key << 1) ^ label;
286  return key;
287  }
288  };
289 
290  using ElementMap =
291  std::unordered_map<Element, StateId, ElementKey, ElementEqual>;
292  using StringSet = std::unordered_set<String, StringKey>;
293 
294  std::unique_ptr<const Fst<Arc>> fst_;
295  std::vector<Element> elements_; // Maps FST state to Elements.
296  ElementMap element_map_; // Maps Elements to FST state.
297  StringSet string_set_;
298 };
299 
300 } // namespace internal
301 
302 // Synchronizes a transducer. This version is a delayed FST. The result is an
303 // equivalent FST that has the property that during the traversal of a path,
304 // the delay is either zero or strictly increasing, where the delay is the
305 // difference between the number of non-epsilon output labels and input labels
306 // along the path.
307 //
308 // For the algorithm to terminate, the input transducer must have bounded
309 // delay, i.e., the delay of every cycle must be zero.
310 //
311 // Complexity:
312 //
313 // - A has bounded delay: exponential.
314 // - A does not have bounded delay: does not terminate.
315 //
316 // For more information, see:
317 //
318 // Mohri, M. 2003. Edit-distance of weighted automata: General definitions and
319 // algorithms. International Journal of Computer Science 14(6): 957-982.
320 //
321 // This class attaches interface to implementation and handles reference
322 // counting, delegating most methods to ImplToFst.
323 template <class A>
324 class SynchronizeFst : public ImplToFst<internal::SynchronizeFstImpl<A>> {
325  public:
326  using Arc = A;
327  using StateId = typename Arc::StateId;
328  using Weight = typename Arc::Weight;
329 
331  using State = typename Store::State;
333 
334  friend class ArcIterator<SynchronizeFst<A>>;
335  friend class StateIterator<SynchronizeFst<A>>;
336 
337  explicit SynchronizeFst(const Fst<A> &fst, const SynchronizeFstOptions &opts =
339  : ImplToFst<Impl>(std::make_shared<Impl>(fst, opts)) {}
340 
341  // See Fst<>::Copy() for doc.
342  SynchronizeFst(const SynchronizeFst &fst, bool safe = false)
343  : ImplToFst<Impl>(fst, safe) {}
344 
345  // Gets a copy of this SynchronizeFst. See Fst<>::Copy() for further doc.
346  SynchronizeFst *Copy(bool safe = false) const override {
347  return new SynchronizeFst(*this, safe);
348  }
349 
350  inline void InitStateIterator(StateIteratorData<Arc> *data) const override;
351 
352  void InitArcIterator(StateId s, ArcIteratorData<Arc> *data) const override {
353  GetMutableImpl()->InitArcIterator(s, data);
354  }
355 
356  private:
359 
360  SynchronizeFst &operator=(const SynchronizeFst &) = delete;
361 };
362 
363 // Specialization for SynchronizeFst.
364 template <class Arc>
366  : public CacheStateIterator<SynchronizeFst<Arc>> {
367  public:
369  : CacheStateIterator<SynchronizeFst<Arc>>(fst, fst.GetMutableImpl()) {}
370 };
371 
372 // Specialization for SynchronizeFst.
373 template <class Arc>
375  : public CacheArcIterator<SynchronizeFst<Arc>> {
376  public:
377  using StateId = typename Arc::StateId;
378 
380  : CacheArcIterator<SynchronizeFst<Arc>>(fst.GetMutableImpl(), s) {
381  if (!fst.GetImpl()->HasArcs(s)) fst.GetMutableImpl()->Expand(s);
382  }
383 };
384 
385 template <class Arc>
387  StateIteratorData<Arc> *data) const {
388  data->base = std::make_unique<StateIterator<SynchronizeFst<Arc>>>(*this);
389 }
390 
391 // Synchronizes a transducer. This version writes the synchronized result to a
392 // MutableFst. The result will be an equivalent FST that has the property that
393 // during the traversal of a path, the delay is either zero or strictly
394 // increasing, where the delay is the difference between the number of
395 // non-epsilon output labels and input labels along the path.
396 //
397 // For the algorithm to terminate, the input transducer must have bounded
398 // delay, i.e., the delay of every cycle must be zero.
399 //
400 // Complexity:
401 //
402 // - A has bounded delay: exponential.
403 // - A does not have bounded delay: does not terminate.
404 //
405 // For more information, see:
406 //
407 // Mohri, M. 2003. Edit-distance of weighted automata: General definitions and
408 // algorithms. International Journal of Computer Science 14(6): 957-982.
409 template <class Arc>
410 void Synchronize(const Fst<Arc> &ifst, MutableFst<Arc> *ofst) {
411  // Caches only the last state for fastest copy.
412  const SynchronizeFstOptions opts(FST_FLAGS_fst_default_cache_gc,
413  0);
414  *ofst = SynchronizeFst<Arc>(ifst, opts);
415 }
416 
417 } // namespace fst
418 
419 #endif // FST_SYNCHRONIZE_H_
SynchronizeFst(const SynchronizeFst &fst, bool safe=false)
Definition: synchronize.h:342
StateIterator(const SynchronizeFst< Arc > &fst)
Definition: synchronize.h:368
virtual uint64_t Properties(uint64_t mask, bool test) const =0
void SetFinal(StateId s, Weight weight=Weight::One())
Definition: cache.h:930
size_t NumOutputEpsilons(StateId s)
Definition: synchronize.h:145
void InitArcIterator(StateId s, ArcIteratorData< Arc > *data)
Definition: synchronize.h:160
const SymbolTable * OutputSymbols() const
Definition: fst.h:761
constexpr uint64_t kError
Definition: properties.h:52
void SetOutputSymbols(const SymbolTable *osyms)
Definition: fst.h:771
typename SynchronizeFst< Arc >::Arc Arc
Definition: cache.h:1156
constexpr int kNoStateId
Definition: fst.h:196
uint64_t Properties() const override
Definition: synchronize.h:150
void InitStateIterator(StateIteratorData< Arc > *data) const override
Definition: synchronize.h:386
StringView FindString(String &&str)
Definition: synchronize.h:198
virtual uint64_t Properties() const
Definition: fst.h:701
SynchronizeFst(const Fst< A > &fst, const SynchronizeFstOptions &opts=SynchronizeFstOptions())
Definition: synchronize.h:337
constexpr uint64_t kCopyProperties
Definition: properties.h:163
typename CacheStore::State State
Definition: cache.h:673
Element(StateId state_, StringView i, StringView o)
Definition: synchronize.h:81
std::unique_ptr< StateIteratorBase< Arc > > base
Definition: fst.h:382
void InitArcIterator(StateId s, ArcIteratorData< Arc > *data) const
Definition: cache.h:1049
Label Car(StringView str, Label label=0) const
Definition: synchronize.h:167
SynchronizeFstImpl(const SynchronizeFstImpl &impl)
Definition: synchronize.h:100
void EmplaceArc(StateId s, T &&...ctor_args)
Definition: cache.h:954
void SetInputSymbols(const SymbolTable *isyms)
Definition: fst.h:767
typename Store::State State
Definition: synchronize.h:331
bool Empty(StringView str, Label label=0) const
Definition: synchronize.h:190
constexpr uint64_t kFstProperties
Definition: properties.h:326
uint64_t Properties(uint64_t mask) const override
Definition: synchronize.h:153
ArcIterator(const SynchronizeFst< Arc > &fst, StateId s)
Definition: synchronize.h:379
typename internal::SynchronizeFstImpl< Arc >::Arc Arc
Definition: fst.h:205
typename Arc::Weight Weight
Definition: synchronize.h:55
CacheOptions SynchronizeFstOptions
Definition: synchronize.h:43
typename SynchronizeFst< Arc >::Arc Arc
Definition: cache.h:1202
virtual const SymbolTable * InputSymbols() const =0
const SymbolTable * InputSymbols() const
Definition: fst.h:759
std::basic_string_view< char32_t > StringView
Definition: synchronize.h:76
void SetType(std::string_view type)
Definition: fst.h:699
size_t NumInputEpsilons(StateId s)
Definition: synchronize.h:140
StringView Cdr(StringView str, Label label=0)
Definition: synchronize.h:177
SynchronizeFst * Copy(bool safe=false) const override
Definition: synchronize.h:346
void Synchronize(const Fst< Arc > &ifst, MutableFst< Arc > *ofst)
Definition: synchronize.h:410
typename Arc::StateId StateId
Definition: synchronize.h:54
typename CacheState< Arc >::Arc Arc
Definition: cache.h:859
Impl * GetMutableImpl() const
Definition: impl-to-fst.h:125
std::basic_string< char32_t > String
Definition: synchronize.h:75
uint64_t SynchronizeProperties(uint64_t inprops)
Definition: properties.cc:381
SynchronizeFstImpl(const Fst< Arc > &fst, const SynchronizeFstOptions &opts)
Definition: synchronize.h:91
void InitArcIterator(StateId s, ArcIteratorData< Arc > *data) const override
Definition: synchronize.h:352
const Impl * GetImpl() const
Definition: impl-to-fst.h:123
StringView Concat(StringView str, Label label=0)
Definition: synchronize.h:183
virtual const SymbolTable * OutputSymbols() const =0
StateId FindState(const Element &element)
Definition: synchronize.h:205