FST  openfst-1.7.2
OpenFst Library
set-weight.h
Go to the documentation of this file.
1 // See www.openfst.org for extensive documentation on this weighted
2 // finite-state transducer library.
3 //
4 // Weights consisting of sets (of integral Labels) and
5 // associated semiring operation definitions using intersect
6 // and union.
7 
8 #ifndef FST_SET_WEIGHT_H_
9 #define FST_SET_WEIGHT_H_
10 
11 #include <cstdlib>
12 
13 #include <algorithm>
14 #include <list>
15 #include <string>
16 #include <vector>
17 
18 #include <fst/union-weight.h>
19 #include <fst/weight.h>
20 
21 
22 namespace fst {
23 
24 constexpr int kSetEmpty = 0; // Label for the empty set.
25 constexpr int kSetUniv = -1; // Label for the universal set.
26 constexpr int kSetBad = -2; // Label for a non-set.
27 constexpr char kSetSeparator = '_'; // Label separator in sets.
28 
29 // Determines whether to use (intersect, union) or (union, intersect)
30 // as (+, *) for the semiring. SET_INTERSECT_UNION_RESTRICTED is a
31 // restricted version of (intersect, union) that requires summed
32 // arguments to be equal (or an error is signalled), useful for
33 // algorithms that require a unique labelled path weight. SET_BOOLEAN
34 // treats all non-Zero() elements as equivalent (with Zero() ==
35 // UnivSet()), useful for algorithms that don't really depend on the
36 // detailed sets.
40  SET_BOOLEAN = 3 };
41 
42 template <class>
44 
45 // Set semiring of integral labels.
46 template <typename Label_, SetType S = SET_INTERSECT_UNION>
47 class SetWeight {
48  public:
49  using Label = Label_;
53  // Allow type-converting copy and move constructors private access.
54  template <typename L2, SetType S2>
55  friend class SetWeight;
56 
57  SetWeight() {}
58 
59  // Input should be positive, sorted and unique.
60  template <typename Iterator>
61  SetWeight(const Iterator &begin, const Iterator &end) {
62  for (auto iter = begin; iter != end; ++iter) PushBack(*iter);
63  }
64 
65  // Input should be positive. (Non-positive value has
66  // special internal meaning w.r.t. integral constants above.)
67  explicit SetWeight(Label label) { PushBack(label); }
68 
69  template <SetType S2>
70  explicit SetWeight(const SetWeight<Label, S2> &w)
71  : first_(w.first_), rest_(w.rest_) {}
72 
73  template <SetType S2>
75  : first_(w.first_), rest_(std::move(w.rest_)) { w.Clear(); }
76 
77  template <SetType S2>
79  first_ = w.first_;
80  rest_ = w.rest_;
81  return *this;
82  }
83 
84  template <SetType S2>
86  first_ = w.first_;
87  rest_ = std::move(w.rest_);
88  w.Clear();
89  return *this;
90  }
91 
92  static const SetWeight &Zero() {
93  return S == SET_UNION_INTERSECT ? EmptySet() : UnivSet();
94  }
95 
96  static const SetWeight &One() {
97  return S == SET_UNION_INTERSECT ? UnivSet() : EmptySet();
98  }
99 
100  static const SetWeight &NoWeight() {
101  static const auto *const no_weight = new SetWeight(Label(kSetBad));
102  return *no_weight;
103  }
104 
105  static const string &Type() {
106  static const string *const type = new string(
108  ? "union_intersect_set"
109  : (S == SET_INTERSECT_UNION
110  ? "intersect_union_set"
112  ? "restricted_set_intersect_union"
113  : "boolean_set")));
114  return *type;
115  }
116 
117  bool Member() const;
118 
119  std::istream &Read(std::istream &strm);
120 
121  std::ostream &Write(std::ostream &strm) const;
122 
123  size_t Hash() const;
124 
125  SetWeight Quantize(float delta = kDelta) const { return *this; }
126 
127  ReverseWeight Reverse() const;
128 
129  static constexpr uint64 Properties() {
131  }
132 
133  // These operations combined with the SetWeightIterator
134  // provide the access and mutation of the set internal elements.
135 
136  // The empty set.
137  static const SetWeight &EmptySet() {
138  static const auto *const empty = new SetWeight(Label(kSetEmpty));
139  return *empty;
140  }
141 
142  // The univeral set.
143  static const SetWeight &UnivSet() {
144  static const auto *const univ = new SetWeight(Label(kSetUniv));
145  return *univ;
146  }
147 
148  // Clear existing SetWeight.
149  void Clear() {
150  first_ = kSetEmpty;
151  rest_.clear();
152  }
153 
154  size_t Size() const { return first_ == kSetEmpty ? 0 : rest_.size() + 1; }
155 
157  if (rest_.empty()) {
158  return first_;
159  } else {
160  return rest_.back();
161  }
162  }
163 
164  // Caller must add in sort order and be unique (or error signalled).
165  // Input should also be positive. Non-positive value (for the first
166  // push) has special internal meaning w.r.t. integral constants above.
167  void PushBack(Label label) {
168  if (first_ == kSetEmpty) {
169  first_ = label;
170  } else {
171  if (label <= Back() || label <= 0) {
172  FSTERROR() << "SetWeight: labels must be positive, added"
173  << " in sort order and be unique.";
174  rest_.push_back(Label(kSetBad));
175  }
176  rest_.push_back(label);
177  }
178  }
179 
180  private:
181  Label first_ = kSetEmpty; // First label in set (kSetEmpty if empty).
182  std::list<Label> rest_; // Remaining labels in set.
183 };
184 
185 // Traverses set in forward direction.
186 template <class SetWeight_>
187 class SetWeightIterator {
188  public:
189  using Weight = SetWeight_;
190  using Label = typename Weight::Label;
191 
192  explicit SetWeightIterator(const Weight &w)
193  : first_(w.first_), rest_(w.rest_), init_(true), iter_(rest_.begin()) {}
194 
195  bool Done() const {
196  if (init_) {
197  return first_ == kSetEmpty;
198  } else {
199  return iter_ == rest_.end();
200  }
201  }
202 
203  const Label &Value() const { return init_ ? first_ : *iter_; }
204 
205  void Next() {
206  if (init_) {
207  init_ = false;
208  } else {
209  ++iter_;
210  }
211  }
212 
213  void Reset() {
214  init_ = true;
215  iter_ = rest_.begin();
216  }
217 
218  private:
219  const Label &first_;
220  const decltype(Weight::rest_) &rest_;
221  bool init_; // In the initialized state?
222  typename decltype(Weight::rest_)::const_iterator iter_;
223 };
224 
225 
226 // SetWeight member functions follow that require SetWeightIterator
227 
228 template <typename Label, SetType S>
229 inline std::istream &SetWeight<Label, S>::Read(std::istream &strm) {
230  Clear();
231  int32 size;
232  ReadType(strm, &size);
233  for (int32 i = 0; i < size; ++i) {
234  Label label;
235  ReadType(strm, &label);
236  PushBack(label);
237  }
238  return strm;
239 }
240 
241 template <typename Label, SetType S>
242 inline std::ostream &SetWeight<Label, S>::Write(std::ostream &strm) const {
243  const int32 size = Size();
244  WriteType(strm, size);
245  for (Iterator iter(*this); !iter.Done(); iter.Next()) {
246  WriteType(strm, iter.Value());
247  }
248  return strm;
249 }
250 
251 template <typename Label, SetType S>
252 inline bool SetWeight<Label, S>::Member() const {
253  Iterator iter(*this);
254  return iter.Value() != Label(kSetBad);
255 }
256 
257 template <typename Label, SetType S>
260  return *this;
261 }
262 
263 template <typename Label, SetType S>
264 inline size_t SetWeight<Label, S>::Hash() const {
265  using Weight = SetWeight<Label, S>;
266  if (S == SET_BOOLEAN) {
267  return *this == Weight::Zero() ? 0 : 1;
268  } else {
269  size_t h = 0;
270  for (Iterator iter(*this); !iter.Done(); iter.Next()) {
271  h ^= h << 1 ^ iter.Value();
272  }
273  return h;
274  }
275 }
276 
277 // Default ==
278 template <typename Label, SetType S>
279 inline bool operator==(const SetWeight<Label, S> &w1,
280  const SetWeight<Label, S> &w2) {
281  if (w1.Size() != w2.Size()) return false;
282  using Iterator = typename SetWeight<Label, S>::Iterator;
283  Iterator iter1(w1);
284  Iterator iter2(w2);
285  for (; !iter1.Done(); iter1.Next(), iter2.Next()) {
286  if (iter1.Value() != iter2.Value()) return false;
287  }
288  return true;
289 }
290 
291 // Boolean ==
292 template <typename Label>
294  const SetWeight<Label, SET_BOOLEAN> &w2) {
295  // x == kSetEmpty if x \nin {kUnivSet, kSetBad}
296  if (!w1.Member() || !w2.Member()) return false;
298  Iterator iter1(w1);
299  Iterator iter2(w2);
300  Label label1 = iter1.Done() ? kSetEmpty : iter1.Value();
301  Label label2 = iter2.Done() ? kSetEmpty : iter2.Value();
302  if (label1 == kSetUniv) return label2 == kSetUniv;
303  if (label2 == kSetUniv) return label1 == kSetUniv;
304  return true;
305 }
306 
307 template <typename Label, SetType S>
308 inline bool operator!=(const SetWeight<Label, S> &w1,
309  const SetWeight<Label, S> &w2) {
310  return !(w1 == w2);
311 }
312 
313 template <typename Label, SetType S>
314 inline bool ApproxEqual(const SetWeight<Label, S> &w1,
315  const SetWeight<Label, S> &w2,
316  float delta = kDelta) {
317  return w1 == w2;
318 }
319 
320 template <typename Label, SetType S>
321 inline std::ostream &operator<<(std::ostream &strm,
322  const SetWeight<Label, S> &weight) {
323  typename SetWeight<Label, S>::Iterator iter(weight);
324  if (iter.Done()) {
325  return strm << "EmptySet";
326  } else if (iter.Value() == Label(kSetUniv)) {
327  return strm << "UnivSet";
328  } else if (iter.Value() == Label(kSetBad)) {
329  return strm << "BadSet";
330  } else {
331  for (size_t i = 0; !iter.Done(); ++i, iter.Next()) {
332  if (i > 0) strm << kSetSeparator;
333  strm << iter.Value();
334  }
335  }
336  return strm;
337 }
338 
339 template <typename Label, SetType S>
340 inline std::istream &operator>>(std::istream &strm,
341  SetWeight<Label, S> &weight) {
342  string str;
343  strm >> str;
344  using Weight = SetWeight<Label, S>;
345  if (str == "EmptySet") {
346  weight = Weight(Label(kSetEmpty));
347  } else if (str == "UnivSet") {
348  weight = Weight(Label(kSetUniv));
349  } else {
350  weight.Clear();
351  char *p = nullptr;
352  for (const char *cs = str.c_str(); !p || *p != '\0'; cs = p + 1) {
353  const Label label = strtoll(cs, &p, 10);
354  if (p == cs || (*p != 0 && *p != kSetSeparator)) {
355  strm.clear(std::ios::badbit);
356  break;
357  }
358  weight.PushBack(label);
359  }
360  }
361  return strm;
362 }
363 
364 template <typename Label, SetType S>
366  const SetWeight<Label, S> &w1,
367  const SetWeight<Label, S> &w2) {
368  using Weight = SetWeight<Label, S>;
369  using Iterator = typename SetWeight<Label, S>::Iterator;
370  if (!w1.Member() || !w2.Member()) return Weight::NoWeight();
371  if (w1 == Weight::EmptySet()) return w2;
372  if (w2 == Weight::EmptySet()) return w1;
373  if (w1 == Weight::UnivSet()) return w1;
374  if (w2 == Weight::UnivSet()) return w2;
375  Iterator it1(w1);
376  Iterator it2(w2);
377  Weight result;
378  while (!it1.Done() && !it2.Done()) {
379  const auto v1 = it1.Value();
380  const auto v2 = it2.Value();
381  if (v1 < v2) {
382  result.PushBack(v1);
383  it1.Next();
384  } else if (v1 > v2) {
385  result.PushBack(v2);
386  it2.Next();
387  } else {
388  result.PushBack(v1);
389  it1.Next();
390  it2.Next();
391  }
392  }
393  for (; !it1.Done(); it1.Next()) result.PushBack(it1.Value());
394  for (; !it2.Done(); it2.Next()) result.PushBack(it2.Value());
395  return result;
396 }
397 
398 template <typename Label, SetType S>
400  const SetWeight<Label, S> &w1,
401  const SetWeight<Label, S> &w2) {
402  using Weight = SetWeight<Label, S>;
403  using Iterator = typename SetWeight<Label, S>::Iterator;
404  if (!w1.Member() || !w2.Member()) return Weight::NoWeight();
405  if (w1 == Weight::EmptySet()) return w1;
406  if (w2 == Weight::EmptySet()) return w2;
407  if (w1 == Weight::UnivSet()) return w2;
408  if (w2 == Weight::UnivSet()) return w1;
409  Iterator it1(w1);
410  Iterator it2(w2);
411  Weight result;
412  while (!it1.Done() && !it2.Done()) {
413  const auto v1 = it1.Value();
414  const auto v2 = it2.Value();
415  if (v1 < v2) {
416  it1.Next();
417  } else if (v1 > v2) {
418  it2.Next();
419  } else {
420  result.PushBack(v1);
421  it1.Next();
422  it2.Next();
423  }
424  }
425  return result;
426 }
427 
428 template <typename Label, SetType S>
430  const SetWeight<Label, S> &w1,
431  const SetWeight<Label, S> &w2) {
432  using Weight = SetWeight<Label, S>;
433  using Iterator = typename SetWeight<Label, S>::Iterator;
434  if (!w1.Member() || !w2.Member()) return Weight::NoWeight();
435  if (w1 == Weight::EmptySet()) return w1;
436  if (w2 == Weight::EmptySet()) return w1;
437  if (w2 == Weight::UnivSet()) return Weight::EmptySet();
438  Iterator it1(w1);
439  Iterator it2(w2);
440  Weight result;
441  while (!it1.Done() && !it2.Done()) {
442  const auto v1 = it1.Value();
443  const auto v2 = it2.Value();
444  if (v1 < v2) {
445  result.PushBack(v1);
446  it1.Next();
447  } else if (v1 > v2) {
448  it2.Next();
449  } else {
450  it1.Next();
451  it2.Next();
452  }
453  }
454  for (; !it1.Done(); it1.Next()) result.PushBack(it1.Value());
455  return result;
456 }
457 
458 // Default: Plus = Intersect.
459 template <typename Label, SetType S>
461  const SetWeight<Label, S> &w1,
462  const SetWeight<Label, S> &w2) {
463  return Intersect(w1, w2);
464 }
465 
466 // Plus = Union.
467 template <typename Label>
471  return Union(w1, w2);
472 }
473 
474 // Plus = Set equality is required (for non-Zero() input). The
475 // restriction is useful (e.g., in determinization) to ensure the input
476 // has a unique labelled path weight.
477 template <typename Label>
482  if (!w1.Member() || !w2.Member()) return Weight::NoWeight();
483  if (w1 == Weight::Zero()) return w2;
484  if (w2 == Weight::Zero()) return w1;
485  if (w1 != w2) {
486  FSTERROR() << "SetWeight::Plus: Unequal arguments "
487  << "(non-unique labelled path weights?)"
488  << " w1 = " << w1 << " w2 = " << w2;
489  return Weight::NoWeight();
490  }
491  return w1;
492 }
493 
494 // Plus = Or.
495 template <typename Label>
498  const SetWeight<Label, SET_BOOLEAN> &w2) {
499  using Weight = SetWeight<Label, SET_BOOLEAN>;
500  if (!w1.Member() || !w2.Member()) return Weight::NoWeight();
501  if (w1 == Weight::One()) return w1;
502  if (w2 == Weight::One()) return w2;
503  return Weight::Zero();
504 }
505 
506 // Default: Times = Union.
507 template <typename Label, SetType S>
509  const SetWeight<Label, S> &w1,
510  const SetWeight<Label, S> &w2) {
511  return Union(w1, w2);
512 }
513 
514 // Times = Intersect.
515 template <typename Label>
519  return Intersect(w1, w2);
520 }
521 
522 // Times = And.
523 template <typename Label>
526  const SetWeight<Label, SET_BOOLEAN> &w2) {
527  using Weight = SetWeight<Label, SET_BOOLEAN>;
528  if (!w1.Member() || !w2.Member()) return Weight::NoWeight();
529  if (w1 == Weight::One()) return w2;
530  return w1;
531 }
532 
533 // Divide = Difference.
534 template <typename Label, SetType S>
536  const SetWeight<Label, S> &w2,
537  DivideType divide_type = DIVIDE_ANY) {
538  return Difference(w1, w2);
539 }
540 
541 // Divide = dividend (or the universal set if the
542 // dividend == divisor).
543 template <typename Label>
547  DivideType divide_type = DIVIDE_ANY) {
549  if (!w1.Member() || !w2.Member()) return Weight::NoWeight();
550  if (w1 == w2) return Weight::UnivSet();
551  return w1;
552 }
553 
554 // Divide = Or Not.
555 template <typename Label>
559  DivideType divide_type = DIVIDE_ANY) {
560  using Weight = SetWeight<Label, SET_BOOLEAN>;
561  if (!w1.Member() || !w2.Member()) return Weight::NoWeight();
562  if (w1 == Weight::One()) return w1;
563  if (w2 == Weight::Zero()) return Weight::One();
564  return Weight::Zero();
565 }
566 
567 // Converts between different set types.
568 template <typename Label, SetType S1, SetType S2>
573  for (Iterator iter(w1); !iter.Done(); iter.Next())
574  w2.PushBack(iter.Value());
575  return w2;
576  }
577 };
578 
579 // This function object generates SetWeights that are random integer sets
580 // from {1, ... , alphabet_size}^{0, max_set_length} U { Zero }. This is
581 // intended primarily for testing.
582 template <class Label, SetType S>
584  public:
586 
587  explicit WeightGenerate(bool allow_zero = true,
588  size_t alphabet_size = kNumRandomWeights,
589  size_t max_set_length = kNumRandomWeights)
590  : allow_zero_(allow_zero),
591  alphabet_size_(alphabet_size),
592  max_set_length_(max_set_length) {}
593 
594  Weight operator()() const {
595  const size_t n = rand() % (max_set_length_ + allow_zero_); // NOLINT
596  if (allow_zero_ && n == max_set_length_) return Weight::Zero();
597  std::vector<Label> labels;
598  for (size_t i = 0; i < n; ++i) {
599  labels.push_back(rand() % alphabet_size_ + 1); // NOLINT
600  }
601  std::sort(labels.begin(), labels.end());
602  const auto labels_end = std::unique(labels.begin(), labels.end());
603  labels.resize(labels_end - labels.begin());
604  return Weight(labels.begin(), labels.end());
605  }
606 
607  private:
608  // Permits Zero() and zero divisors.
609  const bool allow_zero_;
610  // Alphabet size for random weights.
611  const size_t alphabet_size_;
612  // Number of alternative random weights.
613  const size_t max_set_length_;
614 };
615 
616 } // namespace fst
617 
618 #endif // FST_SET_WEIGHT_H_
std::ostream & Write(std::ostream &strm) const
Definition: set-weight.h:242
static const SetWeight & One()
Definition: set-weight.h:96
constexpr char kSetSeparator
Definition: set-weight.h:27
ExpectationWeight< X1, X2 > Divide(const ExpectationWeight< X1, X2 > &w1, const ExpectationWeight< X1, X2 > &w2, DivideType typ=DIVIDE_ANY)
uint64_t uint64
Definition: types.h:32
SetWeight(const Iterator &begin, const Iterator &end)
Definition: set-weight.h:61
void PushBack(Label label)
Definition: set-weight.h:167
static const SetWeight & EmptySet()
Definition: set-weight.h:137
ReverseWeight Reverse() const
Definition: set-weight.h:259
constexpr uint64 kRightSemiring
Definition: weight.h:115
SetWeight(const SetWeight< Label, S2 > &w)
Definition: set-weight.h:70
static const SetWeight & UnivSet()
Definition: set-weight.h:143
SetWeight Quantize(float delta=kDelta) const
Definition: set-weight.h:125
ExpectationWeight< X1, X2 > Times(const ExpectationWeight< X1, X2 > &w1, const ExpectationWeight< X1, X2 > &w2)
SetType
Definition: set-weight.h:37
void Intersect(const Fst< Arc > &ifst1, const Fst< Arc > &ifst2, MutableFst< Arc > *ofst, const IntersectOptions &opts=IntersectOptions())
Definition: intersect.h:127
constexpr uint64 kCommutative
Definition: weight.h:120
constexpr int kSetUniv
Definition: set-weight.h:25
constexpr uint64 kLeftSemiring
Definition: weight.h:112
size_t Size() const
Definition: set-weight.h:154
std::istream & Read(std::istream &strm)
Definition: set-weight.h:229
SetWeight(Label label)
Definition: set-weight.h:67
std::ostream & WriteType(std::ostream &strm, const T t)
Definition: util.h:155
void Difference(const Fst< Arc > &ifst1, const Fst< Arc > &ifst2, MutableFst< Arc > *ofst, const DifferenceOptions &opts=DifferenceOptions())
Definition: difference.h:151
const Label & Value() const
Definition: set-weight.h:203
#define FSTERROR()
Definition: util.h:35
typename Weight::Label Label
Definition: set-weight.h:190
size_t Hash() const
Definition: set-weight.h:264
bool Member() const
Definition: set-weight.h:252
std::istream & operator>>(std::istream &strm, FloatWeightTpl< T > &w)
Definition: float-weight.h:160
static constexpr uint64 Properties()
Definition: set-weight.h:129
void Union(RationalFst< Arc > *fst1, const Fst< Arc > &fst2)
Definition: union.h:85
SetWeightIterator(const Weight &w)
Definition: set-weight.h:192
bool operator==(const PdtStateTuple< S, K > &x, const PdtStateTuple< S, K > &y)
Definition: pdt.h:133
static const string & Type()
Definition: set-weight.h:105
constexpr uint64 kIdempotent
Definition: weight.h:123
ExpectationWeight< X1, X2 > Plus(const ExpectationWeight< X1, X2 > &w1, const ExpectationWeight< X1, X2 > &w2)
std::ostream & operator<<(std::ostream &strm, const FloatWeightTpl< T > &w)
Definition: float-weight.h:146
Label Back()
Definition: set-weight.h:156
static const SetWeight & NoWeight()
Definition: set-weight.h:100
constexpr bool operator!=(const FloatWeightTpl< T > &w1, const FloatWeightTpl< T > &w2)
Definition: float-weight.h:119
static const SetWeight & Zero()
Definition: set-weight.h:92
SetWeight & operator=(const SetWeight< Label, S2 > &w)
Definition: set-weight.h:78
SetWeight< Label, S2 > operator()(const SetWeight< Label, S1 > &w1) const
Definition: set-weight.h:570
SetWeightIterator< SetWeight > Iterator
Definition: set-weight.h:51
int32_t int32
Definition: types.h:26
constexpr int kSetBad
Definition: set-weight.h:26
constexpr bool ApproxEqual(const FloatWeightTpl< T > &w1, const FloatWeightTpl< T > &w2, float delta=kDelta)
Definition: float-weight.h:140
constexpr size_t kNumRandomWeights
Definition: weight.h:130
std::istream & ReadType(std::istream &strm, T *t)
Definition: util.h:47
SetWeight & operator=(SetWeight< Label, S2 > &&w)
Definition: set-weight.h:85
WeightGenerate(bool allow_zero=true, size_t alphabet_size=kNumRandomWeights, size_t max_set_length=kNumRandomWeights)
Definition: set-weight.h:587
DivideType
Definition: weight.h:142
Label_ Label
Definition: set-weight.h:49
constexpr float kDelta
Definition: weight.h:109
bool Done() const
Definition: set-weight.h:195
SetWeight(SetWeight< Label, S2 > &&w)
Definition: set-weight.h:74
constexpr int kSetEmpty
Definition: set-weight.h:24