FST  openfst-1.7.2 OpenFst Library
weight.h
Go to the documentation of this file.
1 // See www.openfst.org for extensive documentation on this weighted
2 // finite-state transducer library.
3 //
4 // General weight set and associated semiring operation definitions.
5
6 #ifndef FST_WEIGHT_H_
7 #define FST_WEIGHT_H_
8
9 #include <cctype>
10 #include <cmath>
11 #include <iostream>
12 #include <sstream>
13 #include <type_traits>
14 #include <utility>
15
16 #include <fst/compat.h>
17 #include <fst/log.h>
18
19 #include <fst/util.h>
20
21
22 DECLARE_string(fst_weight_parentheses);
23 DECLARE_string(fst_weight_separator);
24
25 namespace fst {
26
27 // A semiring is specified by two binary operations Plus and Times and two
28 // designated elements Zero and One with the following properties:
29 //
30 // Plus: associative, commutative, and has Zero as its identity.
31 //
32 // Times: associative and has identity One, distributes w.r.t. Plus, and
33 // has Zero as an annihilator:
34 // Times(Zero(), a) == Times(a, Zero()) = Zero().
35 //
36 // A left semiring distributes on the left; a right semiring is similarly
37 // defined.
38 //
39 // A Weight class must have binary functions Plus and Times and static member
40 // functions Zero() and One() and these must form (at least) a left or right
41 // semiring.
42 //
43 // In addition, the following should be defined for a Weight:
44 //
45 // Member: predicate on set membership.
46 //
47 // NoWeight: static member function that returns an element that is
48 // not a set member; used to signal an error.
49 //
50 // >>: reads textual representation of a weight.
51 //
52 // <<: prints textual representation of a weight.
53 //
55 //
56 // Write(ostream &ostrm): writes binary representation of a weight.
57 //
58 // Hash: maps weight to size_t.
59 //
60 // ApproxEqual: approximate equality (for inexact weights)
61 //
62 // Quantize: quantizes w.r.t delta (for inexact weights)
63 //
64 // Divide:
65 // - In a left semiring, for all a, b, b', c:
66 // if Times(a, b) = c, Divide(c, a, DIVIDE_LEFT) = b' and b'.Member(),
67 // then Times(a, b') = c.
68 // - In a right semiring, for all a, a', b, c:
69 // if Times(a, b) = c, Divide(c, b, DIVIDE_RIGHT) = a' and a'.Member(),
70 // then Times(a', b) = c.
71 // - In a commutative semiring,
72 // * for all a, c:
73 // Divide(c, a, DIVIDE_ANY) = Divide(c, a, DIVIDE_LEFT)
74 // = Divide(c, a, DIVIDE_RIGHT)
75 // * for all a, b, b', c:
76 // if Times(a, b), Divide(c, a, DIVIDE_ANY) = b' and b'.Member(),
77 // then Times(a, b') = c
78 // - In the case where there exist no b such that c = Times(a, b), the
79 // return value of Divide(c, a, DIVIDE_LEFT) is unspecified. Returning
80 // Weight::NoWeight() is recommemded but not required in order to
81 // allow the most efficient implementation.
82 // - All algorithms in this library only call Divide(c, a) when it is
83 // guaranteed that there exists a b such that c = Times(a, b).
84 //
85 // ReverseWeight: the type of the corresponding reverse weight.
86 //
87 // Typically the same type as Weight for a (both left and right) semiring.
88 // For the left string semiring, it is the right string semiring.
89 //
90 // Reverse: a mapping from Weight to ReverseWeight s.t.
91 //
92 // --> Reverse(Reverse(a)) = a
93 // --> Reverse(Plus(a, b)) = Plus(Reverse(a), Reverse(b))
94 // --> Reverse(Times(a, b)) = Times(Reverse(b), Reverse(a))
95 // Typically the identity mapping in a (both left and right) semiring.
96 // In the left string semiring, it maps to the reverse string in the right
97 // string semiring.
98 //
99 // Properties: specifies additional properties that hold:
100 // LeftSemiring: indicates weights form a left semiring.
101 // RightSemiring: indicates weights form a right semiring.
102 // Commutative: for all a,b: Times(a,b) == Times(b,a)
103 // Idempotent: for all a: Plus(a, a) == a.
104 // Path: for all a, b: Plus(a, b) == a or Plus(a, b) == b.
105
106 // CONSTANT DEFINITIONS
107
108 // A representable float near .001.
109 constexpr float kDelta = 1.0F / 1024.0F;
110
111 // For all a, b, c: Times(c, Plus(a, b)) = Plus(Times(c, a), Times(c, b)).
112 constexpr uint64 kLeftSemiring = 0x0000000000000001ULL;
113
114 // For all a, b, c: Times(Plus(a, b), c) = Plus(Times(a, c), Times(b, c)).
115 constexpr uint64 kRightSemiring = 0x0000000000000002ULL;
116
117 constexpr uint64 kSemiring = kLeftSemiring | kRightSemiring;
118
119 // For all a, b: Times(a, b) = Times(b, a).
120 constexpr uint64 kCommutative = 0x0000000000000004ULL;
121
122 // For all a: Plus(a, a) = a.
123 constexpr uint64 kIdempotent = 0x0000000000000008ULL;
124
125 // For all a, b: Plus(a, b) = a or Plus(a, b) = b.
126 constexpr uint64 kPath = 0x0000000000000010ULL;
127
128 // For random weight generation: default number of distinct weights.
129 // This is also used for a few other weight generation defaults.
130 constexpr size_t kNumRandomWeights = 5;
131
132 // Weight property boolean constants needed for SFINAE.
133
134 template <class W>
135 using IsIdempotent = std::integral_constant<bool,
136  (W::Properties() & kIdempotent) != 0>;
137
138 template <class W>
139 using IsPath = std::integral_constant<bool, (W::Properties() & kPath) != 0>;
140
141 // Determines direction of division.
143  DIVIDE_LEFT, // left division
144  DIVIDE_RIGHT, // right division
146 }; // division in a commutative semiring
147
148 // NATURAL ORDER
149 //
150 // By definition:
151 //
152 // a <= b iff a + b = a
153 //
154 // The natural order is a negative partial order iff the semiring is
155 // idempotent. It is trivially monotonic for plus. It is left
156 // (resp. right) monotonic for times iff the semiring is left
157 // (resp. right) distributive. It is a total order iff the semiring
158 // has the path property.
159 //
161 //
162 // Mohri, M. 2002. Semiring framework and algorithms for shortest-distance
163 // problems, Journal of Automata, Languages and
164 // Combinatorics 7(3): 321-350, 2002.
165 //
166 // We define the strict version of this order below.
167
168 // Declares the template with a second parameter determining whether or not it
169 // can actually be constructed.
170 template <class W, class IdempotentType = void>
172
173 // Variant for idempotent weights.
174 template <class W>
175 class NaturalLess<W, typename std::enable_if<IsIdempotent<W>::value>::type> {
176  public:
177  using Weight = W;
178
180
181  bool operator()(const Weight &w1, const Weight &w2) const {
182  return w1 != w2 && Plus(w1, w2) == w1;
183  }
184 };
185
186 // Non-constructible variant for non-idempotent weights.
187 template <class W>
188 class NaturalLess<W, typename std::enable_if<!IsIdempotent<W>::value>::type> {
189  public:
190  using Weight = W;
191
192  // TODO(kbg): Trace down anywhere this is being instantiated, then add a
193  // static_assert to prevent this from being instantiated.
195  FSTERROR() << "NaturalLess: Weight type is not idempotent: " << W::Type();
196  }
197
198  bool operator()(const Weight &, const Weight &) const { return false; }
199 };
200
201 // Power is the iterated product for arbitrary semirings such that Power(w, 0)
202 // is One() for the semiring, and Power(w, n) = Times(Power(w, n - 1), w).
203 template <class Weight>
204 Weight Power(const Weight &weight, size_t n) {
205  auto result = Weight::One();
206  for (size_t i = 0; i < n; ++i) result = Times(result, weight);
207  return result;
208 }
209
210 // Simple default adder class. Specializations might be more complex.
211 template <class Weight>
213  public:
214  explicit Adder(Weight w = Weight::Zero()) : sum_(w) { }
215
216  Weight Add(const Weight &w) {
217  sum_ = Plus(sum_, w);
218  return sum_;
219  }
220
221  Weight Sum() { return sum_; }
222
223  void Reset(Weight w = Weight::Zero()) { sum_ = w; }
224
225  private:
226  Weight sum_;
227 };
228
229 // General weight converter: raises error.
230 template <class W1, class W2>
232  W2 operator()(W1 w1) const {
233  FSTERROR() << "WeightConvert: Can't convert weight from \"" << W1::Type()
234  << "\" to \"" << W2::Type();
235  return W2::NoWeight();
236  }
237 };
238
239 // Specialized weight converter to self.
240 template <class W>
241 struct WeightConvert<W, W> {
242  W operator()(W weight) const { return weight; }
243 };
244
245 // General random weight generator: raises error.
246 template <class W>
248  W operator()() const {
249  FSTERROR() << "WeightGenerate: No random generator for " << W::Type();
250  return W::NoWeight();
251  }
252 };
253
254 namespace internal {
255
257  public:
259  CompositeWeightIO(char separator, std::pair<char, char> parentheses);
260
261  std::pair<char, char> parentheses() const {
262  return {open_paren_, close_paren_};
263  }
264  char separator() const { return separator_; }
265
266  bool error() const { return error_; }
267
268  protected:
269  const char separator_;
270  const char open_paren_;
271  const char close_paren_;
272
273  private:
274  bool error_;
275 };
276
277 } // namespace internal
278
279 // Helper class for writing textual composite weights.
281  public:
282  // Uses configuration from flags (FLAGS_fst_weight_separator,
283  // FLAGS_fst_weight_parentheses).
284  explicit CompositeWeightWriter(std::ostream &ostrm);
285
286  // parentheses defines the opening and closing parenthesis characters.
287  // Set parentheses = {0, 0} to disable writing parenthesis.
288  CompositeWeightWriter(std::ostream &ostrm, char separator,
289  std::pair<char, char> parentheses);
290
292  CompositeWeightWriter &operator=(const CompositeWeightWriter &) = delete;
293
294  // Writes open parenthesis to a stream if option selected.
295  void WriteBegin();
296
297  // Writes element to a stream.
298  template <class T>
299  void WriteElement(const T &comp) {
300  if (i_++ > 0) ostrm_ << separator_;
301  ostrm_ << comp;
302  }
303
304  // Writes close parenthesis to a stream if option selected.
305  void WriteEnd();
306
307  private:
308  std::ostream &ostrm_;
309  int i_ = 0; // Element position.
310 };
311
312 // Helper class for reading textual composite weights. Elements are separated by
313 // a separator character. There must be at least one element per textual
314 // representation. Parentheses characters should be set if the composite
315 // weights themselves contain composite weights to ensure proper parsing.
317  public:
318  // Uses configuration from flags (FLAGS_fst_weight_separator,
319  // FLAGS_fst_weight_parentheses).
321
322  // parentheses defines the opening and closing parenthesis characters.
323  // Set parentheses = {0, 0} to disable reading parenthesis.
325  std::pair<char, char> parentheses);
326
329
330  // Reads open parenthesis from a stream if option selected.
332
333  // Reads element from a stream. The second argument, when true, indicates that
334  // this will be the last element (allowing more forgiving formatting of the
335  // last element). Returns false when last element is read.
336  template <class T>
337  bool ReadElement(T *comp, bool last = false);
338
341
342  private:
343  std::istream &istrm_; // Input stream.
344  int c_ = 0; // Last character read, or EOF.
345  int depth_ = 0; // Weight parentheses depth.
346 };
347
348 template <class T>
350  string s;
351  const bool has_parens = open_paren_ != 0;
352  while ((c_ != std::istream::traits_type::eof()) && !std::isspace(c_) &&
353  (c_ != separator_ || depth_ > 1 || last) &&
354  (c_ != close_paren_ || depth_ != 1)) {
355  s += c_;
356  // If parentheses encountered before separator, they must be matched.
357  if (has_parens && c_ == open_paren_) {
358  ++depth_;
359  } else if (has_parens && c_ == close_paren_) {
360  // Failure on unmatched parentheses.
361  if (depth_ == 0) {
362  FSTERROR() << "CompositeWeightReader: Unmatched close paren: "
363  << "Is the fst_weight_parentheses flag set correctly?";
365  return false;
366  }
367  --depth_;
368  }
369  c_ = istrm_.get();
370  }
371  if (s.empty()) {
372  FSTERROR() << "CompositeWeightReader: Empty element: "
373  << "Is the fst_weight_parentheses flag set correctly?";
375  return false;
376  }
377  std::istringstream istrm(s);
378  istrm >> *comp;
379  // Skips separator/close parenthesis.
380  if (c_ != std::istream::traits_type::eof() && !std::isspace(c_)) {
381  c_ = istrm_.get();
382  }
383  const bool is_eof = c_ == std::istream::traits_type::eof();
384  // Clears fail bit if just EOF.
385  if (is_eof && !istrm_.bad()) istrm_.clear(std::ios::eofbit);
386  return !is_eof && !std::isspace(c_);
387 }
388
389 } // namespace fst
390
391 #endif // FST_WEIGHT_H_
Definition: weight.h:214
uint64_t uint64
Definition: types.h:32
W operator()() const
Definition: weight.h:248
constexpr uint64 kRightSemiring
Definition: weight.h:115
bool operator()(const Weight &w1, const Weight &w2) const
Definition: weight.h:181
ExpectationWeight< X1, X2 > Times(const ExpectationWeight< X1, X2 > &w1, const ExpectationWeight< X1, X2 > &w2)
constexpr uint64 kCommutative
Definition: weight.h:120
constexpr uint64 kLeftSemiring
Definition: weight.h:112
#define FSTERROR()
Definition: util.h:35
Definition: weight.h:216
constexpr uint64 kSemiring
Definition: weight.h:117
Weight Sum()
Definition: weight.h:221
constexpr uint64 kIdempotent
Definition: weight.h:123
ExpectationWeight< X1, X2 > Plus(const ExpectationWeight< X1, X2 > &w1, const ExpectationWeight< X1, X2 > &w2)
std::integral_constant< bool,(W::Properties()&kIdempotent)!=0 > IsIdempotent
Definition: weight.h:136
W operator()(W weight) const
Definition: weight.h:242
std::integral_constant< bool,(W::Properties()&kPath)!=0 > IsPath
Definition: weight.h:139
DECLARE_string(fst_weight_parentheses)
Definition: weight.h:349
constexpr uint64 kPath
Definition: weight.h:126
std::pair< char, char > parentheses() const
Definition: weight.h:261
constexpr size_t kNumRandomWeights
Definition: weight.h:130
W2 operator()(W1 w1) const
Definition: weight.h:232
void WriteElement(const T &comp)
Definition: weight.h:299
void Reset(Weight w=Weight::Zero())
Definition: weight.h:223
constexpr TropicalWeightTpl< T > Power(const TropicalWeightTpl< T > &w, V n)
Definition: float-weight.h:377
DivideType
Definition: weight.h:142
constexpr float kDelta
Definition: weight.h:109