FST  openfst-1.8.2
OpenFst Library
weight.h
Go to the documentation of this file.
1 // Copyright 2005-2020 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the 'License');
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an 'AS IS' BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // See www.openfst.org for extensive documentation on this weighted
16 // finite-state transducer library.
17 //
18 // General weight set and associated semiring operation definitions.
19 
20 #ifndef FST_WEIGHT_H_
21 #define FST_WEIGHT_H_
22 
23 #include <cctype>
24 #include <cmath>
25 #include <cstdint>
26 #include <iostream>
27 #include <sstream>
28 #include <type_traits>
29 #include <utility>
30 
31 #include <fst/compat.h>
32 #include <fst/log.h>
33 
34 #include <fst/util.h>
35 
36 
37 DECLARE_string(fst_weight_parentheses);
38 DECLARE_string(fst_weight_separator);
39 
40 namespace fst {
41 
42 // A semiring is specified by two binary operations Plus and Times and two
43 // designated elements Zero and One with the following properties:
44 //
45 // Plus: associative, commutative, and has Zero as its identity.
46 //
47 // Times: associative and has identity One, distributes w.r.t. Plus, and
48 // has Zero as an annihilator:
49 // Times(Zero(), a) == Times(a, Zero()) = Zero().
50 //
51 // A left semiring distributes on the left; a right semiring is similarly
52 // defined.
53 //
54 // A Weight class must have binary functions Plus and Times and static member
55 // functions Zero() and One() and these must form (at least) a left or right
56 // semiring.
57 //
58 // In addition, the following should be defined for a Weight:
59 //
60 // Member: predicate on set membership.
61 //
62 // NoWeight: static member function that returns an element that is
63 // not a set member; used to signal an error.
64 //
65 // >>: reads textual representation of a weight.
66 //
67 // <<: prints textual representation of a weight.
68 //
69 // Read(istream &istrm): reads binary representation of a weight.
70 //
71 // Write(ostream &ostrm): writes binary representation of a weight.
72 //
73 // Hash: maps weight to size_t.
74 //
75 // ApproxEqual: approximate equality (for inexact weights)
76 //
77 // Quantize: quantizes w.r.t delta (for inexact weights)
78 //
79 // Divide:
80 // - In a left semiring, for all a, b, b', c:
81 // if Times(a, b) = c, Divide(c, a, DIVIDE_LEFT) = b' and b'.Member(),
82 // then Times(a, b') = c.
83 // - In a right semiring, for all a, a', b, c:
84 // if Times(a, b) = c, Divide(c, b, DIVIDE_RIGHT) = a' and a'.Member(),
85 // then Times(a', b) = c.
86 // - In a commutative semiring,
87 // * for all a, c:
88 // Divide(c, a, DIVIDE_ANY) = Divide(c, a, DIVIDE_LEFT)
89 // = Divide(c, a, DIVIDE_RIGHT)
90 // * for all a, b, b', c:
91 // if Times(a, b) = c, Divide(c, a, DIVIDE_ANY) = b' and b'.Member(),
92 // then Times(a, b') = c
93 // - In the case where there exist no b such that c = Times(a, b), the
94 // return value of Divide(c, a, DIVIDE_LEFT) is unspecified. Returning
95 // Weight::NoWeight() is recommemded but not required in order to
96 // allow the most efficient implementation.
97 // - All algorithms in this library only call Divide(c, a) when it is
98 // guaranteed that there exists a b such that c = Times(a, b).
99 //
100 // ReverseWeight: the type of the corresponding reverse weight.
101 //
102 // Typically the same type as Weight for a (both left and right) semiring.
103 // For the left string semiring, it is the right string semiring.
104 //
105 // Reverse: a mapping from Weight to ReverseWeight s.t.
106 //
107 // --> Reverse(Reverse(a)) = a
108 // --> Reverse(Plus(a, b)) = Plus(Reverse(a), Reverse(b))
109 // --> Reverse(Times(a, b)) = Times(Reverse(b), Reverse(a))
110 // Typically the identity mapping in a (both left and right) semiring.
111 // In the left string semiring, it maps to the reverse string in the right
112 // string semiring.
113 //
114 // Properties: specifies additional properties that hold:
115 // LeftSemiring: indicates weights form a left semiring.
116 // RightSemiring: indicates weights form a right semiring.
117 // Commutative: for all a, b: Times(a,b) == Times(b, a)
118 // Idempotent: for all a: Plus(a, a) == a.
119 // Path: for all a, b: Plus(a, b) == a or Plus(a, b) == b.
120 //
121 // User-defined weights and their corresponding operations SHOULD be
122 // defined in the same namespace, but SHOULD NOT defined in the fst
123 // namespace. Defining them in fst would make the user code fragile
124 // to additions in fst. They will be found in another namespace
125 // via argument-dependent lookup.
126 
127 // CONSTANT DEFINITIONS
128 
129 // A representable float near .001.
130 inline constexpr float kDelta = 1.0F / 1024.0F;
131 
132 // For all a, b, c: Times(c, Plus(a, b)) = Plus(Times(c, a), Times(c, b)).
133 inline constexpr uint64_t kLeftSemiring = 0x0000000000000001ULL;
134 
135 // For all a, b, c: Times(Plus(a, b), c) = Plus(Times(a, c), Times(b, c)).
136 inline constexpr uint64_t kRightSemiring = 0x0000000000000002ULL;
137 
138 inline constexpr uint64_t kSemiring = kLeftSemiring | kRightSemiring;
139 
140 // For all a, b: Times(a, b) = Times(b, a).
141 inline constexpr uint64_t kCommutative = 0x0000000000000004ULL;
142 
143 // For all a: Plus(a, a) = a.
144 inline constexpr uint64_t kIdempotent = 0x0000000000000008ULL;
145 
146 // For all a, b: Plus(a, b) = a or Plus(a, b) = b.
147 inline constexpr uint64_t kPath = 0x0000000000000010ULL;
148 
149 // For random weight generation: default number of distinct weights.
150 // This is also used for a few other weight generation defaults.
151 inline constexpr size_t kNumRandomWeights = 5;
152 
153 // Weight property boolean constants needed for SFINAE.
154 
155 template <class W>
156 using IsIdempotent = std::bool_constant<(W::Properties() & kIdempotent) != 0>;
157 
158 template <class W>
159 using IsPath = std::bool_constant<(W::Properties() & kPath) != 0>;
160 
161 // Determines direction of division.
163  DIVIDE_LEFT, // left division
164  DIVIDE_RIGHT, // right division
166 }; // division in a commutative semiring
167 
168 // NATURAL ORDER
169 //
170 // By definition:
171 //
172 // a <= b iff a + b = a
173 //
174 // The natural order is a negative partial order iff the semiring is
175 // idempotent. It is trivially monotonic for plus. It is left
176 // (resp. right) monotonic for times iff the semiring is left
177 // (resp. right) distributive. It is a total order iff the semiring
178 // has the path property.
179 //
180 // For more information, see:
181 //
182 // Mohri, M. 2002. Semiring framework and algorithms for shortest-distance
183 // problems, Journal of Automata, Languages and
184 // Combinatorics 7(3): 321-350, 2002.
185 //
186 // We define the strict version of this order below.
187 
188 // Requires W is idempotent.
189 template <class W>
190 struct NaturalLess {
191  using Weight = W;
192  static_assert(IsIdempotent<W>::value, "W must be idempotent.");
193 
194  NaturalLess() = default; // Work-around possible GCC bug
195 
196  bool operator()(const Weight &w1, const Weight &w2) const {
197  return w1 != w2 && Plus(w1, w2) == w1;
198  }
199 };
200 
201 // Power is the iterated product for arbitrary semirings such that Power(w, 0)
202 // is One() for the semiring, and Power(w, n) = Times(Power(w, n - 1), w).
203 template <class Weight>
204 Weight Power(const Weight &weight, size_t n) {
205  auto result = Weight::One();
206  for (size_t i = 0; i < n; ++i) result = Times(result, weight);
207  return result;
208 }
209 
210 // Simple default adder class. Specializations might be more complex.
211 template <class Weight>
212 class Adder {
213  public:
214  Adder() : sum_(Weight::Zero()) {}
215 
216  explicit Adder(Weight w) : sum_(std::move(w)) {}
217 
218  Weight Add(const Weight &w) {
219  sum_ = Plus(sum_, w);
220  return sum_;
221  }
222 
223  Weight Sum() const { return sum_; }
224 
225  void Reset(Weight w = Weight::Zero()) { sum_ = std::move(w); }
226 
227  private:
228  Weight sum_;
229 };
230 
231 // General weight converter: raises error.
232 template <class W1, class W2>
234  W2 operator()(W1 w1) const {
235  FSTERROR() << "WeightConvert: Can't convert weight from " << W1::Type()
236  << " to " << W2::Type();
237  return W2::NoWeight();
238  }
239 };
240 
241 // Specialized weight converter to self.
242 template <class W>
243 struct WeightConvert<W, W> {
244  constexpr W operator()(W weight) const { return weight; }
245 };
246 
247 // General random weight generator: raises error.
248 //
249 // The standard interface is roughly:
250 //
251 // class WeightGenerate<MyWeight> {
252 // public:
253 // explicit WeightGenerate(uint64_t seed = std::random_device()(),
254 // bool allow_zero = true,
255 // ...);
256 //
257 // MyWeight operator()() const;
258 // };
259 //
260 // Many weight generators also take trailing constructor arguments specifying
261 // the number of random (unique) weights, the length of weights (e.g., for
262 // string-based weights), etc. with sensible defaults
263 template <class W>
265  W operator()() const {
266  FSTERROR() << "WeightGenerate: No random generator for " << W::Type();
267  return W::NoWeight();
268  }
269 };
270 
271 namespace internal {
272 
274  public:
276  CompositeWeightIO(char separator, std::pair<char, char> parentheses);
277 
278  std::pair<char, char> parentheses() const {
279  return {open_paren_, close_paren_};
280  }
281  char separator() const { return separator_; }
282 
283  bool error() const { return error_; }
284 
285  protected:
286  const char separator_;
287  const char open_paren_;
288  const char close_paren_;
289 
290  private:
291  bool error_;
292 };
293 
294 } // namespace internal
295 
296 // Helper class for writing textual composite weights.
298  public:
299  // Uses configuration from flags (FST_FLAGS_fst_weight_separator,
300  // FST_FLAGS_fst_weight_parentheses).
301  explicit CompositeWeightWriter(std::ostream &ostrm);
302 
303  // parentheses defines the opening and closing parenthesis characters.
304  // Set parentheses = {0, 0} to disable writing parenthesis.
305  CompositeWeightWriter(std::ostream &ostrm, char separator,
306  std::pair<char, char> parentheses);
307 
309  CompositeWeightWriter &operator=(const CompositeWeightWriter &) = delete;
310 
311  // Writes open parenthesis to a stream if option selected.
312  void WriteBegin();
313 
314  // Writes element to a stream.
315  template <class T>
316  void WriteElement(const T &comp) {
317  if (i_++ > 0) ostrm_ << separator_;
318  ostrm_ << comp;
319  }
320 
321  // Writes close parenthesis to a stream if option selected.
322  void WriteEnd();
323 
324  private:
325  std::ostream &ostrm_;
326  int i_ = 0; // Element position.
327 };
328 
329 // Helper class for reading textual composite weights. Elements are separated by
330 // a separator character. There must be at least one element per textual
331 // representation. Parentheses characters should be set if the composite
332 // weights themselves contain composite weights to ensure proper parsing.
334  public:
335  // Uses configuration from flags (FST_FLAGS_fst_weight_separator,
336  // FST_FLAGS_fst_weight_parentheses).
337  explicit CompositeWeightReader(std::istream &istrm);
338 
339  // parentheses defines the opening and closing parenthesis characters.
340  // Set parentheses = {0, 0} to disable reading parenthesis.
341  CompositeWeightReader(std::istream &istrm, char separator,
342  std::pair<char, char> parentheses);
343 
345  CompositeWeightReader &operator=(const CompositeWeightReader &) = delete;
346 
347  // Reads open parenthesis from a stream if option selected.
348  void ReadBegin();
349 
350  // Reads element from a stream. The second argument, when true, indicates that
351  // this will be the last element (allowing more forgiving formatting of the
352  // last element). Returns false when last element is read.
353  template <class T>
354  bool ReadElement(T *comp, bool last = false);
355 
356  // Finalizes reading.
357  void ReadEnd();
358 
359  private:
360  std::istream &istrm_; // Input stream.
361  int c_ = 0; // Last character read, or EOF.
362  int depth_ = 0; // Weight parentheses depth.
363 };
364 
365 template <class T>
366 inline bool CompositeWeightReader::ReadElement(T *comp, bool last) {
367  std::string s;
368  const bool has_parens = open_paren_ != 0;
369  while ((c_ != std::istream::traits_type::eof()) && !std::isspace(c_) &&
370  (c_ != separator_ || depth_ > 1 || last) &&
371  (c_ != close_paren_ || depth_ != 1)) {
372  s += c_;
373  // If parentheses encountered before separator, they must be matched.
374  if (has_parens && c_ == open_paren_) {
375  ++depth_;
376  } else if (has_parens && c_ == close_paren_) {
377  // Failure on unmatched parentheses.
378  if (depth_ == 0) {
379  FSTERROR() << "CompositeWeightReader: Unmatched close paren: "
380  << "Is the fst_weight_parentheses flag set correctly?";
381  istrm_.clear(std::ios::badbit);
382  return false;
383  }
384  --depth_;
385  }
386  c_ = istrm_.get();
387  }
388  if (s.empty()) {
389  FSTERROR() << "CompositeWeightReader: Empty element: "
390  << "Is the fst_weight_parentheses flag set correctly?";
391  istrm_.clear(std::ios::badbit);
392  return false;
393  }
394  std::istringstream istrm(s);
395  istrm >> *comp;
396  // Skips separator/close parenthesis.
397  if (c_ != std::istream::traits_type::eof() && !std::isspace(c_)) {
398  c_ = istrm_.get();
399  }
400  const bool is_eof = c_ == std::istream::traits_type::eof();
401  // Clears fail bit if just EOF.
402  if (is_eof && !istrm_.bad()) istrm_.clear(std::ios::eofbit);
403  return !is_eof && !std::isspace(c_);
404 }
405 
406 } // namespace fst
407 
408 #endif // FST_WEIGHT_H_
constexpr uint64_t kSemiring
Definition: weight.h:138
ErrorWeight Plus(const ErrorWeight &, const ErrorWeight &)
Definition: error-weight.h:60
W operator()() const
Definition: weight.h:265
ErrorWeight Times(const ErrorWeight &, const ErrorWeight &)
Definition: error-weight.h:63
Weight Sum() const
Definition: weight.h:223
constexpr uint64_t kIdempotent
Definition: weight.h:144
std::bool_constant<(W::Properties()&kIdempotent)!=0 > IsIdempotent
Definition: weight.h:156
constexpr W operator()(W weight) const
Definition: weight.h:244
constexpr uint64_t kRightSemiring
Definition: weight.h:136
#define FSTERROR()
Definition: util.h:53
Weight Add(const Weight &w)
Definition: weight.h:218
constexpr uint64_t kCommutative
Definition: weight.h:141
bool operator()(const Weight &w1, const Weight &w2) const
Definition: weight.h:196
constexpr uint64_t kPath
Definition: weight.h:147
DECLARE_string(fst_weight_parentheses)
bool ReadElement(T *comp, bool last=false)
Definition: weight.h:366
std::pair< char, char > parentheses() const
Definition: weight.h:278
constexpr size_t kNumRandomWeights
Definition: weight.h:151
W2 operator()(W1 w1) const
Definition: weight.h:234
void WriteElement(const T &comp)
Definition: weight.h:316
void Reset(Weight w=Weight::Zero())
Definition: weight.h:225
NaturalLess()=default
constexpr TropicalWeightTpl< T > Power(const TropicalWeightTpl< T > &w, V n)
Definition: float-weight.h:396
DivideType
Definition: weight.h:162
constexpr uint64_t kLeftSemiring
Definition: weight.h:133
constexpr float kDelta
Definition: weight.h:130
std::bool_constant<(W::Properties()&kPath)!=0 > IsPath
Definition: weight.h:159
Adder(Weight w)
Definition: weight.h:216