FST  openfst-1.8.3
OpenFst Library
weight.h
Go to the documentation of this file.
1 // Copyright 2005-2024 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the 'License');
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an 'AS IS' BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // See www.openfst.org for extensive documentation on this weighted
16 // finite-state transducer library.
17 //
18 // General weight set and associated semiring operation definitions.
19 
20 #ifndef FST_WEIGHT_H_
21 #define FST_WEIGHT_H_
22 
23 #include <cctype>
24 #include <cmath>
25 #include <cstddef>
26 #include <cstdint>
27 #include <ios>
28 #include <iostream>
29 #include <istream>
30 #include <ostream>
31 #include <sstream>
32 #include <string>
33 #include <type_traits>
34 #include <utility>
35 
36 #include <fst/compat.h>
37 #include <fst/log.h>
38 #include <fst/util.h>
39 
40 DECLARE_string(fst_weight_parentheses);
41 DECLARE_string(fst_weight_separator);
42 
43 namespace fst {
44 
45 // A semiring is specified by two binary operations Plus and Times and two
46 // designated elements Zero and One with the following properties:
47 //
48 // Plus: associative, commutative, and has Zero as its identity.
49 //
50 // Times: associative and has identity One, distributes w.r.t. Plus, and
51 // has Zero as an annihilator:
52 // Times(Zero(), a) == Times(a, Zero()) = Zero().
53 //
54 // A left semiring distributes on the left; a right semiring is similarly
55 // defined.
56 //
57 // A Weight class must have binary functions Plus and Times and static member
58 // functions Zero() and One() and these must form (at least) a left or right
59 // semiring.
60 //
61 // In addition, the following should be defined for a Weight:
62 //
63 // Member: predicate on set membership.
64 //
65 // NoWeight: static member function that returns an element that is
66 // not a set member; used to signal an error.
67 //
68 // >>: reads textual representation of a weight.
69 //
70 // <<: prints textual representation of a weight.
71 //
72 // Read(istream &istrm): reads binary representation of a weight.
73 //
74 // Write(ostream &ostrm): writes binary representation of a weight.
75 //
76 // Hash: maps weight to size_t.
77 //
78 // ApproxEqual: approximate equality (for inexact weights)
79 //
80 // Quantize: quantizes w.r.t delta (for inexact weights)
81 //
82 // Divide:
83 // - In a left semiring, for all a, b, b', c:
84 // if Times(a, b) = c, Divide(c, a, DIVIDE_LEFT) = b' and b'.Member(),
85 // then Times(a, b') = c.
86 // - In a right semiring, for all a, a', b, c:
87 // if Times(a, b) = c, Divide(c, b, DIVIDE_RIGHT) = a' and a'.Member(),
88 // then Times(a', b) = c.
89 // - In a commutative semiring,
90 // * for all a, c:
91 // Divide(c, a, DIVIDE_ANY) = Divide(c, a, DIVIDE_LEFT)
92 // = Divide(c, a, DIVIDE_RIGHT)
93 // * for all a, b, b', c:
94 // if Times(a, b) = c, Divide(c, a, DIVIDE_ANY) = b' and b'.Member(),
95 // then Times(a, b') = c
96 // - In the case where there exist no b such that c = Times(a, b), the
97 // return value of Divide(c, a, DIVIDE_LEFT) is unspecified. Returning
98 // Weight::NoWeight() is recommemded but not required in order to
99 // allow the most efficient implementation.
100 // - All algorithms in this library only call Divide(c, a) when it is
101 // guaranteed that there exists a b such that c = Times(a, b).
102 //
103 // ReverseWeight: the type of the corresponding reverse weight.
104 //
105 // Typically the same type as Weight for a (both left and right) semiring.
106 // For the left string semiring, it is the right string semiring.
107 //
108 // Reverse: a mapping from Weight to ReverseWeight s.t.
109 //
110 // --> Reverse(Reverse(a)) = a
111 // --> Reverse(Plus(a, b)) = Plus(Reverse(a), Reverse(b))
112 // --> Reverse(Times(a, b)) = Times(Reverse(b), Reverse(a))
113 // Typically the identity mapping in a (both left and right) semiring.
114 // In the left string semiring, it maps to the reverse string in the right
115 // string semiring.
116 //
117 // Properties: specifies additional properties that hold:
118 // LeftSemiring: indicates weights form a left semiring.
119 // RightSemiring: indicates weights form a right semiring.
120 // Commutative: for all a, b: Times(a,b) == Times(b, a)
121 // Idempotent: for all a: Plus(a, a) == a.
122 // Path: for all a, b: Plus(a, b) == a or Plus(a, b) == b.
123 //
124 // User-defined weights and their corresponding operations SHOULD be
125 // defined in the same namespace, but SHOULD NOT defined in the fst
126 // namespace. Defining them in fst would make the user code fragile
127 // to additions in fst. They will be found in another namespace
128 // via argument-dependent lookup.
129 
130 // CONSTANT DEFINITIONS
131 
132 // A representable float near .001.
133 inline constexpr float kDelta = 1.0F / 1024.0F;
134 
135 // For all a, b, c: Times(c, Plus(a, b)) = Plus(Times(c, a), Times(c, b)).
136 inline constexpr uint64_t kLeftSemiring = 0x0000000000000001ULL;
137 
138 // For all a, b, c: Times(Plus(a, b), c) = Plus(Times(a, c), Times(b, c)).
139 inline constexpr uint64_t kRightSemiring = 0x0000000000000002ULL;
140 
141 inline constexpr uint64_t kSemiring = kLeftSemiring | kRightSemiring;
142 
143 // For all a, b: Times(a, b) = Times(b, a).
144 inline constexpr uint64_t kCommutative = 0x0000000000000004ULL;
145 
146 // For all a: Plus(a, a) = a.
147 inline constexpr uint64_t kIdempotent = 0x0000000000000008ULL;
148 
149 // For all a, b: Plus(a, b) = a or Plus(a, b) = b.
150 inline constexpr uint64_t kPath = 0x0000000000000010ULL;
151 
152 // For random weight generation: default number of distinct weights.
153 // This is also used for a few other weight generation defaults.
154 inline constexpr size_t kNumRandomWeights = 5;
155 
156 // Weight property boolean constants needed for SFINAE.
157 
158 template <class W>
159 using IsIdempotent = std::bool_constant<(W::Properties() & kIdempotent) != 0>;
160 
161 template <class W>
162 using IsPath = std::bool_constant<(W::Properties() & kPath) != 0>;
163 
164 // Determines direction of division.
166  DIVIDE_LEFT, // left division
167  DIVIDE_RIGHT, // right division
169 }; // division in a commutative semiring
170 
171 // NATURAL ORDER
172 //
173 // By definition:
174 //
175 // a <= b iff a + b = a
176 //
177 // The natural order is a negative partial order iff the semiring is
178 // idempotent. It is trivially monotonic for plus. It is left
179 // (resp. right) monotonic for times iff the semiring is left
180 // (resp. right) distributive. It is a total order iff the semiring
181 // has the path property.
182 //
183 // For more information, see:
184 //
185 // Mohri, M. 2002. Semiring framework and algorithms for shortest-distance
186 // problems, Journal of Automata, Languages and
187 // Combinatorics 7(3): 321-350, 2002.
188 //
189 // We define the strict version of this order below.
190 
191 // Requires W is idempotent.
192 template <class W>
193 struct NaturalLess {
194  using Weight = W;
195  static_assert(IsIdempotent<W>::value, "W must be idempotent.");
196 
197  bool operator()(const Weight &w1, const Weight &w2) const {
198  return w1 != w2 && Plus(w1, w2) == w1;
199  }
200 };
201 
202 // Power is the iterated product for arbitrary semirings such that Power(w, 0)
203 // is One() for the semiring, and Power(w, n) = Times(Power(w, n - 1), w).
204 template <class Weight>
205 Weight Power(const Weight &weight, size_t n) {
206  auto result = Weight::One();
207  for (size_t i = 0; i < n; ++i) result = Times(result, weight);
208  return result;
209 }
210 
211 // Simple default adder class. Specializations might be more complex.
212 template <class Weight>
213 class Adder {
214  public:
215  Adder() : sum_(Weight::Zero()) {}
216 
217  explicit Adder(Weight w) : sum_(std::move(w)) {}
218 
219  Weight Add(const Weight &w) {
220  sum_ = Plus(sum_, w);
221  return sum_;
222  }
223 
224  Weight Sum() const { return sum_; }
225 
226  void Reset(Weight w = Weight::Zero()) { sum_ = std::move(w); }
227 
228  private:
229  Weight sum_;
230 };
231 
232 // General weight converter: raises error.
233 template <class W1, class W2>
235  W2 operator()(W1 w1) const {
236  FSTERROR() << "WeightConvert: Can't convert weight from " << W1::Type()
237  << " to " << W2::Type();
238  return W2::NoWeight();
239  }
240 };
241 
242 // Specialized weight converter to self.
243 template <class W>
244 struct WeightConvert<W, W> {
245  constexpr W operator()(W weight) const { return weight; }
246 };
247 
248 // General random weight generator: raises error.
249 //
250 // The standard interface is roughly:
251 //
252 // class WeightGenerate<MyWeight> {
253 // public:
254 // explicit WeightGenerate(uint64_t seed = std::random_device()(),
255 // bool allow_zero = true,
256 // ...);
257 //
258 // MyWeight operator()() const;
259 // };
260 //
261 // Many weight generators also take trailing constructor arguments specifying
262 // the number of random (unique) weights, the length of weights (e.g., for
263 // string-based weights), etc. with sensible defaults
264 template <class W>
266  W operator()() const {
267  FSTERROR() << "WeightGenerate: No random generator for " << W::Type();
268  return W::NoWeight();
269  }
270 };
271 
272 namespace internal {
273 
275  public:
277  CompositeWeightIO(char separator, std::pair<char, char> parentheses);
278 
279  std::pair<char, char> parentheses() const {
280  return {open_paren_, close_paren_};
281  }
282  char separator() const { return separator_; }
283 
284  bool error() const { return error_; }
285 
286  protected:
287  const char separator_;
288  const char open_paren_;
289  const char close_paren_;
290 
291  private:
292  bool error_;
293 };
294 
295 } // namespace internal
296 
297 // Helper class for writing textual composite weights.
299  public:
300  // Uses configuration from flags (FST_FLAGS_fst_weight_separator,
301  // FST_FLAGS_fst_weight_parentheses).
302  explicit CompositeWeightWriter(std::ostream &ostrm);
303 
304  // parentheses defines the opening and closing parenthesis characters.
305  // Set parentheses = {0, 0} to disable writing parenthesis.
306  CompositeWeightWriter(std::ostream &ostrm, char separator,
307  std::pair<char, char> parentheses);
308 
310  CompositeWeightWriter &operator=(const CompositeWeightWriter &) = delete;
311 
312  // Writes open parenthesis to a stream if option selected.
313  void WriteBegin();
314 
315  // Writes element to a stream.
316  template <class T>
317  void WriteElement(const T &comp) {
318  if (i_++ > 0) ostrm_ << separator_;
319  ostrm_ << comp;
320  }
321 
322  // Writes close parenthesis to a stream if option selected.
323  void WriteEnd();
324 
325  private:
326  std::ostream &ostrm_;
327  int i_ = 0; // Element position.
328 };
329 
330 // Helper class for reading textual composite weights. Elements are separated by
331 // a separator character. There must be at least one element per textual
332 // representation. Parentheses characters should be set if the composite
333 // weights themselves contain composite weights to ensure proper parsing.
335  public:
336  // Uses configuration from flags (FST_FLAGS_fst_weight_separator,
337  // FST_FLAGS_fst_weight_parentheses).
338  explicit CompositeWeightReader(std::istream &istrm);
339 
340  // parentheses defines the opening and closing parenthesis characters.
341  // Set parentheses = {0, 0} to disable reading parenthesis.
342  CompositeWeightReader(std::istream &istrm, char separator,
343  std::pair<char, char> parentheses);
344 
346  CompositeWeightReader &operator=(const CompositeWeightReader &) = delete;
347 
348  // Reads open parenthesis from a stream if option selected.
349  void ReadBegin();
350 
351  // Reads element from a stream. The second argument, when true, indicates that
352  // this will be the last element (allowing more forgiving formatting of the
353  // last element). Returns false when last element is read.
354  template <class T>
355  bool ReadElement(T *comp, bool last = false);
356 
357  // Finalizes reading.
358  void ReadEnd();
359 
360  private:
361  std::istream &istrm_; // Input stream.
362  int c_ = 0; // Last character read, or EOF.
363  int depth_ = 0; // Weight parentheses depth.
364 };
365 
366 template <class T>
367 inline bool CompositeWeightReader::ReadElement(T *comp, bool last) {
368  std::string s;
369  const bool has_parens = open_paren_ != 0;
370  while ((c_ != std::istream::traits_type::eof()) && !std::isspace(c_) &&
371  (c_ != separator_ || depth_ > 1 || last) &&
372  (c_ != close_paren_ || depth_ != 1)) {
373  s += c_;
374  // If parentheses encountered before separator, they must be matched.
375  if (has_parens && c_ == open_paren_) {
376  ++depth_;
377  } else if (has_parens && c_ == close_paren_) {
378  // Failure on unmatched parentheses.
379  if (depth_ == 0) {
380  FSTERROR() << "CompositeWeightReader: Unmatched close paren: "
381  << "Is the fst_weight_parentheses flag set correctly?";
382  istrm_.clear(std::ios::badbit);
383  return false;
384  }
385  --depth_;
386  }
387  c_ = istrm_.get();
388  }
389  if (s.empty()) {
390  FSTERROR() << "CompositeWeightReader: Empty element: "
391  << "Is the fst_weight_parentheses flag set correctly?";
392  istrm_.clear(std::ios::badbit);
393  return false;
394  }
395  std::istringstream istrm(s);
396  istrm >> *comp;
397  // Skips separator/close parenthesis.
398  if (c_ != std::istream::traits_type::eof() && !std::isspace(c_)) {
399  c_ = istrm_.get();
400  }
401  const bool is_eof = c_ == std::istream::traits_type::eof();
402  // Clears fail bit if just EOF.
403  if (is_eof && !istrm_.bad()) istrm_.clear(std::ios::eofbit);
404  return !is_eof && !std::isspace(c_);
405 }
406 
407 } // namespace fst
408 
409 #endif // FST_WEIGHT_H_
constexpr uint64_t kSemiring
Definition: weight.h:141
ErrorWeight Plus(const ErrorWeight &, const ErrorWeight &)
Definition: error-weight.h:61
W operator()() const
Definition: weight.h:266
ErrorWeight Times(const ErrorWeight &, const ErrorWeight &)
Definition: error-weight.h:64
Weight Sum() const
Definition: weight.h:224
constexpr uint64_t kIdempotent
Definition: weight.h:147
std::bool_constant<(W::Properties()&kIdempotent)!=0 > IsIdempotent
Definition: weight.h:159
constexpr W operator()(W weight) const
Definition: weight.h:245
constexpr uint64_t kRightSemiring
Definition: weight.h:139
#define FSTERROR()
Definition: util.h:56
Weight Add(const Weight &w)
Definition: weight.h:219
constexpr uint64_t kCommutative
Definition: weight.h:144
bool operator()(const Weight &w1, const Weight &w2) const
Definition: weight.h:197
constexpr uint64_t kPath
Definition: weight.h:150
DECLARE_string(fst_weight_parentheses)
bool ReadElement(T *comp, bool last=false)
Definition: weight.h:367
std::pair< char, char > parentheses() const
Definition: weight.h:279
constexpr size_t kNumRandomWeights
Definition: weight.h:154
W2 operator()(W1 w1) const
Definition: weight.h:235
void WriteElement(const T &comp)
Definition: weight.h:317
void Reset(Weight w=Weight::Zero())
Definition: weight.h:226
constexpr TropicalWeightTpl< T > Power(const TropicalWeightTpl< T > &w, V n)
Definition: float-weight.h:401
DivideType
Definition: weight.h:165
constexpr uint64_t kLeftSemiring
Definition: weight.h:136
constexpr float kDelta
Definition: weight.h:133
std::bool_constant<(W::Properties()&kPath)!=0 > IsPath
Definition: weight.h:162
Adder(Weight w)
Definition: weight.h:217