FST  openfst-1.8.3
OpenFst Library
partition.h
Go to the documentation of this file.
1 // Copyright 2005-2024 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the 'License');
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an 'AS IS' BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // See www.openfst.org for extensive documentation on this weighted
16 // finite-state transducer library.
17 //
18 // Functions and classes to create a partition of states.
19 
20 #ifndef FST_PARTITION_H_
21 #define FST_PARTITION_H_
22 
23 #include <algorithm>
24 #include <cstddef>
25 #include <type_traits>
26 #include <vector>
27 
28 #include <fst/queue.h>
29 
30 namespace fst {
31 namespace internal {
32 
33 template <typename T>
35 
36 // Defines a partitioning of elements, used to represent equivalence classes
37 // for FST operations like minimization. T must be a signed integer type.
38 //
39 // The elements are numbered from 0 to num_elements - 1.
40 // Initialize(num_elements) sets up the class for a given number of elements.
41 // We maintain a partition of these elements into classes. The classes are also
42 // numbered from zero; you can add a class with AddClass(), or add them in bulk
43 // with AllocateClasses(num_classes). Initially the elements are not assigned
44 // to any class; you set up the initial mapping from elements to classes by
45 // calling Add(element_id, class_id). You can also move an element to a
46 // different class by calling Move(element_id, class_id).
47 //
48 // We also support a rather specialized interface that allows you to efficiently
49 // split classes in the Hopcroft minimization algorithm. This maintains a
50 // binary partition of each class. Let's call these, rather arbitrarily, the
51 // 'yes' subset and the 'no' subset of each class, and assume that by default,
52 // each element of a class is in its 'no' subset. When one calls
53 // SplitOn(element_id), element_id is moved to the 'yes' subset of its class.
54 // (If it was already in the 'yes' set, it just stays there). The aim is to
55 // enable (later) splitting the class in two in time no greater than the time
56 // already spent calling SplitOn() for that class. We keep a list of the classes
57 // which have nonempty 'yes' sets, as visited_classes_. When one calls
58 // FinalizeSplit(Queue *l), for each class in visited_classes_ whose 'yes'
59 // and 'no' sets are both nonempty, it will create a new class consisting of
60 // the smaller of the two subsets (and this class will be added to the queue),
61 // and the old class will now be the larger of the two subsets. This call also
62 // resets all the yes/no partitions so that everything is in the 'no' subsets.
63 //
64 // One cannot use the Move() function if SplitOn() has been called without
65 // a subsequent call to FinalizeSplit()
66 template <typename T>
67 class Partition {
68  static_assert(std::is_signed_v<T> && std::is_integral_v<T>,
69  "T must be a signed integer type");
70 
71  public:
72  Partition() = default;
73 
74  explicit Partition(T num_elements) { Initialize(num_elements); }
75 
76  // Creates an empty partition for num_elements. This means that the elements
77  // are not assigned to a class (i.e class_index = -1); you should set up the
78  // number of classes using AllocateClasses() or AddClass(), and allocate each
79  // element to a class by calling Add(element, class_id).
80  void Initialize(size_t num_elements) {
81  elements_.resize(num_elements);
82  classes_.reserve(num_elements);
83  classes_.clear();
84  yes_counter_ = 1;
85  }
86 
87  // Adds a class; returns new number of classes.
88  T AddClass() {
89  auto num_classes = classes_.size();
90  classes_.resize(num_classes + 1);
91  return num_classes;
92  }
93 
94  // Adds 'num_classes' new (empty) classes.
95  void AllocateClasses(T num_classes) {
96  classes_.resize(classes_.size() + num_classes);
97  }
98 
99  // Adds element_id to class_id. element_id should already have been allocated
100  // by calling Initialize(num_elements)---or the constructor taking
101  // num_elements---with num_elements > element_id. element_id must not
102  // currently be a member of any class; once elements have been added to a
103  // class, use the Move() method to move them from one class to another.
104  void Add(T element_id, T class_id) {
105  auto &this_element = elements_[element_id];
106  auto &this_class = classes_[class_id];
107  ++this_class.size;
108  // Adds the element to the 'no' subset of the class.
109  auto no_head = this_class.no_head;
110  if (no_head >= 0) elements_[no_head].prev_element = element_id;
111  this_class.no_head = element_id;
112  this_element.class_id = class_id;
113  // Adds to the 'no' subset of the class.
114  this_element.yes = 0;
115  this_element.next_element = no_head;
116  this_element.prev_element = -1;
117  }
118 
119  // Moves element_id from 'no' subset of its current class to 'no' subset of
120  // class class_id. This may not work correctly if you have called SplitOn()
121  // [for any element] and haven't subsequently called FinalizeSplit().
122  void Move(T element_id, T class_id) {
123  auto elements = &(elements_[0]);
124  auto &element = elements[element_id];
125  auto &old_class = classes_[element.class_id];
126  --old_class.size;
127  // Excises the element from the 'no' list of its old class, where it is
128  // assumed to be.
129  if (element.prev_element >= 0) {
130  elements[element.prev_element].next_element = element.next_element;
131  } else {
132  old_class.no_head = element.next_element;
133  }
134  if (element.next_element >= 0) {
135  elements[element.next_element].prev_element = element.prev_element;
136  }
137  // Adds to new class.
138  Add(element_id, class_id);
139  }
140 
141  // Moves element_id to the 'yes' subset of its class if it was in the 'no'
142  // subset, and marks the class as having been visited.
143  void SplitOn(T element_id) {
144  auto elements = &(elements_[0]);
145  auto &element = elements[element_id];
146  if (element.yes == yes_counter_) {
147  return; // Already in the 'yes' set; nothing to do.
148  }
149  auto class_id = element.class_id;
150  auto &this_class = classes_[class_id];
151  // Excises the element from the 'no' list of its class.
152  if (element.prev_element >= 0) {
153  elements[element.prev_element].next_element = element.next_element;
154  } else {
155  this_class.no_head = element.next_element;
156  }
157  if (element.next_element >= 0) {
158  elements[element.next_element].prev_element = element.prev_element;
159  }
160  // Adds the element to the 'yes' list.
161  if (this_class.yes_head >= 0) {
162  elements[this_class.yes_head].prev_element = element_id;
163  } else {
164  visited_classes_.push_back(class_id);
165  }
166  element.yes = yes_counter_;
167  element.next_element = this_class.yes_head;
168  element.prev_element = -1;
169  this_class.yes_head = element_id;
170  this_class.yes_size++;
171  }
172 
173  // This should be called after one has possibly called SplitOn for one or more
174  // elements, thus moving those elements to the 'yes' subset for their class.
175  // For each class that has a nontrivial split (i.e., it's not the case that
176  // all members are in the 'yes' or 'no' subset), this function creates a new
177  // class containing the smaller of the two subsets of elements, leaving the
178  // larger group of elements in the old class. The identifier of the new class
179  // will be added to the queue provided as the pointer L. This method then
180  // moves all elements to the 'no' subset of their class.
181  template <class Queue>
182  void FinalizeSplit(Queue *queue) {
183  for (const auto &visited_class : visited_classes_) {
184  const auto new_class = SplitRefine(visited_class);
185  if (new_class != -1 && queue) queue->Enqueue(new_class);
186  }
187  visited_classes_.clear();
188  // Incrementation sets all the 'yes' members of the elements to false.
189  ++yes_counter_;
190  }
191 
192  const T ClassId(T element_id) const { return elements_[element_id].class_id; }
193 
194  const size_t ClassSize(T class_id) const { return classes_[class_id].size; }
195 
196  const T NumClasses() const { return classes_.size(); }
197 
198  private:
199  friend class PartitionIterator<T>;
200 
201  // Information about a given element.
202  struct Element {
203  T class_id; // Class ID of this element.
204  T yes; // This is to be interpreted as a bool, true if it's in the
205  // 'yes' set of this class. The interpretation as bool is
206  // (yes == yes_counter_ ? true : false).
207  T next_element; // Next element in the 'no' list or 'yes' list of this
208  // class, whichever of the two we belong to (think of
209  // this as the 'next' in a doubly-linked list, although
210  // it is an index into the elements array). Negative
211  // values corresponds to null.
212  T prev_element; // Previous element in the 'no' or 'yes' doubly linked
213  // list. Negative values corresponds to null.
214  };
215 
216  // Information about a given class.
217  struct Class {
218  Class() : size(0), yes_size(0), no_head(-1), yes_head(-1) {}
219  T size; // Total number of elements in this class ('no' plus 'yes'
220  // subsets).
221  T yes_size; // Total number of elements of 'yes' subset of this class.
222  T no_head; // Index of head element of doubly-linked list in 'no' subset.
223  // Everything is in the 'no' subset until you call SplitOn().
224  // -1 means no element.
225  T yes_head; // Index of head element of doubly-linked list in 'yes' subset.
226  // -1 means no element.
227  };
228 
229  // This method, called from FinalizeSplit(), checks whether a class has to
230  // be split (a class will be split only if its 'yes' and 'no' subsets are
231  // both nonempty, but one can assume that since this function was called, the
232  // 'yes' subset is nonempty). It splits by taking the smaller subset and
233  // making it a new class, and leaving the larger subset of elements in the
234  // 'no' subset of the old class. It returns the new class if created, or -1
235  // if none was created.
236  T SplitRefine(T class_id) {
237  auto yes_size = classes_[class_id].yes_size;
238  auto size = classes_[class_id].size;
239  auto no_size = size - yes_size;
240  if (no_size == 0) {
241  // All members are in the 'yes' subset, so we don't have to create a new
242  // class, just move them all to the 'no' subset.
243  classes_[class_id].no_head = classes_[class_id].yes_head;
244  classes_[class_id].yes_head = -1;
245  classes_[class_id].yes_size = 0;
246  return -1;
247  } else {
248  auto new_class_id = classes_.size();
249  classes_.resize(classes_.size() + 1);
250  auto &old_class = classes_[class_id];
251  auto &new_class = classes_[new_class_id];
252  // The new_class will have the values from the constructor.
253  if (no_size < yes_size) {
254  // Moves the 'no' subset to new class ('no' subset).
255  new_class.no_head = old_class.no_head;
256  new_class.size = no_size;
257  // And makes the 'yes' subset of the old class ('no' subset).
258  old_class.no_head = old_class.yes_head;
259  old_class.yes_head = -1;
260  old_class.size = yes_size;
261  old_class.yes_size = 0;
262  } else {
263  // Moves the 'yes' subset to the new class (to the 'no' subset)
264  new_class.size = yes_size;
265  new_class.no_head = old_class.yes_head;
266  // Retains only the 'no' subset in the old class.
267  old_class.size = no_size;
268  old_class.yes_size = 0;
269  old_class.yes_head = -1;
270  }
271  auto elements = &(elements_[0]);
272  // Updates the 'class_id' of all the elements we moved.
273  for (auto e = new_class.no_head; e >= 0; e = elements[e].next_element) {
274  elements[e].class_id = new_class_id;
275  }
276  return new_class_id;
277  }
278  }
279 
280  // elements_[i] contains all info about the i'th element.
281  std::vector<Element> elements_;
282  // classes_[i] contains all info about the i'th class.
283  std::vector<Class> classes_;
284  // Set of visited classes to be used in split refine.
285  std::vector<T> visited_classes_;
286  // yes_counter_ is used in interpreting the 'yes' members of class Element.
287  // If element.yes == yes_counter_, we interpret that element as being in the
288  // 'yes' subset of its class. This allows us to, in effect, set all those
289  // bools to false at a stroke by incrementing yes_counter_.
290  T yes_counter_;
291 };
292 
293 // Iterates over members of the 'no' subset of a class in a partition. (When
294 // this is used, everything is in the 'no' subset).
295 template <typename T>
296 class PartitionIterator {
297  public:
298  using Element = typename Partition<T>::Element;
299 
300  PartitionIterator(const Partition<T> &partition, T class_id)
301  : partition_(partition),
302  element_id_(partition_.classes_[class_id].no_head),
303  class_id_(class_id) {}
304 
305  bool Done() { return element_id_ < 0; }
306 
307  const T Value() { return element_id_; }
308 
309  void Next() { element_id_ = partition_.elements_[element_id_].next_element; }
310 
311  void Reset() { element_id_ = partition_.classes_[class_id_].no_head; }
312 
313  private:
314  const Partition<T> &partition_;
315  T element_id_;
316  T class_id_;
317 };
318 
319 } // namespace internal
320 } // namespace fst
321 
322 #endif // FST_PARTITION_H_
const size_t ClassSize(T class_id) const
Definition: partition.h:194
void SplitOn(T element_id)
Definition: partition.h:143
void Move(T element_id, T class_id)
Definition: partition.h:122
void AllocateClasses(T num_classes)
Definition: partition.h:95
typename Partition< T >::Element Element
Definition: partition.h:298
const T NumClasses() const
Definition: partition.h:196
void Initialize(size_t num_elements)
Definition: partition.h:80
Partition(T num_elements)
Definition: partition.h:74
const T ClassId(T element_id) const
Definition: partition.h:192
void Add(T element_id, T class_id)
Definition: partition.h:104
PartitionIterator(const Partition< T > &partition, T class_id)
Definition: partition.h:300
void FinalizeSplit(Queue *queue)
Definition: partition.h:182