FST  openfst-1.8.3
OpenFst Library
interval-set.h
Go to the documentation of this file.
1 // Copyright 2005-2024 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the 'License');
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an 'AS IS' BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // See www.openfst.org for extensive documentation on this weighted
16 // finite-state transducer library.
17 //
18 // Class to represent and operate on sets of intervals.
19 
20 #ifndef FST_INTERVAL_SET_H_
21 #define FST_INTERVAL_SET_H_
22 
23 #include <algorithm>
24 #include <initializer_list>
25 #include <iostream>
26 #include <istream>
27 #include <ostream>
28 #include <vector>
29 
30 #include <fst/util.h>
31 
32 namespace fst {
33 
34 // Half-open integral interval [a, b) of signed integers of type T.
35 template <class T>
36 struct IntInterval {
37  T begin;
38  T end;
39 
40  IntInterval() : begin(-1), end(-1) {}
41 
42  IntInterval(T begin, T end) : begin(begin), end(end) {}
43 
44  bool operator<(const IntInterval<T> &i) const {
45  return begin < i.begin || (begin == i.begin && end > i.end);
46  }
47 
48  bool operator==(const IntInterval<T> &i) const {
49  return begin == i.begin && end == i.end;
50  }
51 
52  bool operator!=(const IntInterval<T> &i) const {
53  return begin != i.begin || end != i.end;
54  }
55 
56  std::istream &Read(std::istream &strm) {
57  T n;
58  ReadType(strm, &n);
59  begin = n;
60  ReadType(strm, &n);
61  end = n;
62  return strm;
63  }
64 
65  std::ostream &Write(std::ostream &strm) const {
66  T n = begin;
67  WriteType(strm, n);
68  n = end;
69  WriteType(strm, n);
70  return strm;
71  }
72 };
73 
74 // Stores IntIntervals<T> in a vector. In addition, keeps the count of points in
75 // all intervals.
76 template <class T>
78  public:
80  using Iterator = typename std::vector<Interval>::const_iterator;
81 
82  VectorIntervalStore() : count_(-1) {}
83  VectorIntervalStore(std::initializer_list<Interval> intervals_init)
84  : intervals_(intervals_init), count_(-1) {}
85 
86  std::vector<Interval> *MutableIntervals() { return &intervals_; }
87 
88  const Interval *Intervals() const { return intervals_.data(); }
89 
90  T Size() const { return intervals_.size(); }
91 
92  T Count() const { return count_; }
93 
94  void SetCount(T count) { count_ = count; }
95 
96  void Clear() {
97  intervals_.clear();
98  count_ = 0;
99  }
100 
101  Iterator begin() const { return intervals_.begin(); }
102 
103  Iterator end() const { return intervals_.end(); }
104 
105  std::istream &Read(std::istream &strm) {
106  ReadType(strm, &intervals_);
107  return ReadType(strm, &count_);
108  }
109 
110  std::ostream &Write(std::ostream &strm) const {
111  WriteType(strm, intervals_);
112  return WriteType(strm, count_);
113  }
114 
115  private:
116  std::vector<Interval> intervals_;
117  T count_;
118 };
119 
120 // Stores and operates on a set of half-open integral intervals [a, b)
121 // of signed integers of type T.
122 template <class T, class Store = VectorIntervalStore<T>>
123 class IntervalSet {
124  public:
126 
127  IntervalSet(std::initializer_list<Interval> intervals_init)
128  : intervals_(intervals_init) {}
129 
130  template <class... A>
131  explicit IntervalSet(A... args) : intervals_(args...) {}
132 
133  // Returns the interval set as a vector.
134  std::vector<Interval> *MutableIntervals() {
135  return intervals_.MutableIntervals();
136  }
137 
138  // Returns a pointer to an array of Size() elements.
139  const Interval *Intervals() const { return intervals_.Intervals(); }
140 
141  bool Empty() const { return Size() == 0; }
142 
143  T Size() const { return intervals_.Size(); }
144 
145  // Number of points in the intervals (undefined if not normalized).
146  T Count() const { return intervals_.Count(); }
147 
148  void Clear() { intervals_.Clear(); }
149 
150  // Adds an interval set to the set. The result may not be normalized.
151  void Union(const IntervalSet<T, Store> &iset) {
152  intervals_.MutableIntervals()->insert(intervals_.MutableIntervals()->end(),
153  iset.intervals_.begin(),
154  iset.intervals_.end());
155  }
156 
157  // Requires intervals be normalized.
158  bool Member(T value) const {
159  const Interval interval(value, value);
160  auto lb = std::lower_bound(intervals_.begin(), intervals_.end(), interval);
161  if (lb == intervals_.begin()) return false;
162  return (--lb)->end > value;
163  }
164 
165  // Requires intervals be normalized.
166  bool operator==(const IntervalSet<T, Store> &iset) const {
167  return Size() == iset.Size() &&
168  std::equal(intervals_.begin(), intervals_.end(),
169  iset.intervals_.begin());
170  }
171 
172  // Requires intervals be normalized.
173  bool operator!=(const IntervalSet<T, Store> &iset) const {
174  return Size() != iset.Size() ||
175  !std::equal(intervals_.begin(), intervals_.end(),
176  iset.intervals_.begin());
177  }
178 
179  bool Singleton() const {
180  return Size() == 1 &&
181  intervals_.begin()->begin + 1 == intervals_.begin()->end;
182  }
183 
184  // Sorts, collapses overlapping and adjacent interals, and sets count.
185  void Normalize();
186 
187  // Intersects an interval set with the set. Requires intervals be normalized.
188  // The result is normalized.
189  void Intersect(const IntervalSet<T, Store> &iset,
190  IntervalSet<T, Store> *oset) const;
191 
192  // Complements the set w.r.t [0, maxval). Requires intervals be normalized.
193  // The result is normalized.
194  void Complement(T maxval, IntervalSet<T, Store> *oset) const;
195 
196  // Subtract an interval set from the set. Requires intervals be normalized.
197  // The result is normalized.
198  void Difference(const IntervalSet<T, Store> &iset,
199  IntervalSet<T, Store> *oset) const;
200 
201  // Determines if an interval set overlaps with the set. Requires intervals be
202  // normalized.
203  bool Overlaps(const IntervalSet<T, Store> &iset) const;
204 
205  // Determines if an interval set overlaps with the set but neither is
206  // contained in the other. Requires intervals be normalized.
207  bool StrictlyOverlaps(const IntervalSet<T, Store> &iset) const;
208 
209  // Determines if an interval set is contained within the set. Requires
210  // intervals be normalized.
211  bool Contains(const IntervalSet<T, Store> &iset) const;
212 
213  std::istream &Read(std::istream &strm) { return intervals_.Read(strm); }
214 
215  std::ostream &Write(std::ostream &strm) const {
216  return intervals_.Write(strm);
217  }
218 
219  typename Store::Iterator begin() const { return intervals_.begin(); }
220 
221  typename Store::Iterator end() const { return intervals_.end(); }
222 
223  private:
224  Store intervals_;
225 };
226 
227 // Sorts, collapses overlapping and adjacent intervals, and sets count.
228 template <typename T, class Store>
230  auto &intervals = *intervals_.MutableIntervals();
231  std::sort(intervals.begin(), intervals.end());
232  T count = 0;
233  T size = 0;
234  for (T i = 0; i < intervals.size(); ++i) {
235  auto &inti = intervals[i];
236  if (inti.begin == inti.end) continue;
237  for (T j = i + 1; j < intervals.size(); ++j) {
238  auto &intj = intervals[j];
239  if (intj.begin > inti.end) break;
240  if (intj.end > inti.end) inti.end = intj.end;
241  ++i;
242  }
243  count += inti.end - inti.begin;
244  intervals[size++] = inti;
245  }
246  intervals.resize(size);
247  intervals_.SetCount(count);
248 }
249 
250 // Intersects an interval set with the set. Requires intervals be normalized.
251 // The result is normalized.
252 template <typename T, class Store>
254  IntervalSet<T, Store> *oset) const {
255  auto *ointervals = oset->MutableIntervals();
256  auto it1 = intervals_.begin();
257  auto it2 = iset.intervals_.begin();
258  ointervals->clear();
259  T count = 0;
260  while (it1 != intervals_.end() && it2 != iset.intervals_.end()) {
261  if (it1->end <= it2->begin) {
262  ++it1;
263  } else if (it2->end <= it1->begin) {
264  ++it2;
265  } else {
266  ointervals->emplace_back(std::max(it1->begin, it2->begin),
267  std::min(it1->end, it2->end));
268  count += ointervals->back().end - ointervals->back().begin;
269  if (it1->end < it2->end) {
270  ++it1;
271  } else {
272  ++it2;
273  }
274  }
275  }
276  oset->intervals_.SetCount(count);
277 }
278 
279 // Complements the set w.r.t [0, maxval). Requires intervals be normalized.
280 // The result is normalized.
281 template <typename T, class Store>
283  IntervalSet<T, Store> *oset) const {
284  auto *ointervals = oset->MutableIntervals();
285  ointervals->clear();
286  T count = 0;
287  Interval interval;
288  interval.begin = 0;
289  for (const auto current_interval : intervals_) {
290  interval.end = std::min(current_interval.begin, maxval);
291  if ((interval.begin) < (interval.end)) {
292  ointervals->push_back(interval);
293  count += interval.end - interval.begin;
294  }
295  interval.begin = current_interval.end;
296  }
297  interval.end = maxval;
298  if ((interval.begin) < (interval.end)) {
299  ointervals->push_back(interval);
300  count += interval.end - interval.begin;
301  }
302  oset->intervals_.SetCount(count);
303 }
304 
305 // Subtract an interval set from the set. Requires intervals be normalized.
306 // The result is normalized.
307 template <typename T, class Store>
309  IntervalSet<T, Store> *oset) const {
310  if (Empty()) {
311  oset->MutableIntervals()->clear();
312  oset->intervals_.SetCount(0);
313  } else {
315  iset.Complement(intervals_.Intervals()[intervals_.Size() - 1].end, &cset);
316  Intersect(cset, oset);
317  }
318 }
319 
320 // Determines if an interval set overlaps with the set. Requires intervals be
321 // normalized.
322 template <typename T, class Store>
324  auto it1 = intervals_.begin();
325  auto it2 = iset.intervals_.begin();
326  while (it1 != intervals_.end() && it2 != iset.intervals_.end()) {
327  if (it1->end <= it2->begin) {
328  ++it1;
329  } else if (it2->end <= it1->begin) {
330  ++it2;
331  } else {
332  return true;
333  }
334  }
335  return false;
336 }
337 
338 // Determines if an interval set overlaps with the set but neither is contained
339 // in the other. Requires intervals be normalized.
340 template <typename T, class Store>
342  const IntervalSet<T, Store> &iset) const {
343  auto it1 = intervals_.begin();
344  auto it2 = iset.intervals_.begin();
345  bool only1 = false; // Point in intervals_ but not intervals.
346  bool only2 = false; // Point in intervals but not intervals_.
347  bool overlap = false; // Point in both intervals_ and intervals.
348  while (it1 != intervals_.end() && it2 != iset.intervals_.end()) {
349  if (it1->end <= it2->begin) { // no overlap - it1 first
350  only1 = true;
351  ++it1;
352  } else if (it2->end <= it1->begin) { // no overlap - it2 first
353  only2 = true;
354  ++it2;
355  } else if (it2->begin == it1->begin && it2->end == it1->end) { // equals
356  overlap = true;
357  ++it1;
358  ++it2;
359  } else if (it2->begin <= it1->begin && it2->end >= it1->end) { // 1 c 2
360  only2 = true;
361  overlap = true;
362  ++it1;
363  } else if (it1->begin <= it2->begin && it1->end >= it2->end) { // 2 c 1
364  only1 = true;
365  overlap = true;
366  ++it2;
367  } else { // Strict overlap.
368  only1 = true;
369  only2 = true;
370  overlap = true;
371  }
372  if (only1 == true && only2 == true && overlap == true) return true;
373  }
374  if (it1 != intervals_.end()) only1 = true;
375  if (it2 != iset.intervals_.end()) only2 = true;
376  return only1 == true && only2 == true && overlap == true;
377 }
378 
379 // Determines if an interval set is contained within the set. Requires intervals
380 // be normalized.
381 template <typename T, class Store>
383  if (iset.Count() > Count()) return false;
384  auto it1 = intervals_.begin();
385  auto it2 = iset.intervals_.begin();
386  while (it1 != intervals_.end() && it2 != iset.intervals_.end()) {
387  if ((it1->end) <= (it2->begin)) { // No overlap; it1 first.
388  ++it1;
389  } else if ((it2->begin) < (it1->begin) ||
390  (it2->end) > (it1->end)) { // No C.
391  return false;
392  } else if (it2->end == it1->end) {
393  ++it1;
394  ++it2;
395  } else {
396  ++it2;
397  }
398  }
399  return it2 == iset.intervals_.end();
400 }
401 
402 template <typename T, class Store>
403 std::ostream &operator<<(std::ostream &strm, const IntervalSet<T, Store> &s) {
404  strm << "{";
405  for (T i = 0; i < s.Size(); ++i) {
406  if (i > 0) {
407  strm << ",";
408  }
409  const auto &interval = s.Intervals()[i];
410  strm << "[" << interval.begin << "," << interval.end << ")";
411  }
412  strm << "}";
413  return strm;
414 }
415 
416 } // namespace fst
417 
418 #endif // FST_INTERVAL_SET_H_
void SetCount(T count)
Definition: interval-set.h:94
bool operator!=(const IntervalSet< T, Store > &iset) const
Definition: interval-set.h:173
void Union(const IntervalSet< T, Store > &iset)
Definition: interval-set.h:151
std::istream & Read(std::istream &strm)
Definition: interval-set.h:213
VectorIntervalStore(std::initializer_list< Interval > intervals_init)
Definition: interval-set.h:83
std::istream & Read(std::istream &strm)
Definition: interval-set.h:105
IntInterval(T begin, T end)
Definition: interval-set.h:42
bool operator==(const IntInterval< T > &i) const
Definition: interval-set.h:48
void Intersect(const Fst< Arc > &ifst1, const Fst< Arc > &ifst2, MutableFst< Arc > *ofst, const IntersectOptions &opts=IntersectOptions())
Definition: intersect.h:150
Iterator end() const
Definition: interval-set.h:103
T Count() const
Definition: interval-set.h:146
const Interval * Intervals() const
Definition: interval-set.h:139
std::vector< Interval > * MutableIntervals()
Definition: interval-set.h:134
std::ostream & Write(std::ostream &strm) const
Definition: interval-set.h:65
void Intersect(const IntervalSet< T, Store > &iset, IntervalSet< T, Store > *oset) const
Definition: interval-set.h:253
T Size() const
Definition: interval-set.h:143
void Complement(T maxval, IntervalSet< T, Store > *oset) const
Definition: interval-set.h:282
std::ostream & WriteType(std::ostream &strm, const T t)
Definition: util.h:228
void Difference(const Fst< Arc > &ifst1, const Fst< Arc > &ifst2, MutableFst< Arc > *ofst, const DifferenceOptions &opts=DifferenceOptions())
Definition: difference.h:175
std::istream & Read(std::istream &strm)
Definition: interval-set.h:56
std::ostream & Write(std::ostream &strm) const
Definition: interval-set.h:215
bool Member(T value) const
Definition: interval-set.h:158
bool Empty() const
Definition: interval-set.h:141
bool StrictlyOverlaps(const IntervalSet< T, Store > &iset) const
Definition: interval-set.h:341
typename std::vector< Interval >::const_iterator Iterator
Definition: interval-set.h:80
IntervalSet(std::initializer_list< Interval > intervals_init)
Definition: interval-set.h:127
Store::Iterator begin() const
Definition: interval-set.h:219
std::vector< Interval > * MutableIntervals()
Definition: interval-set.h:86
IntervalSet(A...args)
Definition: interval-set.h:131
std::ostream & Write(std::ostream &strm) const
Definition: interval-set.h:110
void Difference(const IntervalSet< T, Store > &iset, IntervalSet< T, Store > *oset) const
Definition: interval-set.h:308
Store::Iterator end() const
Definition: interval-set.h:221
const Interval * Intervals() const
Definition: interval-set.h:88
std::istream & ReadType(std::istream &strm, T *t)
Definition: util.h:80
bool Overlaps(const IntervalSet< T, Store > &iset) const
Definition: interval-set.h:323
Iterator begin() const
Definition: interval-set.h:101
bool Singleton() const
Definition: interval-set.h:179
bool operator!=(const IntInterval< T > &i) const
Definition: interval-set.h:52
bool Contains(const IntervalSet< T, Store > &iset) const
Definition: interval-set.h:382
bool operator==(const IntervalSet< T, Store > &iset) const
Definition: interval-set.h:166