FST  openfst-1.7.1
OpenFst Library
interval-set.h
Go to the documentation of this file.
1 // See www.openfst.org for extensive documentation on this weighted
2 // finite-state transducer library.
3 //
4 // Class to represent and operate on sets of intervals.
5 
6 #ifndef FST_INTERVAL_SET_H_
7 #define FST_INTERVAL_SET_H_
8 
9 #include <algorithm>
10 #include <iostream>
11 #include <vector>
12 
13 
14 #include <fst/util.h>
15 
16 
17 namespace fst {
18 
19 // Half-open integral interval [a, b) of signed integers of type T.
20 template <class T>
21 struct IntInterval {
22  T begin;
23  T end;
24 
25  IntInterval() : begin(-1), end(-1) {}
26 
27  IntInterval(T begin, T end) : begin(begin), end(end) {}
28 
29  bool operator<(const IntInterval<T> &i) const {
30  return begin < i.begin || (begin == i.begin && end > i.end);
31  }
32 
33  bool operator==(const IntInterval<T> &i) const {
34  return begin == i.begin && end == i.end;
35  }
36 
37  bool operator!=(const IntInterval<T> &i) const {
38  return begin != i.begin || end != i.end;
39  }
40 
41  std::istream &Read(std::istream &strm) {
42  T n;
43  ReadType(strm, &n);
44  begin = n;
45  ReadType(strm, &n);
46  end = n;
47  return strm;
48  }
49 
50  std::ostream &Write(std::ostream &strm) const {
51  T n = begin;
52  WriteType(strm, n);
53  n = end;
54  WriteType(strm, n);
55  return strm;
56  }
57 };
58 
59 // Stores IntIntervals<T> in a vector. In addition, keeps the count of points in
60 // all intervals.
61 template <class T>
63  public:
65  using Iterator = typename std::vector<Interval>::const_iterator;
66 
67  VectorIntervalStore() : count_(-1) {}
68 
69  std::vector<Interval> *MutableIntervals() { return &intervals_; }
70 
71  const Interval *Intervals() const { return intervals_.data(); }
72 
73  T Size() const { return intervals_.size(); }
74 
75  T Count() const { return count_; }
76 
77  void SetCount(T count) { count_ = count; }
78 
79  void Clear() {
80  intervals_.clear();
81  count_ = 0;
82  }
83 
84  Iterator begin() const { return intervals_.begin(); }
85 
86  Iterator end() const { return intervals_.end(); }
87 
88  std::istream &Read(std::istream &strm) {
89  ReadType(strm, &intervals_);
90  return ReadType(strm, &count_);
91  }
92 
93  std::ostream &Write(std::ostream &strm) const {
94  WriteType(strm, intervals_);
95  return WriteType(strm, count_);
96  }
97 
98  private:
99  std::vector<Interval> intervals_;
100  T count_;
101 };
102 
103 // Stores and operates on a set of half-open integral intervals [a, b)
104 // of signed integers of type T.
105 template <class T, class Store = VectorIntervalStore<T>>
106 class IntervalSet {
107  public:
109 
110  template <class... A>
111  explicit IntervalSet(A... args) : intervals_(args...) {}
112 
113  // Returns the interval set as a vector.
114  std::vector<Interval> *MutableIntervals() {
115  return intervals_.MutableIntervals();
116  }
117 
118  // Returns a pointer to an array of Size() elements.
119  const Interval *Intervals() const { return intervals_.Intervals(); }
120 
121  bool Empty() const { return Size() == 0; }
122 
123  T Size() const { return intervals_.Size(); }
124 
125  // Number of points in the intervals (undefined if not normalized).
126  T Count() const { return intervals_.Count(); }
127 
128  void Clear() { intervals_.Clear(); }
129 
130  // Adds an interval set to the set. The result may not be normalized.
131  void Union(const IntervalSet<T, Store> &iset) {
132  intervals_.MutableIntervals()->insert(intervals_.MutableIntervals()->end(),
133  iset.intervals_.begin(),
134  iset.intervals_.end());
135  }
136 
137  // Requires intervals be normalized.
138  bool Member(T value) const {
139  const Interval interval(value, value);
140  auto lb = std::lower_bound(intervals_.begin(), intervals_.end(), interval);
141  if (lb == intervals_.begin()) return false;
142  return (--lb)->end > value;
143  }
144 
145  // Requires intervals be normalized.
146  bool operator==(const IntervalSet<T, Store> &iset) const {
147  return Size() == iset.Size() &&
148  std::equal(intervals_.begin(), intervals_.end(),
149  iset.intervals_.begin());
150  }
151 
152  // Requires intervals be normalized.
153  bool operator!=(const IntervalSet<T, Store> &iset) const {
154  return Size() != iset.Size() ||
155  !std::equal(intervals_.begin(), intervals_.end(),
156  iset.intervals_.begin());
157  }
158 
159  bool Singleton() const {
160  return Size() == 1 &&
161  intervals_.begin()->begin + 1 == intervals_.begin()->end;
162  }
163 
164  // Sorts, collapses overlapping and adjacent interals, and sets count.
165  void Normalize();
166 
167  // Intersects an interval set with the set. Requires intervals be normalized.
168  // The result is normalized.
169  void Intersect(const IntervalSet<T, Store> &iset,
170  IntervalSet<T, Store> *oset) const;
171 
172  // Complements the set w.r.t [0, maxval). Requires intervals be normalized.
173  // The result is normalized.
174  void Complement(T maxval, IntervalSet<T, Store> *oset) const;
175 
176  // Subtract an interval set from the set. Requires intervals be normalized.
177  // The result is normalized.
178  void Difference(const IntervalSet<T, Store> &iset,
179  IntervalSet<T, Store> *oset) const;
180 
181  // Determines if an interval set overlaps with the set. Requires intervals be
182  // normalized.
183  bool Overlaps(const IntervalSet<T, Store> &iset) const;
184 
185  // Determines if an interval set overlaps with the set but neither is
186  // contained in the other. Requires intervals be normalized.
187  bool StrictlyOverlaps(const IntervalSet<T, Store> &iset) const;
188 
189  // Determines if an interval set is contained within the set. Requires
190  // intervals be normalized.
191  bool Contains(const IntervalSet<T, Store> &iset) const;
192 
193  std::istream &Read(std::istream &strm) { return intervals_.Read(strm); }
194 
195  std::ostream &Write(std::ostream &strm) const {
196  return intervals_.Write(strm);
197  }
198 
199  typename Store::Iterator begin() const { return intervals_.begin(); }
200 
201  typename Store::Iterator end() const { return intervals_.end(); }
202 
203  private:
204  Store intervals_;
205 };
206 
207 // Sorts, collapses overlapping and adjacent intervals, and sets count.
208 template <typename T, class Store>
210  auto &intervals = *intervals_.MutableIntervals();
211  std::sort(intervals.begin(), intervals.end());
212  T count = 0;
213  T size = 0;
214  for (T i = 0; i < intervals.size(); ++i) {
215  auto &inti = intervals[i];
216  if (inti.begin == inti.end) continue;
217  for (T j = i + 1; j < intervals.size(); ++j) {
218  auto &intj = intervals[j];
219  if (intj.begin > inti.end) break;
220  if (intj.end > inti.end) inti.end = intj.end;
221  ++i;
222  }
223  count += inti.end - inti.begin;
224  intervals[size++] = inti;
225  }
226  intervals.resize(size);
227  intervals_.SetCount(count);
228 }
229 
230 // Intersects an interval set with the set. Requires intervals be normalized.
231 // The result is normalized.
232 template <typename T, class Store>
234  IntervalSet<T, Store> *oset) const {
235  auto *ointervals = oset->MutableIntervals();
236  auto it1 = intervals_.begin();
237  auto it2 = iset.intervals_.begin();
238  ointervals->clear();
239  T count = 0;
240  while (it1 != intervals_.end() && it2 != iset.intervals_.end()) {
241  if (it1->end <= it2->begin) {
242  ++it1;
243  } else if (it2->end <= it1->begin) {
244  ++it2;
245  } else {
246  ointervals->emplace_back(std::max(it1->begin, it2->begin),
247  std::min(it1->end, it2->end));
248  count += ointervals->back().end - ointervals->back().begin;
249  if (it1->end < it2->end) {
250  ++it1;
251  } else {
252  ++it2;
253  }
254  }
255  }
256  oset->intervals_.SetCount(count);
257 }
258 
259 // Complements the set w.r.t [0, maxval). Requires intervals be normalized.
260 // The result is normalized.
261 template <typename T, class Store>
263  IntervalSet<T, Store> *oset) const {
264  auto *ointervals = oset->MutableIntervals();
265  ointervals->clear();
266  T count = 0;
267  Interval interval;
268  interval.begin = 0;
269  for (auto it = intervals_.begin(); it != intervals_.end(); ++it) {
270  interval.end = std::min(it->begin, maxval);
271  if ((interval.begin) < (interval.end)) {
272  ointervals->push_back(interval);
273  count += interval.end - interval.begin;
274  }
275  interval.begin = it->end;
276  }
277  interval.end = maxval;
278  if ((interval.begin) < (interval.end)) {
279  ointervals->push_back(interval);
280  count += interval.end - interval.begin;
281  }
282  oset->intervals_.SetCount(count);
283 }
284 
285 // Subtract an interval set from the set. Requires intervals be normalized.
286 // The result is normalized.
287 template <typename T, class Store>
289  IntervalSet<T, Store> *oset) const {
290  if (Empty()) {
291  oset->MutableIntervals()->clear();
292  oset->intervals_.SetCount(0);
293  } else {
295  iset.Complement(intervals_.Intervals()[intervals_.Size() - 1].end, &cset);
296  Intersect(cset, oset);
297  }
298 }
299 
300 // Determines if an interval set overlaps with the set. Requires intervals be
301 // normalized.
302 template <typename T, class Store>
304  auto it1 = intervals_.begin();
305  auto it2 = iset.intervals_.begin();
306  while (it1 != intervals_.end() && it2 != iset.intervals_.end()) {
307  if (it1->end <= it2->begin) {
308  ++it1;
309  } else if (it2->end <= it1->begin) {
310  ++it2;
311  } else {
312  return true;
313  }
314  }
315  return false;
316 }
317 
318 // Determines if an interval set overlaps with the set but neither is contained
319 // in the other. Requires intervals be normalized.
320 template <typename T, class Store>
322  const IntervalSet<T, Store> &iset) const {
323  auto it1 = intervals_.begin();
324  auto it2 = iset.intervals_.begin();
325  bool only1 = false; // Point in intervals_ but not intervals.
326  bool only2 = false; // Point in intervals but not intervals_.
327  bool overlap = false; // Point in both intervals_ and intervals.
328  while (it1 != intervals_.end() && it2 != iset.intervals_.end()) {
329  if (it1->end <= it2->begin) { // no overlap - it1 first
330  only1 = true;
331  ++it1;
332  } else if (it2->end <= it1->begin) { // no overlap - it2 first
333  only2 = true;
334  ++it2;
335  } else if (it2->begin == it1->begin && it2->end == it1->end) { // equals
336  overlap = true;
337  ++it1;
338  ++it2;
339  } else if (it2->begin <= it1->begin && it2->end >= it1->end) { // 1 c 2
340  only2 = true;
341  overlap = true;
342  ++it1;
343  } else if (it1->begin <= it2->begin && it1->end >= it2->end) { // 2 c 1
344  only1 = true;
345  overlap = true;
346  ++it2;
347  } else { // Strict overlap.
348  only1 = true;
349  only2 = true;
350  overlap = true;
351  }
352  if (only1 == true && only2 == true && overlap == true) return true;
353  }
354  if (it1 != intervals_.end()) only1 = true;
355  if (it2 != iset.intervals_.end()) only2 = true;
356  return only1 == true && only2 == true && overlap == true;
357 }
358 
359 // Determines if an interval set is contained within the set. Requires intervals
360 // be normalized.
361 template <typename T, class Store>
363  if (iset.Count() > Count()) return false;
364  auto it1 = intervals_.begin();
365  auto it2 = iset.intervals_.begin();
366  while (it1 != intervals_.end() && it2 != iset.intervals_.end()) {
367  if ((it1->end) <= (it2->begin)) { // No overlap; it1 first.
368  ++it1;
369  } else if ((it2->begin) < (it1->begin) ||
370  (it2->end) > (it1->end)) { // No C.
371  return false;
372  } else if (it2->end == it1->end) {
373  ++it1;
374  ++it2;
375  } else {
376  ++it2;
377  }
378  }
379  return it2 == iset.intervals_.end();
380 }
381 
382 template <typename T, class Store>
383 std::ostream &operator<<(std::ostream &strm, const IntervalSet<T, Store> &s) {
384  strm << "{";
385  for (T i = 0; i < s.Size(); ++i) {
386  if (i > 0) {
387  strm << ",";
388  }
389  const auto &interval = s.Intervals()[i];
390  strm << "[" << interval.begin << "," << interval.end << ")";
391  }
392  strm << "}";
393  return strm;
394 }
395 
396 } // namespace fst
397 
398 #endif // FST_INTERVAL_SET_H_
void SetCount(T count)
Definition: interval-set.h:77
bool operator!=(const IntervalSet< T, Store > &iset) const
Definition: interval-set.h:153
void Union(const IntervalSet< T, Store > &iset)
Definition: interval-set.h:131
std::istream & Read(std::istream &strm)
Definition: interval-set.h:193
std::istream & Read(std::istream &strm)
Definition: interval-set.h:88
IntInterval(T begin, T end)
Definition: interval-set.h:27
bool operator==(const IntInterval< T > &i) const
Definition: interval-set.h:33
void Intersect(const Fst< Arc > &ifst1, const Fst< Arc > &ifst2, MutableFst< Arc > *ofst, const IntersectOptions &opts=IntersectOptions())
Definition: intersect.h:127
Iterator end() const
Definition: interval-set.h:86
T Count() const
Definition: interval-set.h:126
const Interval * Intervals() const
Definition: interval-set.h:119
std::vector< Interval > * MutableIntervals()
Definition: interval-set.h:114
std::ostream & Write(std::ostream &strm) const
Definition: interval-set.h:50
void Intersect(const IntervalSet< T, Store > &iset, IntervalSet< T, Store > *oset) const
Definition: interval-set.h:233
T Size() const
Definition: interval-set.h:123
void Complement(T maxval, IntervalSet< T, Store > *oset) const
Definition: interval-set.h:262
std::ostream & WriteType(std::ostream &strm, const T t)
Definition: util.h:155
void Difference(const Fst< Arc > &ifst1, const Fst< Arc > &ifst2, MutableFst< Arc > *ofst, const DifferenceOptions &opts=DifferenceOptions())
Definition: difference.h:151
std::istream & Read(std::istream &strm)
Definition: interval-set.h:41
std::ostream & Write(std::ostream &strm) const
Definition: interval-set.h:195
bool Member(T value) const
Definition: interval-set.h:138
bool Empty() const
Definition: interval-set.h:121
bool StrictlyOverlaps(const IntervalSet< T, Store > &iset) const
Definition: interval-set.h:321
typename std::vector< Interval >::const_iterator Iterator
Definition: interval-set.h:65
Store::Iterator begin() const
Definition: interval-set.h:199
std::vector< Interval > * MutableIntervals()
Definition: interval-set.h:69
IntervalSet(A...args)
Definition: interval-set.h:111
std::ostream & Write(std::ostream &strm) const
Definition: interval-set.h:93
void Difference(const IntervalSet< T, Store > &iset, IntervalSet< T, Store > *oset) const
Definition: interval-set.h:288
Store::Iterator end() const
Definition: interval-set.h:201
const Interval * Intervals() const
Definition: interval-set.h:71
std::istream & ReadType(std::istream &strm, T *t)
Definition: util.h:47
bool Overlaps(const IntervalSet< T, Store > &iset) const
Definition: interval-set.h:303
Iterator begin() const
Definition: interval-set.h:84
bool Singleton() const
Definition: interval-set.h:159
bool operator!=(const IntInterval< T > &i) const
Definition: interval-set.h:37
bool Contains(const IntervalSet< T, Store > &iset) const
Definition: interval-set.h:362
bool operator==(const IntervalSet< T, Store > &iset) const
Definition: interval-set.h:146