FST  openfst-1.8.3
OpenFst Library
bitmap-index.h
Go to the documentation of this file.
1 // Copyright 2005-2024 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the 'License');
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an 'AS IS' BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // See www.openfst.org for extensive documentation on this weighted
16 // finite-state transducer library.
17 
18 #ifndef FST_EXTENSIONS_NGRAM_BITMAP_INDEX_H_
19 #define FST_EXTENSIONS_NGRAM_BITMAP_INDEX_H_
20 
21 #include <cstddef>
22 #include <cstdint>
23 #include <utility>
24 #include <vector>
25 
26 #include <fst/compat.h>
27 #include <fst/log.h>
28 
29 // This class is a bitstring storage class with an index that allows
30 // seeking to the Nth set or clear bit in time O(Log(N)) (or
31 // O(log(1/density) if the relevant select index is enabled) where N is the
32 // length of the bit vector, and density is the block density of zeros/ones
33 // (for Select0/Select1 respectively). The block density is for block i is
34 // B / span_i, where B is the block size in bits (512); and span_i is
35 // Select(B * (i + 1)) - Select(B * i), the range of the bitstring that
36 // select index block i indexes. That is, B 0 (or 1) bits occur over
37 // span_i bits of the bit string.
38 //
39 // To turn this into the "standard"constant time select, there would need
40 // to be a span size threshold. Block spanning more than this would need
41 // to have the position of each bit explicitly recorded. 8k is a typical
42 // value for this threshold, but I saw no spans larger than ~6k.
43 //
44 // In addition, this class allows counting set or clear bits over ranges in
45 // constant time.
46 //
47 // This is accomplished by maintaining an index of the running popcounts
48 // of the bitstring. The index is divided into blocks that cover the
49 // size of a cache line (8 64-bit words). Each entry has one absolute count
50 // of all the 1s that appear before the block and 7 relative counts since the
51 // beginning of the block.
52 //
53 // The bitstring itself is stored as uint64s:
54 // uint64_t *bits_;
55 //
56 // The rank index looks like
57 // struct RankIndexEntry {
58 // uint32_t absolute_ones_count();
59 // uint32_t relative_ones_count_1();
60 // ...
61 // uint32_t relative_ones_count_7();
62 // };
63 // vector<RankIndexEntry> rank_index_;
64 //
65 // Where rank_index_[i].absolute_ones_count() == Rank1(512 * i), and
66 // for k in 1 .. 7:
67 // rank_index_[i].relative_ones_count_k() ==
68 // Rank1(512 * i + 64 * k) - Rank1(512 * i).
69 //
70 // This index is queried directly for Rank0 and Rank1 and binary searched
71 // for Select0 and Select1. If configured in the constructor or via
72 // BuildIndex, additional indices for Select0 and Select1 can be built
73 // to reduce these operations to O(log(1/density)) as explained above.
74 //
75 // The select indexes are stored as
76 // vector<uint32_t> select_0_index_;
77 // where
78 // select_0_index_[i] == Select0(512 * i).
79 // Similarly for select_1_index_.
80 //
81 // To save space, the absolute counts are stored as uint32_t. Therefore,
82 // only bitstrings with < 2**32 ones are supported.
83 //
84 // For each 64 bytes of input (8 8-byte words) there are 12 bytes of index
85 // (4 bytes for the absolute count and 2 * 4 bytes for the relative counts)
86 // for a 18.75% space overhead.
87 //
88 // The select indices have 6.25% overhead together.
89 
90 namespace fst {
91 
92 class BitmapIndex {
93  public:
94  static size_t StorageSize(size_t num_bits) {
95  return ((num_bits + kStorageBlockMask) >> kStorageLogBitSize);
96  }
97 
98  BitmapIndex() = default;
99  BitmapIndex(BitmapIndex&&) = default;
100  BitmapIndex& operator=(BitmapIndex&&) = default;
101 
102  // Convenience constructor to avoid a separate BuildIndex call.
103  BitmapIndex(const uint64_t* bits, std::size_t num_bits,
104  bool enable_select_0_index = false,
105  bool enable_select_1_index = false) {
106  BuildIndex(bits, num_bits, enable_select_0_index, enable_select_1_index);
107  }
108 
109  bool Get(size_t index) const { return Get(bits_, index); }
110 
111  static bool Get(const uint64_t* bits, size_t index) {
112  return (bits[index >> kStorageLogBitSize] &
113  (kOne << (index & kStorageBlockMask))) != 0;
114  }
115 
116  static void Set(uint64_t* bits, size_t index) {
117  bits[index >> kStorageLogBitSize] |= (kOne << (index & kStorageBlockMask));
118  }
119 
120  static void Clear(uint64_t* bits, size_t index) {
121  bits[index >> kStorageLogBitSize] &= ~(kOne << (index & kStorageBlockMask));
122  }
123 
124  size_t Bits() const { return num_bits_; }
125 
126  size_t ArraySize() const { return StorageSize(num_bits_); }
127 
128  // Number of bytes used to store the bit vector.
129  size_t ArrayBytes() const { return ArraySize() * sizeof(bits_[0]); }
130 
131  // Number of bytes used to store the rank index.
132  size_t IndexBytes() const {
133  return (rank_index_.size() * sizeof(rank_index_[0]) +
134  select_0_index_.size() * sizeof(select_0_index_[0]) +
135  select_1_index_.size() * sizeof(select_1_index_[0]));
136  }
137 
138  // Returns the number of one bits in the bitmap
139  size_t GetOnesCount() const {
140  // We keep an extra entry with the total count.
141  return rank_index_.back().absolute_ones_count();
142  }
143 
144  // Returns the number of one bits in positions 0 to limit - 1.
145  // REQUIRES: limit <= Bits()
146  size_t Rank1(size_t end) const;
147 
148  // Returns the number of zero bits in positions 0 to limit - 1.
149  // REQUIRES: limit <= Bits()
150  size_t Rank0(size_t end) const { return end - Rank1(end); }
151 
152  // Returns the offset to the nth set bit (zero based)
153  // or Bits() if index >= number of ones
154  size_t Select1(size_t bit_index) const;
155 
156  // Returns the offset to the nth clear bit (zero based)
157  // or Bits() if index > number of
158  size_t Select0(size_t bit_index) const;
159 
160  // Returns the offset of the nth and nth+1 clear bit (zero based),
161  // equivalent to two calls to Select0, but more efficient.
162  std::pair<size_t, size_t> Select0s(size_t bit_index) const;
163 
164  // Rebuilds from index for the associated Bitmap, should be called
165  // whenever changes have been made to the Bitmap or else behavior
166  // of the indexed bitmap methods will be undefined.
167  void BuildIndex(const uint64_t* bits, size_t num_bits,
168  bool enable_select_0_index = false,
169  bool enable_select_1_index = false);
170 
171  static constexpr uint64_t kOne = 1;
172  static constexpr uint32_t kStorageBitSize = 64;
173  static constexpr uint32_t kStorageLogBitSize = 6;
174 
175  private:
176  static constexpr uint32_t kUnitsPerRankIndexEntry = 8;
177  static constexpr uint32_t kBitsPerRankIndexEntry =
178  kUnitsPerRankIndexEntry * kStorageBitSize;
179  static constexpr uint32_t kStorageBlockMask = kStorageBitSize - 1;
180 
181  // TODO(jrosenstock): benchmark different values here.
182  // It's reasonable that these are the same since density is typically around
183  // 1/2.
184  static constexpr uint32_t kBitsPerSelect0Block = 512;
185  static constexpr uint32_t kBitsPerSelect1Block = 512;
186 
187  // If this many or fewer RankIndexEntry blocks need to be searched by
188  // FindRankIndexEntry use a linear search instead of a binary search.
189  // FindInvertedRankIndexEntry always uses binary search, since linear
190  // search never showed improvements on benchmarks. The value of 8 was
191  // faster than smaller values on benchmarks, but I do not feel comfortable
192  // raising it because there are very few times a higher value would
193  // make a difference. Thus, whether a higher value helps or hurts is harder
194  // to measure. TODO(jrosenstock): Try to measure with low bit density.
195  static constexpr uint32_t kMaxLinearSearchBlocks = 8;
196 
197  // A RankIndexEntry covers a block of 8 64-bit words (one cache line on
198  // x86_64 and ARM). It consists of an absolute count of all the 1s that
199  // appear before this block, and 7 relative counts for the 1s within
200  // the block. relative_ones_count_k = popcount(block[0:k]).
201  // The relative counts are stored in bitfields.
202  // A RankIndexEntry takes 12 bytes, for 12/64 = 18.75% overhead.
203  // See also documentation at the top of the file.
204  class RankIndexEntry {
205  public:
206  RankIndexEntry() = default;
207 
208  uint32_t absolute_ones_count() const { return absolute_ones_count_; }
209 
210  // Returns the popcounts of words *before* word `k` in the block.
211  uint32_t relative_ones_count(size_t k) const {
212  assert(k < 8);
213  // TODO(jrosenstock): Try a multiply here instead.
214  const uint32_t c = k < 4 ? 0 : relative_ones_count_4_;
215  // Load starting one byte before the three relative counts we want.
216  // This load is unaligned for `k < 4` and aligned otherwise. The
217  // address is always within `RankIndexEntry`.
218  uint32_t relative_ones_counts_8x4 =
219  UnalignedLoad<uint32_t>(&relative_ones_counts_[k >> 2][0] - 1);
220 #if ((defined __ORDER_LITTLE_ENDIAN__) || (defined _WIN32))
221  // Clear out the garbage byte.
222  relative_ones_counts_8x4 &= ~0xFF;
223  return c + ((relative_ones_counts_8x4 >> (8 * (k & 3))) & 0xFF);
224 #else
225 #error "Big-endian currently unsupported."
226 #endif
227  }
228 
229  uint32_t relative_ones_count_1() const {
230  return relative_ones_counts_[0][0];
231  }
232  uint32_t relative_ones_count_2() const {
233  return relative_ones_counts_[0][1];
234  }
235  uint32_t relative_ones_count_3() const {
236  return relative_ones_counts_[0][2];
237  }
238  uint32_t relative_ones_count_4() const { return relative_ones_count_4_; }
239  uint32_t relative_ones_count_5() const {
240  return relative_ones_count_4() + relative_ones_counts_[1][0];
241  }
242  uint32_t relative_ones_count_6() const {
243  return relative_ones_count_4() + relative_ones_counts_[1][1];
244  }
245  uint32_t relative_ones_count_7() const {
246  return relative_ones_count_4() + relative_ones_counts_[1][2];
247  }
248 
249  void set_absolute_ones_count(uint32_t v) { absolute_ones_count_ = v; }
250  void set_relative_ones_count_1(uint32_t v) {
251  DCHECK_LE(v, kStorageBitSize);
252  relative_ones_counts_[0][0] = v;
253  }
254  void set_relative_ones_count_2(uint32_t v) {
255  DCHECK_LE(v, 2 * kStorageBitSize);
256  relative_ones_counts_[0][1] = v;
257  }
258  void set_relative_ones_count_3(uint32_t v) {
259  DCHECK_LE(v, 3 * kStorageBitSize);
260  relative_ones_counts_[0][2] = v;
261  }
262  void set_relative_ones_count_4(uint32_t v) {
263  DCHECK_LE(v, 4 * kStorageBitSize);
264  DCHECK_EQ(relative_ones_counts_[1][0], 0);
265  DCHECK_EQ(relative_ones_counts_[1][1], 0);
266  DCHECK_EQ(relative_ones_counts_[1][2], 0);
267  relative_ones_count_4_ = v;
268  }
269  void set_relative_ones_count_5(uint32_t v) {
270  DCHECK_LE(v, 5 * kStorageBitSize);
271  relative_ones_counts_[1][0] = v - relative_ones_count_4();
272  }
273  void set_relative_ones_count_6(uint32_t v) {
274  DCHECK_LE(v, 6 * kStorageBitSize);
275  relative_ones_counts_[1][1] = v - relative_ones_count_4();
276  }
277  void set_relative_ones_count_7(uint32_t v) {
278  DCHECK_LE(v, 7 * kStorageBitSize);
279  relative_ones_counts_[1][2] = v - relative_ones_count_4();
280  }
281 
282  private:
283  // Popcount of 1s before this block.
284  // rank_index_[i].absolute_ones_count() == Rank1(512 * i).
285  uint32_t absolute_ones_count_ = 0;
286 
287  // Popcount of 1s since the beginning of the block.
288  // rank_index_[i].relative_ones_count(k) ==
289  // rank_index_[i].relative_ones_count_k() ==
290  // Rank1(512 * i + 64 * k) - Rank1(512 * i).
291  //
292  // Three consecutive uint64s may have a total of at most 3 * 64 = 192 < 256
293  // ones set. Thus we store `relative_ones_count_1(), ...
294  // relative_ones_count_3()` directly in the one-byte integers
295  // `relative_ones_counts_[0][0], ..., relative_ones_counts_[0][2]`. We use
296  // 16 bits for relative_ones_count_4_. In `relative_ones_counts_[1][0], ...
297  // relative_ones_counts_[1][2]`, we store the difference between
298  // `relative_ones_counts_5(), ... `relative_ones_counts_7()` and
299  // `relative_ones_count_4_`. We put `relative_ones_count_4_` in a 16-bit
300  // integer because it's often used as the first split point for binary
301  // search, so we save an addition there.
302  //
303  // More explicitly:
304  // ```
305  // relative_ones_counts_[0][0] = relative_ones_count_1()
306  // relative_ones_counts_[0][1] = relative_ones_count_2()
307  // relative_ones_counts_[0][2] = relative_ones_count_3()
308  // relative_ones_counts_[1][0] =
309  // relative_ones_count_5() - relative_ones_count_4()
310  // relative_ones_counts_[1][1] =
311  // relative_ones_count_6() - relative_ones_count_4()
312  // relative_ones_counts_[1][2] =
313  // relative_ones_count_7() - relative_ones_count_4()
314  // ```
315  //
316  // (As a consequence, it is an error to call set_relative_ones_count_4()
317  // after calling set_relative_ones_count_N() for N in {5, 6, 7}.)
318  uint16_t relative_ones_count_4_ = 0;
319  uint8_t relative_ones_counts_[2][3] = {{0, 0, 0}, {0, 0, 0}};
320  };
321  static_assert(sizeof(RankIndexEntry) == 4 + 8,
322  "RankIndexEntry should be 12 bytes.");
323 
324  // Returns, from the index, the count of ones up to array_index.
325  uint32_t GetIndexOnesCount(size_t array_index) const;
326 
327  // Finds the entry in the rank index for the block containing the
328  // bit_index-th 1 bit.
329  const RankIndexEntry& FindRankIndexEntry(size_t bit_index) const;
330 
331  // Finds the entry in the rank index for the block containing the
332  // bit_index-th 0 bit.
333  const RankIndexEntry& FindInvertedRankIndexEntry(size_t bit_index) const;
334 
335  // We create a combined primary and secondary index, with one extra entry
336  // to hold the total number of bits.
337  size_t rank_index_size() const {
338  return (ArraySize() + kUnitsPerRankIndexEntry - 1) /
339  kUnitsPerRankIndexEntry +
340  1;
341  }
342 
343  const uint64_t* bits_ = nullptr;
344  size_t num_bits_ = 0;
345 
346  std::vector<RankIndexEntry> rank_index_;
347 
348  // Index of positions for Select0
349  // select_0_index_[i] == Select0(kBitsPerSelect0Block * i).
350  // Empty means there is no index, otherwise, we always add an extra entry
351  // with num_bits_. Overhead is 4 bytes / 64 bytes of zeros,
352  // so 4/64 times the density of zeros. This is 6.25% * zeros_density.
353  std::vector<uint32_t> select_0_index_;
354 
355  // Index of positions for Select1
356  // select_1_index_[i] == Select1(kBitsPerSelect1Block * i).
357  // Empty means there is no index, otherwise, we always add an extra entry
358  // with num_bits_. Overhead is 4 bytes / 64 bytes of ones,
359  // so 4/64 times the density of ones. This is 6.25% * ones_density.
360  std::vector<uint32_t> select_1_index_;
361 };
362 
363 } // end namespace fst
364 
365 #endif // FST_EXTENSIONS_NGRAM_BITMAP_INDEX_H_
BitmapIndex()=default
BitmapIndex & operator=(BitmapIndex &&)=default
static void Clear(uint64_t *bits, size_t index)
Definition: bitmap-index.h:120
static constexpr uint32_t kStorageBitSize
Definition: bitmap-index.h:172
static void Set(uint64_t *bits, size_t index)
Definition: bitmap-index.h:116
void BuildIndex(const uint64_t *bits, size_t num_bits, bool enable_select_0_index=false, bool enable_select_1_index=false)
size_t Bits() const
Definition: bitmap-index.h:124
size_t Select1(size_t bit_index) const
Definition: bitmap-index.cc:58
#define DCHECK_EQ(x, y)
Definition: log.h:75
static constexpr uint64_t kOne
Definition: bitmap-index.h:171
BitmapIndex(const uint64_t *bits, std::size_t num_bits, bool enable_select_0_index=false, bool enable_select_1_index=false)
Definition: bitmap-index.h:103
size_t Rank1(size_t end) const
Definition: bitmap-index.cc:42
size_t Select0(size_t bit_index) const
size_t GetOnesCount() const
Definition: bitmap-index.h:139
static size_t StorageSize(size_t num_bits)
Definition: bitmap-index.h:94
size_t ArrayBytes() const
Definition: bitmap-index.h:129
bool Get(size_t index) const
Definition: bitmap-index.h:109
size_t ArraySize() const
Definition: bitmap-index.h:126
#define DCHECK_LE(x, y)
Definition: log.h:78
std::pair< size_t, size_t > Select0s(size_t bit_index) const
static bool Get(const uint64_t *bits, size_t index)
Definition: bitmap-index.h:111
size_t IndexBytes() const
Definition: bitmap-index.h:132
size_t Rank0(size_t end) const
Definition: bitmap-index.h:150
static constexpr uint32_t kStorageLogBitSize
Definition: bitmap-index.h:173