FST  openfst-1.7.3
OpenFst Library
bitmap-index.cc
Go to the documentation of this file.
1 // See www.openfst.org for extensive documentation on this weighted
2 // finite-state transducer library.
3 
5 
6 #include <algorithm>
7 #include <iterator>
8 
9 #include <fst/log.h>
11 
12 namespace fst {
13 namespace {
14 const size_t kPrimaryBlockBits =
16 
17 // If [begin, begin+size) is a monotonically increasing running sum of
18 // popcounts for a bitmap, this will return the index of the word that contains
19 // the value'th zero. If value is larger then the number of zeros in the
20 // bitmap, size will be returned. The idea is that the number of zerocounts
21 // (i.e. the popcount of logical NOT of values) is offset * kStorageBitSize
22 // minus the value for each element of the running sum.
23 template <size_t BlockSize, typename Container>
24 size_t InvertedSearch(const Container& c,
25  size_t first_idx,
26  size_t last_idx,
27  size_t value) {
28  const size_t begin_idx = first_idx;
29  while (first_idx != last_idx) {
30  // Invariant: [first_idx, last_idx) is the search range.
31  size_t mid_idx = first_idx + ((last_idx - first_idx) / 2);
32  size_t mid_value = BlockSize * (1 + (mid_idx - begin_idx)) - c[mid_idx];
33  if (mid_value < value) {
34  first_idx = mid_idx + 1;
35  } else {
36  last_idx = mid_idx;
37  }
38  }
39  return first_idx;
40 }
41 } // namespace
42 
43 size_t BitmapIndex::Rank1(size_t end) const {
44  if (end == 0) return 0;
45  const uint32 end_word = (end - 1) >> BitmapIndex::kStorageLogBitSize;
46  const uint32 sum = get_index_ones_count(end_word);
47  const size_t masked = end & kStorageBlockMask;
48  if (masked == 0) {
49  return sum + __builtin_popcountll(bits_[end_word]);
50  } else {
51  const uint64 zero = 0;
52  return sum + __builtin_popcountll(bits_[end_word] &
53  (~zero >> (kStorageBitSize - masked)));
54  }
55 }
56 
57 size_t BitmapIndex::Select1(size_t bit_index) const {
58  if (bit_index >= GetOnesCount()) return Bits();
59  // search primary index for the relevant block
60  uint32 rembits = bit_index + 1;
61  const uint32 block = find_primary_block(bit_index + 1);
62  uint32 offset = 0;
63  if (block > 0) {
64  rembits -= primary_index_[block - 1];
65  offset += block * kSecondaryBlockSize;
66  }
67  // search the secondary index
68  uint32 word = find_secondary_block(offset, rembits);
69  if (word > 0) {
70  rembits -= secondary_index_[offset + word - 1];
71  offset += word;
72  }
73  int nth = nth_bit(bits_[offset], rembits);
74  return (offset << BitmapIndex::kStorageLogBitSize) + nth;
75 }
76 
77 size_t BitmapIndex::Select0(size_t bit_index) const {
78  if (bit_index >= Bits() - GetOnesCount()) return Bits();
79  // search inverted primary index for relevant block
80  uint32 remzeros = bit_index + 1;
81  uint32 offset = 0;
82  const uint32 block = find_inverted_primary_block(bit_index + 1);
83  if (block > 0) {
84  remzeros -= kPrimaryBlockBits * block - primary_index_[block - 1];
85  offset += block * kSecondaryBlockSize;
86  }
87  // search the inverted secondary index
88  uint32 word = find_inverted_secondary_block(offset, remzeros);
89  if (word > 0) {
90  remzeros -= BitmapIndex::kStorageBitSize * word -
91  secondary_index_[offset + word - 1];
92  offset += word;
93  }
94  int nth = nth_bit(~bits_[offset], remzeros);
95  return (offset << BitmapIndex::kStorageLogBitSize) + nth;
96 }
97 
98 std::pair<size_t, size_t> BitmapIndex::Select0s(size_t bit_index) const {
99  const uint64 zero = 0;
100  const uint64 ones = ~zero;
101  size_t zeros_count = Bits() - GetOnesCount();
102  if (bit_index >= zeros_count) return std::make_pair(Bits(), Bits());
103  if (bit_index + 1 >= zeros_count) {
104  return std::make_pair(Select0(bit_index), Bits());
105  }
106  // search inverted primary index for relevant block
107  uint32 remzeros = bit_index + 1;
108  uint32 offset = 0;
109  const uint32 block = find_inverted_primary_block(bit_index + 1);
110  size_t num_zeros_in_block =
111  kPrimaryBlockBits * (1 + block) - primary_index_[block];
112  if (block > 0) {
113  size_t num_zeros_next =
114  kPrimaryBlockBits * block - primary_index_[block - 1];
115  num_zeros_in_block -= num_zeros_next;
116  remzeros -= num_zeros_next;
117  offset += block * kSecondaryBlockSize;
118  }
119  // search the inverted secondary index
120  uint32 word = find_inverted_secondary_block(offset, remzeros);
121  uint32 sum_zeros_next_word = BitmapIndex::kStorageBitSize * (1 + word) -
122  secondary_index_[offset + word];
123  uint32 sum_zeros_this_word = 0;
124  if (word > 0) {
125  sum_zeros_this_word = BitmapIndex::kStorageBitSize * word -
126  secondary_index_[offset + word - 1];
127  remzeros -= sum_zeros_this_word;
128  offset += word;
129  }
130  int nth = nth_bit(~bits_[offset], remzeros);
131  size_t current_zero = (offset << BitmapIndex::kStorageLogBitSize) + nth;
132 
133  size_t next_zero;
134  // Does the current block contain the next zero?
135  if (num_zeros_in_block > remzeros + 1) {
136  if (sum_zeros_next_word - sum_zeros_this_word >= remzeros + 1) {
137  // the next zero is in this word
138  next_zero = (offset << BitmapIndex::kStorageLogBitSize) +
139  nth_bit(~bits_[offset], remzeros + 1);
140  } else {
141  // Find the first field that is not all ones by linear scan.
142  // In the worst case, this may scan 8Kbytes. The alternative is
143  // to inspect secondary_index_ looking for a place to jump to, but
144  // that would probably use more cache.
145  while (bits_[++offset] == ones) {
146  }
147  next_zero = (offset << BitmapIndex::kStorageLogBitSize) +
148  __builtin_ctzll(~bits_[offset]);
149  }
150  } else {
151  // the next zero is in a different block, a full search is required.
152  next_zero = Select0(bit_index + 1);
153  }
154  return std::make_pair(current_zero, next_zero);
155 }
156 
157 size_t BitmapIndex::get_index_ones_count(size_t array_index) const {
158  uint32 sum = 0;
159  if (array_index > 0) {
160  sum += secondary_index_[array_index - 1];
161  uint32 end_block = (array_index - 1) / kSecondaryBlockSize;
162  if (end_block > 0) sum += primary_index_[end_block - 1];
163  }
164  return sum;
165 }
166 
167 void BitmapIndex::BuildIndex(const uint64 *bits, size_t size) {
168  bits_ = bits;
169  size_ = size;
170  primary_index_.resize(primary_index_size());
171  secondary_index_.resize(ArraySize());
172  const uint64 zero = 0;
173  const uint64 ones = ~zero;
174  uint32 popcount = 0;
175  for (uint32 block = 0; block * kSecondaryBlockSize < ArraySize(); block++) {
176  uint32 block_popcount = 0;
177  uint32 block_begin = block * kSecondaryBlockSize;
178  uint32 block_end = block_begin + kSecondaryBlockSize;
179  if (block_end > ArraySize()) block_end = ArraySize();
180  for (uint32 j = block_begin; j < block_end; ++j) {
181  uint64 mask = ones;
182  if (j == ArraySize() - 1) {
183  mask = ones >> (-size_ & BitmapIndex::kStorageBlockMask);
184  }
185  block_popcount += __builtin_popcountll(bits_[j] & mask);
186  secondary_index_[j] = block_popcount;
187  }
188  popcount += block_popcount;
189  primary_index_[block] = popcount;
190  }
191 }
192 
193 size_t BitmapIndex::find_secondary_block(size_t block_begin,
194  size_t rem_bit_index) const {
195  size_t block_end = block_begin + kSecondaryBlockSize;
196  if (block_end > ArraySize()) block_end = ArraySize();
197  return std::distance(
198  secondary_index_.begin() + block_begin,
199  std::lower_bound(secondary_index_.begin() + block_begin,
200  secondary_index_.begin() + block_end, rem_bit_index));
201 }
202 
203 size_t BitmapIndex::find_inverted_secondary_block(size_t block_begin,
204  size_t rem_bit_index) const {
205  size_t block_end = block_begin + kSecondaryBlockSize;
206  if (block_end > ArraySize()) block_end = ArraySize();
207  return InvertedSearch<BitmapIndex::kStorageBitSize>(secondary_index_,
208  block_begin, block_end,
209  rem_bit_index)
210  - block_begin;
211 }
212 
213 inline size_t BitmapIndex::find_primary_block(size_t bit_index) const {
214  return std::distance(
215  primary_index_.begin(),
216  std::lower_bound(primary_index_.begin(),
217  primary_index_.begin() + primary_index_size(),
218  bit_index));
219 }
220 
221 size_t BitmapIndex::find_inverted_primary_block(size_t bit_index) const {
222  return InvertedSearch<kPrimaryBlockBits>(
223  primary_index_, 0, primary_index_.size(), bit_index);
224 }
225 } // end namespace fst
uint32 nth_bit(uint64 v, uint32 r)
Definition: nthbit.h:33
uint64_t uint64
Definition: types.h:32
static const uint32 kSecondaryBlockSize
Definition: bitmap-index.h:108
size_t Bits() const
Definition: bitmap-index.h:49
size_t Select1(size_t bit_index) const
Definition: bitmap-index.cc:57
size_t Rank1(size_t end) const
Definition: bitmap-index.cc:43
size_t Select0(size_t bit_index) const
Definition: bitmap-index.cc:77
size_t GetOnesCount() const
Definition: bitmap-index.h:54
void BuildIndex(const uint64 *bits, size_t size)
size_t ArraySize() const
Definition: bitmap-index.h:51
uint32_t uint32
Definition: types.h:31
std::pair< size_t, size_t > Select0s(size_t bit_index) const
Definition: bitmap-index.cc:98
static const uint32 kStorageLogBitSize
Definition: bitmap-index.h:107
static const uint32 kStorageBitSize
Definition: bitmap-index.h:106