18 #ifndef FST_EXTENSIONS_NGRAM_NTHBIT_H_ 19 #define FST_EXTENSIONS_NGRAM_NTHBIT_H_ 24 #if defined(__aarch64__) 30 #if defined(__BMI2__) // Intel Bit Manipulation Instruction Set 2 33 #include <immintrin.h> 38 inline int nth_bit(uint64_t v, uint32_t r) {
46 return __builtin_ctzll(_pdep_u64(uint64_t{1} << r, v));
50 #elif SIZE_MAX == UINT32_MAX 56 int nth_bit(uint64_t v, uint32_t r);
59 #elif SIZE_MAX == UINT64_MAX 65 constexpr std::array<uint64_t, 64> PrefixSumOverflows() {
66 std::array<uint64_t, 64> a{};
67 constexpr uint64_t kOnesStep8 = 0x0101010101010101;
68 for (
int i = 0; i < 64; ++i) {
69 a[i] = (0x7F - i) * kOnesStep8;
74 constexpr std::array<uint64_t, 64> kPrefixSumOverflow = PrefixSumOverflows();
76 extern const uint8_t kSelectInByte[2048];
86 inline int nth_bit(
const uint64_t v,
const uint32_t r) {
87 constexpr uint64_t kOnesStep8 = 0x0101010101010101;
88 constexpr uint64_t kMSBsStep8 = 0x80 * kOnesStep8;
94 #if defined(__aarch64__) 97 reinterpret_cast<uint64_t
>(vcnt_u8(reinterpret_cast<uint8x8_t>(v)));
99 constexpr uint64_t kOnesStep4 = 0x1111111111111111;
101 s = s - ((s >> 1) & (0x5 * kOnesStep4));
102 s = (s & (0x3 * kOnesStep4)) + ((s >> 2) & (0x3 * kOnesStep4));
103 s = (s + (s >> 4)) & (0xF * kOnesStep8);
109 uint64_t byte_sums = s * kOnesStep8;
115 const uint64_t b = (byte_sums + internal::kPrefixSumOverflow[r]) & kMSBsStep8;
119 const int byte_nr = __builtin_ctzll(b) >> 3;
120 const int shift = byte_nr << 3;
124 const int rank_in_byte = r - (byte_sums >> shift) & 0xFF;
126 internal::kSelectInByte[(rank_in_byte << 8) + ((v >> shift) & 0xFF)];
132 #error Unrecognized architecture size 136 #endif // FST_EXTENSIONS_NGRAM_NTHBIT_H_
int nth_bit(const uint64_t v, uint32_t r)