bit_util: Add fls_ functions; "find last set".

These simplify a lot of the bit_util module, which had grown bits and pieces of
this functionality across a variety of places over the years.

While we're here, kill off BIT_UTIL_INLINE and don't do reentrancy testing for
bit_util.
This commit is contained in:
David Goldblatt 2020-07-22 07:10:06 -07:00 committed by David Goldblatt
parent 1ed0288d9c
commit 22da836094
3 changed files with 292 additions and 133 deletions

View File

@ -2118,7 +2118,7 @@ esac
fi fi
dnl ============================================================================ dnl ============================================================================
dnl Check for __builtin_clz() and __builtin_clzl(). dnl Check for __builtin_clz(), __builtin_clzl(), and __builtin_clzll().
AC_CACHE_CHECK([for __builtin_clz], AC_CACHE_CHECK([for __builtin_clz],
[je_cv_builtin_clz], [je_cv_builtin_clz],
@ -2132,6 +2132,10 @@ AC_CACHE_CHECK([for __builtin_clz],
unsigned long x = 0; unsigned long x = 0;
int y = __builtin_clzl(x); int y = __builtin_clzl(x);
} }
{
unsigned long long x = 0;
int y = __builtin_clzll(x);
}
])], ])],
[je_cv_builtin_clz=yes], [je_cv_builtin_clz=yes],
[je_cv_builtin_clz=no])]) [je_cv_builtin_clz=no])])

View File

@ -3,8 +3,6 @@
#include "jemalloc/internal/assert.h" #include "jemalloc/internal/assert.h"
#define BIT_UTIL_INLINE static inline
/* Sanity check. */ /* Sanity check. */
#if !defined(JEMALLOC_INTERNAL_FFSLL) || !defined(JEMALLOC_INTERNAL_FFSL) \ #if !defined(JEMALLOC_INTERNAL_FFSLL) || !defined(JEMALLOC_INTERNAL_FFSL) \
|| !defined(JEMALLOC_INTERNAL_FFS) || !defined(JEMALLOC_INTERNAL_FFS)
@ -18,26 +16,171 @@
* value of zero as a sentinel. This tends to simplify logic in callers, and * value of zero as a sentinel. This tends to simplify logic in callers, and
* allows for consistency with the builtins we build fls on top of. * allows for consistency with the builtins we build fls on top of.
*/ */
BIT_UTIL_INLINE unsigned static inline unsigned
ffs_llu(unsigned long long x) { ffs_llu(unsigned long long x) {
util_assume(x != 0); util_assume(x != 0);
return JEMALLOC_INTERNAL_FFSLL(x) - 1; return JEMALLOC_INTERNAL_FFSLL(x) - 1;
} }
BIT_UTIL_INLINE unsigned static inline unsigned
ffs_lu(unsigned long x) { ffs_lu(unsigned long x) {
util_assume(x != 0); util_assume(x != 0);
return JEMALLOC_INTERNAL_FFSL(x) - 1; return JEMALLOC_INTERNAL_FFSL(x) - 1;
} }
BIT_UTIL_INLINE unsigned static inline unsigned
ffs_u(unsigned x) { ffs_u(unsigned x) {
util_assume(x != 0); util_assume(x != 0);
return JEMALLOC_INTERNAL_FFS(x) - 1; return JEMALLOC_INTERNAL_FFS(x) - 1;
} }
/*
 * Portable "find last set" body shared by the fls_*_slow() functions.
 *
 * The macro expands to the whole body of a function whose argument is named
 * by x, and it returns from that function itself.  suffix selects the ffs_
 * helper that matches the type of x (llu, lu or u).
 *
 * The shift/or cascade copies the highest set bit of x into every lower
 * position, so x becomes 2^(k+1) - 1 where k is the index of that bit.  After
 * the increment x is 2^(k+1), whose lowest (and only) set bit is at index
 * k + 1; ffs_<suffix>() finds it and subtracting one gives k.  When k is the
 * top bit of the type the increment wraps to 0, which is handled explicitly
 * by returning the index of the top bit.
 */
#define DO_FLS_SLOW(x, suffix) do { \
util_assume(x != 0); \
x |= (x >> 1); \
x |= (x >> 2); \
x |= (x >> 4); \
x |= (x >> 8); \
x |= (x >> 16); \
if (sizeof(x) > 4) { \
/* \
* If sizeof(x) is 4, then the expression "x >> 32" \
* will generate compiler warnings even if the code \
* never executes. This circumvents the warning, and \
* gets compiled out in optimized builds. \
*/ \
int constant_32 = sizeof(x) * 4; \
x |= (x >> constant_32); \
} \
x++; \
if (x == 0) { \
return 8 * sizeof(x) - 1; \
} \
return ffs_##suffix(x) - 1; \
} while(0)
/*
 * 0-based index of the most significant set bit of x, computed by
 * DO_FLS_SLOW (shift/or cascade followed by ffs_llu()).  x must be non-zero.
 */
static inline unsigned
fls_llu_slow(unsigned long long x) {
DO_FLS_SLOW(x, llu);
}
/*
 * 0-based index of the most significant set bit of x, computed by
 * DO_FLS_SLOW (shift/or cascade followed by ffs_lu()).  x must be non-zero.
 */
static inline unsigned
fls_lu_slow(unsigned long x) {
DO_FLS_SLOW(x, lu);
}
/*
 * 0-based index of the most significant set bit of x, computed by
 * DO_FLS_SLOW (shift/or cascade followed by ffs_u()).  x must be non-zero.
 */
static inline unsigned
fls_u_slow(unsigned x) {
DO_FLS_SLOW(x, u);
}
#undef DO_FLS_SLOW
#ifdef JEMALLOC_HAVE_BUILTIN_CLZ
/*
 * Find last set: 0-based index of the most significant set bit of x, using
 * the compiler's count-leading-zeros builtin.  x must be non-zero.
 */
static inline unsigned
fls_llu(unsigned long long x) {
	util_assume(x != 0);
	/*
	 * The answer is (number of bits in the type - 1) minus the number of
	 * leading zero bits.  It is deliberately computed with xor rather than
	 * subtraction.  For the subtraction GCC emits
	 *   bsr edi, edi
	 *   mov eax, 31
	 *   xor edi, 31
	 *   sub eax, edi
	 * whereas for the xor form it emits only
	 *   bsr eax, edi
	 * which is what we want.
	 */
	const unsigned top_bit = (unsigned)(8 * sizeof(x) - 1);
	const unsigned leading_zeros = (unsigned)__builtin_clzll(x);
	return top_bit ^ leading_zeros;
}
/*
 * Find last set: 0-based index of the most significant set bit of x, using
 * the compiler's count-leading-zeros builtin.  x must be non-zero.
 */
static inline unsigned
fls_lu(unsigned long x) {
	util_assume(x != 0);
	/* Index of the top bit of the type, xor-ed with the leading zero count. */
	const unsigned top_bit = (unsigned)(8 * sizeof(x) - 1);
	const unsigned leading_zeros = (unsigned)__builtin_clzl(x);
	return top_bit ^ leading_zeros;
}
/*
 * Find last set: 0-based index of the most significant set bit of x, using
 * the compiler's count-leading-zeros builtin.  x must be non-zero.
 */
static inline unsigned
fls_u(unsigned x) {
	util_assume(x != 0);
	/* Index of the top bit of the type, xor-ed with the leading zero count. */
	const unsigned top_bit = (unsigned)(8 * sizeof(x) - 1);
	const unsigned leading_zeros = (unsigned)__builtin_clz(x);
	return top_bit ^ leading_zeros;
}
#elif defined(_MSC_VER)
/*
 * DO_BSR64(bit, x): 64-bit bit-scan-reverse step used by DO_FLS.  With 8-byte
 * pointers it calls _BitScanReverse64(), which writes its result into bit.
 */
#if LG_SIZEOF_PTR == 3
#define DO_BSR64(bit, x) _BitScanReverse64(&bit, x)
#else
/*
 * Placeholder for builds with narrower pointers.  It never actually runs: the
 * only use in DO_FLS sits in the branch guarded by sizeof(void *) == 8, which
 * is never taken here.  That branch still has to compile, so we give it a
 * harmless expansion instead of a compiler error.
 */
#define DO_BSR64(bit, x) bit = 0; unreachable()
#endif
/*
 * MSVC "find last set" body shared by fls_llu(), fls_lu() and fls_u().
 *
 * The macro expands to the whole body of a function whose argument is named
 * by x, and it returns from that function itself.  Cases, in order:
 *   - x == 0: returns 8 * sizeof(x) (the number of bits in the type) rather
 *     than treating a zero input as undefined.
 *   - 4-byte x: a single _BitScanReverse().
 *   - 8-byte x with 8-byte pointers: DO_BSR64().
 *   - 8-byte x with 4-byte pointers: scan the high 32 bits first; the value
 *     returned by _BitScanReverse() is used to tell whether a bit was found
 *     there.  If so the result is offset by 32, otherwise the low 32 bits
 *     are scanned.
 * Any other combination of sizes is marked unreachable().
 */
#define DO_FLS(x) do { \
if (x == 0) { \
return 8 * sizeof(x); \
} \
unsigned long bit; \
if (sizeof(x) == 4) { \
_BitScanReverse(&bit, (unsigned)x); \
return (unsigned)bit; \
} \
if (sizeof(x) == 8 && sizeof(void *) == 8) { \
DO_BSR64(bit, x); \
return (unsigned)bit; \
} \
if (sizeof(x) == 8 && sizeof(void *) == 4) { \
/* Dodge a compiler warning, as above. */ \
int constant_32 = sizeof(x) * 4; \
if (_BitScanReverse(&bit, \
(unsigned)(x >> constant_32))) { \
return 32 + (unsigned)bit; \
} else { \
_BitScanReverse(&bit, (unsigned)x); \
return (unsigned)bit; \
} \
} \
unreachable(); \
} while (0)
/*
 * MSVC variant: 0-based index of the most significant set bit of x, computed
 * by DO_FLS.  DO_FLS itself checks for x == 0 and returns 8 * sizeof(x) then.
 */
static inline unsigned
fls_llu(unsigned long long x) {
DO_FLS(x);
}
/*
 * MSVC variant: 0-based index of the most significant set bit of x, computed
 * by DO_FLS.  DO_FLS itself checks for x == 0 and returns 8 * sizeof(x) then.
 */
static inline unsigned
fls_lu(unsigned long x) {
DO_FLS(x);
}
/*
 * MSVC variant: 0-based index of the most significant set bit of x, computed
 * by DO_FLS.  DO_FLS itself checks for x == 0 and returns 8 * sizeof(x) then.
 */
static inline unsigned
fls_u(unsigned x) {
DO_FLS(x);
}
#undef DO_FLS
#undef DO_BSR64
#else
/*
 * Fallback variant, used when neither the clz builtins nor MSVC are
 * available: delegates to the portable fls_llu_slow().
 */
static inline unsigned
fls_llu(unsigned long long x) {
return fls_llu_slow(x);
}
/*
 * Fallback variant, used when neither the clz builtins nor MSVC are
 * available: delegates to the portable fls_lu_slow().
 */
static inline unsigned
fls_lu(unsigned long x) {
return fls_lu_slow(x);
}
/*
 * Fallback variant, used when neither the clz builtins nor MSVC are
 * available: delegates to the portable fls_u_slow().
 */
static inline unsigned
fls_u(unsigned x) {
return fls_u_slow(x);
}
#endif
#ifdef JEMALLOC_INTERNAL_POPCOUNTL #ifdef JEMALLOC_INTERNAL_POPCOUNTL
BIT_UTIL_INLINE unsigned static inline unsigned
popcount_lu(unsigned long bitmap) { popcount_lu(unsigned long bitmap) {
return JEMALLOC_INTERNAL_POPCOUNTL(bitmap); return JEMALLOC_INTERNAL_POPCOUNTL(bitmap);
} }
@ -48,7 +191,7 @@ popcount_lu(unsigned long bitmap) {
* place of bit. bitmap *must not* be 0. * place of bit. bitmap *must not* be 0.
*/ */
BIT_UTIL_INLINE size_t static inline size_t
cfs_lu(unsigned long* bitmap) { cfs_lu(unsigned long* bitmap) {
util_assume(*bitmap != 0); util_assume(*bitmap != 0);
size_t bit = ffs_lu(*bitmap); size_t bit = ffs_lu(*bitmap);
@ -56,101 +199,102 @@ cfs_lu(unsigned long* bitmap) {
return bit; return bit;
} }
BIT_UTIL_INLINE unsigned static inline unsigned
ffs_zu(size_t bitmap) { ffs_zu(size_t x) {
#if LG_SIZEOF_PTR == LG_SIZEOF_INT #if LG_SIZEOF_PTR == LG_SIZEOF_INT
return ffs_u(bitmap); return ffs_u(x);
#elif LG_SIZEOF_PTR == LG_SIZEOF_LONG #elif LG_SIZEOF_PTR == LG_SIZEOF_LONG
return ffs_lu(bitmap); return ffs_lu(x);
#elif LG_SIZEOF_PTR == LG_SIZEOF_LONG_LONG #elif LG_SIZEOF_PTR == LG_SIZEOF_LONG_LONG
return ffs_llu(bitmap); return ffs_llu(x);
#else #else
#error No implementation for size_t ffs() #error No implementation for size_t ffs()
#endif #endif
} }
BIT_UTIL_INLINE unsigned static inline unsigned
ffs_u64(uint64_t bitmap) { fls_zu(size_t x) {
#if LG_SIZEOF_PTR == LG_SIZEOF_INT
return fls_u(x);
#elif LG_SIZEOF_PTR == LG_SIZEOF_LONG
return fls_lu(x);
#elif LG_SIZEOF_PTR == LG_SIZEOF_LONG_LONG
return fls_llu(x);
#else
#error No implementation for size_t fls()
#endif
}
static inline unsigned
ffs_u64(uint64_t x) {
#if LG_SIZEOF_LONG == 3 #if LG_SIZEOF_LONG == 3
return ffs_lu(bitmap); return ffs_lu(x);
#elif LG_SIZEOF_LONG_LONG == 3 #elif LG_SIZEOF_LONG_LONG == 3
return ffs_llu(bitmap); return ffs_llu(x);
#else #else
#error No implementation for 64-bit ffs() #error No implementation for 64-bit ffs()
#endif #endif
} }
BIT_UTIL_INLINE unsigned static inline unsigned
ffs_u32(uint32_t bitmap) { fls_u64(uint64_t x) {
#if LG_SIZEOF_LONG == 3
return fls_lu(x);
#elif LG_SIZEOF_LONG_LONG == 3
return fls_llu(x);
#else
#error No implementation for 64-bit fls()
#endif
}
static inline unsigned
ffs_u32(uint32_t x) {
#if LG_SIZEOF_INT == 2 #if LG_SIZEOF_INT == 2
return ffs_u(bitmap); return ffs_u(x);
#else #else
#error No implementation for 32-bit ffs() #error No implementation for 32-bit ffs()
#endif #endif
return ffs_u(bitmap); return ffs_u(x);
} }
BIT_UTIL_INLINE uint64_t static inline unsigned
fls_u32(uint32_t x) {
#if LG_SIZEOF_INT == 2
return fls_u(x);
#else
#error No implementation for 32-bit fls()
#endif
return fls_u(x);
}
static inline uint64_t
pow2_ceil_u64(uint64_t x) { pow2_ceil_u64(uint64_t x) {
#if (defined(__amd64__) || defined(__x86_64__) || defined(JEMALLOC_HAVE_BUILTIN_CLZ)) if (unlikely(x <= 1)) {
if(unlikely(x <= 1)) {
return x; return x;
} }
size_t msb_on_index; size_t msb_on_index = fls_u64(x - 1);
#if (defined(__amd64__) || defined(__x86_64__)) /*
asm ("bsrq %1, %0" * Range-check; it's on the callers to ensure that the result of this
: "=r"(msb_on_index) // Outputs. * call won't overflow.
: "r"(x-1) // Inputs. */
);
#elif (defined(JEMALLOC_HAVE_BUILTIN_CLZ))
msb_on_index = (63 ^ __builtin_clzll(x - 1));
#endif
assert(msb_on_index < 63); assert(msb_on_index < 63);
return 1ULL << (msb_on_index + 1); return 1ULL << (msb_on_index + 1);
#else
x--;
x |= x >> 1;
x |= x >> 2;
x |= x >> 4;
x |= x >> 8;
x |= x >> 16;
x |= x >> 32;
x++;
return x;
#endif
} }
BIT_UTIL_INLINE uint32_t static inline uint32_t
pow2_ceil_u32(uint32_t x) { pow2_ceil_u32(uint32_t x) {
#if ((defined(__i386__) || defined(JEMALLOC_HAVE_BUILTIN_CLZ)) && (!defined(__s390__))) if (unlikely(x <= 1)) {
if(unlikely(x <= 1)) { return x;
return x;
} }
size_t msb_on_index; size_t msb_on_index = fls_u32(x - 1);
#if (defined(__i386__)) /* As above. */
asm ("bsr %1, %0"
: "=r"(msb_on_index) // Outputs.
: "r"(x-1) // Inputs.
);
#elif (defined(JEMALLOC_HAVE_BUILTIN_CLZ))
msb_on_index = (31 ^ __builtin_clz(x - 1));
#endif
assert(msb_on_index < 31); assert(msb_on_index < 31);
return 1U << (msb_on_index + 1); return 1U << (msb_on_index + 1);
#else
x--;
x |= x >> 1;
x |= x >> 2;
x |= x >> 4;
x |= x >> 8;
x |= x >> 16;
x++;
return x;
#endif
} }
/* Compute the smallest power of 2 that is >= x. */ /* Compute the smallest power of 2 that is >= x. */
BIT_UTIL_INLINE size_t static inline size_t
pow2_ceil_zu(size_t x) { pow2_ceil_zu(size_t x) {
#if (LG_SIZEOF_PTR == 3) #if (LG_SIZEOF_PTR == 3)
return pow2_ceil_u64(x); return pow2_ceil_u64(x);
@ -159,77 +303,21 @@ pow2_ceil_zu(size_t x) {
#endif #endif
} }
#if (defined(__i386__) || defined(__amd64__) || defined(__x86_64__)) static inline unsigned
BIT_UTIL_INLINE unsigned
lg_floor(size_t x) { lg_floor(size_t x) {
size_t ret; util_assume(x != 0);
assert(x != 0);
asm ("bsr %1, %0"
: "=r"(ret) // Outputs.
: "r"(x) // Inputs.
);
assert(ret < UINT_MAX);
return (unsigned)ret;
}
#elif (defined(_MSC_VER))
BIT_UTIL_INLINE unsigned
lg_floor(size_t x) {
unsigned long ret;
assert(x != 0);
#if (LG_SIZEOF_PTR == 3) #if (LG_SIZEOF_PTR == 3)
_BitScanReverse64(&ret, x); return fls_u64(x);
#elif (LG_SIZEOF_PTR == 2)
_BitScanReverse(&ret, x);
#else #else
# error "Unsupported type size for lg_floor()" return fls_u32(x);
#endif
assert(ret < UINT_MAX);
return (unsigned)ret;
}
#elif (defined(JEMALLOC_HAVE_BUILTIN_CLZ))
BIT_UTIL_INLINE unsigned
lg_floor(size_t x) {
assert(x != 0);
#if (LG_SIZEOF_PTR == LG_SIZEOF_INT)
return ((8 << LG_SIZEOF_PTR) - 1) - __builtin_clz(x);
#elif (LG_SIZEOF_PTR == LG_SIZEOF_LONG)
return ((8 << LG_SIZEOF_PTR) - 1) - __builtin_clzl(x);
#else
# error "Unsupported type size for lg_floor()"
#endif #endif
} }
#else
BIT_UTIL_INLINE unsigned
lg_floor(size_t x) {
assert(x != 0);
x |= (x >> 1); static inline unsigned
x |= (x >> 2);
x |= (x >> 4);
x |= (x >> 8);
x |= (x >> 16);
#if (LG_SIZEOF_PTR == 3)
x |= (x >> 32);
#endif
if (x == SIZE_T_MAX) {
return (8 << LG_SIZEOF_PTR) - 1;
}
x++;
return ffs_zu(x) - 1;
}
#endif
BIT_UTIL_INLINE unsigned
lg_ceil(size_t x) { lg_ceil(size_t x) {
return lg_floor(x) + ((x & (x - 1)) == 0 ? 0 : 1); return lg_floor(x) + ((x & (x - 1)) == 0 ? 0 : 1);
} }
#undef BIT_UTIL_INLINE
/* A compile-time version of lg_floor and lg_ceil. */ /* A compile-time version of lg_floor and lg_ceil. */
#define LG_FLOOR_1(x) 0 #define LG_FLOOR_1(x) 0
#define LG_FLOOR_2(x) (x < (1ULL << 1) ? LG_FLOOR_1(x) : 1 + LG_FLOOR_1(x >> 1)) #define LG_FLOOR_2(x) (x < (1ULL << 1) ? LG_FLOOR_1(x) : 1 + LG_FLOOR_1(x >> 1))

View File

@ -120,7 +120,6 @@ TEST_BEGIN(test_ffs_u) {
} }
TEST_END TEST_END
TEST_BEGIN(test_ffs_lu) { TEST_BEGIN(test_ffs_lu) {
TEST_FFS(unsigned long, lu, lu, "lu"); TEST_FFS(unsigned long, lu, lu, "lu");
} }
@ -136,7 +135,6 @@ TEST_BEGIN(test_ffs_u32) {
} }
TEST_END TEST_END
TEST_BEGIN(test_ffs_u64) { TEST_BEGIN(test_ffs_u64) {
TEST_FFS(uint64_t, u64, u64, FMTu64); TEST_FFS(uint64_t, u64, u64, FMTu64);
} }
@ -147,9 +145,69 @@ TEST_BEGIN(test_ffs_zu) {
} }
TEST_END TEST_END
/*
 * Checks fls_<suf>() on every non-zero value of type t that has at most three
 * bits set.  Bits i >= j >= k are or-ed together, so the expected result is
 * always i, the index of the highest one.
 *
 * test_suf selects the expect_<test_suf>_eq comparison used for the check and
 * pri is the printf conversion used to print x in the failure message.
 */
#define TEST_FLS(t, suf, test_suf, pri) do { \
for (unsigned i = 0; i < sizeof(t) * 8; i++) { \
for (unsigned j = 0; j <= i; j++) { \
for (unsigned k = 0; k <= j; k++) { \
t x = (t)1 << i; \
x |= (t)1 << j; \
x |= (t)1 << k; \
expect_##test_suf##_eq(fls_##suf(x), i, \
"Unexpected result, x=%"pri, x); \
} \
} \
} \
} while(0)
/* Runs the TEST_FLS sweep against fls_u() with unsigned inputs. */
TEST_BEGIN(test_fls_u) {
TEST_FLS(unsigned, u, u,"u");
}
TEST_END
/* Runs the TEST_FLS sweep against fls_lu() with unsigned long inputs. */
TEST_BEGIN(test_fls_lu) {
TEST_FLS(unsigned long, lu, lu, "lu");
}
TEST_END
/*
 * Runs the TEST_FLS sweep against fls_llu() with unsigned long long inputs.
 * NOTE(review): results are compared through expect_qd_eq; presumably that
 * is the long long comparison helper -- confirm against the test harness.
 */
TEST_BEGIN(test_fls_llu) {
TEST_FLS(unsigned long long, llu, qd, "llu");
}
TEST_END
/* Runs the TEST_FLS sweep against fls_u32() with uint32_t inputs. */
TEST_BEGIN(test_fls_u32) {
TEST_FLS(uint32_t, u32, u32, FMTu32);
}
TEST_END
/* Runs the TEST_FLS sweep against fls_u64() with uint64_t inputs. */
TEST_BEGIN(test_fls_u64) {
TEST_FLS(uint64_t, u64, u64, FMTu64);
}
TEST_END
/* Runs the TEST_FLS sweep against fls_zu() with size_t inputs. */
TEST_BEGIN(test_fls_zu) {
TEST_FLS(size_t, zu, zu, "zu");
}
TEST_END
/*
 * Runs the TEST_FLS sweep against fls_u_slow() directly, so the portable
 * implementation is exercised whichever implementation fls_u() uses.
 */
TEST_BEGIN(test_fls_u_slow) {
TEST_FLS(unsigned, u_slow, u,"u");
}
TEST_END
/*
 * Runs the TEST_FLS sweep against fls_lu_slow() directly, so the portable
 * implementation is exercised whichever implementation fls_lu() uses.
 */
TEST_BEGIN(test_fls_lu_slow) {
TEST_FLS(unsigned long, lu_slow, lu, "lu");
}
TEST_END
/*
 * Runs the TEST_FLS sweep against fls_llu_slow() directly, so the portable
 * implementation is exercised whichever implementation fls_llu() uses.
 * NOTE(review): results are compared through expect_qd_eq; presumably that
 * is the long long comparison helper -- confirm against the test harness.
 */
TEST_BEGIN(test_fls_llu_slow) {
TEST_FLS(unsigned long long, llu_slow, qd, "llu");
}
TEST_END
int int
main(void) { main(void) {
return test( return test_no_reentrancy(
test_pow2_ceil_u64, test_pow2_ceil_u64,
test_pow2_ceil_u32, test_pow2_ceil_u32,
test_pow2_ceil_zu, test_pow2_ceil_zu,
@ -159,5 +217,14 @@ main(void) {
test_ffs_llu, test_ffs_llu,
test_ffs_u32, test_ffs_u32,
test_ffs_u64, test_ffs_u64,
test_ffs_zu); test_ffs_zu,
test_fls_u,
test_fls_lu,
test_fls_llu,
test_fls_u32,
test_fls_u64,
test_fls_zu,
test_fls_u_slow,
test_fls_lu_slow,
test_fls_llu_slow);
} }