bit_util: Add fls_ functions; "find last set".

These simplify a lot of the bit_util module, which had grown bits and pieces of
this functionality across a variety of places over the years.

While we're here, kill off BIT_UTIL_INLINE and don't do reentrancy testing for
bit_util.
This commit is contained in:
David Goldblatt 2020-07-22 07:10:06 -07:00 committed by David Goldblatt
parent 1ed0288d9c
commit 22da836094
3 changed files with 292 additions and 133 deletions

View File

@ -2118,7 +2118,7 @@ esac
fi
dnl ============================================================================
dnl Check for __builtin_clz() and __builtin_clzl().
dnl Check for __builtin_clz(), __builtin_clzl(), and __builtin_clzll().
AC_CACHE_CHECK([for __builtin_clz],
[je_cv_builtin_clz],
@ -2132,6 +2132,10 @@ AC_CACHE_CHECK([for __builtin_clz],
unsigned long x = 0;
int y = __builtin_clzl(x);
}
{
unsigned long long x = 0;
int y = __builtin_clzll(x);
}
])],
[je_cv_builtin_clz=yes],
[je_cv_builtin_clz=no])])

View File

@ -3,8 +3,6 @@
#include "jemalloc/internal/assert.h"
#define BIT_UTIL_INLINE static inline
/* Sanity check. */
#if !defined(JEMALLOC_INTERNAL_FFSLL) || !defined(JEMALLOC_INTERNAL_FFSL) \
|| !defined(JEMALLOC_INTERNAL_FFS)
@ -18,26 +16,171 @@
* value of zero as a sentinel. This tends to simplify logic in callers, and
* allows for consistency with the builtins we build fls on top of.
*/
BIT_UTIL_INLINE unsigned
static inline unsigned
ffs_llu(unsigned long long x) {
util_assume(x != 0);
return JEMALLOC_INTERNAL_FFSLL(x) - 1;
}
BIT_UTIL_INLINE unsigned
static inline unsigned
ffs_lu(unsigned long x) {
util_assume(x != 0);
return JEMALLOC_INTERNAL_FFSL(x) - 1;
}
BIT_UTIL_INLINE unsigned
static inline unsigned
ffs_u(unsigned x) {
util_assume(x != 0);
return JEMALLOC_INTERNAL_FFS(x) - 1;
}
#define DO_FLS_SLOW(x, suffix) do { \
util_assume(x != 0); \
x |= (x >> 1); \
x |= (x >> 2); \
x |= (x >> 4); \
x |= (x >> 8); \
x |= (x >> 16); \
if (sizeof(x) > 4) { \
/* \
* If sizeof(x) is 4, then the expression "x >> 32" \
* will generate compiler warnings even if the code \
* never executes. This circumvents the warning, and \
* gets compiled out in optimized builds. \
*/ \
int constant_32 = sizeof(x) * 4; \
x |= (x >> constant_32); \
} \
x++; \
if (x == 0) { \
return 8 * sizeof(x) - 1; \
} \
return ffs_##suffix(x) - 1; \
} while(0)
static inline unsigned
fls_llu_slow(unsigned long long x) {
DO_FLS_SLOW(x, llu);
}
static inline unsigned
fls_lu_slow(unsigned long x) {
DO_FLS_SLOW(x, lu);
}
static inline unsigned
fls_u_slow(unsigned x) {
DO_FLS_SLOW(x, u);
}
#undef DO_FLS_SLOW
#ifdef JEMALLOC_HAVE_BUILTIN_CLZ
static inline unsigned
fls_llu(unsigned long long x) {
util_assume(x != 0);
/*
* Note that the xor here is more naturally written as subtraction; the
* last bit set is the number of bits in the type minus the number of
* leading zero bits. But GCC implements that as:
* bsr edi, edi
* mov eax, 31
* xor edi, 31
* sub eax, edi
* If we write it as xor instead, then we get
* bsr eax, edi
* as desired.
*/
return (8 * sizeof(x) - 1) ^ __builtin_clzll(x);
}
static inline unsigned
fls_lu(unsigned long x) {
util_assume(x != 0);
return (8 * sizeof(x) - 1) ^ __builtin_clzl(x);
}
static inline unsigned
fls_u(unsigned x) {
util_assume(x != 0);
return (8 * sizeof(x) - 1) ^ __builtin_clz(x);
}
#elif defined(_MSC_VER)
#if LG_SIZEOF_PTR == 3
#define DO_BSR64(bit, x) _BitScanReverse64(&bit, x)
#else
/*
* This never actually runs; we're just dodging a compiler error for the
* never-taken branch where sizeof(void *) == 8.
*/
#define DO_BSR64(bit, x) bit = 0; unreachable()
#endif
#define DO_FLS(x) do { \
if (x == 0) { \
return 8 * sizeof(x); \
} \
unsigned long bit; \
if (sizeof(x) == 4) { \
_BitScanReverse(&bit, (unsigned)x); \
return (unsigned)bit; \
} \
if (sizeof(x) == 8 && sizeof(void *) == 8) { \
DO_BSR64(bit, x); \
return (unsigned)bit; \
} \
if (sizeof(x) == 8 && sizeof(void *) == 4) { \
/* Dodge a compiler warning, as above. */ \
int constant_32 = sizeof(x) * 4; \
if (_BitScanReverse(&bit, \
(unsigned)(x >> constant_32))) { \
return 32 + (unsigned)bit; \
} else { \
_BitScanReverse(&bit, (unsigned)x); \
return (unsigned)bit; \
} \
} \
unreachable(); \
} while (0)
static inline unsigned
fls_llu(unsigned long long x) {
DO_FLS(x);
}
static inline unsigned
fls_lu(unsigned long x) {
DO_FLS(x);
}
static inline unsigned
fls_u(unsigned x) {
DO_FLS(x);
}
#undef DO_FLS
#undef DO_BSR64
#else
static inline unsigned
fls_llu(unsigned long long x) {
return fls_llu_slow(x);
}
static inline unsigned
fls_lu(unsigned long x) {
return fls_lu_slow(x);
}
static inline unsigned
fls_u(unsigned x) {
return fls_u_slow(x);
}
#endif
#ifdef JEMALLOC_INTERNAL_POPCOUNTL
BIT_UTIL_INLINE unsigned
static inline unsigned
popcount_lu(unsigned long bitmap) {
return JEMALLOC_INTERNAL_POPCOUNTL(bitmap);
}
@ -48,7 +191,7 @@ popcount_lu(unsigned long bitmap) {
* place of bit. bitmap *must not* be 0.
*/
BIT_UTIL_INLINE size_t
static inline size_t
cfs_lu(unsigned long* bitmap) {
util_assume(*bitmap != 0);
size_t bit = ffs_lu(*bitmap);
@ -56,101 +199,102 @@ cfs_lu(unsigned long* bitmap) {
return bit;
}
BIT_UTIL_INLINE unsigned
ffs_zu(size_t bitmap) {
static inline unsigned
ffs_zu(size_t x) {
#if LG_SIZEOF_PTR == LG_SIZEOF_INT
return ffs_u(bitmap);
return ffs_u(x);
#elif LG_SIZEOF_PTR == LG_SIZEOF_LONG
return ffs_lu(bitmap);
return ffs_lu(x);
#elif LG_SIZEOF_PTR == LG_SIZEOF_LONG_LONG
return ffs_llu(bitmap);
return ffs_llu(x);
#else
#error No implementation for size_t ffs()
#endif
}
BIT_UTIL_INLINE unsigned
ffs_u64(uint64_t bitmap) {
static inline unsigned
fls_zu(size_t x) {
#if LG_SIZEOF_PTR == LG_SIZEOF_INT
return fls_u(x);
#elif LG_SIZEOF_PTR == LG_SIZEOF_LONG
return fls_lu(x);
#elif LG_SIZEOF_PTR == LG_SIZEOF_LONG_LONG
return fls_llu(x);
#else
#error No implementation for size_t fls()
#endif
}
static inline unsigned
ffs_u64(uint64_t x) {
#if LG_SIZEOF_LONG == 3
return ffs_lu(bitmap);
return ffs_lu(x);
#elif LG_SIZEOF_LONG_LONG == 3
return ffs_llu(bitmap);
return ffs_llu(x);
#else
#error No implementation for 64-bit ffs()
#endif
}
BIT_UTIL_INLINE unsigned
ffs_u32(uint32_t bitmap) {
static inline unsigned
fls_u64(uint64_t x) {
#if LG_SIZEOF_LONG == 3
return fls_lu(x);
#elif LG_SIZEOF_LONG_LONG == 3
return fls_llu(x);
#else
#error No implementation for 64-bit fls()
#endif
}
static inline unsigned
ffs_u32(uint32_t x) {
#if LG_SIZEOF_INT == 2
return ffs_u(bitmap);
return ffs_u(x);
#else
#error No implementation for 32-bit ffs()
#endif
return ffs_u(bitmap);
return ffs_u(x);
}
BIT_UTIL_INLINE uint64_t
static inline unsigned
fls_u32(uint32_t x) {
#if LG_SIZEOF_INT == 2
return fls_u(x);
#else
#error No implementation for 32-bit fls()
#endif
return fls_u(x);
}
static inline uint64_t
pow2_ceil_u64(uint64_t x) {
#if (defined(__amd64__) || defined(__x86_64__) || defined(JEMALLOC_HAVE_BUILTIN_CLZ))
if(unlikely(x <= 1)) {
if (unlikely(x <= 1)) {
return x;
}
size_t msb_on_index;
#if (defined(__amd64__) || defined(__x86_64__))
asm ("bsrq %1, %0"
: "=r"(msb_on_index) // Outputs.
: "r"(x-1) // Inputs.
);
#elif (defined(JEMALLOC_HAVE_BUILTIN_CLZ))
msb_on_index = (63 ^ __builtin_clzll(x - 1));
#endif
size_t msb_on_index = fls_u64(x - 1);
/*
* Range-check; it's on the callers to ensure that the result of this
* call won't overflow.
*/
assert(msb_on_index < 63);
return 1ULL << (msb_on_index + 1);
#else
x--;
x |= x >> 1;
x |= x >> 2;
x |= x >> 4;
x |= x >> 8;
x |= x >> 16;
x |= x >> 32;
x++;
return x;
#endif
}
BIT_UTIL_INLINE uint32_t
static inline uint32_t
pow2_ceil_u32(uint32_t x) {
#if ((defined(__i386__) || defined(JEMALLOC_HAVE_BUILTIN_CLZ)) && (!defined(__s390__)))
if(unlikely(x <= 1)) {
if (unlikely(x <= 1)) {
return x;
}
size_t msb_on_index;
#if (defined(__i386__))
asm ("bsr %1, %0"
: "=r"(msb_on_index) // Outputs.
: "r"(x-1) // Inputs.
);
#elif (defined(JEMALLOC_HAVE_BUILTIN_CLZ))
msb_on_index = (31 ^ __builtin_clz(x - 1));
#endif
size_t msb_on_index = fls_u32(x - 1);
/* As above. */
assert(msb_on_index < 31);
return 1U << (msb_on_index + 1);
#else
x--;
x |= x >> 1;
x |= x >> 2;
x |= x >> 4;
x |= x >> 8;
x |= x >> 16;
x++;
return x;
#endif
}
/* Compute the smallest power of 2 that is >= x. */
BIT_UTIL_INLINE size_t
static inline size_t
pow2_ceil_zu(size_t x) {
#if (LG_SIZEOF_PTR == 3)
return pow2_ceil_u64(x);
@ -159,77 +303,21 @@ pow2_ceil_zu(size_t x) {
#endif
}
#if (defined(__i386__) || defined(__amd64__) || defined(__x86_64__))
BIT_UTIL_INLINE unsigned
static inline unsigned
lg_floor(size_t x) {
size_t ret;
assert(x != 0);
asm ("bsr %1, %0"
: "=r"(ret) // Outputs.
: "r"(x) // Inputs.
);
assert(ret < UINT_MAX);
return (unsigned)ret;
}
#elif (defined(_MSC_VER))
BIT_UTIL_INLINE unsigned
lg_floor(size_t x) {
unsigned long ret;
assert(x != 0);
util_assume(x != 0);
#if (LG_SIZEOF_PTR == 3)
_BitScanReverse64(&ret, x);
#elif (LG_SIZEOF_PTR == 2)
_BitScanReverse(&ret, x);
return fls_u64(x);
#else
# error "Unsupported type size for lg_floor()"
#endif
assert(ret < UINT_MAX);
return (unsigned)ret;
}
#elif (defined(JEMALLOC_HAVE_BUILTIN_CLZ))
BIT_UTIL_INLINE unsigned
lg_floor(size_t x) {
assert(x != 0);
#if (LG_SIZEOF_PTR == LG_SIZEOF_INT)
return ((8 << LG_SIZEOF_PTR) - 1) - __builtin_clz(x);
#elif (LG_SIZEOF_PTR == LG_SIZEOF_LONG)
return ((8 << LG_SIZEOF_PTR) - 1) - __builtin_clzl(x);
#else
# error "Unsupported type size for lg_floor()"
return fls_u32(x);
#endif
}
#else
BIT_UTIL_INLINE unsigned
lg_floor(size_t x) {
assert(x != 0);
x |= (x >> 1);
x |= (x >> 2);
x |= (x >> 4);
x |= (x >> 8);
x |= (x >> 16);
#if (LG_SIZEOF_PTR == 3)
x |= (x >> 32);
#endif
if (x == SIZE_T_MAX) {
return (8 << LG_SIZEOF_PTR) - 1;
}
x++;
return ffs_zu(x) - 1;
}
#endif
BIT_UTIL_INLINE unsigned
static inline unsigned
lg_ceil(size_t x) {
return lg_floor(x) + ((x & (x - 1)) == 0 ? 0 : 1);
}
#undef BIT_UTIL_INLINE
/* A compile-time version of lg_floor and lg_ceil. */
#define LG_FLOOR_1(x) 0
#define LG_FLOOR_2(x) (x < (1ULL << 1) ? LG_FLOOR_1(x) : 1 + LG_FLOOR_1(x >> 1))

View File

@ -120,7 +120,6 @@ TEST_BEGIN(test_ffs_u) {
}
TEST_END
TEST_BEGIN(test_ffs_lu) {
TEST_FFS(unsigned long, lu, lu, "lu");
}
@ -136,7 +135,6 @@ TEST_BEGIN(test_ffs_u32) {
}
TEST_END
TEST_BEGIN(test_ffs_u64) {
TEST_FFS(uint64_t, u64, u64, FMTu64);
}
@ -147,9 +145,69 @@ TEST_BEGIN(test_ffs_zu) {
}
TEST_END
#define TEST_FLS(t, suf, test_suf, pri) do { \
for (unsigned i = 0; i < sizeof(t) * 8; i++) { \
for (unsigned j = 0; j <= i; j++) { \
for (unsigned k = 0; k <= j; k++) { \
t x = (t)1 << i; \
x |= (t)1 << j; \
x |= (t)1 << k; \
expect_##test_suf##_eq(fls_##suf(x), i, \
"Unexpected result, x=%"pri, x); \
} \
} \
} \
} while(0)
TEST_BEGIN(test_fls_u) {
TEST_FLS(unsigned, u, u,"u");
}
TEST_END
TEST_BEGIN(test_fls_lu) {
TEST_FLS(unsigned long, lu, lu, "lu");
}
TEST_END
TEST_BEGIN(test_fls_llu) {
TEST_FLS(unsigned long long, llu, qd, "llu");
}
TEST_END
TEST_BEGIN(test_fls_u32) {
TEST_FLS(uint32_t, u32, u32, FMTu32);
}
TEST_END
TEST_BEGIN(test_fls_u64) {
TEST_FLS(uint64_t, u64, u64, FMTu64);
}
TEST_END
TEST_BEGIN(test_fls_zu) {
TEST_FLS(size_t, zu, zu, "zu");
}
TEST_END
TEST_BEGIN(test_fls_u_slow) {
TEST_FLS(unsigned, u_slow, u,"u");
}
TEST_END
TEST_BEGIN(test_fls_lu_slow) {
TEST_FLS(unsigned long, lu_slow, lu, "lu");
}
TEST_END
TEST_BEGIN(test_fls_llu_slow) {
TEST_FLS(unsigned long long, llu_slow, qd, "llu");
}
TEST_END
int
main(void) {
return test(
return test_no_reentrancy(
test_pow2_ceil_u64,
test_pow2_ceil_u32,
test_pow2_ceil_zu,
@ -159,5 +217,14 @@ main(void) {
test_ffs_llu,
test_ffs_u32,
test_ffs_u64,
test_ffs_zu);
test_ffs_zu,
test_fls_u,
test_fls_lu,
test_fls_llu,
test_fls_u32,
test_fls_u64,
test_fls_zu,
test_fls_u_slow,
test_fls_lu_slow,
test_fls_llu_slow);
}