bit_util: Add fls_ functions; "find last set".
These simplify a lot of the bit_util module, which had grown bits and pieces of this functionality across a variety of places over the years. While we're here, kill off BIT_UTIL_INLINE and don't do reentrancy testing for bit_util.
This commit is contained in:
parent
1ed0288d9c
commit
22da836094
@ -2118,7 +2118,7 @@ esac
|
||||
fi
|
||||
|
||||
dnl ============================================================================
|
||||
dnl Check for __builtin_clz() and __builtin_clzl().
|
||||
dnl Check for __builtin_clz(), __builtin_clzl(), and __builtin_clzll().
|
||||
|
||||
AC_CACHE_CHECK([for __builtin_clz],
|
||||
[je_cv_builtin_clz],
|
||||
@ -2132,6 +2132,10 @@ AC_CACHE_CHECK([for __builtin_clz],
|
||||
unsigned long x = 0;
|
||||
int y = __builtin_clzl(x);
|
||||
}
|
||||
{
|
||||
unsigned long long x = 0;
|
||||
int y = __builtin_clzll(x);
|
||||
}
|
||||
])],
|
||||
[je_cv_builtin_clz=yes],
|
||||
[je_cv_builtin_clz=no])])
|
||||
|
@ -3,8 +3,6 @@
|
||||
|
||||
#include "jemalloc/internal/assert.h"
|
||||
|
||||
#define BIT_UTIL_INLINE static inline
|
||||
|
||||
/* Sanity check. */
|
||||
#if !defined(JEMALLOC_INTERNAL_FFSLL) || !defined(JEMALLOC_INTERNAL_FFSL) \
|
||||
|| !defined(JEMALLOC_INTERNAL_FFS)
|
||||
@ -18,26 +16,171 @@
|
||||
* value of zero as a sentinel. This tends to simplify logic in callers, and
|
||||
* allows for consistency with the builtins we build fls on top of.
|
||||
*/
|
||||
BIT_UTIL_INLINE unsigned
|
||||
static inline unsigned
|
||||
ffs_llu(unsigned long long x) {
|
||||
util_assume(x != 0);
|
||||
return JEMALLOC_INTERNAL_FFSLL(x) - 1;
|
||||
}
|
||||
|
||||
BIT_UTIL_INLINE unsigned
|
||||
static inline unsigned
|
||||
ffs_lu(unsigned long x) {
|
||||
util_assume(x != 0);
|
||||
return JEMALLOC_INTERNAL_FFSL(x) - 1;
|
||||
}
|
||||
|
||||
BIT_UTIL_INLINE unsigned
|
||||
static inline unsigned
|
||||
ffs_u(unsigned x) {
|
||||
util_assume(x != 0);
|
||||
return JEMALLOC_INTERNAL_FFS(x) - 1;
|
||||
}
|
||||
|
||||
#define DO_FLS_SLOW(x, suffix) do { \
|
||||
util_assume(x != 0); \
|
||||
x |= (x >> 1); \
|
||||
x |= (x >> 2); \
|
||||
x |= (x >> 4); \
|
||||
x |= (x >> 8); \
|
||||
x |= (x >> 16); \
|
||||
if (sizeof(x) > 4) { \
|
||||
/* \
|
||||
* If sizeof(x) is 4, then the expression "x >> 32" \
|
||||
* will generate compiler warnings even if the code \
|
||||
* never executes. This circumvents the warning, and \
|
||||
* gets compiled out in optimized builds. \
|
||||
*/ \
|
||||
int constant_32 = sizeof(x) * 4; \
|
||||
x |= (x >> constant_32); \
|
||||
} \
|
||||
x++; \
|
||||
if (x == 0) { \
|
||||
return 8 * sizeof(x) - 1; \
|
||||
} \
|
||||
return ffs_##suffix(x) - 1; \
|
||||
} while(0)
|
||||
|
||||
static inline unsigned
|
||||
fls_llu_slow(unsigned long long x) {
|
||||
DO_FLS_SLOW(x, llu);
|
||||
}
|
||||
|
||||
static inline unsigned
|
||||
fls_lu_slow(unsigned long x) {
|
||||
DO_FLS_SLOW(x, lu);
|
||||
}
|
||||
|
||||
static inline unsigned
|
||||
fls_u_slow(unsigned x) {
|
||||
DO_FLS_SLOW(x, u);
|
||||
}
|
||||
|
||||
#undef DO_FLS_SLOW
|
||||
|
||||
#ifdef JEMALLOC_HAVE_BUILTIN_CLZ
|
||||
static inline unsigned
|
||||
fls_llu(unsigned long long x) {
|
||||
util_assume(x != 0);
|
||||
/*
|
||||
* Note that the xor here is more naturally written as subtraction; the
|
||||
* index of the last set bit is (number of bits in the type - 1) minus the number of
|
||||
* leading zero bits. But GCC implements that as:
|
||||
* bsr edi, edi
|
||||
* mov eax, 31
|
||||
* xor edi, 31
|
||||
* sub eax, edi
|
||||
* If we write it as xor instead, then we get
|
||||
* bsr eax, edi
|
||||
* as desired.
|
||||
*/
|
||||
return (8 * sizeof(x) - 1) ^ __builtin_clzll(x);
|
||||
}
|
||||
|
||||
static inline unsigned
|
||||
fls_lu(unsigned long x) {
|
||||
util_assume(x != 0);
|
||||
return (8 * sizeof(x) - 1) ^ __builtin_clzl(x);
|
||||
}
|
||||
|
||||
static inline unsigned
|
||||
fls_u(unsigned x) {
|
||||
util_assume(x != 0);
|
||||
return (8 * sizeof(x) - 1) ^ __builtin_clz(x);
|
||||
}
|
||||
#elif defined(_MSC_VER)
|
||||
|
||||
#if LG_SIZEOF_PTR == 3
|
||||
#define DO_BSR64(bit, x) _BitScanReverse64(&bit, x)
|
||||
#else
|
||||
/*
|
||||
* This never actually runs; we're just dodging a compiler error for the
|
||||
* never-taken branch where sizeof(void *) == 8.
|
||||
*/
|
||||
#define DO_BSR64(bit, x) bit = 0; unreachable()
|
||||
#endif
|
||||
|
||||
#define DO_FLS(x) do { \
|
||||
if (x == 0) { \
|
||||
return 8 * sizeof(x); \
|
||||
} \
|
||||
unsigned long bit; \
|
||||
if (sizeof(x) == 4) { \
|
||||
_BitScanReverse(&bit, (unsigned)x); \
|
||||
return (unsigned)bit; \
|
||||
} \
|
||||
if (sizeof(x) == 8 && sizeof(void *) == 8) { \
|
||||
DO_BSR64(bit, x); \
|
||||
return (unsigned)bit; \
|
||||
} \
|
||||
if (sizeof(x) == 8 && sizeof(void *) == 4) { \
|
||||
/* Dodge a compiler warning, as above. */ \
|
||||
int constant_32 = sizeof(x) * 4; \
|
||||
if (_BitScanReverse(&bit, \
|
||||
(unsigned)(x >> constant_32))) { \
|
||||
return 32 + (unsigned)bit; \
|
||||
} else { \
|
||||
_BitScanReverse(&bit, (unsigned)x); \
|
||||
return (unsigned)bit; \
|
||||
} \
|
||||
} \
|
||||
unreachable(); \
|
||||
} while (0)
|
||||
|
||||
static inline unsigned
|
||||
fls_llu(unsigned long long x) {
|
||||
DO_FLS(x);
|
||||
}
|
||||
|
||||
static inline unsigned
|
||||
fls_lu(unsigned long x) {
|
||||
DO_FLS(x);
|
||||
}
|
||||
|
||||
static inline unsigned
|
||||
fls_u(unsigned x) {
|
||||
DO_FLS(x);
|
||||
}
|
||||
|
||||
#undef DO_FLS
|
||||
#undef DO_BSR64
|
||||
#else
|
||||
|
||||
static inline unsigned
|
||||
fls_llu(unsigned long long x) {
|
||||
return fls_llu_slow(x);
|
||||
}
|
||||
|
||||
static inline unsigned
|
||||
fls_lu(unsigned long x) {
|
||||
return fls_lu_slow(x);
|
||||
}
|
||||
|
||||
static inline unsigned
|
||||
fls_u(unsigned x) {
|
||||
return fls_u_slow(x);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef JEMALLOC_INTERNAL_POPCOUNTL
|
||||
BIT_UTIL_INLINE unsigned
|
||||
static inline unsigned
|
||||
popcount_lu(unsigned long bitmap) {
|
||||
return JEMALLOC_INTERNAL_POPCOUNTL(bitmap);
|
||||
}
|
||||
@ -48,7 +191,7 @@ popcount_lu(unsigned long bitmap) {
|
||||
* place of bit. bitmap *must not* be 0.
|
||||
*/
|
||||
|
||||
BIT_UTIL_INLINE size_t
|
||||
static inline size_t
|
||||
cfs_lu(unsigned long* bitmap) {
|
||||
util_assume(*bitmap != 0);
|
||||
size_t bit = ffs_lu(*bitmap);
|
||||
@ -56,101 +199,102 @@ cfs_lu(unsigned long* bitmap) {
|
||||
return bit;
|
||||
}
|
||||
|
||||
BIT_UTIL_INLINE unsigned
|
||||
ffs_zu(size_t bitmap) {
|
||||
static inline unsigned
|
||||
ffs_zu(size_t x) {
|
||||
#if LG_SIZEOF_PTR == LG_SIZEOF_INT
|
||||
return ffs_u(bitmap);
|
||||
return ffs_u(x);
|
||||
#elif LG_SIZEOF_PTR == LG_SIZEOF_LONG
|
||||
return ffs_lu(bitmap);
|
||||
return ffs_lu(x);
|
||||
#elif LG_SIZEOF_PTR == LG_SIZEOF_LONG_LONG
|
||||
return ffs_llu(bitmap);
|
||||
return ffs_llu(x);
|
||||
#else
|
||||
#error No implementation for size_t ffs()
|
||||
#endif
|
||||
}
|
||||
|
||||
BIT_UTIL_INLINE unsigned
|
||||
ffs_u64(uint64_t bitmap) {
|
||||
static inline unsigned
|
||||
fls_zu(size_t x) {
|
||||
#if LG_SIZEOF_PTR == LG_SIZEOF_INT
|
||||
return fls_u(x);
|
||||
#elif LG_SIZEOF_PTR == LG_SIZEOF_LONG
|
||||
return fls_lu(x);
|
||||
#elif LG_SIZEOF_PTR == LG_SIZEOF_LONG_LONG
|
||||
return fls_llu(x);
|
||||
#else
|
||||
#error No implementation for size_t fls()
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
static inline unsigned
|
||||
ffs_u64(uint64_t x) {
|
||||
#if LG_SIZEOF_LONG == 3
|
||||
return ffs_lu(bitmap);
|
||||
return ffs_lu(x);
|
||||
#elif LG_SIZEOF_LONG_LONG == 3
|
||||
return ffs_llu(bitmap);
|
||||
return ffs_llu(x);
|
||||
#else
|
||||
#error No implementation for 64-bit ffs()
|
||||
#endif
|
||||
}
|
||||
|
||||
BIT_UTIL_INLINE unsigned
|
||||
ffs_u32(uint32_t bitmap) {
|
||||
static inline unsigned
|
||||
fls_u64(uint64_t x) {
|
||||
#if LG_SIZEOF_LONG == 3
|
||||
return fls_lu(x);
|
||||
#elif LG_SIZEOF_LONG_LONG == 3
|
||||
return fls_llu(x);
|
||||
#else
|
||||
#error No implementation for 64-bit fls()
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline unsigned
|
||||
ffs_u32(uint32_t x) {
|
||||
#if LG_SIZEOF_INT == 2
|
||||
return ffs_u(bitmap);
|
||||
return ffs_u(x);
|
||||
#else
|
||||
#error No implementation for 32-bit ffs()
|
||||
#endif
|
||||
return ffs_u(bitmap);
|
||||
return ffs_u(x);
|
||||
}
|
||||
|
||||
BIT_UTIL_INLINE uint64_t
|
||||
static inline unsigned
|
||||
fls_u32(uint32_t x) {
|
||||
#if LG_SIZEOF_INT == 2
|
||||
return fls_u(x);
|
||||
#else
|
||||
#error No implementation for 32-bit fls()
|
||||
#endif
|
||||
return fls_u(x);
|
||||
}
|
||||
|
||||
static inline uint64_t
|
||||
pow2_ceil_u64(uint64_t x) {
|
||||
#if (defined(__amd64__) || defined(__x86_64__) || defined(JEMALLOC_HAVE_BUILTIN_CLZ))
|
||||
if(unlikely(x <= 1)) {
|
||||
if (unlikely(x <= 1)) {
|
||||
return x;
|
||||
}
|
||||
size_t msb_on_index;
|
||||
#if (defined(__amd64__) || defined(__x86_64__))
|
||||
asm ("bsrq %1, %0"
|
||||
: "=r"(msb_on_index) // Outputs.
|
||||
: "r"(x-1) // Inputs.
|
||||
);
|
||||
#elif (defined(JEMALLOC_HAVE_BUILTIN_CLZ))
|
||||
msb_on_index = (63 ^ __builtin_clzll(x - 1));
|
||||
#endif
|
||||
size_t msb_on_index = fls_u64(x - 1);
|
||||
/*
|
||||
* Range-check; it's on the callers to ensure that the result of this
|
||||
* call won't overflow.
|
||||
*/
|
||||
assert(msb_on_index < 63);
|
||||
return 1ULL << (msb_on_index + 1);
|
||||
#else
|
||||
x--;
|
||||
x |= x >> 1;
|
||||
x |= x >> 2;
|
||||
x |= x >> 4;
|
||||
x |= x >> 8;
|
||||
x |= x >> 16;
|
||||
x |= x >> 32;
|
||||
x++;
|
||||
return x;
|
||||
#endif
|
||||
}
|
||||
|
||||
BIT_UTIL_INLINE uint32_t
|
||||
static inline uint32_t
|
||||
pow2_ceil_u32(uint32_t x) {
|
||||
#if ((defined(__i386__) || defined(JEMALLOC_HAVE_BUILTIN_CLZ)) && (!defined(__s390__)))
|
||||
if(unlikely(x <= 1)) {
|
||||
if (unlikely(x <= 1)) {
|
||||
return x;
|
||||
}
|
||||
size_t msb_on_index;
|
||||
#if (defined(__i386__))
|
||||
asm ("bsr %1, %0"
|
||||
: "=r"(msb_on_index) // Outputs.
|
||||
: "r"(x-1) // Inputs.
|
||||
);
|
||||
#elif (defined(JEMALLOC_HAVE_BUILTIN_CLZ))
|
||||
msb_on_index = (31 ^ __builtin_clz(x - 1));
|
||||
#endif
|
||||
size_t msb_on_index = fls_u32(x - 1);
|
||||
/* As above. */
|
||||
assert(msb_on_index < 31);
|
||||
return 1U << (msb_on_index + 1);
|
||||
#else
|
||||
x--;
|
||||
x |= x >> 1;
|
||||
x |= x >> 2;
|
||||
x |= x >> 4;
|
||||
x |= x >> 8;
|
||||
x |= x >> 16;
|
||||
x++;
|
||||
return x;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Compute the smallest power of 2 that is >= x. */
|
||||
BIT_UTIL_INLINE size_t
|
||||
static inline size_t
|
||||
pow2_ceil_zu(size_t x) {
|
||||
#if (LG_SIZEOF_PTR == 3)
|
||||
return pow2_ceil_u64(x);
|
||||
@ -159,77 +303,21 @@ pow2_ceil_zu(size_t x) {
|
||||
#endif
|
||||
}
|
||||
|
||||
#if (defined(__i386__) || defined(__amd64__) || defined(__x86_64__))
|
||||
BIT_UTIL_INLINE unsigned
|
||||
static inline unsigned
|
||||
lg_floor(size_t x) {
|
||||
size_t ret;
|
||||
assert(x != 0);
|
||||
|
||||
asm ("bsr %1, %0"
|
||||
: "=r"(ret) // Outputs.
|
||||
: "r"(x) // Inputs.
|
||||
);
|
||||
assert(ret < UINT_MAX);
|
||||
return (unsigned)ret;
|
||||
}
|
||||
#elif (defined(_MSC_VER))
|
||||
BIT_UTIL_INLINE unsigned
|
||||
lg_floor(size_t x) {
|
||||
unsigned long ret;
|
||||
|
||||
assert(x != 0);
|
||||
|
||||
util_assume(x != 0);
|
||||
#if (LG_SIZEOF_PTR == 3)
|
||||
_BitScanReverse64(&ret, x);
|
||||
#elif (LG_SIZEOF_PTR == 2)
|
||||
_BitScanReverse(&ret, x);
|
||||
return fls_u64(x);
|
||||
#else
|
||||
# error "Unsupported type size for lg_floor()"
|
||||
#endif
|
||||
assert(ret < UINT_MAX);
|
||||
return (unsigned)ret;
|
||||
}
|
||||
#elif (defined(JEMALLOC_HAVE_BUILTIN_CLZ))
|
||||
BIT_UTIL_INLINE unsigned
|
||||
lg_floor(size_t x) {
|
||||
assert(x != 0);
|
||||
|
||||
#if (LG_SIZEOF_PTR == LG_SIZEOF_INT)
|
||||
return ((8 << LG_SIZEOF_PTR) - 1) - __builtin_clz(x);
|
||||
#elif (LG_SIZEOF_PTR == LG_SIZEOF_LONG)
|
||||
return ((8 << LG_SIZEOF_PTR) - 1) - __builtin_clzl(x);
|
||||
#else
|
||||
# error "Unsupported type size for lg_floor()"
|
||||
return fls_u32(x);
|
||||
#endif
|
||||
}
|
||||
#else
|
||||
BIT_UTIL_INLINE unsigned
|
||||
lg_floor(size_t x) {
|
||||
assert(x != 0);
|
||||
|
||||
x |= (x >> 1);
|
||||
x |= (x >> 2);
|
||||
x |= (x >> 4);
|
||||
x |= (x >> 8);
|
||||
x |= (x >> 16);
|
||||
#if (LG_SIZEOF_PTR == 3)
|
||||
x |= (x >> 32);
|
||||
#endif
|
||||
if (x == SIZE_T_MAX) {
|
||||
return (8 << LG_SIZEOF_PTR) - 1;
|
||||
}
|
||||
x++;
|
||||
return ffs_zu(x) - 1;
|
||||
}
|
||||
#endif
|
||||
|
||||
BIT_UTIL_INLINE unsigned
|
||||
static inline unsigned
|
||||
lg_ceil(size_t x) {
|
||||
return lg_floor(x) + ((x & (x - 1)) == 0 ? 0 : 1);
|
||||
}
|
||||
|
||||
#undef BIT_UTIL_INLINE
|
||||
|
||||
/* A compile-time version of lg_floor and lg_ceil. */
|
||||
#define LG_FLOOR_1(x) 0
|
||||
#define LG_FLOOR_2(x) (x < (1ULL << 1) ? LG_FLOOR_1(x) : 1 + LG_FLOOR_1(x >> 1))
|
||||
|
@ -120,7 +120,6 @@ TEST_BEGIN(test_ffs_u) {
|
||||
}
|
||||
TEST_END
|
||||
|
||||
|
||||
TEST_BEGIN(test_ffs_lu) {
|
||||
TEST_FFS(unsigned long, lu, lu, "lu");
|
||||
}
|
||||
@ -136,7 +135,6 @@ TEST_BEGIN(test_ffs_u32) {
|
||||
}
|
||||
TEST_END
|
||||
|
||||
|
||||
TEST_BEGIN(test_ffs_u64) {
|
||||
TEST_FFS(uint64_t, u64, u64, FMTu64);
|
||||
}
|
||||
@ -147,9 +145,69 @@ TEST_BEGIN(test_ffs_zu) {
|
||||
}
|
||||
TEST_END
|
||||
|
||||
#define TEST_FLS(t, suf, test_suf, pri) do { \
|
||||
for (unsigned i = 0; i < sizeof(t) * 8; i++) { \
|
||||
for (unsigned j = 0; j <= i; j++) { \
|
||||
for (unsigned k = 0; k <= j; k++) { \
|
||||
t x = (t)1 << i; \
|
||||
x |= (t)1 << j; \
|
||||
x |= (t)1 << k; \
|
||||
expect_##test_suf##_eq(fls_##suf(x), i, \
|
||||
"Unexpected result, x=%"pri, x); \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
} while(0)
|
||||
|
||||
TEST_BEGIN(test_fls_u) {
|
||||
TEST_FLS(unsigned, u, u,"u");
|
||||
}
|
||||
TEST_END
|
||||
|
||||
TEST_BEGIN(test_fls_lu) {
|
||||
TEST_FLS(unsigned long, lu, lu, "lu");
|
||||
}
|
||||
TEST_END
|
||||
|
||||
TEST_BEGIN(test_fls_llu) {
|
||||
TEST_FLS(unsigned long long, llu, qd, "llu");
|
||||
}
|
||||
TEST_END
|
||||
|
||||
TEST_BEGIN(test_fls_u32) {
|
||||
TEST_FLS(uint32_t, u32, u32, FMTu32);
|
||||
}
|
||||
TEST_END
|
||||
|
||||
TEST_BEGIN(test_fls_u64) {
|
||||
TEST_FLS(uint64_t, u64, u64, FMTu64);
|
||||
}
|
||||
TEST_END
|
||||
|
||||
TEST_BEGIN(test_fls_zu) {
|
||||
TEST_FLS(size_t, zu, zu, "zu");
|
||||
}
|
||||
TEST_END
|
||||
|
||||
TEST_BEGIN(test_fls_u_slow) {
|
||||
TEST_FLS(unsigned, u_slow, u,"u");
|
||||
}
|
||||
TEST_END
|
||||
|
||||
TEST_BEGIN(test_fls_lu_slow) {
|
||||
TEST_FLS(unsigned long, lu_slow, lu, "lu");
|
||||
}
|
||||
TEST_END
|
||||
|
||||
TEST_BEGIN(test_fls_llu_slow) {
|
||||
TEST_FLS(unsigned long long, llu_slow, qd, "llu");
|
||||
}
|
||||
TEST_END
|
||||
|
||||
|
||||
int
|
||||
main(void) {
|
||||
return test(
|
||||
return test_no_reentrancy(
|
||||
test_pow2_ceil_u64,
|
||||
test_pow2_ceil_u32,
|
||||
test_pow2_ceil_zu,
|
||||
@ -159,5 +217,14 @@ main(void) {
|
||||
test_ffs_llu,
|
||||
test_ffs_u32,
|
||||
test_ffs_u64,
|
||||
test_ffs_zu);
|
||||
test_ffs_zu,
|
||||
test_fls_u,
|
||||
test_fls_lu,
|
||||
test_fls_llu,
|
||||
test_fls_u32,
|
||||
test_fls_u64,
|
||||
test_fls_zu,
|
||||
test_fls_u_slow,
|
||||
test_fls_lu_slow,
|
||||
test_fls_llu_slow);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user