#ifndef JEMALLOC_INTERNAL_BIT_UTIL_H
#define JEMALLOC_INTERNAL_BIT_UTIL_H

#include "jemalloc/internal/assert.h"

/* Sanity check. */
#if !defined(JEMALLOC_INTERNAL_FFSLL) || !defined(JEMALLOC_INTERNAL_FFSL) \
    || !defined(JEMALLOC_INTERNAL_FFS)
# error JEMALLOC_INTERNAL_FFS{,L,LL} should have been defined by configure
#endif

/*
 * Unlike the builtins and posix ffs functions, our ffs requires a non-zero
 * input, and returns the position of the lowest bit set (as opposed to the
 * posix versions, which return 1 larger than that position and use a return
 * value of zero as a sentinel).  This tends to simplify logic in callers, and
 * allows for consistency with the builtins we build fls on top of.
 */
static inline unsigned
ffs_llu(unsigned long long x) {
	util_assume(x != 0);
	return JEMALLOC_INTERNAL_FFSLL(x) - 1;
}

static inline unsigned
ffs_lu(unsigned long x) {
	util_assume(x != 0);
	return JEMALLOC_INTERNAL_FFSL(x) - 1;
}

static inline unsigned
ffs_u(unsigned x) {
	util_assume(x != 0);
	return JEMALLOC_INTERNAL_FFS(x) - 1;
}
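
/*
 * For example, 0x18 is 0b11000, so ffs_u(0x18) == 3; a POSIX ffs() would
 * return 4 for the same argument and reserve 0 for a zero input, which these
 * helpers instead treat as a precondition via util_assume(x != 0).
 */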

#define DO_FLS_SLOW(x, suffix) do { \
	util_assume(x != 0); \
	x |= (x >> 1); \
	x |= (x >> 2); \
	x |= (x >> 4); \
	x |= (x >> 8); \
	x |= (x >> 16); \
	if (sizeof(x) > 4) { \
		/* \
		 * If sizeof(x) is 4, then the expression "x >> 32" \
		 * will generate compiler warnings even if the code \
		 * never executes.  This circumvents the warning, and \
		 * gets compiled out in optimized builds. \
		 */ \
		int constant_32 = sizeof(x) * 4; \
		x |= (x >> constant_32); \
	} \
	x++; \
	if (x == 0) { \
		return 8 * sizeof(x) - 1; \
	} \
	return ffs_##suffix(x) - 1; \
} while(0)
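
/*
 * To illustrate the folding above: for x == 0x28 (0b101000), the shifts
 * propagate the highest set bit downward, leaving 0b111111; x++ then gives
 * 0b1000000, and ffs of that minus one is 5, the index of the highest set
 * bit in the original value.  If the increment wraps to zero, the top bit of
 * the type was set, so 8 * sizeof(x) - 1 is returned directly.
 */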

static inline unsigned
fls_llu_slow(unsigned long long x) {
	DO_FLS_SLOW(x, llu);
}

static inline unsigned
fls_lu_slow(unsigned long x) {
	DO_FLS_SLOW(x, lu);
}

static inline unsigned
fls_u_slow(unsigned x) {
	DO_FLS_SLOW(x, u);
}

#undef DO_FLS_SLOW

#ifdef JEMALLOC_HAVE_BUILTIN_CLZ
static inline unsigned
fls_llu(unsigned long long x) {
	util_assume(x != 0);
	/*
	 * Note that the xor here is more naturally written as subtraction; the
	 * last bit set is the number of bits in the type minus the number of
	 * leading zero bits.  But GCC implements that as:
	 *     bsr edi, edi
	 *     mov eax, 31
	 *     xor edi, 31
	 *     sub eax, edi
	 * If we write it as xor instead, then we get
	 *     bsr eax, edi
	 * as desired.
	 */
	return (8 * sizeof(x) - 1) ^ __builtin_clzll(x);
}

static inline unsigned
fls_lu(unsigned long x) {
	util_assume(x != 0);
	return (8 * sizeof(x) - 1) ^ __builtin_clzl(x);
}

static inline unsigned
fls_u(unsigned x) {
	util_assume(x != 0);
	return (8 * sizeof(x) - 1) ^ __builtin_clz(x);
}
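
/*
 * Numerically: for a 32-bit unsigned x == 0x50, __builtin_clz(x) == 25, and
 * 31 ^ 25 == 31 - 25 == 6, the index of the highest set bit.  The xor and
 * the subtraction agree because 31 is all ones in binary and the clz result
 * is at most 31 for a non-zero argument.
 */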

#elif defined(_MSC_VER)

#if LG_SIZEOF_PTR == 3
#define DO_BSR64(bit, x) _BitScanReverse64(&bit, x)
#else
/*
 * This never actually runs; we're just dodging a compiler error for the
 * never-taken branch where sizeof(void *) == 8.
 */
#define DO_BSR64(bit, x) bit = 0; unreachable()
#endif

#define DO_FLS(x) do { \
	if (x == 0) { \
		return 8 * sizeof(x); \
	} \
	unsigned long bit; \
	if (sizeof(x) == 4) { \
		_BitScanReverse(&bit, (unsigned)x); \
		return (unsigned)bit; \
	} \
	if (sizeof(x) == 8 && sizeof(void *) == 8) { \
		DO_BSR64(bit, x); \
		return (unsigned)bit; \
	} \
	if (sizeof(x) == 8 && sizeof(void *) == 4) { \
		/* Dodge a compiler warning, as above. */ \
		int constant_32 = sizeof(x) * 4; \
		if (_BitScanReverse(&bit, \
		    (unsigned)(x >> constant_32))) { \
			return 32 + (unsigned)bit; \
		} else { \
			_BitScanReverse(&bit, (unsigned)x); \
			return (unsigned)bit; \
		} \
	} \
	unreachable(); \
} while (0)
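
/*
 * _BitScanReverse and _BitScanReverse64 store the 0-based index of the
 * highest set bit, which is exactly the fls convention used here, so no
 * adjustment of `bit` is needed after the scan.
 */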

static inline unsigned
fls_llu(unsigned long long x) {
	DO_FLS(x);
}

static inline unsigned
fls_lu(unsigned long x) {
	DO_FLS(x);
}

static inline unsigned
fls_u(unsigned x) {
	DO_FLS(x);
}

#undef DO_FLS
#undef DO_BSR64
#else

static inline unsigned
fls_llu(unsigned long long x) {
	return fls_llu_slow(x);
}

static inline unsigned
fls_lu(unsigned long x) {
	return fls_lu_slow(x);
}

static inline unsigned
fls_u(unsigned x) {
	return fls_u_slow(x);
}
#endif

#if LG_SIZEOF_LONG_LONG > 3
# error "Haven't implemented popcount for 16-byte ints."
#endif

#define DO_POPCOUNT(x, type) do { \
	/* \
	 * Algorithm from an old AMD optimization reference manual. \
	 * We're putting a little bit more work than you might expect \
	 * into the no-intrinsic case, since we only support the \
	 * GCC intrinsics spelling of popcount (for now).  Detecting \
	 * whether or not the popcount builtin is actually usable in \
	 * MSVC is nontrivial. \
	 */ \
	\
	type bmul = (type)0x0101010101010101ULL; \
	\
	/* \
	 * Replace each 2 bits with the sideways sum of the original \
	 * values.  0x5 = 0b0101. \
	 * \
	 * You might expect this to be: \
	 *     x = (x & 0x55...) + ((x >> 1) & 0x55...). \
	 * That costs an extra mask relative to this, though. \
	 */ \
	x = x - ((x >> 1) & (0x55U * bmul)); \
	/* Replace each 4 bits with their sideways sum.  0x3 = 0b0011. */ \
	x = (x & (bmul * 0x33U)) + ((x >> 2) & (bmul * 0x33U)); \
	/* \
	 * Replace each 8 bits with their sideways sum.  Note that we \
	 * can't overflow within each 4-bit sum here, so we can skip \
	 * the initial mask. \
	 */ \
	x = (x + (x >> 4)) & (bmul * 0x0FU); \
	/* \
	 * None of the partial sums in this multiplication (viewed in \
	 * base-256) can overflow into the next digit.  So the least \
	 * significant byte of the product will be the least \
	 * significant byte of the original value, the second least \
	 * significant byte will be the sum of the two least \
	 * significant bytes of the original value, and so on. \
	 * Importantly, the high byte will be the byte-wise sum of all \
	 * the bytes of the original value. \
	 */ \
	x = x * bmul; \
	x >>= ((sizeof(x) - 1) * 8); \
	return (unsigned)x; \
} while(0)
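
/*
 * Tracing one byte through the steps above for x == 0xb4 (0b10110100, four
 * bits set): the first step leaves the pairwise sums 0b01100100, the second
 * the per-nibble sums 0b00110001, and the third the per-byte sum 0x04.  The
 * multiply by bmul then adds every byte's count into the top byte, which the
 * final shift moves down.
 */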

static inline unsigned
popcount_u_slow(unsigned bitmap) {
	DO_POPCOUNT(bitmap, unsigned);
}

static inline unsigned
popcount_lu_slow(unsigned long bitmap) {
	DO_POPCOUNT(bitmap, unsigned long);
}

static inline unsigned
popcount_llu_slow(unsigned long long bitmap) {
	DO_POPCOUNT(bitmap, unsigned long long);
}

#undef DO_POPCOUNT

static inline unsigned
popcount_u(unsigned bitmap) {
#ifdef JEMALLOC_INTERNAL_POPCOUNT
	return JEMALLOC_INTERNAL_POPCOUNT(bitmap);
#else
	return popcount_u_slow(bitmap);
#endif
}

static inline unsigned
popcount_lu(unsigned long bitmap) {
#ifdef JEMALLOC_INTERNAL_POPCOUNTL
	return JEMALLOC_INTERNAL_POPCOUNTL(bitmap);
#else
	return popcount_lu_slow(bitmap);
#endif
}

static inline unsigned
popcount_llu(unsigned long long bitmap) {
#ifdef JEMALLOC_INTERNAL_POPCOUNTLL
	return JEMALLOC_INTERNAL_POPCOUNTLL(bitmap);
#else
	return popcount_llu_slow(bitmap);
#endif
}

/*
 * Clears the first (lowest) set bit in bitmap, and returns the position of
 * that bit.  bitmap *must not* be 0.
 */
static inline size_t
cfs_lu(unsigned long* bitmap) {
	util_assume(*bitmap != 0);
	size_t bit = ffs_lu(*bitmap);
	*bitmap ^= ZU(1) << bit;
	return bit;
}
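
/*
 * For example, with *bitmap == 0x14 (0b10100), cfs_lu() returns 2 and leaves
 * *bitmap == 0x10; calling it repeatedly visits the set bits from lowest to
 * highest, but it must not be called again once the bitmap reaches 0.
 */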

static inline unsigned
ffs_zu(size_t x) {
#if LG_SIZEOF_PTR == LG_SIZEOF_INT
	return ffs_u(x);
#elif LG_SIZEOF_PTR == LG_SIZEOF_LONG
	return ffs_lu(x);
#elif LG_SIZEOF_PTR == LG_SIZEOF_LONG_LONG
	return ffs_llu(x);
#else
#error No implementation for size_t ffs()
#endif
}

static inline unsigned
fls_zu(size_t x) {
#if LG_SIZEOF_PTR == LG_SIZEOF_INT
	return fls_u(x);
#elif LG_SIZEOF_PTR == LG_SIZEOF_LONG
	return fls_lu(x);
#elif LG_SIZEOF_PTR == LG_SIZEOF_LONG_LONG
	return fls_llu(x);
#else
#error No implementation for size_t fls()
#endif
}

static inline unsigned
ffs_u64(uint64_t x) {
#if LG_SIZEOF_LONG == 3
	return ffs_lu(x);
#elif LG_SIZEOF_LONG_LONG == 3
	return ffs_llu(x);
#else
#error No implementation for 64-bit ffs()
#endif
}

static inline unsigned
fls_u64(uint64_t x) {
#if LG_SIZEOF_LONG == 3
	return fls_lu(x);
#elif LG_SIZEOF_LONG_LONG == 3
	return fls_llu(x);
#else
#error No implementation for 64-bit fls()
#endif
}

static inline unsigned
ffs_u32(uint32_t x) {
#if LG_SIZEOF_INT == 2
	return ffs_u(x);
#else
#error No implementation for 32-bit ffs()
#endif
	return ffs_u(x);
}

static inline unsigned
fls_u32(uint32_t x) {
#if LG_SIZEOF_INT == 2
	return fls_u(x);
#else
#error No implementation for 32-bit fls()
#endif
	return fls_u(x);
}

static inline uint64_t
pow2_ceil_u64(uint64_t x) {
	if (unlikely(x <= 1)) {
		return x;
	}
	size_t msb_on_index = fls_u64(x - 1);
	/*
	 * Range-check; it's on the callers to ensure that the result of this
	 * call won't overflow.
	 */
	assert(msb_on_index < 63);
	return 1ULL << (msb_on_index + 1);
}
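
/*
 * For example, pow2_ceil_u64(5) computes fls_u64(4) == 2 and returns
 * 1ULL << 3 == 8, while pow2_ceil_u64(8) computes fls_u64(7) == 2 and also
 * returns 8; working on x - 1 is what keeps exact powers of two unchanged.
 */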

static inline uint32_t
pow2_ceil_u32(uint32_t x) {
	if (unlikely(x <= 1)) {
		return x;
	}
	size_t msb_on_index = fls_u32(x - 1);
	/* As above. */
	assert(msb_on_index < 31);
	return 1U << (msb_on_index + 1);
}

/* Compute the smallest power of 2 that is >= x. */
static inline size_t
pow2_ceil_zu(size_t x) {
#if (LG_SIZEOF_PTR == 3)
	return pow2_ceil_u64(x);
#else
	return pow2_ceil_u32(x);
#endif
}

static inline unsigned
lg_floor(size_t x) {
	util_assume(x != 0);
#if (LG_SIZEOF_PTR == 3)
	return fls_u64(x);
#else
	return fls_u32(x);
#endif
}

static inline unsigned
lg_ceil(size_t x) {
	return lg_floor(x) + ((x & (x - 1)) == 0 ? 0 : 1);
}
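
/*
 * For example, lg_floor(10) == 3 and lg_ceil(10) == 4, while both return 3
 * for x == 8, since the (x & (x - 1)) == 0 test adds nothing for exact
 * powers of two.
 */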

/* A compile-time version of lg_floor and lg_ceil. */
#define LG_FLOOR_1(x) 0
#define LG_FLOOR_2(x) (x < (1ULL << 1) ? LG_FLOOR_1(x) : 1 + LG_FLOOR_1(x >> 1))
#define LG_FLOOR_4(x) (x < (1ULL << 2) ? LG_FLOOR_2(x) : 2 + LG_FLOOR_2(x >> 2))
#define LG_FLOOR_8(x) (x < (1ULL << 4) ? LG_FLOOR_4(x) : 4 + LG_FLOOR_4(x >> 4))
#define LG_FLOOR_16(x) (x < (1ULL << 8) ? LG_FLOOR_8(x) : 8 + LG_FLOOR_8(x >> 8))
#define LG_FLOOR_32(x) (x < (1ULL << 16) ? LG_FLOOR_16(x) : 16 + LG_FLOOR_16(x >> 16))
#define LG_FLOOR_64(x) (x < (1ULL << 32) ? LG_FLOOR_32(x) : 32 + LG_FLOOR_32(x >> 32))
#if LG_SIZEOF_PTR == 2
# define LG_FLOOR(x) LG_FLOOR_32((x))
#else
# define LG_FLOOR(x) LG_FLOOR_64((x))
#endif

#define LG_CEIL(x) (LG_FLOOR(x) + (((x) & ((x) - 1)) == 0 ? 0 : 1))
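
/*
 * For example, LG_FLOOR(48) expands to the constant 5 and LG_CEIL(48) to 6,
 * so either can appear where an integer constant expression is required,
 * such as an array bound.
 */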
#endif /* JEMALLOC_INTERNAL_BIT_UTIL_H */