diff --git a/include/jemalloc/internal/prng.h b/include/jemalloc/internal/prng.h
index a309e962..14542aa1 100644
--- a/include/jemalloc/internal/prng.h
+++ b/include/jemalloc/internal/prng.h
@@ -1,7 +1,6 @@
 #ifndef JEMALLOC_INTERNAL_PRNG_H
 #define JEMALLOC_INTERNAL_PRNG_H
 
-#include "jemalloc/internal/atomic.h"
 #include "jemalloc/internal/bit_util.h"
 
 /*
@@ -59,66 +58,38 @@ prng_state_next_zu(size_t state) {
 /*
  * The prng_lg_range functions give a uniform int in the half-open range [0,
- * 2**lg_range). If atomic is true, they do so safely from multiple threads.
- * Multithreaded 64-bit prngs aren't supported.
+ * 2**lg_range).
  */
 JEMALLOC_ALWAYS_INLINE uint32_t
-prng_lg_range_u32(atomic_u32_t *state, unsigned lg_range, bool atomic) {
-	uint32_t ret, state0, state1;
-
+prng_lg_range_u32(uint32_t *state, unsigned lg_range) {
 	assert(lg_range > 0);
 	assert(lg_range <= 32);
 
-	state0 = atomic_load_u32(state, ATOMIC_RELAXED);
-
-	if (atomic) {
-		do {
-			state1 = prng_state_next_u32(state0);
-		} while (!atomic_compare_exchange_weak_u32(state, &state0,
-		    state1, ATOMIC_RELAXED, ATOMIC_RELAXED));
-	} else {
-		state1 = prng_state_next_u32(state0);
-		atomic_store_u32(state, state1, ATOMIC_RELAXED);
-	}
-	ret = state1 >> (32 - lg_range);
+	*state = prng_state_next_u32(*state);
+	uint32_t ret = *state >> (32 - lg_range);
 
 	return ret;
 }
 
 JEMALLOC_ALWAYS_INLINE uint64_t
 prng_lg_range_u64(uint64_t *state, unsigned lg_range) {
-	uint64_t ret, state1;
-
 	assert(lg_range > 0);
 	assert(lg_range <= 64);
 
-	state1 = prng_state_next_u64(*state);
-	*state = state1;
-	ret = state1 >> (64 - lg_range);
+	*state = prng_state_next_u64(*state);
+	uint64_t ret = *state >> (64 - lg_range);
 
 	return ret;
 }
 
 JEMALLOC_ALWAYS_INLINE size_t
-prng_lg_range_zu(atomic_zu_t *state, unsigned lg_range, bool atomic) {
-	size_t ret, state0, state1;
-
+prng_lg_range_zu(size_t *state, unsigned lg_range) {
 	assert(lg_range > 0);
 	assert(lg_range <= ZU(1) << (3 + LG_SIZEOF_PTR));
 
-	state0 = atomic_load_zu(state, ATOMIC_RELAXED);
-
-	if (atomic) {
-		do {
-			state1 = prng_state_next_zu(state0);
-		} while (atomic_compare_exchange_weak_zu(state, &state0,
-		    state1, ATOMIC_RELAXED, ATOMIC_RELAXED));
-	} else {
-		state1 = prng_state_next_zu(state0);
-		atomic_store_zu(state, state1, ATOMIC_RELAXED);
-	}
-	ret = state1 >> ((ZU(1) << (3 + LG_SIZEOF_PTR)) - lg_range);
+	*state = prng_state_next_zu(*state);
+	size_t ret = *state >> ((ZU(1) << (3 + LG_SIZEOF_PTR)) - lg_range);
 
 	return ret;
 }
@@ -129,20 +100,24 @@ prng_lg_range_zu(atomic_zu_t *state, unsigned lg_range, bool atomic) {
  */
 JEMALLOC_ALWAYS_INLINE uint32_t
-prng_range_u32(atomic_u32_t *state, uint32_t range, bool atomic) {
-	uint32_t ret;
-	unsigned lg_range;
-
+prng_range_u32(uint32_t *state, uint32_t range) {
+	assert(range != 0);
+	/*
+	 * If range were 1, lg_range would be 0, so the shift in
+	 * prng_lg_range_u32 would be a shift of a 32-bit variable by 32 bits,
+	 * which is UB. Just handle this case as a one-off.
+	 */
 	if (range == 1) {
 		return 0;
 	}
 
 	/* Compute the ceiling of lg(range). */
-	lg_range = ffs_u32(pow2_ceil_u32(range));
+	unsigned lg_range = ffs_u32(pow2_ceil_u32(range));
 
 	/* Generate a result in [0..range) via repeated trial. */
+	uint32_t ret;
 	do {
-		ret = prng_lg_range_u32(state, lg_range, atomic);
+		ret = prng_lg_range_u32(state, lg_range);
 	} while (ret >= range);
 
 	return ret;
@@ -150,17 +125,18 @@ prng_range_u32(atomic_u32_t *state, uint32_t range, bool atomic) {
 
 JEMALLOC_ALWAYS_INLINE uint64_t
 prng_range_u64(uint64_t *state, uint64_t range) {
-	uint64_t ret;
-	unsigned lg_range;
+	assert(range != 0);
 
+	/* See the note in prng_range_u32. */
 	if (range == 1) {
 		return 0;
 	}
 
 	/* Compute the ceiling of lg(range). */
-	lg_range = ffs_u64(pow2_ceil_u64(range));
+	unsigned lg_range = ffs_u64(pow2_ceil_u64(range));
 
 	/* Generate a result in [0..range) via repeated trial. */
+	uint64_t ret;
 	do {
 		ret = prng_lg_range_u64(state, lg_range);
 	} while (ret >= range);
@@ -169,20 +145,21 @@ prng_range_u64(uint64_t *state, uint64_t range) {
 }
 
 JEMALLOC_ALWAYS_INLINE size_t
-prng_range_zu(atomic_zu_t *state, size_t range, bool atomic) {
-	size_t ret;
-	unsigned lg_range;
+prng_range_zu(size_t *state, size_t range) {
+	assert(range != 0);
 
+	/* See the note in prng_range_u32. */
 	if (range == 1) {
 		return 0;
 	}
 
 	/* Compute the ceiling of lg(range). */
-	lg_range = ffs_u64(pow2_ceil_u64(range));
+	unsigned lg_range = ffs_u64(pow2_ceil_u64(range));
 
 	/* Generate a result in [0..range) via repeated trial. */
+	size_t ret;
 	do {
-		ret = prng_lg_range_zu(state, lg_range, atomic);
+		ret = prng_lg_range_zu(state, lg_range);
 	} while (ret >= range);
 
 	return ret;
diff --git a/test/unit/prng.c b/test/unit/prng.c
index 915b3504..baf43d96 100644
--- a/test/unit/prng.c
+++ b/test/unit/prng.c
@@ -1,34 +1,34 @@
 #include "test/jemalloc_test.h"
 
 static void
-test_prng_lg_range_u32(bool atomic) {
-	atomic_u32_t sa, sb;
+test_prng_lg_range_u32() {
+	uint32_t sa, sb;
 	uint32_t ra, rb;
 	unsigned lg_range;
 
-	atomic_store_u32(&sa, 42, ATOMIC_RELAXED);
-	ra = prng_lg_range_u32(&sa, 32, atomic);
-	atomic_store_u32(&sa, 42, ATOMIC_RELAXED);
-	rb = prng_lg_range_u32(&sa, 32, atomic);
+	sa = 42;
+	ra = prng_lg_range_u32(&sa, 32);
+	sa = 42;
+	rb = prng_lg_range_u32(&sa, 32);
 	expect_u32_eq(ra, rb,
 	    "Repeated generation should produce repeated results");
 
-	atomic_store_u32(&sb, 42, ATOMIC_RELAXED);
-	rb = prng_lg_range_u32(&sb, 32, atomic);
+	sb = 42;
+	rb = prng_lg_range_u32(&sb, 32);
 	expect_u32_eq(ra, rb,
 	    "Equivalent generation should produce equivalent results");
 
-	atomic_store_u32(&sa, 42, ATOMIC_RELAXED);
-	ra = prng_lg_range_u32(&sa, 32, atomic);
-	rb = prng_lg_range_u32(&sa, 32, atomic);
+	sa = 42;
+	ra = prng_lg_range_u32(&sa, 32);
+	rb = prng_lg_range_u32(&sa, 32);
 	expect_u32_ne(ra, rb,
 	    "Full-width results must not immediately repeat");
 
-	atomic_store_u32(&sa, 42, ATOMIC_RELAXED);
-	ra = prng_lg_range_u32(&sa, 32, atomic);
+	sa = 42;
+	ra = prng_lg_range_u32(&sa, 32);
 	for (lg_range = 31; lg_range > 0; lg_range--) {
-		atomic_store_u32(&sb, 42, ATOMIC_RELAXED);
-		rb = prng_lg_range_u32(&sb, lg_range, atomic);
+		sb = 42;
+		rb = prng_lg_range_u32(&sb, lg_range);
 		expect_u32_eq((rb & (UINT32_C(0xffffffff) << lg_range)),
 		    0, "High order bits should be 0, lg_range=%u", lg_range);
 		expect_u32_eq(rb, (ra >> (32 - lg_range)),
@@ -74,35 +74,35 @@ test_prng_lg_range_u64(void) {
 }
 
 static void
-test_prng_lg_range_zu(bool atomic) {
-	atomic_zu_t sa, sb;
+test_prng_lg_range_zu() {
+	size_t sa, sb;
 	size_t ra, rb;
 	unsigned lg_range;
 
-	atomic_store_zu(&sa, 42, ATOMIC_RELAXED);
-	ra = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR), atomic);
-	atomic_store_zu(&sa, 42, ATOMIC_RELAXED);
-	rb = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR), atomic);
+	sa = 42;
+	ra = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR));
+	sa = 42;
+	rb = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR));
 	expect_zu_eq(ra, rb,
 	    "Repeated generation should produce repeated results");
 
-	atomic_store_zu(&sb, 42, ATOMIC_RELAXED);
-	rb = prng_lg_range_zu(&sb, ZU(1) << (3 + LG_SIZEOF_PTR), atomic);
+	sb = 42;
+	rb = prng_lg_range_zu(&sb, ZU(1) << (3 + LG_SIZEOF_PTR));
 	expect_zu_eq(ra, rb,
 	    "Equivalent generation should produce equivalent results");
 
-	atomic_store_zu(&sa, 42, ATOMIC_RELAXED);
-	ra = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR), atomic);
-	rb = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR), atomic);
+	sa = 42;
+	ra = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR));
+	rb = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR));
 	expect_zu_ne(ra, rb,
 	    "Full-width results must not immediately repeat");
 
-	atomic_store_zu(&sa, 42, ATOMIC_RELAXED);
-	ra = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR), atomic);
+	sa = 42;
+	ra = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR));
 	for (lg_range = (ZU(1) << (3 + LG_SIZEOF_PTR)) - 1; lg_range > 0;
 	    lg_range--) {
-		atomic_store_zu(&sb, 42, ATOMIC_RELAXED);
-		rb = prng_lg_range_zu(&sb, lg_range, atomic);
+		sb = 42;
+		rb = prng_lg_range_zu(&sb, lg_range);
 		expect_zu_eq((rb & (SIZE_T_MAX << lg_range)),
 		    0, "High order bits should be 0, lg_range=%u", lg_range);
 		expect_zu_eq(rb, (ra >> ((ZU(1) << (3 + LG_SIZEOF_PTR)) -
@@ -112,12 +112,12 @@ test_prng_lg_range_zu(bool atomic) {
 }
 
 TEST_BEGIN(test_prng_lg_range_u32_nonatomic) {
-	test_prng_lg_range_u32(false);
+	test_prng_lg_range_u32();
 }
 TEST_END
 
 TEST_BEGIN(test_prng_lg_range_u32_atomic) {
-	test_prng_lg_range_u32(true);
+	test_prng_lg_range_u32();
 }
 TEST_END
 
@@ -127,29 +127,29 @@ TEST_BEGIN(test_prng_lg_range_u64_nonatomic) {
 TEST_END
 
 TEST_BEGIN(test_prng_lg_range_zu_nonatomic) {
-	test_prng_lg_range_zu(false);
+	test_prng_lg_range_zu();
 }
 TEST_END
 
 TEST_BEGIN(test_prng_lg_range_zu_atomic) {
-	test_prng_lg_range_zu(true);
+	test_prng_lg_range_zu();
 }
 TEST_END
 
 static void
-test_prng_range_u32(bool atomic) {
+test_prng_range_u32() {
 	uint32_t range;
 #define MAX_RANGE 10000000
 #define RANGE_STEP 97
 #define NREPS 10
 	for (range = 2; range < MAX_RANGE; range += RANGE_STEP) {
-		atomic_u32_t s;
+		uint32_t s;
 		unsigned rep;
 
-		atomic_store_u32(&s, range, ATOMIC_RELAXED);
+		s = range;
 		for (rep = 0; rep < NREPS; rep++) {
-			uint32_t r = prng_range_u32(&s, range, atomic);
+			uint32_t r = prng_range_u32(&s, range);
 
 			expect_u32_lt(r, range, "Out of range");
 		}
@@ -177,19 +177,19 @@ test_prng_range_u64(void) {
 }
 
 static void
-test_prng_range_zu(bool atomic) {
+test_prng_range_zu() {
 	size_t range;
 #define MAX_RANGE 10000000
 #define RANGE_STEP 97
 #define NREPS 10
 	for (range = 2; range < MAX_RANGE; range += RANGE_STEP) {
-		atomic_zu_t s;
+		size_t s;
 		unsigned rep;
 
-		atomic_store_zu(&s, range, ATOMIC_RELAXED);
+		s = range;
 		for (rep = 0; rep < NREPS; rep++) {
-			size_t r = prng_range_zu(&s, range, atomic);
+			size_t r = prng_range_zu(&s, range);
 
 			expect_zu_lt(r, range, "Out of range");
 		}
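
After this change, PRNG state is a plain integer and any thread-safety requirement moves to the caller: keep the state per-thread or update it under a lock you already hold, since the simplified functions perform no synchronization. A minimal sketch of the resulting usage follows; the seed value, variable, and function names are illustrative, not part of the patch:

#include "jemalloc/internal/prng.h"

/*
 * Illustrative only: the caller now owns a plain integer state word.
 * This state must not be shared across threads without external
 * synchronization.
 */
static uint64_t example_prng_state = 42;	/* hypothetical seed */

static size_t
example_pick_slot(size_t nslots) {
	/*
	 * prng_range_u64 rejection-samples until the draw falls in
	 * [0, nslots), so the result is uniform; nslots must be nonzero
	 * per the new assert(range != 0).
	 */
	return (size_t)prng_range_u64(&example_prng_state, (uint64_t)nslots);
}

Note that with the bool atomic parameter gone, the _atomic and _nonatomic test wrappers above now exercise the same non-atomic code path.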