From 5d6cb6eb66b05261cccd2b416f50ad98d1735229 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 7 Nov 2016 10:52:44 -0800 Subject: [PATCH] Refactor prng to not use 64-bit atomics on 32-bit platforms. This resolves #495. --- include/jemalloc/internal/arena.h | 2 +- include/jemalloc/internal/private_symbols.txt | 11 +- include/jemalloc/internal/prng.h | 152 +++++++++++- src/arena.c | 7 +- src/ckh.c | 5 +- src/prof.c | 2 +- test/unit/prng.c | 219 +++++++++++++++++- 7 files changed, 365 insertions(+), 33 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 1277d080..f39ce54b 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -370,7 +370,7 @@ struct arena_s { * PRNG state for cache index randomization of large allocation base * pointers. */ - uint64_t offset_state; + size_t offset_state; dss_prec_t dss_prec; diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 8972b37b..87c8c9b7 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -405,8 +405,15 @@ pind2sz_tab pow2_ceil_u32 pow2_ceil_u64 pow2_ceil_zu -prng_lg_range -prng_range +prng_lg_range_u32 +prng_lg_range_u64 +prng_lg_range_zu +prng_range_u32 +prng_range_u64 +prng_range_zu +prng_state_next_u32 +prng_state_next_u64 +prng_state_next_zu prof_active prof_active_get prof_active_get_unlocked diff --git a/include/jemalloc/internal/prng.h b/include/jemalloc/internal/prng.h index 5830f8b7..c2bda19c 100644 --- a/include/jemalloc/internal/prng.h +++ b/include/jemalloc/internal/prng.h @@ -19,8 +19,12 @@ * the next has a cycle of 4, etc. For this reason, we prefer to use the upper * bits. */ -#define PRNG_A UINT64_C(6364136223846793005) -#define PRNG_C UINT64_C(1442695040888963407) + +#define PRNG_A_32 UINT32_C(1103515241) +#define PRNG_C_32 UINT32_C(12347) + +#define PRNG_A_64 UINT64_C(6364136223846793005) +#define PRNG_C_64 UINT64_C(1442695040888963407) #endif /* JEMALLOC_H_TYPES */ /******************************************************************************/ @@ -35,28 +39,133 @@ #ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE -uint64_t prng_lg_range(uint64_t *state, unsigned lg_range); -uint64_t prng_range(uint64_t *state, uint64_t range); +uint32_t prng_state_next_u32(uint32_t state); +uint64_t prng_state_next_u64(uint64_t state); +size_t prng_state_next_zu(size_t state); + +uint32_t prng_lg_range_u32(uint32_t *state, unsigned lg_range, + bool atomic); +uint64_t prng_lg_range_u64(uint64_t *state, unsigned lg_range); +size_t prng_lg_range_zu(size_t *state, unsigned lg_range, bool atomic); + +uint32_t prng_range_u32(uint32_t *state, uint32_t range, bool atomic); +uint64_t prng_range_u64(uint64_t *state, uint64_t range); +size_t prng_range_zu(size_t *state, size_t range, bool atomic); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PRNG_C_)) -JEMALLOC_ALWAYS_INLINE uint64_t -prng_lg_range(uint64_t *state, unsigned lg_range) +JEMALLOC_ALWAYS_INLINE uint32_t +prng_state_next_u32(uint32_t state) { - uint64_t ret; + + return ((state * PRNG_A_32) + PRNG_C_32); +} + +JEMALLOC_ALWAYS_INLINE uint64_t +prng_state_next_u64(uint64_t state) +{ + + return ((state * PRNG_A_64) + PRNG_C_64); +} + +JEMALLOC_ALWAYS_INLINE size_t +prng_state_next_zu(size_t state) +{ + +#if LG_SIZEOF_PTR == 2 + return ((state * PRNG_A_32) + PRNG_C_32); +#elif LG_SIZEOF_PTR == 3 + return ((state * PRNG_A_64) + PRNG_C_64); +#else +#error Unsupported pointer size +#endif +} + +JEMALLOC_ALWAYS_INLINE uint32_t +prng_lg_range_u32(uint32_t *state, unsigned lg_range, bool atomic) +{ + uint32_t ret, state1; + + assert(lg_range > 0); + assert(lg_range <= 32); + + if (atomic) { + uint32_t state0; + + do { + state0 = atomic_read_uint32(state); + state1 = prng_state_next_u32(state0); + } while (atomic_cas_uint32(state, state0, state1)); + } else { + state1 = prng_state_next_u32(*state); + *state = state1; + } + ret = state1 >> (32 - lg_range); + + return (ret); +} + +/* 64-bit atomic operations cannot be supported on all relevant platforms. */ +JEMALLOC_ALWAYS_INLINE uint64_t +prng_lg_range_u64(uint64_t *state, unsigned lg_range) +{ + uint64_t ret, state1; assert(lg_range > 0); assert(lg_range <= 64); - ret = (*state * PRNG_A) + PRNG_C; - *state = ret; - ret >>= (64 - lg_range); + state1 = prng_state_next_u64(*state); + *state = state1; + ret = state1 >> (64 - lg_range); + + return (ret); +} + +JEMALLOC_ALWAYS_INLINE size_t +prng_lg_range_zu(size_t *state, unsigned lg_range, bool atomic) +{ + size_t ret, state1; + + assert(lg_range > 0); + assert(lg_range <= ZU(1) << (3 + LG_SIZEOF_PTR)); + + if (atomic) { + size_t state0; + + do { + state0 = atomic_read_z(state); + state1 = prng_state_next_zu(state0); + } while (atomic_cas_z(state, state0, state1)); + } else { + state1 = prng_state_next_zu(*state); + *state = state1; + } + ret = state1 >> ((ZU(1) << (3 + LG_SIZEOF_PTR)) - lg_range); + + return (ret); +} + +JEMALLOC_ALWAYS_INLINE uint32_t +prng_range_u32(uint32_t *state, uint32_t range, bool atomic) +{ + uint32_t ret; + unsigned lg_range; + + assert(range > 1); + + /* Compute the ceiling of lg(range). */ + lg_range = ffs_u32(pow2_ceil_u32(range)) - 1; + + /* Generate a result in [0..range) via repeated trial. */ + do { + ret = prng_lg_range_u32(state, lg_range, atomic); + } while (ret >= range); return (ret); } JEMALLOC_ALWAYS_INLINE uint64_t -prng_range(uint64_t *state, uint64_t range) +prng_range_u64(uint64_t *state, uint64_t range) { uint64_t ret; unsigned lg_range; @@ -68,7 +177,26 @@ prng_range(uint64_t *state, uint64_t range) /* Generate a result in [0..range) via repeated trial. */ do { - ret = prng_lg_range(state, lg_range); + ret = prng_lg_range_u64(state, lg_range); + } while (ret >= range); + + return (ret); +} + +JEMALLOC_ALWAYS_INLINE size_t +prng_range_zu(size_t *state, size_t range, bool atomic) +{ + size_t ret; + unsigned lg_range; + + assert(range > 1); + + /* Compute the ceiling of lg(range). */ + lg_range = ffs_u64(pow2_ceil_u64(range)) - 1; + + /* Generate a result in [0..range) via repeated trial. */ + do { + ret = prng_lg_range_zu(state, lg_range, atomic); } while (ret >= range); return (ret); diff --git a/src/arena.c b/src/arena.c index 49f04931..e196b133 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1199,7 +1199,7 @@ arena_decay_deadline_init(arena_t *arena) if (arena->decay.time > 0) { nstime_t jitter; - nstime_init(&jitter, prng_range(&arena->decay.jitter_state, + nstime_init(&jitter, prng_range_u64(&arena->decay.jitter_state, nstime_ns(&arena->decay.interval))); nstime_add(&arena->decay.deadline, &jitter); } @@ -2565,7 +2565,8 @@ arena_malloc_large(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) * that is a multiple of the cacheline size, e.g. [0 .. 63) * 64 * for 4 KiB pages and 64-byte cachelines. */ - r = prng_lg_range(&arena->offset_state, LG_PAGE - LG_CACHELINE); + r = prng_lg_range_zu(&arena->offset_state, LG_PAGE - + LG_CACHELINE, false); random_offset = ((uintptr_t)r) << LG_CACHELINE; } else random_offset = 0; @@ -3503,7 +3504,7 @@ arena_new(tsdn_t *tsdn, unsigned ind) * deterministic seed. */ arena->offset_state = config_debug ? ind : - (uint64_t)(uintptr_t)arena; + (size_t)(uintptr_t)arena; } arena->dss_prec = chunk_dss_prec_get(); diff --git a/src/ckh.c b/src/ckh.c index 3be671c3..159bd8ae 100644 --- a/src/ckh.c +++ b/src/ckh.c @@ -99,7 +99,8 @@ ckh_try_bucket_insert(ckh_t *ckh, size_t bucket, const void *key, * Cycle through the cells in the bucket, starting at a random position. * The randomness avoids worst-case search overhead as buckets fill up. */ - offset = (unsigned)prng_lg_range(&ckh->prng_state, LG_CKH_BUCKET_CELLS); + offset = (unsigned)prng_lg_range_u64(&ckh->prng_state, + LG_CKH_BUCKET_CELLS); for (i = 0; i < (ZU(1) << LG_CKH_BUCKET_CELLS); i++) { cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + ((i + offset) & ((ZU(1) << LG_CKH_BUCKET_CELLS) - 1))]; @@ -141,7 +142,7 @@ ckh_evict_reloc_insert(ckh_t *ckh, size_t argbucket, void const **argkey, * were an item for which both hashes indicated the same * bucket. */ - i = (unsigned)prng_lg_range(&ckh->prng_state, + i = (unsigned)prng_lg_range_u64(&ckh->prng_state, LG_CKH_BUCKET_CELLS); cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + i]; assert(cell->key != NULL); diff --git a/src/prof.c b/src/prof.c index 140d5b22..c89dade1 100644 --- a/src/prof.c +++ b/src/prof.c @@ -874,7 +874,7 @@ prof_sample_threshold_update(prof_tdata_t *tdata) * pp 500 * (http://luc.devroye.org/rnbookindex.html) */ - r = prng_lg_range(&tdata->prng_state, 53); + r = prng_lg_range_u64(&tdata->prng_state, 53); u = (double)r * (1.0/9007199254740992.0L); tdata->bytes_until_sample = (uint64_t)(log(u) / log(1.0 - (1.0 / (double)((uint64_t)1U << lg_prof_sample)))) diff --git a/test/unit/prng.c b/test/unit/prng.c index b22bd2f5..80c9d733 100644 --- a/test/unit/prng.c +++ b/test/unit/prng.c @@ -1,33 +1,71 @@ #include "test/jemalloc_test.h" -TEST_BEGIN(test_prng_lg_range) +static void +test_prng_lg_range_u32(bool atomic) +{ + uint32_t sa, sb, ra, rb; + unsigned lg_range; + + sa = 42; + ra = prng_lg_range_u32(&sa, 32, atomic); + sa = 42; + rb = prng_lg_range_u32(&sa, 32, atomic); + assert_u32_eq(ra, rb, + "Repeated generation should produce repeated results"); + + sb = 42; + rb = prng_lg_range_u32(&sb, 32, atomic); + assert_u32_eq(ra, rb, + "Equivalent generation should produce equivalent results"); + + sa = 42; + ra = prng_lg_range_u32(&sa, 32, atomic); + rb = prng_lg_range_u32(&sa, 32, atomic); + assert_u32_ne(ra, rb, + "Full-width results must not immediately repeat"); + + sa = 42; + ra = prng_lg_range_u32(&sa, 32, atomic); + for (lg_range = 31; lg_range > 0; lg_range--) { + sb = 42; + rb = prng_lg_range_u32(&sb, lg_range, atomic); + assert_u32_eq((rb & (UINT32_C(0xffffffff) << lg_range)), + 0, "High order bits should be 0, lg_range=%u", lg_range); + assert_u32_eq(rb, (ra >> (32 - lg_range)), + "Expected high order bits of full-width result, " + "lg_range=%u", lg_range); + } +} + +static void +test_prng_lg_range_u64(void) { uint64_t sa, sb, ra, rb; unsigned lg_range; sa = 42; - ra = prng_lg_range(&sa, 64); + ra = prng_lg_range_u64(&sa, 64); sa = 42; - rb = prng_lg_range(&sa, 64); + rb = prng_lg_range_u64(&sa, 64); assert_u64_eq(ra, rb, "Repeated generation should produce repeated results"); sb = 42; - rb = prng_lg_range(&sb, 64); + rb = prng_lg_range_u64(&sb, 64); assert_u64_eq(ra, rb, "Equivalent generation should produce equivalent results"); sa = 42; - ra = prng_lg_range(&sa, 64); - rb = prng_lg_range(&sa, 64); + ra = prng_lg_range_u64(&sa, 64); + rb = prng_lg_range_u64(&sa, 64); assert_u64_ne(ra, rb, "Full-width results must not immediately repeat"); sa = 42; - ra = prng_lg_range(&sa, 64); + ra = prng_lg_range_u64(&sa, 64); for (lg_range = 63; lg_range > 0; lg_range--) { sb = 42; - rb = prng_lg_range(&sb, lg_range); + rb = prng_lg_range_u64(&sb, lg_range); assert_u64_eq((rb & (UINT64_C(0xffffffffffffffff) << lg_range)), 0, "High order bits should be 0, lg_range=%u", lg_range); assert_u64_eq(rb, (ra >> (64 - lg_range)), @@ -35,9 +73,103 @@ TEST_BEGIN(test_prng_lg_range) "lg_range=%u", lg_range); } } + +static void +test_prng_lg_range_zu(bool atomic) +{ + size_t sa, sb, ra, rb; + unsigned lg_range; + + sa = 42; + ra = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR), atomic); + sa = 42; + rb = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR), atomic); + assert_zu_eq(ra, rb, + "Repeated generation should produce repeated results"); + + sb = 42; + rb = prng_lg_range_zu(&sb, ZU(1) << (3 + LG_SIZEOF_PTR), atomic); + assert_zu_eq(ra, rb, + "Equivalent generation should produce equivalent results"); + + sa = 42; + ra = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR), atomic); + rb = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR), atomic); + assert_zu_ne(ra, rb, + "Full-width results must not immediately repeat"); + + sa = 42; + ra = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR), atomic); + for (lg_range = (ZU(1) << (3 + LG_SIZEOF_PTR)) - 1; lg_range > 0; + lg_range--) { + sb = 42; + rb = prng_lg_range_zu(&sb, lg_range, atomic); + assert_zu_eq((rb & (SIZE_T_MAX << lg_range)), + 0, "High order bits should be 0, lg_range=%u", lg_range); + assert_zu_eq(rb, (ra >> ((ZU(1) << (3 + LG_SIZEOF_PTR)) - + lg_range)), "Expected high order bits of full-width " + "result, lg_range=%u", lg_range); + } +} + +TEST_BEGIN(test_prng_lg_range_u32_nonatomic) +{ + + test_prng_lg_range_u32(false); +} TEST_END -TEST_BEGIN(test_prng_range) +TEST_BEGIN(test_prng_lg_range_u32_atomic) +{ + + test_prng_lg_range_u32(true); +} +TEST_END + +TEST_BEGIN(test_prng_lg_range_u64_nonatomic) +{ + + test_prng_lg_range_u64(); +} +TEST_END + +TEST_BEGIN(test_prng_lg_range_zu_nonatomic) +{ + + test_prng_lg_range_zu(false); +} +TEST_END + +TEST_BEGIN(test_prng_lg_range_zu_atomic) +{ + + test_prng_lg_range_zu(true); +} +TEST_END + +static void +test_prng_range_u32(bool atomic) +{ + uint32_t range; +#define MAX_RANGE 10000000 +#define RANGE_STEP 97 +#define NREPS 10 + + for (range = 2; range < MAX_RANGE; range += RANGE_STEP) { + uint32_t s; + unsigned rep; + + s = range; + for (rep = 0; rep < NREPS; rep++) { + uint32_t r = prng_range_u32(&s, range, atomic); + + assert_u32_lt(r, range, "Out of range"); + } + } +} + +static void +test_prng_range_u64(void) { uint64_t range; #define MAX_RANGE 10000000 @@ -50,12 +182,67 @@ TEST_BEGIN(test_prng_range) s = range; for (rep = 0; rep < NREPS; rep++) { - uint64_t r = prng_range(&s, range); + uint64_t r = prng_range_u64(&s, range); assert_u64_lt(r, range, "Out of range"); } } } + +static void +test_prng_range_zu(bool atomic) +{ + size_t range; +#define MAX_RANGE 10000000 +#define RANGE_STEP 97 +#define NREPS 10 + + for (range = 2; range < MAX_RANGE; range += RANGE_STEP) { + size_t s; + unsigned rep; + + s = range; + for (rep = 0; rep < NREPS; rep++) { + size_t r = prng_range_zu(&s, range, atomic); + + assert_zu_lt(r, range, "Out of range"); + } + } +} + +TEST_BEGIN(test_prng_range_u32_nonatomic) +{ + + test_prng_range_u32(false); +} +TEST_END + +TEST_BEGIN(test_prng_range_u32_atomic) +{ + + test_prng_range_u32(true); +} +TEST_END + +TEST_BEGIN(test_prng_range_u64_nonatomic) +{ + + test_prng_range_u64(); +} +TEST_END + +TEST_BEGIN(test_prng_range_zu_nonatomic) +{ + + test_prng_range_zu(false); +} +TEST_END + +TEST_BEGIN(test_prng_range_zu_atomic) +{ + + test_prng_range_zu(true); +} TEST_END int @@ -63,6 +250,14 @@ main(void) { return (test( - test_prng_lg_range, - test_prng_range)); + test_prng_lg_range_u32_nonatomic, + test_prng_lg_range_u32_atomic, + test_prng_lg_range_u64_nonatomic, + test_prng_lg_range_zu_nonatomic, + test_prng_lg_range_zu_atomic, + test_prng_range_u32_nonatomic, + test_prng_range_u32_atomic, + test_prng_range_u64_nonatomic, + test_prng_range_zu_nonatomic, + test_prng_range_zu_atomic)); }