From 34676d33690f6cc6885ff769e537ca940aacf886 Mon Sep 17 00:00:00 2001
From: Jason Evans
Date: Tue, 9 Feb 2016 16:28:40 -0800
Subject: [PATCH] Refactor prng* from cpp macros into inline functions.

Remove 32-bit variant, convert prng64() to prng_lg_range(), and add
prng_range().
---
Review notes (below the "---" marker, so not part of the commit message):
- prng_range() derives lg_range by counting bits rather than by calling
  jemalloc_ffsl(), whose long argument is only 32 bits wide on ILP32/LLP64
  targets and would truncate the 64-bit power of two.
- TEST_POW2_CEIL() asserts with assert_<suf>_eq() so that 64-bit results are
  not compared through size_t on 32-bit targets.
- Line structure and whitespace were reconstructed from a collapsed copy of
  the patch; the blob ids on the "index" lines are those of the original
  commit and do not reflect the two changes above.

 Makefile.in                                   |  8 ++-
 include/jemalloc/internal/ckh.h               |  4 +-
 .../jemalloc/internal/jemalloc_internal.h.in  |  4 +-
 include/jemalloc/internal/private_symbols.txt |  4 +-
 include/jemalloc/internal/prng.h              | 78 +++++++++++++------
 include/jemalloc/internal/util.h              | 37 ++++++++--
 src/arena.c                                   |  4 +-
 src/ckh.c                                     |  4 +-
 src/prng.c                                    |  2 +
 src/prof.c                                    |  3 +-
 test/unit/prng.c                              | 68 ++++++++++++++++++
 test/unit/util.c                              | 73 ++++++++++++-------
 12 files changed, 220 insertions(+), 69 deletions(-)
 create mode 100644 src/prng.c
 create mode 100644 test/unit/prng.c

diff --git a/Makefile.in b/Makefile.in
index f3c2e4bd..6b210fee 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -84,9 +84,10 @@ C_SRCS := $(srcroot)src/jemalloc.c $(srcroot)src/arena.c \
 	$(srcroot)src/chunk_mmap.c $(srcroot)src/ckh.c $(srcroot)src/ctl.c \
 	$(srcroot)src/extent.c $(srcroot)src/hash.c $(srcroot)src/huge.c \
 	$(srcroot)src/mb.c $(srcroot)src/mutex.c $(srcroot)src/pages.c \
-	$(srcroot)src/prof.c $(srcroot)src/quarantine.c $(srcroot)src/rtree.c \
-	$(srcroot)src/stats.c $(srcroot)src/tcache.c $(srcroot)src/ticker.c \
-	$(srcroot)src/time.c $(srcroot)src/tsd.c $(srcroot)src/util.c
+	$(srcroot)src/prng.c $(srcroot)src/prof.c $(srcroot)src/quarantine.c \
+	$(srcroot)src/rtree.c $(srcroot)src/stats.c $(srcroot)src/tcache.c \
+	$(srcroot)src/ticker.c $(srcroot)src/time.c $(srcroot)src/tsd.c \
+	$(srcroot)src/util.c
 ifeq ($(enable_valgrind), 1)
 C_SRCS += $(srcroot)src/valgrind.c
 endif
@@ -129,6 +130,7 @@ TESTS_UNIT := $(srcroot)test/unit/atomic.c \
 	$(srcroot)test/unit/math.c \
 	$(srcroot)test/unit/mq.c \
 	$(srcroot)test/unit/mtx.c \
+	$(srcroot)test/unit/prng.c \
 	$(srcroot)test/unit/prof_accum.c \
 	$(srcroot)test/unit/prof_active.c \
 	$(srcroot)test/unit/prof_gdump.c \
diff --git a/include/jemalloc/internal/ckh.h b/include/jemalloc/internal/ckh.h
index 75c1c979..45fb3455 100644
--- a/include/jemalloc/internal/ckh.h
+++ b/include/jemalloc/internal/ckh.h
@@ -40,9 +40,7 @@ struct ckh_s {
 #endif
 
 	/* Used for pseudo-random number generation. */
-#define	CKH_A		1103515241
-#define	CKH_C		12347
-	uint32_t	prng_state;
+	uint64_t	prng_state;
 
 	/* Total number of items. */
 	size_t		count;
diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in
index 12d51be2..616eb9f3 100644
--- a/include/jemalloc/internal/jemalloc_internal.h.in
+++ b/include/jemalloc/internal/jemalloc_internal.h.in
@@ -547,7 +547,7 @@ size2index_compute(size_t size)
 #if (NTBINS != 0)
 	if (size <= (ZU(1) << LG_TINY_MAXCLASS)) {
 		size_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1;
-		size_t lg_ceil = lg_floor(pow2_ceil(size));
+		size_t lg_ceil = lg_floor(pow2_ceil_zu(size));
 		return (lg_ceil < lg_tmin ? 0 : lg_ceil - lg_tmin);
 	}
 #endif
@@ -644,7 +644,7 @@ s2u_compute(size_t size)
 #if (NTBINS > 0)
 	if (size <= (ZU(1) << LG_TINY_MAXCLASS)) {
 		size_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1;
-		size_t lg_ceil = lg_floor(pow2_ceil(size));
+		size_t lg_ceil = lg_floor(pow2_ceil_zu(size));
 		return (lg_ceil < lg_tmin ? (ZU(1) << lg_tmin) :
 		    (ZU(1) << lg_ceil));
 	}
diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt
index 216367e5..d910202d 100644
--- a/include/jemalloc/internal/private_symbols.txt
+++ b/include/jemalloc/internal/private_symbols.txt
@@ -348,7 +348,9 @@ pages_map
 pages_purge
 pages_trim
 pages_unmap
-pow2_ceil
+pow2_ceil_u32
+pow2_ceil_u64
+pow2_ceil_zu
 prof_active_get
 prof_active_get_unlocked
 prof_active_set
diff --git a/include/jemalloc/internal/prng.h b/include/jemalloc/internal/prng.h
index 216d0ef4..83c90906 100644
--- a/include/jemalloc/internal/prng.h
+++ b/include/jemalloc/internal/prng.h
@@ -18,31 +18,9 @@
  * proportional to bit position.  For example, the lowest bit has a cycle of 2,
  * the next has a cycle of 4, etc.  For this reason, we prefer to use the upper
  * bits.
- *
- * Macro parameters:
- *   uint32_t r          : Result.
- *   unsigned lg_range   : (0..32], number of least significant bits to return.
- *   uint32_t state      : Seed value.
- *   const uint32_t a, c : See above discussion.
  */
-#define	prng32(r, lg_range, state, a, c) do {				\
-	assert((lg_range) > 0);						\
-	assert((lg_range) <= 32);					\
-									\
-	r = (state * (a)) + (c);					\
-	state = r;							\
-	r >>= (32 - (lg_range));					\
-} while (false)
-
-/* Same as prng32(), but 64 bits of pseudo-randomness, using uint64_t. */
-#define	prng64(r, lg_range, state, a, c) do {				\
-	assert((lg_range) > 0);						\
-	assert((lg_range) <= 64);					\
-									\
-	r = (state * (a)) + (c);					\
-	state = r;							\
-	r >>= (64 - (lg_range));					\
-} while (false)
+#define	PRNG_A	UINT64_C(6364136223846793005)
+#define	PRNG_C	UINT64_C(1442695040888963407)
 
 #endif /* JEMALLOC_H_TYPES */
 /******************************************************************************/
@@ -56,5 +34,57 @@
 /******************************************************************************/
 #ifdef JEMALLOC_H_INLINES
 
+#ifndef JEMALLOC_ENABLE_INLINE
+uint64_t	prng_lg_range(uint64_t *state, unsigned lg_range);
+uint64_t	prng_range(uint64_t *state, uint64_t range);
+#endif
+
+#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PRNG_C_))
+/*
+ * Advance *state and return the lg_range (0..64] most significant bits of the
+ * new state, i.e. a value in [0 .. 2^lg_range).
+ */
+JEMALLOC_ALWAYS_INLINE uint64_t
+prng_lg_range(uint64_t *state, unsigned lg_range)
+{
+	uint64_t ret;
+
+	assert(lg_range > 0);
+	assert(lg_range <= 64);
+
+	ret = (*state * PRNG_A) + PRNG_C;
+	*state = ret;
+	ret >>= (64 - lg_range);
+
+	return (ret);
+}
+
+/* Advance *state and return a value in [0 .. range); range must be > 1. */
+JEMALLOC_ALWAYS_INLINE uint64_t
+prng_range(uint64_t *state, uint64_t range)
+{
+	uint64_t ret;
+	unsigned lg_range;
+
+	assert(range > 1);
+
+	/*
+	 * Compute the ceiling of lg(range).  Count bits directly rather than
+	 * via jemalloc_ffsl(), whose long argument may be only 32 bits wide
+	 * (ILP32, LLP64) and would truncate large ranges.
+	 */
+	lg_range = 1;
+	while (lg_range < 64 && ((range - 1) >> lg_range) != 0)
+		lg_range++;
+
+	/* Generate a result in [0..range) via repeated trial. */
+	do {
+		ret = prng_lg_range(state, lg_range);
+	} while (ret >= range);
+
+	return (ret);
+}
+#endif
+
 #endif /* JEMALLOC_H_INLINES */
 /******************************************************************************/
diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h
index 0bccea24..dfe5c93c 100644
--- a/include/jemalloc/internal/util.h
+++ b/include/jemalloc/internal/util.h
@@ -123,7 +123,9 @@ void	malloc_printf(const char *format, ...) JEMALLOC_FORMAT_PRINTF(1, 2);
 #ifndef JEMALLOC_ENABLE_INLINE
 int	jemalloc_ffsl(long bitmap);
 int	jemalloc_ffs(int bitmap);
-size_t	pow2_ceil(size_t x);
+uint64_t	pow2_ceil_u64(uint64_t x);
+uint32_t	pow2_ceil_u32(uint32_t x);
+size_t	pow2_ceil_zu(size_t x);
 size_t	lg_floor(size_t x);
 void	set_errno(int errnum);
 int	get_errno(void);
@@ -150,9 +152,8 @@ jemalloc_ffs(int bitmap)
 	return (JEMALLOC_INTERNAL_FFS(bitmap));
 }
 
-/* Compute the smallest power of 2 that is >= x. */
-JEMALLOC_INLINE size_t
-pow2_ceil(size_t x)
+JEMALLOC_INLINE uint64_t
+pow2_ceil_u64(uint64_t x)
 {
 
 	x--;
@@ -161,13 +162,37 @@ pow2_ceil(size_t x)
 	x |= x >> 4;
 	x |= x >> 8;
 	x |= x >> 16;
-#if (LG_SIZEOF_PTR == 3)
 	x |= x >> 32;
-#endif
 	x++;
 	return (x);
 }
 
+JEMALLOC_INLINE uint32_t
+pow2_ceil_u32(uint32_t x)
+{
+
+	x--;
+	x |= x >> 1;
+	x |= x >> 2;
+	x |= x >> 4;
+	x |= x >> 8;
+	x |= x >> 16;
+	x++;
+	return (x);
+}
+
+/* Compute the smallest power of 2 that is >= x. */
+JEMALLOC_INLINE size_t
+pow2_ceil_zu(size_t x)
+{
+
+#if (LG_SIZEOF_PTR == 3)
+	return (pow2_ceil_u64(x));
+#else
+	return (pow2_ceil_u32(x));
+#endif
+}
+
 #if (defined(__i386__) || defined(__amd64__) || defined(__x86_64__))
 JEMALLOC_INLINE size_t
 lg_floor(size_t x)
diff --git a/src/arena.c b/src/arena.c
index 143afb9a..aa787f99 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -2196,9 +2196,7 @@ arena_malloc_large(arena_t *arena, size_t size, szind_t binind, bool zero)
 		 * that is a multiple of the cacheline size, e.g. [0 .. 63) * 64
 		 * for 4 KiB pages and 64-byte cachelines.
 		 */
-		prng64(r, LG_PAGE - LG_CACHELINE, arena->offset_state,
-		    UINT64_C(6364136223846793009),
-		    UINT64_C(1442695040888963409));
+		r = prng_lg_range(&arena->offset_state, LG_PAGE - LG_CACHELINE);
 		random_offset = ((uintptr_t)r) << LG_CACHELINE;
 	} else
 		random_offset = 0;
diff --git a/src/ckh.c b/src/ckh.c
index e4328d22..08fc433d 100644
--- a/src/ckh.c
+++ b/src/ckh.c
@@ -99,7 +99,7 @@ ckh_try_bucket_insert(ckh_t *ckh, size_t bucket, const void *key,
 	 * Cycle through the cells in the bucket, starting at a random position.
 	 * The randomness avoids worst-case search overhead as buckets fill up.
 	 */
-	prng32(offset, LG_CKH_BUCKET_CELLS, ckh->prng_state, CKH_A, CKH_C);
+	offset = prng_lg_range(&ckh->prng_state, LG_CKH_BUCKET_CELLS);
 	for (i = 0; i < (ZU(1) << LG_CKH_BUCKET_CELLS); i++) {
 		cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) +
 		    ((i + offset) & ((ZU(1) << LG_CKH_BUCKET_CELLS) - 1))];
@@ -141,7 +141,7 @@ ckh_evict_reloc_insert(ckh_t *ckh, size_t argbucket, void const **argkey,
 		 * were an item for which both hashes indicated the same
 		 * bucket.
 		 */
-		prng32(i, LG_CKH_BUCKET_CELLS, ckh->prng_state, CKH_A, CKH_C);
+		i = prng_lg_range(&ckh->prng_state, LG_CKH_BUCKET_CELLS);
 		cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + i];
 		assert(cell->key != NULL);
 
diff --git a/src/prng.c b/src/prng.c
new file mode 100644
index 00000000..76646a2a
--- /dev/null
+++ b/src/prng.c
@@ -0,0 +1,2 @@
+#define	JEMALLOC_PRNG_C_
+#include "jemalloc/internal/jemalloc_internal.h"
diff --git a/src/prof.c b/src/prof.c
index 199e63e4..31f5e601 100644
--- a/src/prof.c
+++ b/src/prof.c
@@ -871,8 +871,7 @@ prof_sample_threshold_update(prof_tdata_t *tdata)
 	 *   pp 500
 	 *   (http://luc.devroye.org/rnbookindex.html)
 	 */
-	prng64(r, 53, tdata->prng_state, UINT64_C(6364136223846793005),
-	    UINT64_C(1442695040888963407));
+	r = prng_lg_range(&tdata->prng_state, 53);
 	u = (double)r * (1.0/9007199254740992.0L);
 	tdata->bytes_until_sample = (uint64_t)(log(u) /
 	    log(1.0 - (1.0 / (double)((uint64_t)1U << lg_prof_sample))))
diff --git a/test/unit/prng.c b/test/unit/prng.c
new file mode 100644
index 00000000..b22bd2f5
--- /dev/null
+++ b/test/unit/prng.c
@@ -0,0 +1,68 @@
+#include "test/jemalloc_test.h"
+
+TEST_BEGIN(test_prng_lg_range)
+{
+	uint64_t sa, sb, ra, rb;
+	unsigned lg_range;
+
+	sa = 42;
+	ra = prng_lg_range(&sa, 64);
+	sa = 42;
+	rb = prng_lg_range(&sa, 64);
+	assert_u64_eq(ra, rb,
+	    "Repeated generation should produce repeated results");
+
+	sb = 42;
+	rb = prng_lg_range(&sb, 64);
+	assert_u64_eq(ra, rb,
+	    "Equivalent generation should produce equivalent results");
+
+	sa = 42;
+	ra = prng_lg_range(&sa, 64);
+	rb = prng_lg_range(&sa, 64);
+	assert_u64_ne(ra, rb,
+	    "Full-width results must not immediately repeat");
+
+	sa = 42;
+	ra = prng_lg_range(&sa, 64);
+	for (lg_range = 63; lg_range > 0; lg_range--) {
+		sb = 42;
+		rb = prng_lg_range(&sb, lg_range);
+		assert_u64_eq((rb & (UINT64_C(0xffffffffffffffff) << lg_range)),
+		    0, "High order bits should be 0, lg_range=%u", lg_range);
+		assert_u64_eq(rb, (ra >> (64 - lg_range)),
+		    "Expected high order bits of full-width result, "
+		    "lg_range=%u", lg_range);
+	}
+}
+TEST_END
+
+TEST_BEGIN(test_prng_range)
+{
+	uint64_t range;
+#define	MAX_RANGE	10000000
+#define	RANGE_STEP	97
+#define	NREPS		10
+
+	for (range = 2; range < MAX_RANGE; range += RANGE_STEP) {
+		uint64_t s;
+		unsigned rep;
+
+		s = range;
+		for (rep = 0; rep < NREPS; rep++) {
+			uint64_t r = prng_range(&s, range);
+
+			assert_u64_lt(r, range, "Out of range");
+		}
+	}
+}
+TEST_END
+
+int
+main(void)
+{
+
+	return (test(
+	    test_prng_lg_range,
+	    test_prng_range));
+}
diff --git a/test/unit/util.c b/test/unit/util.c
index 8ab39a45..2f65aad2 100644
--- a/test/unit/util.c
+++ b/test/unit/util.c
@@ -1,33 +1,58 @@
 #include "test/jemalloc_test.h"
 
-TEST_BEGIN(test_pow2_ceil)
+/*
+ * Exercise pow2_ceil_<suf>() for type t, asserting with the matching
+ * assert_<suf>_eq() so that results are compared at their full width.
+ */
+#define	TEST_POW2_CEIL(t, suf, pri) do {				\
+	unsigned i, pow2;						\
+	t x;								\
+									\
+	assert_##suf##_eq(pow2_ceil_##suf(0), 0, "Unexpected result");	\
+									\
+	for (i = 0; i < sizeof(t) * 8; i++) {				\
+		assert_##suf##_eq(pow2_ceil_##suf(((t)1) << i), ((t)1)	\
+		    << i, "Unexpected result");				\
+	}								\
+									\
+	for (i = 2; i < sizeof(t) * 8; i++) {				\
+		assert_##suf##_eq(pow2_ceil_##suf((((t)1) << i) - 1),	\
+		    ((t)1) << i, "Unexpected result");			\
+	}								\
+									\
+	for (i = 0; i < sizeof(t) * 8 - 1; i++) {			\
+		assert_##suf##_eq(pow2_ceil_##suf((((t)1) << i) + 1),	\
+		    ((t)1) << (i+1), "Unexpected result");		\
+	}								\
+									\
+	for (pow2 = 1; pow2 < 25; pow2++) {				\
+		for (x = (((t)1) << (pow2-1)) + 1; x <= ((t)1) << pow2;	\
+		    x++) {						\
+			assert_##suf##_eq(pow2_ceil_##suf(x),		\
+			    ((t)1) << pow2,				\
+			    "Unexpected result, x=%"pri, x);		\
+		}							\
+	}								\
+} while (0)
+
+TEST_BEGIN(test_pow2_ceil_u64)
 {
-	unsigned i, pow2;
-	size_t x;
 
-	assert_zu_eq(pow2_ceil(0), 0, "Unexpected result");
+	TEST_POW2_CEIL(uint64_t, u64, FMTu64);
+}
+TEST_END
 
-	for (i = 0; i < sizeof(size_t) * 8; i++) {
-		assert_zu_eq(pow2_ceil(ZU(1) << i), ZU(1) << i,
-		    "Unexpected result");
-	}
+TEST_BEGIN(test_pow2_ceil_u32)
+{
 
-	for (i = 2; i < sizeof(size_t) * 8; i++) {
-		assert_zu_eq(pow2_ceil((ZU(1) << i) - 1), ZU(1) << i,
-		    "Unexpected result");
-	}
+	TEST_POW2_CEIL(uint32_t, u32, FMTu32);
+}
+TEST_END
 
-	for (i = 0; i < sizeof(size_t) * 8 - 1; i++) {
-		assert_zu_eq(pow2_ceil((ZU(1) << i) + 1), ZU(1) << (i+1),
-		    "Unexpected result");
-	}
+TEST_BEGIN(test_pow2_ceil_zu)
+{
 
-	for (pow2 = 1; pow2 < 25; pow2++) {
-		for (x = (ZU(1) << (pow2-1)) + 1; x <= ZU(1) << pow2; x++) {
-			assert_zu_eq(pow2_ceil(x), ZU(1) << pow2,
-			    "Unexpected result, x=%zu", x);
-		}
-	}
+	TEST_POW2_CEIL(size_t, zu, "zu");
 }
 TEST_END
 
@@ -286,7 +311,9 @@ main(void)
 {
 
 	return (test(
-	    test_pow2_ceil,
+	    test_pow2_ceil_u64,
+	    test_pow2_ceil_u32,
+	    test_pow2_ceil_zu,
 	    test_malloc_strtoumax_no_endptr,
 	    test_malloc_strtoumax,
 	    test_malloc_snprintf_truncated,