From 21f7c13d0b172dac6ea76236bbe0a2f3ee4bcb7b Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 19 Dec 2017 17:30:50 -0800 Subject: [PATCH] Add the div module, which allows fast division by dynamic values. --- Makefile.in | 2 ++ include/jemalloc/internal/div.h | 41 ++++++++++++++++++++++++ src/div.c | 55 +++++++++++++++++++++++++++++++++ src/sz.c | 3 +- test/unit/div.c | 29 +++++++++++++++++ 5 files changed, 129 insertions(+), 1 deletion(-) create mode 100644 include/jemalloc/internal/div.h create mode 100644 src/div.c create mode 100644 test/unit/div.c diff --git a/Makefile.in b/Makefile.in index 2f0b3b24..96c4ae03 100644 --- a/Makefile.in +++ b/Makefile.in @@ -97,6 +97,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/bitmap.c \ $(srcroot)src/ckh.c \ $(srcroot)src/ctl.c \ + $(srcroot)src/div.c \ $(srcroot)src/extent.c \ $(srcroot)src/extent_dss.c \ $(srcroot)src/extent_mmap.c \ @@ -165,6 +166,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/bitmap.c \ $(srcroot)test/unit/ckh.c \ $(srcroot)test/unit/decay.c \ + $(srcroot)test/unit/div.c \ $(srcroot)test/unit/extent_quantize.c \ $(srcroot)test/unit/fork.c \ $(srcroot)test/unit/hash.c \ diff --git a/include/jemalloc/internal/div.h b/include/jemalloc/internal/div.h new file mode 100644 index 00000000..aebae939 --- /dev/null +++ b/include/jemalloc/internal/div.h @@ -0,0 +1,41 @@ +#ifndef JEMALLOC_INTERNAL_DIV_H +#define JEMALLOC_INTERNAL_DIV_H + +#include "jemalloc/internal/assert.h" + +/* + * This module does the division that computes the index of a region in a slab, + * given its offset relative to the base. + * That is, given a divisor d, an n = i * d (all integers), we'll return i. + * We do some pre-computation to do this more quickly than a CPU division + * instruction. + * We bound n < 2^32, and don't support dividing by one. + */ + +typedef struct div_info_s div_info_t; +struct div_info_s { + uint32_t magic; +#ifdef JEMALLOC_DEBUG + size_t d; +#endif +}; + +void div_init(div_info_t *div_info, size_t divisor); + +static inline size_t +div_compute(div_info_t *div_info, size_t n) { + assert(n <= (uint32_t)-1); + /* + * This generates, e.g. mov; imul; shr on x86-64. On a 32-bit machine, + * the compilers I tried were all smart enough to turn this into the + * appropriate "get the high 32 bits of the result of a multiply" (e.g. + * mul; mov edx eax; on x86, umull on arm, etc.). + */ + size_t i = ((uint64_t)n * (uint64_t)div_info->magic) >> 32; +#ifdef JEMALLOC_DEBUG + assert(i * div_info->d == n); +#endif + return i; +} + +#endif /* JEMALLOC_INTERNAL_DIV_H */ diff --git a/src/div.c b/src/div.c new file mode 100644 index 00000000..808892a1 --- /dev/null +++ b/src/div.c @@ -0,0 +1,55 @@ +#include "jemalloc/internal/jemalloc_preamble.h" + +#include "jemalloc/internal/div.h" + +#include "jemalloc/internal/assert.h" + +/* + * Suppose we have n = q * d, all integers. We know n and d, and want q = n / d. + * + * For any k, we have (here, all division is exact; not C-style rounding): + * floor(ceil(2^k / d) * n / 2^k) = floor((2^k + r) / d * n / 2^k), where + * r = (-2^k) mod d. + * + * Expanding this out: + * ... = floor(2^k / d * n / 2^k + r / d * n / 2^k) + * = floor(n / d + (r / d) * (n / 2^k)). + * + * The fractional part of n / d is 0 (because of the assumption that d divides n + * exactly), so we have: + * ... = n / d + floor((r / d) * (n / 2^k)) + * + * So that our initial expression is equal to the quantity we seek, so long as + * (r / d) * (n / 2^k) < 1. + * + * r is a remainder mod d, so r < d and r / d < 1 always. We can make + * n / 2 ^ k < 1 by setting k = 32. This gets us a value of magic that works. + */ + +void +div_init(div_info_t *div_info, size_t d) { + /* Nonsensical. */ + assert(d != 0); + /* + * This would make the value of magic too high to fit into a uint32_t + * (we would want magic = 2^32 exactly). This would mess with code gen + * on 32-bit machines. + */ + assert(d != 1); + + uint64_t two_to_k = ((uint64_t)1 << 32); + uint32_t magic = (uint32_t)(two_to_k / d); + + /* + * We want magic = ceil(2^k / d), but C gives us floor. We have to + * increment it unless the result was exact (i.e. unless d is a power of + * two). + */ + if (two_to_k % d != 0) { + magic++; + } + div_info->magic = magic; +#ifdef JEMALLOC_DEBUG + div_info->d = d; +#endif +} diff --git a/src/sz.c b/src/sz.c index 0986615f..9de77e45 100644 --- a/src/sz.c +++ b/src/sz.c @@ -26,7 +26,8 @@ const size_t sz_index2size_tab[NSIZES] = { JEMALLOC_ALIGNED(CACHELINE) const uint8_t sz_size2index_tab[] = { #if LG_TINY_MIN == 0 -#warning "Dangerous LG_TINY_MIN" +/* The div module doesn't support division by 1. */ +#error "Unsupported LG_TINY_MIN" #define S2B_0(i) i, #elif LG_TINY_MIN == 1 #warning "Dangerous LG_TINY_MIN" diff --git a/test/unit/div.c b/test/unit/div.c new file mode 100644 index 00000000..b47f10b2 --- /dev/null +++ b/test/unit/div.c @@ -0,0 +1,29 @@ +#include "test/jemalloc_test.h" + +#include "jemalloc/internal/div.h" + +TEST_BEGIN(test_div_exhaustive) { + for (size_t divisor = 2; divisor < 1000 * 1000; ++divisor) { + div_info_t div_info; + div_init(&div_info, divisor); + size_t max = 1000 * divisor; + if (max < 1000 * 1000) { + max = 1000 * 1000; + } + for (size_t dividend = 0; dividend < 1000 * divisor; + dividend += divisor) { + size_t quotient = div_compute( + &div_info, dividend); + assert_zu_eq(dividend, quotient * divisor, + "With divisor = %zu, dividend = %zu, " + "got quotient %zu", divisor, dividend, quotient); + } + } +} +TEST_END + +int +main(void) { + return test_no_reentrancy( + test_div_exhaustive); +}