Add the div module, which allows fast division by dynamic values.

2017-12-19 17:30:50 -08:00 · 2017-12-19 17:30:50 -08:00 · 21f7c13d0b
commit 21f7c13d0b
parent 7f1b02e3fa
5 changed files with 129 additions and 1 deletions
--- a/Makefile.in
+++ b/Makefile.in
@ -97,6 +97,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \
 	$(srcroot)src/bitmap.c \
 	$(srcroot)src/ckh.c \
 	$(srcroot)src/ctl.c \
 	$(srcroot)src/div.c \
 	$(srcroot)src/extent.c \
 	$(srcroot)src/extent_dss.c \
 	$(srcroot)src/extent_mmap.c \
@ -165,6 +166,7 @@ TESTS_UNIT := \
 	$(srcroot)test/unit/bitmap.c \
 	$(srcroot)test/unit/ckh.c \
 	$(srcroot)test/unit/decay.c \
 	$(srcroot)test/unit/div.c \
 	$(srcroot)test/unit/extent_quantize.c \
 	$(srcroot)test/unit/fork.c \
 	$(srcroot)test/unit/hash.c \
--- a/include/jemalloc/internal/div.h
+++ b/include/jemalloc/internal/div.h
@ -0,0 +1,41 @@
 #ifndef JEMALLOC_INTERNAL_DIV_H
 #define JEMALLOC_INTERNAL_DIV_H
 #include "jemalloc/internal/assert.h"
 /*
 * This module does the division that computes the index of a region in a slab,
 * given its offset relative to the base.
 * That is, given a divisor d, an n = i * d (all integers), we'll return i.
 * We do some pre-computation to do this more quickly than a CPU division
 * instruction.
 * We bound n < 2^32, and don't support dividing by one.
 */
 /*
 * Precomputed state for dividing by one fixed divisor. It is filled in by
 * div_init() and read by div_compute().
 */
 struct div_info_s {
 	/* Multiplier; div_init() sets it to ceil(2^32 / divisor). */
 	uint32_t magic;
 #ifdef JEMALLOC_DEBUG
 	/* The divisor itself; kept only so debug builds can check results. */
 	size_t d;
 #endif
 };
 typedef struct div_info_s div_info_t;
 /*
 * Fills in *div_info so that div_compute() can divide exact multiples of
 * divisor. The divisor must be neither 0 nor 1 (div_init() asserts both).
 */
 void div_init(div_info_t *div_info, size_t divisor);
 /*
 * Returns n / d, where d is the divisor div_info was initialized with. The
 * caller must pass an n that is an exact multiple of d and fits in 32 bits.
 */
 static inline size_t
 div_compute(div_info_t *div_info, size_t n) {
 	/* The multiply-and-shift below is only valid for n < 2^32. */
 	assert(n <= (uint32_t)-1);
 	/*
 	 * Take the high 32 bits of the 64-bit product n * magic. Per the
 	 * original author's notes this compiles to, e.g., mov; imul; shr on
 	 * x86-64, and on 32-bit targets the compilers tried emitted the
 	 * native "high half of a multiply" sequence (mul; mov edx eax; on
 	 * x86, umull on arm, etc.).
 	 */
 	uint64_t product = (uint64_t)n * (uint64_t)div_info->magic;
 	size_t quotient = (size_t)(product >> 32);
 #ifdef JEMALLOC_DEBUG
 	/* Debug builds keep the divisor around to verify exactness. */
 	assert(quotient * div_info->d == n);
 #endif
 	return quotient;
 }
 #endif /* JEMALLOC_INTERNAL_DIV_H */
--- a/src/div.c
+++ b/src/div.c
@ -0,0 +1,55 @@
 #include "jemalloc/internal/jemalloc_preamble.h"
 #include "jemalloc/internal/div.h"
 #include "jemalloc/internal/assert.h"
 /*
 * Suppose we have n = q * d, all integers. We know n and d, and want q = n / d.
 *
 * For any k, we have (here, all division is exact; not C-style rounding):
 * floor(ceil(2^k / d) * n / 2^k) = floor((2^k + r) / d * n / 2^k), where
 * r = (-2^k) mod d.
 *
 * Expanding this out:
 * ... = floor(2^k / d * n / 2^k + r / d * n / 2^k)
 *     = floor(n / d + (r / d) * (n / 2^k)).
 *
 * The fractional part of n / d is 0 (because of the assumption that d divides n
 * exactly), so we have:
 * ... = n / d + floor((r / d) * (n / 2^k))
 *
 * So that our initial expression is equal to the quantity we seek, so long as
 * (r / d) * (n / 2^k) < 1.
 *
 * r is a remainder mod d, so r < d and r / d < 1 always. Since n < 2^32, we
 * can make n / 2^k < 1 by setting k = 32. This gets us a magic that works.
 */
 /*
 * Precomputes magic = ceil(2^32 / d) and stores it in *div_info, so that
 * div_compute() can later divide exact multiples of d with a multiply and a
 * shift. d must be at least 2.
 */
 void
 div_init(div_info_t *div_info, size_t d) {
 	/* Dividing by zero is nonsensical. */
 	assert(d != 0);
 	/*
 	 * For d == 1 we would need magic == 2^32 exactly, which does not fit
 	 * in a uint32_t and would mess with code gen on 32-bit machines.
 	 */
 	assert(d != 1);
 	const uint64_t two_to_k = (uint64_t)1 << 32;
 	/*
 	 * C division truncates, giving floor(2^k / d). Round up to the
 	 * ceiling by adding one whenever there is a remainder, i.e. whenever
 	 * d is not a power of two.
 	 */
 	uint32_t magic = (uint32_t)(two_to_k / d);
 	magic += (two_to_k % d != 0);
 	div_info->magic = magic;
 #ifdef JEMALLOC_DEBUG
 	/* Remember the divisor so div_compute() can check its results. */
 	div_info->d = d;
 #endif
 }
--- a/src/sz.c
+++ b/src/sz.c
@ -26,7 +26,8 @@ const size_t sz_index2size_tab[NSIZES] = {
 JEMALLOC_ALIGNED(CACHELINE)
 const uint8_t sz_size2index_tab[] = {
 #if LG_TINY_MIN == 0
-#warning "Dangerous LG_TINY_MIN"
+/* The div module doesn't support division by 1. */
 #error "Unsupported LG_TINY_MIN"
 #define S2B_0(i)	i,
 #elif LG_TINY_MIN == 1
 #warning "Dangerous LG_TINY_MIN"
--- a/test/unit/div.c
+++ b/test/unit/div.c
@ -0,0 +1,29 @@
 #include "test/jemalloc_test.h"
 #include "jemalloc/internal/div.h"
 /*
 * Checks div_compute() against plain multiplication for every divisor in
 * [2, 10^6) over a range of exact multiples of that divisor.
 */
 TEST_BEGIN(test_div_exhaustive) {
 	for (size_t divisor = 2; divisor < 1000 * 1000; ++divisor) {
 		div_info_t div_info;
 		div_init(&div_info, divisor);
 		/*
 		 * Test at least 1000 multiples of each divisor, and for small
 		 * divisors keep going until the dividend reaches 10^6. The
 		 * largest dividend stays below 10^9, within the 2^32 bound
 		 * that div_compute() asserts.
 		 */
 		size_t max = 1000 * divisor;
 		if (max < 1000 * 1000) {
 			max = 1000 * 1000;
 		}
 		/* Bound the loop by max; it was previously computed but unused. */
 		for (size_t dividend = 0; dividend < max;
 		    dividend += divisor) {
 			size_t quotient = div_compute(
 			    &div_info, dividend);
 			assert_zu_eq(dividend, quotient * divisor,
 			    "With divisor = %zu, dividend = %zu, "
 			    "got quotient %zu", divisor, dividend, quotient);
 		}
 	}
 }
 TEST_END
 /*
 * Test entry point: passes the exhaustive div test to the harness's
 * test_no_reentrancy() and returns whatever it reports.
 */
 int
 main(void) {
 	return test_no_reentrancy(test_div_exhaustive);
 }