Add the div module, which allows fast division by dynamic values.
This commit is contained in:
parent
7f1b02e3fa
commit
21f7c13d0b
@ -97,6 +97,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \
|
|||||||
$(srcroot)src/bitmap.c \
|
$(srcroot)src/bitmap.c \
|
||||||
$(srcroot)src/ckh.c \
|
$(srcroot)src/ckh.c \
|
||||||
$(srcroot)src/ctl.c \
|
$(srcroot)src/ctl.c \
|
||||||
|
$(srcroot)src/div.c \
|
||||||
$(srcroot)src/extent.c \
|
$(srcroot)src/extent.c \
|
||||||
$(srcroot)src/extent_dss.c \
|
$(srcroot)src/extent_dss.c \
|
||||||
$(srcroot)src/extent_mmap.c \
|
$(srcroot)src/extent_mmap.c \
|
||||||
@ -165,6 +166,7 @@ TESTS_UNIT := \
|
|||||||
$(srcroot)test/unit/bitmap.c \
|
$(srcroot)test/unit/bitmap.c \
|
||||||
$(srcroot)test/unit/ckh.c \
|
$(srcroot)test/unit/ckh.c \
|
||||||
$(srcroot)test/unit/decay.c \
|
$(srcroot)test/unit/decay.c \
|
||||||
|
$(srcroot)test/unit/div.c \
|
||||||
$(srcroot)test/unit/extent_quantize.c \
|
$(srcroot)test/unit/extent_quantize.c \
|
||||||
$(srcroot)test/unit/fork.c \
|
$(srcroot)test/unit/fork.c \
|
||||||
$(srcroot)test/unit/hash.c \
|
$(srcroot)test/unit/hash.c \
|
||||||
|
41
include/jemalloc/internal/div.h
Normal file
41
include/jemalloc/internal/div.h
Normal file
@ -0,0 +1,41 @@
|
|||||||
|
#ifndef JEMALLOC_INTERNAL_DIV_H
|
||||||
|
#define JEMALLOC_INTERNAL_DIV_H
|
||||||
|
|
||||||
|
#include "jemalloc/internal/assert.h"
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This module does the division that computes the index of a region in a slab,
|
||||||
|
* given its offset relative to the base.
|
||||||
|
* That is, given a divisor d, an n = i * d (all integers), we'll return i.
|
||||||
|
* We do some pre-computation to do this more quickly than a CPU division
|
||||||
|
* instruction.
|
||||||
|
* We bound n < 2^32, and don't support dividing by one.
|
||||||
|
*/
|
||||||
|
|
||||||
|
typedef struct div_info_s div_info_t;
|
||||||
|
struct div_info_s {
|
||||||
|
uint32_t magic;
|
||||||
|
#ifdef JEMALLOC_DEBUG
|
||||||
|
size_t d;
|
||||||
|
#endif
|
||||||
|
};
|
||||||
|
|
||||||
|
void div_init(div_info_t *div_info, size_t divisor);
|
||||||
|
|
||||||
|
static inline size_t
|
||||||
|
div_compute(div_info_t *div_info, size_t n) {
|
||||||
|
assert(n <= (uint32_t)-1);
|
||||||
|
/*
|
||||||
|
* This generates, e.g. mov; imul; shr on x86-64. On a 32-bit machine,
|
||||||
|
* the compilers I tried were all smart enough to turn this into the
|
||||||
|
* appropriate "get the high 32 bits of the result of a multiply" (e.g.
|
||||||
|
* mul; mov edx eax; on x86, umull on arm, etc.).
|
||||||
|
*/
|
||||||
|
size_t i = ((uint64_t)n * (uint64_t)div_info->magic) >> 32;
|
||||||
|
#ifdef JEMALLOC_DEBUG
|
||||||
|
assert(i * div_info->d == n);
|
||||||
|
#endif
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* JEMALLOC_INTERNAL_DIV_H */
|
55
src/div.c
Normal file
55
src/div.c
Normal file
@ -0,0 +1,55 @@
|
|||||||
|
#include "jemalloc/internal/jemalloc_preamble.h"
|
||||||
|
|
||||||
|
#include "jemalloc/internal/div.h"
|
||||||
|
|
||||||
|
#include "jemalloc/internal/assert.h"
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Suppose we have n = q * d, all integers. We know n and d, and want q = n / d.
|
||||||
|
*
|
||||||
|
* For any k, we have (here, all division is exact; not C-style rounding):
|
||||||
|
* floor(ceil(2^k / d) * n / 2^k) = floor((2^k + r) / d * n / 2^k), where
|
||||||
|
* r = (-2^k) mod d.
|
||||||
|
*
|
||||||
|
* Expanding this out:
|
||||||
|
* ... = floor(2^k / d * n / 2^k + r / d * n / 2^k)
|
||||||
|
* = floor(n / d + (r / d) * (n / 2^k)).
|
||||||
|
*
|
||||||
|
* The fractional part of n / d is 0 (because of the assumption that d divides n
|
||||||
|
* exactly), so we have:
|
||||||
|
* ... = n / d + floor((r / d) * (n / 2^k))
|
||||||
|
*
|
||||||
|
* So that our initial expression is equal to the quantity we seek, so long as
|
||||||
|
* (r / d) * (n / 2^k) < 1.
|
||||||
|
*
|
||||||
|
* r is a remainder mod d, so r < d and r / d < 1 always. We can make
|
||||||
|
* n / 2 ^ k < 1 by setting k = 32. This gets us a value of magic that works.
|
||||||
|
*/
|
||||||
|
|
||||||
|
void
|
||||||
|
div_init(div_info_t *div_info, size_t d) {
|
||||||
|
/* Nonsensical. */
|
||||||
|
assert(d != 0);
|
||||||
|
/*
|
||||||
|
* This would make the value of magic too high to fit into a uint32_t
|
||||||
|
* (we would want magic = 2^32 exactly). This would mess with code gen
|
||||||
|
* on 32-bit machines.
|
||||||
|
*/
|
||||||
|
assert(d != 1);
|
||||||
|
|
||||||
|
uint64_t two_to_k = ((uint64_t)1 << 32);
|
||||||
|
uint32_t magic = (uint32_t)(two_to_k / d);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We want magic = ceil(2^k / d), but C gives us floor. We have to
|
||||||
|
* increment it unless the result was exact (i.e. unless d is a power of
|
||||||
|
* two).
|
||||||
|
*/
|
||||||
|
if (two_to_k % d != 0) {
|
||||||
|
magic++;
|
||||||
|
}
|
||||||
|
div_info->magic = magic;
|
||||||
|
#ifdef JEMALLOC_DEBUG
|
||||||
|
div_info->d = d;
|
||||||
|
#endif
|
||||||
|
}
|
3
src/sz.c
3
src/sz.c
@ -26,7 +26,8 @@ const size_t sz_index2size_tab[NSIZES] = {
|
|||||||
JEMALLOC_ALIGNED(CACHELINE)
|
JEMALLOC_ALIGNED(CACHELINE)
|
||||||
const uint8_t sz_size2index_tab[] = {
|
const uint8_t sz_size2index_tab[] = {
|
||||||
#if LG_TINY_MIN == 0
|
#if LG_TINY_MIN == 0
|
||||||
#warning "Dangerous LG_TINY_MIN"
|
/* The div module doesn't support division by 1. */
|
||||||
|
#error "Unsupported LG_TINY_MIN"
|
||||||
#define S2B_0(i) i,
|
#define S2B_0(i) i,
|
||||||
#elif LG_TINY_MIN == 1
|
#elif LG_TINY_MIN == 1
|
||||||
#warning "Dangerous LG_TINY_MIN"
|
#warning "Dangerous LG_TINY_MIN"
|
||||||
|
29
test/unit/div.c
Normal file
29
test/unit/div.c
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
#include "test/jemalloc_test.h"
|
||||||
|
|
||||||
|
#include "jemalloc/internal/div.h"
|
||||||
|
|
||||||
|
TEST_BEGIN(test_div_exhaustive) {
|
||||||
|
for (size_t divisor = 2; divisor < 1000 * 1000; ++divisor) {
|
||||||
|
div_info_t div_info;
|
||||||
|
div_init(&div_info, divisor);
|
||||||
|
size_t max = 1000 * divisor;
|
||||||
|
if (max < 1000 * 1000) {
|
||||||
|
max = 1000 * 1000;
|
||||||
|
}
|
||||||
|
for (size_t dividend = 0; dividend < 1000 * divisor;
|
||||||
|
dividend += divisor) {
|
||||||
|
size_t quotient = div_compute(
|
||||||
|
&div_info, dividend);
|
||||||
|
assert_zu_eq(dividend, quotient * divisor,
|
||||||
|
"With divisor = %zu, dividend = %zu, "
|
||||||
|
"got quotient %zu", divisor, dividend, quotient);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
TEST_END
|
||||||
|
|
||||||
|
int
|
||||||
|
main(void) {
|
||||||
|
return test_no_reentrancy(
|
||||||
|
test_div_exhaustive);
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user