Simplify arena_slab_regind().

Rewrite arena_slab_regind() to provide sufficient constant data for
the compiler to perform division strength reduction.  This replaces
more general manual strength reduction that was implemented before
arena_bin_info was compile-time-constant.  It would be possible to
slightly improve on the compiler-generated division code by taking
advantage of range limits that the compiler doesn't know about.
Jason Evans 2016-12-21 12:33:17 -08:00
parent 194d6f9de8
commit bacb6afc6c
5 changed files with 66 additions and 59 deletions
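As background for the change described above (an editorial illustration, not code from this commit; the function names are made up): when a divisor is a compile-time constant, the compiler strength-reduces the division to a multiply/shift sequence, whereas a variable divisor forces an actual divide instruction.

#include <stddef.h>

/* Variable divisor: the compiler must emit a hardware divide. */
size_t
regind_variable(size_t diff, size_t reg_size)
{
	return (diff / reg_size);
}

/*
 * Constant divisor per switch arm: each division compiles to a
 * multiply/shift sequence, which is what the rewritten
 * arena_slab_regind() arranges via the SIZE_CLASSES expansion.
 */
size_t
regind_constant(size_t diff, unsigned binind)
{
	switch (binind) {
	case 0: return (diff / 8);
	case 1: return (diff / 16);
	case 2: return (diff / 48);	/* Non-power-of-2 constant. */
	default: return (0);
	}
}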

Makefile.in

@@ -184,6 +184,7 @@ TESTS_UNIT := \
 	$(srcroot)test/unit/rtree.c \
 	$(srcroot)test/unit/SFMT.c \
 	$(srcroot)test/unit/size_classes.c \
+	$(srcroot)test/unit/slab.c \
 	$(srcroot)test/unit/smoothstep.c \
 	$(srcroot)test/unit/stats.c \
 	$(srcroot)test/unit/ticker.c \

include/jemalloc/internal/arena.h

@@ -271,6 +271,9 @@ void arena_extent_cache_maybe_insert(tsdn_t *tsdn, arena_t *arena,
     extent_t *extent, bool cache);
 void arena_extent_cache_maybe_remove(tsdn_t *tsdn, arena_t *arena,
     extent_t *extent, bool cache);
+#ifdef JEMALLOC_JET
+size_t arena_slab_regind(extent_t *slab, szind_t binind, const void *ptr);
+#endif
 extent_t *arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena,
     size_t usize, size_t alignment, bool *zero);
 void arena_extent_dalloc_large(tsdn_t *tsdn, arena_t *arena,

include/jemalloc/internal/private_symbols.txt

@@ -66,6 +66,7 @@ arena_ralloc_no_move
 arena_reset
 arena_salloc
 arena_sdalloc
+arena_slab_regind
 arena_stats_merge
 arena_tcache_fill_small
 arena_tdata_get

src/arena.c

@@ -138,73 +138,41 @@ arena_slab_reg_alloc(tsdn_t *tsdn, extent_t *slab,
 	return (ret);
 }
 
-JEMALLOC_INLINE_C size_t
-arena_slab_regind(extent_t *slab, const arena_bin_info_t *bin_info,
-    const void *ptr)
+#ifndef JEMALLOC_JET
+JEMALLOC_INLINE_C
+#endif
+size_t
+arena_slab_regind(extent_t *slab, szind_t binind, const void *ptr)
 {
-	size_t diff, interval, shift, regind;
+	size_t diff, regind;
 
 	/* Freeing a pointer outside the slab can cause assertion failure. */
 	assert((uintptr_t)ptr >= (uintptr_t)extent_addr_get(slab));
 	assert((uintptr_t)ptr < (uintptr_t)extent_past_get(slab));
 	/* Freeing an interior pointer can cause assertion failure. */
 	assert(((uintptr_t)ptr - (uintptr_t)extent_addr_get(slab)) %
-	    (uintptr_t)bin_info->reg_size == 0);
+	    (uintptr_t)arena_bin_info[binind].reg_size == 0);
 
-	/*
-	 * Avoid doing division with a variable divisor if possible.  Using
-	 * actual division here can reduce allocator throughput by over 20%!
-	 */
+	/* Avoid doing division with a variable divisor. */
 	diff = (size_t)((uintptr_t)ptr - (uintptr_t)extent_addr_get(slab));
-	/* Rescale (factor powers of 2 out of the numerator and denominator). */
-	interval = bin_info->reg_size;
-	shift = ffs_zu(interval) - 1;
-	diff >>= shift;
-	interval >>= shift;
-
-	if (interval == 1) {
-		/* The divisor was a power of 2. */
-		regind = diff;
-	} else {
-		/*
-		 * To divide by a number D that is not a power of two we
-		 * multiply by (2^21 / D) and then right shift by 21 positions.
-		 *
-		 *   X / D
-		 *
-		 * becomes
-		 *
-		 *   (X * interval_invs[D - 3]) >> SIZE_INV_SHIFT
-		 *
-		 * We can omit the first three elements, because we never
-		 * divide by 0, and 1 and 2 are both powers of two, which are
-		 * handled above.
-		 */
-#define	SIZE_INV_SHIFT	((sizeof(size_t) << 3) - LG_SLAB_MAXREGS)
-#define	SIZE_INV(s)	(((ZU(1) << SIZE_INV_SHIFT) / (s)) + 1)
-		static const size_t interval_invs[] = {
-		    SIZE_INV(3),
-		    SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7),
-		    SIZE_INV(8), SIZE_INV(9), SIZE_INV(10), SIZE_INV(11),
-		    SIZE_INV(12), SIZE_INV(13), SIZE_INV(14), SIZE_INV(15),
-		    SIZE_INV(16), SIZE_INV(17), SIZE_INV(18), SIZE_INV(19),
-		    SIZE_INV(20), SIZE_INV(21), SIZE_INV(22), SIZE_INV(23),
-		    SIZE_INV(24), SIZE_INV(25), SIZE_INV(26), SIZE_INV(27),
-		    SIZE_INV(28), SIZE_INV(29), SIZE_INV(30), SIZE_INV(31)
-		};
-
-		if (likely(interval <= ((sizeof(interval_invs) /
-		    sizeof(size_t)) + 2))) {
-			regind = (diff * interval_invs[interval - 3]) >>
-			    SIZE_INV_SHIFT;
-		} else
-			regind = diff / interval;
-#undef SIZE_INV
-#undef SIZE_INV_SHIFT
-	}
-	assert(diff == regind * interval);
-	assert(regind < bin_info->nregs);
+	switch (binind) {
+#define	REGIND_bin_yes(index, reg_size)					\
+	case index:							\
+		regind = diff / (reg_size);				\
+		assert(diff == regind * (reg_size));			\
+		break;
+#define	REGIND_bin_no(index, reg_size)
+#define	SC(index, lg_grp, lg_delta, ndelta, psz, bin, pgs,		\
+    lg_delta_lookup)							\
+	REGIND_bin_##bin(index, (1U<<lg_grp) + (ndelta<<lg_delta))
+	SIZE_CLASSES
+#undef REGIND_bin_yes
+#undef REGIND_bin_no
+#undef SC
+	default: not_reached();
+	}
+
+	assert(regind < arena_bin_info[binind].nregs);
 
 	return (regind);
 }
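The switch introduced above gets its cases from jemalloc's SIZE_CLASSES X-macro, which invokes SC(...) once per size class; REGIND_bin_yes emits a case only for bin-backed classes. A self-contained miniature of the pattern (indices and sizes here are made up; the real values come from SIZE_CLASSES):

#include <assert.h>
#include <stddef.h>

/* Made-up size-class list: SC(index, reg_size, bin-backed?). */
#define	MINI_SIZE_CLASSES		\
	SC(0, 8, yes)			\
	SC(1, 16, yes)			\
	SC(2, 48, yes)			\
	SC(3, 4096, no)

static size_t
mini_regind(unsigned binind, size_t diff)
{
	size_t regind = 0;

	switch (binind) {
#define	REGIND_bin_yes(index, reg_size)			\
	case index:					\
		regind = diff / (reg_size);		\
		assert(diff == regind * (reg_size));	\
		break;
#define	REGIND_bin_no(index, reg_size)	/* No case for non-bin classes. */
#define	SC(index, reg_size, bin)	REGIND_bin_##bin(index, reg_size)
	MINI_SIZE_CLASSES
#undef SC
#undef REGIND_bin_no
#undef REGIND_bin_yes
	default:
		break;
	}
	return (regind);
}

int
main(void)
{
	assert(mini_regind(2, 96) == 2);
	return (0);
}

Every arm's divisor is a literal, so the compiler can strength-reduce each division in isolation.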
@@ -215,7 +183,7 @@ arena_slab_reg_dalloc(tsdn_t *tsdn, extent_t *slab,
 {
 	szind_t binind = slab_data->binind;
 	const arena_bin_info_t *bin_info = &arena_bin_info[binind];
-	size_t regind = arena_slab_regind(slab, bin_info, ptr);
+	size_t regind = arena_slab_regind(slab, binind, ptr);
 
 	assert(slab_data->nfree < bin_info->nregs);
 	/* Freeing an unallocated pointer can cause assertion failure. */
@@ -1022,7 +990,6 @@ arena_bin_malloc_hard(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin,
 	const arena_bin_info_t *bin_info;
 	extent_t *slab;
 
 	bin_info = &arena_bin_info[binind];
-
 	if (bin->slabcur != NULL) {
 		arena_bin_slabs_full_insert(bin, bin->slabcur);
test/unit/slab.c (new file)

@@ -0,0 +1,35 @@
+#include "test/jemalloc_test.h"
+
+TEST_BEGIN(test_arena_slab_regind)
+{
+	szind_t binind;
+
+	for (binind = 0; binind < NBINS; binind++) {
+		size_t regind;
+		extent_t slab;
+		const arena_bin_info_t *bin_info = &arena_bin_info[binind];
+		extent_init(&slab, NULL, mallocx(bin_info->slab_size,
+		    MALLOCX_LG_ALIGN(LG_PAGE)), bin_info->slab_size, 0, 0, true,
+		    false, true, true);
+		assert_ptr_not_null(extent_addr_get(&slab),
+		    "Unexpected malloc() failure");
+
+		for (regind = 0; regind < bin_info->nregs; regind++) {
+			void *reg = (void *)((uintptr_t)extent_addr_get(&slab) +
+			    (bin_info->reg_size * regind));
+			assert_zu_eq(arena_slab_regind(&slab, binind, reg),
+			    regind,
+			    "Incorrect region index computed for size %zu",
+			    bin_info->reg_size);
+		}
+		free(extent_addr_get(&slab));
+	}
+}
+TEST_END
+
+int
+main(void)
+{
+	return (test(
+	    test_arena_slab_regind));
+}