Add a fastpath for arena_slab_reg_alloc_batch

Also adds a configure.ac check for __builtin_popcount, which is used
in the new fastpath.
Dave Watson 2018-10-29 16:01:09 -07:00
parent 17aa470760
commit 13c237c7ef
4 changed files with 78 additions and 11 deletions
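The heart of the change is that __builtin_popcountl counts the one-bits of a word in a single instruction, so the allocator can know up front how many free regions a bitmap group can still hand out. A quick standalone illustration of just that builtin (plain C, nothing jemalloc-specific; the 0x08 value mirrors the configure probe below):

#include <stdio.h>

int main(void) {
	/* popcount = number of set bits; for a free-region bitmap word this is
	 * "how many regions this group can still hand out in one batch". */
	printf("%d\n", __builtin_popcountl(0x08));	/* prints 1 */
	printf("%d\n", __builtin_popcountl(0xf0));	/* prints 4 */
	return 0;
}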

configure.ac

@@ -1429,6 +1429,21 @@ else
   fi
 fi

+JE_COMPILABLE([a program using __builtin_popcountl], [
+#include <stdio.h>
+#include <strings.h>
+#include <string.h>
+], [
+	{
+		int rv = __builtin_popcountl(0x08);
+		printf("%d\n", rv);
+	}
+], [je_cv_gcc_builtin_popcountl])
+if test "x${je_cv_gcc_builtin_popcountl}" = "xyes" ; then
+  AC_DEFINE([JEMALLOC_INTERNAL_POPCOUNT], [__builtin_popcount])
+  AC_DEFINE([JEMALLOC_INTERNAL_POPCOUNTL], [__builtin_popcountl])
+fi
+
 AC_ARG_WITH([lg_quantum],
   [AS_HELP_STRING([--with-lg-quantum=<lg-quantum>],
     [Base 2 log of minimum allocation alignment])],
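Assuming the probe compiles, the two AC_DEFINE calls above should land in the generated internal defs header roughly as follows (a sketch of the generated output, not part of the diff; the template with the matching #undef lines appears in the jemalloc_internal_defs.h.in hunk further down):

/* Expected lines in the generated jemalloc_internal_defs.h once the
 * __builtin_popcountl probe succeeds (illustrative only). */
#define JEMALLOC_INTERNAL_POPCOUNT __builtin_popcount
#define JEMALLOC_INTERNAL_POPCOUNTL __builtin_popcountl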

include/jemalloc/internal/bit_util.h

@@ -27,6 +27,25 @@ ffs_u(unsigned bitmap) {
 	return JEMALLOC_INTERNAL_FFS(bitmap);
 }

+#ifdef JEMALLOC_INTERNAL_POPCOUNTL
+BIT_UTIL_INLINE unsigned
+popcount_lu(unsigned long bitmap) {
+	return JEMALLOC_INTERNAL_POPCOUNTL(bitmap);
+}
+#endif
+
+/*
+ * Clears first unset bit in bitmap, and returns
+ * place of bit. bitmap *must not* be 0.
+ */
+BIT_UTIL_INLINE size_t
+cfs_lu(unsigned long* bitmap) {
+	size_t bit = ffs_lu(*bitmap) - 1;
+	*bitmap ^= ZU(1) << bit;
+	return bit;
+}
+
 BIT_UTIL_INLINE unsigned
 ffs_zu(size_t bitmap) {
 #if LG_SIZEOF_PTR == LG_SIZEOF_INT
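To make the contract of the two new helpers concrete, here is a hedged standalone sketch. popcount_lu_demo and cfs_lu_demo are stand-ins written for this example; the real helpers above rely on JEMALLOC_INTERNAL_POPCOUNTL, ffs_lu and ZU from jemalloc's headers, replaced here with the GCC builtins and plain C:

#include <stdio.h>
#include <stddef.h>

/* Stand-in for popcount_lu(): number of set bits in the word. */
static unsigned
popcount_lu_demo(unsigned long bitmap) {
	return (unsigned)__builtin_popcountl(bitmap);
}

/* Stand-in for cfs_lu(): clear the lowest set bit, return its index. */
static size_t
cfs_lu_demo(unsigned long *bitmap) {
	size_t bit = (size_t)__builtin_ffsl((long)*bitmap) - 1;
	*bitmap ^= 1UL << bit;
	return bit;
}

int main(void) {
	unsigned long g = 0x16UL;		/* bits 1, 2 and 4 set */
	unsigned n = popcount_lu_demo(g);	/* 3 */
	for (unsigned i = 0; i < n; i++) {
		printf("bit %zu\n", cfs_lu_demo(&g));	/* 1, then 2, then 4 */
	}
	printf("remaining word: 0x%lx\n", g);	/* 0x0 */
	return 0;
}

Each cfs_lu call returns the index of the lowest set bit and clears it, so repeated calls walk a word's set bits in ascending order until the word is exhausted.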

include/jemalloc/internal/jemalloc_internal_defs.h.in

@@ -236,6 +236,12 @@
 #undef JEMALLOC_INTERNAL_FFSL
 #undef JEMALLOC_INTERNAL_FFS

+/*
+ * popcount*() functions to use for bitmapping.
+ */
+#undef JEMALLOC_INTERNAL_POPCOUNTL
+#undef JEMALLOC_INTERNAL_POPCOUNT
+
 /*
  * If defined, explicitly attempt to more uniformly distribute large allocation
  * pointer alignments across all cache indices.

src/arena.c

@@ -273,19 +273,46 @@ arena_slab_reg_alloc_batch(extent_t *slab, const bin_info_t *bin_info,
     unsigned cnt, void** ptrs) {
 	arena_slab_data_t *slab_data = extent_slab_data_get(slab);

-	assert(extent_nfree_get(slab) > 0);
+	assert(extent_nfree_get(slab) >= cnt);
 	assert(!bitmap_full(slab_data->bitmap, &bin_info->bitmap_info));

-	size_t regind = 0;
+#if (! defined JEMALLOC_INTERNAL_POPCOUNTL) || (defined BITMAP_USE_TREE)
 	for (unsigned i = 0; i < cnt; i++) {
-		void *ret;
-		regind = bitmap_sfu(slab_data->bitmap, &bin_info->bitmap_info);
-		ret = (void *)((uintptr_t)extent_addr_get(slab) +
+		size_t regind = bitmap_sfu(slab_data->bitmap,
+		    &bin_info->bitmap_info);
+		*(ptrs + i) = (void *)((uintptr_t)extent_addr_get(slab) +
 		    (uintptr_t)(bin_info->reg_size * regind));
-		*(ptrs + i) = ret;
 	}
+#else
+	unsigned group = 0;
+	bitmap_t g = slab_data->bitmap[group];
+	unsigned i = 0;
+	while (i < cnt) {
+		while (g == 0) {
+			g = slab_data->bitmap[++group];
+		}
+		size_t shift = group << LG_BITMAP_GROUP_NBITS;
+		size_t pop = popcount_lu(g);
+		if (pop > (cnt - i)) {
+			pop = cnt - i;
+		}
+		/*
+		 * Load from memory locations only once, outside the
+		 * hot loop below.
+		 */
+		uintptr_t base = (uintptr_t)extent_addr_get(slab);
+		uintptr_t regsize = (uintptr_t)bin_info->reg_size;
+		while (pop--) {
+			size_t bit = cfs_lu(&g);
+			size_t regind = shift + bit;
+			*(ptrs + i) = (void *)(base + regsize * regind);
+			i++;
+		}
+		slab_data->bitmap[group] = g;
+	}
+#endif
 	extent_nfree_sub(slab, cnt);
 }
@@ -1331,7 +1358,7 @@ arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache,
 		} else {
 			cnt = 1;
 			void *ptr = arena_bin_malloc_hard(tsdn, arena, bin,
 			    binind);
 			/*
 			 * OOM. tbin->avail isn't yet filled down to its first
 			 * element, so the successful allocations (if any) must
@@ -1352,7 +1379,7 @@ arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache,
 			for (unsigned j = 0; j < cnt; j++) {
 				void* ptr = *(tbin->avail - nfill + i + j);
 				arena_alloc_junk_small(ptr, &bin_infos[binind],
 				    true);
 			}
 		}
 	}
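Putting the pieces together, the sketch below models what the new #else branch of arena_slab_reg_alloc_batch does, stripped of the extent/bin machinery: walk the bitmap group by group, use popcount to cap how many regions to take from the current group, and turn (group, bit) into a region index via shift = group << LG_BITMAP_GROUP_NBITS. Everything here (batch_alloc_demo, LG_GROUP_NBITS, the flat bitmap array, the base/reg_size parameters) is invented for the illustration and only assumes 64-bit bitmap groups, as on LP64:

#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

#define LG_GROUP_NBITS 6	/* 64-bit bitmap groups, as on LP64 */

/*
 * Simplified model of the popcount fastpath: take `cnt` free regions out of a
 * flat multi-word free-bitmap and compute their addresses.  Assumes the bitmap
 * holds at least `cnt` set bits, as the real code asserts.
 */
static void
batch_alloc_demo(unsigned long *bitmap, uintptr_t base, size_t reg_size,
    unsigned cnt, void **ptrs) {
	unsigned group = 0;
	unsigned long g = bitmap[group];
	unsigned i = 0;
	while (i < cnt) {
		while (g == 0) {	/* skip exhausted groups */
			g = bitmap[++group];
		}
		size_t shift = (size_t)group << LG_GROUP_NBITS;
		size_t pop = (size_t)__builtin_popcountl(g);
		if (pop > cnt - i) {	/* don't take more than requested */
			pop = cnt - i;
		}
		while (pop--) {
			size_t bit = (size_t)__builtin_ffsl((long)g) - 1;
			g ^= 1UL << bit;	/* region is now taken */
			ptrs[i++] = (void *)(base + reg_size * (shift + bit));
		}
		bitmap[group] = g;	/* write the group back once */
	}
}

int main(void) {
	/* Group 0 is full (no free regions); group 1 has regions 64 and 66. */
	unsigned long bitmap[2] = {0x0UL, 0x5UL};
	void *ptrs[2];
	batch_alloc_demo(bitmap, 0x1000, 64, 2, ptrs);
	for (int i = 0; i < 2; i++) {
		printf("ptrs[%d] = %p\n", i, ptrs[i]);
	}
	return 0;
}

As in the real code, each bitmap group is loaded once, drained with cheap register operations, and written back once, instead of calling bitmap_sfu and touching the bitmap in memory for every region handed out.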
} }