Add a fastpath for arena_slab_reg_alloc_batch
Also adds a configure.ac check for __builtin_popcountl, which backs the popcount_lu() helper used in the new fastpath.
This commit is contained in:
parent
17aa470760
commit
13c237c7ef
15
configure.ac
15
configure.ac
@ -1429,6 +1429,21 @@ else
|
|||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
JE_COMPILABLE([a program using __builtin_popcountl], [
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <strings.h>
|
||||||
|
#include <string.h>
|
||||||
|
], [
|
||||||
|
{
|
||||||
|
int rv = __builtin_popcountl(0x08);
|
||||||
|
printf("%d\n", rv);
|
||||||
|
}
|
||||||
|
], [je_cv_gcc_builtin_popcountl])
|
||||||
|
if test "x${je_cv_gcc_builtin_popcountl}" = "xyes" ; then
|
||||||
|
AC_DEFINE([JEMALLOC_INTERNAL_POPCOUNT], [__builtin_popcount])
|
||||||
|
AC_DEFINE([JEMALLOC_INTERNAL_POPCOUNTL], [__builtin_popcountl])
|
||||||
|
fi
|
||||||
|
|
||||||
AC_ARG_WITH([lg_quantum],
|
AC_ARG_WITH([lg_quantum],
|
||||||
[AS_HELP_STRING([--with-lg-quantum=<lg-quantum>],
|
[AS_HELP_STRING([--with-lg-quantum=<lg-quantum>],
|
||||||
[Base 2 log of minimum allocation alignment])],
|
[Base 2 log of minimum allocation alignment])],
|
||||||
|
@ -27,6 +27,25 @@ ffs_u(unsigned bitmap) {
|
|||||||
return JEMALLOC_INTERNAL_FFS(bitmap);
|
return JEMALLOC_INTERNAL_FFS(bitmap);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef JEMALLOC_INTERNAL_POPCOUNTL
|
||||||
|
/*
 * Returns JEMALLOC_INTERNAL_POPCOUNTL(bitmap), i.e. the population count
 * (number of set bits) of bitmap.  This helper is only compiled when
 * JEMALLOC_INTERNAL_POPCOUNTL is defined.
 */
BIT_UTIL_INLINE unsigned
|
||||||
|
popcount_lu(unsigned long bitmap) {
|
||||||
|
return JEMALLOC_INTERNAL_POPCOUNTL(bitmap);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Clears the first set bit in *bitmap (the bit located by ffs_lu()), and
|
||||||
|
* returns the place of that bit (ffs_lu() result minus one). *bitmap *must not* be 0.
|
||||||
|
*/
|
||||||
|
|
||||||
|
BIT_UTIL_INLINE size_t
|
||||||
|
cfs_lu(unsigned long* bitmap) {
|
||||||
|
size_t bit = ffs_lu(*bitmap) - 1;
|
||||||
|
/* The bit found above is set, so XOR-ing it flips it to 0. */
*bitmap ^= ZU(1) << bit;
|
||||||
|
return bit;
|
||||||
|
}
|
||||||
|
|
||||||
BIT_UTIL_INLINE unsigned
|
BIT_UTIL_INLINE unsigned
|
||||||
ffs_zu(size_t bitmap) {
|
ffs_zu(size_t bitmap) {
|
||||||
#if LG_SIZEOF_PTR == LG_SIZEOF_INT
|
#if LG_SIZEOF_PTR == LG_SIZEOF_INT
|
||||||
|
@ -236,6 +236,12 @@
|
|||||||
#undef JEMALLOC_INTERNAL_FFSL
|
#undef JEMALLOC_INTERNAL_FFSL
|
||||||
#undef JEMALLOC_INTERNAL_FFS
|
#undef JEMALLOC_INTERNAL_FFS
|
||||||
|
|
||||||
|
/*
|
||||||
|
* popcount*() functions to use for bitmapping.
|
||||||
|
*/
|
||||||
|
#undef JEMALLOC_INTERNAL_POPCOUNTL
|
||||||
|
#undef JEMALLOC_INTERNAL_POPCOUNT
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If defined, explicitly attempt to more uniformly distribute large allocation
|
* If defined, explicitly attempt to more uniformly distribute large allocation
|
||||||
* pointer alignments across all cache indices.
|
* pointer alignments across all cache indices.
|
||||||
|
47
src/arena.c
47
src/arena.c
@ -273,19 +273,46 @@ arena_slab_reg_alloc_batch(extent_t *slab, const bin_info_t *bin_info,
|
|||||||
unsigned cnt, void** ptrs) {
|
unsigned cnt, void** ptrs) {
|
||||||
arena_slab_data_t *slab_data = extent_slab_data_get(slab);
|
arena_slab_data_t *slab_data = extent_slab_data_get(slab);
|
||||||
|
|
||||||
assert(extent_nfree_get(slab) > 0);
|
assert(extent_nfree_get(slab) >= cnt);
|
||||||
assert(!bitmap_full(slab_data->bitmap, &bin_info->bitmap_info));
|
assert(!bitmap_full(slab_data->bitmap, &bin_info->bitmap_info));
|
||||||
|
|
||||||
size_t regind = 0;
|
#if (! defined JEMALLOC_INTERNAL_POPCOUNTL) || (defined BITMAP_USE_TREE)
|
||||||
for (unsigned i = 0; i < cnt; i++) {
|
for (unsigned i = 0; i < cnt; i++) {
|
||||||
void *ret;
|
size_t regind = bitmap_sfu(slab_data->bitmap,
|
||||||
|
&bin_info->bitmap_info);
|
||||||
regind = bitmap_sfu(slab_data->bitmap, &bin_info->bitmap_info);
|
*(ptrs + i) = (void *)((uintptr_t)extent_addr_get(slab) +
|
||||||
ret = (void *)((uintptr_t)extent_addr_get(slab) +
|
|
||||||
(uintptr_t)(bin_info->reg_size * regind));
|
(uintptr_t)(bin_info->reg_size * regind));
|
||||||
|
|
||||||
*(ptrs + i) = ret;
|
|
||||||
}
|
}
|
||||||
|
#else
|
||||||
|
unsigned group = 0;
|
||||||
|
bitmap_t g = slab_data->bitmap[group];
|
||||||
|
unsigned i = 0;
|
||||||
|
while (i < cnt) {
|
||||||
|
while (g == 0) {
|
||||||
|
g = slab_data->bitmap[++group];
|
||||||
|
}
|
||||||
|
size_t shift = group << LG_BITMAP_GROUP_NBITS;
|
||||||
|
size_t pop = popcount_lu(g);
|
||||||
|
if (pop > (cnt - i)) {
|
||||||
|
pop = cnt - i;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Load from memory locations only once, outside the
|
||||||
|
* hot loop below.
|
||||||
|
*/
|
||||||
|
uintptr_t base = (uintptr_t)extent_addr_get(slab);
|
||||||
|
uintptr_t regsize = (uintptr_t)bin_info->reg_size;
|
||||||
|
while (pop--) {
|
||||||
|
size_t bit = cfs_lu(&g);
|
||||||
|
size_t regind = shift + bit;
|
||||||
|
*(ptrs + i) = (void *)(base + regsize * regind);
|
||||||
|
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
slab_data->bitmap[group] = g;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
extent_nfree_sub(slab, cnt);
|
extent_nfree_sub(slab, cnt);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1331,7 +1358,7 @@ arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache,
|
|||||||
} else {
|
} else {
|
||||||
cnt = 1;
|
cnt = 1;
|
||||||
void *ptr = arena_bin_malloc_hard(tsdn, arena, bin,
|
void *ptr = arena_bin_malloc_hard(tsdn, arena, bin,
|
||||||
binind);
|
binind);
|
||||||
/*
|
/*
|
||||||
* OOM. tbin->avail isn't yet filled down to its first
|
* OOM. tbin->avail isn't yet filled down to its first
|
||||||
* element, so the successful allocations (if any) must
|
* element, so the successful allocations (if any) must
|
||||||
@ -1352,7 +1379,7 @@ arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache,
|
|||||||
for (unsigned j = 0; j < cnt; j++) {
|
for (unsigned j = 0; j < cnt; j++) {
|
||||||
void* ptr = *(tbin->avail - nfill + i + j);
|
void* ptr = *(tbin->avail - nfill + i + j);
|
||||||
arena_alloc_junk_small(ptr, &bin_infos[binind],
|
arena_alloc_junk_small(ptr, &bin_infos[binind],
|
||||||
true);
|
true);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user