diff --git a/configure.ac b/configure.ac
index e27ea912..5cfe9af3 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1429,6 +1429,21 @@ else
   fi
 fi

+JE_COMPILABLE([a program using __builtin_popcountl], [
+#include <stdio.h>
+#include <strings.h>
+#include <string.h>
+], [
+	{
+		int rv = __builtin_popcountl(0x08);
+		printf("%d\n", rv);
+	}
+], [je_cv_gcc_builtin_popcountl])
+if test "x${je_cv_gcc_builtin_popcountl}" = "xyes" ; then
+  AC_DEFINE([JEMALLOC_INTERNAL_POPCOUNT], [__builtin_popcount])
+  AC_DEFINE([JEMALLOC_INTERNAL_POPCOUNTL], [__builtin_popcountl])
+fi
+
 AC_ARG_WITH([lg_quantum],
   [AS_HELP_STRING([--with-lg-quantum=<lg_quantum>],
   [Base 2 log of minimum allocation alignment])],
@@ -1901,7 +1916,7 @@ if test "x${je_cv_madvise}" = "xyes" ; then
   if test "x${je_cv_madv_dontdump}" = "xyes" ; then
     AC_DEFINE([JEMALLOC_MADVISE_DONTDUMP], [ ])
   fi
-  
+
   dnl Check for madvise(..., MADV_[[NO]]HUGEPAGE).
   JE_COMPILABLE([madvise(..., MADV_[[NO]]HUGEPAGE)], [
 #include <sys/mman.h>
diff --git a/include/jemalloc/internal/bit_util.h b/include/jemalloc/internal/bit_util.h
index 8c59c39e..c045eb86 100644
--- a/include/jemalloc/internal/bit_util.h
+++ b/include/jemalloc/internal/bit_util.h
@@ -27,6 +27,25 @@ ffs_u(unsigned bitmap) {
 	return JEMALLOC_INTERNAL_FFS(bitmap);
 }

+#ifdef JEMALLOC_INTERNAL_POPCOUNTL
+BIT_UTIL_INLINE unsigned
+popcount_lu(unsigned long bitmap) {
+	return JEMALLOC_INTERNAL_POPCOUNTL(bitmap);
+}
+#endif
+
+/*
+ * Clears the first set bit in bitmap, and returns
+ * its position.  bitmap *must not* be 0.
+ */
+
+BIT_UTIL_INLINE size_t
+cfs_lu(unsigned long* bitmap) {
+	size_t bit = ffs_lu(*bitmap) - 1;
+	*bitmap ^= ZU(1) << bit;
+	return bit;
+}
+
 BIT_UTIL_INLINE unsigned
 ffs_zu(size_t bitmap) {
 #if LG_SIZEOF_PTR == LG_SIZEOF_INT
diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in
index c1eb8edc..3eac2754 100644
--- a/include/jemalloc/internal/jemalloc_internal_defs.h.in
+++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in
@@ -236,6 +236,12 @@
 #undef JEMALLOC_INTERNAL_FFSL
 #undef JEMALLOC_INTERNAL_FFS

+/*
+ * popcount*() functions to use for bitmapping.
+ */
+#undef JEMALLOC_INTERNAL_POPCOUNTL
+#undef JEMALLOC_INTERNAL_POPCOUNT
+
 /*
  * If defined, explicitly attempt to more uniformly distribute large allocation
  * pointer alignments across all cache indices.
diff --git a/src/arena.c b/src/arena.c
index 841f2950..5fc90c54 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -273,19 +273,46 @@ arena_slab_reg_alloc_batch(extent_t *slab, const bin_info_t *bin_info,
 			   unsigned cnt, void** ptrs) {
 	arena_slab_data_t *slab_data = extent_slab_data_get(slab);

-	assert(extent_nfree_get(slab) > 0);
+	assert(extent_nfree_get(slab) >= cnt);
 	assert(!bitmap_full(slab_data->bitmap, &bin_info->bitmap_info));

-	size_t regind = 0;
+#if (! defined JEMALLOC_INTERNAL_POPCOUNTL) || (defined BITMAP_USE_TREE)
 	for (unsigned i = 0; i < cnt; i++) {
-		void *ret;
-
-		regind = bitmap_sfu(slab_data->bitmap, &bin_info->bitmap_info);
-		ret = (void *)((uintptr_t)extent_addr_get(slab) +
+		size_t regind = bitmap_sfu(slab_data->bitmap,
+		    &bin_info->bitmap_info);
+		*(ptrs + i) = (void *)((uintptr_t)extent_addr_get(slab) +
 		    (uintptr_t)(bin_info->reg_size * regind));
-
-		*(ptrs + i) = ret;
 	}
+#else
+	unsigned group = 0;
+	bitmap_t g = slab_data->bitmap[group];
+	unsigned i = 0;
+	while (i < cnt) {
+		while (g == 0) {
+			g = slab_data->bitmap[++group];
+		}
+		size_t shift = group << LG_BITMAP_GROUP_NBITS;
+		size_t pop = popcount_lu(g);
+		if (pop > (cnt - i)) {
+			pop = cnt - i;
+		}
+
+		/*
+		 * Load from memory locations only once, outside the
+		 * hot loop below.
+		 */
+		uintptr_t base = (uintptr_t)extent_addr_get(slab);
+		uintptr_t regsize = (uintptr_t)bin_info->reg_size;
+		while (pop--) {
+			size_t bit = cfs_lu(&g);
+			size_t regind = shift + bit;
+			*(ptrs + i) = (void *)(base + regsize * regind);
+
+			i++;
+		}
+		slab_data->bitmap[group] = g;
+	}
+#endif
 	extent_nfree_sub(slab, cnt);
 }

@@ -1331,7 +1358,7 @@ arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache,
 		} else {
 			cnt = 1;
 			void *ptr = arena_bin_malloc_hard(tsdn, arena, bin,
-			binind);
+			    binind);
 			/*
 			 * OOM.  tbin->avail isn't yet filled down to its first
 			 * element, so the successful allocations (if any) must
@@ -1352,7 +1379,7 @@ arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache,
 		for (unsigned j = 0; j < cnt; j++) {
 			void* ptr = *(tbin->avail - nfill + i + j);
 			arena_alloc_junk_small(ptr, &bin_infos[binind],
-			true);
+			    true);
 		}
 	}
 }
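For reference, below is a minimal standalone sketch of the popcount-based batch scan that the src/arena.c hunk adds, assuming a GCC/Clang-style compiler: it calls __builtin_popcountl()/__builtin_ffsl() directly instead of going through the configure-detected JEMALLOC_INTERNAL_* macros, and batch_scan(), GROUP_NBITS, and these local popcount_lu()/cfs_lu() are illustrative stand-ins rather than jemalloc APIs.

/*
 * Pull the positions of the first "cnt" set bits out of a multi-word
 * bitmap: popcount bounds how many bits each word can contribute, and a
 * clear-first-set step peels them off one at a time.
 */
#include <stdio.h>
#include <stddef.h>

#define GROUP_NBITS	(sizeof(unsigned long) * 8)

static unsigned
popcount_lu(unsigned long bitmap) {
	return (unsigned)__builtin_popcountl(bitmap);
}

/* Clears the first (lowest) set bit in *bitmap; returns its position. */
static size_t
cfs_lu(unsigned long *bitmap) {
	size_t bit = (size_t)__builtin_ffsl((long)*bitmap) - 1;
	*bitmap ^= 1UL << bit;
	return bit;
}

/* Writes the positions of the first cnt set bits to out; they must exist. */
static void
batch_scan(unsigned long *groups, unsigned cnt, size_t *out) {
	unsigned group = 0;
	unsigned long g = groups[group];
	unsigned i = 0;
	while (i < cnt) {
		while (g == 0) {		/* Skip exhausted words. */
			g = groups[++group];
		}
		size_t shift = (size_t)group * GROUP_NBITS;
		size_t pop = popcount_lu(g);
		if (pop > cnt - i) {		/* Take no more than needed. */
			pop = cnt - i;
		}
		while (pop--) {
			out[i++] = shift + cfs_lu(&g);
		}
		groups[group] = g;		/* One write-back per word. */
	}
}

int
main(void) {
	unsigned long groups[2] = {0x0UL, 0x13UL};
	size_t out[3];

	batch_scan(groups, 3, out);	/* With 64-bit long: bits 64, 65, 68. */
	for (int i = 0; i < 3; i++) {
		printf("%zu\n", out[i]);
	}
	return 0;
}

This mirrors the trade the patch makes: tree-structured bitmaps (BITMAP_USE_TREE) and builds without a compiler popcount keep the old one-bitmap_sfu()-per-region loop, while flat bitmaps get roughly one load and one store per bitmap word instead of one read-modify-write per allocated region.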