From d04047cc29bbc9d1f87a9346d1601e3dd87b6ca0 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 28 May 2014 16:11:55 -0700 Subject: [PATCH] Add size class computation capability. Add size class computation capability, currently used only as validation of the size class lookup tables. Generalize the size class spacing used for bins, for eventual use throughout the full range of allocation sizes. --- configure.ac | 23 ++ include/jemalloc/internal/arena.h | 137 ++++++++- .../jemalloc/internal/jemalloc_internal.h.in | 4 +- .../internal/jemalloc_internal_defs.h.in | 5 + .../internal/jemalloc_internal_macros.h | 6 + include/jemalloc/internal/private_symbols.txt | 8 + include/jemalloc/internal/size_classes.sh | 261 ++++++++++++++---- include/jemalloc/internal/util.h | 47 ++++ src/arena.c | 62 +++-- 9 files changed, 462 insertions(+), 91 deletions(-) diff --git a/configure.ac b/configure.ac index 58f6289d..58522499 100644 --- a/configure.ac +++ b/configure.ac @@ -1200,6 +1200,29 @@ if test "x${je_cv_atomic9}" != "xyes" -a "x${je_cv_osatomic}" != "xyes" ; then JE_SYNC_COMPARE_AND_SWAP_CHECK(64, 8) fi +dnl ============================================================================ +dnl Check for __builtin_clz() and __builtin_clzl(). + +AC_CACHE_CHECK([for __builtin_clz], + [je_cv_builtin_clz], + [AC_LINK_IFELSE([AC_LANG_PROGRAM([], + [ + { + unsigned x = 0; + int y = __builtin_clz(x); + } + { + unsigned long x = 0; + int y = __builtin_clzl(x); + } + ])], + [je_cv_builtin_clz=yes], + [je_cv_builtin_clz=no])]) + +if test "x${je_cv_builtin_clz}" = "xyes" ; then + AC_DEFINE([JEMALLOC_HAVE_BUILTIN_CLZ], [ ]) +fi + dnl ============================================================================ dnl Check for spinlock(3) operations as provided on Darwin. diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 598a89b0..2dc9501d 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -463,8 +463,15 @@ void arena_postfork_child(arena_t *arena); #ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE +size_t small_size2bin_compute(size_t size); +size_t small_size2bin_lookup(size_t size); size_t small_size2bin(size_t size); +size_t small_bin2size_compute(size_t binind); +size_t small_bin2size_lookup(size_t binind); size_t small_bin2size(size_t binind); +size_t small_s2u_compute(size_t size); +size_t small_s2u_lookup(size_t size); +size_t small_s2u(size_t size); arena_chunk_map_t *arena_mapp_get(arena_chunk_t *chunk, size_t pageind); size_t *arena_mapbitsp_get(arena_chunk_t *chunk, size_t pageind); size_t arena_mapbitsp_read(size_t *mapbitsp); @@ -507,18 +514,144 @@ void arena_dalloc(arena_chunk_t *chunk, void *ptr, bool try_tcache); #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_)) # ifdef JEMALLOC_ARENA_INLINE_A +JEMALLOC_INLINE size_t +small_size2bin_compute(size_t size) +{ +#if (NTBINS != 0) + if (size <= (ZU(1) << LG_TINY_MAXCLASS)) { + size_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1; + size_t lg_ceil = lg_floor(pow2_ceil(size)); + return (lg_ceil < lg_tmin ? 0 : lg_ceil - lg_tmin); + } else +#endif + { + size_t x = lg_floor((size<<1)-1); + size_t shift = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM) ? 0 : + x - (LG_SIZE_CLASS_GROUP + LG_QUANTUM); + size_t grp = shift << LG_SIZE_CLASS_GROUP; + + size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1) + ? 
LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1; + + size_t delta_inverse_mask = ZI(-1) << lg_delta; + size_t mod = ((((size-1) & delta_inverse_mask) >> lg_delta)) & + ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1); + + size_t bin = NTBINS + grp + mod; + return (bin); + } +} + +JEMALLOC_ALWAYS_INLINE size_t +small_size2bin_lookup(size_t size) +{ + + assert(size <= LOOKUP_MAXCLASS); + { + size_t ret = ((size_t)(small_size2bin_tab[(size-1) >> + LG_TINY_MIN])); + assert(ret == small_size2bin_compute(size)); + return (ret); + } +} + JEMALLOC_ALWAYS_INLINE size_t small_size2bin(size_t size) { - return ((size_t)(small_size2bin_tab[(size-1) >> LG_TINY_MIN])); + assert(size > 0); + if (size <= LOOKUP_MAXCLASS) + return (small_size2bin_lookup(size)); + else + return (small_size2bin_compute(size)); +} + +JEMALLOC_INLINE size_t +small_bin2size_compute(size_t binind) +{ +#if (NTBINS > 0) + if (binind < NTBINS) + return (ZU(1) << (LG_TINY_MAXCLASS - NTBINS + 1 + binind)); + else +#endif + { + size_t reduced_binind = binind - NTBINS; + size_t grp = reduced_binind >> LG_SIZE_CLASS_GROUP; + size_t mod = reduced_binind & ((ZU(1) << LG_SIZE_CLASS_GROUP) - + 1); + + size_t grp_size_mask = ~((!!grp)-1); + size_t grp_size = ((ZU(1) << (LG_QUANTUM + + (LG_SIZE_CLASS_GROUP-1))) << grp) & grp_size_mask; + + size_t shift = (grp == 0) ? 1 : grp; + size_t lg_delta = shift + (LG_QUANTUM-1); + size_t mod_size = (mod+1) << lg_delta; + + size_t usize = grp_size + mod_size; + return (usize); + } +} + +JEMALLOC_ALWAYS_INLINE size_t +small_bin2size_lookup(size_t binind) +{ + + assert(binind < NBINS); + { + size_t ret = ((size_t)(small_bin2size_tab[binind])); + assert(ret == small_bin2size_compute(binind)); + return (ret); + } } JEMALLOC_ALWAYS_INLINE size_t small_bin2size(size_t binind) { - return ((size_t)(small_bin2size_tab[binind])); + return (small_bin2size_lookup(binind)); +} + +JEMALLOC_ALWAYS_INLINE size_t +small_s2u_compute(size_t size) +{ +#if (NTBINS > 0) + if (size <= (ZU(1) << LG_TINY_MAXCLASS)) { + size_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1; + size_t lg_ceil = lg_floor(pow2_ceil(size)); + return (lg_ceil < lg_tmin ? (ZU(1) << lg_tmin) : + (ZU(1) << lg_ceil)); + } else +#endif + { + size_t x = lg_floor((size<<1)-1); + size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1) + ? 
LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1; + size_t delta = ZU(1) << lg_delta; + size_t delta_mask = delta - 1; + size_t usize = (size + delta_mask) & ~delta_mask; + return (usize); + } +} + +JEMALLOC_ALWAYS_INLINE size_t +small_s2u_lookup(size_t size) +{ + size_t ret = (small_bin2size(small_size2bin(size))); + + assert(ret == small_s2u_compute(size)); + return (ret); +} + +JEMALLOC_ALWAYS_INLINE size_t +small_s2u(size_t size) +{ + + assert(size > 0); + if (size <= LOOKUP_MAXCLASS) + return (small_s2u_lookup(size)); + else + return (small_s2u_compute(size)); } # endif /* JEMALLOC_ARENA_INLINE_A */ diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index cf20f1f9..491345c9 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -475,7 +475,7 @@ s2u(size_t size) { if (size <= SMALL_MAXCLASS) - return (small_bin2size(small_size2bin(size))); + return (small_s2u(size)); if (size <= arena_maxclass) return (PAGE_CEILING(size)); return (CHUNK_CEILING(size)); @@ -518,7 +518,7 @@ sa2u(size_t size, size_t alignment) if (usize <= arena_maxclass && alignment <= PAGE) { if (usize <= SMALL_MAXCLASS) - return (small_bin2size(small_size2bin(usize))); + return (small_s2u(usize)); return (PAGE_CEILING(usize)); } else { size_t run_size; diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 09ddd4f3..a9a50f14 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -47,6 +47,11 @@ */ #undef JE_FORCE_SYNC_COMPARE_AND_SWAP_8 +/* + * Defined if __builtin_clz() and __builtin_clzl() are available. + */ +#undef JEMALLOC_HAVE_BUILTIN_CLZ + /* * Defined if OSSpin*() functions are available, as provided by Darwin, and * documented in the spinlock(3) manual page. diff --git a/include/jemalloc/internal/jemalloc_internal_macros.h b/include/jemalloc/internal/jemalloc_internal_macros.h index 4e239230..38e28861 100644 --- a/include/jemalloc/internal/jemalloc_internal_macros.h +++ b/include/jemalloc/internal/jemalloc_internal_macros.h @@ -39,9 +39,15 @@ #endif #define ZU(z) ((size_t)z) +#define ZI(z) ((ssize_t)z) #define QU(q) ((uint64_t)q) #define QI(q) ((int64_t)q) +#define KZU(z) ZU(z##ULL) +#define KZI(z) ZI(z##ULL) +#define KQU(q) QU(q##ULL) +#define KQI(q) QI(q##ULL) + #ifndef __DECONST # define __DECONST(type, var) ((type)(uintptr_t)(const void *)(var)) #endif diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index f6c4fbcc..3401301c 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -234,6 +234,7 @@ ixalloc jemalloc_postfork_child jemalloc_postfork_parent jemalloc_prefork +lg_floor malloc_cprintf malloc_mutex_init malloc_mutex_lock @@ -348,8 +349,15 @@ s2u sa2u set_errno small_bin2size +small_bin2size_compute +small_bin2size_lookup small_bin2size_tab +small_s2u +small_s2u_compute +small_s2u_lookup small_size2bin +small_size2bin_compute +small_size2bin_lookup small_size2bin_tab stats_cactive stats_cactive_add diff --git a/include/jemalloc/internal/size_classes.sh b/include/jemalloc/internal/size_classes.sh index 960674aa..3edebf23 100755 --- a/include/jemalloc/internal/size_classes.sh +++ b/include/jemalloc/internal/size_classes.sh @@ -2,16 +2,23 @@ # The following limits are chosen such that they cover all supported platforms. 
-# Range of quanta. -lg_qmin=3 -lg_qmax=4 +# Pointer sizes. +lg_zarr="2 3" + +# Quanta. +lg_qarr="3 4" # The range of tiny size classes is [2^lg_tmin..2^(lg_q-1)]. lg_tmin=3 -# Range of page sizes. -lg_pmin=12 -lg_pmax=16 +# Maximum lookup size. +lg_kmax=12 + +# Page sizes. +lg_parr="12 13 16" + +# Size class group size (number of size classes for each size doubling). +lg_g=2 pow2() { e=$1 @@ -22,68 +29,206 @@ pow2() { done } +lg() { + x=$1 + lg_result=0 + while [ ${x} -gt 1 ] ; do + lg_result=$((${lg_result} + 1)) + x=$((${x} / 2)) + done +} + +size_class() { + index=$1 + lg_grp=$2 + lg_delta=$3 + ndelta=$4 + lg_p=$5 + lg_kmax=$6 + + lg ${ndelta}; lg_ndelta=${lg_result}; pow2 ${lg_ndelta} + if [ ${pow2_result} -lt ${ndelta} ] ; then + rem="yes" + else + rem="no" + fi + + lg_size=${lg_grp} + if [ $((${lg_delta} + ${lg_ndelta})) -eq ${lg_grp} ] ; then + lg_size=$((${lg_grp} + 1)) + else + lg_size=${lg_grp} + rem="yes" + fi + + if [ ${lg_size} -lt ${lg_p} ] ; then + bin="yes" + else + bin="no" + fi + if [ ${lg_size} -lt ${lg_kmax} \ + -o ${lg_size} -eq ${lg_kmax} -a ${rem} = "no" ] ; then + lg_delta_lookup=${lg_delta} + else + lg_delta_lookup="no" + fi + printf ' SC(%3d, %6d, %8d, %6d, %3s, %2s) \\\n' ${index} ${lg_grp} ${lg_delta} ${ndelta} ${bin} ${lg_delta_lookup} + # Defined upon return: + # - lg_delta_lookup (${lg_delta} or "no") + # - bin ("yes" or "no") +} + +sep_line() { + echo " \\" +} + +size_classes() { + lg_z=$1 + lg_q=$2 + lg_t=$3 + lg_p=$4 + lg_g=$5 + + pow2 $((${lg_z} + 3)); ptr_bits=${pow2_result} + pow2 ${lg_g}; g=${pow2_result} + + echo "#define SIZE_CLASSES \\" + echo " /* index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup */ \\" + + ntbins=0 + nlbins=0 + lg_tiny_maxclass='"NA"' + nbins=0 + + # Tiny size classes. + ndelta=0 + index=0 + lg_grp=${lg_t} + lg_delta=${lg_grp} + while [ ${lg_grp} -lt ${lg_q} ] ; do + size_class ${index} ${lg_grp} ${lg_delta} ${ndelta} ${lg_p} ${lg_kmax} + if [ ${lg_delta_lookup} != "no" ] ; then + nlbins=$((${index} + 1)) + fi + if [ ${bin} != "no" ] ; then + nbins=$((${index} + 1)) + fi + ntbins=$((${ntbins} + 1)) + lg_tiny_maxclass=${lg_grp} # Final written value is correct. + index=$((${index} + 1)) + lg_delta=${lg_grp} + lg_grp=$((${lg_grp} + 1)) + done + + # First non-tiny group. + if [ ${ntbins} -gt 0 ] ; then + sep_line + # The first size class has an unusual encoding, because the size has to be + # split between grp and delta*ndelta. + lg_grp=$((${lg_grp} - 1)) + ndelta=1 + size_class ${index} ${lg_grp} ${lg_delta} ${ndelta} ${lg_p} ${lg_kmax} + index=$((${index} + 1)) + lg_grp=$((${lg_grp} + 1)) + lg_delta=$((${lg_delta} + 1)) + fi + while [ ${ndelta} -lt ${g} ] ; do + size_class ${index} ${lg_grp} ${lg_delta} ${ndelta} ${lg_p} ${lg_kmax} + index=$((${index} + 1)) + ndelta=$((${ndelta} + 1)) + done + + # All remaining groups. 
+ lg_grp=$((${lg_grp} + ${lg_g})) + while [ ${lg_grp} -lt ${ptr_bits} ] ; do + sep_line + ndelta=1 + if [ ${lg_grp} -eq $((${ptr_bits} - 1)) ] ; then + ndelta_limit=$((${g} - 1)) + else + ndelta_limit=${g} + fi + while [ ${ndelta} -le ${ndelta_limit} ] ; do + size_class ${index} ${lg_grp} ${lg_delta} ${ndelta} ${lg_p} ${lg_kmax} + if [ ${lg_delta_lookup} != "no" ] ; then + nlbins=$((${index} + 1)) + # Final written value is correct: + lookup_maxclass="((((size_t)1) << ${lg_grp}) + (((size_t)${ndelta}) << ${lg_delta}))" + fi + if [ ${bin} != "no" ] ; then + nbins=$((${index} + 1)) + # Final written value is correct: + small_maxclass="((((size_t)1) << ${lg_grp}) + (((size_t)${ndelta}) << ${lg_delta}))" + fi + index=$((${index} + 1)) + ndelta=$((${ndelta} + 1)) + done + lg_grp=$((${lg_grp} + 1)) + lg_delta=$((${lg_delta} + 1)) + done + echo + + # Defined upon completion: + # - ntbins + # - nlbins + # - nbins + # - lg_tiny_maxclass + # - lookup_maxclass + # - small_maxclass +} + cat <> 1); + x |= (x >> 2); + x |= (x >> 4); + x |= (x >> 8); + x |= (x >> 16); +#if (LG_SIZEOF_PTR == 3 && LG_SIZEOF_PTR == LG_SIZEOF_LONG) + x |= (x >> 32); + return (65 - ffsl(~x)); +#elif (LG_SIZEOF_PTR == 2) + return (33 - ffs(~x)); +#else +# error "Unsupported type sizes for lg_floor()" +#endif +} +#endif + /* Sets error code */ JEMALLOC_INLINE void set_errno(int errnum) diff --git a/src/arena.c b/src/arena.c index f5d7d062..c392419e 100644 --- a/src/arena.c +++ b/src/arena.c @@ -9,40 +9,39 @@ arena_bin_info_t arena_bin_info[NBINS]; JEMALLOC_ALIGNED(CACHELINE) const uint32_t small_bin2size_tab[NBINS] = { -#define SIZE_CLASS(bin, delta, size) \ +#define B2S_bin_yes(size) \ size, +#define B2S_bin_no(size) +#define SC(index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup) \ + B2S_bin_##bin((ZU(1)<reg_size = size; \ prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size);\ bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs); +#define BIN_INFO_INIT_bin_no(index, size) +#define SC(index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup) \ + BIN_INFO_INIT_bin_##bin(index, (ZU(1)<
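
For readers tracing the new arithmetic in small_size2bin_compute() and small_s2u_compute() above: each doubling of the size range is split into 2^LG_SIZE_CLASS_GROUP classes spaced delta = 2^lg_delta apart, so an index is the group offset (shift << LG_SIZE_CLASS_GROUP) plus the position within the group, and rounding a request up to its class is just rounding up to a multiple of delta. The following standalone sketch is not part of the patch; it re-derives that math under assumed parameters (LG_QUANTUM = 4, LG_SIZE_CLASS_GROUP = 2, and NTBINS = 0 for simplicity), substitutes a portable loop for the lg_floor() variants added to util.h, and collapses the delta_inverse_mask step into an equivalent shift-and-mask.

/*
 * Illustrative sketch only -- not part of the patch.  Assumes a 16-byte
 * quantum, 4 size classes per doubling, and no tiny classes; real builds
 * derive these parameters from size_classes.sh.
 */
#include <assert.h>
#include <stddef.h>
#include <stdio.h>

#define LG_QUANTUM		4	/* Assumed: 16-byte quantum. */
#define LG_SIZE_CLASS_GROUP	2	/* Assumed: 4 classes per doubling. */

/* Portable stand-in for the lg_floor() the patch adds to util.h. */
static size_t
lg_floor(size_t x)
{
	size_t r = 0;

	while (x >>= 1)
		r++;
	return (r);
}

/* Smallest size class >= size (cf. small_s2u_compute(), non-tiny branch). */
static size_t
s2u_compute(size_t size)
{
	size_t x = lg_floor((size << 1) - 1);
	size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1) ?
	    LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1;
	size_t delta_mask = ((size_t)1 << lg_delta) - 1;

	/* Round up to the class spacing in effect for this group. */
	return ((size + delta_mask) & ~delta_mask);
}

/* Index of that class (cf. small_size2bin_compute(), non-tiny branch). */
static size_t
size2bin_compute(size_t size)
{
	size_t x = lg_floor((size << 1) - 1);
	size_t shift = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM) ? 0 :
	    x - (LG_SIZE_CLASS_GROUP + LG_QUANTUM);
	size_t grp = shift << LG_SIZE_CLASS_GROUP;
	size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1) ?
	    LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1;
	/* Equivalent to the delta_inverse_mask form used in the patch. */
	size_t mod = ((size - 1) >> lg_delta) &
	    (((size_t)1 << LG_SIZE_CLASS_GROUP) - 1);

	/* A real configuration would add NTBINS here. */
	return (grp + mod);
}

int
main(void)
{
	/* Classes: 16,32,48,64 | 80,96,112,128 | 160,192,224,256 | ... */
	assert(s2u_compute(1) == 16 && size2bin_compute(1) == 0);
	assert(s2u_compute(96) == 96 && size2bin_compute(96) == 5);
	assert(s2u_compute(129) == 160 && size2bin_compute(129) == 8);
	printf("size 129 -> usize %zu, bin %zu\n",
	    s2u_compute(129), size2bin_compute(129));
	return (0);
}

Compiled and run on its own (e.g. cc sketch.c && ./a.out, with sketch.c a hypothetical file name), this prints "size 129 -> usize 160, bin 8", matching the 16,32,48,64 | 80,96,112,128 | 160,... progression implied by these assumed parameters. A real configuration generally has tiny classes, which offsets every index by NTBINS and adds the tiny-class branches that the patch guards with #if (NTBINS != 0).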