From ef8897b4b938111fcc9b54725067f1dbb33a4c20 Mon Sep 17 00:00:00 2001
From: Jason Evans
Date: Mon, 13 Feb 2012 14:30:52 -0800
Subject: [PATCH] Make 8-byte tiny size class non-optional.

When tiny size class support was first added, it was intended to support
truly tiny size classes (even 2 bytes).  However, this wasn't very useful in
practice, so the minimum tiny size class has been limited to sizeof(void *)
for a long time now.  This is too small to be standards compliant, but other
commonly used malloc implementations do not even bother using a 16-byte
quantum on systems with vector units (SSE2+, AltiVEC, etc.).  As such, it is
safe in practice to support an 8-byte tiny size class on 64-bit systems that
support 16-byte types.
---
 INSTALL                                       |   7 --
 configure.ac                                  |  17 ---
 doc/jemalloc.xml.in                           |  23 ++--
 include/jemalloc/internal/arena.h             |  19 ++--
 .../jemalloc/internal/jemalloc_internal.h.in  |   7 --
 include/jemalloc/jemalloc_defs.h.in           |   6 --
 src/arena.c                                   | 101 +++++-------------
 src/ctl.c                                     |   9 +-
 8 files changed, 44 insertions(+), 145 deletions(-)

diff --git a/INSTALL b/INSTALL
index a210ec5a..9124ac34 100644
--- a/INSTALL
+++ b/INSTALL
@@ -90,13 +90,6 @@ any of the following arguments (not a definitive list) to 'configure':
   Statically link against the specified libunwind.a rather than dynamically
   linking with -lunwind.
 
---disable-tiny
-  Disable tiny (sub-quantum-sized) object support.  Technically it is not
-  legal for a malloc implementation to allocate objects with less than
-  quantum alignment (8 or 16 bytes, depending on architecture), but in
-  practice it never causes any problems if, for example, 4-byte allocations
-  are 4-byte-aligned.
-
 --disable-tcache
   Disable thread-specific caches for small objects.  Objects are cached and
   released in bulk, thus reducing the total number of mutex operations.  See
diff --git a/configure.ac b/configure.ac
index e818f65a..fdbf1bad 100644
--- a/configure.ac
+++ b/configure.ac
@@ -560,22 +560,6 @@ if test "x$enable_prof" = "x1" ; then
 fi
 AC_SUBST([enable_prof])
 
-dnl Enable tiny allocations by default.
-AC_ARG_ENABLE([tiny],
-  [AS_HELP_STRING([--disable-tiny], [Disable tiny (sub-quantum) allocations])],
-[if test "x$enable_tiny" = "xno" ; then
-  enable_tiny="0"
-else
-  enable_tiny="1"
-fi
-],
-[enable_tiny="1"]
-)
-if test "x$enable_tiny" = "x1" ; then
-  AC_DEFINE([JEMALLOC_TINY], [ ])
-fi
-AC_SUBST([enable_tiny])
-
 dnl Enable thread-specific caching by default.
 AC_ARG_ENABLE([tcache],
   [AS_HELP_STRING([--disable-tcache], [Disable per thread caches])],
@@ -934,7 +918,6 @@ AC_MSG_RESULT([prof            : ${enable_prof}])
 AC_MSG_RESULT([prof-libunwind : ${enable_prof_libunwind}])
 AC_MSG_RESULT([prof-libgcc    : ${enable_prof_libgcc}])
 AC_MSG_RESULT([prof-gcc       : ${enable_prof_gcc}])
-AC_MSG_RESULT([tiny           : ${enable_tiny}])
 AC_MSG_RESULT([tcache         : ${enable_tcache}])
 AC_MSG_RESULT([fill           : ${enable_fill}])
 AC_MSG_RESULT([xmalloc        : ${enable_xmalloc}])
diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in
index dc11642f..f9f14750 100644
--- a/doc/jemalloc.xml.in
+++ b/doc/jemalloc.xml.in
@@ -455,13 +455,12 @@ for (i = 0; i < nbins; i++) {
     allocations in constant time.
 
     Small objects are managed in groups by page runs.  Each run maintains
-    a frontier and free list to track which regions are in use.  Unless
-    --disable-tiny is specified during configuration,
-    allocation requests that are no more than half the quantum (8 or 16,
-    depending on architecture) are rounded up to the nearest power of two that
-    is at least sizeof(void *).
-    Allocation requests that are more than half the quantum, but no more than
-    the minimum cacheline-multiple size class (see the
+    a frontier and free list to track which regions are in use.  Allocation
+    requests that are no more than half the quantum (8 or 16, depending on
+    architecture) are rounded up to the nearest power of two that is at least
+    sizeof(void *).  Allocation requests
+    that are more than half the quantum, but no more than the minimum
+    cacheline-multiple size class (see the
     opt.lg_qspace_max
     option) are rounded up to the nearest multiple of the quantum.  Allocation
     requests that are more than the minimum cacheline-multiple size class, but
@@ -680,16 +679,6 @@ for (i = 0; i < nbins; i++) {
           during build configuration.
 
 
-
-
-          config.tiny
-          (bool)
-          r-
-
-          --disable-tiny was not specified
-          during build configuration.
-
-
 
 
           config.tls
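The manual paragraph above spells out how small requests map onto size classes
once the 8-byte tiny class is always present.  The following standalone sketch
is illustrative only; it is not part of the patch and not jemalloc's actual
implementation (which uses the small_size2bin lookup table), and the helper
name small_round and the TSPACE_MAX macro are made up here.  It assumes a
16-byte quantum (LG_QUANTUM = 4) and the new fixed LG_TINY_MIN of 3.

/*
 * Illustrative only: request-to-size-class rounding as described in the
 * manual text above.  Sizes above the quantum-spaced range (not modeled
 * here) follow the cacheline/subpage rules instead.
 */
#include <stdio.h>
#include <stddef.h>

#define LG_TINY_MIN	3
#define TINY_MIN	((size_t)1 << LG_TINY_MIN)	/* 8-byte minimum class */
#define LG_QUANTUM	4				/* assumption: 16-byte quantum */
#define QUANTUM		((size_t)1 << LG_QUANTUM)
#define TSPACE_MAX	(QUANTUM >> 1)			/* largest tiny request: 8 */

static size_t
small_round(size_t size)
{

	if (size <= TSPACE_MAX) {
		/* Tiny: nearest power of two that is at least TINY_MIN. */
		size_t p = TINY_MIN;
		while (p < size)
			p <<= 1;
		return (p);
	}
	/* Quantum-spaced: nearest multiple of the quantum. */
	return ((size + QUANTUM - 1) & ~(QUANTUM - 1));
}

int
main(void)
{
	size_t sizes[] = {1, 4, 8, 9, 16, 17, 24};
	size_t i;

	for (i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++)
		printf("request %2zu -> size class %2zu\n",
		    sizes[i], small_round(sizes[i]));
	return (0);
}

For the requests above this prints 8, 8, 8, 16, 16, 32, and 32.  In a
--disable-tiny build (removed by this patch), the 1-, 4-, and 8-byte requests
would instead have been rounded up to the 16-byte quantum.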
diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h
index b8de12be..cacb03f8 100644
--- a/include/jemalloc/internal/arena.h
+++ b/include/jemalloc/internal/arena.h
@@ -17,7 +17,7 @@
 	(((s) + SUBPAGE_MASK) & ~SUBPAGE_MASK)
 
 /* Smallest size class to support. */
-#define	LG_TINY_MIN		LG_SIZEOF_PTR
+#define	LG_TINY_MIN		3
 #define	TINY_MIN		(1U << LG_TINY_MIN)
 
 /*
@@ -418,18 +418,13 @@ extern uint8_t const	*small_size2bin;
 extern arena_bin_info_t	*arena_bin_info;
 
 /* Various bin-related settings. */
-#ifdef JEMALLOC_TINY		/* Number of (2^n)-spaced tiny bins. */
-#  define	ntbins		((unsigned)(LG_QUANTUM - LG_TINY_MIN))
-#else
-#  define	ntbins		0
-#endif
+		/* Number of (2^n)-spaced tiny bins. */
+#define	ntbins		((unsigned)(LG_QUANTUM - LG_TINY_MIN))
 extern unsigned		nqbins; /* Number of quantum-spaced bins. */
 extern unsigned		ncbins; /* Number of cacheline-spaced bins. */
 extern unsigned		nsbins; /* Number of subpage-spaced bins. */
 extern unsigned		nbins;
-#ifdef JEMALLOC_TINY
-#  define	tspace_max	((size_t)(QUANTUM >> 1))
-#endif
+#define	tspace_max	((size_t)(QUANTUM >> 1))
 #define	qspace_min	QUANTUM
 extern size_t		qspace_max;
 extern size_t		cspace_min;
@@ -633,18 +628,18 @@ arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
 JEMALLOC_INLINE void *
 arena_malloc(size_t size, bool zero)
 {
-	tcache_t *tcache = tcache_get();
+	tcache_t *tcache;
 
 	assert(size != 0);
 	assert(QUANTUM_CEILING(size) <= arena_maxclass);
 
 	if (size <= small_maxclass) {
-		if (tcache != NULL)
+		if ((tcache = tcache_get()) != NULL)
 			return (tcache_alloc_small(tcache, size, zero));
 		else
 			return (arena_malloc_small(choose_arena(), size, zero));
 	} else {
-		if (tcache != NULL && size <= tcache_maxclass)
+		if (size <= tcache_maxclass && (tcache = tcache_get()) != NULL)
 			return (tcache_alloc_large(tcache, size, zero));
 		else
 			return (arena_malloc_large(choose_arena(), size, zero));
diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in
index 44415370..971336ec 100644
--- a/include/jemalloc/internal/jemalloc_internal.h.in
+++ b/include/jemalloc/internal/jemalloc_internal.h.in
@@ -118,13 +118,6 @@ static const bool config_tcache =
     false
 #endif
     ;
-static const bool config_tiny =
-#ifdef JEMALLOC_TINY
-    true
-#else
-    false
-#endif
-    ;
 static const bool config_tls =
 #ifdef JEMALLOC_TLS
     true
diff --git a/include/jemalloc/jemalloc_defs.h.in b/include/jemalloc/jemalloc_defs.h.in
index f78028b0..66da6f3d 100644
--- a/include/jemalloc/jemalloc_defs.h.in
+++ b/include/jemalloc/jemalloc_defs.h.in
@@ -79,12 +79,6 @@
 /* Use gcc intrinsics for profile backtracing if defined. */
 #undef JEMALLOC_PROF_GCC
 
-/*
- * JEMALLOC_TINY enables support for tiny objects, which are smaller than one
- * quantum.
- */
-#undef JEMALLOC_TINY
-
 /*
  * JEMALLOC_TCACHE enables a thread-specific caching layer for small objects.
  * This makes it possible to allocate/deallocate objects without any locking
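With JEMALLOC_TINY gone, ntbins and tspace_max become unconditional
compile-time constants.  A minimal worked example of that arithmetic, not
jemalloc source, with values assuming a 16-byte quantum (LG_QUANTUM = 4):

#include <stdio.h>
#include <stddef.h>

#define LG_TINY_MIN	3
#define LG_QUANTUM	4			/* assumption: 16-byte quantum */
#define QUANTUM		(1U << LG_QUANTUM)
#define ntbins		((unsigned)(LG_QUANTUM - LG_TINY_MIN))
#define tspace_max	((size_t)(QUANTUM >> 1))

int
main(void)
{

	/* One (2^n)-spaced tiny bin (8 bytes); requests of 1..8 bytes use it. */
	printf("ntbins = %u, tspace_max = %zu\n", ntbins, tspace_max);
	return (0);
}

This prints "ntbins = 1, tspace_max = 8".  On an 8-byte-quantum system
(LG_QUANTUM = 3), ntbins evaluates to 0 and the quantum-spaced bins start at
8 bytes, which is why arena_boot() can keep assert(LG_QUANTUM >= LG_TINY_MIN)
unconditionally.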
diff --git a/src/arena.c b/src/arena.c
index 8a158df2..32afd0cf 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -28,14 +28,7 @@ size_t	mspace_mask;
  * const_small_size2bin is a static constant lookup table that in the common
  * case can be used as-is for small_size2bin.
  */
-#if (LG_TINY_MIN == 2)
-#define	S2B_4(i)	i,
-#define	S2B_8(i)	S2B_4(i) S2B_4(i)
-#elif (LG_TINY_MIN == 3)
 #define	S2B_8(i)	i,
-#else
-# error "Unsupported LG_TINY_MIN"
-#endif
 #define	S2B_16(i)	S2B_8(i) S2B_8(i)
 #define	S2B_32(i)	S2B_16(i) S2B_16(i)
 #define	S2B_64(i)	S2B_32(i) S2B_32(i)
@@ -49,23 +42,9 @@ static JEMALLOC_ATTR(aligned(CACHELINE))
     const uint8_t	const_small_size2bin[] = {
 #if (LG_QUANTUM == 4)
 /* 16-byte quantum **********************/
-#  ifdef JEMALLOC_TINY
-#    if (LG_TINY_MIN == 2)
-	S2B_4(0)		/* 4 */
-	S2B_4(1)		/* 8 */
-	S2B_8(2)		/* 16 */
-#      define S2B_QMIN 2
-#    elif (LG_TINY_MIN == 3)
-	S2B_8(0)		/* 8 */
-	S2B_8(1)		/* 16 */
-#      define S2B_QMIN 1
-#    else
-#      error "Unsupported LG_TINY_MIN"
-#    endif
-#  else
-	S2B_16(0)		/* 16 */
-#    define S2B_QMIN 0
-#  endif
+	S2B_8(0)		/* 8 */
+	S2B_8(1)		/* 16 */
+#  define S2B_QMIN 1
 	S2B_16(S2B_QMIN + 1)	/* 32 */
 	S2B_16(S2B_QMIN + 2)	/* 48 */
 	S2B_16(S2B_QMIN + 3)	/* 64 */
@@ -76,18 +55,8 @@ static JEMALLOC_ATTR(aligned(CACHELINE))
 #  define S2B_CMIN (S2B_QMIN + 8)
 #else
 /* 8-byte quantum ***********************/
-#  ifdef JEMALLOC_TINY
-#    if (LG_TINY_MIN == 2)
-	S2B_4(0)		/* 4 */
-	S2B_4(1)		/* 8 */
-#      define S2B_QMIN 1
-#    else
-#      error "Unsupported LG_TINY_MIN"
-#    endif
-#  else
-	S2B_8(0)		/* 8 */
-#    define S2B_QMIN 0
-#  endif
+#  define S2B_QMIN 0
+	S2B_8(S2B_QMIN + 0)	/* 8 */
 	S2B_8(S2B_QMIN + 1)	/* 16 */
 	S2B_8(S2B_QMIN + 2)	/* 24 */
 	S2B_8(S2B_QMIN + 3)	/* 32 */
@@ -2153,17 +2122,15 @@ small_size2bin_validate(void)
 
 	i = 1;
 	/* Tiny. */
-	if (config_tiny) {
-		for (; i < (1U << LG_TINY_MIN); i++) {
-			size = pow2_ceil(1U << LG_TINY_MIN);
-			binind = ffs((int)(size >> (LG_TINY_MIN + 1)));
-			assert(SMALL_SIZE2BIN(i) == binind);
-		}
-		for (; i < qspace_min; i++) {
-			size = pow2_ceil(i);
-			binind = ffs((int)(size >> (LG_TINY_MIN + 1)));
-			assert(SMALL_SIZE2BIN(i) == binind);
-		}
+	for (; i < TINY_MIN; i++) {
+		size = TINY_MIN;
+		binind = ffs((int)(size >> (LG_TINY_MIN + 1)));
+		assert(SMALL_SIZE2BIN(i) == binind);
+	}
+	for (; i < qspace_min; i++) {
+		size = pow2_ceil(i);
+		binind = ffs((int)(size >> (LG_TINY_MIN + 1)));
+		assert(SMALL_SIZE2BIN(i) == binind);
 	}
 	/* Quantum-spaced. */
 	for (; i <= qspace_max; i++) {
@@ -2223,17 +2190,15 @@ small_size2bin_init_hard(void)
 
 	i = 1;
 	/* Tiny. */
-	if (config_tiny) {
-		for (; i < (1U << LG_TINY_MIN); i += TINY_MIN) {
-			size = pow2_ceil(1U << LG_TINY_MIN);
-			binind = ffs((int)(size >> (LG_TINY_MIN + 1)));
-			CUSTOM_SMALL_SIZE2BIN(i) = binind;
-		}
-		for (; i < qspace_min; i += TINY_MIN) {
-			size = pow2_ceil(i);
-			binind = ffs((int)(size >> (LG_TINY_MIN + 1)));
-			CUSTOM_SMALL_SIZE2BIN(i) = binind;
-		}
+	for (; i < TINY_MIN; i += TINY_MIN) {
+		size = TINY_MIN;
+		binind = ffs((int)(size >> (LG_TINY_MIN + 1)));
+		CUSTOM_SMALL_SIZE2BIN(i) = binind;
+	}
+	for (; i < qspace_min; i += TINY_MIN) {
+		size = pow2_ceil(i);
+		binind = ffs((int)(size >> (LG_TINY_MIN + 1)));
+		CUSTOM_SMALL_SIZE2BIN(i) = binind;
 	}
 	/* Quantum-spaced. */
 	for (; i <= qspace_max; i += TINY_MIN) {
@@ -2398,17 +2363,12 @@ bin_info_init(void)
 	prev_run_size = PAGE_SIZE;
 	i = 0;
 	/* (2^n)-spaced tiny bins. */
-	if (config_tiny) {
-		for (; i < ntbins; i++) {
-			bin_info = &arena_bin_info[i];
-			bin_info->reg_size = (1U << (LG_TINY_MIN + i));
-			prev_run_size = bin_info_run_size_calc(bin_info,
-			    prev_run_size);
-			bitmap_info_init(&bin_info->bitmap_info,
-			    bin_info->nregs);
-		}
+	for (; i < ntbins; i++) {
+		bin_info = &arena_bin_info[i];
+		bin_info->reg_size = (1U << (LG_TINY_MIN + i));
+		prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size);
+		bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs);
 	}
-
 	/* Quantum-spaced bins. */
 	for (; i < ntbins + nqbins; i++) {
 		bin_info = &arena_bin_info[i];
@@ -2416,7 +2376,6 @@ bin_info_init(void)
 		prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size);
 		bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs);
 	}
-
 	/* Cacheline-spaced bins. */
 	for (; i < ntbins + nqbins + ncbins; i++) {
 		bin_info = &arena_bin_info[i];
@@ -2425,7 +2384,6 @@ bin_info_init(void)
 		prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size);
 		bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs);
 	}
-
 	/* Subpage-spaced bins. */
 	for (; i < nbins; i++) {
 		bin_info = &arena_bin_info[i];
@@ -2456,8 +2414,7 @@ arena_boot(void)
 	assert(sspace_min < PAGE_SIZE);
 	sspace_max = PAGE_SIZE - SUBPAGE;
 
-	if (config_tiny)
-		assert(LG_QUANTUM >= LG_TINY_MIN);
+	assert(LG_QUANTUM >= LG_TINY_MIN);
 	assert(ntbins <= LG_QUANTUM);
 	nqbins = qspace_max >> LG_QUANTUM;
 	ncbins = ((cspace_max - cspace_min) >> LG_CACHELINE) + 1;
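small_size2bin_validate() above checks the lookup table against the closed-form
index ffs(size >> (LG_TINY_MIN + 1)) for the sub-quantum range.  A standalone
sketch of that mapping, not jemalloc source: the QSPACE_MIN macro and the local
pow2_ceil() stand in for the quantum boundary and jemalloc's utility of the
same name, and a 16-byte quantum is assumed.

#include <stdio.h>
#include <strings.h>	/* ffs() */

#define LG_TINY_MIN	3
#define TINY_MIN	(1U << LG_TINY_MIN)
#define QSPACE_MIN	16U			/* assumption: 16-byte quantum */

/* Round up to a power of two (adequate for the small values used here). */
static unsigned
pow2_ceil(unsigned x)
{
	unsigned p = 1;

	while (p < x)
		p <<= 1;
	return (p);
}

int
main(void)
{
	unsigned i;

	for (i = 1; i < QSPACE_MIN; i++) {
		/* Same arithmetic as the validation loops above. */
		unsigned size = (i < TINY_MIN) ? TINY_MIN : pow2_ceil(i);
		int binind = ffs((int)(size >> (LG_TINY_MIN + 1)));
		printf("request %2u -> size %2u, bin %d\n", i, size, binind);
	}
	return (0);
}

Requests of 1-8 bytes land in bin 0 (the 8-byte tiny bin) and the rest of the
sub-quantum range in bin 1 (16 bytes), matching the S2B_8(0) and S2B_8(1) rows
of const_small_size2bin.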
diff --git a/src/ctl.c b/src/ctl.c
index 2ac2f66e..6d0423fa 100644
--- a/src/ctl.c
+++ b/src/ctl.c
@@ -56,7 +56,6 @@ CTL_PROTO(config_prof_libunwind)
 CTL_PROTO(config_stats)
 CTL_PROTO(config_sysv)
 CTL_PROTO(config_tcache)
-CTL_PROTO(config_tiny)
 CTL_PROTO(config_tls)
 CTL_PROTO(config_xmalloc)
 CTL_PROTO(opt_abort)
@@ -199,7 +198,6 @@ static const ctl_node_t config_node[] = {
 	{NAME("stats"),		CTL(config_stats)},
 	{NAME("sysv"),		CTL(config_sysv)},
 	{NAME("tcache"),	CTL(config_tcache)},
-	{NAME("tiny"),		CTL(config_tiny)},
 	{NAME("tls"),		CTL(config_tls)},
 	{NAME("xmalloc"),	CTL(config_xmalloc)}
 };
@@ -993,8 +991,6 @@ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,	\
 	int ret;							\
 	bool oldval;							\
 									\
-	if (n == false)							\
-		return (ENOENT);					\
 	READONLY();							\
 	oldval = n;							\
 	READ(oldval, bool);						\
@@ -1115,7 +1111,6 @@ CTL_RO_BOOL_CONFIG_GEN(config_prof_libunwind)
 CTL_RO_BOOL_CONFIG_GEN(config_stats)
 CTL_RO_BOOL_CONFIG_GEN(config_sysv)
 CTL_RO_BOOL_CONFIG_GEN(config_tcache)
-CTL_RO_BOOL_CONFIG_GEN(config_tiny)
 CTL_RO_BOOL_CONFIG_GEN(config_tls)
 CTL_RO_BOOL_CONFIG_GEN(config_xmalloc)
 
@@ -1203,8 +1198,8 @@ CTL_RO_NL_GEN(arenas_cacheline, CACHELINE, size_t)
 CTL_RO_NL_GEN(arenas_subpage, SUBPAGE, size_t)
 CTL_RO_NL_GEN(arenas_pagesize, PAGE_SIZE, size_t)
 CTL_RO_NL_GEN(arenas_chunksize, chunksize, size_t)
-CTL_RO_NL_CGEN(config_tiny, arenas_tspace_min, (1U << LG_TINY_MIN), size_t)
-CTL_RO_NL_CGEN(config_tiny, arenas_tspace_max, (qspace_min >> 1), size_t)
+CTL_RO_NL_GEN(arenas_tspace_min, TINY_MIN, size_t)
+CTL_RO_NL_GEN(arenas_tspace_max, (qspace_min >> 1), size_t)
 CTL_RO_NL_GEN(arenas_qspace_min, qspace_min, size_t)
 CTL_RO_NL_GEN(arenas_qspace_max, qspace_max, size_t)
 CTL_RO_NL_GEN(arenas_cspace_min, cspace_min, size_t)
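After this patch the tiny size class is always compiled in: the config.tiny
mallctl disappears, while arenas.tspace_min and arenas.tspace_max remain and
always report the 8-byte class.  A hedged usage sketch against the public API
of this jemalloc era; it assumes the standard installed header and an
unprefixed mallctl (adjust for --with-jemalloc-prefix builds).

#include <stdbool.h>
#include <stdio.h>
#include <jemalloc/jemalloc.h>

int
main(void)
{
	size_t min, max, sz = sizeof(size_t);
	bool tiny;
	size_t bsz = sizeof(bool);

	/* Always present now; reports 8 .. 8 with a 16-byte quantum. */
	if (mallctl("arenas.tspace_min", &min, &sz, NULL, 0) == 0 &&
	    mallctl("arenas.tspace_max", &max, &sz, NULL, 0) == 0)
		printf("tiny size classes: %zu .. %zu bytes\n", min, max);

	/* Removed by this patch: expect ENOENT rather than a false value. */
	if (mallctl("config.tiny", &tiny, &bsz, NULL, 0) != 0)
		printf("config.tiny no longer exists\n");
	return (0);
}

Callers that previously probed config.tiny before reading the tspace bounds
can drop that check, since the arenas.tspace_* entries no longer depend on it.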