Make 8-byte tiny size class non-optional.
When tiny size class support was first added, it was intended to support truly tiny size classes (even 2 bytes). However, this wasn't very useful in practice, so the minimum tiny size class has been limited to sizeof(void *) for a long time now. This is too small to be standards-compliant, but other commonly used malloc implementations do not even bother using a 16-byte quantum on systems with vector units (SSE2+, AltiVec, etc.). As such, it is safe in practice to support an 8-byte tiny size class on 64-bit systems that support 16-byte types.
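As a rough illustration of the resulting layout (a sketch, not jemalloc source: the constants mirror the patch, but the rounding helper is a simplification of the real size2bin machinery), with LG_TINY_MIN fixed at 3, a 16-byte-quantum system has exactly one tiny bin, and 1-8 byte requests are served at 8-byte alignment:

    #include <assert.h>
    #include <stddef.h>
    #include <stdio.h>

    #define LG_TINY_MIN 3
    #define TINY_MIN    ((size_t)1 << LG_TINY_MIN)  /* 8 bytes  */
    #define LG_QUANTUM  4
    #define QUANTUM     ((size_t)1 << LG_QUANTUM)   /* 16 bytes */

    /* Round a small request to its size class: one 8-byte tiny bin,
     * then multiples of the quantum. */
    static size_t
    small_class(size_t size)
    {
        if (size <= TINY_MIN)
            return (TINY_MIN);
        return ((size + QUANTUM - 1) & ~(QUANTUM - 1));
    }

    int
    main(void)
    {
        assert(small_class(1) == 8);    /* the now always-on tiny bin */
        assert(small_class(8) == 8);
        assert(small_class(9) == 16);   /* smallest quantum-spaced bin */
        assert(small_class(17) == 32);
        printf("ntbins = %d\n", LG_QUANTUM - LG_TINY_MIN);  /* 1 */
        return (0);
    }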
parent 0fee70d718
commit ef8897b4b9

INSTALL | 7
@@ -90,13 +90,6 @@ any of the following arguments (not a definitive list) to 'configure':
     Statically link against the specified libunwind.a rather than dynamically
     linking with -lunwind.
 
---disable-tiny
-    Disable tiny (sub-quantum-sized) object support.  Technically it is not
-    legal for a malloc implementation to allocate objects with less than
-    quantum alignment (8 or 16 bytes, depending on architecture), but in
-    practice it never causes any problems if, for example, 4-byte allocations
-    are 4-byte-aligned.
-
 --disable-tcache
     Disable thread-specific caches for small objects.  Objects are cached and
     released in bulk, thus reducing the total number of mutex operations.  See
configure.ac | 17

@@ -560,22 +560,6 @@ if test "x$enable_prof" = "x1" ; then
 fi
 AC_SUBST([enable_prof])
 
-dnl Enable tiny allocations by default.
-AC_ARG_ENABLE([tiny],
-  [AS_HELP_STRING([--disable-tiny], [Disable tiny (sub-quantum) allocations])],
-[if test "x$enable_tiny" = "xno" ; then
-  enable_tiny="0"
-else
-  enable_tiny="1"
-fi
-],
-[enable_tiny="1"]
-)
-if test "x$enable_tiny" = "x1" ; then
-  AC_DEFINE([JEMALLOC_TINY], [ ])
-fi
-AC_SUBST([enable_tiny])
-
 dnl Enable thread-specific caching by default.
 AC_ARG_ENABLE([tcache],
   [AS_HELP_STRING([--disable-tcache], [Disable per thread caches])],
@@ -934,7 +918,6 @@ AC_MSG_RESULT([prof : ${enable_prof}])
 AC_MSG_RESULT([prof-libunwind : ${enable_prof_libunwind}])
 AC_MSG_RESULT([prof-libgcc : ${enable_prof_libgcc}])
 AC_MSG_RESULT([prof-gcc : ${enable_prof_gcc}])
-AC_MSG_RESULT([tiny : ${enable_tiny}])
 AC_MSG_RESULT([tcache : ${enable_tcache}])
 AC_MSG_RESULT([fill : ${enable_fill}])
 AC_MSG_RESULT([xmalloc : ${enable_xmalloc}])
@@ -455,13 +455,12 @@ for (i = 0; i < nbins; i++) {
     allocations in constant time.</para>
 
     <para>Small objects are managed in groups by page runs.  Each run maintains
-    a frontier and free list to track which regions are in use.  Unless
-    <option>--disable-tiny</option> is specified during configuration,
-    allocation requests that are no more than half the quantum (8 or 16,
-    depending on architecture) are rounded up to the nearest power of two that
-    is at least <code language="C">sizeof(<type>void *</type>)</code>.
-    Allocation requests that are more than half the quantum, but no more than
-    the minimum cacheline-multiple size class (see the <link
+    a frontier and free list to track which regions are in use.  Allocation
+    requests that are no more than half the quantum (8 or 16, depending on
+    architecture) are rounded up to the nearest power of two that is at least
+    <code language="C">sizeof(<type>void *</type>)</code>.  Allocation requests
+    that are more than half the quantum, but no more than the minimum
+    cacheline-multiple size class (see the <link
     linkend="opt.lg_qspace_max"><mallctl>opt.lg_qspace_max</mallctl></link>
     option) are rounded up to the nearest multiple of the quantum.  Allocation
     requests that are more than the minimum cacheline-multiple size class, but
@@ -680,16 +679,6 @@ for (i = 0; i < nbins; i++) {
         during build configuration.</para></listitem>
       </varlistentry>
 
-      <varlistentry>
-        <term>
-          <mallctl>config.tiny</mallctl>
-          (<type>bool</type>)
-          <literal>r-</literal>
-        </term>
-        <listitem><para><option>--disable-tiny</option> was not specified
-        during build configuration.</para></listitem>
-      </varlistentry>
-
       <varlistentry>
         <term>
           <mallctl>config.tls</mallctl>
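The power-of-two rounding described above is what pow2_ceil() implements in the hunks further down. A portable sketch of that behavior (this reimplementation and its driver are illustrative, not the library's source; the final assertion assumes a 64-bit system, matching the commit's scope):

    #include <assert.h>
    #include <stddef.h>
    #include <stdint.h>

    static size_t
    pow2_ceil(size_t x)
    {
        x--;
        x |= x >> 1;
        x |= x >> 2;
        x |= x >> 4;
        x |= x >> 8;
        x |= x >> 16;
    #if (SIZE_MAX > 0xffffffffU)
        x |= x >> 32;
    #endif
        x++;
        return (x);
    }

    int
    main(void)
    {
        size_t sz = pow2_ceil(3);
        if (sz < sizeof(void *))
            sz = sizeof(void *);
        assert(sz == 8);    /* a 3-byte request lands in the 8-byte tiny bin */
        return (0);
    }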
@@ -17,7 +17,7 @@
 	(((s) + SUBPAGE_MASK) & ~SUBPAGE_MASK)
 
 /* Smallest size class to support. */
-#define	LG_TINY_MIN		LG_SIZEOF_PTR
+#define	LG_TINY_MIN		3
 #define	TINY_MIN		(1U << LG_TINY_MIN)
 
 /*
@@ -418,18 +418,13 @@ extern uint8_t const *small_size2bin;
 extern arena_bin_info_t	*arena_bin_info;
 
 /* Various bin-related settings. */
-#ifdef JEMALLOC_TINY		/* Number of (2^n)-spaced tiny bins. */
+/* Number of (2^n)-spaced tiny bins. */
 #define	ntbins		((unsigned)(LG_QUANTUM - LG_TINY_MIN))
-#else
-#  define ntbins	0
-#endif
 extern unsigned		nqbins; /* Number of quantum-spaced bins. */
 extern unsigned		ncbins; /* Number of cacheline-spaced bins. */
 extern unsigned		nsbins; /* Number of subpage-spaced bins. */
 extern unsigned		nbins;
-#ifdef JEMALLOC_TINY
 #define	tspace_max	((size_t)(QUANTUM >> 1))
-#endif
 #define	qspace_min	QUANTUM
 extern size_t		qspace_max;
 extern size_t		cspace_min;
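With LG_TINY_MIN pinned at 3, the now-unconditional ntbins definition degenerates gracefully: one 8-byte tiny bin under a 16-byte quantum, none when the quantum is itself 8 bytes. A standalone check (not part of the patch; NTBINS is a hypothetical mirror of the macro above):

    #define LG_TINY_MIN 3
    #define NTBINS(lg_quantum) ((unsigned)((lg_quantum) - LG_TINY_MIN))

    _Static_assert(NTBINS(4) == 1, "16-byte quantum: one 8-byte tiny bin");
    _Static_assert(NTBINS(3) == 0, "8-byte quantum: no tiny bins");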
@@ -633,18 +628,18 @@ arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
 JEMALLOC_INLINE void *
 arena_malloc(size_t size, bool zero)
 {
-	tcache_t *tcache = tcache_get();
+	tcache_t *tcache;
 
 	assert(size != 0);
 	assert(QUANTUM_CEILING(size) <= arena_maxclass);
 
 	if (size <= small_maxclass) {
-		if (tcache != NULL)
+		if ((tcache = tcache_get()) != NULL)
 			return (tcache_alloc_small(tcache, size, zero));
 		else
 			return (arena_malloc_small(choose_arena(), size, zero));
 	} else {
-		if (tcache != NULL && size <= tcache_maxclass)
+		if (size <= tcache_maxclass && (tcache = tcache_get()) != NULL)
 			return (tcache_alloc_large(tcache, size, zero));
 		else
 			return (arena_malloc_large(choose_arena(), size, zero));
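Note that this hunk also defers the tcache_get() thread-local lookup until a cached path can actually be taken. A standalone sketch of that lazy-lookup pattern (every name here is a hypothetical stand-in, not jemalloc's internals):

    #include <stddef.h>
    #include <stdlib.h>

    typedef struct { int dummy; } tcache_t;

    static tcache_t *
    tcache_get(void)
    {
        return (NULL);  /* stub: pretend this thread has no cache */
    }

    static void *
    cache_alloc(tcache_t *tcache, size_t size)
    {
        (void)tcache;
        return (malloc(size));  /* stand-in for a real cached allocation */
    }

    static void *
    my_alloc(size_t size, size_t cache_maxclass)
    {
        tcache_t *tcache;

        /*
         * Evaluate the cheap size predicate first so the thread-local
         * lookup is skipped entirely on paths that cannot use the cache.
         */
        if (size <= cache_maxclass && (tcache = tcache_get()) != NULL)
            return (cache_alloc(tcache, size));
        return (malloc(size));
    }

    int
    main(void)
    {
        free(my_alloc(32, 4096));
        return (0);
    }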
@@ -118,13 +118,6 @@ static const bool config_tcache =
     false
 #endif
     ;
-static const bool config_tiny =
-#ifdef JEMALLOC_TINY
-    true
-#else
-    false
-#endif
-    ;
 static const bool config_tls =
 #ifdef JEMALLOC_TLS
     true
@@ -79,12 +79,6 @@
 /* Use gcc intrinsics for profile backtracing if defined. */
 #undef JEMALLOC_PROF_GCC
 
-/*
- * JEMALLOC_TINY enables support for tiny objects, which are smaller than one
- * quantum.
- */
-#undef JEMALLOC_TINY
-
 /*
  * JEMALLOC_TCACHE enables a thread-specific caching layer for small objects.
  * This makes it possible to allocate/deallocate objects without any locking
src/arena.c | 57

@@ -28,14 +28,7 @@ size_t	mspace_mask;
  * const_small_size2bin is a static constant lookup table that in the common
  * case can be used as-is for small_size2bin.
  */
-#if (LG_TINY_MIN == 2)
-#define	S2B_4(i)	i,
-#define	S2B_8(i)	S2B_4(i) S2B_4(i)
-#elif (LG_TINY_MIN == 3)
 #define	S2B_8(i)	i,
-#else
-#  error "Unsupported LG_TINY_MIN"
-#endif
 #define	S2B_16(i)	S2B_8(i) S2B_8(i)
 #define	S2B_32(i)	S2B_16(i) S2B_16(i)
 #define	S2B_64(i)	S2B_32(i) S2B_32(i)
@@ -49,23 +42,9 @@ static JEMALLOC_ATTR(aligned(CACHELINE))
 const uint8_t	const_small_size2bin[] = {
 #if (LG_QUANTUM == 4)
 /* 16-byte quantum **********************/
-#  ifdef JEMALLOC_TINY
-#  if (LG_TINY_MIN == 2)
-	S2B_4(0)		/*    4 */
-	S2B_4(1)		/*    8 */
-	S2B_8(2)		/*   16 */
-#  define S2B_QMIN 2
-#  elif (LG_TINY_MIN == 3)
 	S2B_8(0)		/*    8 */
 	S2B_8(1)		/*   16 */
 #  define S2B_QMIN 1
-#  else
-#  error "Unsupported LG_TINY_MIN"
-#  endif
-#  else
-	S2B_16(0)		/*   16 */
-#  define S2B_QMIN 0
-#  endif
 	S2B_16(S2B_QMIN + 1)	/*   32 */
 	S2B_16(S2B_QMIN + 2)	/*   48 */
 	S2B_16(S2B_QMIN + 3)	/*   64 */
@@ -76,18 +55,8 @@ static JEMALLOC_ATTR(aligned(CACHELINE))
 #  define S2B_CMIN (S2B_QMIN + 8)
 #else
 /* 8-byte quantum ***********************/
-#  ifdef JEMALLOC_TINY
-#  if (LG_TINY_MIN == 2)
-	S2B_4(0)		/*    4 */
-	S2B_4(1)		/*    8 */
-#  define S2B_QMIN 1
-#  else
-#  error "Unsupported LG_TINY_MIN"
-#  endif
-#  else
-	S2B_8(0)		/*    8 */
-#  endif
+#  define S2B_QMIN 0
 	S2B_8(S2B_QMIN + 0)	/*    8 */
 	S2B_8(S2B_QMIN + 1)	/*   16 */
 	S2B_8(S2B_QMIN + 2)	/*   24 */
 	S2B_8(S2B_QMIN + 3)	/*   32 */
@@ -2153,9 +2122,8 @@ small_size2bin_validate(void)
 
 	i = 1;
 	/* Tiny. */
-	if (config_tiny) {
-		for (; i < (1U << LG_TINY_MIN); i++) {
-			size = pow2_ceil(1U << LG_TINY_MIN);
+	for (; i < TINY_MIN; i++) {
+		size = TINY_MIN;
 		binind = ffs((int)(size >> (LG_TINY_MIN + 1)));
 		assert(SMALL_SIZE2BIN(i) == binind);
 	}
@@ -2164,7 +2132,6 @@ small_size2bin_validate(void)
 		binind = ffs((int)(size >> (LG_TINY_MIN + 1)));
 		assert(SMALL_SIZE2BIN(i) == binind);
 	}
-	}
 	/* Quantum-spaced. */
 	for (; i <= qspace_max; i++) {
 		size = QUANTUM_CEILING(i);
@@ -2223,9 +2190,8 @@ small_size2bin_init_hard(void)
 
 	i = 1;
 	/* Tiny. */
-	if (config_tiny) {
-		for (; i < (1U << LG_TINY_MIN); i += TINY_MIN) {
-			size = pow2_ceil(1U << LG_TINY_MIN);
+	for (; i < TINY_MIN; i += TINY_MIN) {
+		size = TINY_MIN;
 		binind = ffs((int)(size >> (LG_TINY_MIN + 1)));
 		CUSTOM_SMALL_SIZE2BIN(i) = binind;
 	}
@@ -2234,7 +2200,6 @@ small_size2bin_init_hard(void)
 		binind = ffs((int)(size >> (LG_TINY_MIN + 1)));
 		CUSTOM_SMALL_SIZE2BIN(i) = binind;
 	}
-	}
 	/* Quantum-spaced. */
 	for (; i <= qspace_max; i += TINY_MIN) {
 		size = QUANTUM_CEILING(i);
@@ -2398,17 +2363,12 @@ bin_info_init(void)
 	prev_run_size = PAGE_SIZE;
 	i = 0;
 	/* (2^n)-spaced tiny bins. */
-	if (config_tiny) {
 	for (; i < ntbins; i++) {
 		bin_info = &arena_bin_info[i];
 		bin_info->reg_size = (1U << (LG_TINY_MIN + i));
-		prev_run_size = bin_info_run_size_calc(bin_info,
-		    prev_run_size);
-		bitmap_info_init(&bin_info->bitmap_info,
-		    bin_info->nregs);
+		prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size);
+		bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs);
 	}
-	}
-
 	/* Quantum-spaced bins. */
 	for (; i < ntbins + nqbins; i++) {
 		bin_info = &arena_bin_info[i];
@@ -2416,7 +2376,6 @@ bin_info_init(void)
 		prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size);
 		bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs);
 	}
-
 	/* Cacheline-spaced bins. */
 	for (; i < ntbins + nqbins + ncbins; i++) {
 		bin_info = &arena_bin_info[i];
@@ -2425,7 +2384,6 @@ bin_info_init(void)
 		prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size);
 		bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs);
 	}
-
 	/* Subpage-spaced bins. */
 	for (; i < nbins; i++) {
 		bin_info = &arena_bin_info[i];
@@ -2456,7 +2414,6 @@ arena_boot(void)
 	assert(sspace_min < PAGE_SIZE);
 	sspace_max = PAGE_SIZE - SUBPAGE;
 
-	if (config_tiny)
 	assert(LG_QUANTUM >= LG_TINY_MIN);
 	assert(ntbins <= LG_QUANTUM);
 	nqbins = qspace_max >> LG_QUANTUM;
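The bin-index formula ffs((int)(size >> (LG_TINY_MIN + 1))) retained above maps the single 8-byte tiny class to bin 0 and the first quantum class to bin 1. A worked check (illustrative only):

    #include <assert.h>
    #include <strings.h>    /* ffs() */

    #define LG_TINY_MIN 3

    int
    main(void)
    {
        /* size 8, the lone tiny class: 8 >> 4 == 0, ffs(0) == 0 -> bin 0 */
        assert(ffs(8 >> (LG_TINY_MIN + 1)) == 0);
        /* size 16, first quantum class: 16 >> 4 == 1, ffs(1) == 1 -> bin 1 */
        assert(ffs(16 >> (LG_TINY_MIN + 1)) == 1);
        return (0);
    }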
@@ -56,7 +56,6 @@ CTL_PROTO(config_prof_libunwind)
 CTL_PROTO(config_stats)
 CTL_PROTO(config_sysv)
 CTL_PROTO(config_tcache)
-CTL_PROTO(config_tiny)
 CTL_PROTO(config_tls)
 CTL_PROTO(config_xmalloc)
 CTL_PROTO(opt_abort)
@@ -199,7 +198,6 @@ static const ctl_node_t config_node[] = {
 	{NAME("stats"),		CTL(config_stats)},
 	{NAME("sysv"),		CTL(config_sysv)},
 	{NAME("tcache"),	CTL(config_tcache)},
-	{NAME("tiny"),		CTL(config_tiny)},
 	{NAME("tls"),		CTL(config_tls)},
 	{NAME("xmalloc"),	CTL(config_xmalloc)}
 };
@@ -993,8 +991,6 @@ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \
 	int ret;							\
 	bool oldval;							\
 									\
-	if (n == false)							\
-		return (ENOENT);					\
 	READONLY();							\
 	oldval = n;							\
 	READ(oldval, bool);						\
@@ -1115,7 +1111,6 @@ CTL_RO_BOOL_CONFIG_GEN(config_prof_libunwind)
 CTL_RO_BOOL_CONFIG_GEN(config_stats)
 CTL_RO_BOOL_CONFIG_GEN(config_sysv)
 CTL_RO_BOOL_CONFIG_GEN(config_tcache)
-CTL_RO_BOOL_CONFIG_GEN(config_tiny)
 CTL_RO_BOOL_CONFIG_GEN(config_tls)
 CTL_RO_BOOL_CONFIG_GEN(config_xmalloc)
 
@@ -1203,8 +1198,8 @@ CTL_RO_NL_GEN(arenas_cacheline, CACHELINE, size_t)
 CTL_RO_NL_GEN(arenas_subpage, SUBPAGE, size_t)
 CTL_RO_NL_GEN(arenas_pagesize, PAGE_SIZE, size_t)
 CTL_RO_NL_GEN(arenas_chunksize, chunksize, size_t)
-CTL_RO_NL_CGEN(config_tiny, arenas_tspace_min, (1U << LG_TINY_MIN), size_t)
-CTL_RO_NL_CGEN(config_tiny, arenas_tspace_max, (qspace_min >> 1), size_t)
+CTL_RO_NL_GEN(arenas_tspace_min, TINY_MIN, size_t)
+CTL_RO_NL_GEN(arenas_tspace_max, (qspace_min >> 1), size_t)
 CTL_RO_NL_GEN(arenas_qspace_min, qspace_min, size_t)
 CTL_RO_NL_GEN(arenas_qspace_max, qspace_max, size_t)
 CTL_RO_NL_GEN(arenas_cspace_min, cspace_min, size_t)
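With arenas.tspace_min now reported unconditionally, the tiny bounds can be read through the usual mallctl() interface. A usage sketch (assumes a jemalloc build from this revision with the default unprefixed public names; on a 16-byte-quantum system both bounds come back as 8):

    #include <stdio.h>
    #include <jemalloc/jemalloc.h>

    int
    main(void)
    {
        size_t tmin, tmax, len = sizeof(size_t);

        if (mallctl("arenas.tspace_min", &tmin, &len, NULL, 0) == 0 &&
            mallctl("arenas.tspace_max", &tmax, &len, NULL, 0) == 0)
            printf("tiny classes: [%zu, %zu]\n", tmin, tmax);
        return (0);
    }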