Make 8-byte tiny size class non-optional.

When tiny size class support was first added, it was intended to support
truly tiny size classes (even 2 bytes).  However, this wasn't very
useful in practice, so the minimum tiny size class has been limited to
sizeof(void *) for a long time now.  This is too small to be standards
compliant, but other commonly used malloc implementations do not even
bother using a 16-byte quantum on systems with vector units (SSE2+,
AltiVec, etc.).  As such, it is safe in practice to support an 8-byte
tiny size class on 64-bit systems that support 16-byte types.
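To make the resulting layout concrete, here is a minimal standalone sketch of the
size class rounding described above; it is not jemalloc code, and it assumes a
64-bit system with a 16-byte quantum (LG_QUANTUM == 4) and LG_TINY_MIN == 3. The
helpers tiny_ceiling() and small_size_class() are hypothetical names used only
for illustration.

/*
 * Standalone illustration (not jemalloc's implementation) of small size class
 * rounding, assuming a 16-byte quantum and an 8-byte minimum tiny class.
 */
#include <assert.h>
#include <stddef.h>
#include <stdio.h>

#define	LG_TINY_MIN	3			/* smallest class: 1 << 3 = 8 bytes */
#define	TINY_MIN	((size_t)1 << LG_TINY_MIN)
#define	LG_QUANTUM	4			/* assumed 16-byte quantum */
#define	QUANTUM		((size_t)1 << LG_QUANTUM)
#define	TSPACE_MAX	(QUANTUM >> 1)		/* largest request served by a tiny bin */

/* Round a tiny request up to the nearest power of two that is >= TINY_MIN. */
static size_t
tiny_ceiling(size_t size)
{
	size_t s = TINY_MIN;

	while (s < size)
		s <<= 1;
	return (s);
}

/* Map a small request to its class: tiny bins first, then quantum multiples. */
static size_t
small_size_class(size_t size)
{

	assert(size != 0);
	if (size <= TSPACE_MAX)
		return (tiny_ceiling(size));
	/* Quantum-spaced: round up to a multiple of QUANTUM. */
	return ((size + QUANTUM - 1) & ~(QUANTUM - 1));
}

int
main(void)
{

	/* With a 16-byte quantum there is exactly one tiny bin: 1..8 -> 8. */
	printf("%zu %zu %zu %zu\n",
	    small_size_class(1),	/* 8  */
	    small_size_class(8),	/* 8  */
	    small_size_class(9),	/* 16 */
	    small_size_class(17));	/* 32 */
	return (0);
}

Under these assumptions, ntbins in the header change below reduces to
LG_QUANTUM - LG_TINY_MIN == 1, i.e. a single 8-byte tiny bin.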
Jason Evans 2012-02-13 14:30:52 -08:00
parent 0fee70d718
commit ef8897b4b9
8 changed files with 44 additions and 145 deletions

@ -90,13 +90,6 @@ any of the following arguments (not a definitive list) to 'configure':
Statically link against the specified libunwind.a rather than dynamically
linking with -lunwind.
--disable-tiny
Disable tiny (sub-quantum-sized) object support. Technically it is not
legal for a malloc implementation to allocate objects with less than
quantum alignment (8 or 16 bytes, depending on architecture), but in
practice it never causes any problems if, for example, 4-byte allocations
are 4-byte-aligned.
--disable-tcache
Disable thread-specific caches for small objects. Objects are cached and
released in bulk, thus reducing the total number of mutex operations. See

@ -560,22 +560,6 @@ if test "x$enable_prof" = "x1" ; then
fi
AC_SUBST([enable_prof])
dnl Enable tiny allocations by default.
AC_ARG_ENABLE([tiny],
[AS_HELP_STRING([--disable-tiny], [Disable tiny (sub-quantum) allocations])],
[if test "x$enable_tiny" = "xno" ; then
enable_tiny="0"
else
enable_tiny="1"
fi
],
[enable_tiny="1"]
)
if test "x$enable_tiny" = "x1" ; then
AC_DEFINE([JEMALLOC_TINY], [ ])
fi
AC_SUBST([enable_tiny])
dnl Enable thread-specific caching by default.
AC_ARG_ENABLE([tcache],
[AS_HELP_STRING([--disable-tcache], [Disable per thread caches])],
@ -934,7 +918,6 @@ AC_MSG_RESULT([prof : ${enable_prof}])
AC_MSG_RESULT([prof-libunwind : ${enable_prof_libunwind}])
AC_MSG_RESULT([prof-libgcc : ${enable_prof_libgcc}])
AC_MSG_RESULT([prof-gcc : ${enable_prof_gcc}])
AC_MSG_RESULT([tiny : ${enable_tiny}])
AC_MSG_RESULT([tcache : ${enable_tcache}])
AC_MSG_RESULT([fill : ${enable_fill}])
AC_MSG_RESULT([xmalloc : ${enable_xmalloc}])

@ -455,13 +455,12 @@ for (i = 0; i < nbins; i++) {
allocations in constant time.</para>
<para>Small objects are managed in groups by page runs. Each run maintains
a frontier and free list to track which regions are in use. Unless
<option>--disable-tiny</option> is specified during configuration,
allocation requests that are no more than half the quantum (8 or 16,
depending on architecture) are rounded up to the nearest power of two that
is at least <code language="C">sizeof(<type>void *</type>)</code>.
Allocation requests that are more than half the quantum, but no more than
the minimum cacheline-multiple size class (see the <link
a frontier and free list to track which regions are in use. Allocation
requests that are no more than half the quantum (8 or 16, depending on
architecture) are rounded up to the nearest power of two that is at least
<code language="C">sizeof(<type>void *</type>)</code>. Allocation requests
that are more than half the quantum, but no more than the minimum
cacheline-multiple size class (see the <link
linkend="opt.lg_qspace_max"><mallctl>opt.lg_qspace_max</mallctl></link>
option) are rounded up to the nearest multiple of the quantum. Allocation
requests that are more than the minimum cacheline-multiple size class, but
@ -680,16 +679,6 @@ for (i = 0; i < nbins; i++) {
during build configuration.</para></listitem>
</varlistentry>
<varlistentry>
<term>
<mallctl>config.tiny</mallctl>
(<type>bool</type>)
<literal>r-</literal>
</term>
<listitem><para><option>--disable-tiny</option> was not specified
during build configuration.</para></listitem>
</varlistentry>
<varlistentry>
<term>
<mallctl>config.tls</mallctl>

@ -17,7 +17,7 @@
(((s) + SUBPAGE_MASK) & ~SUBPAGE_MASK)
/* Smallest size class to support. */
#define LG_TINY_MIN LG_SIZEOF_PTR
#define LG_TINY_MIN 3
#define TINY_MIN (1U << LG_TINY_MIN)
/*
@ -418,18 +418,13 @@ extern uint8_t const *small_size2bin;
extern arena_bin_info_t *arena_bin_info;
/* Various bin-related settings. */
#ifdef JEMALLOC_TINY /* Number of (2^n)-spaced tiny bins. */
# define ntbins ((unsigned)(LG_QUANTUM - LG_TINY_MIN))
#else
# define ntbins 0
#endif
/* Number of (2^n)-spaced tiny bins. */
#define ntbins ((unsigned)(LG_QUANTUM - LG_TINY_MIN))
extern unsigned nqbins; /* Number of quantum-spaced bins. */
extern unsigned ncbins; /* Number of cacheline-spaced bins. */
extern unsigned nsbins; /* Number of subpage-spaced bins. */
extern unsigned nbins;
#ifdef JEMALLOC_TINY
# define tspace_max ((size_t)(QUANTUM >> 1))
#endif
#define tspace_max ((size_t)(QUANTUM >> 1))
#define qspace_min QUANTUM
extern size_t qspace_max;
extern size_t cspace_min;
@ -633,18 +628,18 @@ arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
JEMALLOC_INLINE void *
arena_malloc(size_t size, bool zero)
{
tcache_t *tcache = tcache_get();
tcache_t *tcache;
assert(size != 0);
assert(QUANTUM_CEILING(size) <= arena_maxclass);
if (size <= small_maxclass) {
if (tcache != NULL)
if ((tcache = tcache_get()) != NULL)
return (tcache_alloc_small(tcache, size, zero));
else
return (arena_malloc_small(choose_arena(), size, zero));
} else {
if (tcache != NULL && size <= tcache_maxclass)
if (size <= tcache_maxclass && (tcache = tcache_get()) != NULL)
return (tcache_alloc_large(tcache, size, zero));
else
return (arena_malloc_large(choose_arena(), size, zero));

@ -118,13 +118,6 @@ static const bool config_tcache =
false
#endif
;
static const bool config_tiny =
#ifdef JEMALLOC_TINY
true
#else
false
#endif
;
static const bool config_tls =
#ifdef JEMALLOC_TLS
true

@ -79,12 +79,6 @@
/* Use gcc intrinsics for profile backtracing if defined. */
#undef JEMALLOC_PROF_GCC
/*
* JEMALLOC_TINY enables support for tiny objects, which are smaller than one
* quantum.
*/
#undef JEMALLOC_TINY
/*
* JEMALLOC_TCACHE enables a thread-specific caching layer for small objects.
* This makes it possible to allocate/deallocate objects without any locking

@ -28,14 +28,7 @@ size_t mspace_mask;
* const_small_size2bin is a static constant lookup table that in the common
* case can be used as-is for small_size2bin.
*/
#if (LG_TINY_MIN == 2)
#define S2B_4(i) i,
#define S2B_8(i) S2B_4(i) S2B_4(i)
#elif (LG_TINY_MIN == 3)
#define S2B_8(i) i,
#else
# error "Unsupported LG_TINY_MIN"
#endif
#define S2B_16(i) S2B_8(i) S2B_8(i)
#define S2B_32(i) S2B_16(i) S2B_16(i)
#define S2B_64(i) S2B_32(i) S2B_32(i)
@ -49,23 +42,9 @@ static JEMALLOC_ATTR(aligned(CACHELINE))
const uint8_t const_small_size2bin[] = {
#if (LG_QUANTUM == 4)
/* 16-byte quantum **********************/
# ifdef JEMALLOC_TINY
# if (LG_TINY_MIN == 2)
S2B_4(0) /* 4 */
S2B_4(1) /* 8 */
S2B_8(2) /* 16 */
# define S2B_QMIN 2
# elif (LG_TINY_MIN == 3)
S2B_8(0) /* 8 */
S2B_8(1) /* 16 */
# define S2B_QMIN 1
# else
# error "Unsupported LG_TINY_MIN"
# endif
# else
S2B_16(0) /* 16 */
# define S2B_QMIN 0
# endif
S2B_16(S2B_QMIN + 1) /* 32 */
S2B_16(S2B_QMIN + 2) /* 48 */
S2B_16(S2B_QMIN + 3) /* 64 */
@ -76,18 +55,8 @@ static JEMALLOC_ATTR(aligned(CACHELINE))
# define S2B_CMIN (S2B_QMIN + 8)
#else
/* 8-byte quantum ***********************/
# ifdef JEMALLOC_TINY
# if (LG_TINY_MIN == 2)
S2B_4(0) /* 4 */
S2B_4(1) /* 8 */
# define S2B_QMIN 1
# else
# error "Unsupported LG_TINY_MIN"
# endif
# else
S2B_8(0) /* 8 */
# define S2B_QMIN 0
# endif
S2B_8(S2B_QMIN + 0) /* 8 */
S2B_8(S2B_QMIN + 1) /* 16 */
S2B_8(S2B_QMIN + 2) /* 24 */
S2B_8(S2B_QMIN + 3) /* 32 */
@ -2153,9 +2122,8 @@ small_size2bin_validate(void)
i = 1;
/* Tiny. */
if (config_tiny) {
for (; i < (1U << LG_TINY_MIN); i++) {
size = pow2_ceil(1U << LG_TINY_MIN);
for (; i < TINY_MIN; i++) {
size = TINY_MIN;
binind = ffs((int)(size >> (LG_TINY_MIN + 1)));
assert(SMALL_SIZE2BIN(i) == binind);
}
@ -2164,7 +2132,6 @@ small_size2bin_validate(void)
binind = ffs((int)(size >> (LG_TINY_MIN + 1)));
assert(SMALL_SIZE2BIN(i) == binind);
}
}
/* Quantum-spaced. */
for (; i <= qspace_max; i++) {
size = QUANTUM_CEILING(i);
@ -2223,9 +2190,8 @@ small_size2bin_init_hard(void)
i = 1;
/* Tiny. */
if (config_tiny) {
for (; i < (1U << LG_TINY_MIN); i += TINY_MIN) {
size = pow2_ceil(1U << LG_TINY_MIN);
for (; i < TINY_MIN; i += TINY_MIN) {
size = TINY_MIN;
binind = ffs((int)(size >> (LG_TINY_MIN + 1)));
CUSTOM_SMALL_SIZE2BIN(i) = binind;
}
@ -2234,7 +2200,6 @@ small_size2bin_init_hard(void)
binind = ffs((int)(size >> (LG_TINY_MIN + 1)));
CUSTOM_SMALL_SIZE2BIN(i) = binind;
}
}
/* Quantum-spaced. */
for (; i <= qspace_max; i += TINY_MIN) {
size = QUANTUM_CEILING(i);
@ -2398,17 +2363,12 @@ bin_info_init(void)
prev_run_size = PAGE_SIZE;
i = 0;
/* (2^n)-spaced tiny bins. */
if (config_tiny) {
for (; i < ntbins; i++) {
bin_info = &arena_bin_info[i];
bin_info->reg_size = (1U << (LG_TINY_MIN + i));
prev_run_size = bin_info_run_size_calc(bin_info,
prev_run_size);
bitmap_info_init(&bin_info->bitmap_info,
bin_info->nregs);
prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size);
bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs);
}
}
/* Quantum-spaced bins. */
for (; i < ntbins + nqbins; i++) {
bin_info = &arena_bin_info[i];
@ -2416,7 +2376,6 @@ bin_info_init(void)
prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size);
bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs);
}
/* Cacheline-spaced bins. */
for (; i < ntbins + nqbins + ncbins; i++) {
bin_info = &arena_bin_info[i];
@ -2425,7 +2384,6 @@ bin_info_init(void)
prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size);
bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs);
}
/* Subpage-spaced bins. */
for (; i < nbins; i++) {
bin_info = &arena_bin_info[i];
@ -2456,7 +2414,6 @@ arena_boot(void)
assert(sspace_min < PAGE_SIZE);
sspace_max = PAGE_SIZE - SUBPAGE;
if (config_tiny)
assert(LG_QUANTUM >= LG_TINY_MIN);
assert(ntbins <= LG_QUANTUM);
nqbins = qspace_max >> LG_QUANTUM;

@ -56,7 +56,6 @@ CTL_PROTO(config_prof_libunwind)
CTL_PROTO(config_stats)
CTL_PROTO(config_sysv)
CTL_PROTO(config_tcache)
CTL_PROTO(config_tiny)
CTL_PROTO(config_tls)
CTL_PROTO(config_xmalloc)
CTL_PROTO(opt_abort)
@ -199,7 +198,6 @@ static const ctl_node_t config_node[] = {
{NAME("stats"), CTL(config_stats)},
{NAME("sysv"), CTL(config_sysv)},
{NAME("tcache"), CTL(config_tcache)},
{NAME("tiny"), CTL(config_tiny)},
{NAME("tls"), CTL(config_tls)},
{NAME("xmalloc"), CTL(config_xmalloc)}
};
@ -993,8 +991,6 @@ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \
int ret; \
bool oldval; \
\
if (n == false) \
return (ENOENT); \
READONLY(); \
oldval = n; \
READ(oldval, bool); \
@ -1115,7 +1111,6 @@ CTL_RO_BOOL_CONFIG_GEN(config_prof_libunwind)
CTL_RO_BOOL_CONFIG_GEN(config_stats)
CTL_RO_BOOL_CONFIG_GEN(config_sysv)
CTL_RO_BOOL_CONFIG_GEN(config_tcache)
CTL_RO_BOOL_CONFIG_GEN(config_tiny)
CTL_RO_BOOL_CONFIG_GEN(config_tls)
CTL_RO_BOOL_CONFIG_GEN(config_xmalloc)
@ -1203,8 +1198,8 @@ CTL_RO_NL_GEN(arenas_cacheline, CACHELINE, size_t)
CTL_RO_NL_GEN(arenas_subpage, SUBPAGE, size_t)
CTL_RO_NL_GEN(arenas_pagesize, PAGE_SIZE, size_t)
CTL_RO_NL_GEN(arenas_chunksize, chunksize, size_t)
CTL_RO_NL_CGEN(config_tiny, arenas_tspace_min, (1U << LG_TINY_MIN), size_t)
CTL_RO_NL_CGEN(config_tiny, arenas_tspace_max, (qspace_min >> 1), size_t)
CTL_RO_NL_GEN(arenas_tspace_min, TINY_MIN, size_t)
CTL_RO_NL_GEN(arenas_tspace_max, (qspace_min >> 1), size_t)
CTL_RO_NL_GEN(arenas_qspace_min, qspace_min, size_t)
CTL_RO_NL_GEN(arenas_qspace_max, qspace_max, size_t)
CTL_RO_NL_GEN(arenas_cspace_min, cspace_min, size_t)