Optimize [nmd]alloc() fast paths.

Optimize [nmd]alloc() fast paths such that the (flags == 0) case is
streamlined, flags decoding only happens to the minimum degree
necessary, and no conditionals are repeated.
Author: Jason Evans
Date:   2014-09-07 14:40:19 -07:00
Commit: b718cf77e9 (parent: c21b05ea09)

7 changed files with 172 additions and 131 deletions
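The core of the change is a two-tier flags decode: mallocx() calls with (flags == 0) skip decoding entirely, while non-zero flags are unpacked exactly once on a separate slow path. A minimal standalone sketch of the pattern (invented sk_* names and a simplified flag layout, not jemalloc's own code):

#include <stdbool.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>

#define SK_LG_ALIGN_MASK 0x3f /* low bits hold lg(alignment), like MALLOCX_LG_ALIGN() */
#define SK_ZERO 0x40          /* zero the allocation, like MALLOCX_ZERO */

/* Slow path: decode every flag field exactly once, then dispatch. */
static void *
sk_alloc_hard(size_t size, int flags)
{
    size_t alignment = (size_t)1 << (flags & SK_LG_ALIGN_MASK);
    bool zero = (flags & SK_ZERO) != 0;
    void *p;

    if (alignment > 1) {
        if (posix_memalign(&p, alignment < sizeof(void *) ?
            sizeof(void *) : alignment, size) != 0)
            return (NULL);
        if (zero)
            memset(p, 0, size);
        return (p);
    }
    return (zero ? calloc(1, size) : malloc(size));
}

/* Fast path: (flags == 0) goes straight to malloc(), no decoding at all. */
static void *
sk_alloc(size_t size, int flags)
{
    if (flags == 0)
        return (malloc(size));
    return (sk_alloc_hard(size, flags));
}

The diffs below apply the same split to the [nmd]allocx() entry points via imallocx_flags_decode() and the MALLOCX_*_GET() accessors.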

src/arena.c

@@ -2108,7 +2108,7 @@ arena_ralloc(arena_t *arena, void *ptr, size_t oldsize, size_t size,
     copysize = (size < oldsize) ? size : oldsize;
     JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, copysize);
     memcpy(ret, ptr, copysize);
-    iqalloct(ptr, try_tcache_dalloc);
+    iqalloc(ptr, try_tcache_dalloc);
     return (ret);
 }


src/huge.c

@@ -129,7 +129,7 @@ huge_ralloc(arena_t *arena, void *ptr, size_t oldsize, size_t size,
      */
     copysize = (size < oldsize) ? size : oldsize;
     memcpy(ret, ptr, copysize);
-    iqalloct(ptr, try_tcache_dalloc);
+    iqalloc(ptr, try_tcache_dalloc);
     return (ret);
 }


src/jemalloc.c

@@ -870,7 +870,7 @@ imalloc_prof_sample(size_t usize, prof_tctx_t *tctx)
     if (tctx == NULL)
         return (NULL);
     if (usize <= SMALL_MAXCLASS) {
-        p = imalloc(SMALL_MAXCLASS+1);
+        p = imalloc(LARGE_MINCLASS);
         if (p == NULL)
             return (NULL);
         arena_prof_promoted(p, usize);
@@ -950,9 +950,8 @@ imemalign_prof_sample(size_t alignment, size_t usize, prof_tctx_t *tctx)
     if (tctx == NULL)
         return (NULL);
     if (usize <= SMALL_MAXCLASS) {
-        assert(sa2u(SMALL_MAXCLASS+1, alignment) != 0);
-        p = ipalloc(sa2u(SMALL_MAXCLASS+1, alignment), alignment,
-            false);
+        assert(sa2u(LARGE_MINCLASS, alignment) == LARGE_MINCLASS);
+        p = imalloc(LARGE_MINCLASS);
         if (p == NULL)
             return (NULL);
         arena_prof_promoted(p, usize);
@@ -1077,7 +1076,7 @@ icalloc_prof_sample(size_t usize, prof_tctx_t *tctx)
     if (tctx == NULL)
         return (NULL);
     if (usize <= SMALL_MAXCLASS) {
-        p = icalloc(SMALL_MAXCLASS+1);
+        p = icalloc(LARGE_MINCLASS);
         if (p == NULL)
             return (NULL);
         arena_prof_promoted(p, usize);
@@ -1174,12 +1173,12 @@ irealloc_prof_sample(void *oldptr, size_t usize, prof_tctx_t *tctx)
     if (tctx == NULL)
         return (NULL);
     if (usize <= SMALL_MAXCLASS) {
-        p = iralloc(oldptr, SMALL_MAXCLASS+1, 0, 0, false);
+        p = iralloc(oldptr, LARGE_MINCLASS, 0, false);
         if (p == NULL)
             return (NULL);
         arena_prof_promoted(p, usize);
     } else
-        p = iralloc(oldptr, usize, 0, 0, false);
+        p = iralloc(oldptr, usize, 0, false);

     return (p);
 }
@@ -1194,7 +1193,7 @@ irealloc_prof(void *oldptr, size_t old_usize, size_t usize, prof_tctx_t *tctx)
     if ((uintptr_t)tctx != (uintptr_t)1U)
         p = irealloc_prof_sample(oldptr, usize, tctx);
     else
-        p = iralloc(oldptr, usize, 0, 0, false);
+        p = iralloc(oldptr, usize, 0, false);
     if (p == NULL)
         return (NULL);
     prof_realloc(p, usize, tctx, old_usize, old_tctx);
@@ -1203,7 +1202,7 @@ irealloc_prof(void *oldptr, size_t old_usize, size_t usize, prof_tctx_t *tctx)
 }

 JEMALLOC_INLINE_C void
-ifree(void *ptr)
+ifree(void *ptr, bool try_tcache)
 {
     size_t usize;
     UNUSED size_t rzsize JEMALLOC_CC_SILENCE_INIT(0);
@@ -1220,7 +1219,7 @@ ifree(void *ptr)
     if (config_stats)
         thread_allocated_tsd_get()->deallocated += usize;
     if (config_valgrind && in_valgrind)
         rzsize = p2rz(ptr);
-    iqalloc(ptr);
+    iqalloc(ptr, try_tcache);
     JEMALLOC_VALGRIND_FREE(ptr, rzsize);
 }
@@ -1236,7 +1235,7 @@ je_realloc(void *ptr, size_t size)
         if (ptr != NULL) {
             /* realloc(ptr, 0) is equivalent to free(ptr). */
             UTRACE(ptr, 0, 0);
-            ifree(ptr);
+            ifree(ptr, true);
             return (NULL);
         }
         size = 1;
@@ -1261,7 +1260,7 @@ je_realloc(void *ptr, size_t size)
         } else {
             if (config_stats || (config_valgrind && in_valgrind))
                 usize = s2u(size);
-            ret = iralloc(ptr, size, 0, 0, false);
+            ret = iralloc(ptr, size, 0, false);
         }
     } else {
         /* realloc(NULL, size) is equivalent to malloc(size). */
@@ -1295,7 +1294,7 @@ je_free(void *ptr)

     UTRACE(ptr, 0, 0);
     if (ptr != NULL)
-        ifree(ptr);
+        ifree(ptr, true);
 }

 /*
@@ -1363,99 +1362,153 @@ JEMALLOC_EXPORT void *(*__memalign_hook)(size_t alignment, size_t size) =
  * Begin non-standard functions.
  */

+JEMALLOC_ALWAYS_INLINE_C void
+imallocx_flags_decode_hard(size_t size, int flags, size_t *usize,
+    size_t *alignment, bool *zero, bool *try_tcache, arena_t **arena)
+{
+
+    if ((flags & MALLOCX_LG_ALIGN_MASK) == 0) {
+        *alignment = 0;
+        *usize = s2u(size);
+    } else {
+        *alignment = MALLOCX_ALIGN_GET_SPECIFIED(flags);
+        *usize = sa2u(size, *alignment);
+    }
+    *zero = MALLOCX_ZERO_GET(flags);
+    if ((flags & MALLOCX_ARENA_MASK) != 0) {
+        unsigned arena_ind = MALLOCX_ARENA_GET(flags);
+        *try_tcache = false;
+        *arena = arenas[arena_ind];
+    } else {
+        *try_tcache = true;
+        *arena = NULL;
+    }
+}
+
+JEMALLOC_ALWAYS_INLINE_C void
+imallocx_flags_decode(size_t size, int flags, size_t *usize, size_t *alignment,
+    bool *zero, bool *try_tcache, arena_t **arena)
+{
+
+    if (flags == 0) {
+        *usize = s2u(size);
+        assert(usize != 0);
+        *alignment = 0;
+        *zero = false;
+        *try_tcache = true;
+        *arena = NULL;
+    } else {
+        imallocx_flags_decode_hard(size, flags, usize, alignment, zero,
+            try_tcache, arena);
+    }
+}
+
 JEMALLOC_ALWAYS_INLINE_C void *
-imallocx(size_t usize, size_t alignment, bool zero, bool try_tcache,
+imallocx_flags(size_t usize, size_t alignment, bool zero, bool try_tcache,
     arena_t *arena)
 {

-    assert(usize == ((alignment == 0) ? s2u(usize) : sa2u(usize,
-        alignment)));
-
     if (alignment != 0)
         return (ipalloct(usize, alignment, zero, try_tcache, arena));
-    else if (zero)
+    if (zero)
         return (icalloct(usize, try_tcache, arena));
-    else
-        return (imalloct(usize, try_tcache, arena));
+    return (imalloct(usize, try_tcache, arena));
 }

+JEMALLOC_ALWAYS_INLINE_C void *
+imallocx_maybe_flags(size_t size, int flags, size_t usize, size_t alignment,
+    bool zero, bool try_tcache, arena_t *arena)
+{
+
+    if (flags == 0)
+        return (imalloc(size));
+    return (imallocx_flags(usize, alignment, zero, try_tcache, arena));
+}
+
 static void *
-imallocx_prof_sample(size_t usize, size_t alignment, bool zero, bool try_tcache,
-    arena_t *arena, prof_tctx_t *tctx)
+imallocx_prof_sample(size_t size, int flags, size_t usize, size_t alignment,
+    bool zero, bool try_tcache, arena_t *arena)
 {
     void *p;

-    if (tctx == NULL)
-        return (NULL);
     if (usize <= SMALL_MAXCLASS) {
-        size_t usize_promoted = (alignment == 0) ?
-            s2u(SMALL_MAXCLASS+1) : sa2u(SMALL_MAXCLASS+1, alignment);
-        assert(usize_promoted != 0);
-        p = imallocx(usize_promoted, alignment, zero, try_tcache,
-            arena);
+        assert(((alignment == 0) ? s2u(LARGE_MINCLASS) :
+            sa2u(LARGE_MINCLASS, alignment)) == LARGE_MINCLASS);
+        p = imalloc(LARGE_MINCLASS);
         if (p == NULL)
             return (NULL);
         arena_prof_promoted(p, usize);
-    } else
-        p = imallocx(usize, alignment, zero, try_tcache, arena);
+    } else {
+        p = imallocx_maybe_flags(size, flags, usize, alignment, zero,
+            try_tcache, arena);
+    }

     return (p);
 }

 JEMALLOC_ALWAYS_INLINE_C void *
-imallocx_prof(size_t usize, size_t alignment, bool zero, bool try_tcache,
-    arena_t *arena, prof_tctx_t *tctx)
+imallocx_prof(size_t size, int flags, size_t *usize)
 {
     void *p;
+    size_t alignment;
+    bool zero;
+    bool try_tcache;
+    arena_t *arena;
+    prof_tctx_t *tctx;

-    if ((uintptr_t)tctx != (uintptr_t)1U) {
-        p = imallocx_prof_sample(usize, alignment, zero, try_tcache,
-            arena, tctx);
+    imallocx_flags_decode(size, flags, usize, &alignment, &zero,
+        &try_tcache, &arena);
+    tctx = prof_alloc_prep(*usize);
+    if ((uintptr_t)tctx == (uintptr_t)1U) {
+        p = imallocx_maybe_flags(size, flags, *usize, alignment, zero,
+            try_tcache, arena);
+    } else if ((uintptr_t)tctx > (uintptr_t)1U) {
+        p = imallocx_prof_sample(size, flags, *usize, alignment, zero,
+            try_tcache, arena);
     } else
-        p = imallocx(usize, alignment, zero, try_tcache, arena);
+        p = NULL;
     if (p == NULL)
         return (NULL);
-    prof_malloc(p, usize, tctx);
+    prof_malloc(p, *usize, tctx);

     return (p);
 }

+JEMALLOC_ALWAYS_INLINE_C void *
+imallocx_no_prof(size_t size, int flags, size_t *usize)
+{
+    size_t alignment;
+    bool zero;
+    bool try_tcache;
+    arena_t *arena;
+
+    if (flags == 0) {
+        if (config_stats || (config_valgrind && in_valgrind))
+            *usize = s2u(size);
+        return (imalloc(size));
+    }
+
+    imallocx_flags_decode_hard(size, flags, usize, &alignment, &zero,
+        &try_tcache, &arena);
+    return (imallocx_flags(*usize, alignment, zero, try_tcache, arena));
+}
+
 void *
 je_mallocx(size_t size, int flags)
 {
     void *p;
     size_t usize;
-    size_t alignment = (ZU(1) << (flags & MALLOCX_LG_ALIGN_MASK)
-        & (SIZE_T_MAX-1));
-    bool zero = flags & MALLOCX_ZERO;
-    unsigned arena_ind = ((unsigned)(flags >> 8)) - 1;
-    arena_t *arena;
-    bool try_tcache;

     assert(size != 0);

     if (malloc_init())
         goto label_oom;

-    if (arena_ind != UINT_MAX) {
-        arena = arenas[arena_ind];
-        try_tcache = false;
-    } else {
-        arena = NULL;
-        try_tcache = true;
-    }
-
-    usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment);
-    assert(usize != 0);
-
-    if (config_prof && opt_prof) {
-        prof_tctx_t *tctx;
-
-        tctx = prof_alloc_prep(usize);
-        p = imallocx_prof(usize, alignment, zero, try_tcache, arena,
-            tctx);
-    } else
-        p = imallocx(usize, alignment, zero, try_tcache, arena);
+    if (config_prof && opt_prof)
+        p = imallocx_prof(size, flags, &usize);
+    else
+        p = imallocx_no_prof(size, flags, &usize);
     if (p == NULL)
         goto label_oom;
@@ -1464,7 +1517,7 @@ je_mallocx(size_t size, int flags)
         thread_allocated_tsd_get()->allocated += usize;
     }
     UTRACE(0, size, p);
-    JEMALLOC_VALGRIND_MALLOC(true, p, usize, zero);
+    JEMALLOC_VALGRIND_MALLOC(true, p, usize, MALLOCX_ZERO_GET(flags));
     return (p);
 label_oom:
     if (config_xmalloc && opt_xmalloc) {
@@ -1485,15 +1538,14 @@ irallocx_prof_sample(void *oldptr, size_t size, size_t alignment, size_t usize,
     if (tctx == NULL)
         return (NULL);
     if (usize <= SMALL_MAXCLASS) {
-        p = iralloct(oldptr, SMALL_MAXCLASS+1, (SMALL_MAXCLASS+1 >=
-            size) ? 0 : size - (SMALL_MAXCLASS+1), alignment, zero,
+        p = iralloct(oldptr, LARGE_MINCLASS, alignment, zero,
             try_tcache_alloc, try_tcache_dalloc, arena);
         if (p == NULL)
             return (NULL);
         arena_prof_promoted(p, usize);
     } else {
-        p = iralloct(oldptr, size, 0, alignment, zero,
-            try_tcache_alloc, try_tcache_dalloc, arena);
+        p = iralloct(oldptr, size, alignment, zero, try_tcache_alloc,
+            try_tcache_dalloc, arena);
     }

     return (p);
@@ -1512,8 +1564,8 @@ irallocx_prof(void *oldptr, size_t old_usize, size_t size, size_t alignment,
         p = irallocx_prof_sample(oldptr, size, alignment, *usize, zero,
             try_tcache_alloc, try_tcache_dalloc, arena, tctx);
     else {
-        p = iralloct(oldptr, size, 0, alignment, zero,
-            try_tcache_alloc, try_tcache_dalloc, arena);
+        p = iralloct(oldptr, size, alignment, zero, try_tcache_alloc,
+            try_tcache_dalloc, arena);
     }
     if (p == NULL)
         return (NULL);
@@ -1540,10 +1592,8 @@ je_rallocx(void *ptr, size_t size, int flags)
     void *p;
     size_t usize, old_usize;
     UNUSED size_t old_rzsize JEMALLOC_CC_SILENCE_INIT(0);
-    size_t alignment = (ZU(1) << (flags & MALLOCX_LG_ALIGN_MASK)
-        & (SIZE_T_MAX-1));
+    size_t alignment = MALLOCX_ALIGN_GET(flags);
     bool zero = flags & MALLOCX_ZERO;
-    unsigned arena_ind = ((unsigned)(flags >> 8)) - 1;
     bool try_tcache_alloc, try_tcache_dalloc;
     arena_t *arena;

@@ -1552,7 +1602,8 @@ je_rallocx(void *ptr, size_t size, int flags)
     assert(malloc_initialized || IS_INITIALIZER);
     malloc_thread_init();

-    if (arena_ind != UINT_MAX) {
+    if ((flags & MALLOCX_ARENA_MASK) != 0) {
+        unsigned arena_ind = MALLOCX_ARENA_GET(flags);
         arena_chunk_t *chunk;
         try_tcache_alloc = false;
         chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
@@ -1582,7 +1633,7 @@ je_rallocx(void *ptr, size_t size, int flags)
         if (p == NULL)
             goto label_oom;
     } else {
-        p = iralloct(ptr, size, 0, alignment, zero, try_tcache_alloc,
+        p = iralloct(ptr, size, alignment, zero, try_tcache_alloc,
             try_tcache_dalloc, arena);
         if (p == NULL)
             goto label_oom;
@@ -1677,10 +1728,8 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags)
 {
     size_t usize, old_usize;
     UNUSED size_t old_rzsize JEMALLOC_CC_SILENCE_INIT(0);
-    size_t alignment = (ZU(1) << (flags & MALLOCX_LG_ALIGN_MASK)
-        & (SIZE_T_MAX-1));
+    size_t alignment = MALLOCX_ALIGN_GET(flags);
     bool zero = flags & MALLOCX_ZERO;
-    unsigned arena_ind = ((unsigned)(flags >> 8)) - 1;
     arena_t *arena;

     assert(ptr != NULL);
@@ -1689,9 +1738,10 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags)
     assert(malloc_initialized || IS_INITIALIZER);
     malloc_thread_init();

-    if (arena_ind != UINT_MAX)
+    if ((flags & MALLOCX_ARENA_MASK) != 0) {
+        unsigned arena_ind = MALLOCX_ARENA_GET(flags);
         arena = arenas[arena_ind];
-    else
+    } else
         arena = NULL;

     old_usize = isalloc(ptr, config_prof);
@@ -1753,15 +1803,13 @@ je_sallocx(const void *ptr, int flags)
 void
 je_dallocx(void *ptr, int flags)
 {
-    size_t usize;
-    UNUSED size_t rzsize JEMALLOC_CC_SILENCE_INIT(0);
-    unsigned arena_ind = ((unsigned)(flags >> 8)) - 1;
     bool try_tcache;

     assert(ptr != NULL);
     assert(malloc_initialized || IS_INITIALIZER);

-    if (arena_ind != UINT_MAX) {
+    if ((flags & MALLOCX_ARENA_MASK) != 0) {
+        unsigned arena_ind = MALLOCX_ARENA_GET(flags);
         arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
         try_tcache = (chunk == ptr || chunk->arena !=
             arenas[arena_ind]);
@@ -1769,34 +1817,25 @@ je_dallocx(void *ptr, int flags)
         try_tcache = true;

     UTRACE(ptr, 0, 0);
-    if (config_stats || config_valgrind)
-        usize = isalloc(ptr, config_prof);
-    if (config_prof && opt_prof) {
-        if (config_stats == false && config_valgrind == false)
-            usize = isalloc(ptr, config_prof);
-        prof_free(ptr, usize);
-    }
-    if (config_stats)
-        thread_allocated_tsd_get()->deallocated += usize;
-    if (config_valgrind && in_valgrind)
-        rzsize = p2rz(ptr);
-    iqalloct(ptr, try_tcache);
-    JEMALLOC_VALGRIND_FREE(ptr, rzsize);
+    ifree(ptr, try_tcache);
 }

 size_t
 je_nallocx(size_t size, int flags)
 {
     size_t usize;
-    size_t alignment = (ZU(1) << (flags & MALLOCX_LG_ALIGN_MASK)
-        & (SIZE_T_MAX-1));

     assert(size != 0);

     if (malloc_init())
         return (0);

-    usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment);
+    if ((flags & MALLOCX_LG_ALIGN_MASK) == 0)
+        usize = s2u(size);
+    else {
+        size_t alignment = MALLOCX_ALIGN_GET_SPECIFIED(flags);
+        usize = sa2u(size, alignment);
+    }
     assert(usize != 0);
     return (usize);
 }
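
For reference, the flag encodings these decoders unpack are the public mallocx(3) constructors. A minimal caller's-eye usage sketch exercising both paths (error checking omitted):

#include <jemalloc/jemalloc.h>

int
main(void)
{
    /* flags == 0: the streamlined path, no decoding. */
    void *fast = mallocx(100, 0);
    /* Non-zero flags, decoded once: 64-byte alignment plus zeroing. */
    void *slow = mallocx(100, MALLOCX_LG_ALIGN(6) | MALLOCX_ZERO);

    dallocx(fast, 0);
    dallocx(slow, 0);
    return (0);
}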