Optimize the fast paths of calloc() and [m,d,sd]allocx().

This more broadly applies the optimizations made to malloc() and free() in
f4a0f32d34 (Fast-path improvement:
reduce # of branches and unnecessary operations.).

This resolves #321.
Jason Evans 2016-05-06 12:16:00 -07:00
parent c2f970c32b
commit 3ef51d7f73
6 changed files with 137 additions and 243 deletions
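
For context on what the hunks below implement: every public entry point now funnels into an always-inline body that takes a compile-time-constant slow_path argument and branches exactly once on the runtime malloc_slow flag, so the compiler emits separate fast and slow specializations of the same code. The following is a minimal, self-contained sketch of that pattern only; alloc_body() and my_malloc() are invented names for illustration, not jemalloc's actual functions (the real ones are ialloc_body(), ialloc_post_check(), etc. in the diff below), and __builtin_expect stands in for jemalloc's likely().

#include <stdbool.h>
#include <stdlib.h>

/* Runtime flag: set once during initialization when any slow feature (junk
 * fill, quarantine, profiling, xmalloc, ...) is enabled; false is common. */
static bool malloc_slow = false;

/* Always inlined, so each call site sees a statically known slow_path and the
 * compiler drops the dead branch from the fast specialization. */
static inline void *
alloc_body(size_t size, bool zero, bool slow_path)
{
    void *p = zero ? calloc(1, size) : malloc(size);

    if (slow_path && p != NULL) {
        /* Work that only matters when a slow feature is enabled,
         * e.g. junk filling, utrace logging, valgrind bookkeeping. */
    }
    return (p);
}

void *
my_malloc(size_t size)
{
    /* One predictable branch selects the specialization; past this point
     * slow_path is a compile-time constant in each arm. */
    if (__builtin_expect(!malloc_slow, 1))
        return (alloc_body(size, false, false));
    return (alloc_body(size, false, true));
}

Because the body is inlined with a literal slow_path at each call site, the common case compiles down to one well-predicted branch on malloc_slow followed by straight-line allocation code; the slow-only work never appears on the fast path.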

include/jemalloc/internal/arena.h

@@ -680,7 +680,8 @@ void *arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind,
 arena_t *arena_aalloc(const void *ptr);
 size_t arena_salloc(tsd_t *tsd, const void *ptr, bool demote);
 void arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path);
-void arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache);
+void arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache,
+    bool slow_path);
 #endif
 #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_))
@@ -1446,7 +1447,8 @@ arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path)
 }
 
 JEMALLOC_ALWAYS_INLINE void
-arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache)
+arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache,
+    bool slow_path)
 {
     arena_chunk_t *chunk;
@@ -1473,7 +1475,7 @@ arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache)
             if (likely(tcache != NULL)) {
                 szind_t binind = size2index(size);
                 tcache_dalloc_small(tsd, tcache, ptr, binind,
-                    true);
+                    slow_path);
             } else {
                 size_t pageind = ((uintptr_t)ptr -
                     (uintptr_t)chunk) >> LG_PAGE;
@@ -1486,7 +1488,7 @@ arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache)
             if (likely(tcache != NULL) && size <= tcache_maxclass) {
                 tcache_dalloc_large(tsd, tcache, ptr, size,
-                    true);
+                    slow_path);
             } else {
                 arena_dalloc_large(tsd, extent_node_arena_get(
                     &chunk->node), chunk, ptr);

include/jemalloc/internal/jemalloc_internal.h.in

@@ -895,12 +895,8 @@ arena_t *iaalloc(const void *ptr);
 size_t isalloc(tsd_t *tsd, const void *ptr, bool demote);
 void *iallocztm(tsd_t *tsd, size_t size, szind_t ind, bool zero,
     tcache_t *tcache, bool is_metadata, arena_t *arena, bool slow_path);
-void *imalloct(tsd_t *tsd, size_t size, szind_t ind, tcache_t *tcache,
-    arena_t *arena);
-void *imalloc(tsd_t *tsd, size_t size, szind_t ind, bool slow_path);
-void *icalloct(tsd_t *tsd, size_t size, szind_t ind, tcache_t *tcache,
-    arena_t *arena);
-void *icalloc(tsd_t *tsd, size_t size, szind_t ind);
+void *ialloc(tsd_t *tsd, size_t size, szind_t ind, bool zero,
+    bool slow_path);
 void *ipallocztm(tsd_t *tsd, size_t usize, size_t alignment, bool zero,
     tcache_t *tcache, bool is_metadata, arena_t *arena);
 void *ipalloct(tsd_t *tsd, size_t usize, size_t alignment, bool zero,
@@ -911,11 +907,12 @@ size_t u2rz(size_t usize);
 size_t p2rz(tsd_t *tsd, const void *ptr);
 void idalloctm(tsd_t *tsd, void *ptr, tcache_t *tcache, bool is_metadata,
     bool slow_path);
-void idalloct(tsd_t *tsd, void *ptr, tcache_t *tcache);
 void idalloc(tsd_t *tsd, void *ptr);
 void iqalloc(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path);
-void isdalloct(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache);
-void isqalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache);
+void isdalloct(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache,
+    bool slow_path);
+void isqalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache,
+    bool slow_path);
 void *iralloct_realign(tsd_t *tsd, void *ptr, size_t oldsize, size_t size,
     size_t extra, size_t alignment, bool zero, tcache_t *tcache,
     arena_t *arena);
@@ -972,35 +969,13 @@ iallocztm(tsd_t *tsd, size_t size, szind_t ind, bool zero, tcache_t *tcache,
 }
 
 JEMALLOC_ALWAYS_INLINE void *
-imalloct(tsd_t *tsd, size_t size, szind_t ind, tcache_t *tcache, arena_t *arena)
+ialloc(tsd_t *tsd, size_t size, szind_t ind, bool zero, bool slow_path)
 {
-    return (iallocztm(tsd, size, ind, false, tcache, false, arena, true));
-}
-
-JEMALLOC_ALWAYS_INLINE void *
-imalloc(tsd_t *tsd, size_t size, szind_t ind, bool slow_path)
-{
-    return (iallocztm(tsd, size, ind, false, tcache_get(tsd, true), false,
+    return (iallocztm(tsd, size, ind, zero, tcache_get(tsd, true), false,
         NULL, slow_path));
 }
-
-JEMALLOC_ALWAYS_INLINE void *
-icalloct(tsd_t *tsd, size_t size, szind_t ind, tcache_t *tcache, arena_t *arena)
-{
-    return (iallocztm(tsd, size, ind, true, tcache, false, arena, true));
-}
-
-JEMALLOC_ALWAYS_INLINE void *
-icalloc(tsd_t *tsd, size_t size, szind_t ind)
-{
-    return (iallocztm(tsd, size, ind, true, tcache_get(tsd, true), false,
-        NULL, true));
-}
 
 JEMALLOC_ALWAYS_INLINE void *
 ipallocztm(tsd_t *tsd, size_t usize, size_t alignment, bool zero,
     tcache_t *tcache, bool is_metadata, arena_t *arena)
@@ -1091,13 +1066,6 @@ idalloctm(tsd_t *tsd, void *ptr, tcache_t *tcache, bool is_metadata,
     arena_dalloc(tsd, ptr, tcache, slow_path);
 }
 
-JEMALLOC_ALWAYS_INLINE void
-idalloct(tsd_t *tsd, void *ptr, tcache_t *tcache)
-{
-    idalloctm(tsd, ptr, tcache, false, true);
-}
-
 JEMALLOC_ALWAYS_INLINE void
 idalloc(tsd_t *tsd, void *ptr)
 {
@@ -1116,20 +1084,20 @@ iqalloc(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path)
 }
 
 JEMALLOC_ALWAYS_INLINE void
-isdalloct(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache)
+isdalloct(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache, bool slow_path)
 {
-    arena_sdalloc(tsd, ptr, size, tcache);
+    arena_sdalloc(tsd, ptr, size, tcache, slow_path);
 }
 
 JEMALLOC_ALWAYS_INLINE void
-isqalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache)
+isqalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache, bool slow_path)
 {
-    if (config_fill && unlikely(opt_quarantine))
+    if (slow_path && config_fill && unlikely(opt_quarantine))
         quarantine(tsd, ptr);
     else
-        isdalloct(tsd, ptr, size, tcache);
+        isdalloct(tsd, ptr, size, tcache, slow_path);
 }
 
 JEMALLOC_ALWAYS_INLINE void *
@@ -1160,7 +1128,7 @@ iralloct_realign(tsd_t *tsd, void *ptr, size_t oldsize, size_t size,
      */
     copysize = (size < oldsize) ? size : oldsize;
     memcpy(p, ptr, copysize);
-    isqalloc(tsd, ptr, oldsize, tcache);
+    isqalloc(tsd, ptr, oldsize, tcache, true);
     return (p);
 }

include/jemalloc/internal/private_symbols.txt

@@ -291,15 +291,11 @@ huge_ralloc
 huge_ralloc_no_move
 huge_salloc
 iaalloc
+ialloc
 iallocztm
 iarena_cleanup
-icalloc
-icalloct
 idalloc
-idalloct
 idalloctm
-imalloc
-imalloct
 in_valgrind
 index2size
 index2size_compute

src/arena.c

@@ -3303,7 +3303,7 @@ arena_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size,
         copysize = (usize < oldsize) ? usize : oldsize;
         JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, copysize);
         memcpy(ret, ptr, copysize);
-        isqalloc(tsd, ptr, oldsize, tcache);
+        isqalloc(tsd, ptr, oldsize, tcache, true);
     } else {
         ret = huge_ralloc(tsd, arena, ptr, oldsize, usize, alignment,
             zero, tcache);

src/huge.c

@@ -364,7 +364,7 @@ huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t usize,
     copysize = (usize < oldsize) ? usize : oldsize;
     memcpy(ret, ptr, copysize);
-    isqalloc(tsd, ptr, oldsize, tcache);
+    isqalloc(tsd, ptr, oldsize, tcache, true);
     return (ret);
 }

src/jemalloc.c

@@ -70,10 +70,10 @@ typedef enum {
 } malloc_init_t;
 static malloc_init_t malloc_init_state = malloc_init_uninitialized;
 
-/* 0 should be the common case. Set to true to trigger initialization. */
+/* False should be the common case. Set to true to trigger initialization. */
 static bool malloc_slow = true;
 
-/* When malloc_slow != 0, set the corresponding bits for sanity check. */
+/* When malloc_slow is true, set the corresponding bits for sanity check. */
 enum {
     flag_opt_junk_alloc = (1U),
     flag_opt_junk_free  = (1U << 1),
@@ -1443,7 +1443,7 @@ malloc_init_hard(void)
  */
 
 static void *
-imalloc_prof_sample(tsd_t *tsd, size_t usize, szind_t ind,
+ialloc_prof_sample(tsd_t *tsd, size_t usize, szind_t ind, bool zero,
     prof_tctx_t *tctx, bool slow_path)
 {
     void *p;
@@ -1452,27 +1452,27 @@ imalloc_prof_sample(tsd_t *tsd, size_t usize, szind_t ind,
         return (NULL);
     if (usize <= SMALL_MAXCLASS) {
         szind_t ind_large = size2index(LARGE_MINCLASS);
 
-        p = imalloc(tsd, LARGE_MINCLASS, ind_large, slow_path);
+        p = ialloc(tsd, LARGE_MINCLASS, ind_large, zero, slow_path);
         if (p == NULL)
             return (NULL);
         arena_prof_promoted(tsd, p, usize);
     } else
-        p = imalloc(tsd, usize, ind, slow_path);
+        p = ialloc(tsd, usize, ind, zero, slow_path);
 
     return (p);
 }
 
 JEMALLOC_ALWAYS_INLINE_C void *
-imalloc_prof(tsd_t *tsd, size_t usize, szind_t ind, bool slow_path)
+ialloc_prof(tsd_t *tsd, size_t usize, szind_t ind, bool zero, bool slow_path)
 {
     void *p;
     prof_tctx_t *tctx;
 
     tctx = prof_alloc_prep(tsd, usize, prof_active_get_unlocked(), true);
     if (unlikely((uintptr_t)tctx != (uintptr_t)1U))
-        p = imalloc_prof_sample(tsd, usize, ind, tctx, slow_path);
+        p = ialloc_prof_sample(tsd, usize, ind, zero, tctx, slow_path);
     else
-        p = imalloc(tsd, usize, ind, slow_path);
+        p = ialloc(tsd, usize, ind, zero, slow_path);
     if (unlikely(p == NULL)) {
         prof_alloc_rollback(tsd, tctx, true);
         return (NULL);
@@ -1482,16 +1482,21 @@ imalloc_prof(tsd_t *tsd, size_t usize, szind_t ind, bool slow_path)
     return (p);
 }
 
+/*
+ * ialloc_body() is inlined so that fast and slow paths are generated separately
+ * with statically known slow_path.
+ */
 JEMALLOC_ALWAYS_INLINE_C void *
-imalloc_body(size_t size, tsd_t **tsd, size_t *usize, bool slow_path)
+ialloc_body(size_t size, bool zero, tsd_t **tsd, size_t *usize, bool slow_path)
 {
     szind_t ind;
 
-    if (slow_path && unlikely(malloc_init()))
+    if (slow_path && unlikely(malloc_init())) {
+        *tsd = NULL;
         return (NULL);
+    }
 
     *tsd = tsd_fetch();
     witness_assert_lockless(*tsd);
     ind = size2index(size);
@@ -1505,26 +1510,30 @@ imalloc_body(size_t size, tsd_t **tsd, size_t *usize, bool slow_path)
     }
 
     if (config_prof && opt_prof)
-        return (imalloc_prof(*tsd, *usize, ind, slow_path));
+        return (ialloc_prof(*tsd, *usize, ind, zero, slow_path));
 
-    return (imalloc(*tsd, size, ind, slow_path));
+    return (ialloc(*tsd, size, ind, zero, slow_path));
 }
 
 JEMALLOC_ALWAYS_INLINE_C void
-imalloc_post_check(void *ret, tsd_t *tsd, size_t usize, bool slow_path)
+ialloc_post_check(void *ret, tsd_t *tsd, size_t usize, const char *func,
+    bool update_errno, bool slow_path)
 {
     if (unlikely(ret == NULL)) {
         if (slow_path && config_xmalloc && unlikely(opt_xmalloc)) {
-            malloc_write("<jemalloc>: Error in malloc(): "
-                "out of memory\n");
+            malloc_printf("<jemalloc>: Error in %s(): out of "
+                "memory\n", func);
             abort();
         }
-        set_errno(ENOMEM);
+        if (update_errno)
+            set_errno(ENOMEM);
     }
     if (config_stats && likely(ret != NULL)) {
         assert(usize == isalloc(tsd, ret, config_prof));
         *tsd_thread_allocatedp_get(tsd) += usize;
     }
+    witness_assert_lockless(tsd);
 }
 
 JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN
@@ -1540,20 +1549,15 @@ je_malloc(size_t size)
         size = 1;
 
     if (likely(!malloc_slow)) {
-        /*
-         * imalloc_body() is inlined so that fast and slow paths are
-         * generated separately with statically known slow_path.
-         */
-        ret = imalloc_body(size, &tsd, &usize, false);
-        imalloc_post_check(ret, tsd, usize, false);
+        ret = ialloc_body(size, false, &tsd, &usize, false);
+        ialloc_post_check(ret, tsd, usize, "malloc", true, false);
     } else {
-        ret = imalloc_body(size, &tsd, &usize, true);
-        imalloc_post_check(ret, tsd, usize, true);
+        ret = ialloc_body(size, false, &tsd, &usize, true);
+        ialloc_post_check(ret, tsd, usize, "malloc", true, true);
         UTRACE(0, size, ret);
         JEMALLOC_VALGRIND_MALLOC(ret != NULL, tsd, ret, usize, false);
     }
-    witness_assert_lockless(tsd);
 
     return (ret);
 }
@@ -1695,45 +1699,6 @@ je_aligned_alloc(size_t alignment, size_t size)
     return (ret);
 }
 
-static void *
-icalloc_prof_sample(tsd_t *tsd, size_t usize, szind_t ind, prof_tctx_t *tctx)
-{
-    void *p;
-
-    if (tctx == NULL)
-        return (NULL);
-    if (usize <= SMALL_MAXCLASS) {
-        szind_t ind_large = size2index(LARGE_MINCLASS);
-
-        p = icalloc(tsd, LARGE_MINCLASS, ind_large);
-        if (p == NULL)
-            return (NULL);
-        arena_prof_promoted(tsd, p, usize);
-    } else
-        p = icalloc(tsd, usize, ind);
-
-    return (p);
-}
-
-JEMALLOC_ALWAYS_INLINE_C void *
-icalloc_prof(tsd_t *tsd, size_t usize, szind_t ind)
-{
-    void *p;
-    prof_tctx_t *tctx;
-
-    tctx = prof_alloc_prep(tsd, usize, prof_active_get_unlocked(), true);
-    if (unlikely((uintptr_t)tctx != (uintptr_t)1U))
-        p = icalloc_prof_sample(tsd, usize, ind, tctx);
-    else
-        p = icalloc(tsd, usize, ind);
-    if (unlikely(p == NULL)) {
-        prof_alloc_rollback(tsd, tctx, true);
-        return (NULL);
-    }
-    prof_malloc(tsd, p, usize, tctx);
-
-    return (p);
-}
-
 JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN
 void JEMALLOC_NOTHROW *
 JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE2(1, 2)
@@ -1742,68 +1707,33 @@ je_calloc(size_t num, size_t size)
     void *ret;
     tsd_t *tsd;
     size_t num_size;
-    szind_t ind;
     size_t usize JEMALLOC_CC_SILENCE_INIT(0);
 
-    if (unlikely(malloc_init())) {
-        tsd = NULL;
-        num_size = 0;
-        ret = NULL;
-        goto label_return;
-    }
-    tsd = tsd_fetch();
-    witness_assert_lockless(tsd);
-
     num_size = num * size;
     if (unlikely(num_size == 0)) {
         if (num == 0 || size == 0)
             num_size = 1;
-        else {
-            ret = NULL;
-            goto label_return;
-        }
+        else
+            num_size = HUGE_MAXCLASS + 1; /* Trigger OOM. */
     /*
      * Try to avoid division here.  We know that it isn't possible to
      * overflow during multiplication if neither operand uses any of the
      * most significant half of the bits in a size_t.
     */
     } else if (unlikely(((num | size) & (SIZE_T_MAX << (sizeof(size_t) <<
-        2))) && (num_size / size != num))) {
-        /* size_t overflow. */
-        ret = NULL;
-        goto label_return;
-    }
+        2))) && (num_size / size != num)))
+        num_size = HUGE_MAXCLASS + 1; /* size_t overflow. */
 
-    ind = size2index(num_size);
-    if (unlikely(ind >= NSIZES)) {
-        ret = NULL;
-        goto label_return;
-    }
-    if (config_prof && opt_prof) {
-        usize = index2size(ind);
-        ret = icalloc_prof(tsd, usize, ind);
+    if (likely(!malloc_slow)) {
+        ret = ialloc_body(num_size, true, &tsd, &usize, false);
+        ialloc_post_check(ret, tsd, usize, "calloc", true, false);
     } else {
-        if (config_stats || (config_valgrind && unlikely(in_valgrind)))
-            usize = index2size(ind);
-        ret = icalloc(tsd, num_size, ind);
+        ret = ialloc_body(num_size, true, &tsd, &usize, true);
+        ialloc_post_check(ret, tsd, usize, "calloc", true, true);
+        UTRACE(0, num_size, ret);
+        JEMALLOC_VALGRIND_MALLOC(ret != NULL, tsd, ret, usize, false);
     }
 
-label_return:
-    if (unlikely(ret == NULL)) {
-        if (config_xmalloc && unlikely(opt_xmalloc)) {
-            malloc_write("<jemalloc>: Error in calloc(): out of "
-                "memory\n");
-            abort();
-        }
-        set_errno(ENOMEM);
-    }
-    if (config_stats && likely(ret != NULL)) {
-        assert(usize == isalloc(tsd, ret, config_prof));
-        *tsd_thread_allocatedp_get(tsd) += usize;
-    }
-    UTRACE(0, num_size, ret);
-    JEMALLOC_VALGRIND_MALLOC(ret != NULL, tsd, ret, usize, true);
-    witness_assert_lockless(tsd);
     return (ret);
 }
@@ -1880,7 +1810,7 @@ ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path)
 }
 
 JEMALLOC_INLINE_C void
-isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache)
+isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path)
 {
     UNUSED size_t rzsize JEMALLOC_CC_SILENCE_INIT(0);
@@ -1895,7 +1825,7 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache)
     *tsd_thread_deallocatedp_get(tsd) += usize;
     if (config_valgrind && unlikely(in_valgrind))
         rzsize = p2rz(tsd, ptr);
-    isqalloc(tsd, ptr, usize, tcache);
+    isqalloc(tsd, ptr, usize, tcache, slow_path);
     JEMALLOC_VALGRIND_FREE(ptr, rzsize);
 }
@@ -1946,9 +1876,9 @@ je_realloc(void *ptr, size_t size)
     } else {
         /* realloc(NULL, size) is equivalent to malloc(size). */
         if (likely(!malloc_slow))
-            ret = imalloc_body(size, &tsd, &usize, false);
+            ret = ialloc_body(size, false, &tsd, &usize, false);
         else
-            ret = imalloc_body(size, &tsd, &usize, true);
+            ret = ialloc_body(size, false, &tsd, &usize, true);
     }
 
     if (unlikely(ret == NULL)) {
@@ -1978,6 +1908,7 @@ je_free(void *ptr)
     UTRACE(ptr, 0, 0);
     if (likely(ptr != NULL)) {
         tsd_t *tsd = tsd_fetch();
+        witness_assert_lockless(tsd);
         if (likely(!malloc_slow))
             ifree(tsd, ptr, tcache_get(tsd, false), false);
         else
@@ -2056,7 +1987,7 @@ JEMALLOC_EXPORT void *(*__memalign_hook)(size_t alignment, size_t size) =
  */
 JEMALLOC_ALWAYS_INLINE_C bool
-imallocx_flags_decode_hard(tsd_t *tsd, size_t size, int flags, size_t *usize,
+imallocx_flags_decode(tsd_t *tsd, size_t size, int flags, size_t *usize,
     size_t *alignment, bool *zero, tcache_t **tcache, arena_t **arena)
 {
@@ -2087,29 +2018,9 @@ imallocx_flags_decode_hard(tsd_t *tsd, size_t size, int flags, size_t *usize,
     return (false);
 }
 
-JEMALLOC_ALWAYS_INLINE_C bool
-imallocx_flags_decode(tsd_t *tsd, size_t size, int flags, size_t *usize,
-    size_t *alignment, bool *zero, tcache_t **tcache, arena_t **arena)
-{
-    if (likely(flags == 0)) {
-        *usize = s2u(size);
-        if (unlikely(*usize == 0 || *usize > HUGE_MAXCLASS))
-            return (true);
-        *alignment = 0;
-        *zero = false;
-        *tcache = tcache_get(tsd, true);
-        *arena = NULL;
-        return (false);
-    } else {
-        return (imallocx_flags_decode_hard(tsd, size, flags, usize,
-            alignment, zero, tcache, arena));
-    }
-}
-
 JEMALLOC_ALWAYS_INLINE_C void *
 imallocx_flags(tsd_t *tsd, size_t usize, size_t alignment, bool zero,
-    tcache_t *tcache, arena_t *arena)
+    tcache_t *tcache, arena_t *arena, bool slow_path)
 {
     szind_t ind;
@@ -2117,14 +2028,13 @@ imallocx_flags(tsd_t *tsd, size_t usize, size_t alignment, bool zero,
         return (ipalloct(tsd, usize, alignment, zero, tcache, arena));
     ind = size2index(usize);
     assert(ind < NSIZES);
-    if (unlikely(zero))
-        return (icalloct(tsd, usize, ind, tcache, arena));
-    return (imalloct(tsd, usize, ind, tcache, arena));
+    return (iallocztm(tsd, usize, ind, zero, tcache, false, arena,
+        slow_path));
 }
 
 static void *
 imallocx_prof_sample(tsd_t *tsd, size_t usize, size_t alignment, bool zero,
-    tcache_t *tcache, arena_t *arena)
+    tcache_t *tcache, arena_t *arena, bool slow_path)
 {
     void *p;
@@ -2132,18 +2042,20 @@ imallocx_prof_sample(tsd_t *tsd, size_t usize, size_t alignment, bool zero,
         assert(((alignment == 0) ? s2u(LARGE_MINCLASS) :
             sa2u(LARGE_MINCLASS, alignment)) == LARGE_MINCLASS);
         p = imallocx_flags(tsd, LARGE_MINCLASS, alignment, zero, tcache,
-            arena);
+            arena, slow_path);
         if (p == NULL)
             return (NULL);
         arena_prof_promoted(tsd, p, usize);
-    } else
-        p = imallocx_flags(tsd, usize, alignment, zero, tcache, arena);
+    } else {
+        p = imallocx_flags(tsd, usize, alignment, zero, tcache, arena,
+            slow_path);
+    }
 
     return (p);
 }
 
 JEMALLOC_ALWAYS_INLINE_C void *
-imallocx_prof(tsd_t *tsd, size_t size, int flags, size_t *usize)
+imallocx_prof(tsd_t *tsd, size_t size, int flags, size_t *usize, bool slow_path)
 {
     void *p;
     size_t alignment;
@@ -2157,10 +2069,11 @@ imallocx_prof(tsd_t *tsd, size_t size, int flags, size_t *usize)
         return (NULL);
     tctx = prof_alloc_prep(tsd, *usize, prof_active_get_unlocked(), true);
     if (likely((uintptr_t)tctx == (uintptr_t)1U))
-        p = imallocx_flags(tsd, *usize, alignment, zero, tcache, arena);
+        p = imallocx_flags(tsd, *usize, alignment, zero, tcache, arena,
+            slow_path);
     else if ((uintptr_t)tctx > (uintptr_t)1U) {
         p = imallocx_prof_sample(tsd, *usize, alignment, zero, tcache,
-            arena);
+            arena, slow_path);
     } else
         p = NULL;
     if (unlikely(p == NULL)) {
@@ -2174,7 +2087,8 @@ imallocx_prof(tsd_t *tsd, size_t size, int flags, size_t *usize)
 }
 
 JEMALLOC_ALWAYS_INLINE_C void *
-imallocx_no_prof(tsd_t *tsd, size_t size, int flags, size_t *usize)
+imallocx_no_prof(tsd_t *tsd, size_t size, int flags, size_t *usize,
+    bool slow_path)
 {
     void *p;
     size_t alignment;
@@ -2182,24 +2096,50 @@ imallocx_no_prof(tsd_t *tsd, size_t size, int flags, size_t *usize)
     tcache_t *tcache;
     arena_t *arena;
 
+    if (unlikely(imallocx_flags_decode(tsd, size, flags, usize, &alignment,
+        &zero, &tcache, &arena)))
+        return (NULL);
+    p = imallocx_flags(tsd, *usize, alignment, zero, tcache, arena,
+        slow_path);
+    assert(alignment == 0 || ((uintptr_t)p & (alignment - 1)) == ZU(0));
+    return (p);
+}
+
+JEMALLOC_ALWAYS_INLINE_C void *
+imallocx_body(size_t size, int flags, tsd_t **tsd, size_t *usize,
+    bool slow_path)
+{
+
+    if (slow_path && unlikely(malloc_init())) {
+        *tsd = NULL;
+        return (NULL);
+    }
+
+    *tsd = tsd_fetch();
+    witness_assert_lockless(*tsd);
+
     if (likely(flags == 0)) {
         szind_t ind = size2index(size);
         if (unlikely(ind >= NSIZES))
             return (NULL);
-        if (config_stats || (config_valgrind &&
-            unlikely(in_valgrind))) {
+        if (config_stats || (config_prof && opt_prof) || (slow_path &&
+            config_valgrind && unlikely(in_valgrind))) {
             *usize = index2size(ind);
             assert(*usize > 0 && *usize <= HUGE_MAXCLASS);
         }
-        return (imalloc(tsd, size, ind, true));
+
+        if (config_prof && opt_prof) {
+            return (ialloc_prof(*tsd, *usize, ind, false,
+                slow_path));
+        }
+
+        return (ialloc(*tsd, size, ind, false, slow_path));
     }
 
-    if (unlikely(imallocx_flags_decode_hard(tsd, size, flags, usize,
-        &alignment, &zero, &tcache, &arena)))
-        return (NULL);
-    p = imallocx_flags(tsd, *usize, alignment, zero, tcache, arena);
-    assert(alignment == 0 || ((uintptr_t)p & (alignment - 1)) == ZU(0));
-    return (p);
+    if (config_prof && opt_prof)
+        return (imallocx_prof(*tsd, size, flags, usize, slow_path));
+
+    return (imallocx_no_prof(*tsd, size, flags, usize, slow_path));
 }
 
 JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN
@@ -2213,36 +2153,18 @@ je_mallocx(size_t size, int flags)
     assert(size != 0);
 
-    if (unlikely(malloc_init())) {
-        tsd = NULL;
-        goto label_oom;
+    if (likely(!malloc_slow)) {
+        p = imallocx_body(size, flags, &tsd, &usize, false);
+        ialloc_post_check(p, tsd, usize, "mallocx", false, false);
+    } else {
+        p = imallocx_body(size, flags, &tsd, &usize, true);
+        ialloc_post_check(p, tsd, usize, "mallocx", false, true);
+        UTRACE(0, size, p);
+        JEMALLOC_VALGRIND_MALLOC(p != NULL, tsd, p, usize,
+            MALLOCX_ZERO_GET(flags));
     }
-    tsd = tsd_fetch();
-    witness_assert_lockless(tsd);
-
-    if (config_prof && opt_prof)
-        p = imallocx_prof(tsd, size, flags, &usize);
-    else
-        p = imallocx_no_prof(tsd, size, flags, &usize);
-    if (unlikely(p == NULL))
-        goto label_oom;
 
-    if (config_stats) {
-        assert(usize == isalloc(tsd, p, config_prof));
-        *tsd_thread_allocatedp_get(tsd) += usize;
-    }
-    UTRACE(0, size, p);
-    JEMALLOC_VALGRIND_MALLOC(true, tsd, p, usize, MALLOCX_ZERO_GET(flags));
-    witness_assert_lockless(tsd);
     return (p);
-label_oom:
-    if (config_xmalloc && unlikely(opt_xmalloc)) {
-        malloc_write("<jemalloc>: Error in mallocx(): out of memory\n");
-        abort();
-    }
-    UTRACE(0, size, 0);
-    witness_assert_lockless(tsd);
-    return (NULL);
 }
 
 static void *
@@ -2567,7 +2489,10 @@ je_dallocx(void *ptr, int flags)
         tcache = tcache_get(tsd, false);
 
     UTRACE(ptr, 0, 0);
-    ifree(tsd_fetch(), ptr, tcache, true);
+    if (likely(!malloc_slow))
+        ifree(tsd, ptr, tcache, false);
+    else
+        ifree(tsd, ptr, tcache, true);
     witness_assert_lockless(tsd);
 }
@@ -2609,7 +2534,10 @@ je_sdallocx(void *ptr, size_t size, int flags)
         tcache = tcache_get(tsd, false);
 
     UTRACE(ptr, 0, 0);
-    isfree(tsd, ptr, usize, tcache);
+    if (likely(!malloc_slow))
+        isfree(tsd, ptr, usize, tcache, false);
+    else
+        isfree(tsd, ptr, usize, tcache, true);
     witness_assert_lockless(tsd);
 }