From 3ef51d7f733ac6432e80fa902a779ab5b98d74f6 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 6 May 2016 12:16:00 -0700 Subject: [PATCH] Optimize the fast paths of calloc() and [m,d,sd]allocx(). This is a broader application of optimizations to malloc() and free() in f4a0f32d340985de477bbe329ecdaecd69ed1055 (Fast-path improvement: reduce # of branches and unnecessary operations.). This resolves #321. --- include/jemalloc/internal/arena.h | 10 +- .../jemalloc/internal/jemalloc_internal.h.in | 60 +--- include/jemalloc/internal/private_symbols.txt | 6 +- src/arena.c | 2 +- src/huge.c | 2 +- src/jemalloc.c | 300 +++++++----------- 6 files changed, 137 insertions(+), 243 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 53e6b3ad..debb43f3 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -680,7 +680,8 @@ void *arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind, arena_t *arena_aalloc(const void *ptr); size_t arena_salloc(tsd_t *tsd, const void *ptr, bool demote); void arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path); -void arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache); +void arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache, + bool slow_path); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_)) @@ -1446,7 +1447,8 @@ arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) } JEMALLOC_ALWAYS_INLINE void -arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache) +arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache, + bool slow_path) { arena_chunk_t *chunk; @@ -1473,7 +1475,7 @@ arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache) if (likely(tcache != NULL)) { szind_t binind = size2index(size); tcache_dalloc_small(tsd, tcache, ptr, binind, - true); + slow_path); } else { size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; @@ -1486,7 +1488,7 @@ arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache) if (likely(tcache != NULL) && size <= tcache_maxclass) { tcache_dalloc_large(tsd, tcache, ptr, size, - true); + slow_path); } else { arena_dalloc_large(tsd, extent_node_arena_get( &chunk->node), chunk, ptr); diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 62d5da29..fe504d8d 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -895,12 +895,8 @@ arena_t *iaalloc(const void *ptr); size_t isalloc(tsd_t *tsd, const void *ptr, bool demote); void *iallocztm(tsd_t *tsd, size_t size, szind_t ind, bool zero, tcache_t *tcache, bool is_metadata, arena_t *arena, bool slow_path); -void *imalloct(tsd_t *tsd, size_t size, szind_t ind, tcache_t *tcache, - arena_t *arena); -void *imalloc(tsd_t *tsd, size_t size, szind_t ind, bool slow_path); -void *icalloct(tsd_t *tsd, size_t size, szind_t ind, tcache_t *tcache, - arena_t *arena); -void *icalloc(tsd_t *tsd, size_t size, szind_t ind); +void *ialloc(tsd_t *tsd, size_t size, szind_t ind, bool zero, + bool slow_path); void *ipallocztm(tsd_t *tsd, size_t usize, size_t alignment, bool zero, tcache_t *tcache, bool is_metadata, arena_t *arena); void *ipalloct(tsd_t *tsd, size_t usize, size_t alignment, bool zero, @@ -911,11 +907,12 @@ size_t u2rz(size_t usize); size_t p2rz(tsd_t *tsd, const void *ptr); void idalloctm(tsd_t *tsd, void *ptr, tcache_t *tcache, bool is_metadata, 
bool slow_path); -void idalloct(tsd_t *tsd, void *ptr, tcache_t *tcache); void idalloc(tsd_t *tsd, void *ptr); void iqalloc(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path); -void isdalloct(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache); -void isqalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache); +void isdalloct(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache, + bool slow_path); +void isqalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache, + bool slow_path); void *iralloct_realign(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t extra, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena); @@ -972,35 +969,13 @@ iallocztm(tsd_t *tsd, size_t size, szind_t ind, bool zero, tcache_t *tcache, } JEMALLOC_ALWAYS_INLINE void * -imalloct(tsd_t *tsd, size_t size, szind_t ind, tcache_t *tcache, arena_t *arena) +ialloc(tsd_t *tsd, size_t size, szind_t ind, bool zero, bool slow_path) { - return (iallocztm(tsd, size, ind, false, tcache, false, arena, true)); -} - -JEMALLOC_ALWAYS_INLINE void * -imalloc(tsd_t *tsd, size_t size, szind_t ind, bool slow_path) -{ - - return (iallocztm(tsd, size, ind, false, tcache_get(tsd, true), false, + return (iallocztm(tsd, size, ind, zero, tcache_get(tsd, true), false, NULL, slow_path)); } -JEMALLOC_ALWAYS_INLINE void * -icalloct(tsd_t *tsd, size_t size, szind_t ind, tcache_t *tcache, arena_t *arena) -{ - - return (iallocztm(tsd, size, ind, true, tcache, false, arena, true)); -} - -JEMALLOC_ALWAYS_INLINE void * -icalloc(tsd_t *tsd, size_t size, szind_t ind) -{ - - return (iallocztm(tsd, size, ind, true, tcache_get(tsd, true), false, - NULL, true)); -} - JEMALLOC_ALWAYS_INLINE void * ipallocztm(tsd_t *tsd, size_t usize, size_t alignment, bool zero, tcache_t *tcache, bool is_metadata, arena_t *arena) @@ -1091,13 +1066,6 @@ idalloctm(tsd_t *tsd, void *ptr, tcache_t *tcache, bool is_metadata, arena_dalloc(tsd, ptr, tcache, slow_path); } -JEMALLOC_ALWAYS_INLINE void -idalloct(tsd_t *tsd, void *ptr, tcache_t *tcache) -{ - - idalloctm(tsd, ptr, tcache, false, true); -} - JEMALLOC_ALWAYS_INLINE void idalloc(tsd_t *tsd, void *ptr) { @@ -1116,20 +1084,20 @@ iqalloc(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) } JEMALLOC_ALWAYS_INLINE void -isdalloct(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache) +isdalloct(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache, bool slow_path) { - arena_sdalloc(tsd, ptr, size, tcache); + arena_sdalloc(tsd, ptr, size, tcache, slow_path); } JEMALLOC_ALWAYS_INLINE void -isqalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache) +isqalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache, bool slow_path) { - if (config_fill && unlikely(opt_quarantine)) + if (slow_path && config_fill && unlikely(opt_quarantine)) quarantine(tsd, ptr); else - isdalloct(tsd, ptr, size, tcache); + isdalloct(tsd, ptr, size, tcache, slow_path); } JEMALLOC_ALWAYS_INLINE void * @@ -1160,7 +1128,7 @@ iralloct_realign(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, */ copysize = (size < oldsize) ? 
size : oldsize; memcpy(p, ptr, copysize); - isqalloc(tsd, ptr, oldsize, tcache); + isqalloc(tsd, ptr, oldsize, tcache, true); return (p); } diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 0f9b99e4..e47296ff 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -291,15 +291,11 @@ huge_ralloc huge_ralloc_no_move huge_salloc iaalloc +ialloc iallocztm iarena_cleanup -icalloc -icalloct idalloc -idalloct idalloctm -imalloc -imalloct in_valgrind index2size index2size_compute diff --git a/src/arena.c b/src/arena.c index 1172dc2c..992d96f5 100644 --- a/src/arena.c +++ b/src/arena.c @@ -3303,7 +3303,7 @@ arena_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, copysize = (usize < oldsize) ? usize : oldsize; JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, copysize); memcpy(ret, ptr, copysize); - isqalloc(tsd, ptr, oldsize, tcache); + isqalloc(tsd, ptr, oldsize, tcache, true); } else { ret = huge_ralloc(tsd, arena, ptr, oldsize, usize, alignment, zero, tcache); diff --git a/src/huge.c b/src/huge.c index 0b3aed0d..71fb50c5 100644 --- a/src/huge.c +++ b/src/huge.c @@ -364,7 +364,7 @@ huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t usize, copysize = (usize < oldsize) ? usize : oldsize; memcpy(ret, ptr, copysize); - isqalloc(tsd, ptr, oldsize, tcache); + isqalloc(tsd, ptr, oldsize, tcache, true); return (ret); } diff --git a/src/jemalloc.c b/src/jemalloc.c index 1a26a44f..259ab4f7 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -70,10 +70,10 @@ typedef enum { } malloc_init_t; static malloc_init_t malloc_init_state = malloc_init_uninitialized; -/* 0 should be the common case. Set to true to trigger initialization. */ +/* False should be the common case. Set to true to trigger initialization. */ static bool malloc_slow = true; -/* When malloc_slow != 0, set the corresponding bits for sanity check. */ +/* When malloc_slow is true, set the corresponding bits for sanity check. 
*/ enum { flag_opt_junk_alloc = (1U), flag_opt_junk_free = (1U << 1), @@ -1443,7 +1443,7 @@ malloc_init_hard(void) */ static void * -imalloc_prof_sample(tsd_t *tsd, size_t usize, szind_t ind, +ialloc_prof_sample(tsd_t *tsd, size_t usize, szind_t ind, bool zero, prof_tctx_t *tctx, bool slow_path) { void *p; @@ -1452,27 +1452,27 @@ imalloc_prof_sample(tsd_t *tsd, size_t usize, szind_t ind, return (NULL); if (usize <= SMALL_MAXCLASS) { szind_t ind_large = size2index(LARGE_MINCLASS); - p = imalloc(tsd, LARGE_MINCLASS, ind_large, slow_path); + p = ialloc(tsd, LARGE_MINCLASS, ind_large, zero, slow_path); if (p == NULL) return (NULL); arena_prof_promoted(tsd, p, usize); } else - p = imalloc(tsd, usize, ind, slow_path); + p = ialloc(tsd, usize, ind, zero, slow_path); return (p); } JEMALLOC_ALWAYS_INLINE_C void * -imalloc_prof(tsd_t *tsd, size_t usize, szind_t ind, bool slow_path) +ialloc_prof(tsd_t *tsd, size_t usize, szind_t ind, bool zero, bool slow_path) { void *p; prof_tctx_t *tctx; tctx = prof_alloc_prep(tsd, usize, prof_active_get_unlocked(), true); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) - p = imalloc_prof_sample(tsd, usize, ind, tctx, slow_path); + p = ialloc_prof_sample(tsd, usize, ind, zero, tctx, slow_path); else - p = imalloc(tsd, usize, ind, slow_path); + p = ialloc(tsd, usize, ind, zero, slow_path); if (unlikely(p == NULL)) { prof_alloc_rollback(tsd, tctx, true); return (NULL); @@ -1482,16 +1482,21 @@ imalloc_prof(tsd_t *tsd, size_t usize, szind_t ind, bool slow_path) return (p); } +/* + * ialloc_body() is inlined so that fast and slow paths are generated separately + * with statically known slow_path. + */ JEMALLOC_ALWAYS_INLINE_C void * -imalloc_body(size_t size, tsd_t **tsd, size_t *usize, bool slow_path) +ialloc_body(size_t size, bool zero, tsd_t **tsd, size_t *usize, bool slow_path) { szind_t ind; - if (slow_path && unlikely(malloc_init())) + if (slow_path && unlikely(malloc_init())) { + *tsd = NULL; return (NULL); + } *tsd = tsd_fetch(); - witness_assert_lockless(*tsd); ind = size2index(size); @@ -1505,26 +1510,30 @@ imalloc_body(size_t size, tsd_t **tsd, size_t *usize, bool slow_path) } if (config_prof && opt_prof) - return (imalloc_prof(*tsd, *usize, ind, slow_path)); + return (ialloc_prof(*tsd, *usize, ind, zero, slow_path)); - return (imalloc(*tsd, size, ind, slow_path)); + return (ialloc(*tsd, size, ind, zero, slow_path)); } JEMALLOC_ALWAYS_INLINE_C void -imalloc_post_check(void *ret, tsd_t *tsd, size_t usize, bool slow_path) +ialloc_post_check(void *ret, tsd_t *tsd, size_t usize, const char *func, + bool update_errno, bool slow_path) { + if (unlikely(ret == NULL)) { if (slow_path && config_xmalloc && unlikely(opt_xmalloc)) { - malloc_write(": Error in malloc(): " - "out of memory\n"); + malloc_printf(": Error in %s(): out of " + "memory\n", func); abort(); } - set_errno(ENOMEM); + if (update_errno) + set_errno(ENOMEM); } if (config_stats && likely(ret != NULL)) { assert(usize == isalloc(tsd, ret, config_prof)); *tsd_thread_allocatedp_get(tsd) += usize; } + witness_assert_lockless(tsd); } JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN @@ -1540,20 +1549,15 @@ je_malloc(size_t size) size = 1; if (likely(!malloc_slow)) { - /* - * imalloc_body() is inlined so that fast and slow paths are - * generated separately with statically known slow_path. 
- */ - ret = imalloc_body(size, &tsd, &usize, false); - imalloc_post_check(ret, tsd, usize, false); + ret = ialloc_body(size, false, &tsd, &usize, false); + ialloc_post_check(ret, tsd, usize, "malloc", true, false); } else { - ret = imalloc_body(size, &tsd, &usize, true); - imalloc_post_check(ret, tsd, usize, true); + ret = ialloc_body(size, false, &tsd, &usize, true); + ialloc_post_check(ret, tsd, usize, "malloc", true, true); UTRACE(0, size, ret); JEMALLOC_VALGRIND_MALLOC(ret != NULL, tsd, ret, usize, false); } - witness_assert_lockless(tsd); return (ret); } @@ -1695,45 +1699,6 @@ je_aligned_alloc(size_t alignment, size_t size) return (ret); } -static void * -icalloc_prof_sample(tsd_t *tsd, size_t usize, szind_t ind, prof_tctx_t *tctx) -{ - void *p; - - if (tctx == NULL) - return (NULL); - if (usize <= SMALL_MAXCLASS) { - szind_t ind_large = size2index(LARGE_MINCLASS); - p = icalloc(tsd, LARGE_MINCLASS, ind_large); - if (p == NULL) - return (NULL); - arena_prof_promoted(tsd, p, usize); - } else - p = icalloc(tsd, usize, ind); - - return (p); -} - -JEMALLOC_ALWAYS_INLINE_C void * -icalloc_prof(tsd_t *tsd, size_t usize, szind_t ind) -{ - void *p; - prof_tctx_t *tctx; - - tctx = prof_alloc_prep(tsd, usize, prof_active_get_unlocked(), true); - if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) - p = icalloc_prof_sample(tsd, usize, ind, tctx); - else - p = icalloc(tsd, usize, ind); - if (unlikely(p == NULL)) { - prof_alloc_rollback(tsd, tctx, true); - return (NULL); - } - prof_malloc(tsd, p, usize, tctx); - - return (p); -} - JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE2(1, 2) @@ -1742,68 +1707,33 @@ je_calloc(size_t num, size_t size) void *ret; tsd_t *tsd; size_t num_size; - szind_t ind; size_t usize JEMALLOC_CC_SILENCE_INIT(0); - if (unlikely(malloc_init())) { - tsd = NULL; - num_size = 0; - ret = NULL; - goto label_return; - } - tsd = tsd_fetch(); - witness_assert_lockless(tsd); - num_size = num * size; if (unlikely(num_size == 0)) { if (num == 0 || size == 0) num_size = 1; - else { - ret = NULL; - goto label_return; - } + else + num_size = HUGE_MAXCLASS + 1; /* Trigger OOM. */ /* * Try to avoid division here. We know that it isn't possible to * overflow during multiplication if neither operand uses any of the * most significant half of the bits in a size_t. */ } else if (unlikely(((num | size) & (SIZE_T_MAX << (sizeof(size_t) << - 2))) && (num_size / size != num))) { - /* size_t overflow. */ - ret = NULL; - goto label_return; - } + 2))) && (num_size / size != num))) + num_size = HUGE_MAXCLASS + 1; /* size_t overflow. 
*/ - ind = size2index(num_size); - if (unlikely(ind >= NSIZES)) { - ret = NULL; - goto label_return; - } - if (config_prof && opt_prof) { - usize = index2size(ind); - ret = icalloc_prof(tsd, usize, ind); + if (likely(!malloc_slow)) { + ret = ialloc_body(num_size, true, &tsd, &usize, false); + ialloc_post_check(ret, tsd, usize, "calloc", true, false); } else { - if (config_stats || (config_valgrind && unlikely(in_valgrind))) - usize = index2size(ind); - ret = icalloc(tsd, num_size, ind); + ret = ialloc_body(num_size, true, &tsd, &usize, true); + ialloc_post_check(ret, tsd, usize, "calloc", true, true); + UTRACE(0, num_size, ret); + JEMALLOC_VALGRIND_MALLOC(ret != NULL, tsd, ret, usize, false); } -label_return: - if (unlikely(ret == NULL)) { - if (config_xmalloc && unlikely(opt_xmalloc)) { - malloc_write(": Error in calloc(): out of " - "memory\n"); - abort(); - } - set_errno(ENOMEM); - } - if (config_stats && likely(ret != NULL)) { - assert(usize == isalloc(tsd, ret, config_prof)); - *tsd_thread_allocatedp_get(tsd) += usize; - } - UTRACE(0, num_size, ret); - JEMALLOC_VALGRIND_MALLOC(ret != NULL, tsd, ret, usize, true); - witness_assert_lockless(tsd); return (ret); } @@ -1880,7 +1810,7 @@ ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) } JEMALLOC_INLINE_C void -isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache) +isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) { UNUSED size_t rzsize JEMALLOC_CC_SILENCE_INIT(0); @@ -1895,7 +1825,7 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache) *tsd_thread_deallocatedp_get(tsd) += usize; if (config_valgrind && unlikely(in_valgrind)) rzsize = p2rz(tsd, ptr); - isqalloc(tsd, ptr, usize, tcache); + isqalloc(tsd, ptr, usize, tcache, slow_path); JEMALLOC_VALGRIND_FREE(ptr, rzsize); } @@ -1946,9 +1876,9 @@ je_realloc(void *ptr, size_t size) } else { /* realloc(NULL, size) is equivalent to malloc(size). 
*/ if (likely(!malloc_slow)) - ret = imalloc_body(size, &tsd, &usize, false); + ret = ialloc_body(size, false, &tsd, &usize, false); else - ret = imalloc_body(size, &tsd, &usize, true); + ret = ialloc_body(size, false, &tsd, &usize, true); } if (unlikely(ret == NULL)) { @@ -1978,6 +1908,7 @@ je_free(void *ptr) UTRACE(ptr, 0, 0); if (likely(ptr != NULL)) { tsd_t *tsd = tsd_fetch(); + witness_assert_lockless(tsd); if (likely(!malloc_slow)) ifree(tsd, ptr, tcache_get(tsd, false), false); else @@ -2056,7 +1987,7 @@ JEMALLOC_EXPORT void *(*__memalign_hook)(size_t alignment, size_t size) = */ JEMALLOC_ALWAYS_INLINE_C bool -imallocx_flags_decode_hard(tsd_t *tsd, size_t size, int flags, size_t *usize, +imallocx_flags_decode(tsd_t *tsd, size_t size, int flags, size_t *usize, size_t *alignment, bool *zero, tcache_t **tcache, arena_t **arena) { @@ -2087,29 +2018,9 @@ imallocx_flags_decode_hard(tsd_t *tsd, size_t size, int flags, size_t *usize, return (false); } -JEMALLOC_ALWAYS_INLINE_C bool -imallocx_flags_decode(tsd_t *tsd, size_t size, int flags, size_t *usize, - size_t *alignment, bool *zero, tcache_t **tcache, arena_t **arena) -{ - - if (likely(flags == 0)) { - *usize = s2u(size); - if (unlikely(*usize == 0 || *usize > HUGE_MAXCLASS)) - return (true); - *alignment = 0; - *zero = false; - *tcache = tcache_get(tsd, true); - *arena = NULL; - return (false); - } else { - return (imallocx_flags_decode_hard(tsd, size, flags, usize, - alignment, zero, tcache, arena)); - } -} - JEMALLOC_ALWAYS_INLINE_C void * imallocx_flags(tsd_t *tsd, size_t usize, size_t alignment, bool zero, - tcache_t *tcache, arena_t *arena) + tcache_t *tcache, arena_t *arena, bool slow_path) { szind_t ind; @@ -2117,14 +2028,13 @@ imallocx_flags(tsd_t *tsd, size_t usize, size_t alignment, bool zero, return (ipalloct(tsd, usize, alignment, zero, tcache, arena)); ind = size2index(usize); assert(ind < NSIZES); - if (unlikely(zero)) - return (icalloct(tsd, usize, ind, tcache, arena)); - return (imalloct(tsd, usize, ind, tcache, arena)); + return (iallocztm(tsd, usize, ind, zero, tcache, false, arena, + slow_path)); } static void * imallocx_prof_sample(tsd_t *tsd, size_t usize, size_t alignment, bool zero, - tcache_t *tcache, arena_t *arena) + tcache_t *tcache, arena_t *arena, bool slow_path) { void *p; @@ -2132,18 +2042,20 @@ imallocx_prof_sample(tsd_t *tsd, size_t usize, size_t alignment, bool zero, assert(((alignment == 0) ? 
s2u(LARGE_MINCLASS) : sa2u(LARGE_MINCLASS, alignment)) == LARGE_MINCLASS); p = imallocx_flags(tsd, LARGE_MINCLASS, alignment, zero, tcache, - arena); + arena, slow_path); if (p == NULL) return (NULL); arena_prof_promoted(tsd, p, usize); - } else - p = imallocx_flags(tsd, usize, alignment, zero, tcache, arena); + } else { + p = imallocx_flags(tsd, usize, alignment, zero, tcache, arena, + slow_path); + } return (p); } JEMALLOC_ALWAYS_INLINE_C void * -imallocx_prof(tsd_t *tsd, size_t size, int flags, size_t *usize) +imallocx_prof(tsd_t *tsd, size_t size, int flags, size_t *usize, bool slow_path) { void *p; size_t alignment; @@ -2157,10 +2069,11 @@ imallocx_prof(tsd_t *tsd, size_t size, int flags, size_t *usize) return (NULL); tctx = prof_alloc_prep(tsd, *usize, prof_active_get_unlocked(), true); if (likely((uintptr_t)tctx == (uintptr_t)1U)) - p = imallocx_flags(tsd, *usize, alignment, zero, tcache, arena); + p = imallocx_flags(tsd, *usize, alignment, zero, tcache, arena, + slow_path); else if ((uintptr_t)tctx > (uintptr_t)1U) { p = imallocx_prof_sample(tsd, *usize, alignment, zero, tcache, - arena); + arena, slow_path); } else p = NULL; if (unlikely(p == NULL)) { @@ -2174,7 +2087,8 @@ imallocx_prof(tsd_t *tsd, size_t size, int flags, size_t *usize) } JEMALLOC_ALWAYS_INLINE_C void * -imallocx_no_prof(tsd_t *tsd, size_t size, int flags, size_t *usize) +imallocx_no_prof(tsd_t *tsd, size_t size, int flags, size_t *usize, + bool slow_path) { void *p; size_t alignment; @@ -2182,24 +2096,50 @@ imallocx_no_prof(tsd_t *tsd, size_t size, int flags, size_t *usize) tcache_t *tcache; arena_t *arena; + if (unlikely(imallocx_flags_decode(tsd, size, flags, usize, &alignment, + &zero, &tcache, &arena))) + return (NULL); + p = imallocx_flags(tsd, *usize, alignment, zero, tcache, arena, + slow_path); + assert(alignment == 0 || ((uintptr_t)p & (alignment - 1)) == ZU(0)); + return (p); +} + +JEMALLOC_ALWAYS_INLINE_C void * +imallocx_body(size_t size, int flags, tsd_t **tsd, size_t *usize, + bool slow_path) +{ + + if (slow_path && unlikely(malloc_init())) { + *tsd = NULL; + return (NULL); + } + + *tsd = tsd_fetch(); + witness_assert_lockless(*tsd); + if (likely(flags == 0)) { szind_t ind = size2index(size); if (unlikely(ind >= NSIZES)) return (NULL); - if (config_stats || (config_valgrind && - unlikely(in_valgrind))) { + if (config_stats || (config_prof && opt_prof) || (slow_path && + config_valgrind && unlikely(in_valgrind))) { *usize = index2size(ind); assert(*usize > 0 && *usize <= HUGE_MAXCLASS); } - return (imalloc(tsd, size, ind, true)); + + if (config_prof && opt_prof) { + return (ialloc_prof(*tsd, *usize, ind, false, + slow_path)); + } + + return (ialloc(*tsd, size, ind, false, slow_path)); } - if (unlikely(imallocx_flags_decode_hard(tsd, size, flags, usize, - &alignment, &zero, &tcache, &arena))) - return (NULL); - p = imallocx_flags(tsd, *usize, alignment, zero, tcache, arena); - assert(alignment == 0 || ((uintptr_t)p & (alignment - 1)) == ZU(0)); - return (p); + if (config_prof && opt_prof) + return (imallocx_prof(*tsd, size, flags, usize, slow_path)); + + return (imallocx_no_prof(*tsd, size, flags, usize, slow_path)); } JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN @@ -2213,36 +2153,18 @@ je_mallocx(size_t size, int flags) assert(size != 0); - if (unlikely(malloc_init())) { - tsd = NULL; - goto label_oom; + if (likely(!malloc_slow)) { + p = imallocx_body(size, flags, &tsd, &usize, false); + ialloc_post_check(p, tsd, usize, "mallocx", false, false); + } else { + p = imallocx_body(size, 
flags, &tsd, &usize, true); + ialloc_post_check(p, tsd, usize, "mallocx", false, true); + UTRACE(0, size, p); + JEMALLOC_VALGRIND_MALLOC(p != NULL, tsd, p, usize, + MALLOCX_ZERO_GET(flags)); } - tsd = tsd_fetch(); - witness_assert_lockless(tsd); - if (config_prof && opt_prof) - p = imallocx_prof(tsd, size, flags, &usize); - else - p = imallocx_no_prof(tsd, size, flags, &usize); - if (unlikely(p == NULL)) - goto label_oom; - - if (config_stats) { - assert(usize == isalloc(tsd, p, config_prof)); - *tsd_thread_allocatedp_get(tsd) += usize; - } - UTRACE(0, size, p); - JEMALLOC_VALGRIND_MALLOC(true, tsd, p, usize, MALLOCX_ZERO_GET(flags)); - witness_assert_lockless(tsd); return (p); -label_oom: - if (config_xmalloc && unlikely(opt_xmalloc)) { - malloc_write(": Error in mallocx(): out of memory\n"); - abort(); - } - UTRACE(0, size, 0); - witness_assert_lockless(tsd); - return (NULL); } static void * @@ -2567,7 +2489,10 @@ je_dallocx(void *ptr, int flags) tcache = tcache_get(tsd, false); UTRACE(ptr, 0, 0); - ifree(tsd_fetch(), ptr, tcache, true); + if (likely(!malloc_slow)) + ifree(tsd, ptr, tcache, false); + else + ifree(tsd, ptr, tcache, true); witness_assert_lockless(tsd); } @@ -2609,7 +2534,10 @@ je_sdallocx(void *ptr, size_t size, int flags) tcache = tcache_get(tsd, false); UTRACE(ptr, 0, 0); - isfree(tsd, ptr, usize, tcache); + if (likely(!malloc_slow)) + isfree(tsd, ptr, usize, tcache, false); + else + isfree(tsd, ptr, usize, tcache, true); witness_assert_lockless(tsd); }
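
A minimal, hedged sketch of the specialization pattern this patch extends from malloc()/free() to calloc() and [m,d,sd]allocx(): a single always-inlined body takes statically known `zero` and `slow_path` flags, and each exported entry point instantiates it twice, dispatching on a runtime `malloc_slow` flag, so the compiler emits one branch-lean fast path and one fully checked slow path. The names below (alloc_body(), backend_alloc(), run_slow_checks(), my_malloc(), my_calloc()) are illustrative placeholders, not jemalloc's internal API, and the overflow handling is simplified: it returns NULL where the patch instead sets num_size to HUGE_MAXCLASS + 1 so that the normal allocation path reports OOM.

#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

/*
 * Set once during initialization if any slow feature (junk fill, quarantine,
 * valgrind, xmalloc, ...) is enabled; mirrors jemalloc's malloc_slow flag.
 */
static bool malloc_slow = false;

static void *backend_alloc(size_t size) { return malloc(size); }
static void run_slow_checks(void *p, size_t size) { (void)p; (void)size; }

static inline __attribute__((always_inline)) void *
alloc_body(size_t size, bool zero, bool slow_path)
{
	void *p = backend_alloc(size);

	if (p != NULL && zero)
		memset(p, 0, size);
	/*
	 * In the fast instantiation slow_path is a constant false, so the
	 * compiler drops this branch entirely.
	 */
	if (slow_path && p != NULL)
		run_slow_checks(p, size);
	return (p);
}

void *
my_malloc(size_t size)
{

	if (size == 0)
		size = 1;
	if (__builtin_expect(!malloc_slow, 1))
		return (alloc_body(size, false, false));	/* fast path */
	return (alloc_body(size, false, true));		/* slow path */
}

void *
my_calloc(size_t num, size_t size)
{
	size_t num_size = num * size;

	if (num_size == 0) {
		if (num == 0 || size == 0)
			num_size = 1;
		else
			return (NULL);	/* multiplication wrapped to zero */
	} else if (((num | size) & (SIZE_MAX << (sizeof(size_t) << 2))) &&
	    num_size / size != num) {
		/*
		 * Division is reached only when an operand uses the high half
		 * of a size_t's bits, as in the patch's overflow check.
		 */
		return (NULL);
	}
	if (__builtin_expect(!malloc_slow, 1))
		return (alloc_body(num_size, true, false));
	return (alloc_body(num_size, true, true));
}

Because slow_path (and zero) are compile-time constants inside each always-inlined instantiation, every check guarded by them becomes dead code in the fast variant; this is the same effect the patch's new comment describes for ialloc_body() being "inlined so that fast and slow paths are generated separately with statically known slow_path."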