From 5a858c64d6f049c64c11baf907ab8655e6ed72a3 Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Fri, 2 Jun 2023 15:15:37 -0700 Subject: [PATCH] Reduce the memory overhead of sampled small allocations Previously, small allocations which were sampled as part of heap profiling were rounded up to `SC_LARGE_MINCLASS`. This additional memory usage becomes problematic when the page size is increased, as noted in #2358. Small allocations are now rounded up to the nearest multiple of `PAGE` instead, reducing the memory overhead by a factor of 4 in the most extreme cases. --- include/jemalloc/internal/arena_externs.h | 9 +-- include/jemalloc/internal/arena_inlines_b.h | 11 ++- .../internal/jemalloc_internal_inlines_c.h | 69 +++++++++++++----- include/jemalloc/internal/pages.h | 3 + include/jemalloc/internal/prof_inlines.h | 9 +-- include/jemalloc/internal/prof_types.h | 8 +++ include/jemalloc/internal/safety_check.h | 40 +++++++++-- include/jemalloc/internal/sz.h | 15 ++++ include/jemalloc/internal/tcache_inlines.h | 2 +- src/arena.c | 64 +++++++++-------- src/jemalloc.c | 72 ++++++++++--------- src/pages.c | 2 +- 12 files changed, 206 insertions(+), 98 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 3821233f..2d82ad8f 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -65,10 +65,11 @@ void arena_cache_bin_fill_small(tsdn_t *tsdn, arena_t *arena, const unsigned nfill); void *arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, - szind_t ind, bool zero); + szind_t ind, bool zero, bool slab); void *arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, - size_t alignment, bool zero, tcache_t *tcache); -void arena_prof_promote(tsdn_t *tsdn, void *ptr, size_t usize); + size_t alignment, bool zero, bool slab, tcache_t *tcache); +void arena_prof_promote(tsdn_t *tsdn, void *ptr, size_t usize, + size_t bumped_usize); void arena_dalloc_promoted(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path); void arena_slab_dalloc(tsdn_t *tsdn, arena_t *arena, edata_t *slab); @@ -81,7 +82,7 @@ void arena_dalloc_small(tsdn_t *tsdn, void *ptr); bool arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t extra, bool zero, size_t *newsize); void *arena_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize, - size_t size, size_t alignment, bool zero, tcache_t *tcache, + size_t size, size_t alignment, bool zero, bool slab, tcache_t *tcache, hook_ralloc_args_t *hook_args); dss_prec_t arena_dss_prec_get(arena_t *arena); ehooks_t *arena_get_ehooks(arena_t *arena); diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index bf25a31c..420a62b2 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -182,23 +182,22 @@ arena_decay_tick(tsdn_t *tsdn, arena_t *arena) { JEMALLOC_ALWAYS_INLINE void * arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero, - tcache_t *tcache, bool slow_path) { + bool slab, tcache_t *tcache, bool slow_path) { assert(!tsdn_null(tsdn) || tcache == NULL); if (likely(tcache != NULL)) { - if (likely(size <= SC_SMALL_MAXCLASS)) { + if (likely(slab)) { + assert(sz_can_use_slab(size)); return tcache_alloc_small(tsdn_tsd(tsdn), arena, tcache, size, ind, zero, slow_path); - } - if (likely(size <= tcache_maxclass)) { + } else if (likely(size <= tcache_maxclass)) { return tcache_alloc_large(tsdn_tsd(tsdn), arena, 
tcache, size, ind, zero, slow_path); } /* (size > tcache_maxclass) case falls through. */ - assert(size > tcache_maxclass); } - return arena_malloc_hard(tsdn, arena, size, ind, zero); + return arena_malloc_hard(tsdn, arena, size, ind, zero, slab); } JEMALLOC_ALWAYS_INLINE arena_t * diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_c.h b/include/jemalloc/internal/jemalloc_internal_inlines_c.h index 206f1400..ae9cb0c2 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_c.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_c.h @@ -52,10 +52,12 @@ isalloc(tsdn_t *tsdn, const void *ptr) { } JEMALLOC_ALWAYS_INLINE void * -iallocztm(tsdn_t *tsdn, size_t size, szind_t ind, bool zero, tcache_t *tcache, - bool is_internal, arena_t *arena, bool slow_path) { +iallocztm_explicit_slab(tsdn_t *tsdn, size_t size, szind_t ind, bool zero, + bool slab, tcache_t *tcache, bool is_internal, arena_t *arena, + bool slow_path) { void *ret; + assert(!slab || sz_can_use_slab(size)); /* slab && large is illegal */ assert(!is_internal || tcache == NULL); assert(!is_internal || arena == NULL || arena_is_auto(arena)); if (!tsdn_null(tsdn) && tsd_reentrancy_level_get(tsdn_tsd(tsdn)) == 0) { @@ -63,13 +65,21 @@ iallocztm(tsdn_t *tsdn, size_t size, szind_t ind, bool zero, tcache_t *tcache, WITNESS_RANK_CORE, 0); } - ret = arena_malloc(tsdn, arena, size, ind, zero, tcache, slow_path); + ret = arena_malloc(tsdn, arena, size, ind, zero, slab, tcache, slow_path); if (config_stats && is_internal && likely(ret != NULL)) { arena_internal_add(iaalloc(tsdn, ret), isalloc(tsdn, ret)); } return ret; } +JEMALLOC_ALWAYS_INLINE void * +iallocztm(tsdn_t *tsdn, size_t size, szind_t ind, bool zero, tcache_t *tcache, + bool is_internal, arena_t *arena, bool slow_path) { + bool slab = sz_can_use_slab(size); + return iallocztm_explicit_slab(tsdn, size, ind, zero, slab, tcache, + is_internal, arena, slow_path); +} + JEMALLOC_ALWAYS_INLINE void * ialloc(tsd_t *tsd, size_t size, szind_t ind, bool zero, bool slow_path) { return iallocztm(tsd_tsdn(tsd), size, ind, zero, tcache_get(tsd), false, @@ -77,10 +87,11 @@ ialloc(tsd_t *tsd, size_t size, szind_t ind, bool zero, bool slow_path) { } JEMALLOC_ALWAYS_INLINE void * -ipallocztm(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, - tcache_t *tcache, bool is_internal, arena_t *arena) { +ipallocztm_explicit_slab(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, + bool slab, tcache_t *tcache, bool is_internal, arena_t *arena) { void *ret; + assert(!slab || sz_can_use_slab(usize)); /* slab && large is illegal */ assert(usize != 0); assert(usize == sz_sa2u(usize, alignment)); assert(!is_internal || tcache == NULL); @@ -88,7 +99,7 @@ ipallocztm(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - ret = arena_palloc(tsdn, arena, usize, alignment, zero, tcache); + ret = arena_palloc(tsdn, arena, usize, alignment, zero, slab, tcache); assert(ALIGNMENT_ADDR2BASE(ret, alignment) == ret); if (config_stats && is_internal && likely(ret != NULL)) { arena_internal_add(iaalloc(tsdn, ret), isalloc(tsdn, ret)); @@ -96,12 +107,26 @@ ipallocztm(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, return ret; } +JEMALLOC_ALWAYS_INLINE void * +ipallocztm(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, + tcache_t *tcache, bool is_internal, arena_t *arena) { + return ipallocztm_explicit_slab(tsdn, usize, alignment, zero, + sz_can_use_slab(usize), tcache, is_internal, arena); 
+} + JEMALLOC_ALWAYS_INLINE void * ipalloct(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena) { return ipallocztm(tsdn, usize, alignment, zero, tcache, false, arena); } +JEMALLOC_ALWAYS_INLINE void * +ipalloct_explicit_slab(tsdn_t *tsdn, size_t usize, size_t alignment, + bool zero, bool slab, tcache_t *tcache, arena_t *arena) { + return ipallocztm_explicit_slab(tsdn, usize, alignment, zero, slab, + tcache, false, arena); +} + JEMALLOC_ALWAYS_INLINE void * ipalloc(tsd_t *tsd, size_t usize, size_t alignment, bool zero) { return ipallocztm(tsd_tsdn(tsd), usize, alignment, zero, @@ -146,7 +171,7 @@ isdalloct(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, JEMALLOC_ALWAYS_INLINE void * iralloct_realign(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, - size_t alignment, bool zero, tcache_t *tcache, arena_t *arena, + size_t alignment, bool zero, bool slab, tcache_t *tcache, arena_t *arena, hook_ralloc_args_t *hook_args) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); @@ -157,7 +182,8 @@ iralloct_realign(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, if (unlikely(usize == 0 || usize > SC_LARGE_MAXCLASS)) { return NULL; } - p = ipalloct(tsdn, usize, alignment, zero, tcache, arena); + p = ipalloct_explicit_slab(tsdn, usize, alignment, zero, slab, + tcache, arena); if (p == NULL) { return NULL; } @@ -184,8 +210,9 @@ iralloct_realign(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, * passed-around anywhere. */ JEMALLOC_ALWAYS_INLINE void * -iralloct(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t alignment, - bool zero, tcache_t *tcache, arena_t *arena, hook_ralloc_args_t *hook_args) +iralloct_explicit_slab(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, + size_t alignment, bool zero, bool slab, tcache_t *tcache, arena_t *arena, + hook_ralloc_args_t *hook_args) { assert(ptr != NULL); assert(size != 0); @@ -199,18 +226,28 @@ iralloct(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t alignment, * and copy. 
*/ return iralloct_realign(tsdn, ptr, oldsize, size, alignment, - zero, tcache, arena, hook_args); + zero, slab, tcache, arena, hook_args); } return arena_ralloc(tsdn, arena, ptr, oldsize, size, alignment, zero, - tcache, hook_args); + slab, tcache, hook_args); +} + +JEMALLOC_ALWAYS_INLINE void * +iralloct(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t alignment, + size_t usize, bool zero, tcache_t *tcache, arena_t *arena, + hook_ralloc_args_t *hook_args) +{ + bool slab = sz_can_use_slab(usize); + return iralloct_explicit_slab(tsdn, ptr, oldsize, size, alignment, zero, + slab, tcache, arena, hook_args); } JEMALLOC_ALWAYS_INLINE void * iralloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t alignment, - bool zero, hook_ralloc_args_t *hook_args) { - return iralloct(tsd_tsdn(tsd), ptr, oldsize, size, alignment, zero, - tcache_get(tsd), NULL, hook_args); + size_t usize, bool zero, hook_ralloc_args_t *hook_args) { + return iralloct(tsd_tsdn(tsd), ptr, oldsize, size, alignment, usize, + zero, tcache_get(tsd), NULL, hook_args); } JEMALLOC_ALWAYS_INLINE bool @@ -405,7 +442,7 @@ maybe_check_alloc_ctx(tsd_t *tsd, void *ptr, emap_alloc_ctx_t *alloc_ctx) { JEMALLOC_ALWAYS_INLINE bool prof_sample_aligned(const void *ptr) { - return ((uintptr_t)ptr & PAGE_MASK) == 0; + return ((uintptr_t)ptr & PROF_SAMPLE_ALIGNMENT_MASK) == 0; } JEMALLOC_ALWAYS_INLINE bool diff --git a/include/jemalloc/internal/pages.h b/include/jemalloc/internal/pages.h index ad1f606a..361de587 100644 --- a/include/jemalloc/internal/pages.h +++ b/include/jemalloc/internal/pages.h @@ -1,6 +1,9 @@ #ifndef JEMALLOC_INTERNAL_PAGES_EXTERNS_H #define JEMALLOC_INTERNAL_PAGES_EXTERNS_H +/* Actual operating system page size, detected during bootstrap, <= PAGE. */ +extern size_t os_page; + /* Page size. LG_PAGE is determined by the configure script. */ #ifdef PAGE_MASK # undef PAGE_MASK diff --git a/include/jemalloc/internal/prof_inlines.h b/include/jemalloc/internal/prof_inlines.h index b74b115c..b5273010 100644 --- a/include/jemalloc/internal/prof_inlines.h +++ b/include/jemalloc/internal/prof_inlines.h @@ -239,14 +239,15 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t size, size_t usize, } JEMALLOC_ALWAYS_INLINE size_t -prof_sample_align(size_t orig_align) { +prof_sample_align(size_t usize, size_t orig_align) { /* - * Enforce page alignment, so that sampled allocations can be identified + * Enforce alignment, so that sampled allocations can be identified * w/o metadata lookup. */ assert(opt_prof); - return (opt_cache_oblivious && orig_align < PAGE) ? PAGE : - orig_align; + return (orig_align < PROF_SAMPLE_ALIGNMENT && + (sz_can_use_slab(usize) || opt_cache_oblivious)) ? + PROF_SAMPLE_ALIGNMENT : orig_align; } JEMALLOC_ALWAYS_INLINE bool diff --git a/include/jemalloc/internal/prof_types.h b/include/jemalloc/internal/prof_types.h index 104f7e61..046ea204 100644 --- a/include/jemalloc/internal/prof_types.h +++ b/include/jemalloc/internal/prof_types.h @@ -80,4 +80,12 @@ typedef struct prof_recent_s prof_recent_t; /* Thread name storage size limit. */ #define PROF_THREAD_NAME_MAX_LEN 16 +/* + * Minimum required alignment for sampled allocations. Over-aligning sampled + * allocations allows us to quickly identify them on the dalloc path without + * resorting to metadata lookup. 
+ */ +#define PROF_SAMPLE_ALIGNMENT PAGE +#define PROF_SAMPLE_ALIGNMENT_MASK PAGE_MASK + #endif /* JEMALLOC_INTERNAL_PROF_TYPES_H */ diff --git a/include/jemalloc/internal/safety_check.h b/include/jemalloc/internal/safety_check.h index 900cfa55..7854c1bf 100644 --- a/include/jemalloc/internal/safety_check.h +++ b/include/jemalloc/internal/safety_check.h @@ -3,6 +3,8 @@ #define SAFETY_CHECK_DOUBLE_FREE_MAX_SCAN_DEFAULT 32 +#include "jemalloc/internal/pages.h" + void safety_check_fail_sized_dealloc(bool current_dealloc, const void *ptr, size_t true_size, size_t input_size); void safety_check_fail(const char *format, ...); @@ -12,22 +14,50 @@ typedef void (*safety_check_abort_hook_t)(const char *message); /* Can set to NULL for a default. */ void safety_check_set_abort(safety_check_abort_hook_t abort_fn); +#define REDZONE_SIZE ((size_t) 32) +#define REDZONE_FILL_VALUE 0xBC + +/* + * Normally the redzone extends `REDZONE_SIZE` bytes beyond the end of + * the allocation. However, we don't let the redzone extend onto another + * OS page because this would impose additional overhead if that page was + * not already resident in memory. + */ +JEMALLOC_ALWAYS_INLINE const unsigned char * +compute_redzone_end(const void *_ptr, size_t usize, size_t bumped_usize) { + const unsigned char *ptr = (const unsigned char *) _ptr; + const unsigned char *redzone_end = usize + REDZONE_SIZE < bumped_usize ? + &ptr[usize + REDZONE_SIZE] : &ptr[bumped_usize]; + const unsigned char *page_end = (const unsigned char *) + ALIGNMENT_CEILING(((uintptr_t) (&ptr[usize])), os_page); + return redzone_end < page_end ? redzone_end : page_end; +} + JEMALLOC_ALWAYS_INLINE void safety_check_set_redzone(void *ptr, size_t usize, size_t bumped_usize) { - assert(usize < bumped_usize); - for (size_t i = usize; i < bumped_usize && i < usize + 32; ++i) { - *((unsigned char *)ptr + i) = 0xBC; + assert(usize <= bumped_usize); + const unsigned char *redzone_end = + compute_redzone_end(ptr, usize, bumped_usize); + for (unsigned char *curr = &((unsigned char *)ptr)[usize]; + curr < redzone_end; curr++) { + *curr = REDZONE_FILL_VALUE; } } JEMALLOC_ALWAYS_INLINE void safety_check_verify_redzone(const void *ptr, size_t usize, size_t bumped_usize) { - for (size_t i = usize; i < bumped_usize && i < usize + 32; ++i) { - if (unlikely(*((unsigned char *)ptr + i) != 0xBC)) { + const unsigned char *redzone_end = + compute_redzone_end(ptr, usize, bumped_usize); + for (const unsigned char *curr= &((const unsigned char *)ptr)[usize]; + curr < redzone_end; curr++) { + if (unlikely(*curr != REDZONE_FILL_VALUE)) { safety_check_fail("Use after free error\n"); } } } +#undef REDZONE_SIZE +#undef REDZONE_FILL_VALUE + #endif /*JEMALLOC_INTERNAL_SAFETY_CHECK_H */ diff --git a/include/jemalloc/internal/sz.h b/include/jemalloc/internal/sz.h index 3c0fc1da..a799cea9 100644 --- a/include/jemalloc/internal/sz.h +++ b/include/jemalloc/internal/sz.h @@ -365,6 +365,21 @@ sz_sa2u(size_t size, size_t alignment) { return usize; } +/* + * Under normal circumstances, whether or not to use a slab + * to satisfy an allocation depends solely on the allocation's + * effective size. However, this is *not* the case when an allocation + * is sampled for profiling, in which case you *must not* use a slab + * regardless of the effective size. Thus `sz_can_use_slab` is called + * on the common path, but there exist `*_explicit_slab` variants of + * several functions for handling the aforementioned case of + * sampled allocations. 
+ */ +JEMALLOC_ALWAYS_INLINE bool +sz_can_use_slab(size_t size) { + return size <= SC_SMALL_MAXCLASS; +} + size_t sz_psz_quantize_floor(size_t size); size_t sz_psz_quantize_ceil(size_t size); diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index 2634f145..2b8db0a3 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -60,7 +60,7 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, if (unlikely(tcache_small_bin_disabled(binind, bin))) { /* stats and zero are handled directly by the arena. */ return arena_malloc_hard(tsd_tsdn(tsd), arena, size, - binind, zero); + binind, zero, /* slab */ true); } tcache_bin_flush_stashed(tsd, tcache, bin, binind, /* is_small */ true); diff --git a/src/arena.c b/src/arena.c index 3b151b77..b154b7a5 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1191,7 +1191,7 @@ arena_malloc_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) { void * arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, - bool zero) { + bool zero, bool slab) { assert(!tsdn_null(tsdn) || arena != NULL); if (likely(!tsdn_null(tsdn))) { @@ -1201,18 +1201,19 @@ arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, return NULL; } - if (likely(size <= SC_SMALL_MAXCLASS)) { + if (likely(slab)) { + assert(sz_can_use_slab(size)); return arena_malloc_small(tsdn, arena, ind, zero); + } else { + return large_malloc(tsdn, arena, sz_index2size(ind), zero); } - return large_malloc(tsdn, arena, sz_index2size(ind), zero); } void * arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, - bool zero, tcache_t *tcache) { - void *ret; - - if (usize <= SC_SMALL_MAXCLASS) { + bool zero, bool slab, tcache_t *tcache) { + if (slab) { + assert(sz_can_use_slab(usize)); /* Small; alignment doesn't require special slab placement. 
*/ /* usize should be a result of sz_sa2u() */ @@ -1223,27 +1224,26 @@ arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, */ assert(alignment <= PAGE); - ret = arena_malloc(tsdn, arena, usize, sz_size2index(usize), - zero, tcache, true); + return arena_malloc(tsdn, arena, usize, sz_size2index(usize), + zero, slab, tcache, true); } else { if (likely(alignment <= CACHELINE)) { - ret = large_malloc(tsdn, arena, usize, zero); + return large_malloc(tsdn, arena, usize, zero); } else { - ret = large_palloc(tsdn, arena, usize, alignment, zero); + return large_palloc(tsdn, arena, usize, alignment, zero); } } - return ret; } void -arena_prof_promote(tsdn_t *tsdn, void *ptr, size_t usize) { +arena_prof_promote(tsdn_t *tsdn, void *ptr, size_t usize, size_t bumped_usize) { cassert(config_prof); assert(ptr != NULL); - assert(isalloc(tsdn, ptr) == SC_LARGE_MINCLASS); - assert(usize <= SC_SMALL_MAXCLASS); + assert(isalloc(tsdn, ptr) == bumped_usize); + assert(sz_can_use_slab(usize)); if (config_opt_safety_checks) { - safety_check_set_redzone(ptr, usize, SC_LARGE_MINCLASS); + safety_check_set_redzone(ptr, usize, bumped_usize); } edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr); @@ -1259,13 +1259,19 @@ static size_t arena_prof_demote(tsdn_t *tsdn, edata_t *edata, const void *ptr) { cassert(config_prof); assert(ptr != NULL); + size_t usize = isalloc(tsdn, ptr); + size_t bumped_usize = sz_sa2u(usize, PROF_SAMPLE_ALIGNMENT); + assert(bumped_usize <= SC_LARGE_MINCLASS && + PAGE_CEILING(bumped_usize) == bumped_usize); + assert(edata_size_get(edata) - bumped_usize <= sz_large_pad); + szind_t szind = sz_size2index(bumped_usize); - edata_szind_set(edata, SC_NBINS); - emap_remap(tsdn, &arena_emap_global, edata, SC_NBINS, /* slab */ false); + edata_szind_set(edata, szind); + emap_remap(tsdn, &arena_emap_global, edata, szind, /* slab */ false); - assert(isalloc(tsdn, ptr) == SC_LARGE_MINCLASS); + assert(isalloc(tsdn, ptr) == bumped_usize); - return SC_LARGE_MINCLASS; + return bumped_usize; } void @@ -1282,10 +1288,10 @@ arena_dalloc_promoted(tsdn_t *tsdn, void *ptr, tcache_t *tcache, * Currently, we only do redzoning for small sampled * allocations. */ - assert(bumped_usize == SC_LARGE_MINCLASS); safety_check_verify_redzone(ptr, usize, bumped_usize); } - if (bumped_usize <= tcache_maxclass && tcache != NULL) { + if (bumped_usize >= SC_LARGE_MINCLASS && + bumped_usize <= tcache_maxclass && tcache != NULL) { tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr, sz_size2index(bumped_usize), slow_path); } else { @@ -1443,28 +1449,30 @@ done: static void * arena_ralloc_move_helper(tsdn_t *tsdn, arena_t *arena, size_t usize, - size_t alignment, bool zero, tcache_t *tcache) { + size_t alignment, bool zero, bool slab, tcache_t *tcache) { if (alignment == 0) { return arena_malloc(tsdn, arena, usize, sz_size2index(usize), - zero, tcache, true); + zero, slab, tcache, true); } usize = sz_sa2u(usize, alignment); if (unlikely(usize == 0 || usize > SC_LARGE_MAXCLASS)) { return NULL; } - return ipalloct(tsdn, usize, alignment, zero, tcache, arena); + return ipalloct_explicit_slab(tsdn, usize, alignment, zero, slab, + tcache, arena); } void * arena_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize, - size_t size, size_t alignment, bool zero, tcache_t *tcache, + size_t size, size_t alignment, bool zero, bool slab, tcache_t *tcache, hook_ralloc_args_t *hook_args) { size_t usize = alignment == 0 ? 
sz_s2u(size) : sz_sa2u(size, alignment); if (unlikely(usize == 0 || size > SC_LARGE_MAXCLASS)) { return NULL; } - if (likely(usize <= SC_SMALL_MAXCLASS)) { + if (likely(slab)) { + assert(sz_can_use_slab(usize)); /* Try to avoid moving the allocation. */ UNUSED size_t newsize; if (!arena_ralloc_no_move(tsdn, ptr, oldsize, usize, 0, zero, @@ -1488,7 +1496,7 @@ arena_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize, * object. In that case, fall back to allocating new space and copying. */ void *ret = arena_ralloc_move_helper(tsdn, arena, usize, alignment, - zero, tcache); + zero, slab, tcache); if (ret == NULL) { return NULL; } diff --git a/src/jemalloc.c b/src/jemalloc.c index 88559be0..4e4e4bee 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2360,7 +2360,7 @@ arena_get_from_ind(tsd_t *tsd, unsigned arena_ind, arena_t **arena_p) { /* ind is ignored if dopts->alignment > 0. */ JEMALLOC_ALWAYS_INLINE void * imalloc_no_sample(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd, - size_t size, size_t usize, szind_t ind) { + size_t size, size_t usize, szind_t ind, bool slab) { /* Fill in the tcache. */ tcache_t *tcache = tcache_get_from_ind(tsd, dopts->tcache_ind, sopts->slow, /* is_alloc */ true); @@ -2372,12 +2372,12 @@ imalloc_no_sample(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd, } if (unlikely(dopts->alignment != 0)) { - return ipalloct(tsd_tsdn(tsd), usize, dopts->alignment, - dopts->zero, tcache, arena); + return ipalloct_explicit_slab(tsd_tsdn(tsd), usize, + dopts->alignment, dopts->zero, slab, tcache, arena); } - return iallocztm(tsd_tsdn(tsd), size, ind, dopts->zero, tcache, false, - arena, sopts->slow); + return iallocztm_explicit_slab(tsd_tsdn(tsd), size, ind, dopts->zero, + slab, tcache, false, arena, sopts->slow); } JEMALLOC_ALWAYS_INLINE void * @@ -2385,28 +2385,26 @@ imalloc_sample(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd, size_t usize, szind_t ind) { void *ret; + dopts->alignment = prof_sample_align(usize, dopts->alignment); /* - * For small allocations, sampling bumps the usize. If so, we allocate - * from the ind_large bucket. + * If the allocation is small enough that it would normally be allocated + * on a slab, we need to take additional steps to ensure that it gets + * its own extent instead. */ - szind_t ind_large; - - dopts->alignment = prof_sample_align(dopts->alignment); - if (usize <= SC_SMALL_MAXCLASS) { - assert(((dopts->alignment == 0) ? 
- sz_s2u(SC_LARGE_MINCLASS) : - sz_sa2u(SC_LARGE_MINCLASS, dopts->alignment)) - == SC_LARGE_MINCLASS); - ind_large = sz_size2index(SC_LARGE_MINCLASS); - size_t bumped_usize = sz_s2u(SC_LARGE_MINCLASS); + if (sz_can_use_slab(usize)) { + assert((dopts->alignment & PROF_SAMPLE_ALIGNMENT_MASK) == 0); + size_t bumped_usize = sz_sa2u(usize, dopts->alignment); + szind_t bumped_ind = sz_size2index(bumped_usize); + dopts->tcache_ind = TCACHE_IND_NONE; ret = imalloc_no_sample(sopts, dopts, tsd, bumped_usize, - bumped_usize, ind_large); + bumped_usize, bumped_ind, /* slab */ false); if (unlikely(ret == NULL)) { return NULL; } - arena_prof_promote(tsd_tsdn(tsd), ret, usize); + arena_prof_promote(tsd_tsdn(tsd), ret, usize, bumped_usize); } else { - ret = imalloc_no_sample(sopts, dopts, tsd, usize, usize, ind); + ret = imalloc_no_sample(sopts, dopts, tsd, usize, usize, ind, + /* slab */ false); } assert(prof_sample_aligned(ret)); @@ -2532,9 +2530,10 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { emap_alloc_ctx_t alloc_ctx; if (likely((uintptr_t)tctx == (uintptr_t)1U)) { - alloc_ctx.slab = (usize <= SC_SMALL_MAXCLASS); + alloc_ctx.slab = sz_can_use_slab(usize); allocation = imalloc_no_sample( - sopts, dopts, tsd, usize, usize, ind); + sopts, dopts, tsd, usize, usize, ind, + alloc_ctx.slab); } else if ((uintptr_t)tctx > (uintptr_t)1U) { allocation = imalloc_sample( sopts, dopts, tsd, usize, ind); @@ -2551,7 +2550,7 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { } else { assert(!opt_prof); allocation = imalloc_no_sample(sopts, dopts, tsd, size, usize, - ind); + ind, sz_can_use_slab(usize)); if (unlikely(allocation == NULL)) { goto label_oom; } @@ -3314,18 +3313,25 @@ irallocx_prof_sample(tsdn_t *tsdn, void *old_ptr, size_t old_usize, return NULL; } - alignment = prof_sample_align(alignment); - if (usize <= SC_SMALL_MAXCLASS) { - p = iralloct(tsdn, old_ptr, old_usize, - SC_LARGE_MINCLASS, alignment, zero, tcache, - arena, hook_args); + alignment = prof_sample_align(usize, alignment); + /* + * If the allocation is small enough that it would normally be allocated + * on a slab, we need to take additional steps to ensure that it gets + * its own extent instead. 
+ */ + if (sz_can_use_slab(usize)) { + size_t bumped_usize = sz_sa2u(usize, alignment); + p = iralloct_explicit_slab(tsdn, old_ptr, old_usize, + bumped_usize, alignment, zero, /* slab */ false, + tcache, arena, hook_args); if (p == NULL) { return NULL; } - arena_prof_promote(tsdn, p, usize); + arena_prof_promote(tsdn, p, usize, bumped_usize); } else { - p = iralloct(tsdn, old_ptr, old_usize, usize, alignment, zero, - tcache, arena, hook_args); + p = iralloct_explicit_slab(tsdn, old_ptr, old_usize, usize, + alignment, zero, /* slab */ false, tcache, arena, + hook_args); } assert(prof_sample_aligned(p)); @@ -3348,7 +3354,7 @@ irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size, usize, alignment, zero, tcache, arena, tctx, hook_args); } else { p = iralloct(tsd_tsdn(tsd), old_ptr, old_usize, size, alignment, - zero, tcache, arena, hook_args); + usize, zero, tcache, arena, hook_args); } if (unlikely(p == NULL)) { prof_alloc_rollback(tsd, tctx); @@ -3407,7 +3413,7 @@ do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) { } } else { p = iralloct(tsd_tsdn(tsd), ptr, old_usize, size, alignment, - zero, tcache, arena, &hook_args); + usize, zero, tcache, arena, &hook_args); if (unlikely(p == NULL)) { goto label_oom; } diff --git a/src/pages.c b/src/pages.c index 2d5b8164..41bbef57 100644 --- a/src/pages.c +++ b/src/pages.c @@ -33,7 +33,7 @@ /* Data. */ /* Actual operating system page size, detected during bootstrap, <= PAGE. */ -static size_t os_page; +size_t os_page; #ifndef _WIN32 # define PAGES_PROT_COMMIT (PROT_READ | PROT_WRITE)
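
A standalone sketch of the bumped-size arithmetic described in the commit message: a sampled small allocation is now rounded up to the nearest multiple of `PAGE` rather than promoted to `SC_LARGE_MINCLASS`, and the resulting page alignment is what lets the dalloc path recognize sampled allocations without a metadata lookup (`prof_sample_aligned`). The constants `LG_PAGE = 12` (4 KiB pages) and `SC_LARGE_MINCLASS = 4 * PAGE` are assumptions matching jemalloc's common defaults, not values taken from this diff.

```c
#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Assumed build-time constants; both are configurable in a real build. */
#define LG_PAGE           12
#define PAGE              ((size_t)1 << LG_PAGE)
#define PAGE_MASK         (PAGE - 1)
#define SC_LARGE_MINCLASS (4 * PAGE)

/* New behavior: bump a sampled small allocation to a whole number of pages. */
static size_t
bumped_usize_after(size_t usize) {
	return (usize + PAGE_MASK) & ~PAGE_MASK;
}

/* Old behavior: every sampled small allocation became SC_LARGE_MINCLASS. */
static size_t
bumped_usize_before(size_t usize) {
	(void)usize;
	return SC_LARGE_MINCLASS;
}

/*
 * Sampled allocations are PAGE-aligned (PROF_SAMPLE_ALIGNMENT), so the
 * dalloc path can classify them from the pointer alone.
 */
static bool
prof_sample_aligned(const void *ptr) {
	return ((uintptr_t)ptr & PAGE_MASK) == 0;
}

int
main(void) {
	size_t usize = 16; /* a tiny sampled allocation */
	size_t before = bumped_usize_before(usize); /* 16384 */
	size_t after = bumped_usize_after(usize);   /* 4096 */

	printf("before: %zu bytes, after: %zu bytes (%zux smaller)\n",
	    before, after, before / after);

	/* A page-aligned address is treated as a sampled allocation. */
	assert(prof_sample_aligned((void *)(uintptr_t)(16 * PAGE)));
	return 0;
}
```

With these assumed constants the smallest sampled allocations shrink from 16384 to 4096 bytes of backing memory, the factor-of-4 reduction cited in the commit message.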
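
Likewise, a sketch of the redzone clamping performed by the new `compute_redzone_end` helper in `safety_check.h`. The 32-byte redzone size mirrors the diff; the 4 KiB `os_page` and the 64 KiB bumped size are assumptions chosen purely to exercise the clamp to an OS-page boundary, as when jemalloc is built with a larger `LG_PAGE` than the OS page size.

```c
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define REDZONE_SIZE ((size_t)32)
#define OS_PAGE      ((size_t)4096) /* assumed; detected at bootstrap in jemalloc */

static uintptr_t
alignment_ceiling(uintptr_t addr, size_t alignment) {
	return (addr + (alignment - 1)) & ~((uintptr_t)alignment - 1);
}

/*
 * End of the redzone: at most REDZONE_SIZE bytes past usize, clamped both to
 * the bumped size and to the end of the OS page holding the last byte of the
 * allocation, so filling the redzone never touches a page the allocation
 * itself would not have touched.
 */
static const unsigned char *
compute_redzone_end(const void *_ptr, size_t usize, size_t bumped_usize) {
	const unsigned char *ptr = (const unsigned char *)_ptr;
	const unsigned char *redzone_end = usize + REDZONE_SIZE < bumped_usize ?
	    &ptr[usize + REDZONE_SIZE] : &ptr[bumped_usize];
	const unsigned char *page_end = (const unsigned char *)
	    alignment_ceiling((uintptr_t)&ptr[usize], OS_PAGE);
	return redzone_end < page_end ? redzone_end : page_end;
}

int
main(void) {
	/* Pretend PAGE is 64 KiB while the OS page is 4 KiB. */
	size_t bumped_usize = 16 * OS_PAGE;
	unsigned char *ptr = aligned_alloc(OS_PAGE, bumped_usize);
	if (ptr == NULL) {
		return 1;
	}

	size_t usize = 4090; /* ends 6 bytes before an OS-page boundary */
	const unsigned char *end = compute_redzone_end(ptr, usize, bumped_usize);
	/* Prints 6: the redzone stops at the OS-page boundary, not 32 bytes out. */
	printf("redzone length: %zu\n", (size_t)(end - &ptr[usize]));

	free(ptr);
	return 0;
}
```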