#ifndef JEMALLOC_INTERNAL_INLINES_C_H
#define JEMALLOC_INTERNAL_INLINES_C_H

#include "jemalloc/internal/hook.h"
#include "jemalloc/internal/jemalloc_internal_types.h"
#include "jemalloc/internal/log.h"
#include "jemalloc/internal/sz.h"
#include "jemalloc/internal/thread_event.h"
#include "jemalloc/internal/witness.h"

#include "jemalloc/internal/arena_externs.h"
#include "jemalloc/internal/emap.h"

/*
 * These correspond to the macros in jemalloc/jemalloc_macros.h. Broadly, we
 * should have one constant here per magic value there. Note however that the
 * representations need not be related.
 */
#define TCACHE_IND_NONE ((unsigned)-1)
#define TCACHE_IND_AUTOMATIC ((unsigned)-2)
#define ARENA_IND_AUTOMATIC ((unsigned)-1)

/*
 * Translating the names of the 'i' functions:
 *   Abbreviations used in the first part of the function name (before
 *   alloc/dalloc) describe what that function accomplishes:
 *     a: arena (query)
 *     s: size (query, or sized deallocation)
 *     e: extent (query)
 *     p: aligned (allocates)
 *     vs: size (query, without knowing that the pointer is into the heap)
 *     r: rallocx implementation
 *     x: xallocx implementation
 *   Abbreviations used in the second part of the function name (after
 *   alloc/dalloc) describe the arguments it takes
 *     z: whether to return zeroed memory
 *     t: accepts a tcache_t * parameter
 *     m: accepts an arena_t * parameter
 */

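/*
 * Worked examples of the scheme above (informational only): ipallocztm is the
 * aligned ('p') allocation variant that takes a zero flag ('z'), a tcache_t *
 * ('t'), and an arena_t * ('m'); isdalloct is the sized ('s') deallocation
 * variant that takes a tcache_t * ('t'); ivsalloc is the size query that does
 * not assume the pointer is into the heap ('vs').
 */
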
JEMALLOC_ALWAYS_INLINE arena_t *
iaalloc(tsdn_t *tsdn, const void *ptr) {
	assert(ptr != NULL);

	return arena_aalloc(tsdn, ptr);
}

JEMALLOC_ALWAYS_INLINE size_t
isalloc(tsdn_t *tsdn, const void *ptr) {
	assert(ptr != NULL);

	return arena_salloc(tsdn, ptr);
}

JEMALLOC_ALWAYS_INLINE void *
iallocztm(tsdn_t *tsdn, size_t size, szind_t ind, bool zero, tcache_t *tcache,
    bool is_internal, arena_t *arena, bool slow_path) {
	void *ret;

	assert(!is_internal || tcache == NULL);
	assert(!is_internal || arena == NULL || arena_is_auto(arena));
	if (!tsdn_null(tsdn) && tsd_reentrancy_level_get(tsdn_tsd(tsdn)) == 0) {
		witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
		    WITNESS_RANK_CORE, 0);
	}

	ret = arena_malloc(tsdn, arena, size, ind, zero, tcache, slow_path);
	if (config_stats && is_internal && likely(ret != NULL)) {
		arena_internal_add(iaalloc(tsdn, ret), isalloc(tsdn, ret));
	}
	return ret;
}

JEMALLOC_ALWAYS_INLINE void *
ialloc(tsd_t *tsd, size_t size, szind_t ind, bool zero, bool slow_path) {
	return iallocztm(tsd_tsdn(tsd), size, ind, zero, tcache_get(tsd), false,
	    NULL, slow_path);
}

JEMALLOC_ALWAYS_INLINE void *
ipallocztm(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero,
    tcache_t *tcache, bool is_internal, arena_t *arena) {
	void *ret;

	assert(usize != 0);
	assert(usize == sz_sa2u(usize, alignment));
	assert(!is_internal || tcache == NULL);
	assert(!is_internal || arena == NULL || arena_is_auto(arena));
	witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
	    WITNESS_RANK_CORE, 0);

	ret = arena_palloc(tsdn, arena, usize, alignment, zero, tcache);
	assert(ALIGNMENT_ADDR2BASE(ret, alignment) == ret);
	if (config_stats && is_internal && likely(ret != NULL)) {
		arena_internal_add(iaalloc(tsdn, ret), isalloc(tsdn, ret));
	}
	return ret;
}

JEMALLOC_ALWAYS_INLINE void *
ipalloct(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero,
    tcache_t *tcache, arena_t *arena) {
	return ipallocztm(tsdn, usize, alignment, zero, tcache, false, arena);
}

JEMALLOC_ALWAYS_INLINE void *
ipalloc(tsd_t *tsd, size_t usize, size_t alignment, bool zero) {
	return ipallocztm(tsd_tsdn(tsd), usize, alignment, zero,
	    tcache_get(tsd), false, NULL);
}

JEMALLOC_ALWAYS_INLINE size_t
ivsalloc(tsdn_t *tsdn, const void *ptr) {
	return arena_vsalloc(tsdn, ptr);
}

JEMALLOC_ALWAYS_INLINE void
idalloctm(tsdn_t *tsdn, void *ptr, tcache_t *tcache,
    emap_alloc_ctx_t *alloc_ctx, bool is_internal, bool slow_path) {
	assert(ptr != NULL);
	assert(!is_internal || tcache == NULL);
	assert(!is_internal || arena_is_auto(iaalloc(tsdn, ptr)));
	witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
	    WITNESS_RANK_CORE, 0);

	if (config_stats && is_internal) {
		arena_internal_sub(iaalloc(tsdn, ptr), isalloc(tsdn, ptr));
	}
	if (!is_internal && !tsdn_null(tsdn) &&
	    tsd_reentrancy_level_get(tsdn_tsd(tsdn)) != 0) {
		assert(tcache == NULL);
	}
	arena_dalloc(tsdn, ptr, tcache, alloc_ctx, slow_path);
}

JEMALLOC_ALWAYS_INLINE void
idalloc(tsd_t *tsd, void *ptr) {
	idalloctm(tsd_tsdn(tsd), ptr, tcache_get(tsd), NULL, false, true);
}

JEMALLOC_ALWAYS_INLINE void
isdalloct(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache,
    emap_alloc_ctx_t *alloc_ctx, bool slow_path) {
	witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
	    WITNESS_RANK_CORE, 0);
	arena_sdalloc(tsdn, ptr, size, tcache, alloc_ctx, slow_path);
}

JEMALLOC_ALWAYS_INLINE void *
iralloct_realign(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size,
    size_t alignment, bool zero, tcache_t *tcache, arena_t *arena,
    hook_ralloc_args_t *hook_args) {
	witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
	    WITNESS_RANK_CORE, 0);
	void *p;
	size_t usize, copysize;

	usize = sz_sa2u(size, alignment);
	if (unlikely(usize == 0 || usize > SC_LARGE_MAXCLASS)) {
		return NULL;
	}
	p = ipalloct(tsdn, usize, alignment, zero, tcache, arena);
	if (p == NULL) {
		return NULL;
	}
	/*
	 * Copy at most size bytes (not size+extra), since the caller has no
	 * expectation that the extra bytes will be reliably preserved.
	 */
	copysize = (size < oldsize) ? size : oldsize;
	memcpy(p, ptr, copysize);
	hook_invoke_alloc(hook_args->is_realloc ? hook_alloc_realloc :
	    hook_alloc_rallocx, p, (uintptr_t)p, hook_args->args);
	hook_invoke_dalloc(hook_args->is_realloc ? hook_dalloc_realloc :
	    hook_dalloc_rallocx, ptr, hook_args->args);
	isdalloct(tsdn, ptr, oldsize, tcache, NULL, true);
	return p;
}

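/*
 * A worked example of the alignment checks in iralloct() and ixalloc() below
 * (informational only): for ptr == (void *)0x7f0000001040 and alignment == 64,
 * (uintptr_t)ptr & (alignment - 1) is 0x40 & 0x3f == 0, so the existing
 * placement is adequate; with alignment == 128 the same expression yields
 * 0x40 != 0, and the object must be reallocated and copied (via
 * iralloct_realign() above).
 */
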
/*
 * is_realloc threads through the knowledge of whether or not this call comes
 * from je_realloc (as opposed to je_rallocx); this ensures that we pass the
 * correct entry point into any hooks.
 * Note that these functions are all force-inlined, so no actual bool gets
 * passed-around anywhere.
 */
JEMALLOC_ALWAYS_INLINE void *
iralloct(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t alignment,
    bool zero, tcache_t *tcache, arena_t *arena, hook_ralloc_args_t *hook_args)
{
	assert(ptr != NULL);
	assert(size != 0);
	witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
	    WITNESS_RANK_CORE, 0);

	if (alignment != 0 && ((uintptr_t)ptr & ((uintptr_t)alignment-1))
	    != 0) {
		/*
		 * Existing object alignment is inadequate; allocate new space
		 * and copy.
		 */
		return iralloct_realign(tsdn, ptr, oldsize, size, alignment,
		    zero, tcache, arena, hook_args);
	}

	return arena_ralloc(tsdn, arena, ptr, oldsize, size, alignment, zero,
	    tcache, hook_args);
}

JEMALLOC_ALWAYS_INLINE void *
iralloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t alignment,
    bool zero, hook_ralloc_args_t *hook_args) {
	return iralloct(tsd_tsdn(tsd), ptr, oldsize, size, alignment, zero,
	    tcache_get(tsd), NULL, hook_args);
}

JEMALLOC_ALWAYS_INLINE bool
ixalloc(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t extra,
    size_t alignment, bool zero, size_t *newsize) {
	assert(ptr != NULL);
	assert(size != 0);
	witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
	    WITNESS_RANK_CORE, 0);

	if (alignment != 0 && ((uintptr_t)ptr & ((uintptr_t)alignment-1))
	    != 0) {
		/* Existing object alignment is inadequate. */
		*newsize = oldsize;
		return true;
	}

	return arena_ralloc_no_move(tsdn, ptr, oldsize, size, extra, zero,
	    newsize);
}

JEMALLOC_ALWAYS_INLINE void
fastpath_success_finish(tsd_t *tsd, uint64_t allocated_after,
    cache_bin_t *bin, void *ret) {
	thread_allocated_set(tsd, allocated_after);
	if (config_stats) {
		bin->tstats.nrequests++;
	}

	LOG("core.malloc.exit", "result: %p", ret);
}

JEMALLOC_ALWAYS_INLINE bool
malloc_initialized(void) {
	return (malloc_init_state == malloc_init_initialized);
}

/*
 * malloc() fastpath. Included here so that we can inline it into operator new;
 * function call overhead there is non-negligible as a fraction of total CPU in
 * allocation-heavy C++ programs. We take the fallback alloc to allow malloc
 * (which can return NULL) to differ in its behavior from operator new (which
 * can't). It matches the signature of malloc / operator new so that we can
 * tail-call the fallback allocator, allowing us to avoid setting up the call
 * frame in the common case.
 *
 * Fastpath assumes size <= SC_LOOKUP_MAXCLASS, and that we hit
 * tcache. If either of these is false, we tail-call to the slowpath,
 * malloc_default(). Tail-calling is used to avoid any caller-saved
 * registers.
 *
 * fastpath supports ticker and profiling, both of which will also
 * tail-call to the slowpath if they fire.
 */
JEMALLOC_ALWAYS_INLINE void *
imalloc_fastpath(size_t size, void *(fallback_alloc)(size_t)) {
	LOG("core.malloc.entry", "size: %zu", size);
	if (tsd_get_allocates() && unlikely(!malloc_initialized())) {
		return fallback_alloc(size);
	}

	tsd_t *tsd = tsd_get(false);
	if (unlikely((size > SC_LOOKUP_MAXCLASS) || tsd == NULL)) {
		return fallback_alloc(size);
	}
	/*
	 * The code below till the branch checking the next_event threshold may
	 * execute before malloc_init(), in which case the threshold is 0 to
	 * trigger slow path and initialization.
	 *
	 * Note that when uninitialized, only the fast-path variants of the sz /
	 * tsd facilities may be called.
	 */
	szind_t ind;
	/*
	 * The thread_allocated counter in tsd serves as a general purpose
	 * accumulator for bytes of allocation to trigger different types of
	 * events. usize is always needed to advance thread_allocated, though
	 * it's not always needed in the core allocation logic.
	 */
	size_t usize;
	sz_size2index_usize_fastpath(size, &ind, &usize);
	/* Fast path relies on size being a bin. */
	assert(ind < SC_NBINS);
	assert((SC_LOOKUP_MAXCLASS < SC_SMALL_MAXCLASS) &&
	    (size <= SC_SMALL_MAXCLASS));

	uint64_t allocated, threshold;
	te_malloc_fastpath_ctx(tsd, &allocated, &threshold);
	uint64_t allocated_after = allocated + usize;
	/*
	 * The ind and usize might be uninitialized (or partially) before
	 * malloc_init(). The assertions check for: 1) full correctness (usize
	 * & ind) when initialized; and 2) guaranteed slow-path (threshold == 0)
	 * when !initialized.
	 */
	if (!malloc_initialized()) {
		assert(threshold == 0);
	} else {
		assert(ind == sz_size2index(size));
		assert(usize > 0 && usize == sz_index2size(ind));
	}
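	/*
	 * Worked example of the event check below (informational only): with
	 * allocated == 1000, usize == 32, and a next_event threshold of 1024,
	 * allocated_after == 1032 >= 1024, so the fallback is tail-called to
	 * handle the event on the slow path.
	 */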
	/*
	 * Check for events and tsd non-nominal (fast_threshold will be set to
	 * 0) in a single branch.
	 */
	if (unlikely(allocated_after >= threshold)) {
		return fallback_alloc(size);
	}
	assert(tsd_fast(tsd));

	tcache_t *tcache = tsd_tcachep_get(tsd);
	assert(tcache == tcache_get(tsd));
	cache_bin_t *bin = &tcache->bins[ind];
	/* Suppress spurious warning from static analysis */
	assert(bin != NULL);
	bool tcache_success;
	void *ret;

	/*
	 * We split up the code this way so that redundant low-water
	 * computation doesn't happen on the (more common) case in which we
	 * don't touch the low water mark. The compiler won't do this
	 * duplication on its own.
	 */
	ret = cache_bin_alloc_easy(bin, &tcache_success);
	if (tcache_success) {
		fastpath_success_finish(tsd, allocated_after, bin, ret);
		return ret;
	}
	ret = cache_bin_alloc(bin, &tcache_success);
	if (tcache_success) {
		fastpath_success_finish(tsd, allocated_after, bin, ret);
		return ret;
	}

	return fallback_alloc(size);
}

JEMALLOC_ALWAYS_INLINE tcache_t *
tcache_get_from_ind(tsd_t *tsd, unsigned tcache_ind, bool slow, bool is_alloc) {
	tcache_t *tcache;
	if (tcache_ind == TCACHE_IND_AUTOMATIC) {
		if (likely(!slow)) {
			/* Getting tcache ptr unconditionally. */
			tcache = tsd_tcachep_get(tsd);
			assert(tcache == tcache_get(tsd));
		} else if (is_alloc ||
		    likely(tsd_reentrancy_level_get(tsd) == 0)) {
			tcache = tcache_get(tsd);
		} else {
			tcache = NULL;
		}
	} else {
		/*
		 * Should not specify tcache on deallocation path when being
		 * reentrant.
		 */
		assert(is_alloc || tsd_reentrancy_level_get(tsd) == 0 ||
		    tsd_state_nocleanup(tsd));
		if (tcache_ind == TCACHE_IND_NONE) {
			tcache = NULL;
		} else {
			tcache = tcaches_get(tsd, tcache_ind);
		}
	}
	return tcache;
}

JEMALLOC_ALWAYS_INLINE bool
maybe_check_alloc_ctx(tsd_t *tsd, void *ptr, emap_alloc_ctx_t *alloc_ctx) {
	if (config_opt_size_checks) {
		emap_alloc_ctx_t dbg_ctx;
		emap_alloc_ctx_lookup(tsd_tsdn(tsd), &arena_emap_global, ptr,
		    &dbg_ctx);
		if (alloc_ctx->szind != dbg_ctx.szind) {
			safety_check_fail_sized_dealloc(
			    /* current_dealloc */ true, ptr,
			    /* true_size */ sz_index2size(dbg_ctx.szind),
			    /* input_size */ sz_index2size(alloc_ctx->szind));
			return true;
		}
		if (alloc_ctx->slab != dbg_ctx.slab) {
			safety_check_fail(
			    "Internal heap corruption detected: "
			    "mismatch in slab bit");
			return true;
		}
	}
	return false;
}

JEMALLOC_ALWAYS_INLINE bool
prof_sample_aligned(const void *ptr) {
	return ((uintptr_t)ptr & PAGE_MASK) == 0;
}

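/*
 * Example (informational only, assuming the common 4 KiB page size): sampled
 * allocations start at a page boundary, so a pointer such as
 * (void *)0x7f0000003000 satisfies ((uintptr_t)ptr & PAGE_MASK) == 0 and
 * escapes the fast path, while a typical small-object pointer such as
 * (void *)0x7f0000003040 does not.
 */
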
JEMALLOC_ALWAYS_INLINE bool
free_fastpath_nonfast_aligned(void *ptr, bool check_prof) {
	/*
	 * free_fastpath does not handle two uncommon cases: 1) sampled
	 * profiled objects and 2) sampled junk & stash for use-after-free
	 * detection.  Both have special alignments which are used to escape
	 * the fastpath.
	 *
	 * prof_sample is page-aligned, which covers the UAF check when both
	 * are enabled (the assertion below). Avoiding redundant checks since
	 * this is on the fastpath -- at most one runtime branch from this.
	 */
	if (config_debug && cache_bin_nonfast_aligned(ptr)) {
		assert(prof_sample_aligned(ptr));
	}

	if (config_prof && check_prof) {
		/* When prof is enabled, the prof_sample alignment is enough. */
		if (prof_sample_aligned(ptr)) {
			return true;
		} else {
			return false;
		}
	}

	if (config_uaf_detection) {
		if (cache_bin_nonfast_aligned(ptr)) {
			return true;
		} else {
			return false;
		}
	}

	return false;
}

/* Returns whether or not the free attempt was successful. */
JEMALLOC_ALWAYS_INLINE bool
free_fastpath(void *ptr, size_t size, bool size_hint) {
	tsd_t *tsd = tsd_get(false);
	/* The branch gets optimized away unless tsd_get_allocates(). */
	if (unlikely(tsd == NULL)) {
		return false;
	}
	/*
	 * The tsd_fast() / initialized checks are folded into the branch
	 * testing (deallocated_after >= threshold) later in this function.
	 * The threshold will be set to 0 when !tsd_fast.
	 */
	assert(tsd_fast(tsd) ||
	    *tsd_thread_deallocated_next_event_fastp_get_unsafe(tsd) == 0);

	emap_alloc_ctx_t alloc_ctx;
	if (!size_hint) {
		bool err = emap_alloc_ctx_try_lookup_fast(tsd,
		    &arena_emap_global, ptr, &alloc_ctx);

		/* Note: profiled objects will have alloc_ctx.slab set */
		if (unlikely(err || !alloc_ctx.slab ||
		    free_fastpath_nonfast_aligned(ptr,
		    /* check_prof */ false))) {
			return false;
		}
		assert(alloc_ctx.szind != SC_NSIZES);
	} else {
		/*
		 * Check for both sizes that are too large, and for sampled /
		 * special aligned objects. The alignment check will also check
		 * for null ptr.
		 */
		if (unlikely(size > SC_LOOKUP_MAXCLASS ||
		    free_fastpath_nonfast_aligned(ptr,
		    /* check_prof */ true))) {
			return false;
		}
		alloc_ctx.szind = sz_size2index_lookup(size);
		/* Max lookup class must be small. */
		assert(alloc_ctx.szind < SC_NBINS);
		/* This is a dead store, except when opt size checking is on. */
		alloc_ctx.slab = true;
	}
	/*
	 * Currently the fastpath only handles small sizes. The branch on
	 * SC_LOOKUP_MAXCLASS makes sure of it. This lets us avoid checking
	 * tcache szind upper limit (i.e. tcache_maxclass) as well.
	 */
	assert(alloc_ctx.slab);

	uint64_t deallocated, threshold;
	te_free_fastpath_ctx(tsd, &deallocated, &threshold);

	size_t usize = sz_index2size(alloc_ctx.szind);
	uint64_t deallocated_after = deallocated + usize;
	/*
	 * Check for events and tsd non-nominal (fast_threshold will be set to
	 * 0) in a single branch. Note that this handles the uninitialized case
	 * as well (TSD init will be triggered on the non-fastpath). Therefore
	 * anything that depends on a functional TSD (e.g. the alloc_ctx sanity
	 * check below) needs to be after this branch.
	 */
	if (unlikely(deallocated_after >= threshold)) {
		return false;
	}
	assert(tsd_fast(tsd));
	bool fail = maybe_check_alloc_ctx(tsd, ptr, &alloc_ctx);
	if (fail) {
		/* See the comment in isfree. */
		return true;
	}

	tcache_t *tcache = tcache_get_from_ind(tsd, TCACHE_IND_AUTOMATIC,
	    /* slow */ false, /* is_alloc */ false);
	cache_bin_t *bin = &tcache->bins[alloc_ctx.szind];

	/*
	 * If junking were enabled, this is where we would do it. It's not
	 * though, since we ensured above that we're on the fast path. Assert
	 * that to double-check.
	 */
	assert(!opt_junk_free);

	if (!cache_bin_dalloc_easy(bin, ptr)) {
		return false;
	}

	*tsd_thread_deallocatedp_get(tsd) = deallocated_after;

	return true;
}

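/*
 * Worked example of the size-hinted path (informational only, assuming the
 * default size classes): je_sdallocx_noflags(ptr, 24) below invokes
 * free_fastpath(ptr, 24, true); sz_size2index_lookup(24) yields the index of
 * the 32-byte bin, so usize == 32 is added to the thread's deallocated counter
 * and ptr is pushed onto tcache->bins[szind] via cache_bin_dalloc_easy().
 */
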
JEMALLOC_ALWAYS_INLINE void JEMALLOC_NOTHROW
je_sdallocx_noflags(void *ptr, size_t size) {
	LOG("core.sdallocx.entry", "ptr: %p, size: %zu, flags: 0", ptr, size);

	if (!free_fastpath(ptr, size, true)) {
		sdallocx_default(ptr, size, 0);
	}

	LOG("core.sdallocx.exit", "");
}

JEMALLOC_ALWAYS_INLINE void JEMALLOC_NOTHROW
je_sdallocx_impl(void *ptr, size_t size, int flags) {
	if (flags != 0 || !free_fastpath(ptr, size, true)) {
		sdallocx_default(ptr, size, flags);
	}
}

JEMALLOC_ALWAYS_INLINE void JEMALLOC_NOTHROW
je_free_impl(void *ptr) {
	if (!free_fastpath(ptr, 0, false)) {
		free_default(ptr);
	}
}

#endif /* JEMALLOC_INTERNAL_INLINES_C_H */