Inline free and sdallocx into operator delete

This commit is contained in:
Guangli Dai 2022-09-19 17:05:55 -07:00 committed by Qi Wang
parent 06374d2a6a
commit e8f9f13811
6 changed files with 241 additions and 228 deletions

View File

@ -70,7 +70,8 @@ size_t batch_alloc(void **ptrs, size_t num, size_t size, int flags);
void jemalloc_prefork(void);
void jemalloc_postfork_parent(void);
void jemalloc_postfork_child(void);
void je_sdallocx_noflags(void *ptr, size_t size);
void sdallocx_default(void *ptr, size_t size, int flags);
void free_default(void *ptr);
void *malloc_default(size_t size);
#endif /* JEMALLOC_INTERNAL_EXTERNS_H */

View File

@ -7,6 +7,17 @@
#include "jemalloc/internal/sz.h"
#include "jemalloc/internal/thread_event.h"
#include "jemalloc/internal/witness.h"
#include "jemalloc/internal/arena_externs.h"
#include "jemalloc/internal/emap.h"
/*
* These correspond to the macros in jemalloc/jemalloc_macros.h. Broadly, we
* should have one constant here per magic value there. Note however that the
* representations need not be related.
*/
#define TCACHE_IND_NONE ((unsigned)-1)
#define TCACHE_IND_AUTOMATIC ((unsigned)-2)
#define ARENA_IND_AUTOMATIC ((unsigned)-1)
/*
* Translating the names of the 'i' functions:
@ -337,4 +348,217 @@ imalloc_fastpath(size_t size, void *(fallback_alloc)(size_t)) {
return fallback_alloc(size);
}
JEMALLOC_ALWAYS_INLINE tcache_t *
tcache_get_from_ind(tsd_t *tsd, unsigned tcache_ind, bool slow, bool is_alloc) {
tcache_t *tcache;
if (tcache_ind == TCACHE_IND_AUTOMATIC) {
if (likely(!slow)) {
/* Getting tcache ptr unconditionally. */
tcache = tsd_tcachep_get(tsd);
assert(tcache == tcache_get(tsd));
} else if (is_alloc ||
likely(tsd_reentrancy_level_get(tsd) == 0)) {
tcache = tcache_get(tsd);
} else {
tcache = NULL;
}
} else {
/*
* Should not specify tcache on deallocation path when being
* reentrant.
*/
assert(is_alloc || tsd_reentrancy_level_get(tsd) == 0 ||
tsd_state_nocleanup(tsd));
if (tcache_ind == TCACHE_IND_NONE) {
tcache = NULL;
} else {
tcache = tcaches_get(tsd, tcache_ind);
}
}
return tcache;
}
JEMALLOC_ALWAYS_INLINE bool
maybe_check_alloc_ctx(tsd_t *tsd, void *ptr, emap_alloc_ctx_t *alloc_ctx) {
if (config_opt_size_checks) {
emap_alloc_ctx_t dbg_ctx;
emap_alloc_ctx_lookup(tsd_tsdn(tsd), &arena_emap_global, ptr,
&dbg_ctx);
if (alloc_ctx->szind != dbg_ctx.szind) {
safety_check_fail_sized_dealloc(
/* current_dealloc */ true, ptr,
/* true_size */ sz_size2index(dbg_ctx.szind),
/* input_size */ sz_size2index(alloc_ctx->szind));
return true;
}
if (alloc_ctx->slab != dbg_ctx.slab) {
safety_check_fail(
"Internal heap corruption detected: "
"mismatch in slab bit");
return true;
}
}
return false;
}
JEMALLOC_ALWAYS_INLINE bool
prof_sample_aligned(const void *ptr) {
return ((uintptr_t)ptr & PAGE_MASK) == 0;
}
JEMALLOC_ALWAYS_INLINE bool
free_fastpath_nonfast_aligned(void *ptr, bool check_prof) {
/*
* free_fastpath do not handle two uncommon cases: 1) sampled profiled
* objects and 2) sampled junk & stash for use-after-free detection.
* Both have special alignments which are used to escape the fastpath.
*
* prof_sample is page-aligned, which covers the UAF check when both
* are enabled (the assertion below). Avoiding redundant checks since
* this is on the fastpath -- at most one runtime branch from this.
*/
if (config_debug && cache_bin_nonfast_aligned(ptr)) {
assert(prof_sample_aligned(ptr));
}
if (config_prof && check_prof) {
/* When prof is enabled, the prof_sample alignment is enough. */
if (prof_sample_aligned(ptr)) {
return true;
} else {
return false;
}
}
if (config_uaf_detection) {
if (cache_bin_nonfast_aligned(ptr)) {
return true;
} else {
return false;
}
}
return false;
}
/* Returns whether or not the free attempt was successful. */
JEMALLOC_ALWAYS_INLINE
bool free_fastpath(void *ptr, size_t size, bool size_hint) {
tsd_t *tsd = tsd_get(false);
/* The branch gets optimized away unless tsd_get_allocates(). */
if (unlikely(tsd == NULL)) {
return false;
}
/*
* The tsd_fast() / initialized checks are folded into the branch
* testing (deallocated_after >= threshold) later in this function.
* The threshold will be set to 0 when !tsd_fast.
*/
assert(tsd_fast(tsd) ||
*tsd_thread_deallocated_next_event_fastp_get_unsafe(tsd) == 0);
emap_alloc_ctx_t alloc_ctx;
if (!size_hint) {
bool err = emap_alloc_ctx_try_lookup_fast(tsd,
&arena_emap_global, ptr, &alloc_ctx);
/* Note: profiled objects will have alloc_ctx.slab set */
if (unlikely(err || !alloc_ctx.slab ||
free_fastpath_nonfast_aligned(ptr,
/* check_prof */ false))) {
return false;
}
assert(alloc_ctx.szind != SC_NSIZES);
} else {
/*
* Check for both sizes that are too large, and for sampled /
* special aligned objects. The alignment check will also check
* for null ptr.
*/
if (unlikely(size > SC_LOOKUP_MAXCLASS ||
free_fastpath_nonfast_aligned(ptr,
/* check_prof */ true))) {
return false;
}
alloc_ctx.szind = sz_size2index_lookup(size);
/* Max lookup class must be small. */
assert(alloc_ctx.szind < SC_NBINS);
/* This is a dead store, except when opt size checking is on. */
alloc_ctx.slab = true;
}
/*
* Currently the fastpath only handles small sizes. The branch on
* SC_LOOKUP_MAXCLASS makes sure of it. This lets us avoid checking
* tcache szind upper limit (i.e. tcache_maxclass) as well.
*/
assert(alloc_ctx.slab);
uint64_t deallocated, threshold;
te_free_fastpath_ctx(tsd, &deallocated, &threshold);
size_t usize = sz_index2size(alloc_ctx.szind);
uint64_t deallocated_after = deallocated + usize;
/*
* Check for events and tsd non-nominal (fast_threshold will be set to
* 0) in a single branch. Note that this handles the uninitialized case
* as well (TSD init will be triggered on the non-fastpath). Therefore
* anything depends on a functional TSD (e.g. the alloc_ctx sanity check
* below) needs to be after this branch.
*/
if (unlikely(deallocated_after >= threshold)) {
return false;
}
assert(tsd_fast(tsd));
bool fail = maybe_check_alloc_ctx(tsd, ptr, &alloc_ctx);
if (fail) {
/* See the comment in isfree. */
return true;
}
tcache_t *tcache = tcache_get_from_ind(tsd, TCACHE_IND_AUTOMATIC,
/* slow */ false, /* is_alloc */ false);
cache_bin_t *bin = &tcache->bins[alloc_ctx.szind];
/*
* If junking were enabled, this is where we would do it. It's not
* though, since we ensured above that we're on the fast path. Assert
* that to double-check.
*/
assert(!opt_junk_free);
if (!cache_bin_dalloc_easy(bin, ptr)) {
return false;
}
*tsd_thread_deallocatedp_get(tsd) = deallocated_after;
return true;
}
JEMALLOC_ALWAYS_INLINE void JEMALLOC_NOTHROW
je_sdallocx_noflags(void *ptr, size_t size) {
LOG("core.sdallocx.entry", "ptr: %p, size: %zu, flags: 0", ptr,
size);
if (!free_fastpath(ptr, size, true)) {
sdallocx_default(ptr, size, 0);
}
LOG("core.sdallocx.exit", "");
}
JEMALLOC_ALWAYS_INLINE void JEMALLOC_NOTHROW
je_sdallocx_impl(void *ptr, size_t size, int flags) {
if (flags != 0 || !free_fastpath(ptr, size, true)) {
sdallocx_default(ptr, size, flags);
}
}
JEMALLOC_ALWAYS_INLINE void JEMALLOC_NOTHROW
je_free_impl(void *ptr) {
if (!free_fastpath(ptr, 0, false)) {
free_default(ptr);
}
}
#endif /* JEMALLOC_INTERNAL_INLINES_C_H */

View File

@ -4,6 +4,7 @@
#include "jemalloc/internal/safety_check.h"
#include "jemalloc/internal/sz.h"
#include "jemalloc/internal/thread_event.h"
#include "jemalloc/internal/jemalloc_internal_inlines_c.h"
JEMALLOC_ALWAYS_INLINE void
prof_active_assert() {
@ -227,11 +228,6 @@ prof_sample_align(size_t orig_align) {
orig_align;
}
JEMALLOC_ALWAYS_INLINE bool
prof_sample_aligned(const void *ptr) {
return ((uintptr_t)ptr & PAGE_MASK) == 0;
}
JEMALLOC_ALWAYS_INLINE bool
prof_sampled(tsd_t *tsd, const void *ptr) {
prof_info_t prof_info;

View File

@ -2273,15 +2273,6 @@ static_opts_init(static_opts_t *static_opts) {
static_opts->usize = false;
}
/*
* These correspond to the macros in jemalloc/jemalloc_macros.h. Broadly, we
* should have one constant here per magic value there. Note however that the
* representations need not be related.
*/
#define TCACHE_IND_NONE ((unsigned)-1)
#define TCACHE_IND_AUTOMATIC ((unsigned)-2)
#define ARENA_IND_AUTOMATIC ((unsigned)-1)
typedef struct dynamic_opts_s dynamic_opts_t;
struct dynamic_opts_s {
void **result;
@ -2346,36 +2337,6 @@ zero_get(bool guarantee, bool slow) {
}
}
JEMALLOC_ALWAYS_INLINE tcache_t *
tcache_get_from_ind(tsd_t *tsd, unsigned tcache_ind, bool slow, bool is_alloc) {
tcache_t *tcache;
if (tcache_ind == TCACHE_IND_AUTOMATIC) {
if (likely(!slow)) {
/* Getting tcache ptr unconditionally. */
tcache = tsd_tcachep_get(tsd);
assert(tcache == tcache_get(tsd));
} else if (is_alloc ||
likely(tsd_reentrancy_level_get(tsd) == 0)) {
tcache = tcache_get(tsd);
} else {
tcache = NULL;
}
} else {
/*
* Should not specify tcache on deallocation path when being
* reentrant.
*/
assert(is_alloc || tsd_reentrancy_level_get(tsd) == 0 ||
tsd_state_nocleanup(tsd));
if (tcache_ind == TCACHE_IND_NONE) {
tcache = NULL;
} else {
tcache = tcaches_get(tsd, tcache_ind);
}
}
return tcache;
}
/* Return true if a manual arena is specified and arena_get() OOMs. */
JEMALLOC_ALWAYS_INLINE bool
arena_get_from_ind(tsd_t *tsd, unsigned arena_ind, arena_t **arena_p) {
@ -2915,29 +2876,6 @@ ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) {
thread_dalloc_event(tsd, usize);
}
JEMALLOC_ALWAYS_INLINE bool
maybe_check_alloc_ctx(tsd_t *tsd, void *ptr, emap_alloc_ctx_t *alloc_ctx) {
if (config_opt_size_checks) {
emap_alloc_ctx_t dbg_ctx;
emap_alloc_ctx_lookup(tsd_tsdn(tsd), &arena_emap_global, ptr,
&dbg_ctx);
if (alloc_ctx->szind != dbg_ctx.szind) {
safety_check_fail_sized_dealloc(
/* current_dealloc */ true, ptr,
/* true_size */ sz_size2index(dbg_ctx.szind),
/* input_size */ sz_size2index(alloc_ctx->szind));
return true;
}
if (alloc_ctx->slab != dbg_ctx.slab) {
safety_check_fail(
"Internal heap corruption detected: "
"mismatch in slab bit");
return true;
}
}
return false;
}
JEMALLOC_ALWAYS_INLINE void
isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) {
if (!slow_path) {
@ -3044,142 +2982,11 @@ free_default(void *ptr) {
}
}
JEMALLOC_ALWAYS_INLINE bool
free_fastpath_nonfast_aligned(void *ptr, bool check_prof) {
/*
* free_fastpath do not handle two uncommon cases: 1) sampled profiled
* objects and 2) sampled junk & stash for use-after-free detection.
* Both have special alignments which are used to escape the fastpath.
*
* prof_sample is page-aligned, which covers the UAF check when both
* are enabled (the assertion below). Avoiding redundant checks since
* this is on the fastpath -- at most one runtime branch from this.
*/
if (config_debug && cache_bin_nonfast_aligned(ptr)) {
assert(prof_sample_aligned(ptr));
}
if (config_prof && check_prof) {
/* When prof is enabled, the prof_sample alignment is enough. */
if (prof_sample_aligned(ptr)) {
return true;
} else {
return false;
}
}
if (config_uaf_detection) {
if (cache_bin_nonfast_aligned(ptr)) {
return true;
} else {
return false;
}
}
return false;
}
/* Returns whether or not the free attempt was successful. */
JEMALLOC_ALWAYS_INLINE
bool free_fastpath(void *ptr, size_t size, bool size_hint) {
tsd_t *tsd = tsd_get(false);
/* The branch gets optimized away unless tsd_get_allocates(). */
if (unlikely(tsd == NULL)) {
return false;
}
/*
* The tsd_fast() / initialized checks are folded into the branch
* testing (deallocated_after >= threshold) later in this function.
* The threshold will be set to 0 when !tsd_fast.
*/
assert(tsd_fast(tsd) ||
*tsd_thread_deallocated_next_event_fastp_get_unsafe(tsd) == 0);
emap_alloc_ctx_t alloc_ctx;
if (!size_hint) {
bool err = emap_alloc_ctx_try_lookup_fast(tsd,
&arena_emap_global, ptr, &alloc_ctx);
/* Note: profiled objects will have alloc_ctx.slab set */
if (unlikely(err || !alloc_ctx.slab ||
free_fastpath_nonfast_aligned(ptr,
/* check_prof */ false))) {
return false;
}
assert(alloc_ctx.szind != SC_NSIZES);
} else {
/*
* Check for both sizes that are too large, and for sampled /
* special aligned objects. The alignment check will also check
* for null ptr.
*/
if (unlikely(size > SC_LOOKUP_MAXCLASS ||
free_fastpath_nonfast_aligned(ptr,
/* check_prof */ true))) {
return false;
}
alloc_ctx.szind = sz_size2index_lookup(size);
/* Max lookup class must be small. */
assert(alloc_ctx.szind < SC_NBINS);
/* This is a dead store, except when opt size checking is on. */
alloc_ctx.slab = true;
}
/*
* Currently the fastpath only handles small sizes. The branch on
* SC_LOOKUP_MAXCLASS makes sure of it. This lets us avoid checking
* tcache szind upper limit (i.e. tcache_maxclass) as well.
*/
assert(alloc_ctx.slab);
uint64_t deallocated, threshold;
te_free_fastpath_ctx(tsd, &deallocated, &threshold);
size_t usize = sz_index2size(alloc_ctx.szind);
uint64_t deallocated_after = deallocated + usize;
/*
* Check for events and tsd non-nominal (fast_threshold will be set to
* 0) in a single branch. Note that this handles the uninitialized case
* as well (TSD init will be triggered on the non-fastpath). Therefore
* anything depends on a functional TSD (e.g. the alloc_ctx sanity check
* below) needs to be after this branch.
*/
if (unlikely(deallocated_after >= threshold)) {
return false;
}
assert(tsd_fast(tsd));
bool fail = maybe_check_alloc_ctx(tsd, ptr, &alloc_ctx);
if (fail) {
/* See the comment in isfree. */
return true;
}
tcache_t *tcache = tcache_get_from_ind(tsd, TCACHE_IND_AUTOMATIC,
/* slow */ false, /* is_alloc */ false);
cache_bin_t *bin = &tcache->bins[alloc_ctx.szind];
/*
* If junking were enabled, this is where we would do it. It's not
* though, since we ensured above that we're on the fast path. Assert
* that to double-check.
*/
assert(!opt_junk_free);
if (!cache_bin_dalloc_easy(bin, ptr)) {
return false;
}
*tsd_thread_deallocatedp_get(tsd) = deallocated_after;
return true;
}
JEMALLOC_EXPORT void JEMALLOC_NOTHROW
je_free(void *ptr) {
LOG("core.free.entry", "ptr: %p", ptr);
if (!free_fastpath(ptr, 0, false)) {
free_default(ptr);
}
je_free_impl(ptr);
LOG("core.free.exit", "");
}
@ -4000,21 +3807,7 @@ je_sdallocx(void *ptr, size_t size, int flags) {
LOG("core.sdallocx.entry", "ptr: %p, size: %zu, flags: %d", ptr,
size, flags);
if (flags != 0 || !free_fastpath(ptr, size, true)) {
sdallocx_default(ptr, size, flags);
}
LOG("core.sdallocx.exit", "");
}
void JEMALLOC_NOTHROW
je_sdallocx_noflags(void *ptr, size_t size) {
LOG("core.sdallocx.entry", "ptr: %p, size: %zu, flags: 0", ptr,
size);
if (!free_fastpath(ptr, size, true)) {
sdallocx_default(ptr, size, 0);
}
je_sdallocx_impl(ptr, size, flags);
LOG("core.sdallocx.exit", "");
}

View File

@ -173,21 +173,21 @@ operator new[](std::size_t size, std::align_val_t alignment, const std::nothrow_
void
operator delete(void *ptr) noexcept {
je_free(ptr);
je_free_impl(ptr);
}
void
operator delete[](void *ptr) noexcept {
je_free(ptr);
je_free_impl(ptr);
}
void
operator delete(void *ptr, const std::nothrow_t &) noexcept {
je_free(ptr);
je_free_impl(ptr);
}
void operator delete[](void *ptr, const std::nothrow_t &) noexcept {
je_free(ptr);
je_free_impl(ptr);
}
#if __cpp_sized_deallocation >= 201309
@ -224,27 +224,27 @@ alignedSizedDeleteImpl(void* ptr, std::size_t size, std::align_val_t alignment)
if (unlikely(ptr == nullptr)) {
return;
}
je_sdallocx(ptr, size, MALLOCX_ALIGN(alignment));
je_sdallocx_impl(ptr, size, MALLOCX_ALIGN(alignment));
}
void
operator delete(void* ptr, std::align_val_t) noexcept {
je_free(ptr);
je_free_impl(ptr);
}
void
operator delete[](void* ptr, std::align_val_t) noexcept {
je_free(ptr);
je_free_impl(ptr);
}
void
operator delete(void* ptr, std::align_val_t, const std::nothrow_t&) noexcept {
je_free(ptr);
je_free_impl(ptr);
}
void
operator delete[](void* ptr, std::align_val_t, const std::nothrow_t&) noexcept {
je_free(ptr);
je_free_impl(ptr);
}
void

View File

@ -4,7 +4,7 @@
static void
malloc_free(void) {
void *p = malloc(1);
expect_ptr_not_null(p, "Unexpected new failure");
expect_ptr_not_null(p, "Unexpected malloc failure");
free(p);
}
@ -18,7 +18,7 @@ new_delete(void) {
static void
malloc_free_array(void) {
void *p = malloc(sizeof(int)*8);
expect_ptr_not_null(p, "Unexpected new[] failure");
expect_ptr_not_null(p, "Unexpected malloc failure");
free(p);
}
@ -40,7 +40,7 @@ new_sized_delete(void) {
static void
malloc_sdallocx(void) {
void *p = malloc(1);
expect_ptr_not_null(p, "Unexpected new failure");
expect_ptr_not_null(p, "Unexpected malloc failure");
sdallocx(p, 1, 0);
}
#endif
@ -79,5 +79,4 @@ main() {
test_free_vs_delete,
test_free_array_vs_delete_array,
test_sized_delete_vs_sdallocx);
}