Add a free() and sdallocx() (where flags == 0) fastpath
Add unsized and sized deallocation fastpaths. Similar to the malloc() fastpath, this removes all frame manipulation for the majority of free() calls. The performance advantage here is smaller than that of the malloc() fastpath, but production tests suggest it is still an improvement of half a percent or so. Stats and sampling are both supported (sdallocx needs a sampling check; for rtree lookups, slab will only be set for unsampled objects). We don't support flush: any flush requests go to the slowpath.
This commit is contained in:
parent
e2ab215324
commit
794e29c0ab
@ -2153,6 +2153,7 @@ imalloc(static_opts_t *sopts, dynamic_opts_t *dopts) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
JEMALLOC_NOINLINE
|
||||||
void *
|
void *
|
||||||
malloc_default(size_t size) {
|
malloc_default(size_t size) {
|
||||||
void *ret;
|
void *ret;
|
||||||
@ -2650,10 +2651,9 @@ je_realloc(void *ptr, size_t arg_size) {
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
JEMALLOC_EXPORT void JEMALLOC_NOTHROW
|
JEMALLOC_NOINLINE
|
||||||
je_free(void *ptr) {
|
void
|
||||||
LOG("core.free.entry", "ptr: %p", ptr);
|
free_default(void *ptr) {
|
||||||
|
|
||||||
UTRACE(ptr, 0, 0);
|
UTRACE(ptr, 0, 0);
|
||||||
if (likely(ptr != NULL)) {
|
if (likely(ptr != NULL)) {
|
||||||
/*
|
/*
|
||||||
@ -2685,6 +2685,73 @@ je_free(void *ptr) {
|
|||||||
}
|
}
|
||||||
check_entry_exit_locking(tsd_tsdn(tsd));
|
check_entry_exit_locking(tsd_tsdn(tsd));
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
JEMALLOC_ALWAYS_INLINE
|
||||||
|
bool free_fastpath(void *ptr, size_t size, bool size_hint) {
|
||||||
|
tsd_t *tsd = tsd_get(false);
|
||||||
|
if (unlikely(!tsd || !tsd_fast(tsd))) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
tcache_t *tcache = tsd_tcachep_get(tsd);
|
||||||
|
|
||||||
|
alloc_ctx_t alloc_ctx;
|
||||||
|
/*
|
||||||
|
* If !config_cache_oblivious, we can check PAGE alignment to
|
||||||
|
* detect sampled objects. Otherwise addresses are
|
||||||
|
* randomized, and we have to look it up in the rtree anyway.
|
||||||
|
* See also isfree().
|
||||||
|
*/
|
||||||
|
if (!size_hint || config_cache_oblivious) {
|
||||||
|
rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd);
|
||||||
|
bool res = rtree_szind_slab_read_fast(tsd_tsdn(tsd), &extents_rtree,
|
||||||
|
rtree_ctx, (uintptr_t)ptr,
|
||||||
|
&alloc_ctx.szind, &alloc_ctx.slab);
|
||||||
|
assert(alloc_ctx.szind != SC_NSIZES);
|
||||||
|
|
||||||
|
/* Note: profiled objects will have alloc_ctx.slab set */
|
||||||
|
if (!res || !alloc_ctx.slab) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
/*
|
||||||
|
* Check for both sizes that are too large, and for sampled objects.
|
||||||
|
* Sampled objects are always page-aligned. The sampled object check
|
||||||
|
* will also check for null ptr.
|
||||||
|
*/
|
||||||
|
if (size > SC_LOOKUP_MAXCLASS || (((uintptr_t)ptr & PAGE_MASK) == 0)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
alloc_ctx.szind = sz_size2index_lookup(size);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (unlikely(ticker_trytick(&tcache->gc_ticker))) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
cache_bin_t *bin = tcache_small_bin_get(tcache, alloc_ctx.szind);
|
||||||
|
cache_bin_info_t *bin_info = &tcache_bin_info[alloc_ctx.szind];
|
||||||
|
if (!cache_bin_dalloc_easy(bin, bin_info, ptr)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (config_stats) {
|
||||||
|
size_t usize = sz_index2size(alloc_ctx.szind);
|
||||||
|
*tsd_thread_deallocatedp_get(tsd) += usize;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
JEMALLOC_EXPORT void JEMALLOC_NOTHROW
|
||||||
|
je_free(void *ptr) {
|
||||||
|
LOG("core.free.entry", "ptr: %p", ptr);
|
||||||
|
|
||||||
|
if (!free_fastpath(ptr, 0, false)) {
|
||||||
|
free_default(ptr);
|
||||||
|
}
|
||||||
|
|
||||||
LOG("core.free.exit", "");
|
LOG("core.free.exit", "");
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -3362,14 +3429,11 @@ inallocx(tsdn_t *tsdn, size_t size, int flags) {
|
|||||||
return usize;
|
return usize;
|
||||||
}
|
}
|
||||||
|
|
||||||
JEMALLOC_EXPORT void JEMALLOC_NOTHROW
|
JEMALLOC_NOINLINE void
|
||||||
je_sdallocx(void *ptr, size_t size, int flags) {
|
sdallocx_default(void *ptr, size_t size, int flags) {
|
||||||
assert(ptr != NULL);
|
assert(ptr != NULL);
|
||||||
assert(malloc_initialized() || IS_INITIALIZER);
|
assert(malloc_initialized() || IS_INITIALIZER);
|
||||||
|
|
||||||
LOG("core.sdallocx.entry", "ptr: %p, size: %zu, flags: %d", ptr,
|
|
||||||
size, flags);
|
|
||||||
|
|
||||||
tsd_t *tsd = tsd_fetch();
|
tsd_t *tsd = tsd_fetch();
|
||||||
bool fast = tsd_fast(tsd);
|
bool fast = tsd_fast(tsd);
|
||||||
size_t usize = inallocx(tsd_tsdn(tsd), size, flags);
|
size_t usize = inallocx(tsd_tsdn(tsd), size, flags);
|
||||||
@ -3409,6 +3473,17 @@ je_sdallocx(void *ptr, size_t size, int flags) {
|
|||||||
}
|
}
|
||||||
check_entry_exit_locking(tsd_tsdn(tsd));
|
check_entry_exit_locking(tsd_tsdn(tsd));
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
JEMALLOC_EXPORT void JEMALLOC_NOTHROW
|
||||||
|
je_sdallocx(void *ptr, size_t size, int flags) {
|
||||||
|
LOG("core.sdallocx.entry", "ptr: %p, size: %zu, flags: %d", ptr,
|
||||||
|
size, flags);
|
||||||
|
|
||||||
|
if (flags !=0 || !free_fastpath(ptr, size, true)) {
|
||||||
|
sdallocx_default(ptr, size, flags);
|
||||||
|
}
|
||||||
|
|
||||||
LOG("core.sdallocx.exit", "");
|
LOG("core.sdallocx.exit", "");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user