Optimize away the tsd_fast() check on free fastpath.
To ensure that the free fastpath can tolerate uninitialized tsd, improved the static initializer for rtree_ctx in tsd.
This commit is contained in:
parent
26f5257b88
commit
4d56aaeca5
@ -276,12 +276,14 @@ emap_full_alloc_ctx_try_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr,
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Returns true on error.
|
* Only used on the fastpath of free. Returns true when cannot be fulfilled by
|
||||||
|
* fast path, e.g. when the metadata key is not cached.
|
||||||
*/
|
*/
|
||||||
JEMALLOC_ALWAYS_INLINE bool
|
JEMALLOC_ALWAYS_INLINE bool
|
||||||
emap_alloc_ctx_try_lookup_fast(tsd_t *tsd, emap_t *emap, const void *ptr,
|
emap_alloc_ctx_try_lookup_fast(tsd_t *tsd, emap_t *emap, const void *ptr,
|
||||||
emap_alloc_ctx_t *alloc_ctx) {
|
emap_alloc_ctx_t *alloc_ctx) {
|
||||||
rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd);
|
/* Use the unsafe getter since this may gets called during exit. */
|
||||||
|
rtree_ctx_t *rtree_ctx = tsd_rtree_ctxp_get_unsafe(tsd);
|
||||||
|
|
||||||
rtree_metadata_t metadata;
|
rtree_metadata_t metadata;
|
||||||
bool err = rtree_metadata_try_read_fast(tsd_tsdn(tsd), &emap->rtree,
|
bool err = rtree_metadata_try_read_fast(tsd_tsdn(tsd), &emap->rtree,
|
||||||
|
@ -35,9 +35,6 @@
|
|||||||
# define RTREE_LEAF_COMPACT
|
# define RTREE_LEAF_COMPACT
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* Needed for initialization only. */
|
|
||||||
#define RTREE_LEAFKEY_INVALID ((uintptr_t)1)
|
|
||||||
|
|
||||||
typedef struct rtree_node_elm_s rtree_node_elm_t;
|
typedef struct rtree_node_elm_s rtree_node_elm_t;
|
||||||
struct rtree_node_elm_s {
|
struct rtree_node_elm_s {
|
||||||
atomic_p_t child; /* (rtree_{node,leaf}_elm_t *) */
|
atomic_p_t child; /* (rtree_{node,leaf}_elm_t *) */
|
||||||
|
@ -18,16 +18,28 @@
|
|||||||
* cache misses if made overly large, plus the cost of linear search in the LRU
|
* cache misses if made overly large, plus the cost of linear search in the LRU
|
||||||
* cache.
|
* cache.
|
||||||
*/
|
*/
|
||||||
#define RTREE_CTX_LG_NCACHE 4
|
#define RTREE_CTX_NCACHE 16
|
||||||
#define RTREE_CTX_NCACHE (1 << RTREE_CTX_LG_NCACHE)
|
|
||||||
#define RTREE_CTX_NCACHE_L2 8
|
#define RTREE_CTX_NCACHE_L2 8
|
||||||
|
|
||||||
/*
|
/* Needed for initialization only. */
|
||||||
* Zero initializer required for tsd initialization only. Proper initialization
|
#define RTREE_LEAFKEY_INVALID ((uintptr_t)1)
|
||||||
* done via rtree_ctx_data_init().
|
#define RTREE_CTX_CACHE_ELM_INVALID {RTREE_LEAFKEY_INVALID, NULL}
|
||||||
*/
|
|
||||||
#define RTREE_CTX_ZERO_INITIALIZER {{{0, 0}}, {{0, 0}}}
|
|
||||||
|
|
||||||
|
#define RTREE_CTX_INIT_ELM_1 RTREE_CTX_CACHE_ELM_INVALID
|
||||||
|
#define RTREE_CTX_INIT_ELM_2 RTREE_CTX_INIT_ELM_1, RTREE_CTX_INIT_ELM_1
|
||||||
|
#define RTREE_CTX_INIT_ELM_4 RTREE_CTX_INIT_ELM_2, RTREE_CTX_INIT_ELM_2
|
||||||
|
#define RTREE_CTX_INIT_ELM_8 RTREE_CTX_INIT_ELM_4, RTREE_CTX_INIT_ELM_4
|
||||||
|
#define RTREE_CTX_INIT_ELM_16 RTREE_CTX_INIT_ELM_8, RTREE_CTX_INIT_ELM_8
|
||||||
|
|
||||||
|
#define _RTREE_CTX_INIT_ELM_DATA(n) RTREE_CTX_INIT_ELM_##n
|
||||||
|
#define RTREE_CTX_INIT_ELM_DATA(n) _RTREE_CTX_INIT_ELM_DATA(n)
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Static initializer (to invalidate the cache entries) is required because the
|
||||||
|
* free fastpath may access the rtree cache before a full tsd initialization.
|
||||||
|
*/
|
||||||
|
#define RTREE_CTX_INITIALIZER {{RTREE_CTX_INIT_ELM_DATA(RTREE_CTX_NCACHE)}, \
|
||||||
|
{RTREE_CTX_INIT_ELM_DATA(RTREE_CTX_NCACHE_L2)}}
|
||||||
|
|
||||||
typedef struct rtree_leaf_elm_s rtree_leaf_elm_t;
|
typedef struct rtree_leaf_elm_s rtree_leaf_elm_t;
|
||||||
|
|
||||||
|
@ -118,17 +118,10 @@ te_malloc_fastpath_ctx(tsd_t *tsd, uint64_t *allocated, uint64_t *threshold) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
JEMALLOC_ALWAYS_INLINE void
|
JEMALLOC_ALWAYS_INLINE void
|
||||||
te_free_fastpath_ctx(tsd_t *tsd, uint64_t *deallocated, uint64_t *threshold,
|
te_free_fastpath_ctx(tsd_t *tsd, uint64_t *deallocated, uint64_t *threshold) {
|
||||||
bool size_hint) {
|
/* Unsafe getters since this may happen before tsd_init. */
|
||||||
if (!size_hint) {
|
*deallocated = *tsd_thread_deallocatedp_get_unsafe(tsd);
|
||||||
*deallocated = tsd_thread_deallocated_get(tsd);
|
*threshold = *tsd_thread_deallocated_next_event_fastp_get_unsafe(tsd);
|
||||||
*threshold = tsd_thread_deallocated_next_event_fast_get(tsd);
|
|
||||||
} else {
|
|
||||||
/* Unsafe getters since this may happen before tsd_init. */
|
|
||||||
*deallocated = *tsd_thread_deallocatedp_get_unsafe(tsd);
|
|
||||||
*threshold =
|
|
||||||
*tsd_thread_deallocated_next_event_fastp_get_unsafe(tsd);
|
|
||||||
}
|
|
||||||
assert(*threshold <= TE_NEXT_EVENT_FAST_MAX);
|
assert(*threshold <= TE_NEXT_EVENT_FAST_MAX);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -119,7 +119,7 @@ typedef ql_elm(tsd_t) tsd_link_t;
|
|||||||
/* activity_callback_thunk */ \
|
/* activity_callback_thunk */ \
|
||||||
ACTIVITY_CALLBACK_THUNK_INITIALIZER, \
|
ACTIVITY_CALLBACK_THUNK_INITIALIZER, \
|
||||||
/* tcache_slow */ TCACHE_SLOW_ZERO_INITIALIZER, \
|
/* tcache_slow */ TCACHE_SLOW_ZERO_INITIALIZER, \
|
||||||
/* rtree_ctx */ RTREE_CTX_ZERO_INITIALIZER,
|
/* rtree_ctx */ RTREE_CTX_INITIALIZER,
|
||||||
|
|
||||||
/* O(name, type, nullable type) */
|
/* O(name, type, nullable type) */
|
||||||
#define TSD_DATA_FAST \
|
#define TSD_DATA_FAST \
|
||||||
|
@ -2910,12 +2910,20 @@ free_default(void *ptr) {
|
|||||||
JEMALLOC_ALWAYS_INLINE
|
JEMALLOC_ALWAYS_INLINE
|
||||||
bool free_fastpath(void *ptr, size_t size, bool size_hint) {
|
bool free_fastpath(void *ptr, size_t size, bool size_hint) {
|
||||||
tsd_t *tsd = tsd_get(false);
|
tsd_t *tsd = tsd_get(false);
|
||||||
|
/* The branch gets optimized away unless tsd_get_allocates(). */
|
||||||
|
if (unlikely(tsd == NULL)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
* The tsd_fast() / initialized checks are folded into the branch
|
||||||
|
* testing (deallocated_after >= threshold) later in this function.
|
||||||
|
* The threshold will be set to 0 when !tsd_fast.
|
||||||
|
*/
|
||||||
|
assert(tsd_fast(tsd) ||
|
||||||
|
*tsd_thread_deallocated_next_event_fastp_get_unsafe(tsd) == 0);
|
||||||
|
|
||||||
emap_alloc_ctx_t alloc_ctx;
|
emap_alloc_ctx_t alloc_ctx;
|
||||||
if (!size_hint) {
|
if (!size_hint) {
|
||||||
if (unlikely(tsd == NULL || !tsd_fast(tsd))) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
bool err = emap_alloc_ctx_try_lookup_fast(tsd,
|
bool err = emap_alloc_ctx_try_lookup_fast(tsd,
|
||||||
&arena_emap_global, ptr, &alloc_ctx);
|
&arena_emap_global, ptr, &alloc_ctx);
|
||||||
|
|
||||||
@ -2925,15 +2933,6 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) {
|
|||||||
}
|
}
|
||||||
assert(alloc_ctx.szind != SC_NSIZES);
|
assert(alloc_ctx.szind != SC_NSIZES);
|
||||||
} else {
|
} else {
|
||||||
/*
|
|
||||||
* The size hinted fastpath does not involve rtree lookup, thus
|
|
||||||
* can tolerate an uninitialized tsd. This allows the tsd_fast
|
|
||||||
* check to be folded into the branch testing fast_threshold
|
|
||||||
* (set to 0 when !tsd_fast).
|
|
||||||
*/
|
|
||||||
if (unlikely(tsd == NULL)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
/*
|
/*
|
||||||
* Check for both sizes that are too large, and for sampled
|
* Check for both sizes that are too large, and for sampled
|
||||||
* objects. Sampled objects are always page-aligned. The
|
* objects. Sampled objects are always page-aligned. The
|
||||||
@ -2949,7 +2948,7 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
uint64_t deallocated, threshold;
|
uint64_t deallocated, threshold;
|
||||||
te_free_fastpath_ctx(tsd, &deallocated, &threshold, size_hint);
|
te_free_fastpath_ctx(tsd, &deallocated, &threshold);
|
||||||
|
|
||||||
size_t usize = sz_index2size(alloc_ctx.szind);
|
size_t usize = sz_index2size(alloc_ctx.szind);
|
||||||
uint64_t deallocated_after = deallocated + usize;
|
uint64_t deallocated_after = deallocated + usize;
|
||||||
@ -2963,7 +2962,7 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) {
|
|||||||
if (unlikely(deallocated_after >= threshold)) {
|
if (unlikely(deallocated_after >= threshold)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
assert(tsd_fast(tsd));
|
||||||
bool fail = maybe_check_alloc_ctx(tsd, ptr, &alloc_ctx);
|
bool fail = maybe_check_alloc_ctx(tsd, ptr, &alloc_ctx);
|
||||||
if (fail) {
|
if (fail) {
|
||||||
/* See the comment in isfree. */
|
/* See the comment in isfree. */
|
||||||
|
@ -48,6 +48,13 @@ thd_start(void *arg) {
|
|||||||
int d = (int)(uintptr_t)arg;
|
int d = (int)(uintptr_t)arg;
|
||||||
void *p;
|
void *p;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Test free before tsd init -- the free fast path (which does not
|
||||||
|
* explicitly check for NULL) has to tolerate this case, and fall back
|
||||||
|
* to free_default.
|
||||||
|
*/
|
||||||
|
free(NULL);
|
||||||
|
|
||||||
tsd_t *tsd = tsd_fetch();
|
tsd_t *tsd = tsd_fetch();
|
||||||
expect_x_eq(tsd_test_data_get(tsd), MALLOC_TSD_TEST_DATA_INIT,
|
expect_x_eq(tsd_test_data_get(tsd), MALLOC_TSD_TEST_DATA_INIT,
|
||||||
"Initial tsd get should return initialization value");
|
"Initial tsd get should return initialization value");
|
||||||
|
Loading…
Reference in New Issue
Block a user