Bundle 3 branches on fast path into tsd_state.
Add tsd_state_nominal_slow. With it, the tsd state check on the malloc() fast path also covers the tcache_enabled check, and on the free() fast path it covers both the malloc_slow and tcache_enabled branches.
This commit is contained in:
parent
ccfe68a916
commit
b348ba29bb
@ -3,6 +3,10 @@
|
||||
|
||||
#include "jemalloc/internal/atomic.h"
|
||||
|
||||
/* TSD checks this to set thread local slow state accordingly. */
|
||||
extern bool malloc_slow;
|
||||
|
||||
/* Run-time options. */
|
||||
extern bool opt_abort;
|
||||
extern const char *opt_junk;
|
||||
extern bool opt_junk_alloc;
|
||||
|
@ -289,6 +289,7 @@ malloc_mutex_postfork_parent
|
||||
malloc_mutex_prefork
|
||||
malloc_mutex_unlock
|
||||
malloc_printf
|
||||
malloc_slow
|
||||
malloc_snprintf
|
||||
malloc_strtoumax
|
||||
malloc_tsd_boot0
|
||||
@ -526,6 +527,7 @@ tsd_cleanup
|
||||
tsd_cleanup_wrapper
|
||||
tsd_fetch
|
||||
tsd_fetch_impl
|
||||
tsd_fetch_slow
|
||||
tsd_get
|
||||
tsd_get_allocates
|
||||
tsd_iarena_get
|
||||
@ -541,6 +543,7 @@ tsd_narenas_tdatap_get
|
||||
tsd_reentrancy_level_get
|
||||
tsd_reentrancy_level_set
|
||||
tsd_reentrancy_levelp_get
|
||||
tsd_slow_update
|
||||
tsd_wrapper_get
|
||||
tsd_wrapper_set
|
||||
tsd_nominal
|
||||
|
@ -40,6 +40,7 @@ tcache_enabled_set(tsd_t *tsd, bool enabled) {
|
||||
}
|
||||
/* Commit the state last. Above calls check current state. */
|
||||
tsd_tcache_enabled_set(tsd, enabled);
|
||||
tsd_slow_update(tsd);
|
||||
}
|
||||
|
||||
JEMALLOC_ALWAYS_INLINE void
|
||||
|
@ -14,5 +14,7 @@ void tsd_init_finish(tsd_init_head_t *head, tsd_init_block_t *block);
|
||||
#endif
|
||||
bool tsd_data_init(void *arg);
|
||||
void tsd_cleanup(void *arg);
|
||||
tsd_t *tsd_fetch_slow(tsd_t *tsd);
|
||||
void tsd_slow_update(tsd_t *tsd);
|
||||
|
||||
#endif /* JEMALLOC_INTERNAL_TSD_EXTERNS_H */
|
||||
|
@ -19,12 +19,54 @@ bool tsdn_null(const tsdn_t *tsdn);
|
||||
tsd_t *tsdn_tsd(tsdn_t *tsdn);
|
||||
rtree_ctx_t *tsd_rtree_ctx(tsd_t *tsd);
|
||||
rtree_ctx_t *tsdn_rtree_ctx(tsdn_t *tsdn, rtree_ctx_t *fallback);
|
||||
bool tsd_fast(tsd_t *tsd);
|
||||
void tsd_assert_fast(tsd_t *tsd);
|
||||
#endif
|
||||
|
||||
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_TSD_C_))
|
||||
malloc_tsd_externs(, tsd_t)
|
||||
malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, , tsd_t, tsd_initializer, tsd_cleanup)
|
||||
|
||||
/*
 * Accessor generators, applied through the MALLOC_TSD list with O() defined
 * as below:
 *   - O(n, t, gs, i, c) always emits tsd_<n>p_get(), which returns a pointer
 *     to field n of the tsd, then dispatches on gs to one of the two
 *     MALLOC_TSD_getset_* macros.
 *   - MALLOC_TSD_getset_yes additionally emits tsd_<n>_get() (reads through
 *     tsd_<n>p_get()) and tsd_<n>_set().  The setter asserts that the tsd is
 *     in the nominal, nominal_slow or reincarnated state before storing.
 *   - MALLOC_TSD_getset_no emits nothing extra.
 * The helper macros are undefined again once MALLOC_TSD has been expanded.
 */
#define MALLOC_TSD_getset_yes(n, t) \
|
||||
JEMALLOC_ALWAYS_INLINE t \
|
||||
tsd_##n##_get(tsd_t *tsd) { \
|
||||
return *tsd_##n##p_get(tsd); \
|
||||
} \
|
||||
JEMALLOC_ALWAYS_INLINE void \
|
||||
tsd_##n##_set(tsd_t *tsd, t n) { \
|
||||
assert(tsd->state == tsd_state_nominal || \
|
||||
tsd->state == tsd_state_nominal_slow || \
|
||||
tsd->state == tsd_state_reincarnated); \
|
||||
tsd->n = n; \
|
||||
}
|
||||
#define MALLOC_TSD_getset_no(n, t)
|
||||
#define O(n, t, gs, i, c) \
|
||||
JEMALLOC_ALWAYS_INLINE t * \
|
||||
tsd_##n##p_get(tsd_t *tsd) { \
|
||||
return &tsd->n; \
|
||||
} \
|
||||
\
|
||||
MALLOC_TSD_getset_##gs(n, t)
|
||||
MALLOC_TSD
|
||||
#undef MALLOC_TSD_getset_yes
|
||||
#undef MALLOC_TSD_getset_no
|
||||
#undef O
|
||||
|
||||
/*
 * Sanity check for the fast path: asserts that the global malloc_slow flag is
 * clear and that tcache is enabled for this tsd.
 */
JEMALLOC_ALWAYS_INLINE void
|
||||
tsd_assert_fast(tsd_t *tsd) {
|
||||
assert(!malloc_slow && tsd_tcache_enabled_get(tsd));
|
||||
}
|
||||
|
||||
/*
 * Returns true iff the tsd state is exactly tsd_state_nominal (the fast
 * state).  When it is, the fast-path invariants are additionally asserted via
 * tsd_assert_fast().
 */
JEMALLOC_ALWAYS_INLINE bool
|
||||
tsd_fast(tsd_t *tsd) {
|
||||
bool fast = (tsd->state == tsd_state_nominal);
|
||||
if (fast) {
|
||||
tsd_assert_fast(tsd);
|
||||
}
|
||||
|
||||
return fast;
|
||||
}
|
||||
|
||||
JEMALLOC_ALWAYS_INLINE tsd_t *
|
||||
tsd_fetch_impl(bool init) {
|
||||
tsd_t *tsd = tsd_get(init);
|
||||
@ -35,19 +77,10 @@ tsd_fetch_impl(bool init) {
|
||||
assert(tsd != NULL);
|
||||
|
||||
if (unlikely(tsd->state != tsd_state_nominal)) {
|
||||
if (tsd->state == tsd_state_uninitialized) {
|
||||
tsd->state = tsd_state_nominal;
|
||||
/* Trigger cleanup handler registration. */
|
||||
tsd_set(tsd);
|
||||
tsd_data_init(tsd);
|
||||
} else if (tsd->state == tsd_state_purgatory) {
|
||||
tsd->state = tsd_state_reincarnated;
|
||||
tsd_set(tsd);
|
||||
tsd_data_init(tsd);
|
||||
} else {
|
||||
assert(tsd->state == tsd_state_reincarnated);
|
||||
}
|
||||
return tsd_fetch_slow(tsd);
|
||||
}
|
||||
assert(tsd_fast(tsd));
|
||||
tsd_assert_fast(tsd);
|
||||
|
||||
return tsd;
|
||||
}
|
||||
@ -64,33 +97,9 @@ tsd_tsdn(tsd_t *tsd) {
|
||||
|
||||
JEMALLOC_INLINE bool
|
||||
tsd_nominal(tsd_t *tsd) {
|
||||
return (tsd->state == tsd_state_nominal);
|
||||
return (tsd->state <= tsd_state_nominal_max);
|
||||
}
|
||||
|
||||
#define MALLOC_TSD_getset_yes(n, t) \
|
||||
JEMALLOC_ALWAYS_INLINE t \
|
||||
tsd_##n##_get(tsd_t *tsd) { \
|
||||
return *tsd_##n##p_get(tsd); \
|
||||
} \
|
||||
JEMALLOC_ALWAYS_INLINE void \
|
||||
tsd_##n##_set(tsd_t *tsd, t n) { \
|
||||
assert(tsd->state == tsd_state_nominal || \
|
||||
tsd->state == tsd_state_reincarnated); \
|
||||
tsd->n = n; \
|
||||
}
|
||||
#define MALLOC_TSD_getset_no(n, t)
|
||||
#define O(n, t, gs, i, c) \
|
||||
JEMALLOC_ALWAYS_INLINE t * \
|
||||
tsd_##n##p_get(tsd_t *tsd) { \
|
||||
return &tsd->n; \
|
||||
} \
|
||||
\
|
||||
MALLOC_TSD_getset_##gs(n, t)
|
||||
MALLOC_TSD
|
||||
#undef MALLOC_TSD_getset_yes
|
||||
#undef MALLOC_TSD_getset_no
|
||||
#undef O
|
||||
|
||||
JEMALLOC_ALWAYS_INLINE tsdn_t *
|
||||
tsdn_fetch(void) {
|
||||
if (!tsd_booted_get()) {
|
||||
|
@ -64,7 +64,7 @@ struct tsd_init_head_s {
|
||||
O(iarena, arena_t *, yes, no, yes) \
|
||||
O(arena, arena_t *, yes, no, yes) \
|
||||
O(arenas_tdata, arena_tdata_t *,yes, no, yes) \
|
||||
O(tcache, tcache_t, yes, no, yes) \
|
||||
O(tcache, tcache_t, no, no, yes) \
|
||||
O(witnesses, witness_list_t, no, no, yes) \
|
||||
O(rtree_leaf_elm_witnesses, rtree_leaf_elm_witness_tsd_t, \
|
||||
no, no, no) \
|
||||
|
@ -20,11 +20,15 @@ typedef struct tsdn_s tsdn_t;
|
||||
#define TSDN_NULL ((tsdn_t *)0)
|
||||
|
||||
enum {
|
||||
tsd_state_uninitialized = 0,
|
||||
tsd_state_nominal = 1,
|
||||
tsd_state_nominal = 0, /* Common case --> jnz. */
|
||||
tsd_state_nominal_slow = 1, /* Initialized but on slow path. */
|
||||
/* the above 2 nominal states should be lower values. */
|
||||
tsd_state_nominal_max = 1, /* used for comparison only. */
|
||||
tsd_state_purgatory = 2,
|
||||
tsd_state_reincarnated = 3
|
||||
tsd_state_reincarnated = 3,
|
||||
tsd_state_uninitialized = 4
|
||||
};
|
||||
|
||||
/* Manually limit tsd_state_t to a single byte. */
|
||||
typedef uint8_t tsd_state_t;
|
||||
|
||||
|
101
src/jemalloc.c
101
src/jemalloc.c
@ -76,7 +76,7 @@ typedef enum {
|
||||
static malloc_init_t malloc_init_state = malloc_init_uninitialized;
|
||||
|
||||
/* False should be the common case. Set to true to trigger initialization. */
|
||||
static bool malloc_slow = true;
|
||||
bool malloc_slow = true;
|
||||
|
||||
/* When malloc_slow is true, set the corresponding bits for sanity check. */
|
||||
enum {
|
||||
@ -1539,7 +1539,13 @@ imalloc_no_sample(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd,
|
||||
|
||||
/* Fill in the tcache. */
|
||||
if (dopts->tcache_ind == TCACHE_IND_AUTOMATIC) {
|
||||
tcache = tcache_get(tsd);
|
||||
if (likely(!sopts->slow)) {
|
||||
/* Getting tcache ptr unconditionally. */
|
||||
tcache = tsd_tcachep_get(tsd);
|
||||
assert(tcache == tcache_get(tsd));
|
||||
} else {
|
||||
tcache = tcache_get(tsd);
|
||||
}
|
||||
} else if (dopts->tcache_ind == TCACHE_IND_NONE) {
|
||||
tcache = NULL;
|
||||
} else {
|
||||
@ -1640,13 +1646,11 @@ compute_size_with_overflow(bool may_overflow, dynamic_opts_t *dopts,
|
||||
}
|
||||
|
||||
JEMALLOC_ALWAYS_INLINE_C int
|
||||
imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts) {
|
||||
imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) {
|
||||
/* Where the actual allocated memory will live. */
|
||||
void *allocation = NULL;
|
||||
/* Filled in by compute_size_with_overflow below. */
|
||||
size_t size = 0;
|
||||
/* We compute a value for this right before allocating. */
|
||||
tsd_t *tsd = NULL;
|
||||
/*
|
||||
* For unaligned allocations, we need only ind. For aligned
|
||||
* allocations, or in case of stats or profiling we need usize.
|
||||
@ -1667,13 +1671,6 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts) {
|
||||
*/
|
||||
int8_t *reentrancy_level = NULL;
|
||||
|
||||
/* Initialize (if we can't prove we don't have to). */
|
||||
if (sopts->slow) {
|
||||
if (unlikely(malloc_init())) {
|
||||
goto label_oom;
|
||||
}
|
||||
}
|
||||
|
||||
/* Compute the amount of memory the user wants. */
|
||||
if (unlikely(compute_size_with_overflow(sopts->may_overflow, dopts,
|
||||
&size))) {
|
||||
@ -1714,11 +1711,6 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts) {
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* We always need the tsd, even if we aren't going to use the tcache for
|
||||
* some reason. Let's grab it right away.
|
||||
*/
|
||||
tsd = tsd_fetch();
|
||||
|
||||
/*
|
||||
* If we need to handle reentrancy, we can do it out of a
|
||||
@ -1752,11 +1744,7 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts) {
|
||||
|
||||
alloc_ctx_t alloc_ctx;
|
||||
if (likely((uintptr_t)tctx == (uintptr_t)1U)) {
|
||||
if (usize > SMALL_MAXCLASS) {
|
||||
alloc_ctx.slab = false;
|
||||
} else {
|
||||
alloc_ctx.slab = true;
|
||||
}
|
||||
alloc_ctx.slab = (usize <= SMALL_MAXCLASS);
|
||||
allocation = imalloc_no_sample(
|
||||
sopts, dopts, tsd, usize, usize, ind);
|
||||
} else if ((uintptr_t)tctx > (uintptr_t)1U) {
|
||||
@ -1879,12 +1867,29 @@ label_invalid_alignment:
|
||||
/* Returns the errno-style error code of the allocation. */
|
||||
JEMALLOC_ALWAYS_INLINE_C int
|
||||
imalloc(static_opts_t *sopts, dynamic_opts_t *dopts) {
|
||||
if (unlikely(malloc_slow)) {
|
||||
sopts->slow = true;
|
||||
return imalloc_body(sopts, dopts);
|
||||
} else {
|
||||
if (unlikely(!malloc_initialized()) && unlikely(malloc_init())) {
|
||||
if (config_xmalloc && unlikely(opt_xmalloc)) {
|
||||
malloc_write(sopts->oom_string);
|
||||
abort();
|
||||
}
|
||||
UTRACE(NULL, size, NULL);
|
||||
set_errno(ENOMEM);
|
||||
*dopts->result = NULL;
|
||||
|
||||
return ENOMEM;
|
||||
}
|
||||
|
||||
/* We always need the tsd. Let's grab it right away. */
|
||||
tsd_t *tsd = tsd_fetch();
|
||||
assert(tsd);
|
||||
if (likely(tsd_fast(tsd))) {
|
||||
/* Fast and common path. */
|
||||
tsd_assert_fast(tsd);
|
||||
sopts->slow = false;
|
||||
return imalloc_body(sopts, dopts);
|
||||
return imalloc_body(sopts, dopts, tsd);
|
||||
} else {
|
||||
sopts->slow = true;
|
||||
return imalloc_body(sopts, dopts, tsd);
|
||||
}
|
||||
}
|
||||
/******************************************************************************/
|
||||
@ -2198,13 +2203,23 @@ je_free(void *ptr) {
|
||||
if (*tsd_reentrancy_levelp_get(tsd) == 0) {
|
||||
witness_assert_lockless(tsd_tsdn(tsd));
|
||||
}
|
||||
tcache_t *tcache = NULL;
|
||||
if (likely(*tsd_reentrancy_levelp_get(tsd) == 0)) {
|
||||
tcache = tcache_get(tsd);
|
||||
}
|
||||
if (likely(!malloc_slow)) {
|
||||
tcache_t *tcache;
|
||||
if (likely(tsd_fast(tsd))) {
|
||||
tsd_assert_fast(tsd);
|
||||
if (likely(*tsd_reentrancy_levelp_get(tsd) == 0)) {
|
||||
/* Getting tcache ptr unconditionally. */
|
||||
tcache = tsd_tcachep_get(tsd);
|
||||
assert(tcache == tcache_get(tsd));
|
||||
} else {
|
||||
tcache = NULL;
|
||||
}
|
||||
ifree(tsd, ptr, tcache, false);
|
||||
} else {
|
||||
if (likely(*tsd_reentrancy_levelp_get(tsd) == 0)) {
|
||||
tcache = tcache_get(tsd);
|
||||
} else {
|
||||
tcache = NULL;
|
||||
}
|
||||
ifree(tsd, ptr, tcache, true);
|
||||
}
|
||||
if (*tsd_reentrancy_levelp_get(tsd) == 0) {
|
||||
@ -2699,6 +2714,7 @@ je_dallocx(void *ptr, int flags) {
|
||||
assert(malloc_initialized() || IS_INITIALIZER);
|
||||
|
||||
tsd = tsd_fetch();
|
||||
bool fast = tsd_fast(tsd);
|
||||
witness_assert_lockless(tsd_tsdn(tsd));
|
||||
if (unlikely((flags & MALLOCX_TCACHE_MASK) != 0)) {
|
||||
/* Not allowed to be reentrant and specify a custom tcache. */
|
||||
@ -2710,14 +2726,20 @@ je_dallocx(void *ptr, int flags) {
|
||||
}
|
||||
} else {
|
||||
if (likely(*tsd_reentrancy_levelp_get(tsd) == 0)) {
|
||||
tcache = tcache_get(tsd);
|
||||
if (likely(fast)) {
|
||||
tcache = tsd_tcachep_get(tsd);
|
||||
assert(tcache == tcache_get(tsd));
|
||||
} else {
|
||||
tcache = tcache_get(tsd);
|
||||
}
|
||||
} else {
|
||||
tcache = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
UTRACE(ptr, 0, 0);
|
||||
if (likely(!malloc_slow)) {
|
||||
if (likely(fast)) {
|
||||
tsd_assert_fast(tsd);
|
||||
ifree(tsd, ptr, tcache, false);
|
||||
} else {
|
||||
ifree(tsd, ptr, tcache, true);
|
||||
@ -2749,6 +2771,7 @@ je_sdallocx(void *ptr, size_t size, int flags) {
|
||||
assert(ptr != NULL);
|
||||
assert(malloc_initialized() || IS_INITIALIZER);
|
||||
tsd = tsd_fetch();
|
||||
bool fast = tsd_fast(tsd);
|
||||
usize = inallocx(tsd_tsdn(tsd), size, flags);
|
||||
assert(usize == isalloc(tsd_tsdn(tsd), ptr));
|
||||
|
||||
@ -2763,14 +2786,20 @@ je_sdallocx(void *ptr, size_t size, int flags) {
|
||||
}
|
||||
} else {
|
||||
if (likely(*tsd_reentrancy_levelp_get(tsd) == 0)) {
|
||||
tcache = tcache_get(tsd);
|
||||
if (likely(fast)) {
|
||||
tcache = tsd_tcachep_get(tsd);
|
||||
assert(tcache == tcache_get(tsd));
|
||||
} else {
|
||||
tcache = tcache_get(tsd);
|
||||
}
|
||||
} else {
|
||||
tcache = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
UTRACE(ptr, 0, 0);
|
||||
if (likely(!malloc_slow)) {
|
||||
if (likely(fast)) {
|
||||
tsd_assert_fast(tsd);
|
||||
isfree(tsd, ptr, usize, tcache, false);
|
||||
} else {
|
||||
isfree(tsd, ptr, usize, tcache, true);
|
||||
|
@ -334,6 +334,8 @@ bool
|
||||
tsd_tcache_enabled_data_init(tsd_t *tsd) {
|
||||
/* Called upon tsd initialization. */
|
||||
tsd_tcache_enabled_set(tsd, opt_tcache);
|
||||
tsd_slow_update(tsd);
|
||||
|
||||
if (opt_tcache) {
|
||||
/* Trigger tcache init. */
|
||||
tsd_tcache_data_init(tsd);
|
||||
|
40
src/tsd.c
40
src/tsd.c
@ -12,6 +12,40 @@ malloc_tsd_data(, , tsd_t, TSD_INITIALIZER)
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
/*
 * Re-derive the fast/slow flavor of a nominal tsd.  Does nothing unless
 * tsd_nominal(tsd) holds; otherwise the state becomes tsd_state_nominal_slow
 * when malloc_slow is set or tcache is disabled for this tsd, and
 * tsd_state_nominal when neither is the case.
 */
void
|
||||
tsd_slow_update(tsd_t *tsd) {
|
||||
if (tsd_nominal(tsd)) {
|
||||
/* Either condition alone is enough to force the slow state. */
if (malloc_slow || !tsd->tcache_enabled) {
|
||||
tsd->state = tsd_state_nominal_slow;
|
||||
} else {
|
||||
tsd->state = tsd_state_nominal;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Out-of-line handling for a tsd whose state is not tsd_state_nominal.
 * Dispatches on tsd->state:
 *   - nominal_slow:  nothing to do beyond a consistency assertion.
 *   - uninitialized: switch to nominal, refine to fast/slow via
 *                    tsd_slow_update(), then tsd_set() and tsd_data_init().
 *   - purgatory:     switch to reincarnated, then tsd_set() and
 *                    tsd_data_init().
 *   - anything else: asserted to be reincarnated; left unchanged.
 * Always returns the tsd pointer it was given.
 */
tsd_t *
|
||||
tsd_fetch_slow(tsd_t *tsd) {
|
||||
if (tsd->state == tsd_state_nominal_slow) {
|
||||
/* On slow path but no work needed. */
|
||||
assert(malloc_slow || !tsd_tcache_enabled_get(tsd) ||
|
||||
*tsd_arenas_tdata_bypassp_get(tsd));
|
||||
} else if (tsd->state == tsd_state_uninitialized) {
|
||||
tsd->state = tsd_state_nominal;
|
||||
/* State must already be nominal here, or tsd_slow_update() is a no-op. */
tsd_slow_update(tsd);
|
||||
/* Trigger cleanup handler registration. */
|
||||
tsd_set(tsd);
|
||||
tsd_data_init(tsd);
|
||||
} else if (tsd->state == tsd_state_purgatory) {
|
||||
tsd->state = tsd_state_reincarnated;
|
||||
tsd_set(tsd);
|
||||
tsd_data_init(tsd);
|
||||
} else {
|
||||
assert(tsd->state == tsd_state_reincarnated);
|
||||
}
|
||||
|
||||
return tsd;
|
||||
}
|
||||
|
||||
void *
|
||||
malloc_tsd_malloc(size_t size) {
|
||||
return a0malloc(CACHELINE_CEILING(size));
|
||||
@ -82,6 +116,7 @@ tsd_cleanup(void *arg) {
|
||||
/* Do nothing. */
|
||||
break;
|
||||
case tsd_state_nominal:
|
||||
case tsd_state_nominal_slow:
|
||||
case tsd_state_reincarnated:
|
||||
/*
|
||||
* Reincarnated means another destructor deallocated memory
|
||||
@ -129,7 +164,10 @@ malloc_tsd_boot0(void) {
|
||||
void
|
||||
malloc_tsd_boot1(void) {
|
||||
tsd_boot1();
|
||||
*tsd_arenas_tdata_bypassp_get(tsd_fetch()) = false;
|
||||
tsd_t *tsd = tsd_fetch();
|
||||
/* malloc_slow has been set properly. Update tsd_slow. */
|
||||
tsd_slow_update(tsd);
|
||||
*tsd_arenas_tdata_bypassp_get(tsd) = false;
|
||||
}
|
||||
|
||||
#ifdef _WIN32
|
||||
|
Loading…
Reference in New Issue
Block a user