Pre-generate ncached_max for all bins for a better tcache_max tuning experience.

guangli-dai
2023-08-22 16:31:54 -07:00
committed by Qi Wang
parent 36becb1302
commit 6b197fdd46
12 changed files with 417 additions and 308 deletions
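A minimal standalone C sketch of the idea (toy types and names, not jemalloc's actual code): every bin pre-stores its ncached_max, so whether a size class is cacheable is answered per bin -- from its index and its pre-generated capacity -- instead of by comparing the request size against a tcache-wide tcache_max.

#include <assert.h>
#include <stdbool.h>

#define TOY_NBINS_MAX 8   /* stand-in for TCACHE_NBINS_MAX */

/* Per-bin capacity, pre-generated up front; 0 marks a disabled bin. */
typedef struct {
    unsigned ncached_max;
} toy_bin_t;

typedef struct {
    toy_bin_t bins[TOY_NBINS_MAX];
} toy_tcache_t;

static bool toy_bin_disabled(const toy_tcache_t *tc, unsigned ind) {
    /* The fast path consults only the bin, not a tcache-wide maximum. */
    return ind >= TOY_NBINS_MAX || tc->bins[ind].ncached_max == 0;
}

int main(void) {
    toy_tcache_t tc = { .bins = { [0] = { 4 }, [1] = { 4 }, [2] = { 0 } } };
    assert(!toy_bin_disabled(&tc, 0));
    assert(toy_bin_disabled(&tc, 2));      /* ncached_max == 0 */
    assert(toy_bin_disabled(&tc, 100));    /* ind out of range */
    return 0;
}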

View File

@@ -198,7 +198,9 @@ arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero,
assert(sz_can_use_slab(size));
return tcache_alloc_small(tsdn_tsd(tsdn), arena,
tcache, size, ind, zero, slow_path);
-} else if (likely(size <= tcache_max_get(tcache))) {
+} else if (likely(ind < TCACHE_NBINS_MAX &&
+    !tcache_bin_disabled(ind, &tcache->bins[ind],
+    tcache->tcache_slow))) {
return tcache_alloc_large(tsdn_tsd(tsdn), arena,
tcache, size, ind, zero, slow_path);
}
@@ -298,7 +300,9 @@ JEMALLOC_ALWAYS_INLINE void
arena_dalloc_large(tsdn_t *tsdn, void *ptr, tcache_t *tcache, szind_t szind,
bool slow_path) {
assert (!tsdn_null(tsdn) && tcache != NULL);
-if (szind < tcache_nhbins_get(tcache)) {
+if (szind < TCACHE_NBINS_MAX &&
+    !tcache_bin_disabled(szind, &tcache->bins[szind],
+    tcache->tcache_slow)) {
if (config_prof && unlikely(szind < SC_NBINS)) {
arena_dalloc_promoted(tsdn, ptr, tcache, slow_path);
} else {
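Reading the two hunks above together, the allocation route is now decided in this order: small bins first, then a bounds-plus-disabled check on the large bin, then the arena. A standalone toy model of that dispatch (all names here are hypothetical, not jemalloc's):

#include <stdio.h>

#define TOY_NBINS_MAX 8
#define TOY_SMALL_MAX 3  /* toy boundary: bins 0..3 are "small" */

static const unsigned toy_ncached_max[TOY_NBINS_MAX] =
    { 4, 4, 4, 4, 2, 2, 0, 0 };  /* bins 6..7 disabled */

static const char *toy_alloc_route(unsigned ind) {
    if (ind <= TOY_SMALL_MAX) {
        return "tcache small path";
    } else if (ind < TOY_NBINS_MAX && toy_ncached_max[ind] != 0) {
        return "tcache large path";
    }
    return "arena (tcache bypassed)";
}

int main(void) {
    printf("%s\n", toy_alloc_route(2));  /* tcache small path */
    printf("%s\n", toy_alloc_route(5));  /* tcache large path */
    printf("%s\n", toy_alloc_route(7));  /* arena (tcache bypassed) */
    return 0;
}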

View File

@@ -23,16 +23,20 @@
*/
typedef uint16_t cache_bin_sz_t;
+#define JUNK_ADDR ((uintptr_t)0x7a7a7a7a7a7a7a7aULL)
/*
* Leave a noticeable mark pattern on the cache bin stack boundaries, in case a
* bug starts leaking those. Make it look like the junk pattern but be distinct
* from it.
*/
-static const uintptr_t cache_bin_preceding_junk =
-    (uintptr_t)0x7a7a7a7a7a7a7a7aULL;
-/* Note: a7 vs. 7a above -- this tells you which pointer leaked. */
-static const uintptr_t cache_bin_trailing_junk =
-    (uintptr_t)0xa7a7a7a7a7a7a7a7ULL;
+static const uintptr_t cache_bin_preceding_junk = JUNK_ADDR;
+/* Note: JUNK_ADDR vs. JUNK_ADDR + 1 -- this tells you which pointer leaked. */
+static const uintptr_t cache_bin_trailing_junk = JUNK_ADDR + 1;
+/*
+ * A pointer used to initialize a fake stack_head for disabled small bins
+ * so that the enabled/disabled assessment does not rely on ncached_max.
+ */
+extern const uintptr_t disabled_bin;
/*
* That implies the following value, for the maximum number of items in any
@@ -174,9 +178,35 @@ cache_bin_nonfast_aligned(const void *ptr) {
return ((uintptr_t)ptr & san_cache_bin_nonfast_mask) == 0;
}
+static inline const void *
+cache_bin_disabled_bin_stack(void) {
+    return &disabled_bin;
+}
+/*
+ * If a cache bin was zero initialized (either because it lives in static or
+ * thread-local storage, or was memset to 0), this function indicates whether or
+ * not cache_bin_init was called on it.
+ */
+static inline bool
+cache_bin_still_zero_initialized(cache_bin_t *bin) {
+    return bin->stack_head == NULL;
+}
+static inline bool
+cache_bin_disabled(cache_bin_t *bin) {
+    bool disabled = (bin->stack_head == cache_bin_disabled_bin_stack());
+    if (disabled) {
+        assert((uintptr_t)(*bin->stack_head) == JUNK_ADDR);
+    }
+    return disabled;
+}
/* Returns ncached_max: Upper limit on ncached. */
static inline cache_bin_sz_t
-cache_bin_info_ncached_max(cache_bin_info_t *info) {
+cache_bin_info_ncached_max_get(cache_bin_t *bin, cache_bin_info_t *info) {
+    assert(!cache_bin_disabled(bin));
+    assert(info == &bin->bin_info);
return info->ncached_max;
}
@@ -234,7 +264,7 @@ cache_bin_ncached_get_internal(cache_bin_t *bin) {
static inline cache_bin_sz_t
cache_bin_ncached_get_local(cache_bin_t *bin, cache_bin_info_t *info) {
cache_bin_sz_t n = cache_bin_ncached_get_internal(bin);
-    assert(n <= cache_bin_info_ncached_max(info));
+    assert(n <= cache_bin_info_ncached_max_get(bin, info));
return n;
}
@@ -271,7 +301,7 @@ cache_bin_empty_position_get(cache_bin_t *bin) {
static inline uint16_t
cache_bin_low_bits_low_bound_get(cache_bin_t *bin, cache_bin_info_t *info) {
return (uint16_t)bin->low_bits_empty -
-        info->ncached_max * sizeof(void *);
+        cache_bin_info_ncached_max_get(bin, info) * sizeof(void *);
}
/*
@@ -281,7 +311,7 @@ cache_bin_low_bits_low_bound_get(cache_bin_t *bin, cache_bin_info_t *info) {
*/
static inline void **
cache_bin_low_bound_get(cache_bin_t *bin, cache_bin_info_t *info) {
-    cache_bin_sz_t ncached_max = cache_bin_info_ncached_max(info);
+    cache_bin_sz_t ncached_max = cache_bin_info_ncached_max_get(bin, info);
void **ret = cache_bin_empty_position_get(bin) - ncached_max;
assert(ret <= bin->stack_head);
@@ -313,7 +343,7 @@ cache_bin_low_water_get_internal(cache_bin_t *bin) {
static inline cache_bin_sz_t
cache_bin_low_water_get(cache_bin_t *bin, cache_bin_info_t *info) {
cache_bin_sz_t low_water = cache_bin_low_water_get_internal(bin);
-    assert(low_water <= cache_bin_info_ncached_max(info));
+    assert(low_water <= cache_bin_info_ncached_max_get(bin, info));
assert(low_water <= cache_bin_ncached_get_local(bin, info));
cache_bin_assert_earlier(bin, (uint16_t)(uintptr_t)bin->stack_head,
@@ -328,11 +358,13 @@ cache_bin_low_water_get(cache_bin_t *bin, cache_bin_info_t *info) {
*/
static inline void
cache_bin_low_water_set(cache_bin_t *bin) {
+    assert(!cache_bin_disabled(bin));
bin->low_bits_low_water = (uint16_t)(uintptr_t)bin->stack_head;
}
static inline void
cache_bin_low_water_adjust(cache_bin_t *bin) {
+    assert(!cache_bin_disabled(bin));
if (cache_bin_ncached_get_internal(bin)
< cache_bin_low_water_get_internal(bin)) {
cache_bin_low_water_set(bin);
@@ -494,25 +526,26 @@ cache_bin_stash(cache_bin_t *bin, void *ptr) {
/* Get the number of stashed pointers. */
JEMALLOC_ALWAYS_INLINE cache_bin_sz_t
cache_bin_nstashed_get_internal(cache_bin_t *bin, cache_bin_info_t *info) {
-    cache_bin_sz_t ncached_max = cache_bin_info_ncached_max(info);
+    cache_bin_sz_t ncached_max = cache_bin_info_ncached_max_get(bin, info);
uint16_t low_bits_low_bound = cache_bin_low_bits_low_bound_get(bin,
info);
cache_bin_sz_t n = cache_bin_diff(bin, low_bits_low_bound,
bin->low_bits_full) / sizeof(void *);
assert(n <= ncached_max);
+    if (config_debug && n != 0) {
-    /* Below are for assertions only. */
-    void **low_bound = cache_bin_low_bound_get(bin, info);
+        /* Below are for assertions only. */
+        void **low_bound = cache_bin_low_bound_get(bin, info);
-    assert((uint16_t)(uintptr_t)low_bound == low_bits_low_bound);
-    void *stashed = *(low_bound + n - 1);
-    bool aligned = cache_bin_nonfast_aligned(stashed);
+        assert((uint16_t)(uintptr_t)low_bound == low_bits_low_bound);
+        void *stashed = *(low_bound + n - 1);
+        bool aligned = cache_bin_nonfast_aligned(stashed);
#ifdef JEMALLOC_JET
-    /* Allow arbitrary pointers to be stashed in tests. */
-    aligned = true;
+        /* Allow arbitrary pointers to be stashed in tests. */
+        aligned = true;
#endif
-    assert(n == 0 || (stashed != NULL && aligned));
+        assert(stashed != NULL && aligned);
+    }
return n;
}
@@ -520,7 +553,7 @@ cache_bin_nstashed_get_internal(cache_bin_t *bin, cache_bin_info_t *info) {
JEMALLOC_ALWAYS_INLINE cache_bin_sz_t
cache_bin_nstashed_get_local(cache_bin_t *bin, cache_bin_info_t *info) {
cache_bin_sz_t n = cache_bin_nstashed_get_internal(bin, info);
-    assert(n <= cache_bin_info_ncached_max(info));
+    assert(n <= cache_bin_info_ncached_max_get(bin, info));
return n;
}
@@ -541,8 +574,8 @@ cache_bin_nstashed_get_local(cache_bin_t *bin, cache_bin_info_t *info) {
* This function should not call other utility functions because the racy
* condition may cause unexpected / undefined behaviors in unverified utility
* functions. Currently, this function calls two utility functions
- * cache_bin_info_ncached_max and cache_bin_low_bits_low_bound_get because they
- * help access values that will not be concurrently modified.
+ * cache_bin_info_ncached_max_get and cache_bin_low_bits_low_bound_get because
+ * they help access values that will not be concurrently modified.
*/
static inline void
cache_bin_nitems_get_remote(cache_bin_t *bin, cache_bin_info_t *info,
@@ -552,7 +585,8 @@ cache_bin_nitems_get_remote(cache_bin_t *bin, cache_bin_info_t *info,
(uint16_t)(uintptr_t)bin->stack_head;
cache_bin_sz_t n = diff / sizeof(void *);
-    assert(n <= cache_bin_info_ncached_max(info));
+    cache_bin_sz_t ncached_max = cache_bin_info_ncached_max_get(bin, info);
+    assert(n <= ncached_max);
*ncached = n;
/* Racy version of cache_bin_nstashed_get_internal. */
@@ -560,7 +594,7 @@ cache_bin_nitems_get_remote(cache_bin_t *bin, cache_bin_info_t *info,
info);
n = (bin->low_bits_full - low_bits_low_bound) / sizeof(void *);
-    assert(n <= cache_bin_info_ncached_max(info));
+    assert(n <= ncached_max);
*nstashed = n;
/* Note that cannot assert ncached + nstashed <= ncached_max (racy). */
}
@@ -697,13 +731,8 @@ void cache_bin_preincrement(cache_bin_info_t *infos, szind_t ninfos,
void cache_bin_postincrement(void *alloc, size_t *cur_offset);
void cache_bin_init(cache_bin_t *bin, cache_bin_info_t *info, void *alloc,
size_t *cur_offset);
+void cache_bin_init_disabled(cache_bin_t *bin, cache_bin_sz_t ncached_max);
-/*
- * If a cache bin was zero initialized (either because it lives in static or
- * thread-local storage, or was memset to 0), this function indicates whether or
- * not cache_bin_init was called on it.
- */
-bool cache_bin_still_zero_initialized(cache_bin_t *bin);
bool cache_bin_stack_use_thp(void);
#endif /* JEMALLOC_INTERNAL_CACHE_BIN_H */
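The sentinel scheme added above distinguishes three bin states: never initialized (stack_head == NULL), deliberately disabled (stack_head aimed at disabled_bin, which holds the junk value), and enabled. A standalone sketch under toy names, not the library's code:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#define TOY_JUNK ((uintptr_t)0x7a7a7a7a7a7a7a7aULL)

/* Models `disabled_bin`: a static word holding the junk pattern that a
 * disabled bin's stack_head is pointed at. */
static const uintptr_t toy_disabled_bin = TOY_JUNK;

typedef struct {
    const uintptr_t *stack_head;
} toy_bin_t;

/* Zero-initialized: init was never called on this bin. */
static bool toy_still_zero_initialized(const toy_bin_t *bin) {
    return bin->stack_head == NULL;
}

/* Disabled: deliberately pointed at the sentinel, which must still hold
 * junk (anything else would mean something wrote through the sentinel). */
static bool toy_disabled(const toy_bin_t *bin) {
    bool disabled = (bin->stack_head == &toy_disabled_bin);
    if (disabled) {
        assert(*bin->stack_head == TOY_JUNK);
    }
    return disabled;
}

int main(void) {
    toy_bin_t zeroed = { NULL };
    toy_bin_t off = { &toy_disabled_bin };
    assert(toy_still_zero_initialized(&zeroed) && !toy_disabled(&zeroed));
    assert(toy_disabled(&off) && !toy_still_zero_initialized(&off));
    return 0;
}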

View File

@@ -24,9 +24,9 @@ extern unsigned opt_lg_tcache_flush_large_div;
* large-object bins. This is only used during threads initialization and
* changing it will not reflect on initialized threads as expected. Thus,
* it should not be changed on the fly. To change the number of tcache bins
- * in use, refer to tcache_nhbins of each tcache.
+ * in use, refer to tcache_nbins of each tcache.
*/
-extern unsigned global_do_not_change_nhbins;
+extern unsigned global_do_not_change_nbins;
/*
* Maximum cached size class. Same as above, this is only used during threads
@@ -58,6 +58,7 @@ void tcache_bin_flush_stashed(tsd_t *tsd, tcache_t *tcache,
void tcache_arena_reassociate(tsdn_t *tsdn, tcache_slow_t *tcache_slow,
tcache_t *tcache, arena_t *arena);
tcache_t *tcache_create_explicit(tsd_t *tsd);
+void thread_tcache_max_set(tsd_t *tsd, size_t tcache_max);
void tcache_cleanup(tsd_t *tsd);
void tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena);
bool tcaches_create(tsd_t *tsd, base_t *base, unsigned *r_ind);
@@ -70,8 +71,8 @@ void tcache_prefork(tsdn_t *tsdn);
void tcache_postfork_parent(tsdn_t *tsdn);
void tcache_postfork_child(tsdn_t *tsdn);
void tcache_flush(tsd_t *tsd);
bool tsd_tcache_data_init(tsd_t *tsd, arena_t *arena);
bool tsd_tcache_enabled_data_init(tsd_t *tsd);
+void tcache_enabled_set(tsd_t *tsd, bool enabled);
void tcache_assert_initialized(tcache_t *tcache);
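thread_tcache_max_set is the new internal entry point for retuning a running thread's cache cap. Assuming it is wired up to a mallctl named "thread.tcache.max" (an assumption about the ctl plumbing, not something shown in this diff), a caller sketch would look like:

#include <jemalloc/jemalloc.h>
#include <stdio.h>

int main(void) {
    /* "thread.tcache.max" is assumed here; check your jemalloc version. */
    size_t old_max, new_max = 8192;
    size_t sz = sizeof(old_max);
    /* Read the current per-thread cap and install a smaller one. */
    if (mallctl("thread.tcache.max", &old_max, &sz,
        &new_max, sizeof(new_max)) != 0) {
        fprintf(stderr, "thread.tcache.max unavailable\n");
        return 1;
    }
    printf("tcache max: %zu -> %zu\n", old_max, new_max);
    return 0;
}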

View File

@@ -18,94 +18,72 @@ tcache_enabled_get(tsd_t *tsd) {
return tsd_tcache_enabled_get(tsd);
}
-static inline void
-tcache_enabled_set(tsd_t *tsd, bool enabled) {
-    bool was_enabled = tsd_tcache_enabled_get(tsd);
-    if (!was_enabled && enabled) {
-        tsd_tcache_data_init(tsd, NULL);
-    } else if (was_enabled && !enabled) {
-        tcache_cleanup(tsd);
-    }
-    /* Commit the state last. Above calls check current state. */
-    tsd_tcache_enabled_set(tsd, enabled);
-    tsd_slow_update(tsd);
-}
static inline unsigned
-tcache_nhbins_get(tcache_t *tcache) {
-    assert(tcache != NULL);
-    assert(tcache->tcache_nhbins <= TCACHE_NBINS_MAX);
-    return tcache->tcache_nhbins;
+tcache_nbins_get(tcache_slow_t *tcache_slow) {
+    assert(tcache_slow != NULL);
+    unsigned nbins = tcache_slow->tcache_nbins;
+    assert(nbins <= TCACHE_NBINS_MAX);
+    return nbins;
}
static inline size_t
-tcache_max_get(tcache_t *tcache) {
-    assert(tcache != NULL);
-    assert(tcache->tcache_max <= TCACHE_MAXCLASS_LIMIT);
-    return tcache->tcache_max;
+tcache_max_get(tcache_slow_t *tcache_slow) {
+    assert(tcache_slow != NULL);
+    size_t tcache_max = sz_index2size(tcache_nbins_get(tcache_slow) - 1);
+    assert(tcache_max <= TCACHE_MAXCLASS_LIMIT);
+    return tcache_max;
}
static inline void
-tcache_max_and_nhbins_set(tcache_t *tcache, size_t tcache_max) {
-    assert(tcache != NULL);
+tcache_max_set(tcache_slow_t *tcache_slow, size_t tcache_max) {
+    assert(tcache_slow != NULL);
assert(tcache_max <= TCACHE_MAXCLASS_LIMIT);
-    tcache->tcache_max = tcache_max;
-    tcache->tcache_nhbins = sz_size2index(tcache_max) + 1;
+    tcache_slow->tcache_nbins = sz_size2index(tcache_max) + 1;
}
static inline void
-thread_tcache_max_and_nhbins_set(tsd_t *tsd, size_t tcache_max) {
-    assert(tcache_max <= TCACHE_MAXCLASS_LIMIT);
-    assert(tcache_max == sz_s2u(tcache_max));
-    tcache_t *tcache = tsd_tcachep_get(tsd);
-    tcache_slow_t *tcache_slow;
-    assert(tcache != NULL);
-    bool enabled = tcache_available(tsd);
-    arena_t *assigned_arena;
-    if (enabled) {
-        tcache_slow = tcache_slow_get(tsd);
-        assert(tcache != NULL && tcache_slow != NULL);
-        assigned_arena = tcache_slow->arena;
-        /* Shutdown and reboot the tcache for a clean slate. */
-        tcache_cleanup(tsd);
+tcache_bin_settings_backup(tcache_t *tcache,
+    cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX]) {
+    for (unsigned i = 0; i < TCACHE_NBINS_MAX; i++) {
+        cache_bin_info_init(&tcache_bin_info[i],
+            tcache->bins[i].bin_info.ncached_max);
+    }
+}
+JEMALLOC_ALWAYS_INLINE bool
+tcache_bin_disabled(szind_t ind, cache_bin_t *bin,
+    tcache_slow_t *tcache_slow) {
+    assert(bin != NULL);
+    bool disabled = cache_bin_disabled(bin);
/*
- * Still set tcache_max and tcache_nhbins of the tcache even if
- * the tcache is not available yet because the values are
- * stored in tsd_t and are always available for changing.
- */
-    tcache_max_and_nhbins_set(tcache, tcache_max);
-    if (enabled) {
-        tsd_tcache_data_init(tsd, assigned_arena);
+ * If a bin's ind >= nbins or ncached_max == 0, it must be disabled.
+ * However, when ind < nbins, it could be either enabled
+ * (ncached_max > 0) or disabled (ncached_max == 0). Similarly, when
+ * ncached_max > 0, it could be either enabled (ind < nbins) or
+ * disabled (ind >= nbins). Thus, if a bin is disabled, it has either
+ * ind >= nbins or ncached_max == 0. If a bin is enabled, it has
+ * ind < nbins and ncached_max > 0.
+ */
+    unsigned nbins = tcache_nbins_get(tcache_slow);
+    cache_bin_sz_t ncached_max = bin->bin_info.ncached_max;
+    if (ind >= nbins) {
+        assert(disabled);
+    } else {
+        assert(!disabled || ncached_max == 0);
+    }
+    if (ncached_max == 0) {
+        assert(disabled);
+    } else {
+        assert(!disabled || ind >= nbins);
+    }
+    if (disabled) {
+        assert(ind >= nbins || ncached_max == 0);
+    } else {
+        assert(ind < nbins && ncached_max > 0);
+    }
-    assert(tcache_nhbins_get(tcache) == sz_size2index(tcache_max) + 1);
}
-JEMALLOC_ALWAYS_INLINE bool
-tcache_small_bin_disabled(szind_t ind, cache_bin_t *bin) {
-    assert(ind < SC_NBINS);
-    assert(bin != NULL);
-    bool ret = cache_bin_info_ncached_max(&bin->bin_info) == 0;
-    if (ret) {
-        /* small size class but cache bin disabled. */
-        assert((uintptr_t)(*bin->stack_head) ==
-            cache_bin_preceding_junk);
-    }
-    return ret;
-}
-JEMALLOC_ALWAYS_INLINE bool
-tcache_large_bin_disabled(szind_t ind, cache_bin_t *bin) {
-    assert(ind >= SC_NBINS);
-    assert(bin != NULL);
-    return (cache_bin_info_ncached_max(&bin->bin_info) == 0 ||
-        cache_bin_still_zero_initialized(bin));
+    return disabled;
}
JEMALLOC_ALWAYS_INLINE void *
@@ -124,7 +102,8 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache,
if (unlikely(arena == NULL)) {
return NULL;
}
-    if (unlikely(tcache_small_bin_disabled(binind, bin))) {
+    if (unlikely(tcache_bin_disabled(binind, bin,
+        tcache->tcache_slow))) {
/* stats and zero are handled directly by the arena. */
return arena_malloc_hard(tsd_tsdn(tsd), arena, size,
binind, zero, /* slab */ true);
@@ -157,8 +136,9 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size,
void *ret;
bool tcache_success;
-    assert(binind >= SC_NBINS && binind < tcache_nhbins_get(tcache));
    cache_bin_t *bin = &tcache->bins[binind];
+    assert(binind >= SC_NBINS &&
+        !tcache_bin_disabled(binind, bin, tcache->tcache_slow));
ret = cache_bin_alloc(bin, &tcache_success);
assert(tcache_success == (ret != NULL));
if (unlikely(!tcache_success)) {
@@ -180,7 +160,7 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size,
} else {
if (unlikely(zero)) {
size_t usize = sz_index2size(binind);
-        assert(usize <= tcache_max_get(tcache));
+        assert(usize <= tcache_max_get(tcache->tcache_slow));
memset(ret, 0, usize);
}
@@ -214,12 +194,13 @@ tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind,
}
if (unlikely(!cache_bin_dalloc_easy(bin, ptr))) {
-        if (unlikely(tcache_small_bin_disabled(binind, bin))) {
+        if (unlikely(tcache_bin_disabled(binind, bin,
+            tcache->tcache_slow))) {
arena_dalloc_small(tsd_tsdn(tsd), ptr);
return;
}
-        cache_bin_sz_t max = cache_bin_info_ncached_max(
-            &bin->bin_info);
+        cache_bin_sz_t max = cache_bin_info_ncached_max_get(
+            bin, &bin->bin_info);
unsigned remain = max >> opt_lg_tcache_flush_small_div;
tcache_bin_flush_small(tsd, tcache, bin, binind, remain);
bool ret = cache_bin_dalloc_easy(bin, ptr);
@@ -232,12 +213,13 @@ tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind,
bool slow_path) {
assert(tcache_salloc(tsd_tsdn(tsd), ptr) > SC_SMALL_MAXCLASS);
-    assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= tcache_max_get(tcache));
+    assert(tcache_salloc(tsd_tsdn(tsd), ptr) <=
+        tcache_max_get(tcache->tcache_slow));
cache_bin_t *bin = &tcache->bins[binind];
if (unlikely(!cache_bin_dalloc_easy(bin, ptr))) {
-        unsigned remain = cache_bin_info_ncached_max(
-            &bin->bin_info) >> opt_lg_tcache_flush_large_div;
+        unsigned remain = cache_bin_info_ncached_max_get(
+            bin, &bin->bin_info) >> opt_lg_tcache_flush_large_div;
tcache_bin_flush_large(tsd, tcache, bin, binind, remain);
bool ret = cache_bin_dalloc_easy(bin, ptr);
assert(ret);
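With tcache_max no longer stored (see tcache_max_get / tcache_max_set above), the cap and the bin count must round-trip through the size-class tables. A standalone model with toy power-of-two classes, where toy_index2size / toy_size2index are simplified stand-ins for sz_index2size / sz_size2index (real jemalloc size classes are more involved):

#include <assert.h>
#include <stddef.h>

/* Toy size classes: 8, 16, 32, ... (pure powers of two starting at 8). */
static size_t toy_index2size(unsigned ind) { return (size_t)8 << ind; }
static unsigned toy_size2index(size_t size) {
    unsigned ind = 0;
    while (toy_index2size(ind) < size) {
        ind++;
    }
    return ind;
}

int main(void) {
    /* Derive the bin count from the cap, then recover the cap from the
     * bin count, mirroring tcache_max_set and tcache_max_get. */
    size_t tcache_max = 4096;
    unsigned tcache_nbins = toy_size2index(tcache_max) + 1;
    assert(toy_index2size(tcache_nbins - 1) == tcache_max);
    return 0;
}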

View File

@@ -31,6 +31,8 @@ struct tcache_slow_s {
/* The arena this tcache is associated with. */
arena_t *arena;
+    /* The number of bins activated in the tcache. */
+    unsigned tcache_nbins;
/* Next bin to GC. */
szind_t next_gc_bin;
/* For small bins, fill (ncached_max >> lg_fill_div). */
@@ -55,8 +57,6 @@ struct tcache_slow_s {
struct tcache_s {
tcache_slow_t *tcache_slow;
-    unsigned tcache_nhbins;
-    size_t tcache_max;
cache_bin_t bins[TCACHE_NBINS_MAX];
};