Enable per-tcache tcache_max

1. Add tcache_max and nhbins to tcache_t so that they are per-tcache; since each
   thread has one auto tcache, they are effectively per-thread as well.
2. Add a mallctl for each thread to set the tcache_max of its own auto tcache
   (an illustrative usage sketch follows the commit metadata below).
3. Store the maximum number of cached items in each bin instead of in global storage.
4. Add tests for the modifications above.
5. Rename `nhbins` and `tcache_maxclass` to `global_do_not_change_nhbins` and `global_do_not_change_tcache_maxclass`.
Author:       guangli-dai
Date:         2023-08-06 11:38:30 -07:00
Committed by: Qi Wang
Parent:       fbca96c433
Commit:       a442d9b895

15 changed files with 528 additions and 222 deletions
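
As an illustration of item 2 in the message above, the sketch below sets the
calling thread's tcache_max through mallctl. It is not part of this diff; the
control name "thread.tcache.max" and the concrete sizes are assumptions for
illustration only.

/* Hedged sketch: per-thread tcache_max via mallctl (assumed control name). */
#include <stdio.h>
#include <stdlib.h>
#include <jemalloc/jemalloc.h>

int
main(void) {
    /* Raise this thread's cache bound to 64 KiB (must be a valid size class). */
    size_t new_max = 64 * 1024;
    if (mallctl("thread.tcache.max", NULL, NULL, &new_max,
        sizeof(new_max)) != 0) {
        fprintf(stderr, "setting thread.tcache.max failed\n");
        return 1;
    }
    /* A 48 KiB allocation is now tcache-eligible for this thread only. */
    void *p = malloc(48 * 1024);
    free(p);
    return 0;
}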

@@ -198,11 +198,11 @@ arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero,
 			assert(sz_can_use_slab(size));
 			return tcache_alloc_small(tsdn_tsd(tsdn), arena,
 			    tcache, size, ind, zero, slow_path);
-		} else if (likely(size <= tcache_maxclass)) {
+		} else if (likely(size <= tcache_max_get(tcache))) {
 			return tcache_alloc_large(tsdn_tsd(tsdn), arena,
 			    tcache, size, ind, zero, slow_path);
 		}
-		/* (size > tcache_maxclass) case falls through. */
+		/* (size > tcache_max) case falls through. */
 	}
 	return arena_malloc_hard(tsdn, arena, size, ind, zero, slab);
@@ -297,7 +297,8 @@ arena_dalloc_no_tcache(tsdn_t *tsdn, void *ptr) {
 JEMALLOC_ALWAYS_INLINE void
 arena_dalloc_large(tsdn_t *tsdn, void *ptr, tcache_t *tcache, szind_t szind,
     bool slow_path) {
-	if (szind < nhbins) {
+	assert(!tsdn_null(tsdn) && tcache != NULL);
+	if (szind < tcache_nhbins_get(tcache)) {
 		if (config_prof && unlikely(szind < SC_NBINS)) {
 			arena_dalloc_promoted(tsdn, ptr, tcache, slow_path);
 		} else {

@@ -125,6 +125,9 @@ struct cache_bin_s {
 	 * array.  Immutable after initialization.
 	 */
 	uint16_t low_bits_empty;
+
+	/* The maximum number of cached items in the bin. */
+	cache_bin_info_t bin_info;
 };

 /*
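
With the capacity stored in each cache_bin_t, a bin's limit is read directly
from the bin rather than indexed out of the old global tcache_bin_info table.
A minimal sketch under that assumption (the helper name is hypothetical):

/* Sketch: per-bin capacity lookup; bin_capacity() is a hypothetical helper. */
static inline cache_bin_sz_t
bin_capacity(cache_bin_t *bin) {
    /* Previously: cache_bin_info_ncached_max(&tcache_bin_info[ind]). */
    return cache_bin_info_ncached_max(&bin->bin_info);
}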

@@ -23,6 +23,7 @@ percpu_arena_update(tsd_t *tsd, unsigned cpu) {
 	tcache_t *tcache = tcache_get(tsd);
 	if (tcache != NULL) {
 		tcache_slow_t *tcache_slow = tsd_tcache_slowp_get(tsd);
+		assert(tcache_slow->arena != NULL);
 		tcache_arena_reassociate(tsd_tsdn(tsd), tcache_slow,
 		    tcache, newarena);
 	}

@@ -530,7 +530,7 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) {
 	/*
 	 * Currently the fastpath only handles small sizes.  The branch on
 	 * SC_LOOKUP_MAXCLASS makes sure of it.  This lets us avoid checking
-	 * tcache szind upper limit (i.e. tcache_maxclass) as well.
+	 * tcache szind upper limit (i.e. tcache_max) as well.
 	 */
 	assert(alloc_ctx.slab);

@@ -21,14 +21,19 @@ extern unsigned opt_lg_tcache_flush_large_div;

 /*
  * Number of tcache bins.  There are SC_NBINS small-object bins, plus 0 or more
- * large-object bins.
+ * large-object bins.  This is only used during thread initialization;
+ * changing it later does not affect already-initialized threads, so it must
+ * not be changed on the fly.  To change the number of tcache bins in use,
+ * refer to the tcache_nhbins of each tcache.
  */
-extern unsigned nhbins;
+extern unsigned global_do_not_change_nhbins;

-/* Maximum cached size class. */
-extern size_t tcache_maxclass;
-
-extern cache_bin_info_t *tcache_bin_info;
+/*
+ * Maximum cached size class.  As above, this is only used during thread
+ * initialization and must not be changed afterwards.  To change the maximum
+ * cached size class, refer to the tcache_max of each tcache.
+ */
+extern size_t global_do_not_change_tcache_maxclass;

 /*
  * Explicit tcaches, managed via the tcache.{create,flush,destroy} mallctls and
@@ -65,7 +70,7 @@ void tcache_prefork(tsdn_t *tsdn);
 void tcache_postfork_parent(tsdn_t *tsdn);
 void tcache_postfork_child(tsdn_t *tsdn);
 void tcache_flush(tsd_t *tsd);
-bool tsd_tcache_data_init(tsd_t *tsd);
+bool tsd_tcache_data_init(tsd_t *tsd, arena_t *arena);
 bool tsd_tcache_enabled_data_init(tsd_t *tsd);
 void tcache_assert_initialized(tcache_t *tcache);
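
To make the intent of the renames concrete, here is a hedged sketch (not from
the diff) of the contrast between the boot-time globals and the live
per-tcache accessor; the helper name is hypothetical:

/*
 * Sketch: the renamed globals only seed newly created tcaches; code that
 * needs the current bound must ask the tcache itself.
 */
static size_t
live_tcache_bound(tcache_t *tcache) {
    /* Stale once a thread adjusts its own setting: */
    /*     return global_do_not_change_tcache_maxclass; */
    /* Tracks per-thread runtime changes: */
    return tcache_max_get(tcache);
}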

@@ -23,7 +23,7 @@ tcache_enabled_set(tsd_t *tsd, bool enabled) {
 	bool was_enabled = tsd_tcache_enabled_get(tsd);

 	if (!was_enabled && enabled) {
-		tsd_tcache_data_init(tsd);
+		tsd_tcache_data_init(tsd, NULL);
 	} else if (was_enabled && !enabled) {
 		tcache_cleanup(tsd);
 	}
@@ -32,13 +32,67 @@ tcache_enabled_set(tsd_t *tsd, bool enabled) {
 	tsd_slow_update(tsd);
 }

+static inline unsigned
+tcache_nhbins_get(tcache_t *tcache) {
+	assert(tcache != NULL);
+	assert(tcache->tcache_nhbins <= TCACHE_NBINS_MAX);
+	return tcache->tcache_nhbins;
+}
+
+static inline size_t
+tcache_max_get(tcache_t *tcache) {
+	assert(tcache != NULL);
+	assert(tcache->tcache_max <= TCACHE_MAXCLASS_LIMIT);
+	return tcache->tcache_max;
+}
+
+static inline void
+tcache_max_and_nhbins_set(tcache_t *tcache, size_t tcache_max) {
+	assert(tcache != NULL);
+	assert(tcache_max <= TCACHE_MAXCLASS_LIMIT);
+	tcache->tcache_max = tcache_max;
+	tcache->tcache_nhbins = sz_size2index(tcache_max) + 1;
+}
+
+static inline void
+thread_tcache_max_and_nhbins_set(tsd_t *tsd, size_t tcache_max) {
+	assert(tcache_max <= TCACHE_MAXCLASS_LIMIT);
+	assert(tcache_max == sz_s2u(tcache_max));
+	tcache_t *tcache = tsd_tcachep_get(tsd);
+	tcache_slow_t *tcache_slow;
+	assert(tcache != NULL);
+
+	bool enabled = tcache_available(tsd);
+	arena_t *assigned_arena;
+	if (enabled) {
+		tcache_slow = tcache_slow_get(tsd);
+		assert(tcache != NULL && tcache_slow != NULL);
+		assigned_arena = tcache_slow->arena;
+		/* Shut down and reboot the tcache for a clean slate. */
+		tcache_cleanup(tsd);
+	}
+
+	/*
+	 * Set tcache_max and tcache_nhbins even if the tcache is not
+	 * available yet: the values live in tsd_t and so can always be
+	 * updated.
+	 */
+	tcache_max_and_nhbins_set(tcache, tcache_max);
+	if (enabled) {
+		tsd_tcache_data_init(tsd, assigned_arena);
+	}
+
+	assert(tcache_nhbins_get(tcache) == sz_size2index(tcache_max) + 1);
+}
+
 JEMALLOC_ALWAYS_INLINE bool
 tcache_small_bin_disabled(szind_t ind, cache_bin_t *bin) {
 	assert(ind < SC_NBINS);
-	bool ret = (cache_bin_info_ncached_max(&tcache_bin_info[ind]) == 0);
-	if (ret && bin != NULL) {
+	assert(bin != NULL);
+	bool ret = cache_bin_info_ncached_max(&bin->bin_info) == 0;
+	if (ret) {
 		/* Small size class but cache bin disabled. */
-		assert(ind >= nhbins);
 		assert((uintptr_t)(*bin->stack_head) ==
 		    cache_bin_preceding_junk);
 	}
@@ -46,6 +100,14 @@ tcache_small_bin_disabled(szind_t ind, cache_bin_t *bin) {
 	return ret;
 }

+JEMALLOC_ALWAYS_INLINE bool
+tcache_large_bin_disabled(szind_t ind, cache_bin_t *bin) {
+	assert(ind >= SC_NBINS);
+	assert(bin != NULL);
+	return (cache_bin_info_ncached_max(&bin->bin_info) == 0 ||
+	    cache_bin_still_zero_initialized(bin));
+}
+
 JEMALLOC_ALWAYS_INLINE void *
 tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache,
     size_t size, szind_t binind, bool zero, bool slow_path) {
@@ -95,7 +157,7 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size,
 	void *ret;
 	bool tcache_success;

-	assert(binind >= SC_NBINS && binind < nhbins);
+	assert(binind >= SC_NBINS && binind < tcache_nhbins_get(tcache));
 	cache_bin_t *bin = &tcache->bins[binind];
 	ret = cache_bin_alloc(bin, &tcache_success);
 	assert(tcache_success == (ret != NULL));
@@ -118,7 +180,7 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size,
} else {
if (unlikely(zero)) {
size_t usize = sz_index2size(binind);
assert(usize <= tcache_maxclass);
assert(usize <= tcache_max_get(tcache));
memset(ret, 0, usize);
}
@@ -157,7 +219,7 @@ tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind,
 		return;
 	}
 	cache_bin_sz_t max = cache_bin_info_ncached_max(
-	    &tcache_bin_info[binind]);
+	    &bin->bin_info);
 	unsigned remain = max >> opt_lg_tcache_flush_small_div;
 	tcache_bin_flush_small(tsd, tcache, bin, binind, remain);
 	bool ret = cache_bin_dalloc_easy(bin, ptr);
@@ -169,14 +231,13 @@
 JEMALLOC_ALWAYS_INLINE void
 tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind,
     bool slow_path) {
-	assert(tcache_salloc(tsd_tsdn(tsd), ptr)
-	    > SC_SMALL_MAXCLASS);
-	assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= tcache_maxclass);
+	assert(tcache_salloc(tsd_tsdn(tsd), ptr) > SC_SMALL_MAXCLASS);
+	assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= tcache_max_get(tcache));
 	cache_bin_t *bin = &tcache->bins[binind];
 	if (unlikely(!cache_bin_dalloc_easy(bin, ptr))) {
 		unsigned remain = cache_bin_info_ncached_max(
-		    &tcache_bin_info[binind]) >> opt_lg_tcache_flush_large_div;
+		    &bin->bin_info) >> opt_lg_tcache_flush_large_div;
 		tcache_bin_flush_large(tsd, tcache, bin, binind, remain);
 		bool ret = cache_bin_dalloc_easy(bin, ptr);
 		assert(ret);
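
The invariant maintained by tcache_max_and_nhbins_set above is
tcache_nhbins == sz_size2index(tcache_max) + 1. A hedged worked sketch follows
(the helper is illustrative and the concrete index depends on the build's size
classes):

/* Sketch: the max-to-nhbins invariant; example_resize() is hypothetical. */
static void
example_resize(tcache_t *tcache) {
    size_t tcache_max = 32 * 1024;  /* must satisfy sz_s2u(tcache_max) == tcache_max */
    tcache_max_and_nhbins_set(tcache, tcache_max);
    /* Bins 0 .. tcache_nhbins-1 cover every size class up to tcache_max. */
    assert(tcache_nhbins_get(tcache) == sz_size2index(tcache_max) + 1);
    assert(sz_index2size(tcache_nhbins_get(tcache) - 1) == tcache_max);
}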

@@ -55,6 +55,8 @@ struct tcache_slow_s {

 struct tcache_s {
 	tcache_slow_t *tcache_slow;
+	unsigned tcache_nhbins;
+	size_t tcache_max;
 	cache_bin_t bins[TCACHE_NBINS_MAX];
 };

@@ -19,7 +19,7 @@ typedef struct tcaches_s tcaches_t;

 /* NOLINTNEXTLINE(performance-no-int-to-ptr) */
 #define TCACHES_ELM_NEED_REINIT ((tcache_t *)(uintptr_t)1)

-#define TCACHE_LG_MAXCLASS_LIMIT 23 /* tcache_maxclass = 8M */
+#define TCACHE_LG_MAXCLASS_LIMIT 23 /* tcache_max = 8M */
 #define TCACHE_MAXCLASS_LIMIT ((size_t)1 << TCACHE_LG_MAXCLASS_LIMIT)

 #define TCACHE_NBINS_MAX (SC_NBINS + SC_NGROUP * \
     (TCACHE_LG_MAXCLASS_LIMIT - SC_LG_LARGE_MINCLASS) + 1)
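
For intuition about this bound, a hedged arithmetic sketch; the concrete
constants below vary by platform and are assumptions, not values taken from
this diff:

/*
 * Illustrative arithmetic only.  Assuming a common 64-bit configuration
 * with SC_NBINS == 36, SC_NGROUP == 4, and SC_LG_LARGE_MINCLASS == 14:
 *
 *   TCACHE_NBINS_MAX = 36 + 4 * (23 - 14) + 1 = 73
 *
 * i.e. all small bins, plus SC_NGROUP large bins per power-of-two step
 * from the smallest large class up to the 8M limit, plus one more bin
 * for the 8M class itself.
 */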