Enable per-tcache tcache_max
1. Add tcache_max and nhbins to tcache_t so that they are per-tcache; since each thread has one auto tcache, they are effectively per-thread as well.
2. Add a mallctl that lets each thread set the tcache_max of its own auto tcache (see the usage sketch below).
3. Store the maximum number of cached items in each bin instead of using global storage.
4. Add tests for the modifications above.
5. Rename `nhbins` and `tcache_maxclass` to `global_do_not_change_nhbins` and `global_do_not_change_tcache_maxclass`.
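A minimal usage sketch of the per-thread control described in item 2, assuming it is exposed as a "thread.tcache.max" mallctl (the mallctl name is an assumption here; this excerpt of the diff only shows the internal plumbing):

    #include <stdio.h>
    #include <stdlib.h>
    #include <jemalloc/jemalloc.h>

    int
    main(void) {
        /*
         * Raise this thread's tcache ceiling to 64K and read back the old
         * value.  "thread.tcache.max" is an assumed mallctl name; only the
         * existence of such a control is stated in the commit message.
         */
        size_t new_max = 64 * 1024;
        size_t old_max;
        size_t sz = sizeof(old_max);
        if (mallctl("thread.tcache.max", &old_max, &sz, &new_max,
            sizeof(new_max)) != 0) {
            fprintf(stderr, "thread.tcache.max not supported\n");
            return 1;
        }
        printf("tcache_max: %zu -> %zu\n", old_max, new_max);

        /* Allocations up to new_max may now hit this thread's tcache. */
        void *p = malloc(32 * 1024);
        free(p);
        return 0;
    }

Per the inline helpers added below, setting the maximum also recomputes the number of cached bins (tcache_nhbins = sz_size2index(tcache_max) + 1), and an already-enabled tcache is cleaned up and re-initialized so the new limit takes effect from a clean slate.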
@@ -198,11 +198,11 @@ arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero,
 assert(sz_can_use_slab(size));
 return tcache_alloc_small(tsdn_tsd(tsdn), arena,
 tcache, size, ind, zero, slow_path);
-} else if (likely(size <= tcache_maxclass)) {
+} else if (likely(size <= tcache_max_get(tcache))) {
 return tcache_alloc_large(tsdn_tsd(tsdn), arena,
 tcache, size, ind, zero, slow_path);
 }
-/* (size > tcache_maxclass) case falls through. */
+/* (size > tcache_max) case falls through. */
 }

 return arena_malloc_hard(tsdn, arena, size, ind, zero, slab);
@@ -297,7 +297,8 @@ arena_dalloc_no_tcache(tsdn_t *tsdn, void *ptr) {
 JEMALLOC_ALWAYS_INLINE void
 arena_dalloc_large(tsdn_t *tsdn, void *ptr, tcache_t *tcache, szind_t szind,
 bool slow_path) {
-if (szind < nhbins) {
+assert(!tsdn_null(tsdn) && tcache != NULL);
+if (szind < tcache_nhbins_get(tcache)) {
 if (config_prof && unlikely(szind < SC_NBINS)) {
 arena_dalloc_promoted(tsdn, ptr, tcache, slow_path);
 } else {
@@ -125,6 +125,9 @@ struct cache_bin_s {
 * array. Immutable after initialization.
 */
 uint16_t low_bits_empty;
+
+/* The maximum number of cached items in the bin. */
+cache_bin_info_t bin_info;
 };

 /*
@@ -23,6 +23,7 @@ percpu_arena_update(tsd_t *tsd, unsigned cpu) {
 tcache_t *tcache = tcache_get(tsd);
 if (tcache != NULL) {
 tcache_slow_t *tcache_slow = tsd_tcache_slowp_get(tsd);
+assert(tcache_slow->arena != NULL);
 tcache_arena_reassociate(tsd_tsdn(tsd), tcache_slow,
 tcache, newarena);
 }
@@ -530,7 +530,7 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) {
 /*
 * Currently the fastpath only handles small sizes. The branch on
 * SC_LOOKUP_MAXCLASS makes sure of it. This lets us avoid checking
-* tcache szind upper limit (i.e. tcache_maxclass) as well.
+* tcache szind upper limit (i.e. tcache_max) as well.
 */
 assert(alloc_ctx.slab);

@@ -21,14 +21,19 @@ extern unsigned opt_lg_tcache_flush_large_div;

 /*
 * Number of tcache bins. There are SC_NBINS small-object bins, plus 0 or more
-* large-object bins.
+* large-object bins. This is only used during threads initialization and
+* changing it will not reflect on initialized threads as expected. Thus,
+* it should not be changed on the fly. To change the number of tcache bins
+* in use, refer to tcache_nhbins of each tcache.
 */
-extern unsigned nhbins;
+extern unsigned global_do_not_change_nhbins;

-/* Maximum cached size class. */
-extern size_t tcache_maxclass;
-
-extern cache_bin_info_t *tcache_bin_info;
+/*
+* Maximum cached size class. Same as above, this is only used during threads
+* initialization and should not be changed. To change the maximum cached size
+* class, refer to tcache_max of each tcache.
+*/
+extern size_t global_do_not_change_tcache_maxclass;

 /*
 * Explicit tcaches, managed via the tcache.{create,flush,destroy} mallctls and
@@ -65,7 +70,7 @@ void tcache_prefork(tsdn_t *tsdn);
 void tcache_postfork_parent(tsdn_t *tsdn);
 void tcache_postfork_child(tsdn_t *tsdn);
 void tcache_flush(tsd_t *tsd);
-bool tsd_tcache_data_init(tsd_t *tsd);
+bool tsd_tcache_data_init(tsd_t *tsd, arena_t *arena);
 bool tsd_tcache_enabled_data_init(tsd_t *tsd);

 void tcache_assert_initialized(tcache_t *tcache);
@@ -23,7 +23,7 @@ tcache_enabled_set(tsd_t *tsd, bool enabled) {
 bool was_enabled = tsd_tcache_enabled_get(tsd);

 if (!was_enabled && enabled) {
-tsd_tcache_data_init(tsd);
+tsd_tcache_data_init(tsd, NULL);
 } else if (was_enabled && !enabled) {
 tcache_cleanup(tsd);
 }
@@ -32,13 +32,67 @@ tcache_enabled_set(tsd_t *tsd, bool enabled) {
 tsd_slow_update(tsd);
 }

+static inline unsigned
+tcache_nhbins_get(tcache_t *tcache) {
+assert(tcache != NULL);
+assert(tcache->tcache_nhbins <= TCACHE_NBINS_MAX);
+return tcache->tcache_nhbins;
+}
+
+static inline size_t
+tcache_max_get(tcache_t *tcache) {
+assert(tcache != NULL);
+assert(tcache->tcache_max <= TCACHE_MAXCLASS_LIMIT);
+return tcache->tcache_max;
+}
+
+static inline void
+tcache_max_and_nhbins_set(tcache_t *tcache, size_t tcache_max) {
+assert(tcache != NULL);
+assert(tcache_max <= TCACHE_MAXCLASS_LIMIT);
+tcache->tcache_max = tcache_max;
+tcache->tcache_nhbins = sz_size2index(tcache_max) + 1;
+}
+
+static inline void
+thread_tcache_max_and_nhbins_set(tsd_t *tsd, size_t tcache_max) {
+assert(tcache_max <= TCACHE_MAXCLASS_LIMIT);
+assert(tcache_max == sz_s2u(tcache_max));
+tcache_t *tcache = tsd_tcachep_get(tsd);
+tcache_slow_t *tcache_slow;
+assert(tcache != NULL);
+
+bool enabled = tcache_available(tsd);
+arena_t *assigned_arena;
+if (enabled) {
+tcache_slow = tcache_slow_get(tsd);
+assert(tcache != NULL && tcache_slow != NULL);
+assigned_arena = tcache_slow->arena;
+/* Shutdown and reboot the tcache for a clean slate. */
+tcache_cleanup(tsd);
+}
+
+/*
+* Still set tcache_max and tcache_nhbins of the tcache even if
+* the tcache is not available yet because the values are
+* stored in tsd_t and are always available for changing.
+*/
+tcache_max_and_nhbins_set(tcache, tcache_max);
+
+if (enabled) {
+tsd_tcache_data_init(tsd, assigned_arena);
+}
+
+assert(tcache_nhbins_get(tcache) == sz_size2index(tcache_max) + 1);
+}
+
 JEMALLOC_ALWAYS_INLINE bool
 tcache_small_bin_disabled(szind_t ind, cache_bin_t *bin) {
 assert(ind < SC_NBINS);
-bool ret = (cache_bin_info_ncached_max(&tcache_bin_info[ind]) == 0);
-if (ret && bin != NULL) {
+assert(bin != NULL);
+bool ret = cache_bin_info_ncached_max(&bin->bin_info) == 0;
+if (ret) {
 /* small size class but cache bin disabled. */
-assert(ind >= nhbins);
 assert((uintptr_t)(*bin->stack_head) ==
 cache_bin_preceding_junk);
 }
@@ -46,6 +100,14 @@ tcache_small_bin_disabled(szind_t ind, cache_bin_t *bin) {
 return ret;
 }

+JEMALLOC_ALWAYS_INLINE bool
+tcache_large_bin_disabled(szind_t ind, cache_bin_t *bin) {
+assert(ind >= SC_NBINS);
+assert(bin != NULL);
+return (cache_bin_info_ncached_max(&bin->bin_info) == 0 ||
+cache_bin_still_zero_initialized(bin));
+}
+
 JEMALLOC_ALWAYS_INLINE void *
 tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache,
 size_t size, szind_t binind, bool zero, bool slow_path) {
@@ -95,7 +157,7 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size,
 void *ret;
 bool tcache_success;

-assert(binind >= SC_NBINS && binind < nhbins);
+assert(binind >= SC_NBINS && binind < tcache_nhbins_get(tcache));
 cache_bin_t *bin = &tcache->bins[binind];
 ret = cache_bin_alloc(bin, &tcache_success);
 assert(tcache_success == (ret != NULL));
@@ -118,7 +180,7 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size,
 } else {
 if (unlikely(zero)) {
 size_t usize = sz_index2size(binind);
-assert(usize <= tcache_maxclass);
+assert(usize <= tcache_max_get(tcache));
 memset(ret, 0, usize);
 }

@@ -157,7 +219,7 @@ tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind,
 return;
 }
 cache_bin_sz_t max = cache_bin_info_ncached_max(
-&tcache_bin_info[binind]);
+&bin->bin_info);
 unsigned remain = max >> opt_lg_tcache_flush_small_div;
 tcache_bin_flush_small(tsd, tcache, bin, binind, remain);
 bool ret = cache_bin_dalloc_easy(bin, ptr);
@@ -169,14 +231,13 @@ JEMALLOC_ALWAYS_INLINE void
 tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind,
 bool slow_path) {

-assert(tcache_salloc(tsd_tsdn(tsd), ptr)
-> SC_SMALL_MAXCLASS);
-assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= tcache_maxclass);
+assert(tcache_salloc(tsd_tsdn(tsd), ptr) > SC_SMALL_MAXCLASS);
+assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= tcache_max_get(tcache));

 cache_bin_t *bin = &tcache->bins[binind];
 if (unlikely(!cache_bin_dalloc_easy(bin, ptr))) {
 unsigned remain = cache_bin_info_ncached_max(
-&tcache_bin_info[binind]) >> opt_lg_tcache_flush_large_div;
+&bin->bin_info) >> opt_lg_tcache_flush_large_div;
 tcache_bin_flush_large(tsd, tcache, bin, binind, remain);
 bool ret = cache_bin_dalloc_easy(bin, ptr);
 assert(ret);
@@ -55,6 +55,8 @@ struct tcache_slow_s {

 struct tcache_s {
 tcache_slow_t *tcache_slow;
+unsigned tcache_nhbins;
+size_t tcache_max;
 cache_bin_t bins[TCACHE_NBINS_MAX];
 };

@@ -19,7 +19,7 @@ typedef struct tcaches_s tcaches_t;
 /* NOLINTNEXTLINE(performance-no-int-to-ptr) */
 #define TCACHES_ELM_NEED_REINIT ((tcache_t *)(uintptr_t)1)

-#define TCACHE_LG_MAXCLASS_LIMIT 23 /* tcache_maxclass = 8M */
+#define TCACHE_LG_MAXCLASS_LIMIT 23 /* tcache_max = 8M */
 #define TCACHE_MAXCLASS_LIMIT ((size_t)1 << TCACHE_LG_MAXCLASS_LIMIT)
 #define TCACHE_NBINS_MAX (SC_NBINS + SC_NGROUP * \
 (TCACHE_LG_MAXCLASS_LIMIT - SC_LG_LARGE_MINCLASS) + 1)
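The unchanged TCACHE_NBINS_MAX bound above counts all SC_NBINS small bins plus SC_NGROUP large size classes for every doubling between the smallest large size class and the 8M limit, plus one. A worked instance of that arithmetic, using illustrative values for a common 64-bit, 4 KiB-page configuration (the SC_* numbers are assumptions for the sketch, not taken from this diff):

    #include <stdio.h>

    /* Assumed, configuration-dependent values for the sketch. */
    #define SC_NBINS 36                  /* small-object bins */
    #define SC_NGROUP 4                  /* size classes per doubling */
    #define SC_LG_LARGE_MINCLASS 14      /* smallest large class = 16K */
    #define TCACHE_LG_MAXCLASS_LIMIT 23  /* tcache_max = 8M */

    #define TCACHE_NBINS_MAX (SC_NBINS + SC_NGROUP * \
        (TCACHE_LG_MAXCLASS_LIMIT - SC_LG_LARGE_MINCLASS) + 1)

    int
    main(void) {
        /* 36 + 4 * (23 - 14) + 1 = 73 bins at the 8M ceiling. */
        printf("TCACHE_NBINS_MAX = %d\n", TCACHE_NBINS_MAX);
        return 0;
    }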