Tcache: treat small and large cache bins uniformly

This commit is contained in:
David Goldblatt 2020-04-07 20:04:46 -07:00 committed by David Goldblatt
parent a13fbad374
commit cd29ebefd0
8 changed files with 87 additions and 145 deletions

View File

@@ -106,16 +106,14 @@ struct cache_bin_array_descriptor_s {
*/
ql_elm(cache_bin_array_descriptor_t) link;
/* Pointers to the tcache bins. */
cache_bin_t *bins_small;
cache_bin_t *bins_large;
cache_bin_t *bins;
};
static inline void
cache_bin_array_descriptor_init(cache_bin_array_descriptor_t *descriptor,
cache_bin_t *bins_small, cache_bin_t *bins_large) {
cache_bin_t *bins) {
ql_elm_new(descriptor, link);
descriptor->bins_small = bins_small;
descriptor->bins_large = bins_large;
descriptor->bins = bins;
}
/* Returns ncached_max: Upper limit on ncached. */

View File

@@ -108,18 +108,6 @@ decay_ticker_get(tsd_t *tsd, unsigned ind) {
return &tdata->decay_ticker;
}
JEMALLOC_ALWAYS_INLINE cache_bin_t *
tcache_small_bin_get(tcache_t *tcache, szind_t binind) {
assert(binind < SC_NBINS);
return &tcache->bins_small[binind];
}
JEMALLOC_ALWAYS_INLINE cache_bin_t *
tcache_large_bin_get(tcache_t *tcache, szind_t binind) {
assert(binind >= SC_NBINS &&binind < nhbins);
return &tcache->bins_large[binind - SC_NBINS];
}
JEMALLOC_ALWAYS_INLINE bool
tcache_available(tsd_t *tsd) {
/*
@@ -129,9 +117,9 @@ tcache_available(tsd_t *tsd) {
*/
if (likely(tsd_tcache_enabled_get(tsd))) {
/* Associated arena == NULL implies tcache init in progress. */
assert(tsd_tcache_slowp_get(tsd)->arena == NULL ||
!cache_bin_still_zero_initialized(
tcache_small_bin_get(tsd_tcachep_get(tsd), 0)));
if (config_debug && tsd_tcache_slowp_get(tsd)->arena != NULL) {
tcache_assert_initialized(tsd_tcachep_get(tsd));
}
return true;
}

View File

@@ -53,4 +53,6 @@ void tcache_flush(tsd_t *tsd);
bool tsd_tcache_data_init(tsd_t *tsd);
bool tsd_tcache_enabled_data_init(tsd_t *tsd);
void tcache_assert_initialized(tcache_t *tcache);
#endif /* JEMALLOC_INTERNAL_TCACHE_EXTERNS_H */

View File

@@ -30,12 +30,11 @@ JEMALLOC_ALWAYS_INLINE void *
tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache,
size_t size, szind_t binind, bool zero, bool slow_path) {
void *ret;
cache_bin_t *bin;
bool tcache_success;
size_t usize JEMALLOC_CC_SILENCE_INIT(0);
assert(binind < SC_NBINS);
bin = tcache_small_bin_get(tcache, binind);
cache_bin_t *bin = &tcache->bins[binind];
ret = cache_bin_alloc(bin, &tcache_success);
assert(tcache_success == (ret != NULL));
if (unlikely(!tcache_success)) {
@@ -74,11 +73,10 @@ JEMALLOC_ALWAYS_INLINE void *
tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size,
szind_t binind, bool zero, bool slow_path) {
void *ret;
cache_bin_t *bin;
bool tcache_success;
assert(binind >= SC_NBINS &&binind < nhbins);
bin = tcache_large_bin_get(tcache, binind);
assert(binind >= SC_NBINS && binind < nhbins);
cache_bin_t *bin = &tcache->bins[binind];
ret = cache_bin_alloc(bin, &tcache_success);
assert(tcache_success == (ret != NULL));
if (unlikely(!tcache_success)) {
@@ -120,12 +118,10 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size,
JEMALLOC_ALWAYS_INLINE void
tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind,
bool slow_path) {
cache_bin_t *bin;
assert(tcache_salloc(tsd_tsdn(tsd), ptr)
<= SC_SMALL_MAXCLASS);
bin = tcache_small_bin_get(tcache, binind);
cache_bin_t *bin = &tcache->bins[binind];
if (unlikely(!cache_bin_dalloc_easy(bin, ptr))) {
unsigned remain = cache_bin_info_ncached_max(
&tcache_bin_info[binind]) >> 1;
@@ -138,13 +134,12 @@ tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind,
JEMALLOC_ALWAYS_INLINE void
tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind,
bool slow_path) {
cache_bin_t *bin;
assert(tcache_salloc(tsd_tsdn(tsd), ptr)
> SC_SMALL_MAXCLASS);
assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= tcache_maxclass);
bin = tcache_large_bin_get(tcache, binind);
cache_bin_t *bin = &tcache->bins[binind];
if (unlikely(!cache_bin_dalloc_easy(bin, ptr))) {
unsigned remain = cache_bin_info_ncached_max(
&tcache_bin_info[binind]) >> 1;

View File

@@ -49,19 +49,7 @@ struct tcache_slow_s {
struct tcache_s {
tcache_slow_t *tcache_slow;
/*
* The pointer stacks associated with bins follow as a contiguous array.
* During tcache initialization, the avail pointer in each element of
* tbins is initialized to point to the proper offset within this array.
*/
cache_bin_t bins_small[SC_NBINS];
/*
* We put the cache bins for large size classes at the end of the
* struct, since some of them might not get used. This might end up
* letting us avoid touching an extra page if we don't have to.
*/
cache_bin_t bins_large[SC_NSIZES-SC_NBINS];
cache_bin_t bins[SC_NSIZES];
};
/* Linkage for list of available (previously used) explicit tcache IDs. */

View File

@@ -148,18 +148,11 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads,
malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx);
cache_bin_array_descriptor_t *descriptor;
ql_foreach(descriptor, &arena->cache_bin_array_descriptor_ql, link) {
for (szind_t i = 0; i < SC_NBINS; i++) {
cache_bin_t *tbin = &descriptor->bins_small[i];
for (szind_t i = 0; i < nhbins; i++) {
cache_bin_t *cache_bin = &descriptor->bins[i];
astats->tcache_bytes +=
cache_bin_ncached_get(tbin,
&tcache_bin_info[i]) * sz_index2size(i);
}
for (szind_t i = 0; i < nhbins - SC_NBINS; i++) {
cache_bin_t *tbin = &descriptor->bins_large[i];
astats->tcache_bytes +=
cache_bin_ncached_get(tbin,
&tcache_bin_info[i + SC_NBINS])
* sz_index2size(i + SC_NBINS);
cache_bin_ncached_get(cache_bin,
&tcache_bin_info[i]) * sz_index2size(i);
}
}
malloc_mutex_prof_read(tsdn,
@@ -1697,7 +1690,7 @@ arena_postfork_child(tsdn_t *tsdn, arena_t *arena) {
ql_tail_insert(&arena->tcache_ql, tcache_slow, link);
cache_bin_array_descriptor_init(
&tcache_slow->cache_bin_array_descriptor,
tcache->bins_small, tcache->bins_large);
tcache->bins);
ql_tail_insert(&arena->cache_bin_array_descriptor_ql,
&tcache_slow->cache_bin_array_descriptor, link);
}

View File

@@ -2495,7 +2495,7 @@ je_malloc(size_t size) {
assert(tsd_fast(tsd));
tcache_t *tcache = tsd_tcachep_get(tsd);
cache_bin_t *bin = tcache_small_bin_get(tcache, ind);
cache_bin_t *bin = &tcache->bins[ind];
bool tcache_success;
void *ret;
@@ -2828,7 +2828,7 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) {
}
tcache_t *tcache = tsd_tcachep_get(tsd);
cache_bin_t *bin = tcache_small_bin_get(tcache, alloc_ctx.szind);
cache_bin_t *bin = &tcache->bins[alloc_ctx.szind];
/*
* If junking were enabled, this is where we would do it. It's not

View File

@@ -43,19 +43,12 @@ tcache_salloc(tsdn_t *tsdn, const void *ptr) {
void
tcache_event_hard(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache) {
szind_t binind = tcache_slow->next_gc_bin;
cache_bin_t *tbin;
bool is_small;
if (binind < SC_NBINS) {
tbin = tcache_small_bin_get(tcache, binind);
is_small = true;
} else {
tbin = tcache_large_bin_get(tcache, binind);
is_small = false;
}
bool is_small = (binind < SC_NBINS);
cache_bin_t *cache_bin = &tcache->bins[binind];
cache_bin_sz_t low_water = cache_bin_low_water_get(tbin,
cache_bin_sz_t low_water = cache_bin_low_water_get(cache_bin,
&tcache_bin_info[binind]);
cache_bin_sz_t ncached = cache_bin_ncached_get(tbin,
cache_bin_sz_t ncached = cache_bin_ncached_get(cache_bin,
&tcache_bin_info[binind]);
if (low_water > 0) {
/*
@@ -63,7 +56,7 @@ tcache_event_hard(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache) {
*/
if (is_small) {
assert(!tcache_slow->bin_refilled[binind]);
tcache_bin_flush_small(tsd, tcache, tbin, binind,
tcache_bin_flush_small(tsd, tcache, cache_bin, binind,
ncached - low_water + (low_water >> 2));
/*
* Reduce fill count by 2X. Limit lg_fill_div such that
@@ -75,7 +68,7 @@ tcache_event_hard(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache) {
tcache_slow->lg_fill_div[binind]++;
}
} else {
tcache_bin_flush_large(tsd, tcache, tbin, binind,
tcache_bin_flush_large(tsd, tcache, cache_bin, binind,
ncached - low_water + (low_water >> 2));
}
} else if (is_small && tcache_slow->bin_refilled[binind]) {
@@ -89,7 +82,7 @@ tcache_event_hard(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache) {
}
tcache_slow->bin_refilled[binind] = false;
}
cache_bin_low_water_set(tbin);
cache_bin_low_water_set(cache_bin);
tcache_slow->next_gc_bin++;
if (tcache_slow->next_gc_bin == nhbins) {
@@ -99,7 +92,7 @@ tcache_event_hard(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache) {
void *
tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena,
tcache_t *tcache, cache_bin_t *tbin, szind_t binind,
tcache_t *tcache, cache_bin_t *cache_bin, szind_t binind,
bool *tcache_success) {
tcache_slow_t *tcache_slow = tcache->tcache_slow;
void *ret;
@@ -107,10 +100,10 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena,
assert(tcache_slow->arena != NULL);
unsigned nfill = cache_bin_info_ncached_max(&tcache_bin_info[binind])
>> tcache_slow->lg_fill_div[binind];
arena_cache_bin_fill_small(tsdn, arena, tbin, &tcache_bin_info[binind],
binind, nfill);
arena_cache_bin_fill_small(tsdn, arena, cache_bin,
&tcache_bin_info[binind], binind, nfill);
tcache_slow->bin_refilled[binind] = true;
ret = cache_bin_alloc(tbin, tcache_success);
ret = cache_bin_alloc(cache_bin, tcache_success);
return ret;
}
@@ -154,7 +147,7 @@ tcache_bin_flush_match(edata_t *edata, unsigned cur_arena_ind,
}
JEMALLOC_ALWAYS_INLINE void
tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin,
tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin,
szind_t binind, unsigned rem, bool small) {
tcache_slow_t *tcache_slow = tcache->tcache_slow;
/*
@@ -168,7 +161,7 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin,
} else {
assert(binind < nhbins);
}
cache_bin_sz_t ncached = cache_bin_ncached_get(tbin,
cache_bin_sz_t ncached = cache_bin_ncached_get(cache_bin,
&tcache_bin_info[binind]);
assert((cache_bin_sz_t)rem <= ncached);
arena_t *tcache_arena = tcache_slow->arena;
@@ -182,7 +175,7 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin,
VARIABLE_ARRAY(edata_t *, item_edata, nflush + 1);
CACHE_BIN_PTR_ARRAY_DECLARE(ptrs, nflush);
cache_bin_init_ptr_array_for_flush(tbin, &tcache_bin_info[binind],
cache_bin_init_ptr_array_for_flush(cache_bin, &tcache_bin_info[binind],
&ptrs, nflush);
/* Look up edata once per item. */
@@ -249,13 +242,13 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin,
if (small) {
cur_bin->stats.nflushes++;
cur_bin->stats.nrequests +=
tbin->tstats.nrequests;
tbin->tstats.nrequests = 0;
cache_bin->tstats.nrequests;
cache_bin->tstats.nrequests = 0;
} else {
arena_stats_large_flush_nrequests_add(tsdn,
&tcache_arena->stats, binind,
tbin->tstats.nrequests);
tbin->tstats.nrequests = 0;
cache_bin->tstats.nrequests);
cache_bin->tstats.nrequests = 0;
}
}
@@ -336,31 +329,31 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin,
bin_t *bin = arena_bin_choose_lock(tsdn, tcache_arena,
binind, &binshard);
bin->stats.nflushes++;
bin->stats.nrequests += tbin->tstats.nrequests;
tbin->tstats.nrequests = 0;
bin->stats.nrequests += cache_bin->tstats.nrequests;
cache_bin->tstats.nrequests = 0;
malloc_mutex_unlock(tsdn, &bin->lock);
} else {
arena_stats_large_flush_nrequests_add(tsdn,
&tcache_arena->stats, binind,
tbin->tstats.nrequests);
tbin->tstats.nrequests = 0;
cache_bin->tstats.nrequests);
cache_bin->tstats.nrequests = 0;
}
}
cache_bin_finish_flush(tbin, &tcache_bin_info[binind], &ptrs,
cache_bin_finish_flush(cache_bin, &tcache_bin_info[binind], &ptrs,
ncached - rem);
}
void
tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin,
tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin,
szind_t binind, unsigned rem) {
tcache_bin_flush_impl(tsd, tcache, tbin, binind, rem, true);
tcache_bin_flush_impl(tsd, tcache, cache_bin, binind, rem, true);
}
void
tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin,
tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin,
szind_t binind, unsigned rem) {
tcache_bin_flush_impl(tsd, tcache, tbin, binind, rem, false);
tcache_bin_flush_impl(tsd, tcache, cache_bin, binind, rem, false);
}
void
@@ -376,8 +369,7 @@ tcache_arena_associate(tsdn_t *tsdn, tcache_slow_t *tcache_slow,
ql_elm_new(tcache_slow, link);
ql_tail_insert(&arena->tcache_ql, tcache_slow, link);
cache_bin_array_descriptor_init(
&tcache_slow->cache_bin_array_descriptor,
tcache->bins_small, tcache->bins_large);
&tcache_slow->cache_bin_array_descriptor, tcache->bins);
ql_tail_insert(&arena->cache_bin_array_descriptor_ql,
&tcache_slow->cache_bin_array_descriptor, link);
@@ -446,23 +438,18 @@ tcache_init(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache,
tcache_slow->dyn_alloc = mem;
assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0);
memset(tcache->bins_small, 0, sizeof(cache_bin_t) * SC_NBINS);
memset(tcache->bins_large, 0, sizeof(cache_bin_t) * (nhbins - SC_NBINS));
memset(tcache->bins, 0, sizeof(cache_bin_t) * nhbins);
unsigned i = 0;
size_t cur_offset = 0;
cache_bin_preincrement(tcache_bin_info, nhbins, mem,
&cur_offset);
for (; i < SC_NBINS; i++) {
tcache_slow->lg_fill_div[i] = 1;
tcache_slow->bin_refilled[i] = false;
cache_bin_t *bin = tcache_small_bin_get(tcache, i);
cache_bin_init(bin, &tcache_bin_info[i], mem,
&cur_offset);
}
for (; i < nhbins; i++) {
cache_bin_t *bin = tcache_large_bin_get(tcache, i);
cache_bin_init(bin, &tcache_bin_info[i], mem,
for (unsigned i = 0; i < nhbins; i++) {
if (i < SC_NBINS) {
tcache_slow->lg_fill_div[i] = 1;
tcache_slow->bin_refilled[i] = false;
}
cache_bin_t *cache_bin = &tcache->bins[i];
cache_bin_init(cache_bin, &tcache_bin_info[i], mem,
&cur_offset);
}
cache_bin_postincrement(tcache_bin_info, nhbins, mem,
@@ -477,8 +464,7 @@ tsd_tcache_data_init(tsd_t *tsd) {
tcache_slow_t *tcache_slow = tsd_tcache_slowp_get_unsafe(tsd);
tcache_t *tcache = tsd_tcachep_get_unsafe(tsd);
assert(cache_bin_still_zero_initialized(
tcache_small_bin_get(tcache, 0)));
assert(cache_bin_still_zero_initialized(&tcache->bins[0]));
size_t alignment = tcache_bin_alloc_alignment;
size_t size = sz_sa2u(tcache_bin_alloc_size, alignment);
@@ -552,20 +538,15 @@ tcache_flush_cache(tsd_t *tsd, tcache_t *tcache) {
tcache_slow_t *tcache_slow = tcache->tcache_slow;
assert(tcache_slow->arena != NULL);
for (unsigned i = 0; i < SC_NBINS; i++) {
cache_bin_t *tbin = tcache_small_bin_get(tcache, i);
tcache_bin_flush_small(tsd, tcache, tbin, i, 0);
if (config_stats) {
assert(tbin->tstats.nrequests == 0);
for (unsigned i = 0; i < nhbins; i++) {
cache_bin_t *cache_bin = &tcache->bins[i];
if (i < SC_NBINS) {
tcache_bin_flush_small(tsd, tcache, cache_bin, i, 0);
} else {
tcache_bin_flush_large(tsd, tcache, cache_bin, i, 0);
}
}
for (unsigned i = SC_NBINS; i < nhbins; i++) {
cache_bin_t *tbin = tcache_large_bin_get(tcache, i);
tcache_bin_flush_large(tsd, tcache, tbin, i, 0);
if (config_stats) {
assert(tbin->tstats.nrequests == 0);
assert(cache_bin->tstats.nrequests == 0);
}
}
}
@@ -584,8 +565,8 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache, bool tsd_tcache) {
tcache_arena_dissociate(tsd_tsdn(tsd), tcache_slow, tcache);
if (tsd_tcache) {
cache_bin_t *bin = tcache_small_bin_get(tcache, 0);
cache_bin_assert_empty(bin, &tcache_bin_info[0]);
cache_bin_t *cache_bin = &tcache->bins[0];
cache_bin_assert_empty(cache_bin, &tcache_bin_info[0]);
}
idalloctm(tsd_tsdn(tsd), tcache_slow->dyn_alloc, NULL, NULL, true,
true);
@@ -614,13 +595,11 @@ tcache_cleanup(tsd_t *tsd) {
tcache_t *tcache = tsd_tcachep_get(tsd);
if (!tcache_available(tsd)) {
assert(tsd_tcache_enabled_get(tsd) == false);
assert(cache_bin_still_zero_initialized(
tcache_small_bin_get(tcache, 0)));
assert(cache_bin_still_zero_initialized(&tcache->bins[0]));
return;
}
assert(tsd_tcache_enabled_get(tsd));
assert(!cache_bin_still_zero_initialized(
tcache_small_bin_get(tcache, 0)));
assert(!cache_bin_still_zero_initialized(&tcache->bins[0]));
tcache_destroy(tsd, tcache, true);
if (config_debug) {
@@ -628,33 +607,28 @@ tcache_cleanup(tsd_t *tsd) {
* For debug testing only, we want to pretend we're still in the
* zero-initialized state.
*/
memset(tcache->bins_small, 0, sizeof(cache_bin_t) * SC_NBINS);
memset(tcache->bins_large, 0,
sizeof(cache_bin_t) * (nhbins - SC_NBINS));
memset(tcache->bins, 0, sizeof(cache_bin_t) * nhbins);
}
}
void
tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) {
unsigned i;
cassert(config_stats);
/* Merge and reset tcache stats. */
for (i = 0; i < SC_NBINS; i++) {
cache_bin_t *tbin = tcache_small_bin_get(tcache, i);
unsigned binshard;
bin_t *bin = arena_bin_choose_lock(tsdn, arena, i, &binshard);
bin->stats.nrequests += tbin->tstats.nrequests;
malloc_mutex_unlock(tsdn, &bin->lock);
tbin->tstats.nrequests = 0;
}
for (; i < nhbins; i++) {
cache_bin_t *tbin = tcache_large_bin_get(tcache, i);
arena_stats_large_flush_nrequests_add(tsdn, &arena->stats, i,
tbin->tstats.nrequests);
tbin->tstats.nrequests = 0;
for (unsigned i = 0; i < nhbins; i++) {
cache_bin_t *cache_bin = &tcache->bins[i];
if (i < SC_NBINS) {
unsigned binshard;
bin_t *bin = arena_bin_choose_lock(tsdn, arena, i,
&binshard);
bin->stats.nrequests += cache_bin->tstats.nrequests;
malloc_mutex_unlock(tsdn, &bin->lock);
} else {
arena_stats_large_flush_nrequests_add(tsdn,
&arena->stats, i, cache_bin->tstats.nrequests);
}
cache_bin->tstats.nrequests = 0;
}
}
@@ -824,3 +798,7 @@ void
tcache_postfork_child(tsdn_t *tsdn) {
malloc_mutex_postfork_child(tsdn, &tcaches_mtx);
}
void tcache_assert_initialized(tcache_t *tcache) {
assert(!cache_bin_still_zero_initialized(&tcache->bins[0]));
}