Arena: share bin offsets in a global.

This saves us a cache miss when looking up the bin offset of a remote arena
during tcache flush.  All arenas share the same bin offsets, and so we don't
need to look them up repeatedly for each arena.  Secondarily, it shaves 288
bytes off the arena struct on, e.g., x86-64.
David Goldblatt 2021-01-30 15:35:33 -08:00 committed by David Goldblatt
parent 2fcbd18115
commit 3967329813
7 changed files with 50 additions and 49 deletions
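
In short, each arena no longer stores a per-size-class pointer to its bin
shards; the shards live in a trailing flexible array laid out identically in
every arena, and a single global table holds the byte offset of each size
class's first shard.  A minimal standalone sketch of the idea (not jemalloc's
actual code; the names my_arena_t, bin_offsets, get_bin, NBINS, and nshards,
and the placeholder bin_t, are illustrative):

#include <stddef.h>
#include <stdint.h>

#define NBINS 4                         /* stand-in for SC_NBINS */

typedef struct { int lock; } bin_t;     /* placeholder; the real bin_t is larger */

typedef struct my_arena_s {
        unsigned nthreads;              /* ... other arena fields ... */
        bin_t bins[];                   /* all shards of all size classes, in order */
} my_arena_t;

/*
 * Filled once at boot.  The layout is identical for every arena, so the
 * offsets live in one shared (and therefore likely cache-resident) global
 * table instead of in each arena.
 */
static uint32_t bin_offsets[NBINS];

static void
bin_offsets_boot(const unsigned nshards[NBINS]) {
        uint32_t cur = (uint32_t)offsetof(my_arena_t, bins);
        for (unsigned i = 0; i < NBINS; i++) {
                bin_offsets[i] = cur;
                cur += (uint32_t)(nshards[i] * sizeof(bin_t));
        }
}

/* Reach any arena's bin shard without first loading per-arena metadata. */
static inline bin_t *
get_bin(my_arena_t *arena, unsigned binind, unsigned binshard) {
        bin_t *shard0 = (bin_t *)((uintptr_t)arena + bin_offsets[binind]);
        return shard0 + binshard;
}

Previously, flushing small allocations back to a remote arena first had to
load that arena's bins[binind].bin_shards pointer, an extra dependent load
from memory that is unlikely to be cached; with the shared offset table, only
the bin itself is touched in the remote arena.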


@@ -23,6 +23,12 @@ extern emap_t arena_emap_global;
extern size_t opt_oversize_threshold;
extern size_t oversize_threshold;
/*
* arena_bin_offsets[binind] is the offset of the first bin shard for size class
* binind.
*/
extern uint32_t arena_bin_offsets[SC_NBINS];
void arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena,
unsigned *nthreads, const char **dss, ssize_t *dirty_decay_ms,
ssize_t *muzzy_decay_ms, size_t *nactive, size_t *ndirty, size_t *nmuzzy);


@@ -534,4 +534,10 @@ arena_dalloc_bin_locked_finish(tsdn_t *tsdn, arena_t *arena, bin_t *bin,
}
}
static inline bin_t *
arena_get_bin(arena_t *arena, szind_t binind, unsigned binshard) {
bin_t *shard0 = (bin_t *)((uintptr_t)arena + arena_bin_offsets[binind]);
return shard0 + binshard;
}
#endif /* JEMALLOC_INTERNAL_ARENA_INLINES_B_H */


@@ -76,13 +76,6 @@ struct arena_s {
/* The page-level allocator shard this arena uses. */
pa_shard_t pa_shard;
/*
* bins is used to store heaps of free regions.
*
* Synchronization: internal.
*/
bins_t bins[SC_NBINS];
/*
* A cached copy of base->ind. This can get accessed on hot paths;
* looking it up in base requires an extra pointer hop / cache miss.
@@ -97,6 +90,12 @@
base_t *base;
/* Used to determine uptime. Read-only after initialization. */
nstime_t create_time;
/*
* The arena is allocated alongside its bins; really this is a
* dynamically sized array determined by the binshard settings.
*/
bin_t bins[0];
};
/* Used in conjunction with tsd for fast arena-related context lookup. */


@@ -48,6 +48,10 @@ div_info_t arena_binind_div_info[SC_NBINS];
size_t opt_oversize_threshold = OVERSIZE_THRESHOLD_DEFAULT;
size_t oversize_threshold = OVERSIZE_THRESHOLD_DEFAULT;
uint32_t arena_bin_offsets[SC_NBINS];
static unsigned nbins_total;
static unsigned huge_arena_ind;
/******************************************************************************/
@@ -179,7 +183,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads,
for (szind_t i = 0; i < SC_NBINS; i++) {
for (unsigned j = 0; j < bin_infos[i].n_shards; j++) {
bin_stats_merge(tsdn, &bstats[i],
&arena->bins[i].bin_shards[j]);
arena_get_bin(arena, i, j));
}
}
}
@@ -595,8 +599,7 @@ arena_reset(tsd_t *tsd, arena_t *arena) {
/* Bins. */
for (unsigned i = 0; i < SC_NBINS; i++) {
for (unsigned j = 0; j < bin_infos[i].n_shards; j++) {
arena_bin_reset(tsd, arena,
&arena->bins[i].bin_shards[j]);
arena_bin_reset(tsd, arena, arena_get_bin(arena, i, j));
}
}
pa_shard_reset(tsd_tsdn(tsd), &arena->pa_shard);
@@ -721,7 +724,7 @@ arena_bin_choose(tsdn_t *tsdn, arena_t *arena, szind_t binind,
if (binshard_p != NULL) {
*binshard_p = binshard;
}
return &arena->bins[binind].bin_shards[binshard];
return arena_get_bin(arena, binind, binshard);
}
void
@@ -1168,7 +1171,7 @@ static void
arena_dalloc_bin(tsdn_t *tsdn, arena_t *arena, edata_t *edata, void *ptr) {
szind_t binind = edata_szind_get(edata);
unsigned binshard = edata_binshard_get(edata);
bin_t *bin = &arena->bins[binind].bin_shards[binshard];
bin_t *bin = arena_get_bin(arena, binind, binshard);
malloc_mutex_lock(tsdn, &bin->lock);
arena_dalloc_bin_locked_info_t info;
@@ -1411,10 +1414,6 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) {
}
}
unsigned nbins_total = 0;
for (i = 0; i < SC_NBINS; i++) {
nbins_total += bin_infos[i].n_shards;
}
size_t arena_size = sizeof(arena_t) + sizeof(bin_t) * nbins_total;
arena = (arena_t *)base_alloc(tsdn, base, arena_size, CACHELINE);
if (arena == NULL) {
@@ -1457,20 +1456,13 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) {
}
/* Initialize bins. */
uintptr_t bin_addr = (uintptr_t)arena + sizeof(arena_t);
atomic_store_u(&arena->binshard_next, 0, ATOMIC_RELEASE);
for (i = 0; i < SC_NBINS; i++) {
unsigned nshards = bin_infos[i].n_shards;
arena->bins[i].bin_shards = (bin_t *)bin_addr;
bin_addr += nshards * sizeof(bin_t);
for (unsigned j = 0; j < nshards; j++) {
bool err = bin_init(&arena->bins[i].bin_shards[j]);
for (i = 0; i < nbins_total; i++) {
bool err = bin_init(&arena->bins[i]);
if (err) {
goto label_error;
}
}
}
assert(bin_addr == (uintptr_t)arena + arena_size);
arena->base = base;
/* Set arena before creating background threads. */
@@ -1587,6 +1579,13 @@ arena_boot(sc_data_t *sc_data) {
div_init(&arena_binind_div_info[i],
(1U << sc->lg_base) + (sc->ndelta << sc->lg_delta));
}
uint32_t cur_offset = (uint32_t)offsetof(arena_t, bins);
for (szind_t i = 0; i < SC_NBINS; i++) {
arena_bin_offsets[i] = cur_offset;
nbins_total += bin_infos[i].n_shards;
cur_offset += (uint32_t)(bin_infos[i].n_shards * sizeof(bin_t));
}
}
void
@@ -1633,23 +1632,17 @@ arena_prefork7(tsdn_t *tsdn, arena_t *arena) {
void
arena_prefork8(tsdn_t *tsdn, arena_t *arena) {
for (unsigned i = 0; i < SC_NBINS; i++) {
for (unsigned j = 0; j < bin_infos[i].n_shards; j++) {
bin_prefork(tsdn, &arena->bins[i].bin_shards[j]);
}
for (unsigned i = 0; i < nbins_total; i++) {
bin_prefork(tsdn, &arena->bins[i]);
}
}
void
arena_postfork_parent(tsdn_t *tsdn, arena_t *arena) {
unsigned i;
for (unsigned i = 0; i < nbins_total; i++) {
bin_postfork_parent(tsdn, &arena->bins[i]);
}
for (i = 0; i < SC_NBINS; i++) {
for (unsigned j = 0; j < bin_infos[i].n_shards; j++) {
bin_postfork_parent(tsdn,
&arena->bins[i].bin_shards[j]);
}
}
malloc_mutex_postfork_parent(tsdn, &arena->large_mtx);
base_postfork_parent(tsdn, arena->base);
pa_shard_postfork_parent(tsdn, &arena->pa_shard);
@@ -1660,8 +1653,6 @@ arena_postfork_parent(tsdn_t *tsdn, arena_t *arena) {
void
arena_postfork_child(tsdn_t *tsdn, arena_t *arena) {
unsigned i;
atomic_store_u(&arena->nthreads[0], 0, ATOMIC_RELAXED);
atomic_store_u(&arena->nthreads[1], 0, ATOMIC_RELAXED);
if (tsd_arena_get(tsdn_tsd(tsdn)) == arena) {
@@ -1686,11 +1677,10 @@
}
}
for (i = 0; i < SC_NBINS; i++) {
for (unsigned j = 0; j < bin_infos[i].n_shards; j++) {
bin_postfork_child(tsdn, &arena->bins[i].bin_shards[j]);
}
for (unsigned i = 0; i < nbins_total; i++) {
bin_postfork_child(tsdn, &arena->bins[i]);
}
malloc_mutex_postfork_child(tsdn, &arena->large_mtx);
base_postfork_child(tsdn, arena->base);
pa_shard_postfork_child(tsdn, &arena->pa_shard);


@@ -3423,7 +3423,7 @@ stats_mutexes_reset_ctl(tsd_t *tsd, const size_t *mib,
for (szind_t i = 0; i < SC_NBINS; i++) {
for (unsigned j = 0; j < bin_infos[i].n_shards; j++) {
bin_t *bin = &arena->bins[i].bin_shards[j];
bin_t *bin = arena_get_bin(arena, i, j);
MUTEX_PROF_RESET(bin->lock);
}
}


@@ -52,11 +52,11 @@ inspect_extent_util_stats_verbose_get(tsdn_t *tsdn, const void *ptr,
assert(*nfree <= *nregs);
assert(*nfree * edata_usize_get(edata) <= *size);
const arena_t *arena = (arena_t *)atomic_load_p(
arena_t *arena = (arena_t *)atomic_load_p(
&arenas[edata_arena_ind_get(edata)], ATOMIC_RELAXED);
assert(arena != NULL);
const unsigned binshard = edata_binshard_get(edata);
bin_t *bin = &arena->bins[szind].bin_shards[binshard];
bin_t *bin = arena_get_bin(arena, szind, binshard);
malloc_mutex_lock(tsdn, &bin->lock);
if (config_stats) {


@@ -344,8 +344,8 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin,
bin_t *cur_bin = NULL;
if (small) {
cur_binshard = edata_binshard_get(edata);
cur_bin = &cur_arena->bins[binind].bin_shards[
cur_binshard];
cur_bin = arena_get_bin(cur_arena, binind,
cur_binshard);
assert(cur_binshard < bin_infos[binind].n_shards);
}