Arena: share bin offsets in a global.
This saves us a cache miss when looking up the bin offset in a remote arena during tcache flush. All arenas share the same offsets from their base address, so we don't need to look them up repeatedly for each arena. Secondarily, it shaves 288 bytes off the arena on, e.g., x86-64.
parent 2fcbd18115
commit 3967329813
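
The gist, as an illustration only (this sketch is not part of the patch): the per-arena table of shard pointers goes away, and every arena locates its bin shards through one global, read-only offset table. The helper below is hypothetical and simply mirrors the arena_get_bin() added in the diff; the 288-byte figure is consistent with dropping one pointer-sized bins_t entry per size class if SC_NBINS is 36 and pointers are 8 bytes (36 * 8 = 288), which is an assumption here.

/*
 * Illustration only (hypothetical helper name): finding a bin shard in a
 * possibly remote arena after this change.  Previously this required reading
 * arena->bins[binind].bin_shards, a pointer stored inside the remote arena
 * itself, i.e. an extra cache miss on the tcache flush path.
 */
static inline bin_t *
remote_bin_lookup(arena_t *arena, szind_t binind, unsigned binshard) {
    /* arena_bin_offsets[] is global and identical for every arena. */
    bin_t *shard0 = (bin_t *)((uintptr_t)arena + arena_bin_offsets[binind]);
    return shard0 + binshard;
}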
@@ -23,6 +23,12 @@ extern emap_t arena_emap_global;
 extern size_t opt_oversize_threshold;
 extern size_t oversize_threshold;
 
+/*
+ * arena_bin_offsets[binind] is the offset of the first bin shard for size class
+ * binind.
+ */
+extern uint32_t arena_bin_offsets[SC_NBINS];
+
 void arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena,
     unsigned *nthreads, const char **dss, ssize_t *dirty_decay_ms,
     ssize_t *muzzy_decay_ms, size_t *nactive, size_t *ndirty, size_t *nmuzzy);
@@ -534,4 +534,10 @@ arena_dalloc_bin_locked_finish(tsdn_t *tsdn, arena_t *arena, bin_t *bin,
     }
 }
 
+static inline bin_t *
+arena_get_bin(arena_t *arena, szind_t binind, unsigned binshard) {
+    bin_t *shard0 = (bin_t *)((uintptr_t)arena + arena_bin_offsets[binind]);
+    return shard0 + binshard;
+}
+
 #endif /* JEMALLOC_INTERNAL_ARENA_INLINES_B_H */
@@ -76,13 +76,6 @@ struct arena_s {
     /* The page-level allocator shard this arena uses. */
     pa_shard_t pa_shard;
 
-    /*
-     * bins is used to store heaps of free regions.
-     *
-     * Synchronization: internal.
-     */
-    bins_t bins[SC_NBINS];
-
     /*
      * A cached copy of base->ind.  This can get accessed on hot paths;
      * looking it up in base requires an extra pointer hop / cache miss.
@@ -97,6 +90,12 @@ struct arena_s {
     base_t *base;
     /* Used to determine uptime.  Read-only after initialization. */
     nstime_t create_time;
+
+    /*
+     * The arena is allocated alongside its bins; really this is a
+     * dynamically sized array determined by the binshard settings.
+     */
+    bin_t bins[0];
 };
 
 /* Used in conjunction with tsd for fast arena-related context lookup. */
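
A sanity-check sketch (a hypothetical helper, not part of this patch): with bins now a flexible trailing array, every shard address computed through the global offset table should land inside the single block that arena_new() allocates for the arena. The helper name and the nbins_total parameter are assumptions for illustration; arena_get_bin(), bin_infos, and SC_NBINS are the names from the diff.

static void
arena_bin_offsets_check(arena_t *arena, unsigned nbins_total) {
    /* The trailing bins[] array starts right after the arena header. */
    uintptr_t begin = (uintptr_t)arena + offsetof(arena_t, bins);
    uintptr_t end = begin + nbins_total * sizeof(bin_t);
    for (szind_t i = 0; i < SC_NBINS; i++) {
        for (unsigned j = 0; j < bin_infos[i].n_shards; j++) {
            /* arena_get_bin() is the lookup helper added in this commit. */
            uintptr_t addr = (uintptr_t)arena_get_bin(arena, i, j);
            assert(addr >= begin && addr + sizeof(bin_t) <= end);
        }
    }
}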
src/arena.c (64 changed lines)
@@ -48,6 +48,10 @@ div_info_t arena_binind_div_info[SC_NBINS];
 size_t opt_oversize_threshold = OVERSIZE_THRESHOLD_DEFAULT;
 size_t oversize_threshold = OVERSIZE_THRESHOLD_DEFAULT;
+
+uint32_t arena_bin_offsets[SC_NBINS];
+static unsigned nbins_total;
 
 static unsigned huge_arena_ind;
 
 /******************************************************************************/
@@ -179,7 +183,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads,
     for (szind_t i = 0; i < SC_NBINS; i++) {
         for (unsigned j = 0; j < bin_infos[i].n_shards; j++) {
             bin_stats_merge(tsdn, &bstats[i],
-                &arena->bins[i].bin_shards[j]);
+                arena_get_bin(arena, i, j));
         }
     }
 }
@@ -595,8 +599,7 @@ arena_reset(tsd_t *tsd, arena_t *arena) {
     /* Bins. */
     for (unsigned i = 0; i < SC_NBINS; i++) {
         for (unsigned j = 0; j < bin_infos[i].n_shards; j++) {
-            arena_bin_reset(tsd, arena,
-                &arena->bins[i].bin_shards[j]);
+            arena_bin_reset(tsd, arena, arena_get_bin(arena, i, j));
         }
     }
     pa_shard_reset(tsd_tsdn(tsd), &arena->pa_shard);
@@ -721,7 +724,7 @@ arena_bin_choose(tsdn_t *tsdn, arena_t *arena, szind_t binind,
     if (binshard_p != NULL) {
         *binshard_p = binshard;
     }
-    return &arena->bins[binind].bin_shards[binshard];
+    return arena_get_bin(arena, binind, binshard);
 }
 
 void
@@ -1168,7 +1171,7 @@ static void
 arena_dalloc_bin(tsdn_t *tsdn, arena_t *arena, edata_t *edata, void *ptr) {
     szind_t binind = edata_szind_get(edata);
     unsigned binshard = edata_binshard_get(edata);
-    bin_t *bin = &arena->bins[binind].bin_shards[binshard];
+    bin_t *bin = arena_get_bin(arena, binind, binshard);
 
     malloc_mutex_lock(tsdn, &bin->lock);
     arena_dalloc_bin_locked_info_t info;
@@ -1411,10 +1414,6 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) {
         }
     }
 
-    unsigned nbins_total = 0;
-    for (i = 0; i < SC_NBINS; i++) {
-        nbins_total += bin_infos[i].n_shards;
-    }
     size_t arena_size = sizeof(arena_t) + sizeof(bin_t) * nbins_total;
     arena = (arena_t *)base_alloc(tsdn, base, arena_size, CACHELINE);
     if (arena == NULL) {
@@ -1457,20 +1456,13 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) {
     }
 
     /* Initialize bins. */
-    uintptr_t bin_addr = (uintptr_t)arena + sizeof(arena_t);
     atomic_store_u(&arena->binshard_next, 0, ATOMIC_RELEASE);
-    for (i = 0; i < SC_NBINS; i++) {
-        unsigned nshards = bin_infos[i].n_shards;
-        arena->bins[i].bin_shards = (bin_t *)bin_addr;
-        bin_addr += nshards * sizeof(bin_t);
-        for (unsigned j = 0; j < nshards; j++) {
-            bool err = bin_init(&arena->bins[i].bin_shards[j]);
-            if (err) {
-                goto label_error;
-            }
+    for (i = 0; i < nbins_total; i++) {
+        bool err = bin_init(&arena->bins[i]);
+        if (err) {
+            goto label_error;
         }
     }
-    assert(bin_addr == (uintptr_t)arena + arena_size);
 
     arena->base = base;
     /* Set arena before creating background threads. */
@@ -1587,6 +1579,13 @@ arena_boot(sc_data_t *sc_data) {
         div_init(&arena_binind_div_info[i],
             (1U << sc->lg_base) + (sc->ndelta << sc->lg_delta));
     }
+
+    uint32_t cur_offset = (uint32_t)offsetof(arena_t, bins);
+    for (szind_t i = 0; i < SC_NBINS; i++) {
+        arena_bin_offsets[i] = cur_offset;
+        nbins_total += bin_infos[i].n_shards;
+        cur_offset += (uint32_t)(bin_infos[i].n_shards * sizeof(bin_t));
+    }
 }
 
 void
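
A worked example of the loop above (the shard counts here are invented for illustration): suppose bin_infos[0].n_shards == 1 and bin_infos[1].n_shards == 2. Then the boot-time pass records

arena_bin_offsets[0] = offsetof(arena_t, bins)
arena_bin_offsets[1] = offsetof(arena_t, bins) + 1 * sizeof(bin_t)
arena_bin_offsets[2] = offsetof(arena_t, bins) + 3 * sizeof(bin_t)

while nbins_total accumulates the total shard count, which arena_new() later uses to size the trailing bins[] array.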
@@ -1633,23 +1632,17 @@ arena_prefork7(tsdn_t *tsdn, arena_t *arena) {
 
 void
 arena_prefork8(tsdn_t *tsdn, arena_t *arena) {
-    for (unsigned i = 0; i < SC_NBINS; i++) {
-        for (unsigned j = 0; j < bin_infos[i].n_shards; j++) {
-            bin_prefork(tsdn, &arena->bins[i].bin_shards[j]);
-        }
+    for (unsigned i = 0; i < nbins_total; i++) {
+        bin_prefork(tsdn, &arena->bins[i]);
     }
 }
 
 void
 arena_postfork_parent(tsdn_t *tsdn, arena_t *arena) {
-    unsigned i;
-
-    for (i = 0; i < SC_NBINS; i++) {
-        for (unsigned j = 0; j < bin_infos[i].n_shards; j++) {
-            bin_postfork_parent(tsdn,
-                &arena->bins[i].bin_shards[j]);
-        }
+    for (unsigned i = 0; i < nbins_total; i++) {
+        bin_postfork_parent(tsdn, &arena->bins[i]);
     }
 
     malloc_mutex_postfork_parent(tsdn, &arena->large_mtx);
     base_postfork_parent(tsdn, arena->base);
     pa_shard_postfork_parent(tsdn, &arena->pa_shard);
@@ -1660,8 +1653,6 @@ arena_postfork_parent(tsdn_t *tsdn, arena_t *arena) {
 
 void
 arena_postfork_child(tsdn_t *tsdn, arena_t *arena) {
-    unsigned i;
-
     atomic_store_u(&arena->nthreads[0], 0, ATOMIC_RELAXED);
     atomic_store_u(&arena->nthreads[1], 0, ATOMIC_RELAXED);
     if (tsd_arena_get(tsdn_tsd(tsdn)) == arena) {
@@ -1686,11 +1677,10 @@ arena_postfork_child(tsdn_t *tsdn, arena_t *arena) {
         }
     }
 
-    for (i = 0; i < SC_NBINS; i++) {
-        for (unsigned j = 0; j < bin_infos[i].n_shards; j++) {
-            bin_postfork_child(tsdn, &arena->bins[i].bin_shards[j]);
-        }
+    for (unsigned i = 0; i < nbins_total; i++) {
+        bin_postfork_child(tsdn, &arena->bins[i]);
     }
 
     malloc_mutex_postfork_child(tsdn, &arena->large_mtx);
     base_postfork_child(tsdn, arena->base);
     pa_shard_postfork_child(tsdn, &arena->pa_shard);
@@ -3423,7 +3423,7 @@ stats_mutexes_reset_ctl(tsd_t *tsd, const size_t *mib,
 
     for (szind_t i = 0; i < SC_NBINS; i++) {
         for (unsigned j = 0; j < bin_infos[i].n_shards; j++) {
-            bin_t *bin = &arena->bins[i].bin_shards[j];
+            bin_t *bin = arena_get_bin(arena, i, j);
             MUTEX_PROF_RESET(bin->lock);
         }
     }
@@ -52,11 +52,11 @@ inspect_extent_util_stats_verbose_get(tsdn_t *tsdn, const void *ptr,
     assert(*nfree <= *nregs);
     assert(*nfree * edata_usize_get(edata) <= *size);
 
-    const arena_t *arena = (arena_t *)atomic_load_p(
+    arena_t *arena = (arena_t *)atomic_load_p(
         &arenas[edata_arena_ind_get(edata)], ATOMIC_RELAXED);
     assert(arena != NULL);
     const unsigned binshard = edata_binshard_get(edata);
-    bin_t *bin = &arena->bins[szind].bin_shards[binshard];
+    bin_t *bin = arena_get_bin(arena, szind, binshard);
 
     malloc_mutex_lock(tsdn, &bin->lock);
     if (config_stats) {
@@ -344,8 +344,8 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin,
             bin_t *cur_bin = NULL;
             if (small) {
                 cur_binshard = edata_binshard_get(edata);
-                cur_bin = &cur_arena->bins[binind].bin_shards[
-                    cur_binshard];
+                cur_bin = arena_get_bin(cur_arena, binind,
+                    cur_binshard);
                 assert(cur_binshard < bin_infos[binind].n_shards);
             }
 