Arena: Make more derived stats non-atomic/locked.

David Goldblatt 2020-03-12 08:34:47 -07:00 committed by David Goldblatt
parent d0c43217b5
commit 565045ef71
3 changed files with 36 additions and 54 deletions
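
The change is mechanical but worth spelling out: every field marked /* Derived. */ is not a live counter bumped on the allocation path; it is recomputed from scratch each time stats are merged, by the single thread performing the merge, and only read afterwards. Such fields need neither atomics nor the locked_u64_t counter type. The sketch below is not jemalloc code (the struct and function names are invented for illustration); it only shows the general pattern the commit moves toward: synchronized types for counters that many threads update concurrently, plain integers for the per-merge snapshot.

#include <stdatomic.h>
#include <stddef.h>
#include <stdint.h>

/* Live counters: updated concurrently by many threads, so they need an
 * atomic (or mutex-protected "locked") representation. */
typedef struct live_stats_s {
	atomic_uint_fast64_t nmalloc;
	atomic_uint_fast64_t ndalloc;
} live_stats_t;

/* Derived snapshot: written by exactly one thread during a stats merge
 * and only read once the merge is done, so plain integers suffice. */
typedef struct derived_stats_s {
	uint64_t nmalloc;
	uint64_t ndalloc;
	size_t allocated; /* Derived from the two counters above. */
} derived_stats_t;

/* Accumulate the live counters into the caller-private snapshot.  The
 * destination needs no synchronization at all. */
static void
derived_stats_accum(const live_stats_t *live, size_t usize,
    derived_stats_t *out) {
	uint64_t nmalloc = atomic_load_explicit(&live->nmalloc,
	    memory_order_relaxed);
	uint64_t ndalloc = atomic_load_explicit(&live->ndalloc,
	    memory_order_relaxed);
	out->nmalloc += nmalloc;
	out->ndalloc += ndalloc;
	out->allocated += (size_t)(nmalloc - ndalloc) * usize;
}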

View File

@@ -56,12 +56,12 @@ struct arena_stats_s {
 	atomic_zu_t internal;
-	atomic_zu_t allocated_large; /* Derived. */
-	locked_u64_t nmalloc_large; /* Derived. */
-	locked_u64_t ndalloc_large; /* Derived. */
-	locked_u64_t nfills_large; /* Derived. */
-	locked_u64_t nflushes_large; /* Derived. */
-	locked_u64_t nrequests_large; /* Derived. */
+	size_t allocated_large; /* Derived. */
+	uint64_t nmalloc_large; /* Derived. */
+	uint64_t ndalloc_large; /* Derived. */
+	uint64_t nfills_large; /* Derived. */
+	uint64_t nflushes_large; /* Derived. */
+	uint64_t nrequests_large; /* Derived. */
 	/*
 	 * The stats logically owned by the pa_shard in the same arena. This
@@ -71,7 +71,7 @@ struct arena_stats_s {
 	pa_shard_stats_t pa_shard_stats;
 	/* Number of bytes cached in tcache associated with this arena. */
-	atomic_zu_t tcache_bytes; /* Derived. */
+	size_t tcache_bytes; /* Derived. */
 	mutex_prof_data_t mutex_prof_data[mutex_prof_num_arena_mutexes];
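
Only the declarations change in this header: the fields commented /* Derived. */ drop from atomic_zu_t / locked_u64_t to plain size_t / uint64_t. The code that fills them in (below, in arena_stats_merge() and the ctl merge path) is what justifies the weaker types.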

View File

@@ -150,42 +150,37 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads,
 		    LOCKEDINT_MTX(arena->stats.mtx),
 		    &arena->stats.lstats[i].nmalloc);
 		locked_inc_u64_unsynchronized(&lstats[i].nmalloc, nmalloc);
-		locked_inc_u64_unsynchronized(&astats->nmalloc_large,
-		    nmalloc);
+		astats->nmalloc_large += nmalloc;
 		uint64_t ndalloc = locked_read_u64(tsdn,
 		    LOCKEDINT_MTX(arena->stats.mtx),
 		    &arena->stats.lstats[i].ndalloc);
 		locked_inc_u64_unsynchronized(&lstats[i].ndalloc, ndalloc);
-		locked_inc_u64_unsynchronized(&astats->ndalloc_large,
-		    ndalloc);
+		astats->ndalloc_large += ndalloc;
 		uint64_t nrequests = locked_read_u64(tsdn,
 		    LOCKEDINT_MTX(arena->stats.mtx),
 		    &arena->stats.lstats[i].nrequests);
 		locked_inc_u64_unsynchronized(&lstats[i].nrequests,
 		    nmalloc + nrequests);
-		locked_inc_u64_unsynchronized(&astats->nrequests_large,
-		    nmalloc + nrequests);
+		astats->nrequests_large += nmalloc + nrequests;
 		/* nfill == nmalloc for large currently. */
 		locked_inc_u64_unsynchronized(&lstats[i].nfills, nmalloc);
-		locked_inc_u64_unsynchronized(&astats->nfills_large,
-		    nmalloc);
+		astats->nfills_large += nmalloc;
 		uint64_t nflush = locked_read_u64(tsdn,
 		    LOCKEDINT_MTX(arena->stats.mtx),
 		    &arena->stats.lstats[i].nflushes);
 		locked_inc_u64_unsynchronized(&lstats[i].nflushes, nflush);
-		locked_inc_u64_unsynchronized(&astats->nflushes_large,
-		    nflush);
+		astats->nflushes_large += nflush;
 		assert(nmalloc >= ndalloc);
 		assert(nmalloc - ndalloc <= SIZE_T_MAX);
 		size_t curlextents = (size_t)(nmalloc - ndalloc);
 		lstats[i].curlextents += curlextents;
-		atomic_load_add_store_zu(&astats->allocated_large,
-		    curlextents * sz_index2size(SC_NBINS + i));
+		astats->allocated_large +=
+		    curlextents * sz_index2size(SC_NBINS + i);
 	}
 	for (pszind_t i = 0; i < SC_NPSIZES; i++) {
@@ -213,22 +208,22 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads,
 	LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx);
 	/* tcache_bytes counts currently cached bytes. */
-	atomic_store_zu(&astats->tcache_bytes, 0, ATOMIC_RELAXED);
+	astats->tcache_bytes = 0;
 	malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx);
 	cache_bin_array_descriptor_t *descriptor;
 	ql_foreach(descriptor, &arena->cache_bin_array_descriptor_ql, link) {
 		for (szind_t i = 0; i < SC_NBINS; i++) {
 			cache_bin_t *tbin = &descriptor->bins_small[i];
-			atomic_load_add_store_zu(&astats->tcache_bytes,
-			    cache_bin_ncached_get(tbin,
-			    &tcache_bin_info[i]) * sz_index2size(i));
+			astats->tcache_bytes +=
+			    cache_bin_ncached_get(tbin,
+			    &tcache_bin_info[i]) * sz_index2size(i);
 		}
 		for (szind_t i = 0; i < nhbins - SC_NBINS; i++) {
 			cache_bin_t *tbin = &descriptor->bins_large[i];
-			atomic_load_add_store_zu(&astats->tcache_bytes,
-			    cache_bin_ncached_get(tbin,
-			    &tcache_bin_info[i + SC_NBINS])
-			    * sz_index2size(i + SC_NBINS));
+			astats->tcache_bytes +=
+			    cache_bin_ncached_get(tbin,
+			    &tcache_bin_info[i + SC_NBINS])
+			    * sz_index2size(i + SC_NBINS);
 		}
 	}
 	malloc_mutex_prof_read(tsdn,
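
Note that synchronization on the source side is untouched: the per-size-class counters are still read with locked_read_u64() while LOCKEDINT_MTX(arena->stats.mtx) is held. What turns into plain += arithmetic is only the accumulation into the caller-provided astats snapshot, which the merge treats as thread-private, so neither locked_inc_u64_unsynchronized() nor atomic_load_add_store_zu() is needed on the destination anymore.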

View File

@@ -915,26 +915,21 @@ MUTEX_PROF_ARENA_MUTEXES
 		sdstats->nflushes_small += astats->nflushes_small;
 		if (!destroyed) {
-			ctl_accum_atomic_zu(&sdstats->astats.allocated_large,
-			    &astats->astats.allocated_large);
+			sdstats->astats.allocated_large +=
+			    astats->astats.allocated_large;
 		} else {
-			assert(atomic_load_zu(&astats->astats.allocated_large,
-			    ATOMIC_RELAXED) == 0);
+			assert(astats->astats.allocated_large == 0);
 		}
-		ctl_accum_locked_u64(&sdstats->astats.nmalloc_large,
-		    &astats->astats.nmalloc_large);
-		ctl_accum_locked_u64(&sdstats->astats.ndalloc_large,
-		    &astats->astats.ndalloc_large);
-		ctl_accum_locked_u64(&sdstats->astats.nrequests_large,
-		    &astats->astats.nrequests_large);
-		ctl_accum_locked_u64(&sdstats->astats.nflushes_large,
-		    &astats->astats.nflushes_large);
+		sdstats->astats.nmalloc_large += astats->astats.nmalloc_large;
+		sdstats->astats.ndalloc_large += astats->astats.ndalloc_large;
+		sdstats->astats.nrequests_large
+		    += astats->astats.nrequests_large;
+		sdstats->astats.nflushes_large += astats->astats.nflushes_large;
 		ctl_accum_atomic_zu(
 		    &sdstats->astats.pa_shard_stats.abandoned_vm,
 		    &astats->astats.pa_shard_stats.abandoned_vm);
-		ctl_accum_atomic_zu(&sdstats->astats.tcache_bytes,
-		    &astats->astats.tcache_bytes);
+		sdstats->astats.tcache_bytes += astats->astats.tcache_bytes;
 		if (ctl_arena->arena_ind == 0) {
 			sdstats->astats.uptime = astats->astats.uptime;
@@ -1082,8 +1077,7 @@ ctl_refresh(tsdn_t *tsdn) {
 	if (config_stats) {
 		ctl_stats->allocated = ctl_sarena->astats->allocated_small +
-		    atomic_load_zu(&ctl_sarena->astats->astats.allocated_large,
-		    ATOMIC_RELAXED);
+		    ctl_sarena->astats->astats.allocated_large;
 		ctl_stats->active = (ctl_sarena->pactive << LG_PAGE);
 		ctl_stats->metadata = ctl_sarena->astats->astats.base +
 		    atomic_load_zu(&ctl_sarena->astats->astats.internal,
@@ -2947,8 +2941,7 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_internal,
 CTL_RO_CGEN(config_stats, stats_arenas_i_metadata_thp,
     arenas_i(mib[2])->astats->astats.metadata_thp, size_t)
 CTL_RO_CGEN(config_stats, stats_arenas_i_tcache_bytes,
-    atomic_load_zu(&arenas_i(mib[2])->astats->astats.tcache_bytes,
-    ATOMIC_RELAXED), size_t)
+    arenas_i(mib[2])->astats->astats.tcache_bytes, size_t)
 CTL_RO_CGEN(config_stats, stats_arenas_i_resident,
     arenas_i(mib[2])->astats->astats.resident,
     size_t)
@@ -2970,27 +2963,21 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_small_nfills,
 CTL_RO_CGEN(config_stats, stats_arenas_i_small_nflushes,
     arenas_i(mib[2])->astats->nflushes_small, uint64_t)
 CTL_RO_CGEN(config_stats, stats_arenas_i_large_allocated,
-    atomic_load_zu(&arenas_i(mib[2])->astats->astats.allocated_large,
-    ATOMIC_RELAXED), size_t)
+    arenas_i(mib[2])->astats->astats.allocated_large, size_t)
 CTL_RO_CGEN(config_stats, stats_arenas_i_large_nmalloc,
-    locked_read_u64_unsynchronized(
-    &arenas_i(mib[2])->astats->astats.nmalloc_large), uint64_t)
+    arenas_i(mib[2])->astats->astats.nmalloc_large, uint64_t)
 CTL_RO_CGEN(config_stats, stats_arenas_i_large_ndalloc,
-    locked_read_u64_unsynchronized(
-    &arenas_i(mib[2])->astats->astats.ndalloc_large), uint64_t)
+    arenas_i(mib[2])->astats->astats.ndalloc_large, uint64_t)
 CTL_RO_CGEN(config_stats, stats_arenas_i_large_nrequests,
-    locked_read_u64_unsynchronized(
-    &arenas_i(mib[2])->astats->astats.nrequests_large), uint64_t)
+    arenas_i(mib[2])->astats->astats.nrequests_large, uint64_t)
 /*
  * Note: "nmalloc_large" here instead of "nfills" in the read. This is
  * intentional (large has no batch fill).
  */
 CTL_RO_CGEN(config_stats, stats_arenas_i_large_nfills,
-    locked_read_u64_unsynchronized(
-    &arenas_i(mib[2])->astats->astats.nmalloc_large), uint64_t)
+    arenas_i(mib[2])->astats->astats.nmalloc_large, uint64_t)
 CTL_RO_CGEN(config_stats, stats_arenas_i_large_nflushes,
-    locked_read_u64_unsynchronized(
-    &arenas_i(mib[2])->astats->astats.nflushes_large), uint64_t)
+    arenas_i(mib[2])->astats->astats.nflushes_large, uint64_t)
 /* Lock profiling related APIs below. */
 #define RO_MUTEX_CTL_GEN(n, l) \
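
The read side simplifies accordingly: with allocated_large, tcache_bytes, and the large nmalloc/ndalloc/nrequests/nfills/nflushes totals now plain fields, the CTL_RO_CGEN accessors return them directly instead of wrapping them in atomic_load_zu() or locked_read_u64_unsynchronized(). (This assumes, as before, that the merged ctl snapshot is not mutated concurrently with these reads.)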