From 565045ef716586f93caf6c210905419be9ed6e25 Mon Sep 17 00:00:00 2001
From: David Goldblatt
Date: Thu, 12 Mar 2020 08:34:47 -0700
Subject: [PATCH] Arena: Make more derived stats non-atomic/locked.

---
 include/jemalloc/internal/arena_stats.h | 14 ++++----
 src/arena.c                             | 29 +++++++--------
 src/ctl.c                               | 47 +++++++++----------------
 3 files changed, 36 insertions(+), 54 deletions(-)

diff --git a/include/jemalloc/internal/arena_stats.h b/include/jemalloc/internal/arena_stats.h
index 3bfc8582..3b3441f1 100644
--- a/include/jemalloc/internal/arena_stats.h
+++ b/include/jemalloc/internal/arena_stats.h
@@ -56,12 +56,12 @@ struct arena_stats_s {
 	atomic_zu_t internal;
 
-	atomic_zu_t allocated_large; /* Derived. */
-	locked_u64_t nmalloc_large; /* Derived. */
-	locked_u64_t ndalloc_large; /* Derived. */
-	locked_u64_t nfills_large; /* Derived. */
-	locked_u64_t nflushes_large; /* Derived. */
-	locked_u64_t nrequests_large; /* Derived. */
+	size_t allocated_large; /* Derived. */
+	uint64_t nmalloc_large; /* Derived. */
+	uint64_t ndalloc_large; /* Derived. */
+	uint64_t nfills_large; /* Derived. */
+	uint64_t nflushes_large; /* Derived. */
+	uint64_t nrequests_large; /* Derived. */
 
 	/*
 	 * The stats logically owned by the pa_shard in the same arena. This
@@ -71,7 +71,7 @@ struct arena_stats_s {
 	pa_shard_stats_t pa_shard_stats;
 
 	/* Number of bytes cached in tcache associated with this arena. */
-	atomic_zu_t tcache_bytes; /* Derived. */
+	size_t tcache_bytes; /* Derived. */
 
 	mutex_prof_data_t mutex_prof_data[mutex_prof_num_arena_mutexes];
 
diff --git a/src/arena.c b/src/arena.c
index 0fe85a9c..73033a64 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -150,42 +150,37 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads,
 		    LOCKEDINT_MTX(arena->stats.mtx),
 		    &arena->stats.lstats[i].nmalloc);
 		locked_inc_u64_unsynchronized(&lstats[i].nmalloc, nmalloc);
-		locked_inc_u64_unsynchronized(&astats->nmalloc_large,
-		    nmalloc);
+		astats->nmalloc_large += nmalloc;
 
 		uint64_t ndalloc = locked_read_u64(tsdn,
 		    LOCKEDINT_MTX(arena->stats.mtx),
 		    &arena->stats.lstats[i].ndalloc);
 		locked_inc_u64_unsynchronized(&lstats[i].ndalloc, ndalloc);
-		locked_inc_u64_unsynchronized(&astats->ndalloc_large,
-		    ndalloc);
+		astats->ndalloc_large += ndalloc;
 
 		uint64_t nrequests = locked_read_u64(tsdn,
 		    LOCKEDINT_MTX(arena->stats.mtx),
 		    &arena->stats.lstats[i].nrequests);
 		locked_inc_u64_unsynchronized(&lstats[i].nrequests,
 		    nmalloc + nrequests);
-		locked_inc_u64_unsynchronized(&astats->nrequests_large,
-		    nmalloc + nrequests);
+		astats->nrequests_large += nmalloc + nrequests;
 		/* nfill == nmalloc for large currently. */
 		locked_inc_u64_unsynchronized(&lstats[i].nfills, nmalloc);
-		locked_inc_u64_unsynchronized(&astats->nfills_large,
-		    nmalloc);
+		astats->nfills_large += nmalloc;
 
 		uint64_t nflush = locked_read_u64(tsdn,
 		    LOCKEDINT_MTX(arena->stats.mtx),
 		    &arena->stats.lstats[i].nflushes);
 		locked_inc_u64_unsynchronized(&lstats[i].nflushes, nflush);
-		locked_inc_u64_unsynchronized(&astats->nflushes_large,
-		    nflush);
+		astats->nflushes_large += nflush;
 
 		assert(nmalloc >= ndalloc);
 		assert(nmalloc - ndalloc <= SIZE_T_MAX);
 		size_t curlextents = (size_t)(nmalloc - ndalloc);
 		lstats[i].curlextents += curlextents;
-		atomic_load_add_store_zu(&astats->allocated_large,
-		    curlextents * sz_index2size(SC_NBINS + i));
+		astats->allocated_large +=
+		    curlextents * sz_index2size(SC_NBINS + i);
 	}
 
 	for (pszind_t i = 0; i < SC_NPSIZES; i++) {
@@ -213,22 +208,22 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads,
 	LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx);
 
 	/* tcache_bytes counts currently cached bytes. */
-	atomic_store_zu(&astats->tcache_bytes, 0, ATOMIC_RELAXED);
+	astats->tcache_bytes = 0;
 	malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx);
 	cache_bin_array_descriptor_t *descriptor;
 	ql_foreach(descriptor, &arena->cache_bin_array_descriptor_ql, link) {
 		for (szind_t i = 0; i < SC_NBINS; i++) {
 			cache_bin_t *tbin = &descriptor->bins_small[i];
-			atomic_load_add_store_zu(&astats->tcache_bytes,
+			astats->tcache_bytes +=
 			    cache_bin_ncached_get(tbin,
-			    &tcache_bin_info[i]) * sz_index2size(i));
+			    &tcache_bin_info[i]) * sz_index2size(i);
 		}
 		for (szind_t i = 0; i < nhbins - SC_NBINS; i++) {
 			cache_bin_t *tbin = &descriptor->bins_large[i];
-			atomic_load_add_store_zu(&astats->tcache_bytes,
+			astats->tcache_bytes +=
 			    cache_bin_ncached_get(tbin,
 			    &tcache_bin_info[i + SC_NBINS])
-			    * sz_index2size(i + SC_NBINS));
+			    * sz_index2size(i + SC_NBINS);
 		}
 	}
 	malloc_mutex_prof_read(tsdn,
diff --git a/src/ctl.c b/src/ctl.c
index 368eb5f8..a3cc74ac 100644
--- a/src/ctl.c
+++ b/src/ctl.c
@@ -915,26 +915,21 @@ MUTEX_PROF_ARENA_MUTEXES
 		sdstats->nflushes_small += astats->nflushes_small;
 
 		if (!destroyed) {
-			ctl_accum_atomic_zu(&sdstats->astats.allocated_large,
-			    &astats->astats.allocated_large);
+			sdstats->astats.allocated_large +=
+			    astats->astats.allocated_large;
 		} else {
-			assert(atomic_load_zu(&astats->astats.allocated_large,
-			    ATOMIC_RELAXED) == 0);
+			assert(astats->astats.allocated_large == 0);
 		}
-		ctl_accum_locked_u64(&sdstats->astats.nmalloc_large,
-		    &astats->astats.nmalloc_large);
-		ctl_accum_locked_u64(&sdstats->astats.ndalloc_large,
-		    &astats->astats.ndalloc_large);
-		ctl_accum_locked_u64(&sdstats->astats.nrequests_large,
-		    &astats->astats.nrequests_large);
-		ctl_accum_locked_u64(&sdstats->astats.nflushes_large,
-		    &astats->astats.nflushes_large);
+		sdstats->astats.nmalloc_large += astats->astats.nmalloc_large;
+		sdstats->astats.ndalloc_large += astats->astats.ndalloc_large;
+		sdstats->astats.nrequests_large
+		    += astats->astats.nrequests_large;
+		sdstats->astats.nflushes_large += astats->astats.nflushes_large;
 		ctl_accum_atomic_zu(
 		    &sdstats->astats.pa_shard_stats.abandoned_vm,
 		    &astats->astats.pa_shard_stats.abandoned_vm);
-		ctl_accum_atomic_zu(&sdstats->astats.tcache_bytes,
-		    &astats->astats.tcache_bytes);
+		sdstats->astats.tcache_bytes += astats->astats.tcache_bytes;
 
 		if (ctl_arena->arena_ind == 0) {
 			sdstats->astats.uptime = astats->astats.uptime;
@@ -1082,8 +1077,7 @@ ctl_refresh(tsdn_t *tsdn) {
 	if (config_stats) {
 		ctl_stats->allocated = ctl_sarena->astats->allocated_small +
-		    atomic_load_zu(&ctl_sarena->astats->astats.allocated_large,
-		    ATOMIC_RELAXED);
+		    ctl_sarena->astats->astats.allocated_large;
 		ctl_stats->active = (ctl_sarena->pactive << LG_PAGE);
 		ctl_stats->metadata = ctl_sarena->astats->astats.base +
 		    atomic_load_zu(&ctl_sarena->astats->astats.internal,
 		    ATOMIC_RELAXED);
@@ -2947,8 +2941,7 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_internal,
 CTL_RO_CGEN(config_stats, stats_arenas_i_metadata_thp,
     arenas_i(mib[2])->astats->astats.metadata_thp, size_t)
 CTL_RO_CGEN(config_stats, stats_arenas_i_tcache_bytes,
-    atomic_load_zu(&arenas_i(mib[2])->astats->astats.tcache_bytes,
-    ATOMIC_RELAXED), size_t)
+    arenas_i(mib[2])->astats->astats.tcache_bytes, size_t)
 CTL_RO_CGEN(config_stats, stats_arenas_i_resident,
     arenas_i(mib[2])->astats->astats.resident, size_t)
@@ -2970,27 +2963,21 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_small_nfills,
 CTL_RO_CGEN(config_stats, stats_arenas_i_small_nflushes,
     arenas_i(mib[2])->astats->nflushes_small, uint64_t)
 CTL_RO_CGEN(config_stats, stats_arenas_i_large_allocated,
-    atomic_load_zu(&arenas_i(mib[2])->astats->astats.allocated_large,
-    ATOMIC_RELAXED), size_t)
+    arenas_i(mib[2])->astats->astats.allocated_large, size_t)
 CTL_RO_CGEN(config_stats, stats_arenas_i_large_nmalloc,
-    locked_read_u64_unsynchronized(
-    &arenas_i(mib[2])->astats->astats.nmalloc_large), uint64_t)
+    arenas_i(mib[2])->astats->astats.nmalloc_large, uint64_t)
 CTL_RO_CGEN(config_stats, stats_arenas_i_large_ndalloc,
-    locked_read_u64_unsynchronized(
-    &arenas_i(mib[2])->astats->astats.ndalloc_large), uint64_t)
+    arenas_i(mib[2])->astats->astats.ndalloc_large, uint64_t)
 CTL_RO_CGEN(config_stats, stats_arenas_i_large_nrequests,
-    locked_read_u64_unsynchronized(
-    &arenas_i(mib[2])->astats->astats.nrequests_large), uint64_t)
+    arenas_i(mib[2])->astats->astats.nrequests_large, uint64_t)
 /*
  * Note: "nmalloc_large" here instead of "nfills" in the read. This is
  * intentional (large has no batch fill).
  */
 CTL_RO_CGEN(config_stats, stats_arenas_i_large_nfills,
-    locked_read_u64_unsynchronized(
-    &arenas_i(mib[2])->astats->astats.nmalloc_large), uint64_t)
+    arenas_i(mib[2])->astats->astats.nmalloc_large, uint64_t)
 CTL_RO_CGEN(config_stats, stats_arenas_i_large_nflushes,
-    locked_read_u64_unsynchronized(
-    &arenas_i(mib[2])->astats->astats.nflushes_large), uint64_t)
+    arenas_i(mib[2])->astats->astats.nflushes_large, uint64_t)
 
 /* Lock profiling related APIs below. */
 #define RO_MUTEX_CTL_GEN(n, l) \
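
Not part of the patch: a minimal standalone sketch of the pattern the change relies on. Counters bumped concurrently on the hot path stay atomic, while totals that are only ever written during a stats merge, with a merge mutex held (and read once merging is done), can be plain uint64_t/size_t fields rather than atomic or locked wrappers. All names below (live_stats_t, merged_stats_t, stats_merge, merge_mtx) are hypothetical illustrations, not jemalloc APIs.

/* Sketch only; hypothetical names.  Build with e.g. cc -std=c11 -pthread sketch.c */
#include <inttypes.h>
#include <pthread.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

typedef struct {
	/* Bumped concurrently on the allocation fast path: must stay atomic. */
	atomic_uint_fast64_t nmalloc;
	atomic_uint_fast64_t ndalloc;
} live_stats_t;

typedef struct {
	/*
	 * Derived during a merge only; the merge mutex serializes every
	 * writer, so plain integers suffice (no atomic/locked wrappers).
	 */
	uint64_t nmalloc_large;
	uint64_t ndalloc_large;
	size_t curlextents;
} merged_stats_t;

static pthread_mutex_t merge_mtx = PTHREAD_MUTEX_INITIALIZER;

static void
stats_merge(live_stats_t *live, merged_stats_t *out) {
	pthread_mutex_lock(&merge_mtx);
	uint64_t nmalloc = atomic_load_explicit(&live->nmalloc,
	    memory_order_relaxed);
	uint64_t ndalloc = atomic_load_explicit(&live->ndalloc,
	    memory_order_relaxed);
	/* Plain += instead of an atomic load-add-store sequence. */
	out->nmalloc_large += nmalloc;
	out->ndalloc_large += ndalloc;
	out->curlextents += (size_t)(nmalloc - ndalloc);
	pthread_mutex_unlock(&merge_mtx);
}

int
main(void) {
	live_stats_t live;
	merged_stats_t merged = {0, 0, 0};

	atomic_init(&live.nmalloc, 0);
	atomic_init(&live.ndalloc, 0);
	/* Pretend two allocations and one deallocation happened. */
	atomic_fetch_add_explicit(&live.nmalloc, 2, memory_order_relaxed);
	atomic_fetch_add_explicit(&live.ndalloc, 1, memory_order_relaxed);

	stats_merge(&live, &merged);
	printf("nmalloc=%" PRIu64 " ndalloc=%" PRIu64 " curlextents=%zu\n",
	    merged.nmalloc_large, merged.ndalloc_large, merged.curlextents);
	return 0;
}

The analogy to the patch is loose but, presumably, the same reasoning applies: the *_large and tcache_bytes fields of arena_stats_t are derived inside arena_stats_merge()/ctl while the surrounding synchronization is already in place, so the atomic_zu_t/locked_u64_t wrappers were pure overhead.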