diff --git a/include/jemalloc/internal/arena_inlines_a.h b/include/jemalloc/internal/arena_inlines_a.h
index 9dd5304c..e1c47652 100644
--- a/include/jemalloc/internal/arena_inlines_a.h
+++ b/include/jemalloc/internal/arena_inlines_a.h
@@ -19,17 +19,17 @@ arena_ind_get(const arena_t *arena) {
 
 JEMALLOC_INLINE void
 arena_internal_add(arena_t *arena, size_t size) {
-	atomic_add_zu(&arena->stats.internal, size);
+	atomic_fetch_add_zu(&arena->stats.internal, size, ATOMIC_RELAXED);
 }
 
 JEMALLOC_INLINE void
 arena_internal_sub(arena_t *arena, size_t size) {
-	atomic_sub_zu(&arena->stats.internal, size);
+	atomic_fetch_sub_zu(&arena->stats.internal, size, ATOMIC_RELAXED);
 }
 
 JEMALLOC_INLINE size_t
 arena_internal_get(arena_t *arena) {
-	return atomic_read_zu(&arena->stats.internal);
+	return atomic_load_zu(&arena->stats.internal, ATOMIC_RELAXED);
 }
 
 JEMALLOC_INLINE bool
diff --git a/include/jemalloc/internal/stats_structs.h b/include/jemalloc/internal/stats_structs.h
index b64ba2d2..4e9c898a 100644
--- a/include/jemalloc/internal/stats_structs.h
+++ b/include/jemalloc/internal/stats_structs.h
@@ -88,7 +88,7 @@ struct arena_stats_s {
 #endif
 
 	/* Number of bytes currently mapped, excluding retained memory. */
-	size_t			mapped; /* Partially derived. */
+	atomic_zu_t		mapped; /* Partially derived. */
 
 	/*
 	 * Number of bytes currently retained as a side effect of munmap() being
@@ -96,7 +96,7 @@ struct arena_stats_s {
 	 * always decommitted or purged), but they are excluded from the mapped
 	 * statistic (above).
 	 */
-	size_t			retained; /* Derived. */
+	atomic_zu_t		retained; /* Derived. */
 
 	/*
 	 * Total number of purge sweeps, total number of madvise calls made,
@@ -107,17 +107,17 @@ struct arena_stats_s {
 	arena_stats_u64_t	nmadvise;
 	arena_stats_u64_t	purged;
 
-	size_t			base; /* Derived. */
-	size_t			internal;
-	size_t			resident; /* Derived. */
+	atomic_zu_t		base; /* Derived. */
+	atomic_zu_t		internal;
+	atomic_zu_t		resident; /* Derived. */
 
-	size_t			allocated_large; /* Derived. */
+	atomic_zu_t		allocated_large; /* Derived. */
 	arena_stats_u64_t	nmalloc_large; /* Derived. */
 	arena_stats_u64_t	ndalloc_large; /* Derived. */
 	arena_stats_u64_t	nrequests_large; /* Derived. */
 
 	/* Number of bytes cached in tcache associated with this arena. */
-	size_t			tcache_bytes; /* Derived. */
+	atomic_zu_t		tcache_bytes; /* Derived. */
 
 	/* One element for each large size class. */
 	malloc_large_stats_t	lstats[NSIZES - NBINS];
diff --git a/src/arena.c b/src/arena.c
index 1fbf87dd..417778b4 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -128,39 +128,47 @@ arena_stats_accum_u64(arena_stats_u64_t *dst, uint64_t src) {
 }
 
 static size_t
-arena_stats_read_zu(tsdn_t *tsdn, arena_stats_t *arena_stats, size_t *p) {
+arena_stats_read_zu(tsdn_t *tsdn, arena_stats_t *arena_stats, atomic_zu_t *p) {
 #ifdef JEMALLOC_ATOMIC_U64
-	return atomic_read_zu(p);
+	return atomic_load_zu(p, ATOMIC_RELAXED);
 #else
 	malloc_mutex_assert_owner(tsdn, &arena_stats->mtx);
-	return *p;
+	return atomic_load_zu(p, ATOMIC_RELAXED);
 #endif
 }
 
 static void
-arena_stats_add_zu(tsdn_t *tsdn, arena_stats_t *arena_stats, size_t *p,
+arena_stats_add_zu(tsdn_t *tsdn, arena_stats_t *arena_stats, atomic_zu_t *p,
     size_t x) {
 #ifdef JEMALLOC_ATOMIC_U64
-	atomic_add_zu(p, x);
+	atomic_fetch_add_zu(p, x, ATOMIC_RELAXED);
 #else
 	malloc_mutex_assert_owner(tsdn, &arena_stats->mtx);
-	*p += x;
+	size_t cur = atomic_load_zu(p, ATOMIC_RELAXED);
+	atomic_store_zu(p, cur + x, ATOMIC_RELAXED);
 #endif
 }
 
 static void
-arena_stats_sub_zu(tsdn_t *tsdn, arena_stats_t *arena_stats, size_t *p,
+arena_stats_sub_zu(tsdn_t *tsdn, arena_stats_t *arena_stats, atomic_zu_t *p,
     size_t x) {
 #ifdef JEMALLOC_ATOMIC_U64
-	UNUSED size_t r = atomic_sub_zu(p, x);
-	assert(r + x >= r);
+	UNUSED size_t r = atomic_fetch_sub_zu(p, x, ATOMIC_RELAXED);
+	assert(r - x <= r);
 #else
 	malloc_mutex_assert_owner(tsdn, &arena_stats->mtx);
-	*p -= x;
-	assert(*p + x >= *p);
+	size_t cur = atomic_load_zu(p, ATOMIC_RELAXED);
+	atomic_store_zu(p, cur - x, ATOMIC_RELAXED);
 #endif
 }
 
+/* Like the _u64 variant, needs an externally synchronized *dst. */
+static void
+arena_stats_accum_zu(atomic_zu_t *dst, size_t src) {
+	size_t cur_dst = atomic_load_zu(dst, ATOMIC_RELAXED);
+	atomic_store_zu(dst, src + cur_dst, ATOMIC_RELAXED);
+}
+
 void
 arena_stats_large_nrequests_add(tsdn_t *tsdn, arena_stats_t *arena_stats,
     szind_t szind, uint64_t nrequests) {
@@ -203,20 +211,21 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads,
 
 	arena_stats_lock(tsdn, &arena->stats);
 
-	astats->mapped += base_mapped + arena_stats_read_zu(tsdn, &arena->stats,
-	    &arena->stats.mapped);
-	astats->retained += (extents_npages_get(&arena->extents_retained) <<
-	    LG_PAGE);
+	arena_stats_accum_zu(&astats->mapped, base_mapped +
+	    arena_stats_read_zu(tsdn, &arena->stats, &arena->stats.mapped));
+	arena_stats_accum_zu(&astats->retained,
+	    extents_npages_get(&arena->extents_retained) << LG_PAGE);
 
 	arena_stats_accum_u64(&astats->npurge, arena_stats_read_u64(tsdn,
 	    &arena->stats, &arena->stats.npurge));
 	arena_stats_accum_u64(&astats->nmadvise, arena_stats_read_u64(tsdn,
 	    &arena->stats, &arena->stats.nmadvise));
 	arena_stats_accum_u64(&astats->purged, arena_stats_read_u64(tsdn,
 	    &arena->stats, &arena->stats.purged));
-	astats->base += base_allocated;
-	astats->internal += arena_internal_get(arena);
-	astats->resident += base_resident + (((atomic_read_zu(&arena->nactive) +
-	    extents_npages_get(&arena->extents_cached)) << LG_PAGE));
+	arena_stats_accum_zu(&astats->base, base_allocated);
+	arena_stats_accum_zu(&astats->internal, arena_internal_get(arena));
+	arena_stats_accum_zu(&astats->resident, base_resident +
+	    (((atomic_read_zu(&arena->nactive) +
+	    extents_npages_get(&arena->extents_cached)) << LG_PAGE)));
 
 	for (szind_t i = 0; i < NSIZES - NBINS; i++) {
 		uint64_t nmalloc = arena_stats_read_u64(tsdn, &arena->stats,
@@ -240,7 +249,8 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads,
 		assert(nmalloc - ndalloc <= SIZE_T_MAX);
 		size_t curlextents = (size_t)(nmalloc - ndalloc);
 		lstats[i].curlextents += curlextents;
-		astats->allocated_large += curlextents * index2size(NBINS + i);
+		arena_stats_accum_zu(&astats->allocated_large,
+		    curlextents * index2size(NBINS + i));
 	}
 
 	arena_stats_unlock(tsdn, &arena->stats);
@@ -250,13 +260,13 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads,
 		tcache_t *tcache;
 
 		/* tcache_bytes counts currently cached bytes. */
-		astats->tcache_bytes = 0;
+		atomic_store_zu(&astats->tcache_bytes, 0, ATOMIC_RELAXED);
 		malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx);
 		ql_foreach(tcache, &arena->tcache_ql, link) {
 			for (szind_t i = 0; i < nhbins; i++) {
 				tbin = &tcache->tbins[i];
-				astats->tcache_bytes += tbin->ncached *
-				    index2size(i);
+				arena_stats_accum_zu(&astats->tcache_bytes,
+				    tbin->ncached * index2size(i));
 			}
 		}
 		malloc_mutex_unlock(tsdn, &arena->tcache_ql_mtx);
diff --git a/src/ctl.c b/src/ctl.c
index bb835836..70721584 100644
--- a/src/ctl.c
+++ b/src/ctl.c
@@ -456,6 +456,12 @@ arena_stats_read_u64(arena_stats_u64_t *p) {
 #endif
 }
 
+static void accum_atomic_zu(atomic_zu_t *dst, atomic_zu_t *src) {
+	size_t cur_dst = atomic_load_zu(dst, ATOMIC_RELAXED);
+	size_t cur_src = atomic_load_zu(src, ATOMIC_RELAXED);
+	atomic_store_zu(dst, cur_dst + cur_src, ATOMIC_RELAXED);
+}
+
 /******************************************************************************/
 
 static unsigned
@@ -613,8 +619,10 @@ ctl_arena_stats_sdmerge(ctl_arena_t *ctl_sdarena, ctl_arena_t *ctl_arena,
 		ctl_arena_stats_t *astats = ctl_arena->astats;
 
 		if (!destroyed) {
-			sdstats->astats.mapped += astats->astats.mapped;
-			sdstats->astats.retained += astats->astats.retained;
+			accum_atomic_zu(&sdstats->astats.mapped,
+			    &astats->astats.mapped);
+			accum_atomic_zu(&sdstats->astats.retained,
+			    &astats->astats.retained);
 		}
 		accum_arena_stats_u64(&sdstats->astats.npurge,
 		    &astats->astats.npurge);
@@ -624,11 +632,15 @@ ctl_arena_stats_sdmerge(ctl_arena_t *ctl_sdarena, ctl_arena_t *ctl_arena,
 		    &astats->astats.purged);
 
 		if (!destroyed) {
-			sdstats->astats.base += astats->astats.base;
-			sdstats->astats.internal += astats->astats.internal;
-			sdstats->astats.resident += astats->astats.resident;
+			accum_atomic_zu(&sdstats->astats.base,
+			    &astats->astats.base);
+			accum_atomic_zu(&sdstats->astats.internal,
+			    &astats->astats.internal);
+			accum_atomic_zu(&sdstats->astats.resident,
+			    &astats->astats.resident);
 		} else {
-			assert(astats->astats.internal == 0);
+			assert(atomic_load_zu(
+			    &astats->astats.internal, ATOMIC_RELAXED) == 0);
 		}
 
 		if (!destroyed) {
@@ -641,10 +653,11 @@ ctl_arena_stats_sdmerge(ctl_arena_t *ctl_sdarena, ctl_arena_t *ctl_arena,
 		sdstats->nrequests_small += astats->nrequests_small;
 
 		if (!destroyed) {
-			sdstats->astats.allocated_large +=
-			    astats->astats.allocated_large;
+			accum_atomic_zu(&sdstats->astats.allocated_large,
+			    &astats->astats.allocated_large);
 		} else {
-			assert(astats->astats.allocated_large == 0);
+			assert(atomic_load_zu(&astats->astats.allocated_large,
+			    ATOMIC_RELAXED) == 0);
 		}
 		accum_arena_stats_u64(&sdstats->astats.nmalloc_large,
 		    &astats->astats.nmalloc_large);
@@ -654,8 +667,8 @@ ctl_arena_stats_sdmerge(ctl_arena_t *ctl_sdarena, ctl_arena_t *ctl_arena,
 		    &astats->astats.nrequests_large);
 
 		if (config_tcache) {
-			sdstats->astats.tcache_bytes +=
-			    astats->astats.tcache_bytes;
+			accum_atomic_zu(&sdstats->astats.tcache_bytes,
+			    &astats->astats.tcache_bytes);
 		}
 
 		for (i = 0; i < NBINS; i++) {
@@ -772,13 +785,19 @@ ctl_refresh(tsdn_t *tsdn) {
 
 	if (config_stats) {
 		ctl_stats->allocated = ctl_sarena->astats->allocated_small +
-		    ctl_sarena->astats->astats.allocated_large;
+		    atomic_load_zu(&ctl_sarena->astats->astats.allocated_large,
+		    ATOMIC_RELAXED);
 		ctl_stats->active = (ctl_sarena->pactive << LG_PAGE);
-		ctl_stats->metadata = ctl_sarena->astats->astats.base +
-		    ctl_sarena->astats->astats.internal;
-		ctl_stats->resident = ctl_sarena->astats->astats.resident;
-		ctl_stats->mapped = ctl_sarena->astats->astats.mapped;
-		ctl_stats->retained = ctl_sarena->astats->astats.retained;
+		ctl_stats->metadata = atomic_load_zu(
+		    &ctl_sarena->astats->astats.base, ATOMIC_RELAXED) +
+		    atomic_load_zu(&ctl_sarena->astats->astats.internal,
+		    ATOMIC_RELAXED);
+		ctl_stats->resident = atomic_load_zu(
+		    &ctl_sarena->astats->astats.resident, ATOMIC_RELAXED);
+		ctl_stats->mapped = atomic_load_zu(
+		    &ctl_sarena->astats->astats.mapped, ATOMIC_RELAXED);
+		ctl_stats->retained = atomic_load_zu(
+		    &ctl_sarena->astats->astats.retained, ATOMIC_RELAXED);
 	}
 	ctl_arenas->epoch++;
 }
@@ -2169,9 +2188,11 @@ CTL_RO_GEN(stats_arenas_i_nthreads, arenas_i(mib[2])->nthreads, unsigned)
 CTL_RO_GEN(stats_arenas_i_pactive, arenas_i(mib[2])->pactive, size_t)
 CTL_RO_GEN(stats_arenas_i_pdirty, arenas_i(mib[2])->pdirty, size_t)
 CTL_RO_CGEN(config_stats, stats_arenas_i_mapped,
-    arenas_i(mib[2])->astats->astats.mapped, size_t)
+    atomic_load_zu(&arenas_i(mib[2])->astats->astats.mapped, ATOMIC_RELAXED),
+    size_t)
 CTL_RO_CGEN(config_stats, stats_arenas_i_retained,
-    arenas_i(mib[2])->astats->astats.retained, size_t)
+    atomic_load_zu(&arenas_i(mib[2])->astats->astats.retained, ATOMIC_RELAXED),
+    size_t)
 CTL_RO_CGEN(config_stats, stats_arenas_i_npurge,
     arena_stats_read_u64(&arenas_i(mib[2])->astats->astats.npurge), uint64_t)
 CTL_RO_CGEN(config_stats, stats_arenas_i_nmadvise,
@@ -2179,13 +2200,17 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_nmadvise,
 CTL_RO_CGEN(config_stats, stats_arenas_i_purged,
     arena_stats_read_u64(&arenas_i(mib[2])->astats->astats.purged), uint64_t)
 CTL_RO_CGEN(config_stats, stats_arenas_i_base,
-    arenas_i(mib[2])->astats->astats.base, size_t)
+    atomic_load_zu(&arenas_i(mib[2])->astats->astats.base, ATOMIC_RELAXED),
+    size_t)
 CTL_RO_CGEN(config_stats, stats_arenas_i_internal,
-    arenas_i(mib[2])->astats->astats.internal, size_t)
+    atomic_load_zu(&arenas_i(mib[2])->astats->astats.internal, ATOMIC_RELAXED),
+    size_t)
 CTL_RO_CGEN(config_stats && config_tcache, stats_arenas_i_tcache_bytes,
-    arenas_i(mib[2])->astats->astats.tcache_bytes, size_t)
+    atomic_load_zu(&arenas_i(mib[2])->astats->astats.tcache_bytes,
+    ATOMIC_RELAXED), size_t)
 CTL_RO_CGEN(config_stats, stats_arenas_i_resident,
-    arenas_i(mib[2])->astats->astats.resident, size_t)
+    atomic_load_zu(&arenas_i(mib[2])->astats->astats.resident, ATOMIC_RELAXED),
+    size_t)
 
 CTL_RO_CGEN(config_stats, stats_arenas_i_small_allocated,
     arenas_i(mib[2])->astats->allocated_small, size_t)
@@ -2196,7 +2221,8 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_small_ndalloc,
 CTL_RO_CGEN(config_stats, stats_arenas_i_small_nrequests,
     arenas_i(mib[2])->astats->nrequests_small, uint64_t)
 CTL_RO_CGEN(config_stats, stats_arenas_i_large_allocated,
-    arenas_i(mib[2])->astats->astats.allocated_large, size_t)
+    atomic_load_zu(&arenas_i(mib[2])->astats->astats.allocated_large,
+    ATOMIC_RELAXED), size_t)
 CTL_RO_CGEN(config_stats, stats_arenas_i_large_nmalloc,
     arena_stats_read_u64(&arenas_i(mib[2])->astats->astats.nmalloc_large),
     uint64_t)
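
Not part of the patch: a minimal standalone sketch of the relaxed-atomic accumulation pattern that arena_stats_accum_zu() and accum_atomic_zu() implement above. It is written against plain C11 <stdatomic.h> rather than jemalloc's atomic_zu_t / atomic_load_zu wrappers, and the names stats_counter_t and stats_accum are hypothetical.

/*
 * Sketch only, assuming plain C11 outside jemalloc. stats_accum() mirrors
 * arena_stats_accum_zu(): a relaxed load/store pair instead of fetch_add,
 * which is acceptable because each destination counter is written by one
 * merging thread at a time (externally synchronized), while readers may
 * observe a slightly stale value.
 */
#include <stdatomic.h>
#include <stddef.h>
#include <stdio.h>

typedef atomic_size_t stats_counter_t;	/* stand-in for atomic_zu_t */

static void
stats_accum(stats_counter_t *dst, size_t src) {
	size_t cur = atomic_load_explicit(dst, memory_order_relaxed);
	atomic_store_explicit(dst, cur + src, memory_order_relaxed);
}

int
main(void) {
	stats_counter_t mapped;

	atomic_init(&mapped, 0);
	stats_accum(&mapped, 4096);	/* bytes reported by one arena */
	stats_accum(&mapped, 8192);	/* ... and by another */
	printf("mapped: %zu\n",
	    atomic_load_explicit(&mapped, memory_order_relaxed));
	return 0;
}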