diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 2bdbe978..04a47648 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -2798,6 +2798,28 @@ struct extent_hooks_s { all bin size classes. + + + stats.arenas.<i>.small.nfills + (uint64_t) + r- + [] + + Cumulative number of tcache fills by all small size + classes. + + + + + stats.arenas.<i>.small.nflushes + (uint64_t) + r- + [] + + Cumulative number of tcache flushes by all small size + classes. + + stats.arenas.<i>.large.allocated @@ -2848,6 +2870,28 @@ struct extent_hooks_s { all large size classes. + + + stats.arenas.<i>.large.nfills + (uint64_t) + r- + [] + + Cumulative number of tcache fills by all large size + classes. + + + + + stats.arenas.<i>.large.nflushes + (uint64_t) + r- + [] + + Cumulative number of tcache flushes by all large size + classes. + + stats.arenas.<i>.bins.<j>.nmalloc diff --git a/include/jemalloc/internal/arena_stats.h b/include/jemalloc/internal/arena_stats.h index ef1e25b3..3ffe9c78 100644 --- a/include/jemalloc/internal/arena_stats.h +++ b/include/jemalloc/internal/arena_stats.h @@ -35,6 +35,13 @@ struct arena_stats_large_s { * periodically merges into this counter. */ arena_stats_u64_t nrequests; /* Partially derived. */ + /* + * Number of tcache fills / flushes for large (similarly, periodically + * merged). Note that there is no large tcache batch-fill currently + * (i.e. only fill 1 at a time); however flush may be batched. + */ + arena_stats_u64_t nfills; /* Partially derived. */ + arena_stats_u64_t nflushes; /* Partially derived. */ /* Current number of allocations of this size class. */ size_t curlextents; /* Derived. */ @@ -101,6 +108,8 @@ struct arena_stats_s { atomic_zu_t allocated_large; /* Derived. */ arena_stats_u64_t nmalloc_large; /* Derived. */ arena_stats_u64_t ndalloc_large; /* Derived. */ + arena_stats_u64_t nfills_large; /* Derived. */ + arena_stats_u64_t nflushes_large; /* Derived. */ arena_stats_u64_t nrequests_large; /* Derived. */ /* Number of bytes cached in tcache associated with this arena. */ @@ -240,11 +249,12 @@ arena_stats_accum_zu(atomic_zu_t *dst, size_t src) { } static inline void -arena_stats_large_nrequests_add(tsdn_t *tsdn, arena_stats_t *arena_stats, +arena_stats_large_flush_nrequests_add(tsdn_t *tsdn, arena_stats_t *arena_stats, szind_t szind, uint64_t nrequests) { arena_stats_lock(tsdn, arena_stats); - arena_stats_add_u64(tsdn, arena_stats, &arena_stats->lstats[szind - - SC_NBINS].nrequests, nrequests); + arena_stats_large_t *lstats = &arena_stats->lstats[szind - SC_NBINS]; + arena_stats_add_u64(tsdn, arena_stats, &lstats->nrequests, nrequests); + arena_stats_add_u64(tsdn, arena_stats, &lstats->nflushes, 1); arena_stats_unlock(tsdn, arena_stats); } diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index 775fdec0..1d1aacc6 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -39,6 +39,8 @@ typedef struct ctl_arena_stats_s { uint64_t nmalloc_small; uint64_t ndalloc_small; uint64_t nrequests_small; + uint64_t nfills_small; + uint64_t nflushes_small; bin_stats_t bstats[SC_NBINS]; arena_stats_large_t lstats[SC_NSIZES - SC_NBINS]; diff --git a/src/arena.c b/src/arena.c index a0804f61..f9336fee 100644 --- a/src/arena.c +++ b/src/arena.c @@ -151,6 +151,15 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, arena_stats_accum_u64(&astats->nrequests_large, nmalloc + nrequests); + /* nfill == nmalloc for large currently. */ + arena_stats_accum_u64(&lstats[i].nfills, nmalloc); + arena_stats_accum_u64(&astats->nfills_large, nmalloc); + + uint64_t nflush = arena_stats_read_u64(tsdn, &arena->stats, + &arena->stats.lstats[i].nflushes); + arena_stats_accum_u64(&lstats[i].nflushes, nflush); + arena_stats_accum_u64(&astats->nflushes_large, nflush); + assert(nmalloc >= ndalloc); assert(nmalloc - ndalloc <= SIZE_T_MAX); size_t curlextents = (size_t)(nmalloc - ndalloc); diff --git a/src/ctl.c b/src/ctl.c index d258b8eb..f0d51df9 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -156,10 +156,14 @@ CTL_PROTO(stats_arenas_i_small_allocated) CTL_PROTO(stats_arenas_i_small_nmalloc) CTL_PROTO(stats_arenas_i_small_ndalloc) CTL_PROTO(stats_arenas_i_small_nrequests) +CTL_PROTO(stats_arenas_i_small_nfills) +CTL_PROTO(stats_arenas_i_small_nflushes) CTL_PROTO(stats_arenas_i_large_allocated) CTL_PROTO(stats_arenas_i_large_nmalloc) CTL_PROTO(stats_arenas_i_large_ndalloc) CTL_PROTO(stats_arenas_i_large_nrequests) +CTL_PROTO(stats_arenas_i_large_nfills) +CTL_PROTO(stats_arenas_i_large_nflushes) CTL_PROTO(stats_arenas_i_bins_j_nmalloc) CTL_PROTO(stats_arenas_i_bins_j_ndalloc) CTL_PROTO(stats_arenas_i_bins_j_nrequests) @@ -414,14 +418,18 @@ static const ctl_named_node_t stats_arenas_i_small_node[] = { {NAME("allocated"), CTL(stats_arenas_i_small_allocated)}, {NAME("nmalloc"), CTL(stats_arenas_i_small_nmalloc)}, {NAME("ndalloc"), CTL(stats_arenas_i_small_ndalloc)}, - {NAME("nrequests"), CTL(stats_arenas_i_small_nrequests)} + {NAME("nrequests"), CTL(stats_arenas_i_small_nrequests)}, + {NAME("nfills"), CTL(stats_arenas_i_small_nfills)}, + {NAME("nflushes"), CTL(stats_arenas_i_small_nflushes)} }; static const ctl_named_node_t stats_arenas_i_large_node[] = { {NAME("allocated"), CTL(stats_arenas_i_large_allocated)}, {NAME("nmalloc"), CTL(stats_arenas_i_large_nmalloc)}, {NAME("ndalloc"), CTL(stats_arenas_i_large_ndalloc)}, - {NAME("nrequests"), CTL(stats_arenas_i_large_nrequests)} + {NAME("nrequests"), CTL(stats_arenas_i_large_nrequests)}, + {NAME("nfills"), CTL(stats_arenas_i_large_nfills)}, + {NAME("nflushes"), CTL(stats_arenas_i_large_nflushes)} }; #define MUTEX_PROF_DATA_NODE(prefix) \ @@ -754,6 +762,8 @@ ctl_arena_clear(ctl_arena_t *ctl_arena) { ctl_arena->astats->nmalloc_small = 0; ctl_arena->astats->ndalloc_small = 0; ctl_arena->astats->nrequests_small = 0; + ctl_arena->astats->nfills_small = 0; + ctl_arena->astats->nflushes_small = 0; memset(ctl_arena->astats->bstats, 0, SC_NBINS * sizeof(bin_stats_t)); memset(ctl_arena->astats->lstats, 0, (SC_NSIZES - SC_NBINS) * @@ -785,6 +795,10 @@ ctl_arena_stats_amerge(tsdn_t *tsdn, ctl_arena_t *ctl_arena, arena_t *arena) { ctl_arena->astats->bstats[i].ndalloc; ctl_arena->astats->nrequests_small += ctl_arena->astats->bstats[i].nrequests; + ctl_arena->astats->nfills_small += + ctl_arena->astats->bstats[i].nfills; + ctl_arena->astats->nflushes_small += + ctl_arena->astats->bstats[i].nflushes; } } else { arena_basic_stats_merge(tsdn, arena, &ctl_arena->nthreads, @@ -867,6 +881,8 @@ MUTEX_PROF_ARENA_MUTEXES sdstats->nmalloc_small += astats->nmalloc_small; sdstats->ndalloc_small += astats->ndalloc_small; sdstats->nrequests_small += astats->nrequests_small; + sdstats->nfills_small += astats->nfills_small; + sdstats->nflushes_small += astats->nflushes_small; if (!destroyed) { accum_atomic_zu(&sdstats->astats.allocated_large, @@ -2847,6 +2863,10 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_small_ndalloc, arenas_i(mib[2])->astats->ndalloc_small, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_small_nrequests, arenas_i(mib[2])->astats->nrequests_small, uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_small_nfills, + arenas_i(mib[2])->astats->nfills_small, uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_small_nflushes, + arenas_i(mib[2])->astats->nflushes_small, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_large_allocated, atomic_load_zu(&arenas_i(mib[2])->astats->astats.allocated_large, ATOMIC_RELAXED), size_t) @@ -2856,12 +2876,19 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_large_nmalloc, CTL_RO_CGEN(config_stats, stats_arenas_i_large_ndalloc, ctl_arena_stats_read_u64( &arenas_i(mib[2])->astats->astats.ndalloc_large), uint64_t) -/* - * Note: "nmalloc" here instead of "nrequests" in the read. This is intentional. - */ CTL_RO_CGEN(config_stats, stats_arenas_i_large_nrequests, ctl_arena_stats_read_u64( - &arenas_i(mib[2])->astats->astats.nmalloc_large), uint64_t) /* Intentional. */ + &arenas_i(mib[2])->astats->astats.nrequests_large), uint64_t) +/* + * Note: "nmalloc_large" here instead of "nfills" in the read. This is + * intentional (large has no batch fill). + */ +CTL_RO_CGEN(config_stats, stats_arenas_i_large_nfills, + ctl_arena_stats_read_u64( + &arenas_i(mib[2])->astats->astats.nmalloc_large), uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_large_nflushes, + ctl_arena_stats_read_u64( + &arenas_i(mib[2])->astats->astats.nflushes_large), uint64_t) /* Lock profiling related APIs below. */ #define RO_MUTEX_CTL_GEN(n, l) \ diff --git a/src/stats.c b/src/stats.c index d196666f..55a59994 100644 --- a/src/stats.c +++ b/src/stats.c @@ -668,9 +668,11 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, uint64_t dirty_npurge, dirty_nmadvise, dirty_purged; uint64_t muzzy_npurge, muzzy_nmadvise, muzzy_purged; size_t small_allocated; - uint64_t small_nmalloc, small_ndalloc, small_nrequests; + uint64_t small_nmalloc, small_ndalloc, small_nrequests, small_nfills, + small_nflushes; size_t large_allocated; - uint64_t large_nmalloc, large_ndalloc, large_nrequests; + uint64_t large_nmalloc, large_ndalloc, large_nrequests, large_nfills, + large_nflushes; size_t tcache_bytes; uint64_t uptime; @@ -828,11 +830,23 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, COL(alloc_count_row, count_nrequests_ps, right, 10, title); col_count_nrequests_ps.str_val = "(#/sec)"; + COL(alloc_count_row, count_nfills, right, 16, title); + col_count_nfills.str_val = "nfill"; + COL(alloc_count_row, count_nfills_ps, right, 10, title); + col_count_nfills_ps.str_val = "(#/sec)"; + + COL(alloc_count_row, count_nflushes, right, 16, title); + col_count_nflushes.str_val = "nflush"; + COL(alloc_count_row, count_nflushes_ps, right, 10, title); + col_count_nflushes_ps.str_val = "(#/sec)"; + emitter_table_row(emitter, &alloc_count_row); col_count_nmalloc_ps.type = emitter_type_uint64; col_count_ndalloc_ps.type = emitter_type_uint64; col_count_nrequests_ps.type = emitter_type_uint64; + col_count_nfills_ps.type = emitter_type_uint64; + col_count_nflushes_ps.type = emitter_type_uint64; #define GET_AND_EMIT_ALLOC_STAT(small_or_large, name, valtype) \ CTL_M2_GET("stats.arenas.0." #small_or_large "." #name, i, \ @@ -855,6 +869,12 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, GET_AND_EMIT_ALLOC_STAT(small, nrequests, uint64) col_count_nrequests_ps.uint64_val = rate_per_second(col_count_nrequests.uint64_val, uptime); + GET_AND_EMIT_ALLOC_STAT(small, nfills, uint64) + col_count_nfills_ps.uint64_val = + rate_per_second(col_count_nfills.uint64_val, uptime); + GET_AND_EMIT_ALLOC_STAT(small, nflushes, uint64) + col_count_nflushes_ps.uint64_val = + rate_per_second(col_count_nflushes.uint64_val, uptime); emitter_table_row(emitter, &alloc_count_row); emitter_json_object_end(emitter); /* Close "small". */ @@ -872,6 +892,12 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, GET_AND_EMIT_ALLOC_STAT(large, nrequests, uint64) col_count_nrequests_ps.uint64_val = rate_per_second(col_count_nrequests.uint64_val, uptime); + GET_AND_EMIT_ALLOC_STAT(large, nfills, uint64) + col_count_nfills_ps.uint64_val = + rate_per_second(col_count_nfills.uint64_val, uptime); + GET_AND_EMIT_ALLOC_STAT(large, nflushes, uint64) + col_count_nflushes_ps.uint64_val = + rate_per_second(col_count_nflushes.uint64_val, uptime); emitter_table_row(emitter, &alloc_count_row); emitter_json_object_end(emitter); /* Close "large". */ @@ -884,12 +910,18 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, col_count_nmalloc.uint64_val = small_nmalloc + large_nmalloc; col_count_ndalloc.uint64_val = small_ndalloc + large_ndalloc; col_count_nrequests.uint64_val = small_nrequests + large_nrequests; + col_count_nfills.uint64_val = small_nfills + large_nfills; + col_count_nflushes.uint64_val = small_nflushes + large_nflushes; col_count_nmalloc_ps.uint64_val = rate_per_second(col_count_nmalloc.uint64_val, uptime); col_count_ndalloc_ps.uint64_val = rate_per_second(col_count_ndalloc.uint64_val, uptime); col_count_nrequests_ps.uint64_val = rate_per_second(col_count_nrequests.uint64_val, uptime); + col_count_nfills_ps.uint64_val = + rate_per_second(col_count_nfills.uint64_val, uptime); + col_count_nflushes_ps.uint64_val = + rate_per_second(col_count_nflushes.uint64_val, uptime); emitter_table_row(emitter, &alloc_count_row); emitter_row_t mem_count_row; diff --git a/src/tcache.c b/src/tcache.c index 034c69a0..50099a9f 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -282,8 +282,8 @@ tcache_bin_flush_large(tsd_t *tsd, cache_bin_t *tbin, szind_t binind, } if (config_stats) { merged_stats = true; - arena_stats_large_nrequests_add(tsd_tsdn(tsd), - &tcache_arena->stats, binind, + arena_stats_large_flush_nrequests_add( + tsd_tsdn(tsd), &tcache_arena->stats, binind, tbin->tstats.nrequests); tbin->tstats.nrequests = 0; } @@ -324,7 +324,7 @@ tcache_bin_flush_large(tsd_t *tsd, cache_bin_t *tbin, szind_t binind, * The flush loop didn't happen to flush to this thread's * arena, so the stats didn't get merged. Manually do so now. */ - arena_stats_large_nrequests_add(tsd_tsdn(tsd), + arena_stats_large_flush_nrequests_add(tsd_tsdn(tsd), &tcache_arena->stats, binind, tbin->tstats.nrequests); tbin->tstats.nrequests = 0; } @@ -615,7 +615,7 @@ tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) { for (; i < nhbins; i++) { cache_bin_t *tbin = tcache_large_bin_get(tcache, i); - arena_stats_large_nrequests_add(tsdn, &arena->stats, i, + arena_stats_large_flush_nrequests_add(tsdn, &arena->stats, i, tbin->tstats.nrequests); tbin->tstats.nrequests = 0; }