From 0fb5c0e853963480196ac413db18d1ad78d87ec9 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 10 Mar 2017 12:14:05 -0800 Subject: [PATCH] Add arena lock stats output. --- include/jemalloc/internal/ctl_externs.h | 8 ++ include/jemalloc/internal/mutex_inlines.h | 6 +- include/jemalloc/internal/mutex_structs.h | 8 +- include/jemalloc/internal/mutex_types.h | 3 +- include/jemalloc/internal/stats_structs.h | 7 ++ src/arena.c | 18 +++ src/ctl.c | 142 ++++++++++++++++++++-- src/mutex.c | 46 ++++--- src/stats.c | 82 ++++++++++--- 9 files changed, 269 insertions(+), 51 deletions(-) diff --git a/include/jemalloc/internal/ctl_externs.h b/include/jemalloc/internal/ctl_externs.h index 2ef48c66..812ec4fa 100644 --- a/include/jemalloc/internal/ctl_externs.h +++ b/include/jemalloc/internal/ctl_externs.h @@ -1,6 +1,14 @@ #ifndef JEMALLOC_INTERNAL_CTL_EXTERNS_H #define JEMALLOC_INTERNAL_CTL_EXTERNS_H +/* Maximum ctl tree depth. */ +#define CTL_MAX_DEPTH 7 + +#define NUM_ARENA_PROF_LOCKS 6 +#define NUM_LOCK_PROF_COUNTERS 7 +const char *arena_lock_names[NUM_ARENA_PROF_LOCKS]; +const char *lock_counter_names[NUM_LOCK_PROF_COUNTERS]; + int ctl_byname(tsd_t *tsd, const char *name, void *oldp, size_t *oldlenp, void *newp, size_t newlen); int ctl_nametomib(tsdn_t *tsdn, const char *name, size_t *mibp, diff --git a/include/jemalloc/internal/mutex_inlines.h b/include/jemalloc/internal/mutex_inlines.h index 8e81fcde..1020eefd 100644 --- a/include/jemalloc/internal/mutex_inlines.h +++ b/include/jemalloc/internal/mutex_inlines.h @@ -29,9 +29,9 @@ malloc_mutex_trylock(malloc_mutex_t *mutex) { /* Aggregate lock prof data. */ JEMALLOC_INLINE void malloc_lock_prof_merge(lock_prof_data_t *sum, lock_prof_data_t *data) { - nstime_add(&sum->tot_wait_time, &data->tot_wait_time); - if (nstime_compare(&data->max_wait_time, &sum->max_wait_time)) { - nstime_copy(&sum->max_wait_time, &data->max_wait_time); + sum->tot_wait_time += data->tot_wait_time; + if (data->max_wait_time > sum->max_wait_time) { + sum->max_wait_time = data->max_wait_time; } sum->n_wait_times += data->n_wait_times; sum->n_spin_acquired += data->n_spin_acquired; diff --git a/include/jemalloc/internal/mutex_structs.h b/include/jemalloc/internal/mutex_structs.h index 7065c997..ce80e15d 100644 --- a/include/jemalloc/internal/mutex_structs.h +++ b/include/jemalloc/internal/mutex_structs.h @@ -6,10 +6,10 @@ struct lock_prof_data_s { * Counters touched on the slow path, i.e. when there is lock * contention. We update them once we have the lock. */ - /* Total time spent waiting on this lock. */ - nstime_t tot_wait_time; - /* Max time spent on a single lock operation. */ - nstime_t max_wait_time; + /* Total time (in nano seconds) spent waiting on this lock. */ + uint64_t tot_wait_time; + /* Max time (in nano seconds) spent on a single lock operation. */ + uint64_t max_wait_time; /* # of times have to wait for this lock (after spinning). */ uint64_t n_wait_times; /* # of times acquired the lock through local spinning. */ diff --git a/include/jemalloc/internal/mutex_types.h b/include/jemalloc/internal/mutex_types.h index d7c7f04f..0d93fe87 100644 --- a/include/jemalloc/internal/mutex_types.h +++ b/include/jemalloc/internal/mutex_types.h @@ -28,8 +28,7 @@ typedef struct malloc_mutex_s malloc_mutex_t; # define MALLOC_MUTEX_TRYLOCK(m) (pthread_mutex_trylock(&(m)->lock) != 0) #endif -#define LOCK_PROF_DATA_INITIALIZER \ - {NSTIME_ZERO_INITIALIZER, NSTIME_ZERO_INITIALIZER, 0, 0, 0, 0, 0, NULL, 0} +#define LOCK_PROF_DATA_INITIALIZER {0, 0, 0, 0, 0, 0, 0, NULL, 0} #ifdef _WIN32 # define MALLOC_MUTEX_INITIALIZER diff --git a/include/jemalloc/internal/stats_structs.h b/include/jemalloc/internal/stats_structs.h index cbc448a3..0e33394b 100644 --- a/include/jemalloc/internal/stats_structs.h +++ b/include/jemalloc/internal/stats_structs.h @@ -124,6 +124,13 @@ struct arena_stats_s { /* Number of bytes cached in tcache associated with this arena. */ atomic_zu_t tcache_bytes; /* Derived. */ + lock_prof_data_t large_mtx_data; + lock_prof_data_t extent_freelist_mtx_data; + lock_prof_data_t extents_cached_mtx_data; + lock_prof_data_t extents_retained_mtx_data; + lock_prof_data_t decay_mtx_data; + lock_prof_data_t tcache_mtx_data; + /* One element for each large size class. */ malloc_large_stats_t lstats[NSIZES - NBINS]; }; diff --git a/src/arena.c b/src/arena.c index 832a125e..0c12004d 100644 --- a/src/arena.c +++ b/src/arena.c @@ -292,9 +292,27 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, tbin->ncached * index2size(i)); } } + malloc_lock_prof_read(tsdn, &astats->tcache_mtx_data, + &arena->tcache_ql_mtx); malloc_mutex_unlock(tsdn, &arena->tcache_ql_mtx); } +#define READ_ARENA_MUTEX_PROF_DATA(mtx, data) \ + malloc_mutex_lock(tsdn, &arena->mtx); \ + malloc_lock_prof_read(tsdn, &astats->data, &arena->mtx); \ + malloc_mutex_unlock(tsdn, &arena->mtx); + + /* Gather per arena mutex profiling data. */ + READ_ARENA_MUTEX_PROF_DATA(large_mtx, large_mtx_data) + READ_ARENA_MUTEX_PROF_DATA(extent_freelist_mtx, + extent_freelist_mtx_data) + READ_ARENA_MUTEX_PROF_DATA(extents_cached.mtx, + extents_cached_mtx_data) + READ_ARENA_MUTEX_PROF_DATA(extents_retained.mtx, + extents_retained_mtx_data) + READ_ARENA_MUTEX_PROF_DATA(decay.mtx, decay_mtx_data) +#undef READ_ARENA_MUTEX_PROF_DATA + for (szind_t i = 0; i < NBINS; i++) { arena_bin_t *bin = &arena->bins[i]; diff --git a/src/ctl.c b/src/ctl.c index d9f8be60..016275e5 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -13,6 +13,24 @@ static bool ctl_initialized; static ctl_stats_t *ctl_stats; static ctl_arenas_t *ctl_arenas; +const char *arena_lock_names[NUM_ARENA_PROF_LOCKS] = { + "large", + "extent_freelist", + "extents_cached", + "extents_retained", + "decay", + "tcache" +}; +const char *lock_counter_names[NUM_LOCK_PROF_COUNTERS] = { + "num_ops", + "num_wait", + "num_spin_acq", + "num_owner_switch", + "total_wait_time", + "max_wait_time", + "max_num_thds" +}; + /******************************************************************************/ /* Helpers for named and indexed nodes. */ @@ -145,7 +163,6 @@ CTL_PROTO(stats_arenas_i_bins_j_nflushes) CTL_PROTO(stats_arenas_i_bins_j_nslabs) CTL_PROTO(stats_arenas_i_bins_j_nreslabs) CTL_PROTO(stats_arenas_i_bins_j_curslabs) -CTL_PROTO(stats_arenas_i_bins_j_lock_data) INDEX_PROTO(stats_arenas_i_bins_j) CTL_PROTO(stats_arenas_i_lextents_j_nmalloc) CTL_PROTO(stats_arenas_i_lextents_j_ndalloc) @@ -179,12 +196,30 @@ CTL_PROTO(stats_resident) CTL_PROTO(stats_mapped) CTL_PROTO(stats_retained) +#define STATS_LOCKS_CTL_PROTO_GEN(l, n) \ +CTL_PROTO(stats_arenas_i_##l##_##n##_num_ops) \ +CTL_PROTO(stats_arenas_i_##l##_##n##_num_wait) \ +CTL_PROTO(stats_arenas_i_##l##_##n##_num_spin_acq) \ +CTL_PROTO(stats_arenas_i_##l##_##n##_num_owner_switch) \ +CTL_PROTO(stats_arenas_i_##l##_##n##_total_wait_time) \ +CTL_PROTO(stats_arenas_i_##l##_##n##_max_wait_time) \ +CTL_PROTO(stats_arenas_i_##l##_##n##_max_num_thds) + +#define ARENA_LOCKS_CTL_PROTO_GEN(n) STATS_LOCKS_CTL_PROTO_GEN(locks, n) +ARENA_LOCKS_CTL_PROTO_GEN(large) +ARENA_LOCKS_CTL_PROTO_GEN(extent_freelist) +ARENA_LOCKS_CTL_PROTO_GEN(extents_cached) +ARENA_LOCKS_CTL_PROTO_GEN(extents_retained) +ARENA_LOCKS_CTL_PROTO_GEN(decay) +ARENA_LOCKS_CTL_PROTO_GEN(tcache) +#undef ARENA_LOCKS_CTL_PROTO_GEN + +STATS_LOCKS_CTL_PROTO_GEN(bins_j, lock) +#undef STATS_LOCKS_CTL_PROTO_GEN + /******************************************************************************/ /* mallctl tree. */ -/* Maximum tree depth. */ -#define CTL_MAX_DEPTH 6 - #define NAME(n) {true}, n #define CHILD(t, c) \ sizeof(c##_node) / sizeof(ctl_##t##_node_t), \ @@ -349,6 +384,26 @@ static const ctl_named_node_t stats_arenas_i_large_node[] = { {NAME("nrequests"), CTL(stats_arenas_i_large_nrequests)} }; +#define LOCK_PROF_DATA_NODE(prefix, n) \ +static const ctl_named_node_t prefix##_##n##_node[] = { \ + {NAME("num_ops"), \ + CTL(prefix##_##n##_num_ops)}, \ + {NAME("num_wait"), \ + CTL(prefix##_##n##_num_wait)}, \ + {NAME("num_spin_acq"), \ + CTL(prefix##_##n##_num_spin_acq)}, \ + {NAME("num_owner_switch"), \ + CTL(prefix##_##n##_num_owner_switch)}, \ + {NAME("total_wait_time"), \ + CTL(prefix##_##n##_total_wait_time)}, \ + {NAME("max_wait_time"), \ + CTL(prefix##_##n##_max_wait_time)}, \ + {NAME("max_num_thds"), \ + CTL(prefix##_##n##_max_num_thds)} \ + /* Note that # of current waiting thread not provided. */ \ +}; + +LOCK_PROF_DATA_NODE(stats_arenas_i_bins_j, lock) static const ctl_named_node_t stats_arenas_i_bins_j_node[] = { {NAME("nmalloc"), CTL(stats_arenas_i_bins_j_nmalloc)}, {NAME("ndalloc"), CTL(stats_arenas_i_bins_j_ndalloc)}, @@ -359,8 +414,9 @@ static const ctl_named_node_t stats_arenas_i_bins_j_node[] = { {NAME("nslabs"), CTL(stats_arenas_i_bins_j_nslabs)}, {NAME("nreslabs"), CTL(stats_arenas_i_bins_j_nreslabs)}, {NAME("curslabs"), CTL(stats_arenas_i_bins_j_curslabs)}, - {NAME("lock_data"), CTL(stats_arenas_i_bins_j_lock_data)} + {NAME("lock"), CHILD(named, stats_arenas_i_bins_j_lock)} }; + static const ctl_named_node_t super_stats_arenas_i_bins_j_node[] = { {NAME(""), CHILD(named, stats_arenas_i_bins_j)} }; @@ -383,6 +439,26 @@ static const ctl_indexed_node_t stats_arenas_i_lextents_node[] = { {INDEX(stats_arenas_i_lextents_j)} }; +LOCK_PROF_DATA_NODE(stats_arenas_i_locks, large) +LOCK_PROF_DATA_NODE(stats_arenas_i_locks, extent_freelist) +LOCK_PROF_DATA_NODE(stats_arenas_i_locks, extents_cached) +LOCK_PROF_DATA_NODE(stats_arenas_i_locks, extents_retained) +LOCK_PROF_DATA_NODE(stats_arenas_i_locks, decay) +LOCK_PROF_DATA_NODE(stats_arenas_i_locks, tcache) + +static const ctl_named_node_t stats_arenas_i_locks_node[] = { + {NAME("large"), CHILD(named, stats_arenas_i_locks_large)}, + {NAME("extent_freelist"), + CHILD(named, stats_arenas_i_locks_extent_freelist)}, + {NAME("extents_cached"), + CHILD(named, stats_arenas_i_locks_extents_cached)}, + {NAME("extents_retained"), + CHILD(named, stats_arenas_i_locks_extents_retained)}, + {NAME("decay"), CHILD(named, stats_arenas_i_locks_decay)}, + {NAME("tcache"), CHILD(named, stats_arenas_i_locks_tcache)} +}; +#undef LOCK_PROF_DATA_NODE + static const ctl_named_node_t stats_arenas_i_node[] = { {NAME("nthreads"), CTL(stats_arenas_i_nthreads)}, {NAME("dss"), CTL(stats_arenas_i_dss)}, @@ -406,7 +482,8 @@ static const ctl_named_node_t stats_arenas_i_node[] = { {NAME("small"), CHILD(named, stats_arenas_i_small)}, {NAME("large"), CHILD(named, stats_arenas_i_large)}, {NAME("bins"), CHILD(indexed, stats_arenas_i_bins)}, - {NAME("lextents"), CHILD(indexed, stats_arenas_i_lextents)} + {NAME("lextents"), CHILD(indexed, stats_arenas_i_lextents)}, + {NAME("locks"), CHILD(named, stats_arenas_i_locks)} }; static const ctl_named_node_t super_stats_arenas_i_node[] = { {NAME(""), CHILD(named, stats_arenas_i)} @@ -663,6 +740,22 @@ ctl_arena_stats_sdmerge(ctl_arena_t *ctl_sdarena, ctl_arena_t *ctl_arena, accum_arena_stats_u64(&sdstats->astats.decay_muzzy.purged, &astats->astats.decay_muzzy.purged); + malloc_lock_prof_merge(&(sdstats->astats.large_mtx_data), + &(astats->astats.large_mtx_data)); + malloc_lock_prof_merge( + &(sdstats->astats.extent_freelist_mtx_data), + &(astats->astats.extent_freelist_mtx_data)); + malloc_lock_prof_merge( + &(sdstats->astats.extents_cached_mtx_data), + &(astats->astats.extents_cached_mtx_data)); + malloc_lock_prof_merge( + &(sdstats->astats.extents_retained_mtx_data), + &(astats->astats.extents_retained_mtx_data)); + malloc_lock_prof_merge(&(sdstats->astats.decay_mtx_data), + &(astats->astats.decay_mtx_data)); + malloc_lock_prof_merge(&(sdstats->astats.tcache_mtx_data), + &(astats->astats.tcache_mtx_data)); + if (!destroyed) { accum_atomic_zu(&sdstats->astats.base, &astats->astats.base); @@ -2319,6 +2412,41 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_large_nrequests, arena_stats_read_u64(&arenas_i(mib[2])->astats->astats.nmalloc_large), uint64_t) /* Intentional. */ +/* Lock profiling related APIs below. */ +#define ARENAS_LOCK_CTL_GEN(l, s, d) \ +CTL_RO_CGEN(config_stats, stats_arenas_i_##l##_num_ops, \ + arenas_i(mib[2])->astats->s.d.n_lock_ops, uint64_t) \ +CTL_RO_CGEN(config_stats, stats_arenas_i_##l##_num_wait, \ + arenas_i(mib[2])->astats->s.d.n_wait_times, uint64_t) \ +CTL_RO_CGEN(config_stats, stats_arenas_i_##l##_num_spin_acq, \ + arenas_i(mib[2])->astats->s.d.n_spin_acquired, uint64_t) \ +CTL_RO_CGEN(config_stats, stats_arenas_i_##l##_num_owner_switch, \ + arenas_i(mib[2])->astats->s.d.n_owner_switches, uint64_t) \ +CTL_RO_CGEN(config_stats, stats_arenas_i_##l##_total_wait_time, \ + arenas_i(mib[2])->astats->s.d.tot_wait_time, uint64_t) \ +CTL_RO_CGEN(config_stats, stats_arenas_i_##l##_max_wait_time, \ + arenas_i(mib[2])->astats->s.d.max_wait_time, uint64_t) \ +CTL_RO_CGEN(config_stats, stats_arenas_i_##l##_max_num_thds, \ + arenas_i(mib[2])->astats->s.d.max_n_thds, uint64_t) + +#define ARENAS_ASTATS_LOCK_CTL_GEN(l, d) \ + ARENAS_LOCK_CTL_GEN(locks_##l, astats, d) + +/* arena->large_mtx */ +ARENAS_ASTATS_LOCK_CTL_GEN(large, large_mtx_data) +/* arena->extent_freelist_mtx */ +ARENAS_ASTATS_LOCK_CTL_GEN(extent_freelist, extent_freelist_mtx_data) +/* arena->extents_cached.mtx */ +ARENAS_ASTATS_LOCK_CTL_GEN(extents_cached, extents_cached_mtx_data) +/* arena->extents_retained.mtx */ +ARENAS_ASTATS_LOCK_CTL_GEN(extents_retained, extents_retained_mtx_data) +/* arena->decay.mtx */ +ARENAS_ASTATS_LOCK_CTL_GEN(decay, decay_mtx_data) +/* arena->tcache_ql_mtx */ +ARENAS_ASTATS_LOCK_CTL_GEN(tcache, tcache_mtx_data) +/* arena->bins[j].lock */ +ARENAS_LOCK_CTL_GEN(bins_j_lock, bstats[mib[4]], lock_data) + CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nmalloc, arenas_i(mib[2])->astats->bstats[mib[4]].nmalloc, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_ndalloc, @@ -2337,8 +2465,6 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nreslabs, arenas_i(mib[2])->astats->bstats[mib[4]].reslabs, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_curslabs, arenas_i(mib[2])->astats->bstats[mib[4]].curslabs, size_t) -CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_lock_data, - arenas_i(mib[2])->astats->bstats[mib[4]].lock_data, lock_prof_data_t) static const ctl_named_node_t * stats_arenas_i_bins_j_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, diff --git a/src/mutex.c b/src/mutex.c index 2b80a9d9..af6f3c19 100644 --- a/src/mutex.c +++ b/src/mutex.c @@ -68,49 +68,55 @@ JEMALLOC_EXPORT int _pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex, void malloc_mutex_lock_slow(malloc_mutex_t *mutex) { lock_prof_data_t *data = &mutex->prof_data; - bool spin_success = false; {//TODO: a smart spin policy if (!malloc_mutex_trylock(mutex)) { - spin_success = true; - goto label_locked; + data->n_spin_acquired++; + return; } } nstime_t now, before; - uint32_t n_thds; nstime_init(&now, 0); nstime_update(&now); - n_thds = atomic_add_u32(&data->n_waiting_thds, 1); + nstime_copy(&before, &now); + + uint32_t n_thds = atomic_add_u32(&data->n_waiting_thds, 1); /* One last try as above two calls may take quite some cycles. */ if (!malloc_mutex_trylock(mutex)) { - spin_success = true; atomic_sub_u32(&data->n_waiting_thds, 1); - goto label_locked; + data->n_spin_acquired++; + return; } - nstime_copy(&before, &now); + + /* True slow path. */ malloc_mutex_lock_final(mutex); atomic_sub_u32(&data->n_waiting_thds, 1); nstime_update(&now); + + /* Update more slow-path only counters. */ nstime_subtract(&now, &before); -label_locked: - if (spin_success) { - data->n_spin_acquired++; - } else { - data->n_wait_times++; - nstime_add(&data->tot_wait_time, &now); - if (nstime_compare(&now, &data->max_wait_time)) { - nstime_copy(&data->max_wait_time, &now); - } - if (n_thds > data->max_n_thds) { - data->max_n_thds = n_thds; - } + uint64_t wait_time = nstime_ns(&now); + data->n_wait_times++; + data->tot_wait_time += wait_time; + if (wait_time > data->max_wait_time) { + data->max_wait_time = wait_time; } + if (n_thds > data->max_n_thds) { + data->max_n_thds = n_thds; + } +} + +static void +lock_prof_data_init(lock_prof_data_t *data) { + memset(data, 0, sizeof(lock_prof_data_t)); + data->prev_owner = NULL; } bool malloc_mutex_init(malloc_mutex_t *mutex, const char *name, witness_rank_t rank) { + lock_prof_data_init(&mutex->prof_data); #ifdef _WIN32 # if _WIN32_WINNT >= 0x0600 InitializeSRWLock(&mutex->lock); diff --git a/src/stats.c b/src/stats.c index 58283f87..7b690e7f 100644 --- a/src/stats.c +++ b/src/stats.c @@ -7,7 +7,7 @@ } while (0) #define CTL_M2_GET(n, i, v, t) do { \ - size_t mib[6]; \ + size_t mib[CTL_MAX_DEPTH]; \ size_t miblen = sizeof(mib) / sizeof(size_t); \ size_t sz = sizeof(t); \ xmallctlnametomib(n, mib, &miblen); \ @@ -16,7 +16,7 @@ } while (0) #define CTL_M2_M4_GET(n, i, j, v, t) do { \ - size_t mib[6]; \ + size_t mib[CTL_MAX_DEPTH]; \ size_t miblen = sizeof(mib) / sizeof(size_t); \ size_t sz = sizeof(t); \ xmallctlnametomib(n, mib, &miblen); \ @@ -77,13 +77,13 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, " ndalloc nrequests curregs" " curslabs regs pgs util nfills" " nflushes newslabs reslabs" - " contention max_wait\n"); + " contention max_wait_ns\n"); } else { malloc_cprintf(write_cb, cbopaque, "bins: size ind allocated nmalloc" " ndalloc nrequests curregs" " curslabs regs pgs util newslabs" - " reslabs contention max_wait\n"); + " reslabs contention max_wait_ns\n"); } } for (j = 0, in_gap = false; j < nbins; j++) { @@ -126,9 +126,14 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, uint64_t); CTL_M2_M4_GET("stats.arenas.0.bins.0.curslabs", i, j, &curslabs, size_t); - lock_prof_data_t lock_data; - CTL_M2_M4_GET("stats.arenas.0.bins.0.lock_data", i, j, - &lock_data, lock_prof_data_t); + + uint64_t num_ops, num_wait, max_wait; + CTL_M2_M4_GET("stats.arenas.0.bins.0.lock.num_wait", i, j, + &num_wait, uint64_t); + CTL_M2_M4_GET("stats.arenas.0.bins.0.lock.max_wait_time", i, j, + &max_wait, uint64_t); + CTL_M2_M4_GET("stats.arenas.0.bins.0.lock.num_ops", i, j, + &num_ops, uint64_t); if (json) { malloc_cprintf(write_cb, cbopaque, @@ -178,15 +183,12 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, } } char rate[6]; - if (get_rate_str(lock_data.n_wait_times, - lock_data.n_lock_ops, rate)) { - if (lock_data.n_lock_ops == 0) { + if (get_rate_str(num_wait, num_ops, rate)) { + if (num_ops == 0) { malloc_snprintf(rate, sizeof(rate), "0"); } } - uint64_t max_wait = nstime_msec( - &lock_data.max_wait_time); if (config_tcache) { malloc_cprintf(write_cb, cbopaque, @@ -204,11 +206,11 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, "%20zu %3u %12zu %12"FMTu64 " %12"FMTu64" %12"FMTu64" %12zu" " %12zu %4u %3zu %-5s %12"FMTu64 - " %12"FMTu64" %12s\n", + " %12"FMTu64" %12s %12"FMTu64"\n", reg_size, j, curregs * reg_size, nmalloc, ndalloc, nrequests, curregs, curslabs, nregs, slab_size / page, util, nslabs, - nreslabs, rate); + nreslabs, rate, max_wait); } } } @@ -287,6 +289,57 @@ stats_arena_lextents_print(void (*write_cb)(void *, const char *), } } +static void +gen_ctl_str(char *str, const char *lock, const char *counter) { + sprintf(str, "stats.arenas.0.locks.%s.%s", lock, counter); +} + +static void read_arena_lock_stats(unsigned arena_ind, + uint64_t results[NUM_ARENA_PROF_LOCKS][NUM_LOCK_PROF_COUNTERS]) { + char cmd[128]; + + unsigned i, j; + for (i = 0; i < NUM_ARENA_PROF_LOCKS; i++) { + for (j = 0; j < NUM_LOCK_PROF_COUNTERS; j++) { + gen_ctl_str(cmd, arena_lock_names[i], + lock_counter_names[j]); + CTL_M2_GET(cmd, arena_ind, &results[i][j], uint64_t); + } + } +} + +static void +stats_arena_locks_print(void (*write_cb)(void *, const char *), + void *cbopaque, bool json, unsigned arena_ind) { + uint64_t lock_stats[NUM_ARENA_PROF_LOCKS][NUM_LOCK_PROF_COUNTERS]; + read_arena_lock_stats(arena_ind, lock_stats); + + /* Output lock stats. */ + if (json) { + //TODO + } else { + malloc_cprintf(write_cb, cbopaque, + " n_lock_ops n_waiting" + " n_spin_acq n_owner_switch total_wait_ns" + " max_wait_ns max_n_wait_thds\n"); + + unsigned i, j; + for (i = 0; i < NUM_ARENA_PROF_LOCKS; i++) { + malloc_cprintf(write_cb, cbopaque, + "%s", arena_lock_names[i]); + malloc_cprintf(write_cb, cbopaque, ":%*c", + (int)(18 - strlen(arena_lock_names[i])), ' '); + + for (j = 0; j < NUM_LOCK_PROF_COUNTERS; j++) { + malloc_cprintf(write_cb, cbopaque, " %15"FMTu64, + lock_stats[i][j]); + } + malloc_cprintf(write_cb, cbopaque, "\n"); + } + } + +} + static void stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, bool json, unsigned i, bool bins, bool large) { @@ -518,6 +571,7 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, "resident: %12zu\n", resident); } + stats_arena_locks_print(write_cb, cbopaque, json, i); if (bins) { stats_arena_bins_print(write_cb, cbopaque, json, large, i); }