Output bin lock profiling results to malloc_stats.
Two counters are included for the small bins: lock contention rate, and max lock waiting time.
This commit is contained in:
parent
6309df628f
commit
a4f176af57
@ -9,6 +9,9 @@ bool malloc_mutex_trylock(malloc_mutex_t *mutex);
|
||||
void malloc_mutex_unlock(tsdn_t *tsdn, malloc_mutex_t *mutex);
|
||||
void malloc_mutex_assert_owner(tsdn_t *tsdn, malloc_mutex_t *mutex);
|
||||
void malloc_mutex_assert_not_owner(tsdn_t *tsdn, malloc_mutex_t *mutex);
|
||||
void malloc_lock_prof_read(tsdn_t *tsdn, lock_prof_data_t *data,
|
||||
malloc_mutex_t *mutex);
|
||||
void malloc_lock_prof_merge(lock_prof_data_t *sum, lock_prof_data_t *data);
|
||||
#endif
|
||||
|
||||
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_MUTEX_C_))
|
||||
@ -23,6 +26,24 @@ malloc_mutex_trylock(malloc_mutex_t *mutex) {
|
||||
return MALLOC_MUTEX_TRYLOCK(mutex);
|
||||
}
|
||||
|
||||
/* Aggregate lock prof data. */
|
||||
JEMALLOC_INLINE void
|
||||
malloc_lock_prof_merge(lock_prof_data_t *sum, lock_prof_data_t *data) {
|
||||
nstime_add(&sum->tot_wait_time, &data->tot_wait_time);
|
||||
if (nstime_compare(&data->max_wait_time, &sum->max_wait_time)) {
|
||||
nstime_copy(&sum->max_wait_time, &data->max_wait_time);
|
||||
}
|
||||
sum->n_wait_times += data->n_wait_times;
|
||||
sum->n_spin_acquired += data->n_spin_acquired;
|
||||
|
||||
if (sum->max_n_thds < data->max_n_thds) {
|
||||
sum->max_n_thds = data->max_n_thds;
|
||||
}
|
||||
sum->n_waiting_thds += data->n_waiting_thds;
|
||||
sum->n_owner_switches += data->n_owner_switches;
|
||||
sum->n_lock_ops += data->n_lock_ops;
|
||||
}
|
||||
|
||||
JEMALLOC_INLINE void
|
||||
malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex) {
|
||||
witness_assert_not_owner(tsdn, &mutex->witness);
|
||||
@ -58,6 +79,20 @@ JEMALLOC_INLINE void
|
||||
malloc_mutex_assert_not_owner(tsdn_t *tsdn, malloc_mutex_t *mutex) {
|
||||
witness_assert_not_owner(tsdn, &mutex->witness);
|
||||
}
|
||||
|
||||
/* Copy the prof data from mutex for processing. */
|
||||
JEMALLOC_INLINE void
|
||||
malloc_lock_prof_read(tsdn_t *tsdn, lock_prof_data_t *data,
|
||||
malloc_mutex_t *mutex) {
|
||||
lock_prof_data_t *source = &mutex->prof_data;
|
||||
/* Can only read with the lock. */
|
||||
malloc_mutex_assert_owner(tsdn, mutex);
|
||||
|
||||
*data = *source;
|
||||
/* n_wait_thds is not reported (modified w/o locking). */
|
||||
data->n_waiting_thds = 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#endif /* JEMALLOC_INTERNAL_MUTEX_INLINES_H */
|
||||
|
@ -5,6 +5,7 @@ void nstime_init(nstime_t *time, uint64_t ns);
|
||||
void nstime_init2(nstime_t *time, uint64_t sec, uint64_t nsec);
|
||||
uint64_t nstime_ns(const nstime_t *time);
|
||||
uint64_t nstime_sec(const nstime_t *time);
|
||||
uint64_t nstime_msec(const nstime_t *time);
|
||||
uint64_t nstime_nsec(const nstime_t *time);
|
||||
void nstime_copy(nstime_t *time, const nstime_t *source);
|
||||
int nstime_compare(const nstime_t *a, const nstime_t *b);
|
||||
|
@ -273,6 +273,7 @@ malloc_mutex_assert_owner
|
||||
malloc_mutex_boot
|
||||
malloc_mutex_init
|
||||
malloc_mutex_lock
|
||||
malloc_mutex_lock_slow
|
||||
malloc_mutex_postfork_child
|
||||
malloc_mutex_postfork_parent
|
||||
malloc_mutex_prefork
|
||||
@ -302,6 +303,7 @@ nstime_imultiply
|
||||
nstime_init
|
||||
nstime_init2
|
||||
nstime_monotonic
|
||||
nstime_msec
|
||||
nstime_ns
|
||||
nstime_nsec
|
||||
nstime_sec
|
||||
|
@ -56,6 +56,8 @@ struct malloc_bin_stats_s {
|
||||
|
||||
/* Current number of slabs in this bin. */
|
||||
size_t curslabs;
|
||||
|
||||
lock_prof_data_t lock_data;
|
||||
};
|
||||
|
||||
struct malloc_large_stats_s {
|
||||
|
@ -299,6 +299,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads,
|
||||
arena_bin_t *bin = &arena->bins[i];
|
||||
|
||||
malloc_mutex_lock(tsdn, &bin->lock);
|
||||
malloc_lock_prof_read(tsdn, &bstats[i].lock_data, &bin->lock);
|
||||
bstats[i].nmalloc += bin->stats.nmalloc;
|
||||
bstats[i].ndalloc += bin->stats.ndalloc;
|
||||
bstats[i].nrequests += bin->stats.nrequests;
|
||||
|
@ -145,6 +145,7 @@ CTL_PROTO(stats_arenas_i_bins_j_nflushes)
|
||||
CTL_PROTO(stats_arenas_i_bins_j_nslabs)
|
||||
CTL_PROTO(stats_arenas_i_bins_j_nreslabs)
|
||||
CTL_PROTO(stats_arenas_i_bins_j_curslabs)
|
||||
CTL_PROTO(stats_arenas_i_bins_j_lock_data)
|
||||
INDEX_PROTO(stats_arenas_i_bins_j)
|
||||
CTL_PROTO(stats_arenas_i_lextents_j_nmalloc)
|
||||
CTL_PROTO(stats_arenas_i_lextents_j_ndalloc)
|
||||
@ -357,7 +358,8 @@ static const ctl_named_node_t stats_arenas_i_bins_j_node[] = {
|
||||
{NAME("nflushes"), CTL(stats_arenas_i_bins_j_nflushes)},
|
||||
{NAME("nslabs"), CTL(stats_arenas_i_bins_j_nslabs)},
|
||||
{NAME("nreslabs"), CTL(stats_arenas_i_bins_j_nreslabs)},
|
||||
{NAME("curslabs"), CTL(stats_arenas_i_bins_j_curslabs)}
|
||||
{NAME("curslabs"), CTL(stats_arenas_i_bins_j_curslabs)},
|
||||
{NAME("lock_data"), CTL(stats_arenas_i_bins_j_lock_data)}
|
||||
};
|
||||
static const ctl_named_node_t super_stats_arenas_i_bins_j_node[] = {
|
||||
{NAME(""), CHILD(named, stats_arenas_i_bins_j)}
|
||||
@ -726,6 +728,8 @@ ctl_arena_stats_sdmerge(ctl_arena_t *ctl_sdarena, ctl_arena_t *ctl_arena,
|
||||
} else {
|
||||
assert(astats->bstats[i].curslabs == 0);
|
||||
}
|
||||
malloc_lock_prof_merge(&sdstats->bstats[i].lock_data,
|
||||
&astats->bstats[i].lock_data);
|
||||
}
|
||||
|
||||
for (i = 0; i < NSIZES - NBINS; i++) {
|
||||
@ -2333,6 +2337,8 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nreslabs,
|
||||
arenas_i(mib[2])->astats->bstats[mib[4]].reslabs, uint64_t)
|
||||
CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_curslabs,
|
||||
arenas_i(mib[2])->astats->bstats[mib[4]].curslabs, size_t)
|
||||
CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_lock_data,
|
||||
arenas_i(mib[2])->astats->bstats[mib[4]].lock_data, lock_prof_data_t)
|
||||
|
||||
static const ctl_named_node_t *
|
||||
stats_arenas_i_bins_j_index(tsdn_t *tsdn, const size_t *mib, size_t miblen,
|
||||
|
@ -1,6 +1,7 @@
|
||||
#include "jemalloc/internal/jemalloc_internal.h"
|
||||
|
||||
#define BILLION UINT64_C(1000000000)
|
||||
#define MILLION UINT64_C(1000000)
|
||||
|
||||
void
|
||||
nstime_init(nstime_t *time, uint64_t ns) {
|
||||
@ -17,6 +18,11 @@ nstime_ns(const nstime_t *time) {
|
||||
return time->ns;
|
||||
}
|
||||
|
||||
uint64_t
|
||||
nstime_msec(const nstime_t *time) {
|
||||
return time->ns / MILLION;
|
||||
}
|
||||
|
||||
uint64_t
|
||||
nstime_sec(const nstime_t *time) {
|
||||
return time->ns / BILLION;
|
||||
|
99
src/stats.c
99
src/stats.c
@ -32,6 +32,31 @@ bool opt_stats_print = false;
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
/* Calculate x.yyy and output a string (takes a fixed sized char array). */
|
||||
static bool
|
||||
get_rate_str(uint64_t dividend, uint64_t divisor, char str[6]) {
|
||||
if (divisor == 0 || dividend > divisor) {
|
||||
/* The rate is not supposed to be greater than 1. */
|
||||
return true;
|
||||
}
|
||||
if (dividend > 0) {
|
||||
assert(UINT64_MAX / dividend >= 1000);
|
||||
}
|
||||
|
||||
unsigned n = (unsigned)((dividend * 1000) / divisor);
|
||||
if (n < 10) {
|
||||
malloc_snprintf(str, 6, "0.00%u", n);
|
||||
} else if (n < 100) {
|
||||
malloc_snprintf(str, 6, "0.0%u", n);
|
||||
} else if (n < 1000) {
|
||||
malloc_snprintf(str, 6, "0.%u", n);
|
||||
} else {
|
||||
malloc_snprintf(str, 6, "1");
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static void
|
||||
stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque,
|
||||
bool json, bool large, unsigned i) {
|
||||
@ -51,13 +76,14 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque,
|
||||
"bins: size ind allocated nmalloc"
|
||||
" ndalloc nrequests curregs"
|
||||
" curslabs regs pgs util nfills"
|
||||
" nflushes newslabs reslabs\n");
|
||||
" nflushes newslabs reslabs"
|
||||
" contention max_wait\n");
|
||||
} else {
|
||||
malloc_cprintf(write_cb, cbopaque,
|
||||
"bins: size ind allocated nmalloc"
|
||||
" ndalloc nrequests curregs"
|
||||
" curslabs regs pgs util newslabs"
|
||||
" reslabs\n");
|
||||
" reslabs contention max_wait\n");
|
||||
}
|
||||
}
|
||||
for (j = 0, in_gap = false; j < nbins; j++) {
|
||||
@ -100,6 +126,9 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque,
|
||||
uint64_t);
|
||||
CTL_M2_M4_GET("stats.arenas.0.bins.0.curslabs", i, j, &curslabs,
|
||||
size_t);
|
||||
lock_prof_data_t lock_data;
|
||||
CTL_M2_M4_GET("stats.arenas.0.bins.0.lock_data", i, j,
|
||||
&lock_data, lock_prof_data_t);
|
||||
|
||||
if (json) {
|
||||
malloc_cprintf(write_cb, cbopaque,
|
||||
@ -127,55 +156,59 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque,
|
||||
curslabs,
|
||||
(j + 1 < nbins) ? "," : "");
|
||||
} else if (!in_gap) {
|
||||
size_t availregs, milli;
|
||||
char util[6]; /* "x.yyy". */
|
||||
|
||||
availregs = nregs * curslabs;
|
||||
milli = (availregs != 0) ? (1000 * curregs) / availregs
|
||||
: 1000;
|
||||
|
||||
if (milli > 1000) {
|
||||
/*
|
||||
* Race detected: the counters were read in
|
||||
* separate mallctl calls and concurrent
|
||||
* operations happened in between. In this case
|
||||
* no meaningful utilization can be computed.
|
||||
*/
|
||||
malloc_snprintf(util, sizeof(util), " race");
|
||||
} else if (milli < 10) {
|
||||
malloc_snprintf(util, sizeof(util),
|
||||
"0.00%zu", milli);
|
||||
} else if (milli < 100) {
|
||||
malloc_snprintf(util, sizeof(util), "0.0%zu",
|
||||
milli);
|
||||
} else if (milli < 1000) {
|
||||
malloc_snprintf(util, sizeof(util), "0.%zu",
|
||||
milli);
|
||||
} else {
|
||||
assert(milli == 1000);
|
||||
malloc_snprintf(util, sizeof(util), "1");
|
||||
size_t availregs = nregs * curslabs;
|
||||
char util[6];
|
||||
if (get_rate_str((uint64_t)curregs, (uint64_t)availregs,
|
||||
util)) {
|
||||
if (availregs == 0) {
|
||||
malloc_snprintf(util, sizeof(util),
|
||||
"1");
|
||||
} else if (curregs > availregs) {
|
||||
/*
|
||||
* Race detected: the counters were read
|
||||
* in separate mallctl calls and
|
||||
* concurrent operations happened in
|
||||
* between. In this case no meaningful
|
||||
* utilization can be computed.
|
||||
*/
|
||||
malloc_snprintf(util, sizeof(util),
|
||||
" race");
|
||||
} else {
|
||||
not_reached();
|
||||
}
|
||||
}
|
||||
char rate[6];
|
||||
if (get_rate_str(lock_data.n_wait_times,
|
||||
lock_data.n_lock_ops, rate)) {
|
||||
if (lock_data.n_lock_ops == 0) {
|
||||
malloc_snprintf(rate, sizeof(rate),
|
||||
"0");
|
||||
}
|
||||
}
|
||||
uint64_t max_wait = nstime_msec(
|
||||
&lock_data.max_wait_time);
|
||||
|
||||
if (config_tcache) {
|
||||
malloc_cprintf(write_cb, cbopaque,
|
||||
"%20zu %3u %12zu %12"FMTu64
|
||||
" %12"FMTu64" %12"FMTu64" %12zu"
|
||||
" %12zu %4u %3zu %-5s %12"FMTu64
|
||||
" %12"FMTu64" %12"FMTu64" %12"FMTu64"\n",
|
||||
" %12"FMTu64" %12"FMTu64" %12"FMTu64
|
||||
" %12s %12"FMTu64"\n",
|
||||
reg_size, j, curregs * reg_size, nmalloc,
|
||||
ndalloc, nrequests, curregs, curslabs,
|
||||
nregs, slab_size / page, util, nfills,
|
||||
nflushes, nslabs, nreslabs);
|
||||
nflushes, nslabs, nreslabs, rate, max_wait);
|
||||
} else {
|
||||
malloc_cprintf(write_cb, cbopaque,
|
||||
"%20zu %3u %12zu %12"FMTu64
|
||||
" %12"FMTu64" %12"FMTu64" %12zu"
|
||||
" %12zu %4u %3zu %-5s %12"FMTu64
|
||||
" %12"FMTu64"\n",
|
||||
" %12"FMTu64" %12s\n",
|
||||
reg_size, j, curregs * reg_size, nmalloc,
|
||||
ndalloc, nrequests, curregs, curslabs,
|
||||
nregs, slab_size / page, util, nslabs,
|
||||
nreslabs);
|
||||
nreslabs, rate);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user