Added lock profiling and output for global locks (ctl, prof and base).

commit ca9074deff
parent 0fb5c0e853
Qi Wang, 2017-03-11 20:28:31 -08:00 (committed by Qi Wang)
9 changed files with 173 additions and 77 deletions
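Before the per-file hunks, a consumer-side sketch (not part of the commit) of what this change enables: the three global mutexes become readable through the existing mallctl() interface under the new "stats.locks.*" namespace added below, assuming jemalloc is built with --enable-stats.

#include <stdio.h>
#include <inttypes.h>
#include <jemalloc/jemalloc.h>

int
main(void) {
	/* Bump the epoch so ctl_refresh() snapshots fresh stats (which now include lock data). */
	uint64_t epoch = 1;
	size_t sz = sizeof(epoch);
	mallctl("epoch", &epoch, &sz, &epoch, sz);

	const char *locks[] = {"base", "prof", "ctl"};
	for (unsigned i = 0; i < 3; i++) {
		char cmd[128];
		uint64_t num_wait;
		size_t len = sizeof(num_wait);
		/* Paths follow the stats_locks_node added in src/ctl.c, e.g. "stats.locks.ctl.num_wait". */
		snprintf(cmd, sizeof(cmd), "stats.locks.%s.num_wait", locks[i]);
		if (mallctl(cmd, &num_wait, &len, NULL, 0) == 0) {
			printf("%s: %" PRIu64 " lock waits\n", locks[i], num_wait);
		}
	}
	return 0;
}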

include/jemalloc/internal/ctl_externs.h

@@ -4,10 +4,13 @@
/* Maximum ctl tree depth. */
#define CTL_MAX_DEPTH 7
-#define NUM_ARENA_PROF_LOCKS 6
-#define NUM_LOCK_PROF_COUNTERS 7
-const char *arena_lock_names[NUM_ARENA_PROF_LOCKS];
-const char *lock_counter_names[NUM_LOCK_PROF_COUNTERS];
+#define NUM_GLOBAL_PROF_LOCKS 3
+#define NUM_ARENA_PROF_LOCKS 6
+#define NUM_LOCK_PROF_COUNTERS 7
+extern const char *arena_lock_names[NUM_ARENA_PROF_LOCKS];
+extern const char *global_lock_names[NUM_GLOBAL_PROF_LOCKS];
+extern const char *lock_counter_names[NUM_LOCK_PROF_COUNTERS];
int ctl_byname(tsd_t *tsd, const char *name, void *oldp, size_t *oldlenp,
void *newp, size_t newlen);

include/jemalloc/internal/ctl_structs.h

@@ -41,6 +41,10 @@ struct ctl_stats_s {
size_t resident;
size_t mapped;
size_t retained;
+#define MTX(mutex) lock_prof_data_t mutex##_mtx_data;
+GLOBAL_PROF_MUTEXES
+#undef MTX
};
struct ctl_arena_s {

include/jemalloc/internal/ctl_types.h

@@ -1,6 +1,11 @@
#ifndef JEMALLOC_INTERNAL_CTL_TYPES_H
#define JEMALLOC_INTERNAL_CTL_TYPES_H
+#define GLOBAL_PROF_MUTEXES \
+MTX(base) \
+MTX(ctl) \
+MTX(prof)
typedef struct ctl_node_s ctl_node_t;
typedef struct ctl_named_node_s ctl_named_node_t;
typedef struct ctl_indexed_node_s ctl_indexed_node_t;
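An aside on the pattern (expansion shown for illustration, not part of the diff): GLOBAL_PROF_MUTEXES is an X-macro list, so each site defines MTX() for its own purpose and then expands the list. Inside ctl_stats_s above, the expansion produces one profiling slot per global mutex:

/* GLOBAL_PROF_MUTEXES expanded with the MTX() defined in ctl_stats_s: */
lock_prof_data_t base_mtx_data;
lock_prof_data_t ctl_mtx_data;
lock_prof_data_t prof_mtx_data;

Adding another global mutex to the profiler then only requires a new MTX() entry in this one list.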

include/jemalloc/internal/private_symbols.txt

@@ -106,6 +106,7 @@ bootstrap_calloc
bootstrap_free
bootstrap_malloc
bt_init
+bt2gctx_mtx
buferror
ckh_count
ckh_delete

include/jemalloc/internal/prof_externs.h

@@ -1,6 +1,8 @@
#ifndef JEMALLOC_INTERNAL_PROF_EXTERNS_H
#define JEMALLOC_INTERNAL_PROF_EXTERNS_H
+extern malloc_mutex_t bt2gctx_mtx;
extern bool opt_prof;
extern bool opt_prof_active;
extern bool opt_prof_thread_active_init;

src/arena.c

@@ -298,9 +298,9 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads,
}
#define READ_ARENA_MUTEX_PROF_DATA(mtx, data) \
malloc_mutex_lock(tsdn, &arena->mtx); \
malloc_lock_prof_read(tsdn, &astats->data, &arena->mtx); \
malloc_mutex_unlock(tsdn, &arena->mtx);
/* Gather per arena mutex profiling data. */
READ_ARENA_MUTEX_PROF_DATA(large_mtx, large_mtx_data)
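For orientation (expansion shown for illustration, not part of the diff), the invocation above unfolds into a lock/read/unlock sequence around the profiling-data copy:

malloc_mutex_lock(tsdn, &arena->large_mtx);
malloc_lock_prof_read(tsdn, &astats->large_mtx_data, &arena->large_mtx);
malloc_mutex_unlock(tsdn, &arena->large_mtx);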

src/ctl.c

@@ -13,6 +13,12 @@ static bool ctl_initialized;
static ctl_stats_t *ctl_stats;
static ctl_arenas_t *ctl_arenas;
+const char *global_lock_names[NUM_GLOBAL_PROF_LOCKS] = {
+"base",
+"prof",
+"ctl"
+};
const char *arena_lock_names[NUM_ARENA_PROF_LOCKS] = {
"large",
"extent_freelist",
@@ -21,6 +27,7 @@ const char *arena_lock_names[NUM_ARENA_PROF_LOCKS] = {
"decay",
"tcache"
};
const char *lock_counter_names[NUM_LOCK_PROF_COUNTERS] = {
"num_ops",
"num_wait",
@@ -196,16 +203,26 @@ CTL_PROTO(stats_resident)
CTL_PROTO(stats_mapped)
CTL_PROTO(stats_retained)
-#define STATS_LOCKS_CTL_PROTO_GEN(l, n) \
-CTL_PROTO(stats_arenas_i_##l##_##n##_num_ops) \
-CTL_PROTO(stats_arenas_i_##l##_##n##_num_wait) \
-CTL_PROTO(stats_arenas_i_##l##_##n##_num_spin_acq) \
-CTL_PROTO(stats_arenas_i_##l##_##n##_num_owner_switch) \
-CTL_PROTO(stats_arenas_i_##l##_##n##_total_wait_time) \
-CTL_PROTO(stats_arenas_i_##l##_##n##_max_wait_time) \
-CTL_PROTO(stats_arenas_i_##l##_##n##_max_num_thds)
+#define LOCK_STATS_CTL_PROTO_GEN(n) \
+CTL_PROTO(stats_##n##_num_ops) \
+CTL_PROTO(stats_##n##_num_wait) \
+CTL_PROTO(stats_##n##_num_spin_acq) \
+CTL_PROTO(stats_##n##_num_owner_switch) \
+CTL_PROTO(stats_##n##_total_wait_time) \
+CTL_PROTO(stats_##n##_max_wait_time) \
+CTL_PROTO(stats_##n##_max_num_thds)
-#define ARENA_LOCKS_CTL_PROTO_GEN(n) STATS_LOCKS_CTL_PROTO_GEN(locks, n)
+/* Global locks. */
+LOCK_STATS_CTL_PROTO_GEN(locks_base)
+LOCK_STATS_CTL_PROTO_GEN(locks_prof)
+LOCK_STATS_CTL_PROTO_GEN(locks_ctl)
+/* Arena bin locks. */
+LOCK_STATS_CTL_PROTO_GEN(arenas_i_bins_j_lock)
+#define ARENA_LOCKS_CTL_PROTO_GEN(n) \
+LOCK_STATS_CTL_PROTO_GEN(arenas_i_locks_##n)
+/* Per arena locks. */
ARENA_LOCKS_CTL_PROTO_GEN(large)
ARENA_LOCKS_CTL_PROTO_GEN(extent_freelist)
ARENA_LOCKS_CTL_PROTO_GEN(extents_cached)
@@ -213,9 +230,7 @@ ARENA_LOCKS_CTL_PROTO_GEN(extents_retained)
ARENA_LOCKS_CTL_PROTO_GEN(decay)
ARENA_LOCKS_CTL_PROTO_GEN(tcache)
#undef ARENA_LOCKS_CTL_PROTO_GEN
-STATS_LOCKS_CTL_PROTO_GEN(bins_j, lock)
-#undef STATS_LOCKS_CTL_PROTO_GEN
+#undef LOCK_STATS_CTL_PROTO_GEN
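For illustration (not part of the diff), each generator invocation expands to one CTL_PROTO() per counter, and the arena wrapper merely prepends the per-arena prefix:

/* LOCK_STATS_CTL_PROTO_GEN(locks_ctl) yields: */
CTL_PROTO(stats_locks_ctl_num_ops)
CTL_PROTO(stats_locks_ctl_num_wait)
/* ... through stats_locks_ctl_max_num_thds. */

/* ARENA_LOCKS_CTL_PROTO_GEN(large) likewise yields: */
CTL_PROTO(stats_arenas_i_locks_large_num_ops)
/* ... and so on. */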
/******************************************************************************/
/* mallctl tree. */
@@ -384,26 +399,27 @@ static const ctl_named_node_t stats_arenas_i_large_node[] = {
{NAME("nrequests"), CTL(stats_arenas_i_large_nrequests)}
};
-#define LOCK_PROF_DATA_NODE(prefix, n) \
-static const ctl_named_node_t prefix##_##n##_node[] = { \
+#define LOCK_PROF_DATA_NODE(prefix) \
+static const ctl_named_node_t stats_##prefix##_node[] = { \
{NAME("num_ops"), \
-CTL(prefix##_##n##_num_ops)}, \
+CTL(stats_##prefix##_num_ops)}, \
{NAME("num_wait"), \
-CTL(prefix##_##n##_num_wait)}, \
+CTL(stats_##prefix##_num_wait)}, \
{NAME("num_spin_acq"), \
-CTL(prefix##_##n##_num_spin_acq)}, \
+CTL(stats_##prefix##_num_spin_acq)}, \
{NAME("num_owner_switch"), \
-CTL(prefix##_##n##_num_owner_switch)}, \
+CTL(stats_##prefix##_num_owner_switch)}, \
{NAME("total_wait_time"), \
-CTL(prefix##_##n##_total_wait_time)}, \
+CTL(stats_##prefix##_total_wait_time)}, \
{NAME("max_wait_time"), \
-CTL(prefix##_##n##_max_wait_time)}, \
+CTL(stats_##prefix##_max_wait_time)}, \
{NAME("max_num_thds"), \
-CTL(prefix##_##n##_max_num_thds)} \
+CTL(stats_##prefix##_max_num_thds)} \
/* Note that # of current waiting thread not provided. */ \
};
-LOCK_PROF_DATA_NODE(stats_arenas_i_bins_j, lock)
+LOCK_PROF_DATA_NODE(arenas_i_bins_j_lock)
static const ctl_named_node_t stats_arenas_i_bins_j_node[] = {
{NAME("nmalloc"), CTL(stats_arenas_i_bins_j_nmalloc)},
{NAME("ndalloc"), CTL(stats_arenas_i_bins_j_ndalloc)},
@@ -439,12 +455,14 @@ static const ctl_indexed_node_t stats_arenas_i_lextents_node[] = {
{INDEX(stats_arenas_i_lextents_j)}
};
-LOCK_PROF_DATA_NODE(stats_arenas_i_locks, large)
-LOCK_PROF_DATA_NODE(stats_arenas_i_locks, extent_freelist)
-LOCK_PROF_DATA_NODE(stats_arenas_i_locks, extents_cached)
-LOCK_PROF_DATA_NODE(stats_arenas_i_locks, extents_retained)
-LOCK_PROF_DATA_NODE(stats_arenas_i_locks, decay)
-LOCK_PROF_DATA_NODE(stats_arenas_i_locks, tcache)
+#define ARENA_LOCK_PROF_DATA_NODE(n) LOCK_PROF_DATA_NODE(arenas_i_locks_##n)
+ARENA_LOCK_PROF_DATA_NODE(large)
+ARENA_LOCK_PROF_DATA_NODE(extent_freelist)
+ARENA_LOCK_PROF_DATA_NODE(extents_cached)
+ARENA_LOCK_PROF_DATA_NODE(extents_retained)
+ARENA_LOCK_PROF_DATA_NODE(decay)
+ARENA_LOCK_PROF_DATA_NODE(tcache)
static const ctl_named_node_t stats_arenas_i_locks_node[] = {
{NAME("large"), CHILD(named, stats_arenas_i_locks_large)},
@@ -457,7 +475,6 @@ static const ctl_named_node_t stats_arenas_i_locks_node[] = {
{NAME("decay"), CHILD(named, stats_arenas_i_locks_decay)},
{NAME("tcache"), CHILD(named, stats_arenas_i_locks_tcache)}
};
-#undef LOCK_PROF_DATA_NODE
static const ctl_named_node_t stats_arenas_i_node[] = {
{NAME("nthreads"), CTL(stats_arenas_i_nthreads)},
@@ -493,6 +510,15 @@ static const ctl_indexed_node_t stats_arenas_node[] = {
{INDEX(stats_arenas_i)}
};
+LOCK_PROF_DATA_NODE(locks_base)
+LOCK_PROF_DATA_NODE(locks_prof)
+LOCK_PROF_DATA_NODE(locks_ctl)
+static const ctl_named_node_t stats_locks_node[] = {
+{NAME("base"), CHILD(named, stats_locks_base)},
+{NAME("prof"), CHILD(named, stats_locks_prof)},
+{NAME("ctl"), CHILD(named, stats_locks_ctl)}
+};
static const ctl_named_node_t stats_node[] = {
{NAME("allocated"), CTL(stats_allocated)},
{NAME("active"), CTL(stats_active)},
@@ -500,8 +526,10 @@ static const ctl_named_node_t stats_node[] = {
{NAME("resident"), CTL(stats_resident)},
{NAME("mapped"), CTL(stats_mapped)},
{NAME("retained"), CTL(stats_retained)},
{NAME("locks"), CHILD(named, stats_locks)},
{NAME("arenas"), CHILD(indexed, stats_arenas)}
};
+#undef LOCK_PROF_DATA_NODE
static const ctl_named_node_t root_node[] = {
{NAME("version"), CTL(version)},
@@ -925,6 +953,17 @@ ctl_refresh(tsdn_t *tsdn) {
&ctl_sarena->astats->astats.mapped, ATOMIC_RELAXED);
ctl_stats->retained = atomic_load_zu(
&ctl_sarena->astats->astats.retained, ATOMIC_RELAXED);
+#define READ_GLOBAL_MUTEX_PROF_DATA(mtx, data) \
+malloc_mutex_lock(tsdn, &mtx); \
+malloc_lock_prof_read(tsdn, &ctl_stats->data, &mtx); \
+malloc_mutex_unlock(tsdn, &mtx);
+READ_GLOBAL_MUTEX_PROF_DATA(b0get()->mtx, base_mtx_data);
+READ_GLOBAL_MUTEX_PROF_DATA(bt2gctx_mtx, prof_mtx_data);
+/* We own ctl mutex already. */
+malloc_lock_prof_read(tsdn, &ctl_stats->ctl_mtx_data, &ctl_mtx);
+#undef READ_GLOBAL_MUTEX_PROF_DATA
}
ctl_arenas->epoch++;
}
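For illustration (not part of the diff), READ_GLOBAL_MUTEX_PROF_DATA(bt2gctx_mtx, prof_mtx_data) expands to:

malloc_mutex_lock(tsdn, &bt2gctx_mtx);
malloc_lock_prof_read(tsdn, &ctl_stats->prof_mtx_data, &bt2gctx_mtx);
malloc_mutex_unlock(tsdn, &bt2gctx_mtx);

ctl_mtx is deliberately left out of the macro: ctl_refresh() runs with ctl_mtx already held, so re-acquiring it would self-deadlock, and its counters are read directly instead.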
@@ -2413,25 +2452,34 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_large_nrequests,
uint64_t) /* Intentional. */
/* Lock profiling related APIs below. */
-#define ARENAS_LOCK_CTL_GEN(l, s, d) \
-CTL_RO_CGEN(config_stats, stats_arenas_i_##l##_num_ops, \
-arenas_i(mib[2])->astats->s.d.n_lock_ops, uint64_t) \
-CTL_RO_CGEN(config_stats, stats_arenas_i_##l##_num_wait, \
-arenas_i(mib[2])->astats->s.d.n_wait_times, uint64_t) \
-CTL_RO_CGEN(config_stats, stats_arenas_i_##l##_num_spin_acq, \
-arenas_i(mib[2])->astats->s.d.n_spin_acquired, uint64_t) \
-CTL_RO_CGEN(config_stats, stats_arenas_i_##l##_num_owner_switch, \
-arenas_i(mib[2])->astats->s.d.n_owner_switches, uint64_t) \
-CTL_RO_CGEN(config_stats, stats_arenas_i_##l##_total_wait_time, \
-arenas_i(mib[2])->astats->s.d.tot_wait_time, uint64_t) \
-CTL_RO_CGEN(config_stats, stats_arenas_i_##l##_max_wait_time, \
-arenas_i(mib[2])->astats->s.d.max_wait_time, uint64_t) \
-CTL_RO_CGEN(config_stats, stats_arenas_i_##l##_max_num_thds, \
-arenas_i(mib[2])->astats->s.d.max_n_thds, uint64_t)
+#define RO_LOCK_CTL_GEN(n, l) \
+CTL_RO_CGEN(config_stats, stats_##n##_num_ops, \
+l.n_lock_ops, uint64_t) \
+CTL_RO_CGEN(config_stats, stats_##n##_num_wait, \
+l.n_wait_times, uint64_t) \
+CTL_RO_CGEN(config_stats, stats_##n##_num_spin_acq, \
+l.n_spin_acquired, uint64_t) \
+CTL_RO_CGEN(config_stats, stats_##n##_num_owner_switch, \
+l.n_owner_switches, uint64_t) \
+CTL_RO_CGEN(config_stats, stats_##n##_total_wait_time, \
+l.tot_wait_time, uint64_t) \
+CTL_RO_CGEN(config_stats, stats_##n##_max_wait_time, \
+l.max_wait_time, uint64_t) \
+CTL_RO_CGEN(config_stats, stats_##n##_max_num_thds, \
+l.max_n_thds, uint64_t)
+/* Global lock. */
+#define MTX(mutex) RO_LOCK_CTL_GEN(locks_##mutex, ctl_stats->mutex##_mtx_data)
+GLOBAL_PROF_MUTEXES
+#undef MTX
+/* arena->bins[j].lock */
+RO_LOCK_CTL_GEN(arenas_i_bins_j_lock,
+arenas_i(mib[2])->astats->bstats[mib[4]].lock_data)
/* Per arena locks */
#define ARENAS_ASTATS_LOCK_CTL_GEN(l, d) \
-ARENAS_LOCK_CTL_GEN(locks_##l, astats, d)
+RO_LOCK_CTL_GEN(arenas_i_locks_##l, arenas_i(mib[2])->astats->astats.d)
/* arena->large_mtx */
ARENAS_ASTATS_LOCK_CTL_GEN(large, large_mtx_data)
/* arena->extent_freelist_mtx */
@@ -2444,8 +2492,8 @@ ARENAS_ASTATS_LOCK_CTL_GEN(extents_retained, extents_retained_mtx_data)
ARENAS_ASTATS_LOCK_CTL_GEN(decay, decay_mtx_data)
/* arena->tcache_ql_mtx */
ARENAS_ASTATS_LOCK_CTL_GEN(tcache, tcache_mtx_data)
-/* arena->bins[j].lock */
-ARENAS_LOCK_CTL_GEN(bins_j_lock, bstats[mib[4]], lock_data)
#undef ARENAS_ASTATS_LOCK_CTL_GEN
+#undef RO_LOCK_CTL_GEN
CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nmalloc,
arenas_i(mib[2])->astats->bstats[mib[4]].nmalloc, uint64_t)

src/prof.c

@@ -78,7 +78,8 @@ static malloc_mutex_t *tdata_locks;
* structure that knows about all backtraces currently captured.
*/
static ckh_t bt2gctx;
-static malloc_mutex_t bt2gctx_mtx;
+/* Non static to enable profiling. */
+malloc_mutex_t bt2gctx_mtx;
/*
* Tree of all extant prof_tdata_t structures, regardless of state,

src/stats.c

@@ -127,6 +127,7 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque,
CTL_M2_M4_GET("stats.arenas.0.bins.0.curslabs", i, j, &curslabs,
size_t);
+/* Output less info for bin locks to save space. */
uint64_t num_ops, num_wait, max_wait;
CTL_M2_M4_GET("stats.arenas.0.bins.0.lock.num_wait", i, j,
&num_wait, uint64_t);
@@ -290,8 +291,9 @@ stats_arena_lextents_print(void (*write_cb)(void *, const char *),
}
static void
-gen_ctl_str(char *str, const char *lock, const char *counter) {
-sprintf(str, "stats.arenas.0.locks.%s.%s", lock, counter);
+gen_lock_ctl_str(char *str, const char *prefix, const char *lock,
+const char *counter) {
+sprintf(str, "stats.%s.%s.%s", prefix, lock, counter);
}
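With the added prefix parameter, one helper now builds both global and per-arena ctl paths; for illustration (not part of the diff):

gen_lock_ctl_str(cmd, "locks", "ctl", "num_ops");
/* -> "stats.locks.ctl.num_ops" */
gen_lock_ctl_str(cmd, "arenas.0.locks", "large", "num_wait");
/* -> "stats.arenas.0.locks.large.num_wait" */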
@@ -301,13 +303,34 @@ static void read_arena_lock_stats(unsigned arena_ind,
unsigned i, j;
for (i = 0; i < NUM_ARENA_PROF_LOCKS; i++) {
for (j = 0; j < NUM_LOCK_PROF_COUNTERS; j++) {
-gen_ctl_str(cmd, arena_lock_names[i],
-lock_counter_names[j]);
+gen_lock_ctl_str(cmd, "arenas.0.locks",
+arena_lock_names[i], lock_counter_names[j]);
CTL_M2_GET(cmd, arena_ind, &results[i][j], uint64_t);
}
}
}
+static void lock_stats_output(void (*write_cb)(void *, const char *),
+void *cbopaque, const char *name, uint64_t stats[NUM_LOCK_PROF_COUNTERS],
+bool first_mutex) {
+if (first_mutex) {
+/* Print title. */
+malloc_cprintf(write_cb, cbopaque,
+" n_lock_ops n_waiting"
+" n_spin_acq n_owner_switch total_wait_ns"
+" max_wait_ns max_n_wait_thds\n");
+}
+malloc_cprintf(write_cb, cbopaque, "%s", name);
+malloc_cprintf(write_cb, cbopaque, ":%*c",
+(int)(19 - strlen(name)), ' ');
+for (unsigned i = 0; i < NUM_LOCK_PROF_COUNTERS; i++) {
+malloc_cprintf(write_cb, cbopaque, " %16"FMTu64, stats[i]);
+}
+malloc_cprintf(write_cb, cbopaque, "\n");
+}
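Schematically (placeholder values, not real measurements), lock_stats_output() renders one padded name column followed by the seven counters, printing the title row only for the first mutex:

                    n_lock_ops       n_waiting  ...  max_n_wait_thds
base:                     NNNN            NNNN  ...             NNNN
prof:                     NNNN            NNNN  ...             NNNN
ctl:                      NNNN            NNNN  ...             NNNN

The ":%*c" directive pads each name plus colon to a fixed-width field so the counters line up under the header.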
static void
stats_arena_locks_print(void (*write_cb)(void *, const char *),
void *cbopaque, bool json, unsigned arena_ind) {
@@ -318,23 +341,9 @@ stats_arena_locks_print(void (*write_cb)(void *, const char *),
if (json) {
//TODO
} else {
-malloc_cprintf(write_cb, cbopaque,
-" n_lock_ops n_waiting"
-" n_spin_acq n_owner_switch total_wait_ns"
-" max_wait_ns max_n_wait_thds\n");
-unsigned i, j;
-for (i = 0; i < NUM_ARENA_PROF_LOCKS; i++) {
-malloc_cprintf(write_cb, cbopaque,
-"%s", arena_lock_names[i]);
-malloc_cprintf(write_cb, cbopaque, ":%*c",
-(int)(18 - strlen(arena_lock_names[i])), ' ');
-for (j = 0; j < NUM_LOCK_PROF_COUNTERS; j++) {
-malloc_cprintf(write_cb, cbopaque, " %15"FMTu64,
-lock_stats[i][j]);
-}
-malloc_cprintf(write_cb, cbopaque, "\n");
+for (unsigned i = 0; i < NUM_ARENA_PROF_LOCKS; i++) {
+lock_stats_output(write_cb, cbopaque,
+arena_lock_names[i], lock_stats[i], i == 0);
}
}
@@ -930,6 +939,20 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque,
}
}
+static void read_global_lock_stats(
+uint64_t results[NUM_GLOBAL_PROF_LOCKS][NUM_LOCK_PROF_COUNTERS]) {
+char cmd[128];
+unsigned i, j;
+for (i = 0; i < NUM_GLOBAL_PROF_LOCKS; i++) {
+for (j = 0; j < NUM_LOCK_PROF_COUNTERS; j++) {
+gen_lock_ctl_str(cmd, "locks", global_lock_names[i],
+lock_counter_names[j]);
+CTL_GET(cmd, &results[i][j], uint64_t);
+}
+}
+}
}
static void
stats_print_helper(void (*write_cb)(void *, const char *), void *cbopaque,
bool json, bool merged, bool destroyed, bool unmerged, bool bins,
@@ -942,6 +965,10 @@
CTL_GET("stats.resident", &resident, size_t);
CTL_GET("stats.mapped", &mapped, size_t);
CTL_GET("stats.retained", &retained, size_t);
+uint64_t lock_stats[NUM_GLOBAL_PROF_LOCKS][NUM_LOCK_PROF_COUNTERS];
+read_global_lock_stats(lock_stats);
if (json) {
malloc_cprintf(write_cb, cbopaque,
"\t\t\"stats\": {\n");
@ -966,6 +993,11 @@ stats_print_helper(void (*write_cb)(void *, const char *), void *cbopaque,
"Allocated: %zu, active: %zu, metadata: %zu,"
" resident: %zu, mapped: %zu, retained: %zu\n",
allocated, active, metadata, resident, mapped, retained);
+for (unsigned i = 0; i < NUM_GLOBAL_PROF_LOCKS; i++) {
+lock_stats_output(write_cb, cbopaque,
+global_lock_names[i], lock_stats[i], i == 0);
+}
}
if (merged || destroyed || unmerged) {