Make cumulative heap profile data optional.
Add the R option to control whether cumulative heap profile data are maintained. Add the T option to control the size of per thread backtrace caches, primarily because when the R option is disabled, backtraces that no longer have allocations associated with them are discarded as soon as no thread caches refer to them.
commit a881cd2c61 (parent 4d5c09905e)
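The new settings are also exposed read-only through the mallctl interface added by this change, as "opt.prof_accum" and "opt.lg_prof_tcmax". A minimal sketch of reading them back at runtime, assuming an installed jemalloc/jemalloc.h header and a build whose public mallctl() symbol is unprefixed (the patch itself calls it through the JEMALLOC_P() mangling macro):

#include <stdio.h>
#include <stdbool.h>
#include <sys/types.h>
#include <jemalloc/jemalloc.h>

int
main(void)
{
	bool accum;
	ssize_t lg_tcmax;
	size_t bsz = sizeof(accum);
	size_t ssz = sizeof(lg_tcmax);

	/* "R"/"r": whether cumulative object/byte counts are maintained. */
	if (mallctl("opt.prof_accum", &accum, &bsz, NULL, 0) == 0)
		printf("opt.prof_accum: %s\n", accum ? "true" : "false");

	/* "T"/"t": lg of the per thread backtrace cache limit; -1 means no limit. */
	if (mallctl("opt.lg_prof_tcmax", &lg_tcmax, &ssz, NULL, 0) == 0)
		printf("opt.lg_prof_tcmax: %zd\n", lg_tcmax);

	return (0);
}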
@@ -484,6 +484,12 @@ will disable dirty page purging.
@roff_prof@.Dq S
@roff_prof@option for probabilistic sampling control.
@roff_prof@See the
@roff_prof@.Dq R
@roff_prof@option for control of cumulative sample reporting.
@roff_prof@See the
@roff_prof@.Dq T
@roff_prof@option for control of per thread backtrace caching.
@roff_prof@See the
@roff_prof@.Dq I
@roff_prof@option for information on interval-triggered profile dumping, and the
@roff_prof@.Dq U
@@ -595,6 +601,18 @@ Double/halve the size of the maximum size class that is a multiple of the
quantum (8 or 16 bytes, depending on architecture).
Above this size, cacheline spacing is used for size classes.
The default value is 128 bytes.
@roff_prof@.It R
@roff_prof@Enable/disable reporting of cumulative object/byte counts in profile
@roff_prof@dumps.
@roff_prof@If this option is enabled, every unique backtrace must be stored for
@roff_prof@the duration of execution.
@roff_prof@Depending on the application, this can impose a large memory
@roff_prof@overhead, and the cumulative counts are not always of interest.
@roff_prof@See the
@roff_prof@.Dq T
@roff_prof@option for control of per thread backtrace caching, which has
@roff_prof@important interactions.
@roff_prof@This option is enabled by default.
@roff_prof@.It S
@roff_prof@Double/halve the average interval between allocation samples, as
@roff_prof@measured in bytes of allocation activity.
@@ -602,6 +620,22 @@ The default value is 128 bytes.
@roff_prof@also decreases the computational overhead.
@roff_prof@The default sample interval is one (i.e. all allocations are
@roff_prof@sampled).
@roff_prof@.It T
@roff_prof@Double/halve the maximum per thread backtrace cache used for heap
@roff_prof@profiling.
@roff_prof@A backtrace can only be discarded if the
@roff_prof@.Dq R
@roff_prof@option is disabled, and no thread caches currently refer to the
@roff_prof@backtrace.
@roff_prof@Therefore, a backtrace cache limit should be imposed if the
@roff_prof@intention is to limit how much memory is used by backtraces.
@roff_prof@By default, no limit is imposed.
@roff_prof@This is internally encoded as (1 << -1), and each
@roff_prof@.Dq T
@roff_prof@that is specified increments the shift amount.
@roff_prof@Therefore, e.g.
@roff_prof@.Ev JEMALLOC_OPTIONS=11T
@roff_prof@specifies a backtrace cache limit of 1024 backtraces.
@roff_prof@.It U
@roff_prof@Trigger a memory profile dump every time the total virtual memory
@roff_prof@exceeds the previous maximum.
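As a worked illustration of the encoding described above (a sketch, not part of the patch): the limit starts out as lg = -1, meaning no limit, each T raises the shift by one, and the resulting cache size is 1 << lg, so 11T yields 1 << 10 = 1024 cached backtraces per thread:

#include <stdio.h>

/* Illustration only: mirrors how repeated T/t options move the lg limit. */
int
main(void)
{
	long lg_tcmax = -1;	/* default: no limit, encoded as a shift of -1 */
	int nT = 11;		/* e.g. JEMALLOC_OPTIONS=11T */

	while (nT-- > 0)
		lg_tcmax++;	/* each 'T' increments the shift amount */

	if (lg_tcmax >= 0)
		printf("backtrace cache limit: %lu\n", 1UL << lg_tcmax);
	else
		printf("no backtrace cache limit\n");
	return (0);
}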
@@ -992,6 +1026,27 @@ option.
@roff_prof@option.
@roff_prof@.Ed
.\"-----------------------------------------------------------------------------
@roff_prof@.It Sy "opt.prof_accum (bool) r-"
@roff_prof@.Bd -ragged -offset indent -compact
@roff_prof@See the
@roff_prof@.Dq R
@roff_prof@option.
@roff_prof@.Ed
.\"-----------------------------------------------------------------------------
@roff_prof@.It Sy "opt.lg_prof_tcmax (ssize_t) r-"
@roff_prof@.Bd -ragged -offset indent -compact
@roff_prof@See the
@roff_prof@.Dq T
@roff_prof@option.
@roff_prof@.Ed
.\"-----------------------------------------------------------------------------
@roff_prof@.It Sy "opt.lg_prof_sample (ssize_t) r-"
@roff_prof@.Bd -ragged -offset indent -compact
@roff_prof@See the
@roff_prof@.Dq S
@roff_prof@option.
@roff_prof@.Ed
.\"-----------------------------------------------------------------------------
@roff_prof@.It Sy "opt.lg_prof_interval (ssize_t) r-"
@roff_prof@.Bd -ragged -offset indent -compact
@roff_prof@See the
@@ -24,6 +24,7 @@ extern bool isthreaded;
#endif

bool malloc_mutex_init(malloc_mutex_t *mutex);
void malloc_mutex_destroy(malloc_mutex_t *mutex);

#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
@@ -6,12 +6,13 @@ typedef struct prof_bt_s prof_bt_t;
typedef struct prof_cnt_s prof_cnt_t;
typedef struct prof_thr_cnt_s prof_thr_cnt_t;
typedef struct prof_ctx_s prof_ctx_t;
typedef struct prof_s prof_t;
typedef struct prof_tcache_s prof_tcache_t;

/* Option defaults. */
#define LG_PROF_BT_MAX_DEFAULT 2
#define LG_PROF_SAMPLE_DEFAULT 0
#define LG_PROF_INTERVAL_DEFAULT -1
#define LG_PROF_TCMAX_DEFAULT -1

/*
 * Hard limit on stack backtrace depth. Note that the version of
@@ -41,9 +42,9 @@ struct prof_bt_s {
#ifdef JEMALLOC_PROF_LIBGCC
/* Data structure passed to libgcc _Unwind_Backtrace() callback functions. */
typedef struct {
	prof_bt_t *bt;
	unsigned nignore;
	unsigned max;
	prof_bt_t *bt;
	unsigned nignore;
	unsigned max;
} prof_unwind_data_t;
#endif

@@ -51,11 +52,11 @@ struct prof_cnt_s {
	/*
	 * Profiling counters. An allocation/deallocation pair can operate on
	 * different prof_thr_cnt_t objects that are linked into the same
	 * prof_ctx_t sets_ql, so it is possible for the cur* counters to go
	 * prof_ctx_t cnts_ql, so it is possible for the cur* counters to go
	 * negative. In principle it is possible for the *bytes counters to
	 * overflow/underflow, but a general solution would require some form
	 * of 128-bit counter solution; this implementation doesn't bother to
	 * solve that problem.
	 * overflow/underflow, but a general solution would require something
	 * like 128-bit counters; this implementation doesn't bother to solve
	 * that problem.
	 */
	int64_t curobjs;
	int64_t curbytes;
@@ -64,15 +65,18 @@ struct prof_cnt_s {
};

struct prof_thr_cnt_s {
	/* Linkage into prof_ctx_t's sets_ql. */
	ql_elm(prof_thr_cnt_t) link;
	/* Linkage into prof_ctx_t's cnts_ql. */
	ql_elm(prof_thr_cnt_t) cnts_link;

	/* Linkage into thread's LRU. */
	ql_elm(prof_thr_cnt_t) lru_link;

	/*
	 * Associated context. If a thread frees an object that it did not
	 * allocate, it is possible that the context is not cached in the
	 * thread's hash table, in which case it must be able to look up the
	 * context, insert a new prof_thr_cnt_t into the thread's hash table,
	 * and link it into the prof_ctx_t's sets_ql.
	 * and link it into the prof_ctx_t's cnts_ql.
	 */
	prof_ctx_t *ctx;

@@ -101,11 +105,11 @@ struct prof_ctx_s {
	/* Associated backtrace. */
	prof_bt_t *bt;

	/* Protects cnt_merged and sets_ql. */
	/* Protects cnt_merged and cnts_ql. */
	malloc_mutex_t lock;

	/* Temporary storage for aggregation during dump. */
	prof_cnt_t cnt_dump;
	/* Temporary storage for summation during dump. */
	prof_cnt_t cnt_summed;

	/* When threads exit, they merge their stats into cnt_merged. */
	prof_cnt_t cnt_merged;
@@ -117,6 +121,24 @@ struct prof_ctx_s {
	ql_head(prof_thr_cnt_t) cnts_ql;
};

/*
 * Thread-specific hash of (prof_bt_t *)-->(prof_thr_cnt_t *). Each thread
 * keeps a cache of backtraces, with associated thread-specific prof_thr_cnt_t
 * objects. Other threads may read the prof_thr_cnt_t contents, but no others
 * will ever write them.
 *
 * Upon thread exit, the thread must merge all the prof_thr_cnt_t counter data
 * into the associated prof_ctx_t objects, and unlink/free the prof_thr_cnt_t
 * objects.
 */
struct prof_tcache_s {
	/* (prof_bt_t *)-->(prof_thr_cnt_t *). */
	ckh_t bt2cnt;

	/* LRU for contents of bt2cnt. */
	ql_head(prof_thr_cnt_t) lru_ql;
};

#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS
@@ -129,11 +151,13 @@ extern bool opt_prof;
 * to notice state changes.
 */
extern bool opt_prof_active;
extern size_t opt_lg_prof_bt_max; /* Maximum backtrace depth. */
extern size_t opt_lg_prof_sample; /* Mean bytes between samples. */
extern size_t opt_lg_prof_bt_max; /* Maximum backtrace depth. */
extern size_t opt_lg_prof_sample; /* Mean bytes between samples. */
extern ssize_t opt_lg_prof_interval; /* lg(prof_interval). */
extern bool opt_prof_udump; /* High-water memory dumping. */
extern bool opt_prof_leak; /* Dump leak summary at exit. */
extern bool opt_prof_udump; /* High-water memory dumping. */
extern bool opt_prof_leak; /* Dump leak summary at exit. */
extern bool opt_prof_accum; /* Report cumulative bytes. */
extern ssize_t opt_lg_prof_tcmax; /* lg(max per thread backtrace cache) */

/*
 * Profile dump interval, measured in bytes allocated. Each arena triggers a
@@ -150,9 +174,6 @@ extern uint64_t prof_interval;
 */
extern bool prof_promote;

bool prof_init(prof_t *prof, bool master);
void prof_destroy(prof_t *prof);

prof_thr_cnt_t *prof_alloc_prep(size_t size);
prof_ctx_t *prof_ctx_get(const void *ptr);
void prof_malloc(const void *ptr, prof_thr_cnt_t *cnt);
@@ -82,6 +82,8 @@ CTL_PROTO(opt_lg_prof_sample)
CTL_PROTO(opt_lg_prof_interval)
CTL_PROTO(opt_prof_udump)
CTL_PROTO(opt_prof_leak)
CTL_PROTO(opt_prof_accum)
CTL_PROTO(opt_lg_prof_tcmax)
#endif
CTL_PROTO(opt_stats_print)
CTL_PROTO(opt_lg_qspace_max)
@@ -260,6 +262,8 @@ static const ctl_node_t opt_node[] = {
	{NAME("lg_prof_interval"), CTL(opt_lg_prof_interval)},
	{NAME("prof_udump"), CTL(opt_prof_udump)},
	{NAME("prof_leak"), CTL(opt_prof_leak)},
	{NAME("prof_accum"), CTL(opt_prof_accum)},
	{NAME("lg_prof_tcmax"), CTL(opt_lg_prof_tcmax)},
#endif
	{NAME("stats_print"), CTL(opt_stats_print)},
	{NAME("lg_qspace_max"), CTL(opt_lg_qspace_max)},
@@ -1207,6 +1211,8 @@ CTL_RO_GEN(opt_lg_prof_sample, opt_lg_prof_sample, size_t)
CTL_RO_GEN(opt_lg_prof_interval, opt_lg_prof_interval, ssize_t)
CTL_RO_GEN(opt_prof_udump, opt_prof_udump, bool)
CTL_RO_GEN(opt_prof_leak, opt_prof_leak, bool)
CTL_RO_GEN(opt_prof_accum, opt_prof_accum, bool)
CTL_RO_GEN(opt_lg_prof_tcmax, opt_lg_prof_tcmax, ssize_t)
#endif
CTL_RO_GEN(opt_stats_print, opt_stats_print, bool)
CTL_RO_GEN(opt_lg_qspace_max, opt_lg_qspace_max, size_t)
@@ -512,6 +512,12 @@ MALLOC_OUT:
				opt_lg_qspace_max++;
				break;
#ifdef JEMALLOC_PROF
			case 'r':
				opt_prof_accum = false;
				break;
			case 'R':
				opt_prof_accum = true;
				break;
			case 's':
				if (opt_lg_prof_sample > 0)
					opt_lg_prof_sample--;
@@ -521,6 +527,15 @@ MALLOC_OUT:
				    (sizeof(uint64_t) << 3))
					opt_lg_prof_sample++;
				break;
			case 't':
				if (opt_lg_prof_tcmax >= 0)
					opt_lg_prof_tcmax--;
				break;
			case 'T':
				if (opt_lg_prof_tcmax + 1 <
				    (sizeof(size_t) << 3))
					opt_lg_prof_tcmax++;
				break;
			case 'u':
				opt_prof_udump = false;
				break;
@@ -72,3 +72,13 @@ malloc_mutex_init(malloc_mutex_t *mutex)

	return (false);
}

void
malloc_mutex_destroy(malloc_mutex_t *mutex)
{

	if (pthread_mutex_destroy(mutex) != 0) {
		malloc_write("<jemalloc>: Error in pthread_mutex_destroy()\n");
		abort();
	}
}
@@ -24,47 +24,41 @@ size_t opt_lg_prof_sample = LG_PROF_SAMPLE_DEFAULT;
ssize_t opt_lg_prof_interval = LG_PROF_INTERVAL_DEFAULT;
bool opt_prof_udump = false;
bool opt_prof_leak = false;
bool opt_prof_accum = true;
ssize_t opt_lg_prof_tcmax = LG_PROF_TCMAX_DEFAULT;

uint64_t prof_interval;
bool prof_promote;

/*
 * Global hash of (prof_bt_t *)-->(prof_ctx_t *). This is the master data
 * structure that knows about all backtraces ever captured.
 * structure that knows about all backtraces currently captured.
 */
static ckh_t bt2ctx;
static malloc_mutex_t bt2ctx_mtx;

/*
 * Thread-specific hash of (prof_bt_t *)-->(prof_thr_cnt_t *). Each thread
 * keeps a cache of backtraces, with associated thread-specific prof_thr_cnt_t
 * objects. Other threads may read the prof_thr_cnt_t contents, but no others
 * will ever write them.
 *
 * Upon thread exit, the thread must merge all the prof_thr_cnt_t counter data
 * into the associated prof_ctx_t objects, and unlink/free the prof_thr_cnt_t
 * objects.
 */
/* Thread-specific backtrace cache, used to reduce bt2ctx contention. */
#ifndef NO_TLS
static __thread ckh_t *bt2cnt_tls JEMALLOC_ATTR(tls_model("initial-exec"));
# define BT2CNT_GET() bt2cnt_tls
# define BT2CNT_SET(v) do { \
	bt2cnt_tls = (v); \
	pthread_setspecific(bt2cnt_tsd, (void *)(v)); \
static __thread prof_tcache_t *prof_tcache_tls
    JEMALLOC_ATTR(tls_model("initial-exec"));
# define PROF_TCACHE_GET() prof_tcache_tls
# define PROF_TCACHE_SET(v) do { \
	prof_tcache_tls = (v); \
	pthread_setspecific(prof_tcache_tsd, (void *)(v)); \
} while (0)
#else
# define BT2CNT_GET() ((ckh_t *)pthread_getspecific(bt2cnt_tsd))
# define BT2CNT_SET(v) do { \
	pthread_setspecific(bt2cnt_tsd, (void *)(v)); \
# define PROF_TCACHE_GET() ((ckh_t *)pthread_getspecific(prof_tcache_tsd))
# define PROF_TCACHE_SET(v) do { \
	pthread_setspecific(prof_tcache_tsd, (void *)(v)); \
} while (0)
#endif

/*
 * Same contents as b2cnt_tls, but initialized such that the TSD destructor is
 * called when a thread exits, so that bt2cnt_tls contents can be merged,
 * called when a thread exits, so that prof_tcache_tls contents can be merged,
 * unlinked, and deallocated.
 */
static pthread_key_t bt2cnt_tsd;
static pthread_key_t prof_tcache_tsd;

/* (1U << opt_lg_prof_bt_max). */
static unsigned prof_bt_max;
@@ -137,6 +131,7 @@ static bool enq_udump;

static prof_bt_t *bt_dup(prof_bt_t *bt);
static void bt_init(prof_bt_t *bt, void **vec);
static void bt_destroy(prof_bt_t *bt);
#ifdef JEMALLOC_PROF_LIBGCC
static _Unwind_Reason_Code prof_unwind_init_callback(
    struct _Unwind_Context *context, void *arg);
@@ -148,8 +143,10 @@ static prof_thr_cnt_t *prof_lookup(prof_bt_t *bt);
static void prof_ctx_set(const void *ptr, prof_ctx_t *ctx);
static bool prof_flush(bool propagate_err);
static bool prof_write(const char *s, bool propagate_err);
static void prof_ctx_merge(prof_ctx_t *ctx, prof_cnt_t *cnt_all,
static void prof_ctx_sum(prof_ctx_t *ctx, prof_cnt_t *cnt_all,
    size_t *leak_nctx);
static void prof_ctx_destroy(prof_ctx_t *ctx);
static void prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt);
static bool prof_dump_ctx(prof_ctx_t *ctx, prof_bt_t *bt,
    bool propagate_err);
static bool prof_dump_maps(bool propagate_err);
@@ -160,7 +157,7 @@ static void prof_fdump(void);
static void prof_bt_hash(const void *key, unsigned minbits, size_t *hash1,
    size_t *hash2);
static bool prof_bt_keycomp(const void *k1, const void *k2);
static void bt2cnt_thread_cleanup(void *arg);
static void prof_tcache_cleanup(void *arg);
#ifdef NO_TLS
static void prof_sample_state_thread_cleanup(void *arg);
#endif
@@ -175,6 +172,13 @@ bt_init(prof_bt_t *bt, void **vec)
	bt->len = 0;
}

static void
bt_destroy(prof_bt_t *bt)
{

	idalloc(bt);
}

static prof_bt_t *
bt_dup(prof_bt_t *bt)
{
@@ -487,23 +491,25 @@ prof_lookup(prof_bt_t *bt)
		prof_thr_cnt_t *p;
		void *v;
	} ret;
	ckh_t *bt2cnt = BT2CNT_GET();
	prof_tcache_t *prof_tcache = PROF_TCACHE_GET();

	if (bt2cnt == NULL) {
	if (prof_tcache == NULL) {
		/* Initialize an empty cache for this thread. */
		bt2cnt = (ckh_t *)imalloc(sizeof(ckh_t));
		if (bt2cnt == NULL)
		prof_tcache = (prof_tcache_t *)imalloc(sizeof(prof_tcache_t));
		if (prof_tcache == NULL)
			return (NULL);
		if (ckh_new(bt2cnt, PROF_CKH_MINITEMS, prof_bt_hash,
		    prof_bt_keycomp)) {
			idalloc(bt2cnt);

		if (ckh_new(&prof_tcache->bt2cnt, PROF_CKH_MINITEMS,
		    prof_bt_hash, prof_bt_keycomp)) {
			idalloc(prof_tcache);
			return (NULL);
		}
		ql_new(&prof_tcache->lru_ql);

		BT2CNT_SET(bt2cnt);
		PROF_TCACHE_SET(prof_tcache);
	}

	if (ckh_search(bt2cnt, bt, NULL, &ret.v)) {
	if (ckh_search(&prof_tcache->bt2cnt, bt, NULL, &ret.v)) {
		union {
			prof_bt_t *p;
			void *v;
@@ -519,7 +525,6 @@ prof_lookup(prof_bt_t *bt)
		 */
		prof_enter();
		if (ckh_search(&bt2ctx, bt, &btkey.v, &ctx.v)) {

			/* bt has never been seen before. Insert it. */
			ctx.v = imalloc(sizeof(prof_ctx_t));
			if (ctx.v == NULL) {
@@ -544,28 +549,60 @@ prof_lookup(prof_bt_t *bt)
			if (ckh_insert(&bt2ctx, btkey.v, ctx.v)) {
				/* OOM. */
				prof_leave();
				malloc_mutex_destroy(&ctx.p->lock);
				idalloc(btkey.v);
				idalloc(ctx.v);
				return (NULL);
			}
		}
		/*
		 * Acquire ctx's lock before releasing bt2ctx_mtx, in order to
		 * avoid a race condition with prof_ctx_destroy().
		 */
		malloc_mutex_lock(&ctx.p->lock);
		prof_leave();

		/* Link a prof_thd_cnt_t into ctx for this thread. */
		ret.v = imalloc(sizeof(prof_thr_cnt_t));
		if (ret.p == NULL)
			return (NULL);
		ql_elm_new(ret.p, link);
		if (opt_lg_prof_tcmax >= 0 && ckh_count(&prof_tcache->bt2cnt)
		    == (ZU(1) << opt_lg_prof_tcmax)) {
			assert(ckh_count(&prof_tcache->bt2cnt) > 0);
			/*
			 * Flush the least recently used cnt in order to
			 * keep bt2cnt from becoming too large.
			 */
			ret.p = ql_last(&prof_tcache->lru_ql, lru_link);
			assert(ret.v != NULL);
			ckh_remove(&prof_tcache->bt2cnt, ret.p->ctx->bt, NULL,
			    NULL);
			ql_remove(&prof_tcache->lru_ql, ret.p, lru_link);
			prof_ctx_merge(ret.p->ctx, ret.p);
			/* ret can now be re-used. */
		} else {
			assert(opt_lg_prof_tcmax < 0 ||
			    ckh_count(&prof_tcache->bt2cnt) < (ZU(1) <<
			    opt_lg_prof_tcmax));
			/* Allocate and partially initialize a new cnt. */
			ret.v = imalloc(sizeof(prof_thr_cnt_t));
			if (ret.p == NULL)
				return (NULL);
			ql_elm_new(ret.p, cnts_link);
			ql_elm_new(ret.p, lru_link);
		}
		/* Finish initializing ret. */
		ret.p->ctx = ctx.p;
		ret.p->epoch = 0;
		memset(&ret.p->cnts, 0, sizeof(prof_cnt_t));
		if (ckh_insert(bt2cnt, btkey.v, ret.v)) {
		if (ckh_insert(&prof_tcache->bt2cnt, btkey.v, ret.v)) {
			idalloc(ret.v);
			return (NULL);
		}
		malloc_mutex_lock(&ctx.p->lock);
		ql_tail_insert(&ctx.p->cnts_ql, ret.p, link);
		ql_head_insert(&prof_tcache->lru_ql, ret.p, lru_link);
		ql_tail_insert(&ctx.p->cnts_ql, ret.p, cnts_link);
		malloc_mutex_unlock(&ctx.p->lock);
	} else {
		/* Move ret to the front of the LRU. */
		ql_remove(&prof_tcache->lru_ql, ret.p, lru_link);
		ql_head_insert(&prof_tcache->lru_ql, ret.p, lru_link);
	}

	return (ret.p);
@@ -729,8 +766,10 @@ prof_malloc(const void *ptr, prof_thr_cnt_t *cnt)
		/*********/
		cnt->cnts.curobjs++;
		cnt->cnts.curbytes += size;
		cnt->cnts.accumobjs++;
		cnt->cnts.accumbytes += size;
		if (opt_prof_accum) {
			cnt->cnts.accumobjs++;
			cnt->cnts.accumbytes += size;
		}
		/*********/
		mb_write();
		/*********/
@@ -796,8 +835,10 @@ prof_realloc(const void *ptr, prof_thr_cnt_t *cnt, const void *old_ptr,
	if ((uintptr_t)cnt > (uintptr_t)1U) {
		cnt->cnts.curobjs++;
		cnt->cnts.curbytes += size;
		cnt->cnts.accumobjs++;
		cnt->cnts.accumbytes += size;
		if (opt_prof_accum) {
			cnt->cnts.accumobjs++;
			cnt->cnts.accumbytes += size;
		}
	}
	/*********/
	mb_write();
@@ -896,15 +937,15 @@ prof_write(const char *s, bool propagate_err)
}

static void
prof_ctx_merge(prof_ctx_t *ctx, prof_cnt_t *cnt_all, size_t *leak_nctx)
prof_ctx_sum(prof_ctx_t *ctx, prof_cnt_t *cnt_all, size_t *leak_nctx)
{
	prof_thr_cnt_t *thr_cnt;
	prof_cnt_t tcnt;

	malloc_mutex_lock(&ctx->lock);

	memcpy(&ctx->cnt_dump, &ctx->cnt_merged, sizeof(prof_cnt_t));
	ql_foreach(thr_cnt, &ctx->cnts_ql, link) {
	memcpy(&ctx->cnt_summed, &ctx->cnt_merged, sizeof(prof_cnt_t));
	ql_foreach(thr_cnt, &ctx->cnts_ql, cnts_link) {
		volatile unsigned *epoch = &thr_cnt->epoch;

		while (true) {
@@ -921,39 +962,101 @@ prof_ctx_merge(prof_ctx_t *ctx, prof_cnt_t *cnt_all, size_t *leak_nctx)
				break;
		}

		ctx->cnt_dump.curobjs += tcnt.curobjs;
		ctx->cnt_dump.curbytes += tcnt.curbytes;
		ctx->cnt_dump.accumobjs += tcnt.accumobjs;
		ctx->cnt_dump.accumbytes += tcnt.accumbytes;
		ctx->cnt_summed.curobjs += tcnt.curobjs;
		ctx->cnt_summed.curbytes += tcnt.curbytes;
		if (opt_prof_accum) {
			ctx->cnt_summed.accumobjs += tcnt.accumobjs;
			ctx->cnt_summed.accumbytes += tcnt.accumbytes;
		}

		if (tcnt.curobjs != 0)
			(*leak_nctx)++;
	}

	/* Merge into cnt_all. */
	cnt_all->curobjs += ctx->cnt_dump.curobjs;
	cnt_all->curbytes += ctx->cnt_dump.curbytes;
	cnt_all->accumobjs += ctx->cnt_dump.accumobjs;
	cnt_all->accumbytes += ctx->cnt_dump.accumbytes;
	/* Add to cnt_all. */
	cnt_all->curobjs += ctx->cnt_summed.curobjs;
	cnt_all->curbytes += ctx->cnt_summed.curbytes;
	if (opt_prof_accum) {
		cnt_all->accumobjs += ctx->cnt_summed.accumobjs;
		cnt_all->accumbytes += ctx->cnt_summed.accumbytes;
	}

	malloc_mutex_unlock(&ctx->lock);
}

static void
prof_ctx_destroy(prof_ctx_t *ctx)
{

	/*
	 * Check that ctx is still unused by any thread cache before destroying
	 * it. prof_lookup() interlocks bt2ctx_mtx and ctx->lock in order to
	 * avoid a race condition with this function.
	 */
	prof_enter();
	malloc_mutex_lock(&ctx->lock);
	if (ql_first(&ctx->cnts_ql) == NULL && ctx->cnt_merged.curobjs == 0) {
		assert(ctx->cnt_merged.curbytes == 0);
		assert(ctx->cnt_merged.accumobjs == 0);
		assert(ctx->cnt_merged.accumbytes == 0);
		/* Remove ctx from bt2ctx. */
		ckh_remove(&bt2ctx, ctx->bt, NULL, NULL);
		prof_leave();
		/* Destroy ctx. */
		malloc_mutex_unlock(&ctx->lock);
		bt_destroy(ctx->bt);
		malloc_mutex_destroy(&ctx->lock);
		idalloc(ctx);
	} else {
		malloc_mutex_unlock(&ctx->lock);
		prof_leave();
	}
}

static void
prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt)
{
	bool destroy;

	/* Merge cnt stats and detach from ctx. */
	malloc_mutex_lock(&ctx->lock);
	ctx->cnt_merged.curobjs += cnt->cnts.curobjs;
	ctx->cnt_merged.curbytes += cnt->cnts.curbytes;
	ctx->cnt_merged.accumobjs += cnt->cnts.accumobjs;
	ctx->cnt_merged.accumbytes += cnt->cnts.accumbytes;
	ql_remove(&ctx->cnts_ql, cnt, cnts_link);
	if (opt_prof_accum == false && ql_first(&ctx->cnts_ql) == NULL &&
	    ctx->cnt_merged.curobjs == 0)
		destroy = true;
	else
		destroy = false;
	malloc_mutex_unlock(&ctx->lock);
	if (destroy)
		prof_ctx_destroy(ctx);
}

static bool
prof_dump_ctx(prof_ctx_t *ctx, prof_bt_t *bt, bool propagate_err)
{
	char buf[UMAX2S_BUFSIZE];
	unsigned i;

	if (prof_write(umax2s(ctx->cnt_dump.curobjs, 10, buf), propagate_err)
	if (opt_prof_accum == false && ctx->cnt_summed.curobjs == 0) {
		assert(ctx->cnt_summed.curbytes == 0);
		assert(ctx->cnt_summed.accumobjs == 0);
		assert(ctx->cnt_summed.accumbytes == 0);
		return (false);
	}

	if (prof_write(umax2s(ctx->cnt_summed.curobjs, 10, buf), propagate_err)
	    || prof_write(": ", propagate_err)
	    || prof_write(umax2s(ctx->cnt_dump.curbytes, 10, buf),
	    || prof_write(umax2s(ctx->cnt_summed.curbytes, 10, buf),
	    propagate_err)
	    || prof_write(" [", propagate_err)
	    || prof_write(umax2s(ctx->cnt_dump.accumobjs, 10, buf),
	    || prof_write(umax2s(ctx->cnt_summed.accumobjs, 10, buf),
	    propagate_err)
	    || prof_write(": ", propagate_err)
	    || prof_write(umax2s(ctx->cnt_dump.accumbytes, 10, buf),
	    || prof_write(umax2s(ctx->cnt_summed.accumbytes, 10, buf),
	    propagate_err)
	    || prof_write("] @", propagate_err))
		return (true);
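For reference, prof_dump_ctx() above writes one record per context of the form "<curobjs>: <curbytes> [<accumobjs>: <accumbytes>] @" followed by the backtrace addresses. The lines below are illustrative only (counts and addresses are made up): with R the bracketed cumulative fields carry real totals, while with r they stay zero and contexts whose current object count has dropped to zero are skipped entirely.

4: 512 [9: 1152] @ 0x400a3f 0x4008c1 0x7f3a2e21dd1d
4: 512 [0: 0] @ 0x400a3f 0x4008c1 0x7f3a2e21dd1d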
@@ -1060,7 +1163,7 @@ prof_dump(const char *filename, bool leakcheck, bool propagate_err)
	leak_nctx = 0;
	for (tabind = 0; ckh_iter(&bt2ctx, &tabind, NULL, &ctx.v)
	    == false;) {
		prof_ctx_merge(ctx.p, &cnt_all, &leak_nctx);
		prof_ctx_sum(ctx.p, &cnt_all, &leak_nctx);
	}

	/* Dump profile header. */
@@ -1319,54 +1422,33 @@ prof_bt_keycomp(const void *k1, const void *k2)
}

static void
bt2cnt_thread_cleanup(void *arg)
prof_tcache_cleanup(void *arg)
{
	ckh_t *bt2cnt;
	prof_tcache_t *prof_tcache;

	bt2cnt = BT2CNT_GET();
	if (bt2cnt != NULL) {
		ql_head(prof_thr_cnt_t) cnts_ql;
		size_t tabind;
		union {
			prof_thr_cnt_t *p;
			void *v;
		} cnt;

		/* Iteratively merge cnt's into the global stats. */
		ql_new(&cnts_ql);
		tabind = 0;
		while (ckh_iter(bt2cnt, &tabind, NULL, &cnt.v) ==
		    false) {
			prof_ctx_t *ctx = cnt.p->ctx;
			/* Merge stats and detach from ctx. */
			malloc_mutex_lock(&ctx->lock);
			ctx->cnt_merged.curobjs += cnt.p->cnts.curobjs;
			ctx->cnt_merged.curbytes += cnt.p->cnts.curbytes;
			ctx->cnt_merged.accumobjs += cnt.p->cnts.accumobjs;
			ctx->cnt_merged.accumbytes += cnt.p->cnts.accumbytes;
			ql_remove(&ctx->cnts_ql, cnt.p, link);
			malloc_mutex_unlock(&ctx->lock);

			/*
			 * Stash cnt for deletion after finishing with
			 * ckh_iter().
			 */
			ql_tail_insert(&cnts_ql, cnt.p, link);
		}
	prof_tcache = PROF_TCACHE_GET();
	if (prof_tcache != NULL) {
		prof_thr_cnt_t *cnt;

		/*
		 * Delete the hash table now that cnts_ql has a list of all
		 * cnt's.
		 * Delete the hash table. All of its contents can still be
		 * iterated over via the LRU.
		 */
		ckh_delete(bt2cnt);
		idalloc(bt2cnt);
		BT2CNT_SET(NULL);
		ckh_delete(&prof_tcache->bt2cnt);

		/* Delete cnt's. */
		while ((cnt.p = ql_last(&cnts_ql, link)) != NULL) {
			ql_remove(&cnts_ql, cnt.p, link);
			idalloc(cnt.v);
		/*
		 * Iteratively merge cnt's into the global stats and delete
		 * them.
		 */
		while ((cnt = ql_last(&prof_tcache->lru_ql, lru_link)) !=
		    NULL) {
			prof_ctx_merge(cnt->ctx, cnt);
			ql_remove(&prof_tcache->lru_ql, cnt, lru_link);
			idalloc(cnt);
		}

		idalloc(prof_tcache);
		PROF_TCACHE_SET(NULL);
	}
}

@@ -1419,7 +1501,7 @@ prof_boot1(void)
		return (true);
	if (malloc_mutex_init(&bt2ctx_mtx))
		return (true);
	if (pthread_key_create(&bt2cnt_tsd, bt2cnt_thread_cleanup)
	if (pthread_key_create(&prof_tcache_tsd, prof_tcache_cleanup)
	    != 0) {
		malloc_write(
		    "<jemalloc>: Error in pthread_key_create()\n");
@@ -469,6 +469,9 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque,
		if ((err = JEMALLOC_P(mallctl)("opt.stats_print", &bv, &bsz,
		    NULL, 0)) == 0)
			write_cb(cbopaque, bv ? "P" : "p");
		if ((err = JEMALLOC_P(mallctl)("opt.prof_accum", &bv, &bsz,
		    NULL, 0)) == 0)
			write_cb(cbopaque, bv ? "R" : "r");
		if ((err = JEMALLOC_P(mallctl)("opt.prof_udump", &bv, &bsz,
		    NULL, 0)) == 0)
			write_cb(cbopaque, bv ? "U" : "u");
@@ -580,6 +583,17 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque,
			write_cb(cbopaque, umax2s((1U << sv), 10, s));
			write_cb(cbopaque, "\n");

			CTL_GET("opt.lg_prof_tcmax", &ssv, ssize_t);
			write_cb(cbopaque,
			    "Maximum per thread backtrace cache: ");
			if (ssv >= 0) {
				write_cb(cbopaque, umax2s((1U << ssv), 10, s));
				write_cb(cbopaque, " (2^");
				write_cb(cbopaque, umax2s(ssv, 10, s));
				write_cb(cbopaque, ")\n");
			} else
				write_cb(cbopaque, "N/A\n");

			CTL_GET("opt.lg_prof_sample", &sv, size_t);
			write_cb(cbopaque, "Average profile sample interval: ");
			write_cb(cbopaque, umax2s((1U << sv), 10, s));
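For reference, the stats_print() additions above add "R"/"r" to the run-time options string and emit a line for the cache limit along these lines (value illustrative, assuming JEMALLOC_OPTIONS=11T):

Maximum per thread backtrace cache: 1024 (2^10)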