Implement sampling for heap profiling.
parent f3ff75289b
commit b9477e782b
@@ -647,6 +647,7 @@ if test "x$enable_tls" = "x0" ; then
   enable_prof="0"
 fi
 if test "x$enable_prof" = "x1" ; then
+  LIBS="$LIBS -lm"
   AC_DEFINE([JEMALLOC_PROF], [ ])
   if test "x$enable_prof_libunwind" = "x1" ; then
     AC_CHECK_HEADERS([libunwind.h], , [enable_prof_libunwind="0"])
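Note: the `LIBS="$LIBS -lm"` addition links libm whenever profiling is enabled, because prof.c (later in this commit) includes <math.h> and calls log() to compute geometrically distributed sample thresholds.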
@@ -38,7 +38,7 @@
 .\" @(#)malloc.3	8.1 (Berkeley) 6/4/93
 .\" $FreeBSD: head/lib/libc/stdlib/malloc.3 182225 2008-08-27 02:00:53Z jasone $
 .\"
-.Dd February 11, 2010
+.Dd March 1, 2010
 .Dt JEMALLOC 3
 .Os
 .Sh NAME
@@ -355,6 +355,9 @@ will disable dirty page purging.
 @roff_prof@.Dq B
 @roff_prof@option for backtrace depth control.
 @roff_prof@See the
+@roff_prof@.Dq S
+@roff_prof@option for probabilistic sampling control.
+@roff_prof@See the
 @roff_prof@.Dq I
 @roff_prof@option for information on interval-triggered profile dumping, and the
 @roff_prof@.Dq U
@@ -464,6 +467,15 @@ Double/halve the size of the maximum size class that is a multiple of the
 quantum (8 or 16 bytes, depending on architecture).
 Above this size, cacheline spacing is used for size classes.
 The default value is 128 bytes.
+@roff_prof@.It S
+@roff_prof@Double/halve the average interval between allocation samples, as
+@roff_prof@measured in bytes of allocation activity.
+@roff_prof@Increasing the sampling interval decreases profile fidelity, but
+@roff_prof@also decreases the computational overhead.
+@roff_prof@The default sample interval is one (i.e. all allocations are
+@roff_prof@sampled).
+@roff_prof@A sample interval greater than one implicitly disables leak
+@roff_prof@reporting.
 @roff_prof@.It U
 @roff_prof@Trigger a memory profile dump every time the total virtual memory
 @roff_prof@exceeds the previous maximum.
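Note: as with the manual's other paired option letters, uppercase doubles and lowercase halves the value; for example, assuming the usual options-string mechanism this manual documents, ten consecutive `S` characters would raise lg_prof_sample from its default of 0 to 10, for an average sample interval of 2^10 = 1024 bytes. The option-parsing hunk further below implements exactly this.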
@@ -25,7 +25,7 @@
  * uint32_t state : Seed value.
  * const uint32_t a, c : See above discussion.
  */
-#define prn(r, lg_range, state, a, c) do { \
+#define prn32(r, lg_range, state, a, c) do { \
 	assert(lg_range > 0); \
 	assert(lg_range <= 32); \
 \
@@ -34,6 +34,16 @@
 	r >>= (32 - lg_range); \
 } while (false)
 
+/* Same as prn32(), but 64 bits of pseudo-randomness, using uint64_t. */
+#define prn64(r, lg_range, state, a, c) do { \
+	assert(lg_range > 0); \
+	assert(lg_range <= 64); \
+\
+	r = (state * (a)) + (c); \
+	state = r; \
+	r >>= (64 - lg_range); \
+} while (false)
+
 #endif /* JEMALLOC_H_TYPES */
 /******************************************************************************/
 #ifdef JEMALLOC_H_STRUCTS
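Both macros are linear congruential generators: each draw multiplies the caller-supplied state by a, adds c, stores the result back, and keeps only the top lg_range bits, which are the best-distributed bits an LCG produces. A minimal standalone sketch of prn64() usage (the a/c constants here are Knuth's MMIX values, picked purely for illustration, not necessarily what this commit's callers use):

#include <assert.h>
#include <inttypes.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define prn64(r, lg_range, state, a, c) do {				\
	assert(lg_range > 0);						\
	assert(lg_range <= 64);						\
									\
	r = (state * (a)) + (c);					\
	state = r;							\
	r >>= (64 - lg_range);						\
} while (false)

int
main(void)
{
	uint64_t state = 42;	/* Caller-owned seed/state. */
	uint64_t r;
	int i;

	for (i = 0; i < 4; i++) {
		/* Draw 16 pseudo-random bits per iteration. */
		prn64(r, 16, state, 6364136223846793005ULL,
		    1442695040888963407ULL);
		printf("%" PRIu64 "\n", r);
	}
	return (0);
}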
@@ -8,6 +8,9 @@ typedef struct prof_thr_cnt_s prof_thr_cnt_t;
 typedef struct prof_ctx_s prof_ctx_t;
 typedef struct prof_s prof_t;
 
+/* Option defaults. */
+#define LG_PROF_BT_MAX_DEFAULT	2
+#define LG_PROF_SAMPLE_DEFAULT	0
 #define LG_PROF_INTERVAL_DEFAULT	30
 
 /*
@@ -16,7 +19,7 @@ typedef struct prof_s prof_t;
  * a hard-coded number of backtrace frame handlers, so increasing
  * LG_PROF_BT_MAX requires changing prof_backtrace().
  */
-#define LG_PROF_BT_MAX	7
+#define LG_PROF_BT_MAX	7 /* >= LG_PROF_BT_MAX_DEFAULT */
 #define PROF_BT_MAX	(1U << LG_PROF_BT_MAX)
 
 /* Initial hash table size. */
@@ -117,7 +120,8 @@ struct prof_ctx_s {
 
 extern bool opt_prof;
 extern size_t opt_lg_prof_bt_max; /* Maximum backtrace depth. */
-extern size_t opt_lg_prof_interval;
+extern size_t opt_lg_prof_sample; /* Mean bytes between samples. */
+extern size_t opt_lg_prof_interval; /* lg(prof_interval). */
 extern bool opt_prof_udump; /* High-water memory dumping. */
 extern bool opt_prof_leak; /* Dump leak summary at exit. */
 
@@ -133,7 +137,7 @@ extern uint64_t prof_interval;
 bool prof_init(prof_t *prof, bool master);
 void prof_destroy(prof_t *prof);
 
-prof_thr_cnt_t *prof_alloc_prep(void);
+prof_thr_cnt_t *prof_alloc_prep(size_t size);
 prof_thr_cnt_t *prof_cnt_get(const void *ptr);
 void prof_malloc(const void *ptr, prof_thr_cnt_t *cnt);
 void prof_realloc(const void *ptr, prof_thr_cnt_t *cnt, const void *old_ptr,
@@ -100,7 +100,7 @@ ckh_try_bucket_insert(ckh_t *ckh, size_t bucket, const void *key,
 	 * Cycle through the cells in the bucket, starting at a random position.
 	 * The randomness avoids worst-case search overhead as buckets fill up.
 	 */
-	prn(offset, LG_CKH_BUCKET_CELLS, ckh->prn_state, CKH_A, CKH_C);
+	prn32(offset, LG_CKH_BUCKET_CELLS, ckh->prn_state, CKH_A, CKH_C);
 	for (i = 0; i < (ZU(1) << LG_CKH_BUCKET_CELLS); i++) {
 		cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) +
 		    ((i + offset) & ((ZU(1) << LG_CKH_BUCKET_CELLS) - 1))];
@@ -142,7 +142,7 @@ ckh_evict_reloc_insert(ckh_t *ckh, size_t argbucket, void const **argkey,
 		 * were an item for which both hashes indicated the same
 		 * bucket.
 		 */
-		prn(i, LG_CKH_BUCKET_CELLS, ckh->prn_state, CKH_A, CKH_C);
+		prn32(i, LG_CKH_BUCKET_CELLS, ckh->prn_state, CKH_A, CKH_C);
 		cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + i];
 		assert(cell->key != NULL);
 
@@ -69,6 +69,7 @@ CTL_PROTO(opt_lg_tcache_gc_sweep)
 #ifdef JEMALLOC_PROF
 CTL_PROTO(opt_prof)
 CTL_PROTO(opt_lg_prof_bt_max)
+CTL_PROTO(opt_lg_prof_sample)
 CTL_PROTO(opt_lg_prof_interval)
 CTL_PROTO(opt_prof_udump)
 CTL_PROTO(opt_prof_leak)
@@ -234,6 +235,7 @@ static const ctl_node_t opt_node[] = {
 #ifdef JEMALLOC_PROF
 	{NAME("prof"), CTL(opt_prof)},
 	{NAME("lg_prof_bt_max"), CTL(opt_lg_prof_bt_max)},
+	{NAME("lg_prof_sample"), CTL(opt_lg_prof_sample)},
 	{NAME("lg_prof_interval"), CTL(opt_lg_prof_interval)},
 	{NAME("prof_udump"), CTL(opt_prof_udump)},
 	{NAME("prof_leak"), CTL(opt_prof_leak)},
@@ -1066,6 +1068,7 @@ CTL_RO_GEN(opt_lg_tcache_gc_sweep, opt_lg_tcache_gc_sweep, ssize_t)
 #ifdef JEMALLOC_PROF
 CTL_RO_GEN(opt_prof, opt_prof, bool)
 CTL_RO_GEN(opt_lg_prof_bt_max, opt_lg_prof_bt_max, size_t)
+CTL_RO_GEN(opt_lg_prof_sample, opt_lg_prof_sample, size_t)
 CTL_RO_GEN(opt_lg_prof_interval, opt_lg_prof_interval, size_t)
 CTL_RO_GEN(opt_prof_udump, opt_prof_udump, bool)
 CTL_RO_GEN(opt_prof_leak, opt_prof_leak, bool)
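Once this plumbing is in place, the sample interval is observable through the usual mallctl namespace. A minimal sketch of reading it (the header path and the unmangled mallctl name are assumptions; in this era of the tree public symbols actually go through the JEMALLOC_P() name-mangling macro seen elsewhere in the diff):

#include <stdio.h>
#include <jemalloc/jemalloc.h>

int
main(void)
{
	size_t lg_sample;
	size_t sz = sizeof(lg_sample);

	if (mallctl("opt.lg_prof_sample", &lg_sample, &sz, NULL, 0) == 0) {
		printf("average sample interval: %zu bytes (2^%zu)\n",
		    (size_t)1 << lg_sample, lg_sample);
	}
	return (0);
}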
@@ -582,6 +582,15 @@ MALLOC_OUT:
 					opt_lg_qspace_max++;
 				break;
 #ifdef JEMALLOC_PROF
+			case 's':
+				if (opt_lg_prof_sample > 0)
+					opt_lg_prof_sample--;
+				break;
+			case 'S':
+				if (opt_lg_prof_sample + 1 <
+				    (sizeof(uint64_t) << 3))
+					opt_lg_prof_sample++;
+				break;
 			case 'u':
 				opt_prof_udump = false;
 				break;
@@ -870,7 +879,7 @@ JEMALLOC_P(malloc)(size_t size)
 	}
 
 #ifdef JEMALLOC_PROF
-	if (opt_prof && (cnt = prof_alloc_prep()) == NULL) {
+	if (opt_prof && (cnt = prof_alloc_prep(size)) == NULL) {
 		ret = NULL;
 		goto OOM;
 	}
@@ -955,7 +964,7 @@ JEMALLOC_P(posix_memalign)(void **memptr, size_t alignment, size_t size)
 	}
 
 #ifdef JEMALLOC_PROF
-	if (opt_prof && (cnt = prof_alloc_prep()) == NULL) {
+	if (opt_prof && (cnt = prof_alloc_prep(size)) == NULL) {
 		result = NULL;
 		ret = EINVAL;
 	} else
@@ -1030,7 +1039,7 @@ JEMALLOC_P(calloc)(size_t num, size_t size)
 	}
 
 #ifdef JEMALLOC_PROF
-	if (opt_prof && (cnt = prof_alloc_prep()) == NULL) {
+	if (opt_prof && (cnt = prof_alloc_prep(num_size)) == NULL) {
 		ret = NULL;
 		goto RETURN;
 	}
@@ -1106,7 +1115,7 @@ JEMALLOC_P(realloc)(void *ptr, size_t size)
 		if (opt_prof) {
 			old_size = isalloc(ptr);
 			old_cnt = prof_cnt_get(ptr);
-			if ((cnt = prof_alloc_prep()) == NULL) {
+			if ((cnt = prof_alloc_prep(size)) == NULL) {
 				ret = NULL;
 				goto OOM;
 			}
@@ -1144,7 +1153,7 @@ OOM:
 			ret = NULL;
 		} else {
 #ifdef JEMALLOC_PROF
-			if (opt_prof && (cnt = prof_alloc_prep()) == NULL) {
+			if (opt_prof && (cnt = prof_alloc_prep(size)) == NULL) {
 				ret = NULL;
 			} else
 #endif
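Note: every allocation entry point now forwards the request size to prof_alloc_prep() so the sampling accumulator can be consulted before any backtrace is taken; calloc passes num_size, the already-computed num * size product, so sampling accounts for the total bytes requested rather than the per-element size.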
@@ -12,11 +12,14 @@
 #include <libunwind.h>
 #endif
 
+#include <math.h>
+
 /******************************************************************************/
 /* Data. */
 
 bool opt_prof = false;
-size_t opt_lg_prof_bt_max = 2;
+size_t opt_lg_prof_bt_max = LG_PROF_BT_MAX_DEFAULT;
+size_t opt_lg_prof_sample = LG_PROF_SAMPLE_DEFAULT;
 size_t opt_lg_prof_interval = LG_PROF_INTERVAL_DEFAULT;
 bool opt_prof_udump = false;
 bool opt_prof_leak = false;
@@ -52,6 +55,13 @@ static pthread_key_t bt2cnt_tsd;
 /* (1U << opt_lg_prof_bt_max). */
 static unsigned prof_bt_max;
 
+static __thread uint64_t prof_sample_prn_state
+    JEMALLOC_ATTR(tls_model("initial-exec"));
+static __thread uint64_t prof_sample_threshold
+    JEMALLOC_ATTR(tls_model("initial-exec"));
+static __thread uint64_t prof_sample_accum
+    JEMALLOC_ATTR(tls_model("initial-exec"));
+
 static malloc_mutex_t prof_dump_seq_mtx;
 static uint64_t prof_dump_seq;
 static uint64_t prof_dump_iseq;
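Note: the three new per-thread variables hold each thread's prng state, its current sample threshold, and the bytes of allocation activity accumulated since the last sample, so threads sample independently without synchronization; the initial-exec TLS model keeps accesses to this hot state cheap.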
@@ -500,15 +510,27 @@ prof_lookup(prof_bt_t *bt)
 }
 
 prof_thr_cnt_t *
-prof_alloc_prep(void)
+prof_alloc_prep(size_t size)
 {
 	prof_thr_cnt_t *ret;
 	void *vec[prof_bt_max];
 	prof_bt_t bt;
 
+	/*
+	 * Determine whether to capture a backtrace based on whether size is
+	 * enough for prof_accum to reach prof_sample_threshold.  However,
+	 * delay updating these variables until prof_{m,re}alloc(), because we
+	 * don't know for sure that the allocation will succeed.
+	 *
+	 * Use subtraction rather than addition to avoid potential integer
+	 * overflow.
+	 */
+	if (size >= prof_sample_threshold - prof_sample_accum) {
 		bt_init(&bt, vec);
 		prof_backtrace(&bt, 2, prof_bt_max);
 		ret = prof_lookup(&bt);
+	} else
+		ret = (prof_thr_cnt_t *)(uintptr_t)1U;
 
 	return (ret);
 }
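Two details here are worth spelling out. First, the unsampled path returns (prof_thr_cnt_t *)1 rather than NULL, so callers can distinguish "not sampled" from "out of memory"; this is the sentinel that the `> (uintptr_t)1U` comparisons later in this commit test for. Second, the overflow comment: testing `prof_sample_accum + size >= prof_sample_threshold` could wrap for very large requests and silently skip a sample, while the subtraction form cannot wrap as long as accum stays below the threshold. A small self-contained sketch of the difference (values illustrative):

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint64_t threshold = (uint64_t)1 << 20;		/* 1 MiB mean interval. */
	uint64_t accum = ((uint64_t)1 << 20) - 1;	/* Just below threshold. */
	uint64_t size = UINT64_MAX - 2;			/* Pathologically large. */

	/* Addition form: wraps modulo 2^64 and misses the sample. */
	printf("add form: %d\n", accum + size >= threshold);
	/* Subtraction form: correct whenever accum < threshold. */
	printf("sub form: %d\n", size >= threshold - accum);
	return (0);
}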
@@ -550,13 +572,68 @@ prof_cnt_set(const void *ptr, prof_thr_cnt_t *cnt)
 		huge_prof_cnt_set(ptr, cnt);
 }
 
+static inline void
+prof_sample_threshold_update(void)
+{
+	uint64_t r;
+	double u;
+
+	/*
+	 * Compute prof_sample_threshold as a geometrically distributed random
+	 * variable with mean (2^opt_lg_prof_sample).
+	 */
+	prn64(r, 53, prof_sample_prn_state, (uint64_t)1125899906842625LLU,
+	    1058392653243283975);
+	u = (double)r * (1.0/9007199254740992.0L);
+	prof_sample_threshold = (uint64_t)(log(u) /
+	    log(1.0 - (1.0 / (double)((uint64_t)1U << opt_lg_prof_sample))))
+	    + (uint64_t)1U;
+}
+
+static inline void
+prof_sample_accum_update(size_t size)
+{
+
+	if (opt_lg_prof_sample == 0) {
+		/*
+		 * Don't bother with sampling logic, since sampling interval is
+		 * 1.
+		 */
+		return;
+	}
+
+	if (prof_sample_threshold == 0) {
+		/* Initialize.  Seed the prng differently for each thread. */
+		prof_sample_prn_state = (uint64_t)(uintptr_t)&size;
+		prof_sample_threshold_update();
+	}
+
+	/* Take care to avoid integer overflow. */
+	if (size >= prof_sample_threshold - prof_sample_accum) {
+		prof_sample_accum -= (prof_sample_threshold - size);
+		/*
+		 * Compute new geometrically distributed prof_sample_threshold.
+		 */
+		prof_sample_threshold_update();
+		while (prof_sample_accum >= prof_sample_threshold) {
+			prof_sample_accum -= prof_sample_threshold;
+			prof_sample_threshold_update();
+		}
+	} else
+		prof_sample_accum += size;
+}
+
 void
 prof_malloc(const void *ptr, prof_thr_cnt_t *cnt)
 {
 	size_t size = isalloc(ptr);
 
-	prof_cnt_set(ptr, cnt);
+	assert(ptr != NULL);
+
+	prof_cnt_set(ptr, cnt);
+	prof_sample_accum_update(size);
+
+	if ((uintptr_t)cnt > (uintptr_t)1U) {
 		cnt->epoch++;
 		/*********/
 		mb_write();
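The threshold computation is inverse transform sampling: prn64() supplies 53 random bits, which are scaled by 1/2^53 to a uniform u in [0, 1), and u is then pushed through the inverse CDF of the geometric distribution with success probability p = 1/2^opt_lg_prof_sample, so thresholds average 2^opt_lg_prof_sample bytes between samples. A standalone sketch of the same math, with drand48() standing in for the prn64() draw (seed and constants illustrative; build with -lm, matching the configure.ac change above):

#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

static uint64_t
sample_threshold(unsigned lg_sample)
{
	double u, p;

	do {
		u = drand48();	/* Uniform in [0, 1); reject 0 for log(). */
	} while (u == 0.0);
	p = 1.0 / (double)((uint64_t)1 << lg_sample);

	/* Inverse geometric CDF, mapped onto {1, 2, 3, ...}. */
	return ((uint64_t)(log(u) / log(1.0 - p)) + 1);
}

int
main(void)
{
	double sum = 0.0;
	int i, n = 100000;

	srand48(42);
	for (i = 0; i < n; i++)
		sum += (double)sample_threshold(19);
	/* The empirical mean should land near 2^19 = 524288. */
	printf("mean threshold: %.0f bytes\n", sum / (double)n);
	return (0);
}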
@@ -573,6 +650,7 @@ prof_malloc(const void *ptr, prof_thr_cnt_t *cnt)
 		mb_write();
 		/*********/
 	}
+}
 
 void
 prof_realloc(const void *ptr, prof_thr_cnt_t *cnt, const void *old_ptr,
@@ -580,20 +658,23 @@ prof_realloc(const void *ptr, prof_thr_cnt_t *cnt, const void *old_ptr,
 {
 	size_t size = isalloc(ptr);
 
+	if (ptr != NULL) {
 		prof_cnt_set(ptr, cnt);
+		prof_sample_accum_update(size);
+	}
 
-	if (old_cnt != NULL)
+	if ((uintptr_t)old_cnt > (uintptr_t)1U)
 		old_cnt->epoch++;
-	if (cnt != NULL)
+	if ((uintptr_t)cnt > (uintptr_t)1U)
 		cnt->epoch++;
 	/*********/
 	mb_write();
 	/*********/
-	if (old_cnt != NULL) {
+	if ((uintptr_t)old_cnt > (uintptr_t)1U) {
 		old_cnt->cnts.curobjs--;
 		old_cnt->cnts.curbytes -= old_size;
 	}
-	if (cnt != NULL) {
+	if ((uintptr_t)cnt > (uintptr_t)1U) {
 		cnt->cnts.curobjs++;
 		cnt->cnts.curbytes += size;
 		cnt->cnts.accumobjs++;
@@ -602,9 +683,9 @@ prof_realloc(const void *ptr, prof_thr_cnt_t *cnt, const void *old_ptr,
 	/*********/
 	mb_write();
 	/*********/
-	if (old_cnt != NULL)
+	if ((uintptr_t)old_cnt > (uintptr_t)1U)
 		old_cnt->epoch++;
-	if (cnt != NULL)
+	if ((uintptr_t)cnt > (uintptr_t)1U)
 		cnt->epoch++;
 	/*********/
 	mb_write(); /* Not strictly necessary. */
@@ -614,6 +695,8 @@ void
 prof_free(const void *ptr)
 {
 	prof_thr_cnt_t *cnt = prof_cnt_get(ptr);
 
+	if ((uintptr_t)cnt > (uintptr_t)1) {
 		size_t size = isalloc(ptr);
 
 		cnt->epoch++;
@@ -630,6 +713,7 @@ prof_free(const void *ptr)
 		mb_write();
 		/*********/
 	}
+}
 
 static void
 prof_flush(void)
@@ -825,7 +909,13 @@ prof_dump(const char *filename, bool leakcheck)
 	prof_write(umax2s(cnt_all.accumobjs, 10, buf));
 	prof_write(": ");
 	prof_write(umax2s(cnt_all.accumbytes, 10, buf));
+	if (opt_lg_prof_sample == 0)
 		prof_write("] @ heapprofile\n");
+	else {
+		prof_write("] @ heap_v2/");
+		prof_write(umax2s((uint64_t)1U << opt_lg_prof_sample, 10, buf));
+		prof_write("\n");
+	}
 
 	/* Dump per ctx profile stats. */
 	for (tabind = 0; ckh_iter(&bt2ctx, &tabind, (void **)&bt, (void **)&ctx)
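With sampling enabled, the dump header switches from the plain `heapprofile` marker to `heap_v2/<interval>`, which tells profile consumers that per-site counts were sampled at that average byte interval and should be scaled accordingly. For opt_lg_prof_sample = 19, the end of the summary line produced here would read (counts illustrative; the prefix of the line is written by earlier, unchanged code in prof_dump()):

  3: 12288 [3: 12288] @ heap_v2/524288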
@@ -1104,6 +1194,14 @@ prof_boot0(void)
 	 * initialized, so this function must be executed early.
 	 */
 
+	if (opt_lg_prof_sample > 0) {
+		/*
+		 * Disable leak checking, since not all allocations will be
+		 * sampled.
+		 */
+		opt_prof_leak = false;
+	}
+
 	if (opt_prof_leak && opt_prof == false) {
 		/*
 		 * Enable opt_prof, but in such a way that profiles are never
@@ -540,13 +540,18 @@ stats_print(void (*write4)(void *, const char *, const char *, const char *,
 		    tcache_nslots && ssv >= 0 ? umax2s(tcache_gc_sweep,
 		    10, s) : "N/A", "\n", "");
 	}
-	if ((err = JEMALLOC_P(mallctl)("opt.lg_prof_bt_max", &sv, &ssz,
-	    NULL, 0)) == 0) {
+	if ((err = JEMALLOC_P(mallctl)("opt.prof", &bv, &bsz, NULL, 0))
+	    == 0 && bv) {
+		xmallctl("opt.lg_prof_bt_max", &sv, &ssz, NULL, 0);
 		write4(w4opaque, "Maximum profile backtrace depth: ",
 		    umax2s((1U << sv), 10, s), "\n", "");
-	}
-	if ((err = JEMALLOC_P(mallctl)("opt.lg_prof_interval", &sv,
-	    &ssz, NULL, 0)) == 0) {
+
+		xmallctl("opt.lg_prof_sample", &sv, &ssz, NULL, 0);
+		write4(w4opaque, "Average profile sample interval: ",
+		    umax2s((1U << sv), 10, s), "", "");
+		write4(w4opaque, " (2^", umax2s(sv, 10, s), ")\n", "");
+
+		xmallctl("opt.lg_prof_interval", &sv, &ssz, NULL, 0);
 		write4(w4opaque, "Average profile dump interval: ",
 		    umax2s((1U << sv), 10, s), "", "");
 		write4(w4opaque, " (2^", umax2s(sv, 10, s), ")\n", "");