Simplify backtracing.

Simplify backtracing to not ignore any frames, and compensate for this
in pprof in order to increase flexibility with respect to function-based
refactoring even in the presence of non-deterministic inlining.  Modify
pprof to blacklist all jemalloc allocation entry points including
non-standard ones like mallocx(), and ignore all allocator-internal
frames.  Prior to this change, pprof excluded the specifically
blacklisted functions from backtraces, but it left allocator-internal
frames intact.
Jason Evans 2014-04-22 18:41:15 -07:00
parent 05125b8377
commit 6f001059aa
4 changed files with 60 additions and 91 deletions
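
In brief: the capture side now records every frame, and the reporting side
trims by symbol name. A minimal standalone C sketch of the capture half (not
jemalloc source; BT_MAX is a stand-in for PROF_BT_MAX, and it assumes
libunwind's non-standard unw_backtrace() extension, which the libunwind path
below also uses):

#define UNW_LOCAL_ONLY
#include <libunwind.h>
#include <stdio.h>

#define BT_MAX 128

int
main(void)
{
	void *vec[BT_MAX];
	/* Capture the whole stack; no frames are skipped at record time. */
	int nframes = unw_backtrace(vec, BT_MAX);

	/*
	 * Allocator-internal frames are left in place here; a pprof-style
	 * consumer discards them by function name afterwards.
	 */
	for (int i = 0; i < nframes; i++)
		printf("%p\n", vec[i]);
	return (0);
}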

bin/pprof

@@ -2811,9 +2811,14 @@ sub RemoveUninterestingFrames {
                     'free',
                     'memalign',
                     'posix_memalign',
+                    'aligned_alloc',
                     'pvalloc',
                     'valloc',
                     'realloc',
+                    'mallocx', # jemalloc
+                    'rallocx', # jemalloc
+                    'xallocx', # jemalloc
+                    'dallocx', # jemalloc
                     'tc_calloc',
                     'tc_cfree',
                     'tc_malloc',
@@ -2923,6 +2928,10 @@ sub RemoveUninterestingFrames {
      if (exists($symbols->{$a})) {
        my $func = $symbols->{$a}->[0];
        if ($skip{$func} || ($func =~ m/$skip_regexp/)) {
+         # Throw away the portion of the backtrace seen so far, under the
+         # assumption that previous frames were for functions internal to the
+         # allocator.
+         @path = ();
          next;
        }
      }
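
Restated as a hedged C sketch (helper names here are hypothetical; pprof
itself does this in Perl over symbolized addresses): walking the backtrace
leaf-to-root, each hit on a blacklisted entry point discards the frames
accumulated so far, so only caller-side frames survive:

#include <stdbool.h>
#include <stddef.h>
#include <string.h>

/* Hypothetical stand-in for pprof's %skip table and $skip_regexp. */
static bool
is_alloc_entry(const char *func)
{
	static const char *skip[] = {"malloc", "calloc", "realloc", "free",
	    "mallocx", "rallocx", "xallocx", "dallocx", "aligned_alloc"};

	for (size_t i = 0; i < sizeof(skip) / sizeof(skip[0]); i++) {
		if (strcmp(func, skip[i]) == 0)
			return (true);
	}
	return (false);
}

/*
 * Scan symbolized frames leaf-to-root.  A blacklisted frame resets the
 * output (the analogue of "@path = ()" above), so everything below the
 * outermost allocation entry point is treated as allocator-internal.
 */
static size_t
trim_frames(const char *frames[], size_t nframes, const char *path[])
{
	size_t len = 0;

	for (size_t i = 0; i < nframes; i++) {
		if (is_alloc_entry(frames[i])) {
			len = 0;
			continue;
		}
		path[len++] = frames[i];
	}
	return (len);
}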

include/jemalloc/internal/prof.h

@@ -63,7 +63,6 @@ struct prof_bt_s {
 /* Data structure passed to libgcc _Unwind_Backtrace() callback functions. */
 typedef struct {
 	prof_bt_t	*bt;
-	unsigned	nignore;
 	unsigned	max;
 } prof_unwind_data_t;
 #endif
@@ -220,7 +219,7 @@ extern char opt_prof_prefix[
 extern uint64_t	prof_interval;

 void	bt_init(prof_bt_t *bt, void **vec);
-void	prof_backtrace(prof_bt_t *bt, unsigned nignore);
+void	prof_backtrace(prof_bt_t *bt);
 prof_thr_cnt_t	*prof_lookup(prof_bt_t *bt);
 #ifdef JEMALLOC_JET
 size_t	prof_bt_count(void);
@@ -244,7 +243,7 @@ void	prof_sample_threshold_update(prof_tdata_t *prof_tdata);
 /******************************************************************************/
 #ifdef JEMALLOC_H_INLINES

-#define	PROF_ALLOC_PREP(nignore, size, ret) do {			\
+#define	PROF_ALLOC_PREP(size, ret) do {					\
 	prof_tdata_t *prof_tdata;					\
 	prof_bt_t bt;							\
 									\
@@ -255,7 +254,7 @@ void	prof_sample_threshold_update(prof_tdata_t *prof_tdata);
 		ret = (prof_thr_cnt_t *)(uintptr_t)1U;			\
 	} else {							\
 		bt_init(&bt, prof_tdata->vec);				\
-		prof_backtrace(&bt, nignore);				\
+		prof_backtrace(&bt);					\
 		ret = prof_lookup(&bt);					\
 	}								\
 } while (0)

src/jemalloc.c

@@ -881,10 +881,12 @@ imalloc_prof_sample(size_t usize, prof_thr_cnt_t *cnt)
 }

 JEMALLOC_ALWAYS_INLINE_C void *
-imalloc_prof(size_t usize, prof_thr_cnt_t *cnt)
+imalloc_prof(size_t usize)
 {
 	void *p;
+	prof_thr_cnt_t *cnt;

+	PROF_ALLOC_PREP(usize, cnt);
 	if ((uintptr_t)cnt != (uintptr_t)1U)
 		p = imalloc_prof_sample(usize, cnt);
 	else
@@ -896,42 +898,22 @@ imalloc_prof(size_t usize, prof_thr_cnt_t *cnt)
 	return (p);
 }

-/*
- * MALLOC_BODY() is a macro rather than a function because its contents are in
- * the fast path, but inlining would cause reliability issues when determining
- * how many frames to discard from heap profiling backtraces.
- */
-#define	MALLOC_BODY(ret, size, usize) do {				\
-	if (malloc_init())						\
-		ret = NULL;						\
-	else {								\
-		if (config_prof && opt_prof) {				\
-			prof_thr_cnt_t *cnt;				\
-									\
-			usize = s2u(size);				\
-			/*						\
-			 * Call PROF_ALLOC_PREP() here rather than in	\
-			 * imalloc_prof() so that imalloc_prof() can be	\
-			 * inlined without introducing uncertainty	\
-			 * about the number of backtrace frames to	\
-			 * ignore.  imalloc_prof() is in the fast path	\
-			 * when heap profiling is enabled, so inlining	\
-			 * is critical to performance.  (For		\
-			 * consistency all callers of PROF_ALLOC_PREP()	\
-			 * are structured similarly, even though e.g.	\
-			 * realloc() isn't called enough for inlining	\
-			 * to be critical.)				\
-			 */						\
-			PROF_ALLOC_PREP(1, usize, cnt);			\
-			ret = imalloc_prof(usize, cnt);			\
-		} else {						\
-			if (config_stats || (config_valgrind &&	\
-			    in_valgrind))				\
-				usize = s2u(size);			\
-			ret = imalloc(size);				\
-		}							\
-	}								\
-} while (0)
+JEMALLOC_ALWAYS_INLINE_C void *
+imalloc_body(size_t size, size_t *usize)
+{
+
+	if (malloc_init())
+		return (NULL);
+
+	if (config_prof && opt_prof) {
+		*usize = s2u(size);
+		return (imalloc_prof(*usize));
+	}
+
+	if (config_stats || (config_valgrind && in_valgrind))
+		*usize = s2u(size);
+	return (imalloc(size));
+}

 void *
 je_malloc(size_t size)
@@ -942,8 +924,7 @@ je_malloc(size_t size)
 	if (size == 0)
 		size = 1;

-	MALLOC_BODY(ret, size, usize);
-
+	ret = imalloc_body(size, &usize);
 	if (ret == NULL) {
 		if (config_xmalloc && opt_xmalloc) {
 			malloc_write("<jemalloc>: Error in malloc(): "
@@ -998,13 +979,6 @@ imemalign_prof(size_t alignment, size_t usize, prof_thr_cnt_t *cnt)
 }

 JEMALLOC_ATTR(nonnull(1))
-#ifdef JEMALLOC_PROF
-/*
- * Avoid any uncertainty as to how many backtrace frames to ignore in
- * PROF_ALLOC_PREP().
- */
-JEMALLOC_NOINLINE
-#endif
 static int
 imemalign(void **memptr, size_t alignment, size_t size, size_t min_alignment)
 {
@@ -1043,7 +1017,7 @@ imemalign(void **memptr, size_t alignment, size_t size, size_t min_alignment)
 		if (config_prof && opt_prof) {
 			prof_thr_cnt_t *cnt;

-			PROF_ALLOC_PREP(2, usize, cnt);
+			PROF_ALLOC_PREP(usize, cnt);
 			result = imemalign_prof(alignment, usize, cnt);
 		} else
 			result = ipalloc(usize, alignment, false);
@@ -1166,7 +1140,7 @@ je_calloc(size_t num, size_t size)
 		prof_thr_cnt_t *cnt;

 		usize = s2u(num_size);
-		PROF_ALLOC_PREP(1, usize, cnt);
+		PROF_ALLOC_PREP(usize, cnt);
 		ret = icalloc_prof(usize, cnt);
 	} else {
 		if (config_stats || (config_valgrind && in_valgrind))
@@ -1282,7 +1256,7 @@ je_realloc(void *ptr, size_t size)
 			prof_thr_cnt_t *cnt;

 			usize = s2u(size);
-			PROF_ALLOC_PREP(1, usize, cnt);
+			PROF_ALLOC_PREP(usize, cnt);
 			ret = irealloc_prof(ptr, old_usize, usize, cnt);
 		} else {
 			if (config_stats || (config_valgrind && in_valgrind))
@@ -1291,7 +1265,7 @@ je_realloc(void *ptr, size_t size)
 		}
 	} else {
 		/* realloc(NULL, size) is equivalent to malloc(size). */
-		MALLOC_BODY(ret, size, usize);
+		ret = imalloc_body(size, &usize);
 	}

 	if (ret == NULL) {
@@ -1475,7 +1449,7 @@ je_mallocx(size_t size, int flags)
 	if (config_prof && opt_prof) {
 		prof_thr_cnt_t *cnt;

-		PROF_ALLOC_PREP(1, usize, cnt);
+		PROF_ALLOC_PREP(usize, cnt);
 		p = imallocx_prof(usize, alignment, zero, try_tcache, arena,
 		    cnt);
 	} else
@@ -1600,7 +1574,7 @@ je_rallocx(void *ptr, size_t size, int flags)
 		usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment);
 		assert(usize != 0);
-		PROF_ALLOC_PREP(1, usize, cnt);
+		PROF_ALLOC_PREP(usize, cnt);
 		p = irallocx_prof(ptr, old_usize, size, alignment, &usize, zero,
 		    try_tcache_alloc, try_tcache_dalloc, arena, cnt);
 		if (p == NULL)
@@ -1733,7 +1707,7 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags)
 		 */
 		size_t max_usize = (alignment == 0) ? s2u(size+extra) :
 		    sa2u(size+extra, alignment);
-		PROF_ALLOC_PREP(1, max_usize, cnt);
+		PROF_ALLOC_PREP(max_usize, cnt);
 		usize = ixallocx_prof(ptr, old_usize, size, extra, alignment,
 		    max_usize, zero, arena, cnt);
 	} else {

src/prof.c

@@ -158,23 +158,18 @@ prof_leave(prof_tdata_t *prof_tdata)

 #ifdef JEMALLOC_PROF_LIBUNWIND
 void
-prof_backtrace(prof_bt_t *bt, unsigned nignore)
+prof_backtrace(prof_bt_t *bt)
 {
+	int nframes;
+
 	cassert(config_prof);
 	assert(bt->len == 0);
 	assert(bt->vec != NULL);

-	VARIABLE_ARRAY(void *, frames, nignore + PROF_BT_MAX);
-	int n = unw_backtrace(frames, nignore + PROF_BT_MAX);
-	if (n <= 0)
+	nframes = unw_backtrace(bt->vec, PROF_BT_MAX);
+	if (nframes <= 0)
 		return;
-
-	/* Throw away (nignore+1) stack frames, if that many exist. */
-	nignore++;
-	if (nignore >= n)
-		return;
-	memcpy(bt->vec, &frames[nignore], sizeof(frames[0]) * (n - nignore));
-	bt->len = n - nignore;
+	bt->len = nframes;
 }
 #elif (defined(JEMALLOC_PROF_LIBGCC))
 static _Unwind_Reason_Code
@@ -190,25 +185,25 @@ static _Unwind_Reason_Code
 prof_unwind_callback(struct _Unwind_Context *context, void *arg)
 {
 	prof_unwind_data_t *data = (prof_unwind_data_t *)arg;
+	void *ip;

 	cassert(config_prof);

-	if (data->nignore > 0)
-		data->nignore--;
-	else {
-		data->bt->vec[data->bt->len] = (void *)_Unwind_GetIP(context);
-		data->bt->len++;
-		if (data->bt->len == data->max)
-			return (_URC_END_OF_STACK);
-	}
+	ip = (void *)_Unwind_GetIP(context);
+	if (ip == NULL)
+		return (_URC_END_OF_STACK);
+	data->bt->vec[data->bt->len] = ip;
+	data->bt->len++;
+	if (data->bt->len == data->max)
+		return (_URC_END_OF_STACK);

 	return (_URC_NO_REASON);
 }

 void
-prof_backtrace(prof_bt_t *bt, unsigned nignore)
+prof_backtrace(prof_bt_t *bt)
 {
-	prof_unwind_data_t data = {bt, nignore, PROF_BT_MAX};
+	prof_unwind_data_t data = {bt, PROF_BT_MAX};

 	cassert(config_prof);
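
The _Unwind_Backtrace() call that drives this callback sits just past the
hunk boundary; for context, a hedged standalone sketch of the whole
libgcc-style walk (bt_data_t, bt_callback, and backtrace_full are
illustrative names, not jemalloc's):

#include <unwind.h>

#define BT_MAX 128

typedef struct {
	void		**vec;
	unsigned	len;
	unsigned	max;
} bt_data_t;

static _Unwind_Reason_Code
bt_callback(struct _Unwind_Context *context, void *arg)
{
	bt_data_t *data = (bt_data_t *)arg;
	void *ip = (void *)_Unwind_GetIP(context);

	/*
	 * As in the new callback above: a NULL IP ends the walk instead of
	 * being skipped or recorded.
	 */
	if (ip == NULL)
		return (_URC_END_OF_STACK);
	data->vec[data->len++] = ip;
	if (data->len == data->max)
		return (_URC_END_OF_STACK);
	return (_URC_NO_REASON);
}

static unsigned
backtrace_full(void **vec)
{
	bt_data_t data = {vec, 0, BT_MAX};

	_Unwind_Backtrace(bt_callback, &data);
	return (data.len);
}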
@@ -216,25 +211,22 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore)
 }
 #elif (defined(JEMALLOC_PROF_GCC))
 void
-prof_backtrace(prof_bt_t *bt, unsigned nignore)
+prof_backtrace(prof_bt_t *bt)
 {
 #define	BT_FRAME(i)							\
-	if ((i) < nignore + PROF_BT_MAX) {				\
+	if ((i) < PROF_BT_MAX) {					\
 		void *p;						\
 		if (__builtin_frame_address(i) == 0)			\
 			return;						\
 		p = __builtin_return_address(i);			\
 		if (p == NULL)						\
 			return;						\
-		if (i >= nignore) {					\
-			bt->vec[(i) - nignore] = p;			\
-			bt->len = (i) - nignore + 1;			\
-		}							\
+		bt->vec[(i)] = p;					\
+		bt->len = (i) + 1;					\
 	} else								\
 		return;

 	cassert(config_prof);
-	assert(nignore <= 3);

 	BT_FRAME(0)
 	BT_FRAME(1)
@@ -376,16 +368,11 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore)
 	BT_FRAME(125)
 	BT_FRAME(126)
 	BT_FRAME(127)
-
-	/* Extras to compensate for nignore. */
-	BT_FRAME(128)
-	BT_FRAME(129)
-	BT_FRAME(130)
 #undef BT_FRAME
 }
 #else
 void
-prof_backtrace(prof_bt_t *bt, unsigned nignore)
+prof_backtrace(prof_bt_t *bt)
 {

 	cassert(config_prof);