From 6f001059aa33d77a3cb7799002044faf8dd08fc0 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 22 Apr 2014 18:41:15 -0700 Subject: [PATCH] Simplify backtracing. Simplify backtracing to not ignore any frames, and compensate for this in pprof in order to increase flexibility with respect to function-based refactoring even in the presence of non-deterministic inlining. Modify pprof to blacklist all jemalloc allocation entry points including non-standard ones like mallocx(), and ignore all allocator-internal frames. Prior to this change, pprof excluded the specifically blacklisted functions from backtraces, but it left allocator-internal frames intact. --- bin/pprof | 9 ++++ include/jemalloc/internal/prof.h | 7 ++- src/jemalloc.c | 80 +++++++++++--------------------- src/prof.c | 55 +++++++++------------- 4 files changed, 60 insertions(+), 91 deletions(-) diff --git a/bin/pprof b/bin/pprof index a309943c..328138cd 100755 --- a/bin/pprof +++ b/bin/pprof @@ -2811,9 +2811,14 @@ sub RemoveUninterestingFrames { 'free', 'memalign', 'posix_memalign', + 'aligned_alloc', 'pvalloc', 'valloc', 'realloc', + 'mallocx', # jemalloc + 'rallocx', # jemalloc + 'xallocx', # jemalloc + 'dallocx', # jemalloc 'tc_calloc', 'tc_cfree', 'tc_malloc', @@ -2923,6 +2928,10 @@ sub RemoveUninterestingFrames { if (exists($symbols->{$a})) { my $func = $symbols->{$a}->[0]; if ($skip{$func} || ($func =~ m/$skip_regexp/)) { + # Throw away the portion of the backtrace seen so far, under the + # assumption that previous frames were for functions internal to the + # allocator. + @path = (); next; } } diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index d7422538..d82fbc4f 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -63,7 +63,6 @@ struct prof_bt_s { /* Data structure passed to libgcc _Unwind_Backtrace() callback functions. */ typedef struct { prof_bt_t *bt; - unsigned nignore; unsigned max; } prof_unwind_data_t; #endif @@ -220,7 +219,7 @@ extern char opt_prof_prefix[ extern uint64_t prof_interval; void bt_init(prof_bt_t *bt, void **vec); -void prof_backtrace(prof_bt_t *bt, unsigned nignore); +void prof_backtrace(prof_bt_t *bt); prof_thr_cnt_t *prof_lookup(prof_bt_t *bt); #ifdef JEMALLOC_JET size_t prof_bt_count(void); @@ -244,7 +243,7 @@ void prof_sample_threshold_update(prof_tdata_t *prof_tdata); /******************************************************************************/ #ifdef JEMALLOC_H_INLINES -#define PROF_ALLOC_PREP(nignore, size, ret) do { \ +#define PROF_ALLOC_PREP(size, ret) do { \ prof_tdata_t *prof_tdata; \ prof_bt_t bt; \ \ @@ -255,7 +254,7 @@ void prof_sample_threshold_update(prof_tdata_t *prof_tdata); ret = (prof_thr_cnt_t *)(uintptr_t)1U; \ } else { \ bt_init(&bt, prof_tdata->vec); \ - prof_backtrace(&bt, nignore); \ + prof_backtrace(&bt); \ ret = prof_lookup(&bt); \ } \ } while (0) diff --git a/src/jemalloc.c b/src/jemalloc.c index 36eae722..f1dda758 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -881,10 +881,12 @@ imalloc_prof_sample(size_t usize, prof_thr_cnt_t *cnt) } JEMALLOC_ALWAYS_INLINE_C void * -imalloc_prof(size_t usize, prof_thr_cnt_t *cnt) +imalloc_prof(size_t usize) { void *p; + prof_thr_cnt_t *cnt; + PROF_ALLOC_PREP(usize, cnt); if ((uintptr_t)cnt != (uintptr_t)1U) p = imalloc_prof_sample(usize, cnt); else @@ -896,42 +898,22 @@ imalloc_prof(size_t usize, prof_thr_cnt_t *cnt) return (p); } -/* - * MALLOC_BODY() is a macro rather than a function because its contents are in - * the fast path, but inlining would cause reliability issues when determining - * how many frames to discard from heap profiling backtraces. - */ -#define MALLOC_BODY(ret, size, usize) do { \ - if (malloc_init()) \ - ret = NULL; \ - else { \ - if (config_prof && opt_prof) { \ - prof_thr_cnt_t *cnt; \ - \ - usize = s2u(size); \ - /* \ - * Call PROF_ALLOC_PREP() here rather than in \ - * imalloc_prof() so that imalloc_prof() can be \ - * inlined without introducing uncertainty \ - * about the number of backtrace frames to \ - * ignore. imalloc_prof() is in the fast path \ - * when heap profiling is enabled, so inlining \ - * is critical to performance. (For \ - * consistency all callers of PROF_ALLOC_PREP() \ - * are structured similarly, even though e.g. \ - * realloc() isn't called enough for inlining \ - * to be critical.) \ - */ \ - PROF_ALLOC_PREP(1, usize, cnt); \ - ret = imalloc_prof(usize, cnt); \ - } else { \ - if (config_stats || (config_valgrind && \ - in_valgrind)) \ - usize = s2u(size); \ - ret = imalloc(size); \ - } \ - } \ -} while (0) +JEMALLOC_ALWAYS_INLINE_C void * +imalloc_body(size_t size, size_t *usize) +{ + + if (malloc_init()) + return (NULL); + + if (config_prof && opt_prof) { + *usize = s2u(size); + return (imalloc_prof(*usize)); + } + + if (config_stats || (config_valgrind && in_valgrind)) + *usize = s2u(size); + return (imalloc(size)); +} void * je_malloc(size_t size) @@ -942,8 +924,7 @@ je_malloc(size_t size) if (size == 0) size = 1; - MALLOC_BODY(ret, size, usize); - + ret = imalloc_body(size, &usize); if (ret == NULL) { if (config_xmalloc && opt_xmalloc) { malloc_write(": Error in malloc(): " @@ -998,13 +979,6 @@ imemalign_prof(size_t alignment, size_t usize, prof_thr_cnt_t *cnt) } JEMALLOC_ATTR(nonnull(1)) -#ifdef JEMALLOC_PROF -/* - * Avoid any uncertainty as to how many backtrace frames to ignore in - * PROF_ALLOC_PREP(). - */ -JEMALLOC_NOINLINE -#endif static int imemalign(void **memptr, size_t alignment, size_t size, size_t min_alignment) { @@ -1043,7 +1017,7 @@ imemalign(void **memptr, size_t alignment, size_t size, size_t min_alignment) if (config_prof && opt_prof) { prof_thr_cnt_t *cnt; - PROF_ALLOC_PREP(2, usize, cnt); + PROF_ALLOC_PREP(usize, cnt); result = imemalign_prof(alignment, usize, cnt); } else result = ipalloc(usize, alignment, false); @@ -1166,7 +1140,7 @@ je_calloc(size_t num, size_t size) prof_thr_cnt_t *cnt; usize = s2u(num_size); - PROF_ALLOC_PREP(1, usize, cnt); + PROF_ALLOC_PREP(usize, cnt); ret = icalloc_prof(usize, cnt); } else { if (config_stats || (config_valgrind && in_valgrind)) @@ -1282,7 +1256,7 @@ je_realloc(void *ptr, size_t size) prof_thr_cnt_t *cnt; usize = s2u(size); - PROF_ALLOC_PREP(1, usize, cnt); + PROF_ALLOC_PREP(usize, cnt); ret = irealloc_prof(ptr, old_usize, usize, cnt); } else { if (config_stats || (config_valgrind && in_valgrind)) @@ -1291,7 +1265,7 @@ je_realloc(void *ptr, size_t size) } } else { /* realloc(NULL, size) is equivalent to malloc(size). */ - MALLOC_BODY(ret, size, usize); + ret = imalloc_body(size, &usize); } if (ret == NULL) { @@ -1475,7 +1449,7 @@ je_mallocx(size_t size, int flags) if (config_prof && opt_prof) { prof_thr_cnt_t *cnt; - PROF_ALLOC_PREP(1, usize, cnt); + PROF_ALLOC_PREP(usize, cnt); p = imallocx_prof(usize, alignment, zero, try_tcache, arena, cnt); } else @@ -1600,7 +1574,7 @@ je_rallocx(void *ptr, size_t size, int flags) usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment); assert(usize != 0); - PROF_ALLOC_PREP(1, usize, cnt); + PROF_ALLOC_PREP(usize, cnt); p = irallocx_prof(ptr, old_usize, size, alignment, &usize, zero, try_tcache_alloc, try_tcache_dalloc, arena, cnt); if (p == NULL) @@ -1733,7 +1707,7 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) */ size_t max_usize = (alignment == 0) ? s2u(size+extra) : sa2u(size+extra, alignment); - PROF_ALLOC_PREP(1, max_usize, cnt); + PROF_ALLOC_PREP(max_usize, cnt); usize = ixallocx_prof(ptr, old_usize, size, extra, alignment, max_usize, zero, arena, cnt); } else { diff --git a/src/prof.c b/src/prof.c index 11f1267f..b64386e3 100644 --- a/src/prof.c +++ b/src/prof.c @@ -158,23 +158,18 @@ prof_leave(prof_tdata_t *prof_tdata) #ifdef JEMALLOC_PROF_LIBUNWIND void -prof_backtrace(prof_bt_t *bt, unsigned nignore) +prof_backtrace(prof_bt_t *bt) { + int nframes; + cassert(config_prof); assert(bt->len == 0); assert(bt->vec != NULL); - VARIABLE_ARRAY(void *, frames, nignore + PROF_BT_MAX); - int n = unw_backtrace(frames, nignore + PROF_BT_MAX); - if (n <= 0) + nframes = unw_backtrace(bt->vec, PROF_BT_MAX); + if (nframes <= 0) return; - - /* Throw away (nignore+1) stack frames, if that many exist. */ - nignore++; - if (nignore >= n) - return; - memcpy(bt->vec, &frames[nignore], sizeof(frames[0]) * (n - nignore)); - bt->len = n - nignore; + bt->len = nframes; } #elif (defined(JEMALLOC_PROF_LIBGCC)) static _Unwind_Reason_Code @@ -190,25 +185,25 @@ static _Unwind_Reason_Code prof_unwind_callback(struct _Unwind_Context *context, void *arg) { prof_unwind_data_t *data = (prof_unwind_data_t *)arg; + void *ip; cassert(config_prof); - if (data->nignore > 0) - data->nignore--; - else { - data->bt->vec[data->bt->len] = (void *)_Unwind_GetIP(context); - data->bt->len++; - if (data->bt->len == data->max) - return (_URC_END_OF_STACK); - } + ip = (void *)_Unwind_GetIP(context); + if (ip == NULL) + return (_URC_END_OF_STACK); + data->bt->vec[data->bt->len] = ip; + data->bt->len++; + if (data->bt->len == data->max) + return (_URC_END_OF_STACK); return (_URC_NO_REASON); } void -prof_backtrace(prof_bt_t *bt, unsigned nignore) +prof_backtrace(prof_bt_t *bt) { - prof_unwind_data_t data = {bt, nignore, PROF_BT_MAX}; + prof_unwind_data_t data = {bt, PROF_BT_MAX}; cassert(config_prof); @@ -216,25 +211,22 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore) } #elif (defined(JEMALLOC_PROF_GCC)) void -prof_backtrace(prof_bt_t *bt, unsigned nignore) +prof_backtrace(prof_bt_t *bt) { #define BT_FRAME(i) \ - if ((i) < nignore + PROF_BT_MAX) { \ + if ((i) < PROF_BT_MAX) { \ void *p; \ if (__builtin_frame_address(i) == 0) \ return; \ p = __builtin_return_address(i); \ if (p == NULL) \ return; \ - if (i >= nignore) { \ - bt->vec[(i) - nignore] = p; \ - bt->len = (i) - nignore + 1; \ - } \ + bt->vec[(i)] = p; \ + bt->len = (i) + 1; \ } else \ return; cassert(config_prof); - assert(nignore <= 3); BT_FRAME(0) BT_FRAME(1) @@ -376,16 +368,11 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore) BT_FRAME(125) BT_FRAME(126) BT_FRAME(127) - - /* Extras to compensate for nignore. */ - BT_FRAME(128) - BT_FRAME(129) - BT_FRAME(130) #undef BT_FRAME } #else void -prof_backtrace(prof_bt_t *bt, unsigned nignore) +prof_backtrace(prof_bt_t *bt) { cassert(config_prof);