From b27805b36309681da1936eb33044584547552340 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 10 Feb 2010 18:15:53 -0800 Subject: [PATCH] Various heap profiling improvements. Add the --disable-prof-libgcc configure option, and add backtracing based on libgcc, which is used by default. Fix a bug in hash(). Fix various configuration-dependent compilation errors. --- jemalloc/INSTALL | 12 ++-- jemalloc/configure.ac | 23 +++++++ jemalloc/src/internal/hash.h | 5 +- jemalloc/src/internal/jemalloc_chunk.h | 2 +- jemalloc/src/internal/jemalloc_internal.h.in | 12 ++-- jemalloc/src/internal/jemalloc_stats.h | 2 + jemalloc/src/internal/mb.h | 2 +- jemalloc/src/internal/prof.h | 9 +++ jemalloc/src/jemalloc_defs.h.in | 3 + jemalloc/src/jemalloc_stats.c | 8 +-- jemalloc/src/prof.c | 70 ++++++++++++++++---- 11 files changed, 118 insertions(+), 30 deletions(-) diff --git a/jemalloc/INSTALL b/jemalloc/INSTALL index 057d8d5f..c02e2520 100644 --- a/jemalloc/INSTALL +++ b/jemalloc/INSTALL @@ -48,12 +48,16 @@ any of the following arguments (not a definitive list) to 'configure': Enable heap profiling and leak detection functionality. Use the 'B', 'F', 'I', 'L', and 'U' options to control these features. +--disable-prof-libgcc + Disable the use of libgcc's backtracing functionality. Ordinarily, libgcc's + backtracing functionality is superior to the alternatives, but it may fail + to capture backtraces on some systems. + --enable-prof-libunwind Use the libunwind library (http://www.nongnu.org/libunwind/) for stack - backtracing, rather than frame pointers. libunwind is quite slow in - comparison to frame pointer-based backtracing, but it has the advantage of - working on applications/libraries that were compiled with - -fomit-frame-pointer. + backtracing. libunwind is quite slow, but it tends to work across a wider + variety of system configurations than the default backtracing code, which is + based on libgcc functionality or gcc intrinsics. 
--disable-tiny Disable tiny (sub-quantum-sized) object support. Technically it is not diff --git a/jemalloc/configure.ac b/jemalloc/configure.ac index 1c4e335c..17c7aa64 100644 --- a/jemalloc/configure.ac +++ b/jemalloc/configure.ac @@ -373,6 +373,17 @@ fi ], [enable_prof="0"] ) +AC_ARG_ENABLE([prof-libgcc], + [AS_HELP_STRING([--disable-prof-libgcc], + [Do not use libgcc for backtracing])], +[if test "x$enable_prof_libgcc" = "xno" ; then + enable_prof_libgcc="0" +else + enable_prof_libgcc="1" +fi +], +[enable_prof_libgcc="1"] +) AC_ARG_ENABLE([prof-libunwind], [AS_HELP_STRING([--enable-prof-libunwind], [Use libunwind for backtracing])], [if test "x$enable_prof_libunwind" = "xno" ; then @@ -682,6 +693,18 @@ else fi AC_SUBST([roff_prof]) +dnl If libunwind isn't enabled, try to use libgcc rather than gcc intrinsics +dnl for backtracing. +if test "x$enable_prof" = "x1" -a "x$enable_prof_libunwind" = "x0" \ + -a "x$GCC" = "xyes" -a "x$enable_prof_libgcc" = "x1" ; then + enable_prof_libgcc="1" + AC_CHECK_HEADERS([unwind.h], , [enable_prof_libgcc="0"]) + AC_CHECK_LIB([gcc], [_Unwind_Backtrace], [LIBS="$LIBS -lgcc"], [enable_prof_libgcc="0"]) + if test "x${enable_prof_libgcc}" = "x1" ; then + AC_DEFINE([JEMALLOC_PROF_LIBGCC], [ ]) + fi +fi + dnl ============================================================================ dnl Configure libgd for mtrgraph. 
bins="${objroot}bin/jemtr2mtr${install_suffix}" diff --git a/jemalloc/src/internal/hash.h b/jemalloc/src/internal/hash.h index 182babd6..d12cdb83 100644 --- a/jemalloc/src/internal/hash.h +++ b/jemalloc/src/internal/hash.h @@ -30,8 +30,10 @@ hash(const void *key, size_t len, uint64_t seed) const int r = 47; uint64_t h = seed ^ (len * m); const uint64_t *data = (const uint64_t *)key; - const unsigned char *data2 = (const unsigned char*)data; const uint64_t *end = data + (len/8); + const unsigned char *data2; + + assert(((uintptr_t)key & 0x7) == 0); while(data != end) { uint64_t k = *data++; @@ -44,6 +46,7 @@ hash(const void *key, size_t len, uint64_t seed) h *= m; } + data2 = (const unsigned char *)data; switch(len & 7) { case 7: h ^= ((uint64_t)(data2[6])) << 48; case 6: h ^= ((uint64_t)(data2[5])) << 40; diff --git a/jemalloc/src/internal/jemalloc_chunk.h b/jemalloc/src/internal/jemalloc_chunk.h index 40541e7a..00b2e1dc 100644 --- a/jemalloc/src/internal/jemalloc_chunk.h +++ b/jemalloc/src/internal/jemalloc_chunk.h @@ -32,7 +32,7 @@ extern size_t opt_lg_chunk; extern bool opt_overcommit; #endif -#ifdef JEMALLOC_STATS +#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) /* Protects stats_chunks; currently not used for any other purpose. */ extern malloc_mutex_t chunks_mtx; /* Chunk statistics. 
*/ diff --git a/jemalloc/src/internal/jemalloc_internal.h.in b/jemalloc/src/internal/jemalloc_internal.h.in index 88e33e3f..8f52fa3f 100644 --- a/jemalloc/src/internal/jemalloc_internal.h.in +++ b/jemalloc/src/internal/jemalloc_internal.h.in @@ -32,10 +32,8 @@ #endif #include "internal/rb.h" -#if (defined(JEMALLOC_TCACHE) && defined(JEMALLOC_STATS)) #include "internal/qr.h" #include "internal/ql.h" -#endif extern void (*JEMALLOC_P(malloc_message))(void *w4opaque, const char *p1, const char *p2, const char *p3, const char *p4); @@ -106,7 +104,7 @@ extern void (*JEMALLOC_P(malloc_message))(void *w4opaque, const char *p1, #ifdef __sparc64__ # define LG_QUANTUM 4 #endif -#ifdef __amd64__ +#if (defined(__amd64__) || defined(__x86_64__)) # define LG_QUANTUM 4 #endif #ifdef __arm__ @@ -172,7 +170,6 @@ extern void (*JEMALLOC_P(malloc_message))(void *w4opaque, const char *p1, (((s) + PAGE_MASK) & ~PAGE_MASK) #include "internal/prn.h" -#include "internal/hash.h" #include "internal/mb.h" #include "internal/ckh.h" #include "internal/jemalloc_stats.h" @@ -185,6 +182,7 @@ extern void (*JEMALLOC_P(malloc_message))(void *w4opaque, const char *p1, #include "internal/jemalloc_huge.h" #include "internal/jemalloc_tcache.h" #include "internal/jemalloc_trace.h" +#include "internal/hash.h" #include "internal/prof.h" #undef JEMALLOC_H_TYPES @@ -192,7 +190,6 @@ extern void (*JEMALLOC_P(malloc_message))(void *w4opaque, const char *p1, #define JEMALLOC_H_STRUCTS #include "internal/prn.h" -#include "internal/hash.h" #include "internal/mb.h" #include "internal/ckh.h" #include "internal/jemalloc_stats.h" @@ -205,6 +202,7 @@ extern void (*JEMALLOC_P(malloc_message))(void *w4opaque, const char *p1, #include "internal/jemalloc_huge.h" #include "internal/jemalloc_tcache.h" #include "internal/jemalloc_trace.h" +#include "internal/hash.h" #include "internal/prof.h" #undef JEMALLOC_H_STRUCTS @@ -255,7 +253,6 @@ arena_t *choose_arena_hard(void); #endif #include "internal/prn.h" -#include 
"internal/hash.h" #include "internal/mb.h" #include "internal/ckh.h" #include "internal/jemalloc_stats.h" @@ -268,6 +265,7 @@ arena_t *choose_arena_hard(void); #include "internal/jemalloc_huge.h" #include "internal/jemalloc_tcache.h" #include "internal/jemalloc_trace.h" +#include "internal/hash.h" #include "internal/prof.h" #undef JEMALLOC_H_EXTERNS @@ -275,7 +273,6 @@ arena_t *choose_arena_hard(void); #define JEMALLOC_H_INLINES #include "internal/prn.h" -#include "internal/hash.h" #include "internal/mb.h" #include "internal/ckh.h" #include "internal/jemalloc_stats.h" @@ -376,6 +373,7 @@ choose_arena(void) #include "internal/jemalloc_tcache.h" #include "internal/jemalloc_arena.h" #include "internal/jemalloc_trace.h" +#include "internal/hash.h" #include "internal/prof.h" #ifndef JEMALLOC_ENABLE_INLINE diff --git a/jemalloc/src/internal/jemalloc_stats.h b/jemalloc/src/internal/jemalloc_stats.h index 12f0676b..36dc5fe8 100644 --- a/jemalloc/src/internal/jemalloc_stats.h +++ b/jemalloc/src/internal/jemalloc_stats.h @@ -8,6 +8,8 @@ typedef struct tcache_bin_stats_s tcache_bin_stats_t; typedef struct malloc_bin_stats_s malloc_bin_stats_t; typedef struct malloc_large_stats_s malloc_large_stats_t; typedef struct arena_stats_s arena_stats_t; +#endif +#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) typedef struct chunk_stats_s chunk_stats_t; #endif diff --git a/jemalloc/src/internal/mb.h b/jemalloc/src/internal/mb.h index 0a272e74..1707aa91 100644 --- a/jemalloc/src/internal/mb.h +++ b/jemalloc/src/internal/mb.h @@ -54,7 +54,7 @@ mb_write(void) ); #endif } -#elif defined(__amd64_) +#elif (defined(__amd64_) || defined(__x86_64__)) JEMALLOC_INLINE void mb_write(void) { diff --git a/jemalloc/src/internal/prof.h b/jemalloc/src/internal/prof.h index 326d5586..1721ad8b 100644 --- a/jemalloc/src/internal/prof.h +++ b/jemalloc/src/internal/prof.h @@ -35,6 +35,15 @@ struct prof_bt_s { unsigned len; }; +#ifdef JEMALLOC_PROF_LIBGCC +/* Data structure passed to libgcc 
_Unwind_Backtrace() callback functions. */ +typedef struct { + prof_bt_t *bt; + unsigned nignore; + unsigned max; +} prof_unwind_data_t; +#endif + struct prof_cnt_s { /* * Profiling counters. An allocation/deallocation pair can operate on diff --git a/jemalloc/src/jemalloc_defs.h.in b/jemalloc/src/jemalloc_defs.h.in index 247b596b..942694f4 100644 --- a/jemalloc/src/jemalloc_defs.h.in +++ b/jemalloc/src/jemalloc_defs.h.in @@ -54,6 +54,9 @@ /* Use libunwind for profile backtracing if defined. */ #undef JEMALLOC_PROF_LIBUNWIND +/* Use libgcc for profile backtracing if defined. */ +#undef JEMALLOC_PROF_LIBGCC + /* * JEMALLOC_TINY enables support for tiny objects, which are smaller than one * quantum. diff --git a/jemalloc/src/jemalloc_stats.c b/jemalloc/src/jemalloc_stats.c index 580f0fe4..b0efe746 100644 --- a/jemalloc/src/jemalloc_stats.c +++ b/jemalloc/src/jemalloc_stats.c @@ -219,8 +219,8 @@ stats_arena_bins_print(void (*write4)(void *, const char *, const char *, size_t); if (config_tcache) { malloc_cprintf(write4, w4opaque, - "%13u %1s %5u %4u %3u %10"PRIu64" %9"PRIu64 - " %9"PRIu64" %9"PRIu64"" + "%13u %1s %5zu %4u %3zu %10"PRIu64 + " %9"PRIu64" %9"PRIu64" %9"PRIu64"" " %9"PRIu64" %7zu %7zu\n", j, j < ntbins_ ? "T" : j < ntbins_ + nqbins ? @@ -232,8 +232,8 @@ stats_arena_bins_print(void (*write4)(void *, const char *, const char *, highruns, curruns); } else { malloc_cprintf(write4, w4opaque, - "%13u %1s %5u %4u %3u %10"PRIu64" %9"PRIu64 - " %9"PRIu64" %7zu %7zu\n", + "%13u %1s %5zu %4u %3zu %10"PRIu64 + " %9"PRIu64" %9"PRIu64" %7zu %7zu\n", j, j < ntbins_ ? "T" : j < ntbins_ + nqbins ? "Q" : j < ntbins_ + nqbins + ncbins ? 
"C" : diff --git a/jemalloc/src/prof.c b/jemalloc/src/prof.c index a7d9cc36..db56659b 100644 --- a/jemalloc/src/prof.c +++ b/jemalloc/src/prof.c @@ -3,6 +3,10 @@ #ifdef JEMALLOC_PROF /******************************************************************************/ +#ifdef JEMALLOC_PROF_LIBGCC +#include <unwind.h> +#endif + #ifdef JEMALLOC_PROF_LIBUNWIND #define UNW_LOCAL_ONLY #include <libunwind.h> @@ -82,7 +86,13 @@ static bool enq_udump; static prof_bt_t *bt_dup(prof_bt_t *bt); static void bt_init(prof_bt_t *bt, void **vec); -static bool prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max); +#ifdef JEMALLOC_PROF_LIBGCC +static _Unwind_Reason_Code prof_unwind_init_callback( + struct _Unwind_Context *context, void *arg); +static _Unwind_Reason_Code prof_unwind_callback( + struct _Unwind_Context *context, void *arg); +#endif +static void prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max); static prof_thr_cnt_t *prof_lookup(prof_bt_t *bt); static void prof_cnt_set(const void *ptr, prof_thr_cnt_t *cnt); static void prof_flush(void); @@ -160,8 +170,40 @@ prof_leave(void) prof_udump(); } -#ifdef JEMALLOC_PROF_LIBUNWIND -static bool +#ifdef JEMALLOC_PROF_LIBGCC +static _Unwind_Reason_Code +prof_unwind_init_callback(struct _Unwind_Context *context, void *arg) +{ + + return (_URC_NO_REASON); +} + +static _Unwind_Reason_Code +prof_unwind_callback(struct _Unwind_Context *context, void *arg) +{ + prof_unwind_data_t *data = (prof_unwind_data_t *)arg; + + if (data->nignore > 0) + data->nignore--; + else { + data->bt->vec[data->bt->len] = (void *)_Unwind_GetIP(context); + data->bt->len++; + if (data->bt->len == data->max) + return (_URC_END_OF_STACK); + } + + return (_URC_NO_REASON); +} + +static void +prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) +{ + prof_unwind_data_t data = {bt, nignore, max}; + + _Unwind_Backtrace(prof_unwind_callback, &data); +} +#elif defined(JEMALLOC_PROF_LIBUNWIND) +static void prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) {
unw_context_t uc; @@ -180,7 +222,7 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) for (i = 0; i < nignore + 1; i++) { err = unw_step(&cursor); if (err <= 0) - return (false); + return; } /* @@ -195,11 +237,9 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) break; } } - - return (false); } #else -static bool +static void prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) { #define NIGNORE 3 @@ -207,16 +247,16 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) if ((i) < NIGNORE + max) { \ void *p; \ if (__builtin_frame_address(i) == 0) \ - return (false); \ + return; \ p = __builtin_return_address(i); \ if (p == NULL) \ - return (false); \ + return; \ if (i >= NIGNORE) { \ bt->vec[(i) - NIGNORE] = p; \ bt->len = (i) - NIGNORE + 1; \ } \ } else \ - return (false); + return; assert(max <= (1U << opt_lg_prof_bt_max)); @@ -376,9 +416,7 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) BT_FRAME(128) BT_FRAME(129) BT_FRAME(130) - #undef BT_FRAME - return (false); } #endif @@ -1039,6 +1077,14 @@ prof_boot1(void) } } +#ifdef JEMALLOC_PROF_LIBGCC + /* + * Cause the backtracing machinery to allocate its internal state + * before enabling profiling. + */ + _Unwind_Backtrace(prof_unwind_init_callback, NULL); +#endif + prof_booted = true; return (false);