Various heap profiling improvements.

Add the --disable-prof-libgcc configure option, and add backtracing
based on libgcc, which is used by default.

Fix a bug in hash().

Fix various configuration-dependent compilation errors.
This commit is contained in:
Jason Evans 2010-02-10 18:15:53 -08:00
parent 6109fe07a1
commit b27805b363
11 changed files with 118 additions and 30 deletions

View File

@ -48,12 +48,16 @@ any of the following arguments (not a definitive list) to 'configure':
Enable heap profiling and leak detection functionality. Use the 'B', 'F', Enable heap profiling and leak detection functionality. Use the 'B', 'F',
'I', 'L', and 'U' options to control these features. 'I', 'L', and 'U' options to control these features.
--disable-prof-libgcc
Disable the use of libgcc's backtracing functionality. Ordinarily, libgcc's
backtracing functionality is superior to the alternatives, but it may fail
to capture backtraces on some systems.
--enable-prof-libunwind --enable-prof-libunwind
Use the libunwind library (http://www.nongnu.org/libunwind/) for stack Use the libunwind library (http://www.nongnu.org/libunwind/) for stack
backtracing, rather than frame pointers. libunwind is quite slow in backtracing. libunwind is quite slow, but it tends to work across a wider
comparison to frame pointer-based backtracing, but it has the advantage of variety of system configurations than the default backtracing code, which is
working on applications/libraries that were compiled with based on libgcc functionality or gcc intrinsics.
-fomit-frame-pointer.
--disable-tiny --disable-tiny
Disable tiny (sub-quantum-sized) object support. Technically it is not Disable tiny (sub-quantum-sized) object support. Technically it is not

View File

@ -373,6 +373,17 @@ fi
], ],
[enable_prof="0"] [enable_prof="0"]
) )
dnl Let the user turn off libgcc-based backtracing with --disable-prof-libgcc.
dnl enable_prof_libgcc is normalized to "0" when the option value is "no" and to
dnl "1" for any other value; it defaults to "1" when the option is not given.
AC_ARG_ENABLE([prof-libgcc],
[AS_HELP_STRING([--disable-prof-libgcc],
[Do not use libgcc for backtracing])],
[if test "x$enable_prof_libgcc" = "xno" ; then
enable_prof_libgcc="0"
else
enable_prof_libgcc="1"
fi
],
[enable_prof_libgcc="1"]
)
AC_ARG_ENABLE([prof-libunwind], AC_ARG_ENABLE([prof-libunwind],
[AS_HELP_STRING([--enable-prof-libunwind], [Use libunwind for backtracing])], [AS_HELP_STRING([--enable-prof-libunwind], [Use libunwind for backtracing])],
[if test "x$enable_prof_libunwind" = "xno" ; then [if test "x$enable_prof_libunwind" = "xno" ; then
@ -682,6 +693,18 @@ else
fi fi
AC_SUBST([roff_prof]) AC_SUBST([roff_prof])
dnl If libunwind isn't enabled, try to use libgcc rather than gcc intrinsics
dnl for backtracing.
dnl The probe runs only when profiling is enabled, libunwind was not requested,
dnl the compiler is gcc, and libgcc backtracing has not been disabled.  If
dnl unwind.h or the _Unwind_Backtrace symbol in libgcc cannot be found,
dnl enable_prof_libgcc is reset to "0" and JEMALLOC_PROF_LIBGCC is left undefined.
dnl NOTE(review): -lgcc is appended to LIBS whenever the library check succeeds,
dnl even if the unwind.h check has already failed; confirm that is intended.
if test "x$enable_prof" = "x1" -a "x$enable_prof_libunwind" = "x0" \
-a "x$GCC" = "xyes" -a "x$enable_prof_libgcc" = "x1" ; then
enable_prof_libgcc="1"
AC_CHECK_HEADERS([unwind.h], , [enable_prof_libgcc="0"])
AC_CHECK_LIB([gcc], [_Unwind_Backtrace], [LIBS="$LIBS -lgcc"], [enable_prof_libgcc="0"])
if test "x${enable_prof_libgcc}" = "x1" ; then
AC_DEFINE([JEMALLOC_PROF_LIBGCC], [ ])
fi
fi
dnl ============================================================================ dnl ============================================================================
dnl Configure libgd for mtrgraph. dnl Configure libgd for mtrgraph.
bins="${objroot}bin/jemtr2mtr${install_suffix}" bins="${objroot}bin/jemtr2mtr${install_suffix}"

View File

@ -30,8 +30,10 @@ hash(const void *key, size_t len, uint64_t seed)
const int r = 47; const int r = 47;
uint64_t h = seed ^ (len * m); uint64_t h = seed ^ (len * m);
const uint64_t *data = (const uint64_t *)key; const uint64_t *data = (const uint64_t *)key;
const unsigned char *data2 = (const unsigned char*)data;
const uint64_t *end = data + (len/8); const uint64_t *end = data + (len/8);
const unsigned char *data2;
assert(((uintptr_t)key & 0x7) == 0);
while(data != end) { while(data != end) {
uint64_t k = *data++; uint64_t k = *data++;
@ -44,6 +46,7 @@ hash(const void *key, size_t len, uint64_t seed)
h *= m; h *= m;
} }
data2 = (const unsigned char *)data;
switch(len & 7) { switch(len & 7) {
case 7: h ^= ((uint64_t)(data2[6])) << 48; case 7: h ^= ((uint64_t)(data2[6])) << 48;
case 6: h ^= ((uint64_t)(data2[5])) << 40; case 6: h ^= ((uint64_t)(data2[5])) << 40;

View File

@ -32,7 +32,7 @@ extern size_t opt_lg_chunk;
extern bool opt_overcommit; extern bool opt_overcommit;
#endif #endif
#ifdef JEMALLOC_STATS #if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
/* Protects stats_chunks; currently not used for any other purpose. */ /* Protects stats_chunks; currently not used for any other purpose. */
extern malloc_mutex_t chunks_mtx; extern malloc_mutex_t chunks_mtx;
/* Chunk statistics. */ /* Chunk statistics. */

View File

@ -32,10 +32,8 @@
#endif #endif
#include "internal/rb.h" #include "internal/rb.h"
#if (defined(JEMALLOC_TCACHE) && defined(JEMALLOC_STATS))
#include "internal/qr.h" #include "internal/qr.h"
#include "internal/ql.h" #include "internal/ql.h"
#endif
extern void (*JEMALLOC_P(malloc_message))(void *w4opaque, const char *p1, extern void (*JEMALLOC_P(malloc_message))(void *w4opaque, const char *p1,
const char *p2, const char *p3, const char *p4); const char *p2, const char *p3, const char *p4);
@ -106,7 +104,7 @@ extern void (*JEMALLOC_P(malloc_message))(void *w4opaque, const char *p1,
#ifdef __sparc64__ #ifdef __sparc64__
# define LG_QUANTUM 4 # define LG_QUANTUM 4
#endif #endif
#ifdef __amd64__ #if (defined(__amd64__) || defined(__x86_64__))
# define LG_QUANTUM 4 # define LG_QUANTUM 4
#endif #endif
#ifdef __arm__ #ifdef __arm__
@ -172,7 +170,6 @@ extern void (*JEMALLOC_P(malloc_message))(void *w4opaque, const char *p1,
(((s) + PAGE_MASK) & ~PAGE_MASK) (((s) + PAGE_MASK) & ~PAGE_MASK)
#include "internal/prn.h" #include "internal/prn.h"
#include "internal/hash.h"
#include "internal/mb.h" #include "internal/mb.h"
#include "internal/ckh.h" #include "internal/ckh.h"
#include "internal/jemalloc_stats.h" #include "internal/jemalloc_stats.h"
@ -185,6 +182,7 @@ extern void (*JEMALLOC_P(malloc_message))(void *w4opaque, const char *p1,
#include "internal/jemalloc_huge.h" #include "internal/jemalloc_huge.h"
#include "internal/jemalloc_tcache.h" #include "internal/jemalloc_tcache.h"
#include "internal/jemalloc_trace.h" #include "internal/jemalloc_trace.h"
#include "internal/hash.h"
#include "internal/prof.h" #include "internal/prof.h"
#undef JEMALLOC_H_TYPES #undef JEMALLOC_H_TYPES
@ -192,7 +190,6 @@ extern void (*JEMALLOC_P(malloc_message))(void *w4opaque, const char *p1,
#define JEMALLOC_H_STRUCTS #define JEMALLOC_H_STRUCTS
#include "internal/prn.h" #include "internal/prn.h"
#include "internal/hash.h"
#include "internal/mb.h" #include "internal/mb.h"
#include "internal/ckh.h" #include "internal/ckh.h"
#include "internal/jemalloc_stats.h" #include "internal/jemalloc_stats.h"
@ -205,6 +202,7 @@ extern void (*JEMALLOC_P(malloc_message))(void *w4opaque, const char *p1,
#include "internal/jemalloc_huge.h" #include "internal/jemalloc_huge.h"
#include "internal/jemalloc_tcache.h" #include "internal/jemalloc_tcache.h"
#include "internal/jemalloc_trace.h" #include "internal/jemalloc_trace.h"
#include "internal/hash.h"
#include "internal/prof.h" #include "internal/prof.h"
#undef JEMALLOC_H_STRUCTS #undef JEMALLOC_H_STRUCTS
@ -255,7 +253,6 @@ arena_t *choose_arena_hard(void);
#endif #endif
#include "internal/prn.h" #include "internal/prn.h"
#include "internal/hash.h"
#include "internal/mb.h" #include "internal/mb.h"
#include "internal/ckh.h" #include "internal/ckh.h"
#include "internal/jemalloc_stats.h" #include "internal/jemalloc_stats.h"
@ -268,6 +265,7 @@ arena_t *choose_arena_hard(void);
#include "internal/jemalloc_huge.h" #include "internal/jemalloc_huge.h"
#include "internal/jemalloc_tcache.h" #include "internal/jemalloc_tcache.h"
#include "internal/jemalloc_trace.h" #include "internal/jemalloc_trace.h"
#include "internal/hash.h"
#include "internal/prof.h" #include "internal/prof.h"
#undef JEMALLOC_H_EXTERNS #undef JEMALLOC_H_EXTERNS
@ -275,7 +273,6 @@ arena_t *choose_arena_hard(void);
#define JEMALLOC_H_INLINES #define JEMALLOC_H_INLINES
#include "internal/prn.h" #include "internal/prn.h"
#include "internal/hash.h"
#include "internal/mb.h" #include "internal/mb.h"
#include "internal/ckh.h" #include "internal/ckh.h"
#include "internal/jemalloc_stats.h" #include "internal/jemalloc_stats.h"
@ -376,6 +373,7 @@ choose_arena(void)
#include "internal/jemalloc_tcache.h" #include "internal/jemalloc_tcache.h"
#include "internal/jemalloc_arena.h" #include "internal/jemalloc_arena.h"
#include "internal/jemalloc_trace.h" #include "internal/jemalloc_trace.h"
#include "internal/hash.h"
#include "internal/prof.h" #include "internal/prof.h"
#ifndef JEMALLOC_ENABLE_INLINE #ifndef JEMALLOC_ENABLE_INLINE

View File

@ -8,6 +8,8 @@ typedef struct tcache_bin_stats_s tcache_bin_stats_t;
typedef struct malloc_bin_stats_s malloc_bin_stats_t; typedef struct malloc_bin_stats_s malloc_bin_stats_t;
typedef struct malloc_large_stats_s malloc_large_stats_t; typedef struct malloc_large_stats_s malloc_large_stats_t;
typedef struct arena_stats_s arena_stats_t; typedef struct arena_stats_s arena_stats_t;
#endif
#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
typedef struct chunk_stats_s chunk_stats_t; typedef struct chunk_stats_s chunk_stats_t;
#endif #endif

View File

@ -54,7 +54,7 @@ mb_write(void)
); );
#endif #endif
} }
#elif defined(__amd64_) #elif (defined(__amd64_) || defined(__x86_64__))
JEMALLOC_INLINE void JEMALLOC_INLINE void
mb_write(void) mb_write(void)
{ {

View File

@ -35,6 +35,15 @@ struct prof_bt_s {
unsigned len; unsigned len;
}; };
#ifdef JEMALLOC_PROF_LIBGCC
/* Data structure passed to libgcc _Unwind_Backtrace() callback functions. */
/*
 * The field order matters: prof_backtrace() fills this structure with a
 * positional initializer of the form {bt, nignore, max}.
 */
typedef struct {
/* Backtrace being filled in; the callback stores into bt->vec at bt->len. */
prof_bt_t *bt;
/* Frames still to be skipped; the callback decrements this until it is 0. */
unsigned nignore;
/* The callback ends the unwind once bt->len reaches this value. */
unsigned max;
} prof_unwind_data_t;
#endif
struct prof_cnt_s { struct prof_cnt_s {
/* /*
* Profiling counters. An allocation/deallocation pair can operate on * Profiling counters. An allocation/deallocation pair can operate on

View File

@ -54,6 +54,9 @@
/* Use libunwind for profile backtracing if defined. */ /* Use libunwind for profile backtracing if defined. */
#undef JEMALLOC_PROF_LIBUNWIND #undef JEMALLOC_PROF_LIBUNWIND
/* Use libgcc for profile backtracing if defined. */
#undef JEMALLOC_PROF_LIBGCC
/* /*
* JEMALLOC_TINY enables support for tiny objects, which are smaller than one * JEMALLOC_TINY enables support for tiny objects, which are smaller than one
* quantum. * quantum.

View File

@ -219,8 +219,8 @@ stats_arena_bins_print(void (*write4)(void *, const char *, const char *,
size_t); size_t);
if (config_tcache) { if (config_tcache) {
malloc_cprintf(write4, w4opaque, malloc_cprintf(write4, w4opaque,
"%13u %1s %5u %4u %3u %10"PRIu64" %9"PRIu64 "%13u %1s %5zu %4u %3zu %10"PRIu64
" %9"PRIu64" %9"PRIu64"" " %9"PRIu64" %9"PRIu64" %9"PRIu64""
" %9"PRIu64" %7zu %7zu\n", " %9"PRIu64" %7zu %7zu\n",
j, j,
j < ntbins_ ? "T" : j < ntbins_ + nqbins ? j < ntbins_ ? "T" : j < ntbins_ + nqbins ?
@ -232,8 +232,8 @@ stats_arena_bins_print(void (*write4)(void *, const char *, const char *,
highruns, curruns); highruns, curruns);
} else { } else {
malloc_cprintf(write4, w4opaque, malloc_cprintf(write4, w4opaque,
"%13u %1s %5u %4u %3u %10"PRIu64" %9"PRIu64 "%13u %1s %5zu %4u %3zu %10"PRIu64
" %9"PRIu64" %7zu %7zu\n", " %9"PRIu64" %9"PRIu64" %7zu %7zu\n",
j, j,
j < ntbins_ ? "T" : j < ntbins_ + nqbins ? j < ntbins_ ? "T" : j < ntbins_ + nqbins ?
"Q" : j < ntbins_ + nqbins + ncbins ? "C" : "Q" : j < ntbins_ + nqbins + ncbins ? "C" :

View File

@ -3,6 +3,10 @@
#ifdef JEMALLOC_PROF #ifdef JEMALLOC_PROF
/******************************************************************************/ /******************************************************************************/
#ifdef JEMALLOC_PROF_LIBGCC
#include <unwind.h>
#endif
#ifdef JEMALLOC_PROF_LIBUNWIND #ifdef JEMALLOC_PROF_LIBUNWIND
#define UNW_LOCAL_ONLY #define UNW_LOCAL_ONLY
#include <libunwind.h> #include <libunwind.h>
@ -82,7 +86,13 @@ static bool enq_udump;
static prof_bt_t *bt_dup(prof_bt_t *bt); static prof_bt_t *bt_dup(prof_bt_t *bt);
static void bt_init(prof_bt_t *bt, void **vec); static void bt_init(prof_bt_t *bt, void **vec);
static bool prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max); #ifdef JEMALLOC_PROF_LIBGCC
static _Unwind_Reason_Code prof_unwind_init_callback(
struct _Unwind_Context *context, void *arg);
static _Unwind_Reason_Code prof_unwind_callback(
struct _Unwind_Context *context, void *arg);
#endif
static void prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max);
static prof_thr_cnt_t *prof_lookup(prof_bt_t *bt); static prof_thr_cnt_t *prof_lookup(prof_bt_t *bt);
static void prof_cnt_set(const void *ptr, prof_thr_cnt_t *cnt); static void prof_cnt_set(const void *ptr, prof_thr_cnt_t *cnt);
static void prof_flush(void); static void prof_flush(void);
@ -160,8 +170,40 @@ prof_leave(void)
prof_udump(); prof_udump();
} }
#ifdef JEMALLOC_PROF_LIBUNWIND #ifdef JEMALLOC_PROF_LIBGCC
static bool static _Unwind_Reason_Code
prof_unwind_init_callback(struct _Unwind_Context *context, void *arg)
{
/*
 * Deliberately a no-op: neither context nor arg is examined, and every
 * frame simply reports _URC_NO_REASON.  The boot code hands this callback
 * to _Unwind_Backtrace() with a NULL arg so that the backtracing machinery
 * allocates its internal state before profiling is enabled.
 */
return (_URC_NO_REASON);
}
/*
 * _Unwind_Backtrace() callback that appends the instruction pointer of each
 * visited frame to the backtrace described by arg.
 *
 * context: unwinder state for the frame currently being visited.
 * arg:     pointer to the prof_unwind_data_t set up by prof_backtrace().
 *
 * Returns _URC_NO_REASON to continue with the next frame, or
 * _URC_END_OF_STACK once the backtrace holds data->max entries.
 */
static _Unwind_Reason_Code
prof_unwind_callback(struct _Unwind_Context *context, void *arg)
{
	prof_unwind_data_t *data = (prof_unwind_data_t *)arg;

	/* Skip the frames the caller asked to leave out of the backtrace. */
	if (data->nignore > 0) {
		data->nignore--;
		return (_URC_NO_REASON);
	}

	/*
	 * Check the limit before storing.  Without this, max == 0 (or a bt
	 * whose len is already >= max) would write past the limit and the
	 * equality test below would never stop the walk.
	 */
	if (data->bt->len >= data->max)
		return (_URC_END_OF_STACK);

	data->bt->vec[data->bt->len] = (void *)_Unwind_GetIP(context);
	data->bt->len++;

	/* Stop as soon as the backtrace is full. */
	if (data->bt->len == data->max)
		return (_URC_END_OF_STACK);

	return (_URC_NO_REASON);
}
/*
 * Capture a backtrace into bt using libgcc's unwinder.
 *
 * bt:      backtrace to fill; frames are stored starting at bt->len
 *          (presumably reset by the caller beforehand -- confirm).
 * nignore: number of frames to skip before recording begins.
 * max:     number of recorded frames at which the unwind is ended.
 */
static void
prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max)
{
	prof_unwind_data_t unwind_state;

	unwind_state.bt = bt;
	unwind_state.nignore = nignore;
	unwind_state.max = max;

	/* prof_unwind_callback() is invoked once per frame with &unwind_state. */
	_Unwind_Backtrace(prof_unwind_callback, &unwind_state);
}
#elif defined(JEMALLOC_PROF_LIBUNWIND)
static void
prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max)
{ {
unw_context_t uc; unw_context_t uc;
@ -180,7 +222,7 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max)
for (i = 0; i < nignore + 1; i++) { for (i = 0; i < nignore + 1; i++) {
err = unw_step(&cursor); err = unw_step(&cursor);
if (err <= 0) if (err <= 0)
return (false); return;
} }
/* /*
@ -195,11 +237,9 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max)
break; break;
} }
} }
return (false);
} }
#else #else
static bool static void
prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max)
{ {
#define NIGNORE 3 #define NIGNORE 3
@ -207,16 +247,16 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max)
if ((i) < NIGNORE + max) { \ if ((i) < NIGNORE + max) { \
void *p; \ void *p; \
if (__builtin_frame_address(i) == 0) \ if (__builtin_frame_address(i) == 0) \
return (false); \ return; \
p = __builtin_return_address(i); \ p = __builtin_return_address(i); \
if (p == NULL) \ if (p == NULL) \
return (false); \ return; \
if (i >= NIGNORE) { \ if (i >= NIGNORE) { \
bt->vec[(i) - NIGNORE] = p; \ bt->vec[(i) - NIGNORE] = p; \
bt->len = (i) - NIGNORE + 1; \ bt->len = (i) - NIGNORE + 1; \
} \ } \
} else \ } else \
return (false); return;
assert(max <= (1U << opt_lg_prof_bt_max)); assert(max <= (1U << opt_lg_prof_bt_max));
@ -376,9 +416,7 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max)
BT_FRAME(128) BT_FRAME(128)
BT_FRAME(129) BT_FRAME(129)
BT_FRAME(130) BT_FRAME(130)
#undef BT_FRAME #undef BT_FRAME
return (false);
} }
#endif #endif
@ -1039,6 +1077,14 @@ prof_boot1(void)
} }
} }
#ifdef JEMALLOC_PROF_LIBGCC
/*
* Cause the backtracing machinery to allocate its internal state
* before enabling profiling.
*/
_Unwind_Backtrace(prof_unwind_init_callback, NULL);
#endif
prof_booted = true; prof_booted = true;
return (false); return (false);