diff --git a/jemalloc/doc/jemalloc.3.in b/jemalloc/doc/jemalloc.3.in
index 23e6ca00..c8d2f29c 100644
--- a/jemalloc/doc/jemalloc.3.in
+++ b/jemalloc/doc/jemalloc.3.in
@@ -436,7 +436,7 @@ in these cases.
 @roff_prof@.It B
 @roff_prof@Double/halve the maximum backtrace depth when profiling memory
 @roff_prof@allocation activity.
-@roff_prof@The default is 4.
+@roff_prof@The default is 128.
 .It C
 Double/halve the size of the maximum size class that is a multiple of the
 cacheline size (64).
diff --git a/jemalloc/include/jemalloc/internal/prof.h b/jemalloc/include/jemalloc/internal/prof.h
index a8f67bbb..1aa85bb5 100644
--- a/jemalloc/include/jemalloc/internal/prof.h
+++ b/jemalloc/include/jemalloc/internal/prof.h
@@ -9,7 +9,7 @@ typedef struct prof_ctx_s prof_ctx_t;
 typedef struct prof_tcache_s prof_tcache_t;
 
 /* Option defaults. */
-#define	LG_PROF_BT_MAX_DEFAULT	2
+#define	LG_PROF_BT_MAX_DEFAULT	7
 #define	LG_PROF_SAMPLE_DEFAULT	0
 #define	LG_PROF_INTERVAL_DEFAULT	-1
 #define	LG_PROF_TCMAX_DEFAULT	-1
@@ -17,10 +17,13 @@ typedef struct prof_tcache_s prof_tcache_t;
 /*
  * Hard limit on stack backtrace depth. Note that the version of
  * prof_backtrace() that is based on __builtin_return_address() necessarily has
- * a hard-coded number of backtrace frame handlers, so increasing
- * LG_PROF_BT_MAX requires changing prof_backtrace().
+ * a hard-coded number of backtrace frame handlers.
  */
-#define	LG_PROF_BT_MAX	7 /* >= LG_PROF_BT_MAX_DEFAULT */
+#if (defined(JEMALLOC_PROF_LIBGCC) || defined(JEMALLOC_PROF_LIBUNWIND))
+# define LG_PROF_BT_MAX	((ZU(1) << (LG_SIZEOF_PTR+3)) - 1)
+#else
+# define LG_PROF_BT_MAX	7 /* >= LG_PROF_BT_MAX_DEFAULT */
+#endif
 #define	PROF_BT_MAX	(1U << LG_PROF_BT_MAX)
 
 /* Initial hash table size. */
diff --git a/jemalloc/src/prof.c b/jemalloc/src/prof.c
index 7ffda230..5b66f97b 100644
--- a/jemalloc/src/prof.c
+++ b/jemalloc/src/prof.c
@@ -52,7 +52,6 @@ static __thread prof_tcache_t *prof_tcache_tls
 	pthread_setspecific(prof_tcache_tsd, (void *)(v));		\
 } while (0)
 #endif
-
 /*
  * Same contents as b2cnt_tls, but initialized such that the TSD destructor is
  * called when a thread exits, so that prof_tcache_tls contents can be merged,
@@ -60,6 +59,29 @@ static __thread prof_tcache_t *prof_tcache_tls
  */
 static pthread_key_t prof_tcache_tsd;
 
+/* Thread-specific backtrace vector, used for calls to prof_backtrace(). */
+#ifndef NO_TLS
+static __thread void **vec_tls
+    JEMALLOC_ATTR(tls_model("initial-exec"));
+# define VEC_GET()	vec_tls
+# define VEC_SET(v)	do {					\
+	vec_tls = (v);						\
+	pthread_setspecific(vec_tsd, (void *)(v));		\
+} while (0)
+#else
+# define VEC_GET()	((void **)pthread_getspecific(vec_tsd))
+# define VEC_SET(v)	do {					\
+	pthread_setspecific(vec_tsd, (void *)(v));		\
+} while (0)
+#endif
+/*
+ * Same contents as vec_tls, but initialized such that the TSD destructor is
+ * called when a thread exits, so that vec_tls contents can be merged,
+ * unlinked, and deallocated.
+ */
+static pthread_key_t vec_tsd;
+
+
 /* (1U << opt_lg_prof_bt_max). */
 static unsigned prof_bt_max;
 
@@ -158,6 +180,7 @@ static void prof_bt_hash(const void *key, unsigned minbits,
     size_t *hash1, size_t *hash2);
 static bool prof_bt_keycomp(const void *k1, const void *k2);
 static void prof_tcache_cleanup(void *arg);
+static void vec_cleanup(void *arg);
 #ifdef NO_TLS
 static void prof_sample_state_thread_cleanup(void *arg);
 #endif
@@ -632,9 +655,17 @@ prof_thr_cnt_t *
 prof_alloc_prep(size_t size)
 {
 	prof_thr_cnt_t *ret;
-	void *vec[prof_bt_max];
+	void **vec;
 	prof_bt_t bt;
 
+	vec = VEC_GET();
+	if (vec == NULL) {
+		vec = imalloc(sizeof(void *) * prof_bt_max);
+		if (vec == NULL)
+			return (NULL);
+		VEC_SET(vec);
+	}
+
 	if (opt_prof_active == false) {
 		/* Sampling is currently inactive, so avoid sampling. */
 		ret = (prof_thr_cnt_t *)(uintptr_t)1U;
@@ -1161,10 +1192,8 @@ prof_dump(const char *filename, bool leakcheck, bool propagate_err)
 	/* Merge per thread profile stats, and sum them in cnt_all. */
 	memset(&cnt_all, 0, sizeof(prof_cnt_t));
 	leak_nctx = 0;
-	for (tabind = 0; ckh_iter(&bt2ctx, &tabind, NULL, &ctx.v)
-	    == false;) {
+	for (tabind = 0; ckh_iter(&bt2ctx, &tabind, NULL, &ctx.v) == false;)
 		prof_ctx_sum(ctx.p, &cnt_all, &leak_nctx);
-	}
 
 	/* Dump profile header. */
 	if (prof_write("heap profile: ", propagate_err)
@@ -1452,6 +1481,18 @@ prof_tcache_cleanup(void *arg)
 	}
 }
 
+static void
+vec_cleanup(void *arg)
+{
+	void **vec;
+
+	vec = VEC_GET();
+	if (vec != NULL) {
+		idalloc(vec);
+		VEC_SET(NULL);
+	}
+}
+
 #ifdef NO_TLS
 static void
 prof_sample_state_thread_cleanup(void *arg)
@@ -1507,6 +1548,11 @@ prof_boot1(void)
 			    "<jemalloc>: Error in pthread_key_create()\n");
 			abort();
 		}
+		if (pthread_key_create(&vec_tsd, vec_cleanup) != 0) {
+			malloc_write(
+			    "<jemalloc>: Error in pthread_key_create()\n");
+			abort();
+		}
 #ifdef NO_TLS
 		if (pthread_key_create(&prof_sample_state_tsd,
 		    prof_sample_state_thread_cleanup) != 0) {
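
A note on the vec_tls/vec_tsd pair introduced above: prof_alloc_prep() previously kept the backtrace vector on the stack (void *vec[prof_bt_max]), which scales poorly once the configurable backtrace depth can be much larger. The patch instead allocates the vector lazily per thread, caches it in a fast __thread pointer, and mirrors it into a pthread_key_t slot solely so that vec_cleanup() runs at thread exit and frees it. The new defaults are consistent with the man page change: LG_PROF_BT_MAX_DEFAULT of 7 gives 2^7 = 128 frames. The following standalone sketch (not jemalloc code) illustrates that TLS-plus-TSD-destructor pattern under those assumptions; the demo_* names and DEMO_DEPTH are illustrative only and do not appear in the patch.

/*
 * Minimal sketch (not jemalloc code) of the per-thread, lazily allocated
 * vector pattern: a __thread pointer for fast access, mirrored into a
 * pthread key purely so the key's destructor frees the buffer at thread
 * exit.  Build with: cc -pthread demo.c
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

#define DEMO_DEPTH 128				/* stand-in for prof_bt_max */

static pthread_key_t demo_vec_tsd;		/* destructor hook only */
static __thread void **demo_vec_tls;		/* fast-path access */

static void
demo_vec_cleanup(void *arg)
{
	/* Called by pthreads at thread exit with the last value set. */
	free(arg);
}

static void **
demo_vec_get(void)
{
	if (demo_vec_tls == NULL) {
		void **vec = malloc(sizeof(void *) * DEMO_DEPTH);
		if (vec == NULL)
			return (NULL);
		demo_vec_tls = vec;
		/* Arm the destructor; without this the buffer would leak. */
		pthread_setspecific(demo_vec_tsd, vec);
	}
	return (demo_vec_tls);
}

static void *
demo_thread(void *arg)
{
	void **vec = demo_vec_get();

	printf("thread %p got vec %p\n", arg, (void *)vec);
	return (NULL);	/* vec is freed by demo_vec_cleanup() at exit */
}

int
main(void)
{
	pthread_t thr;

	pthread_key_create(&demo_vec_tsd, demo_vec_cleanup);
	pthread_create(&thr, NULL, demo_thread, NULL);
	pthread_join(thr, NULL);
	return (0);
}

The pthread_setspecific() call is what arms the per-thread destructor; a __thread variable alone has no exit hook, which is why the patch keeps both copies and why vec_cleanup() clears them via VEC_SET(NULL) so no stale pointer survives thread teardown.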