Fix heap profiling bugs.

Fix a potential deadlock that could occur during interval- and
growth-triggered heap profile dumps.

Fix an off-by-one heap profile statistics bug that could be observed in
interval- and growth-triggered heap profiles.

Fix heap profile dump filename sequence numbers (regression during
conversion to malloc_snprintf()).
This commit is contained in:
Jason Evans 2012-04-22 16:00:11 -07:00
parent a5288ca934
commit 52386b2dc6
4 changed files with 129 additions and 87 deletions

View File

@ -64,18 +64,22 @@ found in the git revision history:
- Remove the --enable-sysv configure option. - Remove the --enable-sysv configure option.
Bug fixes: Bug fixes:
- Fix fork-related bugs that could cause deadlock in children between fork
and exec.
- Fix a statistics-related bug in the "thread.arena" mallctl that could cause - Fix a statistics-related bug in the "thread.arena" mallctl that could cause
invalid statistics and crashes. invalid statistics and crashes.
- Work around TLS dallocation via free() on Linux. This bug could cause - Work around TLS dallocation via free() on Linux. This bug could cause
write-after-free memory corruption. write-after-free memory corruption.
- Fix a potential deadlock that could occur during interval- and
growth-triggered heap profile dumps.
- Fix chunk_alloc_dss() to stop claiming memory is zeroed. This bug could - Fix chunk_alloc_dss() to stop claiming memory is zeroed. This bug could
cause memory corruption and crashes with --enable-dss specified. cause memory corruption and crashes with --enable-dss specified.
- Fix fork-related bugs that could cause deadlock in children between fork
and exec.
- Fix malloc_stats_print() to honor 'b' and 'l' in the opts parameter. - Fix malloc_stats_print() to honor 'b' and 'l' in the opts parameter.
- Fix realloc(p, 0) to act like free(p). - Fix realloc(p, 0) to act like free(p).
- Do not enforce minimum alignment in memalign(). - Do not enforce minimum alignment in memalign().
- Check for NULL pointer in malloc_usable_size(). - Check for NULL pointer in malloc_usable_size().
- Fix an off-by-one heap profile statistics bug that could be observed in
interval- and growth-triggered heap profiles.
- Fix bin->runcur management to fix a layout policy bug. This bug did not - Fix bin->runcur management to fix a layout policy bug. This bug did not
affect correctness. affect correctness.
- Fix a bug in choose_arena_hard() that potentially caused more arenas to be - Fix a bug in choose_arena_hard() that potentially caused more arenas to be

View File

@ -240,6 +240,7 @@
#define prof_sample_threshold_update JEMALLOC_N(prof_sample_threshold_update) #define prof_sample_threshold_update JEMALLOC_N(prof_sample_threshold_update)
#define prof_tdata_booted JEMALLOC_N(prof_tdata_booted) #define prof_tdata_booted JEMALLOC_N(prof_tdata_booted)
#define prof_tdata_cleanup JEMALLOC_N(prof_tdata_cleanup) #define prof_tdata_cleanup JEMALLOC_N(prof_tdata_cleanup)
#define prof_tdata_get JEMALLOC_N(prof_tdata_get)
#define prof_tdata_init JEMALLOC_N(prof_tdata_init) #define prof_tdata_init JEMALLOC_N(prof_tdata_init)
#define prof_tdata_initialized JEMALLOC_N(prof_tdata_initialized) #define prof_tdata_initialized JEMALLOC_N(prof_tdata_initialized)
#define prof_tdata_tls JEMALLOC_N(prof_tdata_tls) #define prof_tdata_tls JEMALLOC_N(prof_tdata_tls)

View File

@ -113,9 +113,19 @@ struct prof_ctx_s {
/* Associated backtrace. */ /* Associated backtrace. */
prof_bt_t *bt; prof_bt_t *bt;
/* Protects cnt_merged and cnts_ql. */ /* Protects nlimbo, cnt_merged, and cnts_ql. */
malloc_mutex_t *lock; malloc_mutex_t *lock;
/*
* Number of threads that currently cause this ctx to be in a state of
* limbo due to one of:
* - Initializing per thread counters associated with this ctx.
* - Preparing to destroy this ctx.
* nlimbo must be 1 (single destroyer) in order to safely destroy the
* ctx.
*/
unsigned nlimbo;
/* Temporary storage for summation during dump. */ /* Temporary storage for summation during dump. */
prof_cnt_t cnt_summed; prof_cnt_t cnt_summed;
@ -152,6 +162,11 @@ struct prof_tdata_s {
uint64_t prng_state; uint64_t prng_state;
uint64_t threshold; uint64_t threshold;
uint64_t accum; uint64_t accum;
/* State used to avoid dumping while operating on prof internals. */
bool enq;
bool enq_idump;
bool enq_gdump;
}; };
#endif /* JEMALLOC_H_STRUCTS */ #endif /* JEMALLOC_H_STRUCTS */
@ -211,14 +226,9 @@ bool prof_boot2(void);
\ \
assert(size == s2u(size)); \ assert(size == s2u(size)); \
\ \
prof_tdata = *prof_tdata_tsd_get(); \ prof_tdata = prof_tdata_get(); \
if (prof_tdata == NULL) { \ if (prof_tdata == NULL) \
prof_tdata = prof_tdata_init(); \
if (prof_tdata == NULL) { \
ret = NULL; \
break; \ break; \
} \
} \
\ \
if (opt_prof_active == false) { \ if (opt_prof_active == false) { \
/* Sampling is currently inactive, so avoid sampling. */\ /* Sampling is currently inactive, so avoid sampling. */\
@ -260,6 +270,7 @@ bool prof_boot2(void);
#ifndef JEMALLOC_ENABLE_INLINE #ifndef JEMALLOC_ENABLE_INLINE
malloc_tsd_protos(JEMALLOC_ATTR(unused), prof_tdata, prof_tdata_t *) malloc_tsd_protos(JEMALLOC_ATTR(unused), prof_tdata, prof_tdata_t *)
prof_tdata_t *prof_tdata_get(void);
void prof_sample_threshold_update(prof_tdata_t *prof_tdata); void prof_sample_threshold_update(prof_tdata_t *prof_tdata);
prof_ctx_t *prof_ctx_get(const void *ptr); prof_ctx_t *prof_ctx_get(const void *ptr);
void prof_ctx_set(const void *ptr, prof_ctx_t *ctx); void prof_ctx_set(const void *ptr, prof_ctx_t *ctx);
@ -276,6 +287,20 @@ malloc_tsd_externs(prof_tdata, prof_tdata_t *)
malloc_tsd_funcs(JEMALLOC_INLINE, prof_tdata, prof_tdata_t *, NULL, malloc_tsd_funcs(JEMALLOC_INLINE, prof_tdata, prof_tdata_t *, NULL,
prof_tdata_cleanup) prof_tdata_cleanup)
JEMALLOC_INLINE prof_tdata_t *
prof_tdata_get(void)
{
prof_tdata_t *prof_tdata;
cassert(config_prof);
prof_tdata = *prof_tdata_tsd_get();
if (prof_tdata == NULL)
prof_tdata = prof_tdata_init();
return (prof_tdata);
}
JEMALLOC_INLINE void JEMALLOC_INLINE void
prof_sample_threshold_update(prof_tdata_t *prof_tdata) prof_sample_threshold_update(prof_tdata_t *prof_tdata)
{ {

View File

@ -64,11 +64,6 @@ static int prof_dump_fd;
/* Do not dump any profiles until bootstrapping is complete. */ /* Do not dump any profiles until bootstrapping is complete. */
static bool prof_booted = false; static bool prof_booted = false;
static malloc_mutex_t enq_mtx;
static bool enq;
static bool enq_idump;
static bool enq_gdump;
/******************************************************************************/ /******************************************************************************/
/* Function prototypes for non-inline static functions. */ /* Function prototypes for non-inline static functions. */
@ -148,20 +143,19 @@ bt_dup(prof_bt_t *bt)
} }
static inline void static inline void
prof_enter(void) prof_enter(prof_tdata_t *prof_tdata)
{ {
cassert(config_prof); cassert(config_prof);
malloc_mutex_lock(&enq_mtx); assert(prof_tdata->enq == false);
enq = true; prof_tdata->enq = true;
malloc_mutex_unlock(&enq_mtx);
malloc_mutex_lock(&bt2ctx_mtx); malloc_mutex_lock(&bt2ctx_mtx);
} }
static inline void static inline void
prof_leave(void) prof_leave(prof_tdata_t *prof_tdata)
{ {
bool idump, gdump; bool idump, gdump;
@ -169,13 +163,12 @@ prof_leave(void)
malloc_mutex_unlock(&bt2ctx_mtx); malloc_mutex_unlock(&bt2ctx_mtx);
malloc_mutex_lock(&enq_mtx); assert(prof_tdata->enq);
enq = false; prof_tdata->enq = false;
idump = enq_idump; idump = prof_tdata->enq_idump;
enq_idump = false; prof_tdata->enq_idump = false;
gdump = enq_gdump; gdump = prof_tdata->enq_gdump;
enq_gdump = false; prof_tdata->enq_gdump = false;
malloc_mutex_unlock(&enq_mtx);
if (idump) if (idump)
prof_idump(); prof_idump();
@ -446,12 +439,9 @@ prof_lookup(prof_bt_t *bt)
cassert(config_prof); cassert(config_prof);
prof_tdata = *prof_tdata_tsd_get(); prof_tdata = prof_tdata_get();
if (prof_tdata == NULL) {
prof_tdata = prof_tdata_init();
if (prof_tdata == NULL) if (prof_tdata == NULL)
return (NULL); return (NULL);
}
if (ckh_search(&prof_tdata->bt2cnt, bt, NULL, &ret.v)) { if (ckh_search(&prof_tdata->bt2cnt, bt, NULL, &ret.v)) {
union { union {
@ -468,52 +458,48 @@ prof_lookup(prof_bt_t *bt)
* This thread's cache lacks bt. Look for it in the global * This thread's cache lacks bt. Look for it in the global
* cache. * cache.
*/ */
prof_enter(); prof_enter(prof_tdata);
if (ckh_search(&bt2ctx, bt, &btkey.v, &ctx.v)) { if (ckh_search(&bt2ctx, bt, &btkey.v, &ctx.v)) {
/* bt has never been seen before. Insert it. */ /* bt has never been seen before. Insert it. */
ctx.v = imalloc(sizeof(prof_ctx_t)); ctx.v = imalloc(sizeof(prof_ctx_t));
if (ctx.v == NULL) { if (ctx.v == NULL) {
prof_leave(); prof_leave(prof_tdata);
return (NULL); return (NULL);
} }
btkey.p = bt_dup(bt); btkey.p = bt_dup(bt);
if (btkey.v == NULL) { if (btkey.v == NULL) {
prof_leave(); prof_leave(prof_tdata);
idalloc(ctx.v); idalloc(ctx.v);
return (NULL); return (NULL);
} }
ctx.p->bt = btkey.p; ctx.p->bt = btkey.p;
ctx.p->lock = prof_ctx_mutex_choose(); ctx.p->lock = prof_ctx_mutex_choose();
/*
* Set nlimbo to 1, in order to avoid a race condition
* with prof_ctx_merge()/prof_ctx_destroy().
*/
ctx.p->nlimbo = 1;
memset(&ctx.p->cnt_merged, 0, sizeof(prof_cnt_t)); memset(&ctx.p->cnt_merged, 0, sizeof(prof_cnt_t));
ql_new(&ctx.p->cnts_ql); ql_new(&ctx.p->cnts_ql);
if (ckh_insert(&bt2ctx, btkey.v, ctx.v)) { if (ckh_insert(&bt2ctx, btkey.v, ctx.v)) {
/* OOM. */ /* OOM. */
prof_leave(); prof_leave(prof_tdata);
idalloc(btkey.v); idalloc(btkey.v);
idalloc(ctx.v); idalloc(ctx.v);
return (NULL); return (NULL);
} }
/*
* Artificially raise curobjs, in order to avoid a race
* condition with prof_ctx_merge()/prof_ctx_destroy().
*
* No locking is necessary for ctx here because no other
* threads have had the opportunity to fetch it from
* bt2ctx yet.
*/
ctx.p->cnt_merged.curobjs++;
new_ctx = true; new_ctx = true;
} else { } else {
/* /*
* Artificially raise curobjs, in order to avoid a race * Increment nlimbo, in order to avoid a race condition
* condition with prof_ctx_merge()/prof_ctx_destroy(). * with prof_ctx_merge()/prof_ctx_destroy().
*/ */
malloc_mutex_lock(ctx.p->lock); malloc_mutex_lock(ctx.p->lock);
ctx.p->cnt_merged.curobjs++; ctx.p->nlimbo++;
malloc_mutex_unlock(ctx.p->lock); malloc_mutex_unlock(ctx.p->lock);
new_ctx = false; new_ctx = false;
} }
prof_leave(); prof_leave(prof_tdata);
/* Link a prof_thd_cnt_t into ctx for this thread. */ /* Link a prof_thd_cnt_t into ctx for this thread. */
if (ckh_count(&prof_tdata->bt2cnt) == PROF_TCMAX) { if (ckh_count(&prof_tdata->bt2cnt) == PROF_TCMAX) {
@ -555,7 +541,7 @@ prof_lookup(prof_bt_t *bt)
ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link); ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link);
malloc_mutex_lock(ctx.p->lock); malloc_mutex_lock(ctx.p->lock);
ql_tail_insert(&ctx.p->cnts_ql, ret.p, cnts_link); ql_tail_insert(&ctx.p->cnts_ql, ret.p, cnts_link);
ctx.p->cnt_merged.curobjs--; ctx.p->nlimbo--;
malloc_mutex_unlock(ctx.p->lock); malloc_mutex_unlock(ctx.p->lock);
} else { } else {
/* Move ret to the front of the LRU. */ /* Move ret to the front of the LRU. */
@ -688,26 +674,30 @@ prof_ctx_sum(prof_ctx_t *ctx, prof_cnt_t *cnt_all, size_t *leak_nctx)
static void static void
prof_ctx_destroy(prof_ctx_t *ctx) prof_ctx_destroy(prof_ctx_t *ctx)
{ {
prof_tdata_t *prof_tdata;
cassert(config_prof); cassert(config_prof);
/* /*
* Check that ctx is still unused by any thread cache before destroying * Check that ctx is still unused by any thread cache before destroying
* it. prof_lookup() artificially raises ctx->cnt_merge.curobjs in * it. prof_lookup() increments ctx->nlimbo in order to avoid a race
* order to avoid a race condition with this function, as does * condition with this function, as does prof_ctx_merge() in order to
* prof_ctx_merge() in order to avoid a race between the main body of * avoid a race between the main body of prof_ctx_merge() and entry
* prof_ctx_merge() and entry into this function. * into this function.
*/ */
prof_enter(); prof_tdata = *prof_tdata_tsd_get();
assert(prof_tdata != NULL);
prof_enter(prof_tdata);
malloc_mutex_lock(ctx->lock); malloc_mutex_lock(ctx->lock);
if (ql_first(&ctx->cnts_ql) == NULL && ctx->cnt_merged.curobjs == 1) { if (ql_first(&ctx->cnts_ql) == NULL && ctx->cnt_merged.curobjs == 0 &&
ctx->nlimbo == 1) {
assert(ctx->cnt_merged.curbytes == 0); assert(ctx->cnt_merged.curbytes == 0);
assert(ctx->cnt_merged.accumobjs == 0); assert(ctx->cnt_merged.accumobjs == 0);
assert(ctx->cnt_merged.accumbytes == 0); assert(ctx->cnt_merged.accumbytes == 0);
/* Remove ctx from bt2ctx. */ /* Remove ctx from bt2ctx. */
if (ckh_remove(&bt2ctx, ctx->bt, NULL, NULL)) if (ckh_remove(&bt2ctx, ctx->bt, NULL, NULL))
assert(false); assert(false);
prof_leave(); prof_leave(prof_tdata);
/* Destroy ctx. */ /* Destroy ctx. */
malloc_mutex_unlock(ctx->lock); malloc_mutex_unlock(ctx->lock);
bt_destroy(ctx->bt); bt_destroy(ctx->bt);
@ -717,9 +707,9 @@ prof_ctx_destroy(prof_ctx_t *ctx)
* Compensate for increment in prof_ctx_merge() or * Compensate for increment in prof_ctx_merge() or
* prof_lookup(). * prof_lookup().
*/ */
ctx->cnt_merged.curobjs--; ctx->nlimbo--;
malloc_mutex_unlock(ctx->lock); malloc_mutex_unlock(ctx->lock);
prof_leave(); prof_leave(prof_tdata);
} }
} }
@ -738,12 +728,12 @@ prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt)
ctx->cnt_merged.accumbytes += cnt->cnts.accumbytes; ctx->cnt_merged.accumbytes += cnt->cnts.accumbytes;
ql_remove(&ctx->cnts_ql, cnt, cnts_link); ql_remove(&ctx->cnts_ql, cnt, cnts_link);
if (opt_prof_accum == false && ql_first(&ctx->cnts_ql) == NULL && if (opt_prof_accum == false && ql_first(&ctx->cnts_ql) == NULL &&
ctx->cnt_merged.curobjs == 0) { ctx->cnt_merged.curobjs == 0 && ctx->nlimbo == 0) {
/* /*
* Artificially raise ctx->cnt_merged.curobjs in order to keep * Increment ctx->nlimbo in order to keep another thread from
* another thread from winning the race to destroy ctx while * winning the race to destroy ctx while this one has ctx->lock
* this one has ctx->lock dropped. Without this, it would be * dropped. Without this, it would be possible for another
* possible for another thread to: * thread to:
* *
* 1) Sample an allocation associated with ctx. * 1) Sample an allocation associated with ctx.
* 2) Deallocate the sampled object. * 2) Deallocate the sampled object.
@ -752,7 +742,7 @@ prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt)
* The result would be that ctx no longer exists by the time * The result would be that ctx no longer exists by the time
* this thread accesses it in prof_ctx_destroy(). * this thread accesses it in prof_ctx_destroy().
*/ */
ctx->cnt_merged.curobjs++; ctx->nlimbo++;
destroy = true; destroy = true;
} else } else
destroy = false; destroy = false;
@ -768,7 +758,16 @@ prof_dump_ctx(bool propagate_err, prof_ctx_t *ctx, prof_bt_t *bt)
cassert(config_prof); cassert(config_prof);
if (opt_prof_accum == false && ctx->cnt_summed.curobjs == 0) { /*
* Current statistics can sum to 0 as a result of unmerged per thread
* statistics. Additionally, interval- and growth-triggered dumps can
* occur between the time a ctx is created and when its statistics are
* filled in. Avoid dumping any ctx that is an artifact of either
* implementation detail.
*/
if ((opt_prof_accum == false && ctx->cnt_summed.curobjs == 0) ||
(opt_prof_accum && ctx->cnt_summed.accumobjs == 0)) {
assert(ctx->cnt_summed.curobjs == 0);
assert(ctx->cnt_summed.curbytes == 0); assert(ctx->cnt_summed.curbytes == 0);
assert(ctx->cnt_summed.accumobjs == 0); assert(ctx->cnt_summed.accumobjs == 0);
assert(ctx->cnt_summed.accumbytes == 0); assert(ctx->cnt_summed.accumbytes == 0);
@ -831,6 +830,7 @@ prof_dump_maps(bool propagate_err)
static bool static bool
prof_dump(bool propagate_err, const char *filename, bool leakcheck) prof_dump(bool propagate_err, const char *filename, bool leakcheck)
{ {
prof_tdata_t *prof_tdata;
prof_cnt_t cnt_all; prof_cnt_t cnt_all;
size_t tabind; size_t tabind;
union { union {
@ -845,7 +845,10 @@ prof_dump(bool propagate_err, const char *filename, bool leakcheck)
cassert(config_prof); cassert(config_prof);
prof_enter(); prof_tdata = prof_tdata_get();
if (prof_tdata == NULL)
return (true);
prof_enter(prof_tdata);
prof_dump_fd = creat(filename, 0644); prof_dump_fd = creat(filename, 0644);
if (prof_dump_fd == -1) { if (prof_dump_fd == -1) {
if (propagate_err == false) { if (propagate_err == false) {
@ -896,7 +899,7 @@ prof_dump(bool propagate_err, const char *filename, bool leakcheck)
if (prof_flush(propagate_err)) if (prof_flush(propagate_err))
goto label_error; goto label_error;
close(prof_dump_fd); close(prof_dump_fd);
prof_leave(); prof_leave(prof_tdata);
if (leakcheck && cnt_all.curbytes != 0) { if (leakcheck && cnt_all.curbytes != 0) {
malloc_printf("<jemalloc>: Leak summary: %"PRId64" byte%s, %" malloc_printf("<jemalloc>: Leak summary: %"PRId64" byte%s, %"
@ -911,7 +914,7 @@ prof_dump(bool propagate_err, const char *filename, bool leakcheck)
return (false); return (false);
label_error: label_error:
prof_leave(); prof_leave(prof_tdata);
return (true); return (true);
} }
@ -933,6 +936,7 @@ prof_dump_filename(char *filename, char v, int64_t vseq)
"%s.%d.%"PRIu64".%c.heap", "%s.%d.%"PRIu64".%c.heap",
opt_prof_prefix, (int)getpid(), prof_dump_seq, v); opt_prof_prefix, (int)getpid(), prof_dump_seq, v);
} }
prof_dump_seq++;
} }
static void static void
@ -956,19 +960,24 @@ prof_fdump(void)
void void
prof_idump(void) prof_idump(void)
{ {
prof_tdata_t *prof_tdata;
char filename[PATH_MAX + 1]; char filename[PATH_MAX + 1];
cassert(config_prof); cassert(config_prof);
if (prof_booted == false) if (prof_booted == false)
return; return;
malloc_mutex_lock(&enq_mtx); /*
if (enq) { * Don't call prof_tdata_get() here, because it could cause recursive
enq_idump = true; * allocation.
malloc_mutex_unlock(&enq_mtx); */
prof_tdata = *prof_tdata_tsd_get();
if (prof_tdata == NULL)
return;
if (prof_tdata->enq) {
prof_tdata->enq_idump = true;
return; return;
} }
malloc_mutex_unlock(&enq_mtx);
if (opt_prof_prefix[0] != '\0') { if (opt_prof_prefix[0] != '\0') {
malloc_mutex_lock(&prof_dump_seq_mtx); malloc_mutex_lock(&prof_dump_seq_mtx);
@ -1005,19 +1014,24 @@ prof_mdump(const char *filename)
void void
prof_gdump(void) prof_gdump(void)
{ {
prof_tdata_t *prof_tdata;
char filename[DUMP_FILENAME_BUFSIZE]; char filename[DUMP_FILENAME_BUFSIZE];
cassert(config_prof); cassert(config_prof);
if (prof_booted == false) if (prof_booted == false)
return; return;
malloc_mutex_lock(&enq_mtx); /*
if (enq) { * Don't call prof_tdata_get() here, because it could cause recursive
enq_gdump = true; * allocation.
malloc_mutex_unlock(&enq_mtx); */
prof_tdata = *prof_tdata_tsd_get();
if (prof_tdata == NULL)
return;
if (prof_tdata->enq) {
prof_tdata->enq_gdump = true;
return; return;
} }
malloc_mutex_unlock(&enq_mtx);
if (opt_prof_prefix[0] != '\0') { if (opt_prof_prefix[0] != '\0') {
malloc_mutex_lock(&prof_dump_seq_mtx); malloc_mutex_lock(&prof_dump_seq_mtx);
@ -1110,6 +1124,10 @@ prof_tdata_init(void)
prof_tdata->threshold = 0; prof_tdata->threshold = 0;
prof_tdata->accum = 0; prof_tdata->accum = 0;
prof_tdata->enq = false;
prof_tdata->enq_idump = false;
prof_tdata->enq_gdump = false;
prof_tdata_tsd_set(&prof_tdata); prof_tdata_tsd_set(&prof_tdata);
return (prof_tdata); return (prof_tdata);
@ -1206,12 +1224,6 @@ prof_boot2(void)
if (malloc_mutex_init(&prof_dump_seq_mtx)) if (malloc_mutex_init(&prof_dump_seq_mtx))
return (true); return (true);
if (malloc_mutex_init(&enq_mtx))
return (true);
enq = false;
enq_idump = false;
enq_gdump = false;
if (atexit(prof_fdump) != 0) { if (atexit(prof_fdump) != 0) {
malloc_write("<jemalloc>: Error in atexit()\n"); malloc_write("<jemalloc>: Error in atexit()\n");
if (opt_abort) if (opt_abort)