From 6e73dc194ee9682d3eacaf725a989f04629718f7 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 9 Sep 2014 19:37:26 -0700 Subject: [PATCH] Fix a profile sampling race. Fix a profile sampling race that was due to preparing to sample, yet doing nothing to assure that the context remains valid until the stats are updated. These regressions were caused by 602c8e0971160e4b85b08b16cf8a2375aa24bc04 (Implement per thread heap profiling.), which did not make it into any releases prior to these fixes. --- include/jemalloc/internal/private_symbols.txt | 1 + include/jemalloc/internal/prof.h | 37 +++--- src/jemalloc.c | 109 +++++++++--------- src/prof.c | 35 ++++++ 4 files changed, 109 insertions(+), 73 deletions(-) diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 3b990b0e..b8990177 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -292,6 +292,7 @@ p2rz pages_purge pow2_ceil prof_alloc_prep +prof_alloc_rollback prof_backtrace prof_boot0 prof_boot1 diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index a9903280..920ec63f 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -97,6 +97,12 @@ struct prof_tctx_s { /* Linkage into gctx's tctxs. */ rb_node(prof_tctx_t) tctx_link; + /* + * True during prof_alloc_prep()..prof_malloc_sample_object(), prevents + * sample vs destroy race. + */ + bool prepared; + /* Current dump-related state, protected by gctx->lock. */ prof_tctx_state_t state; @@ -242,6 +248,7 @@ extern uint64_t prof_interval; */ extern size_t lg_prof_sample; +void prof_alloc_rollback(prof_tctx_t *tctx, bool updated); void prof_malloc_sample_object(const void *ptr, size_t usize, prof_tctx_t *tctx); void prof_free_sampled_object(size_t usize, prof_tctx_t *tctx); @@ -282,14 +289,14 @@ malloc_tsd_protos(JEMALLOC_ATTR(unused), prof_tdata, prof_tdata_t *) prof_tdata_t *prof_tdata_get(bool create); bool prof_sample_accum_update(size_t usize, bool commit, prof_tdata_t **tdata_out); -prof_tctx_t *prof_alloc_prep(size_t usize); +prof_tctx_t *prof_alloc_prep(size_t usize, bool update); prof_tctx_t *prof_tctx_get(const void *ptr); void prof_tctx_set(const void *ptr, prof_tctx_t *tctx); void prof_malloc_sample_object(const void *ptr, size_t usize, prof_tctx_t *tctx); void prof_malloc(const void *ptr, size_t usize, prof_tctx_t *tctx); void prof_realloc(const void *ptr, size_t usize, prof_tctx_t *tctx, - size_t old_usize, prof_tctx_t *old_tctx); + bool updated, size_t old_usize, prof_tctx_t *old_tctx); void prof_free(const void *ptr, size_t usize); #endif @@ -356,7 +363,7 @@ prof_tctx_set(const void *ptr, prof_tctx_t *tctx) } JEMALLOC_INLINE bool -prof_sample_accum_update(size_t usize, bool commit, prof_tdata_t **tdata_out) +prof_sample_accum_update(size_t usize, bool update, prof_tdata_t **tdata_out) { prof_tdata_t *tdata; @@ -373,19 +380,19 @@ prof_sample_accum_update(size_t usize, bool commit, prof_tdata_t **tdata_out) return (true); if (tdata->bytes_until_sample >= usize) { - if (commit) + if (update) tdata->bytes_until_sample -= usize; return (true); } else { /* Compute new sample threshold. 
*/ - if (commit) + if (update) prof_sample_threshold_update(tdata); return (tdata->active == false); } } JEMALLOC_INLINE prof_tctx_t * -prof_alloc_prep(size_t usize) +prof_alloc_prep(size_t usize, bool update) { prof_tctx_t *ret; prof_tdata_t *tdata; @@ -393,7 +400,7 @@ prof_alloc_prep(size_t usize) assert(usize == s2u(usize)); - if (!opt_prof_active || prof_sample_accum_update(usize, false, &tdata)) + if (!opt_prof_active || prof_sample_accum_update(usize, update, &tdata)) ret = (prof_tctx_t *)(uintptr_t)1U; else { bt_init(&bt, tdata->vec); @@ -412,16 +419,6 @@ prof_malloc(const void *ptr, size_t usize, prof_tctx_t *tctx) assert(ptr != NULL); assert(usize == isalloc(ptr, true)); - if (prof_sample_accum_update(usize, true, NULL)) { - /* - * Don't sample. For malloc()-like allocation, it is always - * possible to tell in advance how large an object's usable size - * will be, so there should never be a difference between the - * usize passed to PROF_ALLOC_PREP() and prof_malloc(). - */ - assert((uintptr_t)tctx == (uintptr_t)1U); - } - if ((uintptr_t)tctx > (uintptr_t)1U) prof_malloc_sample_object(ptr, usize, tctx); else @@ -429,14 +426,14 @@ prof_malloc(const void *ptr, size_t usize, prof_tctx_t *tctx) } JEMALLOC_INLINE void -prof_realloc(const void *ptr, size_t usize, prof_tctx_t *tctx, size_t old_usize, - prof_tctx_t *old_tctx) +prof_realloc(const void *ptr, size_t usize, prof_tctx_t *tctx, bool updated, + size_t old_usize, prof_tctx_t *old_tctx) { cassert(config_prof); assert(ptr != NULL || (uintptr_t)tctx <= (uintptr_t)1U); - if (ptr != NULL) { + if (!updated && ptr != NULL) { assert(usize == isalloc(ptr, true)); if (prof_sample_accum_update(usize, true, NULL)) { /* diff --git a/src/jemalloc.c b/src/jemalloc.c index 3f29a857..1d4d1a8a 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -886,13 +886,15 @@ imalloc_prof(size_t usize) void *p; prof_tctx_t *tctx; - tctx = prof_alloc_prep(usize); + tctx = prof_alloc_prep(usize, true); if ((uintptr_t)tctx != (uintptr_t)1U) p = imalloc_prof_sample(usize, tctx); else p = imalloc(usize); - if (p == NULL) + if (p == NULL) { + prof_alloc_rollback(tctx, true); return (NULL); + } prof_malloc(p, usize, tctx); return (p); @@ -962,16 +964,20 @@ imemalign_prof_sample(size_t alignment, size_t usize, prof_tctx_t *tctx) } JEMALLOC_ALWAYS_INLINE_C void * -imemalign_prof(size_t alignment, size_t usize, prof_tctx_t *tctx) +imemalign_prof(size_t alignment, size_t usize) { void *p; + prof_tctx_t *tctx; + tctx = prof_alloc_prep(usize, true); if ((uintptr_t)tctx != (uintptr_t)1U) p = imemalign_prof_sample(alignment, usize, tctx); else p = ipalloc(usize, alignment, false); - if (p == NULL) + if (p == NULL) { + prof_alloc_rollback(tctx, true); return (NULL); + } prof_malloc(p, usize, tctx); return (p); @@ -1013,12 +1019,9 @@ imemalign(void **memptr, size_t alignment, size_t size, size_t min_alignment) goto label_oom; } - if (config_prof && opt_prof) { - prof_tctx_t *tctx; - - tctx = prof_alloc_prep(usize); - result = imemalign_prof(alignment, usize, tctx); - } else + if (config_prof && opt_prof) + result = imemalign_prof(alignment, usize); + else result = ipalloc(usize, alignment, false); if (result == NULL) goto label_oom; @@ -1087,16 +1090,20 @@ icalloc_prof_sample(size_t usize, prof_tctx_t *tctx) } JEMALLOC_ALWAYS_INLINE_C void * -icalloc_prof(size_t usize, prof_tctx_t *tctx) +icalloc_prof(size_t usize) { void *p; + prof_tctx_t *tctx; + tctx = prof_alloc_prep(usize, true); if ((uintptr_t)tctx != (uintptr_t)1U) p = icalloc_prof_sample(usize, tctx); else p = 
icalloc(usize); - if (p == NULL) + if (p == NULL) { + prof_alloc_rollback(tctx, true); return (NULL); + } prof_malloc(p, usize, tctx); return (p); @@ -1136,11 +1143,8 @@ je_calloc(size_t num, size_t size) } if (config_prof && opt_prof) { - prof_tctx_t *tctx; - usize = s2u(num_size); - tctx = prof_alloc_prep(usize); - ret = icalloc_prof(usize, tctx); + ret = icalloc_prof(usize); } else { if (config_stats || (config_valgrind && in_valgrind)) usize = s2u(num_size); @@ -1184,19 +1188,20 @@ irealloc_prof_sample(void *oldptr, size_t usize, prof_tctx_t *tctx) } JEMALLOC_ALWAYS_INLINE_C void * -irealloc_prof(void *oldptr, size_t old_usize, size_t usize, prof_tctx_t *tctx) +irealloc_prof(void *oldptr, size_t old_usize, size_t usize) { void *p; - prof_tctx_t *old_tctx; + prof_tctx_t *old_tctx, *tctx; old_tctx = prof_tctx_get(oldptr); + tctx = prof_alloc_prep(usize, true); if ((uintptr_t)tctx != (uintptr_t)1U) p = irealloc_prof_sample(oldptr, usize, tctx); else p = iralloc(oldptr, usize, 0, false); if (p == NULL) return (NULL); - prof_realloc(p, usize, tctx, old_usize, old_tctx); + prof_realloc(p, usize, tctx, true, old_usize, old_tctx); return (p); } @@ -1270,11 +1275,8 @@ je_realloc(void *ptr, size_t size) old_rzsize = config_prof ? p2rz(ptr) : u2rz(old_usize); if (config_prof && opt_prof) { - prof_tctx_t *tctx; - usize = s2u(size); - tctx = prof_alloc_prep(usize); - ret = irealloc_prof(ptr, old_usize, usize, tctx); + ret = irealloc_prof(ptr, old_usize, usize); } else { if (config_stats || (config_valgrind && in_valgrind)) usize = s2u(size); @@ -1477,7 +1479,7 @@ imallocx_prof(size_t size, int flags, size_t *usize) imallocx_flags_decode(size, flags, usize, &alignment, &zero, &try_tcache, &arena); - tctx = prof_alloc_prep(*usize); + tctx = prof_alloc_prep(*usize, true); if ((uintptr_t)tctx == (uintptr_t)1U) { p = imallocx_maybe_flags(size, flags, *usize, alignment, zero, try_tcache, arena); @@ -1486,8 +1488,10 @@ imallocx_prof(size_t size, int flags, size_t *usize) try_tcache, arena); } else p = NULL; - if (p == NULL) + if (p == NULL) { + prof_alloc_rollback(tctx, true); return (NULL); + } prof_malloc(p, *usize, tctx); return (p); @@ -1572,21 +1576,24 @@ irallocx_prof_sample(void *oldptr, size_t size, size_t alignment, size_t usize, JEMALLOC_ALWAYS_INLINE_C void * irallocx_prof(void *oldptr, size_t old_usize, size_t size, size_t alignment, size_t *usize, bool zero, bool try_tcache_alloc, bool try_tcache_dalloc, - arena_t *arena, prof_tctx_t *tctx) + arena_t *arena) { void *p; - prof_tctx_t *old_tctx; + prof_tctx_t *old_tctx, *tctx; old_tctx = prof_tctx_get(oldptr); - if ((uintptr_t)tctx != (uintptr_t)1U) + tctx = prof_alloc_prep(*usize, true); + if ((uintptr_t)tctx != (uintptr_t)1U) { p = irallocx_prof_sample(oldptr, size, alignment, *usize, zero, try_tcache_alloc, try_tcache_dalloc, arena, tctx); - else { + } else { p = iralloct(oldptr, size, alignment, zero, try_tcache_alloc, try_tcache_dalloc, arena); } - if (p == NULL) + if (p == NULL) { + prof_alloc_rollback(tctx, true); return (NULL); + } if (p == oldptr && alignment != 0) { /* @@ -1599,7 +1606,7 @@ irallocx_prof(void *oldptr, size_t old_usize, size_t size, size_t alignment, */ *usize = isalloc(p, config_prof); } - prof_realloc(p, *usize, tctx, old_usize, old_tctx); + prof_realloc(p, *usize, tctx, true, old_usize, old_tctx); return (p); } @@ -1641,13 +1648,10 @@ je_rallocx(void *ptr, size_t size, int flags) old_rzsize = u2rz(old_usize); if (config_prof && opt_prof) { - prof_tctx_t *tctx; - usize = (alignment == 0) ? 
s2u(size) : sa2u(size, alignment); assert(usize != 0); - tctx = prof_alloc_prep(usize); p = irallocx_prof(ptr, old_usize, size, alignment, &usize, zero, - try_tcache_alloc, try_tcache_dalloc, arena, tctx); + try_tcache_alloc, try_tcache_dalloc, arena); if (p == NULL) goto label_oom; } else { @@ -1720,13 +1724,21 @@ ixallocx_prof_sample(void *ptr, size_t old_usize, size_t size, size_t extra, JEMALLOC_ALWAYS_INLINE_C size_t ixallocx_prof(void *ptr, size_t old_usize, size_t size, size_t extra, - size_t alignment, size_t max_usize, bool zero, arena_t *arena, - prof_tctx_t *tctx) + size_t alignment, bool zero, arena_t *arena) { - size_t usize; - prof_tctx_t *old_tctx; + size_t max_usize, usize; + prof_tctx_t *old_tctx, *tctx; old_tctx = prof_tctx_get(ptr); + /* + * usize isn't knowable before ixalloc() returns when extra is non-zero. + * Therefore, compute its maximum possible value and use that in + * prof_alloc_prep() to decide whether to capture a backtrace. + * prof_realloc() will use the actual usize to decide whether to sample. + */ + max_usize = (alignment == 0) ? s2u(size+extra) : sa2u(size+extra, + alignment); + tctx = prof_alloc_prep(max_usize, false); if ((uintptr_t)tctx != (uintptr_t)1U) { usize = ixallocx_prof_sample(ptr, old_usize, size, extra, alignment, zero, max_usize, arena, tctx); @@ -1734,9 +1746,11 @@ ixallocx_prof(void *ptr, size_t old_usize, size_t size, size_t extra, usize = ixallocx_helper(ptr, old_usize, size, extra, alignment, zero, arena); } - if (usize == old_usize) + if (usize == old_usize) { + prof_alloc_rollback(tctx, false); return (usize); - prof_realloc(ptr, usize, tctx, old_usize, old_tctx); + } + prof_realloc(ptr, usize, tctx, false, old_usize, old_tctx); return (usize); } @@ -1767,19 +1781,8 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) old_rzsize = u2rz(old_usize); if (config_prof && opt_prof) { - prof_tctx_t *tctx; - /* - * usize isn't knowable before ixalloc() returns when extra is - * non-zero. Therefore, compute its maximum possible value and - * use that in prof_alloc_prep() to decide whether to capture a - * backtrace. prof_realloc() will use the actual usize to - * decide whether to sample. - */ - size_t max_usize = (alignment == 0) ? s2u(size+extra) : - sa2u(size+extra, alignment); - tctx = prof_alloc_prep(max_usize); usize = ixallocx_prof(ptr, old_usize, size, extra, alignment, - max_usize, zero, arena, tctx); + zero, arena); } else { usize = ixallocx_helper(ptr, old_usize, size, extra, alignment, zero, arena); diff --git a/src/prof.c b/src/prof.c index 941e53be..9495afc4 100644 --- a/src/prof.c +++ b/src/prof.c @@ -149,6 +149,35 @@ rb_gen(static UNUSED, tdata_tree_, prof_tdata_tree_t, prof_tdata_t, tdata_link, /******************************************************************************/ +void +prof_alloc_rollback(prof_tctx_t *tctx, bool updated) +{ + prof_tdata_t *tdata; + + cassert(config_prof); + + if (updated) { + /* + * Compute a new sample threshold. This isn't very important in + * practice, because this function is rarely executed, so the + * potential for sample bias is minimal except in contrived + * programs. 
+ */ + tdata = prof_tdata_get(true); + if ((uintptr_t)tdata > (uintptr_t)PROF_TDATA_STATE_MAX) + prof_sample_threshold_update(tctx->tdata); + } + + if ((uintptr_t)tctx > (uintptr_t)1U) { + malloc_mutex_lock(tctx->tdata->lock); + tctx->prepared = false; + if (prof_tctx_should_destroy(tctx)) + prof_tctx_destroy(tctx); + else + malloc_mutex_unlock(tctx->tdata->lock); + } +} + void prof_malloc_sample_object(const void *ptr, size_t usize, prof_tctx_t *tctx) { prof_tctx_set(ptr, tctx); @@ -160,6 +189,7 @@ prof_malloc_sample_object(const void *ptr, size_t usize, prof_tctx_t *tctx) { tctx->cnts.accumobjs++; tctx->cnts.accumbytes += usize; } + tctx->prepared = false; malloc_mutex_unlock(tctx->tdata->lock); } @@ -529,6 +559,8 @@ prof_tctx_should_destroy(prof_tctx_t *tctx) return (false); if (tctx->cnts.curobjs != 0) return (false); + if (tctx->prepared) + return (false); return (true); } @@ -659,6 +691,8 @@ prof_lookup(prof_bt_t *bt) malloc_mutex_lock(tdata->lock); not_found = ckh_search(&tdata->bt2tctx, bt, NULL, &ret.v); + if (!not_found) /* Note double negative! */ + ret.p->prepared = true; malloc_mutex_unlock(tdata->lock); if (not_found) { void *btkey; @@ -683,6 +717,7 @@ prof_lookup(prof_bt_t *bt) ret.p->tdata = tdata; memset(&ret.p->cnts, 0, sizeof(prof_cnt_t)); ret.p->gctx = gctx; + ret.p->prepared = true; ret.p->state = prof_tctx_state_nominal; malloc_mutex_lock(tdata->lock); error = ckh_insert(&tdata->bt2tctx, btkey, ret.v);