Fix a profile sampling race.

Fix a profile sampling race that was due to preparing to sample, yet
doing nothing to ensure that the context remains valid until the stats
are updated.

These regressions were caused by
602c8e0971 (Implement per thread heap
profiling.), which did not make it into any releases prior to these
fixes.
This commit is contained in:
Jason Evans 2014-09-09 19:37:26 -07:00
parent 6fd53da030
commit 6e73dc194e
4 changed files with 109 additions and 73 deletions

View File

@ -292,6 +292,7 @@ p2rz
pages_purge pages_purge
pow2_ceil pow2_ceil
prof_alloc_prep prof_alloc_prep
prof_alloc_rollback
prof_backtrace prof_backtrace
prof_boot0 prof_boot0
prof_boot1 prof_boot1

View File

@ -97,6 +97,12 @@ struct prof_tctx_s {
/* Linkage into gctx's tctxs. */ /* Linkage into gctx's tctxs. */
rb_node(prof_tctx_t) tctx_link; rb_node(prof_tctx_t) tctx_link;
/*
* True during prof_alloc_prep()..prof_malloc_sample_object(), prevents
* sample vs destroy race.
*/
bool prepared;
/* Current dump-related state, protected by gctx->lock. */ /* Current dump-related state, protected by gctx->lock. */
prof_tctx_state_t state; prof_tctx_state_t state;
@ -242,6 +248,7 @@ extern uint64_t prof_interval;
*/ */
extern size_t lg_prof_sample; extern size_t lg_prof_sample;
void prof_alloc_rollback(prof_tctx_t *tctx, bool updated);
void prof_malloc_sample_object(const void *ptr, size_t usize, void prof_malloc_sample_object(const void *ptr, size_t usize,
prof_tctx_t *tctx); prof_tctx_t *tctx);
void prof_free_sampled_object(size_t usize, prof_tctx_t *tctx); void prof_free_sampled_object(size_t usize, prof_tctx_t *tctx);
@ -282,14 +289,14 @@ malloc_tsd_protos(JEMALLOC_ATTR(unused), prof_tdata, prof_tdata_t *)
prof_tdata_t *prof_tdata_get(bool create); prof_tdata_t *prof_tdata_get(bool create);
bool prof_sample_accum_update(size_t usize, bool commit, bool prof_sample_accum_update(size_t usize, bool commit,
prof_tdata_t **tdata_out); prof_tdata_t **tdata_out);
prof_tctx_t *prof_alloc_prep(size_t usize); prof_tctx_t *prof_alloc_prep(size_t usize, bool update);
prof_tctx_t *prof_tctx_get(const void *ptr); prof_tctx_t *prof_tctx_get(const void *ptr);
void prof_tctx_set(const void *ptr, prof_tctx_t *tctx); void prof_tctx_set(const void *ptr, prof_tctx_t *tctx);
void prof_malloc_sample_object(const void *ptr, size_t usize, void prof_malloc_sample_object(const void *ptr, size_t usize,
prof_tctx_t *tctx); prof_tctx_t *tctx);
void prof_malloc(const void *ptr, size_t usize, prof_tctx_t *tctx); void prof_malloc(const void *ptr, size_t usize, prof_tctx_t *tctx);
void prof_realloc(const void *ptr, size_t usize, prof_tctx_t *tctx, void prof_realloc(const void *ptr, size_t usize, prof_tctx_t *tctx,
size_t old_usize, prof_tctx_t *old_tctx); bool updated, size_t old_usize, prof_tctx_t *old_tctx);
void prof_free(const void *ptr, size_t usize); void prof_free(const void *ptr, size_t usize);
#endif #endif
@ -356,7 +363,7 @@ prof_tctx_set(const void *ptr, prof_tctx_t *tctx)
} }
JEMALLOC_INLINE bool JEMALLOC_INLINE bool
prof_sample_accum_update(size_t usize, bool commit, prof_tdata_t **tdata_out) prof_sample_accum_update(size_t usize, bool update, prof_tdata_t **tdata_out)
{ {
prof_tdata_t *tdata; prof_tdata_t *tdata;
@ -373,19 +380,19 @@ prof_sample_accum_update(size_t usize, bool commit, prof_tdata_t **tdata_out)
return (true); return (true);
if (tdata->bytes_until_sample >= usize) { if (tdata->bytes_until_sample >= usize) {
if (commit) if (update)
tdata->bytes_until_sample -= usize; tdata->bytes_until_sample -= usize;
return (true); return (true);
} else { } else {
/* Compute new sample threshold. */ /* Compute new sample threshold. */
if (commit) if (update)
prof_sample_threshold_update(tdata); prof_sample_threshold_update(tdata);
return (tdata->active == false); return (tdata->active == false);
} }
} }
JEMALLOC_INLINE prof_tctx_t * JEMALLOC_INLINE prof_tctx_t *
prof_alloc_prep(size_t usize) prof_alloc_prep(size_t usize, bool update)
{ {
prof_tctx_t *ret; prof_tctx_t *ret;
prof_tdata_t *tdata; prof_tdata_t *tdata;
@ -393,7 +400,7 @@ prof_alloc_prep(size_t usize)
assert(usize == s2u(usize)); assert(usize == s2u(usize));
if (!opt_prof_active || prof_sample_accum_update(usize, false, &tdata)) if (!opt_prof_active || prof_sample_accum_update(usize, update, &tdata))
ret = (prof_tctx_t *)(uintptr_t)1U; ret = (prof_tctx_t *)(uintptr_t)1U;
else { else {
bt_init(&bt, tdata->vec); bt_init(&bt, tdata->vec);
@ -412,16 +419,6 @@ prof_malloc(const void *ptr, size_t usize, prof_tctx_t *tctx)
assert(ptr != NULL); assert(ptr != NULL);
assert(usize == isalloc(ptr, true)); assert(usize == isalloc(ptr, true));
if (prof_sample_accum_update(usize, true, NULL)) {
/*
* Don't sample. For malloc()-like allocation, it is always
* possible to tell in advance how large an object's usable size
* will be, so there should never be a difference between the
* usize passed to PROF_ALLOC_PREP() and prof_malloc().
*/
assert((uintptr_t)tctx == (uintptr_t)1U);
}
if ((uintptr_t)tctx > (uintptr_t)1U) if ((uintptr_t)tctx > (uintptr_t)1U)
prof_malloc_sample_object(ptr, usize, tctx); prof_malloc_sample_object(ptr, usize, tctx);
else else
@ -429,14 +426,14 @@ prof_malloc(const void *ptr, size_t usize, prof_tctx_t *tctx)
} }
JEMALLOC_INLINE void JEMALLOC_INLINE void
prof_realloc(const void *ptr, size_t usize, prof_tctx_t *tctx, size_t old_usize, prof_realloc(const void *ptr, size_t usize, prof_tctx_t *tctx, bool updated,
prof_tctx_t *old_tctx) size_t old_usize, prof_tctx_t *old_tctx)
{ {
cassert(config_prof); cassert(config_prof);
assert(ptr != NULL || (uintptr_t)tctx <= (uintptr_t)1U); assert(ptr != NULL || (uintptr_t)tctx <= (uintptr_t)1U);
if (ptr != NULL) { if (!updated && ptr != NULL) {
assert(usize == isalloc(ptr, true)); assert(usize == isalloc(ptr, true));
if (prof_sample_accum_update(usize, true, NULL)) { if (prof_sample_accum_update(usize, true, NULL)) {
/* /*

View File

@ -886,13 +886,15 @@ imalloc_prof(size_t usize)
void *p; void *p;
prof_tctx_t *tctx; prof_tctx_t *tctx;
tctx = prof_alloc_prep(usize); tctx = prof_alloc_prep(usize, true);
if ((uintptr_t)tctx != (uintptr_t)1U) if ((uintptr_t)tctx != (uintptr_t)1U)
p = imalloc_prof_sample(usize, tctx); p = imalloc_prof_sample(usize, tctx);
else else
p = imalloc(usize); p = imalloc(usize);
if (p == NULL) if (p == NULL) {
prof_alloc_rollback(tctx, true);
return (NULL); return (NULL);
}
prof_malloc(p, usize, tctx); prof_malloc(p, usize, tctx);
return (p); return (p);
@ -962,16 +964,20 @@ imemalign_prof_sample(size_t alignment, size_t usize, prof_tctx_t *tctx)
} }
JEMALLOC_ALWAYS_INLINE_C void * JEMALLOC_ALWAYS_INLINE_C void *
imemalign_prof(size_t alignment, size_t usize, prof_tctx_t *tctx) imemalign_prof(size_t alignment, size_t usize)
{ {
void *p; void *p;
prof_tctx_t *tctx;
tctx = prof_alloc_prep(usize, true);
if ((uintptr_t)tctx != (uintptr_t)1U) if ((uintptr_t)tctx != (uintptr_t)1U)
p = imemalign_prof_sample(alignment, usize, tctx); p = imemalign_prof_sample(alignment, usize, tctx);
else else
p = ipalloc(usize, alignment, false); p = ipalloc(usize, alignment, false);
if (p == NULL) if (p == NULL) {
prof_alloc_rollback(tctx, true);
return (NULL); return (NULL);
}
prof_malloc(p, usize, tctx); prof_malloc(p, usize, tctx);
return (p); return (p);
@ -1013,12 +1019,9 @@ imemalign(void **memptr, size_t alignment, size_t size, size_t min_alignment)
goto label_oom; goto label_oom;
} }
if (config_prof && opt_prof) { if (config_prof && opt_prof)
prof_tctx_t *tctx; result = imemalign_prof(alignment, usize);
else
tctx = prof_alloc_prep(usize);
result = imemalign_prof(alignment, usize, tctx);
} else
result = ipalloc(usize, alignment, false); result = ipalloc(usize, alignment, false);
if (result == NULL) if (result == NULL)
goto label_oom; goto label_oom;
@ -1087,16 +1090,20 @@ icalloc_prof_sample(size_t usize, prof_tctx_t *tctx)
} }
JEMALLOC_ALWAYS_INLINE_C void * JEMALLOC_ALWAYS_INLINE_C void *
icalloc_prof(size_t usize, prof_tctx_t *tctx) icalloc_prof(size_t usize)
{ {
void *p; void *p;
prof_tctx_t *tctx;
tctx = prof_alloc_prep(usize, true);
if ((uintptr_t)tctx != (uintptr_t)1U) if ((uintptr_t)tctx != (uintptr_t)1U)
p = icalloc_prof_sample(usize, tctx); p = icalloc_prof_sample(usize, tctx);
else else
p = icalloc(usize); p = icalloc(usize);
if (p == NULL) if (p == NULL) {
prof_alloc_rollback(tctx, true);
return (NULL); return (NULL);
}
prof_malloc(p, usize, tctx); prof_malloc(p, usize, tctx);
return (p); return (p);
@ -1136,11 +1143,8 @@ je_calloc(size_t num, size_t size)
} }
if (config_prof && opt_prof) { if (config_prof && opt_prof) {
prof_tctx_t *tctx;
usize = s2u(num_size); usize = s2u(num_size);
tctx = prof_alloc_prep(usize); ret = icalloc_prof(usize);
ret = icalloc_prof(usize, tctx);
} else { } else {
if (config_stats || (config_valgrind && in_valgrind)) if (config_stats || (config_valgrind && in_valgrind))
usize = s2u(num_size); usize = s2u(num_size);
@ -1184,19 +1188,20 @@ irealloc_prof_sample(void *oldptr, size_t usize, prof_tctx_t *tctx)
} }
JEMALLOC_ALWAYS_INLINE_C void * JEMALLOC_ALWAYS_INLINE_C void *
irealloc_prof(void *oldptr, size_t old_usize, size_t usize, prof_tctx_t *tctx) irealloc_prof(void *oldptr, size_t old_usize, size_t usize)
{ {
void *p; void *p;
prof_tctx_t *old_tctx; prof_tctx_t *old_tctx, *tctx;
old_tctx = prof_tctx_get(oldptr); old_tctx = prof_tctx_get(oldptr);
tctx = prof_alloc_prep(usize, true);
if ((uintptr_t)tctx != (uintptr_t)1U) if ((uintptr_t)tctx != (uintptr_t)1U)
p = irealloc_prof_sample(oldptr, usize, tctx); p = irealloc_prof_sample(oldptr, usize, tctx);
else else
p = iralloc(oldptr, usize, 0, false); p = iralloc(oldptr, usize, 0, false);
if (p == NULL) if (p == NULL)
return (NULL); return (NULL);
prof_realloc(p, usize, tctx, old_usize, old_tctx); prof_realloc(p, usize, tctx, true, old_usize, old_tctx);
return (p); return (p);
} }
@ -1270,11 +1275,8 @@ je_realloc(void *ptr, size_t size)
old_rzsize = config_prof ? p2rz(ptr) : u2rz(old_usize); old_rzsize = config_prof ? p2rz(ptr) : u2rz(old_usize);
if (config_prof && opt_prof) { if (config_prof && opt_prof) {
prof_tctx_t *tctx;
usize = s2u(size); usize = s2u(size);
tctx = prof_alloc_prep(usize); ret = irealloc_prof(ptr, old_usize, usize);
ret = irealloc_prof(ptr, old_usize, usize, tctx);
} else { } else {
if (config_stats || (config_valgrind && in_valgrind)) if (config_stats || (config_valgrind && in_valgrind))
usize = s2u(size); usize = s2u(size);
@ -1477,7 +1479,7 @@ imallocx_prof(size_t size, int flags, size_t *usize)
imallocx_flags_decode(size, flags, usize, &alignment, &zero, imallocx_flags_decode(size, flags, usize, &alignment, &zero,
&try_tcache, &arena); &try_tcache, &arena);
tctx = prof_alloc_prep(*usize); tctx = prof_alloc_prep(*usize, true);
if ((uintptr_t)tctx == (uintptr_t)1U) { if ((uintptr_t)tctx == (uintptr_t)1U) {
p = imallocx_maybe_flags(size, flags, *usize, alignment, zero, p = imallocx_maybe_flags(size, flags, *usize, alignment, zero,
try_tcache, arena); try_tcache, arena);
@ -1486,8 +1488,10 @@ imallocx_prof(size_t size, int flags, size_t *usize)
try_tcache, arena); try_tcache, arena);
} else } else
p = NULL; p = NULL;
if (p == NULL) if (p == NULL) {
prof_alloc_rollback(tctx, true);
return (NULL); return (NULL);
}
prof_malloc(p, *usize, tctx); prof_malloc(p, *usize, tctx);
return (p); return (p);
@ -1572,21 +1576,24 @@ irallocx_prof_sample(void *oldptr, size_t size, size_t alignment, size_t usize,
JEMALLOC_ALWAYS_INLINE_C void * JEMALLOC_ALWAYS_INLINE_C void *
irallocx_prof(void *oldptr, size_t old_usize, size_t size, size_t alignment, irallocx_prof(void *oldptr, size_t old_usize, size_t size, size_t alignment,
size_t *usize, bool zero, bool try_tcache_alloc, bool try_tcache_dalloc, size_t *usize, bool zero, bool try_tcache_alloc, bool try_tcache_dalloc,
arena_t *arena, prof_tctx_t *tctx) arena_t *arena)
{ {
void *p; void *p;
prof_tctx_t *old_tctx; prof_tctx_t *old_tctx, *tctx;
old_tctx = prof_tctx_get(oldptr); old_tctx = prof_tctx_get(oldptr);
if ((uintptr_t)tctx != (uintptr_t)1U) tctx = prof_alloc_prep(*usize, true);
if ((uintptr_t)tctx != (uintptr_t)1U) {
p = irallocx_prof_sample(oldptr, size, alignment, *usize, zero, p = irallocx_prof_sample(oldptr, size, alignment, *usize, zero,
try_tcache_alloc, try_tcache_dalloc, arena, tctx); try_tcache_alloc, try_tcache_dalloc, arena, tctx);
else { } else {
p = iralloct(oldptr, size, alignment, zero, try_tcache_alloc, p = iralloct(oldptr, size, alignment, zero, try_tcache_alloc,
try_tcache_dalloc, arena); try_tcache_dalloc, arena);
} }
if (p == NULL) if (p == NULL) {
prof_alloc_rollback(tctx, true);
return (NULL); return (NULL);
}
if (p == oldptr && alignment != 0) { if (p == oldptr && alignment != 0) {
/* /*
@ -1599,7 +1606,7 @@ irallocx_prof(void *oldptr, size_t old_usize, size_t size, size_t alignment,
*/ */
*usize = isalloc(p, config_prof); *usize = isalloc(p, config_prof);
} }
prof_realloc(p, *usize, tctx, old_usize, old_tctx); prof_realloc(p, *usize, tctx, true, old_usize, old_tctx);
return (p); return (p);
} }
@ -1641,13 +1648,10 @@ je_rallocx(void *ptr, size_t size, int flags)
old_rzsize = u2rz(old_usize); old_rzsize = u2rz(old_usize);
if (config_prof && opt_prof) { if (config_prof && opt_prof) {
prof_tctx_t *tctx;
usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment); usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment);
assert(usize != 0); assert(usize != 0);
tctx = prof_alloc_prep(usize);
p = irallocx_prof(ptr, old_usize, size, alignment, &usize, zero, p = irallocx_prof(ptr, old_usize, size, alignment, &usize, zero,
try_tcache_alloc, try_tcache_dalloc, arena, tctx); try_tcache_alloc, try_tcache_dalloc, arena);
if (p == NULL) if (p == NULL)
goto label_oom; goto label_oom;
} else { } else {
@ -1720,13 +1724,21 @@ ixallocx_prof_sample(void *ptr, size_t old_usize, size_t size, size_t extra,
JEMALLOC_ALWAYS_INLINE_C size_t JEMALLOC_ALWAYS_INLINE_C size_t
ixallocx_prof(void *ptr, size_t old_usize, size_t size, size_t extra, ixallocx_prof(void *ptr, size_t old_usize, size_t size, size_t extra,
size_t alignment, size_t max_usize, bool zero, arena_t *arena, size_t alignment, bool zero, arena_t *arena)
prof_tctx_t *tctx)
{ {
size_t usize; size_t max_usize, usize;
prof_tctx_t *old_tctx; prof_tctx_t *old_tctx, *tctx;
old_tctx = prof_tctx_get(ptr); old_tctx = prof_tctx_get(ptr);
/*
* usize isn't knowable before ixalloc() returns when extra is non-zero.
* Therefore, compute its maximum possible value and use that in
* prof_alloc_prep() to decide whether to capture a backtrace.
* prof_realloc() will use the actual usize to decide whether to sample.
*/
max_usize = (alignment == 0) ? s2u(size+extra) : sa2u(size+extra,
alignment);
tctx = prof_alloc_prep(max_usize, false);
if ((uintptr_t)tctx != (uintptr_t)1U) { if ((uintptr_t)tctx != (uintptr_t)1U) {
usize = ixallocx_prof_sample(ptr, old_usize, size, extra, usize = ixallocx_prof_sample(ptr, old_usize, size, extra,
alignment, zero, max_usize, arena, tctx); alignment, zero, max_usize, arena, tctx);
@ -1734,9 +1746,11 @@ ixallocx_prof(void *ptr, size_t old_usize, size_t size, size_t extra,
usize = ixallocx_helper(ptr, old_usize, size, extra, alignment, usize = ixallocx_helper(ptr, old_usize, size, extra, alignment,
zero, arena); zero, arena);
} }
if (usize == old_usize) if (usize == old_usize) {
prof_alloc_rollback(tctx, false);
return (usize); return (usize);
prof_realloc(ptr, usize, tctx, old_usize, old_tctx); }
prof_realloc(ptr, usize, tctx, false, old_usize, old_tctx);
return (usize); return (usize);
} }
@ -1767,19 +1781,8 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags)
old_rzsize = u2rz(old_usize); old_rzsize = u2rz(old_usize);
if (config_prof && opt_prof) { if (config_prof && opt_prof) {
prof_tctx_t *tctx;
/*
* usize isn't knowable before ixalloc() returns when extra is
* non-zero. Therefore, compute its maximum possible value and
* use that in prof_alloc_prep() to decide whether to capture a
* backtrace. prof_realloc() will use the actual usize to
* decide whether to sample.
*/
size_t max_usize = (alignment == 0) ? s2u(size+extra) :
sa2u(size+extra, alignment);
tctx = prof_alloc_prep(max_usize);
usize = ixallocx_prof(ptr, old_usize, size, extra, alignment, usize = ixallocx_prof(ptr, old_usize, size, extra, alignment,
max_usize, zero, arena, tctx); zero, arena);
} else { } else {
usize = ixallocx_helper(ptr, old_usize, size, extra, alignment, usize = ixallocx_helper(ptr, old_usize, size, extra, alignment,
zero, arena); zero, arena);

View File

@ -149,6 +149,35 @@ rb_gen(static UNUSED, tdata_tree_, prof_tdata_tree_t, prof_tdata_t, tdata_link,
/******************************************************************************/ /******************************************************************************/
/*
 * Undo the effects of prof_alloc_prep() when the allocation it prepared for
 * did not take place (the allocator returned NULL, or an in-place resize left
 * the usable size unchanged).
 *
 * tctx is the value returned by prof_alloc_prep().  It may be the
 * (prof_tctx_t *)1U "not sampled" sentinel, which must never be dereferenced.
 * updated is the same value the caller passed as the update argument of
 * prof_alloc_prep(), i.e. whether the thread's sample accumulator was already
 * adjusted for this allocation.
 */
void
prof_alloc_rollback(prof_tctx_t *tctx, bool updated)
{

	cassert(config_prof);

	if (updated) {
		prof_tdata_t *tdata;

		/*
		 * Compute a new sample threshold.  This isn't very important in
		 * practice, because this function is rarely executed, so the
		 * potential for sample bias is minimal except in contrived
		 * programs.
		 *
		 * Use the calling thread's tdata rather than tctx->tdata: tctx
		 * is the 1U sentinel whenever the failed allocation was not
		 * going to be sampled, so it cannot be dereferenced here.
		 */
		tdata = prof_tdata_get(true);
		if ((uintptr_t)tdata > (uintptr_t)PROF_TDATA_STATE_MAX)
			prof_sample_threshold_update(tdata);
	}

	if ((uintptr_t)tctx > (uintptr_t)1U) {
		/*
		 * A real tctx was marked as prepared; clear the flag under the
		 * owning tdata's lock so that prof_tctx_should_destroy() no
		 * longer treats it as in use.
		 */
		malloc_mutex_lock(tctx->tdata->lock);
		tctx->prepared = false;
		if (prof_tctx_should_destroy(tctx)) {
			/*
			 * NOTE(review): presumably prof_tctx_destroy() releases
			 * tdata->lock itself, since only the else branch
			 * unlocks -- confirm against its definition.
			 */
			prof_tctx_destroy(tctx);
		} else
			malloc_mutex_unlock(tctx->tdata->lock);
	}
}
void void
prof_malloc_sample_object(const void *ptr, size_t usize, prof_tctx_t *tctx) { prof_malloc_sample_object(const void *ptr, size_t usize, prof_tctx_t *tctx) {
prof_tctx_set(ptr, tctx); prof_tctx_set(ptr, tctx);
@ -160,6 +189,7 @@ prof_malloc_sample_object(const void *ptr, size_t usize, prof_tctx_t *tctx) {
tctx->cnts.accumobjs++; tctx->cnts.accumobjs++;
tctx->cnts.accumbytes += usize; tctx->cnts.accumbytes += usize;
} }
tctx->prepared = false;
malloc_mutex_unlock(tctx->tdata->lock); malloc_mutex_unlock(tctx->tdata->lock);
} }
@ -529,6 +559,8 @@ prof_tctx_should_destroy(prof_tctx_t *tctx)
return (false); return (false);
if (tctx->cnts.curobjs != 0) if (tctx->cnts.curobjs != 0)
return (false); return (false);
if (tctx->prepared)
return (false);
return (true); return (true);
} }
@ -659,6 +691,8 @@ prof_lookup(prof_bt_t *bt)
malloc_mutex_lock(tdata->lock); malloc_mutex_lock(tdata->lock);
not_found = ckh_search(&tdata->bt2tctx, bt, NULL, &ret.v); not_found = ckh_search(&tdata->bt2tctx, bt, NULL, &ret.v);
if (!not_found) /* Note double negative! */
ret.p->prepared = true;
malloc_mutex_unlock(tdata->lock); malloc_mutex_unlock(tdata->lock);
if (not_found) { if (not_found) {
void *btkey; void *btkey;
@ -683,6 +717,7 @@ prof_lookup(prof_bt_t *bt)
ret.p->tdata = tdata; ret.p->tdata = tdata;
memset(&ret.p->cnts, 0, sizeof(prof_cnt_t)); memset(&ret.p->cnts, 0, sizeof(prof_cnt_t));
ret.p->gctx = gctx; ret.p->gctx = gctx;
ret.p->prepared = true;
ret.p->state = prof_tctx_state_nominal; ret.p->state = prof_tctx_state_nominal;
malloc_mutex_lock(tdata->lock); malloc_mutex_lock(tdata->lock);
error = ckh_insert(&tdata->bt2tctx, btkey, ret.v); error = ckh_insert(&tdata->bt2tctx, btkey, ret.v);