Patch: close the heap-profiling "sample vs. destroy" race.

What the hunks below do
  * prof.h adds a `prepared` flag to struct prof_tctx_s.  prof_lookup() sets
    it (under tdata->lock for an existing tctx, during initialisation for a
    newly created one).  prof_malloc_sample_object() and
    prof_alloc_rollback() clear it while holding tctx->tdata->lock, and
    prof_tctx_should_destroy() returns false while it is set.
  * prof_alloc_prep() gains an `update` argument that is forwarded to
    prof_sample_accum_update(); prof_malloc() no longer calls
    prof_sample_accum_update() itself.
  * prof_realloc() gains an `updated` argument; when it is true the function
    skips its own prof_sample_accum_update() accounting.
  * The *_prof() helpers in jemalloc.c call prof_alloc_prep() themselves
    instead of receiving a tctx from their caller, and call
    prof_alloc_rollback() when the allocation fails (for ixallocx_prof():
    when the usable size did not change).

Review fixes folded into this revision
  1. prof_alloc_rollback() passed tctx->tdata to
     prof_sample_threshold_update().  Callers hand it whatever
     prof_alloc_prep() returned, which is the sentinel
     (prof_tctx_t *)(uintptr_t)1U when the allocation was not selected for
     sampling, so that expression dereferenced an invalid pointer.  The
     function already fetches and validates the calling thread's tdata one
     line earlier; that value is passed instead.
  2. irealloc_prof() called prof_alloc_prep(usize, true) but returned NULL on
     allocation failure without calling prof_alloc_rollback(), unlike every
     sibling helper.  The rollback is added; the hunk header for that hunk
     and the new-file start lines of all later src/jemalloc.c hunks are
     adjusted by +2 accordingly.

Notes
  - Leading whitespace was reconstructed from the code structure (tabs for
    indentation, four spaces for continuation lines); apply with
    whitespace-insensitive matching if the target tree differs.
  - The "index" lines still carry the blob ids of the pre-review revision.

diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt
index 3b990b0e..b8990177 100644
--- a/include/jemalloc/internal/private_symbols.txt
+++ b/include/jemalloc/internal/private_symbols.txt
@@ -292,6 +292,7 @@ p2rz
 pages_purge
 pow2_ceil
 prof_alloc_prep
+prof_alloc_rollback
 prof_backtrace
 prof_boot0
 prof_boot1
diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h
index a9903280..920ec63f 100644
--- a/include/jemalloc/internal/prof.h
+++ b/include/jemalloc/internal/prof.h
@@ -97,6 +97,12 @@ struct prof_tctx_s {
 	/* Linkage into gctx's tctxs. */
 	rb_node(prof_tctx_t)	tctx_link;
 
+	/*
+	 * True during prof_alloc_prep()..prof_malloc_sample_object(), prevents
+	 * sample vs destroy race.
+	 */
+	bool			prepared;
+
 	/* Current dump-related state, protected by gctx->lock. */
 	prof_tctx_state_t	state;
 
@@ -242,6 +248,7 @@ extern uint64_t prof_interval;
  */
 extern size_t	lg_prof_sample;
 
+void	prof_alloc_rollback(prof_tctx_t *tctx, bool updated);
 void	prof_malloc_sample_object(const void *ptr, size_t usize,
     prof_tctx_t *tctx);
 void	prof_free_sampled_object(size_t usize, prof_tctx_t *tctx);
@@ -282,14 +289,14 @@ malloc_tsd_protos(JEMALLOC_ATTR(unused), prof_tdata, prof_tdata_t *)
 prof_tdata_t	*prof_tdata_get(bool create);
 bool	prof_sample_accum_update(size_t usize, bool commit,
     prof_tdata_t **tdata_out);
-prof_tctx_t	*prof_alloc_prep(size_t usize);
+prof_tctx_t	*prof_alloc_prep(size_t usize, bool update);
 prof_tctx_t	*prof_tctx_get(const void *ptr);
 void	prof_tctx_set(const void *ptr, prof_tctx_t *tctx);
 void	prof_malloc_sample_object(const void *ptr, size_t usize,
     prof_tctx_t *tctx);
 void	prof_malloc(const void *ptr, size_t usize, prof_tctx_t *tctx);
 void	prof_realloc(const void *ptr, size_t usize, prof_tctx_t *tctx,
-    size_t old_usize, prof_tctx_t *old_tctx);
+    bool updated, size_t old_usize, prof_tctx_t *old_tctx);
 void	prof_free(const void *ptr, size_t usize);
 #endif
 
@@ -356,7 +363,7 @@ prof_tctx_set(const void *ptr, prof_tctx_t *tctx)
 }
 
 JEMALLOC_INLINE bool
-prof_sample_accum_update(size_t usize, bool commit, prof_tdata_t **tdata_out)
+prof_sample_accum_update(size_t usize, bool update, prof_tdata_t **tdata_out)
 {
 	prof_tdata_t *tdata;
 
@@ -373,19 +380,19 @@ prof_sample_accum_update(size_t usize, bool commit, prof_tdata_t **tdata_out)
 		return (true);
 
 	if (tdata->bytes_until_sample >= usize) {
-		if (commit)
+		if (update)
 			tdata->bytes_until_sample -= usize;
 		return (true);
 	} else {
 		/* Compute new sample threshold. */
-		if (commit)
+		if (update)
 			prof_sample_threshold_update(tdata);
 		return (tdata->active == false);
 	}
 }
 
 JEMALLOC_INLINE prof_tctx_t *
-prof_alloc_prep(size_t usize)
+prof_alloc_prep(size_t usize, bool update)
 {
 	prof_tctx_t *ret;
 	prof_tdata_t *tdata;
@@ -393,7 +400,7 @@ prof_alloc_prep(size_t usize)
 
 	assert(usize == s2u(usize));
 
-	if (!opt_prof_active || prof_sample_accum_update(usize, false, &tdata))
+	if (!opt_prof_active || prof_sample_accum_update(usize, update, &tdata))
 		ret = (prof_tctx_t *)(uintptr_t)1U;
 	else {
 		bt_init(&bt, tdata->vec);
@@ -412,16 +419,6 @@ prof_malloc(const void *ptr, size_t usize, prof_tctx_t *tctx)
 	assert(ptr != NULL);
 	assert(usize == isalloc(ptr, true));
 
-	if (prof_sample_accum_update(usize, true, NULL)) {
-		/*
-		 * Don't sample.  For malloc()-like allocation, it is always
-		 * possible to tell in advance how large an object's usable size
-		 * will be, so there should never be a difference between the
-		 * usize passed to PROF_ALLOC_PREP() and prof_malloc().
-		 */
-		assert((uintptr_t)tctx == (uintptr_t)1U);
-	}
-
 	if ((uintptr_t)tctx > (uintptr_t)1U)
 		prof_malloc_sample_object(ptr, usize, tctx);
 	else
@@ -429,14 +426,14 @@ prof_malloc(const void *ptr, size_t usize, prof_tctx_t *tctx)
 }
 
 JEMALLOC_INLINE void
-prof_realloc(const void *ptr, size_t usize, prof_tctx_t *tctx, size_t old_usize,
-    prof_tctx_t *old_tctx)
+prof_realloc(const void *ptr, size_t usize, prof_tctx_t *tctx, bool updated,
+    size_t old_usize, prof_tctx_t *old_tctx)
 {
 
 	cassert(config_prof);
 	assert(ptr != NULL || (uintptr_t)tctx <= (uintptr_t)1U);
 
-	if (ptr != NULL) {
+	if (!updated && ptr != NULL) {
 		assert(usize == isalloc(ptr, true));
 		if (prof_sample_accum_update(usize, true, NULL)) {
 			/*
diff --git a/src/jemalloc.c b/src/jemalloc.c
index 3f29a857..1d4d1a8a 100644
--- a/src/jemalloc.c
+++ b/src/jemalloc.c
@@ -886,13 +886,15 @@ imalloc_prof(size_t usize)
 	void *p;
 	prof_tctx_t *tctx;
 
-	tctx = prof_alloc_prep(usize);
+	tctx = prof_alloc_prep(usize, true);
 	if ((uintptr_t)tctx != (uintptr_t)1U)
 		p = imalloc_prof_sample(usize, tctx);
 	else
 		p = imalloc(usize);
-	if (p == NULL)
+	if (p == NULL) {
+		prof_alloc_rollback(tctx, true);
 		return (NULL);
+	}
 	prof_malloc(p, usize, tctx);
 
 	return (p);
@@ -962,16 +964,20 @@ imemalign_prof_sample(size_t alignment, size_t usize, prof_tctx_t *tctx)
 }
 
 JEMALLOC_ALWAYS_INLINE_C void *
-imemalign_prof(size_t alignment, size_t usize, prof_tctx_t *tctx)
+imemalign_prof(size_t alignment, size_t usize)
 {
 	void *p;
+	prof_tctx_t *tctx;
 
+	tctx = prof_alloc_prep(usize, true);
 	if ((uintptr_t)tctx != (uintptr_t)1U)
 		p = imemalign_prof_sample(alignment, usize, tctx);
 	else
 		p = ipalloc(usize, alignment, false);
-	if (p == NULL)
+	if (p == NULL) {
+		prof_alloc_rollback(tctx, true);
 		return (NULL);
+	}
 	prof_malloc(p, usize, tctx);
 
 	return (p);
@@ -1013,12 +1019,9 @@ imemalign(void **memptr, size_t alignment, size_t size, size_t min_alignment)
 		goto label_oom;
 	}
 
-	if (config_prof && opt_prof) {
-		prof_tctx_t *tctx;
-
-		tctx = prof_alloc_prep(usize);
-		result = imemalign_prof(alignment, usize, tctx);
-	} else
+	if (config_prof && opt_prof)
+		result = imemalign_prof(alignment, usize);
+	else
 		result = ipalloc(usize, alignment, false);
 	if (result == NULL)
 		goto label_oom;
@@ -1087,16 +1090,20 @@ icalloc_prof_sample(size_t usize, prof_tctx_t *tctx)
 }
 
 JEMALLOC_ALWAYS_INLINE_C void *
-icalloc_prof(size_t usize, prof_tctx_t *tctx)
+icalloc_prof(size_t usize)
 {
 	void *p;
+	prof_tctx_t *tctx;
 
+	tctx = prof_alloc_prep(usize, true);
 	if ((uintptr_t)tctx != (uintptr_t)1U)
 		p = icalloc_prof_sample(usize, tctx);
 	else
 		p = icalloc(usize);
-	if (p == NULL)
+	if (p == NULL) {
+		prof_alloc_rollback(tctx, true);
 		return (NULL);
+	}
 	prof_malloc(p, usize, tctx);
 
 	return (p);
@@ -1136,11 +1143,8 @@ je_calloc(size_t num, size_t size)
 	}
 
 	if (config_prof && opt_prof) {
-		prof_tctx_t *tctx;
-
 		usize = s2u(num_size);
-		tctx = prof_alloc_prep(usize);
-		ret = icalloc_prof(usize, tctx);
+		ret = icalloc_prof(usize);
 	} else {
 		if (config_stats || (config_valgrind && in_valgrind))
 			usize = s2u(num_size);
@@ -1184,19 +1188,22 @@ irealloc_prof_sample(void *oldptr, size_t usize, prof_tctx_t *tctx)
 }
 
 JEMALLOC_ALWAYS_INLINE_C void *
-irealloc_prof(void *oldptr, size_t old_usize, size_t usize, prof_tctx_t *tctx)
+irealloc_prof(void *oldptr, size_t old_usize, size_t usize)
 {
 	void *p;
-	prof_tctx_t *old_tctx;
+	prof_tctx_t *old_tctx, *tctx;
 
 	old_tctx = prof_tctx_get(oldptr);
+	tctx = prof_alloc_prep(usize, true);
 	if ((uintptr_t)tctx != (uintptr_t)1U)
 		p = irealloc_prof_sample(oldptr, usize, tctx);
 	else
 		p = iralloc(oldptr, usize, 0, false);
-	if (p == NULL)
+	if (p == NULL) {
+		prof_alloc_rollback(tctx, true);
 		return (NULL);
-	prof_realloc(p, usize, tctx, old_usize, old_tctx);
+	}
+	prof_realloc(p, usize, tctx, true, old_usize, old_tctx);
 
 	return (p);
 }
@@ -1270,11 +1277,8 @@ je_realloc(void *ptr, size_t size)
 			old_rzsize = config_prof ? p2rz(ptr) : u2rz(old_usize);
 
 		if (config_prof && opt_prof) {
-			prof_tctx_t *tctx;
-
 			usize = s2u(size);
-			tctx = prof_alloc_prep(usize);
-			ret = irealloc_prof(ptr, old_usize, usize, tctx);
+			ret = irealloc_prof(ptr, old_usize, usize);
 		} else {
 			if (config_stats || (config_valgrind && in_valgrind))
 				usize = s2u(size);
@@ -1477,7 +1481,7 @@ imallocx_prof(size_t size, int flags, size_t *usize)
 
 	imallocx_flags_decode(size, flags, usize, &alignment, &zero,
 	    &try_tcache, &arena);
-	tctx = prof_alloc_prep(*usize);
+	tctx = prof_alloc_prep(*usize, true);
 	if ((uintptr_t)tctx == (uintptr_t)1U) {
 		p = imallocx_maybe_flags(size, flags, *usize, alignment, zero,
 		    try_tcache, arena);
@@ -1486,8 +1490,10 @@ imallocx_prof(size_t size, int flags, size_t *usize)
 		    try_tcache, arena);
 	} else
 		p = NULL;
-	if (p == NULL)
+	if (p == NULL) {
+		prof_alloc_rollback(tctx, true);
 		return (NULL);
+	}
 	prof_malloc(p, *usize, tctx);
 
 	return (p);
@@ -1572,21 +1578,24 @@ irallocx_prof_sample(void *oldptr, size_t size, size_t alignment, size_t usize,
 JEMALLOC_ALWAYS_INLINE_C void *
 irallocx_prof(void *oldptr, size_t old_usize, size_t size, size_t alignment,
     size_t *usize, bool zero, bool try_tcache_alloc, bool try_tcache_dalloc,
-    arena_t *arena, prof_tctx_t *tctx)
+    arena_t *arena)
 {
 	void *p;
-	prof_tctx_t *old_tctx;
+	prof_tctx_t *old_tctx, *tctx;
 
 	old_tctx = prof_tctx_get(oldptr);
-	if ((uintptr_t)tctx != (uintptr_t)1U)
+	tctx = prof_alloc_prep(*usize, true);
+	if ((uintptr_t)tctx != (uintptr_t)1U) {
 		p = irallocx_prof_sample(oldptr, size, alignment, *usize, zero,
 		    try_tcache_alloc, try_tcache_dalloc, arena, tctx);
-	else {
+	} else {
 		p = iralloct(oldptr, size, alignment, zero, try_tcache_alloc,
 		    try_tcache_dalloc, arena);
 	}
-	if (p == NULL)
+	if (p == NULL) {
+		prof_alloc_rollback(tctx, true);
 		return (NULL);
+	}
 
 	if (p == oldptr && alignment != 0) {
 		/*
@@ -1599,7 +1608,7 @@ irallocx_prof(void *oldptr, size_t old_usize, size_t size, size_t alignment,
 		 */
 		*usize = isalloc(p, config_prof);
 	}
-	prof_realloc(p, *usize, tctx, old_usize, old_tctx);
+	prof_realloc(p, *usize, tctx, true, old_usize, old_tctx);
 
 	return (p);
 }
@@ -1641,13 +1650,10 @@ je_rallocx(void *ptr, size_t size, int flags)
 		old_rzsize = u2rz(old_usize);
 
 	if (config_prof && opt_prof) {
-		prof_tctx_t *tctx;
-
 		usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment);
 		assert(usize != 0);
-		tctx = prof_alloc_prep(usize);
 		p = irallocx_prof(ptr, old_usize, size, alignment, &usize, zero,
-		    try_tcache_alloc, try_tcache_dalloc, arena, tctx);
+		    try_tcache_alloc, try_tcache_dalloc, arena);
 		if (p == NULL)
 			goto label_oom;
 	} else {
@@ -1720,13 +1726,21 @@ ixallocx_prof_sample(void *ptr, size_t old_usize, size_t size, size_t extra,
 
 JEMALLOC_ALWAYS_INLINE_C size_t
 ixallocx_prof(void *ptr, size_t old_usize, size_t size, size_t extra,
-    size_t alignment, size_t max_usize, bool zero, arena_t *arena,
-    prof_tctx_t *tctx)
+    size_t alignment, bool zero, arena_t *arena)
 {
-	size_t usize;
-	prof_tctx_t *old_tctx;
+	size_t max_usize, usize;
+	prof_tctx_t *old_tctx, *tctx;
 
 	old_tctx = prof_tctx_get(ptr);
+	/*
+	 * usize isn't knowable before ixalloc() returns when extra is non-zero.
+	 * Therefore, compute its maximum possible value and use that in
+	 * prof_alloc_prep() to decide whether to capture a backtrace.
+	 * prof_realloc() will use the actual usize to decide whether to sample.
+	 */
+	max_usize = (alignment == 0) ? s2u(size+extra) : sa2u(size+extra,
+	    alignment);
+	tctx = prof_alloc_prep(max_usize, false);
 	if ((uintptr_t)tctx != (uintptr_t)1U) {
 		usize = ixallocx_prof_sample(ptr, old_usize, size, extra,
 		    alignment, zero, max_usize, arena, tctx);
@@ -1734,9 +1748,11 @@ ixallocx_prof(void *ptr, size_t old_usize, size_t size, size_t extra,
 		usize = ixallocx_helper(ptr, old_usize, size, extra, alignment,
 		    zero, arena);
 	}
-	if (usize == old_usize)
+	if (usize == old_usize) {
+		prof_alloc_rollback(tctx, false);
 		return (usize);
-	prof_realloc(ptr, usize, tctx, old_usize, old_tctx);
+	}
+	prof_realloc(ptr, usize, tctx, false, old_usize, old_tctx);
 
 	return (usize);
 }
@@ -1767,19 +1783,8 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags)
 		old_rzsize = u2rz(old_usize);
 
 	if (config_prof && opt_prof) {
-		prof_tctx_t *tctx;
-		/*
-		 * usize isn't knowable before ixalloc() returns when extra is
-		 * non-zero.  Therefore, compute its maximum possible value and
-		 * use that in prof_alloc_prep() to decide whether to capture a
-		 * backtrace.  prof_realloc() will use the actual usize to
-		 * decide whether to sample.
-		 */
-		size_t max_usize = (alignment == 0) ? s2u(size+extra) :
-		    sa2u(size+extra, alignment);
-		tctx = prof_alloc_prep(max_usize);
 		usize = ixallocx_prof(ptr, old_usize, size, extra, alignment,
-		    max_usize, zero, arena, tctx);
+		    zero, arena);
 	} else {
 		usize = ixallocx_helper(ptr, old_usize, size, extra, alignment,
 		    zero, arena);
diff --git a/src/prof.c b/src/prof.c
index 941e53be..9495afc4 100644
--- a/src/prof.c
+++ b/src/prof.c
@@ -149,6 +149,35 @@ rb_gen(static UNUSED, tdata_tree_, prof_tdata_tree_t, prof_tdata_t, tdata_link,
 
 /******************************************************************************/
 
+void
+prof_alloc_rollback(prof_tctx_t *tctx, bool updated)
+{
+	prof_tdata_t *tdata;
+
+	cassert(config_prof);
+
+	if (updated) {
+		/*
+		 * Compute a new sample threshold.  This isn't very important in
+		 * practice, because this function is rarely executed, so the
+		 * potential for sample bias is minimal except in contrived
+		 * programs.
+		 */
+		tdata = prof_tdata_get(true);
+		if ((uintptr_t)tdata > (uintptr_t)PROF_TDATA_STATE_MAX)
+			prof_sample_threshold_update(tdata);
+	}
+
+	if ((uintptr_t)tctx > (uintptr_t)1U) {
+		malloc_mutex_lock(tctx->tdata->lock);
+		tctx->prepared = false;
+		if (prof_tctx_should_destroy(tctx))
+			prof_tctx_destroy(tctx);
+		else
+			malloc_mutex_unlock(tctx->tdata->lock);
+	}
+}
+
 void
 prof_malloc_sample_object(const void *ptr, size_t usize, prof_tctx_t *tctx) {
 	prof_tctx_set(ptr, tctx);
@@ -160,6 +189,7 @@ prof_malloc_sample_object(const void *ptr, size_t usize, prof_tctx_t *tctx) {
 		tctx->cnts.accumobjs++;
 		tctx->cnts.accumbytes += usize;
 	}
+	tctx->prepared = false;
 	malloc_mutex_unlock(tctx->tdata->lock);
 }
 
@@ -529,6 +559,8 @@ prof_tctx_should_destroy(prof_tctx_t *tctx)
 		return (false);
 	if (tctx->cnts.curobjs != 0)
 		return (false);
+	if (tctx->prepared)
+		return (false);
 	return (true);
 }
 
@@ -659,6 +691,8 @@ prof_lookup(prof_bt_t *bt)
 
 	malloc_mutex_lock(tdata->lock);
 	not_found = ckh_search(&tdata->bt2tctx, bt, NULL, &ret.v);
+	if (!not_found) /* Note double negative! */
+		ret.p->prepared = true;
 	malloc_mutex_unlock(tdata->lock);
 	if (not_found) {
 		void *btkey;
@@ -683,6 +717,7 @@ prof_lookup(prof_bt_t *bt)
 		ret.p->tdata = tdata;
 		memset(&ret.p->cnts, 0, sizeof(prof_cnt_t));
 		ret.p->gctx = gctx;
+		ret.p->prepared = true;
 		ret.p->state = prof_tctx_state_nominal;
 		malloc_mutex_lock(tdata->lock);
 		error = ckh_insert(&tdata->bt2tctx, btkey, ret.v);