Rewrite profiling thread event

Yinan Zhang 2020-03-09 15:49:15 -07:00
parent 0dcd576600
commit 441d88d1c7
6 changed files with 31 additions and 68 deletions

View File

@@ -53,7 +53,7 @@ void prof_idump_rollback_impl(tsdn_t *tsdn, size_t usize);
 prof_tdata_t *prof_tdata_init(tsd_t *tsd);
 prof_tdata_t *prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata);
-void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated);
+void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx);
 void prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t size,
     size_t usize, prof_tctx_t *tctx);
 void prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_info_t *prof_info);

View File

@@ -85,11 +85,11 @@ prof_info_set(tsd_t *tsd, edata_t *edata, prof_tctx_t *tctx) {
 }
 JEMALLOC_ALWAYS_INLINE bool
-prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update) {
+prof_sample_should_skip(tsd_t *tsd, size_t usize) {
     cassert(config_prof);
     /* Fastpath: no need to load tdata */
-    if (likely(prof_sample_event_wait_get(tsd) > 0)) {
+    if (likely(!te_prof_sample_event_lookahead(tsd, usize))) {
         return true;
     }
@@ -102,21 +102,16 @@ prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update) {
         return true;
     }
-    /* Compute new sample threshold. */
-    if (update) {
-        prof_sample_threshold_update(tsd);
-    }
     return !tdata->active;
 }
 JEMALLOC_ALWAYS_INLINE prof_tctx_t *
-prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active, bool update) {
+prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active) {
     prof_tctx_t *ret;
     assert(usize == sz_s2u(usize));
-    if (!prof_active ||
-        likely(prof_sample_accum_update(tsd, usize, update))) {
+    if (!prof_active || likely(prof_sample_should_skip(tsd, usize))) {
         ret = (prof_tctx_t *)(uintptr_t)1U;
     } else {
         ret = prof_tctx_create(tsd);
@@ -150,7 +145,7 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t size, size_t usize,
     if (prof_active && ptr != NULL) {
         assert(usize == isalloc(tsd_tsdn(tsd), ptr));
-        if (prof_sample_accum_update(tsd, usize, true)) {
+        if (prof_sample_should_skip(tsd, usize)) {
             /*
              * Don't sample. The usize passed to prof_alloc_prep()
              * was larger than what actually got allocated, so a
@@ -158,7 +153,7 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t size, size_t usize,
              * though its actual usize was insufficient to cross the
              * sample threshold.
              */
-            prof_alloc_rollback(tsd, tctx, true);
+            prof_alloc_rollback(tsd, tctx);
            tctx = (prof_tctx_t *)(uintptr_t)1U;
         }
     }
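
For intuition, here is a minimal standalone sketch of the pattern in the hunks above (fake_tsd_t, fake_tctx_t, TCTX_NOT_SAMPLED and the helper names are hypothetical stand-ins, not jemalloc's API): the prep step returns either a real profiling context or the sentinel value 1, and the skip decision is now a pure read of per-thread counters rather than an update of the sample threshold, which is why no update/updated flag needs to be threaded through prep and rollback anymore.

    #include <assert.h>
    #include <stdint.h>
    #include <stdlib.h>

    typedef struct { uint64_t allocated; uint64_t last_event; uint64_t sample_wait; } fake_tsd_t;
    typedef struct { int prepared; } fake_tctx_t;

    /* Sentinel meaning "this allocation is not sampled". */
    #define TCTX_NOT_SAMPLED ((fake_tctx_t *)(uintptr_t)1U)

    /* Read-only lookahead: would an allocation of usize stay below the threshold? */
    static int
    should_skip(const fake_tsd_t *tsd, uint64_t usize) {
        return tsd->allocated + usize - tsd->last_event < tsd->sample_wait;
    }

    static fake_tctx_t *
    alloc_prep(const fake_tsd_t *tsd, uint64_t usize, int prof_active) {
        if (!prof_active || should_skip(tsd, usize)) {
            return TCTX_NOT_SAMPLED;
        }
        fake_tctx_t *tctx = malloc(sizeof(*tctx));
        if (tctx != NULL) {
            tctx->prepared = 1;
        }
        return tctx;
    }

    int
    main(void) {
        fake_tsd_t tsd = { 1000, 0, 4096 };
        assert(alloc_prep(&tsd, 8, 1) == TCTX_NOT_SAMPLED);  /* stays below threshold */
        fake_tctx_t *tctx = alloc_prep(&tsd, 8192, 1);       /* would cross threshold */
        assert(tctx != TCTX_NOT_SAMPLED);
        free(tctx);
        return 0;
    }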

View File

@@ -218,6 +218,13 @@ te_ctx_get(tsd_t *tsd, te_ctx_t *ctx, bool is_alloc) {
     }
 }
+JEMALLOC_ALWAYS_INLINE bool
+te_prof_sample_event_lookahead(tsd_t *tsd, size_t usize) {
+    return tsd_thread_allocated_get(tsd) + usize -
+        tsd_thread_allocated_last_event_get(tsd) >=
+        tsd_prof_sample_event_wait_get(tsd);
+}
 JEMALLOC_ALWAYS_INLINE void
 te_event_advance(tsd_t *tsd, size_t usize, bool is_alloc) {
     te_assert_invariants(tsd);
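
The new lookahead helper reduces to a single comparison over cumulative byte counters. A self-contained sketch of just that predicate (plain uint64_t arguments instead of the tsd accessors; the wrap-around note is an assumption of this sketch, not something stated in the diff):

    #include <assert.h>
    #include <stdint.h>

    static int
    prof_sample_lookahead(uint64_t allocated, uint64_t last_event,
        uint64_t sample_wait, uint64_t usize) {
        /* Bytes accumulated since the last event, plus the prospective usize. */
        return allocated + usize - last_event >= sample_wait;
    }

    int
    main(void) {
        /* 100 bytes since the last event, threshold 4096. */
        assert(!prof_sample_lookahead(1100, 1000, 4096, 8));    /* stays below */
        assert(prof_sample_lookahead(1100, 1000, 4096, 3996));  /* exactly reaches it */

        /* Cumulative counters may wrap; the unsigned difference still works. */
        uint64_t last = UINT64_MAX - 10;
        assert(prof_sample_lookahead(last + 100, last, 100, 0));
        return 0;
    }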

View File

@@ -2177,8 +2177,6 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) {
         dopts->arena_ind = 0;
     }
-    thread_alloc_event(tsd, usize);
     /*
      * If dopts->alignment > 0, then ind is still 0, but usize was computed
      * in the previous if statement. Down the positive alignment path,
@@ -2187,8 +2185,8 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) {
     /* If profiling is on, get our profiling context. */
     if (config_prof && opt_prof) {
-        prof_tctx_t *tctx = prof_alloc_prep(
-            tsd, usize, prof_active_get_unlocked(), true);
+        bool prof_active = prof_active_get_unlocked();
+        prof_tctx_t *tctx = prof_alloc_prep(tsd, usize, prof_active);
         emap_alloc_ctx_t alloc_ctx;
         if (likely((uintptr_t)tctx == (uintptr_t)1U)) {
@@ -2204,8 +2202,7 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) {
         }
         if (unlikely(allocation == NULL)) {
-            te_alloc_rollback(tsd, usize);
-            prof_alloc_rollback(tsd, tctx, true);
+            prof_alloc_rollback(tsd, tctx);
             goto label_oom;
         }
         prof_malloc(tsd, allocation, size, usize, &alloc_ctx, tctx);
@@ -2214,7 +2211,6 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) {
         allocation = imalloc_no_sample(sopts, dopts, tsd, size, usize,
             ind);
         if (unlikely(allocation == NULL)) {
-            te_alloc_rollback(tsd, usize);
             goto label_oom;
         }
     }
@@ -2223,6 +2219,9 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) {
      * Allocation has been done at this point. We still have some
      * post-allocation work to do though.
      */
+    thread_alloc_event(tsd, usize);
     assert(dopts->alignment == 0
         || ((uintptr_t)allocation & (dopts->alignment - 1)) == ZU(0));
@@ -3132,7 +3131,7 @@ irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size,
     prof_info_t old_prof_info;
     prof_info_get_and_reset_recent(tsd, old_ptr, alloc_ctx, &old_prof_info);
     bool prof_active = prof_active_get_unlocked();
-    prof_tctx_t *tctx = prof_alloc_prep(tsd, *usize, prof_active, false);
+    prof_tctx_t *tctx = prof_alloc_prep(tsd, *usize, prof_active);
     void *p;
     if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) {
         p = irallocx_prof_sample(tsd_tsdn(tsd), old_ptr, old_usize,
@@ -3142,7 +3141,7 @@ irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size,
             zero, tcache, arena, hook_args);
     }
     if (unlikely(p == NULL)) {
-        prof_alloc_rollback(tsd, tctx, false);
+        prof_alloc_rollback(tsd, tctx);
         return NULL;
     }
@@ -3155,8 +3154,10 @@ irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size,
          * be the same as the current usize because of in-place large
          * reallocation. Therefore, query the actual value of usize.
          */
+        assert(*usize >= isalloc(tsd_tsdn(tsd), p));
         *usize = isalloc(tsd_tsdn(tsd), p);
     }
     prof_realloc(tsd, p, size, *usize, tctx, prof_active, old_ptr,
         old_usize, &old_prof_info);
@@ -3214,11 +3215,9 @@ do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) {
         if (unlikely(usize == 0 || usize > SC_LARGE_MAXCLASS)) {
             goto label_oom;
         }
-        thread_alloc_event(tsd, usize);
         p = irallocx_prof(tsd, ptr, old_usize, size, alignment, &usize,
             zero, tcache, arena, &alloc_ctx, &hook_args);
         if (unlikely(p == NULL)) {
-            te_alloc_rollback(tsd, usize);
             goto label_oom;
         }
     } else {
@@ -3228,9 +3227,9 @@ do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) {
             goto label_oom;
         }
         usize = isalloc(tsd_tsdn(tsd), p);
-        thread_alloc_event(tsd, usize);
     }
     assert(alignment == 0 || ((uintptr_t)p & (alignment - 1)) == ZU(0));
+    thread_alloc_event(tsd, usize);
     thread_dalloc_event(tsd, old_usize);
     UTRACE(ptr, size, p);
@@ -3416,9 +3415,8 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size,
             usize_max = SC_LARGE_MAXCLASS;
         }
     }
-    thread_alloc_event(tsd, usize_max);
     bool prof_active = prof_active_get_unlocked();
-    prof_tctx_t *tctx = prof_alloc_prep(tsd, usize_max, prof_active, false);
+    prof_tctx_t *tctx = prof_alloc_prep(tsd, usize_max, prof_active);
     size_t usize;
     if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) {
@@ -3428,18 +3426,6 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size,
         usize = ixallocx_helper(tsd_tsdn(tsd), ptr, old_usize, size,
             extra, alignment, zero);
     }
-    if (usize <= usize_max) {
-        te_alloc_rollback(tsd, usize_max - usize);
-    } else {
-        /*
-         * For downsizing request, usize_max can be less than usize.
-         * We here further increase thread event counters so as to
-         * record the true usize, and then when the execution goes back
-         * to xallocx(), the entire usize will be rolled back if it's
-         * equal to the old usize.
-         */
-        thread_alloc_event(tsd, usize - usize_max);
-    }
     /*
      * At this point we can still safely get the original profiling
@@ -3452,9 +3438,10 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size,
     prof_info_t prof_info;
     if (usize == old_usize) {
         prof_info_get(tsd, ptr, alloc_ctx, &prof_info);
-        prof_alloc_rollback(tsd, tctx, false);
+        prof_alloc_rollback(tsd, tctx);
     } else {
         prof_info_get_and_reset_recent(tsd, ptr, alloc_ctx, &prof_info);
+        assert(usize <= usize_max);
         prof_realloc(tsd, ptr, size, usize, tctx, prof_active, ptr,
             old_usize, &prof_info);
     }
@@ -3516,7 +3503,6 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) {
     } else {
         usize = ixallocx_helper(tsd_tsdn(tsd), ptr, old_usize, size,
             extra, alignment, zero);
-        thread_alloc_event(tsd, usize);
     }
     /*
@@ -3527,9 +3513,9 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) {
         == old_edata);
     if (unlikely(usize == old_usize)) {
-        te_alloc_rollback(tsd, usize);
         goto label_not_resized;
     }
+    thread_alloc_event(tsd, usize);
     thread_dalloc_event(tsd, old_usize);
     if (config_fill && malloc_slow) {
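
A control-flow sketch of the reordering visible in the hunks above, with stub functions rather than the real jemalloc internals: the thread event counter is advanced only after an allocation has succeeded, so the OOM paths no longer need a te_alloc_rollback()-style undo, and the up-front sampling decision is made by lookahead instead of by consuming the counter.

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    static uint64_t thread_allocated;       /* cumulative bytes, bumped after success */

    static void
    thread_alloc_event_sketch(size_t usize) {
        /* In jemalloc this is also what drives the sampling/stats event machinery. */
        thread_allocated += usize;
    }

    static void *
    malloc_sketch(size_t usize) {
        void *p = malloc(usize);                /* allocate first */
        if (p == NULL) {
            return NULL;                        /* OOM path: nothing to roll back */
        }
        thread_alloc_event_sketch(usize);       /* account only for real progress */
        return p;
    }

    int
    main(void) {
        void *p = malloc_sketch(64);
        printf("thread_allocated = %llu\n", (unsigned long long)thread_allocated);
        free(p);
        return 0;
    }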

View File

@@ -118,7 +118,7 @@ prof_strncpy(char *UNUSED dest, const char *UNUSED src, size_t UNUSED size) {
 }
 void
-prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated) {
+prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx) {
     cassert(config_prof);
     if (tsd_reentrancy_level_get(tsd) > 0) {
@@ -126,21 +126,6 @@ prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated) {
         return;
     }
-    prof_tdata_t *tdata;
-    if (updated) {
-        /*
-         * Compute a new sample threshold. This isn't very important in
-         * practice, because this function is rarely executed, so the
-         * potential for sample bias is minimal except in contrived
-         * programs.
-         */
-        tdata = prof_tdata_get(tsd, true);
-        if (tdata != NULL) {
-            prof_sample_threshold_update(tsd);
-        }
-    }
     if ((uintptr_t)tctx > (uintptr_t)1U) {
         malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock);
         tctx->prepared = false;
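
What remains of the rollback path is small. A hedged sketch with hypothetical types: because the skip check now only reads the per-thread counters, undoing a prepared allocation is just a matter of clearing the context's flag, and the removed threshold-recompute block needs no replacement.

    #include <assert.h>
    #include <stdint.h>

    typedef struct { int prepared; } fake_tctx_t;

    #define TCTX_NOT_SAMPLED ((fake_tctx_t *)(uintptr_t)1U)

    static void
    alloc_rollback(fake_tctx_t *tctx) {
        /* Only a real context carries state worth undoing. */
        if ((uintptr_t)tctx > (uintptr_t)1U) {
            tctx->prepared = 0;
        }
        /* No counter or threshold to restore: the skip check never consumed them. */
    }

    int
    main(void) {
        fake_tctx_t tctx = { 1 };
        alloc_rollback(&tctx);
        assert(tctx.prepared == 0);
        alloc_rollback(TCTX_NOT_SAMPLED);   /* sentinel: no-op */
        return 0;
    }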

View File

@@ -78,17 +78,7 @@ te_prof_sample_event_handler(tsd_t *tsd) {
     if (prof_idump_accum(tsd_tsdn(tsd), last_event - last_sample_event)) {
         prof_idump(tsd_tsdn(tsd));
     }
-    if (!prof_active_get_unlocked()) {
-        /*
-         * If prof_active is off, we reset prof_sample_event_wait to be
-         * the sample interval when it drops to 0, so that there won't
-         * be excessive routings to the slow path, and that when
-         * prof_active is turned on later, the counting for sampling
-         * can immediately resume as normal.
-         */
-        te_prof_sample_event_update(tsd,
-            (uint64_t)(1 << lg_prof_sample));
-    }
+    te_tsd_prof_sample_event_init(tsd);
 }
 static void
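
A sketch of the handler's new shape (stub names and a toy re-arm policy; the actual wait chosen by te_tsd_prof_sample_event_init() is not shown in this diff): firing the sample event now unconditionally re-arms the per-thread wait, which covers both the prof_active-on and prof_active-off cases that the removed branch used to distinguish.

    #include <stdint.h>
    #include <stdio.h>

    static int prof_active = 0;
    static unsigned lg_prof_sample = 19;        /* sample interval on the order of 2^19 bytes */
    static uint64_t prof_sample_event_wait;
    static uint64_t next_threshold = 1;         /* stand-in for a freshly drawn threshold */

    static void
    sample_event_init_sketch(void) {
        if (!prof_active) {
            /* Inactive: a fixed interval keeps the slow path quiet. */
            prof_sample_event_wait = (uint64_t)1 << lg_prof_sample;
        } else {
            /* Active: pretend to draw a new threshold (toy value, not random). */
            prof_sample_event_wait = ((uint64_t)1 << lg_prof_sample) + next_threshold++;
        }
    }

    static void
    sample_event_handler_sketch(void) {
        /* ...dump/accumulation work elided... */
        sample_event_init_sketch();     /* unconditional re-arm, no prof_active branch here */
    }

    int
    main(void) {
        sample_event_handler_sketch();
        printf("next sample event in %llu bytes\n",
            (unsigned long long)prof_sample_event_wait);
        return 0;
    }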