Rewrite profiling thread event
parent 0dcd576600
commit 441d88d1c7
@@ -53,7 +53,7 @@ void prof_idump_rollback_impl(tsdn_t *tsdn, size_t usize);
 prof_tdata_t *prof_tdata_init(tsd_t *tsd);
 prof_tdata_t *prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata);
 
-void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated);
+void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx);
 void prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t size,
     size_t usize, prof_tctx_t *tctx);
 void prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_info_t *prof_info);
@@ -85,11 +85,11 @@ prof_info_set(tsd_t *tsd, edata_t *edata, prof_tctx_t *tctx) {
 }
 
 JEMALLOC_ALWAYS_INLINE bool
-prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update) {
+prof_sample_should_skip(tsd_t *tsd, size_t usize) {
     cassert(config_prof);
 
     /* Fastpath: no need to load tdata */
-    if (likely(prof_sample_event_wait_get(tsd) > 0)) {
+    if (likely(!te_prof_sample_event_lookahead(tsd, usize))) {
         return true;
     }
 
@@ -102,21 +102,16 @@ prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update) {
         return true;
     }
 
-    /* Compute new sample threshold. */
-    if (update) {
-        prof_sample_threshold_update(tsd);
-    }
     return !tdata->active;
 }
 
 JEMALLOC_ALWAYS_INLINE prof_tctx_t *
-prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active, bool update) {
+prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active) {
     prof_tctx_t *ret;
 
     assert(usize == sz_s2u(usize));
 
-    if (!prof_active ||
-        likely(prof_sample_accum_update(tsd, usize, update))) {
+    if (!prof_active || likely(prof_sample_should_skip(tsd, usize))) {
         ret = (prof_tctx_t *)(uintptr_t)1U;
     } else {
         ret = prof_tctx_create(tsd);
@@ -150,7 +145,7 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t size, size_t usize,
 
     if (prof_active && ptr != NULL) {
         assert(usize == isalloc(tsd_tsdn(tsd), ptr));
-        if (prof_sample_accum_update(tsd, usize, true)) {
+        if (prof_sample_should_skip(tsd, usize)) {
            /*
             * Don't sample. The usize passed to prof_alloc_prep()
             * was larger than what actually got allocated, so a
@@ -158,7 +153,7 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t size, size_t usize,
             * though its actual usize was insufficient to cross the
             * sample threshold.
             */
-            prof_alloc_rollback(tsd, tctx, true);
+            prof_alloc_rollback(tsd, tctx);
            tctx = (prof_tctx_t *)(uintptr_t)1U;
        }
    }
@@ -218,6 +218,13 @@ te_ctx_get(tsd_t *tsd, te_ctx_t *ctx, bool is_alloc) {
     }
 }
 
+JEMALLOC_ALWAYS_INLINE bool
+te_prof_sample_event_lookahead(tsd_t *tsd, size_t usize) {
+    return tsd_thread_allocated_get(tsd) + usize -
+        tsd_thread_allocated_last_event_get(tsd) >=
+        tsd_prof_sample_event_wait_get(tsd);
+}
+
 JEMALLOC_ALWAYS_INLINE void
 te_event_advance(tsd_t *tsd, size_t usize, bool is_alloc) {
     te_assert_invariants(tsd);
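The hunks above contain the core of the rewrite: the sampling decision is now made by looking ahead at the per-thread byte counters instead of maintaining a separate accumulator. The following is a minimal standalone model of that check; the names thread_allocated, last_event and sample_wait are stand-ins for the tsd fields read by te_prof_sample_event_lookahead() and prof_sample_should_skip(), not jemalloc's actual internals.

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/* Stand-ins for the per-thread counters read by the real code. */
typedef struct {
    uint64_t thread_allocated;  /* total bytes allocated by this thread */
    uint64_t last_event;        /* thread_allocated at the last thread event */
    uint64_t sample_wait;       /* bytes remaining until the next sample */
} thread_counters_t;

/* Analogue of te_prof_sample_event_lookahead(): would an allocation of
 * usize bytes reach the sample threshold? */
static bool
sample_event_lookahead(const thread_counters_t *c, size_t usize) {
    return c->thread_allocated + usize - c->last_event >= c->sample_wait;
}

/* Analogue of prof_sample_should_skip(): skip sampling unless the
 * lookahead says this allocation would cross the threshold. */
static bool
sample_should_skip(const thread_counters_t *c, size_t usize) {
    return !sample_event_lookahead(c, usize);
}

For example, with thread_allocated = 1000, last_event = 900 and sample_wait = 512, a 100-byte allocation is skipped (200 < 512), while a 600-byte allocation triggers a sample (700 >= 512).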
@@ -2177,8 +2177,6 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) {
         dopts->arena_ind = 0;
     }
 
-    thread_alloc_event(tsd, usize);
-
     /*
      * If dopts->alignment > 0, then ind is still 0, but usize was computed
      * in the previous if statement. Down the positive alignment path,
@@ -2187,8 +2185,8 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) {
 
     /* If profiling is on, get our profiling context. */
     if (config_prof && opt_prof) {
-        prof_tctx_t *tctx = prof_alloc_prep(
-            tsd, usize, prof_active_get_unlocked(), true);
+        bool prof_active = prof_active_get_unlocked();
+        prof_tctx_t *tctx = prof_alloc_prep(tsd, usize, prof_active);
 
         emap_alloc_ctx_t alloc_ctx;
         if (likely((uintptr_t)tctx == (uintptr_t)1U)) {
@@ -2204,8 +2202,7 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) {
         }
 
         if (unlikely(allocation == NULL)) {
-            te_alloc_rollback(tsd, usize);
-            prof_alloc_rollback(tsd, tctx, true);
+            prof_alloc_rollback(tsd, tctx);
             goto label_oom;
         }
         prof_malloc(tsd, allocation, size, usize, &alloc_ctx, tctx);
@@ -2214,7 +2211,6 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) {
         allocation = imalloc_no_sample(sopts, dopts, tsd, size, usize,
             ind);
         if (unlikely(allocation == NULL)) {
-            te_alloc_rollback(tsd, usize);
             goto label_oom;
         }
     }
@@ -2223,6 +2219,9 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) {
      * Allocation has been done at this point. We still have some
      * post-allocation work to do though.
      */
+
+    thread_alloc_event(tsd, usize);
+
     assert(dopts->alignment == 0
         || ((uintptr_t)allocation & (dopts->alignment - 1)) == ZU(0));
 
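With the lookahead in place, imalloc_body() no longer charges thread_alloc_event() before allocating and undoes it on failure; the counters are advanced once, after the allocation succeeds, which is why the te_alloc_rollback() calls and the prof_alloc_rollback() "updated" argument disappear from the OOM paths. A sketch of the new ordering, continuing the thread_counters_t model above (model_malloc and its plain malloc() call are illustrative stand-ins, not jemalloc's arena path):

#include <stdbool.h>
#include <stdlib.h>

static void *
model_malloc(thread_counters_t *c, size_t usize) {
    /* 1. Decide whether to sample by lookahead only; no counter is
     *    touched yet, so a failure below needs no rollback. */
    bool sampled = sample_event_lookahead(c, usize);

    /* 2. Perform the allocation (stand-in for the arena call). */
    void *p = malloc(usize);
    if (p == NULL) {
        return NULL;
    }

    /* 3. Only now advance the per-thread event counters, mirroring the
     *    thread_alloc_event() call that moved below the allocation. */
    c->thread_allocated += usize;

    (void)sampled;  /* the real code would record a sampled allocation here */
    return p;
}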
@@ -3132,7 +3131,7 @@ irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size,
     prof_info_t old_prof_info;
     prof_info_get_and_reset_recent(tsd, old_ptr, alloc_ctx, &old_prof_info);
     bool prof_active = prof_active_get_unlocked();
-    prof_tctx_t *tctx = prof_alloc_prep(tsd, *usize, prof_active, false);
+    prof_tctx_t *tctx = prof_alloc_prep(tsd, *usize, prof_active);
     void *p;
     if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) {
         p = irallocx_prof_sample(tsd_tsdn(tsd), old_ptr, old_usize,
@@ -3142,7 +3141,7 @@ irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size,
             zero, tcache, arena, hook_args);
     }
     if (unlikely(p == NULL)) {
-        prof_alloc_rollback(tsd, tctx, false);
+        prof_alloc_rollback(tsd, tctx);
         return NULL;
     }
 
@@ -3155,8 +3154,10 @@ irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size,
         * be the same as the current usize because of in-place large
         * reallocation. Therefore, query the actual value of usize.
         */
+        assert(*usize >= isalloc(tsd_tsdn(tsd), p));
        *usize = isalloc(tsd_tsdn(tsd), p);
    }
 
    prof_realloc(tsd, p, size, *usize, tctx, prof_active, old_ptr,
        old_usize, &old_prof_info);
 
@@ -3214,11 +3215,9 @@ do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) {
         if (unlikely(usize == 0 || usize > SC_LARGE_MAXCLASS)) {
             goto label_oom;
         }
-        thread_alloc_event(tsd, usize);
         p = irallocx_prof(tsd, ptr, old_usize, size, alignment, &usize,
             zero, tcache, arena, &alloc_ctx, &hook_args);
         if (unlikely(p == NULL)) {
-            te_alloc_rollback(tsd, usize);
             goto label_oom;
         }
     } else {
@@ -3228,9 +3227,9 @@ do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) {
             goto label_oom;
         }
         usize = isalloc(tsd_tsdn(tsd), p);
-        thread_alloc_event(tsd, usize);
     }
     assert(alignment == 0 || ((uintptr_t)p & (alignment - 1)) == ZU(0));
+    thread_alloc_event(tsd, usize);
     thread_dalloc_event(tsd, old_usize);
 
     UTRACE(ptr, size, p);
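do_rallocx() follows the same pattern: instead of charging the alloc event before irallocx_prof() (with a rollback on failure) and separately inside the non-profiling branch, both branches now fall through to a single pair of event calls once the new allocation is known. Continuing the model above (hypothetical helper; the dealloc side is reduced to a plain counter):

/* Realloc-style accounting after this commit: one alloc event for the new
 * usize and one dalloc event for the old, charged once after success. */
static void
model_rallocx_accounting(thread_counters_t *c, uint64_t *deallocated,
    size_t new_usize, size_t old_usize) {
    c->thread_allocated += new_usize;   /* thread_alloc_event() analogue */
    *deallocated += old_usize;          /* thread_dalloc_event() analogue */
}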
@@ -3416,9 +3415,8 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size,
             usize_max = SC_LARGE_MAXCLASS;
         }
     }
-    thread_alloc_event(tsd, usize_max);
     bool prof_active = prof_active_get_unlocked();
-    prof_tctx_t *tctx = prof_alloc_prep(tsd, usize_max, prof_active, false);
+    prof_tctx_t *tctx = prof_alloc_prep(tsd, usize_max, prof_active);
 
     size_t usize;
     if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) {
@@ -3428,18 +3426,6 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size,
         usize = ixallocx_helper(tsd_tsdn(tsd), ptr, old_usize, size,
             extra, alignment, zero);
     }
-    if (usize <= usize_max) {
-        te_alloc_rollback(tsd, usize_max - usize);
-    } else {
-        /*
-         * For downsizing request, usize_max can be less than usize.
-         * We here further increase thread event counters so as to
-         * record the true usize, and then when the execution goes back
-         * to xallocx(), the entire usize will be rolled back if it's
-         * equal to the old usize.
-         */
-        thread_alloc_event(tsd, usize - usize_max);
-    }
 
     /*
      * At this point we can still safely get the original profiling
@@ -3452,9 +3438,10 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size,
     prof_info_t prof_info;
     if (usize == old_usize) {
         prof_info_get(tsd, ptr, alloc_ctx, &prof_info);
-        prof_alloc_rollback(tsd, tctx, false);
+        prof_alloc_rollback(tsd, tctx);
     } else {
         prof_info_get_and_reset_recent(tsd, ptr, alloc_ctx, &prof_info);
+        assert(usize <= usize_max);
         prof_realloc(tsd, ptr, size, usize, tctx, prof_active, ptr,
             old_usize, &prof_info);
     }
@@ -3516,7 +3503,6 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) {
     } else {
         usize = ixallocx_helper(tsd_tsdn(tsd), ptr, old_usize, size,
             extra, alignment, zero);
-        thread_alloc_event(tsd, usize);
     }
 
     /*
@@ -3527,9 +3513,9 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) {
         == old_edata);
 
     if (unlikely(usize == old_usize)) {
-        te_alloc_rollback(tsd, usize);
         goto label_not_resized;
     }
+    thread_alloc_event(tsd, usize);
     thread_dalloc_event(tsd, old_usize);
 
     if (config_fill && malloc_slow) {
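The xallocx() path is simplified the same way: ixallocx_prof() makes the sampling decision against the upper bound usize_max by lookahead alone, and je_xallocx() charges the events with the actual resulting usize only when the extent really resized, so the old usize/usize_max correction block is no longer needed. Continuing the model above (hypothetical helper; the dealloc side is again a plain counter):

/* xallocx-style accounting after this commit. */
static void
model_xallocx_accounting(thread_counters_t *c, uint64_t *deallocated,
    size_t usize, size_t old_usize) {
    if (usize == old_usize) {
        return;     /* not resized: no events to charge */
    }
    c->thread_allocated += usize;   /* thread_alloc_event() analogue */
    *deallocated += old_usize;      /* thread_dalloc_event() analogue */
}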
src/prof.c (17 changed lines)
@@ -118,7 +118,7 @@ prof_strncpy(char *UNUSED dest, const char *UNUSED src, size_t UNUSED size) {
 }
 
 void
-prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated) {
+prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx) {
     cassert(config_prof);
 
     if (tsd_reentrancy_level_get(tsd) > 0) {
@@ -126,21 +126,6 @@ prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated) {
         return;
     }
 
-    prof_tdata_t *tdata;
-
-    if (updated) {
-        /*
-         * Compute a new sample threshold. This isn't very important in
-         * practice, because this function is rarely executed, so the
-         * potential for sample bias is minimal except in contrived
-         * programs.
-         */
-        tdata = prof_tdata_get(tsd, true);
-        if (tdata != NULL) {
-            prof_sample_threshold_update(tsd);
-        }
-    }
-
     if ((uintptr_t)tctx > (uintptr_t)1U) {
         malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock);
         tctx->prepared = false;
@@ -78,17 +78,7 @@ te_prof_sample_event_handler(tsd_t *tsd) {
     if (prof_idump_accum(tsd_tsdn(tsd), last_event - last_sample_event)) {
         prof_idump(tsd_tsdn(tsd));
     }
-    if (!prof_active_get_unlocked()) {
-        /*
-         * If prof_active is off, we reset prof_sample_event_wait to be
-         * the sample interval when it drops to 0, so that there won't
-         * be excessive routings to the slow path, and that when
-         * prof_active is turned on later, the counting for sampling
-         * can immediately resume as normal.
-         */
-        te_prof_sample_event_update(tsd,
-            (uint64_t)(1 << lg_prof_sample));
-    }
+    te_tsd_prof_sample_event_init(tsd);
 }
 
 static void
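Finally, the sample event handler no longer special-cases an inactive profiler: it always re-arms the per-thread wait via te_tsd_prof_sample_event_init(). In terms of the model above, the handler reduces to consuming the pending event and drawing a fresh wait; new_wait stands in for whatever interval jemalloc picks next, which this sketch does not attempt to reproduce.

static void
model_sample_event_handler(thread_counters_t *c, uint64_t new_wait) {
    c->last_event = c->thread_allocated;    /* the pending event is consumed */
    c->sample_wait = new_wait;              /* re-arm for the next sample */
}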