Restructure profiling

Develop a new data structure and code logic for holding
profiling-related information that is stored in the extent but may
still be needed after the extent is released, which in particular is
the case on the reallocation code path (e.g. in `rallocx()` and
`xallocx()`).  The data structure is a generalization of
`prof_tctx_t`: previously we only copied out the `prof_tctx` before
the extent was released, but we may need additional fields.
Currently the only additional field is the allocation time, but more
fields may be added in the future.
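
For reference, the new struct amounts to the following minimal
sketch, reconstructed from the fields the hunks below actually read
(`prof_info->prof_tctx` and `prof_info->alloc_time`); the struct tag
follows jemalloc's `*_s`/`*_t` convention, and the commit's exact
definition and header placement may differ:

typedef struct prof_info_s prof_info_t;
struct prof_info_s {
	/* Profiling counter context under which the allocation was sampled. */
	prof_tctx_t *prof_tctx;
	/* Time at which the allocation was made. */
	nstime_t alloc_time;
};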

The restructuring also resolved a bug: `prof_realloc()` mistakenly
passed the new `ptr` to `prof_free_sampled_object()`, while the
obvious fix of passing the `old_ptr` instead would crash, because by
that point the old extent has already been released.  Now the
essential profiling information is copied out as a unit early on and
safely passed to `prof_free_sampled_object()` after the extent is
released.
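
Schematically, the fixed ordering on the reallocation path is as
follows (a simplified sketch of the pattern, not the verbatim code;
`tctx` and `prof_active` come from `prof_alloc_prep()` and
`prof_active_get_unlocked()` as in the `irallocx_prof()` hunk below):

	/* Copy everything needed out of the old extent up front. */
	prof_info_t old_prof_info;
	prof_info_get(tsd_tsdn(tsd), old_ptr, alloc_ctx, &old_prof_info);
	/* The reallocation may release the old extent... */
	p = iralloct(tsd_tsdn(tsd), old_ptr, old_usize, size, alignment,
	    zero, tcache, arena, hook_args);
	/* ...so sampled-free bookkeeping reads the copy, never old_ptr. */
	prof_realloc(tsd, p, *usize, tctx, prof_active, false, old_ptr,
	    old_usize, &old_prof_info);
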
Author: Yinan Zhang
Date:   2019-11-19 16:24:57 -08:00
parent 8b2c2a596d
commit b55419f9b9

12 changed files with 82 additions and 92 deletions

src/jemalloc.c

@@ -3009,13 +3009,11 @@ JEMALLOC_ALWAYS_INLINE void *
 irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size,
     size_t alignment, size_t *usize, bool zero, tcache_t *tcache,
     arena_t *arena, alloc_ctx_t *alloc_ctx, hook_ralloc_args_t *hook_args) {
+	prof_info_t old_prof_info;
+	prof_info_get(tsd_tsdn(tsd), old_ptr, alloc_ctx, &old_prof_info);
+	bool prof_active = prof_active_get_unlocked();
+	prof_tctx_t *tctx = prof_alloc_prep(tsd, *usize, prof_active, false);
 	void *p;
-	bool prof_active;
-	prof_tctx_t *old_tctx, *tctx;
-
-	prof_active = prof_active_get_unlocked();
-	old_tctx = prof_tctx_get(tsd_tsdn(tsd), old_ptr, alloc_ctx);
-	tctx = prof_alloc_prep(tsd, *usize, prof_active, false);
 	if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) {
 		p = irallocx_prof_sample(tsd_tsdn(tsd), old_ptr, old_usize,
 		    *usize, alignment, zero, tcache, arena, tctx, hook_args);
@@ -3040,7 +3038,7 @@ irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size,
 		*usize = isalloc(tsd_tsdn(tsd), p);
 	}
 	prof_realloc(tsd, p, *usize, tctx, prof_active, false, old_ptr,
-	    old_usize, old_tctx);
+	    old_usize, &old_prof_info);
 
 	return p;
 }
@@ -3262,18 +3260,15 @@ ixallocx_prof_sample(tsdn_t *tsdn, void *ptr, size_t old_usize, size_t size,
 JEMALLOC_ALWAYS_INLINE size_t
 ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size,
     size_t extra, size_t alignment, bool zero, alloc_ctx_t *alloc_ctx) {
-	size_t usize_max, usize;
-	bool prof_active;
-	prof_tctx_t *old_tctx, *tctx;
-
-	prof_active = prof_active_get_unlocked();
-	old_tctx = prof_tctx_get(tsd_tsdn(tsd), ptr, alloc_ctx);
+	prof_info_t old_prof_info;
+	prof_info_get(tsd_tsdn(tsd), ptr, alloc_ctx, &old_prof_info);
 	/*
 	 * usize isn't knowable before ixalloc() returns when extra is non-zero.
 	 * Therefore, compute its maximum possible value and use that in
 	 * prof_alloc_prep() to decide whether to capture a backtrace.
 	 * prof_realloc() will use the actual usize to decide whether to sample.
 	 */
+	size_t usize_max;
 	if (alignment == 0) {
 		usize_max = sz_s2u(size+extra);
 		assert(usize_max > 0
@@ -3292,8 +3287,10 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size,
 		}
 	}
 	thread_event(tsd, usize_max);
-	tctx = prof_alloc_prep(tsd, usize_max, prof_active, false);
 
+	bool prof_active = prof_active_get_unlocked();
+	prof_tctx_t *tctx = prof_alloc_prep(tsd, usize_max, prof_active, false);
+	size_t usize;
 	if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) {
 		usize = ixallocx_prof_sample(tsd_tsdn(tsd), ptr, old_usize,
 		    size, extra, alignment, zero, tctx);
@@ -3318,7 +3315,7 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size,
 		return usize;
 	}
 	prof_realloc(tsd, ptr, usize, tctx, prof_active, false, ptr, old_usize,
-	    old_tctx);
+	    &old_prof_info);
 
 	return usize;
 }
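
Both functions above obtain the old info through the new
`prof_info_get()` accessor.  A plausible sketch, modeled on the
`prof_tctx_get()` it replaces (hypothetical; the commit's version
lives in the internal inline headers and reaches the
`large_prof_info_get()` shown below via the arena layer):

JEMALLOC_ALWAYS_INLINE void
prof_info_get(tsdn_t *tsdn, const void *ptr, alloc_ctx_t *alloc_ctx,
    prof_info_t *prof_info) {
	cassert(config_prof);
	assert(ptr != NULL && prof_info != NULL);
	/* A non-NULL alloc_ctx carries cached metadata for the lookup. */
	arena_prof_info_get(tsdn, ptr, alloc_ctx, prof_info);
}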

src/large.c

@@ -367,9 +367,10 @@ large_salloc(tsdn_t *tsdn, const extent_t *extent) {
 	return extent_usize_get(extent);
 }
 
-prof_tctx_t *
-large_prof_tctx_get(tsdn_t *tsdn, const extent_t *extent) {
-	return extent_prof_tctx_get(extent);
+void
+large_prof_info_get(tsdn_t *tsdn, const extent_t *extent,
+    prof_info_t *prof_info) {
+	extent_prof_info_get(extent, prof_info);
 }
 
 void
@@ -382,11 +383,6 @@ large_prof_tctx_reset(tsdn_t *tsdn, extent_t *extent) {
 	large_prof_tctx_set(tsdn, extent, (prof_tctx_t *)(uintptr_t)1U);
 }
 
-nstime_t
-large_prof_alloc_time_get(const extent_t *extent) {
-	return extent_prof_alloc_time_get(extent);
-}
-
 void
 large_prof_alloc_time_set(extent_t *extent, nstime_t t) {
 	extent_prof_alloc_time_set(extent, t);
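
`large_prof_info_get()` above delegates to a new extent-level helper,
`extent_prof_info_get()`, which is not among the hunks shown.  Given
the pre-existing `extent_prof_tctx_get()` and
`extent_prof_alloc_time_get()` accessors that the deleted wrappers
used, it plausibly just bundles the two reads (a sketch, not the
verbatim commit code):

static inline void
extent_prof_info_get(const extent_t *extent, prof_info_t *prof_info) {
	/* Gather all extent-resident profiling state in one shot. */
	prof_info->prof_tctx = extent_prof_tctx_get(extent);
	prof_info->alloc_time = extent_prof_alloc_time_get(extent);
}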

src/prof.c

@@ -187,8 +187,11 @@ prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize,
 }
 
 void
-prof_free_sampled_object(tsd_t *tsd, const void *ptr, size_t usize,
-    prof_tctx_t *tctx) {
+prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_info_t *prof_info) {
+	assert(prof_info != NULL);
+	prof_tctx_t *tctx = prof_info->prof_tctx;
+	assert((uintptr_t)tctx > (uintptr_t)1U);
+
 	malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock);
 
 	assert(tctx->cnts.curobjs > 0);
@@ -196,7 +199,7 @@ prof_free_sampled_object(tsd_t *tsd, const void *ptr, size_t usize,
 	tctx->cnts.curobjs--;
 	tctx->cnts.curbytes -= usize;
 
-	prof_try_log(tsd, ptr, usize, tctx);
+	prof_try_log(tsd, usize, prof_info);
 
 	if (prof_tctx_should_destroy(tsd_tsdn(tsd), tctx)) {
 		prof_tctx_destroy(tsd, tctx);
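
With the new signature, callers must copy the profiling info out
before the extent can go away and then hand over the copy.  A
hypothetical sketch of the sampled-free path built on that contract
(the commit's actual `prof_free()` inline may differ in detail):

JEMALLOC_ALWAYS_INLINE void
prof_free(tsd_t *tsd, const void *ptr, size_t usize, alloc_ctx_t *alloc_ctx) {
	cassert(config_prof);
	/* Copy the info out while the extent is still alive. */
	prof_info_t prof_info;
	prof_info_get(tsd_tsdn(tsd), ptr, alloc_ctx, &prof_info);
	/* A tctx of (prof_tctx_t *)1U marks an unsampled allocation. */
	if (unlikely((uintptr_t)prof_info.prof_tctx > (uintptr_t)1U)) {
		prof_free_sampled_object(tsd, usize, &prof_info);
	}
}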

src/prof_log.c

@@ -199,7 +199,8 @@ prof_log_thr_index(tsd_t *tsd, uint64_t thr_uid, const char *name) {
 }
 
 void
-prof_try_log(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx) {
+prof_try_log(tsd_t *tsd, size_t usize, prof_info_t *prof_info) {
+	prof_tctx_t *tctx = prof_info->prof_tctx;
 	malloc_mutex_assert_owner(tsd_tsdn(tsd), tctx->tdata->lock);
 
 	prof_tdata_t *cons_tdata = prof_tdata_get(tsd, false);
@@ -229,7 +230,7 @@ prof_try_log(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx) {
 		log_tables_initialized = true;
 	}
 
-	nstime_t alloc_time = prof_alloc_time_get(tsd_tsdn(tsd), ptr);
+	nstime_t alloc_time = prof_info->alloc_time;
 	nstime_t free_time = NSTIME_ZERO_INITIALIZER;
 	nstime_update(&free_time);