Last-N profiling mode
This commit is contained in:
45
src/ctl.c
45
src/ctl.c
@@ -113,6 +113,7 @@ CTL_PROTO(opt_prof_gdump)
|
||||
CTL_PROTO(opt_prof_final)
|
||||
CTL_PROTO(opt_prof_leak)
|
||||
CTL_PROTO(opt_prof_accum)
|
||||
CTL_PROTO(opt_prof_recent_alloc_max)
|
||||
CTL_PROTO(opt_zero_realloc)
|
||||
CTL_PROTO(tcache_create)
|
||||
CTL_PROTO(tcache_flush)
|
||||
@@ -232,6 +233,7 @@ CTL_PROTO(experimental_utilization_query)
|
||||
CTL_PROTO(experimental_utilization_batch_query)
|
||||
CTL_PROTO(experimental_arenas_i_pactivep)
|
||||
INDEX_PROTO(experimental_arenas_i)
|
||||
CTL_PROTO(experimental_prof_recent_alloc_max)
|
||||
|
||||
#define MUTEX_STATS_CTL_PROTO_GEN(n) \
|
||||
CTL_PROTO(stats_##n##_num_ops) \
|
||||
@@ -343,6 +345,7 @@ static const ctl_named_node_t opt_node[] = {
|
||||
{NAME("prof_final"), CTL(opt_prof_final)},
|
||||
{NAME("prof_leak"), CTL(opt_prof_leak)},
|
||||
{NAME("prof_accum"), CTL(opt_prof_accum)},
|
||||
{NAME("prof_recent_alloc_max"), CTL(opt_prof_recent_alloc_max)},
|
||||
{NAME("zero_realloc"), CTL(opt_zero_realloc)}
|
||||
};
|
||||
|
||||
@@ -620,10 +623,15 @@ static const ctl_indexed_node_t experimental_arenas_node[] = {
|
||||
{INDEX(experimental_arenas_i)}
|
||||
};
|
||||
|
||||
static const ctl_named_node_t experimental_prof_recent_node[] = {
|
||||
{NAME("alloc_max"), CTL(experimental_prof_recent_alloc_max)},
|
||||
};
|
||||
|
||||
static const ctl_named_node_t experimental_node[] = {
|
||||
{NAME("hooks"), CHILD(named, experimental_hooks)},
|
||||
{NAME("utilization"), CHILD(named, experimental_utilization)},
|
||||
{NAME("arenas"), CHILD(indexed, experimental_arenas)}
|
||||
{NAME("arenas"), CHILD(indexed, experimental_arenas)},
|
||||
{NAME("prof_recent"), CHILD(named, experimental_prof_recent)}
|
||||
};
|
||||
|
||||
static const ctl_named_node_t root_node[] = {
|
||||
@@ -1791,6 +1799,8 @@ CTL_RO_NL_CGEN(config_prof, opt_lg_prof_interval, opt_lg_prof_interval, ssize_t)
|
||||
CTL_RO_NL_CGEN(config_prof, opt_prof_gdump, opt_prof_gdump, bool)
|
||||
CTL_RO_NL_CGEN(config_prof, opt_prof_final, opt_prof_final, bool)
|
||||
CTL_RO_NL_CGEN(config_prof, opt_prof_leak, opt_prof_leak, bool)
|
||||
CTL_RO_NL_CGEN(config_prof, opt_prof_recent_alloc_max,
|
||||
opt_prof_recent_alloc_max, ssize_t)
|
||||
CTL_RO_NL_GEN(opt_zero_realloc,
|
||||
zero_realloc_mode_names[opt_zero_realloc_action], const char *)
|
||||
|
||||
@@ -3461,3 +3471,36 @@ label_return:
|
||||
malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int
|
||||
experimental_prof_recent_alloc_max_ctl(tsd_t *tsd, const size_t *mib,
|
||||
size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) {
|
||||
int ret;
|
||||
|
||||
if (!(config_prof && opt_prof)) {
|
||||
ret = ENOENT;
|
||||
goto label_return;
|
||||
}
|
||||
|
||||
ssize_t old_max;
|
||||
if (newp != NULL) {
|
||||
if (newlen != sizeof(ssize_t)) {
|
||||
ret = EINVAL;
|
||||
goto label_return;
|
||||
}
|
||||
ssize_t max = *(ssize_t *)newp;
|
||||
if (max < -1) {
|
||||
ret = EINVAL;
|
||||
goto label_return;
|
||||
}
|
||||
old_max = prof_recent_alloc_max_ctl_write(tsd, max);
|
||||
} else {
|
||||
old_max = prof_recent_alloc_max_ctl_read();
|
||||
}
|
||||
READ(old_max, ssize_t);
|
||||
|
||||
ret = 0;
|
||||
|
||||
label_return:
|
||||
return ret;
|
||||
}
|
||||
|
@@ -1562,6 +1562,8 @@ extent_merge_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_cache_t *edata_cache,
|
||||
|
||||
bool
|
||||
extent_boot(void) {
|
||||
assert(sizeof(slab_data_t) >= sizeof(e_prof_info_t));
|
||||
|
||||
if (rtree_new(&extents_rtree, true)) {
|
||||
return true;
|
||||
}
|
||||
|
@@ -1402,6 +1402,8 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS],
|
||||
CONF_HANDLE_BOOL(opt_prof_final, "prof_final")
|
||||
CONF_HANDLE_BOOL(opt_prof_leak, "prof_leak")
|
||||
CONF_HANDLE_BOOL(opt_prof_log, "prof_log")
|
||||
CONF_HANDLE_SSIZE_T(opt_prof_recent_alloc_max,
|
||||
"prof_recent_alloc_max", -1, SSIZE_MAX)
|
||||
}
|
||||
if (config_log) {
|
||||
if (CONF_MATCH("log")) {
|
||||
@@ -3015,7 +3017,7 @@ irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size,
|
||||
size_t alignment, size_t *usize, bool zero, tcache_t *tcache,
|
||||
arena_t *arena, alloc_ctx_t *alloc_ctx, hook_ralloc_args_t *hook_args) {
|
||||
prof_info_t old_prof_info;
|
||||
prof_info_get(tsd, old_ptr, alloc_ctx, &old_prof_info);
|
||||
prof_info_get_and_reset_recent(tsd, old_ptr, alloc_ctx, &old_prof_info);
|
||||
bool prof_active = prof_active_get_unlocked();
|
||||
prof_tctx_t *tctx = prof_alloc_prep(tsd, *usize, prof_active, false);
|
||||
void *p;
|
||||
@@ -3265,8 +3267,13 @@ ixallocx_prof_sample(tsdn_t *tsdn, void *ptr, size_t old_usize, size_t size,
|
||||
JEMALLOC_ALWAYS_INLINE size_t
|
||||
ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size,
|
||||
size_t extra, size_t alignment, bool zero, alloc_ctx_t *alloc_ctx) {
|
||||
/*
|
||||
* old_prof_info is only used for asserting that the profiling info
|
||||
* isn't changed by the ixalloc() call.
|
||||
*/
|
||||
prof_info_t old_prof_info;
|
||||
prof_info_get(tsd, ptr, alloc_ctx, &old_prof_info);
|
||||
|
||||
/*
|
||||
* usize isn't knowable before ixalloc() returns when extra is non-zero.
|
||||
* Therefore, compute its maximum possible value and use that in
|
||||
@@ -3315,13 +3322,26 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size,
|
||||
*/
|
||||
thread_event(tsd, usize - usize_max);
|
||||
}
|
||||
if (usize == old_usize) {
|
||||
prof_alloc_rollback(tsd, tctx, false);
|
||||
return usize;
|
||||
}
|
||||
prof_realloc(tsd, ptr, usize, tctx, prof_active, ptr, old_usize,
|
||||
&old_prof_info);
|
||||
|
||||
/*
|
||||
* At this point we can still safely get the original profiling
|
||||
* information associated with the ptr, because (a) the edata_t object
|
||||
* associated with the ptr still lives and (b) the profiling info
|
||||
* fields are not touched. "(a)" is asserted in the outer je_xallocx()
|
||||
* function, and "(b)" is indirectly verified below by checking that
|
||||
* the alloc_tctx field is unchanged.
|
||||
*/
|
||||
prof_info_t prof_info;
|
||||
if (usize == old_usize) {
|
||||
prof_info_get(tsd, ptr, alloc_ctx, &prof_info);
|
||||
prof_alloc_rollback(tsd, tctx, false);
|
||||
} else {
|
||||
prof_info_get_and_reset_recent(tsd, ptr, alloc_ctx, &prof_info);
|
||||
prof_realloc(tsd, ptr, usize, tctx, prof_active, ptr,
|
||||
old_usize, &prof_info);
|
||||
}
|
||||
|
||||
assert(old_prof_info.alloc_tctx == prof_info.alloc_tctx);
|
||||
return usize;
|
||||
}
|
||||
|
||||
@@ -3342,6 +3362,13 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) {
|
||||
tsd = tsd_fetch();
|
||||
check_entry_exit_locking(tsd_tsdn(tsd));
|
||||
|
||||
/*
|
||||
* old_edata is only for verifying that xallocx() keeps the edata_t
|
||||
* object associated with the ptr (though the content of the edata_t
|
||||
* object can be changed).
|
||||
*/
|
||||
edata_t *old_edata = iealloc(tsd_tsdn(tsd), ptr);
|
||||
|
||||
alloc_ctx_t alloc_ctx;
|
||||
rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd);
|
||||
rtree_szind_slab_read(tsd_tsdn(tsd), &extents_rtree, rtree_ctx,
|
||||
@@ -3374,6 +3401,13 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) {
|
||||
extra, alignment, zero);
|
||||
thread_event(tsd, usize);
|
||||
}
|
||||
|
||||
/*
|
||||
* xallocx() should keep using the same edata_t object (though its
|
||||
* content can be changed).
|
||||
*/
|
||||
assert(iealloc(tsd_tsdn(tsd), ptr) == old_edata);
|
||||
|
||||
if (unlikely(usize == old_usize)) {
|
||||
thread_event_rollback(tsd, usize);
|
||||
goto label_not_resized;
|
||||
|
20
src/large.c
20
src/large.c
@@ -5,6 +5,7 @@
|
||||
#include "jemalloc/internal/assert.h"
|
||||
#include "jemalloc/internal/extent_mmap.h"
|
||||
#include "jemalloc/internal/mutex.h"
|
||||
#include "jemalloc/internal/prof_recent.h"
|
||||
#include "jemalloc/internal/rtree.h"
|
||||
#include "jemalloc/internal/util.h"
|
||||
|
||||
@@ -368,8 +369,22 @@ large_salloc(tsdn_t *tsdn, const edata_t *edata) {
|
||||
}
|
||||
|
||||
void
|
||||
large_prof_info_get(const edata_t *edata, prof_info_t *prof_info) {
|
||||
edata_prof_info_get(edata, prof_info);
|
||||
large_prof_info_get(tsd_t *tsd, edata_t *edata, prof_info_t *prof_info,
|
||||
bool reset_recent) {
|
||||
assert(prof_info != NULL);
|
||||
nstime_copy(&prof_info->alloc_time, edata_prof_alloc_time_get(edata));
|
||||
|
||||
prof_tctx_t *alloc_tctx = edata_prof_tctx_get(edata);
|
||||
prof_info->alloc_tctx = alloc_tctx;
|
||||
|
||||
if (reset_recent && (uintptr_t)alloc_tctx > (uintptr_t)1U) {
|
||||
/*
|
||||
* This allocation was a prof sample. Reset the pointer on the
|
||||
* recent allocation record, so that this allocation is
|
||||
* recorded as released.
|
||||
*/
|
||||
prof_recent_alloc_reset(tsd, edata);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -388,4 +403,5 @@ large_prof_info_set(edata_t *edata, prof_tctx_t *tctx) {
|
||||
nstime_t t;
|
||||
nstime_init_update(&t);
|
||||
edata_prof_alloc_time_set(edata, &t);
|
||||
edata_prof_recent_alloc_init(edata);
|
||||
}
|
||||
|
16
src/prof.c
16
src/prof.c
@@ -7,6 +7,7 @@
|
||||
#include "jemalloc/internal/mutex.h"
|
||||
#include "jemalloc/internal/prof_data.h"
|
||||
#include "jemalloc/internal/prof_log.h"
|
||||
#include "jemalloc/internal/prof_recent.h"
|
||||
#include "jemalloc/internal/thread_event.h"
|
||||
|
||||
/*
|
||||
@@ -146,7 +147,8 @@ prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated) {
|
||||
void
|
||||
prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t usize,
|
||||
prof_tctx_t *tctx) {
|
||||
prof_info_set(tsd, ptr, tctx);
|
||||
edata_t *edata = iealloc(tsd_tsdn(tsd), ptr);
|
||||
prof_info_set(tsd, edata, tctx);
|
||||
|
||||
malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock);
|
||||
tctx->cnts.curobjs++;
|
||||
@@ -155,8 +157,13 @@ prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t usize,
|
||||
tctx->cnts.accumobjs++;
|
||||
tctx->cnts.accumbytes += usize;
|
||||
}
|
||||
bool record_recent = prof_recent_alloc_prepare(tsd, tctx);
|
||||
tctx->prepared = false;
|
||||
malloc_mutex_unlock(tsd_tsdn(tsd), tctx->tdata->lock);
|
||||
if (record_recent) {
|
||||
assert(tctx == edata_prof_tctx_get(edata));
|
||||
prof_recent_alloc(tsd, edata, usize);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
@@ -1068,6 +1075,10 @@ prof_boot2(tsd_t *tsd) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (prof_recent_init()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
gctx_locks = (malloc_mutex_t *)base_alloc(tsd_tsdn(tsd),
|
||||
b0get(), PROF_NCTX_LOCKS * sizeof(malloc_mutex_t),
|
||||
CACHELINE);
|
||||
@@ -1134,6 +1145,7 @@ prof_prefork1(tsdn_t *tsdn) {
|
||||
malloc_mutex_prefork(tsdn, &prof_gdump_mtx);
|
||||
malloc_mutex_prefork(tsdn, &next_thr_uid_mtx);
|
||||
malloc_mutex_prefork(tsdn, &prof_thread_active_init_mtx);
|
||||
malloc_mutex_prefork(tsdn, &prof_recent_alloc_mtx);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1142,6 +1154,7 @@ prof_postfork_parent(tsdn_t *tsdn) {
|
||||
if (config_prof && opt_prof) {
|
||||
unsigned i;
|
||||
|
||||
malloc_mutex_postfork_parent(tsdn, &prof_recent_alloc_mtx);
|
||||
malloc_mutex_postfork_parent(tsdn,
|
||||
&prof_thread_active_init_mtx);
|
||||
malloc_mutex_postfork_parent(tsdn, &next_thr_uid_mtx);
|
||||
@@ -1166,6 +1179,7 @@ prof_postfork_child(tsdn_t *tsdn) {
|
||||
if (config_prof && opt_prof) {
|
||||
unsigned i;
|
||||
|
||||
malloc_mutex_postfork_child(tsdn, &prof_recent_alloc_mtx);
|
||||
malloc_mutex_postfork_child(tsdn, &prof_thread_active_init_mtx);
|
||||
malloc_mutex_postfork_child(tsdn, &next_thr_uid_mtx);
|
||||
malloc_mutex_postfork_child(tsdn, &prof_gdump_mtx);
|
||||
|
@@ -378,6 +378,7 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) {
|
||||
ret.p->tdata = tdata;
|
||||
ret.p->thr_uid = tdata->thr_uid;
|
||||
ret.p->thr_discrim = tdata->thr_discrim;
|
||||
ret.p->recent_count = 0;
|
||||
memset(&ret.p->cnts, 0, sizeof(prof_cnt_t));
|
||||
ret.p->gctx = gctx;
|
||||
ret.p->tctx_uid = tdata->tctx_uid_next++;
|
||||
@@ -405,8 +406,15 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) {
|
||||
|
||||
prof_tctx_t *
|
||||
prof_tctx_create(tsd_t *tsd) {
|
||||
prof_tdata_t *tdata = prof_tdata_get(tsd, false);
|
||||
assert(tdata != NULL);
|
||||
if (tsd_reentrancy_level_get(tsd) > 0) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
prof_tdata_t *tdata = prof_tdata_get(tsd, true);
|
||||
if (tdata == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
prof_bt_t bt;
|
||||
bt_init(&bt, tdata->vec);
|
||||
prof_backtrace(tsd, &bt);
|
||||
@@ -1417,6 +1425,9 @@ prof_tctx_should_destroy(tsd_t *tsd, prof_tctx_t *tctx) {
|
||||
if (tctx->prepared) {
|
||||
return false;
|
||||
}
|
||||
if (tctx->recent_count != 0) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
553
src/prof_recent.c
Normal file
553
src/prof_recent.c
Normal file
@@ -0,0 +1,553 @@
|
||||
#define JEMALLOC_PROF_RECENT_C_
|
||||
#include "jemalloc/internal/jemalloc_preamble.h"
|
||||
#include "jemalloc/internal/jemalloc_internal_includes.h"
|
||||
|
||||
#include "jemalloc/internal/assert.h"
|
||||
#include "jemalloc/internal/emitter.h"
|
||||
#include "jemalloc/internal/prof_data.h"
|
||||
#include "jemalloc/internal/prof_recent.h"
|
||||
|
||||
#ifndef JEMALLOC_JET
|
||||
# define STATIC_INLINE_IF_NOT_TEST static inline
|
||||
#else
|
||||
# define STATIC_INLINE_IF_NOT_TEST
|
||||
#endif
|
||||
|
||||
ssize_t opt_prof_recent_alloc_max = PROF_RECENT_ALLOC_MAX_DEFAULT;
|
||||
malloc_mutex_t prof_recent_alloc_mtx; /* Protects the fields below */
|
||||
static atomic_zd_t prof_recent_alloc_max;
|
||||
static ssize_t prof_recent_alloc_count = 0;
|
||||
static prof_recent_t *prof_recent_alloc_dummy = NULL;
|
||||
|
||||
static void
|
||||
prof_recent_alloc_max_init() {
|
||||
atomic_store_zd(&prof_recent_alloc_max, opt_prof_recent_alloc_max,
|
||||
ATOMIC_RELAXED);
|
||||
}
|
||||
|
||||
static inline ssize_t
|
||||
prof_recent_alloc_max_get_no_lock() {
|
||||
return atomic_load_zd(&prof_recent_alloc_max, ATOMIC_RELAXED);
|
||||
}
|
||||
|
||||
static inline ssize_t
|
||||
prof_recent_alloc_max_get(tsd_t *tsd) {
|
||||
malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
|
||||
return prof_recent_alloc_max_get_no_lock();
|
||||
}
|
||||
|
||||
static inline ssize_t
|
||||
prof_recent_alloc_max_update(tsd_t *tsd, ssize_t max) {
|
||||
malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
|
||||
ssize_t old_max = prof_recent_alloc_max_get(tsd);
|
||||
atomic_store_zd(&prof_recent_alloc_max, max, ATOMIC_RELAXED);
|
||||
return old_max;
|
||||
}
|
||||
|
||||
static inline void
|
||||
increment_recent_count(tsd_t *tsd, prof_tctx_t *tctx) {
|
||||
malloc_mutex_assert_owner(tsd_tsdn(tsd), tctx->tdata->lock);
|
||||
++tctx->recent_count;
|
||||
assert(tctx->recent_count > 0);
|
||||
}
|
||||
|
||||
bool
|
||||
prof_recent_alloc_prepare(tsd_t *tsd, prof_tctx_t *tctx) {
|
||||
assert(opt_prof && prof_booted);
|
||||
malloc_mutex_assert_owner(tsd_tsdn(tsd), tctx->tdata->lock);
|
||||
malloc_mutex_assert_not_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
|
||||
|
||||
/*
|
||||
* Check whether last-N mode is turned on without trying to acquire the
|
||||
* lock, so as to optimize for the following two scenarios:
|
||||
* (1) Last-N mode is switched off;
|
||||
* (2) Dumping, during which last-N mode is temporarily turned off so
|
||||
* as not to block sampled allocations.
|
||||
*/
|
||||
if (prof_recent_alloc_max_get_no_lock() == 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* Increment recent_count to hold the tctx so that it won't be gone
|
||||
* even after tctx->tdata->lock is released. This acts as a
|
||||
* "placeholder"; the real recording of the allocation requires a lock
|
||||
* on prof_recent_alloc_mtx and is done in prof_recent_alloc (when
|
||||
* tctx->tdata->lock has been released).
|
||||
*/
|
||||
increment_recent_count(tsd, tctx);
|
||||
return true;
|
||||
}
|
||||
|
||||
static void
|
||||
decrement_recent_count(tsd_t *tsd, prof_tctx_t *tctx) {
|
||||
malloc_mutex_assert_not_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
|
||||
assert(tctx != NULL);
|
||||
malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock);
|
||||
assert(tctx->recent_count > 0);
|
||||
--tctx->recent_count;
|
||||
prof_tctx_try_destroy(tsd, tctx);
|
||||
}
|
||||
|
||||
void
|
||||
edata_prof_recent_alloc_init(edata_t *edata) {
|
||||
edata_prof_recent_alloc_set_dont_call_directly(edata, NULL);
|
||||
}
|
||||
|
||||
static inline prof_recent_t *
|
||||
edata_prof_recent_alloc_get_no_lock(const edata_t *edata) {
|
||||
return edata_prof_recent_alloc_get_dont_call_directly(edata);
|
||||
}
|
||||
|
||||
/*
 * Fetch the record pointer stored in edata.  The caller must own
 * prof_recent_alloc_mtx (asserted).  A non-NULL record is additionally
 * checked to point back at this edata.
 */
STATIC_INLINE_IF_NOT_TEST prof_recent_t *
edata_prof_recent_alloc_get(tsd_t *tsd, const edata_t *edata) {
	malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);

	prof_recent_t *record = edata_prof_recent_alloc_get_no_lock(edata);
	if (record != NULL) {
		assert(record->alloc_edata == edata);
	}
	return record;
}
|
||||
|
||||
static prof_recent_t *
|
||||
edata_prof_recent_alloc_update_internal(tsd_t *tsd, edata_t *edata,
|
||||
prof_recent_t *recent_alloc) {
|
||||
malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
|
||||
prof_recent_t *old_recent_alloc =
|
||||
edata_prof_recent_alloc_get(tsd, edata);
|
||||
edata_prof_recent_alloc_set_dont_call_directly(edata, recent_alloc);
|
||||
return old_recent_alloc;
|
||||
}
|
||||
|
||||
static void
|
||||
edata_prof_recent_alloc_set(tsd_t *tsd, edata_t *edata,
|
||||
prof_recent_t *recent_alloc) {
|
||||
assert(recent_alloc != NULL);
|
||||
prof_recent_t *old_recent_alloc =
|
||||
edata_prof_recent_alloc_update_internal(tsd, edata, recent_alloc);
|
||||
assert(old_recent_alloc == NULL);
|
||||
recent_alloc->alloc_edata = edata;
|
||||
}
|
||||
|
||||
static void
|
||||
edata_prof_recent_alloc_reset(tsd_t *tsd, edata_t *edata,
|
||||
prof_recent_t *recent_alloc) {
|
||||
assert(recent_alloc != NULL);
|
||||
prof_recent_t *old_recent_alloc =
|
||||
edata_prof_recent_alloc_update_internal(tsd, edata, NULL);
|
||||
assert(old_recent_alloc == recent_alloc);
|
||||
assert(edata == recent_alloc->alloc_edata);
|
||||
recent_alloc->alloc_edata = NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* This function should be called right before an allocation is released, so
|
||||
* that the associated recent allocation record can contain the following
|
||||
* information:
|
||||
* (1) The allocation is released;
|
||||
* (2) The time of the deallocation; and
|
||||
* (3) The prof_tctx associated with the deallocation.
|
||||
*/
|
||||
void
|
||||
prof_recent_alloc_reset(tsd_t *tsd, edata_t *edata) {
|
||||
/*
|
||||
* Check whether the recent allocation record still exists without
|
||||
* trying to acquire the lock.
|
||||
*/
|
||||
if (edata_prof_recent_alloc_get_no_lock(edata) == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
prof_tctx_t *dalloc_tctx = prof_tctx_create(tsd);
|
||||
/*
|
||||
* In case dalloc_tctx is NULL, e.g. due to OOM, we will not record the
|
||||
* deallocation time / tctx, which is handled later, after we check
|
||||
* again when holding the lock.
|
||||
*/
|
||||
|
||||
if (dalloc_tctx != NULL) {
|
||||
malloc_mutex_lock(tsd_tsdn(tsd), dalloc_tctx->tdata->lock);
|
||||
increment_recent_count(tsd, dalloc_tctx);
|
||||
dalloc_tctx->prepared = false;
|
||||
malloc_mutex_unlock(tsd_tsdn(tsd), dalloc_tctx->tdata->lock);
|
||||
}
|
||||
|
||||
malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
|
||||
/* Check again after acquiring the lock. */
|
||||
prof_recent_t *recent = edata_prof_recent_alloc_get(tsd, edata);
|
||||
if (recent != NULL) {
|
||||
edata_prof_recent_alloc_reset(tsd, edata, recent);
|
||||
assert(nstime_equals_zero(&recent->dalloc_time));
|
||||
assert(recent->dalloc_tctx == NULL);
|
||||
if (dalloc_tctx != NULL) {
|
||||
nstime_update(&recent->dalloc_time);
|
||||
recent->dalloc_tctx = dalloc_tctx;
|
||||
}
|
||||
} else if (dalloc_tctx != NULL) {
|
||||
/* We lost the rase - the allocation record was just gone. */
|
||||
decrement_recent_count(tsd, dalloc_tctx);
|
||||
}
|
||||
malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
|
||||
}
|
||||
|
||||
static void
|
||||
prof_recent_alloc_evict_edata(tsd_t *tsd, prof_recent_t *recent) {
|
||||
malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
|
||||
if (recent->alloc_edata != NULL) {
|
||||
edata_prof_recent_alloc_reset(tsd, recent->alloc_edata, recent);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Return the first node of the record list, i.e. the node right after the
 * dummy (this is the dummy itself when the list is empty).  The caller must
 * own prof_recent_alloc_mtx (asserted).
 */
STATIC_INLINE_IF_NOT_TEST prof_recent_t *
prof_recent_alloc_begin(tsd_t *tsd) {
	malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	assert(prof_recent_alloc_dummy != NULL);

	prof_recent_t *first = prof_recent_alloc_dummy->next;
	return first;
}
|
||||
|
||||
/*
 * Return the past-the-end marker for iteration over the record list, which is
 * the dummy node.  The caller must own prof_recent_alloc_mtx (asserted).
 */
STATIC_INLINE_IF_NOT_TEST prof_recent_t *
prof_recent_alloc_end(tsd_t *tsd) {
	malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	assert(prof_recent_alloc_dummy != NULL);

	prof_recent_t *sentinel = prof_recent_alloc_dummy;
	return sentinel;
}
|
||||
|
||||
/*
 * Return the successor of node in the record list.  node must be a real
 * record: neither NULL nor the dummy (asserted).  The caller must own
 * prof_recent_alloc_mtx (asserted).
 */
STATIC_INLINE_IF_NOT_TEST prof_recent_t *
prof_recent_alloc_next(tsd_t *tsd, prof_recent_t *node) {
	malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	assert(prof_recent_alloc_dummy != NULL);
	assert(node != NULL && node != prof_recent_alloc_dummy);

	prof_recent_t *successor = node->next;
	return successor;
}
|
||||
|
||||
static bool
|
||||
prof_recent_alloc_is_empty(tsd_t *tsd) {
|
||||
malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
|
||||
if (prof_recent_alloc_begin(tsd) == prof_recent_alloc_end(tsd)) {
|
||||
assert(prof_recent_alloc_count == 0);
|
||||
return true;
|
||||
} else {
|
||||
assert(prof_recent_alloc_count > 0);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
prof_recent_alloc_assert_count(tsd_t *tsd) {
|
||||
malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
|
||||
if (config_debug) {
|
||||
ssize_t count = 0;
|
||||
prof_recent_t *n = prof_recent_alloc_begin(tsd);
|
||||
while (n != prof_recent_alloc_end(tsd)) {
|
||||
++count;
|
||||
n = prof_recent_alloc_next(tsd, n);
|
||||
}
|
||||
assert(count == prof_recent_alloc_count);
|
||||
assert(prof_recent_alloc_max_get(tsd) == -1 ||
|
||||
count <= prof_recent_alloc_max_get(tsd));
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
prof_recent_alloc(tsd_t *tsd, edata_t *edata, size_t usize) {
|
||||
assert(edata != NULL);
|
||||
prof_tctx_t *tctx = edata_prof_tctx_get(edata);
|
||||
|
||||
malloc_mutex_assert_not_owner(tsd_tsdn(tsd), tctx->tdata->lock);
|
||||
malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
|
||||
prof_recent_alloc_assert_count(tsd);
|
||||
|
||||
/*
|
||||
* Reserve a new prof_recent_t node if needed. If needed, we release
|
||||
* the prof_recent_alloc_mtx lock and allocate. Then, rather than
|
||||
* immediately checking for OOM, we regain the lock and try to make use
|
||||
* of the reserve node if needed. There are six scenarios:
|
||||
*
|
||||
* \ now | no need | need but OOMed | need and allocated
|
||||
* later \ | | |
|
||||
* ------------------------------------------------------------
|
||||
* no need | (1) | (2) | (3)
|
||||
* ------------------------------------------------------------
|
||||
* need | (4) | (5) | (6)
|
||||
*
|
||||
* First, "(4)" never happens, because we don't release the lock in the
|
||||
* middle if there's no need for a new node; in such cases "(1)" always
|
||||
* takes place, which is trivial.
|
||||
*
|
||||
* Out of the remaining four scenarios, "(6)" is the common case and is
|
||||
* trivial. "(5)" is also trivial, in which case we'll rollback the
|
||||
* effect of prof_recent_alloc_prepare() as expected.
|
||||
*
|
||||
* "(2)" / "(3)" occurs when the need for a new node is gone after we
|
||||
* regain the lock. If the new node is successfully allocated, i.e. in
|
||||
* the case of "(3)", we'll release it in the end; otherwise, i.e. in
|
||||
* the case of "(2)", we do nothing - we're lucky that the OOM ends up
|
||||
* doing no harm at all.
|
||||
*
|
||||
* Therefore, the only performance cost of the "release lock" ->
|
||||
* "allocate" -> "regain lock" design is the "(3)" case, but it happens
|
||||
* very rarely, so the cost is relatively small compared to the gain of
|
||||
* not having to have the lock order of prof_recent_alloc_mtx above all
|
||||
* the allocation locks.
|
||||
*/
|
||||
prof_recent_t *reserve = NULL;
|
||||
if (prof_recent_alloc_max_get(tsd) == -1 ||
|
||||
prof_recent_alloc_count < prof_recent_alloc_max_get(tsd)) {
|
||||
assert(prof_recent_alloc_max_get(tsd) != 0);
|
||||
malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
|
||||
reserve = (prof_recent_t *)iallocztm(tsd_tsdn(tsd),
|
||||
sizeof(prof_recent_t), sz_size2index(sizeof(prof_recent_t)),
|
||||
false, NULL, true, arena_get(tsd_tsdn(tsd), 0, false),
|
||||
true);
|
||||
malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
|
||||
prof_recent_alloc_assert_count(tsd);
|
||||
}
|
||||
|
||||
if (prof_recent_alloc_max_get(tsd) == 0) {
|
||||
assert(prof_recent_alloc_is_empty(tsd));
|
||||
goto label_rollback;
|
||||
}
|
||||
|
||||
assert(prof_recent_alloc_dummy != NULL);
|
||||
{
|
||||
/* Fill content into the dummy node. */
|
||||
prof_recent_t *node = prof_recent_alloc_dummy;
|
||||
node->usize = usize;
|
||||
nstime_copy(&node->alloc_time,
|
||||
edata_prof_alloc_time_get(edata));
|
||||
node->alloc_tctx = tctx;
|
||||
edata_prof_recent_alloc_set(tsd, edata, node);
|
||||
nstime_init_zero(&node->dalloc_time);
|
||||
node->dalloc_tctx = NULL;
|
||||
}
|
||||
|
||||
prof_tctx_t *old_alloc_tctx, *old_dalloc_tctx;
|
||||
if (prof_recent_alloc_count == prof_recent_alloc_max_get(tsd)) {
|
||||
/* If upper limit is reached, simply shift the dummy. */
|
||||
assert(prof_recent_alloc_max_get(tsd) != -1);
|
||||
assert(!prof_recent_alloc_is_empty(tsd));
|
||||
prof_recent_alloc_dummy = prof_recent_alloc_dummy->next;
|
||||
old_alloc_tctx = prof_recent_alloc_dummy->alloc_tctx;
|
||||
assert(old_alloc_tctx != NULL);
|
||||
old_dalloc_tctx = prof_recent_alloc_dummy->dalloc_tctx;
|
||||
prof_recent_alloc_evict_edata(tsd, prof_recent_alloc_dummy);
|
||||
} else {
|
||||
/* Otherwise use the new node as the dummy. */
|
||||
assert(prof_recent_alloc_max_get(tsd) == -1 ||
|
||||
prof_recent_alloc_count < prof_recent_alloc_max_get(tsd));
|
||||
if (reserve == NULL) {
|
||||
goto label_rollback;
|
||||
}
|
||||
reserve->next = prof_recent_alloc_dummy->next;
|
||||
prof_recent_alloc_dummy->next = reserve;
|
||||
prof_recent_alloc_dummy = reserve;
|
||||
reserve = NULL;
|
||||
old_alloc_tctx = NULL;
|
||||
old_dalloc_tctx = NULL;
|
||||
++prof_recent_alloc_count;
|
||||
}
|
||||
|
||||
assert(!prof_recent_alloc_is_empty(tsd));
|
||||
prof_recent_alloc_assert_count(tsd);
|
||||
malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
|
||||
|
||||
if (reserve != NULL) {
|
||||
idalloctm(tsd_tsdn(tsd), reserve, NULL, NULL, true, true);
|
||||
}
|
||||
|
||||
/*
|
||||
* Asynchronously handle the tctx of the old node, so that there's no
|
||||
* simultaneous holdings of prof_recent_alloc_mtx and tdata->lock.
|
||||
* In the worst case this may delay the tctx release but it's better
|
||||
* than holding prof_recent_alloc_mtx for longer.
|
||||
*/
|
||||
if (old_alloc_tctx != NULL) {
|
||||
decrement_recent_count(tsd, old_alloc_tctx);
|
||||
}
|
||||
if (old_dalloc_tctx != NULL) {
|
||||
decrement_recent_count(tsd, old_dalloc_tctx);
|
||||
}
|
||||
return;
|
||||
|
||||
label_rollback:
|
||||
assert(edata_prof_recent_alloc_get(tsd, edata) == NULL);
|
||||
prof_recent_alloc_assert_count(tsd);
|
||||
malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
|
||||
if (reserve != NULL) {
|
||||
idalloctm(tsd_tsdn(tsd), reserve, NULL, NULL, true, true);
|
||||
}
|
||||
decrement_recent_count(tsd, tctx);
|
||||
}
|
||||
|
||||
/*
 * Return the current limit for the ctl read path.  The value is read
 * atomically, without taking prof_recent_alloc_mtx.
 */
ssize_t
prof_recent_alloc_max_ctl_read() {
	ssize_t current_max = prof_recent_alloc_max_get_no_lock();
	return current_max;
}
|
||||
|
||||
ssize_t
|
||||
prof_recent_alloc_max_ctl_write(tsd_t *tsd, ssize_t max) {
|
||||
assert(max >= -1);
|
||||
|
||||
malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
|
||||
prof_recent_alloc_assert_count(tsd);
|
||||
|
||||
const ssize_t old_max = prof_recent_alloc_max_update(tsd, max);
|
||||
|
||||
if (max == -1 || prof_recent_alloc_count <= max) {
|
||||
/* Easy case - no need to alter the list. */
|
||||
malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
|
||||
return old_max;
|
||||
}
|
||||
|
||||
prof_recent_t *begin = prof_recent_alloc_dummy->next;
|
||||
/* For verification purpose only. */
|
||||
ssize_t count = prof_recent_alloc_count - max;
|
||||
do {
|
||||
assert(!prof_recent_alloc_is_empty(tsd));
|
||||
prof_recent_t *node = prof_recent_alloc_dummy->next;
|
||||
assert(node != prof_recent_alloc_dummy);
|
||||
prof_recent_alloc_evict_edata(tsd, node);
|
||||
prof_recent_alloc_dummy->next = node->next;
|
||||
--prof_recent_alloc_count;
|
||||
} while (prof_recent_alloc_count > max);
|
||||
prof_recent_t *end = prof_recent_alloc_dummy->next;
|
||||
assert(begin != end);
|
||||
|
||||
prof_recent_alloc_assert_count(tsd);
|
||||
malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
|
||||
|
||||
/*
|
||||
* Asynchronously handle the tctx of the to-be-deleted nodes, so that
|
||||
* there's no simultaneous holdings of prof_recent_alloc_mtx and
|
||||
* tdata->lock. In the worst case there can be slightly extra space
|
||||
* overhead taken by these nodes, but the total number of nodes at any
|
||||
* time is bounded by (max + sum(decreases)), where "max" means the
|
||||
* most recent prof_recent_alloc_max and "sum(decreases)" means the
|
||||
* sum of the deltas of all decreases in prof_recent_alloc_max in the
|
||||
* past. This (max + sum(decreases)) value is completely transparent
|
||||
* to and controlled by application.
|
||||
*/
|
||||
do {
|
||||
prof_recent_t *node = begin;
|
||||
decrement_recent_count(tsd, node->alloc_tctx);
|
||||
if (node->dalloc_tctx != NULL) {
|
||||
decrement_recent_count(tsd, node->dalloc_tctx);
|
||||
}
|
||||
begin = node->next;
|
||||
idalloctm(tsd_tsdn(tsd), node, NULL, NULL, true, true);
|
||||
--count;
|
||||
} while (begin != end);
|
||||
assert(count == 0);
|
||||
|
||||
return old_max;
|
||||
}
|
||||
|
||||
static void
|
||||
dump_bt(emitter_t *emitter, prof_tctx_t *tctx) {
|
||||
char bt_buf[2 * sizeof(intptr_t) + 3];
|
||||
char *s = bt_buf;
|
||||
assert(tctx != NULL);
|
||||
prof_bt_t *bt = &tctx->gctx->bt;
|
||||
for (size_t i = 0; i < bt->len; ++i) {
|
||||
malloc_snprintf(bt_buf, sizeof(bt_buf), "%p", bt->vec[i]);
|
||||
emitter_json_value(emitter, emitter_type_string, &s);
|
||||
}
|
||||
}
|
||||
|
||||
#define PROF_RECENT_PRINT_BUFSIZE 4096
|
||||
void
|
||||
prof_recent_alloc_dump(tsd_t *tsd, void (*write_cb)(void *, const char *),
|
||||
void *cbopaque) {
|
||||
char *buf = (char *)iallocztm(tsd_tsdn(tsd), PROF_RECENT_PRINT_BUFSIZE,
|
||||
sz_size2index(PROF_RECENT_PRINT_BUFSIZE), false, NULL, true,
|
||||
arena_get(tsd_tsdn(tsd), 0, false), true);
|
||||
buf_writer_arg_t buf_arg = {write_cb, cbopaque, buf,
|
||||
PROF_RECENT_PRINT_BUFSIZE - 1, 0};
|
||||
emitter_t emitter;
|
||||
emitter_init(&emitter, emitter_output_json_compact, buffered_write_cb,
|
||||
&buf_arg);
|
||||
emitter_begin(&emitter);
|
||||
|
||||
malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
|
||||
prof_recent_alloc_assert_count(tsd);
|
||||
|
||||
/*
|
||||
* Set prof_recent_alloc_max to 0 so that dumping won't block sampled
|
||||
* allocations: the allocations can complete but will not be recorded.
|
||||
*/
|
||||
ssize_t max = prof_recent_alloc_max_update(tsd, 0);
|
||||
|
||||
emitter_json_kv(&emitter, "recent_alloc_max", emitter_type_ssize, &max);
|
||||
|
||||
emitter_json_array_kv_begin(&emitter, "recent_alloc");
|
||||
for (prof_recent_t *n = prof_recent_alloc_begin(tsd);
|
||||
n != prof_recent_alloc_end(tsd);
|
||||
n = prof_recent_alloc_next(tsd, n)) {
|
||||
emitter_json_object_begin(&emitter);
|
||||
|
||||
emitter_json_kv(&emitter, "usize", emitter_type_size,
|
||||
&n->usize);
|
||||
bool released = n->alloc_edata == NULL;
|
||||
emitter_json_kv(&emitter, "released", emitter_type_bool,
|
||||
&released);
|
||||
|
||||
emitter_json_kv(&emitter, "alloc_thread_uid",
|
||||
emitter_type_uint64, &n->alloc_tctx->thr_uid);
|
||||
uint64_t alloc_time_ns = nstime_ns(&n->alloc_time);
|
||||
emitter_json_kv(&emitter, "alloc_time", emitter_type_uint64,
|
||||
&alloc_time_ns);
|
||||
emitter_json_array_kv_begin(&emitter, "alloc_trace");
|
||||
dump_bt(&emitter, n->alloc_tctx);
|
||||
emitter_json_array_end(&emitter);
|
||||
|
||||
if (n->dalloc_tctx != NULL) {
|
||||
assert(released);
|
||||
emitter_json_kv(&emitter, "dalloc_thread_uid",
|
||||
emitter_type_uint64, &n->dalloc_tctx->thr_uid);
|
||||
assert(!nstime_equals_zero(&n->dalloc_time));
|
||||
uint64_t dalloc_time_ns = nstime_ns(&n->dalloc_time);
|
||||
emitter_json_kv(&emitter, "dalloc_time",
|
||||
emitter_type_uint64, &dalloc_time_ns);
|
||||
emitter_json_array_kv_begin(&emitter, "dalloc_trace");
|
||||
dump_bt(&emitter, n->dalloc_tctx);
|
||||
emitter_json_array_end(&emitter);
|
||||
} else {
|
||||
assert(nstime_equals_zero(&n->dalloc_time));
|
||||
}
|
||||
|
||||
emitter_json_object_end(&emitter);
|
||||
}
|
||||
emitter_json_array_end(&emitter);
|
||||
|
||||
max = prof_recent_alloc_max_update(tsd, max);
|
||||
assert(max == 0);
|
||||
malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
|
||||
|
||||
emitter_end(&emitter);
|
||||
buf_writer_flush(&buf_arg);
|
||||
idalloctm(tsd_tsdn(tsd), buf, NULL, NULL, true, true);
|
||||
}
|
||||
#undef PROF_RECENT_PRINT_BUFSIZE
|
||||
|
||||
bool
|
||||
prof_recent_init() {
|
||||
prof_recent_alloc_max_init();
|
||||
|
||||
if (malloc_mutex_init(&prof_recent_alloc_mtx,
|
||||
"prof_recent_alloc", WITNESS_RANK_PROF_RECENT_ALLOC,
|
||||
malloc_mutex_rank_exclusive)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
assert(prof_recent_alloc_dummy == NULL);
|
||||
prof_recent_alloc_dummy = (prof_recent_t *)iallocztm(
|
||||
TSDN_NULL, sizeof(prof_recent_t),
|
||||
sz_size2index(sizeof(prof_recent_t)), false, NULL, true,
|
||||
arena_get(TSDN_NULL, 0, true), true);
|
||||
if (prof_recent_alloc_dummy == NULL) {
|
||||
return true;
|
||||
}
|
||||
prof_recent_alloc_dummy->next = prof_recent_alloc_dummy;
|
||||
|
||||
return false;
|
||||
}
|
Reference in New Issue
Block a user