Refactor and optimize prof sampling initialization.
Makes the prof sample prng use the tsd prng_state. This allows us to properly initialize the sample interval event, without having to create tdata. As a result, tdata will be created on demand (when a thread reaches the sample interval bytes allocated), instead of on the first allocation.
This commit is contained in:
parent
bc774a3519
commit
da50d8ce87
@ -100,7 +100,7 @@ void prof_prefork0(tsdn_t *tsdn);
|
||||
void prof_prefork1(tsdn_t *tsdn);
|
||||
void prof_postfork_parent(tsdn_t *tsdn);
|
||||
void prof_postfork_child(tsdn_t *tsdn);
|
||||
void prof_sample_threshold_update(prof_tdata_t *tdata);
|
||||
void prof_sample_threshold_update(tsd_t *tsd);
|
||||
|
||||
void prof_try_log(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx);
|
||||
bool prof_log_start(tsdn_t *tsdn, const char *filename);
|
||||
@ -120,7 +120,7 @@ bool prof_data_init(tsd_t *tsd);
|
||||
bool prof_dump(tsd_t *tsd, bool propagate_err, const char *filename,
|
||||
bool leakcheck);
|
||||
prof_tdata_t * prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid,
|
||||
uint64_t thr_discrim, char *thread_name, bool active);
|
||||
uint64_t thr_discrim, char *thread_name, bool active, bool reset_interval);
|
||||
void prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata);
|
||||
void prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx);
|
||||
|
||||
|
@ -82,9 +82,7 @@ prof_alloc_time_set(tsdn_t *tsdn, const void *ptr, nstime_t t) {
|
||||
|
||||
JEMALLOC_ALWAYS_INLINE bool
|
||||
prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update,
|
||||
prof_tdata_t **tdata_out) {
|
||||
prof_tdata_t *tdata;
|
||||
|
||||
prof_tdata_t **tdata_out) {
|
||||
cassert(config_prof);
|
||||
|
||||
/* Fastpath: no need to load tdata */
|
||||
@ -96,8 +94,7 @@ prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update,
|
||||
return true;
|
||||
}
|
||||
|
||||
bool booted = prof_tdata_get(tsd, false);
|
||||
tdata = prof_tdata_get(tsd, true);
|
||||
prof_tdata_t *tdata = prof_tdata_get(tsd, true);
|
||||
if (unlikely((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)) {
|
||||
tdata = NULL;
|
||||
}
|
||||
@ -110,45 +107,9 @@ prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update,
|
||||
return true;
|
||||
}
|
||||
|
||||
if (!booted) {
|
||||
/*
|
||||
* If this was the first creation of tdata, then it means that
|
||||
* the previous thread_event() relied on the wrong prof_sample
|
||||
* wait time, and that it should have relied on the new
|
||||
* prof_sample wait time just set by prof_tdata_get(), so we
|
||||
* now manually check again.
|
||||
*
|
||||
* If the check fails, then even though we relied on the wrong
|
||||
* prof_sample wait time, we're now actually in perfect shape,
|
||||
* in the sense that we can pretend that we have used the right
|
||||
* prof_sample wait time.
|
||||
*
|
||||
* If the check succeeds, then we are now in a tougher
|
||||
* situation, in the sense that we cannot pretend that we have
|
||||
* used the right prof_sample wait time. A straightforward
|
||||
* solution would be to fully roll back thread_event(), set the
|
||||
* right prof_sample wait time, and then redo thread_event().
|
||||
* A simpler way, which is implemented below, is to just set a
|
||||
* new prof_sample wait time that is usize less, and do nothing
|
||||
* else. Strictly speaking, the thread event handler may end
|
||||
* up in a wrong state, since it has still recorded an event
|
||||
* whereas in reality there may be no event. However, the
|
||||
* difference in the wait time offsets the wrongly recorded
|
||||
* event, so that, functionally, the countdown to the next
|
||||
* event will behave exactly as if we have used the right
|
||||
* prof_sample wait time in the first place.
|
||||
*/
|
||||
uint64_t wait = prof_sample_event_wait_get(tsd);
|
||||
assert(wait > 0);
|
||||
if (usize < wait) {
|
||||
thread_prof_sample_event_update(tsd, wait - usize);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
/* Compute new sample threshold. */
|
||||
if (update) {
|
||||
prof_sample_threshold_update(tdata);
|
||||
prof_sample_threshold_update(tsd);
|
||||
}
|
||||
return !tdata->active;
|
||||
}
|
||||
|
@ -167,9 +167,6 @@ struct prof_tdata_s {
|
||||
*/
|
||||
ckh_t bt2tctx;
|
||||
|
||||
/* Sampling state. */
|
||||
uint64_t prng_state;
|
||||
|
||||
/* State used to avoid dumping while operating on prof internals. */
|
||||
bool enq;
|
||||
bool enq_idump;
|
||||
|
13
src/prof.c
13
src/prof.c
@ -149,7 +149,7 @@ prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated) {
|
||||
*/
|
||||
tdata = prof_tdata_get(tsd, true);
|
||||
if (tdata != NULL) {
|
||||
prof_sample_threshold_update(tdata);
|
||||
prof_sample_threshold_update(tsd);
|
||||
}
|
||||
}
|
||||
|
||||
@ -469,14 +469,12 @@ prof_tdata_mutex_choose(uint64_t thr_uid) {
|
||||
* -mno-sse) in order for the workaround to be complete.
|
||||
*/
|
||||
void
|
||||
prof_sample_threshold_update(prof_tdata_t *tdata) {
|
||||
prof_sample_threshold_update(tsd_t *tsd) {
|
||||
#ifdef JEMALLOC_PROF
|
||||
if (!config_prof) {
|
||||
return;
|
||||
}
|
||||
|
||||
tsd_t *tsd = tsd_fetch();
|
||||
|
||||
if (lg_prof_sample == 0) {
|
||||
thread_prof_sample_event_update(tsd,
|
||||
THREAD_EVENT_MIN_START_WAIT);
|
||||
@ -501,13 +499,12 @@ prof_sample_threshold_update(prof_tdata_t *tdata) {
|
||||
* pp 500
|
||||
* (http://luc.devroye.org/rnbookindex.html)
|
||||
*/
|
||||
uint64_t r = prng_lg_range_u64(&tdata->prng_state, 53);
|
||||
uint64_t r = prng_lg_range_u64(tsd_prng_statep_get(tsd), 53);
|
||||
double u = (double)r * (1.0/9007199254740992.0L);
|
||||
uint64_t bytes_until_sample = (uint64_t)(log(u) /
|
||||
log(1.0 - (1.0 / (double)((uint64_t)1U << lg_prof_sample))))
|
||||
+ (uint64_t)1U;
|
||||
thread_prof_sample_event_update(tsd, bytes_until_sample);
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -810,7 +807,7 @@ prof_thr_uid_alloc(tsdn_t *tsdn) {
|
||||
prof_tdata_t *
|
||||
prof_tdata_init(tsd_t *tsd) {
|
||||
return prof_tdata_init_impl(tsd, prof_thr_uid_alloc(tsd_tsdn(tsd)), 0,
|
||||
NULL, prof_thread_active_init_get(tsd_tsdn(tsd)));
|
||||
NULL, prof_thread_active_init_get(tsd_tsdn(tsd)), false);
|
||||
}
|
||||
|
||||
static char *
|
||||
@ -846,7 +843,7 @@ prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata) {
|
||||
|
||||
prof_tdata_detach(tsd, tdata);
|
||||
return prof_tdata_init_impl(tsd, thr_uid, thr_discrim, thread_name,
|
||||
active);
|
||||
active, true);
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -1198,7 +1198,7 @@ prof_bt_keycomp(const void *k1, const void *k2) {
|
||||
|
||||
prof_tdata_t *
|
||||
prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim,
|
||||
char *thread_name, bool active) {
|
||||
char *thread_name, bool active, bool reset_interval) {
|
||||
assert(tsd_reentrancy_level_get(tsd) == 0);
|
||||
|
||||
prof_tdata_t *tdata;
|
||||
@ -1227,8 +1227,9 @@ prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
tdata->prng_state = (uint64_t)(uintptr_t)tdata;
|
||||
prof_sample_threshold_update(tdata);
|
||||
if (reset_interval) {
|
||||
prof_sample_threshold_update(tsd);
|
||||
}
|
||||
|
||||
tdata->enq = false;
|
||||
tdata->enq_idump = false;
|
||||
|
@ -34,7 +34,7 @@ tsd_thread_tcache_gc_event_init(tsd_t *tsd) {
|
||||
static void
|
||||
tsd_thread_prof_sample_event_init(tsd_t *tsd) {
|
||||
assert(config_prof && opt_prof);
|
||||
/* Do not set sample interval until the first allocation. */
|
||||
prof_sample_threshold_update(tsd);
|
||||
}
|
||||
|
||||
static void
|
||||
|
Loading…
Reference in New Issue
Block a user