Refactor and optimize prof sampling initialization.

Make the prof sample prng use the tsd prng_state.  This allows us to properly
initialize the sample interval event without having to create tdata.  As a
result, tdata is created on demand (when a thread's allocated bytes reach the
sample interval) instead of on the first allocation.
This commit is contained in:
Qi Wang 2019-11-04 17:22:25 -08:00 committed by Qi Wang
parent bc774a3519
commit da50d8ce87
7 changed files with 16 additions and 59 deletions

View File

@ -100,7 +100,7 @@ void prof_prefork0(tsdn_t *tsdn);
void prof_prefork1(tsdn_t *tsdn); void prof_prefork1(tsdn_t *tsdn);
void prof_postfork_parent(tsdn_t *tsdn); void prof_postfork_parent(tsdn_t *tsdn);
void prof_postfork_child(tsdn_t *tsdn); void prof_postfork_child(tsdn_t *tsdn);
void prof_sample_threshold_update(prof_tdata_t *tdata); void prof_sample_threshold_update(tsd_t *tsd);
void prof_try_log(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx); void prof_try_log(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx);
bool prof_log_start(tsdn_t *tsdn, const char *filename); bool prof_log_start(tsdn_t *tsdn, const char *filename);
@ -120,7 +120,7 @@ bool prof_data_init(tsd_t *tsd);
bool prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, bool prof_dump(tsd_t *tsd, bool propagate_err, const char *filename,
bool leakcheck); bool leakcheck);
prof_tdata_t * prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, prof_tdata_t * prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid,
uint64_t thr_discrim, char *thread_name, bool active); uint64_t thr_discrim, char *thread_name, bool active, bool reset_interval);
void prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata); void prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata);
void prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx); void prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx);

View File

@ -83,8 +83,6 @@ prof_alloc_time_set(tsdn_t *tsdn, const void *ptr, nstime_t t) {
JEMALLOC_ALWAYS_INLINE bool JEMALLOC_ALWAYS_INLINE bool
prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update, prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update,
prof_tdata_t **tdata_out) { prof_tdata_t **tdata_out) {
prof_tdata_t *tdata;
cassert(config_prof); cassert(config_prof);
/* Fastpath: no need to load tdata */ /* Fastpath: no need to load tdata */
@ -96,8 +94,7 @@ prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update,
return true; return true;
} }
bool booted = prof_tdata_get(tsd, false); prof_tdata_t *tdata = prof_tdata_get(tsd, true);
tdata = prof_tdata_get(tsd, true);
if (unlikely((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)) { if (unlikely((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)) {
tdata = NULL; tdata = NULL;
} }
@ -110,45 +107,9 @@ prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update,
return true; return true;
} }
if (!booted) {
/*
* If this was the first creation of tdata, then it means that
* the previous thread_event() relied on the wrong prof_sample
* wait time, and that it should have relied on the new
* prof_sample wait time just set by prof_tdata_get(), so we
* now manually check again.
*
* If the check fails, then even though we relied on the wrong
* prof_sample wait time, we're now actually in perfect shape,
* in the sense that we can pretend that we have used the right
* prof_sample wait time.
*
* If the check succeeds, then we are now in a tougher
* situation, in the sense that we cannot pretend that we have
* used the right prof_sample wait time. A straightforward
* solution would be to fully roll back thread_event(), set the
* right prof_sample wait time, and then redo thread_event().
* A simpler way, which is implemented below, is to just set a
* new prof_sample wait time that is usize less, and do nothing
* else. Strictly speaking, the thread event handler may end
* up in a wrong state, since it has still recorded an event
* whereas in reality there may be no event. However, the
* difference in the wait time offsets the wrongly recorded
* event, so that, functionally, the countdown to the next
* event will behave exactly as if we have used the right
* prof_sample wait time in the first place.
*/
uint64_t wait = prof_sample_event_wait_get(tsd);
assert(wait > 0);
if (usize < wait) {
thread_prof_sample_event_update(tsd, wait - usize);
return true;
}
}
/* Compute new sample threshold. */ /* Compute new sample threshold. */
if (update) { if (update) {
prof_sample_threshold_update(tdata); prof_sample_threshold_update(tsd);
} }
return !tdata->active; return !tdata->active;
} }

View File

@ -167,9 +167,6 @@ struct prof_tdata_s {
*/ */
ckh_t bt2tctx; ckh_t bt2tctx;
/* Sampling state. */
uint64_t prng_state;
/* State used to avoid dumping while operating on prof internals. */ /* State used to avoid dumping while operating on prof internals. */
bool enq; bool enq;
bool enq_idump; bool enq_idump;

View File

@ -149,7 +149,7 @@ prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated) {
*/ */
tdata = prof_tdata_get(tsd, true); tdata = prof_tdata_get(tsd, true);
if (tdata != NULL) { if (tdata != NULL) {
prof_sample_threshold_update(tdata); prof_sample_threshold_update(tsd);
} }
} }
@ -469,14 +469,12 @@ prof_tdata_mutex_choose(uint64_t thr_uid) {
* -mno-sse) in order for the workaround to be complete. * -mno-sse) in order for the workaround to be complete.
*/ */
void void
prof_sample_threshold_update(prof_tdata_t *tdata) { prof_sample_threshold_update(tsd_t *tsd) {
#ifdef JEMALLOC_PROF #ifdef JEMALLOC_PROF
if (!config_prof) { if (!config_prof) {
return; return;
} }
tsd_t *tsd = tsd_fetch();
if (lg_prof_sample == 0) { if (lg_prof_sample == 0) {
thread_prof_sample_event_update(tsd, thread_prof_sample_event_update(tsd,
THREAD_EVENT_MIN_START_WAIT); THREAD_EVENT_MIN_START_WAIT);
@ -501,13 +499,12 @@ prof_sample_threshold_update(prof_tdata_t *tdata) {
* pp 500 * pp 500
* (http://luc.devroye.org/rnbookindex.html) * (http://luc.devroye.org/rnbookindex.html)
*/ */
uint64_t r = prng_lg_range_u64(&tdata->prng_state, 53); uint64_t r = prng_lg_range_u64(tsd_prng_statep_get(tsd), 53);
double u = (double)r * (1.0/9007199254740992.0L); double u = (double)r * (1.0/9007199254740992.0L);
uint64_t bytes_until_sample = (uint64_t)(log(u) / uint64_t bytes_until_sample = (uint64_t)(log(u) /
log(1.0 - (1.0 / (double)((uint64_t)1U << lg_prof_sample)))) log(1.0 - (1.0 / (double)((uint64_t)1U << lg_prof_sample))))
+ (uint64_t)1U; + (uint64_t)1U;
thread_prof_sample_event_update(tsd, bytes_until_sample); thread_prof_sample_event_update(tsd, bytes_until_sample);
#endif #endif
} }
@ -810,7 +807,7 @@ prof_thr_uid_alloc(tsdn_t *tsdn) {
prof_tdata_t * prof_tdata_t *
prof_tdata_init(tsd_t *tsd) { prof_tdata_init(tsd_t *tsd) {
return prof_tdata_init_impl(tsd, prof_thr_uid_alloc(tsd_tsdn(tsd)), 0, return prof_tdata_init_impl(tsd, prof_thr_uid_alloc(tsd_tsdn(tsd)), 0,
NULL, prof_thread_active_init_get(tsd_tsdn(tsd))); NULL, prof_thread_active_init_get(tsd_tsdn(tsd)), false);
} }
static char * static char *
@ -846,7 +843,7 @@ prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata) {
prof_tdata_detach(tsd, tdata); prof_tdata_detach(tsd, tdata);
return prof_tdata_init_impl(tsd, thr_uid, thr_discrim, thread_name, return prof_tdata_init_impl(tsd, thr_uid, thr_discrim, thread_name,
active); active, true);
} }
void void

View File

@ -1198,7 +1198,7 @@ prof_bt_keycomp(const void *k1, const void *k2) {
prof_tdata_t * prof_tdata_t *
prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim,
char *thread_name, bool active) { char *thread_name, bool active, bool reset_interval) {
assert(tsd_reentrancy_level_get(tsd) == 0); assert(tsd_reentrancy_level_get(tsd) == 0);
prof_tdata_t *tdata; prof_tdata_t *tdata;
@ -1227,8 +1227,9 @@ prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim,
return NULL; return NULL;
} }
tdata->prng_state = (uint64_t)(uintptr_t)tdata; if (reset_interval) {
prof_sample_threshold_update(tdata); prof_sample_threshold_update(tsd);
}
tdata->enq = false; tdata->enq = false;
tdata->enq_idump = false; tdata->enq_idump = false;

View File

@ -34,7 +34,7 @@ tsd_thread_tcache_gc_event_init(tsd_t *tsd) {
static void static void
tsd_thread_prof_sample_event_init(tsd_t *tsd) { tsd_thread_prof_sample_event_init(tsd_t *tsd) {
assert(config_prof && opt_prof); assert(config_prof && opt_prof);
/* Do not set sample interval until the first allocation. */ prof_sample_threshold_update(tsd);
} }
static void static void

View File

@ -233,6 +233,7 @@ tsd_data_init(tsd_t *tsd) {
*tsd_prng_statep_get(tsd) = config_debug ? 0 : *tsd_prng_statep_get(tsd) = config_debug ? 0 :
(uint64_t)(uintptr_t)tsd; (uint64_t)(uintptr_t)tsd;
/* event_init may use the prng state above. */
tsd_thread_event_init(tsd); tsd_thread_event_init(tsd);
return tsd_tcache_enabled_data_init(tsd); return tsd_tcache_enabled_data_init(tsd);