Refactor and optimize prof sampling initialization.
Make the prof sampling PRNG use the tsd prng_state. This allows the sample interval event to be initialized properly without having to create tdata first. As a result, tdata is created on demand (once a thread has allocated a full sample interval's worth of bytes) instead of on the first allocation.
This commit is contained in:
parent
bc774a3519
commit
da50d8ce87
@ -100,7 +100,7 @@ void prof_prefork0(tsdn_t *tsdn);
|
|||||||
void prof_prefork1(tsdn_t *tsdn);
|
void prof_prefork1(tsdn_t *tsdn);
|
||||||
void prof_postfork_parent(tsdn_t *tsdn);
|
void prof_postfork_parent(tsdn_t *tsdn);
|
||||||
void prof_postfork_child(tsdn_t *tsdn);
|
void prof_postfork_child(tsdn_t *tsdn);
|
||||||
void prof_sample_threshold_update(prof_tdata_t *tdata);
|
void prof_sample_threshold_update(tsd_t *tsd);
|
||||||
|
|
||||||
void prof_try_log(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx);
|
void prof_try_log(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx);
|
||||||
bool prof_log_start(tsdn_t *tsdn, const char *filename);
|
bool prof_log_start(tsdn_t *tsdn, const char *filename);
|
||||||
@ -120,7 +120,7 @@ bool prof_data_init(tsd_t *tsd);
|
|||||||
bool prof_dump(tsd_t *tsd, bool propagate_err, const char *filename,
|
bool prof_dump(tsd_t *tsd, bool propagate_err, const char *filename,
|
||||||
bool leakcheck);
|
bool leakcheck);
|
||||||
prof_tdata_t * prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid,
|
prof_tdata_t * prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid,
|
||||||
uint64_t thr_discrim, char *thread_name, bool active);
|
uint64_t thr_discrim, char *thread_name, bool active, bool reset_interval);
|
||||||
void prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata);
|
void prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata);
|
||||||
void prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx);
|
void prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx);
|
||||||
|
|
||||||
|
@ -83,8 +83,6 @@ prof_alloc_time_set(tsdn_t *tsdn, const void *ptr, nstime_t t) {
|
|||||||
JEMALLOC_ALWAYS_INLINE bool
|
JEMALLOC_ALWAYS_INLINE bool
|
||||||
prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update,
|
prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update,
|
||||||
prof_tdata_t **tdata_out) {
|
prof_tdata_t **tdata_out) {
|
||||||
prof_tdata_t *tdata;
|
|
||||||
|
|
||||||
cassert(config_prof);
|
cassert(config_prof);
|
||||||
|
|
||||||
/* Fastpath: no need to load tdata */
|
/* Fastpath: no need to load tdata */
|
||||||
@ -96,8 +94,7 @@ prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update,
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool booted = prof_tdata_get(tsd, false);
|
prof_tdata_t *tdata = prof_tdata_get(tsd, true);
|
||||||
tdata = prof_tdata_get(tsd, true);
|
|
||||||
if (unlikely((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)) {
|
if (unlikely((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)) {
|
||||||
tdata = NULL;
|
tdata = NULL;
|
||||||
}
|
}
|
||||||
@ -110,45 +107,9 @@ prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update,
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!booted) {
|
|
||||||
/*
|
|
||||||
* If this was the first creation of tdata, then it means that
|
|
||||||
* the previous thread_event() relied on the wrong prof_sample
|
|
||||||
* wait time, and that it should have relied on the new
|
|
||||||
* prof_sample wait time just set by prof_tdata_get(), so we
|
|
||||||
* now manually check again.
|
|
||||||
*
|
|
||||||
* If the check fails, then even though we relied on the wrong
|
|
||||||
* prof_sample wait time, we're now actually in perfect shape,
|
|
||||||
* in the sense that we can pretend that we have used the right
|
|
||||||
* prof_sample wait time.
|
|
||||||
*
|
|
||||||
* If the check succeeds, then we are now in a tougher
|
|
||||||
* situation, in the sense that we cannot pretend that we have
|
|
||||||
* used the right prof_sample wait time. A straightforward
|
|
||||||
* solution would be to fully roll back thread_event(), set the
|
|
||||||
* right prof_sample wait time, and then redo thread_event().
|
|
||||||
* A simpler way, which is implemented below, is to just set a
|
|
||||||
* new prof_sample wait time that is usize less, and do nothing
|
|
||||||
* else. Strictly speaking, the thread event handler may end
|
|
||||||
* up in a wrong state, since it has still recorded an event
|
|
||||||
* whereas in reality there may be no event. However, the
|
|
||||||
* difference in the wait time offsets the wrongly recorded
|
|
||||||
* event, so that, functionally, the countdown to the next
|
|
||||||
* event will behave exactly as if we have used the right
|
|
||||||
* prof_sample wait time in the first place.
|
|
||||||
*/
|
|
||||||
uint64_t wait = prof_sample_event_wait_get(tsd);
|
|
||||||
assert(wait > 0);
|
|
||||||
if (usize < wait) {
|
|
||||||
thread_prof_sample_event_update(tsd, wait - usize);
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Compute new sample threshold. */
|
/* Compute new sample threshold. */
|
||||||
if (update) {
|
if (update) {
|
||||||
prof_sample_threshold_update(tdata);
|
prof_sample_threshold_update(tsd);
|
||||||
}
|
}
|
||||||
return !tdata->active;
|
return !tdata->active;
|
||||||
}
|
}
|
||||||
|
@ -167,9 +167,6 @@ struct prof_tdata_s {
|
|||||||
*/
|
*/
|
||||||
ckh_t bt2tctx;
|
ckh_t bt2tctx;
|
||||||
|
|
||||||
/* Sampling state. */
|
|
||||||
uint64_t prng_state;
|
|
||||||
|
|
||||||
/* State used to avoid dumping while operating on prof internals. */
|
/* State used to avoid dumping while operating on prof internals. */
|
||||||
bool enq;
|
bool enq;
|
||||||
bool enq_idump;
|
bool enq_idump;
|
||||||
|
13
src/prof.c
13
src/prof.c
@ -149,7 +149,7 @@ prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated) {
|
|||||||
*/
|
*/
|
||||||
tdata = prof_tdata_get(tsd, true);
|
tdata = prof_tdata_get(tsd, true);
|
||||||
if (tdata != NULL) {
|
if (tdata != NULL) {
|
||||||
prof_sample_threshold_update(tdata);
|
prof_sample_threshold_update(tsd);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -469,14 +469,12 @@ prof_tdata_mutex_choose(uint64_t thr_uid) {
|
|||||||
* -mno-sse) in order for the workaround to be complete.
|
* -mno-sse) in order for the workaround to be complete.
|
||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
prof_sample_threshold_update(prof_tdata_t *tdata) {
|
prof_sample_threshold_update(tsd_t *tsd) {
|
||||||
#ifdef JEMALLOC_PROF
|
#ifdef JEMALLOC_PROF
|
||||||
if (!config_prof) {
|
if (!config_prof) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
tsd_t *tsd = tsd_fetch();
|
|
||||||
|
|
||||||
if (lg_prof_sample == 0) {
|
if (lg_prof_sample == 0) {
|
||||||
thread_prof_sample_event_update(tsd,
|
thread_prof_sample_event_update(tsd,
|
||||||
THREAD_EVENT_MIN_START_WAIT);
|
THREAD_EVENT_MIN_START_WAIT);
|
||||||
@ -501,13 +499,12 @@ prof_sample_threshold_update(prof_tdata_t *tdata) {
|
|||||||
* pp 500
|
* pp 500
|
||||||
* (http://luc.devroye.org/rnbookindex.html)
|
* (http://luc.devroye.org/rnbookindex.html)
|
||||||
*/
|
*/
|
||||||
uint64_t r = prng_lg_range_u64(&tdata->prng_state, 53);
|
uint64_t r = prng_lg_range_u64(tsd_prng_statep_get(tsd), 53);
|
||||||
double u = (double)r * (1.0/9007199254740992.0L);
|
double u = (double)r * (1.0/9007199254740992.0L);
|
||||||
uint64_t bytes_until_sample = (uint64_t)(log(u) /
|
uint64_t bytes_until_sample = (uint64_t)(log(u) /
|
||||||
log(1.0 - (1.0 / (double)((uint64_t)1U << lg_prof_sample))))
|
log(1.0 - (1.0 / (double)((uint64_t)1U << lg_prof_sample))))
|
||||||
+ (uint64_t)1U;
|
+ (uint64_t)1U;
|
||||||
thread_prof_sample_event_update(tsd, bytes_until_sample);
|
thread_prof_sample_event_update(tsd, bytes_until_sample);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -810,7 +807,7 @@ prof_thr_uid_alloc(tsdn_t *tsdn) {
|
|||||||
prof_tdata_t *
|
prof_tdata_t *
|
||||||
prof_tdata_init(tsd_t *tsd) {
|
prof_tdata_init(tsd_t *tsd) {
|
||||||
return prof_tdata_init_impl(tsd, prof_thr_uid_alloc(tsd_tsdn(tsd)), 0,
|
return prof_tdata_init_impl(tsd, prof_thr_uid_alloc(tsd_tsdn(tsd)), 0,
|
||||||
NULL, prof_thread_active_init_get(tsd_tsdn(tsd)));
|
NULL, prof_thread_active_init_get(tsd_tsdn(tsd)), false);
|
||||||
}
|
}
|
||||||
|
|
||||||
static char *
|
static char *
|
||||||
@ -846,7 +843,7 @@ prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata) {
|
|||||||
|
|
||||||
prof_tdata_detach(tsd, tdata);
|
prof_tdata_detach(tsd, tdata);
|
||||||
return prof_tdata_init_impl(tsd, thr_uid, thr_discrim, thread_name,
|
return prof_tdata_init_impl(tsd, thr_uid, thr_discrim, thread_name,
|
||||||
active);
|
active, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
|
@ -1198,7 +1198,7 @@ prof_bt_keycomp(const void *k1, const void *k2) {
|
|||||||
|
|
||||||
prof_tdata_t *
|
prof_tdata_t *
|
||||||
prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim,
|
prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim,
|
||||||
char *thread_name, bool active) {
|
char *thread_name, bool active, bool reset_interval) {
|
||||||
assert(tsd_reentrancy_level_get(tsd) == 0);
|
assert(tsd_reentrancy_level_get(tsd) == 0);
|
||||||
|
|
||||||
prof_tdata_t *tdata;
|
prof_tdata_t *tdata;
|
||||||
@ -1227,8 +1227,9 @@ prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim,
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
tdata->prng_state = (uint64_t)(uintptr_t)tdata;
|
if (reset_interval) {
|
||||||
prof_sample_threshold_update(tdata);
|
prof_sample_threshold_update(tsd);
|
||||||
|
}
|
||||||
|
|
||||||
tdata->enq = false;
|
tdata->enq = false;
|
||||||
tdata->enq_idump = false;
|
tdata->enq_idump = false;
|
||||||
|
@ -34,7 +34,7 @@ tsd_thread_tcache_gc_event_init(tsd_t *tsd) {
|
|||||||
static void
|
static void
|
||||||
tsd_thread_prof_sample_event_init(tsd_t *tsd) {
|
tsd_thread_prof_sample_event_init(tsd_t *tsd) {
|
||||||
assert(config_prof && opt_prof);
|
assert(config_prof && opt_prof);
|
||||||
/* Do not set sample interval until the first allocation. */
|
prof_sample_threshold_update(tsd);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
|
@ -233,6 +233,7 @@ tsd_data_init(tsd_t *tsd) {
|
|||||||
*tsd_prng_statep_get(tsd) = config_debug ? 0 :
|
*tsd_prng_statep_get(tsd) = config_debug ? 0 :
|
||||||
(uint64_t)(uintptr_t)tsd;
|
(uint64_t)(uintptr_t)tsd;
|
||||||
|
|
||||||
|
/* event_init may use the prng state above. */
|
||||||
tsd_thread_event_init(tsd);
|
tsd_thread_event_init(tsd);
|
||||||
|
|
||||||
return tsd_tcache_enabled_data_init(tsd);
|
return tsd_tcache_enabled_data_init(tsd);
|
||||||
|
Loading…
Reference in New Issue
Block a user