From da50d8ce87cb21963596825ebc5faf6d8abd4d2c Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 4 Nov 2019 17:22:25 -0800 Subject: [PATCH] Refactor and optimize prof sampling initialization. Makes the prof sample prng use the tsd prng_state. This allows us to properly initialize the sample interval event, without having to create tdata. As a result, tdata will be created on demand (when a thread reaches the sample interval bytes allocated), instead of on the first allocation. --- include/jemalloc/internal/prof_externs.h | 4 +- include/jemalloc/internal/prof_inlines_b.h | 45 ++-------------------- include/jemalloc/internal/prof_structs.h | 3 -- src/prof.c | 13 +++---- src/prof_data.c | 7 ++-- src/thread_event.c | 2 +- src/tsd.c | 1 + 7 files changed, 16 insertions(+), 59 deletions(-) diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index 94fbd752..fd18ac48 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -100,7 +100,7 @@ void prof_prefork0(tsdn_t *tsdn); void prof_prefork1(tsdn_t *tsdn); void prof_postfork_parent(tsdn_t *tsdn); void prof_postfork_child(tsdn_t *tsdn); -void prof_sample_threshold_update(prof_tdata_t *tdata); +void prof_sample_threshold_update(tsd_t *tsd); void prof_try_log(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx); bool prof_log_start(tsdn_t *tsdn, const char *filename); @@ -120,7 +120,7 @@ bool prof_data_init(tsd_t *tsd); bool prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, bool leakcheck); prof_tdata_t * prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, - uint64_t thr_discrim, char *thread_name, bool active); + uint64_t thr_discrim, char *thread_name, bool active, bool reset_interval); void prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata); void prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx); diff --git a/include/jemalloc/internal/prof_inlines_b.h b/include/jemalloc/internal/prof_inlines_b.h index b4e65c05..388537e6 100644 --- a/include/jemalloc/internal/prof_inlines_b.h +++ b/include/jemalloc/internal/prof_inlines_b.h @@ -82,9 +82,7 @@ prof_alloc_time_set(tsdn_t *tsdn, const void *ptr, nstime_t t) { JEMALLOC_ALWAYS_INLINE bool prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update, - prof_tdata_t **tdata_out) { - prof_tdata_t *tdata; - + prof_tdata_t **tdata_out) { cassert(config_prof); /* Fastpath: no need to load tdata */ @@ -96,8 +94,7 @@ prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update, return true; } - bool booted = prof_tdata_get(tsd, false); - tdata = prof_tdata_get(tsd, true); + prof_tdata_t *tdata = prof_tdata_get(tsd, true); if (unlikely((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)) { tdata = NULL; } @@ -110,45 +107,9 @@ prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update, return true; } - if (!booted) { - /* - * If this was the first creation of tdata, then it means that - * the previous thread_event() relied on the wrong prof_sample - * wait time, and that it should have relied on the new - * prof_sample wait time just set by prof_tdata_get(), so we - * now manually check again. - * - * If the check fails, then even though we relied on the wrong - * prof_sample wait time, we're now actually in perfect shape, - * in the sense that we can pretend that we have used the right - * prof_sample wait time. - * - * If the check succeeds, then we are now in a tougher - * situation, in the sense that we cannot pretend that we have - * used the right prof_sample wait time. A straightforward - * solution would be to fully roll back thread_event(), set the - * right prof_sample wait time, and then redo thread_event(). - * A simpler way, which is implemented below, is to just set a - * new prof_sample wait time that is usize less, and do nothing - * else. Strictly speaking, the thread event handler may end - * up in a wrong state, since it has still recorded an event - * whereas in reality there may be no event. However, the - * difference in the wait time offsets the wrongly recorded - * event, so that, functionally, the countdown to the next - * event will behave exactly as if we have used the right - * prof_sample wait time in the first place. - */ - uint64_t wait = prof_sample_event_wait_get(tsd); - assert(wait > 0); - if (usize < wait) { - thread_prof_sample_event_update(tsd, wait - usize); - return true; - } - } - /* Compute new sample threshold. */ if (update) { - prof_sample_threshold_update(tdata); + prof_sample_threshold_update(tsd); } return !tdata->active; } diff --git a/include/jemalloc/internal/prof_structs.h b/include/jemalloc/internal/prof_structs.h index 34ed4822..9a00a189 100644 --- a/include/jemalloc/internal/prof_structs.h +++ b/include/jemalloc/internal/prof_structs.h @@ -167,9 +167,6 @@ struct prof_tdata_s { */ ckh_t bt2tctx; - /* Sampling state. */ - uint64_t prng_state; - /* State used to avoid dumping while operating on prof internals. */ bool enq; bool enq_idump; diff --git a/src/prof.c b/src/prof.c index 5360662b..0590482c 100644 --- a/src/prof.c +++ b/src/prof.c @@ -149,7 +149,7 @@ prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated) { */ tdata = prof_tdata_get(tsd, true); if (tdata != NULL) { - prof_sample_threshold_update(tdata); + prof_sample_threshold_update(tsd); } } @@ -469,14 +469,12 @@ prof_tdata_mutex_choose(uint64_t thr_uid) { * -mno-sse) in order for the workaround to be complete. */ void -prof_sample_threshold_update(prof_tdata_t *tdata) { +prof_sample_threshold_update(tsd_t *tsd) { #ifdef JEMALLOC_PROF if (!config_prof) { return; } - tsd_t *tsd = tsd_fetch(); - if (lg_prof_sample == 0) { thread_prof_sample_event_update(tsd, THREAD_EVENT_MIN_START_WAIT); @@ -501,13 +499,12 @@ prof_sample_threshold_update(prof_tdata_t *tdata) { * pp 500 * (http://luc.devroye.org/rnbookindex.html) */ - uint64_t r = prng_lg_range_u64(&tdata->prng_state, 53); + uint64_t r = prng_lg_range_u64(tsd_prng_statep_get(tsd), 53); double u = (double)r * (1.0/9007199254740992.0L); uint64_t bytes_until_sample = (uint64_t)(log(u) / log(1.0 - (1.0 / (double)((uint64_t)1U << lg_prof_sample)))) + (uint64_t)1U; thread_prof_sample_event_update(tsd, bytes_until_sample); - #endif } @@ -810,7 +807,7 @@ prof_thr_uid_alloc(tsdn_t *tsdn) { prof_tdata_t * prof_tdata_init(tsd_t *tsd) { return prof_tdata_init_impl(tsd, prof_thr_uid_alloc(tsd_tsdn(tsd)), 0, - NULL, prof_thread_active_init_get(tsd_tsdn(tsd))); + NULL, prof_thread_active_init_get(tsd_tsdn(tsd)), false); } static char * @@ -846,7 +843,7 @@ prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata) { prof_tdata_detach(tsd, tdata); return prof_tdata_init_impl(tsd, thr_uid, thr_discrim, thread_name, - active); + active, true); } void diff --git a/src/prof_data.c b/src/prof_data.c index cd92ee61..2f8bd2de 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -1198,7 +1198,7 @@ prof_bt_keycomp(const void *k1, const void *k2) { prof_tdata_t * prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, - char *thread_name, bool active) { + char *thread_name, bool active, bool reset_interval) { assert(tsd_reentrancy_level_get(tsd) == 0); prof_tdata_t *tdata; @@ -1227,8 +1227,9 @@ prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, return NULL; } - tdata->prng_state = (uint64_t)(uintptr_t)tdata; - prof_sample_threshold_update(tdata); + if (reset_interval) { + prof_sample_threshold_update(tsd); + } tdata->enq = false; tdata->enq_idump = false; diff --git a/src/thread_event.c b/src/thread_event.c index f27a37aa..9f6c9271 100644 --- a/src/thread_event.c +++ b/src/thread_event.c @@ -34,7 +34,7 @@ tsd_thread_tcache_gc_event_init(tsd_t *tsd) { static void tsd_thread_prof_sample_event_init(tsd_t *tsd) { assert(config_prof && opt_prof); - /* Do not set sample interval until the first allocation. */ + prof_sample_threshold_update(tsd); } static void diff --git a/src/tsd.c b/src/tsd.c index 5053f12f..6e0ee93c 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -233,6 +233,7 @@ tsd_data_init(tsd_t *tsd) { *tsd_prng_statep_get(tsd) = config_debug ? 0 : (uint64_t)(uintptr_t)tsd; + /* event_init may use the prng state above. */ tsd_thread_event_init(tsd); return tsd_tcache_enabled_data_init(tsd);