Refactor and optimize prof sampling initialization.

Makes the prof sample prng use the tsd prng_state.  This allows us to properly
initialize the sample interval event, without having to create tdata.  As a
result, tdata will be created on demand (when a thread's allocated bytes reach
the sample interval), instead of on the first allocation.
Author:    Qi Wang
Date:      2019-11-04 17:22:25 -08:00
Committer: Qi Wang
Parent:    bc774a3519
Commit:    da50d8ce87

7 changed files with 16 additions and 59 deletions
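
The gist of the change described above: the sampling PRNG state now lives in
TSD rather than in tdata, so the geometric sample interval can be computed as
soon as a thread starts, and tdata only needs to be created once a thread
actually crosses that interval. Below is a rough, self-contained sketch of
that idea in plain C; the thread-local variable, the xorshift PRNG, and the
function names are illustrative stand-ins, not jemalloc's internals (jemalloc
keeps the state in its tsd structure and uses its own prng helpers).

#include <math.h>
#include <stdint.h>

/* Hypothetical stand-in for the prng_state slot this patch moves into tsd. */
static _Thread_local uint64_t prof_prng_state = 0x9e3779b97f4a7c15ULL;

/* One xorshift64 step over the thread-local state. */
static uint64_t
prof_prng_next(void) {
	uint64_t x = prof_prng_state;
	x ^= x << 13;
	x ^= x >> 7;
	x ^= x << 17;
	prof_prng_state = x;
	return x;
}

/*
 * Draw the number of bytes until the next sample as a geometrically
 * distributed variable with mean 2^lg_prof_sample.  Only the thread-local
 * PRNG state is needed, so this can run at thread start, before any
 * per-thread profiling data (tdata) exists.
 */
static uint64_t
prof_sample_interval(unsigned lg_prof_sample) {
	if (lg_prof_sample == 0) {
		return 0;	/* Sample every allocation. */
	}
	/* Uniform in (0, 1]: top 53 bits of the PRNG output, shifted off 0. */
	double u = ((double)(prof_prng_next() >> 11) + 1.0) / 9007199254740992.0;
	return (uint64_t)(log(u)
	    / log(1.0 - 1.0 / (double)((uint64_t)1 << lg_prof_sample))) + 1;
}

Because the interval is derivable from TSD alone, the prof_sample_accum_update()
changes below can defer tdata creation until a thread's allocation counter
actually reaches it.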

@@ -100,7 +100,7 @@ void prof_prefork0(tsdn_t *tsdn);
 void prof_prefork1(tsdn_t *tsdn);
 void prof_postfork_parent(tsdn_t *tsdn);
 void prof_postfork_child(tsdn_t *tsdn);
-void prof_sample_threshold_update(prof_tdata_t *tdata);
+void prof_sample_threshold_update(tsd_t *tsd);
 void prof_try_log(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx);
 bool prof_log_start(tsdn_t *tsdn, const char *filename);
@@ -120,7 +120,7 @@ bool prof_data_init(tsd_t *tsd);
 bool prof_dump(tsd_t *tsd, bool propagate_err, const char *filename,
     bool leakcheck);
 prof_tdata_t * prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid,
-    uint64_t thr_discrim, char *thread_name, bool active);
+    uint64_t thr_discrim, char *thread_name, bool active, bool reset_interval);
 void prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata);
 void prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx);

@@ -82,9 +82,7 @@ prof_alloc_time_set(tsdn_t *tsdn, const void *ptr, nstime_t t) {
 JEMALLOC_ALWAYS_INLINE bool
 prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update,
-    prof_tdata_t **tdata_out) {
-	prof_tdata_t *tdata;
+    prof_tdata_t **tdata_out) {
 	cassert(config_prof);
 	/* Fastpath: no need to load tdata */
@@ -96,8 +94,7 @@ prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update,
 		return true;
 	}
-	bool booted = prof_tdata_get(tsd, false);
-	tdata = prof_tdata_get(tsd, true);
+	prof_tdata_t *tdata = prof_tdata_get(tsd, true);
 	if (unlikely((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)) {
 		tdata = NULL;
 	}
@@ -110,45 +107,9 @@ prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update,
 		return true;
 	}
-	if (!booted) {
-		/*
-		 * If this was the first creation of tdata, then it means that
-		 * the previous thread_event() relied on the wrong prof_sample
-		 * wait time, and that it should have relied on the new
-		 * prof_sample wait time just set by prof_tdata_get(), so we
-		 * now manually check again.
-		 *
-		 * If the check fails, then even though we relied on the wrong
-		 * prof_sample wait time, we're now actually in perfect shape,
-		 * in the sense that we can pretend that we have used the right
-		 * prof_sample wait time.
-		 *
-		 * If the check succeeds, then we are now in a tougher
-		 * situation, in the sense that we cannot pretend that we have
-		 * used the right prof_sample wait time. A straightforward
-		 * solution would be to fully roll back thread_event(), set the
-		 * right prof_sample wait time, and then redo thread_event().
-		 * A simpler way, which is implemented below, is to just set a
-		 * new prof_sample wait time that is usize less, and do nothing
-		 * else. Strictly speaking, the thread event handler may end
-		 * up in a wrong state, since it has still recorded an event
-		 * whereas in reality there may be no event. However, the
-		 * difference in the wait time offsets the wrongly recorded
-		 * event, so that, functionally, the countdown to the next
-		 * event will behave exactly as if we have used the right
-		 * prof_sample wait time in the first place.
-		 */
-		uint64_t wait = prof_sample_event_wait_get(tsd);
-		assert(wait > 0);
-		if (usize < wait) {
-			thread_prof_sample_event_update(tsd, wait - usize);
-			return true;
-		}
-	}
 	/* Compute new sample threshold. */
 	if (update) {
-		prof_sample_threshold_update(tdata);
+		prof_sample_threshold_update(tsd);
 	}
 	return !tdata->active;
 }
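
For context on the special case deleted above (an aside, not part of the
patch): the compensation the old comment argues for boils down to the identity
usize + (wait - usize) == wait, i.e. shortening the freshly started countdown
by the bytes that were already (wrongly) counted leaves the total bytes until
the next real sample exactly as if the correct wait had been used all along.
A tiny plain-C illustration with made-up numbers:

#include <assert.h>
#include <stdint.h>

int
main(void) {
	uint64_t wait = 2048;	/* Correct wait, set when tdata was created. */
	uint64_t usize = 100;	/* Allocation that spuriously fired the event. */

	assert(usize < wait);	/* The case the removed branch handled. */

	/* Shorten the new countdown instead of rolling back thread_event(). */
	uint64_t new_wait = wait - usize;

	/* Bytes already counted plus the shortened wait == the correct wait. */
	assert(usize + new_wait == wait);
	return 0;
}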

@@ -167,9 +167,6 @@ struct prof_tdata_s {
 	 */
 	ckh_t bt2tctx;
-	/* Sampling state. */
-	uint64_t prng_state;
 	/* State used to avoid dumping while operating on prof internals. */
 	bool enq;
 	bool enq_idump;