HPA: Use a whole-shard purging heuristic.

Previously, we used only hpdata-local information to decide whether to purge. Now the trigger is shard-wide: a purge pass starts whenever the shard's dirty pages, net of pages whose purge is already in progress, exceed 25% of its active pages; per-pageslab purge and hugify eligibility is tracked separately, via flags kept in sync with the psset's purge and hugify containers.
David Goldblatt 2020-12-06 13:48:46 -08:00 committed by David Goldblatt
parent dc886e5608
commit 56e85c0e47
4 changed files with 86 additions and 34 deletions
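
For orientation before the diffs: the whole-shard trigger this commit introduces can be sketched as below. The function is illustrative only (a parameter-based stand-in rather than the real hpa_should_purge(), which reads its counts from the shard's psset), but the 25% threshold and the adjustment for pages already mid-purge match the new code.

    #include <stdbool.h>
    #include <stddef.h>

    /*
     * Sketch of the whole-shard check: purge when the shard's dirty pages,
     * excluding pages whose purge has already started, exceed 25% of its
     * active pages.
     */
    static bool
    shard_should_purge_sketch(size_t ndirty, size_t npending_purge,
        size_t nactive) {
        size_t adjusted_ndirty = ndirty - npending_purge;
        return adjusted_ndirty > nactive / 4;
    }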

@@ -93,6 +93,12 @@ struct hpa_shard_s {
     unsigned ind;
     emap_t *emap;
 
+    /*
+     * How many pages have we started but not yet finished purging in this
+     * hpa shard.
+     */
+    size_t npending_purge;
+
     /*
      * Those stats which are copied directly into the CTL-centric hpa shard
      * stats.

@@ -171,6 +171,7 @@ hpdata_purge_allowed_get(const hpdata_t *hpdata) {
 static inline void
 hpdata_purge_allowed_set(hpdata_t *hpdata, bool purge_allowed) {
+    assert(purge_allowed == false || !hpdata->h_mid_purge);
     hpdata->h_purge_allowed = purge_allowed;
 }
 
@@ -192,6 +193,7 @@ hpdata_hugify_allowed_get(const hpdata_t *hpdata) {
 static inline void
 hpdata_hugify_allowed_set(hpdata_t *hpdata, bool hugify_allowed) {
+    assert(hugify_allowed == false || !hpdata->h_mid_hugify);
     hpdata->h_hugify_allowed = hugify_allowed;
 }
 
@@ -313,6 +315,18 @@ hpdata_consistent(hpdata_t *hpdata) {
     if (hpdata->h_huge && hpdata->h_ntouched != HUGEPAGE_PAGES) {
         return false;
     }
+    if (hpdata_changing_state_get(hpdata)
+        && (hpdata->h_purge_allowed || hpdata->h_hugify_allowed)) {
+        return false;
+    }
+    if (hpdata_purge_allowed_get(hpdata)
+        != hpdata_in_psset_purge_container_get(hpdata)) {
+        return false;
+    }
+    if (hpdata_hugify_allowed_get(hpdata)
+        != hpdata_in_psset_hugify_container_get(hpdata)) {
+        return false;
+    }
     return true;
 }

@@ -74,6 +74,8 @@ hpa_shard_init(hpa_shard_t *shard, emap_t *emap, base_t *base,
     shard->ind = ind;
     shard->emap = emap;
 
+    shard->npending_purge = 0;
+
     shard->stats.npurge_passes = 0;
     shard->stats.npurges = 0;
     shard->stats.nhugifies = 0;
@@ -141,26 +143,58 @@ hpa_good_hugification_candidate(hpa_shard_t *shard, hpdata_t *ps) {
 }
 
 static bool
-hpa_should_hugify(hpa_shard_t *shard, hpdata_t *ps) {
-    if (hpdata_changing_state_get(ps) || hpdata_huge_get(ps)) {
-        return false;
-    }
-    return hpa_good_hugification_candidate(shard, ps);
-}
-
-/*
- * Whether or not the given pageslab meets the criteria for being purged (and,
- * if necessary, dehugified).
- */
-static bool
-hpa_should_purge(hpa_shard_t *shard, hpdata_t *ps) {
-    /* Ditto. */
-    if (hpdata_changing_state_get(ps)) {
-        return false;
-    }
-    size_t purgeable = hpdata_ndirty_get(ps);
-    return purgeable > HUGEPAGE_PAGES * 25 / 100
-        || (purgeable > 0 && hpdata_empty(ps));
+hpa_should_purge(hpa_shard_t *shard) {
+    size_t adjusted_ndirty = psset_ndirty(&shard->psset)
+        - shard->npending_purge;
+    /*
+     * Another simple static check; purge whenever dirty exceeds 25% of
+     * active.
+     */
+    return adjusted_ndirty > psset_nactive(&shard->psset) / 4;
+}
+
+static void
+hpa_update_purge_hugify_eligibility(hpa_shard_t *shard, hpdata_t *ps) {
+    if (hpdata_changing_state_get(ps)) {
+        hpdata_purge_allowed_set(ps, false);
+        hpdata_hugify_allowed_set(ps, false);
+        return;
+    }
+    /*
+     * Hugepages are distinctly costly to purge, so do it only if they're
+     * *particularly* full of dirty pages.  Eventually, we should use a
+     * smarter / more dynamic heuristic for situations where we have to
+     * manually hugify.
+     *
+     * In situations where we don't manually hugify, this problem is
+     * reduced.  The "bad" situation we're trying to avoid is one that's
+     * common in some Linux configurations (where both enabled and defrag
+     * are set to madvise) that can lead to long latency spikes on the first
+     * access after a hugification.  The ideal policy in such configurations
+     * is probably time-based for both purging and hugifying; only hugify a
+     * hugepage if it's met the criteria for some extended period of time,
+     * and only dehugify it if it's failed to meet the criteria for an
+     * extended period of time.  When background threads are on, we should
+     * try to take this hit on one of them, as well.
+     *
+     * I think the ideal setting is THP always enabled, and defrag set to
+     * deferred; in that case we don't need any explicit calls on the
+     * allocator's end at all; we just try to pack allocations in a
+     * hugepage-friendly manner and let the OS hugify in the background.
+     *
+     * Anyways, our strategy to delay dehugification is to only consider
+     * purging a hugified hugepage if it's individually dirtier than the
+     * overall max dirty pages setting.  That setting is 1 dirty page per 4
+     * active pages; i.e. 4/5ths of hugepage pages must be active.
+     */
+    if ((!hpdata_huge_get(ps) && hpdata_ndirty_get(ps) > 0)
+        || hpdata_ndirty_get(ps) > HUGEPAGE_PAGES / 5) {
+        hpdata_purge_allowed_set(ps, true);
+    }
+    if (hpa_good_hugification_candidate(shard, ps)
+        && !hpdata_huge_get(ps)) {
+        hpdata_hugify_allowed_set(ps, true);
+    }
 }
 
 static hpdata_t *
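
To make the dehugification threshold concrete: on the common configuration of 2 MiB hugepages over 4 KiB base pages, HUGEPAGE_PAGES is 512, so a hugified slab only becomes purge-eligible once it holds more than 512 / 5 = 102 dirty pages. That is exactly the point at which the slab on its own would exceed the 1-dirty-per-4-active setting even if every one of its remaining pages were active (103 dirty against at most 409 active). A non-hugified slab, by contrast, becomes purge-eligible as soon as it has any dirty pages at all.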
@@ -262,7 +296,9 @@ hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) {
     /* Gather all the metadata we'll need during the purge. */
     bool dehugify = hpdata_huge_get(to_purge);
     hpdata_purge_state_t purge_state;
-    hpdata_purge_begin(to_purge, &purge_state);
+    size_t num_to_purge = hpdata_purge_begin(to_purge, &purge_state);
+    shard->npending_purge += num_to_purge;
+
     malloc_mutex_unlock(tsdn, &shard->mtx);
@@ -284,6 +320,7 @@ hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) {
     malloc_mutex_lock(tsdn, &shard->mtx);
     /* The shard updates */
+    shard->npending_purge -= num_to_purge;
     shard->stats.npurge_passes++;
     shard->stats.npurges += purges_this_pass;
     if (dehugify) {
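
The num_to_purge bookkeeping above is what keeps the shard-wide check honest while a purge is in flight: the shard mutex is dropped around the actual purge, and subtracting npending_purge inside hpa_should_purge() stops the pages currently being purged from still counting as dirty, which would otherwise (presumably) invite redundant purge passes from other threads in the meantime.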
@@ -299,8 +336,7 @@ hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) {
     hpdata_mid_purge_set(to_purge, false);
 
     hpdata_alloc_allowed_set(to_purge, true);
-    hpdata_purge_allowed_set(to_purge, hpa_should_purge(shard, to_purge));
-    hpdata_hugify_allowed_set(to_purge, hpa_should_hugify(shard, to_purge));
+    hpa_update_purge_hugify_eligibility(shard, to_purge);
 
     psset_update_end(&shard->psset, to_purge);
@@ -349,15 +385,12 @@ hpa_try_hugify(tsdn_t *tsdn, hpa_shard_t *shard) {
     psset_update_begin(&shard->psset, to_hugify);
     hpdata_hugify(to_hugify);
     hpdata_mid_hugify_set(to_hugify, false);
-    hpdata_purge_allowed_set(to_hugify,
-        hpa_should_purge(shard, to_hugify));
-    hpdata_hugify_allowed_set(to_hugify, false);
+    hpa_update_purge_hugify_eligibility(shard, to_hugify);
     psset_update_end(&shard->psset, to_hugify);
 
     return true;
 }
 
 static void
 hpa_do_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard) {
     bool hugified;
@@ -368,7 +401,11 @@ hpa_do_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard) {
     do {
         malloc_mutex_assert_owner(tsdn, &shard->mtx);
         hugified = hpa_try_hugify(tsdn, shard);
-        purged = hpa_try_purge(tsdn, shard);
+
+        purged = false;
+        if (hpa_should_purge(shard)) {
+            purged = hpa_try_purge(tsdn, shard);
+        }
         malloc_mutex_assert_owner(tsdn, &shard->mtx);
     } while ((hugified || purged) && nloop++ < maxloops);
 }
@@ -441,9 +478,7 @@ hpa_try_alloc_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, bool *oom)
         return NULL;
     }
 
-    if (hpa_should_hugify(shard, ps)) {
-        hpdata_hugify_allowed_set(ps, true);
-    }
+    hpa_update_purge_hugify_eligibility(shard, ps);
     psset_update_end(&shard->psset, ps);
 
     hpa_do_deferred_work(tsdn, shard);
@@ -543,9 +578,7 @@ hpa_alloc_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size) {
         malloc_mutex_unlock(tsdn, &shard->grow_mtx);
         return NULL;
     }
 
-    if (hpa_should_hugify(shard, ps)) {
-        hpdata_hugify_allowed_set(ps, true);
-    }
+    hpa_update_purge_hugify_eligibility(shard, ps);
     psset_update_end(&shard->psset, ps);
 
     /*
@@ -653,9 +686,8 @@ hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) {
     psset_update_begin(&shard->psset, ps);
     hpdata_unreserve(ps, unreserve_addr, unreserve_size);
-    if (hpa_should_purge(shard, ps)) {
-        hpdata_purge_allowed_set(ps, true);
-    }
+
+    hpa_update_purge_hugify_eligibility(shard, ps);
     psset_update_end(&shard->psset, ps);
 
     hpa_do_deferred_work(tsdn, shard);

@@ -225,7 +225,6 @@ psset_update_begin(psset_t *psset, hpdata_t *ps) {
 
 void
 psset_update_end(psset_t *psset, hpdata_t *ps) {
-    hpdata_assert_consistent(ps);
     assert(hpdata_in_psset_get(ps));
     hpdata_updating_set(ps, false);
     psset_stats_insert(psset, ps);
 
@@ -258,6 +257,7 @@ psset_update_end(psset_t *psset, hpdata_t *ps) {
         hpdata_in_psset_hugify_container_set(ps, false);
         hpdata_hugify_list_remove(&psset->to_hugify, ps);
     }
+    hpdata_assert_consistent(ps);
 }
 
 hpdata_t *