HPA: Make purging/hugifying more principled.

Before this change, purge/hugify decisions had several sharp edges that could
lead to pathological behavior if tuning parameters weren't carefully chosen.
This is the first commit in a series: it introduces basic "make every hugepage
with dirty pages purgeable" functionality, and the next commit builds on it
with a smarter policy for picking which hugepages to purge.

Previously, the dehugify logic would *never* dehugify a hugepage unless it was
dirtier than the dehugification threshold.  This could lead to situations in
which these pages (which themselves could never be purged) pushed us above the
maximum allowed dirty pages in the shard.  That in turn forced immediate
purging of any pages deallocated in non-hugified hugepages, which placed
nonobvious practical limitations on the relationships between various config
settings.

Instead, we make our preference against purging (and thus dehugifying)
hugified hugepages a soft one rather than a hard one.  We'll avoid purging
them so long as we can satisfy our dirty-page limits by purging non-hugified
pages instead; but if we need to purge them to meet those limits, or to make
room to hugify other, more worthy candidates, we'll still do so.
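
In miniature, the new policy amounts to binning dirty hugepages by how
reluctant we are to purge them, and always draining the most-willing non-empty
bin first.  A minimal standalone sketch of that shape (the names and types
below are invented for illustration; they are not jemalloc's):

#include <stddef.h>

/* Purge reluctance, most willing first; "never" doubles as the bin count. */
enum purge_bin {
	BIN_DEFAULT = 0,               /* non-hugified dirty hugepages, plus
	                                * hugified ones dirty enough that
	                                * dehugifying them is acceptable */
	BIN_NONPREFERRED = 1,          /* hugified, not yet that dirty */
	BIN_STRONGLY_NONPREFERRED = 2, /* hugified, still worth keeping huge */
	BIN_NEVER = 3,
	BIN_COUNT = BIN_NEVER
};

typedef struct candidate_s {
	struct candidate_s *next;
} candidate_t;

/* One list of purge candidates per bin; unpurgable pages live in none. */
static candidate_t *bins[BIN_COUNT];

static candidate_t *
pick_purge(void) {
	for (int i = 0; i < BIN_COUNT; i++) {
		if (bins[i] != NULL) {
			candidate_t *c = bins[i];
			bins[i] = c->next;
			return c;
		}
	}
	return NULL; /* nothing purgeable at any reluctance level */
}

The real change keeps one typed FIFO list per level inside the psset and moves
hugepages between them through the usual psset update machinery as their
dirtiness and hugification status change.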
David Goldblatt, 2021-02-05 10:46:17 -08:00 (committed by David Goldblatt)
parent 6bddb92ad6, commit 0f6c420f83
5 changed files with 183 additions and 75 deletions

include/jemalloc/internal/hpdata.h

@@ -6,6 +6,42 @@
 #include "jemalloc/internal/ql.h"
 #include "jemalloc/internal/typed_list.h"
 
+/*
+ * How badly we want to purge some region of memory.  This is a temporary
+ * definition; it gets deleted in the next commit (where we adopt a more
+ * explicit dirtiest-first policy that only considers hugification status).
+ */
+enum hpdata_purge_level_e {
+	/*
+	 * The level number is important -- we use it as indices into an array
+	 * of size 3 (one for each purgeable level).
+	 */
+
+	/* "Regular" candidates for purging. */
+	hpdata_purge_level_default = 0,
+
+	/*
+	 * Candidates for purging, but as a last resort.  Practically,
+	 * nonpreferred corresponds to hugified regions that are below the
+	 * hugification threshold but have not yet reached the dehugification
+	 * threshold, while strongly nonpreferred candidates are those which
+	 * are above the hugification threshold.
+	 */
+	hpdata_purge_level_nonpreferred = 1,
+	hpdata_purge_level_strongly_nonpreferred = 2,
+
+	/* Don't purge, no matter what. */
+	hpdata_purge_level_never = 3,
+
+	/*
+	 * How big an array has to be to accommodate all purgeable levels.
+	 * This relies on the fact that we don't actually keep unpurgable
+	 * hpdatas in a container.
+	 */
+	hpdata_purge_level_count = hpdata_purge_level_never
+};
+typedef enum hpdata_purge_level_e hpdata_purge_level_t;
+
 /*
  * The metadata representation we use for extents in hugepages.  While the PAC
  * uses the edata_t to represent both active and inactive extents, the HP only
@@ -52,8 +88,8 @@ struct hpdata_s {
 	bool h_in_psset_alloc_container;
 
 	/* The same, but with purging. */
-	bool h_purge_allowed;
-	bool h_in_psset_purge_container;
+	uint8_t h_purge_level;
+	uint8_t h_purge_container_level;
 
 	/* And with hugifying. */
 	bool h_hugify_allowed;
@@ -164,26 +200,26 @@ hpdata_in_psset_alloc_container_set(hpdata_t *hpdata, bool in_container) {
 	hpdata->h_in_psset_alloc_container = in_container;
 }
 
-static inline bool
-hpdata_purge_allowed_get(const hpdata_t *hpdata) {
-	return hpdata->h_purge_allowed;
+static inline hpdata_purge_level_t
+hpdata_purge_level_get(const hpdata_t *hpdata) {
+	return (hpdata_purge_level_t)hpdata->h_purge_level;
 }
 
 static inline void
-hpdata_purge_allowed_set(hpdata_t *hpdata, bool purge_allowed) {
-	assert(purge_allowed == false || !hpdata->h_mid_purge);
-	hpdata->h_purge_allowed = purge_allowed;
+hpdata_purge_level_set(hpdata_t *hpdata, hpdata_purge_level_t level) {
+	assert(level == hpdata_purge_level_never || !hpdata->h_mid_purge);
+	hpdata->h_purge_level = (uint8_t)level;
 }
 
-static inline bool
-hpdata_in_psset_purge_container_get(const hpdata_t *hpdata) {
-	return hpdata->h_in_psset_purge_container;
+static inline hpdata_purge_level_t
+hpdata_purge_container_level_get(const hpdata_t *hpdata) {
+	return (hpdata_purge_level_t)hpdata->h_purge_container_level;
 }
 
 static inline void
-hpdata_in_psset_purge_container_set(hpdata_t *hpdata, bool in_container) {
-	assert(in_container != hpdata->h_in_psset_purge_container);
-	hpdata->h_in_psset_purge_container = in_container;
+hpdata_purge_container_level_set(hpdata_t *hpdata, hpdata_purge_level_t level) {
+	assert(level != hpdata->h_purge_container_level);
+	hpdata->h_purge_container_level = level;
 }
 
 static inline bool
@@ -284,6 +320,11 @@ hpdata_ndirty_get(hpdata_t *hpdata) {
 	return hpdata->h_ntouched - hpdata->h_nactive;
 }
 
+static inline size_t
+hpdata_nretained_get(hpdata_t *hpdata) {
+	return HUGEPAGE_PAGES - hpdata->h_ntouched;
+}
+
 static inline void
 hpdata_assert_empty(hpdata_t *hpdata) {
 	assert(fb_empty(hpdata->active_pages, HUGEPAGE_PAGES));
@@ -316,11 +357,12 @@ hpdata_consistent(hpdata_t *hpdata) {
 		return false;
 	}
 	if (hpdata_changing_state_get(hpdata)
-	    && (hpdata->h_purge_allowed || hpdata->h_hugify_allowed)) {
+	    && ((hpdata->h_purge_level != hpdata_purge_level_never)
+	    || hpdata->h_hugify_allowed)) {
 		return false;
 	}
-	if (hpdata_purge_allowed_get(hpdata)
-	    != hpdata_in_psset_purge_container_get(hpdata)) {
+	if (hpdata_purge_level_get(hpdata)
+	    != hpdata_purge_container_level_get(hpdata)) {
 		return false;
 	}
 	if (hpdata_hugify_allowed_get(hpdata)

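The enum above leans on an ordering invariant: every real purge level must
precede the "never" sentinel, since the sentinel's value doubles as the size
of the per-level containers.  A hypothetical compile-time guard for that
invariant (not part of this patch; the names are illustrative) could read:

#include <assert.h>

enum {
	LEVEL_DEFAULT = 0,
	LEVEL_NONPREFERRED = 1,
	LEVEL_STRONGLY_NONPREFERRED = 2,
	LEVEL_NEVER = 3,           /* sentinel; such pages sit in no container */
	LEVEL_COUNT = LEVEL_NEVER  /* valid container indices: 0..COUNT-1 */
};

/* Every purgeable level must be a valid index into a COUNT-sized array. */
static_assert(LEVEL_STRONGLY_NONPREFERRED < LEVEL_COUNT,
    "purge levels must precede the never sentinel");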
include/jemalloc/internal/psset.h

@@ -70,8 +70,8 @@ struct psset_s {
 	 * allocations.
 	 */
 	hpdata_empty_list_t empty;
-	/* Slabs which are available to be purged. */
-	hpdata_purge_list_t to_purge;
+	/* Slabs which are available to be purged, ordered by purge level. */
+	hpdata_purge_list_t to_purge[hpdata_purge_level_count];
 	/* Slabs which are available to be hugified. */
 	hpdata_hugify_list_t to_hugify;
 };

src/hpa.c

@@ -151,34 +151,59 @@ hpa_good_hugification_candidate(hpa_shard_t *shard, hpdata_t *ps) {
 	    >= shard->opts.hugification_threshold;
 }
 
-static bool
-hpa_should_purge(hpa_shard_t *shard) {
-	size_t adjusted_ndirty = psset_ndirty(&shard->psset)
-	    - shard->npending_purge;
-	/*
-	 * Another simple static check; purge whenever dirty exceeds 25% of
-	 * active.
-	 */
-	size_t max_ndirty = fxp_mul_frac(psset_nactive(&shard->psset),
-	    shard->opts.dirty_mult);
-	return adjusted_ndirty > max_ndirty;
+static size_t
+hpa_adjusted_ndirty(tsdn_t *tsdn, hpa_shard_t *shard) {
+	malloc_mutex_assert_owner(tsdn, &shard->mtx);
+	return psset_ndirty(&shard->psset) - shard->npending_purge;
+}
+
+static size_t
+hpa_ndirty_max(tsdn_t *tsdn, hpa_shard_t *shard) {
+	malloc_mutex_assert_owner(tsdn, &shard->mtx);
+	if (shard->opts.dirty_mult == (fxp_t)-1) {
+		return (size_t)-1;
+	}
+	return fxp_mul_frac(psset_nactive(&shard->psset),
+	    shard->opts.dirty_mult);
+}
+
+static bool
+hpa_hugify_blocked_by_ndirty(tsdn_t *tsdn, hpa_shard_t *shard) {
+	malloc_mutex_assert_owner(tsdn, &shard->mtx);
+	hpdata_t *to_hugify = psset_pick_hugify(&shard->psset);
+	if (to_hugify == NULL) {
+		return false;
+	}
+	return hpa_adjusted_ndirty(tsdn, shard)
+	    + hpdata_nretained_get(to_hugify) > hpa_ndirty_max(tsdn, shard);
+}
+
+static bool
+hpa_should_purge(tsdn_t *tsdn, hpa_shard_t *shard) {
+	malloc_mutex_assert_owner(tsdn, &shard->mtx);
+	if (hpa_adjusted_ndirty(tsdn, shard) > hpa_ndirty_max(tsdn, shard)) {
+		return true;
+	}
+	if (hpa_hugify_blocked_by_ndirty(tsdn, shard)) {
+		return true;
+	}
+	return false;
 }
 
 static void
-hpa_update_purge_hugify_eligibility(hpa_shard_t *shard, hpdata_t *ps) {
+hpa_update_purge_hugify_eligibility(tsdn_t *tsdn, hpa_shard_t *shard,
+    hpdata_t *ps) {
+	malloc_mutex_assert_owner(tsdn, &shard->mtx);
 	if (hpdata_changing_state_get(ps)) {
-		hpdata_purge_allowed_set(ps, false);
+		hpdata_purge_level_set(ps, hpdata_purge_level_never);
 		hpdata_hugify_allowed_set(ps, false);
 		return;
 	}
 	/*
-	 * Hugepages are distinctly costly to purge, so do it only if they're
-	 * *particularly* full of dirty pages.  Eventually, we should use a
-	 * smarter / more dynamic heuristic for situations where we have to
-	 * manually hugify.
+	 * Hugepages are distinctly costly to purge, so try to avoid it unless
+	 * they're *particularly* full of dirty pages.  Eventually, we should
+	 * use a smarter / more dynamic heuristic for situations where we have
+	 * to manually hugify.
 	 *
 	 * In situations where we don't manually hugify, this problem is
 	 * reduced.  The "bad" situation we're trying to avoid is one that's
@@ -195,17 +220,23 @@ hpa_update_purge_hugify_eligibility(hpa_shard_t *shard, hpdata_t *ps) {
 	 * deferred; in that case we don't need any explicit calls on the
 	 * allocator's end at all; we just try to pack allocations in a
 	 * hugepage-friendly manner and let the OS hugify in the background.
-	 *
-	 * Anyways, our strategy to delay dehugification is to only consider
-	 * purging a hugified hugepage if it's individually dirtier than the
-	 * overall max dirty pages setting.  That setting is 1 dirty page per 4
-	 * active pages; i.e. 4/5s of hugepage pages must be active.
 	 */
-	if ((!hpdata_huge_get(ps) && hpdata_ndirty_get(ps) > 0)
-	    || (hpdata_ndirty_get(ps) != 0
-	    && hpdata_ndirty_get(ps) * PAGE
-	    >= shard->opts.dehugification_threshold)) {
-		hpdata_purge_allowed_set(ps, true);
+	if (hpdata_ndirty_get(ps) > 0) {
+		if (hpdata_huge_get(ps)) {
+			if (hpa_good_hugification_candidate(shard, ps)) {
+				hpdata_purge_level_set(ps,
+				    hpdata_purge_level_strongly_nonpreferred);
+			} else if (hpdata_ndirty_get(ps) * PAGE
+			    >= shard->opts.dehugification_threshold) {
+				hpdata_purge_level_set(ps,
+				    hpdata_purge_level_default);
+			} else {
+				hpdata_purge_level_set(ps,
+				    hpdata_purge_level_nonpreferred);
+			}
+		} else {
+			hpdata_purge_level_set(ps, hpdata_purge_level_default);
+		}
 	}
 
 	if (hpa_good_hugification_candidate(shard, ps)
 	    && !hpdata_huge_get(ps)) {
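
To make the new hugify gating concrete: hugifying a candidate backs its whole
hugepage, so its retained (never-touched) pages will immediately count toward
the dirty total, and hpa_hugify_blocked_by_ndirty charges them against the
dirty budget up front.  A toy calculation with invented values (jemalloc's
real inputs come from the psset and fxp_t fixed-point arithmetic):

#include <stdio.h>
#include <stddef.h>

/* Toy sizes: a 2M hugepage made of 512 4K pages. */
#define HUGEPAGE_PAGES 512

int
main(void) {
	size_t nactive = 10000;    /* active pages in the shard */
	size_t ndirty = 2400;      /* touched-but-inactive pages */
	size_t npending_purge = 0; /* purges already in flight */
	double dirty_mult = 0.25;  /* stand-in for the fxp_t setting */

	/* The hugification candidate has touched 300 of its 512 pages. */
	size_t nretained = HUGEPAGE_PAGES - 300;                     /* 212 */

	size_t adjusted_ndirty = ndirty - npending_purge;            /* 2400 */
	size_t ndirty_max = (size_t)((double)nactive * dirty_mult);  /* 2500 */

	/*
	 * 2400 + 212 > 2500: hugifying would blow the dirty budget, so it
	 * stays blocked until purging brings ndirty down.
	 */
	int blocked = adjusted_ndirty + nretained > ndirty_max;
	printf("hugify blocked: %d\n", blocked);
	return 0;
}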
@@ -286,7 +317,7 @@ hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) {
 	if (to_purge == NULL) {
 		return false;
 	}
-	assert(hpdata_purge_allowed_get(to_purge));
+	assert(hpdata_purge_level_get(to_purge) != hpdata_purge_level_never);
 	assert(!hpdata_changing_state_get(to_purge));
 
 	/*
@@ -297,7 +328,7 @@ hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) {
 	psset_update_begin(&shard->psset, to_purge);
 	assert(hpdata_alloc_allowed_get(to_purge));
 	hpdata_mid_purge_set(to_purge, true);
-	hpdata_purge_allowed_set(to_purge, false);
+	hpdata_purge_level_set(to_purge, hpdata_purge_level_never);
 	hpdata_hugify_allowed_set(to_purge, false);
 	/*
 	 * Unlike with hugification (where concurrent
@@ -352,7 +383,7 @@ hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) {
 	hpdata_mid_purge_set(to_purge, false);
 	hpdata_alloc_allowed_set(to_purge, true);
-	hpa_update_purge_hugify_eligibility(shard, to_purge);
+	hpa_update_purge_hugify_eligibility(tsdn, shard, to_purge);
 	psset_update_end(&shard->psset, to_purge);
@@ -364,6 +395,10 @@ static bool
 hpa_try_hugify(tsdn_t *tsdn, hpa_shard_t *shard) {
 	malloc_mutex_assert_owner(tsdn, &shard->mtx);
 
+	if (hpa_hugify_blocked_by_ndirty(tsdn, shard)) {
+		return false;
+	}
+
 	hpdata_t *to_hugify = psset_pick_hugify(&shard->psset);
 	if (to_hugify == NULL) {
 		return false;
@@ -378,7 +413,7 @@ hpa_try_hugify(tsdn_t *tsdn, hpa_shard_t *shard) {
 	 */
 	psset_update_begin(&shard->psset, to_hugify);
 	hpdata_mid_hugify_set(to_hugify, true);
-	hpdata_purge_allowed_set(to_hugify, false);
+	hpdata_purge_level_set(to_hugify, hpdata_purge_level_never);
 	hpdata_hugify_allowed_set(to_hugify, false);
 	assert(hpdata_alloc_allowed_get(to_hugify));
 	psset_update_end(&shard->psset, to_hugify);
@@ -401,7 +436,7 @@ hpa_try_hugify(tsdn_t *tsdn, hpa_shard_t *shard) {
 	psset_update_begin(&shard->psset, to_hugify);
 	hpdata_hugify(to_hugify);
 	hpdata_mid_hugify_set(to_hugify, false);
-	hpa_update_purge_hugify_eligibility(shard, to_hugify);
+	hpa_update_purge_hugify_eligibility(tsdn, shard, to_hugify);
 	psset_update_end(&shard->psset, to_hugify);
 
 	return true;
@@ -419,7 +454,7 @@ hpa_do_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard) {
 	hugified = hpa_try_hugify(tsdn, shard);
 
 	purged = false;
-	if (hpa_should_purge(shard)) {
+	if (hpa_should_purge(tsdn, shard)) {
 		purged = hpa_try_purge(tsdn, shard);
 	}
 	malloc_mutex_assert_owner(tsdn, &shard->mtx);
@@ -491,7 +526,7 @@ hpa_try_alloc_one_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size,
 		return NULL;
 	}
 
-	hpa_update_purge_hugify_eligibility(shard, ps);
+	hpa_update_purge_hugify_eligibility(tsdn, shard, ps);
 	psset_update_end(&shard->psset, ps);
 	return edata;
 }
@@ -703,7 +738,7 @@ hpa_dalloc_locked(tsdn_t *tsdn, hpa_shard_t *shard, edata_t *edata) {
 	psset_update_begin(&shard->psset, ps);
 	hpdata_unreserve(ps, unreserve_addr, unreserve_size);
-	hpa_update_purge_hugify_eligibility(shard, ps);
+	hpa_update_purge_hugify_eligibility(tsdn, shard, ps);
 	psset_update_end(&shard->psset, ps);
 
 	hpa_do_deferred_work(tsdn, shard);
 }

src/hpdata.c

@@ -24,8 +24,8 @@ hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age) {
 	hpdata->h_huge = false;
 	hpdata->h_alloc_allowed = true;
 	hpdata->h_in_psset_alloc_container = false;
-	hpdata->h_purge_allowed = false;
-	hpdata->h_in_psset_purge_container = false;
+	hpdata->h_purge_level = hpdata_purge_level_never;
+	hpdata->h_purge_container_level = hpdata_purge_level_never;
 	hpdata->h_hugify_allowed = false;
 	hpdata->h_in_psset_hugify_container = false;
 	hpdata->h_mid_purge = false;

src/psset.c

@@ -14,7 +14,9 @@ psset_init(psset_t *psset) {
 	memset(&psset->merged_stats, 0, sizeof(psset->merged_stats));
 	memset(&psset->stats, 0, sizeof(psset->stats));
 	hpdata_empty_list_init(&psset->empty);
-	hpdata_purge_list_init(&psset->to_purge);
+	for (int i = 0; i < hpdata_purge_level_count; i++) {
+		hpdata_purge_list_init(&psset->to_purge[i]);
+	}
 	hpdata_hugify_list_init(&psset->to_hugify);
 }
 
@@ -230,14 +232,31 @@ psset_update_end(psset_t *psset, hpdata_t *ps) {
 		psset_alloc_container_insert(psset, ps);
 	}
 
-	if (hpdata_purge_allowed_get(ps)
-	    && !hpdata_in_psset_purge_container_get(ps)) {
-		hpdata_in_psset_purge_container_set(ps, true);
-		hpdata_purge_list_append(&psset->to_purge, ps);
-	} else if (!hpdata_purge_allowed_get(ps)
-	    && hpdata_in_psset_purge_container_get(ps)) {
-		hpdata_in_psset_purge_container_set(ps, false);
-		hpdata_purge_list_remove(&psset->to_purge, ps);
-	}
+	if (hpdata_purge_level_get(ps) == hpdata_purge_level_never
+	    && hpdata_purge_container_level_get(ps)
+	    != hpdata_purge_level_never) {
+		/* In some purge container, but shouldn't be in any. */
+		hpdata_purge_list_remove(
+		    &psset->to_purge[hpdata_purge_container_level_get(ps)],
+		    ps);
+		hpdata_purge_container_level_set(ps, hpdata_purge_level_never);
+	} else if (hpdata_purge_level_get(ps) != hpdata_purge_level_never
+	    && hpdata_purge_container_level_get(ps)
+	    == hpdata_purge_level_never) {
+		/* Not in any purge container, but should be in one. */
+		hpdata_purge_list_append(
+		    &psset->to_purge[hpdata_purge_level_get(ps)], ps);
+		hpdata_purge_container_level_set(ps,
+		    hpdata_purge_level_get(ps));
+	} else if (hpdata_purge_level_get(ps)
+	    != hpdata_purge_container_level_get(ps)) {
+		/* Should switch containers. */
+		hpdata_purge_list_remove(
+		    &psset->to_purge[hpdata_purge_container_level_get(ps)], ps);
+		hpdata_purge_list_append(
+		    &psset->to_purge[hpdata_purge_level_get(ps)], ps);
+		hpdata_purge_container_level_set(ps,
+		    hpdata_purge_level_get(ps));
+	}
 
 	if (hpdata_hugify_allowed_get(ps)
@@ -275,7 +294,13 @@ psset_pick_alloc(psset_t *psset, size_t size) {
 
 hpdata_t *
 psset_pick_purge(psset_t *psset) {
-	return hpdata_purge_list_first(&psset->to_purge);
+	for (int i = 0; i < hpdata_purge_level_count; i++) {
+		hpdata_t *ps = hpdata_purge_list_first(&psset->to_purge[i]);
+		if (ps != NULL) {
+			return ps;
+		}
+	}
+	return NULL;
 }
 
 hpdata_t *
@@ -291,10 +316,15 @@ psset_insert(psset_t *psset, hpdata_t *ps) {
 	if (hpdata_alloc_allowed_get(ps)) {
 		psset_alloc_container_insert(psset, ps);
 	}
-	if (hpdata_purge_allowed_get(ps)) {
-		hpdata_in_psset_purge_container_set(ps, true);
-		hpdata_purge_list_append(&psset->to_purge, ps);
+	assert(
+	    hpdata_purge_container_level_get(ps) == hpdata_purge_level_never);
+
+	if (hpdata_purge_level_get(ps) != hpdata_purge_level_never) {
+		hpdata_purge_container_level_set(ps,
+		    hpdata_purge_level_get(ps));
+		hpdata_purge_list_append(
+		    &psset->to_purge[hpdata_purge_level_get(ps)], ps);
 	}
 	if (hpdata_hugify_allowed_get(ps)) {
 		hpdata_in_psset_hugify_container_set(ps, true);
 		hpdata_hugify_list_append(&psset->to_hugify, ps);
@@ -309,12 +339,13 @@ psset_remove(psset_t *psset, hpdata_t *ps) {
 	if (hpdata_in_psset_alloc_container_get(ps)) {
 		psset_alloc_container_remove(psset, ps);
 	}
-	if (hpdata_in_psset_purge_container_get(ps)) {
-		hpdata_in_psset_purge_container_set(ps, false);
-		hpdata_purge_list_remove(&psset->to_purge, ps);
+	if (hpdata_purge_container_level_get(ps) != hpdata_purge_level_never) {
+		hpdata_purge_list_remove(
+		    &psset->to_purge[hpdata_purge_container_level_get(ps)], ps);
+		hpdata_purge_container_level_set(ps, hpdata_purge_level_never);
 	}
 	if (hpdata_in_psset_hugify_container_get(ps)) {
 		hpdata_in_psset_hugify_container_set(ps, false);
 		hpdata_hugify_list_remove(&psset->to_hugify, ps);
 	}
 }
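
The psset_update_end changes above are the usual "reconcile actual vs. desired
membership" pattern: the three branches are remove, insert, and move.  The
same idea in compact, self-contained form (singly-linked stand-ins for
jemalloc's typed lists; every name here is invented):

enum { LVL_COUNT = 3, LVL_NEVER = LVL_COUNT };

typedef struct item_s {
	struct item_s *next;
	int level;           /* desired purge level; LVL_NEVER if unpurgable */
	int container_level; /* list that currently holds the item */
} item_t;

static item_t *lists[LVL_COUNT];

/* Unlink an item from a list it is known to be on. */
static void
list_remove(item_t **head, item_t *it) {
	while (*head != it) {
		head = &(*head)->next;
	}
	*head = it->next;
}

static void
list_push(item_t **head, item_t *it) {
	it->next = *head;
	*head = it;
}

/* Mirror an item's desired level into the per-level containers. */
static void
sync_item(item_t *it) {
	if (it->level == it->container_level) {
		return; /* already where it belongs, possibly nowhere */
	}
	if (it->container_level != LVL_NEVER) {
		list_remove(&lists[it->container_level], it);
	}
	if (it->level != LVL_NEVER) {
		list_push(&lists[it->level], it);
	}
	it->container_level = it->level;
}

jemalloc appends rather than pushes, which keeps each container FIFO and thus
roughly age-ordered; the sketch pushes only for brevity.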