HPA: Use dirtiest-first purging.

This seems to be practically beneficial, despite some pathological corner cases.
This commit is contained in:
David Goldblatt 2021-02-08 14:11:37 -08:00 committed by David Goldblatt
parent 0f6c420f83
commit 73ca4b8ef8
6 changed files with 179 additions and 137 deletions

View File

@ -6,42 +6,6 @@
#include "jemalloc/internal/ql.h" #include "jemalloc/internal/ql.h"
#include "jemalloc/internal/typed_list.h" #include "jemalloc/internal/typed_list.h"
/*
* How badly we want to purge some region of memory. This is a temporary
* definition; it gets deleted in the next commit (where we adopt a more
* explicit dirtiest-first policy that only considers hugification status).
*/
/*
 * Purge priority of an hpdata.  Per the surrounding commit, this enum is the
 * pre-change representation that the dirtiest-first policy replaces with a
 * single "purge allowed" boolean.
 */
enum hpdata_purge_level_e {
/*
 * The level number is important -- it is used as an index into the psset's
 * array of purge lists (declared with hpdata_purge_level_count entries).
 *
 * NOTE(review): the original comment says the array has "size 2 (one for
 * each purge level)", yet three purgeable levels are named below.  Confirm
 * which is intended.
 */
/* "Regular" candidates for purging. */
hpdata_purge_level_default = 0,
/*
 * Candidates for purging, but as a last resort. Practically,
 * nonpreferred corresponds to hugified regions that are below the
 * hugification threshold but have not yet reached the dehugification
 * threshold, while strongly nonpreferred candidates are those which are
 * above the hugification threshold.
 */
hpdata_purge_level_nonpreferred = 1,
/*
 * NOTE(review): this enumerator has the same value (2) as
 * hpdata_purge_level_never below, so code comparing a level against
 * hpdata_purge_level_never cannot tell the two apart.  Unclear from here
 * whether that aliasing is deliberate -- confirm.
 */
hpdata_purge_level_strongly_nonpreferred = 2,
/* Don't purge, no matter what. */
hpdata_purge_level_never = 2,
/*
 * How big an array has to be to accommodate all purge levels. This
 * relies on the fact that we don't actually keep unpurgable hpdatas in
 * a container.
 */
hpdata_purge_level_count = hpdata_purge_level_never
};
typedef enum hpdata_purge_level_e hpdata_purge_level_t;
/* /*
* The metadata representation we use for extents in hugepages. While the PAC * The metadata representation we use for extents in hugepages. While the PAC
* uses the edata_t to represent both active and inactive extents, the HP only * uses the edata_t to represent both active and inactive extents, the HP only
@ -87,9 +51,13 @@ struct hpdata_s {
bool h_alloc_allowed; bool h_alloc_allowed;
bool h_in_psset_alloc_container; bool h_in_psset_alloc_container;
/* The same, but with purging. */ /*
uint8_t h_purge_level; * The same, but with purging. There's no corresponding
uint8_t h_purge_container_level; * h_in_psset_purge_container, because the psset (currently) always
* removes hpdatas from their containers during updates (to implement
* LRU for purging).
*/
bool h_purge_allowed;
/* And with hugifying. */ /* And with hugifying. */
bool h_hugify_allowed; bool h_hugify_allowed;
@ -200,26 +168,15 @@ hpdata_in_psset_alloc_container_set(hpdata_t *hpdata, bool in_container) {
hpdata->h_in_psset_alloc_container = in_container; hpdata->h_in_psset_alloc_container = in_container;
} }
static inline hpdata_purge_level_t static inline bool
hpdata_purge_level_get(const hpdata_t *hpdata) { hpdata_purge_allowed_get(const hpdata_t *hpdata) {
return (hpdata_purge_level_t)hpdata->h_purge_level; return hpdata->h_purge_allowed;
} }
static inline void static inline void
hpdata_purge_level_set(hpdata_t *hpdata, hpdata_purge_level_t level) { hpdata_purge_allowed_set(hpdata_t *hpdata, bool purge_allowed) {
assert(level == hpdata_purge_level_never || !hpdata->h_mid_purge); assert(purge_allowed == false || !hpdata->h_mid_purge);
hpdata->h_purge_level = (uint8_t)level; hpdata->h_purge_allowed = purge_allowed;
}
static inline hpdata_purge_level_t
hpdata_purge_container_level_get(const hpdata_t *hpdata) {
return (hpdata_purge_level_t)hpdata->h_purge_container_level;
}
static inline void
hpdata_purge_container_level_set(hpdata_t *hpdata, hpdata_purge_level_t level) {
assert(level != hpdata->h_purge_container_level);
hpdata->h_purge_container_level = level;
} }
static inline bool static inline bool
@ -357,12 +314,7 @@ hpdata_consistent(hpdata_t *hpdata) {
return false; return false;
} }
if (hpdata_changing_state_get(hpdata) if (hpdata_changing_state_get(hpdata)
&& ((hpdata->h_purge_level != hpdata_purge_level_never) && ((hpdata->h_purge_allowed) || hpdata->h_hugify_allowed)) {
|| hpdata->h_hugify_allowed)) {
return false;
}
if (hpdata_purge_level_get(hpdata)
!= hpdata_purge_container_level_get(hpdata)) {
return false; return false;
} }
if (hpdata_hugify_allowed_get(hpdata) if (hpdata_hugify_allowed_get(hpdata)

View File

@ -20,6 +20,14 @@
*/ */
#define PSSET_NPSIZES 64 #define PSSET_NPSIZES 64
/*
* We keep two purge lists per page size class; one for hugified hpdatas (at
* index 2*pszind), and one for the non-hugified hpdatas (at index 2*pszind +
* 1). This lets us implement a preference for purging non-hugified hpdatas
* among similarly-dirty ones.
*/
#define PSSET_NPURGE_LISTS (2 * PSSET_NPSIZES)
typedef struct psset_bin_stats_s psset_bin_stats_t; typedef struct psset_bin_stats_s psset_bin_stats_t;
struct psset_bin_stats_s { struct psset_bin_stats_s {
/* How many pageslabs are in this bin? */ /* How many pageslabs are in this bin? */
@ -71,7 +79,9 @@ struct psset_s {
*/ */
hpdata_empty_list_t empty; hpdata_empty_list_t empty;
/* Slabs which are available to be purged, ordered by purge level. */ /* Slabs which are available to be purged, ordered by purge level. */
hpdata_purge_list_t to_purge[hpdata_purge_level_count]; hpdata_purge_list_t to_purge[PSSET_NPURGE_LISTS];
/* Bitmap for which set bits correspond to non-empty purge lists. */
fb_group_t purge_bitmap[FB_NGROUPS(PSSET_NPURGE_LISTS)];
/* Slabs which are available to be hugified. */ /* Slabs which are available to be hugified. */
hpdata_hugify_list_t to_hugify; hpdata_hugify_list_t to_hugify;
}; };

View File

@ -195,7 +195,7 @@ hpa_update_purge_hugify_eligibility(tsdn_t *tsdn, hpa_shard_t *shard,
hpdata_t *ps) { hpdata_t *ps) {
malloc_mutex_assert_owner(tsdn, &shard->mtx); malloc_mutex_assert_owner(tsdn, &shard->mtx);
if (hpdata_changing_state_get(ps)) { if (hpdata_changing_state_get(ps)) {
hpdata_purge_level_set(ps, hpdata_purge_level_never); hpdata_purge_allowed_set(ps, false);
hpdata_hugify_allowed_set(ps, false); hpdata_hugify_allowed_set(ps, false);
return; return;
} }
@ -221,23 +221,7 @@ hpa_update_purge_hugify_eligibility(tsdn_t *tsdn, hpa_shard_t *shard,
* allocator's end at all; we just try to pack allocations in a * allocator's end at all; we just try to pack allocations in a
* hugepage-friendly manner and let the OS hugify in the background. * hugepage-friendly manner and let the OS hugify in the background.
*/ */
if (hpdata_ndirty_get(ps) > 0) { hpdata_purge_allowed_set(ps, hpdata_ndirty_get(ps) > 0);
if (hpdata_huge_get(ps)) {
if (hpa_good_hugification_candidate(shard, ps)) {
hpdata_purge_level_set(ps,
hpdata_purge_level_strongly_nonpreferred);
} else if (hpdata_ndirty_get(ps) * PAGE
>= shard->opts.dehugification_threshold) {
hpdata_purge_level_set(ps,
hpdata_purge_level_nonpreferred);
} else {
hpdata_purge_level_set(ps,
hpdata_purge_level_default);
}
} else {
hpdata_purge_level_set(ps, hpdata_purge_level_default);
}
}
if (hpa_good_hugification_candidate(shard, ps) if (hpa_good_hugification_candidate(shard, ps)
&& !hpdata_huge_get(ps)) { && !hpdata_huge_get(ps)) {
hpdata_hugify_allowed_set(ps, true); hpdata_hugify_allowed_set(ps, true);
@ -317,7 +301,7 @@ hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) {
if (to_purge == NULL) { if (to_purge == NULL) {
return false; return false;
} }
assert(hpdata_purge_level_get(to_purge) != hpdata_purge_level_never); assert(hpdata_purge_allowed_get(to_purge));
assert(!hpdata_changing_state_get(to_purge)); assert(!hpdata_changing_state_get(to_purge));
/* /*
@ -328,7 +312,7 @@ hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) {
psset_update_begin(&shard->psset, to_purge); psset_update_begin(&shard->psset, to_purge);
assert(hpdata_alloc_allowed_get(to_purge)); assert(hpdata_alloc_allowed_get(to_purge));
hpdata_mid_purge_set(to_purge, true); hpdata_mid_purge_set(to_purge, true);
hpdata_purge_level_set(to_purge, hpdata_purge_level_never); hpdata_purge_allowed_set(to_purge, false);
hpdata_hugify_allowed_set(to_purge, false); hpdata_hugify_allowed_set(to_purge, false);
/* /*
* Unlike with hugification (where concurrent * Unlike with hugification (where concurrent
@ -413,7 +397,7 @@ hpa_try_hugify(tsdn_t *tsdn, hpa_shard_t *shard) {
*/ */
psset_update_begin(&shard->psset, to_hugify); psset_update_begin(&shard->psset, to_hugify);
hpdata_mid_hugify_set(to_hugify, true); hpdata_mid_hugify_set(to_hugify, true);
hpdata_purge_level_set(to_hugify, hpdata_purge_level_never); hpdata_purge_allowed_set(to_hugify, false);
hpdata_hugify_allowed_set(to_hugify, false); hpdata_hugify_allowed_set(to_hugify, false);
assert(hpdata_alloc_allowed_get(to_hugify)); assert(hpdata_alloc_allowed_get(to_hugify));
psset_update_end(&shard->psset, to_hugify); psset_update_end(&shard->psset, to_hugify);

View File

@ -24,8 +24,7 @@ hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age) {
hpdata->h_huge = false; hpdata->h_huge = false;
hpdata->h_alloc_allowed = true; hpdata->h_alloc_allowed = true;
hpdata->h_in_psset_alloc_container = false; hpdata->h_in_psset_alloc_container = false;
hpdata->h_purge_level = hpdata_purge_level_never; hpdata->h_purge_allowed = false;
hpdata->h_purge_container_level = hpdata_purge_level_never;
hpdata->h_hugify_allowed = false; hpdata->h_hugify_allowed = false;
hpdata->h_in_psset_hugify_container = false; hpdata->h_in_psset_hugify_container = false;
hpdata->h_mid_purge = false; hpdata->h_mid_purge = false;

View File

@ -14,9 +14,10 @@ psset_init(psset_t *psset) {
memset(&psset->merged_stats, 0, sizeof(psset->merged_stats)); memset(&psset->merged_stats, 0, sizeof(psset->merged_stats));
memset(&psset->stats, 0, sizeof(psset->stats)); memset(&psset->stats, 0, sizeof(psset->stats));
hpdata_empty_list_init(&psset->empty); hpdata_empty_list_init(&psset->empty);
for (int i = 0; i < hpdata_purge_level_count; i++) { for (int i = 0; i < PSSET_NPURGE_LISTS; i++) {
hpdata_purge_list_init(&psset->to_purge[i]); hpdata_purge_list_init(&psset->to_purge[i]);
} }
fb_init(psset->purge_bitmap, PSSET_NPURGE_LISTS);
hpdata_hugify_list_init(&psset->to_hugify); hpdata_hugify_list_init(&psset->to_hugify);
} }
@ -195,6 +196,51 @@ psset_alloc_container_remove(psset_t *psset, hpdata_t *ps) {
} }
} }
/*
 * Map an hpdata to the index of the purge list it belongs in.
 *
 * The index is built from the page size class of the hpdata's dirty byte
 * count (rounded down by sz_psz_quantize_floor), doubled, with the low bit
 * set for non-hugified hpdatas.  Callers must only pass hpdatas that have at
 * least one dirty page.
 */
static size_t
psset_purge_list_ind(hpdata_t *ps) {
	size_t ndirty = hpdata_ndirty_get(ps);
	/* A purgeable hpdata always has something dirty to purge. */
	assert(ndirty > 0);
	size_t dirty_bytes = ndirty << LG_PAGE;
	pszind_t pind = sz_psz2ind(sz_psz_quantize_floor(dirty_bytes));
	/*
	 * Larger indices are purged sooner.  Within one dirtiness class the
	 * non-hugified hpdata gets the larger (odd) slot, so it is chosen
	 * ahead of the hugified one.
	 */
	size_t nonhuge_bump = hpdata_huge_get(ps) ? 0 : 1;
	return (size_t)pind * 2 + nonhuge_bump;
}
/*
 * Take ps out of its purge list, if it is purge-allowed (an hpdata that is
 * not purge-allowed is treated as being in no list).  Clears the list's bit
 * in the purge bitmap when the removal leaves the list empty.
 *
 * The removal happens even when ps will land back in the same list: it is
 * re-appended during psset_update_end, which moves it to the tail and gives
 * LRU ordering within a dirtiness bucket.
 */
static void
psset_maybe_remove_purge_list(psset_t *psset, hpdata_t *ps) {
	if (!hpdata_purge_allowed_get(ps)) {
		return;
	}
	size_t list_ind = psset_purge_list_ind(ps);
	hpdata_purge_list_t *list = &psset->to_purge[list_ind];
	hpdata_purge_list_remove(list, ps);
	if (!hpdata_purge_list_empty(list)) {
		return;
	}
	/* That was the last entry; the bitmap must stop advertising it. */
	fb_unset(psset->purge_bitmap, PSSET_NPURGE_LISTS, list_ind);
}
/*
 * Append ps to the tail of the purge list selected by psset_purge_list_ind,
 * if it is purge-allowed; otherwise do nothing.  Sets the list's bit in the
 * purge bitmap when the list was empty beforehand.
 */
static void
psset_maybe_insert_purge_list(psset_t *psset, hpdata_t *ps) {
	if (!hpdata_purge_allowed_get(ps)) {
		return;
	}
	size_t list_ind = psset_purge_list_ind(ps);
	hpdata_purge_list_t *list = &psset->to_purge[list_ind];
	if (hpdata_purge_list_empty(list)) {
		/* First entry for this list; mark it non-empty. */
		fb_set(psset->purge_bitmap, PSSET_NPURGE_LISTS, list_ind);
	}
	hpdata_purge_list_append(list, ps);
}
void void
psset_update_begin(psset_t *psset, hpdata_t *ps) { psset_update_begin(psset_t *psset, hpdata_t *ps) {
hpdata_assert_consistent(ps); hpdata_assert_consistent(ps);
@ -210,10 +256,11 @@ psset_update_begin(psset_t *psset, hpdata_t *ps) {
assert(hpdata_alloc_allowed_get(ps)); assert(hpdata_alloc_allowed_get(ps));
psset_alloc_container_remove(psset, ps); psset_alloc_container_remove(psset, ps);
} }
psset_maybe_remove_purge_list(psset, ps);
/* /*
* We don't update presence in the purge list or hugify list; we try to * We don't update presence in the hugify list; we try to keep it FIFO,
* keep those FIFO, even in the presence of other metadata updates. * even in the presence of other metadata updates. We'll update
* We'll update presence at the end of the metadata update if necessary. * presence at the end of the metadata update if necessary.
*/ */
} }
@ -231,33 +278,7 @@ psset_update_end(psset_t *psset, hpdata_t *ps) {
if (hpdata_alloc_allowed_get(ps)) { if (hpdata_alloc_allowed_get(ps)) {
psset_alloc_container_insert(psset, ps); psset_alloc_container_insert(psset, ps);
} }
psset_maybe_insert_purge_list(psset, ps);
if (hpdata_purge_level_get(ps) == hpdata_purge_level_never
&& hpdata_purge_container_level_get(ps)
!= hpdata_purge_level_never) {
/* In some purge container, but shouldn't be in any. */
hpdata_purge_list_remove(
&psset->to_purge[hpdata_purge_container_level_get(ps)],
ps);
hpdata_purge_container_level_set(ps, hpdata_purge_level_never);
} else if (hpdata_purge_level_get(ps) != hpdata_purge_level_never
&& hpdata_purge_container_level_get(ps)
== hpdata_purge_level_never) {
/* Not in any purge container, but should be in one. */
hpdata_purge_list_append(
&psset->to_purge[hpdata_purge_level_get(ps)], ps);
hpdata_purge_container_level_set(ps,
hpdata_purge_level_get(ps));
} else if (hpdata_purge_level_get(ps)
!= hpdata_purge_container_level_get(ps)) {
/* Should switch containers. */
hpdata_purge_list_remove(
&psset->to_purge[hpdata_purge_container_level_get(ps)], ps);
hpdata_purge_list_append(
&psset->to_purge[hpdata_purge_level_get(ps)], ps);
hpdata_purge_container_level_set(ps,
hpdata_purge_level_get(ps));
}
if (hpdata_hugify_allowed_get(ps) if (hpdata_hugify_allowed_get(ps)
&& !hpdata_in_psset_hugify_container_get(ps)) { && !hpdata_in_psset_hugify_container_get(ps)) {
@ -294,13 +315,16 @@ psset_pick_alloc(psset_t *psset, size_t size) {
hpdata_t * hpdata_t *
psset_pick_purge(psset_t *psset) { psset_pick_purge(psset_t *psset) {
for (int i = 0; i < hpdata_purge_level_count; i++) { ssize_t ind_ssz = fb_fls(psset->purge_bitmap, PSSET_NPURGE_LISTS,
hpdata_t *ps = hpdata_purge_list_first(&psset->to_purge[i]); PSSET_NPURGE_LISTS - 1);
if (ps != NULL) { if (ind_ssz < 0) {
return ps;
}
}
return NULL; return NULL;
}
pszind_t ind = (pszind_t)ind_ssz;
assert(ind < PSSET_NPSIZES);
hpdata_t *ps = hpdata_purge_list_first(&psset->to_purge[ind]);
assert(ps != NULL);
return ps;
} }
hpdata_t * hpdata_t *
@ -316,14 +340,7 @@ psset_insert(psset_t *psset, hpdata_t *ps) {
if (hpdata_alloc_allowed_get(ps)) { if (hpdata_alloc_allowed_get(ps)) {
psset_alloc_container_insert(psset, ps); psset_alloc_container_insert(psset, ps);
} }
assert( psset_maybe_insert_purge_list(psset, ps);
hpdata_purge_container_level_get(ps) == hpdata_purge_level_never);
if (hpdata_purge_level_get(ps) != hpdata_purge_level_never) {
hpdata_purge_container_level_set(ps,
hpdata_purge_level_get(ps));
hpdata_purge_list_append(
&psset->to_purge[hpdata_purge_level_get(ps)], ps);
}
if (hpdata_hugify_allowed_get(ps)) { if (hpdata_hugify_allowed_get(ps)) {
hpdata_in_psset_hugify_container_set(ps, true); hpdata_in_psset_hugify_container_set(ps, true);
@ -339,11 +356,7 @@ psset_remove(psset_t *psset, hpdata_t *ps) {
if (hpdata_in_psset_alloc_container_get(ps)) { if (hpdata_in_psset_alloc_container_get(ps)) {
psset_alloc_container_remove(psset, ps); psset_alloc_container_remove(psset, ps);
} }
if (hpdata_purge_container_level_get(ps) != hpdata_purge_level_never) { psset_maybe_remove_purge_list(psset, ps);
hpdata_purge_list_remove(
&psset->to_purge[hpdata_purge_container_level_get(ps)], ps);
hpdata_purge_container_level_set(ps, hpdata_purge_level_never);
}
if (hpdata_in_psset_hugify_container_get(ps)) { if (hpdata_in_psset_hugify_container_get(ps)) {
hpdata_in_psset_hugify_container_set(ps, false); hpdata_in_psset_hugify_container_set(ps, false);
hpdata_hugify_list_remove(&psset->to_hugify, ps); hpdata_hugify_list_remove(&psset->to_hugify, ps);

View File

@ -540,6 +540,89 @@ TEST_BEGIN(test_insert_remove) {
} }
TEST_END TEST_END
/*
 * Checks the purge ordering of psset_pick_purge between hugified and
 * non-hugified hpdatas that were dirtied by the same amount: all the
 * non-hugified ones are expected to be handed out before any hugified one.
 */
TEST_BEGIN(test_purge_prefers_nonhuge) {
/*
 * All else being equal, we should prefer purging non-huge pages over
 * huge ones.
 */
/* Nothing magic about this constant. */
enum {
NHP = 23,
};
hpdata_t *hpdata;
psset_t psset;
psset_init(&psset);
/*
 * Two separate arrays, so that membership in either group can be decided
 * later purely by comparing an hpdata pointer against the array bounds.
 */
hpdata_t hpdata_huge[NHP];
uintptr_t huge_begin = (uintptr_t)&hpdata_huge[0];
uintptr_t huge_end = (uintptr_t)&hpdata_huge[NHP];
hpdata_t hpdata_nonhuge[NHP];
uintptr_t nonhuge_begin = (uintptr_t)&hpdata_nonhuge[0];
uintptr_t nonhuge_end = (uintptr_t)&hpdata_nonhuge[NHP];
/* Insert the two groups interleaved, with distinct addresses and ages. */
for (size_t i = 0; i < NHP; i++) {
hpdata_init(&hpdata_huge[i], (void *)((10 + i) * HUGEPAGE),
123 + i);
psset_insert(&psset, &hpdata_huge[i]);
hpdata_init(&hpdata_nonhuge[i],
(void *)((10 + NHP + i) * HUGEPAGE),
456 + i);
psset_insert(&psset, &hpdata_nonhuge[i]);
}
/*
 * One iteration per inserted hpdata.  Presumably each pick returns a
 * different hpdata, since a 3/4-full one can no longer satisfy another
 * 3/4-HUGEPAGE request -- this is not asserted here.
 */
for (int i = 0; i < 2 * NHP; i++) {
hpdata = psset_pick_alloc(&psset, HUGEPAGE * 3 / 4);
psset_update_begin(&psset, hpdata);
void *ptr;
ptr = hpdata_reserve_alloc(hpdata, HUGEPAGE * 3 / 4);
/* Ignore the first alloc, which will stick around. */
(void)ptr;
/*
 * The second alloc is to dirty the pages; free it immediately
 * after allocating.
 */
ptr = hpdata_reserve_alloc(hpdata, HUGEPAGE / 4);
hpdata_unreserve(hpdata, ptr, HUGEPAGE / 4);
/* Only members of the hpdata_huge array get hugified. */
if (huge_begin <= (uintptr_t)hpdata
&& (uintptr_t)hpdata < huge_end) {
hpdata_hugify(hpdata);
}
hpdata_purge_allowed_set(hpdata, true);
psset_update_end(&psset, hpdata);
}
/*
 * We've got a bunch of equally dirty hpdatas (the freed region above is
 * HUGEPAGE / 4 each; NOTE(review): the original comment said "1/8th",
 * which does not match those sizes -- confirm).  It should give us all
 * the non-huge ones to purge, then all the huge ones.
 *
 * NOTE(review): the original comment also expected the psset to "refuse
 * to purge further" afterwards, but nothing below checks that
 * psset_pick_purge returns NULL once both groups are drained.
 */
for (int i = 0; i < NHP; i++) {
hpdata = psset_pick_purge(&psset);
assert_true(nonhuge_begin <= (uintptr_t)hpdata
&& (uintptr_t)hpdata < nonhuge_end, "");
psset_update_begin(&psset, hpdata);
test_psset_fake_purge(hpdata);
hpdata_purge_allowed_set(hpdata, false);
psset_update_end(&psset, hpdata);
}
/*
 * Second phase: the hugified group.  Note this loop checks with
 * expect_true while the loop above uses assert_true.
 */
for (int i = 0; i < NHP; i++) {
hpdata = psset_pick_purge(&psset);
expect_true(huge_begin <= (uintptr_t)hpdata
&& (uintptr_t)hpdata < huge_end, "");
psset_update_begin(&psset, hpdata);
hpdata_dehugify(hpdata);
test_psset_fake_purge(hpdata);
hpdata_purge_allowed_set(hpdata, false);
psset_update_end(&psset, hpdata);
}
}
TEST_END
int int
main(void) { main(void) {
return test_no_reentrancy( return test_no_reentrancy(
@ -550,5 +633,6 @@ main(void) {
test_multi_pageslab, test_multi_pageslab,
test_stats, test_stats,
test_oldest_fit, test_oldest_fit,
test_insert_remove); test_insert_remove,
test_purge_prefers_nonhuge);
} }