diff --git a/include/jemalloc/internal/hpdata.h b/include/jemalloc/internal/hpdata.h
index 3bbb7cc8..245116b9 100644
--- a/include/jemalloc/internal/hpdata.h
+++ b/include/jemalloc/internal/hpdata.h
@@ -6,42 +6,6 @@
 #include "jemalloc/internal/ql.h"
 #include "jemalloc/internal/typed_list.h"
 
-/*
- * How badly we want to purge some region of memory.  This is a temporary
- * definition; it gets deleted in the next commit (where we adopt a more
- * explicit dirtiest-first policy that only considers hugification status).
- */
-enum hpdata_purge_level_e {
-	/*
-	 * The level number is important -- we use it as indices into an array
-	 * of size 2 (one for each purge level).
-	 */
-
-	/* "Regular" candidates for purging. */
-	hpdata_purge_level_default = 0,
-
-	/*
-	 * Candidates for purging, but as a last resort.  Practically,
-	 * nonpreferred corresponds to hugified regions that are below the
-	 * hugification threshold but have not yet reached the dehugification
-	 * threshold, while strongly nonpreferred candidates are those which are
-	 * above the hugification threshold.
-	 */
-	hpdata_purge_level_nonpreferred = 1,
-	hpdata_purge_level_strongly_nonpreferred = 2,
-
-	/* Don't purge, no matter what. */
-	hpdata_purge_level_never = 2,
-
-	/*
-	 * How big an array has to be to accomodate all purge levels.  This
-	 * relies on the fact that we don't actually keep unpurgable hpdatas in
-	 * a container.
-	 */
-	hpdata_purge_level_count = hpdata_purge_level_never
-};
-typedef enum hpdata_purge_level_e hpdata_purge_level_t;
-
 /*
  * The metadata representation we use for extents in hugepages.  While the PAC
  * uses the edata_t to represent both active and inactive extents, the HP only
@@ -87,9 +51,13 @@ struct hpdata_s {
 	bool h_alloc_allowed;
 	bool h_in_psset_alloc_container;
 
-	/* The same, but with purging. */
-	uint8_t h_purge_level;
-	uint8_t h_purge_container_level;
+	/*
+	 * The same, but with purging.  There's no corresponding
+	 * h_in_psset_purge_container, because the psset (currently) always
+	 * removes hpdatas from their containers during updates (to implement
+	 * LRU for purging).
+	 */
+	bool h_purge_allowed;
 
 	/* And with hugifying. */
 	bool h_hugify_allowed;
@@ -200,26 +168,15 @@ hpdata_in_psset_alloc_container_set(hpdata_t *hpdata, bool in_container) {
 	hpdata->h_in_psset_alloc_container = in_container;
 }
 
-static inline hpdata_purge_level_t
-hpdata_purge_level_get(const hpdata_t *hpdata) {
-	return (hpdata_purge_level_t)hpdata->h_purge_level;
+static inline bool
+hpdata_purge_allowed_get(const hpdata_t *hpdata) {
+	return hpdata->h_purge_allowed;
 }
 
 static inline void
-hpdata_purge_level_set(hpdata_t *hpdata, hpdata_purge_level_t level) {
-	assert(level == hpdata_purge_level_never || !hpdata->h_mid_purge);
-	hpdata->h_purge_level = (uint8_t)level;
-}
-
-static inline hpdata_purge_level_t
-hpdata_purge_container_level_get(const hpdata_t *hpdata) {
-	return (hpdata_purge_level_t)hpdata->h_purge_container_level;
-}
-
-static inline void
-hpdata_purge_container_level_set(hpdata_t *hpdata, hpdata_purge_level_t level) {
-	assert(level != hpdata->h_purge_container_level);
-	hpdata->h_purge_container_level = level;
+hpdata_purge_allowed_set(hpdata_t *hpdata, bool purge_allowed) {
+	assert(!purge_allowed || !hpdata->h_mid_purge);
+	hpdata->h_purge_allowed = purge_allowed;
 }
 
 static inline bool
@@ -357,12 +314,7 @@ hpdata_consistent(hpdata_t *hpdata) {
 		return false;
 	}
 	if (hpdata_changing_state_get(hpdata)
-	    && ((hpdata->h_purge_level != hpdata_purge_level_never)
-	    || hpdata->h_hugify_allowed)) {
-		return false;
-	}
-	if (hpdata_purge_level_get(hpdata)
-	    != hpdata_purge_container_level_get(hpdata)) {
+	    && (hpdata->h_purge_allowed || hpdata->h_hugify_allowed)) {
 		return false;
 	}
 	if (hpdata_hugify_allowed_get(hpdata)
diff --git a/include/jemalloc/internal/psset.h b/include/jemalloc/internal/psset.h
index 285bf6da..96fb300e 100644
--- a/include/jemalloc/internal/psset.h
+++ b/include/jemalloc/internal/psset.h
@@ -20,6 +20,14 @@
  */
 #define PSSET_NPSIZES 64
 
+/*
+ * We keep two purge lists per page size class; one for hugified hpdatas (at
+ * index 2*pszind), and one for the non-hugified hpdatas (at index 2*pszind +
+ * 1).  This lets us implement a preference for purging non-hugified hpdatas
+ * among similarly-dirty ones.
+ */
+#define PSSET_NPURGE_LISTS (2 * PSSET_NPSIZES)
+
 typedef struct psset_bin_stats_s psset_bin_stats_t;
 struct psset_bin_stats_s {
 	/* How many pageslabs are in this bin? */
@@ -71,7 +79,9 @@ struct psset_s {
 	 */
 	hpdata_empty_list_t empty;
-	/* Slabs which are available to be purged, ordered by purge level. */
-	hpdata_purge_list_t to_purge[hpdata_purge_level_count];
+	/* Purgeable slabs, bucketed by dirtiness; higher indices go first. */
+	hpdata_purge_list_t to_purge[PSSET_NPURGE_LISTS];
+	/* Bitmap for which set bits correspond to non-empty purge lists. */
+	fb_group_t purge_bitmap[FB_NGROUPS(PSSET_NPURGE_LISTS)];
 	/* Slabs which are available to be hugified. */
 	hpdata_hugify_list_t to_hugify;
 };
diff --git a/src/hpa.c b/src/hpa.c
index 90fec354..7d4fa1bf 100644
--- a/src/hpa.c
+++ b/src/hpa.c
@@ -195,7 +195,7 @@ hpa_update_purge_hugify_eligibility(tsdn_t *tsdn, hpa_shard_t *shard,
     hpdata_t *ps) {
 	malloc_mutex_assert_owner(tsdn, &shard->mtx);
 	if (hpdata_changing_state_get(ps)) {
-		hpdata_purge_level_set(ps, hpdata_purge_level_never);
+		hpdata_purge_allowed_set(ps, false);
 		hpdata_hugify_allowed_set(ps, false);
 		return;
 	}
@@ -221,23 +221,7 @@ hpa_update_purge_hugify_eligibility(tsdn_t *tsdn, hpa_shard_t *shard,
 	 * allocator's end at all; we just try to pack allocations in a
 	 * hugepage-friendly manner and let the OS hugify in the background.
 	 */
-	if (hpdata_ndirty_get(ps) > 0) {
-		if (hpdata_huge_get(ps)) {
-			if (hpa_good_hugification_candidate(shard, ps)) {
-				hpdata_purge_level_set(ps,
-				    hpdata_purge_level_strongly_nonpreferred);
-			} else if (hpdata_ndirty_get(ps) * PAGE
-			    >= shard->opts.dehugification_threshold) {
-				hpdata_purge_level_set(ps,
-				    hpdata_purge_level_nonpreferred);
-			} else {
-				hpdata_purge_level_set(ps,
-				    hpdata_purge_level_default);
-			}
-		} else {
-			hpdata_purge_level_set(ps, hpdata_purge_level_default);
-		}
-	}
+	hpdata_purge_allowed_set(ps, hpdata_ndirty_get(ps) > 0);
 	if (hpa_good_hugification_candidate(shard, ps)
 	    && !hpdata_huge_get(ps)) {
 		hpdata_hugify_allowed_set(ps, true);
@@ -317,7 +301,7 @@ hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) {
 	if (to_purge == NULL) {
 		return false;
 	}
-	assert(hpdata_purge_level_get(to_purge) != hpdata_purge_level_never);
+	assert(hpdata_purge_allowed_get(to_purge));
 	assert(!hpdata_changing_state_get(to_purge));
 
 	/*
@@ -328,7 +312,7 @@ hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) {
 	psset_update_begin(&shard->psset, to_purge);
 	assert(hpdata_alloc_allowed_get(to_purge));
 	hpdata_mid_purge_set(to_purge, true);
-	hpdata_purge_level_set(to_purge, hpdata_purge_level_never);
+	hpdata_purge_allowed_set(to_purge, false);
 	hpdata_hugify_allowed_set(to_purge, false);
 	/*
 	 * Unlike with hugification (where concurrent
@@ -413,7 +397,7 @@ hpa_try_hugify(tsdn_t *tsdn, hpa_shard_t *shard) {
 	 */
 	psset_update_begin(&shard->psset, to_hugify);
 	hpdata_mid_hugify_set(to_hugify, true);
-	hpdata_purge_level_set(to_hugify, hpdata_purge_level_never);
+	hpdata_purge_allowed_set(to_hugify, false);
 	hpdata_hugify_allowed_set(to_hugify, false);
 	assert(hpdata_alloc_allowed_get(to_hugify));
 	psset_update_end(&shard->psset, to_hugify);
diff --git a/src/hpdata.c b/src/hpdata.c
index 6aee4f61..b861e9e4 100644
--- a/src/hpdata.c
+++ b/src/hpdata.c
@@ -24,8 +24,7 @@ hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age) {
 	hpdata->h_huge = false;
 	hpdata->h_alloc_allowed = true;
 	hpdata->h_in_psset_alloc_container = false;
-	hpdata->h_purge_level = hpdata_purge_level_never;
-	hpdata->h_purge_container_level = hpdata_purge_level_never;
+	hpdata->h_purge_allowed = false;
 	hpdata->h_hugify_allowed = false;
 	hpdata->h_in_psset_hugify_container = false;
 	hpdata->h_mid_purge = false;
diff --git a/src/psset.c b/src/psset.c
index 6de82605..c4053efc 100644
--- a/src/psset.c
+++ b/src/psset.c
@@ -14,9 +14,10 @@ psset_init(psset_t *psset) {
 	memset(&psset->merged_stats, 0, sizeof(psset->merged_stats));
 	memset(&psset->stats, 0, sizeof(psset->stats));
 	hpdata_empty_list_init(&psset->empty);
-	for (int i = 0; i < hpdata_purge_level_count; i++) {
+	for (int i = 0; i < PSSET_NPURGE_LISTS; i++) {
 		hpdata_purge_list_init(&psset->to_purge[i]);
 	}
+	fb_init(psset->purge_bitmap, PSSET_NPURGE_LISTS);
 	hpdata_hugify_list_init(&psset->to_hugify);
 }
 
@@ -195,6 +196,51 @@ psset_alloc_container_remove(psset_t *psset, hpdata_t *ps) {
 	}
 }
 
+static size_t
+psset_purge_list_ind(hpdata_t *ps) {
+	size_t ndirty = hpdata_ndirty_get(ps);
+	/* Shouldn't have something with no dirty pages purgeable. */
+	assert(ndirty > 0);
+	pszind_t pind = sz_psz2ind(sz_psz_quantize_floor(ndirty << LG_PAGE));
+	/*
+	 * Higher indices correspond to lists we'd like to purge earlier;
+	 * increment the index for the nonhugified hpdatas first, so that we'll
+	 * pick them before picking hugified ones.
+	 */
+	return (size_t)pind * 2 + (hpdata_huge_get(ps) ? 0 : 1);
+}
+
+static void
+psset_maybe_remove_purge_list(psset_t *psset, hpdata_t *ps) {
+	/*
+	 * Remove the hpdata from its purge list (if it's in one).  Even if it's
+	 * going to stay in the same one, by appending it during
+	 * psset_update_end, we move it to the end of its queue, so that we
+	 * purge LRU within a given dirtiness bucket.
+	 */
+	if (hpdata_purge_allowed_get(ps)) {
+		size_t ind = psset_purge_list_ind(ps);
+		hpdata_purge_list_t *purge_list = &psset->to_purge[ind];
+		hpdata_purge_list_remove(purge_list, ps);
+		if (hpdata_purge_list_empty(purge_list)) {
+			fb_unset(psset->purge_bitmap, PSSET_NPURGE_LISTS, ind);
+		}
+	}
+}
+
+/* Append ps to the purge list for its dirtiness/hugeness, if purgeable. */
+static void
+psset_maybe_insert_purge_list(psset_t *psset, hpdata_t *ps) {
+	if (hpdata_purge_allowed_get(ps)) {
+		size_t ind = psset_purge_list_ind(ps);
+		hpdata_purge_list_t *purge_list = &psset->to_purge[ind];
+		if (hpdata_purge_list_empty(purge_list)) {
+			fb_set(psset->purge_bitmap, PSSET_NPURGE_LISTS, ind);
+		}
+		hpdata_purge_list_append(purge_list, ps);
+	}
+}
+
 void
 psset_update_begin(psset_t *psset, hpdata_t *ps) {
 	hpdata_assert_consistent(ps);
@@ -210,10 +256,11 @@ psset_update_begin(psset_t *psset, hpdata_t *ps) {
 		assert(hpdata_alloc_allowed_get(ps));
 		psset_alloc_container_remove(psset, ps);
 	}
+	psset_maybe_remove_purge_list(psset, ps);
 	/*
-	 * We don't update presence in the purge list or hugify list; we try to
-	 * keep those FIFO, even in the presence of other metadata updates.
-	 * We'll update presence at the end of the metadata update if necessary.
+	 * We don't update presence in the hugify list; we try to keep it FIFO,
+	 * even in the presence of other metadata updates.  We'll update
+	 * presence at the end of the metadata update if necessary.
 	 */
 }
 
@@ -231,33 +278,7 @@ psset_update_end(psset_t *psset, hpdata_t *ps) {
 	if (hpdata_alloc_allowed_get(ps)) {
 		psset_alloc_container_insert(psset, ps);
 	}
-
-	if (hpdata_purge_level_get(ps) == hpdata_purge_level_never
-	    && hpdata_purge_container_level_get(ps)
-	    != hpdata_purge_level_never) {
-		/* In some purge container, but shouldn't be in any. */
-		hpdata_purge_list_remove(
-		    &psset->to_purge[hpdata_purge_container_level_get(ps)],
-		    ps);
-		hpdata_purge_container_level_set(ps, hpdata_purge_level_never);
-	} else if (hpdata_purge_level_get(ps) != hpdata_purge_level_never
-	    && hpdata_purge_container_level_get(ps)
-	    == hpdata_purge_level_never) {
-		/* Not in any purge container, but should be in one. */
-		hpdata_purge_list_append(
-		    &psset->to_purge[hpdata_purge_level_get(ps)], ps);
-		hpdata_purge_container_level_set(ps,
-		    hpdata_purge_level_get(ps));
-	} else if (hpdata_purge_level_get(ps)
-	    != hpdata_purge_container_level_get(ps)) {
-		/* Should switch containers. */
-		hpdata_purge_list_remove(
-		    &psset->to_purge[hpdata_purge_container_level_get(ps)], ps);
-		hpdata_purge_list_append(
-		    &psset->to_purge[hpdata_purge_level_get(ps)], ps);
-		hpdata_purge_container_level_set(ps,
-		    hpdata_purge_level_get(ps));
-	}
+	psset_maybe_insert_purge_list(psset, ps);
 
 	if (hpdata_hugify_allowed_get(ps)
 	    && !hpdata_in_psset_hugify_container_get(ps)) {
@@ -294,13 +315,17 @@ psset_pick_alloc(psset_t *psset, size_t size) {
 
 hpdata_t *
 psset_pick_purge(psset_t *psset) {
-	for (int i = 0; i < hpdata_purge_level_count; i++) {
-		hpdata_t *ps = hpdata_purge_list_first(&psset->to_purge[i]);
-		if (ps != NULL) {
-			return ps;
-		}
+	/* Search from the top: higher list indices are purged earlier. */
+	ssize_t ind_ssz = fb_fls(psset->purge_bitmap, PSSET_NPURGE_LISTS,
+	    PSSET_NPURGE_LISTS - 1);
+	if (ind_ssz < 0) {
+		return NULL;
 	}
-	return NULL;
+	size_t ind = (size_t)ind_ssz;
+	assert(ind < PSSET_NPURGE_LISTS);
+	hpdata_t *ps = hpdata_purge_list_first(&psset->to_purge[ind]);
+	assert(ps != NULL);
+	return ps;
 }
 
 hpdata_t *
@@ -316,14 +341,7 @@ psset_insert(psset_t *psset, hpdata_t *ps) {
 	if (hpdata_alloc_allowed_get(ps)) {
 		psset_alloc_container_insert(psset, ps);
 	}
-	assert(
-	    hpdata_purge_container_level_get(ps) == hpdata_purge_level_never);
-	if (hpdata_purge_level_get(ps) != hpdata_purge_level_never) {
-		hpdata_purge_container_level_set(ps,
-		    hpdata_purge_level_get(ps));
-		hpdata_purge_list_append(
-		    &psset->to_purge[hpdata_purge_level_get(ps)], ps);
-	}
+	psset_maybe_insert_purge_list(psset, ps);
 
 	if (hpdata_hugify_allowed_get(ps)) {
 		hpdata_in_psset_hugify_container_set(ps, true);
@@ -339,11 +357,7 @@ psset_remove(psset_t *psset, hpdata_t *ps) {
 	if (hpdata_in_psset_alloc_container_get(ps)) {
 		psset_alloc_container_remove(psset, ps);
 	}
-	if (hpdata_purge_container_level_get(ps) != hpdata_purge_level_never) {
-		hpdata_purge_list_remove(
-		    &psset->to_purge[hpdata_purge_container_level_get(ps)], ps);
-		hpdata_purge_container_level_set(ps, hpdata_purge_level_never);
-	}
+	psset_maybe_remove_purge_list(psset, ps);
 	if (hpdata_in_psset_hugify_container_get(ps)) {
 		hpdata_in_psset_hugify_container_set(ps, false);
 		hpdata_hugify_list_remove(&psset->to_hugify, ps);
diff --git a/test/unit/psset.c b/test/unit/psset.c
index fdc28d3d..fde403e1 100644
--- a/test/unit/psset.c
+++ b/test/unit/psset.c
@@ -540,6 +540,90 @@ TEST_BEGIN(test_insert_remove) {
 }
 TEST_END
 
+TEST_BEGIN(test_purge_prefers_nonhuge) {
+	/*
+	 * All else being equal, we should prefer purging non-huge pages over
+	 * huge ones.
+	 */
+
+	/* Nothing magic about this constant. */
+	enum {
+		NHP = 23,
+	};
+	hpdata_t *hpdata;
+
+	psset_t psset;
+	psset_init(&psset);
+
+	hpdata_t hpdata_huge[NHP];
+	uintptr_t huge_begin = (uintptr_t)&hpdata_huge[0];
+	uintptr_t huge_end = (uintptr_t)&hpdata_huge[NHP];
+	hpdata_t hpdata_nonhuge[NHP];
+	uintptr_t nonhuge_begin = (uintptr_t)&hpdata_nonhuge[0];
+	uintptr_t nonhuge_end = (uintptr_t)&hpdata_nonhuge[NHP];
+
+	for (size_t i = 0; i < NHP; i++) {
+		hpdata_init(&hpdata_huge[i], (void *)((10 + i) * HUGEPAGE),
+		    123 + i);
+		psset_insert(&psset, &hpdata_huge[i]);
+
+		hpdata_init(&hpdata_nonhuge[i],
+		    (void *)((10 + NHP + i) * HUGEPAGE),
+		    456 + i);
+		psset_insert(&psset, &hpdata_nonhuge[i]);
+
+	}
+	for (int i = 0; i < 2 * NHP; i++) {
+		hpdata = psset_pick_alloc(&psset, HUGEPAGE * 3 / 4);
+		psset_update_begin(&psset, hpdata);
+		void *ptr;
+		ptr = hpdata_reserve_alloc(hpdata, HUGEPAGE * 3 / 4);
+		/* Ignore the first alloc, which will stick around. */
+		(void)ptr;
+		/*
+		 * The second alloc is to dirty the pages; free it immediately
+		 * after allocating.
+		 */
+		ptr = hpdata_reserve_alloc(hpdata, HUGEPAGE / 4);
+		hpdata_unreserve(hpdata, ptr, HUGEPAGE / 4);
+
+		if (huge_begin <= (uintptr_t)hpdata
+		    && (uintptr_t)hpdata < huge_end) {
+			hpdata_hugify(hpdata);
+		}
+
+		hpdata_purge_allowed_set(hpdata, true);
+		psset_update_end(&psset, hpdata);
+	}
+
+	/*
+	 * We've got a bunch of 1/4 dirty hpdatas.  It should give us all the
+	 * non-huge ones to purge, then all the huge ones, then refuse to purge
+	 * further.
+	 */
+	for (int i = 0; i < NHP; i++) {
+		hpdata = psset_pick_purge(&psset);
+		assert_true(nonhuge_begin <= (uintptr_t)hpdata
+		    && (uintptr_t)hpdata < nonhuge_end, "");
+		psset_update_begin(&psset, hpdata);
+		test_psset_fake_purge(hpdata);
+		hpdata_purge_allowed_set(hpdata, false);
+		psset_update_end(&psset, hpdata);
+	}
+	for (int i = 0; i < NHP; i++) {
+		hpdata = psset_pick_purge(&psset);
+		assert_true(huge_begin <= (uintptr_t)hpdata
+		    && (uintptr_t)hpdata < huge_end, "");
+		psset_update_begin(&psset, hpdata);
+		hpdata_dehugify(hpdata);
+		test_psset_fake_purge(hpdata);
+		hpdata_purge_allowed_set(hpdata, false);
+		psset_update_end(&psset, hpdata);
+	}
+	expect_ptr_null(psset_pick_purge(&psset), "Should be fully purged");
+}
+TEST_END
+
 int
 main(void) {
 	return test_no_reentrancy(
@@ -550,5 +634,6 @@ main(void) {
 	    test_multi_pageslab,
 	    test_stats,
 	    test_oldest_fit,
-	    test_insert_remove);
+	    test_insert_remove,
+	    test_purge_prefers_nonhuge);
 }