hpdata: Add state changing helpers.

We're about to allow hugepage subextent purging; get as much of our metadata
handling ready as possible.
David Goldblatt 2020-12-02 18:44:34 -08:00 committed by David Goldblatt
parent 9b75808be1
commit 70692cfb13
4 changed files with 331 additions and 13 deletions

include/jemalloc/internal/hpdata.h

@@ -34,6 +34,16 @@ struct hpdata_s {
uint64_t h_age;
/* Whether or not we think the hugepage is mapped that way by the OS. */
bool h_huge;
/*
* Whether or not some thread is purging this hpdata (i.e. has called
* hpdata_purge_begin but not yet called hpdata_purge_end), or
* hugifying it. Only one thread at a time is allowed to change a
* hugepage's state.
*/
bool h_mid_purge;
bool h_mid_hugify;
union {
/* When nonempty, used by the psset bins. */
phn(hpdata_t) ph_link;
@@ -90,6 +100,22 @@ hpdata_huge_get(const hpdata_t *hpdata) {
return hpdata->h_huge;
}
static inline bool
hpdata_changing_state_get(const hpdata_t *hpdata) {
return hpdata->h_mid_purge || hpdata->h_mid_hugify;
}
static inline bool
hpdata_mid_purge_get(const hpdata_t *hpdata) {
return hpdata->h_mid_purge;
}
static inline bool
hpdata_mid_hugify_get(const hpdata_t *hpdata) {
return hpdata->h_mid_hugify;
}
static inline size_t
hpdata_longest_free_range_get(const hpdata_t *hpdata) {
return hpdata->h_longest_free_range;
@@ -106,6 +132,11 @@ hpdata_nactive_get(hpdata_t *hpdata) {
return hpdata->h_nactive;
}
static inline size_t
hpdata_ndirty_get(hpdata_t *hpdata) {
return hpdata->h_ndirty;
}
static inline void
hpdata_assert_empty(hpdata_t *hpdata) {
assert(fb_empty(hpdata->active_pages, HUGEPAGE_PAGES));
@@ -164,20 +195,69 @@ void hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age);
void *hpdata_reserve_alloc(hpdata_t *hpdata, size_t sz);
void hpdata_unreserve(hpdata_t *hpdata, void *begin, size_t sz);
/*
* Tell the hpdata that it's now a hugepage (which, correspondingly, means that
* all its pages become dirty).
*/
void hpdata_hugify(hpdata_t *hpdata);
/*
* Tell the hpdata that it's no longer a hugepage (all its pages are still
* counted as dirty, though; an explicit purge call is required to change that).
*/
void hpdata_dehugify(hpdata_t *hpdata);
/*
* Tell the hpdata (which should be empty) that all dirty pages in it have been
* purged.
*/
void hpdata_purge(hpdata_t *hpdata);
/*
* The hpdata_purge_state_t allows grabbing the metadata required to purge
* subranges of a hugepage while holding a lock, dropping the lock during the
* actual purging, and reacquiring it to update the metadata again.
*/
typedef struct hpdata_purge_state_s hpdata_purge_state_t;
struct hpdata_purge_state_s {
size_t npurged;
fb_group_t to_purge[FB_NGROUPS(HUGEPAGE_PAGES)];
size_t next_purge_search_begin;
};
/*
* Initializes purge state. The access to hpdata must be externally
* synchronized with other hpdata_* calls.
*
* You can tell whether or not a thread is purging or hugifying a given hpdata
* via hpdata_changing_state_get(hpdata). Racing hugification or purging
* operations aren't allowed.
*
* Once you begin purging, you have to follow through and call hpdata_purge_next
* until you're done, and then call hpdata_purge_end. Allocating out of an
* hpdata undergoing purging is not allowed.
*/
void hpdata_purge_begin(hpdata_t *hpdata, hpdata_purge_state_t *purge_state);
/*
* If there are more extents to purge, sets *r_purge_addr and *r_purge_size to
* the address and size of the next range to purge, and returns true.
* Otherwise, returns false to indicate that we're done.
*
* This requires exclusive access to the purge state, but *not* to the hpdata.
* In particular, unreserve calls are allowed while purging (i.e. you can dalloc
* into one part of the hpdata while purging a different part).
*/
bool hpdata_purge_next(hpdata_t *hpdata, hpdata_purge_state_t *purge_state,
void **r_purge_addr, size_t *r_purge_size);
/*
* Updates the hpdata metadata after all purging is done. Needs external
* synchronization.
*/
void hpdata_purge_end(hpdata_t *hpdata, hpdata_purge_state_t *purge_state);
/*
* Similarly, when hugifying, callers can do the metadata modifications while
* holding a lock (thereby marking the hpdata as mid-hugify), but actually do
* the operation without blocking other threads.
*/
void hpdata_hugify_begin(hpdata_t *hpdata);
void hpdata_hugify_end(hpdata_t *hpdata);
/*
* Tell the hpdata that it's no longer a hugepage (all its pages are still
* counted as dirty, though; an explicit purge call is required to change that).
*
* This should only be done after starting to purge, and before actually purging
* any contents.
*/
void hpdata_dehugify(hpdata_t *hpdata);
#endif /* JEMALLOC_INTERNAL_HPDATA_H */
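
The purge API above is a three-phase protocol: begin under the metadata lock, do the actual purging with the lock dropped, and end under the lock again. To make that concrete, here is a minimal caller-side sketch; it is not part of this commit, and the shard mutex and the use of pages_purge_forced as the purging primitive are illustrative assumptions:

/*
 * Sketch only: shard->mtx and pages_purge_forced stand in for whatever
 * lock and purging primitive the real caller uses.
 */
static void
purge_hpdata(tsdn_t *tsdn, hpa_shard_t *shard, hpdata_t *ps) {
	hpdata_purge_state_t purge_state;

	/* Phase 1: grab the purge metadata under the lock. */
	malloc_mutex_lock(tsdn, &shard->mtx);
	assert(!hpdata_changing_state_get(ps));
	hpdata_purge_begin(ps, &purge_state);
	if (hpdata_huge_get(ps)) {
		/* Dehugify after beginning, before purging any contents. */
		hpdata_dehugify(ps);
	}
	malloc_mutex_unlock(tsdn, &shard->mtx);

	/* Phase 2: the expensive purge calls block no other threads. */
	void *purge_addr;
	size_t purge_size;
	while (hpdata_purge_next(ps, &purge_state, &purge_addr,
	    &purge_size)) {
		pages_purge_forced(purge_addr, purge_size);
	}

	/* Phase 3: reacquire the lock and update the metadata. */
	malloc_mutex_lock(tsdn, &shard->mtx);
	hpdata_purge_end(ps, &purge_state);
	malloc_mutex_unlock(tsdn, &shard->mtx);
}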

src/hpa.c

@@ -300,7 +300,7 @@ hpa_try_alloc_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, bool *oom)
bool hugify = hpa_should_hugify(shard, ps);
if (hugify) {
hpdata_hugify_begin(ps);
}
psset_insert(&shard->psset, ps);
@@ -319,6 +319,9 @@ hpa_try_alloc_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, bool *oom)
* operations in this hpa shard.
*/
hpa_hugify(ps);
malloc_mutex_lock(tsdn, &shard->mtx);
hpdata_hugify_end(ps);
malloc_mutex_unlock(tsdn, &shard->mtx);
}
return edata;
}
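
Since only one thread at a time may change a hugepage's state, a caller that might race with another purger or hugifier can test hpdata_changing_state_get under the same lock before committing. A hypothetical guard, under the same shard-mutex assumption as the sketch above:

static bool
try_begin_purge(tsdn_t *tsdn, hpa_shard_t *shard, hpdata_t *ps,
    hpdata_purge_state_t *purge_state) {
	malloc_mutex_lock(tsdn, &shard->mtx);
	if (hpdata_changing_state_get(ps)) {
		/* Another thread is mid-purge or mid-hugify; back off. */
		malloc_mutex_unlock(tsdn, &shard->mtx);
		return false;
	}
	hpdata_purge_begin(ps, purge_state);
	malloc_mutex_unlock(tsdn, &shard->mtx);
	return true;
}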

src/hpdata.c

@@ -22,6 +22,8 @@ hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age) {
hpdata_addr_set(hpdata, addr);
hpdata_age_set(hpdata, age);
hpdata->h_huge = false;
hpdata->h_mid_purge = false;
hpdata->h_mid_hugify = false;
hpdata_longest_free_range_set(hpdata, HUGEPAGE_PAGES);
hpdata->h_nactive = 0;
fb_init(hpdata->active_pages, HUGEPAGE_PAGES);
@@ -140,17 +142,125 @@ hpdata_unreserve(hpdata_t *hpdata, void *addr, size_t sz) {
}
void
hpdata_purge_begin(hpdata_t *hpdata, hpdata_purge_state_t *purge_state) {
hpdata_assert_consistent(hpdata);
assert(!hpdata->h_mid_purge);
assert(!hpdata->h_mid_hugify);
hpdata->h_mid_purge = true;
purge_state->npurged = 0;
purge_state->next_purge_search_begin = 0;
/*
* Initialize to_purge with everything that's not active but that is
* dirty.
*
* As an optimization, we could note that in practice we never allocate
* out of a hugepage while purging within it, and so could try to
* combine dirty extents separated by a non-dirty but non-active extent
* to avoid purge calls. This does nontrivially complicate metadata
* tracking though, so let's hold off for now.
*/
fb_bit_not(purge_state->to_purge, hpdata->active_pages, HUGEPAGE_PAGES);
fb_bit_and(purge_state->to_purge, purge_state->to_purge,
hpdata->dirty_pages, HUGEPAGE_PAGES);
/* We purge everything we can. */
assert(hpdata->h_ndirty - hpdata->h_nactive == fb_scount(
purge_state->to_purge, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES));
hpdata_assert_consistent(hpdata);
}
bool
hpdata_purge_next(hpdata_t *hpdata, hpdata_purge_state_t *purge_state,
void **r_purge_addr, size_t *r_purge_size) {
/*
* Note that we don't have a consistency check here; we're accessing
* hpdata without synchronization, and therefore have no right to expect
* a consistent state.
*/
assert(hpdata->h_mid_purge);
/* Should have dehugified already (if necessary). */
assert(!hpdata->h_huge);
assert(!hpdata->h_mid_hugify);
if (purge_state->next_purge_search_begin == HUGEPAGE_PAGES) {
return false;
}
size_t purge_begin;
size_t purge_len;
bool found_range = fb_srange_iter(purge_state->to_purge, HUGEPAGE_PAGES,
purge_state->next_purge_search_begin, &purge_begin, &purge_len);
if (!found_range) {
return false;
}
*r_purge_addr = (void *)(
(uintptr_t)hpdata_addr_get(hpdata) + purge_begin * PAGE);
*r_purge_size = purge_len * PAGE;
purge_state->next_purge_search_begin = purge_begin + purge_len;
purge_state->npurged += purge_len;
assert(purge_state->npurged <= HUGEPAGE_PAGES);
return true;
}
void
hpdata_purge_end(hpdata_t *hpdata, hpdata_purge_state_t *purge_state) {
hpdata_assert_consistent(hpdata);
assert(hpdata->h_mid_purge);
assert(!hpdata->h_mid_hugify);
hpdata->h_mid_purge = false;
assert(purge_state->npurged == fb_scount(purge_state->to_purge,
HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES));
fb_bit_not(purge_state->to_purge, purge_state->to_purge,
HUGEPAGE_PAGES);
fb_bit_and(hpdata->dirty_pages, hpdata->dirty_pages,
purge_state->to_purge, HUGEPAGE_PAGES);
assert(hpdata->h_ndirty >= purge_state->npurged);
hpdata->h_ndirty -= purge_state->npurged;
hpdata_assert_consistent(hpdata);
}
void
hpdata_hugify_begin(hpdata_t *hpdata) {
hpdata_assert_consistent(hpdata);
assert(!hpdata->h_mid_purge);
assert(!hpdata->h_mid_hugify);
hpdata->h_mid_hugify = true;
hpdata->h_huge = true;
fb_set_range(hpdata->dirty_pages, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES);
hpdata->h_ndirty = HUGEPAGE_PAGES;
hpdata_assert_consistent(hpdata);
}
void
hpdata_hugify_end(hpdata_t *hpdata) {
hpdata_assert_consistent(hpdata);
assert(!hpdata->h_mid_purge);
assert(hpdata->h_mid_hugify);
hpdata->h_mid_hugify = false;
hpdata_assert_consistent(hpdata);
}
void
hpdata_dehugify(hpdata_t *hpdata) {
hpdata_assert_consistent(hpdata);
/*
* These asserts are morally right; for now, though, the "purge a hugepage
* only in its entirety, when it becomes empty" path shares hpdata_dehugify
* with the new purge pathway coming in the next commit.
*/
/*
assert(hpdata->h_mid_purge);
assert(!hpdata->h_mid_hugify);
*/
hpdata->h_huge = false;
hpdata_assert_consistent(hpdata);
}

test/unit/hpdata.c

@@ -55,7 +55,132 @@ TEST_BEGIN(test_reserve_alloc) {
}
TEST_END
TEST_BEGIN(test_purge_simple) {
hpdata_t hpdata;
hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE);
void *alloc = hpdata_reserve_alloc(&hpdata, HUGEPAGE_PAGES / 2 * PAGE);
expect_ptr_eq(alloc, HPDATA_ADDR, "");
/* Create HUGEPAGE_PAGES / 4 dirty inactive pages at the beginning. */
hpdata_unreserve(&hpdata, alloc, HUGEPAGE_PAGES / 4 * PAGE);
expect_zu_eq(hpdata_ndirty_get(&hpdata), HUGEPAGE_PAGES / 2, "");
expect_false(hpdata_changing_state_get(&hpdata), "");
hpdata_purge_state_t purge_state;
hpdata_purge_begin(&hpdata, &purge_state);
expect_true(hpdata_changing_state_get(&hpdata), "");
void *purge_addr;
size_t purge_size;
bool got_result = hpdata_purge_next(&hpdata, &purge_state, &purge_addr,
&purge_size);
expect_true(got_result, "");
expect_ptr_eq(HPDATA_ADDR, purge_addr, "");
expect_zu_eq(HUGEPAGE_PAGES / 4 * PAGE, purge_size, "");
expect_true(hpdata_changing_state_get(&hpdata), "");
got_result = hpdata_purge_next(&hpdata, &purge_state, &purge_addr,
&purge_size);
expect_false(got_result, "Unexpected additional purge range: "
"extent at %p of size %zu", purge_addr, purge_size);
expect_true(hpdata_changing_state_get(&hpdata), "");
hpdata_purge_end(&hpdata, &purge_state);
expect_false(hpdata_changing_state_get(&hpdata), "");
expect_zu_eq(hpdata_ndirty_get(&hpdata), HUGEPAGE_PAGES / 4, "");
}
TEST_END
/*
* We only test intervening dallocs, not intervening allocs; we don't need
* intervening allocs, and foreseeable optimizations will make them not just
* unnecessary but incorrect. In particular, if there are two dirty extents
* separated only by a retained extent, we can just purge the entire range,
* saving a purge call.
*/
TEST_BEGIN(test_purge_intervening_dalloc) {
hpdata_t hpdata;
hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE);
/* Allocate the first 3/4 of the pages. */
void *alloc = hpdata_reserve_alloc(&hpdata, 3 * HUGEPAGE_PAGES / 4 * PAGE);
expect_ptr_eq(alloc, HPDATA_ADDR, "");
/* Free the first 1/4 and the third 1/4 of the pages. */
hpdata_unreserve(&hpdata, alloc, HUGEPAGE_PAGES / 4 * PAGE);
hpdata_unreserve(&hpdata,
(void *)((uintptr_t)alloc + 2 * HUGEPAGE_PAGES / 4 * PAGE),
HUGEPAGE_PAGES / 4 * PAGE);
expect_zu_eq(hpdata_ndirty_get(&hpdata), 3 * HUGEPAGE_PAGES / 4, "");
hpdata_purge_state_t purge_state;
hpdata_purge_begin(&hpdata, &purge_state);
void *purge_addr;
size_t purge_size;
/* First purge. */
bool got_result = hpdata_purge_next(&hpdata, &purge_state, &purge_addr,
&purge_size);
expect_true(got_result, "");
expect_ptr_eq(HPDATA_ADDR, purge_addr, "");
expect_zu_eq(HUGEPAGE_PAGES / 4 * PAGE, purge_size, "");
/* Deallocate the second 1/4 before the second purge occurs. */
hpdata_unreserve(&hpdata,
(void *)((uintptr_t)alloc + 1 * HUGEPAGE_PAGES / 4 * PAGE),
HUGEPAGE_PAGES / 4 * PAGE);
/* Now continue purging. */
got_result = hpdata_purge_next(&hpdata, &purge_state, &purge_addr,
&purge_size);
expect_true(got_result, "");
expect_ptr_eq(
(void *)((uintptr_t)alloc + 2 * HUGEPAGE_PAGES / 4 * PAGE),
purge_addr, "");
expect_zu_eq(HUGEPAGE_PAGES / 4 * PAGE, purge_size, "");
got_result = hpdata_purge_next(&hpdata, &purge_state, &purge_addr,
&purge_size);
expect_false(got_result, "Unexpected additional purge range: "
"extent at %p of size %zu", purge_addr, purge_size);
hpdata_purge_end(&hpdata, &purge_state);
expect_zu_eq(hpdata_ndirty_get(&hpdata), HUGEPAGE_PAGES / 4, "");
}
TEST_END
TEST_BEGIN(test_hugify) {
hpdata_t hpdata;
hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE);
void *alloc = hpdata_reserve_alloc(&hpdata, HUGEPAGE / 2);
expect_ptr_eq(alloc, HPDATA_ADDR, "");
expect_zu_eq(HUGEPAGE_PAGES / 2, hpdata_ndirty_get(&hpdata), "");
expect_false(hpdata_changing_state_get(&hpdata), "");
hpdata_hugify_begin(&hpdata);
expect_true(hpdata_changing_state_get(&hpdata), "");
hpdata_hugify_end(&hpdata);
expect_false(hpdata_changing_state_get(&hpdata), "");
/* Hugifying should have increased the dirty page count. */
expect_zu_eq(HUGEPAGE_PAGES, hpdata_ndirty_get(&hpdata), "");
}
TEST_END
int main(void) {
return test_no_reentrancy(
test_reserve_alloc,
test_purge_simple,
test_purge_intervening_dalloc,
test_hugify);
}