HPA: Purge across retained extents.
This lets us cut down on the number of expensive system calls we perform.
parent 347523517b
commit 41fd56605e
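
To make the idea concrete before the diff: below is a minimal, self-contained sketch (not jemalloc code; it models a toy 8-page hugepage with plain uint64_t bitmaps in place of jemalloc's fb_group_t flat bitmaps, and all names are illustrative) of how extending each dirty run up to the next active page turns two small purge calls into one larger one.

#include <stdint.h>
#include <stdio.h>

#define NPAGES 8 /* toy hugepage size, in pages */

/* One purge (madvise) call per maximal run of set bits. */
static int
count_purge_calls(uint64_t to_purge) {
	int calls = 0;
	int in_run = 0;
	for (int i = 0; i < NPAGES; i++) {
		int bit = (int)((to_purge >> i) & 1);
		if (bit && !in_run) {
			calls++;
		}
		in_run = bit;
	}
	return calls;
}

/* Extend each dirty run until the next active page (the commit's trick). */
static uint64_t
coalesce(uint64_t dirty, uint64_t active) {
	uint64_t to_purge = 0;
	int i = 0;
	while (i < NPAGES) {
		while (i < NPAGES && !((dirty >> i) & 1)) {
			i++; /* find the next dirty page */
		}
		if (i == NPAGES) {
			break;
		}
		int next_active = i;
		while (next_active < NPAGES && !((active >> next_active) & 1)) {
			next_active++;
		}
		/* Back up to the last dirty page before the active one. */
		int last = next_active - 1;
		while (last > i && !((dirty >> last) & 1)) {
			last--;
		}
		for (int j = i; j <= last; j++) {
			to_purge |= UINT64_C(1) << j;
		}
		i = next_active + 1;
	}
	return to_purge;
}

int
main(void) {
	/* Pages 0 and 2 are dirty; page 1 was purged earlier (retained). */
	uint64_t active = 0x00;
	uint64_t touched = 0x05;
	uint64_t dirty = touched & ~active;

	printf("dirty-only: %d purge calls\n", count_purge_calls(dirty));
	printf("coalesced:  %d purge calls\n",
	    count_purge_calls(coalesce(dirty, active)));
	return 0;
}

On the commit's motivating layout (dirty, retained, dirty), this prints 2 purge calls for the old dirty-only policy and 1 for the coalesced policy: the same dirty pages get purged, at the cost of re-purging one already-clean page.
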
@@ -110,7 +110,7 @@ struct hpdata_s {
 	size_t h_ntouched;
 
-	/* The dirty pages (using the same definition as above). */
+	/* The touched pages (using the same definition as above). */
 	fb_group_t touched_pages[FB_NGROUPS(HUGEPAGE_PAGES)];
 };
 
@@ -356,6 +356,7 @@ void hpdata_unreserve(hpdata_t *hpdata, void *begin, size_t sz);
 
 typedef struct hpdata_purge_state_s hpdata_purge_state_t;
 struct hpdata_purge_state_s {
 	size_t npurged;
+	size_t ndirty_to_purge;
 	fb_group_t to_purge[FB_NGROUPS(HUGEPAGE_PAGES)];
 	size_t next_purge_search_begin;
 };
@@ -372,7 +373,7 @@ struct hpdata_purge_state_s {
  * until you're done, and then end.  Allocating out of an hpdata undergoing
  * purging is not allowed.
  *
- * Returns the number of pages that will be purged.
+ * Returns the number of dirty pages that will be purged.
  */
 size_t hpdata_purge_begin(hpdata_t *hpdata, hpdata_purge_state_t *purge_state);
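
For orientation, the begin/next/end protocol this header describes is used like the loop below. This is a sketch against jemalloc's internal hpdata API, modeled on the test_psset_fake_purge helper that appears later in this diff; os_purge() is a hypothetical stand-in for whatever actually releases the pages (e.g. madvise(MADV_DONTNEED)).

static void
purge_everything(hpdata_t *ps) {
	hpdata_purge_state_t purge_state;
	/* Purging requires that allocation out of the hpdata be disallowed. */
	hpdata_alloc_allowed_set(ps, false);
	hpdata_purge_begin(ps, &purge_state);
	void *addr;
	size_t size;
	while (hpdata_purge_next(ps, &purge_state, &addr, &size)) {
		os_purge(addr, size); /* hypothetical purge hook */
	}
	hpdata_purge_end(ps, &purge_state);
	hpdata_alloc_allowed_set(ps, true);
}
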

src/hpdata.c (97 changed lines)

@@ -166,33 +166,93 @@ hpdata_unreserve(hpdata_t *hpdata, void *addr, size_t sz) {
 size_t
 hpdata_purge_begin(hpdata_t *hpdata, hpdata_purge_state_t *purge_state) {
 	hpdata_assert_consistent(hpdata);
-	/* See the comment in reserve. */
+	/*
+	 * See the comment below; we might purge any inactive extent, so it's
+	 * unsafe for any other thread to turn any inactive extent active while
+	 * we're operating on it.
+	 */
 	assert(!hpdata_alloc_allowed_get(hpdata));
 
 	purge_state->npurged = 0;
 	purge_state->next_purge_search_begin = 0;
 
 	/*
-	 * Initialize to_purge with everything that's not active but that is
-	 * dirty.
+	 * Initialize to_purge.
 	 *
-	 * As an optimization, we could note that in practice we never allocate
-	 * out of a hugepage while purging within it, and so could try to
-	 * combine dirty extents separated by a non-dirty but non-active extent
-	 * to avoid purge calls.  This does nontrivially complicate metadata
-	 * tracking though, so let's hold off for now.
+	 * It's possible to end up in situations where two dirty extents are
+	 * separated by a retained extent:
+	 * - 1 page allocated.
+	 * - 1 page allocated.
+	 * - 1 page allocated.
+	 *
+	 * If the middle page is freed and purged, and then the first and third
+	 * pages are freed, and then another purge pass happens, the hpdata
+	 * looks like this:
+	 * - 1 page dirty.
+	 * - 1 page retained.
+	 * - 1 page dirty.
+	 *
+	 * But it's safe to do a single 3-page purge.
+	 *
+	 * We do this by first computing the dirty pages, and then filling in
+	 * any gaps by extending each range in the dirty bitmap until the next
+	 * active page.  This purges more pages, but the expensive part of
+	 * purging is the TLB shootdowns, rather than the kernel state
+	 * tracking; doing a little bit more of the latter is fine if it saves
+	 * us from doing some of the former.
 	 */
-	fb_bit_not(purge_state->to_purge, hpdata->active_pages, HUGEPAGE_PAGES);
-	fb_bit_and(purge_state->to_purge, purge_state->to_purge,
-	    hpdata->touched_pages, HUGEPAGE_PAGES);
-
-	/* We purge everything we can. */
-	size_t to_purge = hpdata->h_ntouched - hpdata->h_nactive;
-	assert(to_purge == fb_scount(
+	/*
+	 * The dirty pages are those that are touched but not active.  Note
+	 * that in a normal-ish case, HUGEPAGE_PAGES is something like 512 and
+	 * the fb_group_t is 64 bits, so this is 64 bytes, spread across 8
+	 * fb_group_ts.
+	 */
+	fb_group_t dirty_pages[FB_NGROUPS(HUGEPAGE_PAGES)];
+	fb_init(dirty_pages, HUGEPAGE_PAGES);
+	fb_bit_not(dirty_pages, hpdata->active_pages, HUGEPAGE_PAGES);
+	fb_bit_and(dirty_pages, dirty_pages, hpdata->touched_pages,
+	    HUGEPAGE_PAGES);
+
+	fb_init(purge_state->to_purge, HUGEPAGE_PAGES);
+	size_t next_bit = 0;
+	while (next_bit < HUGEPAGE_PAGES) {
+		size_t next_dirty = fb_ffs(dirty_pages, HUGEPAGE_PAGES,
+		    next_bit);
+		/* Recall that fb_ffs returns nbits if no set bit is found. */
+		if (next_dirty == HUGEPAGE_PAGES) {
+			break;
+		}
+		size_t next_active = fb_ffs(hpdata->active_pages,
+		    HUGEPAGE_PAGES, next_dirty);
+		/*
+		 * Don't purge past the end of the dirty extent, into retained
+		 * pages.  This helps the kernel a tiny bit, but honestly it's
+		 * mostly helpful for testing (where we tend to write test
+		 * cases that think in terms of the dirty ranges).
+		 */
+		ssize_t last_dirty = fb_fls(dirty_pages, HUGEPAGE_PAGES,
+		    next_active - 1);
+		assert(last_dirty >= 0);
+		assert((size_t)last_dirty >= next_dirty);
+		assert((size_t)last_dirty - next_dirty + 1 <= HUGEPAGE_PAGES);
+
+		fb_set_range(purge_state->to_purge, HUGEPAGE_PAGES, next_dirty,
+		    last_dirty - next_dirty + 1);
+		next_bit = next_active + 1;
+	}
+
+	/* We should purge, at least, everything dirty. */
+	size_t ndirty = hpdata->h_ntouched - hpdata->h_nactive;
+	purge_state->ndirty_to_purge = ndirty;
+	assert(ndirty <= fb_scount(
 	    purge_state->to_purge, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES));
+	assert(ndirty == fb_scount(dirty_pages, HUGEPAGE_PAGES, 0,
+	    HUGEPAGE_PAGES));
 
 	hpdata_assert_consistent(hpdata);
 
-	return to_purge;
+	return ndirty;
 }
 
 bool
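
The loop above leans on the flat-bitmap primitives fb_ffs, fb_fls, and fb_set_range. As a rough sketch of the first two contracts (a single-group approximation that assumes nbits <= 64 and uses GCC/Clang builtins; the real fb_* functions work across arrays of fb_group_t), traced on the 3-page example from the comment:

#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <sys/types.h> /* ssize_t (POSIX) */

/* First set bit at index >= min_bit, or nbits if none (like fb_ffs). */
static size_t
ffs64(uint64_t bits, size_t nbits, size_t min_bit) {
	assert(min_bit < nbits && nbits <= 64);
	uint64_t masked = bits & (~UINT64_C(0) << min_bit);
	return masked == 0 ? nbits : (size_t)__builtin_ctzll(masked);
}

/* Last set bit at index <= max_bit, or -1 if none (like fb_fls). */
static ssize_t
fls64(uint64_t bits, size_t nbits, size_t max_bit) {
	assert(max_bit < nbits && nbits <= 64);
	uint64_t mask = max_bit == 63 ?
	    ~UINT64_C(0) : (UINT64_C(1) << (max_bit + 1)) - 1;
	uint64_t masked = bits & mask;
	return masked == 0 ? -1 : (ssize_t)(63 - __builtin_clzll(masked));
}

int
main(void) {
	size_t nbits = 8;       /* toy 8-page hugepage */
	uint64_t dirty = 0x05;  /* pages 0 and 2 dirty */
	uint64_t active = 0x00; /* nothing active */

	size_t next_dirty = ffs64(dirty, nbits, 0);
	assert(next_dirty == 0);
	size_t next_active = ffs64(active, nbits, next_dirty);
	assert(next_active == nbits); /* no active page: "returns nbits" */
	ssize_t last_dirty = fls64(dirty, nbits, next_active - 1);
	assert(last_dirty == 2);
	/* So: one fb_set_range(to_purge, ..., 0, 3) -- a single 3-page purge. */
	return 0;
}
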
@@ -203,6 +263,7 @@ hpdata_purge_next(hpdata_t *hpdata, hpdata_purge_state_t *purge_state,
 	 * hpdata without synchronization, and therefore have no right to expect
 	 * a consistent state.
 	 */
+	assert(!hpdata_alloc_allowed_get(hpdata));
 
 	if (purge_state->next_purge_search_begin == HUGEPAGE_PAGES) {
 		return false;
@@ -228,19 +289,21 @@ hpdata_purge_next(hpdata_t *hpdata, hpdata_purge_state_t *purge_state,
 
 void
 hpdata_purge_end(hpdata_t *hpdata, hpdata_purge_state_t *purge_state) {
+	assert(!hpdata_alloc_allowed_get(hpdata));
 	hpdata_assert_consistent(hpdata);
 	/* See the comment in reserve. */
 	assert(!hpdata->h_in_psset || hpdata->h_updating);
 
 	assert(purge_state->npurged == fb_scount(purge_state->to_purge,
 	    HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES));
+	assert(purge_state->npurged >= purge_state->ndirty_to_purge);
 
 	fb_bit_not(purge_state->to_purge, purge_state->to_purge,
 	    HUGEPAGE_PAGES);
 	fb_bit_and(hpdata->touched_pages, hpdata->touched_pages,
 	    purge_state->to_purge, HUGEPAGE_PAGES);
-	assert(hpdata->h_ntouched >= purge_state->npurged);
-	hpdata->h_ntouched -= purge_state->npurged;
+	assert(hpdata->h_ntouched >= purge_state->ndirty_to_purge);
+	hpdata->h_ntouched -= purge_state->ndirty_to_purge;
 
 	hpdata_assert_consistent(hpdata);
 }
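
The asymmetry above is the subtle part of the change: npurged counts every purged page, including re-purged retained pages, but only the dirty pages were still counted in h_ntouched. Plugging in the numbers from the second purge pass of the test_purge_over_retained test added below (assuming HUGEPAGE_PAGES = 512, e.g. a 2M hugepage of 4K pages):

#include <assert.h>
#include <stddef.h>

int
main(void) {
	const size_t HUGEPAGE_PAGES = 512; /* assumed: 2M / 4K */

	/*
	 * Quarters 1 and 3 are dirty, quarter 2 is retained (purged on the
	 * previous pass), quarter 4 was never touched, nothing is active.
	 */
	size_t ntouched = 2 * (HUGEPAGE_PAGES / 4); /* 256 */
	size_t nactive = 0;

	size_t ndirty_to_purge = ntouched - nactive; /* 256 dirty pages */
	size_t npurged = 3 * (HUGEPAGE_PAGES / 4);   /* one 384-page purge */
	assert(npurged >= ndirty_to_purge);

	/* Only dirty pages leave the touched state; retained pages already had. */
	ntouched -= ndirty_to_purge;
	assert(ntouched == 0); /* subtracting npurged would have underflowed */
	return 0;
}
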

test/unit/hpdata.c

@@ -67,6 +67,7 @@ TEST_BEGIN(test_purge_simple) {
 
 	expect_zu_eq(hpdata_ntouched_get(&hpdata), HUGEPAGE_PAGES / 2, "");
 
+	hpdata_alloc_allowed_set(&hpdata, false);
 	hpdata_purge_state_t purge_state;
 	size_t to_purge = hpdata_purge_begin(&hpdata, &purge_state);
 	expect_zu_eq(HUGEPAGE_PAGES / 4, to_purge, "");
@@ -90,11 +91,9 @@ TEST_BEGIN(test_purge_simple) {
 TEST_END
 
 /*
- * We only test intervening dalloc's not intervening allocs; we don't need
- * intervening allocs, and foreseeable optimizations will make them not just
- * unnecessary but incorrect.  In particular, if there are two dirty extents
- * separated only by a retained extent, we can just purge the entire range,
- * saving a purge call.
+ * We only test intervening dallocs, not intervening allocs; the latter are
+ * disallowed as a purging precondition because they would interfere with
+ * purging across a retained extent (which saves a purge call).
 */
 TEST_BEGIN(test_purge_intervening_dalloc) {
 	hpdata_t hpdata;
@@ -112,6 +111,7 @@ TEST_BEGIN(test_purge_intervening_dalloc) {
 
 	expect_zu_eq(hpdata_ntouched_get(&hpdata), 3 * HUGEPAGE_PAGES / 4, "");
 
+	hpdata_alloc_allowed_set(&hpdata, false);
 	hpdata_purge_state_t purge_state;
 	size_t to_purge = hpdata_purge_begin(&hpdata, &purge_state);
 	expect_zu_eq(HUGEPAGE_PAGES / 2, to_purge, "");
@@ -137,7 +137,7 @@ TEST_BEGIN(test_purge_intervening_dalloc) {
 	expect_ptr_eq(
 	    (void *)((uintptr_t)alloc + 2 * HUGEPAGE_PAGES / 4 * PAGE),
 	    purge_addr, "");
-	expect_zu_eq(HUGEPAGE_PAGES / 4 * PAGE, purge_size, "");
+	expect_zu_ge(HUGEPAGE_PAGES / 4 * PAGE, purge_size, "");
 
 	got_result = hpdata_purge_next(&hpdata, &purge_state, &purge_addr,
 	    &purge_size);
@@ -150,6 +150,74 @@ TEST_BEGIN(test_purge_intervening_dalloc) {
 }
 TEST_END
 
+TEST_BEGIN(test_purge_over_retained) {
+	void *purge_addr;
+	size_t purge_size;
+
+	hpdata_t hpdata;
+	hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE);
+
+	/* Allocate the first 3/4 of the pages. */
+	void *alloc = hpdata_reserve_alloc(&hpdata,
+	    3 * HUGEPAGE_PAGES / 4 * PAGE);
+	expect_ptr_eq(alloc, HPDATA_ADDR, "");
+
+	/* Free the second quarter. */
+	void *second_quarter =
+	    (void *)((uintptr_t)alloc + HUGEPAGE_PAGES / 4 * PAGE);
+	hpdata_unreserve(&hpdata, second_quarter, HUGEPAGE_PAGES / 4 * PAGE);
+
+	expect_zu_eq(hpdata_ntouched_get(&hpdata), 3 * HUGEPAGE_PAGES / 4, "");
+
+	/* Purge the second quarter. */
+	hpdata_alloc_allowed_set(&hpdata, false);
+	hpdata_purge_state_t purge_state;
+	size_t to_purge_dirty = hpdata_purge_begin(&hpdata, &purge_state);
+	expect_zu_eq(HUGEPAGE_PAGES / 4, to_purge_dirty, "");
+
+	bool got_result = hpdata_purge_next(&hpdata, &purge_state, &purge_addr,
+	    &purge_size);
+	expect_true(got_result, "");
+	expect_ptr_eq(second_quarter, purge_addr, "");
+	expect_zu_eq(HUGEPAGE_PAGES / 4 * PAGE, purge_size, "");
+
+	got_result = hpdata_purge_next(&hpdata, &purge_state, &purge_addr,
+	    &purge_size);
+	expect_false(got_result, "Unexpected additional purge range: "
+	    "extent at %p of size %zu", purge_addr, purge_size);
+	hpdata_purge_end(&hpdata, &purge_state);
+
+	expect_zu_eq(hpdata_ntouched_get(&hpdata), HUGEPAGE_PAGES / 2, "");
+
+	/* Free the first and third quarters. */
+	hpdata_unreserve(&hpdata, HPDATA_ADDR, HUGEPAGE_PAGES / 4 * PAGE);
+	hpdata_unreserve(&hpdata,
+	    (void *)((uintptr_t)alloc + 2 * HUGEPAGE_PAGES / 4 * PAGE),
+	    HUGEPAGE_PAGES / 4 * PAGE);
+
+	/*
+	 * Purge again.  The second quarter is retained, so we can safely
+	 * re-purge it.  We expect a single purge of 3/4 of the hugepage,
+	 * purging half its pages.
+	 */
+	to_purge_dirty = hpdata_purge_begin(&hpdata, &purge_state);
+	expect_zu_eq(HUGEPAGE_PAGES / 2, to_purge_dirty, "");
+
+	got_result = hpdata_purge_next(&hpdata, &purge_state, &purge_addr,
+	    &purge_size);
+	expect_true(got_result, "");
+	expect_ptr_eq(HPDATA_ADDR, purge_addr, "");
+	expect_zu_eq(3 * HUGEPAGE_PAGES / 4 * PAGE, purge_size, "");
+
+	got_result = hpdata_purge_next(&hpdata, &purge_state, &purge_addr,
+	    &purge_size);
+	expect_false(got_result, "Unexpected additional purge range: "
+	    "extent at %p of size %zu", purge_addr, purge_size);
+	hpdata_purge_end(&hpdata, &purge_state);
+
+	expect_zu_eq(hpdata_ntouched_get(&hpdata), 0, "");
+}
+TEST_END
+
 TEST_BEGIN(test_hugify) {
 	hpdata_t hpdata;
 	hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE);
@@ -171,5 +239,6 @@ int main(void) {
 	    test_reserve_alloc,
 	    test_purge_simple,
 	    test_purge_intervening_dalloc,
+	    test_purge_over_retained,
 	    test_hugify);
 }

test/unit/psset.c

@@ -18,12 +18,14 @@ edata_init_test(edata_t *edata) {
 static void
 test_psset_fake_purge(hpdata_t *ps) {
 	hpdata_purge_state_t purge_state;
+	hpdata_alloc_allowed_set(ps, false);
 	hpdata_purge_begin(ps, &purge_state);
 	void *addr;
 	size_t size;
 	while (hpdata_purge_next(ps, &purge_state, &addr, &size)) {
 	}
 	hpdata_purge_end(ps, &purge_state);
+	hpdata_alloc_allowed_set(ps, true);
 }
 
 static void