HPA: Purge across retained extents.
This lets us cut down on the number of expensive system calls we perform.
parent 347523517b
commit 41fd56605e
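To see where the savings come from: purging ultimately bottoms out in one system call per purged range (e.g. madvise). If two dirty ranges are separated only by an already-purged (retained) range, purging them as one span re-purges the middle harmlessly and halves the syscall count. A minimal illustrative sketch, not jemalloc's actual purge path (`purge_separately` and `purge_across_retained` are hypothetical names):

#include <sys/mman.h>
#include <stddef.h>

/*
 * Hypothetical illustration: two dirty pages sandwiching one
 * already-purged (retained) page.
 */
static void
purge_separately(char *base, size_t page) {
	/* One syscall per dirty range: two madvise calls, two TLB shootdowns. */
	(void)madvise(base, page, MADV_DONTNEED);            /* first page */
	(void)madvise(base + 2 * page, page, MADV_DONTNEED); /* third page */
}

static void
purge_across_retained(char *base, size_t page) {
	/*
	 * One syscall covering dirty + retained + dirty. Re-purging the
	 * middle page is harmless, and we save a syscall and a shootdown.
	 */
	(void)madvise(base, 3 * page, MADV_DONTNEED);
}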
include/jemalloc/internal/hpdata.h
@@ -110,7 +110,7 @@ struct hpdata_s {
 	 */
 	size_t h_ntouched;
 
-	/* The dirty pages (using the same definition as above). */
+	/* The touched pages (using the same definition as above). */
 	fb_group_t touched_pages[FB_NGROUPS(HUGEPAGE_PAGES)];
 };
 
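A note on vocabulary for the rest of the diff (a paraphrase, not text from the commit): a page is "active" while it is allocated out; "touched" once it has been written to and is still backed by the kernel; "dirty" if it is touched but no longer active (a purge candidate); and "retained" if it is neither active nor touched, typically because it was already purged. So `touched_pages`, as the corrected comment says, tracks touched pages, and the dirty set is derived as touched-and-not-active.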
@@ -356,6 +356,7 @@ void hpdata_unreserve(hpdata_t *hpdata, void *begin, size_t sz);
 typedef struct hpdata_purge_state_s hpdata_purge_state_t;
 struct hpdata_purge_state_s {
 	size_t npurged;
+	size_t ndirty_to_purge;
 	fb_group_t to_purge[FB_NGROUPS(HUGEPAGE_PAGES)];
 	size_t next_purge_search_begin;
 };
@@ -372,7 +373,7 @@ struct hpdata_purge_state_s {
  * until you're done, and then end. Allocating out of an hpdata undergoing
  * purging is not allowed.
  *
- * Returns the number of pages that will be purged.
+ * Returns the number of dirty pages that will be purged.
  */
 size_t hpdata_purge_begin(hpdata_t *hpdata, hpdata_purge_state_t *purge_state);
 
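Taken together, the header documents a begin/next/end protocol with allocation disabled throughout. A hedged usage sketch assembled from the calls that appear in this diff (it assumes jemalloc's internal headers; `os_purge` is a hypothetical stand-in for the real purge primitive, e.g. a madvise wrapper):

/* Sketch only, mirroring the test helper at the end of this diff. */
static void
hpdata_purge_all(hpdata_t *ps) {
	hpdata_purge_state_t purge_state;

	/* Purging requires that allocation out of this hpdata be disabled. */
	hpdata_alloc_allowed_set(ps, false);
	size_t ndirty = hpdata_purge_begin(ps, &purge_state);
	(void)ndirty; /* the number of dirty pages that will be purged */

	void *addr;
	size_t size;
	/* Each iteration yields one coalesced range to hand to the OS. */
	while (hpdata_purge_next(ps, &purge_state, &addr, &size)) {
		os_purge(addr, size); /* hypothetical purge call */
	}
	hpdata_purge_end(ps, &purge_state);
	hpdata_alloc_allowed_set(ps, true);
}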
src/hpdata.c (97 lines changed)
@@ -166,33 +166,93 @@ hpdata_unreserve(hpdata_t *hpdata, void *addr, size_t sz) {
 size_t
 hpdata_purge_begin(hpdata_t *hpdata, hpdata_purge_state_t *purge_state) {
 	hpdata_assert_consistent(hpdata);
-	/* See the comment in reserve. */
+	/*
+	 * See the comment below; we might purge any inactive extent, so it's
+	 * unsafe for any other thread to turn any inactive extent active while
+	 * we're operating on it.
+	 */
+	assert(!hpdata_alloc_allowed_get(hpdata));
 
 	purge_state->npurged = 0;
 	purge_state->next_purge_search_begin = 0;
 
 	/*
-	 * Initialize to_purge with everything that's not active but that is
-	 * dirty.
+	 * Initialize to_purge.
 	 *
-	 * As an optimization, we could note that in practice we never allocate
-	 * out of a hugepage while purging within it, and so could try to
-	 * combine dirty extents separated by a non-dirty but non-active extent
-	 * to avoid purge calls. This does nontrivially complicate metadata
-	 * tracking though, so let's hold off for now.
+	 * It's possible to end up in situations where two dirty extents are
+	 * separated by a retained extent:
+	 * - 1 page allocated.
+	 * - 1 page allocated.
+	 * - 1 page allocated.
+	 *
+	 * If the middle page is freed and purged, and then the first and third
+	 * pages are freed, and then another purge pass happens, the hpdata
+	 * looks like this:
+	 * - 1 page dirty.
+	 * - 1 page retained.
+	 * - 1 page dirty.
+	 *
+	 * But it's safe to do a single 3-page purge.
+	 *
+	 * We do this by first computing the dirty pages, and then filling in
+	 * any gaps by extending each range in the dirty bitmap until the next
+	 * active page. This purges more pages, but the expensive part of
+	 * purging is the TLB shootdowns, rather than the kernel state
+	 * tracking; doing a little bit more of the latter is fine if it saves
+	 * us from doing some of the former.
 	 */
-	fb_bit_not(purge_state->to_purge, hpdata->active_pages, HUGEPAGE_PAGES);
-	fb_bit_and(purge_state->to_purge, purge_state->to_purge,
-	    hpdata->touched_pages, HUGEPAGE_PAGES);
-
-	/* We purge everything we can. */
-	size_t to_purge = hpdata->h_ntouched - hpdata->h_nactive;
-	assert(to_purge == fb_scount(
+	/*
+	 * The dirty pages are those that are touched but not active. Note that
+	 * in a normal-ish case, HUGEPAGE_PAGES is something like 512 and the
+	 * fb_group_t is 64 bits, so this is 64 bytes, spread across 8
+	 * fb_group_ts.
+	 */
+	fb_group_t dirty_pages[FB_NGROUPS(HUGEPAGE_PAGES)];
+	fb_init(dirty_pages, HUGEPAGE_PAGES);
+	fb_bit_not(dirty_pages, hpdata->active_pages, HUGEPAGE_PAGES);
+	fb_bit_and(dirty_pages, dirty_pages, hpdata->touched_pages,
+	    HUGEPAGE_PAGES);
+
+	fb_init(purge_state->to_purge, HUGEPAGE_PAGES);
+	size_t next_bit = 0;
+	while (next_bit < HUGEPAGE_PAGES) {
+		size_t next_dirty = fb_ffs(dirty_pages, HUGEPAGE_PAGES,
+		    next_bit);
+		/* Recall that fb_ffs returns nbits if no set bit is found. */
+		if (next_dirty == HUGEPAGE_PAGES) {
+			break;
+		}
+		size_t next_active = fb_ffs(hpdata->active_pages,
+		    HUGEPAGE_PAGES, next_dirty);
+		/*
+		 * Don't purge past the end of the dirty extent, into retained
+		 * pages. This helps the kernel a tiny bit, but honestly it's
+		 * mostly helpful for testing (where we tend to write test cases
+		 * that think in terms of the dirty ranges).
+		 */
+		ssize_t last_dirty = fb_fls(dirty_pages, HUGEPAGE_PAGES,
+		    next_active - 1);
+		assert(last_dirty >= 0);
+		assert((size_t)last_dirty >= next_dirty);
+		assert((size_t)last_dirty - next_dirty + 1 <= HUGEPAGE_PAGES);
+
+		fb_set_range(purge_state->to_purge, HUGEPAGE_PAGES, next_dirty,
+		    last_dirty - next_dirty + 1);
+		next_bit = next_active + 1;
+	}
+
+	/* We should purge, at least, everything dirty. */
+	size_t ndirty = hpdata->h_ntouched - hpdata->h_nactive;
+	purge_state->ndirty_to_purge = ndirty;
+	assert(ndirty <= fb_scount(
 	    purge_state->to_purge, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES));
+	assert(ndirty == fb_scount(dirty_pages, HUGEPAGE_PAGES, 0,
+	    HUGEPAGE_PAGES));
+
 	hpdata_assert_consistent(hpdata);
 
-	return to_purge;
+	return ndirty;
 }
 
 bool
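For intuition about the loop above, here is a self-contained model of the same gap-filling computation on a single 64-bit bitmap. The helpers `ffs_from` and `fls_through` are hypothetical stand-ins for jemalloc's `fb_ffs`/`fb_fls`, and the sketch assumes GCC/Clang bit-scan builtins:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Lowest set bit at index >= start, or 64 if none (cf. fb_ffs). */
static unsigned
ffs_from(uint64_t bits, unsigned start) {
	uint64_t masked = start >= 64 ? 0 : bits & (~0ULL << start);
	return masked == 0 ? 64 : (unsigned)__builtin_ctzll(masked);
}

/* Highest set bit at index <= end; caller guarantees one exists (cf. fb_fls). */
static unsigned
fls_through(uint64_t bits, unsigned end) {
	uint64_t masked = bits & (end >= 63 ? ~0ULL : ((1ULL << (end + 1)) - 1));
	assert(masked != 0);
	return 63 - (unsigned)__builtin_clzll(masked);
}

/*
 * Dirty ranges, with any gaps of retained (inactive, untouched) pages
 * between them filled in; active pages terminate each range.
 */
static uint64_t
compute_to_purge(uint64_t active, uint64_t touched) {
	uint64_t dirty = touched & ~active;
	uint64_t to_purge = 0;
	unsigned next_bit = 0;
	while (next_bit < 64) {
		unsigned next_dirty = ffs_from(dirty, next_bit);
		if (next_dirty == 64) {
			break;
		}
		unsigned next_active = ffs_from(active, next_dirty);
		/* Trim retained pages past the last dirty one, as above. */
		unsigned last_dirty = fls_through(dirty, next_active - 1);
		for (unsigned i = next_dirty; i <= last_dirty; i++) {
			to_purge |= 1ULL << i;
		}
		next_bit = next_active + 1;
	}
	return to_purge;
}

int
main(void) {
	/* Pages 0 and 2 dirty, page 1 retained: one coalesced 3-page range. */
	assert(compute_to_purge(/* active */ 0x0, /* touched */ 0x5) == 0x7);
	/* Page 1 active instead: coalescing is blocked, two 1-page ranges. */
	assert(compute_to_purge(/* active */ 0x2, /* touched */ 0x7) == 0x5);
	printf("ok\n");
	return 0;
}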
@@ -203,6 +263,7 @@ hpdata_purge_next(hpdata_t *hpdata, hpdata_purge_state_t *purge_state,
 	 * hpdata without synchronization, and therefore have no right to expect
 	 * a consistent state.
 	 */
+	assert(!hpdata_alloc_allowed_get(hpdata));
 
 	if (purge_state->next_purge_search_begin == HUGEPAGE_PAGES) {
 		return false;
@@ -228,19 +289,21 @@ hpdata_purge_next(hpdata_t *hpdata, hpdata_purge_state_t *purge_state,
 
 void
 hpdata_purge_end(hpdata_t *hpdata, hpdata_purge_state_t *purge_state) {
+	assert(!hpdata_alloc_allowed_get(hpdata));
 	hpdata_assert_consistent(hpdata);
 	/* See the comment in reserve. */
 	assert(!hpdata->h_in_psset || hpdata->h_updating);
 
 	assert(purge_state->npurged == fb_scount(purge_state->to_purge,
 	    HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES));
+	assert(purge_state->npurged >= purge_state->ndirty_to_purge);
 
 	fb_bit_not(purge_state->to_purge, purge_state->to_purge,
 	    HUGEPAGE_PAGES);
 	fb_bit_and(hpdata->touched_pages, hpdata->touched_pages,
 	    purge_state->to_purge, HUGEPAGE_PAGES);
-	assert(hpdata->h_ntouched >= purge_state->npurged);
-	hpdata->h_ntouched -= purge_state->npurged;
+	assert(hpdata->h_ntouched >= purge_state->ndirty_to_purge);
+	hpdata->h_ntouched -= purge_state->ndirty_to_purge;
 
 	hpdata_assert_consistent(hpdata);
 }
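One subtlety in hpdata_purge_end worth spelling out: npurged counts every page handed back to the OS, including re-purged retained pages, while h_ntouched only ever counted dirty pages, so it must shrink by ndirty_to_purge instead. A tiny worked check of the arithmetic for the dirty/retained/dirty example (numbers are illustrative, not from the diff):

#include <assert.h>
#include <stddef.h>

int
main(void) {
	/* dirty / retained / dirty, purged as a single 3-page range. */
	size_t h_ntouched = 2;      /* only the two dirty pages were touched */
	size_t npurged = 3;         /* the purge call covered all three pages */
	size_t ndirty_to_purge = 2; /* what hpdata_purge_begin() returned */

	assert(npurged >= ndirty_to_purge);
	/* Subtracting npurged (3) would underflow; only dirty pages count. */
	h_ntouched -= ndirty_to_purge;
	assert(h_ntouched == 0);
	return 0;
}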
test/unit/hpdata.c
@@ -67,6 +67,7 @@ TEST_BEGIN(test_purge_simple) {
 
 	expect_zu_eq(hpdata_ntouched_get(&hpdata), HUGEPAGE_PAGES / 2, "");
 
+	hpdata_alloc_allowed_set(&hpdata, false);
 	hpdata_purge_state_t purge_state;
 	size_t to_purge = hpdata_purge_begin(&hpdata, &purge_state);
 	expect_zu_eq(HUGEPAGE_PAGES / 4, to_purge, "");
@@ -90,11 +91,9 @@ TEST_BEGIN(test_purge_simple) {
 TEST_END
 
 /*
- * We only test intervening dalloc's not intervening allocs; we don't need
- * intervening allocs, and foreseeable optimizations will make them not just
- * unnecessary but incorrect. In particular, if there are two dirty extents
- * separated only by a retained extent, we can just purge the entire range,
- * saving a purge call.
+ * We only test intervening dallocs, not intervening allocs; the latter are
+ * disallowed as a purging precondition (because they would interfere with
+ * purging across a retained extent, which is what saves us purge calls).
 */
 TEST_BEGIN(test_purge_intervening_dalloc) {
 	hpdata_t hpdata;
@@ -112,6 +111,7 @@ TEST_BEGIN(test_purge_intervening_dalloc) {
 
 	expect_zu_eq(hpdata_ntouched_get(&hpdata), 3 * HUGEPAGE_PAGES / 4, "");
 
+	hpdata_alloc_allowed_set(&hpdata, false);
 	hpdata_purge_state_t purge_state;
 	size_t to_purge = hpdata_purge_begin(&hpdata, &purge_state);
 	expect_zu_eq(HUGEPAGE_PAGES / 2, to_purge, "");
@@ -137,7 +137,7 @@ TEST_BEGIN(test_purge_intervening_dalloc) {
 	expect_ptr_eq(
 	    (void *)((uintptr_t)alloc + 2 * HUGEPAGE_PAGES / 4 * PAGE),
 	    purge_addr, "");
-	expect_zu_eq(HUGEPAGE_PAGES / 4 * PAGE, purge_size, "");
+	expect_zu_ge(HUGEPAGE_PAGES / 4 * PAGE, purge_size, "");
 
 	got_result = hpdata_purge_next(&hpdata, &purge_state, &purge_addr,
 	    &purge_size);
@@ -150,6 +150,74 @@ TEST_BEGIN(test_purge_intervening_dalloc) {
 }
 TEST_END
 
+TEST_BEGIN(test_purge_over_retained) {
+	void *purge_addr;
+	size_t purge_size;
+
+	hpdata_t hpdata;
+	hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE);
+
+	/* Allocate the first 3/4 of the pages. */
+	void *alloc = hpdata_reserve_alloc(&hpdata, 3 * HUGEPAGE_PAGES / 4 * PAGE);
+	expect_ptr_eq(alloc, HPDATA_ADDR, "");
+
+	/* Free the second quarter. */
+	void *second_quarter =
+	    (void *)((uintptr_t)alloc + HUGEPAGE_PAGES / 4 * PAGE);
+	hpdata_unreserve(&hpdata, second_quarter, HUGEPAGE_PAGES / 4 * PAGE);
+
+	expect_zu_eq(hpdata_ntouched_get(&hpdata), 3 * HUGEPAGE_PAGES / 4, "");
+
+	/* Purge the second quarter. */
+	hpdata_alloc_allowed_set(&hpdata, false);
+	hpdata_purge_state_t purge_state;
+	size_t to_purge_dirty = hpdata_purge_begin(&hpdata, &purge_state);
+	expect_zu_eq(HUGEPAGE_PAGES / 4, to_purge_dirty, "");
+
+	bool got_result = hpdata_purge_next(&hpdata, &purge_state, &purge_addr,
+	    &purge_size);
+	expect_true(got_result, "");
+	expect_ptr_eq(second_quarter, purge_addr, "");
+	expect_zu_eq(HUGEPAGE_PAGES / 4 * PAGE, purge_size, "");
+
+	got_result = hpdata_purge_next(&hpdata, &purge_state, &purge_addr,
+	    &purge_size);
+	expect_false(got_result, "Unexpected additional purge range: "
+	    "extent at %p of size %zu", purge_addr, purge_size);
+	hpdata_purge_end(&hpdata, &purge_state);
+
+	expect_zu_eq(hpdata_ntouched_get(&hpdata), HUGEPAGE_PAGES / 2, "");
+
+	/* Free the first and third quarters. */
+	hpdata_unreserve(&hpdata, HPDATA_ADDR, HUGEPAGE_PAGES / 4 * PAGE);
+	hpdata_unreserve(&hpdata,
+	    (void *)((uintptr_t)alloc + 2 * HUGEPAGE_PAGES / 4 * PAGE),
+	    HUGEPAGE_PAGES / 4 * PAGE);
+
+	/*
+	 * Purge again. The second quarter is retained, so we can safely
+	 * re-purge it. We expect a single purge of 3/4 of the hugepage,
+	 * purging half its pages.
+	 */
+	to_purge_dirty = hpdata_purge_begin(&hpdata, &purge_state);
+	expect_zu_eq(HUGEPAGE_PAGES / 2, to_purge_dirty, "");
+
+	got_result = hpdata_purge_next(&hpdata, &purge_state, &purge_addr,
+	    &purge_size);
+	expect_true(got_result, "");
+	expect_ptr_eq(HPDATA_ADDR, purge_addr, "");
+	expect_zu_eq(3 * HUGEPAGE_PAGES / 4 * PAGE, purge_size, "");
+
+	got_result = hpdata_purge_next(&hpdata, &purge_state, &purge_addr,
+	    &purge_size);
+	expect_false(got_result, "Unexpected additional purge range: "
+	    "extent at %p of size %zu", purge_addr, purge_size);
+	hpdata_purge_end(&hpdata, &purge_state);
+
+	expect_zu_eq(hpdata_ntouched_get(&hpdata), 0, "");
+}
+TEST_END
+
 TEST_BEGIN(test_hugify) {
 	hpdata_t hpdata;
 	hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE);
@@ -171,5 +239,6 @@ int main(void) {
 	    test_reserve_alloc,
 	    test_purge_simple,
 	    test_purge_intervening_dalloc,
+	    test_purge_over_retained,
 	    test_hugify);
 }
test/unit/psset.c
@@ -18,12 +18,14 @@ edata_init_test(edata_t *edata) {
 static void
 test_psset_fake_purge(hpdata_t *ps) {
 	hpdata_purge_state_t purge_state;
+	hpdata_alloc_allowed_set(ps, false);
 	hpdata_purge_begin(ps, &purge_state);
 	void *addr;
 	size_t size;
 	while (hpdata_purge_next(ps, &purge_state, &addr, &size)) {
 	}
 	hpdata_purge_end(ps, &purge_state);
+	hpdata_alloc_allowed_set(ps, true);
 }
 
 static void