Move empty slab tracking to the psset.

We're moving towards a world in which purging decisions are less rigidly
enforced at a single-hugepage level.  In that world, it makes sense to keep
around some hpdatas which are not completely purged, in which case we'll need to
track them.
Author:    David Goldblatt, 2020-12-05 17:42:04 -08:00
Committer: David Goldblatt
Parent:    99fc0717e6
Commit:    bf64557ed6

10 changed files with 193 additions and 169 deletions
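For orientation only (this sketch is not part of the commit): the flow a shard is expected to follow with the psset-owned empty-slab tracking, assuming the psset_insert/psset_remove prototypes and the psset_update_begin/psset_update_end protocol introduced below. The helper name example_alloc_one_page is hypothetical, and the snippet leans on jemalloc-internal headers rather than being a standalone program.

/*
 * Sketch under the assumptions above: empty slabs now live inside the psset
 * itself, so address-space reuse goes through psset_pick_alloc() instead of a
 * shard-private unused_slabs list.
 */
#include "jemalloc/internal/jemalloc_preamble.h"
#include "jemalloc/internal/jemalloc_internal_includes.h"

static void *
example_alloc_one_page(psset_t *psset, hpdata_t *fresh_ps) {
	/* A brand-new (still empty) pageslab is handed to the psset up front. */
	psset_insert(psset, fresh_ps);

	/*
	 * Picking may return a nonfull slab from the size bins, or fall back
	 * to the LIFO list of empty slabs.
	 */
	hpdata_t *ps = psset_pick_alloc(psset, PAGE);
	if (ps == NULL) {
		return NULL;
	}

	/* All hpdata metadata changes happen between update_begin/end. */
	psset_update_begin(psset, ps);
	void *addr = hpdata_reserve_alloc(ps, PAGE);
	psset_update_end(psset, ps);
	return addr;
}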

include/jemalloc/internal/hpa.h

@@ -8,14 +8,6 @@
 typedef struct hpa_shard_nonderived_stats_s hpa_shard_nonderived_stats_t;
 struct hpa_shard_nonderived_stats_s {
-	/*
-	 * The number of times we've fully purged a hugepage and evicted it from
-	 * the psset.
-	 *
-	 * Guarded by grow_mtx.
-	 */
-	uint64_t nevictions;
-
 	/*
 	 * The number of times we've purged within a hugepage.
 	 *
@@ -80,15 +72,6 @@ struct hpa_shard_s {
 	 */
 	size_t alloc_max;
-	/*
-	 * Slabs currently purged away. They are hugepage-sized and
-	 * hugepage-aligned, but have had pages_nohuge and pages_purge_forced
-	 * called on them.
-	 *
-	 * Guarded by grow_mtx.
-	 */
-	hpdata_list_t unused_slabs;
-
 	/*
 	 * How many grow operations have occurred.
 	 *

include/jemalloc/internal/hpdata.h

@@ -52,14 +52,17 @@ struct hpdata_s {
 	 */
 	bool h_updating;
 
+	/* Whether or not the hpdata is in a psset. */
+	bool h_in_psset;
+
 	union {
-		/* When nonempty, used by the psset bins. */
+		/* When nonempty (and also nonfull), used by the psset bins. */
 		phn(hpdata_t) ph_link;
 		/*
 		 * When empty (or not corresponding to any hugepage), list
 		 * linkage.
 		 */
-		ql_elm(hpdata_t) ql_link;
+		ql_elm(hpdata_t) ql_link_empty;
 	};
 
 	/* The length of the largest contiguous sequence of inactive pages. */
@@ -82,7 +85,7 @@ struct hpdata_s {
 	fb_group_t touched_pages[FB_NGROUPS(HUGEPAGE_PAGES)];
 };
 
-TYPED_LIST(hpdata_list, hpdata_t, ql_link)
+TYPED_LIST(hpdata_empty_list, hpdata_t, ql_link_empty)
 
 typedef ph(hpdata_t) hpdata_age_heap_t;
 ph_proto(, hpdata_age_heap_, hpdata_age_heap_t, hpdata_t);
@@ -138,6 +141,17 @@ hpdata_updating_set(hpdata_t *hpdata, bool updating) {
 	hpdata->h_updating = updating;
 }
 
+static inline bool
+hpdata_in_psset_get(const hpdata_t *hpdata) {
+	return hpdata->h_in_psset;
+}
+
+static inline void
+hpdata_in_psset_set(hpdata_t *hpdata, bool in_psset) {
+	assert(in_psset != hpdata->h_in_psset);
+	hpdata->h_in_psset = in_psset;
+}
+
 static inline size_t
 hpdata_longest_free_range_get(const hpdata_t *hpdata) {
 	return hpdata->h_longest_free_range;
@@ -208,6 +222,11 @@ hpdata_empty(hpdata_t *hpdata) {
 	return hpdata->h_nactive == 0;
 }
 
+static inline bool
+hpdata_full(hpdata_t *hpdata) {
+	return hpdata->h_nactive == HUGEPAGE_PAGES;
+}
+
 void hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age);
 
 /*

include/jemalloc/internal/psset.h

@@ -35,7 +35,6 @@ struct psset_bin_stats_s {
 
 typedef struct psset_stats_s psset_stats_t;
 struct psset_stats_s {
-
 	/*
 	 * The second index is huge stats; nonfull_slabs[pszind][0] contains
 	 * stats for the non-huge slabs in bucket pszind, while
@@ -44,10 +43,13 @@ struct psset_stats_s {
 	psset_bin_stats_t nonfull_slabs[PSSET_NPSIZES][2];
 
 	/*
-	 * Full slabs don't live in any edata heap. But we still track their
+	 * Full slabs don't live in any edata heap, but we still track their
 	 * stats.
 	 */
 	psset_bin_stats_t full_slabs[2];
+
+	/* Empty slabs are similar. */
+	psset_bin_stats_t empty_slabs[2];
 };
 
 typedef struct psset_s psset_t;
@@ -59,6 +61,8 @@ struct psset_s {
 	hpdata_age_heap_t pageslabs[PSSET_NPSIZES];
 	bitmap_t bitmap[BITMAP_GROUPS(PSSET_NPSIZES)];
 	psset_stats_t stats;
+	/* Slabs with no active allocations. */
+	hpdata_empty_list_t empty_slabs;
 };
 
 void psset_init(psset_t *psset);
@@ -74,4 +78,7 @@ void psset_update_end(psset_t *psset, hpdata_t *ps);
 /* Analogous to the eset_fit; pick a hpdata to serve the request. */
 hpdata_t *psset_pick_alloc(psset_t *psset, size_t size);
 
+void psset_insert(psset_t *psset, hpdata_t *ps);
+void psset_remove(psset_t *psset, hpdata_t *ps);
+
 #endif /* JEMALLOC_INTERNAL_PSSET_H */

src/ctl.c

@@ -226,7 +226,6 @@ CTL_PROTO(stats_arenas_i_extents_j_dirty_bytes)
 CTL_PROTO(stats_arenas_i_extents_j_muzzy_bytes)
 CTL_PROTO(stats_arenas_i_extents_j_retained_bytes)
 INDEX_PROTO(stats_arenas_i_extents_j)
-CTL_PROTO(stats_arenas_i_hpa_shard_nevictions)
 CTL_PROTO(stats_arenas_i_hpa_shard_npurge_passes)
 CTL_PROTO(stats_arenas_i_hpa_shard_npurges)
 CTL_PROTO(stats_arenas_i_hpa_shard_nhugifies)
@@ -700,7 +699,6 @@ static const ctl_named_node_t stats_arenas_i_hpa_shard_node[] = {
 	{NAME("nonfull_slabs"), CHILD(indexed,
 	    stats_arenas_i_hpa_shard_nonfull_slabs)},
 
-	{NAME("nevictions"), CTL(stats_arenas_i_hpa_shard_nevictions)},
 	{NAME("npurge_passes"), CTL(stats_arenas_i_hpa_shard_npurge_passes)},
 	{NAME("npurges"), CTL(stats_arenas_i_hpa_shard_npurges)},
 	{NAME("nhugifies"), CTL(stats_arenas_i_hpa_shard_nhugifies)},
@@ -3514,8 +3512,6 @@ stats_arenas_i_extents_j_index(tsdn_t *tsdn, const size_t *mib,
 	return super_stats_arenas_i_extents_j_node;
 }
 
-CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nevictions,
-    arenas_i(mib[2])->astats->hpastats.nonderived_stats.nevictions, uint64_t);
 CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_npurge_passes,
     arenas_i(mib[2])->astats->hpastats.nonderived_stats.npurge_passes, uint64_t);
 CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_npurges,

src/hpa.c

@@ -68,14 +68,12 @@ hpa_shard_init(hpa_shard_t *shard, emap_t *emap, base_t *base,
 	edata_cache_small_init(&shard->ecs, edata_cache);
 	psset_init(&shard->psset);
 	shard->alloc_max = alloc_max;
-	hpdata_list_init(&shard->unused_slabs);
 	shard->age_counter = 0;
 	shard->eden = NULL;
 	shard->eden_len = 0;
 	shard->ind = ind;
 	shard->emap = emap;
 
-	shard->stats.nevictions = 0;
 	shard->stats.npurge_passes = 0;
 	shard->stats.npurges = 0;
 	shard->stats.nhugifies = 0;
@@ -103,7 +101,6 @@ hpa_shard_init(hpa_shard_t *shard, emap_t *emap, base_t *base,
 static void
 hpa_shard_nonderived_stats_accum(hpa_shard_nonderived_stats_t *dst,
     hpa_shard_nonderived_stats_t *src) {
-	dst->nevictions += src->nevictions;
 	dst->npurge_passes += src->npurge_passes;
 	dst->npurges += src->npurges;
 	dst->nhugifies += src->nhugifies;
@@ -171,15 +168,6 @@ hpa_grow(tsdn_t *tsdn, hpa_shard_t *shard) {
 	malloc_mutex_assert_owner(tsdn, &shard->grow_mtx);
 	hpdata_t *ps = NULL;
 
-	/* Is there address space waiting for reuse? */
-	malloc_mutex_assert_owner(tsdn, &shard->grow_mtx);
-	ps = hpdata_list_first(&shard->unused_slabs);
-	if (ps != NULL) {
-		hpdata_list_remove(&shard->unused_slabs, ps);
-		hpdata_age_set(ps, shard->age_counter++);
-		return ps;
-	}
-
 	/* Is eden a perfect fit? */
 	if (shard->eden != NULL && shard->eden_len == HUGEPAGE) {
 		ps = hpa_alloc_ps(tsdn, shard);
@@ -300,26 +288,6 @@ hpa_purge(tsdn_t *tsdn, hpa_shard_t *shard, hpdata_t *ps) {
 	}
 }
 
-/*
- * Does the metadata tracking associated with a page slab becoming empty. The
- * psset doesn't hold empty pageslabs, but we do want address space reuse, so we
- * track these pages outside the psset.
- */
-static void
-hpa_handle_ps_eviction(tsdn_t *tsdn, hpa_shard_t *shard, hpdata_t *ps) {
-	/*
-	 * We do relatively expensive system calls. The ps was evicted, so no
-	 * one should touch it while we're also touching it.
-	 */
-	malloc_mutex_assert_not_owner(tsdn, &shard->mtx);
-	malloc_mutex_assert_not_owner(tsdn, &shard->grow_mtx);
-
-	malloc_mutex_lock(tsdn, &shard->grow_mtx);
-	shard->stats.nevictions++;
-	hpdata_list_prepend(&shard->unused_slabs, ps);
-	malloc_mutex_unlock(tsdn, &shard->grow_mtx);
-}
-
 static edata_t *
 hpa_try_alloc_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, bool *oom) {
 	bool err;
@@ -341,6 +309,18 @@ hpa_try_alloc_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, bool *oom)
 	}
 
 	psset_update_begin(&shard->psset, ps);
+
+	if (hpdata_empty(ps)) {
+		/*
+		 * If the pageslab used to be empty, treat it as though it's
+		 * brand new for fragmentation-avoidance purposes; what we're
+		 * trying to approximate is the age of the allocations *in* that
+		 * pageslab, and the allocations in the new pageslab are
+		 * definitionally the youngest in this hpa shard.
+		 */
+		hpdata_age_set(ps, shard->age_counter++);
+	}
+
 	void *addr = hpdata_reserve_alloc(ps, size);
 	edata_init(edata, shard->ind, addr, size, /* slab */ false,
 	    SC_NSIZES, /* sn */ 0, extent_state_active, /* zeroed */ false,
@@ -453,26 +433,20 @@ hpa_alloc_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size) {
 		return NULL;
 	}
 
-	/* We got the new edata; allocate from it. */
+	/* We got the pageslab; allocate from it. */
 	malloc_mutex_lock(tsdn, &shard->mtx);
-	/*
-	 * This will go away soon. The psset doesn't draw a distinction between
-	 * pageslab removal and updating. If this is a new pageslab, we pretend
-	 * that it's an old one that's been getting updated.
-	 */
-	if (!hpdata_updating_get(ps)) {
-		hpdata_updating_set(ps, true);
-	}
+
+	psset_insert(&shard->psset, ps);
+
 	edata = edata_cache_small_get(tsdn, &shard->ecs);
 	if (edata == NULL) {
-		shard->stats.nevictions++;
 		malloc_mutex_unlock(tsdn, &shard->mtx);
 		malloc_mutex_unlock(tsdn, &shard->grow_mtx);
-		hpa_handle_ps_eviction(tsdn, shard, ps);
 		return NULL;
 	}
 
+	psset_update_begin(&shard->psset, ps);
 	void *addr = hpdata_reserve_alloc(ps, size);
 	edata_init(edata, shard->ind, addr, size, /* slab */ false,
 	    SC_NSIZES, /* sn */ 0, extent_state_active, /* zeroed */ false,
@@ -487,10 +461,6 @@ hpa_alloc_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size) {
 		edata_cache_small_put(tsdn, &shard->ecs, edata);
 
-		shard->stats.nevictions++;
-		malloc_mutex_unlock(tsdn, &shard->mtx);
-		malloc_mutex_unlock(tsdn, &shard->grow_mtx);
-
 		/* We'll do a fake purge; the pages weren't really touched. */
 		hpdata_purge_state_t purge_state;
 		void *purge_addr;
@@ -506,7 +476,9 @@ hpa_alloc_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size) {
 		assert(!found_extent);
 		hpdata_purge_end(ps, &purge_state);
 
-		hpa_handle_ps_eviction(tsdn, shard, ps);
+		psset_update_end(&shard->psset, ps);
+		malloc_mutex_unlock(tsdn, &shard->mtx);
+		malloc_mutex_unlock(tsdn, &shard->grow_mtx);
 		return NULL;
 	}
 	psset_update_end(&shard->psset, ps);
@@ -614,9 +586,7 @@ hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) {
 	 *   management.
 	 * - The ps must not be in the psset while purging. This is because we
 	 *   can't handle purge/alloc races.
-	 * - Whoever removes the ps from the psset is the one to reinsert it (or
-	 *   to pass it to hpa_handle_ps_eviction upon emptying). This keeps
-	 *   responsibility tracking simple.
+	 * - Whoever removes the ps from the psset is the one to reinsert it.
 	 */
 	if (hpdata_mid_purge_get(ps)) {
 		/*
@@ -649,17 +619,9 @@ hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) {
 	/* It's our job to purge. */
 	hpa_purge(tsdn, shard, ps);
 
-	/*
-	 * OK, the hpdata is as purged as we want it to be, and it's going back
-	 * into the psset (if nonempty) or getting evicted (if empty).
-	 */
-	if (hpdata_empty(ps)) {
-		malloc_mutex_unlock(tsdn, &shard->mtx);
-		hpa_handle_ps_eviction(tsdn, shard, ps);
-	} else {
-		psset_update_end(&shard->psset, ps);
-		malloc_mutex_unlock(tsdn, &shard->mtx);
-	}
+	psset_update_end(&shard->psset, ps);
+	malloc_mutex_unlock(tsdn, &shard->mtx);
 }
 
 void
@@ -678,8 +640,6 @@ hpa_shard_assert_stats_empty(psset_bin_stats_t *bin_stats) {
 static void
 hpa_assert_empty(tsdn_t *tsdn, hpa_shard_t *shard, psset_t *psset) {
 	malloc_mutex_assert_owner(tsdn, &shard->mtx);
-	hpdata_t *ps = psset_pick_alloc(psset, PAGE);
-	assert(ps == NULL);
 	for (int huge = 0; huge <= 1; huge++) {
 		hpa_shard_assert_stats_empty(&psset->stats.full_slabs[huge]);
 		for (pszind_t i = 0; i < PSSET_NPSIZES; i++) {
@@ -703,8 +663,10 @@ hpa_shard_destroy(tsdn_t *tsdn, hpa_shard_t *shard) {
 		malloc_mutex_unlock(tsdn, &shard->mtx);
 	}
 	hpdata_t *ps;
-	while ((ps = hpdata_list_first(&shard->unused_slabs)) != NULL) {
-		hpdata_list_remove(&shard->unused_slabs, ps);
+	while ((ps = psset_pick_alloc(&shard->psset, PAGE)) != NULL) {
+		/* There should be no allocations anywhere. */
+		assert(hpdata_empty(ps));
+		psset_remove(&shard->psset, ps);
 		pages_unmap(hpdata_addr_get(ps), HUGEPAGE);
 	}
 }

src/hpdata.c

@@ -25,6 +25,7 @@ hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age) {
 	hpdata->h_mid_purge = false;
 	hpdata->h_mid_hugify = false;
 	hpdata->h_updating = false;
+	hpdata->h_in_psset = false;
 	hpdata_longest_free_range_set(hpdata, HUGEPAGE_PAGES);
 	hpdata->h_nactive = 0;
 	fb_init(hpdata->active_pages, HUGEPAGE_PAGES);
@@ -37,7 +38,12 @@ hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age) {
 void *
 hpdata_reserve_alloc(hpdata_t *hpdata, size_t sz) {
 	hpdata_assert_consistent(hpdata);
-	assert(hpdata->h_updating);
+	/*
+	 * This is a metadata change; the hpdata should therefore either not be
+	 * in the psset, or should have explicitly marked itself as being
+	 * mid-update.
+	 */
+	assert(!hpdata->h_in_psset || hpdata->h_updating);
 	assert((sz & PAGE_MASK) == 0);
 	size_t npages = sz >> LG_PAGE;
 	assert(npages <= hpdata_longest_free_range_get(hpdata));
@@ -118,7 +124,8 @@ hpdata_reserve_alloc(hpdata_t *hpdata, size_t sz) {
 void
 hpdata_unreserve(hpdata_t *hpdata, void *addr, size_t sz) {
 	hpdata_assert_consistent(hpdata);
-	assert(hpdata->h_updating);
+	/* See the comment in reserve. */
+	assert(!hpdata->h_in_psset || hpdata->h_updating);
 	assert(((uintptr_t)addr & PAGE_MASK) == 0);
 	assert((sz & PAGE_MASK) == 0);
 	size_t begin = ((uintptr_t)addr - (uintptr_t)hpdata_addr_get(hpdata))
@@ -147,7 +154,8 @@ hpdata_unreserve(hpdata_t *hpdata, void *addr, size_t sz) {
 void
 hpdata_purge_begin(hpdata_t *hpdata, hpdata_purge_state_t *purge_state) {
 	hpdata_assert_consistent(hpdata);
-	assert(hpdata->h_updating);
+	/* See the comment in reserve. */
+	assert(!hpdata->h_in_psset || hpdata->h_updating);
 	assert(!hpdata->h_mid_purge);
 	assert(!hpdata->h_mid_hugify);
 	hpdata->h_mid_purge = true;
@@ -185,7 +193,8 @@ hpdata_purge_next(hpdata_t *hpdata, hpdata_purge_state_t *purge_state,
 	 * a consistent state.
 	 */
 	assert(hpdata->h_mid_purge);
-	assert(hpdata->h_updating);
+	/* See the comment in reserve. */
+	assert(!hpdata->h_in_psset || hpdata->h_updating);
 	/* Should have dehugified already (if necessary). */
 	assert(!hpdata->h_huge);
 	assert(!hpdata->h_mid_hugify);
@@ -215,7 +224,8 @@ hpdata_purge_next(hpdata_t *hpdata, hpdata_purge_state_t *purge_state,
 void
 hpdata_purge_end(hpdata_t *hpdata, hpdata_purge_state_t *purge_state) {
 	hpdata_assert_consistent(hpdata);
-	assert(hpdata->h_updating);
+	/* See the comment in reserve. */
+	assert(!hpdata->h_in_psset || hpdata->h_updating);
 	assert(hpdata->h_mid_purge);
 	assert(!hpdata->h_mid_hugify);
 	hpdata->h_mid_purge = false;
@@ -236,7 +246,8 @@ hpdata_purge_end(hpdata_t *hpdata, hpdata_purge_state_t *purge_state) {
 void
 hpdata_hugify_begin(hpdata_t *hpdata) {
 	hpdata_assert_consistent(hpdata);
-	assert(hpdata->h_updating);
+	/* See the comment in reserve. */
+	assert(!hpdata->h_in_psset || hpdata->h_updating);
 	assert(!hpdata->h_mid_purge);
 	assert(!hpdata->h_mid_hugify);
 	hpdata->h_mid_hugify = true;
@@ -253,7 +264,7 @@ hpdata_hugify_end(hpdata_t *hpdata) {
 	 * This is the exception to the "no-metadata updates without informing
 	 * the psset first" rule; this assert would be incorrect.
 	 */
-	/* assert(hpdata->h_updating); */
+	/* assert(!hpdata->h_in_psset || hpdata->h_updating); */
 	assert(!hpdata->h_mid_purge);
 	assert(hpdata->h_mid_hugify);
 	hpdata->h_mid_hugify = false;

src/psset.c

@@ -15,6 +15,7 @@ psset_init(psset_t *psset) {
 	}
 	bitmap_init(psset->bitmap, &psset_bitmap_info, /* fill */ true);
 	memset(&psset->stats, 0, sizeof(psset->stats));
+	hpdata_empty_list_init(&psset->empty_slabs);
 }
 
 static void
@@ -28,6 +29,8 @@
 psset_stats_accum(psset_stats_t *dst, psset_stats_t *src) {
 	psset_bin_stats_accum(&dst->full_slabs[0], &src->full_slabs[0]);
 	psset_bin_stats_accum(&dst->full_slabs[1], &src->full_slabs[1]);
+	psset_bin_stats_accum(&dst->empty_slabs[0], &src->empty_slabs[0]);
+	psset_bin_stats_accum(&dst->empty_slabs[1], &src->empty_slabs[1]);
 	for (pszind_t i = 0; i < PSSET_NPSIZES; i++) {
 		psset_bin_stats_accum(&dst->nonfull_slabs[i][0],
 		    &src->nonfull_slabs[i][0]);
@@ -69,71 +72,104 @@ psset_bin_stats_remove(psset_bin_stats_t *binstats, hpdata_t *ps) {
 static void
 psset_hpdata_heap_remove(psset_t *psset, pszind_t pind, hpdata_t *ps) {
 	hpdata_age_heap_remove(&psset->pageslabs[pind], ps);
-	psset_bin_stats_remove(psset->stats.nonfull_slabs[pind], ps);
-}
-
-static void
-psset_hpdata_heap_insert(psset_t *psset, pszind_t pind, hpdata_t *ps) {
-	hpdata_age_heap_insert(&psset->pageslabs[pind], ps);
-	psset_bin_stats_insert(psset->stats.nonfull_slabs[pind], ps);
-}
-
-void
-psset_update_begin(psset_t *psset, hpdata_t *ps) {
-	hpdata_assert_consistent(ps);
-	assert(!hpdata_updating_get(ps));
-	hpdata_updating_set(ps, true);
-
-	size_t longest_free_range = hpdata_longest_free_range_get(ps);
-
-	if (longest_free_range == 0) {
-		psset_bin_stats_remove(psset->stats.full_slabs, ps);
-		return;
-	}
-
-	pszind_t pind = sz_psz2ind(sz_psz_quantize_floor(
-	    longest_free_range << LG_PAGE));
-	assert(pind < PSSET_NPSIZES);
-	psset_hpdata_heap_remove(psset, pind, ps);
 	if (hpdata_age_heap_empty(&psset->pageslabs[pind])) {
 		bitmap_set(psset->bitmap, &psset_bitmap_info, (size_t)pind);
 	}
 }
 
-void
-psset_update_end(psset_t *psset, hpdata_t *ps) {
-	assert(!hpdata_empty(ps));
-	hpdata_assert_consistent(ps);
-	assert(hpdata_updating_get(ps));
-	hpdata_updating_set(ps, false);
-
-	size_t longest_free_range = hpdata_longest_free_range_get(ps);
-
-	if (longest_free_range == 0) {
-		/*
-		 * We don't ned to track full slabs; just pretend to for stats
-		 * purposes. See the comment at psset_bin_stats_adjust.
-		 */
-		psset_bin_stats_insert(psset->stats.full_slabs, ps);
-		return;
-	}
-
-	pszind_t pind = sz_psz2ind(sz_psz_quantize_floor(
-	    longest_free_range << LG_PAGE));
-	assert(pind < PSSET_NPSIZES);
+static void
+psset_hpdata_heap_insert(psset_t *psset, pszind_t pind, hpdata_t *ps) {
 	if (hpdata_age_heap_empty(&psset->pageslabs[pind])) {
 		bitmap_unset(psset->bitmap, &psset_bitmap_info, (size_t)pind);
 	}
-	psset_hpdata_heap_insert(psset, pind, ps);
+	hpdata_age_heap_insert(&psset->pageslabs[pind], ps);
+}
+
+/*
+ * Insert ps into the data structures we use to track allocation stats and pick
+ * the pageslabs for new allocations.
+ *
+ * In particular, this does *not* remove ps from any hugification / purging
+ * queues it may be in.
+ */
+static void
+psset_do_alloc_tracking_insert(psset_t *psset, hpdata_t *ps) {
+	if (hpdata_empty(ps)) {
+		psset_bin_stats_insert(psset->stats.empty_slabs, ps);
+		/*
+		 * This prepend, paired with popping the head in psset_fit,
+		 * means we implement LIFO ordering for the empty slabs set,
+		 * which seems reasonable.
+		 */
+		hpdata_empty_list_prepend(&psset->empty_slabs, ps);
+	} else if (hpdata_full(ps)) {
+		psset_bin_stats_insert(psset->stats.full_slabs, ps);
+		/*
+		 * We don't need to keep track of the full slabs; we're never
+		 * going to return them from a psset_pick_alloc call.
+		 */
+	} else {
+		size_t longest_free_range = hpdata_longest_free_range_get(ps);
+
+		pszind_t pind = sz_psz2ind(sz_psz_quantize_floor(
+		    longest_free_range << LG_PAGE));
+		assert(pind < PSSET_NPSIZES);
+
+		psset_bin_stats_insert(psset->stats.nonfull_slabs[pind], ps);
+		psset_hpdata_heap_insert(psset, pind, ps);
+	}
+}
+
+/* Remove ps from those collections. */
+static void
+psset_do_alloc_tracking_remove(psset_t *psset, hpdata_t *ps) {
+	if (hpdata_empty(ps)) {
+		psset_bin_stats_remove(psset->stats.empty_slabs, ps);
+		hpdata_empty_list_remove(&psset->empty_slabs, ps);
+	} else if (hpdata_full(ps)) {
+		/*
+		 * We don't need to maintain an explicit container of full
+		 * pageslabs anywhere, but we do have to update stats.
+		 */
+		psset_bin_stats_remove(psset->stats.full_slabs, ps);
+	} else {
+		size_t longest_free_range = hpdata_longest_free_range_get(ps);

+		pszind_t pind = sz_psz2ind(sz_psz_quantize_floor(
+		    longest_free_range << LG_PAGE));
+		assert(pind < PSSET_NPSIZES);
+
+		psset_bin_stats_remove(psset->stats.nonfull_slabs[pind], ps);
+		psset_hpdata_heap_remove(psset, pind, ps);
+	}
+}
+
+void
+psset_update_begin(psset_t *psset, hpdata_t *ps) {
+	hpdata_assert_consistent(ps);
+	assert(hpdata_in_psset_get(ps));
+	hpdata_updating_set(ps, true);
+	psset_do_alloc_tracking_remove(psset, ps);
+}
+
+void
+psset_update_end(psset_t *psset, hpdata_t *ps) {
+	hpdata_assert_consistent(ps);
+	assert(hpdata_in_psset_get(ps));
+	hpdata_updating_set(ps, false);
+	psset_do_alloc_tracking_insert(psset, ps);
 }
 
 hpdata_t *
 psset_pick_alloc(psset_t *psset, size_t size) {
+	assert((size & PAGE_MASK) == 0);
+	assert(size <= HUGEPAGE);
+
 	pszind_t min_pind = sz_psz2ind(sz_psz_quantize_ceil(size));
 	pszind_t pind = (pszind_t)bitmap_ffu(psset->bitmap, &psset_bitmap_info,
 	    (size_t)min_pind);
 	if (pind == PSSET_NPSIZES) {
-		return NULL;
+		return hpdata_empty_list_first(&psset->empty_slabs);
 	}
 	hpdata_t *ps = hpdata_age_heap_first(&psset->pageslabs[pind]);
 	if (ps == NULL) {
@@ -144,3 +180,17 @@ psset_pick_alloc(psset_t *psset, size_t size) {
 	return ps;
 }
 
+void
+psset_insert(psset_t *psset, hpdata_t *ps) {
+	/* We only support inserting empty pageslabs, for now. */
+	assert(hpdata_empty(ps));
+	hpdata_in_psset_set(ps, true);
+	psset_do_alloc_tracking_insert(psset, ps);
+}
+
+void
+psset_remove(psset_t *psset, hpdata_t *ps) {
+	hpdata_in_psset_set(ps, false);
+	psset_do_alloc_tracking_remove(psset, ps);
+}
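A companion sketch of the deallocation side (again not part of the commit; example_dalloc is a hypothetical name), following the hpa_dalloc pattern above: after unreserving, the slab is simply handed back via psset_update_end, and if it has become empty it lands on the psset's new empty_slabs list instead of being evicted to a shard-private list.

/* Sketch only, assuming the psset/hpdata APIs shown in this commit. */
static void
example_dalloc(psset_t *psset, hpdata_t *ps, void *addr, size_t size) {
	/* Metadata changes are bracketed by update_begin/update_end. */
	psset_update_begin(psset, ps);
	hpdata_unreserve(ps, addr, size);
	/*
	 * Whether or not ps is now empty, it goes back into the psset;
	 * psset_do_alloc_tracking_insert files it under empty_slabs, the
	 * full_slabs stats, or the nonfull bins as appropriate.
	 */
	psset_update_end(psset, ps);
}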

src/stats.c

@@ -790,14 +790,11 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) {
 	emitter_row_t row;
 	emitter_row_init(&row);
 
-	uint64_t nevictions;
 	uint64_t npurge_passes;
 	uint64_t npurges;
 	uint64_t nhugifies;
 	uint64_t ndehugifies;
 
-	CTL_M2_GET("stats.arenas.0.hpa_shard.nevictions",
-	    i, &nevictions, uint64_t);
 	CTL_M2_GET("stats.arenas.0.hpa_shard.npurge_passes",
 	    i, &npurge_passes, uint64_t);
 	CTL_M2_GET("stats.arenas.0.hpa_shard.npurges",
@@ -839,7 +836,6 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) {
 	emitter_table_printf(emitter,
 	    "HPA shard stats:\n"
-	    "  Evictions: %" FMTu64 " (%" FMTu64 " / sec)\n"
 	    "  Purge passes: %" FMTu64 " (%" FMTu64 " / sec)\n"
 	    "  Purges: %" FMTu64 " (%" FMTu64 " / sec)\n"
 	    "  Hugeifies: %" FMTu64 " (%" FMTu64 " / sec)\n"
@@ -850,7 +846,6 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) {
 	    "  nactive: %zu huge, %zu nonhuge \n"
 	    "  ndirty: %zu huge, %zu nonhuge \n"
 	    "  nretained: 0 huge, %zu nonhuge \n",
-	    nevictions, rate_per_second(nevictions, uptime),
 	    npurge_passes, rate_per_second(npurge_passes, uptime),
 	    npurges, rate_per_second(npurges, uptime),
 	    nhugifies, rate_per_second(nhugifies, uptime),
@@ -861,8 +856,6 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) {
 	    nretained_nonhuge);
 
 	emitter_json_object_kv_begin(emitter, "hpa_shard");
-	emitter_json_kv(emitter, "nevictions", emitter_type_uint64,
-	    &nevictions);
 	emitter_json_kv(emitter, "npurge_passes", emitter_type_uint64,
 	    &npurge_passes);
 	emitter_json_kv(emitter, "npurges", emitter_type_uint64,

test/unit/hpdata.c

@@ -7,8 +7,6 @@ TEST_BEGIN(test_reserve_alloc) {
 	hpdata_t hpdata;
 	hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE);
-	hpdata_updating_set(&hpdata, true);
-
 	/* Allocating a page at a time, we should do first fit. */
 	for (size_t i = 0; i < HUGEPAGE_PAGES; i++) {
 		expect_true(hpdata_consistent(&hpdata), "");
@@ -61,8 +59,6 @@ TEST_BEGIN(test_purge_simple) {
 	hpdata_t hpdata;
 	hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE);
-	hpdata_updating_set(&hpdata, true);
-
 	void *alloc = hpdata_reserve_alloc(&hpdata, HUGEPAGE_PAGES / 2 * PAGE);
 	expect_ptr_eq(alloc, HPDATA_ADDR, "");
@@ -111,7 +107,6 @@ TEST_END
 TEST_BEGIN(test_purge_intervening_dalloc) {
 	hpdata_t hpdata;
 	hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE);
-	hpdata_updating_set(&hpdata, true);
 
 	/* Allocate the first 3/4 of the pages. */
 	void *alloc = hpdata_reserve_alloc(&hpdata, 3 * HUGEPAGE_PAGES / 4 * PAGE);
@@ -165,7 +160,6 @@ TEST_END
 TEST_BEGIN(test_hugify) {
 	hpdata_t hpdata;
 	hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE);
-	hpdata_updating_set(&hpdata, true);
 
 	void *alloc = hpdata_reserve_alloc(&hpdata, HUGEPAGE / 2);
 	expect_ptr_eq(alloc, HPDATA_ADDR, "");

test/unit/psset.c

@@ -15,18 +15,26 @@ edata_init_test(edata_t *edata) {
 	edata_esn_set(edata, ALLOC_ESN);
 }
 
+static void
+test_psset_fake_purge(hpdata_t *ps) {
+	hpdata_purge_state_t purge_state;
+	hpdata_purge_begin(ps, &purge_state);
+	void *addr;
+	size_t size;
+	while (hpdata_purge_next(ps, &purge_state, &addr, &size)) {
+	}
+	hpdata_purge_end(ps, &purge_state);
+}
+
 static void
 test_psset_alloc_new(psset_t *psset, hpdata_t *ps, edata_t *r_edata,
     size_t size) {
 	hpdata_assert_empty(ps);
 
-	/*
-	 * As in hpa.c; pretend that the ps is already in the psset and just
-	 * being updated, until we implement true insert/removal support.
-	 */
-	if (!hpdata_updating_get(ps)) {
-		hpdata_updating_set(ps, true);
-	}
+	test_psset_fake_purge(ps);
+
+	psset_insert(psset, ps);
+	psset_update_begin(psset, ps);
 
 	void *addr = hpdata_reserve_alloc(ps, size);
 	edata_init(r_edata, edata_arena_ind_get(r_edata), addr, size,
@@ -59,10 +67,11 @@ test_psset_dalloc(psset_t *psset, edata_t *edata) {
 	hpdata_t *ps = edata_ps_get(edata);
 	psset_update_begin(psset, ps);
 	hpdata_unreserve(ps, edata_addr_get(edata), edata_size_get(edata));
-	psset_update_end(psset, ps);
 	if (hpdata_empty(ps)) {
+		psset_remove(psset, ps);
 		return ps;
 	} else {
+		psset_update_end(psset, ps);
 		return NULL;
 	}
 }