From 43af63fff496967bf2173c92737aea1cca4ca025 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 9 Nov 2020 13:49:30 -0800 Subject: [PATCH] HPA: Manage whole hugepages at a time. This redesigns the HPA implementation to allow us to manage hugepages all at once, locally, without relying on a global fallback. --- include/jemalloc/internal/arena_externs.h | 1 - include/jemalloc/internal/edata.h | 18 +- include/jemalloc/internal/hpa.h | 93 ++-- include/jemalloc/internal/mutex_prof.h | 4 +- include/jemalloc/internal/pa.h | 5 +- include/jemalloc/internal/psset.h | 11 +- src/arena.c | 6 +- src/ctl.c | 115 ++-- src/hpa.c | 613 +++++++++++++--------- src/jemalloc.c | 37 +- src/pa.c | 15 +- src/pa_extra.c | 2 +- src/psset.c | 120 +++-- src/stats.c | 113 ++-- test/unit/hpa.c | 76 +-- test/unit/psset.c | 21 +- 16 files changed, 700 insertions(+), 550 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 40223b58..e3cfcee2 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -16,7 +16,6 @@ extern const char *percpu_arena_mode_names[]; extern const uint64_t h_steps[SMOOTHSTEP_NSTEPS]; extern malloc_mutex_t arenas_lock; extern emap_t arena_emap_global; -extern hpa_t arena_hpa_global; extern size_t opt_oversize_threshold; extern size_t oversize_threshold; diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index 5ec12beb..465c962f 100644 --- a/include/jemalloc/internal/edata.h +++ b/include/jemalloc/internal/edata.h @@ -208,9 +208,9 @@ struct edata_s { */ /* - * If this edata is from an HPA, it may be part of some larger - * pageslab. Track it if so. Otherwise (either because it's - * not part of a pageslab, or not from the HPA at all), NULL. + * If this edata is a user allocation from an HPA, it comes out + * of some pageslab (we don't yet support huegpage allocations + * that don't fit into pageslabs). This tracks it. */ edata_t *ps; /* @@ -225,6 +225,8 @@ struct edata_s { * between heaps. */ uint32_t longest_free_range; + /* Whether or not the slab is backed by a hugepage. */ + bool hugeified; }; }; @@ -328,6 +330,11 @@ edata_pai_get(const edata_t *edata) { EDATA_BITS_PAI_SHIFT); } +static inline bool +edata_hugeified_get(const edata_t *edata) { + return edata->hugeified; +} + static inline bool edata_slab_get(const edata_t *edata) { return (bool)((edata->e_bits & EDATA_BITS_SLAB_MASK) >> @@ -559,6 +566,11 @@ edata_pai_set(edata_t *edata, extent_pai_t pai) { ((uint64_t)pai << EDATA_BITS_PAI_SHIFT); } +static inline void +edata_hugeified_set(edata_t *edata, bool hugeified) { + edata->hugeified = hugeified; +} + static inline void edata_slab_set(edata_t *edata, bool slab) { edata->e_bits = (edata->e_bits & ~EDATA_BITS_SLAB_MASK) | diff --git a/include/jemalloc/internal/hpa.h b/include/jemalloc/internal/hpa.h index 12a7a17d..1c4585df 100644 --- a/include/jemalloc/internal/hpa.h +++ b/include/jemalloc/internal/hpa.h @@ -6,32 +6,6 @@ #include "jemalloc/internal/pai.h" #include "jemalloc/internal/psset.h" -typedef struct hpa_s hpa_t; -struct hpa_s { - /* - * We have two mutexes for the central allocator; mtx protects its - * state, while grow_mtx protects controls the ability to grow the - * backing store. This prevents race conditions in which the central - * allocator has exhausted its memory while mutiple threads are trying - * to allocate. 
If they all reserved more address space from the OS - * without synchronization, we'd end consuming much more than necessary. - */ - malloc_mutex_t grow_mtx; - malloc_mutex_t mtx; - hpa_central_t central; - /* The arena ind we're associated with. */ - unsigned ind; - /* - * This edata cache is the global one that we use for new allocations in - * growing; practically, it comes from a0. - * - * We don't use an edata_cache_small in front of this, since we expect a - * small finite number of allocations from it. - */ - edata_cache_t *edata_cache; - exp_grow_t exp_grow; -}; - /* Used only by CTL; not actually stored here (i.e., all derived). */ typedef struct hpa_shard_stats_s hpa_shard_stats_t; struct hpa_shard_stats_s { @@ -53,44 +27,53 @@ struct hpa_shard_s { * allocator, and so will use its edata_cache. */ edata_cache_small_t ecs; - hpa_t *hpa; + psset_t psset; /* - * When we're grabbing a new ps from the central allocator, how big - * would we like it to be? This is mostly about the level of batching - * we use in our requests to the centralized allocator. + * The largest size we'll allocate out of the shard. For those + * allocations refused, the caller (in practice, the PA module) will + * fall back to the more general (for now) PAC, which can always handle + * any allocation request. */ - size_t ps_goal; + size_t alloc_max; + /* - * What's the maximum size we'll try to allocate out of the psset? We - * don't want this to be too large relative to ps_goal, as a - * fragmentation avoidance measure. + * Slabs currently purged away. They are hugepage-sized and + * hugepage-aligned, but have had pages_nohuge and pages_purge_forced + * called on them. + * + * Guarded by grow_mtx. */ - size_t ps_alloc_max; + edata_list_inactive_t unused_slabs; + /* - * What's the maximum size we'll try to allocate out of the shard at - * all? + * Either NULL (if empty), or some integer multiple of a + * hugepage-aligned number of hugepages. We carve them off one at a + * time to satisfy new pageslab requests. + * + * Guarded by grow_mtx. */ - size_t small_max; - /* - * What's the minimum size for which we'll go straight to the global - * arena? - */ - size_t large_min; + edata_t *eden; /* The arena ind we're associated with. */ unsigned ind; + emap_t *emap; }; -bool hpa_init(hpa_t *hpa, base_t *base, emap_t *emap, - edata_cache_t *edata_cache); -bool hpa_shard_init(hpa_shard_t *shard, hpa_t *hpa, - edata_cache_t *edata_cache, unsigned ind, size_t ps_goal, - size_t ps_alloc_max, size_t small_max, size_t large_min); +/* + * Whether or not the HPA can be used given the current configuration. This is + * is not necessarily a guarantee that it backs its allocations by hugepages, + * just that it can function properly given the system it's running on. + */ +bool hpa_supported(); +bool hpa_shard_init(hpa_shard_t *shard, emap_t *emap, + edata_cache_t *edata_cache, unsigned ind, size_t alloc_max); + +void hpa_shard_stats_accum(hpa_shard_stats_t *dst, hpa_shard_stats_t *src); +void hpa_shard_stats_merge(tsdn_t *tsdn, hpa_shard_t *shard, + hpa_shard_stats_t *dst); -void hpa_stats_accum(hpa_shard_stats_t *dst, hpa_shard_stats_t *src); -void hpa_stats_merge(tsdn_t *tsdn, hpa_shard_t *shard, hpa_shard_stats_t *dst); /* * Notify the shard that we won't use it for allocations much longer. 
Due to * the possibility of races, we don't actually prevent allocations; just flush @@ -108,14 +91,4 @@ void hpa_shard_prefork4(tsdn_t *tsdn, hpa_shard_t *shard); void hpa_shard_postfork_parent(tsdn_t *tsdn, hpa_shard_t *shard); void hpa_shard_postfork_child(tsdn_t *tsdn, hpa_shard_t *shard); -/* - * These should be acquired after all the shard locks in phase 4, but before any - * locks in phase 4. The central HPA may acquire an edata cache mutex (of a0), - * so it needs to be lower in the witness ordering, but it's also logically - * global and not tied to any particular arena. - */ -void hpa_prefork4(tsdn_t *tsdn, hpa_t *hpa); -void hpa_postfork_parent(tsdn_t *tsdn, hpa_t *hpa); -void hpa_postfork_child(tsdn_t *tsdn, hpa_t *hpa); - #endif /* JEMALLOC_INTERNAL_HPA_H */ diff --git a/include/jemalloc/internal/mutex_prof.h b/include/jemalloc/internal/mutex_prof.h index ef0bf0d3..3759daaf 100644 --- a/include/jemalloc/internal/mutex_prof.h +++ b/include/jemalloc/internal/mutex_prof.h @@ -11,9 +11,7 @@ OP(ctl) \ OP(prof) \ OP(prof_thds_data) \ - OP(prof_dump) \ - OP(hpa_central) \ - OP(hpa_central_grow) + OP(prof_dump) typedef enum { #define OP(mtx) global_prof_mutex_##mtx, diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index f1823e6b..b9030226 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -130,9 +130,8 @@ bool pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, * This isn't exposed to users; we allow late enablement of the HPA shard so * that we can boot without worrying about the HPA, then turn it on in a0. */ -bool pa_shard_enable_hpa(pa_shard_t *shard, hpa_t *hpa, size_t ps_goal, - size_t ps_alloc_max, size_t small_max, size_t large_min, size_t sec_nshards, - size_t sec_alloc_max, size_t sec_bytes_max); +bool pa_shard_enable_hpa(pa_shard_t *shard, size_t alloc_max, + size_t sec_nshards, size_t sec_alloc_max, size_t sec_bytes_max); /* * We stop using the HPA when custom extent hooks are installed, but still * redirect deallocations to it. diff --git a/include/jemalloc/internal/psset.h b/include/jemalloc/internal/psset.h index 4529827a..3c9f23bb 100644 --- a/include/jemalloc/internal/psset.h +++ b/include/jemalloc/internal/psset.h @@ -24,11 +24,14 @@ typedef struct psset_bin_stats_s psset_bin_stats_t; struct psset_bin_stats_s { /* How many pageslabs are in this bin? */ - size_t npageslabs; + size_t npageslabs_huge; + size_t npageslabs_nonhuge; /* Of them, how many pages are active? */ - size_t nactive; + size_t nactive_huge; + size_t nactive_nonhuge; /* How many are inactive? */ - size_t ninactive; + size_t ninactive_huge; + size_t ninactive_nonhuge; }; /* Used only by CTL; not actually stored here (i.e., all derived). */ @@ -62,6 +65,8 @@ void psset_stats_accum(psset_stats_t *dst, psset_stats_t *src); void psset_insert(psset_t *psset, edata_t *ps); void psset_remove(psset_t *psset, edata_t *ps); +void psset_hugify(psset_t *psset, edata_t *ps); + /* * Tries to obtain a chunk from an existing pageslab already in the set. * Returns true on failure. 
diff --git a/src/arena.c b/src/arena.c index 7099713a..209eb347 100644 --- a/src/arena.c +++ b/src/arena.c @@ -37,7 +37,6 @@ static atomic_zd_t dirty_decay_ms_default; static atomic_zd_t muzzy_decay_ms_default; emap_t arena_emap_global; -hpa_t arena_hpa_global; const uint64_t h_steps[SMOOTHSTEP_NSTEPS] = { #define STEP(step, h, x, y) \ @@ -1535,9 +1534,8 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { * so arena_hpa_global is not yet initialized. */ if (opt_hpa && ehooks_are_default(base_ehooks_get(base)) && ind != 0) { - if (pa_shard_enable_hpa(&arena->pa_shard, &arena_hpa_global, - opt_hpa_slab_goal, opt_hpa_slab_max_alloc, - opt_hpa_small_max, opt_hpa_large_min, opt_hpa_sec_nshards, + if (pa_shard_enable_hpa(&arena->pa_shard, + opt_hpa_slab_max_alloc, opt_hpa_sec_nshards, opt_hpa_sec_max_alloc, opt_hpa_sec_max_bytes)) { goto label_error; } diff --git a/src/ctl.c b/src/ctl.c index f0df73b7..88cee666 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -220,13 +220,19 @@ CTL_PROTO(stats_arenas_i_extents_j_dirty_bytes) CTL_PROTO(stats_arenas_i_extents_j_muzzy_bytes) CTL_PROTO(stats_arenas_i_extents_j_retained_bytes) INDEX_PROTO(stats_arenas_i_extents_j) -CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs) -CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive) -CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_ninactive) +CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_npageslabs_huge) +CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_nactive_huge) +CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_ninactive_huge) +CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_npageslabs_nonhuge) +CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_nactive_nonhuge) +CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_ninactive_nonhuge) +CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_huge) +CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_nonhuge) +CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_huge) +CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_nonhuge) +CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_ninactive_huge) +CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_ninactive_nonhuge) INDEX_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j) -CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_npageslabs) -CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_nactive) -CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_ninactive) CTL_PROTO(stats_arenas_i_nthreads) CTL_PROTO(stats_arenas_i_uptime) CTL_PROTO(stats_arenas_i_dss) @@ -606,21 +612,33 @@ MUTEX_PROF_ARENA_MUTEXES }; static const ctl_named_node_t stats_arenas_i_hpa_shard_full_slabs_node[] = { - {NAME("npageslabs"), - CTL(stats_arenas_i_hpa_shard_full_slabs_npageslabs)}, - {NAME("nactive"), - CTL(stats_arenas_i_hpa_shard_full_slabs_nactive)}, - {NAME("ninactive"), - CTL(stats_arenas_i_hpa_shard_full_slabs_ninactive)} + {NAME("npageslabs_huge"), + CTL(stats_arenas_i_hpa_shard_full_slabs_npageslabs_huge)}, + {NAME("nactive_huge"), + CTL(stats_arenas_i_hpa_shard_full_slabs_nactive_huge)}, + {NAME("ninactive_huge"), + CTL(stats_arenas_i_hpa_shard_full_slabs_ninactive_huge)}, + {NAME("npageslabs_nonhuge"), + CTL(stats_arenas_i_hpa_shard_full_slabs_npageslabs_nonhuge)}, + {NAME("nactive_nonhuge"), + CTL(stats_arenas_i_hpa_shard_full_slabs_nactive_nonhuge)}, + {NAME("ninactive_nonhuge"), + CTL(stats_arenas_i_hpa_shard_full_slabs_ninactive_nonhuge)}, }; static const ctl_named_node_t stats_arenas_i_hpa_shard_nonfull_slabs_j_node[] = { - {NAME("npageslabs"), - 
CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs)}, - {NAME("nactive"), - CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive)}, - {NAME("ninactive"), - CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_ninactive)} + {NAME("npageslabs_huge"), + CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_huge)}, + {NAME("nactive_huge"), + CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_huge)}, + {NAME("ninactive_huge"), + CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_ninactive_huge)}, + {NAME("npageslabs_nonhuge"), + CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_nonhuge)}, + {NAME("nactive_nonhuge"), + CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_nonhuge)}, + {NAME("ninactive_nonhuge"), + CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_ninactive_nonhuge)} }; static const ctl_named_node_t super_stats_arenas_i_hpa_shard_nonfull_slabs_j_node[] = { @@ -1104,7 +1122,7 @@ MUTEX_PROF_ARENA_MUTEXES } /* Merge HPA stats. */ - hpa_stats_accum(&sdstats->hpastats, &astats->hpastats); + hpa_shard_stats_accum(&sdstats->hpastats, &astats->hpastats); sec_stats_accum(&sdstats->secstats, &astats->secstats); } } @@ -1219,14 +1237,6 @@ ctl_refresh(tsdn_t *tsdn) { READ_GLOBAL_MUTEX_PROF_DATA( global_prof_mutex_prof_dump, prof_dump_mtx); } - if (opt_hpa) { - READ_GLOBAL_MUTEX_PROF_DATA( - global_prof_mutex_hpa_central, - arena_hpa_global.mtx); - READ_GLOBAL_MUTEX_PROF_DATA( - global_prof_mutex_hpa_central_grow, - arena_hpa_global.grow_mtx); - } if (have_background_thread) { READ_GLOBAL_MUTEX_PROF_DATA( global_prof_mutex_background_thread, @@ -3259,11 +3269,6 @@ stats_mutexes_reset_ctl(tsd_t *tsd, const size_t *mib, MUTEX_PROF_RESET(tdatas_mtx); MUTEX_PROF_RESET(prof_dump_mtx); } - if (opt_hpa) { - MUTEX_PROF_RESET(arena_hpa_global.mtx); - MUTEX_PROF_RESET(arena_hpa_global.grow_mtx); - } - /* Per arena mutexes. 
*/ unsigned n = narenas_total_get(); @@ -3367,22 +3372,44 @@ stats_arenas_i_extents_j_index(tsdn_t *tsdn, const size_t *mib, return super_stats_arenas_i_extents_j_node; } -CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_npageslabs, - arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs.npageslabs, +/* Full, huge */ +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_npageslabs_huge, + arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs.npageslabs_huge, size_t); -CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_nactive, - arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs.nactive, size_t); -CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_ninactive, - arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs.ninactive, size_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_nactive_huge, + arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs.nactive_huge, size_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_ninactive_huge, + arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs.ninactive_huge, size_t); -CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs, - arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]].npageslabs, +/* Full, nonhuge */ +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_npageslabs_nonhuge, + arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs.npageslabs_nonhuge, size_t); -CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive, - arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]].nactive, +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_nactive_nonhuge, + arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs.nactive_nonhuge, size_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_ninactive_nonhuge, + arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs.ninactive_nonhuge, size_t); + +/* Nonfull, huge */ +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_huge, + arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]].npageslabs_huge, size_t); -CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_ninactive, - arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]].ninactive, +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_huge, + arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]].nactive_huge, + size_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_ninactive_huge, + arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]].ninactive_huge, + size_t); + +/* Nonfull, nonhuge */ +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_nonhuge, + arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]].npageslabs_nonhuge, + size_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_nonhuge, + arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]].nactive_nonhuge, + size_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_ninactive_nonhuge, + arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]].ninactive_nonhuge, size_t); static const ctl_named_node_t * diff --git a/src/hpa.c b/src/hpa.c index e7548adb..ca75628c 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -6,6 +6,8 @@ #include "jemalloc/internal/flat_bitmap.h" #include "jemalloc/internal/witness.h" +#define HPA_EDEN_SIZE (128 * 
HUGEPAGE) + static edata_t *hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero); static bool hpa_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, @@ -15,43 +17,40 @@ static bool hpa_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, static void hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata); bool -hpa_init(hpa_t *hpa, base_t *base, emap_t *emap, edata_cache_t *edata_cache) { - bool err; - +hpa_supported() { +#ifdef _WIN32 + /* + * At least until the API and implementation is somewhat settled, we + * don't want to try to debug the VM subsystem on the hardest-to-test + * platform. + */ + return false; +#endif + if (!pages_can_hugify) { + return false; + } /* * We fundamentally rely on a address-space-hungry growth strategy for - * hugepages. This may change in the future, but for now we should have - * refused to turn on any HPA at a higher level of the stack. + * hugepages. */ - assert(LG_SIZEOF_PTR == 3); - - err = malloc_mutex_init(&hpa->grow_mtx, "hpa_grow", WITNESS_RANK_HPA_GROW, - malloc_mutex_rank_exclusive); - if (err) { - return true; + if (LG_SIZEOF_PTR == 2) { + return false; } - err = malloc_mutex_init(&hpa->mtx, "hpa", WITNESS_RANK_HPA, - malloc_mutex_rank_exclusive); - if (err) { - return true; + /* + * We use the edata bitmap; it needs to have at least as many bits as a + * hugepage has pages. + */ + if (HUGEPAGE / PAGE > BITMAP_GROUPS_MAX * sizeof(bitmap_t) * 8) { + return false; } - - hpa_central_init(&hpa->central, edata_cache, emap); - if (err) { - return true; - } - hpa->ind = base_ind_get(base); - hpa->edata_cache = edata_cache; - - exp_grow_init(&hpa->exp_grow); - - return false; + return true; } bool -hpa_shard_init(hpa_shard_t *shard, hpa_t *hpa, edata_cache_t *edata_cache, - unsigned ind, size_t ps_goal, size_t ps_alloc_max, size_t small_max, - size_t large_min) { +hpa_shard_init(hpa_shard_t *shard, emap_t *emap, edata_cache_t *edata_cache, + unsigned ind, size_t alloc_max) { + /* malloc_conf processing should have filtered out these cases. */ + assert(hpa_supported()); bool err; err = malloc_mutex_init(&shard->grow_mtx, "hpa_shard_grow", WITNESS_RANK_HPA_SHARD_GROW, malloc_mutex_rank_exclusive); @@ -66,12 +65,12 @@ hpa_shard_init(hpa_shard_t *shard, hpa_t *hpa, edata_cache_t *edata_cache, assert(edata_cache != NULL); edata_cache_small_init(&shard->ecs, edata_cache); - shard->hpa = hpa; psset_init(&shard->psset); - shard->ps_goal = ps_goal; - shard->ps_alloc_max = ps_alloc_max; - shard->small_max = small_max; - shard->large_min = large_min; + shard->alloc_max = alloc_max; + edata_list_inactive_init(&shard->unused_slabs); + shard->eden = NULL; + shard->ind = ind; + shard->emap = emap; /* * Fill these in last, so that if an hpa_shard gets used despite @@ -83,9 +82,6 @@ hpa_shard_init(hpa_shard_t *shard, hpa_t *hpa, edata_cache_t *edata_cache, shard->pai.shrink = &hpa_shrink; shard->pai.dalloc = &hpa_dalloc; - shard->ind = ind; - assert(ind == base_ind_get(edata_cache->base)); - return false; } @@ -96,176 +92,333 @@ hpa_shard_init(hpa_shard_t *shard, hpa_t *hpa, edata_cache_t *edata_cache, * locking here. 
*/ void -hpa_stats_accum(hpa_shard_stats_t *dst, hpa_shard_stats_t *src) { +hpa_shard_stats_accum(hpa_shard_stats_t *dst, hpa_shard_stats_t *src) { psset_stats_accum(&dst->psset_stats, &src->psset_stats); } void -hpa_stats_merge(tsdn_t *tsdn, hpa_shard_t *shard, hpa_shard_stats_t *dst) { +hpa_shard_stats_merge(tsdn_t *tsdn, hpa_shard_t *shard, + hpa_shard_stats_t *dst) { malloc_mutex_lock(tsdn, &shard->mtx); psset_stats_accum(&dst->psset_stats, &shard->psset.stats); malloc_mutex_unlock(tsdn, &shard->mtx); } -static edata_t * -hpa_alloc_central(tsdn_t *tsdn, hpa_shard_t *shard, size_t size_min, - size_t size_goal) { - bool err; - edata_t *edata; - - hpa_t *hpa = shard->hpa; - - malloc_mutex_lock(tsdn, &hpa->mtx); - edata = hpa_central_alloc_reuse(tsdn, &hpa->central, size_min, - size_goal); - malloc_mutex_unlock(tsdn, &hpa->mtx); - if (edata != NULL) { - edata_arena_ind_set(edata, shard->ind); - return edata; - } - /* No existing range can satisfy the request; try to grow. */ - malloc_mutex_lock(tsdn, &hpa->grow_mtx); - +static bool +hpa_should_hugify(hpa_shard_t *shard, edata_t *ps) { /* - * We could have raced with other grow attempts; re-check to see if we - * did, and are now able to satisfy the request. + * For now, just use a static check; hugify a page if it's <= 5% + * inactive. Eventually, this should be a malloc conf option. */ - malloc_mutex_lock(tsdn, &hpa->mtx); - edata = hpa_central_alloc_reuse(tsdn, &hpa->central, size_min, - size_goal); - malloc_mutex_unlock(tsdn, &hpa->mtx); - if (edata != NULL) { - malloc_mutex_unlock(tsdn, &hpa->grow_mtx); - edata_arena_ind_set(edata, shard->ind); - return edata; - } + return !edata_hugeified_get(ps) + && edata_nfree_get(ps) < (HUGEPAGE / PAGE) * 5 / 100; +} +/* Returns true on error. */ +static void +hpa_hugify(edata_t *ps) { + assert(edata_size_get(ps) == HUGEPAGE); + assert(edata_hugeified_get(ps)); + bool err = pages_huge(edata_base_get(ps), HUGEPAGE); /* - * No such luck. We've dropped mtx, so other allocations can proceed - * while we allocate the new extent. We know no one else will grow in - * the meantime, though, since we still hold grow_mtx. - */ - size_t alloc_size; - pszind_t skip; - - size_t hugepage_goal_min = HUGEPAGE_CEILING(size_goal); - - err = exp_grow_size_prepare(&hpa->exp_grow, hugepage_goal_min, - &alloc_size, &skip); - if (err) { - malloc_mutex_unlock(tsdn, &hpa->grow_mtx); - return NULL; - } - alloc_size = HUGEPAGE_CEILING(alloc_size); - - /* - * Eventually, we need to think about this more systematically, and in - * terms of extent hooks. For now, though, we know we only care about - * overcommitting systems, and we're not going to purge much. - */ - bool commit = true; - void *addr = pages_map(NULL, alloc_size, HUGEPAGE, &commit); - if (addr == NULL) { - malloc_mutex_unlock(tsdn, &hpa->grow_mtx); - return NULL; - } - err = pages_huge(addr, alloc_size); - /* - * Ignore this for now; even if the allocation fails, the address space - * should still be usable. + * Eat the error; even if the hugeification failed, it's still safe to + * pretend it didn't (and would require extraordinary measures to + * unhugify). */ (void)err; +} - edata = edata_cache_get(tsdn, hpa->edata_cache); - if (edata == NULL) { - malloc_mutex_unlock(tsdn, &hpa->grow_mtx); - pages_unmap(addr, alloc_size); - return NULL; +static void +hpa_dehugify(edata_t *ps) { + /* Purge, then dehugify while unbacked. 
*/ + pages_purge_forced(edata_addr_get(ps), HUGEPAGE); + pages_nohuge(edata_addr_get(ps), HUGEPAGE); + edata_hugeified_set(ps, false); +} + +static edata_t * +hpa_grow(tsdn_t *tsdn, hpa_shard_t *shard) { + malloc_mutex_assert_owner(tsdn, &shard->grow_mtx); + edata_t *ps = NULL; + + /* Is there address space waiting for reuse? */ + malloc_mutex_assert_owner(tsdn, &shard->grow_mtx); + ps = edata_list_inactive_first(&shard->unused_slabs); + if (ps != NULL) { + edata_list_inactive_remove(&shard->unused_slabs, ps); + return ps; + } + + /* Is eden a perfect fit? */ + if (shard->eden != NULL && edata_size_get(shard->eden) == HUGEPAGE) { + ps = shard->eden; + shard->eden = NULL; + return ps; } /* - * The serial number here is just a placeholder; the hpa_central gets to - * decide how it wants to fill it in. - * - * The grow edata is associated with the hpa_central_t arena ind; the - * subsequent allocation we get (in the hpa_central_alloc_grow call - * below) will be filled in with the shard ind. + * We're about to try to allocate from eden by splitting. If eden is + * NULL, we have to allocate it too. Otherwise, we just have to + * allocate an edata_t for the new psset. */ - edata_init(edata, hpa->ind, addr, alloc_size, /* slab */ false, - SC_NSIZES, /* sn */ 0, extent_state_active, /* zeroed */ true, - /* comitted */ true, EXTENT_PAI_HPA, /* is_head */ true); + if (shard->eden == NULL) { + /* + * During development, we're primarily concerned with systems + * with overcommit. Eventually, we should be more careful here. + */ + bool commit = true; + /* Allocate address space, bailing if we fail. */ + void *new_eden = pages_map(NULL, HPA_EDEN_SIZE, HUGEPAGE, + &commit); + if (new_eden == NULL) { + return NULL; + } + malloc_mutex_lock(tsdn, &shard->mtx); + /* Allocate ps edata, bailing if we fail. */ + ps = edata_cache_small_get(tsdn, &shard->ecs); + if (ps == NULL) { + malloc_mutex_unlock(tsdn, &shard->mtx); + pages_unmap(new_eden, HPA_EDEN_SIZE); + return NULL; + } + /* Allocate eden edata, bailing if we fail. */ + shard->eden = edata_cache_small_get(tsdn, &shard->ecs); + if (shard->eden == NULL) { + edata_cache_small_put(tsdn, &shard->ecs, ps); + malloc_mutex_unlock(tsdn, &shard->mtx); + pages_unmap(new_eden, HPA_EDEN_SIZE); + return NULL; + } + /* Success. */ + malloc_mutex_unlock(tsdn, &shard->mtx); - malloc_mutex_lock(tsdn, &hpa->mtx); - /* Note that this replace edata with the allocation to return. */ - err = hpa_central_alloc_grow(tsdn, &hpa->central, size_goal, edata); - malloc_mutex_unlock(tsdn, &hpa->mtx); - - if (!err) { - exp_grow_size_commit(&hpa->exp_grow, skip); + /* + * Note that the values here don't really make sense (e.g. eden + * is actually zeroed). But we don't use the slab metadata in + * determining subsequent allocation metadata (e.g. zero + * tracking should be done at the per-page level, not at the + * level of the hugepage). It's just a convenient data + * structure that contains much of the helpers we need (defined + * lists, a bitmap, an address field, etc.). Eventually, we'll + * have a "real" representation of a hugepage that's unconnected + * to the edata_ts it will serve allocations into. + */ + edata_init(shard->eden, shard->ind, new_eden, HPA_EDEN_SIZE, + /* slab */ false, SC_NSIZES, /* sn */ 0, extent_state_dirty, + /* zeroed */ false, /* comitted */ true, EXTENT_PAI_HPA, + /* is_head */ true); + edata_hugeified_set(shard->eden, false); + } else { + /* Eden is already nonempty; only need an edata for ps. 
*/ + malloc_mutex_lock(tsdn, &shard->mtx); + ps = edata_cache_small_get(tsdn, &shard->ecs); + malloc_mutex_unlock(tsdn, &shard->mtx); + if (ps == NULL) { + return NULL; + } } - malloc_mutex_unlock(tsdn, &hpa->grow_mtx); - edata_arena_ind_set(edata, shard->ind); + /* + * We should have dropped mtx since we're not touching ecs any more, but + * we should continue to hold the grow mutex, since we're about to touch + * eden. + */ + malloc_mutex_assert_not_owner(tsdn, &shard->mtx); + malloc_mutex_assert_owner(tsdn, &shard->grow_mtx); + assert(shard->eden != NULL); + assert(edata_size_get(shard->eden) > HUGEPAGE); + assert(edata_size_get(shard->eden) % HUGEPAGE == 0); + assert(edata_addr_get(shard->eden) + == HUGEPAGE_ADDR2BASE(edata_addr_get(shard->eden))); + malloc_mutex_lock(tsdn, &shard->mtx); + ps = edata_cache_small_get(tsdn, &shard->ecs); + malloc_mutex_unlock(tsdn, &shard->mtx); + if (ps == NULL) { + return NULL; + } + edata_init(ps, edata_arena_ind_get(shard->eden), + edata_addr_get(shard->eden), HUGEPAGE, /* slab */ false, + /* szind */ SC_NSIZES, /* sn */ 0, extent_state_dirty, + /* zeroed */ false, /* comitted */ true, EXTENT_PAI_HPA, + /* is_head */ true); + edata_hugeified_set(ps, false); + edata_addr_set(shard->eden, edata_past_get(ps)); + edata_size_set(shard->eden, + edata_size_get(shard->eden) - HUGEPAGE); + + return ps; +} + +/* + * The psset does not hold empty slabs. Upon becoming empty, then, we need to + * put them somewhere. We take this as an opportunity to purge, and retain + * their address space in a list outside the psset. + */ +static void +hpa_handle_ps_eviction(tsdn_t *tsdn, hpa_shard_t *shard, edata_t *ps) { + /* + * We do relatively expensive system calls. The ps was evicted, so no + * one should touch it while we're also touching it. + */ + malloc_mutex_assert_not_owner(tsdn, &shard->mtx); + malloc_mutex_assert_not_owner(tsdn, &shard->grow_mtx); + + assert(edata_size_get(ps) == HUGEPAGE); + assert(HUGEPAGE_ADDR2BASE(edata_addr_get(ps)) == edata_addr_get(ps)); + + /* + * We do this unconditionally, even for pages which were not originally + * hugeified; it has the same effect. + */ + hpa_dehugify(ps); + + malloc_mutex_lock(tsdn, &shard->grow_mtx); + edata_list_inactive_prepend(&shard->unused_slabs, ps); + malloc_mutex_unlock(tsdn, &shard->grow_mtx); +} + +static edata_t * +hpa_try_alloc_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, bool *oom) { + bool err; + malloc_mutex_lock(tsdn, &shard->mtx); + edata_t *edata = edata_cache_small_get(tsdn, &shard->ecs); + *oom = false; + if (edata == NULL) { + malloc_mutex_unlock(tsdn, &shard->mtx); + *oom = true; + return NULL; + } + assert(edata_arena_ind_get(edata) == shard->ind); + + err = psset_alloc_reuse(&shard->psset, edata, size); if (err) { - pages_unmap(addr, alloc_size); - edata_cache_put(tsdn, hpa->edata_cache, edata); + edata_cache_small_put(tsdn, &shard->ecs, edata); + malloc_mutex_unlock(tsdn, &shard->mtx); + return NULL; + } + /* + * This could theoretically be moved outside of the critical section, + * but that introduces the potential for a race. Without the lock, the + * (initially nonempty, since this is the reuse pathway) pageslab we + * allocated out of could become otherwise empty while the lock is + * dropped. This would force us to deal with a pageslab eviction down + * the error pathway, which is a pain. 
+ */ + err = emap_register_boundary(tsdn, shard->emap, edata, + SC_NSIZES, /* slab */ false); + if (err) { + edata_t *ps = psset_dalloc(&shard->psset, edata); + /* + * The pageslab was nonempty before we started; it + * should still be nonempty now, and so shouldn't get + * evicted. + */ + assert(ps == NULL); + edata_cache_small_put(tsdn, &shard->ecs, edata); + malloc_mutex_unlock(tsdn, &shard->mtx); + *oom = true; return NULL; } + edata_t *ps = edata_ps_get(edata); + assert(ps != NULL); + bool hugify = hpa_should_hugify(shard, ps); + if (hugify) { + /* + * Do the metadata modification while holding the lock; we'll + * actually change state with the lock dropped. + */ + psset_hugify(&shard->psset, ps); + } + malloc_mutex_unlock(tsdn, &shard->mtx); + if (hugify) { + /* + * Hugifying with the lock dropped is safe, even with + * concurrent modifications to the ps. This relies on + * the fact that the current implementation will never + * dehugify a non-empty pageslab, and ps will never + * become empty before we return edata to the user to be + * freed. + * + * Note that holding the lock would prevent not just operations + * on this page slab, but also operations any other alloc/dalloc + * operations in this hpa shard. + */ + hpa_hugify(ps); + } return edata; } static edata_t * hpa_alloc_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size) { - assert(size <= shard->ps_alloc_max); - + assert(size <= shard->alloc_max); bool err; - malloc_mutex_lock(tsdn, &shard->mtx); - edata_t *edata = edata_cache_small_get(tsdn, &shard->ecs); - if (edata == NULL) { - malloc_mutex_unlock(tsdn, &shard->mtx); - return NULL; - } - edata_arena_ind_set(edata, shard->ind); + bool oom; + edata_t *edata; - err = psset_alloc_reuse(&shard->psset, edata, size); - malloc_mutex_unlock(tsdn, &shard->mtx); - if (!err) { + edata = hpa_try_alloc_no_grow(tsdn, shard, size, &oom); + if (edata != NULL) { return edata; } + /* Nothing in the psset works; we have to grow it. */ malloc_mutex_lock(tsdn, &shard->grow_mtx); - - /* As above; check for grow races. */ - malloc_mutex_lock(tsdn, &shard->mtx); - err = psset_alloc_reuse(&shard->psset, edata, size); - malloc_mutex_unlock(tsdn, &shard->mtx); - if (!err) { + /* + * Check for grow races; maybe some earlier thread expanded the psset + * in between when we dropped the main mutex and grabbed the grow mutex. + */ + edata = hpa_try_alloc_no_grow(tsdn, shard, size, &oom); + if (edata != NULL || oom) { malloc_mutex_unlock(tsdn, &shard->grow_mtx); return edata; } - edata_t *grow_edata = hpa_alloc_central(tsdn, shard, size, - shard->ps_goal); + /* + * Note that we don't hold shard->mtx here (while growing); + * deallocations (and allocations of smaller sizes) may still succeed + * while we're doing this potentially expensive system call. + */ + edata_t *grow_edata = hpa_grow(tsdn, shard); if (grow_edata == NULL) { malloc_mutex_unlock(tsdn, &shard->grow_mtx); - - malloc_mutex_lock(tsdn, &shard->mtx); - edata_cache_small_put(tsdn, &shard->ecs, edata); - malloc_mutex_unlock(tsdn, &shard->mtx); - return NULL; } - edata_arena_ind_set(grow_edata, shard->ind); + assert(edata_arena_ind_get(grow_edata) == shard->ind); + edata_slab_set(grow_edata, true); fb_group_t *fb = edata_slab_data_get(grow_edata)->bitmap; - fb_init(fb, shard->ps_goal / PAGE); + fb_init(fb, HUGEPAGE / PAGE); /* We got the new edata; allocate from it. 
*/ malloc_mutex_lock(tsdn, &shard->mtx); + edata = edata_cache_small_get(tsdn, &shard->ecs); + if (edata == NULL) { + malloc_mutex_unlock(tsdn, &shard->mtx); + malloc_mutex_unlock(tsdn, &shard->grow_mtx); + return NULL; + } psset_alloc_new(&shard->psset, grow_edata, edata, size); + err = emap_register_boundary(tsdn, shard->emap, edata, + SC_NSIZES, /* slab */ false); + if (err) { + edata_t *ps = psset_dalloc(&shard->psset, edata); + /* + * The pageslab was empty except for the new allocation; it + * should get evicted. + */ + assert(ps == grow_edata); + edata_cache_small_put(tsdn, &shard->ecs, edata); + /* + * Technically the same as fallthrough at the time of this + * writing, but consistent with the error handling in the rest + * of the function. + */ + malloc_mutex_unlock(tsdn, &shard->mtx); + malloc_mutex_unlock(tsdn, &shard->grow_mtx); + hpa_handle_ps_eviction(tsdn, shard, ps); + return NULL; + } malloc_mutex_unlock(tsdn, &shard->mtx); - malloc_mutex_unlock(tsdn, &shard->grow_mtx); return edata; } @@ -283,33 +436,25 @@ static edata_t * hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero) { assert((size & PAGE_MASK) == 0); + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); + hpa_shard_t *shard = hpa_from_pai(self); /* We don't handle alignment or zeroing for now. */ if (alignment > PAGE || zero) { return NULL; } - if (size > shard->small_max && size < shard->large_min) { + if (size > shard->alloc_max) { return NULL; } - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); - - edata_t *edata; - if (size <= shard->ps_alloc_max) { - edata = hpa_alloc_psset(tsdn, shard, size); - if (edata != NULL) { - emap_register_boundary(tsdn, shard->hpa->central.emap, - edata, SC_NSIZES, /* slab */ false); - } - } else { - edata = hpa_alloc_central(tsdn, shard, size, size); - } + edata_t *edata = hpa_alloc_psset(tsdn, shard, size); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); + if (edata != NULL) { - emap_assert_mapped(tsdn, shard->hpa->central.emap, edata); + emap_assert_mapped(tsdn, shard->emap, edata); assert(edata_pai_get(edata) == EXTENT_PAI_HPA); assert(edata_state_get(edata) == extent_state_active); assert(edata_arena_ind_get(edata) == shard->ind); @@ -336,16 +481,6 @@ hpa_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, return true; } -static void -hpa_dalloc_central(tsdn_t *tsdn, hpa_shard_t *shard, edata_t *edata) { - hpa_t *hpa = shard->hpa; - - edata_arena_ind_set(edata, hpa->ind); - malloc_mutex_lock(tsdn, &hpa->mtx); - hpa_central_dalloc(tsdn, &hpa->central, edata); - malloc_mutex_unlock(tsdn, &hpa->mtx); -} - static void hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { hpa_shard_t *shard = hpa_from_pai(self); @@ -361,56 +496,29 @@ hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { assert(edata_committed_get(edata)); assert(edata_base_get(edata) != NULL); + edata_t *ps = edata_ps_get(edata); + /* Currently, all edatas come from pageslabs. */ + assert(ps != NULL); + emap_deregister_boundary(tsdn, shard->emap, edata); + malloc_mutex_lock(tsdn, &shard->mtx); /* - * There are two cases: - * - The psset field is NULL. In this case, the edata comes directly - * from the hpa_central_t and should be returned to it. - * - THe psset field is not NULL, in which case we return the edata to - * the appropriate slab (which may in turn cause it to become empty, - * triggering an eviction of the whole slab, which should then be - * returned to the hpa_central_t). 
+ * Note that the shard mutex protects the edata hugeified field, too. + * Page slabs can move between pssets (and have their hugeified status + * change) in racy ways. */ - if (edata_ps_get(edata) != NULL) { - emap_deregister_boundary(tsdn, shard->hpa->central.emap, edata); - - malloc_mutex_lock(tsdn, &shard->mtx); - edata_t *evicted_ps = psset_dalloc(&shard->psset, edata); - edata_cache_small_put(tsdn, &shard->ecs, edata); - malloc_mutex_unlock(tsdn, &shard->mtx); - - - if (evicted_ps != NULL) { - /* - * The deallocation caused a pageslab to become empty. - * Free it back to the centralized allocator. - */ - bool err = emap_register_boundary(tsdn, - shard->hpa->central.emap, evicted_ps, SC_NSIZES, - /* slab */ false); - /* - * Registration can only fail on OOM, but the boundary - * mappings should have been initialized during - * allocation. - */ - assert(!err); - edata_slab_set(evicted_ps, false); - edata_ps_set(evicted_ps, NULL); - - assert(edata_arena_ind_get(evicted_ps) == shard->ind); - hpa_dalloc_central(tsdn, shard, evicted_ps); - } - } else { - hpa_dalloc_central(tsdn, shard, edata); + edata_t *evicted_ps = psset_dalloc(&shard->psset, edata); + /* + * If a pageslab became empty because of the dalloc, it better have been + * the one we expected. + */ + assert(evicted_ps == NULL || evicted_ps == ps); + edata_cache_small_put(tsdn, &shard->ecs, edata); + malloc_mutex_unlock(tsdn, &shard->mtx); + if (evicted_ps != NULL) { + hpa_handle_ps_eviction(tsdn, shard, evicted_ps); } } -static void -hpa_shard_assert_stats_empty(psset_bin_stats_t *bin_stats) { - assert(bin_stats->npageslabs == 0); - assert(bin_stats->nactive == 0); - assert(bin_stats->ninactive == 0); -} - void hpa_shard_disable(tsdn_t *tsdn, hpa_shard_t *shard) { malloc_mutex_lock(tsdn, &shard->mtx); @@ -418,6 +526,29 @@ hpa_shard_disable(tsdn_t *tsdn, hpa_shard_t *shard) { malloc_mutex_unlock(tsdn, &shard->mtx); } +static void +hpa_shard_assert_stats_empty(psset_bin_stats_t *bin_stats) { + assert(bin_stats->npageslabs_huge == 0); + assert(bin_stats->nactive_huge == 0); + assert(bin_stats->ninactive_huge == 0); + assert(bin_stats->npageslabs_nonhuge == 0); + assert(bin_stats->nactive_nonhuge == 0); + assert(bin_stats->ninactive_nonhuge == 0); +} + +static void +hpa_assert_empty(tsdn_t *tsdn, hpa_shard_t *shard, psset_t *psset) { + edata_t edata = {0}; + malloc_mutex_assert_owner(tsdn, &shard->mtx); + bool psset_empty = psset_alloc_reuse(psset, &edata, PAGE); + assert(psset_empty); + hpa_shard_assert_stats_empty(&psset->stats.full_slabs); + for (pszind_t i = 0; i < PSSET_NPSIZES; i++) { + hpa_shard_assert_stats_empty( + &psset->stats.nonfull_slabs[i]); + } +} + void hpa_shard_destroy(tsdn_t *tsdn, hpa_shard_t *shard) { /* @@ -427,17 +558,15 @@ hpa_shard_destroy(tsdn_t *tsdn, hpa_shard_t *shard) { * 1-page allocation. 
*/ if (config_debug) { - edata_t edata = {0}; malloc_mutex_lock(tsdn, &shard->mtx); - bool psset_empty = psset_alloc_reuse(&shard->psset, &edata, - PAGE); + hpa_assert_empty(tsdn, shard, &shard->psset); malloc_mutex_unlock(tsdn, &shard->mtx); - assert(psset_empty); - hpa_shard_assert_stats_empty(&shard->psset.stats.full_slabs); - for (pszind_t i = 0; i < PSSET_NPSIZES; i++) { - hpa_shard_assert_stats_empty( - &shard->psset.stats.nonfull_slabs[i]); - } + } + edata_t *ps; + while ((ps = edata_list_inactive_first(&shard->unused_slabs)) != NULL) { + assert(edata_size_get(ps) == HUGEPAGE); + edata_list_inactive_remove(&shard->unused_slabs, ps); + pages_unmap(edata_base_get(ps), HUGEPAGE); } } @@ -462,21 +591,3 @@ hpa_shard_postfork_child(tsdn_t *tsdn, hpa_shard_t *shard) { malloc_mutex_postfork_child(tsdn, &shard->grow_mtx); malloc_mutex_postfork_child(tsdn, &shard->mtx); } - -void -hpa_prefork4(tsdn_t *tsdn, hpa_t *hpa) { - malloc_mutex_prefork(tsdn, &hpa->grow_mtx); - malloc_mutex_prefork(tsdn, &hpa->mtx); -} - -void -hpa_postfork_parent(tsdn_t *tsdn, hpa_t *hpa) { - malloc_mutex_postfork_parent(tsdn, &hpa->grow_mtx); - malloc_mutex_postfork_parent(tsdn, &hpa->mtx); -} - -void -hpa_postfork_child(tsdn_t *tsdn, hpa_t *hpa) { - malloc_mutex_postfork_child(tsdn, &hpa->grow_mtx); - malloc_mutex_postfork_child(tsdn, &hpa->mtx); -} diff --git a/src/jemalloc.c b/src/jemalloc.c index 74240c0a..277b9e72 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1802,31 +1802,19 @@ malloc_init_hard_a0_locked() { } a0 = arena_get(TSDN_NULL, 0, false); - if (opt_hpa && LG_SIZEOF_PTR == 2) { + if (opt_hpa && !hpa_supported()) { + malloc_printf(": HPA not supported in the current " + "configuration; %s.", + opt_abort_conf ? "aborting" : "disabling"); if (opt_abort_conf) { - malloc_printf(": Hugepages not currently " - "supported on 32-bit architectures; aborting."); + malloc_abort_invalid_conf(); } else { - malloc_printf(": Hugepages not currently " - "supported on 32-bit architectures; disabling."); opt_hpa = false; } } else if (opt_hpa) { - /* - * The global HPA uses the edata cache from a0, and so needs to - * be initialized specially, after a0 is. The arena init code - * handles this case specially, and does not turn on the HPA for - * a0 when opt_hpa is true. This lets us do global HPA - * initialization against a valid a0. 
- */ - if (hpa_init(&arena_hpa_global, b0get(), &arena_emap_global, - &a0->pa_shard.edata_cache)) { - return true; - } - if (pa_shard_enable_hpa(&a0->pa_shard, &arena_hpa_global, - opt_hpa_slab_goal, opt_hpa_slab_max_alloc, - opt_hpa_small_max, opt_hpa_large_min, opt_hpa_sec_nshards, - opt_hpa_sec_max_alloc, opt_hpa_sec_max_bytes)) { + if (pa_shard_enable_hpa(&a0->pa_shard, opt_hpa_slab_max_alloc, + opt_hpa_sec_nshards, opt_hpa_sec_max_alloc, + opt_hpa_sec_max_bytes)) { return true; } } @@ -4346,9 +4334,6 @@ _malloc_prefork(void) } } } - if (i == 4 && opt_hpa) { - hpa_prefork4(tsd_tsdn(tsd), &arena_hpa_global); - } } prof_prefork1(tsd_tsdn(tsd)); @@ -4388,9 +4373,6 @@ _malloc_postfork(void) arena_postfork_parent(tsd_tsdn(tsd), arena); } } - if (opt_hpa) { - hpa_postfork_parent(tsd_tsdn(tsd), &arena_hpa_global); - } prof_postfork_parent(tsd_tsdn(tsd)); if (have_background_thread) { background_thread_postfork_parent(tsd_tsdn(tsd)); @@ -4421,9 +4403,6 @@ jemalloc_postfork_child(void) { arena_postfork_child(tsd_tsdn(tsd), arena); } } - if (opt_hpa) { - hpa_postfork_child(tsd_tsdn(tsd), &arena_hpa_global); - } prof_postfork_child(tsd_tsdn(tsd)); if (have_background_thread) { background_thread_postfork_child(tsd_tsdn(tsd)); diff --git a/src/pa.c b/src/pa.c index e5fcbb7b..bc52ff43 100644 --- a/src/pa.c +++ b/src/pa.c @@ -49,17 +49,10 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, } bool -pa_shard_enable_hpa(pa_shard_t *shard, hpa_t *hpa, size_t ps_goal, - size_t ps_alloc_max, size_t small_max, size_t large_min, - size_t sec_nshards, size_t sec_alloc_max, size_t sec_bytes_max) { - ps_goal &= ~PAGE_MASK; - ps_alloc_max &= ~PAGE_MASK; - - if (ps_alloc_max > ps_goal) { - ps_alloc_max = ps_goal; - } - if (hpa_shard_init(&shard->hpa_shard, hpa, &shard->edata_cache, - shard->ind, ps_goal, ps_alloc_max, small_max, large_min)) { +pa_shard_enable_hpa(pa_shard_t *shard, size_t alloc_max, size_t sec_nshards, + size_t sec_alloc_max, size_t sec_bytes_max) { + if (hpa_shard_init(&shard->hpa_shard, shard->emap, &shard->edata_cache, + shard->ind, alloc_max)) { return true; } if (sec_init(&shard->hpa_sec, &shard->hpa_shard.pai, sec_nshards, diff --git a/src/pa_extra.c b/src/pa_extra.c index 2002418a..0f488be6 100644 --- a/src/pa_extra.c +++ b/src/pa_extra.c @@ -150,7 +150,7 @@ pa_shard_stats_merge(tsdn_t *tsdn, pa_shard_t *shard, } if (shard->ever_used_hpa) { - hpa_stats_merge(tsdn, &shard->hpa_shard, hpa_stats_out); + hpa_shard_stats_merge(tsdn, &shard->hpa_shard, hpa_stats_out); sec_stats_merge(tsdn, &shard->hpa_sec, sec_stats_out); } } diff --git a/src/psset.c b/src/psset.c index c24266ce..2ee683b6 100644 --- a/src/psset.c +++ b/src/psset.c @@ -20,9 +20,13 @@ psset_init(psset_t *psset) { static void psset_bin_stats_accum(psset_bin_stats_t *dst, psset_bin_stats_t *src) { - dst->npageslabs += src->npageslabs; - dst->nactive += src->nactive; - dst->ninactive += src->ninactive; + dst->npageslabs_huge += src->npageslabs_huge; + dst->nactive_huge += src->nactive_huge; + dst->ninactive_huge += src->ninactive_huge; + + dst->npageslabs_nonhuge += src->npageslabs_nonhuge; + dst->nactive_nonhuge += src->nactive_nonhuge; + dst->ninactive_nonhuge += src->ninactive_nonhuge; } void @@ -45,29 +49,62 @@ psset_stats_accum(psset_stats_t *dst, psset_stats_t *src) { * ensure we don't miss any heap modification operations. */ JEMALLOC_ALWAYS_INLINE void -psset_bin_stats_adjust(psset_bin_stats_t *binstats, edata_t *ps, bool inc) { - size_t mul = inc ? 
(size_t)1 : (size_t)-1; +psset_bin_stats_insert_remove(psset_bin_stats_t *binstats, edata_t *ps, + bool insert) { + size_t *npageslabs_dst = edata_hugeified_get(ps) + ? &binstats->npageslabs_huge : &binstats->npageslabs_nonhuge; + size_t *nactive_dst = edata_hugeified_get(ps) + ? &binstats->nactive_huge : &binstats->nactive_nonhuge; + size_t *ninactive_dst = edata_hugeified_get(ps) + ? &binstats->ninactive_huge : &binstats->ninactive_nonhuge; size_t npages = edata_size_get(ps) >> LG_PAGE; size_t ninactive = edata_nfree_get(ps); size_t nactive = npages - ninactive; - binstats->npageslabs += mul * 1; - binstats->nactive += mul * nactive; - binstats->ninactive += mul * ninactive; + + size_t mul = insert ? (size_t)1 : (size_t)-1; + *npageslabs_dst += mul * 1; + *nactive_dst += mul * nactive; + *ninactive_dst += mul * ninactive; +} + +static void +psset_bin_stats_insert(psset_bin_stats_t *binstats, edata_t *ps) { + psset_bin_stats_insert_remove(binstats, ps, /* insert */ true); +} + +static void +psset_bin_stats_remove(psset_bin_stats_t *binstats, edata_t *ps) { + psset_bin_stats_insert_remove(binstats, ps, /* insert */ false); +} + +/* + * We don't currently need an "activate" equivalent to this, since down the + * allocation pathways we don't do the optimization in which we change a slab + * without first removing it from a bin. + */ +static void +psset_bin_stats_deactivate(psset_bin_stats_t *binstats, bool huge, size_t num) { + size_t *nactive_dst = huge + ? &binstats->nactive_huge : &binstats->nactive_nonhuge; + size_t *ninactive_dst = huge + ? &binstats->ninactive_huge : &binstats->ninactive_nonhuge; + + assert(*nactive_dst >= num); + *nactive_dst -= num; + *ninactive_dst += num; } static void psset_edata_heap_remove(psset_t *psset, pszind_t pind, edata_t *ps) { edata_age_heap_remove(&psset->pageslabs[pind], ps); - psset_bin_stats_adjust(&psset->stats.nonfull_slabs[pind], ps, - /* inc */ false); + psset_bin_stats_remove(&psset->stats.nonfull_slabs[pind], ps); } static void psset_edata_heap_insert(psset_t *psset, pszind_t pind, edata_t *ps) { edata_age_heap_insert(&psset->pageslabs[pind], ps); - psset_bin_stats_adjust(&psset->stats.nonfull_slabs[pind], ps, - /* inc */ true); + psset_bin_stats_insert(&psset->stats.nonfull_slabs[pind], ps); } JEMALLOC_ALWAYS_INLINE void @@ -86,8 +123,7 @@ psset_insert(psset_t *psset, edata_t *ps) { * We don't ned to track full slabs; just pretend to for stats * purposes. See the comment at psset_bin_stats_adjust. 
*/ - psset_bin_stats_adjust(&psset->stats.full_slabs, ps, - /* inc */ true); + psset_bin_stats_insert(&psset->stats.full_slabs, ps); return; } @@ -107,8 +143,7 @@ psset_remove(psset_t *psset, edata_t *ps) { size_t longest_free_range = edata_longest_free_range_get(ps); if (longest_free_range == 0) { - psset_bin_stats_adjust(&psset->stats.full_slabs, ps, - /* inc */ true); + psset_bin_stats_remove(&psset->stats.full_slabs, ps); return; } @@ -121,6 +156,26 @@ psset_remove(psset_t *psset, edata_t *ps) { } } +void +psset_hugify(psset_t *psset, edata_t *ps) { + assert(!edata_hugeified_get(ps)); + psset_assert_ps_consistent(ps); + + size_t longest_free_range = edata_longest_free_range_get(ps); + psset_bin_stats_t *bin_stats; + if (longest_free_range == 0) { + bin_stats = &psset->stats.full_slabs; + } else { + pszind_t pind = sz_psz2ind(sz_psz_quantize_floor( + longest_free_range << LG_PAGE)); + assert(pind < PSSET_NPSIZES); + bin_stats = &psset->stats.nonfull_slabs[pind]; + } + psset_bin_stats_remove(bin_stats, ps); + edata_hugeified_set(ps, true); + psset_bin_stats_insert(bin_stats, ps); +} + /* * Similar to PAC's extent_recycle_extract. Out of all the pageslabs in the * set, picks one that can satisfy the allocation and remove it from the set. @@ -225,8 +280,7 @@ psset_ps_alloc_insert(psset_t *psset, edata_t *ps, edata_t *r_edata, } edata_longest_free_range_set(ps, (uint32_t)largest_unchosen_range); if (largest_unchosen_range == 0) { - psset_bin_stats_adjust(&psset->stats.full_slabs, ps, - /* inc */ true); + psset_bin_stats_insert(&psset->stats.full_slabs, ps); } else { psset_insert(psset, ps); } @@ -258,8 +312,8 @@ edata_t * psset_dalloc(psset_t *psset, edata_t *edata) { assert(edata_pai_get(edata) == EXTENT_PAI_HPA); assert(edata_ps_get(edata) != NULL); - edata_t *ps = edata_ps_get(edata); + fb_group_t *ps_fb = edata_slab_data_get(ps)->bitmap; size_t ps_old_longest_free_range = edata_longest_free_range_get(ps); pszind_t old_pind = SC_NPSIZES; @@ -274,22 +328,12 @@ psset_dalloc(psset_t *psset, edata_t *edata) { >> LG_PAGE; size_t len = edata_size_get(edata) >> LG_PAGE; fb_unset_range(ps_fb, ps_npages, begin, len); - if (ps_old_longest_free_range == 0) { - /* We were in the (imaginary) full bin; update stats for it. */ - psset_bin_stats_adjust(&psset->stats.full_slabs, ps, - /* inc */ false); - } else { - /* - * The edata is still in the bin, need to update its - * contribution. - */ - psset->stats.nonfull_slabs[old_pind].nactive -= len; - psset->stats.nonfull_slabs[old_pind].ninactive += len; - } - /* - * Note that we want to do this after the stats updates, since if it was - * full it psset_bin_stats_adjust would have looked at the old version. - */ + + /* The pageslab is still in the bin; adjust its stats first. */ + psset_bin_stats_t *bin_stats = (ps_old_longest_free_range == 0 + ? &psset->stats.full_slabs : &psset->stats.nonfull_slabs[old_pind]); + psset_bin_stats_deactivate(bin_stats, edata_hugeified_get(ps), len); + edata_nfree_set(ps, (uint32_t)(edata_nfree_get(ps) + len)); /* We might have just created a new, larger range. */ @@ -327,6 +371,12 @@ psset_dalloc(psset_t *psset, edata_t *edata) { bitmap_set(psset->bitmap, &psset_bitmap_info, (size_t)old_pind); } + } else { + /* + * Otherwise, the bin was full, and we need to adjust the full + * bin stats. + */ + psset_bin_stats_remove(&psset->stats.full_slabs, ps); } /* If the pageslab is empty, it gets evicted from the set. 
*/ if (new_range_len == ps_npages) { diff --git a/src/stats.c b/src/stats.c index 4b40721a..abe3ab16 100644 --- a/src/stats.c +++ b/src/stats.c @@ -667,16 +667,27 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i) { emitter_row_t row; emitter_row_init(&row); - size_t npageslabs; - size_t nactive; - size_t ninactive; + size_t npageslabs_huge; + size_t nactive_huge; + size_t ninactive_huge; - CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.npageslabs", - i, &npageslabs, size_t); - CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.nactive", - i, &nactive, size_t); - CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.ninactive", - i, &ninactive, size_t); + size_t npageslabs_nonhuge; + size_t nactive_nonhuge; + size_t ninactive_nonhuge; + + CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.npageslabs_huge", + i, &npageslabs_huge, size_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.nactive_huge", + i, &nactive_huge, size_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.ninactive_huge", + i, &ninactive_huge, size_t); + + CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.npageslabs_nonhuge", + i, &npageslabs_nonhuge, size_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.nactive_nonhuge", + i, &nactive_nonhuge, size_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.ninactive_nonhuge", + i, &ninactive_nonhuge, size_t); size_t sec_bytes; CTL_M2_GET("stats.arenas.0.hpa_sec_bytes", i, &sec_bytes, size_t); @@ -686,39 +697,62 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i) { emitter_table_printf(emitter, "HPA shard stats:\n" " In full slabs:\n" - " npageslabs: %zu\n" - " nactive: %zu\n" - " ninactive: %zu\n", - npageslabs, nactive, ninactive); + " npageslabs: %zu huge, %zu nonhuge\n" + " nactive: %zu huge, %zu nonhuge \n" + " ninactive: %zu huge, %zu nonhuge \n", + npageslabs_huge, npageslabs_nonhuge, nactive_huge, nactive_nonhuge, + ninactive_huge, ninactive_nonhuge); emitter_json_object_kv_begin(emitter, "hpa_shard"); emitter_json_object_kv_begin(emitter, "full_slabs"); - emitter_json_kv(emitter, "npageslabs", emitter_type_size, &npageslabs); - emitter_json_kv(emitter, "nactive", emitter_type_size, &nactive); - emitter_json_kv(emitter, "ninactive", emitter_type_size, &ninactive); + emitter_json_kv(emitter, "npageslabs_huge", emitter_type_size, + &npageslabs_huge); + emitter_json_kv(emitter, "npageslabs_nonhuge", emitter_type_size, + &npageslabs_nonhuge); + emitter_json_kv(emitter, "nactive_huge", emitter_type_size, + &nactive_huge); + emitter_json_kv(emitter, "nactive_nonhuge", emitter_type_size, + &nactive_nonhuge); + emitter_json_kv(emitter, "ninactive_huge", emitter_type_size, + &ninactive_huge); + emitter_json_kv(emitter, "ninactive_nonhuge", emitter_type_size, + &ninactive_nonhuge); emitter_json_object_end(emitter); /* End "full_slabs" */ COL_HDR(row, size, NULL, right, 20, size) COL_HDR(row, ind, NULL, right, 4, unsigned) - COL_HDR(row, npageslabs, NULL, right, 13, size) - COL_HDR(row, nactive, NULL, right, 13, size) - COL_HDR(row, ninactive, NULL, right, 13, size) + COL_HDR(row, npageslabs_huge, NULL, right, 16, size) + COL_HDR(row, nactive_huge, NULL, right, 16, size) + COL_HDR(row, ninactive_huge, NULL, right, 16, size) + COL_HDR(row, npageslabs_nonhuge, NULL, right, 20, size) + COL_HDR(row, nactive_nonhuge, NULL, right, 20, size) + COL_HDR(row, ninactive_nonhuge, NULL, right, 20, size) emitter_table_row(emitter, &header_row); emitter_json_array_kv_begin(emitter, "nonfull_slabs"); bool in_gap = false; for (pszind_t j = 0; j < PSSET_NPSIZES; j++) { 
 		CTL_M2_M5_GET(
-		    "stats.arenas.0.hpa_shard.nonfull_slabs.0.npageslabs",
-		    i, j, &npageslabs, size_t);
+		    "stats.arenas.0.hpa_shard.nonfull_slabs.0.npageslabs_huge",
+		    i, j, &npageslabs_huge, size_t);
 		CTL_M2_M5_GET(
-		    "stats.arenas.0.hpa_shard.nonfull_slabs.0.nactive",
-		    i, j, &nactive, size_t);
+		    "stats.arenas.0.hpa_shard.nonfull_slabs.0.nactive_huge",
+		    i, j, &nactive_huge, size_t);
 		CTL_M2_M5_GET(
-		    "stats.arenas.0.hpa_shard.nonfull_slabs.0.ninactive",
-		    i, j, &ninactive, size_t);
+		    "stats.arenas.0.hpa_shard.nonfull_slabs.0.ninactive_huge",
+		    i, j, &ninactive_huge, size_t);
+
+		CTL_M2_M5_GET(
+		    "stats.arenas.0.hpa_shard.nonfull_slabs.0.npageslabs_nonhuge",
+		    i, j, &npageslabs_nonhuge, size_t);
+		CTL_M2_M5_GET(
+		    "stats.arenas.0.hpa_shard.nonfull_slabs.0.nactive_nonhuge",
+		    i, j, &nactive_nonhuge, size_t);
+		CTL_M2_M5_GET(
+		    "stats.arenas.0.hpa_shard.nonfull_slabs.0.ninactive_nonhuge",
+		    i, j, &ninactive_nonhuge, size_t);
 
 		bool in_gap_prev = in_gap;
-		in_gap = (npageslabs == 0);
+		in_gap = (npageslabs_huge == 0 && npageslabs_nonhuge == 0);
 		if (in_gap_prev && !in_gap) {
 			emitter_table_printf(emitter,
 			    "                     ---\n");
@@ -726,20 +760,29 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i) {
 		col_size.size_val = sz_pind2sz(j);
 		col_ind.size_val = j;
-		col_npageslabs.size_val = npageslabs;
-		col_nactive.size_val = nactive;
-		col_ninactive.size_val = ninactive;
+		col_npageslabs_huge.size_val = npageslabs_huge;
+		col_nactive_huge.size_val = nactive_huge;
+		col_ninactive_huge.size_val = ninactive_huge;
+		col_npageslabs_nonhuge.size_val = npageslabs_nonhuge;
+		col_nactive_nonhuge.size_val = nactive_nonhuge;
+		col_ninactive_nonhuge.size_val = ninactive_nonhuge;
 		if (!in_gap) {
 			emitter_table_row(emitter, &row);
 		}
 		emitter_json_object_begin(emitter);
-		emitter_json_kv(emitter, "npageslabs", emitter_type_size,
-		    &npageslabs);
-		emitter_json_kv(emitter, "nactive", emitter_type_size,
-		    &nactive);
-		emitter_json_kv(emitter, "ninactive", emitter_type_size,
-		    &ninactive);
+		emitter_json_kv(emitter, "npageslabs_huge", emitter_type_size,
+		    &npageslabs_huge);
+		emitter_json_kv(emitter, "nactive_huge", emitter_type_size,
+		    &nactive_huge);
+		emitter_json_kv(emitter, "ninactive_huge", emitter_type_size,
+		    &ninactive_huge);
+		emitter_json_kv(emitter, "npageslabs_nonhuge", emitter_type_size,
+		    &npageslabs_nonhuge);
+		emitter_json_kv(emitter, "nactive_nonhuge", emitter_type_size,
+		    &nactive_nonhuge);
+		emitter_json_kv(emitter, "ninactive_nonhuge", emitter_type_size,
+		    &ninactive_nonhuge);
 		emitter_json_object_end(emitter);
 	}
 	emitter_json_array_end(emitter); /* End "nonfull_slabs" */
diff --git a/test/unit/hpa.c b/test/unit/hpa.c
index b58dcede..72a20c32 100644
--- a/test/unit/hpa.c
+++ b/test/unit/hpa.c
@@ -2,14 +2,9 @@
 
 #include "jemalloc/internal/hpa.h"
 
-#define HPA_IND 111
-#define SHARD_IND 222
+#define SHARD_IND 111
 
-#define PS_GOAL (128 * PAGE)
-#define PS_ALLOC_MAX (64 * PAGE)
-
-#define HPA_SMALL_MAX (200 * PAGE)
-#define HPA_LARGE_MIN (300 * PAGE)
+#define ALLOC_MAX (HUGEPAGE / 4)
 
 typedef struct test_data_s test_data_t;
 struct test_data_s {
@@ -18,50 +13,32 @@ struct test_data_s {
 	 * test_data_t and the hpa_shard_t;
 	 */
 	hpa_shard_t shard;
-	base_t *shard_base;
+	base_t *base;
 	edata_cache_t shard_edata_cache;
 
-	hpa_t hpa;
-	base_t *hpa_base;
-	edata_cache_t hpa_edata_cache;
-
 	emap_t emap;
 };
 
 static hpa_shard_t *
 create_test_data() {
 	bool err;
-	base_t *shard_base = base_new(TSDN_NULL, /* ind */ SHARD_IND,
+	base_t *base = base_new(TSDN_NULL, /* ind */ SHARD_IND,
 	    &ehooks_default_extent_hooks);
-	
assert_ptr_not_null(shard_base, ""); - - base_t *hpa_base = base_new(TSDN_NULL, /* ind */ HPA_IND, - &ehooks_default_extent_hooks); - assert_ptr_not_null(hpa_base, ""); + assert_ptr_not_null(base, ""); test_data_t *test_data = malloc(sizeof(test_data_t)); assert_ptr_not_null(test_data, ""); - test_data->shard_base = shard_base; - test_data->hpa_base = hpa_base; + test_data->base = base; - err = edata_cache_init(&test_data->shard_edata_cache, shard_base); + err = edata_cache_init(&test_data->shard_edata_cache, base); assert_false(err, ""); - err = edata_cache_init(&test_data->hpa_edata_cache, hpa_base); + err = emap_init(&test_data->emap, test_data->base, /* zeroed */ false); assert_false(err, ""); - err = emap_init(&test_data->emap, test_data->hpa_base, - /* zeroed */ false); - assert_false(err, ""); - - err = hpa_init(&test_data->hpa, hpa_base, &test_data->emap, - &test_data->hpa_edata_cache); - assert_false(err, ""); - - err = hpa_shard_init(&test_data->shard, &test_data->hpa, - &test_data->shard_edata_cache, SHARD_IND, PS_GOAL, PS_ALLOC_MAX, - HPA_SMALL_MAX, HPA_LARGE_MIN); + err = hpa_shard_init(&test_data->shard, &test_data->emap, + &test_data->shard_edata_cache, SHARD_IND, ALLOC_MAX); assert_false(err, ""); return (hpa_shard_t *)test_data; @@ -70,12 +47,11 @@ create_test_data() { static void destroy_test_data(hpa_shard_t *shard) { test_data_t *test_data = (test_data_t *)shard; - base_delete(TSDN_NULL, test_data->shard_base); - base_delete(TSDN_NULL, test_data->hpa_base); + base_delete(TSDN_NULL, test_data->base); free(test_data); } -TEST_BEGIN(test_small_max_large_min) { +TEST_BEGIN(test_alloc_max) { test_skip_if(LG_SIZEOF_PTR != 3); hpa_shard_t *shard = create_test_data(); @@ -84,18 +60,11 @@ TEST_BEGIN(test_small_max_large_min) { edata_t *edata; /* Small max */ - edata = pai_alloc(tsdn, &shard->pai, HPA_SMALL_MAX, PAGE, false); + edata = pai_alloc(tsdn, &shard->pai, ALLOC_MAX, PAGE, false); expect_ptr_not_null(edata, "Allocation of small max failed"); - edata = pai_alloc(tsdn, &shard->pai, HPA_SMALL_MAX + PAGE, PAGE, false); + edata = pai_alloc(tsdn, &shard->pai, ALLOC_MAX + PAGE, PAGE, false); expect_ptr_null(edata, "Allocation of larger than small max succeeded"); - /* Large min */ - edata = pai_alloc(tsdn, &shard->pai, HPA_LARGE_MIN, PAGE, false); - expect_ptr_not_null(edata, "Allocation of large min failed"); - edata = pai_alloc(tsdn, &shard->pai, HPA_LARGE_MIN - PAGE, PAGE, false); - expect_ptr_null(edata, - "Allocation of smaller than large min succeeded"); - destroy_test_data(shard); } TEST_END @@ -178,26 +147,19 @@ TEST_BEGIN(test_stress) { mem_tree_new(&tree); for (size_t i = 0; i < 100 * 1000; i++) { - size_t operation = prng_range_zu(&prng_state, 4); - if (operation < 2) { + size_t operation = prng_range_zu(&prng_state, 2); + if (operation == 0) { /* Alloc */ if (nlive_edatas == nlive_edatas_max) { continue; } - size_t npages_min; - size_t npages_max; /* * We make sure to get an even balance of small and * large allocations. 
*/ - if (operation == 0) { - npages_min = 1; - npages_max = HPA_SMALL_MAX / PAGE; - } else { - npages_min = HPA_LARGE_MIN / PAGE; - npages_max = HPA_LARGE_MIN / PAGE + 20; - } + size_t npages_min = 1; + size_t npages_max = ALLOC_MAX / PAGE; size_t npages = npages_min + prng_range_zu(&prng_state, npages_max - npages_min); edata_t *edata = pai_alloc(tsdn, &shard->pai, @@ -260,6 +222,6 @@ main(void) { (void)mem_tree_reverse_iter; (void)mem_tree_destroy; return test_no_reentrancy( - test_small_max_large_min, + test_alloc_max, test_stress); } diff --git a/test/unit/psset.c b/test/unit/psset.c index e07bdc46..ea61ab92 100644 --- a/test/unit/psset.c +++ b/test/unit/psset.c @@ -2,7 +2,7 @@ #include "jemalloc/internal/psset.h" -#define PAGESLAB_PAGES 64 +#define PAGESLAB_PAGES (HUGEPAGE / PAGE) #define PAGESLAB_SIZE (PAGESLAB_PAGES << LG_PAGE) #define PAGESLAB_SN 123 #define PAGESLAB_ADDR ((void *)(1234 << LG_PAGE)) @@ -296,22 +296,23 @@ TEST_END static void stats_expect_empty(psset_bin_stats_t *stats) { - assert_zu_eq(0, stats->npageslabs, + assert_zu_eq(0, stats->npageslabs_nonhuge, "Supposedly empty bin had positive npageslabs"); - expect_zu_eq(0, stats->nactive, "Unexpected nonempty bin" + expect_zu_eq(0, stats->nactive_nonhuge, "Unexpected nonempty bin" "Supposedly empty bin had positive nactive"); - expect_zu_eq(0, stats->ninactive, "Unexpected nonempty bin" + expect_zu_eq(0, stats->ninactive_nonhuge, "Unexpected nonempty bin" "Supposedly empty bin had positive ninactive"); } static void stats_expect(psset_t *psset, size_t nactive) { if (nactive == PAGESLAB_PAGES) { - expect_zu_eq(1, psset->stats.full_slabs.npageslabs, + expect_zu_eq(1, psset->stats.full_slabs.npageslabs_nonhuge, "Expected a full slab"); - expect_zu_eq(PAGESLAB_PAGES, psset->stats.full_slabs.nactive, + expect_zu_eq(PAGESLAB_PAGES, + psset->stats.full_slabs.nactive_nonhuge, "Should have exactly filled the bin"); - expect_zu_eq(0, psset->stats.full_slabs.ninactive, + expect_zu_eq(0, psset->stats.full_slabs.ninactive_nonhuge, "Should never have inactive pages in a full slab"); } else { stats_expect_empty(&psset->stats.full_slabs); @@ -325,13 +326,13 @@ stats_expect(psset_t *psset, size_t nactive) { for (pszind_t i = 0; i < PSSET_NPSIZES; i++) { if (i == nonempty_pind) { assert_zu_eq(1, - psset->stats.nonfull_slabs[i].npageslabs, + psset->stats.nonfull_slabs[i].npageslabs_nonhuge, "Should have found a slab"); expect_zu_eq(nactive, - psset->stats.nonfull_slabs[i].nactive, + psset->stats.nonfull_slabs[i].nactive_nonhuge, "Mismatch in active pages"); expect_zu_eq(ninactive, - psset->stats.nonfull_slabs[i].ninactive, + psset->stats.nonfull_slabs[i].ninactive_nonhuge, "Mismatch in inactive pages"); } else { stats_expect_empty(&psset->stats.nonfull_slabs[i]);
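
Reviewer note on the psset stats plumbing above: the psset_bin_stats_insert()/psset_bin_stats_remove()/psset_bin_stats_deactivate() helpers that replace the old psset_bin_stats_adjust() are defined in src/psset.c hunks earlier in this patch and are not visible in this excerpt. As a rough sketch only (the "_sketch" names below are invented for illustration; the authoritative definitions are the ones in src/psset.c), the bookkeeping they need to do over the npageslabs_/nactive_/ninactive_{huge,nonhuge} fields exercised by the ctl, stats, and test changes above looks roughly like this:

/*
 * Illustrative sketch, not part of the patch: a pageslab contributes its
 * page counts to either the *_huge or the *_nonhuge column of its bin,
 * keyed off edata_hugeified_get().
 */
static void
psset_bin_stats_insert_remove_sketch(psset_bin_stats_t *binstats, edata_t *ps,
    bool insert) {
	size_t npages = edata_size_get(ps) >> LG_PAGE;
	size_t ninactive = edata_nfree_get(ps);
	size_t nactive = npages - ninactive;
	/* Unsigned wraparound turns "+= mul * x" into a subtraction. */
	size_t mul = insert ? (size_t)1 : (size_t)-1;
	if (edata_hugeified_get(ps)) {
		binstats->npageslabs_huge += mul * 1;
		binstats->nactive_huge += mul * nactive;
		binstats->ninactive_huge += mul * ninactive;
	} else {
		binstats->npageslabs_nonhuge += mul * 1;
		binstats->nactive_nonhuge += mul * nactive;
		binstats->ninactive_nonhuge += mul * ninactive;
	}
}

/*
 * Illustrative sketch: move npages of a slab's pages from the active to the
 * inactive counter of whichever column (huge or nonhuge) the slab lives in,
 * as psset_dalloc does when it frees pages without moving the slab.
 */
static void
psset_bin_stats_deactivate_sketch(psset_bin_stats_t *binstats, bool huge,
    size_t npages) {
	if (huge) {
		binstats->nactive_huge -= npages;
		binstats->ninactive_huge += npages;
	} else {
		binstats->nactive_nonhuge -= npages;
		binstats->ninactive_nonhuge += npages;
	}
}

The property psset_hugify() relies on is that remove and insert are symmetric: removing the slab's contribution under its old hugeified flag, flipping the flag, and re-inserting moves its npageslabs/nactive/ninactive from the *_nonhuge counters to the *_huge counters of the same bin without changing the totals, which is exactly the huge/nonhuge split that the new stats and ctl output report.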