diff --git a/Makefile.in b/Makefile.in index 008cffd8..67568f00 100644 --- a/Makefile.in +++ b/Makefile.in @@ -119,6 +119,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/extent_mmap.c \ $(srcroot)src/geom_grow.c \ $(srcroot)src/hook.c \ + $(srcroot)src/hpa.c \ $(srcroot)src/hpa_central.c \ $(srcroot)src/inspect.c \ $(srcroot)src/large.c \ @@ -212,6 +213,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/fork.c \ $(srcroot)test/unit/hash.c \ $(srcroot)test/unit/hook.c \ + $(srcroot)test/unit/hpa.c \ $(srcroot)test/unit/hpa_central.c \ $(srcroot)test/unit/huge.c \ $(srcroot)test/unit/inspect.c \ diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index a2fdff9f..9d4da31b 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -16,6 +16,7 @@ extern const char *percpu_arena_mode_names[]; extern const uint64_t h_steps[SMOOTHSTEP_NSTEPS]; extern malloc_mutex_t arenas_lock; extern emap_t arena_emap_global; +extern hpa_t arena_hpa_global; extern size_t opt_oversize_threshold; extern size_t oversize_threshold; diff --git a/include/jemalloc/internal/hpa.h b/include/jemalloc/internal/hpa.h new file mode 100644 index 00000000..83f22033 --- /dev/null +++ b/include/jemalloc/internal/hpa.h @@ -0,0 +1,92 @@ +#ifndef JEMALLOC_INTERNAL_HPA_H +#define JEMALLOC_INTERNAL_HPA_H + +#include "jemalloc/internal/geom_grow.h" +#include "jemalloc/internal/hpa_central.h" +#include "jemalloc/internal/pai.h" +#include "jemalloc/internal/psset.h" + +typedef struct hpa_s hpa_t; +struct hpa_s { + /* + * We have two mutexes for the central allocator; mtx protects its + * state, while grow_mtx controls the ability to grow the + * backing store. This prevents race conditions in which the central + * allocator has exhausted its memory while multiple threads are trying + * to allocate. If they all reserved more address space from the OS + * without synchronization, we'd end up consuming much more than necessary. + */ + malloc_mutex_t grow_mtx; + malloc_mutex_t mtx; + hpa_central_t central; + /* The arena ind we're associated with. */ + unsigned ind; + /* + * This edata cache is the global one that we use for new allocations in + * growing; practically, it comes from a0. + */ + edata_cache_t *edata_cache; + geom_grow_t geom_grow; +}; + +typedef struct hpa_shard_s hpa_shard_t; +struct hpa_shard_s { + /* + * pai must be the first member; we cast from a pointer to it to a + * pointer to the hpa_shard_t. + */ + pai_t pai; + malloc_mutex_t grow_mtx; + malloc_mutex_t mtx; + /* + * This edata cache is the one we use when allocating a small extent + * from a pageslab. The pageslab itself comes from the centralized + * allocator, and so will use its edata_cache. + */ + edata_cache_t *edata_cache; + hpa_t *hpa; + psset_t psset; + + /* + * When we're grabbing a new ps from the central allocator, how big + * would we like it to be? This is mostly about the level of batching + * we use in our requests to the centralized allocator. + */ + size_t ps_goal; + /* + * What's the maximum size we'll try to allocate out of the psset? We + * don't want this to be too large relative to ps_goal, as a + * fragmentation avoidance measure. + */ + size_t ps_alloc_max; + /* The arena ind we're associated with.
*/ + unsigned ind; +}; + +bool hpa_init(hpa_t *hpa, base_t *base, emap_t *emap, + edata_cache_t *edata_cache); +bool hpa_shard_init(hpa_shard_t *shard, hpa_t *hpa, + edata_cache_t *edata_cache, unsigned ind, size_t ps_goal, + size_t ps_alloc_max); +void hpa_shard_destroy(tsdn_t *tsdn, hpa_shard_t *shard); + +/* + * We share the fork ordering with the PA and arena prefork handling; that's why + * these are 2 and 3 rather than 0 or 1. + */ +void hpa_shard_prefork2(tsdn_t *tsdn, hpa_shard_t *shard); +void hpa_shard_prefork3(tsdn_t *tsdn, hpa_shard_t *shard); +void hpa_shard_postfork_parent(tsdn_t *tsdn, hpa_shard_t *shard); +void hpa_shard_postfork_child(tsdn_t *tsdn, hpa_shard_t *shard); + +/* + * These should be acquired after all the shard locks in phase 4, but before any + * locks in phase 4. The central HPA may acquire an edata cache mutex (of a0), + * so it needs to be lower in the witness ordering, but it's also logically + * global and not tied to any particular arena. + */ +void hpa_prefork3(tsdn_t *tsdn, hpa_t *hpa); +void hpa_postfork_parent(tsdn_t *tsdn, hpa_t *hpa); +void hpa_postfork_child(tsdn_t *tsdn, hpa_t *hpa); + +#endif /* JEMALLOC_INTERNAL_HPA_H */ diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index 3e7124d5..c26153e3 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -12,6 +12,7 @@ extern bool malloc_slow; extern bool opt_abort; extern bool opt_abort_conf; extern bool opt_confirm_conf; +extern bool opt_hpa; extern const char *opt_junk; extern bool opt_junk_alloc; extern bool opt_junk_free; diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index f6d0a7c3..7f73c274 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -6,6 +6,7 @@ #include "jemalloc/internal/ecache.h" #include "jemalloc/internal/edata_cache.h" #include "jemalloc/internal/emap.h" +#include "jemalloc/internal/hpa.h" #include "jemalloc/internal/lockedint.h" #include "jemalloc/internal/pac.h" #include "jemalloc/internal/pai.h" @@ -66,12 +67,32 @@ struct pa_shard_s { */ atomic_zu_t nactive; + /* + * Whether or not we should prefer the hugepage allocator. Atomic since + * it may be concurrently modified by a thread setting extent hooks. + * Note that we still may do HPA operations in this arena; if use_hpa is + * changed from true to false, we'll free back to the hugepage allocator + * for those allocations. + */ + atomic_b_t use_hpa; + /* + * If we never used the HPA to begin with, it wasn't initialized, and so + * we shouldn't try to e.g. acquire its mutexes during fork. This + * tracks that knowledge. + */ + bool ever_used_hpa; + /* Allocates from a PAC. */ pac_t pac; + /* Allocates from a HPA. */ + hpa_shard_t hpa_shard; + /* The source of edata_t objects. */ edata_cache_t edata_cache; + unsigned ind; + malloc_mutex_t *stats_mtx; pa_shard_stats_t *stats; @@ -98,6 +119,17 @@ bool pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, unsigned ind, pa_shard_stats_t *stats, malloc_mutex_t *stats_mtx, nstime_t *cur_time, ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms); +/* + * This isn't exposed to users; we allow late enablement of the HPA shard so + * that we can boot without worrying about the HPA, then turn it on in a0. 
+ */ +bool pa_shard_enable_hpa(pa_shard_t *shard, hpa_t *hpa); +/* + * We stop using the HPA when custom extent hooks are installed, but still + * redirect deallocations to it. + */ +void pa_shard_disable_hpa(pa_shard_t *shard); + /* * This does the PA-specific parts of arena reset (i.e. freeing all active * allocations). diff --git a/include/jemalloc/internal/psset.h b/include/jemalloc/internal/psset.h index abbfc241..72ff240e 100644 --- a/include/jemalloc/internal/psset.h +++ b/include/jemalloc/internal/psset.h @@ -49,7 +49,6 @@ struct psset_s { void psset_init(psset_t *psset); - /* * Tries to obtain a chunk from an existing pageslab already in the set. * Returns true on failure. diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h index 652afe65..686bf403 100644 --- a/include/jemalloc/internal/witness.h +++ b/include/jemalloc/internal/witness.h @@ -43,8 +43,16 @@ enum witness_rank_e { WITNESS_RANK_CORE, WITNESS_RANK_DECAY = WITNESS_RANK_CORE, WITNESS_RANK_TCACHE_QL, + WITNESS_RANK_EXTENT_GROW, + WITNESS_RANK_HPA_SHARD_GROW = WITNESS_RANK_EXTENT_GROW, + WITNESS_RANK_EXTENTS, + WITNESS_RANK_HPA_SHARD = WITNESS_RANK_EXTENTS, + + WITNESS_RANK_HPA_GROW, + WITNESS_RANK_HPA, + WITNESS_RANK_EDATA_CACHE, WITNESS_RANK_EMAP, diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 2dcc994a..46e497ac 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -58,6 +58,7 @@ + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 81f39345..f46a92fa 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -58,6 +58,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index fd814c32..dbf6f95a 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -58,6 +58,7 @@ + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index 81f39345..f46a92fa 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -58,6 +58,9 @@ Source Files + + Source Files + Source Files diff --git a/src/arena.c b/src/arena.c index f8e8cba2..74f90ccc 100644 --- a/src/arena.c +++ b/src/arena.c @@ -37,6 +37,7 @@ static atomic_zd_t dirty_decay_ms_default; static atomic_zd_t muzzy_decay_ms_default; emap_t arena_emap_global; +hpa_t arena_hpa_global; const uint64_t h_steps[SMOOTHSTEP_NSTEPS] = { #define STEP(step, h, x, y) \ @@ -1360,6 +1361,8 @@ arena_set_extent_hooks(tsd_t *tsd, arena_t *arena, info = arena_background_thread_info_get(arena); malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx); } + /* No using the HPA now that we have the custom hooks. */ + pa_shard_disable_hpa(&arena->pa_shard); extent_hooks_t *ret = base_extent_hooks_set(arena->base, extent_hooks); if (have_background_thread) { malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx); @@ -1516,6 +1519,19 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { nstime_init_update(&arena->create_time); + /* + * We turn on the HPA if set to. There are two exceptions: + * - Custom extent hooks (we should only return memory allocated from + * them in that case). 
+ * - Arena 0 initialization. In this case, we're mid-bootstrapping, and + * so arena_hpa_global is not yet initialized. + */ + if (opt_hpa && ehooks_are_default(base_ehooks_get(base)) && ind != 0) { + if (pa_shard_enable_hpa(&arena->pa_shard, &arena_hpa_global)) { + goto label_error; + } + } + /* We don't support reentrancy for arena 0 bootstrapping. */ if (ind != 0) { /* diff --git a/src/ctl.c b/src/ctl.c index db0e05f0..9e22e66c 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -90,6 +90,7 @@ CTL_PROTO(config_xmalloc) CTL_PROTO(opt_abort) CTL_PROTO(opt_abort_conf) CTL_PROTO(opt_confirm_conf) +CTL_PROTO(opt_hpa) CTL_PROTO(opt_metadata_thp) CTL_PROTO(opt_retain) CTL_PROTO(opt_dss) @@ -343,6 +344,7 @@ static const ctl_named_node_t opt_node[] = { {NAME("abort"), CTL(opt_abort)}, {NAME("abort_conf"), CTL(opt_abort_conf)}, {NAME("confirm_conf"), CTL(opt_confirm_conf)}, + {NAME("hpa"), CTL(opt_hpa)}, {NAME("metadata_thp"), CTL(opt_metadata_thp)}, {NAME("retain"), CTL(opt_retain)}, {NAME("dss"), CTL(opt_dss)}, @@ -1816,6 +1818,7 @@ CTL_RO_CONFIG_GEN(config_xmalloc, bool) CTL_RO_NL_GEN(opt_abort, opt_abort, bool) CTL_RO_NL_GEN(opt_abort_conf, opt_abort_conf, bool) CTL_RO_NL_GEN(opt_confirm_conf, opt_confirm_conf, bool) +CTL_RO_NL_GEN(opt_hpa, opt_hpa, bool) CTL_RO_NL_GEN(opt_metadata_thp, metadata_thp_mode_names[opt_metadata_thp], const char *) CTL_RO_NL_GEN(opt_retain, opt_retain, bool) diff --git a/src/hpa.c b/src/hpa.c new file mode 100644 index 00000000..842384bd --- /dev/null +++ b/src/hpa.c @@ -0,0 +1,447 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/hpa.h" + +#include "jemalloc/internal/flat_bitmap.h" +#include "jemalloc/internal/witness.h" + +static edata_t *hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size, + size_t alignment, bool zero); +static bool hpa_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, + size_t old_size, size_t new_size, bool zero); +static bool hpa_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, + size_t old_size, size_t new_size); +static void hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata); + +bool +hpa_init(hpa_t *hpa, base_t *base, emap_t *emap, edata_cache_t *edata_cache) { + bool err; + + /* + * We fundamentally rely on a address-space-hungry growth strategy for + * hugepages. This may change in the future, but for now we should have + * refused to turn on any HPA at a higher level of the stack. 
+ */ + assert(LG_SIZEOF_PTR == 3); + + err = malloc_mutex_init(&hpa->grow_mtx, "hpa_grow", WITNESS_RANK_HPA_GROW, + malloc_mutex_rank_exclusive); + if (err) { + return true; + } + err = malloc_mutex_init(&hpa->mtx, "hpa", WITNESS_RANK_HPA, + malloc_mutex_rank_exclusive); + if (err) { + return true; + } + + hpa_central_init(&hpa->central, edata_cache, emap); + if (err) { + return true; + } + hpa->ind = base_ind_get(base); + hpa->edata_cache = edata_cache; + + geom_grow_init(&hpa->geom_grow); + + return false; +} + +bool +hpa_shard_init(hpa_shard_t *shard, hpa_t *hpa, edata_cache_t *edata_cache, + unsigned ind, size_t ps_goal, size_t ps_alloc_max) { + bool err; + err = malloc_mutex_init(&shard->grow_mtx, "hpa_shard_grow", + WITNESS_RANK_HPA_SHARD_GROW, malloc_mutex_rank_exclusive); + if (err) { + return true; + } + err = malloc_mutex_init(&shard->mtx, "hpa_shard", + WITNESS_RANK_HPA_SHARD, malloc_mutex_rank_exclusive); + if (err) { + return true; + } + + shard->edata_cache = edata_cache; + shard->hpa = hpa; + psset_init(&shard->psset); + shard->ps_goal = ps_goal; + shard->ps_alloc_max = ps_alloc_max; + + /* + * Fill these in last, so that if an hpa_shard gets used despite + * initialization failing, we'll at least crash instead of just + * operating on corrupted data. + */ + shard->pai.alloc = &hpa_alloc; + shard->pai.expand = &hpa_expand; + shard->pai.shrink = &hpa_shrink; + shard->pai.dalloc = &hpa_dalloc; + + shard->ind = ind; + assert(ind == base_ind_get(edata_cache->base)); + + return false; +} + +static edata_t * +hpa_alloc_central(tsdn_t *tsdn, hpa_shard_t *shard, size_t size_min, + size_t size_goal) { + bool err; + edata_t *edata; + + hpa_t *hpa = shard->hpa; + + malloc_mutex_lock(tsdn, &hpa->mtx); + edata = hpa_central_alloc_reuse(tsdn, &hpa->central, size_min, + size_goal); + malloc_mutex_unlock(tsdn, &hpa->mtx); + if (edata != NULL) { + edata_arena_ind_set(edata, shard->ind); + return edata; + } + /* No existing range can satisfy the request; try to grow. */ + malloc_mutex_lock(tsdn, &hpa->grow_mtx); + + /* + * We could have raced with other grow attempts; re-check to see if we + * did, and are now able to satisfy the request. + */ + malloc_mutex_lock(tsdn, &hpa->mtx); + edata = hpa_central_alloc_reuse(tsdn, &hpa->central, size_min, + size_goal); + malloc_mutex_unlock(tsdn, &hpa->mtx); + if (edata != NULL) { + malloc_mutex_unlock(tsdn, &hpa->grow_mtx); + edata_arena_ind_set(edata, shard->ind); + return edata; + } + + /* + * No such luck. We've dropped mtx, so other allocations can proceed + * while we allocate the new extent. We know no one else will grow in + * the meantime, though, since we still hold grow_mtx. + */ + size_t alloc_size; + pszind_t skip; + + size_t hugepage_goal_min = HUGEPAGE_CEILING(size_goal); + + err = geom_grow_size_prepare(&hpa->geom_grow, hugepage_goal_min, + &alloc_size, &skip); + if (err) { + malloc_mutex_unlock(tsdn, &hpa->grow_mtx); + return NULL; + } + alloc_size = HUGEPAGE_CEILING(alloc_size); + + /* + * Eventually, we need to think about this more systematically, and in + * terms of extent hooks. For now, though, we know we only care about + * overcommitting systems, and we're not going to purge much. + */ + bool commit = true; + void *addr = pages_map(NULL, alloc_size, HUGEPAGE, &commit); + if (addr == NULL) { + malloc_mutex_unlock(tsdn, &hpa->grow_mtx); + return NULL; + } + err = pages_huge(addr, alloc_size); + /* + * Ignore this for now; even if the allocation fails, the address space + * should still be usable. 
*/ + (void)err; + + edata = edata_cache_get(tsdn, hpa->edata_cache); + if (edata == NULL) { + malloc_mutex_unlock(tsdn, &hpa->grow_mtx); + pages_unmap(addr, alloc_size); + return NULL; + } + + /* + * The serial number here is just a placeholder; the hpa_central gets to + * decide how it wants to fill it in. + * + * The grow edata is associated with the hpa_central_t arena ind; the + * subsequent allocation we get (in the hpa_central_alloc_grow call + * below) will be filled in with the shard ind. + */ + edata_init(edata, hpa->ind, addr, alloc_size, /* slab */ false, + SC_NSIZES, /* sn */ 0, extent_state_active, /* zeroed */ true, + /* committed */ true, EXTENT_PAI_HPA, /* is_head */ true); + + malloc_mutex_lock(tsdn, &hpa->mtx); + /* Note that this replaces edata with the allocation to return. */ + err = hpa_central_alloc_grow(tsdn, &hpa->central, size_goal, edata); + malloc_mutex_unlock(tsdn, &hpa->mtx); + + if (!err) { + geom_grow_size_commit(&hpa->geom_grow, skip); + } + malloc_mutex_unlock(tsdn, &hpa->grow_mtx); + edata_arena_ind_set(edata, shard->ind); + + if (err) { + pages_unmap(addr, alloc_size); + edata_cache_put(tsdn, hpa->edata_cache, edata); + return NULL; + } + + return edata; +} + +static edata_t * +hpa_alloc_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size) { + assert(size <= shard->ps_alloc_max); + + bool err; + edata_t *edata = edata_cache_get(tsdn, shard->edata_cache); + if (edata == NULL) { + return NULL; + } + edata_arena_ind_set(edata, shard->ind); + + malloc_mutex_lock(tsdn, &shard->mtx); + err = psset_alloc_reuse(&shard->psset, edata, size); + malloc_mutex_unlock(tsdn, &shard->mtx); + if (!err) { + return edata; + } + /* Nothing in the psset works; we have to grow it. */ + malloc_mutex_lock(tsdn, &shard->grow_mtx); + + /* As above; check for grow races. */ + malloc_mutex_lock(tsdn, &shard->mtx); + err = psset_alloc_reuse(&shard->psset, edata, size); + malloc_mutex_unlock(tsdn, &shard->mtx); + if (!err) { + malloc_mutex_unlock(tsdn, &shard->grow_mtx); + return edata; + } + + edata_t *grow_edata = hpa_alloc_central(tsdn, shard, size, + shard->ps_goal); + if (grow_edata == NULL) { + malloc_mutex_unlock(tsdn, &shard->grow_mtx); + edata_cache_put(tsdn, shard->edata_cache, edata); + return NULL; + } + edata_arena_ind_set(grow_edata, shard->ind); + edata_slab_set(grow_edata, true); + fb_group_t *fb = edata_slab_data_get(grow_edata)->bitmap; + fb_init(fb, shard->ps_goal / PAGE); + + /* We got the new edata; allocate from it. */ + malloc_mutex_lock(tsdn, &shard->mtx); + psset_alloc_new(&shard->psset, grow_edata, edata, size); + malloc_mutex_unlock(tsdn, &shard->mtx); + + malloc_mutex_unlock(tsdn, &shard->grow_mtx); + return edata; +} + +static hpa_shard_t * +hpa_from_pai(pai_t *self) { + assert(self->alloc == &hpa_alloc); + assert(self->expand == &hpa_expand); + assert(self->shrink == &hpa_shrink); + assert(self->dalloc == &hpa_dalloc); + return (hpa_shard_t *)self; +} + +static edata_t * +hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size, + size_t alignment, bool zero) { + + assert((size & PAGE_MASK) == 0); + /* We don't handle alignment or zeroing for now.
*/ + if (alignment > PAGE || zero) { + return NULL; + } + + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); + + hpa_shard_t *shard = hpa_from_pai(self); + + edata_t *edata; + if (size <= shard->ps_alloc_max) { + edata = hpa_alloc_psset(tsdn, shard, size); + if (edata != NULL) { + emap_register_boundary(tsdn, shard->hpa->central.emap, + edata, SC_NSIZES, /* slab */ false); + } + } else { + edata = hpa_alloc_central(tsdn, shard, size, size); + } + + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); + if (edata != NULL) { + emap_assert_mapped(tsdn, shard->hpa->central.emap, edata); + assert(edata_pai_get(edata) == EXTENT_PAI_HPA); + assert(edata_state_get(edata) == extent_state_active); + assert(edata_arena_ind_get(edata) == shard->ind); + assert(edata_szind_get_maybe_invalid(edata) == SC_NSIZES); + assert(!edata_slab_get(edata)); + assert(edata_committed_get(edata)); + assert(edata_base_get(edata) == edata_addr_get(edata)); + assert(edata_base_get(edata) != NULL); + } + return edata; +} + +static bool +hpa_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, + size_t old_size, size_t new_size, bool zero) { + /* Expand not yet supported. */ + return true; +} + +static bool +hpa_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, + size_t old_size, size_t new_size) { + /* Shrink not yet supported. */ + return true; +} + +static void +hpa_dalloc_central(tsdn_t *tsdn, hpa_shard_t *shard, edata_t *edata) { + hpa_t *hpa = shard->hpa; + + edata_arena_ind_set(edata, hpa->ind); + malloc_mutex_lock(tsdn, &hpa->mtx); + hpa_central_dalloc(tsdn, &hpa->central, edata); + malloc_mutex_unlock(tsdn, &hpa->mtx); +} + +static void +hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { + hpa_shard_t *shard = hpa_from_pai(self); + + edata_addr_set(edata, edata_base_get(edata)); + edata_zeroed_set(edata, false); + + assert(edata_pai_get(edata) == EXTENT_PAI_HPA); + assert(edata_state_get(edata) == extent_state_active); + assert(edata_arena_ind_get(edata) == shard->ind); + assert(edata_szind_get_maybe_invalid(edata) == SC_NSIZES); + assert(!edata_slab_get(edata)); + assert(edata_committed_get(edata)); + assert(edata_base_get(edata) != NULL); + + /* + * There are two cases: + * - The psset field is NULL. In this case, the edata comes directly + * from the hpa_central_t and should be returned to it. + * - THe psset field is not NULL, in which case we return the edata to + * the appropriate slab (which may in turn cause it to become empty, + * triggering an eviction of the whole slab, which should then be + * returned to the hpa_central_t). + */ + if (edata_ps_get(edata) != NULL) { + emap_deregister_boundary(tsdn, shard->hpa->central.emap, edata); + + malloc_mutex_lock(tsdn, &shard->mtx); + edata_t *evicted_ps = psset_dalloc(&shard->psset, edata); + malloc_mutex_unlock(tsdn, &shard->mtx); + + edata_cache_put(tsdn, shard->edata_cache, edata); + + if (evicted_ps != NULL) { + /* + * The deallocation caused a pageslab to become empty. + * Free it back to the centralized allocator. + */ + bool err = emap_register_boundary(tsdn, + shard->hpa->central.emap, evicted_ps, SC_NSIZES, + /* slab */ false); + /* + * Registration can only fail on OOM, but the boundary + * mappings should have been initialized during + * allocation. 
+ */ + assert(!err); + edata_slab_set(evicted_ps, false); + edata_ps_set(evicted_ps, NULL); + + assert(edata_arena_ind_get(evicted_ps) == shard->ind); + hpa_dalloc_central(tsdn, shard, evicted_ps); + } + } else { + hpa_dalloc_central(tsdn, shard, edata); + } +} + +static void +hpa_shard_assert_stats_empty(psset_bin_stats_t *bin_stats) { + assert(bin_stats->npageslabs == 0); + assert(bin_stats->nactive == 0); + assert(bin_stats->ninactive == 0); +} + +void +hpa_shard_destroy(tsdn_t *tsdn, hpa_shard_t *shard) { + /* + * By the time we're here, the arena code should have dalloc'd all the + * active extents, which means we should have eventually evicted + * everything from the psset, so it shouldn't be able to serve even a + * 1-page allocation. + */ + if (config_debug) { + edata_t edata = {0}; + malloc_mutex_lock(tsdn, &shard->mtx); + bool psset_empty = psset_alloc_reuse(&shard->psset, &edata, + PAGE); + malloc_mutex_unlock(tsdn, &shard->mtx); + assert(psset_empty); + hpa_shard_assert_stats_empty(&shard->psset.full_slab_stats); + for (pszind_t i = 0; i < PSSET_NPSIZES; i++) { + hpa_shard_assert_stats_empty( + &shard->psset.slab_stats[i]); + } + } +} + +void +hpa_shard_prefork2(tsdn_t *tsdn, hpa_shard_t *shard) { + malloc_mutex_prefork(tsdn, &shard->grow_mtx); +} + +void +hpa_shard_prefork3(tsdn_t *tsdn, hpa_shard_t *shard) { + malloc_mutex_prefork(tsdn, &shard->mtx); +} + +void +hpa_shard_postfork_parent(tsdn_t *tsdn, hpa_shard_t *shard) { + malloc_mutex_postfork_parent(tsdn, &shard->grow_mtx); + malloc_mutex_postfork_parent(tsdn, &shard->mtx); +} + +void +hpa_shard_postfork_child(tsdn_t *tsdn, hpa_shard_t *shard) { + malloc_mutex_postfork_child(tsdn, &shard->grow_mtx); + malloc_mutex_postfork_child(tsdn, &shard->mtx); +} + +void +hpa_prefork3(tsdn_t *tsdn, hpa_t *hpa) { + malloc_mutex_prefork(tsdn, &hpa->grow_mtx); + malloc_mutex_prefork(tsdn, &hpa->mtx); +} + +void +hpa_postfork_parent(tsdn_t *tsdn, hpa_t *hpa) { + malloc_mutex_postfork_parent(tsdn, &hpa->grow_mtx); + malloc_mutex_postfork_parent(tsdn, &hpa->mtx); +} + +void +hpa_postfork_child(tsdn_t *tsdn, hpa_t *hpa) { + malloc_mutex_postfork_child(tsdn, &hpa->grow_mtx); + malloc_mutex_postfork_child(tsdn, &hpa->mtx); +} diff --git a/src/hpa_central.c b/src/hpa_central.c index d1065951..a1895c87 100644 --- a/src/hpa_central.c +++ b/src/hpa_central.c @@ -79,6 +79,7 @@ hpa_central_alloc_reuse(tsdn_t *tsdn, hpa_central_t *central, eset_insert(¢ral->eset, edata); return NULL; } + emap_assert_mapped(tsdn, central->emap, trail); eset_insert(¢ral->eset, trail); label_success: @@ -178,6 +179,7 @@ hpa_central_dalloc_merge(tsdn_t *tsdn, hpa_central_t *central, edata_t *a, void hpa_central_dalloc(tsdn_t *tsdn, hpa_central_t *central, edata_t *edata) { assert(edata_state_get(edata) == extent_state_active); + assert(edata_ps_get(edata) == NULL); /* * These should really be called at the pa interface level, but diff --git a/src/jemalloc.c b/src/jemalloc.c index 170b1723..0dc685b2 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -133,6 +133,10 @@ unsigned ncpus; /* Protects arenas initialization. */ malloc_mutex_t arenas_lock; + +/* The global hpa, and whether it's on. */ +bool opt_hpa = false; + /* * Arenas that are used to service external requests. Not all elements of the * arenas array are necessarily used; arenas are created lazily as needed. 
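A quick usage sketch, not part of the patch itself: opt_hpa is surfaced as the "hpa" MALLOC_CONF option (via the CONF_HANDLE_BOOL hunk below) and as the read-only "opt.hpa" mallctl added earlier in src/ctl.c. A program built against this branch could confirm the setting as follows; everything here other than mallctl() and the "opt.hpa" name is illustrative.

#include <stdbool.h>
#include <stdio.h>
#include <jemalloc/jemalloc.h>

int
main(void) {
	/* Read the new read-only opt.hpa setting exposed through ctl.c. */
	bool hpa;
	size_t sz = sizeof(hpa);
	if (mallctl("opt.hpa", &hpa, &sz, NULL, 0) != 0) {
		fprintf(stderr, "opt.hpa is not available in this build\n");
		return 1;
	}
	printf("opt.hpa: %s\n", hpa ? "true" : "false");
	return 0;
}

Running it with MALLOC_CONF=hpa:true exercises the new code path; the option defaults to false, per the declaration above.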
@@ -1476,6 +1480,7 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], opt_max_background_threads, CONF_CHECK_MIN, CONF_CHECK_MAX, true); + CONF_HANDLE_BOOL(opt_hpa, "hpa") if (CONF_MATCH("slab_sizes")) { if (CONF_MATCH_VALUE("default")) { sc_data_init(sc_data); @@ -1760,6 +1765,33 @@ malloc_init_hard_a0_locked() { return true; } a0 = arena_get(TSDN_NULL, 0, false); + + if (opt_hpa && LG_SIZEOF_PTR == 2) { + if (opt_abort_conf) { + malloc_printf("<jemalloc>: Hugepages not currently " + "supported on 32-bit architectures; aborting."); + } else { + malloc_printf("<jemalloc>: Hugepages not currently " + "supported on 32-bit architectures; disabling."); + opt_hpa = false; + } + } else if (opt_hpa) { + /* + * The global HPA uses the edata cache from a0, and so needs to + * be initialized specially, after a0 is. The arena init code + * handles this case specially, and does not turn on the HPA for + * a0 when opt_hpa is true. This lets us do global HPA + * initialization against a valid a0. + */ + if (hpa_init(&arena_hpa_global, b0get(), &arena_emap_global, + &a0->pa_shard.edata_cache)) { + return true; + } + if (pa_shard_enable_hpa(&a0->pa_shard, &arena_hpa_global)) { + return true; + } + } + malloc_init_state = malloc_init_a0_initialized; return false; @@ -4206,6 +4238,10 @@ _malloc_prefork(void) } } } + if (i == 3 && opt_hpa) { + hpa_prefork3(tsd_tsdn(tsd), &arena_hpa_global); + } + } prof_prefork1(tsd_tsdn(tsd)); stats_prefork(tsd_tsdn(tsd)); @@ -4244,6 +4280,9 @@ _malloc_postfork(void) arena_postfork_parent(tsd_tsdn(tsd), arena); } } + if (opt_hpa) { + hpa_postfork_parent(tsd_tsdn(tsd), &arena_hpa_global); + } prof_postfork_parent(tsd_tsdn(tsd)); if (have_background_thread) { background_thread_postfork_parent(tsd_tsdn(tsd)); @@ -4274,6 +4313,9 @@ jemalloc_postfork_child(void) { arena_postfork_child(tsd_tsdn(tsd), arena); } } + if (opt_hpa) { + hpa_postfork_child(tsd_tsdn(tsd), &arena_hpa_global); + } prof_postfork_child(tsd_tsdn(tsd)); if (have_background_thread) { background_thread_postfork_child(tsd_tsdn(tsd)); diff --git a/src/pa.c b/src/pa.c index f068fd96..672db7b0 100644 --- a/src/pa.c +++ b/src/pa.c @@ -1,6 +1,8 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/hpa.h" + static void pa_nactive_add(pa_shard_t *shard, size_t add_pages) { atomic_fetch_add_zu(&shard->nactive, add_pages, ATOMIC_RELAXED); @@ -21,12 +23,18 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, if (edata_cache_init(&shard->edata_cache, base)) { return true; } + if (pac_init(tsdn, &shard->pac, base, emap, &shard->edata_cache, cur_time, dirty_decay_ms, muzzy_decay_ms, &stats->pac_stats, stats_mtx)) { return true; } + shard->ind = ind; + + shard->ever_used_hpa = false; + atomic_store_b(&shard->use_hpa, false, ATOMIC_RELAXED); + atomic_store_zu(&shard->nactive, 0, ATOMIC_RELAXED); shard->stats_mtx = stats_mtx; @@ -39,6 +47,29 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, return false; } +bool +pa_shard_enable_hpa(pa_shard_t *shard, hpa_t *hpa) { + /* + * These are constants for now; eventually they'll probably be + * tuneable.
+ */ + size_t ps_goal = 512 * 1024; + size_t ps_alloc_max = 256 * 1024; + if (hpa_shard_init(&shard->hpa_shard, hpa, &shard->edata_cache, + shard->ind, ps_goal, ps_alloc_max)) { + return true; + } + shard->ever_used_hpa = true; + atomic_store_b(&shard->use_hpa, true, ATOMIC_RELAXED); + + return false; +} + +void +pa_shard_disable_hpa(pa_shard_t *shard) { + atomic_store_b(&shard->use_hpa, false, ATOMIC_RELAXED); +} + void pa_shard_reset(pa_shard_t *shard) { atomic_store_zu(&shard->nactive, 0, ATOMIC_RELAXED); @@ -49,14 +80,30 @@ pa_shard_destroy(tsdn_t *tsdn, pa_shard_t *shard) { pac_destroy(tsdn, &shard->pac); } +static pai_t * +pa_get_pai(pa_shard_t *shard, edata_t *edata) { + return (edata_pai_get(edata) == EXTENT_PAI_PAC + ? &shard->pac.pai : &shard->hpa_shard.pai); +} + edata_t * pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, bool slab, szind_t szind, bool zero) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - edata_t *edata = pai_alloc(tsdn, &shard->pac.pai, size, alignment, - zero); + edata_t *edata = NULL; + if (atomic_load_b(&shard->use_hpa, ATOMIC_RELAXED)) { + edata = pai_alloc(tsdn, &shard->hpa_shard.pai, size, alignment, + zero); + } + /* + * Fall back to the PAC if the HPA is off or couldn't serve the given + * allocation request. + */ + if (edata == NULL) { + edata = pai_alloc(tsdn, &shard->pac.pai, size, alignment, zero); + } if (edata != NULL) { pa_nactive_add(shard, size >> LG_PAGE); @@ -67,6 +114,9 @@ pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, emap_register_interior(tsdn, shard->emap, edata, szind); } } + if (edata != NULL) { + assert(edata_arena_ind_get(edata) == shard->ind); + } return edata; } @@ -79,8 +129,9 @@ pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, size_t expand_amount = new_size - old_size; - bool error = pai_expand(tsdn, &shard->pac.pai, edata, old_size, - new_size, zero); + pai_t *pai = pa_get_pai(shard, edata); + + bool error = pai_expand(tsdn, pai, edata, old_size, new_size, zero); if (error) { return true; } @@ -100,13 +151,13 @@ pa_shrink(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, size_t shrink_amount = old_size - new_size; *generated_dirty = false; - bool error = pai_shrink(tsdn, &shard->pac.pai, edata, old_size, - new_size); + pai_t *pai = pa_get_pai(shard, edata); + bool error = pai_shrink(tsdn, pai, edata, old_size, new_size); if (error) { return true; } pa_nactive_sub(shard, shrink_amount >> LG_PAGE); - *generated_dirty = true; + *generated_dirty = (edata_pai_get(edata) == EXTENT_PAI_PAC); edata_szind_set(edata, szind); emap_remap(tsdn, shard->emap, edata, szind, /* slab */ false); @@ -123,8 +174,9 @@ pa_dalloc(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, } edata_szind_set(edata, SC_NSIZES); pa_nactive_sub(shard, edata_size_get(edata) >> LG_PAGE); - pai_dalloc(tsdn, &shard->pac.pai, edata); - *generated_dirty = true; + pai_t *pai = pa_get_pai(shard, edata); + pai_dalloc(tsdn, pai, edata); + *generated_dirty = (edata_pai_get(edata) == EXTENT_PAI_PAC); } bool diff --git a/src/pa_extra.c b/src/pa_extra.c index 8bf54b96..402603ea 100644 --- a/src/pa_extra.c +++ b/src/pa_extra.c @@ -17,6 +17,9 @@ pa_shard_prefork0(tsdn_t *tsdn, pa_shard_t *shard) { void pa_shard_prefork2(tsdn_t *tsdn, pa_shard_t *shard) { malloc_mutex_prefork(tsdn, &shard->pac.grow_mtx); + if (shard->ever_used_hpa) { + hpa_shard_prefork2(tsdn, &shard->hpa_shard); + } } void @@ -24,6 +27,9 @@ pa_shard_prefork3(tsdn_t *tsdn, pa_shard_t 
*shard) { ecache_prefork(tsdn, &shard->pac.ecache_dirty); ecache_prefork(tsdn, &shard->pac.ecache_muzzy); ecache_prefork(tsdn, &shard->pac.ecache_retained); + if (shard->ever_used_hpa) { + hpa_shard_prefork3(tsdn, &shard->hpa_shard); + } } void @@ -40,6 +46,9 @@ pa_shard_postfork_parent(tsdn_t *tsdn, pa_shard_t *shard) { malloc_mutex_postfork_parent(tsdn, &shard->pac.grow_mtx); malloc_mutex_postfork_parent(tsdn, &shard->pac.decay_dirty.mtx); malloc_mutex_postfork_parent(tsdn, &shard->pac.decay_muzzy.mtx); + if (shard->ever_used_hpa) { + hpa_shard_postfork_parent(tsdn, &shard->hpa_shard); + } } void @@ -51,6 +60,9 @@ pa_shard_postfork_child(tsdn_t *tsdn, pa_shard_t *shard) { malloc_mutex_postfork_child(tsdn, &shard->pac.grow_mtx); malloc_mutex_postfork_child(tsdn, &shard->pac.decay_dirty.mtx); malloc_mutex_postfork_child(tsdn, &shard->pac.decay_muzzy.mtx); + if (shard->ever_used_hpa) { + hpa_shard_postfork_child(tsdn, &shard->hpa_shard); + } } void diff --git a/src/stats.c b/src/stats.c index 407b60cc..b2ec57b7 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1095,6 +1095,7 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_UNSIGNED("narenas") OPT_WRITE_CHAR_P("percpu_arena") OPT_WRITE_SIZE_T("oversize_threshold") + OPT_WRITE_BOOL("hpa") OPT_WRITE_CHAR_P("metadata_thp") OPT_WRITE_BOOL_MUTABLE("background_thread", "background_thread") OPT_WRITE_SSIZE_T_MUTABLE("dirty_decay_ms", "arenas.dirty_decay_ms") diff --git a/test/unit/arena_decay.c b/test/unit/arena_decay.c index 86f7057d..555f71ae 100644 --- a/test/unit/arena_decay.c +++ b/test/unit/arena_decay.c @@ -185,6 +185,7 @@ generate_dirty(unsigned arena_ind, size_t size) { TEST_BEGIN(test_decay_ticks) { test_skip_if(check_background_thread_enabled()); + test_skip_if(opt_hpa); ticker_t *decay_ticker; unsigned tick0, tick1, arena_ind; @@ -424,6 +425,7 @@ decay_ticker_helper(unsigned arena_ind, int flags, bool dirty, ssize_t dt, TEST_BEGIN(test_decay_ticker) { test_skip_if(check_background_thread_enabled()); + test_skip_if(opt_hpa); #define NPS 2048 ssize_t ddt = opt_dirty_decay_ms; ssize_t mdt = opt_muzzy_decay_ms; @@ -485,6 +487,7 @@ TEST_END TEST_BEGIN(test_decay_nonmonotonic) { test_skip_if(check_background_thread_enabled()); + test_skip_if(opt_hpa); #define NPS (SMOOTHSTEP_NSTEPS + 1) int flags = (MALLOCX_ARENA(0) | MALLOCX_TCACHE_NONE); void *ps[NPS]; @@ -542,6 +545,7 @@ TEST_END TEST_BEGIN(test_decay_now) { test_skip_if(check_background_thread_enabled()); + test_skip_if(opt_hpa); unsigned arena_ind = do_arena_create(0, 0); expect_zu_eq(get_arena_pdirty(arena_ind), 0, "Unexpected dirty pages"); @@ -562,6 +566,7 @@ TEST_END TEST_BEGIN(test_decay_never) { test_skip_if(check_background_thread_enabled() || !config_stats); + test_skip_if(opt_hpa); unsigned arena_ind = do_arena_create(-1, -1); int flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE; diff --git a/test/unit/hpa.c b/test/unit/hpa.c new file mode 100644 index 00000000..8b319b9e --- /dev/null +++ b/test/unit/hpa.c @@ -0,0 +1,235 @@ +#include "test/jemalloc_test.h" + +#include "jemalloc/internal/hpa.h" + +#define HPA_IND 111 +#define SHARD_IND 222 + +#define PS_GOAL (128 * PAGE) +#define PS_ALLOC_MAX (64 * PAGE) + +typedef struct test_data_s test_data_t; +struct test_data_s { + /* + * Must be the first member -- we convert back and forth between the + * test_data_t and the hpa_shard_t; + */ + hpa_shard_t shard; + base_t *shard_base; + edata_cache_t shard_edata_cache; + + hpa_t hpa; + base_t *hpa_base; + edata_cache_t hpa_edata_cache; + + emap_t emap; +}; + +static hpa_shard_t * 
+create_test_data() { + bool err; + base_t *shard_base = base_new(TSDN_NULL, /* ind */ SHARD_IND, + &ehooks_default_extent_hooks); + assert_ptr_not_null(shard_base, ""); + + base_t *hpa_base = base_new(TSDN_NULL, /* ind */ HPA_IND, + &ehooks_default_extent_hooks); + assert_ptr_not_null(hpa_base, ""); + + test_data_t *test_data = malloc(sizeof(test_data_t)); + assert_ptr_not_null(test_data, ""); + + test_data->shard_base = shard_base; + test_data->hpa_base = hpa_base; + + err = edata_cache_init(&test_data->shard_edata_cache, shard_base); + assert_false(err, ""); + + err = edata_cache_init(&test_data->hpa_edata_cache, hpa_base); + assert_false(err, ""); + + err = emap_init(&test_data->emap, test_data->hpa_base, + /* zeroed */ false); + assert_false(err, ""); + + err = hpa_init(&test_data->hpa, hpa_base, &test_data->emap, + &test_data->hpa_edata_cache); + assert_false(err, ""); + + err = hpa_shard_init(&test_data->shard, &test_data->hpa, + &test_data->shard_edata_cache, SHARD_IND, PS_GOAL, PS_ALLOC_MAX); + assert_false(err, ""); + + return (hpa_shard_t *)test_data; +} + +static void +destroy_test_data(hpa_shard_t *shard) { + test_data_t *test_data = (test_data_t *)shard; + base_delete(TSDN_NULL, test_data->shard_base); + base_delete(TSDN_NULL, test_data->hpa_base); + free(test_data); +} + +typedef struct mem_contents_s mem_contents_t; +struct mem_contents_s { + uintptr_t my_addr; + size_t size; + edata_t *my_edata; + rb_node(mem_contents_t) link; +}; + +static int +mem_contents_cmp(const mem_contents_t *a, const mem_contents_t *b) { + return (a->my_addr > b->my_addr) - (a->my_addr < b->my_addr); +} + +typedef rb_tree(mem_contents_t) mem_tree_t; +rb_gen(static, mem_tree_, mem_tree_t, mem_contents_t, link, + mem_contents_cmp); + +static void +node_assert_ordered(mem_contents_t *a, mem_contents_t *b) { + assert_zu_lt(a->my_addr, a->my_addr + a->size, "Overflow"); + assert_zu_le(a->my_addr + a->size, b->my_addr, ""); +} + +static void +node_check(mem_tree_t *tree, mem_contents_t *contents) { + edata_t *edata = contents->my_edata; + assert_ptr_eq(contents, (void *)contents->my_addr, ""); + assert_ptr_eq(contents, edata_base_get(edata), ""); + assert_zu_eq(contents->size, edata_size_get(edata), ""); + assert_ptr_eq(contents->my_edata, edata, ""); + + mem_contents_t *next = mem_tree_next(tree, contents); + if (next != NULL) { + node_assert_ordered(contents, next); + } + mem_contents_t *prev = mem_tree_prev(tree, contents); + if (prev != NULL) { + node_assert_ordered(prev, contents); + } +} + +static void +node_insert(mem_tree_t *tree, edata_t *edata, size_t npages) { + mem_contents_t *contents = (mem_contents_t *)edata_base_get(edata); + contents->my_addr = (uintptr_t)edata_base_get(edata); + contents->size = edata_size_get(edata); + contents->my_edata = edata; + mem_tree_insert(tree, contents); + node_check(tree, contents); +} + +static void +node_remove(mem_tree_t *tree, edata_t *edata) { + mem_contents_t *contents = (mem_contents_t *)edata_base_get(edata); + node_check(tree, contents); + mem_tree_remove(tree, contents); +} + +TEST_BEGIN(test_stress) { + test_skip_if(LG_SIZEOF_PTR != 3); + + hpa_shard_t *shard = create_test_data(); + + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + + const size_t nlive_edatas_max = 500; + size_t nlive_edatas = 0; + edata_t **live_edatas = calloc(nlive_edatas_max, sizeof(edata_t *)); + /* + * Nothing special about this constant; we're only fixing it for + * consistency across runs. 
+ */ + size_t prng_state = (size_t)0x76999ffb014df07c; + + mem_tree_t tree; + mem_tree_new(&tree); + + for (size_t i = 0; i < 100 * 1000; i++) { + size_t operation = prng_range_zu(&prng_state, 4); + if (operation < 2) { + /* Alloc */ + if (nlive_edatas == nlive_edatas_max) { + continue; + } + + size_t npages_min; + size_t npages_max; + /* + * We make sure to get an even balance of small and + * large allocations. + */ + if (operation == 0) { + npages_min = 1; + npages_max = SC_LARGE_MINCLASS / PAGE - 1; + } else { + npages_min = SC_LARGE_MINCLASS / PAGE; + npages_max = 5 * npages_min; + } + size_t npages = npages_min + prng_range_zu(&prng_state, + npages_max - npages_min); + edata_t *edata = pai_alloc(tsdn, &shard->pai, + npages * PAGE, PAGE, false); + assert_ptr_not_null(edata, + "Unexpected allocation failure"); + live_edatas[nlive_edatas] = edata; + nlive_edatas++; + node_insert(&tree, edata, npages); + } else { + /* Free. */ + if (nlive_edatas == 0) { + continue; + } + size_t victim = prng_range_zu(&prng_state, nlive_edatas); + edata_t *to_free = live_edatas[victim]; + live_edatas[victim] = live_edatas[nlive_edatas - 1]; + nlive_edatas--; + node_remove(&tree, to_free); + pai_dalloc(tsdn, &shard->pai, to_free); + } + } + + size_t ntreenodes = 0; + for (mem_contents_t *contents = mem_tree_first(&tree); contents != NULL; + contents = mem_tree_next(&tree, contents)) { + ntreenodes++; + node_check(&tree, contents); + } + expect_zu_eq(ntreenodes, nlive_edatas, ""); + + /* + * Test hpa_shard_destroy, which requires as a precondition that all its + * extents have been deallocated. + */ + for (size_t i = 0; i < nlive_edatas; i++) { + edata_t *to_free = live_edatas[i]; + node_remove(&tree, to_free); + pai_dalloc(tsdn, &shard->pai, to_free); + } + hpa_shard_destroy(tsdn, shard); + + free(live_edatas); + destroy_test_data(shard); +} +TEST_END + +int +main(void) { + /* + * These trigger unused-function warnings on CI runs, even if declared + * with static inline. 
+ */ + (void)mem_tree_empty; + (void)mem_tree_last; + (void)mem_tree_search; + (void)mem_tree_nsearch; + (void)mem_tree_psearch; + (void)mem_tree_iter; + (void)mem_tree_reverse_iter; + (void)mem_tree_destroy; + return test_no_reentrancy( + test_stress); +} diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index cf5c88e0..cda1a659 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -163,6 +163,7 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(const char *, metadata_thp, always); TEST_MALLCTL_OPT(bool, retain, always); TEST_MALLCTL_OPT(const char *, dss, always); + TEST_MALLCTL_OPT(bool, hpa, always); TEST_MALLCTL_OPT(unsigned, narenas, always); TEST_MALLCTL_OPT(const char *, percpu_arena, always); TEST_MALLCTL_OPT(size_t, oversize_threshold, always); diff --git a/test/unit/prof_gdump.c b/test/unit/prof_gdump.c index 9a47a19a..46e45036 100644 --- a/test/unit/prof_gdump.c +++ b/test/unit/prof_gdump.c @@ -17,6 +17,7 @@ prof_dump_open_file_intercept(const char *filename, int mode) { } TEST_BEGIN(test_gdump) { + test_skip_if(opt_hpa); bool active, gdump, gdump_old; void *p, *q, *r, *s; size_t sz; diff --git a/test/unit/retained.c b/test/unit/retained.c index 81396170..80ee8cdf 100644 --- a/test/unit/retained.c +++ b/test/unit/retained.c @@ -99,6 +99,7 @@ thd_start(void *arg) { TEST_BEGIN(test_retained) { test_skip_if(!config_stats); + test_skip_if(opt_hpa); arena_ind = do_arena_create(NULL); sz = nallocx(HUGEPAGE, 0); diff --git a/test/unit/stats.c b/test/unit/stats.c index 21a29a6f..6b6594d2 100644 --- a/test/unit/stats.c +++ b/test/unit/stats.c @@ -119,7 +119,7 @@ TEST_BEGIN(test_stats_arenas_summary) { "Unexepected mallctl() result"); if (config_stats) { - if (!background_thread_enabled()) { + if (!background_thread_enabled() && !opt_hpa) { expect_u64_gt(dirty_npurge + muzzy_npurge, 0, "At least one purge should have occurred"); }
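To summarize the bring-up order this patch establishes, here is a condensed sketch; it mirrors malloc_init_hard_a0_locked() and arena_new() above, and hpa_bringup_sketch() itself is a hypothetical helper rather than a function in the patch. The internal jemalloc types and the b0get()/arena_emap_global/pa_shard identifiers are taken from the diff.

/*
 * Sketch only.  The global hpa_t is initialized against a0's edata cache and
 * the global emap, then each non-a0 arena with default extent hooks attaches
 * an hpa_shard_t via pa_shard_enable_hpa() (which hard-codes ps_goal = 512k
 * and ps_alloc_max = 256k for now).
 */
static bool
hpa_bringup_sketch(arena_t *a0, arena_t *arena) {
	/* 1. Global state, built on a0 (see malloc_init_hard_a0_locked). */
	if (hpa_init(&arena_hpa_global, b0get(), &arena_emap_global,
	    &a0->pa_shard.edata_cache)) {
		return true;
	}
	/* 2. Per-arena shard (see arena_new). */
	if (pa_shard_enable_hpa(&arena->pa_shard, &arena_hpa_global)) {
		return true;
	}
	/*
	 * 3. From here on, pa_alloc tries the shard's pai first and falls back
	 * to the PAC when the HPA declines (e.g. alignment > PAGE or zero).
	 */
	return false;
}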
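The grow_mtx/mtx split described in the hpa_s comment is used identically by hpa_alloc_central() and hpa_alloc_psset(). A condensed sketch of just that protocol, with try_existing() and grow_backing_store() as made-up placeholders for the reuse and grow steps:

/*
 * Sketch of the double-checked grow protocol (placeholder helpers, not real
 * jemalloc functions).  mtx guards allocator state; grow_mtx serializes
 * growth so that racing threads don't each reserve fresh address space.
 */
void *try_existing(void);		/* Placeholder: reuse from current state. */
void *grow_backing_store(void);	/* Placeholder: reserve/carve new memory. */

static void *
grow_protocol_sketch(tsdn_t *tsdn, malloc_mutex_t *mtx, malloc_mutex_t *grow_mtx) {
	malloc_mutex_lock(tsdn, mtx);
	void *ret = try_existing();		/* Fast path: reuse. */
	malloc_mutex_unlock(tsdn, mtx);
	if (ret != NULL) {
		return ret;
	}

	malloc_mutex_lock(tsdn, grow_mtx);	/* Only one grower at a time. */
	malloc_mutex_lock(tsdn, mtx);
	ret = try_existing();			/* Re-check: another thread may have grown. */
	malloc_mutex_unlock(tsdn, mtx);
	if (ret == NULL) {
		/*
		 * Grow without holding mtx so unrelated allocations can
		 * proceed, then (in the real code) retake mtx to carve the
		 * requested allocation out of the new backing memory.
		 */
		ret = grow_backing_store();
	}
	malloc_mutex_unlock(tsdn, grow_mtx);
	return ret;
}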