HPA: Manage whole hugepages at a time.

This redesigns the HPA implementation to allow us to manage hugepages all at
once, locally, without relying on a global fallback.
This commit is contained in:
David Goldblatt 2020-11-09 13:49:30 -08:00 committed by David Goldblatt
parent 63677dde63
commit 43af63fff4
16 changed files with 700 additions and 550 deletions

View File

@ -16,7 +16,6 @@ extern const char *percpu_arena_mode_names[];
extern const uint64_t h_steps[SMOOTHSTEP_NSTEPS]; extern const uint64_t h_steps[SMOOTHSTEP_NSTEPS];
extern malloc_mutex_t arenas_lock; extern malloc_mutex_t arenas_lock;
extern emap_t arena_emap_global; extern emap_t arena_emap_global;
extern hpa_t arena_hpa_global;
extern size_t opt_oversize_threshold; extern size_t opt_oversize_threshold;
extern size_t oversize_threshold; extern size_t oversize_threshold;

View File

@ -208,9 +208,9 @@ struct edata_s {
*/ */
/* /*
* If this edata is from an HPA, it may be part of some larger * If this edata is a user allocation from an HPA, it comes out
* pageslab. Track it if so. Otherwise (either because it's * of some pageslab (we don't yet support hugepage allocations
* not part of a pageslab, or not from the HPA at all), NULL. * that don't fit into pageslabs). This tracks it.
*/ */
edata_t *ps; edata_t *ps;
/* /*
@ -225,6 +225,8 @@ struct edata_s {
* between heaps. * between heaps.
*/ */
uint32_t longest_free_range; uint32_t longest_free_range;
/* Whether or not the slab is backed by a hugepage. */
bool hugeified;
}; };
}; };
@ -328,6 +330,11 @@ edata_pai_get(const edata_t *edata) {
EDATA_BITS_PAI_SHIFT); EDATA_BITS_PAI_SHIFT);
} }
/*
 * Returns the edata's hugeified flag (the struct field is documented as
 * "whether or not the slab is backed by a hugepage").  This is a plain field
 * read; it does not touch e_bits.
 */
static inline bool
edata_hugeified_get(const edata_t *edata) {
return edata->hugeified;
}
static inline bool static inline bool
edata_slab_get(const edata_t *edata) { edata_slab_get(const edata_t *edata) {
return (bool)((edata->e_bits & EDATA_BITS_SLAB_MASK) >> return (bool)((edata->e_bits & EDATA_BITS_SLAB_MASK) >>
@ -559,6 +566,11 @@ edata_pai_set(edata_t *edata, extent_pai_t pai) {
((uint64_t)pai << EDATA_BITS_PAI_SHIFT); ((uint64_t)pai << EDATA_BITS_PAI_SHIFT);
} }
/*
 * Stores the given value into the edata's hugeified flag.  This only updates
 * the bookkeeping field; it makes no system call and does not touch e_bits.
 */
static inline void
edata_hugeified_set(edata_t *edata, bool hugeified) {
edata->hugeified = hugeified;
}
static inline void static inline void
edata_slab_set(edata_t *edata, bool slab) { edata_slab_set(edata_t *edata, bool slab) {
edata->e_bits = (edata->e_bits & ~EDATA_BITS_SLAB_MASK) | edata->e_bits = (edata->e_bits & ~EDATA_BITS_SLAB_MASK) |

View File

@ -6,32 +6,6 @@
#include "jemalloc/internal/pai.h" #include "jemalloc/internal/pai.h"
#include "jemalloc/internal/psset.h" #include "jemalloc/internal/psset.h"
typedef struct hpa_s hpa_t;
struct hpa_s {
/*
* We have two mutexes for the central allocator; mtx protects its
* state, while grow_mtx controls the ability to grow the
* backing store. This prevents race conditions in which the central
* allocator has exhausted its memory while multiple threads are trying
* to allocate. If they all reserved more address space from the OS
* without synchronization, we'd end up consuming much more than necessary.
*/
malloc_mutex_t grow_mtx;
malloc_mutex_t mtx;
hpa_central_t central;
/* The arena ind we're associated with. */
unsigned ind;
/*
* This edata cache is the global one that we use for new allocations in
* growing; practically, it comes from a0.
*
* We don't use an edata_cache_small in front of this, since we expect a
* small finite number of allocations from it.
*/
edata_cache_t *edata_cache;
exp_grow_t exp_grow;
};
/* Used only by CTL; not actually stored here (i.e., all derived). */ /* Used only by CTL; not actually stored here (i.e., all derived). */
typedef struct hpa_shard_stats_s hpa_shard_stats_t; typedef struct hpa_shard_stats_s hpa_shard_stats_t;
struct hpa_shard_stats_s { struct hpa_shard_stats_s {
@ -53,44 +27,53 @@ struct hpa_shard_s {
* allocator, and so will use its edata_cache. * allocator, and so will use its edata_cache.
*/ */
edata_cache_small_t ecs; edata_cache_small_t ecs;
hpa_t *hpa;
psset_t psset; psset_t psset;
/* /*
* When we're grabbing a new ps from the central allocator, how big * The largest size we'll allocate out of the shard. For those
* would we like it to be? This is mostly about the level of batching * allocations refused, the caller (in practice, the PA module) will
* we use in our requests to the centralized allocator. * fall back to the more general (for now) PAC, which can always handle
* any allocation request.
*/ */
size_t ps_goal; size_t alloc_max;
/* /*
* What's the maximum size we'll try to allocate out of the psset? We * Slabs currently purged away. They are hugepage-sized and
* don't want this to be too large relative to ps_goal, as a * hugepage-aligned, but have had pages_nohuge and pages_purge_forced
* fragmentation avoidance measure. * called on them.
*
* Guarded by grow_mtx.
*/ */
size_t ps_alloc_max; edata_list_inactive_t unused_slabs;
/* /*
* What's the maximum size we'll try to allocate out of the shard at * Either NULL (if empty), or some integer multiple of a
* all? * hugepage-aligned number of hugepages. We carve them off one at a
* time to satisfy new pageslab requests.
*
* Guarded by grow_mtx.
*/ */
size_t small_max; edata_t *eden;
/*
* What's the minimum size for which we'll go straight to the global
* arena?
*/
size_t large_min;
/* The arena ind we're associated with. */ /* The arena ind we're associated with. */
unsigned ind; unsigned ind;
emap_t *emap;
}; };
bool hpa_init(hpa_t *hpa, base_t *base, emap_t *emap, /*
edata_cache_t *edata_cache); * Whether or not the HPA can be used given the current configuration. This is
bool hpa_shard_init(hpa_shard_t *shard, hpa_t *hpa, * not necessarily a guarantee that it backs its allocations by hugepages,
edata_cache_t *edata_cache, unsigned ind, size_t ps_goal, * just that it can function properly given the system it's running on.
size_t ps_alloc_max, size_t small_max, size_t large_min); */
bool hpa_supported();
bool hpa_shard_init(hpa_shard_t *shard, emap_t *emap,
edata_cache_t *edata_cache, unsigned ind, size_t alloc_max);
void hpa_shard_stats_accum(hpa_shard_stats_t *dst, hpa_shard_stats_t *src);
void hpa_shard_stats_merge(tsdn_t *tsdn, hpa_shard_t *shard,
hpa_shard_stats_t *dst);
void hpa_stats_accum(hpa_shard_stats_t *dst, hpa_shard_stats_t *src);
void hpa_stats_merge(tsdn_t *tsdn, hpa_shard_t *shard, hpa_shard_stats_t *dst);
/* /*
* Notify the shard that we won't use it for allocations much longer. Due to * Notify the shard that we won't use it for allocations much longer. Due to
* the possibility of races, we don't actually prevent allocations; just flush * the possibility of races, we don't actually prevent allocations; just flush
@ -108,14 +91,4 @@ void hpa_shard_prefork4(tsdn_t *tsdn, hpa_shard_t *shard);
void hpa_shard_postfork_parent(tsdn_t *tsdn, hpa_shard_t *shard); void hpa_shard_postfork_parent(tsdn_t *tsdn, hpa_shard_t *shard);
void hpa_shard_postfork_child(tsdn_t *tsdn, hpa_shard_t *shard); void hpa_shard_postfork_child(tsdn_t *tsdn, hpa_shard_t *shard);
/*
* These should be acquired after all the shard locks in phase 4, but before any
* locks in phase 4. The central HPA may acquire an edata cache mutex (of a0),
* so it needs to be lower in the witness ordering, but it's also logically
* global and not tied to any particular arena.
*/
void hpa_prefork4(tsdn_t *tsdn, hpa_t *hpa);
void hpa_postfork_parent(tsdn_t *tsdn, hpa_t *hpa);
void hpa_postfork_child(tsdn_t *tsdn, hpa_t *hpa);
#endif /* JEMALLOC_INTERNAL_HPA_H */ #endif /* JEMALLOC_INTERNAL_HPA_H */

View File

@ -11,9 +11,7 @@
OP(ctl) \ OP(ctl) \
OP(prof) \ OP(prof) \
OP(prof_thds_data) \ OP(prof_thds_data) \
OP(prof_dump) \ OP(prof_dump)
OP(hpa_central) \
OP(hpa_central_grow)
typedef enum { typedef enum {
#define OP(mtx) global_prof_mutex_##mtx, #define OP(mtx) global_prof_mutex_##mtx,

View File

@ -130,9 +130,8 @@ bool pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base,
* This isn't exposed to users; we allow late enablement of the HPA shard so * This isn't exposed to users; we allow late enablement of the HPA shard so
* that we can boot without worrying about the HPA, then turn it on in a0. * that we can boot without worrying about the HPA, then turn it on in a0.
*/ */
bool pa_shard_enable_hpa(pa_shard_t *shard, hpa_t *hpa, size_t ps_goal, bool pa_shard_enable_hpa(pa_shard_t *shard, size_t alloc_max,
size_t ps_alloc_max, size_t small_max, size_t large_min, size_t sec_nshards, size_t sec_nshards, size_t sec_alloc_max, size_t sec_bytes_max);
size_t sec_alloc_max, size_t sec_bytes_max);
/* /*
* We stop using the HPA when custom extent hooks are installed, but still * We stop using the HPA when custom extent hooks are installed, but still
* redirect deallocations to it. * redirect deallocations to it.

View File

@ -24,11 +24,14 @@
typedef struct psset_bin_stats_s psset_bin_stats_t; typedef struct psset_bin_stats_s psset_bin_stats_t;
struct psset_bin_stats_s { struct psset_bin_stats_s {
/* How many pageslabs are in this bin? */ /* How many pageslabs are in this bin? */
size_t npageslabs; size_t npageslabs_huge;
size_t npageslabs_nonhuge;
/* Of them, how many pages are active? */ /* Of them, how many pages are active? */
size_t nactive; size_t nactive_huge;
size_t nactive_nonhuge;
/* How many are inactive? */ /* How many are inactive? */
size_t ninactive; size_t ninactive_huge;
size_t ninactive_nonhuge;
}; };
/* Used only by CTL; not actually stored here (i.e., all derived). */ /* Used only by CTL; not actually stored here (i.e., all derived). */
@ -62,6 +65,8 @@ void psset_stats_accum(psset_stats_t *dst, psset_stats_t *src);
void psset_insert(psset_t *psset, edata_t *ps); void psset_insert(psset_t *psset, edata_t *ps);
void psset_remove(psset_t *psset, edata_t *ps); void psset_remove(psset_t *psset, edata_t *ps);
void psset_hugify(psset_t *psset, edata_t *ps);
/* /*
* Tries to obtain a chunk from an existing pageslab already in the set. * Tries to obtain a chunk from an existing pageslab already in the set.
* Returns true on failure. * Returns true on failure.

View File

@ -37,7 +37,6 @@ static atomic_zd_t dirty_decay_ms_default;
static atomic_zd_t muzzy_decay_ms_default; static atomic_zd_t muzzy_decay_ms_default;
emap_t arena_emap_global; emap_t arena_emap_global;
hpa_t arena_hpa_global;
const uint64_t h_steps[SMOOTHSTEP_NSTEPS] = { const uint64_t h_steps[SMOOTHSTEP_NSTEPS] = {
#define STEP(step, h, x, y) \ #define STEP(step, h, x, y) \
@ -1535,9 +1534,8 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) {
* so arena_hpa_global is not yet initialized. * so arena_hpa_global is not yet initialized.
*/ */
if (opt_hpa && ehooks_are_default(base_ehooks_get(base)) && ind != 0) { if (opt_hpa && ehooks_are_default(base_ehooks_get(base)) && ind != 0) {
if (pa_shard_enable_hpa(&arena->pa_shard, &arena_hpa_global, if (pa_shard_enable_hpa(&arena->pa_shard,
opt_hpa_slab_goal, opt_hpa_slab_max_alloc, opt_hpa_slab_max_alloc, opt_hpa_sec_nshards,
opt_hpa_small_max, opt_hpa_large_min, opt_hpa_sec_nshards,
opt_hpa_sec_max_alloc, opt_hpa_sec_max_bytes)) { opt_hpa_sec_max_alloc, opt_hpa_sec_max_bytes)) {
goto label_error; goto label_error;
} }

115
src/ctl.c
View File

@ -220,13 +220,19 @@ CTL_PROTO(stats_arenas_i_extents_j_dirty_bytes)
CTL_PROTO(stats_arenas_i_extents_j_muzzy_bytes) CTL_PROTO(stats_arenas_i_extents_j_muzzy_bytes)
CTL_PROTO(stats_arenas_i_extents_j_retained_bytes) CTL_PROTO(stats_arenas_i_extents_j_retained_bytes)
INDEX_PROTO(stats_arenas_i_extents_j) INDEX_PROTO(stats_arenas_i_extents_j)
CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs) CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_npageslabs_huge)
CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive) CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_nactive_huge)
CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_ninactive) CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_ninactive_huge)
CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_npageslabs_nonhuge)
CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_nactive_nonhuge)
CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_ninactive_nonhuge)
CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_huge)
CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_nonhuge)
CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_huge)
CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_nonhuge)
CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_ninactive_huge)
CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_ninactive_nonhuge)
INDEX_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j) INDEX_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j)
CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_npageslabs)
CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_nactive)
CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_ninactive)
CTL_PROTO(stats_arenas_i_nthreads) CTL_PROTO(stats_arenas_i_nthreads)
CTL_PROTO(stats_arenas_i_uptime) CTL_PROTO(stats_arenas_i_uptime)
CTL_PROTO(stats_arenas_i_dss) CTL_PROTO(stats_arenas_i_dss)
@ -606,21 +612,33 @@ MUTEX_PROF_ARENA_MUTEXES
}; };
static const ctl_named_node_t stats_arenas_i_hpa_shard_full_slabs_node[] = { static const ctl_named_node_t stats_arenas_i_hpa_shard_full_slabs_node[] = {
{NAME("npageslabs"), {NAME("npageslabs_huge"),
CTL(stats_arenas_i_hpa_shard_full_slabs_npageslabs)}, CTL(stats_arenas_i_hpa_shard_full_slabs_npageslabs_huge)},
{NAME("nactive"), {NAME("nactive_huge"),
CTL(stats_arenas_i_hpa_shard_full_slabs_nactive)}, CTL(stats_arenas_i_hpa_shard_full_slabs_nactive_huge)},
{NAME("ninactive"), {NAME("ninactive_huge"),
CTL(stats_arenas_i_hpa_shard_full_slabs_ninactive)} CTL(stats_arenas_i_hpa_shard_full_slabs_ninactive_huge)},
{NAME("npageslabs_nonhuge"),
CTL(stats_arenas_i_hpa_shard_full_slabs_npageslabs_nonhuge)},
{NAME("nactive_nonhuge"),
CTL(stats_arenas_i_hpa_shard_full_slabs_nactive_nonhuge)},
{NAME("ninactive_nonhuge"),
CTL(stats_arenas_i_hpa_shard_full_slabs_ninactive_nonhuge)},
}; };
static const ctl_named_node_t stats_arenas_i_hpa_shard_nonfull_slabs_j_node[] = { static const ctl_named_node_t stats_arenas_i_hpa_shard_nonfull_slabs_j_node[] = {
{NAME("npageslabs"), {NAME("npageslabs_huge"),
CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs)}, CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_huge)},
{NAME("nactive"), {NAME("nactive_huge"),
CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive)}, CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_huge)},
{NAME("ninactive"), {NAME("ninactive_huge"),
CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_ninactive)} CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_ninactive_huge)},
{NAME("npageslabs_nonhuge"),
CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_nonhuge)},
{NAME("nactive_nonhuge"),
CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_nonhuge)},
{NAME("ninactive_nonhuge"),
CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_ninactive_nonhuge)}
}; };
static const ctl_named_node_t super_stats_arenas_i_hpa_shard_nonfull_slabs_j_node[] = { static const ctl_named_node_t super_stats_arenas_i_hpa_shard_nonfull_slabs_j_node[] = {
@ -1104,7 +1122,7 @@ MUTEX_PROF_ARENA_MUTEXES
} }
/* Merge HPA stats. */ /* Merge HPA stats. */
hpa_stats_accum(&sdstats->hpastats, &astats->hpastats); hpa_shard_stats_accum(&sdstats->hpastats, &astats->hpastats);
sec_stats_accum(&sdstats->secstats, &astats->secstats); sec_stats_accum(&sdstats->secstats, &astats->secstats);
} }
} }
@ -1219,14 +1237,6 @@ ctl_refresh(tsdn_t *tsdn) {
READ_GLOBAL_MUTEX_PROF_DATA( READ_GLOBAL_MUTEX_PROF_DATA(
global_prof_mutex_prof_dump, prof_dump_mtx); global_prof_mutex_prof_dump, prof_dump_mtx);
} }
if (opt_hpa) {
READ_GLOBAL_MUTEX_PROF_DATA(
global_prof_mutex_hpa_central,
arena_hpa_global.mtx);
READ_GLOBAL_MUTEX_PROF_DATA(
global_prof_mutex_hpa_central_grow,
arena_hpa_global.grow_mtx);
}
if (have_background_thread) { if (have_background_thread) {
READ_GLOBAL_MUTEX_PROF_DATA( READ_GLOBAL_MUTEX_PROF_DATA(
global_prof_mutex_background_thread, global_prof_mutex_background_thread,
@ -3259,11 +3269,6 @@ stats_mutexes_reset_ctl(tsd_t *tsd, const size_t *mib,
MUTEX_PROF_RESET(tdatas_mtx); MUTEX_PROF_RESET(tdatas_mtx);
MUTEX_PROF_RESET(prof_dump_mtx); MUTEX_PROF_RESET(prof_dump_mtx);
} }
if (opt_hpa) {
MUTEX_PROF_RESET(arena_hpa_global.mtx);
MUTEX_PROF_RESET(arena_hpa_global.grow_mtx);
}
/* Per arena mutexes. */ /* Per arena mutexes. */
unsigned n = narenas_total_get(); unsigned n = narenas_total_get();
@ -3367,22 +3372,44 @@ stats_arenas_i_extents_j_index(tsdn_t *tsdn, const size_t *mib,
return super_stats_arenas_i_extents_j_node; return super_stats_arenas_i_extents_j_node;
} }
CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_npageslabs, /* Full, huge */
arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs.npageslabs, CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_npageslabs_huge,
arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs.npageslabs_huge,
size_t); size_t);
CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_nactive, CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_nactive_huge,
arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs.nactive, size_t); arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs.nactive_huge, size_t);
CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_ninactive, CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_ninactive_huge,
arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs.ninactive, size_t); arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs.ninactive_huge, size_t);
CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs, /* Full, nonhuge */
arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]].npageslabs, CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_npageslabs_nonhuge,
arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs.npageslabs_nonhuge,
size_t); size_t);
CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive, CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_nactive_nonhuge,
arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]].nactive, arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs.nactive_nonhuge, size_t);
CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_ninactive_nonhuge,
arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs.ninactive_nonhuge, size_t);
/* Nonfull, huge */
CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_huge,
arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]].npageslabs_huge,
size_t); size_t);
CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_ninactive, CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_huge,
arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]].ninactive, arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]].nactive_huge,
size_t);
CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_ninactive_huge,
arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]].ninactive_huge,
size_t);
/* Nonfull, nonhuge */
CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_nonhuge,
arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]].npageslabs_nonhuge,
size_t);
CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_nonhuge,
arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]].nactive_nonhuge,
size_t);
CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_ninactive_nonhuge,
arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]].ninactive_nonhuge,
size_t); size_t);
static const ctl_named_node_t * static const ctl_named_node_t *

611
src/hpa.c
View File

@ -6,6 +6,8 @@
#include "jemalloc/internal/flat_bitmap.h" #include "jemalloc/internal/flat_bitmap.h"
#include "jemalloc/internal/witness.h" #include "jemalloc/internal/witness.h"
#define HPA_EDEN_SIZE (128 * HUGEPAGE)
static edata_t *hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size, static edata_t *hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size,
size_t alignment, bool zero); size_t alignment, bool zero);
static bool hpa_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, static bool hpa_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata,
@ -15,43 +17,40 @@ static bool hpa_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata,
static void hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata); static void hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata);
bool bool
hpa_init(hpa_t *hpa, base_t *base, emap_t *emap, edata_cache_t *edata_cache) { hpa_supported() {
bool err; #ifdef _WIN32
/*
* At least until the API and implementation is somewhat settled, we
* don't want to try to debug the VM subsystem on the hardest-to-test
* platform.
*/
return false;
#endif
if (!pages_can_hugify) {
return false;
}
/* /*
* We fundamentally rely on an address-space-hungry growth strategy for * We fundamentally rely on an address-space-hungry growth strategy for
* hugepages. This may change in the future, but for now we should have * hugepages.
* refused to turn on any HPA at a higher level of the stack.
*/ */
assert(LG_SIZEOF_PTR == 3); if (LG_SIZEOF_PTR == 2) {
err = malloc_mutex_init(&hpa->grow_mtx, "hpa_grow", WITNESS_RANK_HPA_GROW,
malloc_mutex_rank_exclusive);
if (err) {
return true;
}
err = malloc_mutex_init(&hpa->mtx, "hpa", WITNESS_RANK_HPA,
malloc_mutex_rank_exclusive);
if (err) {
return true;
}
hpa_central_init(&hpa->central, edata_cache, emap);
if (err) {
return true;
}
hpa->ind = base_ind_get(base);
hpa->edata_cache = edata_cache;
exp_grow_init(&hpa->exp_grow);
return false; return false;
}
/*
* We use the edata bitmap; it needs to have at least as many bits as a
* hugepage has pages.
*/
if (HUGEPAGE / PAGE > BITMAP_GROUPS_MAX * sizeof(bitmap_t) * 8) {
return false;
}
return true;
} }
bool bool
hpa_shard_init(hpa_shard_t *shard, hpa_t *hpa, edata_cache_t *edata_cache, hpa_shard_init(hpa_shard_t *shard, emap_t *emap, edata_cache_t *edata_cache,
unsigned ind, size_t ps_goal, size_t ps_alloc_max, size_t small_max, unsigned ind, size_t alloc_max) {
size_t large_min) { /* malloc_conf processing should have filtered out these cases. */
assert(hpa_supported());
bool err; bool err;
err = malloc_mutex_init(&shard->grow_mtx, "hpa_shard_grow", err = malloc_mutex_init(&shard->grow_mtx, "hpa_shard_grow",
WITNESS_RANK_HPA_SHARD_GROW, malloc_mutex_rank_exclusive); WITNESS_RANK_HPA_SHARD_GROW, malloc_mutex_rank_exclusive);
@ -66,12 +65,12 @@ hpa_shard_init(hpa_shard_t *shard, hpa_t *hpa, edata_cache_t *edata_cache,
assert(edata_cache != NULL); assert(edata_cache != NULL);
edata_cache_small_init(&shard->ecs, edata_cache); edata_cache_small_init(&shard->ecs, edata_cache);
shard->hpa = hpa;
psset_init(&shard->psset); psset_init(&shard->psset);
shard->ps_goal = ps_goal; shard->alloc_max = alloc_max;
shard->ps_alloc_max = ps_alloc_max; edata_list_inactive_init(&shard->unused_slabs);
shard->small_max = small_max; shard->eden = NULL;
shard->large_min = large_min; shard->ind = ind;
shard->emap = emap;
/* /*
* Fill these in last, so that if an hpa_shard gets used despite * Fill these in last, so that if an hpa_shard gets used despite
@ -83,9 +82,6 @@ hpa_shard_init(hpa_shard_t *shard, hpa_t *hpa, edata_cache_t *edata_cache,
shard->pai.shrink = &hpa_shrink; shard->pai.shrink = &hpa_shrink;
shard->pai.dalloc = &hpa_dalloc; shard->pai.dalloc = &hpa_dalloc;
shard->ind = ind;
assert(ind == base_ind_get(edata_cache->base));
return false; return false;
} }
@ -96,176 +92,333 @@ hpa_shard_init(hpa_shard_t *shard, hpa_t *hpa, edata_cache_t *edata_cache,
* locking here. * locking here.
*/ */
void void
hpa_stats_accum(hpa_shard_stats_t *dst, hpa_shard_stats_t *src) { hpa_shard_stats_accum(hpa_shard_stats_t *dst, hpa_shard_stats_t *src) {
psset_stats_accum(&dst->psset_stats, &src->psset_stats); psset_stats_accum(&dst->psset_stats, &src->psset_stats);
} }
void void
hpa_stats_merge(tsdn_t *tsdn, hpa_shard_t *shard, hpa_shard_stats_t *dst) { hpa_shard_stats_merge(tsdn_t *tsdn, hpa_shard_t *shard,
hpa_shard_stats_t *dst) {
malloc_mutex_lock(tsdn, &shard->mtx); malloc_mutex_lock(tsdn, &shard->mtx);
psset_stats_accum(&dst->psset_stats, &shard->psset.stats); psset_stats_accum(&dst->psset_stats, &shard->psset.stats);
malloc_mutex_unlock(tsdn, &shard->mtx); malloc_mutex_unlock(tsdn, &shard->mtx);
} }
static edata_t * static bool
hpa_alloc_central(tsdn_t *tsdn, hpa_shard_t *shard, size_t size_min, hpa_should_hugify(hpa_shard_t *shard, edata_t *ps) {
size_t size_goal) {
bool err;
edata_t *edata;
hpa_t *hpa = shard->hpa;
malloc_mutex_lock(tsdn, &hpa->mtx);
edata = hpa_central_alloc_reuse(tsdn, &hpa->central, size_min,
size_goal);
malloc_mutex_unlock(tsdn, &hpa->mtx);
if (edata != NULL) {
edata_arena_ind_set(edata, shard->ind);
return edata;
}
/* No existing range can satisfy the request; try to grow. */
malloc_mutex_lock(tsdn, &hpa->grow_mtx);
/* /*
* We could have raced with other grow attempts; re-check to see if we * For now, just use a static check; hugify a page if it's <= 5%
* did, and are now able to satisfy the request. * inactive. Eventually, this should be a malloc conf option.
*/ */
malloc_mutex_lock(tsdn, &hpa->mtx); return !edata_hugeified_get(ps)
edata = hpa_central_alloc_reuse(tsdn, &hpa->central, size_min, && edata_nfree_get(ps) < (HUGEPAGE / PAGE) * 5 / 100;
size_goal); }
malloc_mutex_unlock(tsdn, &hpa->mtx);
if (edata != NULL) {
malloc_mutex_unlock(tsdn, &hpa->grow_mtx);
edata_arena_ind_set(edata, shard->ind);
return edata;
}
/* Best-effort: asks the OS to back ps with a hugepage; any error is ignored. */
static void
hpa_hugify(edata_t *ps) {
assert(edata_size_get(ps) == HUGEPAGE);
assert(edata_hugeified_get(ps));
bool err = pages_huge(edata_base_get(ps), HUGEPAGE);
/* /*
* No such luck. We've dropped mtx, so other allocations can proceed * Eat the error; even if the hugeification failed, it's still safe to
* while we allocate the new extent. We know no one else will grow in * pretend it didn't (and would require extraordinary measures to
* the meantime, though, since we still hold grow_mtx. * unhugify).
*/
size_t alloc_size;
pszind_t skip;
size_t hugepage_goal_min = HUGEPAGE_CEILING(size_goal);
err = exp_grow_size_prepare(&hpa->exp_grow, hugepage_goal_min,
&alloc_size, &skip);
if (err) {
malloc_mutex_unlock(tsdn, &hpa->grow_mtx);
return NULL;
}
alloc_size = HUGEPAGE_CEILING(alloc_size);
/*
* Eventually, we need to think about this more systematically, and in
* terms of extent hooks. For now, though, we know we only care about
* overcommitting systems, and we're not going to purge much.
*/
bool commit = true;
void *addr = pages_map(NULL, alloc_size, HUGEPAGE, &commit);
if (addr == NULL) {
malloc_mutex_unlock(tsdn, &hpa->grow_mtx);
return NULL;
}
err = pages_huge(addr, alloc_size);
/*
* Ignore this for now; even if the allocation fails, the address space
* should still be usable.
*/ */
(void)err; (void)err;
}
edata = edata_cache_get(tsdn, hpa->edata_cache); static void
if (edata == NULL) { hpa_dehugify(edata_t *ps) {
malloc_mutex_unlock(tsdn, &hpa->grow_mtx); /* Purge, then dehugify while unbacked. */
pages_unmap(addr, alloc_size); pages_purge_forced(edata_addr_get(ps), HUGEPAGE);
return NULL; pages_nohuge(edata_addr_get(ps), HUGEPAGE);
edata_hugeified_set(ps, false);
}
static edata_t *
hpa_grow(tsdn_t *tsdn, hpa_shard_t *shard) {
malloc_mutex_assert_owner(tsdn, &shard->grow_mtx);
edata_t *ps = NULL;
/* Is there address space waiting for reuse? */
malloc_mutex_assert_owner(tsdn, &shard->grow_mtx);
ps = edata_list_inactive_first(&shard->unused_slabs);
if (ps != NULL) {
edata_list_inactive_remove(&shard->unused_slabs, ps);
return ps;
}
/* Is eden a perfect fit? */
if (shard->eden != NULL && edata_size_get(shard->eden) == HUGEPAGE) {
ps = shard->eden;
shard->eden = NULL;
return ps;
} }
/* /*
* The serial number here is just a placeholder; the hpa_central gets to * We're about to try to allocate from eden by splitting. If eden is
* decide how it wants to fill it in. * NULL, we have to allocate it too. Otherwise, we just have to
* * allocate an edata_t for the new psset.
* The grow edata is associated with the hpa_central_t arena ind; the
* subsequent allocation we get (in the hpa_central_alloc_grow call
* below) will be filled in with the shard ind.
*/ */
edata_init(edata, hpa->ind, addr, alloc_size, /* slab */ false, if (shard->eden == NULL) {
SC_NSIZES, /* sn */ 0, extent_state_active, /* zeroed */ true, /*
/* comitted */ true, EXTENT_PAI_HPA, /* is_head */ true); * During development, we're primarily concerned with systems
* with overcommit. Eventually, we should be more careful here.
malloc_mutex_lock(tsdn, &hpa->mtx); */
/* Note that this replace edata with the allocation to return. */ bool commit = true;
err = hpa_central_alloc_grow(tsdn, &hpa->central, size_goal, edata); /* Allocate address space, bailing if we fail. */
malloc_mutex_unlock(tsdn, &hpa->mtx); void *new_eden = pages_map(NULL, HPA_EDEN_SIZE, HUGEPAGE,
&commit);
if (!err) { if (new_eden == NULL) {
exp_grow_size_commit(&hpa->exp_grow, skip); return NULL;
} }
malloc_mutex_unlock(tsdn, &hpa->grow_mtx); malloc_mutex_lock(tsdn, &shard->mtx);
edata_arena_ind_set(edata, shard->ind); /* Allocate ps edata, bailing if we fail. */
ps = edata_cache_small_get(tsdn, &shard->ecs);
if (ps == NULL) {
malloc_mutex_unlock(tsdn, &shard->mtx);
pages_unmap(new_eden, HPA_EDEN_SIZE);
return NULL;
}
/* Allocate eden edata, bailing if we fail. */
shard->eden = edata_cache_small_get(tsdn, &shard->ecs);
if (shard->eden == NULL) {
edata_cache_small_put(tsdn, &shard->ecs, ps);
malloc_mutex_unlock(tsdn, &shard->mtx);
pages_unmap(new_eden, HPA_EDEN_SIZE);
return NULL;
}
/* Success. */
malloc_mutex_unlock(tsdn, &shard->mtx);
/*
* Note that the values here don't really make sense (e.g. eden
* is actually zeroed). But we don't use the slab metadata in
* determining subsequent allocation metadata (e.g. zero
* tracking should be done at the per-page level, not at the
* level of the hugepage). It's just a convenient data
* structure that contains many of the helpers we need (defined
* lists, a bitmap, an address field, etc.). Eventually, we'll
* have a "real" representation of a hugepage that's unconnected
* to the edata_ts it will serve allocations into.
*/
edata_init(shard->eden, shard->ind, new_eden, HPA_EDEN_SIZE,
/* slab */ false, SC_NSIZES, /* sn */ 0, extent_state_dirty,
/* zeroed */ false, /* comitted */ true, EXTENT_PAI_HPA,
/* is_head */ true);
edata_hugeified_set(shard->eden, false);
} else {
/* Eden is already nonempty; only need an edata for ps. */
malloc_mutex_lock(tsdn, &shard->mtx);
ps = edata_cache_small_get(tsdn, &shard->ecs);
malloc_mutex_unlock(tsdn, &shard->mtx);
if (ps == NULL) {
return NULL;
}
}
/*
* We should have dropped mtx since we're not touching ecs any more, but
* we should continue to hold the grow mutex, since we're about to touch
* eden.
*/
malloc_mutex_assert_not_owner(tsdn, &shard->mtx);
malloc_mutex_assert_owner(tsdn, &shard->grow_mtx);
assert(shard->eden != NULL);
assert(edata_size_get(shard->eden) > HUGEPAGE);
assert(edata_size_get(shard->eden) % HUGEPAGE == 0);
assert(edata_addr_get(shard->eden)
== HUGEPAGE_ADDR2BASE(edata_addr_get(shard->eden)));
malloc_mutex_lock(tsdn, &shard->mtx);
ps = edata_cache_small_get(tsdn, &shard->ecs);
malloc_mutex_unlock(tsdn, &shard->mtx);
if (ps == NULL) {
return NULL;
}
edata_init(ps, edata_arena_ind_get(shard->eden),
edata_addr_get(shard->eden), HUGEPAGE, /* slab */ false,
/* szind */ SC_NSIZES, /* sn */ 0, extent_state_dirty,
/* zeroed */ false, /* comitted */ true, EXTENT_PAI_HPA,
/* is_head */ true);
edata_hugeified_set(ps, false);
edata_addr_set(shard->eden, edata_past_get(ps));
edata_size_set(shard->eden,
edata_size_get(shard->eden) - HUGEPAGE);
return ps;
}
/*
 * Empty slabs are never kept in the psset, so a slab that has just become
 * empty needs a new home.  We use the occasion to purge it, and then park its
 * address space on the shard's unused_slabs list, outside the psset.
 */
static void
hpa_handle_ps_eviction(tsdn_t *tsdn, hpa_shard_t *shard, edata_t *ps) {
	/*
	 * The work below involves relatively expensive system calls, so it
	 * must not run with either shard mutex held.  Since ps has already
	 * been evicted, nobody else should be touching it concurrently.
	 */
	malloc_mutex_assert_not_owner(tsdn, &shard->grow_mtx);
	malloc_mutex_assert_not_owner(tsdn, &shard->mtx);

	/* ps must be hugepage-aligned and exactly one hugepage long. */
	assert(edata_addr_get(ps) == HUGEPAGE_ADDR2BASE(edata_addr_get(ps)));
	assert(HUGEPAGE == edata_size_get(ps));

	/*
	 * Done unconditionally, even for slabs that were never hugeified in
	 * the first place; the effect is the same either way.
	 */
	hpa_dehugify(ps);

	/* unused_slabs is guarded by grow_mtx; hold it only for the insert. */
	malloc_mutex_lock(tsdn, &shard->grow_mtx);
	edata_list_inactive_prepend(&shard->unused_slabs, ps);
	malloc_mutex_unlock(tsdn, &shard->grow_mtx);
}
/*
 * Try to satisfy a request for `size` bytes from the pageslabs already in the
 * shard's psset, without growing it.
 *
 * Returns the new edata on success.  On failure returns NULL and reports the
 * reason through *oom:
 *   - *oom == true:  we ran out of metadata (no edata_t was available, or the
 *     emap boundary registration failed).
 *   - *oom == false: no pageslab currently in the psset can fit the request.
 *
 * Takes and releases shard->mtx internally, so the caller must not hold it.
 */
static edata_t *
hpa_try_alloc_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, bool *oom) {
	*oom = false;

	malloc_mutex_lock(tsdn, &shard->mtx);
	edata_t *edata = edata_cache_small_get(tsdn, &shard->ecs);
	if (edata == NULL) {
		malloc_mutex_unlock(tsdn, &shard->mtx);
		*oom = true;
		return NULL;
	}
	assert(edata_arena_ind_get(edata) == shard->ind);

	bool err = psset_alloc_reuse(&shard->psset, edata, size);
	if (err) {
		/* Nothing fits; hand the unused edata_t back.  Not an OOM. */
		edata_cache_small_put(tsdn, &shard->ecs, edata);
		malloc_mutex_unlock(tsdn, &shard->mtx);
		return NULL;
	}
	/*
	 * This could theoretically be moved outside of the critical section,
	 * but that introduces the potential for a race.  Without the lock, the
	 * (initially nonempty, since this is the reuse pathway) pageslab we
	 * allocated out of could become otherwise empty while the lock is
	 * dropped.  This would force us to deal with a pageslab eviction down
	 * the error pathway, which is a pain.
	 */
	err = emap_register_boundary(tsdn, shard->emap, edata,
	    SC_NSIZES, /* slab */ false);
	if (err) {
		edata_t *ps = psset_dalloc(&shard->psset, edata);
		/*
		 * The pageslab was nonempty before we started; it
		 * should still be nonempty now, and so shouldn't get
		 * evicted.
		 */
		assert(ps == NULL);
		edata_cache_small_put(tsdn, &shard->ecs, edata);
		malloc_mutex_unlock(tsdn, &shard->mtx);
		*oom = true;
		return NULL;
	}

	edata_t *ps = edata_ps_get(edata);
	assert(ps != NULL);
	bool hugify = hpa_should_hugify(shard, ps);
	if (hugify) {
		/*
		 * Do the metadata modification while holding the lock; we'll
		 * actually change state with the lock dropped.
		 */
		psset_hugify(&shard->psset, ps);
	}
	malloc_mutex_unlock(tsdn, &shard->mtx);
	if (hugify) {
		/*
		 * Hugifying with the lock dropped is safe, even with
		 * concurrent modifications to the ps.  This relies on
		 * the fact that the current implementation will never
		 * dehugify a non-empty pageslab, and ps will never
		 * become empty before we return edata to the user to be
		 * freed.
		 *
		 * Note that holding the lock would prevent not just operations
		 * on this page slab, but also any other alloc/dalloc
		 * operations in this hpa shard.
		 */
		hpa_hugify(ps);
	}
	return edata;
}
static edata_t * static edata_t *
hpa_alloc_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size) { hpa_alloc_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size) {
assert(size <= shard->ps_alloc_max); assert(size <= shard->alloc_max);
bool err; bool err;
malloc_mutex_lock(tsdn, &shard->mtx); bool oom;
edata_t *edata = edata_cache_small_get(tsdn, &shard->ecs); edata_t *edata;
if (edata == NULL) {
malloc_mutex_unlock(tsdn, &shard->mtx);
return NULL;
}
edata_arena_ind_set(edata, shard->ind);
err = psset_alloc_reuse(&shard->psset, edata, size); edata = hpa_try_alloc_no_grow(tsdn, shard, size, &oom);
malloc_mutex_unlock(tsdn, &shard->mtx); if (edata != NULL) {
if (!err) {
return edata; return edata;
} }
/* Nothing in the psset works; we have to grow it. */ /* Nothing in the psset works; we have to grow it. */
malloc_mutex_lock(tsdn, &shard->grow_mtx); malloc_mutex_lock(tsdn, &shard->grow_mtx);
/*
/* As above; check for grow races. */ * Check for grow races; maybe some earlier thread expanded the psset
malloc_mutex_lock(tsdn, &shard->mtx); * in between when we dropped the main mutex and grabbed the grow mutex.
err = psset_alloc_reuse(&shard->psset, edata, size); */
malloc_mutex_unlock(tsdn, &shard->mtx); edata = hpa_try_alloc_no_grow(tsdn, shard, size, &oom);
if (!err) { if (edata != NULL || oom) {
malloc_mutex_unlock(tsdn, &shard->grow_mtx); malloc_mutex_unlock(tsdn, &shard->grow_mtx);
return edata; return edata;
} }
edata_t *grow_edata = hpa_alloc_central(tsdn, shard, size, /*
shard->ps_goal); * Note that we don't hold shard->mtx here (while growing);
* deallocations (and allocations of smaller sizes) may still succeed
* while we're doing this potentially expensive system call.
*/
edata_t *grow_edata = hpa_grow(tsdn, shard);
if (grow_edata == NULL) { if (grow_edata == NULL) {
malloc_mutex_unlock(tsdn, &shard->grow_mtx); malloc_mutex_unlock(tsdn, &shard->grow_mtx);
malloc_mutex_lock(tsdn, &shard->mtx);
edata_cache_small_put(tsdn, &shard->ecs, edata);
malloc_mutex_unlock(tsdn, &shard->mtx);
return NULL; return NULL;
} }
edata_arena_ind_set(grow_edata, shard->ind); assert(edata_arena_ind_get(grow_edata) == shard->ind);
edata_slab_set(grow_edata, true); edata_slab_set(grow_edata, true);
fb_group_t *fb = edata_slab_data_get(grow_edata)->bitmap; fb_group_t *fb = edata_slab_data_get(grow_edata)->bitmap;
fb_init(fb, shard->ps_goal / PAGE); fb_init(fb, HUGEPAGE / PAGE);
/* We got the new edata; allocate from it. */ /* We got the new edata; allocate from it. */
malloc_mutex_lock(tsdn, &shard->mtx); malloc_mutex_lock(tsdn, &shard->mtx);
psset_alloc_new(&shard->psset, grow_edata, edata, size); edata = edata_cache_small_get(tsdn, &shard->ecs);
if (edata == NULL) {
malloc_mutex_unlock(tsdn, &shard->mtx);
malloc_mutex_unlock(tsdn, &shard->grow_mtx);
return NULL;
}
psset_alloc_new(&shard->psset, grow_edata, edata, size);
err = emap_register_boundary(tsdn, shard->emap, edata,
SC_NSIZES, /* slab */ false);
if (err) {
edata_t *ps = psset_dalloc(&shard->psset, edata);
/*
* The pageslab was empty except for the new allocation; it
* should get evicted.
*/
assert(ps == grow_edata);
edata_cache_small_put(tsdn, &shard->ecs, edata);
/*
* Technically the same as fallthrough at the time of this
* writing, but consistent with the error handling in the rest
* of the function.
*/
malloc_mutex_unlock(tsdn, &shard->mtx);
malloc_mutex_unlock(tsdn, &shard->grow_mtx);
hpa_handle_ps_eviction(tsdn, shard, ps);
return NULL;
}
malloc_mutex_unlock(tsdn, &shard->mtx); malloc_mutex_unlock(tsdn, &shard->mtx);
malloc_mutex_unlock(tsdn, &shard->grow_mtx); malloc_mutex_unlock(tsdn, &shard->grow_mtx);
return edata; return edata;
} }
@ -283,33 +436,25 @@ static edata_t *
hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size, hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size,
size_t alignment, bool zero) { size_t alignment, bool zero) {
assert((size & PAGE_MASK) == 0); assert((size & PAGE_MASK) == 0);
witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
WITNESS_RANK_CORE, 0);
hpa_shard_t *shard = hpa_from_pai(self); hpa_shard_t *shard = hpa_from_pai(self);
/* We don't handle alignment or zeroing for now. */ /* We don't handle alignment or zeroing for now. */
if (alignment > PAGE || zero) { if (alignment > PAGE || zero) {
return NULL; return NULL;
} }
if (size > shard->small_max && size < shard->large_min) { if (size > shard->alloc_max) {
return NULL; return NULL;
} }
witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), edata_t *edata = hpa_alloc_psset(tsdn, shard, size);
WITNESS_RANK_CORE, 0);
edata_t *edata;
if (size <= shard->ps_alloc_max) {
edata = hpa_alloc_psset(tsdn, shard, size);
if (edata != NULL) {
emap_register_boundary(tsdn, shard->hpa->central.emap,
edata, SC_NSIZES, /* slab */ false);
}
} else {
edata = hpa_alloc_central(tsdn, shard, size, size);
}
witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
WITNESS_RANK_CORE, 0); WITNESS_RANK_CORE, 0);
if (edata != NULL) { if (edata != NULL) {
emap_assert_mapped(tsdn, shard->hpa->central.emap, edata); emap_assert_mapped(tsdn, shard->emap, edata);
assert(edata_pai_get(edata) == EXTENT_PAI_HPA); assert(edata_pai_get(edata) == EXTENT_PAI_HPA);
assert(edata_state_get(edata) == extent_state_active); assert(edata_state_get(edata) == extent_state_active);
assert(edata_arena_ind_get(edata) == shard->ind); assert(edata_arena_ind_get(edata) == shard->ind);
@ -336,16 +481,6 @@ hpa_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata,
return true; return true;
} }
/*
 * Hand edata back to the central allocator of the hpa_t that backs this shard.
 * The edata is first re-tagged with the hpa_t's own arena index, and the
 * central dalloc runs with hpa->mtx held.
 */
static void
hpa_dalloc_central(tsdn_t *tsdn, hpa_shard_t *shard, edata_t *edata) {
hpa_t *hpa = shard->hpa;
/* Ownership moves from the shard to the shared hpa_t. */
edata_arena_ind_set(edata, hpa->ind);
malloc_mutex_lock(tsdn, &hpa->mtx);
hpa_central_dalloc(tsdn, &hpa->central, edata);
malloc_mutex_unlock(tsdn, &hpa->mtx);
}
static void static void
hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) {
hpa_shard_t *shard = hpa_from_pai(self); hpa_shard_t *shard = hpa_from_pai(self);
@ -361,54 +496,27 @@ hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) {
assert(edata_committed_get(edata)); assert(edata_committed_get(edata));
assert(edata_base_get(edata) != NULL); assert(edata_base_get(edata) != NULL);
/* edata_t *ps = edata_ps_get(edata);
* There are two cases: /* Currently, all edatas come from pageslabs. */
* - The psset field is NULL. In this case, the edata comes directly assert(ps != NULL);
* from the hpa_central_t and should be returned to it. emap_deregister_boundary(tsdn, shard->emap, edata);
* - THe psset field is not NULL, in which case we return the edata to
* the appropriate slab (which may in turn cause it to become empty,
* triggering an eviction of the whole slab, which should then be
* returned to the hpa_central_t).
*/
if (edata_ps_get(edata) != NULL) {
emap_deregister_boundary(tsdn, shard->hpa->central.emap, edata);
malloc_mutex_lock(tsdn, &shard->mtx); malloc_mutex_lock(tsdn, &shard->mtx);
/*
* Note that the shard mutex protects the edata hugeified field, too.
* Page slabs can move between pssets (and have their hugeified status
* change) in racy ways.
*/
edata_t *evicted_ps = psset_dalloc(&shard->psset, edata); edata_t *evicted_ps = psset_dalloc(&shard->psset, edata);
/*
* If a pageslab became empty because of the dalloc, it better have been
* the one we expected.
*/
assert(evicted_ps == NULL || evicted_ps == ps);
edata_cache_small_put(tsdn, &shard->ecs, edata); edata_cache_small_put(tsdn, &shard->ecs, edata);
malloc_mutex_unlock(tsdn, &shard->mtx); malloc_mutex_unlock(tsdn, &shard->mtx);
if (evicted_ps != NULL) { if (evicted_ps != NULL) {
/* hpa_handle_ps_eviction(tsdn, shard, evicted_ps);
* The deallocation caused a pageslab to become empty.
* Free it back to the centralized allocator.
*/
bool err = emap_register_boundary(tsdn,
shard->hpa->central.emap, evicted_ps, SC_NSIZES,
/* slab */ false);
/*
* Registration can only fail on OOM, but the boundary
* mappings should have been initialized during
* allocation.
*/
assert(!err);
edata_slab_set(evicted_ps, false);
edata_ps_set(evicted_ps, NULL);
assert(edata_arena_ind_get(evicted_ps) == shard->ind);
hpa_dalloc_central(tsdn, shard, evicted_ps);
} }
} else {
hpa_dalloc_central(tsdn, shard, edata);
}
}
static void
hpa_shard_assert_stats_empty(psset_bin_stats_t *bin_stats) {
assert(bin_stats->npageslabs == 0);
assert(bin_stats->nactive == 0);
assert(bin_stats->ninactive == 0);
} }
void void
@ -418,6 +526,29 @@ hpa_shard_disable(tsdn_t *tsdn, hpa_shard_t *shard) {
malloc_mutex_unlock(tsdn, &shard->mtx); malloc_mutex_unlock(tsdn, &shard->mtx);
} }
/*
 * Assert that every counter in bin_stats -- pageslab, active and inactive
 * counts, for both the huge and the nonhuge variants -- is zero.
 */
static void
hpa_shard_assert_stats_empty(psset_bin_stats_t *bin_stats) {
/* Hugified pageslabs. */
assert(bin_stats->npageslabs_huge == 0);
assert(bin_stats->nactive_huge == 0);
assert(bin_stats->ninactive_huge == 0);
/* Non-hugified pageslabs. */
assert(bin_stats->npageslabs_nonhuge == 0);
assert(bin_stats->nactive_nonhuge == 0);
assert(bin_stats->ninactive_nonhuge == 0);
}
/*
 * Assert that psset holds no pageslabs at all: a one-page reuse attempt must
 * fail, and every stats bin must read zero.  The caller must hold shard->mtx.
 */
static void
hpa_assert_empty(tsdn_t *tsdn, hpa_shard_t *shard, psset_t *psset) {
	malloc_mutex_assert_owner(tsdn, &shard->mtx);

	/*
	 * psset_alloc_reuse reports failure by returning true; if even the
	 * smallest request cannot be placed, there is no slab with free space.
	 */
	edata_t scratch = {0};
	bool reuse_failed = psset_alloc_reuse(psset, &scratch, PAGE);
	assert(reuse_failed);

	/* The full bin first, then each of the non-full size bins. */
	hpa_shard_assert_stats_empty(&psset->stats.full_slabs);
	pszind_t pind = 0;
	while (pind < PSSET_NPSIZES) {
		hpa_shard_assert_stats_empty(&psset->stats.nonfull_slabs[pind]);
		pind++;
	}
}
void void
hpa_shard_destroy(tsdn_t *tsdn, hpa_shard_t *shard) { hpa_shard_destroy(tsdn_t *tsdn, hpa_shard_t *shard) {
/* /*
@ -427,17 +558,15 @@ hpa_shard_destroy(tsdn_t *tsdn, hpa_shard_t *shard) {
* 1-page allocation. * 1-page allocation.
*/ */
if (config_debug) { if (config_debug) {
edata_t edata = {0};
malloc_mutex_lock(tsdn, &shard->mtx); malloc_mutex_lock(tsdn, &shard->mtx);
bool psset_empty = psset_alloc_reuse(&shard->psset, &edata, hpa_assert_empty(tsdn, shard, &shard->psset);
PAGE);
malloc_mutex_unlock(tsdn, &shard->mtx); malloc_mutex_unlock(tsdn, &shard->mtx);
assert(psset_empty);
hpa_shard_assert_stats_empty(&shard->psset.stats.full_slabs);
for (pszind_t i = 0; i < PSSET_NPSIZES; i++) {
hpa_shard_assert_stats_empty(
&shard->psset.stats.nonfull_slabs[i]);
} }
edata_t *ps;
while ((ps = edata_list_inactive_first(&shard->unused_slabs)) != NULL) {
assert(edata_size_get(ps) == HUGEPAGE);
edata_list_inactive_remove(&shard->unused_slabs, ps);
pages_unmap(edata_base_get(ps), HUGEPAGE);
} }
} }
@ -462,21 +591,3 @@ hpa_shard_postfork_child(tsdn_t *tsdn, hpa_shard_t *shard) {
malloc_mutex_postfork_child(tsdn, &shard->grow_mtx); malloc_mutex_postfork_child(tsdn, &shard->grow_mtx);
malloc_mutex_postfork_child(tsdn, &shard->mtx); malloc_mutex_postfork_child(tsdn, &shard->mtx);
} }
/*
 * Pre-fork hook for an hpa_t: runs the mutex prefork step on grow_mtx and then
 * on mtx.
 */
void
hpa_prefork4(tsdn_t *tsdn, hpa_t *hpa) {
malloc_mutex_prefork(tsdn, &hpa->grow_mtx);
malloc_mutex_prefork(tsdn, &hpa->mtx);
}
/*
 * Post-fork hook for an hpa_t, parent side: runs the mutex postfork-parent
 * step on grow_mtx and then on mtx.
 */
void
hpa_postfork_parent(tsdn_t *tsdn, hpa_t *hpa) {
malloc_mutex_postfork_parent(tsdn, &hpa->grow_mtx);
malloc_mutex_postfork_parent(tsdn, &hpa->mtx);
}
/*
 * Post-fork hook for an hpa_t, child side: runs the mutex postfork-child step
 * on grow_mtx and then on mtx.
 */
void
hpa_postfork_child(tsdn_t *tsdn, hpa_t *hpa) {
malloc_mutex_postfork_child(tsdn, &hpa->grow_mtx);
malloc_mutex_postfork_child(tsdn, &hpa->mtx);
}

View File

@ -1802,31 +1802,19 @@ malloc_init_hard_a0_locked() {
} }
a0 = arena_get(TSDN_NULL, 0, false); a0 = arena_get(TSDN_NULL, 0, false);
if (opt_hpa && LG_SIZEOF_PTR == 2) { if (opt_hpa && !hpa_supported()) {
malloc_printf("<jemalloc>: HPA not supported in the current "
"configuration; %s.",
opt_abort_conf ? "aborting" : "disabling");
if (opt_abort_conf) { if (opt_abort_conf) {
malloc_printf("<jemalloc>: Hugepages not currently " malloc_abort_invalid_conf();
"supported on 32-bit architectures; aborting.");
} else { } else {
malloc_printf("<jemalloc>: Hugepages not currently "
"supported on 32-bit architectures; disabling.");
opt_hpa = false; opt_hpa = false;
} }
} else if (opt_hpa) { } else if (opt_hpa) {
/* if (pa_shard_enable_hpa(&a0->pa_shard, opt_hpa_slab_max_alloc,
* The global HPA uses the edata cache from a0, and so needs to opt_hpa_sec_nshards, opt_hpa_sec_max_alloc,
* be initialized specially, after a0 is. The arena init code opt_hpa_sec_max_bytes)) {
* handles this case specially, and does not turn on the HPA for
* a0 when opt_hpa is true. This lets us do global HPA
* initialization against a valid a0.
*/
if (hpa_init(&arena_hpa_global, b0get(), &arena_emap_global,
&a0->pa_shard.edata_cache)) {
return true;
}
if (pa_shard_enable_hpa(&a0->pa_shard, &arena_hpa_global,
opt_hpa_slab_goal, opt_hpa_slab_max_alloc,
opt_hpa_small_max, opt_hpa_large_min, opt_hpa_sec_nshards,
opt_hpa_sec_max_alloc, opt_hpa_sec_max_bytes)) {
return true; return true;
} }
} }
@ -4346,9 +4334,6 @@ _malloc_prefork(void)
} }
} }
} }
if (i == 4 && opt_hpa) {
hpa_prefork4(tsd_tsdn(tsd), &arena_hpa_global);
}
} }
prof_prefork1(tsd_tsdn(tsd)); prof_prefork1(tsd_tsdn(tsd));
@ -4388,9 +4373,6 @@ _malloc_postfork(void)
arena_postfork_parent(tsd_tsdn(tsd), arena); arena_postfork_parent(tsd_tsdn(tsd), arena);
} }
} }
if (opt_hpa) {
hpa_postfork_parent(tsd_tsdn(tsd), &arena_hpa_global);
}
prof_postfork_parent(tsd_tsdn(tsd)); prof_postfork_parent(tsd_tsdn(tsd));
if (have_background_thread) { if (have_background_thread) {
background_thread_postfork_parent(tsd_tsdn(tsd)); background_thread_postfork_parent(tsd_tsdn(tsd));
@ -4421,9 +4403,6 @@ jemalloc_postfork_child(void) {
arena_postfork_child(tsd_tsdn(tsd), arena); arena_postfork_child(tsd_tsdn(tsd), arena);
} }
} }
if (opt_hpa) {
hpa_postfork_child(tsd_tsdn(tsd), &arena_hpa_global);
}
prof_postfork_child(tsd_tsdn(tsd)); prof_postfork_child(tsd_tsdn(tsd));
if (have_background_thread) { if (have_background_thread) {
background_thread_postfork_child(tsd_tsdn(tsd)); background_thread_postfork_child(tsd_tsdn(tsd));

View File

@ -49,17 +49,10 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base,
} }
bool bool
pa_shard_enable_hpa(pa_shard_t *shard, hpa_t *hpa, size_t ps_goal, pa_shard_enable_hpa(pa_shard_t *shard, size_t alloc_max, size_t sec_nshards,
size_t ps_alloc_max, size_t small_max, size_t large_min, size_t sec_alloc_max, size_t sec_bytes_max) {
size_t sec_nshards, size_t sec_alloc_max, size_t sec_bytes_max) { if (hpa_shard_init(&shard->hpa_shard, shard->emap, &shard->edata_cache,
ps_goal &= ~PAGE_MASK; shard->ind, alloc_max)) {
ps_alloc_max &= ~PAGE_MASK;
if (ps_alloc_max > ps_goal) {
ps_alloc_max = ps_goal;
}
if (hpa_shard_init(&shard->hpa_shard, hpa, &shard->edata_cache,
shard->ind, ps_goal, ps_alloc_max, small_max, large_min)) {
return true; return true;
} }
if (sec_init(&shard->hpa_sec, &shard->hpa_shard.pai, sec_nshards, if (sec_init(&shard->hpa_sec, &shard->hpa_shard.pai, sec_nshards,

View File

@ -150,7 +150,7 @@ pa_shard_stats_merge(tsdn_t *tsdn, pa_shard_t *shard,
} }
if (shard->ever_used_hpa) { if (shard->ever_used_hpa) {
hpa_stats_merge(tsdn, &shard->hpa_shard, hpa_stats_out); hpa_shard_stats_merge(tsdn, &shard->hpa_shard, hpa_stats_out);
sec_stats_merge(tsdn, &shard->hpa_sec, sec_stats_out); sec_stats_merge(tsdn, &shard->hpa_sec, sec_stats_out);
} }
} }

View File

@ -20,9 +20,13 @@ psset_init(psset_t *psset) {
/*
 * Add every counter of src into the matching counter of dst.  Huge and
 * nonhuge pageslab/active/inactive counts are accumulated separately.
 */
static void
psset_bin_stats_accum(psset_bin_stats_t *dst, psset_bin_stats_t *src) {
	dst->npageslabs_huge += src->npageslabs_huge;
	dst->nactive_huge += src->nactive_huge;
	dst->ninactive_huge += src->ninactive_huge;

	dst->npageslabs_nonhuge += src->npageslabs_nonhuge;
	dst->nactive_nonhuge += src->nactive_nonhuge;
	dst->ninactive_nonhuge += src->ninactive_nonhuge;
}
void void
@ -45,29 +49,62 @@ psset_stats_accum(psset_stats_t *dst, psset_stats_t *src) {
* ensure we don't miss any heap modification operations. * ensure we don't miss any heap modification operations.
*/ */
/*
 * Add (insert == true) or subtract (insert == false) the contribution of
 * pageslab ps to binstats.  The huge or nonhuge set of counters is chosen
 * from the pageslab's current hugeified flag; the pageslab counts as one
 * slab, with its free pages as inactive and the remainder as active.
 */
JEMALLOC_ALWAYS_INLINE void
psset_bin_stats_insert_remove(psset_bin_stats_t *binstats, edata_t *ps,
    bool insert) {
	bool huge = edata_hugeified_get(ps);
	size_t *npageslabs_dst = huge
	    ? &binstats->npageslabs_huge : &binstats->npageslabs_nonhuge;
	size_t *nactive_dst = huge
	    ? &binstats->nactive_huge : &binstats->nactive_nonhuge;
	size_t *ninactive_dst = huge
	    ? &binstats->ninactive_huge : &binstats->ninactive_nonhuge;

	size_t npages = edata_size_get(ps) >> LG_PAGE;
	size_t ninactive = edata_nfree_get(ps);
	size_t nactive = npages - ninactive;

	/*
	 * Multiplying by (size_t)-1 and adding is a subtraction under unsigned
	 * wraparound, which lets both directions share one code path.
	 */
	size_t mul = insert ? (size_t)1 : (size_t)-1;
	*npageslabs_dst += mul * 1;
	*nactive_dst += mul * nactive;
	*ninactive_dst += mul * ninactive;
}
/* Account for pageslab ps being added to the bin tracked by binstats. */
static void
psset_bin_stats_insert(psset_bin_stats_t *binstats, edata_t *ps) {
psset_bin_stats_insert_remove(binstats, ps, /* insert */ true);
}
/* Account for pageslab ps being taken out of the bin tracked by binstats. */
static void
psset_bin_stats_remove(psset_bin_stats_t *binstats, edata_t *ps) {
psset_bin_stats_insert_remove(binstats, ps, /* insert */ false);
}
/*
 * Move `num` pages from the active to the inactive count of binstats, in
 * either the huge or the nonhuge set of counters, for a pageslab that stays
 * in its bin.
 *
 * We don't currently need an "activate" equivalent to this, since down the
 * allocation pathways we don't do the optimization in which we change a slab
 * without first removing it from a bin.
 */
static void
psset_bin_stats_deactivate(psset_bin_stats_t *binstats, bool huge, size_t num) {
	size_t *nactive_dst = huge
	    ? &binstats->nactive_huge : &binstats->nactive_nonhuge;
	size_t *ninactive_dst = huge
	    ? &binstats->ninactive_huge : &binstats->ninactive_nonhuge;

	/* Can't deactivate more pages than are currently active. */
	assert(*nactive_dst >= num);
	*nactive_dst -= num;
	*ninactive_dst += num;
}
/*
 * Remove pageslab ps from the age heap for size class pind, and drop its
 * contribution from that class's non-full stats bin.
 */
static void
psset_edata_heap_remove(psset_t *psset, pszind_t pind, edata_t *ps) {
	edata_age_heap_remove(&psset->pageslabs[pind], ps);
	psset_bin_stats_remove(&psset->stats.nonfull_slabs[pind], ps);
}
/*
 * Insert pageslab ps into the age heap for size class pind, and add its
 * contribution to that class's non-full stats bin.
 */
static void
psset_edata_heap_insert(psset_t *psset, pszind_t pind, edata_t *ps) {
	edata_age_heap_insert(&psset->pageslabs[pind], ps);
	psset_bin_stats_insert(&psset->stats.nonfull_slabs[pind], ps);
}
JEMALLOC_ALWAYS_INLINE void JEMALLOC_ALWAYS_INLINE void
@ -86,8 +123,7 @@ psset_insert(psset_t *psset, edata_t *ps) {
* We don't ned to track full slabs; just pretend to for stats * We don't ned to track full slabs; just pretend to for stats
* purposes. See the comment at psset_bin_stats_adjust. * purposes. See the comment at psset_bin_stats_adjust.
*/ */
psset_bin_stats_adjust(&psset->stats.full_slabs, ps, psset_bin_stats_insert(&psset->stats.full_slabs, ps);
/* inc */ true);
return; return;
} }
@ -107,8 +143,7 @@ psset_remove(psset_t *psset, edata_t *ps) {
size_t longest_free_range = edata_longest_free_range_get(ps); size_t longest_free_range = edata_longest_free_range_get(ps);
if (longest_free_range == 0) { if (longest_free_range == 0) {
psset_bin_stats_adjust(&psset->stats.full_slabs, ps, psset_bin_stats_remove(&psset->stats.full_slabs, ps);
/* inc */ true);
return; return;
} }
@ -121,6 +156,26 @@ psset_remove(psset_t *psset, edata_t *ps) {
} }
} }
/*
 * Flag pageslab ps as hugeified and move its accounting from the nonhuge to
 * the huge counters of whichever stats bin it currently occupies.  ps must not
 * already be hugeified.
 */
void
psset_hugify(psset_t *psset, edata_t *ps) {
	assert(!edata_hugeified_get(ps));
	psset_assert_ps_consistent(ps);

	/*
	 * Locate the bin ps is accounted in: the full bin when it has no free
	 * range left, otherwise the non-full bin for its longest free range.
	 */
	psset_bin_stats_t *bin_stats = &psset->stats.full_slabs;
	size_t free_pages = edata_longest_free_range_get(ps);
	if (free_pages != 0) {
		pszind_t pind = sz_psz2ind(sz_psz_quantize_floor(
		    free_pages << LG_PAGE));
		assert(pind < PSSET_NPSIZES);
		bin_stats = &psset->stats.nonfull_slabs[pind];
	}

	/*
	 * The stats helpers pick counters from the hugeified flag, so take the
	 * slab out under the old flag, flip it, and put it back.
	 */
	psset_bin_stats_remove(bin_stats, ps);
	edata_hugeified_set(ps, true);
	psset_bin_stats_insert(bin_stats, ps);
}
/* /*
* Similar to PAC's extent_recycle_extract. Out of all the pageslabs in the * Similar to PAC's extent_recycle_extract. Out of all the pageslabs in the
* set, picks one that can satisfy the allocation and remove it from the set. * set, picks one that can satisfy the allocation and remove it from the set.
@ -225,8 +280,7 @@ psset_ps_alloc_insert(psset_t *psset, edata_t *ps, edata_t *r_edata,
} }
edata_longest_free_range_set(ps, (uint32_t)largest_unchosen_range); edata_longest_free_range_set(ps, (uint32_t)largest_unchosen_range);
if (largest_unchosen_range == 0) { if (largest_unchosen_range == 0) {
psset_bin_stats_adjust(&psset->stats.full_slabs, ps, psset_bin_stats_insert(&psset->stats.full_slabs, ps);
/* inc */ true);
} else { } else {
psset_insert(psset, ps); psset_insert(psset, ps);
} }
@ -258,8 +312,8 @@ edata_t *
psset_dalloc(psset_t *psset, edata_t *edata) { psset_dalloc(psset_t *psset, edata_t *edata) {
assert(edata_pai_get(edata) == EXTENT_PAI_HPA); assert(edata_pai_get(edata) == EXTENT_PAI_HPA);
assert(edata_ps_get(edata) != NULL); assert(edata_ps_get(edata) != NULL);
edata_t *ps = edata_ps_get(edata); edata_t *ps = edata_ps_get(edata);
fb_group_t *ps_fb = edata_slab_data_get(ps)->bitmap; fb_group_t *ps_fb = edata_slab_data_get(ps)->bitmap;
size_t ps_old_longest_free_range = edata_longest_free_range_get(ps); size_t ps_old_longest_free_range = edata_longest_free_range_get(ps);
pszind_t old_pind = SC_NPSIZES; pszind_t old_pind = SC_NPSIZES;
@ -274,22 +328,12 @@ psset_dalloc(psset_t *psset, edata_t *edata) {
>> LG_PAGE; >> LG_PAGE;
size_t len = edata_size_get(edata) >> LG_PAGE; size_t len = edata_size_get(edata) >> LG_PAGE;
fb_unset_range(ps_fb, ps_npages, begin, len); fb_unset_range(ps_fb, ps_npages, begin, len);
if (ps_old_longest_free_range == 0) {
/* We were in the (imaginary) full bin; update stats for it. */ /* The pageslab is still in the bin; adjust its stats first. */
psset_bin_stats_adjust(&psset->stats.full_slabs, ps, psset_bin_stats_t *bin_stats = (ps_old_longest_free_range == 0
/* inc */ false); ? &psset->stats.full_slabs : &psset->stats.nonfull_slabs[old_pind]);
} else { psset_bin_stats_deactivate(bin_stats, edata_hugeified_get(ps), len);
/*
* The edata is still in the bin, need to update its
* contribution.
*/
psset->stats.nonfull_slabs[old_pind].nactive -= len;
psset->stats.nonfull_slabs[old_pind].ninactive += len;
}
/*
* Note that we want to do this after the stats updates, since if it was
* full it psset_bin_stats_adjust would have looked at the old version.
*/
edata_nfree_set(ps, (uint32_t)(edata_nfree_get(ps) + len)); edata_nfree_set(ps, (uint32_t)(edata_nfree_get(ps) + len));
/* We might have just created a new, larger range. */ /* We might have just created a new, larger range. */
@ -327,6 +371,12 @@ psset_dalloc(psset_t *psset, edata_t *edata) {
bitmap_set(psset->bitmap, &psset_bitmap_info, bitmap_set(psset->bitmap, &psset_bitmap_info,
(size_t)old_pind); (size_t)old_pind);
} }
} else {
/*
* Otherwise, the bin was full, and we need to adjust the full
* bin stats.
*/
psset_bin_stats_remove(&psset->stats.full_slabs, ps);
} }
/* If the pageslab is empty, it gets evicted from the set. */ /* If the pageslab is empty, it gets evicted from the set. */
if (new_range_len == ps_npages) { if (new_range_len == ps_npages) {

View File

@ -667,16 +667,27 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i) {
emitter_row_t row; emitter_row_t row;
emitter_row_init(&row); emitter_row_init(&row);
size_t npageslabs; size_t npageslabs_huge;
size_t nactive; size_t nactive_huge;
size_t ninactive; size_t ninactive_huge;
CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.npageslabs", size_t npageslabs_nonhuge;
i, &npageslabs, size_t); size_t nactive_nonhuge;
CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.nactive", size_t ninactive_nonhuge;
i, &nactive, size_t);
CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.ninactive", CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.npageslabs_huge",
i, &ninactive, size_t); i, &npageslabs_huge, size_t);
CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.nactive_huge",
i, &nactive_huge, size_t);
CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.ninactive_huge",
i, &ninactive_huge, size_t);
CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.npageslabs_nonhuge",
i, &npageslabs_nonhuge, size_t);
CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.nactive_nonhuge",
i, &nactive_nonhuge, size_t);
CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.ninactive_nonhuge",
i, &ninactive_nonhuge, size_t);
size_t sec_bytes; size_t sec_bytes;
CTL_M2_GET("stats.arenas.0.hpa_sec_bytes", i, &sec_bytes, size_t); CTL_M2_GET("stats.arenas.0.hpa_sec_bytes", i, &sec_bytes, size_t);
@ -686,39 +697,62 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i) {
emitter_table_printf(emitter, emitter_table_printf(emitter,
"HPA shard stats:\n" "HPA shard stats:\n"
" In full slabs:\n" " In full slabs:\n"
" npageslabs: %zu\n" " npageslabs: %zu huge, %zu nonhuge\n"
" nactive: %zu\n" " nactive: %zu huge, %zu nonhuge \n"
" ninactive: %zu\n", " ninactive: %zu huge, %zu nonhuge \n",
npageslabs, nactive, ninactive); npageslabs_huge, npageslabs_nonhuge, nactive_huge, nactive_nonhuge,
ninactive_huge, ninactive_nonhuge);
emitter_json_object_kv_begin(emitter, "hpa_shard"); emitter_json_object_kv_begin(emitter, "hpa_shard");
emitter_json_object_kv_begin(emitter, "full_slabs"); emitter_json_object_kv_begin(emitter, "full_slabs");
emitter_json_kv(emitter, "npageslabs", emitter_type_size, &npageslabs); emitter_json_kv(emitter, "npageslabs_huge", emitter_type_size,
emitter_json_kv(emitter, "nactive", emitter_type_size, &nactive); &npageslabs_huge);
emitter_json_kv(emitter, "ninactive", emitter_type_size, &ninactive); emitter_json_kv(emitter, "npageslabs_nonhuge", emitter_type_size,
&npageslabs_nonhuge);
emitter_json_kv(emitter, "nactive_huge", emitter_type_size,
&nactive_huge);
emitter_json_kv(emitter, "nactive_nonhuge", emitter_type_size,
&nactive_nonhuge);
emitter_json_kv(emitter, "ninactive_huge", emitter_type_size,
&ninactive_huge);
emitter_json_kv(emitter, "ninactive_nonhuge", emitter_type_size,
&ninactive_nonhuge);
emitter_json_object_end(emitter); /* End "full_slabs" */ emitter_json_object_end(emitter); /* End "full_slabs" */
COL_HDR(row, size, NULL, right, 20, size) COL_HDR(row, size, NULL, right, 20, size)
COL_HDR(row, ind, NULL, right, 4, unsigned) COL_HDR(row, ind, NULL, right, 4, unsigned)
COL_HDR(row, npageslabs, NULL, right, 13, size) COL_HDR(row, npageslabs_huge, NULL, right, 16, size)
COL_HDR(row, nactive, NULL, right, 13, size) COL_HDR(row, nactive_huge, NULL, right, 16, size)
COL_HDR(row, ninactive, NULL, right, 13, size) COL_HDR(row, ninactive_huge, NULL, right, 16, size)
COL_HDR(row, npageslabs_nonhuge, NULL, right, 20, size)
COL_HDR(row, nactive_nonhuge, NULL, right, 20, size)
COL_HDR(row, ninactive_nonhuge, NULL, right, 20, size)
emitter_table_row(emitter, &header_row); emitter_table_row(emitter, &header_row);
emitter_json_array_kv_begin(emitter, "nonfull_slabs"); emitter_json_array_kv_begin(emitter, "nonfull_slabs");
bool in_gap = false; bool in_gap = false;
for (pszind_t j = 0; j < PSSET_NPSIZES; j++) { for (pszind_t j = 0; j < PSSET_NPSIZES; j++) {
CTL_M2_M5_GET( CTL_M2_M5_GET(
"stats.arenas.0.hpa_shard.nonfull_slabs.0.npageslabs", "stats.arenas.0.hpa_shard.nonfull_slabs.0.npageslabs_huge",
i, j, &npageslabs, size_t); i, j, &npageslabs_huge, size_t);
CTL_M2_M5_GET( CTL_M2_M5_GET(
"stats.arenas.0.hpa_shard.nonfull_slabs.0.nactive", "stats.arenas.0.hpa_shard.nonfull_slabs.0.nactive_huge",
i, j, &nactive, size_t); i, j, &nactive_huge, size_t);
CTL_M2_M5_GET( CTL_M2_M5_GET(
"stats.arenas.0.hpa_shard.nonfull_slabs.0.ninactive", "stats.arenas.0.hpa_shard.nonfull_slabs.0.ninactive_huge",
i, j, &ninactive, size_t); i, j, &ninactive_huge, size_t);
CTL_M2_M5_GET(
"stats.arenas.0.hpa_shard.nonfull_slabs.0.npageslabs_nonhuge",
i, j, &npageslabs_nonhuge, size_t);
CTL_M2_M5_GET(
"stats.arenas.0.hpa_shard.nonfull_slabs.0.nactive_nonhuge",
i, j, &nactive_nonhuge, size_t);
CTL_M2_M5_GET(
"stats.arenas.0.hpa_shard.nonfull_slabs.0.ninactive_nonhuge",
i, j, &ninactive_nonhuge, size_t);
bool in_gap_prev = in_gap; bool in_gap_prev = in_gap;
in_gap = (npageslabs == 0); in_gap = (npageslabs_huge == 0 && npageslabs_nonhuge == 0);
if (in_gap_prev && !in_gap) { if (in_gap_prev && !in_gap) {
emitter_table_printf(emitter, emitter_table_printf(emitter,
" ---\n"); " ---\n");
@ -726,20 +760,29 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i) {
col_size.size_val = sz_pind2sz(j); col_size.size_val = sz_pind2sz(j);
col_ind.size_val = j; col_ind.size_val = j;
col_npageslabs.size_val = npageslabs; col_npageslabs_huge.size_val = npageslabs_huge;
col_nactive.size_val = nactive; col_nactive_huge.size_val = nactive_huge;
col_ninactive.size_val = ninactive; col_ninactive_huge.size_val = ninactive_huge;
col_npageslabs_nonhuge.size_val = npageslabs_nonhuge;
col_nactive_nonhuge.size_val = nactive_nonhuge;
col_ninactive_nonhuge.size_val = ninactive_nonhuge;
if (!in_gap) { if (!in_gap) {
emitter_table_row(emitter, &row); emitter_table_row(emitter, &row);
} }
emitter_json_object_begin(emitter); emitter_json_object_begin(emitter);
emitter_json_kv(emitter, "npageslabs", emitter_type_size, emitter_json_kv(emitter, "npageslabs_huge", emitter_type_size,
&npageslabs); &npageslabs_huge);
emitter_json_kv(emitter, "nactive", emitter_type_size, emitter_json_kv(emitter, "nactive_huge", emitter_type_size,
&nactive); &nactive_huge);
emitter_json_kv(emitter, "ninactive", emitter_type_size, emitter_json_kv(emitter, "ninactive_huge", emitter_type_size,
&ninactive); &ninactive_huge);
emitter_json_kv(emitter, "npageslabs_nonhuge", emitter_type_size,
&npageslabs_nonhuge);
emitter_json_kv(emitter, "nactive_nonhuge", emitter_type_size,
&nactive_nonhuge);
emitter_json_kv(emitter, "ninactive_nonhuge", emitter_type_size,
&ninactive_nonhuge);
emitter_json_object_end(emitter); emitter_json_object_end(emitter);
} }
emitter_json_array_end(emitter); /* End "nonfull_slabs" */ emitter_json_array_end(emitter); /* End "nonfull_slabs" */

View File

@ -2,14 +2,9 @@
#include "jemalloc/internal/hpa.h" #include "jemalloc/internal/hpa.h"
#define HPA_IND 111 #define SHARD_IND 111
#define SHARD_IND 222
#define PS_GOAL (128 * PAGE) #define ALLOC_MAX (HUGEPAGE / 4)
#define PS_ALLOC_MAX (64 * PAGE)
#define HPA_SMALL_MAX (200 * PAGE)
#define HPA_LARGE_MIN (300 * PAGE)
typedef struct test_data_s test_data_t; typedef struct test_data_s test_data_t;
struct test_data_s { struct test_data_s {
@ -18,50 +13,32 @@ struct test_data_s {
* test_data_t and the hpa_shard_t; * test_data_t and the hpa_shard_t;
*/ */
hpa_shard_t shard; hpa_shard_t shard;
base_t *shard_base; base_t *base;
edata_cache_t shard_edata_cache; edata_cache_t shard_edata_cache;
hpa_t hpa;
base_t *hpa_base;
edata_cache_t hpa_edata_cache;
emap_t emap; emap_t emap;
}; };
static hpa_shard_t * static hpa_shard_t *
create_test_data() { create_test_data() {
bool err; bool err;
base_t *shard_base = base_new(TSDN_NULL, /* ind */ SHARD_IND, base_t *base = base_new(TSDN_NULL, /* ind */ SHARD_IND,
&ehooks_default_extent_hooks); &ehooks_default_extent_hooks);
assert_ptr_not_null(shard_base, ""); assert_ptr_not_null(base, "");
base_t *hpa_base = base_new(TSDN_NULL, /* ind */ HPA_IND,
&ehooks_default_extent_hooks);
assert_ptr_not_null(hpa_base, "");
test_data_t *test_data = malloc(sizeof(test_data_t)); test_data_t *test_data = malloc(sizeof(test_data_t));
assert_ptr_not_null(test_data, ""); assert_ptr_not_null(test_data, "");
test_data->shard_base = shard_base; test_data->base = base;
test_data->hpa_base = hpa_base;
err = edata_cache_init(&test_data->shard_edata_cache, shard_base); err = edata_cache_init(&test_data->shard_edata_cache, base);
assert_false(err, ""); assert_false(err, "");
err = edata_cache_init(&test_data->hpa_edata_cache, hpa_base); err = emap_init(&test_data->emap, test_data->base, /* zeroed */ false);
assert_false(err, ""); assert_false(err, "");
err = emap_init(&test_data->emap, test_data->hpa_base, err = hpa_shard_init(&test_data->shard, &test_data->emap,
/* zeroed */ false); &test_data->shard_edata_cache, SHARD_IND, ALLOC_MAX);
assert_false(err, "");
err = hpa_init(&test_data->hpa, hpa_base, &test_data->emap,
&test_data->hpa_edata_cache);
assert_false(err, "");
err = hpa_shard_init(&test_data->shard, &test_data->hpa,
&test_data->shard_edata_cache, SHARD_IND, PS_GOAL, PS_ALLOC_MAX,
HPA_SMALL_MAX, HPA_LARGE_MIN);
assert_false(err, ""); assert_false(err, "");
return (hpa_shard_t *)test_data; return (hpa_shard_t *)test_data;
@ -70,12 +47,11 @@ create_test_data() {
static void static void
destroy_test_data(hpa_shard_t *shard) { destroy_test_data(hpa_shard_t *shard) {
test_data_t *test_data = (test_data_t *)shard; test_data_t *test_data = (test_data_t *)shard;
base_delete(TSDN_NULL, test_data->shard_base); base_delete(TSDN_NULL, test_data->base);
base_delete(TSDN_NULL, test_data->hpa_base);
free(test_data); free(test_data);
} }
TEST_BEGIN(test_small_max_large_min) { TEST_BEGIN(test_alloc_max) {
test_skip_if(LG_SIZEOF_PTR != 3); test_skip_if(LG_SIZEOF_PTR != 3);
hpa_shard_t *shard = create_test_data(); hpa_shard_t *shard = create_test_data();
@ -84,18 +60,11 @@ TEST_BEGIN(test_small_max_large_min) {
edata_t *edata; edata_t *edata;
/* Small max */ /* Small max */
edata = pai_alloc(tsdn, &shard->pai, HPA_SMALL_MAX, PAGE, false); edata = pai_alloc(tsdn, &shard->pai, ALLOC_MAX, PAGE, false);
expect_ptr_not_null(edata, "Allocation of small max failed"); expect_ptr_not_null(edata, "Allocation of small max failed");
edata = pai_alloc(tsdn, &shard->pai, HPA_SMALL_MAX + PAGE, PAGE, false); edata = pai_alloc(tsdn, &shard->pai, ALLOC_MAX + PAGE, PAGE, false);
expect_ptr_null(edata, "Allocation of larger than small max succeeded"); expect_ptr_null(edata, "Allocation of larger than small max succeeded");
/* Large min */
edata = pai_alloc(tsdn, &shard->pai, HPA_LARGE_MIN, PAGE, false);
expect_ptr_not_null(edata, "Allocation of large min failed");
edata = pai_alloc(tsdn, &shard->pai, HPA_LARGE_MIN - PAGE, PAGE, false);
expect_ptr_null(edata,
"Allocation of smaller than large min succeeded");
destroy_test_data(shard); destroy_test_data(shard);
} }
TEST_END TEST_END
@ -178,26 +147,19 @@ TEST_BEGIN(test_stress) {
mem_tree_new(&tree); mem_tree_new(&tree);
for (size_t i = 0; i < 100 * 1000; i++) { for (size_t i = 0; i < 100 * 1000; i++) {
size_t operation = prng_range_zu(&prng_state, 4); size_t operation = prng_range_zu(&prng_state, 2);
if (operation < 2) { if (operation == 0) {
/* Alloc */ /* Alloc */
if (nlive_edatas == nlive_edatas_max) { if (nlive_edatas == nlive_edatas_max) {
continue; continue;
} }
size_t npages_min;
size_t npages_max;
/* /*
* We make sure to get an even balance of small and * We make sure to get an even balance of small and
* large allocations. * large allocations.
*/ */
if (operation == 0) { size_t npages_min = 1;
npages_min = 1; size_t npages_max = ALLOC_MAX / PAGE;
npages_max = HPA_SMALL_MAX / PAGE;
} else {
npages_min = HPA_LARGE_MIN / PAGE;
npages_max = HPA_LARGE_MIN / PAGE + 20;
}
size_t npages = npages_min + prng_range_zu(&prng_state, size_t npages = npages_min + prng_range_zu(&prng_state,
npages_max - npages_min); npages_max - npages_min);
edata_t *edata = pai_alloc(tsdn, &shard->pai, edata_t *edata = pai_alloc(tsdn, &shard->pai,
@ -260,6 +222,6 @@ main(void) {
(void)mem_tree_reverse_iter; (void)mem_tree_reverse_iter;
(void)mem_tree_destroy; (void)mem_tree_destroy;
return test_no_reentrancy( return test_no_reentrancy(
test_small_max_large_min, test_alloc_max,
test_stress); test_stress);
} }

View File

@ -2,7 +2,7 @@
#include "jemalloc/internal/psset.h" #include "jemalloc/internal/psset.h"
#define PAGESLAB_PAGES 64 #define PAGESLAB_PAGES (HUGEPAGE / PAGE)
#define PAGESLAB_SIZE (PAGESLAB_PAGES << LG_PAGE) #define PAGESLAB_SIZE (PAGESLAB_PAGES << LG_PAGE)
#define PAGESLAB_SN 123 #define PAGESLAB_SN 123
#define PAGESLAB_ADDR ((void *)(1234 << LG_PAGE)) #define PAGESLAB_ADDR ((void *)(1234 << LG_PAGE))
@ -296,22 +296,23 @@ TEST_END
static void static void
stats_expect_empty(psset_bin_stats_t *stats) { stats_expect_empty(psset_bin_stats_t *stats) {
assert_zu_eq(0, stats->npageslabs, assert_zu_eq(0, stats->npageslabs_nonhuge,
"Supposedly empty bin had positive npageslabs"); "Supposedly empty bin had positive npageslabs");
expect_zu_eq(0, stats->nactive, "Unexpected nonempty bin" expect_zu_eq(0, stats->nactive_nonhuge, "Unexpected nonempty bin"
"Supposedly empty bin had positive nactive"); "Supposedly empty bin had positive nactive");
expect_zu_eq(0, stats->ninactive, "Unexpected nonempty bin" expect_zu_eq(0, stats->ninactive_nonhuge, "Unexpected nonempty bin"
"Supposedly empty bin had positive ninactive"); "Supposedly empty bin had positive ninactive");
} }
static void static void
stats_expect(psset_t *psset, size_t nactive) { stats_expect(psset_t *psset, size_t nactive) {
if (nactive == PAGESLAB_PAGES) { if (nactive == PAGESLAB_PAGES) {
expect_zu_eq(1, psset->stats.full_slabs.npageslabs, expect_zu_eq(1, psset->stats.full_slabs.npageslabs_nonhuge,
"Expected a full slab"); "Expected a full slab");
expect_zu_eq(PAGESLAB_PAGES, psset->stats.full_slabs.nactive, expect_zu_eq(PAGESLAB_PAGES,
psset->stats.full_slabs.nactive_nonhuge,
"Should have exactly filled the bin"); "Should have exactly filled the bin");
expect_zu_eq(0, psset->stats.full_slabs.ninactive, expect_zu_eq(0, psset->stats.full_slabs.ninactive_nonhuge,
"Should never have inactive pages in a full slab"); "Should never have inactive pages in a full slab");
} else { } else {
stats_expect_empty(&psset->stats.full_slabs); stats_expect_empty(&psset->stats.full_slabs);
@ -325,13 +326,13 @@ stats_expect(psset_t *psset, size_t nactive) {
for (pszind_t i = 0; i < PSSET_NPSIZES; i++) { for (pszind_t i = 0; i < PSSET_NPSIZES; i++) {
if (i == nonempty_pind) { if (i == nonempty_pind) {
assert_zu_eq(1, assert_zu_eq(1,
psset->stats.nonfull_slabs[i].npageslabs, psset->stats.nonfull_slabs[i].npageslabs_nonhuge,
"Should have found a slab"); "Should have found a slab");
expect_zu_eq(nactive, expect_zu_eq(nactive,
psset->stats.nonfull_slabs[i].nactive, psset->stats.nonfull_slabs[i].nactive_nonhuge,
"Mismatch in active pages"); "Mismatch in active pages");
expect_zu_eq(ninactive, expect_zu_eq(ninactive,
psset->stats.nonfull_slabs[i].ninactive, psset->stats.nonfull_slabs[i].ninactive_nonhuge,
"Mismatch in inactive pages"); "Mismatch in inactive pages");
} else { } else {
stats_expect_empty(&psset->stats.nonfull_slabs[i]); stats_expect_empty(&psset->stats.nonfull_slabs[i]);