HPA: Manage whole hugepages at a time.

This redesigns the HPA implementation to allow us to manage hugepages all at
once, locally, without relying on a global fallback.
David Goldblatt
2020-11-09 13:49:30 -08:00
committed by David Goldblatt
parent 63677dde63
commit 43af63fff4
16 changed files with 700 additions and 550 deletions


@@ -16,7 +16,6 @@ extern const char *percpu_arena_mode_names[];
extern const uint64_t h_steps[SMOOTHSTEP_NSTEPS];
extern malloc_mutex_t arenas_lock;
extern emap_t arena_emap_global;
extern hpa_t arena_hpa_global;
extern size_t opt_oversize_threshold;
extern size_t oversize_threshold;


@@ -208,9 +208,9 @@ struct edata_s {
*/
/*
* If this edata is from an HPA, it may be part of some larger
* pageslab. Track it if so. Otherwise (either because it's
* not part of a pageslab, or not from the HPA at all), NULL.
* If this edata is a user allocation from an HPA, it comes out
* of some pageslab (we don't yet support hugepage allocations
* that don't fit into pageslabs). This tracks it.
*/
edata_t *ps;
/*
@@ -225,6 +225,8 @@ struct edata_s {
* between heaps.
*/
uint32_t longest_free_range;
/* Whether or not the slab is backed by a hugepage. */
bool hugeified;
};
};
@@ -328,6 +330,11 @@ edata_pai_get(const edata_t *edata) {
EDATA_BITS_PAI_SHIFT);
}
static inline bool
edata_hugeified_get(const edata_t *edata) {
return edata->hugeified;
}
static inline bool
edata_slab_get(const edata_t *edata) {
return (bool)((edata->e_bits & EDATA_BITS_SLAB_MASK) >>
@@ -559,6 +566,11 @@ edata_pai_set(edata_t *edata, extent_pai_t pai) {
((uint64_t)pai << EDATA_BITS_PAI_SHIFT);
}
static inline void
edata_hugeified_set(edata_t *edata, bool hugeified) {
edata->hugeified = hugeified;
}
static inline void
edata_slab_set(edata_t *edata, bool slab) {
edata->e_bits = (edata->e_bits & ~EDATA_BITS_SLAB_MASK) |
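
As a side note on the new hugeified flag and its accessors: below is a minimal, standalone sketch (not part of this diff and not jemalloc's real code; the edata_t here is a stripped-down stand-in and pages_huge_stub() is an invented placeholder) of the intended usage pattern, where a pageslab is flagged once its backing pages have been promoted to a hugepage and the flag is read back later when accounting for or purging the slab.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* Stripped-down stand-in for jemalloc's edata_t. */
typedef struct edata_s {
	void *addr;
	size_t size;
	bool hugeified;	/* mirrors the flag added in this commit */
} edata_t;

static inline bool
edata_hugeified_get(const edata_t *edata) {
	return edata->hugeified;
}

static inline void
edata_hugeified_set(edata_t *edata, bool hugeified) {
	edata->hugeified = hugeified;
}

/* Placeholder for asking the OS to back [addr, addr+size) with hugepages. */
static bool
pages_huge_stub(void *addr, size_t size) {
	(void)addr;
	(void)size;
	return false;	/* false == success, in the jemalloc style */
}

static void
hugify(edata_t *ps) {
	if (!edata_hugeified_get(ps) && !pages_huge_stub(ps->addr, ps->size)) {
		edata_hugeified_set(ps, true);
	}
}

int
main(void) {
	edata_t ps = {NULL, (size_t)2 << 20, false};
	hugify(&ps);
	printf("hugeified: %d\n", (int)edata_hugeified_get(&ps));
	return 0;
}
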


@@ -6,32 +6,6 @@
#include "jemalloc/internal/pai.h"
#include "jemalloc/internal/psset.h"
typedef struct hpa_s hpa_t;
struct hpa_s {
/*
* We have two mutexes for the central allocator; mtx protects its
* state, while grow_mtx controls the ability to grow the backing
* store. This prevents race conditions in which the central allocator
* has exhausted its memory while multiple threads are trying to
* allocate. If they all reserved more address space from the OS
* without synchronization, we'd end up consuming much more than
* necessary.
*/
malloc_mutex_t grow_mtx;
malloc_mutex_t mtx;
hpa_central_t central;
/* The arena ind we're associated with. */
unsigned ind;
/*
* This edata cache is the global one that we use for new allocations in
* growing; practically, it comes from a0.
*
* We don't use an edata_cache_small in front of this, since we expect a
* small finite number of allocations from it.
*/
edata_cache_t *edata_cache;
exp_grow_t exp_grow;
};
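
The removed comment above describes a pattern worth spelling out: one mutex (mtx) guards the allocator's state while a second (grow_mtx) serializes reservations of new backing memory, so threads that all observe an empty allocator don't each map a fresh region. The following standalone sketch, using pthreads in place of malloc_mutex_t and an invented os_reserve() placeholder, is illustrative only and is not the removed hpa_central code.

#include <pthread.h>
#include <stdbool.h>
#include <stddef.h>

typedef struct central_s {
	pthread_mutex_t mtx;      /* protects free_bytes */
	pthread_mutex_t grow_mtx; /* serializes growing the backing store */
	size_t free_bytes;
} central_t;

/* Placeholder for reserving more address space from the OS. */
static size_t
os_reserve(size_t want) {
	return want;
}

static bool
central_alloc(central_t *c, size_t size) {
	/* Fast path: satisfy the request from existing capacity. */
	pthread_mutex_lock(&c->mtx);
	if (c->free_bytes >= size) {
		c->free_bytes -= size;
		pthread_mutex_unlock(&c->mtx);
		return true;
	}
	pthread_mutex_unlock(&c->mtx);
	/*
	 * Grow path: take grow_mtx first, then re-check under mtx.  Only one
	 * thread reserves new memory; the others block here and find the
	 * freshly grown capacity when they re-check.
	 */
	pthread_mutex_lock(&c->grow_mtx);
	pthread_mutex_lock(&c->mtx);
	if (c->free_bytes < size) {
		c->free_bytes += os_reserve(size);
	}
	c->free_bytes -= size;
	pthread_mutex_unlock(&c->mtx);
	pthread_mutex_unlock(&c->grow_mtx);
	return true;
}

int
main(void) {
	central_t c = {PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER, 0};
	return central_alloc(&c, 4096) ? 0 : 1;
}
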
/* Used only by CTL; not actually stored here (i.e., all derived). */
typedef struct hpa_shard_stats_s hpa_shard_stats_t;
struct hpa_shard_stats_s {
@@ -53,44 +27,53 @@ struct hpa_shard_s {
* allocator, and so will use its edata_cache.
*/
edata_cache_small_t ecs;
hpa_t *hpa;
psset_t psset;
/*
* When we're grabbing a new ps from the central allocator, how big
* would we like it to be? This is mostly about the level of batching
* we use in our requests to the centralized allocator.
* The largest size we'll allocate out of the shard. For those
* allocations refused, the caller (in practice, the PA module) will
* fall back to the more general (for now) PAC, which can always handle
* any allocation request.
*/
size_t ps_goal;
size_t alloc_max;
/*
* What's the maximum size we'll try to allocate out of the psset? We
* don't want this to be too large relative to ps_goal, as a
* fragmentation avoidance measure.
* Slabs currently purged away. They are hugepage-sized and
* hugepage-aligned, but have had pages_nohuge and pages_purge_forced
* called on them.
*
* Guarded by grow_mtx.
*/
size_t ps_alloc_max;
edata_list_inactive_t unused_slabs;
/*
* What's the maximum size we'll try to allocate out of the shard at
* all?
* Either NULL (if empty), or a hugepage-aligned extent spanning some
* integer number of hugepages. We carve hugepages off of it one at a
* time to satisfy new pageslab requests.
*
* Guarded by grow_mtx.
*/
size_t small_max;
/*
* What's the minimum size for which we'll go straight to the global
* arena?
*/
size_t large_min;
edata_t *eden;
/* The arena ind we're associated with. */
unsigned ind;
emap_t *emap;
};
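
To make the eden/unused_slabs comments above concrete, here is a standalone sketch of the carving idea: keep one large hugepage-aligned region and split hugepage-sized pageslabs off its front, mapping a new region only when the current one is exhausted. The sizes, the mmap-based grow step, and the eden_carve() helper are assumptions for illustration; the real logic (including the grow_mtx protection and edata bookkeeping) lives in hpa.c.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/mman.h>

#define HUGEPAGE  ((size_t)2 << 20)	/* assume 2 MiB hugepages */
#define EDEN_SIZE (64 * HUGEPAGE)	/* grow eden in large batches */

typedef struct eden_s {
	uintptr_t addr;	/* hugepage-aligned start of the unused region */
	size_t len;	/* bytes remaining; a multiple of HUGEPAGE */
} eden_t;

/* Carve one hugepage-sized pageslab off the front of eden. */
static void *
eden_carve(eden_t *eden) {
	if (eden->len == 0) {
		/* Overallocate by one hugepage so we can align up. */
		void *p = mmap(NULL, EDEN_SIZE + HUGEPAGE,
		    PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
		if (p == MAP_FAILED) {
			return NULL;
		}
		eden->addr = ((uintptr_t)p + HUGEPAGE - 1) & ~(HUGEPAGE - 1);
		eden->len = EDEN_SIZE;
	}
	void *ps = (void *)eden->addr;
	eden->addr += HUGEPAGE;
	eden->len -= HUGEPAGE;
	return ps;
}

int
main(void) {
	eden_t eden = {0, 0};
	printf("ps1=%p ps2=%p\n", eden_carve(&eden), eden_carve(&eden));
	return 0;
}
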
bool hpa_init(hpa_t *hpa, base_t *base, emap_t *emap,
edata_cache_t *edata_cache);
bool hpa_shard_init(hpa_shard_t *shard, hpa_t *hpa,
edata_cache_t *edata_cache, unsigned ind, size_t ps_goal,
size_t ps_alloc_max, size_t small_max, size_t large_min);
/*
* Whether or not the HPA can be used given the current configuration. This is
* not necessarily a guarantee that it backs its allocations with hugepages,
* just that it can function properly given the system it's running on.
*/
bool hpa_supported();
bool hpa_shard_init(hpa_shard_t *shard, emap_t *emap,
edata_cache_t *edata_cache, unsigned ind, size_t alloc_max);
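
A usage-level sketch of the gate described above: callers check hpa_supported() before initializing a shard, and fall back to the PAC-only path otherwise. The stub types and bodies below stand in for jemalloc internals so the snippet is self-contained; the real call site, and the choice of alloc_max, live in the arena/PA bootstrap code and are not shown here.

#include <stdbool.h>
#include <stddef.h>

typedef struct emap_s emap_t;
typedef struct edata_cache_s edata_cache_t;
typedef struct hpa_shard_s { int placeholder; } hpa_shard_t;

/* Stand-ins for the declarations above; bodies are fake. */
static bool hpa_supported(void) { return true; }
static bool hpa_shard_init(hpa_shard_t *shard, emap_t *emap,
    edata_cache_t *edata_cache, unsigned ind, size_t alloc_max) {
	(void)shard; (void)emap; (void)edata_cache; (void)ind; (void)alloc_max;
	return false;	/* false == success */
}

/* Returns true if the shard was enabled. */
static bool
maybe_enable_hpa(hpa_shard_t *shard, emap_t *emap, edata_cache_t *cache,
    unsigned arena_ind) {
	if (!hpa_supported()) {
		return false;	/* stick with the PAC for everything */
	}
	/* 64 KiB is an arbitrary example cutoff for alloc_max. */
	return !hpa_shard_init(shard, emap, cache, arena_ind, (size_t)64 << 10);
}

int
main(void) {
	hpa_shard_t shard;
	return maybe_enable_hpa(&shard, NULL, NULL, 0) ? 0 : 1;
}
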
void hpa_shard_stats_accum(hpa_shard_stats_t *dst, hpa_shard_stats_t *src);
void hpa_shard_stats_merge(tsdn_t *tsdn, hpa_shard_t *shard,
hpa_shard_stats_t *dst);
void hpa_stats_accum(hpa_shard_stats_t *dst, hpa_shard_stats_t *src);
void hpa_stats_merge(tsdn_t *tsdn, hpa_shard_t *shard, hpa_shard_stats_t *dst);
/*
* Notify the shard that we won't use it for allocations much longer. Due to
* the possibility of races, we don't actually prevent allocations; just flush
@@ -108,14 +91,4 @@ void hpa_shard_prefork4(tsdn_t *tsdn, hpa_shard_t *shard);
void hpa_shard_postfork_parent(tsdn_t *tsdn, hpa_shard_t *shard);
void hpa_shard_postfork_child(tsdn_t *tsdn, hpa_shard_t *shard);
/*
* These should be acquired after all the shard locks in phase 4, but before
* any locks acquired in later phases. The central HPA may acquire an edata
* cache mutex (of a0),
* so it needs to be lower in the witness ordering, but it's also logically
* global and not tied to any particular arena.
*/
void hpa_prefork4(tsdn_t *tsdn, hpa_t *hpa);
void hpa_postfork_parent(tsdn_t *tsdn, hpa_t *hpa);
void hpa_postfork_child(tsdn_t *tsdn, hpa_t *hpa);
#endif /* JEMALLOC_INTERNAL_HPA_H */


@@ -11,9 +11,7 @@
OP(ctl) \
OP(prof) \
OP(prof_thds_data) \
OP(prof_dump) \
OP(hpa_central) \
OP(hpa_central_grow)
OP(prof_dump)
typedef enum {
#define OP(mtx) global_prof_mutex_##mtx,


@@ -130,9 +130,8 @@ bool pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base,
* This isn't exposed to users; we allow late enablement of the HPA shard so
* that we can boot without worrying about the HPA, then turn it on in a0.
*/
bool pa_shard_enable_hpa(pa_shard_t *shard, hpa_t *hpa, size_t ps_goal,
size_t ps_alloc_max, size_t small_max, size_t large_min, size_t sec_nshards,
size_t sec_alloc_max, size_t sec_bytes_max);
bool pa_shard_enable_hpa(pa_shard_t *shard, size_t alloc_max,
size_t sec_nshards, size_t sec_alloc_max, size_t sec_bytes_max);
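
The comment above describes a two-step bring-up: the shard boots with only the PAC, and the HPA (plus its SEC cache) is switched on later once a0 exists. The sketch below models that flow with invented stand-in types and fields; the actual pa_shard_t layout and the option values that feed the real pa_shard_enable_hpa call are not shown here.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* Invented stand-in; the real pa_shard_t is much larger. */
typedef struct pa_shard_s {
	bool use_hpa;		/* route eligible allocations through the HPA? */
	bool ever_used_hpa;	/* keep redirecting frees even after disabling */
	size_t hpa_alloc_max;
} pa_shard_t;

/* Step 1: boot the shard with the PAC only; the HPA is not involved yet. */
static void
pa_shard_boot(pa_shard_t *shard) {
	shard->use_hpa = false;
	shard->ever_used_hpa = false;
	shard->hpa_alloc_max = 0;
}

/* Step 2: later (once a0 is up), switch the HPA on for this shard. */
static bool
pa_shard_enable_hpa_sketch(pa_shard_t *shard, size_t alloc_max) {
	shard->hpa_alloc_max = alloc_max;
	shard->use_hpa = true;
	shard->ever_used_hpa = true;
	return false;	/* false == success */
}

int
main(void) {
	pa_shard_t shard;
	pa_shard_boot(&shard);
	/* ... bootstrap proceeds without the HPA ... */
	pa_shard_enable_hpa_sketch(&shard, (size_t)64 << 10);
	printf("hpa enabled: %d\n", (int)shard.use_hpa);
	return 0;
}
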
/*
* We stop using the HPA when custom extent hooks are installed, but still
* redirect deallocations to it.


@@ -24,11 +24,14 @@
typedef struct psset_bin_stats_s psset_bin_stats_t;
struct psset_bin_stats_s {
/* How many pageslabs are in this bin? */
size_t npageslabs;
size_t npageslabs_huge;
size_t npageslabs_nonhuge;
/* Of them, how many pages are active? */
size_t nactive;
size_t nactive_huge;
size_t nactive_nonhuge;
/* How many are inactive? */
size_t ninactive;
size_t ninactive_huge;
size_t ninactive_nonhuge;
};
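
Since every counter in the bin stats is now split into a huge/nonhuge pair, anything that aggregates them has to sum both halves. The following standalone sketch mirrors the shape of the struct above and shows one plausible accumulation helper; it is illustrative and is not jemalloc's psset stats code.

#include <stddef.h>
#include <stdio.h>

typedef struct psset_bin_stats_s {
	size_t npageslabs_huge;
	size_t npageslabs_nonhuge;
	size_t nactive_huge;
	size_t nactive_nonhuge;
	size_t ninactive_huge;
	size_t ninactive_nonhuge;
} psset_bin_stats_t;

static void
bin_stats_accum(psset_bin_stats_t *dst, const psset_bin_stats_t *src) {
	dst->npageslabs_huge += src->npageslabs_huge;
	dst->npageslabs_nonhuge += src->npageslabs_nonhuge;
	dst->nactive_huge += src->nactive_huge;
	dst->nactive_nonhuge += src->nactive_nonhuge;
	dst->ninactive_huge += src->ninactive_huge;
	dst->ninactive_nonhuge += src->ninactive_nonhuge;
}

int
main(void) {
	psset_bin_stats_t total = {0}, bin = {1, 2, 3, 4, 5, 6};
	bin_stats_accum(&total, &bin);
	printf("%zu pageslabs, %zu active pages\n",
	    total.npageslabs_huge + total.npageslabs_nonhuge,
	    total.nactive_huge + total.nactive_nonhuge);
	return 0;
}
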
/* Used only by CTL; not actually stored here (i.e., all derived). */
@@ -62,6 +65,8 @@ void psset_stats_accum(psset_stats_t *dst, psset_stats_t *src);
void psset_insert(psset_t *psset, edata_t *ps);
void psset_remove(psset_t *psset, edata_t *ps);
void psset_hugify(psset_t *psset, edata_t *ps);
/*
* Tries to obtain a chunk from an existing pageslab already in the set.
* Returns true on failure.