HPA: Tie components into a PAI implementation.
committed by David Goldblatt
parent c8209150f9
commit 1c7da33317
@@ -16,6 +16,7 @@ extern const char *percpu_arena_mode_names[];
extern const uint64_t h_steps[SMOOTHSTEP_NSTEPS];
extern malloc_mutex_t arenas_lock;
extern emap_t arena_emap_global;
extern hpa_t arena_hpa_global;

extern size_t opt_oversize_threshold;
extern size_t oversize_threshold;
include/jemalloc/internal/hpa.h (new file, 92 lines)
@@ -0,0 +1,92 @@
#ifndef JEMALLOC_INTERNAL_HPA_H
#define JEMALLOC_INTERNAL_HPA_H

#include "jemalloc/internal/geom_grow.h"
#include "jemalloc/internal/hpa_central.h"
#include "jemalloc/internal/pai.h"
#include "jemalloc/internal/psset.h"

typedef struct hpa_s hpa_t;
struct hpa_s {
	/*
	 * We have two mutexes for the central allocator; mtx protects its
	 * state, while grow_mtx controls the ability to grow the backing
	 * store.  This prevents race conditions in which the central
	 * allocator has exhausted its memory while multiple threads are
	 * trying to allocate.  If they all reserved more address space from
	 * the OS without synchronization, we'd end up consuming much more
	 * than necessary.
	 */
	malloc_mutex_t grow_mtx;
	malloc_mutex_t mtx;
	hpa_central_t central;
	/* The arena ind we're associated with. */
	unsigned ind;
	/*
	 * This edata cache is the global one that we use for new allocations
	 * in growing; practically, it comes from a0.
	 */
	edata_cache_t *edata_cache;
	geom_grow_t geom_grow;
};

typedef struct hpa_shard_s hpa_shard_t;
struct hpa_shard_s {
	/*
	 * pai must be the first member; we cast from a pointer to it to a
	 * pointer to the hpa_shard_t.
	 */
	pai_t pai;
	malloc_mutex_t grow_mtx;
	malloc_mutex_t mtx;
	/*
	 * This edata cache is the one we use when allocating a small extent
	 * from a pageslab.  The pageslab itself comes from the centralized
	 * allocator, and so will use its edata_cache.
	 */
	edata_cache_t *edata_cache;
	hpa_t *hpa;
	psset_t psset;

	/*
	 * When we're grabbing a new ps from the central allocator, how big
	 * would we like it to be?  This is mostly about the level of batching
	 * we use in our requests to the centralized allocator.
	 */
	size_t ps_goal;
	/*
	 * What's the maximum size we'll try to allocate out of the psset?  We
	 * don't want this to be too large relative to ps_goal, as a
	 * fragmentation avoidance measure.
	 */
	size_t ps_alloc_max;
	/* The arena ind we're associated with. */
	unsigned ind;
};

bool hpa_init(hpa_t *hpa, base_t *base, emap_t *emap,
    edata_cache_t *edata_cache);
bool hpa_shard_init(hpa_shard_t *shard, hpa_t *hpa,
    edata_cache_t *edata_cache, unsigned ind, size_t ps_goal,
    size_t ps_alloc_max);
void hpa_shard_destroy(tsdn_t *tsdn, hpa_shard_t *shard);

/*
 * We share the fork ordering with the PA and arena prefork handling; that's
 * why these are 2 and 3 rather than 0 or 1.
 */
void hpa_shard_prefork2(tsdn_t *tsdn, hpa_shard_t *shard);
void hpa_shard_prefork3(tsdn_t *tsdn, hpa_shard_t *shard);
void hpa_shard_postfork_parent(tsdn_t *tsdn, hpa_shard_t *shard);
void hpa_shard_postfork_child(tsdn_t *tsdn, hpa_shard_t *shard);

/*
 * These should be acquired after all the shard locks in phase 3, but before
 * any locks in phase 4.  The central HPA may acquire an edata cache mutex (of
 * a0), so it needs to be lower in the witness ordering, but it's also
 * logically global and not tied to any particular arena.
 */
void hpa_prefork3(tsdn_t *tsdn, hpa_t *hpa);
void hpa_postfork_parent(tsdn_t *tsdn, hpa_t *hpa);
void hpa_postfork_child(tsdn_t *tsdn, hpa_t *hpa);

#endif /* JEMALLOC_INTERNAL_HPA_H */
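To make the hpa_s locking comment above concrete, here is a minimal sketch of the two-mutex grow protocol it describes. This is illustrative only, not the hpa.c code from this commit: central_try_alloc and central_grow are hypothetical stand-ins for the real hpa_central operations; the point is only the locking shape, where grow_mtx serializes growth of the backing store and mtx protects the central allocator's state.

/*
 * Hedged sketch; central_try_alloc and central_grow are hypothetical names,
 * not the hpa_central API.
 */
static edata_t *
hpa_alloc_grow_sketch(tsdn_t *tsdn, hpa_t *hpa, size_t size) {
	malloc_mutex_lock(tsdn, &hpa->mtx);
	edata_t *edata = central_try_alloc(&hpa->central, size);
	malloc_mutex_unlock(tsdn, &hpa->mtx);
	if (edata != NULL) {
		return edata;
	}
	/*
	 * grow_mtx serializes growth, so concurrent allocators that all see
	 * an exhausted central allocator don't each reserve fresh address
	 * space from the OS.
	 */
	malloc_mutex_lock(tsdn, &hpa->grow_mtx);
	malloc_mutex_lock(tsdn, &hpa->mtx);
	/* Re-check: another grower may already have satisfied the request. */
	edata = central_try_alloc(&hpa->central, size);
	if (edata == NULL) {
		edata = central_grow(tsdn, &hpa->central, hpa->edata_cache,
		    size);
	}
	malloc_mutex_unlock(tsdn, &hpa->mtx);
	malloc_mutex_unlock(tsdn, &hpa->grow_mtx);
	return edata;
}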
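The "pai must be the first member" comment in hpa_shard_s is the usual first-member cast idiom: a pai_t * handed to the PAI callbacks is converted back to its enclosing shard. A hedged sketch (the helper name is made up; the real callbacks live in hpa.c):

static inline hpa_shard_t *
hpa_from_pai(pai_t *self) {
	/* Valid only because pai is the first member of hpa_shard_s. */
	return (hpa_shard_t *)self;
}

This cast is what lets the shard be plugged in wherever a pai_t is expected, which is the point of tying the HPA into a PAI implementation.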
@@ -12,6 +12,7 @@ extern bool malloc_slow;
extern bool opt_abort;
extern bool opt_abort_conf;
extern bool opt_confirm_conf;
extern bool opt_hpa;
extern const char *opt_junk;
extern bool opt_junk_alloc;
extern bool opt_junk_free;
@@ -6,6 +6,7 @@
#include "jemalloc/internal/ecache.h"
#include "jemalloc/internal/edata_cache.h"
#include "jemalloc/internal/emap.h"
#include "jemalloc/internal/hpa.h"
#include "jemalloc/internal/lockedint.h"
#include "jemalloc/internal/pac.h"
#include "jemalloc/internal/pai.h"
@@ -66,12 +67,32 @@ struct pa_shard_s {
	 */
	atomic_zu_t nactive;

	/*
	 * Whether or not we should prefer the hugepage allocator.  Atomic
	 * since it may be concurrently modified by a thread setting extent
	 * hooks.  Note that we may still do HPA operations in this arena; if
	 * use_hpa is changed from true to false, we'll free back to the
	 * hugepage allocator for those allocations.
	 */
	atomic_b_t use_hpa;
	/*
	 * If we never used the HPA to begin with, it wasn't initialized, and
	 * so we shouldn't try to e.g. acquire its mutexes during fork.  This
	 * tracks that knowledge.
	 */
	bool ever_used_hpa;

	/* Allocates from a PAC. */
	pac_t pac;

	/* Allocates from an HPA. */
	hpa_shard_t hpa_shard;

	/* The source of edata_t objects. */
	edata_cache_t edata_cache;

	unsigned ind;

	malloc_mutex_t *stats_mtx;
	pa_shard_stats_t *stats;
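To illustrate the use_hpa comment, a hedged sketch of the kind of check an allocation path can make; pa_use_hpa_sketch and its memory order are assumptions, not the pa.c code. The flag only gates new allocations, so frees of HPA-backed extents keep going back to the HPA even after it is switched off.

static inline bool
pa_use_hpa_sketch(pa_shard_t *shard) {
	/* Hypothetical helper; the relaxed order here is an assumption. */
	return atomic_load_b(&shard->use_hpa, ATOMIC_RELAXED);
}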
@@ -98,6 +119,17 @@ bool pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base,
    unsigned ind, pa_shard_stats_t *stats, malloc_mutex_t *stats_mtx,
    nstime_t *cur_time, ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms);

/*
 * This isn't exposed to users; we allow late enablement of the HPA shard so
 * that we can boot without worrying about the HPA, then turn it on in a0.
 */
bool pa_shard_enable_hpa(pa_shard_t *shard, hpa_t *hpa);
/*
 * We stop using the HPA when custom extent hooks are installed, but still
 * redirect deallocations to it.
 */
void pa_shard_disable_hpa(pa_shard_t *shard);

/*
 * This does the PA-specific parts of arena reset (i.e. freeing all active
 * allocations).
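The late-enablement comment suggests a boot flow roughly like the sketch below. The wrapper function and its parameters are illustrative wiring, not this commit's boot code; it only combines declarations visible elsewhere in this diff: initialize the global hpa_t once, then opt arena 0's pa_shard into it.

static bool
hpa_boot_sketch(base_t *base, pa_shard_t *a0_shard,
    edata_cache_t *a0_edata_cache) {
	if (!opt_hpa) {
		return false;
	}
	/* Both calls follow jemalloc's usual "true means failure" convention. */
	if (hpa_init(&arena_hpa_global, base, &arena_emap_global,
	    a0_edata_cache)) {
		return true;
	}
	return pa_shard_enable_hpa(a0_shard, &arena_hpa_global);
}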
@@ -49,7 +49,6 @@ struct psset_s {

void psset_init(psset_t *psset);

/*
 * Tries to obtain a chunk from an existing pageslab already in the set.
 * Returns true on failure.
@@ -43,8 +43,16 @@ enum witness_rank_e {
	WITNESS_RANK_CORE,
	WITNESS_RANK_DECAY = WITNESS_RANK_CORE,
	WITNESS_RANK_TCACHE_QL,

	WITNESS_RANK_EXTENT_GROW,
	WITNESS_RANK_HPA_SHARD_GROW = WITNESS_RANK_EXTENT_GROW,

	WITNESS_RANK_EXTENTS,
	WITNESS_RANK_HPA_SHARD = WITNESS_RANK_EXTENTS,

	WITNESS_RANK_HPA_GROW,
	WITNESS_RANK_HPA,

	WITNESS_RANK_EDATA_CACHE,

	WITNESS_RANK_EMAP,
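Read together with the hpa.h prefork comments, the new ranks place the HPA locks between the per-shard locks (aliased to the extent ranks) and the edata cache. A hedged sketch of the acquisition order this implies during prefork; the wrapper is illustrative, and the real call sites are in the pa/arena prefork path:

static void
prefork_order_sketch(tsdn_t *tsdn, pa_shard_t *shard, hpa_t *hpa) {
	if (!shard->ever_used_hpa) {
		/* Never initialized; don't touch its mutexes (see pa.h). */
		return;
	}
	hpa_shard_prefork2(tsdn, &shard->hpa_shard);	/* shard grow_mtx */
	hpa_shard_prefork3(tsdn, &shard->hpa_shard);	/* shard mtx */
	hpa_prefork3(tsdn, hpa);	/* global HPA locks, ranked just below */
	/* Lower-ranked locks, e.g. the edata caches, follow in later phases. */
}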