PA: Use an SEC in front of the HPA shard.
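The one-line subject is terse, so in outline: allocations that the page allocator shard (pa_shard) previously handed straight to the HPA shard now go through a new sec_t hpa_sec (see pa_get_pai() and pa_alloc() in src/pa.c below), three new options (hpa_sec_nshards, hpa_sec_max_alloc, hpa_sec_max_bytes) size the cache, and the arena/HPA prefork stages each shift by one to make room for the SEC lock in the fork ordering. The sketch below is illustrative only, not jemalloc's sec.c; every name in it (toy_sec_t, backing_alloc(), the constants) is invented. It shows the idea the diff relies on: a small per-size-class cache sitting in front of a slower, lock-protected backing allocator, so that most allocations and frees never touch the backing allocator's hot mutex.

/*
 * Illustrative sketch only -- not jemalloc's sec.c. All names here
 * (toy_sec_t, backing_alloc, ...) are invented for this example.
 */
#include <assert.h>
#include <stddef.h>
#include <stdlib.h>

enum { TOY_NCLASSES = 8, TOY_QUANTUM = 4096, TOY_CACHE_SLOTS = 16 };

typedef struct toy_sec_s {
	/* One small stack of cached pointers per size class. */
	void *cache[TOY_NCLASSES][TOY_CACHE_SLOTS];
	size_t ncached[TOY_NCLASSES];
} toy_sec_t;

/* Stand-ins for the expensive backing path (the HPA shard in this commit). */
static void *backing_alloc(size_t size) { return malloc(size); }
static void backing_dalloc(void *ptr) { free(ptr); }

/* Class i holds allocations of exactly (i + 1) * TOY_QUANTUM bytes. */
static size_t
toy_class(size_t size) {
	assert(size > 0 && size <= TOY_NCLASSES * TOY_QUANTUM);
	return (size + TOY_QUANTUM - 1) / TOY_QUANTUM - 1;
}

void *
toy_sec_alloc(toy_sec_t *sec, size_t size) {
	if (size == 0 || size > TOY_NCLASSES * TOY_QUANTUM) {
		return backing_alloc(size);	/* too large: bypass the cache */
	}
	size_t i = toy_class(size);
	if (sec->ncached[i] > 0) {
		/* Cache hit: satisfied without taking any backing lock. */
		return sec->cache[i][--sec->ncached[i]];
	}
	return backing_alloc((i + 1) * TOY_QUANTUM);
}

void
toy_sec_dalloc(toy_sec_t *sec, void *ptr, size_t size) {
	if (size == 0 || size > TOY_NCLASSES * TOY_QUANTUM ||
	    sec->ncached[toy_class(size)] == TOY_CACHE_SLOTS) {
		backing_dalloc(ptr);	/* uncacheable or cache full */
		return;
	}
	size_t i = toy_class(size);
	sec->cache[i][sec->ncached[i]++] = ptr;
}

A zero-initialized toy_sec_t acts as an empty cache; flushing it back to the backing allocator (what sec_flush() does for the real SEC in arena_decay() below) would just walk the cache arrays and hand each entry back via backing_dalloc().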
commit 6599651aee (parent ea51e97bb8)
@@ -28,7 +28,7 @@ void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads,
const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms,
size_t *nactive, size_t *ndirty, size_t *nmuzzy, arena_stats_t *astats,
bin_stats_data_t *bstats, arena_stats_large_t *lstats,
pac_estats_t *estats, hpa_shard_stats_t *hpastats);
pac_estats_t *estats, hpa_shard_stats_t *hpastats, sec_stats_t *secstats);
void arena_handle_new_dirty_pages(tsdn_t *tsdn, arena_t *arena);
#ifdef JEMALLOC_JET
size_t arena_slab_regind(edata_t *slab, szind_t binind, const void *ptr);
@@ -99,6 +99,7 @@ void arena_prefork4(tsdn_t *tsdn, arena_t *arena);
void arena_prefork5(tsdn_t *tsdn, arena_t *arena);
void arena_prefork6(tsdn_t *tsdn, arena_t *arena);
void arena_prefork7(tsdn_t *tsdn, arena_t *arena);
void arena_prefork8(tsdn_t *tsdn, arena_t *arena);
void arena_postfork_parent(tsdn_t *tsdn, arena_t *arena);
void arena_postfork_child(tsdn_t *tsdn, arena_t *arena);

@@ -46,6 +46,7 @@ typedef struct ctl_arena_stats_s {
arena_stats_large_t lstats[SC_NSIZES - SC_NBINS];
pac_estats_t estats[SC_NPSIZES];
hpa_shard_stats_t hpastats;
sec_stats_t secstats;
} ctl_arena_stats_t;

typedef struct ctl_stats_s {

@@ -90,10 +90,10 @@ void hpa_shard_destroy(tsdn_t *tsdn, hpa_shard_t *shard);

/*
* We share the fork ordering with the PA and arena prefork handling; that's why
* these are 2 and 3 rather than 0 or 1.
* these are 3 and 4 rather than 0 and 1.
*/
void hpa_shard_prefork2(tsdn_t *tsdn, hpa_shard_t *shard);
void hpa_shard_prefork3(tsdn_t *tsdn, hpa_shard_t *shard);
void hpa_shard_prefork4(tsdn_t *tsdn, hpa_shard_t *shard);
void hpa_shard_postfork_parent(tsdn_t *tsdn, hpa_shard_t *shard);
void hpa_shard_postfork_child(tsdn_t *tsdn, hpa_shard_t *shard);

@@ -103,7 +103,7 @@ void hpa_shard_postfork_child(tsdn_t *tsdn, hpa_shard_t *shard);
* so it needs to be lower in the witness ordering, but it's also logically
* global and not tied to any particular arena.
*/
void hpa_prefork3(tsdn_t *tsdn, hpa_t *hpa);
void hpa_prefork4(tsdn_t *tsdn, hpa_t *hpa);
void hpa_postfork_parent(tsdn_t *tsdn, hpa_t *hpa);
void hpa_postfork_child(tsdn_t *tsdn, hpa_t *hpa);

@@ -17,6 +17,11 @@ extern size_t opt_hpa_slab_goal;
extern size_t opt_hpa_slab_max_alloc;
extern size_t opt_hpa_small_max;
extern size_t opt_hpa_large_min;

extern size_t opt_hpa_sec_max_alloc;
extern size_t opt_hpa_sec_max_bytes;
extern size_t opt_hpa_sec_nshards;

extern const char *opt_junk;
extern bool opt_junk_alloc;
extern bool opt_junk_free;

@@ -33,7 +33,8 @@ typedef enum {
OP(base) \
OP(tcache_list) \
OP(hpa_shard) \
OP(hpa_shard_grow)
OP(hpa_shard_grow) \
OP(hpa_sec)

typedef enum {
#define OP(mtx) arena_prof_mutex_##mtx,

@@ -10,6 +10,7 @@
#include "jemalloc/internal/lockedint.h"
#include "jemalloc/internal/pac.h"
#include "jemalloc/internal/pai.h"
#include "jemalloc/internal/sec.h"

/*
* The page allocator; responsible for acquiring pages of memory for
@@ -85,7 +86,12 @@ struct pa_shard_s {
/* Allocates from a PAC. */
pac_t pac;

/* Allocates from a HPA. */
/*
* We place a small extent cache in front of the HPA, since we intend
* these configurations to use many fewer arenas, and therefore have a
* higher risk of hot locks.
*/
sec_t hpa_sec;
hpa_shard_t hpa_shard;

/* The source of edata_t objects. */
@@ -124,18 +130,20 @@ bool pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base,
* that we can boot without worrying about the HPA, then turn it on in a0.
*/
bool pa_shard_enable_hpa(pa_shard_t *shard, hpa_t *hpa, size_t ps_goal,
size_t ps_alloc_max, size_t small_max, size_t large_min);
size_t ps_alloc_max, size_t small_max, size_t large_min, size_t sec_nshards,
size_t sec_alloc_max, size_t sec_bytes_max);
/*
* We stop using the HPA when custom extent hooks are installed, but still
* redirect deallocations to it.
*/
void pa_shard_disable_hpa(pa_shard_t *shard);
void pa_shard_disable_hpa(tsdn_t *tsdn, pa_shard_t *shard);

/*
* This does the PA-specific parts of arena reset (i.e. freeing all active
* allocations).
*/
void pa_shard_reset(pa_shard_t *shard);
void pa_shard_reset(tsdn_t *tsdn, pa_shard_t *shard);

/*
* Destroy all the remaining retained extents. Should only be called after
* decaying all active, dirty, and muzzy extents to the retained state, as the
@@ -184,6 +192,7 @@ void pa_shard_prefork0(tsdn_t *tsdn, pa_shard_t *shard);
void pa_shard_prefork2(tsdn_t *tsdn, pa_shard_t *shard);
void pa_shard_prefork3(tsdn_t *tsdn, pa_shard_t *shard);
void pa_shard_prefork4(tsdn_t *tsdn, pa_shard_t *shard);
void pa_shard_prefork5(tsdn_t *tsdn, pa_shard_t *shard);
void pa_shard_postfork_parent(tsdn_t *tsdn, pa_shard_t *shard);
void pa_shard_postfork_child(tsdn_t *tsdn, pa_shard_t *shard);

@@ -192,7 +201,8 @@ void pa_shard_basic_stats_merge(pa_shard_t *shard, size_t *nactive,

void pa_shard_stats_merge(tsdn_t *tsdn, pa_shard_t *shard,
pa_shard_stats_t *pa_shard_stats_out, pac_estats_t *estats_out,
hpa_shard_stats_t *hpa_stats_out, size_t *resident);
hpa_shard_stats_t *hpa_stats_out, sec_stats_t *sec_stats_out,
size_t *resident);

/*
* Reads the PA-owned mutex stats into the output stats array, at the
src/arena.c

@@ -81,7 +81,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads,
const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms,
size_t *nactive, size_t *ndirty, size_t *nmuzzy, arena_stats_t *astats,
bin_stats_data_t *bstats, arena_stats_large_t *lstats,
pac_estats_t *estats, hpa_shard_stats_t *hpastats) {
pac_estats_t *estats, hpa_shard_stats_t *hpastats, sec_stats_t *secstats) {
cassert(config_stats);

arena_basic_stats_merge(tsdn, arena, nthreads, dss, dirty_decay_ms,
@@ -139,7 +139,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads,
}

pa_shard_stats_merge(tsdn, &arena->pa_shard, &astats->pa_shard_stats,
estats, hpastats, &astats->resident);
estats, hpastats, secstats, &astats->resident);

LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx);

@@ -483,6 +483,14 @@ arena_decay_muzzy(tsdn_t *tsdn, arena_t *arena, bool is_background_thread,

void
arena_decay(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all) {
if (all) {
/*
* We should take a purge of "all" to mean "save as much memory
* as possible", including flushing any caches (for situations
* like thread death, or manual purge calls).
*/
sec_flush(tsdn, &arena->pa_shard.hpa_sec);
}
if (arena_decay_dirty(tsdn, arena, is_background_thread, all)) {
return;
}
@@ -631,7 +639,7 @@ arena_reset(tsd_t *tsd, arena_t *arena) {
&arena->bins[i].bin_shards[j]);
}
}
pa_shard_reset(&arena->pa_shard);
pa_shard_reset(tsd_tsdn(tsd), &arena->pa_shard);
}

void
@@ -1362,7 +1370,7 @@ arena_set_extent_hooks(tsd_t *tsd, arena_t *arena,
malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
}
/* No using the HPA now that we have the custom hooks. */
pa_shard_disable_hpa(&arena->pa_shard);
pa_shard_disable_hpa(tsd_tsdn(tsd), &arena->pa_shard);
extent_hooks_t *ret = base_extent_hooks_set(arena->base, extent_hooks);
if (have_background_thread) {
malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
@@ -1529,7 +1537,8 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) {
if (opt_hpa && ehooks_are_default(base_ehooks_get(base)) && ind != 0) {
if (pa_shard_enable_hpa(&arena->pa_shard, &arena_hpa_global,
opt_hpa_slab_goal, opt_hpa_slab_max_alloc,
opt_hpa_small_max, opt_hpa_large_min)) {
opt_hpa_small_max, opt_hpa_large_min, opt_hpa_sec_nshards,
opt_hpa_sec_max_alloc, opt_hpa_sec_max_bytes)) {
goto label_error;
}
}
@@ -1658,16 +1667,21 @@ arena_prefork4(tsdn_t *tsdn, arena_t *arena) {

void
arena_prefork5(tsdn_t *tsdn, arena_t *arena) {
base_prefork(tsdn, arena->base);
pa_shard_prefork5(tsdn, &arena->pa_shard);
}

void
arena_prefork6(tsdn_t *tsdn, arena_t *arena) {
malloc_mutex_prefork(tsdn, &arena->large_mtx);
base_prefork(tsdn, arena->base);
}

void
arena_prefork7(tsdn_t *tsdn, arena_t *arena) {
malloc_mutex_prefork(tsdn, &arena->large_mtx);
}

void
arena_prefork8(tsdn_t *tsdn, arena_t *arena) {
for (unsigned i = 0; i < SC_NBINS; i++) {
for (unsigned j = 0; j < bin_infos[i].n_shards; j++) {
bin_prefork(tsdn, &arena->bins[i].bin_shards[j]);
src/ctl.c

@@ -95,6 +95,9 @@ CTL_PROTO(opt_hpa_slab_goal)
CTL_PROTO(opt_hpa_slab_max_alloc)
CTL_PROTO(opt_hpa_small_max)
CTL_PROTO(opt_hpa_large_min)
CTL_PROTO(opt_hpa_sec_max_alloc)
CTL_PROTO(opt_hpa_sec_max_bytes)
CTL_PROTO(opt_hpa_sec_nshards)
CTL_PROTO(opt_metadata_thp)
CTL_PROTO(opt_retain)
CTL_PROTO(opt_dss)
@@ -246,6 +249,7 @@ CTL_PROTO(stats_arenas_i_metadata_thp)
CTL_PROTO(stats_arenas_i_tcache_bytes)
CTL_PROTO(stats_arenas_i_resident)
CTL_PROTO(stats_arenas_i_abandoned_vm)
CTL_PROTO(stats_arenas_i_hpa_sec_bytes)
INDEX_PROTO(stats_arenas_i)
CTL_PROTO(stats_allocated)
CTL_PROTO(stats_active)
@@ -360,6 +364,9 @@ static const ctl_named_node_t opt_node[] = {
{NAME("hpa_slab_max_alloc"), CTL(opt_hpa_slab_max_alloc)},
{NAME("hpa_small_max"), CTL(opt_hpa_small_max)},
{NAME("hpa_large_min"), CTL(opt_hpa_large_min)},
{NAME("hpa_sec_max_alloc"), CTL(opt_hpa_sec_max_alloc)},
{NAME("hpa_sec_max_bytes"), CTL(opt_hpa_sec_max_bytes)},
{NAME("hpa_sec_nshards"), CTL(opt_hpa_sec_nshards)},
{NAME("metadata_thp"), CTL(opt_metadata_thp)},
{NAME("retain"), CTL(opt_retain)},
{NAME("dss"), CTL(opt_dss)},
@@ -650,6 +657,7 @@ static const ctl_named_node_t stats_arenas_i_node[] = {
{NAME("tcache_bytes"), CTL(stats_arenas_i_tcache_bytes)},
{NAME("resident"), CTL(stats_arenas_i_resident)},
{NAME("abandoned_vm"), CTL(stats_arenas_i_abandoned_vm)},
{NAME("hpa_sec_bytes"), CTL(stats_arenas_i_hpa_sec_bytes)},
{NAME("small"), CHILD(named, stats_arenas_i_small)},
{NAME("large"), CHILD(named, stats_arenas_i_large)},
{NAME("bins"), CHILD(indexed, stats_arenas_i_bins)},
@@ -889,6 +897,8 @@ ctl_arena_clear(ctl_arena_t *ctl_arena) {
sizeof(pac_estats_t));
memset(&ctl_arena->astats->hpastats, 0,
sizeof(hpa_shard_stats_t));
memset(&ctl_arena->astats->secstats, 0,
sizeof(sec_stats_t));
}
}

@@ -903,7 +913,7 @@ ctl_arena_stats_amerge(tsdn_t *tsdn, ctl_arena_t *ctl_arena, arena_t *arena) {
&ctl_arena->pdirty, &ctl_arena->pmuzzy,
&ctl_arena->astats->astats, ctl_arena->astats->bstats,
ctl_arena->astats->lstats, ctl_arena->astats->estats,
&ctl_arena->astats->hpastats);
&ctl_arena->astats->hpastats, &ctl_arena->astats->secstats);

for (i = 0; i < SC_NBINS; i++) {
bin_stats_t *bstats =
@@ -1089,6 +1099,7 @@ MUTEX_PROF_ARENA_MUTEXES
&astats->hpastats.psset_slab_stats[i]);
}

sec_stats_accum(&sdstats->secstats, &astats->secstats);
}
}

@@ -1895,6 +1906,9 @@ CTL_RO_NL_GEN(opt_hpa_slab_goal, opt_hpa_slab_goal, size_t)
CTL_RO_NL_GEN(opt_hpa_slab_max_alloc, opt_hpa_slab_max_alloc, size_t)
CTL_RO_NL_GEN(opt_hpa_small_max, opt_hpa_small_max, size_t)
CTL_RO_NL_GEN(opt_hpa_large_min, opt_hpa_large_min, size_t)
CTL_RO_NL_GEN(opt_hpa_sec_max_alloc, opt_hpa_sec_max_alloc, size_t)
CTL_RO_NL_GEN(opt_hpa_sec_max_bytes, opt_hpa_sec_max_bytes, size_t)
CTL_RO_NL_GEN(opt_hpa_sec_nshards, opt_hpa_sec_nshards, size_t)
CTL_RO_NL_GEN(opt_metadata_thp, metadata_thp_mode_names[opt_metadata_thp],
const char *)
CTL_RO_NL_GEN(opt_retain, opt_retain, bool)
@@ -3114,6 +3128,9 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_abandoned_vm,
&arenas_i(mib[2])->astats->astats.pa_shard_stats.pac_stats.abandoned_vm,
ATOMIC_RELAXED), size_t)

CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_sec_bytes,
arenas_i(mib[2])->astats->secstats.bytes, size_t)

CTL_RO_CGEN(config_stats, stats_arenas_i_small_allocated,
arenas_i(mib[2])->astats->allocated_small, size_t)
CTL_RO_CGEN(config_stats, stats_arenas_i_small_nmalloc,
@@ -411,12 +411,12 @@ hpa_shard_destroy(tsdn_t *tsdn, hpa_shard_t *shard) {
}

void
hpa_shard_prefork2(tsdn_t *tsdn, hpa_shard_t *shard) {
hpa_shard_prefork3(tsdn_t *tsdn, hpa_shard_t *shard) {
malloc_mutex_prefork(tsdn, &shard->grow_mtx);
}

void
hpa_shard_prefork3(tsdn_t *tsdn, hpa_shard_t *shard) {
hpa_shard_prefork4(tsdn_t *tsdn, hpa_shard_t *shard) {
malloc_mutex_prefork(tsdn, &shard->mtx);
}

@@ -433,7 +433,7 @@ hpa_shard_postfork_child(tsdn_t *tsdn, hpa_shard_t *shard) {
}

void
hpa_prefork3(tsdn_t *tsdn, hpa_t *hpa) {
hpa_prefork4(tsdn_t *tsdn, hpa_t *hpa) {
malloc_mutex_prefork(tsdn, &hpa->grow_mtx);
malloc_mutex_prefork(tsdn, &hpa->mtx);
}

@@ -141,6 +141,11 @@ size_t opt_hpa_slab_max_alloc = 256 * 1024;
size_t opt_hpa_small_max = 32 * 1024;
size_t opt_hpa_large_min = 4 * 1024 * 1024;

size_t opt_hpa_sec_max_alloc = 32 * 1024;
/* These settings correspond to a maximum of 1MB cached per arena. */
size_t opt_hpa_sec_max_bytes = 256 * 1024;
size_t opt_hpa_sec_nshards = 4;

/*
* Arenas that are used to service external requests. Not all elements of the
* arenas array are necessarily used; arenas are created lazily as needed.
@@ -1494,11 +1499,18 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS],
true)
CONF_HANDLE_SIZE_T(opt_hpa_slab_max_alloc,
"hpa_slab_max_alloc", PAGE, 512 * PAGE,
CONF_CHECK_MIN, CONF_CHECK_MAX, true)
CONF_CHECK_MIN, CONF_CHECK_MAX, true);
CONF_HANDLE_SIZE_T(opt_hpa_small_max, "hpa_small_max",
PAGE, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true)
PAGE, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true);
CONF_HANDLE_SIZE_T(opt_hpa_large_min, "hpa_large_min",
PAGE, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true)
PAGE, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true);

CONF_HANDLE_SIZE_T(opt_hpa_sec_max_alloc, "hpa_sec_max_alloc",
PAGE, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true);
CONF_HANDLE_SIZE_T(opt_hpa_sec_max_bytes, "hpa_sec_max_bytes",
PAGE, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true);
CONF_HANDLE_SIZE_T(opt_hpa_sec_nshards, "hpa_sec_nshards",
0, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true);

if (CONF_MATCH("slab_sizes")) {
if (CONF_MATCH_VALUE("default")) {
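As a usage note: jemalloc reads option strings from the malloc_conf symbol (and the MALLOC_CONF environment variable), and the hpa_sec_* keys are exactly the ones parsed in the hunk above. The snippet below is a hypothetical example only; it assumes the HPA itself is switched on by an "hpa" key, whose parsing is not shown in this diff.

/* Hypothetical usage sketch: the hpa_sec_* keys come from the hunk above;
 * the "hpa" key itself is an assumption, since its parsing is not in this diff. */
#include <stdlib.h>

const char *malloc_conf =
    "hpa:true,hpa_sec_nshards:4,hpa_sec_max_alloc:32768,hpa_sec_max_bytes:262144";

int
main(void) {
	void *p = malloc(4096);	/* may now be served through the SEC */
	free(p);
	return 0;
}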
@@ -1808,7 +1820,8 @@ malloc_init_hard_a0_locked() {
}
if (pa_shard_enable_hpa(&a0->pa_shard, &arena_hpa_global,
opt_hpa_slab_goal, opt_hpa_slab_max_alloc,
opt_hpa_small_max, opt_hpa_large_min)) {
opt_hpa_small_max, opt_hpa_large_min, opt_hpa_sec_nshards,
opt_hpa_sec_max_alloc, opt_hpa_sec_max_bytes)) {
return true;
}
}
@@ -4226,7 +4239,7 @@ _malloc_prefork(void)
background_thread_prefork1(tsd_tsdn(tsd));
}
/* Break arena prefork into stages to preserve lock order. */
for (i = 0; i < 8; i++) {
for (i = 0; i < 9; i++) {
for (j = 0; j < narenas; j++) {
if ((arena = arena_get(tsd_tsdn(tsd), j, false)) !=
NULL) {
@@ -4255,12 +4268,15 @@ _malloc_prefork(void)
case 7:
arena_prefork7(tsd_tsdn(tsd), arena);
break;
case 8:
arena_prefork8(tsd_tsdn(tsd), arena);
break;
default: not_reached();
}
}
}
if (i == 3 && opt_hpa) {
hpa_prefork3(tsd_tsdn(tsd), &arena_hpa_global);
if (i == 4 && opt_hpa) {
hpa_prefork4(tsd_tsdn(tsd), &arena_hpa_global);
}

}
src/pa.c

@@ -49,7 +49,8 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base,

bool
pa_shard_enable_hpa(pa_shard_t *shard, hpa_t *hpa, size_t ps_goal,
size_t ps_alloc_max, size_t small_max, size_t large_min) {
size_t ps_alloc_max, size_t small_max, size_t large_min,
size_t sec_nshards, size_t sec_alloc_max, size_t sec_bytes_max) {
ps_goal &= ~PAGE_MASK;
ps_alloc_max &= ~PAGE_MASK;

@@ -60,6 +61,10 @@ pa_shard_enable_hpa(pa_shard_t *shard, hpa_t *hpa, size_t ps_goal,
shard->ind, ps_goal, ps_alloc_max, small_max, large_min)) {
return true;
}
if (sec_init(&shard->hpa_sec, &shard->hpa_shard.pai, sec_nshards,
sec_alloc_max, sec_bytes_max)) {
return true;
}
shard->ever_used_hpa = true;
atomic_store_b(&shard->use_hpa, true, ATOMIC_RELAXED);

@@ -67,24 +72,27 @@ pa_shard_enable_hpa(pa_shard_t *shard, hpa_t *hpa, size_t ps_goal,
}

void
pa_shard_disable_hpa(pa_shard_t *shard) {
pa_shard_disable_hpa(tsdn_t *tsdn, pa_shard_t *shard) {
atomic_store_b(&shard->use_hpa, false, ATOMIC_RELAXED);
sec_disable(tsdn, &shard->hpa_sec);
}

void
pa_shard_reset(pa_shard_t *shard) {
pa_shard_reset(tsdn_t *tsdn, pa_shard_t *shard) {
atomic_store_zu(&shard->nactive, 0, ATOMIC_RELAXED);
sec_flush(tsdn, &shard->hpa_sec);
}

void
pa_shard_destroy(tsdn_t *tsdn, pa_shard_t *shard) {
sec_flush(tsdn, &shard->hpa_sec);
pac_destroy(tsdn, &shard->pac);
}

static pai_t *
pa_get_pai(pa_shard_t *shard, edata_t *edata) {
return (edata_pai_get(edata) == EXTENT_PAI_PAC
? &shard->pac.pai : &shard->hpa_shard.pai);
? &shard->pac.pai : &shard->hpa_sec.pai);
}

edata_t *
@@ -95,7 +103,7 @@ pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment,

edata_t *edata = NULL;
if (atomic_load_b(&shard->use_hpa, ATOMIC_RELAXED)) {
edata = pai_alloc(tsdn, &shard->hpa_shard.pai, size, alignment,
edata = pai_alloc(tsdn, &shard->hpa_sec.pai, size, alignment,
zero);
}
/*
@@ -173,6 +181,7 @@ pa_dalloc(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata,
emap_deregister_interior(tsdn, shard->emap, edata);
edata_slab_set(edata, false);
}
edata_addr_set(edata, edata_base_get(edata));
edata_szind_set(edata, SC_NSIZES);
pa_nactive_sub(shard, edata_size_get(edata) >> LG_PAGE);
pai_t *pai = pa_get_pai(shard, edata);
@@ -16,17 +16,14 @@ pa_shard_prefork0(tsdn_t *tsdn, pa_shard_t *shard) {

void
pa_shard_prefork2(tsdn_t *tsdn, pa_shard_t *shard) {
malloc_mutex_prefork(tsdn, &shard->pac.grow_mtx);
if (shard->ever_used_hpa) {
hpa_shard_prefork2(tsdn, &shard->hpa_shard);
sec_prefork2(tsdn, &shard->hpa_sec);
}
}

void
pa_shard_prefork3(tsdn_t *tsdn, pa_shard_t *shard) {
ecache_prefork(tsdn, &shard->pac.ecache_dirty);
ecache_prefork(tsdn, &shard->pac.ecache_muzzy);
ecache_prefork(tsdn, &shard->pac.ecache_retained);
malloc_mutex_prefork(tsdn, &shard->pac.grow_mtx);
if (shard->ever_used_hpa) {
hpa_shard_prefork3(tsdn, &shard->hpa_shard);
}
@@ -34,6 +31,16 @@ pa_shard_prefork3(tsdn_t *tsdn, pa_shard_t *shard) {

void
pa_shard_prefork4(tsdn_t *tsdn, pa_shard_t *shard) {
ecache_prefork(tsdn, &shard->pac.ecache_dirty);
ecache_prefork(tsdn, &shard->pac.ecache_muzzy);
ecache_prefork(tsdn, &shard->pac.ecache_retained);
if (shard->ever_used_hpa) {
hpa_shard_prefork4(tsdn, &shard->hpa_shard);
}
}

void
pa_shard_prefork5(tsdn_t *tsdn, pa_shard_t *shard) {
edata_cache_prefork(tsdn, &shard->edata_cache);
}

@@ -47,6 +54,7 @@ pa_shard_postfork_parent(tsdn_t *tsdn, pa_shard_t *shard) {
malloc_mutex_postfork_parent(tsdn, &shard->pac.decay_dirty.mtx);
malloc_mutex_postfork_parent(tsdn, &shard->pac.decay_muzzy.mtx);
if (shard->ever_used_hpa) {
sec_postfork_parent(tsdn, &shard->hpa_sec);
hpa_shard_postfork_parent(tsdn, &shard->hpa_shard);
}
}
@@ -61,6 +69,7 @@ pa_shard_postfork_child(tsdn_t *tsdn, pa_shard_t *shard) {
malloc_mutex_postfork_child(tsdn, &shard->pac.decay_dirty.mtx);
malloc_mutex_postfork_child(tsdn, &shard->pac.decay_muzzy.mtx);
if (shard->ever_used_hpa) {
sec_postfork_child(tsdn, &shard->hpa_sec);
hpa_shard_postfork_child(tsdn, &shard->hpa_shard);
}
}
@@ -76,7 +85,8 @@ pa_shard_basic_stats_merge(pa_shard_t *shard, size_t *nactive, size_t *ndirty,
void
pa_shard_stats_merge(tsdn_t *tsdn, pa_shard_t *shard,
pa_shard_stats_t *pa_shard_stats_out, pac_estats_t *estats_out,
hpa_shard_stats_t *hpa_stats_out, size_t *resident) {
hpa_shard_stats_t *hpa_stats_out, sec_stats_t *sec_stats_out,
size_t *resident) {
cassert(config_stats);

pa_shard_stats_out->pac_stats.retained +=
@@ -149,6 +159,7 @@ pa_shard_stats_merge(tsdn_t *tsdn, pa_shard_t *shard,
&shard->hpa_shard.psset.slab_stats[i]);
}
malloc_mutex_unlock(tsdn, &shard->hpa_shard.mtx);
sec_stats_merge(tsdn, &shard->hpa_sec, sec_stats_out);
}
}

@@ -182,5 +193,7 @@ pa_shard_mtx_stats_read(tsdn_t *tsdn, pa_shard_t *shard,
pa_shard_mtx_stats_read_single(tsdn, mutex_prof_data,
&shard->hpa_shard.grow_mtx,
arena_prof_mutex_hpa_shard_grow);
sec_mutex_stats_read(tsdn, &shard->hpa_sec,
&mutex_prof_data[arena_prof_mutex_hpa_sec]);
}
}
@@ -678,6 +678,11 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i) {
CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.ninactive",
i, &ninactive, size_t);

size_t sec_bytes;
CTL_M2_GET("stats.arenas.0.hpa_sec_bytes", i, &sec_bytes, size_t);
emitter_kv(emitter, "sec_bytes", "Bytes in small extent cache",
emitter_type_size, &sec_bytes);

emitter_table_printf(emitter,
"HPA shard stats:\n"
" In full slabs:\n"
@@ -1194,6 +1199,9 @@ stats_general_print(emitter_t *emitter) {
OPT_WRITE_SIZE_T("hpa_slab_max_alloc")
OPT_WRITE_SIZE_T("hpa_small_max")
OPT_WRITE_SIZE_T("hpa_large_min")
OPT_WRITE_SIZE_T("hpa_sec_max_alloc")
OPT_WRITE_SIZE_T("hpa_sec_max_bytes")
OPT_WRITE_SIZE_T("hpa_sec_nshards")
OPT_WRITE_CHAR_P("metadata_thp")
OPT_WRITE_BOOL_MUTABLE("background_thread", "background_thread")
OPT_WRITE_SSIZE_T_MUTABLE("dirty_decay_ms", "arenas.dirty_decay_ms")

@@ -716,9 +716,11 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache, bool tsd_tcache) {
if (arena_nthreads_get(arena, false) == 0 &&
!background_thread_enabled()) {
/* Force purging when no threads assigned to the arena anymore. */
arena_decay(tsd_tsdn(tsd), arena, false, true);
arena_decay(tsd_tsdn(tsd), arena,
/* is_background_thread */ false, /* all */ true);
} else {
arena_decay(tsd_tsdn(tsd), arena, false, false);
arena_decay(tsd_tsdn(tsd), arena,
/* is_background_thread */ false, /* all */ false);
}
}

@@ -168,6 +168,9 @@ TEST_BEGIN(test_mallctl_opt) {
TEST_MALLCTL_OPT(size_t, hpa_slab_max_alloc, always);
TEST_MALLCTL_OPT(size_t, hpa_small_max, always);
TEST_MALLCTL_OPT(size_t, hpa_large_min, always);
TEST_MALLCTL_OPT(size_t, hpa_sec_max_alloc, always);
TEST_MALLCTL_OPT(size_t, hpa_sec_max_bytes, always);
TEST_MALLCTL_OPT(size_t, hpa_sec_nshards, always);
TEST_MALLCTL_OPT(unsigned, narenas, always);
TEST_MALLCTL_OPT(const char *, percpu_arena, always);
TEST_MALLCTL_OPT(size_t, oversize_threshold, always);