PA: Use an SEC in front of the HPA shard.

David Goldblatt
2020-10-16 13:14:59 -07:00
committed by David Goldblatt
parent ea51e97bb8
commit 6599651aee
15 changed files with 141 additions and 41 deletions
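
In rough outline (based on the hunks below): the PA shard now points its hugepage allocation path at a small extent cache (the SEC), which is initialized over the HPA shard's own pai (see the sec_init call in pa.c below) and exposes the same pai interface. New opt.hpa_sec_nshards / opt.hpa_sec_max_alloc / opt.hpa_sec_max_bytes knobs size the cache, a stats.arenas.<i>.hpa_sec_bytes counter reports it, the cache is flushed on full purge, reset, and destroy (and disabled when custom extent hooks are installed), and the prefork stages shift by one to make room for its mutexes. As a self-contained illustration of the layering only (plain C; the allocator_t/cache_t names below are invented stand-ins, not jemalloc's pai_t/sec_t), a bounded cache fronting a backing allocator behind one shared interface could look like this:

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

/* Shared allocator interface -- a stand-in for jemalloc's pai_t. */
typedef struct allocator_s allocator_t;
struct allocator_s {
    void *(*alloc)(allocator_t *self, size_t size);
    void (*dalloc)(allocator_t *self, void *ptr, size_t size);
};

/* Backing allocator, standing in for the HPA shard: plain malloc/free. */
static void *
backing_alloc(allocator_t *self, size_t size) {
    (void)self;
    return malloc(size);
}

static void
backing_dalloc(allocator_t *self, void *ptr, size_t size) {
    (void)self;
    (void)size;
    free(ptr);
}

static allocator_t backing = { backing_alloc, backing_dalloc };

/*
 * Bounded cache layered in front of the backing allocator.  It caches exactly
 * one size in a handful of slots; everything else passes straight through.
 * The slot count and cached size play the role of the hpa_sec_* knobs.
 */
#define CACHE_SLOTS 4

typedef struct cache_s {
    allocator_t pai;        /* Exposed interface; must be the first member. */
    allocator_t *fallback;
    size_t cached_size;
    void *slots[CACHE_SLOTS];
    size_t nslots;
} cache_t;

static void *
cache_alloc(allocator_t *self, size_t size) {
    cache_t *c = (cache_t *)self;
    if (size == c->cached_size && c->nslots > 0) {
        return c->slots[--c->nslots];               /* Cache hit. */
    }
    return c->fallback->alloc(c->fallback, size);   /* Miss: fall through. */
}

static void
cache_dalloc(allocator_t *self, void *ptr, size_t size) {
    cache_t *c = (cache_t *)self;
    if (size == c->cached_size && c->nslots < CACHE_SLOTS) {
        c->slots[c->nslots++] = ptr;                /* Retain for reuse. */
        return;
    }
    c->fallback->dalloc(c->fallback, ptr, size);
}

int
main(void) {
    cache_t sec = {
        .pai = { cache_alloc, cache_dalloc },
        .fallback = &backing,
        .cached_size = 32 * 1024,
    };
    /*
     * Callers hold only &sec.pai, the same way pa_alloc()/pa_get_pai() below
     * now hand out &shard->hpa_sec.pai instead of &shard->hpa_shard.pai.
     */
    allocator_t *a = &sec.pai;
    void *p = a->alloc(a, 32 * 1024);
    a->dalloc(a, p, 32 * 1024);           /* Goes into the cache... */
    void *q = a->alloc(a, 32 * 1024);     /* ...and comes back out. */
    printf("reused cached block: %s\n", p == q ? "yes" : "no");
    a->dalloc(a, q, 32 * 1024);
    return 0;
}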

View File

@@ -81,7 +81,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads,
const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms,
size_t *nactive, size_t *ndirty, size_t *nmuzzy, arena_stats_t *astats,
bin_stats_data_t *bstats, arena_stats_large_t *lstats,
pac_estats_t *estats, hpa_shard_stats_t *hpastats) {
pac_estats_t *estats, hpa_shard_stats_t *hpastats, sec_stats_t *secstats) {
cassert(config_stats);
arena_basic_stats_merge(tsdn, arena, nthreads, dss, dirty_decay_ms,
@@ -139,7 +139,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads,
}
pa_shard_stats_merge(tsdn, &arena->pa_shard, &astats->pa_shard_stats,
estats, hpastats, &astats->resident);
estats, hpastats, secstats, &astats->resident);
LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx);
@@ -483,6 +483,14 @@ arena_decay_muzzy(tsdn_t *tsdn, arena_t *arena, bool is_background_thread,
void
arena_decay(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all) {
if (all) {
/*
* We should take a purge of "all" to mean "save as much memory
* as possible", including flushing any caches (for situations
* like thread death, or manual purge calls).
*/
sec_flush(tsdn, &arena->pa_shard.hpa_sec);
}
if (arena_decay_dirty(tsdn, arena, is_background_thread, all)) {
return;
}
@@ -631,7 +639,7 @@ arena_reset(tsd_t *tsd, arena_t *arena) {
&arena->bins[i].bin_shards[j]);
}
}
pa_shard_reset(&arena->pa_shard);
pa_shard_reset(tsd_tsdn(tsd), &arena->pa_shard);
}
void
@@ -1362,7 +1370,7 @@ arena_set_extent_hooks(tsd_t *tsd, arena_t *arena,
malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
}
/* No using the HPA now that we have the custom hooks. */
pa_shard_disable_hpa(&arena->pa_shard);
pa_shard_disable_hpa(tsd_tsdn(tsd), &arena->pa_shard);
extent_hooks_t *ret = base_extent_hooks_set(arena->base, extent_hooks);
if (have_background_thread) {
malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
@@ -1529,7 +1537,8 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) {
if (opt_hpa && ehooks_are_default(base_ehooks_get(base)) && ind != 0) {
if (pa_shard_enable_hpa(&arena->pa_shard, &arena_hpa_global,
opt_hpa_slab_goal, opt_hpa_slab_max_alloc,
opt_hpa_small_max, opt_hpa_large_min)) {
opt_hpa_small_max, opt_hpa_large_min, opt_hpa_sec_nshards,
opt_hpa_sec_max_alloc, opt_hpa_sec_max_bytes)) {
goto label_error;
}
}
@@ -1658,16 +1667,21 @@ arena_prefork4(tsdn_t *tsdn, arena_t *arena) {
void
arena_prefork5(tsdn_t *tsdn, arena_t *arena) {
base_prefork(tsdn, arena->base);
pa_shard_prefork5(tsdn, &arena->pa_shard);
}
void
arena_prefork6(tsdn_t *tsdn, arena_t *arena) {
malloc_mutex_prefork(tsdn, &arena->large_mtx);
base_prefork(tsdn, arena->base);
}
void
arena_prefork7(tsdn_t *tsdn, arena_t *arena) {
malloc_mutex_prefork(tsdn, &arena->large_mtx);
}
void
arena_prefork8(tsdn_t *tsdn, arena_t *arena) {
for (unsigned i = 0; i < SC_NBINS; i++) {
for (unsigned j = 0; j < bin_infos[i].n_shards; j++) {
bin_prefork(tsdn, &arena->bins[i].bin_shards[j]);
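
Per the arena_decay() hunk above, a purge with all == true now flushes the arena's SEC before decaying. A hedged usage sketch of reaching that path from application code through the public mallctl interface (MALLCTL_ARENAS_ALL and "arena.<i>.purge" are long-standing public API; only the SEC flush behind them is new here):

#include <stdio.h>
#include <jemalloc/jemalloc.h>

int
main(void) {
    /*
     * "arena.<i>.purge" with i == MALLCTL_ARENAS_ALL runs the "save as much
     * memory as possible" decay path for every arena, which now also empties
     * each arena's SEC.
     */
    char cmd[64];
    snprintf(cmd, sizeof(cmd), "arena.%d.purge", MALLCTL_ARENAS_ALL);
    if (mallctl(cmd, NULL, NULL, NULL, 0) != 0) {
        fprintf(stderr, "mallctl(\"%s\") failed\n", cmd);
        return 1;
    }
    return 0;
}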

View File

@@ -95,6 +95,9 @@ CTL_PROTO(opt_hpa_slab_goal)
CTL_PROTO(opt_hpa_slab_max_alloc)
CTL_PROTO(opt_hpa_small_max)
CTL_PROTO(opt_hpa_large_min)
CTL_PROTO(opt_hpa_sec_max_alloc)
CTL_PROTO(opt_hpa_sec_max_bytes)
CTL_PROTO(opt_hpa_sec_nshards)
CTL_PROTO(opt_metadata_thp)
CTL_PROTO(opt_retain)
CTL_PROTO(opt_dss)
@@ -246,6 +249,7 @@ CTL_PROTO(stats_arenas_i_metadata_thp)
CTL_PROTO(stats_arenas_i_tcache_bytes)
CTL_PROTO(stats_arenas_i_resident)
CTL_PROTO(stats_arenas_i_abandoned_vm)
CTL_PROTO(stats_arenas_i_hpa_sec_bytes)
INDEX_PROTO(stats_arenas_i)
CTL_PROTO(stats_allocated)
CTL_PROTO(stats_active)
@@ -360,6 +364,9 @@ static const ctl_named_node_t opt_node[] = {
{NAME("hpa_slab_max_alloc"), CTL(opt_hpa_slab_max_alloc)},
{NAME("hpa_small_max"), CTL(opt_hpa_small_max)},
{NAME("hpa_large_min"), CTL(opt_hpa_large_min)},
{NAME("hpa_sec_max_alloc"), CTL(opt_hpa_sec_max_alloc)},
{NAME("hpa_sec_max_bytes"), CTL(opt_hpa_sec_max_bytes)},
{NAME("hpa_sec_nshards"), CTL(opt_hpa_sec_nshards)},
{NAME("metadata_thp"), CTL(opt_metadata_thp)},
{NAME("retain"), CTL(opt_retain)},
{NAME("dss"), CTL(opt_dss)},
@@ -650,6 +657,7 @@ static const ctl_named_node_t stats_arenas_i_node[] = {
{NAME("tcache_bytes"), CTL(stats_arenas_i_tcache_bytes)},
{NAME("resident"), CTL(stats_arenas_i_resident)},
{NAME("abandoned_vm"), CTL(stats_arenas_i_abandoned_vm)},
{NAME("hpa_sec_bytes"), CTL(stats_arenas_i_hpa_sec_bytes)},
{NAME("small"), CHILD(named, stats_arenas_i_small)},
{NAME("large"), CHILD(named, stats_arenas_i_large)},
{NAME("bins"), CHILD(indexed, stats_arenas_i_bins)},
@@ -889,6 +897,8 @@ ctl_arena_clear(ctl_arena_t *ctl_arena) {
sizeof(pac_estats_t));
memset(&ctl_arena->astats->hpastats, 0,
sizeof(hpa_shard_stats_t));
memset(&ctl_arena->astats->secstats, 0,
sizeof(sec_stats_t));
}
}
@@ -903,7 +913,7 @@ ctl_arena_stats_amerge(tsdn_t *tsdn, ctl_arena_t *ctl_arena, arena_t *arena) {
&ctl_arena->pdirty, &ctl_arena->pmuzzy,
&ctl_arena->astats->astats, ctl_arena->astats->bstats,
ctl_arena->astats->lstats, ctl_arena->astats->estats,
&ctl_arena->astats->hpastats);
&ctl_arena->astats->hpastats, &ctl_arena->astats->secstats);
for (i = 0; i < SC_NBINS; i++) {
bin_stats_t *bstats =
@@ -1089,6 +1099,7 @@ MUTEX_PROF_ARENA_MUTEXES
&astats->hpastats.psset_slab_stats[i]);
}
sec_stats_accum(&sdstats->secstats, &astats->secstats);
}
}
@@ -1895,6 +1906,9 @@ CTL_RO_NL_GEN(opt_hpa_slab_goal, opt_hpa_slab_goal, size_t)
CTL_RO_NL_GEN(opt_hpa_slab_max_alloc, opt_hpa_slab_max_alloc, size_t)
CTL_RO_NL_GEN(opt_hpa_small_max, opt_hpa_small_max, size_t)
CTL_RO_NL_GEN(opt_hpa_large_min, opt_hpa_large_min, size_t)
CTL_RO_NL_GEN(opt_hpa_sec_max_alloc, opt_hpa_sec_max_alloc, size_t)
CTL_RO_NL_GEN(opt_hpa_sec_max_bytes, opt_hpa_sec_max_bytes, size_t)
CTL_RO_NL_GEN(opt_hpa_sec_nshards, opt_hpa_sec_nshards, size_t)
CTL_RO_NL_GEN(opt_metadata_thp, metadata_thp_mode_names[opt_metadata_thp],
const char *)
CTL_RO_NL_GEN(opt_retain, opt_retain, bool)
@@ -3114,6 +3128,9 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_abandoned_vm,
&arenas_i(mib[2])->astats->astats.pa_shard_stats.pac_stats.abandoned_vm,
ATOMIC_RELAXED), size_t)
CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_sec_bytes,
arenas_i(mib[2])->astats->secstats.bytes, size_t)
CTL_RO_CGEN(config_stats, stats_arenas_i_small_allocated,
arenas_i(mib[2])->astats->allocated_small, size_t)
CTL_RO_CGEN(config_stats, stats_arenas_i_small_nmalloc,
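
The new read-only names can be queried like any other mallctl entry; a minimal sketch, using only the option names registered in the tables above (values are reported as size_t, matching the CTL_RO_NL_GEN definitions):

#include <stdio.h>
#include <jemalloc/jemalloc.h>

int
main(void) {
    size_t nshards = 0, max_alloc = 0, max_bytes = 0;
    size_t sz = sizeof(size_t);

    mallctl("opt.hpa_sec_nshards", &nshards, &sz, NULL, 0);
    mallctl("opt.hpa_sec_max_alloc", &max_alloc, &sz, NULL, 0);
    mallctl("opt.hpa_sec_max_bytes", &max_bytes, &sz, NULL, 0);

    printf("SEC config: %zu shard(s), max_alloc %zu bytes, "
        "max_bytes %zu bytes per shard\n", nshards, max_alloc, max_bytes);
    return 0;
}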

View File

@@ -411,12 +411,12 @@ hpa_shard_destroy(tsdn_t *tsdn, hpa_shard_t *shard) {
}
void
hpa_shard_prefork2(tsdn_t *tsdn, hpa_shard_t *shard) {
hpa_shard_prefork3(tsdn_t *tsdn, hpa_shard_t *shard) {
malloc_mutex_prefork(tsdn, &shard->grow_mtx);
}
void
hpa_shard_prefork3(tsdn_t *tsdn, hpa_shard_t *shard) {
hpa_shard_prefork4(tsdn_t *tsdn, hpa_shard_t *shard) {
malloc_mutex_prefork(tsdn, &shard->mtx);
}
@@ -433,7 +433,7 @@ hpa_shard_postfork_child(tsdn_t *tsdn, hpa_shard_t *shard) {
}
void
hpa_prefork3(tsdn_t *tsdn, hpa_t *hpa) {
hpa_prefork4(tsdn_t *tsdn, hpa_t *hpa) {
malloc_mutex_prefork(tsdn, &hpa->grow_mtx);
malloc_mutex_prefork(tsdn, &hpa->mtx);
}

View File

@@ -141,6 +141,11 @@ size_t opt_hpa_slab_max_alloc = 256 * 1024;
size_t opt_hpa_small_max = 32 * 1024;
size_t opt_hpa_large_min = 4 * 1024 * 1024;
size_t opt_hpa_sec_max_alloc = 32 * 1024;
/* These settings correspond to a maximum of 1MB cached per arena. */
size_t opt_hpa_sec_max_bytes = 256 * 1024;
size_t opt_hpa_sec_nshards = 4;
/*
* Arenas that are used to service external requests. Not all elements of the
* arenas array are necessarily used; arenas are created lazily as needed.
@@ -1494,11 +1499,18 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS],
true)
CONF_HANDLE_SIZE_T(opt_hpa_slab_max_alloc,
"hpa_slab_max_alloc", PAGE, 512 * PAGE,
CONF_CHECK_MIN, CONF_CHECK_MAX, true)
CONF_CHECK_MIN, CONF_CHECK_MAX, true);
CONF_HANDLE_SIZE_T(opt_hpa_small_max, "hpa_small_max",
PAGE, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true)
PAGE, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true);
CONF_HANDLE_SIZE_T(opt_hpa_large_min, "hpa_large_min",
PAGE, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true)
PAGE, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true);
CONF_HANDLE_SIZE_T(opt_hpa_sec_max_alloc, "hpa_sec_max_alloc",
PAGE, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true);
CONF_HANDLE_SIZE_T(opt_hpa_sec_max_bytes, "hpa_sec_max_bytes",
PAGE, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true);
CONF_HANDLE_SIZE_T(opt_hpa_sec_nshards, "hpa_sec_nshards",
0, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true);
if (CONF_MATCH("slab_sizes")) {
if (CONF_MATCH_VALUE("default")) {
@@ -1808,7 +1820,8 @@ malloc_init_hard_a0_locked() {
}
if (pa_shard_enable_hpa(&a0->pa_shard, &arena_hpa_global,
opt_hpa_slab_goal, opt_hpa_slab_max_alloc,
opt_hpa_small_max, opt_hpa_large_min)) {
opt_hpa_small_max, opt_hpa_large_min, opt_hpa_sec_nshards,
opt_hpa_sec_max_alloc, opt_hpa_sec_max_bytes)) {
return true;
}
}
@@ -4226,7 +4239,7 @@ _malloc_prefork(void)
background_thread_prefork1(tsd_tsdn(tsd));
}
/* Break arena prefork into stages to preserve lock order. */
for (i = 0; i < 8; i++) {
for (i = 0; i < 9; i++) {
for (j = 0; j < narenas; j++) {
if ((arena = arena_get(tsd_tsdn(tsd), j, false)) !=
NULL) {
@@ -4255,12 +4268,15 @@ _malloc_prefork(void)
case 7:
arena_prefork7(tsd_tsdn(tsd), arena);
break;
case 8:
arena_prefork8(tsd_tsdn(tsd), arena);
break;
default: not_reached();
}
}
}
if (i == 3 && opt_hpa) {
hpa_prefork3(tsd_tsdn(tsd), &arena_hpa_global);
if (i == 4 && opt_hpa) {
hpa_prefork4(tsd_tsdn(tsd), &arena_hpa_global);
}
}
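
A hedged sketch of overriding the new defaults at startup. The hpa_sec_* keys are the ones wired into the conf handler above; the "hpa:true" switch assumes the existing opt_hpa boolean keeps its "hpa" conf name, and an unprefixed build exposes the plain malloc_conf symbol (the same string also works in the MALLOC_CONF environment variable):

#include <stdlib.h>
#include <jemalloc/jemalloc.h>

/*
 * Read by jemalloc during initialization, before the first allocation.
 * max_alloc/max_bytes honor the PAGE-granularity minimum checked above;
 * nshards is a plain count.  8 shards * 512 KiB ~= 4 MiB cached per arena.
 */
const char *malloc_conf = "hpa:true,"
    "hpa_sec_nshards:8,"
    "hpa_sec_max_alloc:65536,"
    "hpa_sec_max_bytes:524288";

int
main(void) {
    void *p = malloc(64 * 1024);
    free(p);
    return 0;
}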

View File

@@ -49,7 +49,8 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base,
bool
pa_shard_enable_hpa(pa_shard_t *shard, hpa_t *hpa, size_t ps_goal,
size_t ps_alloc_max, size_t small_max, size_t large_min) {
size_t ps_alloc_max, size_t small_max, size_t large_min,
size_t sec_nshards, size_t sec_alloc_max, size_t sec_bytes_max) {
ps_goal &= ~PAGE_MASK;
ps_alloc_max &= ~PAGE_MASK;
@@ -60,6 +61,10 @@ pa_shard_enable_hpa(pa_shard_t *shard, hpa_t *hpa, size_t ps_goal,
shard->ind, ps_goal, ps_alloc_max, small_max, large_min)) {
return true;
}
if (sec_init(&shard->hpa_sec, &shard->hpa_shard.pai, sec_nshards,
sec_alloc_max, sec_bytes_max)) {
return true;
}
shard->ever_used_hpa = true;
atomic_store_b(&shard->use_hpa, true, ATOMIC_RELAXED);
@@ -67,24 +72,27 @@ pa_shard_enable_hpa(pa_shard_t *shard, hpa_t *hpa, size_t ps_goal,
}
void
pa_shard_disable_hpa(pa_shard_t *shard) {
pa_shard_disable_hpa(tsdn_t *tsdn, pa_shard_t *shard) {
atomic_store_b(&shard->use_hpa, false, ATOMIC_RELAXED);
sec_disable(tsdn, &shard->hpa_sec);
}
void
pa_shard_reset(pa_shard_t *shard) {
pa_shard_reset(tsdn_t *tsdn, pa_shard_t *shard) {
atomic_store_zu(&shard->nactive, 0, ATOMIC_RELAXED);
sec_flush(tsdn, &shard->hpa_sec);
}
void
pa_shard_destroy(tsdn_t *tsdn, pa_shard_t *shard) {
sec_flush(tsdn, &shard->hpa_sec);
pac_destroy(tsdn, &shard->pac);
}
static pai_t *
pa_get_pai(pa_shard_t *shard, edata_t *edata) {
return (edata_pai_get(edata) == EXTENT_PAI_PAC
? &shard->pac.pai : &shard->hpa_shard.pai);
? &shard->pac.pai : &shard->hpa_sec.pai);
}
edata_t *
@@ -95,7 +103,7 @@ pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment,
edata_t *edata = NULL;
if (atomic_load_b(&shard->use_hpa, ATOMIC_RELAXED)) {
edata = pai_alloc(tsdn, &shard->hpa_shard.pai, size, alignment,
edata = pai_alloc(tsdn, &shard->hpa_sec.pai, size, alignment,
zero);
}
/*
@@ -173,6 +181,7 @@ pa_dalloc(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata,
emap_deregister_interior(tsdn, shard->emap, edata);
edata_slab_set(edata, false);
}
edata_addr_set(edata, edata_base_get(edata));
edata_szind_set(edata, SC_NSIZES);
pa_nactive_sub(shard, edata_size_get(edata) >> LG_PAGE);
pai_t *pai = pa_get_pai(shard, edata);
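
Because pa_get_pai() above now hands back &shard->hpa_sec.pai for extents the PAC doesn't own, frees of HPA-backed extents refill the cache instead of going straight back to the HPA shard. A rough, hedged probe of that from outside (assumes a stats-enabled, unprefixed build running with the HPA on and the defaults above; which sizes actually route through the HPA depends on the hpa_* bounds, so treat the output as informational rather than a guaranteed delta):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <jemalloc/jemalloc.h>

/* Merged SEC bytes across all arenas, via the stat added in ctl.c above. */
static size_t
sec_bytes_all_arenas(void) {
    /* Stats snapshots refresh when "epoch" is written. */
    uint64_t epoch = 1;
    size_t esz = sizeof(epoch);
    mallctl("epoch", &epoch, &esz, &epoch, sizeof(epoch));

    char name[64];
    snprintf(name, sizeof(name), "stats.arenas.%d.hpa_sec_bytes",
        MALLCTL_ARENAS_ALL);
    size_t v = 0, sz = sizeof(v);
    mallctl(name, &v, &sz, NULL, 0);
    return v;
}

int
main(void) {
    printf("before churn: %zu bytes in SEC\n", sec_bytes_all_arenas());

    enum { N = 64 };
    void *ptrs[N];
    for (int i = 0; i < N; i++) {
        ptrs[i] = malloc(20 * 1024); /* Under the 32 KiB sec max_alloc default. */
    }
    for (int i = 0; i < N; i++) {
        free(ptrs[i]);
    }

    printf("after churn:  %zu bytes in SEC\n", sec_bytes_all_arenas());
    return 0;
}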

View File

@@ -16,17 +16,14 @@ pa_shard_prefork0(tsdn_t *tsdn, pa_shard_t *shard) {
void
pa_shard_prefork2(tsdn_t *tsdn, pa_shard_t *shard) {
malloc_mutex_prefork(tsdn, &shard->pac.grow_mtx);
if (shard->ever_used_hpa) {
hpa_shard_prefork2(tsdn, &shard->hpa_shard);
sec_prefork2(tsdn, &shard->hpa_sec);
}
}
void
pa_shard_prefork3(tsdn_t *tsdn, pa_shard_t *shard) {
ecache_prefork(tsdn, &shard->pac.ecache_dirty);
ecache_prefork(tsdn, &shard->pac.ecache_muzzy);
ecache_prefork(tsdn, &shard->pac.ecache_retained);
malloc_mutex_prefork(tsdn, &shard->pac.grow_mtx);
if (shard->ever_used_hpa) {
hpa_shard_prefork3(tsdn, &shard->hpa_shard);
}
@@ -34,6 +31,16 @@ pa_shard_prefork3(tsdn_t *tsdn, pa_shard_t *shard) {
void
pa_shard_prefork4(tsdn_t *tsdn, pa_shard_t *shard) {
ecache_prefork(tsdn, &shard->pac.ecache_dirty);
ecache_prefork(tsdn, &shard->pac.ecache_muzzy);
ecache_prefork(tsdn, &shard->pac.ecache_retained);
if (shard->ever_used_hpa) {
hpa_shard_prefork4(tsdn, &shard->hpa_shard);
}
}
void
pa_shard_prefork5(tsdn_t *tsdn, pa_shard_t *shard) {
edata_cache_prefork(tsdn, &shard->edata_cache);
}
@@ -47,6 +54,7 @@ pa_shard_postfork_parent(tsdn_t *tsdn, pa_shard_t *shard) {
malloc_mutex_postfork_parent(tsdn, &shard->pac.decay_dirty.mtx);
malloc_mutex_postfork_parent(tsdn, &shard->pac.decay_muzzy.mtx);
if (shard->ever_used_hpa) {
sec_postfork_parent(tsdn, &shard->hpa_sec);
hpa_shard_postfork_parent(tsdn, &shard->hpa_shard);
}
}
@@ -61,6 +69,7 @@ pa_shard_postfork_child(tsdn_t *tsdn, pa_shard_t *shard) {
malloc_mutex_postfork_child(tsdn, &shard->pac.decay_dirty.mtx);
malloc_mutex_postfork_child(tsdn, &shard->pac.decay_muzzy.mtx);
if (shard->ever_used_hpa) {
sec_postfork_child(tsdn, &shard->hpa_sec);
hpa_shard_postfork_child(tsdn, &shard->hpa_shard);
}
}
@@ -76,7 +85,8 @@ pa_shard_basic_stats_merge(pa_shard_t *shard, size_t *nactive, size_t *ndirty,
void
pa_shard_stats_merge(tsdn_t *tsdn, pa_shard_t *shard,
pa_shard_stats_t *pa_shard_stats_out, pac_estats_t *estats_out,
hpa_shard_stats_t *hpa_stats_out, size_t *resident) {
hpa_shard_stats_t *hpa_stats_out, sec_stats_t *sec_stats_out,
size_t *resident) {
cassert(config_stats);
pa_shard_stats_out->pac_stats.retained +=
@@ -149,6 +159,7 @@ pa_shard_stats_merge(tsdn_t *tsdn, pa_shard_t *shard,
&shard->hpa_shard.psset.slab_stats[i]);
}
malloc_mutex_unlock(tsdn, &shard->hpa_shard.mtx);
sec_stats_merge(tsdn, &shard->hpa_sec, sec_stats_out);
}
}
@@ -182,5 +193,7 @@ pa_shard_mtx_stats_read(tsdn_t *tsdn, pa_shard_t *shard,
pa_shard_mtx_stats_read_single(tsdn, mutex_prof_data,
&shard->hpa_shard.grow_mtx,
arena_prof_mutex_hpa_shard_grow);
sec_mutex_stats_read(tsdn, &shard->hpa_sec,
&mutex_prof_data[arena_prof_mutex_hpa_sec]);
}
}

View File

@@ -678,6 +678,11 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i) {
CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.ninactive",
i, &ninactive, size_t);
size_t sec_bytes;
CTL_M2_GET("stats.arenas.0.hpa_sec_bytes", i, &sec_bytes, size_t);
emitter_kv(emitter, "sec_bytes", "Bytes in small extent cache",
emitter_type_size, &sec_bytes);
emitter_table_printf(emitter,
"HPA shard stats:\n"
" In full slabs:\n"
@@ -1194,6 +1199,9 @@ stats_general_print(emitter_t *emitter) {
OPT_WRITE_SIZE_T("hpa_slab_max_alloc")
OPT_WRITE_SIZE_T("hpa_small_max")
OPT_WRITE_SIZE_T("hpa_large_min")
OPT_WRITE_SIZE_T("hpa_sec_max_alloc")
OPT_WRITE_SIZE_T("hpa_sec_max_bytes")
OPT_WRITE_SIZE_T("hpa_sec_nshards")
OPT_WRITE_CHAR_P("metadata_thp")
OPT_WRITE_BOOL_MUTABLE("background_thread", "background_thread")
OPT_WRITE_SSIZE_T_MUTABLE("dirty_decay_ms", "arenas.dirty_decay_ms")
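
The same counter also shows up in the human-readable dump via the emitter key added above ("Bytes in small extent cache", under each arena's HPA shard section, stats-enabled builds only). A minimal sketch:

#include <stdlib.h>
#include <jemalloc/jemalloc.h>

int
main(void) {
    /* A little churn so the per-arena sections have something to report. */
    for (int i = 0; i < 32; i++) {
        free(malloc(24 * 1024));
    }
    /* NULL write callback prints to stderr; NULL opts selects everything. */
    malloc_stats_print(NULL, NULL, NULL);
    return 0;
}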

View File

@@ -716,9 +716,11 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache, bool tsd_tcache) {
if (arena_nthreads_get(arena, false) == 0 &&
!background_thread_enabled()) {
/* Force purging when no threads assigned to the arena anymore. */
arena_decay(tsd_tsdn(tsd), arena, false, true);
arena_decay(tsd_tsdn(tsd), arena,
/* is_background_thread */ false, /* all */ true);
} else {
arena_decay(tsd_tsdn(tsd), arena, false, false);
arena_decay(tsd_tsdn(tsd), arena,
/* is_background_thread */ false, /* all */ false);
}
}