HPA: Do deferred operations on background threads.

David Goldblatt, 2021-06-07 11:45:57 -07:00 (committed by David Goldblatt)
parent 583284f2d9
commit 1d4a7666d5
12 changed files with 256 additions and 7 deletions

@@ -221,6 +221,7 @@ TESTS_UNIT := \
 	$(srcroot)test/unit/hash.c \
 	$(srcroot)test/unit/hook.c \
 	$(srcroot)test/unit/hpa.c \
+	$(srcroot)test/unit/hpa_background_thread.c \
 	$(srcroot)test/unit/hpa_central.c \
 	$(srcroot)test/unit/hpdata.c \
 	$(srcroot)test/unit/huge.c \

@@ -2,6 +2,7 @@
 #define JEMALLOC_INTERNAL_BACKGROUND_THREAD_EXTERNS_H
 
 extern bool opt_background_thread;
+extern ssize_t opt_background_thread_hpa_interval_max_ms;
 extern size_t opt_max_background_threads;
 extern malloc_mutex_t background_thread_lock;
 extern atomic_b_t background_thread_enabled_state;

@@ -11,6 +11,14 @@
 #define MAX_BACKGROUND_THREAD_LIMIT MALLOCX_ARENA_LIMIT
 #define DEFAULT_NUM_BACKGROUND_THREAD 4
 
+/*
+ * These exist only as a transitional state.  Eventually, deferral should be
+ * part of the PAI, and each implementation can indicate wait times with more
+ * specificity.
+ */
+#define BACKGROUND_THREAD_HPA_INTERVAL_MAX_UNINITIALIZED (-2)
+#define BACKGROUND_THREAD_HPA_INTERVAL_MAX_DEFAULT_WHEN_ENABLED 5000
+
 typedef enum {
 	background_thread_stopped,
 	background_thread_started,
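As a reading aid: -2 marks a value that malloc_conf initialization has not yet resolved, -1 marks the HPA deferral interval as disabled, and any value >= 0 is a maximum wakeup interval in milliseconds. The helper below is purely illustrative and not part of this commit:

static inline bool
background_thread_hpa_interval_is_active(ssize_t interval_max_ms) {
	/* Illustrative only: -2 = unresolved, -1 = disabled, >= 0 = max ms. */
	return interval_max_ms >= 0;
}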

@@ -172,11 +172,21 @@ bool pa_shrink(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size,
  */
 void pa_dalloc(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata,
     bool *generated_dirty);
 
 bool pa_decay_ms_set(tsdn_t *tsdn, pa_shard_t *shard, extent_state_t state,
     ssize_t decay_ms, pac_purge_eagerness_t eagerness);
 ssize_t pa_decay_ms_get(pa_shard_t *shard, extent_state_t state);
 
+/*
+ * Do deferred work on this PA shard.
+ *
+ * Morally, this should do both PAC decay and the HPA deferred work.  For now,
+ * though, the arena, background thread, and PAC modules are tightly interwoven
+ * in a way that's tricky to extricate, so we only do the HPA-specific parts.
+ */
+void pa_shard_set_deferral_allowed(tsdn_t *tsdn, pa_shard_t *shard,
+    bool deferral_allowed);
+void pa_shard_do_deferred_work(tsdn_t *tsdn, pa_shard_t *shard);
+
 /******************************************************************************/
 /*
  * Various bits of "boring" functionality that are still part of this module,
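The arena hunk below is the real call site for this API; as a self-contained sketch of the intended pattern, a periodic driver would simply walk its shards. The shard array and count here are hypothetical:

static void
pa_do_deferred_work_all(tsdn_t *tsdn, pa_shard_t **shards, size_t nshards) {
	/* Hypothetical driver; the commit's actual caller is
	 * arena_do_deferred_work, run from the background thread. */
	for (size_t i = 0; i < nshards; i++) {
		pa_shard_do_deferred_work(tsdn, shards[i]);
	}
}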

@@ -465,6 +465,7 @@ arena_decay(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all) {
 void
 arena_do_deferred_work(tsdn_t *tsdn, arena_t *arena) {
 	arena_decay(tsdn, arena, true, false);
+	pa_shard_do_deferred_work(tsdn, &arena->pa_shard);
 }
 
 void

@@ -13,6 +13,13 @@ JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS
 /* Read-only after initialization. */
 bool opt_background_thread = BACKGROUND_THREAD_DEFAULT;
 size_t opt_max_background_threads = MAX_BACKGROUND_THREAD_LIMIT + 1;
+/*
+ * This is disabled (and set to -1) if the HPA is.  If the HPA is enabled,
+ * malloc_conf initialization sets it to
+ * BACKGROUND_THREAD_HPA_INTERVAL_MAX_DEFAULT_WHEN_ENABLED.
+ */
+ssize_t opt_background_thread_hpa_interval_max_ms =
+    BACKGROUND_THREAD_HPA_INTERVAL_MAX_UNINITIALIZED;
 
 /* Used for thread creation, termination and stats. */
 malloc_mutex_t background_thread_lock;
@@ -209,7 +216,20 @@ arena_decay_compute_purge_interval(tsdn_t *tsdn, arena_t *arena) {
 	i2 = arena_decay_compute_purge_interval_impl(tsdn,
 	    &arena->pa_shard.pac.decay_muzzy, &arena->pa_shard.pac.ecache_muzzy);
 
-	return i1 < i2 ? i1 : i2;
+	uint64_t min_so_far = i1 < i2 ? i1 : i2;
+	if (opt_background_thread_hpa_interval_max_ms >= 0) {
+		uint64_t hpa_interval = 1000 * 1000 *
+		    (uint64_t)opt_background_thread_hpa_interval_max_ms;
+		if (hpa_interval < min_so_far) {
+			if (hpa_interval < BACKGROUND_THREAD_MIN_INTERVAL_NS) {
+				min_so_far = BACKGROUND_THREAD_MIN_INTERVAL_NS;
+			} else {
+				min_so_far = hpa_interval;
+			}
+		}
+	}
+
+	return min_so_far;
 }
 
 static void
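As a worked example of the conversion above: with the 50ms interval the test harness below uses, the HPA wakeup is 50 * 1000 * 1000 = 50,000,000ns, and it only lowers the wakeup when it beats the decay-driven interval while staying at or above BACKGROUND_THREAD_MIN_INTERVAL_NS. A minimal standalone check, with illustrative values:

#include <assert.h>
#include <stdint.h>
#include <sys/types.h>

int
main(void) {
	ssize_t opt_ms = 50; /* illustrative; matches the test .sh below */
	uint64_t hpa_interval = 1000 * 1000 * (uint64_t)opt_ms;
	assert(hpa_interval == 50000000); /* 50ms expressed in nanoseconds */
	return 0;
}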
@@ -607,16 +627,16 @@ background_threads_enable(tsd_t *tsd) {
 	malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock);
 
 	VARIABLE_ARRAY(bool, marked, max_background_threads);
-	unsigned i, nmarked;
-	for (i = 0; i < max_background_threads; i++) {
+	unsigned nmarked;
+	for (unsigned i = 0; i < max_background_threads; i++) {
 		marked[i] = false;
 	}
 	nmarked = 0;
 	/* Thread 0 is required and created at the end. */
 	marked[0] = true;
 	/* Mark the threads we need to create for thread 0. */
-	unsigned n = narenas_total_get();
-	for (i = 1; i < n; i++) {
+	unsigned narenas = narenas_total_get();
+	for (unsigned i = 1; i < narenas; i++) {
 		if (marked[i % max_background_threads] ||
 		    arena_get(tsd_tsdn(tsd), i, false) == NULL) {
 			continue;
@@ -633,7 +653,18 @@ background_threads_enable(tsd_t *tsd) {
 		}
 	}
 
-	return background_thread_create_locked(tsd, 0);
+	bool err = background_thread_create_locked(tsd, 0);
+	if (err) {
+		return true;
+	}
+	for (unsigned i = 0; i < narenas; i++) {
+		arena_t *arena = arena_get(tsd_tsdn(tsd), i, false);
+		if (arena != NULL) {
+			pa_shard_set_deferral_allowed(tsd_tsdn(tsd),
+			    &arena->pa_shard, true);
+		}
+	}
+	return false;
 }
 
 bool
@@ -647,6 +678,14 @@ background_threads_disable(tsd_t *tsd) {
 		return true;
 	}
 	assert(n_background_threads == 0);
+	unsigned narenas = narenas_total_get();
+	for (unsigned i = 0; i < narenas; i++) {
+		arena_t *arena = arena_get(tsd_tsdn(tsd), i, false);
+		if (arena != NULL) {
+			pa_shard_set_deferral_allowed(tsd_tsdn(tsd),
+			    &arena->pa_shard, false);
+		}
+	}
 
 	return false;
 }

@@ -111,6 +111,7 @@ CTL_PROTO(opt_percpu_arena)
 CTL_PROTO(opt_oversize_threshold)
 CTL_PROTO(opt_background_thread)
 CTL_PROTO(opt_max_background_threads)
+CTL_PROTO(opt_background_thread_hpa_interval_max_ms)
 CTL_PROTO(opt_dirty_decay_ms)
 CTL_PROTO(opt_muzzy_decay_ms)
 CTL_PROTO(opt_stats_print)
@@ -423,6 +424,8 @@ static const ctl_named_node_t opt_node[] = {
 	{NAME("oversize_threshold"), CTL(opt_oversize_threshold)},
 	{NAME("background_thread"), CTL(opt_background_thread)},
 	{NAME("max_background_threads"), CTL(opt_max_background_threads)},
+	{NAME("background_thread_hpa_interval_max_ms"),
+	    CTL(opt_background_thread_hpa_interval_max_ms)},
 	{NAME("dirty_decay_ms"), CTL(opt_dirty_decay_ms)},
 	{NAME("muzzy_decay_ms"), CTL(opt_muzzy_decay_ms)},
 	{NAME("stats_print"), CTL(opt_stats_print)},
@@ -2139,6 +2142,8 @@ CTL_RO_NL_GEN(opt_percpu_arena, percpu_arena_mode_names[opt_percpu_arena],
 CTL_RO_NL_GEN(opt_oversize_threshold, opt_oversize_threshold, size_t)
 CTL_RO_NL_GEN(opt_background_thread, opt_background_thread, bool)
 CTL_RO_NL_GEN(opt_max_background_threads, opt_max_background_threads, size_t)
+CTL_RO_NL_GEN(opt_background_thread_hpa_interval_max_ms,
+    opt_background_thread_hpa_interval_max_ms, ssize_t)
 CTL_RO_NL_GEN(opt_dirty_decay_ms, opt_dirty_decay_ms, ssize_t)
 CTL_RO_NL_GEN(opt_muzzy_decay_ms, opt_muzzy_decay_ms, ssize_t)
 CTL_RO_NL_GEN(opt_stats_print, opt_stats_print, bool)
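Since the node is generated with CTL_RO_NL_GEN, it is read-only; a minimal sketch of querying it through the public API, assuming jemalloc is the linked allocator:

#include <jemalloc/jemalloc.h>
#include <stdio.h>
#include <sys/types.h>

int
main(void) {
	ssize_t ms;
	size_t sz = sizeof(ms);
	/* Reads the opt.background_thread_hpa_interval_max_ms node added above. */
	if (mallctl("opt.background_thread_hpa_interval_max_ms", (void *)&ms,
	    &sz, NULL, 0) == 0) {
		printf("background_thread_hpa_interval_max_ms: %zd\n", ms);
	}
	return 0;
}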

@@ -1410,6 +1410,10 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS],
 			    CONF_CHECK_MIN, CONF_CHECK_MAX,
 			    true);
 			CONF_HANDLE_BOOL(opt_hpa, "hpa")
+			CONF_HANDLE_SSIZE_T(
+			    opt_background_thread_hpa_interval_max_ms,
+			    "background_thread_hpa_interval_max_ms", -1,
+			    SSIZE_MAX)
 			CONF_HANDLE_SIZE_T(opt_hpa_opts.slab_max_alloc,
 			    "hpa_slab_max_alloc", PAGE, HUGEPAGE,
 			    CONF_CHECK_MIN, CONF_CHECK_MAX, true);
@@ -1659,6 +1663,11 @@ malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) {
 	malloc_conf_init_helper(NULL, NULL, true, opts_cache, buf);
 	malloc_conf_init_helper(sc_data, bin_shard_sizes, false, opts_cache,
 	    NULL);
+	if (opt_hpa && opt_background_thread_hpa_interval_max_ms
+	    == BACKGROUND_THREAD_HPA_INTERVAL_MAX_UNINITIALIZED) {
+		opt_background_thread_hpa_interval_max_ms =
+		    BACKGROUND_THREAD_HPA_INTERVAL_MAX_DEFAULT_WHEN_ENABLED;
+	}
 }
 
 #undef MALLOC_CONF_NSOURCES
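Correspondingly, the option can be set like any other conf key; a minimal sketch using jemalloc's compiled-in configuration string (the 100ms value is illustrative). Note that, per the hunk above, leaving the key unset while hpa:true is in effect resolves it to the 5000ms default:

#include <jemalloc/jemalloc.h>

/* Compiled-in configuration; read during malloc_conf initialization. */
const char *malloc_conf = "hpa:true,background_thread:true,"
    "background_thread_hpa_interval_max_ms:100";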

@@ -208,3 +208,15 @@ ssize_t
 pa_decay_ms_get(pa_shard_t *shard, extent_state_t state) {
 	return pac_decay_ms_get(&shard->pac, state);
 }
+
+void
+pa_shard_set_deferral_allowed(tsdn_t *tsdn, pa_shard_t *shard,
+    bool deferral_allowed) {
+	hpa_shard_set_deferral_allowed(tsdn, &shard->hpa_shard,
+	    deferral_allowed);
+}
+
+void
+pa_shard_do_deferred_work(tsdn_t *tsdn, pa_shard_t *shard) {
+	hpa_shard_do_deferred_work(tsdn, &shard->hpa_shard);
+}

@@ -1494,6 +1494,7 @@ stats_general_print(emitter_t *emitter) {
 	OPT_WRITE_SIZE_T("hpa_sec_batch_fill_extra")
 	OPT_WRITE_CHAR_P("metadata_thp")
 	OPT_WRITE_BOOL_MUTABLE("background_thread", "background_thread")
+	OPT_WRITE_SSIZE_T("background_thread_hpa_interval_max_ms")
 	OPT_WRITE_SSIZE_T_MUTABLE("dirty_decay_ms", "arenas.dirty_decay_ms")
 	OPT_WRITE_SSIZE_T_MUTABLE("muzzy_decay_ms", "arenas.muzzy_decay_ms")
 	OPT_WRITE_SIZE_T("lg_extent_max_active_fit")

@@ -0,0 +1,158 @@
#include "test/jemalloc_test.h"
#include "test/sleep.h"

static void
sleep_for_background_thread_interval() {
	/*
	 * The sleep interval set in our .sh file is 50ms.  So it should
	 * definitely run if we sleep for four times that.
	 */
	sleep_ns(200 * 1000 * 1000);
}
static unsigned
create_arena() {
	unsigned arena_ind;
	size_t sz;

	sz = sizeof(unsigned);
	expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 2),
	    0, "Unexpected mallctl() failure");
	return arena_ind;
}
static size_t
get_empty_ndirty(unsigned arena_ind) {
	int err;
	size_t ndirty_huge;
	size_t ndirty_nonhuge;
	uint64_t epoch = 1;
	size_t sz = sizeof(epoch);
	err = je_mallctl("epoch", (void *)&epoch, &sz, (void *)&epoch,
	    sizeof(epoch));
	expect_d_eq(0, err, "Unexpected mallctl() failure");

	size_t mib[6];
	size_t miblen = sizeof(mib)/sizeof(mib[0]);
	err = mallctlnametomib(
	    "stats.arenas.0.hpa_shard.empty_slabs.ndirty_nonhuge", mib,
	    &miblen);
	expect_d_eq(0, err, "Unexpected mallctlnametomib() failure");
	sz = sizeof(ndirty_nonhuge);
	mib[2] = arena_ind;
	err = mallctlbymib(mib, miblen, &ndirty_nonhuge, &sz, NULL, 0);
	expect_d_eq(0, err, "Unexpected mallctlbymib() failure");

	err = mallctlnametomib(
	    "stats.arenas.0.hpa_shard.empty_slabs.ndirty_huge", mib,
	    &miblen);
	expect_d_eq(0, err, "Unexpected mallctlnametomib() failure");
	sz = sizeof(ndirty_huge);
	mib[2] = arena_ind;
	err = mallctlbymib(mib, miblen, &ndirty_huge, &sz, NULL, 0);
	expect_d_eq(0, err, "Unexpected mallctlbymib() failure");

	return ndirty_huge + ndirty_nonhuge;
}
static void
set_background_thread_enabled(bool enabled) {
	int err;
	err = je_mallctl("background_thread", NULL, NULL, &enabled,
	    sizeof(enabled));
	expect_d_eq(0, err, "Unexpected mallctl failure");
}
static void
expect_purging(unsigned arena_ind, bool expect_deferred) {
	size_t empty_ndirty;

	empty_ndirty = get_empty_ndirty(arena_ind);
	expect_zu_eq(0, empty_ndirty, "Expected arena to start unused.");

	/*
	 * It's possible that we get unlucky with our stats collection timing,
	 * and the background thread runs in between the deallocation and the
	 * stats collection.  So we retry 10 times, and see if we *ever* see
	 * deferred reclamation.
	 */
	bool observed_dirty_page = false;
	for (int i = 0; i < 10; i++) {
		void *ptr = mallocx(PAGE,
		    MALLOCX_TCACHE_NONE | MALLOCX_ARENA(arena_ind));
		empty_ndirty = get_empty_ndirty(arena_ind);
		expect_zu_eq(0, empty_ndirty, "All pages should be active");
		dallocx(ptr, MALLOCX_TCACHE_NONE);
		empty_ndirty = get_empty_ndirty(arena_ind);
		if (expect_deferred) {
			expect_true(empty_ndirty == 0 || empty_ndirty == 1,
			    "Unexpected extra dirty page count: %zu",
			    empty_ndirty);
		} else {
			assert_zu_eq(0, empty_ndirty,
			    "Saw dirty pages without deferred purging");
		}
		if (empty_ndirty > 0) {
			observed_dirty_page = true;
			break;
		}
	}
	expect_b_eq(expect_deferred, observed_dirty_page, "");

	if (expect_deferred) {
		sleep_for_background_thread_interval();
	}

	empty_ndirty = get_empty_ndirty(arena_ind);
	expect_zu_eq(0, empty_ndirty, "Should have seen a background purge");
}
TEST_BEGIN(test_hpa_background_thread_purges) {
	test_skip_if(!config_stats);
	test_skip_if(!hpa_supported());
	test_skip_if(!have_background_thread);

	unsigned arena_ind = create_arena();
	/*
	 * Our .sh sets dirty mult to 0, so all dirty pages should get purged
	 * any time any thread frees.
	 */
	expect_purging(arena_ind, /* expect_deferred */ true);
}
TEST_END

TEST_BEGIN(test_hpa_background_thread_enable_disable) {
	test_skip_if(!config_stats);
	test_skip_if(!hpa_supported());
	test_skip_if(!have_background_thread);

	unsigned arena_ind = create_arena();

	set_background_thread_enabled(false);
	expect_purging(arena_ind, false);

	set_background_thread_enabled(true);
	expect_purging(arena_ind, true);
}
TEST_END
int
main(void) {
	/*
	 * OK, this is a sort of nasty hack.  We don't want to add *another*
	 * config option for HPA (the intent is that it becomes available on
	 * more platforms over time, and we're trying to prune back config
	 * options generally).  But we'll get initialization errors on other
	 * platforms if we set hpa:true in the MALLOC_CONF (even if we set
	 * abort_conf:false as well).  So we reach into the internals and set
	 * them directly, but only if we know that we're actually going to do
	 * something nontrivial in the tests.
	 */
	if (config_stats && hpa_supported() && have_background_thread) {
		opt_hpa = true;
		opt_background_thread = true;
	}
	return test_no_reentrancy(
	    test_hpa_background_thread_purges,
	    test_hpa_background_thread_enable_disable);
}

@@ -0,0 +1,4 @@
#!/bin/sh

export MALLOC_CONF="hpa_dirty_mult:0,background_thread_hpa_interval_max_ms:50,hpa_sec_nshards:0"