HPA: Hugification hysteresis.

We wait a while after deciding a huge extent should get hugified to see if it
gets purged before long.  This avoids hugifying extents that might shortly get
dehugified for purging.

Rename and use the hpa_dehugification_threshold option support code for this,
since it's now ignored.
This commit is contained in:
David Goldblatt 2021-06-14 14:53:23 -07:00 committed by David Goldblatt
parent 113938b6f4
commit 6630c59896
9 changed files with 234 additions and 60 deletions

View File

@ -8,6 +8,7 @@ struct hpa_hooks_s {
void (*purge)(void *ptr, size_t size);
void (*hugify)(void *ptr, size_t size);
void (*dehugify)(void *ptr, size_t size);
void (*curtime)(nstime_t *r_time);
};
extern hpa_hooks_t hpa_hooks_default;

View File

@ -17,16 +17,13 @@ struct hpa_shard_opts_s {
* any allocation request.
*/
size_t slab_max_alloc;
/*
* When the number of active bytes in a hugepage is >=
* hugification_threshold, we force hugify it.
*/
size_t hugification_threshold;
/*
* When the number of dirty bytes in a hugepage is >=
* dehugification_threshold, we force dehugify it.
*/
size_t dehugification_threshold;
/*
* The HPA purges whenever the number of pages exceeds dirty_mult *
* active_pages. This may be set to (fxp_t)-1 to disable purging.
@ -40,6 +37,12 @@ struct hpa_shard_opts_s {
* ourselves for encapsulation purposes.
*/
bool deferral_allowed;
/*
* How long a hugepage has to be a hugification candidate before it will
* actually get hugified.
*/
uint64_t hugify_delay_ms;
};
#define HPA_SHARD_OPTS_DEFAULT { \
@ -47,8 +50,6 @@ struct hpa_shard_opts_s {
64 * 1024, \
/* hugification_threshold */ \
HUGEPAGE * 95 / 100, \
/* dehugification_threshold */ \
HUGEPAGE * 20 / 100, \
/* dirty_mult */ \
FXP_INIT_PERCENT(25), \
/* \
@ -58,7 +59,9 @@ struct hpa_shard_opts_s {
* or by an hpa_shard_set_deferral_allowed call, so the value \
* we put here doesn't matter. \
*/ \
false \
false, \
/* hugify_delay_ms */ \
10 * 1000 \
}
#endif /* JEMALLOC_INTERNAL_HPA_OPTS_H */

View File

@ -61,6 +61,8 @@ struct hpdata_s {
/* And with hugifying. */
bool h_hugify_allowed;
/* When we became a hugification candidate. */
nstime_t h_time_hugify_allowed;
bool h_in_psset_hugify_container;
/* Whether or not a purge or hugify is currently happening. */
@ -175,8 +177,8 @@ hpdata_purge_allowed_get(const hpdata_t *hpdata) {
static inline void
hpdata_purge_allowed_set(hpdata_t *hpdata, bool purge_allowed) {
assert(purge_allowed == false || !hpdata->h_mid_purge);
hpdata->h_purge_allowed = purge_allowed;
assert(purge_allowed == false || !hpdata->h_mid_purge);
hpdata->h_purge_allowed = purge_allowed;
}
static inline bool
@ -185,9 +187,20 @@ hpdata_hugify_allowed_get(const hpdata_t *hpdata) {
}
static inline void
hpdata_hugify_allowed_set(hpdata_t *hpdata, bool hugify_allowed) {
assert(hugify_allowed == false || !hpdata->h_mid_hugify);
hpdata->h_hugify_allowed = hugify_allowed;
hpdata_allow_hugify(hpdata_t *hpdata, nstime_t now) {
assert(!hpdata->h_mid_hugify);
hpdata->h_hugify_allowed = true;
hpdata->h_time_hugify_allowed = now;
}
static inline nstime_t
hpdata_time_hugify_allowed(hpdata_t *hpdata) {
return hpdata->h_time_hugify_allowed;
}
/*
 * Clears the hugify-allowed flag on this hpdata.  The stored candidate
 * timestamp (h_time_hugify_allowed) is left as it was.
 */
static inline void
hpdata_disallow_hugify(hpdata_t *hpdata) {
	hpdata->h_hugify_allowed = false;
}
static inline bool

View File

@ -96,7 +96,7 @@ CTL_PROTO(opt_confirm_conf)
CTL_PROTO(opt_hpa)
CTL_PROTO(opt_hpa_slab_max_alloc)
CTL_PROTO(opt_hpa_hugification_threshold)
CTL_PROTO(opt_hpa_dehugification_threshold)
CTL_PROTO(opt_hpa_hugify_delay_ms)
CTL_PROTO(opt_hpa_dirty_mult)
CTL_PROTO(opt_hpa_sec_nshards)
CTL_PROTO(opt_hpa_sec_max_alloc)
@ -406,8 +406,7 @@ static const ctl_named_node_t opt_node[] = {
{NAME("hpa_slab_max_alloc"), CTL(opt_hpa_slab_max_alloc)},
{NAME("hpa_hugification_threshold"),
CTL(opt_hpa_hugification_threshold)},
{NAME("hpa_dehugification_threshold"),
CTL(opt_hpa_dehugification_threshold)},
{NAME("hpa_hugify_delay_ms"), CTL(opt_hpa_hugify_delay_ms)},
{NAME("hpa_dirty_mult"), CTL(opt_hpa_dirty_mult)},
{NAME("hpa_sec_nshards"), CTL(opt_hpa_sec_nshards)},
{NAME("hpa_sec_max_alloc"), CTL(opt_hpa_sec_max_alloc)},
@ -2114,8 +2113,8 @@ CTL_RO_NL_GEN(opt_confirm_conf, opt_confirm_conf, bool)
CTL_RO_NL_GEN(opt_hpa, opt_hpa, bool)
CTL_RO_NL_GEN(opt_hpa_hugification_threshold,
opt_hpa_opts.hugification_threshold, size_t)
CTL_RO_NL_GEN(opt_hpa_dehugification_threshold,
opt_hpa_opts.dehugification_threshold, size_t)
CTL_RO_NL_GEN(opt_hpa_hugify_delay_ms, opt_hpa_opts.hugify_delay_ms, uint64_t)
/*
* This will have to change before we publicly document this option; fxp_t and
* its representation are internal implementation details.

View File

@ -198,7 +198,7 @@ hpa_update_purge_hugify_eligibility(tsdn_t *tsdn, hpa_shard_t *shard,
malloc_mutex_assert_owner(tsdn, &shard->mtx);
if (hpdata_changing_state_get(ps)) {
hpdata_purge_allowed_set(ps, false);
hpdata_hugify_allowed_set(ps, false);
hpdata_disallow_hugify(ps);
return;
}
/*
@ -226,7 +226,24 @@ hpa_update_purge_hugify_eligibility(tsdn_t *tsdn, hpa_shard_t *shard,
hpdata_purge_allowed_set(ps, hpdata_ndirty_get(ps) > 0);
if (hpa_good_hugification_candidate(shard, ps)
&& !hpdata_huge_get(ps)) {
hpdata_hugify_allowed_set(ps, true);
nstime_t now;
shard->hooks.curtime(&now);
hpdata_allow_hugify(ps, now);
}
/*
* Once a hugepage has become eligible for hugification, we don't mark
* it as ineligible just because it stops meeting the criteria (this
* could lead to situations where a hugepage that spends most of its
* time meeting the criteria never quite gets hugified if there are
* intervening deallocations). The idea is that the hugification delay
* will allow them to get purged, resetting their "hugify-allowed" bit.
* If they don't get purged, then the hugification isn't hurting and
* might help. As an exception, we don't hugify hugepages that are now
* empty; it definitely doesn't help there until the hugepage gets
* reused, which is likely not for a while.
*/
if (hpdata_nactive_get(ps) == 0) {
hpdata_disallow_hugify(ps);
}
}
@ -309,7 +326,7 @@ hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) {
assert(hpdata_alloc_allowed_get(to_purge));
hpdata_mid_purge_set(to_purge, true);
hpdata_purge_allowed_set(to_purge, false);
hpdata_hugify_allowed_set(to_purge, false);
hpdata_disallow_hugify(to_purge);
/*
* Unlike with hugification (where concurrent
* allocations are allowed), concurrent allocation out
@ -386,6 +403,16 @@ hpa_try_hugify(tsdn_t *tsdn, hpa_shard_t *shard) {
assert(hpdata_hugify_allowed_get(to_hugify));
assert(!hpdata_changing_state_get(to_hugify));
/* Make sure that it's been hugifiable for long enough. */
nstime_t time_hugify_allowed = hpdata_time_hugify_allowed(to_hugify);
nstime_t nstime;
shard->hooks.curtime(&nstime);
nstime_subtract(&nstime, &time_hugify_allowed);
uint64_t millis = nstime_msec(&nstime);
if (millis < shard->opts.hugify_delay_ms) {
return false;
}
/*
* Don't let anyone else purge or hugify this page while
* we're hugifying it (allocations and deallocations are
@ -394,7 +421,7 @@ hpa_try_hugify(tsdn_t *tsdn, hpa_shard_t *shard) {
psset_update_begin(&shard->psset, to_hugify);
hpdata_mid_hugify_set(to_hugify, true);
hpdata_purge_allowed_set(to_hugify, false);
hpdata_hugify_allowed_set(to_hugify, false);
hpdata_disallow_hugify(to_hugify);
assert(hpdata_alloc_allowed_get(to_hugify));
psset_update_end(&shard->psset, to_hugify);
@ -421,9 +448,6 @@ hpa_try_hugify(tsdn_t *tsdn, hpa_shard_t *shard) {
static void
hpa_shard_maybe_do_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard,
bool forced) {
bool hugified;
bool purged;
size_t nloop = 0;
malloc_mutex_assert_owner(tsdn, &shard->mtx);
if (!forced && shard->opts.deferral_allowed) {
return;
@ -433,16 +457,29 @@ hpa_shard_maybe_do_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard,
* be done. Otherwise, bound latency to not be *too* bad by doing at
* most a small fixed number of operations.
*/
size_t maxloops = (forced ? (size_t)-1 : 8);
bool hugified = false;
bool purged = false;
size_t max_ops = (forced ? (size_t)-1 : 16);
size_t nops = 0;
do {
hugified = hpa_try_hugify(tsdn, shard);
malloc_mutex_assert_owner(tsdn, &shard->mtx);
/*
* Always purge before hugifying, to make sure we get some
* ability to hit our quiescence targets.
*/
purged = false;
if (hpa_should_purge(tsdn, shard)) {
while (hpa_should_purge(tsdn, shard) && nops < max_ops) {
purged = hpa_try_purge(tsdn, shard);
if (purged) {
nops++;
}
}
hugified = hpa_try_hugify(tsdn, shard);
if (hugified) {
nops++;
}
malloc_mutex_assert_owner(tsdn, &shard->mtx);
} while ((hugified || purged) && nloop++ < maxloops);
malloc_mutex_assert_owner(tsdn, &shard->mtx);
} while ((hugified || purged) && nops < max_ops);
}
static edata_t *

View File

@ -8,6 +8,7 @@ static void hpa_hooks_unmap(void *ptr, size_t size);
static void hpa_hooks_purge(void *ptr, size_t size);
static void hpa_hooks_hugify(void *ptr, size_t size);
static void hpa_hooks_dehugify(void *ptr, size_t size);
static void hpa_hooks_curtime(nstime_t *r_nstime);
hpa_hooks_t hpa_hooks_default = {
&hpa_hooks_map,
@ -15,6 +16,7 @@ hpa_hooks_t hpa_hooks_default = {
&hpa_hooks_purge,
&hpa_hooks_hugify,
&hpa_hooks_dehugify,
&hpa_hooks_curtime,
};
static void *
@ -44,3 +46,8 @@ hpa_hooks_dehugify(void *ptr, size_t size) {
bool err = pages_nohuge(ptr, size);
(void)err;
}
/*
 * Default curtime hook: fills in *r_nstime by delegating to nstime_update().
 */
static void
hpa_hooks_curtime(nstime_t *r_nstime) {
	nstime_update(r_nstime);
}

View File

@ -1145,6 +1145,9 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS],
#define CONF_HANDLE_INT64_T(o, n, min, max, check_min, check_max, clip) \
CONF_HANDLE_T_SIGNED(int64_t, o, n, min, max, \
check_min, check_max, clip)
#define CONF_HANDLE_UINT64_T(o, n, min, max, check_min, check_max, clip)\
CONF_HANDLE_T_U(uint64_t, o, n, min, max, \
check_min, check_max, clip)
#define CONF_HANDLE_SSIZE_T(o, n, min, max) \
CONF_HANDLE_T_SIGNED(ssize_t, o, n, min, max, \
CONF_CHECK_MIN, CONF_CHECK_MAX, false)
@ -1441,26 +1444,9 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS],
CONF_CONTINUE;
}
/* And the same for the dehugification_threhsold. */
CONF_HANDLE_SIZE_T(
opt_hpa_opts.dehugification_threshold,
"hpa_dehugification_threshold", PAGE, HUGEPAGE,
CONF_CHECK_MIN, CONF_CHECK_MAX, true);
if (CONF_MATCH("hpa_dehugification_threshold_ratio")) {
fxp_t ratio;
char *end;
bool err = fxp_parse(&ratio, v,
&end);
if (err || (size_t)(end - v) != vlen
|| ratio > FXP_INIT_INT(1)) {
CONF_ERROR("Invalid conf value",
k, klen, v, vlen);
} else {
opt_hpa_opts.dehugification_threshold =
fxp_mul_frac(HUGEPAGE, ratio);
}
CONF_CONTINUE;
}
opt_hpa_opts.hugify_delay_ms, "hpa_hugify_delay_ms",
0, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true);
if (CONF_MATCH("hpa_dirty_mult")) {
if (CONF_MATCH_VALUE("-1")) {

View File

@ -1376,7 +1376,7 @@ stats_general_print(emitter_t *emitter) {
uint64_t u64v;
int64_t i64v;
ssize_t ssv, ssv2;
size_t sv, bsz, usz, u32sz, i64sz, ssz, sssz, cpsz;
size_t sv, bsz, usz, u32sz, u64sz, i64sz, ssz, sssz, cpsz;
bsz = sizeof(bool);
usz = sizeof(unsigned);
@ -1385,6 +1385,7 @@ stats_general_print(emitter_t *emitter) {
cpsz = sizeof(const char *);
u32sz = sizeof(uint32_t);
i64sz = sizeof(int64_t);
u64sz = sizeof(uint64_t);
CTL_GET("version", &cpv, const char *);
emitter_kv(emitter, "version", "Version", emitter_type_string, &cpv);
@ -1442,6 +1443,8 @@ stats_general_print(emitter_t *emitter) {
#define OPT_WRITE_INT64(name) \
OPT_WRITE(name, i64v, i64sz, emitter_type_int64)
#define OPT_WRITE_UINT64(name) \
OPT_WRITE(name, u64v, u64sz, emitter_type_uint64)
#define OPT_WRITE_SIZE_T(name) \
OPT_WRITE(name, sv, ssz, emitter_type_size)
@ -1468,7 +1471,7 @@ stats_general_print(emitter_t *emitter) {
OPT_WRITE_BOOL("hpa")
OPT_WRITE_SIZE_T("hpa_slab_max_alloc")
OPT_WRITE_SIZE_T("hpa_hugification_threshold")
OPT_WRITE_SIZE_T("hpa_dehugification_threshold")
OPT_WRITE_UINT64("hpa_hugify_delay_ms")
if (je_mallctl("opt.hpa_dirty_mult", (void *)&u32v, &u32sz, NULL, 0)
== 0) {
/*

View File

@ -19,8 +19,21 @@ struct test_data_s {
emap_t emap;
};
/*
 * Baseline shard options for the tests in this file.  Tests either pass this
 * object directly or copy it and override individual fields.  The initializer
 * is positional, so the values must stay in hpa_shard_opts_t field order.
 */
static hpa_shard_opts_t test_hpa_shard_opts_default = {
	ALLOC_MAX,		/* slab_max_alloc */
	HUGEPAGE,		/* hugification_threshold */
	FXP_INIT_PERCENT(25),	/* dirty_mult */
	false,			/* deferral_allowed */
	10 * 1000,		/* hugify_delay_ms */
};
static hpa_shard_t *
create_test_data() {
create_test_data(hpa_hooks_t *hooks, hpa_shard_opts_t *opts) {
bool err;
base_t *base = base_new(TSDN_NULL, /* ind */ SHARD_IND,
&ehooks_default_extent_hooks);
@ -37,12 +50,9 @@ create_test_data() {
err = emap_init(&test_data->emap, test_data->base, /* zeroed */ false);
assert_false(err, "");
hpa_shard_opts_t opts = HPA_SHARD_OPTS_DEFAULT;
opts.slab_max_alloc = ALLOC_MAX;
err = hpa_shard_init(&test_data->shard, &test_data->emap,
test_data->base, &test_data->shard_edata_cache, SHARD_IND,
&hpa_hooks_default, &opts);
hooks, opts);
assert_false(err, "");
return (hpa_shard_t *)test_data;
@ -58,7 +68,8 @@ destroy_test_data(hpa_shard_t *shard) {
TEST_BEGIN(test_alloc_max) {
test_skip_if(!hpa_supported());
hpa_shard_t *shard = create_test_data();
hpa_shard_t *shard = create_test_data(&hpa_hooks_default,
&test_hpa_shard_opts_default);
tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
edata_t *edata;
@ -134,7 +145,8 @@ node_remove(mem_tree_t *tree, edata_t *edata) {
TEST_BEGIN(test_stress) {
test_skip_if(!hpa_supported());
hpa_shard_t *shard = create_test_data();
hpa_shard_t *shard = create_test_data(&hpa_hooks_default,
&test_hpa_shard_opts_default);
tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
@ -224,7 +236,8 @@ expect_contiguous(edata_t **edatas, size_t nedatas) {
TEST_BEGIN(test_alloc_dalloc_batch) {
test_skip_if(!hpa_supported());
hpa_shard_t *shard = create_test_data();
hpa_shard_t *shard = create_test_data(&hpa_hooks_default,
&test_hpa_shard_opts_default);
tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
enum {NALLOCS = 8};
@ -282,6 +295,117 @@ TEST_BEGIN(test_alloc_dalloc_batch) {
}
TEST_END
/* Next fake address that defer_test_map will hand out. */
static uintptr_t defer_bump_ptr = HUGEPAGE * 123;
/*
 * Fake map hook: returns the current bump pointer as the "mapping" and
 * advances it by size.  No real mapping call is made here.
 */
static void *
defer_test_map(size_t size) {
	uintptr_t addr = defer_bump_ptr;
	defer_bump_ptr = addr + size;
	return (void *)addr;
}
/* Fake unmap hook: does nothing; both arguments are deliberately ignored. */
static void
defer_test_unmap(void *ptr, size_t size) {
	(void)size;
	(void)ptr;
}
/* Set to true by defer_test_purge so the test can observe a purge. */
static bool defer_purge_called = false;
/* Fake purge hook: records that it ran; its arguments are ignored. */
static void
defer_test_purge(void *ptr, size_t size) {
	defer_purge_called = true;
	(void)size;
	(void)ptr;
}
/* Set to true by defer_test_hugify so the test can observe a hugify. */
static bool defer_hugify_called = false;
/*
 * Fake hugify hook: records that it ran.  The arguments are explicitly
 * discarded, as in the other fake hooks, to avoid unused-parameter warnings.
 */
static void
defer_test_hugify(void *ptr, size_t size) {
	(void)ptr;
	(void)size;
	defer_hugify_called = true;
}
/* Set to true by defer_test_dehugify so the test can observe a dehugify. */
static bool defer_dehugify_called = false;
/*
 * Fake dehugify hook: records that it ran.  The arguments are explicitly
 * discarded, as in the other fake hooks, to avoid unused-parameter warnings.
 */
static void
defer_test_dehugify(void *ptr, size_t size) {
	(void)ptr;
	(void)size;
	defer_dehugify_called = true;
}
/* Simulated clock; the test moves time forward by assigning to this. */
static nstime_t defer_curtime;
/* Fake curtime hook: reports the simulated clock value. */
static void
defer_test_curtime(nstime_t *r_time) {
	*r_time = defer_curtime;
}
/*
 * Exercises the hugification delay using fake hooks and a simulated clock:
 *   1. Fill a hugepage; deferred work must not hugify before the delay.
 *   2. Advance the clock past the delay; deferred work must hugify.
 *   3. Free half the pages; deferred work must dehugify and purge.
 *   4. Refill, then free one page; after another delay the page must still
 *      get hugified, since it became a candidate before dipping below the
 *      threshold.
 */
TEST_BEGIN(test_defer_time) {
	test_skip_if(!hpa_supported());

	/* Route every shard hook to the fakes so calls can be observed. */
	hpa_hooks_t hooks;
	hooks.map = &defer_test_map;
	hooks.unmap = &defer_test_unmap;
	hooks.purge = &defer_test_purge;
	hooks.hugify = &defer_test_hugify;
	hooks.dehugify = &defer_test_dehugify;
	hooks.curtime = &defer_test_curtime;

	hpa_shard_opts_t opts = test_hpa_shard_opts_default;
	opts.deferral_allowed = true;

	hpa_shard_t *shard = create_test_data(&hooks, &opts);

	/* Start the simulated clock at zero. */
	nstime_init(&defer_curtime, 0);
	tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
	edata_t *edatas[HUGEPAGE_PAGES];
	for (int i = 0; i < (int)HUGEPAGE_PAGES; i++) {
		edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false);
		expect_ptr_not_null(edatas[i], "Unexpected null edata");
	}
	hpa_shard_do_deferred_work(tsdn, shard);
	expect_false(defer_hugify_called, "Hugified too early");

	/* Hugification delay is set to 10 seconds in options. */
	nstime_init2(&defer_curtime, 11, 0);
	hpa_shard_do_deferred_work(tsdn, shard);
	expect_true(defer_hugify_called, "Failed to hugify");

	defer_hugify_called = false;

	/* Purge.  Recall that dirty_mult is .25. */
	for (int i = 0; i < (int)HUGEPAGE_PAGES / 2; i++) {
		pai_dalloc(tsdn, &shard->pai, edatas[i]);
	}

	hpa_shard_do_deferred_work(tsdn, shard);

	expect_false(defer_hugify_called, "Hugified too early");
	expect_true(defer_dehugify_called, "Should have dehugified");
	expect_true(defer_purge_called, "Should have purged");
	defer_hugify_called = false;
	defer_dehugify_called = false;
	defer_purge_called = false;

	/*
	 * Refill the page.  We now meet the hugification threshold; we should
	 * be marked for pending hugify.
	 */
	for (int i = 0; i < (int)HUGEPAGE_PAGES / 2; i++) {
		edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false);
		expect_ptr_not_null(edatas[i], "Unexpected null edata");
	}
	/*
	 * We would be ineligible for hugification, had we not already met the
	 * threshold before dipping below it.
	 */
	pai_dalloc(tsdn, &shard->pai, edatas[0]);
	/* Wait for the threshold again. */
	nstime_init2(&defer_curtime, 22, 0);
	hpa_shard_do_deferred_work(tsdn, shard);
	/*
	 * The delay has elapsed, so hugification is expected here; the failure
	 * message must say so.
	 */
	expect_true(defer_hugify_called, "Failed to hugify");
	expect_false(defer_dehugify_called, "Unexpected dehugify");
	expect_false(defer_purge_called, "Unexpected purge");

	destroy_test_data(shard);
}
TEST_END
int
main(void) {
/*
@ -299,5 +423,6 @@ main(void) {
return test_no_reentrancy(
test_alloc_max,
test_stress,
test_alloc_dalloc_batch);
test_alloc_dalloc_batch,
test_defer_time);
}