From 6630c5989672cbbd5ec2369aaa46ce6f5ce1ed4e Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 14 Jun 2021 14:53:23 -0700 Subject: [PATCH] HPA: Hugification hysteresis. We wait a while after deciding a huge extent should get hugified to see if it gets purged before long. This avoids hugifying extents that might shortly get dehugified for purging. Rename and use the hpa_dehugification_threshold option support code for this, since it's now ignored. --- include/jemalloc/internal/hpa_hooks.h | 1 + include/jemalloc/internal/hpa_opts.h | 19 ++-- include/jemalloc/internal/hpdata.h | 23 ++++- src/ctl.c | 9 +- src/hpa.c | 61 ++++++++--- src/hpa_hooks.c | 7 ++ src/jemalloc.c | 24 +---- src/stats.c | 7 +- test/unit/hpa.c | 143 ++++++++++++++++++++++++-- 9 files changed, 234 insertions(+), 60 deletions(-) diff --git a/include/jemalloc/internal/hpa_hooks.h b/include/jemalloc/internal/hpa_hooks.h index 5c5b5f67..3e21d855 100644 --- a/include/jemalloc/internal/hpa_hooks.h +++ b/include/jemalloc/internal/hpa_hooks.h @@ -8,6 +8,7 @@ struct hpa_hooks_s { void (*purge)(void *ptr, size_t size); void (*hugify)(void *ptr, size_t size); void (*dehugify)(void *ptr, size_t size); + void (*curtime)(nstime_t *r_time); }; extern hpa_hooks_t hpa_hooks_default; diff --git a/include/jemalloc/internal/hpa_opts.h b/include/jemalloc/internal/hpa_opts.h index ef162193..2548f44f 100644 --- a/include/jemalloc/internal/hpa_opts.h +++ b/include/jemalloc/internal/hpa_opts.h @@ -17,16 +17,13 @@ struct hpa_shard_opts_s { * any allocation request. */ size_t slab_max_alloc; + /* * When the number of active bytes in a hugepage is >= * hugification_threshold, we force hugify it. */ size_t hugification_threshold; - /* - * When the number of dirty bytes in a hugepage is >= - * dehugification_threshold, we force dehugify it. - */ - size_t dehugification_threshold; + /* * The HPA purges whenever the number of pages exceeds dirty_mult * * active_pages. This may be set to (fxp_t)-1 to disable purging. @@ -40,6 +37,12 @@ struct hpa_shard_opts_s { * ourselves for encapsulation purposes. */ bool deferral_allowed; + + /* + * How long a hugepage has to be a hugification candidate before it will + * actually get hugified. + */ + uint64_t hugify_delay_ms; }; #define HPA_SHARD_OPTS_DEFAULT { \ @@ -47,8 +50,6 @@ struct hpa_shard_opts_s { 64 * 1024, \ /* hugification_threshold */ \ HUGEPAGE * 95 / 100, \ - /* dehugification_threshold */ \ - HUGEPAGE * 20 / 100, \ /* dirty_mult */ \ FXP_INIT_PERCENT(25), \ /* \ @@ -58,7 +59,9 @@ struct hpa_shard_opts_s { * or by an hpa_shard_set_deferral_allowed call, so the value \ * we put here doesn't matter. \ */ \ - false \ + false, \ + /* hugify_delay_ms */ \ + 10 * 1000 \ } #endif /* JEMALLOC_INTERNAL_HPA_OPTS_H */ diff --git a/include/jemalloc/internal/hpdata.h b/include/jemalloc/internal/hpdata.h index 32e26248..2a12add9 100644 --- a/include/jemalloc/internal/hpdata.h +++ b/include/jemalloc/internal/hpdata.h @@ -61,6 +61,8 @@ struct hpdata_s { /* And with hugifying. */ bool h_hugify_allowed; + /* When we became a hugification candidate. */ + nstime_t h_time_hugify_allowed; bool h_in_psset_hugify_container; /* Whether or not a purge or hugify is currently happening. 
*/
@@ -185,9 +187,20 @@ hpdata_hugify_allowed_get(const hpdata_t *hpdata) {
 }
 
 static inline void
-hpdata_hugify_allowed_set(hpdata_t *hpdata, bool hugify_allowed) {
-	assert(hugify_allowed == false || !hpdata->h_mid_hugify);
-	hpdata->h_hugify_allowed = hugify_allowed;
+hpdata_allow_hugify(hpdata_t *hpdata, nstime_t now) {
+	assert(!hpdata->h_mid_hugify);
+	hpdata->h_hugify_allowed = true;
+	hpdata->h_time_hugify_allowed = now;
+}
+
+static inline nstime_t
+hpdata_time_hugify_allowed(hpdata_t *hpdata) {
+	return hpdata->h_time_hugify_allowed;
+}
+
+static inline void
+hpdata_disallow_hugify(hpdata_t *hpdata) {
+	hpdata->h_hugify_allowed = false;
 }
 
 static inline bool
diff --git a/src/ctl.c b/src/ctl.c
index c66b4d8c..b3e62dfa 100644
--- a/src/ctl.c
+++ b/src/ctl.c
@@ -96,7 +96,7 @@ CTL_PROTO(opt_confirm_conf)
 CTL_PROTO(opt_hpa)
 CTL_PROTO(opt_hpa_slab_max_alloc)
 CTL_PROTO(opt_hpa_hugification_threshold)
-CTL_PROTO(opt_hpa_dehugification_threshold)
+CTL_PROTO(opt_hpa_hugify_delay_ms)
 CTL_PROTO(opt_hpa_dirty_mult)
 CTL_PROTO(opt_hpa_sec_nshards)
 CTL_PROTO(opt_hpa_sec_max_alloc)
@@ -406,8 +406,7 @@ static const ctl_named_node_t opt_node[] = {
 	{NAME("hpa_slab_max_alloc"), CTL(opt_hpa_slab_max_alloc)},
 	{NAME("hpa_hugification_threshold"),
 	    CTL(opt_hpa_hugification_threshold)},
-	{NAME("hpa_dehugification_threshold"),
-	    CTL(opt_hpa_dehugification_threshold)},
+	{NAME("hpa_hugify_delay_ms"), CTL(opt_hpa_hugify_delay_ms)},
 	{NAME("hpa_dirty_mult"), CTL(opt_hpa_dirty_mult)},
 	{NAME("hpa_sec_nshards"), CTL(opt_hpa_sec_nshards)},
 	{NAME("hpa_sec_max_alloc"), CTL(opt_hpa_sec_max_alloc)},
@@ -2114,8 +2113,8 @@ CTL_RO_NL_GEN(opt_confirm_conf, opt_confirm_conf, bool)
 CTL_RO_NL_GEN(opt_hpa, opt_hpa, bool)
 CTL_RO_NL_GEN(opt_hpa_hugification_threshold,
     opt_hpa_opts.hugification_threshold, size_t)
-CTL_RO_NL_GEN(opt_hpa_dehugification_threshold,
-    opt_hpa_opts.dehugification_threshold, size_t)
+CTL_RO_NL_GEN(opt_hpa_hugify_delay_ms, opt_hpa_opts.hugify_delay_ms, uint64_t)
+
 /*
  * This will have to change before we publicly document this option; fxp_t and
  * its representation are internal implementation details.
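
The hpa.c changes that follow implement the hysteresis itself: when a page
first qualifies, hpa_update_purge_hugify_eligibility records the time via the
new curtime hook, and hpa_try_hugify declines to act until hugify_delay_ms has
elapsed. The elapsed-time check composes jemalloc's internal nstime helpers; a
minimal sketch of the pattern (illustration only; in the actual patch the
check is inlined into hpa_try_hugify rather than factored into a helper):

    /* Has ps been a hugification candidate for at least hugify_delay_ms? */
    static bool
    hugify_delay_elapsed(hpa_shard_t *shard, hpdata_t *ps) {
    	nstime_t since = hpdata_time_hugify_allowed(ps);
    	nstime_t now;
    	shard->hooks.curtime(&now);	/* injectable clock hook */
    	nstime_subtract(&now, &since);	/* now -= since */
    	return nstime_msec(&now) >= shard->opts.hugify_delay_ms;
    }

Routing all time reads through hooks.curtime (rather than calling
nstime_update directly) is what makes the delay unit-testable: the test at the
bottom of this patch swaps in a fake clock and advances it by hand.
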
diff --git a/src/hpa.c b/src/hpa.c
index 07ad117f..4ae30b97 100644
--- a/src/hpa.c
+++ b/src/hpa.c
@@ -198,7 +198,7 @@ hpa_update_purge_hugify_eligibility(tsdn_t *tsdn, hpa_shard_t *shard,
 	malloc_mutex_assert_owner(tsdn, &shard->mtx);
 	if (hpdata_changing_state_get(ps)) {
 		hpdata_purge_allowed_set(ps, false);
-		hpdata_hugify_allowed_set(ps, false);
+		hpdata_disallow_hugify(ps);
 		return;
 	}
 	/*
@@ -226,7 +226,24 @@ hpa_update_purge_hugify_eligibility(tsdn_t *tsdn, hpa_shard_t *shard,
 	hpdata_purge_allowed_set(ps, hpdata_ndirty_get(ps) > 0);
 	if (hpa_good_hugification_candidate(shard, ps)
 	    && !hpdata_huge_get(ps)) {
-		hpdata_hugify_allowed_set(ps, true);
+		nstime_t now;
+		shard->hooks.curtime(&now);
+		hpdata_allow_hugify(ps, now);
+	}
+	/*
+	 * Once a hugepage has become eligible for hugification, we don't mark
+	 * it as ineligible just because it stops meeting the criteria (that
+	 * could lead to situations where a hugepage that spends most of its
+	 * time meeting the criteria never quite gets hugified if there are
+	 * intervening deallocations). The idea is that the hugification delay
+	 * will allow such pages to get purged, resetting their
+	 * "hugify-allowed" bit. If they don't get purged, then the
+	 * hugification isn't hurting and might help. As an exception, we
+	 * don't hugify hugepages that are now empty; it definitely doesn't
+	 * help there until the hugepage gets reused, which is likely not for
+	 * a while.
+	 */
+	if (hpdata_nactive_get(ps) == 0) {
+		hpdata_disallow_hugify(ps);
 	}
 }
 
@@ -309,7 +326,7 @@ hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) {
 	assert(hpdata_alloc_allowed_get(to_purge));
 	hpdata_mid_purge_set(to_purge, true);
 	hpdata_purge_allowed_set(to_purge, false);
-	hpdata_hugify_allowed_set(to_purge, false);
+	hpdata_disallow_hugify(to_purge);
 	/*
 	 * Unlike with hugification (where concurrent
 	 * allocations are allowed), concurrent allocation out
@@ -386,6 +403,16 @@ hpa_try_hugify(tsdn_t *tsdn, hpa_shard_t *shard) {
 	assert(hpdata_hugify_allowed_get(to_hugify));
 	assert(!hpdata_changing_state_get(to_hugify));
 
+	/* Make sure that it's been hugifiable for long enough. */
+	nstime_t time_hugify_allowed = hpdata_time_hugify_allowed(to_hugify);
+	nstime_t nstime;
+	shard->hooks.curtime(&nstime);
+	nstime_subtract(&nstime, &time_hugify_allowed);
+	uint64_t millis = nstime_msec(&nstime);
+	if (millis < shard->opts.hugify_delay_ms) {
+		return false;
+	}
+
 	/*
 	 * Don't let anyone else purge or hugify this page while
 	 * we're hugifying it (allocations and deallocations are
@@ -394,7 +421,7 @@ hpa_try_hugify(tsdn_t *tsdn, hpa_shard_t *shard) {
 	psset_update_begin(&shard->psset, to_hugify);
 	hpdata_mid_hugify_set(to_hugify, true);
 	hpdata_purge_allowed_set(to_hugify, false);
-	hpdata_hugify_allowed_set(to_hugify, false);
+	hpdata_disallow_hugify(to_hugify);
 	assert(hpdata_alloc_allowed_get(to_hugify));
 	psset_update_end(&shard->psset, to_hugify);
 
@@ -421,9 +448,6 @@ hpa_try_hugify(tsdn_t *tsdn, hpa_shard_t *shard) {
 
 static void
 hpa_shard_maybe_do_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard,
     bool forced) {
-	bool hugified;
-	bool purged;
-	size_t nloop = 0;
 	malloc_mutex_assert_owner(tsdn, &shard->mtx);
 	if (!forced && shard->opts.deferral_allowed) {
 		return;
 	}
 	/*
	 * If we're on a background thread, do work so long as there's work to
 	 * be done. Otherwise, bound latency to not be *too* bad by doing at
 	 * most a small fixed number of operations.
 	 */
-	size_t maxloops = (forced ? (size_t)-1 : 8);
+	bool hugified = false;
+	bool purged = false;
+	size_t max_ops = (forced ? (size_t)-1 : 16);
+	size_t nops = 0;
 	do {
-		hugified = hpa_try_hugify(tsdn, shard);
-		malloc_mutex_assert_owner(tsdn, &shard->mtx);
+		/*
+		 * Always purge before hugifying, to make sure we get some
+		 * ability to hit our quiescence targets.
+		 */
 		purged = false;
-		if (hpa_should_purge(tsdn, shard)) {
+		while (hpa_should_purge(tsdn, shard) && nops < max_ops) {
 			purged = hpa_try_purge(tsdn, shard);
+			if (purged) {
+				nops++;
+			}
 		}
+		hugified = hpa_try_hugify(tsdn, shard);
+		if (hugified) {
+			nops++;
+		}
 		malloc_mutex_assert_owner(tsdn, &shard->mtx);
-	} while ((hugified || purged) && nloop++ < maxloops);
+	} while ((hugified || purged) && nops < max_ops);
 }
 
 static edata_t *
diff --git a/src/hpa_hooks.c b/src/hpa_hooks.c
index 7e07c31a..6f377613 100644
--- a/src/hpa_hooks.c
+++ b/src/hpa_hooks.c
@@ -8,6 +8,7 @@ static void hpa_hooks_unmap(void *ptr, size_t size);
 static void hpa_hooks_purge(void *ptr, size_t size);
 static void hpa_hooks_hugify(void *ptr, size_t size);
 static void hpa_hooks_dehugify(void *ptr, size_t size);
+static void hpa_hooks_curtime(nstime_t *r_nstime);
 
 hpa_hooks_t hpa_hooks_default = {
 	&hpa_hooks_map,
@@ -15,6 +16,7 @@ hpa_hooks_t hpa_hooks_default = {
 	&hpa_hooks_purge,
 	&hpa_hooks_hugify,
 	&hpa_hooks_dehugify,
+	&hpa_hooks_curtime,
 };
 
 static void *
@@ -44,3 +46,8 @@ hpa_hooks_dehugify(void *ptr, size_t size) {
 	bool err = pages_nohuge(ptr, size);
 	(void)err;
 }
+
+static void
+hpa_hooks_curtime(nstime_t *r_nstime) {
+	nstime_update(r_nstime);
+}
diff --git a/src/jemalloc.c b/src/jemalloc.c
index 5adb5637..71efcb61 100644
--- a/src/jemalloc.c
+++ b/src/jemalloc.c
@@ -1145,6 +1145,9 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS],
 #define CONF_HANDLE_INT64_T(o, n, min, max, check_min, check_max, clip) \
 	CONF_HANDLE_T_SIGNED(int64_t, o, n, min, max,			\
 	    check_min, check_max, clip)
+#define CONF_HANDLE_UINT64_T(o, n, min, max, check_min, check_max, clip)\
+	CONF_HANDLE_T_U(uint64_t, o, n, min, max,			\
+	    check_min, check_max, clip)
 #define CONF_HANDLE_SSIZE_T(o, n, min, max)				\
 	CONF_HANDLE_T_SIGNED(ssize_t, o, n, min, max,			\
 	    CONF_CHECK_MIN, CONF_CHECK_MAX, false)
@@ -1441,26 +1444,9 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS],
 				CONF_CONTINUE;
 			}
 
-			/* And the same for the dehugification_threhsold. */
-			CONF_HANDLE_SIZE_T(
-			    opt_hpa_opts.dehugification_threshold,
-			    "hpa_dehugification_threshold", PAGE, HUGEPAGE,
-			    CONF_CHECK_MIN, CONF_CHECK_MAX, true);
-			if (CONF_MATCH("hpa_dehugification_threshold_ratio")) {
-				fxp_t ratio;
-				char *end;
-				bool err = fxp_parse(&ratio, v,
-				    &end);
-				if (err || (size_t)(end - v) != vlen
-				    || ratio > FXP_INIT_INT(1)) {
-					CONF_ERROR("Invalid conf value",
-					    k, klen, v, vlen);
-				} else {
-					opt_hpa_opts.dehugification_threshold =
-					    fxp_mul_frac(HUGEPAGE, ratio);
-				}
-				CONF_CONTINUE;
-			}
+			CONF_HANDLE_UINT64_T(
+			    opt_hpa_opts.hugify_delay_ms, "hpa_hugify_delay_ms",
+			    0, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true);
 
 			if (CONF_MATCH("hpa_dirty_mult")) {
 				if (CONF_MATCH_VALUE("-1")) {
diff --git a/src/stats.c b/src/stats.c
index 4e6c3922..16aa3fd4 100644
--- a/src/stats.c
+++ b/src/stats.c
@@ -1376,7 +1376,7 @@ stats_general_print(emitter_t *emitter) {
 	uint64_t u64v;
 	int64_t i64v;
 	ssize_t ssv, ssv2;
-	size_t sv, bsz, usz, u32sz, i64sz, ssz, sssz, cpsz;
+	size_t sv, bsz, usz, u32sz, u64sz, i64sz, ssz, sssz, cpsz;
 
 	bsz = sizeof(bool);
 	usz = sizeof(unsigned);
@@ -1385,6 +1385,7 @@ stats_general_print(emitter_t *emitter) {
 	cpsz = sizeof(const char *);
 	u32sz = sizeof(uint32_t);
 	i64sz = sizeof(int64_t);
+	u64sz = sizeof(uint64_t);
 
 	CTL_GET("version", &cpv, const char *);
 	emitter_kv(emitter, "version", "Version", emitter_type_string, &cpv);
@@ -1442,6 +1443,8 @@ stats_general_print(emitter_t *emitter) {
 
 #define OPT_WRITE_INT64(name)						\
 	OPT_WRITE(name, i64v, i64sz, emitter_type_int64)
+#define OPT_WRITE_UINT64(name)						\
+	OPT_WRITE(name, u64v, u64sz, emitter_type_uint64)
 
 #define OPT_WRITE_SIZE_T(name)						\
 	OPT_WRITE(name, sv, ssz, emitter_type_size)
@@ -1468,7 +1471,7 @@ stats_general_print(emitter_t *emitter) {
 	OPT_WRITE_BOOL("hpa")
 	OPT_WRITE_SIZE_T("hpa_slab_max_alloc")
 	OPT_WRITE_SIZE_T("hpa_hugification_threshold")
-	OPT_WRITE_SIZE_T("hpa_dehugification_threshold")
+	OPT_WRITE_UINT64("hpa_hugify_delay_ms")
 	if (je_mallctl("opt.hpa_dirty_mult", (void *)&u32v, &u32sz, NULL, 0)
 	    == 0) {
 		/*
diff --git a/test/unit/hpa.c b/test/unit/hpa.c
index 0558680f..a9e551fc 100644
--- a/test/unit/hpa.c
+++ b/test/unit/hpa.c
@@ -19,8 +19,21 @@ struct test_data_s {
 	emap_t emap;
 };
 
+static hpa_shard_opts_t test_hpa_shard_opts_default = {
+	/* slab_max_alloc */
+	ALLOC_MAX,
+	/* hugification threshold */
+	HUGEPAGE,
+	/* dirty_mult */
+	FXP_INIT_PERCENT(25),
+	/* deferral_allowed */
+	false,
+	/* hugify_delay_ms */
+	10 * 1000,
+};
+
 static hpa_shard_t *
-create_test_data() {
+create_test_data(hpa_hooks_t *hooks, hpa_shard_opts_t *opts) {
 	bool err;
 	base_t *base = base_new(TSDN_NULL, /* ind */ SHARD_IND,
 	    &ehooks_default_extent_hooks);
@@ -37,12 +50,9 @@ create_test_data() {
 	err = emap_init(&test_data->emap, test_data->base,
 	    /* zeroed */ false);
 	assert_false(err, "");
 
-	hpa_shard_opts_t opts = HPA_SHARD_OPTS_DEFAULT;
-	opts.slab_max_alloc = ALLOC_MAX;
-
 	err = hpa_shard_init(&test_data->shard, &test_data->emap,
 	    test_data->base, &test_data->shard_edata_cache, SHARD_IND,
-	    &hpa_hooks_default, &opts);
+	    hooks, opts);
 	assert_false(err, "");
 
 	return (hpa_shard_t *)test_data;
@@ -58,7 +68,8 @@ destroy_test_data(hpa_shard_t *shard) {
 TEST_BEGIN(test_alloc_max) {
 	test_skip_if(!hpa_supported());
 
-	hpa_shard_t *shard = create_test_data();
+	hpa_shard_t *shard = create_test_data(&hpa_hooks_default,
+	    &test_hpa_shard_opts_default);
 	tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
 
 	edata_t *edata;
@@ -134,7 +145,8 @@ node_remove(mem_tree_t *tree, edata_t *edata) {
 TEST_BEGIN(test_stress) {
 	test_skip_if(!hpa_supported());
 
-	hpa_shard_t *shard =
create_test_data(); + hpa_shard_t *shard = create_test_data(&hpa_hooks_default, + &test_hpa_shard_opts_default); tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); @@ -224,7 +236,8 @@ expect_contiguous(edata_t **edatas, size_t nedatas) { TEST_BEGIN(test_alloc_dalloc_batch) { test_skip_if(!hpa_supported()); - hpa_shard_t *shard = create_test_data(); + hpa_shard_t *shard = create_test_data(&hpa_hooks_default, + &test_hpa_shard_opts_default); tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); enum {NALLOCS = 8}; @@ -282,6 +295,117 @@ TEST_BEGIN(test_alloc_dalloc_batch) { } TEST_END +static uintptr_t defer_bump_ptr = HUGEPAGE * 123; +static void * +defer_test_map(size_t size) { + void *result = (void *)defer_bump_ptr; + defer_bump_ptr += size; + return result; +} + +static void +defer_test_unmap(void *ptr, size_t size) { + (void)ptr; + (void)size; +} + +static bool defer_purge_called = false; +static void +defer_test_purge(void *ptr, size_t size) { + (void)ptr; + (void)size; + defer_purge_called = true; +} + +static bool defer_hugify_called = false; +static void +defer_test_hugify(void *ptr, size_t size) { + defer_hugify_called = true; +} + +static bool defer_dehugify_called = false; +static void +defer_test_dehugify(void *ptr, size_t size) { + defer_dehugify_called = true; +} + +static nstime_t defer_curtime; +static void +defer_test_curtime(nstime_t *r_time) { + *r_time = defer_curtime; +} + +TEST_BEGIN(test_defer_time) { + test_skip_if(!hpa_supported()); + + hpa_hooks_t hooks; + hooks.map = &defer_test_map; + hooks.unmap = &defer_test_unmap; + hooks.purge = &defer_test_purge; + hooks.hugify = &defer_test_hugify; + hooks.dehugify = &defer_test_dehugify; + hooks.curtime = &defer_test_curtime; + + hpa_shard_opts_t opts = test_hpa_shard_opts_default; + opts.deferral_allowed = true; + + hpa_shard_t *shard = create_test_data(&hooks, &opts); + + nstime_init(&defer_curtime, 0); + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + edata_t *edatas[HUGEPAGE_PAGES]; + for (int i = 0; i < (int)HUGEPAGE_PAGES; i++) { + edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false); + expect_ptr_not_null(edatas[i], "Unexpected null edata"); + } + hpa_shard_do_deferred_work(tsdn, shard); + expect_false(defer_hugify_called, "Hugified too early"); + + /* Hugification delay is set to 10 seconds in options. */ + nstime_init2(&defer_curtime, 11, 0); + hpa_shard_do_deferred_work(tsdn, shard); + expect_true(defer_hugify_called, "Failed to hugify"); + + defer_hugify_called = false; + + /* Purge. Recall that dirty_mult is .25. */ + for (int i = 0; i < (int)HUGEPAGE_PAGES / 2; i++) { + pai_dalloc(tsdn, &shard->pai, edatas[i]); + } + + hpa_shard_do_deferred_work(tsdn, shard); + + expect_false(defer_hugify_called, "Hugified too early"); + expect_true(defer_dehugify_called, "Should have dehugified"); + expect_true(defer_purge_called, "Should have purged"); + defer_hugify_called = false; + defer_dehugify_called = false; + defer_purge_called = false; + + /* + * Refill the page. We now meet the hugification threshold; we should + * be marked for pending hugify. + */ + for (int i = 0; i < (int)HUGEPAGE_PAGES / 2; i++) { + edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false); + expect_ptr_not_null(edatas[i], "Unexpected null edata"); + } + /* + * We would be ineligible for hugification, had we not already met the + * threshold before dipping below it. + */ + pai_dalloc(tsdn, &shard->pai, edatas[0]); + /* Wait for the threshold again. 
 */
+	nstime_init2(&defer_curtime, 22, 0);
+	hpa_shard_do_deferred_work(tsdn, shard);
+	expect_true(defer_hugify_called, "Failed to hugify");
+	expect_false(defer_dehugify_called, "Unexpected dehugify");
+	expect_false(defer_purge_called, "Unexpected purge");
+
+	destroy_test_data(shard);
+}
+TEST_END
+
 int
 main(void) {
 	/*
@@ -299,5 +423,6 @@ main(void) {
 	return test_no_reentrancy(
 	    test_alloc_max,
 	    test_stress,
-	    test_alloc_dalloc_batch);
+	    test_alloc_dalloc_batch,
+	    test_defer_time);
 }
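
Once applied, the new knob behaves like any other jemalloc option: it can be
set in malloc_conf (parsed in jemalloc.c above) and read back through the
opt.hpa_hugify_delay_ms mallctl (wired up in ctl.c above). A minimal usage
sketch follows; the 5000 ms value is an arbitrary example, not a
recommendation, and the HPA itself only activates on builds where
hpa_supported() is true:

    #include <inttypes.h>
    #include <stdio.h>
    #include <jemalloc/jemalloc.h>

    /* Example only: enable the HPA and shorten the hugification delay. */
    const char *malloc_conf = "hpa:true,hpa_hugify_delay_ms:5000";

    int
    main(void) {
    	uint64_t delay_ms;
    	size_t sz = sizeof(delay_ms);
    	/* Read the option back through the mallctl added in this patch. */
    	if (mallctl("opt.hpa_hugify_delay_ms", &delay_ms, &sz, NULL, 0)
    	    == 0) {
    		printf("hugify delay: %" PRIu64 " ms\n", delay_ms);
    	}
    	return 0;
    }
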