From e4f090e8df5adf180662c5eeac2af214f9594de4 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 16 Feb 2018 14:19:19 -0800 Subject: [PATCH] Add opt.thp which allows explicit hugepage usage. "always" marks all user mappings as MADV_HUGEPAGE; while "never" marks all mappings as MADV_NOHUGEPAGE. The default setting "default" does not change any settings. Note that all the madvise calls are part of the default extent hooks by design, so that customized extent hooks have complete control over the mappings including hugepage settings. --- doc/jemalloc.xml.in | 22 ++++++++ include/jemalloc/internal/pages.h | 16 +++++- src/base.c | 3 +- src/ctl.c | 3 ++ src/extent.c | 12 ++--- src/jemalloc.c | 24 ++++++++- src/pages.c | 88 +++++++++++++++++++++++++------ src/stats.c | 1 + test/unit/mallctl.c | 1 + test/unit/pages.c | 2 +- 10 files changed, 143 insertions(+), 29 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 4fdb53fc..9ecd8a1f 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1217,6 +1217,28 @@ malloc_conf = "xmalloc:true";]]> default maximum is 32 KiB (2^15). + + + opt.thp + (const char *) + r- + + Transparent hugepage (THP) mode. Settings "always", + "never" and "default" are available if THP is supported by the operating + system. The "always" setting enables transparent hugepage for all user + memory mappings with + MADV_HUGEPAGE; "never" + ensures no transparent hugepage with + MADV_NOHUGEPAGE; the default + setting "default" makes no changes. Note that: this option does not + affect THP for jemalloc internal metadata (see opt.metadata_thp); + in addition, for arenas with customized extent_hooks, + this option is bypassed as it is implemented as part of the default + extent hooks. + + opt.prof diff --git a/include/jemalloc/internal/pages.h b/include/jemalloc/internal/pages.h index dff20515..7dae633a 100644 --- a/include/jemalloc/internal/pages.h +++ b/include/jemalloc/internal/pages.h @@ -58,8 +58,19 @@ static const bool pages_can_purge_forced = #endif ; -/* Whether transparent huge page state is "madvise". */ -extern bool thp_state_madvise; +typedef enum { + thp_mode_default = 0, /* Do not change hugepage settings. */ + thp_mode_always = 1, /* Always set MADV_HUGEPAGE. */ + thp_mode_never = 2, /* Always set MADV_NOHUGEPAGE. */ + + thp_mode_names_limit = 3, /* Used for option processing. */ + thp_mode_not_supported = 3 /* No THP support detected. */ +} thp_mode_t; + +#define THP_MODE_DEFAULT thp_mode_default +extern thp_mode_t opt_thp; +extern thp_mode_t init_system_thp_mode; /* Initial system wide state. */ +extern const char *thp_mode_names[]; void *pages_map(void *addr, size_t size, size_t alignment, bool *commit); void pages_unmap(void *addr, size_t size); @@ -72,5 +83,6 @@ bool pages_nohuge(void *addr, size_t size); bool pages_dontdump(void *addr, size_t size); bool pages_dodump(void *addr, size_t size); bool pages_boot(void); +void pages_set_thp_state (void *ptr, size_t size); #endif /* JEMALLOC_INTERNAL_PAGES_EXTERNS_H */ diff --git a/src/base.c b/src/base.c index cc3d9781..bb897a25 100644 --- a/src/base.c +++ b/src/base.c @@ -24,7 +24,8 @@ const char *metadata_thp_mode_names[] = { static inline bool metadata_thp_madvise(void) { - return (metadata_thp_enabled() && thp_state_madvise); + return (metadata_thp_enabled() && + (init_system_thp_mode == thp_mode_default)); } static void * diff --git a/src/ctl.c b/src/ctl.c index 17672493..aaf6e35a 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -94,6 +94,7 @@ CTL_PROTO(opt_zero) CTL_PROTO(opt_utrace) CTL_PROTO(opt_xmalloc) CTL_PROTO(opt_tcache) +CTL_PROTO(opt_thp) CTL_PROTO(opt_lg_extent_max_active_fit) CTL_PROTO(opt_lg_tcache_max) CTL_PROTO(opt_prof) @@ -292,6 +293,7 @@ static const ctl_named_node_t opt_node[] = { {NAME("utrace"), CTL(opt_utrace)}, {NAME("xmalloc"), CTL(opt_xmalloc)}, {NAME("tcache"), CTL(opt_tcache)}, + {NAME("thp"), CTL(opt_thp)}, {NAME("lg_extent_max_active_fit"), CTL(opt_lg_extent_max_active_fit)}, {NAME("lg_tcache_max"), CTL(opt_lg_tcache_max)}, {NAME("prof"), CTL(opt_prof)}, @@ -1597,6 +1599,7 @@ CTL_RO_NL_CGEN(config_fill, opt_zero, opt_zero, bool) CTL_RO_NL_CGEN(config_utrace, opt_utrace, opt_utrace, bool) CTL_RO_NL_CGEN(config_xmalloc, opt_xmalloc, opt_xmalloc, bool) CTL_RO_NL_GEN(opt_tcache, opt_tcache, bool) +CTL_RO_NL_GEN(opt_thp, thp_mode_names[opt_thp], const char *) CTL_RO_NL_GEN(opt_lg_extent_max_active_fit, opt_lg_extent_max_active_fit, size_t) CTL_RO_NL_GEN(opt_lg_tcache_max, opt_lg_tcache_max, ssize_t) diff --git a/src/extent.c b/src/extent.c index 517780ee..88d331f7 100644 --- a/src/extent.c +++ b/src/extent.c @@ -1173,11 +1173,12 @@ extent_alloc_core(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, static void * extent_alloc_default_impl(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit) { - void *ret; - - ret = extent_alloc_core(tsdn, arena, new_addr, size, alignment, zero, + void *ret = extent_alloc_core(tsdn, arena, new_addr, size, alignment, zero, commit, (dss_prec_t)atomic_load_u(&arena->dss_prec, ATOMIC_RELAXED)); + if (have_madvise_huge && ret) { + pages_set_thp_state(ret, size); + } return ret; } @@ -1266,9 +1267,8 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, void *ptr; if (*r_extent_hooks == &extent_hooks_default) { - ptr = extent_alloc_core(tsdn, arena, NULL, alloc_size, PAGE, - &zeroed, &committed, (dss_prec_t)atomic_load_u( - &arena->dss_prec, ATOMIC_RELAXED)); + ptr = extent_alloc_default_impl(tsdn, arena, NULL, + alloc_size, PAGE, &zeroed, &committed); } else { extent_hook_pre_reentrancy(tsdn, arena); ptr = (*r_extent_hooks)->alloc(*r_extent_hooks, NULL, diff --git a/src/jemalloc.c b/src/jemalloc.c index f4fd805e..4dde8fbc 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1152,9 +1152,8 @@ malloc_conf_init(void) { CONF_HANDLE_SSIZE_T(opt_lg_tcache_max, "lg_tcache_max", -1, (sizeof(size_t) << 3) - 1) if (strncmp("percpu_arena", k, klen) == 0) { - int i; bool match = false; - for (i = percpu_arena_mode_names_base; i < + for (int i = percpu_arena_mode_names_base; i < percpu_arena_mode_names_limit; i++) { if (strncmp(percpu_arena_mode_names[i], v, vlen) == 0) { @@ -1204,6 +1203,27 @@ malloc_conf_init(void) { continue; } } + if (CONF_MATCH("thp")) { + bool match = false; + for (int i = 0; i < thp_mode_names_limit; i++) { + if (strncmp(thp_mode_names[i],v, vlen) + == 0) { + if (!have_madvise_huge) { + malloc_conf_error( + "No THP support", + k, klen, v, vlen); + } + opt_thp = i; + match = true; + break; + } + } + if (!match) { + malloc_conf_error("Invalid conf value", + k, klen, v, vlen); + } + continue; + } malloc_conf_error("Invalid conf pair", k, klen, v, vlen); #undef CONF_MATCH diff --git a/src/pages.c b/src/pages.c index c839471f..82405219 100644 --- a/src/pages.c +++ b/src/pages.c @@ -28,7 +28,14 @@ static int mmap_flags; #endif static bool os_overcommits; -bool thp_state_madvise; +const char *thp_mode_names[] = { + "default", + "always", + "never", + "not supported" +}; +thp_mode_t opt_thp = THP_MODE_DEFAULT; +thp_mode_t init_system_thp_mode; /* Runtime support for lazy purge. Irrelevant when !pages_can_purge_lazy. */ static bool pages_can_purge_lazy_runtime = true; @@ -307,11 +314,12 @@ pages_purge_forced(void *addr, size_t size) { #endif } -bool -pages_huge(void *addr, size_t size) { - assert(HUGEPAGE_ADDR2BASE(addr) == addr); - assert(HUGEPAGE_CEILING(size) == size); - +static bool +pages_huge_impl(void *addr, size_t size, bool aligned) { + if (aligned) { + assert(HUGEPAGE_ADDR2BASE(addr) == addr); + assert(HUGEPAGE_CEILING(size) == size); + } #ifdef JEMALLOC_HAVE_MADVISE_HUGE return (madvise(addr, size, MADV_HUGEPAGE) != 0); #else @@ -320,9 +328,21 @@ pages_huge(void *addr, size_t size) { } bool -pages_nohuge(void *addr, size_t size) { - assert(HUGEPAGE_ADDR2BASE(addr) == addr); - assert(HUGEPAGE_CEILING(size) == size); +pages_huge(void *addr, size_t size) { + return pages_huge_impl(addr, size, true); +} + +static bool +pages_huge_unaligned(void *addr, size_t size) { + return pages_huge_impl(addr, size, false); +} + +static bool +pages_nohuge_impl(void *addr, size_t size, bool aligned) { + if (aligned) { + assert(HUGEPAGE_ADDR2BASE(addr) == addr); + assert(HUGEPAGE_CEILING(size) == size); + } #ifdef JEMALLOC_HAVE_MADVISE_HUGE return (madvise(addr, size, MADV_NOHUGEPAGE) != 0); @@ -331,6 +351,16 @@ pages_nohuge(void *addr, size_t size) { #endif } +bool +pages_nohuge(void *addr, size_t size) { + return pages_nohuge_impl(addr, size, true); +} + +static bool +pages_nohuge_unaligned(void *addr, size_t size) { + return pages_nohuge_impl(addr, size, false); +} + bool pages_dontdump(void *addr, size_t size) { assert(PAGE_ADDR2BASE(addr) == addr); @@ -469,6 +499,25 @@ os_overcommits_proc(void) { } #endif +void +pages_set_thp_state (void *ptr, size_t size) { + if (opt_thp == thp_mode_default || opt_thp == init_system_thp_mode) { + return; + } + assert(opt_thp != thp_mode_not_supported && + init_system_thp_mode != thp_mode_not_supported); + + if (opt_thp == thp_mode_always + && init_system_thp_mode != thp_mode_never) { + assert(init_system_thp_mode == thp_mode_default); + pages_huge_unaligned(ptr, size); + } else if (opt_thp == thp_mode_never) { + assert(init_system_thp_mode == thp_mode_default || + init_system_thp_mode == thp_mode_always); + pages_nohuge_unaligned(ptr, size); + } +} + static void init_thp_state(void) { if (!have_madvise_huge) { @@ -479,8 +528,10 @@ init_thp_state(void) { goto label_error; } - static const char madvise_state[] = "always [madvise] never\n"; - char buf[sizeof(madvise_state)]; + static const char sys_state_madvise[] = "always [madvise] never\n"; + static const char sys_state_always[] = "[always] madvise never\n"; + static const char sys_state_never[] = "always madvise [never]\n"; + char buf[sizeof(sys_state_madvise)]; #if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open) int fd = (int)syscall(SYS_open, @@ -504,15 +555,18 @@ init_thp_state(void) { close(fd); #endif - if (nread < 1) { + if (strncmp(buf, sys_state_madvise, (size_t)nread) == 0) { + init_system_thp_mode = thp_mode_default; + } else if (strncmp(buf, sys_state_always, (size_t)nread) == 0) { + init_system_thp_mode = thp_mode_always; + } else if (strncmp(buf, sys_state_never, (size_t)nread) == 0) { + init_system_thp_mode = thp_mode_never; + } else { goto label_error; } - if (strncmp(buf, madvise_state, (size_t)nread) == 0) { - thp_state_madvise = true; - return; - } + return; label_error: - thp_state_madvise = false; + opt_thp = init_system_thp_mode = thp_mode_not_supported; } bool diff --git a/src/stats.c b/src/stats.c index 11959cbe..9efb9a19 100644 --- a/src/stats.c +++ b/src/stats.c @@ -837,6 +837,7 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, OPT_WRITE_BOOL(xmalloc, ",") OPT_WRITE_BOOL(tcache, ",") OPT_WRITE_SSIZE_T(lg_tcache_max, ",") + OPT_WRITE_CHAR_P(thp, ",") OPT_WRITE_BOOL(prof, ",") OPT_WRITE_CHAR_P(prof_prefix, ",") OPT_WRITE_BOOL_MUTABLE(prof_active, prof.active, ",") diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index e812b52f..c9ba6c5d 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -174,6 +174,7 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(bool, tcache, always); TEST_MALLCTL_OPT(size_t, lg_extent_max_active_fit, always); TEST_MALLCTL_OPT(size_t, lg_tcache_max, always); + TEST_MALLCTL_OPT(const char *, thp, always); TEST_MALLCTL_OPT(bool, prof, prof); TEST_MALLCTL_OPT(const char *, prof_prefix, prof); TEST_MALLCTL_OPT(bool, prof_active, prof); diff --git a/test/unit/pages.c b/test/unit/pages.c index 49ad0091..ee729eec 100644 --- a/test/unit/pages.c +++ b/test/unit/pages.c @@ -10,7 +10,7 @@ TEST_BEGIN(test_pages_huge) { pages = pages_map(NULL, alloc_size, PAGE, &commit); assert_ptr_not_null(pages, "Unexpected pages_map() error"); - if (thp_state_madvise) { + if (init_system_thp_mode == thp_mode_default) { hugepage = (void *)(ALIGNMENT_CEILING((uintptr_t)pages, HUGEPAGE)); assert_b_ne(pages_huge(hugepage, HUGEPAGE), have_madvise_huge, "Unexpected pages_huge() result");