From 8fdd9a579779b84d6af27f94c295f82a4df8e5be Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 10 Aug 2017 13:14:26 -0700 Subject: [PATCH] Implement opt.metadata_thp This option enables transparent huge pages for base allocators (requires MADV_HUGEPAGE support). --- configure.ac | 3 ++ doc/jemalloc.xml.in | 17 +++++- include/jemalloc/internal/base_externs.h | 2 + include/jemalloc/internal/base_types.h | 2 + .../internal/jemalloc_internal_defs.h.in | 6 +++ include/jemalloc/internal/pages.h | 3 ++ src/base.c | 43 ++++++++++----- src/ctl.c | 3 ++ src/jemalloc.c | 1 + src/pages.c | 53 ++++++++++++++++++- src/stats.c | 1 + test/unit/mallctl.c | 1 + 12 files changed, 118 insertions(+), 17 deletions(-) diff --git a/configure.ac b/configure.ac index ba0409a5..e1a7343f 100644 --- a/configure.ac +++ b/configure.ac @@ -1824,6 +1824,9 @@ if test "x${je_cv_madvise}" = "xyes" ; then madvise((void *)0, 0, MADV_HUGEPAGE); madvise((void *)0, 0, MADV_NOHUGEPAGE); ], [je_cv_thp]) + if test "x${je_cv_thp}" = "xyes" ; then + AC_DEFINE([JEMALLOC_HAVE_MADVISE_HUGE], [ ]) + fi fi dnl Enable transparent huge page support by default. diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 21e401ac..f1712f05 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -916,6 +916,18 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay", + + + opt.metadata_thp + (bool) + r- + + If true, allow jemalloc to use transparent huge page + (THP) for internal metadata (see stats.metadata for details). This + option is disabled by default. + + opt.retain @@ -2187,7 +2199,10 @@ struct extent_hooks_s { metadata structures (see stats.arenas.<i>.base) and internal allocations (see stats.arenas.<i>.internal). + linkend="stats.arenas.i.internal">stats.arenas.<i>.internal). + Transparent huge page (enabled with opt.metadata_thp) usage is not + considered. 
diff --git a/include/jemalloc/internal/base_externs.h b/include/jemalloc/internal/base_externs.h index a4fd5ac7..a5cb8a8d 100644 --- a/include/jemalloc/internal/base_externs.h +++ b/include/jemalloc/internal/base_externs.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_BASE_EXTERNS_H #define JEMALLOC_INTERNAL_BASE_EXTERNS_H +extern bool opt_metadata_thp; + base_t *b0get(void); base_t *base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks); void base_delete(tsdn_t *tsdn, base_t *base); diff --git a/include/jemalloc/internal/base_types.h b/include/jemalloc/internal/base_types.h index be7ee825..6e710334 100644 --- a/include/jemalloc/internal/base_types.h +++ b/include/jemalloc/internal/base_types.h @@ -4,4 +4,6 @@ typedef struct base_block_s base_block_t; typedef struct base_s base_t; +#define METADATA_THP_DEFAULT false + #endif /* JEMALLOC_INTERNAL_BASE_TYPES_H */ diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index b73daf04..5fa7f51f 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -260,6 +260,12 @@ /* Defined if madvise(2) is available. */ #undef JEMALLOC_HAVE_MADVISE +/* + * Defined if transparent huge pages are supported via the MADV_[NO]HUGEPAGE + * arguments to madvise(2). + */ +#undef JEMALLOC_HAVE_MADVISE_HUGE + /* * Methods for purging unused pages differ between operating systems. * diff --git a/include/jemalloc/internal/pages.h b/include/jemalloc/internal/pages.h index 28383b7f..121fff38 100644 --- a/include/jemalloc/internal/pages.h +++ b/include/jemalloc/internal/pages.h @@ -58,6 +58,9 @@ static const bool pages_can_purge_forced = #endif ; +/* Whether transparent huge page state is "madvise". 
*/ +extern bool thp_state_madvise; + void *pages_map(void *addr, size_t size, size_t alignment, bool *commit); void pages_unmap(void *addr, size_t size); bool pages_commit(void *addr, size_t size); diff --git a/src/base.c b/src/base.c index 97078b13..99259783 100644 --- a/src/base.c +++ b/src/base.c @@ -10,7 +10,9 @@ /******************************************************************************/ /* Data. */ -static base_t *b0; +static base_t *b0; + +bool opt_metadata_thp = METADATA_THP_DEFAULT; /******************************************************************************/ @@ -20,19 +22,26 @@ base_map(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, size_t size) bool zero = true; bool commit = true; + /* We use hugepage sizes regardless of opt_metadata_thp. */ assert(size == HUGEPAGE_CEILING(size)); - + size_t alignment = opt_metadata_thp ? HUGEPAGE : PAGE; if (extent_hooks == &extent_hooks_default) { - addr = extent_alloc_mmap(NULL, size, PAGE, &zero, &commit); + addr = extent_alloc_mmap(NULL, size, alignment, &zero, &commit); } else { /* No arena context as we are creating new arenas. */ tsd_t *tsd = tsdn_null(tsdn) ? 
tsd_fetch() : tsdn_tsd(tsdn); pre_reentrancy(tsd, NULL); - addr = extent_hooks->alloc(extent_hooks, NULL, size, PAGE, + addr = extent_hooks->alloc(extent_hooks, NULL, size, alignment, &zero, &commit, ind); post_reentrancy(tsd); } + if (addr != NULL && opt_metadata_thp && thp_state_madvise) { + assert(((uintptr_t)addr & HUGEPAGE_MASK) == 0 && + (size & HUGEPAGE_MASK) == 0); + pages_huge(addr, size); + } + return addr; } @@ -51,16 +60,16 @@ base_unmap(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, void *addr, */ if (extent_hooks == &extent_hooks_default) { if (!extent_dalloc_mmap(addr, size)) { - return; + goto label_done; } if (!pages_decommit(addr, size)) { - return; + goto label_done; } if (!pages_purge_forced(addr, size)) { - return; + goto label_done; } if (!pages_purge_lazy(addr, size)) { - return; + goto label_done; } /* Nothing worked. This should never happen. */ not_reached(); @@ -70,27 +79,33 @@ base_unmap(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, void *addr, if (extent_hooks->dalloc != NULL && !extent_hooks->dalloc(extent_hooks, addr, size, true, ind)) { - goto label_done; + goto label_post_reentrancy; } if (extent_hooks->decommit != NULL && !extent_hooks->decommit(extent_hooks, addr, size, 0, size, ind)) { - goto label_done; + goto label_post_reentrancy; } if (extent_hooks->purge_forced != NULL && !extent_hooks->purge_forced(extent_hooks, addr, size, 0, size, ind)) { - goto label_done; + goto label_post_reentrancy; } if (extent_hooks->purge_lazy != NULL && !extent_hooks->purge_lazy(extent_hooks, addr, size, 0, size, ind)) { - goto label_done; + goto label_post_reentrancy; } /* Nothing worked. That's the application's problem. */ - label_done: + label_post_reentrancy: post_reentrancy(tsd); - return; + } +label_done: + if (opt_metadata_thp && thp_state_madvise) { + /* Set NOHUGEPAGE after unmap to avoid kernel defrag. 
*/ + assert(((uintptr_t)addr & HUGEPAGE_MASK) == 0 && + (size & HUGEPAGE_MASK) == 0); + pages_nohuge(addr, size); } } diff --git a/src/ctl.c b/src/ctl.c index 36bc8fb5..c2991036 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -80,6 +80,7 @@ CTL_PROTO(config_utrace) CTL_PROTO(config_xmalloc) CTL_PROTO(opt_abort) CTL_PROTO(opt_abort_conf) +CTL_PROTO(opt_metadata_thp) CTL_PROTO(opt_retain) CTL_PROTO(opt_dss) CTL_PROTO(opt_narenas) @@ -274,6 +275,7 @@ static const ctl_named_node_t config_node[] = { static const ctl_named_node_t opt_node[] = { {NAME("abort"), CTL(opt_abort)}, {NAME("abort_conf"), CTL(opt_abort_conf)}, + {NAME("metadata_thp"), CTL(opt_metadata_thp)}, {NAME("retain"), CTL(opt_retain)}, {NAME("dss"), CTL(opt_dss)}, {NAME("narenas"), CTL(opt_narenas)}, @@ -1568,6 +1570,7 @@ CTL_RO_CONFIG_GEN(config_xmalloc, bool) CTL_RO_NL_GEN(opt_abort, opt_abort, bool) CTL_RO_NL_GEN(opt_abort_conf, opt_abort_conf, bool) +CTL_RO_NL_GEN(opt_metadata_thp, opt_metadata_thp, bool) CTL_RO_NL_GEN(opt_retain, opt_retain, bool) CTL_RO_NL_GEN(opt_dss, opt_dss, const char *) CTL_RO_NL_GEN(opt_narenas, opt_narenas, unsigned) diff --git a/src/jemalloc.c b/src/jemalloc.c index 4c73ba4a..cbae259d 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1055,6 +1055,7 @@ malloc_conf_init(void) { if (opt_abort_conf && had_conf_error) { malloc_abort_invalid_conf(); } + CONF_HANDLE_BOOL(opt_metadata_thp, "metadata_thp") CONF_HANDLE_BOOL(opt_retain, "retain") if (strncmp("dss", k, klen) == 0) { int i; diff --git a/src/pages.c b/src/pages.c index f8ef2bcb..9561f6de 100644 --- a/src/pages.c +++ b/src/pages.c @@ -25,6 +25,8 @@ static int mmap_flags; #endif static bool os_overcommits; +bool thp_state_madvise; + /******************************************************************************/ /* * Function prototypes for static functions that are referenced prior to @@ -291,7 +293,7 @@ pages_huge(void *addr, size_t size) { assert(HUGEPAGE_ADDR2BASE(addr) == addr); assert(HUGEPAGE_CEILING(size) == size); 
-#ifdef JEMALLOC_THP +#ifdef JEMALLOC_HAVE_MADVISE_HUGE return (madvise(addr, size, MADV_HUGEPAGE) != 0); #else return true; @@ -303,7 +305,7 @@ pages_nohuge(void *addr, size_t size) { assert(HUGEPAGE_ADDR2BASE(addr) == addr); assert(HUGEPAGE_CEILING(size) == size); -#ifdef JEMALLOC_THP +#ifdef JEMALLOC_HAVE_MADVISE_HUGE return (madvise(addr, size, MADV_NOHUGEPAGE) != 0); #else return false; @@ -413,6 +415,51 @@ os_overcommits_proc(void) { } #endif +static void +init_thp_state(void) { +#ifndef JEMALLOC_HAVE_MADVISE_HUGE + if (opt_metadata_thp && opt_abort) { + malloc_write(": no MADV_HUGEPAGE support\n"); + abort(); + } + goto label_error; +#endif + static const char madvise_state[] = "always [madvise] never\n"; + char buf[sizeof(madvise_state)]; + +#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open) + int fd = (int)syscall(SYS_open, + "/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY); +#else + int fd = open("/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY); +#endif + if (fd == -1) { + goto label_error; + } + +#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_read) + ssize_t nread = (ssize_t)syscall(SYS_read, fd, &buf, sizeof(buf)); +#else + ssize_t nread = read(fd, &buf, sizeof(buf)); +#endif + +#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_close) + syscall(SYS_close, fd); +#else + close(fd); +#endif + + if (nread < 1) { + goto label_error; + } + if (strncmp(buf, madvise_state, (size_t)nread) == 0) { + thp_state_madvise = true; + return; + } +label_error: + thp_state_madvise = false; +} + bool pages_boot(void) { os_page = os_page_detect(); @@ -441,5 +488,7 @@ pages_boot(void) { os_overcommits = false; #endif + init_thp_state(); + return false; } diff --git a/src/stats.c b/src/stats.c index 087df767..746cc426 100644 --- a/src/stats.c +++ b/src/stats.c @@ -802,6 +802,7 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, } OPT_WRITE_BOOL(abort, ",") OPT_WRITE_BOOL(abort_conf, ",") + OPT_WRITE_BOOL(metadata_thp, ",") 
OPT_WRITE_BOOL(retain, ",") OPT_WRITE_CHAR_P(dss, ",") OPT_WRITE_UNSIGNED(narenas, ",") diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index f6116549..d9fdd058 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -157,6 +157,7 @@ TEST_BEGIN(test_mallctl_opt) { } while (0) TEST_MALLCTL_OPT(bool, abort, always); + TEST_MALLCTL_OPT(bool, metadata_thp, always); TEST_MALLCTL_OPT(bool, retain, always); TEST_MALLCTL_OPT(const char *, dss, always); TEST_MALLCTL_OPT(unsigned, narenas, always);