diff --git a/configure.ac b/configure.ac
index ba0409a5..e1a7343f 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1824,6 +1824,9 @@ if test "x${je_cv_madvise}" = "xyes" ; then
madvise((void *)0, 0, MADV_HUGEPAGE);
madvise((void *)0, 0, MADV_NOHUGEPAGE);
], [je_cv_thp])
+ if test "x${je_cv_thp}" = "xyes" ; then
+ AC_DEFINE([JEMALLOC_HAVE_MADVISE_HUGE], [ ])
+ fi
fi
dnl Enable transparent huge page support by default.
diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in
index 21e401ac..f1712f05 100644
--- a/doc/jemalloc.xml.in
+++ b/doc/jemalloc.xml.in
@@ -916,6 +916,18 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay",
+
+
+ opt.metadata_thp
+ (bool)
+ r-
+
+ If true, allow jemalloc to use transparent huge pages
+ (THP) for internal metadata (see stats.metadata for details). This
+ option is disabled by default.
+
+
opt.retain
@@ -2187,7 +2199,10 @@ struct extent_hooks_s {
metadata structures (see stats.arenas.<i>.base)
and internal allocations (see stats.arenas.<i>.internal).
+ linkend="stats.arenas.i.internal">stats.arenas.<i>.internal).
+ Transparent huge page (enabled with opt.metadata_thp) usage is not
+ considered.
diff --git a/include/jemalloc/internal/base_externs.h b/include/jemalloc/internal/base_externs.h
index a4fd5ac7..a5cb8a8d 100644
--- a/include/jemalloc/internal/base_externs.h
+++ b/include/jemalloc/internal/base_externs.h
@@ -1,6 +1,8 @@
#ifndef JEMALLOC_INTERNAL_BASE_EXTERNS_H
#define JEMALLOC_INTERNAL_BASE_EXTERNS_H
+extern bool opt_metadata_thp;
+
base_t *b0get(void);
base_t *base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks);
void base_delete(tsdn_t *tsdn, base_t *base);
diff --git a/include/jemalloc/internal/base_types.h b/include/jemalloc/internal/base_types.h
index be7ee825..6e710334 100644
--- a/include/jemalloc/internal/base_types.h
+++ b/include/jemalloc/internal/base_types.h
@@ -4,4 +4,6 @@
typedef struct base_block_s base_block_t;
typedef struct base_s base_t;
+#define METADATA_THP_DEFAULT false
+
#endif /* JEMALLOC_INTERNAL_BASE_TYPES_H */
diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in
index b73daf04..5fa7f51f 100644
--- a/include/jemalloc/internal/jemalloc_internal_defs.h.in
+++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in
@@ -260,6 +260,12 @@
/* Defined if madvise(2) is available. */
#undef JEMALLOC_HAVE_MADVISE
+/*
+ * Defined if transparent huge pages are supported via the MADV_[NO]HUGEPAGE
+ * arguments to madvise(2).
+ */
+#undef JEMALLOC_HAVE_MADVISE_HUGE
+
/*
* Methods for purging unused pages differ between operating systems.
*
diff --git a/include/jemalloc/internal/pages.h b/include/jemalloc/internal/pages.h
index 28383b7f..121fff38 100644
--- a/include/jemalloc/internal/pages.h
+++ b/include/jemalloc/internal/pages.h
@@ -58,6 +58,9 @@ static const bool pages_can_purge_forced =
#endif
;
+/* Whether transparent huge page state is "madvise". */
+extern bool thp_state_madvise;
+
void *pages_map(void *addr, size_t size, size_t alignment, bool *commit);
void pages_unmap(void *addr, size_t size);
bool pages_commit(void *addr, size_t size);
diff --git a/src/base.c b/src/base.c
index 97078b13..99259783 100644
--- a/src/base.c
+++ b/src/base.c
@@ -10,7 +10,9 @@
/******************************************************************************/
/* Data. */
-static base_t *b0;
+static base_t *b0;
+
+bool opt_metadata_thp = METADATA_THP_DEFAULT;
/******************************************************************************/
@@ -20,19 +22,26 @@ base_map(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, size_t size)
bool zero = true;
bool commit = true;
+ /* We use hugepage sizes regardless of opt_metadata_thp. */
assert(size == HUGEPAGE_CEILING(size));
-
+ size_t alignment = opt_metadata_thp ? HUGEPAGE : PAGE;
if (extent_hooks == &extent_hooks_default) {
- addr = extent_alloc_mmap(NULL, size, PAGE, &zero, &commit);
+ addr = extent_alloc_mmap(NULL, size, alignment, &zero, &commit);
} else {
/* No arena context as we are creating new arenas. */
tsd_t *tsd = tsdn_null(tsdn) ? tsd_fetch() : tsdn_tsd(tsdn);
pre_reentrancy(tsd, NULL);
- addr = extent_hooks->alloc(extent_hooks, NULL, size, PAGE,
+ addr = extent_hooks->alloc(extent_hooks, NULL, size, alignment,
&zero, &commit, ind);
post_reentrancy(tsd);
}
+ if (addr != NULL && opt_metadata_thp && thp_state_madvise) {
+ assert(((uintptr_t)addr & HUGEPAGE_MASK) == 0 &&
+ (size & HUGEPAGE_MASK) == 0);
+ pages_huge(addr, size);
+ }
+
return addr;
}
@@ -51,16 +60,16 @@ base_unmap(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, void *addr,
*/
if (extent_hooks == &extent_hooks_default) {
if (!extent_dalloc_mmap(addr, size)) {
- return;
+ goto label_done;
}
if (!pages_decommit(addr, size)) {
- return;
+ goto label_done;
}
if (!pages_purge_forced(addr, size)) {
- return;
+ goto label_done;
}
if (!pages_purge_lazy(addr, size)) {
- return;
+ goto label_done;
}
/* Nothing worked. This should never happen. */
not_reached();
@@ -70,27 +79,33 @@ base_unmap(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, void *addr,
if (extent_hooks->dalloc != NULL &&
!extent_hooks->dalloc(extent_hooks, addr, size, true,
ind)) {
- goto label_done;
+ goto label_post_reentrancy;
}
if (extent_hooks->decommit != NULL &&
!extent_hooks->decommit(extent_hooks, addr, size, 0, size,
ind)) {
- goto label_done;
+ goto label_post_reentrancy;
}
if (extent_hooks->purge_forced != NULL &&
!extent_hooks->purge_forced(extent_hooks, addr, size, 0,
size, ind)) {
- goto label_done;
+ goto label_post_reentrancy;
}
if (extent_hooks->purge_lazy != NULL &&
!extent_hooks->purge_lazy(extent_hooks, addr, size, 0, size,
ind)) {
- goto label_done;
+ goto label_post_reentrancy;
}
/* Nothing worked. That's the application's problem. */
- label_done:
+ label_post_reentrancy:
post_reentrancy(tsd);
- return;
+ }
+label_done:
+ if (opt_metadata_thp && thp_state_madvise) {
+ /* Set NOHUGEPAGE after unmap to avoid kernel defrag. */
+ assert(((uintptr_t)addr & HUGEPAGE_MASK) == 0 &&
+ (size & HUGEPAGE_MASK) == 0);
+ pages_nohuge(addr, size);
}
}
diff --git a/src/ctl.c b/src/ctl.c
index 36bc8fb5..c2991036 100644
--- a/src/ctl.c
+++ b/src/ctl.c
@@ -80,6 +80,7 @@ CTL_PROTO(config_utrace)
CTL_PROTO(config_xmalloc)
CTL_PROTO(opt_abort)
CTL_PROTO(opt_abort_conf)
+CTL_PROTO(opt_metadata_thp)
CTL_PROTO(opt_retain)
CTL_PROTO(opt_dss)
CTL_PROTO(opt_narenas)
@@ -274,6 +275,7 @@ static const ctl_named_node_t config_node[] = {
static const ctl_named_node_t opt_node[] = {
{NAME("abort"), CTL(opt_abort)},
{NAME("abort_conf"), CTL(opt_abort_conf)},
+ {NAME("metadata_thp"), CTL(opt_metadata_thp)},
{NAME("retain"), CTL(opt_retain)},
{NAME("dss"), CTL(opt_dss)},
{NAME("narenas"), CTL(opt_narenas)},
@@ -1568,6 +1570,7 @@ CTL_RO_CONFIG_GEN(config_xmalloc, bool)
CTL_RO_NL_GEN(opt_abort, opt_abort, bool)
CTL_RO_NL_GEN(opt_abort_conf, opt_abort_conf, bool)
+CTL_RO_NL_GEN(opt_metadata_thp, opt_metadata_thp, bool)
CTL_RO_NL_GEN(opt_retain, opt_retain, bool)
CTL_RO_NL_GEN(opt_dss, opt_dss, const char *)
CTL_RO_NL_GEN(opt_narenas, opt_narenas, unsigned)
diff --git a/src/jemalloc.c b/src/jemalloc.c
index 4c73ba4a..cbae259d 100644
--- a/src/jemalloc.c
+++ b/src/jemalloc.c
@@ -1055,6 +1055,7 @@ malloc_conf_init(void) {
if (opt_abort_conf && had_conf_error) {
malloc_abort_invalid_conf();
}
+ CONF_HANDLE_BOOL(opt_metadata_thp, "metadata_thp")
CONF_HANDLE_BOOL(opt_retain, "retain")
if (strncmp("dss", k, klen) == 0) {
int i;
diff --git a/src/pages.c b/src/pages.c
index f8ef2bcb..9561f6de 100644
--- a/src/pages.c
+++ b/src/pages.c
@@ -25,6 +25,8 @@ static int mmap_flags;
#endif
static bool os_overcommits;
+bool thp_state_madvise;
+
/******************************************************************************/
/*
* Function prototypes for static functions that are referenced prior to
@@ -291,7 +293,7 @@ pages_huge(void *addr, size_t size) {
assert(HUGEPAGE_ADDR2BASE(addr) == addr);
assert(HUGEPAGE_CEILING(size) == size);
-#ifdef JEMALLOC_THP
+#ifdef JEMALLOC_HAVE_MADVISE_HUGE
return (madvise(addr, size, MADV_HUGEPAGE) != 0);
#else
return true;
@@ -303,7 +305,7 @@ pages_nohuge(void *addr, size_t size) {
assert(HUGEPAGE_ADDR2BASE(addr) == addr);
assert(HUGEPAGE_CEILING(size) == size);
-#ifdef JEMALLOC_THP
+#ifdef JEMALLOC_HAVE_MADVISE_HUGE
return (madvise(addr, size, MADV_NOHUGEPAGE) != 0);
#else
return false;
@@ -413,6 +415,51 @@ os_overcommits_proc(void) {
}
#endif
+/*
+ * Determine at boot whether the system transparent huge page (THP) mode is
+ * "madvise" by reading /sys/kernel/mm/transparent_hugepage/enabled, and record
+ * the answer in thp_state_madvise.  Any failure to probe leaves the flag false.
+ *
+ * When MADV_HUGEPAGE is not available at build time, no probing is done; the
+ * process aborts if both opt_metadata_thp and opt_abort are set.
+ */
+static void
+init_thp_state(void) {
+	/* Pessimistic default; only an exact sysfs match below flips it. */
+	thp_state_madvise = false;
+#ifndef JEMALLOC_HAVE_MADVISE_HUGE
+	if (opt_metadata_thp && opt_abort) {
+		malloc_write(": no MADV_HUGEPAGE support\n");
+		abort();
+	}
+#else
+	static const char madvise_state[] = "always [madvise] never\n";
+	char buf[sizeof(madvise_state)];
+
+#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open)
+	int fd = (int)syscall(SYS_open,
+	    "/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY);
+#else
+	int fd = open("/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY);
+#endif
+	if (fd == -1) {
+		return;
+	}
+
+#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_read)
+	ssize_t nread = (ssize_t)syscall(SYS_read, fd, &buf, sizeof(buf));
+#else
+	ssize_t nread = read(fd, &buf, sizeof(buf));
+#endif
+
+#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_close)
+	syscall(SYS_close, fd);
+#else
+	close(fd);
+#endif
+
+	/*
+	 * Require the complete line.  Bounding strncmp() by a short nread would
+	 * accept any prefix of madvise_state (e.g. "always ") as a match, and
+	 * buf is not NUL-terminated, so the comparison length must be explicit.
+	 */
+	if (nread == (ssize_t)(sizeof(madvise_state) - 1) &&
+	    strncmp(buf, madvise_state, sizeof(madvise_state) - 1) == 0) {
+		thp_state_madvise = true;
+	}
+#endif
+}
+
bool
pages_boot(void) {
os_page = os_page_detect();
@@ -441,5 +488,7 @@ pages_boot(void) {
os_overcommits = false;
#endif
+ init_thp_state();
+
return false;
}
diff --git a/src/stats.c b/src/stats.c
index 087df767..746cc426 100644
--- a/src/stats.c
+++ b/src/stats.c
@@ -802,6 +802,7 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque,
}
OPT_WRITE_BOOL(abort, ",")
OPT_WRITE_BOOL(abort_conf, ",")
+ OPT_WRITE_BOOL(metadata_thp, ",")
OPT_WRITE_BOOL(retain, ",")
OPT_WRITE_CHAR_P(dss, ",")
OPT_WRITE_UNSIGNED(narenas, ",")
diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c
index f6116549..d9fdd058 100644
--- a/test/unit/mallctl.c
+++ b/test/unit/mallctl.c
@@ -157,6 +157,7 @@ TEST_BEGIN(test_mallctl_opt) {
} while (0)
TEST_MALLCTL_OPT(bool, abort, always);
+ TEST_MALLCTL_OPT(bool, metadata_thp, always);
TEST_MALLCTL_OPT(bool, retain, always);
TEST_MALLCTL_OPT(const char *, dss, always);
TEST_MALLCTL_OPT(unsigned, narenas, always);