Implement opt.metadata_thp

This option enables transparent huge pages for base allocators (requires
MADV_HUGEPAGE support).
This commit is contained in:
Qi Wang 2017-08-10 13:14:26 -07:00 committed by Qi Wang
parent d157864027
commit 8fdd9a5797
12 changed files with 118 additions and 17 deletions

View File

@ -1824,6 +1824,9 @@ if test "x${je_cv_madvise}" = "xyes" ; then
madvise((void *)0, 0, MADV_HUGEPAGE); madvise((void *)0, 0, MADV_HUGEPAGE);
madvise((void *)0, 0, MADV_NOHUGEPAGE); madvise((void *)0, 0, MADV_NOHUGEPAGE);
], [je_cv_thp]) ], [je_cv_thp])
if test "x${je_cv_thp}" = "xyes" ; then
AC_DEFINE([JEMALLOC_HAVE_MADVISE_HUGE], [ ])
fi
fi fi
dnl Enable transparent huge page support by default. dnl Enable transparent huge page support by default.

View File

@ -916,6 +916,18 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay",
</para></listitem> </para></listitem>
</varlistentry> </varlistentry>
<varlistentry id="opt.metadata_thp">
<term>
<mallctl>opt.metadata_thp</mallctl>
(<type>bool</type>)
<literal>r-</literal>
</term>
<listitem><para>If true, allow jemalloc to use transparent huge pages
(THP) for internal metadata (see <link
linkend="stats.metadata">stats.metadata</link> for details). This
option is disabled by default.</para></listitem>
</varlistentry>
<varlistentry id="opt.retain"> <varlistentry id="opt.retain">
<term> <term>
<mallctl>opt.retain</mallctl> <mallctl>opt.retain</mallctl>
@ -2187,7 +2199,10 @@ struct extent_hooks_s {
metadata structures (see <link metadata structures (see <link
linkend="stats.arenas.i.base"><mallctl>stats.arenas.&lt;i&gt;.base</mallctl></link>) linkend="stats.arenas.i.base"><mallctl>stats.arenas.&lt;i&gt;.base</mallctl></link>)
and internal allocations (see <link and internal allocations (see <link
linkend="stats.arenas.i.internal"><mallctl>stats.arenas.&lt;i&gt;.internal</mallctl></link>).</para></listitem> linkend="stats.arenas.i.internal"><mallctl>stats.arenas.&lt;i&gt;.internal</mallctl></link>).
Transparent huge page (enabled with <link
linkend="opt.metadata_thp">opt.metadata_thp</link>) usage is not
considered.</para></listitem>
</varlistentry> </varlistentry>
<varlistentry id="stats.resident"> <varlistentry id="stats.resident">

View File

@ -1,6 +1,8 @@
#ifndef JEMALLOC_INTERNAL_BASE_EXTERNS_H #ifndef JEMALLOC_INTERNAL_BASE_EXTERNS_H
#define JEMALLOC_INTERNAL_BASE_EXTERNS_H #define JEMALLOC_INTERNAL_BASE_EXTERNS_H
extern bool opt_metadata_thp;
base_t *b0get(void); base_t *b0get(void);
base_t *base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks); base_t *base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks);
void base_delete(tsdn_t *tsdn, base_t *base); void base_delete(tsdn_t *tsdn, base_t *base);

View File

@ -4,4 +4,6 @@
typedef struct base_block_s base_block_t; typedef struct base_block_s base_block_t;
typedef struct base_s base_t; typedef struct base_s base_t;
#define METADATA_THP_DEFAULT false
#endif /* JEMALLOC_INTERNAL_BASE_TYPES_H */ #endif /* JEMALLOC_INTERNAL_BASE_TYPES_H */

View File

@ -260,6 +260,12 @@
/* Defined if madvise(2) is available. */ /* Defined if madvise(2) is available. */
#undef JEMALLOC_HAVE_MADVISE #undef JEMALLOC_HAVE_MADVISE
/*
* Defined if transparent huge pages are supported via the MADV_[NO]HUGEPAGE
* arguments to madvise(2).
*/
#undef JEMALLOC_HAVE_MADVISE_HUGE
/* /*
* Methods for purging unused pages differ between operating systems. * Methods for purging unused pages differ between operating systems.
* *

View File

@ -58,6 +58,9 @@ static const bool pages_can_purge_forced =
#endif #endif
; ;
/* Whether transparent huge page state is "madvise". */
extern bool thp_state_madvise;
void *pages_map(void *addr, size_t size, size_t alignment, bool *commit); void *pages_map(void *addr, size_t size, size_t alignment, bool *commit);
void pages_unmap(void *addr, size_t size); void pages_unmap(void *addr, size_t size);
bool pages_commit(void *addr, size_t size); bool pages_commit(void *addr, size_t size);

View File

@ -10,7 +10,9 @@
/******************************************************************************/ /******************************************************************************/
/* Data. */ /* Data. */
static base_t *b0; static base_t *b0;
bool opt_metadata_thp = METADATA_THP_DEFAULT;
/******************************************************************************/ /******************************************************************************/
@ -20,19 +22,26 @@ base_map(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, size_t size)
bool zero = true; bool zero = true;
bool commit = true; bool commit = true;
/* We use hugepage sizes regardless of opt_metadata_thp. */
assert(size == HUGEPAGE_CEILING(size)); assert(size == HUGEPAGE_CEILING(size));
size_t alignment = opt_metadata_thp ? HUGEPAGE : PAGE;
if (extent_hooks == &extent_hooks_default) { if (extent_hooks == &extent_hooks_default) {
addr = extent_alloc_mmap(NULL, size, PAGE, &zero, &commit); addr = extent_alloc_mmap(NULL, size, alignment, &zero, &commit);
} else { } else {
/* No arena context as we are creating new arenas. */ /* No arena context as we are creating new arenas. */
tsd_t *tsd = tsdn_null(tsdn) ? tsd_fetch() : tsdn_tsd(tsdn); tsd_t *tsd = tsdn_null(tsdn) ? tsd_fetch() : tsdn_tsd(tsdn);
pre_reentrancy(tsd, NULL); pre_reentrancy(tsd, NULL);
addr = extent_hooks->alloc(extent_hooks, NULL, size, PAGE, addr = extent_hooks->alloc(extent_hooks, NULL, size, alignment,
&zero, &commit, ind); &zero, &commit, ind);
post_reentrancy(tsd); post_reentrancy(tsd);
} }
if (addr != NULL && opt_metadata_thp && thp_state_madvise) {
assert(((uintptr_t)addr & HUGEPAGE_MASK) == 0 &&
(size & HUGEPAGE_MASK) == 0);
pages_huge(addr, size);
}
return addr; return addr;
} }
@ -51,16 +60,16 @@ base_unmap(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, void *addr,
*/ */
if (extent_hooks == &extent_hooks_default) { if (extent_hooks == &extent_hooks_default) {
if (!extent_dalloc_mmap(addr, size)) { if (!extent_dalloc_mmap(addr, size)) {
return; goto label_done;
} }
if (!pages_decommit(addr, size)) { if (!pages_decommit(addr, size)) {
return; goto label_done;
} }
if (!pages_purge_forced(addr, size)) { if (!pages_purge_forced(addr, size)) {
return; goto label_done;
} }
if (!pages_purge_lazy(addr, size)) { if (!pages_purge_lazy(addr, size)) {
return; goto label_done;
} }
/* Nothing worked. This should never happen. */ /* Nothing worked. This should never happen. */
not_reached(); not_reached();
@ -70,27 +79,33 @@ base_unmap(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, void *addr,
if (extent_hooks->dalloc != NULL && if (extent_hooks->dalloc != NULL &&
!extent_hooks->dalloc(extent_hooks, addr, size, true, !extent_hooks->dalloc(extent_hooks, addr, size, true,
ind)) { ind)) {
goto label_done; goto label_post_reentrancy;
} }
if (extent_hooks->decommit != NULL && if (extent_hooks->decommit != NULL &&
!extent_hooks->decommit(extent_hooks, addr, size, 0, size, !extent_hooks->decommit(extent_hooks, addr, size, 0, size,
ind)) { ind)) {
goto label_done; goto label_post_reentrancy;
} }
if (extent_hooks->purge_forced != NULL && if (extent_hooks->purge_forced != NULL &&
!extent_hooks->purge_forced(extent_hooks, addr, size, 0, !extent_hooks->purge_forced(extent_hooks, addr, size, 0,
size, ind)) { size, ind)) {
goto label_done; goto label_post_reentrancy;
} }
if (extent_hooks->purge_lazy != NULL && if (extent_hooks->purge_lazy != NULL &&
!extent_hooks->purge_lazy(extent_hooks, addr, size, 0, size, !extent_hooks->purge_lazy(extent_hooks, addr, size, 0, size,
ind)) { ind)) {
goto label_done; goto label_post_reentrancy;
} }
/* Nothing worked. That's the application's problem. */ /* Nothing worked. That's the application's problem. */
label_done: label_post_reentrancy:
post_reentrancy(tsd); post_reentrancy(tsd);
return; }
label_done:
if (opt_metadata_thp && thp_state_madvise) {
/* Set NOHUGEPAGE after unmap to avoid kernel defrag. */
assert(((uintptr_t)addr & HUGEPAGE_MASK) == 0 &&
(size & HUGEPAGE_MASK) == 0);
pages_nohuge(addr, size);
} }
} }

View File

@ -80,6 +80,7 @@ CTL_PROTO(config_utrace)
CTL_PROTO(config_xmalloc) CTL_PROTO(config_xmalloc)
CTL_PROTO(opt_abort) CTL_PROTO(opt_abort)
CTL_PROTO(opt_abort_conf) CTL_PROTO(opt_abort_conf)
CTL_PROTO(opt_metadata_thp)
CTL_PROTO(opt_retain) CTL_PROTO(opt_retain)
CTL_PROTO(opt_dss) CTL_PROTO(opt_dss)
CTL_PROTO(opt_narenas) CTL_PROTO(opt_narenas)
@ -274,6 +275,7 @@ static const ctl_named_node_t config_node[] = {
static const ctl_named_node_t opt_node[] = { static const ctl_named_node_t opt_node[] = {
{NAME("abort"), CTL(opt_abort)}, {NAME("abort"), CTL(opt_abort)},
{NAME("abort_conf"), CTL(opt_abort_conf)}, {NAME("abort_conf"), CTL(opt_abort_conf)},
{NAME("metadata_thp"), CTL(opt_metadata_thp)},
{NAME("retain"), CTL(opt_retain)}, {NAME("retain"), CTL(opt_retain)},
{NAME("dss"), CTL(opt_dss)}, {NAME("dss"), CTL(opt_dss)},
{NAME("narenas"), CTL(opt_narenas)}, {NAME("narenas"), CTL(opt_narenas)},
@ -1568,6 +1570,7 @@ CTL_RO_CONFIG_GEN(config_xmalloc, bool)
CTL_RO_NL_GEN(opt_abort, opt_abort, bool) CTL_RO_NL_GEN(opt_abort, opt_abort, bool)
CTL_RO_NL_GEN(opt_abort_conf, opt_abort_conf, bool) CTL_RO_NL_GEN(opt_abort_conf, opt_abort_conf, bool)
CTL_RO_NL_GEN(opt_metadata_thp, opt_metadata_thp, bool)
CTL_RO_NL_GEN(opt_retain, opt_retain, bool) CTL_RO_NL_GEN(opt_retain, opt_retain, bool)
CTL_RO_NL_GEN(opt_dss, opt_dss, const char *) CTL_RO_NL_GEN(opt_dss, opt_dss, const char *)
CTL_RO_NL_GEN(opt_narenas, opt_narenas, unsigned) CTL_RO_NL_GEN(opt_narenas, opt_narenas, unsigned)

View File

@ -1055,6 +1055,7 @@ malloc_conf_init(void) {
if (opt_abort_conf && had_conf_error) { if (opt_abort_conf && had_conf_error) {
malloc_abort_invalid_conf(); malloc_abort_invalid_conf();
} }
CONF_HANDLE_BOOL(opt_metadata_thp, "metadata_thp")
CONF_HANDLE_BOOL(opt_retain, "retain") CONF_HANDLE_BOOL(opt_retain, "retain")
if (strncmp("dss", k, klen) == 0) { if (strncmp("dss", k, klen) == 0) {
int i; int i;

View File

@ -25,6 +25,8 @@ static int mmap_flags;
#endif #endif
static bool os_overcommits; static bool os_overcommits;
bool thp_state_madvise;
/******************************************************************************/ /******************************************************************************/
/* /*
* Function prototypes for static functions that are referenced prior to * Function prototypes for static functions that are referenced prior to
@ -291,7 +293,7 @@ pages_huge(void *addr, size_t size) {
assert(HUGEPAGE_ADDR2BASE(addr) == addr); assert(HUGEPAGE_ADDR2BASE(addr) == addr);
assert(HUGEPAGE_CEILING(size) == size); assert(HUGEPAGE_CEILING(size) == size);
#ifdef JEMALLOC_THP #ifdef JEMALLOC_HAVE_MADVISE_HUGE
return (madvise(addr, size, MADV_HUGEPAGE) != 0); return (madvise(addr, size, MADV_HUGEPAGE) != 0);
#else #else
return true; return true;
@ -303,7 +305,7 @@ pages_nohuge(void *addr, size_t size) {
assert(HUGEPAGE_ADDR2BASE(addr) == addr); assert(HUGEPAGE_ADDR2BASE(addr) == addr);
assert(HUGEPAGE_CEILING(size) == size); assert(HUGEPAGE_CEILING(size) == size);
#ifdef JEMALLOC_THP #ifdef JEMALLOC_HAVE_MADVISE_HUGE
return (madvise(addr, size, MADV_NOHUGEPAGE) != 0); return (madvise(addr, size, MADV_NOHUGEPAGE) != 0);
#else #else
return false; return false;
@ -413,6 +415,51 @@ os_overcommits_proc(void) {
} }
#endif #endif
/*
 * Detect whether the system-wide transparent huge page (THP) mode is
 * "madvise" and record the answer in thp_state_madvise.
 *
 * The mode is read from /sys/kernel/mm/transparent_hugepage/enabled and is
 * compared against the exact line "always [madvise] never\n".  Any failure
 * (no MADV_HUGEPAGE support at build time, the file cannot be opened, the
 * read fails or is short, or the contents differ) leaves thp_state_madvise
 * set to false.
 *
 * If the build lacks MADV_HUGEPAGE support while both opt_metadata_thp and
 * opt_abort are set, a diagnostic is written and the process aborts.
 */
static void
init_thp_state(void) {
	/* Pessimistic default; only a full, exact match below flips it. */
	thp_state_madvise = false;

#ifndef JEMALLOC_HAVE_MADVISE_HUGE
	if (opt_metadata_thp && opt_abort) {
		malloc_write("<jemalloc>: no MADV_HUGEPAGE support\n");
		abort();
	}
#else
	static const char madvise_state[] = "always [madvise] never\n";
	/* Expected length of the sysfs line, excluding the terminating NUL. */
	const size_t madvise_state_len = sizeof(madvise_state) - 1;
	char buf[sizeof(madvise_state)];

#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open)
	int fd = (int)syscall(SYS_open,
	    "/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY);
#else
	int fd = open("/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY);
#endif
	if (fd == -1) {
		return;
	}

#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_read)
	ssize_t nread = (ssize_t)syscall(SYS_read, fd, &buf, sizeof(buf));
#else
	ssize_t nread = read(fd, &buf, sizeof(buf));
#endif

#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_close)
	syscall(SYS_close, fd);
#else
	close(fd);
#endif

	/*
	 * Require the whole line to have been read.  Comparing only the
	 * first nread bytes would let a truncated read (e.g. "always ")
	 * be mistaken for the "madvise" state.
	 */
	if (nread == (ssize_t)madvise_state_len &&
	    strncmp(buf, madvise_state, madvise_state_len) == 0) {
		thp_state_madvise = true;
	}
#endif
}
bool bool
pages_boot(void) { pages_boot(void) {
os_page = os_page_detect(); os_page = os_page_detect();
@ -441,5 +488,7 @@ pages_boot(void) {
os_overcommits = false; os_overcommits = false;
#endif #endif
init_thp_state();
return false; return false;
} }

View File

@ -802,6 +802,7 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque,
} }
OPT_WRITE_BOOL(abort, ",") OPT_WRITE_BOOL(abort, ",")
OPT_WRITE_BOOL(abort_conf, ",") OPT_WRITE_BOOL(abort_conf, ",")
OPT_WRITE_BOOL(metadata_thp, ",")
OPT_WRITE_BOOL(retain, ",") OPT_WRITE_BOOL(retain, ",")
OPT_WRITE_CHAR_P(dss, ",") OPT_WRITE_CHAR_P(dss, ",")
OPT_WRITE_UNSIGNED(narenas, ",") OPT_WRITE_UNSIGNED(narenas, ",")

View File

@ -157,6 +157,7 @@ TEST_BEGIN(test_mallctl_opt) {
} while (0) } while (0)
TEST_MALLCTL_OPT(bool, abort, always); TEST_MALLCTL_OPT(bool, abort, always);
TEST_MALLCTL_OPT(bool, metadata_thp, always);
TEST_MALLCTL_OPT(bool, retain, always); TEST_MALLCTL_OPT(bool, retain, always);
TEST_MALLCTL_OPT(const char *, dss, always); TEST_MALLCTL_OPT(const char *, dss, always);
TEST_MALLCTL_OPT(unsigned, narenas, always); TEST_MALLCTL_OPT(unsigned, narenas, always);