Fix/enhance THP integration.

Detect whether chunks start off as THP-capable by default (according to
the state of /sys/kernel/mm/transparent_hugepage/enabled), and use this
as the basis for whether to call pages_nohuge() once per chunk during
first purge of any of the chunk's page runs.

Add the --disable-thp configure option, as well as the the opt.thp
mallctl.

This resolves #541.
This commit is contained in:
Jason Evans 2017-02-28 01:08:28 -08:00
parent 766ddcd0f2
commit d84d2909c3
13 changed files with 177 additions and 21 deletions

View File

@ -157,6 +157,13 @@ any of the following arguments (not a definitive list) to 'configure':
released in bulk, thus reducing the total number of mutex operations. See released in bulk, thus reducing the total number of mutex operations. See
the "opt.tcache" option for usage details. the "opt.tcache" option for usage details.
--disable-thp
Disable transparent huge page (THP) integration. On systems with THP
support, THPs are explicitly disabled as a side effect of unused dirty page
purging for chunks that back small and/or large allocations, because such
chunks typically comprise active, unused dirty, and untouched clean
pages.
--disable-munmap --disable-munmap
Disable virtual memory deallocation via munmap(2); instead keep track of Disable virtual memory deallocation via munmap(2); instead keep track of
the virtual memory for later use. munmap() is disabled by default (i.e. the virtual memory for later use. munmap() is disabled by default (i.e.

View File

@ -1683,9 +1683,30 @@ if test "x${je_cv_madvise}" = "xyes" ; then
madvise((void *)0, 0, MADV_NOHUGEPAGE); madvise((void *)0, 0, MADV_NOHUGEPAGE);
], [je_cv_thp]) ], [je_cv_thp])
if test "x${je_cv_thp}" = "xyes" ; then if test "x${je_cv_thp}" = "xyes" ; then
AC_DEFINE([JEMALLOC_HAVE_MADVISE_HUGE], [ ])
fi
fi
dnl Enable transparent huge page support by default.
AC_ARG_ENABLE([thp],
[AS_HELP_STRING([--disable-thp],
[Disable transparent huge page supprot])],
[if test "x$enable_thp" = "xno" -o "x${je_cv_thp}" != "xyes" ; then
enable_thp="0"
else
enable_thp="1"
fi
],
[if test "x${je_cv_thp}" = "xyes" ; then
enable_thp="1"
else
enable_thp="0"
fi
])
if test "x$enable_thp" = "x1" ; then
AC_DEFINE([JEMALLOC_THP], [ ]) AC_DEFINE([JEMALLOC_THP], [ ])
fi fi
fi AC_SUBST([enable_thp])
dnl ============================================================================ dnl ============================================================================
dnl Check whether __sync_{add,sub}_and_fetch() are available despite dnl Check whether __sync_{add,sub}_and_fetch() are available despite
@ -2014,6 +2035,7 @@ AC_MSG_RESULT([prof-libunwind : ${enable_prof_libunwind}])
AC_MSG_RESULT([prof-libgcc : ${enable_prof_libgcc}]) AC_MSG_RESULT([prof-libgcc : ${enable_prof_libgcc}])
AC_MSG_RESULT([prof-gcc : ${enable_prof_gcc}]) AC_MSG_RESULT([prof-gcc : ${enable_prof_gcc}])
AC_MSG_RESULT([tcache : ${enable_tcache}]) AC_MSG_RESULT([tcache : ${enable_tcache}])
AC_MSG_RESULT([thp : ${enable_thp}])
AC_MSG_RESULT([fill : ${enable_fill}]) AC_MSG_RESULT([fill : ${enable_fill}])
AC_MSG_RESULT([utrace : ${enable_utrace}]) AC_MSG_RESULT([utrace : ${enable_utrace}])
AC_MSG_RESULT([valgrind : ${enable_valgrind}]) AC_MSG_RESULT([valgrind : ${enable_valgrind}])

View File

@ -850,6 +850,17 @@ for (i = 0; i < nbins; i++) {
during build configuration.</para></listitem> during build configuration.</para></listitem>
</varlistentry> </varlistentry>
<varlistentry id="config.thp">
<term>
<mallctl>config.thp</mallctl>
(<type>bool</type>)
<literal>r-</literal>
</term>
<listitem><para><option>--disable-thp</option> was not specified
during build configuration, and the system supports transparent huge
page manipulation.</para></listitem>
</varlistentry>
<varlistentry id="config.tls"> <varlistentry id="config.tls">
<term> <term>
<mallctl>config.tls</mallctl> <mallctl>config.tls</mallctl>
@ -1162,6 +1173,21 @@ malloc_conf = "xmalloc:true";]]></programlisting>
forcefully disabled.</para></listitem> forcefully disabled.</para></listitem>
</varlistentry> </varlistentry>
<varlistentry id="opt.thp">
<term>
<mallctl>opt.thp</mallctl>
(<type>bool</type>)
<literal>r-</literal>
[<option>--enable-thp</option>]
</term>
<listitem><para>Transparent huge page (THP) integration
enabled/disabled. When enabled, THPs are explicitly disabled as a side
effect of unused dirty page purging for chunks that back small and/or
large allocations, because such chunks typically comprise active,
unused dirty, and untouched clean pages. This option is enabled by
default.</para></listitem>
</varlistentry>
<varlistentry id="opt.lg_tcache_max"> <varlistentry id="opt.lg_tcache_max">
<term> <term>
<mallctl>opt.lg_tcache_max</mallctl> <mallctl>opt.lg_tcache_max</mallctl>

View File

@ -506,6 +506,7 @@ static const size_t large_pad =
#endif #endif
; ;
extern bool opt_thp;
extern purge_mode_t opt_purge; extern purge_mode_t opt_purge;
extern const char *purge_mode_names[]; extern const char *purge_mode_names[];
extern ssize_t opt_lg_dirty_mult; extern ssize_t opt_lg_dirty_mult;

View File

@ -99,6 +99,13 @@ static const bool config_tcache =
false false
#endif #endif
; ;
static const bool config_thp =
#ifdef JEMALLOC_THP
true
#else
false
#endif
;
static const bool config_tls = static const bool config_tls =
#ifdef JEMALLOC_TLS #ifdef JEMALLOC_TLS
true true

View File

@ -252,6 +252,12 @@
/* Defined if madvise(2) is available. */ /* Defined if madvise(2) is available. */
#undef JEMALLOC_HAVE_MADVISE #undef JEMALLOC_HAVE_MADVISE
/*
* Defined if transparent huge pages are supported via the MADV_[NO]HUGEPAGE
* arguments to madvise(2).
*/
#undef JEMALLOC_HAVE_MADVISE_HUGE
/* /*
* Methods for purging unused pages differ between operating systems. * Methods for purging unused pages differ between operating systems.
* *
@ -264,10 +270,7 @@
#undef JEMALLOC_PURGE_MADVISE_FREE #undef JEMALLOC_PURGE_MADVISE_FREE
#undef JEMALLOC_PURGE_MADVISE_DONTNEED #undef JEMALLOC_PURGE_MADVISE_DONTNEED
/* /* Defined if transparent huge page support is enabled. */
* Defined if transparent huge pages are supported via the MADV_[NO]HUGEPAGE
* arguments to madvise(2).
*/
#undef JEMALLOC_THP #undef JEMALLOC_THP
/* Define if operating system has alloca.h header. */ /* Define if operating system has alloca.h header. */

View File

@ -392,6 +392,7 @@ opt_quarantine
opt_redzone opt_redzone
opt_stats_print opt_stats_print
opt_tcache opt_tcache
opt_thp
opt_utrace opt_utrace
opt_xmalloc opt_xmalloc
opt_zero opt_zero

View File

@ -4,6 +4,8 @@
/******************************************************************************/ /******************************************************************************/
/* Data. */ /* Data. */
bool opt_thp = true;
static bool thp_initially_huge;
purge_mode_t opt_purge = PURGE_DEFAULT; purge_mode_t opt_purge = PURGE_DEFAULT;
const char *purge_mode_names[] = { const char *purge_mode_names[] = {
"ratio", "ratio",
@ -680,7 +682,9 @@ arena_chunk_init_hard(tsdn_t *tsdn, arena_t *arena)
if (chunk == NULL) if (chunk == NULL)
return (NULL); return (NULL);
chunk->hugepage = true; if (config_thp && opt_thp) {
chunk->hugepage = thp_initially_huge;
}
/* /*
* Initialize the map to contain one maximal free untouched run. Mark * Initialize the map to contain one maximal free untouched run. Mark
@ -745,14 +749,17 @@ arena_chunk_alloc(tsdn_t *tsdn, arena_t *arena)
static void static void
arena_chunk_discard(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk) arena_chunk_discard(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk)
{ {
size_t sn, hugepage; size_t sn;
UNUSED bool hugepage JEMALLOC_CC_SILENCE_INIT(false);
bool committed; bool committed;
chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER;
chunk_deregister(chunk, &chunk->node); chunk_deregister(chunk, &chunk->node);
sn = extent_node_sn_get(&chunk->node); sn = extent_node_sn_get(&chunk->node);
if (config_thp && opt_thp) {
hugepage = chunk->hugepage; hugepage = chunk->hugepage;
}
committed = (arena_mapbits_decommitted_get(chunk, map_bias) == 0); committed = (arena_mapbits_decommitted_get(chunk, map_bias) == 0);
if (!committed) { if (!committed) {
/* /*
@ -765,13 +772,16 @@ arena_chunk_discard(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk)
chunk_hooks.decommit(chunk, chunksize, 0, map_bias << LG_PAGE, chunk_hooks.decommit(chunk, chunksize, 0, map_bias << LG_PAGE,
arena->ind); arena->ind);
} }
if (!hugepage) { if (config_thp && opt_thp && hugepage != thp_initially_huge) {
/* /*
* Convert chunk back to the default state, so that all * Convert chunk back to initial THP state, so that all
* subsequent chunk allocations start out with chunks that can * subsequent chunk allocations start out in a consistent state.
* be backed by transparent huge pages.
*/ */
if (thp_initially_huge) {
pages_huge(chunk, chunksize); pages_huge(chunk, chunksize);
} else {
pages_nohuge(chunk, chunksize);
}
} }
chunk_dalloc_cache(tsdn, arena, &chunk_hooks, (void *)chunk, chunksize, chunk_dalloc_cache(tsdn, arena, &chunk_hooks, (void *)chunk, chunksize,
@ -1711,13 +1721,13 @@ arena_purge_stashed(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks,
/* /*
* If this is the first run purged within chunk, mark * If this is the first run purged within chunk, mark
* the chunk as non-huge. This will prevent all use of * the chunk as non-THP-capable. This will prevent all
* transparent huge pages for this chunk until the chunk * use of THPs for this chunk until the chunk as a whole
* as a whole is deallocated. * is deallocated.
*/ */
if (chunk->hugepage) { if (config_thp && opt_thp && chunk->hugepage) {
pages_nohuge(chunk, chunksize); chunk->hugepage = pages_nohuge(chunk,
chunk->hugepage = false; chunksize);
} }
assert(pageind + npages <= chunk_npages); assert(pageind + npages <= chunk_npages);
@ -3772,11 +3782,78 @@ bin_info_init(void)
#undef SC #undef SC
} }
static void
init_thp_initially_huge(void) {
int fd;
char buf[sizeof("[always] madvise never\n")];
ssize_t nread;
static const char *enabled_states[] = {
"[always] madvise never\n",
"always [madvise] never\n",
"always madvise [never]\n"
};
static const bool thp_initially_huge_states[] = {
true,
false,
false
};
unsigned i;
if (config_debug) {
for (i = 0; i < sizeof(enabled_states)/sizeof(const char *);
i++) {
assert(sizeof(buf) > strlen(enabled_states[i]));
}
}
assert(sizeof(enabled_states)/sizeof(const char *) ==
sizeof(thp_initially_huge_states)/sizeof(bool));
#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open)
fd = (int)syscall(SYS_open,
"/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY);
#else
fd = open("/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY);
#endif
if (fd == -1) {
goto label_error;
}
#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_read)
nread = (ssize_t)syscall(SYS_read, fd, &buf, sizeof(buf));
#else
nread = read(fd, &buf, sizeof(buf));
#endif
#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_close)
syscall(SYS_close, fd);
#else
close(fd);
#endif
if (nread < 1) {
goto label_error;
}
for (i = 0; i < sizeof(enabled_states)/sizeof(const char *);
i++) {
if (strncmp(buf, enabled_states[i], (size_t)nread) == 0) {
thp_initially_huge = thp_initially_huge_states[i];
return;
}
}
label_error:
thp_initially_huge = false;
}
void void
arena_boot(void) arena_boot(void)
{ {
unsigned i; unsigned i;
if (config_thp && opt_thp) {
init_thp_initially_huge();
}
arena_lg_dirty_mult_default_set(opt_lg_dirty_mult); arena_lg_dirty_mult_default_set(opt_lg_dirty_mult);
arena_decay_time_default_set(opt_decay_time); arena_decay_time_default_set(opt_decay_time);

View File

@ -84,6 +84,7 @@ CTL_PROTO(config_prof_libgcc)
CTL_PROTO(config_prof_libunwind) CTL_PROTO(config_prof_libunwind)
CTL_PROTO(config_stats) CTL_PROTO(config_stats)
CTL_PROTO(config_tcache) CTL_PROTO(config_tcache)
CTL_PROTO(config_thp)
CTL_PROTO(config_tls) CTL_PROTO(config_tls)
CTL_PROTO(config_utrace) CTL_PROTO(config_utrace)
CTL_PROTO(config_valgrind) CTL_PROTO(config_valgrind)
@ -104,6 +105,7 @@ CTL_PROTO(opt_utrace)
CTL_PROTO(opt_xmalloc) CTL_PROTO(opt_xmalloc)
CTL_PROTO(opt_tcache) CTL_PROTO(opt_tcache)
CTL_PROTO(opt_lg_tcache_max) CTL_PROTO(opt_lg_tcache_max)
CTL_PROTO(opt_thp)
CTL_PROTO(opt_prof) CTL_PROTO(opt_prof)
CTL_PROTO(opt_prof_prefix) CTL_PROTO(opt_prof_prefix)
CTL_PROTO(opt_prof_active) CTL_PROTO(opt_prof_active)
@ -258,6 +260,7 @@ static const ctl_named_node_t config_node[] = {
{NAME("prof_libunwind"), CTL(config_prof_libunwind)}, {NAME("prof_libunwind"), CTL(config_prof_libunwind)},
{NAME("stats"), CTL(config_stats)}, {NAME("stats"), CTL(config_stats)},
{NAME("tcache"), CTL(config_tcache)}, {NAME("tcache"), CTL(config_tcache)},
{NAME("thp"), CTL(config_thp)},
{NAME("tls"), CTL(config_tls)}, {NAME("tls"), CTL(config_tls)},
{NAME("utrace"), CTL(config_utrace)}, {NAME("utrace"), CTL(config_utrace)},
{NAME("valgrind"), CTL(config_valgrind)}, {NAME("valgrind"), CTL(config_valgrind)},
@ -281,6 +284,7 @@ static const ctl_named_node_t opt_node[] = {
{NAME("xmalloc"), CTL(opt_xmalloc)}, {NAME("xmalloc"), CTL(opt_xmalloc)},
{NAME("tcache"), CTL(opt_tcache)}, {NAME("tcache"), CTL(opt_tcache)},
{NAME("lg_tcache_max"), CTL(opt_lg_tcache_max)}, {NAME("lg_tcache_max"), CTL(opt_lg_tcache_max)},
{NAME("thp"), CTL(opt_thp)},
{NAME("prof"), CTL(opt_prof)}, {NAME("prof"), CTL(opt_prof)},
{NAME("prof_prefix"), CTL(opt_prof_prefix)}, {NAME("prof_prefix"), CTL(opt_prof_prefix)},
{NAME("prof_active"), CTL(opt_prof_active)}, {NAME("prof_active"), CTL(opt_prof_active)},
@ -1268,6 +1272,7 @@ CTL_RO_CONFIG_GEN(config_prof_libgcc, bool)
CTL_RO_CONFIG_GEN(config_prof_libunwind, bool) CTL_RO_CONFIG_GEN(config_prof_libunwind, bool)
CTL_RO_CONFIG_GEN(config_stats, bool) CTL_RO_CONFIG_GEN(config_stats, bool)
CTL_RO_CONFIG_GEN(config_tcache, bool) CTL_RO_CONFIG_GEN(config_tcache, bool)
CTL_RO_CONFIG_GEN(config_thp, bool)
CTL_RO_CONFIG_GEN(config_tls, bool) CTL_RO_CONFIG_GEN(config_tls, bool)
CTL_RO_CONFIG_GEN(config_utrace, bool) CTL_RO_CONFIG_GEN(config_utrace, bool)
CTL_RO_CONFIG_GEN(config_valgrind, bool) CTL_RO_CONFIG_GEN(config_valgrind, bool)
@ -1291,6 +1296,7 @@ CTL_RO_NL_CGEN(config_utrace, opt_utrace, opt_utrace, bool)
CTL_RO_NL_CGEN(config_xmalloc, opt_xmalloc, opt_xmalloc, bool) CTL_RO_NL_CGEN(config_xmalloc, opt_xmalloc, opt_xmalloc, bool)
CTL_RO_NL_CGEN(config_tcache, opt_tcache, opt_tcache, bool) CTL_RO_NL_CGEN(config_tcache, opt_tcache, opt_tcache, bool)
CTL_RO_NL_CGEN(config_tcache, opt_lg_tcache_max, opt_lg_tcache_max, ssize_t) CTL_RO_NL_CGEN(config_tcache, opt_lg_tcache_max, opt_lg_tcache_max, ssize_t)
CTL_RO_NL_CGEN(config_thp, opt_thp, opt_thp, bool)
CTL_RO_NL_CGEN(config_prof, opt_prof, opt_prof, bool) CTL_RO_NL_CGEN(config_prof, opt_prof, opt_prof, bool)
CTL_RO_NL_CGEN(config_prof, opt_prof_prefix, opt_prof_prefix, const char *) CTL_RO_NL_CGEN(config_prof, opt_prof_prefix, opt_prof_prefix, const char *)
CTL_RO_NL_CGEN(config_prof, opt_prof_active, opt_prof_active, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_active, opt_prof_active, bool)

View File

@ -1272,6 +1272,9 @@ malloc_conf_init(void)
"lg_tcache_max", -1, "lg_tcache_max", -1,
(sizeof(size_t) << 3) - 1) (sizeof(size_t) << 3) - 1)
} }
if (config_thp) {
CONF_HANDLE_BOOL(opt_thp, "thp", true)
}
if (config_prof) { if (config_prof) {
CONF_HANDLE_BOOL(opt_prof, "prof", true) CONF_HANDLE_BOOL(opt_prof, "prof", true)
CONF_HANDLE_CHAR_P(opt_prof_prefix, CONF_HANDLE_CHAR_P(opt_prof_prefix,

View File

@ -199,7 +199,7 @@ pages_huge(void *addr, size_t size)
assert(PAGE_ADDR2BASE(addr) == addr); assert(PAGE_ADDR2BASE(addr) == addr);
assert(PAGE_CEILING(size) == size); assert(PAGE_CEILING(size) == size);
#ifdef JEMALLOC_THP #ifdef JEMALLOC_HAVE_MADVISE_HUGE
return (madvise(addr, size, MADV_HUGEPAGE) != 0); return (madvise(addr, size, MADV_HUGEPAGE) != 0);
#else #else
return (false); return (false);
@ -213,7 +213,7 @@ pages_nohuge(void *addr, size_t size)
assert(PAGE_ADDR2BASE(addr) == addr); assert(PAGE_ADDR2BASE(addr) == addr);
assert(PAGE_CEILING(size) == size); assert(PAGE_CEILING(size) == size);
#ifdef JEMALLOC_THP #ifdef JEMALLOC_HAVE_MADVISE_HUGE
return (madvise(addr, size, MADV_NOHUGEPAGE) != 0); return (madvise(addr, size, MADV_NOHUGEPAGE) != 0);
#else #else
return (false); return (false);

View File

@ -750,6 +750,7 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque,
OPT_WRITE_BOOL(xmalloc, ",") OPT_WRITE_BOOL(xmalloc, ",")
OPT_WRITE_BOOL(tcache, ",") OPT_WRITE_BOOL(tcache, ",")
OPT_WRITE_SSIZE_T(lg_tcache_max, ",") OPT_WRITE_SSIZE_T(lg_tcache_max, ",")
OPT_WRITE_BOOL(thp, ",")
OPT_WRITE_BOOL(prof, ",") OPT_WRITE_BOOL(prof, ",")
OPT_WRITE_CHAR_P(prof_prefix, ",") OPT_WRITE_CHAR_P(prof_prefix, ",")
OPT_WRITE_BOOL_MUTABLE(prof_active, prof.active, ",") OPT_WRITE_BOOL_MUTABLE(prof_active, prof.active, ",")

View File

@ -142,6 +142,7 @@ TEST_BEGIN(test_mallctl_config)
TEST_MALLCTL_CONFIG(prof_libunwind, bool); TEST_MALLCTL_CONFIG(prof_libunwind, bool);
TEST_MALLCTL_CONFIG(stats, bool); TEST_MALLCTL_CONFIG(stats, bool);
TEST_MALLCTL_CONFIG(tcache, bool); TEST_MALLCTL_CONFIG(tcache, bool);
TEST_MALLCTL_CONFIG(thp, bool);
TEST_MALLCTL_CONFIG(tls, bool); TEST_MALLCTL_CONFIG(tls, bool);
TEST_MALLCTL_CONFIG(utrace, bool); TEST_MALLCTL_CONFIG(utrace, bool);
TEST_MALLCTL_CONFIG(valgrind, bool); TEST_MALLCTL_CONFIG(valgrind, bool);
@ -182,6 +183,7 @@ TEST_BEGIN(test_mallctl_opt)
TEST_MALLCTL_OPT(bool, xmalloc, xmalloc); TEST_MALLCTL_OPT(bool, xmalloc, xmalloc);
TEST_MALLCTL_OPT(bool, tcache, tcache); TEST_MALLCTL_OPT(bool, tcache, tcache);
TEST_MALLCTL_OPT(size_t, lg_tcache_max, tcache); TEST_MALLCTL_OPT(size_t, lg_tcache_max, tcache);
TEST_MALLCTL_OPT(bool, thp, thp);
TEST_MALLCTL_OPT(bool, prof, prof); TEST_MALLCTL_OPT(bool, prof, prof);
TEST_MALLCTL_OPT(const char *, prof_prefix, prof); TEST_MALLCTL_OPT(const char *, prof_prefix, prof);
TEST_MALLCTL_OPT(bool, prof_active, prof); TEST_MALLCTL_OPT(bool, prof_active, prof);