Fix/enhance THP integration.

Detect whether chunks start off as THP-capable by default (according to
the state of /sys/kernel/mm/transparent_hugepage/enabled), and use this
as the basis for whether to call pages_nohuge() once per chunk during
first purge of any of the chunk's page runs.

Add the --disable-thp configure option, as well as the opt.thp
mallctl.

This resolves #541.
This commit is contained in:
Jason Evans 2017-02-28 01:08:28 -08:00
parent 766ddcd0f2
commit d84d2909c3
13 changed files with 177 additions and 21 deletions

View File

@ -157,6 +157,13 @@ any of the following arguments (not a definitive list) to 'configure':
released in bulk, thus reducing the total number of mutex operations. See
the "opt.tcache" option for usage details.
--disable-thp
Disable transparent huge page (THP) integration. When integration is
enabled (the default on systems with THP support), THPs are explicitly
disabled as a side effect of unused dirty page purging for chunks that back
small and/or large allocations, because such chunks typically comprise
active, unused dirty, and untouched clean pages.
--disable-munmap
Disable virtual memory deallocation via munmap(2); instead keep track of
the virtual memory for later use. munmap() is disabled by default (i.e.

View File

@ -1683,10 +1683,31 @@ if test "x${je_cv_madvise}" = "xyes" ; then
madvise((void *)0, 0, MADV_NOHUGEPAGE);
], [je_cv_thp])
if test "x${je_cv_thp}" = "xyes" ; then
AC_DEFINE([JEMALLOC_THP], [ ])
AC_DEFINE([JEMALLOC_HAVE_MADVISE_HUGE], [ ])
fi
fi
dnl Enable transparent huge page support by default.
AC_ARG_ENABLE([thp],
[AS_HELP_STRING([--disable-thp],
[Disable transparent huge page support])],
[if test "x$enable_thp" = "xno" -o "x${je_cv_thp}" != "xyes" ; then
enable_thp="0"
else
enable_thp="1"
fi
],
[if test "x${je_cv_thp}" = "xyes" ; then
enable_thp="1"
else
enable_thp="0"
fi
])
if test "x$enable_thp" = "x1" ; then
AC_DEFINE([JEMALLOC_THP], [ ])
fi
AC_SUBST([enable_thp])
dnl ============================================================================
dnl Check whether __sync_{add,sub}_and_fetch() are available despite
dnl __GCC_HAVE_SYNC_COMPARE_AND_SWAP_n macros being undefined.
@ -2014,6 +2035,7 @@ AC_MSG_RESULT([prof-libunwind : ${enable_prof_libunwind}])
AC_MSG_RESULT([prof-libgcc : ${enable_prof_libgcc}])
AC_MSG_RESULT([prof-gcc : ${enable_prof_gcc}])
AC_MSG_RESULT([tcache : ${enable_tcache}])
AC_MSG_RESULT([thp : ${enable_thp}])
AC_MSG_RESULT([fill : ${enable_fill}])
AC_MSG_RESULT([utrace : ${enable_utrace}])
AC_MSG_RESULT([valgrind : ${enable_valgrind}])

View File

@ -850,6 +850,17 @@ for (i = 0; i < nbins; i++) {
during build configuration.</para></listitem>
</varlistentry>
<varlistentry id="config.thp">
<term>
<mallctl>config.thp</mallctl>
(<type>bool</type>)
<literal>r-</literal>
</term>
<listitem><para><option>--disable-thp</option> was not specified
during build configuration, and the system supports transparent huge
page manipulation.</para></listitem>
</varlistentry>
<varlistentry id="config.tls">
<term>
<mallctl>config.tls</mallctl>
@ -1162,6 +1173,21 @@ malloc_conf = "xmalloc:true";]]></programlisting>
forcefully disabled.</para></listitem>
</varlistentry>
<varlistentry id="opt.thp">
<term>
<mallctl>opt.thp</mallctl>
(<type>bool</type>)
<literal>r-</literal>
[<option>--enable-thp</option>]
</term>
<listitem><para>Transparent huge page (THP) integration
enabled/disabled. When enabled, THPs are explicitly disabled as a side
effect of unused dirty page purging for chunks that back small and/or
large allocations, because such chunks typically comprise active,
unused dirty, and untouched clean pages. This option is enabled by
default.</para></listitem>
</varlistentry>
<varlistentry id="opt.lg_tcache_max">
<term>
<mallctl>opt.lg_tcache_max</mallctl>

View File

@ -506,6 +506,7 @@ static const size_t large_pad =
#endif
;
extern bool opt_thp;
extern purge_mode_t opt_purge;
extern const char *purge_mode_names[];
extern ssize_t opt_lg_dirty_mult;

View File

@ -99,6 +99,13 @@ static const bool config_tcache =
false
#endif
;
/* Compile-time flag: true iff JEMALLOC_THP is defined for this build. */
#ifdef JEMALLOC_THP
static const bool config_thp = true;
#else
static const bool config_thp = false;
#endif
static const bool config_tls =
#ifdef JEMALLOC_TLS
true

View File

@ -252,6 +252,12 @@
/* Defined if madvise(2) is available. */
#undef JEMALLOC_HAVE_MADVISE
/*
* Defined if transparent huge pages are supported via the MADV_[NO]HUGEPAGE
* arguments to madvise(2).
*/
#undef JEMALLOC_HAVE_MADVISE_HUGE
/*
* Methods for purging unused pages differ between operating systems.
*
@ -264,10 +270,7 @@
#undef JEMALLOC_PURGE_MADVISE_FREE
#undef JEMALLOC_PURGE_MADVISE_DONTNEED
/*
* Defined if transparent huge pages are supported via the MADV_[NO]HUGEPAGE
* arguments to madvise(2).
*/
/* Defined if transparent huge page support is enabled. */
#undef JEMALLOC_THP
/* Define if operating system has alloca.h header. */

View File

@ -392,6 +392,7 @@ opt_quarantine
opt_redzone
opt_stats_print
opt_tcache
opt_thp
opt_utrace
opt_xmalloc
opt_zero

View File

@ -4,6 +4,8 @@
/******************************************************************************/
/* Data. */
/* Run-time THP integration switch; defaults to enabled. */
bool opt_thp = true;
/* Initial THP state of chunks, as detected by init_thp_initially_huge(). */
static bool thp_initially_huge;
purge_mode_t opt_purge = PURGE_DEFAULT;
const char *purge_mode_names[] = {
"ratio",
@ -680,7 +682,9 @@ arena_chunk_init_hard(tsdn_t *tsdn, arena_t *arena)
if (chunk == NULL)
return (NULL);
chunk->hugepage = true;
if (config_thp && opt_thp) {
chunk->hugepage = thp_initially_huge;
}
/*
* Initialize the map to contain one maximal free untouched run. Mark
@ -745,14 +749,17 @@ arena_chunk_alloc(tsdn_t *tsdn, arena_t *arena)
static void
arena_chunk_discard(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk)
{
size_t sn, hugepage;
size_t sn;
UNUSED bool hugepage JEMALLOC_CC_SILENCE_INIT(false);
bool committed;
chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER;
chunk_deregister(chunk, &chunk->node);
sn = extent_node_sn_get(&chunk->node);
if (config_thp && opt_thp) {
hugepage = chunk->hugepage;
}
committed = (arena_mapbits_decommitted_get(chunk, map_bias) == 0);
if (!committed) {
/*
@ -765,13 +772,16 @@ arena_chunk_discard(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk)
chunk_hooks.decommit(chunk, chunksize, 0, map_bias << LG_PAGE,
arena->ind);
}
if (!hugepage) {
if (config_thp && opt_thp && hugepage != thp_initially_huge) {
/*
* Convert chunk back to the default state, so that all
* subsequent chunk allocations start out with chunks that can
* be backed by transparent huge pages.
* Convert chunk back to initial THP state, so that all
* subsequent chunk allocations start out in a consistent state.
*/
if (thp_initially_huge) {
pages_huge(chunk, chunksize);
} else {
pages_nohuge(chunk, chunksize);
}
}
chunk_dalloc_cache(tsdn, arena, &chunk_hooks, (void *)chunk, chunksize,
@ -1711,13 +1721,13 @@ arena_purge_stashed(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks,
/*
* If this is the first run purged within chunk, mark
* the chunk as non-huge. This will prevent all use of
* transparent huge pages for this chunk until the chunk
* as a whole is deallocated.
* the chunk as non-THP-capable. This will prevent all
* use of THPs for this chunk until the chunk as a whole
* is deallocated.
*/
if (chunk->hugepage) {
pages_nohuge(chunk, chunksize);
chunk->hugepage = false;
if (config_thp && opt_thp && chunk->hugepage) {
chunk->hugepage = pages_nohuge(chunk,
chunksize);
}
assert(pageind + npages <= chunk_npages);
@ -3772,11 +3782,78 @@ bin_info_init(void)
#undef SC
}
/*
 * Initialize thp_initially_huge from the contents of
 * /sys/kernel/mm/transparent_hugepage/enabled.
 *
 * The file contents are compared against the three recognized states; the
 * "[always]" state maps to true, whereas "[madvise]" and "[never]" map to
 * false.  If the file cannot be opened or read, or if its contents do not
 * exactly match one of the recognized states, thp_initially_huge is set to
 * false.
 */
static void
init_thp_initially_huge(void) {
	static const char *enabled_states[] = {
		"[always] madvise never\n",
		"always [madvise] never\n",
		"always madvise [never]\n"
	};
	static const bool thp_initially_huge_states[] = {
		true,
		false,
		false
	};
	/* Large enough for any recognized state plus one extra byte. */
	char buf[sizeof("[always] madvise never\n")];
	ssize_t nread;
	unsigned i;
	int fd;

	if (config_debug) {
		for (i = 0; i < sizeof(enabled_states)/sizeof(const char *);
		    i++) {
			assert(sizeof(buf) > strlen(enabled_states[i]));
		}
	}
	assert(sizeof(enabled_states)/sizeof(const char *) ==
	    sizeof(thp_initially_huge_states)/sizeof(bool));

#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open)
	fd = (int)syscall(SYS_open,
	    "/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY);
#else
	fd = open("/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY);
#endif
	if (fd == -1) {
		goto label_error;
	}

#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_read)
	nread = (ssize_t)syscall(SYS_read, fd, &buf, sizeof(buf));
#else
	nread = read(fd, &buf, sizeof(buf));
#endif

	/* The descriptor is no longer needed, whether or not read succeeded. */
#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_close)
	syscall(SYS_close, fd);
#else
	close(fd);
#endif

	if (nread < 1) {
		goto label_error;
	}

	for (i = 0; i < sizeof(enabled_states)/sizeof(const char *);
	    i++) {
		size_t state_len = strlen(enabled_states[i]);

		/*
		 * Require the number of bytes read to equal the state string's
		 * length, so that a short read cannot match a state merely by
		 * being a prefix of it.
		 */
		if ((size_t)nread == state_len &&
		    memcmp(buf, enabled_states[i], state_len) == 0) {
			thp_initially_huge = thp_initially_huge_states[i];
			return;
		}
	}

label_error:
	thp_initially_huge = false;
}
void
arena_boot(void)
{
unsigned i;
if (config_thp && opt_thp) {
init_thp_initially_huge();
}
arena_lg_dirty_mult_default_set(opt_lg_dirty_mult);
arena_decay_time_default_set(opt_decay_time);

View File

@ -84,6 +84,7 @@ CTL_PROTO(config_prof_libgcc)
CTL_PROTO(config_prof_libunwind)
CTL_PROTO(config_stats)
CTL_PROTO(config_tcache)
CTL_PROTO(config_thp)
CTL_PROTO(config_tls)
CTL_PROTO(config_utrace)
CTL_PROTO(config_valgrind)
@ -104,6 +105,7 @@ CTL_PROTO(opt_utrace)
CTL_PROTO(opt_xmalloc)
CTL_PROTO(opt_tcache)
CTL_PROTO(opt_lg_tcache_max)
CTL_PROTO(opt_thp)
CTL_PROTO(opt_prof)
CTL_PROTO(opt_prof_prefix)
CTL_PROTO(opt_prof_active)
@ -258,6 +260,7 @@ static const ctl_named_node_t config_node[] = {
{NAME("prof_libunwind"), CTL(config_prof_libunwind)},
{NAME("stats"), CTL(config_stats)},
{NAME("tcache"), CTL(config_tcache)},
{NAME("thp"), CTL(config_thp)},
{NAME("tls"), CTL(config_tls)},
{NAME("utrace"), CTL(config_utrace)},
{NAME("valgrind"), CTL(config_valgrind)},
@ -281,6 +284,7 @@ static const ctl_named_node_t opt_node[] = {
{NAME("xmalloc"), CTL(opt_xmalloc)},
{NAME("tcache"), CTL(opt_tcache)},
{NAME("lg_tcache_max"), CTL(opt_lg_tcache_max)},
{NAME("thp"), CTL(opt_thp)},
{NAME("prof"), CTL(opt_prof)},
{NAME("prof_prefix"), CTL(opt_prof_prefix)},
{NAME("prof_active"), CTL(opt_prof_active)},
@ -1268,6 +1272,7 @@ CTL_RO_CONFIG_GEN(config_prof_libgcc, bool)
CTL_RO_CONFIG_GEN(config_prof_libunwind, bool)
CTL_RO_CONFIG_GEN(config_stats, bool)
CTL_RO_CONFIG_GEN(config_tcache, bool)
CTL_RO_CONFIG_GEN(config_thp, bool)
CTL_RO_CONFIG_GEN(config_tls, bool)
CTL_RO_CONFIG_GEN(config_utrace, bool)
CTL_RO_CONFIG_GEN(config_valgrind, bool)
@ -1291,6 +1296,7 @@ CTL_RO_NL_CGEN(config_utrace, opt_utrace, opt_utrace, bool)
CTL_RO_NL_CGEN(config_xmalloc, opt_xmalloc, opt_xmalloc, bool)
CTL_RO_NL_CGEN(config_tcache, opt_tcache, opt_tcache, bool)
CTL_RO_NL_CGEN(config_tcache, opt_lg_tcache_max, opt_lg_tcache_max, ssize_t)
CTL_RO_NL_CGEN(config_thp, opt_thp, opt_thp, bool)
CTL_RO_NL_CGEN(config_prof, opt_prof, opt_prof, bool)
CTL_RO_NL_CGEN(config_prof, opt_prof_prefix, opt_prof_prefix, const char *)
CTL_RO_NL_CGEN(config_prof, opt_prof_active, opt_prof_active, bool)

View File

@ -1272,6 +1272,9 @@ malloc_conf_init(void)
"lg_tcache_max", -1,
(sizeof(size_t) << 3) - 1)
}
if (config_thp) {
CONF_HANDLE_BOOL(opt_thp, "thp", true)
}
if (config_prof) {
CONF_HANDLE_BOOL(opt_prof, "prof", true)
CONF_HANDLE_CHAR_P(opt_prof_prefix,

View File

@ -199,7 +199,7 @@ pages_huge(void *addr, size_t size)
assert(PAGE_ADDR2BASE(addr) == addr);
assert(PAGE_CEILING(size) == size);
#ifdef JEMALLOC_THP
#ifdef JEMALLOC_HAVE_MADVISE_HUGE
return (madvise(addr, size, MADV_HUGEPAGE) != 0);
#else
return (false);
@ -213,7 +213,7 @@ pages_nohuge(void *addr, size_t size)
assert(PAGE_ADDR2BASE(addr) == addr);
assert(PAGE_CEILING(size) == size);
#ifdef JEMALLOC_THP
#ifdef JEMALLOC_HAVE_MADVISE_HUGE
return (madvise(addr, size, MADV_NOHUGEPAGE) != 0);
#else
return (false);

View File

@ -750,6 +750,7 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque,
OPT_WRITE_BOOL(xmalloc, ",")
OPT_WRITE_BOOL(tcache, ",")
OPT_WRITE_SSIZE_T(lg_tcache_max, ",")
OPT_WRITE_BOOL(thp, ",")
OPT_WRITE_BOOL(prof, ",")
OPT_WRITE_CHAR_P(prof_prefix, ",")
OPT_WRITE_BOOL_MUTABLE(prof_active, prof.active, ",")

View File

@ -142,6 +142,7 @@ TEST_BEGIN(test_mallctl_config)
TEST_MALLCTL_CONFIG(prof_libunwind, bool);
TEST_MALLCTL_CONFIG(stats, bool);
TEST_MALLCTL_CONFIG(tcache, bool);
TEST_MALLCTL_CONFIG(thp, bool);
TEST_MALLCTL_CONFIG(tls, bool);
TEST_MALLCTL_CONFIG(utrace, bool);
TEST_MALLCTL_CONFIG(valgrind, bool);
@ -182,6 +183,7 @@ TEST_BEGIN(test_mallctl_opt)
TEST_MALLCTL_OPT(bool, xmalloc, xmalloc);
TEST_MALLCTL_OPT(bool, tcache, tcache);
TEST_MALLCTL_OPT(size_t, lg_tcache_max, tcache);
TEST_MALLCTL_OPT(bool, thp, thp);
TEST_MALLCTL_OPT(bool, prof, prof);
TEST_MALLCTL_OPT(const char *, prof_prefix, prof);
TEST_MALLCTL_OPT(bool, prof_active, prof);