Add runtime detection of whether MADV_DONTNEED zeroes pages (mostly for qemu)
qemu does not support this, yet [1], and you can get very tricky assert if you will run program with jemalloc in use under qemu: <jemalloc>: ../contrib/jemalloc/src/extent.c:1195: Failed assertion: "p[i] == 0" [1]: https://patchwork.kernel.org/patch/10576637/ Here is a simple example that shows the problem [2]: // Gist to check possible issues with MADV_DONTNEED // For example it does not supported by qemu user // There is a patch for this [1], but it hasn't been applied. // [1]: https://lists.gnu.org/archive/html/qemu-devel/2018-08/msg05422.html #include <sys/mman.h> #include <stdio.h> #include <stddef.h> #include <assert.h> #include <string.h> int main(int argc, char **argv) { void *addr = mmap(NULL, 1<<16, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); if (addr == MAP_FAILED) { perror("mmap"); return 1; } memset(addr, 'A', 1<<16); if (!madvise(addr, 1<<16, MADV_DONTNEED)) { puts("MADV_DONTNEED does not return error. Check memory."); for (int i = 0; i < 1<<16; ++i) { assert(((unsigned char *)addr)[i] == 0); } } else { perror("madvise"); } if (munmap(addr, 1<<16)) { perror("munmap"); return 1; } return 0; } ### unpatched qemu $ qemu-x86_64-static /tmp/test-MADV_DONTNEED MADV_DONTNEED does not return error. Check memory. test-MADV_DONTNEED: /tmp/test-MADV_DONTNEED.c:19: main: Assertion `((unsigned char *)addr)[i] == 0' failed. qemu: uncaught target signal 6 (Aborted) - core dumped Aborted (core dumped) ### patched qemu (by returning ENOSYS error) $ qemu-x86_64 /tmp/test-MADV_DONTNEED madvise: Success ### patch for qemu to return ENOSYS diff --git a/linux-user/syscall.c b/linux-user/syscall.c index 897d20c076..5540792e0e 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -11775,7 +11775,7 @@ static abi_long do_syscall1(void *cpu_env, int num, abi_long arg1, turns private file-backed mappings into anonymous mappings. This will break MADV_DONTNEED. This is a hint, so ignoring and returning success is ok. 
*/ - return 0; + return ENOSYS; #endif #ifdef TARGET_NR_fcntl64 case TARGET_NR_fcntl64: [2]: https://gist.github.com/azat/12ba2c825b710653ece34dba7f926ece v2: - review fixes - add opt_dont_trust_madvise v3: - review fixes - rename opt_dont_trust_madvise to opt_trust_madvise
This commit is contained in:
parent
2e3104ba07
commit
a943172b73
@ -950,6 +950,18 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay",
|
|||||||
is <quote>disabled</quote>.</para></listitem>
|
is <quote>disabled</quote>.</para></listitem>
|
||||||
</varlistentry>
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry id="opt.trust_madvise">
|
||||||
|
<term>
|
||||||
|
<mallctl>opt.trust_madvise</mallctl>
|
||||||
|
(<type>bool</type>)
|
||||||
|
<literal>r-</literal>
|
||||||
|
</term>
|
||||||
|
<listitem><para>If enabled, skip the runtime check that verifies that
|
||||||
|
MADV_DONTNEED actually zeroes pages. The default is
|
||||||
|
<quote>disabled</quote> on Linux and <quote>enabled</quote> elsewhere.
|
||||||
|
</para></listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
<varlistentry id="opt.retain">
|
<varlistentry id="opt.retain">
|
||||||
<term>
|
<term>
|
||||||
<mallctl>opt.retain</mallctl>
|
<mallctl>opt.retain</mallctl>
|
||||||
|
@ -11,6 +11,7 @@ extern bool malloc_slow;
|
|||||||
/* Run-time options. */
|
/* Run-time options. */
|
||||||
extern bool opt_abort;
|
extern bool opt_abort;
|
||||||
extern bool opt_abort_conf;
|
extern bool opt_abort_conf;
|
||||||
|
extern bool opt_trust_madvise;
|
||||||
extern bool opt_confirm_conf;
|
extern bool opt_confirm_conf;
|
||||||
extern bool opt_hpa;
|
extern bool opt_hpa;
|
||||||
extern size_t opt_hpa_slab_max_alloc;
|
extern size_t opt_hpa_slab_max_alloc;
|
||||||
|
@ -90,6 +90,7 @@ CTL_PROTO(config_utrace)
|
|||||||
CTL_PROTO(config_xmalloc)
|
CTL_PROTO(config_xmalloc)
|
||||||
CTL_PROTO(opt_abort)
|
CTL_PROTO(opt_abort)
|
||||||
CTL_PROTO(opt_abort_conf)
|
CTL_PROTO(opt_abort_conf)
|
||||||
|
CTL_PROTO(opt_trust_madvise)
|
||||||
CTL_PROTO(opt_confirm_conf)
|
CTL_PROTO(opt_confirm_conf)
|
||||||
CTL_PROTO(opt_hpa)
|
CTL_PROTO(opt_hpa)
|
||||||
CTL_PROTO(opt_hpa_slab_max_alloc)
|
CTL_PROTO(opt_hpa_slab_max_alloc)
|
||||||
@ -372,6 +373,7 @@ static const ctl_named_node_t config_node[] = {
|
|||||||
static const ctl_named_node_t opt_node[] = {
|
static const ctl_named_node_t opt_node[] = {
|
||||||
{NAME("abort"), CTL(opt_abort)},
|
{NAME("abort"), CTL(opt_abort)},
|
||||||
{NAME("abort_conf"), CTL(opt_abort_conf)},
|
{NAME("abort_conf"), CTL(opt_abort_conf)},
|
||||||
|
{NAME("trust_madvise"), CTL(opt_trust_madvise)},
|
||||||
{NAME("confirm_conf"), CTL(opt_confirm_conf)},
|
{NAME("confirm_conf"), CTL(opt_confirm_conf)},
|
||||||
{NAME("hpa"), CTL(opt_hpa)},
|
{NAME("hpa"), CTL(opt_hpa)},
|
||||||
{NAME("hpa_slab_max_alloc"), CTL(opt_hpa_slab_max_alloc)},
|
{NAME("hpa_slab_max_alloc"), CTL(opt_hpa_slab_max_alloc)},
|
||||||
@ -2045,6 +2047,7 @@ CTL_RO_CONFIG_GEN(config_xmalloc, bool)
|
|||||||
|
|
||||||
CTL_RO_NL_GEN(opt_abort, opt_abort, bool)
|
CTL_RO_NL_GEN(opt_abort, opt_abort, bool)
|
||||||
CTL_RO_NL_GEN(opt_abort_conf, opt_abort_conf, bool)
|
CTL_RO_NL_GEN(opt_abort_conf, opt_abort_conf, bool)
|
||||||
|
CTL_RO_NL_GEN(opt_trust_madvise, opt_trust_madvise, bool)
|
||||||
CTL_RO_NL_GEN(opt_confirm_conf, opt_confirm_conf, bool)
|
CTL_RO_NL_GEN(opt_confirm_conf, opt_confirm_conf, bool)
|
||||||
CTL_RO_NL_GEN(opt_hpa, opt_hpa, bool)
|
CTL_RO_NL_GEN(opt_hpa, opt_hpa, bool)
|
||||||
CTL_RO_NL_GEN(opt_hpa_slab_max_alloc, opt_hpa_slab_max_alloc, size_t)
|
CTL_RO_NL_GEN(opt_hpa_slab_max_alloc, opt_hpa_slab_max_alloc, size_t)
|
||||||
|
@ -94,6 +94,13 @@ bool opt_junk_free =
|
|||||||
false
|
false
|
||||||
#endif
|
#endif
|
||||||
;
|
;
|
||||||
|
/*
 * Run-time option "trust_madvise".  When false, pages_boot() runs a probe to
 * verify that MADV_DONTNEED really zeroes pages; when true, the probe is
 * skipped.  Defaults to false only when JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS
 * is defined, and to true otherwise.
 */
bool opt_trust_madvise =
|
||||||
|
#ifdef JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS
|
||||||
|
false
|
||||||
|
#else
|
||||||
|
true
|
||||||
|
#endif
|
||||||
|
;
|
||||||
|
|
||||||
zero_realloc_action_t opt_zero_realloc_action =
|
zero_realloc_action_t opt_zero_realloc_action =
|
||||||
zero_realloc_action_strict;
|
zero_realloc_action_strict;
|
||||||
@ -1256,6 +1263,7 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS],
|
|||||||
|
|
||||||
CONF_HANDLE_BOOL(opt_abort, "abort")
|
CONF_HANDLE_BOOL(opt_abort, "abort")
|
||||||
CONF_HANDLE_BOOL(opt_abort_conf, "abort_conf")
|
CONF_HANDLE_BOOL(opt_abort_conf, "abort_conf")
|
||||||
|
CONF_HANDLE_BOOL(opt_trust_madvise, "trust_madvise")
|
||||||
if (strncmp("metadata_thp", k, klen) == 0) {
|
if (strncmp("metadata_thp", k, klen) == 0) {
|
||||||
int i;
|
int i;
|
||||||
bool match = false;
|
bool match = false;
|
||||||
|
71
src/pages.c
71
src/pages.c
@ -42,6 +42,57 @@ thp_mode_t init_system_thp_mode;
|
|||||||
/* Runtime support for lazy purge. Irrelevant when !pages_can_purge_lazy. */
|
/* Runtime support for lazy purge. Irrelevant when !pages_can_purge_lazy. */
|
||||||
static bool pages_can_purge_lazy_runtime = true;
|
static bool pages_can_purge_lazy_runtime = true;
|
||||||
|
|
||||||
|
#ifdef JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS
|
||||||
|
static int madvise_dont_need_zeros_is_faulty = -1;
|
||||||
|
/**
|
||||||
|
* Check that MADV_DONTNEED will actually zero pages on subsequent access.
|
||||||
|
*
|
||||||
|
* Since qemu does not support this, yet [1], and you can get very tricky
|
||||||
|
* assert if you will run program with jemalloc in use under qemu:
|
||||||
|
*
|
||||||
|
* <jemalloc>: ../contrib/jemalloc/src/extent.c:1195: Failed assertion: "p[i] == 0"
|
||||||
|
*
|
||||||
|
* [1]: https://patchwork.kernel.org/patch/10576637/
|
||||||
|
*/
|
||||||
|
static int madvise_MADV_DONTNEED_zeroes_pages()
|
||||||
|
{
|
||||||
|
int works = -1;
|
||||||
|
size_t size = PAGE;
|
||||||
|
|
||||||
|
void * addr = mmap(NULL, size, PROT_READ|PROT_WRITE,
|
||||||
|
MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
|
||||||
|
|
||||||
|
if (addr == MAP_FAILED) {
|
||||||
|
malloc_write("<jemalloc>: Cannot allocate memory for "
|
||||||
|
"MADV_DONTNEED check\n");
|
||||||
|
if (opt_abort) {
|
||||||
|
abort();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
memset(addr, 'A', size);
|
||||||
|
if (madvise(addr, size, MADV_DONTNEED) == 0) {
|
||||||
|
works = memchr(addr, 'A', size) == NULL;
|
||||||
|
} else {
|
||||||
|
/*
|
||||||
|
* If madvise() does not support MADV_DONTNEED, then we can
|
||||||
|
* call it anyway, and use it's return code.
|
||||||
|
*/
|
||||||
|
works = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (munmap(addr, size) != 0) {
|
||||||
|
malloc_write("<jemalloc>: Cannot deallocate memory for "
|
||||||
|
"MADV_DONTNEED check\n");
|
||||||
|
if (opt_abort) {
|
||||||
|
abort();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return works;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
/******************************************************************************/
|
/******************************************************************************/
|
||||||
/*
|
/*
|
||||||
* Function prototypes for static functions that are referenced prior to
|
* Function prototypes for static functions that are referenced prior to
|
||||||
@ -351,10 +402,12 @@ pages_purge_forced(void *addr, size_t size) {
|
|||||||
|
|
||||||
#if defined(JEMALLOC_PURGE_MADVISE_DONTNEED) && \
|
#if defined(JEMALLOC_PURGE_MADVISE_DONTNEED) && \
|
||||||
defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS)
|
defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS)
|
||||||
return (madvise(addr, size, MADV_DONTNEED) != 0);
|
return (unlikely(madvise_dont_need_zeros_is_faulty) ||
|
||||||
|
madvise(addr, size, MADV_DONTNEED) != 0);
|
||||||
#elif defined(JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED) && \
|
#elif defined(JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED) && \
|
||||||
defined(JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED_ZEROS)
|
defined(JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED_ZEROS)
|
||||||
return (posix_madvise(addr, size, POSIX_MADV_DONTNEED) != 0);
|
return (unlikely(madvise_dont_need_zeros_is_faulty) ||
|
||||||
|
posix_madvise(addr, size, POSIX_MADV_DONTNEED) != 0);
|
||||||
#elif defined(JEMALLOC_MAPS_COALESCE)
|
#elif defined(JEMALLOC_MAPS_COALESCE)
|
||||||
/* Try to overlay a new demand-zeroed mapping. */
|
/* Try to overlay a new demand-zeroed mapping. */
|
||||||
return pages_commit(addr, size);
|
return pages_commit(addr, size);
|
||||||
@ -642,6 +695,20 @@ pages_boot(void) {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS
|
||||||
|
if (!opt_trust_madvise) {
|
||||||
|
madvise_dont_need_zeros_is_faulty = !madvise_MADV_DONTNEED_zeroes_pages();
|
||||||
|
if (madvise_dont_need_zeros_is_faulty) {
|
||||||
|
malloc_write("<jemalloc>: MADV_DONTNEED does not work (memset will be used instead)\n");
|
||||||
|
malloc_write("<jemalloc>: (This is the expected behaviour if you are running under QEMU)\n");
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
/* In case opt_trust_madvise is enabled,
|
||||||
|
* do not do runtime check */
|
||||||
|
madvise_dont_need_zeros_is_faulty = 0;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifndef _WIN32
|
#ifndef _WIN32
|
||||||
mmap_flags = MAP_PRIVATE | MAP_ANON;
|
mmap_flags = MAP_PRIVATE | MAP_ANON;
|
||||||
#endif
|
#endif
|
||||||
|
@ -279,6 +279,7 @@ TEST_BEGIN(test_mallctl_opt) {
|
|||||||
|
|
||||||
TEST_MALLCTL_OPT(bool, abort, always);
|
TEST_MALLCTL_OPT(bool, abort, always);
|
||||||
TEST_MALLCTL_OPT(bool, abort_conf, always);
|
TEST_MALLCTL_OPT(bool, abort_conf, always);
|
||||||
|
TEST_MALLCTL_OPT(bool, trust_madvise, always);
|
||||||
TEST_MALLCTL_OPT(bool, confirm_conf, always);
|
TEST_MALLCTL_OPT(bool, confirm_conf, always);
|
||||||
TEST_MALLCTL_OPT(const char *, metadata_thp, always);
|
TEST_MALLCTL_OPT(const char *, metadata_thp, always);
|
||||||
TEST_MALLCTL_OPT(bool, retain, always);
|
TEST_MALLCTL_OPT(bool, retain, always);
|
||||||
|
Loading…
Reference in New Issue
Block a user