diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in
index e24c191c..4b93c5a9 100644
--- a/doc/jemalloc.xml.in
+++ b/doc/jemalloc.xml.in
@@ -950,6 +950,18 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay",
is disabled
.
+
+
+ opt.trust_madvise
+ (bool)
+ r-
+
+ If enabled, trust that MADV_DONTNEED zeros pages and skip the
+ runtime check that verifies it. The default is
+ disabled
on Linux and enabled
elsewhere.
+
+
+
opt.retain
diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h
index fb8dc3fe..40591b99 100644
--- a/include/jemalloc/internal/jemalloc_internal_externs.h
+++ b/include/jemalloc/internal/jemalloc_internal_externs.h
@@ -11,6 +11,7 @@ extern bool malloc_slow;
/* Run-time options. */
extern bool opt_abort;
extern bool opt_abort_conf;
+extern bool opt_trust_madvise;
extern bool opt_confirm_conf;
extern bool opt_hpa;
extern size_t opt_hpa_slab_max_alloc;
diff --git a/src/ctl.c b/src/ctl.c
index b94ef646..d516196a 100644
--- a/src/ctl.c
+++ b/src/ctl.c
@@ -90,6 +90,7 @@ CTL_PROTO(config_utrace)
CTL_PROTO(config_xmalloc)
CTL_PROTO(opt_abort)
CTL_PROTO(opt_abort_conf)
+CTL_PROTO(opt_trust_madvise)
CTL_PROTO(opt_confirm_conf)
CTL_PROTO(opt_hpa)
CTL_PROTO(opt_hpa_slab_max_alloc)
@@ -372,6 +373,7 @@ static const ctl_named_node_t config_node[] = {
static const ctl_named_node_t opt_node[] = {
{NAME("abort"), CTL(opt_abort)},
{NAME("abort_conf"), CTL(opt_abort_conf)},
+ {NAME("trust_madvise"), CTL(opt_trust_madvise)},
{NAME("confirm_conf"), CTL(opt_confirm_conf)},
{NAME("hpa"), CTL(opt_hpa)},
{NAME("hpa_slab_max_alloc"), CTL(opt_hpa_slab_max_alloc)},
@@ -2045,6 +2047,7 @@ CTL_RO_CONFIG_GEN(config_xmalloc, bool)
CTL_RO_NL_GEN(opt_abort, opt_abort, bool)
CTL_RO_NL_GEN(opt_abort_conf, opt_abort_conf, bool)
+CTL_RO_NL_GEN(opt_trust_madvise, opt_trust_madvise, bool)
CTL_RO_NL_GEN(opt_confirm_conf, opt_confirm_conf, bool)
CTL_RO_NL_GEN(opt_hpa, opt_hpa, bool)
CTL_RO_NL_GEN(opt_hpa_slab_max_alloc, opt_hpa_slab_max_alloc, size_t)
diff --git a/src/jemalloc.c b/src/jemalloc.c
index 02714158..f7c3963d 100644
--- a/src/jemalloc.c
+++ b/src/jemalloc.c
@@ -94,6 +94,13 @@ bool opt_junk_free =
false
#endif
;
+bool opt_trust_madvise = /* true: skip the MADV_DONTNEED zeroing check */
+#ifdef JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS
+ false /* pages_boot() runs the MADV_DONTNEED zeroing check by default. */
+#else
+ true /* The runtime check is not compiled in for this configuration. */
+#endif
+ ;
zero_realloc_action_t opt_zero_realloc_action =
zero_realloc_action_strict;
@@ -1256,6 +1263,7 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS],
CONF_HANDLE_BOOL(opt_abort, "abort")
CONF_HANDLE_BOOL(opt_abort_conf, "abort_conf")
+ CONF_HANDLE_BOOL(opt_trust_madvise, "trust_madvise")
if (strncmp("metadata_thp", k, klen) == 0) {
int i;
bool match = false;
diff --git a/src/pages.c b/src/pages.c
index b23c9e9e..6984d2a0 100644
--- a/src/pages.c
+++ b/src/pages.c
@@ -42,6 +42,57 @@ thp_mode_t init_system_thp_mode;
/* Runtime support for lazy purge. Irrelevant when !pages_can_purge_lazy. */
static bool pages_can_purge_lazy_runtime = true;
+#ifdef JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS
+static int madvise_dont_need_zeros_is_faulty = -1;
+/**
+ * Check that MADV_DONTNEED actually zeroes pages on subsequent access.
+ * qemu does not support this yet [1], so a program using jemalloc under
+ * qemu can hit a hard-to-diagnose assertion such as:
+ *
+ * : ../contrib/jemalloc/src/extent.c:1195: Failed assertion: "p[i] == 0"
+ *
+ * [1]: https://patchwork.kernel.org/patch/10576637/
+ *
+ * Returns 1 if the page reads back zeroed or madvise() itself fails, 0 if
+ * stale data survives, and -1 if the probe page could not be mapped.
+ */
+static int
+madvise_MADV_DONTNEED_zeroes_pages(void) {
+	int works = -1;
+	size_t size = PAGE;
+	void *addr = mmap(NULL, size, PROT_READ|PROT_WRITE,
+	    MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+
+	if (addr == MAP_FAILED) {
+		malloc_write(": Cannot allocate memory for "
+		    "MADV_DONTNEED check\n");
+		if (opt_abort) {
+			abort();
+		}
+		return works; /* Still -1: nothing was mapped to probe. */
+	}
+
+	memset(addr, 'A', size);
+	if (madvise(addr, size, MADV_DONTNEED) == 0) {
+		works = memchr(addr, 'A', size) == NULL;
+	} else {
+		/*
+		 * madvise() rejected MADV_DONTNEED.  Callers can still invoke
+		 * it and act on its return code, so nothing is distrusted.
+		 */
+		works = 1;
+	}
+	if (munmap(addr, size) != 0) {
+		malloc_write(": Cannot deallocate memory for "
+		    "MADV_DONTNEED check\n");
+		if (opt_abort) {
+			abort();
+		}
+	}
+	return works;
+}
+#endif
+
/******************************************************************************/
/*
* Function prototypes for static functions that are referenced prior to
@@ -351,10 +402,12 @@ pages_purge_forced(void *addr, size_t size) {
#if defined(JEMALLOC_PURGE_MADVISE_DONTNEED) && \
defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS)
- return (madvise(addr, size, MADV_DONTNEED) != 0);
+ return (unlikely(madvise_dont_need_zeros_is_faulty) ||
+ madvise(addr, size, MADV_DONTNEED) != 0);
#elif defined(JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED) && \
defined(JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED_ZEROS)
- return (posix_madvise(addr, size, POSIX_MADV_DONTNEED) != 0);
+ return (unlikely(madvise_dont_need_zeros_is_faulty) ||
+ posix_madvise(addr, size, POSIX_MADV_DONTNEED) != 0);
#elif defined(JEMALLOC_MAPS_COALESCE)
/* Try to overlay a new demand-zeroed mapping. */
return pages_commit(addr, size);
@@ -642,6 +695,20 @@ pages_boot(void) {
return true;
}
+#ifdef JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS
+ if (!opt_trust_madvise) {
+ madvise_dont_need_zeros_is_faulty = !madvise_MADV_DONTNEED_zeroes_pages();
+ if (madvise_dont_need_zeros_is_faulty) {
+ malloc_write(": MADV_DONTNEED does not work (memset will be used instead)\n");
+ malloc_write(": (This is the expected behaviour if you are running under QEMU)\n");
+ }
+ } else {
+ /* opt_trust_madvise is enabled: skip the runtime
+ * check and assume MADV_DONTNEED zeros pages. */
+ madvise_dont_need_zeros_is_faulty = 0;
+ }
+#endif
+
#ifndef _WIN32
mmap_flags = MAP_PRIVATE | MAP_ANON;
#endif
diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c
index 85dcb4e2..6f5a8f18 100644
--- a/test/unit/mallctl.c
+++ b/test/unit/mallctl.c
@@ -279,6 +279,7 @@ TEST_BEGIN(test_mallctl_opt) {
TEST_MALLCTL_OPT(bool, abort, always);
TEST_MALLCTL_OPT(bool, abort_conf, always);
+ TEST_MALLCTL_OPT(bool, trust_madvise, always);
TEST_MALLCTL_OPT(bool, confirm_conf, always);
TEST_MALLCTL_OPT(const char *, metadata_thp, always);
TEST_MALLCTL_OPT(bool, retain, always);