diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index e24c191c..4b93c5a9 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -950,6 +950,18 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay", is disabled. + + + opt.trust_madvise + (bool) + r- + + If true, skip the runtime check that verifies that + MADV_DONTNEED actually zeros pages. The default is + disabled on Linux and enabled elsewhere. + + + opt.retain diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index fb8dc3fe..40591b99 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -11,6 +11,7 @@ extern bool malloc_slow; /* Run-time options. */ extern bool opt_abort; extern bool opt_abort_conf; +extern bool opt_trust_madvise; extern bool opt_confirm_conf; extern bool opt_hpa; extern size_t opt_hpa_slab_max_alloc; diff --git a/src/ctl.c b/src/ctl.c index b94ef646..d516196a 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -90,6 +90,7 @@ CTL_PROTO(config_utrace) CTL_PROTO(config_xmalloc) CTL_PROTO(opt_abort) CTL_PROTO(opt_abort_conf) +CTL_PROTO(opt_trust_madvise) CTL_PROTO(opt_confirm_conf) CTL_PROTO(opt_hpa) CTL_PROTO(opt_hpa_slab_max_alloc) @@ -372,6 +373,7 @@ static const ctl_named_node_t config_node[] = { static const ctl_named_node_t opt_node[] = { {NAME("abort"), CTL(opt_abort)}, {NAME("abort_conf"), CTL(opt_abort_conf)}, + {NAME("trust_madvise"), CTL(opt_trust_madvise)}, {NAME("confirm_conf"), CTL(opt_confirm_conf)}, {NAME("hpa"), CTL(opt_hpa)}, {NAME("hpa_slab_max_alloc"), CTL(opt_hpa_slab_max_alloc)}, @@ -2045,6 +2047,7 @@ CTL_RO_CONFIG_GEN(config_xmalloc, bool) CTL_RO_NL_GEN(opt_abort, opt_abort, bool) CTL_RO_NL_GEN(opt_abort_conf, opt_abort_conf, bool) +CTL_RO_NL_GEN(opt_trust_madvise, opt_trust_madvise, bool) CTL_RO_NL_GEN(opt_confirm_conf, opt_confirm_conf, bool) CTL_RO_NL_GEN(opt_hpa, opt_hpa, bool) CTL_RO_NL_GEN(opt_hpa_slab_max_alloc, 
opt_hpa_slab_max_alloc, size_t) diff --git a/src/jemalloc.c b/src/jemalloc.c index 02714158..f7c3963d 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -94,6 +94,13 @@ bool opt_junk_free = false #endif ; +bool opt_trust_madvise = /* true: skip the runtime MADV_DONTNEED probe in pages_boot() */ +#ifdef JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS + false +#else + true +#endif + ; zero_realloc_action_t opt_zero_realloc_action = zero_realloc_action_strict; @@ -1256,6 +1263,7 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_HANDLE_BOOL(opt_abort, "abort") CONF_HANDLE_BOOL(opt_abort_conf, "abort_conf") + CONF_HANDLE_BOOL(opt_trust_madvise, "trust_madvise") if (strncmp("metadata_thp", k, klen) == 0) { int i; bool match = false; diff --git a/src/pages.c b/src/pages.c index b23c9e9e..6984d2a0 100644 --- a/src/pages.c +++ b/src/pages.c @@ -42,6 +42,57 @@ thp_mode_t init_system_thp_mode; /* Runtime support for lazy purge. Irrelevant when !pages_can_purge_lazy. */ static bool pages_can_purge_lazy_runtime = true; +#ifdef JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS +static int madvise_dont_need_zeros_is_faulty = -1; /* -1: not decided yet; pages_boot() stores 0 or 1 */ +/** + * Check whether MADV_DONTNEED actually zeroes pages on subsequent access. 
+ * + * QEMU does not support this yet [1], so running a program that uses + * jemalloc under QEMU can trigger a hard-to-diagnose assertion: + * + * : ../contrib/jemalloc/src/extent.c:1195: Failed assertion: "p[i] == 0" + * + * [1]: https://patchwork.kernel.org/patch/10576637/ + * + * Returns non-zero if the probe page reads back without the fill byte, or + * if madvise() itself fails (callers then see that failure from their own + * madvise() call); returns 0 if the fill byte survived or the probe page + * could not be mapped. + */ +static int madvise_MADV_DONTNEED_zeroes_pages(void) +{ + int works = -1; + size_t size = PAGE; + + void *addr = mmap(NULL, size, PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); + + if (addr == MAP_FAILED) { + malloc_write(": Cannot allocate memory for " + "MADV_DONTNEED check\n"); + if (opt_abort) { + abort(); + } + /* + * There is nothing to probe and MAP_FAILED must not be + * dereferenced; report 0 (does not zero) so that unverified + * behaviour is never relied upon. + */ + return 0; + } + + /* Fill the page with a marker, then see whether MADV_DONTNEED drops it. */ + memset(addr, 'A', size); + if (madvise(addr, size, MADV_DONTNEED) == 0) { + works = memchr(addr, 'A', size) == NULL; + } else { + /* + * If madvise() does not support MADV_DONTNEED, then we can + * call it anyway, and use its return code. + */ + works = 1; + } + + if (munmap(addr, size) != 0) { + malloc_write(": Cannot deallocate memory for " + "MADV_DONTNEED check\n"); + if (opt_abort) { + abort(); + } + } + + return works; +} +#endif + /******************************************************************************/ /* * Function prototypes for static functions that are referenced prior to @@ -351,10 +402,12 @@ pages_purge_forced(void *addr, size_t size) { #if defined(JEMALLOC_PURGE_MADVISE_DONTNEED) && \ defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS) - return (madvise(addr, size, MADV_DONTNEED) != 0); + return (unlikely(madvise_dont_need_zeros_is_faulty) || + madvise(addr, size, MADV_DONTNEED) != 0); #elif defined(JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED) && \ defined(JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED_ZEROS) - return (posix_madvise(addr, size, POSIX_MADV_DONTNEED) != 0); + return (unlikely(madvise_dont_need_zeros_is_faulty) || + posix_madvise(addr, size, POSIX_MADV_DONTNEED) != 0); #elif defined(JEMALLOC_MAPS_COALESCE) /* Try to overlay a new demand-zeroed mapping. 
*/ return pages_commit(addr, size); @@ -642,6 +695,20 @@ pages_boot(void) { return true; } +#ifdef JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS + if (!opt_trust_madvise) { + madvise_dont_need_zeros_is_faulty = !madvise_MADV_DONTNEED_zeroes_pages(); + if (madvise_dont_need_zeros_is_faulty) { + malloc_write(": MADV_DONTNEED does not work (memset will be used instead)\n"); + malloc_write(": (This is the expected behaviour if you are running under QEMU)\n"); + } + } else { + /* opt_trust_madvise is enabled: skip the runtime check + * and assume MADV_DONTNEED zeroes pages. */ + madvise_dont_need_zeros_is_faulty = 0; + } +#endif + #ifndef _WIN32 mmap_flags = MAP_PRIVATE | MAP_ANON; #endif diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 85dcb4e2..6f5a8f18 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -279,6 +279,7 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(bool, abort, always); TEST_MALLCTL_OPT(bool, abort_conf, always); + TEST_MALLCTL_OPT(bool, trust_madvise, always); TEST_MALLCTL_OPT(bool, confirm_conf, always); TEST_MALLCTL_OPT(const char *, metadata_thp, always); TEST_MALLCTL_OPT(bool, retain, always);