diff --git a/INSTALL.md b/INSTALL.md index eb55acfd..adc72b82 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -185,13 +185,13 @@ any of the following arguments (not a definitive list) to 'configure': * `--disable-cache-oblivious` - Disable cache-oblivious large allocation alignment for large allocation - requests with no alignment constraints. If this feature is disabled, all - large allocations are page-aligned as an implementation artifact, which can - severely harm CPU cache utilization. However, the cache-oblivious layout - comes at the cost of one extra page per large allocation, which in the - most extreme case increases physical memory usage for the 16 KiB size class - to 20 KiB. + Disable cache-oblivious large allocation alignment by default, for large + allocation requests with no alignment constraints. If this feature is + disabled, all large allocations are page-aligned as an implementation + artifact, which can severely harm CPU cache utilization. However, the + cache-oblivious layout comes at the cost of one extra page per large + allocation, which in the most extreme case increases physical memory usage + for the 16 KiB size class to 20 KiB. * `--disable-syscall` diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 4b93c5a9..018170ca 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -936,6 +936,22 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay", + + + opt.cache_oblivious + (bool) + r- + + Enable / Disable cache-oblivious large allocation + alignment, for large requests with no alignment constraints. If this + feature is disabled, all large allocations are page-aligned as an + implementation artifact, which can severely harm CPU cache utilization. + However, the cache-oblivious layout comes at the cost of one extra page + per large allocation, which in the most extreme case increases physical + memory usage for the 16 KiB size class to 20 KiB. This option is enabled + by default. + + opt.metadata_thp diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index 8054ad9c..da693559 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -33,6 +33,7 @@ extern zero_realloc_action_t opt_zero_realloc_action; extern malloc_init_t malloc_init_state; extern const char *zero_realloc_mode_names[]; extern atomic_zu_t zero_realloc_count; +extern bool opt_cache_oblivious; /* Number of CPUs. */ extern unsigned ncpus; diff --git a/include/jemalloc/internal/prof_inlines.h b/include/jemalloc/internal/prof_inlines.h index c76d2ae5..7884e9a7 100644 --- a/include/jemalloc/internal/prof_inlines.h +++ b/include/jemalloc/internal/prof_inlines.h @@ -223,7 +223,7 @@ prof_sample_align(size_t orig_align) { * w/o metadata lookup. */ assert(opt_prof); - return (config_cache_oblivious && orig_align < PAGE) ? PAGE : + return (opt_cache_oblivious && orig_align < PAGE) ? PAGE : orig_align; } diff --git a/include/jemalloc/internal/sz.h b/include/jemalloc/internal/sz.h index 91940ccd..f2be6139 100644 --- a/include/jemalloc/internal/sz.h +++ b/include/jemalloc/internal/sz.h @@ -45,15 +45,13 @@ extern size_t sz_index2size_tab[SC_NSIZES]; */ extern uint8_t sz_size2index_tab[]; -static const size_t sz_large_pad = -#ifdef JEMALLOC_CACHE_OBLIVIOUS - PAGE -#else - 0 -#endif - ; +/* + * Padding for large allocations: PAGE when opt_cache_oblivious == true (to + * enable cache index randomization); 0 otherwise. + */ +extern size_t sz_large_pad; -extern void sz_boot(const sc_data_t *sc_data); +extern void sz_boot(const sc_data_t *sc_data, bool cache_oblivious); JEMALLOC_ALWAYS_INLINE pszind_t sz_psz2ind(size_t psz) { diff --git a/src/ctl.c b/src/ctl.c index 1c5e32ba..4fc3ad07 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -90,6 +90,7 @@ CTL_PROTO(config_utrace) CTL_PROTO(config_xmalloc) CTL_PROTO(opt_abort) CTL_PROTO(opt_abort_conf) +CTL_PROTO(opt_cache_oblivious) CTL_PROTO(opt_trust_madvise) CTL_PROTO(opt_confirm_conf) CTL_PROTO(opt_hpa) @@ -395,6 +396,7 @@ static const ctl_named_node_t config_node[] = { static const ctl_named_node_t opt_node[] = { {NAME("abort"), CTL(opt_abort)}, {NAME("abort_conf"), CTL(opt_abort_conf)}, + {NAME("cache_oblivious"), CTL(opt_cache_oblivious)}, {NAME("trust_madvise"), CTL(opt_trust_madvise)}, {NAME("confirm_conf"), CTL(opt_confirm_conf)}, {NAME("hpa"), CTL(opt_hpa)}, @@ -2095,6 +2097,7 @@ CTL_RO_CONFIG_GEN(config_xmalloc, bool) CTL_RO_NL_GEN(opt_abort, opt_abort, bool) CTL_RO_NL_GEN(opt_abort_conf, opt_abort_conf, bool) +CTL_RO_NL_GEN(opt_cache_oblivious, opt_cache_oblivious, bool) CTL_RO_NL_GEN(opt_trust_madvise, opt_trust_madvise, bool) CTL_RO_NL_GEN(opt_confirm_conf, opt_confirm_conf, bool) CTL_RO_NL_GEN(opt_hpa, opt_hpa, bool) diff --git a/src/jemalloc.c b/src/jemalloc.c index 3bccac95..125682bf 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -102,6 +102,14 @@ bool opt_trust_madvise = #endif ; +bool opt_cache_oblivious = +#ifdef JEMALLOC_CACHE_OBLIVIOUS + true +#else + false +#endif + ; + zero_realloc_action_t opt_zero_realloc_action = zero_realloc_action_strict; @@ -1697,7 +1705,7 @@ malloc_init_hard_a0_locked() { prof_boot0(); } malloc_conf_init(&sc_data, bin_shard_sizes); - sz_boot(&sc_data); + sz_boot(&sc_data, opt_cache_oblivious); bin_info_boot(&sc_data, bin_shard_sizes); if (opt_stats_print) { @@ -2790,12 +2798,7 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) { * usize can be trusted to determine szind and slab. */ alloc_ctx.szind = sz_size2index(usize); - if (config_cache_oblivious) { - alloc_ctx.slab = (alloc_ctx.szind < SC_NBINS); - } else { - /* Non page aligned must be slab allocated. */ - alloc_ctx.slab = true; - } + alloc_ctx.slab = (alloc_ctx.szind < SC_NBINS); } else if (opt_prof) { emap_alloc_ctx_lookup(tsd_tsdn(tsd), &arena_emap_global, ptr, &alloc_ctx); diff --git a/src/large.c b/src/large.c index f23839f7..bd29e5c5 100644 --- a/src/large.c +++ b/src/large.c @@ -95,7 +95,8 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, edata_t *edata, size_t usize, } if (zero) { - if (config_cache_oblivious) { + if (opt_cache_oblivious) { + assert(sz_large_pad == PAGE); /* * Zero the trailing bytes of the original allocation's * last page, since they are in an indeterminate state. diff --git a/src/stats.c b/src/stats.c index 20ff299c..7a0526c5 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1458,6 +1458,7 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_BOOL("abort") OPT_WRITE_BOOL("abort_conf") + OPT_WRITE_BOOL("cache_oblivious") OPT_WRITE_BOOL("confirm_conf") OPT_WRITE_BOOL("retain") OPT_WRITE_CHAR_P("dss") diff --git a/src/sz.c b/src/sz.c index 7734f394..d3115dda 100644 --- a/src/sz.c +++ b/src/sz.c @@ -1,9 +1,10 @@ #include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/sz.h" JEMALLOC_ALIGNED(CACHELINE) size_t sz_pind2sz_tab[SC_NPSIZES+1]; - +size_t sz_large_pad; size_t sz_psz_quantize_floor(size_t size) { @@ -105,7 +106,8 @@ sz_boot_size2index_tab(const sc_data_t *sc_data) { } void -sz_boot(const sc_data_t *sc_data) { +sz_boot(const sc_data_t *sc_data, bool cache_oblivious) { + sz_large_pad = cache_oblivious ? PAGE : 0; sz_boot_pind2sz_tab(sc_data); sz_boot_index2size_tab(sc_data); sz_boot_size2index_tab(sc_data); diff --git a/test/unit/extent_quantize.c b/test/unit/extent_quantize.c index 27a4a7ea..e6bbd539 100644 --- a/test/unit/extent_quantize.c +++ b/test/unit/extent_quantize.c @@ -47,7 +47,7 @@ TEST_BEGIN(test_large_extent_size) { */ sz = sizeof(bool); - expect_d_eq(mallctl("config.cache_oblivious", (void *)&cache_oblivious, + expect_d_eq(mallctl("opt.cache_oblivious", (void *)&cache_oblivious, &sz, NULL, 0), 0, "Unexpected mallctl failure"); sz = sizeof(unsigned); diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 6f5a8f18..1fb74667 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -279,6 +279,7 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(bool, abort, always); TEST_MALLCTL_OPT(bool, abort_conf, always); + TEST_MALLCTL_OPT(bool, cache_oblivious, always); TEST_MALLCTL_OPT(bool, trust_madvise, always); TEST_MALLCTL_OPT(bool, confirm_conf, always); TEST_MALLCTL_OPT(const char *, metadata_thp, always);