Implement opt.cache_oblivious.
Keep config.cache_oblivious for now to remain backward-compatible.
This commit is contained in:
parent
8c5e5f50a2
commit
a11be50332
14
INSTALL.md
14
INSTALL.md
@ -185,13 +185,13 @@ any of the following arguments (not a definitive list) to 'configure':
|
|||||||
|
|
||||||
* `--disable-cache-oblivious`
|
* `--disable-cache-oblivious`
|
||||||
|
|
||||||
Disable cache-oblivious large allocation alignment for large allocation
|
Disable cache-oblivious large allocation alignment by default, for large
|
||||||
requests with no alignment constraints. If this feature is disabled, all
|
allocation requests with no alignment constraints. If this feature is
|
||||||
large allocations are page-aligned as an implementation artifact, which can
|
disabled, all large allocations are page-aligned as an implementation
|
||||||
severely harm CPU cache utilization. However, the cache-oblivious layout
|
artifact, which can severely harm CPU cache utilization. However, the
|
||||||
comes at the cost of one extra page per large allocation, which in the
|
cache-oblivious layout comes at the cost of one extra page per large
|
||||||
most extreme case increases physical memory usage for the 16 KiB size class
|
allocation, which in the most extreme case increases physical memory usage
|
||||||
to 20 KiB.
|
for the 16 KiB size class to 20 KiB.
|
||||||
|
|
||||||
* `--disable-syscall`
|
* `--disable-syscall`
|
||||||
|
|
||||||
|
@ -936,6 +936,22 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay",
|
|||||||
</para></listitem>
|
</para></listitem>
|
||||||
</varlistentry>
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry id="opt.cache_oblivious">
|
||||||
|
<term>
|
||||||
|
<mallctl>opt.cache_oblivious</mallctl>
|
||||||
|
(<type>bool</type>)
|
||||||
|
<literal>r-</literal>
|
||||||
|
</term>
|
||||||
|
<listitem><para>Enable or disable cache-oblivious large allocation
|
||||||
|
alignment for large allocation requests with no alignment constraints. If this
|
||||||
|
feature is disabled, all large allocations are page-aligned as an
|
||||||
|
implementation artifact, which can severely harm CPU cache utilization.
|
||||||
|
However, the cache-oblivious layout comes at the cost of one extra page
|
||||||
|
per large allocation, which in the most extreme case increases physical
|
||||||
|
memory usage for the 16 KiB size class to 20 KiB. This option is enabled
|
||||||
|
by default.</para></listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
<varlistentry id="opt.metadata_thp">
|
<varlistentry id="opt.metadata_thp">
|
||||||
<term>
|
<term>
|
||||||
<mallctl>opt.metadata_thp</mallctl>
|
<mallctl>opt.metadata_thp</mallctl>
|
||||||
|
@ -33,6 +33,7 @@ extern zero_realloc_action_t opt_zero_realloc_action;
|
|||||||
extern malloc_init_t malloc_init_state;
|
extern malloc_init_t malloc_init_state;
|
||||||
extern const char *zero_realloc_mode_names[];
|
extern const char *zero_realloc_mode_names[];
|
||||||
extern atomic_zu_t zero_realloc_count;
|
extern atomic_zu_t zero_realloc_count;
|
||||||
|
extern bool opt_cache_oblivious;
|
||||||
|
|
||||||
/* Number of CPUs. */
|
/* Number of CPUs. */
|
||||||
extern unsigned ncpus;
|
extern unsigned ncpus;
|
||||||
|
@ -223,7 +223,7 @@ prof_sample_align(size_t orig_align) {
|
|||||||
* w/o metadata lookup.
|
* w/o metadata lookup.
|
||||||
*/
|
*/
|
||||||
assert(opt_prof);
|
assert(opt_prof);
|
||||||
return (config_cache_oblivious && orig_align < PAGE) ? PAGE :
|
return (opt_cache_oblivious && orig_align < PAGE) ? PAGE :
|
||||||
orig_align;
|
orig_align;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -45,15 +45,13 @@ extern size_t sz_index2size_tab[SC_NSIZES];
|
|||||||
*/
|
*/
|
||||||
extern uint8_t sz_size2index_tab[];
|
extern uint8_t sz_size2index_tab[];
|
||||||
|
|
||||||
static const size_t sz_large_pad =
|
/*
|
||||||
#ifdef JEMALLOC_CACHE_OBLIVIOUS
|
* Padding for large allocations: PAGE when opt_cache_oblivious == true (to
|
||||||
PAGE
|
* enable cache index randomization); 0 otherwise.
|
||||||
#else
|
*/
|
||||||
0
|
extern size_t sz_large_pad;
|
||||||
#endif
|
|
||||||
;
|
|
||||||
|
|
||||||
extern void sz_boot(const sc_data_t *sc_data);
|
extern void sz_boot(const sc_data_t *sc_data, bool cache_oblivious);
|
||||||
|
|
||||||
JEMALLOC_ALWAYS_INLINE pszind_t
|
JEMALLOC_ALWAYS_INLINE pszind_t
|
||||||
sz_psz2ind(size_t psz) {
|
sz_psz2ind(size_t psz) {
|
||||||
|
@ -90,6 +90,7 @@ CTL_PROTO(config_utrace)
|
|||||||
CTL_PROTO(config_xmalloc)
|
CTL_PROTO(config_xmalloc)
|
||||||
CTL_PROTO(opt_abort)
|
CTL_PROTO(opt_abort)
|
||||||
CTL_PROTO(opt_abort_conf)
|
CTL_PROTO(opt_abort_conf)
|
||||||
|
CTL_PROTO(opt_cache_oblivious)
|
||||||
CTL_PROTO(opt_trust_madvise)
|
CTL_PROTO(opt_trust_madvise)
|
||||||
CTL_PROTO(opt_confirm_conf)
|
CTL_PROTO(opt_confirm_conf)
|
||||||
CTL_PROTO(opt_hpa)
|
CTL_PROTO(opt_hpa)
|
||||||
@ -395,6 +396,7 @@ static const ctl_named_node_t config_node[] = {
|
|||||||
static const ctl_named_node_t opt_node[] = {
|
static const ctl_named_node_t opt_node[] = {
|
||||||
{NAME("abort"), CTL(opt_abort)},
|
{NAME("abort"), CTL(opt_abort)},
|
||||||
{NAME("abort_conf"), CTL(opt_abort_conf)},
|
{NAME("abort_conf"), CTL(opt_abort_conf)},
|
||||||
|
{NAME("cache_oblivious"), CTL(opt_cache_oblivious)},
|
||||||
{NAME("trust_madvise"), CTL(opt_trust_madvise)},
|
{NAME("trust_madvise"), CTL(opt_trust_madvise)},
|
||||||
{NAME("confirm_conf"), CTL(opt_confirm_conf)},
|
{NAME("confirm_conf"), CTL(opt_confirm_conf)},
|
||||||
{NAME("hpa"), CTL(opt_hpa)},
|
{NAME("hpa"), CTL(opt_hpa)},
|
||||||
@ -2095,6 +2097,7 @@ CTL_RO_CONFIG_GEN(config_xmalloc, bool)
|
|||||||
|
|
||||||
CTL_RO_NL_GEN(opt_abort, opt_abort, bool)
|
CTL_RO_NL_GEN(opt_abort, opt_abort, bool)
|
||||||
CTL_RO_NL_GEN(opt_abort_conf, opt_abort_conf, bool)
|
CTL_RO_NL_GEN(opt_abort_conf, opt_abort_conf, bool)
|
||||||
|
CTL_RO_NL_GEN(opt_cache_oblivious, opt_cache_oblivious, bool)
|
||||||
CTL_RO_NL_GEN(opt_trust_madvise, opt_trust_madvise, bool)
|
CTL_RO_NL_GEN(opt_trust_madvise, opt_trust_madvise, bool)
|
||||||
CTL_RO_NL_GEN(opt_confirm_conf, opt_confirm_conf, bool)
|
CTL_RO_NL_GEN(opt_confirm_conf, opt_confirm_conf, bool)
|
||||||
CTL_RO_NL_GEN(opt_hpa, opt_hpa, bool)
|
CTL_RO_NL_GEN(opt_hpa, opt_hpa, bool)
|
||||||
|
@ -102,6 +102,14 @@ bool opt_trust_madvise =
|
|||||||
#endif
|
#endif
|
||||||
;
|
;
|
||||||
|
|
||||||
|
bool opt_cache_oblivious =
|
||||||
|
#ifdef JEMALLOC_CACHE_OBLIVIOUS
|
||||||
|
true
|
||||||
|
#else
|
||||||
|
false
|
||||||
|
#endif
|
||||||
|
;
|
||||||
|
|
||||||
zero_realloc_action_t opt_zero_realloc_action =
|
zero_realloc_action_t opt_zero_realloc_action =
|
||||||
zero_realloc_action_strict;
|
zero_realloc_action_strict;
|
||||||
|
|
||||||
@ -1697,7 +1705,7 @@ malloc_init_hard_a0_locked() {
|
|||||||
prof_boot0();
|
prof_boot0();
|
||||||
}
|
}
|
||||||
malloc_conf_init(&sc_data, bin_shard_sizes);
|
malloc_conf_init(&sc_data, bin_shard_sizes);
|
||||||
sz_boot(&sc_data);
|
sz_boot(&sc_data, opt_cache_oblivious);
|
||||||
bin_info_boot(&sc_data, bin_shard_sizes);
|
bin_info_boot(&sc_data, bin_shard_sizes);
|
||||||
|
|
||||||
if (opt_stats_print) {
|
if (opt_stats_print) {
|
||||||
@ -2790,12 +2798,7 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) {
|
|||||||
* usize can be trusted to determine szind and slab.
|
* usize can be trusted to determine szind and slab.
|
||||||
*/
|
*/
|
||||||
alloc_ctx.szind = sz_size2index(usize);
|
alloc_ctx.szind = sz_size2index(usize);
|
||||||
if (config_cache_oblivious) {
|
|
||||||
alloc_ctx.slab = (alloc_ctx.szind < SC_NBINS);
|
alloc_ctx.slab = (alloc_ctx.szind < SC_NBINS);
|
||||||
} else {
|
|
||||||
/* Non page aligned must be slab allocated. */
|
|
||||||
alloc_ctx.slab = true;
|
|
||||||
}
|
|
||||||
} else if (opt_prof) {
|
} else if (opt_prof) {
|
||||||
emap_alloc_ctx_lookup(tsd_tsdn(tsd), &arena_emap_global,
|
emap_alloc_ctx_lookup(tsd_tsdn(tsd), &arena_emap_global,
|
||||||
ptr, &alloc_ctx);
|
ptr, &alloc_ctx);
|
||||||
|
@ -95,7 +95,8 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, edata_t *edata, size_t usize,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (zero) {
|
if (zero) {
|
||||||
if (config_cache_oblivious) {
|
if (opt_cache_oblivious) {
|
||||||
|
assert(sz_large_pad == PAGE);
|
||||||
/*
|
/*
|
||||||
* Zero the trailing bytes of the original allocation's
|
* Zero the trailing bytes of the original allocation's
|
||||||
* last page, since they are in an indeterminate state.
|
* last page, since they are in an indeterminate state.
|
||||||
|
@ -1458,6 +1458,7 @@ stats_general_print(emitter_t *emitter) {
|
|||||||
|
|
||||||
OPT_WRITE_BOOL("abort")
|
OPT_WRITE_BOOL("abort")
|
||||||
OPT_WRITE_BOOL("abort_conf")
|
OPT_WRITE_BOOL("abort_conf")
|
||||||
|
OPT_WRITE_BOOL("cache_oblivious")
|
||||||
OPT_WRITE_BOOL("confirm_conf")
|
OPT_WRITE_BOOL("confirm_conf")
|
||||||
OPT_WRITE_BOOL("retain")
|
OPT_WRITE_BOOL("retain")
|
||||||
OPT_WRITE_CHAR_P("dss")
|
OPT_WRITE_CHAR_P("dss")
|
||||||
|
6
src/sz.c
6
src/sz.c
@ -1,9 +1,10 @@
|
|||||||
#include "jemalloc/internal/jemalloc_preamble.h"
|
#include "jemalloc/internal/jemalloc_preamble.h"
|
||||||
|
#include "jemalloc/internal/jemalloc_internal_includes.h"
|
||||||
#include "jemalloc/internal/sz.h"
|
#include "jemalloc/internal/sz.h"
|
||||||
|
|
||||||
JEMALLOC_ALIGNED(CACHELINE)
|
JEMALLOC_ALIGNED(CACHELINE)
|
||||||
size_t sz_pind2sz_tab[SC_NPSIZES+1];
|
size_t sz_pind2sz_tab[SC_NPSIZES+1];
|
||||||
|
size_t sz_large_pad;
|
||||||
|
|
||||||
size_t
|
size_t
|
||||||
sz_psz_quantize_floor(size_t size) {
|
sz_psz_quantize_floor(size_t size) {
|
||||||
@ -105,7 +106,8 @@ sz_boot_size2index_tab(const sc_data_t *sc_data) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
sz_boot(const sc_data_t *sc_data) {
|
sz_boot(const sc_data_t *sc_data, bool cache_oblivious) {
|
||||||
|
sz_large_pad = cache_oblivious ? PAGE : 0;
|
||||||
sz_boot_pind2sz_tab(sc_data);
|
sz_boot_pind2sz_tab(sc_data);
|
||||||
sz_boot_index2size_tab(sc_data);
|
sz_boot_index2size_tab(sc_data);
|
||||||
sz_boot_size2index_tab(sc_data);
|
sz_boot_size2index_tab(sc_data);
|
||||||
|
@ -47,7 +47,7 @@ TEST_BEGIN(test_large_extent_size) {
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
sz = sizeof(bool);
|
sz = sizeof(bool);
|
||||||
expect_d_eq(mallctl("config.cache_oblivious", (void *)&cache_oblivious,
|
expect_d_eq(mallctl("opt.cache_oblivious", (void *)&cache_oblivious,
|
||||||
&sz, NULL, 0), 0, "Unexpected mallctl failure");
|
&sz, NULL, 0), 0, "Unexpected mallctl failure");
|
||||||
|
|
||||||
sz = sizeof(unsigned);
|
sz = sizeof(unsigned);
|
||||||
|
@ -279,6 +279,7 @@ TEST_BEGIN(test_mallctl_opt) {
|
|||||||
|
|
||||||
TEST_MALLCTL_OPT(bool, abort, always);
|
TEST_MALLCTL_OPT(bool, abort, always);
|
||||||
TEST_MALLCTL_OPT(bool, abort_conf, always);
|
TEST_MALLCTL_OPT(bool, abort_conf, always);
|
||||||
|
TEST_MALLCTL_OPT(bool, cache_oblivious, always);
|
||||||
TEST_MALLCTL_OPT(bool, trust_madvise, always);
|
TEST_MALLCTL_OPT(bool, trust_madvise, always);
|
||||||
TEST_MALLCTL_OPT(bool, confirm_conf, always);
|
TEST_MALLCTL_OPT(bool, confirm_conf, always);
|
||||||
TEST_MALLCTL_OPT(const char *, metadata_thp, always);
|
TEST_MALLCTL_OPT(const char *, metadata_thp, always);
|
||||||
|
Loading…
Reference in New Issue
Block a user