Allow opt.tcache_max to accept small size classes.
Previously, all small size classes were cached in the tcache. This has downsides -- particularly when the page size is greater than 4K (e.g. on iOS), which results in a much higher SMALL_MAXCLASS and therefore a larger per-thread cache. This change allows tcache_max to be set to lower values, to better control the resources taken by the tcache.
parent ea32060f9c
commit bf72188f80
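For illustration (not part of this diff; assumes a jemalloc build with the unprefixed public API): the cap can be set at startup via the malloc_conf global and read back through the arenas.tcache_max mallctl, which reports the size-class-rounded effective value.

#include <stdio.h>
#include <jemalloc/jemalloc.h>

/* Request a 1 KiB tcache cap; with this commit, small size classes above
 * the cap are simply left uncached instead of being cached anyway. */
const char *malloc_conf = "tcache_max:1024";

int
main(void) {
	size_t tcache_max;
	size_t sz = sizeof(tcache_max);
	if (mallctl("arenas.tcache_max", (void *)&tcache_max, &sz, NULL, 0)
	    != 0) {
		fprintf(stderr, "mallctl(\"arenas.tcache_max\") failed\n");
		return 1;
	}
	printf("effective tcache_max: %zu\n", tcache_max);
	return 0;
}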
Makefile.in
@@ -264,6 +264,7 @@ TESTS_UNIT := \
 	$(srcroot)test/unit/spin.c \
 	$(srcroot)test/unit/stats.c \
 	$(srcroot)test/unit/stats_print.c \
+	$(srcroot)test/unit/tcache_max.c \
 	$(srcroot)test/unit/test_hooks.c \
 	$(srcroot)test/unit/thread_event.c \
 	$(srcroot)test/unit/ticker.c \
doc/jemalloc.xml.in
@@ -1313,7 +1313,7 @@ malloc_conf = "xmalloc:true";]]></programlisting>
         <literal>r-</literal>
         </term>
         <listitem><para>Maximum size class to cache in the thread-specific cache
-        (tcache). At a minimum, all small size classes are cached; and at a
+        (tcache). At a minimum, the first size class is cached; and at a
         maximum, size classes up to 8 MiB can be cached. The default maximum is
         32 KiB (2^15). As a convenience, this may also be set by specifying
         lg_tcache_max, which will be taken to be the base-2 logarithm of the
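A worked check of the convenience form described above (illustrative only; the constants mirror the documented default):

#include <assert.h>

int
main(void) {
	/* lg_tcache_max is the base-2 logarithm of tcache_max, so
	 * "lg_tcache_max:15" requests the same cap as "tcache_max:32768",
	 * i.e. the documented 32 KiB default. */
	assert((1UL << 15) == 32768UL);
	return 0;
}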
include/jemalloc/internal/cache_bin.h
@@ -20,6 +20,17 @@
  */
 typedef uint16_t cache_bin_sz_t;
 
+/*
+ * Leave a noticeable mark pattern on the cache bin stack boundaries, in case a
+ * bug starts leaking those. Make it look like the junk pattern but be distinct
+ * from it.
+ */
+static const uintptr_t cache_bin_preceding_junk =
+	(uintptr_t)0x7a7a7a7a7a7a7a7aULL;
+/* Note: a7 vs. 7a above -- this tells you which pointer leaked. */
+static const uintptr_t cache_bin_trailing_junk =
+	(uintptr_t)0xa7a7a7a7a7a7a7a7ULL;
+
 /*
  * That implies the following value, for the maximum number of items in any
  * individual bin. The cache bins track their bounds looking just at the low
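A sketch (not in this commit) of how the two markers could be told apart when a stray value turns up, e.g. in a crash dump; per the comment above, the 7a vs. a7 leading byte says which boundary word was leaked:

#include <stdint.h>
#include <stdio.h>

static const char *
classify_boundary_junk(uintptr_t v) {
	if (v == (uintptr_t)0x7a7a7a7a7a7a7a7aULL) {
		return "cache_bin_preceding_junk";
	}
	if (v == (uintptr_t)0xa7a7a7a7a7a7a7a7ULL) {
		return "cache_bin_trailing_junk";
	}
	return "not a cache bin boundary marker";
}

int
main(void) {
	printf("%s\n",
	    classify_boundary_junk((uintptr_t)0x7a7a7a7a7a7a7a7aULL));
	return 0;
}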
include/jemalloc/internal/tcache_inlines.h
@@ -26,6 +26,20 @@ tcache_enabled_set(tsd_t *tsd, bool enabled) {
 	tsd_slow_update(tsd);
 }
 
+JEMALLOC_ALWAYS_INLINE bool
+tcache_small_bin_disabled(szind_t ind, cache_bin_t *bin) {
+	assert(ind < SC_NBINS);
+	bool ret = (cache_bin_info_ncached_max(&tcache_bin_info[ind]) == 0);
+	if (ret && bin != NULL) {
+		/* small size class but cache bin disabled. */
+		assert(ind >= nhbins);
+		assert((uintptr_t)(*bin->stack_head) ==
+		    cache_bin_preceding_junk);
+	}
+
+	return ret;
+}
+
 JEMALLOC_ALWAYS_INLINE void *
 tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache,
     size_t size, szind_t binind, bool zero, bool slow_path) {
@@ -42,6 +56,11 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache,
 	if (unlikely(arena == NULL)) {
 		return NULL;
 	}
+	if (unlikely(tcache_small_bin_disabled(binind, bin))) {
+		/* stats and zero are handled directly by the arena. */
+		return arena_malloc_hard(tsd_tsdn(tsd), arena, size,
+		    binind, zero);
+	}
 
 	ret = tcache_alloc_small_hard(tsd_tsdn(tsd), arena, tcache,
 	    bin, binind, &tcache_hard_success);
@@ -104,13 +123,17 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size,
 JEMALLOC_ALWAYS_INLINE void
 tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind,
     bool slow_path) {
-	assert(tcache_salloc(tsd_tsdn(tsd), ptr)
-	    <= SC_SMALL_MAXCLASS);
+	assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= SC_SMALL_MAXCLASS);
 
 	cache_bin_t *bin = &tcache->bins[binind];
 	if (unlikely(!cache_bin_dalloc_easy(bin, ptr))) {
-		unsigned remain = cache_bin_info_ncached_max(
-		    &tcache_bin_info[binind]) >> opt_lg_tcache_flush_small_div;
+		if (unlikely(tcache_small_bin_disabled(binind, bin))) {
+			arena_dalloc_small(tsd_tsdn(tsd), ptr);
+			return;
+		}
+		cache_bin_sz_t max = cache_bin_info_ncached_max(
+		    &tcache_bin_info[binind]);
+		unsigned remain = max >> opt_lg_tcache_flush_small_div;
 		tcache_bin_flush_small(tsd, tcache, bin, binind, remain);
 		bool ret = cache_bin_dalloc_easy(bin, ptr);
 		assert(ret);
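A worked example of the flush sizing above, with made-up numbers (opt_lg_tcache_flush_small_div is assumed here to be 1, believed to be its default):

#include <stdio.h>

int
main(void) {
	unsigned ncached_max = 200;	/* hypothetical bin capacity */
	unsigned lg_flush_div = 1;	/* assumed divisor setting */
	/* Same shift as in tcache_dalloc_small: on a full bin, flush down
	 * to half capacity before retrying the fast-path push. */
	unsigned remain = ncached_max >> lg_flush_div;
	printf("flush keeps %u of %u cached items\n", remain, ncached_max);
	return 0;
}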
src/cache_bin.c
@@ -24,6 +24,7 @@ cache_bin_info_compute_alloc(cache_bin_info_t *infos, szind_t ninfos,
 	 */
 	*size = sizeof(void *) * 2;
 	for (szind_t i = 0; i < ninfos; i++) {
+		assert(infos[i].ncached_max > 0);
 		*size += infos[i].ncached_max * sizeof(void *);
 	}
 
@@ -46,26 +47,20 @@ cache_bin_preincrement(cache_bin_info_t *infos, szind_t ninfos, void *alloc,
 	    &computed_alignment);
 	assert(((uintptr_t)alloc & (computed_alignment - 1)) == 0);
 	}
-	/*
-	 * Leave a noticeable mark pattern on the boundaries, in case a bug
-	 * starts leaking those. Make it look like the junk pattern but be
-	 * distinct from it.
-	 */
-	uintptr_t preceding_ptr_junk = (uintptr_t)0x7a7a7a7a7a7a7a7aULL;
-	*(uintptr_t *)((uintptr_t)alloc + *cur_offset) = preceding_ptr_junk;
+	*(uintptr_t *)((uintptr_t)alloc + *cur_offset) =
+	    cache_bin_preceding_junk;
 	*cur_offset += sizeof(void *);
 }
 
 void
 cache_bin_postincrement(cache_bin_info_t *infos, szind_t ninfos, void *alloc,
     size_t *cur_offset) {
-	/* Note: a7 vs. 7a above -- this tells you which pointer leaked. */
-	uintptr_t trailing_ptr_junk = (uintptr_t)0xa7a7a7a7a7a7a7a7ULL;
-	*(uintptr_t *)((uintptr_t)alloc + *cur_offset) = trailing_ptr_junk;
+	*(uintptr_t *)((uintptr_t)alloc + *cur_offset) =
+	    cache_bin_trailing_junk;
 	*cur_offset += sizeof(void *);
 }
 
 
 void
 cache_bin_init(cache_bin_t *bin, cache_bin_info_t *info, void *alloc,
     size_t *cur_offset) {
@@ -90,6 +85,8 @@ cache_bin_init(cache_bin_t *bin, cache_bin_info_t *info, void *alloc,
 	    (uint16_t)(uintptr_t) bin->stack_head) == bin_stack_size);
 	assert(cache_bin_ncached_get(bin, info) == 0);
 	assert(cache_bin_empty_position_get(bin, info) == empty_position);
+
+	assert(bin_stack_size > 0 || empty_position == full_position);
 }
 
 bool
src/tcache.c
@@ -62,7 +62,9 @@ cache_bin_info_t *tcache_bin_info;
 static size_t tcache_bin_alloc_size;
 static size_t tcache_bin_alloc_alignment;
 
+/* Number of cache bins enabled, including both large and small. */
 unsigned nhbins;
+/* Max size class to be cached (can be small or large). */
 size_t tcache_maxclass;
 
 tcaches_t *tcaches;
@@ -567,7 +569,14 @@ tcache_init(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache,
 	tcache_slow->arena = NULL;
 	tcache_slow->dyn_alloc = mem;
 
-	memset(tcache->bins, 0, sizeof(cache_bin_t) * nhbins);
+	/*
+	 * We reserve cache bins for all small size classes, even if some may
+	 * not get used (i.e. bins higher than nhbins). This allows the fast
+	 * and common paths to access cache bin metadata safely w/o worrying
+	 * about which ones are disabled.
+	 */
+	unsigned n_reserved_bins = nhbins < SC_NBINS ? SC_NBINS : nhbins;
+	memset(tcache->bins, 0, sizeof(cache_bin_t) * n_reserved_bins);
 
 	size_t cur_offset = 0;
 	cache_bin_preincrement(tcache_bin_info, nhbins, mem,
@@ -576,19 +585,34 @@ tcache_init(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache,
 		if (i < SC_NBINS) {
 			tcache_slow->lg_fill_div[i] = 1;
 			tcache_slow->bin_refilled[i] = false;
+			tcache_slow->bin_flush_delay_items[i]
+			    = tcache_gc_item_delay_compute(i);
 		}
 		cache_bin_t *cache_bin = &tcache->bins[i];
 		cache_bin_init(cache_bin, &tcache_bin_info[i], mem,
 		    &cur_offset);
 	}
+	/*
+	 * For small size classes beyond tcache_maxclass (i.e. nhbins < NBINS),
+	 * their cache bins are initialized to a state to safely and efficiently
+	 * fail all fastpath alloc / free, so that no additional check around
+	 * nhbins is needed on fastpath.
+	 */
+	for (unsigned i = nhbins; i < SC_NBINS; i++) {
+		/* Disabled small bins. */
+		cache_bin_t *cache_bin = &tcache->bins[i];
+		void *fake_stack = mem;
+		size_t fake_offset = 0;
+
+		cache_bin_init(cache_bin, &tcache_bin_info[i], fake_stack,
+		    &fake_offset);
+		assert(tcache_small_bin_disabled(i, cache_bin));
+	}
+
 	cache_bin_postincrement(tcache_bin_info, nhbins, mem,
 	    &cur_offset);
 	/* Sanity check that the whole stack is used. */
 	assert(cur_offset == tcache_bin_alloc_size);
-	for (unsigned i = 0; i < SC_NBINS; i++) {
-		tcache_slow->bin_flush_delay_items[i]
-		    = tcache_gc_item_delay_compute(i);
-	}
 }
 
 /* Initialize auto tcache (embedded in TSD). */
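To make the fail-fast idea in the comment above concrete, here is a toy model (simplified stand-in types, not jemalloc's real cache_bin_t): a zero-capacity bin whose stack head already sits at the empty position makes the ordinary empty-bin check fire, so allocation falls through to the slow path with no extra "is this bin enabled?" branch:

#include <stdio.h>

typedef struct {
	void **stack_head;	/* next item to pop */
	void **empty_position;	/* nothing to pop at or beyond this */
} toy_bin_t;

static void *
toy_bin_alloc(toy_bin_t *bin) {
	/* The normal fast-path emptiness check; a disabled (zero-capacity)
	 * bin is permanently "empty", so this always bails for it. */
	if (bin->stack_head == bin->empty_position) {
		return NULL;	/* caller falls back to the slow path */
	}
	return *bin->stack_head++;
}

int
main(void) {
	void *boundary[1] = { (void *)0x7a7a7a7aUL };	/* junk-marked word */
	toy_bin_t disabled = { boundary, boundary };	/* capacity 0 */
	printf("disabled bin alloc -> %p\n", toy_bin_alloc(&disabled));
	return 0;
}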
@@ -935,9 +959,6 @@ tcache_ncached_max_compute(szind_t szind) {
 bool
 tcache_boot(tsdn_t *tsdn, base_t *base) {
 	tcache_maxclass = sz_s2u(opt_tcache_max);
-	if (tcache_maxclass < SC_SMALL_MAXCLASS) {
-		tcache_maxclass = SC_SMALL_MAXCLASS;
-	}
 	assert(tcache_maxclass <= TCACHE_MAXCLASS_LIMIT);
 	nhbins = sz_size2index(tcache_maxclass) + 1;
 
@@ -946,16 +967,25 @@ tcache_boot(tsdn_t *tsdn, base_t *base) {
 		return true;
 	}
 
-	/* Initialize tcache_bin_info. */
-	tcache_bin_info = (cache_bin_info_t *)base_alloc(tsdn, base,
-	    nhbins * sizeof(cache_bin_info_t), CACHELINE);
+	/* Initialize tcache_bin_info. See comments in tcache_init(). */
+	unsigned n_reserved_bins = nhbins < SC_NBINS ? SC_NBINS : nhbins;
+	size_t size = n_reserved_bins * sizeof(cache_bin_info_t);
+	tcache_bin_info = (cache_bin_info_t *)base_alloc(tsdn, base, size,
+	    CACHELINE);
 	if (tcache_bin_info == NULL) {
 		return true;
 	}
 
 	for (szind_t i = 0; i < nhbins; i++) {
 		unsigned ncached_max = tcache_ncached_max_compute(i);
 		cache_bin_info_init(&tcache_bin_info[i], ncached_max);
 	}
+	for (szind_t i = nhbins; i < SC_NBINS; i++) {
+		/* Disabled small bins. */
+		cache_bin_info_init(&tcache_bin_info[i], 0);
+		assert(tcache_small_bin_disabled(i, NULL));
+	}
 
 	cache_bin_info_compute_alloc(tcache_bin_info, nhbins,
 	    &tcache_bin_alloc_size, &tcache_bin_alloc_alignment);
 
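A worked example of the table sizing above (SC_NBINS and nhbins values are made-up stand-ins; real values depend on the page size and size class configuration):

#include <stdio.h>

int
main(void) {
	unsigned sc_nbins = 36;	/* hypothetical number of small bins */
	unsigned nhbins = 12;	/* e.g. what a small tcache_max might yield */
	/* Same clamp as tcache_boot: the info table must still cover every
	 * small size class so disabled bins keep valid metadata. */
	unsigned n_reserved_bins = nhbins < sc_nbins ? sc_nbins : nhbins;
	printf("reserve %u bins; bins [%u, %u) are disabled\n",
	    n_reserved_bins, nhbins, sc_nbins);
	return 0;
}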
@@ -432,7 +432,6 @@ TEST_BEGIN(test_decay_ticker) {
 	unsigned arena_ind = do_arena_create(ddt, mdt);
 	int flags = (MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE);
 	void *ps[NPS];
-	size_t large;
 
 	/*
 	 * Allocate a bunch of large objects, pause the clock, deallocate every
@@ -440,12 +439,10 @@ TEST_BEGIN(test_decay_ticker) {
 	 * [md]allocx() in a tight loop while advancing time rapidly to verify
 	 * the ticker triggers purging.
 	 */
-	size_t tcache_max;
+	size_t large;
 	size_t sz = sizeof(size_t);
-	expect_d_eq(mallctl("arenas.tcache_max", (void *)&tcache_max, &sz, NULL,
+	expect_d_eq(mallctl("arenas.lextent.0.size", (void *)&large, &sz, NULL,
 	    0), 0, "Unexpected mallctl failure");
-	large = nallocx(tcache_max + 1, flags);
 
 	do_purge(arena_ind);
 	uint64_t dirty_npurge0 = get_arena_dirty_npurge(arena_ind);
@@ -1,3 +1,3 @@
 #!/bin/sh
 
-export MALLOC_CONF="dirty_decay_ms:1000,muzzy_decay_ms:1000,lg_tcache_max:0"
+export MALLOC_CONF="dirty_decay_ms:1000,muzzy_decay_ms:1000,tcache_max:1024"
test/unit/tcache_max.c (new file)
@@ -0,0 +1,170 @@
+#include "test/jemalloc_test.h"
+
+enum {
+	alloc_option_start = 0,
+	use_malloc = 0,
+	use_mallocx,
+	alloc_option_end
+};
+
+enum {
+	dalloc_option_start = 0,
+	use_free = 0,
+	use_dallocx,
+	use_sdallocx,
+	dalloc_option_end
+};
+
+static unsigned alloc_option, dalloc_option;
+static size_t tcache_max;
+
+static void *
+alloc_func(size_t sz) {
+	void *ret;
+
+	switch (alloc_option) {
+	case use_malloc:
+		ret = malloc(sz);
+		break;
+	case use_mallocx:
+		ret = mallocx(sz, 0);
+		break;
+	default:
+		unreachable();
+	}
+	expect_ptr_not_null(ret, "Unexpected malloc / mallocx failure");
+
+	return ret;
+}
+
+static void
+dalloc_func(void *ptr, size_t sz) {
+	switch (dalloc_option) {
+	case use_free:
+		free(ptr);
+		break;
+	case use_dallocx:
+		dallocx(ptr, 0);
+		break;
+	case use_sdallocx:
+		sdallocx(ptr, sz, 0);
+		break;
+	default:
+		unreachable();
+	}
+}
+
+static size_t
+tcache_bytes_read(void) {
+	uint64_t epoch;
+	assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)),
+	    0, "Unexpected mallctl() failure");
+
+	size_t tcache_bytes;
+	size_t sz = sizeof(tcache_bytes);
+	assert_d_eq(mallctl(
+	    "stats.arenas." STRINGIFY(MALLCTL_ARENAS_ALL) ".tcache_bytes",
+	    &tcache_bytes, &sz, NULL, 0), 0, "Unexpected mallctl failure");
+
+	return tcache_bytes;
+}
+
+static void
+tcache_bytes_check_update(size_t *prev, ssize_t diff) {
+	size_t tcache_bytes = tcache_bytes_read();
+	expect_zu_eq(tcache_bytes, *prev + diff, "tcache bytes not expected");
+
+	*prev += diff;
+}
+
+static void
+test_tcache_bytes_alloc(size_t alloc_size) {
+	expect_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, 0), 0,
+	    "Unexpected tcache flush failure");
+
+	size_t usize = sz_s2u(alloc_size);
+	/* No change is expected if usize is outside of tcache_max range. */
+	bool cached = (usize <= tcache_max);
+	ssize_t diff = cached ? usize : 0;
+
+	void *ptr1 = alloc_func(alloc_size);
+	void *ptr2 = alloc_func(alloc_size);
+
+	size_t bytes = tcache_bytes_read();
+	dalloc_func(ptr2, alloc_size);
+	/* Expect tcache_bytes increase after dalloc */
+	tcache_bytes_check_update(&bytes, diff);
+
+	dalloc_func(ptr1, alloc_size);
+	/* Expect tcache_bytes increase again */
+	tcache_bytes_check_update(&bytes, diff);
+
+	void *ptr3 = alloc_func(alloc_size);
+	if (cached) {
+		expect_ptr_eq(ptr1, ptr3, "Unexpected cached ptr");
+	}
+	/* Expect tcache_bytes decrease after alloc */
+	tcache_bytes_check_update(&bytes, -diff);
+
+	void *ptr4 = alloc_func(alloc_size);
+	if (cached) {
+		expect_ptr_eq(ptr2, ptr4, "Unexpected cached ptr");
+	}
+	/* Expect tcache_bytes decrease again */
+	tcache_bytes_check_update(&bytes, -diff);
+
+	dalloc_func(ptr3, alloc_size);
+	tcache_bytes_check_update(&bytes, diff);
+	dalloc_func(ptr4, alloc_size);
+	tcache_bytes_check_update(&bytes, diff);
+}
+
+static void
+test_tcache_max_impl(void) {
+	size_t sz;
+	sz = sizeof(tcache_max);
+	assert_d_eq(mallctl("arenas.tcache_max", (void *)&tcache_max,
+	    &sz, NULL, 0), 0, "Unexpected mallctl() failure");
+
+	/* opt.tcache_max set to 1024 in tcache_max.sh */
+	expect_zu_eq(tcache_max, 1024, "tcache_max not expected");
+
+	test_tcache_bytes_alloc(1);
+	test_tcache_bytes_alloc(tcache_max - 1);
+	test_tcache_bytes_alloc(tcache_max);
+	test_tcache_bytes_alloc(tcache_max + 1);
+
+	test_tcache_bytes_alloc(PAGE - 1);
+	test_tcache_bytes_alloc(PAGE);
+	test_tcache_bytes_alloc(PAGE + 1);
+
+	size_t large;
+	sz = sizeof(large);
+	assert_d_eq(mallctl("arenas.lextent.0.size", (void *)&large, &sz, NULL,
+	    0), 0, "Unexpected mallctl() failure");
+
+	test_tcache_bytes_alloc(large - 1);
+	test_tcache_bytes_alloc(large);
+	test_tcache_bytes_alloc(large + 1);
+}
+
+TEST_BEGIN(test_tcache_max) {
+	test_skip_if(!config_stats);
+	test_skip_if(!opt_tcache);
+
+	for (alloc_option = alloc_option_start;
+	    alloc_option < alloc_option_end;
+	    alloc_option++) {
+		for (dalloc_option = dalloc_option_start;
+		    dalloc_option < dalloc_option_end;
+		    dalloc_option++) {
+			test_tcache_max_impl();
+		}
+	}
+}
+TEST_END
+
+int
+main(void) {
+	return test(test_tcache_max);
+}
test/unit/tcache_max.sh (new file)
@@ -0,0 +1,3 @@
+#!/bin/sh
+
+export MALLOC_CONF="tcache_max:1024"