Allow opt.tcache_max to accept small size classes.

Previously, all small size classes were cached unconditionally.  However, this
has downsides -- particularly when the page size is greater than 4K (e.g. iOS),
which results in a much higher SMALL_MAXCLASS.

This change allows tcache_max to be set to lower values, to better control the
resources consumed by the tcache.
Qi Wang, 2020-10-21 19:47:57 -07:00 (committed by Qi Wang)
parent ea32060f9c
commit bf72188f80
10 changed files with 265 additions and 33 deletions
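
To illustrate the new knob (a sketch, not part of this diff): an application can cap the per-thread cache at 1 KiB through the malloc_conf string and read back the effective limit via the arenas.tcache_max mallctl. This assumes an unprefixed jemalloc build; with a prefix, the symbols are je_malloc_conf and je_mallctl.

#include <stdio.h>
#include <jemalloc/jemalloc.h>

/* Cache only size classes up to 1024 bytes in each thread cache. */
const char *malloc_conf = "tcache_max:1024";

int
main(void) {
        size_t tcache_max;
        size_t sz = sizeof(tcache_max);
        /* arenas.tcache_max reports the cap after size-class rounding. */
        if (mallctl("arenas.tcache_max", (void *)&tcache_max, &sz, NULL, 0)
            != 0) {
                return 1;
        }
        printf("tcache_max: %zu bytes\n", tcache_max);
        return 0;
}

The same cap could be requested as lg_tcache_max:10, since the option also accepts a base-2 logarithm spelling (see the documentation hunk below).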


@@ -264,6 +264,7 @@ TESTS_UNIT := \
$(srcroot)test/unit/spin.c \
$(srcroot)test/unit/stats.c \
$(srcroot)test/unit/stats_print.c \
$(srcroot)test/unit/tcache_max.c \
$(srcroot)test/unit/test_hooks.c \
$(srcroot)test/unit/thread_event.c \
$(srcroot)test/unit/ticker.c \


@@ -1313,7 +1313,7 @@ malloc_conf = "xmalloc:true";]]></programlisting>
<literal>r-</literal>
</term>
<listitem><para>Maximum size class to cache in the thread-specific cache
(tcache). At a minimum, all small size classes are cached; and at a
(tcache). At a minimum, the first size class is cached; and at a
maximum, size classes up to 8 MiB can be cached. The default maximum is
32 KiB (2^15). As a convenience, this may also be set by specifying
lg_tcache_max, which will be taken to be the base-2 logarithm of the


@@ -20,6 +20,17 @@
*/
typedef uint16_t cache_bin_sz_t;
/*
* Leave a noticeable mark pattern on the cache bin stack boundaries, in case a
* bug starts leaking those. Make it look like the junk pattern but be distinct
* from it.
*/
static const uintptr_t cache_bin_preceding_junk =
(uintptr_t)0x7a7a7a7a7a7a7a7aULL;
/* Note: a7 vs. 7a above -- this tells you which pointer leaked. */
static const uintptr_t cache_bin_trailing_junk =
(uintptr_t)0xa7a7a7a7a7a7a7a7ULL;
/*
* That implies the following value, for the maximum number of items in any
* individual bin. The cache bins track their bounds looking just at the low
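
A hypothetical debugging helper (not in the tree) makes the point of the two distinct patterns concrete: a stray value seen in a crash dump can be matched against the sentinels to tell which boundary pointer escaped.

#include <stdint.h>

/* Illustrative only; the constants mirror the ones defined above. */
static const char *
classify_cache_bin_sentinel(uintptr_t val) {
        if (val == (uintptr_t)0x7a7a7a7a7a7a7a7aULL) {
                return "preceding cache_bin boundary leaked";
        }
        if (val == (uintptr_t)0xa7a7a7a7a7a7a7a7ULL) {
                return "trailing cache_bin boundary leaked";
        }
        return "not a cache_bin boundary sentinel";
}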


@@ -26,6 +26,20 @@ tcache_enabled_set(tsd_t *tsd, bool enabled) {
tsd_slow_update(tsd);
}
JEMALLOC_ALWAYS_INLINE bool
tcache_small_bin_disabled(szind_t ind, cache_bin_t *bin) {
assert(ind < SC_NBINS);
bool ret = (cache_bin_info_ncached_max(&tcache_bin_info[ind]) == 0);
if (ret && bin != NULL) {
/* small size class but cache bin disabled. */
assert(ind >= nhbins);
assert((uintptr_t)(*bin->stack_head) ==
cache_bin_preceding_junk);
}
return ret;
}
JEMALLOC_ALWAYS_INLINE void *
tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache,
size_t size, szind_t binind, bool zero, bool slow_path) {
@@ -42,6 +56,11 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache,
if (unlikely(arena == NULL)) {
return NULL;
}
if (unlikely(tcache_small_bin_disabled(binind, bin))) {
/* stats and zero are handled directly by the arena. */
return arena_malloc_hard(tsd_tsdn(tsd), arena, size,
binind, zero);
}
ret = tcache_alloc_small_hard(tsd_tsdn(tsd), arena, tcache,
bin, binind, &tcache_hard_success);
@@ -104,13 +123,17 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size,
JEMALLOC_ALWAYS_INLINE void
tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind,
bool slow_path) {
assert(tcache_salloc(tsd_tsdn(tsd), ptr)
<= SC_SMALL_MAXCLASS);
assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= SC_SMALL_MAXCLASS);
cache_bin_t *bin = &tcache->bins[binind];
if (unlikely(!cache_bin_dalloc_easy(bin, ptr))) {
unsigned remain = cache_bin_info_ncached_max(
&tcache_bin_info[binind]) >> opt_lg_tcache_flush_small_div;
if (unlikely(tcache_small_bin_disabled(binind, bin))) {
arena_dalloc_small(tsd_tsdn(tsd), ptr);
return;
}
cache_bin_sz_t max = cache_bin_info_ncached_max(
&tcache_bin_info[binind]);
unsigned remain = max >> opt_lg_tcache_flush_small_div;
tcache_bin_flush_small(tsd, tcache, bin, binind, remain);
bool ret = cache_bin_dalloc_easy(bin, ptr);
assert(ret);


@@ -24,6 +24,7 @@ cache_bin_info_compute_alloc(cache_bin_info_t *infos, szind_t ninfos,
*/
*size = sizeof(void *) * 2;
for (szind_t i = 0; i < ninfos; i++) {
assert(infos[i].ncached_max > 0);
*size += infos[i].ncached_max * sizeof(void *);
}
@@ -46,26 +47,20 @@ cache_bin_preincrement(cache_bin_info_t *infos, szind_t ninfos, void *alloc,
&computed_alignment);
assert(((uintptr_t)alloc & (computed_alignment - 1)) == 0);
}
/*
* Leave a noticeable mark pattern on the boundaries, in case a bug
* starts leaking those. Make it look like the junk pattern but be
* distinct from it.
*/
uintptr_t preceding_ptr_junk = (uintptr_t)0x7a7a7a7a7a7a7a7aULL;
*(uintptr_t *)((uintptr_t)alloc + *cur_offset) = preceding_ptr_junk;
*(uintptr_t *)((uintptr_t)alloc + *cur_offset) =
cache_bin_preceding_junk;
*cur_offset += sizeof(void *);
}
void
cache_bin_postincrement(cache_bin_info_t *infos, szind_t ninfos, void *alloc,
size_t *cur_offset) {
/* Note: a7 vs. 7a above -- this tells you which pointer leaked. */
uintptr_t trailing_ptr_junk = (uintptr_t)0xa7a7a7a7a7a7a7a7ULL;
*(uintptr_t *)((uintptr_t)alloc + *cur_offset) = trailing_ptr_junk;
*(uintptr_t *)((uintptr_t)alloc + *cur_offset) =
cache_bin_trailing_junk;
*cur_offset += sizeof(void *);
}
void
cache_bin_init(cache_bin_t *bin, cache_bin_info_t *info, void *alloc,
size_t *cur_offset) {
@@ -90,6 +85,8 @@ cache_bin_init(cache_bin_t *bin, cache_bin_info_t *info, void *alloc,
(uint16_t)(uintptr_t) bin->stack_head) == bin_stack_size);
assert(cache_bin_ncached_get(bin, info) == 0);
assert(cache_bin_empty_position_get(bin, info) == empty_position);
assert(bin_stack_size > 0 || empty_position == full_position);
}
bool


@@ -62,7 +62,9 @@ cache_bin_info_t *tcache_bin_info;
static size_t tcache_bin_alloc_size;
static size_t tcache_bin_alloc_alignment;
/* Number of cache bins enabled, including both large and small. */
unsigned nhbins;
/* Max size class to be cached (can be small or large). */
size_t tcache_maxclass;
tcaches_t *tcaches;
@@ -567,7 +569,14 @@ tcache_init(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache,
tcache_slow->arena = NULL;
tcache_slow->dyn_alloc = mem;
memset(tcache->bins, 0, sizeof(cache_bin_t) * nhbins);
/*
* We reserve cache bins for all small size classes, even if some may
* not get used (i.e. bins higher than nhbins). This allows the fast
* and common paths to access cache bin metadata safely w/o worrying
* about which ones are disabled.
*/
unsigned n_reserved_bins = nhbins < SC_NBINS ? SC_NBINS : nhbins;
memset(tcache->bins, 0, sizeof(cache_bin_t) * n_reserved_bins);
size_t cur_offset = 0;
cache_bin_preincrement(tcache_bin_info, nhbins, mem,
@@ -576,19 +585,34 @@ tcache_init(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache,
if (i < SC_NBINS) {
tcache_slow->lg_fill_div[i] = 1;
tcache_slow->bin_refilled[i] = false;
tcache_slow->bin_flush_delay_items[i]
= tcache_gc_item_delay_compute(i);
}
cache_bin_t *cache_bin = &tcache->bins[i];
cache_bin_init(cache_bin, &tcache_bin_info[i], mem,
&cur_offset);
}
/*
* For small size classes beyond tcache_maxclass (i.e. nhbins < NBINS),
* their cache bins are initialized to a state to safely and efficiently
* fail all fastpath alloc / free, so that no additional check around
* nhbins is needed on fastpath.
*/
for (unsigned i = nhbins; i < SC_NBINS; i++) {
/* Disabled small bins. */
cache_bin_t *cache_bin = &tcache->bins[i];
void *fake_stack = mem;
size_t fake_offset = 0;
cache_bin_init(cache_bin, &tcache_bin_info[i], fake_stack,
&fake_offset);
assert(tcache_small_bin_disabled(i, cache_bin));
}
cache_bin_postincrement(tcache_bin_info, nhbins, mem,
&cur_offset);
/* Sanity check that the whole stack is used. */
assert(cur_offset == tcache_bin_alloc_size);
for (unsigned i = 0; i < SC_NBINS; i++) {
tcache_slow->bin_flush_delay_items[i]
= tcache_gc_item_delay_compute(i);
}
}
/* Initialize auto tcache (embedded in TSD). */
@@ -935,9 +959,6 @@ tcache_ncached_max_compute(szind_t szind) {
bool
tcache_boot(tsdn_t *tsdn, base_t *base) {
tcache_maxclass = sz_s2u(opt_tcache_max);
if (tcache_maxclass < SC_SMALL_MAXCLASS) {
tcache_maxclass = SC_SMALL_MAXCLASS;
}
assert(tcache_maxclass <= TCACHE_MAXCLASS_LIMIT);
nhbins = sz_size2index(tcache_maxclass) + 1;
@@ -946,16 +967,25 @@ tcache_boot(tsdn_t *tsdn, base_t *base) {
return true;
}
/* Initialize tcache_bin_info. */
tcache_bin_info = (cache_bin_info_t *)base_alloc(tsdn, base,
nhbins * sizeof(cache_bin_info_t), CACHELINE);
/* Initialize tcache_bin_info. See comments in tcache_init(). */
unsigned n_reserved_bins = nhbins < SC_NBINS ? SC_NBINS : nhbins;
size_t size = n_reserved_bins * sizeof(cache_bin_info_t);
tcache_bin_info = (cache_bin_info_t *)base_alloc(tsdn, base, size,
CACHELINE);
if (tcache_bin_info == NULL) {
return true;
}
for (szind_t i = 0; i < nhbins; i++) {
unsigned ncached_max = tcache_ncached_max_compute(i);
cache_bin_info_init(&tcache_bin_info[i], ncached_max);
}
for (szind_t i = nhbins; i < SC_NBINS; i++) {
/* Disabled small bins. */
cache_bin_info_init(&tcache_bin_info[i], 0);
assert(tcache_small_bin_disabled(i, NULL));
}
cache_bin_info_compute_alloc(tcache_bin_info, nhbins,
&tcache_bin_alloc_size, &tcache_bin_alloc_alignment);
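
The comments above capture the central design point of this commit: cache bins for small size classes beyond tcache_max stay allocated but get a capacity of zero, so the existing fast paths fail naturally and fall through to the arena, with no extra "is this bin enabled?" branch. A toy model of that behavior (simplified stand-in types, not jemalloc's real cache_bin implementation, which tracks bounds via stack pointers):

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

typedef struct {
        size_t ncached;     /* items currently cached */
        size_t ncached_max; /* capacity; 0 means the bin is disabled */
} toy_cache_bin_t;

/* Fast-path alloc: an empty bin fails; a zero-capacity bin is always empty. */
static bool
toy_bin_alloc_easy(toy_cache_bin_t *bin) {
        if (bin->ncached == 0) {
                return false; /* caller falls back to the arena */
        }
        bin->ncached--;
        return true;
}

/* Fast-path dalloc: a full bin fails; a zero-capacity bin is always full. */
static bool
toy_bin_dalloc_easy(toy_cache_bin_t *bin) {
        if (bin->ncached == bin->ncached_max) {
                return false; /* caller flushes / frees to the arena */
        }
        bin->ncached++;
        return true;
}

int
main(void) {
        toy_cache_bin_t enabled = {0, 20};
        toy_cache_bin_t disabled = {0, 0};
        bool d1 = toy_bin_dalloc_easy(&enabled);  /* caches the item */
        bool a1 = toy_bin_alloc_easy(&enabled);   /* pops it back out */
        bool d2 = toy_bin_dalloc_easy(&disabled); /* fails: always "full" */
        bool a2 = toy_bin_alloc_easy(&disabled);  /* fails: always empty */
        printf("enabled:  dalloc=%d alloc=%d\n", d1, a1);
        printf("disabled: dalloc=%d alloc=%d\n", d2, a2);
        return 0;
}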


@@ -432,7 +432,6 @@ TEST_BEGIN(test_decay_ticker) {
unsigned arena_ind = do_arena_create(ddt, mdt);
int flags = (MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE);
void *ps[NPS];
size_t large;
/*
* Allocate a bunch of large objects, pause the clock, deallocate every
@@ -440,12 +439,10 @@
* [md]allocx() in a tight loop while advancing time rapidly to verify
* the ticker triggers purging.
*/
size_t tcache_max;
size_t large;
size_t sz = sizeof(size_t);
expect_d_eq(mallctl("arenas.tcache_max", (void *)&tcache_max, &sz, NULL,
expect_d_eq(mallctl("arenas.lextent.0.size", (void *)&large, &sz, NULL,
0), 0, "Unexpected mallctl failure");
large = nallocx(tcache_max + 1, flags);
do_purge(arena_ind);
uint64_t dirty_npurge0 = get_arena_dirty_npurge(arena_ind);


@@ -1,3 +1,3 @@
#!/bin/sh
export MALLOC_CONF="dirty_decay_ms:1000,muzzy_decay_ms:1000,lg_tcache_max:0"
export MALLOC_CONF="dirty_decay_ms:1000,muzzy_decay_ms:1000,tcache_max:1024"

test/unit/tcache_max.c (new file, 170 lines)

@@ -0,0 +1,170 @@
#include "test/jemalloc_test.h"
enum {
alloc_option_start = 0,
use_malloc = 0,
use_mallocx,
alloc_option_end
};
enum {
dalloc_option_start = 0,
use_free = 0,
use_dallocx,
use_sdallocx,
dalloc_option_end
};
static unsigned alloc_option, dalloc_option;
static size_t tcache_max;
static void *
alloc_func(size_t sz) {
void *ret;
switch (alloc_option) {
case use_malloc:
ret = malloc(sz);
break;
case use_mallocx:
ret = mallocx(sz, 0);
break;
default:
unreachable();
}
expect_ptr_not_null(ret, "Unexpected malloc / mallocx failure");
return ret;
}
static void
dalloc_func(void *ptr, size_t sz) {
switch (dalloc_option) {
case use_free:
free(ptr);
break;
case use_dallocx:
dallocx(ptr, 0);
break;
case use_sdallocx:
sdallocx(ptr, sz, 0);
break;
default:
unreachable();
}
}
static size_t
tcache_bytes_read(void) {
uint64_t epoch;
assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)),
0, "Unexpected mallctl() failure");
size_t tcache_bytes;
size_t sz = sizeof(tcache_bytes);
assert_d_eq(mallctl(
"stats.arenas." STRINGIFY(MALLCTL_ARENAS_ALL) ".tcache_bytes",
&tcache_bytes, &sz, NULL, 0), 0, "Unexpected mallctl failure");
return tcache_bytes;
}
static void
tcache_bytes_check_update(size_t *prev, ssize_t diff) {
size_t tcache_bytes = tcache_bytes_read();
expect_zu_eq(tcache_bytes, *prev + diff, "tcache bytes not expected");
*prev += diff;
}
static void
test_tcache_bytes_alloc(size_t alloc_size) {
expect_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, 0), 0,
"Unexpected tcache flush failure");
size_t usize = sz_s2u(alloc_size);
/* No change is expected if usize is outside of tcache_max range. */
bool cached = (usize <= tcache_max);
ssize_t diff = cached ? usize : 0;
void *ptr1 = alloc_func(alloc_size);
void *ptr2 = alloc_func(alloc_size);
size_t bytes = tcache_bytes_read();
dalloc_func(ptr2, alloc_size);
/* Expect tcache_bytes increase after dalloc */
tcache_bytes_check_update(&bytes, diff);
dalloc_func(ptr1, alloc_size);
/* Expect tcache_bytes increase again */
tcache_bytes_check_update(&bytes, diff);
void *ptr3 = alloc_func(alloc_size);
if (cached) {
expect_ptr_eq(ptr1, ptr3, "Unexpected cached ptr");
}
/* Expect tcache_bytes decrease after alloc */
tcache_bytes_check_update(&bytes, -diff);
void *ptr4 = alloc_func(alloc_size);
if (cached) {
expect_ptr_eq(ptr2, ptr4, "Unexpected cached ptr");
}
/* Expect tcache_bytes decrease again */
tcache_bytes_check_update(&bytes, -diff);
dalloc_func(ptr3, alloc_size);
tcache_bytes_check_update(&bytes, diff);
dalloc_func(ptr4, alloc_size);
tcache_bytes_check_update(&bytes, diff);
}
static void
test_tcache_max_impl(void) {
size_t sz;
sz = sizeof(tcache_max);
assert_d_eq(mallctl("arenas.tcache_max", (void *)&tcache_max,
&sz, NULL, 0), 0, "Unexpected mallctl() failure");
/* opt.tcache_max set to 1024 in tcache_max.sh */
expect_zu_eq(tcache_max, 1024, "tcache_max not expected");
test_tcache_bytes_alloc(1);
test_tcache_bytes_alloc(tcache_max - 1);
test_tcache_bytes_alloc(tcache_max);
test_tcache_bytes_alloc(tcache_max + 1);
test_tcache_bytes_alloc(PAGE - 1);
test_tcache_bytes_alloc(PAGE);
test_tcache_bytes_alloc(PAGE + 1);
size_t large;
sz = sizeof(large);
assert_d_eq(mallctl("arenas.lextent.0.size", (void *)&large, &sz, NULL,
0), 0, "Unexpected mallctl() failure");
test_tcache_bytes_alloc(large - 1);
test_tcache_bytes_alloc(large);
test_tcache_bytes_alloc(large + 1);
}
TEST_BEGIN(test_tcache_max) {
test_skip_if(!config_stats);
test_skip_if(!opt_tcache);
for (alloc_option = alloc_option_start;
alloc_option < alloc_option_end;
alloc_option++) {
for (dalloc_option = dalloc_option_start;
dalloc_option < dalloc_option_end;
dalloc_option++) {
test_tcache_max_impl();
}
}
}
TEST_END
int
main(void) {
return test(test_tcache_max);
}

test/unit/tcache_max.sh (new file, 3 lines)

@@ -0,0 +1,3 @@
#!/bin/sh
export MALLOC_CONF="tcache_max:1024"