From bf72188f80c59328b20441c79861f9373c22bccd Mon Sep 17 00:00:00 2001
From: Qi Wang
Date: Wed, 21 Oct 2020 19:47:57 -0700
Subject: [PATCH] Allow opt.tcache_max to accept small size classes.

Previously, all small size classes were cached. However, this has
downsides -- particularly when the page size is greater than 4K (e.g.
iOS), which results in a much higher SMALL_MAXCLASS. This change allows
tcache_max to be set to lower values, to better control the resources
taken by the tcache.
---
 Makefile.in                                |   1 +
 doc/jemalloc.xml.in                        |   2 +-
 include/jemalloc/internal/cache_bin.h      |  11 ++
 include/jemalloc/internal/tcache_inlines.h |  31 +++-
 src/cache_bin.c                            |  19 +--
 src/tcache.c                               |  52 +++++--
 test/unit/arena_decay.c                    |   7 +-
 test/unit/arena_decay.sh                   |   2 +-
 test/unit/tcache_max.c                     | 170 +++++++++++++++++++++
 test/unit/tcache_max.sh                    |   3 +
 10 files changed, 265 insertions(+), 33 deletions(-)
 create mode 100644 test/unit/tcache_max.c
 create mode 100644 test/unit/tcache_max.sh

diff --git a/Makefile.in b/Makefile.in
index 0136a40e..34df2398 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -264,6 +264,7 @@ TESTS_UNIT := \
 	$(srcroot)test/unit/spin.c \
 	$(srcroot)test/unit/stats.c \
 	$(srcroot)test/unit/stats_print.c \
+	$(srcroot)test/unit/tcache_max.c \
 	$(srcroot)test/unit/test_hooks.c \
 	$(srcroot)test/unit/thread_event.c \
 	$(srcroot)test/unit/ticker.c \
diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in
index e5f2aa67..e24c191c 100644
--- a/doc/jemalloc.xml.in
+++ b/doc/jemalloc.xml.in
@@ -1313,7 +1313,7 @@ malloc_conf = "xmalloc:true";]]>
         r-
         Maximum size class to cache in the thread-specific cache
-        (tcache). At a minimum, all small size classes are cached; and at a
+        (tcache). At a minimum, the first size class is cached; and at a
         maximum, size classes up to 8 MiB can be cached. The default maximum is
         32 KiB (2^15). As a convenience, this may also be set by specifying
         lg_tcache_max, which will be taken to be the base-2 logarithm of the
diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h
index 0767862c..64275f24 100644
--- a/include/jemalloc/internal/cache_bin.h
+++ b/include/jemalloc/internal/cache_bin.h
@@ -20,6 +20,17 @@
  */
 typedef uint16_t cache_bin_sz_t;
 
+/*
+ * Leave a noticeable mark pattern on the cache bin stack boundaries, in case a
+ * bug starts leaking those. Make it look like the junk pattern but be distinct
+ * from it.
+ */
+static const uintptr_t cache_bin_preceding_junk =
+    (uintptr_t)0x7a7a7a7a7a7a7a7aULL;
+/* Note: a7 vs. 7a above -- this tells you which pointer leaked. */
+static const uintptr_t cache_bin_trailing_junk =
+    (uintptr_t)0xa7a7a7a7a7a7a7a7ULL;
+
 /*
  * That implies the following value, for the maximum number of items in any
  * individual bin. The cache bins track their bounds looking just at the low
diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h
index 1cba9186..926c852d 100644
--- a/include/jemalloc/internal/tcache_inlines.h
+++ b/include/jemalloc/internal/tcache_inlines.h
@@ -26,6 +26,20 @@ tcache_enabled_set(tsd_t *tsd, bool enabled) {
 	tsd_slow_update(tsd);
 }
 
+JEMALLOC_ALWAYS_INLINE bool
+tcache_small_bin_disabled(szind_t ind, cache_bin_t *bin) {
+	assert(ind < SC_NBINS);
+	bool ret = (cache_bin_info_ncached_max(&tcache_bin_info[ind]) == 0);
+	if (ret && bin != NULL) {
+		/* Small size class, but the cache bin is disabled. */
+		assert(ind >= nhbins);
+		assert((uintptr_t)(*bin->stack_head) ==
+		    cache_bin_preceding_junk);
+	}
+
+	return ret;
+}
+
 JEMALLOC_ALWAYS_INLINE void *
 tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size,
     szind_t binind, bool zero, bool slow_path) {
@@ -42,6 +56,11 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache,
 		if (unlikely(arena == NULL)) {
 			return NULL;
 		}
+		if (unlikely(tcache_small_bin_disabled(binind, bin))) {
+			/* Stats and zero are handled directly by the arena. */
+			return arena_malloc_hard(tsd_tsdn(tsd), arena, size,
+			    binind, zero);
+		}
 		ret = tcache_alloc_small_hard(tsd_tsdn(tsd), arena, tcache,
 		    bin, binind, &tcache_hard_success);
@@ -104,13 +123,17 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size,
 JEMALLOC_ALWAYS_INLINE void
 tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind,
     bool slow_path) {
-	assert(tcache_salloc(tsd_tsdn(tsd), ptr)
-	    <= SC_SMALL_MAXCLASS);
+	assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= SC_SMALL_MAXCLASS);
 
 	cache_bin_t *bin = &tcache->bins[binind];
 	if (unlikely(!cache_bin_dalloc_easy(bin, ptr))) {
-		unsigned remain = cache_bin_info_ncached_max(
-		    &tcache_bin_info[binind]) >> opt_lg_tcache_flush_small_div;
+		if (unlikely(tcache_small_bin_disabled(binind, bin))) {
+			arena_dalloc_small(tsd_tsdn(tsd), ptr);
+			return;
+		}
+		cache_bin_sz_t max = cache_bin_info_ncached_max(
+		    &tcache_bin_info[binind]);
+		unsigned remain = max >> opt_lg_tcache_flush_small_div;
 		tcache_bin_flush_small(tsd, tcache, bin, binind, remain);
 		bool ret = cache_bin_dalloc_easy(bin, ptr);
 		assert(ret);
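The disabled-bin check above is the crux of the fast path: a bin whose
ncached_max is zero can never satisfy a cached alloc or absorb a free, so
both paths fall through to the arena without ever consulting nhbins. A
standalone sketch of that contract, using toy types rather than jemalloc's
internals (toy_bin_t and its helpers are illustrative stand-ins):

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

typedef struct {
	void **slots;		/* storage for cached pointers */
	uint16_t ncached;	/* how many pointers are cached now */
	uint16_t ncached_max;	/* capacity; 0 models a disabled bin */
} toy_bin_t;

/* Fast-path free: succeeds only if the bin has spare capacity. */
static bool
toy_bin_dalloc_easy(toy_bin_t *bin, void *ptr) {
	if (bin->ncached == bin->ncached_max) {
		/* Full -- or disabled, when ncached_max == 0. */
		return false;
	}
	bin->slots[bin->ncached++] = ptr;
	return true;
}

/* Fast-path alloc: succeeds only if the bin holds a cached pointer. */
static void *
toy_bin_alloc_easy(toy_bin_t *bin) {
	return bin->ncached == 0 ? NULL : bin->slots[--bin->ncached];
}

int
main(void) {
	toy_bin_t disabled = {NULL, 0, 0}; /* zero capacity: always misses */
	void *p = malloc(8);

	/*
	 * Both fast paths fail immediately; the caller falls back to the
	 * backing allocator (the arena, in jemalloc's case).
	 */
	assert(!toy_bin_dalloc_easy(&disabled, p));
	assert(toy_bin_alloc_easy(&disabled) == NULL);
	free(p);
	puts("disabled bin: fast paths miss, fallback path taken");
	return 0;
}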
diff --git a/src/cache_bin.c b/src/cache_bin.c
index 1e26c4ef..1d04b0dd 100644
--- a/src/cache_bin.c
+++ b/src/cache_bin.c
@@ -24,6 +24,7 @@ cache_bin_info_compute_alloc(cache_bin_info_t *infos, szind_t ninfos,
 	 */
 	*size = sizeof(void *) * 2;
 	for (szind_t i = 0; i < ninfos; i++) {
+		assert(infos[i].ncached_max > 0);
 		*size += infos[i].ncached_max * sizeof(void *);
 	}
 
@@ -46,26 +47,20 @@ cache_bin_preincrement(cache_bin_info_t *infos, szind_t ninfos, void *alloc,
 		    &computed_alignment);
 		assert(((uintptr_t)alloc & (computed_alignment - 1)) == 0);
 	}
-	/*
-	 * Leave a noticeable mark pattern on the boundaries, in case a bug
-	 * starts leaking those. Make it look like the junk pattern but be
-	 * distinct from it.
-	 */
-	uintptr_t preceding_ptr_junk = (uintptr_t)0x7a7a7a7a7a7a7a7aULL;
-	*(uintptr_t *)((uintptr_t)alloc + *cur_offset) = preceding_ptr_junk;
+
+	*(uintptr_t *)((uintptr_t)alloc + *cur_offset) =
+	    cache_bin_preceding_junk;
 	*cur_offset += sizeof(void *);
 }
 
 void
 cache_bin_postincrement(cache_bin_info_t *infos, szind_t ninfos, void *alloc,
     size_t *cur_offset) {
-	/* Note: a7 vs. 7a above -- this tells you which pointer leaked. */
-	uintptr_t trailing_ptr_junk = (uintptr_t)0xa7a7a7a7a7a7a7a7ULL;
-	*(uintptr_t *)((uintptr_t)alloc + *cur_offset) = trailing_ptr_junk;
+	*(uintptr_t *)((uintptr_t)alloc + *cur_offset) =
+	    cache_bin_trailing_junk;
 	*cur_offset += sizeof(void *);
 }
-
 void
 cache_bin_init(cache_bin_t *bin, cache_bin_info_t *info, void *alloc,
     size_t *cur_offset) {
@@ -90,6 +85,8 @@ cache_bin_init(cache_bin_t *bin, cache_bin_info_t *info, void *alloc,
 	    (uint16_t)(uintptr_t) bin->stack_head) == bin_stack_size);
 	assert(cache_bin_ncached_get(bin, info) == 0);
 	assert(cache_bin_empty_position_get(bin, info) == empty_position);
+
+	assert(bin_stack_size > 0 || empty_position == full_position);
 }
 
 bool
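The two sentinel constants hoisted into cache_bin.h act as canaries on
either side of the contiguous bin stacks; because the byte patterns differ
(7a vs. a7), a leaked boundary word identifies which end it came from. A
minimal, self-contained illustration of the idea (a sketch, not jemalloc
code):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define PRECEDING_JUNK ((uintptr_t)0x7a7a7a7a7a7a7a7aULL)
#define TRAILING_JUNK  ((uintptr_t)0xa7a7a7a7a7a7a7a7ULL)

int
main(void) {
	/* A pointer stack with one guard word on each side. */
	uintptr_t region[1 + 4 + 1];
	region[0] = PRECEDING_JUNK;	/* written by "preincrement" */
	region[5] = TRAILING_JUNK;	/* written by "postincrement" */

	/* ... the four slots in between would hold cached pointers ... */

	/*
	 * If a guard word ever escapes as a "pointer", the byte pattern
	 * (7a vs. a7) says which boundary was crossed.
	 */
	assert(region[0] == PRECEDING_JUNK);
	assert(region[5] == TRAILING_JUNK);
	printf("guards intact: %#lx / %#lx\n",
	    (unsigned long)region[0], (unsigned long)region[5]);
	return 0;
}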
diff --git a/src/tcache.c b/src/tcache.c
index edbedf79..41a1b828 100644
--- a/src/tcache.c
+++ b/src/tcache.c
@@ -62,7 +62,9 @@ cache_bin_info_t *tcache_bin_info;
 static size_t tcache_bin_alloc_size;
 static size_t tcache_bin_alloc_alignment;
 
+/* Number of cache bins enabled, including both large and small. */
 unsigned nhbins;
+/* Max size class to be cached (can be small or large). */
 size_t tcache_maxclass;
 
 tcaches_t *tcaches;
@@ -567,7 +569,14 @@ tcache_init(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache,
 	tcache_slow->arena = NULL;
 	tcache_slow->dyn_alloc = mem;
 
-	memset(tcache->bins, 0, sizeof(cache_bin_t) * nhbins);
+	/*
+	 * We reserve cache bins for all small size classes, even if some may
+	 * not get used (i.e. bins higher than nhbins). This allows the fast
+	 * and common paths to access cache bin metadata safely w/o worrying
+	 * about which ones are disabled.
+	 */
+	unsigned n_reserved_bins = nhbins < SC_NBINS ? SC_NBINS : nhbins;
+	memset(tcache->bins, 0, sizeof(cache_bin_t) * n_reserved_bins);
 
 	size_t cur_offset = 0;
 	cache_bin_preincrement(tcache_bin_info, nhbins, mem,
@@ -576,19 +585,34 @@
 		if (i < SC_NBINS) {
 			tcache_slow->lg_fill_div[i] = 1;
 			tcache_slow->bin_refilled[i] = false;
+			tcache_slow->bin_flush_delay_items[i]
+			    = tcache_gc_item_delay_compute(i);
 		}
 		cache_bin_t *cache_bin = &tcache->bins[i];
 		cache_bin_init(cache_bin, &tcache_bin_info[i], mem,
 		    &cur_offset);
 	}
+	/*
+	 * For small size classes beyond tcache_maxclass (i.e. when nhbins <
+	 * SC_NBINS), their cache bins are initialized to a state that safely
+	 * and efficiently fails all fastpath alloc / free, so that no
+	 * additional check around nhbins is needed on the fastpath.
+	 */
+	for (unsigned i = nhbins; i < SC_NBINS; i++) {
+		/* Disabled small bins. */
+		cache_bin_t *cache_bin = &tcache->bins[i];
+		void *fake_stack = mem;
+		size_t fake_offset = 0;
+
+		cache_bin_init(cache_bin, &tcache_bin_info[i], fake_stack,
+		    &fake_offset);
+		assert(tcache_small_bin_disabled(i, cache_bin));
+	}
+
 	cache_bin_postincrement(tcache_bin_info, nhbins, mem,
 	    &cur_offset);
 	/* Sanity check that the whole stack is used. */
 	assert(cur_offset == tcache_bin_alloc_size);
-	for (unsigned i = 0; i < SC_NBINS; i++) {
-		tcache_slow->bin_flush_delay_items[i]
-		    = tcache_gc_item_delay_compute(i);
-	}
 }
 
 /* Initialize auto tcache (embedded in TSD). */
@@ -935,9 +959,6 @@ tcache_ncached_max_compute(szind_t szind) {
 bool
 tcache_boot(tsdn_t *tsdn, base_t *base) {
 	tcache_maxclass = sz_s2u(opt_tcache_max);
-	if (tcache_maxclass < SC_SMALL_MAXCLASS) {
-		tcache_maxclass = SC_SMALL_MAXCLASS;
-	}
 	assert(tcache_maxclass <= TCACHE_MAXCLASS_LIMIT);
 	nhbins = sz_size2index(tcache_maxclass) + 1;
@@ -946,16 +967,25 @@ tcache_boot(tsdn_t *tsdn, base_t *base) {
 		return true;
 	}
 
-	/* Initialize tcache_bin_info. */
-	tcache_bin_info = (cache_bin_info_t *)base_alloc(tsdn, base,
-	    nhbins * sizeof(cache_bin_info_t), CACHELINE);
+	/* Initialize tcache_bin_info. See comments in tcache_init(). */
+	unsigned n_reserved_bins = nhbins < SC_NBINS ? SC_NBINS : nhbins;
+	size_t size = n_reserved_bins * sizeof(cache_bin_info_t);
+	tcache_bin_info = (cache_bin_info_t *)base_alloc(tsdn, base, size,
+	    CACHELINE);
 	if (tcache_bin_info == NULL) {
 		return true;
 	}
+
 	for (szind_t i = 0; i < nhbins; i++) {
 		unsigned ncached_max = tcache_ncached_max_compute(i);
 		cache_bin_info_init(&tcache_bin_info[i], ncached_max);
 	}
+	for (szind_t i = nhbins; i < SC_NBINS; i++) {
+		/* Disabled small bins. */
+		cache_bin_info_init(&tcache_bin_info[i], 0);
+		assert(tcache_small_bin_disabled(i, NULL));
+	}
+
 	cache_bin_info_compute_alloc(tcache_bin_info, nhbins,
 	    &tcache_bin_alloc_size, &tcache_bin_alloc_alignment);
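The reservation arithmetic deserves a worked example: nhbins is derived
from tcache_max, while SC_NBINS is fixed by the size-class configuration,
so metadata is sized to max(nhbins, SC_NBINS) and the bins in
[nhbins, SC_NBINS) are left disabled. A sketch with made-up size classes
(toy_size2index and TOY_SC_NBINS are illustrative stand-ins, not
jemalloc's actual size-class machinery):

#include <stdio.h>

/* Hypothetical number of small size classes; varies by page size. */
#define TOY_SC_NBINS 36

/*
 * Hypothetical helper: index of the smallest class >= size, where toy
 * classes are 8, 16, 32, ... (pure powers of two, for simplicity).
 */
static unsigned
toy_size2index(size_t size) {
	unsigned ind = 0;
	size_t class = 8;
	while (class < size) {
		class <<= 1;
		ind++;
	}
	return ind;
}

int
main(void) {
	size_t tcache_max = 1024; /* e.g. MALLOC_CONF="tcache_max:1024" */
	unsigned nhbins = toy_size2index(tcache_max) + 1;
	/*
	 * Metadata is reserved for max(nhbins, SC_NBINS) bins so the fast
	 * path can index any small bin without a bounds check.
	 */
	unsigned n_reserved = nhbins < TOY_SC_NBINS ? TOY_SC_NBINS : nhbins;

	printf("nhbins=%u, reserved=%u\n", nhbins, n_reserved);
	if (nhbins < TOY_SC_NBINS) {
		printf("small bins [%u, %u) are disabled\n", nhbins,
		    TOY_SC_NBINS);
	}
	return 0;
}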
diff --git a/test/unit/arena_decay.c b/test/unit/arena_decay.c
index 555f71ae..a2661682 100644
--- a/test/unit/arena_decay.c
+++ b/test/unit/arena_decay.c
@@ -432,7 +432,6 @@ TEST_BEGIN(test_decay_ticker) {
 	unsigned arena_ind = do_arena_create(ddt, mdt);
 	int flags = (MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE);
 	void *ps[NPS];
-	size_t large;
 
 	/*
 	 * Allocate a bunch of large objects, pause the clock, deallocate every
@@ -440,12 +439,10 @@ TEST_BEGIN(test_decay_ticker) {
 	 * [md]allocx() in a tight loop while advancing time rapidly to verify
 	 * the ticker triggers purging.
 	 */
-
-	size_t tcache_max;
+	size_t large;
 	size_t sz = sizeof(size_t);
-	expect_d_eq(mallctl("arenas.tcache_max", (void *)&tcache_max, &sz, NULL,
+	expect_d_eq(mallctl("arenas.lextent.0.size", (void *)&large, &sz, NULL,
 	    0), 0, "Unexpected mallctl failure");
-	large = nallocx(tcache_max + 1, flags);
 
 	do_purge(arena_ind);
 	uint64_t dirty_npurge0 = get_arena_dirty_npurge(arena_ind);
diff --git a/test/unit/arena_decay.sh b/test/unit/arena_decay.sh
index 45aeccf4..52f1b207 100644
--- a/test/unit/arena_decay.sh
+++ b/test/unit/arena_decay.sh
@@ -1,3 +1,3 @@
 #!/bin/sh
 
-export MALLOC_CONF="dirty_decay_ms:1000,muzzy_decay_ms:1000,lg_tcache_max:0"
+export MALLOC_CONF="dirty_decay_ms:1000,muzzy_decay_ms:1000,tcache_max:1024"
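The test configuration now states the cap in bytes with the new option
rather than as a base-2 logarithm (lg_tcache_max:0). Besides the
MALLOC_CONF environment variable used by these scripts, applications can
pin the same setting at build time through jemalloc's documented
malloc_conf global; a minimal sketch (assuming an unprefixed jemalloc
build -- a prefixed build exposes je_malloc_conf instead):

#include <stdio.h>
#include <stdlib.h>

/* Cache at most the size classes up to 1024 bytes in each thread cache. */
const char *malloc_conf = "tcache_max:1024";

int
main(void) {
	void *small = malloc(512);	/* eligible for tcache caching */
	void *large = malloc(4096);	/* too large to be cached here */

	free(small);
	free(large);
	puts("allocated and freed with tcache_max:1024");
	return 0;
}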
diff --git a/test/unit/tcache_max.c b/test/unit/tcache_max.c
new file mode 100644
index 00000000..0594ceff
--- /dev/null
+++ b/test/unit/tcache_max.c
@@ -0,0 +1,170 @@
+#include "test/jemalloc_test.h"
+
+enum {
+	alloc_option_start = 0,
+	use_malloc = 0,
+	use_mallocx,
+	alloc_option_end
+};
+
+enum {
+	dalloc_option_start = 0,
+	use_free = 0,
+	use_dallocx,
+	use_sdallocx,
+	dalloc_option_end
+};
+
+static unsigned alloc_option, dalloc_option;
+static size_t tcache_max;
+
+static void *
+alloc_func(size_t sz) {
+	void *ret;
+
+	switch (alloc_option) {
+	case use_malloc:
+		ret = malloc(sz);
+		break;
+	case use_mallocx:
+		ret = mallocx(sz, 0);
+		break;
+	default:
+		unreachable();
+	}
+	expect_ptr_not_null(ret, "Unexpected malloc / mallocx failure");
+
+	return ret;
+}
+
+static void
+dalloc_func(void *ptr, size_t sz) {
+	switch (dalloc_option) {
+	case use_free:
+		free(ptr);
+		break;
+	case use_dallocx:
+		dallocx(ptr, 0);
+		break;
+	case use_sdallocx:
+		sdallocx(ptr, sz, 0);
+		break;
+	default:
+		unreachable();
+	}
+}
+
+static size_t
+tcache_bytes_read(void) {
+	uint64_t epoch;
+	assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)),
+	    0, "Unexpected mallctl() failure");
+
+	size_t tcache_bytes;
+	size_t sz = sizeof(tcache_bytes);
+	assert_d_eq(mallctl(
+	    "stats.arenas." STRINGIFY(MALLCTL_ARENAS_ALL) ".tcache_bytes",
+	    &tcache_bytes, &sz, NULL, 0), 0, "Unexpected mallctl failure");
+
+	return tcache_bytes;
+}
+
+static void
+tcache_bytes_check_update(size_t *prev, ssize_t diff) {
+	size_t tcache_bytes = tcache_bytes_read();
+	expect_zu_eq(tcache_bytes, *prev + diff, "tcache bytes not expected");
+
+	*prev += diff;
+}
+
+static void
+test_tcache_bytes_alloc(size_t alloc_size) {
+	expect_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, 0), 0,
+	    "Unexpected tcache flush failure");
+
+	size_t usize = sz_s2u(alloc_size);
+	/* No change is expected if usize is outside of the tcache_max range. */
+	bool cached = (usize <= tcache_max);
+	ssize_t diff = cached ? usize : 0;
+
+	void *ptr1 = alloc_func(alloc_size);
+	void *ptr2 = alloc_func(alloc_size);
+
+	size_t bytes = tcache_bytes_read();
+	dalloc_func(ptr2, alloc_size);
+	/* Expect tcache_bytes to increase after dalloc. */
+	tcache_bytes_check_update(&bytes, diff);
+
+	dalloc_func(ptr1, alloc_size);
+	/* Expect tcache_bytes to increase again. */
+	tcache_bytes_check_update(&bytes, diff);
+
+	void *ptr3 = alloc_func(alloc_size);
+	if (cached) {
+		expect_ptr_eq(ptr1, ptr3, "Unexpected cached ptr");
+	}
+	/* Expect tcache_bytes to decrease after alloc. */
+	tcache_bytes_check_update(&bytes, -diff);
+
+	void *ptr4 = alloc_func(alloc_size);
+	if (cached) {
+		expect_ptr_eq(ptr2, ptr4, "Unexpected cached ptr");
+	}
+	/* Expect tcache_bytes to decrease again. */
+	tcache_bytes_check_update(&bytes, -diff);
+
+	dalloc_func(ptr3, alloc_size);
+	tcache_bytes_check_update(&bytes, diff);
+	dalloc_func(ptr4, alloc_size);
+	tcache_bytes_check_update(&bytes, diff);
+}
+
+static void
+test_tcache_max_impl(void) {
+	size_t sz;
+	sz = sizeof(tcache_max);
+	assert_d_eq(mallctl("arenas.tcache_max", (void *)&tcache_max,
+	    &sz, NULL, 0), 0, "Unexpected mallctl() failure");
+
+	/* opt.tcache_max is set to 1024 in tcache_max.sh. */
+	expect_zu_eq(tcache_max, 1024, "tcache_max not expected");
+
+	test_tcache_bytes_alloc(1);
+	test_tcache_bytes_alloc(tcache_max - 1);
+	test_tcache_bytes_alloc(tcache_max);
+	test_tcache_bytes_alloc(tcache_max + 1);
+
+	test_tcache_bytes_alloc(PAGE - 1);
+	test_tcache_bytes_alloc(PAGE);
+	test_tcache_bytes_alloc(PAGE + 1);
+
+	size_t large;
+	sz = sizeof(large);
+	assert_d_eq(mallctl("arenas.lextent.0.size", (void *)&large, &sz, NULL,
+	    0), 0, "Unexpected mallctl() failure");
+
+	test_tcache_bytes_alloc(large - 1);
+	test_tcache_bytes_alloc(large);
+	test_tcache_bytes_alloc(large + 1);
+}
+
+TEST_BEGIN(test_tcache_max) {
+	test_skip_if(!config_stats);
+	test_skip_if(!opt_tcache);
+
+	for (alloc_option = alloc_option_start;
+	    alloc_option < alloc_option_end;
+	    alloc_option++) {
+		for (dalloc_option = dalloc_option_start;
+		    dalloc_option < dalloc_option_end;
+		    dalloc_option++) {
+			test_tcache_max_impl();
+		}
+	}
+}
+TEST_END
+
+int
+main(void) {
+	return test(test_tcache_max);
+}
diff --git a/test/unit/tcache_max.sh b/test/unit/tcache_max.sh
new file mode 100644
index 00000000..4480d733
--- /dev/null
+++ b/test/unit/tcache_max.sh
@@ -0,0 +1,3 @@
+#!/bin/sh
+
+export MALLOC_CONF="tcache_max:1024"
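For completeness, the cap that the test reads back can be queried from any
program the same way. The sketch below mirrors test_tcache_max_impl()
(assuming an unprefixed jemalloc; the opt.tcache_max name additionally
assumes a build that includes this option series). Run it with
MALLOC_CONF="tcache_max:1024" to reproduce the test's setting:

#include <stdio.h>
#include <jemalloc/jemalloc.h>

int
main(void) {
	size_t opt_max, tcache_max;
	size_t sz = sizeof(size_t);

	/* The opt.* namespace reports what was requested... */
	if (mallctl("opt.tcache_max", &opt_max, &sz, NULL, 0) != 0) {
		fprintf(stderr, "opt.tcache_max lookup failed\n");
		return 1;
	}
	/* ...and arenas.tcache_max reports the derived size-class cap. */
	if (mallctl("arenas.tcache_max", &tcache_max, &sz, NULL, 0) != 0) {
		fprintf(stderr, "arenas.tcache_max lookup failed\n");
		return 1;
	}
	printf("opt.tcache_max=%zu, arenas.tcache_max=%zu\n",
	    opt_max, tcache_max);
	return 0;
}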