diff --git a/Makefile.in b/Makefile.in
index 0136a40e..34df2398 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -264,6 +264,7 @@ TESTS_UNIT := \
$(srcroot)test/unit/spin.c \
$(srcroot)test/unit/stats.c \
$(srcroot)test/unit/stats_print.c \
+ $(srcroot)test/unit/tcache_max.c \
$(srcroot)test/unit/test_hooks.c \
$(srcroot)test/unit/thread_event.c \
$(srcroot)test/unit/ticker.c \
diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in
index e5f2aa67..e24c191c 100644
--- a/doc/jemalloc.xml.in
+++ b/doc/jemalloc.xml.in
@@ -1313,7 +1313,7 @@ malloc_conf = "xmalloc:true";]]>
r-
Maximum size class to cache in the thread-specific cache
- (tcache). At a minimum, all small size classes are cached; and at a
+ (tcache). At a minimum, the first size class is cached; and at a
maximum, size classes up to 8 MiB can be cached. The default maximum is
32 KiB (2^15). As a convenience, this may also be set by specifying
lg_tcache_max, which will be taken to be the base-2 logarithm of the
diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h
index 0767862c..64275f24 100644
--- a/include/jemalloc/internal/cache_bin.h
+++ b/include/jemalloc/internal/cache_bin.h
@@ -20,6 +20,17 @@
*/
typedef uint16_t cache_bin_sz_t;
+/*
+ * Sentinel words written just before and just after the cache bin stacks, so
+ * that a bug which leaks a boundary slot is easy to notice. They look like the
+ * junk pattern but are distinct from it, and from each other.
+ */
+static const uintptr_t cache_bin_preceding_junk =
+ (uintptr_t)0x7a7a7a7a7a7a7a7aULL;
+/* Note: a7 here vs. 7a above -- this tells you which boundary leaked. */
+static const uintptr_t cache_bin_trailing_junk =
+ (uintptr_t)0xa7a7a7a7a7a7a7a7ULL;
+
/*
* That implies the following value, for the maximum number of items in any
* individual bin. The cache bins track their bounds looking just at the low
diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h
index 1cba9186..926c852d 100644
--- a/include/jemalloc/internal/tcache_inlines.h
+++ b/include/jemalloc/internal/tcache_inlines.h
@@ -26,6 +26,20 @@ tcache_enabled_set(tsd_t *tsd, bool enabled) {
tsd_slow_update(tsd);
}
+JEMALLOC_ALWAYS_INLINE bool
+tcache_small_bin_disabled(szind_t ind, cache_bin_t *bin) {
+ assert(ind < SC_NBINS); /* Only meaningful for small size classes. */
+ bool ret = (cache_bin_info_ncached_max(&tcache_bin_info[ind]) == 0);
+ if (ret && bin != NULL) {
+ /* Disabled small bin: sanity check the index and stack head. */
+ assert(ind >= nhbins);
+ assert((uintptr_t)(*bin->stack_head) ==
+ cache_bin_preceding_junk);
+ }
+ /* True iff the bin info for ind has ncached_max == 0; bin may be NULL. */
+ return ret;
+}
+
JEMALLOC_ALWAYS_INLINE void *
tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache,
size_t size, szind_t binind, bool zero, bool slow_path) {
@@ -42,6 +56,11 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache,
if (unlikely(arena == NULL)) {
return NULL;
}
+ if (unlikely(tcache_small_bin_disabled(binind, bin))) {
+ /* stats and zero are handled directly by the arena. */
+ return arena_malloc_hard(tsd_tsdn(tsd), arena, size,
+ binind, zero);
+ }
ret = tcache_alloc_small_hard(tsd_tsdn(tsd), arena, tcache,
bin, binind, &tcache_hard_success);
@@ -104,13 +123,17 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size,
JEMALLOC_ALWAYS_INLINE void
tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind,
bool slow_path) {
- assert(tcache_salloc(tsd_tsdn(tsd), ptr)
- <= SC_SMALL_MAXCLASS);
+ assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= SC_SMALL_MAXCLASS);
cache_bin_t *bin = &tcache->bins[binind];
if (unlikely(!cache_bin_dalloc_easy(bin, ptr))) {
- unsigned remain = cache_bin_info_ncached_max(
- &tcache_bin_info[binind]) >> opt_lg_tcache_flush_small_div;
+ if (unlikely(tcache_small_bin_disabled(binind, bin))) {
+ arena_dalloc_small(tsd_tsdn(tsd), ptr);
+ return;
+ }
+ cache_bin_sz_t max = cache_bin_info_ncached_max(
+ &tcache_bin_info[binind]);
+ unsigned remain = max >> opt_lg_tcache_flush_small_div;
tcache_bin_flush_small(tsd, tcache, bin, binind, remain);
bool ret = cache_bin_dalloc_easy(bin, ptr);
assert(ret);
diff --git a/src/cache_bin.c b/src/cache_bin.c
index 1e26c4ef..1d04b0dd 100644
--- a/src/cache_bin.c
+++ b/src/cache_bin.c
@@ -24,6 +24,7 @@ cache_bin_info_compute_alloc(cache_bin_info_t *infos, szind_t ninfos,
*/
*size = sizeof(void *) * 2;
for (szind_t i = 0; i < ninfos; i++) {
+ assert(infos[i].ncached_max > 0);
*size += infos[i].ncached_max * sizeof(void *);
}
@@ -46,26 +47,20 @@ cache_bin_preincrement(cache_bin_info_t *infos, szind_t ninfos, void *alloc,
&computed_alignment);
assert(((uintptr_t)alloc & (computed_alignment - 1)) == 0);
}
- /*
- * Leave a noticeable mark pattern on the boundaries, in case a bug
- * starts leaking those. Make it look like the junk pattern but be
- * distinct from it.
- */
- uintptr_t preceding_ptr_junk = (uintptr_t)0x7a7a7a7a7a7a7a7aULL;
- *(uintptr_t *)((uintptr_t)alloc + *cur_offset) = preceding_ptr_junk;
+
+ *(uintptr_t *)((uintptr_t)alloc + *cur_offset) =
+ cache_bin_preceding_junk;
*cur_offset += sizeof(void *);
}
void
cache_bin_postincrement(cache_bin_info_t *infos, szind_t ninfos, void *alloc,
size_t *cur_offset) {
- /* Note: a7 vs. 7a above -- this tells you which pointer leaked. */
- uintptr_t trailing_ptr_junk = (uintptr_t)0xa7a7a7a7a7a7a7a7ULL;
- *(uintptr_t *)((uintptr_t)alloc + *cur_offset) = trailing_ptr_junk;
+ *(uintptr_t *)((uintptr_t)alloc + *cur_offset) =
+ cache_bin_trailing_junk;
*cur_offset += sizeof(void *);
}
-
void
cache_bin_init(cache_bin_t *bin, cache_bin_info_t *info, void *alloc,
size_t *cur_offset) {
@@ -90,6 +85,8 @@ cache_bin_init(cache_bin_t *bin, cache_bin_info_t *info, void *alloc,
(uint16_t)(uintptr_t) bin->stack_head) == bin_stack_size);
assert(cache_bin_ncached_get(bin, info) == 0);
assert(cache_bin_empty_position_get(bin, info) == empty_position);
+
+ assert(bin_stack_size > 0 || empty_position == full_position);
}
bool
diff --git a/src/tcache.c b/src/tcache.c
index edbedf79..41a1b828 100644
--- a/src/tcache.c
+++ b/src/tcache.c
@@ -62,7 +62,9 @@ cache_bin_info_t *tcache_bin_info;
static size_t tcache_bin_alloc_size;
static size_t tcache_bin_alloc_alignment;
+/* Number of cache bins enabled, including both large and small. */
unsigned nhbins;
+/* Max size class to be cached (can be small or large). */
size_t tcache_maxclass;
tcaches_t *tcaches;
@@ -567,7 +569,14 @@ tcache_init(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache,
tcache_slow->arena = NULL;
tcache_slow->dyn_alloc = mem;
- memset(tcache->bins, 0, sizeof(cache_bin_t) * nhbins);
+ /*
+ * We reserve cache bins for all small size classes, even if some may
+ * not get used (i.e. bins at index >= nhbins). This allows the fast
+ * and common paths to access cache bin metadata safely w/o worrying
+ * about which ones are disabled.
+ */
+ unsigned n_reserved_bins = nhbins < SC_NBINS ? SC_NBINS : nhbins;
+ memset(tcache->bins, 0, sizeof(cache_bin_t) * n_reserved_bins);
size_t cur_offset = 0;
cache_bin_preincrement(tcache_bin_info, nhbins, mem,
@@ -576,19 +585,34 @@ tcache_init(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache,
if (i < SC_NBINS) {
tcache_slow->lg_fill_div[i] = 1;
tcache_slow->bin_refilled[i] = false;
+ tcache_slow->bin_flush_delay_items[i]
+ = tcache_gc_item_delay_compute(i);
}
cache_bin_t *cache_bin = &tcache->bins[i];
cache_bin_init(cache_bin, &tcache_bin_info[i], mem,
&cur_offset);
}
+ /*
+ * For small size classes beyond tcache_maxclass (i.e. nhbins < SC_NBINS),
+ * their cache bins are initialized to a state to safely and efficiently
+ * fail all fastpath alloc / free, so that no additional check around
+ * nhbins is needed on fastpath.
+ */
+ for (unsigned i = nhbins; i < SC_NBINS; i++) {
+ /* Disabled small bins. */
+ cache_bin_t *cache_bin = &tcache->bins[i];
+ void *fake_stack = mem;
+ size_t fake_offset = 0;
+
+ cache_bin_init(cache_bin, &tcache_bin_info[i], fake_stack,
+ &fake_offset);
+ assert(tcache_small_bin_disabled(i, cache_bin));
+ }
+
cache_bin_postincrement(tcache_bin_info, nhbins, mem,
&cur_offset);
/* Sanity check that the whole stack is used. */
assert(cur_offset == tcache_bin_alloc_size);
- for (unsigned i = 0; i < SC_NBINS; i++) {
- tcache_slow->bin_flush_delay_items[i]
- = tcache_gc_item_delay_compute(i);
- }
}
/* Initialize auto tcache (embedded in TSD). */
@@ -935,9 +959,6 @@ tcache_ncached_max_compute(szind_t szind) {
bool
tcache_boot(tsdn_t *tsdn, base_t *base) {
tcache_maxclass = sz_s2u(opt_tcache_max);
- if (tcache_maxclass < SC_SMALL_MAXCLASS) {
- tcache_maxclass = SC_SMALL_MAXCLASS;
- }
assert(tcache_maxclass <= TCACHE_MAXCLASS_LIMIT);
nhbins = sz_size2index(tcache_maxclass) + 1;
@@ -946,16 +967,25 @@ tcache_boot(tsdn_t *tsdn, base_t *base) {
return true;
}
- /* Initialize tcache_bin_info. */
- tcache_bin_info = (cache_bin_info_t *)base_alloc(tsdn, base,
- nhbins * sizeof(cache_bin_info_t), CACHELINE);
+ /* Initialize tcache_bin_info. See comments in tcache_init(). */
+ unsigned n_reserved_bins = nhbins < SC_NBINS ? SC_NBINS : nhbins;
+ size_t size = n_reserved_bins * sizeof(cache_bin_info_t);
+ tcache_bin_info = (cache_bin_info_t *)base_alloc(tsdn, base, size,
+ CACHELINE);
if (tcache_bin_info == NULL) {
return true;
}
+
for (szind_t i = 0; i < nhbins; i++) {
unsigned ncached_max = tcache_ncached_max_compute(i);
cache_bin_info_init(&tcache_bin_info[i], ncached_max);
}
+ for (szind_t i = nhbins; i < SC_NBINS; i++) {
+ /* Disabled small bins. */
+ cache_bin_info_init(&tcache_bin_info[i], 0);
+ assert(tcache_small_bin_disabled(i, NULL));
+ }
+
cache_bin_info_compute_alloc(tcache_bin_info, nhbins,
&tcache_bin_alloc_size, &tcache_bin_alloc_alignment);
diff --git a/test/unit/arena_decay.c b/test/unit/arena_decay.c
index 555f71ae..a2661682 100644
--- a/test/unit/arena_decay.c
+++ b/test/unit/arena_decay.c
@@ -432,7 +432,6 @@ TEST_BEGIN(test_decay_ticker) {
unsigned arena_ind = do_arena_create(ddt, mdt);
int flags = (MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE);
void *ps[NPS];
- size_t large;
/*
* Allocate a bunch of large objects, pause the clock, deallocate every
@@ -440,12 +439,10 @@ TEST_BEGIN(test_decay_ticker) {
* [md]allocx() in a tight loop while advancing time rapidly to verify
* the ticker triggers purging.
*/
-
- size_t tcache_max;
+ size_t large;
size_t sz = sizeof(size_t);
- expect_d_eq(mallctl("arenas.tcache_max", (void *)&tcache_max, &sz, NULL,
+ expect_d_eq(mallctl("arenas.lextent.0.size", (void *)&large, &sz, NULL,
0), 0, "Unexpected mallctl failure");
- large = nallocx(tcache_max + 1, flags);
do_purge(arena_ind);
uint64_t dirty_npurge0 = get_arena_dirty_npurge(arena_ind);
diff --git a/test/unit/arena_decay.sh b/test/unit/arena_decay.sh
index 45aeccf4..52f1b207 100644
--- a/test/unit/arena_decay.sh
+++ b/test/unit/arena_decay.sh
@@ -1,3 +1,3 @@
#!/bin/sh
-export MALLOC_CONF="dirty_decay_ms:1000,muzzy_decay_ms:1000,lg_tcache_max:0"
+export MALLOC_CONF="dirty_decay_ms:1000,muzzy_decay_ms:1000,tcache_max:1024"
diff --git a/test/unit/tcache_max.c b/test/unit/tcache_max.c
new file mode 100644
index 00000000..0594ceff
--- /dev/null
+++ b/test/unit/tcache_max.c
@@ -0,0 +1,170 @@
+#include "test/jemalloc_test.h"
+
+enum {
+ alloc_option_start = 0, /* First option; same value as use_malloc. */
+ use_malloc = 0,
+ use_mallocx,
+ alloc_option_end /* One past the last option (loop bound). */
+};
+
+enum {
+ dalloc_option_start = 0, /* First option; same value as use_free. */
+ use_free = 0,
+ use_dallocx,
+ use_sdallocx,
+ dalloc_option_end /* One past the last option (loop bound). */
+};
+
+static unsigned alloc_option, dalloc_option; /* APIs currently under test. */
+static size_t tcache_max; /* Read back from "arenas.tcache_max". */
+
+static void *
+alloc_func(size_t sz) {
+ void *ret;
+ /* Allocate sz bytes through the API selected by alloc_option. */
+ switch (alloc_option) {
+ case use_malloc:
+ ret = malloc(sz);
+ break;
+ case use_mallocx:
+ ret = mallocx(sz, 0);
+ break;
+ default:
+ unreachable();
+ }
+ expect_ptr_not_null(ret, "Unexpected malloc / mallocx failure");
+
+ return ret;
+}
+
+static void
+dalloc_func(void *ptr, size_t sz) {
+ switch (dalloc_option) { /* Free ptr through the selected API. */
+ case use_free:
+ free(ptr);
+ break;
+ case use_dallocx:
+ dallocx(ptr, 0);
+ break;
+ case use_sdallocx:
+ sdallocx(ptr, sz, 0); /* The only path that uses sz. */
+ break;
+ default:
+ unreachable();
+ }
+}
+
+static size_t
+tcache_bytes_read(void) {
+ uint64_t epoch = 1; /* Passed as the new value below; must be defined. */
+ assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)),
+ 0, "Unexpected mallctl() failure");
+ /* Fetch the tcache_bytes stat for the MALLCTL_ARENAS_ALL index. */
+ size_t tcache_bytes;
+ size_t sz = sizeof(tcache_bytes);
+ assert_d_eq(mallctl(
+ "stats.arenas." STRINGIFY(MALLCTL_ARENAS_ALL) ".tcache_bytes",
+ &tcache_bytes, &sz, NULL, 0), 0, "Unexpected mallctl failure");
+
+ return tcache_bytes;
+}
+
+static void
+tcache_bytes_check_update(size_t *prev, ssize_t diff) {
+ size_t tcache_bytes = tcache_bytes_read();
+ expect_zu_eq(tcache_bytes, *prev + diff, "tcache bytes not expected");
+ /* Advance the baseline by the expected delta, not the observed one. */
+ *prev += diff;
+}
+
+static void
+test_tcache_bytes_alloc(size_t alloc_size) {
+ expect_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, 0), 0,
+ "Unexpected tcache flush failure");
+
+ size_t usize = sz_s2u(alloc_size);
+ /* No change is expected if usize is outside of tcache_max range. */
+ bool cached = (usize <= tcache_max);
+ ssize_t diff = cached ? usize : 0;
+
+ void *ptr1 = alloc_func(alloc_size);
+ void *ptr2 = alloc_func(alloc_size);
+ /* Take the baseline while both objects are still allocated. */
+ size_t bytes = tcache_bytes_read();
+ dalloc_func(ptr2, alloc_size);
+ /* Expect tcache_bytes to grow by diff (0 if not cached) after dalloc. */
+ tcache_bytes_check_update(&bytes, diff);
+
+ dalloc_func(ptr1, alloc_size);
+ /* Expect tcache_bytes to grow by diff again. */
+ tcache_bytes_check_update(&bytes, diff);
+
+ void *ptr3 = alloc_func(alloc_size);
+ if (cached) { /* ptr1 was freed last; expect it back first. */
+ expect_ptr_eq(ptr1, ptr3, "Unexpected cached ptr");
+ }
+ /* Expect tcache_bytes to shrink by diff after alloc. */
+ tcache_bytes_check_update(&bytes, -diff);
+
+ void *ptr4 = alloc_func(alloc_size);
+ if (cached) { /* ptr2 is the remaining cached object. */
+ expect_ptr_eq(ptr2, ptr4, "Unexpected cached ptr");
+ }
+ /* Expect tcache_bytes to shrink by diff again. */
+ tcache_bytes_check_update(&bytes, -diff);
+ /* Free both once more; each free is expected to add diff back. */
+ dalloc_func(ptr3, alloc_size);
+ tcache_bytes_check_update(&bytes, diff);
+ dalloc_func(ptr4, alloc_size);
+ tcache_bytes_check_update(&bytes, diff);
+}
+
+static void
+test_tcache_max_impl(void) {
+ size_t sz;
+ sz = sizeof(tcache_max);
+ assert_d_eq(mallctl("arenas.tcache_max", (void *)&tcache_max,
+ &sz, NULL, 0), 0, "Unexpected mallctl() failure");
+
+ /* MALLOC_CONF in tcache_max.sh sets tcache_max:1024. */
+ expect_zu_eq(tcache_max, 1024, "tcache_max not expected");
+ /* Sizes at and around the tcache_max boundary. */
+ test_tcache_bytes_alloc(1);
+ test_tcache_bytes_alloc(tcache_max - 1);
+ test_tcache_bytes_alloc(tcache_max);
+ test_tcache_bytes_alloc(tcache_max + 1);
+ /* Sizes around PAGE. */
+ test_tcache_bytes_alloc(PAGE - 1);
+ test_tcache_bytes_alloc(PAGE);
+ test_tcache_bytes_alloc(PAGE + 1);
+
+ size_t large;
+ sz = sizeof(large);
+ assert_d_eq(mallctl("arenas.lextent.0.size", (void *)&large, &sz, NULL,
+ 0), 0, "Unexpected mallctl() failure");
+ /* Sizes around the size reported for large extent class 0. */
+ test_tcache_bytes_alloc(large - 1);
+ test_tcache_bytes_alloc(large);
+ test_tcache_bytes_alloc(large + 1);
+}
+
+TEST_BEGIN(test_tcache_max) {
+ test_skip_if(!config_stats); /* The checks read stats.* mallctls. */
+ test_skip_if(!opt_tcache);
+ /* Run the checks for every alloc x dalloc API combination. */
+ for (alloc_option = alloc_option_start;
+ alloc_option < alloc_option_end;
+ alloc_option++) {
+ for (dalloc_option = dalloc_option_start;
+ dalloc_option < dalloc_option_end;
+ dalloc_option++) {
+ test_tcache_max_impl();
+ }
+ }
+}
+TEST_END
+
+int
+main(void) {
+ return test(test_tcache_max); /* The only test case in this file. */
+}
diff --git a/test/unit/tcache_max.sh b/test/unit/tcache_max.sh
new file mode 100644
index 00000000..4480d733
--- /dev/null
+++ b/test/unit/tcache_max.sh
@@ -0,0 +1,3 @@
+#!/bin/sh
+# test_tcache_max_impl() in tcache_max.c expects tcache_max to be exactly 1024.
+export MALLOC_CONF="tcache_max:1024"