Allow opt.tcache_max to accept small size classes.

Previously, all small size classes were cached.  However, this has downsides
-- particularly when the page size is greater than 4K (e.g. iOS), which results
in a much higher SMALL_MAXCLASS.

This change allows tcache_max to be set to lower values, down into the small
size classes, to better control the resources taken by the tcache.
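
As a quick usage sketch (not part of this commit, and assuming an unprefixed
jemalloc public API that exposes the "opt.tcache_max" read-only mallctl): run a
program with MALLOC_CONF="tcache_max:1024" to cap the tcache at a small size
class, then read the effective value back:

#include <stdio.h>
#include <jemalloc/jemalloc.h>

int main(void) {
	/* Read the boot-time opt.tcache_max value chosen via MALLOC_CONF. */
	size_t tcache_max;
	size_t sz = sizeof(tcache_max);
	if (mallctl("opt.tcache_max", &tcache_max, &sz, NULL, 0) != 0) {
		fprintf(stderr, "opt.tcache_max not available\n");
		return 1;
	}
	printf("tcache caches allocations up to %zu bytes\n", tcache_max);
	return 0;
}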
Qi Wang authored 2020-10-21 19:47:57 -07:00; committed by Qi Wang
parent ea32060f9c
commit bf72188f80
10 changed files with 265 additions and 33 deletions

src/cache_bin.c

@@ -24,6 +24,7 @@ cache_bin_info_compute_alloc(cache_bin_info_t *infos, szind_t ninfos,
*/
*size = sizeof(void *) * 2;
for (szind_t i = 0; i < ninfos; i++) {
+assert(infos[i].ncached_max > 0);
*size += infos[i].ncached_max * sizeof(void *);
}
@@ -46,26 +47,20 @@ cache_bin_preincrement(cache_bin_info_t *infos, szind_t ninfos, void *alloc,
&computed_alignment);
assert(((uintptr_t)alloc & (computed_alignment - 1)) == 0);
}
-/*
- * Leave a noticeable mark pattern on the boundaries, in case a bug
- * starts leaking those. Make it look like the junk pattern but be
- * distinct from it.
- */
-uintptr_t preceding_ptr_junk = (uintptr_t)0x7a7a7a7a7a7a7a7aULL;
-*(uintptr_t *)((uintptr_t)alloc + *cur_offset) = preceding_ptr_junk;
+*(uintptr_t *)((uintptr_t)alloc + *cur_offset) =
+cache_bin_preceding_junk;
*cur_offset += sizeof(void *);
}
void
cache_bin_postincrement(cache_bin_info_t *infos, szind_t ninfos, void *alloc,
size_t *cur_offset) {
-/* Note: a7 vs. 7a above -- this tells you which pointer leaked. */
-uintptr_t trailing_ptr_junk = (uintptr_t)0xa7a7a7a7a7a7a7a7ULL;
-*(uintptr_t *)((uintptr_t)alloc + *cur_offset) = trailing_ptr_junk;
+*(uintptr_t *)((uintptr_t)alloc + *cur_offset) =
+cache_bin_trailing_junk;
*cur_offset += sizeof(void *);
}
void
cache_bin_init(cache_bin_t *bin, cache_bin_info_t *info, void *alloc,
size_t *cur_offset) {
@@ -90,6 +85,8 @@ cache_bin_init(cache_bin_t *bin, cache_bin_info_t *info, void *alloc,
(uint16_t)(uintptr_t) bin->stack_head) == bin_stack_size);
assert(cache_bin_ncached_get(bin, info) == 0);
assert(cache_bin_empty_position_get(bin, info) == empty_position);
+assert(bin_stack_size > 0 || empty_position == full_position);
}
bool
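
The "preceding" and "trailing" junk values above are guard words around each
bin's pointer stack.  A minimal, self-contained illustration of the idea
(hypothetical names, not jemalloc code): two distinct sentinel patterns mark
the boundaries, so a leaked boundary slot can be classified by which pattern
shows up.

#include <stdio.h>
#include <stdint.h>

#define STACK_SLOTS 4

/* Distinct patterns (7a... vs a7...) identify which boundary leaked. */
static const uintptr_t preceding_junk = (uintptr_t)0x7a7a7a7a7a7a7a7aULL;
static const uintptr_t trailing_junk  = (uintptr_t)0xa7a7a7a7a7a7a7a7ULL;

int main(void) {
	void *slots[STACK_SLOTS + 2];           /* usable range: slots[1..STACK_SLOTS] */
	slots[0] = (void *)preceding_junk;      /* mark the low boundary */
	slots[STACK_SLOTS + 1] = (void *)trailing_junk; /* mark the high boundary */

	/* If a bug ever hands a boundary slot out, the pattern says which side. */
	uintptr_t leaked = (uintptr_t)slots[0];
	printf("leaked %s boundary\n",
	    leaked == preceding_junk ? "preceding" : "trailing");
	return 0;
}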

src/tcache.c

@@ -62,7 +62,9 @@ cache_bin_info_t *tcache_bin_info;
static size_t tcache_bin_alloc_size;
static size_t tcache_bin_alloc_alignment;
+/* Number of cache bins enabled, including both large and small. */
unsigned nhbins;
+/* Max size class to be cached (can be small or large). */
size_t tcache_maxclass;
tcaches_t *tcaches;
@@ -567,7 +569,14 @@ tcache_init(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache,
tcache_slow->arena = NULL;
tcache_slow->dyn_alloc = mem;
-memset(tcache->bins, 0, sizeof(cache_bin_t) * nhbins);
+/*
+ * We reserve cache bins for all small size classes, even if some may
+ * not get used (i.e. bins higher than nhbins). This allows the fast
+ * and common paths to access cache bin metadata safely w/o worrying
+ * about which ones are disabled.
+ */
+unsigned n_reserved_bins = nhbins < SC_NBINS ? SC_NBINS : nhbins;
+memset(tcache->bins, 0, sizeof(cache_bin_t) * n_reserved_bins);
size_t cur_offset = 0;
cache_bin_preincrement(tcache_bin_info, nhbins, mem,
@@ -576,19 +585,34 @@ tcache_init(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache,
if (i < SC_NBINS) {
tcache_slow->lg_fill_div[i] = 1;
tcache_slow->bin_refilled[i] = false;
-tcache_slow->bin_flush_delay_items[i]
-= tcache_gc_item_delay_compute(i);
}
cache_bin_t *cache_bin = &tcache->bins[i];
cache_bin_init(cache_bin, &tcache_bin_info[i], mem,
&cur_offset);
}
+/*
+ * For small size classes beyond tcache_maxclass (i.e. nhbins < NBINS),
+ * their cache bins are initialized to a state to safely and efficiently
+ * fail all fastpath alloc / free, so that no additional check around
+ * nhbins is needed on fastpath.
+ */
+for (unsigned i = nhbins; i < SC_NBINS; i++) {
+/* Disabled small bins. */
+cache_bin_t *cache_bin = &tcache->bins[i];
+void *fake_stack = mem;
+size_t fake_offset = 0;
+cache_bin_init(cache_bin, &tcache_bin_info[i], fake_stack,
+&fake_offset);
+assert(tcache_small_bin_disabled(i, cache_bin));
+}
cache_bin_postincrement(tcache_bin_info, nhbins, mem,
&cur_offset);
/* Sanity check that the whole stack is used. */
assert(cur_offset == tcache_bin_alloc_size);
+for (unsigned i = 0; i < SC_NBINS; i++) {
+tcache_slow->bin_flush_delay_items[i]
+= tcache_gc_item_delay_compute(i);
+}
}
/* Initialize auto tcache (embedded in TSD). */
@@ -935,9 +959,6 @@ tcache_ncached_max_compute(szind_t szind) {
bool
tcache_boot(tsdn_t *tsdn, base_t *base) {
tcache_maxclass = sz_s2u(opt_tcache_max);
-if (tcache_maxclass < SC_SMALL_MAXCLASS) {
-tcache_maxclass = SC_SMALL_MAXCLASS;
-}
assert(tcache_maxclass <= TCACHE_MAXCLASS_LIMIT);
nhbins = sz_size2index(tcache_maxclass) + 1;
@@ -946,16 +967,25 @@ tcache_boot(tsdn_t *tsdn, base_t *base) {
return true;
}
-/* Initialize tcache_bin_info. */
-tcache_bin_info = (cache_bin_info_t *)base_alloc(tsdn, base,
-nhbins * sizeof(cache_bin_info_t), CACHELINE);
+/* Initialize tcache_bin_info. See comments in tcache_init(). */
+unsigned n_reserved_bins = nhbins < SC_NBINS ? SC_NBINS : nhbins;
+size_t size = n_reserved_bins * sizeof(cache_bin_info_t);
+tcache_bin_info = (cache_bin_info_t *)base_alloc(tsdn, base, size,
+CACHELINE);
if (tcache_bin_info == NULL) {
return true;
}
for (szind_t i = 0; i < nhbins; i++) {
unsigned ncached_max = tcache_ncached_max_compute(i);
cache_bin_info_init(&tcache_bin_info[i], ncached_max);
}
+for (szind_t i = nhbins; i < SC_NBINS; i++) {
+/* Disabled small bins. */
+cache_bin_info_init(&tcache_bin_info[i], 0);
+assert(tcache_small_bin_disabled(i, NULL));
+}
cache_bin_info_compute_alloc(tcache_bin_info, nhbins,
&tcache_bin_alloc_size, &tcache_bin_alloc_alignment);
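
A rough sketch of why a zero-capacity bin makes the fast paths fail without
any extra "is this bin enabled?" branch (illustrative stack-based bin with
hypothetical names, not jemalloc's actual cache_bin implementation): when
ncached_max == 0 the empty and full positions coincide, so an alloc finds
nothing cached and a dalloc sees a full bin, and both fall through to the slow
path using the same checks that ordinary bins already need.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* Illustrative LIFO pointer-stack bin (simplified). */
typedef struct {
	void **stack_head; /* next item to pop */
	void **low_bound;  /* full position: no more pushes allowed */
	void **empty_pos;  /* empty position: no more pops available */
} toy_bin_t;

static void toy_bin_init(toy_bin_t *bin, void **stack, size_t ncached_max) {
	bin->low_bound = stack;
	bin->empty_pos = stack + ncached_max; /* == low_bound when ncached_max == 0 */
	bin->stack_head = bin->empty_pos;     /* start empty */
}

/* Fast-path alloc: fails (returns NULL) whenever the bin is empty. */
static void *toy_bin_alloc(toy_bin_t *bin) {
	if (bin->stack_head == bin->empty_pos) {
		return NULL; /* caller takes the slow path */
	}
	return *bin->stack_head++;
}

/* Fast-path dalloc: fails whenever the bin is full. */
static bool toy_bin_dalloc(toy_bin_t *bin, void *ptr) {
	if (bin->stack_head == bin->low_bound) {
		return false; /* caller takes the slow path */
	}
	*--bin->stack_head = ptr;
	return true;
}

int main(void) {
	void *storage[1];
	toy_bin_t disabled;
	toy_bin_init(&disabled, storage, 0); /* zero capacity: empty == full */

	int x;
	/* Both fast paths reject immediately with the generic checks. */
	printf("alloc -> %p, dalloc -> %d\n", toy_bin_alloc(&disabled),
	    (int)toy_bin_dalloc(&disabled, &x));
	return 0;
}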