Cache bin: Move in initialization code.

parent 44529da852
commit 60113dfe3b
include/jemalloc/internal/cache_bin.h
@@ -14,7 +14,10 @@
  * of the tcache at all.
  */
 
-/* The size in bytes of each cache bin stack. */
+/*
+ * The size in bytes of each cache bin stack. We also use this to indicate
+ * *counts* of individual objects.
+ */
 typedef uint16_t cache_bin_sz_t;
 
 typedef struct cache_bin_stats_s cache_bin_stats_t;
@@ -311,4 +314,31 @@ cache_bin_finish_flush(cache_bin_t *bin, cache_bin_info_t *info,
 	}
 }
 
+/*
+ * Initialize a cache_bin_info to represent up to the given number of items in
+ * the cache_bins it is associated with.
+ */
+void cache_bin_info_init(cache_bin_info_t *bin_info,
+    cache_bin_sz_t ncached_max);
+/*
+ * Given an array of initialized cache_bin_info_ts, determine how big an
+ * allocation is required to initialize a full set of cache_bin_ts.
+ */
+void cache_bin_info_compute_alloc(cache_bin_info_t *infos, szind_t ninfos,
+    size_t *size, size_t *alignment);
+
+/*
+ * Actually initialize some cache bins. Callers should allocate the backing
+ * memory indicated by a call to cache_bin_compute_alloc. They should then
+ * preincrement, call init once for each bin and info, and then call
+ * cache_bin_postincrement. *alloc_cur will then point immediately past the end
+ * of the allocation.
+ */
+void cache_bin_preincrement(cache_bin_info_t *infos, szind_t ninfos,
+    void *alloc, size_t *cur_offset);
+void cache_bin_postincrement(cache_bin_info_t *infos, szind_t ninfos,
+    void *alloc, size_t *cur_offset);
+void cache_bin_init(cache_bin_t *bin, cache_bin_info_t *info, void *alloc,
+    size_t *cur_offset);
+
 #endif /* JEMALLOC_INTERNAL_CACHE_BIN_H */
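The declarations above describe a three-step protocol: size the backing memory, bracket it with pre/postincrement, and carve it into bins. A minimal sketch of the intended call sequence, mirroring how tcache_init() uses it in src/tcache.c below; NBINS, bins, and alloc_aligned() are illustrative placeholders, not jemalloc symbols:

	/* Describe each bin, then size the shared backing allocation. */
	cache_bin_info_t infos[NBINS];
	cache_bin_t bins[NBINS];
	for (szind_t i = 0; i < NBINS; i++) {
		cache_bin_info_init(&infos[i], /* ncached_max */ 8);
	}
	size_t size, alignment;
	cache_bin_info_compute_alloc(infos, NBINS, &size, &alignment);
	void *alloc = alloc_aligned(size, alignment);	/* hypothetical allocator */

	/* Carve the allocation into per-bin stacks. */
	size_t cur_offset = 0;
	cache_bin_preincrement(infos, NBINS, alloc, &cur_offset);
	for (szind_t i = 0; i < NBINS; i++) {
		cache_bin_init(&bins[i], &infos[i], alloc, &cur_offset);
	}
	cache_bin_postincrement(infos, NBINS, alloc, &cur_offset);
	assert(cur_offset == size);	/* the whole allocation is consumed */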
src/cache_bin.c (101 lines changed)
@@ -1,3 +1,104 @@
 #include "jemalloc/internal/jemalloc_preamble.h"
 #include "jemalloc/internal/jemalloc_internal_includes.h"
 
+#include "jemalloc/internal/bit_util.h"
+
+void
+cache_bin_info_init(cache_bin_info_t *info,
+    cache_bin_sz_t ncached_max) {
+	size_t stack_size = (size_t)ncached_max * sizeof(void *);
+	assert(stack_size < ((size_t)1 << (sizeof(cache_bin_sz_t) * 8)));
+	info->stack_size = (cache_bin_sz_t)stack_size;
+}
+
+void
+cache_bin_info_compute_alloc(cache_bin_info_t *infos, szind_t ninfos,
+    size_t *size, size_t *alignment) {
+	/* For the total bin stack region (per tcache), reserve 2 more slots so
+	 * that
+	 * 1) the empty position can be safely read on the fast path before
+	 *    checking "is_empty"; and
+	 * 2) the cur_ptr can go beyond the empty position by 1 step safely on
+	 *    the fast path (i.e. no overflow).
+	 */
+	*size = sizeof(void *) * 2;
+	for (szind_t i = 0; i < ninfos; i++) {
+		*size += infos[i].stack_size;
+	}
+
+	/*
+	 * 1) Align to at least PAGE, to minimize the # of TLBs needed by the
+	 * smaller sizes; also helps if the larger sizes don't get used at all.
+	 * 2) On 32-bit the pointers won't be compressed; use minimal alignment.
+	 */
+	if (LG_SIZEOF_PTR < 3 || *size < PAGE) {
+		*alignment = PAGE;
+	} else {
+		/*
+		 * Align pow2 to avoid overflow the cache bin compressed
+		 * pointers.
+		 */
+		*alignment = pow2_ceil_zu(*size);
+	}
+}
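A concrete instance of the size/alignment arithmetic above (the bin counts are made-up values, and PAGE is assumed to be its usual 4096 bytes on a 64-bit build):

	cache_bin_info_t infos[3];
	cache_bin_info_init(&infos[0], 8);	/* stack_size = 8 * sizeof(void *) = 64 */
	cache_bin_info_init(&infos[1], 8);	/* 64 */
	cache_bin_info_init(&infos[2], 16);	/* 128 */

	size_t size, alignment;
	cache_bin_info_compute_alloc(infos, 3, &size, &alignment);
	/* size = 2 * 8 (boundary slots) + 64 + 64 + 128 = 272 bytes.
	 * 272 < PAGE, so alignment = PAGE; only much larger totals take the
	 * pow2_ceil_zu() branch, and only on 64-bit (LG_SIZEOF_PTR == 3). */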
+
+void
+cache_bin_preincrement(cache_bin_info_t *infos, szind_t ninfos, void *alloc,
+    size_t *cur_offset) {
+	if (config_debug) {
+		size_t computed_size;
+		size_t computed_alignment;
+
+		/* Pointer should be as aligned as we asked for. */
+		cache_bin_info_compute_alloc(infos, ninfos, &computed_size,
+		    &computed_alignment);
+		assert(((uintptr_t)alloc & (computed_alignment - 1)) == 0);
+
+		/* And that alignment should disallow overflow. */
+		uint32_t lowbits = (uint32_t)((uintptr_t)alloc + computed_size);
+		assert((uint32_t)(uintptr_t)alloc < lowbits);
+	}
+	/*
+	 * Leave a noticeable mark pattern on the boundaries, in case a bug
+	 * starts leaking those. Make it look like the junk pattern but be
+	 * distinct from it.
+	 */
+	uintptr_t preceding_ptr_junk = (uintptr_t)0x7a7a7a7a7a7a7a7aULL;
+	*(uintptr_t *)((uintptr_t)alloc + *cur_offset) = preceding_ptr_junk;
+	*cur_offset += sizeof(void *);
+}
+
+void
+cache_bin_postincrement(cache_bin_info_t *infos, szind_t ninfos, void *alloc,
+    size_t *cur_offset) {
+	/* Note: a7 vs. 7a above -- this tells you which pointer leaked. */
+	uintptr_t trailing_ptr_junk = (uintptr_t)0xa7a7a7a7a7a7a7a7ULL;
+	*(uintptr_t *)((uintptr_t)alloc + *cur_offset) = trailing_ptr_junk;
+	*cur_offset += sizeof(void *);
+}
+
+
+void
+cache_bin_init(cache_bin_t *bin, cache_bin_info_t *info, void *alloc,
+    size_t *cur_offset) {
+	assert(sizeof(bin->cur_ptr) == sizeof(void *));
+	/*
+	 * The full_position points to the lowest available space. Allocations
+	 * will access the slots toward higher addresses (for the benefit of
+	 * adjacent prefetch).
+	 */
+	void *stack_cur = (void *)((uintptr_t)alloc + *cur_offset);
+	void *full_position = stack_cur;
+	uint32_t bin_stack_size = info->stack_size;
+
+	*cur_offset += bin_stack_size;
+	void *empty_position = (void *)((uintptr_t)alloc + *cur_offset);
+
+	/* Init to the empty position. */
+	bin->cur_ptr.ptr = empty_position;
+	bin->low_water_position = bin->cur_ptr.lowbits;
+	bin->full_position = (uint32_t)(uintptr_t)full_position;
+	assert(bin->cur_ptr.lowbits - bin->full_position == bin_stack_size);
+	assert(cache_bin_ncached_get(bin, info) == 0);
+	assert(cache_bin_empty_position_get(bin, info) == empty_position);
+}
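For orientation, a sketch of what cache_bin_init() leaves behind for a single bin, derived from the code above (S stands for info->stack_size):

	/*
	 *  alloc + cur_offset (on entry)          alloc + cur_offset (on return)
	 *       |                                      |
	 *       v                                      v
	 *       [ S bytes of pointer slots ........... ]
	 *       ^                                      ^
	 *  full_position (lowest address)         empty_position = full_position + S
	 *
	 * cur_ptr and low_water_position both start at empty_position, so the bin
	 * holds zero cached objects; the asserts at the end of cache_bin_init()
	 * check exactly this via cache_bin_ncached_get() and
	 * cache_bin_empty_position_get().
	 */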
src/tcache.c (124 lines changed)
@@ -14,16 +14,10 @@ bool opt_tcache = true;
 ssize_t opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT;
 
 cache_bin_info_t *tcache_bin_info;
-/*
- * For the total bin stack region (per tcache), reserve 2 more slots so that 1)
- * the empty position can be safely read on the fast path before checking
- * "is_empty"; and 2) the cur_ptr can go beyond the empty position by 1 step
- * safely on the fast path (i.e. no overflow).
- */
-static const unsigned total_stack_padding = sizeof(void *) * 2;
-
 /* Total stack size required (per tcache). Include the padding above. */
-static uint32_t total_stack_bytes;
+static size_t tcache_bin_alloc_size;
+static size_t tcache_bin_alloc_alignment;
 
 unsigned nhbins;
 size_t tcache_maxclass;
@@ -430,43 +424,8 @@ tsd_tcache_enabled_data_init(tsd_t *tsd) {
 	return false;
 }
 
-static bool
-tcache_bin_init(cache_bin_t *bin, szind_t ind, uintptr_t *stack_cur) {
-	assert(sizeof(bin->cur_ptr) == sizeof(void *));
-	/*
-	 * The full_position points to the lowest available space. Allocations
-	 * will access the slots toward higher addresses (for the benefit of
-	 * adjacent prefetch).
-	 */
-	void *full_position = (void *)*stack_cur;
-	uint32_t bin_stack_size = tcache_bin_info[ind].stack_size;
-
-	*stack_cur += bin_stack_size;
-	void *empty_position = (void *)*stack_cur;
-
-	/* Init to the empty position. */
-	bin->cur_ptr.ptr = empty_position;
-	bin->low_water_position = bin->cur_ptr.lowbits;
-	bin->full_position = (uint32_t)(uintptr_t)full_position;
-	assert(bin->cur_ptr.lowbits - bin->full_position == bin_stack_size);
-	assert(cache_bin_ncached_get(bin, &tcache_bin_info[ind]) == 0);
-	assert(cache_bin_empty_position_get(bin, &tcache_bin_info[ind])
-	    == empty_position);
-
-	return false;
-}
-
-/* Sanity check only. */
-static bool
-tcache_bin_lowbits_overflowable(void *ptr) {
-	uint32_t lowbits = (uint32_t)((uintptr_t)ptr + total_stack_bytes);
-	return lowbits < (uint32_t)(uintptr_t)ptr;
-}
-
 static void
 tcache_init(tsd_t *tsd, tcache_t *tcache, void *avail_stack) {
-	assert(!tcache_bin_lowbits_overflowable(avail_stack));
-
 	memset(&tcache->link, 0, sizeof(ql_elm(tcache_t)));
 	tcache->next_gc_bin = 0;
 	tcache->arena = NULL;
@@ -476,35 +435,25 @@ tcache_init(tsd_t *tsd, tcache_t *tcache, void *avail_stack) {
 	memset(tcache->bins_large, 0, sizeof(cache_bin_t) * (nhbins - SC_NBINS));
 
 	unsigned i = 0;
-	uintptr_t stack_cur = (uintptr_t)avail_stack;
+	size_t cur_offset = 0;
+	cache_bin_preincrement(tcache_bin_info, nhbins, avail_stack,
+	    &cur_offset);
 	for (; i < SC_NBINS; i++) {
 		tcache->lg_fill_div[i] = 1;
 		tcache->bin_refilled[i] = false;
 		cache_bin_t *bin = tcache_small_bin_get(tcache, i);
-		tcache_bin_init(bin, i, &stack_cur);
+		cache_bin_init(bin, &tcache_bin_info[i], avail_stack,
+		    &cur_offset);
 	}
 	for (; i < nhbins; i++) {
 		cache_bin_t *bin = tcache_large_bin_get(tcache, i);
-		tcache_bin_init(bin, i, &stack_cur);
+		cache_bin_init(bin, &tcache_bin_info[i], avail_stack,
+		    &cur_offset);
 	}
+	cache_bin_postincrement(tcache_bin_info, nhbins, avail_stack,
+	    &cur_offset);
 	/* Sanity check that the whole stack is used. */
-	size_t stack_offset = stack_cur - (uintptr_t)avail_stack;
-	assert(stack_offset + total_stack_padding == total_stack_bytes);
-}
-
-static size_t
-tcache_bin_stack_alignment (size_t size) {
-	/*
-	 * 1) Align to at least PAGE, to minimize the # of TLBs needed by the
-	 * smaller sizes; also helps if the larger sizes don't get used at all.
-	 * 2) On 32-bit the pointers won't be compressed; use minimal alignment.
-	 */
-	if (LG_SIZEOF_PTR < 3 || size < PAGE) {
-		return PAGE;
-	}
-	/* Align pow2 to avoid overflow the cache bin compressed pointers. */
-	return pow2_ceil_zu(size);
-}
+	assert(cur_offset == tcache_bin_alloc_size);
 }
 
 /* Initialize auto tcache (embedded in TSD). */
@@ -512,8 +461,8 @@ bool
 tsd_tcache_data_init(tsd_t *tsd) {
 	tcache_t *tcache = tsd_tcachep_get_unsafe(tsd);
 	assert(tcache_small_bin_get(tcache, 0)->cur_ptr.ptr == NULL);
-	size_t alignment = tcache_bin_stack_alignment(total_stack_bytes);
-	size_t size = sz_sa2u(total_stack_bytes, alignment);
+	size_t alignment = tcache_bin_alloc_alignment;
+	size_t size = sz_sa2u(tcache_bin_alloc_size, alignment);
 
 	void *avail_array = ipallocztm(tsd_tsdn(tsd), size, alignment, true,
 	    NULL, true, arena_get(TSDN_NULL, 0, true));
@@ -551,22 +500,29 @@
 /* Created manual tcache for tcache.create mallctl. */
 tcache_t *
 tcache_create_explicit(tsd_t *tsd) {
-	size_t size = sizeof(tcache_t);
+	/*
+	 * We place the cache bin stacks, then the tcache_t, then a pointer to
+	 * the beginning of the whole allocation (for freeing). The makes sure
+	 * the cache bins have the requested alignment.
+	 */
+	size_t size = tcache_bin_alloc_size + sizeof(tcache_t) + sizeof(void *);
 	/* Naturally align the pointer stacks. */
 	size = PTR_CEILING(size);
-	size_t stack_offset = size;
-	size += total_stack_bytes;
-	size_t alignment = tcache_bin_stack_alignment(size);
-	size = sz_sa2u(size, alignment);
+	size = sz_sa2u(size, tcache_bin_alloc_alignment);
 
-	tcache_t *tcache = ipallocztm(tsd_tsdn(tsd), size, alignment, true,
-	    NULL, true, arena_get(TSDN_NULL, 0, true));
-	if (tcache == NULL) {
+	void *mem = ipallocztm(tsd_tsdn(tsd), size, tcache_bin_alloc_alignment,
+	    true, NULL, true, arena_get(TSDN_NULL, 0, true));
+	if (mem == NULL) {
 		return NULL;
 	}
+	void *avail_array = mem;
+	tcache_t *tcache = (void *)((uintptr_t)avail_array
+	    + tcache_bin_alloc_size);
+	void **head_ptr = (void *)((uintptr_t)avail_array
+	    + tcache_bin_alloc_size + sizeof(tcache_t));
+	tcache_init(tsd, tcache, avail_array);
+	*head_ptr = mem;
 
-	void *avail_array = (void *)((uintptr_t)tcache +
-	    (uintptr_t)stack_offset);
-	tcache_init(tsd, tcache, avail_array);
 	tcache_arena_associate(tsd_tsdn(tsd), tcache, arena_ichoose(tsd, NULL));
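The allocation layout that tcache_create_explicit() builds above can be sketched as follows (an illustration of the code, not additional source):

	/*
	 * One ipallocztm() allocation, aligned to tcache_bin_alloc_alignment:
	 *
	 *   mem --+-- cache bin stacks   (tcache_bin_alloc_size bytes)
	 *         +-- tcache_t           (sizeof(tcache_t) bytes)
	 *         +-- void *head_ptr     (stores mem, so tcache_destroy() can
	 *                                 recover and free the whole allocation)
	 *
	 * Placing the stacks first is what guarantees they get the requested
	 * alignment; the back-pointer is why the manual-tcache branch of
	 * tcache_destroy() below frees *mem_begin rather than the tcache pointer.
	 */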
@@ -617,8 +573,10 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache, bool tsd_tcache) {
 		    tcache_bin_info[0].stack_size);
 		idalloctm(tsd_tsdn(tsd), avail_array, NULL, NULL, true, true);
 	} else {
+		/* See the comment at the top of tcache_create_explicit. */
+		void **mem_begin = (void **)((uintptr_t)tcache + sizeof(tcache_t));
 		/* Release both the tcache struct and avail array. */
-		idalloctm(tsd_tsdn(tsd), tcache, NULL, NULL, true, true);
+		idalloctm(tsd_tsdn(tsd), *mem_begin, NULL, NULL, true, true);
 	}
 
 /*
@@ -816,7 +774,6 @@ tcache_boot(tsdn_t *tsdn, base_t *base) {
 		return true;
 	}
 	unsigned i, ncached_max;
-	total_stack_bytes = 0;
 	for (i = 0; i < SC_NBINS; i++) {
 		if ((bin_infos[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MIN) {
 			ncached_max = TCACHE_NSLOTS_SMALL_MIN;
@@ -826,18 +783,13 @@
 		} else {
 			ncached_max = TCACHE_NSLOTS_SMALL_MAX;
 		}
-		unsigned stack_size = ncached_max * sizeof(void *);
-		assert(stack_size < ((uint64_t)1 <<
-		    (sizeof(cache_bin_sz_t) * 8)));
-		tcache_bin_info[i].stack_size = stack_size;
-		total_stack_bytes += stack_size;
+		cache_bin_info_init(&tcache_bin_info[i], ncached_max);
 	}
 	for (; i < nhbins; i++) {
-		unsigned stack_size = TCACHE_NSLOTS_LARGE * sizeof(void *);
-		tcache_bin_info[i].stack_size = stack_size;
-		total_stack_bytes += stack_size;
+		cache_bin_info_init(&tcache_bin_info[i], TCACHE_NSLOTS_LARGE);
 	}
-	total_stack_bytes += total_stack_padding;
+	cache_bin_info_compute_alloc(tcache_bin_info, i, &tcache_bin_alloc_size,
+	    &tcache_bin_alloc_alignment);
 
 	return false;
 }