Cache bin: Move in initialization code.

David Goldblatt 2020-02-29 14:41:47 -08:00 committed by David Goldblatt
parent 44529da852
commit 60113dfe3b
3 changed files with 170 additions and 87 deletions

include/jemalloc/internal/cache_bin.h

@@ -14,7 +14,10 @@
* of the tcache at all.
*/
/* The size in bytes of each cache bin stack. */
/*
* The size in bytes of each cache bin stack. We also use this to indicate
* *counts* of individual objects.
*/
typedef uint16_t cache_bin_sz_t;
typedef struct cache_bin_stats_s cache_bin_stats_t;
@@ -311,4 +314,31 @@ cache_bin_finish_flush(cache_bin_t *bin, cache_bin_info_t *info,
}
}
/*
* Initialize a cache_bin_info to represent up to the given number of items in
* the cache_bins it is associated with.
*/
void cache_bin_info_init(cache_bin_info_t *bin_info,
cache_bin_sz_t ncached_max);
/*
* Given an array of initialized cache_bin_info_ts, determine how big an
* allocation is required to initialize a full set of cache_bin_ts.
*/
void cache_bin_info_compute_alloc(cache_bin_info_t *infos, szind_t ninfos,
size_t *size, size_t *alignment);
/*
* Actually initialize some cache bins. Callers should allocate the backing
* memory indicated by a call to cache_bin_info_compute_alloc. They should then
* call cache_bin_preincrement, call cache_bin_init once for each bin and info,
* and then call cache_bin_postincrement. *cur_offset will then equal the size
* of the allocation (i.e. alloc + *cur_offset points immediately past its end).
*/
void cache_bin_preincrement(cache_bin_info_t *infos, szind_t ninfos,
void *alloc, size_t *cur_offset);
void cache_bin_postincrement(cache_bin_info_t *infos, szind_t ninfos,
void *alloc, size_t *cur_offset);
void cache_bin_init(cache_bin_t *bin, cache_bin_info_t *info, void *alloc,
size_t *cur_offset);
#endif /* JEMALLOC_INTERNAL_CACHE_BIN_H */
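
The protocol described above, end to end, as a minimal sketch (the allocator call my_alloc_aligned and the bins/infos arrays are illustrative assumptions, not part of this commit):

/* Illustrative only: drive the new cache_bin init protocol once. */
static bool
example_bins_init(cache_bin_info_t *infos, szind_t ninfos, cache_bin_t *bins) {
    size_t size, alignment;
    cache_bin_info_compute_alloc(infos, ninfos, &size, &alignment);
    /* Stand-in for the real allocation call; must honor the alignment. */
    void *alloc = my_alloc_aligned(size, alignment);
    if (alloc == NULL) {
        return true;
    }
    size_t cur_offset = 0;
    cache_bin_preincrement(infos, ninfos, alloc, &cur_offset);
    for (szind_t i = 0; i < ninfos; i++) {
        cache_bin_init(&bins[i], &infos[i], alloc, &cur_offset);
    }
    cache_bin_postincrement(infos, ninfos, alloc, &cur_offset);
    /* The whole region is consumed exactly. */
    assert(cur_offset == size);
    return false;
}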

src/cache_bin.c

@@ -1,3 +1,104 @@
#include "jemalloc/internal/jemalloc_preamble.h"
#include "jemalloc/internal/jemalloc_internal_includes.h"
#include "jemalloc/internal/bit_util.h"
void
cache_bin_info_init(cache_bin_info_t *info,
cache_bin_sz_t ncached_max) {
size_t stack_size = (size_t)ncached_max * sizeof(void *);
assert(stack_size < ((size_t)1 << (sizeof(cache_bin_sz_t) * 8)));
info->stack_size = (cache_bin_sz_t)stack_size;
}
void
cache_bin_info_compute_alloc(cache_bin_info_t *infos, szind_t ninfos,
size_t *size, size_t *alignment) {
/*
* For the total bin stack region (per tcache), reserve 2 more slots so that
* 1) the empty position can be safely read on the fast path before
* checking "is_empty"; and
* 2) the cur_ptr can go beyond the empty position by 1 step safely on
* the fast path (i.e. no overflow).
*/
*size = sizeof(void *) * 2;
for (szind_t i = 0; i < ninfos; i++) {
*size += infos[i].stack_size;
}
/*
* 1) Align to at least PAGE, to minimize the # of TLBs needed by the
* smaller sizes; also helps if the larger sizes don't get used at all.
* 2) On 32-bit the pointers won't be compressed; use minimal alignment.
*/
if (LG_SIZEOF_PTR < 3 || *size < PAGE) {
*alignment = PAGE;
} else {
/*
* Align to a power of two to avoid overflowing the cache bin
* compressed pointers.
*/
*alignment = pow2_ceil_zu(*size);
}
}
void
cache_bin_preincrement(cache_bin_info_t *infos, szind_t ninfos, void *alloc,
size_t *cur_offset) {
if (config_debug) {
size_t computed_size;
size_t computed_alignment;
/* Pointer should be as aligned as we asked for. */
cache_bin_info_compute_alloc(infos, ninfos, &computed_size,
&computed_alignment);
assert(((uintptr_t)alloc & (computed_alignment - 1)) == 0);
/* And that alignment should disallow overflow. */
uint32_t lowbits = (uint32_t)((uintptr_t)alloc + computed_size);
assert((uint32_t)(uintptr_t)alloc < lowbits);
}
/*
* Leave a noticeable mark pattern on the boundaries, in case a bug
* starts leaking those. Make it look like the junk pattern but be
* distinct from it.
*/
uintptr_t preceding_ptr_junk = (uintptr_t)0x7a7a7a7a7a7a7a7aULL;
*(uintptr_t *)((uintptr_t)alloc + *cur_offset) = preceding_ptr_junk;
*cur_offset += sizeof(void *);
}
void
cache_bin_postincrement(cache_bin_info_t *infos, szind_t ninfos, void *alloc,
size_t *cur_offset) {
/* Note: a7 vs. 7a above -- this tells you which pointer leaked. */
uintptr_t trailing_ptr_junk = (uintptr_t)0xa7a7a7a7a7a7a7a7ULL;
*(uintptr_t *)((uintptr_t)alloc + *cur_offset) = trailing_ptr_junk;
*cur_offset += sizeof(void *);
}
void
cache_bin_init(cache_bin_t *bin, cache_bin_info_t *info, void *alloc,
size_t *cur_offset) {
assert(sizeof(bin->cur_ptr) == sizeof(void *));
/*
* The full_position points to the lowest available space. Allocations
* will access the slots toward higher addresses (for the benefit of
* adjacent prefetch).
*/
void *stack_cur = (void *)((uintptr_t)alloc + *cur_offset);
void *full_position = stack_cur;
uint32_t bin_stack_size = info->stack_size;
*cur_offset += bin_stack_size;
void *empty_position = (void *)((uintptr_t)alloc + *cur_offset);
/* Init to the empty position. */
bin->cur_ptr.ptr = empty_position;
bin->low_water_position = bin->cur_ptr.lowbits;
bin->full_position = (uint32_t)(uintptr_t)full_position;
assert(bin->cur_ptr.lowbits - bin->full_position == bin_stack_size);
assert(cache_bin_ncached_get(bin, info) == 0);
assert(cache_bin_empty_position_get(bin, info) == empty_position);
}
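
A worked example of the layout these functions produce (the bin sizes are assumed for illustration, not taken from the commit): on a 64-bit build with two bins whose stack_size values are 80 and 160 bytes, cache_bin_info_compute_alloc computes *size = 8 + 80 + 160 + 8 = 256 and, since 256 < PAGE, *alignment = PAGE. The region then fills in as:

offset   0:  0x7a7a7a7a7a7a7a7a   (preceding junk pointer, written by cache_bin_preincrement)
offset   8:  bin 0 stack, 80 bytes  (full_position = alloc + 8, empty_position = cur_ptr = alloc + 88)
offset  88:  bin 1 stack, 160 bytes (full_position = alloc + 88, empty_position = cur_ptr = alloc + 248)
offset 248:  0xa7a7a7a7a7a7a7a7   (trailing junk pointer, written by cache_bin_postincrement)

and *cur_offset finishes at 256, matching the computed size.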

src/tcache.c

@@ -14,16 +14,10 @@ bool opt_tcache = true;
ssize_t opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT;
cache_bin_info_t *tcache_bin_info;
/*
* For the total bin stack region (per tcache), reserve 2 more slots so that 1)
* the empty position can be safely read on the fast path before checking
* "is_empty"; and 2) the cur_ptr can go beyond the empty position by 1 step
* safely on the fast path (i.e. no overflow).
*/
static const unsigned total_stack_padding = sizeof(void *) * 2;
/* Total stack size required (per tcache). Include the padding above. */
static uint32_t total_stack_bytes;
static size_t tcache_bin_alloc_size;
static size_t tcache_bin_alloc_alignment;
unsigned nhbins;
size_t tcache_maxclass;
@@ -430,43 +424,8 @@ tsd_tcache_enabled_data_init(tsd_t *tsd) {
return false;
}
static bool
tcache_bin_init(cache_bin_t *bin, szind_t ind, uintptr_t *stack_cur) {
assert(sizeof(bin->cur_ptr) == sizeof(void *));
/*
* The full_position points to the lowest available space. Allocations
* will access the slots toward higher addresses (for the benefit of
* adjacent prefetch).
*/
void *full_position = (void *)*stack_cur;
uint32_t bin_stack_size = tcache_bin_info[ind].stack_size;
*stack_cur += bin_stack_size;
void *empty_position = (void *)*stack_cur;
/* Init to the empty position. */
bin->cur_ptr.ptr = empty_position;
bin->low_water_position = bin->cur_ptr.lowbits;
bin->full_position = (uint32_t)(uintptr_t)full_position;
assert(bin->cur_ptr.lowbits - bin->full_position == bin_stack_size);
assert(cache_bin_ncached_get(bin, &tcache_bin_info[ind]) == 0);
assert(cache_bin_empty_position_get(bin, &tcache_bin_info[ind])
== empty_position);
return false;
}
/* Sanity check only. */
static bool
tcache_bin_lowbits_overflowable(void *ptr) {
uint32_t lowbits = (uint32_t)((uintptr_t)ptr + total_stack_bytes);
return lowbits < (uint32_t)(uintptr_t)ptr;
}
static void
tcache_init(tsd_t *tsd, tcache_t *tcache, void *avail_stack) {
assert(!tcache_bin_lowbits_overflowable(avail_stack));
memset(&tcache->link, 0, sizeof(ql_elm(tcache_t)));
tcache->next_gc_bin = 0;
tcache->arena = NULL;
@@ -476,35 +435,25 @@ tcache_init(tsd_t *tsd, tcache_t *tcache, void *avail_stack) {
memset(tcache->bins_large, 0, sizeof(cache_bin_t) * (nhbins - SC_NBINS));
unsigned i = 0;
uintptr_t stack_cur = (uintptr_t)avail_stack;
size_t cur_offset = 0;
cache_bin_preincrement(tcache_bin_info, nhbins, avail_stack,
&cur_offset);
for (; i < SC_NBINS; i++) {
tcache->lg_fill_div[i] = 1;
tcache->bin_refilled[i] = false;
cache_bin_t *bin = tcache_small_bin_get(tcache, i);
tcache_bin_init(bin, i, &stack_cur);
cache_bin_init(bin, &tcache_bin_info[i], avail_stack,
&cur_offset);
}
for (; i < nhbins; i++) {
cache_bin_t *bin = tcache_large_bin_get(tcache, i);
tcache_bin_init(bin, i, &stack_cur);
cache_bin_init(bin, &tcache_bin_info[i], avail_stack,
&cur_offset);
}
cache_bin_postincrement(tcache_bin_info, nhbins, avail_stack,
&cur_offset);
/* Sanity check that the whole stack is used. */
size_t stack_offset = stack_cur - (uintptr_t)avail_stack;
assert(stack_offset + total_stack_padding == total_stack_bytes);
}
static size_t
tcache_bin_stack_alignment (size_t size) {
/*
* 1) Align to at least PAGE, to minimize the # of TLBs needed by the
* smaller sizes; also helps if the larger sizes don't get used at all.
* 2) On 32-bit the pointers won't be compressed; use minimal alignment.
*/
if (LG_SIZEOF_PTR < 3 || size < PAGE) {
return PAGE;
}
/* Align to a power of two to avoid overflowing the cache bin compressed pointers. */
return pow2_ceil_zu(size);
assert(cur_offset == tcache_bin_alloc_size);
}
/* Initialize auto tcache (embedded in TSD). */
@@ -512,8 +461,8 @@ bool
tsd_tcache_data_init(tsd_t *tsd) {
tcache_t *tcache = tsd_tcachep_get_unsafe(tsd);
assert(tcache_small_bin_get(tcache, 0)->cur_ptr.ptr == NULL);
size_t alignment = tcache_bin_stack_alignment(total_stack_bytes);
size_t size = sz_sa2u(total_stack_bytes, alignment);
size_t alignment = tcache_bin_alloc_alignment;
size_t size = sz_sa2u(tcache_bin_alloc_size, alignment);
void *avail_array = ipallocztm(tsd_tsdn(tsd), size, alignment, true,
NULL, true, arena_get(TSDN_NULL, 0, true));
@@ -551,22 +500,29 @@
/* Created manual tcache for tcache.create mallctl. */
tcache_t *
tcache_create_explicit(tsd_t *tsd) {
size_t size = sizeof(tcache_t);
/*
* We place the cache bin stacks, then the tcache_t, then a pointer to
the beginning of the whole allocation (for freeing). This makes sure
* the cache bins have the requested alignment.
*/
size_t size = tcache_bin_alloc_size + sizeof(tcache_t) + sizeof(void *);
/* Naturally align the pointer stacks. */
size = PTR_CEILING(size);
size_t stack_offset = size;
size += total_stack_bytes;
size_t alignment = tcache_bin_stack_alignment(size);
size = sz_sa2u(size, alignment);
size = sz_sa2u(size, tcache_bin_alloc_alignment);
tcache_t *tcache = ipallocztm(tsd_tsdn(tsd), size, alignment, true,
NULL, true, arena_get(TSDN_NULL, 0, true));
if (tcache == NULL) {
void *mem = ipallocztm(tsd_tsdn(tsd), size, tcache_bin_alloc_alignment,
true, NULL, true, arena_get(TSDN_NULL, 0, true));
if (mem == NULL) {
return NULL;
}
void *avail_array = mem;
tcache_t *tcache = (void *)((uintptr_t)avail_array
+ tcache_bin_alloc_size);
void **head_ptr = (void *)((uintptr_t)avail_array
+ tcache_bin_alloc_size + sizeof(tcache_t));
tcache_init(tsd, tcache, avail_array);
*head_ptr = mem;
void *avail_array = (void *)((uintptr_t)tcache +
(uintptr_t)stack_offset);
tcache_init(tsd, tcache, avail_array);
tcache_arena_associate(tsd_tsdn(tsd), tcache, arena_ichoose(tsd, NULL));
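
For reference, the resulting layout of the explicit-tcache allocation (offsets relative to mem, the start of the ipallocztm block):

[0, tcache_bin_alloc_size)                                      cache bin stacks (avail_array)
[tcache_bin_alloc_size, + sizeof(tcache_t))                     the tcache_t itself
[tcache_bin_alloc_size + sizeof(tcache_t), + sizeof(void *))    head_ptr, which stores mem

tcache_destroy recovers mem through this stored pointer (mem_begin in the next hunk) and hands it to idalloctm, freeing the stacks, the struct, and the head pointer together.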
@@ -617,8 +573,10 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache, bool tsd_tcache) {
tcache_bin_info[0].stack_size);
idalloctm(tsd_tsdn(tsd), avail_array, NULL, NULL, true, true);
} else {
/* See the comment at the top of tcache_create_explicit. */
void **mem_begin = (void **)((uintptr_t)tcache + sizeof(tcache_t));
/* Release both the tcache struct and avail array. */
idalloctm(tsd_tsdn(tsd), tcache, NULL, NULL, true, true);
idalloctm(tsd_tsdn(tsd), *mem_begin, NULL, NULL, true, true);
}
/*
@@ -816,7 +774,6 @@ tcache_boot(tsdn_t *tsdn, base_t *base) {
return true;
}
unsigned i, ncached_max;
total_stack_bytes = 0;
for (i = 0; i < SC_NBINS; i++) {
if ((bin_infos[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MIN) {
ncached_max = TCACHE_NSLOTS_SMALL_MIN;
@@ -826,18 +783,13 @@ tcache_boot(tsdn_t *tsdn, base_t *base) {
} else {
ncached_max = TCACHE_NSLOTS_SMALL_MAX;
}
unsigned stack_size = ncached_max * sizeof(void *);
assert(stack_size < ((uint64_t)1 <<
(sizeof(cache_bin_sz_t) * 8)));
tcache_bin_info[i].stack_size = stack_size;
total_stack_bytes += stack_size;
cache_bin_info_init(&tcache_bin_info[i], ncached_max);
}
for (; i < nhbins; i++) {
unsigned stack_size = TCACHE_NSLOTS_LARGE * sizeof(void *);
tcache_bin_info[i].stack_size = stack_size;
total_stack_bytes += stack_size;
cache_bin_info_init(&tcache_bin_info[i], TCACHE_NSLOTS_LARGE);
}
total_stack_bytes += total_stack_padding;
cache_bin_info_compute_alloc(tcache_bin_info, i, &tcache_bin_alloc_size,
&tcache_bin_alloc_alignment);
return false;
}
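
A quick sanity sketch of what the precomputed tcache_bin_alloc_size now holds, replacing the removed total_stack_bytes/total_stack_padding bookkeeping (illustrative check only, assuming tcache_bin_info has been filled in by the loops above):

/* Illustrative only: the per-bin stacks plus the two boundary slots. */
size_t expected = 2 * sizeof(void *);
for (szind_t i = 0; i < nhbins; i++) {
    expected += tcache_bin_info[i].stack_size;
}
assert(expected == tcache_bin_alloc_size);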