Redesign the cache bin metadata for the fast path.

Implement pointer-based metadata for tcache bins --
- 3 pointers are maintained to represent each bin;
- 2 of the 3 pointers are compressed (low 32 bits only) on 64-bit;
- is_full / is_empty checks are done through pointer comparisons.

Compared with the previous counter-based design --
- ~15% fast-path speedup in benchmarks;
- direct pointer comparison and dereference;
- no need to access tcache_bin_info in the common case.
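For illustration only, a minimal sketch of the idea in plain C follows. The
names (toy_bin_t, toy_alloc, toy_dalloc) are made up, and the 32-bit
compression of the full pointer as well as low-water tracking are omitted;
it only shows how is_empty / is_full reduce to a single pointer comparison,
so the hot path never needs to consult per-bin size metadata:

    #include <stdio.h>

    #define NCACHED_MAX 8

    typedef struct {
        void **cur;   /* next item to allocate; cur == empty means "is_empty" */
        void **full;  /* lowest slot; cur == full means "is_full" */
        void **empty; /* one past the highest slot (full + NCACHED_MAX) */
    } toy_bin_t;

    static void
    toy_bin_init(toy_bin_t *bin, void **stack) {
        bin->full = stack;
        bin->empty = stack + NCACHED_MAX;
        bin->cur = bin->empty; /* start out empty */
    }

    /* Fast-path alloc: one pointer comparison, one dereference. */
    static void *
    toy_alloc(toy_bin_t *bin) {
        if (bin->cur == bin->empty) {
            return NULL; /* empty; a real allocator refills on the slow path */
        }
        return *bin->cur++;
    }

    /* Fast-path dalloc: one pointer comparison, one store. */
    static int
    toy_dalloc(toy_bin_t *bin, void *ptr) {
        if (bin->cur == bin->full) {
            return 0; /* full; a real allocator flushes on the slow path */
        }
        *--bin->cur = ptr;
        return 1;
    }

    int
    main(void) {
        void *slots[NCACHED_MAX];
        int item;
        toy_bin_t bin;
        toy_bin_init(&bin, slots);
        toy_dalloc(&bin, &item);
        printf("round trip ok: %d\n", toy_alloc(&bin) == (void *)&item);
        return 0;
    }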
Author: Qi Wang, 2019-08-09 22:12:47 -07:00 (committed by Qi Wang)
Parent: d2dddfb82a
Commit: 7599c82d48
9 changed files with 340 additions and 122 deletions


@@ -178,6 +178,7 @@ TESTS_UNIT := \
 	$(srcroot)test/unit/bit_util.c \
 	$(srcroot)test/unit/binshard.c \
 	$(srcroot)test/unit/buf_writer.c \
+	$(srcroot)test/unit/cache_bin.c \
 	$(srcroot)test/unit/ckh.c \
 	$(srcroot)test/unit/decay.c \
 	$(srcroot)test/unit/div.c \


@@ -13,7 +13,6 @@
  * of the tcache at all.
  */
 
-
 /*
  * The count of the number of cached allocations in a bin. We make this signed
  * so that negative numbers can encode "invalid" states (e.g. a low water mark
@@ -39,29 +38,67 @@ struct cache_bin_info_s {
 	/* Upper limit on ncached. */
 	cache_bin_sz_t ncached_max;
 };
 
+extern cache_bin_info_t *tcache_bin_info;
+
 typedef struct cache_bin_s cache_bin_t;
 struct cache_bin_s {
-	/* Min # cached since last GC. */
-	cache_bin_sz_t low_water;
-	/* # of cached objects. */
-	cache_bin_sz_t ncached;
 	/*
-	 * ncached and stats are both modified frequently. Let's keep them
+	 * The cache bin stack is represented using 3 pointers: cur_ptr,
+	 * low_water and full, optimized for the fast path efficiency.
+	 *
+	 * low addr ==> high addr
+	 * |----|----|----|item1|item2|.....................|itemN|
+	 *  full            cur                                    empty
+	 * (ncached == N; full + ncached_max == empty)
+	 *
+	 * Data directly stored:
+	 * 1) cur_ptr points to the current item to be allocated, i.e. *cur_ptr.
+	 * 2) full points to the top of the stack (i.e. ncached == ncached_max),
+	 *    which is compared against on free_fastpath to check "is_full".
+	 * 3) low_water indicates a low water mark of ncached.
+	 *    Range of low_water is [cur, empty + 1], i.e. values of [ncached, -1].
+	 *
+	 * The empty position (ncached == 0) is derived via full + ncached_max
+	 * and not accessed in the common case (guarded behind low_water).
+	 *
+	 * On 64-bit, 2 of the 3 pointers (full and low water) are compressed by
+	 * omitting the high 32 bits.  Overflow of the half pointers is avoided
+	 * when allocating / initializing the stack space.  As a result,
+	 * cur_ptr.lowbits can be safely used for pointer comparisons.
+	 */
+	union {
+		void **ptr;
+		struct {
+			/* highbits never accessed directly. */
+#if (LG_SIZEOF_PTR == 3 && defined(JEMALLOC_BIG_ENDIAN))
+			uint32_t __highbits;
+#endif
+			uint32_t lowbits;
+#if (LG_SIZEOF_PTR == 3 && !defined(JEMALLOC_BIG_ENDIAN))
+			uint32_t __highbits;
+#endif
+		};
+	} cur_ptr;
+	/*
+	 * cur_ptr and stats are both modified frequently. Let's keep them
 	 * close so that they have a higher chance of being on the same
 	 * cacheline, thus less write-backs.
 	 */
 	cache_bin_stats_t tstats;
 	/*
-	 * Stack of available objects.
+	 * Points to the first item that hasn't been used since last GC, to
+	 * track the low water mark (min # of cached).  It may point to
+	 * empty_position + 1, which indicates the cache has been depleted and
+	 * refilled (low_water == -1).
+	 */
+	uint32_t low_water_position;
+	/*
+	 * Points to the position when the cache is full.
 	 *
 	 * To make use of adjacent cacheline prefetch, the items in the avail
-	 * stack goes to higher address for newer allocations.  avail points
-	 * just above the available space, which means that
-	 * avail[-ncached, ... -1] are available items and the lowest item will
-	 * be allocated first.
+	 * stack goes to higher address for newer allocations (i.e. cur_ptr++).
 	 */
-	void **avail;
+	uint32_t full_position;
 };
 
 typedef struct cache_bin_array_descriptor_s cache_bin_array_descriptor_t;
@@ -76,6 +113,67 @@ struct cache_bin_array_descriptor_s {
 	cache_bin_t *bins_large;
 };
 
+/*
+ * None of the cache_bin_*_get / _set functions is used on the fast path, which
+ * relies on pointer comparisons to determine if the cache is full / empty.
+ */
+static inline cache_bin_sz_t
+cache_bin_ncached_get(cache_bin_t *bin, szind_t ind) {
+	cache_bin_sz_t n = tcache_bin_info[ind].ncached_max -
+	    (bin->cur_ptr.lowbits - bin->full_position) / sizeof(void *);
+	assert(n >= 0 && n <= tcache_bin_info[ind].ncached_max);
+	assert(n == 0 || *(bin->cur_ptr.ptr) != NULL);
+
+	return n;
+}
+
+static inline void **
+cache_bin_empty_position_get(cache_bin_t *bin, szind_t ind) {
+	void **ret = bin->cur_ptr.ptr + cache_bin_ncached_get(bin, ind);
+	/* Low bits overflow disallowed when allocating the space. */
+	assert((uint32_t)(uintptr_t)ret >= bin->cur_ptr.lowbits);
+	assert(bin->full_position + tcache_bin_info[ind].ncached_max *
+	    sizeof(void *) > bin->full_position);
+	/* Can also be computed via (full_position + ncached_max) | highbits. */
+	assert(ret == (void **)((uintptr_t)(bin->full_position +
+	    tcache_bin_info[ind].ncached_max * sizeof(void *)) |
+	    (uintptr_t)((uintptr_t)bin->cur_ptr.ptr &
+	    ~(((uint64_t)1 << 32) - 1))));
+
+	return ret;
+}
+
+/* Returns the position of the bottom item on the stack; for convenience. */
+static inline void **
+cache_bin_bottom_item_get(cache_bin_t *bin, szind_t ind) {
+	void **bottom = cache_bin_empty_position_get(bin, ind) - 1;
+	assert(cache_bin_ncached_get(bin, ind) == 0 || *bottom != NULL);
+
+	return bottom;
+}
+
+/* Returns the numeric value of low water in [-1, ncached]. */
+static inline cache_bin_sz_t
+cache_bin_low_water_get(cache_bin_t *bin, szind_t ind) {
+	cache_bin_sz_t low_water = tcache_bin_info[ind].ncached_max -
+	    (bin->low_water_position - bin->full_position) / sizeof(void *);
+	assert(low_water >= -1 && low_water <=
+	    tcache_bin_info[ind].ncached_max);
+	assert(low_water <= cache_bin_ncached_get(bin, ind));
+	assert(bin->low_water_position >= bin->cur_ptr.lowbits);
+
+	return low_water;
+}
+
+static inline void
+cache_bin_ncached_set(cache_bin_t *bin, szind_t ind, cache_bin_sz_t n) {
+	bin->cur_ptr.lowbits = bin->full_position +
+	    (tcache_bin_info[ind].ncached_max - n) * sizeof(void *);
+	assert(n >= 0 && n <= tcache_bin_info[ind].ncached_max);
+	assert(n == 0 || *bin->cur_ptr.ptr != NULL);
+}
+
 static inline void
 cache_bin_array_descriptor_init(cache_bin_array_descriptor_t *descriptor,
     cache_bin_t *bins_small, cache_bin_t *bins_large) {
@@ -85,19 +183,24 @@ cache_bin_array_descriptor_init(cache_bin_array_descriptor_t *descriptor,
 }
 
 JEMALLOC_ALWAYS_INLINE void *
-cache_bin_alloc_easy(cache_bin_t *bin, bool *success) {
-	void *ret;
-
-	bin->ncached--;
-
+cache_bin_alloc_easy(cache_bin_t *bin, bool *success, cache_bin_sz_t ind) {
 	/*
-	 * Check for both bin->ncached == 0 and ncached < low_water
-	 * in a single branch.
+	 * This may read from the empty position; however the loaded value won't
+	 * be used.  It's safe because the stack has one more slot reserved.
 	 */
-	if (unlikely(bin->ncached <= bin->low_water)) {
-		bin->low_water = bin->ncached;
-		if (bin->ncached == -1) {
-			bin->ncached = 0;
+	void *ret = *(bin->cur_ptr.ptr++);
+	/*
+	 * Check for both bin->ncached == 0 and ncached < low_water in a single
+	 * branch.  This also avoids accessing tcache_bin_info (which is on a
+	 * separate cacheline / page) in the common case.
+	 */
+	if (unlikely(bin->cur_ptr.lowbits >= bin->low_water_position)) {
+		bin->low_water_position = bin->cur_ptr.lowbits;
+		uint32_t empty_position = bin->full_position +
+		    tcache_bin_info[ind].ncached_max * sizeof(void *);
+		if (bin->cur_ptr.lowbits > empty_position) {
+			bin->cur_ptr.ptr--;
+			assert(bin->cur_ptr.lowbits == empty_position);
 			*success = false;
 			return NULL;
 		}
@@ -111,19 +214,18 @@ cache_bin_alloc_easy(cache_bin_t *bin, bool *success) {
 	 * cacheline).
 	 */
 	*success = true;
-	ret = *(bin->avail - (bin->ncached + 1));
 
 	return ret;
 }
 
 JEMALLOC_ALWAYS_INLINE bool
-cache_bin_dalloc_easy(cache_bin_t *bin, cache_bin_info_t *bin_info, void *ptr) {
-	if (unlikely(bin->ncached == bin_info->ncached_max)) {
+cache_bin_dalloc_easy(cache_bin_t *bin, void *ptr) {
+	if (unlikely(bin->cur_ptr.lowbits == bin->full_position)) {
 		return false;
 	}
-	assert(bin->ncached < bin_info->ncached_max);
-	bin->ncached++;
-	*(bin->avail - bin->ncached) = ptr;
+
+	*(--bin->cur_ptr.ptr) = ptr;
+	assert(bin->cur_ptr.lowbits >= bin->full_position);
 
 	return true;
 }


@@ -130,8 +130,8 @@ tcache_available(tsd_t *tsd) {
 	if (likely(tsd_tcache_enabled_get(tsd))) {
 		/* Associated arena == NULL implies tcache init in progress. */
 		assert(tsd_tcachep_get(tsd)->arena == NULL ||
-		    tcache_small_bin_get(tsd_tcachep_get(tsd), 0)->avail !=
-		    NULL);
+		    tcache_small_bin_get(tsd_tcachep_get(tsd), 0)->cur_ptr.ptr
+		    != NULL);
 		return true;
 	}


@@ -4,8 +4,6 @@
 extern bool opt_tcache;
 extern ssize_t opt_lg_tcache_max;
 
-extern cache_bin_info_t *tcache_bin_info;
-
 /*
  * Number of tcache bins. There are SC_NBINS small-object bins, plus 0 or more
  * large-object bins.


@@ -48,7 +48,7 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache,
 	assert(binind < SC_NBINS);
 	bin = tcache_small_bin_get(tcache, binind);
-	ret = cache_bin_alloc_easy(bin, &tcache_success);
+	ret = cache_bin_alloc_easy(bin, &tcache_success, binind);
 	assert(tcache_success == (ret != NULL));
 	if (unlikely(!tcache_success)) {
 		bool tcache_hard_success;
@@ -109,7 +109,7 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size,
 	assert(binind >= SC_NBINS && binind < nhbins);
 	bin = tcache_large_bin_get(tcache, binind);
-	ret = cache_bin_alloc_easy(bin, &tcache_success);
+	ret = cache_bin_alloc_easy(bin, &tcache_success, binind);
 	assert(tcache_success == (ret != NULL));
 	if (unlikely(!tcache_success)) {
 		/*
@@ -164,7 +164,6 @@ JEMALLOC_ALWAYS_INLINE void
 tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind,
     bool slow_path) {
 	cache_bin_t *bin;
-	cache_bin_info_t *bin_info;
 
 	assert(tcache_salloc(tsd_tsdn(tsd), ptr)
 	    <= SC_SMALL_MAXCLASS);
@@ -174,11 +173,10 @@ tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind,
 	}
 
 	bin = tcache_small_bin_get(tcache, binind);
-	bin_info = &tcache_bin_info[binind];
-	if (unlikely(!cache_bin_dalloc_easy(bin, bin_info, ptr))) {
+	if (unlikely(!cache_bin_dalloc_easy(bin, ptr))) {
 		tcache_bin_flush_small(tsd, tcache, bin, binind,
-		    (bin_info->ncached_max >> 1));
-		bool ret = cache_bin_dalloc_easy(bin, bin_info, ptr);
+		    tcache_bin_info[binind].ncached_max >> 1);
+		bool ret = cache_bin_dalloc_easy(bin, ptr);
 		assert(ret);
 	}
@@ -189,7 +187,6 @@ JEMALLOC_ALWAYS_INLINE void
 tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind,
     bool slow_path) {
 	cache_bin_t *bin;
-	cache_bin_info_t *bin_info;
 
 	assert(tcache_salloc(tsd_tsdn(tsd), ptr)
 	    > SC_SMALL_MAXCLASS);
@@ -200,11 +197,10 @@ tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind,
 	}
 
 	bin = tcache_large_bin_get(tcache, binind);
-	bin_info = &tcache_bin_info[binind];
-	if (unlikely(!cache_bin_dalloc_easy(bin, bin_info, ptr))) {
+	if (unlikely(!cache_bin_dalloc_easy(bin, ptr))) {
 		tcache_bin_flush_large(tsd, tcache, bin, binind,
-		    (bin_info->ncached_max >> 1));
-		bool ret = cache_bin_dalloc_easy(bin, bin_info, ptr);
+		    tcache_bin_info[binind].ncached_max >> 1);
+		bool ret = cache_bin_dalloc_easy(bin, ptr);
 		assert(ret);
 	}


@@ -202,12 +202,13 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads,
 		for (szind_t i = 0; i < SC_NBINS; i++) {
 			cache_bin_t *tbin = &descriptor->bins_small[i];
 			arena_stats_accum_zu(&astats->tcache_bytes,
-			    tbin->ncached * sz_index2size(i));
+			    cache_bin_ncached_get(tbin, i) * sz_index2size(i));
 		}
 		for (szind_t i = 0; i < nhbins - SC_NBINS; i++) {
 			cache_bin_t *tbin = &descriptor->bins_large[i];
 			arena_stats_accum_zu(&astats->tcache_bytes,
-			    tbin->ncached * sz_index2size(i));
+			    cache_bin_ncached_get(tbin, i + SC_NBINS) *
+			    sz_index2size(i));
 		}
 	}
 	malloc_mutex_prof_read(tsdn,
@@ -1381,7 +1382,7 @@ arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache,
     cache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes) {
 	unsigned i, nfill, cnt;
 
-	assert(tbin->ncached == 0);
+	assert(cache_bin_ncached_get(tbin, binind) == 0);
 
 	if (config_prof && arena_prof_accum(tsdn, arena, prof_accumbytes)) {
 		prof_idump(tsdn);
@@ -1390,6 +1391,7 @@ arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache,
 	unsigned binshard;
 	bin_t *bin = arena_bin_choose_lock(tsdn, arena, binind, &binshard);
+	void **empty_position = cache_bin_empty_position_get(tbin, binind);
 
 	for (i = 0, nfill = (tcache_bin_info[binind].ncached_max >>
 	    tcache->lg_fill_div[binind]); i < nfill; i += cnt) {
 		extent_t *slab;
@@ -1400,7 +1402,7 @@ arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache,
 			    tofill : extent_nfree_get(slab);
 			arena_slab_reg_alloc_batch(
 			    slab, &bin_infos[binind], cnt,
-			    tbin->avail - nfill + i);
+			    empty_position - nfill + i);
 		} else {
 			cnt = 1;
 			void *ptr = arena_bin_malloc_hard(tsdn, arena, bin,
@@ -1412,18 +1414,18 @@ arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache,
 			 */
 			if (ptr == NULL) {
 				if (i > 0) {
-					memmove(tbin->avail - i,
-					    tbin->avail - nfill,
+					memmove(empty_position - i,
+					    empty_position - nfill,
 					    i * sizeof(void *));
 				}
 				break;
 			}
 			/* Insert such that low regions get used first. */
-			*(tbin->avail - nfill + i) = ptr;
+			*(empty_position - nfill + i) = ptr;
 		}
 		if (config_fill && unlikely(opt_junk_alloc)) {
 			for (unsigned j = 0; j < cnt; j++) {
-				void* ptr = *(tbin->avail - nfill + i + j);
+				void* ptr = *(empty_position - nfill + i + j);
 				arena_alloc_junk_small(ptr, &bin_infos[binind],
 				    true);
 			}
@@ -1437,7 +1439,7 @@ arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache,
 		tbin->tstats.nrequests = 0;
 	}
 	malloc_mutex_unlock(tsdn, &bin->lock);
-	tbin->ncached = i;
+	cache_bin_ncached_set(tbin, binind, i);
 	arena_decay_tick(tsdn, arena);
 }


@@ -2368,7 +2368,7 @@ je_malloc(size_t size) {
 	cache_bin_t *bin = tcache_small_bin_get(tcache, ind);
 	bool tcache_success;
-	void* ret = cache_bin_alloc_easy(bin, &tcache_success);
+	void *ret = cache_bin_alloc_easy(bin, &tcache_success, ind);
 
 	if (tcache_success) {
 		if (config_stats) {
@@ -2846,8 +2846,7 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) {
 	}
 
 	cache_bin_t *bin = tcache_small_bin_get(tcache, alloc_ctx.szind);
-	cache_bin_info_t *bin_info = &tcache_bin_info[alloc_ctx.szind];
-	if (!cache_bin_dalloc_easy(bin, bin_info, ptr)) {
+	if (!cache_bin_dalloc_easy(bin, ptr)) {
 		return false;
 	}


@@ -14,7 +14,16 @@ bool opt_tcache = true;
 ssize_t opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT;
 
 cache_bin_info_t *tcache_bin_info;
-static unsigned stack_nelms; /* Total stack elms per tcache. */
+/*
+ * For the total bin stack region (per tcache), reserve 2 more slots so that 1)
+ * the empty position can be safely read on the fast path before checking
+ * "is_empty"; and 2) the low_water == -1 case can go beyond the empty position
+ * by 1 step safely (i.e. no overflow).
+ */
+static const unsigned total_stack_padding = sizeof(void *) * 2;
+
+/* Total stack size required (per tcache).  Include the padding above. */
+static uint32_t total_stack_bytes;
 
 unsigned nhbins;
 size_t tcache_maxclass;
@@ -47,14 +56,16 @@ tcache_event_hard(tsd_t *tsd, tcache_t *tcache) {
 	} else {
 		tbin = tcache_large_bin_get(tcache, binind);
 	}
-	if (tbin->low_water > 0) {
+
+	cache_bin_sz_t low_water = cache_bin_low_water_get(tbin, binind);
+	cache_bin_sz_t ncached = cache_bin_ncached_get(tbin, binind);
+	if (low_water > 0) {
 		/*
 		 * Flush (ceiling) 3/4 of the objects below the low water mark.
 		 */
 		if (binind < SC_NBINS) {
 			tcache_bin_flush_small(tsd, tcache, tbin, binind,
-			    tbin->ncached - tbin->low_water + (tbin->low_water
-			    >> 2));
+			    ncached - low_water + (low_water >> 2));
 			/*
 			 * Reduce fill count by 2X. Limit lg_fill_div such that
 			 * the fill count is always at least 1.
@@ -66,10 +77,10 @@ tcache_event_hard(tsd_t *tsd, tcache_t *tcache) {
 			}
 		} else {
 			tcache_bin_flush_large(tsd, tcache, tbin, binind,
-			    tbin->ncached - tbin->low_water + (tbin->low_water
-			    >> 2));
+			    ncached - low_water + (low_water >> 2));
 		}
-	} else if (tbin->low_water < 0) {
+	} else if (low_water < 0) {
+		assert(low_water == -1);
 		/*
 		 * Increase fill count by 2X for small bins. Make sure
 		 * lg_fill_div stays greater than 0.
@@ -78,7 +89,7 @@ tcache_event_hard(tsd_t *tsd, tcache_t *tcache) {
 			tcache->lg_fill_div[binind]--;
 		}
 	}
-	tbin->low_water = tbin->ncached;
+	tbin->low_water_position = tbin->cur_ptr.lowbits;
 
 	tcache->next_gc_bin++;
 	if (tcache->next_gc_bin == nhbins) {
@@ -97,7 +108,7 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache,
 	if (config_prof) {
 		tcache->prof_accumbytes = 0;
 	}
-	ret = cache_bin_alloc_easy(tbin, tcache_success);
+	ret = cache_bin_alloc_easy(tbin, tcache_success, binind);
 
 	return ret;
 }
@@ -117,9 +128,10 @@ tbin_extents_lookup_size_check(tsdn_t *tsdn, cache_bin_t *tbin, szind_t binind,
 	 */
 	szind_t szind;
 	size_t sz_sum = binind * nflush;
+	void **bottom_item = cache_bin_bottom_item_get(tbin, binind);
 	for (unsigned i = 0 ; i < nflush; i++) {
 		rtree_extent_szind_read(tsdn, &extents_rtree,
-		    rtree_ctx, (uintptr_t)*(tbin->avail - 1 - i), true,
+		    rtree_ctx, (uintptr_t)*(bottom_item - i), true,
 		    &extents[i], &szind);
 		sz_sum -= szind;
 	}
@@ -137,13 +149,15 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin,
 	bool merged_stats = false;
 
 	assert(binind < SC_NBINS);
-	assert((cache_bin_sz_t)rem <= tbin->ncached);
+	cache_bin_sz_t ncached = cache_bin_ncached_get(tbin, binind);
+	assert((cache_bin_sz_t)rem <= ncached);
 
 	arena_t *arena = tcache->arena;
 	assert(arena != NULL);
-	unsigned nflush = tbin->ncached - rem;
+	unsigned nflush = ncached - rem;
 	VARIABLE_ARRAY(extent_t *, item_extent, nflush);
 
+	void **bottom_item = cache_bin_bottom_item_get(tbin, binind);
 	/* Look up extent once per item. */
 	if (config_opt_safety_checks) {
 		tbin_extents_lookup_size_check(tsd_tsdn(tsd), tbin, binind,
@@ -151,7 +165,7 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin,
 	} else {
 		for (unsigned i = 0 ; i < nflush; i++) {
 			item_extent[i] = iealloc(tsd_tsdn(tsd),
-			    *(tbin->avail - 1 - i));
+			    *(bottom_item - i));
 		}
 	}
 	while (nflush > 0) {
@@ -181,7 +195,7 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin,
 		}
 
 		unsigned ndeferred = 0;
 		for (unsigned i = 0; i < nflush; i++) {
-			void *ptr = *(tbin->avail - 1 - i);
+			void *ptr = *(bottom_item - i);
 			extent = item_extent[i];
 			assert(ptr != NULL && extent != NULL);
@@ -196,7 +210,7 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin,
 				 * locked.  Stash the object, so that it can be
 				 * handled in a future pass.
 				 */
-				*(tbin->avail - 1 - ndeferred) = ptr;
+				*(bottom_item - ndeferred) = ptr;
 				item_extent[ndeferred] = extent;
 				ndeferred++;
 			}
@@ -219,11 +233,11 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin,
 		malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock);
 	}
 
-	memmove(tbin->avail - rem, tbin->avail - tbin->ncached, rem *
+	memmove(tbin->cur_ptr.ptr + (ncached - rem), tbin->cur_ptr.ptr, rem *
 	    sizeof(void *));
-	tbin->ncached = rem;
-	if (tbin->ncached < tbin->low_water) {
-		tbin->low_water = tbin->ncached;
+	cache_bin_ncached_set(tbin, binind, rem);
+	if (tbin->cur_ptr.lowbits > tbin->low_water_position) {
+		tbin->low_water_position = tbin->cur_ptr.lowbits;
 	}
 }
@@ -233,17 +247,19 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, szind_t
 	bool merged_stats = false;
 
 	assert(binind < nhbins);
-	assert((cache_bin_sz_t)rem <= tbin->ncached);
+	cache_bin_sz_t ncached = cache_bin_ncached_get(tbin, binind);
+	assert((cache_bin_sz_t)rem <= ncached);
 
 	arena_t *tcache_arena = tcache->arena;
 	assert(tcache_arena != NULL);
-	unsigned nflush = tbin->ncached - rem;
+	unsigned nflush = ncached - rem;
 	VARIABLE_ARRAY(extent_t *, item_extent, nflush);
 
+	void **bottom_item = cache_bin_bottom_item_get(tbin, binind);
+
 #ifndef JEMALLOC_EXTRA_SIZE_CHECK
 	/* Look up extent once per item. */
 	for (unsigned i = 0 ; i < nflush; i++) {
-		item_extent[i] = iealloc(tsd_tsdn(tsd), *(tbin->avail - 1 - i));
+		item_extent[i] = iealloc(tsd_tsdn(tsd), *(bottom_item - i));
 	}
 #else
 	tbin_extents_lookup_size_check(tsd_tsdn(tsd), tbin, binind, nflush,
@@ -266,7 +282,7 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, szind_t
 			malloc_mutex_lock(tsd_tsdn(tsd), &locked_arena->large_mtx);
 		}
 		for (unsigned i = 0; i < nflush; i++) {
-			void *ptr = *(tbin->avail - 1 - i);
+			void *ptr = *(bottom_item - i);
 			assert(ptr != NULL);
 			extent = item_extent[i];
 			if (extent_arena_ind_get(extent) == locked_arena_ind) {
@@ -295,7 +311,7 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, szind_t
 		unsigned ndeferred = 0;
 		for (unsigned i = 0; i < nflush; i++) {
-			void *ptr = *(tbin->avail - 1 - i);
+			void *ptr = *(bottom_item - i);
 			extent = item_extent[i];
 			assert(ptr != NULL && extent != NULL);
@@ -308,7 +324,7 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, szind_t
 				 * Stash the object, so that it can be handled
 				 * in a future pass.
 				 */
-				*(tbin->avail - 1 - ndeferred) = ptr;
+				*(bottom_item - ndeferred) = ptr;
 				item_extent[ndeferred] = extent;
 				ndeferred++;
 			}
@@ -330,11 +346,11 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, szind_t
 		tbin->tstats.nrequests = 0;
 	}
 
-	memmove(tbin->avail - rem, tbin->avail - tbin->ncached, rem *
+	memmove(tbin->cur_ptr.ptr + (ncached - rem), tbin->cur_ptr.ptr, rem *
 	    sizeof(void *));
-	tbin->ncached = rem;
-	if (tbin->ncached < tbin->low_water) {
-		tbin->low_water = tbin->ncached;
+	cache_bin_ncached_set(tbin, binind, rem);
+	if (tbin->cur_ptr.lowbits > tbin->low_water_position) {
+		tbin->low_water_position = tbin->cur_ptr.lowbits;
 	}
 }
@@ -406,9 +422,43 @@ tsd_tcache_enabled_data_init(tsd_t *tsd) {
 	return false;
 }
 
-/* Initialize auto tcache (embedded in TSD). */
+static bool
+tcache_bin_init(cache_bin_t *bin, szind_t ind, uintptr_t *stack_cur) {
+	cassert(sizeof(bin->cur_ptr) == sizeof(void *));
+	/*
+	 * The full_position points to the lowest available space.  Allocations
+	 * will access the slots toward higher addresses (for the benefit of
+	 * adjacent prefetch).
+	 */
+	void *full_position = (void *)*stack_cur;
+	uint32_t bin_stack_size = tcache_bin_info[ind].ncached_max *
+	    sizeof(void *);
+
+	*stack_cur += bin_stack_size;
+	void *empty_position = (void *)*stack_cur;
+
+	/* Init to the empty position. */
+	bin->cur_ptr.ptr = empty_position;
+	bin->low_water_position = bin->cur_ptr.lowbits;
+	bin->full_position = (uint32_t)(uintptr_t)full_position;
+	assert(bin->cur_ptr.lowbits - bin->full_position == bin_stack_size);
+	assert(cache_bin_ncached_get(bin, ind) == 0);
+	assert(cache_bin_empty_position_get(bin, ind) == empty_position);
+
+	return false;
+}
+
+/* Sanity check only. */
+static bool
+tcache_bin_lowbits_overflowable(void *ptr) {
+	uint32_t lowbits = (uint32_t)((uintptr_t)ptr + total_stack_bytes);
+	return lowbits < (uint32_t)(uintptr_t)ptr;
+}
+
 static void
 tcache_init(tsd_t *tsd, tcache_t *tcache, void *avail_stack) {
+	assert(!tcache_bin_lowbits_overflowable(avail_stack));
+
 	memset(&tcache->link, 0, sizeof(ql_elm(tcache_t)));
 	tcache->prof_accumbytes = 0;
 	tcache->next_gc_bin = 0;
@@ -416,41 +466,43 @@ tcache_init(tsd_t *tsd, tcache_t *tcache, void *avail_stack) {
 	ticker_init(&tcache->gc_ticker, TCACHE_GC_INCR);
 
-	size_t stack_offset = 0;
 	assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0);
 	memset(tcache->bins_small, 0, sizeof(cache_bin_t) * SC_NBINS);
 	memset(tcache->bins_large, 0, sizeof(cache_bin_t) * (nhbins - SC_NBINS));
 	unsigned i = 0;
+	uintptr_t stack_cur = (uintptr_t)avail_stack;
 	for (; i < SC_NBINS; i++) {
 		tcache->lg_fill_div[i] = 1;
-		stack_offset += tcache_bin_info[i].ncached_max * sizeof(void *);
-		/*
-		 * avail points past the available space. Allocations will
-		 * access the slots toward higher addresses (for the benefit of
-		 * prefetch).
-		 */
-		tcache_small_bin_get(tcache, i)->avail =
-		    (void **)((uintptr_t)avail_stack + (uintptr_t)stack_offset);
+		cache_bin_t *bin = tcache_small_bin_get(tcache, i);
+		tcache_bin_init(bin, i, &stack_cur);
 	}
 	for (; i < nhbins; i++) {
-		stack_offset += tcache_bin_info[i].ncached_max * sizeof(void *);
-		tcache_large_bin_get(tcache, i)->avail =
-		    (void **)((uintptr_t)avail_stack + (uintptr_t)stack_offset);
+		cache_bin_t *bin = tcache_large_bin_get(tcache, i);
+		tcache_bin_init(bin, i, &stack_cur);
 	}
-	assert(stack_offset == stack_nelms * sizeof(void *));
+
+	/* Sanity check that the whole stack is used. */
+	size_t stack_offset = stack_cur - (uintptr_t)avail_stack;
+	assert(stack_offset + total_stack_padding == total_stack_bytes);
+}
+
+static size_t
+tcache_bin_stack_alignment (size_t size) {
+	/* Align pow2 to avoid overflow the cache bin compressed pointers. */
+	return (LG_SIZEOF_PTR == 3) ? pow2_ceil_zu(size) : CACHELINE;
 }
 
 /* Initialize auto tcache (embedded in TSD). */
 bool
 tsd_tcache_data_init(tsd_t *tsd) {
 	tcache_t *tcache = tsd_tcachep_get_unsafe(tsd);
-	assert(tcache_small_bin_get(tcache, 0)->avail == NULL);
-	size_t size = stack_nelms * sizeof(void *);
+	assert(tcache_small_bin_get(tcache, 0)->cur_ptr.ptr == NULL);
+
 	/* Avoid false cacheline sharing. */
-	size = sz_sa2u(size, CACHELINE);
+	size_t size = sz_sa2u(total_stack_bytes, CACHELINE);
 
-	void *avail_array = ipallocztm(tsd_tsdn(tsd), size, CACHELINE, true,
-	    NULL, true, arena_get(TSDN_NULL, 0, true));
+	void *avail_array = ipallocztm(tsd_tsdn(tsd), size,
+	    tcache_bin_stack_alignment(size), true, NULL, true,
+	    arena_get(TSDN_NULL, 0, true));
 	if (avail_array == NULL) {
 		return true;
 	}
@@ -485,25 +537,24 @@
 /* Created manual tcache for tcache.create mallctl. */
 tcache_t *
 tcache_create_explicit(tsd_t *tsd) {
-	tcache_t *tcache;
-	size_t size, stack_offset;
-
-	size = sizeof(tcache_t);
+	size_t size = sizeof(tcache_t);
 	/* Naturally align the pointer stacks. */
 	size = PTR_CEILING(size);
-	stack_offset = size;
-	size += stack_nelms * sizeof(void *);
+	size_t stack_offset = size;
+	size += total_stack_bytes;
 	/* Avoid false cacheline sharing. */
 	size = sz_sa2u(size, CACHELINE);
 
-	tcache = ipallocztm(tsd_tsdn(tsd), size, CACHELINE, true, NULL, true,
+	tcache_t *tcache = ipallocztm(tsd_tsdn(tsd), size,
+	    tcache_bin_stack_alignment(size), true, NULL, true,
 	    arena_get(TSDN_NULL, 0, true));
 	if (tcache == NULL) {
 		return NULL;
 	}
 
-	tcache_init(tsd, tcache,
-	    (void *)((uintptr_t)tcache + (uintptr_t)stack_offset));
+	void *avail_array = (void *)((uintptr_t)tcache +
+	    (uintptr_t)stack_offset);
+	tcache_init(tsd, tcache, avail_array);
 	tcache_arena_associate(tsd_tsdn(tsd), tcache, arena_ichoose(tsd, NULL));
 
 	return tcache;
@@ -553,9 +604,12 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache, bool tsd_tcache) {
 	if (tsd_tcache) {
 		/* Release the avail array for the TSD embedded auto tcache. */
-		void *avail_array =
-		    (void *)((uintptr_t)tcache_small_bin_get(tcache, 0)->avail -
-		    (uintptr_t)tcache_bin_info[0].ncached_max * sizeof(void *));
+		cache_bin_t *bin = tcache_small_bin_get(tcache, 0);
+		assert(cache_bin_ncached_get(bin, 0) == 0);
+		assert(cache_bin_empty_position_get(bin, 0) ==
+		    bin->cur_ptr.ptr);
+		void *avail_array = bin->cur_ptr.ptr -
+		    tcache_bin_info[0].ncached_max;
 		idalloctm(tsd_tsdn(tsd), avail_array, NULL, NULL, true, true);
 	} else {
 		/* Release both the tcache struct and avail array. */
@@ -587,16 +641,17 @@ tcache_cleanup(tsd_t *tsd) {
 	if (!tcache_available(tsd)) {
 		assert(tsd_tcache_enabled_get(tsd) == false);
 		if (config_debug) {
-			assert(tcache_small_bin_get(tcache, 0)->avail == NULL);
+			assert(tcache_small_bin_get(tcache, 0)->cur_ptr.ptr
+			    == NULL);
 		}
 		return;
 	}
 	assert(tsd_tcache_enabled_get(tsd));
-	assert(tcache_small_bin_get(tcache, 0)->avail != NULL);
+	assert(tcache_small_bin_get(tcache, 0)->cur_ptr.ptr != NULL);
 
 	tcache_destroy(tsd, tcache, true);
 	if (config_debug) {
-		tcache_small_bin_get(tcache, 0)->avail = NULL;
+		tcache_small_bin_get(tcache, 0)->cur_ptr.ptr = NULL;
 	}
 }
@@ -755,8 +810,8 @@ tcache_boot(tsdn_t *tsdn) {
 	if (tcache_bin_info == NULL) {
 		return true;
 	}
 
+	unsigned i, stack_nelms;
 	stack_nelms = 0;
-	unsigned i;
 	for (i = 0; i < SC_NBINS; i++) {
 		if ((bin_infos[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MIN) {
 			tcache_bin_info[i].ncached_max =
@@ -775,6 +830,7 @@ tcache_boot(tsdn_t *tsdn) {
 		tcache_bin_info[i].ncached_max = TCACHE_NSLOTS_LARGE;
 		stack_nelms += tcache_bin_info[i].ncached_max;
 	}
+	total_stack_bytes = stack_nelms * sizeof(void *) + total_stack_padding;
 
 	return false;
 }

test/unit/cache_bin.c (new file, 64 lines)

@@ -0,0 +1,64 @@
#include "test/jemalloc_test.h"
cache_bin_t test_bin;
TEST_BEGIN(test_cache_bin) {
cache_bin_t *bin = &test_bin;
cassert(PAGE > TCACHE_NSLOTS_SMALL_MAX * sizeof(void *));
/* Page aligned to make sure lowbits not overflowable. */
void **stack = mallocx(PAGE, MALLOCX_TCACHE_NONE | MALLOCX_ALIGN(PAGE));
assert_ptr_not_null(stack, "Unexpected mallocx failure");
/* Initialize to empty; bin 0. */
cache_bin_sz_t ncached_max = tcache_bin_info[0].ncached_max;
void **empty_position = stack + ncached_max;
bin->cur_ptr.ptr = empty_position;
bin->low_water_position = bin->cur_ptr.lowbits;
bin->full_position = (uint32_t)(uintptr_t)stack;
assert_ptr_eq(cache_bin_empty_position_get(bin, 0), empty_position,
"Incorrect empty position");
/* Not using assert_zu etc on cache_bin_sz_t since it may change. */
assert_true(cache_bin_ncached_get(bin, 0) == 0, "Incorrect cache size");
bool success;
void *ret = cache_bin_alloc_easy(bin, &success, 0);
assert_false(success, "Empty cache bin should not alloc");
assert_true(cache_bin_low_water_get(bin, 0) == - 1,
"Incorrect low water mark");
cache_bin_ncached_set(bin, 0, 0);
assert_ptr_eq(bin->cur_ptr.ptr, empty_position, "Bin should be empty");
for (cache_bin_sz_t i = 1; i < ncached_max + 1; i++) {
success = cache_bin_dalloc_easy(bin, (void *)(uintptr_t)i);
assert_true(success && cache_bin_ncached_get(bin, 0) == i,
"Bin dalloc failure");
}
success = cache_bin_dalloc_easy(bin, (void *)1);
assert_false(success, "Bin should be full");
assert_ptr_eq(bin->cur_ptr.ptr, stack, "Incorrect bin cur_ptr");
cache_bin_ncached_set(bin, 0, ncached_max);
assert_ptr_eq(bin->cur_ptr.ptr, stack, "cur_ptr should not change");
/* Emulate low water after refill. */
bin->low_water_position = bin->full_position;
for (cache_bin_sz_t i = ncached_max; i > 0; i--) {
ret = cache_bin_alloc_easy(bin, &success, 0);
cache_bin_sz_t ncached = cache_bin_ncached_get(bin, 0);
assert_true(success && ncached == i - 1,
"Cache bin alloc failure");
assert_ptr_eq(ret, (void *)(uintptr_t)i, "Bin alloc failure");
assert_true(cache_bin_low_water_get(bin, 0) == ncached,
"Incorrect low water mark");
}
ret = cache_bin_alloc_easy(bin, &success, 0);
assert_false(success, "Empty cache bin should not alloc.");
assert_ptr_eq(bin->cur_ptr.ptr, stack + ncached_max,
"Bin should be empty");
}
TEST_END
int
main(void) {
return test(test_cache_bin);
}