Redesign the cache bin metadata for fast path.
Implement the pointer-based metadata for tcache bins --
- 3 pointers are maintained to represent each bin;
- 2 of the pointers are compressed on 64-bit;
- is_full / is_empty done through pointer comparison.

Compared to the previous counter-based design --
- fast-path speedup of ~15% in benchmarks;
- direct pointer comparison and dereference;
- no need to access tcache_bin_info in the common case.
parent d2dddfb82a
commit 7599c82d48
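The fast-path checks described above reduce to comparisons on the compressed low bits of the per-bin pointers. Below is a minimal illustrative sketch of the idea (assuming a little-endian 64-bit target; the names mirror the cache_bin_s struct in the diff, but this is not the exact header):

#include <stdbool.h>
#include <stdint.h>

/* Simplified stand-in for the real cache_bin_s (sketch only). */
typedef struct {
	union {
		void **ptr;       /* full pointer to the next item to allocate */
		uint32_t lowbits; /* low 32 bits, enough for the fast-path compares */
	} cur_ptr;
	uint32_t low_water_position; /* compressed low-water pointer */
	uint32_t full_position;      /* compressed "stack is full" pointer */
} sketch_cache_bin_t;

/* Alloc fast path: hitting low water also covers the empty case. */
static inline bool
sketch_bin_hit_low_water(const sketch_cache_bin_t *bin) {
	return bin->cur_ptr.lowbits >= bin->low_water_position;
}

/* Free fast path: "is_full" is a single compare, no tcache_bin_info load. */
static inline bool
sketch_bin_is_full(const sketch_cache_bin_t *bin) {
	return bin->cur_ptr.lowbits == bin->full_position;
}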
@ -178,6 +178,7 @@ TESTS_UNIT := \
$(srcroot)test/unit/bit_util.c \
$(srcroot)test/unit/binshard.c \
$(srcroot)test/unit/buf_writer.c \
$(srcroot)test/unit/cache_bin.c \
$(srcroot)test/unit/ckh.c \
$(srcroot)test/unit/decay.c \
$(srcroot)test/unit/div.c \
@ -13,7 +13,6 @@
* of the tcache at all.
*/

/*
* The count of the number of cached allocations in a bin. We make this signed
* so that negative numbers can encode "invalid" states (e.g. a low water mark
@ -39,29 +38,67 @@ struct cache_bin_info_s {
/* Upper limit on ncached. */
cache_bin_sz_t ncached_max;
};
extern cache_bin_info_t *tcache_bin_info;

typedef struct cache_bin_s cache_bin_t;
struct cache_bin_s {
/* Min # cached since last GC. */
cache_bin_sz_t low_water;
/* # of cached objects. */
cache_bin_sz_t ncached;
/*
* ncached and stats are both modified frequently. Let's keep them
* The cache bin stack is represented using 3 pointers: cur_ptr,
* low_water and full, optimized for the fast path efficiency.
*
* low addr ==> high addr
* |----|----|----|item1|item2|.....................|itemN|
* full cur empty
* (ncached == N; full + ncached_max == empty)
*
* Data directly stored:
* 1) cur_ptr points to the current item to be allocated, i.e. *cur_ptr.
* 2) full points to the top of the stack (i.e. ncached == ncached_max),
* which is compared against on free_fastpath to check "is_full".
* 3) low_water indicates a low water mark of ncached.
* Range of low_water is [cur, empty + 1], i.e. values of [ncached, -1].
*
* The empty position (ncached == 0) is derived via full + ncached_max
* and not accessed in the common case (guarded behind low_water).
*
* On 64-bit, 2 of the 3 pointers (full and low water) are compressed by
* omitting the high 32 bits. Overflow of the half pointers is avoided
* when allocating / initializing the stack space. As a result,
* cur_ptr.lowbits can be safely used for pointer comparisons.
*/
union {
void **ptr;
struct {
/* highbits never accessed directly. */
#if (LG_SIZEOF_PTR == 3 && defined(JEMALLOC_BIG_ENDIAN))
uint32_t __highbits;
#endif
uint32_t lowbits;
#if (LG_SIZEOF_PTR == 3 && !defined(JEMALLOC_BIG_ENDIAN))
uint32_t __highbits;
#endif
};
} cur_ptr;
/*
* cur_ptr and stats are both modified frequently. Let's keep them
* close so that they have a higher chance of being on the same
* cacheline, thus less write-backs.
*/
cache_bin_stats_t tstats;
/*
* Stack of available objects.
* Points to the first item that hasn't been used since last GC, to
* track the low water mark (min # of cached). It may point to
* empty_position + 1, which indicates the cache has been depleted and
* refilled (low_water == -1).
*/
uint32_t low_water_position;
/*
* Points to the position when the cache is full.
*
* To make use of adjacent cacheline prefetch, the items in the avail
* stack goes to higher address for newer allocations. avail points
* just above the available space, which means that
* avail[-ncached, ... -1] are available items and the lowest item will
* be allocated first.
* stack goes to higher address for newer allocations (i.e. cur_ptr++).
*/
void **avail;
uint32_t full_position;
};

typedef struct cache_bin_array_descriptor_s cache_bin_array_descriptor_t;
@ -76,6 +113,67 @@ struct cache_bin_array_descriptor_s {
cache_bin_t *bins_large;
};

/*
* None of the cache_bin_*_get / _set functions is used on the fast path, which
* relies on pointer comparisons to determine if the cache is full / empty.
*/
static inline cache_bin_sz_t
cache_bin_ncached_get(cache_bin_t *bin, szind_t ind) {
cache_bin_sz_t n = tcache_bin_info[ind].ncached_max -
(bin->cur_ptr.lowbits - bin->full_position) / sizeof(void *);
assert(n >= 0 && n <= tcache_bin_info[ind].ncached_max);
assert(n == 0 || *(bin->cur_ptr.ptr) != NULL);

return n;
}
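As a worked example of the arithmetic above (hypothetical values, 64-bit pointers assumed):

/*
 * Worked example (hypothetical values): ncached_max == 8, sizeof(void *) == 8.
 *   full_position       == 0x1000
 *   cur_ptr.lowbits     == 0x1018
 *   ncached             == 8 - (0x1018 - 0x1000) / 8 == 5
 *   lowbits == 0x1000   -> ncached == 8 (full)
 *   lowbits == 0x1040   -> ncached == 0 (empty, full + 8 * 8)
 */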

static inline void **
cache_bin_empty_position_get(cache_bin_t *bin, szind_t ind) {
void **ret = bin->cur_ptr.ptr + cache_bin_ncached_get(bin, ind);
/* Low bits overflow disallowed when allocating the space. */
assert((uint32_t)(uintptr_t)ret >= bin->cur_ptr.lowbits);
assert(bin->full_position + tcache_bin_info[ind].ncached_max *
sizeof(void *) > bin->full_position);

/* Can also be computed via (full_position + ncached_max) | highbits. */
assert(ret == (void **)((uintptr_t)(bin->full_position +
tcache_bin_info[ind].ncached_max * sizeof(void *)) |
(uintptr_t)((uintptr_t)bin->cur_ptr.ptr &
~(((uint64_t)1 << 32) - 1))));

return ret;
}

/* Returns the position of the bottom item on the stack; for convenience. */
static inline void **
cache_bin_bottom_item_get(cache_bin_t *bin, szind_t ind) {
void **bottom = cache_bin_empty_position_get(bin, ind) - 1;
assert(cache_bin_ncached_get(bin, ind) == 0 || *bottom != NULL);

return bottom;
}

/* Returns the numeric value of low water in [-1, ncached]. */
static inline cache_bin_sz_t
cache_bin_low_water_get(cache_bin_t *bin, szind_t ind) {
cache_bin_sz_t low_water = tcache_bin_info[ind].ncached_max -
(bin->low_water_position - bin->full_position) / sizeof(void *);
assert(low_water >= -1 && low_water <=
tcache_bin_info[ind].ncached_max);
assert(low_water <= cache_bin_ncached_get(bin, ind));
assert(bin->low_water_position >= bin->cur_ptr.lowbits);

return low_water;
}

static inline void
cache_bin_ncached_set(cache_bin_t *bin, szind_t ind, cache_bin_sz_t n) {
bin->cur_ptr.lowbits = bin->full_position +
(tcache_bin_info[ind].ncached_max - n) * sizeof(void *);
assert(n >= 0 && n <= tcache_bin_info[ind].ncached_max);
assert(n == 0 || *bin->cur_ptr.ptr != NULL);
}

static inline void
cache_bin_array_descriptor_init(cache_bin_array_descriptor_t *descriptor,
cache_bin_t *bins_small, cache_bin_t *bins_large) {
@ -85,19 +183,24 @@ cache_bin_array_descriptor_init(cache_bin_array_descriptor_t *descriptor,
}

JEMALLOC_ALWAYS_INLINE void *
cache_bin_alloc_easy(cache_bin_t *bin, bool *success) {
void *ret;

bin->ncached--;

cache_bin_alloc_easy(cache_bin_t *bin, bool *success, cache_bin_sz_t ind) {
/*
* Check for both bin->ncached == 0 and ncached < low_water
* in a single branch.
* This may read from the empty position; however the loaded value won't
* be used. It's safe because the stack has one more slot reserved.
*/
if (unlikely(bin->ncached <= bin->low_water)) {
bin->low_water = bin->ncached;
if (bin->ncached == -1) {
bin->ncached = 0;
void *ret = *(bin->cur_ptr.ptr++);
/*
* Check for both bin->ncached == 0 and ncached < low_water in a single
* branch. This also avoids accessing tcache_bin_info (which is on a
* separate cacheline / page) in the common case.
*/
if (unlikely(bin->cur_ptr.lowbits >= bin->low_water_position)) {
bin->low_water_position = bin->cur_ptr.lowbits;
uint32_t empty_position = bin->full_position +
tcache_bin_info[ind].ncached_max * sizeof(void *);
if (bin->cur_ptr.lowbits > empty_position) {
bin->cur_ptr.ptr--;
assert(bin->cur_ptr.lowbits == empty_position);
*success = false;
return NULL;
}
@ -111,19 +214,18 @@ cache_bin_alloc_easy(cache_bin_t *bin, bool *success) {
* cacheline).
*/
*success = true;
ret = *(bin->avail - (bin->ncached + 1));

return ret;
}

JEMALLOC_ALWAYS_INLINE bool
cache_bin_dalloc_easy(cache_bin_t *bin, cache_bin_info_t *bin_info, void *ptr) {
if (unlikely(bin->ncached == bin_info->ncached_max)) {
cache_bin_dalloc_easy(cache_bin_t *bin, void *ptr) {
if (unlikely(bin->cur_ptr.lowbits == bin->full_position)) {
return false;
}
assert(bin->ncached < bin_info->ncached_max);
bin->ncached++;
*(bin->avail - bin->ncached) = ptr;

*(--bin->cur_ptr.ptr) = ptr;
assert(bin->cur_ptr.lowbits >= bin->full_position);

return true;
}

@ -130,8 +130,8 @@ tcache_available(tsd_t *tsd) {
if (likely(tsd_tcache_enabled_get(tsd))) {
/* Associated arena == NULL implies tcache init in progress. */
assert(tsd_tcachep_get(tsd)->arena == NULL ||
tcache_small_bin_get(tsd_tcachep_get(tsd), 0)->avail !=
NULL);
tcache_small_bin_get(tsd_tcachep_get(tsd), 0)->cur_ptr.ptr
!= NULL);
return true;
}

@ -4,8 +4,6 @@
extern bool opt_tcache;
extern ssize_t opt_lg_tcache_max;

extern cache_bin_info_t *tcache_bin_info;

/*
* Number of tcache bins. There are SC_NBINS small-object bins, plus 0 or more
* large-object bins.

@ -48,7 +48,7 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache,

assert(binind < SC_NBINS);
bin = tcache_small_bin_get(tcache, binind);
ret = cache_bin_alloc_easy(bin, &tcache_success);
ret = cache_bin_alloc_easy(bin, &tcache_success, binind);
assert(tcache_success == (ret != NULL));
if (unlikely(!tcache_success)) {
bool tcache_hard_success;
@ -109,7 +109,7 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size,

assert(binind >= SC_NBINS && binind < nhbins);
bin = tcache_large_bin_get(tcache, binind);
ret = cache_bin_alloc_easy(bin, &tcache_success);
ret = cache_bin_alloc_easy(bin, &tcache_success, binind);
assert(tcache_success == (ret != NULL));
if (unlikely(!tcache_success)) {
/*
@ -164,7 +164,6 @@ JEMALLOC_ALWAYS_INLINE void
tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind,
bool slow_path) {
cache_bin_t *bin;
cache_bin_info_t *bin_info;

assert(tcache_salloc(tsd_tsdn(tsd), ptr)
<= SC_SMALL_MAXCLASS);
@ -174,11 +173,10 @@ tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind,
}

bin = tcache_small_bin_get(tcache, binind);
bin_info = &tcache_bin_info[binind];
if (unlikely(!cache_bin_dalloc_easy(bin, bin_info, ptr))) {
if (unlikely(!cache_bin_dalloc_easy(bin, ptr))) {
tcache_bin_flush_small(tsd, tcache, bin, binind,
(bin_info->ncached_max >> 1));
bool ret = cache_bin_dalloc_easy(bin, bin_info, ptr);
tcache_bin_info[binind].ncached_max >> 1);
bool ret = cache_bin_dalloc_easy(bin, ptr);
assert(ret);
}

@ -189,7 +187,6 @@ JEMALLOC_ALWAYS_INLINE void
tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind,
bool slow_path) {
cache_bin_t *bin;
cache_bin_info_t *bin_info;

assert(tcache_salloc(tsd_tsdn(tsd), ptr)
> SC_SMALL_MAXCLASS);
@ -200,11 +197,10 @@ tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind,
}

bin = tcache_large_bin_get(tcache, binind);
bin_info = &tcache_bin_info[binind];
if (unlikely(!cache_bin_dalloc_easy(bin, bin_info, ptr))) {
if (unlikely(!cache_bin_dalloc_easy(bin, ptr))) {
tcache_bin_flush_large(tsd, tcache, bin, binind,
(bin_info->ncached_max >> 1));
bool ret = cache_bin_dalloc_easy(bin, bin_info, ptr);
tcache_bin_info[binind].ncached_max >> 1);
bool ret = cache_bin_dalloc_easy(bin, ptr);
assert(ret);
}

src/arena.c (20 lines changed)
@ -202,12 +202,13 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads,
for (szind_t i = 0; i < SC_NBINS; i++) {
cache_bin_t *tbin = &descriptor->bins_small[i];
arena_stats_accum_zu(&astats->tcache_bytes,
tbin->ncached * sz_index2size(i));
cache_bin_ncached_get(tbin, i) * sz_index2size(i));
}
for (szind_t i = 0; i < nhbins - SC_NBINS; i++) {
cache_bin_t *tbin = &descriptor->bins_large[i];
arena_stats_accum_zu(&astats->tcache_bytes,
tbin->ncached * sz_index2size(i));
cache_bin_ncached_get(tbin, i + SC_NBINS) *
sz_index2size(i));
}
}
malloc_mutex_prof_read(tsdn,
@ -1381,7 +1382,7 @@ arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache,
cache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes) {
unsigned i, nfill, cnt;

assert(tbin->ncached == 0);
assert(cache_bin_ncached_get(tbin, binind) == 0);

if (config_prof && arena_prof_accum(tsdn, arena, prof_accumbytes)) {
prof_idump(tsdn);
@ -1390,6 +1391,7 @@ arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache,
unsigned binshard;
bin_t *bin = arena_bin_choose_lock(tsdn, arena, binind, &binshard);

void **empty_position = cache_bin_empty_position_get(tbin, binind);
for (i = 0, nfill = (tcache_bin_info[binind].ncached_max >>
tcache->lg_fill_div[binind]); i < nfill; i += cnt) {
extent_t *slab;
@ -1400,7 +1402,7 @@ arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache,
tofill : extent_nfree_get(slab);
arena_slab_reg_alloc_batch(
slab, &bin_infos[binind], cnt,
tbin->avail - nfill + i);
empty_position - nfill + i);
} else {
cnt = 1;
void *ptr = arena_bin_malloc_hard(tsdn, arena, bin,
@ -1412,18 +1414,18 @@ arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache,
*/
if (ptr == NULL) {
if (i > 0) {
memmove(tbin->avail - i,
tbin->avail - nfill,
memmove(empty_position - i,
empty_position - nfill,
i * sizeof(void *));
}
break;
}
/* Insert such that low regions get used first. */
*(tbin->avail - nfill + i) = ptr;
*(empty_position - nfill + i) = ptr;
}
if (config_fill && unlikely(opt_junk_alloc)) {
for (unsigned j = 0; j < cnt; j++) {
void* ptr = *(tbin->avail - nfill + i + j);
void* ptr = *(empty_position - nfill + i + j);
arena_alloc_junk_small(ptr, &bin_infos[binind],
true);
}
@ -1437,7 +1439,7 @@ arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache,
tbin->tstats.nrequests = 0;
}
malloc_mutex_unlock(tsdn, &bin->lock);
tbin->ncached = i;
cache_bin_ncached_set(tbin, binind, i);
arena_decay_tick(tsdn, arena);
}

@ -2368,7 +2368,7 @@ je_malloc(size_t size) {

cache_bin_t *bin = tcache_small_bin_get(tcache, ind);
bool tcache_success;
void* ret = cache_bin_alloc_easy(bin, &tcache_success);
void *ret = cache_bin_alloc_easy(bin, &tcache_success, ind);

if (tcache_success) {
if (config_stats) {
@ -2846,8 +2846,7 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) {
}

cache_bin_t *bin = tcache_small_bin_get(tcache, alloc_ctx.szind);
cache_bin_info_t *bin_info = &tcache_bin_info[alloc_ctx.szind];
if (!cache_bin_dalloc_easy(bin, bin_info, ptr)) {
if (!cache_bin_dalloc_easy(bin, ptr)) {
return false;
}

src/tcache.c (186 lines changed)
@ -14,7 +14,16 @@ bool opt_tcache = true;
ssize_t opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT;

cache_bin_info_t *tcache_bin_info;
static unsigned stack_nelms; /* Total stack elms per tcache. */
/*
* For the total bin stack region (per tcache), reserve 2 more slots so that 1)
* the empty position can be safely read on the fast path before checking
* "is_empty"; and 2) the low_water == -1 case can go beyond the empty position
* by 1 step safely (i.e. no overflow).
*/
static const unsigned total_stack_padding = sizeof(void *) * 2;
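To make the reservation concrete, here is an illustrative layout sketch for the per-tcache stack region (the bin sizes are made up; only the two trailing padding slots matter):

/*
 * Illustrative region for one tcache with two bins (ncached_max 3 and 2):
 *
 *   bin0.full             bin0.empty == bin1.full     bin1.empty
 *   | b0s0 | b0s1 | b0s2 | b1s0 | b1s1 |  pad  |  pad  |
 *
 * The two reserved slots keep two corner cases inside the allocation:
 * reading *cur_ptr at the last bin's empty position on the alloc fast path,
 * and low_water stepping one slot past the empty position (low_water == -1).
 *
 * Sizing (mirrors tcache_boot below):
 *   total_stack_bytes = stack_nelms * sizeof(void *) + total_stack_padding;
 */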

/* Total stack size required (per tcache). Include the padding above. */
static uint32_t total_stack_bytes;

unsigned nhbins;
size_t tcache_maxclass;
@ -47,14 +56,16 @@ tcache_event_hard(tsd_t *tsd, tcache_t *tcache) {
} else {
tbin = tcache_large_bin_get(tcache, binind);
}
if (tbin->low_water > 0) {

cache_bin_sz_t low_water = cache_bin_low_water_get(tbin, binind);
cache_bin_sz_t ncached = cache_bin_ncached_get(tbin, binind);
if (low_water > 0) {
/*
* Flush (ceiling) 3/4 of the objects below the low water mark.
*/
if (binind < SC_NBINS) {
tcache_bin_flush_small(tsd, tcache, tbin, binind,
tbin->ncached - tbin->low_water + (tbin->low_water
>> 2));
ncached - low_water + (low_water >> 2));
/*
* Reduce fill count by 2X. Limit lg_fill_div such that
* the fill count is always at least 1.
@ -66,10 +77,10 @@ tcache_event_hard(tsd_t *tsd, tcache_t *tcache) {
}
} else {
tcache_bin_flush_large(tsd, tcache, tbin, binind,
tbin->ncached - tbin->low_water + (tbin->low_water
>> 2));
ncached - low_water + (low_water >> 2));
}
} else if (tbin->low_water < 0) {
} else if (low_water < 0) {
assert(low_water == -1);
/*
* Increase fill count by 2X for small bins. Make sure
* lg_fill_div stays greater than 0.
@ -78,7 +89,7 @@ tcache_event_hard(tsd_t *tsd, tcache_t *tcache) {
tcache->lg_fill_div[binind]--;
}
}
tbin->low_water = tbin->ncached;
tbin->low_water_position = tbin->cur_ptr.lowbits;

tcache->next_gc_bin++;
if (tcache->next_gc_bin == nhbins) {
@ -97,7 +108,7 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache,
if (config_prof) {
tcache->prof_accumbytes = 0;
}
ret = cache_bin_alloc_easy(tbin, tcache_success);
ret = cache_bin_alloc_easy(tbin, tcache_success, binind);

return ret;
}
@ -117,9 +128,10 @@ tbin_extents_lookup_size_check(tsdn_t *tsdn, cache_bin_t *tbin, szind_t binind,
*/
szind_t szind;
size_t sz_sum = binind * nflush;
void **bottom_item = cache_bin_bottom_item_get(tbin, binind);
for (unsigned i = 0 ; i < nflush; i++) {
rtree_extent_szind_read(tsdn, &extents_rtree,
rtree_ctx, (uintptr_t)*(tbin->avail - 1 - i), true,
rtree_ctx, (uintptr_t)*(bottom_item - i), true,
&extents[i], &szind);
sz_sum -= szind;
}
@ -137,13 +149,15 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin,
bool merged_stats = false;

assert(binind < SC_NBINS);
assert((cache_bin_sz_t)rem <= tbin->ncached);
cache_bin_sz_t ncached = cache_bin_ncached_get(tbin, binind);
assert((cache_bin_sz_t)rem <= ncached);

arena_t *arena = tcache->arena;
assert(arena != NULL);
unsigned nflush = tbin->ncached - rem;
unsigned nflush = ncached - rem;
VARIABLE_ARRAY(extent_t *, item_extent, nflush);

void **bottom_item = cache_bin_bottom_item_get(tbin, binind);
/* Look up extent once per item. */
if (config_opt_safety_checks) {
tbin_extents_lookup_size_check(tsd_tsdn(tsd), tbin, binind,
@ -151,7 +165,7 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin,
} else {
for (unsigned i = 0 ; i < nflush; i++) {
item_extent[i] = iealloc(tsd_tsdn(tsd),
*(tbin->avail - 1 - i));
*(bottom_item - i));
}
}
while (nflush > 0) {
@ -181,7 +195,7 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin,
}
unsigned ndeferred = 0;
for (unsigned i = 0; i < nflush; i++) {
void *ptr = *(tbin->avail - 1 - i);
void *ptr = *(bottom_item - i);
extent = item_extent[i];
assert(ptr != NULL && extent != NULL);

@ -196,7 +210,7 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin,
* locked. Stash the object, so that it can be
* handled in a future pass.
*/
*(tbin->avail - 1 - ndeferred) = ptr;
*(bottom_item - ndeferred) = ptr;
item_extent[ndeferred] = extent;
ndeferred++;
}
@ -219,11 +233,11 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin,
malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock);
}

memmove(tbin->avail - rem, tbin->avail - tbin->ncached, rem *
memmove(tbin->cur_ptr.ptr + (ncached - rem), tbin->cur_ptr.ptr, rem *
sizeof(void *));
tbin->ncached = rem;
if (tbin->ncached < tbin->low_water) {
tbin->low_water = tbin->ncached;
cache_bin_ncached_set(tbin, binind, rem);
if (tbin->cur_ptr.lowbits > tbin->low_water_position) {
tbin->low_water_position = tbin->cur_ptr.lowbits;
}
}

@ -233,17 +247,19 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, szind_t
bool merged_stats = false;

assert(binind < nhbins);
assert((cache_bin_sz_t)rem <= tbin->ncached);
cache_bin_sz_t ncached = cache_bin_ncached_get(tbin, binind);
assert((cache_bin_sz_t)rem <= ncached);

arena_t *tcache_arena = tcache->arena;
assert(tcache_arena != NULL);
unsigned nflush = tbin->ncached - rem;
unsigned nflush = ncached - rem;
VARIABLE_ARRAY(extent_t *, item_extent, nflush);

void **bottom_item = cache_bin_bottom_item_get(tbin, binind);
#ifndef JEMALLOC_EXTRA_SIZE_CHECK
/* Look up extent once per item. */
for (unsigned i = 0 ; i < nflush; i++) {
item_extent[i] = iealloc(tsd_tsdn(tsd), *(tbin->avail - 1 - i));
item_extent[i] = iealloc(tsd_tsdn(tsd), *(bottom_item - i));
}
#else
tbin_extents_lookup_size_check(tsd_tsdn(tsd), tbin, binind, nflush,
@ -266,7 +282,7 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, szind_t
malloc_mutex_lock(tsd_tsdn(tsd), &locked_arena->large_mtx);
}
for (unsigned i = 0; i < nflush; i++) {
void *ptr = *(tbin->avail - 1 - i);
void *ptr = *(bottom_item - i);
assert(ptr != NULL);
extent = item_extent[i];
if (extent_arena_ind_get(extent) == locked_arena_ind) {
@ -295,7 +311,7 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, szind_t

unsigned ndeferred = 0;
for (unsigned i = 0; i < nflush; i++) {
void *ptr = *(tbin->avail - 1 - i);
void *ptr = *(bottom_item - i);
extent = item_extent[i];
assert(ptr != NULL && extent != NULL);

@ -308,7 +324,7 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, szind_t
* Stash the object, so that it can be handled
* in a future pass.
*/
*(tbin->avail - 1 - ndeferred) = ptr;
*(bottom_item - ndeferred) = ptr;
item_extent[ndeferred] = extent;
ndeferred++;
}
@ -330,11 +346,11 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, szind_t
tbin->tstats.nrequests = 0;
}

memmove(tbin->avail - rem, tbin->avail - tbin->ncached, rem *
memmove(tbin->cur_ptr.ptr + (ncached - rem), tbin->cur_ptr.ptr, rem *
sizeof(void *));
tbin->ncached = rem;
if (tbin->ncached < tbin->low_water) {
tbin->low_water = tbin->ncached;
cache_bin_ncached_set(tbin, binind, rem);
if (tbin->cur_ptr.lowbits > tbin->low_water_position) {
tbin->low_water_position = tbin->cur_ptr.lowbits;
}
}

@ -406,9 +422,43 @@ tsd_tcache_enabled_data_init(tsd_t *tsd) {
return false;
}

/* Initialize auto tcache (embedded in TSD). */
static bool
tcache_bin_init(cache_bin_t *bin, szind_t ind, uintptr_t *stack_cur) {
cassert(sizeof(bin->cur_ptr) == sizeof(void *));
/*
* The full_position points to the lowest available space. Allocations
* will access the slots toward higher addresses (for the benefit of
* adjacent prefetch).
*/
void *full_position = (void *)*stack_cur;
uint32_t bin_stack_size = tcache_bin_info[ind].ncached_max *
sizeof(void *);

*stack_cur += bin_stack_size;
void *empty_position = (void *)*stack_cur;

/* Init to the empty position. */
bin->cur_ptr.ptr = empty_position;
bin->low_water_position = bin->cur_ptr.lowbits;
bin->full_position = (uint32_t)(uintptr_t)full_position;
assert(bin->cur_ptr.lowbits - bin->full_position == bin_stack_size);
assert(cache_bin_ncached_get(bin, ind) == 0);
assert(cache_bin_empty_position_get(bin, ind) == empty_position);

return false;
}
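A worked pass through tcache_bin_init with hypothetical numbers, to make the pointer bookkeeping concrete:

/*
 * Hypothetical values, 64-bit pointers:
 *   before: *stack_cur == 0x1000, tcache_bin_info[ind].ncached_max == 4
 *   bin_stack_size     == 4 * 8 == 32 (0x20)
 *   full_position      == 0x1000
 *   after:  *stack_cur == 0x1020 (the empty position)
 *   cur_ptr.ptr        == 0x1020, low_water_position == 0x1020
 *   cache_bin_ncached_get() == 4 - (0x1020 - 0x1000) / 8 == 0
 */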

/* Sanity check only. */
static bool
tcache_bin_lowbits_overflowable(void *ptr) {
uint32_t lowbits = (uint32_t)((uintptr_t)ptr + total_stack_bytes);
return lowbits < (uint32_t)(uintptr_t)ptr;
}

static void
tcache_init(tsd_t *tsd, tcache_t *tcache, void *avail_stack) {
assert(!tcache_bin_lowbits_overflowable(avail_stack));

memset(&tcache->link, 0, sizeof(ql_elm(tcache_t)));
tcache->prof_accumbytes = 0;
tcache->next_gc_bin = 0;
@ -416,41 +466,43 @@ tcache_init(tsd_t *tsd, tcache_t *tcache, void *avail_stack) {

ticker_init(&tcache->gc_ticker, TCACHE_GC_INCR);

size_t stack_offset = 0;
assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0);
memset(tcache->bins_small, 0, sizeof(cache_bin_t) * SC_NBINS);
memset(tcache->bins_large, 0, sizeof(cache_bin_t) * (nhbins - SC_NBINS));

unsigned i = 0;
uintptr_t stack_cur = (uintptr_t)avail_stack;
for (; i < SC_NBINS; i++) {
tcache->lg_fill_div[i] = 1;
stack_offset += tcache_bin_info[i].ncached_max * sizeof(void *);
/*
* avail points past the available space. Allocations will
* access the slots toward higher addresses (for the benefit of
* prefetch).
*/
tcache_small_bin_get(tcache, i)->avail =
(void **)((uintptr_t)avail_stack + (uintptr_t)stack_offset);
cache_bin_t *bin = tcache_small_bin_get(tcache, i);
tcache_bin_init(bin, i, &stack_cur);
}
for (; i < nhbins; i++) {
stack_offset += tcache_bin_info[i].ncached_max * sizeof(void *);
tcache_large_bin_get(tcache, i)->avail =
(void **)((uintptr_t)avail_stack + (uintptr_t)stack_offset);
cache_bin_t *bin = tcache_large_bin_get(tcache, i);
tcache_bin_init(bin, i, &stack_cur);
}
assert(stack_offset == stack_nelms * sizeof(void *));

/* Sanity check that the whole stack is used. */
size_t stack_offset = stack_cur - (uintptr_t)avail_stack;
assert(stack_offset + total_stack_padding == total_stack_bytes);
}

static size_t
tcache_bin_stack_alignment(size_t size) {
/* Align pow2 to avoid overflowing the cache bin compressed pointers. */
return (LG_SIZEOF_PTR == 3) ? pow2_ceil_zu(size) : CACHELINE;
}
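The power-of-two alignment is what makes the 32-bit lowbits arithmetic safe. A sketch of the reasoning, using a hypothetical helper (sketch_lowbits_safe is not part of the library; it simply mirrors the negation of tcache_bin_lowbits_overflowable above):

/*
 * Sketch only: with the region base aligned to P = pow2_ceil(size) and
 * size <= P <= 2^32, the base's low 32 bits are a multiple of P, so
 * base % 2^32 <= 2^32 - size. The region never crosses a 4 GiB boundary
 * and the compressed lowbits never wrap.
 */
static bool
sketch_lowbits_safe(uintptr_t base, size_t size) {
	return (uint32_t)(base + size) >= (uint32_t)base;
}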

/* Initialize auto tcache (embedded in TSD). */
bool
tsd_tcache_data_init(tsd_t *tsd) {
tcache_t *tcache = tsd_tcachep_get_unsafe(tsd);
assert(tcache_small_bin_get(tcache, 0)->avail == NULL);
size_t size = stack_nelms * sizeof(void *);
assert(tcache_small_bin_get(tcache, 0)->cur_ptr.ptr == NULL);
/* Avoid false cacheline sharing. */
size = sz_sa2u(size, CACHELINE);

void *avail_array = ipallocztm(tsd_tsdn(tsd), size, CACHELINE, true,
NULL, true, arena_get(TSDN_NULL, 0, true));
size_t size = sz_sa2u(total_stack_bytes, CACHELINE);
void *avail_array = ipallocztm(tsd_tsdn(tsd), size,
tcache_bin_stack_alignment(size), true, NULL, true,
arena_get(TSDN_NULL, 0, true));
if (avail_array == NULL) {
return true;
}
@ -485,25 +537,24 @@ tsd_tcache_data_init(tsd_t *tsd) {
/* Created manual tcache for tcache.create mallctl. */
tcache_t *
tcache_create_explicit(tsd_t *tsd) {
tcache_t *tcache;
size_t size, stack_offset;

size = sizeof(tcache_t);
size_t size = sizeof(tcache_t);
/* Naturally align the pointer stacks. */
size = PTR_CEILING(size);
stack_offset = size;
size += stack_nelms * sizeof(void *);
size_t stack_offset = size;
size += total_stack_bytes;
/* Avoid false cacheline sharing. */
size = sz_sa2u(size, CACHELINE);

tcache = ipallocztm(tsd_tsdn(tsd), size, CACHELINE, true, NULL, true,
tcache_t *tcache = ipallocztm(tsd_tsdn(tsd), size,
tcache_bin_stack_alignment(size), true, NULL, true,
arena_get(TSDN_NULL, 0, true));
if (tcache == NULL) {
return NULL;
}

tcache_init(tsd, tcache,
(void *)((uintptr_t)tcache + (uintptr_t)stack_offset));
void *avail_array = (void *)((uintptr_t)tcache +
(uintptr_t)stack_offset);
tcache_init(tsd, tcache, avail_array);
tcache_arena_associate(tsd_tsdn(tsd), tcache, arena_ichoose(tsd, NULL));

return tcache;
@ -553,9 +604,12 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache, bool tsd_tcache) {

if (tsd_tcache) {
/* Release the avail array for the TSD embedded auto tcache. */
void *avail_array =
(void *)((uintptr_t)tcache_small_bin_get(tcache, 0)->avail -
(uintptr_t)tcache_bin_info[0].ncached_max * sizeof(void *));
cache_bin_t *bin = tcache_small_bin_get(tcache, 0);
assert(cache_bin_ncached_get(bin, 0) == 0);
assert(cache_bin_empty_position_get(bin, 0) ==
bin->cur_ptr.ptr);
void *avail_array = bin->cur_ptr.ptr -
tcache_bin_info[0].ncached_max;
idalloctm(tsd_tsdn(tsd), avail_array, NULL, NULL, true, true);
} else {
/* Release both the tcache struct and avail array. */
@ -587,16 +641,17 @@ tcache_cleanup(tsd_t *tsd) {
if (!tcache_available(tsd)) {
assert(tsd_tcache_enabled_get(tsd) == false);
if (config_debug) {
assert(tcache_small_bin_get(tcache, 0)->avail == NULL);
assert(tcache_small_bin_get(tcache, 0)->cur_ptr.ptr
== NULL);
}
return;
}
assert(tsd_tcache_enabled_get(tsd));
assert(tcache_small_bin_get(tcache, 0)->avail != NULL);
assert(tcache_small_bin_get(tcache, 0)->cur_ptr.ptr != NULL);

tcache_destroy(tsd, tcache, true);
if (config_debug) {
tcache_small_bin_get(tcache, 0)->avail = NULL;
tcache_small_bin_get(tcache, 0)->cur_ptr.ptr = NULL;
}
}

@ -755,8 +810,8 @@ tcache_boot(tsdn_t *tsdn) {
if (tcache_bin_info == NULL) {
return true;
}
unsigned i, stack_nelms;
stack_nelms = 0;
unsigned i;
for (i = 0; i < SC_NBINS; i++) {
if ((bin_infos[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MIN) {
tcache_bin_info[i].ncached_max =
@ -775,6 +830,7 @@ tcache_boot(tsdn_t *tsdn) {
tcache_bin_info[i].ncached_max = TCACHE_NSLOTS_LARGE;
stack_nelms += tcache_bin_info[i].ncached_max;
}
total_stack_bytes = stack_nelms * sizeof(void *) + total_stack_padding;

return false;
}

test/unit/cache_bin.c (new file, 64 lines)
@ -0,0 +1,64 @@
#include "test/jemalloc_test.h"

cache_bin_t test_bin;

TEST_BEGIN(test_cache_bin) {
cache_bin_t *bin = &test_bin;
cassert(PAGE > TCACHE_NSLOTS_SMALL_MAX * sizeof(void *));
/* Page aligned to make sure lowbits not overflowable. */
void **stack = mallocx(PAGE, MALLOCX_TCACHE_NONE | MALLOCX_ALIGN(PAGE));

assert_ptr_not_null(stack, "Unexpected mallocx failure");
/* Initialize to empty; bin 0. */
cache_bin_sz_t ncached_max = tcache_bin_info[0].ncached_max;
void **empty_position = stack + ncached_max;
bin->cur_ptr.ptr = empty_position;
bin->low_water_position = bin->cur_ptr.lowbits;
bin->full_position = (uint32_t)(uintptr_t)stack;
assert_ptr_eq(cache_bin_empty_position_get(bin, 0), empty_position,
"Incorrect empty position");
/* Not using assert_zu etc on cache_bin_sz_t since it may change. */
assert_true(cache_bin_ncached_get(bin, 0) == 0, "Incorrect cache size");

bool success;
void *ret = cache_bin_alloc_easy(bin, &success, 0);
assert_false(success, "Empty cache bin should not alloc");
assert_true(cache_bin_low_water_get(bin, 0) == -1,
"Incorrect low water mark");

cache_bin_ncached_set(bin, 0, 0);
assert_ptr_eq(bin->cur_ptr.ptr, empty_position, "Bin should be empty");
for (cache_bin_sz_t i = 1; i < ncached_max + 1; i++) {
success = cache_bin_dalloc_easy(bin, (void *)(uintptr_t)i);
assert_true(success && cache_bin_ncached_get(bin, 0) == i,
"Bin dalloc failure");
}
success = cache_bin_dalloc_easy(bin, (void *)1);
assert_false(success, "Bin should be full");
assert_ptr_eq(bin->cur_ptr.ptr, stack, "Incorrect bin cur_ptr");

cache_bin_ncached_set(bin, 0, ncached_max);
assert_ptr_eq(bin->cur_ptr.ptr, stack, "cur_ptr should not change");
/* Emulate low water after refill. */
bin->low_water_position = bin->full_position;
for (cache_bin_sz_t i = ncached_max; i > 0; i--) {
ret = cache_bin_alloc_easy(bin, &success, 0);
cache_bin_sz_t ncached = cache_bin_ncached_get(bin, 0);
assert_true(success && ncached == i - 1,
"Cache bin alloc failure");
assert_ptr_eq(ret, (void *)(uintptr_t)i, "Bin alloc failure");
assert_true(cache_bin_low_water_get(bin, 0) == ncached,
"Incorrect low water mark");
}

ret = cache_bin_alloc_easy(bin, &success, 0);
assert_false(success, "Empty cache bin should not alloc.");
assert_ptr_eq(bin->cur_ptr.ptr, stack + ncached_max,
"Bin should be empty");
}
TEST_END

int
main(void) {
return test(test_cache_bin);
}