Cache bin: rewrite to track more state.
With this, we track all of the empty, full, and low water states together. This simplifies a lot of the tracking logic, since we now don't need the cache_bin_info_t for state queries (except for some debugging).
parent fef0b1ffe4
commit 397da03865
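The diff below replaces the old cur_ptr union (a full pointer whose low 32 bits doubled as a position) with one real pointer, stack_head, plus three 16-bit snapshots of the low bits of where that head sits when the bin is full, empty, and at its low-water mark. Empty/full checks and ncached then reduce to 16-bit compares and wraparound-safe subtractions on fields that live in the bin itself. A minimal standalone sketch of the scheme, not jemalloc source -- the sketch_* names and the main() driver are invented for illustration:

/* Sketch only: mirrors the patch's representation with simplified types. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

typedef struct {
	void **stack_head;           /* Full pointer; the stack grows down. */
	uint16_t low_bits_low_water; /* Low bits of head at the low-water mark. */
	uint16_t low_bits_full;      /* Low bits of head when completely full. */
	uint16_t low_bits_empty;     /* Low bits of head when completely empty. */
} sketch_bin_t;

/*
 * Wraparound-safe distance in bytes.  The bin's stack spans far less than
 * 2^16 bytes, so 16-bit subtraction recovers the exact distance even if the
 * low bits wrapped past zero between "earlier" and "later".
 */
static uint16_t
sketch_diff(uint16_t earlier, uint16_t later) {
	return (uint16_t)(later - earlier);
}

static size_t
sketch_ncached(const sketch_bin_t *bin) {
	return sketch_diff((uint16_t)(uintptr_t)bin->stack_head,
	    bin->low_bits_empty) / sizeof(void *);
}

int
main(void) {
	void *slots[8]; /* An 8-slot stack; empty == one past the last slot. */
	sketch_bin_t bin = {
		.stack_head = &slots[8],
		.low_bits_full = (uint16_t)(uintptr_t)&slots[0],
		.low_bits_empty = (uint16_t)(uintptr_t)&slots[8],
	};
	bin.low_bits_low_water = bin.low_bits_empty;
	assert(sketch_ncached(&bin) == 0);

	/* Cache two freed items: push by moving the head down, as in the patch. */
	int a, b;
	*--bin.stack_head = &a;
	*--bin.stack_head = &b;
	assert(sketch_ncached(&bin) == 2);

	/* Empty/full checks are single 16-bit compares, as on the fast paths. */
	uint16_t low_bits = (uint16_t)(uintptr_t)bin.stack_head;
	printf("ncached=%zu full=%d empty=%d\n", sketch_ncached(&bin),
	    low_bits == bin.low_bits_full, low_bits == bin.low_bits_empty);
	return 0;
}

With this layout, the deallocation fast path needs only the bin itself: compare the head's low bits against low_bits_full, then store and decrement.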
--- a/include/jemalloc/internal/cache_bin.h
+++ b/include/jemalloc/internal/cache_bin.h
@@ -35,67 +35,53 @@ struct cache_bin_stats_s {
  */
 
 typedef struct cache_bin_info_s cache_bin_info_t;
 struct cache_bin_info_s {
-	/* The size of the bin stack, i.e. ncached_max * sizeof(ptr). */
-	cache_bin_sz_t stack_size;
+	cache_bin_sz_t ncached_max;
 };
 
 typedef struct cache_bin_s cache_bin_t;
 struct cache_bin_s {
 	/*
-	 * The cache bin stack is represented using 3 pointers: cur_ptr,
-	 * low_water and full, optimized for the fast path efficiency.
-	 *
-	 * low addr ==> high addr
-	 * |----|----|----|item1|item2|.....................|itemN|
-	 *   full            cur                             empty
-	 * (ncached == N; full + ncached_max == empty)
-	 *
-	 * Data directly stored:
-	 * 1) cur_ptr points to the current item to be allocated, i.e. *cur_ptr.
-	 * 2) full points to the top of the stack (i.e. ncached == ncached_max),
-	 *    which is compared against on free_fastpath to check "is_full".
-	 * 3) low_water indicates a low water mark of ncached.
-	 *    Range of low_water is [cur, empty], i.e. values of [ncached, 0].
-	 *
-	 * The empty position (ncached == 0) is derived via full + ncached_max
-	 * and not accessed in the common case (guarded behind low_water).
-	 *
-	 * On 64-bit, 2 of the 3 pointers (full and low water) are compressed by
-	 * omitting the high 32 bits.  Overflow of the half pointers is avoided
-	 * when allocating / initializing the stack space.  As a result,
-	 * cur_ptr.lowbits can be safely used for pointer comparisons.
+	 * The stack grows down.  Whenever the bin is nonempty, the head points
+	 * to an array entry containing a valid allocation.  When it is empty,
+	 * the head points to one element past the owned array.
 	 */
-	union {
-		void **ptr;
-		struct {
-			/* highbits never accessed directly. */
-#if (LG_SIZEOF_PTR == 3 && defined(JEMALLOC_BIG_ENDIAN))
-			uint32_t __highbits;
-#endif
-			uint32_t lowbits;
-#if (LG_SIZEOF_PTR == 3 && !defined(JEMALLOC_BIG_ENDIAN))
-			uint32_t __highbits;
-#endif
-		};
-	} cur_ptr;
+	void **stack_head;
+
+	/*
+	 * The low bits of the address of the first item in the stack that
+	 * hasn't been used since the last GC, to track the low water mark (min
+	 * # of cached items).
+	 *
+	 * Since the stack grows down, this is a higher address than
+	 * low_bits_full.
+	 */
+	uint16_t low_bits_low_water;
+
+	/*
+	 * The low bits of the value that stack_head will take on when the array
+	 * is full.  (But remember that stack_head always points to a valid item
+	 * when the array is nonempty -- this is in the array).
+	 *
+	 * Recall that since the stack grows down, this is the lowest address in
+	 * the array.
+	 */
+	uint16_t low_bits_full;
+
+	/*
+	 * The low bits of the value that stack_head will take on when the array
+	 * is empty.
+	 *
+	 * The stack grows down -- this is one past the highest address in the
+	 * array.
+	 */
+	uint16_t low_bits_empty;
+
 	/*
 	 * cur_ptr and stats are both modified frequently.  Let's keep them
 	 * close so that they have a higher chance of being on the same
 	 * cacheline, thus less write-backs.
 	 */
 	cache_bin_stats_t tstats;
-	/*
-	 * Points to the first item that hasn't been used since last GC, to
-	 * track the low water mark (min # of cached).
-	 */
-	uint32_t low_water_position;
-	/*
-	 * Points to the position when the cache is full.
-	 *
-	 * To make use of adjacent cacheline prefetch, the items in the avail
-	 * stack goes to higher address for newer allocations (i.e. cur_ptr++).
-	 */
-	uint32_t full_position;
 };
 
 typedef struct cache_bin_array_descriptor_s cache_bin_array_descriptor_t;
@@ -118,30 +104,51 @@ struct cache_bin_array_descriptor_s {
 
 /* Returns ncached_max: Upper limit on ncached. */
 static inline cache_bin_sz_t
 cache_bin_info_ncached_max(cache_bin_info_t *info) {
-	return info->stack_size / sizeof(void *);
+	return info->ncached_max;
 }
 
+/*
+ * Asserts that the pointer associated with earlier is <= the one associated
+ * with later.
+ */
+static inline void
+cache_bin_assert_earlier(cache_bin_t *bin, uint16_t earlier, uint16_t later) {
+	if (earlier > later) {
+		assert(bin->low_bits_full > bin->low_bits_empty);
+	}
+}
+
+/*
+ * Internal -- does difference calculations that handle wraparound correctly.
+ * Earlier must be associated with the position earlier in memory.
+ */
+static inline uint16_t
+cache_bin_diff(cache_bin_t *bin, uint16_t earlier, uint16_t later) {
+	cache_bin_assert_earlier(bin, earlier, later);
+	return later - earlier;
+}
+
 static inline cache_bin_sz_t
 cache_bin_ncached_get(cache_bin_t *bin, cache_bin_info_t *info) {
-	cache_bin_sz_t n = (cache_bin_sz_t)((info->stack_size +
-	    bin->full_position - bin->cur_ptr.lowbits) / sizeof(void *));
+	cache_bin_sz_t diff = cache_bin_diff(bin,
+	    (uint16_t)(uintptr_t)bin->stack_head, bin->low_bits_empty);
+	cache_bin_sz_t n = diff / sizeof(void *);
+
 	assert(n <= cache_bin_info_ncached_max(info));
-	assert(n == 0 || *(bin->cur_ptr.ptr) != NULL);
+	assert(n == 0 || *(bin->stack_head) != NULL);
 
 	return n;
 }
 
 static inline void **
 cache_bin_empty_position_get(cache_bin_t *bin, cache_bin_info_t *info) {
-	void **ret = bin->cur_ptr.ptr + cache_bin_ncached_get(bin, info);
-	/* Low bits overflow disallowed when allocating the space. */
-	assert((uint32_t)(uintptr_t)ret >= bin->cur_ptr.lowbits);
+	cache_bin_sz_t diff = cache_bin_diff(bin,
+	    (uint16_t)(uintptr_t)bin->stack_head, bin->low_bits_empty);
+	uintptr_t empty_bits = (uintptr_t)bin->stack_head + diff;
+	void **ret = (void **)empty_bits;
 
-	/* Can also be computed via (full_position + ncached_max) | highbits. */
-	uintptr_t lowbits = bin->full_position + info->stack_size;
-	uintptr_t highbits = (uintptr_t)bin->cur_ptr.ptr &
-	    ~(((uint64_t)1 << 32) - 1);
-	assert(ret == (void **)(lowbits | highbits));
+	assert(ret >= bin->stack_head);
 
 	return ret;
 }
@@ -149,20 +156,29 @@ cache_bin_empty_position_get(cache_bin_t *bin, cache_bin_info_t *info) {
 static inline void
 cache_bin_assert_empty(cache_bin_t *bin, cache_bin_info_t *info) {
 	assert(cache_bin_ncached_get(bin, info) == 0);
-	assert(cache_bin_empty_position_get(bin, info) == bin->cur_ptr.ptr);
+	assert(cache_bin_empty_position_get(bin, info) == bin->stack_head);
 }
 
+/*
+ * Get low water, but without any of the correctness checking we do for the
+ * caller-usable version, if we are temporarily breaking invariants (like
+ * ncached >= low_water during flush).
+ */
+static inline cache_bin_sz_t
+cache_bin_low_water_get_internal(cache_bin_t *bin, cache_bin_info_t *info) {
+	return cache_bin_diff(bin, bin->low_bits_low_water,
+	    bin->low_bits_empty) / sizeof(void *);
+}
+
 /* Returns the numeric value of low water in [0, ncached]. */
 static inline cache_bin_sz_t
 cache_bin_low_water_get(cache_bin_t *bin, cache_bin_info_t *info) {
-	cache_bin_sz_t ncached_max = cache_bin_info_ncached_max(info);
-	cache_bin_sz_t low_water = ncached_max -
-	    (cache_bin_sz_t)((bin->low_water_position - bin->full_position) /
-	    sizeof(void *));
-	assert(low_water <= ncached_max);
+	cache_bin_sz_t low_water = cache_bin_low_water_get_internal(bin, info);
+	assert(low_water <= cache_bin_info_ncached_max(info));
 	assert(low_water <= cache_bin_ncached_get(bin, info));
-	assert(bin->low_water_position >= bin->cur_ptr.lowbits);
+
+	cache_bin_assert_earlier(bin, (uint16_t)(uintptr_t)bin->stack_head,
+	    bin->low_bits_low_water);
 
 	return low_water;
 }
@@ -173,20 +189,7 @@ cache_bin_low_water_get(cache_bin_t *bin, cache_bin_info_t *info) {
  */
 static inline void
 cache_bin_low_water_set(cache_bin_t *bin) {
-	bin->low_water_position = bin->cur_ptr.lowbits;
-}
-
-/*
- * This is an internal implementation detail -- users should only affect ncached
- * via single-item pushes or batch fills.
- */
-static inline void
-cache_bin_ncached_set(cache_bin_t *bin, cache_bin_info_t *info,
-    cache_bin_sz_t n) {
-	bin->cur_ptr.lowbits = bin->full_position + info->stack_size
-	    - n * sizeof(void *);
-	assert(n <= cache_bin_info_ncached_max(info));
-	assert(n == 0 || *bin->cur_ptr.ptr != NULL);
+	bin->low_bits_low_water = (uint16_t)(uintptr_t)bin->stack_head;
 }
 
 static inline void
@@ -198,38 +201,35 @@ cache_bin_array_descriptor_init(cache_bin_array_descriptor_t *descriptor,
 }
 
 JEMALLOC_ALWAYS_INLINE void *
-cache_bin_alloc_easy_impl(cache_bin_t *bin, cache_bin_info_t *info,
-    bool *success, const bool adjust_low_water) {
+cache_bin_alloc_easy_impl(cache_bin_t *bin, bool *success,
+    const bool adjust_low_water) {
 	/*
 	 * This may read from the empty position; however the loaded value won't
 	 * be used.  It's safe because the stack has one more slot reserved.
 	 */
-	void *ret = *(bin->cur_ptr.ptr++);
+	void *ret = *bin->stack_head;
+	uint16_t low_bits = (uint16_t)(uintptr_t)bin->stack_head;
+	void **new_head = bin->stack_head + 1;
 	/*
-	 * Check for both bin->ncached == 0 and ncached < low_water in a single
-	 * branch.  When adjust_low_water is true, this also avoids accessing
-	 * the cache_bin_info_t (which is on a separate cacheline / page) in
-	 * the common case.
+	 * Note that the low water mark is at most empty; if we pass this check,
+	 * we know we're non-empty.
 	 */
-	if (unlikely(bin->cur_ptr.lowbits > bin->low_water_position)) {
+	if (unlikely(low_bits == bin->low_bits_low_water)) {
 		if (adjust_low_water) {
-			uint32_t empty_position = bin->full_position +
-			    info->stack_size;
-			if (unlikely(bin->cur_ptr.lowbits > empty_position)) {
-				/* Over-allocated; revert. */
-				bin->cur_ptr.ptr--;
-				assert(bin->cur_ptr.lowbits == empty_position);
+			if (unlikely(low_bits == bin->low_bits_empty)) {
 				*success = false;
 				return NULL;
 			}
-			bin->low_water_position = bin->cur_ptr.lowbits;
+			/* Overflow should be impossible. */
+			assert(bin->low_bits_low_water
+			    < (uint16_t)(uintptr_t)new_head);
+			bin->low_bits_low_water = (uint16_t)(uintptr_t)new_head;
 		} else {
-			bin->cur_ptr.ptr--;
-			assert(bin->cur_ptr.lowbits == bin->low_water_position);
 			*success = false;
 			return NULL;
 		}
 	}
+	bin->stack_head = new_head;
 
 	/*
 	 * success (instead of ret) should be checked upon the return of this
@@ -246,22 +246,27 @@ cache_bin_alloc_easy_impl(cache_bin_t *bin, cache_bin_info_t *info,
 JEMALLOC_ALWAYS_INLINE void *
 cache_bin_alloc_easy_reduced(cache_bin_t *bin, bool *success) {
-	/* We don't look at info if we're not adjusting low-water. */
-	return cache_bin_alloc_easy_impl(bin, NULL, success, false);
+	return cache_bin_alloc_easy_impl(bin, success, false);
 }
 
 JEMALLOC_ALWAYS_INLINE void *
 cache_bin_alloc_easy(cache_bin_t *bin, cache_bin_info_t *info, bool *success) {
-	return cache_bin_alloc_easy_impl(bin, info, success, true);
+	/* We don't use info now, but we may want to in the future. */
+	(void)info;
+	return cache_bin_alloc_easy_impl(bin, success, true);
 }
 
 JEMALLOC_ALWAYS_INLINE bool
 cache_bin_dalloc_easy(cache_bin_t *bin, void *ptr) {
-	if (unlikely(bin->cur_ptr.lowbits == bin->full_position)) {
+	uint16_t low_bits = (uint16_t)(uintptr_t)bin->stack_head;
+	if (unlikely(low_bits == bin->low_bits_full)) {
 		return false;
 	}
 
-	*(--bin->cur_ptr.ptr) = ptr;
-	assert(bin->cur_ptr.lowbits >= bin->full_position);
+	bin->stack_head--;
+	*bin->stack_head = ptr;
+	cache_bin_assert_earlier(bin, bin->low_bits_full,
+	    (uint16_t)(uintptr_t)bin->stack_head);
 
 	return true;
 }
@@ -279,8 +284,8 @@ struct cache_bin_ptr_array_s {
 static inline void
 cache_bin_init_ptr_array_for_fill(cache_bin_t *bin, cache_bin_info_t *info,
     cache_bin_ptr_array_t *arr, cache_bin_sz_t nfill) {
-	arr->ptr = cache_bin_empty_position_get(bin, info) - nfill;
 	assert(cache_bin_ncached_get(bin, info) == 0);
+	arr->ptr = cache_bin_empty_position_get(bin, info) - nfill;
 }
 
 /*
@@ -292,12 +297,12 @@ static inline void
 cache_bin_finish_fill(cache_bin_t *bin, cache_bin_info_t *info,
     cache_bin_ptr_array_t *arr, cache_bin_sz_t nfilled) {
 	assert(cache_bin_ncached_get(bin, info) == 0);
+	void **empty_position = cache_bin_empty_position_get(bin, info);
 	if (nfilled < arr->n) {
-		void **empty_position = cache_bin_empty_position_get(bin, info);
 		memmove(empty_position - nfilled, empty_position - arr->n,
 		    nfilled * sizeof(void *));
 	}
-	cache_bin_ncached_set(bin, info, nfilled);
+	bin->stack_head = empty_position - nfilled;
 }
 
 static inline void
@@ -326,11 +331,12 @@ static inline void
 cache_bin_finish_flush(cache_bin_t *bin, cache_bin_info_t *info,
     cache_bin_ptr_array_t *arr, cache_bin_sz_t nflushed) {
 	unsigned rem = cache_bin_ncached_get(bin, info) - nflushed;
-	memmove(bin->cur_ptr.ptr + nflushed, bin->cur_ptr.ptr,
+	memmove(bin->stack_head + nflushed, bin->stack_head,
 	    rem * sizeof(void *));
-	cache_bin_ncached_set(bin, info, rem);
-	if (bin->cur_ptr.lowbits > bin->low_water_position) {
-		bin->low_water_position = bin->cur_ptr.lowbits;
+	bin->stack_head = bin->stack_head + nflushed;
+	if (cache_bin_ncached_get(bin, info)
+	    < cache_bin_low_water_get_internal(bin, info)) {
+		bin->low_bits_low_water = (uint16_t)(uintptr_t)bin->stack_head;
 	}
 }
--- a/src/cache_bin.c
+++ b/src/cache_bin.c
@@ -8,7 +8,7 @@ cache_bin_info_init(cache_bin_info_t *info,
     cache_bin_sz_t ncached_max) {
 	size_t stack_size = (size_t)ncached_max * sizeof(void *);
 	assert(stack_size < ((size_t)1 << (sizeof(cache_bin_sz_t) * 8)));
-	info->stack_size = (cache_bin_sz_t)stack_size;
+	info->ncached_max = (cache_bin_sz_t)ncached_max;
 }
 
 void
@@ -23,23 +23,14 @@ cache_bin_info_compute_alloc(cache_bin_info_t *infos, szind_t ninfos,
 	 */
 	*size = sizeof(void *) * 2;
 	for (szind_t i = 0; i < ninfos; i++) {
-		*size += infos[i].stack_size;
+		*size += infos[i].ncached_max * sizeof(void *);
 	}
 
 	/*
-	 * 1) Align to at least PAGE, to minimize the # of TLBs needed by the
+	 * Align to at least PAGE, to minimize the # of TLBs needed by the
 	 * smaller sizes; also helps if the larger sizes don't get used at all.
-	 * 2) On 32-bit the pointers won't be compressed; use minimal alignment.
 	 */
-	if (LG_SIZEOF_PTR < 3 || *size < PAGE) {
-		*alignment = PAGE;
-	} else {
-		/*
-		 * Align pow2 to avoid overflow the cache bin compressed
-		 * pointers.
-		 */
-		*alignment = pow2_ceil_zu(*size);
-	}
+	*alignment = PAGE;
 }
 
 void
@@ -53,10 +44,6 @@ cache_bin_preincrement(cache_bin_info_t *infos, szind_t ninfos, void *alloc,
 	cache_bin_info_compute_alloc(infos, ninfos, &computed_size,
 	    &computed_alignment);
 	assert(((uintptr_t)alloc & (computed_alignment - 1)) == 0);
-
-	/* And that alignment should disallow overflow. */
-	uint32_t lowbits = (uint32_t)((uintptr_t)alloc + computed_size);
-	assert((uint32_t)(uintptr_t)alloc < lowbits);
 	}
 	/*
 	 * Leave a noticeable mark pattern on the boundaries, in case a bug
@@ -81,7 +68,6 @@ cache_bin_postincrement(cache_bin_info_t *infos, szind_t ninfos, void *alloc,
 void
 cache_bin_init(cache_bin_t *bin, cache_bin_info_t *info, void *alloc,
     size_t *cur_offset) {
-	assert(sizeof(bin->cur_ptr) == sizeof(void *));
 	/*
 	 * The full_position points to the lowest available space.  Allocations
 	 * will access the slots toward higher addresses (for the benefit of
@@ -89,21 +75,23 @@ cache_bin_init(cache_bin_t *bin, cache_bin_info_t *info, void *alloc,
 	 */
 	void *stack_cur = (void *)((uintptr_t)alloc + *cur_offset);
 	void *full_position = stack_cur;
-	uint32_t bin_stack_size = info->stack_size;
+	uint16_t bin_stack_size = info->ncached_max * sizeof(void *);
 
 	*cur_offset += bin_stack_size;
 	void *empty_position = (void *)((uintptr_t)alloc + *cur_offset);
 
 	/* Init to the empty position. */
-	bin->cur_ptr.ptr = empty_position;
-	bin->low_water_position = bin->cur_ptr.lowbits;
-	bin->full_position = (uint32_t)(uintptr_t)full_position;
-	assert(bin->cur_ptr.lowbits - bin->full_position == bin_stack_size);
+	bin->stack_head = (void **)empty_position;
+	bin->low_bits_low_water = (uint16_t)(uintptr_t)bin->stack_head;
+	bin->low_bits_full = (uint16_t)(uintptr_t)full_position;
+	bin->low_bits_empty = (uint16_t)(uintptr_t)empty_position;
+	assert(cache_bin_diff(bin, bin->low_bits_full,
+	    (uint16_t)(uintptr_t)bin->stack_head) == bin_stack_size);
 	assert(cache_bin_ncached_get(bin, info) == 0);
 	assert(cache_bin_empty_position_get(bin, info) == empty_position);
 }
 
 bool
 cache_bin_still_zero_initialized(cache_bin_t *bin) {
-	return bin->cur_ptr.ptr == NULL;
+	return bin->stack_head == NULL;
 }
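One subtlety in the patch above: cache_bin_assert_earlier() deliberately tolerates earlier > later, because a bin's 16-bit low bits may wrap across a 2^16 boundary between its full and empty positions, and cache_bin_diff()'s modular subtraction still returns the true distance. A tiny self-contained demonstration; the addresses are made up purely for illustration:

#include <assert.h>
#include <stdint.h>

int
main(void) {
	/* Hypothetical bin spanning a 2^16 boundary: 0x1fff0 .. 0x20010. */
	uintptr_t full_addr = 0x1fff0;  /* Low bits 0xfff0. */
	uintptr_t empty_addr = 0x20010; /* Low bits 0x0010. */
	uint16_t low_bits_full = (uint16_t)full_addr;
	uint16_t low_bits_empty = (uint16_t)empty_addr;

	/* The raw 16-bit values compare "backwards"... */
	assert(low_bits_full > low_bits_empty);
	/* ...but modular subtraction still yields the true 0x20-byte span. */
	assert((uint16_t)(low_bits_empty - low_bits_full) ==
	    empty_addr - full_addr);
	return 0;
}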