Optimize cache_bin_alloc_easy for malloc fast path
`tcache_bin_info` is not accessed on the malloc fast path, yet the compiler reserves a register for it, as well as an additional register for `tcache_bin_info[ind].stack_size`. This optimization eliminates the need for both registers.
This commit is contained in:
parent
4fe50bc7d0
commit
05681e387a
@ -183,8 +183,11 @@ cache_bin_array_descriptor_init(cache_bin_array_descriptor_t *descriptor,
|
|||||||
descriptor->bins_large = bins_large;
|
descriptor->bins_large = bins_large;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define INVALID_SZIND ((szind_t)(unsigned)-1)
|
||||||
|
|
||||||
JEMALLOC_ALWAYS_INLINE void *
|
JEMALLOC_ALWAYS_INLINE void *
|
||||||
cache_bin_alloc_easy(cache_bin_t *bin, bool *success, szind_t ind) {
|
cache_bin_alloc_easy_impl(cache_bin_t *bin, bool *success, szind_t ind,
|
||||||
|
const bool adjust_low_water) {
|
||||||
/*
|
/*
|
||||||
* This may read from the empty position; however the loaded value won't
|
* This may read from the empty position; however the loaded value won't
|
||||||
* be used. It's safe because the stack has one more slot reserved.
|
* be used. It's safe because the stack has one more slot reserved.
|
||||||
@ -192,10 +195,13 @@ cache_bin_alloc_easy(cache_bin_t *bin, bool *success, szind_t ind) {
|
|||||||
void *ret = *(bin->cur_ptr.ptr++);
|
void *ret = *(bin->cur_ptr.ptr++);
|
||||||
/*
|
/*
|
||||||
* Check for both bin->ncached == 0 and ncached < low_water in a single
|
* Check for both bin->ncached == 0 and ncached < low_water in a single
|
||||||
* branch. This also avoids accessing tcache_bin_info (which is on a
|
* branch. When adjust_low_water is true, this also avoids accessing
|
||||||
* separate cacheline / page) in the common case.
|
* tcache_bin_info (which is on a separate cacheline / page) in the
|
||||||
|
* common case.
|
||||||
*/
|
*/
|
||||||
if (unlikely(bin->cur_ptr.lowbits > bin->low_water_position)) {
|
if (unlikely(bin->cur_ptr.lowbits > bin->low_water_position)) {
|
||||||
|
if (adjust_low_water) {
|
||||||
|
assert(ind != INVALID_SZIND);
|
||||||
uint32_t empty_position = bin->full_position +
|
uint32_t empty_position = bin->full_position +
|
||||||
tcache_bin_info[ind].stack_size;
|
tcache_bin_info[ind].stack_size;
|
||||||
if (unlikely(bin->cur_ptr.lowbits > empty_position)) {
|
if (unlikely(bin->cur_ptr.lowbits > empty_position)) {
|
||||||
@ -206,6 +212,13 @@ cache_bin_alloc_easy(cache_bin_t *bin, bool *success, szind_t ind) {
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
bin->low_water_position = bin->cur_ptr.lowbits;
|
bin->low_water_position = bin->cur_ptr.lowbits;
|
||||||
|
} else {
|
||||||
|
assert(ind == INVALID_SZIND);
|
||||||
|
bin->cur_ptr.ptr--;
|
||||||
|
assert(bin->cur_ptr.lowbits == bin->low_water_position);
|
||||||
|
*success = false;
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -220,6 +233,19 @@ cache_bin_alloc_easy(cache_bin_t *bin, bool *success, szind_t ind) {
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
JEMALLOC_ALWAYS_INLINE void *
|
||||||
|
cache_bin_alloc_easy_reduced(cache_bin_t *bin, bool *success) {
|
||||||
|
/* The szind parameter won't be used. */
|
||||||
|
return cache_bin_alloc_easy_impl(bin, success, INVALID_SZIND, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
JEMALLOC_ALWAYS_INLINE void *
|
||||||
|
cache_bin_alloc_easy(cache_bin_t *bin, bool *success, szind_t ind) {
|
||||||
|
return cache_bin_alloc_easy_impl(bin, success, ind, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
#undef INVALID_SZIND
|
||||||
|
|
||||||
JEMALLOC_ALWAYS_INLINE bool
|
JEMALLOC_ALWAYS_INLINE bool
|
||||||
cache_bin_dalloc_easy(cache_bin_t *bin, void *ptr) {
|
cache_bin_dalloc_easy(cache_bin_t *bin, void *ptr) {
|
||||||
if (unlikely(bin->cur_ptr.lowbits == bin->full_position)) {
|
if (unlikely(bin->cur_ptr.lowbits == bin->full_position)) {
|
||||||
|
@ -2372,7 +2372,7 @@ je_malloc(size_t size) {
|
|||||||
|
|
||||||
cache_bin_t *bin = tcache_small_bin_get(tcache, ind);
|
cache_bin_t *bin = tcache_small_bin_get(tcache, ind);
|
||||||
bool tcache_success;
|
bool tcache_success;
|
||||||
void *ret = cache_bin_alloc_easy(bin, &tcache_success, ind);
|
void *ret = cache_bin_alloc_easy_reduced(bin, &tcache_success);
|
||||||
|
|
||||||
if (tcache_success) {
|
if (tcache_success) {
|
||||||
*tsd_thread_allocatedp_get(tsd) += usize;
|
*tsd_thread_allocatedp_get(tsd) += usize;
|
||||||
|
Loading…
Reference in New Issue
Block a user