Optimize cache_bin_alloc_easy for malloc fast path

`tcache_bin_info` is not accessed on the malloc fast path, but the
compiler nevertheless reserves a register for it, as well as an
additional register for `tcache_bin_info[ind].stack_size`.  The
optimization removes the need for both registers.
Author: Yinan Zhang
Date:   2019-10-08 11:33:55 -07:00
Parent: 4fe50bc7d0
Commit: 05681e387a

2 changed files with 36 additions and 10 deletions
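The technique here: one always-inline implementation takes a `const bool` flag, and thin wrappers pin the flag to a constant, so every call site gets a specialized body after inlining. Below is a minimal stand-alone sketch of that pattern in C; all names and types are toy stand-ins rather than jemalloc's actual definitions, and the union mimics the `cur_ptr` trick of aliasing the low 32 bits of the stack pointer (little-endian assumption):

#include <stdbool.h>
#include <stdint.h>

/* Toy stand-ins for tcache_bin_info and cache_bin_t. */
typedef struct { uint32_t stack_size; } bin_info_t;
extern const bin_info_t bin_info[];

typedef struct {
    union {
        void **ptr;       /* next slot to pop */
        uint32_t lowbits; /* low 32 bits of ptr, used for range checks */
    } cur;
    uint32_t low_water_position;
    uint32_t full_position;
} bin_t;

static inline void *
bin_alloc_impl(bin_t *bin, bool *success, unsigned ind,
    const bool adjust_low_water) {
    void *ret = *(bin->cur.ptr++);
    if (bin->cur.lowbits > bin->low_water_position) {
        if (adjust_low_water) {
            /* Only this arm references the info table. */
            uint32_t empty = bin->full_position + bin_info[ind].stack_size;
            if (bin->cur.lowbits > empty) {
                bin->cur.ptr--; /* bin was empty; revert the pop */
                *success = false;
                return NULL;
            }
            bin->low_water_position = bin->cur.lowbits;
        } else {
            /* Reduced arm: report a miss; the table is never read. */
            bin->cur.ptr--;
            *success = false;
            return NULL;
        }
    }
    *success = true;
    return ret;
}

/*
 * With adjust_low_water folded to false after inlining, the bin_info
 * access is dead code, so the compiler needs no register for the table
 * address and none for the loaded stack_size.
 */
static inline void *
bin_alloc_reduced(bin_t *bin, bool *success) {
    return bin_alloc_impl(bin, success, 0, false);
}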

include/jemalloc/internal/cache_bin.h (diff shown ignoring whitespace-only re-indentation)

@@ -183,8 +183,11 @@ cache_bin_array_descriptor_init(cache_bin_array_descriptor_t *descriptor,
     descriptor->bins_large = bins_large;
 }
 
+#define INVALID_SZIND ((szind_t)(unsigned)-1)
+
 JEMALLOC_ALWAYS_INLINE void *
-cache_bin_alloc_easy(cache_bin_t *bin, bool *success, szind_t ind) {
+cache_bin_alloc_easy_impl(cache_bin_t *bin, bool *success, szind_t ind,
+    const bool adjust_low_water) {
     /*
      * This may read from the empty position; however the loaded value won't
      * be used.  It's safe because the stack has one more slot reserved.
@@ -192,10 +195,13 @@ cache_bin_alloc_easy(cache_bin_t *bin, bool *success, szind_t ind) {
     void *ret = *(bin->cur_ptr.ptr++);
     /*
      * Check for both bin->ncached == 0 and ncached < low_water in a single
-     * branch.  This also avoids accessing tcache_bin_info (which is on a
-     * separate cacheline / page) in the common case.
+     * branch.  When adjust_low_water is true, this also avoids accessing
+     * tcache_bin_info (which is on a separate cacheline / page) in the
+     * common case.
      */
     if (unlikely(bin->cur_ptr.lowbits > bin->low_water_position)) {
+        if (adjust_low_water) {
+            assert(ind != INVALID_SZIND);
         uint32_t empty_position = bin->full_position +
             tcache_bin_info[ind].stack_size;
         if (unlikely(bin->cur_ptr.lowbits > empty_position)) {
@@ -206,6 +212,13 @@ cache_bin_alloc_easy(cache_bin_t *bin, bool *success, szind_t ind) {
             return NULL;
         }
         bin->low_water_position = bin->cur_ptr.lowbits;
+        } else {
+            assert(ind == INVALID_SZIND);
+            bin->cur_ptr.ptr--;
+            assert(bin->cur_ptr.lowbits == bin->low_water_position);
+            *success = false;
+            return NULL;
+        }
     }
     /*
@@ -220,6 +233,19 @@ cache_bin_alloc_easy(cache_bin_t *bin, bool *success, szind_t ind) {
     return ret;
 }
 
+JEMALLOC_ALWAYS_INLINE void *
+cache_bin_alloc_easy_reduced(cache_bin_t *bin, bool *success) {
+    /* The szind parameter won't be used. */
+    return cache_bin_alloc_easy_impl(bin, success, INVALID_SZIND, false);
+}
+
+JEMALLOC_ALWAYS_INLINE void *
+cache_bin_alloc_easy(cache_bin_t *bin, bool *success, szind_t ind) {
+    return cache_bin_alloc_easy_impl(bin, success, ind, true);
+}
+
+#undef INVALID_SZIND
+
 JEMALLOC_ALWAYS_INLINE bool
 cache_bin_dalloc_easy(cache_bin_t *bin, void *ptr) {
     if (unlikely(bin->cur_ptr.lowbits == bin->full_position)) {
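Note the contract that the new `} else` arm gives `cache_bin_alloc_easy_reduced`: crossing the low-water mark is reported as an ordinary miss (the pop is reverted and `*success` cleared) instead of being absorbed by updating `low_water_position`, so all low-water bookkeeping stays off the fast path. A caller therefore pairs the two variants roughly as follows (hypothetical fallback site, not part of this diff):

bool success;
void *ret = cache_bin_alloc_easy_reduced(bin, &success);
if (!success) {
    /*
     * Slow path: the full variant consults tcache_bin_info, updates
     * low_water_position, and distinguishes a genuinely empty bin
     * from a mere low-water crossing.
     */
    ret = cache_bin_alloc_easy(bin, &success, ind);
}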

src/jemalloc.c

@@ -2372,7 +2372,7 @@ je_malloc(size_t size) {
     cache_bin_t *bin = tcache_small_bin_get(tcache, ind);
     bool tcache_success;
-    void *ret = cache_bin_alloc_easy(bin, &tcache_success, ind);
+    void *ret = cache_bin_alloc_easy_reduced(bin, &tcache_success);
     if (tcache_success) {
         *tsd_thread_allocatedp_get(tsd) += usize;
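After this change the `je_malloc` fast path never dereferences `tcache_bin_info`. Any miss, including the low-water crossings that the reduced variant now reports, simply leaves the fast path. From memory of jemalloc around this commit, the surrounding fall-through looks roughly like this, with `malloc_default` as the out-of-line slow-path entry in src/jemalloc.c (a sketch, not verbatim source):

    if (tcache_success) {
        *tsd_thread_allocatedp_get(tsd) += usize;
        return ret;
    }
    /*
     * Miss: take the slow path, where the full allocation logic
     * (including the tcache_bin_info access) lives.
     */
    return malloc_default(size);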