Fast path: allow low-water mark changes.
This lets us put more allocations on an "almost as fast" path after a flush. This results in around a 4% reduction in malloc cycles in prod workloads (corresponding to about a 0.1% reduction in overall cycles).
commit d701a085c2
parent 397da03865
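To illustrate the intended call pattern (a sketch only, not part of the patch): callers first try cache_bin_alloc_easy(), which never touches the low-water mark, and fall back to cache_bin_alloc(), which may move it, only when the first attempt fails. The names and signatures come from the diff below; the wrapper function, the include, and the NULL fallback are illustrative assumptions -- the real caller is the je_malloc fast path shown further down.

#include "jemalloc/internal/cache_bin.h"	/* jemalloc-internal header; assumed available */

/* Illustrative wrapper only -- mirrors the two-tier call order je_malloc uses. */
static void *
bin_alloc_two_tier(cache_bin_t *bin) {
	bool success;
	/* First attempt: cheap, never adjusts the low-water mark. */
	void *ret = cache_bin_alloc_easy(bin, &success);
	if (success) {
		return ret;
	}
	/*
	 * Second attempt: allowed to move the low-water mark, so it can keep
	 * serving allocations right after a flush.
	 */
	ret = cache_bin_alloc(bin, &success);
	if (success) {
		return ret;
	}
	return NULL;	/* Bin is empty; a real caller falls back to the slow path. */
}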
@@ -46,6 +46,12 @@ struct cache_bin_s {
 	 * the head points to one element past the owned array.
 	 */
 	void **stack_head;
+	/*
+	 * cur_ptr and stats are both modified frequently. Let's keep them
+	 * close so that they have a higher chance of being on the same
+	 * cacheline, thus less write-backs.
+	 */
+	cache_bin_stats_t tstats;
 
 	/*
 	 * The low bits of the address of the first item in the stack that
@@ -76,12 +82,6 @@ struct cache_bin_s {
 	 */
 	uint16_t low_bits_empty;
 
-	/*
-	 * cur_ptr and stats are both modified frequently. Let's keep them
-	 * close so that they have a higher chance of being on the same
-	 * cacheline, thus less write-backs.
-	 */
-	cache_bin_stats_t tstats;
 };
 
 typedef struct cache_bin_array_descriptor_s cache_bin_array_descriptor_t;
@@ -201,36 +201,7 @@ cache_bin_array_descriptor_init(cache_bin_array_descriptor_t *descriptor,
 }
 
 JEMALLOC_ALWAYS_INLINE void *
-cache_bin_alloc_easy_impl(cache_bin_t *bin, bool *success,
-    const bool adjust_low_water) {
-	/*
-	 * This may read from the empty position; however the loaded value won't
-	 * be used. It's safe because the stack has one more slot reserved.
-	 */
-	void *ret = *bin->stack_head;
-	uint16_t low_bits = (uint16_t)(uintptr_t)bin->stack_head;
-	void **new_head = bin->stack_head + 1;
-	/*
-	 * Note that the low water mark is at most empty; if we pass this check,
-	 * we know we're non-empty.
-	 */
-	if (unlikely(low_bits == bin->low_bits_low_water)) {
-		if (adjust_low_water) {
-			if (unlikely(low_bits == bin->low_bits_empty)) {
-				*success = false;
-				return NULL;
-			}
-			/* Overflow should be impossible. */
-			assert(bin->low_bits_low_water
-			    < (uint16_t)(uintptr_t)new_head);
-			bin->low_bits_low_water = (uint16_t)(uintptr_t)new_head;
-		} else {
-			*success = false;
-			return NULL;
-		}
-	}
-	bin->stack_head = new_head;
-
+cache_bin_alloc_impl(cache_bin_t *bin, bool *success, bool adjust_low_water) {
 	/*
 	 * success (instead of ret) should be checked upon the return of this
 	 * function. We avoid checking (ret == NULL) because there is never a
@@ -238,22 +209,52 @@ cache_bin_alloc_easy_impl(cache_bin_t *bin, bool *success,
 	 * and eagerly checking ret would cause pipeline stall (waiting for the
 	 * cacheline).
 	 */
+
+	/*
+	 * This may read from the empty position; however the loaded value won't
+	 * be used. It's safe because the stack has one more slot reserved.
+	 */
+	void *ret = *bin->stack_head;
+	uint16_t low_bits = (uint16_t)(uintptr_t)bin->stack_head;
+	void **new_head = bin->stack_head + 1;
+
+	/*
+	 * Note that the low water mark is at most empty; if we pass this check,
+	 * we know we're non-empty.
+	 */
+	if (likely(low_bits != bin->low_bits_low_water)) {
+		bin->stack_head = new_head;
 		*success = true;
-
 		return ret;
+	}
+	if (!adjust_low_water) {
+		*success = false;
+		return NULL;
+	}
+	/*
+	 * In the fast-path case where we call alloc_easy and then alloc, the
+	 * previous checking and computation is optimized away -- we didn't
+	 * actually commit any of our operations.
+	 */
+	if (likely(low_bits != bin->low_bits_empty)) {
+		bin->stack_head = new_head;
+		bin->low_bits_low_water = (uint16_t)(uintptr_t)new_head;
+		*success = true;
+		return ret;
+	}
+	*success = false;
+	return NULL;
 }
 
 JEMALLOC_ALWAYS_INLINE void *
-cache_bin_alloc_easy_reduced(cache_bin_t *bin, bool *success) {
+cache_bin_alloc_easy(cache_bin_t *bin, bool *success) {
 	/* We don't look at info if we're not adjusting low-water. */
-	return cache_bin_alloc_easy_impl(bin, success, false);
+	return cache_bin_alloc_impl(bin, success, false);
 }
 
 JEMALLOC_ALWAYS_INLINE void *
-cache_bin_alloc_easy(cache_bin_t *bin, cache_bin_info_t *info, bool *success) {
-	/* We don't use info now, but we may want to in the future. */
-	(void)info;
-	return cache_bin_alloc_easy_impl(bin, success, true);
+cache_bin_alloc(cache_bin_t *bin, bool *success) {
+	return cache_bin_alloc_impl(bin, success, true);
 }
 
 JEMALLOC_ALWAYS_INLINE bool
@@ -36,8 +36,7 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache,
 
 	assert(binind < SC_NBINS);
 	bin = tcache_small_bin_get(tcache, binind);
-	ret = cache_bin_alloc_easy(bin, &tcache_bin_info[binind],
-	    &tcache_success);
+	ret = cache_bin_alloc(bin, &tcache_success);
 	assert(tcache_success == (ret != NULL));
 	if (unlikely(!tcache_success)) {
 		bool tcache_hard_success;
@@ -80,8 +79,7 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size,
 
 	assert(binind >= SC_NBINS && binind < nhbins);
 	bin = tcache_large_bin_get(tcache, binind);
-	ret = cache_bin_alloc_easy(bin, &tcache_bin_info[binind],
-	    &tcache_success);
+	ret = cache_bin_alloc(bin, &tcache_success);
 	assert(tcache_success == (ret != NULL));
 	if (unlikely(!tcache_success)) {
 		/*
@@ -2377,6 +2377,17 @@ malloc_default(size_t size) {
  * Begin malloc(3)-compatible functions.
  */
 
+JEMALLOC_ALWAYS_INLINE void
+fastpath_success_finish(tsd_t *tsd, uint64_t allocated_after,
+    cache_bin_t *bin, void *ret) {
+	thread_allocated_set(tsd, allocated_after);
+	if (config_stats) {
+		bin->tstats.nrequests++;
+	}
+
+	LOG("core.malloc.exit", "result: %p", ret);
+}
+
 /*
  * malloc() fastpath.
  *
@@ -2451,17 +2462,22 @@ je_malloc(size_t size) {
 	tcache_t *tcache = tsd_tcachep_get(tsd);
 	cache_bin_t *bin = tcache_small_bin_get(tcache, ind);
 	bool tcache_success;
-	void *ret = cache_bin_alloc_easy_reduced(bin, &tcache_success);
+	void *ret;
+
+	/*
+	 * We split up the code this way so that redundant low-water
+	 * computation doesn't happen on the (more common) case in which we
+	 * don't touch the low water mark. The compiler won't do this
+	 * duplication on its own.
+	 */
+	ret = cache_bin_alloc_easy(bin, &tcache_success);
 	if (tcache_success) {
-		thread_allocated_set(tsd, allocated_after);
-		if (config_stats) {
-			bin->tstats.nrequests++;
-		}
-		LOG("core.malloc.exit", "result: %p", ret);
-		/* Fastpath success */
+		fastpath_success_finish(tsd, allocated_after, bin, ret);
+		return ret;
+	}
+	ret = cache_bin_alloc(bin, &tcache_success);
+	if (tcache_success) {
+		fastpath_success_finish(tsd, allocated_after, bin, ret);
 		return ret;
 	}
 
@@ -104,8 +104,7 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache,
 
 	assert(tcache->arena != NULL);
 	arena_tcache_fill_small(tsdn, arena, tcache, tbin, binind);
-	ret = cache_bin_alloc_easy(tbin, &tcache_bin_info[binind],
-	    tcache_success);
+	ret = cache_bin_alloc(tbin, tcache_success);
 
 	return ret;
 }
@@ -17,7 +17,7 @@ do_fill_test(cache_bin_t *bin, cache_bin_info_t *info, void **ptrs,
 	cache_bin_low_water_set(bin);
 
 	for (cache_bin_sz_t i = 0; i < nfill_succeed; i++) {
-		ptr = cache_bin_alloc_easy(bin, info, &success);
+		ptr = cache_bin_alloc(bin, &success);
 		expect_true(success, "");
 		expect_ptr_eq(ptr, (void *)&ptrs[i],
 		    "Should pop in order filled");
@@ -48,7 +48,7 @@ do_flush_test(cache_bin_t *bin, cache_bin_info_t *info, void **ptrs,
 
 	expect_true(cache_bin_ncached_get(bin, info) == nfill - nflush, "");
 	while (cache_bin_ncached_get(bin, info) > 0) {
-		cache_bin_alloc_easy(bin, info, &success);
+		cache_bin_alloc(bin, &success);
 	}
 }
 
@@ -78,11 +78,11 @@ TEST_BEGIN(test_cache_bin) {
 	expect_true(cache_bin_ncached_get(&bin, &info) == 0, "");
 	expect_true(cache_bin_low_water_get(&bin, &info) == 0, "");
 
-	ptr = cache_bin_alloc_easy_reduced(&bin, &success);
+	ptr = cache_bin_alloc_easy(&bin, &success);
 	expect_false(success, "Shouldn't successfully allocate when empty");
 	expect_ptr_null(ptr, "Shouldn't get a non-null pointer on failure");
 
-	ptr = cache_bin_alloc_easy(&bin, &info, &success);
+	ptr = cache_bin_alloc(&bin, &success);
 	expect_false(success, "Shouldn't successfully allocate when empty");
 	expect_ptr_null(ptr, "Shouldn't get a non-null pointer on failure");
 
@@ -112,10 +112,10 @@ TEST_BEGIN(test_cache_bin) {
 		expect_true(cache_bin_ncached_get(&bin, &info)
 		    == ncached_max - i, "");
 		/*
-		 * This should fail -- the reduced version can't change low
-		 * water.
+		 * This should fail -- the easy variant can't change the low
+		 * water mark.
 		 */
-		ptr = cache_bin_alloc_easy_reduced(&bin, &success);
+		ptr = cache_bin_alloc_easy(&bin, &success);
 		expect_ptr_null(ptr, "");
 		expect_false(success, "");
 		expect_true(cache_bin_low_water_get(&bin, &info)
@@ -124,7 +124,7 @@ TEST_BEGIN(test_cache_bin) {
 		    == ncached_max - i, "");
 
 		/* This should succeed, though. */
-		ptr = cache_bin_alloc_easy(&bin, &info, &success);
+		ptr = cache_bin_alloc(&bin, &success);
 		expect_true(success, "");
 		expect_ptr_eq(ptr, &ptrs[ncached_max - i - 1],
 		    "Alloc should pop in stack order");
@@ -135,10 +135,10 @@ TEST_BEGIN(test_cache_bin) {
 	}
 	/* Now we're empty -- all alloc attempts should fail. */
 	expect_true(cache_bin_ncached_get(&bin, &info) == 0, "");
-	ptr = cache_bin_alloc_easy_reduced(&bin, &success);
+	ptr = cache_bin_alloc_easy(&bin, &success);
 	expect_ptr_null(ptr, "");
 	expect_false(success, "");
-	ptr = cache_bin_alloc_easy(&bin, &info, &success);
+	ptr = cache_bin_alloc(&bin, &success);
 	expect_ptr_null(ptr, "");
 	expect_false(success, "");
 
@@ -156,18 +156,18 @@ TEST_BEGIN(test_cache_bin) {
 		 * Size is bigger than low water -- the reduced version should
 		 * succeed.
 		 */
-		ptr = cache_bin_alloc_easy_reduced(&bin, &success);
+		ptr = cache_bin_alloc_easy(&bin, &success);
 		expect_true(success, "");
 		expect_ptr_eq(ptr, &ptrs[i], "");
 	}
 	/* But now, we've hit low-water. */
-	ptr = cache_bin_alloc_easy_reduced(&bin, &success);
+	ptr = cache_bin_alloc_easy(&bin, &success);
 	expect_false(success, "");
 	expect_ptr_null(ptr, "");
 
 	/* We're going to test filling -- we must be empty to start. */
 	while (cache_bin_ncached_get(&bin, &info)) {
-		cache_bin_alloc_easy(&bin, &info, &success);
+		cache_bin_alloc(&bin, &success);
 		expect_true(success, "");
 	}
 