Tcache flushing: increase cache miss parallelism.
In practice, many rtree_leaf_elm accesses are cache misses. By restructuring the batch lookup into two passes (first issuing every rtree_leaf_elm lookup, then reading the results), these misses no longer block the start of later lookups, so more of them can be in flight in parallel. A standalone sketch of the pattern follows the commit metadata below.
parent 181ba7fd4d
commit 9f9247a62e
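A minimal standalone sketch of the idea, with hypothetical table/node types rather than jemalloc's API: in the serial shape, each looked-up pointer is used immediately, so per-element work sits between consecutive misses and limits how many can be outstanding. Splitting the loop into an issue pass and a consume pass lets the loads be issued back to back, so the misses overlap.

/*
 * Illustrative only; names and types here are invented for the sketch.
 */
#include <stddef.h>

typedef struct node_s {
	int payload;
} node_t;

/* Before: lookup and use interleaved per element. */
static int
sum_serial(node_t **table, const size_t *keys, size_t n) {
	int sum = 0;
	for (size_t i = 0; i < n; i++) {
		node_t *node = table[keys[i]];	/* likely cache miss */
		sum += node->payload;		/* dependent use follows at once */
	}
	return sum;
}

/* After: issue pass, then consume pass, sharing one scratch array. */
static int
sum_batched(node_t **table, const size_t *keys, size_t n,
    node_t **scratch) {
	for (size_t i = 0; i < n; i++) {
		scratch[i] = table[keys[i]];	/* these misses can overlap */
	}
	int sum = 0;
	for (size_t i = 0; i < n; i++) {
		/* Dependent loads, also issued back to back. */
		sum += scratch[i]->payload;
	}
	return sum;
}

The commit applies this same split to the rtree lookups in the emap batch-lookup path, reusing the caller's result array as the scratch buffer.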
include/jemalloc/internal/emap.h

@@ -226,23 +226,47 @@ typedef const void *(*emap_ptr_getter)(void *ctx, size_t ind);
  */
 typedef void (*emap_metadata_visitor)(void *ctx, emap_full_alloc_ctx_t *alloc_ctx);
 
+typedef union emap_batch_lookup_result_u emap_batch_lookup_result_t;
+union emap_batch_lookup_result_u {
+	edata_t *edata;
+	rtree_leaf_elm_t *rtree_leaf;
+};
+
 JEMALLOC_ALWAYS_INLINE void
 emap_edata_lookup_batch(tsd_t *tsd, emap_t *emap, size_t nptrs,
     emap_ptr_getter ptr_getter, void *ptr_getter_ctx,
     emap_metadata_visitor metadata_visitor, void *metadata_visitor_ctx,
-    edata_t **r_edatas) {
+    emap_batch_lookup_result_t *result) {
 
 	/* Avoids null-checking tsdn in the loop below. */
 	util_assume(tsd != NULL);
+	rtree_ctx_t *rtree_ctx = tsd_rtree_ctxp_get(tsd);
 
 	for (size_t i = 0; i < nptrs; i++) {
-		emap_full_alloc_ctx_t full_alloc_ctx;
 		const void *ptr = ptr_getter(ptr_getter_ctx, i);
-		emap_full_alloc_ctx_lookup(tsd_tsdn(tsd), emap, ptr,
-		    &full_alloc_ctx);
-		r_edatas[i] = full_alloc_ctx.edata;
-		metadata_visitor(metadata_visitor_ctx, &full_alloc_ctx);
+		/*
+		 * Reuse the edatas array as a temp buffer, lying a little about
+		 * the types.
+		 */
+		result[i].rtree_leaf = rtree_leaf_elm_lookup(tsd_tsdn(tsd),
+		    &emap->rtree, rtree_ctx, (uintptr_t)ptr,
+		    /* dependent */ true, /* init_missing */ false);
+	}
+
+	for (size_t i = 0; i < nptrs; i++) {
+		rtree_leaf_elm_t *elm = result[i].rtree_leaf;
+		rtree_contents_t contents = rtree_leaf_elm_read(tsd_tsdn(tsd),
+		    &emap->rtree, elm, /* dependent */ true);
+		result[i].edata = contents.edata;
+		emap_full_alloc_ctx_t alloc_ctx;
+		/*
+		 * Not all these fields are read in practice by the metadata
+		 * visitor. But the compiler can easily optimize away the ones
+		 * that aren't, so no sense in being incomplete.
+		 */
+		alloc_ctx.szind = contents.metadata.szind;
+		alloc_ctx.slab = contents.metadata.slab;
+		alloc_ctx.edata = contents.edata;
+		metadata_visitor(metadata_visitor_ctx, &alloc_ctx);
 	}
 }
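For orientation, here is a hedged sketch of how a caller might drive this batch API; the callbacks and wrapper below are hypothetical (the real caller is tcache_bin_flush_edatas_lookup in src/tcache.c, shown next). Note the single result array doing double duty: it holds rtree_leaf_elm_t pointers during the issue pass and edata_t pointers once the call returns, so the caller pays for only one buffer.

/* Hypothetical callbacks, for illustration only. */
static const void *
example_ptr_getter(void *ctx, size_t ind) {
	void **ptrs = (void **)ctx;
	return ptrs[ind];
}

static void
example_metadata_visitor(void *ctx, emap_full_alloc_ctx_t *alloc_ctx) {
	size_t *nslab = (size_t *)ctx;	/* e.g., count slab allocations */
	if (alloc_ctx->slab) {
		(*nslab)++;
	}
}

static void
example_batch_lookup(tsd_t *tsd, emap_t *emap, void **ptrs, size_t n,
    emap_batch_lookup_result_t *results, size_t *nslab) {
	emap_edata_lookup_batch(tsd, emap, n, example_ptr_getter, ptrs,
	    example_metadata_visitor, nslab, results);
	/* After the call, results[i].edata is valid for each i < n. */
}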
src/tcache.c (12 changed lines)
@@ -251,7 +251,7 @@ tcache_bin_flush_metadata_visitor(void *szind_sum_ctx,
 
 static void
 tcache_bin_flush_edatas_lookup(tsd_t *tsd, cache_bin_ptr_array_t *arr,
-    szind_t binind, size_t nflush, edata_t **edatas) {
+    szind_t binind, size_t nflush, emap_batch_lookup_result_t *edatas) {
 
 	/*
 	 * This gets compiled away when config_opt_safety_checks is false.
@@ -305,7 +305,7 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin,
	 * Variable length array must have > 0 length; the last element is never
	 * touched (it's just included to satisfy the no-zero-length rule).
	 */
-	VARIABLE_ARRAY(edata_t *, item_edata, nflush + 1);
+	VARIABLE_ARRAY(emap_batch_lookup_result_t, item_edata, nflush + 1);
 	CACHE_BIN_PTR_ARRAY_DECLARE(ptrs, nflush);
 
 	cache_bin_init_ptr_array_for_flush(cache_bin, &tcache_bin_info[binind],
@@ -329,7 +329,7 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin,
 	bool merged_stats = false;
 	while (nflush > 0) {
 		/* Lock the arena, or bin, associated with the first object. */
-		edata_t *edata = item_edata[0];
+		edata_t *edata = item_edata[0].edata;
 		unsigned cur_arena_ind = edata_arena_ind_get(edata);
 		arena_t *cur_arena = arena_get(tsdn, cur_arena_ind, false);
 
@@ -382,7 +382,7 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin,
 		if (!small) {
 			for (unsigned i = 0; i < nflush; i++) {
 				void *ptr = cache_bin_ptr_array_get(&ptrs, i);
-				edata = item_edata[i];
+				edata = item_edata[i].edata;
 				assert(ptr != NULL && edata != NULL);
 
 				if (tcache_bin_flush_match(edata, cur_arena_ind,
@@ -400,7 +400,7 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin,
 		unsigned ndeferred = 0;
 		for (unsigned i = 0; i < nflush; i++) {
 			void *ptr = cache_bin_ptr_array_get(&ptrs, i);
-			edata = item_edata[i];
+			edata = item_edata[i].edata;
 			assert(ptr != NULL && edata != NULL);
 			if (!tcache_bin_flush_match(edata, cur_arena_ind,
 			    cur_binshard, small)) {
@@ -411,7 +411,7 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin,
 				 * it can be handled in a future pass.
 				 */
 				cache_bin_ptr_array_set(&ptrs, ndeferred, ptr);
-				item_edata[ndeferred] = edata;
+				item_edata[ndeferred].edata = edata;
 				ndeferred++;
 				continue;
 			}