Tcache flush: prefetch edata contents.
This frontloads more of the cache-miss latency. It also moves those misses onto a code path where we have not yet acquired any locks, which should (hopefully) reduce lock hold times.
This commit is contained in:
parent
9f9247a62e
commit
31a629c3de
@ -69,6 +69,55 @@ util_assume(bool b) {
|
||||
}
|
||||
}
|
||||
|
||||
/* ptr should be valid. */
|
||||
JEMALLOC_ALWAYS_INLINE void
|
||||
util_prefetch_read(void *ptr) {
|
||||
/*
|
||||
* This should arguably be a config check; but any version of GCC so old
|
||||
* that it doesn't support __builtin_prefetch is also too old to build
|
||||
* jemalloc.
|
||||
*/
|
||||
#ifdef __GNUC__
|
||||
if (config_debug) {
|
||||
/* Enforce the "valid ptr" requirement. */
|
||||
*(volatile char *)ptr;
|
||||
}
|
||||
__builtin_prefetch(ptr, /* read or write */ 0, /* locality hint */ 3);
|
||||
#else
|
||||
*(volatile char *)ptr;
|
||||
#endif
|
||||
}
|
||||
|
||||
JEMALLOC_ALWAYS_INLINE void
|
||||
util_prefetch_write(void *ptr) {
|
||||
#ifdef __GNUC__
|
||||
if (config_debug) {
|
||||
*(volatile char *)ptr;
|
||||
}
|
||||
/*
|
||||
* The only difference from the read variant is that this has a 1 as the
|
||||
* second argument (the write hint).
|
||||
*/
|
||||
__builtin_prefetch(ptr, 1, 3);
|
||||
#else
|
||||
*(volatile char *)ptr;
|
||||
#endif
|
||||
}
|
||||
|
||||
JEMALLOC_ALWAYS_INLINE void
|
||||
util_prefetch_read_range(void *ptr, size_t sz) {
|
||||
for (size_t i = 0; i < sz; i += CACHELINE) {
|
||||
util_prefetch_read((void *)((uintptr_t)ptr + i));
|
||||
}
|
||||
}
|
||||
|
||||
JEMALLOC_ALWAYS_INLINE void
|
||||
util_prefetch_write_range(void *ptr, size_t sz) {
|
||||
for (size_t i = 0; i < sz; i += CACHELINE) {
|
||||
util_prefetch_write((void *)((uintptr_t)ptr + i));
|
||||
}
|
||||
}
|
||||
|
||||
#undef UTIL_INLINE
|
||||
|
||||
#endif /* JEMALLOC_INTERNAL_UTIL_H */
|
||||
|
@ -247,6 +247,7 @@ tcache_bin_flush_metadata_visitor(void *szind_sum_ctx,
|
||||
emap_full_alloc_ctx_t *alloc_ctx) {
|
||||
size_t *szind_sum = (size_t *)szind_sum_ctx;
|
||||
*szind_sum -= alloc_ctx->szind;
|
||||
util_prefetch_write_range(alloc_ctx->edata, sizeof(edata_t));
|
||||
}
|
||||
|
||||
static void
|
||||
|
Loading…
Reference in New Issue
Block a user