Add rtree lookup path caching.

rtree-based extent lookups remain more expensive than chunk-based run
lookups, but with this optimization the fast path slowdown is ~3 CPU
cycles per metadata lookup (on Intel Core i7-4980HQ), versus ~11 cycles
prior.  The path caching speedup tends to degrade gracefully unless
allocated memory is spread far apart (as is the case when using a
mixture of sbrk() and mmap()).
Jason Evans
2016-06-02 18:43:10 -07:00
parent 7be2ebc23f
commit 6f29a83924
7 changed files with 267 additions and 94 deletions
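The idea, in rough outline: every extent metadata lookup walks the rtree from the root, and this change threads a per-thread rtree_ctx_t through the lookup path so the path taken by the most recent lookup can be reused when the next key shares its upper bits. The following is a minimal, self-contained sketch of that caching idea using a hypothetical two-level radix tree; the names (rtree_toy_*, SUBTREE_BITS, ROOT_BITS) and the single-entry cache are illustrative assumptions, not jemalloc's actual rtree or rtree_ctx_t layout.

/*
 * Toy sketch of lookup path caching, NOT jemalloc's implementation: a
 * two-level radix tree maps page-aligned addresses to extent pointers, and a
 * per-thread context caches the leaf subtree reached by the previous lookup.
 * A lookup whose key shares the cached key's upper bits skips the root.
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define	LG_PAGE		12
#define	SUBTREE_BITS	8			/* leaf fanout: 256 pages */
#define	SUBTREE_SLOTS	(1U << SUBTREE_BITS)
#define	ROOT_BITS	8			/* root fanout: 256 subtrees */
#define	ROOT_SLOTS	(1U << ROOT_BITS)

typedef struct {
	void	*slots[SUBTREE_SLOTS];		/* extent pointer per page */
} subtree_t;

typedef struct {
	subtree_t	*children[ROOT_SLOTS];
} rtree_toy_t;

/* Per-thread lookup context: cached leaf subtree and its key prefix. */
typedef struct {
	uintptr_t	 prefix;
	subtree_t	*subtree;
} rtree_toy_ctx_t;

static void *
rtree_toy_read(rtree_toy_t *rtree, rtree_toy_ctx_t *ctx, uintptr_t key)
{
	uintptr_t prefix = key >> (LG_PAGE + SUBTREE_BITS);
	size_t subkey = (size_t)(key >> LG_PAGE) & (SUBTREE_SLOTS - 1);

	if (ctx->subtree != NULL && ctx->prefix == prefix) {
		/* Fast path: same subtree as the previous lookup. */
		return (ctx->subtree->slots[subkey]);
	}

	/* Slow path: walk from the root, then refresh the cached path. */
	subtree_t *subtree = rtree->children[prefix & (ROOT_SLOTS - 1)];
	if (subtree == NULL)
		return (NULL);
	ctx->prefix = prefix;
	ctx->subtree = subtree;
	return (subtree->slots[subkey]);
}

static void
rtree_toy_write(rtree_toy_t *rtree, uintptr_t key, void *extent)
{
	uintptr_t prefix = key >> (LG_PAGE + SUBTREE_BITS);
	size_t subkey = (size_t)(key >> LG_PAGE) & (SUBTREE_SLOTS - 1);
	subtree_t **childp = &rtree->children[prefix & (ROOT_SLOTS - 1)];

	if (*childp == NULL) {
		*childp = calloc(1, sizeof(subtree_t));
		if (*childp == NULL)
			abort();
	}
	(*childp)->slots[subkey] = extent;
}

int
main(void)
{
	static rtree_toy_t rtree;
	rtree_toy_ctx_t ctx = {0, NULL};
	int extent_a, extent_b;

	rtree_toy_write(&rtree, 0x100000, &extent_a);
	rtree_toy_write(&rtree, 0x101000, &extent_b);	/* adjacent page */

	/* First read takes the slow path; the second reuses the cache. */
	printf("%d\n", rtree_toy_read(&rtree, &ctx, 0x100000) == &extent_a);
	printf("%d\n", rtree_toy_read(&rtree, &ctx, 0x101000) == &extent_b);
	return (0);
}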

src/extent.c

@@ -259,18 +259,19 @@ extent_heaps_remove(extent_heap_t extent_heaps[NPSIZES], extent_t *extent)
 }
 
 static bool
-extent_rtree_acquire(tsdn_t *tsdn, const extent_t *extent, bool dependent,
-    bool init_missing, rtree_elm_t **r_elm_a, rtree_elm_t **r_elm_b)
+extent_rtree_acquire(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx,
+    const extent_t *extent, bool dependent, bool init_missing,
+    rtree_elm_t **r_elm_a, rtree_elm_t **r_elm_b)
 {
 
-	*r_elm_a = rtree_elm_acquire(tsdn, &extents_rtree,
+	*r_elm_a = rtree_elm_acquire(tsdn, &extents_rtree, rtree_ctx,
 	    (uintptr_t)extent_base_get(extent), dependent, init_missing);
 	if (!dependent && *r_elm_a == NULL)
 		return (true);
 	assert(*r_elm_a != NULL);
 
 	if (extent_size_get(extent) > PAGE) {
-		*r_elm_b = rtree_elm_acquire(tsdn, &extents_rtree,
+		*r_elm_b = rtree_elm_acquire(tsdn, &extents_rtree, rtree_ctx,
 		    (uintptr_t)extent_last_get(extent), dependent,
 		    init_missing);
 		if (!dependent && *r_elm_b == NULL)
@@ -302,14 +303,15 @@ extent_rtree_release(tsdn_t *tsdn, rtree_elm_t *elm_a, rtree_elm_t *elm_b)
 }
 
 static void
-extent_interior_register(tsdn_t *tsdn, const extent_t *extent)
+extent_interior_register(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx,
+    const extent_t *extent)
 {
 	size_t i;
 
 	assert(extent_slab_get(extent));
 
 	for (i = 1; i < (extent_size_get(extent) >> LG_PAGE) - 1; i++) {
-		rtree_write(tsdn, &extents_rtree,
+		rtree_write(tsdn, &extents_rtree, rtree_ctx,
 		    (uintptr_t)extent_base_get(extent) + (uintptr_t)(i <<
 		    LG_PAGE), extent);
 	}
@@ -318,13 +320,16 @@ extent_interior_register(tsdn_t *tsdn, const extent_t *extent)
 static bool
 extent_register(tsdn_t *tsdn, const extent_t *extent)
 {
+	rtree_ctx_t rtree_ctx_fallback;
+	rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback);
 	rtree_elm_t *elm_a, *elm_b;
 
-	if (extent_rtree_acquire(tsdn, extent, false, true, &elm_a, &elm_b))
+	if (extent_rtree_acquire(tsdn, rtree_ctx, extent, false, true, &elm_a,
+	    &elm_b))
 		return (true);
 	extent_rtree_write_acquired(tsdn, elm_a, elm_b, extent);
 	if (extent_slab_get(extent))
-		extent_interior_register(tsdn, extent);
+		extent_interior_register(tsdn, rtree_ctx, extent);
 	extent_rtree_release(tsdn, elm_a, elm_b);
 
 	if (config_prof && opt_prof && extent_active_get(extent)) {
@@ -347,14 +352,15 @@ extent_register(tsdn_t *tsdn, const extent_t *extent)
 }
 
 static void
-extent_interior_deregister(tsdn_t *tsdn, const extent_t *extent)
+extent_interior_deregister(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx,
+    const extent_t *extent)
 {
 	size_t i;
 
 	assert(extent_slab_get(extent));
 
 	for (i = 1; i < (extent_size_get(extent) >> LG_PAGE) - 1; i++) {
-		rtree_clear(tsdn, &extents_rtree,
+		rtree_clear(tsdn, &extents_rtree, rtree_ctx,
 		    (uintptr_t)extent_base_get(extent) + (uintptr_t)(i <<
 		    LG_PAGE));
 	}
@@ -363,12 +369,15 @@ extent_interior_deregister(tsdn_t *tsdn, const extent_t *extent)
 static void
 extent_deregister(tsdn_t *tsdn, const extent_t *extent)
 {
+	rtree_ctx_t rtree_ctx_fallback;
+	rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback);
 	rtree_elm_t *elm_a, *elm_b;
 
-	extent_rtree_acquire(tsdn, extent, true, false, &elm_a, &elm_b);
+	extent_rtree_acquire(tsdn, rtree_ctx, extent, true, false, &elm_a,
+	    &elm_b);
 	extent_rtree_write_acquired(tsdn, elm_a, elm_b, NULL);
 	if (extent_slab_get(extent))
-		extent_interior_deregister(tsdn, extent);
+		extent_interior_deregister(tsdn, rtree_ctx, extent);
 	extent_rtree_release(tsdn, elm_a, elm_b);
 
 	if (config_prof && opt_prof && extent_active_get(extent)) {
@@ -422,6 +431,8 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks,
     bool slab)
 {
 	extent_t *extent;
+	rtree_ctx_t rtree_ctx_fallback;
+	rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback);
 	size_t size, alloc_size, leadsize, trailsize;
 
 	assert(new_addr == NULL || !slab);
@@ -437,7 +448,7 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks,
 	if (new_addr != NULL) {
 		rtree_elm_t *elm;
 
-		elm = rtree_elm_acquire(tsdn, &extents_rtree,
+		elm = rtree_elm_acquire(tsdn, &extents_rtree, rtree_ctx,
 		    (uintptr_t)new_addr, false, false);
 		if (elm != NULL) {
 			extent = rtree_elm_read_acquired(tsdn, &extents_rtree,
@@ -515,7 +526,7 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks,
 	extent_active_set(extent, true);
 	if (slab) {
 		extent_slab_set(extent, slab);
-		extent_interior_register(tsdn, extent);
+		extent_interior_register(tsdn, rtree_ctx, extent);
 	}
 
 	malloc_mutex_unlock(tsdn, &arena->extents_mtx);
@@ -731,6 +742,8 @@ extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks,
     extent_heap_t extent_heaps[NPSIZES], bool cache, extent_t *extent)
 {
 	extent_t *prev, *next;
+	rtree_ctx_t rtree_ctx_fallback;
+	rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback);
 
 	assert(!cache || !extent_zeroed_get(extent));
 
@@ -741,7 +754,7 @@ extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks,
 	extent_active_set(extent, false);
 	extent_zeroed_set(extent, !cache && extent_zeroed_get(extent));
 
 	if (extent_slab_get(extent)) {
-		extent_interior_deregister(tsdn, extent);
+		extent_interior_deregister(tsdn, rtree_ctx, extent);
 		extent_slab_set(extent, false);
 	}
@@ -750,7 +763,7 @@ extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks,
 	arena_extent_cache_maybe_insert(arena, extent, cache);
 
 	/* Try to coalesce forward. */
-	next = rtree_read(tsdn, &extents_rtree,
+	next = rtree_read(tsdn, &extents_rtree, rtree_ctx,
 	    (uintptr_t)extent_past_get(extent), false);
 	if (next != NULL) {
 		extent_try_coalesce(tsdn, arena, extent_hooks, extent, next,
@@ -758,7 +771,7 @@ extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks,
 	}
 
 	/* Try to coalesce backward. */
-	prev = rtree_read(tsdn, &extents_rtree,
+	prev = rtree_read(tsdn, &extents_rtree, rtree_ctx,
 	    (uintptr_t)extent_before_get(extent), false);
 	if (prev != NULL) {
 		extent_try_coalesce(tsdn, arena, extent_hooks, prev, extent,
@@ -910,6 +923,8 @@ extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks,
     size_t usize_b)
 {
 	extent_t *trail;
+	rtree_ctx_t rtree_ctx_fallback;
+	rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback);
 	rtree_elm_t *lead_elm_a, *lead_elm_b, *trail_elm_a, *trail_elm_b;
 
 	assert(extent_size_get(extent) == size_a + size_b);
@@ -928,8 +943,8 @@ extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks,
 		    extent_zeroed_get(extent), extent_committed_get(extent),
 		    extent_slab_get(extent));
 
-		if (extent_rtree_acquire(tsdn, &lead, false, true, &lead_elm_a,
-		    &lead_elm_b))
+		if (extent_rtree_acquire(tsdn, rtree_ctx, &lead, false, true,
+		    &lead_elm_a, &lead_elm_b))
 			goto label_error_b;
 	}
@@ -937,8 +952,8 @@ extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks,
 	    size_a), size_b, usize_b, extent_active_get(extent),
 	    extent_zeroed_get(extent), extent_committed_get(extent),
 	    extent_slab_get(extent));
-	if (extent_rtree_acquire(tsdn, trail, false, true, &trail_elm_a,
-	    &trail_elm_b))
+	if (extent_rtree_acquire(tsdn, rtree_ctx, trail, false, true,
+	    &trail_elm_a, &trail_elm_b))
 		goto label_error_c;
 
 	if (extent_hooks->split(extent_base_get(extent), size_a + size_b,
@@ -985,6 +1000,8 @@ bool
 extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks,
     extent_t *a, extent_t *b)
 {
+	rtree_ctx_t rtree_ctx_fallback;
+	rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback);
 	rtree_elm_t *a_elm_a, *a_elm_b, *b_elm_a, *b_elm_b;
 
 	extent_hooks_assure_initialized(tsdn, arena, extent_hooks);
@@ -998,8 +1015,10 @@ extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks,
 	 * owned, so the following code uses decomposed helper functions rather
 	 * than extent_{,de}register() to do things in the right order.
 	 */
-	extent_rtree_acquire(tsdn, a, true, false, &a_elm_a, &a_elm_b);
-	extent_rtree_acquire(tsdn, b, true, false, &b_elm_a, &b_elm_b);
+	extent_rtree_acquire(tsdn, rtree_ctx, a, true, false, &a_elm_a,
+	    &a_elm_b);
+	extent_rtree_acquire(tsdn, rtree_ctx, b, true, false, &b_elm_a,
+	    &b_elm_b);
 
 	if (a_elm_b != NULL) {
 		rtree_elm_write_acquired(tsdn, &extents_rtree, a_elm_b, NULL);
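Every call site above gains the same two-line preamble: a stack-allocated rtree_ctx_fallback plus a tsdn_rtree_ctx() call, so the lookup context comes from thread-specific data when it is available and from the caller's stack otherwise. Below is a rough, self-contained sketch of that pattern with toy stand-in types (rtree_ctx_toy_t, tsdn_toy_t, tsdn_rtree_ctx_toy(), extent_lookup_toy()); it mirrors the shape of the hunks above but is not jemalloc's actual tsd/tsdn machinery.

/*
 * Toy sketch of the fallback pattern introduced at each call site above.
 * All types and helpers here are illustrative stand-ins, not jemalloc's
 * definitions.
 */
#include <stdint.h>
#include <string.h>

typedef struct {
	uintptr_t	 prefix;	/* cached key prefix */
	void		*subtree;	/* cached subtree, or NULL if cold */
} rtree_ctx_toy_t;

typedef struct {
	rtree_ctx_toy_t	rtree_ctx;	/* lives in thread-specific data */
} tsd_toy_t;

/* "Nullable" tsd handle: NULL models the case where no TSD is available. */
typedef tsd_toy_t tsdn_toy_t;

static rtree_ctx_toy_t *
tsdn_rtree_ctx_toy(tsdn_toy_t *tsdn, rtree_ctx_toy_t *fallback)
{
	if (tsdn == NULL) {
		/* No thread state: hand back a cold, caller-provided ctx. */
		memset(fallback, 0, sizeof(*fallback));
		return (fallback);
	}
	return (&tsdn->rtree_ctx);
}

/* Caller-side shape, mirroring the preamble added in the hunks above. */
static void *
extent_lookup_toy(tsdn_toy_t *tsdn, uintptr_t key)
{
	rtree_ctx_toy_t rtree_ctx_fallback;
	rtree_ctx_toy_t *rtree_ctx = tsdn_rtree_ctx_toy(tsdn,
	    &rtree_ctx_fallback);

	/* rtree_ctx would now be threaded through the rtree read/write. */
	(void)rtree_ctx;
	(void)key;
	return (NULL);
}

int
main(void)
{
	/* Without tsd the lookup transparently uses the stack fallback. */
	return (extent_lookup_toy(NULL, 0x1000) == NULL ? 0 : 1);
}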

src/rtree.c

@@ -52,11 +52,12 @@ rtree_new(rtree_t *rtree, unsigned bits)
 		rtree->levels[height-1].cumbits = bits;
 	}
 
-	/* Compute lookup table to be used by rtree_start_level(). */
+	/* Compute lookup table to be used by rtree_[ctx_]start_level(). */
 	for (i = 0; i < RTREE_HEIGHT_MAX; i++) {
 		rtree->start_level[i] = hmin(RTREE_HEIGHT_MAX - 1 - i, height -
 		    1);
 	}
+	rtree->start_level[RTREE_HEIGHT_MAX] = 0;
 
 	return (false);
 }