From 25f7bbcf28f5c83b11149989b3552d87c1f3c5e9 Mon Sep 17 00:00:00 2001
From: Dave Watson
Date: Mon, 31 Oct 2016 16:23:33 -0700
Subject: [PATCH] Fix long spinning in rtree_node_init

rtree_node_init spinlocks the node, allocates, and then sets the node.
This is under heavy contention at the top of the tree if many threads
start to allocate at the same time.

Instead, take a per-rtree sleeping mutex to reduce spinning.  Tested
both pthreads and osx OSSpinLock, and both reduce spinning adequately.

Previous benchmark time:
./ttest1 500 100
~15s

New benchmark time:
./ttest1 500 100
.57s
---
 include/jemalloc/internal/rtree.h   |  6 ++----
 include/jemalloc/internal/witness.h |  3 ++-
 src/rtree.c                         | 23 +++++++++--------------
 test/unit/rtree.c                   |  2 ++
 4 files changed, 15 insertions(+), 19 deletions(-)

diff --git a/include/jemalloc/internal/rtree.h b/include/jemalloc/internal/rtree.h
index fc88dfec..9c6cc22f 100644
--- a/include/jemalloc/internal/rtree.h
+++ b/include/jemalloc/internal/rtree.h
@@ -23,9 +23,6 @@ typedef struct rtree_s rtree_t;
 #define RTREE_HEIGHT_MAX						\
 	((1U << (LG_SIZEOF_PTR+3)) / RTREE_BITS_PER_LEVEL)
 
-/* Used for two-stage lock-free node initialization. */
-#define RTREE_NODE_INITIALIZING	((rtree_elm_t *)0x1)
-
 #define RTREE_CTX_INITIALIZER	{					\
 	false,								\
 	0,								\
@@ -139,6 +136,7 @@ struct rtree_s {
 	 */
 	unsigned		start_level[RTREE_HEIGHT_MAX + 1];
 	rtree_level_t		levels[RTREE_HEIGHT_MAX];
+	malloc_mutex_t		init_lock;
 };
 
 #endif /* JEMALLOC_H_STRUCTS */
@@ -251,7 +249,7 @@ JEMALLOC_ALWAYS_INLINE bool
 rtree_node_valid(rtree_elm_t *node)
 {
 
-	return ((uintptr_t)node > (uintptr_t)RTREE_NODE_INITIALIZING);
+	return ((uintptr_t)node != (uintptr_t)0);
 }
 
 JEMALLOC_ALWAYS_INLINE rtree_elm_t *
diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h
index 26024ac2..86ddb64a 100644
--- a/include/jemalloc/internal/witness.h
+++ b/include/jemalloc/internal/witness.h
@@ -28,7 +28,8 @@ typedef int witness_comp_t (const witness_t *, void *, const witness_t *,
 #define WITNESS_RANK_ARENA_EXTENT_CACHE	10
 
 #define WITNESS_RANK_RTREE_ELM		11U
-#define WITNESS_RANK_BASE		12U
+#define WITNESS_RANK_RTREE		12U
+#define WITNESS_RANK_BASE		13U
 
 #define WITNESS_RANK_LEAF		0xffffffffU
 #define WITNESS_RANK_ARENA_BIN		WITNESS_RANK_LEAF
diff --git a/src/rtree.c b/src/rtree.c
index 0a42a982..b6b9ed76 100644
--- a/src/rtree.c
+++ b/src/rtree.c
@@ -59,6 +59,8 @@ rtree_new(rtree_t *rtree, unsigned bits)
 	}
 	rtree->start_level[RTREE_HEIGHT_MAX] = 0;
 
+	malloc_mutex_init(&rtree->init_lock, "rtree", WITNESS_RANK_RTREE);
+
 	return (false);
 }
 
@@ -135,25 +137,18 @@ rtree_node_init(tsdn_t *tsdn, rtree_t *rtree, unsigned level,
 {
 	rtree_elm_t *node;
 
-	if (atomic_cas_p((void **)elmp, NULL, RTREE_NODE_INITIALIZING)) {
-		spin_t spinner;
-
-		/*
-		 * Another thread is already in the process of initializing.
-		 * Spin-wait until initialization is complete.
-		 */
-		spin_init(&spinner);
-		do {
-			spin_adaptive(&spinner);
-			node = atomic_read_p((void **)elmp);
-		} while (node == RTREE_NODE_INITIALIZING);
-	} else {
+	malloc_mutex_lock(tsdn, &rtree->init_lock);
+	node = atomic_read_p((void**)elmp);
+	if (node == NULL) {
 		node = rtree_node_alloc(tsdn, rtree, ZU(1) <<
 		    rtree->levels[level].bits);
-		if (node == NULL)
+		if (node == NULL) {
+			malloc_mutex_unlock(tsdn, &rtree->init_lock);
 			return (NULL);
+		}
 		atomic_write_p((void **)elmp, node);
 	}
+	malloc_mutex_unlock(tsdn, &rtree->init_lock);
 
 	return (node);
 }
diff --git a/test/unit/rtree.c b/test/unit/rtree.c
index a05834fa..03f4e269 100644
--- a/test/unit/rtree.c
+++ b/test/unit/rtree.c
@@ -13,8 +13,10 @@ rtree_node_alloc_intercept(tsdn_t *tsdn, rtree_t *rtree, size_t nelms)
 
 	if (rtree != test_rtree)
 		return rtree_node_alloc_orig(tsdn, rtree, nelms);
+	malloc_mutex_unlock(tsdn, &rtree->init_lock);
 	node = (rtree_elm_t *)calloc(nelms, sizeof(rtree_elm_t));
 	assert_ptr_not_null(node, "Unexpected calloc() failure");
+	malloc_mutex_lock(tsdn, &rtree->init_lock);
 
 	return (node);
 }
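
For readers outside the jemalloc tree, here is a minimal standalone sketch of the pattern the patch moves to: lookups stay lock-free via an atomic read, and only a thread that finds an empty slot takes a sleeping mutex, re-checks the slot, allocates, and publishes the node with an atomic store. This sketch uses pthreads and C11 atomics rather than jemalloc's malloc_mutex_t and atomic_*_p wrappers; slot_t, node_slot_init, and the file-scope init_lock (standing in for the per-rtree lock) are hypothetical names for illustration only.

/*
 * Sketch only, not jemalloc code: serialize slow-path initialization with a
 * sleeping mutex while readers remain lock-free.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdlib.h>

typedef struct {
	_Atomic(void *)	val;	/* published node; NULL until initialized */
} slot_t;

/* Stands in for the per-rtree init_lock added by the patch. */
static pthread_mutex_t init_lock = PTHREAD_MUTEX_INITIALIZER;

static void *
node_slot_init(slot_t *slot, size_t nbytes)
{
	void *node;

	/* Fast path, shared with readers: acquire-load, no lock taken. */
	node = atomic_load_explicit(&slot->val, memory_order_acquire);
	if (node != NULL)
		return (node);

	/* Slow path: sleep on the mutex instead of spinning. */
	pthread_mutex_lock(&init_lock);
	node = atomic_load_explicit(&slot->val, memory_order_relaxed);
	if (node == NULL) {
		node = calloc(1, nbytes);
		if (node == NULL) {
			pthread_mutex_unlock(&init_lock);
			return (NULL);
		}
		/* Release-store so readers observe a fully zeroed node. */
		atomic_store_explicit(&slot->val, node, memory_order_release);
	}
	pthread_mutex_unlock(&init_lock);
	return (node);
}

The re-check under the lock is what removes the long spin: concurrent initializers sleep on the mutex instead of busy-waiting on an RTREE_NODE_INITIALIZING sentinel, losers of the race reuse the winner's node, and readers never touch the mutex at all.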