Fix long spinning in rtree_node_init

rtree_node_init spin-locks the node slot, allocates the node, and then
publishes it.  The slots at the top of the tree come under heavy contention
when many threads start allocating at the same time.

Instead, take a per-rtree sleeping mutex to reduce spinning.  Tested with
both pthread mutexes and OS X's OSSpinLock; both reduce spinning adequately.
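
For illustration, a minimal sketch of the new slot-initialization pattern,
written against plain pthread mutexes and C11 atomics rather than jemalloc's
internal malloc_mutex_t and atomic_* wrappers; node_t, node_init, and
init_lock are stand-in names for this sketch, not jemalloc identifiers:

#include <pthread.h>
#include <stdatomic.h>
#include <stdlib.h>

typedef struct node_s {
    _Atomic(struct node_s *) child;   /* stand-in for an rtree child slot */
} node_t;

static pthread_mutex_t init_lock = PTHREAD_MUTEX_INITIALIZER;

static node_t *
node_init(_Atomic(node_t *) *slotp, size_t nelms)
{
    node_t *node;

    /* Sleep on a mutex instead of spin-waiting on an "initializing" marker. */
    pthread_mutex_lock(&init_lock);
    /* Re-check under the lock: another thread may have won the race. */
    node = atomic_load_explicit(slotp, memory_order_acquire);
    if (node == NULL) {
        node = calloc(nelms, sizeof(node_t));
        if (node == NULL) {
            pthread_mutex_unlock(&init_lock);
            return NULL;
        }
        /* Publish with a release store so lock-free readers see a full node. */
        atomic_store_explicit(slotp, node, memory_order_release);
    }
    pthread_mutex_unlock(&init_lock);

    return node;
}

The actual change to rtree_node_init (in the diff below) has the same shape:
re-check the slot under the mutex, allocate only if it is still empty, and
publish with an atomic store so readers never see a half-initialized pointer.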

Previous benchmark time:
./ttest1 500 100
~15s

New benchmark time:
./ttest1 500 100
0.57s

Author:    Dave Watson, 2016-10-31 16:23:33 -07:00
Committer: Jason Evans
Commit:    25f7bbcf28 (parent 712fde79fd)
4 changed files with 15 additions and 19 deletions


@@ -23,9 +23,6 @@ typedef struct rtree_s rtree_t;
 #define RTREE_HEIGHT_MAX \
     ((1U << (LG_SIZEOF_PTR+3)) / RTREE_BITS_PER_LEVEL)
 
-/* Used for two-stage lock-free node initialization. */
-#define RTREE_NODE_INITIALIZING ((rtree_elm_t *)0x1)
-
 #define RTREE_CTX_INITIALIZER { \
     false, \
     0, \
@@ -139,6 +136,7 @@ struct rtree_s {
      */
     unsigned        start_level[RTREE_HEIGHT_MAX + 1];
     rtree_level_t   levels[RTREE_HEIGHT_MAX];
+    malloc_mutex_t  init_lock;
 };
 
 #endif /* JEMALLOC_H_STRUCTS */
@@ -251,7 +249,7 @@ JEMALLOC_ALWAYS_INLINE bool
 rtree_node_valid(rtree_elm_t *node)
 {
-    return ((uintptr_t)node > (uintptr_t)RTREE_NODE_INITIALIZING);
+    return ((uintptr_t)node != (uintptr_t)0);
 }
 
 JEMALLOC_ALWAYS_INLINE rtree_elm_t *


@@ -28,7 +28,8 @@ typedef int witness_comp_t (const witness_t *, void *, const witness_t *,
 #define WITNESS_RANK_ARENA_EXTENT_CACHE 10
 
 #define WITNESS_RANK_RTREE_ELM  11U
-#define WITNESS_RANK_BASE       12U
+#define WITNESS_RANK_RTREE      12U
+#define WITNESS_RANK_BASE       13U
 
 #define WITNESS_RANK_LEAF       0xffffffffU
 #define WITNESS_RANK_ARENA_BIN  WITNESS_RANK_LEAF


@@ -59,6 +59,8 @@ rtree_new(rtree_t *rtree, unsigned bits)
     }
     rtree->start_level[RTREE_HEIGHT_MAX] = 0;
 
+    malloc_mutex_init(&rtree->init_lock, "rtree", WITNESS_RANK_RTREE);
+
     return (false);
 }
@@ -135,25 +137,18 @@ rtree_node_init(tsdn_t *tsdn, rtree_t *rtree, unsigned level,
 {
     rtree_elm_t *node;
 
-    if (atomic_cas_p((void **)elmp, NULL, RTREE_NODE_INITIALIZING)) {
-        spin_t spinner;
-
-        /*
-         * Another thread is already in the process of initializing.
-         * Spin-wait until initialization is complete.
-         */
-        spin_init(&spinner);
-        do {
-            spin_adaptive(&spinner);
-            node = atomic_read_p((void **)elmp);
-        } while (node == RTREE_NODE_INITIALIZING);
-    } else {
+    malloc_mutex_lock(tsdn, &rtree->init_lock);
+    node = atomic_read_p((void**)elmp);
+    if (node == NULL) {
         node = rtree_node_alloc(tsdn, rtree, ZU(1) <<
             rtree->levels[level].bits);
-        if (node == NULL)
+        if (node == NULL) {
+            malloc_mutex_unlock(tsdn, &rtree->init_lock);
             return (NULL);
+        }
         atomic_write_p((void **)elmp, node);
     }
+    malloc_mutex_unlock(tsdn, &rtree->init_lock);
 
     return (node);
 }
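
Lookups should still hit the lock-free fast path: child pointers are read
with plain atomic loads (cf. the rtree_node_valid change above), and
rtree_node_init is only reached when a read observes a NULL slot, so the new
mutex stays off the hot path.  A rough sketch of that double-checked shape,
reusing the simplified node_t/node_init from the earlier sketch:

/* Double-checked read: lock-free fast path, mutex-protected slow path. */
static node_t *
node_read(_Atomic(node_t *) *slotp, size_t nelms)
{
    node_t *node;

    /* Common case: the slot is already populated; no lock is taken. */
    node = atomic_load_explicit(slotp, memory_order_acquire);
    if (node != NULL)
        return node;
    /* Slow path: one-time initialization under init_lock (see node_init). */
    return node_init(slotp, nelms);
}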


@@ -13,8 +13,10 @@ rtree_node_alloc_intercept(tsdn_t *tsdn, rtree_t *rtree, size_t nelms)
     if (rtree != test_rtree)
         return rtree_node_alloc_orig(tsdn, rtree, nelms);
 
+    malloc_mutex_unlock(tsdn, &rtree->init_lock);
     node = (rtree_elm_t *)calloc(nelms, sizeof(rtree_elm_t));
     assert_ptr_not_null(node, "Unexpected calloc() failure");
+    malloc_mutex_lock(tsdn, &rtree->init_lock);
 
     return (node);
 }