Convert rtree code to use C11 atomics
In the process, I changed the implementation of rtree_elm_acquire so that it won't even try to CAS if its initial read (getting the extent + lock bit) indicates that the CAS is doomed to fail. This can significantly improve performance under contention.
parent 3a2b183d5f
commit 21a68e2d22
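The acquire-path change described above is easiest to see in isolation. Below is a minimal, self-contained sketch of the test-before-CAS idiom using raw C11 atomics; the slot_t and slot_acquire names are illustrative only, and the real code (in the diff that follows) goes through jemalloc's atomic_p_t wrappers and backs off with spin_adaptive between attempts.

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdint.h>

    /* Hypothetical stand-in for an rtree slot: an extent pointer whose
     * least significant bit serves as a lock, as in rtree_elm_acquire. */
    typedef struct {
    	_Atomic(uintptr_t) bits;
    } slot_t;

    static void
    slot_acquire(slot_t *slot) {
    	while (true) {
    		/* Cheap relaxed read first: if the lock bit is already
    		 * set, a CAS is doomed to fail, so don't issue one. */
    		uintptr_t cur = atomic_load_explicit(&slot->bits,
    		    memory_order_relaxed);
    		if ((cur & (uintptr_t)0x1) == 0) {
    			if (atomic_compare_exchange_strong_explicit(
    			    &slot->bits, &cur, cur | (uintptr_t)0x1,
    			    memory_order_acquire, memory_order_relaxed)) {
    				return;
    			}
    		}
    		/* Contended; the real code calls spin_adaptive() here. */
    	}
    }

Skipping the CAS on a visibly locked slot matters because a failed compare-exchange is still a read-modify-write: under contention it bounces the cache line between cores, whereas the relaxed pre-read lets waiters spin locally.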
include/jemalloc/internal/rtree_inlines.h
@@ -55,14 +55,16 @@ rtree_elm_read(rtree_elm_t *elm, bool dependent) {
 		 * synchronization, because the rtree update became visible in
 		 * memory before the pointer came into existence.
 		 */
-		extent = elm->extent;
+		extent = (extent_t *)atomic_load_p(&elm->child_or_extent,
+		    ATOMIC_RELAXED);
 	} else {
 		/*
 		 * An arbitrary read, e.g. on behalf of ivsalloc(), may not be
 		 * dependent on a previous rtree write, which means a stale read
 		 * could result if synchronization were omitted here.
 		 */
-		extent = (extent_t *)atomic_read_p(&elm->pun);
+		extent = (extent_t *)atomic_load_p(&elm->child_or_extent,
+		    ATOMIC_ACQUIRE);
 	}
 
 	/* Mask the lock bit. */
@@ -73,7 +75,7 @@ rtree_elm_read(rtree_elm_t *elm, bool dependent) {
 
 JEMALLOC_INLINE void
 rtree_elm_write(rtree_elm_t *elm, const extent_t *extent) {
-	atomic_write_p(&elm->pun, extent);
+	atomic_store_p(&elm->child_or_extent, (void *)extent, ATOMIC_RELEASE);
 }
 
 JEMALLOC_ALWAYS_INLINE rtree_elm_t *
@@ -161,11 +163,18 @@ rtree_elm_acquire(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
 
 	spin_t spinner = SPIN_INITIALIZER;
 	while (true) {
-		extent_t *extent = rtree_elm_read(elm, false);
 		/* The least significant bit serves as a lock. */
-		void *s = (void *)((uintptr_t)extent | (uintptr_t)0x1);
-		if (!atomic_cas_p(&elm->pun, (void *)extent, s)) {
-			break;
+		void *extent_and_lock = atomic_load_p(&elm->child_or_extent,
+		    ATOMIC_RELAXED);
+		if (likely(((uintptr_t)extent_and_lock & (uintptr_t)0x1) == 0))
+		{
+			void *locked = (void *)((uintptr_t)extent_and_lock
+			    | (uintptr_t)0x1);
+			if (likely(atomic_compare_exchange_strong_p(
+			    &elm->child_or_extent, &extent_and_lock, locked,
+			    ATOMIC_ACQUIRE, ATOMIC_RELAXED))) {
+				break;
+			}
 		}
 		spin_adaptive(&spinner);
 	}
@@ -180,9 +189,9 @@ rtree_elm_acquire(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
 JEMALLOC_INLINE extent_t *
 rtree_elm_read_acquired(tsdn_t *tsdn, const rtree_t *rtree, rtree_elm_t *elm) {
 	extent_t *extent;
-
-	assert(((uintptr_t)elm->pun & (uintptr_t)0x1) == (uintptr_t)0x1);
-	extent = (extent_t *)((uintptr_t)elm->pun & ~((uintptr_t)0x1));
+	void *ptr = atomic_load_p(&elm->child_or_extent, ATOMIC_RELAXED);
+	assert(((uintptr_t)ptr & (uintptr_t)0x1) == (uintptr_t)0x1);
+	extent = (extent_t *)((uintptr_t)ptr & ~((uintptr_t)0x1));
 	assert(((uintptr_t)extent & (uintptr_t)0x1) == (uintptr_t)0x0);
 
 	if (config_debug) {
@@ -196,13 +205,14 @@ JEMALLOC_INLINE void
 rtree_elm_write_acquired(tsdn_t *tsdn, const rtree_t *rtree, rtree_elm_t *elm,
     const extent_t *extent) {
 	assert(((uintptr_t)extent & (uintptr_t)0x1) == (uintptr_t)0x0);
-	assert(((uintptr_t)elm->pun & (uintptr_t)0x1) == (uintptr_t)0x1);
+	assert(((uintptr_t)atomic_load_p(&elm->child_or_extent, ATOMIC_RELAXED)
+	    & (uintptr_t)0x1) == (uintptr_t)0x1);
 
 	if (config_debug) {
 		rtree_elm_witness_access(tsdn, rtree, elm);
 	}
-	elm->pun = (void *)((uintptr_t)extent | (uintptr_t)0x1);
+	atomic_store_p(&elm->child_or_extent, (void *)((uintptr_t)extent
+	    | (uintptr_t)0x1), ATOMIC_RELEASE);
 	assert(rtree_elm_read_acquired(tsdn, rtree, elm) == extent);
 }
 
include/jemalloc/internal/rtree_structs.h
@@ -2,11 +2,8 @@
 #define JEMALLOC_INTERNAL_RTREE_STRUCTS_H
 
 struct rtree_elm_s {
-	union {
-		void *pun;
-		rtree_elm_t *child;
-		extent_t *extent;
-	};
+	/* Either "rtree_elm_t *child;" or "extent_t *extent;". */
+	atomic_p_t child_or_extent;
 };
 
 struct rtree_elm_witness_s {
@@ -41,11 +38,9 @@ struct rtree_ctx_s {
 };
 
 struct rtree_s {
-	union {
-		void *root_pun;
-		rtree_elm_t *root;
-	};
-	malloc_mutex_t init_lock;
+	/* An rtree_elm_t *. */
+	atomic_p_t root;
+	malloc_mutex_t init_lock;
 };
 
 #endif /* JEMALLOC_INTERNAL_RTREE_STRUCTS_H */
src/rtree.c
@@ -7,7 +7,7 @@
  */
 bool
 rtree_new(rtree_t *rtree) {
-	rtree->root_pun = NULL;
+	atomic_store_p(&rtree->root, NULL, ATOMIC_RELAXED);
 	if (malloc_mutex_init(&rtree->init_lock, "rtree", WITNESS_RANK_RTREE)) {
 		return true;
 	}
@@ -54,7 +54,8 @@ rtree_delete_subtree(tsdn_t *tsdn, rtree_t *rtree, rtree_elm_t *node,
 
 	nchildren = ZU(1) << rtree_levels[level].bits;
 	for (i = 0; i < nchildren; i++) {
-		rtree_elm_t *child = node[i].child;
+		rtree_elm_t *child = (rtree_elm_t *)atomic_load_p(
+		    &node[i].child_or_extent, ATOMIC_RELAXED);
 		if (child != NULL) {
 			rtree_delete_subtree(tsdn, rtree, child, level +
 			    1);
@@ -66,19 +67,25 @@ rtree_delete_subtree(tsdn_t *tsdn, rtree_t *rtree, rtree_elm_t *node,
 
 void
 rtree_delete(tsdn_t *tsdn, rtree_t *rtree) {
-	if (rtree->root_pun != NULL) {
-		rtree_delete_subtree(tsdn, rtree, rtree->root, 0);
+	rtree_elm_t *rtree_root = (rtree_elm_t *)atomic_load_p(&rtree->root,
+	    ATOMIC_RELAXED);
+	if (rtree_root != NULL) {
+		rtree_delete_subtree(tsdn, rtree, rtree_root, 0);
 	}
 }
 #endif
 
 static rtree_elm_t *
 rtree_node_init(tsdn_t *tsdn, rtree_t *rtree, unsigned level,
-    rtree_elm_t **elmp) {
+    atomic_p_t *elmp) {
 	rtree_elm_t *node;
 
 	malloc_mutex_lock(tsdn, &rtree->init_lock);
-	node = atomic_read_p((void**)elmp);
+	/*
+	 * If *elmp is non-null, then it was initialized with the init lock
+	 * held, so we can get by with 'relaxed' here.
+	 */
+	node = atomic_load_p(elmp, ATOMIC_RELAXED);
 	if (node == NULL) {
 		node = rtree_node_alloc(tsdn, rtree, ZU(1) <<
 		    rtree_levels[level].bits);
@@ -86,7 +93,11 @@ rtree_node_init(tsdn_t *tsdn, rtree_t *rtree, unsigned level,
 			malloc_mutex_unlock(tsdn, &rtree->init_lock);
 			return NULL;
 		}
-		atomic_write_p((void **)elmp, node);
+		/*
+		 * Even though we hold the lock, a later reader might not; we
+		 * need release semantics.
+		 */
+		atomic_store_p(elmp, node, ATOMIC_RELEASE);
 	}
 	malloc_mutex_unlock(tsdn, &rtree->init_lock);
 
@@ -102,11 +113,14 @@ static rtree_elm_t *
 rtree_child_tryread(rtree_elm_t *elm, bool dependent) {
 	rtree_elm_t *child;
 
-	/* Double-checked read (first read may be stale). */
-	child = elm->child;
-	if (!dependent && !rtree_node_valid(child)) {
-		child = (rtree_elm_t *)atomic_read_p(&elm->pun);
+	if (dependent) {
+		child = (rtree_elm_t *)atomic_load_p(&elm->child_or_extent,
+		    ATOMIC_RELAXED);
+	} else {
+		child = (rtree_elm_t *)atomic_load_p(&elm->child_or_extent,
+		    ATOMIC_ACQUIRE);
 	}
 
 	assert(!dependent || child != NULL);
 	return child;
 }
@@ -118,7 +132,8 @@ rtree_child_read(tsdn_t *tsdn, rtree_t *rtree, rtree_elm_t *elm, unsigned level,
 
 	child = rtree_child_tryread(elm, dependent);
 	if (!dependent && unlikely(!rtree_node_valid(child))) {
-		child = rtree_node_init(tsdn, rtree, level+1, &elm->child);
+		child = rtree_node_init(tsdn, rtree, level + 1,
+		    &elm->child_or_extent);
 	}
 	assert(!dependent || child != NULL);
 	return child;
@@ -126,10 +141,13 @@ rtree_child_read(tsdn_t *tsdn, rtree_t *rtree, rtree_elm_t *elm, unsigned level,
 
 static rtree_elm_t *
 rtree_subtree_tryread(rtree_t *rtree, bool dependent) {
-	/* Double-checked read (first read may be stale). */
-	rtree_elm_t *subtree = rtree->root;
-	if (!dependent && unlikely(!rtree_node_valid(subtree))) {
-		subtree = (rtree_elm_t *)atomic_read_p(&rtree->root_pun);
+	rtree_elm_t *subtree;
+	if (dependent) {
+		subtree = (rtree_elm_t *)atomic_load_p(&rtree->root,
+		    ATOMIC_RELAXED);
+	} else {
+		subtree = (rtree_elm_t *)atomic_load_p(&rtree->root,
+		    ATOMIC_ACQUIRE);
 	}
 	assert(!dependent || subtree != NULL);
 	return subtree;
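A closing note on rtree_node_init: the new comments in the hunks above capture why the initializing store needs release semantics even though it happens under init_lock, namely that readers on the fast path never take the lock. A minimal sketch of that double-checked pattern, with hypothetical names (node_get, g_node, g_init_lock) and POSIX threads standing in for jemalloc's mutexes:

    #include <stdatomic.h>
    #include <pthread.h>
    #include <stdlib.h>

    static _Atomic(void *) g_node;  /* lazily initialized node */
    static pthread_mutex_t g_init_lock = PTHREAD_MUTEX_INITIALIZER;

    void *
    node_get(void) {
    	/* Fast path, no lock: acquire pairs with the release store
    	 * below, so a reader that sees the pointer also sees the
    	 * node's initialized contents. */
    	void *n = atomic_load_explicit(&g_node, memory_order_acquire);
    	if (n != NULL) {
    		return n;
    	}
    	pthread_mutex_lock(&g_init_lock);
    	/* Relaxed suffices here: any prior store happened under this
    	 * same lock, which already orders it for us. */
    	n = atomic_load_explicit(&g_node, memory_order_relaxed);
    	if (n == NULL) {
    		n = calloc(1, 64);
    		/* Publish with release for the lock-free fast path. */
    		atomic_store_explicit(&g_node, n, memory_order_release);
    	}
    	pthread_mutex_unlock(&g_init_lock);
    	return n;
    }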