Convert rtree code to use C11 atomics

In the process, I changed the implementation of rtree_elm_acquire so that it
won't even try to CAS if its initial read (getting the extent + lock bit)
indicates that the CAS is doomed to fail.  This can significantly improve
performance under contention.
Author: David Goldblatt
Date: 2017-03-09 14:49:32 -08:00
Committed by: David Goldblatt
Parent: 3a2b183d5f
Commit: 21a68e2d22
3 changed files with 62 additions and 39 deletions
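
The change described above is the classic test-and-test-and-set pattern: do a cheap relaxed load first, and only attempt the compare-and-swap (which needs exclusive ownership of the cache line) when the lock bit is observed to be clear. Below is a minimal standalone sketch of that pattern in plain C11 <stdatomic.h>; the elm_t/elm_acquire/elm_release names are illustrative, not jemalloc's API.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

/* Illustrative element: the low bit of `ptr` serves as a lock bit. */
typedef struct {
	_Atomic(void *) ptr;
} elm_t;

/* Spin until the lock bit is acquired; return the unlocked pointer value. */
static void *
elm_acquire(elm_t *elm) {
	while (true) {
		/* Cheap relaxed read first: the "test" before test-and-set. */
		void *cur = atomic_load_explicit(&elm->ptr,
		    memory_order_relaxed);
		if (((uintptr_t)cur & 1) == 0) {
			void *locked = (void *)((uintptr_t)cur | 1);
			/* Only CAS when the initial read says it can succeed. */
			if (atomic_compare_exchange_strong_explicit(&elm->ptr,
			    &cur, locked, memory_order_acquire,
			    memory_order_relaxed)) {
				return cur;
			}
		}
		/* Lock held or CAS raced; a real spin loop would back off here. */
	}
}

/* Release: store the (possibly updated) pointer with the lock bit cleared. */
static void
elm_release(elm_t *elm, void *unlocked) {
	atomic_store_explicit(&elm->ptr, unlocked, memory_order_release);
}

Under contention this keeps waiters spinning on local, read-only accesses instead of repeatedly issuing doomed CASes, which is where the performance improvement mentioned above comes from.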


@@ -55,14 +55,16 @@ rtree_elm_read(rtree_elm_t *elm, bool dependent) {
 		 * synchronization, because the rtree update became visible in
 		 * memory before the pointer came into existence.
 		 */
-		extent = elm->extent;
+		extent = (extent_t *)atomic_load_p(&elm->child_or_extent,
+		    ATOMIC_RELAXED);
 	} else {
 		/*
 		 * An arbitrary read, e.g. on behalf of ivsalloc(), may not be
 		 * dependent on a previous rtree write, which means a stale read
 		 * could result if synchronization were omitted here.
 		 */
-		extent = (extent_t *)atomic_read_p(&elm->pun);
+		extent = (extent_t *)atomic_load_p(&elm->child_or_extent,
+		    ATOMIC_ACQUIRE);
 	}
 
 	/* Mask the lock bit. */
@@ -73,7 +75,7 @@ rtree_elm_read(rtree_elm_t *elm, bool dependent) {
 JEMALLOC_INLINE void
 rtree_elm_write(rtree_elm_t *elm, const extent_t *extent) {
-	atomic_write_p(&elm->pun, extent);
+	atomic_store_p(&elm->child_or_extent, (void *)extent, ATOMIC_RELEASE);
 }
 
 JEMALLOC_ALWAYS_INLINE rtree_elm_t *
@@ -161,11 +163,18 @@ rtree_elm_acquire(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
 	spin_t spinner = SPIN_INITIALIZER;
 	while (true) {
-		extent_t *extent = rtree_elm_read(elm, false);
 		/* The least significant bit serves as a lock. */
-		void *s = (void *)((uintptr_t)extent | (uintptr_t)0x1);
-		if (!atomic_cas_p(&elm->pun, (void *)extent, s)) {
-			break;
+		void *extent_and_lock = atomic_load_p(&elm->child_or_extent,
+		    ATOMIC_RELAXED);
+		if (likely(((uintptr_t)extent_and_lock & (uintptr_t)0x1) == 0))
+		    {
+			void *locked = (void *)((uintptr_t)extent_and_lock
+			    | (uintptr_t)0x1);
+			if (likely(atomic_compare_exchange_strong_p(
+			    &elm->child_or_extent, &extent_and_lock, locked,
+			    ATOMIC_ACQUIRE, ATOMIC_RELAXED))) {
+				break;
+			}
 		}
 		spin_adaptive(&spinner);
 	}
@@ -180,9 +189,9 @@ rtree_elm_acquire(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
 JEMALLOC_INLINE extent_t *
 rtree_elm_read_acquired(tsdn_t *tsdn, const rtree_t *rtree, rtree_elm_t *elm) {
 	extent_t *extent;
-	assert(((uintptr_t)elm->pun & (uintptr_t)0x1) == (uintptr_t)0x1);
-	extent = (extent_t *)((uintptr_t)elm->pun & ~((uintptr_t)0x1));
+	void *ptr = atomic_load_p(&elm->child_or_extent, ATOMIC_RELAXED);
+	assert(((uintptr_t)ptr & (uintptr_t)0x1) == (uintptr_t)0x1);
+	extent = (extent_t *)((uintptr_t)ptr & ~((uintptr_t)0x1));
 	assert(((uintptr_t)extent & (uintptr_t)0x1) == (uintptr_t)0x0);
 
 	if (config_debug) {
@@ -196,13 +205,14 @@ JEMALLOC_INLINE void
 rtree_elm_write_acquired(tsdn_t *tsdn, const rtree_t *rtree, rtree_elm_t *elm,
     const extent_t *extent) {
 	assert(((uintptr_t)extent & (uintptr_t)0x1) == (uintptr_t)0x0);
-	assert(((uintptr_t)elm->pun & (uintptr_t)0x1) == (uintptr_t)0x1);
+	assert(((uintptr_t)atomic_load_p(&elm->child_or_extent, ATOMIC_RELAXED)
+	    & (uintptr_t)0x1) == (uintptr_t)0x1);
 
 	if (config_debug) {
 		rtree_elm_witness_access(tsdn, rtree, elm);
 	}
 
-	elm->pun = (void *)((uintptr_t)extent | (uintptr_t)0x1);
+	atomic_store_p(&elm->child_or_extent, (void *)((uintptr_t)extent
+	    | (uintptr_t)0x1), ATOMIC_RELEASE);
 	assert(rtree_elm_read_acquired(tsdn, rtree, elm) == extent);
 }
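
The memory orderings in the hunks above follow the usual publication pattern: rtree_elm_write publishes with ATOMIC_RELEASE, a non-dependent reader loads with ATOMIC_ACQUIRE, and a dependent reader (one whose pointer provably came into existence after the write) can use a relaxed load. A generic sketch of the release/acquire half of that pairing in plain C11, with hypothetical names:

#include <stdatomic.h>
#include <stddef.h>

typedef struct {
	int payload;	/* initialized before publication */
} extent_like_t;

static _Atomic(extent_like_t *) slot;

/* Writer: fully initialize the object, then publish it with release. */
static void
publish(extent_like_t *e, int value) {
	e->payload = value;
	atomic_store_explicit(&slot, e, memory_order_release);
}

/*
 * Independent reader: the acquire load pairs with the release store above,
 * so a non-NULL result guarantees payload is visible.
 */
static extent_like_t *
lookup(void) {
	return atomic_load_explicit(&slot, memory_order_acquire);
}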


@@ -2,11 +2,8 @@
 #define JEMALLOC_INTERNAL_RTREE_STRUCTS_H
 
 struct rtree_elm_s {
-	union {
-		void		*pun;
-		rtree_elm_t	*child;
-		extent_t	*extent;
-	};
+	/* Either "rtree_elm_t *child;" or "extent_t *extent;". */
+	atomic_p_t	child_or_extent;
 };
 
 struct rtree_elm_witness_s {
@@ -41,11 +38,9 @@ struct rtree_ctx_s {
 };
 
 struct rtree_s {
-	union {
-		void		*root_pun;
-		rtree_elm_t	*root;
-	};
-	malloc_mutex_t		init_lock;
+	/* An rtree_elm_t *. */
+	atomic_p_t		root;
+	malloc_mutex_t		init_lock;
 };
 
 #endif /* JEMALLOC_INTERNAL_RTREE_STRUCTS_H */
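
The struct changes mirror the inline-function changes: the type-punning unions are replaced by a single atomic pointer cell, so every access has to go through an explicit atomic load or store and the caller casts the void * back to whichever type the slot currently holds. A small before/after sketch in plain C11 (node_t/leaf_t are illustrative stand-ins, not jemalloc's types):

#include <stdatomic.h>

typedef struct node_s node_t;
typedef struct leaf_s { int data; } leaf_t;

/* Before: a union read and written through ad-hoc atomics on `pun`. */
struct node_old_s {
	union {
		void	*pun;
		node_t	*child;
		leaf_t	*leaf;
	};
};

/* After: one atomic cell; callers cast on load and name an ordering. */
struct node_new_s {
	/* Either a node_t *child or a leaf_t *leaf. */
	_Atomic(void *) child_or_leaf;
};

static leaf_t *
node_leaf(struct node_new_s *n) {
	/* Acquire pairs with the release store that published the leaf. */
	return (leaf_t *)atomic_load_explicit(&n->child_or_leaf,
	    memory_order_acquire);
}

static void
node_set_leaf(struct node_new_s *n, leaf_t *l) {
	atomic_store_explicit(&n->child_or_leaf, l, memory_order_release);
}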