Protect the rtree/extent interactions with a mutex pool.
Instead of embedding a lock bit in rtree leaf elements, we associate extents with a small set of mutexes. This gets us two things:

- We can use the system mutexes. This (hypothetically) protects us from priority inversion, and lets us stop doing a backoff/sleep loop, instead opting for precise wakeups from the mutex.
- It cuts down on the number of mutex acquisitions we have to do (from four in the worst case to two).

We end up simplifying most of the rtree code (which no longer has to deal with locking or concurrency at all), at the cost of additional complexity in the extent code: since the mutex protecting an rtree leaf element is determined by reading the extent out of that element, the initial read is racy, so we may acquire an out-of-date mutex. We re-check the extent in the leaf after acquiring the mutex to protect against this race.
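As a rough illustration of the scheme described above, here is a simplified sketch of the lock-then-recheck pattern. This is not jemalloc's actual mutex-pool code; the names and types (extent_mutex_pool, extent_mutex, extent_lock_from_leaf, leaf_elm_t) are hypothetical stand-ins for the real tsdn/witness-aware implementation.

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/* Hypothetical stand-ins; the real jemalloc types differ. */
typedef struct extent_s { int unused; } extent_t;
typedef struct { _Atomic(extent_t *) le_extent; } leaf_elm_t;

/* A small, fixed pool of mutexes; extents hash into it by address. */
#define EXTENT_MUTEX_POOL_SIZE 256
static pthread_mutex_t extent_mutex_pool[EXTENT_MUTEX_POOL_SIZE];

static void
extent_mutex_pool_init(void) {
	for (size_t i = 0; i < EXTENT_MUTEX_POOL_SIZE; i++) {
		pthread_mutex_init(&extent_mutex_pool[i], NULL);
	}
}

/* Map an extent to its pool slot using its address bits. */
static pthread_mutex_t *
extent_mutex(const extent_t *extent) {
	size_t slot = ((uintptr_t)extent >> 4) % EXTENT_MUTEX_POOL_SIZE;
	return &extent_mutex_pool[slot];
}

/*
 * Lock the extent currently recorded in a leaf element.  The initial read
 * is racy: the leaf may be rewritten before the mutex is held, in which
 * case we locked an out-of-date mutex.  Re-check after acquiring; if the
 * leaf changed, unlock and retry against the new extent's mutex.
 */
static extent_t *
extent_lock_from_leaf(leaf_elm_t *elm) {
	while (true) {
		extent_t *extent = atomic_load_explicit(&elm->le_extent,
		    memory_order_acquire);
		if (extent == NULL) {
			return NULL;
		}
		pthread_mutex_lock(extent_mutex(extent));
		/* Re-check now that the mutex is held. */
		if (atomic_load_explicit(&elm->le_extent,
		    memory_order_acquire) == extent) {
			/* Caller unlocks via extent_mutex(extent). */
			return extent;
		}
		pthread_mutex_unlock(extent_mutex(extent));
		/* Stale read; loop and lock the new extent's mutex. */
	}
}

The key property is that the mutex is chosen from the extent that is racily read out of the leaf, so the post-lock re-check is what makes the scheme correct.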
committed by David Goldblatt
parent 26c792e61a
commit 3f685e8824
@@ -47,21 +47,16 @@ rtree_subkey(uintptr_t key, unsigned level) {
 # ifdef RTREE_LEAF_COMPACT
 JEMALLOC_ALWAYS_INLINE uintptr_t
 rtree_leaf_elm_bits_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm,
-    bool acquired, bool dependent) {
-	if (config_debug && acquired) {
-		assert(dependent);
-		rtree_leaf_elm_witness_access(tsdn, rtree, elm);
-	}
-
+    bool dependent) {
 	return (uintptr_t)atomic_load_p(&elm->le_bits, dependent
 	    ? ATOMIC_RELAXED : ATOMIC_ACQUIRE);
 }

 JEMALLOC_ALWAYS_INLINE extent_t *
 rtree_leaf_elm_bits_extent_get(uintptr_t bits) {
-	/* Restore sign-extended high bits, mask slab and lock bits. */
+	/* Restore sign-extended high bits, mask slab bit. */
 	return (extent_t *)((uintptr_t)((intptr_t)(bits << RTREE_NHIB) >>
-	    RTREE_NHIB) & ~((uintptr_t)0x3));
+	    RTREE_NHIB) & ~((uintptr_t)0x1));
 }

 JEMALLOC_ALWAYS_INLINE szind_t
@@ -71,51 +66,29 @@ rtree_leaf_elm_bits_szind_get(uintptr_t bits) {

 JEMALLOC_ALWAYS_INLINE bool
 rtree_leaf_elm_bits_slab_get(uintptr_t bits) {
-	return (bool)((bits >> 1) & (uintptr_t)0x1);
-}
-
-JEMALLOC_ALWAYS_INLINE bool
-rtree_leaf_elm_bits_locked_get(uintptr_t bits) {
 	return (bool)(bits & (uintptr_t)0x1);
 }

 # endif

 JEMALLOC_ALWAYS_INLINE extent_t *
 rtree_leaf_elm_extent_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm,
-    bool acquired, bool dependent) {
-	if (config_debug && acquired) {
-		assert(dependent);
-		rtree_leaf_elm_witness_access(tsdn, rtree, elm);
-	}
-
+    bool dependent) {
 #ifdef RTREE_LEAF_COMPACT
-	uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, acquired,
-	    dependent);
-	assert(!acquired || rtree_leaf_elm_bits_locked_get(bits));
+	uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, dependent);
 	return rtree_leaf_elm_bits_extent_get(bits);
 #else
 	extent_t *extent = (extent_t *)atomic_load_p(&elm->le_extent, dependent
 	    ? ATOMIC_RELAXED : ATOMIC_ACQUIRE);
-	assert(!acquired || ((uintptr_t)extent & (uintptr_t)0x1) ==
-	    (uintptr_t)0x1);
-	/* Mask lock bit. */
-	extent = (extent_t *)((uintptr_t)extent & ~((uintptr_t)0x1));
 	return extent;
 #endif
 }

 JEMALLOC_ALWAYS_INLINE szind_t
 rtree_leaf_elm_szind_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm,
-    bool acquired, bool dependent) {
-	if (config_debug && acquired) {
-		assert(dependent);
-		rtree_leaf_elm_witness_access(tsdn, rtree, elm);
-	}
-
+    bool dependent) {
 #ifdef RTREE_LEAF_COMPACT
-	uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, acquired,
-	    dependent);
-	assert(!acquired || rtree_leaf_elm_bits_locked_get(bits));
+	uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, dependent);
 	return rtree_leaf_elm_bits_szind_get(bits);
 #else
 	return (szind_t)atomic_load_u(&elm->le_szind, dependent ? ATOMIC_RELAXED
@@ -125,16 +98,9 @@ rtree_leaf_elm_szind_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm,

 JEMALLOC_ALWAYS_INLINE bool
 rtree_leaf_elm_slab_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm,
-    bool acquired, bool dependent) {
-	if (config_debug && acquired) {
-		assert(dependent);
-		rtree_leaf_elm_witness_access(tsdn, rtree, elm);
-	}
-
+    bool dependent) {
 #ifdef RTREE_LEAF_COMPACT
-	uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, acquired,
-	    dependent);
-	assert(!acquired || rtree_leaf_elm_bits_locked_get(bits));
+	uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, dependent);
 	return rtree_leaf_elm_bits_slab_get(bits);
 #else
 	return atomic_load_b(&elm->le_slab, dependent ? ATOMIC_RELAXED :
@@ -143,46 +109,31 @@ rtree_leaf_elm_slab_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm,
 }

 static inline void
-rtree_leaf_elm_extent_lock_write(tsdn_t *tsdn, rtree_t *rtree,
-    rtree_leaf_elm_t *elm, bool acquired, extent_t *extent, bool lock) {
-	if (config_debug && acquired) {
-		rtree_leaf_elm_witness_access(tsdn, rtree, elm);
-	}
-	assert(((uintptr_t)extent & (uintptr_t)0x1) == (uintptr_t)0x0);
-
+rtree_leaf_elm_extent_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm,
+    extent_t *extent) {
 #ifdef RTREE_LEAF_COMPACT
-	uintptr_t old_bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm,
-	    acquired, acquired);
+	uintptr_t old_bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, true);
 	uintptr_t bits = ((uintptr_t)rtree_leaf_elm_bits_szind_get(old_bits) <<
 	    LG_VADDR) | ((uintptr_t)extent & (((uintptr_t)0x1 << LG_VADDR) - 1))
-	    | ((uintptr_t)rtree_leaf_elm_bits_slab_get(old_bits) << 1) |
-	    (uintptr_t)lock;
+	    | ((uintptr_t)rtree_leaf_elm_bits_slab_get(old_bits));
 	atomic_store_p(&elm->le_bits, (void *)bits, ATOMIC_RELEASE);
 #else
-	if (lock) {
-		/* Overlay lock bit. */
-		extent = (extent_t *)((uintptr_t)extent | (uintptr_t)0x1);
-	}
 	atomic_store_p(&elm->le_extent, extent, ATOMIC_RELEASE);
 #endif
 }

 static inline void
 rtree_leaf_elm_szind_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm,
-    bool acquired, szind_t szind) {
-	if (config_debug && acquired) {
-		rtree_leaf_elm_witness_access(tsdn, rtree, elm);
-	}
+    szind_t szind) {
 	assert(szind <= NSIZES);

 #ifdef RTREE_LEAF_COMPACT
 	uintptr_t old_bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm,
-	    acquired, acquired);
+	    true);
 	uintptr_t bits = ((uintptr_t)szind << LG_VADDR) |
 	    ((uintptr_t)rtree_leaf_elm_bits_extent_get(old_bits) &
 	    (((uintptr_t)0x1 << LG_VADDR) - 1)) |
-	    ((uintptr_t)rtree_leaf_elm_bits_slab_get(old_bits) << 1) |
-	    (uintptr_t)acquired;
+	    ((uintptr_t)rtree_leaf_elm_bits_slab_get(old_bits));
 	atomic_store_p(&elm->le_bits, (void *)bits, ATOMIC_RELEASE);
 #else
 	atomic_store_u(&elm->le_szind, szind, ATOMIC_RELEASE);
@@ -191,18 +142,13 @@ rtree_leaf_elm_szind_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm,

 static inline void
 rtree_leaf_elm_slab_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm,
-    bool acquired, bool slab) {
-	if (config_debug && acquired) {
-		rtree_leaf_elm_witness_access(tsdn, rtree, elm);
-	}
-
+    bool slab) {
 #ifdef RTREE_LEAF_COMPACT
 	uintptr_t old_bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm,
-	    acquired, acquired);
+	    true);
 	uintptr_t bits = ((uintptr_t)rtree_leaf_elm_bits_szind_get(old_bits) <<
 	    LG_VADDR) | ((uintptr_t)rtree_leaf_elm_bits_extent_get(old_bits) &
-	    (((uintptr_t)0x1 << LG_VADDR) - 1)) | ((uintptr_t)slab << 1) |
-	    (uintptr_t)acquired;
+	    (((uintptr_t)0x1 << LG_VADDR) - 1)) | ((uintptr_t)slab);
 	atomic_store_p(&elm->le_bits, (void *)bits, ATOMIC_RELEASE);
 #else
 	atomic_store_b(&elm->le_slab, slab, ATOMIC_RELEASE);
@@ -211,27 +157,20 @@ rtree_leaf_elm_slab_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm,

 static inline void
 rtree_leaf_elm_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm,
-    bool acquired, extent_t *extent, szind_t szind, bool slab) {
-	if (config_debug && acquired) {
-		rtree_leaf_elm_witness_access(tsdn, rtree, elm);
-	}
-	assert(!slab || szind < NBINS);
-
+    extent_t *extent, szind_t szind, bool slab) {
 #ifdef RTREE_LEAF_COMPACT
 	uintptr_t bits = ((uintptr_t)szind << LG_VADDR) |
 	    ((uintptr_t)extent & (((uintptr_t)0x1 << LG_VADDR) - 1)) |
-	    ((uintptr_t)slab << 1) |
-	    (uintptr_t)acquired;
+	    ((uintptr_t)slab);
 	atomic_store_p(&elm->le_bits, (void *)bits, ATOMIC_RELEASE);
 #else
-	rtree_leaf_elm_slab_write(tsdn, rtree, elm, acquired, slab);
-	rtree_leaf_elm_szind_write(tsdn, rtree, elm, acquired, szind);
+	rtree_leaf_elm_slab_write(tsdn, rtree, elm, slab);
+	rtree_leaf_elm_szind_write(tsdn, rtree, elm, szind);
 	/*
 	 * Write extent last, since the element is atomically considered valid
 	 * as soon as the extent field is non-NULL.
 	 */
-	rtree_leaf_elm_extent_lock_write(tsdn, rtree, elm, acquired, extent,
-	    acquired);
+	rtree_leaf_elm_extent_write(tsdn, rtree, elm, extent);
 #endif
 }

@@ -244,32 +183,8 @@ rtree_leaf_elm_szind_slab_update(tsdn_t *tsdn, rtree_t *rtree,
 	 * The caller implicitly assures that it is the only writer to the szind
 	 * and slab fields, and that the extent field cannot currently change.
 	 */
-#ifdef RTREE_LEAF_COMPACT
-	/*
-	 * Another thread may concurrently acquire the elm, which means that
-	 * even though the szind and slab fields will not be concurrently
-	 * modified by another thread, the fact that the lock is embedded in the
-	 * same word requires that a CAS operation be used here.
-	 */
-	spin_t spinner = SPIN_INITIALIZER;
-	while (true) {
-		void *old_bits = (void *)(rtree_leaf_elm_bits_read(tsdn, rtree,
-		    elm, false, true) & ~((uintptr_t)0x1)); /* Mask lock bit. */
-		void *bits = (void *)(((uintptr_t)szind << LG_VADDR) |
-		    ((uintptr_t)rtree_leaf_elm_bits_extent_get(
-		    (uintptr_t)old_bits) & (((uintptr_t)0x1 << LG_VADDR) - 1)) |
-		    ((uintptr_t)slab << 1));
-		if (likely(atomic_compare_exchange_strong_p(&elm->le_bits,
-		    &old_bits, bits, ATOMIC_ACQUIRE, ATOMIC_RELAXED))) {
-			break;
-		}
-		spin_adaptive(&spinner);
-	}
-#else
-	/* No need to lock. */
-	rtree_leaf_elm_slab_write(tsdn, rtree, elm, false, slab);
-	rtree_leaf_elm_szind_write(tsdn, rtree, elm, false, szind);
-#endif
+	rtree_leaf_elm_slab_write(tsdn, rtree, elm, slab);
+	rtree_leaf_elm_szind_write(tsdn, rtree, elm, szind);
 }

 JEMALLOC_ALWAYS_INLINE rtree_leaf_elm_t *
@@ -343,9 +258,8 @@ rtree_write(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key,
 		return true;
 	}

-	assert(rtree_leaf_elm_extent_read(tsdn, rtree, elm, false, false) ==
-	    NULL);
-	rtree_leaf_elm_write(tsdn, rtree, elm, false, extent, szind, slab);
+	assert(rtree_leaf_elm_extent_read(tsdn, rtree, elm, false) == NULL);
+	rtree_leaf_elm_write(tsdn, rtree, elm, extent, szind, slab);

 	return false;
 }
@@ -370,7 +284,7 @@ rtree_extent_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
 	if (!dependent && elm == NULL) {
 		return NULL;
 	}
-	return rtree_leaf_elm_extent_read(tsdn, rtree, elm, false, dependent);
+	return rtree_leaf_elm_extent_read(tsdn, rtree, elm, dependent);
 }

 JEMALLOC_ALWAYS_INLINE szind_t
@@ -381,7 +295,7 @@ rtree_szind_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
 	if (!dependent && elm == NULL) {
 		return NSIZES;
 	}
-	return rtree_leaf_elm_szind_read(tsdn, rtree, elm, false, dependent);
+	return rtree_leaf_elm_szind_read(tsdn, rtree, elm, dependent);
 }

 /*
@@ -397,10 +311,8 @@ rtree_extent_szind_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
 	if (!dependent && elm == NULL) {
 		return true;
 	}
-	*r_extent = rtree_leaf_elm_extent_read(tsdn, rtree, elm, false,
-	    dependent);
-	*r_szind = rtree_leaf_elm_szind_read(tsdn, rtree, elm, false,
-	    dependent);
+	*r_extent = rtree_leaf_elm_extent_read(tsdn, rtree, elm, dependent);
+	*r_szind = rtree_leaf_elm_szind_read(tsdn, rtree, elm, dependent);
 	return false;
 }

@@ -412,63 +324,11 @@ rtree_szind_slab_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
 	if (!dependent && elm == NULL) {
 		return true;
 	}
-	*r_szind = rtree_leaf_elm_szind_read(tsdn, rtree, elm, false,
-	    dependent);
-	*r_slab = rtree_leaf_elm_slab_read(tsdn, rtree, elm, false, dependent);
+	*r_szind = rtree_leaf_elm_szind_read(tsdn, rtree, elm, dependent);
+	*r_slab = rtree_leaf_elm_slab_read(tsdn, rtree, elm, dependent);
 	return false;
 }

-static inline rtree_leaf_elm_t *
-rtree_leaf_elm_acquire(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
-    uintptr_t key, bool dependent, bool init_missing) {
-	rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx,
-	    key, dependent, init_missing);
-	if (!dependent && elm == NULL) {
-		return NULL;
-	}
-	assert(elm != NULL);
-
-	spin_t spinner = SPIN_INITIALIZER;
-	while (true) {
-		/* The least significant bit serves as a lock. */
-#ifdef RTREE_LEAF_COMPACT
-# define RTREE_FIELD_WITH_LOCK le_bits
-#else
-# define RTREE_FIELD_WITH_LOCK le_extent
-#endif
-		void *bits = atomic_load_p(&elm->RTREE_FIELD_WITH_LOCK,
-		    ATOMIC_RELAXED);
-		if (likely(((uintptr_t)bits & (uintptr_t)0x1) == 0)) {
-			void *locked = (void *)((uintptr_t)bits |
-			    (uintptr_t)0x1);
-			if (likely(atomic_compare_exchange_strong_p(
-			    &elm->RTREE_FIELD_WITH_LOCK, &bits, locked,
-			    ATOMIC_ACQUIRE, ATOMIC_RELAXED))) {
-				break;
-			}
-		}
-		spin_adaptive(&spinner);
-#undef RTREE_FIELD_WITH_LOCK
-	}
-
-	if (config_debug) {
-		rtree_leaf_elm_witness_acquire(tsdn, rtree, key, elm);
-	}
-
-	return elm;
-}
-
-static inline void
-rtree_leaf_elm_release(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm) {
-	extent_t *extent = rtree_leaf_elm_extent_read(tsdn, rtree, elm, true,
-	    true);
-	rtree_leaf_elm_extent_lock_write(tsdn, rtree, elm, true, extent, false);
-
-	if (config_debug) {
-		rtree_leaf_elm_witness_release(tsdn, rtree, elm);
-	}
-}
-
 static inline void
 rtree_szind_slab_update(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
     uintptr_t key, szind_t szind, bool slab) {
@@ -482,9 +342,9 @@ static inline void
 rtree_clear(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
     uintptr_t key) {
 	rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key, true);
-	assert(rtree_leaf_elm_extent_read(tsdn, rtree, elm, false, false) !=
+	assert(rtree_leaf_elm_extent_read(tsdn, rtree, elm, false) !=
 	    NULL);
-	rtree_leaf_elm_write(tsdn, rtree, elm, false, NULL, NSIZES, false);
+	rtree_leaf_elm_write(tsdn, rtree, elm, NULL, NSIZES, false);
 }

 #endif /* JEMALLOC_INTERNAL_RTREE_INLINES_H */