Protect the rtree/extent interactions with a mutex pool.

Instead of embedding a lock bit in rtree leaf elements, we associate extents
with a small set of mutexes.  This gets us two things:

- We can use the system mutexes.  This (hypothetically) protects us from
  priority inversion, and lets us stop doing a backoff/sleep loop, instead
  opting for precise wakeups from the mutex.
- Cuts down on the number of mutex acquisitions we have to do (from 4 in the
  worst case to two).

We end up simplifying most of the rtree code (which no longer has to deal with
locking or concurrency at all), at the cost of additional complexity in the
extent code: since the mutex protecting the rtree leaf elements is determined by
reading the extent out of those elements, the initial read is racy, so that we
may acquire an out of date mutex.  We re-check the extent in the leaf after
acquiring the mutex to protect us from this race.
This commit is contained in:
David Goldblatt
2017-05-15 14:23:51 -07:00
committed by David Goldblatt
parent 26c792e61a
commit 3f685e8824
18 changed files with 341 additions and 537 deletions

View File

@@ -47,21 +47,16 @@ rtree_subkey(uintptr_t key, unsigned level) {
# ifdef RTREE_LEAF_COMPACT
JEMALLOC_ALWAYS_INLINE uintptr_t
rtree_leaf_elm_bits_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm,
bool acquired, bool dependent) {
if (config_debug && acquired) {
assert(dependent);
rtree_leaf_elm_witness_access(tsdn, rtree, elm);
}
bool dependent) {
return (uintptr_t)atomic_load_p(&elm->le_bits, dependent
? ATOMIC_RELAXED : ATOMIC_ACQUIRE);
}
JEMALLOC_ALWAYS_INLINE extent_t *
rtree_leaf_elm_bits_extent_get(uintptr_t bits) {
/* Restore sign-extended high bits, mask slab and lock bits. */
/* Restore sign-extended high bits, mask slab bit. */
return (extent_t *)((uintptr_t)((intptr_t)(bits << RTREE_NHIB) >>
RTREE_NHIB) & ~((uintptr_t)0x3));
RTREE_NHIB) & ~((uintptr_t)0x1));
}
JEMALLOC_ALWAYS_INLINE szind_t
@@ -71,51 +66,29 @@ rtree_leaf_elm_bits_szind_get(uintptr_t bits) {
JEMALLOC_ALWAYS_INLINE bool
rtree_leaf_elm_bits_slab_get(uintptr_t bits) {
return (bool)((bits >> 1) & (uintptr_t)0x1);
}
JEMALLOC_ALWAYS_INLINE bool
rtree_leaf_elm_bits_locked_get(uintptr_t bits) {
return (bool)(bits & (uintptr_t)0x1);
}
# endif
JEMALLOC_ALWAYS_INLINE extent_t *
rtree_leaf_elm_extent_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm,
bool acquired, bool dependent) {
if (config_debug && acquired) {
assert(dependent);
rtree_leaf_elm_witness_access(tsdn, rtree, elm);
}
bool dependent) {
#ifdef RTREE_LEAF_COMPACT
uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, acquired,
dependent);
assert(!acquired || rtree_leaf_elm_bits_locked_get(bits));
uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, dependent);
return rtree_leaf_elm_bits_extent_get(bits);
#else
extent_t *extent = (extent_t *)atomic_load_p(&elm->le_extent, dependent
? ATOMIC_RELAXED : ATOMIC_ACQUIRE);
assert(!acquired || ((uintptr_t)extent & (uintptr_t)0x1) ==
(uintptr_t)0x1);
/* Mask lock bit. */
extent = (extent_t *)((uintptr_t)extent & ~((uintptr_t)0x1));
return extent;
#endif
}
JEMALLOC_ALWAYS_INLINE szind_t
rtree_leaf_elm_szind_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm,
bool acquired, bool dependent) {
if (config_debug && acquired) {
assert(dependent);
rtree_leaf_elm_witness_access(tsdn, rtree, elm);
}
bool dependent) {
#ifdef RTREE_LEAF_COMPACT
uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, acquired,
dependent);
assert(!acquired || rtree_leaf_elm_bits_locked_get(bits));
uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, dependent);
return rtree_leaf_elm_bits_szind_get(bits);
#else
return (szind_t)atomic_load_u(&elm->le_szind, dependent ? ATOMIC_RELAXED
@@ -125,16 +98,9 @@ rtree_leaf_elm_szind_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm,
JEMALLOC_ALWAYS_INLINE bool
rtree_leaf_elm_slab_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm,
bool acquired, bool dependent) {
if (config_debug && acquired) {
assert(dependent);
rtree_leaf_elm_witness_access(tsdn, rtree, elm);
}
bool dependent) {
#ifdef RTREE_LEAF_COMPACT
uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, acquired,
dependent);
assert(!acquired || rtree_leaf_elm_bits_locked_get(bits));
uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, dependent);
return rtree_leaf_elm_bits_slab_get(bits);
#else
return atomic_load_b(&elm->le_slab, dependent ? ATOMIC_RELAXED :
@@ -143,46 +109,31 @@ rtree_leaf_elm_slab_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm,
}
static inline void
rtree_leaf_elm_extent_lock_write(tsdn_t *tsdn, rtree_t *rtree,
rtree_leaf_elm_t *elm, bool acquired, extent_t *extent, bool lock) {
if (config_debug && acquired) {
rtree_leaf_elm_witness_access(tsdn, rtree, elm);
}
assert(((uintptr_t)extent & (uintptr_t)0x1) == (uintptr_t)0x0);
rtree_leaf_elm_extent_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm,
extent_t *extent) {
#ifdef RTREE_LEAF_COMPACT
uintptr_t old_bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm,
acquired, acquired);
uintptr_t old_bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, true);
uintptr_t bits = ((uintptr_t)rtree_leaf_elm_bits_szind_get(old_bits) <<
LG_VADDR) | ((uintptr_t)extent & (((uintptr_t)0x1 << LG_VADDR) - 1))
| ((uintptr_t)rtree_leaf_elm_bits_slab_get(old_bits) << 1) |
(uintptr_t)lock;
| ((uintptr_t)rtree_leaf_elm_bits_slab_get(old_bits));
atomic_store_p(&elm->le_bits, (void *)bits, ATOMIC_RELEASE);
#else
if (lock) {
/* Overlay lock bit. */
extent = (extent_t *)((uintptr_t)extent | (uintptr_t)0x1);
}
atomic_store_p(&elm->le_extent, extent, ATOMIC_RELEASE);
#endif
}
static inline void
rtree_leaf_elm_szind_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm,
bool acquired, szind_t szind) {
if (config_debug && acquired) {
rtree_leaf_elm_witness_access(tsdn, rtree, elm);
}
szind_t szind) {
assert(szind <= NSIZES);
#ifdef RTREE_LEAF_COMPACT
uintptr_t old_bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm,
acquired, acquired);
true);
uintptr_t bits = ((uintptr_t)szind << LG_VADDR) |
((uintptr_t)rtree_leaf_elm_bits_extent_get(old_bits) &
(((uintptr_t)0x1 << LG_VADDR) - 1)) |
((uintptr_t)rtree_leaf_elm_bits_slab_get(old_bits) << 1) |
(uintptr_t)acquired;
((uintptr_t)rtree_leaf_elm_bits_slab_get(old_bits));
atomic_store_p(&elm->le_bits, (void *)bits, ATOMIC_RELEASE);
#else
atomic_store_u(&elm->le_szind, szind, ATOMIC_RELEASE);
@@ -191,18 +142,13 @@ rtree_leaf_elm_szind_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm,
static inline void
rtree_leaf_elm_slab_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm,
bool acquired, bool slab) {
if (config_debug && acquired) {
rtree_leaf_elm_witness_access(tsdn, rtree, elm);
}
bool slab) {
#ifdef RTREE_LEAF_COMPACT
uintptr_t old_bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm,
acquired, acquired);
true);
uintptr_t bits = ((uintptr_t)rtree_leaf_elm_bits_szind_get(old_bits) <<
LG_VADDR) | ((uintptr_t)rtree_leaf_elm_bits_extent_get(old_bits) &
(((uintptr_t)0x1 << LG_VADDR) - 1)) | ((uintptr_t)slab << 1) |
(uintptr_t)acquired;
(((uintptr_t)0x1 << LG_VADDR) - 1)) | ((uintptr_t)slab);
atomic_store_p(&elm->le_bits, (void *)bits, ATOMIC_RELEASE);
#else
atomic_store_b(&elm->le_slab, slab, ATOMIC_RELEASE);
@@ -211,27 +157,20 @@ rtree_leaf_elm_slab_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm,
static inline void
rtree_leaf_elm_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm,
bool acquired, extent_t *extent, szind_t szind, bool slab) {
if (config_debug && acquired) {
rtree_leaf_elm_witness_access(tsdn, rtree, elm);
}
assert(!slab || szind < NBINS);
extent_t *extent, szind_t szind, bool slab) {
#ifdef RTREE_LEAF_COMPACT
uintptr_t bits = ((uintptr_t)szind << LG_VADDR) |
((uintptr_t)extent & (((uintptr_t)0x1 << LG_VADDR) - 1)) |
((uintptr_t)slab << 1) |
(uintptr_t)acquired;
((uintptr_t)slab);
atomic_store_p(&elm->le_bits, (void *)bits, ATOMIC_RELEASE);
#else
rtree_leaf_elm_slab_write(tsdn, rtree, elm, acquired, slab);
rtree_leaf_elm_szind_write(tsdn, rtree, elm, acquired, szind);
rtree_leaf_elm_slab_write(tsdn, rtree, elm, slab);
rtree_leaf_elm_szind_write(tsdn, rtree, elm, szind);
/*
* Write extent last, since the element is atomically considered valid
* as soon as the extent field is non-NULL.
*/
rtree_leaf_elm_extent_lock_write(tsdn, rtree, elm, acquired, extent,
acquired);
rtree_leaf_elm_extent_write(tsdn, rtree, elm, extent);
#endif
}
@@ -244,32 +183,8 @@ rtree_leaf_elm_szind_slab_update(tsdn_t *tsdn, rtree_t *rtree,
* The caller implicitly assures that it is the only writer to the szind
* and slab fields, and that the extent field cannot currently change.
*/
#ifdef RTREE_LEAF_COMPACT
/*
* Another thread may concurrently acquire the elm, which means that
* even though the szind and slab fields will not be concurrently
* modified by another thread, the fact that the lock is embedded in the
* same word requires that a CAS operation be used here.
*/
spin_t spinner = SPIN_INITIALIZER;
while (true) {
void *old_bits = (void *)(rtree_leaf_elm_bits_read(tsdn, rtree,
elm, false, true) & ~((uintptr_t)0x1)); /* Mask lock bit. */
void *bits = (void *)(((uintptr_t)szind << LG_VADDR) |
((uintptr_t)rtree_leaf_elm_bits_extent_get(
(uintptr_t)old_bits) & (((uintptr_t)0x1 << LG_VADDR) - 1)) |
((uintptr_t)slab << 1));
if (likely(atomic_compare_exchange_strong_p(&elm->le_bits,
&old_bits, bits, ATOMIC_ACQUIRE, ATOMIC_RELAXED))) {
break;
}
spin_adaptive(&spinner);
}
#else
/* No need to lock. */
rtree_leaf_elm_slab_write(tsdn, rtree, elm, false, slab);
rtree_leaf_elm_szind_write(tsdn, rtree, elm, false, szind);
#endif
rtree_leaf_elm_slab_write(tsdn, rtree, elm, slab);
rtree_leaf_elm_szind_write(tsdn, rtree, elm, szind);
}
JEMALLOC_ALWAYS_INLINE rtree_leaf_elm_t *
@@ -343,9 +258,8 @@ rtree_write(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key,
return true;
}
assert(rtree_leaf_elm_extent_read(tsdn, rtree, elm, false, false) ==
NULL);
rtree_leaf_elm_write(tsdn, rtree, elm, false, extent, szind, slab);
assert(rtree_leaf_elm_extent_read(tsdn, rtree, elm, false) == NULL);
rtree_leaf_elm_write(tsdn, rtree, elm, extent, szind, slab);
return false;
}
@@ -370,7 +284,7 @@ rtree_extent_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
if (!dependent && elm == NULL) {
return NULL;
}
return rtree_leaf_elm_extent_read(tsdn, rtree, elm, false, dependent);
return rtree_leaf_elm_extent_read(tsdn, rtree, elm, dependent);
}
JEMALLOC_ALWAYS_INLINE szind_t
@@ -381,7 +295,7 @@ rtree_szind_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
if (!dependent && elm == NULL) {
return NSIZES;
}
return rtree_leaf_elm_szind_read(tsdn, rtree, elm, false, dependent);
return rtree_leaf_elm_szind_read(tsdn, rtree, elm, dependent);
}
/*
@@ -397,10 +311,8 @@ rtree_extent_szind_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
if (!dependent && elm == NULL) {
return true;
}
*r_extent = rtree_leaf_elm_extent_read(tsdn, rtree, elm, false,
dependent);
*r_szind = rtree_leaf_elm_szind_read(tsdn, rtree, elm, false,
dependent);
*r_extent = rtree_leaf_elm_extent_read(tsdn, rtree, elm, dependent);
*r_szind = rtree_leaf_elm_szind_read(tsdn, rtree, elm, dependent);
return false;
}
@@ -412,63 +324,11 @@ rtree_szind_slab_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
if (!dependent && elm == NULL) {
return true;
}
*r_szind = rtree_leaf_elm_szind_read(tsdn, rtree, elm, false,
dependent);
*r_slab = rtree_leaf_elm_slab_read(tsdn, rtree, elm, false, dependent);
*r_szind = rtree_leaf_elm_szind_read(tsdn, rtree, elm, dependent);
*r_slab = rtree_leaf_elm_slab_read(tsdn, rtree, elm, dependent);
return false;
}
static inline rtree_leaf_elm_t *
rtree_leaf_elm_acquire(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
uintptr_t key, bool dependent, bool init_missing) {
rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx,
key, dependent, init_missing);
if (!dependent && elm == NULL) {
return NULL;
}
assert(elm != NULL);
spin_t spinner = SPIN_INITIALIZER;
while (true) {
/* The least significant bit serves as a lock. */
#ifdef RTREE_LEAF_COMPACT
# define RTREE_FIELD_WITH_LOCK le_bits
#else
# define RTREE_FIELD_WITH_LOCK le_extent
#endif
void *bits = atomic_load_p(&elm->RTREE_FIELD_WITH_LOCK,
ATOMIC_RELAXED);
if (likely(((uintptr_t)bits & (uintptr_t)0x1) == 0)) {
void *locked = (void *)((uintptr_t)bits |
(uintptr_t)0x1);
if (likely(atomic_compare_exchange_strong_p(
&elm->RTREE_FIELD_WITH_LOCK, &bits, locked,
ATOMIC_ACQUIRE, ATOMIC_RELAXED))) {
break;
}
}
spin_adaptive(&spinner);
#undef RTREE_FIELD_WITH_LOCK
}
if (config_debug) {
rtree_leaf_elm_witness_acquire(tsdn, rtree, key, elm);
}
return elm;
}
static inline void
rtree_leaf_elm_release(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm) {
extent_t *extent = rtree_leaf_elm_extent_read(tsdn, rtree, elm, true,
true);
rtree_leaf_elm_extent_lock_write(tsdn, rtree, elm, true, extent, false);
if (config_debug) {
rtree_leaf_elm_witness_release(tsdn, rtree, elm);
}
}
static inline void
rtree_szind_slab_update(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
uintptr_t key, szind_t szind, bool slab) {
@@ -482,9 +342,9 @@ static inline void
rtree_clear(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
uintptr_t key) {
rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key, true);
assert(rtree_leaf_elm_extent_read(tsdn, rtree, elm, false, false) !=
assert(rtree_leaf_elm_extent_read(tsdn, rtree, elm, false) !=
NULL);
rtree_leaf_elm_write(tsdn, rtree, elm, false, NULL, NSIZES, false);
rtree_leaf_elm_write(tsdn, rtree, elm, NULL, NSIZES, false);
}
#endif /* JEMALLOC_INTERNAL_RTREE_INLINES_H */