From 21a68e2d22da08e0f60ff79d6866dd3add19775b Mon Sep 17 00:00:00 2001
From: David Goldblatt
Date: Thu, 9 Mar 2017 14:49:32 -0800
Subject: [PATCH] Convert rtree code to use C11 atomics

In the process, I changed the implementation of rtree_elm_acquire so that
it won't even try to CAS if its initial read (getting the extent + lock
bit) indicates that the CAS is doomed to fail. This can significantly
improve performance under contention.
---
 include/jemalloc/internal/rtree_inlines.h | 36 ++++++++++------
 include/jemalloc/internal/rtree_structs.h | 15 +++----
 src/rtree.c                               | 50 +++++++++++++++--------
 3 files changed, 62 insertions(+), 39 deletions(-)

diff --git a/include/jemalloc/internal/rtree_inlines.h b/include/jemalloc/internal/rtree_inlines.h
index f2efd710..b3301095 100644
--- a/include/jemalloc/internal/rtree_inlines.h
+++ b/include/jemalloc/internal/rtree_inlines.h
@@ -55,14 +55,16 @@ rtree_elm_read(rtree_elm_t *elm, bool dependent) {
 		 * synchronization, because the rtree update became visible in
 		 * memory before the pointer came into existence.
 		 */
-		extent = elm->extent;
+		extent = (extent_t *)atomic_load_p(&elm->child_or_extent,
+		    ATOMIC_RELAXED);
 	} else {
 		/*
 		 * An arbitrary read, e.g. on behalf of ivsalloc(), may not be
 		 * dependent on a previous rtree write, which means a stale read
 		 * could result if synchronization were omitted here.
 		 */
-		extent = (extent_t *)atomic_read_p(&elm->pun);
+		extent = (extent_t *)atomic_load_p(&elm->child_or_extent,
+		    ATOMIC_ACQUIRE);
 	}
 
 	/* Mask the lock bit. */
@@ -73,7 +75,7 @@ rtree_elm_read(rtree_elm_t *elm, bool dependent) {
 
 JEMALLOC_INLINE void
 rtree_elm_write(rtree_elm_t *elm, const extent_t *extent) {
-	atomic_write_p(&elm->pun, extent);
+	atomic_store_p(&elm->child_or_extent, (void *)extent, ATOMIC_RELEASE);
 }
 
 JEMALLOC_ALWAYS_INLINE rtree_elm_t *
@@ -161,11 +163,18 @@ rtree_elm_acquire(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
 
 	spin_t spinner = SPIN_INITIALIZER;
 	while (true) {
-		extent_t *extent = rtree_elm_read(elm, false);
 		/* The least significant bit serves as a lock. */
-		void *s = (void *)((uintptr_t)extent | (uintptr_t)0x1);
-		if (!atomic_cas_p(&elm->pun, (void *)extent, s)) {
-			break;
+		void *extent_and_lock = atomic_load_p(&elm->child_or_extent,
+		    ATOMIC_RELAXED);
+		if (likely(((uintptr_t)extent_and_lock & (uintptr_t)0x1) == 0))
+		    {
+			void *locked = (void *)((uintptr_t)extent_and_lock
+			    | (uintptr_t)0x1);
+			if (likely(atomic_compare_exchange_strong_p(
+			    &elm->child_or_extent, &extent_and_lock, locked,
+			    ATOMIC_ACQUIRE, ATOMIC_RELAXED))) {
+				break;
+			}
 		}
 		spin_adaptive(&spinner);
 	}
@@ -180,9 +189,9 @@ rtree_elm_acquire(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
 JEMALLOC_INLINE extent_t *
 rtree_elm_read_acquired(tsdn_t *tsdn, const rtree_t *rtree, rtree_elm_t *elm) {
 	extent_t *extent;
-
-	assert(((uintptr_t)elm->pun & (uintptr_t)0x1) == (uintptr_t)0x1);
-	extent = (extent_t *)((uintptr_t)elm->pun & ~((uintptr_t)0x1));
+	void *ptr = atomic_load_p(&elm->child_or_extent, ATOMIC_RELAXED);
+	assert(((uintptr_t)ptr & (uintptr_t)0x1) == (uintptr_t)0x1);
+	extent = (extent_t *)((uintptr_t)ptr & ~((uintptr_t)0x1));
 	assert(((uintptr_t)extent & (uintptr_t)0x1) == (uintptr_t)0x0);
 
 	if (config_debug) {
@@ -196,13 +205,14 @@ JEMALLOC_INLINE void
 rtree_elm_write_acquired(tsdn_t *tsdn, const rtree_t *rtree, rtree_elm_t *elm,
     const extent_t *extent) {
 	assert(((uintptr_t)extent & (uintptr_t)0x1) == (uintptr_t)0x0);
-	assert(((uintptr_t)elm->pun & (uintptr_t)0x1) == (uintptr_t)0x1);
+	assert(((uintptr_t)atomic_load_p(&elm->child_or_extent, ATOMIC_RELAXED)
+	    & (uintptr_t)0x1) == (uintptr_t)0x1);
 
 	if (config_debug) {
 		rtree_elm_witness_access(tsdn, rtree, elm);
 	}
-
-	elm->pun = (void *)((uintptr_t)extent | (uintptr_t)0x1);
+	atomic_store_p(&elm->child_or_extent, (void *)((uintptr_t)extent
+	    | (uintptr_t)0x1), ATOMIC_RELEASE);
 
 	assert(rtree_elm_read_acquired(tsdn, rtree, elm) == extent);
 }
diff --git a/include/jemalloc/internal/rtree_structs.h b/include/jemalloc/internal/rtree_structs.h
index 312171e3..b62c489d 100644
--- a/include/jemalloc/internal/rtree_structs.h
+++ b/include/jemalloc/internal/rtree_structs.h
@@ -2,11 +2,8 @@
 #define JEMALLOC_INTERNAL_RTREE_STRUCTS_H
 
 struct rtree_elm_s {
-	union {
-		void			*pun;
-		rtree_elm_t		*child;
-		extent_t		*extent;
-	};
+	/* Either "rtree_elm_t *child;" or "extent_t *extent;". */
+	atomic_p_t	child_or_extent;
 };
 
 struct rtree_elm_witness_s {
@@ -41,11 +38,9 @@ struct rtree_ctx_s {
 };
 
 struct rtree_s {
-	union {
-		void		*root_pun;
-		rtree_elm_t	*root;
-	};
-	malloc_mutex_t	init_lock;
+	/* An rtree_elm_t *. */
+	atomic_p_t		root;
+	malloc_mutex_t		init_lock;
 };
 
 #endif /* JEMALLOC_INTERNAL_RTREE_STRUCTS_H */
diff --git a/src/rtree.c b/src/rtree.c
index a86fa45d..54dc3487 100644
--- a/src/rtree.c
+++ b/src/rtree.c
@@ -7,7 +7,7 @@
  */
 bool
 rtree_new(rtree_t *rtree) {
-	rtree->root_pun = NULL;
+	atomic_store_p(&rtree->root, NULL, ATOMIC_RELAXED);
 	if (malloc_mutex_init(&rtree->init_lock, "rtree", WITNESS_RANK_RTREE)) {
 		return true;
 	}
@@ -54,7 +54,8 @@ rtree_delete_subtree(tsdn_t *tsdn, rtree_t *rtree, rtree_elm_t *node,
 
 	nchildren = ZU(1) << rtree_levels[level].bits;
 	for (i = 0; i < nchildren; i++) {
-		rtree_elm_t *child = node[i].child;
+		rtree_elm_t *child = (rtree_elm_t *)atomic_load_p(
+		    &node[i].child_or_extent, ATOMIC_RELAXED);
 
 		if (child != NULL) {
 			rtree_delete_subtree(tsdn, rtree, child, level + 1);
@@ -66,19 +67,25 @@ rtree_delete_subtree(tsdn_t *tsdn, rtree_t *rtree, rtree_elm_t *node,
 
 void
 rtree_delete(tsdn_t *tsdn, rtree_t *rtree) {
-	if (rtree->root_pun != NULL) {
-		rtree_delete_subtree(tsdn, rtree, rtree->root, 0);
+	rtree_elm_t *rtree_root = (rtree_elm_t *)atomic_load_p(&rtree->root,
+	    ATOMIC_RELAXED);
+	if (rtree_root != NULL) {
+		rtree_delete_subtree(tsdn, rtree, rtree_root, 0);
 	}
 }
 #endif
 
 static rtree_elm_t *
 rtree_node_init(tsdn_t *tsdn, rtree_t *rtree, unsigned level,
-    rtree_elm_t **elmp) {
+    atomic_p_t *elmp) {
 	rtree_elm_t *node;
 
 	malloc_mutex_lock(tsdn, &rtree->init_lock);
-	node = atomic_read_p((void**)elmp);
+	/*
+	 * If *elmp is non-null, then it was initialized with the init lock
+	 * held, so we can get by with 'relaxed' here.
+	 */
+	node = atomic_load_p(elmp, ATOMIC_RELAXED);
 	if (node == NULL) {
 		node = rtree_node_alloc(tsdn, rtree, ZU(1) <<
 		    rtree_levels[level].bits);
@@ -86,7 +93,11 @@ rtree_node_init(tsdn_t *tsdn, rtree_t *rtree, unsigned level,
 			malloc_mutex_unlock(tsdn, &rtree->init_lock);
 			return NULL;
 		}
-		atomic_write_p((void **)elmp, node);
+		/*
+		 * Even though we hold the lock, a later reader might not; we
+		 * need release semantics.
+		 */
+		atomic_store_p(elmp, node, ATOMIC_RELEASE);
 	}
 	malloc_mutex_unlock(tsdn, &rtree->init_lock);
 
@@ -102,11 +113,14 @@ static rtree_elm_t *
 rtree_child_tryread(rtree_elm_t *elm, bool dependent) {
 	rtree_elm_t *child;
 
-	/* Double-checked read (first read may be stale). */
-	child = elm->child;
-	if (!dependent && !rtree_node_valid(child)) {
-		child = (rtree_elm_t *)atomic_read_p(&elm->pun);
+	if (dependent) {
+		child = (rtree_elm_t *)atomic_load_p(&elm->child_or_extent,
+		    ATOMIC_RELAXED);
+	} else {
+		child = (rtree_elm_t *)atomic_load_p(&elm->child_or_extent,
+		    ATOMIC_ACQUIRE);
 	}
+	assert(!dependent || child != NULL);
 	return child;
 }
 
@@ -118,7 +132,8 @@ rtree_child_read(tsdn_t *tsdn, rtree_t *rtree, rtree_elm_t *elm, unsigned level,
 
 	child = rtree_child_tryread(elm, dependent);
 	if (!dependent && unlikely(!rtree_node_valid(child))) {
-		child = rtree_node_init(tsdn, rtree, level+1, &elm->child);
+		child = rtree_node_init(tsdn, rtree, level + 1,
+		    &elm->child_or_extent);
 	}
 	assert(!dependent || child != NULL);
 	return child;
@@ -126,10 +141,13 @@ rtree_child_read(tsdn_t *tsdn, rtree_t *rtree, rtree_elm_t *elm, unsigned level,
 
 static rtree_elm_t *
 rtree_subtree_tryread(rtree_t *rtree, bool dependent) {
-	/* Double-checked read (first read may be stale). */
-	rtree_elm_t *subtree = rtree->root;
-	if (!dependent && unlikely(!rtree_node_valid(subtree))) {
-		subtree = (rtree_elm_t *)atomic_read_p(&rtree->root_pun);
+	rtree_elm_t *subtree;
+	if (dependent) {
+		subtree = (rtree_elm_t *)atomic_load_p(&rtree->root,
+		    ATOMIC_RELAXED);
+	} else {
+		subtree = (rtree_elm_t *)atomic_load_p(&rtree->root,
+		    ATOMIC_ACQUIRE);
 	}
 	assert(!dependent || subtree != NULL);
 	return subtree;
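
The optimization called out in the commit message, read the lock word cheaply before attempting the CAS so that a doomed CAS is never issued, can be illustrated outside of jemalloc. The sketch below is not part of the patch: it uses standard <stdatomic.h> rather than jemalloc's atomic_p_t/atomic_load_p/atomic_compare_exchange_strong_p wrappers, keeps the tagged pointer in a uintptr_t so the bit manipulation is well defined, and the names elm_t, elm_lock_acquire, and elm_unlock_write are invented for illustration.

/* Standalone illustration only; not part of the patch. */
#include <stdatomic.h>
#include <stdint.h>

#define LOCK_BIT ((uintptr_t)0x1)

typedef struct {
	/* Tagged pointer: payload in the upper bits, bit 0 is the lock. */
	_Atomic uintptr_t ptr_and_lock;
} elm_t;

static void
elm_lock_acquire(elm_t *elm)
{
	for (;;) {
		/*
		 * Cheap relaxed read first: if the lock bit is already set,
		 * a CAS cannot possibly succeed, so do not issue one.
		 */
		uintptr_t cur = atomic_load_explicit(&elm->ptr_and_lock,
		    memory_order_relaxed);
		if ((cur & LOCK_BIT) == 0) {
			/*
			 * Acquire ordering on success so the critical section
			 * is not reordered above the lock; relaxed on failure
			 * since we simply retry.
			 */
			if (atomic_compare_exchange_strong_explicit(
			    &elm->ptr_and_lock, &cur, cur | LOCK_BIT,
			    memory_order_acquire, memory_order_relaxed)) {
				return;
			}
		}
		/* The real code backs off here via spin_adaptive(). */
	}
}

static void
elm_unlock_write(elm_t *elm, void *payload)
{
	/*
	 * Store the new payload with the lock bit clear; release ordering
	 * publishes everything written while the lock was held.
	 */
	atomic_store_explicit(&elm->ptr_and_lock, (uintptr_t)payload,
	    memory_order_release);
}

A failed CAS still has to take the cache line exclusively, while a plain load does not, which is the usual explanation for why this test-before-CAS shape helps under contention, as the commit message notes.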
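The comments added to rtree_node_init describe a second pattern: lazy initialization performed under a mutex, re-checked with a relaxed load (any earlier initialization necessarily happened under the same lock), and published with a release store so that lock-free readers pairing with an acquire load observe fully initialized memory. Here is a minimal standalone sketch of that pattern, assuming <stdatomic.h> and pthreads in place of jemalloc's wrappers and malloc_mutex_t, with the hypothetical names node_t, root_init, and root_tryread:

/* Standalone illustration only; not part of the patch. */
#include <stdatomic.h>
#include <stdlib.h>
#include <pthread.h>

typedef struct node_s {
	struct node_s *children[16];
} node_t;

static _Atomic(node_t *) root;		/* lazily created */
static pthread_mutex_t init_lock = PTHREAD_MUTEX_INITIALIZER;

/* Reader fast path: the acquire pairs with the release store in root_init(). */
static node_t *
root_tryread(void)
{
	return atomic_load_explicit(&root, memory_order_acquire);
}

/* Slow path, taken only after root_tryread() returned NULL. */
static node_t *
root_init(void)
{
	pthread_mutex_lock(&init_lock);
	/*
	 * A non-NULL root was necessarily stored while this same lock was
	 * held, so a relaxed re-check is enough to catch a racing
	 * initializer.
	 */
	node_t *n = atomic_load_explicit(&root, memory_order_relaxed);
	if (n == NULL) {
		n = calloc(1, sizeof(*n));
		if (n != NULL) {
			/*
			 * Later readers do not take the lock, so release
			 * ordering is needed to publish the contents of *n
			 * along with the pointer itself.
			 */
			atomic_store_explicit(&root, n, memory_order_release);
		}
	}
	pthread_mutex_unlock(&init_lock);
	return n;
}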
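Finally, rtree_elm_read distinguishes dependent lookups (the element address was itself obtained from an earlier, properly synchronized rtree read, so, as the patch's comment puts it, the update became visible before the pointer came into existence) from arbitrary lookups, which need an acquire load to pair with writers' release stores; in both cases the low-order lock bit is masked off before the pointer is returned. A hedged sketch of that read path, using the same uintptr_t tagged-pointer representation as the first sketch and the invented names slot_t and slot_read:

/* Standalone illustration only; not part of the patch. */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

#define LOCK_BIT ((uintptr_t)0x1)

typedef struct extent_s extent_t;	/* opaque payload type */

typedef struct {
	_Atomic uintptr_t child_or_extent;	/* bit 0 doubles as a lock */
} slot_t;

static extent_t *
slot_read(slot_t *slot, bool dependent)
{
	uintptr_t raw;

	if (dependent) {
		/*
		 * The caller reached this slot through an earlier,
		 * synchronized lookup, so a relaxed load suffices here.
		 */
		raw = atomic_load_explicit(&slot->child_or_extent,
		    memory_order_relaxed);
	} else {
		/* Arbitrary read: pair with the writer's release store. */
		raw = atomic_load_explicit(&slot->child_or_extent,
		    memory_order_acquire);
	}

	/* Mask off the lock bit before returning the payload pointer. */
	return (extent_t *)(raw & ~LOCK_BIT);
}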