Optimize rtree_get().

Specialize fast path to avoid code that cannot execute for dependent
loads.

Manually unroll.
This commit is contained in:
Jason Evans 2016-03-22 17:54:35 -07:00
parent 18903c592f
commit 6c460ad91b
3 changed files with 134 additions and 35 deletions

View File

@ -16,8 +16,34 @@ typedef struct rtree_s rtree_t;
*/ */
#define LG_RTREE_BITS_PER_LEVEL 4 #define LG_RTREE_BITS_PER_LEVEL 4
#define RTREE_BITS_PER_LEVEL (ZU(1) << LG_RTREE_BITS_PER_LEVEL) #define RTREE_BITS_PER_LEVEL (ZU(1) << LG_RTREE_BITS_PER_LEVEL)
#define RTREE_HEIGHT_MAX \ /*
((ZU(1) << (LG_SIZEOF_PTR+3)) / RTREE_BITS_PER_LEVEL) * Avoid math in RTREE_HEIGHT_MAX definition so that it can be used in cpp
* conditionals. The following definitions are precomputed equivalents to:
*
* #define RTREE_HEIGHT_MAX \
* ((ZU(1) << (LG_SIZEOF_PTR+3)) / RTREE_BITS_PER_LEVEL)
*/
#if LG_RTREE_BITS_PER_LEVEL == 2
# if LG_SIZEOF_PTR == 3
# define RTREE_HEIGHT_MAX 16
# elif LG_SIZEOF_PTR == 2
# define RTREE_HEIGHT_MAX 8
# endif
#elif LG_RTREE_BITS_PER_LEVEL == 3
# if LG_SIZEOF_PTR == 3
# define RTREE_HEIGHT_MAX 8
# elif LG_SIZEOF_PTR == 2
# define RTREE_HEIGHT_MAX 4
# endif
#elif LG_RTREE_BITS_PER_LEVEL == 4
# if LG_SIZEOF_PTR == 3
# define RTREE_HEIGHT_MAX 4
# elif LG_SIZEOF_PTR == 2
# define RTREE_HEIGHT_MAX 2
# endif
#else
# error Unsupported LG_RTREE_BITS_PER_LEVEL
#endif
/* Used for two-stage lock-free node initialization. */ /* Used for two-stage lock-free node initialization. */
#define RTREE_NODE_INITIALIZING ((rtree_node_elm_t *)0x1) #define RTREE_NODE_INITIALIZING ((rtree_node_elm_t *)0x1)
@ -111,15 +137,18 @@ unsigned rtree_start_level(rtree_t *rtree, uintptr_t key);
uintptr_t rtree_subkey(rtree_t *rtree, uintptr_t key, unsigned level); uintptr_t rtree_subkey(rtree_t *rtree, uintptr_t key, unsigned level);
bool rtree_node_valid(rtree_node_elm_t *node); bool rtree_node_valid(rtree_node_elm_t *node);
rtree_node_elm_t *rtree_child_tryread(rtree_node_elm_t *elm); rtree_node_elm_t *rtree_child_tryread(rtree_node_elm_t *elm,
bool dependent);
rtree_node_elm_t *rtree_child_read(rtree_t *rtree, rtree_node_elm_t *elm, rtree_node_elm_t *rtree_child_read(rtree_t *rtree, rtree_node_elm_t *elm,
unsigned level); unsigned level, bool dependent);
extent_node_t *rtree_val_read(rtree_t *rtree, rtree_node_elm_t *elm, extent_node_t *rtree_val_read(rtree_t *rtree, rtree_node_elm_t *elm,
bool dependent); bool dependent);
void rtree_val_write(rtree_t *rtree, rtree_node_elm_t *elm, void rtree_val_write(rtree_t *rtree, rtree_node_elm_t *elm,
const extent_node_t *val); const extent_node_t *val);
rtree_node_elm_t *rtree_subtree_tryread(rtree_t *rtree, unsigned level); rtree_node_elm_t *rtree_subtree_tryread(rtree_t *rtree, unsigned level,
rtree_node_elm_t *rtree_subtree_read(rtree_t *rtree, unsigned level); bool dependent);
rtree_node_elm_t *rtree_subtree_read(rtree_t *rtree, unsigned level,
bool dependent);
extent_node_t *rtree_get(rtree_t *rtree, uintptr_t key, bool dependent); extent_node_t *rtree_get(rtree_t *rtree, uintptr_t key, bool dependent);
bool rtree_set(rtree_t *rtree, uintptr_t key, const extent_node_t *val); bool rtree_set(rtree_t *rtree, uintptr_t key, const extent_node_t *val);
@ -157,25 +186,28 @@ rtree_node_valid(rtree_node_elm_t *node)
} }
JEMALLOC_INLINE rtree_node_elm_t * JEMALLOC_INLINE rtree_node_elm_t *
rtree_child_tryread(rtree_node_elm_t *elm) rtree_child_tryread(rtree_node_elm_t *elm, bool dependent)
{ {
rtree_node_elm_t *child; rtree_node_elm_t *child;
/* Double-checked read (first read may be stale). */ /* Double-checked read (first read may be stale). */
child = elm->child; child = elm->child;
if (!rtree_node_valid(child)) if (!dependent && !rtree_node_valid(child))
child = atomic_read_p(&elm->pun); child = atomic_read_p(&elm->pun);
assert(!dependent || child != NULL);
return (child); return (child);
} }
JEMALLOC_INLINE rtree_node_elm_t * JEMALLOC_INLINE rtree_node_elm_t *
rtree_child_read(rtree_t *rtree, rtree_node_elm_t *elm, unsigned level) rtree_child_read(rtree_t *rtree, rtree_node_elm_t *elm, unsigned level,
bool dependent)
{ {
rtree_node_elm_t *child; rtree_node_elm_t *child;
child = rtree_child_tryread(elm); child = rtree_child_tryread(elm, dependent);
if (unlikely(!rtree_node_valid(child))) if (!dependent && unlikely(!rtree_node_valid(child)))
child = rtree_child_read_hard(rtree, elm, level); child = rtree_child_read_hard(rtree, elm, level);
assert(!dependent || child != NULL);
return (child); return (child);
} }
@ -209,25 +241,27 @@ rtree_val_write(rtree_t *rtree, rtree_node_elm_t *elm, const extent_node_t *val)
} }
JEMALLOC_INLINE rtree_node_elm_t * JEMALLOC_INLINE rtree_node_elm_t *
rtree_subtree_tryread(rtree_t *rtree, unsigned level) rtree_subtree_tryread(rtree_t *rtree, unsigned level, bool dependent)
{ {
rtree_node_elm_t *subtree; rtree_node_elm_t *subtree;
/* Double-checked read (first read may be stale). */ /* Double-checked read (first read may be stale). */
subtree = rtree->levels[level].subtree; subtree = rtree->levels[level].subtree;
if (!rtree_node_valid(subtree)) if (!dependent && unlikely(!rtree_node_valid(subtree)))
subtree = atomic_read_p(&rtree->levels[level].subtree_pun); subtree = atomic_read_p(&rtree->levels[level].subtree_pun);
assert(!dependent || subtree != NULL);
return (subtree); return (subtree);
} }
JEMALLOC_INLINE rtree_node_elm_t * JEMALLOC_INLINE rtree_node_elm_t *
rtree_subtree_read(rtree_t *rtree, unsigned level) rtree_subtree_read(rtree_t *rtree, unsigned level, bool dependent)
{ {
rtree_node_elm_t *subtree; rtree_node_elm_t *subtree;
subtree = rtree_subtree_tryread(rtree, level); subtree = rtree_subtree_tryread(rtree, level, dependent);
if (unlikely(!rtree_node_valid(subtree))) if (!dependent && unlikely(!rtree_node_valid(subtree)))
subtree = rtree_subtree_read_hard(rtree, level); subtree = rtree_subtree_read_hard(rtree, level);
assert(!dependent || subtree != NULL);
return (subtree); return (subtree);
} }
@ -235,26 +269,88 @@ JEMALLOC_INLINE extent_node_t *
rtree_get(rtree_t *rtree, uintptr_t key, bool dependent) rtree_get(rtree_t *rtree, uintptr_t key, bool dependent)
{ {
uintptr_t subkey; uintptr_t subkey;
unsigned i, start_level; unsigned start_level;
rtree_node_elm_t *node, *child; rtree_node_elm_t *node;
start_level = rtree_start_level(rtree, key); start_level = rtree_start_level(rtree, key);
for (i = start_level, node = rtree_subtree_tryread(rtree, start_level); node = rtree_subtree_tryread(rtree, start_level, dependent);
/**/; i++, node = child) { #define RTREE_GET_BIAS (RTREE_HEIGHT_MAX - rtree->height)
if (!dependent && unlikely(!rtree_node_valid(node))) switch (start_level + RTREE_GET_BIAS) {
return (NULL); #define RTREE_GET_SUBTREE(level) \
subkey = rtree_subkey(rtree, key, i); case level: \
if (i == rtree->height - 1) { assert(level < (RTREE_HEIGHT_MAX-1)); \
/* if (!dependent && unlikely(!rtree_node_valid(node))) \
* node is a leaf, so it contains values rather than return (NULL); \
* child pointers. subkey = rtree_subkey(rtree, key, level - \
*/ RTREE_GET_BIAS); \
return (rtree_val_read(rtree, &node[subkey], node = rtree_child_tryread(&node[subkey], dependent); \
dependent)); /* Fall through. */
} #define RTREE_GET_LEAF(level) \
assert(i < rtree->height - 1); case level: \
child = rtree_child_tryread(&node[subkey]); assert(level == (RTREE_HEIGHT_MAX-1)); \
if (!dependent && unlikely(!rtree_node_valid(node))) \
return (NULL); \
subkey = rtree_subkey(rtree, key, level - \
RTREE_GET_BIAS); \
/* \
* node is a leaf, so it contains values rather than \
* child pointers. \
*/ \
return (rtree_val_read(rtree, &node[subkey], \
dependent));
#if RTREE_HEIGHT_MAX > 1
RTREE_GET_SUBTREE(0)
#endif
#if RTREE_HEIGHT_MAX > 2
RTREE_GET_SUBTREE(1)
#endif
#if RTREE_HEIGHT_MAX > 3
RTREE_GET_SUBTREE(2)
#endif
#if RTREE_HEIGHT_MAX > 4
RTREE_GET_SUBTREE(3)
#endif
#if RTREE_HEIGHT_MAX > 5
RTREE_GET_SUBTREE(4)
#endif
#if RTREE_HEIGHT_MAX > 6
RTREE_GET_SUBTREE(5)
#endif
#if RTREE_HEIGHT_MAX > 7
RTREE_GET_SUBTREE(6)
#endif
#if RTREE_HEIGHT_MAX > 8
RTREE_GET_SUBTREE(7)
#endif
#if RTREE_HEIGHT_MAX > 9
RTREE_GET_SUBTREE(8)
#endif
#if RTREE_HEIGHT_MAX > 10
RTREE_GET_SUBTREE(9)
#endif
#if RTREE_HEIGHT_MAX > 11
RTREE_GET_SUBTREE(10)
#endif
#if RTREE_HEIGHT_MAX > 12
RTREE_GET_SUBTREE(11)
#endif
#if RTREE_HEIGHT_MAX > 13
RTREE_GET_SUBTREE(12)
#endif
#if RTREE_HEIGHT_MAX > 14
RTREE_GET_SUBTREE(13)
#endif
#if RTREE_HEIGHT_MAX > 15
RTREE_GET_SUBTREE(14)
#endif
#if RTREE_HEIGHT_MAX > 16
# error Unsupported RTREE_HEIGHT_MAX
#endif
RTREE_GET_LEAF(RTREE_HEIGHT_MAX-1)
#undef RTREE_GET_SUBTREE
#undef RTREE_GET_LEAF
default: not_reached();
} }
not_reached(); not_reached();
} }
@ -268,7 +364,7 @@ rtree_set(rtree_t *rtree, uintptr_t key, const extent_node_t *val)
start_level = rtree_start_level(rtree, key); start_level = rtree_start_level(rtree, key);
node = rtree_subtree_read(rtree, start_level); node = rtree_subtree_read(rtree, start_level, false);
if (node == NULL) if (node == NULL)
return (true); return (true);
for (i = start_level; /**/; i++, node = child) { for (i = start_level; /**/; i++, node = child) {
@ -282,7 +378,7 @@ rtree_set(rtree_t *rtree, uintptr_t key, const extent_node_t *val)
return (false); return (false);
} }
assert(i + 1 < rtree->height); assert(i + 1 < rtree->height);
child = rtree_child_read(rtree, &node[subkey], i); child = rtree_child_read(rtree, &node[subkey], i, false);
if (child == NULL) if (child == NULL)
return (true); return (true);
} }

View File

@ -15,6 +15,8 @@ rtree_new(rtree_t *rtree, unsigned bits, rtree_node_alloc_t *alloc,
{ {
unsigned bits_in_leaf, height, i; unsigned bits_in_leaf, height, i;
assert(RTREE_HEIGHT_MAX == ((ZU(1) << (LG_SIZEOF_PTR+3)) /
RTREE_BITS_PER_LEVEL));
assert(bits > 0 && bits <= (sizeof(uintptr_t) << 3)); assert(bits > 0 && bits <= (sizeof(uintptr_t) << 3));
bits_in_leaf = (bits % RTREE_BITS_PER_LEVEL) == 0 ? RTREE_BITS_PER_LEVEL bits_in_leaf = (bits % RTREE_BITS_PER_LEVEL) == 0 ? RTREE_BITS_PER_LEVEL

View File

@ -14,6 +14,7 @@
malloc_write("<jemalloc>: Unreachable code reached\n"); \ malloc_write("<jemalloc>: Unreachable code reached\n"); \
abort(); \ abort(); \
} \ } \
unreachable(); \
} while (0) } while (0)
#define not_implemented() do { \ #define not_implemented() do { \