Optimize rtree_get().

Specialize fast path to avoid code that cannot execute for dependent
loads.

Manually unroll.
This commit is contained in:
Jason Evans 2016-03-22 17:54:35 -07:00
parent 18903c592f
commit 6c460ad91b
3 changed files with 134 additions and 35 deletions

View File

@ -16,8 +16,34 @@ typedef struct rtree_s rtree_t;
*/
#define LG_RTREE_BITS_PER_LEVEL 4
#define RTREE_BITS_PER_LEVEL (ZU(1) << LG_RTREE_BITS_PER_LEVEL)
#define RTREE_HEIGHT_MAX \
((ZU(1) << (LG_SIZEOF_PTR+3)) / RTREE_BITS_PER_LEVEL)
/*
* Avoid math in RTREE_HEIGHT_MAX definition so that it can be used in cpp
* conditionals. The following definitions are precomputed equivalents to:
*
* #define RTREE_HEIGHT_MAX \
* ((ZU(1) << (LG_SIZEOF_PTR+3)) / RTREE_BITS_PER_LEVEL)
*/
/*
 * Select the precomputed RTREE_HEIGHT_MAX for each supported combination of
 * LG_RTREE_BITS_PER_LEVEL (2, 3 or 4) and LG_SIZEOF_PTR (2 or 3).  An
 * unsupported value of either macro stops the build with an explicit #error
 * instead of leaving RTREE_HEIGHT_MAX undefined, which would otherwise be
 * treated as 0 in later cpp conditionals.
 */
#if LG_RTREE_BITS_PER_LEVEL == 2
# if LG_SIZEOF_PTR == 3
# define RTREE_HEIGHT_MAX 16
# elif LG_SIZEOF_PTR == 2
# define RTREE_HEIGHT_MAX 8
# else
# error Unsupported LG_SIZEOF_PTR
# endif
#elif LG_RTREE_BITS_PER_LEVEL == 3
# if LG_SIZEOF_PTR == 3
# define RTREE_HEIGHT_MAX 8
# elif LG_SIZEOF_PTR == 2
# define RTREE_HEIGHT_MAX 4
# else
# error Unsupported LG_SIZEOF_PTR
# endif
#elif LG_RTREE_BITS_PER_LEVEL == 4
# if LG_SIZEOF_PTR == 3
# define RTREE_HEIGHT_MAX 4
# elif LG_SIZEOF_PTR == 2
# define RTREE_HEIGHT_MAX 2
# else
# error Unsupported LG_SIZEOF_PTR
# endif
#else
# error Unsupported LG_RTREE_BITS_PER_LEVEL
#endif
/* Used for two-stage lock-free node initialization. */
#define RTREE_NODE_INITIALIZING ((rtree_node_elm_t *)0x1)
@ -111,15 +137,18 @@ unsigned rtree_start_level(rtree_t *rtree, uintptr_t key);
uintptr_t rtree_subkey(rtree_t *rtree, uintptr_t key, unsigned level);
bool rtree_node_valid(rtree_node_elm_t *node);
rtree_node_elm_t *rtree_child_tryread(rtree_node_elm_t *elm);
rtree_node_elm_t *rtree_child_tryread(rtree_node_elm_t *elm,
bool dependent);
rtree_node_elm_t *rtree_child_read(rtree_t *rtree, rtree_node_elm_t *elm,
unsigned level);
unsigned level, bool dependent);
extent_node_t *rtree_val_read(rtree_t *rtree, rtree_node_elm_t *elm,
bool dependent);
void rtree_val_write(rtree_t *rtree, rtree_node_elm_t *elm,
const extent_node_t *val);
rtree_node_elm_t *rtree_subtree_tryread(rtree_t *rtree, unsigned level);
rtree_node_elm_t *rtree_subtree_read(rtree_t *rtree, unsigned level);
rtree_node_elm_t *rtree_subtree_tryread(rtree_t *rtree, unsigned level,
bool dependent);
rtree_node_elm_t *rtree_subtree_read(rtree_t *rtree, unsigned level,
bool dependent);
extent_node_t *rtree_get(rtree_t *rtree, uintptr_t key, bool dependent);
bool rtree_set(rtree_t *rtree, uintptr_t key, const extent_node_t *val);
@ -157,25 +186,28 @@ rtree_node_valid(rtree_node_elm_t *node)
}
/*
 * Read the child pointer stored in elm.
 *
 * elm->child is read first.  When dependent is false and that value is
 * rejected by rtree_node_valid(), the pointer is read again through
 * atomic_read_p(&elm->pun).  When dependent is true the second read is
 * skipped and the result is asserted to be non-NULL.
 *
 * NOTE(review): this span looks like a rendered diff, so the one-argument
 * signature and the unguarded validity check appear to be the pre-change
 * lines shown next to their replacements -- confirm against the real header.
 */
JEMALLOC_INLINE rtree_node_elm_t *
rtree_child_tryread(rtree_node_elm_t *elm)
rtree_child_tryread(rtree_node_elm_t *elm, bool dependent)
{
rtree_node_elm_t *child;
/* Double-checked read (first read may be stale). */
child = elm->child;
if (!rtree_node_valid(child))
if (!dependent && !rtree_node_valid(child))
child = atomic_read_p(&elm->pun);
assert(!dependent || child != NULL);
return (child);
}
/*
 * Read the child pointer stored in elm, with a fallback to
 * rtree_child_read_hard().
 *
 * The pointer is first obtained from rtree_child_tryread(elm, dependent).
 * When dependent is false and that pointer is rejected by rtree_node_valid(),
 * the result of rtree_child_read_hard(rtree, elm, level) is used instead.
 * When dependent is true the fallback is skipped and the result is asserted
 * to be non-NULL.
 *
 * NOTE(review): this span looks like a rendered diff, so the shorter
 * signature, the two-argument-less tryread call and the unguarded validity
 * check appear to be pre-change lines -- confirm against the real header.
 */
JEMALLOC_INLINE rtree_node_elm_t *
rtree_child_read(rtree_t *rtree, rtree_node_elm_t *elm, unsigned level)
rtree_child_read(rtree_t *rtree, rtree_node_elm_t *elm, unsigned level,
bool dependent)
{
rtree_node_elm_t *child;
child = rtree_child_tryread(elm);
if (unlikely(!rtree_node_valid(child)))
child = rtree_child_tryread(elm, dependent);
if (!dependent && unlikely(!rtree_node_valid(child)))
child = rtree_child_read_hard(rtree, elm, level);
assert(!dependent || child != NULL);
return (child);
}
@ -209,25 +241,27 @@ rtree_val_write(rtree_t *rtree, rtree_node_elm_t *elm, const extent_node_t *val)
}
/*
 * Read the subtree pointer recorded for the given level of rtree.
 *
 * rtree->levels[level].subtree is read first.  When dependent is false and
 * that value is rejected by rtree_node_valid(), the pointer is read again
 * through atomic_read_p(&rtree->levels[level].subtree_pun).  When dependent
 * is true the second read is skipped and the result is asserted to be
 * non-NULL.
 *
 * NOTE(review): this span looks like a rendered diff, so the two-argument
 * signature and the unguarded validity check appear to be the pre-change
 * lines shown next to their replacements -- confirm against the real header.
 */
JEMALLOC_INLINE rtree_node_elm_t *
rtree_subtree_tryread(rtree_t *rtree, unsigned level)
rtree_subtree_tryread(rtree_t *rtree, unsigned level, bool dependent)
{
rtree_node_elm_t *subtree;
/* Double-checked read (first read may be stale). */
subtree = rtree->levels[level].subtree;
if (!rtree_node_valid(subtree))
if (!dependent && unlikely(!rtree_node_valid(subtree)))
subtree = atomic_read_p(&rtree->levels[level].subtree_pun);
assert(!dependent || subtree != NULL);
return (subtree);
}
/*
 * Read the subtree pointer for the given level of rtree, with a fallback to
 * rtree_subtree_read_hard().
 *
 * The pointer is first obtained from
 * rtree_subtree_tryread(rtree, level, dependent).  When dependent is false
 * and that pointer is rejected by rtree_node_valid(), the result of
 * rtree_subtree_read_hard(rtree, level) is used instead.  When dependent is
 * true the fallback is skipped and the result is asserted to be non-NULL.
 *
 * NOTE(review): this span looks like a rendered diff, so the two-argument
 * signature, the two-argument tryread call and the unguarded validity check
 * appear to be pre-change lines -- confirm against the real header.
 */
JEMALLOC_INLINE rtree_node_elm_t *
rtree_subtree_read(rtree_t *rtree, unsigned level)
rtree_subtree_read(rtree_t *rtree, unsigned level, bool dependent)
{
rtree_node_elm_t *subtree;
subtree = rtree_subtree_tryread(rtree, level);
if (unlikely(!rtree_node_valid(subtree)))
subtree = rtree_subtree_tryread(rtree, level, dependent);
if (!dependent && unlikely(!rtree_node_valid(subtree)))
subtree = rtree_subtree_read_hard(rtree, level);
assert(!dependent || subtree != NULL);
return (subtree);
}
@ -235,26 +269,88 @@ JEMALLOC_INLINE extent_node_t *
rtree_get(rtree_t *rtree, uintptr_t key, bool dependent)
{
/*
 * Return the extent_node_t pointer that rtree_val_read() yields for the leaf
 * element selected by key.  The walk begins at the level reported by
 * rtree_start_level() and moves down one level per step.  When dependent is
 * false, a node rejected by rtree_node_valid() at any step makes the
 * function return NULL.
 *
 * NOTE(review): this span looks like a rendered diff.  The declarations of
 * i/child and the for-loop that follows appear to be the pre-change
 * implementation; the switch built from RTREE_GET_SUBTREE/RTREE_GET_LEAF
 * appears to be its unrolled replacement -- confirm against the real header.
 */
uintptr_t subkey;
unsigned i, start_level;
rtree_node_elm_t *node, *child;
unsigned start_level;
rtree_node_elm_t *node;
start_level = rtree_start_level(rtree, key);
for (i = start_level, node = rtree_subtree_tryread(rtree, start_level);
/**/; i++, node = child) {
if (!dependent && unlikely(!rtree_node_valid(node)))
return (NULL);
subkey = rtree_subkey(rtree, key, i);
if (i == rtree->height - 1) {
/*
* node is a leaf, so it contains values rather than
* child pointers.
*/
return (rtree_val_read(rtree, &node[subkey],
dependent));
}
assert(i < rtree->height - 1);
child = rtree_child_tryread(&node[subkey]);
/* Fetch the subtree root for start_level, then dispatch into the switch. */
node = rtree_subtree_tryread(rtree, start_level, dependent);
/*
 * Offset added to a tree level to obtain its case label.  With this bias the
 * leaf level (rtree->height - 1) always lands on case RTREE_HEIGHT_MAX-1,
 * whatever the height of this particular tree.
 */
#define RTREE_GET_BIAS (RTREE_HEIGHT_MAX - rtree->height)
switch (start_level + RTREE_GET_BIAS) {
/*
 * Case body for an internal level: bail out with NULL on an invalid node
 * (non-dependent lookups only), compute the subkey for the unbiased tree
 * level, replace node with the child read from node[subkey], and fall
 * through to the case for the next level.
 */
#define RTREE_GET_SUBTREE(level) \
case level: \
assert(level < (RTREE_HEIGHT_MAX-1)); \
if (!dependent && unlikely(!rtree_node_valid(node))) \
return (NULL); \
subkey = rtree_subkey(rtree, key, level - \
RTREE_GET_BIAS); \
node = rtree_child_tryread(&node[subkey], dependent); \
/* Fall through. */
/*
 * Case body for the leaf level: same validity check and subkey computation,
 * then return the value read from node[subkey].
 */
#define RTREE_GET_LEAF(level) \
case level: \
assert(level == (RTREE_HEIGHT_MAX-1)); \
if (!dependent && unlikely(!rtree_node_valid(node))) \
return (NULL); \
subkey = rtree_subkey(rtree, key, level - \
RTREE_GET_BIAS); \
/* \
* node is a leaf, so it contains values rather than \
* child pointers. \
*/ \
return (rtree_val_read(rtree, &node[subkey], \
dependent));
/*
 * Emit one internal-level case for each label below RTREE_HEIGHT_MAX-1.  The
 * preprocessor guards drop the cases that cannot exist for the configured
 * RTREE_HEIGHT_MAX; values above 16 are rejected at compile time.
 */
#if RTREE_HEIGHT_MAX > 1
RTREE_GET_SUBTREE(0)
#endif
#if RTREE_HEIGHT_MAX > 2
RTREE_GET_SUBTREE(1)
#endif
#if RTREE_HEIGHT_MAX > 3
RTREE_GET_SUBTREE(2)
#endif
#if RTREE_HEIGHT_MAX > 4
RTREE_GET_SUBTREE(3)
#endif
#if RTREE_HEIGHT_MAX > 5
RTREE_GET_SUBTREE(4)
#endif
#if RTREE_HEIGHT_MAX > 6
RTREE_GET_SUBTREE(5)
#endif
#if RTREE_HEIGHT_MAX > 7
RTREE_GET_SUBTREE(6)
#endif
#if RTREE_HEIGHT_MAX > 8
RTREE_GET_SUBTREE(7)
#endif
#if RTREE_HEIGHT_MAX > 9
RTREE_GET_SUBTREE(8)
#endif
#if RTREE_HEIGHT_MAX > 10
RTREE_GET_SUBTREE(9)
#endif
#if RTREE_HEIGHT_MAX > 11
RTREE_GET_SUBTREE(10)
#endif
#if RTREE_HEIGHT_MAX > 12
RTREE_GET_SUBTREE(11)
#endif
#if RTREE_HEIGHT_MAX > 13
RTREE_GET_SUBTREE(12)
#endif
#if RTREE_HEIGHT_MAX > 14
RTREE_GET_SUBTREE(13)
#endif
#if RTREE_HEIGHT_MAX > 15
RTREE_GET_SUBTREE(14)
#endif
#if RTREE_HEIGHT_MAX > 16
# error Unsupported RTREE_HEIGHT_MAX
#endif
/* The final case is always the leaf and always returns. */
RTREE_GET_LEAF(RTREE_HEIGHT_MAX-1)
/*
 * NOTE(review): RTREE_GET_BIAS is defined above but has no matching #undef
 * in the lines shown here, unlike the two case macros below.
 */
#undef RTREE_GET_SUBTREE
#undef RTREE_GET_LEAF
default: not_reached();
}
not_reached();
}
@ -268,7 +364,7 @@ rtree_set(rtree_t *rtree, uintptr_t key, const extent_node_t *val)
start_level = rtree_start_level(rtree, key);
node = rtree_subtree_read(rtree, start_level);
node = rtree_subtree_read(rtree, start_level, false);
if (node == NULL)
return (true);
for (i = start_level; /**/; i++, node = child) {
@ -282,7 +378,7 @@ rtree_set(rtree_t *rtree, uintptr_t key, const extent_node_t *val)
return (false);
}
assert(i + 1 < rtree->height);
child = rtree_child_read(rtree, &node[subkey], i);
child = rtree_child_read(rtree, &node[subkey], i, false);
if (child == NULL)
return (true);
}

View File

@ -15,6 +15,8 @@ rtree_new(rtree_t *rtree, unsigned bits, rtree_node_alloc_t *alloc,
{
unsigned bits_in_leaf, height, i;
assert(RTREE_HEIGHT_MAX == ((ZU(1) << (LG_SIZEOF_PTR+3)) /
RTREE_BITS_PER_LEVEL));
assert(bits > 0 && bits <= (sizeof(uintptr_t) << 3));
bits_in_leaf = (bits % RTREE_BITS_PER_LEVEL) == 0 ? RTREE_BITS_PER_LEVEL

View File

@ -14,6 +14,7 @@
malloc_write("<jemalloc>: Unreachable code reached\n"); \
abort(); \
} \
unreachable(); \
} while (0)
#define not_implemented() do { \