Optimize rtree_get().
Specialize the fast path to avoid code that cannot execute for dependent loads. Manually unroll the per-level traversal loop in rtree_get().
This commit is contained in:
parent
18903c592f
commit
6c460ad91b
@ -16,8 +16,34 @@ typedef struct rtree_s rtree_t;
|
|||||||
*/
|
*/
|
||||||
#define LG_RTREE_BITS_PER_LEVEL 4
|
#define LG_RTREE_BITS_PER_LEVEL 4
|
||||||
#define RTREE_BITS_PER_LEVEL (ZU(1) << LG_RTREE_BITS_PER_LEVEL)
|
#define RTREE_BITS_PER_LEVEL (ZU(1) << LG_RTREE_BITS_PER_LEVEL)
|
||||||
#define RTREE_HEIGHT_MAX \
|
/*
|
||||||
((ZU(1) << (LG_SIZEOF_PTR+3)) / RTREE_BITS_PER_LEVEL)
|
* Avoid math in RTREE_HEIGHT_MAX definition so that it can be used in cpp
|
||||||
|
* conditionals. The following definitions are precomputed equivalents to:
|
||||||
|
*
|
||||||
|
* #define RTREE_HEIGHT_MAX \
|
||||||
|
* ((ZU(1) << (LG_SIZEOF_PTR+3)) / RTREE_BITS_PER_LEVEL)
|
||||||
|
*/
|
||||||
|
#if LG_RTREE_BITS_PER_LEVEL == 2
|
||||||
|
# if LG_SIZEOF_PTR == 3
|
||||||
|
# define RTREE_HEIGHT_MAX 16
|
||||||
|
# elif LG_SIZEOF_PTR == 2
|
||||||
|
# define RTREE_HEIGHT_MAX 8
|
||||||
|
# endif
|
||||||
|
#elif LG_RTREE_BITS_PER_LEVEL == 3
|
||||||
|
# if LG_SIZEOF_PTR == 3
|
||||||
|
# define RTREE_HEIGHT_MAX 8
|
||||||
|
# elif LG_SIZEOF_PTR == 2
|
||||||
|
# define RTREE_HEIGHT_MAX 4
|
||||||
|
# endif
|
||||||
|
#elif LG_RTREE_BITS_PER_LEVEL == 4
|
||||||
|
# if LG_SIZEOF_PTR == 3
|
||||||
|
# define RTREE_HEIGHT_MAX 4
|
||||||
|
# elif LG_SIZEOF_PTR == 2
|
||||||
|
# define RTREE_HEIGHT_MAX 2
|
||||||
|
# endif
|
||||||
|
#else
|
||||||
|
# error Unsupported LG_RTREE_BITS_PER_LEVEL
|
||||||
|
#endif
|
||||||
|
|
||||||
/* Used for two-stage lock-free node initialization. */
|
/* Used for two-stage lock-free node initialization. */
|
||||||
#define RTREE_NODE_INITIALIZING ((rtree_node_elm_t *)0x1)
|
#define RTREE_NODE_INITIALIZING ((rtree_node_elm_t *)0x1)
|
||||||
@ -111,15 +137,18 @@ unsigned rtree_start_level(rtree_t *rtree, uintptr_t key);
|
|||||||
uintptr_t rtree_subkey(rtree_t *rtree, uintptr_t key, unsigned level);
|
uintptr_t rtree_subkey(rtree_t *rtree, uintptr_t key, unsigned level);
|
||||||
|
|
||||||
bool rtree_node_valid(rtree_node_elm_t *node);
|
bool rtree_node_valid(rtree_node_elm_t *node);
|
||||||
rtree_node_elm_t *rtree_child_tryread(rtree_node_elm_t *elm);
|
rtree_node_elm_t *rtree_child_tryread(rtree_node_elm_t *elm,
|
||||||
|
bool dependent);
|
||||||
rtree_node_elm_t *rtree_child_read(rtree_t *rtree, rtree_node_elm_t *elm,
|
rtree_node_elm_t *rtree_child_read(rtree_t *rtree, rtree_node_elm_t *elm,
|
||||||
unsigned level);
|
unsigned level, bool dependent);
|
||||||
extent_node_t *rtree_val_read(rtree_t *rtree, rtree_node_elm_t *elm,
|
extent_node_t *rtree_val_read(rtree_t *rtree, rtree_node_elm_t *elm,
|
||||||
bool dependent);
|
bool dependent);
|
||||||
void rtree_val_write(rtree_t *rtree, rtree_node_elm_t *elm,
|
void rtree_val_write(rtree_t *rtree, rtree_node_elm_t *elm,
|
||||||
const extent_node_t *val);
|
const extent_node_t *val);
|
||||||
rtree_node_elm_t *rtree_subtree_tryread(rtree_t *rtree, unsigned level);
|
rtree_node_elm_t *rtree_subtree_tryread(rtree_t *rtree, unsigned level,
|
||||||
rtree_node_elm_t *rtree_subtree_read(rtree_t *rtree, unsigned level);
|
bool dependent);
|
||||||
|
rtree_node_elm_t *rtree_subtree_read(rtree_t *rtree, unsigned level,
|
||||||
|
bool dependent);
|
||||||
|
|
||||||
extent_node_t *rtree_get(rtree_t *rtree, uintptr_t key, bool dependent);
|
extent_node_t *rtree_get(rtree_t *rtree, uintptr_t key, bool dependent);
|
||||||
bool rtree_set(rtree_t *rtree, uintptr_t key, const extent_node_t *val);
|
bool rtree_set(rtree_t *rtree, uintptr_t key, const extent_node_t *val);
|
||||||
@ -157,25 +186,28 @@ rtree_node_valid(rtree_node_elm_t *node)
|
|||||||
}
|
}
|
||||||
|
|
||||||
JEMALLOC_INLINE rtree_node_elm_t *
|
JEMALLOC_INLINE rtree_node_elm_t *
|
||||||
rtree_child_tryread(rtree_node_elm_t *elm)
|
rtree_child_tryread(rtree_node_elm_t *elm, bool dependent)
|
||||||
{
|
{
|
||||||
rtree_node_elm_t *child;
|
rtree_node_elm_t *child;
|
||||||
|
|
||||||
/* Double-checked read (first read may be stale). */
|
/* Double-checked read (first read may be stale). */
|
||||||
child = elm->child;
|
child = elm->child;
|
||||||
if (!rtree_node_valid(child))
|
if (!dependent && !rtree_node_valid(child))
|
||||||
child = atomic_read_p(&elm->pun);
|
child = atomic_read_p(&elm->pun);
|
||||||
|
assert(!dependent || child != NULL);
|
||||||
return (child);
|
return (child);
|
||||||
}
|
}
|
||||||
|
|
||||||
JEMALLOC_INLINE rtree_node_elm_t *
|
JEMALLOC_INLINE rtree_node_elm_t *
|
||||||
rtree_child_read(rtree_t *rtree, rtree_node_elm_t *elm, unsigned level)
|
rtree_child_read(rtree_t *rtree, rtree_node_elm_t *elm, unsigned level,
|
||||||
|
bool dependent)
|
||||||
{
|
{
|
||||||
rtree_node_elm_t *child;
|
rtree_node_elm_t *child;
|
||||||
|
|
||||||
child = rtree_child_tryread(elm);
|
child = rtree_child_tryread(elm, dependent);
|
||||||
if (unlikely(!rtree_node_valid(child)))
|
if (!dependent && unlikely(!rtree_node_valid(child)))
|
||||||
child = rtree_child_read_hard(rtree, elm, level);
|
child = rtree_child_read_hard(rtree, elm, level);
|
||||||
|
assert(!dependent || child != NULL);
|
||||||
return (child);
|
return (child);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -209,25 +241,27 @@ rtree_val_write(rtree_t *rtree, rtree_node_elm_t *elm, const extent_node_t *val)
|
|||||||
}
|
}
|
||||||
|
|
||||||
JEMALLOC_INLINE rtree_node_elm_t *
|
JEMALLOC_INLINE rtree_node_elm_t *
|
||||||
rtree_subtree_tryread(rtree_t *rtree, unsigned level)
|
rtree_subtree_tryread(rtree_t *rtree, unsigned level, bool dependent)
|
||||||
{
|
{
|
||||||
rtree_node_elm_t *subtree;
|
rtree_node_elm_t *subtree;
|
||||||
|
|
||||||
/* Double-checked read (first read may be stale). */
|
/* Double-checked read (first read may be stale). */
|
||||||
subtree = rtree->levels[level].subtree;
|
subtree = rtree->levels[level].subtree;
|
||||||
if (!rtree_node_valid(subtree))
|
if (!dependent && unlikely(!rtree_node_valid(subtree)))
|
||||||
subtree = atomic_read_p(&rtree->levels[level].subtree_pun);
|
subtree = atomic_read_p(&rtree->levels[level].subtree_pun);
|
||||||
|
assert(!dependent || subtree != NULL);
|
||||||
return (subtree);
|
return (subtree);
|
||||||
}
|
}
|
||||||
|
|
||||||
JEMALLOC_INLINE rtree_node_elm_t *
|
JEMALLOC_INLINE rtree_node_elm_t *
|
||||||
rtree_subtree_read(rtree_t *rtree, unsigned level)
|
rtree_subtree_read(rtree_t *rtree, unsigned level, bool dependent)
|
||||||
{
|
{
|
||||||
rtree_node_elm_t *subtree;
|
rtree_node_elm_t *subtree;
|
||||||
|
|
||||||
subtree = rtree_subtree_tryread(rtree, level);
|
subtree = rtree_subtree_tryread(rtree, level, dependent);
|
||||||
if (unlikely(!rtree_node_valid(subtree)))
|
if (!dependent && unlikely(!rtree_node_valid(subtree)))
|
||||||
subtree = rtree_subtree_read_hard(rtree, level);
|
subtree = rtree_subtree_read_hard(rtree, level);
|
||||||
|
assert(!dependent || subtree != NULL);
|
||||||
return (subtree);
|
return (subtree);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -235,26 +269,88 @@ JEMALLOC_INLINE extent_node_t *
|
|||||||
rtree_get(rtree_t *rtree, uintptr_t key, bool dependent)
|
rtree_get(rtree_t *rtree, uintptr_t key, bool dependent)
|
||||||
{
|
{
|
||||||
uintptr_t subkey;
|
uintptr_t subkey;
|
||||||
unsigned i, start_level;
|
unsigned start_level;
|
||||||
rtree_node_elm_t *node, *child;
|
rtree_node_elm_t *node;
|
||||||
|
|
||||||
start_level = rtree_start_level(rtree, key);
|
start_level = rtree_start_level(rtree, key);
|
||||||
|
|
||||||
for (i = start_level, node = rtree_subtree_tryread(rtree, start_level);
|
node = rtree_subtree_tryread(rtree, start_level, dependent);
|
||||||
/**/; i++, node = child) {
|
#define RTREE_GET_BIAS (RTREE_HEIGHT_MAX - rtree->height)
|
||||||
if (!dependent && unlikely(!rtree_node_valid(node)))
|
switch (start_level + RTREE_GET_BIAS) {
|
||||||
return (NULL);
|
#define RTREE_GET_SUBTREE(level) \
|
||||||
subkey = rtree_subkey(rtree, key, i);
|
case level: \
|
||||||
if (i == rtree->height - 1) {
|
assert(level < (RTREE_HEIGHT_MAX-1)); \
|
||||||
/*
|
if (!dependent && unlikely(!rtree_node_valid(node))) \
|
||||||
* node is a leaf, so it contains values rather than
|
return (NULL); \
|
||||||
* child pointers.
|
subkey = rtree_subkey(rtree, key, level - \
|
||||||
*/
|
RTREE_GET_BIAS); \
|
||||||
return (rtree_val_read(rtree, &node[subkey],
|
node = rtree_child_tryread(&node[subkey], dependent); \
|
||||||
dependent));
|
/* Fall through. */
|
||||||
}
|
#define RTREE_GET_LEAF(level) \
|
||||||
assert(i < rtree->height - 1);
|
case level: \
|
||||||
child = rtree_child_tryread(&node[subkey]);
|
assert(level == (RTREE_HEIGHT_MAX-1)); \
|
||||||
|
if (!dependent && unlikely(!rtree_node_valid(node))) \
|
||||||
|
return (NULL); \
|
||||||
|
subkey = rtree_subkey(rtree, key, level - \
|
||||||
|
RTREE_GET_BIAS); \
|
||||||
|
/* \
|
||||||
|
* node is a leaf, so it contains values rather than \
|
||||||
|
* child pointers. \
|
||||||
|
*/ \
|
||||||
|
return (rtree_val_read(rtree, &node[subkey], \
|
||||||
|
dependent));
|
||||||
|
#if RTREE_HEIGHT_MAX > 1
|
||||||
|
RTREE_GET_SUBTREE(0)
|
||||||
|
#endif
|
||||||
|
#if RTREE_HEIGHT_MAX > 2
|
||||||
|
RTREE_GET_SUBTREE(1)
|
||||||
|
#endif
|
||||||
|
#if RTREE_HEIGHT_MAX > 3
|
||||||
|
RTREE_GET_SUBTREE(2)
|
||||||
|
#endif
|
||||||
|
#if RTREE_HEIGHT_MAX > 4
|
||||||
|
RTREE_GET_SUBTREE(3)
|
||||||
|
#endif
|
||||||
|
#if RTREE_HEIGHT_MAX > 5
|
||||||
|
RTREE_GET_SUBTREE(4)
|
||||||
|
#endif
|
||||||
|
#if RTREE_HEIGHT_MAX > 6
|
||||||
|
RTREE_GET_SUBTREE(5)
|
||||||
|
#endif
|
||||||
|
#if RTREE_HEIGHT_MAX > 7
|
||||||
|
RTREE_GET_SUBTREE(6)
|
||||||
|
#endif
|
||||||
|
#if RTREE_HEIGHT_MAX > 8
|
||||||
|
RTREE_GET_SUBTREE(7)
|
||||||
|
#endif
|
||||||
|
#if RTREE_HEIGHT_MAX > 9
|
||||||
|
RTREE_GET_SUBTREE(8)
|
||||||
|
#endif
|
||||||
|
#if RTREE_HEIGHT_MAX > 10
|
||||||
|
RTREE_GET_SUBTREE(9)
|
||||||
|
#endif
|
||||||
|
#if RTREE_HEIGHT_MAX > 11
|
||||||
|
RTREE_GET_SUBTREE(10)
|
||||||
|
#endif
|
||||||
|
#if RTREE_HEIGHT_MAX > 12
|
||||||
|
RTREE_GET_SUBTREE(11)
|
||||||
|
#endif
|
||||||
|
#if RTREE_HEIGHT_MAX > 13
|
||||||
|
RTREE_GET_SUBTREE(12)
|
||||||
|
#endif
|
||||||
|
#if RTREE_HEIGHT_MAX > 14
|
||||||
|
RTREE_GET_SUBTREE(13)
|
||||||
|
#endif
|
||||||
|
#if RTREE_HEIGHT_MAX > 15
|
||||||
|
RTREE_GET_SUBTREE(14)
|
||||||
|
#endif
|
||||||
|
#if RTREE_HEIGHT_MAX > 16
|
||||||
|
# error Unsupported RTREE_HEIGHT_MAX
|
||||||
|
#endif
|
||||||
|
RTREE_GET_LEAF(RTREE_HEIGHT_MAX-1)
|
||||||
|
#undef RTREE_GET_SUBTREE
|
||||||
|
#undef RTREE_GET_LEAF
|
||||||
|
default: not_reached();
|
||||||
}
|
}
|
||||||
not_reached();
|
not_reached();
|
||||||
}
|
}
|
||||||
@ -268,7 +364,7 @@ rtree_set(rtree_t *rtree, uintptr_t key, const extent_node_t *val)
|
|||||||
|
|
||||||
start_level = rtree_start_level(rtree, key);
|
start_level = rtree_start_level(rtree, key);
|
||||||
|
|
||||||
node = rtree_subtree_read(rtree, start_level);
|
node = rtree_subtree_read(rtree, start_level, false);
|
||||||
if (node == NULL)
|
if (node == NULL)
|
||||||
return (true);
|
return (true);
|
||||||
for (i = start_level; /**/; i++, node = child) {
|
for (i = start_level; /**/; i++, node = child) {
|
||||||
@ -282,7 +378,7 @@ rtree_set(rtree_t *rtree, uintptr_t key, const extent_node_t *val)
|
|||||||
return (false);
|
return (false);
|
||||||
}
|
}
|
||||||
assert(i + 1 < rtree->height);
|
assert(i + 1 < rtree->height);
|
||||||
child = rtree_child_read(rtree, &node[subkey], i);
|
child = rtree_child_read(rtree, &node[subkey], i, false);
|
||||||
if (child == NULL)
|
if (child == NULL)
|
||||||
return (true);
|
return (true);
|
||||||
}
|
}
|
||||||
|
@ -15,6 +15,8 @@ rtree_new(rtree_t *rtree, unsigned bits, rtree_node_alloc_t *alloc,
|
|||||||
{
|
{
|
||||||
unsigned bits_in_leaf, height, i;
|
unsigned bits_in_leaf, height, i;
|
||||||
|
|
||||||
|
assert(RTREE_HEIGHT_MAX == ((ZU(1) << (LG_SIZEOF_PTR+3)) /
|
||||||
|
RTREE_BITS_PER_LEVEL));
|
||||||
assert(bits > 0 && bits <= (sizeof(uintptr_t) << 3));
|
assert(bits > 0 && bits <= (sizeof(uintptr_t) << 3));
|
||||||
|
|
||||||
bits_in_leaf = (bits % RTREE_BITS_PER_LEVEL) == 0 ? RTREE_BITS_PER_LEVEL
|
bits_in_leaf = (bits % RTREE_BITS_PER_LEVEL) == 0 ? RTREE_BITS_PER_LEVEL
|
||||||
|
@ -14,6 +14,7 @@
|
|||||||
malloc_write("<jemalloc>: Unreachable code reached\n"); \
|
malloc_write("<jemalloc>: Unreachable code reached\n"); \
|
||||||
abort(); \
|
abort(); \
|
||||||
} \
|
} \
|
||||||
|
unreachable(); \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
#define not_implemented() do { \
|
#define not_implemented() do { \
|
||||||
|
Loading…
Reference in New Issue
Block a user