From 6c460ad91bf349ebac3b23e58d97769a982110fe Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 22 Mar 2016 17:54:35 -0700 Subject: [PATCH] Optimize rtree_get(). Specialize fast path to avoid code that cannot execute for dependent loads. Manually unroll. --- include/jemalloc/internal/rtree.h | 166 +++++++++++++++++++++++------- src/rtree.c | 2 + src/util.c | 1 + 3 files changed, 134 insertions(+), 35 deletions(-) diff --git a/include/jemalloc/internal/rtree.h b/include/jemalloc/internal/rtree.h index 28ae9d1d..3f8db3ad 100644 --- a/include/jemalloc/internal/rtree.h +++ b/include/jemalloc/internal/rtree.h @@ -16,8 +16,34 @@ typedef struct rtree_s rtree_t; */ #define LG_RTREE_BITS_PER_LEVEL 4 #define RTREE_BITS_PER_LEVEL (ZU(1) << LG_RTREE_BITS_PER_LEVEL) -#define RTREE_HEIGHT_MAX \ - ((ZU(1) << (LG_SIZEOF_PTR+3)) / RTREE_BITS_PER_LEVEL) +/* + * Avoid math in RTREE_HEIGHT_MAX definition so that it can be used in cpp + * conditionals. The following definitions are precomputed equivalents to: + * + * #define RTREE_HEIGHT_MAX \ + * ((ZU(1) << (LG_SIZEOF_PTR+3)) / RTREE_BITS_PER_LEVEL) + */ +#if LG_RTREE_BITS_PER_LEVEL == 2 +# if LG_SIZEOF_PTR == 3 +# define RTREE_HEIGHT_MAX 16 +# elif LG_SIZEOF_PTR == 2 +# define RTREE_HEIGHT_MAX 8 +# endif +#elif LG_RTREE_BITS_PER_LEVEL == 3 +# if LG_SIZEOF_PTR == 3 +# define RTREE_HEIGHT_MAX 8 +# elif LG_SIZEOF_PTR == 2 +# define RTREE_HEIGHT_MAX 4 +# endif +#elif LG_RTREE_BITS_PER_LEVEL == 4 +# if LG_SIZEOF_PTR == 3 +# define RTREE_HEIGHT_MAX 4 +# elif LG_SIZEOF_PTR == 2 +# define RTREE_HEIGHT_MAX 2 +# endif +#else +# error Unsupported LG_RTREE_BITS_PER_LEVEL +#endif /* Used for two-stage lock-free node initialization. 
*/ #define RTREE_NODE_INITIALIZING ((rtree_node_elm_t *)0x1) @@ -111,15 +137,18 @@ unsigned rtree_start_level(rtree_t *rtree, uintptr_t key); uintptr_t rtree_subkey(rtree_t *rtree, uintptr_t key, unsigned level); bool rtree_node_valid(rtree_node_elm_t *node); -rtree_node_elm_t *rtree_child_tryread(rtree_node_elm_t *elm); +rtree_node_elm_t *rtree_child_tryread(rtree_node_elm_t *elm, + bool dependent); rtree_node_elm_t *rtree_child_read(rtree_t *rtree, rtree_node_elm_t *elm, - unsigned level); + unsigned level, bool dependent); extent_node_t *rtree_val_read(rtree_t *rtree, rtree_node_elm_t *elm, bool dependent); void rtree_val_write(rtree_t *rtree, rtree_node_elm_t *elm, const extent_node_t *val); -rtree_node_elm_t *rtree_subtree_tryread(rtree_t *rtree, unsigned level); -rtree_node_elm_t *rtree_subtree_read(rtree_t *rtree, unsigned level); +rtree_node_elm_t *rtree_subtree_tryread(rtree_t *rtree, unsigned level, + bool dependent); +rtree_node_elm_t *rtree_subtree_read(rtree_t *rtree, unsigned level, + bool dependent); extent_node_t *rtree_get(rtree_t *rtree, uintptr_t key, bool dependent); bool rtree_set(rtree_t *rtree, uintptr_t key, const extent_node_t *val); @@ -157,25 +186,28 @@ rtree_node_valid(rtree_node_elm_t *node) } JEMALLOC_INLINE rtree_node_elm_t * -rtree_child_tryread(rtree_node_elm_t *elm) +rtree_child_tryread(rtree_node_elm_t *elm, bool dependent) { rtree_node_elm_t *child; /* Double-checked read (first read may be stale. 
*/ child = elm->child; - if (!rtree_node_valid(child)) + if (!dependent && !rtree_node_valid(child)) child = atomic_read_p(&elm->pun); + assert(!dependent || child != NULL); return (child); } JEMALLOC_INLINE rtree_node_elm_t * -rtree_child_read(rtree_t *rtree, rtree_node_elm_t *elm, unsigned level) +rtree_child_read(rtree_t *rtree, rtree_node_elm_t *elm, unsigned level, + bool dependent) { rtree_node_elm_t *child; - child = rtree_child_tryread(elm); - if (unlikely(!rtree_node_valid(child))) + child = rtree_child_tryread(elm, dependent); + if (!dependent && unlikely(!rtree_node_valid(child))) child = rtree_child_read_hard(rtree, elm, level); + assert(!dependent || child != NULL); return (child); } @@ -209,25 +241,27 @@ rtree_val_write(rtree_t *rtree, rtree_node_elm_t *elm, const extent_node_t *val) } JEMALLOC_INLINE rtree_node_elm_t * -rtree_subtree_tryread(rtree_t *rtree, unsigned level) +rtree_subtree_tryread(rtree_t *rtree, unsigned level, bool dependent) { rtree_node_elm_t *subtree; /* Double-checked read (first read may be stale. 
*/ subtree = rtree->levels[level].subtree; - if (!rtree_node_valid(subtree)) + if (!dependent && unlikely(!rtree_node_valid(subtree))) subtree = atomic_read_p(&rtree->levels[level].subtree_pun); + assert(!dependent || subtree != NULL); return (subtree); } JEMALLOC_INLINE rtree_node_elm_t * -rtree_subtree_read(rtree_t *rtree, unsigned level) +rtree_subtree_read(rtree_t *rtree, unsigned level, bool dependent) { rtree_node_elm_t *subtree; - subtree = rtree_subtree_tryread(rtree, level); - if (unlikely(!rtree_node_valid(subtree))) + subtree = rtree_subtree_tryread(rtree, level, dependent); + if (!dependent && unlikely(!rtree_node_valid(subtree))) subtree = rtree_subtree_read_hard(rtree, level); + assert(!dependent || subtree != NULL); return (subtree); } @@ -235,26 +269,88 @@ JEMALLOC_INLINE extent_node_t * rtree_get(rtree_t *rtree, uintptr_t key, bool dependent) { uintptr_t subkey; - unsigned i, start_level; - rtree_node_elm_t *node, *child; + unsigned start_level; + rtree_node_elm_t *node; start_level = rtree_start_level(rtree, key); - for (i = start_level, node = rtree_subtree_tryread(rtree, start_level); - /**/; i++, node = child) { - if (!dependent && unlikely(!rtree_node_valid(node))) - return (NULL); - subkey = rtree_subkey(rtree, key, i); - if (i == rtree->height - 1) { - /* - * node is a leaf, so it contains values rather than - * child pointers. 
- */ - return (rtree_val_read(rtree, &node[subkey], - dependent)); - } - assert(i < rtree->height - 1); - child = rtree_child_tryread(&node[subkey]); + node = rtree_subtree_tryread(rtree, start_level, dependent); +#define RTREE_GET_BIAS (RTREE_HEIGHT_MAX - rtree->height) + switch (start_level + RTREE_GET_BIAS) { +#define RTREE_GET_SUBTREE(level) \ + case level: \ + assert(level < (RTREE_HEIGHT_MAX-1)); \ + if (!dependent && unlikely(!rtree_node_valid(node))) \ + return (NULL); \ + subkey = rtree_subkey(rtree, key, level - \ + RTREE_GET_BIAS); \ + node = rtree_child_tryread(&node[subkey], dependent); \ + /* Fall through. */ +#define RTREE_GET_LEAF(level) \ + case level: \ + assert(level == (RTREE_HEIGHT_MAX-1)); \ + if (!dependent && unlikely(!rtree_node_valid(node))) \ + return (NULL); \ + subkey = rtree_subkey(rtree, key, level - \ + RTREE_GET_BIAS); \ + /* \ + * node is a leaf, so it contains values rather than \ + * child pointers. \ + */ \ + return (rtree_val_read(rtree, &node[subkey], \ + dependent)); +#if RTREE_HEIGHT_MAX > 1 + RTREE_GET_SUBTREE(0) +#endif +#if RTREE_HEIGHT_MAX > 2 + RTREE_GET_SUBTREE(1) +#endif +#if RTREE_HEIGHT_MAX > 3 + RTREE_GET_SUBTREE(2) +#endif +#if RTREE_HEIGHT_MAX > 4 + RTREE_GET_SUBTREE(3) +#endif +#if RTREE_HEIGHT_MAX > 5 + RTREE_GET_SUBTREE(4) +#endif +#if RTREE_HEIGHT_MAX > 6 + RTREE_GET_SUBTREE(5) +#endif +#if RTREE_HEIGHT_MAX > 7 + RTREE_GET_SUBTREE(6) +#endif +#if RTREE_HEIGHT_MAX > 8 + RTREE_GET_SUBTREE(7) +#endif +#if RTREE_HEIGHT_MAX > 9 + RTREE_GET_SUBTREE(8) +#endif +#if RTREE_HEIGHT_MAX > 10 + RTREE_GET_SUBTREE(9) +#endif +#if RTREE_HEIGHT_MAX > 11 + RTREE_GET_SUBTREE(10) +#endif +#if RTREE_HEIGHT_MAX > 12 + RTREE_GET_SUBTREE(11) +#endif +#if RTREE_HEIGHT_MAX > 13 + RTREE_GET_SUBTREE(12) +#endif +#if RTREE_HEIGHT_MAX > 14 + RTREE_GET_SUBTREE(13) +#endif +#if RTREE_HEIGHT_MAX > 15 + RTREE_GET_SUBTREE(14) +#endif +#if RTREE_HEIGHT_MAX > 16 +# error Unsupported RTREE_HEIGHT_MAX +#endif + RTREE_GET_LEAF(RTREE_HEIGHT_MAX-1) 
+#undef RTREE_GET_SUBTREE +#undef RTREE_GET_LEAF + default: not_reached(); } not_reached(); } @@ -268,7 +364,7 @@ rtree_set(rtree_t *rtree, uintptr_t key, const extent_node_t *val) start_level = rtree_start_level(rtree, key); - node = rtree_subtree_read(rtree, start_level); + node = rtree_subtree_read(rtree, start_level, false); if (node == NULL) return (true); for (i = start_level; /**/; i++, node = child) { @@ -282,7 +378,7 @@ rtree_set(rtree_t *rtree, uintptr_t key, const extent_node_t *val) return (false); } assert(i + 1 < rtree->height); - child = rtree_child_read(rtree, &node[subkey], i); + child = rtree_child_read(rtree, &node[subkey], i, false); if (child == NULL) return (true); } diff --git a/src/rtree.c b/src/rtree.c index af0d97e7..3166b45f 100644 --- a/src/rtree.c +++ b/src/rtree.c @@ -15,6 +15,8 @@ rtree_new(rtree_t *rtree, unsigned bits, rtree_node_alloc_t *alloc, { unsigned bits_in_leaf, height, i; + assert(RTREE_HEIGHT_MAX == ((ZU(1) << (LG_SIZEOF_PTR+3)) / + RTREE_BITS_PER_LEVEL)); assert(bits > 0 && bits <= (sizeof(uintptr_t) << 3)); bits_in_leaf = (bits % RTREE_BITS_PER_LEVEL) == 0 ? RTREE_BITS_PER_LEVEL diff --git a/src/util.c b/src/util.c index 982a2e31..581d540b 100644 --- a/src/util.c +++ b/src/util.c @@ -14,6 +14,7 @@ malloc_write(": Unreachable code reached\n"); \ abort(); \ } \ + unreachable(); \ } while (0) #define not_implemented() do { \