Optimize away a branch on the free fastpath.

On the rtree metadata lookup fast path, there will never be a NULL returned when
the cache key matches (which is unknown to the compiler).  The previous logic
was checking for NULL return value, resulting in the extra branch (in addition to
the cache key match checking).  Make the lookup_fast return a bool to indicate
cache miss / match, so that the extra branch is avoided.
This commit is contained in:
Qi Wang 2021-10-20 14:17:57 -07:00 committed by Qi Wang
parent 4d56aaeca5
commit b6a7a535b3

View File

@ -330,28 +330,27 @@ rtree_leaf_elm_state_update(tsdn_t *tsdn, rtree_t *rtree,
}
/*
* Tries to look up the key in the L1 cache, returning it if there's a hit, or
* NULL if there's a miss.
* Key is allowed to be NULL; returns NULL in this case.
* Tries to look up the key in the L1 cache, returning false if there's a hit, or
* true if there's a miss.
* Key is allowed to be NULL; returns true in this case.
*/
JEMALLOC_ALWAYS_INLINE rtree_leaf_elm_t *
JEMALLOC_ALWAYS_INLINE bool
rtree_leaf_elm_lookup_fast(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
uintptr_t key) {
rtree_leaf_elm_t *elm;
uintptr_t key, rtree_leaf_elm_t **elm) {
size_t slot = rtree_cache_direct_map(key);
uintptr_t leafkey = rtree_leafkey(key);
assert(leafkey != RTREE_LEAFKEY_INVALID);
if (likely(rtree_ctx->cache[slot].leafkey == leafkey)) {
rtree_leaf_elm_t *leaf = rtree_ctx->cache[slot].leaf;
assert(leaf != NULL);
uintptr_t subkey = rtree_subkey(key, RTREE_HEIGHT-1);
elm = &leaf[subkey];
return elm;
} else {
return NULL;
if (unlikely(rtree_ctx->cache[slot].leafkey != leafkey)) {
return true;
}
rtree_leaf_elm_t *leaf = rtree_ctx->cache[slot].leaf;
assert(leaf != NULL);
uintptr_t subkey = rtree_subkey(key, RTREE_HEIGHT-1);
*elm = &leaf[subkey];
return false;
}
JEMALLOC_ALWAYS_INLINE rtree_leaf_elm_t *
@ -449,16 +448,22 @@ rtree_metadata_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
}
/*
* Returns true on error.
* Returns true when the request cannot be fulfilled by fastpath.
*/
static inline bool
rtree_metadata_try_read_fast(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
uintptr_t key, rtree_metadata_t *r_rtree_metadata) {
rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup_fast(tsdn, rtree, rtree_ctx,
key);
if (elm == NULL) {
rtree_leaf_elm_t *elm;
/*
* Should check the bool return value (lookup success or not) instead of
* elm == NULL (which will result in an extra branch). This is because
* when the cache lookup succeeds, there will never be a NULL pointer
* returned (which is unknown to the compiler).
*/
if (rtree_leaf_elm_lookup_fast(tsdn, rtree, rtree_ctx, key, &elm)) {
return true;
}
assert(elm != NULL);
*r_rtree_metadata = rtree_leaf_elm_read(tsdn, rtree, elm,
/* dependent */ true).metadata;
return false;