Replace rtree path cache with LRU cache.

Rework rtree_ctx_t to encapsulate an rtree leaf LRU lookup cache rather
than a single-path element lookup cache.  The replacement is logically
much simpler, as well as slightly faster in the fast path case and less
prone to degraded performance during non-trivial sequences of lookups.
Jason Evans
2017-02-03 20:21:56 -08:00
parent 0ecf692726
commit 4a346f5593
4 changed files with 107 additions and 123 deletions
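For orientation before the diff: the reworked rtree_ctx_t keeps a small array of (leafkey, leaf) pairs in most-recently-used order. A hit moves the matching entry to the front and indexes straight into the leaf; a miss falls through to a full tree walk, which then installs the found leaf at the front of the cache. A minimal standalone sketch of that lookup, assuming a hypothetical NCACHE/cache_elm_t layout (not jemalloc's actual declarations):

/*
 * Minimal sketch of an LRU leaf-cache lookup; names are illustrative.
 */
#include <stddef.h>
#include <stdint.h>
#include <string.h>

#define NCACHE 8                        /* hypothetical cache size */

typedef struct {
	uintptr_t leafkey;              /* key with the leaf-level bits masked off */
	void *leaf;                     /* cached pointer to the leaf node */
} cache_elm_t;

typedef struct {
	cache_elm_t cache[NCACHE];      /* cache[0] is the most recently used */
} ctx_t;

/* On a hit, move the entry to the front and return the leaf; NULL on a miss. */
void *
cache_lookup(ctx_t *ctx, uintptr_t leafkey) {
	for (unsigned i = 0; i < NCACHE; i++) {
		if (ctx->cache[i].leafkey == leafkey &&
		    ctx->cache[i].leaf != NULL) {
			void *leaf = ctx->cache[i].leaf;
			/* Reorder: shift the i entries in front of the hit down one slot. */
			memmove(&ctx->cache[1], &ctx->cache[0],
			    sizeof(cache_elm_t) * i);
			ctx->cache[0].leafkey = leafkey;
			ctx->cache[0].leaf = leaf;
			return leaf;
		}
	}
	return NULL;                    /* miss: caller walks the tree and refills */
}

The memmove() only covers the slots in front of the hit, so the common MRU hit (i == 0) costs one compare plus two stores.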


@@ -3,8 +3,7 @@
#ifndef JEMALLOC_ENABLE_INLINE
unsigned rtree_start_level(const rtree_t *rtree, uintptr_t key);
unsigned rtree_ctx_start_level(const rtree_t *rtree,
const rtree_ctx_t *rtree_ctx, uintptr_t key);
uintptr_t rtree_leafkey(rtree_t *rtree, uintptr_t key);
uintptr_t rtree_subkey(rtree_t *rtree, uintptr_t key, unsigned level);
bool rtree_node_valid(rtree_elm_t *node);
@@ -50,31 +49,24 @@ rtree_start_level(const rtree_t *rtree, uintptr_t key) {
return start_level;
}
JEMALLOC_ALWAYS_INLINE unsigned
rtree_ctx_start_level(const rtree_t *rtree, const rtree_ctx_t *rtree_ctx,
uintptr_t key) {
unsigned start_level;
uintptr_t key_diff;
/* Compute the difference between old and new lookup keys. */
key_diff = key ^ rtree_ctx->key;
assert(key_diff != 0); /* Handled in rtree_elm_lookup(). */
/*
* Compute the last traversal path element at which the keys' paths
* are the same.
*/
start_level = rtree->start_level[(lg_floor(key_diff) + 1) >>
LG_RTREE_BITS_PER_LEVEL];
assert(start_level < rtree->height);
return start_level;
JEMALLOC_ALWAYS_INLINE uintptr_t
rtree_leafkey(rtree_t *rtree, uintptr_t key) {
unsigned ptrbits = ZU(1) << (LG_SIZEOF_PTR+3);
unsigned cumbits = (rtree->levels[rtree->height-1].cumbits -
rtree->levels[rtree->height-1].bits);
unsigned maskbits = ptrbits - cumbits;
uintptr_t mask = ~((ZU(1) << maskbits) - 1);
return (key & mask);
}
JEMALLOC_ALWAYS_INLINE uintptr_t
rtree_subkey(rtree_t *rtree, uintptr_t key, unsigned level) {
return ((key >> ((ZU(1) << (LG_SIZEOF_PTR+3)) -
rtree->levels[level].cumbits)) & ((ZU(1) <<
rtree->levels[level].bits) - 1));
unsigned ptrbits = ZU(1) << (LG_SIZEOF_PTR+3);
unsigned cumbits = rtree->levels[level].cumbits;
unsigned shiftbits = ptrbits - cumbits;
unsigned maskbits = rtree->levels[level].bits;
unsigned mask = (ZU(1) << maskbits) - 1;
return ((key >> shiftbits) & mask);
}
JEMALLOC_ALWAYS_INLINE bool
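The rewritten helpers above carry the cache's addressing scheme: rtree_leafkey() clears every key bit that the leaf level resolves, so all keys landing in the same leaf share one cache tag, while rtree_subkey() extracts a level's index bits. A worked example with hypothetical parameters (64-bit pointers, 9 bits in the leaf level, 52 cumulative bits at the leaf; the real values come from rtree->levels[]):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int
main(void) {
	unsigned ptrbits = 64;
	unsigned leaf_bits = 9;         /* plays the role of rtree->levels[height-1].bits */
	unsigned leaf_cumbits = 52;     /* plays the role of rtree->levels[height-1].cumbits */
	uintptr_t key = (uintptr_t)0x7f1234567000;

	/* rtree_leafkey(): mask off every bit the leaf level covers. */
	unsigned maskbits = ptrbits - (leaf_cumbits - leaf_bits);
	uintptr_t leafkey = key & ~(((uintptr_t)1 << maskbits) - 1);

	/* rtree_subkey() at the leaf level: the 9-bit index within the leaf. */
	unsigned shiftbits = ptrbits - leaf_cumbits;
	uintptr_t subkey = (key >> shiftbits) & (((uintptr_t)1 << leaf_bits) - 1);

	/* Keys differing only in the low maskbits bits share a leafkey (same leaf). */
	printf("leafkey=0x%" PRIxPTR ", subkey=%" PRIuPTR "\n", leafkey, subkey);
	return 0;
}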
@@ -170,103 +162,89 @@ rtree_subtree_read(tsdn_t *tsdn, rtree_t *rtree, unsigned level,
JEMALLOC_ALWAYS_INLINE rtree_elm_t *
rtree_elm_lookup(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
uintptr_t key, bool dependent, bool init_missing) {
uintptr_t subkey;
unsigned start_level;
rtree_elm_t *node;
assert(!dependent || !init_missing);
if (dependent || init_missing) {
if (likely(rtree_ctx->valid)) {
if (key == rtree_ctx->key) {
return rtree_ctx->elms[rtree->height];
} else {
unsigned no_ctx_start_level =
rtree_start_level(rtree, key);
unsigned ctx_start_level;
if (likely(no_ctx_start_level <=
rtree_ctx->start_level && (ctx_start_level =
rtree_ctx_start_level(rtree, rtree_ctx,
key)) >= rtree_ctx->start_level)) {
start_level = ctx_start_level;
node = rtree_ctx->elms[ctx_start_level];
} else {
start_level = no_ctx_start_level;
node = init_missing ?
rtree_subtree_read(tsdn, rtree,
no_ctx_start_level, dependent) :
rtree_subtree_tryread(rtree,
no_ctx_start_level, dependent);
rtree_ctx->start_level =
no_ctx_start_level;
rtree_ctx->elms[no_ctx_start_level] =
node;
}
}
} else {
unsigned no_ctx_start_level = rtree_start_level(rtree,
key);
start_level = no_ctx_start_level;
node = init_missing ? rtree_subtree_read(tsdn, rtree,
no_ctx_start_level, dependent) :
rtree_subtree_tryread(rtree, no_ctx_start_level,
dependent);
rtree_ctx->valid = true;
rtree_ctx->start_level = no_ctx_start_level;
rtree_ctx->elms[no_ctx_start_level] = node;
/* Search the cache. */
uintptr_t leafkey = rtree_leafkey(rtree, key);
if (likely(key != 0)) {
#define RTREE_CACHE_CHECK(i) do { \
if (likely(rtree_ctx->cache[i].leafkey == leafkey)) { \
rtree_elm_t *leaf = rtree_ctx->cache[i].leaf; \
if (likely(leaf != NULL)) { \
/* Reorder. */ \
memmove(&rtree_ctx->cache[1], \
&rtree_ctx->cache[0], \
sizeof(rtree_ctx_cache_elm_t) * i); \
rtree_ctx->cache[0].leafkey = leafkey; \
rtree_ctx->cache[0].leaf = leaf; \
\
uintptr_t subkey = rtree_subkey(rtree, \
key, rtree->height-1); \
return &leaf[subkey]; \
} \
} \
} while (0)
/* Check the MRU cache entry. */
RTREE_CACHE_CHECK(0);
/*
* Search the remaining cache elements, and on success move the
* matching element to the front. Unroll the first iteration to
* avoid calling memmove() (the compiler typically optimizes it
* into raw moves).
*/
if (RTREE_CTX_NCACHE > 1) {
RTREE_CACHE_CHECK(1);
}
rtree_ctx->key = key;
} else {
start_level = rtree_start_level(rtree, key);
node = init_missing ? rtree_subtree_read(tsdn, rtree,
start_level, dependent) : rtree_subtree_tryread(rtree,
start_level, dependent);
for (unsigned i = 2; i < RTREE_CTX_NCACHE; i++) {
RTREE_CACHE_CHECK(i);
}
#undef RTREE_CACHE_CHECK
}
unsigned start_level = rtree_start_level(rtree, key);
rtree_elm_t *node = init_missing ? rtree_subtree_read(tsdn, rtree,
start_level, dependent) : rtree_subtree_tryread(rtree, start_level,
dependent);
#define RTREE_GET_BIAS (RTREE_HEIGHT_MAX - rtree->height)
switch (start_level + RTREE_GET_BIAS) {
#define RTREE_GET_SUBTREE(level) \
case level: \
case level: { \
assert(level < (RTREE_HEIGHT_MAX-1)); \
if (!dependent && unlikely(!rtree_node_valid(node))) { \
if (init_missing) { \
rtree_ctx->valid = false; \
} \
return NULL; \
} \
subkey = rtree_subkey(rtree, key, level - \
uintptr_t subkey = rtree_subkey(rtree, key, level - \
RTREE_GET_BIAS); \
node = init_missing ? rtree_child_read(tsdn, rtree, \
&node[subkey], level - RTREE_GET_BIAS, dependent) : \
rtree_child_tryread(&node[subkey], dependent); \
if (dependent || init_missing) { \
rtree_ctx->elms[level - RTREE_GET_BIAS + 1] = \
node; \
} \
/* Fall through. */
/* Fall through. */ \
}
#define RTREE_GET_LEAF(level) \
case level: \
case level: { \
assert(level == (RTREE_HEIGHT_MAX-1)); \
if (!dependent && unlikely(!rtree_node_valid(node))) { \
if (init_missing) { \
rtree_ctx->valid = false; \
} \
return NULL; \
} \
subkey = rtree_subkey(rtree, key, level - \
RTREE_GET_BIAS); \
/* \
* node is a leaf, so it contains values rather than \
* child pointers. \
*/ \
node = &node[subkey]; \
if (dependent || init_missing) { \
rtree_ctx->elms[level - RTREE_GET_BIAS + 1] = \
node; \
if (likely(key != 0)) { \
if (RTREE_CTX_NCACHE > 1) { \
memmove(&rtree_ctx->cache[1], \
&rtree_ctx->cache[0], \
sizeof(rtree_ctx_cache_elm_t) * \
(RTREE_CTX_NCACHE-1)); \
} \
rtree_ctx->cache[0].leafkey = leafkey; \
rtree_ctx->cache[0].leaf = node; \
} \
return node;
uintptr_t subkey = rtree_subkey(rtree, key, level - \
RTREE_GET_BIAS); \
return &node[subkey]; \
}
#if RTREE_HEIGHT_MAX > 1
RTREE_GET_SUBTREE(0)
#endif
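The RTREE_GET_SUBTREE()/RTREE_GET_LEAF() cases above unroll the descent at compile time: RTREE_GET_BIAS shifts the switch entry point so that trees shorter than RTREE_HEIGHT_MAX skip the levels they lack, and each case falls through into the next deeper level. A minimal sketch of that fall-through dispatch pattern, with hypothetical names and a toy MAX_LEVELS rather than the real macros:

#include <stdio.h>

#define MAX_LEVELS 3

/* Count the levels visited when descending from start_level in a tree of the given height. */
int
descend(unsigned start_level, unsigned height) {
	unsigned bias = MAX_LEVELS - height;    /* analogous to RTREE_GET_BIAS */
	int visited = 0;

	switch (start_level + bias) {
	case 0:
		visited++;                      /* topmost possible level */
		/* Fall through. */
	case 1:
		visited++;                      /* next level down */
		/* Fall through. */
	case 2:
		visited++;                      /* leaf level */
		return visited;
	default:
		return -1;                      /* unreachable for valid inputs */
	}
}

int
main(void) {
	printf("%d\n", descend(0, 3));          /* bias 0: visits all 3 levels */
	printf("%d\n", descend(0, 2));          /* bias 1: skips the missing top level */
	return 0;
}

On reaching the leaf case, the real code also shifts the cache down one slot and installs the new (leafkey, leaf) pair at cache[0], mirroring the reorder done on a cache hit.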
@@ -365,16 +343,14 @@ rtree_elm_acquire(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
if (!dependent && elm == NULL) {
return NULL;
}
{
extent_t *extent;
void *s;
do {
extent = rtree_elm_read(elm, false);
/* The least significant bit serves as a lock. */
s = (void *)((uintptr_t)extent | (uintptr_t)0x1);
} while (atomic_cas_p(&elm->pun, (void *)extent, s));
}
extent_t *extent;
void *s;
do {
extent = rtree_elm_read(elm, false);
/* The least significant bit serves as a lock. */
s = (void *)((uintptr_t)extent | (uintptr_t)0x1);
} while (atomic_cas_p(&elm->pun, (void *)extent, s));
if (config_debug) {
rtree_elm_witness_acquire(tsdn, rtree, key, elm);
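For reference, the do/while loop above implements the per-element lock by setting the pointer's least significant bit with a compare-and-swap and spinning until it wins. A rough standalone equivalent using C11 atomics, with illustrative names (jemalloc itself uses its internal atomic_cas_p() on elm->pun):

#include <stdatomic.h>
#include <stdint.h>

typedef struct {
	_Atomic(uintptr_t) ptr;         /* pointer value; bit 0 doubles as a lock */
} elm_t;

void
elm_lock(elm_t *elm) {
	for (;;) {
		/* Read the current pointer with the lock bit cleared. */
		uintptr_t cur = atomic_load_explicit(&elm->ptr,
		    memory_order_relaxed) & ~(uintptr_t)1;
		/* Try to set the lock bit; retry while another thread holds it. */
		if (atomic_compare_exchange_weak_explicit(&elm->ptr, &cur,
		    cur | 1, memory_order_acquire, memory_order_relaxed)) {
			return;
		}
	}
}

void
elm_unlock(elm_t *elm, uintptr_t newptr) {
	/* Publish the (possibly updated) pointer and clear the lock bit. */
	atomic_store_explicit(&elm->ptr, newptr & ~(uintptr_t)1,
	    memory_order_release);
}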