Replace rtree path cache with LRU cache.
Rework rtree_ctx_t to encapsulate an rtree leaf LRU lookup cache rather than a single-path element lookup cache. The replacement is logically much simpler, as well as slightly faster in the fast path case and less prone to degraded performance during non-trivial sequences of lookups.
parent 0ecf692726
commit 4a346f5593
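For orientation before the diff, here is a minimal, self-contained sketch of the lookup scheme the reworked rtree_ctx_t uses: a small array of (leafkey, leaf) pairs kept in most-recently-used order, searched linearly and reordered with memmove() on a hit. This is an illustration only, not the jemalloc code; the names cache_elm_t, ctx_t and NCACHE are invented stand-ins for rtree_ctx_cache_elm_t, rtree_ctx_t and RTREE_CTX_NCACHE, and the leaf is simplified to a void pointer.

#include <stddef.h>
#include <stdint.h>
#include <string.h>

#define NCACHE 8    /* mirrors RTREE_CTX_NCACHE in the diff below */

typedef struct {
    uintptr_t leafkey;    /* key with the leaf-level bits masked off */
    void *leaf;           /* cached leaf node, or NULL */
} cache_elm_t;

typedef struct {
    cache_elm_t cache[NCACHE];    /* cache[0] is the most recently used entry */
} ctx_t;

/* Look up leafkey; on a hit, move the matching entry to the front. */
static void *
cache_lookup(ctx_t *ctx, uintptr_t leafkey) {
    for (unsigned i = 0; i < NCACHE; i++) {
        if (ctx->cache[i].leafkey == leafkey && ctx->cache[i].leaf != NULL) {
            void *leaf = ctx->cache[i].leaf;
            /* Reorder: shift entries 0..i-1 down one slot, put the hit first. */
            memmove(&ctx->cache[1], &ctx->cache[0], sizeof(cache_elm_t) * i);
            ctx->cache[0].leafkey = leafkey;
            ctx->cache[0].leaf = leaf;
            return leaf;
        }
    }
    return NULL;    /* miss: the caller walks the tree, then installs the leaf at cache[0] */
}

On a miss the caller walks the tree as before and installs the found leaf at cache[0], which is what the RTREE_GET_LEAF case in the diff below does.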
@@ -419,7 +419,6 @@ rtree_child_read
 rtree_child_read_hard
 rtree_child_tryread
 rtree_clear
-rtree_ctx_start_level
 rtree_delete
 rtree_elm_acquire
 rtree_elm_lookup
@@ -431,6 +430,7 @@ rtree_elm_witness_acquire
 rtree_elm_witness_release
 rtree_elm_write
 rtree_elm_write_acquired
+rtree_leafkey
 rtree_new
 rtree_node_alloc
 rtree_node_dalloc
@@ -3,8 +3,7 @@

 #ifndef JEMALLOC_ENABLE_INLINE
 unsigned rtree_start_level(const rtree_t *rtree, uintptr_t key);
-unsigned rtree_ctx_start_level(const rtree_t *rtree,
-    const rtree_ctx_t *rtree_ctx, uintptr_t key);
+uintptr_t rtree_leafkey(rtree_t *rtree, uintptr_t key);
 uintptr_t rtree_subkey(rtree_t *rtree, uintptr_t key, unsigned level);

 bool rtree_node_valid(rtree_elm_t *node);
@@ -50,31 +49,24 @@ rtree_start_level(const rtree_t *rtree, uintptr_t key) {
     return start_level;
 }

-JEMALLOC_ALWAYS_INLINE unsigned
-rtree_ctx_start_level(const rtree_t *rtree, const rtree_ctx_t *rtree_ctx,
-    uintptr_t key) {
-    unsigned start_level;
-    uintptr_t key_diff;
-
-    /* Compute the difference between old and new lookup keys. */
-    key_diff = key ^ rtree_ctx->key;
-    assert(key_diff != 0); /* Handled in rtree_elm_lookup(). */
-
-    /*
-     * Compute the last traversal path element at which the keys' paths
-     * are the same.
-     */
-    start_level = rtree->start_level[(lg_floor(key_diff) + 1) >>
-        LG_RTREE_BITS_PER_LEVEL];
-    assert(start_level < rtree->height);
-    return start_level;
+JEMALLOC_ALWAYS_INLINE uintptr_t
+rtree_leafkey(rtree_t *rtree, uintptr_t key) {
+    unsigned ptrbits = ZU(1) << (LG_SIZEOF_PTR+3);
+    unsigned cumbits = (rtree->levels[rtree->height-1].cumbits -
+        rtree->levels[rtree->height-1].bits);
+    unsigned maskbits = ptrbits - cumbits;
+    uintptr_t mask = ~((ZU(1) << maskbits) - 1);
+    return (key & mask);
 }

 JEMALLOC_ALWAYS_INLINE uintptr_t
 rtree_subkey(rtree_t *rtree, uintptr_t key, unsigned level) {
-    return ((key >> ((ZU(1) << (LG_SIZEOF_PTR+3)) -
-        rtree->levels[level].cumbits)) & ((ZU(1) <<
-        rtree->levels[level].bits) - 1));
+    unsigned ptrbits = ZU(1) << (LG_SIZEOF_PTR+3);
+    unsigned cumbits = rtree->levels[level].cumbits;
+    unsigned shiftbits = ptrbits - cumbits;
+    unsigned maskbits = rtree->levels[level].bits;
+    unsigned mask = (ZU(1) << maskbits) - 1;
+    return ((key >> shiftbits) & mask);
 }

 JEMALLOC_ALWAYS_INLINE bool
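To make the masking in rtree_leafkey() and rtree_subkey() concrete, here is a small standalone example with hypothetical level widths (a three-level, 16-bits-per-level layout chosen purely for illustration; jemalloc derives the real widths from RTREE_BITS_PER_LEVEL and the address range):

#include <stdint.h>
#include <stdio.h>

int main(void) {
    /* Hypothetical 3-level tree on a 64-bit build, 16 bits per level. */
    unsigned ptrbits = 64;                    /* ZU(1) << (LG_SIZEOF_PTR+3) */
    unsigned leaf_cumbits = 48, leaf_bits = 16;

    /* rtree_leafkey(): mask off every bit consumed at or below the leaf level. */
    unsigned maskbits = ptrbits - (leaf_cumbits - leaf_bits);    /* 64 - 32 = 32 */
    uintptr_t leafmask = ~(((uintptr_t)1 << maskbits) - 1);
    uintptr_t key = (uintptr_t)0x00007f2a12345000;
    uintptr_t leafkey = key & leafmask;       /* 0x00007f2a00000000 */

    /* rtree_subkey() at the leaf level: extract bits 16..31 of the key. */
    unsigned shiftbits = ptrbits - leaf_cumbits;                 /* 16 */
    uintptr_t subkey = (key >> shiftbits) & (((uintptr_t)1 << leaf_bits) - 1);

    printf("leafkey=%#lx subkey=%#lx\n", (unsigned long)leafkey,
        (unsigned long)subkey);               /* leafkey=0x7f2a00000000 subkey=0x1234 */
    return 0;
}

Every key that shares the same leafkey lands in the same leaf node, which is why caching a single (leafkey, leaf) pair serves all nearby lookups.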
@@ -170,103 +162,89 @@ rtree_subtree_read(tsdn_t *tsdn, rtree_t *rtree, unsigned level,
 JEMALLOC_ALWAYS_INLINE rtree_elm_t *
 rtree_elm_lookup(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
     uintptr_t key, bool dependent, bool init_missing) {
-    uintptr_t subkey;
-    unsigned start_level;
-    rtree_elm_t *node;
-
     assert(!dependent || !init_missing);

-    if (dependent || init_missing) {
-        if (likely(rtree_ctx->valid)) {
-            if (key == rtree_ctx->key) {
-                return rtree_ctx->elms[rtree->height];
-            } else {
-                unsigned no_ctx_start_level =
-                    rtree_start_level(rtree, key);
-                unsigned ctx_start_level;
-
-                if (likely(no_ctx_start_level <=
-                    rtree_ctx->start_level && (ctx_start_level =
-                    rtree_ctx_start_level(rtree, rtree_ctx,
-                    key)) >= rtree_ctx->start_level)) {
-                    start_level = ctx_start_level;
-                    node = rtree_ctx->elms[ctx_start_level];
-                } else {
-                    start_level = no_ctx_start_level;
-                    node = init_missing ?
-                        rtree_subtree_read(tsdn, rtree,
-                        no_ctx_start_level, dependent) :
-                        rtree_subtree_tryread(rtree,
-                        no_ctx_start_level, dependent);
-                    rtree_ctx->start_level =
-                        no_ctx_start_level;
-                    rtree_ctx->elms[no_ctx_start_level] =
-                        node;
-                }
-            }
-        } else {
-            unsigned no_ctx_start_level = rtree_start_level(rtree,
-                key);
-
-            start_level = no_ctx_start_level;
-            node = init_missing ? rtree_subtree_read(tsdn, rtree,
-                no_ctx_start_level, dependent) :
-                rtree_subtree_tryread(rtree, no_ctx_start_level,
-                dependent);
-            rtree_ctx->valid = true;
-            rtree_ctx->start_level = no_ctx_start_level;
-            rtree_ctx->elms[no_ctx_start_level] = node;
-        }
-        rtree_ctx->key = key;
-    } else {
-        start_level = rtree_start_level(rtree, key);
-        node = init_missing ? rtree_subtree_read(tsdn, rtree,
-            start_level, dependent) : rtree_subtree_tryread(rtree,
-            start_level, dependent);
+    /* Search the cache. */
+    uintptr_t leafkey = rtree_leafkey(rtree, key);
+    if (likely(key != 0)) {
+#define RTREE_CACHE_CHECK(i) do { \
+    if (likely(rtree_ctx->cache[i].leafkey == leafkey)) { \
+        rtree_elm_t *leaf = rtree_ctx->cache[i].leaf; \
+        if (likely(leaf != NULL)) { \
+            /* Reorder. */ \
+            memmove(&rtree_ctx->cache[1], \
+                &rtree_ctx->cache[0], \
+                sizeof(rtree_ctx_cache_elm_t) * i); \
+            rtree_ctx->cache[0].leafkey = leafkey; \
+            rtree_ctx->cache[0].leaf = leaf; \
+            \
+            uintptr_t subkey = rtree_subkey(rtree, \
+                key, rtree->height-1); \
+            return &leaf[subkey]; \
+        } \
+    } \
+} while (0)
+        /* Check the MRU cache entry. */
+        RTREE_CACHE_CHECK(0);
+        /*
+         * Search the remaining cache elements, and on success move the
+         * matching element to the front. Unroll the first iteration to
+         * avoid calling memmove() (the compiler typically optimizes it
+         * into raw moves).
+         */
+        if (RTREE_CTX_NCACHE > 1) {
+            RTREE_CACHE_CHECK(1);
+        }
+        for (unsigned i = 2; i < RTREE_CTX_NCACHE; i++) {
+            RTREE_CACHE_CHECK(i);
+        }
+#undef RTREE_CACHE_CHECK
     }

+    unsigned start_level = rtree_start_level(rtree, key);
+    rtree_elm_t *node = init_missing ? rtree_subtree_read(tsdn, rtree,
+        start_level, dependent) : rtree_subtree_tryread(rtree, start_level,
+        dependent);
+
 #define RTREE_GET_BIAS (RTREE_HEIGHT_MAX - rtree->height)
     switch (start_level + RTREE_GET_BIAS) {
 #define RTREE_GET_SUBTREE(level) \
-    case level: \
+    case level: { \
         assert(level < (RTREE_HEIGHT_MAX-1)); \
         if (!dependent && unlikely(!rtree_node_valid(node))) { \
-            if (init_missing) { \
-                rtree_ctx->valid = false; \
-            } \
             return NULL; \
         } \
-        subkey = rtree_subkey(rtree, key, level - \
+        uintptr_t subkey = rtree_subkey(rtree, key, level - \
             RTREE_GET_BIAS); \
         node = init_missing ? rtree_child_read(tsdn, rtree, \
             &node[subkey], level - RTREE_GET_BIAS, dependent) : \
             rtree_child_tryread(&node[subkey], dependent); \
-        if (dependent || init_missing) { \
-            rtree_ctx->elms[level - RTREE_GET_BIAS + 1] = \
-                node; \
-        } \
-        /* Fall through. */
+        /* Fall through. */ \
+    }
 #define RTREE_GET_LEAF(level) \
-    case level: \
+    case level: { \
         assert(level == (RTREE_HEIGHT_MAX-1)); \
         if (!dependent && unlikely(!rtree_node_valid(node))) { \
-            if (init_missing) { \
-                rtree_ctx->valid = false; \
-            } \
             return NULL; \
         } \
-        subkey = rtree_subkey(rtree, key, level - \
-            RTREE_GET_BIAS); \
         /* \
         * node is a leaf, so it contains values rather than \
         * child pointers. \
         */ \
-        node = &node[subkey]; \
-        if (dependent || init_missing) { \
-            rtree_ctx->elms[level - RTREE_GET_BIAS + 1] = \
-                node; \
+        if (likely(key != 0)) { \
+            if (RTREE_CTX_NCACHE > 1) { \
+                memmove(&rtree_ctx->cache[1], \
+                    &rtree_ctx->cache[0], \
+                    sizeof(rtree_ctx_cache_elm_t) * \
+                    (RTREE_CTX_NCACHE-1)); \
+            } \
+            rtree_ctx->cache[0].leafkey = leafkey; \
+            rtree_ctx->cache[0].leaf = node; \
         } \
-        return node;
+        uintptr_t subkey = rtree_subkey(rtree, key, level - \
+            RTREE_GET_BIAS); \
+        return &node[subkey]; \
+    }
 #if RTREE_HEIGHT_MAX > 1
     RTREE_GET_SUBTREE(0)
 #endif
@@ -365,16 +343,14 @@ rtree_elm_acquire(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
     if (!dependent && elm == NULL) {
         return NULL;
     }
-    {
-        extent_t *extent;
-        void *s;

-        do {
-            extent = rtree_elm_read(elm, false);
-            /* The least significant bit serves as a lock. */
-            s = (void *)((uintptr_t)extent | (uintptr_t)0x1);
-        } while (atomic_cas_p(&elm->pun, (void *)extent, s));
-    }
+    extent_t *extent;
+    void *s;
+    do {
+        extent = rtree_elm_read(elm, false);
+        /* The least significant bit serves as a lock. */
+        s = (void *)((uintptr_t)extent | (uintptr_t)0x1);
+    } while (atomic_cas_p(&elm->pun, (void *)extent, s));

     if (config_debug) {
         rtree_elm_witness_acquire(tsdn, rtree, key, elm);
@@ -54,22 +54,16 @@ struct rtree_level_s {
     unsigned cumbits;
 };

+struct rtree_ctx_cache_elm_s {
+    uintptr_t leafkey;
+    rtree_elm_t *leaf;
+};
+
 struct rtree_ctx_s {
-    /* If false, key/elms have not yet been initialized by a lookup. */
-    bool valid;
-    /* Key that corresponds to the tree path recorded in elms. */
-    uintptr_t key;
-    /* Memoized rtree_start_level(key). */
-    unsigned start_level;
-    /*
-     * A path through rtree, driven by key. Only elements that could
-     * actually be used for subsequent lookups are initialized, i.e. if
-     * start_level = rtree_start_level(key) is non-zero, the first
-     * start_level elements are uninitialized. The last element contains a
-     * pointer to the leaf node element that corresponds to key, so that
-     * exact matches require no tree node offset computation.
-     */
-    rtree_elm_t *elms[RTREE_HEIGHT_MAX + 1];
+#ifndef _MSC_VER
+    JEMALLOC_ALIGNED(CACHELINE)
+#endif
+    rtree_ctx_cache_elm_t cache[RTREE_CTX_NCACHE];
 };

 struct rtree_s {
@@ -12,6 +12,7 @@ typedef struct rtree_elm_s rtree_elm_t;
 typedef struct rtree_elm_witness_s rtree_elm_witness_t;
 typedef struct rtree_elm_witness_tsd_s rtree_elm_witness_tsd_t;
 typedef struct rtree_level_s rtree_level_t;
+typedef struct rtree_ctx_cache_elm_s rtree_ctx_cache_elm_t;
 typedef struct rtree_ctx_s rtree_ctx_t;
 typedef struct rtree_s rtree_t;

@@ -25,11 +26,24 @@ typedef struct rtree_s rtree_t;
 #define RTREE_HEIGHT_MAX \
     ((1U << (LG_SIZEOF_PTR+3)) / RTREE_BITS_PER_LEVEL)

+/*
+ * Number of leafkey/leaf pairs to cache. Each entry supports an entire leaf,
+ * so the cache hit rate is typically high even with a small number of entries.
+ * In rare cases extent activity will straddle the boundary between two leaf
+ * nodes. Furthermore, an arena may use a combination of dss and mmap. Four
+ * entries covers both of these considerations as long as locality of reference
+ * is high, and/or total memory usage doesn't exceed the range supported by
+ * those entries. Note that as memory usage grows past the amount that this
+ * cache can directly cover, the cache will become less effective if locality of
+ * reference is low, but the consequence is merely cache misses while traversing
+ * the tree nodes, and the cache will itself suffer cache misses if made overly
+ * large, not to mention the cost of linear search.
+ */
+#define RTREE_CTX_NCACHE 8
+
+/* Static initializer for rtree_ctx_t. */
 #define RTREE_CTX_INITIALIZER { \
-    false, \
-    0, \
-    0, \
-    {NULL /* C initializes all trailing elements to NULL. */} \
+    {{0, NULL} /* C initializes all trailing elements to NULL. */} \
 }

 /*
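A side note on the new RTREE_CTX_INITIALIZER: it spells out only the first cache entry and relies on C's rule that unnamed trailing aggregate elements are zero-initialized, as the comment in the diff says. A tiny hypothetical illustration of that rule (not jemalloc code):

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

typedef struct { uintptr_t leafkey; void *leaf; } elm_t;

int main(void) {
    /* Only element 0 is written explicitly; C zero-fills elements 1..7. */
    elm_t cache[8] = {{0, NULL}};
    for (int i = 0; i < 8; i++) {
        assert(cache[i].leafkey == 0 && cache[i].leaf == NULL);
    }
    return 0;
}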