server-skynet-source-3rd-je.../include/jemalloc/internal/rtree.h
Qi Wang b6a7a535b3 Optimize away a branch on the free fastpath.
On the rtree metadata lookup fast path, the lookup never returns NULL when the
cache key matches, but the compiler cannot know this.  The previous logic
checked the return value against NULL, adding an extra branch on top of the
cache-key comparison.  Make lookup_fast return a bool indicating cache miss /
match instead, so that the extra branch is avoided.
2021-10-28 16:55:54 -07:00
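In caller terms the change replaces a NULL-pointer convention with an explicit miss flag; a minimal before/after sketch of the call site (argument lists abbreviated, the old shape inferred from the description above):

    /* Before: the NULL check adds a second branch after the cache-key compare. */
    rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup_fast(tsdn, rtree, rtree_ctx, key);
    if (elm == NULL) {
        return true;
    }

    /* After: the caller branches on the same condition the lookup already computed. */
    rtree_leaf_elm_t *elm;
    if (rtree_leaf_elm_lookup_fast(tsdn, rtree, rtree_ctx, key, &elm)) {
        return true;
    }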

#ifndef JEMALLOC_INTERNAL_RTREE_H
#define JEMALLOC_INTERNAL_RTREE_H
#include "jemalloc/internal/atomic.h"
#include "jemalloc/internal/mutex.h"
#include "jemalloc/internal/rtree_tsd.h"
#include "jemalloc/internal/sc.h"
#include "jemalloc/internal/tsd.h"
/*
* This radix tree implementation is tailored to the singular purpose of
* associating metadata with extents that are currently owned by jemalloc.
*
*******************************************************************************
*/
/* Number of high insignificant bits. */
#define RTREE_NHIB ((1U << (LG_SIZEOF_PTR+3)) - LG_VADDR)
/* Number of low insignificant bits. */
#define RTREE_NLIB LG_PAGE
/* Number of significant bits. */
#define RTREE_NSB (LG_VADDR - RTREE_NLIB)
/* Number of levels in radix tree. */
#if RTREE_NSB <= 10
# define RTREE_HEIGHT 1
#elif RTREE_NSB <= 36
# define RTREE_HEIGHT 2
#elif RTREE_NSB <= 52
# define RTREE_HEIGHT 3
#else
# error Unsupported number of significant virtual address bits
#endif
/* Use compact leaf representation if virtual address encoding allows. */
#if RTREE_NHIB >= LG_CEIL(SC_NSIZES)
# define RTREE_LEAF_COMPACT
#endif
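/*
* For example, under a common 64-bit Linux x86-64 configuration (assuming
* LG_SIZEOF_PTR == 3, LG_VADDR == 48 and LG_PAGE == 12), these work out to:
*
*   RTREE_NHIB = 64 - 48 = 16
*   RTREE_NLIB = 12
*   RTREE_NSB  = 48 - 12 = 36, hence RTREE_HEIGHT == 2
*
* and, since LG_CEIL(SC_NSIZES) is well below 16, RTREE_LEAF_COMPACT gets
* defined, i.e. each leaf element packs into a single pointer-sized word.
*/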
typedef struct rtree_node_elm_s rtree_node_elm_t;
struct rtree_node_elm_s {
atomic_p_t child; /* (rtree_{node,leaf}_elm_t *) */
};
typedef struct rtree_metadata_s rtree_metadata_t;
struct rtree_metadata_s {
szind_t szind;
extent_state_t state; /* Mirrors edata->state. */
bool is_head; /* Mirrors edata->is_head. */
bool slab;
};
typedef struct rtree_contents_s rtree_contents_t;
struct rtree_contents_s {
edata_t *edata;
rtree_metadata_t metadata;
};
#define RTREE_LEAF_STATE_WIDTH EDATA_BITS_STATE_WIDTH
#define RTREE_LEAF_STATE_SHIFT 2
#define RTREE_LEAF_STATE_MASK MASK(RTREE_LEAF_STATE_WIDTH, RTREE_LEAF_STATE_SHIFT)
struct rtree_leaf_elm_s {
#ifdef RTREE_LEAF_COMPACT
/*
* Single pointer-width field containing all three leaf element fields.
* For example, on a 64-bit x64 system with 48 significant virtual
* memory address bits, the index, edata, and slab fields are packed as
* such:
*
* x: index
* e: edata
* s: state
* h: is_head
* b: slab
*
* 00000000 xxxxxxxx eeeeeeee [...] eeeeeeee e00ssshb
*/
atomic_p_t le_bits;
#else
atomic_p_t le_edata; /* (edata_t *) */
/*
* From high to low bits: szind (8 bits), state (4 bits), is_head, slab
*/
atomic_u_t le_metadata;
#endif
};
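/*
* Concretely, with LG_VADDR == 48 and the compact representation, the packed
* word drawn in the diagram above is (assuming EDATA_ALIGNMENT == 128, so the
* low 7 pointer bits are free, and a 3-bit state field as drawn):
*
*   bits 63..56  zero
*   bits 55..48  szind
*   bits 47..7   edata pointer (its low bits are zero by alignment)
*   bits  6..5   zero padding
*   bits  4..2   state
*   bit   1      is_head
*   bit   0      slab
*/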
typedef struct rtree_level_s rtree_level_t;
struct rtree_level_s {
/* Number of key bits distinguished by this level. */
unsigned bits;
/*
* Cumulative number of key bits distinguished by traversing to
* corresponding tree level.
*/
unsigned cumbits;
};
typedef struct rtree_s rtree_t;
struct rtree_s {
base_t *base;
malloc_mutex_t init_lock;
/* Number of elements based on rtree_levels[0].bits. */
#if RTREE_HEIGHT > 1
rtree_node_elm_t root[1U << (RTREE_NSB/RTREE_HEIGHT)];
#else
rtree_leaf_elm_t root[1U << (RTREE_NSB/RTREE_HEIGHT)];
#endif
};
/*
* Split the bits into one to three partitions depending on the number of
* significant bits. If the number of bits does not divide evenly into the
* number of levels, place one remainder bit per level starting at the leaf
* level.
*/
static const rtree_level_t rtree_levels[] = {
#if RTREE_HEIGHT == 1
{RTREE_NSB, RTREE_NHIB + RTREE_NSB}
#elif RTREE_HEIGHT == 2
{RTREE_NSB/2, RTREE_NHIB + RTREE_NSB/2},
{RTREE_NSB/2 + RTREE_NSB%2, RTREE_NHIB + RTREE_NSB}
#elif RTREE_HEIGHT == 3
{RTREE_NSB/3, RTREE_NHIB + RTREE_NSB/3},
{RTREE_NSB/3 + RTREE_NSB%3/2,
RTREE_NHIB + RTREE_NSB/3*2 + RTREE_NSB%3/2},
{RTREE_NSB/3 + RTREE_NSB%3 - RTREE_NSB%3/2, RTREE_NHIB + RTREE_NSB}
#else
# error Unsupported rtree height
#endif
};
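/*
* Continuing the LG_VADDR == 48 example (RTREE_NSB == 36, RTREE_HEIGHT == 2):
* both levels get 18 bits, with cumulative counts {16 + 18, 16 + 36} ==
* {34, 52}, i.e. a root of 2^18 child pointers and leaf nodes of 2^18
* elements each. Were RTREE_NSB odd (say a hypothetical 37), the leaf level
* would take the remainder bit: {18, 19}.
*/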
bool rtree_new(rtree_t *rtree, base_t *base, bool zeroed);
rtree_leaf_elm_t *rtree_leaf_elm_lookup_hard(tsdn_t *tsdn, rtree_t *rtree,
rtree_ctx_t *rtree_ctx, uintptr_t key, bool dependent, bool init_missing);
JEMALLOC_ALWAYS_INLINE unsigned
rtree_leaf_maskbits(void) {
unsigned ptrbits = ZU(1) << (LG_SIZEOF_PTR+3);
unsigned cumbits = (rtree_levels[RTREE_HEIGHT-1].cumbits -
rtree_levels[RTREE_HEIGHT-1].bits);
return ptrbits - cumbits;
}
JEMALLOC_ALWAYS_INLINE uintptr_t
rtree_leafkey(uintptr_t key) {
uintptr_t mask = ~((ZU(1) << rtree_leaf_maskbits()) - 1);
return (key & mask);
}
JEMALLOC_ALWAYS_INLINE size_t
rtree_cache_direct_map(uintptr_t key) {
return (size_t)((key >> rtree_leaf_maskbits()) &
(RTREE_CTX_NCACHE - 1));
}
JEMALLOC_ALWAYS_INLINE uintptr_t
rtree_subkey(uintptr_t key, unsigned level) {
unsigned ptrbits = ZU(1) << (LG_SIZEOF_PTR+3);
unsigned cumbits = rtree_levels[level].cumbits;
unsigned shiftbits = ptrbits - cumbits;
unsigned maskbits = rtree_levels[level].bits;
uintptr_t mask = (ZU(1) << maskbits) - 1;
return ((key >> shiftbits) & mask);
}
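/*
* Worked example, again with LG_VADDR == 48 and LG_PAGE == 12 (RTREE_HEIGHT
* == 2, so rtree_levels[1].bits == 18 and rtree_levels[1].cumbits == 52):
*
*   rtree_leaf_maskbits()       == 64 - (52 - 18) == 30, i.e. one leaf node
*                                  spans 2^30 bytes (1 GiB) of address space;
*   rtree_leafkey(key)          == key & ~((ZU(1) << 30) - 1);
*   rtree_cache_direct_map(key) == (key >> 30) & (RTREE_CTX_NCACHE - 1);
*   rtree_subkey(key, 1)        == (key >> 12) & ((ZU(1) << 18) - 1), the
*                                  page index within that leaf.
*/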
/*
* Atomic getters.
*
* dependent: Reading a value on behalf of a pointer to a valid allocation
* is guaranteed to be a clean read even without synchronization,
* because the rtree update became visible in memory before the
* pointer came into existence.
* !dependent: An arbitrary read, e.g. on behalf of ivsalloc(), may not be
* dependent on a previous rtree write, which means a stale read
* could result if synchronization were omitted here.
*/
# ifdef RTREE_LEAF_COMPACT
JEMALLOC_ALWAYS_INLINE uintptr_t
rtree_leaf_elm_bits_read(tsdn_t *tsdn, rtree_t *rtree,
rtree_leaf_elm_t *elm, bool dependent) {
return (uintptr_t)atomic_load_p(&elm->le_bits, dependent
? ATOMIC_RELAXED : ATOMIC_ACQUIRE);
}
JEMALLOC_ALWAYS_INLINE uintptr_t
rtree_leaf_elm_bits_encode(rtree_contents_t contents) {
assert((uintptr_t)contents.edata % (uintptr_t)EDATA_ALIGNMENT == 0);
uintptr_t edata_bits = (uintptr_t)contents.edata
& (((uintptr_t)1 << LG_VADDR) - 1);
uintptr_t szind_bits = (uintptr_t)contents.metadata.szind << LG_VADDR;
uintptr_t slab_bits = (uintptr_t)contents.metadata.slab;
uintptr_t is_head_bits = (uintptr_t)contents.metadata.is_head << 1;
uintptr_t state_bits = (uintptr_t)contents.metadata.state <<
RTREE_LEAF_STATE_SHIFT;
uintptr_t metadata_bits = szind_bits | state_bits | is_head_bits |
slab_bits;
assert((edata_bits & metadata_bits) == 0);
return edata_bits | metadata_bits;
}
JEMALLOC_ALWAYS_INLINE rtree_contents_t
rtree_leaf_elm_bits_decode(uintptr_t bits) {
rtree_contents_t contents;
/* Do the easy things first. */
contents.metadata.szind = bits >> LG_VADDR;
contents.metadata.slab = (bool)(bits & 1);
contents.metadata.is_head = (bool)(bits & (1 << 1));
uintptr_t state_bits = (bits & RTREE_LEAF_STATE_MASK) >>
RTREE_LEAF_STATE_SHIFT;
assert(state_bits <= extent_state_max);
contents.metadata.state = (extent_state_t)state_bits;
uintptr_t low_bit_mask = ~((uintptr_t)EDATA_ALIGNMENT - 1);
# ifdef __aarch64__
/*
* aarch64 doesn't sign extend the highest virtual address bit to set
* the higher ones. Instead, the high bits get zeroed.
*/
uintptr_t high_bit_mask = ((uintptr_t)1 << LG_VADDR) - 1;
/* Mask off metadata. */
uintptr_t mask = high_bit_mask & low_bit_mask;
contents.edata = (edata_t *)(bits & mask);
# else
/* Restore sign-extended high bits, mask metadata bits. */
contents.edata = (edata_t *)((uintptr_t)((intptr_t)(bits << RTREE_NHIB)
>> RTREE_NHIB) & low_bit_mask);
# endif
assert((uintptr_t)contents.edata % (uintptr_t)EDATA_ALIGNMENT == 0);
return contents;
}
# endif /* RTREE_LEAF_COMPACT */
JEMALLOC_ALWAYS_INLINE rtree_contents_t
rtree_leaf_elm_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm,
bool dependent) {
#ifdef RTREE_LEAF_COMPACT
uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, dependent);
rtree_contents_t contents = rtree_leaf_elm_bits_decode(bits);
return contents;
#else
rtree_contents_t contents;
unsigned metadata_bits = atomic_load_u(&elm->le_metadata, dependent
? ATOMIC_RELAXED : ATOMIC_ACQUIRE);
contents.metadata.slab = (bool)(metadata_bits & 1);
contents.metadata.is_head = (bool)(metadata_bits & (1 << 1));
uintptr_t state_bits = (metadata_bits & RTREE_LEAF_STATE_MASK) >>
RTREE_LEAF_STATE_SHIFT;
assert(state_bits <= extent_state_max);
contents.metadata.state = (extent_state_t)state_bits;
contents.metadata.szind = metadata_bits >> (RTREE_LEAF_STATE_SHIFT +
RTREE_LEAF_STATE_WIDTH);
contents.edata = (edata_t *)atomic_load_p(&elm->le_edata, dependent
? ATOMIC_RELAXED : ATOMIC_ACQUIRE);
return contents;
#endif
}
JEMALLOC_ALWAYS_INLINE void
rtree_contents_encode(rtree_contents_t contents, void **bits,
unsigned *additional) {
#ifdef RTREE_LEAF_COMPACT
*bits = (void *)rtree_leaf_elm_bits_encode(contents);
#else
*additional = (unsigned)contents.metadata.slab
| ((unsigned)contents.metadata.is_head << 1)
| ((unsigned)contents.metadata.state << RTREE_LEAF_STATE_SHIFT)
| ((unsigned)contents.metadata.szind << (RTREE_LEAF_STATE_SHIFT +
RTREE_LEAF_STATE_WIDTH));
*bits = contents.edata;
#endif
}
JEMALLOC_ALWAYS_INLINE void
rtree_leaf_elm_write_commit(tsdn_t *tsdn, rtree_t *rtree,
rtree_leaf_elm_t *elm, void *bits, unsigned additional) {
#ifdef RTREE_LEAF_COMPACT
atomic_store_p(&elm->le_bits, bits, ATOMIC_RELEASE);
#else
atomic_store_u(&elm->le_metadata, additional, ATOMIC_RELEASE);
/*
* Write edata last, since the element is atomically considered valid
* as soon as the edata field is non-NULL.
*/
atomic_store_p(&elm->le_edata, bits, ATOMIC_RELEASE);
#endif
}
JEMALLOC_ALWAYS_INLINE void
rtree_leaf_elm_write(tsdn_t *tsdn, rtree_t *rtree,
rtree_leaf_elm_t *elm, rtree_contents_t contents) {
assert((uintptr_t)contents.edata % EDATA_ALIGNMENT == 0);
void *bits;
unsigned additional;
rtree_contents_encode(contents, &bits, &additional);
rtree_leaf_elm_write_commit(tsdn, rtree, elm, bits, additional);
}
/* The state field can be updated independently (and more frequently). */
JEMALLOC_ALWAYS_INLINE void
rtree_leaf_elm_state_update(tsdn_t *tsdn, rtree_t *rtree,
rtree_leaf_elm_t *elm1, rtree_leaf_elm_t *elm2, extent_state_t state) {
assert(elm1 != NULL);
#ifdef RTREE_LEAF_COMPACT
uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm1,
/* dependent */ true);
bits &= ~RTREE_LEAF_STATE_MASK;
bits |= state << RTREE_LEAF_STATE_SHIFT;
atomic_store_p(&elm1->le_bits, (void *)bits, ATOMIC_RELEASE);
if (elm2 != NULL) {
atomic_store_p(&elm2->le_bits, (void *)bits, ATOMIC_RELEASE);
}
#else
unsigned bits = atomic_load_u(&elm1->le_metadata, ATOMIC_RELAXED);
bits &= ~RTREE_LEAF_STATE_MASK;
bits |= state << RTREE_LEAF_STATE_SHIFT;
atomic_store_u(&elm1->le_metadata, bits, ATOMIC_RELEASE);
if (elm2 != NULL) {
atomic_store_u(&elm2->le_metadata, bits, ATOMIC_RELEASE);
}
#endif
}
/*
* Tries to look up the key in the L1 cache, returning false if there's a hit, or
* true if there's a miss.
* Key is allowed to be 0; returns true (a miss) in this case.
*/
JEMALLOC_ALWAYS_INLINE bool
rtree_leaf_elm_lookup_fast(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
uintptr_t key, rtree_leaf_elm_t **elm) {
size_t slot = rtree_cache_direct_map(key);
uintptr_t leafkey = rtree_leafkey(key);
assert(leafkey != RTREE_LEAFKEY_INVALID);
if (unlikely(rtree_ctx->cache[slot].leafkey != leafkey)) {
return true;
}
rtree_leaf_elm_t *leaf = rtree_ctx->cache[slot].leaf;
assert(leaf != NULL);
uintptr_t subkey = rtree_subkey(key, RTREE_HEIGHT-1);
*elm = &leaf[subkey];
return false;
}
JEMALLOC_ALWAYS_INLINE rtree_leaf_elm_t *
rtree_leaf_elm_lookup(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
uintptr_t key, bool dependent, bool init_missing) {
assert(key != 0);
assert(!dependent || !init_missing);
size_t slot = rtree_cache_direct_map(key);
uintptr_t leafkey = rtree_leafkey(key);
assert(leafkey != RTREE_LEAFKEY_INVALID);
/* Fast path: L1 direct mapped cache. */
if (likely(rtree_ctx->cache[slot].leafkey == leafkey)) {
rtree_leaf_elm_t *leaf = rtree_ctx->cache[slot].leaf;
assert(leaf != NULL);
uintptr_t subkey = rtree_subkey(key, RTREE_HEIGHT-1);
return &leaf[subkey];
}
/*
* Search the L2 LRU cache. On hit, swap the matching element into the
* slot in L1 cache, and move the position in L2 up by 1.
*/
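/*
* For example, a hit on l2_cache[2] installs that leaf in the L1 slot, the
* entry displaced from L1 moves to l2_cache[1], and the previous l2_cache[1]
* entry slides down to l2_cache[2]; repeated hits thus migrate hot leaves
* toward L1.
*/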
#define RTREE_CACHE_CHECK_L2(i) do { \
if (likely(rtree_ctx->l2_cache[i].leafkey == leafkey)) { \
rtree_leaf_elm_t *leaf = rtree_ctx->l2_cache[i].leaf; \
assert(leaf != NULL); \
if (i > 0) { \
/* Bubble up by one. */ \
rtree_ctx->l2_cache[i].leafkey = \
rtree_ctx->l2_cache[i - 1].leafkey; \
rtree_ctx->l2_cache[i].leaf = \
rtree_ctx->l2_cache[i - 1].leaf; \
rtree_ctx->l2_cache[i - 1].leafkey = \
rtree_ctx->cache[slot].leafkey; \
rtree_ctx->l2_cache[i - 1].leaf = \
rtree_ctx->cache[slot].leaf; \
} else { \
rtree_ctx->l2_cache[0].leafkey = \
rtree_ctx->cache[slot].leafkey; \
rtree_ctx->l2_cache[0].leaf = \
rtree_ctx->cache[slot].leaf; \
} \
rtree_ctx->cache[slot].leafkey = leafkey; \
rtree_ctx->cache[slot].leaf = leaf; \
uintptr_t subkey = rtree_subkey(key, RTREE_HEIGHT-1); \
return &leaf[subkey]; \
} \
} while (0)
/* Check the first cache entry. */
RTREE_CACHE_CHECK_L2(0);
/* Search the remaining cache elements. */
for (unsigned i = 1; i < RTREE_CTX_NCACHE_L2; i++) {
RTREE_CACHE_CHECK_L2(i);
}
#undef RTREE_CACHE_CHECK_L2
return rtree_leaf_elm_lookup_hard(tsdn, rtree, rtree_ctx, key,
dependent, init_missing);
}
/*
* Returns true on lookup failure.
*/
static inline bool
rtree_read_independent(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
uintptr_t key, rtree_contents_t *r_contents) {
rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx,
key, /* dependent */ false, /* init_missing */ false);
if (elm == NULL) {
return true;
}
*r_contents = rtree_leaf_elm_read(tsdn, rtree, elm,
/* dependent */ false);
return false;
}
static inline rtree_contents_t
rtree_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
uintptr_t key) {
rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx,
key, /* dependent */ true, /* init_missing */ false);
assert(elm != NULL);
return rtree_leaf_elm_read(tsdn, rtree, elm, /* dependent */ true);
}
static inline rtree_metadata_t
rtree_metadata_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
uintptr_t key) {
rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx,
key, /* dependent */ true, /* init_missing */ false);
assert(elm != NULL);
return rtree_leaf_elm_read(tsdn, rtree, elm,
/* dependent */ true).metadata;
}
/*
* Returns true when the request cannot be fulfilled by the fast path.
*/
static inline bool
rtree_metadata_try_read_fast(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
uintptr_t key, rtree_metadata_t *r_rtree_metadata) {
rtree_leaf_elm_t *elm;
/*
* Check the bool return value (hit vs. miss) rather than elm == NULL, which
* would cost an extra branch: on a cache hit the returned pointer is never
* NULL, but the compiler cannot prove that and therefore cannot merge the
* two checks.
*/
if (rtree_leaf_elm_lookup_fast(tsdn, rtree, rtree_ctx, key, &elm)) {
return true;
}
assert(elm != NULL);
*r_rtree_metadata = rtree_leaf_elm_read(tsdn, rtree, elm,
/* dependent */ true).metadata;
return false;
}
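/*
* Caller-side sketch (illustrative, not a function in this header): a
* deallocation fast path can branch on the miss flag directly and fall back
* to its slow path on a miss:
*
*   rtree_metadata_t metadata;
*   if (rtree_metadata_try_read_fast(tsdn, rtree, rtree_ctx, (uintptr_t)ptr,
*       &metadata)) {
*       return false;
*   }
*   szind_t szind = metadata.szind;
*   bool slab = metadata.slab;
*
* The single branch above corresponds to the cache-key comparison inside
* rtree_leaf_elm_lookup_fast(); no separate NULL check is emitted.
*/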
JEMALLOC_ALWAYS_INLINE void
rtree_write_range_impl(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
uintptr_t base, uintptr_t end, rtree_contents_t contents, bool clearing) {
assert((base & PAGE_MASK) == 0 && (end & PAGE_MASK) == 0);
/*
* Only used for emap_(de)register_interior, which implies the
* boundaries have been registered already. Therefore all the lookups
* are dependent w/o init_missing, assuming the range spans across at
* most 2 rtree leaf nodes (each covers 1 GiB of vaddr).
*/
void *bits;
unsigned additional;
rtree_contents_encode(contents, &bits, &additional);
rtree_leaf_elm_t *elm = NULL; /* Dead store. */
for (uintptr_t addr = base; addr <= end; addr += PAGE) {
if (addr == base ||
(addr & ((ZU(1) << rtree_leaf_maskbits()) - 1)) == 0) {
elm = rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx, addr,
/* dependent */ true, /* init_missing */ false);
assert(elm != NULL);
}
assert(elm == rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx, addr,
/* dependent */ true, /* init_missing */ false));
assert(!clearing || rtree_leaf_elm_read(tsdn, rtree, elm,
/* dependent */ true).edata != NULL);
rtree_leaf_elm_write_commit(tsdn, rtree, elm, bits, additional);
elm++;
}
}
JEMALLOC_ALWAYS_INLINE void
rtree_write_range(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
uintptr_t base, uintptr_t end, rtree_contents_t contents) {
rtree_write_range_impl(tsdn, rtree, rtree_ctx, base, end, contents,
/* clearing */ false);
}
JEMALLOC_ALWAYS_INLINE bool
rtree_write(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key,
rtree_contents_t contents) {
rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx,
key, /* dependent */ false, /* init_missing */ true);
if (elm == NULL) {
return true;
}
rtree_leaf_elm_write(tsdn, rtree, elm, contents);
return false;
}
static inline void
rtree_clear(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
uintptr_t key) {
rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx,
key, /* dependent */ true, /* init_missing */ false);
assert(elm != NULL);
assert(rtree_leaf_elm_read(tsdn, rtree, elm,
/* dependent */ true).edata != NULL);
rtree_contents_t contents;
contents.edata = NULL;
contents.metadata.szind = SC_NSIZES;
contents.metadata.slab = false;
contents.metadata.is_head = false;
contents.metadata.state = (extent_state_t)0;
rtree_leaf_elm_write(tsdn, rtree, elm, contents);
}
static inline void
rtree_clear_range(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
uintptr_t base, uintptr_t end) {
rtree_contents_t contents;
contents.edata = NULL;
contents.metadata.szind = SC_NSIZES;
contents.metadata.slab = false;
contents.metadata.is_head = false;
contents.metadata.state = (extent_state_t)0;
rtree_write_range_impl(tsdn, rtree, rtree_ctx, base, end, contents,
/* clearing */ true);
}
#endif /* JEMALLOC_INTERNAL_RTREE_H */