Implement compact rtree leaf element representation.
If a single virtual adddress pointer has enough unused bits to pack {szind_t, extent_t *, bool, bool}, use a single pointer-sized field in each rtree leaf element, rather than using three separate fields. This has little impact on access speed (fewer loads/stores, but more bit twiddling), except that denser representation increases TLB effectiveness.
This commit is contained in:
parent
ce41ab0c57
commit
0ee0e0c155
@ -419,6 +419,11 @@ rtree_extent_szind_read
|
||||
rtree_leaf_alloc
|
||||
rtree_leaf_dalloc
|
||||
rtree_leaf_elm_acquire
|
||||
rtree_leaf_elm_bits_extent_get
|
||||
rtree_leaf_elm_bits_locked_get
|
||||
rtree_leaf_elm_bits_read
|
||||
rtree_leaf_elm_bits_slab_get
|
||||
rtree_leaf_elm_bits_szind_get
|
||||
rtree_leaf_elm_extent_read
|
||||
rtree_leaf_elm_extent_write
|
||||
rtree_leaf_elm_lookup
|
||||
|
@ -4,6 +4,14 @@
|
||||
#ifndef JEMALLOC_ENABLE_INLINE
|
||||
uintptr_t rtree_leafkey(uintptr_t key);
|
||||
uintptr_t rtree_subkey(uintptr_t key, unsigned level);
|
||||
# ifdef RTREE_LEAF_COMPACT
|
||||
uintptr_t rtree_leaf_elm_bits_read(tsdn_t *tsdn, rtree_t *rtree,
|
||||
rtree_leaf_elm_t *elm, bool acquired, bool dependent);
|
||||
extent_t *rtree_leaf_elm_bits_extent_get(uintptr_t bits);
|
||||
szind_t rtree_leaf_elm_bits_szind_get(uintptr_t bits);
|
||||
bool rtree_leaf_elm_bits_slab_get(uintptr_t bits);
|
||||
bool rtree_leaf_elm_bits_locked_get(uintptr_t bits);
|
||||
# endif
|
||||
extent_t *rtree_leaf_elm_extent_read(tsdn_t *tsdn, rtree_t *rtree,
|
||||
rtree_leaf_elm_t *elm, bool acquired, bool dependent);
|
||||
szind_t rtree_leaf_elm_szind_read(tsdn_t *tsdn, rtree_t *rtree,
|
||||
@ -75,6 +83,42 @@ rtree_subkey(uintptr_t key, unsigned level) {
|
||||
* dependent on a previous rtree write, which means a stale read
|
||||
* could result if synchronization were omitted here.
|
||||
*/
|
||||
# ifdef RTREE_LEAF_COMPACT
|
||||
JEMALLOC_ALWAYS_INLINE uintptr_t
|
||||
rtree_leaf_elm_bits_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm,
|
||||
bool acquired, bool dependent) {
|
||||
if (config_debug && acquired) {
|
||||
assert(dependent);
|
||||
rtree_leaf_elm_witness_access(tsdn, rtree, elm);
|
||||
}
|
||||
|
||||
return (uintptr_t)atomic_load_p(&elm->le_bits, dependent
|
||||
? ATOMIC_RELAXED : ATOMIC_ACQUIRE);
|
||||
}
|
||||
|
||||
JEMALLOC_ALWAYS_INLINE extent_t *
|
||||
rtree_leaf_elm_bits_extent_get(uintptr_t bits) {
|
||||
/* Restore sign-extended high bits, mask slab and lock bits. */
|
||||
return (extent_t *)((uintptr_t)((intptr_t)(bits << RTREE_NHIB) >>
|
||||
RTREE_NHIB) & ~((uintptr_t)0x3));
|
||||
}
|
||||
|
||||
JEMALLOC_ALWAYS_INLINE szind_t
|
||||
rtree_leaf_elm_bits_szind_get(uintptr_t bits) {
|
||||
return (szind_t)(bits >> LG_VADDR);
|
||||
}
|
||||
|
||||
JEMALLOC_ALWAYS_INLINE bool
|
||||
rtree_leaf_elm_bits_slab_get(uintptr_t bits) {
|
||||
return (bool)((bits >> 1) & (uintptr_t)0x1);
|
||||
}
|
||||
|
||||
JEMALLOC_ALWAYS_INLINE bool
|
||||
rtree_leaf_elm_bits_locked_get(uintptr_t bits) {
|
||||
return (bool)(bits & (uintptr_t)0x1);
|
||||
}
|
||||
# endif
|
||||
|
||||
JEMALLOC_ALWAYS_INLINE extent_t *
|
||||
rtree_leaf_elm_extent_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm,
|
||||
bool acquired, bool dependent) {
|
||||
@ -83,6 +127,12 @@ rtree_leaf_elm_extent_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm,
|
||||
rtree_leaf_elm_witness_access(tsdn, rtree, elm);
|
||||
}
|
||||
|
||||
#ifdef RTREE_LEAF_COMPACT
|
||||
uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, acquired,
|
||||
dependent);
|
||||
assert(!acquired || rtree_leaf_elm_bits_locked_get(bits));
|
||||
return rtree_leaf_elm_bits_extent_get(bits);
|
||||
#else
|
||||
extent_t *extent = (extent_t *)atomic_load_p(&elm->le_extent, dependent
|
||||
? ATOMIC_RELAXED : ATOMIC_ACQUIRE);
|
||||
assert(!acquired || ((uintptr_t)extent & (uintptr_t)0x1) ==
|
||||
@ -90,6 +140,7 @@ rtree_leaf_elm_extent_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm,
|
||||
/* Mask lock bit. */
|
||||
extent = (extent_t *)((uintptr_t)extent & ~((uintptr_t)0x1));
|
||||
return extent;
|
||||
#endif
|
||||
}
|
||||
|
||||
JEMALLOC_ALWAYS_INLINE szind_t
|
||||
@ -100,8 +151,15 @@ rtree_leaf_elm_szind_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm,
|
||||
rtree_leaf_elm_witness_access(tsdn, rtree, elm);
|
||||
}
|
||||
|
||||
#ifdef RTREE_LEAF_COMPACT
|
||||
uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, acquired,
|
||||
dependent);
|
||||
assert(!acquired || rtree_leaf_elm_bits_locked_get(bits));
|
||||
return rtree_leaf_elm_bits_szind_get(bits);
|
||||
#else
|
||||
return (szind_t)atomic_load_u(&elm->le_szind, dependent ? ATOMIC_RELAXED
|
||||
: ATOMIC_ACQUIRE);
|
||||
#endif
|
||||
}
|
||||
|
||||
JEMALLOC_ALWAYS_INLINE bool
|
||||
@ -112,8 +170,15 @@ rtree_leaf_elm_slab_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm,
|
||||
rtree_leaf_elm_witness_access(tsdn, rtree, elm);
|
||||
}
|
||||
|
||||
#ifdef RTREE_LEAF_COMPACT
|
||||
uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, acquired,
|
||||
dependent);
|
||||
assert(!acquired || rtree_leaf_elm_bits_locked_get(bits));
|
||||
return rtree_leaf_elm_bits_slab_get(bits);
|
||||
#else
|
||||
return atomic_load_b(&elm->le_slab, dependent ? ATOMIC_RELAXED :
|
||||
ATOMIC_ACQUIRE);
|
||||
#endif
|
||||
}
|
||||
|
||||
JEMALLOC_INLINE void
|
||||
@ -124,11 +189,21 @@ rtree_leaf_elm_extent_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm,
|
||||
}
|
||||
assert(((uintptr_t)extent & (uintptr_t)0x1) == (uintptr_t)0x0);
|
||||
|
||||
#ifdef RTREE_LEAF_COMPACT
|
||||
uintptr_t old_bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm,
|
||||
acquired, acquired);
|
||||
uintptr_t bits = ((uintptr_t)rtree_leaf_elm_bits_szind_get(old_bits) <<
|
||||
LG_VADDR) | ((uintptr_t)extent & (((uintptr_t)0x1 << LG_VADDR) - 1))
|
||||
| ((uintptr_t)rtree_leaf_elm_bits_slab_get(old_bits) << 1) |
|
||||
(uintptr_t)acquired;
|
||||
atomic_store_p(&elm->le_bits, (void *)bits, ATOMIC_RELEASE);
|
||||
#else
|
||||
if (acquired) {
|
||||
/* Overlay lock bit. */
|
||||
extent = (extent_t *)((uintptr_t)extent | (uintptr_t)0x1);
|
||||
}
|
||||
atomic_store_p(&elm->le_extent, extent, ATOMIC_RELEASE);
|
||||
#endif
|
||||
}
|
||||
|
||||
JEMALLOC_INLINE void
|
||||
@ -139,7 +214,18 @@ rtree_leaf_elm_szind_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm,
|
||||
}
|
||||
assert(szind <= NSIZES);
|
||||
|
||||
#ifdef RTREE_LEAF_COMPACT
|
||||
uintptr_t old_bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm,
|
||||
acquired, acquired);
|
||||
uintptr_t bits = ((uintptr_t)szind << LG_VADDR) |
|
||||
((uintptr_t)rtree_leaf_elm_bits_extent_get(old_bits) &
|
||||
(((uintptr_t)0x1 << LG_VADDR) - 1)) |
|
||||
((uintptr_t)rtree_leaf_elm_bits_slab_get(old_bits) << 1) |
|
||||
(uintptr_t)acquired;
|
||||
atomic_store_p(&elm->le_bits, (void *)bits, ATOMIC_RELEASE);
|
||||
#else
|
||||
atomic_store_u(&elm->le_szind, szind, ATOMIC_RELEASE);
|
||||
#endif
|
||||
}
|
||||
|
||||
JEMALLOC_INLINE void
|
||||
@ -149,12 +235,35 @@ rtree_leaf_elm_slab_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm,
|
||||
rtree_leaf_elm_witness_access(tsdn, rtree, elm);
|
||||
}
|
||||
|
||||
#ifdef RTREE_LEAF_COMPACT
|
||||
uintptr_t old_bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm,
|
||||
acquired, acquired);
|
||||
uintptr_t bits = ((uintptr_t)rtree_leaf_elm_bits_szind_get(old_bits) <<
|
||||
LG_VADDR) | ((uintptr_t)rtree_leaf_elm_bits_extent_get(old_bits) &
|
||||
(((uintptr_t)0x1 << LG_VADDR) - 1)) | ((uintptr_t)slab << 1) |
|
||||
(uintptr_t)acquired;
|
||||
atomic_store_p(&elm->le_bits, (void *)bits, ATOMIC_RELEASE);
|
||||
#else
|
||||
atomic_store_b(&elm->le_slab, slab, ATOMIC_RELEASE);
|
||||
#endif
|
||||
}
|
||||
|
||||
JEMALLOC_INLINE void
|
||||
rtree_leaf_elm_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm,
|
||||
bool acquired, extent_t *extent, szind_t szind, bool slab) {
|
||||
#ifdef RTREE_LEAF_COMPACT
|
||||
if (config_debug && acquired) {
|
||||
rtree_leaf_elm_witness_access(tsdn, rtree, elm);
|
||||
}
|
||||
assert(!slab || szind < NBINS);
|
||||
|
||||
uintptr_t bits = ((uintptr_t)szind << LG_VADDR) |
|
||||
((uintptr_t)extent & (((uintptr_t)0x1 << LG_VADDR) - 1)) |
|
||||
((uintptr_t)slab << 1) |
|
||||
(uintptr_t)acquired;
|
||||
|
||||
atomic_store_p(&elm->le_bits, (void *)bits, ATOMIC_RELEASE);
|
||||
#else
|
||||
rtree_leaf_elm_slab_write(tsdn, rtree, elm, acquired, slab);
|
||||
rtree_leaf_elm_szind_write(tsdn, rtree, elm, acquired, szind);
|
||||
/*
|
||||
@ -162,6 +271,7 @@ rtree_leaf_elm_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm,
|
||||
* as soon as the extent field is non-NULL.
|
||||
*/
|
||||
rtree_leaf_elm_extent_write(tsdn, rtree, elm, acquired, extent);
|
||||
#endif
|
||||
}
|
||||
|
||||
JEMALLOC_ALWAYS_INLINE rtree_leaf_elm_t *
|
||||
@ -317,19 +427,24 @@ rtree_leaf_elm_acquire(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
|
||||
spin_t spinner = SPIN_INITIALIZER;
|
||||
while (true) {
|
||||
/* The least significant bit serves as a lock. */
|
||||
void *extent_and_lock = atomic_load_p(&elm->le_extent,
|
||||
#ifdef RTREE_LEAF_COMPACT
|
||||
# define RTREE_FIELD_WITH_LOCK le_bits
|
||||
#else
|
||||
# define RTREE_FIELD_WITH_LOCK le_extent
|
||||
#endif
|
||||
void *bits = atomic_load_p(&elm->RTREE_FIELD_WITH_LOCK,
|
||||
ATOMIC_RELAXED);
|
||||
if (likely(((uintptr_t)extent_and_lock & (uintptr_t)0x1) == 0))
|
||||
{
|
||||
void *locked = (void *)((uintptr_t)extent_and_lock
|
||||
| (uintptr_t)0x1);
|
||||
if (likely(((uintptr_t)bits & (uintptr_t)0x1) == 0)) {
|
||||
void *locked = (void *)((uintptr_t)bits |
|
||||
(uintptr_t)0x1);
|
||||
if (likely(atomic_compare_exchange_strong_p(
|
||||
&elm->le_extent, &extent_and_lock, locked,
|
||||
&elm->RTREE_FIELD_WITH_LOCK, &bits, locked,
|
||||
ATOMIC_ACQUIRE, ATOMIC_RELAXED))) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
spin_adaptive(&spinner);
|
||||
#undef RTREE_FIELD_WITH_LOCK
|
||||
}
|
||||
|
||||
if (config_debug) {
|
||||
|
@ -6,9 +6,26 @@ struct rtree_node_elm_s {
|
||||
};
|
||||
|
||||
struct rtree_leaf_elm_s {
|
||||
atomic_p_t le_extent; /* (extent_t *) */
|
||||
#ifdef RTREE_LEAF_COMPACT
|
||||
/*
|
||||
* Single pointer-width field containing all three leaf element fields.
|
||||
* For example, on a 64-bit x64 system with 48 significant virtual
|
||||
* memory address bits, the index, extent, and slab fields are packed as
|
||||
* such:
|
||||
*
|
||||
* x: index
|
||||
* e: extent
|
||||
* b: slab
|
||||
* k: lock
|
||||
*
|
||||
* 00000000 xxxxxxxx eeeeeeee [...] eeeeeeee eeee00bk
|
||||
*/
|
||||
atomic_p_t le_bits;
|
||||
#else
|
||||
atomic_p_t le_extent; /* (extent_t *), lock in low bit */
|
||||
atomic_u_t le_szind; /* (szind_t) */
|
||||
atomic_b_t le_slab; /* (bool) */
|
||||
#endif
|
||||
};
|
||||
|
||||
struct rtree_leaf_elm_witness_s {
|
||||
|
@ -33,6 +33,10 @@ typedef struct rtree_s rtree_t;
|
||||
#else
|
||||
# error Unsupported number of significant virtual address bits
|
||||
#endif
|
||||
/* Use compact leaf representation if virtual address encoding allows. */
|
||||
#if RTREE_NHIB >= LG_CEIL_NSIZES
|
||||
# define RTREE_LEAF_COMPACT
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Number of leafkey/leaf pairs to cache. Each entry supports an entire leaf,
|
||||
|
@ -40,6 +40,17 @@ lg() {
|
||||
done
|
||||
}
|
||||
|
||||
lg_ceil() {
|
||||
y=$1
|
||||
lg ${y}; lg_floor=${lg_result}
|
||||
pow2 ${lg_floor}; pow2_floor=${pow2_result}
|
||||
if [ ${pow2_floor} -lt ${y} ] ; then
|
||||
lg_ceil_result=$((${lg_floor} + 1))
|
||||
else
|
||||
lg_ceil_result=${lg_floor}
|
||||
fi
|
||||
}
|
||||
|
||||
reg_size_compute() {
|
||||
lg_grp=$1
|
||||
lg_delta=$2
|
||||
@ -246,12 +257,14 @@ size_classes() {
|
||||
done
|
||||
echo
|
||||
nsizes=${index}
|
||||
lg_ceil ${nsizes}; lg_ceil_nsizes=${lg_ceil_result}
|
||||
|
||||
# Defined upon completion:
|
||||
# - ntbins
|
||||
# - nlbins
|
||||
# - nbins
|
||||
# - nsizes
|
||||
# - lg_ceil_nsizes
|
||||
# - npsizes
|
||||
# - lg_tiny_maxclass
|
||||
# - lookup_maxclass
|
||||
@ -286,6 +299,7 @@ cat <<EOF
|
||||
* NLBINS: Number of bins supported by the lookup table.
|
||||
* NBINS: Number of small size class bins.
|
||||
* NSIZES: Number of size classes.
|
||||
* LG_CEIL_NSIZES: Number of bits required to store NSIZES.
|
||||
* NPSIZES: Number of size classes that are a multiple of (1U << LG_PAGE).
|
||||
* LG_TINY_MAXCLASS: Lg of maximum tiny size class.
|
||||
* LOOKUP_MAXCLASS: Maximum size class included in lookup table.
|
||||
@ -311,6 +325,7 @@ for lg_z in ${lg_zarr} ; do
|
||||
echo "#define NLBINS ${nlbins}"
|
||||
echo "#define NBINS ${nbins}"
|
||||
echo "#define NSIZES ${nsizes}"
|
||||
echo "#define LG_CEIL_NSIZES ${lg_ceil_nsizes}"
|
||||
echo "#define NPSIZES ${npsizes}"
|
||||
echo "#define LG_TINY_MAXCLASS ${lg_tiny_maxclass}"
|
||||
echo "#define LOOKUP_MAXCLASS ${lookup_maxclass}"
|
||||
|
Loading…
Reference in New Issue
Block a user