Store edata->state in rtree leaf and make edata_t 128B aligned.

Verified that this doesn't result in any real increase of edata_t bytes
allocated.
This commit is contained in:
Qi Wang
2021-02-26 15:32:41 -08:00
committed by Qi Wang
parent 70d1541c5b
commit 4d8c22f9a5
8 changed files with 138 additions and 60 deletions

View File

@@ -13,6 +13,12 @@
#include "jemalloc/internal/sz.h"
#include "jemalloc/internal/typed_list.h"
/*
* sizeof(edata_t) is 128 bytes on 64-bit architectures. Ensure the alignment
* to free up the low bits in the rtree leaf.
*/
#define EDATA_ALIGNMENT 128
enum extent_state_e {
extent_state_active = 0,
extent_state_dirty = 1,
@@ -88,7 +94,7 @@ struct edata_s {
* f: nfree
* s: bin_shard
*
* 00000000 ... 000000ss ssssffff ffffffii iiiiiitt zpcbaaaa aaaaaaaa
* 00000000 ... 00000sss sssfffff fffffiii iiiiittt zpcbaaaa aaaaaaaa
*
* arena_ind: Arena from which this extent came, or all 1 bits if
* unassociated.
@@ -143,7 +149,7 @@ struct edata_s {
#define EDATA_BITS_ZEROED_SHIFT (EDATA_BITS_PAI_WIDTH + EDATA_BITS_PAI_SHIFT)
#define EDATA_BITS_ZEROED_MASK MASK(EDATA_BITS_ZEROED_WIDTH, EDATA_BITS_ZEROED_SHIFT)
#define EDATA_BITS_STATE_WIDTH 2
#define EDATA_BITS_STATE_WIDTH 3
#define EDATA_BITS_STATE_SHIFT (EDATA_BITS_ZEROED_WIDTH + EDATA_BITS_ZEROED_SHIFT)
#define EDATA_BITS_STATE_MASK MASK(EDATA_BITS_STATE_WIDTH, EDATA_BITS_STATE_SHIFT)

View File

@@ -136,6 +136,36 @@ emap_assert_not_mapped(tsdn_t *tsdn, emap_t *emap, edata_t *edata) {
}
}
static inline void
emap_update_rtree_at_addr(tsdn_t *tsdn, rtree_t *rtree, edata_t *expected_edata,
uintptr_t addr, extent_state_t state) {
rtree_ctx_t rtree_ctx_fallback;
rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback);
rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx,
addr, /* dependent */ true, /* init_missing */ false);
assert(elm != NULL);
rtree_contents_t contents = rtree_leaf_elm_read(tsdn, rtree, elm,
/* dependent */ true);
assert(contents.edata == expected_edata);
contents.metadata.state = state;
rtree_leaf_elm_write(tsdn, rtree, elm, contents);
}
static inline void
emap_edata_state_update(tsdn_t *tsdn, emap_t *emap, edata_t *edata,
extent_state_t state) {
/* Only emap is allowed to modify the edata internal state. */
edata_state_set(edata, state);
emap_update_rtree_at_addr(tsdn, &emap->rtree, edata,
(uintptr_t)edata_base_get(edata), state);
emap_update_rtree_at_addr(tsdn, &emap->rtree, edata,
(uintptr_t)edata_last_get(edata), state);
emap_assert_mapped(tsdn, emap, edata);
}
JEMALLOC_ALWAYS_INLINE edata_t *
emap_edata_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr) {
rtree_ctx_t rtree_ctx_fallback;

View File

@@ -46,6 +46,7 @@ struct rtree_node_elm_s {
typedef struct rtree_metadata_s rtree_metadata_t;
struct rtree_metadata_s {
szind_t szind;
extent_state_t state; /* Mirrors edata->state. */
bool is_head; /* Mirrors edata->is_head. */
bool slab;
};
@@ -56,6 +57,10 @@ struct rtree_contents_s {
rtree_metadata_t metadata;
};
#define RTREE_LEAF_STATE_WIDTH EDATA_BITS_STATE_WIDTH
#define RTREE_LEAF_STATE_SHIFT 2
#define RTREE_LEAF_STATE_MASK MASK(RTREE_LEAF_STATE_WIDTH, RTREE_LEAF_STATE_SHIFT)
struct rtree_leaf_elm_s {
#ifdef RTREE_LEAF_COMPACT
/*
@@ -66,17 +71,17 @@ struct rtree_leaf_elm_s {
*
* x: index
* e: edata
* s: state
* h: is_head
* b: slab
*
* 00000000 xxxxxxxx eeeeeeee [...] eeeeeeee eeee00hb
* 00000000 xxxxxxxx eeeeeeee [...] eeeeeeee e00ssshb
*/
atomic_p_t le_bits;
#else
atomic_p_t le_edata; /* (edata_t *) */
/*
* slab is stored in the low bit; szind is stored in the next lowest
* bits.
* From low to high bits: slab, is_head, state.
*/
atomic_u_t le_metadata;
#endif
@@ -184,15 +189,14 @@ JEMALLOC_ALWAYS_INLINE uintptr_t
rtree_leaf_elm_bits_encode(rtree_contents_t contents) {
uintptr_t edata_bits = (uintptr_t)contents.edata
& (((uintptr_t)1 << LG_VADDR) - 1);
uintptr_t szind_bits = (uintptr_t)contents.metadata.szind << LG_VADDR;
/*
* Metadata shares the low bits of edata. edata is CACHELINE aligned (in
* fact, it's 128 bytes on 64-bit systems); we can enforce this
* alignment if we want to steal the extra rtree leaf bits someday.
*/
uintptr_t slab_bits = (uintptr_t)contents.metadata.slab;
uintptr_t is_head_bits = (uintptr_t)contents.metadata.is_head << 1;
uintptr_t metadata_bits = szind_bits | is_head_bits | slab_bits;
uintptr_t state_bits = (uintptr_t)contents.metadata.state <<
RTREE_LEAF_STATE_SHIFT;
uintptr_t metadata_bits = szind_bits | state_bits | is_head_bits |
slab_bits;
assert((edata_bits & metadata_bits) == 0);
return edata_bits | metadata_bits;
@@ -206,7 +210,11 @@ rtree_leaf_elm_bits_decode(uintptr_t bits) {
contents.metadata.slab = (bool)(bits & 1);
contents.metadata.is_head = (bool)(bits & (1 << 1));
uintptr_t metadata_mask = ~((uintptr_t)((1 << 2) - 1));
uintptr_t state_bits = (bits & RTREE_LEAF_STATE_MASK) >>
RTREE_LEAF_STATE_SHIFT;
contents.metadata.state = (extent_state_t)state_bits;
uintptr_t low_bit_mask = ~((uintptr_t)EDATA_ALIGNMENT - 1);
# ifdef __aarch64__
/*
* aarch64 doesn't sign extend the highest virtual address bit to set
@@ -214,13 +222,12 @@ rtree_leaf_elm_bits_decode(uintptr_t bits) {
*/
uintptr_t high_bit_mask = ((uintptr_t)1 << LG_VADDR) - 1;
/* Mask off metadata. */
uintptr_t low_bit_mask = metadata_mask;
uintptr_t mask = high_bit_mask & low_bit_mask;
contents.edata = (edata_t *)(bits & mask);
# else
/* Restore sign-extended high bits, mask metadata bits. */
contents.edata = (edata_t *)((uintptr_t)((intptr_t)(bits << RTREE_NHIB)
>> RTREE_NHIB) & metadata_mask);
>> RTREE_NHIB) & low_bit_mask);
# endif
return contents;
}
@@ -240,7 +247,12 @@ rtree_leaf_elm_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm,
? ATOMIC_RELAXED : ATOMIC_ACQUIRE);
contents.metadata.slab = (bool)(metadata_bits & 1);
contents.metadata.is_head = (bool)(metadata_bits & (1 << 1));
contents.metadata.szind = (metadata_bits >> 2);
uintptr_t state_bits = (metadata_bits & RTREE_LEAF_STATE_MASK) >>
RTREE_LEAF_STATE_SHIFT;
contents.metadata.state = (extent_state_t)state_bits;
contents.metadata.szind = metadata_bits >> (RTREE_LEAF_STATE_SHIFT +
RTREE_LEAF_STATE_WIDTH);
contents.edata = (edata_t *)atomic_load_p(&elm->le_edata, dependent
? ATOMIC_RELAXED : ATOMIC_ACQUIRE);
@@ -252,13 +264,16 @@ rtree_leaf_elm_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm,
static inline void
rtree_leaf_elm_write(tsdn_t *tsdn, rtree_t *rtree,
rtree_leaf_elm_t *elm, rtree_contents_t contents) {
assert((uintptr_t)contents.edata % EDATA_ALIGNMENT == 0);
#ifdef RTREE_LEAF_COMPACT
uintptr_t bits = rtree_leaf_elm_bits_encode(contents);
atomic_store_p(&elm->le_bits, (void *)bits, ATOMIC_RELEASE);
#else
unsigned metadata_bits = ((unsigned)contents.metadata.slab
unsigned metadata_bits = (unsigned)contents.metadata.slab
| ((unsigned)contents.metadata.is_head << 1)
| ((unsigned)contents.metadata.szind << 2));
| ((unsigned)contents.metadata.state << RTREE_LEAF_STATE_SHIFT)
| ((unsigned)contents.metadata.szind << (RTREE_LEAF_STATE_SHIFT +
RTREE_LEAF_STATE_WIDTH));
atomic_store_u(&elm->le_metadata, metadata_bits, ATOMIC_RELEASE);
/*
* Write edata last, since the element is atomically considered valid
@@ -430,6 +445,7 @@ rtree_clear(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
contents.metadata.szind = SC_NSIZES;
contents.metadata.slab = false;
contents.metadata.is_head = false;
contents.metadata.state = (extent_state_t)0;
rtree_leaf_elm_write(tsdn, rtree, elm, contents);
}