Use rtree-tracked states to protect edata outside of ecache locks.
This avoids the addr-based mutexes (i.e. the mutex_pool) and instead relies on the metadata tracked in the rtree leaf: the is_head bit and the extent state. Before a neighbor edata is accessed (e.g. for coalescing), its state is verified first -- only neighbor edatas from the same arena and in the expected state are accessed.
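In outline (a standalone sketch, not jemalloc source -- every name below is illustrative): before a neighbor is touched, the metadata recorded for it is checked, and only a neighbor from the same arena, in the expected stable state, is considered usable. The is_head bit is consulted by the real check as well; it is omitted here for brevity.

#include <stdbool.h>

/* Stand-ins for the per-extent metadata tracked in the rtree leaf. */
typedef enum {
    state_active = 0, state_dirty = 1, state_muzzy = 2, state_retained = 3,
    state_transition = 4    /* >= this: another thread is operating on it */
} model_state_t;

typedef struct {
    unsigned arena_ind;
    model_state_t state;
} model_leaf_t;

/* A neighbor may be coalesced only if it belongs to the same arena and is in
 * exactly the state the caller expects.  An intermediate (transition) state
 * never equals a stable expected state, which is what keeps two threads from
 * operating on the same extent at once. */
static bool
model_neighbor_usable(const model_leaf_t *neighbor, unsigned arena_ind,
    model_state_t expected_state) {
    return neighbor->arena_ind == arena_ind &&
        neighbor->state == expected_state;
}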
@@ -23,7 +23,11 @@ enum extent_state_e {
extent_state_active = 0,
extent_state_dirty = 1,
extent_state_muzzy = 2,
extent_state_retained = 3
extent_state_retained = 3,
extent_state_transition = 4, /* States below are intermediate. */
extent_state_updating = 4,
extent_state_merging = 5,
extent_state_max = 5 /* Sanity checking only. */
};
typedef enum extent_state_e extent_state_t;
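The enumerators are laid out so that a single ordered comparison separates stable states from intermediate ones; a standalone restatement with compile-time checks (the _Static_asserts are mine, not part of the diff):

/* The enum from the hunk above, restated standalone. */
enum extent_state_e {
    extent_state_active = 0,
    extent_state_dirty = 1,
    extent_state_muzzy = 2,
    extent_state_retained = 3,
    extent_state_transition = 4, /* States below are intermediate. */
    extent_state_updating = 4,
    extent_state_merging = 5,
    extent_state_max = 5 /* Sanity checking only. */
};

/* "updating" is the first intermediate state, so it aliases the transition
 * boundary; "merging" is the last, so it doubles as the sanity-check maximum.
 * One comparison against extent_state_transition classifies any state. */
_Static_assert(extent_state_updating == extent_state_transition,
    "first intermediate state marks the boundary");
_Static_assert(extent_state_merging == extent_state_max,
    "last state doubles as the maximum");
_Static_assert(extent_state_retained < extent_state_transition,
    "stable states sort below the boundary");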
@@ -550,6 +554,11 @@ edata_is_head_set(edata_t *edata, bool is_head) {
((uint64_t)is_head << EDATA_BITS_IS_HEAD_SHIFT);
}

static inline bool
edata_state_in_transition(extent_state_t state) {
return state >= extent_state_transition;
}

/*
* Because this function is implemented as a sequence of bitfield modifications,
* even though each individual bit is properly initialized, we technically read
@@ -5,6 +5,15 @@
#include "jemalloc/internal/mutex_pool.h"
#include "jemalloc/internal/rtree.h"

/*
* Note: Ends without at semicolon, so that
* EMAP_DECLARE_RTREE_CTX;
* in uses will avoid empty-statement warnings.
*/
#define EMAP_DECLARE_RTREE_CTX \
rtree_ctx_t rtree_ctx_fallback; \
rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback)

typedef struct emap_s emap_t;
struct emap_s {
rtree_t rtree;
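A standalone analog of the macro pattern (the ctx_t type and ctx_get() helper are illustrative stand-ins for rtree_ctx_t and tsdn_rtree_ctx()): the last declaration deliberately lacks a trailing semicolon, so the semicolon written at the use site completes the declaration instead of leaving an empty statement behind.

typedef struct { int depth; } ctx_t;

static ctx_t *
ctx_get(ctx_t *fallback) {
    fallback->depth = 0;
    return fallback;
}

/* Ends without a semicolon, mirroring EMAP_DECLARE_RTREE_CTX above. */
#define DECLARE_CTX                                                    \
    ctx_t ctx_fallback;                                                \
    ctx_t *ctx = ctx_get(&ctx_fallback)

static int
use_ctx(void) {
    DECLARE_CTX;    /* this ';' terminates the second declaration */
    return ctx->depth;
}

int
main(void) {
    return use_ctx();
}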
@@ -31,20 +40,16 @@ bool emap_init(emap_t *emap, base_t *base, bool zeroed);
void emap_remap(tsdn_t *tsdn, emap_t *emap, edata_t *edata, szind_t szind,
bool slab);

/*
* Grab the lock or locks associated with the edata or edatas indicated (which
* is done just by simple address hashing). The hashing strategy means that
* it's never safe to grab locks incrementally -- you have to grab all the locks
* you'll need at once, and release them all at once.
*/
void emap_lock_edata(tsdn_t *tsdn, emap_t *emap, edata_t *edata);
void emap_unlock_edata(tsdn_t *tsdn, emap_t *emap, edata_t *edata);
void emap_lock_edata2(tsdn_t *tsdn, emap_t *emap, edata_t *edata1,
edata_t *edata2);
void emap_unlock_edata2(tsdn_t *tsdn, emap_t *emap, edata_t *edata1,
edata_t *edata2);
edata_t *emap_lock_edata_from_addr(tsdn_t *tsdn, emap_t *emap, void *addr,
bool inactive_only);
void emap_update_edata_state(tsdn_t *tsdn, emap_t *emap, edata_t *edata,
extent_state_t state);

edata_t *emap_try_acquire_edata(tsdn_t *tsdn, emap_t *emap, void *addr,
extent_state_t expected_state, bool allow_head_extent);
edata_t *emap_try_acquire_edata_neighbor(tsdn_t *tsdn, emap_t *emap,
edata_t *edata, extent_pai_t pai, extent_state_t expected_state,
bool forward);
void emap_release_edata(tsdn_t *tsdn, emap_t *emap, edata_t *edata,
extent_state_t new_state);

/*
* Associate the given edata with its beginning and end address, setting the
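The declarations above replace the mutex_pool pair-locking API with a try-acquire/release protocol keyed on the tracked state. A minimal standalone model of that lifecycle (names are illustrative, and the CAS merely stands in for jemalloc's coordination through the rtree leaf):

#include <assert.h>
#include <stdatomic.h>
#include <stdbool.h>

typedef enum { st_dirty = 1, st_retained = 3, st_merging = 5 } st_t;

typedef struct { _Atomic st_t state; } model_edata_t;

/* Succeed only if the extent is in the state the caller expects, and park it
 * in an intermediate state while it is being worked on. */
static bool
model_try_acquire(model_edata_t *e, st_t expected) {
    st_t cur = expected;
    return atomic_compare_exchange_strong(&e->state, &cur, st_merging);
}

/* Publish the new stable state once the operation is done. */
static void
model_release(model_edata_t *e, st_t new_state) {
    assert(atomic_load(&e->state) == st_merging);
    atomic_store(&e->state, new_state);
}

int
main(void) {
    model_edata_t e = { .state = st_dirty };
    assert(!model_try_acquire(&e, st_retained)); /* wrong state: refused   */
    assert(model_try_acquire(&e, st_dirty));     /* matched: now merging   */
    model_release(&e, st_dirty);                 /* back to a stable state */
    return 0;
}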
@@ -136,43 +141,66 @@ emap_assert_not_mapped(tsdn_t *tsdn, emap_t *emap, edata_t *edata) {
}
}

static inline void
emap_update_rtree_at_addr(tsdn_t *tsdn, rtree_t *rtree, edata_t *expected_edata,
uintptr_t addr, extent_state_t state) {
witness_assert_positive_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
WITNESS_RANK_CORE);
JEMALLOC_ALWAYS_INLINE bool
emap_edata_in_transition(tsdn_t *tsdn, emap_t *emap, edata_t *edata) {
assert(config_debug);
emap_assert_mapped(tsdn, emap, edata);

rtree_ctx_t rtree_ctx_fallback;
rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback);
EMAP_DECLARE_RTREE_CTX;
rtree_contents_t contents = rtree_read(tsdn, &emap->rtree, rtree_ctx,
(uintptr_t)edata_base_get(edata));

rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx,
addr, /* dependent */ true, /* init_missing */ false);
assert(elm != NULL);
rtree_contents_t contents = rtree_leaf_elm_read(tsdn, rtree, elm,
/* dependent */ true);
assert(contents.edata == expected_edata);
contents.metadata.state = state;
rtree_leaf_elm_write(tsdn, rtree, elm, contents);
return edata_state_in_transition(contents.metadata.state);
}

static inline void
emap_edata_state_update(tsdn_t *tsdn, emap_t *emap, edata_t *edata,
extent_state_t state) {
/* Only emap is allowed to modify the edata internal state. */
edata_state_set(edata, state);
JEMALLOC_ALWAYS_INLINE bool
emap_edata_is_acquired(tsdn_t *tsdn, emap_t *emap, edata_t *edata) {
if (!config_debug) {
/* For assertions only. */
return false;
}

emap_update_rtree_at_addr(tsdn, &emap->rtree, edata,
(uintptr_t)edata_base_get(edata), state);
emap_update_rtree_at_addr(tsdn, &emap->rtree, edata,
(uintptr_t)edata_last_get(edata), state);
/*
* The edata is considered acquired if no other threads will attempt to
* read / write any fields from it. This includes a few cases:
*
* 1) edata not hooked into emap yet -- This implies the edata just got
* allocated or initialized.
*
* 2) in an active or transition state -- In both cases, the edata can
* be discovered from the emap, however the state tracked in the rtree
* will prevent other threads from accessing the actual edata.
*/
EMAP_DECLARE_RTREE_CTX;
rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, &emap->rtree,
rtree_ctx, (uintptr_t)edata_base_get(edata), /* dependent */ true,
/* init_missing */ false);
if (elm == NULL) {
return true;
}
rtree_contents_t contents = rtree_leaf_elm_read(tsdn, &emap->rtree, elm,
/* dependent */ true);
if (contents.edata == NULL ||
contents.metadata.state == extent_state_active ||
edata_state_in_transition(contents.metadata.state)) {
return true;
}

emap_assert_mapped(tsdn, emap, edata);
return false;
}

JEMALLOC_ALWAYS_INLINE void
extent_assert_can_coalesce(const edata_t *inner, const edata_t *outer) {
assert(edata_arena_ind_get(inner) == edata_arena_ind_get(outer));
assert(edata_pai_get(inner) == edata_pai_get(outer));
assert(edata_committed_get(inner) == edata_committed_get(outer));
assert(edata_state_get(inner) == extent_state_active);
assert(edata_state_get(outer) == extent_state_merging);
}

JEMALLOC_ALWAYS_INLINE edata_t *
emap_edata_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr) {
rtree_ctx_t rtree_ctx_fallback;
rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback);
EMAP_DECLARE_RTREE_CTX;

return rtree_read(tsdn, &emap->rtree, rtree_ctx, (uintptr_t)ptr).edata;
}
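The "acquired" definition spelled out in the comment above reduces to a three-way check; restated as a standalone predicate (names are mine, not jemalloc's):

#include <stdbool.h>
#include <stddef.h>

typedef enum {
    st_active = 0, st_dirty = 1, st_muzzy = 2, st_retained = 3,
    st_transition = 4
} st_t;

/* What a lookup in the map would return: NULL if the extent is not mapped. */
typedef struct { st_t state; } mapping_t;

static bool
model_is_acquired(const mapping_t *m) {
    if (m == NULL) {
        return true;            /* 1) not hooked into the map yet */
    }
    /* 2) active or in an intermediate state: the tracked state keeps other
     * threads from touching the edata even though it is discoverable. */
    return m->state == st_active || m->state >= st_transition;
}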
@@ -181,8 +209,7 @@ emap_edata_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr) {
JEMALLOC_ALWAYS_INLINE void
emap_alloc_ctx_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr,
emap_alloc_ctx_t *alloc_ctx) {
rtree_ctx_t rtree_ctx_fallback;
rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback);
EMAP_DECLARE_RTREE_CTX;

rtree_metadata_t metadata = rtree_metadata_read(tsdn, &emap->rtree,
rtree_ctx, (uintptr_t)ptr);
@@ -194,8 +221,7 @@ emap_alloc_ctx_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr,
JEMALLOC_ALWAYS_INLINE void
emap_full_alloc_ctx_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr,
emap_full_alloc_ctx_t *full_alloc_ctx) {
rtree_ctx_t rtree_ctx_fallback;
rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback);
EMAP_DECLARE_RTREE_CTX;

rtree_contents_t contents = rtree_read(tsdn, &emap->rtree, rtree_ctx,
(uintptr_t)ptr);
@@ -212,8 +238,7 @@ emap_full_alloc_ctx_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr,
JEMALLOC_ALWAYS_INLINE bool
emap_full_alloc_ctx_try_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr,
emap_full_alloc_ctx_t *full_alloc_ctx) {
rtree_ctx_t rtree_ctx_fallback;
rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback);
EMAP_DECLARE_RTREE_CTX;

rtree_contents_t contents;
bool err = rtree_read_independent(tsdn, &emap->rtree, rtree_ctx,
@@ -81,7 +81,7 @@ struct rtree_leaf_elm_s {
#else
atomic_p_t le_edata; /* (edata_t *) */
/*
* From low to high bits: slab, is_head, state.
* From high to low bits: szind (8 bits), state (4 bits), is_head, slab
*/
atomic_u_t le_metadata;
#endif
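A standalone model of the le_metadata packing described in the revised comment (the shift constants here are illustrative; jemalloc derives its own RTREE_LEAF_STATE_SHIFT/WIDTH elsewhere):

#include <assert.h>
#include <stdbool.h>

/* Illustrative shift constants for the packed word. */
#define SLAB_SHIFT    0
#define IS_HEAD_SHIFT 1
#define STATE_SHIFT   2
#define STATE_WIDTH   4
#define SZIND_SHIFT   (STATE_SHIFT + STATE_WIDTH)

/* From high to low bits: szind, state, is_head, slab -- as in the comment. */
static unsigned
pack_metadata(unsigned szind, unsigned state, bool is_head, bool slab) {
    return (szind << SZIND_SHIFT) | (state << STATE_SHIFT) |
        ((unsigned)is_head << IS_HEAD_SHIFT) | (unsigned)slab;
}

int
main(void) {
    unsigned bits = pack_metadata(/* szind */ 17, /* state */ 5,
        /* is_head */ true, /* slab */ false);
    assert(((bits >> STATE_SHIFT) & ((1u << STATE_WIDTH) - 1)) == 5);
    assert((bits >> SZIND_SHIFT) == 17);
    return 0;
}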
@@ -187,6 +187,7 @@ rtree_leaf_elm_bits_read(tsdn_t *tsdn, rtree_t *rtree,

JEMALLOC_ALWAYS_INLINE uintptr_t
rtree_leaf_elm_bits_encode(rtree_contents_t contents) {
assert((uintptr_t)contents.edata % (uintptr_t)EDATA_ALIGNMENT == 0);
uintptr_t edata_bits = (uintptr_t)contents.edata
& (((uintptr_t)1 << LG_VADDR) - 1);
@@ -212,6 +213,7 @@ rtree_leaf_elm_bits_decode(uintptr_t bits) {

uintptr_t state_bits = (bits & RTREE_LEAF_STATE_MASK) >>
RTREE_LEAF_STATE_SHIFT;
assert(state_bits <= extent_state_max);
contents.metadata.state = (extent_state_t)state_bits;

uintptr_t low_bit_mask = ~((uintptr_t)EDATA_ALIGNMENT - 1);
@@ -229,6 +231,7 @@ rtree_leaf_elm_bits_decode(uintptr_t bits) {
contents.edata = (edata_t *)((uintptr_t)((intptr_t)(bits << RTREE_NHIB)
>> RTREE_NHIB) & low_bit_mask);
# endif
assert((uintptr_t)contents.edata % (uintptr_t)EDATA_ALIGNMENT == 0);
return contents;
}
@@ -250,6 +253,7 @@ rtree_leaf_elm_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm,

uintptr_t state_bits = (metadata_bits & RTREE_LEAF_STATE_MASK) >>
RTREE_LEAF_STATE_SHIFT;
assert(state_bits <= extent_state_max);
contents.metadata.state = (extent_state_t)state_bits;
contents.metadata.szind = metadata_bits >> (RTREE_LEAF_STATE_SHIFT +
RTREE_LEAF_STATE_WIDTH);
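The same one-line sanity check is added to every path that decodes the state field; a standalone sketch of that decode step (mask and shift values are illustrative):

#include <assert.h>

#define STATE_SHIFT 2u
#define STATE_WIDTH 4u
#define STATE_MASK  (((1u << STATE_WIDTH) - 1u) << STATE_SHIFT)

enum { state_retained = 3, state_max = 5 };

static unsigned
decode_state(unsigned metadata_bits) {
    unsigned state_bits = (metadata_bits & STATE_MASK) >> STATE_SHIFT;
    /* The field is wide enough to hold values past the enum's end, so every
     * decode path asserts that the stored value is a real state. */
    assert(state_bits <= state_max);
    return state_bits;
}

int
main(void) {
    unsigned bits = (unsigned)state_retained << STATE_SHIFT;
    assert(decode_state(bits) == state_retained);
    return 0;
}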
@@ -283,6 +287,31 @@ rtree_leaf_elm_write(tsdn_t *tsdn, rtree_t *rtree,
#endif
}

/* The state field can be updated independently (and more frequently). */
static inline void
rtree_leaf_elm_state_update(tsdn_t *tsdn, rtree_t *rtree,
rtree_leaf_elm_t *elm1, rtree_leaf_elm_t *elm2, extent_state_t state) {
assert(elm1 != NULL);
#ifdef RTREE_LEAF_COMPACT
uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm1,
/* dependent */ true);
bits &= ~RTREE_LEAF_STATE_MASK;
bits |= state << RTREE_LEAF_STATE_SHIFT;
atomic_store_p(&elm1->le_bits, (void *)bits, ATOMIC_RELEASE);
if (elm2 != NULL) {
atomic_store_p(&elm2->le_bits, (void *)bits, ATOMIC_RELEASE);
}
#else
unsigned bits = atomic_load_u(&elm1->le_metadata, ATOMIC_RELAXED);
bits &= ~RTREE_LEAF_STATE_MASK;
bits |= state << RTREE_LEAF_STATE_SHIFT;
atomic_store_u(&elm1->le_metadata, bits, ATOMIC_RELEASE);
if (elm2 != NULL) {
atomic_store_u(&elm2->le_metadata, bits, ATOMIC_RELEASE);
}
#endif
}

/*
* Tries to look up the key in the L1 cache, returning it if there's a hit, or
* NULL if there's a miss.
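rtree_leaf_elm_state_update() splices the new state into the packed word and publishes the same contents to both boundary elements of the extent. A standalone model of that read-modify-write (layout constants are illustrative; the relaxed-load/release-store pairing mirrors the non-compact branch above):

#include <assert.h>
#include <stdatomic.h>
#include <stddef.h>

/* Illustrative layout; stands in for RTREE_LEAF_STATE_SHIFT/WIDTH/MASK. */
#define STATE_SHIFT 2u
#define STATE_WIDTH 4u
#define STATE_MASK  (((1u << STATE_WIDTH) - 1u) << STATE_SHIFT)

/* Clear the old state field, splice in the new one, and publish the same
 * word to both boundary elements (first and last page of the extent). */
static void
model_state_update(atomic_uint *elm1, atomic_uint *elm2, unsigned state) {
    unsigned bits = atomic_load_explicit(elm1, memory_order_relaxed);
    bits &= ~STATE_MASK;
    bits |= state << STATE_SHIFT;
    atomic_store_explicit(elm1, bits, memory_order_release);
    if (elm2 != NULL) {
        atomic_store_explicit(elm2, bits, memory_order_release);
    }
}

int
main(void) {
    /* szind 7, state 1 packed into a metadata word for both boundaries. */
    atomic_uint first = (7u << (STATE_SHIFT + STATE_WIDTH)) | (1u << STATE_SHIFT);
    atomic_uint last = (7u << (STATE_SHIFT + STATE_WIDTH)) | (1u << STATE_SHIFT);
    model_state_update(&first, &last, 5);
    assert(((atomic_load(&first) & STATE_MASK) >> STATE_SHIFT) == 5);
    assert(atomic_load(&first) == atomic_load(&last));
    /* Bits above the state field (szind) are untouched. */
    assert((atomic_load(&first) >> (STATE_SHIFT + STATE_WIDTH)) == 7);
    return 0;
}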