Add rtree_write_range(): write the same content to multiple leaf elements.

Apply it in emap_(de)register_interior(), whose per-page element writes had become noticeable in perf profiles.
This commit is contained in:
Qi Wang 2021-03-17 16:35:57 -07:00 committed by Qi Wang
parent add636596a
commit 7c964b0352
4 changed files with 182 additions and 32 deletions

View File

@ -137,23 +137,24 @@ bool rtree_new(rtree_t *rtree, base_t *base, bool zeroed);
rtree_leaf_elm_t *rtree_leaf_elm_lookup_hard(tsdn_t *tsdn, rtree_t *rtree,
rtree_ctx_t *rtree_ctx, uintptr_t key, bool dependent, bool init_missing);
JEMALLOC_ALWAYS_INLINE uintptr_t
rtree_leafkey(uintptr_t key) {
JEMALLOC_ALWAYS_INLINE unsigned
rtree_leaf_maskbits(void) {
unsigned ptrbits = ZU(1) << (LG_SIZEOF_PTR+3);
unsigned cumbits = (rtree_levels[RTREE_HEIGHT-1].cumbits -
rtree_levels[RTREE_HEIGHT-1].bits);
unsigned maskbits = ptrbits - cumbits;
uintptr_t mask = ~((ZU(1) << maskbits) - 1);
return ptrbits - cumbits;
}
/*
 * Map an address to the key of the rtree leaf node covering it, i.e. the
 * address with all bits below the leaf boundary cleared.
 */
JEMALLOC_ALWAYS_INLINE uintptr_t
rtree_leafkey(uintptr_t key) {
	return key & ~((ZU(1) << rtree_leaf_maskbits()) - 1);
}
JEMALLOC_ALWAYS_INLINE size_t
rtree_cache_direct_map(uintptr_t key) {
unsigned ptrbits = ZU(1) << (LG_SIZEOF_PTR+3);
unsigned cumbits = (rtree_levels[RTREE_HEIGHT-1].cumbits -
rtree_levels[RTREE_HEIGHT-1].bits);
unsigned maskbits = ptrbits - cumbits;
return (size_t)((key >> maskbits) & (RTREE_CTX_NCACHE - 1));
return (size_t)((key >> rtree_leaf_maskbits()) &
(RTREE_CTX_NCACHE - 1));
}
JEMALLOC_ALWAYS_INLINE uintptr_t
@ -265,30 +266,49 @@ rtree_leaf_elm_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm,
#endif
}
static inline void
rtree_leaf_elm_write(tsdn_t *tsdn, rtree_t *rtree,
rtree_leaf_elm_t *elm, rtree_contents_t contents) {
assert((uintptr_t)contents.edata % EDATA_ALIGNMENT == 0);
JEMALLOC_ALWAYS_INLINE void
rtree_contents_encode(rtree_contents_t contents, void **bits,
unsigned *additional) {
#ifdef RTREE_LEAF_COMPACT
uintptr_t bits = rtree_leaf_elm_bits_encode(contents);
atomic_store_p(&elm->le_bits, (void *)bits, ATOMIC_RELEASE);
*bits = (void *)rtree_leaf_elm_bits_encode(contents);
#else
unsigned metadata_bits = (unsigned)contents.metadata.slab
*additional = (unsigned)contents.metadata.slab
| ((unsigned)contents.metadata.is_head << 1)
| ((unsigned)contents.metadata.state << RTREE_LEAF_STATE_SHIFT)
| ((unsigned)contents.metadata.szind << (RTREE_LEAF_STATE_SHIFT +
RTREE_LEAF_STATE_WIDTH));
atomic_store_u(&elm->le_metadata, metadata_bits, ATOMIC_RELEASE);
*bits = contents.edata;
#endif
}
JEMALLOC_ALWAYS_INLINE void
rtree_leaf_elm_write_commit(tsdn_t *tsdn, rtree_t *rtree,
rtree_leaf_elm_t *elm, void *bits, unsigned additional) {
#ifdef RTREE_LEAF_COMPACT
atomic_store_p(&elm->le_bits, bits, ATOMIC_RELEASE);
#else
atomic_store_u(&elm->le_metadata, additional, ATOMIC_RELEASE);
/*
* Write edata last, since the element is atomically considered valid
* as soon as the edata field is non-NULL.
*/
atomic_store_p(&elm->le_edata, contents.edata, ATOMIC_RELEASE);
atomic_store_p(&elm->le_edata, bits, ATOMIC_RELEASE);
#endif
}
/*
 * Store the given contents into a single leaf element: encode into the
 * in-memory representation, then commit it atomically.
 */
JEMALLOC_ALWAYS_INLINE void
rtree_leaf_elm_write(tsdn_t *tsdn, rtree_t *rtree,
    rtree_leaf_elm_t *elm, rtree_contents_t contents) {
	assert((uintptr_t)contents.edata % EDATA_ALIGNMENT == 0);

	void *encoded;
	unsigned metadata;
	rtree_contents_encode(contents, &encoded, &metadata);
	rtree_leaf_elm_write_commit(tsdn, rtree, elm, encoded, metadata);
}
/* The state field can be updated independently (and more frequently). */
static inline void
JEMALLOC_ALWAYS_INLINE void
rtree_leaf_elm_state_update(tsdn_t *tsdn, rtree_t *rtree,
rtree_leaf_elm_t *elm1, rtree_leaf_elm_t *elm2, extent_state_t state) {
assert(elm1 != NULL);
@ -447,7 +467,45 @@ rtree_metadata_try_read_fast(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ct
return false;
}
static inline bool
/*
 * Write the same contents to every leaf element covering [base, end]
 * (page granularity; end is inclusive).  When clearing, each element is
 * asserted to currently hold a non-NULL edata.
 */
JEMALLOC_ALWAYS_INLINE void
rtree_write_range_impl(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
    uintptr_t base, uintptr_t end, rtree_contents_t contents, bool clearing) {
	assert((base & PAGE_MASK) == 0 && (end & PAGE_MASK) == 0);
	/*
	 * Only used for emap_(de)register_interior, which implies the
	 * boundaries have been registered already.  Therefore all the lookups
	 * are dependent w/o init_missing, assuming the range spans across at
	 * most 2 rtree leaf nodes (each covers 1 GiB of vaddr).
	 */
	/* Encode once up front; the identical bits go into every element. */
	void *bits;
	unsigned additional;
	rtree_contents_encode(contents, &bits, &additional);

	rtree_leaf_elm_t *elm = NULL; /* Dead store. */
	for (uintptr_t addr = base; addr <= end; addr += PAGE) {
		/*
		 * Do a full lookup only at the start and when crossing into a
		 * new leaf node; within a leaf, consecutive pages map to
		 * consecutive elements, so bumping the pointer suffices
		 * (verified by the assert below).
		 */
		if (addr == base ||
		    (addr & ((ZU(1) << rtree_leaf_maskbits()) - 1)) == 0) {
			elm = rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx, addr,
			    /* dependent */ true, /* init_missing */ false);
			assert(elm != NULL);
		}
		/* The pointer-bump shortcut must match a fresh lookup. */
		assert(elm == rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx, addr,
		    /* dependent */ true, /* init_missing */ false));
		/* When clearing, the element must currently be occupied. */
		assert(!clearing || rtree_leaf_elm_read(tsdn, rtree, elm,
		    /* dependent */ true).edata != NULL);
		rtree_leaf_elm_write_commit(tsdn, rtree, elm, bits, additional);
		elm++;
	}
}
/*
 * Write the same contents to all leaf elements covering [base, end]
 * (page granularity; end inclusive).  The range boundaries must already
 * be registered; see rtree_write_range_impl() for the preconditions.
 */
JEMALLOC_ALWAYS_INLINE void
rtree_write_range(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
    uintptr_t base, uintptr_t end, rtree_contents_t contents) {
	rtree_write_range_impl(tsdn, rtree, rtree_ctx, base, end, contents,
	    /* clearing */ false);
}
JEMALLOC_ALWAYS_INLINE bool
rtree_write(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key,
rtree_contents_t contents) {
rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx,
@ -478,4 +536,17 @@ rtree_clear(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
rtree_leaf_elm_write(tsdn, rtree, elm, contents);
}
/*
 * Reset all leaf elements covering [base, end] (end inclusive) to the
 * empty state; each element is expected to be currently occupied.
 */
static inline void
rtree_clear_range(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
    uintptr_t base, uintptr_t end) {
	/* Build an "empty" value: no edata, invalid szind, cleared flags. */
	rtree_contents_t empty;
	empty.edata = NULL;
	empty.metadata.state = (extent_state_t)0;
	empty.metadata.szind = SC_NSIZES;
	empty.metadata.slab = false;
	empty.metadata.is_head = false;
	rtree_write_range_impl(tsdn, rtree, rtree_ctx, base, end, empty,
	    /* clearing */ true);
}
#endif /* JEMALLOC_INTERNAL_RTREE_H */

View File

@ -241,6 +241,7 @@ emap_register_boundary(tsdn_t *tsdn, emap_t *emap, edata_t *edata,
return false;
}
/* Invoked *after* emap_register_boundary. */
void
emap_register_interior(tsdn_t *tsdn, emap_t *emap, edata_t *edata,
szind_t szind) {
@ -249,6 +250,22 @@ emap_register_interior(tsdn_t *tsdn, emap_t *emap, edata_t *edata,
assert(edata_slab_get(edata));
assert(edata_state_get(edata) == extent_state_active);
if (config_debug) {
/* Making sure the boundary is registered already. */
rtree_leaf_elm_t *elm_a, *elm_b;
bool err = emap_rtree_leaf_elms_lookup(tsdn, emap, rtree_ctx,
edata, /* dependent */ true, /* init_missing */ false,
&elm_a, &elm_b);
assert(!err);
rtree_contents_t contents_a, contents_b;
contents_a = rtree_leaf_elm_read(tsdn, &emap->rtree, elm_a,
/* dependent */ true);
contents_b = rtree_leaf_elm_read(tsdn, &emap->rtree, elm_b,
/* dependent */ true);
assert(contents_a.edata == edata && contents_b.edata == edata);
assert(contents_a.metadata.slab && contents_b.metadata.slab);
}
rtree_contents_t contents;
contents.edata = edata;
contents.metadata.szind = szind;
@ -256,12 +273,10 @@ emap_register_interior(tsdn_t *tsdn, emap_t *emap, edata_t *edata,
contents.metadata.state = extent_state_active;
contents.metadata.is_head = false; /* Not allowed to access. */
/* Register interior. */
for (size_t i = 1; i < (edata_size_get(edata) >> LG_PAGE) - 1; i++) {
rtree_write(tsdn, &emap->rtree, rtree_ctx,
(uintptr_t)edata_base_get(edata) + (uintptr_t)(i <<
LG_PAGE), contents);
}
assert(edata_size_get(edata) > (2 << LG_PAGE));
rtree_write_range(tsdn, &emap->rtree, rtree_ctx,
(uintptr_t)edata_base_get(edata) + PAGE,
(uintptr_t)edata_last_get(edata) - PAGE, contents);
}
void
@ -289,10 +304,10 @@ emap_deregister_interior(tsdn_t *tsdn, emap_t *emap, edata_t *edata) {
EMAP_DECLARE_RTREE_CTX;
assert(edata_slab_get(edata));
for (size_t i = 1; i < (edata_size_get(edata) >> LG_PAGE) - 1; i++) {
rtree_clear(tsdn, &emap->rtree, rtree_ctx,
(uintptr_t)edata_base_get(edata) + (uintptr_t)(i <<
LG_PAGE));
if (edata_size_get(edata) > (2 << LG_PAGE)) {
rtree_clear_range(tsdn, &emap->rtree, rtree_ctx,
(uintptr_t)edata_base_get(edata) + PAGE,
(uintptr_t)edata_last_get(edata) - PAGE);
}
}

View File

@ -120,7 +120,7 @@ pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment,
emap_remap(tsdn, shard->emap, edata, szind, slab);
edata_szind_set(edata, szind);
edata_slab_set(edata, slab);
if (slab) {
if (slab && (size > 2 * PAGE)) {
emap_register_interior(tsdn, shard->emap, edata, szind);
}
}

View File

@ -210,11 +210,75 @@ TEST_BEGIN(test_rtree_random) {
}
TEST_END
/*
 * Exercise the range write/clear path on [start, end]: register the
 * boundaries individually, bulk-write the whole range, verify every page
 * reads back the written edata, then bulk-clear and verify every element
 * is empty again.
 */
static void
test_rtree_range_write(tsdn_t *tsdn, rtree_t *rtree, uintptr_t start,
    uintptr_t end) {
	rtree_ctx_t rtree_ctx;
	rtree_ctx_data_init(&rtree_ctx);

	edata_t *edata_e = alloc_edata();
	edata_init(edata_e, INVALID_ARENA_IND, NULL, 0, false, SC_NSIZES, 0,
	    extent_state_active, false, false, EXTENT_PAI_PAC, EXTENT_NOT_HEAD);

	rtree_contents_t contents;
	contents.edata = edata_e;
	contents.metadata.szind = SC_NSIZES;
	contents.metadata.slab = false;
	contents.metadata.is_head = false;
	contents.metadata.state = extent_state_active;

	/*
	 * Register the two boundary pages first -- rtree_write_range()
	 * requires the boundaries to be present already (its lookups are
	 * dependent w/o init_missing).
	 */
	expect_false(rtree_write(tsdn, rtree, &rtree_ctx, start,
	    contents), "Unexpected rtree_write() failure");
	expect_false(rtree_write(tsdn, rtree, &rtree_ctx, end,
	    contents), "Unexpected rtree_write() failure");

	/* Bulk-write the full range and read back each page. */
	rtree_write_range(tsdn, rtree, &rtree_ctx, start, end, contents);
	for (uintptr_t i = 0; i < ((end - start) >> LG_PAGE); i++) {
		expect_ptr_eq(rtree_read(tsdn, rtree, &rtree_ctx,
		    start + (i << LG_PAGE)).edata, edata_e,
		    "rtree_edata_read() should return previously set value");
	}

	/* Bulk-clear, then verify each element is initialized but empty. */
	rtree_clear_range(tsdn, rtree, &rtree_ctx, start, end);
	rtree_leaf_elm_t *elm;
	for (uintptr_t i = 0; i < ((end - start) >> LG_PAGE); i++) {
		elm = rtree_leaf_elm_lookup(tsdn, rtree, &rtree_ctx,
		    start + (i << LG_PAGE), false, false);
		expect_ptr_not_null(elm, "Should have been initialized.");
		expect_ptr_null(rtree_leaf_elm_read(tsdn, rtree, elm,
		    false).edata, "Should have been cleared.");
	}
}
TEST_BEGIN(test_rtree_range) {
	tsdn_t *tsdn = tsdn_fetch();
	base_t *base = base_new(tsdn, 0, &ehooks_default_extent_hooks);
	expect_ptr_not_null(base, "Unexpected base_new failure");

	rtree_t *rtree = &test_rtree;
	expect_false(rtree_new(rtree, base, false),
	    "Unexpected rtree_new() failure");

	/* Address space covered by one rtree leaf node. */
	uintptr_t leaf_cover = ZU(1) << rtree_leaf_maskbits();
	uintptr_t range_sz = ZU(100) << LG_PAGE;

	/* Not crossing rtree node boundary first. */
	uintptr_t lo = leaf_cover;
	uintptr_t hi = lo + range_sz;
	test_rtree_range_write(tsdn, rtree, lo, hi);

	/* Crossing rtree node boundary. */
	lo = leaf_cover - (ZU(10) << LG_PAGE);
	hi = lo + range_sz;
	assert_ptr_ne((void *)rtree_leafkey(lo), (void *)rtree_leafkey(hi),
	    "The range should span across two rtree nodes");
	test_rtree_range_write(tsdn, rtree, lo, hi);

	base_delete(tsdn, base);
}
TEST_END
int
main(void) {
return test(
test_rtree_read_empty,
test_rtree_extrema,
test_rtree_bits,
test_rtree_random);
test_rtree_random,
test_rtree_range);
}