Further specialize arena_[s]dalloc() tcache fast path.

Use tsd_rtree_ctx() rather than tsdn_rtree_ctx() when tcache is
non-NULL, in order to avoid an extra branch (and potentially extra stack
space) in the fast path.
commit 32e7cf51cd
parent 5e67fbc367
Author: Jason Evans
Date: 2017-03-22 11:00:40 -07:00
3 changed files with 131 additions and 47 deletions
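
The change in a nutshell: tsdn_rtree_ctx() must tolerate a NULL tsd, so every call pays a tsdn_null() branch, and the caller must reserve an rtree_ctx_t fallback on its stack. Once arena_dalloc() and arena_sdalloc() shunt the unlikely tcache == NULL case into the new arena_[s]dalloc_no_tcache() helpers, the fast path holds a non-NULL tcache, and the precondition assert(!tsdn_null(tsdn) || tcache == NULL) then guarantees a usable tsd. A condensed sketch of the two lookups (paraphrased from the hunks below; the static rtree_ctx template and surrounding declarations are elided):

/*
 * Fast path: tsd is known to be non-NULL, so the context comes straight
 * out of thread-specific data: no branch, no fallback.
 */
JEMALLOC_ALWAYS_INLINE rtree_ctx_t *
tsd_rtree_ctx(tsd_t *tsd) {
	return tsd_rtree_ctxp_get(tsd);
}

/*
 * Slow path: a NULL tsd is possible, costing a branch plus a
 * caller-supplied fallback context on the stack.
 */
JEMALLOC_ALWAYS_INLINE rtree_ctx_t *
tsdn_rtree_ctx(tsdn_t *tsdn, rtree_ctx_t *fallback) {
	if (unlikely(tsdn_null(tsdn))) {
		/* Copy a pristine template into the caller's fallback. */
		memcpy(fallback, &rtree_ctx, sizeof(rtree_ctx_t));
		return fallback;
	}
	return tsd_rtree_ctx(tsdn_tsd(tsdn));
}

After this commit, only the arena_[s]dalloc_no_tcache() slow paths still declare the rtree_ctx_fallback stack slot; the tcache fast paths fetch the context directly via tsd_rtree_ctx().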

include/jemalloc/internal/arena_inlines_b.h

@@ -2,20 +2,22 @@
 #define JEMALLOC_INTERNAL_ARENA_INLINES_B_H
 
 #ifndef JEMALLOC_ENABLE_INLINE
-szind_t	arena_bin_index(arena_t *arena, arena_bin_t *bin);
-prof_tctx_t	*arena_prof_tctx_get(tsdn_t *tsdn, const void *ptr);
-void	arena_prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize,
+szind_t arena_bin_index(arena_t *arena, arena_bin_t *bin);
+prof_tctx_t *arena_prof_tctx_get(tsdn_t *tsdn, const void *ptr);
+void arena_prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize,
     prof_tctx_t *tctx);
-void	arena_prof_tctx_reset(tsdn_t *tsdn, const void *ptr, prof_tctx_t *tctx);
-void	arena_decay_ticks(tsdn_t *tsdn, arena_t *arena, unsigned nticks);
-void	arena_decay_tick(tsdn_t *tsdn, arena_t *arena);
-void	*arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind,
+void arena_prof_tctx_reset(tsdn_t *tsdn, const void *ptr, prof_tctx_t *tctx);
+void arena_decay_ticks(tsdn_t *tsdn, arena_t *arena, unsigned nticks);
+void arena_decay_tick(tsdn_t *tsdn, arena_t *arena);
+void *arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind,
     bool zero, tcache_t *tcache, bool slow_path);
-arena_t	*arena_aalloc(tsdn_t *tsdn, const void *ptr);
+arena_t *arena_aalloc(tsdn_t *tsdn, const void *ptr);
 size_t arena_salloc(tsdn_t *tsdn, const void *ptr);
 size_t arena_vsalloc(tsdn_t *tsdn, const void *ptr);
-void	arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path);
-void	arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache,
+void arena_dalloc_no_tcache(tsdn_t *tsdn, void *ptr);
+void arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path);
+void arena_sdalloc_no_tcache(tsdn_t *tsdn, void *ptr, size_t size);
+void arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache,
     bool slow_path);
 #endif
@@ -162,9 +164,8 @@ arena_vsalloc(tsdn_t *tsdn, const void *ptr) {
 	return index2size(szind);
 }
 
-JEMALLOC_ALWAYS_INLINE void
-arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path) {
-	assert(!tsdn_null(tsdn) || tcache == NULL);
+JEMALLOC_INLINE void
+arena_dalloc_no_tcache(tsdn_t *tsdn, void *ptr) {
 	assert(ptr != NULL);
 
 	rtree_ctx_t rtree_ctx_fallback;
@@ -179,25 +180,55 @@ arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path) {
 		extent_t *extent = rtree_extent_read(tsdn, &extents_rtree,
 		    rtree_ctx, (uintptr_t)ptr, true);
 		assert(szind == extent_szind_get(extent));
 		assert(szind < NSIZES);
 		assert(slab == extent_slab_get(extent));
 	}
 
 	if (likely(slab)) {
 		/* Small allocation. */
-		if (likely(tcache != NULL)) {
-			tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr, szind,
-			    slow_path);
-		} else {
-			arena_dalloc_small(tsdn, ptr);
-		}
+		arena_dalloc_small(tsdn, ptr);
 	} else {
-		if (likely(tcache != NULL) && szind < nhbins) {
+		extent_t *extent = iealloc(tsdn, ptr);
+		large_dalloc(tsdn, extent);
+	}
+}
+
+JEMALLOC_ALWAYS_INLINE void
+arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path) {
+	assert(!tsdn_null(tsdn) || tcache == NULL);
+	assert(ptr != NULL);
+
+	if (unlikely(tcache == NULL)) {
+		arena_dalloc_no_tcache(tsdn, ptr);
+		return;
+	}
+
+	rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsdn_tsd(tsdn));
+
+	szind_t szind;
+	bool slab;
+	rtree_szind_slab_read(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)ptr,
+	    true, &szind, &slab);
+
+	if (config_debug) {
+		extent_t *extent = rtree_extent_read(tsdn, &extents_rtree,
+		    rtree_ctx, (uintptr_t)ptr, true);
+		assert(szind == extent_szind_get(extent));
+		assert(szind < NSIZES);
+		assert(slab == extent_slab_get(extent));
+	}
+
+	if (likely(slab)) {
+		/* Small allocation. */
+		tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr, szind,
+		    slow_path);
+	} else {
+		if (szind < nhbins) {
 			if (config_prof && unlikely(szind < NBINS)) {
 				arena_dalloc_promoted(tsdn, ptr, tcache,
 				    slow_path);
 			} else {
-				tcache_dalloc_large(tsdn_tsd(tsdn), tcache,
-				    ptr, szind, slow_path);
+				tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr,
+				    szind, slow_path);
 			}
 		} else {
 			extent_t *extent = iealloc(tsdn, ptr);
@@ -206,11 +237,10 @@ arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path) {
 	}
 }
 
-JEMALLOC_ALWAYS_INLINE void
-arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache,
-    bool slow_path) {
-	assert(!tsdn_null(tsdn) || tcache == NULL);
+JEMALLOC_INLINE void
+arena_sdalloc_no_tcache(tsdn_t *tsdn, void *ptr, size_t size) {
 	assert(ptr != NULL);
 	assert(size <= LARGE_MAXCLASS);
 
 	szind_t szind;
 	bool slab;
@@ -244,20 +274,65 @@ arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache,
 	if (likely(slab)) {
 		/* Small allocation. */
-		if (likely(tcache != NULL)) {
-			tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr, szind,
-			    slow_path);
-		} else {
-			arena_dalloc_small(tsdn, ptr);
-		}
+		arena_dalloc_small(tsdn, ptr);
 	} else {
-		if (likely(tcache != NULL) && szind < nhbins) {
+		extent_t *extent = iealloc(tsdn, ptr);
+		large_dalloc(tsdn, extent);
+	}
+}
+
+JEMALLOC_ALWAYS_INLINE void
+arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache,
+    bool slow_path) {
+	assert(!tsdn_null(tsdn) || tcache == NULL);
+	assert(ptr != NULL);
+	assert(size <= LARGE_MAXCLASS);
+
+	if (unlikely(tcache == NULL)) {
+		arena_sdalloc_no_tcache(tsdn, ptr, size);
+		return;
+	}
+
+	szind_t szind;
+	bool slab;
+	if (!config_prof || !opt_prof) {
+		/*
+		 * There is no risk of being confused by a promoted sampled
+		 * object, so base szind and slab on the given size.
+		 */
+		szind = size2index(size);
+		slab = (szind < NBINS);
+	}
+
+	if ((config_prof && opt_prof) || config_debug) {
+		rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsdn_tsd(tsdn));
+		rtree_szind_slab_read(tsdn, &extents_rtree, rtree_ctx,
+		    (uintptr_t)ptr, true, &szind, &slab);
+
+		assert(szind == size2index(size));
+		assert((config_prof && opt_prof) || slab == (szind < NBINS));
+
+		if (config_debug) {
+			extent_t *extent = rtree_extent_read(tsdn,
+			    &extents_rtree, rtree_ctx, (uintptr_t)ptr, true);
+			assert(szind == extent_szind_get(extent));
+			assert(slab == extent_slab_get(extent));
+		}
+	}
+
+	if (likely(slab)) {
+		/* Small allocation. */
+		tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr, szind,
+		    slow_path);
+	} else {
+		if (szind < nhbins) {
 			if (config_prof && unlikely(szind < NBINS)) {
 				arena_dalloc_promoted(tsdn, ptr, tcache,
 				    slow_path);
 			} else {
-				tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr,
-				    szind, slow_path);
+				tcache_dalloc_large(tsdn_tsd(tsdn),
+				    tcache, ptr, szind, slow_path);
 			}
 		} else {
 			extent_t *extent = iealloc(tsdn, ptr);

include/jemalloc/internal/private_symbols.txt

@@ -13,6 +13,7 @@ arena_cleanup
 arena_dalloc
 arena_dalloc_bin_junked_locked
 arena_dalloc_junk_small
+arena_dalloc_no_tcache
 arena_dalloc_promoted
 arena_dalloc_small
 arena_decay
@@ -69,6 +70,7 @@ arena_ralloc_no_move
 arena_reset
 arena_salloc
 arena_sdalloc
+arena_sdalloc_no_tcache
 arena_set
 arena_slab_regind
 arena_stats_init
@@ -528,6 +530,7 @@ tsd_nominal
 tsd_prof_tdata_get
 tsd_prof_tdata_set
 tsd_prof_tdatap_get
+tsd_rtree_ctx
 tsd_rtree_ctxp_get
 tsd_rtree_leaf_elm_witnessesp_get
 tsd_set

include/jemalloc/internal/tsd_inlines.h

@@ -4,20 +4,21 @@
 #ifndef JEMALLOC_ENABLE_INLINE
 malloc_tsd_protos(JEMALLOC_ATTR(unused), , tsd_t)
 
-tsd_t	*tsd_fetch_impl(bool init);
-tsd_t	*tsd_fetch(void);
-tsdn_t	*tsd_tsdn(tsd_t *tsd);
-bool	tsd_nominal(tsd_t *tsd);
+tsd_t *tsd_fetch_impl(bool init);
+tsd_t *tsd_fetch(void);
+tsdn_t *tsd_tsdn(tsd_t *tsd);
+bool tsd_nominal(tsd_t *tsd);
 #define O(n, t, gs, c)	\
-t	*tsd_##n##p_get(tsd_t *tsd);	\
-t	tsd_##n##_get(tsd_t *tsd);	\
-void	tsd_##n##_set(tsd_t *tsd, t n);
+t *tsd_##n##p_get(tsd_t *tsd);	\
+t tsd_##n##_get(tsd_t *tsd);	\
+void tsd_##n##_set(tsd_t *tsd, t n);
 MALLOC_TSD
 #undef O
 
-tsdn_t	*tsdn_fetch(void);
-bool	tsdn_null(const tsdn_t *tsdn);
-tsd_t	*tsdn_tsd(tsdn_t *tsdn);
-rtree_ctx_t	*tsdn_rtree_ctx(tsdn_t *tsdn, rtree_ctx_t *fallback);
+tsdn_t *tsdn_fetch(void);
+bool tsdn_null(const tsdn_t *tsdn);
+tsd_t *tsdn_tsd(tsdn_t *tsdn);
+rtree_ctx_t *tsd_rtree_ctx(tsd_t *tsd);
+rtree_ctx_t *tsdn_rtree_ctx(tsdn_t *tsdn, rtree_ctx_t *fallback);
 #endif
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_TSD_C_))
@@ -108,6 +109,11 @@ tsdn_tsd(tsdn_t *tsdn) {
 	return &tsdn->tsd;
 }
 
+JEMALLOC_ALWAYS_INLINE rtree_ctx_t *
+tsd_rtree_ctx(tsd_t *tsd) {
+	return tsd_rtree_ctxp_get(tsd);
+}
+
 JEMALLOC_ALWAYS_INLINE rtree_ctx_t *
 tsdn_rtree_ctx(tsdn_t *tsdn, rtree_ctx_t *fallback) {
 	/*
@@ -119,7 +125,7 @@ tsdn_rtree_ctx(tsdn_t *tsdn, rtree_ctx_t *fallback) {
 		memcpy(fallback, &rtree_ctx, sizeof(rtree_ctx_t));
 		return fallback;
 	}
-	return tsd_rtree_ctxp_get(tsdn_tsd(tsdn));
+	return tsd_rtree_ctx(tsdn_tsd(tsdn));
 }
 #endif