server-skynet-source-3rd-je.../include/jemalloc/internal/arena_inlines_b.h
Kevin Svetlitski 5a858c64d6 Reduce the memory overhead of sampled small allocations
Previously, small allocations which were sampled as part of heap
profiling were rounded up to `SC_LARGE_MINCLASS`. This additional memory
usage becomes problematic when the page size is increased, as noted in #2358.

Small allocations are now rounded up to the nearest multiple of `PAGE`
instead, reducing the memory overhead by a factor of 4 in the most
extreme cases.
2023-07-03 16:19:06 -07:00

599 lines
18 KiB
C

#ifndef JEMALLOC_INTERNAL_ARENA_INLINES_B_H
#define JEMALLOC_INTERNAL_ARENA_INLINES_B_H
#include "jemalloc/internal/div.h"
#include "jemalloc/internal/emap.h"
#include "jemalloc/internal/jemalloc_internal_types.h"
#include "jemalloc/internal/mutex.h"
#include "jemalloc/internal/rtree.h"
#include "jemalloc/internal/safety_check.h"
#include "jemalloc/internal/sc.h"
#include "jemalloc/internal/sz.h"
#include "jemalloc/internal/ticker.h"
static inline arena_t *
arena_get_from_edata(edata_t *edata) {
return (arena_t *)atomic_load_p(&arenas[edata_arena_ind_get(edata)],
ATOMIC_RELAXED);
}
JEMALLOC_ALWAYS_INLINE arena_t *
arena_choose_maybe_huge(tsd_t *tsd, arena_t *arena, size_t size) {
if (arena != NULL) {
return arena;
}
/*
* For huge allocations, use the dedicated huge arena if both are true:
* 1) is using auto arena selection (i.e. arena == NULL), and 2) the
* thread is not assigned to a manual arena.
*/
arena_t *tsd_arena = tsd_arena_get(tsd);
if (tsd_arena == NULL) {
tsd_arena = arena_choose(tsd, NULL);
}
size_t threshold = atomic_load_zu(
&tsd_arena->pa_shard.pac.oversize_threshold, ATOMIC_RELAXED);
if (unlikely(size >= threshold) && arena_is_auto(tsd_arena)) {
return arena_choose_huge(tsd);
}
return tsd_arena;
}
JEMALLOC_ALWAYS_INLINE bool
large_dalloc_safety_checks(edata_t *edata, const void *ptr, szind_t szind) {
if (!config_opt_safety_checks) {
return false;
}
/*
* Eagerly detect double free and sized dealloc bugs for large sizes.
* The cost is low enough (as edata will be accessed anyway) to be
* enabled all the time.
*/
if (unlikely(edata == NULL ||
edata_state_get(edata) != extent_state_active)) {
safety_check_fail("Invalid deallocation detected: "
"pages being freed (%p) not currently active, "
"possibly caused by double free bugs.", ptr);
return true;
}
size_t input_size = sz_index2size(szind);
if (unlikely(input_size != edata_usize_get(edata))) {
safety_check_fail_sized_dealloc(/* current_dealloc */ true, ptr,
/* true_size */ edata_usize_get(edata), input_size);
return true;
}
return false;
}
JEMALLOC_ALWAYS_INLINE void
arena_prof_info_get(tsd_t *tsd, const void *ptr, emap_alloc_ctx_t *alloc_ctx,
prof_info_t *prof_info, bool reset_recent) {
cassert(config_prof);
assert(ptr != NULL);
assert(prof_info != NULL);
edata_t *edata = NULL;
bool is_slab;
/* Static check. */
if (alloc_ctx == NULL) {
edata = emap_edata_lookup(tsd_tsdn(tsd), &arena_emap_global,
ptr);
is_slab = edata_slab_get(edata);
} else if (unlikely(!(is_slab = alloc_ctx->slab))) {
edata = emap_edata_lookup(tsd_tsdn(tsd), &arena_emap_global,
ptr);
}
if (unlikely(!is_slab)) {
/* edata must have been initialized at this point. */
assert(edata != NULL);
if (reset_recent &&
large_dalloc_safety_checks(edata, ptr,
edata_szind_get(edata))) {
prof_info->alloc_tctx = (prof_tctx_t *)(uintptr_t)1U;
return;
}
large_prof_info_get(tsd, edata, prof_info, reset_recent);
} else {
prof_info->alloc_tctx = (prof_tctx_t *)(uintptr_t)1U;
/*
* No need to set other fields in prof_info; they will never be
* accessed if (uintptr_t)alloc_tctx == (uintptr_t)1U.
*/
}
}
JEMALLOC_ALWAYS_INLINE void
arena_prof_tctx_reset(tsd_t *tsd, const void *ptr,
emap_alloc_ctx_t *alloc_ctx) {
cassert(config_prof);
assert(ptr != NULL);
/* Static check. */
if (alloc_ctx == NULL) {
edata_t *edata = emap_edata_lookup(tsd_tsdn(tsd),
&arena_emap_global, ptr);
if (unlikely(!edata_slab_get(edata))) {
large_prof_tctx_reset(edata);
}
} else {
if (unlikely(!alloc_ctx->slab)) {
edata_t *edata = emap_edata_lookup(tsd_tsdn(tsd),
&arena_emap_global, ptr);
large_prof_tctx_reset(edata);
}
}
}
JEMALLOC_ALWAYS_INLINE void
arena_prof_tctx_reset_sampled(tsd_t *tsd, const void *ptr) {
cassert(config_prof);
assert(ptr != NULL);
edata_t *edata = emap_edata_lookup(tsd_tsdn(tsd), &arena_emap_global,
ptr);
assert(!edata_slab_get(edata));
large_prof_tctx_reset(edata);
}
JEMALLOC_ALWAYS_INLINE void
arena_prof_info_set(tsd_t *tsd, edata_t *edata, prof_tctx_t *tctx,
size_t size) {
cassert(config_prof);
assert(!edata_slab_get(edata));
large_prof_info_set(edata, tctx, size);
}
JEMALLOC_ALWAYS_INLINE void
arena_decay_ticks(tsdn_t *tsdn, arena_t *arena, unsigned nticks) {
if (unlikely(tsdn_null(tsdn))) {
return;
}
tsd_t *tsd = tsdn_tsd(tsdn);
/*
* We use the ticker_geom_t to avoid having per-arena state in the tsd.
* Instead of having a countdown-until-decay timer running for every
* arena in every thread, we flip a coin once per tick, whose
* probability of coming up heads is 1/nticks; this is effectively the
* operation of the ticker_geom_t. Each arena has the same chance of a
* coinflip coming up heads (1/ARENA_DECAY_NTICKS_PER_UPDATE), so we can
* use a single ticker for all of them.
*/
ticker_geom_t *decay_ticker = tsd_arena_decay_tickerp_get(tsd);
uint64_t *prng_state = tsd_prng_statep_get(tsd);
if (unlikely(ticker_geom_ticks(decay_ticker, prng_state, nticks,
tsd_reentrancy_level_get(tsd) > 0))) {
arena_decay(tsdn, arena, false, false);
}
}
JEMALLOC_ALWAYS_INLINE void
arena_decay_tick(tsdn_t *tsdn, arena_t *arena) {
arena_decay_ticks(tsdn, arena, 1);
}
JEMALLOC_ALWAYS_INLINE void *
arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero,
bool slab, tcache_t *tcache, bool slow_path) {
assert(!tsdn_null(tsdn) || tcache == NULL);
if (likely(tcache != NULL)) {
if (likely(slab)) {
assert(sz_can_use_slab(size));
return tcache_alloc_small(tsdn_tsd(tsdn), arena,
tcache, size, ind, zero, slow_path);
} else if (likely(size <= tcache_maxclass)) {
return tcache_alloc_large(tsdn_tsd(tsdn), arena,
tcache, size, ind, zero, slow_path);
}
/* (size > tcache_maxclass) case falls through. */
}
return arena_malloc_hard(tsdn, arena, size, ind, zero, slab);
}
JEMALLOC_ALWAYS_INLINE arena_t *
arena_aalloc(tsdn_t *tsdn, const void *ptr) {
edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr);
unsigned arena_ind = edata_arena_ind_get(edata);
return (arena_t *)atomic_load_p(&arenas[arena_ind], ATOMIC_RELAXED);
}
JEMALLOC_ALWAYS_INLINE size_t
arena_salloc(tsdn_t *tsdn, const void *ptr) {
assert(ptr != NULL);
emap_alloc_ctx_t alloc_ctx;
emap_alloc_ctx_lookup(tsdn, &arena_emap_global, ptr, &alloc_ctx);
assert(alloc_ctx.szind != SC_NSIZES);
return sz_index2size(alloc_ctx.szind);
}
JEMALLOC_ALWAYS_INLINE size_t
arena_vsalloc(tsdn_t *tsdn, const void *ptr) {
/*
* Return 0 if ptr is not within an extent managed by jemalloc. This
* function has two extra costs relative to isalloc():
* - The rtree calls cannot claim to be dependent lookups, which induces
* rtree lookup load dependencies.
* - The lookup may fail, so there is an extra branch to check for
* failure.
*/
emap_full_alloc_ctx_t full_alloc_ctx;
bool missing = emap_full_alloc_ctx_try_lookup(tsdn, &arena_emap_global,
ptr, &full_alloc_ctx);
if (missing) {
return 0;
}
if (full_alloc_ctx.edata == NULL) {
return 0;
}
assert(edata_state_get(full_alloc_ctx.edata) == extent_state_active);
/* Only slab members should be looked up via interior pointers. */
assert(edata_addr_get(full_alloc_ctx.edata) == ptr
|| edata_slab_get(full_alloc_ctx.edata));
assert(full_alloc_ctx.szind != SC_NSIZES);
return sz_index2size(full_alloc_ctx.szind);
}
static inline void
arena_dalloc_large_no_tcache(tsdn_t *tsdn, void *ptr, szind_t szind) {
if (config_prof && unlikely(szind < SC_NBINS)) {
arena_dalloc_promoted(tsdn, ptr, NULL, true);
} else {
edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global,
ptr);
if (large_dalloc_safety_checks(edata, ptr, szind)) {
/* See the comment in isfree. */
return;
}
large_dalloc(tsdn, edata);
}
}
static inline void
arena_dalloc_no_tcache(tsdn_t *tsdn, void *ptr) {
assert(ptr != NULL);
emap_alloc_ctx_t alloc_ctx;
emap_alloc_ctx_lookup(tsdn, &arena_emap_global, ptr, &alloc_ctx);
if (config_debug) {
edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global,
ptr);
assert(alloc_ctx.szind == edata_szind_get(edata));
assert(alloc_ctx.szind < SC_NSIZES);
assert(alloc_ctx.slab == edata_slab_get(edata));
}
if (likely(alloc_ctx.slab)) {
/* Small allocation. */
arena_dalloc_small(tsdn, ptr);
} else {
arena_dalloc_large_no_tcache(tsdn, ptr, alloc_ctx.szind);
}
}
JEMALLOC_ALWAYS_INLINE void
arena_dalloc_large(tsdn_t *tsdn, void *ptr, tcache_t *tcache, szind_t szind,
bool slow_path) {
if (szind < nhbins) {
if (config_prof && unlikely(szind < SC_NBINS)) {
arena_dalloc_promoted(tsdn, ptr, tcache, slow_path);
} else {
tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr, szind,
slow_path);
}
} else {
edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global,
ptr);
if (large_dalloc_safety_checks(edata, ptr, szind)) {
/* See the comment in isfree. */
return;
}
large_dalloc(tsdn, edata);
}
}
/* Find the region index of a pointer. */
JEMALLOC_ALWAYS_INLINE size_t
arena_slab_regind_impl(div_info_t* div_info, szind_t binind,
edata_t *slab, const void *ptr) {
size_t diff, regind;
/* Freeing a pointer outside the slab can cause assertion failure. */
assert((uintptr_t)ptr >= (uintptr_t)edata_addr_get(slab));
assert((uintptr_t)ptr < (uintptr_t)edata_past_get(slab));
/* Freeing an interior pointer can cause assertion failure. */
assert(((uintptr_t)ptr - (uintptr_t)edata_addr_get(slab)) %
(uintptr_t)bin_infos[binind].reg_size == 0);
diff = (size_t)((uintptr_t)ptr - (uintptr_t)edata_addr_get(slab));
/* Avoid doing division with a variable divisor. */
regind = div_compute(div_info, diff);
assert(regind < bin_infos[binind].nregs);
return regind;
}
/* Checks whether ptr is currently active in the arena. */
JEMALLOC_ALWAYS_INLINE bool
arena_tcache_dalloc_small_safety_check(tsdn_t *tsdn, void *ptr) {
if (!config_debug) {
return false;
}
edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr);
szind_t binind = edata_szind_get(edata);
div_info_t div_info = arena_binind_div_info[binind];
/*
* Calls the internal function arena_slab_regind_impl because the
* safety check does not require a lock.
*/
size_t regind = arena_slab_regind_impl(&div_info, binind, edata, ptr);
slab_data_t *slab_data = edata_slab_data_get(edata);
const bin_info_t *bin_info = &bin_infos[binind];
assert(edata_nfree_get(edata) < bin_info->nregs);
if (unlikely(!bitmap_get(slab_data->bitmap, &bin_info->bitmap_info,
regind))) {
safety_check_fail(
"Invalid deallocation detected: the pointer being freed (%p) not "
"currently active, possibly caused by double free bugs.\n", ptr);
return true;
}
return false;
}
JEMALLOC_ALWAYS_INLINE void
arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache,
emap_alloc_ctx_t *caller_alloc_ctx, bool slow_path) {
assert(!tsdn_null(tsdn) || tcache == NULL);
assert(ptr != NULL);
if (unlikely(tcache == NULL)) {
arena_dalloc_no_tcache(tsdn, ptr);
return;
}
emap_alloc_ctx_t alloc_ctx;
if (caller_alloc_ctx != NULL) {
alloc_ctx = *caller_alloc_ctx;
} else {
util_assume(!tsdn_null(tsdn));
emap_alloc_ctx_lookup(tsdn, &arena_emap_global, ptr,
&alloc_ctx);
}
if (config_debug) {
edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global,
ptr);
assert(alloc_ctx.szind == edata_szind_get(edata));
assert(alloc_ctx.szind < SC_NSIZES);
assert(alloc_ctx.slab == edata_slab_get(edata));
}
if (likely(alloc_ctx.slab)) {
/* Small allocation. */
if (arena_tcache_dalloc_small_safety_check(tsdn, ptr)) {
return;
}
tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr,
alloc_ctx.szind, slow_path);
} else {
arena_dalloc_large(tsdn, ptr, tcache, alloc_ctx.szind,
slow_path);
}
}
static inline void
arena_sdalloc_no_tcache(tsdn_t *tsdn, void *ptr, size_t size) {
assert(ptr != NULL);
assert(size <= SC_LARGE_MAXCLASS);
emap_alloc_ctx_t alloc_ctx;
if (!config_prof || !opt_prof) {
/*
* There is no risk of being confused by a promoted sampled
* object, so base szind and slab on the given size.
*/
alloc_ctx.szind = sz_size2index(size);
alloc_ctx.slab = (alloc_ctx.szind < SC_NBINS);
}
if ((config_prof && opt_prof) || config_debug) {
emap_alloc_ctx_lookup(tsdn, &arena_emap_global, ptr,
&alloc_ctx);
assert(alloc_ctx.szind == sz_size2index(size));
assert((config_prof && opt_prof)
|| alloc_ctx.slab == (alloc_ctx.szind < SC_NBINS));
if (config_debug) {
edata_t *edata = emap_edata_lookup(tsdn,
&arena_emap_global, ptr);
assert(alloc_ctx.szind == edata_szind_get(edata));
assert(alloc_ctx.slab == edata_slab_get(edata));
}
}
if (likely(alloc_ctx.slab)) {
/* Small allocation. */
arena_dalloc_small(tsdn, ptr);
} else {
arena_dalloc_large_no_tcache(tsdn, ptr, alloc_ctx.szind);
}
}
JEMALLOC_ALWAYS_INLINE void
arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache,
emap_alloc_ctx_t *caller_alloc_ctx, bool slow_path) {
assert(!tsdn_null(tsdn) || tcache == NULL);
assert(ptr != NULL);
assert(size <= SC_LARGE_MAXCLASS);
if (unlikely(tcache == NULL)) {
arena_sdalloc_no_tcache(tsdn, ptr, size);
return;
}
emap_alloc_ctx_t alloc_ctx;
if (config_prof && opt_prof) {
if (caller_alloc_ctx == NULL) {
/* Uncommon case and should be a static check. */
emap_alloc_ctx_lookup(tsdn, &arena_emap_global, ptr,
&alloc_ctx);
assert(alloc_ctx.szind == sz_size2index(size));
} else {
alloc_ctx = *caller_alloc_ctx;
}
} else {
/*
* There is no risk of being confused by a promoted sampled
* object, so base szind and slab on the given size.
*/
alloc_ctx.szind = sz_size2index(size);
alloc_ctx.slab = (alloc_ctx.szind < SC_NBINS);
}
if (config_debug) {
edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global,
ptr);
assert(alloc_ctx.szind == edata_szind_get(edata));
assert(alloc_ctx.slab == edata_slab_get(edata));
}
if (likely(alloc_ctx.slab)) {
/* Small allocation. */
if (arena_tcache_dalloc_small_safety_check(tsdn, ptr)) {
return;
}
tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr,
alloc_ctx.szind, slow_path);
} else {
arena_dalloc_large(tsdn, ptr, tcache, alloc_ctx.szind,
slow_path);
}
}
static inline void
arena_cache_oblivious_randomize(tsdn_t *tsdn, arena_t *arena, edata_t *edata,
size_t alignment) {
assert(edata_base_get(edata) == edata_addr_get(edata));
if (alignment < PAGE) {
unsigned lg_range = LG_PAGE -
lg_floor(CACHELINE_CEILING(alignment));
size_t r;
if (!tsdn_null(tsdn)) {
tsd_t *tsd = tsdn_tsd(tsdn);
r = (size_t)prng_lg_range_u64(
tsd_prng_statep_get(tsd), lg_range);
} else {
uint64_t stack_value = (uint64_t)(uintptr_t)&r;
r = (size_t)prng_lg_range_u64(&stack_value, lg_range);
}
uintptr_t random_offset = ((uintptr_t)r) << (LG_PAGE -
lg_range);
edata->e_addr = (void *)((uintptr_t)edata->e_addr +
random_offset);
assert(ALIGNMENT_ADDR2BASE(edata->e_addr, alignment) ==
edata->e_addr);
}
}
/*
* The dalloc bin info contains just the information that the common paths need
* during tcache flushes. By force-inlining these paths, and using local copies
* of data (so that the compiler knows it's constant), we avoid a whole bunch of
* redundant loads and stores by leaving this information in registers.
*/
typedef struct arena_dalloc_bin_locked_info_s arena_dalloc_bin_locked_info_t;
struct arena_dalloc_bin_locked_info_s {
div_info_t div_info;
uint32_t nregs;
uint64_t ndalloc;
};
JEMALLOC_ALWAYS_INLINE size_t
arena_slab_regind(arena_dalloc_bin_locked_info_t *info, szind_t binind,
edata_t *slab, const void *ptr) {
size_t regind = arena_slab_regind_impl(&info->div_info, binind, slab, ptr);
return regind;
}
JEMALLOC_ALWAYS_INLINE void
arena_dalloc_bin_locked_begin(arena_dalloc_bin_locked_info_t *info,
szind_t binind) {
info->div_info = arena_binind_div_info[binind];
info->nregs = bin_infos[binind].nregs;
info->ndalloc = 0;
}
/*
* Does the deallocation work associated with freeing a single pointer (a
* "step") in between a arena_dalloc_bin_locked begin and end call.
*
* Returns true if arena_slab_dalloc must be called on slab. Doesn't do
* stats updates, which happen during finish (this lets running counts get left
* in a register).
*/
JEMALLOC_ALWAYS_INLINE bool
arena_dalloc_bin_locked_step(tsdn_t *tsdn, arena_t *arena, bin_t *bin,
arena_dalloc_bin_locked_info_t *info, szind_t binind, edata_t *slab,
void *ptr) {
const bin_info_t *bin_info = &bin_infos[binind];
size_t regind = arena_slab_regind(info, binind, slab, ptr);
slab_data_t *slab_data = edata_slab_data_get(slab);
assert(edata_nfree_get(slab) < bin_info->nregs);
/* Freeing an unallocated pointer can cause assertion failure. */
assert(bitmap_get(slab_data->bitmap, &bin_info->bitmap_info, regind));
bitmap_unset(slab_data->bitmap, &bin_info->bitmap_info, regind);
edata_nfree_inc(slab);
if (config_stats) {
info->ndalloc++;
}
unsigned nfree = edata_nfree_get(slab);
if (nfree == bin_info->nregs) {
arena_dalloc_bin_locked_handle_newly_empty(tsdn, arena, slab,
bin);
return true;
} else if (nfree == 1 && slab != bin->slabcur) {
arena_dalloc_bin_locked_handle_newly_nonempty(tsdn, arena, slab,
bin);
}
return false;
}
JEMALLOC_ALWAYS_INLINE void
arena_dalloc_bin_locked_finish(tsdn_t *tsdn, arena_t *arena, bin_t *bin,
arena_dalloc_bin_locked_info_t *info) {
if (config_stats) {
bin->stats.ndalloc += info->ndalloc;
assert(bin->stats.curregs >= (size_t)info->ndalloc);
bin->stats.curregs -= (size_t)info->ndalloc;
}
}
static inline bin_t *
arena_get_bin(arena_t *arena, szind_t binind, unsigned binshard) {
bin_t *shard0 = (bin_t *)((uintptr_t)arena + arena_bin_offsets[binind]);
return shard0 + binshard;
}
#endif /* JEMALLOC_INTERNAL_ARENA_INLINES_B_H */