Reduce the memory overhead of sampled small allocations
Previously, small allocations which were sampled as part of heap profiling were rounded up to `SC_LARGE_MINCLASS`. This additional memory usage becomes problematic when the page size is increased, as noted in #2358. Small allocations are now rounded up to the nearest multiple of `PAGE` instead, reducing the memory overhead by a factor of 4 in the most extreme cases.
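As a rough illustration of the saving (a standalone sketch, not jemalloc source: the `PAGE` and `SC_LARGE_MINCLASS` values below are assumptions for a typical 4 KiB-page build, where the smallest large size class is 16 KiB):

```c
#include <stddef.h>
#include <stdio.h>

#define PAGE              ((size_t)4096)       /* assumed page size */
#define SC_LARGE_MINCLASS ((size_t)(4 * PAGE)) /* assumed smallest large size class */

/* Old behavior: bump every sampled small allocation up to a large size class. */
static size_t sampled_usize_old(size_t usize) {
    return usize < SC_LARGE_MINCLASS ? SC_LARGE_MINCLASS : usize;
}

/* New behavior: round the sampled allocation up to the nearest multiple of PAGE. */
static size_t sampled_usize_new(size_t usize) {
    return (usize + PAGE - 1) & ~(PAGE - 1);
}

int main(void) {
    size_t usize = 16; /* a small allocation chosen for sampling */
    printf("old: %zu bytes, new: %zu bytes\n",
        sampled_usize_old(usize), sampled_usize_new(usize));
    /* Prints "old: 16384 bytes, new: 4096 bytes", the 4x extreme case. */
    return 0;
}
```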
committed by Qi Wang
parent e1338703ef
commit 5a858c64d6
@@ -65,10 +65,11 @@ void arena_cache_bin_fill_small(tsdn_t *tsdn, arena_t *arena,
     const unsigned nfill);
 
 void *arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size,
-    szind_t ind, bool zero);
+    szind_t ind, bool zero, bool slab);
 void *arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize,
-    size_t alignment, bool zero, tcache_t *tcache);
-void arena_prof_promote(tsdn_t *tsdn, void *ptr, size_t usize);
+    size_t alignment, bool zero, bool slab, tcache_t *tcache);
+void arena_prof_promote(tsdn_t *tsdn, void *ptr, size_t usize,
+    size_t bumped_usize);
 void arena_dalloc_promoted(tsdn_t *tsdn, void *ptr, tcache_t *tcache,
     bool slow_path);
 void arena_slab_dalloc(tsdn_t *tsdn, arena_t *arena, edata_t *slab);
@@ -81,7 +82,7 @@ void arena_dalloc_small(tsdn_t *tsdn, void *ptr);
 bool arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size,
     size_t extra, bool zero, size_t *newsize);
 void *arena_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize,
-    size_t size, size_t alignment, bool zero, tcache_t *tcache,
+    size_t size, size_t alignment, bool zero, bool slab, tcache_t *tcache,
     hook_ralloc_args_t *hook_args);
 dss_prec_t arena_dss_prec_get(arena_t *arena);
 ehooks_t *arena_get_ehooks(arena_t *arena);
@@ -182,23 +182,22 @@ arena_decay_tick(tsdn_t *tsdn, arena_t *arena) {
 
 JEMALLOC_ALWAYS_INLINE void *
 arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero,
-    tcache_t *tcache, bool slow_path) {
+    bool slab, tcache_t *tcache, bool slow_path) {
     assert(!tsdn_null(tsdn) || tcache == NULL);
 
     if (likely(tcache != NULL)) {
-        if (likely(size <= SC_SMALL_MAXCLASS)) {
+        if (likely(slab)) {
+            assert(sz_can_use_slab(size));
             return tcache_alloc_small(tsdn_tsd(tsdn), arena,
                 tcache, size, ind, zero, slow_path);
-        }
-        if (likely(size <= tcache_maxclass)) {
+        } else if (likely(size <= tcache_maxclass)) {
             return tcache_alloc_large(tsdn_tsd(tsdn), arena,
                 tcache, size, ind, zero, slow_path);
         }
         /* (size > tcache_maxclass) case falls through. */
         assert(size > tcache_maxclass);
     }
 
-    return arena_malloc_hard(tsdn, arena, size, ind, zero);
+    return arena_malloc_hard(tsdn, arena, size, ind, zero, slab);
 }
 
 JEMALLOC_ALWAYS_INLINE arena_t *
@@ -52,10 +52,12 @@ isalloc(tsdn_t *tsdn, const void *ptr) {
 }
 
 JEMALLOC_ALWAYS_INLINE void *
-iallocztm(tsdn_t *tsdn, size_t size, szind_t ind, bool zero, tcache_t *tcache,
-    bool is_internal, arena_t *arena, bool slow_path) {
+iallocztm_explicit_slab(tsdn_t *tsdn, size_t size, szind_t ind, bool zero,
+    bool slab, tcache_t *tcache, bool is_internal, arena_t *arena,
+    bool slow_path) {
     void *ret;
 
+    assert(!slab || sz_can_use_slab(size)); /* slab && large is illegal */
     assert(!is_internal || tcache == NULL);
     assert(!is_internal || arena == NULL || arena_is_auto(arena));
     if (!tsdn_null(tsdn) && tsd_reentrancy_level_get(tsdn_tsd(tsdn)) == 0) {
@@ -63,13 +65,21 @@ iallocztm(tsdn_t *tsdn, size_t size, szind_t ind, bool zero, tcache_t *tcache,
         WITNESS_RANK_CORE, 0);
     }
 
-    ret = arena_malloc(tsdn, arena, size, ind, zero, tcache, slow_path);
+    ret = arena_malloc(tsdn, arena, size, ind, zero, slab, tcache, slow_path);
     if (config_stats && is_internal && likely(ret != NULL)) {
         arena_internal_add(iaalloc(tsdn, ret), isalloc(tsdn, ret));
     }
     return ret;
 }
 
+JEMALLOC_ALWAYS_INLINE void *
+iallocztm(tsdn_t *tsdn, size_t size, szind_t ind, bool zero, tcache_t *tcache,
+    bool is_internal, arena_t *arena, bool slow_path) {
+    bool slab = sz_can_use_slab(size);
+    return iallocztm_explicit_slab(tsdn, size, ind, zero, slab, tcache,
+        is_internal, arena, slow_path);
+}
+
 JEMALLOC_ALWAYS_INLINE void *
 ialloc(tsd_t *tsd, size_t size, szind_t ind, bool zero, bool slow_path) {
     return iallocztm(tsd_tsdn(tsd), size, ind, zero, tcache_get(tsd), false,
@@ -77,10 +87,11 @@ ialloc(tsd_t *tsd, size_t size, szind_t ind, bool zero, bool slow_path) {
 }
 
 JEMALLOC_ALWAYS_INLINE void *
-ipallocztm(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero,
-    tcache_t *tcache, bool is_internal, arena_t *arena) {
+ipallocztm_explicit_slab(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero,
+    bool slab, tcache_t *tcache, bool is_internal, arena_t *arena) {
     void *ret;
 
+    assert(!slab || sz_can_use_slab(usize)); /* slab && large is illegal */
     assert(usize != 0);
     assert(usize == sz_sa2u(usize, alignment));
     assert(!is_internal || tcache == NULL);
@@ -88,7 +99,7 @@ ipallocztm(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero,
     witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
         WITNESS_RANK_CORE, 0);
 
-    ret = arena_palloc(tsdn, arena, usize, alignment, zero, tcache);
+    ret = arena_palloc(tsdn, arena, usize, alignment, zero, slab, tcache);
     assert(ALIGNMENT_ADDR2BASE(ret, alignment) == ret);
     if (config_stats && is_internal && likely(ret != NULL)) {
         arena_internal_add(iaalloc(tsdn, ret), isalloc(tsdn, ret));
@@ -96,12 +107,26 @@ ipallocztm(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero,
     return ret;
 }
 
+JEMALLOC_ALWAYS_INLINE void *
+ipallocztm(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero,
+    tcache_t *tcache, bool is_internal, arena_t *arena) {
+    return ipallocztm_explicit_slab(tsdn, usize, alignment, zero,
+        sz_can_use_slab(usize), tcache, is_internal, arena);
+}
+
 JEMALLOC_ALWAYS_INLINE void *
 ipalloct(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero,
     tcache_t *tcache, arena_t *arena) {
     return ipallocztm(tsdn, usize, alignment, zero, tcache, false, arena);
 }
 
+JEMALLOC_ALWAYS_INLINE void *
+ipalloct_explicit_slab(tsdn_t *tsdn, size_t usize, size_t alignment,
+    bool zero, bool slab, tcache_t *tcache, arena_t *arena) {
+    return ipallocztm_explicit_slab(tsdn, usize, alignment, zero, slab,
+        tcache, false, arena);
+}
+
 JEMALLOC_ALWAYS_INLINE void *
 ipalloc(tsd_t *tsd, size_t usize, size_t alignment, bool zero) {
     return ipallocztm(tsd_tsdn(tsd), usize, alignment, zero,
@@ -146,7 +171,7 @@ isdalloct(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache,
 
 JEMALLOC_ALWAYS_INLINE void *
 iralloct_realign(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size,
-    size_t alignment, bool zero, tcache_t *tcache, arena_t *arena,
+    size_t alignment, bool zero, bool slab, tcache_t *tcache, arena_t *arena,
     hook_ralloc_args_t *hook_args) {
     witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
         WITNESS_RANK_CORE, 0);
@@ -157,7 +182,8 @@ iralloct_realign(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size,
     if (unlikely(usize == 0 || usize > SC_LARGE_MAXCLASS)) {
         return NULL;
     }
-    p = ipalloct(tsdn, usize, alignment, zero, tcache, arena);
+    p = ipalloct_explicit_slab(tsdn, usize, alignment, zero, slab,
+        tcache, arena);
     if (p == NULL) {
         return NULL;
     }
@@ -184,8 +210,9 @@ iralloct_realign(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size,
  * passed-around anywhere.
  */
 JEMALLOC_ALWAYS_INLINE void *
-iralloct(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t alignment,
-    bool zero, tcache_t *tcache, arena_t *arena, hook_ralloc_args_t *hook_args)
+iralloct_explicit_slab(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size,
+    size_t alignment, bool zero, bool slab, tcache_t *tcache, arena_t *arena,
+    hook_ralloc_args_t *hook_args)
 {
     assert(ptr != NULL);
     assert(size != 0);
@@ -199,18 +226,28 @@ iralloct(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t alignment,
          * and copy.
          */
         return iralloct_realign(tsdn, ptr, oldsize, size, alignment,
-            zero, tcache, arena, hook_args);
+            zero, slab, tcache, arena, hook_args);
     }
 
     return arena_ralloc(tsdn, arena, ptr, oldsize, size, alignment, zero,
-        tcache, hook_args);
+        slab, tcache, hook_args);
 }
 
+JEMALLOC_ALWAYS_INLINE void *
+iralloct(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t alignment,
+    size_t usize, bool zero, tcache_t *tcache, arena_t *arena,
+    hook_ralloc_args_t *hook_args)
+{
+    bool slab = sz_can_use_slab(usize);
+    return iralloct_explicit_slab(tsdn, ptr, oldsize, size, alignment, zero,
+        slab, tcache, arena, hook_args);
+}
+
 JEMALLOC_ALWAYS_INLINE void *
 iralloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t alignment,
-    bool zero, hook_ralloc_args_t *hook_args) {
-    return iralloct(tsd_tsdn(tsd), ptr, oldsize, size, alignment, zero,
-        tcache_get(tsd), NULL, hook_args);
+    size_t usize, bool zero, hook_ralloc_args_t *hook_args) {
+    return iralloct(tsd_tsdn(tsd), ptr, oldsize, size, alignment, usize,
+        zero, tcache_get(tsd), NULL, hook_args);
 }
 
 JEMALLOC_ALWAYS_INLINE bool
@@ -405,7 +442,7 @@ maybe_check_alloc_ctx(tsd_t *tsd, void *ptr, emap_alloc_ctx_t *alloc_ctx) {
 
 JEMALLOC_ALWAYS_INLINE bool
 prof_sample_aligned(const void *ptr) {
-    return ((uintptr_t)ptr & PAGE_MASK) == 0;
+    return ((uintptr_t)ptr & PROF_SAMPLE_ALIGNMENT_MASK) == 0;
 }
 
 JEMALLOC_ALWAYS_INLINE bool
@@ -1,6 +1,9 @@
 #ifndef JEMALLOC_INTERNAL_PAGES_EXTERNS_H
 #define JEMALLOC_INTERNAL_PAGES_EXTERNS_H
 
+/* Actual operating system page size, detected during bootstrap, <= PAGE. */
+extern size_t os_page;
+
 /* Page size. LG_PAGE is determined by the configure script. */
 #ifdef PAGE_MASK
 # undef PAGE_MASK
@@ -239,14 +239,15 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t size, size_t usize,
 }
 
 JEMALLOC_ALWAYS_INLINE size_t
-prof_sample_align(size_t orig_align) {
+prof_sample_align(size_t usize, size_t orig_align) {
     /*
-     * Enforce page alignment, so that sampled allocations can be identified
+     * Enforce alignment, so that sampled allocations can be identified
      * w/o metadata lookup.
      */
     assert(opt_prof);
-    return (opt_cache_oblivious && orig_align < PAGE) ? PAGE :
-        orig_align;
+    return (orig_align < PROF_SAMPLE_ALIGNMENT &&
+        (sz_can_use_slab(usize) || opt_cache_oblivious)) ?
+        PROF_SAMPLE_ALIGNMENT : orig_align;
 }
 
 JEMALLOC_ALWAYS_INLINE bool
@@ -80,4 +80,12 @@ typedef struct prof_recent_s prof_recent_t;
 /* Thread name storage size limit. */
 #define PROF_THREAD_NAME_MAX_LEN 16
 
+/*
+ * Minimum required alignment for sampled allocations. Over-aligning sampled
+ * allocations allows us to quickly identify them on the dalloc path without
+ * resorting to metadata lookup.
+ */
+#define PROF_SAMPLE_ALIGNMENT PAGE
+#define PROF_SAMPLE_ALIGNMENT_MASK PAGE_MASK
+
 #endif /* JEMALLOC_INTERNAL_PROF_TYPES_H */
@@ -3,6 +3,8 @@
 
 #define SAFETY_CHECK_DOUBLE_FREE_MAX_SCAN_DEFAULT 32
 
+#include "jemalloc/internal/pages.h"
+
 void safety_check_fail_sized_dealloc(bool current_dealloc, const void *ptr,
     size_t true_size, size_t input_size);
 void safety_check_fail(const char *format, ...);
@@ -12,22 +14,50 @@ typedef void (*safety_check_abort_hook_t)(const char *message);
 /* Can set to NULL for a default. */
 void safety_check_set_abort(safety_check_abort_hook_t abort_fn);
 
+#define REDZONE_SIZE ((size_t) 32)
+#define REDZONE_FILL_VALUE 0xBC
+
+/*
+ * Normally the redzone extends `REDZONE_SIZE` bytes beyond the end of
+ * the allocation. However, we don't let the redzone extend onto another
+ * OS page because this would impose additional overhead if that page was
+ * not already resident in memory.
+ */
+JEMALLOC_ALWAYS_INLINE const unsigned char *
+compute_redzone_end(const void *_ptr, size_t usize, size_t bumped_usize) {
+    const unsigned char *ptr = (const unsigned char *) _ptr;
+    const unsigned char *redzone_end = usize + REDZONE_SIZE < bumped_usize ?
+        &ptr[usize + REDZONE_SIZE] : &ptr[bumped_usize];
+    const unsigned char *page_end = (const unsigned char *)
+        ALIGNMENT_CEILING(((uintptr_t) (&ptr[usize])), os_page);
+    return redzone_end < page_end ? redzone_end : page_end;
+}
+
 JEMALLOC_ALWAYS_INLINE void
 safety_check_set_redzone(void *ptr, size_t usize, size_t bumped_usize) {
-    assert(usize < bumped_usize);
-    for (size_t i = usize; i < bumped_usize && i < usize + 32; ++i) {
-        *((unsigned char *)ptr + i) = 0xBC;
+    assert(usize <= bumped_usize);
+    const unsigned char *redzone_end =
+        compute_redzone_end(ptr, usize, bumped_usize);
+    for (unsigned char *curr = &((unsigned char *)ptr)[usize];
+        curr < redzone_end; curr++) {
+        *curr = REDZONE_FILL_VALUE;
     }
 }
 
 JEMALLOC_ALWAYS_INLINE void
 safety_check_verify_redzone(const void *ptr, size_t usize, size_t bumped_usize)
 {
-    for (size_t i = usize; i < bumped_usize && i < usize + 32; ++i) {
-        if (unlikely(*((unsigned char *)ptr + i) != 0xBC)) {
+    const unsigned char *redzone_end =
+        compute_redzone_end(ptr, usize, bumped_usize);
+    for (const unsigned char *curr = &((const unsigned char *)ptr)[usize];
+        curr < redzone_end; curr++) {
+        if (unlikely(*curr != REDZONE_FILL_VALUE)) {
             safety_check_fail("Use after free error\n");
         }
     }
 }
 
+#undef REDZONE_SIZE
+#undef REDZONE_FILL_VALUE
+
 #endif /*JEMALLOC_INTERNAL_SAFETY_CHECK_H */
@@ -365,6 +365,21 @@ sz_sa2u(size_t size, size_t alignment) {
     return usize;
 }
 
+/*
+ * Under normal circumstances, whether or not to use a slab
+ * to satisfy an allocation depends solely on the allocation's
+ * effective size. However, this is *not* the case when an allocation
+ * is sampled for profiling, in which case you *must not* use a slab
+ * regardless of the effective size. Thus `sz_can_use_slab` is called
+ * on the common path, but there exist `*_explicit_slab` variants of
+ * several functions for handling the aforementioned case of
+ * sampled allocations.
+ */
+JEMALLOC_ALWAYS_INLINE bool
+sz_can_use_slab(size_t size) {
+    return size <= SC_SMALL_MAXCLASS;
+}
+
 size_t sz_psz_quantize_floor(size_t size);
 size_t sz_psz_quantize_ceil(size_t size);
@@ -60,7 +60,7 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache,
     if (unlikely(tcache_small_bin_disabled(binind, bin))) {
         /* stats and zero are handled directly by the arena. */
         return arena_malloc_hard(tsd_tsdn(tsd), arena, size,
-            binind, zero);
+            binind, zero, /* slab */ true);
     }
     tcache_bin_flush_stashed(tsd, tcache, bin, binind,
         /* is_small */ true);