Reduce the memory overhead of sampled small allocations
Previously, small allocations which were sampled as part of heap profiling were rounded up to `SC_LARGE_MINCLASS`. This additional memory usage becomes problematic when the page size is increased, as noted in #2358. Small allocations are now rounded up to the nearest multiple of `PAGE` instead, reducing the memory overhead by a factor of 4 in the most extreme cases.
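As a rough illustration of that arithmetic (a minimal standalone sketch, not part of the change; it assumes a 4 KiB page and an `SC_LARGE_MINCLASS` of four pages, whereas the real values come from jemalloc's size-class machinery and the configured page size):

#include <assert.h>
#include <stddef.h>
#include <stdio.h>

/* Assumed example values only; not taken from a real jemalloc build. */
#define EXAMPLE_PAGE             ((size_t)4096)
#define EXAMPLE_LARGE_MINCLASS   (4 * EXAMPLE_PAGE)

/* Old rule: every sampled small allocation was bumped to SC_LARGE_MINCLASS. */
static size_t
bumped_usize_old(size_t usize) {
	(void)usize;
	return EXAMPLE_LARGE_MINCLASS;
}

/* New rule: bump only to the next multiple of the page size. */
static size_t
bumped_usize_new(size_t usize) {
	return (usize + EXAMPLE_PAGE - 1) & ~(EXAMPLE_PAGE - 1);
}

int
main(void) {
	size_t usize = 8;	/* a tiny sampled allocation */
	printf("old: %zu bytes, new: %zu bytes\n",
	    bumped_usize_old(usize), bumped_usize_new(usize));
	/* 16384 vs. 4096 bytes: the factor-of-4 reduction in the extreme case. */
	assert(bumped_usize_old(usize) == 4 * bumped_usize_new(usize));
	return 0;
}

Sampled small allocations still get their own extent rather than a slot on a slab (per the comments in the diff below); the change only shrinks how far their size is bumped.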
commit 5a858c64d6
parent e1338703ef
committed by Qi Wang

src/arena.c (64 lines changed)
src/arena.c

@@ -1191,7 +1191,7 @@ arena_malloc_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) {
 
 void *
 arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind,
-    bool zero) {
+    bool zero, bool slab) {
 	assert(!tsdn_null(tsdn) || arena != NULL);
 
 	if (likely(!tsdn_null(tsdn))) {
@@ -1201,18 +1201,19 @@ arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind,
 		return NULL;
 	}
 
-	if (likely(size <= SC_SMALL_MAXCLASS)) {
+	if (likely(slab)) {
+		assert(sz_can_use_slab(size));
 		return arena_malloc_small(tsdn, arena, ind, zero);
+	} else {
+		return large_malloc(tsdn, arena, sz_index2size(ind), zero);
 	}
-	return large_malloc(tsdn, arena, sz_index2size(ind), zero);
 }
 
 void *
 arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment,
-    bool zero, tcache_t *tcache) {
-	void *ret;
-
-	if (usize <= SC_SMALL_MAXCLASS) {
+    bool zero, bool slab, tcache_t *tcache) {
+	if (slab) {
+		assert(sz_can_use_slab(usize));
 		/* Small; alignment doesn't require special slab placement. */
 
 		/* usize should be a result of sz_sa2u() */
@@ -1223,27 +1224,26 @@ arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment,
 		 */
 		assert(alignment <= PAGE);
 
-		ret = arena_malloc(tsdn, arena, usize, sz_size2index(usize),
-		    zero, tcache, true);
+		return arena_malloc(tsdn, arena, usize, sz_size2index(usize),
+		    zero, slab, tcache, true);
 	} else {
 		if (likely(alignment <= CACHELINE)) {
-			ret = large_malloc(tsdn, arena, usize, zero);
+			return large_malloc(tsdn, arena, usize, zero);
 		} else {
-			ret = large_palloc(tsdn, arena, usize, alignment, zero);
+			return large_palloc(tsdn, arena, usize, alignment, zero);
 		}
 	}
-	return ret;
 }
 
 void
-arena_prof_promote(tsdn_t *tsdn, void *ptr, size_t usize) {
+arena_prof_promote(tsdn_t *tsdn, void *ptr, size_t usize, size_t bumped_usize) {
 	cassert(config_prof);
 	assert(ptr != NULL);
-	assert(isalloc(tsdn, ptr) == SC_LARGE_MINCLASS);
-	assert(usize <= SC_SMALL_MAXCLASS);
+	assert(isalloc(tsdn, ptr) == bumped_usize);
+	assert(sz_can_use_slab(usize));
 
 	if (config_opt_safety_checks) {
-		safety_check_set_redzone(ptr, usize, SC_LARGE_MINCLASS);
+		safety_check_set_redzone(ptr, usize, bumped_usize);
 	}
 
 	edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr);
@@ -1259,13 +1259,19 @@ static size_t
 arena_prof_demote(tsdn_t *tsdn, edata_t *edata, const void *ptr) {
 	cassert(config_prof);
 	assert(ptr != NULL);
+	size_t usize = isalloc(tsdn, ptr);
+	size_t bumped_usize = sz_sa2u(usize, PROF_SAMPLE_ALIGNMENT);
+	assert(bumped_usize <= SC_LARGE_MINCLASS &&
+	    PAGE_CEILING(bumped_usize) == bumped_usize);
+	assert(edata_size_get(edata) - bumped_usize <= sz_large_pad);
+	szind_t szind = sz_size2index(bumped_usize);
 
-	edata_szind_set(edata, SC_NBINS);
-	emap_remap(tsdn, &arena_emap_global, edata, SC_NBINS, /* slab */ false);
+	edata_szind_set(edata, szind);
+	emap_remap(tsdn, &arena_emap_global, edata, szind, /* slab */ false);
 
-	assert(isalloc(tsdn, ptr) == SC_LARGE_MINCLASS);
+	assert(isalloc(tsdn, ptr) == bumped_usize);
 
-	return SC_LARGE_MINCLASS;
+	return bumped_usize;
 }
 
 void
@@ -1282,10 +1288,10 @@ arena_dalloc_promoted(tsdn_t *tsdn, void *ptr, tcache_t *tcache,
 		 * Currently, we only do redzoning for small sampled
 		 * allocations.
 		 */
-		assert(bumped_usize == SC_LARGE_MINCLASS);
 		safety_check_verify_redzone(ptr, usize, bumped_usize);
 	}
-	if (bumped_usize <= tcache_maxclass && tcache != NULL) {
+	if (bumped_usize >= SC_LARGE_MINCLASS &&
+	    bumped_usize <= tcache_maxclass && tcache != NULL) {
 		tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr,
 		    sz_size2index(bumped_usize), slow_path);
 	} else {
@@ -1443,28 +1449,30 @@ done:
 
 static void *
 arena_ralloc_move_helper(tsdn_t *tsdn, arena_t *arena, size_t usize,
-    size_t alignment, bool zero, tcache_t *tcache) {
+    size_t alignment, bool zero, bool slab, tcache_t *tcache) {
 	if (alignment == 0) {
 		return arena_malloc(tsdn, arena, usize, sz_size2index(usize),
-		    zero, tcache, true);
+		    zero, slab, tcache, true);
 	}
 	usize = sz_sa2u(usize, alignment);
 	if (unlikely(usize == 0 || usize > SC_LARGE_MAXCLASS)) {
 		return NULL;
 	}
-	return ipalloct(tsdn, usize, alignment, zero, tcache, arena);
+	return ipalloct_explicit_slab(tsdn, usize, alignment, zero, slab,
+	    tcache, arena);
 }
 
 void *
 arena_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize,
-    size_t size, size_t alignment, bool zero, tcache_t *tcache,
+    size_t size, size_t alignment, bool zero, bool slab, tcache_t *tcache,
     hook_ralloc_args_t *hook_args) {
 	size_t usize = alignment == 0 ? sz_s2u(size) : sz_sa2u(size, alignment);
 	if (unlikely(usize == 0 || size > SC_LARGE_MAXCLASS)) {
 		return NULL;
 	}
 
-	if (likely(usize <= SC_SMALL_MAXCLASS)) {
+	if (likely(slab)) {
+		assert(sz_can_use_slab(usize));
 		/* Try to avoid moving the allocation. */
 		UNUSED size_t newsize;
 		if (!arena_ralloc_no_move(tsdn, ptr, oldsize, usize, 0, zero,
@@ -1488,7 +1496,7 @@ arena_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize,
 	 * object. In that case, fall back to allocating new space and copying.
 	 */
 	void *ret = arena_ralloc_move_helper(tsdn, arena, usize, alignment,
-	    zero, tcache);
+	    zero, slab, tcache);
 	if (ret == NULL) {
 		return NULL;
 	}
src/jemalloc.c

@@ -2360,7 +2360,7 @@ arena_get_from_ind(tsd_t *tsd, unsigned arena_ind, arena_t **arena_p) {
 /* ind is ignored if dopts->alignment > 0. */
 JEMALLOC_ALWAYS_INLINE void *
 imalloc_no_sample(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd,
-    size_t size, size_t usize, szind_t ind) {
+    size_t size, size_t usize, szind_t ind, bool slab) {
 	/* Fill in the tcache. */
 	tcache_t *tcache = tcache_get_from_ind(tsd, dopts->tcache_ind,
 	    sopts->slow, /* is_alloc */ true);
@@ -2372,12 +2372,12 @@ imalloc_no_sample(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd,
 	}
 
 	if (unlikely(dopts->alignment != 0)) {
-		return ipalloct(tsd_tsdn(tsd), usize, dopts->alignment,
-		    dopts->zero, tcache, arena);
+		return ipalloct_explicit_slab(tsd_tsdn(tsd), usize,
+		    dopts->alignment, dopts->zero, slab, tcache, arena);
 	}
 
-	return iallocztm(tsd_tsdn(tsd), size, ind, dopts->zero, tcache, false,
-	    arena, sopts->slow);
+	return iallocztm_explicit_slab(tsd_tsdn(tsd), size, ind, dopts->zero,
+	    slab, tcache, false, arena, sopts->slow);
 }
 
 JEMALLOC_ALWAYS_INLINE void *
@@ -2385,28 +2385,26 @@ imalloc_sample(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd,
     size_t usize, szind_t ind) {
 	void *ret;
 
+	dopts->alignment = prof_sample_align(usize, dopts->alignment);
 	/*
-	 * For small allocations, sampling bumps the usize. If so, we allocate
-	 * from the ind_large bucket.
+	 * If the allocation is small enough that it would normally be allocated
+	 * on a slab, we need to take additional steps to ensure that it gets
+	 * its own extent instead.
 	 */
-	szind_t ind_large;
-	size_t bumped_usize = sz_s2u(SC_LARGE_MINCLASS);
-
-	dopts->alignment = prof_sample_align(dopts->alignment);
-	if (usize <= SC_SMALL_MAXCLASS) {
-		assert(((dopts->alignment == 0) ?
-		    sz_s2u(SC_LARGE_MINCLASS) :
-		    sz_sa2u(SC_LARGE_MINCLASS, dopts->alignment))
-		    == SC_LARGE_MINCLASS);
-		ind_large = sz_size2index(SC_LARGE_MINCLASS);
+	if (sz_can_use_slab(usize)) {
+		assert((dopts->alignment & PROF_SAMPLE_ALIGNMENT_MASK) == 0);
+		size_t bumped_usize = sz_sa2u(usize, dopts->alignment);
+		szind_t bumped_ind = sz_size2index(bumped_usize);
+		dopts->tcache_ind = TCACHE_IND_NONE;
 		ret = imalloc_no_sample(sopts, dopts, tsd, bumped_usize,
-		    bumped_usize, ind_large);
+		    bumped_usize, bumped_ind, /* slab */ false);
 		if (unlikely(ret == NULL)) {
 			return NULL;
 		}
-		arena_prof_promote(tsd_tsdn(tsd), ret, usize);
+		arena_prof_promote(tsd_tsdn(tsd), ret, usize, bumped_usize);
 	} else {
-		ret = imalloc_no_sample(sopts, dopts, tsd, usize, usize, ind);
+		ret = imalloc_no_sample(sopts, dopts, tsd, usize, usize, ind,
+		    /* slab */ false);
 	}
 	assert(prof_sample_aligned(ret));
 
@@ -2532,9 +2530,10 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) {
 
 		emap_alloc_ctx_t alloc_ctx;
 		if (likely((uintptr_t)tctx == (uintptr_t)1U)) {
-			alloc_ctx.slab = (usize <= SC_SMALL_MAXCLASS);
+			alloc_ctx.slab = sz_can_use_slab(usize);
 			allocation = imalloc_no_sample(
-			    sopts, dopts, tsd, usize, usize, ind);
+			    sopts, dopts, tsd, usize, usize, ind,
+			    alloc_ctx.slab);
 		} else if ((uintptr_t)tctx > (uintptr_t)1U) {
 			allocation = imalloc_sample(
 			    sopts, dopts, tsd, usize, ind);
@@ -2551,7 +2550,7 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) {
 	} else {
 		assert(!opt_prof);
 		allocation = imalloc_no_sample(sopts, dopts, tsd, size, usize,
-		    ind);
+		    ind, sz_can_use_slab(usize));
 		if (unlikely(allocation == NULL)) {
 			goto label_oom;
 		}
@@ -3314,18 +3313,25 @@ irallocx_prof_sample(tsdn_t *tsdn, void *old_ptr, size_t old_usize,
 		return NULL;
 	}
 
-	alignment = prof_sample_align(alignment);
-	if (usize <= SC_SMALL_MAXCLASS) {
-		p = iralloct(tsdn, old_ptr, old_usize,
-		    SC_LARGE_MINCLASS, alignment, zero, tcache,
-		    arena, hook_args);
+	alignment = prof_sample_align(usize, alignment);
+	/*
+	 * If the allocation is small enough that it would normally be allocated
+	 * on a slab, we need to take additional steps to ensure that it gets
+	 * its own extent instead.
+	 */
+	if (sz_can_use_slab(usize)) {
+		size_t bumped_usize = sz_sa2u(usize, alignment);
+		p = iralloct_explicit_slab(tsdn, old_ptr, old_usize,
+		    bumped_usize, alignment, zero, /* slab */ false,
+		    tcache, arena, hook_args);
 		if (p == NULL) {
 			return NULL;
 		}
-		arena_prof_promote(tsdn, p, usize);
+		arena_prof_promote(tsdn, p, usize, bumped_usize);
 	} else {
-		p = iralloct(tsdn, old_ptr, old_usize, usize, alignment, zero,
-		    tcache, arena, hook_args);
+		p = iralloct_explicit_slab(tsdn, old_ptr, old_usize, usize,
+		    alignment, zero, /* slab */ false, tcache, arena,
+		    hook_args);
 	}
 	assert(prof_sample_aligned(p));
 
@@ -3348,7 +3354,7 @@ irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size,
 		    usize, alignment, zero, tcache, arena, tctx, hook_args);
 	} else {
 		p = iralloct(tsd_tsdn(tsd), old_ptr, old_usize, size, alignment,
-		    zero, tcache, arena, hook_args);
+		    usize, zero, tcache, arena, hook_args);
 	}
 	if (unlikely(p == NULL)) {
 		prof_alloc_rollback(tsd, tctx);
@@ -3407,7 +3413,7 @@ do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) {
 		}
 	} else {
 		p = iralloct(tsd_tsdn(tsd), ptr, old_usize, size, alignment,
-		    zero, tcache, arena, &hook_args);
+		    usize, zero, tcache, arena, &hook_args);
 		if (unlikely(p == NULL)) {
 			goto label_oom;
 		}
src/pages.c

@@ -33,7 +33,7 @@
 /* Data. */
 
 /* Actual operating system page size, detected during bootstrap, <= PAGE. */
-static size_t os_page;
+size_t os_page;
 
 #ifndef _WIN32
 # define PAGES_PROT_COMMIT (PROT_READ | PROT_WRITE)