Remove support for non-prof-promote heap profiling metadata.

Make promotion of sampled small objects to large objects mandatory, so
that profiling metadata can always be stored in the chunk map, rather
than requiring one pointer per small region in each small-region page
run.  In practice the non-prof-promote code was only useful when using
jemalloc to track all objects and report them as leaks at program exit.
However, Valgrind is at least as good a tool for this particular use
case.

Furthermore, the non-prof-promote code is getting in the way of
some optimizations that will make heap profiling much cheaper for the
predominant use case (sampling a small representative proportion of all
allocations).
Jason Evans 2014-04-11 14:24:51 -07:00
parent f4e026f525
commit 9b0cbf0850
8 changed files with 28 additions and 110 deletions
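For context, here is a minimal sketch (not the committed code) of the shape the sampled-allocation path takes once promotion is unconditional. The imalloc_prof_sample() hunks below show the same pattern; the sampled_malloc name and the arena_prof_promoted() call are assumptions, since neither appears verbatim in the hunks shown here:

static void *
sampled_malloc(size_t usize, prof_thr_cnt_t *cnt)
{
	void *p;

	if (cnt == NULL)
		return (NULL);
	if (usize <= SMALL_MAXCLASS) {
		/*
		 * Promote: service the request from the smallest large size
		 * class so the prof ctx pointer has a chunk map slot.
		 */
		p = imalloc(SMALL_MAXCLASS+1);
		if (p == NULL)
			return (NULL);
		/* Record the originally requested usize (assumed helper). */
		arena_prof_promoted(p, usize);
	} else
		p = imalloc(usize);
	return (p);
}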

@ -110,7 +110,6 @@ struct arena_chunk_map_s {
* p : run page offset
* s : run size
* n : binind for size class; large objects set these to BININD_INVALID
* except for promoted allocations (see prof_promote)
* x : don't care
* - : 0
* + : 1
@ -216,8 +215,6 @@ struct arena_run_s {
* | ... |
* bitmap_offset | bitmap |
* | ... |
* ctx0_offset | ctx map |
* | ... |
* |--------------------|
* | redzone |
* reg0_offset | region 0 |
@ -270,12 +267,6 @@ struct arena_bin_info_s {
*/
bitmap_info_t bitmap_info;
/*
* Offset of first (prof_ctx_t *) in a run header for this bin's size
* class, or 0 if (config_prof == false || opt_prof == false).
*/
uint32_t ctx0_offset;
/* Offset of first region in a run for this bin's size class. */
uint32_t reg0_offset;
};
@ -492,7 +483,7 @@ size_t arena_bin_index(arena_t *arena, arena_bin_t *bin);
unsigned arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info,
const void *ptr);
prof_ctx_t *arena_prof_ctx_get(const void *ptr);
void arena_prof_ctx_set(const void *ptr, size_t usize, prof_ctx_t *ctx);
void arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx);
void *arena_malloc(arena_t *arena, size_t size, bool zero, bool try_tcache);
size_t arena_salloc(const void *ptr, bool demote);
void arena_dalloc(arena_chunk_t *chunk, void *ptr, bool try_tcache);
@ -879,31 +870,16 @@ arena_prof_ctx_get(const void *ptr)
pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
mapbits = arena_mapbits_get(chunk, pageind);
assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
if ((mapbits & CHUNK_MAP_LARGE) == 0) {
if (prof_promote)
ret = (prof_ctx_t *)(uintptr_t)1U;
else {
arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
(uintptr_t)((pageind - (mapbits >> LG_PAGE)) <<
LG_PAGE));
size_t binind = arena_ptr_small_binind_get(ptr,
mapbits);
arena_bin_info_t *bin_info = &arena_bin_info[binind];
unsigned regind;
regind = arena_run_regind(run, bin_info, ptr);
ret = *(prof_ctx_t **)((uintptr_t)run +
bin_info->ctx0_offset + (regind *
sizeof(prof_ctx_t *)));
}
} else
if ((mapbits & CHUNK_MAP_LARGE) == 0)
ret = (prof_ctx_t *)(uintptr_t)1U;
else
ret = arena_mapp_get(chunk, pageind)->prof_ctx;
return (ret);
}
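For readability, here is the post-change body of arena_prof_ctx_get() assembled from the lines that remain in the hunk above (the prologue that derives chunk from ptr lies outside the hunk and is omitted):

	pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
	mapbits = arena_mapbits_get(chunk, pageind);
	assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
	/*
	 * Small regions can no longer carry a sampled ctx, so they report the
	 * (prof_ctx_t *)1U sentinel used for unsampled objects; large (and
	 * therefore possibly promoted) objects keep theirs in the chunk map.
	 */
	if ((mapbits & CHUNK_MAP_LARGE) == 0)
		ret = (prof_ctx_t *)(uintptr_t)1U;
	else
		ret = arena_mapp_get(chunk, pageind)->prof_ctx;
	return (ret);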
JEMALLOC_INLINE void
arena_prof_ctx_set(const void *ptr, size_t usize, prof_ctx_t *ctx)
arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
{
arena_chunk_t *chunk;
size_t pageind;
@ -916,31 +892,8 @@ arena_prof_ctx_set(const void *ptr, size_t usize, prof_ctx_t *ctx)
pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
assert(arena_mapbits_allocated_get(chunk, pageind) != 0);
if (usize > SMALL_MAXCLASS || (prof_promote &&
((uintptr_t)ctx != (uintptr_t)1U || arena_mapbits_large_get(chunk,
pageind) != 0))) {
assert(arena_mapbits_large_get(chunk, pageind) != 0);
if (arena_mapbits_large_get(chunk, pageind) != 0)
arena_mapp_get(chunk, pageind)->prof_ctx = ctx;
} else {
assert(arena_mapbits_large_get(chunk, pageind) == 0);
if (prof_promote == false) {
size_t mapbits = arena_mapbits_get(chunk, pageind);
arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
(uintptr_t)((pageind - (mapbits >> LG_PAGE)) <<
LG_PAGE));
size_t binind;
arena_bin_info_t *bin_info;
unsigned regind;
binind = arena_ptr_small_binind_get(ptr, mapbits);
bin_info = &arena_bin_info[binind];
regind = arena_run_regind(run, bin_info, ptr);
*((prof_ctx_t **)((uintptr_t)run +
bin_info->ctx0_offset + (regind * sizeof(prof_ctx_t
*)))) = ctx;
}
}
}
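Likewise, arena_prof_ctx_set() reduces to roughly the lines kept above: the ctx pointer is only stored for large mappings, which now cover every sampled (promoted) small request, and calls on unsampled small regions become no-ops:

	pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
	assert(arena_mapbits_allocated_get(chunk, pageind) != 0);
	if (arena_mapbits_large_get(chunk, pageind) != 0)
		arena_mapp_get(chunk, pageind)->prof_ctx = ctx;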
JEMALLOC_ALWAYS_INLINE void *
@ -989,7 +942,7 @@ arena_salloc(const void *ptr, bool demote)
assert(arena_mapbits_allocated_get(chunk, pageind) != 0);
binind = arena_mapbits_binind_get(chunk, pageind);
if (binind == BININD_INVALID || (config_prof && demote == false &&
prof_promote && arena_mapbits_large_get(chunk, pageind) != 0)) {
arena_mapbits_large_get(chunk, pageind) != 0)) {
/*
* Large allocation. In the common case (demote == true), and
* as this is an inline function, most callers will only end up
@ -1007,10 +960,7 @@ arena_salloc(const void *ptr, bool demote)
assert(arena_mapbits_dirty_get(chunk, pageind) ==
arena_mapbits_dirty_get(chunk, pageind+(ret>>LG_PAGE)-1));
} else {
/*
* Small allocation (possibly promoted to a large object due to
* prof_promote).
*/
/* Small allocation (possibly promoted to a large object). */
assert(arena_mapbits_large_get(chunk, pageind) != 0 ||
arena_ptr_small_binind_get(ptr, arena_mapbits_get(chunk,
pageind)) == binind);

@ -303,7 +303,6 @@ prof_mdump
prof_postfork_child
prof_postfork_parent
prof_prefork
prof_promote
prof_realloc
prof_sample_accum_update
prof_sample_threshold_update

@ -220,12 +220,6 @@ extern char opt_prof_prefix[
*/
extern uint64_t prof_interval;
/*
* If true, promote small sampled objects to large objects, since small run
* headers do not have embedded profile context pointers.
*/
extern bool prof_promote;
void bt_init(prof_bt_t *bt, void **vec);
void prof_backtrace(prof_bt_t *bt, unsigned nignore);
prof_thr_cnt_t *prof_lookup(prof_bt_t *bt);
@ -308,7 +302,7 @@ malloc_tsd_protos(JEMALLOC_ATTR(unused), prof_tdata, prof_tdata_t *)
prof_tdata_t *prof_tdata_get(bool create);
void prof_sample_threshold_update(prof_tdata_t *prof_tdata);
prof_ctx_t *prof_ctx_get(const void *ptr);
void prof_ctx_set(const void *ptr, size_t usize, prof_ctx_t *ctx);
void prof_ctx_set(const void *ptr, prof_ctx_t *ctx);
bool prof_sample_accum_update(size_t size);
void prof_malloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt);
void prof_realloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt,
@ -405,7 +399,7 @@ prof_ctx_get(const void *ptr)
}
JEMALLOC_INLINE void
prof_ctx_set(const void *ptr, size_t usize, prof_ctx_t *ctx)
prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
{
arena_chunk_t *chunk;
@ -415,7 +409,7 @@ prof_ctx_set(const void *ptr, size_t usize, prof_ctx_t *ctx)
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
if (chunk != ptr) {
/* Region. */
arena_prof_ctx_set(ptr, usize, ctx);
arena_prof_ctx_set(ptr, ctx);
} else
huge_prof_ctx_set(ptr, ctx);
}
@ -471,7 +465,7 @@ prof_malloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt)
}
if ((uintptr_t)cnt > (uintptr_t)1U) {
prof_ctx_set(ptr, usize, cnt->ctx);
prof_ctx_set(ptr, cnt->ctx);
cnt->epoch++;
/*********/
@ -491,7 +485,7 @@ prof_malloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt)
mb_write();
/*********/
} else
prof_ctx_set(ptr, usize, (prof_ctx_t *)(uintptr_t)1U);
prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U);
}
JEMALLOC_INLINE void
@ -539,10 +533,10 @@ prof_realloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt,
if ((uintptr_t)told_cnt > (uintptr_t)1U)
told_cnt->epoch++;
if ((uintptr_t)cnt > (uintptr_t)1U) {
prof_ctx_set(ptr, usize, cnt->ctx);
prof_ctx_set(ptr, cnt->ctx);
cnt->epoch++;
} else if (ptr != NULL)
prof_ctx_set(ptr, usize, (prof_ctx_t *)(uintptr_t)1U);
prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U);
/*********/
mb_write();
/*********/

@ -94,9 +94,8 @@ cat <<EOF
/*
* The small_size2bin lookup table uses uint8_t to encode each bin index, so we
* cannot support more than 256 small size classes. Further constrain NBINS to
* 255 to support prof_promote, since all small size classes, plus a "not
* small" size class must be stored in 8 bits of arena_chunk_map_t's bits
* field.
* 255 since all small size classes, plus a "not small" size class must be
* stored in 8 bits of arena_chunk_map_t's bits field.
*/
#if (NBINS > 255)
# error "Too many small size classes"

@ -354,7 +354,7 @@ tcache_alloc_large(tcache_t *tcache, size_t size, bool zero)
if (ret == NULL)
return (NULL);
} else {
if (config_prof && prof_promote && size == PAGE) {
if (config_prof && size == PAGE) {
arena_chunk_t *chunk =
(arena_chunk_t *)CHUNK_ADDR2BASE(ret);
size_t pageind = (((uintptr_t)ret - (uintptr_t)chunk) >>

@ -2373,7 +2373,6 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size)
uint32_t try_nregs, good_nregs;
uint32_t try_hdr_size, good_hdr_size;
uint32_t try_bitmap_offset, good_bitmap_offset;
uint32_t try_ctx0_offset, good_ctx0_offset;
uint32_t try_redzone0_offset, good_redzone0_offset;
assert(min_run_size >= PAGE);
@ -2428,14 +2427,6 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size)
try_bitmap_offset = try_hdr_size;
/* Add space for bitmap. */
try_hdr_size += bitmap_size(try_nregs);
if (config_prof && opt_prof && prof_promote == false) {
/* Pad to a quantum boundary. */
try_hdr_size = QUANTUM_CEILING(try_hdr_size);
try_ctx0_offset = try_hdr_size;
/* Add space for one (prof_ctx_t *) per region. */
try_hdr_size += try_nregs * sizeof(prof_ctx_t *);
} else
try_ctx0_offset = 0;
try_redzone0_offset = try_run_size - (try_nregs *
bin_info->reg_interval) - pad_size;
} while (try_hdr_size > try_redzone0_offset);
@ -2449,7 +2440,6 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size)
good_nregs = try_nregs;
good_hdr_size = try_hdr_size;
good_bitmap_offset = try_bitmap_offset;
good_ctx0_offset = try_ctx0_offset;
good_redzone0_offset = try_redzone0_offset;
/* Try more aggressive settings. */
@ -2469,16 +2459,6 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size)
try_bitmap_offset = try_hdr_size;
/* Add space for bitmap. */
try_hdr_size += bitmap_size(try_nregs);
if (config_prof && opt_prof && prof_promote == false) {
/* Pad to a quantum boundary. */
try_hdr_size = QUANTUM_CEILING(try_hdr_size);
try_ctx0_offset = try_hdr_size;
/*
* Add space for one (prof_ctx_t *) per region.
*/
try_hdr_size += try_nregs *
sizeof(prof_ctx_t *);
}
try_redzone0_offset = try_run_size - (try_nregs *
bin_info->reg_interval) - pad_size;
} while (try_hdr_size > try_redzone0_offset);
@ -2494,7 +2474,6 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size)
bin_info->run_size = good_run_size;
bin_info->nregs = good_nregs;
bin_info->bitmap_offset = good_bitmap_offset;
bin_info->ctx0_offset = good_ctx0_offset;
bin_info->reg0_offset = good_redzone0_offset + bin_info->redzone_size;
assert(bin_info->reg0_offset - bin_info->redzone_size + (bin_info->nregs

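With the ctx map gone, bin_info_run_size_calc() only has to fit the run struct and the allocation bitmap ahead of the regions. Below is a standalone, deliberately simplified sketch of that fixed-point iteration; the nregs_for_run/bitmap_bytes names and fixed sizes are illustrative, and the real function also handles redzones, padding, and alignment:

#include <stdint.h>
#include <stdio.h>

/* Stand-in for jemalloc's bitmap_size(): one bit per region, 64-bit groups. */
static uint32_t
bitmap_bytes(uint32_t nregs)
{
	return (((nregs + 63) / 64) * (uint32_t)sizeof(uint64_t));
}

/*
 * Pick the largest region count such that the header (run struct + bitmap,
 * and nothing else now that the per-region prof_ctx_t * array is gone) still
 * fits in front of region 0.
 */
static uint32_t
nregs_for_run(uint32_t run_size, uint32_t run_struct_size, uint32_t reg_size)
{
	uint32_t try_nregs, try_hdr_size, try_reg0_offset;

	/* The +1 counteracts the first try_nregs-- in the loop. */
	try_nregs = (run_size - run_struct_size) / reg_size + 1;
	do {
		try_nregs--;
		try_hdr_size = run_struct_size + bitmap_bytes(try_nregs);
		try_reg0_offset = run_size - try_nregs * reg_size;
	} while (try_hdr_size > try_reg0_offset);
	return (try_nregs);
}

int
main(void)
{
	/* Example: 4 KiB run, 40-byte run struct, 64-byte regions -> 63. */
	printf("%u regions fit\n", nregs_for_run(4096, 40, 64));
	return (0);
}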
@ -853,7 +853,7 @@ imalloc_prof_sample(size_t usize, prof_thr_cnt_t *cnt)
if (cnt == NULL)
return (NULL);
if (prof_promote && usize <= SMALL_MAXCLASS) {
if (usize <= SMALL_MAXCLASS) {
p = imalloc(SMALL_MAXCLASS+1);
if (p == NULL)
return (NULL);
@ -952,7 +952,7 @@ imemalign_prof_sample(size_t alignment, size_t usize, prof_thr_cnt_t *cnt)
if (cnt == NULL)
return (NULL);
if (prof_promote && usize <= SMALL_MAXCLASS) {
if (usize <= SMALL_MAXCLASS) {
assert(sa2u(SMALL_MAXCLASS+1, alignment) != 0);
p = ipalloc(sa2u(SMALL_MAXCLASS+1, alignment), alignment,
false);
@ -1086,7 +1086,7 @@ icalloc_prof_sample(size_t usize, prof_thr_cnt_t *cnt)
if (cnt == NULL)
return (NULL);
if (prof_promote && usize <= SMALL_MAXCLASS) {
if (usize <= SMALL_MAXCLASS) {
p = icalloc(SMALL_MAXCLASS+1);
if (p == NULL)
return (NULL);
@ -1183,7 +1183,7 @@ irealloc_prof_sample(void *oldptr, size_t usize, prof_thr_cnt_t *cnt)
if (cnt == NULL)
return (NULL);
if (prof_promote && usize <= SMALL_MAXCLASS) {
if (usize <= SMALL_MAXCLASS) {
p = iralloc(oldptr, SMALL_MAXCLASS+1, 0, 0, false);
if (p == NULL)
return (NULL);
@ -1395,7 +1395,7 @@ imallocx_prof_sample(size_t usize, size_t alignment, bool zero, bool try_tcache,
if (cnt == NULL)
return (NULL);
if (prof_promote && usize <= SMALL_MAXCLASS) {
if (usize <= SMALL_MAXCLASS) {
size_t usize_promoted = (alignment == 0) ?
s2u(SMALL_MAXCLASS+1) : sa2u(SMALL_MAXCLASS+1, alignment);
assert(usize_promoted != 0);
@ -1492,7 +1492,7 @@ irallocx_prof_sample(void *oldptr, size_t size, size_t alignment, size_t usize,
if (cnt == NULL)
return (NULL);
if (prof_promote && usize <= SMALL_MAXCLASS) {
if (usize <= SMALL_MAXCLASS) {
p = iralloct(oldptr, SMALL_MAXCLASS+1, (SMALL_MAXCLASS+1 >=
size) ? 0 : size - (SMALL_MAXCLASS+1), alignment, zero,
try_tcache_alloc, try_tcache_dalloc, arena);
@ -1639,8 +1639,8 @@ ixallocx_prof_sample(void *ptr, size_t old_usize, size_t size, size_t extra,
if (cnt == NULL)
return (old_usize);
/* Use minimum usize to determine whether promotion may happen. */
if (prof_promote && ((alignment == 0) ? s2u(size) : sa2u(size,
alignment)) <= SMALL_MAXCLASS) {
if (((alignment == 0) ? s2u(size) : sa2u(size, alignment)) <=
SMALL_MAXCLASS) {
if (ixalloc(ptr, SMALL_MAXCLASS+1, (SMALL_MAXCLASS+1 >=
size+extra) ? 0 : size+extra - (SMALL_MAXCLASS+1),
alignment, zero))

@ -32,7 +32,6 @@ char opt_prof_prefix[
1];
uint64_t prof_interval = 0;
bool prof_promote;
/*
* Table of mutexes that are shared among ctx's. These are leaf locks, so
@ -1300,8 +1299,8 @@ prof_boot1(void)
cassert(config_prof);
/*
* opt_prof and prof_promote must be in their final state before any
* arenas are initialized, so this function must be executed early.
* opt_prof must be in its final state before any arenas are
* initialized, so this function must be executed early.
*/
if (opt_prof_leak && opt_prof == false) {
@ -1317,8 +1316,6 @@ prof_boot1(void)
opt_lg_prof_interval);
}
}
prof_promote = (opt_prof && opt_lg_prof_sample > LG_PAGE);
}
bool