From 9b0cbf0850b130a9b0a8c58bd10b2926b2083510 Mon Sep 17 00:00:00 2001
From: Jason Evans <jasone@canonware.com>
Date: Fri, 11 Apr 2014 14:24:51 -0700
Subject: [PATCH] Remove support for non-prof-promote heap profiling metadata.

Make promotion of sampled small objects to large objects mandatory, so
that profiling metadata can always be stored in the chunk map, rather
than requiring one pointer per small region in each small-region page
run.  In practice the non-prof-promote code was only useful when using
jemalloc to track all objects and report them as leaks at program exit.
However, Valgrind is at least as good a tool for this particular use
case.

Furthermore, the non-prof-promote code is getting in the way of some
optimizations that will make heap profiling much cheaper for the
predominant use case (sampling a small representative proportion of all
allocations).
---
 include/jemalloc/internal/arena.h             | 66 +++----
 include/jemalloc/internal/private_symbols.txt |  1 -
 include/jemalloc/internal/prof.h              | 20 ++----
 include/jemalloc/internal/size_classes.sh     |  5 +-
 include/jemalloc/internal/tcache.h            |  2 +-
 src/arena.c                                   | 21 ------
 src/jemalloc.c                                | 16 ++---
 src/prof.c                                    |  7 +-
 8 files changed, 28 insertions(+), 110 deletions(-)

diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h
index b899888d..0e14c2c4 100644
--- a/include/jemalloc/internal/arena.h
+++ b/include/jemalloc/internal/arena.h
@@ -110,7 +110,6 @@ struct arena_chunk_map_s {
  *   p : run page offset
  *   s : run size
  *   n : binind for size class; large objects set these to BININD_INVALID
- *       except for promoted allocations (see prof_promote)
  *   x : don't care
  *   - : 0
  *   + : 1
@@ -216,8 +215,6 @@ struct arena_run_s {
  *               | ...                |
  * bitmap_offset | bitmap             |
  *               | ...                |
- *   ctx0_offset | ctx map            |
- *               | ...                |
  *               |--------------------|
  *               | redzone            |
  *   reg0_offset | region 0           |
@@ -270,12 +267,6 @@ struct arena_bin_info_s {
 	 */
 	bitmap_info_t	bitmap_info;
 
-	/*
-	 * Offset of first (prof_ctx_t *) in a run header for this bin's size
-	 * class, or 0 if (config_prof == false || opt_prof == false).
-	 */
-	uint32_t	ctx0_offset;
-
 	/* Offset of first region in a run for this bin's size class. */
 	uint32_t	reg0_offset;
 };
@@ -492,7 +483,7 @@ size_t	arena_bin_index(arena_t *arena, arena_bin_t *bin);
 unsigned	arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info,
     const void *ptr);
 prof_ctx_t	*arena_prof_ctx_get(const void *ptr);
-void	arena_prof_ctx_set(const void *ptr, size_t usize, prof_ctx_t *ctx);
+void	arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx);
 void	*arena_malloc(arena_t *arena, size_t size, bool zero, bool try_tcache);
 size_t	arena_salloc(const void *ptr, bool demote);
 void	arena_dalloc(arena_chunk_t *chunk, void *ptr, bool try_tcache);
@@ -879,31 +870,16 @@ arena_prof_ctx_get(const void *ptr)
 	pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
 	mapbits = arena_mapbits_get(chunk, pageind);
 	assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
-	if ((mapbits & CHUNK_MAP_LARGE) == 0) {
-		if (prof_promote)
-			ret = (prof_ctx_t *)(uintptr_t)1U;
-		else {
-			arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
-			    (uintptr_t)((pageind - (mapbits >> LG_PAGE)) <<
-			    LG_PAGE));
-			size_t binind = arena_ptr_small_binind_get(ptr,
-			    mapbits);
-			arena_bin_info_t *bin_info = &arena_bin_info[binind];
-			unsigned regind;
-
-			regind = arena_run_regind(run, bin_info, ptr);
-			ret = *(prof_ctx_t **)((uintptr_t)run +
-			    bin_info->ctx0_offset + (regind *
-			    sizeof(prof_ctx_t *)));
-		}
-	} else
+	if ((mapbits & CHUNK_MAP_LARGE) == 0)
+		ret = (prof_ctx_t *)(uintptr_t)1U;
+	else
 		ret = arena_mapp_get(chunk, pageind)->prof_ctx;
 	return (ret);
 }
 
 JEMALLOC_INLINE void
-arena_prof_ctx_set(const void *ptr, size_t usize, prof_ctx_t *ctx)
+arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
 {
 	arena_chunk_t *chunk;
 	size_t pageind;
@@ -916,31 +892,8 @@ arena_prof_ctx_set(const void *ptr, size_t usize, prof_ctx_t *ctx)
 
 	pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
 	assert(arena_mapbits_allocated_get(chunk, pageind) != 0);
-	if (usize > SMALL_MAXCLASS || (prof_promote &&
-	    ((uintptr_t)ctx != (uintptr_t)1U || arena_mapbits_large_get(chunk,
-	    pageind) != 0))) {
-		assert(arena_mapbits_large_get(chunk, pageind) != 0);
+	if (arena_mapbits_large_get(chunk, pageind) != 0)
 		arena_mapp_get(chunk, pageind)->prof_ctx = ctx;
-	} else {
-		assert(arena_mapbits_large_get(chunk, pageind) == 0);
-		if (prof_promote == false) {
-			size_t mapbits = arena_mapbits_get(chunk, pageind);
-			arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
-			    (uintptr_t)((pageind - (mapbits >> LG_PAGE)) <<
-			    LG_PAGE));
-			size_t binind;
-			arena_bin_info_t *bin_info;
-			unsigned regind;
-
-			binind = arena_ptr_small_binind_get(ptr, mapbits);
-			bin_info = &arena_bin_info[binind];
-			regind = arena_run_regind(run, bin_info, ptr);
-
-			*((prof_ctx_t **)((uintptr_t)run +
-			    bin_info->ctx0_offset + (regind * sizeof(prof_ctx_t
-			    *)))) = ctx;
-		}
-	}
 }
 
 JEMALLOC_ALWAYS_INLINE void *
@@ -989,7 +942,7 @@ arena_salloc(const void *ptr, bool demote)
 	assert(arena_mapbits_allocated_get(chunk, pageind) != 0);
 	binind = arena_mapbits_binind_get(chunk, pageind);
 	if (binind == BININD_INVALID || (config_prof && demote == false &&
-	    prof_promote && arena_mapbits_large_get(chunk, pageind) != 0)) {
+	    arena_mapbits_large_get(chunk, pageind) != 0)) {
 		/*
 		 * Large allocation.  In the common case (demote == true), and
 		 * as this is an inline function, most callers will only end up
@@ -1007,10 +960,7 @@ arena_salloc(const void *ptr, bool demote)
 		assert(arena_mapbits_dirty_get(chunk, pageind) ==
 		    arena_mapbits_dirty_get(chunk, pageind+(ret>>LG_PAGE)-1));
 	} else {
-		/*
-		 * Small allocation (possibly promoted to a large object due to
-		 * prof_promote).
-		 */
+		/* Small allocation (possibly promoted to a large object). */
 		assert(arena_mapbits_large_get(chunk, pageind) != 0 ||
 		    arena_ptr_small_binind_get(ptr, arena_mapbits_get(chunk,
 		    pageind)) == binind);
diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt
index 93516d24..f52d49f9 100644
--- a/include/jemalloc/internal/private_symbols.txt
+++ b/include/jemalloc/internal/private_symbols.txt
@@ -303,7 +303,6 @@ prof_mdump
 prof_postfork_child
 prof_postfork_parent
 prof_prefork
-prof_promote
 prof_realloc
 prof_sample_accum_update
 prof_sample_threshold_update
diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h
index 6f162d21..56014f18 100644
--- a/include/jemalloc/internal/prof.h
+++ b/include/jemalloc/internal/prof.h
@@ -220,12 +220,6 @@ extern char	opt_prof_prefix[
  */
 extern uint64_t	prof_interval;
 
-/*
- * If true, promote small sampled objects to large objects, since small run
- * headers do not have embedded profile context pointers.
- */
-extern bool	prof_promote;
-
 void	bt_init(prof_bt_t *bt, void **vec);
 void	prof_backtrace(prof_bt_t *bt, unsigned nignore);
 prof_thr_cnt_t	*prof_lookup(prof_bt_t *bt);
@@ -308,7 +302,7 @@ malloc_tsd_protos(JEMALLOC_ATTR(unused), prof_tdata, prof_tdata_t *)
 prof_tdata_t	*prof_tdata_get(bool create);
 void	prof_sample_threshold_update(prof_tdata_t *prof_tdata);
 prof_ctx_t	*prof_ctx_get(const void *ptr);
-void	prof_ctx_set(const void *ptr, size_t usize, prof_ctx_t *ctx);
+void	prof_ctx_set(const void *ptr, prof_ctx_t *ctx);
 bool	prof_sample_accum_update(size_t size);
 void	prof_malloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt);
 void	prof_realloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt,
@@ -405,7 +399,7 @@ prof_ctx_get(const void *ptr)
 }
 
 JEMALLOC_INLINE void
-prof_ctx_set(const void *ptr, size_t usize, prof_ctx_t *ctx)
+prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
 {
 	arena_chunk_t *chunk;
 
@@ -415,7 +409,7 @@ prof_ctx_set(const void *ptr, size_t usize, prof_ctx_t *ctx)
 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
 	if (chunk != ptr) {
 		/* Region. */
-		arena_prof_ctx_set(ptr, usize, ctx);
+		arena_prof_ctx_set(ptr, ctx);
 	} else
 		huge_prof_ctx_set(ptr, ctx);
 }
@@ -471,7 +465,7 @@ prof_malloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt)
 	}
 
 	if ((uintptr_t)cnt > (uintptr_t)1U) {
-		prof_ctx_set(ptr, usize, cnt->ctx);
+		prof_ctx_set(ptr, cnt->ctx);
 
 		cnt->epoch++;
 		/*********/
@@ -491,7 +485,7 @@ prof_malloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt)
 		mb_write();
 		/*********/
 	} else
-		prof_ctx_set(ptr, usize, (prof_ctx_t *)(uintptr_t)1U);
+		prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U);
 }
 
 JEMALLOC_INLINE void
@@ -539,10 +533,10 @@ prof_realloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt,
 		if ((uintptr_t)told_cnt > (uintptr_t)1U)
 			told_cnt->epoch++;
 		if ((uintptr_t)cnt > (uintptr_t)1U) {
-			prof_ctx_set(ptr, usize, cnt->ctx);
+			prof_ctx_set(ptr, cnt->ctx);
 			cnt->epoch++;
 		} else if (ptr != NULL)
-			prof_ctx_set(ptr, usize, (prof_ctx_t *)(uintptr_t)1U);
+			prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U);
 	/*********/
 	mb_write();
 	/*********/
diff --git a/include/jemalloc/internal/size_classes.sh b/include/jemalloc/internal/size_classes.sh
index 29c80c1f..960674aa 100755
--- a/include/jemalloc/internal/size_classes.sh
+++ b/include/jemalloc/internal/size_classes.sh
@@ -94,9 +94,8 @@ cat <<EOF
 /*
  * The small_size2bin lookup table uses uint8_t to encode each bin index, so we
  * cannot support more than 256 small size classes.  Further constrain NBINS to
- * 255 to support prof_promote, since all small size classes, plus a "not
- * small" size class must be stored in 8 bits of arena_chunk_map_t's bits
- * field.
+ * 255 since all small size classes, plus a "not small" size class must be
+ * stored in 8 bits of arena_chunk_map_t's bits field.
  */
 #if (NBINS > 255)
 #  error "Too many small size classes"
diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h
index c3d4b58d..51974136 100644
--- a/include/jemalloc/internal/tcache.h
+++ b/include/jemalloc/internal/tcache.h
@@ -354,7 +354,7 @@ tcache_alloc_large(tcache_t *tcache, size_t size, bool zero)
 		if (ret == NULL)
 			return (NULL);
 	} else {
-		if (config_prof && prof_promote && size == PAGE) {
+		if (config_prof && size == PAGE) {
 			arena_chunk_t *chunk =
 			    (arena_chunk_t *)CHUNK_ADDR2BASE(ret);
 			size_t pageind = (((uintptr_t)ret - (uintptr_t)chunk) >>
diff --git a/src/arena.c b/src/arena.c
index 3cb62601..d5741000 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -2373,7 +2373,6 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size)
 	uint32_t try_nregs, good_nregs;
 	uint32_t try_hdr_size, good_hdr_size;
 	uint32_t try_bitmap_offset, good_bitmap_offset;
-	uint32_t try_ctx0_offset, good_ctx0_offset;
 	uint32_t try_redzone0_offset, good_redzone0_offset;
 
 	assert(min_run_size >= PAGE);
@@ -2428,14 +2427,6 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size)
 		try_bitmap_offset = try_hdr_size;
 		/* Add space for bitmap. */
 		try_hdr_size += bitmap_size(try_nregs);
-		if (config_prof && opt_prof && prof_promote == false) {
-			/* Pad to a quantum boundary. */
-			try_hdr_size = QUANTUM_CEILING(try_hdr_size);
-			try_ctx0_offset = try_hdr_size;
-			/* Add space for one (prof_ctx_t *) per region. */
-			try_hdr_size += try_nregs * sizeof(prof_ctx_t *);
-		} else
-			try_ctx0_offset = 0;
 		try_redzone0_offset = try_run_size - (try_nregs *
 		    bin_info->reg_interval) - pad_size;
 	} while (try_hdr_size > try_redzone0_offset);
@@ -2449,7 +2440,6 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size)
 	good_nregs = try_nregs;
 	good_hdr_size = try_hdr_size;
 	good_bitmap_offset = try_bitmap_offset;
-	good_ctx0_offset = try_ctx0_offset;
 	good_redzone0_offset = try_redzone0_offset;
 
 	/* Try more aggressive settings. */
@@ -2469,16 +2459,6 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size)
 		try_bitmap_offset = try_hdr_size;
 		/* Add space for bitmap. */
 		try_hdr_size += bitmap_size(try_nregs);
-		if (config_prof && opt_prof && prof_promote == false) {
-			/* Pad to a quantum boundary. */
-			try_hdr_size = QUANTUM_CEILING(try_hdr_size);
-			try_ctx0_offset = try_hdr_size;
-			/*
-			 * Add space for one (prof_ctx_t *) per region.
-			 */
-			try_hdr_size += try_nregs *
-			    sizeof(prof_ctx_t *);
-		}
 		try_redzone0_offset = try_run_size - (try_nregs *
 		    bin_info->reg_interval) - pad_size;
 	} while (try_hdr_size > try_redzone0_offset);
@@ -2494,7 +2474,6 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size)
 	bin_info->run_size = good_run_size;
 	bin_info->nregs = good_nregs;
 	bin_info->bitmap_offset = good_bitmap_offset;
-	bin_info->ctx0_offset = good_ctx0_offset;
 	bin_info->reg0_offset = good_redzone0_offset + bin_info->redzone_size;
 
 	assert(bin_info->reg0_offset - bin_info->redzone_size + (bin_info->nregs
diff --git a/src/jemalloc.c b/src/jemalloc.c
index 558dbb20..816a12e0 100644
--- a/src/jemalloc.c
+++ b/src/jemalloc.c
@@ -853,7 +853,7 @@ imalloc_prof_sample(size_t usize, prof_thr_cnt_t *cnt)
 
 	if (cnt == NULL)
 		return (NULL);
-	if (prof_promote && usize <= SMALL_MAXCLASS) {
+	if (usize <= SMALL_MAXCLASS) {
 		p = imalloc(SMALL_MAXCLASS+1);
 		if (p == NULL)
 			return (NULL);
@@ -952,7 +952,7 @@ imemalign_prof_sample(size_t alignment, size_t usize, prof_thr_cnt_t *cnt)
 
 	if (cnt == NULL)
 		return (NULL);
-	if (prof_promote && usize <= SMALL_MAXCLASS) {
+	if (usize <= SMALL_MAXCLASS) {
 		assert(sa2u(SMALL_MAXCLASS+1, alignment) != 0);
 		p = ipalloc(sa2u(SMALL_MAXCLASS+1, alignment), alignment,
 		    false);
@@ -1086,7 +1086,7 @@ icalloc_prof_sample(size_t usize, prof_thr_cnt_t *cnt)
 
 	if (cnt == NULL)
 		return (NULL);
-	if (prof_promote && usize <= SMALL_MAXCLASS) {
+	if (usize <= SMALL_MAXCLASS) {
 		p = icalloc(SMALL_MAXCLASS+1);
 		if (p == NULL)
 			return (NULL);
@@ -1183,7 +1183,7 @@ irealloc_prof_sample(void *oldptr, size_t usize, prof_thr_cnt_t *cnt)
 
 	if (cnt == NULL)
 		return (NULL);
-	if (prof_promote && usize <= SMALL_MAXCLASS) {
+	if (usize <= SMALL_MAXCLASS) {
 		p = iralloc(oldptr, SMALL_MAXCLASS+1, 0, 0, false);
 		if (p == NULL)
 			return (NULL);
@@ -1395,7 +1395,7 @@ imallocx_prof_sample(size_t usize, size_t alignment, bool zero, bool try_tcache,
 
 	if (cnt == NULL)
 		return (NULL);
-	if (prof_promote && usize <= SMALL_MAXCLASS) {
+	if (usize <= SMALL_MAXCLASS) {
 		size_t usize_promoted = (alignment == 0) ?
 		    s2u(SMALL_MAXCLASS+1) : sa2u(SMALL_MAXCLASS+1, alignment);
 		assert(usize_promoted != 0);
@@ -1492,7 +1492,7 @@ irallocx_prof_sample(void *oldptr, size_t size, size_t alignment, size_t usize,
 
 	if (cnt == NULL)
 		return (NULL);
-	if (prof_promote && usize <= SMALL_MAXCLASS) {
+	if (usize <= SMALL_MAXCLASS) {
 		p = iralloct(oldptr, SMALL_MAXCLASS+1, (SMALL_MAXCLASS+1 >=
 		    size) ? 0 : size - (SMALL_MAXCLASS+1), alignment, zero,
 		    try_tcache_alloc, try_tcache_dalloc, arena);
@@ -1639,8 +1639,8 @@ ixallocx_prof_sample(void *ptr, size_t old_usize, size_t size, size_t extra,
 	if (cnt == NULL)
 		return (old_usize);
 	/* Use minimum usize to determine whether promotion may happen. */
-	if (prof_promote && ((alignment == 0) ? s2u(size) : sa2u(size,
-	    alignment)) <= SMALL_MAXCLASS) {
+	if (((alignment == 0) ? s2u(size) : sa2u(size, alignment)) <=
+	    SMALL_MAXCLASS) {
 		if (ixalloc(ptr, SMALL_MAXCLASS+1, (SMALL_MAXCLASS+1 >=
 		    size+extra) ? 0 : size+extra - (SMALL_MAXCLASS+1),
 		    alignment, zero))
diff --git a/src/prof.c b/src/prof.c
index 7722b7b4..1b1f7a84 100644
--- a/src/prof.c
+++ b/src/prof.c
@@ -32,7 +32,6 @@ char	opt_prof_prefix[
 	1];
 
 uint64_t	prof_interval = 0;
-bool	prof_promote;
 
 /*
  * Table of mutexes that are shared among ctx's.  These are leaf locks, so
@@ -1300,8 +1299,8 @@ prof_boot1(void)
 	cassert(config_prof);
 
 	/*
-	 * opt_prof and prof_promote must be in their final state before any
-	 * arenas are initialized, so this function must be executed early.
+	 * opt_prof must be in its final state before any arenas are
+	 * initialized, so this function must be executed early.
 	 */
 
 	if (opt_prof_leak && opt_prof == false) {
@@ -1317,8 +1316,6 @@ prof_boot1(void)
 			    opt_lg_prof_interval);
 		}
 	}
-
-	prof_promote = (opt_prof && opt_lg_prof_sample > LG_PAGE);
 }
 
 bool
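
For context, the scheme this patch makes unconditional works as follows: when
the profiler samples an allocation whose requested size falls in a small size
class, the request is bumped past SMALL_MAXCLASS so that it is served from the
smallest large size class.  Large runs have a prof_ctx slot in the per-page
chunk map, so a sampled object's profiling context can always be recorded
there; unsampled small objects carry no per-object metadata at all, and
lookups simply return the sentinel (prof_ctx_t *)(uintptr_t)1U, as in
arena_prof_ctx_get() above.  The program below is a minimal standalone sketch
of that idea, not jemalloc code: the toy_* names, the 3584-byte small/large
cutoff, and the two-field map entry are all invented for illustration (the
real SMALL_MAXCLASS depends on configuration).

/*
 * Toy model of mandatory sampled-allocation promotion (illustration only).
 * Build and run:  cc -o promote promote.c && ./promote
 */
#include <stdio.h>

#define	TOY_SMALL_MAXCLASS	3584	/* assumed largest small size class */

typedef struct prof_ctx_s {
	const char	*backtrace_id;	/* stand-in for real backtrace data */
} prof_ctx_t;

/* Per-page metadata, as in a chunk map: only large runs get a ctx slot. */
typedef struct {
	int		large;		/* run backs a large allocation */
	prof_ctx_t	*prof_ctx;	/* meaningful only when large != 0 */
} toy_map_elm_t;

/* Size actually requested for a *sampled* allocation. */
static size_t
toy_sampled_request_size(size_t size)
{

	/* Promote sampled small requests to the smallest large class. */
	if (size <= TOY_SMALL_MAXCLASS)
		return (TOY_SMALL_MAXCLASS + 1);
	return (size);
}

int
main(void)
{
	toy_map_elm_t map = {0, NULL};
	prof_ctx_t ctx = {"bt#42"};
	size_t request = 64;	/* would be a small size class */
	size_t actual = toy_sampled_request_size(request);

	/* Promotion guarantees the run is large, so the ctx slot exists. */
	map.large = (actual > TOY_SMALL_MAXCLASS);
	if (map.large)
		map.prof_ctx = &ctx;
	printf("requested %zu -> allocated %zu, ctx %s\n", request, actual,
	    map.large ? map.prof_ctx->backtrace_id : "(none)");
	return (0);
}

The same idea explains two details in the diff: the demote flag to
arena_salloc() (a promoted object reports its large size unless the caller
asks for the demoted, small-class view), and the size == PAGE test in
tcache_alloc_large() (the smallest large class is one page, so
SMALL_MAXCLASS+1 rounds up to PAGE and promoted samples are exactly
page-sized).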