From e4f7846f1fd279a039ffa2a41707348187219de4 Mon Sep 17 00:00:00 2001
From: Jason Evans
Date: Fri, 22 Oct 2010 10:45:59 -0700
Subject: [PATCH] Fix heap profiling bugs.

Fix a regression due to the recent heap profiling accuracy improvements:
prof_{m,re}alloc() must set the object's profiling context regardless of
whether it is sampled.

Fix management of the CHUNK_MAP_CLASS chunk map bits, such that all
large object (re-)allocation paths correctly initialize the bits.  Prior
to this fix, in-place realloc() cleared the bits, resulting in
incorrectly reported object sizes from arena_salloc_demote().  After
this fix the non-demoted bit pattern is all zeros (instead of all ones),
which makes it easier to ensure that the bits are properly set.
---
 jemalloc/include/jemalloc/internal/arena.h  | 40 ++++++++++++++++--
 jemalloc/include/jemalloc/internal/prof.h   | 19 ++++-----
 jemalloc/include/jemalloc/internal/tcache.h |  3 +-
 jemalloc/src/arena.c                        | 45 +++------------------
 jemalloc/src/jemalloc.c                     | 37 +++++++++++++----
 jemalloc/src/prof.c                         | 24 +++--------
 6 files changed, 85 insertions(+), 83 deletions(-)

diff --git a/jemalloc/include/jemalloc/internal/arena.h b/jemalloc/include/jemalloc/internal/arena.h
index 240d96b3..9556c2c6 100644
--- a/jemalloc/include/jemalloc/internal/arena.h
+++ b/jemalloc/include/jemalloc/internal/arena.h
@@ -121,7 +121,7 @@ struct arena_chunk_map_s {
	 *
	 * p : run page offset
	 * s : run size
-	 * c : size class (used only if prof_promote is true)
+	 * c : (binind+1) for size class (used only if prof_promote is true)
	 * x : don't care
	 * - : 0
	 * + : 1
@@ -144,7 +144,7 @@ struct arena_chunk_map_s {
	 *   pppppppp pppppppp pppp---- ----d--a
	 *
	 * Large:
-	 *   ssssssss ssssssss ssss++++ ++++D-la
+	 *   ssssssss ssssssss ssss---- ----D-la
	 *   xxxxxxxx xxxxxxxx xxxx---- ----xxxx
	 *   -------- -------- -------- ----D-la
	 *
@@ -152,7 +152,7 @@ struct arena_chunk_map_s {
	 *   ssssssss ssssssss sssscccc ccccD-la
	 *
	 * Large (not sampled, size == PAGE_SIZE):
-	 *   ssssssss ssssssss ssss++++ ++++D-la
+	 *   ssssssss ssssssss ssss---- ----D-la
	 */
	size_t				bits;
 #ifdef JEMALLOC_PROF
@@ -444,7 +444,6 @@ size_t	arena_salloc(const void *ptr);
 #ifdef JEMALLOC_PROF
 void	arena_prof_promoted(const void *ptr, size_t size);
 size_t	arena_salloc_demote(const void *ptr);
-void	arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx);
 #endif
 void	arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr,
     arena_chunk_map_t *mapelm);
@@ -470,6 +469,7 @@ unsigned	arena_run_regind(arena_run_t *run, arena_bin_t *bin,
     const void *ptr, size_t size);
 # ifdef JEMALLOC_PROF
 prof_ctx_t	*arena_prof_ctx_get(const void *ptr);
+void	arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx);
 # endif
 void	arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr);
 #endif
@@ -574,6 +574,38 @@ arena_prof_ctx_get(const void *ptr)
 
	return (ret);
 }
+
+JEMALLOC_INLINE void
+arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
+{
+	arena_chunk_t *chunk;
+	size_t pageind, mapbits;
+
+	assert(ptr != NULL);
+	assert(CHUNK_ADDR2BASE(ptr) != ptr);
+
+	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
+	pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
+	mapbits = chunk->map[pageind-map_bias].bits;
+	assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
+	if ((mapbits & CHUNK_MAP_LARGE) == 0) {
+		if (prof_promote == false) {
+			arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
+			    (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) <<
+			    PAGE_SHIFT));
+			arena_bin_t *bin = run->bin;
+			unsigned regind;
+
+			assert(run->magic == ARENA_RUN_MAGIC);
+			regind = arena_run_regind(run, bin, ptr, bin->reg_size);
+
+			*((prof_ctx_t **)((uintptr_t)run + bin->ctx0_offset +
+			    (regind * sizeof(prof_ctx_t *)))) = ctx;
+		} else
+			assert((uintptr_t)ctx == (uintptr_t)1U);
+	} else
+		chunk->map[pageind-map_bias].prof_ctx = ctx;
+}
 #endif
 
 JEMALLOC_INLINE void
diff --git a/jemalloc/include/jemalloc/internal/prof.h b/jemalloc/include/jemalloc/internal/prof.h
index 51d04879..3e85bdab 100644
--- a/jemalloc/include/jemalloc/internal/prof.h
+++ b/jemalloc/include/jemalloc/internal/prof.h
@@ -232,8 +232,8 @@ void	prof_ctx_set(const void *ptr, prof_ctx_t *ctx);
 bool	prof_sample_accum_update(size_t size);
 void	prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt);
 void	prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt,
-    const void *old_ptr, size_t old_size, prof_ctx_t *old_ctx);
-void	prof_free(const void *ptr);
+    size_t old_size, prof_ctx_t *old_ctx);
+void	prof_free(const void *ptr, size_t size);
 #endif
 
 #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PROF_C_))
@@ -400,9 +398,7 @@ prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt)
			 * be a difference between the size passed to
			 * prof_alloc_prep() and prof_malloc().
			 */
-			assert(false);
-			prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U);
-			return;
+			assert((uintptr_t)cnt == (uintptr_t)1U);
		}
	}
 
@@ -432,7 +430,7 @@ prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt)
 
 JEMALLOC_INLINE void
 prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt,
-    const void *old_ptr, size_t old_size, prof_ctx_t *old_ctx)
+    size_t old_size, prof_ctx_t *old_ctx)
 {
	prof_thr_cnt_t *told_cnt;
 
@@ -445,13 +443,12 @@ prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt,
				/*
				 * Don't sample.  The size passed to
				 * prof_alloc_prep() was larger than what
-				 * actually got allocated., so a backtrace was
+				 * actually got allocated, so a backtrace was
				 * captured for this allocation, even though
				 * its actual size was insufficient to cross
				 * the sample threshold.
				 */
-				prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U);
-				return;
+				cnt = (prof_thr_cnt_t *)(uintptr_t)1U;
			}
		}
	}
@@ -506,12 +503,12 @@ prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt,
 }
 
 JEMALLOC_INLINE void
-prof_free(const void *ptr)
+prof_free(const void *ptr, size_t size)
 {
	prof_ctx_t *ctx = prof_ctx_get(ptr);
 
	if ((uintptr_t)ctx > (uintptr_t)1) {
-		size_t size = isalloc(ptr);
+		assert(size == isalloc(ptr));
		prof_thr_cnt_t *tcnt = prof_lookup(ctx->bt);
 
		if (tcnt != NULL) {
diff --git a/jemalloc/include/jemalloc/internal/tcache.h b/jemalloc/include/jemalloc/internal/tcache.h
index bcd6d44c..168a3069 100644
--- a/jemalloc/include/jemalloc/internal/tcache.h
+++ b/jemalloc/include/jemalloc/internal/tcache.h
@@ -282,8 +282,7 @@ tcache_alloc_large(tcache_t *tcache, size_t size, bool zero)
		arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ret);
		size_t pageind = (((uintptr_t)ret - (uintptr_t)chunk) >>
		    PAGE_SHIFT);
-		chunk->map[pageind-map_bias].bits |=
-		    CHUNK_MAP_CLASS_MASK;
+		chunk->map[pageind-map_bias].bits &= ~CHUNK_MAP_CLASS_MASK;
 #endif
		if (zero == false) {
 #ifdef JEMALLOC_FILL
diff --git a/jemalloc/src/arena.c b/jemalloc/src/arena.c
index 2a8ea62b..d811f658 100644
--- a/jemalloc/src/arena.c
+++ b/jemalloc/src/arena.c
@@ -408,11 +408,8 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large,
		 */
		chunk->map[run_ind+need_pages-1-map_bias].bits =
		    CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED | flag_dirty;
-		chunk->map[run_ind-map_bias].bits = size | CHUNK_MAP_LARGE |
-#ifdef JEMALLOC_PROF
-		    CHUNK_MAP_CLASS_MASK |
-#endif
-		    CHUNK_MAP_ALLOCATED | flag_dirty;
+		chunk->map[run_ind-map_bias].bits = size | flag_dirty |
+		    CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED;
	} else {
		assert(zero == false);
		/*
@@ -1724,7 +1721,7 @@ arena_prof_promoted(const void *ptr, size_t size)
	binind = small_size2bin[size];
	assert(binind < nbins);
	chunk->map[pageind-map_bias].bits = (chunk->map[pageind-map_bias].bits &
-	    ~CHUNK_MAP_CLASS_MASK) | (binind << CHUNK_MAP_CLASS_SHIFT);
+	    ~CHUNK_MAP_CLASS_MASK) | ((binind+1) << CHUNK_MAP_CLASS_SHIFT);
 }
 
 size_t
@@ -1754,9 +1751,9 @@ arena_salloc_demote(const void *ptr)
		assert(((uintptr_t)ptr & PAGE_MASK) == 0);
		ret = mapbits & ~PAGE_MASK;
		if (prof_promote && ret == PAGE_SIZE && (mapbits &
-		    CHUNK_MAP_CLASS_MASK) != CHUNK_MAP_CLASS_MASK) {
+		    CHUNK_MAP_CLASS_MASK) != 0) {
			size_t binind = ((mapbits & CHUNK_MAP_CLASS_MASK) >>
-			    CHUNK_MAP_CLASS_SHIFT);
+			    CHUNK_MAP_CLASS_SHIFT) - 1;
			assert(binind < nbins);
			ret = chunk->arena->bins[binind].reg_size;
		}
@@ -1765,38 +1762,6 @@ arena_salloc_demote(const void *ptr)
 
	return (ret);
 }
-
-void
-arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
-{
-	arena_chunk_t *chunk;
-	size_t pageind, mapbits;
-
-	assert(ptr != NULL);
-	assert(CHUNK_ADDR2BASE(ptr) != ptr);
-
-	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
-	pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
-	mapbits = chunk->map[pageind-map_bias].bits;
-	assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
-	if ((mapbits & CHUNK_MAP_LARGE) == 0) {
-		if (prof_promote == false) {
-			arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
-			    (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) <<
-			    PAGE_SHIFT));
-			arena_bin_t *bin = run->bin;
-			unsigned regind;
-
-			assert(run->magic == ARENA_RUN_MAGIC);
-			regind = arena_run_regind(run, bin, ptr, bin->reg_size);
-
-			*((prof_ctx_t **)((uintptr_t)run + bin->ctx0_offset
-			    + (regind * sizeof(prof_ctx_t *)))) = ctx;
-		} else
-			assert((uintptr_t)ctx == (uintptr_t)1U);
-	} else
-		chunk->map[pageind-map_bias].prof_ctx = ctx;
-}
 #endif
 
 static void
diff --git a/jemalloc/src/jemalloc.c b/jemalloc/src/jemalloc.c
index f3cba15a..dedf011a 100644
--- a/jemalloc/src/jemalloc.c
+++ b/jemalloc/src/jemalloc.c
@@ -1268,7 +1268,7 @@ RETURN:
 #endif
 #ifdef JEMALLOC_PROF
	if (opt_prof)
-		prof_realloc(ret, usize, cnt, ptr, old_size, old_ctx);
+		prof_realloc(ret, usize, cnt, old_size, old_ctx);
 #endif
 #ifdef JEMALLOC_STATS
	if (ret != NULL) {
@@ -1285,15 +1285,26 @@ JEMALLOC_P(free)(void *ptr)
 {
 
	if (ptr != NULL) {
+#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS))
+		size_t usize;
+#endif
+
		assert(malloc_initialized || malloc_initializer ==
		    pthread_self());
 
+#ifdef JEMALLOC_STATS
+		usize = isalloc(ptr);
+#endif
 #ifdef JEMALLOC_PROF
-		if (opt_prof)
-			prof_free(ptr);
+		if (opt_prof) {
+# ifndef JEMALLOC_STATS
+			usize = isalloc(ptr);
+# endif
+			prof_free(ptr, usize);
+		}
 #endif
 #ifdef JEMALLOC_STATS
-		ALLOCATED_ADD(0, isalloc(ptr));
+		ALLOCATED_ADD(0, usize);
 #endif
		idalloc(ptr);
	}
@@ -1566,7 +1577,7 @@ JEMALLOC_P(rallocm)(void **ptr, size_t *rsize, size_t size, size_t extra,
				goto ERR;
			usize = isalloc(q);
		}
-		prof_realloc(q, usize, cnt, p, old_size, old_ctx);
+		prof_realloc(q, usize, cnt, old_size, old_ctx);
	} else
 #endif
	{
@@ -1635,16 +1646,26 @@ JEMALLOC_ATTR(visibility("default"))
 int
 JEMALLOC_P(dallocm)(void *ptr, int flags)
 {
+#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS))
+	size_t usize;
+#endif
 
	assert(ptr != NULL);
	assert(malloc_initialized || malloc_initializer ==
	    pthread_self());
 
+#ifdef JEMALLOC_STATS
+	usize = isalloc(ptr);
+#endif
 #ifdef JEMALLOC_PROF
-	if (opt_prof)
-		prof_free(ptr);
+	if (opt_prof) {
+# ifndef JEMALLOC_STATS
+		usize = isalloc(ptr);
+# endif
+		prof_free(ptr, usize);
+	}
 #endif
 #ifdef JEMALLOC_STATS
-	ALLOCATED_ADD(0, isalloc(ptr));
+	ALLOCATED_ADD(0, usize);
 #endif
	idalloc(ptr);
diff --git a/jemalloc/src/prof.c b/jemalloc/src/prof.c
index 4a49e901..fb0e7659 100644
--- a/jemalloc/src/prof.c
+++ b/jemalloc/src/prof.c
@@ -239,38 +239,26 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max)
 void
 prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max)
 {
-#define	NIGNORE	3
 #define	BT_FRAME(i)						\
-	if ((i) < NIGNORE + max) {				\
+	if ((i) < nignore + max) {				\
		void *p;					\
		if (__builtin_frame_address(i) == 0)		\
			return;					\
		p = __builtin_return_address(i);		\
		if (p == NULL)					\
			return;					\
-		if (i >= NIGNORE) {				\
-			bt->vec[(i) - NIGNORE] = p;		\
-			bt->len = (i) - NIGNORE + 1;		\
+		if (i >= nignore) {				\
+			bt->vec[(i) - nignore] = p;		\
+			bt->len = (i) - nignore + 1;		\
		}						\
	} else							\
		return;
 
	assert(max <= (1U << opt_lg_prof_bt_max));
 
-	/*
-	 * Ignore the first three frames, since they are:
-	 *
-	 * 0: prof_backtrace()
-	 * 1: prof_alloc_prep()
-	 * 2: malloc(), calloc(), etc.
-	 */
-#if 1
-	assert(nignore + 1 == NIGNORE);
-#else
	BT_FRAME(0)
	BT_FRAME(1)
	BT_FRAME(2)
-#endif
	BT_FRAME(3)
	BT_FRAME(4)
	BT_FRAME(5)
@@ -491,8 +479,8 @@ prof_lookup(prof_bt_t *bt)
			    == (ZU(1) << opt_lg_prof_tcmax)) {
				assert(ckh_count(&prof_tdata->bt2cnt) > 0);
				/*
-				 * Flush the least least recently used cnt in order to
-				 * keep bt2cnt from becoming too large.
+				 * Flush the least recently used cnt in order to keep
+				 * bt2cnt from becoming too large.
				 */
				ret.p = ql_last(&prof_tdata->lru_ql, lru_link);
				assert(ret.v != NULL);
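
The regression fix above rests on a sentinel convention that is easy to miss in
the diff: prof_{m,re}alloc() must record a profiling context for every object,
using the non-pointer value (prof_ctx_t *)(uintptr_t)1U to mean "allocated but
not sampled", so that prof_free() can later tell sampled objects (real context
pointers, which compare greater than 1) from unsampled ones.  A minimal
standalone sketch of that convention follows; toy_prof_malloc(),
toy_prof_free(), and the single-object store are hypothetical stand-ins for
illustration, not jemalloc's API.

#include <stdint.h>
#include <stdio.h>

typedef struct prof_ctx_s prof_ctx_t;	/* Opaque, as in jemalloc. */

/* Hypothetical one-object stand-in for prof_ctx_set()/prof_ctx_get(). */
static prof_ctx_t *obj_ctx;

static void
toy_prof_malloc(prof_ctx_t *ctx, int sampled)
{

	/*
	 * The regression: an unsampled allocation could skip this store.
	 * The fix always records something -- either the real context or
	 * the sentinel 1U.
	 */
	obj_ctx = sampled ? ctx : (prof_ctx_t *)(uintptr_t)1U;
}

static void
toy_prof_free(void)
{

	/* Mirrors prof_free(): only real contexts (> 1) are counted. */
	if ((uintptr_t)obj_ctx > (uintptr_t)1)
		printf("sampled: update this context's counters\n");
	else
		printf("unsampled: nothing to account\n");
}

int
main(void)
{
	prof_ctx_t *fake_ctx = (prof_ctx_t *)&obj_ctx;	/* Any real pointer. */

	toy_prof_malloc(fake_ctx, 1);	/* Sampled allocation. */
	toy_prof_free();
	toy_prof_malloc(NULL, 0);	/* Unsampled allocation. */
	toy_prof_free();
	return (0);
}

Since a context slot is recycled across allocations, skipping the store for an
unsampled object would leave a stale context behind for prof_free() to misread;
that is the failure mode the commit message describes.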
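
The CHUNK_MAP_CLASS change is easiest to see in isolation.  Per the revised bit
diagram in arena.h ("sssscccc ccccD-la"), the class field of a sampled large
mapping now stores binind+1 rather than binind, so the all-zeros pattern that
every large-run allocation path naturally writes already means "not promoted".
Under the old encoding, "not promoted" was all ones, which each path had to set
explicitly; the in-place realloc() path didn't, so arena_salloc_demote() decoded
the zeroed field as bin 0 and returned the wrong size.  A minimal sketch of the
encoding, assuming the shift and mask implied by that diagram (bits 4 through
11) rather than jemalloc's actual definitions:

#include <assert.h>
#include <stddef.h>

/* Field placement assumed from the "sssscccc ccccD-la" diagram. */
#define CHUNK_MAP_CLASS_SHIFT	4
#define CHUNK_MAP_CLASS_MASK	((size_t)0xff0U)

/* Record that a sampled small object was promoted into this large run. */
static size_t
class_set(size_t mapbits, size_t binind)
{

	/* Store binind+1 so that zero is reserved for "not promoted". */
	return ((mapbits & ~CHUNK_MAP_CLASS_MASK) |
	    ((binind+1) << CHUNK_MAP_CLASS_SHIFT));
}

/* Recover the bin index; (size_t)-1 means the run was never promoted. */
static size_t
class_get(size_t mapbits)
{
	size_t c = (mapbits & CHUNK_MAP_CLASS_MASK) >> CHUNK_MAP_CLASS_SHIFT;

	return (c == 0 ? (size_t)-1 : c - 1);
}

int
main(void)
{
	size_t mapbits = 0;	/* Freshly written large mapping: all zeros. */

	assert(class_get(mapbits) == (size_t)-1);	/* Not promoted. */
	mapbits = class_set(mapbits, 3);		/* Promote to bin 3. */
	assert(class_get(mapbits) == 3);
	return (0);
}

This is also why the tcache.h and arena.c hunks replace "bits |=
CHUNK_MAP_CLASS_MASK" with clearing (or simply never setting) the field: a
forgotten initialization now yields the safe "not promoted" answer instead of a
bogus bin index.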