From 81b4e6eb6f06cac048e3743787a70676f1534269 Mon Sep 17 00:00:00 2001
From: Jason Evans
Date: Wed, 20 Oct 2010 20:52:00 -0700
Subject: [PATCH] Fix a heap profiling regression.

Call prof_ctx_set() in all paths through prof_{m,re}alloc().

Inline arena_prof_ctx_get().
---
 jemalloc/include/jemalloc/internal/arena.h | 107 ++++++++++++++++++++-
 jemalloc/include/jemalloc/internal/prof.h  |   5 +-
 jemalloc/src/arena.c                       |  99 ------------------
 3 files changed, 110 insertions(+), 101 deletions(-)

diff --git a/jemalloc/include/jemalloc/internal/arena.h b/jemalloc/include/jemalloc/internal/arena.h
index 893f0706..240d96b3 100644
--- a/jemalloc/include/jemalloc/internal/arena.h
+++ b/jemalloc/include/jemalloc/internal/arena.h
@@ -444,7 +444,6 @@ size_t arena_salloc(const void *ptr);
 #ifdef JEMALLOC_PROF
 void arena_prof_promoted(const void *ptr, size_t size);
 size_t arena_salloc_demote(const void *ptr);
-prof_ctx_t *arena_prof_ctx_get(const void *ptr);
 void arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx);
 #endif
 void arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr,
@@ -467,10 +466,116 @@ bool arena_boot(void);
 #ifdef JEMALLOC_H_INLINES
 
 #ifndef JEMALLOC_ENABLE_INLINE
+unsigned arena_run_regind(arena_run_t *run, arena_bin_t *bin,
+    const void *ptr, size_t size);
+# ifdef JEMALLOC_PROF
+prof_ctx_t *arena_prof_ctx_get(const void *ptr);
+# endif
 void arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr);
 #endif
 
 #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_))
+JEMALLOC_INLINE unsigned
+arena_run_regind(arena_run_t *run, arena_bin_t *bin, const void *ptr,
+    size_t size)
+{
+	unsigned shift, diff, regind;
+
+	assert(run->magic == ARENA_RUN_MAGIC);
+
+	/*
+	 * Avoid doing division with a variable divisor if possible.  Using
+	 * actual division here can reduce allocator throughput by over 20%!
+	 */
+	diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run - bin->reg0_offset);
+
+	/* Rescale (factor powers of 2 out of the numerator and denominator). */
+	shift = ffs(size) - 1;
+	diff >>= shift;
+	size >>= shift;
+
+	if (size == 1) {
+		/* The divisor was a power of 2. */
+		regind = diff;
+	} else {
+		/*
+		 * To divide by a number D that is not a power of two we
+		 * multiply by (2^21 / D) and then right shift by 21 positions.
+		 *
+		 *   X / D
+		 *
+		 * becomes
+		 *
+		 *   (X * size_invs[D - 3]) >> SIZE_INV_SHIFT
+		 *
+		 * We can omit the first three elements, because we never
+		 * divide by 0, and 1 and 2 are both powers of two, which are
+		 * handled above.
+		 */
+#define SIZE_INV_SHIFT 21
+#define SIZE_INV(s) (((1U << SIZE_INV_SHIFT) / (s)) + 1)
+		static const unsigned size_invs[] = {
+		    SIZE_INV(3),
+		    SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7),
+		    SIZE_INV(8), SIZE_INV(9), SIZE_INV(10), SIZE_INV(11),
+		    SIZE_INV(12), SIZE_INV(13), SIZE_INV(14), SIZE_INV(15),
+		    SIZE_INV(16), SIZE_INV(17), SIZE_INV(18), SIZE_INV(19),
+		    SIZE_INV(20), SIZE_INV(21), SIZE_INV(22), SIZE_INV(23),
+		    SIZE_INV(24), SIZE_INV(25), SIZE_INV(26), SIZE_INV(27),
+		    SIZE_INV(28), SIZE_INV(29), SIZE_INV(30), SIZE_INV(31)
+		};
+
+		if (size <= ((sizeof(size_invs) / sizeof(unsigned)) + 2))
+			regind = (diff * size_invs[size - 3]) >> SIZE_INV_SHIFT;
+		else
+			regind = diff / size;
+#undef SIZE_INV
+#undef SIZE_INV_SHIFT
+	}
+	assert(diff == regind * size);
+	assert(regind < bin->nregs);
+
+	return (regind);
+}
+
+#ifdef JEMALLOC_PROF
+JEMALLOC_INLINE prof_ctx_t *
+arena_prof_ctx_get(const void *ptr)
+{
+	prof_ctx_t *ret;
+	arena_chunk_t *chunk;
+	size_t pageind, mapbits;
+
+	assert(ptr != NULL);
+	assert(CHUNK_ADDR2BASE(ptr) != ptr);
+
+	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
+	pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
+	mapbits = chunk->map[pageind-map_bias].bits;
+	assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
+	if ((mapbits & CHUNK_MAP_LARGE) == 0) {
+		if (prof_promote)
+			ret = (prof_ctx_t *)(uintptr_t)1U;
+		else {
+			arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
+			    (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) <<
+			    PAGE_SHIFT));
+			arena_bin_t *bin = run->bin;
+			unsigned regind;
+
+			assert(run->magic == ARENA_RUN_MAGIC);
+			regind = arena_run_regind(run, bin, ptr, bin->reg_size);
+			ret = *(prof_ctx_t **)((uintptr_t)run +
+			    bin->ctx0_offset + (regind *
+			    sizeof(prof_ctx_t *)));
+		}
+	} else
+		ret = chunk->map[pageind-map_bias].prof_ctx;
+
+	return (ret);
+}
+#endif
+
 JEMALLOC_INLINE void
 arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr)
 {
diff --git a/jemalloc/include/jemalloc/internal/prof.h b/jemalloc/include/jemalloc/internal/prof.h
index a6bd797f..51d04879 100644
--- a/jemalloc/include/jemalloc/internal/prof.h
+++ b/jemalloc/include/jemalloc/internal/prof.h
@@ -389,7 +389,7 @@ prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt)
 {
 
 	assert(ptr != NULL);
-	assert(size == s2u(size));
+	assert(size == isalloc(ptr));
 
 	if (opt_lg_prof_sample != 0) {
 		if (prof_sample_accum_update(size)) {
@@ -401,6 +401,7 @@ prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt)
 			 * prof_alloc_prep() and prof_malloc().
 			 */
 			assert(false);
+			prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U);
 			return;
 		}
 	}
@@ -438,6 +439,7 @@ prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt,
 	assert(ptr != NULL || (uintptr_t)cnt <= (uintptr_t)1U);
 
 	if (ptr != NULL) {
+		assert(size == isalloc(ptr));
 		if (opt_lg_prof_sample != 0) {
 			if (prof_sample_accum_update(size)) {
 				/*
@@ -448,6 +450,7 @@ prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt,
 				 * its actual size was insufficient to cross
 				 * the sample threshold.
 				 */
+				prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U);
 				return;
 			}
 		}
diff --git a/jemalloc/src/arena.c b/jemalloc/src/arena.c
index a54a0905..2a8ea62b 100644
--- a/jemalloc/src/arena.c
+++ b/jemalloc/src/arena.c
@@ -1766,105 +1766,6 @@ arena_salloc_demote(const void *ptr)
 	return (ret);
 }
 
-static inline unsigned
-arena_run_regind(arena_run_t *run, arena_bin_t *bin, const void *ptr,
-    size_t size)
-{
-	unsigned shift, diff, regind;
-
-	assert(run->magic == ARENA_RUN_MAGIC);
-
-	/*
-	 * Avoid doing division with a variable divisor if possible.  Using
-	 * actual division here can reduce allocator throughput by over 20%!
-	 */
-	diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run - bin->reg0_offset);
-
-	/* Rescale (factor powers of 2 out of the numerator and denominator). */
-	shift = ffs(size) - 1;
-	diff >>= shift;
-	size >>= shift;
-
-	if (size == 1) {
-		/* The divisor was a power of 2. */
-		regind = diff;
-	} else {
-		/*
-		 * To divide by a number D that is not a power of two we
-		 * multiply by (2^21 / D) and then right shift by 21 positions.
-		 *
-		 *   X / D
-		 *
-		 * becomes
-		 *
-		 *   (X * size_invs[D - 3]) >> SIZE_INV_SHIFT
-		 *
-		 * We can omit the first three elements, because we never
-		 * divide by 0, and 1 and 2 are both powers of two, which are
-		 * handled above.
-		 */
-#define SIZE_INV_SHIFT 21
-#define SIZE_INV(s) (((1U << SIZE_INV_SHIFT) / (s)) + 1)
-		static const unsigned size_invs[] = {
-		    SIZE_INV(3),
-		    SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7),
-		    SIZE_INV(8), SIZE_INV(9), SIZE_INV(10), SIZE_INV(11),
-		    SIZE_INV(12), SIZE_INV(13), SIZE_INV(14), SIZE_INV(15),
-		    SIZE_INV(16), SIZE_INV(17), SIZE_INV(18), SIZE_INV(19),
-		    SIZE_INV(20), SIZE_INV(21), SIZE_INV(22), SIZE_INV(23),
-		    SIZE_INV(24), SIZE_INV(25), SIZE_INV(26), SIZE_INV(27),
-		    SIZE_INV(28), SIZE_INV(29), SIZE_INV(30), SIZE_INV(31)
-		};
-
-		if (size <= ((sizeof(size_invs) / sizeof(unsigned)) + 2))
-			regind = (diff * size_invs[size - 3]) >> SIZE_INV_SHIFT;
-		else
-			regind = diff / size;
-#undef SIZE_INV
-#undef SIZE_INV_SHIFT
-	}
-	assert(diff == regind * size);
-	assert(regind < bin->nregs);
-
-	return (regind);
-}
-
-prof_ctx_t *
-arena_prof_ctx_get(const void *ptr)
-{
-	prof_ctx_t *ret;
-	arena_chunk_t *chunk;
-	size_t pageind, mapbits;
-
-	assert(ptr != NULL);
-	assert(CHUNK_ADDR2BASE(ptr) != ptr);
-
-	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
-	pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
-	mapbits = chunk->map[pageind-map_bias].bits;
-	assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
-	if ((mapbits & CHUNK_MAP_LARGE) == 0) {
-		if (prof_promote)
-			ret = (prof_ctx_t *)(uintptr_t)1U;
-		else {
-			arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
-			    (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) <<
-			    PAGE_SHIFT));
-			arena_bin_t *bin = run->bin;
-			unsigned regind;
-
-			assert(run->magic == ARENA_RUN_MAGIC);
-			regind = arena_run_regind(run, bin, ptr, bin->reg_size);
-			ret = *(prof_ctx_t **)((uintptr_t)run +
-			    bin->ctx0_offset + (regind *
-			    sizeof(prof_ctx_t *)));
-		}
-	} else
-		ret = chunk->map[pageind-map_bias].prof_ctx;
-
-	return (ret);
-}
-
 void
 arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
 {
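
Standalone sketch (not part of the patch): the comment block in arena_run_regind() explains the reciprocal-multiplication trick used in place of division by the region size. The minimal C program below demonstrates that identity; SIZE_INV_SHIFT and SIZE_INV() mirror the macros in the diff, while the loop bounds are illustrative assumptions chosen so the 32-bit intermediate product cannot overflow.

#include <assert.h>
#include <stdio.h>

#define SIZE_INV_SHIFT 21
#define SIZE_INV(s) (((1U << SIZE_INV_SHIFT) / (s)) + 1)

int
main(void)
{
	unsigned d, regind, diff;

	/*
	 * Divisors 3..31, matching the size_invs[] table in the patch.
	 * (The real code handles power-of-two divisors with a shift.)
	 */
	for (d = 3; d <= 31; d++) {
		for (regind = 0; regind < 1024; regind++) {
			diff = regind * d;
			/* (diff * (2^21/d + 1)) >> 21 recovers diff / d. */
			assert(((diff * SIZE_INV(d)) >> SIZE_INV_SHIFT) ==
			    regind);
		}
	}
	printf("reciprocal division matched exact division\n");
	return (0);
}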
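The prof.h hunks carry the fix named in the subject line: the unsampled early-return paths in prof_malloc() and prof_realloc() now call prof_ctx_set() with the sentinel value (prof_ctx_t *)(uintptr_t)1U before returning. The self-contained sketch below illustrates the resulting invariant, namely that every path through the allocation hook leaves the per-object ctx slot set; all names here are placeholders, and only the sentinel value and the set-on-every-path behavior come from the patch.

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

static void *ctx_slot;	/* stand-in for the per-object prof ctx slot */

static void
ctx_set(void *ctx)
{
	ctx_slot = ctx;
}

static void
alloc_hook(bool sampled, void *real_ctx)
{
	if (!sampled) {
		/* Unsampled: record the sentinel rather than returning early. */
		ctx_set((void *)1);
		return;
	}
	ctx_set(real_ctx);
}

int
main(void)
{
	int dummy_ctx;

	ctx_slot = NULL;
	alloc_hook(false, &dummy_ctx);
	assert(ctx_slot == (void *)1);	/* unsampled path still sets a value */
	alloc_hook(true, &dummy_ctx);
	assert(ctx_slot == &dummy_ctx);	/* sampled path records the real ctx */
	printf("ctx slot initialized on both paths\n");
	return (0);
}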