diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h
index 075c263a..28540a47 100644
--- a/include/jemalloc/internal/arena.h
+++ b/include/jemalloc/internal/arena.h
@@ -474,7 +474,7 @@ size_t	arena_bin_index(arena_t *arena, arena_bin_t *bin);
 unsigned	arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info,
     const void *ptr);
 prof_ctx_t	*arena_prof_ctx_get(const void *ptr);
-void	arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx);
+void	arena_prof_ctx_set(const void *ptr, size_t usize, prof_ctx_t *ctx);
 void	*arena_malloc(arena_t *arena, size_t size, bool zero, bool try_tcache);
 size_t	arena_salloc(const void *ptr, bool demote);
 void	arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr,
@@ -886,7 +886,7 @@ arena_prof_ctx_get(const void *ptr)
 }
 
 JEMALLOC_INLINE void
-arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
+arena_prof_ctx_set(const void *ptr, size_t usize, prof_ctx_t *ctx)
 {
 	arena_chunk_t *chunk;
 	size_t pageind, mapbits;
@@ -899,7 +899,14 @@ arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
 	pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
 	mapbits = arena_mapbits_get(chunk, pageind);
 	assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
-	if ((mapbits & CHUNK_MAP_LARGE) == 0) {
+
+	if (usize > SMALL_MAXCLASS || (prof_promote &&
+	    ((uintptr_t)ctx != (uintptr_t)1U || ((mapbits & CHUNK_MAP_LARGE) !=
+	    0)))) {
+		assert((mapbits & CHUNK_MAP_LARGE) != 0);
+		arena_mapp_get(chunk, pageind)->prof_ctx = ctx;
+	} else {
+		assert((mapbits & CHUNK_MAP_LARGE) == 0);
 		if (prof_promote == false) {
 			arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
 			    (uintptr_t)((pageind - (mapbits >> LG_PAGE)) <<
@@ -912,12 +919,11 @@ arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
 			bin_info = &arena_bin_info[binind];
 			regind = arena_run_regind(run, bin_info, ptr);
 
-			*((prof_ctx_t **)((uintptr_t)run + bin_info->ctx0_offset
-			    + (regind * sizeof(prof_ctx_t *)))) = ctx;
-		} else
-			assert((uintptr_t)ctx == (uintptr_t)1U);
-	} else
-		arena_mapp_get(chunk, pageind)->prof_ctx = ctx;
+			*((prof_ctx_t **)((uintptr_t)run +
+			    bin_info->ctx0_offset + (regind * sizeof(prof_ctx_t
+			    *)))) = ctx;
+		}
+	}
 }
 
 JEMALLOC_ALWAYS_INLINE void *
diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h
index 28ad37af..8b240999 100644
--- a/include/jemalloc/internal/prof.h
+++ b/include/jemalloc/internal/prof.h
@@ -289,11 +289,11 @@ malloc_tsd_protos(JEMALLOC_ATTR(unused), prof_tdata, prof_tdata_t *)
 prof_tdata_t	*prof_tdata_get(bool create);
 void	prof_sample_threshold_update(prof_tdata_t *prof_tdata);
 prof_ctx_t	*prof_ctx_get(const void *ptr);
-void	prof_ctx_set(const void *ptr, prof_ctx_t *ctx);
+void	prof_ctx_set(const void *ptr, size_t usize, prof_ctx_t *ctx);
 bool	prof_sample_accum_update(size_t size);
-void	prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt);
-void	prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt,
-    size_t old_size, prof_ctx_t *old_ctx);
+void	prof_malloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt);
+void	prof_realloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt,
+    size_t old_usize, prof_ctx_t *old_ctx);
 void	prof_free(const void *ptr, size_t size);
 #endif
 
@@ -386,7 +386,7 @@ prof_ctx_get(const void *ptr)
 }
 
 JEMALLOC_INLINE void
-prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
+prof_ctx_set(const void *ptr, size_t usize, prof_ctx_t *ctx)
 {
 	arena_chunk_t *chunk;
 
@@ -396,7 +396,7 @@ prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
 	if (chunk != ptr) {
 		/* Region. */
-		arena_prof_ctx_set(ptr, ctx);
+		arena_prof_ctx_set(ptr, usize, ctx);
 	} else
 		huge_prof_ctx_set(ptr, ctx);
 }
@@ -431,20 +431,20 @@ prof_sample_accum_update(size_t size)
 }
 
 JEMALLOC_INLINE void
-prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt)
+prof_malloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt)
 {
 
 	cassert(config_prof);
 	assert(ptr != NULL);
-	assert(size == isalloc(ptr, true));
+	assert(usize == isalloc(ptr, true));
 
 	if (opt_lg_prof_sample != 0) {
-		if (prof_sample_accum_update(size)) {
+		if (prof_sample_accum_update(usize)) {
 			/*
 			 * Don't sample.  For malloc()-like allocation, it is
 			 * always possible to tell in advance how large an
 			 * object's usable size will be, so there should never
-			 * be a difference between the size passed to
+			 * be a difference between the usize passed to
 			 * PROF_ALLOC_PREP() and prof_malloc().
 			 */
 			assert((uintptr_t)cnt == (uintptr_t)1U);
@@ -452,17 +452,17 @@ prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt)
 	}
 
 	if ((uintptr_t)cnt > (uintptr_t)1U) {
-		prof_ctx_set(ptr, cnt->ctx);
+		prof_ctx_set(ptr, usize, cnt->ctx);
 
 		cnt->epoch++;
 		/*********/
 		mb_write();
 		/*********/
 		cnt->cnts.curobjs++;
-		cnt->cnts.curbytes += size;
+		cnt->cnts.curbytes += usize;
 		if (opt_prof_accum) {
 			cnt->cnts.accumobjs++;
-			cnt->cnts.accumbytes += size;
+			cnt->cnts.accumbytes += usize;
 		}
 		/*********/
 		mb_write();
@@ -472,12 +472,12 @@ prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt)
 		mb_write();
 		/*********/
 	} else
-		prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U);
+		prof_ctx_set(ptr, usize, (prof_ctx_t *)(uintptr_t)1U);
 }
 
 JEMALLOC_INLINE void
-prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt,
-    size_t old_size, prof_ctx_t *old_ctx)
+prof_realloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt,
+    size_t old_usize, prof_ctx_t *old_ctx)
 {
 	prof_thr_cnt_t *told_cnt;
 
@@ -485,15 +485,15 @@ prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt,
 	assert(ptr != NULL || (uintptr_t)cnt <= (uintptr_t)1U);
 
 	if (ptr != NULL) {
-		assert(size == isalloc(ptr, true));
+		assert(usize == isalloc(ptr, true));
 		if (opt_lg_prof_sample != 0) {
-			if (prof_sample_accum_update(size)) {
+			if (prof_sample_accum_update(usize)) {
 				/*
-				 * Don't sample.  The size passed to
+				 * Don't sample.  The usize passed to
 				 * PROF_ALLOC_PREP() was larger than what
 				 * actually got allocated, so a backtrace was
 				 * captured for this allocation, even though
-				 * its actual size was insufficient to cross
+				 * its actual usize was insufficient to cross
 				 * the sample threshold.
 				 */
 				cnt = (prof_thr_cnt_t *)(uintptr_t)1U;
@@ -510,7 +510,7 @@ prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt,
 			 */
 			malloc_mutex_lock(old_ctx->lock);
 			old_ctx->cnt_merged.curobjs--;
-			old_ctx->cnt_merged.curbytes -= old_size;
+			old_ctx->cnt_merged.curbytes -= old_usize;
 			malloc_mutex_unlock(old_ctx->lock);
 			told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U;
 		}
@@ -520,23 +520,23 @@ prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt,
 	if ((uintptr_t)told_cnt > (uintptr_t)1U)
 		told_cnt->epoch++;
 	if ((uintptr_t)cnt > (uintptr_t)1U) {
-		prof_ctx_set(ptr, cnt->ctx);
+		prof_ctx_set(ptr, usize, cnt->ctx);
 		cnt->epoch++;
 	} else if (ptr != NULL)
-		prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U);
+		prof_ctx_set(ptr, usize, (prof_ctx_t *)(uintptr_t)1U);
 	/*********/
 	mb_write();
 	/*********/
 	if ((uintptr_t)told_cnt > (uintptr_t)1U) {
 		told_cnt->cnts.curobjs--;
-		told_cnt->cnts.curbytes -= old_size;
+		told_cnt->cnts.curbytes -= old_usize;
 	}
 	if ((uintptr_t)cnt > (uintptr_t)1U) {
 		cnt->cnts.curobjs++;
-		cnt->cnts.curbytes += size;
+		cnt->cnts.curbytes += usize;
 		if (opt_prof_accum) {
 			cnt->cnts.accumobjs++;
-			cnt->cnts.accumbytes += size;
+			cnt->cnts.accumbytes += usize;
 		}
 	}
 	/*********/
diff --git a/src/jemalloc.c b/src/jemalloc.c
index f8c8119d..b8a4fb07 100644
--- a/src/jemalloc.c
+++ b/src/jemalloc.c
@@ -1105,7 +1105,7 @@ je_realloc(void *ptr, size_t size)
 {
 	void *ret;
 	size_t usize JEMALLOC_CC_SILENCE_INIT(0);
-	size_t old_size = 0;
+	size_t old_usize = 0;
 	UNUSED size_t old_rzsize JEMALLOC_CC_SILENCE_INIT(0);
 	prof_thr_cnt_t *cnt JEMALLOC_CC_SILENCE_INIT(NULL);
 	prof_ctx_t *old_ctx JEMALLOC_CC_SILENCE_INIT(NULL);
@@ -1115,16 +1115,16 @@ je_realloc(void *ptr, size_t size)
 			/* realloc(ptr, 0) is equivalent to free(p). */
 			assert(malloc_initialized || IS_INITIALIZER);
 			if (config_prof) {
-				old_size = isalloc(ptr, true);
+				old_usize = isalloc(ptr, true);
 				if (config_valgrind && opt_valgrind)
 					old_rzsize = p2rz(ptr);
 			} else if (config_stats) {
-				old_size = isalloc(ptr, false);
+				old_usize = isalloc(ptr, false);
 				if (config_valgrind && opt_valgrind)
-					old_rzsize = u2rz(old_size);
+					old_rzsize = u2rz(old_usize);
 			} else if (config_valgrind && opt_valgrind) {
-				old_size = isalloc(ptr, false);
-				old_rzsize = u2rz(old_size);
+				old_usize = isalloc(ptr, false);
+				old_rzsize = u2rz(old_usize);
 			}
 			if (config_prof && opt_prof) {
 				old_ctx = prof_ctx_get(ptr);
@@ -1142,16 +1142,16 @@ je_realloc(void *ptr, size_t size)
 		malloc_thread_init();
 
 		if (config_prof) {
-			old_size = isalloc(ptr, true);
+			old_usize = isalloc(ptr, true);
 			if (config_valgrind && opt_valgrind)
 				old_rzsize = p2rz(ptr);
 		} else if (config_stats) {
-			old_size = isalloc(ptr, false);
+			old_usize = isalloc(ptr, false);
 			if (config_valgrind && opt_valgrind)
-				old_rzsize = u2rz(old_size);
+				old_rzsize = u2rz(old_usize);
 		} else if (config_valgrind && opt_valgrind) {
-			old_size = isalloc(ptr, false);
-			old_rzsize = u2rz(old_size);
+			old_usize = isalloc(ptr, false);
+			old_rzsize = u2rz(old_usize);
 		}
 		if (config_prof && opt_prof) {
 			usize = s2u(size);
@@ -1236,16 +1236,17 @@ label_oom:
 
 label_return:
 	if (config_prof && opt_prof)
-		prof_realloc(ret, usize, cnt, old_size, old_ctx);
+		prof_realloc(ret, usize, cnt, old_usize, old_ctx);
 	if (config_stats && ret != NULL) {
 		thread_allocated_t *ta;
 		assert(usize == isalloc(ret, config_prof));
 		ta = thread_allocated_tsd_get();
 		ta->allocated += usize;
-		ta->deallocated += old_size;
+		ta->deallocated += old_usize;
 	}
 	UTRACE(ptr, size, ret);
-	JEMALLOC_VALGRIND_REALLOC(ret, usize, ptr, old_size, old_rzsize, false);
+	JEMALLOC_VALGRIND_REALLOC(ret, usize, ptr, old_usize, old_rzsize,
+	    false);
 	return (ret);
 }
 
@@ -1431,8 +1432,7 @@ void *
 je_rallocx(void *ptr, size_t size, int flags)
 {
 	void *p;
-	size_t usize;
-	size_t old_size;
+	size_t usize, old_usize;
 	UNUSED size_t old_rzsize JEMALLOC_CC_SILENCE_INIT(0);
 	size_t alignment = (ZU(1) << (flags & MALLOCX_LG_ALIGN_MASK) &
 	    (SIZE_T_MAX-1));
@@ -1465,7 +1465,7 @@ je_rallocx(void *ptr, size_t size, int flags)
 		usize = (alignment == 0) ? s2u(size) : sa2u(size,
 		    alignment);
 		prof_ctx_t *old_ctx = prof_ctx_get(ptr);
-		old_size = isalloc(ptr, true);
+		old_usize = isalloc(ptr, true);
 		if (config_valgrind && opt_valgrind)
 			old_rzsize = p2rz(ptr);
 		PROF_ALLOC_PREP(1, usize, cnt);
@@ -1487,15 +1487,28 @@ je_rallocx(void *ptr, size_t size, int flags)
 			if (p == NULL)
 				goto label_oom;
 		}
-		prof_realloc(p, usize, cnt, old_size, old_ctx);
+		if (p == ptr && alignment != 0) {
+			/*
+			 * The allocation did not move, so it is possible that
+			 * the size class is smaller than would guarantee the
+			 * requested alignment, and that the alignment
+			 * constraint was serendipitously satisfied.
+			 * Additionally, old_usize may not be the same as the
+			 * current usize because of in-place large
+			 * reallocation.  Therefore, query the actual value of
+			 * usize.
+			 */
+			usize = isalloc(p, true);
+		}
+		prof_realloc(p, usize, cnt, old_usize, old_ctx);
 	} else {
 		if (config_stats) {
-			old_size = isalloc(ptr, false);
+			old_usize = isalloc(ptr, false);
 			if (config_valgrind && opt_valgrind)
-				old_rzsize = u2rz(old_size);
+				old_rzsize = u2rz(old_usize);
 		} else if (config_valgrind && opt_valgrind) {
-			old_size = isalloc(ptr, false);
-			old_rzsize = u2rz(old_size);
+			old_usize = isalloc(ptr, false);
+			old_rzsize = u2rz(old_usize);
 		}
 		p = iralloct(ptr, size, 0, alignment, zero, false,
 		    try_tcache_alloc, try_tcache_dalloc, arena);
@@ -1509,10 +1522,10 @@ je_rallocx(void *ptr, size_t size, int flags)
 		thread_allocated_t *ta;
 		ta = thread_allocated_tsd_get();
 		ta->allocated += usize;
-		ta->deallocated += old_size;
+		ta->deallocated += old_usize;
 	}
 	UTRACE(ptr, size, p);
-	JEMALLOC_VALGRIND_REALLOC(p, usize, ptr, old_size, old_rzsize, zero);
+	JEMALLOC_VALGRIND_REALLOC(p, usize, ptr, old_usize, old_rzsize, zero);
 	return (p);
 label_oom:
 	if (config_xmalloc && opt_xmalloc) {
@@ -1526,8 +1539,7 @@
 size_t
 je_xallocx(void *ptr, size_t size, size_t extra, int flags)
 {
-	size_t usize;
-	size_t old_size;
+	size_t usize, old_usize;
 	UNUSED size_t old_rzsize JEMALLOC_CC_SILENCE_INIT(0);
 	size_t alignment = (ZU(1) << (flags & MALLOCX_LG_ALIGN_MASK) &
 	    (SIZE_T_MAX-1));
@@ -1568,12 +1580,12 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags)
 		size_t max_usize = (alignment == 0) ? s2u(size+extra) :
 		    sa2u(size+extra, alignment);
 		prof_ctx_t *old_ctx = prof_ctx_get(ptr);
-		old_size = isalloc(ptr, true);
+		old_usize = isalloc(ptr, true);
 		if (config_valgrind && opt_valgrind)
 			old_rzsize = p2rz(ptr);
 		PROF_ALLOC_PREP(1, max_usize, cnt);
 		if (cnt == NULL) {
-			usize = isalloc(ptr, config_prof);
+			usize = old_usize;
 			goto label_not_moved;
 		}
 		/*
@@ -1585,32 +1597,35 @@
 			if (iralloct(ptr, SMALL_MAXCLASS+1, (SMALL_MAXCLASS+1 >=
 			    size+extra) ? 0 : size+extra - (SMALL_MAXCLASS+1),
 			    alignment, zero, true, try_tcache_alloc,
-			    try_tcache_dalloc, arena) == NULL)
+			    try_tcache_dalloc, arena) == NULL) {
+				usize = old_usize;
 				goto label_not_moved;
-			if (max_usize < PAGE) {
-				usize = max_usize;
+			}
+			usize = isalloc(ptr, true);
+			if (max_usize < PAGE)
 				arena_prof_promoted(ptr, usize);
-			} else
-				usize = isalloc(ptr, config_prof);
 		} else {
 			if (iralloct(ptr, size, extra, alignment, zero, true,
-			    try_tcache_alloc, try_tcache_dalloc, arena) == NULL)
+			    try_tcache_alloc, try_tcache_dalloc, arena) ==
+			    NULL) {
+				usize = old_usize;
 				goto label_not_moved;
-			usize = isalloc(ptr, config_prof);
+			}
+			usize = isalloc(ptr, true);
 		}
-		prof_realloc(ptr, usize, cnt, old_size, old_ctx);
+		prof_realloc(ptr, usize, cnt, old_usize, old_ctx);
 	} else {
 		if (config_stats) {
-			old_size = isalloc(ptr, false);
+			old_usize = isalloc(ptr, false);
 			if (config_valgrind && opt_valgrind)
-				old_rzsize = u2rz(old_size);
+				old_rzsize = u2rz(old_usize);
 		} else if (config_valgrind && opt_valgrind) {
-			old_size = isalloc(ptr, false);
-			old_rzsize = u2rz(old_size);
+			old_usize = isalloc(ptr, false);
+			old_rzsize = u2rz(old_usize);
 		}
 		if (iralloct(ptr, size, extra, alignment, zero, true,
 		    try_tcache_alloc, try_tcache_dalloc, arena) == NULL) {
-			usize = isalloc(ptr, config_prof);
+			usize = old_usize;
 			goto label_not_moved;
 		}
 		usize = isalloc(ptr, config_prof);
@@ -1620,9 +1635,9 @@
 		thread_allocated_t *ta;
 		ta = thread_allocated_tsd_get();
 		ta->allocated += usize;
-		ta->deallocated += old_size;
+		ta->deallocated += old_usize;
 	}
-	JEMALLOC_VALGRIND_REALLOC(ptr, usize, ptr, old_size, old_rzsize, zero);
+	JEMALLOC_VALGRIND_REALLOC(ptr, usize, ptr, old_usize, old_rzsize, zero);
 label_not_moved:
 	UTRACE(ptr, size, ptr);
 	return (usize);
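
Reviewer note (not part of the patch): the heart of this change is the restructured branch in arena_prof_ctx_set(), which now chooses the write path from the caller-supplied usize, the prof_promote setting, and the unsampled-allocation sentinel ctx == (prof_ctx_t *)(uintptr_t)1U, rather than dispatching on CHUNK_MAP_LARGE alone. The stand-alone C program below is a minimal sketch that isolates the new predicate so it can be exercised independently; the file name is hypothetical, and SMALL_MAXCLASS and CHUNK_MAP_LARGE are illustrative stand-in values, not jemalloc's real definitions.

/*
 * predicate_sketch.c: hypothetical, for illustration only.  Mirrors the
 * condition that the new arena_prof_ctx_set() evaluates.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define	SMALL_MAXCLASS	3584		/* stand-in value */
#define	CHUNK_MAP_LARGE	((size_t)0x2)	/* stand-in value */

/*
 * Returns true when ctx belongs in the chunk's page map (large or
 * promoted-to-large allocations); returns false for the small-region path,
 * where a per-region ctx slot is written only when prof_promote is disabled
 * and nothing needs to be written at all otherwise.
 */
static bool
use_large_path(size_t usize, uintptr_t ctx, size_t mapbits, bool prof_promote)
{

	return (usize > SMALL_MAXCLASS || (prof_promote &&
	    (ctx != (uintptr_t)1U || (mapbits & CHUNK_MAP_LARGE) != 0)));
}

int
main(void)
{

	/* A sampled small request under prof_promote lands in a large run. */
	printf("%d\n", use_large_path(512, (uintptr_t)0xdeadbeef,
	    CHUNK_MAP_LARGE, true));	/* prints 1 */
	/* An unsampled small allocation (ctx == 1) stays on the small path. */
	printf("%d\n", use_large_path(512, (uintptr_t)1U, 0, true));
					/* prints 0 */
	return (0);
}

The two printf() cases correspond to the two assert() arms added in the @@ -899,7 +899,14 @@ hunk: whenever the predicate is true, the real allocator must find CHUNK_MAP_LARGE set, and whenever it is false the mapping must be a small run, which is exactly what the new assertions check.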