Fix heap profiling bugs.

Fix a regression due to the recent heap profiling accuracy improvements:
prof_{m,re}alloc() must set the object's profiling context regardless of
whether it is sampled.
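
Below is a minimal, self-contained sketch of the invariant behind this
fix (not part of the patch; the types and names are hypothetical
stand-ins, not jemalloc's API): the per-object profiling context is
written on every allocation path, with (uintptr_t)1U serving as the
"allocated but not sampled" sentinel, so prof_free()/prof_realloc()
never read a stale context.

#include <stdint.h>
#include <stdio.h>

typedef struct { uintptr_t prof_ctx; } obj_t;  /* stand-in for an allocated object */
typedef struct { uintptr_t ctx; } thr_cnt_t;   /* stand-in for prof_thr_cnt_t */

/* Post-fix flow: the context is stored whether or not the object is sampled. */
static void
prof_malloc_sketch(obj_t *obj, const thr_cnt_t *cnt)
{
	if ((uintptr_t)cnt > (uintptr_t)1U)
		obj->prof_ctx = cnt->ctx;       /* sampled: record the real context */
	else
		obj->prof_ctx = (uintptr_t)1U;  /* not sampled: sentinel, never stale */
}

int
main(void)
{
	obj_t o = {0xdeadbeefU};  /* stale value left by a prior occupant */

	prof_malloc_sketch(&o, (const thr_cnt_t *)(uintptr_t)1U);
	printf("ctx after a non-sampled allocation: %#lx\n",
	    (unsigned long)o.prof_ctx);
	return (0);
}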

Fix management of the CHUNK_MAP_CLASS chunk map bits, such that all
large object (re-)allocation paths correctly initialize the bits.  Prior
to this fix, in-place realloc() cleared the bits, causing
arena_salloc_demote() to report incorrect object sizes.  After this fix,
the non-demoted bit pattern is all zeros (instead of all ones), which
makes it easier to ensure that the bits are properly set.
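
As an illustration of the new encoding (again not part of the patch; the
helper names are made up, and the shift/mask values are read off the
CHUNK_MAP_CLASS layout comment in the first hunk below, so treat them as
assumptions): the size class is stored as binind+1, which leaves zero
free to mean "not sampled / not demoted" and makes a freshly zeroed map
word correct by default.

#include <assert.h>
#include <stddef.h>

#define CHUNK_MAP_CLASS_SHIFT  4                 /* assumed from the layout comment */
#define CHUNK_MAP_CLASS_MASK   ((size_t)0xff0U)  /* the eight "c" bits */

/* Record that a page holds a sampled small object of size class binind. */
static size_t
class_bits_set(size_t mapbits, size_t binind)
{
	return ((mapbits & ~CHUNK_MAP_CLASS_MASK) |
	    ((binind + 1) << CHUNK_MAP_CLASS_SHIFT));
}

/* Recover binind, or (size_t)-1 if the bits are zero (not a sampled object). */
static size_t
class_bits_get(size_t mapbits)
{
	size_t c = (mapbits & CHUNK_MAP_CLASS_MASK) >> CHUNK_MAP_CLASS_SHIFT;

	return (c == 0 ? (size_t)-1 : c - 1);
}

int
main(void)
{
	assert(class_bits_get(0) == (size_t)-1);           /* all-zero: not demoted */
	assert(class_bits_get(class_bits_set(0, 3)) == 3);
	return (0);
}
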
Author: Jason Evans
Date:   2010-10-22 10:45:59 -07:00
Commit: e4f7846f1f (parent 81b4e6eb6f)

6 changed files with 85 additions and 83 deletions

include/jemalloc/internal/arena.h

@@ -121,7 +121,7 @@ struct arena_chunk_map_s {
*
* p : run page offset
* s : run size
* c : size class (used only if prof_promote is true)
* c : (binind+1) for size class (used only if prof_promote is true)
* x : don't care
* - : 0
* + : 1
@@ -144,7 +144,7 @@ struct arena_chunk_map_s {
* pppppppp pppppppp pppp---- ----d--a
*
* Large:
* ssssssss ssssssss ssss++++ ++++D-la
* ssssssss ssssssss ssss---- ----D-la
* xxxxxxxx xxxxxxxx xxxx---- ----xxxx
* -------- -------- -------- ----D-la
*
@@ -152,7 +152,7 @@ struct arena_chunk_map_s {
* ssssssss ssssssss sssscccc ccccD-la
*
* Large (not sampled, size == PAGE_SIZE):
* ssssssss ssssssss ssss++++ ++++D-la
* ssssssss ssssssss ssss---- ----D-la
*/
size_t bits;
#ifdef JEMALLOC_PROF
@@ -444,7 +444,6 @@ size_t arena_salloc(const void *ptr);
#ifdef JEMALLOC_PROF
void arena_prof_promoted(const void *ptr, size_t size);
size_t arena_salloc_demote(const void *ptr);
void arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx);
#endif
void arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr,
arena_chunk_map_t *mapelm);
@@ -470,6 +469,7 @@ unsigned arena_run_regind(arena_run_t *run, arena_bin_t *bin,
const void *ptr, size_t size);
# ifdef JEMALLOC_PROF
prof_ctx_t *arena_prof_ctx_get(const void *ptr);
void arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx);
# endif
void arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr);
#endif
@@ -574,6 +574,38 @@ arena_prof_ctx_get(const void *ptr)
return (ret);
}
JEMALLOC_INLINE void
arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
{
arena_chunk_t *chunk;
size_t pageind, mapbits;
assert(ptr != NULL);
assert(CHUNK_ADDR2BASE(ptr) != ptr);
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
mapbits = chunk->map[pageind-map_bias].bits;
assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
if ((mapbits & CHUNK_MAP_LARGE) == 0) {
if (prof_promote == false) {
arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
(uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) <<
PAGE_SHIFT));
arena_bin_t *bin = run->bin;
unsigned regind;
assert(run->magic == ARENA_RUN_MAGIC);
regind = arena_run_regind(run, bin, ptr, bin->reg_size);
*((prof_ctx_t **)((uintptr_t)run + bin->ctx0_offset
+ (regind * sizeof(prof_ctx_t *)))) = ctx;
} else
assert((uintptr_t)ctx == (uintptr_t)1U);
} else
chunk->map[pageind-map_bias].prof_ctx = ctx;
}
#endif
JEMALLOC_INLINE void

include/jemalloc/internal/prof.h

@@ -232,8 +232,8 @@ void prof_ctx_set(const void *ptr, prof_ctx_t *ctx);
bool prof_sample_accum_update(size_t size);
void prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt);
void prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt,
const void *old_ptr, size_t old_size, prof_ctx_t *old_ctx);
void prof_free(const void *ptr);
size_t old_size, prof_ctx_t *old_ctx);
void prof_free(const void *ptr, size_t size);
#endif
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PROF_C_))
@@ -400,9 +400,7 @@ prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt)
* be a difference between the size passed to
* prof_alloc_prep() and prof_malloc().
*/
assert(false);
prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U);
return;
assert((uintptr_t)cnt == (uintptr_t)1U);
}
}
@@ -432,7 +430,7 @@ prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt)
JEMALLOC_INLINE void
prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt,
const void *old_ptr, size_t old_size, prof_ctx_t *old_ctx)
size_t old_size, prof_ctx_t *old_ctx)
{
prof_thr_cnt_t *told_cnt;
@@ -445,13 +443,12 @@ prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt,
/*
* Don't sample. The size passed to
* prof_alloc_prep() was larger than what
* actually got allocated., so a backtrace was
* actually got allocated, so a backtrace was
* captured for this allocation, even though
* its actual size was insufficient to cross
* the sample threshold.
*/
prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U);
return;
cnt = (prof_thr_cnt_t *)(uintptr_t)1U;
}
}
}
@@ -506,12 +503,12 @@ prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt,
}
JEMALLOC_INLINE void
prof_free(const void *ptr)
prof_free(const void *ptr, size_t size)
{
prof_ctx_t *ctx = prof_ctx_get(ptr);
if ((uintptr_t)ctx > (uintptr_t)1) {
size_t size = isalloc(ptr);
assert(size == isalloc(ptr));
prof_thr_cnt_t *tcnt = prof_lookup(ctx->bt);
if (tcnt != NULL) {

include/jemalloc/internal/tcache.h

@@ -282,8 +282,7 @@ tcache_alloc_large(tcache_t *tcache, size_t size, bool zero)
arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ret);
size_t pageind = (((uintptr_t)ret - (uintptr_t)chunk) >>
PAGE_SHIFT);
chunk->map[pageind-map_bias].bits |=
CHUNK_MAP_CLASS_MASK;
chunk->map[pageind-map_bias].bits &= ~CHUNK_MAP_CLASS_MASK;
#endif
if (zero == false) {
#ifdef JEMALLOC_FILL

src/arena.c

@@ -408,11 +408,8 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large,
*/
chunk->map[run_ind+need_pages-1-map_bias].bits =
CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED | flag_dirty;
chunk->map[run_ind-map_bias].bits = size | CHUNK_MAP_LARGE |
#ifdef JEMALLOC_PROF
CHUNK_MAP_CLASS_MASK |
#endif
CHUNK_MAP_ALLOCATED | flag_dirty;
chunk->map[run_ind-map_bias].bits = size | flag_dirty |
CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED;
} else {
assert(zero == false);
/*
@@ -1724,7 +1721,7 @@ arena_prof_promoted(const void *ptr, size_t size)
binind = small_size2bin[size];
assert(binind < nbins);
chunk->map[pageind-map_bias].bits = (chunk->map[pageind-map_bias].bits &
~CHUNK_MAP_CLASS_MASK) | (binind << CHUNK_MAP_CLASS_SHIFT);
~CHUNK_MAP_CLASS_MASK) | ((binind+1) << CHUNK_MAP_CLASS_SHIFT);
}
size_t
@@ -1754,9 +1751,9 @@ arena_salloc_demote(const void *ptr)
assert(((uintptr_t)ptr & PAGE_MASK) == 0);
ret = mapbits & ~PAGE_MASK;
if (prof_promote && ret == PAGE_SIZE && (mapbits &
CHUNK_MAP_CLASS_MASK) != CHUNK_MAP_CLASS_MASK) {
CHUNK_MAP_CLASS_MASK) != 0) {
size_t binind = ((mapbits & CHUNK_MAP_CLASS_MASK) >>
CHUNK_MAP_CLASS_SHIFT);
CHUNK_MAP_CLASS_SHIFT) - 1;
assert(binind < nbins);
ret = chunk->arena->bins[binind].reg_size;
}
@@ -1765,38 +1762,6 @@ arena_salloc_demote(const void *ptr)
return (ret);
}
void
arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
{
arena_chunk_t *chunk;
size_t pageind, mapbits;
assert(ptr != NULL);
assert(CHUNK_ADDR2BASE(ptr) != ptr);
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
mapbits = chunk->map[pageind-map_bias].bits;
assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
if ((mapbits & CHUNK_MAP_LARGE) == 0) {
if (prof_promote == false) {
arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
(uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) <<
PAGE_SHIFT));
arena_bin_t *bin = run->bin;
unsigned regind;
assert(run->magic == ARENA_RUN_MAGIC);
regind = arena_run_regind(run, bin, ptr, bin->reg_size);
*((prof_ctx_t **)((uintptr_t)run + bin->ctx0_offset
+ (regind * sizeof(prof_ctx_t *)))) = ctx;
} else
assert((uintptr_t)ctx == (uintptr_t)1U);
} else
chunk->map[pageind-map_bias].prof_ctx = ctx;
}
#endif
static void

src/jemalloc.c

@@ -1268,7 +1268,7 @@ RETURN:
#endif
#ifdef JEMALLOC_PROF
if (opt_prof)
prof_realloc(ret, usize, cnt, ptr, old_size, old_ctx);
prof_realloc(ret, usize, cnt, old_size, old_ctx);
#endif
#ifdef JEMALLOC_STATS
if (ret != NULL) {
@@ -1285,15 +1285,26 @@ JEMALLOC_P(free)(void *ptr)
{
if (ptr != NULL) {
#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS))
size_t usize;
#endif
assert(malloc_initialized || malloc_initializer ==
pthread_self());
#ifdef JEMALLOC_STATS
usize = isalloc(ptr);
#endif
#ifdef JEMALLOC_PROF
if (opt_prof)
prof_free(ptr);
if (opt_prof) {
# ifndef JEMALLOC_STATS
usize = isalloc(ptr);
# endif
prof_free(ptr, usize);
}
#endif
#ifdef JEMALLOC_STATS
ALLOCATED_ADD(0, isalloc(ptr));
ALLOCATED_ADD(0, usize);
#endif
idalloc(ptr);
}
@@ -1566,7 +1577,7 @@ JEMALLOC_P(rallocm)(void **ptr, size_t *rsize, size_t size, size_t extra,
goto ERR;
usize = isalloc(q);
}
prof_realloc(q, usize, cnt, p, old_size, old_ctx);
prof_realloc(q, usize, cnt, old_size, old_ctx);
} else
#endif
{
@@ -1635,16 +1646,26 @@ JEMALLOC_ATTR(visibility("default"))
int
JEMALLOC_P(dallocm)(void *ptr, int flags)
{
#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS))
size_t usize;
#endif
assert(ptr != NULL);
assert(malloc_initialized || malloc_initializer == pthread_self());
#ifdef JEMALLOC_STATS
usize = isalloc(ptr);
#endif
#ifdef JEMALLOC_PROF
if (opt_prof)
prof_free(ptr);
if (opt_prof) {
# ifndef JEMALLOC_STATS
usize = isalloc(ptr);
# endif
prof_free(ptr, usize);
}
#endif
#ifdef JEMALLOC_STATS
ALLOCATED_ADD(0, isalloc(ptr));
ALLOCATED_ADD(0, usize);
#endif
idalloc(ptr);

src/prof.c

@@ -239,38 +239,26 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max)
void
prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max)
{
#define NIGNORE 3
#define BT_FRAME(i) \
if ((i) < NIGNORE + max) { \
if ((i) < nignore + max) { \
void *p; \
if (__builtin_frame_address(i) == 0) \
return; \
p = __builtin_return_address(i); \
if (p == NULL) \
return; \
if (i >= NIGNORE) { \
bt->vec[(i) - NIGNORE] = p; \
bt->len = (i) - NIGNORE + 1; \
if (i >= nignore) { \
bt->vec[(i) - nignore] = p; \
bt->len = (i) - nignore + 1; \
} \
} else \
return;
assert(max <= (1U << opt_lg_prof_bt_max));
/*
* Ignore the first three frames, since they are:
*
* 0: prof_backtrace()
* 1: prof_alloc_prep()
* 2: malloc(), calloc(), etc.
*/
#if 1
assert(nignore + 1 == NIGNORE);
#else
BT_FRAME(0)
BT_FRAME(1)
BT_FRAME(2)
#endif
BT_FRAME(3)
BT_FRAME(4)
BT_FRAME(5)
@@ -491,8 +479,8 @@ prof_lookup(prof_bt_t *bt)
== (ZU(1) << opt_lg_prof_tcmax)) {
assert(ckh_count(&prof_tdata->bt2cnt) > 0);
/*
* Flush the least least recently used cnt in order to
* keep bt2cnt from becoming too large.
* Flush the least recently used cnt in order to keep
* bt2cnt from becoming too large.
*/
ret.p = ql_last(&prof_tdata->lru_ql, lru_link);
assert(ret.v != NULL);