Fix thread-related profiling bugs.

Initialize bt2cnt_tsd so that cleanup at thread exit actually happens.

Associate (prof_ctx_t *) with allocated objects, rather than
(prof_thr_cnt_t *).  Each thread must always operate on its own
(prof_thr_cnt_t *), and an object may outlive the thread that allocated it.
This commit is contained in:
Jason Evans 2010-04-13 16:13:54 -07:00
parent 1bb602125c
commit 5065156f3f
8 changed files with 117 additions and 81 deletions

View File

@ -98,7 +98,7 @@ struct arena_chunk_map_s {
#ifdef JEMALLOC_PROF
/* Profile counters, used for large object runs. */
prof_thr_cnt_t *prof_cnt;
prof_ctx_t *prof_ctx;
#endif
/*
@ -246,10 +246,10 @@ struct arena_bin_s {
#ifdef JEMALLOC_PROF
/*
* Offset of first (prof_cnt_t *) in a run header for this bin's size
* Offset of first (prof_ctx_t *) in a run header for this bin's size
* class, or 0 if (opt_prof == false).
*/
uint32_t cnt0_offset;
uint32_t ctx0_offset;
#endif
/* Offset of first region in a run for this bin's size class. */
@ -438,8 +438,8 @@ size_t arena_salloc(const void *ptr);
#ifdef JEMALLOC_PROF
void arena_prof_promoted(const void *ptr, size_t size);
size_t arena_salloc_demote(const void *ptr);
prof_thr_cnt_t *arena_prof_cnt_get(const void *ptr);
void arena_prof_cnt_set(const void *ptr, prof_thr_cnt_t *cnt);
prof_ctx_t *arena_prof_ctx_get(const void *ptr);
void arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx);
#endif
void arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr,
arena_chunk_map_t *mapelm);

View File

@ -19,7 +19,7 @@ struct extent_node_s {
#ifdef JEMALLOC_PROF
/* Profile counters, used for huge objects. */
prof_thr_cnt_t *prof_cnt;
prof_ctx_t *prof_ctx;
#endif
/* Pointer to the extent that this tree node is responsible for. */

View File

@ -25,8 +25,8 @@ void *huge_ralloc(void *ptr, size_t size, size_t oldsize);
void huge_dalloc(void *ptr);
size_t huge_salloc(const void *ptr);
#ifdef JEMALLOC_PROF
prof_thr_cnt_t *huge_prof_cnt_get(const void *ptr);
void huge_prof_cnt_set(const void *ptr, prof_thr_cnt_t *cnt);
prof_ctx_t *huge_prof_ctx_get(const void *ptr);
void huge_prof_ctx_set(const void *ptr, prof_ctx_t *ctx);
#endif
bool huge_boot(void);

View File

@ -98,6 +98,9 @@ struct prof_thr_cnt_s {
};
struct prof_ctx_s {
/* Associated backtrace. */
prof_bt_t *bt;
/* Protects cnt_merged and sets_ql. */
malloc_mutex_t lock;
@ -151,10 +154,10 @@ bool prof_init(prof_t *prof, bool master);
void prof_destroy(prof_t *prof);
prof_thr_cnt_t *prof_alloc_prep(size_t size);
prof_thr_cnt_t *prof_cnt_get(const void *ptr);
prof_ctx_t *prof_ctx_get(const void *ptr);
void prof_malloc(const void *ptr, prof_thr_cnt_t *cnt);
void prof_realloc(const void *ptr, prof_thr_cnt_t *cnt, const void *old_ptr,
size_t old_size, prof_thr_cnt_t *old_cnt);
size_t old_size, prof_ctx_t *old_ctx);
void prof_free(const void *ptr);
void prof_idump(void);
bool prof_mdump(const char *filename);

View File

@ -1198,7 +1198,7 @@ arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
uint32_t try_nregs, good_nregs;
uint32_t try_hdr_size, good_hdr_size;
#ifdef JEMALLOC_PROF
uint32_t try_cnt0_offset, good_cnt0_offset;
uint32_t try_ctx0_offset, good_ctx0_offset;
#endif
uint32_t try_reg0_offset, good_reg0_offset;
@ -1225,11 +1225,11 @@ arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
if (opt_prof && prof_promote == false) {
/* Pad to a quantum boundary. */
try_hdr_size = QUANTUM_CEILING(try_hdr_size);
try_cnt0_offset = try_hdr_size;
/* Add space for one (prof_thr_cnt_t *) per region. */
try_hdr_size += try_nregs * sizeof(prof_thr_cnt_t *);
try_ctx0_offset = try_hdr_size;
/* Add space for one (prof_ctx_t *) per region. */
try_hdr_size += try_nregs * sizeof(prof_ctx_t *);
} else
try_cnt0_offset = 0;
try_ctx0_offset = 0;
#endif
try_reg0_offset = try_run_size - (try_nregs * bin->reg_size);
} while (try_hdr_size > try_reg0_offset);
@ -1243,7 +1243,7 @@ arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
good_nregs = try_nregs;
good_hdr_size = try_hdr_size;
#ifdef JEMALLOC_PROF
good_cnt0_offset = try_cnt0_offset;
good_ctx0_offset = try_ctx0_offset;
#endif
good_reg0_offset = try_reg0_offset;
@ -1258,13 +1258,12 @@ arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
if (opt_prof && prof_promote == false) {
/* Pad to a quantum boundary. */
try_hdr_size = QUANTUM_CEILING(try_hdr_size);
try_cnt0_offset = try_hdr_size;
try_ctx0_offset = try_hdr_size;
/*
* Add space for one (prof_thr_cnt_t *) per
* region.
* Add space for one (prof_ctx_t *) per region.
*/
try_hdr_size += try_nregs *
sizeof(prof_thr_cnt_t *);
sizeof(prof_ctx_t *);
}
#endif
try_reg0_offset = try_run_size - (try_nregs *
@ -1282,7 +1281,7 @@ arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
bin->run_size = good_run_size;
bin->nregs = good_nregs;
#ifdef JEMALLOC_PROF
bin->cnt0_offset = good_cnt0_offset;
bin->ctx0_offset = good_ctx0_offset;
#endif
bin->reg0_offset = good_reg0_offset;
@ -1639,10 +1638,10 @@ arena_run_regind(arena_run_t *run, arena_bin_t *bin, const void *ptr,
return (regind);
}
prof_thr_cnt_t *
arena_prof_cnt_get(const void *ptr)
prof_ctx_t *
arena_prof_ctx_get(const void *ptr)
{
prof_thr_cnt_t *ret;
prof_ctx_t *ret;
arena_chunk_t *chunk;
size_t pageind, mapbits;
@ -1655,7 +1654,7 @@ arena_prof_cnt_get(const void *ptr)
assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
if ((mapbits & CHUNK_MAP_LARGE) == 0) {
if (prof_promote)
ret = (prof_thr_cnt_t *)(uintptr_t)1U;
ret = (prof_ctx_t *)(uintptr_t)1U;
else {
arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
(uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) <<
@ -1665,18 +1664,18 @@ arena_prof_cnt_get(const void *ptr)
assert(run->magic == ARENA_RUN_MAGIC);
regind = arena_run_regind(run, bin, ptr, bin->reg_size);
ret = *(prof_thr_cnt_t **)((uintptr_t)run +
bin->cnt0_offset + (regind *
sizeof(prof_thr_cnt_t *)));
ret = *(prof_ctx_t **)((uintptr_t)run +
bin->ctx0_offset + (regind *
sizeof(prof_ctx_t *)));
}
} else
ret = chunk->map[pageind].prof_cnt;
ret = chunk->map[pageind].prof_ctx;
return (ret);
}
void
arena_prof_cnt_set(const void *ptr, prof_thr_cnt_t *cnt)
arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
{
arena_chunk_t *chunk;
size_t pageind, mapbits;
@ -1699,12 +1698,12 @@ arena_prof_cnt_set(const void *ptr, prof_thr_cnt_t *cnt)
assert(run->magic == ARENA_RUN_MAGIC);
regind = arena_run_regind(run, bin, ptr, bin->reg_size);
*((prof_thr_cnt_t **)((uintptr_t)run + bin->cnt0_offset
+ (regind * sizeof(prof_thr_cnt_t *)))) = cnt;
*((prof_ctx_t **)((uintptr_t)run + bin->ctx0_offset
+ (regind * sizeof(prof_ctx_t *)))) = ctx;
} else
assert((uintptr_t)cnt == (uintptr_t)1U);
assert((uintptr_t)ctx == (uintptr_t)1U);
} else
chunk->map[pageind].prof_cnt = cnt;
chunk->map[pageind].prof_ctx = ctx;
}
#endif

View File

@ -241,10 +241,10 @@ huge_salloc(const void *ptr)
}
#ifdef JEMALLOC_PROF
prof_thr_cnt_t *
huge_prof_cnt_get(const void *ptr)
prof_ctx_t *
huge_prof_ctx_get(const void *ptr)
{
prof_thr_cnt_t *ret;
prof_ctx_t *ret;
extent_node_t *node, key;
malloc_mutex_lock(&huge_mtx);
@ -254,7 +254,7 @@ huge_prof_cnt_get(const void *ptr)
node = extent_tree_ad_search(&huge, &key);
assert(node != NULL);
ret = node->prof_cnt;
ret = node->prof_ctx;
malloc_mutex_unlock(&huge_mtx);
@ -262,7 +262,7 @@ huge_prof_cnt_get(const void *ptr)
}
void
huge_prof_cnt_set(const void *ptr, prof_thr_cnt_t *cnt)
huge_prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
{
extent_node_t *node, key;
@ -273,7 +273,7 @@ huge_prof_cnt_set(const void *ptr, prof_thr_cnt_t *cnt)
node = extent_tree_ad_search(&huge, &key);
assert(node != NULL);
node->prof_cnt = cnt;
node->prof_ctx = ctx;
malloc_mutex_unlock(&huge_mtx);
}

View File

@ -1060,7 +1060,8 @@ JEMALLOC_P(realloc)(void *ptr, size_t size)
void *ret;
#ifdef JEMALLOC_PROF
size_t old_size;
prof_thr_cnt_t *cnt, *old_cnt;
prof_thr_cnt_t *cnt;
prof_ctx_t *old_ctx;
#endif
if (size == 0) {
@ -1074,7 +1075,7 @@ JEMALLOC_P(realloc)(void *ptr, size_t size)
#ifdef JEMALLOC_PROF
if (opt_prof) {
old_size = isalloc(ptr);
old_cnt = prof_cnt_get(ptr);
old_ctx = prof_ctx_get(ptr);
cnt = NULL;
}
#endif
@ -1083,7 +1084,7 @@ JEMALLOC_P(realloc)(void *ptr, size_t size)
#ifdef JEMALLOC_PROF
else if (opt_prof) {
old_size = 0;
old_cnt = NULL;
old_ctx = NULL;
cnt = NULL;
}
#endif
@ -1100,7 +1101,7 @@ JEMALLOC_P(realloc)(void *ptr, size_t size)
#ifdef JEMALLOC_PROF
if (opt_prof) {
old_size = isalloc(ptr);
old_cnt = prof_cnt_get(ptr);
old_ctx = prof_ctx_get(ptr);
if ((cnt = prof_alloc_prep(size)) == NULL) {
ret = NULL;
goto OOM;
@ -1133,7 +1134,7 @@ OOM:
#ifdef JEMALLOC_PROF
if (opt_prof) {
old_size = 0;
old_cnt = NULL;
old_ctx = NULL;
}
#endif
if (malloc_init()) {
@ -1181,7 +1182,7 @@ RETURN:
#endif
#ifdef JEMALLOC_PROF
if (opt_prof)
prof_realloc(ret, cnt, ptr, old_size, old_cnt);
prof_realloc(ret, cnt, ptr, old_size, old_ctx);
#endif
return (ret);
}

View File

@ -48,7 +48,7 @@ static malloc_mutex_t bt2ctx_mtx;
static __thread ckh_t *bt2cnt_tls JEMALLOC_ATTR(tls_model("initial-exec"));
/*
* Same contents as b2cnt, but initialized such that the TSD destructor is
* Same contents as bt2cnt_tls, but initialized such that the TSD destructor is
* called when a thread exits, so that bt2cnt_tls contents can be merged,
* unlinked, and deallocated.
*/
@ -100,7 +100,7 @@ static _Unwind_Reason_Code prof_unwind_callback(
#endif
static void prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max);
static prof_thr_cnt_t *prof_lookup(prof_bt_t *bt);
static void prof_cnt_set(const void *ptr, prof_thr_cnt_t *cnt);
static void prof_ctx_set(const void *ptr, prof_ctx_t *ctx);
static bool prof_flush(bool propagate_err);
static bool prof_write(const char *s, bool propagate_err);
static void prof_ctx_merge(prof_ctx_t *ctx, prof_cnt_t *cnt_all,
@ -450,6 +450,7 @@ prof_lookup(prof_bt_t *bt)
return (NULL);
}
bt2cnt_tls = bt2cnt;
pthread_setspecific(bt2cnt_tsd, bt2cnt);
}
if (ckh_search(bt2cnt, bt, NULL, (void **)&ret)) {
@ -475,6 +476,7 @@ prof_lookup(prof_bt_t *bt)
idalloc(ctx);
return (NULL);
}
ctx->bt = btkey;
if (malloc_mutex_init(&ctx->lock)) {
prof_leave();
idalloc(btkey);
@ -580,10 +582,10 @@ prof_alloc_prep(size_t size)
return (ret);
}
prof_thr_cnt_t *
prof_cnt_get(const void *ptr)
prof_ctx_t *
prof_ctx_get(const void *ptr)
{
prof_thr_cnt_t *ret;
prof_ctx_t *ret;
arena_chunk_t *chunk;
assert(ptr != NULL);
@ -593,15 +595,15 @@ prof_cnt_get(const void *ptr)
/* Region. */
assert(chunk->arena->magic == ARENA_MAGIC);
ret = arena_prof_cnt_get(ptr);
ret = arena_prof_ctx_get(ptr);
} else
ret = huge_prof_cnt_get(ptr);
ret = huge_prof_ctx_get(ptr);
return (ret);
}
static void
prof_cnt_set(const void *ptr, prof_thr_cnt_t *cnt)
prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
{
arena_chunk_t *chunk;
@ -612,9 +614,9 @@ prof_cnt_set(const void *ptr, prof_thr_cnt_t *cnt)
/* Region. */
assert(chunk->arena->magic == ARENA_MAGIC);
arena_prof_cnt_set(ptr, cnt);
arena_prof_ctx_set(ptr, ctx);
} else
huge_prof_cnt_set(ptr, cnt);
huge_prof_ctx_set(ptr, ctx);
}
static inline void
@ -649,7 +651,7 @@ prof_malloc(const void *ptr, prof_thr_cnt_t *cnt)
assert(ptr != NULL);
prof_cnt_set(ptr, cnt);
prof_ctx_set(ptr, cnt->ctx);
prof_sample_accum_update(size);
if ((uintptr_t)cnt > (uintptr_t)1U) {
@ -673,25 +675,43 @@ prof_malloc(const void *ptr, prof_thr_cnt_t *cnt)
void
prof_realloc(const void *ptr, prof_thr_cnt_t *cnt, const void *old_ptr,
size_t old_size, prof_thr_cnt_t *old_cnt)
size_t old_size, prof_ctx_t *old_ctx)
{
size_t size = isalloc(ptr);
prof_thr_cnt_t *told_cnt;
if (ptr != NULL) {
prof_cnt_set(ptr, cnt);
prof_ctx_set(ptr, cnt->ctx);
prof_sample_accum_update(size);
}
if ((uintptr_t)old_cnt > (uintptr_t)1U)
old_cnt->epoch++;
if ((uintptr_t)old_ctx > (uintptr_t)1U) {
told_cnt = prof_lookup(old_ctx->bt);
if (told_cnt == NULL) {
/*
* It's too late to propagate OOM for this realloc(),
* so operate directly on old_ctx->cnt_merged.
*/
malloc_printf("XXX BANG A\n");
malloc_mutex_lock(&old_ctx->lock);
old_ctx->cnt_merged.curobjs--;
old_ctx->cnt_merged.curbytes -= old_size;
malloc_mutex_unlock(&old_ctx->lock);
told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U;
}
} else
told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U;
if ((uintptr_t)told_cnt > (uintptr_t)1U)
told_cnt->epoch++;
if ((uintptr_t)cnt > (uintptr_t)1U)
cnt->epoch++;
/*********/
mb_write();
/*********/
if ((uintptr_t)old_cnt > (uintptr_t)1U) {
old_cnt->cnts.curobjs--;
old_cnt->cnts.curbytes -= old_size;
if ((uintptr_t)told_cnt > (uintptr_t)1U) {
told_cnt->cnts.curobjs--;
told_cnt->cnts.curbytes -= old_size;
}
if ((uintptr_t)cnt > (uintptr_t)1U) {
cnt->cnts.curobjs++;
@ -702,8 +722,8 @@ prof_realloc(const void *ptr, prof_thr_cnt_t *cnt, const void *old_ptr,
/*********/
mb_write();
/*********/
if ((uintptr_t)old_cnt > (uintptr_t)1U)
old_cnt->epoch++;
if ((uintptr_t)told_cnt > (uintptr_t)1U)
told_cnt->epoch++;
if ((uintptr_t)cnt > (uintptr_t)1U)
cnt->epoch++;
/*********/
@ -713,24 +733,37 @@ prof_realloc(const void *ptr, prof_thr_cnt_t *cnt, const void *old_ptr,
void
prof_free(const void *ptr)
{
prof_thr_cnt_t *cnt = prof_cnt_get(ptr);
prof_ctx_t *ctx = prof_ctx_get(ptr);
if ((uintptr_t)cnt > (uintptr_t)1) {
if ((uintptr_t)ctx > (uintptr_t)1) {
size_t size = isalloc(ptr);
prof_thr_cnt_t *tcnt = prof_lookup(ctx->bt);
cnt->epoch++;
if (tcnt != NULL) {
tcnt->epoch++;
/*********/
mb_write();
/*********/
cnt->cnts.curobjs--;
cnt->cnts.curbytes -= size;
tcnt->cnts.curobjs--;
tcnt->cnts.curbytes -= size;
/*********/
mb_write();
/*********/
cnt->epoch++;
tcnt->epoch++;
/*********/
mb_write();
/*********/
} else {
/*
* OOM during free() cannot be propagated, so operate
* directly on ctx->cnt_merged.
*/
malloc_printf("XXX BANG B\n");
malloc_mutex_lock(&ctx->lock);
ctx->cnt_merged.curobjs--;
ctx->cnt_merged.curbytes -= size;
malloc_mutex_unlock(&ctx->lock);
}
}
}