diff --git a/jemalloc/include/jemalloc/internal/arena.h b/jemalloc/include/jemalloc/internal/arena.h
index bb4ce2a5..c1955f19 100644
--- a/jemalloc/include/jemalloc/internal/arena.h
+++ b/jemalloc/include/jemalloc/internal/arena.h
@@ -98,7 +98,7 @@ struct arena_chunk_map_s {
 
 #ifdef JEMALLOC_PROF
 	/* Profile counters, used for large object runs. */
-	prof_thr_cnt_t	*prof_cnt;
+	prof_ctx_t	*prof_ctx;
 #endif
 
 	/*
@@ -246,10 +246,10 @@ struct arena_bin_s {
 
 #ifdef JEMALLOC_PROF
 	/*
-	 * Offset of first (prof_cnt_t *) in a run header for this bin's size
+	 * Offset of first (prof_ctx_t *) in a run header for this bin's size
	 * class, or 0 if (opt_prof == false).
	 */
-	uint32_t	cnt0_offset;
+	uint32_t	ctx0_offset;
 #endif
 
 	/* Offset of first region in a run for this bin's size class. */
@@ -438,8 +438,8 @@ size_t	arena_salloc(const void *ptr);
 #ifdef JEMALLOC_PROF
 void	arena_prof_promoted(const void *ptr, size_t size);
 size_t	arena_salloc_demote(const void *ptr);
-prof_thr_cnt_t	*arena_prof_cnt_get(const void *ptr);
-void	arena_prof_cnt_set(const void *ptr, prof_thr_cnt_t *cnt);
+prof_ctx_t	*arena_prof_ctx_get(const void *ptr);
+void	arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx);
 #endif
 void	arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr,
     arena_chunk_map_t *mapelm);
diff --git a/jemalloc/include/jemalloc/internal/extent.h b/jemalloc/include/jemalloc/internal/extent.h
index 33a4e9a3..6fe9702b 100644
--- a/jemalloc/include/jemalloc/internal/extent.h
+++ b/jemalloc/include/jemalloc/internal/extent.h
@@ -19,7 +19,7 @@ struct extent_node_s {
 
 #ifdef JEMALLOC_PROF
 	/* Profile counters, used for huge objects. */
-	prof_thr_cnt_t	*prof_cnt;
+	prof_ctx_t	*prof_ctx;
 #endif
 
 	/* Pointer to the extent that this tree node is responsible for. */
diff --git a/jemalloc/include/jemalloc/internal/huge.h b/jemalloc/include/jemalloc/internal/huge.h
index 3cf32f75..0c0582f2 100644
--- a/jemalloc/include/jemalloc/internal/huge.h
+++ b/jemalloc/include/jemalloc/internal/huge.h
@@ -25,8 +25,8 @@
 void	*huge_ralloc(void *ptr, size_t size, size_t oldsize);
 void	huge_dalloc(void *ptr);
 size_t	huge_salloc(const void *ptr);
 #ifdef JEMALLOC_PROF
-prof_thr_cnt_t	*huge_prof_cnt_get(const void *ptr);
-void	huge_prof_cnt_set(const void *ptr, prof_thr_cnt_t *cnt);
+prof_ctx_t	*huge_prof_ctx_get(const void *ptr);
+void	huge_prof_ctx_set(const void *ptr, prof_ctx_t *ctx);
 #endif
 bool	huge_boot(void);
diff --git a/jemalloc/include/jemalloc/internal/prof.h b/jemalloc/include/jemalloc/internal/prof.h
index 6e71552d..fb55fb90 100644
--- a/jemalloc/include/jemalloc/internal/prof.h
+++ b/jemalloc/include/jemalloc/internal/prof.h
@@ -98,6 +98,9 @@ struct prof_thr_cnt_s {
 };
 
 struct prof_ctx_s {
+	/* Associated backtrace. */
+	prof_bt_t		*bt;
+
 	/* Protects cnt_merged and sets_ql. */
 	malloc_mutex_t		lock;
 
@@ -151,10 +154,10 @@
 bool	prof_init(prof_t *prof, bool master);
 void	prof_destroy(prof_t *prof);
 prof_thr_cnt_t	*prof_alloc_prep(size_t size);
-prof_thr_cnt_t	*prof_cnt_get(const void *ptr);
+prof_ctx_t	*prof_ctx_get(const void *ptr);
 void	prof_malloc(const void *ptr, prof_thr_cnt_t *cnt);
 void	prof_realloc(const void *ptr, prof_thr_cnt_t *cnt, const void *old_ptr,
-    size_t old_size, prof_thr_cnt_t *old_cnt);
+    size_t old_size, prof_ctx_t *old_ctx);
 void	prof_free(const void *ptr);
 void	prof_idump(void);
 bool	prof_mdump(const char *filename);
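Note on the prof.h hunk: recording the backtrace in prof_ctx_s is what makes it possible to store a shared prof_ctx_t with each allocation instead of the allocating thread's prof_thr_cnt_t. Counters are kept per thread, but an object may be freed by a different thread than the one that allocated it; given the stored ctx, the freeing thread can look up its own counter set for the same backtrace. A minimal self-contained sketch of that pattern, with toy types and a stub standing in for prof_lookup() (none of this is code from the patch):

#include <assert.h>

typedef struct { int depth; } prof_bt_t;		/* toy backtrace */
typedef struct { prof_bt_t *bt; } prof_ctx_t;		/* shared, one per bt */
typedef struct { long curobjs; long curbytes; } prof_thr_cnt_t;

/* Stand-in for prof_lookup(): maps a backtrace to *this* thread's counters. */
static prof_thr_cnt_t thread_cnt;

static prof_thr_cnt_t *
prof_lookup_stub(prof_bt_t *bt)
{

	(void)bt;
	return (&thread_cnt);
}

int
main(void)
{
	prof_bt_t bt = {4};
	prof_ctx_t ctx = {&bt};

	/*
	 * The freeing thread holds only the ctx stored with the object;
	 * ctx.bt lets it recover its own counter set, exactly as the new
	 * prof_free() does via prof_lookup(ctx->bt).
	 */
	prof_thr_cnt_t *tcnt = prof_lookup_stub(ctx.bt);

	tcnt->curobjs--;
	assert(tcnt == &thread_cnt);
	return (0);
}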
diff --git a/jemalloc/src/arena.c b/jemalloc/src/arena.c
index e74b4701..222ec25f 100644
--- a/jemalloc/src/arena.c
+++ b/jemalloc/src/arena.c
@@ -1198,7 +1198,7 @@ arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
 	uint32_t try_nregs, good_nregs;
 	uint32_t try_hdr_size, good_hdr_size;
 #ifdef JEMALLOC_PROF
-	uint32_t try_cnt0_offset, good_cnt0_offset;
+	uint32_t try_ctx0_offset, good_ctx0_offset;
 #endif
 	uint32_t try_reg0_offset, good_reg0_offset;
 
@@ -1225,11 +1225,11 @@ arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
 		if (opt_prof && prof_promote == false) {
 			/* Pad to a quantum boundary. */
 			try_hdr_size = QUANTUM_CEILING(try_hdr_size);
-			try_cnt0_offset = try_hdr_size;
-			/* Add space for one (prof_thr_cnt_t *) per region. */
-			try_hdr_size += try_nregs * sizeof(prof_thr_cnt_t *);
+			try_ctx0_offset = try_hdr_size;
+			/* Add space for one (prof_ctx_t *) per region. */
+			try_hdr_size += try_nregs * sizeof(prof_ctx_t *);
 		} else
-			try_cnt0_offset = 0;
+			try_ctx0_offset = 0;
 #endif
 		try_reg0_offset = try_run_size - (try_nregs * bin->reg_size);
 	} while (try_hdr_size > try_reg0_offset);
@@ -1243,7 +1243,7 @@ arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
 		good_nregs = try_nregs;
 		good_hdr_size = try_hdr_size;
 #ifdef JEMALLOC_PROF
-		good_cnt0_offset = try_cnt0_offset;
+		good_ctx0_offset = try_ctx0_offset;
 #endif
 		good_reg0_offset = try_reg0_offset;
 
@@ -1258,13 +1258,12 @@ arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
 			if (opt_prof && prof_promote == false) {
 				/* Pad to a quantum boundary. */
 				try_hdr_size = QUANTUM_CEILING(try_hdr_size);
-				try_cnt0_offset = try_hdr_size;
+				try_ctx0_offset = try_hdr_size;
				/*
-				 * Add space for one (prof_thr_cnt_t *) per
-				 * region.
+				 * Add space for one (prof_ctx_t *) per region.
				 */
				try_hdr_size += try_nregs *
-				    sizeof(prof_thr_cnt_t *);
+				    sizeof(prof_ctx_t *);
 			}
 #endif
 			try_reg0_offset = try_run_size - (try_nregs *
@@ -1282,7 +1281,7 @@ arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
 	bin->run_size = good_run_size;
 	bin->nregs = good_nregs;
 #ifdef JEMALLOC_PROF
-	bin->cnt0_offset = good_cnt0_offset;
+	bin->ctx0_offset = good_ctx0_offset;
 #endif
 	bin->reg0_offset = good_reg0_offset;
 
@@ -1639,10 +1638,10 @@ arena_run_regind(arena_run_t *run, arena_bin_t *bin, const void *ptr,
 	return (regind);
 }
 
-prof_thr_cnt_t *
-arena_prof_cnt_get(const void *ptr)
+prof_ctx_t *
+arena_prof_ctx_get(const void *ptr)
 {
-	prof_thr_cnt_t *ret;
+	prof_ctx_t *ret;
 	arena_chunk_t *chunk;
 	size_t pageind, mapbits;
 
@@ -1655,7 +1654,7 @@ arena_prof_cnt_get(const void *ptr)
 	assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
 	if ((mapbits & CHUNK_MAP_LARGE) == 0) {
 		if (prof_promote)
-			ret = (prof_thr_cnt_t *)(uintptr_t)1U;
+			ret = (prof_ctx_t *)(uintptr_t)1U;
 		else {
 			arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
 			    (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) <<
@@ -1665,18 +1664,18 @@ arena_prof_cnt_get(const void *ptr)
 
 			assert(run->magic == ARENA_RUN_MAGIC);
 			regind = arena_run_regind(run, bin, ptr, bin->reg_size);
-			ret = *(prof_thr_cnt_t **)((uintptr_t)run +
-			    bin->cnt0_offset + (regind *
-			    sizeof(prof_thr_cnt_t *)));
+			ret = *(prof_ctx_t **)((uintptr_t)run +
+			    bin->ctx0_offset + (regind *
+			    sizeof(prof_ctx_t *)));
 		}
 	} else
-		ret = chunk->map[pageind].prof_cnt;
+		ret = chunk->map[pageind].prof_ctx;
 
 	return (ret);
 }
 
 void
-arena_prof_cnt_set(const void *ptr, prof_thr_cnt_t *cnt)
+arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
 {
 	arena_chunk_t *chunk;
 	size_t pageind, mapbits;
@@ -1699,12 +1698,12 @@ arena_prof_cnt_set(const void *ptr, prof_thr_cnt_t *cnt)
 			assert(run->magic == ARENA_RUN_MAGIC);
 			regind = arena_run_regind(run, bin, ptr,
 			    bin->reg_size);
 
-			*((prof_thr_cnt_t **)((uintptr_t)run + bin->cnt0_offset
-			    + (regind * sizeof(prof_thr_cnt_t *)))) = cnt;
+			*((prof_ctx_t **)((uintptr_t)run + bin->ctx0_offset +
+			    (regind * sizeof(prof_ctx_t *)))) = ctx;
 		} else
-			assert((uintptr_t)cnt == (uintptr_t)1U);
+			assert((uintptr_t)ctx == (uintptr_t)1U);
 	} else
-		chunk->map[pageind].prof_cnt = cnt;
+		chunk->map[pageind].prof_ctx = ctx;
 }
 #endif
diff --git a/jemalloc/src/huge.c b/jemalloc/src/huge.c
index d35aa5cd..49962ea0 100644
--- a/jemalloc/src/huge.c
+++ b/jemalloc/src/huge.c
@@ -241,10 +241,10 @@ huge_salloc(const void *ptr)
 }
 
 #ifdef JEMALLOC_PROF
-prof_thr_cnt_t *
-huge_prof_cnt_get(const void *ptr)
+prof_ctx_t *
+huge_prof_ctx_get(const void *ptr)
 {
-	prof_thr_cnt_t *ret;
+	prof_ctx_t *ret;
 	extent_node_t *node, key;
 
 	malloc_mutex_lock(&huge_mtx);
@@ -254,7 +254,7 @@ huge_prof_cnt_get(const void *ptr)
 	node = extent_tree_ad_search(&huge, &key);
 	assert(node != NULL);
 
-	ret = node->prof_cnt;
+	ret = node->prof_ctx;
 
 	malloc_mutex_unlock(&huge_mtx);
 
@@ -262,7 +262,7 @@ huge_prof_cnt_get(const void *ptr)
 }
 
 void
-huge_prof_cnt_set(const void *ptr, prof_thr_cnt_t *cnt)
+huge_prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
 {
 	extent_node_t *node, key;
 
@@ -273,7 +273,7 @@ huge_prof_cnt_set(const void *ptr, prof_thr_cnt_t *cnt)
 	node = extent_tree_ad_search(&huge, &key);
 	assert(node != NULL);
 
-	node->prof_cnt = cnt;
+	node->prof_ctx = ctx;
 
 	malloc_mutex_unlock(&huge_mtx);
 }
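Note on the arena.c hunks: when opt_prof is enabled and prof_promote is false, arena_bin_run_size_calc() reserves one prof_ctx_t * slot per region at the end of the run header, beginning at ctx0_offset, and arena_prof_ctx_get()/arena_prof_ctx_set() locate a region's slot from its region index. A self-contained toy of that slot arithmetic (the run size, offset, and region index below are invented for illustration):

#include <assert.h>
#include <stdint.h>
#include <stdlib.h>

typedef struct { int unused; } prof_ctx_t;	/* toy stand-in */

/* Same address arithmetic as arena_prof_ctx_get()/arena_prof_ctx_set(). */
static prof_ctx_t **
ctx_slot(void *run, uint32_t ctx0_offset, unsigned regind)
{

	return ((prof_ctx_t **)((uintptr_t)run + ctx0_offset +
	    (regind * sizeof(prof_ctx_t *))));
}

int
main(void)
{
	/* Fake run: 4 KiB, ctx slots starting at offset 64, region 3. */
	void *run = malloc(4096);
	prof_ctx_t ctx;

	assert(run != NULL);
	*ctx_slot(run, 64, 3) = &ctx;		/* ..._ctx_set() */
	assert(*ctx_slot(run, 64, 3) == &ctx);	/* ..._ctx_get() */
	free(run);
	return (0);
}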
diff --git a/jemalloc/src/jemalloc.c b/jemalloc/src/jemalloc.c
index e01de0d5..aeab1408 100644
--- a/jemalloc/src/jemalloc.c
+++ b/jemalloc/src/jemalloc.c
@@ -1060,7 +1060,8 @@ JEMALLOC_P(realloc)(void *ptr, size_t size)
 	void *ret;
 #ifdef JEMALLOC_PROF
 	size_t old_size;
-	prof_thr_cnt_t *cnt, *old_cnt;
+	prof_thr_cnt_t *cnt;
+	prof_ctx_t *old_ctx;
 #endif
 
 	if (size == 0) {
@@ -1074,7 +1075,7 @@ JEMALLOC_P(realloc)(void *ptr, size_t size)
 #ifdef JEMALLOC_PROF
 			if (opt_prof) {
 				old_size = isalloc(ptr);
-				old_cnt = prof_cnt_get(ptr);
+				old_ctx = prof_ctx_get(ptr);
 				cnt = NULL;
 			}
 #endif
@@ -1083,7 +1084,7 @@ JEMALLOC_P(realloc)(void *ptr, size_t size)
 #ifdef JEMALLOC_PROF
 			else if (opt_prof) {
 				old_size = 0;
-				old_cnt = NULL;
+				old_ctx = NULL;
 				cnt = NULL;
 			}
 #endif
@@ -1100,7 +1101,7 @@ JEMALLOC_P(realloc)(void *ptr, size_t size)
 #ifdef JEMALLOC_PROF
 		if (opt_prof) {
 			old_size = isalloc(ptr);
-			old_cnt = prof_cnt_get(ptr);
+			old_ctx = prof_ctx_get(ptr);
 			if ((cnt = prof_alloc_prep(size)) == NULL) {
 				ret = NULL;
 				goto OOM;
@@ -1133,7 +1134,7 @@ OOM:
 #ifdef JEMALLOC_PROF
 		if (opt_prof) {
 			old_size = 0;
-			old_cnt = NULL;
+			old_ctx = NULL;
 		}
 #endif
 		if (malloc_init()) {
@@ -1181,7 +1182,7 @@ RETURN:
 #endif
 #ifdef JEMALLOC_PROF
 	if (opt_prof)
-		prof_realloc(ret, cnt, ptr, old_size, old_cnt);
+		prof_realloc(ret, cnt, ptr, old_size, old_ctx);
 #endif
 	return (ret);
 }
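Note on the prof.c changes that follow: the pthread_setspecific() call added to prof_lookup() is what arms the bt2cnt_tsd destructor. The table is reached via __thread (bt2cnt_tls) on the fast path, but __thread storage has no thread-exit hook, so the same pointer must also be stored under a pthread_key_t whose destructor can merge and tear the table down. A self-contained sketch of that dual TLS/TSD idiom (all names are local to this example; build with -pthread):

#include <pthread.h>
#include <stdlib.h>

static __thread void *tbl_tls;		/* fast path, no exit hook */
static pthread_key_t tbl_tsd;		/* slow path, destructor at exit */

static void
tbl_destructor(void *arg)
{

	/* Runs at thread exit; merge/unlink/free per-thread state here. */
	free(arg);
}

static void *
thread_main(void *arg)
{

	(void)arg;
	if (tbl_tls == NULL) {
		tbl_tls = malloc(64);
		/* Mirror into TSD purely so the destructor fires. */
		pthread_setspecific(tbl_tsd, tbl_tls);
	}
	return (NULL);
}

int
main(void)
{
	pthread_t thd;

	pthread_key_create(&tbl_tsd, tbl_destructor);
	pthread_create(&thd, NULL, thread_main, NULL);
	pthread_join(thd, NULL);
	return (0);
}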
diff --git a/jemalloc/src/prof.c b/jemalloc/src/prof.c
index 6326188e..c13bc04f 100644
--- a/jemalloc/src/prof.c
+++ b/jemalloc/src/prof.c
@@ -48,7 +48,7 @@ static malloc_mutex_t	bt2ctx_mtx;
 static __thread ckh_t	*bt2cnt_tls JEMALLOC_ATTR(tls_model("initial-exec"));
 
 /*
- * Same contents as b2cnt, but initialized such that the TSD destructor is
+ * Same contents as bt2cnt_tls, but initialized such that the TSD destructor is
  * called when a thread exits, so that bt2cnt_tls contents can be merged,
  * unlinked, and deallocated.
  */
@@ -100,7 +100,7 @@ static _Unwind_Reason_Code	prof_unwind_callback(
 #endif
 static void	prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max);
 static prof_thr_cnt_t	*prof_lookup(prof_bt_t *bt);
-static void	prof_cnt_set(const void *ptr, prof_thr_cnt_t *cnt);
+static void	prof_ctx_set(const void *ptr, prof_ctx_t *ctx);
 static bool	prof_flush(bool propagate_err);
 static bool	prof_write(const char *s, bool propagate_err);
 static void	prof_ctx_merge(prof_ctx_t *ctx, prof_cnt_t *cnt_all,
@@ -450,6 +450,7 @@ prof_lookup(prof_bt_t *bt)
 			return (NULL);
 		}
 		bt2cnt_tls = bt2cnt;
+		pthread_setspecific(bt2cnt_tsd, bt2cnt);
 	}
 
 	if (ckh_search(bt2cnt, bt, NULL, (void **)&ret)) {
@@ -475,6 +476,7 @@ prof_lookup(prof_bt_t *bt)
 			idalloc(ctx);
 			return (NULL);
 		}
+		ctx->bt = btkey;
 		if (malloc_mutex_init(&ctx->lock)) {
 			prof_leave();
 			idalloc(btkey);
@@ -580,10 +582,10 @@ prof_alloc_prep(size_t size)
 	return (ret);
 }
 
-prof_thr_cnt_t *
-prof_cnt_get(const void *ptr)
+prof_ctx_t *
+prof_ctx_get(const void *ptr)
 {
-	prof_thr_cnt_t *ret;
+	prof_ctx_t *ret;
 	arena_chunk_t *chunk;
 
 	assert(ptr != NULL);
@@ -593,15 +595,15 @@ prof_cnt_get(const void *ptr)
 		/* Region. */
 		assert(chunk->arena->magic == ARENA_MAGIC);
 
-		ret = arena_prof_cnt_get(ptr);
+		ret = arena_prof_ctx_get(ptr);
 	} else
-		ret = huge_prof_cnt_get(ptr);
+		ret = huge_prof_ctx_get(ptr);
 
 	return (ret);
 }
 
 static void
-prof_cnt_set(const void *ptr, prof_thr_cnt_t *cnt)
+prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
 {
 	arena_chunk_t *chunk;
 
@@ -612,9 +614,9 @@ prof_cnt_set(const void *ptr, prof_thr_cnt_t *cnt)
 		/* Region. */
 		assert(chunk->arena->magic == ARENA_MAGIC);
 
-		arena_prof_cnt_set(ptr, cnt);
+		arena_prof_ctx_set(ptr, ctx);
 	} else
-		huge_prof_cnt_set(ptr, cnt);
+		huge_prof_ctx_set(ptr, ctx);
 }
 
 static inline void
@@ -649,7 +651,7 @@ prof_malloc(const void *ptr, prof_thr_cnt_t *cnt)
 
 	assert(ptr != NULL);
 
-	prof_cnt_set(ptr, cnt);
+	prof_ctx_set(ptr, cnt->ctx);
 	prof_sample_accum_update(size);
 
 	if ((uintptr_t)cnt > (uintptr_t)1U) {
@@ -673,25 +675,42 @@ prof_malloc(const void *ptr, prof_thr_cnt_t *cnt)
 
 void
 prof_realloc(const void *ptr, prof_thr_cnt_t *cnt, const void *old_ptr,
-    size_t old_size, prof_thr_cnt_t *old_cnt)
+    size_t old_size, prof_ctx_t *old_ctx)
 {
 	size_t size = isalloc(ptr);
+	prof_thr_cnt_t *told_cnt;
 
 	if (ptr != NULL) {
-		prof_cnt_set(ptr, cnt);
+		prof_ctx_set(ptr, cnt->ctx);
 		prof_sample_accum_update(size);
 	}
 
-	if ((uintptr_t)old_cnt > (uintptr_t)1U)
-		old_cnt->epoch++;
+	if ((uintptr_t)old_ctx > (uintptr_t)1U) {
+		told_cnt = prof_lookup(old_ctx->bt);
+		if (told_cnt == NULL) {
+			/*
+			 * It's too late to propagate OOM for this realloc(),
+			 * so operate directly on old_ctx->cnt_merged.
+			 */
+			malloc_mutex_lock(&old_ctx->lock);
+			old_ctx->cnt_merged.curobjs--;
+			old_ctx->cnt_merged.curbytes -= old_size;
+			malloc_mutex_unlock(&old_ctx->lock);
+			told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U;
+		}
+	} else
+		told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U;
+
+	if ((uintptr_t)told_cnt > (uintptr_t)1U)
+		told_cnt->epoch++;
 	if ((uintptr_t)cnt > (uintptr_t)1U)
 		cnt->epoch++;
 	/*********/
 	mb_write();
 	/*********/
-	if ((uintptr_t)old_cnt > (uintptr_t)1U) {
-		old_cnt->cnts.curobjs--;
-		old_cnt->cnts.curbytes -= old_size;
+	if ((uintptr_t)told_cnt > (uintptr_t)1U) {
+		told_cnt->cnts.curobjs--;
+		told_cnt->cnts.curbytes -= old_size;
 	}
 	if ((uintptr_t)cnt > (uintptr_t)1U) {
 		cnt->cnts.curobjs++;
@@ -702,8 +721,8 @@ prof_realloc(const void *ptr, prof_thr_cnt_t *cnt, const void *old_ptr,
 	/*********/
 	mb_write();
 	/*********/
-	if ((uintptr_t)old_cnt > (uintptr_t)1U)
-		old_cnt->epoch++;
+	if ((uintptr_t)told_cnt > (uintptr_t)1U)
+		told_cnt->epoch++;
 	if ((uintptr_t)cnt > (uintptr_t)1U)
 		cnt->epoch++;
 	/*********/
@@ -713,24 +732,36 @@
 
 void
 prof_free(const void *ptr)
 {
-	prof_thr_cnt_t *cnt = prof_cnt_get(ptr);
+	prof_ctx_t *ctx = prof_ctx_get(ptr);
 
-	if ((uintptr_t)cnt > (uintptr_t)1) {
+	if ((uintptr_t)ctx > (uintptr_t)1) {
 		size_t size = isalloc(ptr);
+		prof_thr_cnt_t *tcnt = prof_lookup(ctx->bt);
 
-		cnt->epoch++;
-		/*********/
-		mb_write();
-		/*********/
-		cnt->cnts.curobjs--;
-		cnt->cnts.curbytes -= size;
-		/*********/
-		mb_write();
-		/*********/
-		cnt->epoch++;
-		/*********/
-		mb_write();
-		/*********/
+		if (tcnt != NULL) {
+			tcnt->epoch++;
+			/*********/
+			mb_write();
+			/*********/
+			tcnt->cnts.curobjs--;
+			tcnt->cnts.curbytes -= size;
+			/*********/
+			mb_write();
+			/*********/
+			tcnt->epoch++;
+			/*********/
+			mb_write();
+			/*********/
+		} else {
+			/*
+			 * OOM during free() cannot be propagated, so operate
+			 * directly on ctx->cnt_merged.
+			 */
+			malloc_mutex_lock(&ctx->lock);
+			ctx->cnt_merged.curobjs--;
+			ctx->cnt_merged.curbytes -= size;
+			malloc_mutex_unlock(&ctx->lock);
+		}
 	}
 }
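Note on the epoch++/mb_write() bracketing preserved in prof_realloc() and prof_free(): it is the writer half of a seqlock-style protocol. The epoch is odd while an update is in flight, so the dump path can copy a counter set without locking and retry on a torn read. A sketch of what such a reader looks like under those assumptions (toy type; memory barriers elided, and this illustrates the idea rather than reproducing prof.c's actual merge code):

#include <stdint.h>

typedef struct {
	volatile uint64_t	epoch;
	uint64_t		curobjs;
	uint64_t		curbytes;
} toy_cnt_t;

/* Copy a consistent snapshot of a counter set without taking a lock. */
static toy_cnt_t
toy_cnt_snapshot(const toy_cnt_t *c)
{
	toy_cnt_t ret;
	uint64_t epoch0;

	do {
		epoch0 = c->epoch;		/* Writer bumps before... */
		ret.curobjs = c->curobjs;
		ret.curbytes = c->curbytes;
	} while ((epoch0 & 1) != 0 ||		/* ...update in progress, */
	    c->epoch != epoch0);		/* or torn read: retry. */
	ret.epoch = epoch0;
	return (ret);
}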