From 1bb602125cc6ea471ecc305dd61849ef60349091 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 13 Apr 2010 16:06:22 -0700 Subject: [PATCH 1/6] Update stale JEMALLOC_FILL code. Fix a compilation error due to stale data structure access code in tcache_dalloc_large() for junk filling. --- jemalloc/include/jemalloc/internal/tcache.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jemalloc/include/jemalloc/internal/tcache.h b/jemalloc/include/jemalloc/internal/tcache.h index c76597fa..fa6c53f2 100644 --- a/jemalloc/include/jemalloc/internal/tcache.h +++ b/jemalloc/include/jemalloc/internal/tcache.h @@ -353,7 +353,7 @@ tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size) #ifdef JEMALLOC_FILL if (opt_junk) - memset(ptr, 0x5a, bin->reg_size); + memset(ptr, 0x5a, arena->bins[binind].reg_size); #endif tbin = &tcache->tbins[binind]; From 5065156f3f90e421ba2b1a914e47eeb30d83d994 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 13 Apr 2010 16:13:54 -0700 Subject: [PATCH 2/6] Fix threads-related profiling bugs. Initialize bt2cnt_tsd so that cleanup at thread exit actually happens. Associate (prof_ctx_t *) with allocated objects, rather than (prof_thr_cnt_t *). Each thread must always operate on its own (prof_thr_cnt_t *), and an object may outlive the thread that allocated it. --- jemalloc/include/jemalloc/internal/arena.h | 10 +- jemalloc/include/jemalloc/internal/extent.h | 2 +- jemalloc/include/jemalloc/internal/huge.h | 4 +- jemalloc/include/jemalloc/internal/prof.h | 7 +- jemalloc/src/arena.c | 47 +++++---- jemalloc/src/huge.c | 12 +-- jemalloc/src/jemalloc.c | 13 +-- jemalloc/src/prof.c | 103 +++++++++++++------- 8 files changed, 117 insertions(+), 81 deletions(-) diff --git a/jemalloc/include/jemalloc/internal/arena.h b/jemalloc/include/jemalloc/internal/arena.h index bb4ce2a5..c1955f19 100644 --- a/jemalloc/include/jemalloc/internal/arena.h +++ b/jemalloc/include/jemalloc/internal/arena.h @@ -98,7 +98,7 @@ struct arena_chunk_map_s { #ifdef JEMALLOC_PROF /* Profile counters, used for large object runs. */ - prof_thr_cnt_t *prof_cnt; + prof_ctx_t *prof_ctx; #endif /* @@ -246,10 +246,10 @@ struct arena_bin_s { #ifdef JEMALLOC_PROF /* - * Offset of first (prof_cnt_t *) in a run header for this bin's size + * Offset of first (prof_ctx_t *) in a run header for this bin's size * class, or 0 if (opt_prof == false). */ - uint32_t cnt0_offset; + uint32_t ctx0_offset; #endif /* Offset of first region in a run for this bin's size class. */ @@ -438,8 +438,8 @@ size_t arena_salloc(const void *ptr); #ifdef JEMALLOC_PROF void arena_prof_promoted(const void *ptr, size_t size); size_t arena_salloc_demote(const void *ptr); -prof_thr_cnt_t *arena_prof_cnt_get(const void *ptr); -void arena_prof_cnt_set(const void *ptr, prof_thr_cnt_t *cnt); +prof_ctx_t *arena_prof_ctx_get(const void *ptr); +void arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx); #endif void arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, arena_chunk_map_t *mapelm); diff --git a/jemalloc/include/jemalloc/internal/extent.h b/jemalloc/include/jemalloc/internal/extent.h index 33a4e9a3..6fe9702b 100644 --- a/jemalloc/include/jemalloc/internal/extent.h +++ b/jemalloc/include/jemalloc/internal/extent.h @@ -19,7 +19,7 @@ struct extent_node_s { #ifdef JEMALLOC_PROF /* Profile counters, used for huge objects. */ - prof_thr_cnt_t *prof_cnt; + prof_ctx_t *prof_ctx; #endif /* Pointer to the extent that this tree node is responsible for. 
*/ diff --git a/jemalloc/include/jemalloc/internal/huge.h b/jemalloc/include/jemalloc/internal/huge.h index 3cf32f75..0c0582f2 100644 --- a/jemalloc/include/jemalloc/internal/huge.h +++ b/jemalloc/include/jemalloc/internal/huge.h @@ -25,8 +25,8 @@ void *huge_ralloc(void *ptr, size_t size, size_t oldsize); void huge_dalloc(void *ptr); size_t huge_salloc(const void *ptr); #ifdef JEMALLOC_PROF -prof_thr_cnt_t *huge_prof_cnt_get(const void *ptr); -void huge_prof_cnt_set(const void *ptr, prof_thr_cnt_t *cnt); +prof_ctx_t *huge_prof_ctx_get(const void *ptr); +void huge_prof_ctx_set(const void *ptr, prof_ctx_t *ctx); #endif bool huge_boot(void); diff --git a/jemalloc/include/jemalloc/internal/prof.h b/jemalloc/include/jemalloc/internal/prof.h index 6e71552d..fb55fb90 100644 --- a/jemalloc/include/jemalloc/internal/prof.h +++ b/jemalloc/include/jemalloc/internal/prof.h @@ -98,6 +98,9 @@ struct prof_thr_cnt_s { }; struct prof_ctx_s { + /* Associated backtrace. */ + prof_bt_t *bt; + /* Protects cnt_merged and sets_ql. */ malloc_mutex_t lock; @@ -151,10 +154,10 @@ bool prof_init(prof_t *prof, bool master); void prof_destroy(prof_t *prof); prof_thr_cnt_t *prof_alloc_prep(size_t size); -prof_thr_cnt_t *prof_cnt_get(const void *ptr); +prof_ctx_t *prof_ctx_get(const void *ptr); void prof_malloc(const void *ptr, prof_thr_cnt_t *cnt); void prof_realloc(const void *ptr, prof_thr_cnt_t *cnt, const void *old_ptr, - size_t old_size, prof_thr_cnt_t *old_cnt); + size_t old_size, prof_ctx_t *old_ctx); void prof_free(const void *ptr); void prof_idump(void); bool prof_mdump(const char *filename); diff --git a/jemalloc/src/arena.c b/jemalloc/src/arena.c index e74b4701..222ec25f 100644 --- a/jemalloc/src/arena.c +++ b/jemalloc/src/arena.c @@ -1198,7 +1198,7 @@ arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size) uint32_t try_nregs, good_nregs; uint32_t try_hdr_size, good_hdr_size; #ifdef JEMALLOC_PROF - uint32_t try_cnt0_offset, good_cnt0_offset; + uint32_t try_ctx0_offset, good_ctx0_offset; #endif uint32_t try_reg0_offset, good_reg0_offset; @@ -1225,11 +1225,11 @@ arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size) if (opt_prof && prof_promote == false) { /* Pad to a quantum boundary. */ try_hdr_size = QUANTUM_CEILING(try_hdr_size); - try_cnt0_offset = try_hdr_size; - /* Add space for one (prof_thr_cnt_t *) per region. */ - try_hdr_size += try_nregs * sizeof(prof_thr_cnt_t *); + try_ctx0_offset = try_hdr_size; + /* Add space for one (prof_ctx_t *) per region. */ + try_hdr_size += try_nregs * sizeof(prof_ctx_t *); } else - try_cnt0_offset = 0; + try_ctx0_offset = 0; #endif try_reg0_offset = try_run_size - (try_nregs * bin->reg_size); } while (try_hdr_size > try_reg0_offset); @@ -1243,7 +1243,7 @@ arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size) good_nregs = try_nregs; good_hdr_size = try_hdr_size; #ifdef JEMALLOC_PROF - good_cnt0_offset = try_cnt0_offset; + good_ctx0_offset = try_ctx0_offset; #endif good_reg0_offset = try_reg0_offset; @@ -1258,13 +1258,12 @@ arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size) if (opt_prof && prof_promote == false) { /* Pad to a quantum boundary. */ try_hdr_size = QUANTUM_CEILING(try_hdr_size); - try_cnt0_offset = try_hdr_size; + try_ctx0_offset = try_hdr_size; /* - * Add space for one (prof_thr_cnt_t *) per - * region. + * Add space for one (prof_ctx_t *) per region. 
*/ try_hdr_size += try_nregs * - sizeof(prof_thr_cnt_t *); + sizeof(prof_ctx_t *); } #endif try_reg0_offset = try_run_size - (try_nregs * @@ -1282,7 +1281,7 @@ arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size) bin->run_size = good_run_size; bin->nregs = good_nregs; #ifdef JEMALLOC_PROF - bin->cnt0_offset = good_cnt0_offset; + bin->ctx0_offset = good_ctx0_offset; #endif bin->reg0_offset = good_reg0_offset; @@ -1639,10 +1638,10 @@ arena_run_regind(arena_run_t *run, arena_bin_t *bin, const void *ptr, return (regind); } -prof_thr_cnt_t * -arena_prof_cnt_get(const void *ptr) +prof_ctx_t * +arena_prof_ctx_get(const void *ptr) { - prof_thr_cnt_t *ret; + prof_ctx_t *ret; arena_chunk_t *chunk; size_t pageind, mapbits; @@ -1655,7 +1654,7 @@ arena_prof_cnt_get(const void *ptr) assert((mapbits & CHUNK_MAP_ALLOCATED) != 0); if ((mapbits & CHUNK_MAP_LARGE) == 0) { if (prof_promote) - ret = (prof_thr_cnt_t *)(uintptr_t)1U; + ret = (prof_ctx_t *)(uintptr_t)1U; else { arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) << @@ -1665,18 +1664,18 @@ arena_prof_cnt_get(const void *ptr) assert(run->magic == ARENA_RUN_MAGIC); regind = arena_run_regind(run, bin, ptr, bin->reg_size); - ret = *(prof_thr_cnt_t **)((uintptr_t)run + - bin->cnt0_offset + (regind * - sizeof(prof_thr_cnt_t *))); + ret = *(prof_ctx_t **)((uintptr_t)run + + bin->ctx0_offset + (regind * + sizeof(prof_ctx_t *))); } } else - ret = chunk->map[pageind].prof_cnt; + ret = chunk->map[pageind].prof_ctx; return (ret); } void -arena_prof_cnt_set(const void *ptr, prof_thr_cnt_t *cnt) +arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx) { arena_chunk_t *chunk; size_t pageind, mapbits; @@ -1699,12 +1698,12 @@ arena_prof_cnt_set(const void *ptr, prof_thr_cnt_t *cnt) assert(run->magic == ARENA_RUN_MAGIC); regind = arena_run_regind(run, bin, ptr, bin->reg_size); - *((prof_thr_cnt_t **)((uintptr_t)run + bin->cnt0_offset - + (regind * sizeof(prof_thr_cnt_t *)))) = cnt; + *((prof_ctx_t **)((uintptr_t)run + bin->ctx0_offset + + (regind * sizeof(prof_ctx_t *)))) = ctx; } else - assert((uintptr_t)cnt == (uintptr_t)1U); + assert((uintptr_t)ctx == (uintptr_t)1U); } else - chunk->map[pageind].prof_cnt = cnt; + chunk->map[pageind].prof_ctx = ctx; } #endif diff --git a/jemalloc/src/huge.c b/jemalloc/src/huge.c index d35aa5cd..49962ea0 100644 --- a/jemalloc/src/huge.c +++ b/jemalloc/src/huge.c @@ -241,10 +241,10 @@ huge_salloc(const void *ptr) } #ifdef JEMALLOC_PROF -prof_thr_cnt_t * -huge_prof_cnt_get(const void *ptr) +prof_ctx_t * +huge_prof_ctx_get(const void *ptr) { - prof_thr_cnt_t *ret; + prof_ctx_t *ret; extent_node_t *node, key; malloc_mutex_lock(&huge_mtx); @@ -254,7 +254,7 @@ huge_prof_cnt_get(const void *ptr) node = extent_tree_ad_search(&huge, &key); assert(node != NULL); - ret = node->prof_cnt; + ret = node->prof_ctx; malloc_mutex_unlock(&huge_mtx); @@ -262,7 +262,7 @@ huge_prof_cnt_get(const void *ptr) } void -huge_prof_cnt_set(const void *ptr, prof_thr_cnt_t *cnt) +huge_prof_ctx_set(const void *ptr, prof_ctx_t *ctx) { extent_node_t *node, key; @@ -273,7 +273,7 @@ huge_prof_cnt_set(const void *ptr, prof_thr_cnt_t *cnt) node = extent_tree_ad_search(&huge, &key); assert(node != NULL); - node->prof_cnt = cnt; + node->prof_ctx = ctx; malloc_mutex_unlock(&huge_mtx); } diff --git a/jemalloc/src/jemalloc.c b/jemalloc/src/jemalloc.c index e01de0d5..aeab1408 100644 --- a/jemalloc/src/jemalloc.c +++ b/jemalloc/src/jemalloc.c @@ -1060,7 +1060,8 @@ JEMALLOC_P(realloc)(void *ptr, size_t size) void *ret; 
#ifdef JEMALLOC_PROF size_t old_size; - prof_thr_cnt_t *cnt, *old_cnt; + prof_thr_cnt_t *cnt; + prof_ctx_t *old_ctx; #endif if (size == 0) { @@ -1074,7 +1075,7 @@ JEMALLOC_P(realloc)(void *ptr, size_t size) #ifdef JEMALLOC_PROF if (opt_prof) { old_size = isalloc(ptr); - old_cnt = prof_cnt_get(ptr); + old_ctx = prof_ctx_get(ptr); cnt = NULL; } #endif @@ -1083,7 +1084,7 @@ JEMALLOC_P(realloc)(void *ptr, size_t size) #ifdef JEMALLOC_PROF else if (opt_prof) { old_size = 0; - old_cnt = NULL; + old_ctx = NULL; cnt = NULL; } #endif @@ -1100,7 +1101,7 @@ JEMALLOC_P(realloc)(void *ptr, size_t size) #ifdef JEMALLOC_PROF if (opt_prof) { old_size = isalloc(ptr); - old_cnt = prof_cnt_get(ptr); + old_ctx = prof_ctx_get(ptr); if ((cnt = prof_alloc_prep(size)) == NULL) { ret = NULL; goto OOM; @@ -1133,7 +1134,7 @@ OOM: #ifdef JEMALLOC_PROF if (opt_prof) { old_size = 0; - old_cnt = NULL; + old_ctx = NULL; } #endif if (malloc_init()) { @@ -1181,7 +1182,7 @@ RETURN: #endif #ifdef JEMALLOC_PROF if (opt_prof) - prof_realloc(ret, cnt, ptr, old_size, old_cnt); + prof_realloc(ret, cnt, ptr, old_size, old_ctx); #endif return (ret); } diff --git a/jemalloc/src/prof.c b/jemalloc/src/prof.c index 6326188e..c13bc04f 100644 --- a/jemalloc/src/prof.c +++ b/jemalloc/src/prof.c @@ -48,7 +48,7 @@ static malloc_mutex_t bt2ctx_mtx; static __thread ckh_t *bt2cnt_tls JEMALLOC_ATTR(tls_model("initial-exec")); /* - * Same contents as b2cnt, but initialized such that the TSD destructor is + * Same contents as b2cnt_tls, but initialized such that the TSD destructor is * called when a thread exits, so that bt2cnt_tls contents can be merged, * unlinked, and deallocated. */ @@ -100,7 +100,7 @@ static _Unwind_Reason_Code prof_unwind_callback( #endif static void prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max); static prof_thr_cnt_t *prof_lookup(prof_bt_t *bt); -static void prof_cnt_set(const void *ptr, prof_thr_cnt_t *cnt); +static void prof_ctx_set(const void *ptr, prof_ctx_t *ctx); static bool prof_flush(bool propagate_err); static bool prof_write(const char *s, bool propagate_err); static void prof_ctx_merge(prof_ctx_t *ctx, prof_cnt_t *cnt_all, @@ -450,6 +450,7 @@ prof_lookup(prof_bt_t *bt) return (NULL); } bt2cnt_tls = bt2cnt; + pthread_setspecific(bt2cnt_tsd, bt2cnt); } if (ckh_search(bt2cnt, bt, NULL, (void **)&ret)) { @@ -475,6 +476,7 @@ prof_lookup(prof_bt_t *bt) idalloc(ctx); return (NULL); } + ctx->bt = btkey; if (malloc_mutex_init(&ctx->lock)) { prof_leave(); idalloc(btkey); @@ -580,10 +582,10 @@ prof_alloc_prep(size_t size) return (ret); } -prof_thr_cnt_t * -prof_cnt_get(const void *ptr) +prof_ctx_t * +prof_ctx_get(const void *ptr) { - prof_thr_cnt_t *ret; + prof_ctx_t *ret; arena_chunk_t *chunk; assert(ptr != NULL); @@ -593,15 +595,15 @@ prof_cnt_get(const void *ptr) /* Region. */ assert(chunk->arena->magic == ARENA_MAGIC); - ret = arena_prof_cnt_get(ptr); + ret = arena_prof_ctx_get(ptr); } else - ret = huge_prof_cnt_get(ptr); + ret = huge_prof_ctx_get(ptr); return (ret); } static void -prof_cnt_set(const void *ptr, prof_thr_cnt_t *cnt) +prof_ctx_set(const void *ptr, prof_ctx_t *ctx) { arena_chunk_t *chunk; @@ -612,9 +614,9 @@ prof_cnt_set(const void *ptr, prof_thr_cnt_t *cnt) /* Region. 
*/ assert(chunk->arena->magic == ARENA_MAGIC); - arena_prof_cnt_set(ptr, cnt); + arena_prof_ctx_set(ptr, ctx); } else - huge_prof_cnt_set(ptr, cnt); + huge_prof_ctx_set(ptr, ctx); } static inline void @@ -649,7 +651,7 @@ prof_malloc(const void *ptr, prof_thr_cnt_t *cnt) assert(ptr != NULL); - prof_cnt_set(ptr, cnt); + prof_ctx_set(ptr, cnt->ctx); prof_sample_accum_update(size); if ((uintptr_t)cnt > (uintptr_t)1U) { @@ -673,25 +675,43 @@ prof_malloc(const void *ptr, prof_thr_cnt_t *cnt) void prof_realloc(const void *ptr, prof_thr_cnt_t *cnt, const void *old_ptr, - size_t old_size, prof_thr_cnt_t *old_cnt) + size_t old_size, prof_ctx_t *old_ctx) { size_t size = isalloc(ptr); + prof_thr_cnt_t *told_cnt; if (ptr != NULL) { - prof_cnt_set(ptr, cnt); + prof_ctx_set(ptr, cnt->ctx); prof_sample_accum_update(size); } - if ((uintptr_t)old_cnt > (uintptr_t)1U) - old_cnt->epoch++; + if ((uintptr_t)old_ctx > (uintptr_t)1U) { + told_cnt = prof_lookup(old_ctx->bt); + if (told_cnt == NULL) { + /* + * It's too late to propagate OOM for this realloc(), + * so operate directly on old_cnt->ctx->cnt_merged. + */ + malloc_printf("XXX BANG A\n"); + malloc_mutex_lock(&old_ctx->lock); + old_ctx->cnt_merged.curobjs--; + old_ctx->cnt_merged.curbytes -= old_size; + malloc_mutex_unlock(&old_ctx->lock); + told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U; + } + } else + told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U; + + if ((uintptr_t)told_cnt > (uintptr_t)1U) + told_cnt->epoch++; if ((uintptr_t)cnt > (uintptr_t)1U) cnt->epoch++; /*********/ mb_write(); /*********/ - if ((uintptr_t)old_cnt > (uintptr_t)1U) { - old_cnt->cnts.curobjs--; - old_cnt->cnts.curbytes -= old_size; + if ((uintptr_t)told_cnt > (uintptr_t)1U) { + told_cnt->cnts.curobjs--; + told_cnt->cnts.curbytes -= old_size; } if ((uintptr_t)cnt > (uintptr_t)1U) { cnt->cnts.curobjs++; @@ -702,8 +722,8 @@ prof_realloc(const void *ptr, prof_thr_cnt_t *cnt, const void *old_ptr, /*********/ mb_write(); /*********/ - if ((uintptr_t)old_cnt > (uintptr_t)1U) - old_cnt->epoch++; + if ((uintptr_t)told_cnt > (uintptr_t)1U) + told_cnt->epoch++; if ((uintptr_t)cnt > (uintptr_t)1U) cnt->epoch++; /*********/ @@ -713,24 +733,37 @@ prof_realloc(const void *ptr, prof_thr_cnt_t *cnt, const void *old_ptr, void prof_free(const void *ptr) { - prof_thr_cnt_t *cnt = prof_cnt_get(ptr); + prof_ctx_t *ctx = prof_ctx_get(ptr); - if ((uintptr_t)cnt > (uintptr_t)1) { + if ((uintptr_t)ctx > (uintptr_t)1) { size_t size = isalloc(ptr); + prof_thr_cnt_t *tcnt = prof_lookup(ctx->bt); - cnt->epoch++; - /*********/ - mb_write(); - /*********/ - cnt->cnts.curobjs--; - cnt->cnts.curbytes -= size; - /*********/ - mb_write(); - /*********/ - cnt->epoch++; - /*********/ - mb_write(); - /*********/ + if (tcnt != NULL) { + tcnt->epoch++; + /*********/ + mb_write(); + /*********/ + tcnt->cnts.curobjs--; + tcnt->cnts.curbytes -= size; + /*********/ + mb_write(); + /*********/ + tcnt->epoch++; + /*********/ + mb_write(); + /*********/ + } else { + /* + * OOM during free() cannot be propagated, so operate + * directly on cnt->ctx->cnt_merged. + */ + malloc_printf("XXX BANG B\n"); + malloc_mutex_lock(&ctx->lock); + ctx->cnt_merged.curobjs--; + ctx->cnt_merged.curbytes -= size; + malloc_mutex_unlock(&ctx->lock); + } } } From 8d4203c72de878c3976adc5db1e09d7ec0618d63 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 13 Apr 2010 20:53:21 -0700 Subject: [PATCH 3/6] Fix arena chunk purge/dealloc race conditions. 
Fix arena_chunk_dealloc() to put the new spare in a consistent state before dropping the arena mutex to deallocate the previous spare. Fix arena_run_dalloc() to insert a newly dirtied chunk into the chunks_dirty list before potentially deallocating the chunk, so that dirty page accounting is self-consistent. --- jemalloc/src/arena.c | 56 ++++++++++++++++++++++++-------------------- 1 file changed, 31 insertions(+), 25 deletions(-) diff --git a/jemalloc/src/arena.c b/jemalloc/src/arena.c index 222ec25f..e4142267 100644 --- a/jemalloc/src/arena.c +++ b/jemalloc/src/arena.c @@ -470,23 +470,6 @@ arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk) { arena_avail_tree_t *runs_avail; - while (arena->spare != NULL) { - arena_chunk_t *spare = arena->spare; - - arena->spare = NULL; - if (spare->dirtied) { - ql_remove(&chunk->arena->chunks_dirty, spare, - link_dirty); - arena->ndirty -= spare->ndirty; - } - malloc_mutex_unlock(&arena->lock); - chunk_dealloc((void *)spare, chunksize); - malloc_mutex_lock(&arena->lock); -#ifdef JEMALLOC_STATS - arena->stats.mapped -= chunksize; -#endif - } - /* * Remove run from the appropriate runs_avail_* tree, so that the arena * does not use it. @@ -499,7 +482,23 @@ arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk) arena_avail_tree_remove(runs_avail, &chunk->map[arena_chunk_header_npages]); - arena->spare = chunk; + if (arena->spare != NULL) { + arena_chunk_t *spare = arena->spare; + + arena->spare = chunk; + if (spare->dirtied) { + ql_remove(&chunk->arena->chunks_dirty, spare, + link_dirty); + arena->ndirty -= spare->ndirty; + } + malloc_mutex_unlock(&arena->lock); + chunk_dealloc((void *)spare, chunksize); + malloc_mutex_lock(&arena->lock); +#ifdef JEMALLOC_STATS + arena->stats.mapped -= chunksize; +#endif + } else + arena->spare = chunk; } static arena_run_t * @@ -925,6 +924,18 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty) /* Insert into runs_avail, now that coalescing is complete. */ arena_avail_tree_insert(runs_avail, &chunk->map[run_ind]); + if (dirty) { + /* + * Insert into chunks_dirty before potentially calling + * arena_chunk_dealloc(), so that chunks_dirty and + * arena->ndirty are consistent. + */ + if (chunk->dirtied == false) { + ql_tail_insert(&arena->chunks_dirty, chunk, link_dirty); + chunk->dirtied = true; + } + } + /* * Deallocate chunk if it is now completely unused. The bit * manipulation checks whether the first run is unallocated and extends @@ -935,19 +946,14 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty) arena_chunk_dealloc(arena, chunk); /* - * It is okay to do dirty page processing even if the chunk was + * It is okay to do dirty page processing here even if the chunk was * deallocated above, since in that case it is the spare. Waiting * until after possible chunk deallocation to do dirty processing * allows for an old spare to be fully deallocated, thus decreasing the * chances of spuriously crossing the dirty page purging threshold. */ - if (dirty) { - if (chunk->dirtied == false) { - ql_tail_insert(&arena->chunks_dirty, chunk, link_dirty); - chunk->dirtied = true; - } + if (dirty) arena_maybe_purge(arena); - } } static void From 6d68ed6492e387804a63cff1705be9cf0d91211d Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 13 Apr 2010 22:01:55 -0700 Subject: [PATCH 4/6] Remove autom4te.cache in distclean (not relclean). 
--- jemalloc/Makefile.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jemalloc/Makefile.in b/jemalloc/Makefile.in index ce705887..ac9b7822 100644 --- a/jemalloc/Makefile.in +++ b/jemalloc/Makefile.in @@ -112,6 +112,7 @@ clean: rm -f $(DSOS) distclean: clean + rm -rf @objroot@autom4te.cache rm -f @objroot@config.log rm -f @objroot@config.status rm -f @objroot@cfghdrs.stamp @@ -120,7 +121,6 @@ distclean: clean rm -f @cfgoutputs_out@ relclean: distclean - rm -rf @objroot@autom4te.cache rm -f @objroot@configure rm -f @srcroot@VERSION From 38cda690ddd6c9db0321b06abaa4d19f884326b6 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 14 Apr 2010 11:24:45 -0700 Subject: [PATCH 5/6] Fix profiling regression caused by bugfix. Properly set the context associated with each allocated object, even when the object is not sampled. Remove debug print code that slipped in. --- jemalloc/src/prof.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/jemalloc/src/prof.c b/jemalloc/src/prof.c index c13bc04f..93904b8c 100644 --- a/jemalloc/src/prof.c +++ b/jemalloc/src/prof.c @@ -651,10 +651,11 @@ prof_malloc(const void *ptr, prof_thr_cnt_t *cnt) assert(ptr != NULL); - prof_ctx_set(ptr, cnt->ctx); prof_sample_accum_update(size); if ((uintptr_t)cnt > (uintptr_t)1U) { + prof_ctx_set(ptr, cnt->ctx); + cnt->epoch++; /*********/ mb_write(); @@ -670,7 +671,8 @@ prof_malloc(const void *ptr, prof_thr_cnt_t *cnt) /*********/ mb_write(); /*********/ - } + } else + prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U); } void @@ -680,10 +682,8 @@ prof_realloc(const void *ptr, prof_thr_cnt_t *cnt, const void *old_ptr, size_t size = isalloc(ptr); prof_thr_cnt_t *told_cnt; - if (ptr != NULL) { - prof_ctx_set(ptr, cnt->ctx); + if (ptr != NULL) prof_sample_accum_update(size); - } if ((uintptr_t)old_ctx > (uintptr_t)1U) { told_cnt = prof_lookup(old_ctx->bt); @@ -692,7 +692,6 @@ prof_realloc(const void *ptr, prof_thr_cnt_t *cnt, const void *old_ptr, * It's too late to propagate OOM for this realloc(), * so operate directly on old_cnt->ctx->cnt_merged. */ - malloc_printf("XXX BANG A\n"); malloc_mutex_lock(&old_ctx->lock); old_ctx->cnt_merged.curobjs--; old_ctx->cnt_merged.curbytes -= old_size; @@ -704,8 +703,11 @@ prof_realloc(const void *ptr, prof_thr_cnt_t *cnt, const void *old_ptr, if ((uintptr_t)told_cnt > (uintptr_t)1U) told_cnt->epoch++; - if ((uintptr_t)cnt > (uintptr_t)1U) + if ((uintptr_t)cnt > (uintptr_t)1U) { + prof_ctx_set(ptr, cnt->ctx); cnt->epoch++; + } else + prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U); /*********/ mb_write(); /*********/ @@ -758,7 +760,6 @@ prof_free(const void *ptr) * OOM during free() cannot be propagated, so operate * directly on cnt->ctx->cnt_merged. */ - malloc_printf("XXX BANG B\n"); malloc_mutex_lock(&ctx->lock); ctx->cnt_merged.curobjs--; ctx->cnt_merged.curbytes -= size; From 5055f4516c8852e67668b0e746863a7d6a1c148e Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 14 Apr 2010 11:27:13 -0700 Subject: [PATCH 6/6] Fix tcache crash during thread cleanup. Properly maintain tcache_bin_t's avail pointer such that it is NULL if no objects are cached. This only caused problems during thread cache destruction, since cache flushing otherwise never occurs on an empty bin. 
--- jemalloc/src/tcache.c | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/jemalloc/src/tcache.c b/jemalloc/src/tcache.c index ce6ec996..ace24cea 100644 --- a/jemalloc/src/tcache.c +++ b/jemalloc/src/tcache.c @@ -55,12 +55,14 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem { void *flush, *deferred, *ptr; unsigned i, nflush, ndeferred; + bool first_pass; assert(binind < nbins); assert(rem <= tbin->ncached); + assert(tbin->ncached > 0 || tbin->avail == NULL); - for (flush = tbin->avail, nflush = tbin->ncached - rem; flush != NULL; - flush = deferred, nflush = ndeferred) { + for (flush = tbin->avail, nflush = tbin->ncached - rem, first_pass = + true; flush != NULL; flush = deferred, nflush = ndeferred) { /* Lock the arena bin associated with the first object. */ arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(flush); arena_t *arena = chunk->arena; @@ -110,12 +112,9 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem } malloc_mutex_unlock(&bin->lock); - if (flush != NULL) { - /* - * This was the first pass, and rem cached objects - * remain. - */ + if (first_pass) { tbin->avail = flush; + first_pass = false; } } @@ -133,12 +132,14 @@ tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem { void *flush, *deferred, *ptr; unsigned i, nflush, ndeferred; + bool first_pass; assert(binind < nhbins); assert(rem <= tbin->ncached); + assert(tbin->ncached > 0 || tbin->avail == NULL); - for (flush = tbin->avail, nflush = tbin->ncached - rem; flush != NULL; - flush = deferred, nflush = ndeferred) { + for (flush = tbin->avail, nflush = tbin->ncached - rem, first_pass = + true; flush != NULL; flush = deferred, nflush = ndeferred) { /* Lock the arena associated with the first object. */ arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(flush); arena_t *arena = chunk->arena; @@ -183,12 +184,9 @@ tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem } malloc_mutex_unlock(&arena->lock); - if (flush != NULL) { - /* - * This was the first pass, and rem cached objects - * remain. - */ + if (first_pass) { tbin->avail = flush; + first_pass = false; } }
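Taken together, patches 2 and 5 hinge on storing a (prof_ctx_t *) with each allocated object and having whichever thread later frees or reallocates it resolve its own (prof_thr_cnt_t *) from ctx->bt, falling back to the context's merged counters if that lookup fails. Below is a minimal sketch of that pattern, using simplified stand-in types and an assumed lookup callback rather than the real jemalloc API:

#include <stddef.h>
#include <pthread.h>

typedef struct {
	void		*frames[8];	/* Return addresses. */
	unsigned	len;
} sketch_bt_t;

typedef struct {
	sketch_bt_t	*bt;		/* Backtrace key shared by all threads. */
	pthread_mutex_t	lock;		/* Protects the merged counters. */
	size_t		merged_curobjs;
	size_t		merged_curbytes;
} sketch_ctx_t;

typedef struct {
	sketch_ctx_t	*ctx;		/* Context this thread-local counter feeds. */
	size_t		curobjs;
	size_t		curbytes;
} sketch_thr_cnt_t;

/* Per-object state: only the context pointer travels with the object. */
typedef struct {
	sketch_ctx_t	*prof_ctx;
	size_t		size;
} sketch_obj_t;

static void
sketch_prof_free(sketch_obj_t *obj,
    sketch_thr_cnt_t *(*lookup)(sketch_bt_t *bt))
{
	sketch_ctx_t *ctx = obj->prof_ctx;
	sketch_thr_cnt_t *cnt = lookup(ctx->bt);	/* Caller's own counter. */

	if (cnt != NULL) {
		cnt->curobjs--;
		cnt->curbytes -= obj->size;
	} else {
		/* Lookup failed (OOM): update the shared merged counters. */
		pthread_mutex_lock(&ctx->lock);
		ctx->merged_curobjs--;
		ctx->merged_curbytes -= obj->size;
		pthread_mutex_unlock(&ctx->lock);
	}
}

The object may outlive the thread that allocated it, so no other thread's (prof_thr_cnt_t *) is ever dereferenced here; only the shared context is.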