From 04ca1efe35349a6114523b37abbd4ca066cd17fa Mon Sep 17 00:00:00 2001
From: Jason Evans
Date: Sat, 30 Jul 2011 17:58:07 -0700
Subject: [PATCH 01/11] Adjust relative #include for private_namespace.h.

---
 include/jemalloc/internal/jemalloc_internal.h.in | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in
index e9d60da7..a44f0978 100644
--- a/include/jemalloc/internal/jemalloc_internal.h.in
+++ b/include/jemalloc/internal/jemalloc_internal.h.in
@@ -33,7 +33,7 @@
 #define JEMALLOC_MANGLE
 #include "../jemalloc@install_suffix@.h"
 
-#include "private_namespace.h"
+#include "jemalloc/internal/private_namespace.h"
 
 #if (defined(JEMALLOC_OSATOMIC) || defined(JEMALLOC_OSSPIN))
 #include

From 41b954ed36f90e5a479bbe2118b5ce85189a55ee Mon Sep 17 00:00:00 2001
From: Jason Evans
Date: Mon, 8 Aug 2011 17:10:07 -0700
Subject: [PATCH 02/11] Use prof_tdata_cleanup() argument.

Use the argument to prof_tdata_cleanup(), rather than calling
PROF_TCACHE_GET().  This fixes a bug in the NO_TLS case.
---
 src/prof.c | 45 ++++++++++++++++++++-------------------------
 1 file changed, 20 insertions(+), 25 deletions(-)

diff --git a/src/prof.c b/src/prof.c
index 65493759..a268e585 100644
--- a/src/prof.c
+++ b/src/prof.c
@@ -1109,7 +1109,6 @@ prof_tdata_init(void)
 	prof_tdata->vec = imalloc(sizeof(void *) * prof_bt_max);
 	if (prof_tdata->vec == NULL) {
-		ckh_delete(&prof_tdata->bt2cnt);
 		idalloc(prof_tdata);
 		return (NULL);
@@ -1127,33 +1126,29 @@ prof_tdata_init(void)
 static void
 prof_tdata_cleanup(void *arg)
 {
-	prof_tdata_t *prof_tdata;
+	prof_thr_cnt_t *cnt;
+	prof_tdata_t *prof_tdata = (prof_tdata_t *)arg;
-	prof_tdata = PROF_TCACHE_GET();
-	if (prof_tdata != NULL) {
-		prof_thr_cnt_t *cnt;
+	/*
+	 * Delete the hash table.  All of its contents can still be
+	 * iterated over via the LRU.
+	 */
+	ckh_delete(&prof_tdata->bt2cnt);
-		/*
-		 * Delete the hash table.  All of its contents can still be
-		 * iterated over via the LRU.
-		 */
-		ckh_delete(&prof_tdata->bt2cnt);
-
-		/*
-		 * Iteratively merge cnt's into the global stats and delete
-		 * them.
-		 */
-		while ((cnt = ql_last(&prof_tdata->lru_ql, lru_link)) != NULL) {
-			prof_ctx_merge(cnt->ctx, cnt);
-			ql_remove(&prof_tdata->lru_ql, cnt, lru_link);
-			idalloc(cnt);
-		}
-
-		idalloc(prof_tdata->vec);
-
-		idalloc(prof_tdata);
-		PROF_TCACHE_SET(NULL);
+	/*
+	 * Iteratively merge cnt's into the global stats and delete
+	 * them.
+	 */
+	while ((cnt = ql_last(&prof_tdata->lru_ql, lru_link)) != NULL) {
+		prof_ctx_merge(cnt->ctx, cnt);
+		ql_remove(&prof_tdata->lru_ql, cnt, lru_link);
+		idalloc(cnt);
 	}
+
+	idalloc(prof_tdata->vec);
+
+	idalloc(prof_tdata);
+	PROF_TCACHE_SET(NULL);
 }
 
 void

From 0cdd42eb3204cdd2646561c90ec202716cd3c344 Mon Sep 17 00:00:00 2001
From: Jason Evans
Date: Tue, 9 Aug 2011 19:06:06 -0700
Subject: [PATCH 03/11] Clean up prof-related comments.

Clean up some prof-related comments to more accurately reflect how the
code works.

Simplify OOM handling code in a couple of prof-related error paths.
---
 src/prof.c | 39 ++++++++++++++++----------------------
 1 file changed, 16 insertions(+), 23 deletions(-)

diff --git a/src/prof.c b/src/prof.c
index a268e585..dea6d11c 100644
--- a/src/prof.c
+++ b/src/prof.c
@@ -503,11 +503,8 @@ prof_lookup(prof_bt_t *bt)
 		/* Allocate and partially initialize a new cnt. */
 		ret.v = imalloc(sizeof(prof_thr_cnt_t));
 		if (ret.p == NULL) {
-			if (new_ctx) {
-				malloc_mutex_lock(&ctx.p->lock);
-				ctx.p->cnt_merged.curobjs--;
-				malloc_mutex_unlock(&ctx.p->lock);
-			}
+			if (new_ctx)
+				prof_ctx_destroy(ctx.p);
 			return (NULL);
 		}
 		ql_elm_new(ret.p, cnts_link);
@@ -518,11 +515,8 @@ prof_lookup(prof_bt_t *bt)
 		ret.p->epoch = 0;
 		memset(&ret.p->cnts, 0, sizeof(prof_cnt_t));
 		if (ckh_insert(&prof_tdata->bt2cnt, btkey.v, ret.v)) {
-			if (new_ctx) {
-				malloc_mutex_lock(&ctx.p->lock);
-				ctx.p->cnt_merged.curobjs--;
-				malloc_mutex_unlock(&ctx.p->lock);
-			}
+			if (new_ctx)
+				prof_ctx_destroy(ctx.p);
 			idalloc(ret.v);
 			return (NULL);
 		}
@@ -644,11 +638,10 @@ prof_ctx_destroy(prof_ctx_t *ctx)
 	/*
 	 * Check that ctx is still unused by any thread cache before destroying
-	 * it.  prof_lookup() interlocks bt2ctx_mtx and ctx->lock in order to
-	 * avoid a race condition with this function, and prof_ctx_merge()
-	 * artificially raises ctx->cnt_merged.curobjs in order to avoid a race
-	 * between the main body of prof_ctx_merge() and entry into this
-	 * function.
+	 * it.  prof_lookup() artificially raises ctx->cnt_merge.curobjs in
+	 * order to avoid a race condition with this function, as does
+	 * prof_ctx_merge() in order to avoid a race between the main body of
+	 * prof_ctx_merge() and entry into this function.
 	 */
 	prof_enter();
 	malloc_mutex_lock(&ctx->lock);
@@ -665,7 +658,10 @@ prof_ctx_destroy(prof_ctx_t *ctx)
 		malloc_mutex_destroy(&ctx->lock);
 		idalloc(ctx);
 	} else {
-		/* Compensate for increment in prof_ctx_merge(). */
+		/*
+		 * Compensate for increment in prof_ctx_merge() or
+		 * prof_lookup().
+		 */
 		ctx->cnt_merged.curobjs--;
 		malloc_mutex_unlock(&ctx->lock);
 		prof_leave();
@@ -1130,18 +1126,15 @@ prof_tdata_cleanup(void *arg)
 	prof_tdata_t *prof_tdata = (prof_tdata_t *)arg;
 
 	/*
-	 * Delete the hash table.  All of its contents can still be
-	 * iterated over via the LRU.
+	 * Delete the hash table.  All of its contents can still be iterated
+	 * over via the LRU.
 	 */
 	ckh_delete(&prof_tdata->bt2cnt);
 
-	/*
-	 * Iteratively merge cnt's into the global stats and delete
-	 * them.
-	 */
+	/* Iteratively merge cnt's into the global stats and delete them. */
 	while ((cnt = ql_last(&prof_tdata->lru_ql, lru_link)) != NULL) {
-		prof_ctx_merge(cnt->ctx, cnt);
 		ql_remove(&prof_tdata->lru_ql, cnt, lru_link);
+		prof_ctx_merge(cnt->ctx, cnt);
 		idalloc(cnt);
 	}

From 183ba50c1940a95080f6cf890ae4ae40200301e7 Mon Sep 17 00:00:00 2001
From: Jason Evans
Date: Thu, 11 Aug 2011 22:51:00 -0700
Subject: [PATCH 04/11] Fix two prof-related bugs in rallocm().

Properly handle boundary conditions for sampled region promotion in
rallocm().  Prior to this fix, some combinations of 'size' and 'extra'
values could cause erroneous behavior.  Additionally, size class
recording for promoted regions was incorrect.
---
 src/arena.c | 1 +
 src/jemalloc.c | 13 ++++++++++---
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/src/arena.c b/src/arena.c
index e00dccc3..e749c1d5 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -1657,6 +1657,7 @@ arena_prof_promoted(const void *ptr, size_t size)
 	assert(ptr != NULL);
 	assert(CHUNK_ADDR2BASE(ptr) != ptr);
 	assert(isalloc(ptr) == PAGE_SIZE);
+	assert(size <= small_maxclass);
 
 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
 	pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
diff --git a/src/jemalloc.c b/src/jemalloc.c
index e2875169..afba0e1d 100644
--- a/src/jemalloc.c
+++ b/src/jemalloc.c
@@ -1670,15 +1670,22 @@ JEMALLOC_P(rallocm)(void **ptr, size_t *rsize, size_t size, size_t extra,
 		old_ctx = prof_ctx_get(p);
 		if ((cnt = prof_alloc_prep(max_usize)) == NULL)
 			goto OOM;
-		if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && max_usize
-		    <= small_maxclass) {
+		/*
+		 * Use minimum usize to determine whether promotion may happen.
+		 */
+		if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U
+		    && ((alignment == 0) ? s2u(size) : sa2u(size,
+		    alignment, NULL)) <= small_maxclass) {
 			q = iralloc(p, small_maxclass+1, (small_maxclass+1 >=
 			    size+extra) ? 0 : size+extra - (small_maxclass+1),
 			    alignment, zero, no_move);
 			if (q == NULL)
 				goto ERR;
 			usize = isalloc(q);
-			arena_prof_promoted(q, usize);
+			if (max_usize < PAGE_SIZE) {
+				usize = max_usize;
+				arena_prof_promoted(q, usize);
+			}
 		} else {
 			q = iralloc(p, size, extra, alignment, zero, no_move);
 			if (q == NULL)

From b493ce22a4b545c17d5942d0fc65f413dd97743a Mon Sep 17 00:00:00 2001
From: Jason Evans
Date: Fri, 12 Aug 2011 11:28:47 -0700
Subject: [PATCH 05/11] Conditionalize an isalloc() call in rallocm().

Conditionalize an isalloc() call in rallocm() that may be unnecessary.
---
 src/jemalloc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/jemalloc.c b/src/jemalloc.c
index afba0e1d..4d10e90a 100644
--- a/src/jemalloc.c
+++ b/src/jemalloc.c
@@ -1681,11 +1681,11 @@ JEMALLOC_P(rallocm)(void **ptr, size_t *rsize, size_t size, size_t extra,
 			    alignment, zero, no_move);
 			if (q == NULL)
 				goto ERR;
-			usize = isalloc(q);
 			if (max_usize < PAGE_SIZE) {
 				usize = max_usize;
 				arena_prof_promoted(q, usize);
-			}
+			} else
+				usize = isalloc(q);
 		} else {
 			q = iralloc(p, size, extra, alignment, zero, no_move);
 			if (q == NULL)

From 745e30b157f700d848cb5ed38eb6c17203760b7c Mon Sep 17 00:00:00 2001
From: Jason Evans
Date: Fri, 12 Aug 2011 11:40:55 -0700
Subject: [PATCH 06/11] Document swap.fds mallctl as read-write.

Fix the manual page to document the swap.fds mallctl as read-write,
rather than read-only.
---
 doc/jemalloc.xml.in | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in
index 13f3aae0..7a32879a 100644
--- a/doc/jemalloc.xml.in
+++ b/doc/jemalloc.xml.in
@@ -2025,7 +2025,7 @@ malloc_conf = "xmalloc:true";]]>
         swap.fds
         (int *)
-        r-
+        rw
         []
         When written to, the files associated with the

From a507004d294ad0c78b4d01559479620ebb272a49 Mon Sep 17 00:00:00 2001
From: Jason Evans
Date: Fri, 12 Aug 2011 13:48:27 -0700
Subject: [PATCH 07/11] Fix off-by-one backtracing issues.

Rewrite prof_alloc_prep() as a cpp macro, PROF_ALLOC_PREP(), in order to
remove any doubt as to whether an additional stack frame is created.
Prior to this change, it was assumed that inlining would reduce the
total number of frames in the backtrace, but in practice behavior wasn't
completely predictable.

Create imemalign() and call it from posix_memalign(), memalign(), and
valloc(), so that all entry points require the same number of stack
frames to be ignored during backtracing.
---
 include/jemalloc/internal/private_namespace.h | 1 -
 include/jemalloc/internal/prof.h | 122 ++++++++----------
 src/jemalloc.c | 49 +++++--
 3 files changed, 90 insertions(+), 82 deletions(-)

diff --git a/include/jemalloc/internal/private_namespace.h b/include/jemalloc/internal/private_namespace.h
index 2cfb1716..d4f5f96d 100644
--- a/include/jemalloc/internal/private_namespace.h
+++ b/include/jemalloc/internal/private_namespace.h
@@ -145,7 +145,6 @@
 #define	malloc_write JEMALLOC_N(malloc_write)
 #define	mb_write JEMALLOC_N(mb_write)
 #define	pow2_ceil JEMALLOC_N(pow2_ceil)
-#define	prof_alloc_prep JEMALLOC_N(prof_alloc_prep)
 #define	prof_backtrace JEMALLOC_N(prof_backtrace)
 #define	prof_boot0 JEMALLOC_N(prof_boot0)
 #define	prof_boot1 JEMALLOC_N(prof_boot1)
diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h
index f9438735..e9064ba6 100644
--- a/include/jemalloc/internal/prof.h
+++ b/include/jemalloc/internal/prof.h
@@ -227,9 +227,60 @@ bool	prof_boot2(void);
 /******************************************************************************/
 #ifdef JEMALLOC_H_INLINES
 
+#define	PROF_ALLOC_PREP(nignore, size, ret) do { \
+	prof_tdata_t *prof_tdata; \
+	prof_bt_t bt; \
+ \
+	assert(size == s2u(size)); \
+ \
+	prof_tdata = PROF_TCACHE_GET(); \
+	if (prof_tdata == NULL) { \
+		prof_tdata = prof_tdata_init(); \
+		if (prof_tdata == NULL) { \
+			ret = NULL; \
+			break; \
+		} \
+	} \
+ \
+	if (opt_prof_active == false) { \
+		/* Sampling is currently inactive, so avoid sampling. */\
+		ret = (prof_thr_cnt_t *)(uintptr_t)1U; \
+	} else if (opt_lg_prof_sample == 0) { \
+		/* Don't bother with sampling logic, since sampling */\
+		/* interval is 1. */\
+		bt_init(&bt, prof_tdata->vec); \
+		prof_backtrace(&bt, nignore, prof_bt_max); \
+		ret = prof_lookup(&bt); \
+	} else { \
+		if (prof_tdata->threshold == 0) { \
+			/* Initialize.  Seed the prng differently for */\
+			/* each thread. */\
+			prof_tdata->prn_state = \
+			    (uint64_t)(uintptr_t)&size; \
+			prof_sample_threshold_update(prof_tdata); \
+		} \
+ \
+		/* Determine whether to capture a backtrace based on */\
+		/* whether size is enough for prof_accum to reach */\
+		/* prof_tdata->threshold.  However, delay updating */\
+		/* these variables until prof_{m,re}alloc(), because */\
+		/* we don't know for sure that the allocation will */\
+		/* succeed. */\
+		/* */\
+		/* Use subtraction rather than addition to avoid */\
+		/* potential integer overflow. */\
+		if (size >= prof_tdata->threshold - \
+		    prof_tdata->accum) { \
+			bt_init(&bt, prof_tdata->vec); \
+			prof_backtrace(&bt, nignore, prof_bt_max); \
+			ret = prof_lookup(&bt); \
+		} else \
+			ret = (prof_thr_cnt_t *)(uintptr_t)1U; \
+	} \
+} while (0)
+
 #ifndef JEMALLOC_ENABLE_INLINE
 void	prof_sample_threshold_update(prof_tdata_t *prof_tdata);
-prof_thr_cnt_t	*prof_alloc_prep(size_t size);
 prof_ctx_t	*prof_ctx_get(const void *ptr);
 void	prof_ctx_set(const void *ptr, prof_ctx_t *ctx);
 bool	prof_sample_accum_update(size_t size);
@@ -272,71 +323,6 @@ prof_sample_threshold_update(prof_tdata_t *prof_tdata)
 	    + (uint64_t)1U;
 }
 
-JEMALLOC_INLINE prof_thr_cnt_t *
-prof_alloc_prep(size_t size)
-{
-#ifdef JEMALLOC_ENABLE_INLINE
-   /* This function does not have its own stack frame, because it is inlined. */
-#  define NIGNORE 1
-#else
-#  define NIGNORE 2
-#endif
-	prof_thr_cnt_t *ret;
-	prof_tdata_t *prof_tdata;
-	prof_bt_t bt;
-
-	assert(size == s2u(size));
-
-	prof_tdata = PROF_TCACHE_GET();
-	if (prof_tdata == NULL) {
-		prof_tdata = prof_tdata_init();
-		if (prof_tdata == NULL)
-			return (NULL);
-	}
-
-	if (opt_prof_active == false) {
-		/* Sampling is currently inactive, so avoid sampling. */
-		ret = (prof_thr_cnt_t *)(uintptr_t)1U;
-	} else if (opt_lg_prof_sample == 0) {
-		/*
-		 * Don't bother with sampling logic, since sampling interval is
-		 * 1.
-		 */
-		bt_init(&bt, prof_tdata->vec);
-		prof_backtrace(&bt, NIGNORE, prof_bt_max);
-		ret = prof_lookup(&bt);
-	} else {
-		if (prof_tdata->threshold == 0) {
-			/*
-			 * Initialize.  Seed the prng differently for each
-			 * thread.
-			 */
-			prof_tdata->prn_state = (uint64_t)(uintptr_t)&size;
-			prof_sample_threshold_update(prof_tdata);
-		}
-
-		/*
-		 * Determine whether to capture a backtrace based on whether
-		 * size is enough for prof_accum to reach
-		 * prof_tdata->threshold.  However, delay updating these
-		 * variables until prof_{m,re}alloc(), because we don't know
-		 * for sure that the allocation will succeed.
-		 *
-		 * Use subtraction rather than addition to avoid potential
-		 * integer overflow.
-		 */
-		if (size >= prof_tdata->threshold - prof_tdata->accum) {
-			bt_init(&bt, prof_tdata->vec);
-			prof_backtrace(&bt, NIGNORE, prof_bt_max);
-			ret = prof_lookup(&bt);
-		} else
-			ret = (prof_thr_cnt_t *)(uintptr_t)1U;
-	}
-
-	return (ret);
-#undef NIGNORE
-}
-
 JEMALLOC_INLINE prof_ctx_t *
 prof_ctx_get(const void *ptr)
 {
@@ -415,7 +401,7 @@ prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt)
 			 * always possible to tell in advance how large an
 			 * object's usable size will be, so there should never
 			 * be a difference between the size passed to
-			 * prof_alloc_prep() and prof_malloc().
+			 * PROF_ALLOC_PREP() and prof_malloc().
 			 */
 			assert((uintptr_t)cnt == (uintptr_t)1U);
 		}
@@ -459,7 +445,7 @@ prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt,
 		if (prof_sample_accum_update(size)) {
 			/*
 			 * Don't sample.  The size passed to
-			 * prof_alloc_prep() was larger than what
+			 * PROF_ALLOC_PREP() was larger than what
 			 * actually got allocated, so a backtrace was
 			 * captured for this allocation, even though
 			 * its actual size was insufficient to cross
diff --git a/src/jemalloc.c b/src/jemalloc.c
index 4d10e90a..14a0c7c2 100644
--- a/src/jemalloc.c
+++ b/src/jemalloc.c
@@ -84,6 +84,7 @@ static void	malloc_conf_error(const char *msg, const char *k, size_t klen,
     const char *v, size_t vlen);
 static void	malloc_conf_init(void);
 static bool	malloc_init_hard(void);
+static int	imemalign(void **memptr, size_t alignment, size_t size);
 
 /******************************************************************************/
 /* malloc_message() setup. */
@@ -939,7 +940,8 @@ JEMALLOC_P(malloc)(size_t size)
 #ifdef JEMALLOC_PROF
 	if (opt_prof) {
 		usize = s2u(size);
-		if ((cnt = prof_alloc_prep(usize)) == NULL) {
+		PROF_ALLOC_PREP(1, usize, cnt);
+		if (cnt == NULL) {
 			ret = NULL;
 			goto OOM;
 		}
@@ -988,9 +990,15 @@ RETURN:
 }
 
 JEMALLOC_ATTR(nonnull(1))
-JEMALLOC_ATTR(visibility("default"))
-int
-JEMALLOC_P(posix_memalign)(void **memptr, size_t alignment, size_t size)
+#ifdef JEMALLOC_PROF
+/*
+ * Avoid any uncertainty as to how many backtrace frames to ignore in
+ * PROF_ALLOC_PREP().
+ */
+JEMALLOC_ATTR(noinline)
+#endif
+static int
+imemalign(void **memptr, size_t alignment, size_t size)
 {
 	int ret;
 	size_t usize
@@ -1057,7 +1065,8 @@ JEMALLOC_P(posix_memalign)(void **memptr, size_t alignment, size_t size)
 
 #ifdef JEMALLOC_PROF
 	if (opt_prof) {
-		if ((cnt = prof_alloc_prep(usize)) == NULL) {
+		PROF_ALLOC_PREP(2, usize, cnt);
+		if (cnt == NULL) {
 			result = NULL;
 			ret = EINVAL;
 		} else {
@@ -1110,6 +1119,15 @@ RETURN:
 	return (ret);
 }
 
+JEMALLOC_ATTR(nonnull(1))
+JEMALLOC_ATTR(visibility("default"))
+int
+JEMALLOC_P(posix_memalign)(void **memptr, size_t alignment, size_t size)
+{
+
+	return imemalign(memptr, alignment, size);
+}
+
 JEMALLOC_ATTR(malloc)
 JEMALLOC_ATTR(visibility("default"))
 void *
@@ -1165,7 +1183,8 @@ JEMALLOC_P(calloc)(size_t num, size_t size)
 #ifdef JEMALLOC_PROF
 	if (opt_prof) {
 		usize = s2u(num_size);
-		if ((cnt = prof_alloc_prep(usize)) == NULL) {
+		PROF_ALLOC_PREP(1, usize, cnt);
+		if (cnt == NULL) {
 			ret = NULL;
 			goto RETURN;
 		}
@@ -1278,7 +1297,8 @@ JEMALLOC_P(realloc)(void *ptr, size_t size)
 		if (opt_prof) {
 			usize = s2u(size);
 			old_ctx = prof_ctx_get(ptr);
-			if ((cnt = prof_alloc_prep(usize)) == NULL) {
+			PROF_ALLOC_PREP(1, usize, cnt);
+			if (cnt == NULL) {
 				ret = NULL;
 				goto OOM;
 			}
@@ -1327,7 +1347,8 @@ OOM:
 #ifdef JEMALLOC_PROF
 		if (opt_prof) {
 			usize = s2u(size);
-			if ((cnt = prof_alloc_prep(usize)) == NULL)
+			PROF_ALLOC_PREP(1, usize, cnt);
+			if (cnt == NULL)
 				ret = NULL;
 			else {
 				if (prof_promote && (uintptr_t)cnt !=
@@ -1432,7 +1453,7 @@ JEMALLOC_P(memalign)(size_t alignment, size_t size)
 #ifdef JEMALLOC_CC_SILENCE
 	int result =
 #endif
-	    JEMALLOC_P(posix_memalign)(&ret, alignment, size);
+	    imemalign(&ret, alignment, size);
 #ifdef JEMALLOC_CC_SILENCE
 	if (result != 0)
 		return (NULL);
@@ -1451,7 +1472,7 @@ JEMALLOC_P(valloc)(size_t size)
 #ifdef JEMALLOC_CC_SILENCE
 	int result =
 #endif
-	    JEMALLOC_P(posix_memalign)(&ret, PAGE_SIZE, size);
+	    imemalign(&ret, PAGE_SIZE, size);
 #ifdef JEMALLOC_CC_SILENCE
 	if (result != 0)
 		return (NULL);
@@ -1573,7 +1594,8 @@ JEMALLOC_P(allocm)(void **ptr, size_t *rsize, size_t size, int flags)
 
 #ifdef JEMALLOC_PROF
 	if (opt_prof) {
-		if ((cnt = prof_alloc_prep(usize)) == NULL)
+		PROF_ALLOC_PREP(1, usize, cnt);
+		if (cnt == NULL)
 			goto OOM;
 		if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize
 		    <= small_maxclass) {
@@ -1660,7 +1682,7 @@ JEMALLOC_P(rallocm)(void **ptr, size_t *rsize, size_t size, size_t extra,
 		/*
 		 * usize isn't knowable before iralloc() returns when extra is
 		 * non-zero.  Therefore, compute its maximum possible value and
-		 * use that in prof_alloc_prep() to decide whether to capture a
+		 * use that in PROF_ALLOC_PREP() to decide whether to capture a
 		 * backtrace.  prof_realloc() will use the actual usize to
 		 * decide whether to sample.
 		 */
@@ -1668,7 +1690,8 @@ JEMALLOC_P(rallocm)(void **ptr, size_t *rsize, size_t size, size_t extra,
 		    sa2u(size+extra, alignment, NULL);
 		old_size = isalloc(p);
 		old_ctx = prof_ctx_get(p);
-		if ((cnt = prof_alloc_prep(max_usize)) == NULL)
+		PROF_ALLOC_PREP(1, max_usize, cnt);
+		if (cnt == NULL)
 			goto OOM;
 		/*
 		 * Use minimum usize to determine whether promotion may happen.

From 749c2a0ab62089891d8e40840fbf384f12cb8401 Mon Sep 17 00:00:00 2001
From: Jason Evans
Date: Fri, 12 Aug 2011 18:37:54 -0700
Subject: [PATCH 08/11] Add missing prof_malloc() call in allocm().

Add a missing prof_malloc() call in allocm().  Before this fix, negative
object/byte counts could be observed in heap profiles for applications
that use allocm().
---
 src/jemalloc.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/jemalloc.c b/src/jemalloc.c
index 14a0c7c2..b13b1bf9 100644
--- a/src/jemalloc.c
+++ b/src/jemalloc.c
@@ -1587,8 +1587,7 @@ JEMALLOC_P(allocm)(void **ptr, size_t *rsize, size_t size, int flags)
 	if (malloc_init())
 		goto OOM;
 
-	usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment,
-	    NULL);
+	usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment, NULL);
 	if (usize == 0)
 		goto OOM;
 
@@ -1612,7 +1611,7 @@ JEMALLOC_P(allocm)(void **ptr, size_t *rsize, size_t size, int flags)
 			if (p == NULL)
 				goto OOM;
 		}
-
+		prof_malloc(p, usize, cnt);
 		if (rsize != NULL)
 			*rsize = usize;
 	} else

From 46405e670f9b4831da9c24c15f0f3a537ef2606b Mon Sep 17 00:00:00 2001
From: Jason Evans
Date: Tue, 30 Aug 2011 23:37:29 -0700
Subject: [PATCH 09/11] Fix a prof-related bug in realloc().

Fix realloc() such that it only records the object passed in as freed
if no OOM error occurs.
---
 src/jemalloc.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/src/jemalloc.c b/src/jemalloc.c
index b13b1bf9..fd8bf52f 100644
--- a/src/jemalloc.c
+++ b/src/jemalloc.c
@@ -1299,6 +1299,7 @@ JEMALLOC_P(realloc)(void *ptr, size_t size)
 			old_ctx = prof_ctx_get(ptr);
 			PROF_ALLOC_PREP(1, usize, cnt);
 			if (cnt == NULL) {
+				old_ctx = NULL;
 				ret = NULL;
 				goto OOM;
 			}
@@ -1308,8 +1309,13 @@ JEMALLOC_P(realloc)(void *ptr, size_t size)
 				    false, false);
 				if (ret != NULL)
 					arena_prof_promoted(ret, usize);
-			} else
+				else
+					old_ctx = NULL;
+			} else {
 				ret = iralloc(ptr, size, 0, 0, false, false);
+				if (ret == NULL)
+					old_ctx = NULL;
+			}
 		} else
 #endif
 		{
@@ -1666,7 +1672,6 @@ JEMALLOC_P(rallocm)(void **ptr, size_t *rsize, size_t size, size_t extra,
 	bool no_move = flags & ALLOCM_NO_MOVE;
 #ifdef JEMALLOC_PROF
 	prof_thr_cnt_t *cnt;
-	prof_ctx_t *old_ctx;
 #endif
 
 	assert(ptr != NULL);
@@ -1687,8 +1692,8 @@ JEMALLOC_P(rallocm)(void **ptr, size_t *rsize, size_t size, size_t extra,
 		 */
 		size_t max_usize = (alignment == 0) ? s2u(size+extra) :
 		    sa2u(size+extra, alignment, NULL);
+		prof_ctx_t *old_ctx = prof_ctx_get(p);
 		old_size = isalloc(p);
-		old_ctx = prof_ctx_get(p);
 		PROF_ALLOC_PREP(1, max_usize, cnt);
 		if (cnt == NULL)
 			goto OOM;

From a9076c9483a8efcb216b9f1303bf310f80ee3401 Mon Sep 17 00:00:00 2001
From: Jason Evans
Date: Tue, 30 Aug 2011 23:40:11 -0700
Subject: [PATCH 10/11] Fix a prof-related race condition.

Fix prof_lookup() to artificially raise curobjs for all paths through
the code that creates a new entry in the per thread bt2cnt hash table.
This fixes a race condition that could corrupt memory if prof_accum
were false, and a non-default lg_prof_tcmax were used and/or threads
were destroyed.
---
 src/prof.c | 25 +++++++++++++++++++------
 1 file changed, 19 insertions(+), 6 deletions(-)

diff --git a/src/prof.c b/src/prof.c
index dea6d11c..8a144b4e 100644
--- a/src/prof.c
+++ b/src/prof.c
@@ -474,11 +474,23 @@ prof_lookup(prof_bt_t *bt)
 		/*
 		 * Artificially raise curobjs, in order to avoid a race
 		 * condition with prof_ctx_merge()/prof_ctx_destroy().
+		 *
+		 * No locking is necessary for ctx here because no other
+		 * threads have had the opportunity to fetch it from
+		 * bt2ctx yet.
 		 */
 		ctx.p->cnt_merged.curobjs++;
 		new_ctx = true;
-	} else
+	} else {
+		/*
+		 * Artificially raise curobjs, in order to avoid a race
+		 * condition with prof_ctx_merge()/prof_ctx_destroy().
+		 */
+		malloc_mutex_lock(&ctx.p->lock);
+		ctx.p->cnt_merged.curobjs++;
+		malloc_mutex_unlock(&ctx.p->lock);
 		new_ctx = false;
+	}
 	prof_leave();
 
 	/* Link a prof_thd_cnt_t into ctx for this thread. */
@@ -491,8 +503,9 @@ prof_lookup(prof_bt_t *bt)
 			 */
 			ret.p = ql_last(&prof_tdata->lru_ql, lru_link);
 			assert(ret.v != NULL);
-			ckh_remove(&prof_tdata->bt2cnt, ret.p->ctx->bt, NULL,
-			    NULL);
+			if (ckh_remove(&prof_tdata->bt2cnt, ret.p->ctx->bt,
+			    NULL, NULL))
+				assert(false);
 			ql_remove(&prof_tdata->lru_ql, ret.p, lru_link);
 			prof_ctx_merge(ret.p->ctx, ret.p);
 			/* ret can now be re-used. */
@@ -523,8 +536,7 @@ prof_lookup(prof_bt_t *bt)
 		ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link);
 		malloc_mutex_lock(&ctx.p->lock);
 		ql_tail_insert(&ctx.p->cnts_ql, ret.p, cnts_link);
-		if (new_ctx)
-			ctx.p->cnt_merged.curobjs--;
+		ctx.p->cnt_merged.curobjs--;
 		malloc_mutex_unlock(&ctx.p->lock);
 	} else {
 		/* Move ret to the front of the LRU. */
@@ -650,7 +662,8 @@ prof_ctx_destroy(prof_ctx_t *ctx)
 	assert(ctx->cnt_merged.accumobjs == 0);
 	assert(ctx->cnt_merged.accumbytes == 0);
 	/* Remove ctx from bt2ctx. */
-	ckh_remove(&bt2ctx, ctx->bt, NULL, NULL);
+	if (ckh_remove(&bt2ctx, ctx->bt, NULL, NULL))
+		assert(false);
 	prof_leave();
 	/* Destroy ctx. */
 	malloc_mutex_unlock(&ctx->lock);

From c67e4fdc712aa5b818d69b7ef8e3963441febb16 Mon Sep 17 00:00:00 2001
From: Jason Evans
Date: Wed, 31 Aug 2011 15:19:13 -0700
Subject: [PATCH 11/11] Update ChangeLog for 2.2.3.

---
 ChangeLog | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/ChangeLog b/ChangeLog
index fd9ee545..66032b26 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -6,6 +6,20 @@ found in the git revision history:
     http://www.canonware.com/cgi-bin/gitweb.cgi?p=jemalloc.git
     git://canonware.com/jemalloc.git
 
+* 2.2.3 (August 31, 2011)
+
+  This version fixes numerous bugs related to heap profiling.
+
+  Bug fixes:
+  - Fix a prof-related race condition.  This bug could cause memory corruption,
+    but only occurred in non-default configurations (prof_accum:false).
+  - Fix off-by-one backtracing issues (make sure that prof_alloc_prep() is
+    excluded from backtraces).
+  - Fix a prof-related bug in realloc() (only triggered by OOM errors).
+  - Fix prof-related bugs in allocm() and rallocm().
+  - Fix prof_tdata_cleanup() for --disable-tls builds.
+  - Fix a relative include path, to fix objdir builds.
+
 * 2.2.2 (July 30, 2011)
 
   Bug fixes: