diff --git a/ChangeLog b/ChangeLog index fd9ee545..66032b26 100644 --- a/ChangeLog +++ b/ChangeLog @@ -6,6 +6,20 @@ found in the git revision history: http://www.canonware.com/cgi-bin/gitweb.cgi?p=jemalloc.git git://canonware.com/jemalloc.git +* 2.2.3 (August 31, 2011) + + This version fixes numerous bugs related to heap profiling. + + Bug fixes: + - Fix a prof-related race condition. This bug could cause memory corruption, + but only occurred in non-default configurations (prof_accum:false). + - Fix off-by-one backtracing issues (make sure that prof_alloc_prep() is + excluded from backtraces). + - Fix a prof-related bug in realloc() (only triggered by OOM errors). + - Fix prof-related bugs in allocm() and rallocm(). + - Fix prof_tdata_cleanup() for --disable-tls builds. + - Fix a relative include path, to fix objdir builds. + * 2.2.2 (July 30, 2011) Bug fixes: diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 13f3aae0..7a32879a 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -2025,7 +2025,7 @@ malloc_conf = "xmalloc:true";]]> swap.fds (int *) - r- + rw [] When written to, the files associated with the diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index e9d60da7..a44f0978 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -33,7 +33,7 @@ #define JEMALLOC_MANGLE #include "../jemalloc@install_suffix@.h" -#include "private_namespace.h" +#include "jemalloc/internal/private_namespace.h" #if (defined(JEMALLOC_OSATOMIC) || defined(JEMALLOC_OSSPIN)) #include diff --git a/include/jemalloc/internal/private_namespace.h b/include/jemalloc/internal/private_namespace.h index 2cfb1716..d4f5f96d 100644 --- a/include/jemalloc/internal/private_namespace.h +++ b/include/jemalloc/internal/private_namespace.h @@ -145,7 +145,6 @@ #define malloc_write JEMALLOC_N(malloc_write) #define mb_write JEMALLOC_N(mb_write) #define pow2_ceil JEMALLOC_N(pow2_ceil) -#define prof_alloc_prep JEMALLOC_N(prof_alloc_prep) #define prof_backtrace JEMALLOC_N(prof_backtrace) #define prof_boot0 JEMALLOC_N(prof_boot0) #define prof_boot1 JEMALLOC_N(prof_boot1) diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index f9438735..e9064ba6 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -227,9 +227,60 @@ bool prof_boot2(void); /******************************************************************************/ #ifdef JEMALLOC_H_INLINES +#define PROF_ALLOC_PREP(nignore, size, ret) do { \ + prof_tdata_t *prof_tdata; \ + prof_bt_t bt; \ + \ + assert(size == s2u(size)); \ + \ + prof_tdata = PROF_TCACHE_GET(); \ + if (prof_tdata == NULL) { \ + prof_tdata = prof_tdata_init(); \ + if (prof_tdata == NULL) { \ + ret = NULL; \ + break; \ + } \ + } \ + \ + if (opt_prof_active == false) { \ + /* Sampling is currently inactive, so avoid sampling. */\ + ret = (prof_thr_cnt_t *)(uintptr_t)1U; \ + } else if (opt_lg_prof_sample == 0) { \ + /* Don't bother with sampling logic, since sampling */\ + /* interval is 1. */\ + bt_init(&bt, prof_tdata->vec); \ + prof_backtrace(&bt, nignore, prof_bt_max); \ + ret = prof_lookup(&bt); \ + } else { \ + if (prof_tdata->threshold == 0) { \ + /* Initialize. Seed the prng differently for */\ + /* each thread. */\ + prof_tdata->prn_state = \ + (uint64_t)(uintptr_t)&size; \ + prof_sample_threshold_update(prof_tdata); \ + } \ + \ + /* Determine whether to capture a backtrace based on */\ + /* whether size is enough for prof_accum to reach */\ + /* prof_tdata->threshold. However, delay updating */\ + /* these variables until prof_{m,re}alloc(), because */\ + /* we don't know for sure that the allocation will */\ + /* succeed. */\ + /* */\ + /* Use subtraction rather than addition to avoid */\ + /* potential integer overflow. */\ + if (size >= prof_tdata->threshold - \ + prof_tdata->accum) { \ + bt_init(&bt, prof_tdata->vec); \ + prof_backtrace(&bt, nignore, prof_bt_max); \ + ret = prof_lookup(&bt); \ + } else \ + ret = (prof_thr_cnt_t *)(uintptr_t)1U; \ + } \ +} while (0) + #ifndef JEMALLOC_ENABLE_INLINE void prof_sample_threshold_update(prof_tdata_t *prof_tdata); -prof_thr_cnt_t *prof_alloc_prep(size_t size); prof_ctx_t *prof_ctx_get(const void *ptr); void prof_ctx_set(const void *ptr, prof_ctx_t *ctx); bool prof_sample_accum_update(size_t size); @@ -272,71 +323,6 @@ prof_sample_threshold_update(prof_tdata_t *prof_tdata) + (uint64_t)1U; } -JEMALLOC_INLINE prof_thr_cnt_t * -prof_alloc_prep(size_t size) -{ -#ifdef JEMALLOC_ENABLE_INLINE - /* This function does not have its own stack frame, because it is inlined. */ -# define NIGNORE 1 -#else -# define NIGNORE 2 -#endif - prof_thr_cnt_t *ret; - prof_tdata_t *prof_tdata; - prof_bt_t bt; - - assert(size == s2u(size)); - - prof_tdata = PROF_TCACHE_GET(); - if (prof_tdata == NULL) { - prof_tdata = prof_tdata_init(); - if (prof_tdata == NULL) - return (NULL); - } - - if (opt_prof_active == false) { - /* Sampling is currently inactive, so avoid sampling. */ - ret = (prof_thr_cnt_t *)(uintptr_t)1U; - } else if (opt_lg_prof_sample == 0) { - /* - * Don't bother with sampling logic, since sampling interval is - * 1. - */ - bt_init(&bt, prof_tdata->vec); - prof_backtrace(&bt, NIGNORE, prof_bt_max); - ret = prof_lookup(&bt); - } else { - if (prof_tdata->threshold == 0) { - /* - * Initialize. Seed the prng differently for each - * thread. - */ - prof_tdata->prn_state = (uint64_t)(uintptr_t)&size; - prof_sample_threshold_update(prof_tdata); - } - - /* - * Determine whether to capture a backtrace based on whether - * size is enough for prof_accum to reach - * prof_tdata->threshold. However, delay updating these - * variables until prof_{m,re}alloc(), because we don't know - * for sure that the allocation will succeed. - * - * Use subtraction rather than addition to avoid potential - * integer overflow. - */ - if (size >= prof_tdata->threshold - prof_tdata->accum) { - bt_init(&bt, prof_tdata->vec); - prof_backtrace(&bt, NIGNORE, prof_bt_max); - ret = prof_lookup(&bt); - } else - ret = (prof_thr_cnt_t *)(uintptr_t)1U; - } - - return (ret); -#undef NIGNORE -} - JEMALLOC_INLINE prof_ctx_t * prof_ctx_get(const void *ptr) { @@ -415,7 +401,7 @@ prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt) * always possible to tell in advance how large an * object's usable size will be, so there should never * be a difference between the size passed to - * prof_alloc_prep() and prof_malloc(). + * PROF_ALLOC_PREP() and prof_malloc(). */ assert((uintptr_t)cnt == (uintptr_t)1U); } @@ -459,7 +445,7 @@ prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt, if (prof_sample_accum_update(size)) { /* * Don't sample. The size passed to - * prof_alloc_prep() was larger than what + * PROF_ALLOC_PREP() was larger than what * actually got allocated, so a backtrace was * captured for this allocation, even though * its actual size was insufficient to cross diff --git a/src/arena.c b/src/arena.c index e00dccc3..e749c1d5 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1657,6 +1657,7 @@ arena_prof_promoted(const void *ptr, size_t size) assert(ptr != NULL); assert(CHUNK_ADDR2BASE(ptr) != ptr); assert(isalloc(ptr) == PAGE_SIZE); + assert(size <= small_maxclass); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; diff --git a/src/jemalloc.c b/src/jemalloc.c index e2875169..fd8bf52f 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -84,6 +84,7 @@ static void malloc_conf_error(const char *msg, const char *k, size_t klen, const char *v, size_t vlen); static void malloc_conf_init(void); static bool malloc_init_hard(void); +static int imemalign(void **memptr, size_t alignment, size_t size); /******************************************************************************/ /* malloc_message() setup. */ @@ -939,7 +940,8 @@ JEMALLOC_P(malloc)(size_t size) #ifdef JEMALLOC_PROF if (opt_prof) { usize = s2u(size); - if ((cnt = prof_alloc_prep(usize)) == NULL) { + PROF_ALLOC_PREP(1, usize, cnt); + if (cnt == NULL) { ret = NULL; goto OOM; } @@ -988,9 +990,15 @@ RETURN: } JEMALLOC_ATTR(nonnull(1)) -JEMALLOC_ATTR(visibility("default")) -int -JEMALLOC_P(posix_memalign)(void **memptr, size_t alignment, size_t size) +#ifdef JEMALLOC_PROF +/* + * Avoid any uncertainty as to how many backtrace frames to ignore in + * PROF_ALLOC_PREP(). + */ +JEMALLOC_ATTR(noinline) +#endif +static int +imemalign(void **memptr, size_t alignment, size_t size) { int ret; size_t usize @@ -1057,7 +1065,8 @@ JEMALLOC_P(posix_memalign)(void **memptr, size_t alignment, size_t size) #ifdef JEMALLOC_PROF if (opt_prof) { - if ((cnt = prof_alloc_prep(usize)) == NULL) { + PROF_ALLOC_PREP(2, usize, cnt); + if (cnt == NULL) { result = NULL; ret = EINVAL; } else { @@ -1110,6 +1119,15 @@ RETURN: return (ret); } +JEMALLOC_ATTR(nonnull(1)) +JEMALLOC_ATTR(visibility("default")) +int +JEMALLOC_P(posix_memalign)(void **memptr, size_t alignment, size_t size) +{ + + return imemalign(memptr, alignment, size); +} + JEMALLOC_ATTR(malloc) JEMALLOC_ATTR(visibility("default")) void * @@ -1165,7 +1183,8 @@ JEMALLOC_P(calloc)(size_t num, size_t size) #ifdef JEMALLOC_PROF if (opt_prof) { usize = s2u(num_size); - if ((cnt = prof_alloc_prep(usize)) == NULL) { + PROF_ALLOC_PREP(1, usize, cnt); + if (cnt == NULL) { ret = NULL; goto RETURN; } @@ -1278,7 +1297,9 @@ JEMALLOC_P(realloc)(void *ptr, size_t size) if (opt_prof) { usize = s2u(size); old_ctx = prof_ctx_get(ptr); - if ((cnt = prof_alloc_prep(usize)) == NULL) { + PROF_ALLOC_PREP(1, usize, cnt); + if (cnt == NULL) { + old_ctx = NULL; ret = NULL; goto OOM; } @@ -1288,8 +1309,13 @@ JEMALLOC_P(realloc)(void *ptr, size_t size) false, false); if (ret != NULL) arena_prof_promoted(ret, usize); - } else + else + old_ctx = NULL; + } else { ret = iralloc(ptr, size, 0, 0, false, false); + if (ret == NULL) + old_ctx = NULL; + } } else #endif { @@ -1327,7 +1353,8 @@ OOM: #ifdef JEMALLOC_PROF if (opt_prof) { usize = s2u(size); - if ((cnt = prof_alloc_prep(usize)) == NULL) + PROF_ALLOC_PREP(1, usize, cnt); + if (cnt == NULL) ret = NULL; else { if (prof_promote && (uintptr_t)cnt != @@ -1432,7 +1459,7 @@ JEMALLOC_P(memalign)(size_t alignment, size_t size) #ifdef JEMALLOC_CC_SILENCE int result = #endif - JEMALLOC_P(posix_memalign)(&ret, alignment, size); + imemalign(&ret, alignment, size); #ifdef JEMALLOC_CC_SILENCE if (result != 0) return (NULL); @@ -1451,7 +1478,7 @@ JEMALLOC_P(valloc)(size_t size) #ifdef JEMALLOC_CC_SILENCE int result = #endif - JEMALLOC_P(posix_memalign)(&ret, PAGE_SIZE, size); + imemalign(&ret, PAGE_SIZE, size); #ifdef JEMALLOC_CC_SILENCE if (result != 0) return (NULL); @@ -1566,14 +1593,14 @@ JEMALLOC_P(allocm)(void **ptr, size_t *rsize, size_t size, int flags) if (malloc_init()) goto OOM; - usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment, - NULL); + usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment, NULL); if (usize == 0) goto OOM; #ifdef JEMALLOC_PROF if (opt_prof) { - if ((cnt = prof_alloc_prep(usize)) == NULL) + PROF_ALLOC_PREP(1, usize, cnt); + if (cnt == NULL) goto OOM; if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize <= small_maxclass) { @@ -1590,7 +1617,7 @@ JEMALLOC_P(allocm)(void **ptr, size_t *rsize, size_t size, int flags) if (p == NULL) goto OOM; } - + prof_malloc(p, usize, cnt); if (rsize != NULL) *rsize = usize; } else @@ -1645,7 +1672,6 @@ JEMALLOC_P(rallocm)(void **ptr, size_t *rsize, size_t size, size_t extra, bool no_move = flags & ALLOCM_NO_MOVE; #ifdef JEMALLOC_PROF prof_thr_cnt_t *cnt; - prof_ctx_t *old_ctx; #endif assert(ptr != NULL); @@ -1660,25 +1686,33 @@ JEMALLOC_P(rallocm)(void **ptr, size_t *rsize, size_t size, size_t extra, /* * usize isn't knowable before iralloc() returns when extra is * non-zero. Therefore, compute its maximum possible value and - * use that in prof_alloc_prep() to decide whether to capture a + * use that in PROF_ALLOC_PREP() to decide whether to capture a * backtrace. prof_realloc() will use the actual usize to * decide whether to sample. */ size_t max_usize = (alignment == 0) ? s2u(size+extra) : sa2u(size+extra, alignment, NULL); + prof_ctx_t *old_ctx = prof_ctx_get(p); old_size = isalloc(p); - old_ctx = prof_ctx_get(p); - if ((cnt = prof_alloc_prep(max_usize)) == NULL) + PROF_ALLOC_PREP(1, max_usize, cnt); + if (cnt == NULL) goto OOM; - if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && max_usize - <= small_maxclass) { + /* + * Use minimum usize to determine whether promotion may happen. + */ + if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U + && ((alignment == 0) ? s2u(size) : sa2u(size, + alignment, NULL)) <= small_maxclass) { q = iralloc(p, small_maxclass+1, (small_maxclass+1 >= size+extra) ? 0 : size+extra - (small_maxclass+1), alignment, zero, no_move); if (q == NULL) goto ERR; - usize = isalloc(q); - arena_prof_promoted(q, usize); + if (max_usize < PAGE_SIZE) { + usize = max_usize; + arena_prof_promoted(q, usize); + } else + usize = isalloc(q); } else { q = iralloc(p, size, extra, alignment, zero, no_move); if (q == NULL) diff --git a/src/prof.c b/src/prof.c index 65493759..8a144b4e 100644 --- a/src/prof.c +++ b/src/prof.c @@ -474,11 +474,23 @@ prof_lookup(prof_bt_t *bt) /* * Artificially raise curobjs, in order to avoid a race * condition with prof_ctx_merge()/prof_ctx_destroy(). + * + * No locking is necessary for ctx here because no other + * threads have had the opportunity to fetch it from + * bt2ctx yet. */ ctx.p->cnt_merged.curobjs++; new_ctx = true; - } else + } else { + /* + * Artificially raise curobjs, in order to avoid a race + * condition with prof_ctx_merge()/prof_ctx_destroy(). + */ + malloc_mutex_lock(&ctx.p->lock); + ctx.p->cnt_merged.curobjs++; + malloc_mutex_unlock(&ctx.p->lock); new_ctx = false; + } prof_leave(); /* Link a prof_thd_cnt_t into ctx for this thread. */ @@ -491,8 +503,9 @@ prof_lookup(prof_bt_t *bt) */ ret.p = ql_last(&prof_tdata->lru_ql, lru_link); assert(ret.v != NULL); - ckh_remove(&prof_tdata->bt2cnt, ret.p->ctx->bt, NULL, - NULL); + if (ckh_remove(&prof_tdata->bt2cnt, ret.p->ctx->bt, + NULL, NULL)) + assert(false); ql_remove(&prof_tdata->lru_ql, ret.p, lru_link); prof_ctx_merge(ret.p->ctx, ret.p); /* ret can now be re-used. */ @@ -503,11 +516,8 @@ prof_lookup(prof_bt_t *bt) /* Allocate and partially initialize a new cnt. */ ret.v = imalloc(sizeof(prof_thr_cnt_t)); if (ret.p == NULL) { - if (new_ctx) { - malloc_mutex_lock(&ctx.p->lock); - ctx.p->cnt_merged.curobjs--; - malloc_mutex_unlock(&ctx.p->lock); - } + if (new_ctx) + prof_ctx_destroy(ctx.p); return (NULL); } ql_elm_new(ret.p, cnts_link); @@ -518,19 +528,15 @@ prof_lookup(prof_bt_t *bt) ret.p->epoch = 0; memset(&ret.p->cnts, 0, sizeof(prof_cnt_t)); if (ckh_insert(&prof_tdata->bt2cnt, btkey.v, ret.v)) { - if (new_ctx) { - malloc_mutex_lock(&ctx.p->lock); - ctx.p->cnt_merged.curobjs--; - malloc_mutex_unlock(&ctx.p->lock); - } + if (new_ctx) + prof_ctx_destroy(ctx.p); idalloc(ret.v); return (NULL); } ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link); malloc_mutex_lock(&ctx.p->lock); ql_tail_insert(&ctx.p->cnts_ql, ret.p, cnts_link); - if (new_ctx) - ctx.p->cnt_merged.curobjs--; + ctx.p->cnt_merged.curobjs--; malloc_mutex_unlock(&ctx.p->lock); } else { /* Move ret to the front of the LRU. */ @@ -644,11 +650,10 @@ prof_ctx_destroy(prof_ctx_t *ctx) /* * Check that ctx is still unused by any thread cache before destroying - * it. prof_lookup() interlocks bt2ctx_mtx and ctx->lock in order to - * avoid a race condition with this function, and prof_ctx_merge() - * artificially raises ctx->cnt_merged.curobjs in order to avoid a race - * between the main body of prof_ctx_merge() and entry into this - * function. + * it. prof_lookup() artificially raises ctx->cnt_merge.curobjs in + * order to avoid a race condition with this function, as does + * prof_ctx_merge() in order to avoid a race between the main body of + * prof_ctx_merge() and entry into this function. */ prof_enter(); malloc_mutex_lock(&ctx->lock); @@ -657,7 +662,8 @@ prof_ctx_destroy(prof_ctx_t *ctx) assert(ctx->cnt_merged.accumobjs == 0); assert(ctx->cnt_merged.accumbytes == 0); /* Remove ctx from bt2ctx. */ - ckh_remove(&bt2ctx, ctx->bt, NULL, NULL); + if (ckh_remove(&bt2ctx, ctx->bt, NULL, NULL)) + assert(false); prof_leave(); /* Destroy ctx. */ malloc_mutex_unlock(&ctx->lock); @@ -665,7 +671,10 @@ prof_ctx_destroy(prof_ctx_t *ctx) malloc_mutex_destroy(&ctx->lock); idalloc(ctx); } else { - /* Compensate for increment in prof_ctx_merge(). */ + /* + * Compensate for increment in prof_ctx_merge() or + * prof_lookup(). + */ ctx->cnt_merged.curobjs--; malloc_mutex_unlock(&ctx->lock); prof_leave(); @@ -1109,7 +1118,6 @@ prof_tdata_init(void) prof_tdata->vec = imalloc(sizeof(void *) * prof_bt_max); if (prof_tdata->vec == NULL) { - ckh_delete(&prof_tdata->bt2cnt); idalloc(prof_tdata); return (NULL); @@ -1127,33 +1135,26 @@ prof_tdata_init(void) static void prof_tdata_cleanup(void *arg) { - prof_tdata_t *prof_tdata; + prof_thr_cnt_t *cnt; + prof_tdata_t *prof_tdata = (prof_tdata_t *)arg; - prof_tdata = PROF_TCACHE_GET(); - if (prof_tdata != NULL) { - prof_thr_cnt_t *cnt; + /* + * Delete the hash table. All of its contents can still be iterated + * over via the LRU. + */ + ckh_delete(&prof_tdata->bt2cnt); - /* - * Delete the hash table. All of its contents can still be - * iterated over via the LRU. - */ - ckh_delete(&prof_tdata->bt2cnt); - - /* - * Iteratively merge cnt's into the global stats and delete - * them. - */ - while ((cnt = ql_last(&prof_tdata->lru_ql, lru_link)) != NULL) { - prof_ctx_merge(cnt->ctx, cnt); - ql_remove(&prof_tdata->lru_ql, cnt, lru_link); - idalloc(cnt); - } - - idalloc(prof_tdata->vec); - - idalloc(prof_tdata); - PROF_TCACHE_SET(NULL); + /* Iteratively merge cnt's into the global stats and delete them. */ + while ((cnt = ql_last(&prof_tdata->lru_ql, lru_link)) != NULL) { + ql_remove(&prof_tdata->lru_ql, cnt, lru_link); + prof_ctx_merge(cnt->ctx, cnt); + idalloc(cnt); } + + idalloc(prof_tdata->vec); + + idalloc(prof_tdata); + PROF_TCACHE_SET(NULL); } void