diff --git a/ChangeLog b/ChangeLog
index fd9ee545..66032b26 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -6,6 +6,20 @@ found in the git revision history:
http://www.canonware.com/cgi-bin/gitweb.cgi?p=jemalloc.git
git://canonware.com/jemalloc.git
+* 2.2.3 (August 31, 2011)
+
+ This version fixes numerous bugs related to heap profiling.
+
+ Bug fixes:
+ - Fix a prof-related race condition. This bug could cause memory corruption,
+ but only occurred in non-default configurations (prof_accum:false).
+ - Fix off-by-one backtracing issues (make sure that prof_alloc_prep() is
+ excluded from backtraces).
+ - Fix a prof-related bug in realloc() (only triggered by OOM errors).
+ - Fix prof-related bugs in allocm() and rallocm().
+ - Fix prof_tdata_cleanup() for --disable-tls builds.
+ - Fix a relative include path, to fix objdir builds.
+
* 2.2.2 (July 30, 2011)
Bug fixes:
diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in
index 13f3aae0..7a32879a 100644
--- a/doc/jemalloc.xml.in
+++ b/doc/jemalloc.xml.in
@@ -2025,7 +2025,7 @@ malloc_conf = "xmalloc:true";]]>
swap.fds
(int *)
- r-
+ rw
[]
When written to, the files associated with the
diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in
index e9d60da7..a44f0978 100644
--- a/include/jemalloc/internal/jemalloc_internal.h.in
+++ b/include/jemalloc/internal/jemalloc_internal.h.in
@@ -33,7 +33,7 @@
#define JEMALLOC_MANGLE
#include "../jemalloc@install_suffix@.h"
-#include "private_namespace.h"
+#include "jemalloc/internal/private_namespace.h"
#if (defined(JEMALLOC_OSATOMIC) || defined(JEMALLOC_OSSPIN))
#include
diff --git a/include/jemalloc/internal/private_namespace.h b/include/jemalloc/internal/private_namespace.h
index 2cfb1716..d4f5f96d 100644
--- a/include/jemalloc/internal/private_namespace.h
+++ b/include/jemalloc/internal/private_namespace.h
@@ -145,7 +145,6 @@
#define malloc_write JEMALLOC_N(malloc_write)
#define mb_write JEMALLOC_N(mb_write)
#define pow2_ceil JEMALLOC_N(pow2_ceil)
-#define prof_alloc_prep JEMALLOC_N(prof_alloc_prep)
#define prof_backtrace JEMALLOC_N(prof_backtrace)
#define prof_boot0 JEMALLOC_N(prof_boot0)
#define prof_boot1 JEMALLOC_N(prof_boot1)
diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h
index f9438735..e9064ba6 100644
--- a/include/jemalloc/internal/prof.h
+++ b/include/jemalloc/internal/prof.h
@@ -227,9 +227,60 @@ bool prof_boot2(void);
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES
+#define PROF_ALLOC_PREP(nignore, size, ret) do { \
+ prof_tdata_t *prof_tdata; \
+ prof_bt_t bt; \
+ \
+ assert(size == s2u(size)); \
+ \
+ prof_tdata = PROF_TCACHE_GET(); \
+ if (prof_tdata == NULL) { \
+ prof_tdata = prof_tdata_init(); \
+ if (prof_tdata == NULL) { \
+ ret = NULL; \
+ break; \
+ } \
+ } \
+ \
+ if (opt_prof_active == false) { \
+ /* Sampling is currently inactive, so avoid sampling. */\
+ ret = (prof_thr_cnt_t *)(uintptr_t)1U; \
+ } else if (opt_lg_prof_sample == 0) { \
+ /* Don't bother with sampling logic, since sampling */\
+ /* interval is 1. */\
+ bt_init(&bt, prof_tdata->vec); \
+ prof_backtrace(&bt, nignore, prof_bt_max); \
+ ret = prof_lookup(&bt); \
+ } else { \
+ if (prof_tdata->threshold == 0) { \
+ /* Initialize. Seed the prng differently for */\
+ /* each thread. */\
+ prof_tdata->prn_state = \
+ (uint64_t)(uintptr_t)&size; \
+ prof_sample_threshold_update(prof_tdata); \
+ } \
+ \
+ /* Determine whether to capture a backtrace based on */\
+ /* whether size is enough for prof_accum to reach */\
+ /* prof_tdata->threshold. However, delay updating */\
+ /* these variables until prof_{m,re}alloc(), because */\
+ /* we don't know for sure that the allocation will */\
+ /* succeed. */\
+ /* */\
+ /* Use subtraction rather than addition to avoid */\
+ /* potential integer overflow. */\
+ if (size >= prof_tdata->threshold - \
+ prof_tdata->accum) { \
+ bt_init(&bt, prof_tdata->vec); \
+ prof_backtrace(&bt, nignore, prof_bt_max); \
+ ret = prof_lookup(&bt); \
+ } else \
+ ret = (prof_thr_cnt_t *)(uintptr_t)1U; \
+ } \
+} while (0)
+
#ifndef JEMALLOC_ENABLE_INLINE
void prof_sample_threshold_update(prof_tdata_t *prof_tdata);
-prof_thr_cnt_t *prof_alloc_prep(size_t size);
prof_ctx_t *prof_ctx_get(const void *ptr);
void prof_ctx_set(const void *ptr, prof_ctx_t *ctx);
bool prof_sample_accum_update(size_t size);
@@ -272,71 +323,6 @@ prof_sample_threshold_update(prof_tdata_t *prof_tdata)
+ (uint64_t)1U;
}
-JEMALLOC_INLINE prof_thr_cnt_t *
-prof_alloc_prep(size_t size)
-{
-#ifdef JEMALLOC_ENABLE_INLINE
- /* This function does not have its own stack frame, because it is inlined. */
-# define NIGNORE 1
-#else
-# define NIGNORE 2
-#endif
- prof_thr_cnt_t *ret;
- prof_tdata_t *prof_tdata;
- prof_bt_t bt;
-
- assert(size == s2u(size));
-
- prof_tdata = PROF_TCACHE_GET();
- if (prof_tdata == NULL) {
- prof_tdata = prof_tdata_init();
- if (prof_tdata == NULL)
- return (NULL);
- }
-
- if (opt_prof_active == false) {
- /* Sampling is currently inactive, so avoid sampling. */
- ret = (prof_thr_cnt_t *)(uintptr_t)1U;
- } else if (opt_lg_prof_sample == 0) {
- /*
- * Don't bother with sampling logic, since sampling interval is
- * 1.
- */
- bt_init(&bt, prof_tdata->vec);
- prof_backtrace(&bt, NIGNORE, prof_bt_max);
- ret = prof_lookup(&bt);
- } else {
- if (prof_tdata->threshold == 0) {
- /*
- * Initialize. Seed the prng differently for each
- * thread.
- */
- prof_tdata->prn_state = (uint64_t)(uintptr_t)&size;
- prof_sample_threshold_update(prof_tdata);
- }
-
- /*
- * Determine whether to capture a backtrace based on whether
- * size is enough for prof_accum to reach
- * prof_tdata->threshold. However, delay updating these
- * variables until prof_{m,re}alloc(), because we don't know
- * for sure that the allocation will succeed.
- *
- * Use subtraction rather than addition to avoid potential
- * integer overflow.
- */
- if (size >= prof_tdata->threshold - prof_tdata->accum) {
- bt_init(&bt, prof_tdata->vec);
- prof_backtrace(&bt, NIGNORE, prof_bt_max);
- ret = prof_lookup(&bt);
- } else
- ret = (prof_thr_cnt_t *)(uintptr_t)1U;
- }
-
- return (ret);
-#undef NIGNORE
-}
-
JEMALLOC_INLINE prof_ctx_t *
prof_ctx_get(const void *ptr)
{
@@ -415,7 +401,7 @@ prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt)
* always possible to tell in advance how large an
* object's usable size will be, so there should never
* be a difference between the size passed to
- * prof_alloc_prep() and prof_malloc().
+ * PROF_ALLOC_PREP() and prof_malloc().
*/
assert((uintptr_t)cnt == (uintptr_t)1U);
}
@@ -459,7 +445,7 @@ prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt,
if (prof_sample_accum_update(size)) {
/*
* Don't sample. The size passed to
- * prof_alloc_prep() was larger than what
+ * PROF_ALLOC_PREP() was larger than what
* actually got allocated, so a backtrace was
* captured for this allocation, even though
* its actual size was insufficient to cross
diff --git a/src/arena.c b/src/arena.c
index e00dccc3..e749c1d5 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -1657,6 +1657,7 @@ arena_prof_promoted(const void *ptr, size_t size)
assert(ptr != NULL);
assert(CHUNK_ADDR2BASE(ptr) != ptr);
assert(isalloc(ptr) == PAGE_SIZE);
+ assert(size <= small_maxclass);
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
diff --git a/src/jemalloc.c b/src/jemalloc.c
index e2875169..fd8bf52f 100644
--- a/src/jemalloc.c
+++ b/src/jemalloc.c
@@ -84,6 +84,7 @@ static void malloc_conf_error(const char *msg, const char *k, size_t klen,
const char *v, size_t vlen);
static void malloc_conf_init(void);
static bool malloc_init_hard(void);
+static int imemalign(void **memptr, size_t alignment, size_t size);
/******************************************************************************/
/* malloc_message() setup. */
@@ -939,7 +940,8 @@ JEMALLOC_P(malloc)(size_t size)
#ifdef JEMALLOC_PROF
if (opt_prof) {
usize = s2u(size);
- if ((cnt = prof_alloc_prep(usize)) == NULL) {
+ PROF_ALLOC_PREP(1, usize, cnt);
+ if (cnt == NULL) {
ret = NULL;
goto OOM;
}
@@ -988,9 +990,15 @@ RETURN:
}
JEMALLOC_ATTR(nonnull(1))
-JEMALLOC_ATTR(visibility("default"))
-int
-JEMALLOC_P(posix_memalign)(void **memptr, size_t alignment, size_t size)
+#ifdef JEMALLOC_PROF
+/*
+ * Avoid any uncertainty as to how many backtrace frames to ignore in
+ * PROF_ALLOC_PREP().
+ */
+JEMALLOC_ATTR(noinline)
+#endif
+static int
+imemalign(void **memptr, size_t alignment, size_t size)
{
int ret;
size_t usize
@@ -1057,7 +1065,8 @@ JEMALLOC_P(posix_memalign)(void **memptr, size_t alignment, size_t size)
#ifdef JEMALLOC_PROF
if (opt_prof) {
- if ((cnt = prof_alloc_prep(usize)) == NULL) {
+ PROF_ALLOC_PREP(2, usize, cnt);
+ if (cnt == NULL) {
result = NULL;
ret = EINVAL;
} else {
@@ -1110,6 +1119,15 @@ RETURN:
return (ret);
}
+JEMALLOC_ATTR(nonnull(1))
+JEMALLOC_ATTR(visibility("default"))
+int
+JEMALLOC_P(posix_memalign)(void **memptr, size_t alignment, size_t size)
+{
+
+ return imemalign(memptr, alignment, size);
+}
+
JEMALLOC_ATTR(malloc)
JEMALLOC_ATTR(visibility("default"))
void *
@@ -1165,7 +1183,8 @@ JEMALLOC_P(calloc)(size_t num, size_t size)
#ifdef JEMALLOC_PROF
if (opt_prof) {
usize = s2u(num_size);
- if ((cnt = prof_alloc_prep(usize)) == NULL) {
+ PROF_ALLOC_PREP(1, usize, cnt);
+ if (cnt == NULL) {
ret = NULL;
goto RETURN;
}
@@ -1278,7 +1297,9 @@ JEMALLOC_P(realloc)(void *ptr, size_t size)
if (opt_prof) {
usize = s2u(size);
old_ctx = prof_ctx_get(ptr);
- if ((cnt = prof_alloc_prep(usize)) == NULL) {
+ PROF_ALLOC_PREP(1, usize, cnt);
+ if (cnt == NULL) {
+ old_ctx = NULL;
ret = NULL;
goto OOM;
}
@@ -1288,8 +1309,13 @@ JEMALLOC_P(realloc)(void *ptr, size_t size)
false, false);
if (ret != NULL)
arena_prof_promoted(ret, usize);
- } else
+ else
+ old_ctx = NULL;
+ } else {
ret = iralloc(ptr, size, 0, 0, false, false);
+ if (ret == NULL)
+ old_ctx = NULL;
+ }
} else
#endif
{
@@ -1327,7 +1353,8 @@ OOM:
#ifdef JEMALLOC_PROF
if (opt_prof) {
usize = s2u(size);
- if ((cnt = prof_alloc_prep(usize)) == NULL)
+ PROF_ALLOC_PREP(1, usize, cnt);
+ if (cnt == NULL)
ret = NULL;
else {
if (prof_promote && (uintptr_t)cnt !=
@@ -1432,7 +1459,7 @@ JEMALLOC_P(memalign)(size_t alignment, size_t size)
#ifdef JEMALLOC_CC_SILENCE
int result =
#endif
- JEMALLOC_P(posix_memalign)(&ret, alignment, size);
+ imemalign(&ret, alignment, size);
#ifdef JEMALLOC_CC_SILENCE
if (result != 0)
return (NULL);
@@ -1451,7 +1478,7 @@ JEMALLOC_P(valloc)(size_t size)
#ifdef JEMALLOC_CC_SILENCE
int result =
#endif
- JEMALLOC_P(posix_memalign)(&ret, PAGE_SIZE, size);
+ imemalign(&ret, PAGE_SIZE, size);
#ifdef JEMALLOC_CC_SILENCE
if (result != 0)
return (NULL);
@@ -1566,14 +1593,14 @@ JEMALLOC_P(allocm)(void **ptr, size_t *rsize, size_t size, int flags)
if (malloc_init())
goto OOM;
- usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment,
- NULL);
+ usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment, NULL);
if (usize == 0)
goto OOM;
#ifdef JEMALLOC_PROF
if (opt_prof) {
- if ((cnt = prof_alloc_prep(usize)) == NULL)
+ PROF_ALLOC_PREP(1, usize, cnt);
+ if (cnt == NULL)
goto OOM;
if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize <=
small_maxclass) {
@@ -1590,7 +1617,7 @@ JEMALLOC_P(allocm)(void **ptr, size_t *rsize, size_t size, int flags)
if (p == NULL)
goto OOM;
}
-
+ prof_malloc(p, usize, cnt);
if (rsize != NULL)
*rsize = usize;
} else
@@ -1645,7 +1672,6 @@ JEMALLOC_P(rallocm)(void **ptr, size_t *rsize, size_t size, size_t extra,
bool no_move = flags & ALLOCM_NO_MOVE;
#ifdef JEMALLOC_PROF
prof_thr_cnt_t *cnt;
- prof_ctx_t *old_ctx;
#endif
assert(ptr != NULL);
@@ -1660,25 +1686,33 @@ JEMALLOC_P(rallocm)(void **ptr, size_t *rsize, size_t size, size_t extra,
/*
* usize isn't knowable before iralloc() returns when extra is
* non-zero. Therefore, compute its maximum possible value and
- * use that in prof_alloc_prep() to decide whether to capture a
+ * use that in PROF_ALLOC_PREP() to decide whether to capture a
* backtrace. prof_realloc() will use the actual usize to
* decide whether to sample.
*/
size_t max_usize = (alignment == 0) ? s2u(size+extra) :
sa2u(size+extra, alignment, NULL);
+ prof_ctx_t *old_ctx = prof_ctx_get(p);
old_size = isalloc(p);
- old_ctx = prof_ctx_get(p);
- if ((cnt = prof_alloc_prep(max_usize)) == NULL)
+ PROF_ALLOC_PREP(1, max_usize, cnt);
+ if (cnt == NULL)
goto OOM;
- if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && max_usize
- <= small_maxclass) {
+ /*
+ * Use minimum usize to determine whether promotion may happen.
+ */
+ if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U
+ && ((alignment == 0) ? s2u(size) : sa2u(size,
+ alignment, NULL)) <= small_maxclass) {
q = iralloc(p, small_maxclass+1, (small_maxclass+1 >=
size+extra) ? 0 : size+extra - (small_maxclass+1),
alignment, zero, no_move);
if (q == NULL)
goto ERR;
- usize = isalloc(q);
- arena_prof_promoted(q, usize);
+ if (max_usize < PAGE_SIZE) {
+ usize = max_usize;
+ arena_prof_promoted(q, usize);
+ } else
+ usize = isalloc(q);
} else {
q = iralloc(p, size, extra, alignment, zero, no_move);
if (q == NULL)
diff --git a/src/prof.c b/src/prof.c
index 65493759..8a144b4e 100644
--- a/src/prof.c
+++ b/src/prof.c
@@ -474,11 +474,23 @@ prof_lookup(prof_bt_t *bt)
/*
* Artificially raise curobjs, in order to avoid a race
* condition with prof_ctx_merge()/prof_ctx_destroy().
+ *
+ * No locking is necessary for ctx here because no other
+ * threads have had the opportunity to fetch it from
+ * bt2ctx yet.
*/
ctx.p->cnt_merged.curobjs++;
new_ctx = true;
- } else
+ } else {
+ /*
+ * Artificially raise curobjs, in order to avoid a race
+ * condition with prof_ctx_merge()/prof_ctx_destroy().
+ */
+ malloc_mutex_lock(&ctx.p->lock);
+ ctx.p->cnt_merged.curobjs++;
+ malloc_mutex_unlock(&ctx.p->lock);
new_ctx = false;
+ }
prof_leave();
/* Link a prof_thd_cnt_t into ctx for this thread. */
@@ -491,8 +503,9 @@ prof_lookup(prof_bt_t *bt)
*/
ret.p = ql_last(&prof_tdata->lru_ql, lru_link);
assert(ret.v != NULL);
- ckh_remove(&prof_tdata->bt2cnt, ret.p->ctx->bt, NULL,
- NULL);
+ if (ckh_remove(&prof_tdata->bt2cnt, ret.p->ctx->bt,
+ NULL, NULL))
+ assert(false);
ql_remove(&prof_tdata->lru_ql, ret.p, lru_link);
prof_ctx_merge(ret.p->ctx, ret.p);
/* ret can now be re-used. */
@@ -503,11 +516,8 @@ prof_lookup(prof_bt_t *bt)
/* Allocate and partially initialize a new cnt. */
ret.v = imalloc(sizeof(prof_thr_cnt_t));
if (ret.p == NULL) {
- if (new_ctx) {
- malloc_mutex_lock(&ctx.p->lock);
- ctx.p->cnt_merged.curobjs--;
- malloc_mutex_unlock(&ctx.p->lock);
- }
+ if (new_ctx)
+ prof_ctx_destroy(ctx.p);
return (NULL);
}
ql_elm_new(ret.p, cnts_link);
@@ -518,19 +528,15 @@ prof_lookup(prof_bt_t *bt)
ret.p->epoch = 0;
memset(&ret.p->cnts, 0, sizeof(prof_cnt_t));
if (ckh_insert(&prof_tdata->bt2cnt, btkey.v, ret.v)) {
- if (new_ctx) {
- malloc_mutex_lock(&ctx.p->lock);
- ctx.p->cnt_merged.curobjs--;
- malloc_mutex_unlock(&ctx.p->lock);
- }
+ if (new_ctx)
+ prof_ctx_destroy(ctx.p);
idalloc(ret.v);
return (NULL);
}
ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link);
malloc_mutex_lock(&ctx.p->lock);
ql_tail_insert(&ctx.p->cnts_ql, ret.p, cnts_link);
- if (new_ctx)
- ctx.p->cnt_merged.curobjs--;
+ ctx.p->cnt_merged.curobjs--;
malloc_mutex_unlock(&ctx.p->lock);
} else {
/* Move ret to the front of the LRU. */
@@ -644,11 +650,10 @@ prof_ctx_destroy(prof_ctx_t *ctx)
/*
* Check that ctx is still unused by any thread cache before destroying
- * it. prof_lookup() interlocks bt2ctx_mtx and ctx->lock in order to
- * avoid a race condition with this function, and prof_ctx_merge()
- * artificially raises ctx->cnt_merged.curobjs in order to avoid a race
- * between the main body of prof_ctx_merge() and entry into this
- * function.
+ * it. prof_lookup() artificially raises ctx->cnt_merge.curobjs in
+ * order to avoid a race condition with this function, as does
+ * prof_ctx_merge() in order to avoid a race between the main body of
+ * prof_ctx_merge() and entry into this function.
*/
prof_enter();
malloc_mutex_lock(&ctx->lock);
@@ -657,7 +662,8 @@ prof_ctx_destroy(prof_ctx_t *ctx)
assert(ctx->cnt_merged.accumobjs == 0);
assert(ctx->cnt_merged.accumbytes == 0);
/* Remove ctx from bt2ctx. */
- ckh_remove(&bt2ctx, ctx->bt, NULL, NULL);
+ if (ckh_remove(&bt2ctx, ctx->bt, NULL, NULL))
+ assert(false);
prof_leave();
/* Destroy ctx. */
malloc_mutex_unlock(&ctx->lock);
@@ -665,7 +671,10 @@ prof_ctx_destroy(prof_ctx_t *ctx)
malloc_mutex_destroy(&ctx->lock);
idalloc(ctx);
} else {
- /* Compensate for increment in prof_ctx_merge(). */
+ /*
+ * Compensate for increment in prof_ctx_merge() or
+ * prof_lookup().
+ */
ctx->cnt_merged.curobjs--;
malloc_mutex_unlock(&ctx->lock);
prof_leave();
@@ -1109,7 +1118,6 @@ prof_tdata_init(void)
prof_tdata->vec = imalloc(sizeof(void *) * prof_bt_max);
if (prof_tdata->vec == NULL) {
-
ckh_delete(&prof_tdata->bt2cnt);
idalloc(prof_tdata);
return (NULL);
@@ -1127,33 +1135,26 @@ prof_tdata_init(void)
static void
prof_tdata_cleanup(void *arg)
{
- prof_tdata_t *prof_tdata;
+ prof_thr_cnt_t *cnt;
+ prof_tdata_t *prof_tdata = (prof_tdata_t *)arg;
- prof_tdata = PROF_TCACHE_GET();
- if (prof_tdata != NULL) {
- prof_thr_cnt_t *cnt;
+ /*
+ * Delete the hash table. All of its contents can still be iterated
+ * over via the LRU.
+ */
+ ckh_delete(&prof_tdata->bt2cnt);
- /*
- * Delete the hash table. All of its contents can still be
- * iterated over via the LRU.
- */
- ckh_delete(&prof_tdata->bt2cnt);
-
- /*
- * Iteratively merge cnt's into the global stats and delete
- * them.
- */
- while ((cnt = ql_last(&prof_tdata->lru_ql, lru_link)) != NULL) {
- prof_ctx_merge(cnt->ctx, cnt);
- ql_remove(&prof_tdata->lru_ql, cnt, lru_link);
- idalloc(cnt);
- }
-
- idalloc(prof_tdata->vec);
-
- idalloc(prof_tdata);
- PROF_TCACHE_SET(NULL);
+ /* Iteratively merge cnt's into the global stats and delete them. */
+ while ((cnt = ql_last(&prof_tdata->lru_ql, lru_link)) != NULL) {
+ ql_remove(&prof_tdata->lru_ql, cnt, lru_link);
+ prof_ctx_merge(cnt->ctx, cnt);
+ idalloc(cnt);
}
+
+ idalloc(prof_tdata->vec);
+
+ idalloc(prof_tdata);
+ PROF_TCACHE_SET(NULL);
}
void