Merge branch 'dev'

Jason Evans 2010-10-29 20:21:45 -07:00
commit 0a36622dd1
3 changed files with 67 additions and 34 deletions

View File

@@ -6,6 +6,15 @@ found in the git revision history:
     http://www.canonware.com/cgi-bin/gitweb.cgi?p=jemalloc.git
     git://canonware.com/jemalloc.git
 
+* 2.0.1
+
+  Bug fixes:
+  - Fix a race condition in heap profiling that could cause undefined behavior
+    if opt.prof_accum were disabled.
+  - Add missing mutex unlocks for some OOM error paths in the heap profiling
+    code.
+  - Fix a compilation error for non-C99 builds.
+
 * 2.0.0
 
   This version focuses on the experimental *allocm() API, and on improved
@@ -13,35 +22,34 @@ found in the git revision history:
   improvements are also included.
 
   New features:
   - Implement the experimental {,r,s,d}allocm() API, which provides a superset
     of the functionality available via malloc(), calloc(), posix_memalign(),
-    realloc(), malloc_usable_size(), and free(). These functions can be used
-    to allocate/reallocate aligned zeroed memory, ask for optional extra
-    memory during reallocation, prevent object movement during reallocation,
-    etc.
-  - Replace JEMALLOC_OPTIONS/JEMALLOC_PROF_PREFIX with MALLOC_CONF, which is
-    more human-readable, and more flexible. For example:
-      JEMALLOC_OPTIONS=AJP
-    is now:
-      MALLOC_CONF=abort:true,fill:true,stats_print:true
-  - Port to Apple OS X. Sponsored by Mozilla.
-  - Make it possible for the application to control thread-->arena mappings
-    via the "thread.arena" mallctl.
-  - Add compile-time support for all TLS-related functionality via pthreads
-    TSD. This is mainly of interest for OS X, which does not support TLS, but
-    has a TSD implementation with similar performance.
-  - Override memalign() and valloc() if they are provided by the system.
-  - Add the "arenas.purge" mallctl, which can be used to synchronously purge
-    all dirty unused pages.
-  - Make cumulative heap profiling data optional, so that it is possible to
-    limit the amount of memory consumed by heap profiling data structures.
-  - Add per thread allocation counters that can be accessed via the
-    "thread.allocated" and "thread.deallocated" mallctls.
+    realloc(), malloc_usable_size(), and free(). These functions can be used to
+    allocate/reallocate aligned zeroed memory, ask for optional extra memory
+    during reallocation, prevent object movement during reallocation, etc.
+  - Replace JEMALLOC_OPTIONS/JEMALLOC_PROF_PREFIX with MALLOC_CONF, which is
+    more human-readable, and more flexible. For example:
+      JEMALLOC_OPTIONS=AJP
+    is now:
+      MALLOC_CONF=abort:true,fill:true,stats_print:true
+  - Port to Apple OS X. Sponsored by Mozilla.
+  - Make it possible for the application to control thread-->arena mappings via
+    the "thread.arena" mallctl.
+  - Add compile-time support for all TLS-related functionality via pthreads TSD.
+    This is mainly of interest for OS X, which does not support TLS, but has a
+    TSD implementation with similar performance.
+  - Override memalign() and valloc() if they are provided by the system.
+  - Add the "arenas.purge" mallctl, which can be used to synchronously purge all
+    dirty unused pages.
+  - Make cumulative heap profiling data optional, so that it is possible to
+    limit the amount of memory consumed by heap profiling data structures.
+  - Add per thread allocation counters that can be accessed via the
+    "thread.allocated" and "thread.deallocated" mallctls.
 
   Incompatible changes:
   - Remove JEMALLOC_OPTIONS and malloc_options (see MALLOC_CONF above).
   - Increase default backtrace depth from 4 to 128 for heap profiling.
   - Disable interval-based profile dumps by default.
 
   Bug fixes:
   - Remove bad assertions in fork handler functions. These assertions could

View File

@@ -304,8 +304,10 @@ arena_run_reg_dalloc(arena_run_t *run, void *ptr)
 static inline void
 arena_chunk_validate_zeroed(arena_chunk_t *chunk, size_t run_ind)
 {
+	size_t i;
 	size_t *p = (size_t *)((uintptr_t)chunk + (run_ind << PAGE_SHIFT));
-	for (size_t i = 0; i < PAGE_SIZE / sizeof(size_t); i++)
+
+	for (i = 0; i < PAGE_SIZE / sizeof(size_t); i++)
 		assert(p[i] == 0);
 }
 #endif
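The hunk above is the ChangeLog's "compilation error for non-C99 builds" fix: a C89 compiler rejects a declaration inside a for-init clause, so the loop index is hoisted to the top of the block. A minimal standalone sketch of the same idiom (hypothetical names, not jemalloc code):

#include <assert.h>
#include <stddef.h>

/* C89-friendly: declare the loop index at the top of the block. */
static void
validate_zeroed(const size_t *p, size_t nwords)
{
	size_t i;

	for (i = 0; i < nwords; i++)
		assert(p[i] == 0);
}

/*
 * The pre-fix form used a C99 for-init declaration, which a strict C89
 * compiler (e.g. "cc -std=c89 -pedantic-errors") rejects:
 *
 *	for (size_t i = 0; i < nwords; i++)
 *		assert(p[i] == 0);
 */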

View File

@@ -255,6 +255,7 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max)
 	} else \
 		return;
 
+	assert(nignore <= 3);
 	assert(max <= (1U << opt_lg_prof_bt_max));
 
 	BT_FRAME(0)
@@ -398,7 +399,7 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max)
 	BT_FRAME(126)
 	BT_FRAME(127)
 
-	/* Extras to compensate for NIGNORE. */
+	/* Extras to compensate for nignore. */
 	BT_FRAME(128)
 	BT_FRAME(129)
 	BT_FRAME(130)
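The new assert(nignore <= 3) and the renamed comment both relate to how prof_backtrace() unrolls one BT_FRAME() expansion per stack frame: the first nignore frames belong to the profiler itself and are skipped, so a few expansions beyond the maximum backtrace depth are needed to still deliver max usable frames. A hedged sketch of that unrolled-capture pattern, using GCC's __builtin_frame_address/__builtin_return_address builtins and hypothetical names (not the actual jemalloc macro):

#include <stddef.h>

#define CAPTURE_FRAME(i) \
	if ((i) < nignore + max) { \
		void *p; \
		if (__builtin_frame_address(i) == NULL) \
			return (len); \
		p = __builtin_return_address(i); \
		if (p == NULL) \
			return (len); \
		if ((i) >= nignore) { \
			vec[(i) - nignore] = p; \
			len = (i) - nignore + 1; \
		} \
	} else \
		return (len);

/* Skips the first `nignore' frames; returns the number of frames captured. */
static unsigned
capture_backtrace(void **vec, unsigned nignore, unsigned max)
{
	unsigned len = 0;

	CAPTURE_FRAME(0)
	CAPTURE_FRAME(1)
	CAPTURE_FRAME(2)
	CAPTURE_FRAME(3)
	/* ... one expansion per supported depth, plus extras for nignore ... */
	return (len);
}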
@@ -496,8 +497,10 @@ prof_lookup(prof_bt_t *bt)
 		    opt_lg_prof_tcmax));
 		/* Allocate and partially initialize a new cnt. */
 		ret.v = imalloc(sizeof(prof_thr_cnt_t));
-		if (ret.p == NULL)
+		if (ret.p == NULL) {
+			malloc_mutex_unlock(&ctx.p->lock);
 			return (NULL);
+		}
 		ql_elm_new(ret.p, cnts_link);
 		ql_elm_new(ret.p, lru_link);
 	}
@@ -506,6 +509,7 @@ prof_lookup(prof_bt_t *bt)
 		ret.p->epoch = 0;
 		memset(&ret.p->cnts, 0, sizeof(prof_cnt_t));
 		if (ckh_insert(&prof_tdata->bt2cnt, btkey.v, ret.v)) {
+			malloc_mutex_unlock(&ctx.p->lock);
 			idalloc(ret.v);
 			return (NULL);
 		}
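Both prof_lookup() hunks apply the same rule from the 2.0.1 bug-fix list: any early return taken while ctx.p->lock is held must release the lock first, otherwise an allocation failure leaves the mutex locked and the next thread that touches that ctx deadlocks. A stripped-down sketch of the pattern with hypothetical names (not the jemalloc API):

#include <pthread.h>
#include <stdlib.h>

/* Hypothetical record protected by its own mutex. */
typedef struct {
	pthread_mutex_t	lock;
	long		nrefs;
} record_t;

/* Returns NULL on OOM; every exit path leaves rec->lock released. */
static void *
attach_counter(record_t *rec)
{
	void *cnt;

	pthread_mutex_lock(&rec->lock);
	cnt = malloc(64);
	if (cnt == NULL) {
		/* The bug was returning here without this unlock. */
		pthread_mutex_unlock(&rec->lock);
		return (NULL);
	}
	rec->nrefs++;
	pthread_mutex_unlock(&rec->lock);
	return (cnt);
}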
@@ -625,11 +629,14 @@ prof_ctx_destroy(prof_ctx_t *ctx)
 	/*
 	 * Check that ctx is still unused by any thread cache before destroying
 	 * it. prof_lookup() interlocks bt2ctx_mtx and ctx->lock in order to
-	 * avoid a race condition with this function.
+	 * avoid a race condition with this function, and prof_ctx_merge()
+	 * artificially raises ctx->cnt_merged.curobjs in order to avoid a race
+	 * between the main body of prof_ctx_merge() and entry into this
+	 * function.
 	 */
 	prof_enter();
 	malloc_mutex_lock(&ctx->lock);
-	if (ql_first(&ctx->cnts_ql) == NULL && ctx->cnt_merged.curobjs == 0) {
+	if (ql_first(&ctx->cnts_ql) == NULL && ctx->cnt_merged.curobjs == 1) {
 		assert(ctx->cnt_merged.curbytes == 0);
 		assert(ctx->cnt_merged.accumobjs == 0);
 		assert(ctx->cnt_merged.accumbytes == 0);
@@ -642,6 +649,8 @@ prof_ctx_destroy(prof_ctx_t *ctx)
 		malloc_mutex_destroy(&ctx->lock);
 		idalloc(ctx);
 	} else {
+		/* Compensate for increment in prof_ctx_merge(). */
+		ctx->cnt_merged.curobjs--;
 		malloc_mutex_unlock(&ctx->lock);
 		prof_leave();
 	}
@@ -660,9 +669,23 @@ prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt)
 	ctx->cnt_merged.accumbytes += cnt->cnts.accumbytes;
 	ql_remove(&ctx->cnts_ql, cnt, cnts_link);
 	if (opt_prof_accum == false && ql_first(&ctx->cnts_ql) == NULL &&
-	    ctx->cnt_merged.curobjs == 0)
+	    ctx->cnt_merged.curobjs == 0) {
+		/*
+		 * Artificially raise ctx->cnt_merged.curobjs in order to keep
+		 * another thread from winning the race to destroy ctx while
+		 * this one has ctx->lock dropped. Without this, it would be
+		 * possible for another thread to:
+		 *
+		 * 1) Sample an allocation associated with ctx.
+		 * 2) Deallocate the sampled object.
+		 * 3) Successfully prof_ctx_destroy(ctx).
+		 *
+		 * The result would be that ctx no longer exists by the time
+		 * this thread accesses it in prof_ctx_destroy().
+		 */
+		ctx->cnt_merged.curobjs++;
 		destroy = true;
-	else
+	} else
 		destroy = false;
 	malloc_mutex_unlock(&ctx->lock);
 	if (destroy)
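The last two hunks close the race described in the new comments: prof_ctx_merge() must drop ctx->lock before the locks needed by prof_ctx_destroy() can be taken, and in that window another thread could observe an empty ctx and destroy it first. Holding a placeholder count across the unlocked window, then treating "count == 1" as "only the placeholder remains", rules that out. A hedged sketch of the same idea using plain pthreads and hypothetical names (not the jemalloc locking API):

#include <pthread.h>
#include <stdbool.h>

/* Hypothetical context guarded by its own lock plus a global registry lock. */
typedef struct {
	pthread_mutex_t	lock;
	long		curobjs;	/* live objects attributed to this ctx */
} ctx_t;

static pthread_mutex_t registry_lock = PTHREAD_MUTEX_INITIALIZER;

/* Called with ctx->lock held; returns true if the caller should try to destroy ctx. */
static bool
ctx_release_one(ctx_t *ctx)
{
	bool destroy = false;

	ctx->curobjs--;
	if (ctx->curobjs == 0) {
		/*
		 * Take a placeholder "reference" so that no other thread can
		 * see curobjs == 0 and free ctx while this thread has dropped
		 * ctx->lock in order to acquire registry_lock.
		 */
		ctx->curobjs++;
		destroy = true;
	}
	pthread_mutex_unlock(&ctx->lock);
	return (destroy);
}

static void
ctx_try_destroy(ctx_t *ctx)
{
	pthread_mutex_lock(&registry_lock);
	pthread_mutex_lock(&ctx->lock);
	if (ctx->curobjs == 1) {
		/* Only the placeholder remains; unregister and free ctx. */
		pthread_mutex_unlock(&ctx->lock);
		/* ... remove from registry, destroy the lock, free ctx ... */
	} else {
		/* Another thread revived ctx; drop the placeholder and keep it. */
		ctx->curobjs--;
		pthread_mutex_unlock(&ctx->lock);
	}
	pthread_mutex_unlock(&registry_lock);
}

Without the placeholder, a second thread could sample a new allocation against ctx, free it, and complete the destroy path before the first thread re-acquires the locks, which is exactly the interleaving enumerated in the prof_ctx_merge() comment.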