Merge branch 'dev'

This commit is contained in:
Jason Evans 2010-10-29 20:21:45 -07:00
commit 0a36622dd1
3 changed files with 67 additions and 34 deletions

View File

@ -6,6 +6,15 @@ found in the git revision history:
http://www.canonware.com/cgi-bin/gitweb.cgi?p=jemalloc.git
git://canonware.com/jemalloc.git
* 2.0.1
Bug fixes:
- Fix a race condition in heap profiling that could cause undefined behavior
if opt.prof_accum were disabled.
- Add missing mutex unlocks for some OOM error paths in the heap profiling
code.
- Fix a compilation error for non-C99 builds.
* 2.0.0
This version focuses on the experimental *allocm() API, and on improved
@ -13,35 +22,34 @@ found in the git revision history:
improvements are also included.
New features:
- Implement the experimental {,r,s,d}allocm() API, which provides a superset
of the functionality available via malloc(), calloc(), posix_memalign(),
realloc(), malloc_usable_size(), and free(). These functions can be used
to allocate/reallocate aligned zeroed memory, ask for optional extra
memory during reallocation, prevent object movement during reallocation,
etc.
- Replace JEMALLOC_OPTIONS/JEMALLOC_PROF_PREFIX with MALLOC_CONF, which is
more human-readable, and more flexible. For example:
JEMALLOC_OPTIONS=AJP
is now:
MALLOC_CONF=abort:true,fill:true,stats_print:true
- Port to Apple OS X. Sponsored by Mozilla.
- Make it possible for the application to control thread-->arena mappings
via the "thread.arena" mallctl.
- Add compile-time support for all TLS-related functionality via pthreads
TSD. This is mainly of interest for OS X, which does not support TLS, but
has a TSD implementation with similar performance.
- Override memalign() and valloc() if they are provided by the system.
- Add the "arenas.purge" mallctl, which can be used to synchronously purge
all dirty unused pages.
- Make cumulative heap profiling data optional, so that it is possible to
limit the amount of memory consumed by heap profiling data structures.
- Add per thread allocation counters that can be accessed via the
"thread.allocated" and "thread.deallocated" mallctls.
- Implement the experimental {,r,s,d}allocm() API, which provides a superset
of the functionality available via malloc(), calloc(), posix_memalign(),
realloc(), malloc_usable_size(), and free(). These functions can be used to
allocate/reallocate aligned zeroed memory, ask for optional extra memory
during reallocation, prevent object movement during reallocation, etc.
- Replace JEMALLOC_OPTIONS/JEMALLOC_PROF_PREFIX with MALLOC_CONF, which is
more human-readable, and more flexible. For example:
JEMALLOC_OPTIONS=AJP
is now:
MALLOC_CONF=abort:true,fill:true,stats_print:true
- Port to Apple OS X. Sponsored by Mozilla.
- Make it possible for the application to control thread-->arena mappings via
the "thread.arena" mallctl.
- Add compile-time support for all TLS-related functionality via pthreads TSD.
This is mainly of interest for OS X, which does not support TLS, but has a
TSD implementation with similar performance.
- Override memalign() and valloc() if they are provided by the system.
- Add the "arenas.purge" mallctl, which can be used to synchronously purge all
dirty unused pages.
- Make cumulative heap profiling data optional, so that it is possible to
limit the amount of memory consumed by heap profiling data structures.
- Add per thread allocation counters that can be accessed via the
"thread.allocated" and "thread.deallocated" mallctls.
Incompatible changes:
- Remove JEMALLOC_OPTIONS and malloc_options (see MALLOC_CONF above).
- Increase default backtrace depth from 4 to 128 for heap profiling.
- Disable interval-based profile dumps by default.
- Remove JEMALLOC_OPTIONS and malloc_options (see MALLOC_CONF above).
- Increase default backtrace depth from 4 to 128 for heap profiling.
- Disable interval-based profile dumps by default.
Bug fixes:
- Remove bad assertions in fork handler functions. These assertions could

View File

@ -304,8 +304,10 @@ arena_run_reg_dalloc(arena_run_t *run, void *ptr)
static inline void
arena_chunk_validate_zeroed(arena_chunk_t *chunk, size_t run_ind)
{
size_t i;
size_t *p = (size_t *)((uintptr_t)chunk + (run_ind << PAGE_SHIFT));
for (size_t i = 0; i < PAGE_SIZE / sizeof(size_t); i++)
for (i = 0; i < PAGE_SIZE / sizeof(size_t); i++)
assert(p[i] == 0);
}
#endif

View File

@ -255,6 +255,7 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max)
} else \
return;
assert(nignore <= 3);
assert(max <= (1U << opt_lg_prof_bt_max));
BT_FRAME(0)
@ -398,7 +399,7 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max)
BT_FRAME(126)
BT_FRAME(127)
/* Extras to compensate for NIGNORE. */
/* Extras to compensate for nignore. */
BT_FRAME(128)
BT_FRAME(129)
BT_FRAME(130)
@ -496,8 +497,10 @@ prof_lookup(prof_bt_t *bt)
opt_lg_prof_tcmax));
/* Allocate and partially initialize a new cnt. */
ret.v = imalloc(sizeof(prof_thr_cnt_t));
if (ret.p == NULL)
if (ret.p == NULL) {
malloc_mutex_unlock(&ctx.p->lock);
return (NULL);
}
ql_elm_new(ret.p, cnts_link);
ql_elm_new(ret.p, lru_link);
}
@ -506,6 +509,7 @@ prof_lookup(prof_bt_t *bt)
ret.p->epoch = 0;
memset(&ret.p->cnts, 0, sizeof(prof_cnt_t));
if (ckh_insert(&prof_tdata->bt2cnt, btkey.v, ret.v)) {
malloc_mutex_unlock(&ctx.p->lock);
idalloc(ret.v);
return (NULL);
}
@ -625,11 +629,14 @@ prof_ctx_destroy(prof_ctx_t *ctx)
/*
* Check that ctx is still unused by any thread cache before destroying
* it. prof_lookup() interlocks bt2ctx_mtx and ctx->lock in order to
* avoid a race condition with this function.
* avoid a race condition with this function, and prof_ctx_merge()
* artificially raises ctx->cnt_merged.curobjs in order to avoid a race
* between the main body of prof_ctx_merge() and entry into this
* function.
*/
prof_enter();
malloc_mutex_lock(&ctx->lock);
if (ql_first(&ctx->cnts_ql) == NULL && ctx->cnt_merged.curobjs == 0) {
if (ql_first(&ctx->cnts_ql) == NULL && ctx->cnt_merged.curobjs == 1) {
assert(ctx->cnt_merged.curbytes == 0);
assert(ctx->cnt_merged.accumobjs == 0);
assert(ctx->cnt_merged.accumbytes == 0);
@ -642,6 +649,8 @@ prof_ctx_destroy(prof_ctx_t *ctx)
malloc_mutex_destroy(&ctx->lock);
idalloc(ctx);
} else {
/* Compensate for increment in prof_ctx_merge(). */
ctx->cnt_merged.curobjs--;
malloc_mutex_unlock(&ctx->lock);
prof_leave();
}
@ -660,9 +669,23 @@ prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt)
ctx->cnt_merged.accumbytes += cnt->cnts.accumbytes;
ql_remove(&ctx->cnts_ql, cnt, cnts_link);
if (opt_prof_accum == false && ql_first(&ctx->cnts_ql) == NULL &&
ctx->cnt_merged.curobjs == 0)
ctx->cnt_merged.curobjs == 0) {
/*
* Artificially raise ctx->cnt_merged.curobjs in order to keep
* another thread from winning the race to destroy ctx while
* this one has ctx->lock dropped. Without this, it would be
* possible for another thread to:
*
* 1) Sample an allocation associated with ctx.
* 2) Deallocate the sampled object.
* 3) Successfully prof_ctx_destroy(ctx).
*
* The result would be that ctx no longer exists by the time
* this thread accesses it in prof_ctx_destroy().
*/
ctx->cnt_merged.curobjs++;
destroy = true;
else
} else
destroy = false;
malloc_mutex_unlock(&ctx->lock);
if (destroy)