Merge branch 'dev'
This commit is contained in:
commit
0a36622dd1
@ -6,6 +6,15 @@ found in the git revision history:
|
||||
http://www.canonware.com/cgi-bin/gitweb.cgi?p=jemalloc.git
|
||||
git://canonware.com/jemalloc.git
|
||||
|
||||
* 2.0.1
|
||||
|
||||
Bug fixes:
|
||||
- Fix a race condition in heap profiling that could cause undefined behavior
|
||||
if opt.prof_accum were disabled.
|
||||
- Add missing mutex unlocks for some OOM error paths in the heap profiling
|
||||
code.
|
||||
- Fix a compilation error for non-C99 builds.
|
||||
|
||||
* 2.0.0
|
||||
|
||||
This version focuses on the experimental *allocm() API, and on improved
|
||||
@ -15,24 +24,23 @@ found in the git revision history:
|
||||
New features:
|
||||
- Implement the experimental {,r,s,d}allocm() API, which provides a superset
|
||||
of the functionality available via malloc(), calloc(), posix_memalign(),
|
||||
realloc(), malloc_usable_size(), and free(). These functions can be used
|
||||
to allocate/reallocate aligned zeroed memory, ask for optional extra
|
||||
memory during reallocation, prevent object movement during reallocation,
|
||||
etc.
|
||||
realloc(), malloc_usable_size(), and free(). These functions can be used to
|
||||
allocate/reallocate aligned zeroed memory, ask for optional extra memory
|
||||
during reallocation, prevent object movement during reallocation, etc.
|
||||
- Replace JEMALLOC_OPTIONS/JEMALLOC_PROF_PREFIX with MALLOC_CONF, which is
|
||||
more human-readable and more flexible. For example:
|
||||
JEMALLOC_OPTIONS=AJP
|
||||
is now:
|
||||
MALLOC_CONF=abort:true,fill:true,stats_print:true
|
||||
- Port to Apple OS X. Sponsored by Mozilla.
|
||||
- Make it possible for the application to control thread-->arena mappings
|
||||
via the "thread.arena" mallctl.
|
||||
- Add compile-time support for all TLS-related functionality via pthreads
|
||||
TSD. This is mainly of interest for OS X, which does not support TLS, but
|
||||
has a TSD implementation with similar performance.
|
||||
- Make it possible for the application to control thread-->arena mappings via
|
||||
the "thread.arena" mallctl.
|
||||
- Add compile-time support for all TLS-related functionality via pthreads TSD.
|
||||
This is mainly of interest for OS X, which does not support TLS, but has a
|
||||
TSD implementation with similar performance.
|
||||
- Override memalign() and valloc() if they are provided by the system.
|
||||
- Add the "arenas.purge" mallctl, which can be used to synchronously purge
|
||||
all dirty unused pages.
|
||||
- Add the "arenas.purge" mallctl, which can be used to synchronously purge all
|
||||
dirty unused pages.
|
||||
- Make cumulative heap profiling data optional, so that it is possible to
|
||||
limit the amount of memory consumed by heap profiling data structures.
|
||||
- Add per-thread allocation counters that can be accessed via the
|
||||
|
@ -304,8 +304,10 @@ arena_run_reg_dalloc(arena_run_t *run, void *ptr)
|
||||
static inline void
|
||||
arena_chunk_validate_zeroed(arena_chunk_t *chunk, size_t run_ind)
|
||||
{
|
||||
size_t i;
|
||||
size_t *p = (size_t *)((uintptr_t)chunk + (run_ind << PAGE_SHIFT));
|
||||
for (size_t i = 0; i < PAGE_SIZE / sizeof(size_t); i++)
|
||||
|
||||
for (i = 0; i < PAGE_SIZE / sizeof(size_t); i++)
|
||||
assert(p[i] == 0);
|
||||
}
|
||||
#endif
|
||||
|
@ -255,6 +255,7 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max)
|
||||
} else \
|
||||
return;
|
||||
|
||||
assert(nignore <= 3);
|
||||
assert(max <= (1U << opt_lg_prof_bt_max));
|
||||
|
||||
BT_FRAME(0)
|
||||
@ -398,7 +399,7 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max)
|
||||
BT_FRAME(126)
|
||||
BT_FRAME(127)
|
||||
|
||||
/* Extras to compensate for NIGNORE. */
|
||||
/* Extras to compensate for nignore. */
|
||||
BT_FRAME(128)
|
||||
BT_FRAME(129)
|
||||
BT_FRAME(130)
|
||||
@ -496,8 +497,10 @@ prof_lookup(prof_bt_t *bt)
|
||||
opt_lg_prof_tcmax));
|
||||
/* Allocate and partially initialize a new cnt. */
|
||||
ret.v = imalloc(sizeof(prof_thr_cnt_t));
|
||||
if (ret.p == NULL)
|
||||
if (ret.p == NULL) {
|
||||
malloc_mutex_unlock(&ctx.p->lock);
|
||||
return (NULL);
|
||||
}
|
||||
ql_elm_new(ret.p, cnts_link);
|
||||
ql_elm_new(ret.p, lru_link);
|
||||
}
|
||||
@ -506,6 +509,7 @@ prof_lookup(prof_bt_t *bt)
|
||||
ret.p->epoch = 0;
|
||||
memset(&ret.p->cnts, 0, sizeof(prof_cnt_t));
|
||||
if (ckh_insert(&prof_tdata->bt2cnt, btkey.v, ret.v)) {
|
||||
malloc_mutex_unlock(&ctx.p->lock);
|
||||
idalloc(ret.v);
|
||||
return (NULL);
|
||||
}
|
||||
@ -625,11 +629,14 @@ prof_ctx_destroy(prof_ctx_t *ctx)
|
||||
/*
|
||||
* Check that ctx is still unused by any thread cache before destroying
|
||||
* it. prof_lookup() interlocks bt2ctx_mtx and ctx->lock in order to
|
||||
* avoid a race condition with this function.
|
||||
* avoid a race condition with this function, and prof_ctx_merge()
|
||||
* artificially raises ctx->cnt_merged.curobjs in order to avoid a race
|
||||
* between the main body of prof_ctx_merge() and entry into this
|
||||
* function.
|
||||
*/
|
||||
prof_enter();
|
||||
malloc_mutex_lock(&ctx->lock);
|
||||
if (ql_first(&ctx->cnts_ql) == NULL && ctx->cnt_merged.curobjs == 0) {
|
||||
if (ql_first(&ctx->cnts_ql) == NULL && ctx->cnt_merged.curobjs == 1) {
|
||||
assert(ctx->cnt_merged.curbytes == 0);
|
||||
assert(ctx->cnt_merged.accumobjs == 0);
|
||||
assert(ctx->cnt_merged.accumbytes == 0);
|
||||
@ -642,6 +649,8 @@ prof_ctx_destroy(prof_ctx_t *ctx)
|
||||
malloc_mutex_destroy(&ctx->lock);
|
||||
idalloc(ctx);
|
||||
} else {
|
||||
/* Compensate for increment in prof_ctx_merge(). */
|
||||
ctx->cnt_merged.curobjs--;
|
||||
malloc_mutex_unlock(&ctx->lock);
|
||||
prof_leave();
|
||||
}
|
||||
@ -660,9 +669,23 @@ prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt)
|
||||
ctx->cnt_merged.accumbytes += cnt->cnts.accumbytes;
|
||||
ql_remove(&ctx->cnts_ql, cnt, cnts_link);
|
||||
if (opt_prof_accum == false && ql_first(&ctx->cnts_ql) == NULL &&
|
||||
ctx->cnt_merged.curobjs == 0)
|
||||
ctx->cnt_merged.curobjs == 0) {
|
||||
/*
|
||||
* Artificially raise ctx->cnt_merged.curobjs in order to keep
|
||||
* another thread from winning the race to destroy ctx while
|
||||
* this one has ctx->lock dropped. Without this, it would be
|
||||
* possible for another thread to:
|
||||
*
|
||||
* 1) Sample an allocation associated with ctx.
|
||||
* 2) Deallocate the sampled object.
|
||||
* 3) Successfully prof_ctx_destroy(ctx).
|
||||
*
|
||||
* The result would be that ctx no longer exists by the time
|
||||
* this thread accesses it in prof_ctx_destroy().
|
||||
*/
|
||||
ctx->cnt_merged.curobjs++;
|
||||
destroy = true;
|
||||
else
|
||||
} else
|
||||
destroy = false;
|
||||
malloc_mutex_unlock(&ctx->lock);
|
||||
if (destroy)
|
||||
|
Loading…
Reference in New Issue
Block a user