Merge branch 'dev'
This commit is contained in:
commit
0a36622dd1
@ -6,6 +6,15 @@ found in the git revision history:
|
|||||||
http://www.canonware.com/cgi-bin/gitweb.cgi?p=jemalloc.git
|
http://www.canonware.com/cgi-bin/gitweb.cgi?p=jemalloc.git
|
||||||
git://canonware.com/jemalloc.git
|
git://canonware.com/jemalloc.git
|
||||||
|
|
||||||
|
* 2.0.1
|
||||||
|
|
||||||
|
Bug fixes:
|
||||||
|
- Fix a race condition in heap profiling that could cause undefined behavior
|
||||||
|
if opt.prof_accum were disabled.
|
||||||
|
- Add missing mutex unlocks for some OOM error paths in the heap profiling
|
||||||
|
code.
|
||||||
|
- Fix a compilation error for non-C99 builds.
|
||||||
|
|
||||||
* 2.0.0
|
* 2.0.0
|
||||||
|
|
||||||
This version focuses on the experimental *allocm() API, and on improved
|
This version focuses on the experimental *allocm() API, and on improved
|
||||||
@ -15,24 +24,23 @@ found in the git revision history:
|
|||||||
New features:
|
New features:
|
||||||
- Implement the experimental {,r,s,d}allocm() API, which provides a superset
|
- Implement the experimental {,r,s,d}allocm() API, which provides a superset
|
||||||
of the functionality available via malloc(), calloc(), posix_memalign(),
|
of the functionality available via malloc(), calloc(), posix_memalign(),
|
||||||
realloc(), malloc_usable_size(), and free(). These functions can be used
|
realloc(), malloc_usable_size(), and free(). These functions can be used to
|
||||||
to allocate/reallocate aligned zeroed memory, ask for optional extra
|
allocate/reallocate aligned zeroed memory, ask for optional extra memory
|
||||||
memory during reallocation, prevent object movement during reallocation,
|
during reallocation, prevent object movement during reallocation, etc.
|
||||||
etc.
|
|
||||||
- Replace JEMALLOC_OPTIONS/JEMALLOC_PROF_PREFIX with MALLOC_CONF, which is
|
- Replace JEMALLOC_OPTIONS/JEMALLOC_PROF_PREFIX with MALLOC_CONF, which is
|
||||||
more human-readable, and more flexible. For example:
|
more human-readable, and more flexible. For example:
|
||||||
JEMALLOC_OPTIONS=AJP
|
JEMALLOC_OPTIONS=AJP
|
||||||
is now:
|
is now:
|
||||||
MALLOC_CONF=abort:true,fill:true,stats_print:true
|
MALLOC_CONF=abort:true,fill:true,stats_print:true
|
||||||
- Port to Apple OS X. Sponsored by Mozilla.
|
- Port to Apple OS X. Sponsored by Mozilla.
|
||||||
- Make it possible for the application to control thread-->arena mappings
|
- Make it possible for the application to control thread-->arena mappings via
|
||||||
via the "thread.arena" mallctl.
|
the "thread.arena" mallctl.
|
||||||
- Add compile-time support for all TLS-related functionality via pthreads
|
- Add compile-time support for all TLS-related functionality via pthreads TSD.
|
||||||
TSD. This is mainly of interest for OS X, which does not support TLS, but
|
This is mainly of interest for OS X, which does not support TLS, but has a
|
||||||
has a TSD implementation with similar performance.
|
TSD implementation with similar performance.
|
||||||
- Override memalign() and valloc() if they are provided by the system.
|
- Override memalign() and valloc() if they are provided by the system.
|
||||||
- Add the "arenas.purge" mallctl, which can be used to synchronously purge
|
- Add the "arenas.purge" mallctl, which can be used to synchronously purge all
|
||||||
all dirty unused pages.
|
dirty unused pages.
|
||||||
- Make cumulative heap profiling data optional, so that it is possible to
|
- Make cumulative heap profiling data optional, so that it is possible to
|
||||||
limit the amount of memory consumed by heap profiling data structures.
|
limit the amount of memory consumed by heap profiling data structures.
|
||||||
- Add per thread allocation counters that can be accessed via the
|
- Add per thread allocation counters that can be accessed via the
|
||||||
|
@ -304,8 +304,10 @@ arena_run_reg_dalloc(arena_run_t *run, void *ptr)
|
|||||||
static inline void
|
static inline void
|
||||||
arena_chunk_validate_zeroed(arena_chunk_t *chunk, size_t run_ind)
|
arena_chunk_validate_zeroed(arena_chunk_t *chunk, size_t run_ind)
|
||||||
{
|
{
|
||||||
|
size_t i;
|
||||||
size_t *p = (size_t *)((uintptr_t)chunk + (run_ind << PAGE_SHIFT));
|
size_t *p = (size_t *)((uintptr_t)chunk + (run_ind << PAGE_SHIFT));
|
||||||
for (size_t i = 0; i < PAGE_SIZE / sizeof(size_t); i++)
|
|
||||||
|
for (i = 0; i < PAGE_SIZE / sizeof(size_t); i++)
|
||||||
assert(p[i] == 0);
|
assert(p[i] == 0);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
@ -255,6 +255,7 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max)
|
|||||||
} else \
|
} else \
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
assert(nignore <= 3);
|
||||||
assert(max <= (1U << opt_lg_prof_bt_max));
|
assert(max <= (1U << opt_lg_prof_bt_max));
|
||||||
|
|
||||||
BT_FRAME(0)
|
BT_FRAME(0)
|
||||||
@ -398,7 +399,7 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max)
|
|||||||
BT_FRAME(126)
|
BT_FRAME(126)
|
||||||
BT_FRAME(127)
|
BT_FRAME(127)
|
||||||
|
|
||||||
/* Extras to compensate for NIGNORE. */
|
/* Extras to compensate for nignore. */
|
||||||
BT_FRAME(128)
|
BT_FRAME(128)
|
||||||
BT_FRAME(129)
|
BT_FRAME(129)
|
||||||
BT_FRAME(130)
|
BT_FRAME(130)
|
||||||
@ -496,8 +497,10 @@ prof_lookup(prof_bt_t *bt)
|
|||||||
opt_lg_prof_tcmax));
|
opt_lg_prof_tcmax));
|
||||||
/* Allocate and partially initialize a new cnt. */
|
/* Allocate and partially initialize a new cnt. */
|
||||||
ret.v = imalloc(sizeof(prof_thr_cnt_t));
|
ret.v = imalloc(sizeof(prof_thr_cnt_t));
|
||||||
if (ret.p == NULL)
|
if (ret.p == NULL) {
|
||||||
|
malloc_mutex_unlock(&ctx.p->lock);
|
||||||
return (NULL);
|
return (NULL);
|
||||||
|
}
|
||||||
ql_elm_new(ret.p, cnts_link);
|
ql_elm_new(ret.p, cnts_link);
|
||||||
ql_elm_new(ret.p, lru_link);
|
ql_elm_new(ret.p, lru_link);
|
||||||
}
|
}
|
||||||
@ -506,6 +509,7 @@ prof_lookup(prof_bt_t *bt)
|
|||||||
ret.p->epoch = 0;
|
ret.p->epoch = 0;
|
||||||
memset(&ret.p->cnts, 0, sizeof(prof_cnt_t));
|
memset(&ret.p->cnts, 0, sizeof(prof_cnt_t));
|
||||||
if (ckh_insert(&prof_tdata->bt2cnt, btkey.v, ret.v)) {
|
if (ckh_insert(&prof_tdata->bt2cnt, btkey.v, ret.v)) {
|
||||||
|
malloc_mutex_unlock(&ctx.p->lock);
|
||||||
idalloc(ret.v);
|
idalloc(ret.v);
|
||||||
return (NULL);
|
return (NULL);
|
||||||
}
|
}
|
||||||
@ -625,11 +629,14 @@ prof_ctx_destroy(prof_ctx_t *ctx)
|
|||||||
/*
|
/*
|
||||||
* Check that ctx is still unused by any thread cache before destroying
|
* Check that ctx is still unused by any thread cache before destroying
|
||||||
* it. prof_lookup() interlocks bt2ctx_mtx and ctx->lock in order to
|
* it. prof_lookup() interlocks bt2ctx_mtx and ctx->lock in order to
|
||||||
* avoid a race condition with this function.
|
* avoid a race condition with this function, and prof_ctx_merge()
|
||||||
|
* artificially raises ctx->cnt_merged.curobjs in order to avoid a race
|
||||||
|
* between the main body of prof_ctx_merge() and entry into this
|
||||||
|
* function.
|
||||||
*/
|
*/
|
||||||
prof_enter();
|
prof_enter();
|
||||||
malloc_mutex_lock(&ctx->lock);
|
malloc_mutex_lock(&ctx->lock);
|
||||||
if (ql_first(&ctx->cnts_ql) == NULL && ctx->cnt_merged.curobjs == 0) {
|
if (ql_first(&ctx->cnts_ql) == NULL && ctx->cnt_merged.curobjs == 1) {
|
||||||
assert(ctx->cnt_merged.curbytes == 0);
|
assert(ctx->cnt_merged.curbytes == 0);
|
||||||
assert(ctx->cnt_merged.accumobjs == 0);
|
assert(ctx->cnt_merged.accumobjs == 0);
|
||||||
assert(ctx->cnt_merged.accumbytes == 0);
|
assert(ctx->cnt_merged.accumbytes == 0);
|
||||||
@ -642,6 +649,8 @@ prof_ctx_destroy(prof_ctx_t *ctx)
|
|||||||
malloc_mutex_destroy(&ctx->lock);
|
malloc_mutex_destroy(&ctx->lock);
|
||||||
idalloc(ctx);
|
idalloc(ctx);
|
||||||
} else {
|
} else {
|
||||||
|
/* Compensate for increment in prof_ctx_merge(). */
|
||||||
|
ctx->cnt_merged.curobjs--;
|
||||||
malloc_mutex_unlock(&ctx->lock);
|
malloc_mutex_unlock(&ctx->lock);
|
||||||
prof_leave();
|
prof_leave();
|
||||||
}
|
}
|
||||||
@ -660,9 +669,23 @@ prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt)
|
|||||||
ctx->cnt_merged.accumbytes += cnt->cnts.accumbytes;
|
ctx->cnt_merged.accumbytes += cnt->cnts.accumbytes;
|
||||||
ql_remove(&ctx->cnts_ql, cnt, cnts_link);
|
ql_remove(&ctx->cnts_ql, cnt, cnts_link);
|
||||||
if (opt_prof_accum == false && ql_first(&ctx->cnts_ql) == NULL &&
|
if (opt_prof_accum == false && ql_first(&ctx->cnts_ql) == NULL &&
|
||||||
ctx->cnt_merged.curobjs == 0)
|
ctx->cnt_merged.curobjs == 0) {
|
||||||
|
/*
|
||||||
|
* Artificially raise ctx->cnt_merged.curobjs in order to keep
|
||||||
|
* another thread from winning the race to destroy ctx while
|
||||||
|
* this one has ctx->lock dropped. Without this, it would be
|
||||||
|
* possible for another thread to:
|
||||||
|
*
|
||||||
|
* 1) Sample an allocation associated with ctx.
|
||||||
|
* 2) Deallocate the sampled object.
|
||||||
|
* 3) Successfully prof_ctx_destroy(ctx).
|
||||||
|
*
|
||||||
|
* The result would be that ctx no longer exists by the time
|
||||||
|
* this thread accesses it in prof_ctx_destroy().
|
||||||
|
*/
|
||||||
|
ctx->cnt_merged.curobjs++;
|
||||||
destroy = true;
|
destroy = true;
|
||||||
else
|
} else
|
||||||
destroy = false;
|
destroy = false;
|
||||||
malloc_mutex_unlock(&ctx->lock);
|
malloc_mutex_unlock(&ctx->lock);
|
||||||
if (destroy)
|
if (destroy)
|
||||||
|
Loading…
Reference in New Issue
Block a user