Merge pull request #73 from bmaurer/smallmalloc
Smaller malloc hot path
This commit is contained in:
commit
3e3caf03af
@ -385,6 +385,7 @@ extern ssize_t opt_lg_dirty_mult;
|
|||||||
* and all accesses are via the SMALL_SIZE2BIN macro.
|
* and all accesses are via the SMALL_SIZE2BIN macro.
|
||||||
*/
|
*/
|
||||||
extern uint8_t const small_size2bin[];
|
extern uint8_t const small_size2bin[];
|
||||||
|
extern uint32_t const small_bin2size[];
|
||||||
#define SMALL_SIZE2BIN(s) (small_size2bin[(s-1) >> LG_TINY_MIN])
|
#define SMALL_SIZE2BIN(s) (small_size2bin[(s-1) >> LG_TINY_MIN])
|
||||||
|
|
||||||
extern arena_bin_info_t arena_bin_info[NBINS];
|
extern arena_bin_info_t arena_bin_info[NBINS];
|
||||||
@ -964,7 +965,7 @@ arena_salloc(const void *ptr, bool demote)
|
|||||||
assert(arena_mapbits_large_get(chunk, pageind) != 0 ||
|
assert(arena_mapbits_large_get(chunk, pageind) != 0 ||
|
||||||
arena_ptr_small_binind_get(ptr, arena_mapbits_get(chunk,
|
arena_ptr_small_binind_get(ptr, arena_mapbits_get(chunk,
|
||||||
pageind)) == binind);
|
pageind)) == binind);
|
||||||
ret = arena_bin_info[binind].reg_size;
|
ret = small_bin2size[binind];
|
||||||
}
|
}
|
||||||
|
|
||||||
return (ret);
|
return (ret);
|
||||||
|
@ -526,7 +526,7 @@ s2u(size_t size)
|
|||||||
{
|
{
|
||||||
|
|
||||||
if (size <= SMALL_MAXCLASS)
|
if (size <= SMALL_MAXCLASS)
|
||||||
return (arena_bin_info[SMALL_SIZE2BIN(size)].reg_size);
|
return (small_bin2size[SMALL_SIZE2BIN(size)]);
|
||||||
if (size <= arena_maxclass)
|
if (size <= arena_maxclass)
|
||||||
return (PAGE_CEILING(size));
|
return (PAGE_CEILING(size));
|
||||||
return (CHUNK_CEILING(size));
|
return (CHUNK_CEILING(size));
|
||||||
@ -569,7 +569,7 @@ sa2u(size_t size, size_t alignment)
|
|||||||
|
|
||||||
if (usize <= arena_maxclass && alignment <= PAGE) {
|
if (usize <= arena_maxclass && alignment <= PAGE) {
|
||||||
if (usize <= SMALL_MAXCLASS)
|
if (usize <= SMALL_MAXCLASS)
|
||||||
return (arena_bin_info[SMALL_SIZE2BIN(usize)].reg_size);
|
return (small_bin2size[SMALL_SIZE2BIN(usize)]);
|
||||||
return (PAGE_CEILING(usize));
|
return (PAGE_CEILING(usize));
|
||||||
} else {
|
} else {
|
||||||
size_t run_size;
|
size_t run_size;
|
||||||
|
@ -298,6 +298,7 @@ prof_idump
|
|||||||
prof_interval
|
prof_interval
|
||||||
prof_lookup
|
prof_lookup
|
||||||
prof_malloc
|
prof_malloc
|
||||||
|
prof_malloc_record_object
|
||||||
prof_mdump
|
prof_mdump
|
||||||
prof_postfork_child
|
prof_postfork_child
|
||||||
prof_postfork_parent
|
prof_postfork_parent
|
||||||
@ -344,6 +345,7 @@ rtree_set
|
|||||||
s2u
|
s2u
|
||||||
sa2u
|
sa2u
|
||||||
set_errno
|
set_errno
|
||||||
|
small_bin2size
|
||||||
small_size2bin
|
small_size2bin
|
||||||
stats_cactive
|
stats_cactive
|
||||||
stats_cactive_add
|
stats_cactive_add
|
||||||
@ -383,6 +385,7 @@ tcache_event
|
|||||||
tcache_event_hard
|
tcache_event_hard
|
||||||
tcache_flush
|
tcache_flush
|
||||||
tcache_get
|
tcache_get
|
||||||
|
tcache_get_hard
|
||||||
tcache_initialized
|
tcache_initialized
|
||||||
tcache_maxclass
|
tcache_maxclass
|
||||||
tcache_salloc
|
tcache_salloc
|
||||||
|
@ -177,8 +177,7 @@ struct prof_tdata_s {
|
|||||||
|
|
||||||
/* Sampling state. */
|
/* Sampling state. */
|
||||||
uint64_t prng_state;
|
uint64_t prng_state;
|
||||||
uint64_t threshold;
|
uint64_t bytes_until_sample;
|
||||||
uint64_t accum;
|
|
||||||
|
|
||||||
/* State used to avoid dumping while operating on prof internals. */
|
/* State used to avoid dumping while operating on prof internals. */
|
||||||
bool enq;
|
bool enq;
|
||||||
@ -239,6 +238,7 @@ bool prof_boot2(void);
|
|||||||
void prof_prefork(void);
|
void prof_prefork(void);
|
||||||
void prof_postfork_parent(void);
|
void prof_postfork_parent(void);
|
||||||
void prof_postfork_child(void);
|
void prof_postfork_child(void);
|
||||||
|
void prof_sample_threshold_update(prof_tdata_t *prof_tdata);
|
||||||
|
|
||||||
#endif /* JEMALLOC_H_EXTERNS */
|
#endif /* JEMALLOC_H_EXTERNS */
|
||||||
/******************************************************************************/
|
/******************************************************************************/
|
||||||
@ -250,49 +250,13 @@ void prof_postfork_child(void);
|
|||||||
\
|
\
|
||||||
assert(size == s2u(size)); \
|
assert(size == s2u(size)); \
|
||||||
\
|
\
|
||||||
prof_tdata = prof_tdata_get(true); \
|
if (!opt_prof_active || \
|
||||||
if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) { \
|
prof_sample_accum_update(size, false, &prof_tdata)) { \
|
||||||
if (prof_tdata != NULL) \
|
|
||||||
ret = (prof_thr_cnt_t *)(uintptr_t)1U; \
|
|
||||||
else \
|
|
||||||
ret = NULL; \
|
|
||||||
break; \
|
|
||||||
} \
|
|
||||||
\
|
|
||||||
if (opt_prof_active == false) { \
|
|
||||||
/* Sampling is currently inactive, so avoid sampling. */\
|
|
||||||
ret = (prof_thr_cnt_t *)(uintptr_t)1U; \
|
ret = (prof_thr_cnt_t *)(uintptr_t)1U; \
|
||||||
} else if (opt_lg_prof_sample == 0) { \
|
} else { \
|
||||||
/* Don't bother with sampling logic, since sampling */\
|
|
||||||
/* interval is 1. */\
|
|
||||||
bt_init(&bt, prof_tdata->vec); \
|
bt_init(&bt, prof_tdata->vec); \
|
||||||
prof_backtrace(&bt, nignore); \
|
prof_backtrace(&bt, nignore); \
|
||||||
ret = prof_lookup(&bt); \
|
ret = prof_lookup(&bt); \
|
||||||
} else { \
|
|
||||||
if (prof_tdata->threshold == 0) { \
|
|
||||||
/* Initialize. Seed the prng differently for */\
|
|
||||||
/* each thread. */\
|
|
||||||
prof_tdata->prng_state = \
|
|
||||||
(uint64_t)(uintptr_t)&size; \
|
|
||||||
prof_sample_threshold_update(prof_tdata); \
|
|
||||||
} \
|
|
||||||
\
|
|
||||||
/* Determine whether to capture a backtrace based on */\
|
|
||||||
/* whether size is enough for prof_accum to reach */\
|
|
||||||
/* prof_tdata->threshold. However, delay updating */\
|
|
||||||
/* these variables until prof_{m,re}alloc(), because */\
|
|
||||||
/* we don't know for sure that the allocation will */\
|
|
||||||
/* succeed. */\
|
|
||||||
/* */\
|
|
||||||
/* Use subtraction rather than addition to avoid */\
|
|
||||||
/* potential integer overflow. */\
|
|
||||||
if (size >= prof_tdata->threshold - \
|
|
||||||
prof_tdata->accum) { \
|
|
||||||
bt_init(&bt, prof_tdata->vec); \
|
|
||||||
prof_backtrace(&bt, nignore); \
|
|
||||||
ret = prof_lookup(&bt); \
|
|
||||||
} else \
|
|
||||||
ret = (prof_thr_cnt_t *)(uintptr_t)1U; \
|
|
||||||
} \
|
} \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
@ -300,10 +264,13 @@ void prof_postfork_child(void);
|
|||||||
malloc_tsd_protos(JEMALLOC_ATTR(unused), prof_tdata, prof_tdata_t *)
|
malloc_tsd_protos(JEMALLOC_ATTR(unused), prof_tdata, prof_tdata_t *)
|
||||||
|
|
||||||
prof_tdata_t *prof_tdata_get(bool create);
|
prof_tdata_t *prof_tdata_get(bool create);
|
||||||
void prof_sample_threshold_update(prof_tdata_t *prof_tdata);
|
void prof_sample_accum_update(size_t size, bool commit,
|
||||||
|
prof_tdata_t **prof_tdata_out);
|
||||||
prof_ctx_t *prof_ctx_get(const void *ptr);
|
prof_ctx_t *prof_ctx_get(const void *ptr);
|
||||||
void prof_ctx_set(const void *ptr, prof_ctx_t *ctx);
|
void prof_ctx_set(const void *ptr, prof_ctx_t *ctx);
|
||||||
bool prof_sample_accum_update(size_t size);
|
bool prof_sample_accum_update(size_t size);
|
||||||
|
void prof_malloc_record_object(const void *ptr, size_t usize,
|
||||||
|
prof_thr_cnt_t *cnt)
|
||||||
void prof_malloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt);
|
void prof_malloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt);
|
||||||
void prof_realloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt,
|
void prof_realloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt,
|
||||||
size_t old_usize, prof_ctx_t *old_ctx);
|
size_t old_usize, prof_ctx_t *old_ctx);
|
||||||
@ -330,55 +297,6 @@ prof_tdata_get(bool create)
|
|||||||
return (prof_tdata);
|
return (prof_tdata);
|
||||||
}
|
}
|
||||||
|
|
||||||
JEMALLOC_INLINE void
|
|
||||||
prof_sample_threshold_update(prof_tdata_t *prof_tdata)
|
|
||||||
{
|
|
||||||
/*
|
|
||||||
* The body of this function is compiled out unless heap profiling is
|
|
||||||
* enabled, so that it is possible to compile jemalloc with floating
|
|
||||||
* point support completely disabled. Avoiding floating point code is
|
|
||||||
* important on memory-constrained systems, but it also enables a
|
|
||||||
* workaround for versions of glibc that don't properly save/restore
|
|
||||||
* floating point registers during dynamic lazy symbol loading (which
|
|
||||||
* internally calls into whatever malloc implementation happens to be
|
|
||||||
* integrated into the application). Note that some compilers (e.g.
|
|
||||||
* gcc 4.8) may use floating point registers for fast memory moves, so
|
|
||||||
* jemalloc must be compiled with such optimizations disabled (e.g.
|
|
||||||
* -mno-sse) in order for the workaround to be complete.
|
|
||||||
*/
|
|
||||||
#ifdef JEMALLOC_PROF
|
|
||||||
uint64_t r;
|
|
||||||
double u;
|
|
||||||
|
|
||||||
cassert(config_prof);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Compute sample threshold as a geometrically distributed random
|
|
||||||
* variable with mean (2^opt_lg_prof_sample).
|
|
||||||
*
|
|
||||||
* __ __
|
|
||||||
* | log(u) | 1
|
|
||||||
* prof_tdata->threshold = | -------- |, where p = -------------------
|
|
||||||
* | log(1-p) | opt_lg_prof_sample
|
|
||||||
* 2
|
|
||||||
*
|
|
||||||
* For more information on the math, see:
|
|
||||||
*
|
|
||||||
* Non-Uniform Random Variate Generation
|
|
||||||
* Luc Devroye
|
|
||||||
* Springer-Verlag, New York, 1986
|
|
||||||
* pp 500
|
|
||||||
* (http://luc.devroye.org/rnbookindex.html)
|
|
||||||
*/
|
|
||||||
prng64(r, 53, prof_tdata->prng_state,
|
|
||||||
UINT64_C(6364136223846793005), UINT64_C(1442695040888963407));
|
|
||||||
u = (double)r * (1.0/9007199254740992.0L);
|
|
||||||
prof_tdata->threshold = (uint64_t)(log(u) /
|
|
||||||
log(1.0 - (1.0 / (double)((uint64_t)1U << opt_lg_prof_sample))))
|
|
||||||
+ (uint64_t)1U;
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
JEMALLOC_INLINE prof_ctx_t *
|
JEMALLOC_INLINE prof_ctx_t *
|
||||||
prof_ctx_get(const void *ptr)
|
prof_ctx_get(const void *ptr)
|
||||||
{
|
{
|
||||||
@ -415,34 +333,58 @@ prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
|
|||||||
}
|
}
|
||||||
|
|
||||||
JEMALLOC_INLINE bool
|
JEMALLOC_INLINE bool
|
||||||
prof_sample_accum_update(size_t size)
|
prof_sample_accum_update(size_t size, bool commit,
|
||||||
|
prof_tdata_t **prof_tdata_out)
|
||||||
{
|
{
|
||||||
prof_tdata_t *prof_tdata;
|
prof_tdata_t *prof_tdata;
|
||||||
|
|
||||||
cassert(config_prof);
|
cassert(config_prof);
|
||||||
/* Sampling logic is unnecessary if the interval is 1. */
|
|
||||||
assert(opt_lg_prof_sample != 0);
|
|
||||||
|
|
||||||
prof_tdata = prof_tdata_get(false);
|
prof_tdata = prof_tdata_get(true);
|
||||||
if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
|
if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
|
||||||
|
prof_tdata = NULL;
|
||||||
|
|
||||||
|
if (prof_tdata_out != NULL)
|
||||||
|
*prof_tdata_out = prof_tdata;
|
||||||
|
|
||||||
|
if (prof_tdata == NULL)
|
||||||
return (true);
|
return (true);
|
||||||
|
|
||||||
/* Take care to avoid integer overflow. */
|
if (prof_tdata->bytes_until_sample >= size) {
|
||||||
if (size >= prof_tdata->threshold - prof_tdata->accum) {
|
if (commit)
|
||||||
prof_tdata->accum -= (prof_tdata->threshold - size);
|
prof_tdata->bytes_until_sample -= size;
|
||||||
/* Compute new sample threshold. */
|
|
||||||
prof_sample_threshold_update(prof_tdata);
|
|
||||||
while (prof_tdata->accum >= prof_tdata->threshold) {
|
|
||||||
prof_tdata->accum -= prof_tdata->threshold;
|
|
||||||
prof_sample_threshold_update(prof_tdata);
|
|
||||||
}
|
|
||||||
return (false);
|
|
||||||
} else {
|
|
||||||
prof_tdata->accum += size;
|
|
||||||
return (true);
|
return (true);
|
||||||
|
} else {
|
||||||
|
/* Compute new sample threshold. */
|
||||||
|
if (commit)
|
||||||
|
prof_sample_threshold_update(prof_tdata);
|
||||||
|
return (false);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
JEMALLOC_INLINE void
|
||||||
|
prof_malloc_record_object(const void *ptr, size_t usize, prof_thr_cnt_t *cnt) {
|
||||||
|
prof_ctx_set(ptr, cnt->ctx);
|
||||||
|
|
||||||
|
cnt->epoch++;
|
||||||
|
/*********/
|
||||||
|
mb_write();
|
||||||
|
/*********/
|
||||||
|
cnt->cnts.curobjs++;
|
||||||
|
cnt->cnts.curbytes += usize;
|
||||||
|
if (opt_prof_accum) {
|
||||||
|
cnt->cnts.accumobjs++;
|
||||||
|
cnt->cnts.accumbytes += usize;
|
||||||
|
}
|
||||||
|
/*********/
|
||||||
|
mb_write();
|
||||||
|
/*********/
|
||||||
|
cnt->epoch++;
|
||||||
|
/*********/
|
||||||
|
mb_write();
|
||||||
|
/*********/
|
||||||
|
}
|
||||||
|
|
||||||
JEMALLOC_INLINE void
|
JEMALLOC_INLINE void
|
||||||
prof_malloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt)
|
prof_malloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt)
|
||||||
{
|
{
|
||||||
@ -451,40 +393,20 @@ prof_malloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt)
|
|||||||
assert(ptr != NULL);
|
assert(ptr != NULL);
|
||||||
assert(usize == isalloc(ptr, true));
|
assert(usize == isalloc(ptr, true));
|
||||||
|
|
||||||
if (opt_lg_prof_sample != 0) {
|
if (prof_sample_accum_update(usize, true, NULL)) {
|
||||||
if (prof_sample_accum_update(usize)) {
|
/*
|
||||||
/*
|
* Don't sample. For malloc()-like allocation, it is
|
||||||
* Don't sample. For malloc()-like allocation, it is
|
* always possible to tell in advance how large an
|
||||||
* always possible to tell in advance how large an
|
* object's usable size will be, so there should never
|
||||||
* object's usable size will be, so there should never
|
* be a difference between the usize passed to
|
||||||
* be a difference between the usize passed to
|
* PROF_ALLOC_PREP() and prof_malloc().
|
||||||
* PROF_ALLOC_PREP() and prof_malloc().
|
*/
|
||||||
*/
|
assert((uintptr_t)cnt == (uintptr_t)1U);
|
||||||
assert((uintptr_t)cnt == (uintptr_t)1U);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((uintptr_t)cnt > (uintptr_t)1U) {
|
if ((uintptr_t)cnt > (uintptr_t)1U)
|
||||||
prof_ctx_set(ptr, cnt->ctx);
|
prof_malloc_record_object(ptr, usize, cnt);
|
||||||
|
else
|
||||||
cnt->epoch++;
|
|
||||||
/*********/
|
|
||||||
mb_write();
|
|
||||||
/*********/
|
|
||||||
cnt->cnts.curobjs++;
|
|
||||||
cnt->cnts.curbytes += usize;
|
|
||||||
if (opt_prof_accum) {
|
|
||||||
cnt->cnts.accumobjs++;
|
|
||||||
cnt->cnts.accumbytes += usize;
|
|
||||||
}
|
|
||||||
/*********/
|
|
||||||
mb_write();
|
|
||||||
/*********/
|
|
||||||
cnt->epoch++;
|
|
||||||
/*********/
|
|
||||||
mb_write();
|
|
||||||
/*********/
|
|
||||||
} else
|
|
||||||
prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U);
|
prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -499,18 +421,16 @@ prof_realloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt,
|
|||||||
|
|
||||||
if (ptr != NULL) {
|
if (ptr != NULL) {
|
||||||
assert(usize == isalloc(ptr, true));
|
assert(usize == isalloc(ptr, true));
|
||||||
if (opt_lg_prof_sample != 0) {
|
if (prof_sample_accum_update(usize, true, NULL)) {
|
||||||
if (prof_sample_accum_update(usize)) {
|
/*
|
||||||
/*
|
* Don't sample. The usize passed to
|
||||||
* Don't sample. The usize passed to
|
* PROF_ALLOC_PREP() was larger than what
|
||||||
* PROF_ALLOC_PREP() was larger than what
|
* actually got allocated, so a backtrace was
|
||||||
* actually got allocated, so a backtrace was
|
* captured for this allocation, even though
|
||||||
* captured for this allocation, even though
|
* its actual usize was insufficient to cross
|
||||||
* its actual usize was insufficient to cross
|
* the sample threshold.
|
||||||
* the sample threshold.
|
*/
|
||||||
*/
|
cnt = (prof_thr_cnt_t *)(uintptr_t)1U;
|
||||||
cnt = (prof_thr_cnt_t *)(uintptr_t)1U;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -110,6 +110,7 @@ void tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem,
|
|||||||
tcache_t *tcache);
|
tcache_t *tcache);
|
||||||
void tcache_arena_associate(tcache_t *tcache, arena_t *arena);
|
void tcache_arena_associate(tcache_t *tcache, arena_t *arena);
|
||||||
void tcache_arena_dissociate(tcache_t *tcache);
|
void tcache_arena_dissociate(tcache_t *tcache);
|
||||||
|
tcache_t *tcache_get_hard(tcache_t *tcache, bool create);
|
||||||
tcache_t *tcache_create(arena_t *arena);
|
tcache_t *tcache_create(arena_t *arena);
|
||||||
void tcache_destroy(tcache_t *tcache);
|
void tcache_destroy(tcache_t *tcache);
|
||||||
void tcache_thread_cleanup(void *arg);
|
void tcache_thread_cleanup(void *arg);
|
||||||
@ -220,39 +221,7 @@ tcache_get(bool create)
|
|||||||
if ((uintptr_t)tcache <= (uintptr_t)TCACHE_STATE_MAX) {
|
if ((uintptr_t)tcache <= (uintptr_t)TCACHE_STATE_MAX) {
|
||||||
if (tcache == TCACHE_STATE_DISABLED)
|
if (tcache == TCACHE_STATE_DISABLED)
|
||||||
return (NULL);
|
return (NULL);
|
||||||
if (tcache == NULL) {
|
tcache = tcache_get_hard(tcache, create);
|
||||||
if (create == false) {
|
|
||||||
/*
|
|
||||||
* Creating a tcache here would cause
|
|
||||||
* allocation as a side effect of free().
|
|
||||||
* Ordinarily that would be okay since
|
|
||||||
* tcache_create() failure is a soft failure
|
|
||||||
* that doesn't propagate. However, if TLS
|
|
||||||
* data are freed via free() as in glibc,
|
|
||||||
* subtle corruption could result from setting
|
|
||||||
* a TLS variable after its backing memory is
|
|
||||||
* freed.
|
|
||||||
*/
|
|
||||||
return (NULL);
|
|
||||||
}
|
|
||||||
if (tcache_enabled_get() == false) {
|
|
||||||
tcache_enabled_set(false); /* Memoize. */
|
|
||||||
return (NULL);
|
|
||||||
}
|
|
||||||
return (tcache_create(choose_arena(NULL)));
|
|
||||||
}
|
|
||||||
if (tcache == TCACHE_STATE_PURGATORY) {
|
|
||||||
/*
|
|
||||||
* Make a note that an allocator function was called
|
|
||||||
* after tcache_thread_cleanup() was called.
|
|
||||||
*/
|
|
||||||
tcache = TCACHE_STATE_REINCARNATED;
|
|
||||||
tcache_tsd_set(&tcache);
|
|
||||||
return (NULL);
|
|
||||||
}
|
|
||||||
if (tcache == TCACHE_STATE_REINCARNATED)
|
|
||||||
return (NULL);
|
|
||||||
not_reached();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return (tcache);
|
return (tcache);
|
||||||
@ -297,14 +266,14 @@ tcache_alloc_small(tcache_t *tcache, size_t size, bool zero)
|
|||||||
binind = SMALL_SIZE2BIN(size);
|
binind = SMALL_SIZE2BIN(size);
|
||||||
assert(binind < NBINS);
|
assert(binind < NBINS);
|
||||||
tbin = &tcache->tbins[binind];
|
tbin = &tcache->tbins[binind];
|
||||||
size = arena_bin_info[binind].reg_size;
|
size = small_bin2size[binind];
|
||||||
ret = tcache_alloc_easy(tbin);
|
ret = tcache_alloc_easy(tbin);
|
||||||
if (ret == NULL) {
|
if (ret == NULL) {
|
||||||
ret = tcache_alloc_small_hard(tcache, tbin, binind);
|
ret = tcache_alloc_small_hard(tcache, tbin, binind);
|
||||||
if (ret == NULL)
|
if (ret == NULL)
|
||||||
return (NULL);
|
return (NULL);
|
||||||
}
|
}
|
||||||
assert(tcache_salloc(ret) == arena_bin_info[binind].reg_size);
|
assert(tcache_salloc(ret) == size);
|
||||||
|
|
||||||
if (zero == false) {
|
if (zero == false) {
|
||||||
if (config_fill) {
|
if (config_fill) {
|
||||||
@ -325,7 +294,7 @@ tcache_alloc_small(tcache_t *tcache, size_t size, bool zero)
|
|||||||
if (config_stats)
|
if (config_stats)
|
||||||
tbin->tstats.nrequests++;
|
tbin->tstats.nrequests++;
|
||||||
if (config_prof)
|
if (config_prof)
|
||||||
tcache->prof_accumbytes += arena_bin_info[binind].reg_size;
|
tcache->prof_accumbytes += size;
|
||||||
tcache_event(tcache);
|
tcache_event(tcache);
|
||||||
return (ret);
|
return (ret);
|
||||||
}
|
}
|
||||||
|
10
src/arena.c
10
src/arena.c
@ -7,6 +7,14 @@
|
|||||||
ssize_t opt_lg_dirty_mult = LG_DIRTY_MULT_DEFAULT;
|
ssize_t opt_lg_dirty_mult = LG_DIRTY_MULT_DEFAULT;
|
||||||
arena_bin_info_t arena_bin_info[NBINS];
|
arena_bin_info_t arena_bin_info[NBINS];
|
||||||
|
|
||||||
|
JEMALLOC_ALIGNED(CACHELINE)
|
||||||
|
const uint32_t small_bin2size[NBINS] = {
|
||||||
|
#define SIZE_CLASS(bin, delta, size) \
|
||||||
|
size,
|
||||||
|
SIZE_CLASSES
|
||||||
|
#undef SIZE_CLASS
|
||||||
|
};
|
||||||
|
|
||||||
JEMALLOC_ALIGNED(CACHELINE)
|
JEMALLOC_ALIGNED(CACHELINE)
|
||||||
const uint8_t small_size2bin[] = {
|
const uint8_t small_size2bin[] = {
|
||||||
#define S2B_8(i) i,
|
#define S2B_8(i) i,
|
||||||
@ -1615,7 +1623,7 @@ arena_malloc_small(arena_t *arena, size_t size, bool zero)
|
|||||||
binind = SMALL_SIZE2BIN(size);
|
binind = SMALL_SIZE2BIN(size);
|
||||||
assert(binind < NBINS);
|
assert(binind < NBINS);
|
||||||
bin = &arena->bins[binind];
|
bin = &arena->bins[binind];
|
||||||
size = arena_bin_info[binind].reg_size;
|
size = small_bin2size[binind];
|
||||||
|
|
||||||
malloc_mutex_lock(&bin->lock);
|
malloc_mutex_lock(&bin->lock);
|
||||||
if ((run = bin->runcur) != NULL && run->nfree > 0)
|
if ((run = bin->runcur) != NULL && run->nfree > 0)
|
||||||
|
65
src/prof.c
65
src/prof.c
@ -645,6 +645,66 @@ prof_lookup(prof_bt_t *bt)
|
|||||||
return (ret.p);
|
return (ret.p);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void
|
||||||
|
prof_sample_threshold_update(prof_tdata_t *prof_tdata)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* The body of this function is compiled out unless heap profiling is
|
||||||
|
* enabled, so that it is possible to compile jemalloc with floating
|
||||||
|
* point support completely disabled. Avoiding floating point code is
|
||||||
|
* important on memory-constrained systems, but it also enables a
|
||||||
|
* workaround for versions of glibc that don't properly save/restore
|
||||||
|
* floating point registers during dynamic lazy symbol loading (which
|
||||||
|
* internally calls into whatever malloc implementation happens to be
|
||||||
|
* integrated into the application). Note that some compilers (e.g.
|
||||||
|
* gcc 4.8) may use floating point registers for fast memory moves, so
|
||||||
|
* jemalloc must be compiled with such optimizations disabled (e.g.
|
||||||
|
* -mno-sse) in order for the workaround to be complete.
|
||||||
|
*/
|
||||||
|
#ifdef JEMALLOC_PROF
|
||||||
|
uint64_t r;
|
||||||
|
double u;
|
||||||
|
|
||||||
|
if (!config_prof)
|
||||||
|
return;
|
||||||
|
|
||||||
|
if (prof_tdata == NULL)
|
||||||
|
prof_tdata = prof_tdata_get(false);
|
||||||
|
|
||||||
|
if (opt_lg_prof_sample == 0) {
|
||||||
|
prof_tdata->bytes_until_sample = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Compute sample threshold as a geometrically distributed random
|
||||||
|
* variable with mean (2^opt_lg_prof_sample).
|
||||||
|
*
|
||||||
|
* __ __
|
||||||
|
* | log(u) | 1
|
||||||
|
* prof_tdata->threshold = | -------- |, where p = -------------------
|
||||||
|
* | log(1-p) | opt_lg_prof_sample
|
||||||
|
* 2
|
||||||
|
*
|
||||||
|
* For more information on the math, see:
|
||||||
|
*
|
||||||
|
* Non-Uniform Random Variate Generation
|
||||||
|
* Luc Devroye
|
||||||
|
* Springer-Verlag, New York, 1986
|
||||||
|
* pp 500
|
||||||
|
* (http://luc.devroye.org/rnbookindex.html)
|
||||||
|
*/
|
||||||
|
prng64(r, 53, prof_tdata->prng_state,
|
||||||
|
UINT64_C(6364136223846793005), UINT64_C(1442695040888963407));
|
||||||
|
u = (double)r * (1.0/9007199254740992.0L);
|
||||||
|
prof_tdata->bytes_until_sample = (uint64_t)(log(u) /
|
||||||
|
log(1.0 - (1.0 / (double)((uint64_t)1U << opt_lg_prof_sample))))
|
||||||
|
+ (uint64_t)1U;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
#ifdef JEMALLOC_JET
|
#ifdef JEMALLOC_JET
|
||||||
size_t
|
size_t
|
||||||
prof_bt_count(void)
|
prof_bt_count(void)
|
||||||
@ -1224,9 +1284,8 @@ prof_tdata_init(void)
|
|||||||
return (NULL);
|
return (NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
prof_tdata->prng_state = 0;
|
prof_tdata->prng_state = (uint64_t)(uintptr_t)prof_tdata;
|
||||||
prof_tdata->threshold = 0;
|
prof_sample_threshold_update(prof_tdata);
|
||||||
prof_tdata->accum = 0;
|
|
||||||
|
|
||||||
prof_tdata->enq = false;
|
prof_tdata->enq = false;
|
||||||
prof_tdata->enq_idump = false;
|
prof_tdata->enq_idump = false;
|
||||||
|
40
src/tcache.c
40
src/tcache.c
@ -265,6 +265,46 @@ tcache_arena_dissociate(tcache_t *tcache)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Slow path of tcache_get(), entered when the thread's tcache slot holds
 * NULL or a TCACHE_STATE_* sentinel instead of a real cache
 * (TCACHE_STATE_DISABLED is filtered out by the caller).
 *
 * Returns a newly created tcache only when the slot is NULL, create is true
 * and tcache_enabled_get() reports true; every other case returns NULL.
 */
tcache_t *
tcache_get_hard(tcache_t *tcache, bool create)
{
	tcache_t *ret = NULL;

	if (tcache == NULL) {
		/*
		 * With create == false nothing is built: creating a tcache
		 * here would cause allocation as a side effect of free().
		 * Ordinarily that would be okay since tcache_create() failure
		 * is a soft failure that doesn't propagate.  However, if TLS
		 * data are freed via free() as in glibc, subtle corruption
		 * could result from setting a TLS variable after its backing
		 * memory is freed.
		 */
		if (create) {
			if (tcache_enabled_get())
				ret = tcache_create(choose_arena(NULL));
			else
				tcache_enabled_set(false); /* Memoize. */
		}
	} else if (tcache == TCACHE_STATE_PURGATORY) {
		/*
		 * Make a note that an allocator function was called after
		 * tcache_thread_cleanup() was called.
		 */
		tcache_t *reincarnated = TCACHE_STATE_REINCARNATED;

		tcache_tsd_set(&reincarnated);
	} else if (tcache != TCACHE_STATE_REINCARNATED)
		not_reached();

	return (ret);
}
|
||||||
|
|
||||||
tcache_t *
|
tcache_t *
|
||||||
tcache_create(arena_t *arena)
|
tcache_create(arena_t *arena)
|
||||||
{
|
{
|
||||||
|
Loading…
Reference in New Issue
Block a user