Merge pull request #73 from bmaurer/smallmalloc

Smaller malloc hot path
This commit is contained in:
Jason Evans
2014-04-16 16:33:21 -07:00
8 changed files with 194 additions and 194 deletions

View File

@@ -7,6 +7,14 @@
/* Starts out at the compile-time default, LG_DIRTY_MULT_DEFAULT. */
ssize_t opt_lg_dirty_mult = LG_DIRTY_MULT_DEFAULT;
/* Per-bin information table with NBINS entries, indexed by bin index. */
arena_bin_info_t arena_bin_info[NBINS];
/*
 * Read-only table of NBINS 32-bit sizes, indexed by small bin index.  The
 * initializer is generated by expanding SIZE_CLASSES with SIZE_CLASS()
 * temporarily defined to emit only its "size" argument, so the entries appear
 * in the same order as the SIZE_CLASSES list.  The table is declared with
 * JEMALLOC_ALIGNED(CACHELINE).
 */
JEMALLOC_ALIGNED(CACHELINE)
const uint32_t small_bin2size[NBINS] = {
#define SIZE_CLASS(bin, delta, size) \
size,
SIZE_CLASSES
#undef SIZE_CLASS
};
JEMALLOC_ALIGNED(CACHELINE)
const uint8_t small_size2bin[] = {
#define S2B_8(i) i,
@@ -1615,7 +1623,7 @@ arena_malloc_small(arena_t *arena, size_t size, bool zero)
binind = SMALL_SIZE2BIN(size);
assert(binind < NBINS);
bin = &arena->bins[binind];
size = arena_bin_info[binind].reg_size;
size = small_bin2size[binind];
malloc_mutex_lock(&bin->lock);
if ((run = bin->runcur) != NULL && run->nfree > 0)

View File

@@ -645,6 +645,66 @@ prof_lookup(prof_bt_t *bt)
return (ret.p);
}
/*
 * Recompute prof_tdata->bytes_until_sample for heap-profile sampling.
 *
 * prof_tdata: per-thread profiling data to update.  If NULL, the value
 *             returned by prof_tdata_get(false) is used instead.
 *
 * When opt_lg_prof_sample is 0 the threshold is set to 0.  Otherwise a new
 * pseudo-random threshold is drawn, which advances prof_tdata->prng_state.
 * The whole body is a no-op when JEMALLOC_PROF is not defined or config_prof
 * is false.
 */
void
prof_sample_threshold_update(prof_tdata_t *prof_tdata)
{
	/*
	 * The body of this function is compiled out unless heap profiling is
	 * enabled, so that it is possible to compile jemalloc with floating
	 * point support completely disabled.  Avoiding floating point code is
	 * important on memory-constrained systems, but it also enables a
	 * workaround for versions of glibc that don't properly save/restore
	 * floating point registers during dynamic lazy symbol loading (which
	 * internally calls into whatever malloc implementation happens to be
	 * integrated into the application).  Note that some compilers (e.g.
	 * gcc 4.8) may use floating point registers for fast memory moves, so
	 * jemalloc must be compiled with such optimizations disabled (e.g.
	 * -mno-sse) in order for the workaround to be complete.
	 */
#ifdef JEMALLOC_PROF
	uint64_t r;
	double u;

	if (!config_prof)
		return;

	if (prof_tdata == NULL)
		prof_tdata = prof_tdata_get(false);

	if (opt_lg_prof_sample == 0) {
		prof_tdata->bytes_until_sample = 0;
		return;
	}

	/*
	 * Compute sample threshold as a geometrically distributed random
	 * variable with mean (2^opt_lg_prof_sample):
	 *
	 *   bytes_until_sample = floor(log(u) / log(1-p)) + 1
	 *
	 *   where p = 1 / 2^opt_lg_prof_sample and u is uniform in (0, 1).
	 *
	 * For more information on the math, see:
	 *
	 *   Non-Uniform Random Variate Generation
	 *   Luc Devroye
	 *   Springer-Verlag, New York, 1986
	 *   pp 500
	 *   (http://luc.devroye.org/rnbookindex.html)
	 */
	prng64(r, 53, prof_tdata->prng_state,
	    UINT64_C(6364136223846793005), UINT64_C(1442695040888963407));
	/*
	 * A draw of zero would give u == 0.0, log(u) == -infinity and an
	 * infinite quotient; converting that to uint64_t is undefined
	 * behavior.  Substitute the smallest nonzero draw so that the result
	 * stays finite.  (Assumes prng64() yields a 53-bit value, as the
	 * 2^-53 scale factor below suggests.)
	 */
	if (r == 0)
		r = 1;
	/* Scale by 2^-53 to map the draw into (0, 1). */
	u = (double)r * (1.0/9007199254740992.0L);
	prof_tdata->bytes_until_sample = (uint64_t)(log(u) /
	    log(1.0 - (1.0 / (double)((uint64_t)1U << opt_lg_prof_sample))))
	    + (uint64_t)1U;
#endif
}
#ifdef JEMALLOC_JET
size_t
prof_bt_count(void)
@@ -1224,9 +1284,8 @@ prof_tdata_init(void)
return (NULL);
}
prof_tdata->prng_state = 0;
prof_tdata->threshold = 0;
prof_tdata->accum = 0;
prof_tdata->prng_state = (uint64_t)(uintptr_t)prof_tdata;
prof_sample_threshold_update(prof_tdata);
prof_tdata->enq = false;
prof_tdata->enq_idump = false;

View File

@@ -265,6 +265,46 @@ tcache_arena_dissociate(tcache_t *tcache)
}
}
/*
 * Resolve a tcache value that is either NULL or one of the TCACHE_STATE_*
 * sentinels.
 *
 * tcache: the caller's current tcache value.
 * create: whether a missing (NULL) tcache may be created.
 *
 * Returns the result of tcache_create(choose_arena(NULL)) when tcache is
 * NULL, create is true and tcache_enabled_get() reports true; returns NULL in
 * every other case.
 */
tcache_t *
tcache_get_hard(tcache_t *tcache, bool create)
{

	if (tcache != NULL) {
		if (tcache == TCACHE_STATE_PURGATORY) {
			/*
			 * An allocator function ran after
			 * tcache_thread_cleanup(); record that fact in the
			 * thread-specific slot.
			 */
			tcache = TCACHE_STATE_REINCARNATED;
			tcache_tsd_set(&tcache);
		} else if (tcache != TCACHE_STATE_REINCARNATED) {
			/* No other value is expected on this path. */
			not_reached();
		}
		return (NULL);
	}

	if (!create) {
		/*
		 * Do not create a tcache here: that would make free()
		 * allocate as a side effect.  Normally this would be
		 * harmless, because tcache_create() failure is a soft
		 * failure that doesn't propagate.  However, if TLS data are
		 * freed via free() as in glibc, setting a TLS variable after
		 * its backing memory is freed could cause subtle corruption.
		 */
		return (NULL);
	}

	if (!tcache_enabled_get()) {
		tcache_enabled_set(false); /* Memoize. */
		return (NULL);
	}

	return (tcache_create(choose_arena(NULL)));
}
tcache_t *
tcache_create(arena_t *arena)
{