Create arena_bin_info_t.

Move read-only fields from arena_bin_t into arena_bin_info_t, primarily
in order to avoid false cacheline sharing.
commit 49f7e8f35a (parent 1b17768e24)
Author: Jason Evans
Date:   2011-03-15 13:59:15 -07:00

6 changed files with 324 additions and 223 deletions
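The layout change at the heart of this commit, sketched standalone before the diff itself: mutable per-arena bin state stays in its own struct, while the read-only size-class parameters move to a single shared table that every arena indexes by bin number. A minimal sketch in C, assuming a hypothetical field set and bin count (the real structs follow in the first file below):

    #include <stddef.h>
    #include <stdint.h>

    typedef struct {
        size_t reg_size;       /* read-only after initialization */
        size_t run_size;
        uint32_t nregs;
        uint32_t reg0_offset;
    } toy_bin_info_t;          /* one copy, shared by all arenas */

    typedef struct {
        void *runcur;          /* mutable, written under the bin lock */
        uint64_t nmalloc;
    } toy_bin_t;               /* one per bin, per arena */

    /* Writers of toy_bin_t fields can no longer dirty the cachelines
     * holding the parameters that every allocation path reads. */
    static toy_bin_info_t toy_bin_info[8]; /* hypothetical nbins == 8 */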


@@ -71,6 +71,7 @@
typedef struct arena_chunk_map_s arena_chunk_map_t;
typedef struct arena_chunk_s arena_chunk_t;
typedef struct arena_run_s arena_run_t;
typedef struct arena_bin_info_s arena_bin_info_t;
typedef struct arena_bin_s arena_bin_t;
typedef struct arena_s arena_t;
@@ -218,6 +219,33 @@ struct arena_run_s {
unsigned nfree;
};
/*
* Read-only information associated with each element of arena_t's bins array
* is stored separately, partly to reduce memory usage (only one copy, rather
* than one per arena), but mainly to avoid false cacheline sharing.
*/
struct arena_bin_info_s {
/* Size of regions in a run for this bin's size class. */
size_t reg_size;
/* Total size of a run for this bin's size class. */
size_t run_size;
/* Total number of regions in a run for this bin's size class. */
uint32_t nregs;
#ifdef JEMALLOC_PROF
/*
* Offset of first (prof_ctx_t *) in a run header for this bin's size
* class, or 0 if (opt_prof == false).
*/
uint32_t ctx0_offset;
#endif
/* Offset of first region in a run for this bin's size class. */
uint32_t reg0_offset;
};
struct arena_bin_s {
/*
* All operations on runcur, runs, and stats require that lock be
@@ -242,26 +270,6 @@ struct arena_bin_s {
*/
arena_run_tree_t runs;
/* Size of regions in a run for this bin's size class. */
size_t reg_size;
/* Total size of a run for this bin's size class. */
size_t run_size;
/* Total number of regions in a run for this bin's size class. */
uint32_t nregs;
#ifdef JEMALLOC_PROF
/*
* Offset of first (prof_ctx_t *) in a run header for this bin's size
* class, or 0 if (opt_prof == false).
*/
uint32_t ctx0_offset;
#endif
/* Offset of first region in a run for this bin's size class. */
uint32_t reg0_offset;
#ifdef JEMALLOC_STATS
/* Bin statistics. */
malloc_bin_stats_t stats;
@@ -398,6 +406,8 @@ extern ssize_t opt_lg_dirty_mult;
extern uint8_t const *small_size2bin;
#define SMALL_SIZE2BIN(s) (small_size2bin[(s-1) >> LG_TINY_MIN])
extern arena_bin_info_t *arena_bin_info;
/* Various bin-related settings. */
#ifdef JEMALLOC_TINY /* Number of (2^n)-spaced tiny bins. */
# define ntbins ((unsigned)(LG_QUANTUM - LG_TINY_MIN))
@@ -463,7 +473,8 @@ bool arena_boot(void);
#ifdef JEMALLOC_H_INLINES
#ifndef JEMALLOC_ENABLE_INLINE
unsigned arena_run_regind(arena_run_t *run, arena_bin_t *bin,
size_t arena_bin_index(arena_t *arena, arena_bin_t *bin);
unsigned arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info,
const void *ptr, size_t size);
# ifdef JEMALLOC_PROF
prof_ctx_t *arena_prof_ctx_get(const void *ptr);
@@ -473,8 +484,16 @@ void arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr);
#endif
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_))
JEMALLOC_INLINE size_t
arena_bin_index(arena_t *arena, arena_bin_t *bin)
{
size_t binind = bin - arena->bins;
assert(binind < nbins);
return (binind);
}
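Since arena_bin_t no longer carries its own parameters, the new arena_bin_index() recovers the bin's index by pointer arithmetic and uses it to reach the shared table. A hedged sketch of this recurring lookup pattern, with toy types and a hypothetical bin count:

    #include <assert.h>
    #include <stddef.h>

    #define TOY_NBINS 8 /* hypothetical */

    typedef struct { void *runcur; } toy_bin_t;
    typedef struct { size_t reg_size; } toy_bin_info_t;
    typedef struct { toy_bin_t bins[TOY_NBINS]; } toy_arena_t;

    static toy_bin_info_t toy_bin_info[TOY_NBINS]; /* shared table */

    static size_t
    toy_bin_index(toy_arena_t *arena, toy_bin_t *bin)
    {
        size_t binind = bin - arena->bins; /* element offset, not bytes */

        assert(binind < TOY_NBINS);
        return (binind);
    }

    /* Usage: read-only parameters come from the table, not the bin. */
    static size_t
    toy_reg_size(toy_arena_t *arena, toy_bin_t *bin)
    {
        return (toy_bin_info[toy_bin_index(arena, bin)].reg_size);
    }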
JEMALLOC_INLINE unsigned
arena_run_regind(arena_run_t *run, arena_bin_t *bin, const void *ptr,
arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr,
size_t size)
{
unsigned shift, diff, regind;
@@ -485,7 +504,8 @@ arena_run_regind(arena_run_t *run, arena_bin_t *bin, const void *ptr,
* Avoid doing division with a variable divisor if possible. Using
* actual division here can reduce allocator throughput by over 20%!
*/
diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run - bin->reg0_offset);
diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run -
bin_info->reg0_offset);
/* Rescale (factor powers of 2 out of the numerator and denominator). */
shift = ffs(size) - 1;
@@ -531,7 +551,7 @@ arena_run_regind(arena_run_t *run, arena_bin_t *bin, const void *ptr,
#undef SIZE_INV_SHIFT
}
assert(diff == regind * size);
assert(regind < bin->nregs);
assert(regind < bin_info->nregs);
return (regind);
}
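The division avoidance above rests on multiplying by a precomputed, scaled reciprocal instead of dividing. A self-contained sketch of the arithmetic, assuming a shift of 21 and computing the reciprocal inline (the function in the diff instead reads it from a static per-size-class table, so no runtime divide occurs):

    #include <assert.h>

    #define TOY_SIZE_INV_SHIFT 21
    #define TOY_SIZE_INV(s) (((1U << TOY_SIZE_INV_SHIFT) / (s)) + 1)

    /* Exact whenever diff is a multiple of size and
     * diff < (1U << TOY_SIZE_INV_SHIFT), which run offsets satisfy. */
    static unsigned
    toy_regind(unsigned diff, unsigned size)
    {
        unsigned regind = (diff * TOY_SIZE_INV(size)) >> TOY_SIZE_INV_SHIFT;

        assert(regind == diff / size);
        return (regind);
    }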
@@ -558,13 +578,15 @@ arena_prof_ctx_get(const void *ptr)
arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
(uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) <<
PAGE_SHIFT));
arena_bin_t *bin = run->bin;
size_t binind = arena_bin_index(chunk->arena, run->bin);
arena_bin_info_t *bin_info = &arena_bin_info[binind];
unsigned regind;
assert(run->magic == ARENA_RUN_MAGIC);
regind = arena_run_regind(run, bin, ptr, bin->reg_size);
regind = arena_run_regind(run, bin_info, ptr,
bin_info->reg_size);
ret = *(prof_ctx_t **)((uintptr_t)run +
bin->ctx0_offset + (regind *
bin_info->ctx0_offset + (regind *
sizeof(prof_ctx_t *)));
}
} else
@@ -593,11 +615,15 @@ arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
PAGE_SHIFT));
arena_bin_t *bin = run->bin;
unsigned regind;
size_t binind;
arena_bin_info_t *bin_info;
assert(run->magic == ARENA_RUN_MAGIC);
regind = arena_run_regind(run, bin, ptr, bin->reg_size);
binind = arena_bin_index(chunk->arena, bin);
bin_info = &arena_bin_info[binind];
*((prof_ctx_t **)((uintptr_t)run + bin->ctx0_offset
*((prof_ctx_t **)((uintptr_t)run + bin_info->ctx0_offset
+ (regind * sizeof(prof_ctx_t *)))) = ctx;
} else
assert((uintptr_t)ctx == (uintptr_t)1U);
@@ -637,10 +663,17 @@ arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr)
(uintptr_t)((pageind - (mapelm->bits >>
PAGE_SHIFT)) << PAGE_SHIFT));
assert(run->magic == ARENA_RUN_MAGIC);
assert(((uintptr_t)ptr - ((uintptr_t)run +
(uintptr_t)run->bin->reg0_offset)) %
run->bin->reg_size == 0);
bin = run->bin;
#ifndef NDEBUG
{
size_t binind = arena_bin_index(arena, bin);
arena_bin_info_t *bin_info =
&arena_bin_info[binind];
assert(((uintptr_t)ptr - ((uintptr_t)run +
(uintptr_t)bin_info->reg0_offset)) %
bin_info->reg_size == 0);
}
#endif
malloc_mutex_lock(&bin->lock);
arena_dalloc_bin(arena, chunk, ptr, mapelm);
malloc_mutex_unlock(&bin->lock);


@@ -402,7 +402,7 @@ s2u(size_t size)
{
if (size <= small_maxclass)
return (arenas[0]->bins[SMALL_SIZE2BIN(size)].reg_size);
return (arena_bin_info[SMALL_SIZE2BIN(size)].reg_size);
if (size <= arena_maxclass)
return (PAGE_CEILING(size));
return (CHUNK_CEILING(size));
@@ -446,10 +446,8 @@ sa2u(size_t size, size_t alignment, size_t *run_size_p)
}
if (usize <= arena_maxclass && alignment <= PAGE_SIZE) {
if (usize <= small_maxclass) {
return
(arenas[0]->bins[SMALL_SIZE2BIN(usize)].reg_size);
}
if (usize <= small_maxclass)
return (arena_bin_info[SMALL_SIZE2BIN(usize)].reg_size);
return (PAGE_CEILING(usize));
} else {
size_t run_size;
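The changed s2u()/sa2u() paths implement a three-tier rounding policy: small requests map to a bin's region size via the shared table, mid-sized requests round up to a page, and everything else rounds up to a chunk. A toy version with hypothetical class boundaries (the real small path returns arena_bin_info[SMALL_SIZE2BIN(size)].reg_size):

    #include <stddef.h>

    #define TOY_PAGE      ((size_t)4096)
    #define TOY_CHUNK     ((size_t)1 << 22)        /* hypothetical */
    #define TOY_SMALL_MAX ((size_t)3840)           /* hypothetical */
    #define TOY_ARENA_MAX (TOY_CHUNK - TOY_PAGE)   /* hypothetical */

    static size_t
    toy_s2u(size_t size)
    {
        if (size <= TOY_SMALL_MAX)
            return (size); /* real code: the bin's reg_size */
        if (size <= TOY_ARENA_MAX)
            return ((size + TOY_PAGE - 1) & ~(TOY_PAGE - 1));
        return ((size + TOY_CHUNK - 1) & ~(TOY_CHUNK - 1));
    }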


@@ -232,7 +232,7 @@ tcache_alloc_small(tcache_t *tcache, size_t size, bool zero)
if (ret == NULL)
return (NULL);
}
assert(arena_salloc(ret) == tcache->arena->bins[binind].reg_size);
assert(arena_salloc(ret) == arena_bin_info[binind].reg_size);
if (zero == false) {
#ifdef JEMALLOC_FILL
@@ -248,7 +248,7 @@ tcache_alloc_small(tcache_t *tcache, size_t size, bool zero)
tbin->tstats.nrequests++;
#endif
#ifdef JEMALLOC_PROF
tcache->prof_accumbytes += tcache->arena->bins[binind].reg_size;
tcache->prof_accumbytes += arena_bin_info[binind].reg_size;
#endif
tcache_event(tcache);
return (ret);
@@ -331,7 +331,7 @@ tcache_dalloc_small(tcache_t *tcache, void *ptr)
#ifdef JEMALLOC_FILL
if (opt_junk)
memset(ptr, 0x5a, bin->reg_size);
memset(ptr, 0x5a, arena_bin_info[binind].reg_size);
#endif
tbin = &tcache->tbins[binind];


@@ -8,6 +8,7 @@ size_t opt_lg_qspace_max = LG_QSPACE_MAX_DEFAULT;
size_t opt_lg_cspace_max = LG_CSPACE_MAX_DEFAULT;
ssize_t opt_lg_dirty_mult = LG_DIRTY_MULT_DEFAULT;
uint8_t const *small_size2bin;
arena_bin_info_t *arena_bin_info;
/* Various bin-related settings. */
unsigned nqbins;
@@ -174,7 +175,6 @@ static void arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk,
arena_run_t *run, size_t oldsize, size_t newsize, bool dirty);
static arena_run_t *arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin);
static void *arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin);
static size_t arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size);
static void arena_dissociate_bin_run(arena_chunk_t *chunk, arena_run_t *run,
arena_bin_t *bin);
static void arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk,
@@ -192,6 +192,9 @@ static bool small_size2bin_init(void);
static void small_size2bin_validate(void);
#endif
static bool small_size2bin_init_hard(void);
static size_t bin_info_run_size_calc(arena_bin_info_t *bin_info,
size_t min_run_size);
static bool bin_info_init(void);
/******************************************************************************/
@@ -247,7 +250,7 @@ rb_gen(static JEMALLOC_ATTR(unused), arena_avail_tree_, arena_avail_tree_t,
arena_chunk_map_t, u.rb_link, arena_avail_comp)
static inline void *
arena_run_reg_alloc(arena_run_t *run, arena_bin_t *bin)
arena_run_reg_alloc(arena_run_t *run, arena_bin_info_t *bin_info)
{
void *ret;
@@ -261,16 +264,16 @@ arena_run_reg_alloc(arena_run_t *run, arena_bin_t *bin)
assert(ret != NULL);
/* Write-after free can cause assertion failure. */
assert((uintptr_t)ret >= (uintptr_t)run +
(uintptr_t)bin->reg0_offset);
(uintptr_t)bin_info->reg0_offset);
assert((uintptr_t)ret < (uintptr_t)run->next);
assert(((uintptr_t)ret - ((uintptr_t)run +
(uintptr_t)bin->reg0_offset)) % (uintptr_t)bin->reg_size ==
0);
(uintptr_t)bin_info->reg0_offset)) %
(uintptr_t)bin_info->reg_size == 0);
run->avail = *(void **)ret;
return (ret);
}
ret = run->next;
run->next = (void *)((uintptr_t)ret + (uintptr_t)bin->reg_size);
run->next = (void *)((uintptr_t)ret + (uintptr_t)bin_info->reg_size);
assert(ret != NULL);
return (ret);
}
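arena_run_reg_alloc() above serves regions from two sources: a LIFO free list threaded through the freed regions themselves (run->avail), and a bump pointer over never-yet-allocated space (run->next). A standalone sketch of the pair with toy types; the matching free path mirrors arena_run_reg_dalloc() in the next hunk (nfree bookkeeping omitted):

    #include <stddef.h>
    #include <stdint.h>

    typedef struct {
        void *avail; /* LIFO free list; links live inside the regions */
        void *next;  /* first never-allocated region (bump frontier) */
    } toy_run_t;

    static void *
    toy_reg_alloc(toy_run_t *run, size_t reg_size)
    {
        void *ret;

        if (run->avail != NULL) {
            ret = run->avail;
            run->avail = *(void **)ret; /* pop */
            return (ret);
        }
        ret = run->next;
        run->next = (void *)((uintptr_t)ret + reg_size); /* bump */
        return (ret);
    }

    static void
    toy_reg_dalloc(toy_run_t *run, void *ptr)
    {
        *(void **)ptr = run->avail; /* push */
        run->avail = ptr;
    }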
@@ -279,22 +282,27 @@ static inline void
arena_run_reg_dalloc(arena_run_t *run, void *ptr)
{
assert(run->nfree < run->bin->nregs);
#ifndef NDEBUG
arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
size_t binind = arena_bin_index(chunk->arena, run->bin);
arena_bin_info_t *bin_info = &arena_bin_info[binind];
assert(run->nfree < bin_info->nregs);
/* Freeing an interior pointer can cause assertion failure. */
assert(((uintptr_t)ptr - ((uintptr_t)run +
(uintptr_t)run->bin->reg0_offset)) % (uintptr_t)run->bin->reg_size
(uintptr_t)bin_info->reg0_offset)) % (uintptr_t)bin_info->reg_size
== 0);
/*
* Freeing a pointer lower than region zero can cause assertion
* failure.
*/
assert((uintptr_t)ptr >= (uintptr_t)run +
(uintptr_t)run->bin->reg0_offset);
(uintptr_t)bin_info->reg0_offset);
/*
* Freeing a pointer past the run's frontier can cause assertion
* failure.
*/
assert((uintptr_t)ptr < (uintptr_t)run->next);
#endif
*(void **)ptr = run->avail;
run->avail = ptr;
@@ -765,7 +773,11 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk)
assert((mapelm->bits >> PAGE_SHIFT) == 0);
assert(run->magic == ARENA_RUN_MAGIC);
pageind += run->bin->run_size >> PAGE_SHIFT;
size_t binind = arena_bin_index(arena,
run->bin);
arena_bin_info_t *bin_info =
&arena_bin_info[binind];
pageind += bin_info->run_size >> PAGE_SHIFT;
}
}
}
@@ -947,8 +959,11 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty)
CHUNK_MAP_LARGE) != 0);
assert((chunk->map[run_ind+(size>>PAGE_SHIFT)-1-map_bias].bits &
CHUNK_MAP_ALLOCATED) != 0);
} else
size = run->bin->run_size;
} else {
size_t binind = arena_bin_index(arena, run->bin);
arena_bin_info_t *bin_info = &arena_bin_info[binind];
size = bin_info->run_size;
}
run_pages = (size >> PAGE_SHIFT);
arena->nactive -= run_pages;
@@ -1175,6 +1190,8 @@ arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin)
{
arena_chunk_map_t *mapelm;
arena_run_t *run;
size_t binind;
arena_bin_info_t *bin_info;
/* Look for a usable run. */
mapelm = arena_run_tree_first(&bin->runs);
@@ -1198,18 +1215,21 @@ arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin)
}
/* No existing runs have any space available. */
binind = arena_bin_index(arena, bin);
bin_info = &arena_bin_info[binind];
/* Allocate a new run. */
malloc_mutex_unlock(&bin->lock);
/******************************/
malloc_mutex_lock(&arena->lock);
run = arena_run_alloc(arena, bin->run_size, false, false);
run = arena_run_alloc(arena, bin_info->run_size, false, false);
if (run != NULL) {
/* Initialize run internals. */
run->bin = bin;
run->avail = NULL;
run->next = (void *)((uintptr_t)run +
(uintptr_t)bin->reg0_offset);
run->nfree = bin->nregs;
(uintptr_t)bin_info->reg0_offset);
run->nfree = bin_info->nregs;
#ifdef JEMALLOC_DEBUG
run->magic = ARENA_RUN_MAGIC;
#endif
@@ -1260,18 +1280,23 @@ static void *
arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin)
{
void *ret;
size_t binind;
arena_bin_info_t *bin_info;
arena_run_t *run;
binind = arena_bin_index(arena, bin);
bin_info = &arena_bin_info[binind];
bin->runcur = NULL;
run = arena_bin_nonfull_run_get(arena, bin);
if (bin->runcur != NULL && bin->runcur->nfree > 0) {
/*
* Another thread updated runcur while this one ran without the
* bin lock in arena_bin_nonfull_run_get().
*/
assert(bin->runcur->magic == ARENA_RUN_MAGIC);
assert(bin->runcur->nfree > 0);
ret = arena_run_reg_alloc(bin->runcur, bin);
ret = arena_run_reg_alloc(bin->runcur, bin_info);
if (run != NULL) {
arena_chunk_t *chunk;
@@ -1284,7 +1309,7 @@ arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin)
* from the run.
*/
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
if (run->nfree == bin->nregs)
if (run->nfree == bin_info->nregs)
arena_dalloc_bin_run(arena, chunk, run, bin);
else
arena_bin_lower_run(arena, chunk, run, bin);
@@ -1300,7 +1325,7 @@ arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin)
assert(bin->runcur->magic == ARENA_RUN_MAGIC);
assert(bin->runcur->nfree > 0);
return (arena_run_reg_alloc(bin->runcur, bin));
return (arena_run_reg_alloc(bin->runcur, bin_info));
}
#ifdef JEMALLOC_PROF
@@ -1342,7 +1367,7 @@ arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind
malloc_mutex_lock(&bin->lock);
for (i = 0, nfill = (tbin->ncached_max >> 1); i < nfill; i++) {
if ((run = bin->runcur) != NULL && run->nfree > 0)
ptr = arena_run_reg_alloc(run, bin);
ptr = arena_run_reg_alloc(run, &arena_bin_info[binind]);
else
ptr = arena_bin_malloc_hard(arena, bin);
if (ptr == NULL)
@@ -1351,7 +1376,8 @@ arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind
tbin->avail = ptr;
}
#ifdef JEMALLOC_STATS
bin->stats.allocated += (i - tbin->ncached) * bin->reg_size;
bin->stats.allocated += (i - tbin->ncached) *
arena_bin_info[binind].reg_size;
bin->stats.nmalloc += i;
bin->stats.nrequests += tbin->tstats.nrequests;
bin->stats.nfills++;
@@ -1362,112 +1388,6 @@ arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind
}
#endif
/*
* Calculate bin->run_size such that it meets the following constraints:
*
* *) bin->run_size >= min_run_size
* *) bin->run_size <= arena_maxclass
* *) run header overhead <= RUN_MAX_OVRHD (or header overhead relaxed).
*
* bin->nregs and bin->reg0_offset are also calculated here, since these
* settings are all interdependent.
*/
static size_t
arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
{
size_t try_run_size, good_run_size;
uint32_t try_nregs, good_nregs;
uint32_t try_hdr_size, good_hdr_size;
#ifdef JEMALLOC_PROF
uint32_t try_ctx0_offset, good_ctx0_offset;
#endif
uint32_t try_reg0_offset, good_reg0_offset;
assert(min_run_size >= PAGE_SIZE);
assert(min_run_size <= arena_maxclass);
/*
* Calculate known-valid settings before entering the run_size
* expansion loop, so that the first part of the loop always copies
* valid settings.
*
* The do..while loop iteratively reduces the number of regions until
* the run header and the regions no longer overlap. A closed formula
* would be quite messy, since there is an interdependency between the
* header's mask length and the number of regions.
*/
try_run_size = min_run_size;
try_nregs = ((try_run_size - sizeof(arena_run_t)) / bin->reg_size)
+ 1; /* Counter-act try_nregs-- in loop. */
do {
try_nregs--;
try_hdr_size = sizeof(arena_run_t);
#ifdef JEMALLOC_PROF
if (opt_prof && prof_promote == false) {
/* Pad to a quantum boundary. */
try_hdr_size = QUANTUM_CEILING(try_hdr_size);
try_ctx0_offset = try_hdr_size;
/* Add space for one (prof_ctx_t *) per region. */
try_hdr_size += try_nregs * sizeof(prof_ctx_t *);
} else
try_ctx0_offset = 0;
#endif
try_reg0_offset = try_run_size - (try_nregs * bin->reg_size);
} while (try_hdr_size > try_reg0_offset);
/* run_size expansion loop. */
do {
/*
* Copy valid settings before trying more aggressive settings.
*/
good_run_size = try_run_size;
good_nregs = try_nregs;
good_hdr_size = try_hdr_size;
#ifdef JEMALLOC_PROF
good_ctx0_offset = try_ctx0_offset;
#endif
good_reg0_offset = try_reg0_offset;
/* Try more aggressive settings. */
try_run_size += PAGE_SIZE;
try_nregs = ((try_run_size - sizeof(arena_run_t)) /
bin->reg_size) + 1; /* Counter-act try_nregs-- in loop. */
do {
try_nregs--;
try_hdr_size = sizeof(arena_run_t);
#ifdef JEMALLOC_PROF
if (opt_prof && prof_promote == false) {
/* Pad to a quantum boundary. */
try_hdr_size = QUANTUM_CEILING(try_hdr_size);
try_ctx0_offset = try_hdr_size;
/*
* Add space for one (prof_ctx_t *) per region.
*/
try_hdr_size += try_nregs *
sizeof(prof_ctx_t *);
}
#endif
try_reg0_offset = try_run_size - (try_nregs *
bin->reg_size);
} while (try_hdr_size > try_reg0_offset);
} while (try_run_size <= arena_maxclass
&& RUN_MAX_OVRHD * (bin->reg_size << 3) > RUN_MAX_OVRHD_RELAX
&& (try_reg0_offset << RUN_BFP) > RUN_MAX_OVRHD * try_run_size);
assert(good_hdr_size <= good_reg0_offset);
/* Copy final settings. */
bin->run_size = good_run_size;
bin->nregs = good_nregs;
#ifdef JEMALLOC_PROF
bin->ctx0_offset = good_ctx0_offset;
#endif
bin->reg0_offset = good_reg0_offset;
return (good_run_size);
}
void *
arena_malloc_small(arena_t *arena, size_t size, bool zero)
{
@@ -1479,11 +1399,11 @@ arena_malloc_small(arena_t *arena, size_t size, bool zero)
binind = SMALL_SIZE2BIN(size);
assert(binind < nbins);
bin = &arena->bins[binind];
size = bin->reg_size;
size = arena_bin_info[binind].reg_size;
malloc_mutex_lock(&bin->lock);
if ((run = bin->runcur) != NULL && run->nfree > 0)
ret = arena_run_reg_alloc(run, bin);
ret = arena_run_reg_alloc(run, &arena_bin_info[binind]);
else
ret = arena_bin_malloc_hard(arena, bin);
@@ -1688,10 +1608,12 @@ arena_salloc(const void *ptr)
(uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) <<
PAGE_SHIFT));
assert(run->magic == ARENA_RUN_MAGIC);
size_t binind = arena_bin_index(chunk->arena, run->bin);
arena_bin_info_t *bin_info = &arena_bin_info[binind];
assert(((uintptr_t)ptr - ((uintptr_t)run +
(uintptr_t)run->bin->reg0_offset)) % run->bin->reg_size ==
(uintptr_t)bin_info->reg0_offset)) % bin_info->reg_size ==
0);
ret = run->bin->reg_size;
ret = bin_info->reg_size;
} else {
assert(((uintptr_t)ptr & PAGE_MASK) == 0);
ret = mapbits & ~PAGE_MASK;
@@ -1739,10 +1661,12 @@ arena_salloc_demote(const void *ptr)
(uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) <<
PAGE_SHIFT));
assert(run->magic == ARENA_RUN_MAGIC);
size_t binind = arena_bin_index(chunk->arena, run->bin);
arena_bin_info_t *bin_info = &arena_bin_info[binind];
assert(((uintptr_t)ptr - ((uintptr_t)run +
(uintptr_t)run->bin->reg0_offset)) % run->bin->reg_size ==
(uintptr_t)bin_info->reg0_offset)) % bin_info->reg_size ==
0);
ret = run->bin->reg_size;
ret = bin_info->reg_size;
} else {
assert(((uintptr_t)ptr & PAGE_MASK) == 0);
ret = mapbits & ~PAGE_MASK;
@@ -1751,7 +1675,7 @@ arena_salloc_demote(const void *ptr)
size_t binind = ((mapbits & CHUNK_MAP_CLASS_MASK) >>
CHUNK_MAP_CLASS_SHIFT) - 1;
assert(binind < nbins);
ret = chunk->arena->bins[binind].reg_size;
ret = arena_bin_info[binind].reg_size;
}
assert(ret != 0);
}
@@ -1768,17 +1692,22 @@ arena_dissociate_bin_run(arena_chunk_t *chunk, arena_run_t *run,
/* Dissociate run from bin. */
if (run == bin->runcur)
bin->runcur = NULL;
else if (bin->nregs != 1) {
size_t run_pageind = (((uintptr_t)run - (uintptr_t)chunk)) >>
PAGE_SHIFT;
arena_chunk_map_t *run_mapelm =
&chunk->map[run_pageind-map_bias];
/*
* This block's conditional is necessary because if the run
* only contains one region, then it never gets inserted into
* the non-full runs tree.
*/
arena_run_tree_remove(&bin->runs, run_mapelm);
else {
size_t binind = arena_bin_index(chunk->arena, bin);
arena_bin_info_t *bin_info = &arena_bin_info[binind];
if (bin_info->nregs != 1) {
size_t run_pageind = (((uintptr_t)run -
(uintptr_t)chunk)) >> PAGE_SHIFT;
arena_chunk_map_t *run_mapelm =
&chunk->map[run_pageind-map_bias];
/*
* This block's conditional is necessary because if the
* run only contains one region, then it never gets
* inserted into the non-full runs tree.
*/
arena_run_tree_remove(&bin->runs, run_mapelm);
}
}
}
@@ -1786,15 +1715,20 @@ static void
arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
arena_bin_t *bin)
{
size_t binind;
arena_bin_info_t *bin_info;
size_t npages, run_ind, past;
assert(run != bin->runcur);
assert(arena_run_tree_search(&bin->runs, &chunk->map[
(((uintptr_t)run-(uintptr_t)chunk)>>PAGE_SHIFT)-map_bias]) == NULL);
binind = arena_bin_index(chunk->arena, run->bin);
bin_info = &arena_bin_info[binind];
malloc_mutex_unlock(&bin->lock);
/******************************/
npages = bin->run_size >> PAGE_SHIFT;
npages = bin_info->run_size >> PAGE_SHIFT;
run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk) >> PAGE_SHIFT);
past = (size_t)((PAGE_CEILING((uintptr_t)run->next) - (uintptr_t)chunk)
>> PAGE_SHIFT);
@@ -1814,7 +1748,7 @@ arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
chunk->map[run_ind+npages-1-map_bias].bits = CHUNK_MAP_LARGE |
(chunk->map[run_ind+npages-1-map_bias].bits &
CHUNK_MAP_FLAGS_MASK);
chunk->map[run_ind-map_bias].bits = bin->run_size |
chunk->map[run_ind-map_bias].bits = bin_info->run_size |
CHUNK_MAP_LARGE | (chunk->map[run_ind-map_bias].bits &
CHUNK_MAP_FLAGS_MASK);
arena_run_trim_tail(arena, chunk, run, (npages << PAGE_SHIFT),
@@ -1885,8 +1819,10 @@ arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr,
(mapelm->bits >> PAGE_SHIFT)) << PAGE_SHIFT));
assert(run->magic == ARENA_RUN_MAGIC);
bin = run->bin;
size_t binind = arena_bin_index(arena, bin);
arena_bin_info_t *bin_info = &arena_bin_info[binind];
#if (defined(JEMALLOC_FILL) || defined(JEMALLOC_STATS))
size = bin->reg_size;
size = bin_info->reg_size;
#endif
#ifdef JEMALLOC_FILL
@@ -1895,7 +1831,7 @@
#endif
arena_run_reg_dalloc(run, ptr);
if (run->nfree == bin->nregs) {
if (run->nfree == bin_info->nregs) {
arena_dissociate_bin_run(chunk, run, bin);
arena_dalloc_bin_run(arena, chunk, run, bin);
} else if (run->nfree == 1 && run != bin->runcur)
@@ -2167,8 +2103,8 @@ arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra,
*/
if (oldsize <= arena_maxclass) {
if (oldsize <= small_maxclass) {
assert(choose_arena()->bins[SMALL_SIZE2BIN(
oldsize)].reg_size == oldsize);
assert(arena_bin_info[SMALL_SIZE2BIN(oldsize)].reg_size
== oldsize);
if ((size + extra <= small_maxclass &&
SMALL_SIZE2BIN(size + extra) ==
SMALL_SIZE2BIN(oldsize)) || (size <= oldsize &&
@@ -2248,7 +2184,6 @@ arena_new(arena_t *arena, unsigned ind)
{
unsigned i;
arena_bin_t *bin;
size_t prev_run_size;
arena->ind = ind;
@@ -2284,8 +2219,6 @@ arena_new(arena_t *arena, unsigned ind)
arena_avail_tree_new(&arena->runs_avail_dirty);
/* Initialize bins. */
prev_run_size = PAGE_SIZE;
i = 0;
#ifdef JEMALLOC_TINY
/* (2^n)-spaced tiny bins. */
@@ -2295,11 +2228,6 @@ arena_new(arena_t *arena, unsigned ind)
return (true);
bin->runcur = NULL;
arena_run_tree_new(&bin->runs);
bin->reg_size = (1U << (LG_TINY_MIN + i));
prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
#ifdef JEMALLOC_STATS
memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
#endif
@@ -2313,11 +2241,6 @@ arena_new(arena_t *arena, unsigned ind)
return (true);
bin->runcur = NULL;
arena_run_tree_new(&bin->runs);
bin->reg_size = (i - ntbins + 1) << LG_QUANTUM;
prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
#ifdef JEMALLOC_STATS
memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
#endif
@@ -2330,12 +2253,6 @@ arena_new(arena_t *arena, unsigned ind)
return (true);
bin->runcur = NULL;
arena_run_tree_new(&bin->runs);
bin->reg_size = cspace_min + ((i - (ntbins + nqbins)) <<
LG_CACHELINE);
prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
#ifdef JEMALLOC_STATS
memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
#endif
@@ -2348,12 +2265,6 @@ arena_new(arena_t *arena, unsigned ind)
return (true);
bin->runcur = NULL;
arena_run_tree_new(&bin->runs);
bin->reg_size = sspace_min + ((i - (ntbins + nqbins + ncbins))
<< LG_SUBPAGE);
prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
#ifdef JEMALLOC_STATS
memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
#endif
@@ -2487,6 +2398,162 @@ small_size2bin_init_hard(void)
#undef CUSTOM_SMALL_SIZE2BIN
}
/*
* Calculate bin_info->run_size such that it meets the following constraints:
*
* *) bin_info->run_size >= min_run_size
* *) bin_info->run_size <= arena_maxclass
* *) run header overhead <= RUN_MAX_OVRHD (or header overhead relaxed).
*
* bin_info->nregs and bin_info->reg0_offset are also calculated here, since
* these settings are all interdependent.
*/
static size_t
bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size)
{
size_t try_run_size, good_run_size;
uint32_t try_nregs, good_nregs;
uint32_t try_hdr_size, good_hdr_size;
#ifdef JEMALLOC_PROF
uint32_t try_ctx0_offset, good_ctx0_offset;
#endif
uint32_t try_reg0_offset, good_reg0_offset;
assert(min_run_size >= PAGE_SIZE);
assert(min_run_size <= arena_maxclass);
/*
* Calculate known-valid settings before entering the run_size
* expansion loop, so that the first part of the loop always copies
* valid settings.
*
* The do..while loop iteratively reduces the number of regions until
* the run header and the regions no longer overlap. A closed formula
* would be quite messy, since there is an interdependency between the
* header's mask length and the number of regions.
*/
try_run_size = min_run_size;
try_nregs = ((try_run_size - sizeof(arena_run_t)) / bin_info->reg_size)
+ 1; /* Counter-act try_nregs-- in loop. */
do {
try_nregs--;
try_hdr_size = sizeof(arena_run_t);
#ifdef JEMALLOC_PROF
if (opt_prof && prof_promote == false) {
/* Pad to a quantum boundary. */
try_hdr_size = QUANTUM_CEILING(try_hdr_size);
try_ctx0_offset = try_hdr_size;
/* Add space for one (prof_ctx_t *) per region. */
try_hdr_size += try_nregs * sizeof(prof_ctx_t *);
} else
try_ctx0_offset = 0;
#endif
try_reg0_offset = try_run_size - (try_nregs *
bin_info->reg_size);
} while (try_hdr_size > try_reg0_offset);
/* run_size expansion loop. */
do {
/*
* Copy valid settings before trying more aggressive settings.
*/
good_run_size = try_run_size;
good_nregs = try_nregs;
good_hdr_size = try_hdr_size;
#ifdef JEMALLOC_PROF
good_ctx0_offset = try_ctx0_offset;
#endif
good_reg0_offset = try_reg0_offset;
/* Try more aggressive settings. */
try_run_size += PAGE_SIZE;
try_nregs = ((try_run_size - sizeof(arena_run_t)) /
bin_info->reg_size)
+ 1; /* Counter-act try_nregs-- in loop. */
do {
try_nregs--;
try_hdr_size = sizeof(arena_run_t);
#ifdef JEMALLOC_PROF
if (opt_prof && prof_promote == false) {
/* Pad to a quantum boundary. */
try_hdr_size = QUANTUM_CEILING(try_hdr_size);
try_ctx0_offset = try_hdr_size;
/*
* Add space for one (prof_ctx_t *) per region.
*/
try_hdr_size += try_nregs *
sizeof(prof_ctx_t *);
}
#endif
try_reg0_offset = try_run_size - (try_nregs *
bin_info->reg_size);
} while (try_hdr_size > try_reg0_offset);
} while (try_run_size <= arena_maxclass
&& RUN_MAX_OVRHD * (bin_info->reg_size << 3) > RUN_MAX_OVRHD_RELAX
&& (try_reg0_offset << RUN_BFP) > RUN_MAX_OVRHD * try_run_size);
assert(good_hdr_size <= good_reg0_offset);
/* Copy final settings. */
bin_info->run_size = good_run_size;
bin_info->nregs = good_nregs;
#ifdef JEMALLOC_PROF
bin_info->ctx0_offset = good_ctx0_offset;
#endif
bin_info->reg0_offset = good_reg0_offset;
return (good_run_size);
}
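A worked instance of the inner do..while above, assuming a hypothetical 32-byte header, a 4 KiB candidate run size, and 48-byte regions with no profiling pad:

    #include <stdint.h>
    #include <stdio.h>

    int
    main(void)
    {
        uint32_t run_size = 4096, reg_size = 48, hdr_size = 32;
        uint32_t nregs = ((run_size - hdr_size) / reg_size) + 1;
        uint32_t reg0_offset;

        do {
            nregs--;
            reg0_offset = run_size - (nregs * reg_size);
        } while (hdr_size > reg0_offset);
        /* Prints nregs=84 reg0_offset=64: 84 * 48 = 4032 bytes of
         * regions leave 64 bytes up front, enough for the header. */
        printf("nregs=%u reg0_offset=%u\n", nregs, reg0_offset);
        return (0);
    }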
static bool
bin_info_init(void)
{
arena_bin_info_t *bin_info;
unsigned i;
size_t prev_run_size;
arena_bin_info = base_alloc(sizeof(arena_bin_info_t) * nbins);
if (arena_bin_info == NULL)
return (true);
prev_run_size = PAGE_SIZE;
i = 0;
#ifdef JEMALLOC_TINY
/* (2^n)-spaced tiny bins. */
for (; i < ntbins; i++) {
bin_info = &arena_bin_info[i];
bin_info->reg_size = (1U << (LG_TINY_MIN + i));
prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size);
}
#endif
/* Quantum-spaced bins. */
for (; i < ntbins + nqbins; i++) {
bin_info = &arena_bin_info[i];
bin_info->reg_size = (i - ntbins + 1) << LG_QUANTUM;
prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size);
}
/* Cacheline-spaced bins. */
for (; i < ntbins + nqbins + ncbins; i++) {
bin_info = &arena_bin_info[i];
bin_info->reg_size = cspace_min + ((i - (ntbins + nqbins)) <<
LG_CACHELINE);
prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size);
}
/* Subpage-spaced bins. */
for (; i < nbins; i++) {
bin_info = &arena_bin_info[i];
bin_info->reg_size = sspace_min + ((i - (ntbins + nqbins +
ncbins)) << LG_SUBPAGE);
prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size);
}
return (false);
}
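The four loops in bin_info_init() reproduce the spacing formulas that previously ran per arena in arena_new(). With hypothetical LG_TINY_MIN=3 and LG_QUANTUM=4 (the configured values differ by platform), the first few classes come out as follows:

    #include <stdio.h>

    #define TOY_LG_TINY_MIN 3 /* hypothetical */
    #define TOY_LG_QUANTUM  4 /* hypothetical */
    #define TOY_NTBINS      (TOY_LG_QUANTUM - TOY_LG_TINY_MIN)
    #define TOY_NQBINS      4 /* hypothetical */

    int
    main(void)
    {
        unsigned i;

        /* Tiny: 8.  Quantum-spaced: 16, 32, 48, 64. */
        for (i = 0; i < TOY_NTBINS; i++)
            printf("bin %u: reg_size %u\n", i, 1U << (TOY_LG_TINY_MIN + i));
        for (; i < TOY_NTBINS + TOY_NQBINS; i++)
            printf("bin %u: reg_size %u\n", i,
                (i - TOY_NTBINS + 1) << TOY_LG_QUANTUM);
        return (0);
    }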
bool
arena_boot(void)
{
@@ -2545,9 +2612,6 @@ arena_boot(void)
abort();
}
if (small_size2bin_init())
return (true);
/*
* Compute the header size such that it is large enough to contain the
* page map. The page map is biased to omit entries for the header
@@ -2571,5 +2635,11 @@ arena_boot(void)
arena_maxclass = chunksize - (map_bias << PAGE_SHIFT);
if (small_size2bin_init())
return (true);
if (bin_info_init())
return (true);
return (false);
}


@@ -1289,9 +1289,9 @@ CTL_RO_NL_GEN(opt_overcommit, opt_overcommit, bool)
/******************************************************************************/
CTL_RO_NL_GEN(arenas_bin_i_size, arenas[0]->bins[mib[2]].reg_size, size_t)
CTL_RO_NL_GEN(arenas_bin_i_nregs, arenas[0]->bins[mib[2]].nregs, uint32_t)
CTL_RO_NL_GEN(arenas_bin_i_run_size, arenas[0]->bins[mib[2]].run_size, size_t)
CTL_RO_NL_GEN(arenas_bin_i_size, arena_bin_info[mib[2]].reg_size, size_t)
CTL_RO_NL_GEN(arenas_bin_i_nregs, arena_bin_info[mib[2]].nregs, uint32_t)
CTL_RO_NL_GEN(arenas_bin_i_run_size, arena_bin_info[mib[2]].run_size, size_t)
const ctl_node_t *
arenas_bin_i_index(const size_t *mib, size_t miblen, size_t i)
{


@@ -253,9 +253,9 @@ tcache_create(arena_t *arena)
tcache->arena = arena;
assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0);
for (i = 0; i < nbins; i++) {
if ((arena->bins[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MAX) {
tcache->tbins[i].ncached_max = (arena->bins[i].nregs <<
1);
if ((arena_bin_info[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MAX) {
tcache->tbins[i].ncached_max = (arena_bin_info[i].nregs
<< 1);
} else
tcache->tbins[i].ncached_max = TCACHE_NSLOTS_SMALL_MAX;
}
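The tcache_create() hunk sizes each thread-cache bin from the shared table: twice the regions of one run, capped by TCACHE_NSLOTS_SMALL_MAX. The rule isolated, with a hypothetical cap value:

    static unsigned
    toy_ncached_max(unsigned nregs)
    {
        unsigned cap = 200; /* hypothetical TCACHE_NSLOTS_SMALL_MAX */

        return (((nregs << 1) <= cap) ? (nregs << 1) : cap);
    }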