Simplify small object allocation/deallocation.
Use chained run free lists instead of bitmaps to track free objects within small runs. Remove reference counting for small object run pages.
This commit is contained in:
parent
3fa9a2fad8
commit
1e0a636c11
@ -18,11 +18,7 @@
|
||||
|
||||
#ifdef JEMALLOC_TINY
|
||||
/* Smallest size class to support. */
|
||||
# ifdef JEMALLOC_TCACHE
|
||||
# define LG_TINY_MIN LG_SIZEOF_PTR
|
||||
# else
|
||||
# define LG_TINY_MIN 1
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/*
|
||||
@ -77,12 +73,6 @@
|
||||
#define RUN_MAX_OVRHD 0x0000003dU
|
||||
#define RUN_MAX_OVRHD_RELAX 0x00001800U
|
||||
|
||||
/* Put a cap on small object run size. This overrides RUN_MAX_OVRHD. */
|
||||
#define RUN_MAX_SMALL \
|
||||
(arena_maxclass <= (1U << (CHUNK_MAP_LG_PG_RANGE + PAGE_SHIFT)) \
|
||||
? arena_maxclass : (1U << (CHUNK_MAP_LG_PG_RANGE + \
|
||||
PAGE_SHIFT)))
|
||||
|
||||
/*
|
||||
* The minimum ratio of active:dirty pages per arena is computed as:
|
||||
*
|
||||
@ -130,7 +120,6 @@ struct arena_chunk_map_s {
|
||||
* Small/medium: Don't care.
|
||||
* Large: Run size for first page, unset for trailing pages.
|
||||
* - : Unused.
|
||||
* c : refcount (could overflow for PAGE_SIZE >= 128 KiB)
|
||||
* d : dirty?
|
||||
* z : zeroed?
|
||||
* l : large?
|
||||
@ -150,9 +139,9 @@ struct arena_chunk_map_s {
|
||||
* ssssssss ssssssss ssss---- -----z--
|
||||
*
|
||||
* Small/medium:
|
||||
* pppppppp ppppcccc cccccccc cccc---a
|
||||
* pppppppp ppppcccc cccccccc cccc---a
|
||||
* pppppppp ppppcccc cccccccc cccc---a
|
||||
* pppppppp pppppppp pppp---- -------a
|
||||
* pppppppp pppppppp pppp---- -------a
|
||||
* pppppppp pppppppp pppp---- -------a
|
||||
*
|
||||
* Large:
|
||||
* ssssssss ssssssss ssss---- ------la
|
||||
@ -160,12 +149,9 @@ struct arena_chunk_map_s {
|
||||
* -------- -------- -------- ------la
|
||||
*/
|
||||
size_t bits;
|
||||
#define CHUNK_MAP_PG_MASK ((size_t)0xfff00000U)
|
||||
#define CHUNK_MAP_PG_SHIFT 20
|
||||
#define CHUNK_MAP_LG_PG_RANGE 12
|
||||
|
||||
#define CHUNK_MAP_RC_MASK ((size_t)0xffff0U)
|
||||
#define CHUNK_MAP_RC_ONE ((size_t)0x00010U)
|
||||
#define CHUNK_MAP_PG_MASK ((size_t)0xfffff000U)
|
||||
#define CHUNK_MAP_PG_SHIFT 12
|
||||
#define CHUNK_MAP_LG_PG_RANGE 20
|
||||
|
||||
#define CHUNK_MAP_FLAGS_MASK ((size_t)0xfU)
|
||||
#define CHUNK_MAP_DIRTY ((size_t)0x8U)
|
||||
@ -209,14 +195,14 @@ struct arena_run_s {
|
||||
/* Bin this run is associated with. */
|
||||
arena_bin_t *bin;
|
||||
|
||||
/* Index of first element that might have a free region. */
|
||||
unsigned regs_minelm;
|
||||
/* Stack of available freed regions, or NULL. */
|
||||
void *avail;
|
||||
|
||||
/* Next region that has never been allocated, or run boundary. */
|
||||
void *next;
|
||||
|
||||
/* Number of free regions in run. */
|
||||
unsigned nfree;
|
||||
|
||||
/* Bitmask of in-use regions (0: in use, 1: free). */
|
||||
unsigned regs_mask[1]; /* Dynamically sized. */
|
||||
};
|
||||
|
||||
struct arena_bin_s {
|
||||
@ -244,9 +230,6 @@ struct arena_bin_s {
|
||||
/* Total number of regions in a run for this bin's size class. */
|
||||
uint32_t nregs;
|
||||
|
||||
/* Number of elements in a run's regs_mask for this bin's size class. */
|
||||
uint32_t regs_mask_nelms;
|
||||
|
||||
#ifdef JEMALLOC_PROF
|
||||
/*
|
||||
* Offset of first (prof_cnt_t *) in a run header for this bin's size
|
||||
|
@ -55,13 +55,7 @@ static const uint8_t const_small_size2bin[STATIC_PAGE_SIZE - 255] = {
|
||||
#if (LG_QUANTUM == 4)
|
||||
/* 16-byte quantum **********************/
|
||||
# ifdef JEMALLOC_TINY
|
||||
# if (LG_TINY_MIN == 1)
|
||||
S2B_2(0) /* 2 */
|
||||
S2B_2(1) /* 4 */
|
||||
S2B_4(2) /* 8 */
|
||||
S2B_8(3) /* 16 */
|
||||
# define S2B_QMIN 3
|
||||
# elif (LG_TINY_MIN == 2)
|
||||
# if (LG_TINY_MIN == 2)
|
||||
S2B_4(0) /* 4 */
|
||||
S2B_4(1) /* 8 */
|
||||
S2B_8(2) /* 16 */
|
||||
@ -88,12 +82,7 @@ static const uint8_t const_small_size2bin[STATIC_PAGE_SIZE - 255] = {
|
||||
#else
|
||||
/* 8-byte quantum ***********************/
|
||||
# ifdef JEMALLOC_TINY
|
||||
# if (LG_TINY_MIN == 1)
|
||||
S2B_2(0) /* 2 */
|
||||
S2B_2(1) /* 4 */
|
||||
S2B_4(2) /* 8 */
|
||||
# define S2B_QMIN 2
|
||||
# elif (LG_TINY_MIN == 2)
|
||||
# if (LG_TINY_MIN == 2)
|
||||
S2B_4(0) /* 4 */
|
||||
S2B_4(1) /* 8 */
|
||||
# define S2B_QMIN 1
|
||||
@ -260,246 +249,48 @@ arena_avail_comp(arena_chunk_map_t *a, arena_chunk_map_t *b)
|
||||
rb_gen(static JEMALLOC_ATTR(unused), arena_avail_tree_, arena_avail_tree_t,
|
||||
arena_chunk_map_t, link, arena_avail_comp)
|
||||
|
||||
static inline void
|
||||
arena_run_rc_incr(arena_run_t *run, arena_bin_t *bin, const void *ptr)
|
||||
{
|
||||
arena_chunk_t *chunk;
|
||||
arena_t *arena;
|
||||
size_t pagebeg, pageend, i;
|
||||
|
||||
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
|
||||
arena = chunk->arena;
|
||||
pagebeg = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
|
||||
pageend = ((uintptr_t)ptr + (uintptr_t)(bin->reg_size - 1) -
|
||||
(uintptr_t)chunk) >> PAGE_SHIFT;
|
||||
|
||||
for (i = pagebeg; i <= pageend; i++) {
|
||||
size_t mapbits = chunk->map[i].bits;
|
||||
|
||||
if (mapbits & CHUNK_MAP_DIRTY) {
|
||||
assert((mapbits & CHUNK_MAP_RC_MASK) == 0);
|
||||
chunk->ndirty--;
|
||||
arena->ndirty--;
|
||||
mapbits ^= CHUNK_MAP_DIRTY;
|
||||
}
|
||||
assert((mapbits & CHUNK_MAP_RC_MASK) != CHUNK_MAP_RC_MASK);
|
||||
mapbits += CHUNK_MAP_RC_ONE;
|
||||
chunk->map[i].bits = mapbits;
|
||||
}
|
||||
}
|
||||
|
||||
static inline void
|
||||
arena_run_rc_decr(arena_run_t *run, arena_bin_t *bin, const void *ptr)
|
||||
{
|
||||
arena_chunk_t *chunk;
|
||||
arena_t *arena;
|
||||
size_t pagebeg, pageend, mapbits, i;
|
||||
bool dirtier = false;
|
||||
|
||||
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
|
||||
arena = chunk->arena;
|
||||
pagebeg = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
|
||||
pageend = ((uintptr_t)ptr + (uintptr_t)(bin->reg_size - 1) -
|
||||
(uintptr_t)chunk) >> PAGE_SHIFT;
|
||||
|
||||
/* First page. */
|
||||
mapbits = chunk->map[pagebeg].bits;
|
||||
mapbits -= CHUNK_MAP_RC_ONE;
|
||||
if ((mapbits & CHUNK_MAP_RC_MASK) == 0) {
|
||||
dirtier = true;
|
||||
assert((mapbits & CHUNK_MAP_DIRTY) == 0);
|
||||
mapbits |= CHUNK_MAP_DIRTY;
|
||||
chunk->ndirty++;
|
||||
arena->ndirty++;
|
||||
}
|
||||
chunk->map[pagebeg].bits = mapbits;
|
||||
|
||||
if (pageend - pagebeg >= 1) {
|
||||
/*
|
||||
* Interior pages are completely consumed by the object being
|
||||
* deallocated, which means that the pages can be
|
||||
* unconditionally marked dirty.
|
||||
*/
|
||||
for (i = pagebeg + 1; i < pageend; i++) {
|
||||
mapbits = chunk->map[i].bits;
|
||||
mapbits -= CHUNK_MAP_RC_ONE;
|
||||
assert((mapbits & CHUNK_MAP_RC_MASK) == 0);
|
||||
dirtier = true;
|
||||
assert((mapbits & CHUNK_MAP_DIRTY) == 0);
|
||||
mapbits |= CHUNK_MAP_DIRTY;
|
||||
chunk->ndirty++;
|
||||
arena->ndirty++;
|
||||
chunk->map[i].bits = mapbits;
|
||||
}
|
||||
|
||||
/* Last page. */
|
||||
mapbits = chunk->map[pageend].bits;
|
||||
mapbits -= CHUNK_MAP_RC_ONE;
|
||||
if ((mapbits & CHUNK_MAP_RC_MASK) == 0) {
|
||||
dirtier = true;
|
||||
assert((mapbits & CHUNK_MAP_DIRTY) == 0);
|
||||
mapbits |= CHUNK_MAP_DIRTY;
|
||||
chunk->ndirty++;
|
||||
arena->ndirty++;
|
||||
}
|
||||
chunk->map[pageend].bits = mapbits;
|
||||
}
|
||||
|
||||
if (dirtier) {
|
||||
if (chunk->dirtied == false) {
|
||||
ql_tail_insert(&arena->chunks_dirty, chunk, link_dirty);
|
||||
chunk->dirtied = true;
|
||||
}
|
||||
|
||||
/* Enforce opt_lg_dirty_mult. */
|
||||
if (opt_lg_dirty_mult >= 0 && arena->ndirty > chunk_npages &&
|
||||
(arena->nactive >> opt_lg_dirty_mult) < arena->ndirty)
|
||||
arena_purge(arena);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void *
|
||||
arena_run_reg_alloc(arena_run_t *run, arena_bin_t *bin)
|
||||
{
|
||||
void *ret;
|
||||
unsigned i, mask, bit, regind;
|
||||
|
||||
assert(run->magic == ARENA_RUN_MAGIC);
|
||||
assert(run->regs_minelm < bin->regs_mask_nelms);
|
||||
|
||||
/*
|
||||
* Move the first check outside the loop, so that run->regs_minelm can
|
||||
* be updated unconditionally, without the possibility of updating it
|
||||
* multiple times.
|
||||
*/
|
||||
i = run->regs_minelm;
|
||||
mask = run->regs_mask[i];
|
||||
if (mask != 0) {
|
||||
/* Usable allocation found. */
|
||||
bit = ffs((int)mask) - 1;
|
||||
|
||||
regind = ((i << (LG_SIZEOF_INT + 3)) + bit);
|
||||
assert(regind < bin->nregs);
|
||||
ret = (void *)(((uintptr_t)run) + bin->reg0_offset
|
||||
+ (bin->reg_size * regind));
|
||||
|
||||
/* Clear bit. */
|
||||
mask ^= (1U << bit);
|
||||
run->regs_mask[i] = mask;
|
||||
|
||||
arena_run_rc_incr(run, bin, ret);
|
||||
assert(run->nfree > 0);
|
||||
|
||||
run->nfree--;
|
||||
ret = run->avail;
|
||||
if (ret != NULL) {
|
||||
run->avail = *(void **)ret;
|
||||
/* Double free can cause assertion failure. */
|
||||
assert(ret != NULL);
|
||||
/* Write-after free can cause assertion failure. */
|
||||
assert((uintptr_t)ret >= (uintptr_t)run +
|
||||
(uintptr_t)bin->reg0_offset);
|
||||
assert((uintptr_t)ret < (uintptr_t)run->next);
|
||||
assert(((uintptr_t)ret - ((uintptr_t)run +
|
||||
(uintptr_t)bin->reg0_offset)) % (uintptr_t)bin->reg_size ==
|
||||
0);
|
||||
return (ret);
|
||||
}
|
||||
|
||||
for (i++; i < bin->regs_mask_nelms; i++) {
|
||||
mask = run->regs_mask[i];
|
||||
if (mask != 0) {
|
||||
/* Usable allocation found. */
|
||||
bit = ffs((int)mask) - 1;
|
||||
|
||||
regind = ((i << (LG_SIZEOF_INT + 3)) + bit);
|
||||
assert(regind < bin->nregs);
|
||||
ret = (void *)(((uintptr_t)run) + bin->reg0_offset
|
||||
+ (bin->reg_size * regind));
|
||||
|
||||
/* Clear bit. */
|
||||
mask ^= (1U << bit);
|
||||
run->regs_mask[i] = mask;
|
||||
|
||||
/*
|
||||
* Make a note that nothing before this element
|
||||
* contains a free region.
|
||||
*/
|
||||
run->regs_minelm = i; /* Low payoff: + (mask == 0); */
|
||||
|
||||
arena_run_rc_incr(run, bin, ret);
|
||||
|
||||
ret = run->next;
|
||||
run->next = (void *)((uintptr_t)ret + (uintptr_t)bin->reg_size);
|
||||
assert(ret != NULL);
|
||||
return (ret);
|
||||
}
|
||||
}
|
||||
/* Not reached. */
|
||||
assert(0);
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
static inline unsigned
|
||||
arena_run_regind(arena_run_t *run, arena_bin_t *bin, const void *ptr,
|
||||
size_t size)
|
||||
{
|
||||
unsigned shift, diff, regind;
|
||||
|
||||
assert(run->magic == ARENA_RUN_MAGIC);
|
||||
|
||||
/*
|
||||
* Avoid doing division with a variable divisor if possible. Using
|
||||
* actual division here can reduce allocator throughput by over 20%!
|
||||
*/
|
||||
diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run - bin->reg0_offset);
|
||||
|
||||
/* Rescale (factor powers of 2 out of the numerator and denominator). */
|
||||
shift = ffs(size) - 1;
|
||||
diff >>= shift;
|
||||
size >>= shift;
|
||||
|
||||
if (size == 1) {
|
||||
/* The divisor was a power of 2. */
|
||||
regind = diff;
|
||||
} else {
|
||||
/*
|
||||
* To divide by a number D that is not a power of two we
|
||||
* multiply by (2^21 / D) and then right shift by 21 positions.
|
||||
*
|
||||
* X / D
|
||||
*
|
||||
* becomes
|
||||
*
|
||||
* (X * size_invs[D - 3]) >> SIZE_INV_SHIFT
|
||||
*
|
||||
* We can omit the first three elements, because we never
|
||||
* divide by 0, and 1 and 2 are both powers of two, which are
|
||||
* handled above.
|
||||
*/
|
||||
#define SIZE_INV_SHIFT 21
|
||||
#define SIZE_INV(s) (((1U << SIZE_INV_SHIFT) / (s)) + 1)
|
||||
static const unsigned size_invs[] = {
|
||||
SIZE_INV(3),
|
||||
SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7),
|
||||
SIZE_INV(8), SIZE_INV(9), SIZE_INV(10), SIZE_INV(11),
|
||||
SIZE_INV(12), SIZE_INV(13), SIZE_INV(14), SIZE_INV(15),
|
||||
SIZE_INV(16), SIZE_INV(17), SIZE_INV(18), SIZE_INV(19),
|
||||
SIZE_INV(20), SIZE_INV(21), SIZE_INV(22), SIZE_INV(23),
|
||||
SIZE_INV(24), SIZE_INV(25), SIZE_INV(26), SIZE_INV(27),
|
||||
SIZE_INV(28), SIZE_INV(29), SIZE_INV(30), SIZE_INV(31)
|
||||
};
|
||||
|
||||
if (size <= ((sizeof(size_invs) / sizeof(unsigned)) + 2))
|
||||
regind = (diff * size_invs[size - 3]) >> SIZE_INV_SHIFT;
|
||||
else
|
||||
regind = diff / size;
|
||||
#undef SIZE_INV
|
||||
#undef SIZE_INV_SHIFT
|
||||
}
|
||||
assert(diff == regind * size);
|
||||
assert(regind < bin->nregs);
|
||||
|
||||
return (regind);
|
||||
}
|
||||
|
||||
static inline void
|
||||
arena_run_reg_dalloc(arena_run_t *run, arena_bin_t *bin, void *ptr, size_t size)
|
||||
arena_run_reg_dalloc(arena_run_t *run, void *ptr)
|
||||
{
|
||||
unsigned regind, elm, bit;
|
||||
|
||||
regind = arena_run_regind(run, bin, ptr, size);
|
||||
elm = regind >> (LG_SIZEOF_INT + 3);
|
||||
if (elm < run->regs_minelm)
|
||||
run->regs_minelm = elm;
|
||||
bit = regind - (elm << (LG_SIZEOF_INT + 3));
|
||||
assert((run->regs_mask[elm] & (1U << bit)) == 0);
|
||||
run->regs_mask[elm] |= (1U << bit);
|
||||
assert(run->nfree < run->bin->nregs);
|
||||
/* Freeing an interior pointer can cause assertion failure. */
|
||||
assert(((uintptr_t)ptr - ((uintptr_t)run +
|
||||
(uintptr_t)run->bin->reg0_offset)) % (uintptr_t)run->bin->reg_size
|
||||
== 0);
|
||||
|
||||
arena_run_rc_decr(run, bin, ptr);
|
||||
*(void **)ptr = run->avail;
|
||||
run->avail = ptr;
|
||||
run->nfree++;
|
||||
}
|
||||
|
||||
static void
|
||||
@ -571,12 +362,6 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large,
|
||||
* tries to operate on an interior pointer.
|
||||
*/
|
||||
chunk->map[run_ind].bits |= size;
|
||||
} else {
|
||||
/*
|
||||
* Initialize the first page's refcount to 1, so that the run
|
||||
* header is protected from dirty page purging.
|
||||
*/
|
||||
chunk->map[run_ind].bits += CHUNK_MAP_RC_ONE;
|
||||
}
|
||||
}
|
||||
|
||||
@ -960,7 +745,6 @@ arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin)
|
||||
{
|
||||
arena_chunk_map_t *mapelm;
|
||||
arena_run_t *run;
|
||||
unsigned i, remainder;
|
||||
|
||||
/* Look for a usable run. */
|
||||
mapelm = arena_run_tree_first(&bin->runs);
|
||||
@ -991,20 +775,8 @@ arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin)
|
||||
|
||||
/* Initialize run internals. */
|
||||
run->bin = bin;
|
||||
|
||||
for (i = 0; i < bin->regs_mask_nelms - 1; i++)
|
||||
run->regs_mask[i] = UINT_MAX;
|
||||
remainder = bin->nregs & ((1U << (LG_SIZEOF_INT + 3)) - 1);
|
||||
if (remainder == 0)
|
||||
run->regs_mask[i] = UINT_MAX;
|
||||
else {
|
||||
/* The last element has spare bits that need to be unset. */
|
||||
run->regs_mask[i] = (UINT_MAX >> ((1U << (LG_SIZEOF_INT + 3))
|
||||
- remainder));
|
||||
}
|
||||
|
||||
run->regs_minelm = 0;
|
||||
|
||||
run->avail = NULL;
|
||||
run->next = (void *)(((uintptr_t)run) + (uintptr_t)bin->reg0_offset);
|
||||
run->nfree = bin->nregs;
|
||||
#ifdef JEMALLOC_DEBUG
|
||||
run->magic = ARENA_RUN_MAGIC;
|
||||
@ -1019,23 +791,7 @@ arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin)
|
||||
return (run);
|
||||
}
|
||||
|
||||
/* bin->runcur must have space available before this function is called. */
|
||||
static inline void *
|
||||
arena_bin_malloc_easy(arena_t *arena, arena_bin_t *bin, arena_run_t *run)
|
||||
{
|
||||
void *ret;
|
||||
|
||||
assert(run->magic == ARENA_RUN_MAGIC);
|
||||
assert(run->nfree > 0);
|
||||
|
||||
ret = arena_run_reg_alloc(run, bin);
|
||||
assert(ret != NULL);
|
||||
run->nfree--;
|
||||
|
||||
return (ret);
|
||||
}
|
||||
|
||||
/* Re-fill bin->runcur, then call arena_bin_malloc_easy(). */
|
||||
/* Re-fill bin->runcur, then call arena_run_reg_alloc(). */
|
||||
static void *
|
||||
arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin)
|
||||
{
|
||||
@ -1046,7 +802,7 @@ arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin)
|
||||
assert(bin->runcur->magic == ARENA_RUN_MAGIC);
|
||||
assert(bin->runcur->nfree > 0);
|
||||
|
||||
return (arena_bin_malloc_easy(arena, bin, bin->runcur));
|
||||
return (arena_run_reg_alloc(bin->runcur, bin));
|
||||
}
|
||||
|
||||
#ifdef JEMALLOC_TCACHE
|
||||
@ -1071,7 +827,7 @@ arena_tcache_fill(arena_t *arena, tcache_bin_t *tbin, size_t binind
|
||||
#endif
|
||||
for (i = 0, nfill = (tbin->ncached_max >> 1); i < nfill; i++) {
|
||||
if ((run = bin->runcur) != NULL && run->nfree > 0)
|
||||
ptr = arena_bin_malloc_easy(arena, bin, run);
|
||||
ptr = arena_run_reg_alloc(run, bin);
|
||||
else
|
||||
ptr = arena_bin_malloc_hard(arena, bin);
|
||||
if (ptr == NULL)
|
||||
@ -1120,19 +876,17 @@ arena_prof_accum(arena_t *arena, uint64_t accumbytes)
|
||||
*
|
||||
* *) bin->run_size >= min_run_size
|
||||
* *) bin->run_size <= arena_maxclass
|
||||
* *) bin->run_size <= RUN_MAX_SMALL
|
||||
* *) run header overhead <= RUN_MAX_OVRHD (or header overhead relaxed).
|
||||
* *) run header size < PAGE_SIZE
|
||||
*
|
||||
* bin->nregs, bin->regs_mask_nelms, and bin->reg0_offset are
|
||||
* also calculated here, since these settings are all interdependent.
|
||||
* bin->nregs and bin->reg0_offset are also calculated here, since these
|
||||
* settings are all interdependent.
|
||||
*/
|
||||
static size_t
|
||||
arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
|
||||
{
|
||||
size_t try_run_size, good_run_size;
|
||||
uint32_t try_nregs, good_nregs;
|
||||
uint32_t try_mask_nelms, good_mask_nelms;
|
||||
uint32_t try_hdr_size, good_hdr_size;
|
||||
#ifdef JEMALLOC_PROF
|
||||
uint32_t try_cnt0_offset, good_cnt0_offset;
|
||||
@ -1141,7 +895,6 @@ arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
|
||||
|
||||
assert(min_run_size >= PAGE_SIZE);
|
||||
assert(min_run_size <= arena_maxclass);
|
||||
assert(min_run_size <= RUN_MAX_SMALL);
|
||||
|
||||
/*
|
||||
* Calculate known-valid settings before entering the run_size
|
||||
@ -1158,10 +911,7 @@ arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
|
||||
+ 1; /* Counter-act try_nregs-- in loop. */
|
||||
do {
|
||||
try_nregs--;
|
||||
try_mask_nelms = (try_nregs >> (LG_SIZEOF_INT + 3)) +
|
||||
((try_nregs & ((1U << (LG_SIZEOF_INT + 3)) - 1)) ? 1 : 0);
|
||||
try_hdr_size = sizeof(arena_run_t) + (sizeof(unsigned) *
|
||||
(try_mask_nelms - 1));
|
||||
try_hdr_size = sizeof(arena_run_t);
|
||||
#ifdef JEMALLOC_PROF
|
||||
if (opt_prof) {
|
||||
/* Pad to a quantum boundary. */
|
||||
@ -1182,7 +932,6 @@ arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
|
||||
*/
|
||||
good_run_size = try_run_size;
|
||||
good_nregs = try_nregs;
|
||||
good_mask_nelms = try_mask_nelms;
|
||||
good_hdr_size = try_hdr_size;
|
||||
#ifdef JEMALLOC_PROF
|
||||
good_cnt0_offset = try_cnt0_offset;
|
||||
@ -1195,11 +944,7 @@ arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
|
||||
bin->reg_size) + 1; /* Counter-act try_nregs-- in loop. */
|
||||
do {
|
||||
try_nregs--;
|
||||
try_mask_nelms = (try_nregs >> (LG_SIZEOF_INT + 3)) +
|
||||
((try_nregs & ((1U << (LG_SIZEOF_INT + 3)) - 1)) ?
|
||||
1 : 0);
|
||||
try_hdr_size = sizeof(arena_run_t) + (sizeof(unsigned) *
|
||||
(try_mask_nelms - 1));
|
||||
try_hdr_size = sizeof(arena_run_t);
|
||||
#ifdef JEMALLOC_PROF
|
||||
if (opt_prof) {
|
||||
/* Pad to a quantum boundary. */
|
||||
@ -1216,18 +961,17 @@ arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
|
||||
try_reg0_offset = try_run_size - (try_nregs *
|
||||
bin->reg_size);
|
||||
} while (try_hdr_size > try_reg0_offset);
|
||||
} while (try_run_size <= arena_maxclass && try_run_size <= RUN_MAX_SMALL
|
||||
} while (try_run_size <= arena_maxclass
|
||||
&& try_run_size <= arena_maxclass
|
||||
&& RUN_MAX_OVRHD * (bin->reg_size << 3) > RUN_MAX_OVRHD_RELAX
|
||||
&& (try_reg0_offset << RUN_BFP) > RUN_MAX_OVRHD * try_run_size
|
||||
&& try_hdr_size < PAGE_SIZE);
|
||||
|
||||
assert(good_hdr_size <= good_reg0_offset);
|
||||
assert((good_mask_nelms << (LG_SIZEOF_INT + 3)) >= good_nregs);
|
||||
|
||||
/* Copy final settings. */
|
||||
bin->run_size = good_run_size;
|
||||
bin->nregs = good_nregs;
|
||||
bin->regs_mask_nelms = good_mask_nelms;
|
||||
#ifdef JEMALLOC_PROF
|
||||
bin->cnt0_offset = good_cnt0_offset;
|
||||
#endif
|
||||
@ -1251,7 +995,7 @@ arena_malloc_small(arena_t *arena, size_t size, bool zero)
|
||||
|
||||
malloc_mutex_lock(&arena->lock);
|
||||
if ((run = bin->runcur) != NULL && run->nfree > 0)
|
||||
ret = arena_bin_malloc_easy(arena, bin, run);
|
||||
ret = arena_run_reg_alloc(run, bin);
|
||||
else
|
||||
ret = arena_bin_malloc_hard(arena, bin);
|
||||
|
||||
@ -1306,7 +1050,7 @@ arena_malloc_medium(arena_t *arena, size_t size, bool zero)
|
||||
|
||||
malloc_mutex_lock(&arena->lock);
|
||||
if ((run = bin->runcur) != NULL && run->nfree > 0)
|
||||
ret = arena_bin_malloc_easy(arena, bin, run);
|
||||
ret = arena_run_reg_alloc(run, bin);
|
||||
else
|
||||
ret = arena_bin_malloc_hard(arena, bin);
|
||||
|
||||
@ -1522,6 +1266,69 @@ arena_salloc(const void *ptr)
|
||||
}
|
||||
|
||||
#ifdef JEMALLOC_PROF
|
||||
static inline unsigned
|
||||
arena_run_regind(arena_run_t *run, arena_bin_t *bin, const void *ptr,
|
||||
size_t size)
|
||||
{
|
||||
unsigned shift, diff, regind;
|
||||
|
||||
assert(run->magic == ARENA_RUN_MAGIC);
|
||||
|
||||
/*
|
||||
* Avoid doing division with a variable divisor if possible. Using
|
||||
* actual division here can reduce allocator throughput by over 20%!
|
||||
*/
|
||||
diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run - bin->reg0_offset);
|
||||
|
||||
/* Rescale (factor powers of 2 out of the numerator and denominator). */
|
||||
shift = ffs(size) - 1;
|
||||
diff >>= shift;
|
||||
size >>= shift;
|
||||
|
||||
if (size == 1) {
|
||||
/* The divisor was a power of 2. */
|
||||
regind = diff;
|
||||
} else {
|
||||
/*
|
||||
* To divide by a number D that is not a power of two we
|
||||
* multiply by (2^21 / D) and then right shift by 21 positions.
|
||||
*
|
||||
* X / D
|
||||
*
|
||||
* becomes
|
||||
*
|
||||
* (X * size_invs[D - 3]) >> SIZE_INV_SHIFT
|
||||
*
|
||||
* We can omit the first three elements, because we never
|
||||
* divide by 0, and 1 and 2 are both powers of two, which are
|
||||
* handled above.
|
||||
*/
|
||||
#define SIZE_INV_SHIFT 21
|
||||
#define SIZE_INV(s) (((1U << SIZE_INV_SHIFT) / (s)) + 1)
|
||||
static const unsigned size_invs[] = {
|
||||
SIZE_INV(3),
|
||||
SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7),
|
||||
SIZE_INV(8), SIZE_INV(9), SIZE_INV(10), SIZE_INV(11),
|
||||
SIZE_INV(12), SIZE_INV(13), SIZE_INV(14), SIZE_INV(15),
|
||||
SIZE_INV(16), SIZE_INV(17), SIZE_INV(18), SIZE_INV(19),
|
||||
SIZE_INV(20), SIZE_INV(21), SIZE_INV(22), SIZE_INV(23),
|
||||
SIZE_INV(24), SIZE_INV(25), SIZE_INV(26), SIZE_INV(27),
|
||||
SIZE_INV(28), SIZE_INV(29), SIZE_INV(30), SIZE_INV(31)
|
||||
};
|
||||
|
||||
if (size <= ((sizeof(size_invs) / sizeof(unsigned)) + 2))
|
||||
regind = (diff * size_invs[size - 3]) >> SIZE_INV_SHIFT;
|
||||
else
|
||||
regind = diff / size;
|
||||
#undef SIZE_INV
|
||||
#undef SIZE_INV_SHIFT
|
||||
}
|
||||
assert(diff == regind * size);
|
||||
assert(regind < bin->nregs);
|
||||
|
||||
return (regind);
|
||||
}
|
||||
|
||||
prof_thr_cnt_t *
|
||||
arena_prof_cnt_get(const void *ptr)
|
||||
{
|
||||
@ -1589,7 +1396,7 @@ static void
|
||||
arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
|
||||
arena_bin_t *bin)
|
||||
{
|
||||
size_t run_ind;
|
||||
size_t run_ind, past;
|
||||
|
||||
/* Deallocate run. */
|
||||
if (run == bin->runcur)
|
||||
@ -1606,17 +1413,16 @@ arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
|
||||
*/
|
||||
arena_run_tree_remove(&bin->runs, run_mapelm);
|
||||
}
|
||||
/*
|
||||
* Mark the first page as dirty. The dirty bit for every other page in
|
||||
* the run is already properly set, which means we can call
|
||||
* arena_run_dalloc(..., false), thus potentially avoiding the needless
|
||||
* creation of many dirty pages.
|
||||
*/
|
||||
/* Mark all pages that were ever used for allocations as dirty. */
|
||||
run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk) >> PAGE_SHIFT);
|
||||
past = (size_t)(((uintptr_t)run->next - (uintptr_t)1U -
|
||||
(uintptr_t)chunk) >> PAGE_SHIFT) + 1;
|
||||
chunk->ndirty += past - run_ind;
|
||||
arena->ndirty += past - run_ind;
|
||||
for (; run_ind < past; run_ind++) {
|
||||
assert((chunk->map[run_ind].bits & CHUNK_MAP_DIRTY) == 0);
|
||||
chunk->map[run_ind].bits |= CHUNK_MAP_DIRTY;
|
||||
chunk->ndirty++;
|
||||
arena->ndirty++;
|
||||
}
|
||||
|
||||
#ifdef JEMALLOC_DEBUG
|
||||
run->magic = 0;
|
||||
@ -1643,7 +1449,9 @@ arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr,
|
||||
size_t pageind;
|
||||
arena_run_t *run;
|
||||
arena_bin_t *bin;
|
||||
#if (defined(JEMALLOC_FILL) || defined(JEMALLOC_STATS))
|
||||
size_t size;
|
||||
#endif
|
||||
|
||||
pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT);
|
||||
run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind -
|
||||
@ -1651,15 +1459,16 @@ arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr,
|
||||
PAGE_SHIFT));
|
||||
assert(run->magic == ARENA_RUN_MAGIC);
|
||||
bin = run->bin;
|
||||
#if (defined(JEMALLOC_FILL) || defined(JEMALLOC_STATS))
|
||||
size = bin->reg_size;
|
||||
#endif
|
||||
|
||||
#ifdef JEMALLOC_FILL
|
||||
if (opt_junk)
|
||||
memset(ptr, 0x5a, size);
|
||||
#endif
|
||||
|
||||
arena_run_reg_dalloc(run, bin, ptr, size);
|
||||
run->nfree++;
|
||||
arena_run_reg_dalloc(run, ptr);
|
||||
|
||||
if (run->nfree == bin->nregs)
|
||||
arena_dalloc_bin_run(arena, chunk, run, bin);
|
||||
|
Loading…
Reference in New Issue
Block a user