Simplify small object allocation/deallocation.
Use chained run free lists instead of bitmaps to track free objects within small runs. Remove reference counting for small object run pages.
parent 3fa9a2fad8
commit 1e0a636c11
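The heart of the change is easy to model in isolation. The sketch below is a minimal, self-contained illustration of the new scheme, not jemalloc's code: every freed region stores a pointer to the previously freed one, so the run header only needs a stack head (avail), a bump pointer over never-used regions (next), and a count (nfree). All names here (run_t, run_reg_alloc, run_reg_dalloc, region_size, limit) are illustrative stand-ins for the fields and functions touched by this commit.

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

/* Illustrative run header, mirroring the new arena_run_s fields. */
typedef struct {
	void     *avail;       /* LIFO stack of freed regions, or NULL. */
	void     *next;        /* First region that has never been allocated. */
	unsigned  nfree;       /* Number of free regions remaining. */
	size_t    region_size; /* Size of each region (hypothetical field). */
	void     *limit;       /* End of the run's region area (hypothetical). */
} run_t;

static void *
run_reg_alloc(run_t *run)
{
	void *ret;

	assert(run->nfree > 0);
	run->nfree--;

	ret = run->avail;
	if (ret != NULL) {
		/* Pop a previously freed region off the chained free list. */
		run->avail = *(void **)ret;
		return ret;
	}
	/* Otherwise carve the next never-used region off the run. */
	ret = run->next;
	run->next = (void *)((uintptr_t)ret + run->region_size);
	assert((uintptr_t)run->next <= (uintptr_t)run->limit);
	return ret;
}

static void
run_reg_dalloc(run_t *run, void *ptr)
{
	/* Push the region onto the free list; the region itself stores the link. */
	*(void **)ptr = run->avail;
	run->avail = ptr;
	run->nfree++;
}

Compared with the old regs_mask bitmaps, allocation no longer scans mask words for a set bit and deallocation no longer converts a pointer back into a region index, so both paths reduce to a handful of pointer operations.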
@@ -18,11 +18,7 @@
 
 #ifdef JEMALLOC_TINY
    /* Smallest size class to support. */
-#  ifdef JEMALLOC_TCACHE
-#    define LG_TINY_MIN	LG_SIZEOF_PTR
-#  else
-#    define LG_TINY_MIN	1
-#  endif
+#  define LG_TINY_MIN	LG_SIZEOF_PTR
 #endif
 
 /*
@@ -77,12 +73,6 @@
 #define RUN_MAX_OVRHD		0x0000003dU
 #define RUN_MAX_OVRHD_RELAX	0x00001800U
 
-/* Put a cap on small object run size.  This overrides RUN_MAX_OVRHD. */
-#define RUN_MAX_SMALL \
-	(arena_maxclass <= (1U << (CHUNK_MAP_LG_PG_RANGE + PAGE_SHIFT)) \
-	    ? arena_maxclass : (1U << (CHUNK_MAP_LG_PG_RANGE + \
-	    PAGE_SHIFT)))
-
 /*
  * The minimum ratio of active:dirty pages per arena is computed as:
  *
@@ -130,7 +120,6 @@ struct arena_chunk_map_s {
 	 *   Small/medium: Don't care.
 	 *   Large: Run size for first page, unset for trailing pages.
 	 * - : Unused.
-	 * c : refcount (could overflow for PAGE_SIZE >= 128 KiB)
 	 * d : dirty?
 	 * z : zeroed?
 	 * l : large?
@@ -150,9 +139,9 @@ struct arena_chunk_map_s {
 	 *     ssssssss ssssssss ssss---- -----z--
 	 *
 	 *   Small/medium:
-	 *     pppppppp ppppcccc cccccccc cccc---a
-	 *     pppppppp ppppcccc cccccccc cccc---a
-	 *     pppppppp ppppcccc cccccccc cccc---a
+	 *     pppppppp pppppppp pppp---- -------a
+	 *     pppppppp pppppppp pppp---- -------a
+	 *     pppppppp pppppppp pppp---- -------a
 	 *
 	 *   Large:
 	 *     ssssssss ssssssss ssss---- ------la
@@ -160,12 +149,9 @@ struct arena_chunk_map_s {
 	 *     -------- -------- -------- ------la
 	 */
 	size_t	bits;
-#define	CHUNK_MAP_PG_MASK	((size_t)0xfff00000U)
-#define	CHUNK_MAP_PG_SHIFT	20
-#define	CHUNK_MAP_LG_PG_RANGE	12
+#define	CHUNK_MAP_PG_MASK	((size_t)0xfffff000U)
+#define	CHUNK_MAP_PG_SHIFT	12
+#define	CHUNK_MAP_LG_PG_RANGE	20
 
-#define	CHUNK_MAP_RC_MASK	((size_t)0xffff0U)
-#define	CHUNK_MAP_RC_ONE	((size_t)0x00010U)
-
 #define	CHUNK_MAP_FLAGS_MASK	((size_t)0xfU)
 #define	CHUNK_MAP_DIRTY		((size_t)0x8U)
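With the refcount field gone, the packed map word is simpler: the page ("p") bits occupy the upper 20 bits (mask 0xfffff000, shift 12) and only the four flag bits remain in the low nibble. A minimal sketch of decoding such a word using the masks above; the helper names are not from jemalloc:

#include <stddef.h>

#define CHUNK_MAP_PG_MASK	((size_t)0xfffff000U)
#define CHUNK_MAP_PG_SHIFT	12
#define CHUNK_MAP_DIRTY		((size_t)0x8U)

/* Extract the page field and the dirty flag from a packed map word. */
static inline size_t map_pg(size_t mapbits)
{
	return (mapbits & CHUNK_MAP_PG_MASK) >> CHUNK_MAP_PG_SHIFT;
}
static inline int map_is_dirty(size_t mapbits)
{
	return (mapbits & CHUNK_MAP_DIRTY) != 0;
}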
@@ -209,14 +195,14 @@ struct arena_run_s {
 	/* Bin this run is associated with. */
 	arena_bin_t	*bin;
 
-	/* Index of first element that might have a free region. */
-	unsigned	regs_minelm;
+	/* Stack of available freed regions, or NULL. */
+	void		*avail;
+
+	/* Next region that has never been allocated, or run boundary. */
+	void		*next;
 
 	/* Number of free regions in run. */
 	unsigned	nfree;
-
-	/* Bitmask of in-use regions (0: in use, 1: free). */
-	unsigned	regs_mask[1]; /* Dynamically sized. */
 };
 
 struct arena_bin_s {
@@ -244,9 +230,6 @@ struct arena_bin_s {
 	/* Total number of regions in a run for this bin's size class. */
 	uint32_t	nregs;
 
-	/* Number of elements in a run's regs_mask for this bin's size class. */
-	uint32_t	regs_mask_nelms;
-
 #ifdef JEMALLOC_PROF
 	/*
 	 * Offset of first (prof_cnt_t *) in a run header for this bin's size
@@ -55,21 +55,15 @@ static const uint8_t const_small_size2bin[STATIC_PAGE_SIZE - 255] = {
 #if (LG_QUANTUM == 4)
 /* 16-byte quantum **********************/
 #  ifdef JEMALLOC_TINY
-#    if (LG_TINY_MIN == 1)
-	S2B_2(0)		/* 2 */
-	S2B_2(1)		/* 4 */
-	S2B_4(2)		/* 8 */
-	S2B_8(3)		/* 16 */
-#      define S2B_QMIN 3
-#    elif (LG_TINY_MIN == 2)
+#    if (LG_TINY_MIN == 2)
 	S2B_4(0)		/* 4 */
 	S2B_4(1)		/* 8 */
 	S2B_8(2)		/* 16 */
 #      define S2B_QMIN 2
 #    elif (LG_TINY_MIN == 3)
 	S2B_8(0)		/* 8 */
 	S2B_8(1)		/* 16 */
 #      define S2B_QMIN 1
 #    else
 #      error "Unsupported LG_TINY_MIN"
 #    endif
@@ -88,15 +82,10 @@ static const uint8_t const_small_size2bin[STATIC_PAGE_SIZE - 255] = {
 #else
 /* 8-byte quantum ***********************/
 #  ifdef JEMALLOC_TINY
-#    if (LG_TINY_MIN == 1)
-	S2B_2(0)		/* 2 */
-	S2B_2(1)		/* 4 */
-	S2B_4(2)		/* 8 */
-#      define S2B_QMIN 2
-#    elif (LG_TINY_MIN == 2)
+#    if (LG_TINY_MIN == 2)
 	S2B_4(0)		/* 4 */
 	S2B_4(1)		/* 8 */
 #      define S2B_QMIN 1
 #    else
 #      error "Unsupported LG_TINY_MIN"
 #    endif
@@ -260,246 +249,48 @@ arena_avail_comp(arena_chunk_map_t *a, arena_chunk_map_t *b)
 rb_gen(static JEMALLOC_ATTR(unused), arena_avail_tree_, arena_avail_tree_t,
     arena_chunk_map_t, link, arena_avail_comp)
 
-static inline void
-arena_run_rc_incr(arena_run_t *run, arena_bin_t *bin, const void *ptr)
-{
-	arena_chunk_t *chunk;
-	arena_t *arena;
-	size_t pagebeg, pageend, i;
-
-	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
-	arena = chunk->arena;
-	pagebeg = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
-	pageend = ((uintptr_t)ptr + (uintptr_t)(bin->reg_size - 1) -
-	    (uintptr_t)chunk) >> PAGE_SHIFT;
-
-	for (i = pagebeg; i <= pageend; i++) {
-		size_t mapbits = chunk->map[i].bits;
-
-		if (mapbits & CHUNK_MAP_DIRTY) {
-			assert((mapbits & CHUNK_MAP_RC_MASK) == 0);
-			chunk->ndirty--;
-			arena->ndirty--;
-			mapbits ^= CHUNK_MAP_DIRTY;
-		}
-		assert((mapbits & CHUNK_MAP_RC_MASK) != CHUNK_MAP_RC_MASK);
-		mapbits += CHUNK_MAP_RC_ONE;
-		chunk->map[i].bits = mapbits;
-	}
-}
-
-static inline void
-arena_run_rc_decr(arena_run_t *run, arena_bin_t *bin, const void *ptr)
-{
-	arena_chunk_t *chunk;
-	arena_t *arena;
-	size_t pagebeg, pageend, mapbits, i;
-	bool dirtier = false;
-
-	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
-	arena = chunk->arena;
-	pagebeg = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
-	pageend = ((uintptr_t)ptr + (uintptr_t)(bin->reg_size - 1) -
-	    (uintptr_t)chunk) >> PAGE_SHIFT;
-
-	/* First page. */
-	mapbits = chunk->map[pagebeg].bits;
-	mapbits -= CHUNK_MAP_RC_ONE;
-	if ((mapbits & CHUNK_MAP_RC_MASK) == 0) {
-		dirtier = true;
-		assert((mapbits & CHUNK_MAP_DIRTY) == 0);
-		mapbits |= CHUNK_MAP_DIRTY;
-		chunk->ndirty++;
-		arena->ndirty++;
-	}
-	chunk->map[pagebeg].bits = mapbits;
-
-	if (pageend - pagebeg >= 1) {
-		/*
-		 * Interior pages are completely consumed by the object being
-		 * deallocated, which means that the pages can be
-		 * unconditionally marked dirty.
-		 */
-		for (i = pagebeg + 1; i < pageend; i++) {
-			mapbits = chunk->map[i].bits;
-			mapbits -= CHUNK_MAP_RC_ONE;
-			assert((mapbits & CHUNK_MAP_RC_MASK) == 0);
-			dirtier = true;
-			assert((mapbits & CHUNK_MAP_DIRTY) == 0);
-			mapbits |= CHUNK_MAP_DIRTY;
-			chunk->ndirty++;
-			arena->ndirty++;
-			chunk->map[i].bits = mapbits;
-		}
-
-		/* Last page. */
-		mapbits = chunk->map[pageend].bits;
-		mapbits -= CHUNK_MAP_RC_ONE;
-		if ((mapbits & CHUNK_MAP_RC_MASK) == 0) {
-			dirtier = true;
-			assert((mapbits & CHUNK_MAP_DIRTY) == 0);
-			mapbits |= CHUNK_MAP_DIRTY;
-			chunk->ndirty++;
-			arena->ndirty++;
-		}
-		chunk->map[pageend].bits = mapbits;
-	}
-
-	if (dirtier) {
-		if (chunk->dirtied == false) {
-			ql_tail_insert(&arena->chunks_dirty, chunk, link_dirty);
-			chunk->dirtied = true;
-		}
-
-		/* Enforce opt_lg_dirty_mult. */
-		if (opt_lg_dirty_mult >= 0 && arena->ndirty > chunk_npages &&
-		    (arena->nactive >> opt_lg_dirty_mult) < arena->ndirty)
-			arena_purge(arena);
-	}
-}
-
 static inline void *
 arena_run_reg_alloc(arena_run_t *run, arena_bin_t *bin)
 {
 	void *ret;
-	unsigned i, mask, bit, regind;
 
 	assert(run->magic == ARENA_RUN_MAGIC);
-	assert(run->regs_minelm < bin->regs_mask_nelms);
+	assert(run->nfree > 0);
 
-	/*
-	 * Move the first check outside the loop, so that run->regs_minelm can
-	 * be updated unconditionally, without the possibility of updating it
-	 * multiple times.
-	 */
-	i = run->regs_minelm;
-	mask = run->regs_mask[i];
-	if (mask != 0) {
-		/* Usable allocation found. */
-		bit = ffs((int)mask) - 1;
-
-		regind = ((i << (LG_SIZEOF_INT + 3)) + bit);
-		assert(regind < bin->nregs);
-		ret = (void *)(((uintptr_t)run) + bin->reg0_offset
-		    + (bin->reg_size * regind));
-
-		/* Clear bit. */
-		mask ^= (1U << bit);
-		run->regs_mask[i] = mask;
-
-		arena_run_rc_incr(run, bin, ret);
-
+	run->nfree--;
+	ret = run->avail;
+	if (ret != NULL) {
+		run->avail = *(void **)ret;
+		/* Double free can cause assertion failure.*/
+		assert(ret != NULL);
+		/* Write-after free can cause assertion failure. */
+		assert((uintptr_t)ret >= (uintptr_t)run +
+		    (uintptr_t)bin->reg0_offset);
+		assert((uintptr_t)ret < (uintptr_t)run->next);
+		assert(((uintptr_t)ret - ((uintptr_t)run +
+		    (uintptr_t)bin->reg0_offset)) % (uintptr_t)bin->reg_size ==
+		    0);
 		return (ret);
 	}
 
-	for (i++; i < bin->regs_mask_nelms; i++) {
-		mask = run->regs_mask[i];
-		if (mask != 0) {
-			/* Usable allocation found. */
-			bit = ffs((int)mask) - 1;
-
-			regind = ((i << (LG_SIZEOF_INT + 3)) + bit);
-			assert(regind < bin->nregs);
-			ret = (void *)(((uintptr_t)run) + bin->reg0_offset
-			    + (bin->reg_size * regind));
-
-			/* Clear bit. */
-			mask ^= (1U << bit);
-			run->regs_mask[i] = mask;
-
-			/*
-			 * Make a note that nothing before this element
-			 * contains a free region.
-			 */
-			run->regs_minelm = i; /* Low payoff: + (mask == 0); */
-
-			arena_run_rc_incr(run, bin, ret);
-
-			return (ret);
-		}
-	}
-	/* Not reached. */
-	assert(0);
-	return (NULL);
-}
-
-static inline unsigned
-arena_run_regind(arena_run_t *run, arena_bin_t *bin, const void *ptr,
-    size_t size)
-{
-	unsigned shift, diff, regind;
-
-	assert(run->magic == ARENA_RUN_MAGIC);
-
-	/*
-	 * Avoid doing division with a variable divisor if possible.  Using
-	 * actual division here can reduce allocator throughput by over 20%!
-	 */
-	diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run - bin->reg0_offset);
-
-	/* Rescale (factor powers of 2 out of the numerator and denominator). */
-	shift = ffs(size) - 1;
-	diff >>= shift;
-	size >>= shift;
-
-	if (size == 1) {
-		/* The divisor was a power of 2. */
-		regind = diff;
-	} else {
-		/*
-		 * To divide by a number D that is not a power of two we
-		 * multiply by (2^21 / D) and then right shift by 21 positions.
-		 *
-		 *   X / D
-		 *
-		 * becomes
-		 *
-		 *   (X * size_invs[D - 3]) >> SIZE_INV_SHIFT
-		 *
-		 * We can omit the first three elements, because we never
-		 * divide by 0, and 1 and 2 are both powers of two, which are
-		 * handled above.
-		 */
-#define	SIZE_INV_SHIFT 21
-#define	SIZE_INV(s) (((1U << SIZE_INV_SHIFT) / (s)) + 1)
-		static const unsigned size_invs[] = {
-		    SIZE_INV(3),
-		    SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7),
-		    SIZE_INV(8), SIZE_INV(9), SIZE_INV(10), SIZE_INV(11),
-		    SIZE_INV(12), SIZE_INV(13), SIZE_INV(14), SIZE_INV(15),
-		    SIZE_INV(16), SIZE_INV(17), SIZE_INV(18), SIZE_INV(19),
-		    SIZE_INV(20), SIZE_INV(21), SIZE_INV(22), SIZE_INV(23),
-		    SIZE_INV(24), SIZE_INV(25), SIZE_INV(26), SIZE_INV(27),
-		    SIZE_INV(28), SIZE_INV(29), SIZE_INV(30), SIZE_INV(31)
-		};
-
-		if (size <= ((sizeof(size_invs) / sizeof(unsigned)) + 2))
-			regind = (diff * size_invs[size - 3]) >> SIZE_INV_SHIFT;
-		else
-			regind = diff / size;
-#undef SIZE_INV
-#undef SIZE_INV_SHIFT
-	}
-	assert(diff == regind * size);
-	assert(regind < bin->nregs);
-
-	return (regind);
+	ret = run->next;
+	run->next = (void *)((uintptr_t)ret + (uintptr_t)bin->reg_size);
+	assert(ret != NULL);
+	return (ret);
 }
 
 static inline void
-arena_run_reg_dalloc(arena_run_t *run, arena_bin_t *bin, void *ptr, size_t size)
+arena_run_reg_dalloc(arena_run_t *run, void *ptr)
 {
-	unsigned regind, elm, bit;
-
-	regind = arena_run_regind(run, bin, ptr, size);
-	elm = regind >> (LG_SIZEOF_INT + 3);
-	if (elm < run->regs_minelm)
-		run->regs_minelm = elm;
-	bit = regind - (elm << (LG_SIZEOF_INT + 3));
-	assert((run->regs_mask[elm] & (1U << bit)) == 0);
-	run->regs_mask[elm] |= (1U << bit);
+	assert(run->nfree < run->bin->nregs);
+	/* Freeing an interior pointer can cause assertion failure. */
+	assert(((uintptr_t)ptr - ((uintptr_t)run +
+	    (uintptr_t)run->bin->reg0_offset)) % (uintptr_t)run->bin->reg_size
+	    == 0);
 
-	arena_run_rc_decr(run, bin, ptr);
+	*(void **)ptr = run->avail;
+	run->avail = ptr;
+	run->nfree++;
 }
 
 static void
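The assertions in the new arena_run_reg_alloc also act as cheap corruption checks: a region popped off avail must lie between the first region (reg0_offset) and the bump pointer (run->next) and must be region-aligned, which catches many double-free and write-after-free bugs in debug builds. A standalone sketch of that check, with hypothetical names:

#include <assert.h>
#include <stdint.h>

/*
 * Hypothetical helper mirroring the debug checks in the new allocation path.
 * base and next delimit the run's region area; reg_size is the bin's region
 * size.
 */
static void
check_region_ptr(const void *ptr, uintptr_t base, uintptr_t next,
    uintptr_t reg_size)
{
	/* A double free leaves a bogus link behind; NULL is one common case. */
	assert(ptr != NULL);
	/* A write-after-free can redirect the link outside the run... */
	assert((uintptr_t)ptr >= base && (uintptr_t)ptr < next);
	/* ...or to an address that is not a region boundary. */
	assert(((uintptr_t)ptr - base) % reg_size == 0);
}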
@@ -571,12 +362,6 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large,
 			 * tries to operate on an interior pointer.
 			 */
 			chunk->map[run_ind].bits |= size;
-		} else {
-			/*
-			 * Initialize the first page's refcount to 1, so that the run
-			 * header is protected from dirty page purging.
-			 */
-			chunk->map[run_ind].bits += CHUNK_MAP_RC_ONE;
 		}
 	}
 
@@ -960,7 +745,6 @@ arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin)
 {
 	arena_chunk_map_t *mapelm;
 	arena_run_t *run;
-	unsigned i, remainder;
 
 	/* Look for a usable run. */
 	mapelm = arena_run_tree_first(&bin->runs);
@@ -991,20 +775,8 @@ arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin)
 
 	/* Initialize run internals. */
 	run->bin = bin;
-
-	for (i = 0; i < bin->regs_mask_nelms - 1; i++)
-		run->regs_mask[i] = UINT_MAX;
-	remainder = bin->nregs & ((1U << (LG_SIZEOF_INT + 3)) - 1);
-	if (remainder == 0)
-		run->regs_mask[i] = UINT_MAX;
-	else {
-		/* The last element has spare bits that need to be unset. */
-		run->regs_mask[i] = (UINT_MAX >> ((1U << (LG_SIZEOF_INT + 3))
-		    - remainder));
-	}
-
-	run->regs_minelm = 0;
-
+	run->avail = NULL;
+	run->next = (void *)(((uintptr_t)run) + (uintptr_t)bin->reg0_offset);
 	run->nfree = bin->nregs;
 #ifdef JEMALLOC_DEBUG
 	run->magic = ARENA_RUN_MAGIC;
@@ -1019,23 +791,7 @@ arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin)
 	return (run);
 }
 
-/* bin->runcur must have space available before this function is called. */
-static inline void *
-arena_bin_malloc_easy(arena_t *arena, arena_bin_t *bin, arena_run_t *run)
-{
-	void *ret;
-
-	assert(run->magic == ARENA_RUN_MAGIC);
-	assert(run->nfree > 0);
-
-	ret = arena_run_reg_alloc(run, bin);
-	assert(ret != NULL);
-	run->nfree--;
-
-	return (ret);
-}
-
-/* Re-fill bin->runcur, then call arena_bin_malloc_easy(). */
+/* Re-fill bin->runcur, then call arena_run_reg_alloc(). */
 static void *
 arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin)
 {
@@ -1046,7 +802,7 @@ arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin)
 	assert(bin->runcur->magic == ARENA_RUN_MAGIC);
 	assert(bin->runcur->nfree > 0);
 
-	return (arena_bin_malloc_easy(arena, bin, bin->runcur));
+	return (arena_run_reg_alloc(bin->runcur, bin));
 }
 
 #ifdef JEMALLOC_TCACHE
@@ -1071,7 +827,7 @@ arena_tcache_fill(arena_t *arena, tcache_bin_t *tbin, size_t binind
 #endif
 	for (i = 0, nfill = (tbin->ncached_max >> 1); i < nfill; i++) {
 		if ((run = bin->runcur) != NULL && run->nfree > 0)
-			ptr = arena_bin_malloc_easy(arena, bin, run);
+			ptr = arena_run_reg_alloc(run, bin);
 		else
 			ptr = arena_bin_malloc_hard(arena, bin);
 		if (ptr == NULL)
@@ -1120,19 +876,17 @@ arena_prof_accum(arena_t *arena, uint64_t accumbytes)
  *
  *   *) bin->run_size >= min_run_size
  *   *) bin->run_size <= arena_maxclass
- *   *) bin->run_size <= RUN_MAX_SMALL
  *   *) run header overhead <= RUN_MAX_OVRHD (or header overhead relaxed).
  *   *) run header size < PAGE_SIZE
  *
- * bin->nregs, bin->regs_mask_nelms, and bin->reg0_offset are
- * also calculated here, since these settings are all interdependent.
+ * bin->nregs and bin->reg0_offset are also calculated here, since these
+ * settings are all interdependent.
  */
 static size_t
 arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
 {
 	size_t try_run_size, good_run_size;
 	uint32_t try_nregs, good_nregs;
-	uint32_t try_mask_nelms, good_mask_nelms;
 	uint32_t try_hdr_size, good_hdr_size;
 #ifdef JEMALLOC_PROF
 	uint32_t try_cnt0_offset, good_cnt0_offset;
@@ -1141,7 +895,6 @@ arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
 
 	assert(min_run_size >= PAGE_SIZE);
 	assert(min_run_size <= arena_maxclass);
-	assert(min_run_size <= RUN_MAX_SMALL);
 
 	/*
 	 * Calculate known-valid settings before entering the run_size
@@ -1158,10 +911,7 @@ arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
 	    + 1; /* Counter-act try_nregs-- in loop. */
 	do {
 		try_nregs--;
-		try_mask_nelms = (try_nregs >> (LG_SIZEOF_INT + 3)) +
-		    ((try_nregs & ((1U << (LG_SIZEOF_INT + 3)) - 1)) ? 1 : 0);
-		try_hdr_size = sizeof(arena_run_t) + (sizeof(unsigned) *
-		    (try_mask_nelms - 1));
+		try_hdr_size = sizeof(arena_run_t);
 #ifdef JEMALLOC_PROF
 		if (opt_prof) {
 			/* Pad to a quantum boundary. */
@@ -1182,7 +932,6 @@ arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
 		 */
 		good_run_size = try_run_size;
 		good_nregs = try_nregs;
-		good_mask_nelms = try_mask_nelms;
 		good_hdr_size = try_hdr_size;
 #ifdef JEMALLOC_PROF
 		good_cnt0_offset = try_cnt0_offset;
@@ -1195,11 +944,7 @@ arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
 		    bin->reg_size) + 1; /* Counter-act try_nregs-- in loop. */
 		do {
 			try_nregs--;
-			try_mask_nelms = (try_nregs >> (LG_SIZEOF_INT + 3)) +
-			    ((try_nregs & ((1U << (LG_SIZEOF_INT + 3)) - 1)) ?
-			    1 : 0);
-			try_hdr_size = sizeof(arena_run_t) + (sizeof(unsigned) *
-			    (try_mask_nelms - 1));
+			try_hdr_size = sizeof(arena_run_t);
 #ifdef JEMALLOC_PROF
 			if (opt_prof) {
 				/* Pad to a quantum boundary. */
@@ -1216,18 +961,17 @@ arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
 			try_reg0_offset = try_run_size - (try_nregs *
 			    bin->reg_size);
 		} while (try_hdr_size > try_reg0_offset);
-	} while (try_run_size <= arena_maxclass && try_run_size <= RUN_MAX_SMALL
+	} while (try_run_size <= arena_maxclass
+	    && try_run_size <= arena_maxclass
 	    && RUN_MAX_OVRHD * (bin->reg_size << 3) > RUN_MAX_OVRHD_RELAX
 	    && (try_reg0_offset << RUN_BFP) > RUN_MAX_OVRHD * try_run_size
 	    && try_hdr_size < PAGE_SIZE);
 
 	assert(good_hdr_size <= good_reg0_offset);
-	assert((good_mask_nelms << (LG_SIZEOF_INT + 3)) >= good_nregs);
 
 	/* Copy final settings. */
 	bin->run_size = good_run_size;
 	bin->nregs = good_nregs;
-	bin->regs_mask_nelms = good_mask_nelms;
 #ifdef JEMALLOC_PROF
 	bin->cnt0_offset = good_cnt0_offset;
 #endif
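With the bitmap gone, the run header is a fixed sizeof(arena_run_t), so the inner loop above only has to find the largest region count whose header still fits in front of the first region. A simplified model of that search under the same assumption (it ignores the profiling-counter padding and the overhead thresholds handled by the outer loop):

#include <stddef.h>

/* For a candidate run size, how many regions fit after a fixed-size header? */
static unsigned
run_nregs(size_t run_size, size_t reg_size, size_t hdr_size)
{
	unsigned try_nregs = (unsigned)((run_size - hdr_size) / reg_size) + 1;
	size_t reg0_offset;

	do {
		try_nregs--;
		reg0_offset = run_size - (try_nregs * reg_size);
	} while (hdr_size > reg0_offset);
	/* reg0_offset is now >= hdr_size, so the header fits before region 0. */
	return (try_nregs);
}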
@@ -1251,7 +995,7 @@ arena_malloc_small(arena_t *arena, size_t size, bool zero)
 
 	malloc_mutex_lock(&arena->lock);
 	if ((run = bin->runcur) != NULL && run->nfree > 0)
-		ret = arena_bin_malloc_easy(arena, bin, run);
+		ret = arena_run_reg_alloc(run, bin);
 	else
 		ret = arena_bin_malloc_hard(arena, bin);
 
@@ -1306,7 +1050,7 @@ arena_malloc_medium(arena_t *arena, size_t size, bool zero)
 
 	malloc_mutex_lock(&arena->lock);
 	if ((run = bin->runcur) != NULL && run->nfree > 0)
-		ret = arena_bin_malloc_easy(arena, bin, run);
+		ret = arena_run_reg_alloc(run, bin);
 	else
 		ret = arena_bin_malloc_hard(arena, bin);
 
@@ -1522,6 +1266,69 @@ arena_salloc(const void *ptr)
 }
 
 #ifdef JEMALLOC_PROF
+static inline unsigned
+arena_run_regind(arena_run_t *run, arena_bin_t *bin, const void *ptr,
+    size_t size)
+{
+	unsigned shift, diff, regind;
+
+	assert(run->magic == ARENA_RUN_MAGIC);
+
+	/*
+	 * Avoid doing division with a variable divisor if possible.  Using
+	 * actual division here can reduce allocator throughput by over 20%!
+	 */
+	diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run - bin->reg0_offset);
+
+	/* Rescale (factor powers of 2 out of the numerator and denominator). */
+	shift = ffs(size) - 1;
+	diff >>= shift;
+	size >>= shift;
+
+	if (size == 1) {
+		/* The divisor was a power of 2. */
+		regind = diff;
+	} else {
+		/*
+		 * To divide by a number D that is not a power of two we
+		 * multiply by (2^21 / D) and then right shift by 21 positions.
+		 *
+		 *   X / D
+		 *
+		 * becomes
+		 *
+		 *   (X * size_invs[D - 3]) >> SIZE_INV_SHIFT
+		 *
+		 * We can omit the first three elements, because we never
+		 * divide by 0, and 1 and 2 are both powers of two, which are
+		 * handled above.
+		 */
+#define	SIZE_INV_SHIFT 21
+#define	SIZE_INV(s) (((1U << SIZE_INV_SHIFT) / (s)) + 1)
+		static const unsigned size_invs[] = {
+		    SIZE_INV(3),
+		    SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7),
+		    SIZE_INV(8), SIZE_INV(9), SIZE_INV(10), SIZE_INV(11),
+		    SIZE_INV(12), SIZE_INV(13), SIZE_INV(14), SIZE_INV(15),
+		    SIZE_INV(16), SIZE_INV(17), SIZE_INV(18), SIZE_INV(19),
+		    SIZE_INV(20), SIZE_INV(21), SIZE_INV(22), SIZE_INV(23),
+		    SIZE_INV(24), SIZE_INV(25), SIZE_INV(26), SIZE_INV(27),
+		    SIZE_INV(28), SIZE_INV(29), SIZE_INV(30), SIZE_INV(31)
+		};
+
+		if (size <= ((sizeof(size_invs) / sizeof(unsigned)) + 2))
+			regind = (diff * size_invs[size - 3]) >> SIZE_INV_SHIFT;
+		else
+			regind = diff / size;
+#undef SIZE_INV
+#undef SIZE_INV_SHIFT
+	}
+	assert(diff == regind * size);
+	assert(regind < bin->nregs);
+
+	return (regind);
+}
+
 prof_thr_cnt_t *
 arena_prof_cnt_get(const void *ptr)
 {
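The size_invs table above turns division by a small constant into a multiply and a shift: X / D == (X * ((2^21 / D) + 1)) >> 21 for the offsets that occur within a run, because the rounding error of the precomputed reciprocal stays below one unit for such small X. A tiny standalone check of that identity, using the same shift; a 24-byte size class rescales to divisor 3 once the shared power of two is factored out:

#include <assert.h>

#define SIZE_INV_SHIFT 21
#define SIZE_INV(s) (((1U << SIZE_INV_SHIFT) / (s)) + 1)

int
main(void)
{
	unsigned diff;

	/* Verify the multiply-and-shift against real division for divisor 3. */
	for (diff = 0; diff < (1U << 16); diff += 3)
		assert(((diff * SIZE_INV(3)) >> SIZE_INV_SHIFT) == diff / 3);
	return (0);
}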
@@ -1589,7 +1396,7 @@ static void
 arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
     arena_bin_t *bin)
 {
-	size_t run_ind;
+	size_t run_ind, past;
 
 	/* Deallocate run. */
 	if (run == bin->runcur)
@@ -1606,17 +1413,16 @@ arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
 		 */
 		arena_run_tree_remove(&bin->runs, run_mapelm);
 	}
-	/*
-	 * Mark the first page as dirty.  The dirty bit for every other page in
-	 * the run is already properly set, which means we can call
-	 * arena_run_dalloc(..., false), thus potentially avoiding the needless
-	 * creation of many dirty pages.
-	 */
+	/* Mark all pages that were ever used for allocations as dirty. */
 	run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk) >> PAGE_SHIFT);
-	assert((chunk->map[run_ind].bits & CHUNK_MAP_DIRTY) == 0);
-	chunk->map[run_ind].bits |= CHUNK_MAP_DIRTY;
-	chunk->ndirty++;
-	arena->ndirty++;
+	past = (size_t)(((uintptr_t)run->next - (uintptr_t)1U -
+	    (uintptr_t)chunk) >> PAGE_SHIFT) + 1;
+	chunk->ndirty += past - run_ind;
+	arena->ndirty += past - run_ind;
+	for (; run_ind < past; run_ind++) {
+		assert((chunk->map[run_ind].bits & CHUNK_MAP_DIRTY) == 0);
+		chunk->map[run_ind].bits |= CHUNK_MAP_DIRTY;
+	}
 
 #ifdef JEMALLOC_DEBUG
 	run->magic = 0;
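Because run->next always points just past the highest region the run ever handed out, the loop above dirties only pages the run actually touched. As a worked example, assuming 4 KiB pages (PAGE_SHIFT == 12): if the run starts at page 5 of its chunk and run->next sits 6200 bytes into the run, then past = ((run + 6199 - chunk) >> 12) + 1 = 7, so only pages 5 and 6 are marked dirty and trailing pages that never held a region stay clean.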
@@ -1643,7 +1449,9 @@ arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr,
 	size_t pageind;
 	arena_run_t *run;
 	arena_bin_t *bin;
+#if (defined(JEMALLOC_FILL) || defined(JEMALLOC_STATS))
 	size_t size;
+#endif
 
 	pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT);
 	run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind -
@@ -1651,15 +1459,16 @@ arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr,
 	    PAGE_SHIFT));
 	assert(run->magic == ARENA_RUN_MAGIC);
 	bin = run->bin;
+#if (defined(JEMALLOC_FILL) || defined(JEMALLOC_STATS))
 	size = bin->reg_size;
+#endif
 
 #ifdef JEMALLOC_FILL
 	if (opt_junk)
 		memset(ptr, 0x5a, size);
 #endif
 
-	arena_run_reg_dalloc(run, bin, ptr, size);
-	run->nfree++;
+	arena_run_reg_dalloc(run, ptr);
 
 	if (run->nfree == bin->nregs)
 		arena_dalloc_bin_run(arena, chunk, run, bin);