Separate arena_avail trees
Separate run trees by index, replacing the previous quantize logic. Quantization by index is now performed only on insertion into / removal from the tree, and not on node comparison, saving some CPU. This also means we don't have to dereference the miscelm* pointers, saving half of the memory loads from miscelms/mapbits that have fallen out of cache. A linear scan of the indices appears to be fast enough. The only cost of this is an extra tree array in each arena.
parent 2b1fc90b7b
commit 3417a304cc
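Illustrative sketch (not part of the commit): the core idea is an array of address-ordered containers, one per run size index. Insertion quantizes a run's size to an index once and files the run under that index; first-best-fit allocation then scans indices upward from the requested one until it finds a non-empty container. The stand-in names below (avail_class_t, avail_insert, first_best_fit, NCLASSES) are invented for this example and are not jemalloc APIs.

/*
 * Standalone illustration of the per-index layout; these types and
 * functions are made up for the example, not jemalloc's real ones.
 */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define NCLASSES 8	/* stand-in for runs_avail_nclasses */

/* Stand-in for one address-ordered tree of available runs. */
typedef struct {
	size_t	nruns;		/* runs currently filed under this index */
	void	*lowest_run;	/* lowest-addressed run, if any */
} avail_class_t;

static avail_class_t runs_avail[NCLASSES];	/* one container per size index */

/* Insertion quantizes to an index once; within a class, order is by address only. */
static void
avail_insert(size_t ind, void *run)
{
	runs_avail[ind].nruns++;
	if (runs_avail[ind].lowest_run == NULL ||
	    (uintptr_t)run < (uintptr_t)runs_avail[ind].lowest_run)
		runs_avail[ind].lowest_run = run;
}

/* First-best-fit: linear scan of indices until a non-empty class is found. */
static void *
first_best_fit(size_t ind)
{
	size_t i;

	for (i = ind; i < NCLASSES; i++) {
		if (runs_avail[i].nruns != 0)
			return (runs_avail[i].lowest_run);
	}
	return (NULL);
}

int
main(void)
{
	static int run_a, run_b;

	avail_insert(3, &run_a);	/* a run filed under size index 3 */
	avail_insert(6, &run_b);	/* a run filed under size index 6 */
	/* A request for index 4 skips empty classes 4 and 5 and lands on 6. */
	printf("%s\n", first_best_fit(4) == (void *)&run_b ?
	    "found class-6 run" : "miss");
	return (0);
}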
@@ -351,12 +351,6 @@ struct arena_s {
 	 */
 	size_t			ndirty;
 
-	/*
-	 * Size/address-ordered tree of this arena's available runs. The tree
-	 * is used for first-best-fit run allocation.
-	 */
-	arena_avail_tree_t	runs_avail;
-
 	/*
 	 * Unused dirty memory this arena manages. Dirty memory is conceptually
 	 * tracked as an arbitrarily interleaved LRU of dirty runs and cached
@@ -462,6 +456,12 @@ struct arena_s {
 
 	/* bins is used to store trees of free regions. */
 	arena_bin_t		bins[NBINS];
+
+	/*
+	 * Quantized address-ordered trees of this arena's available runs. The
+	 * trees are used for first-best-fit run allocation.
+	 */
+	arena_avail_tree_t	runs_avail[1]; /* Dynamically sized. */
 };
 
 /* Used in conjunction with tsd for fast arena-related context lookup. */
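Side note (illustration, not from the commit): runs_avail is declared with a single element and over-allocated when the arena is created, so that it actually holds one tree per size class. The sketch below shows the generic trailing-array idiom with placeholder names (avail_tree_t, fake_arena_t, fake_arena_new), not jemalloc's real types or its exact size computation.

/* Generic trailing-array idiom; all names here are placeholders. */
#include <stddef.h>
#include <stdlib.h>

typedef struct { void *root; } avail_tree_t;

typedef struct {
	long		nactive;	/* ...other arena fields... */
	avail_tree_t	runs_avail[1];	/* declared with 1 slot, sized at runtime */
} fake_arena_t;

static fake_arena_t *
fake_arena_new(size_t nclasses)
{
	/* Room for the leading fields plus nclasses trees in total. */
	size_t sz = offsetof(fake_arena_t, runs_avail) +
	    nclasses * sizeof(avail_tree_t);

	return ((fake_arena_t *)calloc(1, sz));
}

int
main(void)
{
	fake_arena_t *a = fake_arena_new(8);	/* room for 8 per-class trees */

	free(a);
	return (0);
}

Callers then index the array by a biased size index, which is what the new arena_runs_avail_get() helper in src/arena.c does below.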
src/arena.c (142 changed lines)
@@ -28,6 +28,8 @@ static size_t	*run_quantize_floor_tab; /* run_quantize_floor() memoization. */
 static size_t	*run_quantize_ceil_tab; /* run_quantize_ceil() memoization. */
 unsigned	nlclasses; /* Number of large size classes. */
 unsigned	nhclasses; /* Number of huge size classes. */
+static szind_t	runs_avail_bias; /* Size index for first runs_avail tree. */
+static szind_t	runs_avail_nclasses; /* Number of runs_avail trees. */
 
 /******************************************************************************/
 /*
@@ -45,42 +47,12 @@ static void arena_bin_lower_run(arena_t *arena, arena_chunk_t *chunk,
 
 /******************************************************************************/
 
-#define	CHUNK_MAP_KEY		((uintptr_t)0x1U)
-
-JEMALLOC_INLINE_C arena_chunk_map_misc_t *
-arena_miscelm_key_create(size_t size)
-{
-
-	return ((arena_chunk_map_misc_t *)(arena_mapbits_size_encode(size) |
-	    CHUNK_MAP_KEY));
-}
-
-JEMALLOC_INLINE_C bool
-arena_miscelm_is_key(const arena_chunk_map_misc_t *miscelm)
-{
-
-	return (((uintptr_t)miscelm & CHUNK_MAP_KEY) != 0);
-}
-
-#undef CHUNK_MAP_KEY
-
-JEMALLOC_INLINE_C size_t
-arena_miscelm_key_size_get(const arena_chunk_map_misc_t *miscelm)
-{
-
-	assert(arena_miscelm_is_key(miscelm));
-
-	return (arena_mapbits_size_decode((uintptr_t)miscelm));
-}
-
 JEMALLOC_INLINE_C size_t
 arena_miscelm_size_get(const arena_chunk_map_misc_t *miscelm)
 {
 	arena_chunk_t *chunk;
 	size_t pageind, mapbits;
 
-	assert(!arena_miscelm_is_key(miscelm));
-
 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(miscelm);
 	pageind = arena_miscelm_to_pageind(miscelm);
 	mapbits = arena_mapbits_get(chunk, pageind);
@@ -88,7 +60,8 @@ arena_miscelm_size_get(const arena_chunk_map_misc_t *miscelm)
 }
 
 JEMALLOC_INLINE_C int
-arena_run_comp(const arena_chunk_map_misc_t *a, const arena_chunk_map_misc_t *b)
+arena_run_addr_comp(const arena_chunk_map_misc_t *a,
+    const arena_chunk_map_misc_t *b)
 {
 	uintptr_t a_miscelm = (uintptr_t)a;
 	uintptr_t b_miscelm = (uintptr_t)b;
@@ -101,7 +74,7 @@ arena_run_comp(const arena_chunk_map_misc_t *a, const arena_chunk_map_misc_t *b)
 
 /* Generate red-black tree functions. */
 rb_gen(static UNUSED, arena_run_tree_, arena_run_tree_t, arena_chunk_map_misc_t,
-    rb_link, arena_run_comp)
+    rb_link, arena_run_addr_comp)
 
 static size_t
 run_quantize_floor_compute(size_t size)
@@ -226,61 +199,42 @@ run_quantize_ceil(size)
 run_quantize_t *run_quantize_ceil = JEMALLOC_N(run_quantize_ceil_impl);
 #endif
 
-JEMALLOC_INLINE_C int
-arena_avail_comp(const arena_chunk_map_misc_t *a,
-    const arena_chunk_map_misc_t *b)
-{
-	int ret;
-	uintptr_t a_miscelm = (uintptr_t)a;
-	size_t a_qsize = run_quantize_floor(arena_miscelm_is_key(a) ?
-	    arena_miscelm_key_size_get(a) : arena_miscelm_size_get(a));
-	size_t b_qsize = run_quantize_floor(arena_miscelm_size_get(b));
-
-	/*
-	 * Compare based on quantized size rather than size, in order to sort
-	 * equally useful runs only by address.
-	 */
-	ret = (a_qsize > b_qsize) - (a_qsize < b_qsize);
-	if (ret == 0) {
-		if (!arena_miscelm_is_key(a)) {
-			uintptr_t b_miscelm = (uintptr_t)b;
-
-			ret = (a_miscelm > b_miscelm) - (a_miscelm < b_miscelm);
-		} else {
-			/*
-			 * Treat keys as if they are lower than anything else.
-			 */
-			ret = -1;
-		}
-	}
-
-	return (ret);
-}
-
 /* Generate red-black tree functions. */
 rb_gen(static UNUSED, arena_avail_tree_, arena_avail_tree_t,
-    arena_chunk_map_misc_t, rb_link, arena_avail_comp)
+    arena_chunk_map_misc_t, rb_link, arena_run_addr_comp)
 
+static arena_avail_tree_t *
+arena_runs_avail_get(arena_t *arena, szind_t ind)
+{
+
+	assert(ind >= runs_avail_bias);
+	assert(ind - runs_avail_bias < runs_avail_nclasses);
+
+	return (&arena->runs_avail[ind - runs_avail_bias]);
+}
+
 static void
 arena_avail_insert(arena_t *arena, arena_chunk_t *chunk, size_t pageind,
     size_t npages)
 {
+	szind_t ind = size2index(run_quantize_floor(arena_miscelm_size_get(
+	    arena_miscelm_get(chunk, pageind))));
 	assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >>
 	    LG_PAGE));
-	arena_avail_tree_insert(&arena->runs_avail, arena_miscelm_get(chunk,
-	    pageind));
+	arena_avail_tree_insert(arena_runs_avail_get(arena, ind),
+	    arena_miscelm_get(chunk, pageind));
 }
 
 static void
 arena_avail_remove(arena_t *arena, arena_chunk_t *chunk, size_t pageind,
     size_t npages)
 {
+	szind_t ind = size2index(run_quantize_floor(arena_miscelm_size_get(
+	    arena_miscelm_get(chunk, pageind))));
 	assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >>
 	    LG_PAGE));
-	arena_avail_tree_remove(&arena->runs_avail, arena_miscelm_get(chunk,
-	    pageind));
+	arena_avail_tree_remove(arena_runs_avail_get(arena, ind),
+	    arena_miscelm_get(chunk, pageind));
 }
 
 static void
@@ -770,7 +724,6 @@ arena_chunk_alloc(arena_t *arena)
 			return (NULL);
 	}
 
-	/* Insert the run into the runs_avail tree. */
 	arena_avail_insert(arena, chunk, map_bias, chunk_npages-map_bias);
 
 	return (chunk);
@@ -791,10 +744,7 @@ arena_chunk_dalloc(arena_t *arena, arena_chunk_t *chunk)
 	assert(arena_mapbits_decommitted_get(chunk, map_bias) ==
 	    arena_mapbits_decommitted_get(chunk, chunk_npages-1));
 
-	/*
-	 * Remove run from the runs_avail tree, so that the arena does not use
-	 * it.
-	 */
+	/* Remove run from runs_avail, so that the arena does not use it. */
 	arena_avail_remove(arena, chunk, map_bias, chunk_npages-map_bias);
 
 	if (arena->spare != NULL) {
@@ -1124,19 +1074,23 @@ arena_chunk_ralloc_huge_expand(arena_t *arena, void *chunk, size_t oldsize,
 
 /*
  * Do first-best-fit run selection, i.e. select the lowest run that best fits.
- * Run sizes are quantized, so not all candidate runs are necessarily exactly
- * the same size.
+ * Run sizes are indexed, so not all candidate runs are necessarily exactly the
+ * same size.
  */
 static arena_run_t *
 arena_run_first_best_fit(arena_t *arena, size_t size)
 {
-	size_t search_size = run_quantize_ceil(size);
-	arena_chunk_map_misc_t *key = arena_miscelm_key_create(search_size);
-	arena_chunk_map_misc_t *miscelm =
-	    arena_avail_tree_nsearch(&arena->runs_avail, key);
-	if (miscelm == NULL)
-		return (NULL);
-	return (&miscelm->run);
+	szind_t ind, i;
+
+	ind = size2index(run_quantize_ceil(size));
+	for (i = ind; i < runs_avail_nclasses; i++) {
+		arena_chunk_map_misc_t *miscelm = arena_avail_tree_first(
+		    arena_runs_avail_get(arena, i));
+		if (miscelm != NULL)
+			return (&miscelm->run);
+	}
+
+	return (NULL);
 }
 
 static arena_run_t *
@@ -3315,19 +3269,23 @@ arena_t *
 arena_new(unsigned ind)
 {
 	arena_t *arena;
+	size_t arena_size;
 	unsigned i;
 	arena_bin_t *bin;
 
+	/* Compute arena size to incorporate sufficient runs_avail elements. */
+	arena_size = offsetof(arena_t, runs_avail) + (sizeof(arena_avail_tree_t)
+	    * (runs_avail_nclasses - 1));
 	/*
 	 * Allocate arena, arena->lstats, and arena->hstats contiguously, mainly
 	 * because there is no way to clean up if base_alloc() OOMs.
 	 */
 	if (config_stats) {
-		arena = (arena_t *)base_alloc(CACHELINE_CEILING(sizeof(arena_t))
-		    + QUANTUM_CEILING(nlclasses * sizeof(malloc_large_stats_t) +
+		arena = (arena_t *)base_alloc(CACHELINE_CEILING(arena_size) +
+		    QUANTUM_CEILING(nlclasses * sizeof(malloc_large_stats_t) +
 		    nhclasses) * sizeof(malloc_huge_stats_t));
 	} else
-		arena = (arena_t *)base_alloc(sizeof(arena_t));
+		arena = (arena_t *)base_alloc(arena_size);
 	if (arena == NULL)
 		return (NULL);
@@ -3339,11 +3297,11 @@ arena_new(unsigned ind)
 	if (config_stats) {
 		memset(&arena->stats, 0, sizeof(arena_stats_t));
 		arena->stats.lstats = (malloc_large_stats_t *)((uintptr_t)arena
-		    + CACHELINE_CEILING(sizeof(arena_t)));
+		    + CACHELINE_CEILING(arena_size));
 		memset(arena->stats.lstats, 0, nlclasses *
 		    sizeof(malloc_large_stats_t));
 		arena->stats.hstats = (malloc_huge_stats_t *)((uintptr_t)arena
-		    + CACHELINE_CEILING(sizeof(arena_t)) +
+		    + CACHELINE_CEILING(arena_size) +
 		    QUANTUM_CEILING(nlclasses * sizeof(malloc_large_stats_t)));
 		memset(arena->stats.hstats, 0, nhclasses *
 		    sizeof(malloc_huge_stats_t));
@@ -3375,7 +3333,8 @@ arena_new(unsigned ind)
 	arena->nactive = 0;
 	arena->ndirty = 0;
 
-	arena_avail_tree_new(&arena->runs_avail);
+	for (i = 0; i < runs_avail_nclasses; i++)
+		arena_avail_tree_new(&arena->runs_avail[i]);
 	qr_new(&arena->runs_dirty, rd_link);
 	qr_new(&arena->chunks_cache, cc_link);
 
@@ -3635,6 +3594,9 @@ arena_boot(void)
 	if (run_quantize_init())
 		return (true);
 
+	runs_avail_bias = size2index(PAGE);
+	runs_avail_nclasses = size2index(run_quantize_max)+1 - runs_avail_bias;
+
 	return (false);
 }
 