Use pairing heap for arena->runs_avail
Use a pairing heap instead of a red-black tree for arena runs_avail. The extra links are unioned with the bitmap_t, so this change doesn't use any extra memory. Canaries show this change to be a 1% CPU win and a 2% latency win. In particular, large free()s and small bin frees are now O(1) (barring coalescing). I also tested changing bin->runs to be a pairing heap, but saw a much smaller win, and it would mean increasing the size of arena_run_s by two pointers, so I left that as an rb-tree for now.
This commit is contained in:
parent
f8d80d62a8
commit
4a0dbb5ac8
@ -36,6 +36,7 @@ typedef enum {
|
||||
#define DECAY_NTICKS_PER_UPDATE 1000
|
||||
|
||||
typedef struct arena_runs_dirty_link_s arena_runs_dirty_link_t;
|
||||
typedef struct arena_avail_links_s arena_avail_links_t;
|
||||
typedef struct arena_run_s arena_run_t;
|
||||
typedef struct arena_chunk_map_bits_s arena_chunk_map_bits_t;
|
||||
typedef struct arena_chunk_map_misc_s arena_chunk_map_misc_t;
|
||||
@ -146,6 +147,11 @@ struct arena_runs_dirty_link_s {
|
||||
qr(arena_runs_dirty_link_t) rd_link;
|
||||
};
|
||||
|
||||
/*
 * Linkage carried by a run's arena_chunk_map_misc_t: `rd` is the ring link
 * for the arena's runs_dirty list, and `ph_link` is the pairing-heap node
 * inserted into / removed from the arena's runs_avail heaps.
 *
 * `rd` must remain the first member: arena_rd_to_miscelm() recovers the
 * enclosing arena_chunk_map_misc_t by subtracting
 * offsetof(arena_chunk_map_misc_t, avail) from an `rd` pointer, which is only
 * correct while `rd` sits at offset 0 of this struct.
 */
struct arena_avail_links_s {
|
||||
arena_runs_dirty_link_t rd;
|
||||
ph_node_t ph_link;
|
||||
};
|
||||
|
||||
/*
|
||||
* Each arena_chunk_map_misc_t corresponds to one page within the chunk, just
|
||||
* like arena_chunk_map_bits_t. Two separate arrays are stored within each
|
||||
@ -163,7 +169,7 @@ struct arena_chunk_map_misc_s {
|
||||
|
||||
union {
|
||||
/* Linkage for list of dirty runs. */
|
||||
arena_runs_dirty_link_t rd;
|
||||
arena_avail_links_t avail;
|
||||
|
||||
/* Profile counters, used for large object runs. */
|
||||
union {
|
||||
@ -457,10 +463,10 @@ struct arena_s {
|
||||
arena_bin_t bins[NBINS];
|
||||
|
||||
/*
|
||||
* Quantized address-ordered trees of this arena's available runs. The
|
||||
* trees are used for first-best-fit run allocation.
|
||||
* Quantized address-ordered heaps of this arena's available runs. The
|
||||
* heaps are used for first-best-fit run allocation.
|
||||
*/
|
||||
arena_run_tree_t runs_avail[1]; /* Dynamically sized. */
|
||||
ph_heap_t runs_avail[1]; /* Dynamically sized. */
|
||||
};
|
||||
|
||||
/* Used in conjunction with tsd for fast arena-related context lookup. */
|
||||
@ -600,6 +606,7 @@ arena_chunk_map_misc_t *arena_miscelm_get(arena_chunk_t *chunk,
|
||||
size_t arena_miscelm_to_pageind(const arena_chunk_map_misc_t *miscelm);
|
||||
void *arena_miscelm_to_rpages(arena_chunk_map_misc_t *miscelm);
|
||||
arena_chunk_map_misc_t *arena_rd_to_miscelm(arena_runs_dirty_link_t *rd);
|
||||
arena_chunk_map_misc_t *arena_ph_to_miscelm(ph_node_t *ph);
|
||||
arena_chunk_map_misc_t *arena_run_to_miscelm(arena_run_t *run);
|
||||
size_t *arena_mapbitsp_get(arena_chunk_t *chunk, size_t pageind);
|
||||
size_t arena_mapbitsp_read(size_t *mapbitsp);
|
||||
@ -702,7 +709,19 @@ JEMALLOC_ALWAYS_INLINE arena_chunk_map_misc_t *
|
||||
arena_rd_to_miscelm(arena_runs_dirty_link_t *rd)
|
||||
{
|
||||
arena_chunk_map_misc_t *miscelm = (arena_chunk_map_misc_t
|
||||
*)((uintptr_t)rd - offsetof(arena_chunk_map_misc_t, rd));
|
||||
*)((uintptr_t)rd - offsetof(arena_chunk_map_misc_t, avail));
|
||||
|
||||
assert(arena_miscelm_to_pageind(miscelm) >= map_bias);
|
||||
assert(arena_miscelm_to_pageind(miscelm) < chunk_npages);
|
||||
|
||||
return (miscelm);
|
||||
}
|
||||
|
||||
JEMALLOC_ALWAYS_INLINE arena_chunk_map_misc_t *
|
||||
arena_ph_to_miscelm(ph_node_t *ph)
|
||||
{
|
||||
arena_chunk_map_misc_t *miscelm = (arena_chunk_map_misc_t *)
|
||||
((uintptr_t)ph - offsetof(arena_chunk_map_misc_t, avail.ph_link));
|
||||
|
||||
assert(arena_miscelm_to_pageind(miscelm) >= map_bias);
|
||||
assert(arena_miscelm_to_pageind(miscelm) < chunk_npages);
|
||||
|
@ -79,6 +79,7 @@ arena_nthreads_dec
|
||||
arena_nthreads_get
|
||||
arena_nthreads_inc
|
||||
arena_palloc
|
||||
arena_ph_to_miscelm
|
||||
arena_postfork_child
|
||||
arena_postfork_parent
|
||||
arena_prefork
|
||||
|
28
src/arena.c
28
src/arena.c
@ -199,7 +199,7 @@ run_quantize_ceil(size_t size)
|
||||
run_quantize_t *run_quantize_ceil = JEMALLOC_N(run_quantize_ceil_impl);
|
||||
#endif
|
||||
|
||||
static arena_run_tree_t *
|
||||
static ph_heap_t *
|
||||
arena_runs_avail_get(arena_t *arena, szind_t ind)
|
||||
{
|
||||
|
||||
@ -217,8 +217,8 @@ arena_avail_insert(arena_t *arena, arena_chunk_t *chunk, size_t pageind,
|
||||
arena_miscelm_get(chunk, pageind))));
|
||||
assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >>
|
||||
LG_PAGE));
|
||||
arena_run_tree_insert(arena_runs_avail_get(arena, ind),
|
||||
arena_miscelm_get(chunk, pageind));
|
||||
ph_insert(arena_runs_avail_get(arena, ind),
|
||||
&arena_miscelm_get(chunk, pageind)->avail.ph_link);
|
||||
}
|
||||
|
||||
static void
|
||||
@ -229,8 +229,8 @@ arena_avail_remove(arena_t *arena, arena_chunk_t *chunk, size_t pageind,
|
||||
arena_miscelm_get(chunk, pageind))));
|
||||
assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >>
|
||||
LG_PAGE));
|
||||
arena_run_tree_remove(arena_runs_avail_get(arena, ind),
|
||||
arena_miscelm_get(chunk, pageind));
|
||||
ph_remove(arena_runs_avail_get(arena, ind),
|
||||
&arena_miscelm_get(chunk, pageind)->avail.ph_link);
|
||||
}
|
||||
|
||||
static void
|
||||
@ -245,8 +245,8 @@ arena_run_dirty_insert(arena_t *arena, arena_chunk_t *chunk, size_t pageind,
|
||||
assert(arena_mapbits_dirty_get(chunk, pageind+npages-1) ==
|
||||
CHUNK_MAP_DIRTY);
|
||||
|
||||
qr_new(&miscelm->rd, rd_link);
|
||||
qr_meld(&arena->runs_dirty, &miscelm->rd, rd_link);
|
||||
qr_new(&miscelm->avail.rd, rd_link);
|
||||
qr_meld(&arena->runs_dirty, &miscelm->avail.rd, rd_link);
|
||||
arena->ndirty += npages;
|
||||
}
|
||||
|
||||
@ -262,7 +262,7 @@ arena_run_dirty_remove(arena_t *arena, arena_chunk_t *chunk, size_t pageind,
|
||||
assert(arena_mapbits_dirty_get(chunk, pageind+npages-1) ==
|
||||
CHUNK_MAP_DIRTY);
|
||||
|
||||
qr_remove(&miscelm->rd, rd_link);
|
||||
qr_remove(&miscelm->avail.rd, rd_link);
|
||||
assert(arena->ndirty >= npages);
|
||||
arena->ndirty -= npages;
|
||||
}
|
||||
@ -1079,10 +1079,12 @@ arena_run_first_best_fit(arena_t *arena, size_t size)
|
||||
|
||||
ind = size2index(run_quantize_ceil(size));
|
||||
for (i = ind; i < runs_avail_nclasses + runs_avail_bias; i++) {
|
||||
arena_chunk_map_misc_t *miscelm = arena_run_tree_first(
|
||||
arena_runs_avail_get(arena, i));
|
||||
if (miscelm != NULL)
|
||||
ph_node_t *node = ph_first(arena_runs_avail_get(arena, i));
|
||||
if (node != NULL) {
|
||||
arena_chunk_map_misc_t *miscelm =
|
||||
arena_ph_to_miscelm(node);
|
||||
return (&miscelm->run);
|
||||
}
|
||||
}
|
||||
|
||||
return (NULL);
|
||||
@ -3323,7 +3325,7 @@ arena_new(unsigned ind)
|
||||
arena_bin_t *bin;
|
||||
|
||||
/* Compute arena size to incorporate sufficient runs_avail elements. */
|
||||
arena_size = offsetof(arena_t, runs_avail) + (sizeof(arena_run_tree_t) *
|
||||
arena_size = offsetof(arena_t, runs_avail) + (sizeof(ph_heap_t) *
|
||||
runs_avail_nclasses);
|
||||
/*
|
||||
* Allocate arena, arena->lstats, and arena->hstats contiguously, mainly
|
||||
@ -3383,7 +3385,7 @@ arena_new(unsigned ind)
|
||||
arena->ndirty = 0;
|
||||
|
||||
for(i = 0; i < runs_avail_nclasses; i++)
|
||||
arena_run_tree_new(&arena->runs_avail[i]);
|
||||
ph_new(&arena->runs_avail[i]);
|
||||
qr_new(&arena->runs_dirty, rd_link);
|
||||
qr_new(&arena->chunks_cache, cc_link);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user