Use pairing heap for arena->runs_avail

Use a pairing heap instead of a red-black tree for arena->runs_avail.  The
extra links are unioned with the bitmap_t, so this change doesn't use
any extra memory.
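
To illustrate the memory argument, here is a simplified sketch (not the actual
jemalloc definitions; the type and field names are invented for illustration).
The pairing-heap linkage is only needed while a run is available, and the
small-run allocation bitmap is only needed while the run is in use, so the two
can share storage in a union and the heap links add no memory:

    /* Illustrative sketch only, not jemalloc's real struct layout. */
    typedef struct ph_node_s ph_node_t;
    struct ph_node_s {
        ph_node_t *child;  /* leftmost child */
        ph_node_t *next;   /* next sibling */
        ph_node_t *prev;   /* previous sibling or parent */
    };

    typedef struct run_meta_s {
        union {
            /* Valid only while the run is unallocated (available). */
            ph_node_t ph_link;       /* runs_avail heap linkage */
            /* Valid only while the run is in use by a small bin. */
            unsigned long bitmap[4]; /* per-region allocation bitmap */
        } u;
    } run_meta_t;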

Canaries show this change to be a 1% CPU win and a 2% latency win.  In
particular, large free()s and small bin frees are now O(1) (barring
coalescing).
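
For context on the O(1) claim, a minimal pairing-heap sketch follows
(illustrative only; jemalloc's actual ph implementation is intrusive and
differs in detail).  Insert is just a meld of a singleton node with the root
(one comparison and a couple of pointer writes, with no rebalancing), which is
why pushing a freed run back onto runs_avail is constant time; the deferred
work is paid later, when the minimum is extracted and the root's children are
melded:

    #include <stddef.h>
    #include <stdint.h>

    /* Minimal pairing-heap node; illustrative only. */
    typedef struct pnode_s pnode_t;
    struct pnode_s {
        uintptr_t key;   /* e.g. run address, for address ordering */
        pnode_t *child;  /* leftmost child */
        pnode_t *next;   /* next sibling */
    };

    /* Meld two heaps: the larger root becomes the leftmost child of the smaller. */
    static pnode_t *
    pnode_meld(pnode_t *a, pnode_t *b)
    {
        if (a == NULL)
            return (b);
        if (b == NULL)
            return (a);
        if (b->key < a->key) {
            pnode_t *t = a;
            a = b;
            b = t;
        }
        b->next = a->child;
        a->child = b;
        return (a);
    }

    /* Insert is a meld with a singleton heap: O(1), no rebalancing. */
    static pnode_t *
    pnode_insert(pnode_t *root, pnode_t *n)
    {
        n->child = NULL;
        n->next = NULL;
        return (pnode_meld(root, n));
    }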

I also tested changing bin->runs to be a pairing heap, but saw a much
smaller win, and it would mean increasing the size of arena_run_s by two
pointers, so I left that as an rb-tree for now.
Author:       Dave Watson
Date:         2016-02-29 11:54:42 -08:00
Committed by: Jason Evans
Parent:       f8d80d62a8
Commit:       4a0dbb5ac8

3 changed files with 40 additions and 18 deletions

@@ -199,7 +199,7 @@ run_quantize_ceil(size_t size)
 run_quantize_t *run_quantize_ceil = JEMALLOC_N(run_quantize_ceil_impl);
 #endif
 
-static arena_run_tree_t *
+static ph_heap_t *
 arena_runs_avail_get(arena_t *arena, szind_t ind)
 {
@@ -217,8 +217,8 @@ arena_avail_insert(arena_t *arena, arena_chunk_t *chunk, size_t pageind,
             arena_miscelm_get(chunk, pageind))));
         assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >>
             LG_PAGE));
-        arena_run_tree_insert(arena_runs_avail_get(arena, ind),
-            arena_miscelm_get(chunk, pageind));
+        ph_insert(arena_runs_avail_get(arena, ind),
+            &arena_miscelm_get(chunk, pageind)->avail.ph_link);
 }
 
 static void
@@ -229,8 +229,8 @@ arena_avail_remove(arena_t *arena, arena_chunk_t *chunk, size_t pageind,
             arena_miscelm_get(chunk, pageind))));
         assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >>
             LG_PAGE));
-        arena_run_tree_remove(arena_runs_avail_get(arena, ind),
-            arena_miscelm_get(chunk, pageind));
+        ph_remove(arena_runs_avail_get(arena, ind),
+            &arena_miscelm_get(chunk, pageind)->avail.ph_link);
 }
 
 static void
@@ -245,8 +245,8 @@ arena_run_dirty_insert(arena_t *arena, arena_chunk_t *chunk, size_t pageind,
         assert(arena_mapbits_dirty_get(chunk, pageind+npages-1) ==
             CHUNK_MAP_DIRTY);
 
-        qr_new(&miscelm->rd, rd_link);
-        qr_meld(&arena->runs_dirty, &miscelm->rd, rd_link);
+        qr_new(&miscelm->avail.rd, rd_link);
+        qr_meld(&arena->runs_dirty, &miscelm->avail.rd, rd_link);
         arena->ndirty += npages;
 }
@@ -262,7 +262,7 @@ arena_run_dirty_remove(arena_t *arena, arena_chunk_t *chunk, size_t pageind,
         assert(arena_mapbits_dirty_get(chunk, pageind+npages-1) ==
             CHUNK_MAP_DIRTY);
 
-        qr_remove(&miscelm->rd, rd_link);
+        qr_remove(&miscelm->avail.rd, rd_link);
         assert(arena->ndirty >= npages);
         arena->ndirty -= npages;
 }
@@ -1079,10 +1079,12 @@ arena_run_first_best_fit(arena_t *arena, size_t size)
         ind = size2index(run_quantize_ceil(size));
         for (i = ind; i < runs_avail_nclasses + runs_avail_bias; i++) {
-                arena_chunk_map_misc_t *miscelm = arena_run_tree_first(
-                    arena_runs_avail_get(arena, i));
-                if (miscelm != NULL)
+                ph_node_t *node = ph_first(arena_runs_avail_get(arena, i));
+                if (node != NULL) {
+                        arena_chunk_map_misc_t *miscelm =
+                            arena_ph_to_miscelm(node);
                         return (&miscelm->run);
+                }
         }
 
         return (NULL);
 }
@@ -3323,7 +3325,7 @@ arena_new(unsigned ind)
         arena_bin_t *bin;
 
         /* Compute arena size to incorporate sufficient runs_avail elements. */
-        arena_size = offsetof(arena_t, runs_avail) + (sizeof(arena_run_tree_t) *
+        arena_size = offsetof(arena_t, runs_avail) + (sizeof(ph_heap_t) *
             runs_avail_nclasses);
         /*
          * Allocate arena, arena->lstats, and arena->hstats contiguously, mainly
@@ -3383,7 +3385,7 @@ arena_new(unsigned ind)
         arena->ndirty = 0;
 
         for(i = 0; i < runs_avail_nclasses; i++)
-                arena_run_tree_new(&arena->runs_avail[i]);
+                ph_new(&arena->runs_avail[i]);
         qr_new(&arena->runs_dirty, rd_link);
         qr_new(&arena->chunks_cache, cc_link);
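
One note on the arena_run_first_best_fit() hunk above: the heap stores only the
ph_node_t links embedded in each arena_chunk_map_misc_t, so arena_ph_to_miscelm()
has to map a node pointer back to its enclosing miscelm.  The sketch below shows
the conventional offsetof-based way to do such a conversion; it is illustrative
only and not necessarily jemalloc's exact definition (the miscelm_t type and the
miscelm_from_ph() name are invented here):

    #include <stddef.h>
    #include <stdint.h>

    /* Recover the enclosing struct from a pointer to its embedded heap link. */
    typedef struct ph_node_s { struct ph_node_s *child, *next, *prev; } ph_node_t;

    typedef struct miscelm_s {
        /* ... other run metadata ... */
        struct {
            ph_node_t ph_link; /* runs_avail heap linkage */
        } avail;
    } miscelm_t;

    static miscelm_t *
    miscelm_from_ph(ph_node_t *node)
    {
        return ((miscelm_t *)((uintptr_t)node -
            offsetof(miscelm_t, avail.ph_link)));
    }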