Use pairing heap for arena->runs_avail

Use a pairing heap instead of a red-black tree for arena runs_avail.  The
extra links are unioned with the bitmap_t, so this change doesn't use
any extra memory.
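
For the curious, the reason this is free space-wise: the heap linkage is
only needed while a run is unallocated, so it can overlay per-run state
used in other phases (the union shown in the arena.h hunk below).  A
compilable sketch of the idea; the field names here are stand-ins, not
the actual ph.h layout:

    /* Hypothetical linkage types standing in for jemalloc's internals. */
    typedef struct dirty_link_s {
    	struct dirty_link_s *next, *prev;	/* qr()-style ring links */
    } dirty_link_t;

    typedef struct ph_node_s {
    	struct ph_node_s *child;	/* leftmost child */
    	struct ph_node_s *sibling;	/* next sibling */
    	struct ph_node_s *prev;		/* previous sibling or parent */
    } ph_node_t;

    /* The avail links live in the same per-page union as the run's other
     * state (e.g. its allocation bitmap), so the containing struct does
     * not grow as long as the union's largest member is unchanged. */
    typedef struct avail_links_s {
    	dirty_link_t rd;
    	ph_node_t    ph_link;
    } avail_links_t;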

Canaries show this change to be a 1% CPU win and a 2% latency win.  In
particular, large free()s and small bin frees are now O(1) (barring
coalescing).

I also tested changing bin->runs to be a pairing heap, but saw a much
smaller win, and it would mean increasing the size of arena_run_s by two
pointers, so I left that as an rb-tree for now.
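
For reviewers unfamiliar with pairing heaps: the O(1) figures come from
insert and find-min both being constant-time melds; the logarithmic work
is deferred to delete-min.  A minimal address-ordered sketch of the two
cheap operations follows; it is illustrative only, not jemalloc's ph
implementation:

    #include <stddef.h>
    #include <stdint.h>

    typedef struct node_s {
    	struct node_s *child;	/* leftmost child */
    	struct node_s *sibling;	/* next sibling in the child list */
    } node_t;

    /* Meld two roots: the higher-addressed root becomes a child of the
     * lower-addressed one.  One comparison, two pointer writes: O(1). */
    static node_t *
    meld(node_t *a, node_t *b)
    {
    	if (a == NULL)
    		return (b);
    	if (b == NULL)
    		return (a);
    	if ((uintptr_t)b < (uintptr_t)a) {
    		node_t *t = a;
    		a = b;
    		b = t;
    	}
    	b->sibling = a->child;
    	a->child = b;
    	return (a);
    }

    /* Insert is a meld with a singleton heap: O(1), vs. O(log n) for a
     * red-black tree insert.  find-min just reads the root.  delete-min
     * pays the deferred cost by pairwise-melding the root's children,
     * which is why removal (e.g. during coalescing) is not O(1). */
    static node_t *
    insert(node_t *root, node_t *n)
    {
    	n->child = n->sibling = NULL;
    	return (meld(root, n));
    }
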
Dave Watson, 2016-02-29 11:54:42 -08:00 (committed by Jason Evans)
parent f8d80d62a8
commit 4a0dbb5ac8
3 changed files with 40 additions and 18 deletions

include/jemalloc/internal/arena.h

@@ -36,6 +36,7 @@ typedef enum {
 #define	DECAY_NTICKS_PER_UPDATE	1000
 
 typedef struct arena_runs_dirty_link_s arena_runs_dirty_link_t;
+typedef struct arena_avail_links_s arena_avail_links_t;
 typedef struct arena_run_s arena_run_t;
 typedef struct arena_chunk_map_bits_s arena_chunk_map_bits_t;
 typedef struct arena_chunk_map_misc_s arena_chunk_map_misc_t;
@@ -146,6 +147,11 @@ struct arena_runs_dirty_link_s {
 	qr(arena_runs_dirty_link_t)	rd_link;
 };
 
+struct arena_avail_links_s {
+	arena_runs_dirty_link_t	rd;
+	ph_node_t		ph_link;
+};
+
 /*
  * Each arena_chunk_map_misc_t corresponds to one page within the chunk, just
  * like arena_chunk_map_bits_t.  Two separate arrays are stored within each
@@ -163,7 +169,7 @@ struct arena_chunk_map_misc_s {
 	union {
 		/* Linkage for list of dirty runs. */
-		arena_runs_dirty_link_t	rd;
+		arena_avail_links_t	avail;
 
 		/* Profile counters, used for large object runs. */
 		union {
@@ -457,10 +463,10 @@ struct arena_s {
 	arena_bin_t		bins[NBINS];
 
 	/*
-	 * Quantized address-ordered trees of this arena's available runs.  The
-	 * trees are used for first-best-fit run allocation.
+	 * Quantized address-ordered heaps of this arena's available runs.  The
+	 * heaps are used for first-best-fit run allocation.
 	 */
-	arena_run_tree_t	runs_avail[1]; /* Dynamically sized. */
+	ph_heap_t		runs_avail[1]; /* Dynamically sized. */
 };
 
 /* Used in conjunction with tsd for fast arena-related context lookup. */
@@ -600,6 +606,7 @@ arena_chunk_map_misc_t *arena_miscelm_get(arena_chunk_t *chunk,
 size_t	arena_miscelm_to_pageind(const arena_chunk_map_misc_t *miscelm);
 void	*arena_miscelm_to_rpages(arena_chunk_map_misc_t *miscelm);
 arena_chunk_map_misc_t	*arena_rd_to_miscelm(arena_runs_dirty_link_t *rd);
+arena_chunk_map_misc_t	*arena_ph_to_miscelm(ph_node_t *ph);
 arena_chunk_map_misc_t	*arena_run_to_miscelm(arena_run_t *run);
 size_t	*arena_mapbitsp_get(arena_chunk_t *chunk, size_t pageind);
 size_t	arena_mapbitsp_read(size_t *mapbitsp);
@@ -702,7 +709,19 @@ JEMALLOC_ALWAYS_INLINE arena_chunk_map_misc_t *
 arena_rd_to_miscelm(arena_runs_dirty_link_t *rd)
 {
 	arena_chunk_map_misc_t *miscelm = (arena_chunk_map_misc_t
-	    *)((uintptr_t)rd - offsetof(arena_chunk_map_misc_t, rd));
+	    *)((uintptr_t)rd - offsetof(arena_chunk_map_misc_t, avail));
+
+	assert(arena_miscelm_to_pageind(miscelm) >= map_bias);
+	assert(arena_miscelm_to_pageind(miscelm) < chunk_npages);
+
+	return (miscelm);
+}
+
+JEMALLOC_ALWAYS_INLINE arena_chunk_map_misc_t *
+arena_ph_to_miscelm(ph_node_t *ph)
+{
+	arena_chunk_map_misc_t *miscelm = (arena_chunk_map_misc_t *)
+	    ((uintptr_t)ph - offsetof(arena_chunk_map_misc_t, avail.ph_link));
 
 	assert(arena_miscelm_to_pageind(miscelm) >= map_bias);
 	assert(arena_miscelm_to_pageind(miscelm) < chunk_npages);
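
Note: arena_ph_to_miscelm() above is the usual container_of pattern: the
heap hands back a pointer to the embedded ph_node_t, and offsetof()
recovers the enclosing map element.  A self-contained illustration with
hypothetical types (same arithmetic, not jemalloc code):

    #include <assert.h>
    #include <stddef.h>
    #include <stdint.h>

    struct node {
    	struct node *next;
    };

    struct elem {
    	int payload;
    	struct node link;	/* embedded linkage, like avail.ph_link */
    };

    /* Recover the enclosing struct from a pointer to its embedded node,
     * as arena_ph_to_miscelm() does via
     * offsetof(arena_chunk_map_misc_t, avail.ph_link). */
    static struct elem *
    node_to_elem(struct node *n)
    {
    	return ((struct elem *)((uintptr_t)n - offsetof(struct elem, link)));
    }

    int
    main(void)
    {
    	struct elem e = {42, {NULL}};

    	assert(node_to_elem(&e.link) == &e);
    	return (0);
    }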

include/jemalloc/internal/private_symbols.txt

@@ -79,6 +79,7 @@ arena_nthreads_dec
 arena_nthreads_get
 arena_nthreads_inc
 arena_palloc
+arena_ph_to_miscelm
 arena_postfork_child
 arena_postfork_parent
 arena_prefork

src/arena.c

@@ -199,7 +199,7 @@ run_quantize_ceil(size_t size)
 run_quantize_t *run_quantize_ceil = JEMALLOC_N(run_quantize_ceil_impl);
 #endif
 
-static arena_run_tree_t *
+static ph_heap_t *
 arena_runs_avail_get(arena_t *arena, szind_t ind)
 {
@@ -217,8 +217,8 @@ arena_avail_insert(arena_t *arena, arena_chunk_t *chunk, size_t pageind,
 	    arena_miscelm_get(chunk, pageind))));
 	assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >>
 	    LG_PAGE));
-	arena_run_tree_insert(arena_runs_avail_get(arena, ind),
-	    arena_miscelm_get(chunk, pageind));
+	ph_insert(arena_runs_avail_get(arena, ind),
+	    &arena_miscelm_get(chunk, pageind)->avail.ph_link);
 }
 
 static void
@@ -229,8 +229,8 @@ arena_avail_remove(arena_t *arena, arena_chunk_t *chunk, size_t pageind,
 	    arena_miscelm_get(chunk, pageind))));
 	assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >>
 	    LG_PAGE));
-	arena_run_tree_remove(arena_runs_avail_get(arena, ind),
-	    arena_miscelm_get(chunk, pageind));
+	ph_remove(arena_runs_avail_get(arena, ind),
+	    &arena_miscelm_get(chunk, pageind)->avail.ph_link);
 }
 
 static void
@@ -245,8 +245,8 @@ arena_run_dirty_insert(arena_t *arena, arena_chunk_t *chunk, size_t pageind,
 	assert(arena_mapbits_dirty_get(chunk, pageind+npages-1) ==
 	    CHUNK_MAP_DIRTY);
 
-	qr_new(&miscelm->rd, rd_link);
-	qr_meld(&arena->runs_dirty, &miscelm->rd, rd_link);
+	qr_new(&miscelm->avail.rd, rd_link);
+	qr_meld(&arena->runs_dirty, &miscelm->avail.rd, rd_link);
 	arena->ndirty += npages;
 }
@@ -262,7 +262,7 @@ arena_run_dirty_remove(arena_t *arena, arena_chunk_t *chunk, size_t pageind,
 	assert(arena_mapbits_dirty_get(chunk, pageind+npages-1) ==
 	    CHUNK_MAP_DIRTY);
 
-	qr_remove(&miscelm->rd, rd_link);
+	qr_remove(&miscelm->avail.rd, rd_link);
 	assert(arena->ndirty >= npages);
 	arena->ndirty -= npages;
 }
@@ -1079,10 +1079,12 @@ arena_run_first_best_fit(arena_t *arena, size_t size)
 	ind = size2index(run_quantize_ceil(size));
 
 	for (i = ind; i < runs_avail_nclasses + runs_avail_bias; i++) {
-		arena_chunk_map_misc_t *miscelm = arena_run_tree_first(
-		    arena_runs_avail_get(arena, i));
-		if (miscelm != NULL)
+		ph_node_t *node = ph_first(arena_runs_avail_get(arena, i));
+		if (node != NULL) {
+			arena_chunk_map_misc_t *miscelm =
+			    arena_ph_to_miscelm(node);
 			return (&miscelm->run);
+		}
 	}
 
 	return (NULL);
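
The rewritten loop keeps first-best-fit semantics: scan size classes
upward from the quantized request and take the heap minimum (for an
address-ordered heap, the lowest-addressed run) from the first non-empty
class.  A self-contained toy of that control flow, with stub types in
place of jemalloc's (illustrative only):

    #include <stddef.h>

    #define NCLASSES 8

    /* Stub heap: just a root pointer; NULL means empty. */
    typedef struct heap_s {
    	void *min;
    } heap_t;

    /* Stub for ph_first(): the heap's minimum, or NULL when empty. */
    static void *
    heap_first(heap_t *h)
    {
    	return (h->min);
    }

    /* The smallest size class >= the request that has any available run
     * wins; within a class, the heap yields its minimum element. */
    static void *
    first_best_fit(heap_t heaps[NCLASSES], size_t ind)
    {
    	size_t i;

    	for (i = ind; i < NCLASSES; i++) {
    		void *node = heap_first(&heaps[i]);

    		if (node != NULL)
    			return (node);
    	}
    	return (NULL);	/* No fit in any class. */
    }
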
@@ -3323,7 +3325,7 @@ arena_new(unsigned ind)
 	arena_bin_t *bin;
 
 	/* Compute arena size to incorporate sufficient runs_avail elements. */
-	arena_size = offsetof(arena_t, runs_avail) + (sizeof(arena_run_tree_t) *
+	arena_size = offsetof(arena_t, runs_avail) + (sizeof(ph_heap_t) *
 	    runs_avail_nclasses);
 	/*
 	 * Allocate arena, arena->lstats, and arena->hstats contiguously, mainly
@@ -3383,7 +3385,7 @@ arena_new(unsigned ind)
 	arena->ndirty = 0;
 
 	for(i = 0; i < runs_avail_nclasses; i++)
-		arena_run_tree_new(&arena->runs_avail[i]);
+		ph_new(&arena->runs_avail[i]);
 	qr_new(&arena->runs_dirty, rd_link);
 	qr_new(&arena->chunks_cache, cc_link);