From 6bafa6678fc36483e638f1c3a0a9bf79fb89bfc9 Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Mon, 29 Feb 2016 11:22:52 -0800 Subject: [PATCH] Pairing heap Initial implementation of a twopass pairing heap with aux list. Research papers linked in comments. Where search/nsearch/last aren't needed, this gives much faster first(), delete(), and insert(). Insert is O(1), and first/delete don't have to walk the whole tree. Also tested rb_old with parent pointers - it was better than the current rb.h for memory loads, but still much worse than a pairing heap. An array-based heap would be much faster if everything fits in memory, but on a cold cache it has many more memory loads for most operations. --- Makefile.in | 1 + .../jemalloc/internal/jemalloc_internal.h.in | 4 + include/jemalloc/internal/ph.h | 255 ++++++++++++++++++ include/jemalloc/internal/private_symbols.txt | 8 + src/ph.c | 2 + 5 files changed, 270 insertions(+) create mode 100644 include/jemalloc/internal/ph.h create mode 100644 src/ph.c diff --git a/Makefile.in b/Makefile.in index f60823f5..3d725be1 100644 --- a/Makefile.in +++ b/Makefile.in @@ -95,6 +95,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/mutex.c \ $(srcroot)src/nstime.c \ $(srcroot)src/pages.c \ + $(srcroot)src/ph.c \ $(srcroot)src/prng.c \ $(srcroot)src/prof.c \ $(srcroot)src/quarantine.c \ diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 3f54391f..d3b94c00 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -371,6 +371,7 @@ typedef unsigned szind_t; #include "jemalloc/internal/tsd.h" #include "jemalloc/internal/mb.h" #include "jemalloc/internal/extent.h" +#include "jemalloc/internal/ph.h" #include "jemalloc/internal/arena.h" #include "jemalloc/internal/bitmap.h" #include "jemalloc/internal/base.h" @@ -401,6 +402,7 @@ typedef unsigned szind_t; #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/mb.h" #include "jemalloc/internal/bitmap.h" +#include "jemalloc/internal/ph.h" #define JEMALLOC_ARENA_STRUCTS_A #include "jemalloc/internal/arena.h" #undef JEMALLOC_ARENA_STRUCTS_A @@ -494,6 +496,7 @@ void jemalloc_postfork_child(void); #include "jemalloc/internal/mb.h" #include "jemalloc/internal/bitmap.h" #include "jemalloc/internal/extent.h" +#include "jemalloc/internal/ph.h" #include "jemalloc/internal/arena.h" #include "jemalloc/internal/base.h" #include "jemalloc/internal/rtree.h" @@ -525,6 +528,7 @@ void jemalloc_postfork_child(void); #include "jemalloc/internal/tsd.h" #include "jemalloc/internal/mb.h" #include "jemalloc/internal/extent.h" +#include "jemalloc/internal/ph.h" #include "jemalloc/internal/base.h" #include "jemalloc/internal/rtree.h" #include "jemalloc/internal/pages.h" diff --git a/include/jemalloc/internal/ph.h b/include/jemalloc/internal/ph.h new file mode 100644 index 00000000..aeca693e --- /dev/null +++ b/include/jemalloc/internal/ph.h @@ -0,0 +1,255 @@ +/* + * A Pairing Heap implementation. + * + * "The Pairing Heap: A New Form of Self-Adjusting Heap" + * https://www.cs.cmu.edu/~sleator/papers/pairing-heaps.pdf + * + * With auxiliary list, described in a follow on paper + * + * "Pairing Heaps: Experiments and Analysis" + * http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.106.2988&rep=rep1&type=pdf + * + * Where search/nsearch/last are not needed, ph.h outperforms rb.h by ~7x fewer + * cpu cycles, and ~4x fewer memory references. + * + * Tagging parent/prev pointers on the next list was also described in the + * original paper, such that only two pointers are needed. This is not + * implemented here, as it substantially increases the memory references + * needed when ph_remove is called, almost overshadowing the other performance + * gains. + * + ******************************************************************************* + */ +#ifdef JEMALLOC_H_TYPES + +typedef struct ph_node_s ph_node_t; +typedef struct ph_heap_s ph_heap_t; + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +struct ph_node_s { + ph_node_t *subheaps; + ph_node_t *parent; + ph_node_t *next; + ph_node_t *prev; +}; + +struct ph_heap_s { + ph_node_t *root; +}; + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#ifndef JEMALLOC_ENABLE_INLINE +ph_node_t *ph_merge(ph_node_t *heap1, ph_node_t *heap2); +ph_node_t *ph_merge_pairs(ph_node_t *subheaps); +void ph_merge_aux_list(ph_heap_t *l); +void ph_new(ph_heap_t *n); +ph_node_t *ph_first(ph_heap_t *l); +void ph_insert(ph_heap_t *l, ph_node_t *n); +ph_node_t *ph_remove_first(ph_heap_t *l); +void ph_remove(ph_heap_t *l, ph_node_t *n); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PH_C_)) + +/* Helper routines ************************************************************/ + +JEMALLOC_INLINE ph_node_t * +ph_merge(ph_node_t *heap1, ph_node_t *heap2) +{ + + if (heap1 == NULL) + return (heap2); + if (heap2 == NULL) + return (heap1); + /* Optional: user-settable comparison function */ + if ((uintptr_t)heap1 < (uintptr_t)heap2) { + heap2->parent = heap1; + heap2->prev = NULL; + heap2->next = heap1->subheaps; + if (heap1->subheaps != NULL) + heap1->subheaps->prev = heap2; + heap1->subheaps = heap2; + return (heap1); + } else { + heap1->parent = heap2; + heap1->prev = NULL; + heap1->next = heap2->subheaps; + if (heap2->subheaps != NULL) + heap2->subheaps->prev = heap1; + heap2->subheaps = heap1; + return (heap2); + } +} + +JEMALLOC_INLINE ph_node_t * +ph_merge_pairs(ph_node_t *subheaps) +{ + + if (subheaps == NULL) + return (NULL); + if (subheaps->next == NULL) + return (subheaps); + { + ph_node_t *l0 = subheaps; + ph_node_t *l1 = l0->next; + ph_node_t *lrest = l1->next; + + if (lrest != NULL) + lrest->prev = NULL; + l1->next = NULL; + l1->prev = NULL; + l0->next = NULL; + l0->prev = NULL; + return (ph_merge(ph_merge(l0, l1), ph_merge_pairs(lrest))); + } +} + +/* + * Merge the aux list into the root node. + */ +JEMALLOC_INLINE void +ph_merge_aux_list(ph_heap_t *l) +{ + + if (l->root == NULL) + return; + if (l->root->next != NULL) { + ph_node_t *l0 = l->root->next; + ph_node_t *l1 = l0->next; + ph_node_t *lrest = NULL; + + /* Multipass merge. */ + while (l1 != NULL) { + lrest = l1->next; + if (lrest != NULL) + lrest->prev = NULL; + l1->next = NULL; + l1->prev = NULL; + l0->next = NULL; + l0->prev = NULL; + l0 = ph_merge(l0, l1); + l1 = lrest; + } + l->root->next = NULL; + l->root = ph_merge(l->root, l0); + } +} + +/* User API *******************************************************************/ + +JEMALLOC_INLINE void +ph_new(ph_heap_t *n) +{ + + memset(n, 0, sizeof(ph_heap_t)); +} + +JEMALLOC_INLINE ph_node_t * +ph_first(ph_heap_t *l) +{ + + /* + * For the cost of an extra pointer, a l->min could be stored instead of + * merging the aux list here. Current users always call ph_remove(l, + * ph_first(l)) though, and the aux list must always be merged for + * delete of the min node anyway. + */ + ph_merge_aux_list(l); + return (l->root); +} + +JEMALLOC_INLINE void +ph_insert(ph_heap_t *l, ph_node_t *n) +{ + + memset(n, 0, sizeof(ph_node_t)); + + /* + * Non-aux list insert: + * + * l->root = ph_merge(l->root, n); + * + * Aux list insert: + */ + if (l->root == NULL) + l->root = n; + else { + n->next = l->root->next; + if (l->root->next != NULL) + l->root->next->prev = n; + n->prev = l->root; + l->root->next = n; + } +} + +JEMALLOC_INLINE ph_node_t * +ph_remove_first(ph_heap_t *l) +{ + ph_node_t *ret; + + ph_merge_aux_list(l); + if (l->root == NULL) + return (NULL); + + ret = l->root; + + l->root = ph_merge_pairs(l->root->subheaps); + + return (ret); +} + +JEMALLOC_INLINE void +ph_remove(ph_heap_t *l, ph_node_t *n) +{ + ph_node_t *replace; + + /* + * We can delete from aux list without merging it, but we need to merge + * if we are dealing with the root node. + */ + if (l->root == n) { + ph_merge_aux_list(l); + if (l->root == n) { + ph_remove_first(l); + return; + } + } + + /* Find a possible replacement node, and link to parent. */ + replace = ph_merge_pairs(n->subheaps); + if (n->parent != NULL && n->parent->subheaps == n) { + if (replace != NULL) + n->parent->subheaps = replace; + else + n->parent->subheaps = n->next; + } + /* Set next/prev for sibling linked list. */ + if (replace != NULL) { + replace->parent = n->parent; + replace->prev = n->prev; + if (n->prev != NULL) + n->prev->next = replace; + replace->next = n->next; + if (n->next != NULL) + n->next->prev = replace; + } else { + if (n->prev != NULL) + n->prev->next = n->next; + if (n->next != NULL) + n->next->prev = n->prev; + } +} +#endif + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 5880996a..2de1d5f3 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -379,6 +379,14 @@ pages_map pages_purge pages_trim pages_unmap +ph_first +ph_insert +ph_merge +ph_merge_aux_list +ph_merge_pairs +ph_new +ph_remove_first +ph_remove pow2_ceil_u32 pow2_ceil_u64 pow2_ceil_zu diff --git a/src/ph.c b/src/ph.c new file mode 100644 index 00000000..051a20d7 --- /dev/null +++ b/src/ph.c @@ -0,0 +1,2 @@ +#define JEMALLOC_PH_C_ +#include "jemalloc/internal/jemalloc_internal.h"