Eset: Cache summary information for heap edatas.
This lets us do a single array scan to find first fits, instead of taking a cache miss per examined size class.
This commit is contained in:
parent
252e0942d0
commit
dcb7b83fac
@ -18,6 +18,14 @@
|
||||
typedef struct eset_bin_s eset_bin_t;
|
||||
/*
 * One bin of an eset: the edatas filed under a single size-class index,
 * together with a cached summary of the smallest of them.
 */
struct eset_bin_s {
|
||||
/* Edatas currently in this bin; eset_insert/eset_remove add and drop them. */
edata_heap_t heap;
|
||||
/*
|
||||
* We do first-fit across multiple size classes. If we compared against
|
||||
* the min element in each heap directly, we'd take a cache miss per
|
||||
* extent we looked at. If we co-locate the edata summaries, we only
|
||||
* take a miss on the edata we're actually going to return (which is
|
||||
* inevitable anyways).
|
||||
*
 * heap_min is only meaningful while heap is non-empty: eset_insert sets
 * it when the bin gains its first element or a lower one arrives, and
 * eset_remove refreshes it from the heap when the removed element's
 * summary compares equal to it.
 */
|
||||
edata_cmp_summary_t heap_min;
|
||||
};
|
||||
|
||||
typedef struct eset_bin_stats_s eset_bin_stats_t;
|
||||
|
60
src/eset.c
60
src/eset.c
@ -8,6 +8,10 @@
|
||||
/*
 * Initialize a bin by calling edata_heap_new on its heap. The cached
 * heap_min summary is deliberately left untouched here.
 */
static void
|
||||
eset_bin_init(eset_bin_t *bin) {
|
||||
edata_heap_new(&bin->heap);
|
||||
/*
|
||||
* heap_min doesn't need initialization; it gets filled in when the bin
|
||||
* goes from empty to non-empty (see the empty-heap case in eset_insert).
|
||||
*/
|
||||
}
|
||||
|
||||
static void
|
||||
@ -71,8 +75,21 @@ eset_insert(eset_t *eset, edata_t *edata) {
|
||||
size_t size = edata_size_get(edata);
|
||||
size_t psz = sz_psz_quantize_floor(size);
|
||||
pszind_t pind = sz_psz2ind(psz);
|
||||
|
||||
edata_cmp_summary_t edata_cmp_summary = edata_cmp_summary_get(edata);
|
||||
if (edata_heap_empty(&eset->bins[pind].heap)) {
|
||||
fb_set(eset->bitmap, ESET_NPSIZES, (size_t)pind);
|
||||
/* Only element is automatically the min element. */
|
||||
eset->bins[pind].heap_min = edata_cmp_summary;
|
||||
} else {
|
||||
/*
|
||||
* There's already a min element; update the summary if we're
|
||||
* about to insert a lower one.
|
||||
*/
|
||||
if (edata_cmp_summary_comp(edata_cmp_summary,
|
||||
eset->bins[pind].heap_min) < 0) {
|
||||
eset->bins[pind].heap_min = edata_cmp_summary;
|
||||
}
|
||||
}
|
||||
edata_heap_insert(&eset->bins[pind].heap, edata);
|
||||
|
||||
@ -101,14 +118,29 @@ eset_remove(eset_t *eset, edata_t *edata) {
|
||||
size_t size = edata_size_get(edata);
|
||||
size_t psz = sz_psz_quantize_floor(size);
|
||||
pszind_t pind = sz_psz2ind(psz);
|
||||
edata_heap_remove(&eset->bins[pind].heap, edata);
|
||||
|
||||
if (config_stats) {
|
||||
eset_stats_sub(eset, pind, size);
|
||||
}
|
||||
|
||||
edata_cmp_summary_t edata_cmp_summary = edata_cmp_summary_get(edata);
|
||||
edata_heap_remove(&eset->bins[pind].heap, edata);
|
||||
if (edata_heap_empty(&eset->bins[pind].heap)) {
|
||||
fb_unset(eset->bitmap, ESET_NPSIZES, (size_t)pind);
|
||||
} else {
|
||||
/*
|
||||
* This is a little weird; we compare if the summaries are
|
||||
* equal, rather than if the edata we removed was the heap
|
||||
* minimum. The reason why is that getting the heap minimum
|
||||
* can cause a pairing heap merge operation. We can avoid this
|
||||
* if we only update the min if it's changed, in which case the
|
||||
* summaries of the removed element and the min element should
|
||||
* compare equal.
|
||||
*/
|
||||
if (edata_cmp_summary_comp(edata_cmp_summary,
|
||||
eset->bins[pind].heap_min) == 0) {
|
||||
eset->bins[pind].heap_min = edata_cmp_summary_get(
|
||||
edata_heap_first(&eset->bins[pind].heap));
|
||||
}
|
||||
}
|
||||
edata_list_inactive_remove(&eset->lru, edata);
|
||||
size_t npages = size >> LG_PAGE;
|
||||
@ -116,10 +148,6 @@ eset_remove(eset_t *eset, edata_t *edata) {
|
||||
* As in eset_insert, we hold eset->mtx and so don't need atomic
|
||||
* operations for updating eset->npages.
|
||||
*/
|
||||
/*
|
||||
* This class is not thread-safe in general; we rely on external
|
||||
* synchronization for all mutating operations.
|
||||
*/
|
||||
size_t cur_extents_npages =
|
||||
atomic_load_zu(&eset->npages, ATOMIC_RELAXED);
|
||||
assert(cur_extents_npages >= npages);
|
||||
@ -178,6 +206,7 @@ static edata_t *
|
||||
eset_first_fit(eset_t *eset, size_t size, bool exact_only,
|
||||
unsigned lg_max_fit) {
|
||||
edata_t *ret = NULL;
|
||||
edata_cmp_summary_t ret_summ JEMALLOC_CC_SILENCE_INIT({0});
|
||||
|
||||
pszind_t pind = sz_psz2ind(sz_psz_quantize_ceil(size));
|
||||
|
||||
@ -191,8 +220,6 @@ eset_first_fit(eset_t *eset, size_t size, bool exact_only,
|
||||
i < ESET_NPSIZES;
|
||||
i = (pszind_t)fb_ffs(eset->bitmap, ESET_NPSIZES, (size_t)i + 1)) {
|
||||
assert(!edata_heap_empty(&eset->bins[i].heap));
|
||||
edata_t *edata = edata_heap_first(&eset->bins[i].heap);
|
||||
assert(edata_size_get(edata) >= size);
|
||||
if (lg_max_fit == SC_PTR_BITS) {
|
||||
/*
|
||||
* We'll shift by this below, and shifting out all the
|
||||
@ -204,8 +231,23 @@ eset_first_fit(eset_t *eset, size_t size, bool exact_only,
|
||||
if ((sz_pind2sz(i) >> lg_max_fit) > size) {
|
||||
break;
|
||||
}
|
||||
if (ret == NULL || edata_snad_comp(edata, ret) < 0) {
|
||||
if (ret == NULL || edata_cmp_summary_comp(
|
||||
eset->bins[i].heap_min, ret_summ) < 0) {
|
||||
/*
|
||||
* We grab the edata as early as possible, even though
|
||||
* we might change it later. Practically, a large
|
||||
* portion of eset_fit calls succeed at the first valid
|
||||
* index, so this doesn't cost much, and we get the
|
||||
* effect of prefetching the edata as early as possible.
|
||||
*/
|
||||
edata_t *edata = edata_heap_first(&eset->bins[i].heap);
|
||||
assert(edata_size_get(edata) >= size);
|
||||
assert(ret == NULL || edata_snad_comp(edata, ret) < 0);
|
||||
assert(ret == NULL || edata_cmp_summary_comp(
|
||||
eset->bins[i].heap_min,
|
||||
edata_cmp_summary_get(edata)) == 0);
|
||||
ret = edata;
|
||||
ret_summ = eset->bins[i].heap_min;
|
||||
}
|
||||
if (i == SC_NPSIZES) {
|
||||
break;
|
||||
|
Loading…
Reference in New Issue
Block a user