Eset: Cache summary information for heap edatas.

This lets us do a single array scan to find first fits, instead of taking a
cache miss per examined size class.

commit dcb7b83fac
parent 252e0942d0
@@ -18,6 +18,14 @@
 typedef struct eset_bin_s eset_bin_t;
 struct eset_bin_s {
 	edata_heap_t heap;
+	/*
+	 * We do first-fit across multiple size classes. If we compared against
+	 * the min element in each heap directly, we'd take a cache miss per
+	 * extent we looked at. If we co-locate the edata summaries, we only
+	 * take a miss on the edata we're actually going to return (which is
+	 * inevitable anyways).
+	 */
+	edata_cmp_summary_t heap_min;
 };
 
 typedef struct eset_bin_stats_s eset_bin_stats_t;
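The struct comment above is the heart of the change: the summaries live inline in the bin array rather than behind each heap's root pointer. As a rough illustration (the toy names below — toy_node_t, toy_bin_t, toy_first_fit — are hypothetical stand-ins, with a plain uint64_t playing the role of edata_cmp_summary_t and a sorted list playing the role of the pairing heap), a first-fit scan over such bins touches only one contiguous array and dereferences a single heap root, the winner's:

#include <stddef.h>
#include <stdint.h>

/* Toy stand-ins (hypothetical) for edata_t and eset_bin_t. */
typedef struct toy_node_s toy_node_t;
struct toy_node_s {
	uint64_t summary;	/* plays the role of edata_cmp_summary_t */
	toy_node_t *next;	/* toy "heap" is a list sorted by summary */
};

typedef struct toy_bin_s {
	toy_node_t *heap;	/* head is the min; NULL when the bin is empty */
	uint64_t heap_min;	/* cached copy of heap->summary */
} toy_bin_t;

/*
 * First fit across bins: the loop reads only the contiguous bins[]
 * array; the one pointer dereference happens on the winning bin.
 */
static toy_node_t *
toy_first_fit(toy_bin_t *bins, size_t nbins, size_t start) {
	size_t best = nbins;
	for (size_t i = start; i < nbins; i++) {
		if (bins[i].heap == NULL) {
			continue;	/* the real code skips empties via fb_ffs */
		}
		if (best == nbins || bins[i].heap_min < bins[best].heap_min) {
			best = i;
		}
	}
	return best == nbins ? NULL : bins[best].heap;
}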
src/eset.c (60 lines changed)
@@ -8,6 +8,10 @@
 static void
 eset_bin_init(eset_bin_t *bin) {
 	edata_heap_new(&bin->heap);
+	/*
+	 * heap_min doesn't need initialization; it gets filled in when the bin
+	 * goes from empty to non-empty.
+	 */
 }
 
 static void
@@ -71,8 +75,21 @@ eset_insert(eset_t *eset, edata_t *edata) {
 	size_t size = edata_size_get(edata);
 	size_t psz = sz_psz_quantize_floor(size);
 	pszind_t pind = sz_psz2ind(psz);
+
+	edata_cmp_summary_t edata_cmp_summary = edata_cmp_summary_get(edata);
 	if (edata_heap_empty(&eset->bins[pind].heap)) {
 		fb_set(eset->bitmap, ESET_NPSIZES, (size_t)pind);
+		/* Only element is automatically the min element. */
+		eset->bins[pind].heap_min = edata_cmp_summary;
+	} else {
+		/*
+		 * There's already a min element; update the summary if we're
+		 * about to insert a lower one.
+		 */
+		if (edata_cmp_summary_comp(edata_cmp_summary,
+		    eset->bins[pind].heap_min) < 0) {
+			eset->bins[pind].heap_min = edata_cmp_summary;
+		}
 	}
 	edata_heap_insert(&eset->bins[pind].heap, edata);
 
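On insert, the cached minimum can be maintained without reading any existing heap node: a first element is trivially the min, and otherwise a single comparison against the cached value suffices. A matching sketch using the toy types above (toy_insert is hypothetical; the sorted-list insert stands in for edata_heap_insert):

static void
toy_insert(toy_bin_t *bin, toy_node_t *node) {
	if (bin->heap == NULL) {
		/* Only element is automatically the min element. */
		bin->heap_min = node->summary;
	} else if (node->summary < bin->heap_min) {
		/* About to insert a new minimum; refresh the cached copy. */
		bin->heap_min = node->summary;
	}
	/* Stand-in for edata_heap_insert(): sorted-list insert. */
	toy_node_t **link = &bin->heap;
	while (*link != NULL && (*link)->summary <= node->summary) {
		link = &(*link)->next;
	}
	node->next = *link;
	*link = node;
}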
@@ -101,14 +118,29 @@ eset_remove(eset_t *eset, edata_t *edata) {
 	size_t size = edata_size_get(edata);
 	size_t psz = sz_psz_quantize_floor(size);
 	pszind_t pind = sz_psz2ind(psz);
-	edata_heap_remove(&eset->bins[pind].heap, edata);
-
 	if (config_stats) {
 		eset_stats_sub(eset, pind, size);
 	}
 
+	edata_cmp_summary_t edata_cmp_summary = edata_cmp_summary_get(edata);
+	edata_heap_remove(&eset->bins[pind].heap, edata);
 	if (edata_heap_empty(&eset->bins[pind].heap)) {
 		fb_unset(eset->bitmap, ESET_NPSIZES, (size_t)pind);
+	} else {
+		/*
+		 * This is a little weird; we compare if the summaries are
+		 * equal, rather than if the edata we removed was the heap
+		 * minimum. The reason why is that getting the heap minimum
+		 * can cause a pairing heap merge operation. We can avoid this
+		 * if we only update the min if it's changed, in which case the
+		 * summaries of the removed element and the min element should
+		 * compare equal.
+		 */
+		if (edata_cmp_summary_comp(edata_cmp_summary,
+		    eset->bins[pind].heap_min) == 0) {
+			eset->bins[pind].heap_min = edata_cmp_summary_get(
+			    edata_heap_first(&eset->bins[pind].heap));
+		}
 	}
 	edata_list_inactive_remove(&eset->lru, edata);
 	size_t npages = size >> LG_PAGE;
@@ -116,10 +148,6 @@ eset_remove(eset_t *eset, edata_t *edata) {
 	 * As in eset_insert, we hold eset->mtx and so don't need atomic
 	 * operations for updating eset->npages.
 	 */
-	/*
-	 * This class is not thread-safe in general; we rely on external
-	 * synchronization for all mutating operations.
-	 */
 	size_t cur_extents_npages =
 	    atomic_load_zu(&eset->npages, ATOMIC_RELAXED);
 	assert(cur_extents_npages >= npages);
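The removal path is the subtle part: per the comment in the hunk above, getting the heap minimum of a pairing heap can trigger a merge operation, so the code avoids calling edata_heap_first() unless the cached min might actually be stale — which can only happen when the removed element's summary compares equal to it. The same trick in the toy terms used earlier (toy_remove is hypothetical; the list unlink stands in for edata_heap_remove):

static void
toy_remove(toy_bin_t *bin, toy_node_t *node) {
	uint64_t node_summary = node->summary;	/* snapshot, as in the diff */

	/* Stand-in for edata_heap_remove(): unlink from the list. */
	toy_node_t **link = &bin->heap;
	while (*link != node) {
		link = &(*link)->next;
	}
	*link = node->next;

	if (bin->heap == NULL) {
		return;	/* the real code clears the bin's bitmap bit here */
	}
	/*
	 * The cached min can only be stale if the removed summary
	 * compared equal to it; only then do we touch the heap again.
	 */
	if (node_summary == bin->heap_min) {
		bin->heap_min = bin->heap->summary;	/* new first element */
	}
}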
@@ -178,6 +206,7 @@ static edata_t *
 eset_first_fit(eset_t *eset, size_t size, bool exact_only,
     unsigned lg_max_fit) {
 	edata_t *ret = NULL;
+	edata_cmp_summary_t ret_summ JEMALLOC_CC_SILENCE_INIT({0});
 
 	pszind_t pind = sz_psz2ind(sz_psz_quantize_ceil(size));
 
@@ -191,8 +220,6 @@ eset_first_fit(eset_t *eset, size_t size, bool exact_only,
 	    i < ESET_NPSIZES;
 	    i = (pszind_t)fb_ffs(eset->bitmap, ESET_NPSIZES, (size_t)i + 1)) {
 		assert(!edata_heap_empty(&eset->bins[i].heap));
-		edata_t *edata = edata_heap_first(&eset->bins[i].heap);
-		assert(edata_size_get(edata) >= size);
 		if (lg_max_fit == SC_PTR_BITS) {
 			/*
 			 * We'll shift by this below, and shifting out all the
@@ -204,8 +231,23 @@ eset_first_fit(eset_t *eset, size_t size, bool exact_only,
 		if ((sz_pind2sz(i) >> lg_max_fit) > size) {
 			break;
 		}
-		if (ret == NULL || edata_snad_comp(edata, ret) < 0) {
+		if (ret == NULL || edata_cmp_summary_comp(
+		    eset->bins[i].heap_min, ret_summ) < 0) {
+			/*
+			 * We grab the edata as early as possible, even though
+			 * we might change it later. Practically, a large
+			 * portion of eset_fit calls succeed at the first valid
+			 * index, so this doesn't cost much, and we get the
+			 * effect of prefetching the edata as early as possible.
+			 */
+			edata_t *edata = edata_heap_first(&eset->bins[i].heap);
+			assert(edata_size_get(edata) >= size);
+			assert(ret == NULL || edata_snad_comp(edata, ret) < 0);
+			assert(ret == NULL || edata_cmp_summary_comp(
+			    eset->bins[i].heap_min,
+			    edata_cmp_summary_get(edata)) == 0);
 			ret = edata;
+			ret_summ = eset->bins[i].heap_min;
 		}
 		if (i == SC_NPSIZES) {
 			break;
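Putting the pieces together, a short hypothetical driver shows the invariant the diff maintains: each bin's heap_min always mirrors the summary of that bin's first element, so a first-fit scan — like toy_first_fit above, or eset_first_fit in the real code — never touches a losing bin's heap:

#include <assert.h>
#include <stdio.h>

int
main(void) {
	toy_bin_t bins[4] = {{NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}};
	toy_node_t a = {30, NULL}, b = {10, NULL}, c = {20, NULL};

	toy_insert(&bins[1], &a);
	toy_insert(&bins[1], &b);	/* b is the new min of bin 1 */
	toy_insert(&bins[2], &c);
	assert(toy_first_fit(bins, 4, 0) == &b);	/* lowest cached summary */

	toy_remove(&bins[1], &b);	/* summary == heap_min: cache refreshed */
	assert(bins[1].heap_min == 30);
	assert(toy_first_fit(bins, 4, 0) == &c);

	puts("toy eset invariants hold");
	return 0;
}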