Tcache: Add tcache gc delay option.

This can reduce flushing frequency for small size classes.
This commit is contained in:
David Goldblatt 2020-05-11 16:24:17 -07:00 committed by David Goldblatt
parent d338dd45d7
commit ee72bf1cfd
4 changed files with 99 additions and 27 deletions

View File

@ -9,6 +9,7 @@ extern unsigned opt_tcache_nslots_small_max;
extern unsigned opt_tcache_nslots_large; extern unsigned opt_tcache_nslots_large;
extern ssize_t opt_lg_tcache_shift; extern ssize_t opt_lg_tcache_shift;
extern size_t opt_tcache_gc_incr_bytes; extern size_t opt_tcache_gc_incr_bytes;
extern size_t opt_tcache_gc_delay_bytes;
/* /*
* Number of tcache bins. There are SC_NBINS small-object bins, plus 0 or more * Number of tcache bins. There are SC_NBINS small-object bins, plus 0 or more

View File

@ -36,6 +36,11 @@ struct tcache_slow_s {
uint8_t lg_fill_div[SC_NBINS]; uint8_t lg_fill_div[SC_NBINS];
/* For small bins, whether has been refilled since last GC. */ /* For small bins, whether has been refilled since last GC. */
bool bin_refilled[SC_NBINS]; bool bin_refilled[SC_NBINS];
/*
* For small bins, the number of items we can pretend to flush before
* actually flushing.
*/
uint8_t bin_flush_delay_items[SC_NBINS];
/* /*
* The start of the allocation containing the dynamic allocation for * The start of the allocation containing the dynamic allocation for
* either the cache bins alone, or the cache bin memory as well as this * either the cache bins alone, or the cache bin memory as well as this

View File

@ -1393,6 +1393,10 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS],
"tcache_gc_incr_bytes", 1024, SIZE_T_MAX, "tcache_gc_incr_bytes", 1024, SIZE_T_MAX,
CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX,
/* clip */ true) /* clip */ true)
CONF_HANDLE_SIZE_T(opt_tcache_gc_delay_bytes,
"tcache_gc_delay_bytes", 0, SIZE_T_MAX,
CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX,
/* clip */ false)
/* /*
* The runtime option of oversize_threshold remains * The runtime option of oversize_threshold remains

View File

@ -33,6 +33,20 @@ unsigned opt_tcache_nslots_large = 20;
*/ */
ssize_t opt_lg_tcache_nslots_mul = -1; ssize_t opt_lg_tcache_nslots_mul = -1;
/*
* With default settings, we may end up flushing small bins frequently with
* small flush amounts. To limit this tendency, we can set a number of bytes to
* "delay" by. If we try to flush N M-byte items, we decrease that size-class's
* delay by N * M. So, if delay is 1024 and we're looking at the 64-byte size
* class, we won't do any flushing until we've been asked to flush 1024/64 == 16
* items. This can happen in any configuration (i.e. being asked to flush 16
* items once, or 4 items 4 times).
*
* Practically, this is stored as a count of items in a uint8_t, so the
* effective maximum value for a size class is 255 * sz.
*/
size_t opt_tcache_gc_delay_bytes = 0;
/* /*
* Number of allocation bytes between tcache incremental GCs. Again, this * Number of allocation bytes between tcache incremental GCs. Again, this
* default just seems to work well; more tuning is possible. * default just seems to work well; more tuning is possible.
@ -86,6 +100,67 @@ tcache_gc_dalloc_postponed_event_wait(tsd_t *tsd) {
return TE_MIN_START_WAIT; return TE_MIN_START_WAIT;
} }
/*
 * Compute the per-GC-pass flush-delay budget for a small size class, as a
 * count of items: opt_tcache_gc_delay_bytes divided by the class's item size,
 * clamped so it fits the uint8_t storage in tcache_slow_t.
 */
static uint8_t
tcache_gc_item_delay_compute(szind_t szind) {
	assert(szind < SC_NBINS);
	size_t sz = sz_index2size(szind);
	/* Number of items whose flushing we may postpone. */
	size_t item_delay = opt_tcache_gc_delay_bytes / sz;
	/* 2^(8 * sizeof(bin_flush_delay_items[0])), i.e. 256. */
	size_t delay_max = ZU(1)
	    << (sizeof(((tcache_slow_t *)NULL)->bin_flush_delay_items[0]) * 8);
	if (item_delay >= delay_max) {
		item_delay = delay_max - 1;
	}
	/* Clamped above, so this narrowing conversion cannot overflow. */
	return (uint8_t)item_delay;
}
/*
 * Incremental GC for a small bin: flush (ceiling) 3/4 of the objects below the
 * low-water mark, unless the size class still has flush-delay budget left, in
 * which case just consume budget and do nothing.  Also halves the bin's fill
 * count (bounded so it stays >= 1) to adapt to low usage.
 */
static void
tcache_gc_small(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache,
    szind_t szind) {
	/* Aim to flush 3/4 of items below low-water. */
	assert(szind < SC_NBINS);

	cache_bin_t *cache_bin = &tcache->bins[szind];
	cache_bin_sz_t ncached = cache_bin_ncached_get(cache_bin,
	    &tcache_bin_info[szind]);
	cache_bin_sz_t low_water = cache_bin_low_water_get(cache_bin,
	    &tcache_bin_info[szind]);
	/* A refilled bin resets low-water; GC should not see one here. */
	assert(!tcache_slow->bin_refilled[szind]);

	size_t nflush = low_water - (low_water >> 2);
	if (nflush < tcache_slow->bin_flush_delay_items[szind]) {
		/*
		 * Budget remains; spend it instead of flushing.  The
		 * comparison above guarantees nflush fits in a uint8_t; the
		 * explicit cast avoids an implicit-conversion warning.
		 */
		assert(sizeof(tcache_slow->bin_flush_delay_items[0]) ==
		    sizeof(uint8_t));
		tcache_slow->bin_flush_delay_items[szind] -= (uint8_t)nflush;
		return;
	} else {
		/* Budget exhausted; flush, and reset it to its full value. */
		tcache_slow->bin_flush_delay_items[szind]
		    = tcache_gc_item_delay_compute(szind);
	}

	tcache_bin_flush_small(tsd, tcache, cache_bin, szind, ncached - nflush);

	/*
	 * Reduce fill count by 2X.  Limit lg_fill_div such that
	 * the fill count is always at least 1.
	 */
	if ((cache_bin_info_ncached_max(&tcache_bin_info[szind])
	    >> (tcache_slow->lg_fill_div[szind] + 1)) >= 1) {
		tcache_slow->lg_fill_div[szind]++;
	}
}
/*
 * Incremental GC for a large bin: flush (ceiling) 3/4 of the objects sitting
 * below the low-water mark.  Large bins have no flush-delay budget and no
 * fill-count adaptation; tcache_slow is accepted only for signature symmetry
 * with tcache_gc_small.
 */
static void
tcache_gc_large(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache,
    szind_t szind) {
	/* Like the small GC; flush 3/4 of untouched items. */
	assert(szind >= SC_NBINS);
	cache_bin_t *bin = &tcache->bins[szind];
	cache_bin_sz_t navail = cache_bin_ncached_get(bin,
	    &tcache_bin_info[szind]);
	cache_bin_sz_t untouched = cache_bin_low_water_get(bin,
	    &tcache_bin_info[szind]);
	/* Keep everything above low-water plus 1/4 of what's below it. */
	cache_bin_sz_t nremain = navail - untouched + (untouched >> 2);
	tcache_bin_flush_large(tsd, tcache, bin, szind, nremain);
}
static void static void
tcache_event(tsd_t *tsd) { tcache_event(tsd_t *tsd) {
tcache_t *tcache = tcache_get(tsd); tcache_t *tcache = tcache_get(tsd);
@ -94,45 +169,28 @@ tcache_event(tsd_t *tsd) {
} }
tcache_slow_t *tcache_slow = tsd_tcache_slowp_get(tsd); tcache_slow_t *tcache_slow = tsd_tcache_slowp_get(tsd);
szind_t binind = tcache_slow->next_gc_bin; szind_t szind = tcache_slow->next_gc_bin;
bool is_small = (binind < SC_NBINS); bool is_small = (szind < SC_NBINS);
cache_bin_t *cache_bin = &tcache->bins[binind]; cache_bin_t *cache_bin = &tcache->bins[szind];
cache_bin_sz_t low_water = cache_bin_low_water_get(cache_bin, cache_bin_sz_t low_water = cache_bin_low_water_get(cache_bin,
&tcache_bin_info[binind]); &tcache_bin_info[szind]);
cache_bin_sz_t ncached = cache_bin_ncached_get(cache_bin,
&tcache_bin_info[binind]);
if (low_water > 0) { if (low_water > 0) {
/*
* Flush (ceiling) 3/4 of the objects below the low water mark.
*/
if (is_small) { if (is_small) {
assert(!tcache_slow->bin_refilled[binind]); tcache_gc_small(tsd, tcache_slow, tcache, szind);
tcache_bin_flush_small(tsd, tcache, cache_bin, binind,
ncached - low_water + (low_water >> 2));
/*
* Reduce fill count by 2X. Limit lg_fill_div such that
* the fill count is always at least 1.
*/
if ((cache_bin_info_ncached_max(
&tcache_bin_info[binind]) >>
(tcache_slow->lg_fill_div[binind] + 1)) >= 1) {
tcache_slow->lg_fill_div[binind]++;
}
} else { } else {
tcache_bin_flush_large(tsd, tcache, cache_bin, binind, tcache_gc_large(tsd, tcache_slow, tcache, szind);
ncached - low_water + (low_water >> 2));
} }
} else if (is_small && tcache_slow->bin_refilled[binind]) { } else if (is_small && tcache_slow->bin_refilled[szind]) {
assert(low_water == 0); assert(low_water == 0);
/* /*
* Increase fill count by 2X for small bins. Make sure * Increase fill count by 2X for small bins. Make sure
* lg_fill_div stays greater than 0. * lg_fill_div stays greater than 0.
*/ */
if (tcache_slow->lg_fill_div[binind] > 1) { if (tcache_slow->lg_fill_div[szind] > 1) {
tcache_slow->lg_fill_div[binind]--; tcache_slow->lg_fill_div[szind]--;
} }
tcache_slow->bin_refilled[binind] = false; tcache_slow->bin_refilled[szind] = false;
} }
cache_bin_low_water_set(cache_bin); cache_bin_low_water_set(cache_bin);
@ -519,6 +577,10 @@ tcache_init(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache,
&cur_offset); &cur_offset);
/* Sanity check that the whole stack is used. */ /* Sanity check that the whole stack is used. */
assert(cur_offset == tcache_bin_alloc_size); assert(cur_offset == tcache_bin_alloc_size);
for (unsigned i = 0; i < SC_NBINS; i++) {
tcache_slow->bin_flush_delay_items[i]
= tcache_gc_item_delay_compute(i);
}
} }
/* Initialize auto tcache (embedded in TSD). */ /* Initialize auto tcache (embedded in TSD). */