SEC: Use batch fills.

Currently, this doesn't help much, since no PAI implementation supports
flushing.  This will change in subsequent commits.
David Goldblatt, 2021-01-19 13:06:43 -08:00 (committed by David Goldblatt)
parent 480f3b11cd
commit cdae6706a6
3 changed files with 167 additions and 71 deletions


@@ -45,6 +45,24 @@ sec_stats_accum(sec_stats_t *dst, sec_stats_t *src) {
 /* A collections of free extents, all of the same size. */
 typedef struct sec_bin_s sec_bin_t;
 struct sec_bin_s {
+    /*
+     * When we fail to fulfill an allocation, we do a batch-alloc on the
+     * underlying allocator to fill extra items, as well. We drop the SEC
+     * lock while doing so, to allow operations on other bins to succeed.
+     * That introduces the possibility of other threads also trying to
+     * allocate out of this bin, failing, and also going to the backing
+     * allocator. To avoid a thundering herd problem in which lots of
+     * threads do batch allocs and overfill this bin as a result, we only
+     * allow one batch allocation at a time for a bin. This bool tracks
+     * whether or not some thread is already batch allocating.
+     *
+     * Eventually, the right answer may be a smarter sharding policy for the
+     * bins (e.g. a mutex per bin, which would also be more scalable
+     * generally; the batch-allocating thread could hold it while
+     * batch-allocating).
+     */
+    bool being_batch_filled;
     /*
      * Number of bytes in this particular bin (as opposed to the
      * sec_shard_t's bytes_cur. This isn't user visible or reported in
@@ -108,6 +126,16 @@ struct sec_s {
      */
     size_t bytes_after_flush;
+    /*
+     * When we can't satisfy an allocation out of the SEC because there are
+     * no available ones cached, we allocate multiple of that size out of
+     * the fallback allocator. Eventually we might want to do something
+     * cleverer, but for now we just grab a fixed number.
+     *
+     * For now, just the constant 4. Eventually, it should be configurable.
+     */
+    size_t batch_fill_extra;
     /*
      * We don't necessarily always use all the shards; requests are
      * distributed across shards [0, nshards - 1).
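The two new fields work together on the allocation slow path: a miss marks the bin as being batch-filled, drops the shard lock, asks the fallback for 1 + batch_fill_extra extents, and caches whatever is not handed back to the caller. As a standalone illustration of that single-filler pattern, here is a minimal toy sketch; every name in it (toy_bin_t, slow_batch_alloc, toy_fill_on_miss) is a hypothetical stand-in, not a jemalloc API. The real logic lives in sec_alloc and sec_batch_fill_and_alloc in src/sec.c below.

#include <assert.h>
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

#define BATCH_FILL_EXTRA 4 /* mirrors sec->batch_fill_extra's default */

typedef struct {
    pthread_mutex_t mtx;
    bool being_batch_filled; /* only one thread may batch-fill at a time */
    int nfree;               /* stand-in for the bin's freelist length */
} toy_bin_t;

/* Pretend fallback allocator: hands back up to `want` items. */
static int
slow_batch_alloc(int want) {
    return want;
}

/* Returns how many extra items got cached (0 if another thread owns the fill). */
static int
toy_fill_on_miss(toy_bin_t *bin) {
    pthread_mutex_lock(&bin->mtx);
    if (bin->nfree > 0 || bin->being_batch_filled) {
        /* No miss after all, or someone else is already batch-filling. */
        pthread_mutex_unlock(&bin->mtx);
        return 0;
    }
    bin->being_batch_filled = true;
    pthread_mutex_unlock(&bin->mtx);

    /* The expensive call happens with the lock dropped. */
    int got = slow_batch_alloc(1 + BATCH_FILL_EXTRA);

    pthread_mutex_lock(&bin->mtx);
    bin->being_batch_filled = false;
    int cached = got > 0 ? got - 1 : 0; /* one item goes to the caller */
    bin->nfree += cached;
    pthread_mutex_unlock(&bin->mtx);
    return cached;
}

int
main(void) {
    toy_bin_t bin = { PTHREAD_MUTEX_INITIALIZER, false, 0 };
    int cached = toy_fill_on_miss(&bin);
    printf("cached %d extra items after one miss\n", cached);
    assert(cached == BATCH_FILL_EXTRA);
    return 0;
}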

src/sec.c

@@ -13,6 +13,7 @@ static void sec_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata);
 static void
 sec_bin_init(sec_bin_t *bin) {
+    bin->being_batch_filled = false;
     bin->bytes_cur = 0;
     edata_list_active_init(&bin->freelist);
 }
@@ -45,6 +46,7 @@ sec_init(sec_t *sec, pai_t *fallback, size_t nshards, size_t alloc_max,
     sec->bytes_max = bytes_max;
     sec->bytes_after_flush = bytes_max / 2;
+    sec->batch_fill_extra = 4;
     sec->nshards = nshards;
     /*
@@ -88,14 +90,52 @@ sec_shard_pick(tsdn_t *tsdn, sec_t *sec) {
     return &sec->shards[*idxp];
 }
+/*
+ * Perhaps surprisingly, this can be called on the alloc pathways; if we hit an
+ * empty cache, we'll try to fill it, which can push the shard over its limit.
+ */
+static void
+sec_flush_some_and_unlock(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard) {
+    malloc_mutex_assert_owner(tsdn, &shard->mtx);
+    edata_list_active_t to_flush;
+    edata_list_active_init(&to_flush);
+    while (shard->bytes_cur > sec->bytes_after_flush) {
+        /* Pick a victim. */
+        sec_bin_t *bin = &shard->bins[shard->to_flush_next];
+        /* Update our victim-picking state. */
+        shard->to_flush_next++;
+        if (shard->to_flush_next == SEC_NPSIZES) {
+            shard->to_flush_next = 0;
+        }
+        assert(shard->bytes_cur >= bin->bytes_cur);
+        if (bin->bytes_cur != 0) {
+            shard->bytes_cur -= bin->bytes_cur;
+            bin->bytes_cur = 0;
+            edata_list_active_concat(&to_flush, &bin->freelist);
+        }
+        /*
+         * Either bin->bytes_cur was 0, in which case we didn't touch
+         * the bin list but it should be empty anyways (or else we
+         * missed a bytes_cur update on a list modification), or it
+         * *wasn't* 0 and we emptied it ourselves. Either way, it should
+         * be empty now.
+         */
+        assert(edata_list_active_empty(&bin->freelist));
+    }
+    malloc_mutex_unlock(tsdn, &shard->mtx);
+    pai_dalloc_batch(tsdn, sec->fallback, &to_flush);
+}
 static edata_t *
 sec_shard_alloc_locked(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard,
-    pszind_t pszind) {
+    sec_bin_t *bin) {
     malloc_mutex_assert_owner(tsdn, &shard->mtx);
     if (!shard->enabled) {
         return NULL;
     }
-    sec_bin_t *bin = &shard->bins[pszind];
     edata_t *edata = edata_list_active_first(&bin->freelist);
     if (edata != NULL) {
         edata_list_active_remove(&bin->freelist, edata);
@@ -107,6 +147,50 @@ sec_shard_alloc_locked(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard,
     return edata;
 }
+static edata_t *
+sec_batch_fill_and_alloc(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard,
+    sec_bin_t *bin, size_t size) {
+    malloc_mutex_assert_not_owner(tsdn, &shard->mtx);
+    edata_list_active_t result;
+    edata_list_active_init(&result);
+    size_t nalloc = pai_alloc_batch(tsdn, sec->fallback, size,
+        1 + sec->batch_fill_extra, &result);
+    edata_t *ret = edata_list_active_first(&result);
+    if (ret != NULL) {
+        edata_list_active_remove(&result, ret);
+    }
+    malloc_mutex_lock(tsdn, &shard->mtx);
+    bin->being_batch_filled = false;
+    /*
+     * Handle the easy case first: nothing to cache. Note that this can
+     * only happen in case of OOM, since sec_alloc checks the expected
+     * number of allocs, and doesn't bother going down the batch_fill
+     * pathway if there won't be anything left to cache. So to be in this
+     * code path, we must have asked for > 1 alloc, but only gotten 1 back.
+     */
+    if (nalloc <= 1) {
+        malloc_mutex_unlock(tsdn, &shard->mtx);
+        return ret;
+    }
+    size_t new_cached_bytes = (nalloc - 1) * size;
+    edata_list_active_concat(&bin->freelist, &result);
+    bin->bytes_cur += new_cached_bytes;
+    shard->bytes_cur += new_cached_bytes;
+    if (shard->bytes_cur > sec->bytes_max) {
+        sec_flush_some_and_unlock(tsdn, sec, shard);
+    } else {
+        malloc_mutex_unlock(tsdn, &shard->mtx);
+    }
+    return ret;
+}
 static edata_t *
 sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero) {
     assert((size & PAGE_MASK) == 0);
@@ -119,16 +203,26 @@ sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero) {
     }
     pszind_t pszind = sz_psz2ind(size);
     sec_shard_t *shard = sec_shard_pick(tsdn, sec);
+    sec_bin_t *bin = &shard->bins[pszind];
+    bool do_batch_fill = false;
     malloc_mutex_lock(tsdn, &shard->mtx);
-    edata_t *edata = sec_shard_alloc_locked(tsdn, sec, shard, pszind);
+    edata_t *edata = sec_shard_alloc_locked(tsdn, sec, shard, bin);
+    if (edata == NULL) {
+        if (!bin->being_batch_filled && sec->batch_fill_extra > 0) {
+            bin->being_batch_filled = true;
+            do_batch_fill = true;
+        }
+    }
     malloc_mutex_unlock(tsdn, &shard->mtx);
     if (edata == NULL) {
-        /*
-         * See the note in dalloc, below; really, we should add a
-         * batch_alloc method to the PAI and get more than one extent at
-         * a time.
-         */
-        edata = pai_alloc(tsdn, sec->fallback, size, alignment, zero);
+        if (do_batch_fill) {
+            edata = sec_batch_fill_and_alloc(tsdn, sec, shard, bin,
+                size);
+        } else {
+            edata = pai_alloc(tsdn, sec->fallback, size, alignment,
+                zero);
+        }
     }
     return edata;
 }
@@ -168,41 +262,6 @@ sec_flush_all_locked(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard) {
     pai_dalloc_batch(tsdn, sec->fallback, &to_flush);
 }
-static void
-sec_flush_some_and_unlock(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard) {
-    malloc_mutex_assert_owner(tsdn, &shard->mtx);
-    edata_list_active_t to_flush;
-    edata_list_active_init(&to_flush);
-    while (shard->bytes_cur > sec->bytes_after_flush) {
-        /* Pick a victim. */
-        sec_bin_t *bin = &shard->bins[shard->to_flush_next];
-        /* Update our victim-picking state. */
-        shard->to_flush_next++;
-        if (shard->to_flush_next == SEC_NPSIZES) {
-            shard->to_flush_next = 0;
-        }
-        assert(shard->bytes_cur >= bin->bytes_cur);
-        if (bin->bytes_cur != 0) {
-            shard->bytes_cur -= bin->bytes_cur;
-            bin->bytes_cur = 0;
-            edata_list_active_concat(&to_flush, &bin->freelist);
-        }
-        /*
-         * Either bin->bytes_cur was 0, in which case we didn't touch
-         * the bin list but it should be empty anyways (or else we
-         * missed a bytes_cur update on a list modification), or it
-         * *wasn't* 0 and we emptied it ourselves. Either way, it should
-         * be empty now.
-         */
-        assert(edata_list_active_empty(&bin->freelist));
-    }
-    malloc_mutex_unlock(tsdn, &shard->mtx);
-    pai_dalloc_batch(tsdn, sec->fallback, &to_flush);
-}
 static void
 sec_shard_dalloc_and_unlock(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard,
     edata_t *edata) {
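To make the fill/flush interaction above concrete, here is a small worked example of the byte accounting. The shard limits are made up for illustration; only batch_fill_extra's default of 4 comes from this patch.

#include <assert.h>
#include <stddef.h>
#include <stdio.h>

#define PAGE ((size_t)4096)

int
main(void) {
    size_t batch_fill_extra = 4;              /* sec->batch_fill_extra default */
    size_t bytes_max = 16 * PAGE;             /* hypothetical shard limit */
    size_t bytes_after_flush = bytes_max / 2; /* as set in sec_init */

    size_t bytes_cur = 14 * PAGE; /* shard is nearly full... */
    size_t size = PAGE;           /* ...and a 1-page bin misses */

    /* Fill: 1 + extra extents are requested; one goes straight to the caller. */
    size_t nalloc = 1 + batch_fill_extra;
    size_t new_cached_bytes = (nalloc - 1) * size;
    bytes_cur += new_cached_bytes; /* 14 + 4 = 18 pages now cached */
    printf("after fill: %zu pages cached\n", bytes_cur / PAGE);

    /*
     * The overshoot past bytes_max is what triggers sec_flush_some_and_unlock,
     * which empties whole bins (round-robin via to_flush_next) until bytes_cur
     * is back at or below bytes_after_flush.
     */
    assert(bytes_cur > bytes_max);
    assert(bytes_after_flush == 8 * PAGE);
    return 0;
}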


@@ -134,14 +134,17 @@ TEST_BEGIN(test_reuse) {
     */
    tsdn_t *tsdn = TSDN_NULL;
    /*
-    * 10-allocs apiece of 1-PAGE and 2-PAGE objects means that we should be
-    * able to get to 30 pages in the cache before triggering a flush.
+    * 11 allocs apiece of 1-PAGE and 2-PAGE objects means that we should be
+    * able to get to 33 pages in the cache before triggering a flush. We
+    * set the flush limit to twice this amount, to avoid accidentally
+    * triggering a flush caused by the batch-allocation down the cache fill
+    * pathway disrupting ordering.
     */
-   enum { NALLOCS = 10 };
+   enum { NALLOCS = 11 };
    edata_t *one_page[NALLOCS];
    edata_t *two_page[NALLOCS];
    sec_init(&sec, &ta.pai, /* nshards */ 1, /* alloc_max */ 2 * PAGE,
-       /* bytes_max */ NALLOCS * PAGE + NALLOCS * 2 * PAGE);
+       /* bytes_max */ 2 * (NALLOCS * PAGE + NALLOCS * 2 * PAGE));
    for (int i = 0; i < NALLOCS; i++) {
        one_page[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE,
            /* zero */ false);
@@ -150,7 +153,9 @@ TEST_BEGIN(test_reuse) {
            /* zero */ false);
        expect_ptr_not_null(one_page[i], "Unexpected alloc failure");
    }
-   expect_zu_eq(2 * NALLOCS, ta.alloc_count,
+   expect_zu_eq(0, ta.alloc_count, "Should be using batch allocs");
+   size_t max_allocs = ta.alloc_count + ta.alloc_batch_count;
+   expect_zu_le(2 * NALLOCS, max_allocs,
        "Incorrect number of allocations");
    expect_zu_eq(0, ta.dalloc_count,
        "Incorrect number of allocations");
@@ -164,7 +169,7 @@ TEST_BEGIN(test_reuse) {
    for (int i = NALLOCS - 1; i >= 0; i--) {
        pai_dalloc(tsdn, &sec.pai, two_page[i]);
    }
-   expect_zu_eq(2 * NALLOCS, ta.alloc_count,
+   expect_zu_eq(max_allocs, ta.alloc_count + ta.alloc_batch_count,
        "Incorrect number of allocations");
    expect_zu_eq(0, ta.dalloc_count,
        "Incorrect number of allocations");
@@ -182,7 +187,7 @@ TEST_BEGIN(test_reuse) {
        expect_ptr_eq(two_page[i], alloc2,
            "Got unexpected allocation");
    }
-   expect_zu_eq(2 * NALLOCS, ta.alloc_count,
+   expect_zu_eq(max_allocs, ta.alloc_count + ta.alloc_batch_count,
        "Incorrect number of allocations");
    expect_zu_eq(0, ta.dalloc_count,
        "Incorrect number of allocations");
@@ -198,7 +203,12 @@ TEST_BEGIN(test_auto_flush) {
    tsdn_t *tsdn = TSDN_NULL;
    /*
     * 10-allocs apiece of 1-PAGE and 2-PAGE objects means that we should be
-    * able to get to 30 pages in the cache before triggering a flush.
+    * able to get to 30 pages in the cache before triggering a flush. The
+    * choice of NALLOCS here is chosen to match the batch allocation
+    * default (4 extra + 1 == 5; so 10 allocations leaves the cache exactly
+    * empty, even in the presence of batch allocation on fill).
+    * Eventually, once our allocation batching strategies become smarter,
+    * this should change.
     */
    enum { NALLOCS = 10 };
    edata_t *extra_alloc;
@@ -212,7 +222,8 @@ TEST_BEGIN(test_auto_flush) {
    }
    extra_alloc = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false);
    expect_ptr_not_null(extra_alloc, "Unexpected alloc failure");
-   expect_zu_eq(NALLOCS + 1, ta.alloc_count,
+   size_t max_allocs = ta.alloc_count + ta.alloc_batch_count;
+   expect_zu_le(NALLOCS + 1, max_allocs,
        "Incorrect number of allocations");
    expect_zu_eq(0, ta.dalloc_count,
        "Incorrect number of allocations");
@@ -220,7 +231,7 @@ TEST_BEGIN(test_auto_flush) {
    for (int i = 0; i < NALLOCS; i++) {
        pai_dalloc(tsdn, &sec.pai, allocs[i]);
    }
-   expect_zu_eq(NALLOCS + 1, ta.alloc_count,
+   expect_zu_le(NALLOCS + 1, max_allocs,
        "Incorrect number of allocations");
    expect_zu_eq(0, ta.dalloc_count,
        "Incorrect number of allocations");
@@ -232,7 +243,7 @@ TEST_BEGIN(test_auto_flush) {
     * right now.
     */
    pai_dalloc(tsdn, &sec.pai, extra_alloc);
-   expect_zu_eq(NALLOCS + 1, ta.alloc_count,
+   expect_zu_eq(max_allocs, ta.alloc_count + ta.alloc_batch_count,
        "Incorrect number of allocations");
    expect_zu_eq(0, ta.dalloc_count,
        "Incorrect number of (non-batch) deallocations");
@@ -253,7 +264,7 @@ do_disable_flush_test(bool is_disable) {
    /* See the note above -- we can't use the real tsd. */
    tsdn_t *tsdn = TSDN_NULL;
-   enum { NALLOCS = 10 };
+   enum { NALLOCS = 11 };
    edata_t *allocs[NALLOCS];
    sec_init(&sec, &ta.pai, /* nshards */ 1, /* alloc_max */ PAGE,
        /* bytes_max */ NALLOCS * PAGE);
@@ -266,8 +277,9 @@ do_disable_flush_test(bool is_disable) {
    for (int i = 0; i < NALLOCS - 1; i++) {
        pai_dalloc(tsdn, &sec.pai, allocs[i]);
    }
-   expect_zu_eq(NALLOCS, ta.alloc_count,
-       "Incorrect number of allocations");
+   size_t max_allocs = ta.alloc_count + ta.alloc_batch_count;
+   expect_zu_le(NALLOCS, max_allocs, "Incorrect number of allocations");
    expect_zu_eq(0, ta.dalloc_count,
        "Incorrect number of allocations");
@@ -277,12 +289,13 @@ do_disable_flush_test(bool is_disable) {
        sec_flush(tsdn, &sec);
    }
-   expect_zu_eq(NALLOCS, ta.alloc_count,
+   expect_zu_eq(max_allocs, ta.alloc_count + ta.alloc_batch_count,
        "Incorrect number of allocations");
    expect_zu_eq(0, ta.dalloc_count,
        "Incorrect number of (non-batch) deallocations");
-   expect_zu_eq(NALLOCS - 1, ta.dalloc_batch_count,
+   expect_zu_le(NALLOCS - 1, ta.dalloc_batch_count,
        "Incorrect number of batch deallocations");
+   size_t old_dalloc_batch_count = ta.dalloc_batch_count;
    /*
     * If we free into a disabled SEC, it should forward to the fallback.
@@ -290,11 +303,11 @@ do_disable_flush_test(bool is_disable) {
     */
    pai_dalloc(tsdn, &sec.pai, allocs[NALLOCS - 1]);
-   expect_zu_eq(NALLOCS, ta.alloc_count,
+   expect_zu_eq(max_allocs, ta.alloc_count + ta.alloc_batch_count,
        "Incorrect number of allocations");
    expect_zu_eq(is_disable ? 1 : 0, ta.dalloc_count,
        "Incorrect number of (non-batch) deallocations");
-   expect_zu_eq(NALLOCS - 1, ta.dalloc_batch_count,
+   expect_zu_eq(old_dalloc_batch_count, ta.dalloc_batch_count,
        "Incorrect number of batch deallocations");
 }
@@ -404,7 +417,7 @@ expect_stats_pages(tsdn_t *tsdn, sec_t *sec, size_t npages) {
     */
    stats.bytes = 123;
    sec_stats_merge(tsdn, sec, &stats);
-   assert_zu_eq(npages * PAGE + 123, stats.bytes, "");
+   assert_zu_le(npages * PAGE + 123, stats.bytes, "");
 }
 TEST_BEGIN(test_stats_simple) {
@@ -417,7 +430,7 @@ TEST_BEGIN(test_stats_simple) {
    enum {
        NITERS = 100,
-       FLUSH_PAGES = 10,
+       FLUSH_PAGES = 20,
    };
    sec_init(&sec, &ta.pai, /* nshards */ 1, /* alloc_max */ PAGE,
@@ -470,26 +483,22 @@ TEST_BEGIN(test_stats_auto_flush) {
    for (size_t i = 0; i < 2 * FLUSH_PAGES; i++) {
        allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE,
            /* zero */ false);
-       expect_stats_pages(tsdn, &sec, 0);
    }
    for (size_t i = 0; i < FLUSH_PAGES; i++) {
        pai_dalloc(tsdn, &sec.pai, allocs[i]);
-       expect_stats_pages(tsdn, &sec, i + 1);
    }
    pai_dalloc(tsdn, &sec.pai, extra_alloc0);
-   /* The last dalloc should have triggered a flush. */
-   expect_stats_pages(tsdn, &sec, 0);
    /* Flush the remaining pages; stats should still work. */
    for (size_t i = 0; i < FLUSH_PAGES; i++) {
        pai_dalloc(tsdn, &sec.pai, allocs[FLUSH_PAGES + i]);
-       expect_stats_pages(tsdn, &sec, i + 1);
    }
    pai_dalloc(tsdn, &sec.pai, extra_alloc1);
-   /* The last dalloc should have triggered a flush, again. */
-   expect_stats_pages(tsdn, &sec, 0);
+   expect_stats_pages(tsdn, &sec, ta.alloc_count + ta.alloc_batch_count
+       - ta.dalloc_count - ta.dalloc_batch_count);
 }
 TEST_END
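As a quick check of the "4 extra + 1 == 5" arithmetic in the test_auto_flush comment above: with batch_fill_extra = 4, each miss on a bin pulls five extents (one for the caller, four cached), so ten allocations of one size drain the bin exactly twice and leave the cache empty. The following is a toy simulation of that sequence, not part of the jemalloc test harness.

#include <assert.h>
#include <stdio.h>

int
main(void) {
    int batch_fill_extra = 4; /* the default set in sec_init */
    int nallocs = 10;         /* NALLOCS in test_auto_flush */
    int cached = 0;           /* extents sitting in the bin's freelist */
    int fills = 0;

    for (int i = 0; i < nallocs; i++) {
        if (cached == 0) {
            /* Miss: batch-fill 1 + extra extents, keep one for the caller. */
            cached += batch_fill_extra;
            fills++;
        } else {
            cached--; /* hit: serve from the cache */
        }
    }
    printf("%d fills, %d extents still cached\n", fills, cached);
    assert(fills == 2 && cached == 0); /* 10 allocs leave the cache exactly empty */
    return 0;
}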