SEC: Use batch fills.
Currently, this doesn't help much, since no PAI implementation supports batching. This will change in subsequent commits.
This commit is contained in: parent 480f3b11cd, commit cdae6706a6
include/jemalloc/internal/sec.h

@@ -45,6 +45,24 @@ sec_stats_accum(sec_stats_t *dst, sec_stats_t *src) {
 /* A collections of free extents, all of the same size. */
 typedef struct sec_bin_s sec_bin_t;
 struct sec_bin_s {
+    /*
+     * When we fail to fulfill an allocation, we do a batch-alloc on the
+     * underlying allocator to fill extra items, as well. We drop the SEC
+     * lock while doing so, to allow operations on other bins to succeed.
+     * That introduces the possibility of other threads also trying to
+     * allocate out of this bin, failing, and also going to the backing
+     * allocator. To avoid a thundering herd problem in which lots of
+     * threads do batch allocs and overfill this bin as a result, we only
+     * allow one batch allocation at a time for a bin. This bool tracks
+     * whether or not some thread is already batch allocating.
+     *
+     * Eventually, the right answer may be a smarter sharding policy for the
+     * bins (e.g. a mutex per bin, which would also be more scalable
+     * generally; the batch-allocating thread could hold it while
+     * batch-allocating).
+     */
+    bool being_batch_filled;
+
     /*
      * Number of bytes in this particular bin (as opposed to the
      * sec_shard_t's bytes_cur. This isn't user visible or reported in
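
The being_batch_filled flag implements a claim-then-fill handoff: a thread that misses claims the filler role under the shard lock, drops the lock for the expensive backing-allocator call, and clears the flag once it re-acquires the lock to deposit the extras. Below is a minimal standalone sketch of that pattern, using pthreads and hypothetical toy_bin_t/fill_from_backing() names rather than jemalloc's sec_bin_t, shard mutex, and PAI interface:

#include <pthread.h>
#include <stdbool.h>
#include <stddef.h>

typedef struct toy_bin_s {
    pthread_mutex_t mtx;     /* Stands in for the shard mutex. */
    bool being_batch_filled; /* At most one thread batch-fills at a time. */
    size_t nfree;            /* Items currently cached in this bin. */
} toy_bin_t;

/* Pretend call into the backing allocator; expensive, so done unlocked. */
static size_t
fill_from_backing(size_t nitems) {
    return nitems; /* Assume the backing allocator always succeeds. */
}

/* Returns true if the caller got an item (from cache or a fresh fill). */
static bool
toy_bin_alloc(toy_bin_t *bin) {
    pthread_mutex_lock(&bin->mtx);
    if (bin->nfree > 0) {
        bin->nfree--;
        pthread_mutex_unlock(&bin->mtx);
        return true;
    }
    /* Miss: claim the filler role only if nobody else holds it. */
    bool do_fill = !bin->being_batch_filled;
    if (do_fill) {
        bin->being_batch_filled = true;
    }
    pthread_mutex_unlock(&bin->mtx);
    if (!do_fill) {
        /* Another thread is already filling; caller goes to the fallback. */
        return false;
    }

    size_t got = fill_from_backing(1 + 4); /* One for us, extras to cache. */

    pthread_mutex_lock(&bin->mtx);
    bin->being_batch_filled = false;
    if (got > 1) {
        bin->nfree += got - 1; /* Cache the leftovers. */
    }
    pthread_mutex_unlock(&bin->mtx);
    return got > 0;
}
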
@@ -108,6 +126,16 @@ struct sec_s {
     */
    size_t bytes_after_flush;

+    /*
+     * When we can't satisfy an allocation out of the SEC because there are
+     * no available ones cached, we allocate multiple of that size out of
+     * the fallback allocator. Eventually we might want to do something
+     * cleverer, but for now we just grab a fixed number.
+     *
+     * For now, just the constant 4. Eventually, it should be configurable.
+     */
+    size_t batch_fill_extra;
+
    /*
     * We don't necessarily always use all the shards; requests are
     * distributed across shards [0, nshards - 1).
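
The resulting fill size is simple arithmetic: a miss for an extent of size sz asks the fallback for 1 + batch_fill_extra extents, hands one back to the caller, and caches the rest. A rough sketch of that bookkeeping (not jemalloc code; the page size and request size here are made up for illustration):

#include <stdio.h>
#include <stddef.h>

int
main(void) {
    size_t page = 4096;          /* Assumed page size for the example. */
    size_t batch_fill_extra = 4; /* Matches the default set in sec_init(). */
    size_t sz = 2 * page;        /* A 2-page request that misses the cache. */

    size_t asked = 1 + batch_fill_extra;         /* Extents requested: 5. */
    size_t cached_bytes = batch_fill_extra * sz; /* 4 * 8192 = 32768 bytes cached. */

    printf("request %zu extents, return 1, cache %zu bytes\n",
        asked, cached_bytes);
    return 0;
}
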
147  src/sec.c

@@ -13,6 +13,7 @@ static void sec_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata);

 static void
 sec_bin_init(sec_bin_t *bin) {
+    bin->being_batch_filled = false;
     bin->bytes_cur = 0;
     edata_list_active_init(&bin->freelist);
 }
@@ -45,6 +46,7 @@ sec_init(sec_t *sec, pai_t *fallback, size_t nshards, size_t alloc_max,

     sec->bytes_max = bytes_max;
     sec->bytes_after_flush = bytes_max / 2;
+    sec->batch_fill_extra = 4;
     sec->nshards = nshards;

     /*
@@ -88,14 +90,52 @@ sec_shard_pick(tsdn_t *tsdn, sec_t *sec) {
     return &sec->shards[*idxp];
 }

+/*
+ * Perhaps surprisingly, this can be called on the alloc pathways; if we hit an
+ * empty cache, we'll try to fill it, which can push the shard over it's limit.
+ */
+static void
+sec_flush_some_and_unlock(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard) {
+    malloc_mutex_assert_owner(tsdn, &shard->mtx);
+    edata_list_active_t to_flush;
+    edata_list_active_init(&to_flush);
+    while (shard->bytes_cur > sec->bytes_after_flush) {
+        /* Pick a victim. */
+        sec_bin_t *bin = &shard->bins[shard->to_flush_next];
+
+        /* Update our victim-picking state. */
+        shard->to_flush_next++;
+        if (shard->to_flush_next == SEC_NPSIZES) {
+            shard->to_flush_next = 0;
+        }
+
+        assert(shard->bytes_cur >= bin->bytes_cur);
+        if (bin->bytes_cur != 0) {
+            shard->bytes_cur -= bin->bytes_cur;
+            bin->bytes_cur = 0;
+            edata_list_active_concat(&to_flush, &bin->freelist);
+        }
+        /*
+         * Either bin->bytes_cur was 0, in which case we didn't touch
+         * the bin list but it should be empty anyways (or else we
+         * missed a bytes_cur update on a list modification), or it
+         * *was* 0 and we emptied it ourselves. Either way, it should
+         * be empty now.
+         */
+        assert(edata_list_active_empty(&bin->freelist));
+    }
+
+    malloc_mutex_unlock(tsdn, &shard->mtx);
+    pai_dalloc_batch(tsdn, sec->fallback, &to_flush);
+}
+
 static edata_t *
 sec_shard_alloc_locked(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard,
-    pszind_t pszind) {
+    sec_bin_t *bin) {
     malloc_mutex_assert_owner(tsdn, &shard->mtx);
     if (!shard->enabled) {
         return NULL;
     }
-    sec_bin_t *bin = &shard->bins[pszind];
     edata_t *edata = edata_list_active_first(&bin->freelist);
     if (edata != NULL) {
         edata_list_active_remove(&bin->freelist, edata);
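
The flush loop above walks bins round-robin and evicts whole bins until the shard drops to bytes_after_flush, so a single over-full bin cannot be flushed repeatedly while the others stay warm. A simplified sketch of just that policy, with plain byte counters instead of extent lists and no locking (the toy_* names are hypothetical, not jemalloc's):

#include <stddef.h>

enum { TOY_NBINS = 8 };

typedef struct toy_shard_s {
    size_t bin_bytes[TOY_NBINS]; /* Bytes cached per bin. */
    size_t bytes_cur;            /* Invariant: sum of bin_bytes. */
    size_t to_flush_next;        /* Round-robin victim cursor. */
} toy_shard_t;

/*
 * Evict whole bins, in round-robin order, until the shard is at or below
 * the target; returns how many bytes were handed back to the fallback.
 * Terminates because bytes_cur equals the sum of the per-bin counters.
 */
static size_t
toy_flush_some(toy_shard_t *shard, size_t bytes_after_flush) {
    size_t flushed = 0;
    while (shard->bytes_cur > bytes_after_flush) {
        size_t victim = shard->to_flush_next;
        shard->to_flush_next = (victim + 1) % TOY_NBINS;

        flushed += shard->bin_bytes[victim];
        shard->bytes_cur -= shard->bin_bytes[victim];
        shard->bin_bytes[victim] = 0;
    }
    return flushed;
}
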
@@ -107,6 +147,50 @@ sec_shard_alloc_locked(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard,
     return edata;
 }

+static edata_t *
+sec_batch_fill_and_alloc(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard,
+    sec_bin_t *bin, size_t size) {
+    malloc_mutex_assert_not_owner(tsdn, &shard->mtx);
+
+    edata_list_active_t result;
+    edata_list_active_init(&result);
+    size_t nalloc = pai_alloc_batch(tsdn, sec->fallback, size,
+        1 + sec->batch_fill_extra, &result);
+
+    edata_t *ret = edata_list_active_first(&result);
+    if (ret != NULL) {
+        edata_list_active_remove(&result, ret);
+    }
+
+    malloc_mutex_lock(tsdn, &shard->mtx);
+    bin->being_batch_filled = false;
+    /*
+     * Handle the easy case first: nothing to cache. Note that this can
+     * only happen in case of OOM, since sec_alloc checks the expected
+     * number of allocs, and doesn't bother going down the batch_fill
+     * pathway if there won't be anything left to cache. So to be in this
+     * code path, we must have asked for > 1 alloc, but only gotten 1 back.
+     */
+    if (nalloc <= 1) {
+        malloc_mutex_unlock(tsdn, &shard->mtx);
+        return ret;
+    }
+
+    size_t new_cached_bytes = (nalloc - 1) * size;
+
+    edata_list_active_concat(&bin->freelist, &result);
+    bin->bytes_cur += new_cached_bytes;
+    shard->bytes_cur += new_cached_bytes;
+
+    if (shard->bytes_cur > sec->bytes_max) {
+        sec_flush_some_and_unlock(tsdn, sec, shard);
+    } else {
+        malloc_mutex_unlock(tsdn, &shard->mtx);
+    }
+
+    return ret;
+}
+
 static edata_t *
 sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero) {
     assert((size & PAGE_MASK) == 0);
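
The split performed by sec_batch_fill_and_alloc is easy to state on its own: of the nalloc extents the fallback returns, one goes to the caller and the remaining nalloc - 1 (if any) are cached, so nalloc <= 1 means there is nothing to deposit. A tiny self-contained check of that accounting (illustrative only, not jemalloc code):

#include <assert.h>
#include <stddef.h>

/* Bytes deposited into the bin after a batch fill of `nalloc` extents of
 * `size` bytes: one extent is returned to the caller, the rest are cached. */
static size_t
bytes_cached_after_fill(size_t nalloc, size_t size) {
    return nalloc <= 1 ? 0 : (nalloc - 1) * size;
}

int
main(void) {
    assert(bytes_cached_after_fill(5, 4096) == 4 * 4096); /* Full fill. */
    assert(bytes_cached_after_fill(1, 4096) == 0); /* OOM-ish: one only. */
    assert(bytes_cached_after_fill(0, 4096) == 0); /* Complete failure. */
    return 0;
}
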
@@ -119,16 +203,26 @@ sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero) {
     }
     pszind_t pszind = sz_psz2ind(size);
     sec_shard_t *shard = sec_shard_pick(tsdn, sec);
+    sec_bin_t *bin = &shard->bins[pszind];
+    bool do_batch_fill = false;
+
     malloc_mutex_lock(tsdn, &shard->mtx);
-    edata_t *edata = sec_shard_alloc_locked(tsdn, sec, shard, pszind);
+    edata_t *edata = sec_shard_alloc_locked(tsdn, sec, shard, bin);
+    if (edata == NULL) {
+        if (!bin->being_batch_filled && sec->batch_fill_extra > 0) {
+            bin->being_batch_filled = true;
+            do_batch_fill = true;
+        }
+    }
     malloc_mutex_unlock(tsdn, &shard->mtx);
     if (edata == NULL) {
-        /*
-         * See the note in dalloc, below; really, we should add a
-         * batch_alloc method to the PAI and get more than one extent at
-         * a time.
-         */
-        edata = pai_alloc(tsdn, sec->fallback, size, alignment, zero);
+        if (do_batch_fill) {
+            edata = sec_batch_fill_and_alloc(tsdn, sec, shard, bin,
+                size);
+        } else {
+            edata = pai_alloc(tsdn, sec->fallback, size, alignment,
+                zero);
+        }
     }
     return edata;
 }
@@ -168,41 +262,6 @@ sec_flush_all_locked(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard) {
     pai_dalloc_batch(tsdn, sec->fallback, &to_flush);
 }

-static void
-sec_flush_some_and_unlock(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard) {
-    malloc_mutex_assert_owner(tsdn, &shard->mtx);
-    edata_list_active_t to_flush;
-    edata_list_active_init(&to_flush);
-    while (shard->bytes_cur > sec->bytes_after_flush) {
-        /* Pick a victim. */
-        sec_bin_t *bin = &shard->bins[shard->to_flush_next];
-
-        /* Update our victim-picking state. */
-        shard->to_flush_next++;
-        if (shard->to_flush_next == SEC_NPSIZES) {
-            shard->to_flush_next = 0;
-        }
-
-        assert(shard->bytes_cur >= bin->bytes_cur);
-        if (bin->bytes_cur != 0) {
-            shard->bytes_cur -= bin->bytes_cur;
-            bin->bytes_cur = 0;
-            edata_list_active_concat(&to_flush, &bin->freelist);
-        }
-        /*
-         * Either bin->bytes_cur was 0, in which case we didn't touch
-         * the bin list but it should be empty anyways (or else we
-         * missed a bytes_cur update on a list modification), or it
-         * *was* 0 and we emptied it ourselves. Either way, it should
-         * be empty now.
-         */
-        assert(edata_list_active_empty(&bin->freelist));
-    }
-
-    malloc_mutex_unlock(tsdn, &shard->mtx);
-    pai_dalloc_batch(tsdn, sec->fallback, &to_flush);
-}
-
 static void
 sec_shard_dalloc_and_unlock(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard,
     edata_t *edata) {
test/unit/sec.c

@@ -134,14 +134,17 @@ TEST_BEGIN(test_reuse) {
     */
    tsdn_t *tsdn = TSDN_NULL;
    /*
-    * 10-allocs apiece of 1-PAGE and 2-PAGE objects means that we should be
-    * able to get to 30 pages in the cache before triggering a flush.
+    * 11 allocs apiece of 1-PAGE and 2-PAGE objects means that we should be
+    * able to get to 33 pages in the cache before triggering a flush. We
+    * set the flush liimt to twice this amount, to avoid accidentally
+    * triggering a flush caused by the batch-allocation down the cache fill
+    * pathway disrupting ordering.
     */
-   enum { NALLOCS = 10 };
+   enum { NALLOCS = 11 };
    edata_t *one_page[NALLOCS];
    edata_t *two_page[NALLOCS];
    sec_init(&sec, &ta.pai, /* nshards */ 1, /* alloc_max */ 2 * PAGE,
-       /* bytes_max */ NALLOCS * PAGE + NALLOCS * 2 * PAGE);
+       /* bytes_max */ 2 * (NALLOCS * PAGE + NALLOCS * 2 * PAGE));
    for (int i = 0; i < NALLOCS; i++) {
        one_page[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE,
            /* zero */ false);
@@ -150,7 +153,9 @@ TEST_BEGIN(test_reuse) {
            /* zero */ false);
        expect_ptr_not_null(one_page[i], "Unexpected alloc failure");
    }
-   expect_zu_eq(2 * NALLOCS, ta.alloc_count,
+   expect_zu_eq(0, ta.alloc_count, "Should be using batch allocs");
+   size_t max_allocs = ta.alloc_count + ta.alloc_batch_count;
+   expect_zu_le(2 * NALLOCS, max_allocs,
        "Incorrect number of allocations");
    expect_zu_eq(0, ta.dalloc_count,
        "Incorrect number of allocations");
@@ -164,7 +169,7 @@ TEST_BEGIN(test_reuse) {
    for (int i = NALLOCS - 1; i >= 0; i--) {
        pai_dalloc(tsdn, &sec.pai, two_page[i]);
    }
-   expect_zu_eq(2 * NALLOCS, ta.alloc_count,
+   expect_zu_eq(max_allocs, ta.alloc_count + ta.alloc_batch_count,
        "Incorrect number of allocations");
    expect_zu_eq(0, ta.dalloc_count,
        "Incorrect number of allocations");
@@ -182,7 +187,7 @@ TEST_BEGIN(test_reuse) {
        expect_ptr_eq(two_page[i], alloc2,
            "Got unexpected allocation");
    }
-   expect_zu_eq(2 * NALLOCS, ta.alloc_count,
+   expect_zu_eq(max_allocs, ta.alloc_count + ta.alloc_batch_count,
        "Incorrect number of allocations");
    expect_zu_eq(0, ta.dalloc_count,
        "Incorrect number of allocations");
@@ -198,7 +203,12 @@ TEST_BEGIN(test_auto_flush) {
    tsdn_t *tsdn = TSDN_NULL;
    /*
     * 10-allocs apiece of 1-PAGE and 2-PAGE objects means that we should be
-    * able to get to 30 pages in the cache before triggering a flush.
+    * able to get to 30 pages in the cache before triggering a flush. The
+    * choice of NALLOCS here is chosen to match the batch allocation
+    * default (4 extra + 1 == 5; so 10 allocations leaves the cache exactly
+    * empty, even in the presence of batch allocation on fill).
+    * Eventually, once our allocation batching strategies become smarter,
+    * this should change.
     */
    enum { NALLOCS = 10 };
    edata_t *extra_alloc;
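
The arithmetic in that comment can be checked directly: with each miss filling 1 + 4 extents and each hit consuming one cached extent, 10 same-size allocations end with an exactly empty cache. A quick standalone check of that claim (not part of the test suite):

#include <assert.h>
#include <stddef.h>

int
main(void) {
    size_t batch_fill_extra = 4; /* The SEC default assumed by the test. */
    size_t cached = 0;
    for (int i = 0; i < 10; i++) {
        if (cached == 0) {
            cached += batch_fill_extra; /* Miss: fill 5, keep 1 for caller. */
        } else {
            cached--; /* Hit: serve from the cache. */
        }
    }
    assert(cached == 0); /* 10 allocations drain the cache exactly. */
    return 0;
}
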
@@ -212,7 +222,8 @@ TEST_BEGIN(test_auto_flush) {
    }
    extra_alloc = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false);
    expect_ptr_not_null(extra_alloc, "Unexpected alloc failure");
-   expect_zu_eq(NALLOCS + 1, ta.alloc_count,
+   size_t max_allocs = ta.alloc_count + ta.alloc_batch_count;
+   expect_zu_le(NALLOCS + 1, max_allocs,
        "Incorrect number of allocations");
    expect_zu_eq(0, ta.dalloc_count,
        "Incorrect number of allocations");
@@ -220,7 +231,7 @@ TEST_BEGIN(test_auto_flush) {
    for (int i = 0; i < NALLOCS; i++) {
        pai_dalloc(tsdn, &sec.pai, allocs[i]);
    }
-   expect_zu_eq(NALLOCS + 1, ta.alloc_count,
+   expect_zu_le(NALLOCS + 1, max_allocs,
        "Incorrect number of allocations");
    expect_zu_eq(0, ta.dalloc_count,
        "Incorrect number of allocations");
@@ -232,7 +243,7 @@ TEST_BEGIN(test_auto_flush) {
     * right now.
     */
    pai_dalloc(tsdn, &sec.pai, extra_alloc);
-   expect_zu_eq(NALLOCS + 1, ta.alloc_count,
+   expect_zu_eq(max_allocs, ta.alloc_count + ta.alloc_batch_count,
        "Incorrect number of allocations");
    expect_zu_eq(0, ta.dalloc_count,
        "Incorrect number of (non-batch) deallocations");
@@ -253,7 +264,7 @@ do_disable_flush_test(bool is_disable) {
    /* See the note above -- we can't use the real tsd. */
    tsdn_t *tsdn = TSDN_NULL;

-   enum { NALLOCS = 10 };
+   enum { NALLOCS = 11 };
    edata_t *allocs[NALLOCS];
    sec_init(&sec, &ta.pai, /* nshards */ 1, /* alloc_max */ PAGE,
        /* bytes_max */ NALLOCS * PAGE);
@@ -266,8 +277,9 @@ do_disable_flush_test(bool is_disable) {
    for (int i = 0; i < NALLOCS - 1; i++) {
        pai_dalloc(tsdn, &sec.pai, allocs[i]);
    }
-   expect_zu_eq(NALLOCS, ta.alloc_count,
-       "Incorrect number of allocations");
+   size_t max_allocs = ta.alloc_count + ta.alloc_batch_count;
+
+   expect_zu_le(NALLOCS, max_allocs, "Incorrect number of allocations");
    expect_zu_eq(0, ta.dalloc_count,
        "Incorrect number of allocations");

@@ -277,12 +289,13 @@ do_disable_flush_test(bool is_disable) {
        sec_flush(tsdn, &sec);
    }

-   expect_zu_eq(NALLOCS, ta.alloc_count,
+   expect_zu_eq(max_allocs, ta.alloc_count + ta.alloc_batch_count,
        "Incorrect number of allocations");
    expect_zu_eq(0, ta.dalloc_count,
        "Incorrect number of (non-batch) deallocations");
-   expect_zu_eq(NALLOCS - 1, ta.dalloc_batch_count,
+   expect_zu_le(NALLOCS - 1, ta.dalloc_batch_count,
        "Incorrect number of batch deallocations");
+   size_t old_dalloc_batch_count = ta.dalloc_batch_count;

    /*
     * If we free into a disabled SEC, it should forward to the fallback.
@@ -290,11 +303,11 @@ do_disable_flush_test(bool is_disable) {
     */
    pai_dalloc(tsdn, &sec.pai, allocs[NALLOCS - 1]);

-   expect_zu_eq(NALLOCS, ta.alloc_count,
+   expect_zu_eq(max_allocs, ta.alloc_count + ta.alloc_batch_count,
        "Incorrect number of allocations");
    expect_zu_eq(is_disable ? 1 : 0, ta.dalloc_count,
        "Incorrect number of (non-batch) deallocations");
-   expect_zu_eq(NALLOCS - 1, ta.dalloc_batch_count,
+   expect_zu_eq(old_dalloc_batch_count, ta.dalloc_batch_count,
        "Incorrect number of batch deallocations");
}

@@ -404,7 +417,7 @@ expect_stats_pages(tsdn_t *tsdn, sec_t *sec, size_t npages) {
     */
    stats.bytes = 123;
    sec_stats_merge(tsdn, sec, &stats);
-   assert_zu_eq(npages * PAGE + 123, stats.bytes, "");
+   assert_zu_le(npages * PAGE + 123, stats.bytes, "");
}

TEST_BEGIN(test_stats_simple) {
@@ -417,7 +430,7 @@ TEST_BEGIN(test_stats_simple) {

    enum {
        NITERS = 100,
-       FLUSH_PAGES = 10,
+       FLUSH_PAGES = 20,
    };

    sec_init(&sec, &ta.pai, /* nshards */ 1, /* alloc_max */ PAGE,
@@ -470,26 +483,22 @@ TEST_BEGIN(test_stats_auto_flush) {
    for (size_t i = 0; i < 2 * FLUSH_PAGES; i++) {
        allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE,
            /* zero */ false);
-       expect_stats_pages(tsdn, &sec, 0);
    }

    for (size_t i = 0; i < FLUSH_PAGES; i++) {
        pai_dalloc(tsdn, &sec.pai, allocs[i]);
-       expect_stats_pages(tsdn, &sec, i + 1);
    }
    pai_dalloc(tsdn, &sec.pai, extra_alloc0);
-   /* The last dalloc should have triggered a flush. */
-   expect_stats_pages(tsdn, &sec, 0);

    /* Flush the remaining pages; stats should still work. */
    for (size_t i = 0; i < FLUSH_PAGES; i++) {
        pai_dalloc(tsdn, &sec.pai, allocs[FLUSH_PAGES + i]);
-       expect_stats_pages(tsdn, &sec, i + 1);
    }

    pai_dalloc(tsdn, &sec.pai, extra_alloc1);
-   /* The last dalloc should have triggered a flush, again. */
-   expect_stats_pages(tsdn, &sec, 0);

+   expect_stats_pages(tsdn, &sec, ta.alloc_count + ta.alloc_batch_count
+       - ta.dalloc_count - ta.dalloc_batch_count);
}
TEST_END