SEC: Reduce lock hold times.
Only flush a subset of extents during flushing, and drop the lock while doing so.
This commit is contained in:
parent
1944ebbe7f
commit
bf448d7a5a
@ -8,13 +8,9 @@
|
||||
* Small extent cache.
|
||||
*
|
||||
* This includes some utilities to cache small extents. We have a per-pszind
|
||||
* bin with its own lock and edata heap (including only extents of that size).
|
||||
* We don't try to do any coalescing of extents (since it would require
|
||||
* cross-bin locks). As a result, we need to be careful about fragmentation.
|
||||
* As a gesture in that direction, we limit the size of caches, apply first-fit
|
||||
* within the bins, and, when flushing a bin, flush all of its extents rather
|
||||
* than just those up to some threshold. When we allocate again, we'll get a
|
||||
* chance to move to better ones.
|
||||
* bin with its own list of extents of that size. We don't try to do any
|
||||
* coalescing of extents (since it would in general require cross-shard locks or
|
||||
* knowledge of the underlying PAI implementation).
|
||||
*/
|
||||
|
||||
/*
|
||||
@ -46,6 +42,19 @@ sec_stats_accum(sec_stats_t *dst, sec_stats_t *src) {
|
||||
dst->bytes += src->bytes;
|
||||
}
|
||||
|
||||
/* A collection of free extents, all of the same size. */
|
||||
typedef struct sec_bin_s sec_bin_t;
|
||||
struct sec_bin_s {
|
||||
/*
|
||||
* Number of bytes in this particular bin (as opposed to the
|
||||
* sec_shard_t's bytes_cur). This isn't user visible or reported in
|
||||
* stats; rather, it allows us to quickly determine the change in the
|
||||
* centralized counter when flushing.
|
||||
*/
|
||||
size_t bytes_cur;
|
||||
edata_list_active_t freelist;
|
||||
};
|
||||
|
||||
typedef struct sec_shard_s sec_shard_t;
|
||||
struct sec_shard_s {
|
||||
/*
|
||||
@ -64,8 +73,11 @@ struct sec_shard_s {
|
||||
* hooks are installed.
|
||||
*/
|
||||
bool enabled;
|
||||
edata_list_active_t freelist[SEC_NPSIZES];
|
||||
sec_bin_t bins[SEC_NPSIZES];
|
||||
/* Number of bytes in all bins in the shard. */
|
||||
size_t bytes_cur;
|
||||
/* The next pszind to flush in the flush-some pathways. */
|
||||
pszind_t to_flush_next;
|
||||
};
|
||||
|
||||
typedef struct sec_s sec_t;
|
||||
@ -83,6 +95,18 @@ struct sec_s {
|
||||
* the bins in that shard to be flushed.
|
||||
*/
|
||||
size_t bytes_max;
|
||||
/*
|
||||
* The number of bytes (in all bins) we flush down to when we exceed
|
||||
* bytes_max. We want this to be less than bytes_max, because
|
||||
* otherwise we could get into situations where a shard undergoing
|
||||
* net-deallocation keeps bytes_cur very near to bytes_max, so that
|
||||
* most deallocations get immediately forwarded to the underlying PAI
|
||||
* implementation, defeating the point of the SEC.
|
||||
*
|
||||
* Currently this is just set to bytes_max / 2, but eventually can be
|
||||
* configurable.
|
||||
*/
|
||||
size_t bytes_after_flush;
|
||||
|
||||
/*
|
||||
* We don't necessarily always use all the shards; requests are
|
||||
|
87
src/sec.c
87
src/sec.c
@ -11,7 +11,14 @@ static bool sec_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata,
|
||||
size_t old_size, size_t new_size);
|
||||
static void sec_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata);
|
||||
|
||||
bool sec_init(sec_t *sec, pai_t *fallback, size_t nshards, size_t alloc_max,
|
||||
static void
|
||||
sec_bin_init(sec_bin_t *bin) {
|
||||
bin->bytes_cur = 0;
|
||||
edata_list_active_init(&bin->freelist);
|
||||
}
|
||||
|
||||
bool
|
||||
sec_init(sec_t *sec, pai_t *fallback, size_t nshards, size_t alloc_max,
|
||||
size_t bytes_max) {
|
||||
if (nshards > SEC_NSHARDS_MAX) {
|
||||
nshards = SEC_NSHARDS_MAX;
|
||||
@ -25,9 +32,10 @@ bool sec_init(sec_t *sec, pai_t *fallback, size_t nshards, size_t alloc_max,
|
||||
}
|
||||
shard->enabled = true;
|
||||
for (pszind_t j = 0; j < SEC_NPSIZES; j++) {
|
||||
edata_list_active_init(&shard->freelist[j]);
|
||||
sec_bin_init(&shard->bins[j]);
|
||||
}
|
||||
shard->bytes_cur = 0;
|
||||
shard->to_flush_next = 0;
|
||||
}
|
||||
sec->fallback = fallback;
|
||||
sec->alloc_max = alloc_max;
|
||||
@ -36,6 +44,7 @@ bool sec_init(sec_t *sec, pai_t *fallback, size_t nshards, size_t alloc_max,
|
||||
}
|
||||
|
||||
sec->bytes_max = bytes_max;
|
||||
sec->bytes_after_flush = bytes_max / 2;
|
||||
sec->nshards = nshards;
|
||||
|
||||
/*
|
||||
@ -85,9 +94,12 @@ sec_shard_alloc_locked(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard,
|
||||
if (!shard->enabled) {
|
||||
return NULL;
|
||||
}
|
||||
edata_t *edata = edata_list_active_first(&shard->freelist[pszind]);
|
||||
sec_bin_t *bin = &shard->bins[pszind];
|
||||
edata_t *edata = edata_list_active_first(&bin->freelist);
|
||||
if (edata != NULL) {
|
||||
edata_list_active_remove(&shard->freelist[pszind], edata);
|
||||
edata_list_active_remove(&bin->freelist, edata);
|
||||
assert(edata_size_get(edata) <= bin->bytes_cur);
|
||||
bin->bytes_cur -= edata_size_get(edata);
|
||||
assert(edata_size_get(edata) <= shard->bytes_cur);
|
||||
shard->bytes_cur -= edata_size_get(edata);
|
||||
}
|
||||
@ -135,30 +147,75 @@ sec_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size,
|
||||
}
|
||||
|
||||
static void
|
||||
sec_do_flush_locked(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard) {
|
||||
sec_flush_all_locked(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard) {
|
||||
malloc_mutex_assert_owner(tsdn, &shard->mtx);
|
||||
shard->bytes_cur = 0;
|
||||
edata_list_active_t to_flush;
|
||||
edata_list_active_init(&to_flush);
|
||||
for (pszind_t i = 0; i < SEC_NPSIZES; i++) {
|
||||
edata_list_active_concat(&to_flush, &shard->freelist[i]);
|
||||
sec_bin_t *bin = &shard->bins[i];
|
||||
bin->bytes_cur = 0;
|
||||
edata_list_active_concat(&to_flush, &bin->freelist);
|
||||
}
|
||||
|
||||
/*
|
||||
* Ordinarily we would try to avoid doing the batch deallocation while
|
||||
* holding the shard mutex, but the flush_all pathways only happen when
|
||||
* we're disabling the HPA or resetting the arena, both of which are
|
||||
* rare pathways.
|
||||
*/
|
||||
pai_dalloc_batch(tsdn, sec->fallback, &to_flush);
|
||||
}
|
||||
|
||||
static void
|
||||
sec_shard_dalloc_locked(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard,
|
||||
sec_flush_some_and_unlock(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard) {
|
||||
malloc_mutex_assert_owner(tsdn, &shard->mtx);
|
||||
edata_list_active_t to_flush;
|
||||
edata_list_active_init(&to_flush);
|
||||
while (shard->bytes_cur > sec->bytes_after_flush) {
|
||||
/* Pick a victim. */
|
||||
sec_bin_t *bin = &shard->bins[shard->to_flush_next];
|
||||
|
||||
/* Update our victim-picking state. */
|
||||
shard->to_flush_next++;
|
||||
if (shard->to_flush_next == SEC_NPSIZES) {
|
||||
shard->to_flush_next = 0;
|
||||
}
|
||||
|
||||
assert(shard->bytes_cur >= bin->bytes_cur);
|
||||
if (bin->bytes_cur != 0) {
|
||||
shard->bytes_cur -= bin->bytes_cur;
|
||||
bin->bytes_cur = 0;
|
||||
edata_list_active_concat(&to_flush, &bin->freelist);
|
||||
}
|
||||
/*
|
||||
* Either bin->bytes_cur was 0, in which case we didn't touch
|
||||
* the bin list but it should be empty anyways (or else we
|
||||
* missed a bytes_cur update on a list modification), or it
|
||||
* *wasn't* 0 and we emptied it ourselves. Either way, it should
|
||||
* be empty now.
|
||||
*/
|
||||
assert(edata_list_active_empty(&bin->freelist));
|
||||
}
|
||||
|
||||
malloc_mutex_unlock(tsdn, &shard->mtx);
|
||||
pai_dalloc_batch(tsdn, sec->fallback, &to_flush);
|
||||
}
|
||||
|
||||
static void
|
||||
sec_shard_dalloc_and_unlock(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard,
|
||||
edata_t *edata) {
|
||||
malloc_mutex_assert_owner(tsdn, &shard->mtx);
|
||||
assert(shard->bytes_cur <= sec->bytes_max);
|
||||
size_t size = edata_size_get(edata);
|
||||
pszind_t pszind = sz_psz2ind(size);
|
||||
/*
|
||||
* Prepending here results in FIFO allocation per bin, which seems
|
||||
* Prepending here results in LIFO allocation per bin, which seems
|
||||
* reasonable.
|
||||
*/
|
||||
edata_list_active_prepend(&shard->freelist[pszind], edata);
|
||||
sec_bin_t *bin = &shard->bins[pszind];
|
||||
edata_list_active_prepend(&bin->freelist, edata);
|
||||
bin->bytes_cur += size;
|
||||
shard->bytes_cur += size;
|
||||
if (shard->bytes_cur > sec->bytes_max) {
|
||||
/*
|
||||
@ -170,7 +227,10 @@ sec_shard_dalloc_locked(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard,
|
||||
* in the backing allocator). This has the extra advantage of
|
||||
* not requiring advanced cache balancing strategies.
|
||||
*/
|
||||
sec_do_flush_locked(tsdn, sec, shard);
|
||||
sec_flush_some_and_unlock(tsdn, sec, shard);
|
||||
malloc_mutex_assert_not_owner(tsdn, &shard->mtx);
|
||||
} else {
|
||||
malloc_mutex_unlock(tsdn, &shard->mtx);
|
||||
}
|
||||
}
|
||||
|
||||
@ -184,8 +244,7 @@ sec_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) {
|
||||
sec_shard_t *shard = sec_shard_pick(tsdn, sec);
|
||||
malloc_mutex_lock(tsdn, &shard->mtx);
|
||||
if (shard->enabled) {
|
||||
sec_shard_dalloc_locked(tsdn, sec, shard, edata);
|
||||
malloc_mutex_unlock(tsdn, &shard->mtx);
|
||||
sec_shard_dalloc_and_unlock(tsdn, sec, shard, edata);
|
||||
} else {
|
||||
malloc_mutex_unlock(tsdn, &shard->mtx);
|
||||
pai_dalloc(tsdn, sec->fallback, edata);
|
||||
@ -196,7 +255,7 @@ void
|
||||
sec_flush(tsdn_t *tsdn, sec_t *sec) {
|
||||
for (size_t i = 0; i < sec->nshards; i++) {
|
||||
malloc_mutex_lock(tsdn, &sec->shards[i].mtx);
|
||||
sec_do_flush_locked(tsdn, sec, &sec->shards[i]);
|
||||
sec_flush_all_locked(tsdn, sec, &sec->shards[i]);
|
||||
malloc_mutex_unlock(tsdn, &sec->shards[i].mtx);
|
||||
}
|
||||
}
|
||||
@ -206,7 +265,7 @@ sec_disable(tsdn_t *tsdn, sec_t *sec) {
|
||||
for (size_t i = 0; i < sec->nshards; i++) {
|
||||
malloc_mutex_lock(tsdn, &sec->shards[i].mtx);
|
||||
sec->shards[i].enabled = false;
|
||||
sec_do_flush_locked(tsdn, sec, &sec->shards[i]);
|
||||
sec_flush_all_locked(tsdn, sec, &sec->shards[i]);
|
||||
malloc_mutex_unlock(tsdn, &sec->shards[i].mtx);
|
||||
}
|
||||
}
|
||||
|
@ -200,8 +200,11 @@ TEST_BEGIN(test_auto_flush) {
|
||||
expect_zu_eq(0, ta.dalloc_count,
|
||||
"Incorrect number of allocations");
|
||||
/*
|
||||
* Free the extra allocation; this should trigger a flush of all
|
||||
* extents in the cache.
|
||||
* Free the extra allocation; this should trigger a flush. The internal
|
||||
* flushing logic is allowed to get complicated; for now, we rely on our
|
||||
* whitebox knowledge of the fact that the SEC flushes bins in their
|
||||
* entirety when it decides to do so, and it has only one bin active
|
||||
* right now.
|
||||
*/
|
||||
pai_dalloc(tsdn, &sec.pai, extra_alloc);
|
||||
expect_zu_eq(NALLOCS + 1, ta.alloc_count,
|
||||
|
Loading…
Reference in New Issue
Block a user