From ea51e97bb893f560c70f42478d67c8159ee09b3d Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 15 Oct 2020 13:46:38 -0700 Subject: [PATCH] Add SEC module: a small extent cache. This can be used to take pressure off a more centralized, worse-sharded allocator without requiring a full break of the arena abstraction. --- Makefile.in | 2 + include/jemalloc/internal/cache_bin.h | 1 - include/jemalloc/internal/sec.h | 118 +++++ include/jemalloc/internal/witness.h | 2 + .../projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + .../vc2015/jemalloc/jemalloc.vcxproj.filters | 3 + .../projects/vc2017/jemalloc/jemalloc.vcxproj | 1 + .../vc2017/jemalloc/jemalloc.vcxproj.filters | 3 + src/sec.c | 263 +++++++++ test/unit/sec.c | 500 ++++++++++++++++++ 10 files changed, 893 insertions(+), 1 deletion(-) create mode 100644 include/jemalloc/internal/sec.h create mode 100644 src/sec.c create mode 100644 test/unit/sec.c diff --git a/Makefile.in b/Makefile.in index 67568f00..0136a40e 100644 --- a/Makefile.in +++ b/Makefile.in @@ -142,6 +142,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/rtree.c \ $(srcroot)src/safety_check.c \ $(srcroot)src/sc.c \ + $(srcroot)src/sec.c \ $(srcroot)src/stats.c \ $(srcroot)src/sz.c \ $(srcroot)src/tcache.c \ @@ -253,6 +254,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/rtree.c \ $(srcroot)test/unit/safety_check.c \ $(srcroot)test/unit/sc.c \ + $(srcroot)test/unit/sec.c \ $(srcroot)test/unit/seq.c \ $(srcroot)test/unit/SFMT.c \ $(srcroot)test/unit/size_check.c \ diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index c016769d..0767862c 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -99,7 +99,6 @@ struct cache_bin_s { * array. */ uint16_t low_bits_empty; - }; /* diff --git a/include/jemalloc/internal/sec.h b/include/jemalloc/internal/sec.h new file mode 100644 index 00000000..7c1465ed --- /dev/null +++ b/include/jemalloc/internal/sec.h @@ -0,0 +1,118 @@ +#ifndef JEMALLOC_INTERNAL_SEC_H +#define JEMALLOC_INTERNAL_SEC_H + +#include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/pai.h" + +/* + * Small extent cache. + * + * This includes some utilities to cache small extents. We have a per-pszind + * bin with its own lock and edata heap (including only extents of that size). + * We don't try to do any coalescing of extents (since it would require + * cross-bin locks). As a result, we need to be careful about fragmentation. + * As a gesture in that direction, we limit the size of caches, apply first-fit + * within the bins, and, when flushing a bin, flush all of its extents rather + * than just those up to some threshold. When we allocate again, we'll get a + * chance to move to better ones. + */ + +/* + * This is a *small* extent cache, after all. Assuming 4k pages and an ngroup + * of 4, this allows caching of sizes up to 128k. + */ +#define SEC_NPSIZES 16 +/* + * For now, we put a cap on the number of SECs an arena can have. There's no + * reason it can't be dynamic; it's just inconvenient. This number of shards + * are embedded in the arenas, so there's a space / configurability tradeoff + * here. Eventually, we should probably dynamically allocate only however many + * we require. + */ +#define SEC_NSHARDS_MAX 8 + +/* + * For now, this is just one field; eventually, we'll probably want to get more + * fine-grained data out (like per-size class statistics). + */ +typedef struct sec_stats_s sec_stats_t; +struct sec_stats_s { + /* Sum of bytes_cur across all shards. 
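+	 *
+	 * As a usage sketch (illustrative only, not part of this patch), a
+	 * caller wanting the total number of cached bytes would do something
+	 * like
+	 *
+	 *   sec_stats_t stats;
+	 *   stats.bytes = 0;
+	 *   sec_stats_merge(tsdn, &sec, &stats);
+	 *
+	 * after which stats.bytes holds the sum over all shards.  Merging
+	 * accumulates into (rather than overwrites) the destination, so
+	 * several SECs can be rolled up into a single sec_stats_t.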
*/ + size_t bytes; +}; + +static inline void +sec_stats_accum(sec_stats_t *dst, sec_stats_t *src) { + dst->bytes += src->bytes; +} + +typedef struct sec_shard_s sec_shard_t; +struct sec_shard_s { + /* + * We don't keep per-bin mutexes, even though that would allow more + * sharding; this allows global cache-eviction, which in turn allows for + * better balancing across free lists. + */ + malloc_mutex_t mtx; + /* + * A SEC may need to be shut down (i.e. flushed of its contents and + * prevented from further caching). To avoid tricky synchronization + * issues, we just track enabled-status in each shard, guarded by a + * mutex. In practice, this is only ever checked during brief races, + * since the arena-level atomic boolean tracking HPA enabled-ness means + * that we won't go down these pathways very often after custom extent + * hooks are installed. + */ + bool enabled; + edata_list_active_t freelist[SEC_NPSIZES]; + size_t bytes_cur; +}; + +typedef struct sec_s sec_t; +struct sec_s { + pai_t pai; + pai_t *fallback; + + /* + * We'll automatically refuse to cache any objects in this sec if + * they're larger than alloc_max bytes. + */ + size_t alloc_max; + /* + * Exceeding this amount of cached extents in a shard causes *all* of + * the shards in that bin to be flushed. + */ + size_t bytes_max; + + /* + * We don't necessarily always use all the shards; requests are + * distributed across shards [0, nshards - 1). + */ + size_t nshards; + sec_shard_t shards[SEC_NSHARDS_MAX]; +}; + +bool sec_init(sec_t *sec, pai_t *fallback, size_t nshards, size_t alloc_max, + size_t bytes_max); +void sec_flush(tsdn_t *tsdn, sec_t *sec); +void sec_disable(tsdn_t *tsdn, sec_t *sec); + +/* + * Morally, these two stats methods probably ought to be a single one (and the + * mutex_prof_data ought to live in the sec_stats_t. But splitting them apart + * lets them fit easily into the pa_shard stats framework (which also has this + * split), which simplifies the stats management. + */ +void sec_stats_merge(tsdn_t *tsdn, sec_t *sec, sec_stats_t *stats); +void sec_mutex_stats_read(tsdn_t *tsdn, sec_t *sec, + mutex_prof_data_t *mutex_prof_data); + +/* + * We use the arena lock ordering; these are acquired in phase 2 of forking, but + * should be acquired before the underlying allocator mutexes. 
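+ *
+ * Concretely (a sketch; any owner_* naming is illustrative and not part of
+ * this patch), an owner embedding a sec_t would call
+ *
+ *   sec_prefork2(tsdn, &owner->sec);
+ *
+ * from its phase-2 prefork hook, ahead of whatever prefork hooks take the
+ * fallback allocator's mutexes, and then sec_postfork_parent() or
+ * sec_postfork_child() on the matching post-fork paths.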
+ */ +void sec_prefork2(tsdn_t *tsdn, sec_t *sec); +void sec_postfork_parent(tsdn_t *tsdn, sec_t *sec); +void sec_postfork_child(tsdn_t *tsdn, sec_t *sec); + +#endif /* JEMALLOC_INTERNAL_SEC_H */ diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h index 686bf403..662907c8 100644 --- a/include/jemalloc/internal/witness.h +++ b/include/jemalloc/internal/witness.h @@ -44,6 +44,8 @@ enum witness_rank_e { WITNESS_RANK_DECAY = WITNESS_RANK_CORE, WITNESS_RANK_TCACHE_QL, + WITNESS_RANK_SEC_SHARD, + WITNESS_RANK_EXTENT_GROW, WITNESS_RANK_HPA_SHARD_GROW = WITNESS_RANK_EXTENT_GROW, diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 46e497ac..f14f87ff 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -82,6 +82,7 @@ + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index f46a92fa..689a520c 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -130,6 +130,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index dbf6f95a..30c6b295 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -82,6 +82,7 @@ + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index f46a92fa..689a520c 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -130,6 +130,9 @@ Source Files + + Source Files + Source Files diff --git a/src/sec.c b/src/sec.c new file mode 100644 index 00000000..f3c906bc --- /dev/null +++ b/src/sec.c @@ -0,0 +1,263 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/sec.h" + +static edata_t *sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size, + size_t alignment, bool zero); +static bool sec_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, + size_t old_size, size_t new_size, bool zero); +static bool sec_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, + size_t old_size, size_t new_size); +static void sec_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata); + +bool sec_init(sec_t *sec, pai_t *fallback, size_t nshards, size_t alloc_max, + size_t bytes_max) { + if (nshards > SEC_NSHARDS_MAX) { + nshards = SEC_NSHARDS_MAX; + } + for (size_t i = 0; i < nshards; i++) { + sec_shard_t *shard = &sec->shards[i]; + bool err = malloc_mutex_init(&shard->mtx, "sec_shard", + WITNESS_RANK_SEC_SHARD, malloc_mutex_rank_exclusive); + if (err) { + return true; + } + shard->enabled = true; + for (pszind_t j = 0; j < SEC_NPSIZES; j++) { + edata_list_active_init(&shard->freelist[j]); + } + shard->bytes_cur = 0; + } + sec->fallback = fallback; + sec->alloc_max = alloc_max; + if (sec->alloc_max > sz_pind2sz(SEC_NPSIZES - 1)) { + sec->alloc_max = sz_pind2sz(SEC_NPSIZES - 1); + } + + sec->bytes_max = bytes_max; + sec->nshards = nshards; + + /* + * Initialize these last so that an improper use of an SEC whose + * initialization failed will segfault in an easy-to-spot way. 
+ */ + sec->pai.alloc = &sec_alloc; + sec->pai.expand = &sec_expand; + sec->pai.shrink = &sec_shrink; + sec->pai.dalloc = &sec_dalloc; + + return false; +} + +static sec_shard_t * +sec_shard_pick(tsdn_t *tsdn, sec_t *sec) { + /* + * Eventually, we should implement affinity, tracking source shard using + * the edata_t's newly freed up fields. For now, just randomly + * distribute across all shards. + */ + if (tsdn_null(tsdn)) { + return &sec->shards[0]; + } + tsd_t *tsd = tsdn_tsd(tsdn); + /* + * Use the trick from Daniel Lemire's "A fast alternative to the modulo + * reduction. Use a 64 bit number to store 32 bits, since we'll + * deliberately overflow when we multiply by the number of shards. + */ + uint64_t rand32 = prng_lg_range_u64(tsd_prng_statep_get(tsd), 32); + uint32_t idx = (uint32_t)((rand32 * (uint64_t)sec->nshards) >> 32); + return &sec->shards[idx]; +} + +static edata_t * +sec_shard_alloc_locked(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard, + pszind_t pszind) { + malloc_mutex_assert_owner(tsdn, &shard->mtx); + if (!shard->enabled) { + return NULL; + } + edata_t *edata = edata_list_active_first(&shard->freelist[pszind]); + if (edata != NULL) { + edata_list_active_remove(&shard->freelist[pszind], edata); + assert(edata_size_get(edata) <= shard->bytes_cur); + shard->bytes_cur -= edata_size_get(edata); + } + return edata; +} + +static edata_t * +sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero) { + assert((size & PAGE_MASK) == 0); + + sec_t *sec = (sec_t *)self; + + if (zero || alignment > PAGE || sec->nshards == 0 + || size > sec->alloc_max) { + return pai_alloc(tsdn, sec->fallback, size, alignment, zero); + } + pszind_t pszind = sz_psz2ind(size); + sec_shard_t *shard = sec_shard_pick(tsdn, sec); + malloc_mutex_lock(tsdn, &shard->mtx); + edata_t *edata = sec_shard_alloc_locked(tsdn, sec, shard, pszind); + malloc_mutex_unlock(tsdn, &shard->mtx); + if (edata == NULL) { + /* + * See the note in dalloc, below; really, we should add a + * batch_alloc method to the PAI and get more than one extent at + * a time. + */ + edata = pai_alloc(tsdn, sec->fallback, size, alignment, zero); + } + return edata; +} + +static bool +sec_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, + size_t new_size, bool zero) { + sec_t *sec = (sec_t *)self; + return pai_expand(tsdn, sec->fallback, edata, old_size, new_size, zero); +} + +static bool +sec_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, + size_t new_size) { + sec_t *sec = (sec_t *)self; + return pai_shrink(tsdn, sec->fallback, edata, old_size, new_size); +} + +static void +sec_do_flush_locked(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard) { + malloc_mutex_assert_owner(tsdn, &shard->mtx); + shard->bytes_cur = 0; + edata_list_active_t to_flush; + edata_list_active_init(&to_flush); + for (pszind_t i = 0; i < SEC_NPSIZES; i++) { + edata_list_active_concat(&to_flush, &shard->freelist[i]); + } + /* + * A better way to do this would be to add a batch dalloc function to + * the pai_t. Practically, the current method turns into O(n) locks and + * unlocks at the fallback allocator. But some implementations (e.g. + * HPA) can straightforwardly do many deallocations in a single lock / + * unlock pair. 
+ */ + while (!edata_list_active_empty(&to_flush)) { + edata_t *e = edata_list_active_first(&to_flush); + edata_list_active_remove(&to_flush, e); + pai_dalloc(tsdn, sec->fallback, e); + } +} + +static void +sec_shard_dalloc_locked(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard, + edata_t *edata) { + malloc_mutex_assert_owner(tsdn, &shard->mtx); + assert(shard->bytes_cur <= sec->bytes_max); + size_t size = edata_size_get(edata); + pszind_t pszind = sz_psz2ind(size); + /* + * Prepending here results in FIFO allocation per bin, which seems + * reasonable. + */ + edata_list_active_prepend(&shard->freelist[pszind], edata); + shard->bytes_cur += size; + if (shard->bytes_cur > sec->bytes_max) { + /* + * We've exceeded the shard limit. We make two nods in the + * direction of fragmentation avoidance: we flush everything in + * the shard, rather than one particular bin, and we hold the + * lock while flushing (in case one of the extents we flush is + * highly preferred from a fragmentation-avoidance perspective + * in the backing allocator). This has the extra advantage of + * not requiring advanced cache balancing strategies. + */ + sec_do_flush_locked(tsdn, sec, shard); + } +} + +static void +sec_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { + sec_t *sec = (sec_t *)self; + if (sec->nshards == 0 || edata_size_get(edata) > sec->alloc_max) { + pai_dalloc(tsdn, sec->fallback, edata); + return; + } + sec_shard_t *shard = sec_shard_pick(tsdn, sec); + malloc_mutex_lock(tsdn, &shard->mtx); + if (shard->enabled) { + sec_shard_dalloc_locked(tsdn, sec, shard, edata); + malloc_mutex_unlock(tsdn, &shard->mtx); + } else { + malloc_mutex_unlock(tsdn, &shard->mtx); + pai_dalloc(tsdn, sec->fallback, edata); + } +} + +void +sec_flush(tsdn_t *tsdn, sec_t *sec) { + for (size_t i = 0; i < sec->nshards; i++) { + malloc_mutex_lock(tsdn, &sec->shards[i].mtx); + sec_do_flush_locked(tsdn, sec, &sec->shards[i]); + malloc_mutex_unlock(tsdn, &sec->shards[i].mtx); + } +} + +void +sec_disable(tsdn_t *tsdn, sec_t *sec) { + for (size_t i = 0; i < sec->nshards; i++) { + malloc_mutex_lock(tsdn, &sec->shards[i].mtx); + sec->shards[i].enabled = false; + sec_do_flush_locked(tsdn, sec, &sec->shards[i]); + malloc_mutex_unlock(tsdn, &sec->shards[i].mtx); + } +} + +void +sec_stats_merge(tsdn_t *tsdn, sec_t *sec, sec_stats_t *stats) { + size_t sum = 0; + for (size_t i = 0; i < sec->nshards; i++) { + /* + * We could save these lock acquisitions by making bytes_cur + * atomic, but stats collection is rare anyways and we expect + * the number and type of stats to get more interesting. 
+ */ + malloc_mutex_lock(tsdn, &sec->shards[i].mtx); + sum += sec->shards[i].bytes_cur; + malloc_mutex_unlock(tsdn, &sec->shards[i].mtx); + } + stats->bytes += sum; +} + +void +sec_mutex_stats_read(tsdn_t *tsdn, sec_t *sec, + mutex_prof_data_t *mutex_prof_data) { + for (size_t i = 0; i < sec->nshards; i++) { + malloc_mutex_lock(tsdn, &sec->shards[i].mtx); + malloc_mutex_prof_accum(tsdn, mutex_prof_data, + &sec->shards[i].mtx); + malloc_mutex_unlock(tsdn, &sec->shards[i].mtx); + } +} + +void +sec_prefork2(tsdn_t *tsdn, sec_t *sec) { + for (size_t i = 0; i < sec->nshards; i++) { + malloc_mutex_prefork(tsdn, &sec->shards[i].mtx); + } +} + +void +sec_postfork_parent(tsdn_t *tsdn, sec_t *sec) { + for (size_t i = 0; i < sec->nshards; i++) { + malloc_mutex_postfork_parent(tsdn, &sec->shards[i].mtx); + } +} + +void +sec_postfork_child(tsdn_t *tsdn, sec_t *sec) { + for (size_t i = 0; i < sec->nshards; i++) { + malloc_mutex_postfork_child(tsdn, &sec->shards[i].mtx); + } +} diff --git a/test/unit/sec.c b/test/unit/sec.c new file mode 100644 index 00000000..cb0c17d1 --- /dev/null +++ b/test/unit/sec.c @@ -0,0 +1,500 @@ +#include "test/jemalloc_test.h" + +#include "jemalloc/internal/sec.h" + +typedef struct pai_test_allocator_s pai_test_allocator_t; +struct pai_test_allocator_s { + pai_t pai; + bool alloc_fail; + size_t alloc_count; + size_t dalloc_count; + /* + * We use a simple bump allocator as the implementation. This isn't + * *really* correct, since we may allow expansion into a subsequent + * allocation, but it's not like the SEC is really examining the + * pointers it gets back; this is mostly just helpful for debugging. + */ + uintptr_t next_ptr; + size_t expand_count; + bool expand_return_value; + size_t shrink_count; + bool shrink_return_value; +}; + +static inline edata_t * +pai_test_allocator_alloc(tsdn_t *tsdn, pai_t *self, size_t size, + size_t alignment, bool zero) { + pai_test_allocator_t *ta = (pai_test_allocator_t *)self; + if (ta->alloc_fail) { + return NULL; + } + edata_t *edata = malloc(sizeof(edata_t)); + assert_ptr_not_null(edata, ""); + ta->next_ptr += alignment - 1; + edata_init(edata, /* arena_ind */ 0, + (void *)(ta->next_ptr & ~(alignment - 1)), size, + /* slab */ false, + /* szind */ 0, /* sn */ 1, extent_state_active, /* zero */ zero, + /* comitted */ true, /* ranged */ false, EXTENT_NOT_HEAD); + ta->next_ptr += size; + ta->alloc_count++; + return edata; +} + +static bool +pai_test_allocator_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, + size_t old_size, size_t new_size, bool zero) { + pai_test_allocator_t *ta = (pai_test_allocator_t *)self; + ta->expand_count++; + return ta->expand_return_value; +} + +static bool +pai_test_allocator_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, + size_t old_size, size_t new_size) { + pai_test_allocator_t *ta = (pai_test_allocator_t *)self; + ta->shrink_count++; + return ta->shrink_return_value; +} + +static void +pai_test_allocator_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { + pai_test_allocator_t *ta = (pai_test_allocator_t *)self; + ta->dalloc_count++; + free(edata); +} + +static inline void +pai_test_allocator_init(pai_test_allocator_t *ta) { + ta->alloc_fail = false; + ta->alloc_count = 0; + ta->dalloc_count = 0; + /* Just don't start the edata at 0. 
*/ + ta->next_ptr = 10 * PAGE; + ta->expand_count = 0; + ta->expand_return_value = false; + ta->shrink_count = 0; + ta->shrink_return_value = false; + ta->pai.alloc = &pai_test_allocator_alloc; + ta->pai.expand = &pai_test_allocator_expand; + ta->pai.shrink = &pai_test_allocator_shrink; + ta->pai.dalloc = &pai_test_allocator_dalloc; +} + +TEST_BEGIN(test_reuse) { + pai_test_allocator_t ta; + pai_test_allocator_init(&ta); + sec_t sec; + /* + * We can't use the "real" tsd, since we malloc within the test + * allocator hooks; we'd get lock inversion crashes. Eventually, we + * should have a way to mock tsds, but for now just don't do any + * lock-order checking. + */ + tsdn_t *tsdn = TSDN_NULL; + /* + * 10-allocs apiece of 1-PAGE and 2-PAGE objects means that we should be + * able to get to 30 pages in the cache before triggering a flush. + */ + enum { NALLOCS = 10 }; + edata_t *one_page[NALLOCS]; + edata_t *two_page[NALLOCS]; + sec_init(&sec, &ta.pai, /* nshards */ 1, /* alloc_max */ 2 * PAGE, + /* bytes_max */ NALLOCS * PAGE + NALLOCS * 2 * PAGE); + for (int i = 0; i < NALLOCS; i++) { + one_page[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, + /* zero */ false); + expect_ptr_not_null(one_page[i], "Unexpected alloc failure"); + two_page[i] = pai_alloc(tsdn, &sec.pai, 2 * PAGE, PAGE, + /* zero */ false); + expect_ptr_not_null(one_page[i], "Unexpected alloc failure"); + } + expect_zu_eq(2 * NALLOCS, ta.alloc_count, + "Incorrect number of allocations"); + expect_zu_eq(0, ta.dalloc_count, + "Incorrect number of allocations"); + /* + * Free in a different order than we allocated, to make sure free-list + * separation works correctly. + */ + for (int i = NALLOCS - 1; i >= 0; i--) { + pai_dalloc(tsdn, &sec.pai, one_page[i]); + } + for (int i = NALLOCS - 1; i >= 0; i--) { + pai_dalloc(tsdn, &sec.pai, two_page[i]); + } + expect_zu_eq(2 * NALLOCS, ta.alloc_count, + "Incorrect number of allocations"); + expect_zu_eq(0, ta.dalloc_count, + "Incorrect number of allocations"); + /* + * Check that the n'th most recent deallocated extent is returned for + * the n'th alloc request of a given size. + */ + for (int i = 0; i < NALLOCS; i++) { + edata_t *alloc1 = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, + /* zero */ false); + edata_t *alloc2 = pai_alloc(tsdn, &sec.pai, 2 * PAGE, PAGE, + /* zero */ false); + expect_ptr_eq(one_page[i], alloc1, + "Got unexpected allocation"); + expect_ptr_eq(two_page[i], alloc2, + "Got unexpected allocation"); + } + expect_zu_eq(2 * NALLOCS, ta.alloc_count, + "Incorrect number of allocations"); + expect_zu_eq(0, ta.dalloc_count, + "Incorrect number of allocations"); +} +TEST_END + + +TEST_BEGIN(test_auto_flush) { + pai_test_allocator_t ta; + pai_test_allocator_init(&ta); + sec_t sec; + /* See the note above -- we can't use the real tsd. */ + tsdn_t *tsdn = TSDN_NULL; + /* + * 10-allocs apiece of 1-PAGE and 2-PAGE objects means that we should be + * able to get to 30 pages in the cache before triggering a flush. 
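+	 *
+	 * (In this particular test, though, only 1-PAGE objects are used:
+	 * alloc_max is PAGE and bytes_max is NALLOCS * PAGE, so the shard
+	 * holds at most NALLOCS = 10 pages.  Freeing NALLOCS extents fills it
+	 * exactly; freeing the one extra allocation pushes bytes_cur past
+	 * bytes_max and triggers the flush that the checks below expect.)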
+ */ + enum { NALLOCS = 10 }; + edata_t *extra_alloc; + edata_t *allocs[NALLOCS]; + sec_init(&sec, &ta.pai, /* nshards */ 1, /* alloc_max */ PAGE, + /* bytes_max */ NALLOCS * PAGE); + for (int i = 0; i < NALLOCS; i++) { + allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, + /* zero */ false); + expect_ptr_not_null(allocs[i], "Unexpected alloc failure"); + } + extra_alloc = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false); + expect_ptr_not_null(extra_alloc, "Unexpected alloc failure"); + expect_zu_eq(NALLOCS + 1, ta.alloc_count, + "Incorrect number of allocations"); + expect_zu_eq(0, ta.dalloc_count, + "Incorrect number of allocations"); + /* Free until the SEC is full, but should not have flushed yet. */ + for (int i = 0; i < NALLOCS; i++) { + pai_dalloc(tsdn, &sec.pai, allocs[i]); + } + expect_zu_eq(NALLOCS + 1, ta.alloc_count, + "Incorrect number of allocations"); + expect_zu_eq(0, ta.dalloc_count, + "Incorrect number of allocations"); + /* + * Free the extra allocation; this should trigger a flush of all + * extents in the cache. + */ + pai_dalloc(tsdn, &sec.pai, extra_alloc); + expect_zu_eq(NALLOCS + 1, ta.alloc_count, + "Incorrect number of allocations"); + expect_zu_eq(NALLOCS + 1, ta.dalloc_count, + "Incorrect number of deallocations"); +} +TEST_END + +/* + * A disable and a flush are *almost* equivalent; the only difference is what + * happens afterwards; disabling disallows all future caching as well. + */ +static void +do_disable_flush_test(bool is_disable) { + pai_test_allocator_t ta; + pai_test_allocator_init(&ta); + sec_t sec; + /* See the note above -- we can't use the real tsd. */ + tsdn_t *tsdn = TSDN_NULL; + + enum { NALLOCS = 10 }; + edata_t *allocs[NALLOCS]; + sec_init(&sec, &ta.pai, /* nshards */ 1, /* alloc_max */ PAGE, + /* bytes_max */ NALLOCS * PAGE); + for (int i = 0; i < NALLOCS; i++) { + allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, + /* zero */ false); + expect_ptr_not_null(allocs[i], "Unexpected alloc failure"); + } + /* Free all but the last aloc. */ + for (int i = 0; i < NALLOCS - 1; i++) { + pai_dalloc(tsdn, &sec.pai, allocs[i]); + } + expect_zu_eq(NALLOCS, ta.alloc_count, + "Incorrect number of allocations"); + expect_zu_eq(0, ta.dalloc_count, + "Incorrect number of allocations"); + + if (is_disable) { + sec_disable(tsdn, &sec); + } else { + sec_flush(tsdn, &sec); + } + + expect_zu_eq(NALLOCS, ta.alloc_count, + "Incorrect number of allocations"); + expect_zu_eq(NALLOCS - 1, ta.dalloc_count, + "Incorrect number of deallocations"); + + /* + * If we free into a disabled SEC, it should forward to the fallback. + * Otherwise, the SEC should accept the allocation. + */ + pai_dalloc(tsdn, &sec.pai, allocs[NALLOCS - 1]); + + expect_zu_eq(NALLOCS, ta.alloc_count, + "Incorrect number of allocations"); + expect_zu_eq(is_disable ? NALLOCS : NALLOCS - 1, ta.dalloc_count, + "Incorrect number of deallocations"); +} + +TEST_BEGIN(test_disable) { + do_disable_flush_test(/* is_disable */ true); +} +TEST_END + +TEST_BEGIN(test_flush) { + do_disable_flush_test(/* is_disable */ false); +} +TEST_END + +TEST_BEGIN(test_alloc_max_respected) { + pai_test_allocator_t ta; + pai_test_allocator_init(&ta); + sec_t sec; + /* See the note above -- we can't use the real tsd. 
*/ + tsdn_t *tsdn = TSDN_NULL; + + size_t alloc_max = 2 * PAGE; + size_t attempted_alloc = 3 * PAGE; + + sec_init(&sec, &ta.pai, /* nshards */ 1, alloc_max, + /* bytes_max */ 1000 * PAGE); + + for (size_t i = 0; i < 100; i++) { + expect_zu_eq(i, ta.alloc_count, + "Incorrect number of allocations"); + expect_zu_eq(i, ta.dalloc_count, + "Incorrect number of deallocations"); + edata_t *edata = pai_alloc(tsdn, &sec.pai, attempted_alloc, + PAGE, /* zero */ false); + expect_ptr_not_null(edata, "Unexpected alloc failure"); + expect_zu_eq(i + 1, ta.alloc_count, + "Incorrect number of allocations"); + expect_zu_eq(i, ta.dalloc_count, + "Incorrect number of deallocations"); + pai_dalloc(tsdn, &sec.pai, edata); + } +} +TEST_END + +TEST_BEGIN(test_expand_shrink_delegate) { + /* + * Expand and shrink shouldn't affect sec state; they should just + * delegate to the fallback PAI. + */ + pai_test_allocator_t ta; + pai_test_allocator_init(&ta); + sec_t sec; + /* See the note above -- we can't use the real tsd. */ + tsdn_t *tsdn = TSDN_NULL; + + sec_init(&sec, &ta.pai, /* nshards */ 1, /* alloc_max */ 10 * PAGE, + /* bytes_max */ 1000 * PAGE); + edata_t *edata = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, + /* zero */ false); + expect_ptr_not_null(edata, "Unexpected alloc failure"); + + bool err = pai_expand(tsdn, &sec.pai, edata, PAGE, 4 * PAGE, + /* zero */ false); + expect_false(err, "Unexpected expand failure"); + expect_zu_eq(1, ta.expand_count, ""); + ta.expand_return_value = true; + err = pai_expand(tsdn, &sec.pai, edata, 4 * PAGE, 3 * PAGE, + /* zero */ false); + expect_true(err, "Unexpected expand success"); + expect_zu_eq(2, ta.expand_count, ""); + + err = pai_shrink(tsdn, &sec.pai, edata, 4 * PAGE, 2 * PAGE); + expect_false(err, "Unexpected shrink failure"); + expect_zu_eq(1, ta.shrink_count, ""); + ta.shrink_return_value = true; + err = pai_shrink(tsdn, &sec.pai, edata, 2 * PAGE, PAGE); + expect_true(err, "Unexpected shrink success"); + expect_zu_eq(2, ta.shrink_count, ""); +} +TEST_END + +TEST_BEGIN(test_nshards_0) { + pai_test_allocator_t ta; + pai_test_allocator_init(&ta); + sec_t sec; + /* See the note above -- we can't use the real tsd. */ + tsdn_t *tsdn = TSDN_NULL; + + sec_init(&sec, &ta.pai, /* nshards */ 0, /* alloc_max */ 10 * PAGE, + /* bytes_max */ 1000 * PAGE); + + edata_t *edata = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, + /* zero */ false); + pai_dalloc(tsdn, &sec.pai, edata); + + /* Both operations should have gone directly to the fallback. */ + expect_zu_eq(1, ta.alloc_count, ""); + expect_zu_eq(1, ta.dalloc_count, ""); +} +TEST_END + +static void +expect_stats_pages(tsdn_t *tsdn, sec_t *sec, size_t npages) { + sec_stats_t stats; + /* + * Check that the stats merging accumulates rather than overwrites by + * putting some (made up) data there to begin with. + */ + stats.bytes = 123; + sec_stats_merge(tsdn, sec, &stats); + assert_zu_eq(npages * PAGE + 123, stats.bytes, ""); +} + +TEST_BEGIN(test_stats_simple) { + pai_test_allocator_t ta; + pai_test_allocator_init(&ta); + sec_t sec; + + /* See the note above -- we can't use the real tsd. */ + tsdn_t *tsdn = TSDN_NULL; + + enum { + NITERS = 100, + FLUSH_PAGES = 10, + }; + + sec_init(&sec, &ta.pai, /* nshards */ 1, /* alloc_max */ PAGE, + /* bytes_max */ FLUSH_PAGES * PAGE); + + edata_t *allocs[FLUSH_PAGES]; + for (size_t i = 0; i < FLUSH_PAGES; i++) { + allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, + /* zero */ false); + expect_stats_pages(tsdn, &sec, 0); + } + + /* Increase and decrease, without flushing. 
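+	 *
+	 * Each pass frees FLUSH_PAGES / 2 = 5 one-page extents (staying at or
+	 * below bytes_max, so nothing flushes) and then takes them back out
+	 * of the cache, checking after every step that the merged byte count
+	 * tracks the cache contents exactly.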
*/ + for (size_t i = 0; i < NITERS; i++) { + for (size_t j = 0; j < FLUSH_PAGES / 2; j++) { + pai_dalloc(tsdn, &sec.pai, allocs[j]); + expect_stats_pages(tsdn, &sec, j + 1); + } + for (size_t j = 0; j < FLUSH_PAGES / 2; j++) { + allocs[j] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, + /* zero */ false); + expect_stats_pages(tsdn, &sec, FLUSH_PAGES / 2 - j - 1); + } + } +} +TEST_END + +TEST_BEGIN(test_stats_auto_flush) { + pai_test_allocator_t ta; + pai_test_allocator_init(&ta); + sec_t sec; + + /* See the note above -- we can't use the real tsd. */ + tsdn_t *tsdn = TSDN_NULL; + + enum { + FLUSH_PAGES = 10, + }; + + sec_init(&sec, &ta.pai, /* nshards */ 1, /* alloc_max */ PAGE, + /* bytes_max */ FLUSH_PAGES * PAGE); + + edata_t *extra_alloc0; + edata_t *extra_alloc1; + edata_t *allocs[2 * FLUSH_PAGES]; + + extra_alloc0 = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false); + extra_alloc1 = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false); + + for (size_t i = 0; i < 2 * FLUSH_PAGES; i++) { + allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, + /* zero */ false); + expect_stats_pages(tsdn, &sec, 0); + } + + for (size_t i = 0; i < FLUSH_PAGES; i++) { + pai_dalloc(tsdn, &sec.pai, allocs[i]); + expect_stats_pages(tsdn, &sec, i + 1); + } + pai_dalloc(tsdn, &sec.pai, extra_alloc0); + /* The last dalloc should have triggered a flush. */ + expect_stats_pages(tsdn, &sec, 0); + + /* Flush the remaining pages; stats should still work. */ + for (size_t i = 0; i < FLUSH_PAGES; i++) { + pai_dalloc(tsdn, &sec.pai, allocs[FLUSH_PAGES + i]); + expect_stats_pages(tsdn, &sec, i + 1); + } + + pai_dalloc(tsdn, &sec.pai, extra_alloc1); + /* The last dalloc should have triggered a flush, again. */ + expect_stats_pages(tsdn, &sec, 0); +} +TEST_END + +TEST_BEGIN(test_stats_manual_flush) { + pai_test_allocator_t ta; + pai_test_allocator_init(&ta); + sec_t sec; + + /* See the note above -- we can't use the real tsd. */ + tsdn_t *tsdn = TSDN_NULL; + + enum { + FLUSH_PAGES = 10, + }; + + sec_init(&sec, &ta.pai, /* nshards */ 1, /* alloc_max */ PAGE, + /* bytes_max */ FLUSH_PAGES * PAGE); + + edata_t *allocs[FLUSH_PAGES]; + for (size_t i = 0; i < FLUSH_PAGES; i++) { + allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, + /* zero */ false); + expect_stats_pages(tsdn, &sec, 0); + } + + /* Dalloc the first half of the allocations. */ + for (size_t i = 0; i < FLUSH_PAGES / 2; i++) { + pai_dalloc(tsdn, &sec.pai, allocs[i]); + expect_stats_pages(tsdn, &sec, i + 1); + } + + sec_flush(tsdn, &sec); + expect_stats_pages(tsdn, &sec, 0); + + /* Flush the remaining pages. */ + for (size_t i = 0; i < FLUSH_PAGES / 2; i++) { + pai_dalloc(tsdn, &sec.pai, allocs[FLUSH_PAGES / 2 + i]); + expect_stats_pages(tsdn, &sec, i + 1); + } + sec_disable(tsdn, &sec); + expect_stats_pages(tsdn, &sec, 0); +} +TEST_END + +int +main(void) { + return test( + test_reuse, + test_auto_flush, + test_disable, + test_flush, + test_alloc_max_respected, + test_expand_shrink_delegate, + test_nshards_0, + test_stats_simple, + test_stats_auto_flush, + test_stats_manual_flush); +}
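
As a rough wiring sketch (not part of the patch; tsdn, fallback_pai, and the
parameter values are assumed / illustrative only), a consumer puts the SEC in
front of an existing pai_t much as the unit tests above do:

	sec_t sec;
	if (sec_init(&sec, fallback_pai, /* nshards */ 4,
	    /* alloc_max */ 32 * PAGE, /* bytes_max */ 256 * PAGE)) {
		/* Init failed; keep routing requests to fallback_pai directly. */
	}
	/* Small requests may now be served from (and cached by) the SEC. */
	edata_t *edata = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false);
	pai_dalloc(tsdn, &sec.pai, edata);
	/* The cache can be dropped wholesale, e.g. before an arena reset. */
	sec_flush(tsdn, &sec);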