diff --git a/Makefile.in b/Makefile.in
index 67568f00..0136a40e 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -142,6 +142,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \
$(srcroot)src/rtree.c \
$(srcroot)src/safety_check.c \
$(srcroot)src/sc.c \
+ $(srcroot)src/sec.c \
$(srcroot)src/stats.c \
$(srcroot)src/sz.c \
$(srcroot)src/tcache.c \
@@ -253,6 +254,7 @@ TESTS_UNIT := \
$(srcroot)test/unit/rtree.c \
$(srcroot)test/unit/safety_check.c \
$(srcroot)test/unit/sc.c \
+ $(srcroot)test/unit/sec.c \
$(srcroot)test/unit/seq.c \
$(srcroot)test/unit/SFMT.c \
$(srcroot)test/unit/size_check.c \
diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h
index c016769d..0767862c 100644
--- a/include/jemalloc/internal/cache_bin.h
+++ b/include/jemalloc/internal/cache_bin.h
@@ -99,7 +99,6 @@ struct cache_bin_s {
* array.
*/
uint16_t low_bits_empty;
-
};
/*
diff --git a/include/jemalloc/internal/sec.h b/include/jemalloc/internal/sec.h
new file mode 100644
index 00000000..7c1465ed
--- /dev/null
+++ b/include/jemalloc/internal/sec.h
@@ -0,0 +1,118 @@
+#ifndef JEMALLOC_INTERNAL_SEC_H
+#define JEMALLOC_INTERNAL_SEC_H
+
+#include "jemalloc/internal/atomic.h"
+#include "jemalloc/internal/pai.h"
+
+/*
+ * Small extent cache.
+ *
+ * This includes some utilities to cache small extents. We have a per-pszind
+ * bin with its own lock and edata heap (including only extents of that size).
+ * We don't try to do any coalescing of extents (since it would require
+ * cross-bin locks). As a result, we need to be careful about fragmentation.
+ * As a gesture in that direction, we limit the size of caches, apply first-fit
+ * within the bins, and, when flushing a bin, flush all of its extents rather
+ * than just those up to some threshold. When we allocate again, we'll get a
+ * chance to move to better ones.
+ */
+
+/*
+ * This is a *small* extent cache, after all. Assuming 4k pages and an ngroup
+ * of 4, this allows caching of sizes up to 128k.
+ */
+#define SEC_NPSIZES 16
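+/*
+ * For illustration only (assuming the default 4k page and the usual
+ * four-size-classes-per-doubling psz spacing): pszinds 0 through 15 then
+ * correspond to 4k, 8k, 12k, 16k, 20k, 24k, 28k, 32k, 40k, 48k, 56k, 64k,
+ * 80k, 96k, 112k, and 128k.
+ */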
+/*
+ * For now, we put a cap on the number of SECs an arena can have. There's no
+ * reason it can't be dynamic; it's just inconvenient. This number of shards
+ * are embedded in the arenas, so there's a space / configurability tradeoff
+ * here. Eventually, we should probably dynamically allocate only however many
+ * we require.
+ */
+#define SEC_NSHARDS_MAX 8
+
+/*
+ * For now, this is just one field; eventually, we'll probably want to get more
+ * fine-grained data out (like per-size class statistics).
+ */
+typedef struct sec_stats_s sec_stats_t;
+struct sec_stats_s {
+ /* Sum of bytes_cur across all shards. */
+ size_t bytes;
+};
+
+static inline void
+sec_stats_accum(sec_stats_t *dst, sec_stats_t *src) {
+ dst->bytes += src->bytes;
+}
+
+typedef struct sec_shard_s sec_shard_t;
+struct sec_shard_s {
+ /*
+ * We don't keep per-bin mutexes, even though that would allow more
+ * sharding; this allows global cache-eviction, which in turn allows for
+ * better balancing across free lists.
+ */
+ malloc_mutex_t mtx;
+ /*
+ * A SEC may need to be shut down (i.e. flushed of its contents and
+ * prevented from further caching). To avoid tricky synchronization
+ * issues, we just track enabled-status in each shard, guarded by a
+ * mutex. In practice, this is only ever checked during brief races,
+ * since the arena-level atomic boolean tracking HPA enabled-ness means
+ * that we won't go down these pathways very often after custom extent
+ * hooks are installed.
+ */
+ bool enabled;
+ edata_list_active_t freelist[SEC_NPSIZES];
+ size_t bytes_cur;
+};
+
+typedef struct sec_s sec_t;
+struct sec_s {
+ pai_t pai;
+ pai_t *fallback;
+
+ /*
+ * We'll automatically refuse to cache any objects in this sec if
+ * they're larger than alloc_max bytes.
+ */
+ size_t alloc_max;
+ /*
+ * Exceeding this amount of cached bytes in a shard causes *all* of the
+ * bins in that shard to be flushed.
+ */
+ size_t bytes_max;
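+ /*
+ * An illustrative example (this mirrors the behavior exercised in
+ * test/unit/sec.c): with a bytes_max of 10 pages, a shard can hold ten
+ * cached PAGE-sized extents; caching an eleventh pushes bytes_cur past
+ * bytes_max and flushes every bin in that shard back to the fallback.
+ */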
+
+ /*
+ * We don't necessarily always use all the shards; requests are
+ * distributed across shards [0, nshards).
+ */
+ size_t nshards;
+ sec_shard_t shards[SEC_NSHARDS_MAX];
+};
+
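+/*
+ * A minimal usage sketch (see test/unit/sec.c for fuller examples): wrap a
+ * fallback page allocator interface, then route requests through the embedded
+ * pai_t:
+ *
+ *   sec_t sec;
+ *   sec_init(&sec, fallback, nshards, alloc_max, bytes_max);
+ *   edata_t *edata = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, false);
+ *   pai_dalloc(tsdn, &sec.pai, edata);
+ */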
+bool sec_init(sec_t *sec, pai_t *fallback, size_t nshards, size_t alloc_max,
+ size_t bytes_max);
+void sec_flush(tsdn_t *tsdn, sec_t *sec);
+void sec_disable(tsdn_t *tsdn, sec_t *sec);
+
+/*
+ * Morally, these two stats methods probably ought to be a single one (and the
+ * mutex_prof_data ought to live in the sec_stats_t). But splitting them apart
+ * lets them fit easily into the pa_shard stats framework (which also has this
+ * split), which simplifies the stats management.
+ */
+void sec_stats_merge(tsdn_t *tsdn, sec_t *sec, sec_stats_t *stats);
+void sec_mutex_stats_read(tsdn_t *tsdn, sec_t *sec,
+ mutex_prof_data_t *mutex_prof_data);
+
+/*
+ * We use the arena lock ordering; these are acquired in phase 2 of forking, but
+ * should be acquired before the underlying allocator mutexes.
+ */
+void sec_prefork2(tsdn_t *tsdn, sec_t *sec);
+void sec_postfork_parent(tsdn_t *tsdn, sec_t *sec);
+void sec_postfork_child(tsdn_t *tsdn, sec_t *sec);
+
+#endif /* JEMALLOC_INTERNAL_SEC_H */
diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h
index 686bf403..662907c8 100644
--- a/include/jemalloc/internal/witness.h
+++ b/include/jemalloc/internal/witness.h
@@ -44,6 +44,8 @@ enum witness_rank_e {
WITNESS_RANK_DECAY = WITNESS_RANK_CORE,
WITNESS_RANK_TCACHE_QL,
+ WITNESS_RANK_SEC_SHARD,
+
WITNESS_RANK_EXTENT_GROW,
WITNESS_RANK_HPA_SHARD_GROW = WITNESS_RANK_EXTENT_GROW,
diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj
index 46e497ac..f14f87ff 100644
--- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj
+++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj
@@ -82,6 +82,7 @@
+    <ClCompile Include="..\..\..\..\src\sec.c" />
diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters
index f46a92fa..689a520c 100644
--- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters
+++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters
@@ -130,6 +130,9 @@
      <Filter>Source Files</Filter>
+    <ClCompile Include="..\..\..\..\src\sec.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
      <Filter>Source Files</Filter>
diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj
index dbf6f95a..30c6b295 100644
--- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj
+++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj
@@ -82,6 +82,7 @@
+    <ClCompile Include="..\..\..\..\src\sec.c" />
diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters
index f46a92fa..689a520c 100644
--- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters
+++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters
@@ -130,6 +130,9 @@
      <Filter>Source Files</Filter>
+    <ClCompile Include="..\..\..\..\src\sec.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
      <Filter>Source Files</Filter>
diff --git a/src/sec.c b/src/sec.c
new file mode 100644
index 00000000..f3c906bc
--- /dev/null
+++ b/src/sec.c
@@ -0,0 +1,263 @@
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/jemalloc_internal_includes.h"
+
+#include "jemalloc/internal/sec.h"
+
+static edata_t *sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size,
+ size_t alignment, bool zero);
+static bool sec_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata,
+ size_t old_size, size_t new_size, bool zero);
+static bool sec_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata,
+ size_t old_size, size_t new_size);
+static void sec_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata);
+
+bool sec_init(sec_t *sec, pai_t *fallback, size_t nshards, size_t alloc_max,
+ size_t bytes_max) {
+ if (nshards > SEC_NSHARDS_MAX) {
+ nshards = SEC_NSHARDS_MAX;
+ }
+ for (size_t i = 0; i < nshards; i++) {
+ sec_shard_t *shard = &sec->shards[i];
+ bool err = malloc_mutex_init(&shard->mtx, "sec_shard",
+ WITNESS_RANK_SEC_SHARD, malloc_mutex_rank_exclusive);
+ if (err) {
+ return true;
+ }
+ shard->enabled = true;
+ for (pszind_t j = 0; j < SEC_NPSIZES; j++) {
+ edata_list_active_init(&shard->freelist[j]);
+ }
+ shard->bytes_cur = 0;
+ }
+ sec->fallback = fallback;
+ sec->alloc_max = alloc_max;
+ if (sec->alloc_max > sz_pind2sz(SEC_NPSIZES - 1)) {
+ sec->alloc_max = sz_pind2sz(SEC_NPSIZES - 1);
+ }
+
+ sec->bytes_max = bytes_max;
+ sec->nshards = nshards;
+
+ /*
+ * Initialize these last so that an improper use of an SEC whose
+ * initialization failed will segfault in an easy-to-spot way.
+ */
+ sec->pai.alloc = &sec_alloc;
+ sec->pai.expand = &sec_expand;
+ sec->pai.shrink = &sec_shrink;
+ sec->pai.dalloc = &sec_dalloc;
+
+ return false;
+}
+
+static sec_shard_t *
+sec_shard_pick(tsdn_t *tsdn, sec_t *sec) {
+ /*
+ * Eventually, we should implement affinity, tracking source shard using
+ * the edata_t's newly freed up fields. For now, just randomly
+ * distribute across all shards.
+ */
+ if (tsdn_null(tsdn)) {
+ return &sec->shards[0];
+ }
+ tsd_t *tsd = tsdn_tsd(tsdn);
+ /*
+ * Use the trick from Daniel Lemire's "A fast alternative to the modulo
+ * reduction". Use a 64-bit number to store 32 bits, since we'll
+ * deliberately overflow when we multiply by the number of shards.
+ */
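+ /*
+ * Illustrative example: with nshards == 3, a uniformly random rand32
+ * in [0, 2^32) maps to idx = floor(rand32 * 3 / 2^32), so each of the
+ * indices 0, 1, and 2 receives roughly a third of the range without
+ * any modulo operation.
+ */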
+ uint64_t rand32 = prng_lg_range_u64(tsd_prng_statep_get(tsd), 32);
+ uint32_t idx = (uint32_t)((rand32 * (uint64_t)sec->nshards) >> 32);
+ return &sec->shards[idx];
+}
+
+static edata_t *
+sec_shard_alloc_locked(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard,
+ pszind_t pszind) {
+ malloc_mutex_assert_owner(tsdn, &shard->mtx);
+ if (!shard->enabled) {
+ return NULL;
+ }
+ edata_t *edata = edata_list_active_first(&shard->freelist[pszind]);
+ if (edata != NULL) {
+ edata_list_active_remove(&shard->freelist[pszind], edata);
+ assert(edata_size_get(edata) <= shard->bytes_cur);
+ shard->bytes_cur -= edata_size_get(edata);
+ }
+ return edata;
+}
+
+static edata_t *
+sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero) {
+ assert((size & PAGE_MASK) == 0);
+
+ sec_t *sec = (sec_t *)self;
+
+ if (zero || alignment > PAGE || sec->nshards == 0
+ || size > sec->alloc_max) {
+ return pai_alloc(tsdn, sec->fallback, size, alignment, zero);
+ }
+ pszind_t pszind = sz_psz2ind(size);
+ sec_shard_t *shard = sec_shard_pick(tsdn, sec);
+ malloc_mutex_lock(tsdn, &shard->mtx);
+ edata_t *edata = sec_shard_alloc_locked(tsdn, sec, shard, pszind);
+ malloc_mutex_unlock(tsdn, &shard->mtx);
+ if (edata == NULL) {
+ /*
+ * See the note in dalloc, below; really, we should add a
+ * batch_alloc method to the PAI and get more than one extent at
+ * a time.
+ */
+ edata = pai_alloc(tsdn, sec->fallback, size, alignment, zero);
+ }
+ return edata;
+}
+
+static bool
+sec_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size,
+ size_t new_size, bool zero) {
+ sec_t *sec = (sec_t *)self;
+ return pai_expand(tsdn, sec->fallback, edata, old_size, new_size, zero);
+}
+
+static bool
+sec_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size,
+ size_t new_size) {
+ sec_t *sec = (sec_t *)self;
+ return pai_shrink(tsdn, sec->fallback, edata, old_size, new_size);
+}
+
+static void
+sec_do_flush_locked(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard) {
+ malloc_mutex_assert_owner(tsdn, &shard->mtx);
+ shard->bytes_cur = 0;
+ edata_list_active_t to_flush;
+ edata_list_active_init(&to_flush);
+ for (pszind_t i = 0; i < SEC_NPSIZES; i++) {
+ edata_list_active_concat(&to_flush, &shard->freelist[i]);
+ }
+ /*
+ * A better way to do this would be to add a batch dalloc function to
+ * the pai_t. Practically, the current method turns into O(n) locks and
+ * unlocks at the fallback allocator. But some implementations (e.g.
+ * HPA) can straightforwardly do many deallocations in a single lock /
+ * unlock pair.
+ */
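+ /*
+ * Purely hypothetical sketch of such an interface (not something that
+ * exists yet): a pai_dalloc_batch(tsdn, sec->fallback, &to_flush) call
+ * could hand the whole list over at once, letting the fallback take
+ * its lock a single time.
+ */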
+ while (!edata_list_active_empty(&to_flush)) {
+ edata_t *e = edata_list_active_first(&to_flush);
+ edata_list_active_remove(&to_flush, e);
+ pai_dalloc(tsdn, sec->fallback, e);
+ }
+}
+
+static void
+sec_shard_dalloc_locked(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard,
+ edata_t *edata) {
+ malloc_mutex_assert_owner(tsdn, &shard->mtx);
+ assert(shard->bytes_cur <= sec->bytes_max);
+ size_t size = edata_size_get(edata);
+ pszind_t pszind = sz_psz2ind(size);
+ /*
+ * Prepending here results in LIFO allocation per bin, which seems
+ * reasonable.
+ */
+ edata_list_active_prepend(&shard->freelist[pszind], edata);
+ shard->bytes_cur += size;
+ if (shard->bytes_cur > sec->bytes_max) {
+ /*
+ * We've exceeded the shard limit. We make two nods in the
+ * direction of fragmentation avoidance: we flush everything in
+ * the shard, rather than one particular bin, and we hold the
+ * lock while flushing (in case one of the extents we flush is
+ * highly preferred from a fragmentation-avoidance perspective
+ * in the backing allocator). This has the extra advantage of
+ * not requiring advanced cache balancing strategies.
+ */
+ sec_do_flush_locked(tsdn, sec, shard);
+ }
+}
+
+static void
+sec_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) {
+ sec_t *sec = (sec_t *)self;
+ if (sec->nshards == 0 || edata_size_get(edata) > sec->alloc_max) {
+ pai_dalloc(tsdn, sec->fallback, edata);
+ return;
+ }
+ sec_shard_t *shard = sec_shard_pick(tsdn, sec);
+ malloc_mutex_lock(tsdn, &shard->mtx);
+ if (shard->enabled) {
+ sec_shard_dalloc_locked(tsdn, sec, shard, edata);
+ malloc_mutex_unlock(tsdn, &shard->mtx);
+ } else {
+ malloc_mutex_unlock(tsdn, &shard->mtx);
+ pai_dalloc(tsdn, sec->fallback, edata);
+ }
+}
+
+void
+sec_flush(tsdn_t *tsdn, sec_t *sec) {
+ for (size_t i = 0; i < sec->nshards; i++) {
+ malloc_mutex_lock(tsdn, &sec->shards[i].mtx);
+ sec_do_flush_locked(tsdn, sec, &sec->shards[i]);
+ malloc_mutex_unlock(tsdn, &sec->shards[i].mtx);
+ }
+}
+
+void
+sec_disable(tsdn_t *tsdn, sec_t *sec) {
+ for (size_t i = 0; i < sec->nshards; i++) {
+ malloc_mutex_lock(tsdn, &sec->shards[i].mtx);
+ sec->shards[i].enabled = false;
+ sec_do_flush_locked(tsdn, sec, &sec->shards[i]);
+ malloc_mutex_unlock(tsdn, &sec->shards[i].mtx);
+ }
+}
+
+void
+sec_stats_merge(tsdn_t *tsdn, sec_t *sec, sec_stats_t *stats) {
+ size_t sum = 0;
+ for (size_t i = 0; i < sec->nshards; i++) {
+ /*
+ * We could save these lock acquisitions by making bytes_cur
+ * atomic, but stats collection is rare anyways and we expect
+ * the number and type of stats to get more interesting.
+ */
+ malloc_mutex_lock(tsdn, &sec->shards[i].mtx);
+ sum += sec->shards[i].bytes_cur;
+ malloc_mutex_unlock(tsdn, &sec->shards[i].mtx);
+ }
+ stats->bytes += sum;
+}
+
+void
+sec_mutex_stats_read(tsdn_t *tsdn, sec_t *sec,
+ mutex_prof_data_t *mutex_prof_data) {
+ for (size_t i = 0; i < sec->nshards; i++) {
+ malloc_mutex_lock(tsdn, &sec->shards[i].mtx);
+ malloc_mutex_prof_accum(tsdn, mutex_prof_data,
+ &sec->shards[i].mtx);
+ malloc_mutex_unlock(tsdn, &sec->shards[i].mtx);
+ }
+}
+
+void
+sec_prefork2(tsdn_t *tsdn, sec_t *sec) {
+ for (size_t i = 0; i < sec->nshards; i++) {
+ malloc_mutex_prefork(tsdn, &sec->shards[i].mtx);
+ }
+}
+
+void
+sec_postfork_parent(tsdn_t *tsdn, sec_t *sec) {
+ for (size_t i = 0; i < sec->nshards; i++) {
+ malloc_mutex_postfork_parent(tsdn, &sec->shards[i].mtx);
+ }
+}
+
+void
+sec_postfork_child(tsdn_t *tsdn, sec_t *sec) {
+ for (size_t i = 0; i < sec->nshards; i++) {
+ malloc_mutex_postfork_child(tsdn, &sec->shards[i].mtx);
+ }
+}
diff --git a/test/unit/sec.c b/test/unit/sec.c
new file mode 100644
index 00000000..cb0c17d1
--- /dev/null
+++ b/test/unit/sec.c
@@ -0,0 +1,500 @@
+#include "test/jemalloc_test.h"
+
+#include "jemalloc/internal/sec.h"
+
+typedef struct pai_test_allocator_s pai_test_allocator_t;
+struct pai_test_allocator_s {
+ pai_t pai;
+ bool alloc_fail;
+ size_t alloc_count;
+ size_t dalloc_count;
+ /*
+ * We use a simple bump allocator as the implementation. This isn't
+ * *really* correct, since we may allow expansion into a subsequent
+ * allocation, but it's not like the SEC is really examining the
+ * pointers it gets back; this is mostly just helpful for debugging.
+ */
+ uintptr_t next_ptr;
+ size_t expand_count;
+ bool expand_return_value;
+ size_t shrink_count;
+ bool shrink_return_value;
+};
+
+static inline edata_t *
+pai_test_allocator_alloc(tsdn_t *tsdn, pai_t *self, size_t size,
+ size_t alignment, bool zero) {
+ pai_test_allocator_t *ta = (pai_test_allocator_t *)self;
+ if (ta->alloc_fail) {
+ return NULL;
+ }
+ edata_t *edata = malloc(sizeof(edata_t));
+ assert_ptr_not_null(edata, "");
+ ta->next_ptr += alignment - 1;
+ edata_init(edata, /* arena_ind */ 0,
+ (void *)(ta->next_ptr & ~(alignment - 1)), size,
+ /* slab */ false,
+ /* szind */ 0, /* sn */ 1, extent_state_active, /* zero */ zero,
+ /* committed */ true, /* ranged */ false, EXTENT_NOT_HEAD);
+ ta->next_ptr += size;
+ ta->alloc_count++;
+ return edata;
+}
+
+static bool
+pai_test_allocator_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata,
+ size_t old_size, size_t new_size, bool zero) {
+ pai_test_allocator_t *ta = (pai_test_allocator_t *)self;
+ ta->expand_count++;
+ return ta->expand_return_value;
+}
+
+static bool
+pai_test_allocator_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata,
+ size_t old_size, size_t new_size) {
+ pai_test_allocator_t *ta = (pai_test_allocator_t *)self;
+ ta->shrink_count++;
+ return ta->shrink_return_value;
+}
+
+static void
+pai_test_allocator_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) {
+ pai_test_allocator_t *ta = (pai_test_allocator_t *)self;
+ ta->dalloc_count++;
+ free(edata);
+}
+
+static inline void
+pai_test_allocator_init(pai_test_allocator_t *ta) {
+ ta->alloc_fail = false;
+ ta->alloc_count = 0;
+ ta->dalloc_count = 0;
+ /* Just don't start the edata at 0. */
+ ta->next_ptr = 10 * PAGE;
+ ta->expand_count = 0;
+ ta->expand_return_value = false;
+ ta->shrink_count = 0;
+ ta->shrink_return_value = false;
+ ta->pai.alloc = &pai_test_allocator_alloc;
+ ta->pai.expand = &pai_test_allocator_expand;
+ ta->pai.shrink = &pai_test_allocator_shrink;
+ ta->pai.dalloc = &pai_test_allocator_dalloc;
+}
+
+TEST_BEGIN(test_reuse) {
+ pai_test_allocator_t ta;
+ pai_test_allocator_init(&ta);
+ sec_t sec;
+ /*
+ * We can't use the "real" tsd, since we malloc within the test
+ * allocator hooks; we'd get lock inversion crashes. Eventually, we
+ * should have a way to mock tsds, but for now just don't do any
+ * lock-order checking.
+ */
+ tsdn_t *tsdn = TSDN_NULL;
+ /*
+ * 10-allocs apiece of 1-PAGE and 2-PAGE objects means that we should be
+ * able to get to 30 pages in the cache before triggering a flush.
+ */
+ enum { NALLOCS = 10 };
+ edata_t *one_page[NALLOCS];
+ edata_t *two_page[NALLOCS];
+ sec_init(&sec, &ta.pai, /* nshards */ 1, /* alloc_max */ 2 * PAGE,
+ /* bytes_max */ NALLOCS * PAGE + NALLOCS * 2 * PAGE);
+ for (int i = 0; i < NALLOCS; i++) {
+ one_page[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE,
+ /* zero */ false);
+ expect_ptr_not_null(one_page[i], "Unexpected alloc failure");
+ two_page[i] = pai_alloc(tsdn, &sec.pai, 2 * PAGE, PAGE,
+ /* zero */ false);
+ expect_ptr_not_null(two_page[i], "Unexpected alloc failure");
+ }
+ expect_zu_eq(2 * NALLOCS, ta.alloc_count,
+ "Incorrect number of allocations");
+ expect_zu_eq(0, ta.dalloc_count,
+ "Incorrect number of allocations");
+ /*
+ * Free in a different order than we allocated, to make sure free-list
+ * separation works correctly.
+ */
+ for (int i = NALLOCS - 1; i >= 0; i--) {
+ pai_dalloc(tsdn, &sec.pai, one_page[i]);
+ }
+ for (int i = NALLOCS - 1; i >= 0; i--) {
+ pai_dalloc(tsdn, &sec.pai, two_page[i]);
+ }
+ expect_zu_eq(2 * NALLOCS, ta.alloc_count,
+ "Incorrect number of allocations");
+ expect_zu_eq(0, ta.dalloc_count,
+ "Incorrect number of allocations");
+ /*
+ * Check that the n'th most recent deallocated extent is returned for
+ * the n'th alloc request of a given size.
+ */
+ for (int i = 0; i < NALLOCS; i++) {
+ edata_t *alloc1 = pai_alloc(tsdn, &sec.pai, PAGE, PAGE,
+ /* zero */ false);
+ edata_t *alloc2 = pai_alloc(tsdn, &sec.pai, 2 * PAGE, PAGE,
+ /* zero */ false);
+ expect_ptr_eq(one_page[i], alloc1,
+ "Got unexpected allocation");
+ expect_ptr_eq(two_page[i], alloc2,
+ "Got unexpected allocation");
+ }
+ expect_zu_eq(2 * NALLOCS, ta.alloc_count,
+ "Incorrect number of allocations");
+ expect_zu_eq(0, ta.dalloc_count,
+ "Incorrect number of allocations");
+}
+TEST_END
+
+TEST_BEGIN(test_auto_flush) {
+ pai_test_allocator_t ta;
+ pai_test_allocator_init(&ta);
+ sec_t sec;
+ /* See the note above -- we can't use the real tsd. */
+ tsdn_t *tsdn = TSDN_NULL;
+ /*
+ * 10 allocs of 1-PAGE objects means that we should be able to get to 10
+ * pages in the cache before triggering a flush.
+ */
+ enum { NALLOCS = 10 };
+ edata_t *extra_alloc;
+ edata_t *allocs[NALLOCS];
+ sec_init(&sec, &ta.pai, /* nshards */ 1, /* alloc_max */ PAGE,
+ /* bytes_max */ NALLOCS * PAGE);
+ for (int i = 0; i < NALLOCS; i++) {
+ allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE,
+ /* zero */ false);
+ expect_ptr_not_null(allocs[i], "Unexpected alloc failure");
+ }
+ extra_alloc = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false);
+ expect_ptr_not_null(extra_alloc, "Unexpected alloc failure");
+ expect_zu_eq(NALLOCS + 1, ta.alloc_count,
+ "Incorrect number of allocations");
+ expect_zu_eq(0, ta.dalloc_count,
+ "Incorrect number of allocations");
+ /* Free until the SEC is full, but should not have flushed yet. */
+ for (int i = 0; i < NALLOCS; i++) {
+ pai_dalloc(tsdn, &sec.pai, allocs[i]);
+ }
+ expect_zu_eq(NALLOCS + 1, ta.alloc_count,
+ "Incorrect number of allocations");
+ expect_zu_eq(0, ta.dalloc_count,
+ "Incorrect number of allocations");
+ /*
+ * Free the extra allocation; this should trigger a flush of all
+ * extents in the cache.
+ */
+ pai_dalloc(tsdn, &sec.pai, extra_alloc);
+ expect_zu_eq(NALLOCS + 1, ta.alloc_count,
+ "Incorrect number of allocations");
+ expect_zu_eq(NALLOCS + 1, ta.dalloc_count,
+ "Incorrect number of deallocations");
+}
+TEST_END
+
+/*
+ * A disable and a flush are *almost* equivalent; the only difference is what
+ * happens afterwards; disabling disallows all future caching as well.
+ */
+static void
+do_disable_flush_test(bool is_disable) {
+ pai_test_allocator_t ta;
+ pai_test_allocator_init(&ta);
+ sec_t sec;
+ /* See the note above -- we can't use the real tsd. */
+ tsdn_t *tsdn = TSDN_NULL;
+
+ enum { NALLOCS = 10 };
+ edata_t *allocs[NALLOCS];
+ sec_init(&sec, &ta.pai, /* nshards */ 1, /* alloc_max */ PAGE,
+ /* bytes_max */ NALLOCS * PAGE);
+ for (int i = 0; i < NALLOCS; i++) {
+ allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE,
+ /* zero */ false);
+ expect_ptr_not_null(allocs[i], "Unexpected alloc failure");
+ }
+ /* Free all but the last alloc. */
+ for (int i = 0; i < NALLOCS - 1; i++) {
+ pai_dalloc(tsdn, &sec.pai, allocs[i]);
+ }
+ expect_zu_eq(NALLOCS, ta.alloc_count,
+ "Incorrect number of allocations");
+ expect_zu_eq(0, ta.dalloc_count,
+ "Incorrect number of allocations");
+
+ if (is_disable) {
+ sec_disable(tsdn, &sec);
+ } else {
+ sec_flush(tsdn, &sec);
+ }
+
+ expect_zu_eq(NALLOCS, ta.alloc_count,
+ "Incorrect number of allocations");
+ expect_zu_eq(NALLOCS - 1, ta.dalloc_count,
+ "Incorrect number of deallocations");
+
+ /*
+ * If we free into a disabled SEC, it should forward to the fallback.
+ * Otherwise, the SEC should accept the allocation.
+ */
+ pai_dalloc(tsdn, &sec.pai, allocs[NALLOCS - 1]);
+
+ expect_zu_eq(NALLOCS, ta.alloc_count,
+ "Incorrect number of allocations");
+ expect_zu_eq(is_disable ? NALLOCS : NALLOCS - 1, ta.dalloc_count,
+ "Incorrect number of deallocations");
+}
+
+TEST_BEGIN(test_disable) {
+ do_disable_flush_test(/* is_disable */ true);
+}
+TEST_END
+
+TEST_BEGIN(test_flush) {
+ do_disable_flush_test(/* is_disable */ false);
+}
+TEST_END
+
+TEST_BEGIN(test_alloc_max_respected) {
+ pai_test_allocator_t ta;
+ pai_test_allocator_init(&ta);
+ sec_t sec;
+ /* See the note above -- we can't use the real tsd. */
+ tsdn_t *tsdn = TSDN_NULL;
+
+ size_t alloc_max = 2 * PAGE;
+ size_t attempted_alloc = 3 * PAGE;
+
+ sec_init(&sec, &ta.pai, /* nshards */ 1, alloc_max,
+ /* bytes_max */ 1000 * PAGE);
+
+ for (size_t i = 0; i < 100; i++) {
+ expect_zu_eq(i, ta.alloc_count,
+ "Incorrect number of allocations");
+ expect_zu_eq(i, ta.dalloc_count,
+ "Incorrect number of deallocations");
+ edata_t *edata = pai_alloc(tsdn, &sec.pai, attempted_alloc,
+ PAGE, /* zero */ false);
+ expect_ptr_not_null(edata, "Unexpected alloc failure");
+ expect_zu_eq(i + 1, ta.alloc_count,
+ "Incorrect number of allocations");
+ expect_zu_eq(i, ta.dalloc_count,
+ "Incorrect number of deallocations");
+ pai_dalloc(tsdn, &sec.pai, edata);
+ }
+}
+TEST_END
+
+TEST_BEGIN(test_expand_shrink_delegate) {
+ /*
+ * Expand and shrink shouldn't affect sec state; they should just
+ * delegate to the fallback PAI.
+ */
+ pai_test_allocator_t ta;
+ pai_test_allocator_init(&ta);
+ sec_t sec;
+ /* See the note above -- we can't use the real tsd. */
+ tsdn_t *tsdn = TSDN_NULL;
+
+ sec_init(&sec, &ta.pai, /* nshards */ 1, /* alloc_max */ 10 * PAGE,
+ /* bytes_max */ 1000 * PAGE);
+ edata_t *edata = pai_alloc(tsdn, &sec.pai, PAGE, PAGE,
+ /* zero */ false);
+ expect_ptr_not_null(edata, "Unexpected alloc failure");
+
+ bool err = pai_expand(tsdn, &sec.pai, edata, PAGE, 4 * PAGE,
+ /* zero */ false);
+ expect_false(err, "Unexpected expand failure");
+ expect_zu_eq(1, ta.expand_count, "");
+ ta.expand_return_value = true;
+ err = pai_expand(tsdn, &sec.pai, edata, 4 * PAGE, 3 * PAGE,
+ /* zero */ false);
+ expect_true(err, "Unexpected expand success");
+ expect_zu_eq(2, ta.expand_count, "");
+
+ err = pai_shrink(tsdn, &sec.pai, edata, 4 * PAGE, 2 * PAGE);
+ expect_false(err, "Unexpected shrink failure");
+ expect_zu_eq(1, ta.shrink_count, "");
+ ta.shrink_return_value = true;
+ err = pai_shrink(tsdn, &sec.pai, edata, 2 * PAGE, PAGE);
+ expect_true(err, "Unexpected shrink success");
+ expect_zu_eq(2, ta.shrink_count, "");
+}
+TEST_END
+
+TEST_BEGIN(test_nshards_0) {
+ pai_test_allocator_t ta;
+ pai_test_allocator_init(&ta);
+ sec_t sec;
+ /* See the note above -- we can't use the real tsd. */
+ tsdn_t *tsdn = TSDN_NULL;
+
+ sec_init(&sec, &ta.pai, /* nshards */ 0, /* alloc_max */ 10 * PAGE,
+ /* bytes_max */ 1000 * PAGE);
+
+ edata_t *edata = pai_alloc(tsdn, &sec.pai, PAGE, PAGE,
+ /* zero */ false);
+ pai_dalloc(tsdn, &sec.pai, edata);
+
+ /* Both operations should have gone directly to the fallback. */
+ expect_zu_eq(1, ta.alloc_count, "");
+ expect_zu_eq(1, ta.dalloc_count, "");
+}
+TEST_END
+
+static void
+expect_stats_pages(tsdn_t *tsdn, sec_t *sec, size_t npages) {
+ sec_stats_t stats;
+ /*
+ * Check that the stats merging accumulates rather than overwrites by
+ * putting some (made up) data there to begin with.
+ */
+ stats.bytes = 123;
+ sec_stats_merge(tsdn, sec, &stats);
+ assert_zu_eq(npages * PAGE + 123, stats.bytes, "");
+}
+
+TEST_BEGIN(test_stats_simple) {
+ pai_test_allocator_t ta;
+ pai_test_allocator_init(&ta);
+ sec_t sec;
+
+ /* See the note above -- we can't use the real tsd. */
+ tsdn_t *tsdn = TSDN_NULL;
+
+ enum {
+ NITERS = 100,
+ FLUSH_PAGES = 10,
+ };
+
+ sec_init(&sec, &ta.pai, /* nshards */ 1, /* alloc_max */ PAGE,
+ /* bytes_max */ FLUSH_PAGES * PAGE);
+
+ edata_t *allocs[FLUSH_PAGES];
+ for (size_t i = 0; i < FLUSH_PAGES; i++) {
+ allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE,
+ /* zero */ false);
+ expect_stats_pages(tsdn, &sec, 0);
+ }
+
+ /* Increase and decrease, without flushing. */
+ for (size_t i = 0; i < NITERS; i++) {
+ for (size_t j = 0; j < FLUSH_PAGES / 2; j++) {
+ pai_dalloc(tsdn, &sec.pai, allocs[j]);
+ expect_stats_pages(tsdn, &sec, j + 1);
+ }
+ for (size_t j = 0; j < FLUSH_PAGES / 2; j++) {
+ allocs[j] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE,
+ /* zero */ false);
+ expect_stats_pages(tsdn, &sec, FLUSH_PAGES / 2 - j - 1);
+ }
+ }
+}
+TEST_END
+
+TEST_BEGIN(test_stats_auto_flush) {
+ pai_test_allocator_t ta;
+ pai_test_allocator_init(&ta);
+ sec_t sec;
+
+ /* See the note above -- we can't use the real tsd. */
+ tsdn_t *tsdn = TSDN_NULL;
+
+ enum {
+ FLUSH_PAGES = 10,
+ };
+
+ sec_init(&sec, &ta.pai, /* nshards */ 1, /* alloc_max */ PAGE,
+ /* bytes_max */ FLUSH_PAGES * PAGE);
+
+ edata_t *extra_alloc0;
+ edata_t *extra_alloc1;
+ edata_t *allocs[2 * FLUSH_PAGES];
+
+ extra_alloc0 = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false);
+ extra_alloc1 = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false);
+
+ for (size_t i = 0; i < 2 * FLUSH_PAGES; i++) {
+ allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE,
+ /* zero */ false);
+ expect_stats_pages(tsdn, &sec, 0);
+ }
+
+ for (size_t i = 0; i < FLUSH_PAGES; i++) {
+ pai_dalloc(tsdn, &sec.pai, allocs[i]);
+ expect_stats_pages(tsdn, &sec, i + 1);
+ }
+ pai_dalloc(tsdn, &sec.pai, extra_alloc0);
+ /* The last dalloc should have triggered a flush. */
+ expect_stats_pages(tsdn, &sec, 0);
+
+ /* Flush the remaining pages; stats should still work. */
+ for (size_t i = 0; i < FLUSH_PAGES; i++) {
+ pai_dalloc(tsdn, &sec.pai, allocs[FLUSH_PAGES + i]);
+ expect_stats_pages(tsdn, &sec, i + 1);
+ }
+
+ pai_dalloc(tsdn, &sec.pai, extra_alloc1);
+ /* The last dalloc should have triggered a flush, again. */
+ expect_stats_pages(tsdn, &sec, 0);
+}
+TEST_END
+
+TEST_BEGIN(test_stats_manual_flush) {
+ pai_test_allocator_t ta;
+ pai_test_allocator_init(&ta);
+ sec_t sec;
+
+ /* See the note above -- we can't use the real tsd. */
+ tsdn_t *tsdn = TSDN_NULL;
+
+ enum {
+ FLUSH_PAGES = 10,
+ };
+
+ sec_init(&sec, &ta.pai, /* nshards */ 1, /* alloc_max */ PAGE,
+ /* bytes_max */ FLUSH_PAGES * PAGE);
+
+ edata_t *allocs[FLUSH_PAGES];
+ for (size_t i = 0; i < FLUSH_PAGES; i++) {
+ allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE,
+ /* zero */ false);
+ expect_stats_pages(tsdn, &sec, 0);
+ }
+
+ /* Dalloc the first half of the allocations. */
+ for (size_t i = 0; i < FLUSH_PAGES / 2; i++) {
+ pai_dalloc(tsdn, &sec.pai, allocs[i]);
+ expect_stats_pages(tsdn, &sec, i + 1);
+ }
+
+ sec_flush(tsdn, &sec);
+ expect_stats_pages(tsdn, &sec, 0);
+
+ /* Flush the remaining pages. */
+ for (size_t i = 0; i < FLUSH_PAGES / 2; i++) {
+ pai_dalloc(tsdn, &sec.pai, allocs[FLUSH_PAGES / 2 + i]);
+ expect_stats_pages(tsdn, &sec, i + 1);
+ }
+ sec_disable(tsdn, &sec);
+ expect_stats_pages(tsdn, &sec, 0);
+}
+TEST_END
+
+int
+main(void) {
+ return test(
+ test_reuse,
+ test_auto_flush,
+ test_disable,
+ test_flush,
+ test_alloc_max_respected,
+ test_expand_shrink_delegate,
+ test_nshards_0,
+ test_stats_simple,
+ test_stats_auto_flush,
+ test_stats_manual_flush);
+}