Wake up background threads on demand

This change allows every allocator conforming to PAI to communicate that it has deferred some work for the future. Without it, if a background thread goes into indefinite sleep, there is no way to notify it about upcoming deferred work.
2021-08-18 19:24:37 -07:00 · 2021-08-18 19:24:37 -07:00 · 8229cc77c5
commit 8229cc77c5
parent 97da57c13a
21 changed files with 445 additions and 214 deletions
--- a/include/jemalloc/internal/arena_externs.h
+++ b/include/jemalloc/internal/arena_externs.h
@ -42,7 +42,7 @@ void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads,
    size_t *nactive, size_t *ndirty, size_t *nmuzzy, arena_stats_t *astats,
    bin_stats_data_t *bstats, arena_stats_large_t *lstats,
    pac_estats_t *estats, hpa_shard_stats_t *hpastats, sec_stats_t *secstats);
-void arena_handle_new_dirty_pages(tsdn_t *tsdn, arena_t *arena);
+void arena_handle_deferred_work(tsdn_t *tsdn, arena_t *arena);
 edata_t *arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena,
    size_t usize, size_t alignment, bool zero);
 void arena_extent_dalloc_large_prep(tsdn_t *tsdn, arena_t *arena,
--- a/include/jemalloc/internal/background_thread_externs.h
+++ b/include/jemalloc/internal/background_thread_externs.h
@ -13,7 +13,7 @@ extern background_thread_info_t *background_thread_info;
 bool background_thread_create(tsd_t *tsd, unsigned arena_ind);
 bool background_threads_enable(tsd_t *tsd);
 bool background_threads_disable(tsd_t *tsd);
-bool background_thread_running(background_thread_info_t* info);
+bool background_thread_is_started(background_thread_info_t* info);
 void background_thread_wakeup_early(background_thread_info_t *info,
    nstime_t *remaining_sleep);
 void background_thread_interval_check(tsdn_t *tsdn, arena_t *arena,
--- a/include/jemalloc/internal/background_thread_structs.h
+++ b/include/jemalloc/internal/background_thread_structs.h
@ -20,7 +20,7 @@
 #define BACKGROUND_THREAD_HPA_INTERVAL_MAX_DEFAULT_WHEN_ENABLED 5000

 #define BACKGROUND_THREAD_DEFERRED_MIN UINT64_C(0)
-#define BACKGROUND_THREAD_DEFERRED_MAX UINT64_C(-1)
+#define BACKGROUND_THREAD_DEFERRED_MAX UINT64_MAX

 typedef enum {
 	background_thread_stopped,
--- a/include/jemalloc/internal/hpa.h
+++ b/include/jemalloc/internal/hpa.h
@ -136,6 +136,11 @@ struct hpa_shard_s {
 	 * stats.
 	 */
 	hpa_shard_nonderived_stats_t stats;
+
+	/*
+	 * Last time we performed purge on this shard.
+	 */
+	nstime_t last_purge;
 };

 /*
--- a/include/jemalloc/internal/pa.h
+++ b/include/jemalloc/internal/pa.h
@ -167,16 +167,17 @@ void pa_shard_destroy(tsdn_t *tsdn, pa_shard_t *shard);

 /* Gets an edata for the given allocation. */
 edata_t *pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size,
-    size_t alignment, bool slab, szind_t szind, bool zero);
+    size_t alignment, bool slab, szind_t szind, bool zero,
+    bool *deferred_work_generated);
 /* Returns true on error, in which case nothing changed. */
 bool pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size,
-    size_t new_size, szind_t szind, bool zero);
+    size_t new_size, szind_t szind, bool zero, bool *deferred_work_generated);
 /*
 * The same.  Sets *deferred_work_generated to true if the shrink left work to
 * be done later (e.g. new dirty pages to purge), and false otherwise.
 */
 bool pa_shrink(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size,
-    size_t new_size, szind_t szind, bool *generated_dirty);
+    size_t new_size, szind_t szind, bool *deferred_work_generated);
 /*
 * Frees the given edata back to the pa.  Sets *deferred_work_generated if we
 * deferred work (the PAC always sets it for now; but this need not be the
@ -185,7 +186,7 @@ bool pa_shrink(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size,
 * consistent with the shrink pathway and our error codes here).
 */
 void pa_dalloc(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata,
-    bool *generated_dirty);
+    bool *deferred_work_generated);
 bool pa_decay_ms_set(tsdn_t *tsdn, pa_shard_t *shard, extent_state_t state,
    ssize_t decay_ms, pac_purge_eagerness_t eagerness);
 ssize_t pa_decay_ms_get(pa_shard_t *shard, extent_state_t state);
--- a/include/jemalloc/internal/pai.h
+++ b/include/jemalloc/internal/pai.h
@ -7,7 +7,7 @@ typedef struct pai_s pai_t;
 struct pai_s {
 	/* Returns NULL on failure. */
 	edata_t *(*alloc)(tsdn_t *tsdn, pai_t *self, size_t size,
-	    size_t alignment, bool zero);
+	    size_t alignment, bool zero, bool *deferred_work_generated);
 	/*
 	 * Returns the number of extents added to the list (which may be fewer
 	 * than requested, in case of OOM).  The list should already be
@ -15,15 +15,18 @@ struct pai_s {
 	 * the results are not necessarily zeroed.
 	 */
 	size_t (*alloc_batch)(tsdn_t *tsdn, pai_t *self, size_t size,
-	    size_t nallocs, edata_list_active_t *results);
+	    size_t nallocs, edata_list_active_t *results,
+	    bool *deferred_work_generated);
 	bool (*expand)(tsdn_t *tsdn, pai_t *self, edata_t *edata,
-	    size_t old_size, size_t new_size, bool zero);
+	    size_t old_size, size_t new_size, bool zero,
+	    bool *deferred_work_generated);
 	bool (*shrink)(tsdn_t *tsdn, pai_t *self, edata_t *edata,
-	    size_t old_size, size_t new_size);
-	void (*dalloc)(tsdn_t *tsdn, pai_t *self, edata_t *edata);
+	    size_t old_size, size_t new_size, bool *deferred_work_generated);
+	void (*dalloc)(tsdn_t *tsdn, pai_t *self, edata_t *edata,
+	    bool *deferred_work_generated);
 	/* This function empties out list as a side-effect of being called. */
 	void (*dalloc_batch)(tsdn_t *tsdn, pai_t *self,
-	    edata_list_active_t *list);
+	    edata_list_active_t *list, bool *deferred_work_generated);
 	uint64_t (*time_until_deferred_work)(tsdn_t *tsdn, pai_t *self);
 };

@ -33,36 +36,43 @@ struct pai_s {
 */

 static inline edata_t *
-pai_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero) {
-	return self->alloc(tsdn, self, size, alignment, zero);
+pai_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero,
+    bool *deferred_work_generated) {
+	return self->alloc(tsdn, self, size, alignment, zero,
+	    deferred_work_generated);
 }

 static inline size_t
 pai_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, size_t nallocs,
-    edata_list_active_t *results) {
-	return self->alloc_batch(tsdn, self, size, nallocs, results);
+    edata_list_active_t *results, bool *deferred_work_generated) {
+	return self->alloc_batch(tsdn, self, size, nallocs, results,
+	    deferred_work_generated);
 }

 static inline bool
 pai_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size,
-    size_t new_size, bool zero) {
-	return self->expand(tsdn, self, edata, old_size, new_size, zero);
+    size_t new_size, bool zero, bool *deferred_work_generated) {
+	return self->expand(tsdn, self, edata, old_size, new_size, zero,
+	    deferred_work_generated);
 }

 static inline bool
 pai_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size,
-    size_t new_size) {
-	return self->shrink(tsdn, self, edata, old_size, new_size);
+    size_t new_size, bool *deferred_work_generated) {
+	return self->shrink(tsdn, self, edata, old_size, new_size,
+	    deferred_work_generated);
 }

 static inline void
-pai_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) {
-	self->dalloc(tsdn, self, edata);
+pai_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata,
+    bool *deferred_work_generated) {
+	self->dalloc(tsdn, self, edata, deferred_work_generated);
 }

 static inline void
-pai_dalloc_batch(tsdn_t *tsdn, pai_t *self, edata_list_active_t *list) {
-	self->dalloc_batch(tsdn, self, list);
+pai_dalloc_batch(tsdn_t *tsdn, pai_t *self, edata_list_active_t *list,
+    bool *deferred_work_generated) {
+	self->dalloc_batch(tsdn, self, list, deferred_work_generated);
 }

 static inline uint64_t
@ -75,9 +85,9 @@ pai_time_until_deferred_work(tsdn_t *tsdn, pai_t *self) {
 * each item in the list.
 */
 size_t pai_alloc_batch_default(tsdn_t *tsdn, pai_t *self, size_t size,
-    size_t nallocs, edata_list_active_t *results);
+    size_t nallocs, edata_list_active_t *results, bool *deferred_work_generated);
 /* Ditto, for dalloc. */
 void pai_dalloc_batch_default(tsdn_t *tsdn, pai_t *self,
-    edata_list_active_t *list);
+    edata_list_active_t *list, bool *deferred_work_generated);

 #endif /* JEMALLOC_INTERNAL_PAI_H */
--- a/src/arena.c
+++ b/src/arena.c
@ -199,15 +199,17 @@ arena_background_thread_inactivity_check(tsdn_t *tsdn, arena_t *arena,
 	}
 }

-void arena_handle_new_dirty_pages(tsdn_t *tsdn, arena_t *arena) {
+/*
+ * React to deferred work generated by a PAI function.
+ */
+void arena_handle_deferred_work(tsdn_t *tsdn, arena_t *arena) {
 	witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
 	    WITNESS_RANK_CORE, 0);

-	if (arena_decay_ms_get(arena, extent_state_dirty) == 0) {
+	if (decay_immediately(&arena->pa_shard.pac.decay_dirty)) {
 		arena_decay_dirty(tsdn, arena, false, true);
-	} else {
-		arena_background_thread_inactivity_check(tsdn, arena, false);
 	}
+	arena_background_thread_inactivity_check(tsdn, arena, false);
 }

 static void *
@ -316,11 +318,14 @@ arena_large_ralloc_stats_update(tsdn_t *tsdn, arena_t *arena, size_t oldusize,
 edata_t *
 arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize,
    size_t alignment, bool zero) {
+	bool deferred_work_generated;
 	szind_t szind = sz_size2index(usize);
 	size_t esize = usize + sz_large_pad;

 	edata_t *edata = pa_alloc(tsdn, &arena->pa_shard, esize, alignment,
-	    /* slab */ false, szind, zero);
+	    /* slab */ false, szind, zero, &deferred_work_generated);
+
+	assert(deferred_work_generated == false);

 	if (edata != NULL) {
 		if (config_stats) {
@ -471,6 +476,45 @@ arena_decay(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all) {
 	arena_decay_muzzy(tsdn, arena, is_background_thread, all);
 }

+static bool
+arena_should_decay_early(tsdn_t *tsdn, arena_t *arena, decay_t *decay,
+    background_thread_info_t *info, nstime_t *remaining_sleep,
+    size_t npages_new) {
+	malloc_mutex_assert_owner(tsdn, &info->mtx);
+
+	if (malloc_mutex_trylock(tsdn, &decay->mtx)) {
+		return false;
+	}
+
+	if (!decay_gradually(decay)) {
+		malloc_mutex_unlock(tsdn, &decay->mtx);
+		return false;
+	}
+
+	nstime_init(remaining_sleep, background_thread_wakeup_time_get(info));
+	if (nstime_compare(remaining_sleep, &decay->epoch) <= 0) {
+		malloc_mutex_unlock(tsdn, &decay->mtx);
+		return false;
+	}
+	nstime_subtract(remaining_sleep, &decay->epoch);
+	if (npages_new > 0) {
+		uint64_t npurge_new = decay_npages_purge_in(decay,
+		    remaining_sleep, npages_new);
+		info->npages_to_purge_new += npurge_new;
+	}
+	malloc_mutex_unlock(tsdn, &decay->mtx);
+	return info->npages_to_purge_new >
+	    ARENA_DEFERRED_PURGE_NPAGES_THRESHOLD;
+}
+
+/*
+ * Check if deferred work needs to be done sooner than planned.
+ * For decay we might want to wake up earlier because of an influx of dirty
+ * pages. Rather than waiting for the previously estimated time, we proactively
+ * purge those pages.
+ * If the background thread sleeps indefinitely, always wake it up because some
+ * deferred work has been generated.
+ */
 static void
 arena_maybe_do_deferred_work(tsdn_t *tsdn, arena_t *arena, decay_t *decay,
    size_t npages_new) {
@ -485,47 +529,18 @@ arena_maybe_do_deferred_work(tsdn_t *tsdn, arena_t *arena, decay_t *decay,
 		 */
 		return;
 	}
-	if (!background_thread_running(info)) {
+	if (!background_thread_is_started(info)) {
 		goto label_done;
 	}
-	if (malloc_mutex_trylock(tsdn, &decay->mtx)) {
-		goto label_done;
-	}
-	if (!decay_gradually(decay)) {
-		goto label_done_unlock2;
-	}

-	nstime_t diff;
-	nstime_init(&diff, background_thread_wakeup_time_get(info));
-	if (nstime_compare(&diff, &decay->epoch) <= 0) {
-		goto label_done_unlock2;
-	}
-	nstime_subtract(&diff, &decay->epoch);
-
-	if (npages_new > 0) {
-		uint64_t npurge_new = decay_npages_purge_in(decay, &diff,
-		    npages_new);
-		info->npages_to_purge_new += npurge_new;
-	}
-
-	bool should_signal;
-	if (info->npages_to_purge_new > ARENA_DEFERRED_PURGE_NPAGES_THRESHOLD) {
-		should_signal = true;
-	} else if (unlikely(background_thread_indefinite_sleep(info)) &&
-	    (ecache_npages_get(&arena->pa_shard.pac.ecache_dirty) > 0 ||
-	    ecache_npages_get(&arena->pa_shard.pac.ecache_muzzy) > 0 ||
-	    info->npages_to_purge_new > 0)) {
-		should_signal = true;
-	} else {
-		should_signal = false;
-	}
-
-	if (should_signal) {
+	nstime_t remaining_sleep;
+	if (background_thread_indefinite_sleep(info)) {
+		background_thread_wakeup_early(info, NULL);
+	} else if (arena_should_decay_early(tsdn, arena, decay, info,
+	    &remaining_sleep, npages_new)) {
 		info->npages_to_purge_new = 0;
-		background_thread_wakeup_early(info, &diff);
+		background_thread_wakeup_early(info, &remaining_sleep);
 	}
-label_done_unlock2:
-	malloc_mutex_unlock(tsdn, &decay->mtx);
 label_done:
 	malloc_mutex_unlock(tsdn, &info->mtx);
 }
@ -539,10 +554,10 @@ arena_do_deferred_work(tsdn_t *tsdn, arena_t *arena) {

 void
 arena_slab_dalloc(tsdn_t *tsdn, arena_t *arena, edata_t *slab) {
-	bool generated_dirty;
-	pa_dalloc(tsdn, &arena->pa_shard, slab, &generated_dirty);
-	if (generated_dirty) {
-		arena_handle_new_dirty_pages(tsdn, arena);
+	bool deferred_work_generated;
+	pa_dalloc(tsdn, &arena->pa_shard, slab, &deferred_work_generated);
+	if (deferred_work_generated) {
+		arena_handle_deferred_work(tsdn, arena);
 	}
 }

@ -803,11 +818,17 @@ arena_destroy(tsd_t *tsd, arena_t *arena) {
 static edata_t *
 arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, unsigned binshard,
    const bin_info_t *bin_info) {
+	bool deferred_work_generated;
 	witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
 	    WITNESS_RANK_CORE, 0);

 	edata_t *slab = pa_alloc(tsdn, &arena->pa_shard, bin_info->slab_size,
-	    PAGE, /* slab */ true, /* szind */ binind, /* zero */ false);
+	    PAGE, /* slab */ true, /* szind */ binind, /* zero */ false,
+	    &deferred_work_generated);
+
+	if (deferred_work_generated) {
+		arena_handle_deferred_work(tsdn, arena);
+	}

 	if (slab == NULL) {
 		return NULL;
--- a/src/background_thread.c
+++ b/src/background_thread.c
@ -119,7 +119,8 @@ background_thread_sleep(tsdn_t *tsdn, background_thread_info_t *info,

 	int ret;
 	if (interval == BACKGROUND_THREAD_INDEFINITE_SLEEP) {
-		assert(background_thread_indefinite_sleep(info));
+		background_thread_wakeup_time_set(tsdn, info,
+		    BACKGROUND_THREAD_INDEFINITE_SLEEP);
 		ret = pthread_cond_wait(&info->cond, &info->mtx.lock);
 		assert(ret == 0);
 	} else {
@ -144,8 +145,6 @@ background_thread_sleep(tsdn_t *tsdn, background_thread_info_t *info,
 		assert(!background_thread_indefinite_sleep(info));
 		ret = pthread_cond_timedwait(&info->cond, &info->mtx.lock, &ts);
 		assert(ret == ETIMEDOUT || ret == 0);
-		background_thread_wakeup_time_set(tsdn, info,
-		    BACKGROUND_THREAD_INDEFINITE_SLEEP);
 	}
 	if (config_stats) {
 		gettimeofday(&tv, NULL);
@ -177,13 +176,21 @@ background_work_sleep_once(tsdn_t *tsdn, background_thread_info_t *info,
    unsigned ind) {
 	uint64_t ns_until_deferred = BACKGROUND_THREAD_DEFERRED_MAX;
 	unsigned narenas = narenas_total_get();
+	bool slept_indefinitely = background_thread_indefinite_sleep(info);

 	for (unsigned i = ind; i < narenas; i += max_background_threads) {
 		arena_t *arena = arena_get(tsdn, i, false);
 		if (!arena) {
 			continue;
 		}
+		/*
+		 * If the thread was woken up from an indefinite sleep, don't
+		 * do the work instantly, but rather check when the deferred
+		 * work that caused this thread to wake up is scheduled for.
+		 */
+		if (!slept_indefinitely) {
 			arena_do_deferred_work(tsdn, arena);
+		}
 		if (ns_until_deferred <= BACKGROUND_THREAD_MIN_INTERVAL_NS) {
 			/* Min interval will be used. */
 			continue;
@ -574,7 +581,7 @@ background_threads_disable(tsd_t *tsd) {
 }

 bool
-background_thread_running(background_thread_info_t *info) {
+background_thread_is_started(background_thread_info_t *info) {
 	return info->state == background_thread_started;
 }

@ -586,7 +593,8 @@ background_thread_wakeup_early(background_thread_info_t *info,
 	 * we know that background thread wakes up soon, so the time to cache
 	 * the just freed memory is bounded and low.
 	 */
-	if (nstime_ns(remaining_sleep) < BACKGROUND_THREAD_MIN_INTERVAL_NS) {
+	if (remaining_sleep && nstime_ns(remaining_sleep) <
+	    BACKGROUND_THREAD_MIN_INTERVAL_NS) {
 		return;
 	}
 	pthread_cond_signal(&info->cond);
--- a/src/decay.c
+++ b/src/decay.c
@ -3,7 +3,7 @@

 #include "jemalloc/internal/decay.h"

-const uint64_t h_steps[SMOOTHSTEP_NSTEPS] = {
+static const uint64_t h_steps[SMOOTHSTEP_NSTEPS] = {
 #define STEP(step, h, x, y)			\
 		h,
 		SMOOTHSTEP
--- a/src/hpa.c
+++ b/src/hpa.c
@ -9,16 +9,17 @@
 #define HPA_EDEN_SIZE (128 * HUGEPAGE)

 static edata_t *hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size,
-    size_t alignment, bool zero);
+    size_t alignment, bool zero, bool *deferred_work_generated);
 static size_t hpa_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size,
-    size_t nallocs, edata_list_active_t *results);
+    size_t nallocs, edata_list_active_t *results, bool *deferred_work_generated);
 static bool hpa_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata,
-    size_t old_size, size_t new_size, bool zero);
+    size_t old_size, size_t new_size, bool zero, bool *deferred_work_generated);
 static bool hpa_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata,
-    size_t old_size, size_t new_size);
-static void hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata);
+    size_t old_size, size_t new_size, bool *deferred_work_generated);
+static void hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata,
+    bool *deferred_work_generated);
 static void hpa_dalloc_batch(tsdn_t *tsdn, pai_t *self,
-    edata_list_active_t *list);
+    edata_list_active_t *list, bool *deferred_work_generated);
 static uint64_t hpa_time_until_deferred_work(tsdn_t *tsdn, pai_t *self);

 bool
@ -366,6 +367,13 @@ hpa_update_purge_hugify_eligibility(tsdn_t *tsdn, hpa_shard_t *shard,
 	}
 }

+static bool
+hpa_shard_has_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard) {
+	malloc_mutex_assert_owner(tsdn, &shard->mtx);
+	hpdata_t *to_hugify = psset_pick_hugify(&shard->psset);
+	return to_hugify != NULL || hpa_should_purge(tsdn, shard);
+}
+
 /* Returns whether or not we purged anything. */
 static bool
 hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) {
@ -429,6 +437,7 @@ hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) {
 	shard->npending_purge -= num_to_purge;
 	shard->stats.npurge_passes++;
 	shard->stats.npurges += purges_this_pass;
+	shard->central->hooks.curtime(&shard->last_purge);
 	if (dehugify) {
 		shard->stats.ndehugifies++;
 	}
@ -615,7 +624,8 @@ hpa_try_alloc_one_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size,

 static size_t
 hpa_try_alloc_batch_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size,
-    bool *oom, size_t nallocs, edata_list_active_t *results) {
+    bool *oom, size_t nallocs, edata_list_active_t *results,
+    bool *deferred_work_generated) {
 	malloc_mutex_lock(tsdn, &shard->mtx);
 	size_t nsuccess = 0;
 	for (; nsuccess < nallocs; nsuccess++) {
@ -628,18 +638,20 @@ hpa_try_alloc_batch_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size,
 	}

 	hpa_shard_maybe_do_deferred_work(tsdn, shard, /* forced */ false);
+	*deferred_work_generated = hpa_shard_has_deferred_work(tsdn, shard);
 	malloc_mutex_unlock(tsdn, &shard->mtx);
 	return nsuccess;
 }

 static size_t
 hpa_alloc_batch_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size,
-    size_t nallocs, edata_list_active_t *results) {
+    size_t nallocs, edata_list_active_t *results,
+    bool *deferred_work_generated) {
 	assert(size <= shard->opts.slab_max_alloc);
 	bool oom = false;

 	size_t nsuccess = hpa_try_alloc_batch_no_grow(tsdn, shard, size, &oom,
-	    nallocs, results);
+	    nallocs, results, deferred_work_generated);

 	if (nsuccess == nallocs || oom) {
 		return nsuccess;
@ -655,7 +667,7 @@ hpa_alloc_batch_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size,
 	 * in between when we dropped the main mutex and grabbed the grow mutex.
 	 */
 	nsuccess += hpa_try_alloc_batch_no_grow(tsdn, shard, size, &oom,
-	    nallocs - nsuccess, results);
+	    nallocs - nsuccess, results, deferred_work_generated);
 	if (nsuccess == nallocs || oom) {
 		malloc_mutex_unlock(tsdn, &shard->grow_mtx);
 		return nsuccess;
@ -683,7 +695,7 @@ hpa_alloc_batch_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size,
 	malloc_mutex_unlock(tsdn, &shard->mtx);

 	nsuccess += hpa_try_alloc_batch_no_grow(tsdn, shard, size, &oom,
-	    nallocs - nsuccess, results);
+	    nallocs - nsuccess, results, deferred_work_generated);
 	/*
 	 * Drop grow_mtx before doing deferred work; other threads blocked on it
 	 * should be allowed to proceed while we're working.
@ -704,7 +716,7 @@ hpa_from_pai(pai_t *self) {

 static size_t
 hpa_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, size_t nallocs,
-    edata_list_active_t *results) {
+    edata_list_active_t *results, bool *deferred_work_generated) {
 	assert(nallocs > 0);
 	assert((size & PAGE_MASK) == 0);
 	witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
@ -716,7 +728,7 @@ hpa_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, size_t nallocs,
 	}

 	size_t nsuccess = hpa_alloc_batch_psset(tsdn, shard, size, nallocs,
-	    results);
+	    results, deferred_work_generated);

 	witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
 	    WITNESS_RANK_CORE, 0);
@ -737,7 +749,8 @@ hpa_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, size_t nallocs,
 }

 static edata_t *
-hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero) {
+hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero,
+    bool *deferred_work_generated) {
 	assert((size & PAGE_MASK) == 0);
 	witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
 	    WITNESS_RANK_CORE, 0);
@ -753,23 +766,25 @@ hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero) {
 	edata_list_active_t results;
 	edata_list_active_init(&results);
 	size_t nallocs = hpa_alloc_batch(tsdn, self, size, /* nallocs */ 1,
-	    &results);
+	    &results, deferred_work_generated);
 	assert(nallocs == 0 || nallocs == 1);
 	edata_t *edata = edata_list_active_first(&results);
 	return edata;
 }

 static bool
-hpa_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata,
-    size_t old_size, size_t new_size, bool zero) {
+hpa_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size,
+    size_t new_size, bool zero, bool *deferred_work_generated) {
 	/* Expand not yet supported. */
+	*deferred_work_generated = false;
 	return true;
 }

 static bool
 hpa_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata,
-    size_t old_size, size_t new_size) {
+    size_t old_size, size_t new_size, bool *deferred_work_generated) {
 	/* Shrink not yet supported. */
+	*deferred_work_generated = false;
 	return true;
 }

@ -825,7 +840,8 @@ hpa_dalloc_locked(tsdn_t *tsdn, hpa_shard_t *shard, edata_t *edata) {
 }

 static void
-hpa_dalloc_batch(tsdn_t *tsdn, pai_t *self, edata_list_active_t *list) {
+hpa_dalloc_batch(tsdn_t *tsdn, pai_t *self, edata_list_active_t *list,
+    bool *deferred_work_generated) {
 	hpa_shard_t *shard = hpa_from_pai(self);

 	edata_t *edata;
@ -840,21 +856,83 @@ hpa_dalloc_batch(tsdn_t *tsdn, pai_t *self, edata_list_active_t *list) {
 		hpa_dalloc_locked(tsdn, shard, edata);
 	}
 	hpa_shard_maybe_do_deferred_work(tsdn, shard, /* forced */ false);
+	*deferred_work_generated =
+	    hpa_shard_has_deferred_work(tsdn, shard);
+
 	malloc_mutex_unlock(tsdn, &shard->mtx);
 }

 static void
-hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) {
+hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata,
+    bool *deferred_work_generated) {
 	/* Just a dalloc_batch of size 1; this lets us share logic. */
 	edata_list_active_t dalloc_list;
 	edata_list_active_init(&dalloc_list);
 	edata_list_active_append(&dalloc_list, edata);
-	hpa_dalloc_batch(tsdn, self, &dalloc_list);
+	hpa_dalloc_batch(tsdn, self, &dalloc_list, deferred_work_generated);
 }

+/*
+ * Calculate time until either purging or hugification ought to happen.
+ * Called by background threads.
+ */
 static uint64_t
 hpa_time_until_deferred_work(tsdn_t *tsdn, pai_t *self) {
-	return opt_background_thread_hpa_interval_max_ms;
+	hpa_shard_t *shard = hpa_from_pai(self);
+	uint64_t time_ns = BACKGROUND_THREAD_DEFERRED_MAX;
+
+	malloc_mutex_lock(tsdn, &shard->mtx);
+
+	hpdata_t *to_hugify = psset_pick_hugify(&shard->psset);
+	if (to_hugify != NULL) {
+		nstime_t time_hugify_allowed =
+		    hpdata_time_hugify_allowed(to_hugify);
+		nstime_t nstime;
+		shard->central->hooks.curtime(&nstime);
+		nstime_subtract(&nstime, &time_hugify_allowed);
+		uint64_t since_hugify_allowed_ms = nstime_msec(&nstime);
+		/*
+		 * If not enough time has passed since hugification was allowed,
+		 * sleep for the rest.
+		 */
+		if (since_hugify_allowed_ms < shard->opts.hugify_delay_ms) {
+			time_ns = shard->opts.hugify_delay_ms - since_hugify_allowed_ms;
+			time_ns *= 1000 * 1000;
+		} else {
+			malloc_mutex_unlock(tsdn, &shard->mtx);
+			return BACKGROUND_THREAD_DEFERRED_MIN;
+		}
+	}
+
+	if (hpa_should_purge(tsdn, shard)) {
+		/*
+		 * If we haven't purged before, no need to check interval
+		 * between purges. Simply purge as soon as possible.
+		 */
+		if (shard->stats.npurge_passes == 0) {
+			malloc_mutex_unlock(tsdn, &shard->mtx);
+			return BACKGROUND_THREAD_DEFERRED_MIN;
+		}
+		nstime_t nstime;
+		shard->central->hooks.curtime(&nstime);
+		nstime_subtract(&nstime, &shard->last_purge);
+		uint64_t since_last_purge_ms = nstime_msec(&nstime);
+
+		if (since_last_purge_ms < shard->opts.min_purge_interval_ms) {
+			uint64_t until_purge_ns;
+			until_purge_ns = shard->opts.min_purge_interval_ms -
+			    since_last_purge_ms;
+			until_purge_ns *= 1000 * 1000;
+
+			if (until_purge_ns < time_ns) {
+				time_ns = until_purge_ns;
+			}
+		} else {
+			time_ns = BACKGROUND_THREAD_DEFERRED_MIN;
+		}
+	}
+	malloc_mutex_unlock(tsdn, &shard->mtx);
+	return time_ns;
 }

 void
--- a/src/large.c
+++ b/src/large.c
@ -64,14 +64,15 @@ large_ralloc_no_move_shrink(tsdn_t *tsdn, edata_t *edata, size_t usize) {
 		return true;
 	}

-	bool generated_dirty;
+	bool deferred_work_generated;
 	bool err = pa_shrink(tsdn, &arena->pa_shard, edata, old_size,
-	    usize + sz_large_pad, sz_size2index(usize), &generated_dirty);
+	    usize + sz_large_pad, sz_size2index(usize),
+	    &deferred_work_generated);
 	if (err) {
 		return true;
 	}
-	if (generated_dirty) {
-		arena_handle_new_dirty_pages(tsdn, arena);
+	if (deferred_work_generated) {
+		arena_handle_deferred_work(tsdn, arena);
 	}
 	arena_extent_ralloc_large_shrink(tsdn, arena, edata, old_usize);

@ -88,8 +89,15 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, edata_t *edata, size_t usize,
 	size_t new_size = usize + sz_large_pad;

 	szind_t szind = sz_size2index(usize);
+
+	bool deferred_work_generated;
 	bool err = pa_expand(tsdn, &arena->pa_shard, edata, old_size, new_size,
-	    szind, zero);
+	    szind, zero, &deferred_work_generated);
+
+	if (deferred_work_generated) {
+		arena_handle_deferred_work(tsdn, arena);
+	}
+
 	if (err) {
 		return true;
 	}
@ -241,10 +249,10 @@ large_dalloc_prep_impl(tsdn_t *tsdn, arena_t *arena, edata_t *edata,

 static void
 large_dalloc_finish_impl(tsdn_t *tsdn, arena_t *arena, edata_t *edata) {
-	bool generated_dirty;
-	pa_dalloc(tsdn, &arena->pa_shard, edata, &generated_dirty);
-	if (generated_dirty) {
-		arena_handle_new_dirty_pages(tsdn, arena);
+	bool deferred_work_generated;
+	pa_dalloc(tsdn, &arena->pa_shard, edata, &deferred_work_generated);
+	if (deferred_work_generated) {
+		arena_handle_deferred_work(tsdn, arena);
 	}
 }

--- a/src/pa.c
+++ b/src/pa.c
@ -118,21 +118,23 @@ pa_get_pai(pa_shard_t *shard, edata_t *edata) {

 edata_t *
 pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment,
-    bool slab, szind_t szind, bool zero) {
+    bool slab, szind_t szind, bool zero, bool *deferred_work_generated) {
 	witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
 	    WITNESS_RANK_CORE, 0);

 	edata_t *edata = NULL;
+	*deferred_work_generated = false;
 	if (pa_shard_uses_hpa(shard)) {
 		edata = pai_alloc(tsdn, &shard->hpa_sec.pai, size, alignment,
-		    zero);
+		    zero, deferred_work_generated);
 	}
 	/*
 	 * Fall back to the PAC if the HPA is off or couldn't serve the given
 	 * allocation request.
 	 */
 	if (edata == NULL) {
-		edata = pai_alloc(tsdn, &shard->pac.pai, size, alignment, zero);
+		edata = pai_alloc(tsdn, &shard->pac.pai, size, alignment, zero,
+		    deferred_work_generated);
 	}

 	if (edata != NULL) {
@ -152,7 +154,7 @@ pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment,

 bool
 pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size,
-    size_t new_size, szind_t szind, bool zero) {
+    size_t new_size, szind_t szind, bool zero, bool *deferred_work_generated) {
 	assert(new_size > old_size);
 	assert(edata_size_get(edata) == old_size);
 	assert((new_size & PAGE_MASK) == 0);
@ -161,7 +163,8 @@ pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size,

 	pai_t *pai = pa_get_pai(shard, edata);

-	bool error = pai_expand(tsdn, pai, edata, old_size, new_size, zero);
+	bool error = pai_expand(tsdn, pai, edata, old_size, new_size, zero,
+	    deferred_work_generated);
 	if (error) {
 		return true;
 	}
@ -174,20 +177,19 @@ pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size,

 bool
 pa_shrink(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size,
-    size_t new_size, szind_t szind, bool *generated_dirty) {
+    size_t new_size, szind_t szind, bool *deferred_work_generated) {
 	assert(new_size < old_size);
 	assert(edata_size_get(edata) == old_size);
 	assert((new_size & PAGE_MASK) == 0);
 	size_t shrink_amount = old_size - new_size;

-	*generated_dirty = false;
 	pai_t *pai = pa_get_pai(shard, edata);
-	bool error = pai_shrink(tsdn, pai, edata, old_size, new_size);
+	bool error = pai_shrink(tsdn, pai, edata, old_size, new_size,
+	    deferred_work_generated);
 	if (error) {
 		return true;
 	}
 	pa_nactive_sub(shard, shrink_amount >> LG_PAGE);
-	*generated_dirty = (edata_pai_get(edata) == EXTENT_PAI_PAC);

 	edata_szind_set(edata, szind);
 	emap_remap(tsdn, shard->emap, edata, szind, /* slab */ false);
@ -196,7 +198,7 @@ pa_shrink(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size,

 void
 pa_dalloc(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata,
-    bool *generated_dirty) {
+    bool *deferred_work_generated) {
 	emap_remap(tsdn, shard->emap, edata, SC_NSIZES, /* slab */ false);
 	if (edata_slab_get(edata)) {
 		emap_deregister_interior(tsdn, shard->emap, edata);
@ -206,8 +208,7 @@ pa_dalloc(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata,
 	edata_szind_set(edata, SC_NSIZES);
 	pa_nactive_sub(shard, edata_size_get(edata) >> LG_PAGE);
 	pai_t *pai = pa_get_pai(shard, edata);
-	pai_dalloc(tsdn, pai, edata);
-	*generated_dirty = (edata_pai_get(edata) == EXTENT_PAI_PAC);
+	pai_dalloc(tsdn, pai, edata, deferred_work_generated);
 }

 bool
--- a/src/pac.c
+++ b/src/pac.c
@ -4,12 +4,13 @@
 #include "jemalloc/internal/pac.h"

 static edata_t *pac_alloc_impl(tsdn_t *tsdn, pai_t *self, size_t size,
-    size_t alignment, bool zero);
+    size_t alignment, bool zero, bool *deferred_work_generated);
 static bool pac_expand_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata,
-    size_t old_size, size_t new_size, bool zero);
+    size_t old_size, size_t new_size, bool zero, bool *deferred_work_generated);
 static bool pac_shrink_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata,
-    size_t old_size, size_t new_size);
-static void pac_dalloc_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata);
+    size_t old_size, size_t new_size, bool *deferred_work_generated);
+static void pac_dalloc_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata,
+    bool *deferred_work_generated);
 static uint64_t pac_time_until_deferred_work(tsdn_t *tsdn, pai_t *self);

 static ehooks_t *
@ -109,9 +110,11 @@ pac_may_have_muzzy(pac_t *pac) {

 static edata_t *
 pac_alloc_impl(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment,
-    bool zero) {
+    bool zero, bool *deferred_work_generated) {
 	pac_t *pac = (pac_t *)self;

+	*deferred_work_generated = false;
+
 	ehooks_t *ehooks = pac_ehooks_get(pac);
 	edata_t *edata = ecache_alloc(tsdn, pac, ehooks, &pac->ecache_dirty,
 	    NULL, size, alignment, zero);
@ -133,10 +136,12 @@ pac_alloc_impl(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment,

 static bool
 pac_expand_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size,
-    size_t new_size, bool zero) {
+    size_t new_size, bool zero, bool *deferred_work_generated) {
 	pac_t *pac = (pac_t *)self;
 	ehooks_t *ehooks = pac_ehooks_get(pac);

+	*deferred_work_generated = false;
+
 	size_t mapped_add = 0;
 	size_t expand_amount = new_size - old_size;

@ -171,12 +176,13 @@ pac_expand_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size,

 static bool
 pac_shrink_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size,
-    size_t new_size) {
+    size_t new_size, bool *deferred_work_generated) {
 	pac_t *pac = (pac_t *)self;
-
 	ehooks_t *ehooks = pac_ehooks_get(pac);
+
 	size_t shrink_amount = old_size - new_size;

+	*deferred_work_generated = false;

 	if (ehooks_split_will_fail(ehooks)) {
 		return true;
@ -188,14 +194,18 @@ pac_shrink_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size,
 		return true;
 	}
 	ecache_dalloc(tsdn, pac, ehooks, &pac->ecache_dirty, trail);
+	*deferred_work_generated = true;
 	return false;
 }

 static void
-pac_dalloc_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata) {
+pac_dalloc_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata,
+    bool *deferred_work_generated) { /* PAC dalloc hook; flag is an output */
 	pac_t *pac = (pac_t *)self;
 	ehooks_t *ehooks = pac_ehooks_get(pac);
 	ecache_dalloc(tsdn, pac, ehooks, &pac->ecache_dirty, edata);
+	/* The extent is handed to the dirty ecache; purging it is deferred. */
+	*deferred_work_generated = true;
 }

 static uint64_t
--- a/src/pai.c
+++ b/src/pai.c
@ -2,11 +2,13 @@
 #include "jemalloc/internal/jemalloc_internal_includes.h"

 size_t
-pai_alloc_batch_default(tsdn_t *tsdn, pai_t *self, size_t size,
-    size_t nallocs, edata_list_active_t *results) {
+pai_alloc_batch_default(tsdn_t *tsdn, pai_t *self, size_t size, size_t nallocs,
+    edata_list_active_t *results, bool *deferred_work_generated) {
 	for (size_t i = 0; i < nallocs; i++) {
+		bool deferred_by_alloc = false;
 		edata_t *edata = pai_alloc(tsdn, self, size, PAGE,
-		    /* zero */ false);
+		    /* zero */ false, &deferred_by_alloc);
+		*deferred_work_generated |= deferred_by_alloc;
 		if (edata == NULL) {
 			return i;
 		}
@ -17,10 +19,12 @@ pai_alloc_batch_default(tsdn_t *tsdn, pai_t *self, size_t size,

 void
 pai_dalloc_batch_default(tsdn_t *tsdn, pai_t *self,
-    edata_list_active_t *list) {
+    edata_list_active_t *list, bool *deferred_work_generated) {
 	edata_t *edata;
+	/*
+	 * The flag is an output only: callers may pass an uninitialized bool,
+	 * so start from false rather than OR-ing into whatever it held.
+	 */
+	*deferred_work_generated = false;
 	while ((edata = edata_list_active_first(list)) != NULL) {
+		bool deferred_by_dalloc = false;
 		edata_list_active_remove(list, edata);
-		pai_dalloc(tsdn, self, edata);
+		pai_dalloc(tsdn, self, edata, &deferred_by_dalloc);
+		/* Report deferred work if any single dalloc produced some. */
+		*deferred_work_generated |= deferred_by_dalloc;
 	}
 }
--- a/src/sec.c
+++ b/src/sec.c
@ -4,12 +4,13 @@
 #include "jemalloc/internal/sec.h"

 static edata_t *sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size,
-    size_t alignment, bool zero);
+    size_t alignment, bool zero, bool *deferred_work_generated);
 static bool sec_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata,
-    size_t old_size, size_t new_size, bool zero);
+    size_t old_size, size_t new_size, bool zero, bool *deferred_work_generated);
 static bool sec_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata,
-    size_t old_size, size_t new_size);
-static void sec_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata);
+    size_t old_size, size_t new_size, bool *deferred_work_generated);
+static void sec_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata,
+    bool *deferred_work_generated);

 static void
 sec_bin_init(sec_bin_t *bin) {
@ -147,7 +148,9 @@ sec_flush_some_and_unlock(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard) {
 	}

 	malloc_mutex_unlock(tsdn, &shard->mtx);
-	pai_dalloc_batch(tsdn, sec->fallback, &to_flush);
+	bool deferred_work_generated;
+	pai_dalloc_batch(tsdn, sec->fallback, &to_flush,
+	    &deferred_work_generated);
 }

 static edata_t *
@ -175,8 +178,9 @@ sec_batch_fill_and_alloc(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard,

 	edata_list_active_t result;
 	edata_list_active_init(&result);
+	bool deferred_work_generated;
 	size_t nalloc = pai_alloc_batch(tsdn, sec->fallback, size,
-	    1 + sec->opts.batch_fill_extra, &result);
+	    1 + sec->opts.batch_fill_extra, &result, &deferred_work_generated);

 	edata_t *ret = edata_list_active_first(&result);
 	if (ret != NULL) {
@ -213,14 +217,17 @@ sec_batch_fill_and_alloc(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard,
 }

 static edata_t *
-sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero) {
+sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero,
+    bool *deferred_work_generated) {
 	assert((size & PAGE_MASK) == 0);

 	sec_t *sec = (sec_t *)self;
+	*deferred_work_generated = false;

 	if (zero || alignment > PAGE || sec->opts.nshards == 0
 	    || size > sec->opts.max_alloc) {
-		return pai_alloc(tsdn, sec->fallback, size, alignment, zero);
+		return pai_alloc(tsdn, sec->fallback, size, alignment, zero,
+		    deferred_work_generated);
 	}
 	pszind_t pszind = sz_psz2ind(size);
 	sec_shard_t *shard = sec_shard_pick(tsdn, sec);
@ -243,7 +250,7 @@ sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero) {
 			    size);
 		} else {
 			edata = pai_alloc(tsdn, sec->fallback, size, alignment,
-			    zero);
+			    zero, deferred_work_generated);
 		}
 	}
 	return edata;
@ -251,16 +258,18 @@ sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero) {

 static bool
 sec_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size,
-    size_t new_size, bool zero) {
+    size_t new_size, bool zero, bool *deferred_work_generated) {
 	sec_t *sec = (sec_t *)self;
-	return pai_expand(tsdn, sec->fallback, edata, old_size, new_size, zero);
+	return pai_expand(tsdn, sec->fallback, edata, old_size, new_size, zero,
+	    deferred_work_generated); /* pure pass-through to the fallback */
 }

 static bool
 sec_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size,
-    size_t new_size) {
+    size_t new_size, bool *deferred_work_generated) {
 	sec_t *sec = (sec_t *)self;
-	return pai_shrink(tsdn, sec->fallback, edata, old_size, new_size);
+	return pai_shrink(tsdn, sec->fallback, edata, old_size, new_size,
+	    deferred_work_generated); /* pure pass-through to the fallback */
 }

 static void
@ -281,7 +290,9 @@ sec_flush_all_locked(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard) {
 	 * we're disabling the HPA or resetting the arena, both of which are
 	 * rare pathways.
 	 */
-	pai_dalloc_batch(tsdn, sec->fallback, &to_flush);
+	bool deferred_work_generated;
+	pai_dalloc_batch(tsdn, sec->fallback, &to_flush,
+	    &deferred_work_generated);
 }

 static void
@ -317,20 +328,24 @@ sec_shard_dalloc_and_unlock(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard,
 }

 static void
-sec_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) {
+sec_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata,
+    bool *deferred_work_generated) { /* free via a shard, else fallback */
 	sec_t *sec = (sec_t *)self;
 	if (sec->opts.nshards == 0
 	    || edata_size_get(edata) > sec->opts.max_alloc) {
-		pai_dalloc(tsdn, sec->fallback, edata);
+		pai_dalloc(tsdn, sec->fallback, edata,
+		    deferred_work_generated); /* flag left to the fallback */
 		return;
 	}
 	sec_shard_t *shard = sec_shard_pick(tsdn, sec);
 	malloc_mutex_lock(tsdn, &shard->mtx);
 	if (shard->enabled) {
+		*deferred_work_generated = false; /* NOTE(review): helper below takes no flag; confirm it cannot defer work */
 		sec_shard_dalloc_and_unlock(tsdn, sec, shard, edata);
 	} else {
 		malloc_mutex_unlock(tsdn, &shard->mtx);
-		pai_dalloc(tsdn, sec->fallback, edata);
+		pai_dalloc(tsdn, sec->fallback, edata,
+		    deferred_work_generated); /* flag left to the fallback */
 	}
 }

--- a/test/unit/decay.c
+++ b/test/unit/decay.c
@ -49,7 +49,7 @@ TEST_BEGIN(test_decay_npages_purge_in) {
 	expect_false(decay_init(&decay, &curtime, (ssize_t)decay_ms),
 	    "Failed to initialize decay");

-	const size_t new_pages = 100;
+	size_t new_pages = 100;

 	nstime_t time;
 	nstime_copy(&time, &decay_nstime);
--- a/test/unit/hpa.c
+++ b/test/unit/hpa.c
@ -79,9 +79,12 @@ TEST_BEGIN(test_alloc_max) {
 	edata_t *edata;

 	/* Small max */
-	edata = pai_alloc(tsdn, &shard->pai, ALLOC_MAX, PAGE, false);
+	bool deferred_work_generated;
+	edata = pai_alloc(tsdn, &shard->pai, ALLOC_MAX, PAGE, false,
+	    &deferred_work_generated);
 	expect_ptr_not_null(edata, "Allocation of small max failed");
-	edata = pai_alloc(tsdn, &shard->pai, ALLOC_MAX + PAGE, PAGE, false);
+	edata = pai_alloc(tsdn, &shard->pai, ALLOC_MAX + PAGE, PAGE, false,
+	    &deferred_work_generated);
 	expect_ptr_null(edata, "Allocation of larger than small max succeeded");

 	destroy_test_data(shard);
@ -166,6 +169,8 @@ TEST_BEGIN(test_stress) {
 	mem_tree_t tree;
 	mem_tree_new(&tree);

+	bool deferred_work_generated;
+
 	for (size_t i = 0; i < 100 * 1000; i++) {
 		size_t operation = prng_range_zu(&prng_state, 2);
 		if (operation == 0) {
@ -183,7 +188,8 @@ TEST_BEGIN(test_stress) {
 			size_t npages = npages_min + prng_range_zu(&prng_state,
 			    npages_max - npages_min);
 			edata_t *edata = pai_alloc(tsdn, &shard->pai,
-			    npages * PAGE, PAGE, false);
+			    npages * PAGE, PAGE, false,
+			    &deferred_work_generated);
 			assert_ptr_not_null(edata,
 			    "Unexpected allocation failure");
 			live_edatas[nlive_edatas] = edata;
@ -199,7 +205,8 @@ TEST_BEGIN(test_stress) {
 			live_edatas[victim] = live_edatas[nlive_edatas - 1];
 			nlive_edatas--;
 			node_remove(&tree, to_free);
-			pai_dalloc(tsdn, &shard->pai, to_free);
+			pai_dalloc(tsdn, &shard->pai, to_free,
+			    &deferred_work_generated);
 		}
 	}

@ -218,7 +225,8 @@ TEST_BEGIN(test_stress) {
 	for (size_t i = 0; i < nlive_edatas; i++) {
 		edata_t *to_free = live_edatas[i];
 		node_remove(&tree, to_free);
-		pai_dalloc(tsdn, &shard->pai, to_free);
+		pai_dalloc(tsdn, &shard->pai, to_free,
+		    &deferred_work_generated);
 	}
 	hpa_shard_destroy(tsdn, shard);

@ -244,6 +252,8 @@ TEST_BEGIN(test_alloc_dalloc_batch) {
 	    &test_hpa_shard_opts_default);
 	tsdn_t *tsdn = tsd_tsdn(tsd_fetch());

+	bool deferred_work_generated;
+
 	enum {NALLOCS = 8};

 	edata_t *allocs[NALLOCS];
@ -253,13 +263,13 @@ TEST_BEGIN(test_alloc_dalloc_batch) {
 	 */
 	for (size_t i = 0; i < NALLOCS / 2; i++) {
 		allocs[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE,
-		    /* zero */ false);
+		    /* zero */ false, &deferred_work_generated);
 		expect_ptr_not_null(allocs[i], "Unexpected alloc failure");
 	}
 	edata_list_active_t allocs_list;
 	edata_list_active_init(&allocs_list);
 	size_t nsuccess = pai_alloc_batch(tsdn, &shard->pai, PAGE, NALLOCS / 2,
-	    &allocs_list);
+	    &allocs_list, &deferred_work_generated);
 	expect_zu_eq(NALLOCS / 2, nsuccess, "Unexpected oom");
 	for (size_t i = NALLOCS / 2; i < NALLOCS; i++) {
 		allocs[i] = edata_list_active_first(&allocs_list);
@ -279,15 +289,17 @@ TEST_BEGIN(test_alloc_dalloc_batch) {
 	for (size_t i = 0; i < NALLOCS / 2; i++) {
 		edata_list_active_append(&allocs_list, allocs[i]);
 	}
-	pai_dalloc_batch(tsdn, &shard->pai, &allocs_list);
+	pai_dalloc_batch(tsdn, &shard->pai, &allocs_list,
+	    &deferred_work_generated);
 	for (size_t i = NALLOCS / 2; i < NALLOCS; i++) {
-		pai_dalloc(tsdn, &shard->pai, allocs[i]);
+		pai_dalloc(tsdn, &shard->pai, allocs[i],
+		    &deferred_work_generated);
 	}

 	/* Reallocate (individually), and ensure reuse and contiguity. */
 	for (size_t i = 0; i < NALLOCS; i++) {
 		allocs[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE,
-		    /* zero */ false);
+		    /* zero */ false, &deferred_work_generated);
 		expect_ptr_not_null(allocs[i], "Unexpected alloc failure.");
 	}
 	void *new_base = edata_base_get(allocs[0]);
@ -355,11 +367,14 @@ TEST_BEGIN(test_defer_time) {

 	hpa_shard_t *shard = create_test_data(&hooks, &opts);

+	bool deferred_work_generated;
+
 	nstime_init(&defer_curtime, 0);
 	tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
 	edata_t *edatas[HUGEPAGE_PAGES];
 	for (int i = 0; i < (int)HUGEPAGE_PAGES; i++) {
-		edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false);
+		edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false,
+		    &deferred_work_generated);
 		expect_ptr_not_null(edatas[i], "Unexpected null edata");
 	}
 	hpa_shard_do_deferred_work(tsdn, shard);
@ -374,7 +389,8 @@ TEST_BEGIN(test_defer_time) {

 	/* Purge.  Recall that dirty_mult is .25. */
 	for (int i = 0; i < (int)HUGEPAGE_PAGES / 2; i++) {
-		pai_dalloc(tsdn, &shard->pai, edatas[i]);
+		pai_dalloc(tsdn, &shard->pai, edatas[i],
+		    &deferred_work_generated);
 	}

 	hpa_shard_do_deferred_work(tsdn, shard);
@ -391,14 +407,16 @@ TEST_BEGIN(test_defer_time) {
 	 * be marked for pending hugify.
 	 */
 	for (int i = 0; i < (int)HUGEPAGE_PAGES / 2; i++) {
-		edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false);
+		edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false,
+		    &deferred_work_generated);
 		expect_ptr_not_null(edatas[i], "Unexpected null edata");
 	}
 	/*
 	 * We would be ineligible for hugification, had we not already met the
 	 * threshold before dipping below it.
 	 */
-	pai_dalloc(tsdn, &shard->pai, edatas[0]);
+	pai_dalloc(tsdn, &shard->pai, edatas[0],
+	    &deferred_work_generated);
 	/* Wait for the threshold again. */
 	nstime_init2(&defer_curtime, 22, 0);
 	hpa_shard_do_deferred_work(tsdn, shard);
--- a/test/unit/hpa_background_thread.c
+++ b/test/unit/hpa_background_thread.c
@ -65,6 +65,23 @@ set_background_thread_enabled(bool enabled) {
 	expect_d_eq(0, err, "Unexpected mallctl failure");
 }

+/* Spin until the arena's background thread reports indefinite sleep. */
+static void
+wait_until_thread_is_enabled(unsigned arena_id) {
+	tsd_t* tsd = tsd_fetch();
+	bool sleeping = false;
+	/* Count passes: the bound asserted below never trips otherwise. */
+	for (int iterations = 0; !sleeping; iterations++) {
+		background_thread_info_t *info =
+		    background_thread_info_get(arena_id);
+		malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
+		malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
+		sleeping = background_thread_indefinite_sleep(info);
+		assert_d_lt(iterations, (int)1e6,
+		    "Waiting for a thread to start for too long");
+	}
+}
+
 static void
 expect_purging(unsigned arena_ind, bool expect_deferred) {
 	size_t empty_ndirty;
@ -132,6 +149,7 @@ TEST_BEGIN(test_hpa_background_thread_enable_disable) {
 	expect_purging(arena_ind, false);

 	set_background_thread_enabled(true);
+	wait_until_thread_is_enabled(arena_ind);
 	expect_purging(arena_ind, true);
 }
 TEST_END
--- a/test/unit/hpa_background_thread.sh
+++ b/test/unit/hpa_background_thread.sh
@ -1,4 +1,4 @@
 #!/bin/sh

-export MALLOC_CONF="hpa_dirty_mult:0,background_thread_hpa_interval_max_ms:50,hpa_sec_nshards:0"
+export MALLOC_CONF="hpa_dirty_mult:0,hpa_min_purge_interval_ms:50,hpa_sec_nshards:0"

--- a/test/unit/pa.c
+++ b/test/unit/pa.c
@ -87,12 +87,13 @@ static void *
 do_alloc_free_purge(void *arg) {
 	test_data_t *test_data = (test_data_t *)arg;
 	for (int i = 0; i < 10 * 1000; i++) {
+		bool deferred_work_generated;
 		edata_t *edata = pa_alloc(TSDN_NULL, &test_data->shard, PAGE,
-		    PAGE, /* slab */ false, /* szind */ 0, /* zero */ false);
+		    PAGE, /* slab */ false, /* szind */ 0, /* zero */ false,
+		    &deferred_work_generated);
 		assert_ptr_not_null(edata, "");
-		bool generated_dirty;
 		pa_dalloc(TSDN_NULL, &test_data->shard, edata,
-		    &generated_dirty);
+		    &deferred_work_generated);
 		malloc_mutex_lock(TSDN_NULL,
 		    &test_data->shard.pac.decay_dirty.mtx);
 		pac_decay_all(TSDN_NULL, &test_data->shard.pac,
--- a/test/unit/sec.c
+++ b/test/unit/sec.c
@ -50,8 +50,9 @@ test_sec_init(sec_t *sec, pai_t *fallback, size_t nshards, size_t max_alloc,

 static inline edata_t *
 pai_test_allocator_alloc(tsdn_t *tsdn, pai_t *self, size_t size,
-    size_t alignment, bool zero) {
+    size_t alignment, bool zero, bool *deferred_work_generated) {
 	pai_test_allocator_t *ta = (pai_test_allocator_t *)self;
+	*deferred_work_generated = false;
 	if (ta->alloc_fail) {
 		return NULL;
 	}
@ -70,8 +71,10 @@ pai_test_allocator_alloc(tsdn_t *tsdn, pai_t *self, size_t size,

 static inline size_t
 pai_test_allocator_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size,
-    size_t nallocs, edata_list_active_t *results) {
+    size_t nallocs, edata_list_active_t *results,
+    bool *deferred_work_generated) {
 	pai_test_allocator_t *ta = (pai_test_allocator_t *)self;
+	*deferred_work_generated = false;
 	if (ta->alloc_fail) {
 		return 0;
 	}
@ -92,31 +95,37 @@ pai_test_allocator_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size,

 static bool
 pai_test_allocator_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata,
-    size_t old_size, size_t new_size, bool zero) {
+    size_t old_size, size_t new_size, bool zero,
+    bool *deferred_work_generated) { /* stub: count call, return preset */
 	pai_test_allocator_t *ta = (pai_test_allocator_t *)self;
+	*deferred_work_generated = false; /* this stub defers nothing */
 	ta->expand_count++;
 	return ta->expand_return_value;
 }

 static bool
 pai_test_allocator_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata,
-    size_t old_size, size_t new_size) {
+    size_t old_size, size_t new_size, bool *deferred_work_generated) {
 	pai_test_allocator_t *ta = (pai_test_allocator_t *)self;
+	*deferred_work_generated = false; /* this stub defers nothing */
 	ta->shrink_count++;
 	return ta->shrink_return_value;
 }

 static void
-pai_test_allocator_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) {
+pai_test_allocator_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata,
+    bool *deferred_work_generated) { /* stub: count call, free(edata) */
 	pai_test_allocator_t *ta = (pai_test_allocator_t *)self;
+	*deferred_work_generated = false; /* this stub defers nothing */
 	ta->dalloc_count++;
 	free(edata);
 }

 static void
 pai_test_allocator_dalloc_batch(tsdn_t *tsdn, pai_t *self,
-    edata_list_active_t *list) {
+    edata_list_active_t *list, bool *deferred_work_generated) {
 	pai_test_allocator_t *ta = (pai_test_allocator_t *)self;
+	*deferred_work_generated = false;

 	edata_t *edata;
 	while ((edata = edata_list_active_first(list)) != NULL) {
@ -168,14 +177,15 @@ TEST_BEGIN(test_reuse) {
 	enum { NALLOCS = 11 };
 	edata_t *one_page[NALLOCS];
 	edata_t *two_page[NALLOCS];
+	bool deferred_work_generated;
 	test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ 2 * PAGE,
 	    /* max_bytes */ 2 * (NALLOCS * PAGE + NALLOCS * 2 * PAGE));
 	for (int i = 0; i < NALLOCS; i++) {
 		one_page[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE,
-		    /* zero */ false);
+		    /* zero */ false, &deferred_work_generated);
 		expect_ptr_not_null(one_page[i], "Unexpected alloc failure");
 		two_page[i] = pai_alloc(tsdn, &sec.pai, 2 * PAGE, PAGE,
-		    /* zero */ false);
+		    /* zero */ false, &deferred_work_generated);
 		expect_ptr_not_null(one_page[i], "Unexpected alloc failure");
 	}
 	expect_zu_eq(0, ta.alloc_count, "Should be using batch allocs");
@ -189,10 +199,12 @@ TEST_BEGIN(test_reuse) {
 	 * separation works correctly.
 	 */
 	for (int i = NALLOCS - 1; i >= 0; i--) {
-		pai_dalloc(tsdn, &sec.pai, one_page[i]);
+		pai_dalloc(tsdn, &sec.pai, one_page[i],
+		    &deferred_work_generated);
 	}
 	for (int i = NALLOCS - 1; i >= 0; i--) {
-		pai_dalloc(tsdn, &sec.pai, two_page[i]);
+		pai_dalloc(tsdn, &sec.pai, two_page[i],
+		    &deferred_work_generated);
 	}
 	expect_zu_eq(max_allocs, ta.alloc_count + ta.alloc_batch_count,
 	    "Incorrect number of allocations");
@ -204,9 +216,9 @@ TEST_BEGIN(test_reuse) {
 	 */
 	for (int i = 0; i < NALLOCS; i++) {
 		edata_t *alloc1 = pai_alloc(tsdn, &sec.pai, PAGE, PAGE,
-		    /* zero */ false);
+		    /* zero */ false, &deferred_work_generated);
 		edata_t *alloc2 = pai_alloc(tsdn, &sec.pai, 2 * PAGE, PAGE,
-		    /* zero */ false);
+		    /* zero */ false, &deferred_work_generated);
 		expect_ptr_eq(one_page[i], alloc1,
 		    "Got unexpected allocation");
 		expect_ptr_eq(two_page[i], alloc2,
@ -238,14 +250,16 @@ TEST_BEGIN(test_auto_flush) {
 	enum { NALLOCS = 10 };
 	edata_t *extra_alloc;
 	edata_t *allocs[NALLOCS];
+	bool deferred_work_generated;
 	test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ PAGE,
 	    /* max_bytes */ NALLOCS * PAGE);
 	for (int i = 0; i < NALLOCS; i++) {
 		allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE,
-		    /* zero */ false);
+		    /* zero */ false, &deferred_work_generated);
 		expect_ptr_not_null(allocs[i], "Unexpected alloc failure");
 	}
-	extra_alloc = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false);
+	extra_alloc = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false,
+	    &deferred_work_generated);
 	expect_ptr_not_null(extra_alloc, "Unexpected alloc failure");
 	size_t max_allocs = ta.alloc_count + ta.alloc_batch_count;
 	expect_zu_le(NALLOCS + 1, max_allocs,
@ -254,7 +268,7 @@ TEST_BEGIN(test_auto_flush) {
 	    "Incorrect number of allocations");
 	/* Free until the SEC is full, but should not have flushed yet. */
 	for (int i = 0; i < NALLOCS; i++) {
-		pai_dalloc(tsdn, &sec.pai, allocs[i]);
+		pai_dalloc(tsdn, &sec.pai, allocs[i], &deferred_work_generated);
 	}
 	expect_zu_le(NALLOCS + 1, max_allocs,
 	    "Incorrect number of allocations");
@ -267,7 +281,7 @@ TEST_BEGIN(test_auto_flush) {
 	 * entirety when it decides to do so, and it has only one bin active
 	 * right now.
 	 */
-	pai_dalloc(tsdn, &sec.pai, extra_alloc);
+	pai_dalloc(tsdn, &sec.pai, extra_alloc, &deferred_work_generated);
 	expect_zu_eq(max_allocs, ta.alloc_count + ta.alloc_batch_count,
 	    "Incorrect number of allocations");
 	expect_zu_eq(0, ta.dalloc_count,
@ -291,16 +305,17 @@ do_disable_flush_test(bool is_disable) {

 	enum { NALLOCS = 11 };
 	edata_t *allocs[NALLOCS];
+	bool deferred_work_generated;
 	test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ PAGE,
 	    /* max_bytes */ NALLOCS * PAGE);
 	for (int i = 0; i < NALLOCS; i++) {
 		allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE,
-		    /* zero */ false);
+		    /* zero */ false, &deferred_work_generated);
 		expect_ptr_not_null(allocs[i], "Unexpected alloc failure");
 	}
 	/* Free all but the last aloc. */
 	for (int i = 0; i < NALLOCS - 1; i++) {
-		pai_dalloc(tsdn, &sec.pai, allocs[i]);
+		pai_dalloc(tsdn, &sec.pai, allocs[i], &deferred_work_generated);
 	}
 	size_t max_allocs = ta.alloc_count + ta.alloc_batch_count;

@ -326,7 +341,8 @@ do_disable_flush_test(bool is_disable) {
 	 * If we free into a disabled SEC, it should forward to the fallback.
 	 * Otherwise, the SEC should accept the allocation.
 	 */
-	pai_dalloc(tsdn, &sec.pai, allocs[NALLOCS - 1]);
+	pai_dalloc(tsdn, &sec.pai, allocs[NALLOCS - 1],
+	    &deferred_work_generated);

 	expect_zu_eq(max_allocs, ta.alloc_count + ta.alloc_batch_count,
 	    "Incorrect number of allocations");
@ -356,6 +372,8 @@ TEST_BEGIN(test_max_alloc_respected) {
 	size_t max_alloc = 2 * PAGE;
 	size_t attempted_alloc = 3 * PAGE;

+	bool deferred_work_generated;
+
 	test_sec_init(&sec, &ta.pai, /* nshards */ 1, max_alloc,
 	    /* max_bytes */ 1000 * PAGE);

@ -365,13 +383,13 @@ TEST_BEGIN(test_max_alloc_respected) {
 		expect_zu_eq(i, ta.dalloc_count,
 		    "Incorrect number of deallocations");
 		edata_t *edata = pai_alloc(tsdn, &sec.pai, attempted_alloc,
-		    PAGE, /* zero */ false);
+		    PAGE, /* zero */ false, &deferred_work_generated);
 		expect_ptr_not_null(edata, "Unexpected alloc failure");
 		expect_zu_eq(i + 1, ta.alloc_count,
 		    "Incorrect number of allocations");
 		expect_zu_eq(i, ta.dalloc_count,
 		    "Incorrect number of deallocations");
-		pai_dalloc(tsdn, &sec.pai, edata);
+		pai_dalloc(tsdn, &sec.pai, edata, &deferred_work_generated);
 	}
 }
 TEST_END
@ -387,27 +405,31 @@ TEST_BEGIN(test_expand_shrink_delegate) {
 	/* See the note above -- we can't use the real tsd. */
 	tsdn_t *tsdn = TSDN_NULL;

+	bool deferred_work_generated;
+
 	test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ 10 * PAGE,
 	    /* max_bytes */ 1000 * PAGE);
 	edata_t *edata = pai_alloc(tsdn, &sec.pai, PAGE, PAGE,
-	    /* zero */ false);
+	    /* zero */ false, &deferred_work_generated);
 	expect_ptr_not_null(edata, "Unexpected alloc failure");

 	bool err = pai_expand(tsdn, &sec.pai, edata, PAGE, 4 * PAGE,
-	    /* zero */ false);
+	    /* zero */ false, &deferred_work_generated);
 	expect_false(err, "Unexpected expand failure");
 	expect_zu_eq(1, ta.expand_count, "");
 	ta.expand_return_value = true;
 	err = pai_expand(tsdn, &sec.pai, edata, 4 * PAGE, 3 * PAGE,
-	    /* zero */ false);
+	    /* zero */ false, &deferred_work_generated);
 	expect_true(err, "Unexpected expand success");
 	expect_zu_eq(2, ta.expand_count, "");

-	err = pai_shrink(tsdn, &sec.pai, edata, 4 * PAGE, 2 * PAGE);
+	err = pai_shrink(tsdn, &sec.pai, edata, 4 * PAGE, 2 * PAGE,
+	    &deferred_work_generated);
 	expect_false(err, "Unexpected shrink failure");
 	expect_zu_eq(1, ta.shrink_count, "");
 	ta.shrink_return_value = true;
-	err = pai_shrink(tsdn, &sec.pai, edata, 2 * PAGE, PAGE);
+	err = pai_shrink(tsdn, &sec.pai, edata, 2 * PAGE, PAGE,
+	    &deferred_work_generated);
 	expect_true(err, "Unexpected shrink success");
 	expect_zu_eq(2, ta.shrink_count, "");
 }
@ -426,9 +448,10 @@ TEST_BEGIN(test_nshards_0) {
 	opts.nshards = 0;
 	sec_init(TSDN_NULL, &sec, base, &ta.pai, &opts);

+	bool deferred_work_generated;
 	edata_t *edata = pai_alloc(tsdn, &sec.pai, PAGE, PAGE,
-	    /* zero */ false);
-	pai_dalloc(tsdn, &sec.pai, edata);
+	    /* zero */ false, &deferred_work_generated);
+	pai_dalloc(tsdn, &sec.pai, edata, &deferred_work_generated);

 	/* Both operations should have gone directly to the fallback. */
 	expect_zu_eq(1, ta.alloc_count, "");
@ -461,25 +484,28 @@ TEST_BEGIN(test_stats_simple) {
 		FLUSH_PAGES = 20,
 	};

+	bool deferred_work_generated;
+
 	test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ PAGE,
 	    /* max_bytes */ FLUSH_PAGES * PAGE);

 	edata_t *allocs[FLUSH_PAGES];
 	for (size_t i = 0; i < FLUSH_PAGES; i++) {
 		allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE,
-		    /* zero */ false);
+		    /* zero */ false, &deferred_work_generated);
 		expect_stats_pages(tsdn, &sec, 0);
 	}

 	/* Increase and decrease, without flushing. */
 	for (size_t i = 0; i < NITERS; i++) {
 		for (size_t j = 0; j < FLUSH_PAGES / 2; j++) {
-			pai_dalloc(tsdn, &sec.pai, allocs[j]);
+			pai_dalloc(tsdn, &sec.pai, allocs[j],
+			    &deferred_work_generated);
 			expect_stats_pages(tsdn, &sec, j + 1);
 		}
 		for (size_t j = 0; j < FLUSH_PAGES / 2; j++) {
 			allocs[j] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE,
-			    /* zero */ false);
+			    /* zero */ false, &deferred_work_generated);
 			expect_stats_pages(tsdn, &sec, FLUSH_PAGES / 2 - j - 1);
 		}
 	}
@ -505,25 +531,30 @@ TEST_BEGIN(test_stats_auto_flush) {
 	edata_t *extra_alloc1;
 	edata_t *allocs[2 * FLUSH_PAGES];

-	extra_alloc0 = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false);
-	extra_alloc1 = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false);
+	bool deferred_work_generated;
+
+	extra_alloc0 = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false,
+	    &deferred_work_generated);
+	extra_alloc1 = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false,
+	    &deferred_work_generated);

 	for (size_t i = 0; i < 2 * FLUSH_PAGES; i++) {
 		allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE,
-		    /* zero */ false);
+		    /* zero */ false, &deferred_work_generated);
 	}

 	for (size_t i = 0; i < FLUSH_PAGES; i++) {
-		pai_dalloc(tsdn, &sec.pai, allocs[i]);
+		pai_dalloc(tsdn, &sec.pai, allocs[i], &deferred_work_generated);
 	}
-	pai_dalloc(tsdn, &sec.pai, extra_alloc0);
+	pai_dalloc(tsdn, &sec.pai, extra_alloc0, &deferred_work_generated);

 	/* Flush the remaining pages; stats should still work. */
 	for (size_t i = 0; i < FLUSH_PAGES; i++) {
-		pai_dalloc(tsdn, &sec.pai, allocs[FLUSH_PAGES + i]);
+		pai_dalloc(tsdn, &sec.pai, allocs[FLUSH_PAGES + i],
+		    &deferred_work_generated);
 	}

-	pai_dalloc(tsdn, &sec.pai, extra_alloc1);
+	pai_dalloc(tsdn, &sec.pai, extra_alloc1, &deferred_work_generated);

 	expect_stats_pages(tsdn, &sec, ta.alloc_count + ta.alloc_batch_count
 	    - ta.dalloc_count - ta.dalloc_batch_count);
@ -545,16 +576,17 @@ TEST_BEGIN(test_stats_manual_flush) {
 	test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ PAGE,
 	    /* max_bytes */ FLUSH_PAGES * PAGE);

+	bool deferred_work_generated;
 	edata_t *allocs[FLUSH_PAGES];
 	for (size_t i = 0; i < FLUSH_PAGES; i++) {
 		allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE,
-		    /* zero */ false);
+		    /* zero */ false, &deferred_work_generated);
 		expect_stats_pages(tsdn, &sec, 0);
 	}

 	/* Dalloc the first half of the allocations. */
 	for (size_t i = 0; i < FLUSH_PAGES / 2; i++) {
-		pai_dalloc(tsdn, &sec.pai, allocs[i]);
+		pai_dalloc(tsdn, &sec.pai, allocs[i], &deferred_work_generated);
 		expect_stats_pages(tsdn, &sec, i + 1);
 	}

@ -563,7 +595,8 @@ TEST_BEGIN(test_stats_manual_flush) {

 	/* Flush the remaining pages. */
 	for (size_t i = 0; i < FLUSH_PAGES / 2; i++) {
-		pai_dalloc(tsdn, &sec.pai, allocs[FLUSH_PAGES / 2 + i]);
+		pai_dalloc(tsdn, &sec.pai, allocs[FLUSH_PAGES / 2 + i],
+		    &deferred_work_generated);
 		expect_stats_pages(tsdn, &sec, i + 1);
 	}
 	sec_disable(tsdn, &sec);