diff --git a/Makefile.in b/Makefile.in
index 40c41442..11a553b0 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -132,6 +132,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \
 	$(srcroot)src/nstime.c \
 	$(srcroot)src/pa.c \
 	$(srcroot)src/pa_extra.c \
+	$(srcroot)src/pai.c \
 	$(srcroot)src/pac.c \
 	$(srcroot)src/pages.c \
 	$(srcroot)src/peak_event.c \
diff --git a/include/jemalloc/internal/pai.h b/include/jemalloc/internal/pai.h
index 45edd69c..f7f3e077 100644
--- a/include/jemalloc/internal/pai.h
+++ b/include/jemalloc/internal/pai.h
@@ -13,6 +13,8 @@ struct pai_s {
 	bool (*shrink)(tsdn_t *tsdn, pai_t *self, edata_t *edata,
 	    size_t old_size, size_t new_size);
 	void (*dalloc)(tsdn_t *tsdn, pai_t *self, edata_t *edata);
+	void (*dalloc_batch)(tsdn_t *tsdn, pai_t *self,
+	    edata_list_active_t *list);
 };
 
 /*
@@ -42,4 +44,16 @@ pai_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) {
 	self->dalloc(tsdn, self, edata);
 }
 
+static inline void
+pai_dalloc_batch(tsdn_t *tsdn, pai_t *self, edata_list_active_t *list) {
+	self->dalloc_batch(tsdn, self, list);
+}
+
+/*
+ * An implementation of batch deallocation that simply calls dalloc once for
+ * each item in the list.
+ */
+void pai_dalloc_batch_default(tsdn_t *tsdn, pai_t *self,
+    edata_list_active_t *list);
+
 #endif /* JEMALLOC_INTERNAL_PAI_H */
diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj
index a93511d1..9ec953a2 100644
--- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj
+++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj
@@ -73,6 +73,7 @@
     <ClCompile Include="..\..\..\..\src\nstime.c" />
     <ClCompile Include="..\..\..\..\src\pa.c" />
     <ClCompile Include="..\..\..\..\src\pa_extra.c" />
+    <ClCompile Include="..\..\..\..\src\pai.c" />
     <ClCompile Include="..\..\..\..\src\pac.c" />
     <ClCompile Include="..\..\..\..\src\pages.c" />
     <ClCompile Include="..\..\..\..\src\peak_event.c" />
diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters
index 06460e5a..210204a5 100644
--- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters
+++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters
@@ -103,6 +103,9 @@
     <ClCompile Include="..\..\..\..\src\pa_extra.c">
       <Filter>Source Files</Filter>
     </ClCompile>
+    <ClCompile Include="..\..\..\..\src\pai.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
     <ClCompile Include="..\..\..\..\src\pac.c">
       <Filter>Source Files</Filter>
     </ClCompile>
diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj
index 916460a7..171b95f2 100644
--- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj
+++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj
@@ -73,6 +73,7 @@
     <ClCompile Include="..\..\..\..\src\nstime.c" />
    <ClCompile Include="..\..\..\..\src\pa.c" />
    <ClCompile Include="..\..\..\..\src\pa_extra.c" />
+    <ClCompile Include="..\..\..\..\src\pai.c" />
    <ClCompile Include="..\..\..\..\src\pac.c" />
    <ClCompile Include="..\..\..\..\src\pages.c" />
    <ClCompile Include="..\..\..\..\src\peak_event.c" />
diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters
index 06460e5a..210204a5 100644
--- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters
+++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters
@@ -103,6 +103,9 @@
     <ClCompile Include="..\..\..\..\src\pa_extra.c">
       <Filter>Source Files</Filter>
     </ClCompile>
+    <ClCompile Include="..\..\..\..\src\pai.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
     <ClCompile Include="..\..\..\..\src\pac.c">
       <Filter>Source Files</Filter>
     </ClCompile>
diff --git a/src/hpa.c b/src/hpa.c
index 3c706cbf..013cd7ed 100644
--- a/src/hpa.c
+++ b/src/hpa.c
@@ -91,6 +91,7 @@ hpa_shard_init(hpa_shard_t *shard, emap_t *emap, base_t *base,
 	shard->pai.expand = &hpa_expand;
 	shard->pai.shrink = &hpa_shrink;
 	shard->pai.dalloc = &hpa_dalloc;
+	shard->pai.dalloc_batch = &pai_dalloc_batch_default;
 
 	return false;
 }
diff --git a/src/pac.c b/src/pac.c
index 80646155..0ba0f2f0 100644
--- a/src/pac.c
+++ b/src/pac.c
@@ -94,6 +94,7 @@ pac_init(tsdn_t *tsdn, pac_t *pac, base_t *base, emap_t *emap,
 	pac->pai.expand = &pac_expand_impl;
 	pac->pai.shrink = &pac_shrink_impl;
 	pac->pai.dalloc = &pac_dalloc_impl;
+	pac->pai.dalloc_batch = &pai_dalloc_batch_default;
 
 	return false;
 }
diff --git a/src/pai.c b/src/pai.c
new file mode 100644
index 00000000..1035c850
--- /dev/null
+++ b/src/pai.c
@@ -0,0 +1,13 @@
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/jemalloc_internal_includes.h"
+
+
+void
+pai_dalloc_batch_default(tsdn_t *tsdn, pai_t *self,
+    edata_list_active_t *list) {
+	edata_t *edata;
+	while ((edata = edata_list_active_first(list)) != NULL) {
+		edata_list_active_remove(list, edata);
+		pai_dalloc(tsdn, self, edata);
+	}
+}
diff --git a/src/sec.c b/src/sec.c
index 262d813d..41e75b9e 100644
--- a/src/sec.c
+++ b/src/sec.c
@@ -46,6 +46,7 @@ bool sec_init(sec_t *sec, pai_t *fallback, size_t nshards, size_t alloc_max,
 	sec->pai.expand = &sec_expand;
 	sec->pai.shrink = &sec_shrink;
 	sec->pai.dalloc = &sec_dalloc;
+	sec->pai.dalloc_batch = &pai_dalloc_batch_default;
 
 	return false;
 }
@@ -142,18 +143,13 @@ sec_do_flush_locked(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard) {
 	for (pszind_t i = 0; i < SEC_NPSIZES; i++) {
 		edata_list_active_concat(&to_flush, &shard->freelist[i]);
 	}
-	/*
-	 * A better way to do this would be to add a batch dalloc function to
-	 * the pai_t. Practically, the current method turns into O(n) locks and
-	 * unlocks at the fallback allocator. Some kind of condensed fallback (the
-	 * HPA) can straightforwardly do many deallocations in a single lock /
-	 * unlock pair.
-	 */
-	while (!edata_list_active_empty(&to_flush)) {
-		edata_t *e = edata_list_active_first(&to_flush);
-		edata_list_active_remove(&to_flush, e);
-		pai_dalloc(tsdn, sec->fallback, e);
-	}
+
+	/*
+	 * Flush the whole list in one batch call: a condensed fallback (the
+	 * HPA) can do many deallocations in a single lock / unlock pair,
+	 * rather than the O(n) locks and unlocks of per-edata dalloc calls.
+	 */
+	pai_dalloc_batch(tsdn, sec->fallback, &to_flush);
 }
 
 static void
diff --git a/test/unit/sec.c b/test/unit/sec.c
index cb0c17d1..7657537b 100644
--- a/test/unit/sec.c
+++ b/test/unit/sec.c
@@ -8,6 +8,7 @@ struct pai_test_allocator_s {
 	bool alloc_fail;
 	size_t alloc_count;
 	size_t dalloc_count;
+	size_t dalloc_batch_count;
 	/*
 	 * We use a simple bump allocator as the implementation. This isn't
 	 * *really* correct, since we may allow expansion into a subsequent
@@ -64,11 +65,25 @@ pai_test_allocator_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) {
 	free(edata);
 }
 
+static void
+pai_test_allocator_dalloc_batch(tsdn_t *tsdn, pai_t *self,
+    edata_list_active_t *list) {
+	pai_test_allocator_t *ta = (pai_test_allocator_t *)self;
+
+	edata_t *edata;
+	while ((edata = edata_list_active_first(list)) != NULL) {
+		edata_list_active_remove(list, edata);
+		ta->dalloc_batch_count++;
+		free(edata);
+	}
+}
+
 static inline void
 pai_test_allocator_init(pai_test_allocator_t *ta) {
 	ta->alloc_fail = false;
 	ta->alloc_count = 0;
 	ta->dalloc_count = 0;
+	ta->dalloc_batch_count = 0;
 	/* Just don't start the edata at 0. */
 	ta->next_ptr = 10 * PAGE;
 	ta->expand_count = 0;
@@ -79,6 +94,7 @@ pai_test_allocator_init(pai_test_allocator_t *ta) {
 	ta->pai.expand = &pai_test_allocator_expand;
 	ta->pai.shrink = &pai_test_allocator_shrink;
 	ta->pai.dalloc = &pai_test_allocator_dalloc;
+	ta->pai.dalloc_batch = &pai_test_allocator_dalloc_batch;
 }
 
 TEST_BEGIN(test_reuse) {
@@ -190,8 +206,10 @@ TEST_BEGIN(test_auto_flush) {
 	pai_dalloc(tsdn, &sec.pai, extra_alloc);
 	expect_zu_eq(NALLOCS + 1, ta.alloc_count,
 	    "Incorrect number of allocations");
-	expect_zu_eq(NALLOCS + 1, ta.dalloc_count,
-	    "Incorrect number of deallocations");
+	expect_zu_eq(0, ta.dalloc_count,
+	    "Incorrect number of (non-batch) deallocations");
+	expect_zu_eq(NALLOCS + 1, ta.dalloc_batch_count,
+	    "Incorrect number of batch deallocations");
 }
 TEST_END
 
@@ -233,8 +251,10 @@ do_disable_flush_test(bool is_disable) {
 
 	expect_zu_eq(NALLOCS, ta.alloc_count,
 	    "Incorrect number of allocations");
-	expect_zu_eq(NALLOCS - 1, ta.dalloc_count,
-	    "Incorrect number of deallocations");
+	expect_zu_eq(0, ta.dalloc_count,
+	    "Incorrect number of (non-batch) deallocations");
+	expect_zu_eq(NALLOCS - 1, ta.dalloc_batch_count,
+	    "Incorrect number of batch deallocations");
 
 	/*
 	 * If we free into a disabled SEC, it should forward to the fallback.
@@ -244,8 +264,10 @@ do_disable_flush_test(bool is_disable) {
 
 	expect_zu_eq(NALLOCS, ta.alloc_count,
 	    "Incorrect number of allocations");
-	expect_zu_eq(is_disable ? NALLOCS : NALLOCS - 1, ta.dalloc_count,
-	    "Incorrect number of deallocations");
+	expect_zu_eq(is_disable ? 1 : 0, ta.dalloc_count,
+	    "Incorrect number of (non-batch) deallocations");
+	expect_zu_eq(NALLOCS - 1, ta.dalloc_batch_count,
+	    "Incorrect number of batch deallocations");
 }
 
 TEST_BEGIN(test_disable) {
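
The default dalloc_batch still costs one pai_dalloc call per edata, so the hook pays off most for a backend that guards its state with a single mutex and can take it once per flush. The sketch below is illustrative only and not part of this patch: every "toy_*" name is hypothetical, while pai_t, edata_t, edata_list_active_t, the edata_list_active_* helpers, and malloc_mutex_lock/unlock are the jemalloc internals already used in the diff above.

/*
 * Hypothetical backend (not in this commit): dalloc_batch acquires the
 * lock once per flush instead of once per edata.
 */
typedef struct toy_pai_s {
	pai_t pai;		/* First member, so a pai_t * casts back to toy_pai_t *. */
	malloc_mutex_t mtx;	/* Guards the backend's internal state. */
} toy_pai_t;

/* Hypothetical helper: deallocate one edata with mtx already held. */
static void toy_dalloc_locked(tsdn_t *tsdn, toy_pai_t *toy, edata_t *edata);

static void
toy_dalloc_batch(tsdn_t *tsdn, pai_t *self, edata_list_active_t *list) {
	toy_pai_t *toy = (toy_pai_t *)self;
	malloc_mutex_lock(tsdn, &toy->mtx);
	/* One lock / unlock pair covers the entire list. */
	edata_t *edata;
	while ((edata = edata_list_active_first(list)) != NULL) {
		edata_list_active_remove(list, edata);
		toy_dalloc_locked(tsdn, toy, edata);
	}
	malloc_mutex_unlock(tsdn, &toy->mtx);
}

Wiring follows the same pattern as hpa.c and pac.c above (toy->pai.dalloc_batch = &toy_dalloc_batch;); a backend with nothing to amortize can simply keep &pai_dalloc_batch_default.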