diff --git a/.appveyor.yml b/.appveyor.yml
index 9a7d00a9..90b03688 100644
--- a/.appveyor.yml
+++ b/.appveyor.yml
@@ -5,27 +5,27 @@ environment:
   - MSYSTEM: MINGW64
     CPU: x86_64
     MSVC: amd64
+    CONFIG_FLAGS: --enable-debug
+  - MSYSTEM: MINGW64
+    CPU: x86_64
+    CONFIG_FLAGS: --enable-debug
   - MSYSTEM: MINGW32
     CPU: i686
     MSVC: x86
-  - MSYSTEM: MINGW64
-    CPU: x86_64
+    CONFIG_FLAGS: --enable-debug
   - MSYSTEM: MINGW32
     CPU: i686
+    CONFIG_FLAGS: --enable-debug
   - MSYSTEM: MINGW64
     CPU: x86_64
     MSVC: amd64
-    CONFIG_FLAGS: --enable-debug
+  - MSYSTEM: MINGW64
+    CPU: x86_64
   - MSYSTEM: MINGW32
     CPU: i686
     MSVC: x86
-    CONFIG_FLAGS: --enable-debug
-  - MSYSTEM: MINGW64
-    CPU: x86_64
-    CONFIG_FLAGS: --enable-debug
   - MSYSTEM: MINGW32
     CPU: i686
-    CONFIG_FLAGS: --enable-debug
 
 install:
   - set PATH=c:\msys64\%MSYSTEM%\bin;c:\msys64\usr\bin;%PATH%
diff --git a/.travis.yml b/.travis.yml
index 40b2eb5f..2da5da8e 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -23,6 +23,8 @@ matrix:
       env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
     - os: linux
       env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+    - os: linux
+      env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
     - os: linux
       env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
     - os: linux
@@ -41,6 +43,8 @@ matrix:
       env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
     - os: osx
       env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+    - os: osx
+      env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
     - os: osx
       env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
     - os: linux
@@ -54,6 +58,8 @@ matrix:
       env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
     - os: linux
       env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+    - os: linux
+      env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
     - os: linux
       env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
     - os: linux
@@ -74,6 +80,9 @@ matrix:
     - os: linux
       env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
       addons: *gcc_multilib
+    - os: linux
+      env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+      addons: *gcc_multilib
     - os: linux
       env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
       addons: *gcc_multilib
@@ -92,6 +101,8 @@ matrix:
       env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
     - os: linux
       env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+    - os: linux
+      env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
     - os: linux
       env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
     - os: linux
@@ -104,6 +115,8 @@ matrix:
       env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
     - os: linux
       env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+    - os: linux
+      env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
     - os: linux
       env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
     - os: linux
@@ -114,6 +127,8 @@ matrix:
       env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
     - os: linux
       env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+    - os: linux
+      env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
     - os: linux
       env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
     - os: linux
@@ -122,6 +137,8 @@ matrix:
       env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
     - os: linux
       env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+    - os: linux
+      env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl --enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
     - os: linux
       env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
     - os: linux
@@ -130,6 +147,14 @@ matrix:
       env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
     - os: linux
       env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+    - os: linux
+      env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+    - os: linux
+      env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+    - os: linux
+      env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+    - os: linux
+      env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
     - os: linux
       env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false,dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
     - os: linux
diff --git a/ChangeLog b/ChangeLog
index 7c73a8f2..e55813b7 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -4,6 +4,39 @@ brevity.  Much more detail can be found in the git revision history:
 
     https://github.com/jemalloc/jemalloc
 
+* 5.2.1 (August 5, 2019)
+
+  This release is primarily about Windows.  A critical virtual memory leak is
+  resolved on all Windows platforms.  The regression was present in all releases
+  since 5.0.0.
+
+  Bug fixes:
+  - Fix a severe virtual memory leak on Windows.  This regression was first
+    released in 5.0.0.  (@Ignition, @j0t, @frederik-h, @davidtgoldblatt,
+    @interwq)
+  - Fix size 0 handling in posix_memalign().  This regression was first released
+    in 5.2.0.  (@interwq)
+  - Fix the prof_log unit test which may observe unexpected backtraces from
+    compiler optimizations.  The test was first added in 5.2.0.  (@marxin,
+    @gnzlbg, @interwq)
+  - Fix the declaration of the extent_avail tree.  This regression was first
+    released in 5.1.0.  (@zoulasc)
+  - Fix an incorrect reference in jeprof.  This functionality was first released
+    in 3.0.0.  (@prehistoric-penguin)
+  - Fix an assertion on the deallocation fast-path.  This regression was first
+    released in 5.2.0.  (@yinan1048576)
+  - Fix the TLS_MODEL attribute in headers.  This regression was first released
+    in 5.0.0.  (@zoulasc, @interwq)
+
+  Optimizations and refactors:
+  - Implement opt.retain on Windows and enable by default on 64-bit.  (@interwq,
+    @davidtgoldblatt)
+  - Optimize away a branch on the operator delete[] path.  (@mgrice)
+  - Add format annotation to the format generator function.  (@zoulasc)
+  - Refactor and improve the size class header generation.  (@yinan1048576)
+  - Remove best fit.  (@djwatson)
+  - Avoid blocking on background thread locks for stats.  (@oranagra, @interwq)
+
 * 5.2.0 (April 2, 2019)
 
   This release includes a few notable improvements, which are summarized below:
diff --git a/Makefile.in b/Makefile.in
index 0777f6a8..7128b007 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -56,6 +56,7 @@ cfghdrs_out := @cfghdrs_out@
 cfgoutputs_in := $(addprefix $(srcroot),@cfgoutputs_in@)
 cfgoutputs_out := @cfgoutputs_out@
 enable_autogen := @enable_autogen@
+enable_doc := @enable_doc@
 enable_shared := @enable_shared@
 enable_static := @enable_static@
 enable_prof := @enable_prof@
@@ -117,6 +118,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \
 	$(srcroot)src/prng.c \
 	$(srcroot)src/prof.c \
 	$(srcroot)src/rtree.c \
+	$(srcroot)src/safety_check.c \
 	$(srcroot)src/stats.c \
 	$(srcroot)src/sc.c \
 	$(srcroot)src/sz.c \
@@ -178,6 +180,7 @@ TESTS_UNIT := \
 	$(srcroot)test/unit/div.c \
 	$(srcroot)test/unit/emitter.c \
 	$(srcroot)test/unit/extent_quantize.c \
+	$(srcroot)test/unit/extent_util.c \
 	$(srcroot)test/unit/fork.c \
 	$(srcroot)test/unit/hash.c \
 	$(srcroot)test/unit/hook.c \
@@ -208,6 +211,7 @@ TESTS_UNIT := \
 	$(srcroot)test/unit/rb.c \
 	$(srcroot)test/unit/retained.c \
 	$(srcroot)test/unit/rtree.c \
+	$(srcroot)test/unit/safety_check.c \
 	$(srcroot)test/unit/seq.c \
 	$(srcroot)test/unit/SFMT.c \
 	$(srcroot)test/unit/sc.c \
@@ -513,7 +517,11 @@ done
 
 install_doc: build_doc install_doc_html install_doc_man
 
-install: install_bin install_include install_lib install_doc
+install: install_bin install_include install_lib
+
+ifeq ($(enable_doc), 1)
+install: install_doc
+endif
 
 tests_unit: $(TESTS_UNIT:$(srcroot)%.c=$(objroot)%$(EXE))
 tests_integration: $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%$(EXE)) $(TESTS_INTEGRATION_CPP:$(srcroot)%.cpp=$(objroot)%$(EXE))
diff --git a/bin/jeprof.in b/bin/jeprof.in
index 588c6b43..3ed408c9 100644
--- a/bin/jeprof.in
+++ b/bin/jeprof.in
@@ -2909,6 +2909,7 @@ sub RemoveUninterestingFrames {
                       '@JEMALLOC_PREFIX@xallocx',
                       '@JEMALLOC_PREFIX@dallocx',
                       '@JEMALLOC_PREFIX@sdallocx',
+                      '@JEMALLOC_PREFIX@sdallocx_noflags',
                       'tc_calloc',
                       'tc_cfree',
                       'tc_malloc',
@@ -5366,7 +5367,7 @@ sub GetProcedureBoundaries {
   my $demangle_flag = "";
   my $cppfilt_flag = "";
   my $to_devnull = ">$dev_null 2>&1";
-  if (system(ShellEscape($nm, "--demangle", "image") . $to_devnull) == 0) {
+  if (system(ShellEscape($nm, "--demangle", $image) . $to_devnull) == 0) {
     # In this mode, we do "nm --demangle <foo>"
     $demangle_flag = "--demangle";
     $cppfilt_flag = "";
diff --git a/configure.ac b/configure.ac
index 96f76d35..261d81c0 100644
--- a/configure.ac
+++ b/configure.ac
@@ -738,6 +738,9 @@ case "${host}" in
 	libprefix=""
 	SOREV="${so}"
 	PIC_CFLAGS=""
+	if test "${LG_SIZEOF_PTR}" = "3"; then
+	  default_retain="1"
+	fi
 	;;
   *)
 	AC_MSG_RESULT([Unsupported operating system: ${host}])
@@ -851,6 +854,18 @@ if test "x${je_cv_format_printf}" = "xyes" ; then
   AC_DEFINE([JEMALLOC_HAVE_ATTR_FORMAT_PRINTF], [ ])
 fi
 
+dnl Check for format_arg(...) attribute support.
+JE_CFLAGS_SAVE()
+JE_CFLAGS_ADD([-Werror])
+JE_CFLAGS_ADD([-herror_on_warning])
+JE_COMPILABLE([format(printf, ...) attribute], [#include <stdlib.h>],
+              [const char * __attribute__((__format_arg__(1))) foo(const char *format);],
+              [je_cv_format_arg])
+JE_CFLAGS_RESTORE()
+if test "x${je_cv_format_arg}" = "xyes" ; then
+  AC_DEFINE([JEMALLOC_HAVE_ATTR_FORMAT_ARG], [ ])
+fi
+
 dnl Support optional additions to rpath.
 AC_ARG_WITH([rpath],
   [AS_HELP_STRING([--with-rpath=<rpath>], [Colon-separated rpath (ELF systems only)])],
@@ -881,6 +896,19 @@ AC_PROG_RANLIB
 AC_PATH_PROG([LD], [ld], [false], [$PATH])
 AC_PATH_PROG([AUTOCONF], [autoconf], [false], [$PATH])
 
+dnl Enable documentation
+AC_ARG_ENABLE([doc],
+	      [AS_HELP_STRING([--enable-documentation], [Build documentation])],
+if test "x$enable_doc" = "xno" ; then
+  enable_doc="0"
+else
+  enable_doc="1"
+fi
+,
+enable_doc="1"
+)
+AC_SUBST([enable_doc])
+
 dnl Enable shared libs
 AC_ARG_ENABLE([shared],
   [AS_HELP_STRING([--enable-shared], [Build shared libaries])],
@@ -1406,22 +1434,22 @@ if test "x$enable_readlinkat" = "x1" ; then
 fi
 AC_SUBST([enable_readlinkat])
 
-dnl Avoid the extra size checking by default
-AC_ARG_ENABLE([extra-size-check],
-  [AS_HELP_STRING([--enable-extra-size-check],
-  [Perform additonal size related sanity checks])],
-[if test "x$enable_extra_size_check" = "xno" ; then
-  enable_extra_size_check="0"
+dnl Avoid extra safety checks by default
+AC_ARG_ENABLE([opt-safety-checks],
+  [AS_HELP_STRING([--enable-opt-safety-checks],
+  [Perform certain low-overhead checks, even in opt mode])],
+[if test "x$enable_opt_safety_checks" = "xno" ; then
+  enable_opt_safety_checks="0"
 else
-  enable_extra_size_check="1"
+  enable_opt_safety_checks="1"
 fi
 ],
-[enable_extra_size_check="0"]
+[enable_opt_safety_checks="0"]
 )
-if test "x$enable_extra_size_check" = "x1" ; then
-  AC_DEFINE([JEMALLOC_EXTRA_SIZE_CHECK], [ ])
+if test "x$enable_opt_safety_checks" = "x1" ; then
+  AC_DEFINE([JEMALLOC_OPT_SAFETY_CHECKS], [ ])
 fi
-AC_SUBST([enable_extra_size_check])
+AC_SUBST([enable_opt_safety_checks])
 
 JE_COMPILABLE([a program using __builtin_unreachable], [
 void foo (void) {
@@ -2357,6 +2385,7 @@ AC_MSG_RESULT([JEMALLOC_PRIVATE_NAMESPACE])
 AC_MSG_RESULT([                   : ${JEMALLOC_PRIVATE_NAMESPACE}])
 AC_MSG_RESULT([install_suffix     : ${install_suffix}])
 AC_MSG_RESULT([malloc_conf        : ${config_malloc_conf}])
+AC_MSG_RESULT([documentation      : ${enable_doc}])
 AC_MSG_RESULT([shared libs        : ${enable_shared}])
 AC_MSG_RESULT([static libs        : ${enable_static}])
 AC_MSG_RESULT([autogen            : ${enable_autogen}])
diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in
index fd0edb30..7fecda7c 100644
--- a/doc/jemalloc.xml.in
+++ b/doc/jemalloc.xml.in
@@ -424,7 +424,7 @@ for (i = 0; i < nbins; i++) {
       called repeatedly.  General information that never changes during
       execution can be omitted by specifying <quote>g</quote> as a character
       within the <parameter>opts</parameter> string.  Note that
-      <function>malloc_message()</function> uses the
+      <function>malloc_stats_print()</function> uses the
       <function>mallctl*()</function> functions internally, so inconsistent
       statistics can be reported if multiple threads use these functions
       simultaneously.  If <option>--enable-stats</option> is specified during
@@ -904,6 +904,23 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay",
         </para></listitem>
       </varlistentry>
 
+      <varlistentry id="opt.confirm_conf">
+        <term>
+          <mallctl>opt.confirm_conf</mallctl>
+          (<type>bool</type>)
+          <literal>r-</literal>
+        </term>
+	<listitem><para>Confirm-runtime-options-when-program-starts
+	enabled/disabled.  If true, the string specified via
+	<option>--with-malloc-conf</option>, the string pointed to by the
+	global variable <varname>malloc_conf</varname>, the <quote>name</quote>
+	of the file referenced by the symbolic link named
+	<filename class="symlink">/etc/malloc.conf</filename>, and the value of
+	the environment variable <envar>MALLOC_CONF</envar>, will be printed in
+	order.  Then, each option being set will be individually printed.  This
+	option is disabled by default.</para></listitem>
+      </varlistentry>
+
       <varlistentry id="opt.abort_conf">
         <term>
           <mallctl>opt.abort_conf</mallctl>
@@ -946,17 +963,17 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay",
         linkend="stats.retained">stats.retained</link> for related details).
         It also makes jemalloc use <citerefentry>
         <refentrytitle>mmap</refentrytitle><manvolnum>2</manvolnum>
-        </citerefentry> in a more greedy way, mapping larger chunks in one go.
-        This option is disabled by default unless discarding virtual memory is
-        known to trigger
-        platform-specific performance problems, e.g. for [64-bit] Linux, which
-        has a quirk in its virtual memory allocation algorithm that causes
-        semi-permanent VM map holes under normal jemalloc operation.  Although
-        <citerefentry><refentrytitle>munmap</refentrytitle>
-        <manvolnum>2</manvolnum></citerefentry> causes issues on 32-bit Linux as
-        well, retaining virtual memory for 32-bit Linux is disabled by default
-        due to the practical possibility of address space exhaustion.
-        </para></listitem>
+        </citerefentry> or equivalent in a more greedy way, mapping larger
+        chunks in one go.  This option is disabled by default unless discarding
+        virtual memory is known to trigger platform-specific performance
+        problems, namely 1) for [64-bit] Linux, which has a quirk in its virtual
+        memory allocation algorithm that causes semi-permanent VM map holes
+        under normal jemalloc operation; and 2) for [64-bit] Windows, which
+        disallows split / merged regions with
+        <parameter><constant>MEM_RELEASE</constant></parameter>.  Although the
+        same issues may present on 32-bit platforms as well, retaining virtual
+        memory for 32-bit Linux and Windows is disabled by default due to the
+        practical possibility of address space exhaustion.  </para></listitem>
       </varlistentry>
 
       <varlistentry id="opt.dss">
@@ -2798,6 +2815,28 @@ struct extent_hooks_s {
         all bin size classes.</para></listitem>
       </varlistentry>
 
+      <varlistentry id="stats.arenas.i.small.nfills">
+        <term>
+          <mallctl>stats.arenas.&lt;i&gt;.small.nfills</mallctl>
+          (<type>uint64_t</type>)
+          <literal>r-</literal>
+          [<option>--enable-stats</option>]
+        </term>
+        <listitem><para>Cumulative number of tcache fills by all small size
+	classes.</para></listitem>
+      </varlistentry>
+
+      <varlistentry id="stats.arenas.i.small.nflushes">
+        <term>
+          <mallctl>stats.arenas.&lt;i&gt;.small.nflushes</mallctl>
+          (<type>uint64_t</type>)
+          <literal>r-</literal>
+          [<option>--enable-stats</option>]
+        </term>
+        <listitem><para>Cumulative number of tcache flushes by all small size
+        classes.</para></listitem>
+      </varlistentry>
+
       <varlistentry id="stats.arenas.i.large.allocated">
         <term>
           <mallctl>stats.arenas.&lt;i&gt;.large.allocated</mallctl>
@@ -2848,6 +2887,28 @@ struct extent_hooks_s {
         all large size classes.</para></listitem>
       </varlistentry>
 
+      <varlistentry id="stats.arenas.i.large.nfills">
+        <term>
+          <mallctl>stats.arenas.&lt;i&gt;.large.nfills</mallctl>
+          (<type>uint64_t</type>)
+          <literal>r-</literal>
+          [<option>--enable-stats</option>]
+        </term>
+        <listitem><para>Cumulative number of tcache fills by all large size
+	classes.</para></listitem>
+      </varlistentry>
+
+      <varlistentry id="stats.arenas.i.large.nflushes">
+        <term>
+          <mallctl>stats.arenas.&lt;i&gt;.large.nflushes</mallctl>
+          (<type>uint64_t</type>)
+          <literal>r-</literal>
+          [<option>--enable-stats</option>]
+        </term>
+        <listitem><para>Cumulative number of tcache flushes by all large size
+        classes.</para></listitem>
+      </varlistentry>
+
       <varlistentry id="stats.arenas.i.bins.j.nmalloc">
         <term>
           <mallctl>stats.arenas.&lt;i&gt;.bins.&lt;j&gt;.nmalloc</mallctl>
@@ -2947,6 +3008,17 @@ struct extent_hooks_s {
         <listitem><para>Current number of slabs.</para></listitem>
       </varlistentry>
 
+
+      <varlistentry id="stats.arenas.i.bins.j.nonfull_slabs">
+        <term>
+          <mallctl>stats.arenas.&lt;i&gt;.bins.&lt;j&gt;.nonfull_slabs</mallctl>
+          (<type>size_t</type>)
+          <literal>r-</literal>
+          [<option>--enable-stats</option>]
+        </term>
+        <listitem><para>Current number of nonfull slabs.</para></listitem>
+      </varlistentry>
+
       <varlistentry id="stats.arenas.i.bins.mutex">
         <term>
           <mallctl>stats.arenas.&lt;i&gt;.bins.&lt;j&gt;.mutex.{counter}</mallctl>
diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h
index 2bdddb77..a4523ae0 100644
--- a/include/jemalloc/internal/arena_externs.h
+++ b/include/jemalloc/internal/arena_externs.h
@@ -60,7 +60,7 @@ void *arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size,
     szind_t ind, bool zero);
 void *arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize,
     size_t alignment, bool zero, tcache_t *tcache);
-void arena_prof_promote(tsdn_t *tsdn, const void *ptr, size_t usize);
+void arena_prof_promote(tsdn_t *tsdn, void *ptr, size_t usize);
 void arena_dalloc_promoted(tsdn_t *tsdn, void *ptr, tcache_t *tcache,
     bool slow_path);
 void arena_dalloc_bin_junked_locked(tsdn_t *tsdn, arena_t *arena, bin_t *bin,
diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h
index 614deddd..dd926575 100644
--- a/include/jemalloc/internal/arena_inlines_b.h
+++ b/include/jemalloc/internal/arena_inlines_b.h
@@ -90,7 +90,7 @@ arena_prof_alloc_time_get(tsdn_t *tsdn, const void *ptr,
 	assert(ptr != NULL);
 
 	extent_t *extent = iealloc(tsdn, ptr);
-	/* 
+	/*
 	 * Unlike arena_prof_prof_tctx_{get, set}, we only call this once we're
 	 * sure we have a sampled allocation.
 	 */
@@ -228,6 +228,16 @@ arena_vsalloc(tsdn_t *tsdn, const void *ptr) {
 	return sz_index2size(szind);
 }
 
+static inline void
+arena_dalloc_large_no_tcache(tsdn_t *tsdn, void *ptr, szind_t szind) {
+	if (config_prof && unlikely(szind < SC_NBINS)) {
+		arena_dalloc_promoted(tsdn, ptr, NULL, true);
+	} else {
+		extent_t *extent = iealloc(tsdn, ptr);
+		large_dalloc(tsdn, extent);
+	}
+}
+
 static inline void
 arena_dalloc_no_tcache(tsdn_t *tsdn, void *ptr) {
 	assert(ptr != NULL);
@@ -251,6 +261,21 @@ arena_dalloc_no_tcache(tsdn_t *tsdn, void *ptr) {
 	if (likely(slab)) {
 		/* Small allocation. */
 		arena_dalloc_small(tsdn, ptr);
+	} else {
+		arena_dalloc_large_no_tcache(tsdn, ptr, szind);
+	}
+}
+
+JEMALLOC_ALWAYS_INLINE void
+arena_dalloc_large(tsdn_t *tsdn, void *ptr, tcache_t *tcache, szind_t szind,
+    bool slow_path) {
+	if (szind < nhbins) {
+		if (config_prof && unlikely(szind < SC_NBINS)) {
+			arena_dalloc_promoted(tsdn, ptr, tcache, slow_path);
+		} else {
+			tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr, szind,
+			    slow_path);
+		}
 	} else {
 		extent_t *extent = iealloc(tsdn, ptr);
 		large_dalloc(tsdn, extent);
@@ -295,18 +320,7 @@ arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache,
 		tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr, szind,
 		    slow_path);
 	} else {
-		if (szind < nhbins) {
-			if (config_prof && unlikely(szind < SC_NBINS)) {
-				arena_dalloc_promoted(tsdn, ptr, tcache,
-				    slow_path);
-			} else {
-				tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr,
-				    szind, slow_path);
-			}
-		} else {
-			extent_t *extent = iealloc(tsdn, ptr);
-			large_dalloc(tsdn, extent);
-		}
+		arena_dalloc_large(tsdn, ptr, tcache, szind, slow_path);
 	}
 }
 
@@ -349,8 +363,7 @@ arena_sdalloc_no_tcache(tsdn_t *tsdn, void *ptr, size_t size) {
 		/* Small allocation. */
 		arena_dalloc_small(tsdn, ptr);
 	} else {
-		extent_t *extent = iealloc(tsdn, ptr);
-		large_dalloc(tsdn, extent);
+		arena_dalloc_large_no_tcache(tsdn, ptr, szind);
 	}
 }
 
@@ -407,18 +420,7 @@ arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache,
 		tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr, szind,
 		    slow_path);
 	} else {
-		if (szind < nhbins) {
-			if (config_prof && unlikely(szind < SC_NBINS)) {
-				arena_dalloc_promoted(tsdn, ptr, tcache,
-				    slow_path);
-			} else {
-				tcache_dalloc_large(tsdn_tsd(tsdn),
-				    tcache, ptr, szind, slow_path);
-			}
-		} else {
-			extent_t *extent = iealloc(tsdn, ptr);
-			large_dalloc(tsdn, extent);
-		}
+		arena_dalloc_large(tsdn, ptr, tcache, szind, slow_path);
 	}
 }
 
diff --git a/include/jemalloc/internal/arena_stats.h b/include/jemalloc/internal/arena_stats.h
index ef1e25b3..23949ed9 100644
--- a/include/jemalloc/internal/arena_stats.h
+++ b/include/jemalloc/internal/arena_stats.h
@@ -35,6 +35,13 @@ struct arena_stats_large_s {
 	 * periodically merges into this counter.
 	 */
 	arena_stats_u64_t	nrequests; /* Partially derived. */
+	/*
+	 * Number of tcache fills / flushes for large (similarly, periodically
+	 * merged).  Note that there is no large tcache batch-fill currently
+	 * (i.e. only fill 1 at a time); however flush may be batched.
+	 */
+	arena_stats_u64_t	nfills; /* Partially derived. */
+	arena_stats_u64_t	nflushes; /* Partially derived. */
 
 	/* Current number of allocations of this size class. */
 	size_t		curlextents; /* Derived. */
@@ -101,8 +108,13 @@ struct arena_stats_s {
 	atomic_zu_t		allocated_large; /* Derived. */
 	arena_stats_u64_t	nmalloc_large; /* Derived. */
 	arena_stats_u64_t	ndalloc_large; /* Derived. */
+	arena_stats_u64_t	nfills_large; /* Derived. */
+	arena_stats_u64_t	nflushes_large; /* Derived. */
 	arena_stats_u64_t	nrequests_large; /* Derived. */
 
+	/* VM space had to be leaked (undocumented).  Normally 0. */
+	atomic_zu_t		abandoned_vm;
+
 	/* Number of bytes cached in tcache associated with this arena. */
 	atomic_zu_t		tcache_bytes; /* Derived. */
 
@@ -240,11 +252,12 @@ arena_stats_accum_zu(atomic_zu_t *dst, size_t src) {
 }
 
 static inline void
-arena_stats_large_nrequests_add(tsdn_t *tsdn, arena_stats_t *arena_stats,
+arena_stats_large_flush_nrequests_add(tsdn_t *tsdn, arena_stats_t *arena_stats,
     szind_t szind, uint64_t nrequests) {
 	arena_stats_lock(tsdn, arena_stats);
-	arena_stats_add_u64(tsdn, arena_stats, &arena_stats->lstats[szind -
-	    SC_NBINS].nrequests, nrequests);
+	arena_stats_large_t *lstats = &arena_stats->lstats[szind - SC_NBINS];
+	arena_stats_add_u64(tsdn, arena_stats, &lstats->nrequests, nrequests);
+	arena_stats_add_u64(tsdn, arena_stats, &lstats->nflushes, 1);
 	arena_stats_unlock(tsdn, arena_stats);
 }
 
diff --git a/include/jemalloc/internal/arena_structs_b.h b/include/jemalloc/internal/arena_structs_b.h
index 950bd13c..eeab57fd 100644
--- a/include/jemalloc/internal/arena_structs_b.h
+++ b/include/jemalloc/internal/arena_structs_b.h
@@ -116,7 +116,6 @@ struct arena_s {
 
 	/* Synchronization: internal. */
 	prof_accum_t		prof_accum;
-	uint64_t		prof_accumbytes;
 
 	/*
 	 * PRNG state for cache index randomization of large allocation base
diff --git a/include/jemalloc/internal/atomic_gcc_atomic.h b/include/jemalloc/internal/atomic_gcc_atomic.h
index 6b73a14f..471515e8 100644
--- a/include/jemalloc/internal/atomic_gcc_atomic.h
+++ b/include/jemalloc/internal/atomic_gcc_atomic.h
@@ -67,7 +67,8 @@ atomic_exchange_##short_type(atomic_##short_type##_t *a, type val,	\
 									\
 ATOMIC_INLINE bool							\
 atomic_compare_exchange_weak_##short_type(atomic_##short_type##_t *a,	\
-    type *expected, type desired, atomic_memory_order_t success_mo,	\
+    UNUSED type *expected, type desired,				\
+    atomic_memory_order_t success_mo,					\
     atomic_memory_order_t failure_mo) {					\
 	return __atomic_compare_exchange(&a->repr, expected, &desired,	\
 	    true, atomic_enum_to_builtin(success_mo),			\
@@ -76,7 +77,8 @@ atomic_compare_exchange_weak_##short_type(atomic_##short_type##_t *a,	\
 									\
 ATOMIC_INLINE bool							\
 atomic_compare_exchange_strong_##short_type(atomic_##short_type##_t *a,	\
-    type *expected, type desired, atomic_memory_order_t success_mo,	\
+    UNUSED type *expected, type desired,				\
+    atomic_memory_order_t success_mo,					\
     atomic_memory_order_t failure_mo) {					\
 	return __atomic_compare_exchange(&a->repr, expected, &desired,	\
 	    false,							\
diff --git a/include/jemalloc/internal/bin.h b/include/jemalloc/internal/bin.h
index f542c882..8547e893 100644
--- a/include/jemalloc/internal/bin.h
+++ b/include/jemalloc/internal/bin.h
@@ -116,6 +116,7 @@ bin_stats_merge(tsdn_t *tsdn, bin_stats_t *dst_bin_stats, bin_t *bin) {
 	dst_bin_stats->nslabs += bin->stats.nslabs;
 	dst_bin_stats->reslabs += bin->stats.reslabs;
 	dst_bin_stats->curslabs += bin->stats.curslabs;
+	dst_bin_stats->nonfull_slabs += bin->stats.nonfull_slabs;
 	malloc_mutex_unlock(tsdn, &bin->lock);
 }
 
diff --git a/include/jemalloc/internal/bin_stats.h b/include/jemalloc/internal/bin_stats.h
index 86e673ec..d04519c8 100644
--- a/include/jemalloc/internal/bin_stats.h
+++ b/include/jemalloc/internal/bin_stats.h
@@ -45,6 +45,9 @@ struct bin_stats_s {
 	/* Current number of slabs in this bin. */
 	size_t		curslabs;
 
+	/* Current size of nonfull slabs heap in this bin. */
+	size_t		nonfull_slabs;
+
 	mutex_prof_data_t mutex_data;
 };
 
diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h
index 775fdec0..1d1aacc6 100644
--- a/include/jemalloc/internal/ctl.h
+++ b/include/jemalloc/internal/ctl.h
@@ -39,6 +39,8 @@ typedef struct ctl_arena_stats_s {
 	uint64_t nmalloc_small;
 	uint64_t ndalloc_small;
 	uint64_t nrequests_small;
+	uint64_t nfills_small;
+	uint64_t nflushes_small;
 
 	bin_stats_t bstats[SC_NBINS];
 	arena_stats_large_t lstats[SC_NSIZES - SC_NBINS];
diff --git a/include/jemalloc/internal/emitter.h b/include/jemalloc/internal/emitter.h
index 0a8bc2c0..542bc79c 100644
--- a/include/jemalloc/internal/emitter.h
+++ b/include/jemalloc/internal/emitter.h
@@ -86,10 +86,11 @@ emitter_printf(emitter_t *emitter, const char *format, ...) {
 	va_end(ap);
 }
 
-static inline void
+static inline const char * JEMALLOC_FORMAT_ARG(3)
 emitter_gen_fmt(char *out_fmt, size_t out_size, const char *fmt_specifier,
     emitter_justify_t justify, int width) {
 	size_t written;
+	fmt_specifier++;
 	if (justify == emitter_justify_none) {
 		written = malloc_snprintf(out_fmt, out_size,
 		    "%%%s", fmt_specifier);
@@ -102,6 +103,7 @@ emitter_gen_fmt(char *out_fmt, size_t out_size, const char *fmt_specifier,
 	}
 	/* Only happens in case of bad format string, which *we* choose. */
 	assert(written <  out_size);
+	return out_fmt;
 }
 
 /*
@@ -127,26 +129,27 @@ emitter_print_value(emitter_t *emitter, emitter_justify_t justify, int width,
 	char buf[BUF_SIZE];
 
 #define EMIT_SIMPLE(type, format)					\
-	emitter_gen_fmt(fmt, FMT_SIZE, format, justify, width);		\
-	emitter_printf(emitter, fmt, *(const type *)value);		\
+	emitter_printf(emitter,						\
+	    emitter_gen_fmt(fmt, FMT_SIZE, format, justify, width),	\
+	    *(const type *)value);
 
 	switch (value_type) {
 	case emitter_type_bool:
-		emitter_gen_fmt(fmt, FMT_SIZE, "s", justify, width);
-		emitter_printf(emitter, fmt, *(const bool *)value ?
-		    "true" : "false");
+		emitter_printf(emitter, 
+		    emitter_gen_fmt(fmt, FMT_SIZE, "%s", justify, width),
+		    *(const bool *)value ?  "true" : "false");
 		break;
 	case emitter_type_int:
-		EMIT_SIMPLE(int, "d")
+		EMIT_SIMPLE(int, "%d")
 		break;
 	case emitter_type_unsigned:
-		EMIT_SIMPLE(unsigned, "u")
+		EMIT_SIMPLE(unsigned, "%u")
 		break;
 	case emitter_type_ssize:
-		EMIT_SIMPLE(ssize_t, "zd")
+		EMIT_SIMPLE(ssize_t, "%zd")
 		break;
 	case emitter_type_size:
-		EMIT_SIMPLE(size_t, "zu")
+		EMIT_SIMPLE(size_t, "%zu")
 		break;
 	case emitter_type_string:
 		str_written = malloc_snprintf(buf, BUF_SIZE, "\"%s\"",
@@ -156,17 +159,17 @@ emitter_print_value(emitter_t *emitter, emitter_justify_t justify, int width,
 		 * anywhere near the fmt size.
 		 */
 		assert(str_written < BUF_SIZE);
-		emitter_gen_fmt(fmt, FMT_SIZE, "s", justify, width);
-		emitter_printf(emitter, fmt, buf);
+		emitter_printf(emitter, 
+		    emitter_gen_fmt(fmt, FMT_SIZE, "%s", justify, width), buf);
 		break;
 	case emitter_type_uint32:
-		EMIT_SIMPLE(uint32_t, FMTu32)
+		EMIT_SIMPLE(uint32_t, "%" FMTu32)
 		break;
 	case emitter_type_uint64:
-		EMIT_SIMPLE(uint64_t, FMTu64)
+		EMIT_SIMPLE(uint64_t, "%" FMTu64)
 		break;
 	case emitter_type_title:
-		EMIT_SIMPLE(char *const, "s");
+		EMIT_SIMPLE(char *const, "%s");
 		break;
 	default:
 		unreachable();
diff --git a/include/jemalloc/internal/extent_externs.h b/include/jemalloc/internal/extent_externs.h
index 8680251a..8aba5763 100644
--- a/include/jemalloc/internal/extent_externs.h
+++ b/include/jemalloc/internal/extent_externs.h
@@ -24,7 +24,7 @@ size_t extent_size_quantize_floor(size_t size);
 size_t extent_size_quantize_ceil(size_t size);
 #endif
 
-rb_proto(, extent_avail_, extent_tree_t, extent_t)
+ph_proto(, extent_avail_, extent_tree_t, extent_t)
 ph_proto(, extent_heap_, extent_heap_t, extent_t)
 
 bool extents_init(tsdn_t *tsdn, extents_t *extents, extent_state_t state,
@@ -74,4 +74,10 @@ bool extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena,
 
 bool extent_boot(void);
 
+void extent_util_stats_get(tsdn_t *tsdn, const void *ptr,
+    size_t *nfree, size_t *nregs, size_t *size);
+void extent_util_stats_verbose_get(tsdn_t *tsdn, const void *ptr,
+    size_t *nfree, size_t *nregs, size_t *size,
+    size_t *bin_nfree, size_t *bin_nregs, void **slabcur_addr);
+
 #endif /* JEMALLOC_INTERNAL_EXTENT_EXTERNS_H */
diff --git a/include/jemalloc/internal/extent_inlines.h b/include/jemalloc/internal/extent_inlines.h
index 63b710dc..77fa4c4a 100644
--- a/include/jemalloc/internal/extent_inlines.h
+++ b/include/jemalloc/internal/extent_inlines.h
@@ -343,10 +343,30 @@ extent_prof_alloc_time_set(extent_t *extent, nstime_t t) {
 	nstime_copy(&extent->e_alloc_time, &t);
 }
 
+static inline bool
+extent_is_head_get(extent_t *extent) {
+	if (maps_coalesce) {
+		not_reached();
+	}
+
+	return (bool)((extent->e_bits & EXTENT_BITS_IS_HEAD_MASK) >>
+	    EXTENT_BITS_IS_HEAD_SHIFT);
+}
+
+static inline void
+extent_is_head_set(extent_t *extent, bool is_head) {
+	if (maps_coalesce) {
+		not_reached();
+	}
+
+	extent->e_bits = (extent->e_bits & ~EXTENT_BITS_IS_HEAD_MASK) |
+	    ((uint64_t)is_head << EXTENT_BITS_IS_HEAD_SHIFT);
+}
+
 static inline void
 extent_init(extent_t *extent, arena_t *arena, void *addr, size_t size,
     bool slab, szind_t szind, size_t sn, extent_state_t state, bool zeroed,
-    bool committed, bool dumpable) {
+    bool committed, bool dumpable, extent_head_state_t is_head) {
 	assert(addr == PAGE_ADDR2BASE(addr) || !slab);
 
 	extent_arena_set(extent, arena);
@@ -360,6 +380,10 @@ extent_init(extent_t *extent, arena_t *arena, void *addr, size_t size,
 	extent_committed_set(extent, committed);
 	extent_dumpable_set(extent, dumpable);
 	ql_elm_new(extent, ql_link);
+	if (!maps_coalesce) {
+		extent_is_head_set(extent, (is_head == EXTENT_IS_HEAD) ? true :
+		    false);
+	}
 	if (config_prof) {
 		extent_prof_tctx_set(extent, NULL);
 	}
diff --git a/include/jemalloc/internal/extent_structs.h b/include/jemalloc/internal/extent_structs.h
index ceb18979..767cd893 100644
--- a/include/jemalloc/internal/extent_structs.h
+++ b/include/jemalloc/internal/extent_structs.h
@@ -128,7 +128,11 @@ struct extent_s {
 #define EXTENT_BITS_BINSHARD_SHIFT  (EXTENT_BITS_NFREE_WIDTH + EXTENT_BITS_NFREE_SHIFT)
 #define EXTENT_BITS_BINSHARD_MASK  MASK(EXTENT_BITS_BINSHARD_WIDTH, EXTENT_BITS_BINSHARD_SHIFT)
 
-#define EXTENT_BITS_SN_SHIFT (EXTENT_BITS_BINSHARD_WIDTH + EXTENT_BITS_BINSHARD_SHIFT)
+#define EXTENT_BITS_IS_HEAD_WIDTH 1
+#define EXTENT_BITS_IS_HEAD_SHIFT  (EXTENT_BITS_BINSHARD_WIDTH + EXTENT_BITS_BINSHARD_SHIFT)
+#define EXTENT_BITS_IS_HEAD_MASK  MASK(EXTENT_BITS_IS_HEAD_WIDTH, EXTENT_BITS_IS_HEAD_SHIFT)
+
+#define EXTENT_BITS_SN_SHIFT   (EXTENT_BITS_IS_HEAD_WIDTH + EXTENT_BITS_IS_HEAD_SHIFT)
 #define EXTENT_BITS_SN_MASK  (UINT64_MAX << EXTENT_BITS_SN_SHIFT)
 
 	/* Pointer to the extent that this structure is responsible for. */
@@ -228,4 +232,25 @@ struct extents_s {
 	bool			delay_coalesce;
 };
 
+/*
+ * The following two structs are for experimental purposes. See
+ * experimental_utilization_query_ctl and
+ * experimental_utilization_batch_query_ctl in src/ctl.c.
+ */
+
+struct extent_util_stats_s {
+	size_t nfree;
+	size_t nregs;
+	size_t size;
+};
+
+struct extent_util_stats_verbose_s {
+	void *slabcur_addr;
+	size_t nfree;
+	size_t nregs;
+	size_t size;
+	size_t bin_nfree;
+	size_t bin_nregs;
+};
+
 #endif /* JEMALLOC_INTERNAL_EXTENT_STRUCTS_H */
diff --git a/include/jemalloc/internal/extent_types.h b/include/jemalloc/internal/extent_types.h
index acbcf27b..96925cf9 100644
--- a/include/jemalloc/internal/extent_types.h
+++ b/include/jemalloc/internal/extent_types.h
@@ -4,6 +4,9 @@
 typedef struct extent_s extent_t;
 typedef struct extents_s extents_t;
 
+typedef struct extent_util_stats_s extent_util_stats_t;
+typedef struct extent_util_stats_verbose_s extent_util_stats_verbose_t;
+
 #define EXTENT_HOOKS_INITIALIZER	NULL
 
 /*
@@ -12,4 +15,9 @@ typedef struct extents_s extents_t;
  */
 #define LG_EXTENT_MAX_ACTIVE_FIT_DEFAULT 6
 
+typedef enum {
+	EXTENT_NOT_HEAD,
+	EXTENT_IS_HEAD   /* Only relevant for Windows && opt.retain. */
+} extent_head_state_t;
+
 #endif /* JEMALLOC_INTERNAL_EXTENT_TYPES_H */
diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in
index 21b65147..c442a219 100644
--- a/include/jemalloc/internal/jemalloc_internal_defs.h.in
+++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in
@@ -360,7 +360,7 @@
  */
 #undef JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE
 
-/* Performs additional size-matching sanity checks when defined. */
-#undef JEMALLOC_EXTRA_SIZE_CHECK
+/* Performs additional safety checks when defined. */
+#undef JEMALLOC_OPT_SAFETY_CHECKS
 
 #endif /* JEMALLOC_INTERNAL_DEFS_H_ */
diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h
index b7843623..d291170b 100644
--- a/include/jemalloc/internal/jemalloc_internal_externs.h
+++ b/include/jemalloc/internal/jemalloc_internal_externs.h
@@ -10,6 +10,7 @@ extern bool malloc_slow;
 /* Run-time options. */
 extern bool opt_abort;
 extern bool opt_abort_conf;
+extern bool opt_confirm_conf;
 extern const char *opt_junk;
 extern bool opt_junk_alloc;
 extern bool opt_junk_free;
@@ -51,5 +52,6 @@ void jemalloc_prefork(void);
 void jemalloc_postfork_parent(void);
 void jemalloc_postfork_child(void);
 bool malloc_initialized(void);
+void je_sdallocx_noflags(void *ptr, size_t size);
 
 #endif /* JEMALLOC_INTERNAL_EXTERNS_H */
diff --git a/include/jemalloc/internal/jemalloc_preamble.h.in b/include/jemalloc/internal/jemalloc_preamble.h.in
index 4bfdb32c..3418cbfa 100644
--- a/include/jemalloc/internal/jemalloc_preamble.h.in
+++ b/include/jemalloc/internal/jemalloc_preamble.h.in
@@ -161,6 +161,25 @@ static const bool config_log =
     false
 #endif
     ;
+/*
+ * Are extra safety checks enabled; things like checking the size of sized
+ * deallocations, double-frees, etc.
+ */
+static const bool config_opt_safety_checks =
+#ifdef JEMALLOC_OPT_SAFETY_CHECKS
+    true
+#elif defined(JEMALLOC_DEBUG)
+    /*
+     * This lets us only guard safety checks by one flag instead of two; fast
+     * checks can guard solely by config_opt_safety_checks and run in debug mode
+     * too.
+     */
+    true
+#else
+    false
+#endif
+    ;
+
 #if defined(_WIN32) || defined(JEMALLOC_HAVE_SCHED_GETCPU)
 /* Currently percpu_arena depends on sched_getcpu. */
 #define JEMALLOC_PERCPU_ARENA
diff --git a/include/jemalloc/internal/malloc_io.h b/include/jemalloc/internal/malloc_io.h
index bfe556b5..1d1a414e 100644
--- a/include/jemalloc/internal/malloc_io.h
+++ b/include/jemalloc/internal/malloc_io.h
@@ -54,7 +54,7 @@ size_t malloc_vsnprintf(char *str, size_t size, const char *format,
 size_t malloc_snprintf(char *str, size_t size, const char *format, ...)
     JEMALLOC_FORMAT_PRINTF(3, 4);
 /*
- * The caller can set write_cb and cbopaque to null to choose to print with the
+ * The caller can set write_cb to null to choose to print with the
  * je_malloc_message hook.
  */
 void malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque,
diff --git a/include/jemalloc/internal/prof_inlines_b.h b/include/jemalloc/internal/prof_inlines_b.h
index 8358bffb..8ba8a1e1 100644
--- a/include/jemalloc/internal/prof_inlines_b.h
+++ b/include/jemalloc/internal/prof_inlines_b.h
@@ -1,6 +1,7 @@
 #ifndef JEMALLOC_INTERNAL_PROF_INLINES_B_H
 #define JEMALLOC_INTERNAL_PROF_INLINES_B_H
 
+#include "jemalloc/internal/safety_check.h"
 #include "jemalloc/internal/sz.h"
 
 JEMALLOC_ALWAYS_INLINE bool
@@ -71,7 +72,7 @@ prof_alloc_time_get(tsdn_t *tsdn, const void *ptr, alloc_ctx_t *alloc_ctx) {
 
 JEMALLOC_ALWAYS_INLINE void
 prof_alloc_time_set(tsdn_t *tsdn, const void *ptr, alloc_ctx_t *alloc_ctx,
-    nstime_t t) { 
+    nstime_t t) {
 	cassert(config_prof);
 	assert(ptr != NULL);
 
diff --git a/include/jemalloc/internal/safety_check.h b/include/jemalloc/internal/safety_check.h
new file mode 100644
index 00000000..53339ac1
--- /dev/null
+++ b/include/jemalloc/internal/safety_check.h
@@ -0,0 +1,26 @@
+#ifndef JEMALLOC_INTERNAL_SAFETY_CHECK_H
+#define JEMALLOC_INTERNAL_SAFETY_CHECK_H
+
+void safety_check_fail(const char *format, ...);
+/* Can set to NULL for a default. */
+void safety_check_set_abort(void (*abort_fn)());
+
+JEMALLOC_ALWAYS_INLINE void
+safety_check_set_redzone(void *ptr, size_t usize, size_t bumped_usize) {
+	assert(usize < bumped_usize);
+	for (size_t i = usize; i < bumped_usize && i < usize + 32; ++i) {
+		*((unsigned char *)ptr + i) = 0xBC;
+	}
+}
+
+JEMALLOC_ALWAYS_INLINE void
+safety_check_verify_redzone(const void *ptr, size_t usize, size_t bumped_usize)
+{
+	for (size_t i = usize; i < bumped_usize && i < usize + 32; ++i) {
+		if (unlikely(*((unsigned char *)ptr + i) != 0xBC)) {
+			safety_check_fail("Use after free error\n");
+		}
+	}
+}
+
+#endif /*JEMALLOC_INTERNAL_SAFETY_CHECK_H */
diff --git a/include/jemalloc/internal/sc.h b/include/jemalloc/internal/sc.h
index ef0a4512..9a099d8b 100644
--- a/include/jemalloc/internal/sc.h
+++ b/include/jemalloc/internal/sc.h
@@ -18,7 +18,7 @@
  * each one covers allocations for base / SC_NGROUP possible allocation sizes.
  * We call that value (base / SC_NGROUP) the delta of the group. Each size class
  * is delta larger than the one before it (including the initial size class in a
- * group, which is delta large than 2**base, the largest size class in the
+ * group, which is delta larger than base, the largest size class in the
  * previous group).
  * To make the math all work out nicely, we require that SC_NGROUP is a power of
  * two, and define it in terms of SC_LG_NGROUP. We'll often talk in terms of
@@ -53,10 +53,11 @@
  * classes; one per power of two, up until we hit the quantum size. There are
  * therefore LG_QUANTUM - SC_LG_TINY_MIN such size classes.
  *
- * Next, we have a size class of size LG_QUANTUM. This can't be the start of a
- * group in the sense we described above (covering a power of two range) since,
- * if we divided into it to pick a value of delta, we'd get a delta smaller than
- * (1 << LG_QUANTUM) for sizes >= (1 << LG_QUANTUM), which is against the rules.
+ * Next, we have a size class of size (1 << LG_QUANTUM).  This can't be the
+ * start of a group in the sense we described above (covering a power of two
+ * range) since, if we divided into it to pick a value of delta, we'd get a
+ * delta smaller than (1 << LG_QUANTUM) for sizes >= (1 << LG_QUANTUM), which
+ * is against the rules.
  *
  * The first base we can divide by SC_NGROUP while still being at least
  * (1 << LG_QUANTUM) is SC_NGROUP * (1 << LG_QUANTUM). We can get there by
@@ -196,7 +197,7 @@
     (SC_LG_BASE_MAX - SC_LG_FIRST_REGULAR_BASE + 1) - 1)
 #define SC_NSIZES (SC_NTINY + SC_NPSEUDO + SC_NREGULAR)
 
- /* The number of size classes that are a multiple of the page size. */
+/* The number of size classes that are a multiple of the page size. */
 #define SC_NPSIZES (							\
     /* Start with all the size classes. */				\
     SC_NSIZES								\
@@ -206,8 +207,20 @@
     - SC_NPSEUDO							\
     /* And the tiny group. */						\
     - SC_NTINY								\
-    /* Groups where ndelta*delta is not a multiple of the page size. */	\
-    - (2 * (SC_NGROUP)))
+    /* Sizes where ndelta*delta is not a multiple of the page size. */	\
+    - (SC_LG_NGROUP * SC_NGROUP))
+/*
+ * Note that the last line is computed as the sum of the second column in the
+ * following table:
+ *                      lg(base) | count of sizes to exclude
+ * ------------------------------|-----------------------------
+ *                   LG_PAGE - 1 | SC_NGROUP - 1
+ *                       LG_PAGE | SC_NGROUP - 1
+ *                   LG_PAGE + 1 | SC_NGROUP - 2
+ *                   LG_PAGE + 2 | SC_NGROUP - 4
+ *                           ... | ...
+ *  LG_PAGE + (SC_LG_NGROUP - 1) | SC_NGROUP - (SC_NGROUP / 2)
+ */
 
 /*
  * We declare a size class is binnable if size < page size * group. Or, in other
diff --git a/include/jemalloc/internal/tsd_malloc_thread_cleanup.h b/include/jemalloc/internal/tsd_malloc_thread_cleanup.h
index bf8801ef..65852d5c 100644
--- a/include/jemalloc/internal/tsd_malloc_thread_cleanup.h
+++ b/include/jemalloc/internal/tsd_malloc_thread_cleanup.h
@@ -3,8 +3,10 @@
 #endif
 #define JEMALLOC_INTERNAL_TSD_MALLOC_THREAD_CLEANUP_H
 
-extern __thread tsd_t tsd_tls;
-extern __thread bool tsd_initialized;
+#define JEMALLOC_TSD_TYPE_ATTR(type) __thread type JEMALLOC_TLS_MODEL
+
+extern JEMALLOC_TSD_TYPE_ATTR(tsd_t) tsd_tls;
+extern JEMALLOC_TSD_TYPE_ATTR(bool) tsd_initialized;
 extern bool tsd_booted;
 
 /* Initialization/cleanup. */
diff --git a/include/jemalloc/internal/tsd_tls.h b/include/jemalloc/internal/tsd_tls.h
index f4f165c7..7d6c805b 100644
--- a/include/jemalloc/internal/tsd_tls.h
+++ b/include/jemalloc/internal/tsd_tls.h
@@ -3,7 +3,9 @@
 #endif
 #define JEMALLOC_INTERNAL_TSD_TLS_H
 
-extern __thread tsd_t tsd_tls;
+#define JEMALLOC_TSD_TYPE_ATTR(type) __thread type JEMALLOC_TLS_MODEL
+
+extern JEMALLOC_TSD_TYPE_ATTR(tsd_t) tsd_tls;
 extern pthread_key_t tsd_tsd;
 extern bool tsd_booted;
 
diff --git a/include/jemalloc/jemalloc_defs.h.in b/include/jemalloc/jemalloc_defs.h.in
index 6d89435c..11c39181 100644
--- a/include/jemalloc/jemalloc_defs.h.in
+++ b/include/jemalloc/jemalloc_defs.h.in
@@ -4,6 +4,9 @@
 /* Defined if alloc_size attribute is supported. */
 #undef JEMALLOC_HAVE_ATTR_ALLOC_SIZE
 
+/* Defined if format_arg(...) attribute is supported. */
+#undef JEMALLOC_HAVE_ATTR_FORMAT_ARG
+
 /* Defined if format(gnu_printf, ...) attribute is supported. */
 #undef JEMALLOC_HAVE_ATTR_FORMAT_GNU_PRINTF
 
diff --git a/include/jemalloc/jemalloc_macros.h.in b/include/jemalloc/jemalloc_macros.h.in
index a00ce11a..59e29558 100644
--- a/include/jemalloc/jemalloc_macros.h.in
+++ b/include/jemalloc/jemalloc_macros.h.in
@@ -69,6 +69,7 @@
 #      define JEMALLOC_EXPORT __declspec(dllimport)
 #    endif
 #  endif
+#  define JEMALLOC_FORMAT_ARG(i)
 #  define JEMALLOC_FORMAT_PRINTF(s, i)
 #  define JEMALLOC_NOINLINE __declspec(noinline)
 #  ifdef __cplusplus
@@ -96,6 +97,11 @@
 #  ifndef JEMALLOC_EXPORT
 #    define JEMALLOC_EXPORT JEMALLOC_ATTR(visibility("default"))
 #  endif
+#  ifdef JEMALLOC_HAVE_ATTR_FORMAT_ARG
+#    define JEMALLOC_FORMAT_ARG(i) JEMALLOC_ATTR(__format_arg__(3))
+#  else
+#    define JEMALLOC_FORMAT_ARG(i)
+#  endif
 #  ifdef JEMALLOC_HAVE_ATTR_FORMAT_GNU_PRINTF
 #    define JEMALLOC_FORMAT_PRINTF(s, i) JEMALLOC_ATTR(format(gnu_printf, s, i))
 #  elif defined(JEMALLOC_HAVE_ATTR_FORMAT_PRINTF)
diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj
index ddc6781c..228e8be0 100644
--- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj
+++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj
@@ -66,6 +66,7 @@
     <ClCompile Include="..\..\..\..\src\ticker.c" />
     <ClCompile Include="..\..\..\..\src\tsd.c" />
     <ClCompile Include="..\..\..\..\src\witness.c" />
+    <ClCompile Include="..\..\..\..\src\safety_check.c" />
   </ItemGroup>
   <PropertyGroup Label="Globals">
     <ProjectGuid>{8D6BB292-9E1C-413D-9F98-4864BDC1514A}</ProjectGuid>
@@ -346,4 +347,4 @@
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
   </ImportGroup>
-</Project>
\ No newline at end of file
+</Project>
diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters
index 1dcf4ed5..d839515b 100644
--- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters
+++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters
@@ -100,5 +100,8 @@
     <ClCompile Include="..\..\..\..\src\div.c">
       <Filter>Source Files</Filter>
     </ClCompile>
+    <ClCompile Include="..\..\..\..\src\safety_check.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
   </ItemGroup>
-</Project>
\ No newline at end of file
+</Project>
diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj
index 21481d5e..edcceede 100644
--- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj
+++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj
@@ -67,6 +67,7 @@
     <ClCompile Include="..\..\..\..\src\ticker.c" />
     <ClCompile Include="..\..\..\..\src\tsd.c" />
     <ClCompile Include="..\..\..\..\src\witness.c" />
+    <ClCompile Include="..\..\..\..\src\safety_check.c" />
   </ItemGroup>
   <PropertyGroup Label="Globals">
     <ProjectGuid>{8D6BB292-9E1C-413D-9F98-4864BDC1514A}</ProjectGuid>
@@ -346,4 +347,4 @@
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
   </ImportGroup>
-</Project>
\ No newline at end of file
+</Project>
diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters
index 466dc63f..6df72601 100644
--- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters
+++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters
@@ -103,5 +103,8 @@
     <ClCompile Include="..\..\..\..\src\test_hooks.c">
       <Filter>Source Files</Filter>
     </ClCompile>
+    <ClCompile Include="..\..\..\..\src\safety_check.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
   </ItemGroup>
-</Project>
\ No newline at end of file
+</Project>
diff --git a/scripts/gen_run_tests.py b/scripts/gen_run_tests.py
index 5052b3e0..a414f812 100755
--- a/scripts/gen_run_tests.py
+++ b/scripts/gen_run_tests.py
@@ -40,6 +40,7 @@ possible_config_opts = [
     '--enable-debug',
     '--enable-prof',
     '--disable-stats',
+    '--enable-opt-safety-checks',
 ]
 if bits_64:
     possible_config_opts.append('--with-lg-vaddr=56')
diff --git a/scripts/gen_travis.py b/scripts/gen_travis.py
index 65b0b67c..f1478c62 100755
--- a/scripts/gen_travis.py
+++ b/scripts/gen_travis.py
@@ -46,6 +46,7 @@ configure_flag_unusuals = [
     '--enable-prof',
     '--disable-stats',
     '--disable-libdl',
+    '--enable-opt-safety-checks',
 ]
 
 malloc_conf_unusuals = [
diff --git a/src/arena.c b/src/arena.c
index 60eac232..ba50e410 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -8,6 +8,7 @@
 #include "jemalloc/internal/extent_mmap.h"
 #include "jemalloc/internal/mutex.h"
 #include "jemalloc/internal/rtree.h"
+#include "jemalloc/internal/safety_check.h"
 #include "jemalloc/internal/util.h"
 
 JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS
@@ -131,6 +132,8 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads,
 	    (((atomic_load_zu(&arena->nactive, ATOMIC_RELAXED) +
 	    extents_npages_get(&arena->extents_dirty) +
 	    extents_npages_get(&arena->extents_muzzy)) << LG_PAGE)));
+	arena_stats_accum_zu(&astats->abandoned_vm, atomic_load_zu(
+	    &arena->stats.abandoned_vm, ATOMIC_RELAXED));
 
 	for (szind_t i = 0; i < SC_NSIZES - SC_NBINS; i++) {
 		uint64_t nmalloc = arena_stats_read_u64(tsdn, &arena->stats,
@@ -150,6 +153,15 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads,
 		arena_stats_accum_u64(&astats->nrequests_large,
 		    nmalloc + nrequests);
 
+		/* nfill == nmalloc for large currently. */
+		arena_stats_accum_u64(&lstats[i].nfills, nmalloc);
+		arena_stats_accum_u64(&astats->nfills_large, nmalloc);
+
+		uint64_t nflush = arena_stats_read_u64(tsdn, &arena->stats,
+		    &arena->stats.lstats[i].nflushes);
+		arena_stats_accum_u64(&lstats[i].nflushes, nflush);
+		arena_stats_accum_u64(&astats->nflushes_large, nflush);
+
 		assert(nmalloc >= ndalloc);
 		assert(nmalloc - ndalloc <= SIZE_T_MAX);
 		size_t curlextents = (size_t)(nmalloc - ndalloc);
@@ -1001,11 +1013,17 @@ static void
 arena_bin_slabs_nonfull_insert(bin_t *bin, extent_t *slab) {
 	assert(extent_nfree_get(slab) > 0);
 	extent_heap_insert(&bin->slabs_nonfull, slab);
+	if (config_stats) {
+		bin->stats.nonfull_slabs++;
+	}
 }
 
 static void
 arena_bin_slabs_nonfull_remove(bin_t *bin, extent_t *slab) {
 	extent_heap_remove(&bin->slabs_nonfull, slab);
+	if (config_stats) {
+		bin->stats.nonfull_slabs--;
+	}
 }
 
 static extent_t *
@@ -1016,6 +1034,7 @@ arena_bin_slabs_nonfull_tryget(bin_t *bin) {
 	}
 	if (config_stats) {
 		bin->stats.reslabs++;
+		bin->stats.nonfull_slabs--;
 	}
 	return slab;
 }
@@ -1531,12 +1550,16 @@ arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment,
 }
 
 void
-arena_prof_promote(tsdn_t *tsdn, const void *ptr, size_t usize) {
+arena_prof_promote(tsdn_t *tsdn, void *ptr, size_t usize) {
 	cassert(config_prof);
 	assert(ptr != NULL);
 	assert(isalloc(tsdn, ptr) == SC_LARGE_MINCLASS);
 	assert(usize <= SC_SMALL_MAXCLASS);
 
+	if (config_opt_safety_checks) {
+		safety_check_set_redzone(ptr, usize, SC_LARGE_MINCLASS);
+	}
+
 	rtree_ctx_t rtree_ctx_fallback;
 	rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback);
 
@@ -1577,10 +1600,19 @@ arena_dalloc_promoted(tsdn_t *tsdn, void *ptr, tcache_t *tcache,
 	assert(opt_prof);
 
 	extent_t *extent = iealloc(tsdn, ptr);
-	size_t usize = arena_prof_demote(tsdn, extent, ptr);
-	if (usize <= tcache_maxclass) {
+	size_t usize = extent_usize_get(extent);
+	size_t bumped_usize = arena_prof_demote(tsdn, extent, ptr);
+	if (config_opt_safety_checks && usize < SC_LARGE_MINCLASS) {
+		/*
+		 * Currently, we only do redzoning for small sampled
+		 * allocations.
+		 */
+		assert(bumped_usize == SC_LARGE_MINCLASS);
+		safety_check_verify_redzone(ptr, usize, bumped_usize);
+	}
+	if (bumped_usize <= tcache_maxclass && tcache != NULL) {
 		tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr,
-		    sz_size2index(usize), slow_path);
+		    sz_size2index(bumped_usize), slow_path);
 	} else {
 		large_dalloc(tsdn, extent);
 	}
diff --git a/src/background_thread.c b/src/background_thread.c
index 5ed6c1c9..57b9b256 100644
--- a/src/background_thread.c
+++ b/src/background_thread.c
@@ -799,7 +799,13 @@ background_thread_stats_read(tsdn_t *tsdn, background_thread_stats_t *stats) {
 	nstime_init(&stats->run_interval, 0);
 	for (unsigned i = 0; i < max_background_threads; i++) {
 		background_thread_info_t *info = &background_thread_info[i];
-		malloc_mutex_lock(tsdn, &info->mtx);
+		if (malloc_mutex_trylock(tsdn, &info->mtx)) {
+			/*
+			 * Each background thread run may take a long time;
+			 * avoid waiting on the stats if the thread is active.
+			 */
+			continue;
+		}
 		if (info->state != background_thread_stopped) {
 			num_runs += info->tot_n_runs;
 			nstime_add(&stats->run_interval, &info->tot_sleep_time);
diff --git a/src/ctl.c b/src/ctl.c
index 09310a9d..48afaa61 100644
--- a/src/ctl.c
+++ b/src/ctl.c
@@ -72,6 +72,7 @@ CTL_PROTO(config_debug)
 CTL_PROTO(config_fill)
 CTL_PROTO(config_lazy_lock)
 CTL_PROTO(config_malloc_conf)
+CTL_PROTO(config_opt_safety_checks)
 CTL_PROTO(config_prof)
 CTL_PROTO(config_prof_libgcc)
 CTL_PROTO(config_prof_libunwind)
@@ -80,6 +81,7 @@ CTL_PROTO(config_utrace)
 CTL_PROTO(config_xmalloc)
 CTL_PROTO(opt_abort)
 CTL_PROTO(opt_abort_conf)
+CTL_PROTO(opt_confirm_conf)
 CTL_PROTO(opt_metadata_thp)
 CTL_PROTO(opt_retain)
 CTL_PROTO(opt_dss)
@@ -155,10 +157,14 @@ CTL_PROTO(stats_arenas_i_small_allocated)
 CTL_PROTO(stats_arenas_i_small_nmalloc)
 CTL_PROTO(stats_arenas_i_small_ndalloc)
 CTL_PROTO(stats_arenas_i_small_nrequests)
+CTL_PROTO(stats_arenas_i_small_nfills)
+CTL_PROTO(stats_arenas_i_small_nflushes)
 CTL_PROTO(stats_arenas_i_large_allocated)
 CTL_PROTO(stats_arenas_i_large_nmalloc)
 CTL_PROTO(stats_arenas_i_large_ndalloc)
 CTL_PROTO(stats_arenas_i_large_nrequests)
+CTL_PROTO(stats_arenas_i_large_nfills)
+CTL_PROTO(stats_arenas_i_large_nflushes)
 CTL_PROTO(stats_arenas_i_bins_j_nmalloc)
 CTL_PROTO(stats_arenas_i_bins_j_ndalloc)
 CTL_PROTO(stats_arenas_i_bins_j_nrequests)
@@ -168,6 +174,7 @@ CTL_PROTO(stats_arenas_i_bins_j_nflushes)
 CTL_PROTO(stats_arenas_i_bins_j_nslabs)
 CTL_PROTO(stats_arenas_i_bins_j_nreslabs)
 CTL_PROTO(stats_arenas_i_bins_j_curslabs)
+CTL_PROTO(stats_arenas_i_bins_j_nonfull_slabs)
 INDEX_PROTO(stats_arenas_i_bins_j)
 CTL_PROTO(stats_arenas_i_lextents_j_nmalloc)
 CTL_PROTO(stats_arenas_i_lextents_j_ndalloc)
@@ -203,6 +210,7 @@ CTL_PROTO(stats_arenas_i_internal)
 CTL_PROTO(stats_arenas_i_metadata_thp)
 CTL_PROTO(stats_arenas_i_tcache_bytes)
 CTL_PROTO(stats_arenas_i_resident)
+CTL_PROTO(stats_arenas_i_abandoned_vm)
 INDEX_PROTO(stats_arenas_i)
 CTL_PROTO(stats_allocated)
 CTL_PROTO(stats_active)
@@ -216,6 +224,10 @@ CTL_PROTO(stats_mapped)
 CTL_PROTO(stats_retained)
 CTL_PROTO(experimental_hooks_install)
 CTL_PROTO(experimental_hooks_remove)
+CTL_PROTO(experimental_utilization_query)
+CTL_PROTO(experimental_utilization_batch_query)
+CTL_PROTO(experimental_arenas_i_pactivep)
+INDEX_PROTO(experimental_arenas_i)
 
 #define MUTEX_STATS_CTL_PROTO_GEN(n)					\
 CTL_PROTO(stats_##n##_num_ops)						\
@@ -284,6 +296,7 @@ static const ctl_named_node_t	config_node[] = {
 	{NAME("fill"),		CTL(config_fill)},
 	{NAME("lazy_lock"),	CTL(config_lazy_lock)},
 	{NAME("malloc_conf"),	CTL(config_malloc_conf)},
+	{NAME("opt_safety_checks"),	CTL(config_opt_safety_checks)},
 	{NAME("prof"),		CTL(config_prof)},
 	{NAME("prof_libgcc"),	CTL(config_prof_libgcc)},
 	{NAME("prof_libunwind"), CTL(config_prof_libunwind)},
@@ -295,6 +308,7 @@ static const ctl_named_node_t	config_node[] = {
 static const ctl_named_node_t opt_node[] = {
 	{NAME("abort"),		CTL(opt_abort)},
 	{NAME("abort_conf"),	CTL(opt_abort_conf)},
+	{NAME("confirm_conf"),	CTL(opt_confirm_conf)},
 	{NAME("metadata_thp"),	CTL(opt_metadata_thp)},
 	{NAME("retain"),	CTL(opt_retain)},
 	{NAME("dss"),		CTL(opt_dss)},
@@ -409,14 +423,18 @@ static const ctl_named_node_t stats_arenas_i_small_node[] = {
 	{NAME("allocated"),	CTL(stats_arenas_i_small_allocated)},
 	{NAME("nmalloc"),	CTL(stats_arenas_i_small_nmalloc)},
 	{NAME("ndalloc"),	CTL(stats_arenas_i_small_ndalloc)},
-	{NAME("nrequests"),	CTL(stats_arenas_i_small_nrequests)}
+	{NAME("nrequests"),	CTL(stats_arenas_i_small_nrequests)},
+	{NAME("nfills"),	CTL(stats_arenas_i_small_nfills)},
+	{NAME("nflushes"),	CTL(stats_arenas_i_small_nflushes)}
 };
 
 static const ctl_named_node_t stats_arenas_i_large_node[] = {
 	{NAME("allocated"),	CTL(stats_arenas_i_large_allocated)},
 	{NAME("nmalloc"),	CTL(stats_arenas_i_large_nmalloc)},
 	{NAME("ndalloc"),	CTL(stats_arenas_i_large_ndalloc)},
-	{NAME("nrequests"),	CTL(stats_arenas_i_large_nrequests)}
+	{NAME("nrequests"),	CTL(stats_arenas_i_large_nrequests)},
+	{NAME("nfills"),	CTL(stats_arenas_i_large_nfills)},
+	{NAME("nflushes"),	CTL(stats_arenas_i_large_nflushes)}
 };
 
 #define MUTEX_PROF_DATA_NODE(prefix)					\
@@ -450,6 +468,7 @@ static const ctl_named_node_t stats_arenas_i_bins_j_node[] = {
 	{NAME("nslabs"),	CTL(stats_arenas_i_bins_j_nslabs)},
 	{NAME("nreslabs"),	CTL(stats_arenas_i_bins_j_nreslabs)},
 	{NAME("curslabs"),	CTL(stats_arenas_i_bins_j_curslabs)},
+	{NAME("nonfull_slabs"),	CTL(stats_arenas_i_bins_j_nonfull_slabs)},
 	{NAME("mutex"),		CHILD(named, stats_arenas_i_bins_j_mutex)}
 };
 
@@ -525,6 +544,7 @@ static const ctl_named_node_t stats_arenas_i_node[] = {
 	{NAME("metadata_thp"),	CTL(stats_arenas_i_metadata_thp)},
 	{NAME("tcache_bytes"),	CTL(stats_arenas_i_tcache_bytes)},
 	{NAME("resident"),	CTL(stats_arenas_i_resident)},
+	{NAME("abandoned_vm"),	CTL(stats_arenas_i_abandoned_vm)},
 	{NAME("small"),		CHILD(named, stats_arenas_i_small)},
 	{NAME("large"),		CHILD(named, stats_arenas_i_large)},
 	{NAME("bins"),		CHILD(indexed, stats_arenas_i_bins)},
@@ -572,13 +592,31 @@ static const ctl_named_node_t stats_node[] = {
 	{NAME("arenas"),	CHILD(indexed, stats_arenas)}
 };
 
-static const ctl_named_node_t hooks_node[] = {
+static const ctl_named_node_t experimental_hooks_node[] = {
 	{NAME("install"),	CTL(experimental_hooks_install)},
-	{NAME("remove"),	CTL(experimental_hooks_remove)},
+	{NAME("remove"),	CTL(experimental_hooks_remove)}
+};
+
+static const ctl_named_node_t experimental_utilization_node[] = {
+	{NAME("query"),		CTL(experimental_utilization_query)},
+	{NAME("batch_query"),	CTL(experimental_utilization_batch_query)}
+};
+
+static const ctl_named_node_t experimental_arenas_i_node[] = {
+	{NAME("pactivep"),	CTL(experimental_arenas_i_pactivep)}
+};
+static const ctl_named_node_t super_experimental_arenas_i_node[] = {
+	{NAME(""),		CHILD(named, experimental_arenas_i)}
+};
+
+static const ctl_indexed_node_t experimental_arenas_node[] = {
+	{INDEX(experimental_arenas_i)}
 };
 
 static const ctl_named_node_t experimental_node[] = {
-	{NAME("hooks"),		CHILD(named, hooks)}
+	{NAME("hooks"),		CHILD(named, experimental_hooks)},
+	{NAME("utilization"),	CHILD(named, experimental_utilization)},
+	{NAME("arenas"),	CHILD(indexed, experimental_arenas)}
 };
 
 static const ctl_named_node_t	root_node[] = {
@@ -742,6 +780,8 @@ ctl_arena_clear(ctl_arena_t *ctl_arena) {
 		ctl_arena->astats->nmalloc_small = 0;
 		ctl_arena->astats->ndalloc_small = 0;
 		ctl_arena->astats->nrequests_small = 0;
+		ctl_arena->astats->nfills_small = 0;
+		ctl_arena->astats->nflushes_small = 0;
 		memset(ctl_arena->astats->bstats, 0, SC_NBINS *
 		    sizeof(bin_stats_t));
 		memset(ctl_arena->astats->lstats, 0, (SC_NSIZES - SC_NBINS) *
@@ -773,6 +813,10 @@ ctl_arena_stats_amerge(tsdn_t *tsdn, ctl_arena_t *ctl_arena, arena_t *arena) {
 			    ctl_arena->astats->bstats[i].ndalloc;
 			ctl_arena->astats->nrequests_small +=
 			    ctl_arena->astats->bstats[i].nrequests;
+			ctl_arena->astats->nfills_small +=
+			    ctl_arena->astats->bstats[i].nfills;
+			ctl_arena->astats->nflushes_small +=
+			    ctl_arena->astats->bstats[i].nflushes;
 		}
 	} else {
 		arena_basic_stats_merge(tsdn, arena, &ctl_arena->nthreads,
@@ -855,6 +899,8 @@ MUTEX_PROF_ARENA_MUTEXES
 		sdstats->nmalloc_small += astats->nmalloc_small;
 		sdstats->ndalloc_small += astats->ndalloc_small;
 		sdstats->nrequests_small += astats->nrequests_small;
+		sdstats->nfills_small += astats->nfills_small;
+		sdstats->nflushes_small += astats->nflushes_small;
 
 		if (!destroyed) {
 			accum_atomic_zu(&sdstats->astats.allocated_large,
@@ -869,6 +915,8 @@ MUTEX_PROF_ARENA_MUTEXES
 		    &astats->astats.ndalloc_large);
 		ctl_accum_arena_stats_u64(&sdstats->astats.nrequests_large,
 		    &astats->astats.nrequests_large);
+		accum_atomic_zu(&sdstats->astats.abandoned_vm,
+		    &astats->astats.abandoned_vm);
 
 		accum_atomic_zu(&sdstats->astats.tcache_bytes,
 		    &astats->astats.tcache_bytes);
@@ -897,8 +945,11 @@ MUTEX_PROF_ARENA_MUTEXES
 			if (!destroyed) {
 				sdstats->bstats[i].curslabs +=
 				    astats->bstats[i].curslabs;
+				sdstats->bstats[i].nonfull_slabs +=
+				    astats->bstats[i].nonfull_slabs;
 			} else {
 				assert(astats->bstats[i].curslabs == 0);
+				assert(astats->bstats[i].nonfull_slabs == 0);
 			}
 			malloc_mutex_prof_merge(&sdstats->bstats[i].mutex_data,
 			    &astats->bstats[i].mutex_data);
@@ -1698,6 +1749,7 @@ CTL_RO_CONFIG_GEN(config_debug, bool)
 CTL_RO_CONFIG_GEN(config_fill, bool)
 CTL_RO_CONFIG_GEN(config_lazy_lock, bool)
 CTL_RO_CONFIG_GEN(config_malloc_conf, const char *)
+CTL_RO_CONFIG_GEN(config_opt_safety_checks, bool)
 CTL_RO_CONFIG_GEN(config_prof, bool)
 CTL_RO_CONFIG_GEN(config_prof_libgcc, bool)
 CTL_RO_CONFIG_GEN(config_prof_libunwind, bool)
@@ -1709,6 +1761,7 @@ CTL_RO_CONFIG_GEN(config_xmalloc, bool)
 
 CTL_RO_NL_GEN(opt_abort, opt_abort, bool)
 CTL_RO_NL_GEN(opt_abort_conf, opt_abort_conf, bool)
+CTL_RO_NL_GEN(opt_confirm_conf, opt_confirm_conf, bool)
 CTL_RO_NL_GEN(opt_metadata_thp, metadata_thp_mode_names[opt_metadata_thp],
     const char *)
 CTL_RO_NL_GEN(opt_retain, opt_retain, bool)
@@ -2714,7 +2767,7 @@ static int
 prof_log_start_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
     size_t *oldlenp, void *newp, size_t newlen) {
 	int ret;
-	
+
 	const char *filename = NULL;
 
 	if (!config_prof) {
@@ -2726,7 +2779,7 @@ prof_log_start_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
 
 	if (prof_log_start(tsd_tsdn(tsd), filename)) {
 		ret = EFAULT;
-		goto label_return; 
+		goto label_return;
 	}
 
 	ret = 0;
@@ -2822,6 +2875,9 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_tcache_bytes,
 CTL_RO_CGEN(config_stats, stats_arenas_i_resident,
     atomic_load_zu(&arenas_i(mib[2])->astats->astats.resident, ATOMIC_RELAXED),
     size_t)
+CTL_RO_CGEN(config_stats, stats_arenas_i_abandoned_vm,
+    atomic_load_zu(&arenas_i(mib[2])->astats->astats.abandoned_vm,
+    ATOMIC_RELAXED), size_t)
 
 CTL_RO_CGEN(config_stats, stats_arenas_i_small_allocated,
     arenas_i(mib[2])->astats->allocated_small, size_t)
@@ -2831,6 +2887,10 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_small_ndalloc,
     arenas_i(mib[2])->astats->ndalloc_small, uint64_t)
 CTL_RO_CGEN(config_stats, stats_arenas_i_small_nrequests,
     arenas_i(mib[2])->astats->nrequests_small, uint64_t)
+CTL_RO_CGEN(config_stats, stats_arenas_i_small_nfills,
+    arenas_i(mib[2])->astats->nfills_small, uint64_t)
+CTL_RO_CGEN(config_stats, stats_arenas_i_small_nflushes,
+    arenas_i(mib[2])->astats->nflushes_small, uint64_t)
 CTL_RO_CGEN(config_stats, stats_arenas_i_large_allocated,
     atomic_load_zu(&arenas_i(mib[2])->astats->astats.allocated_large,
     ATOMIC_RELAXED), size_t)
@@ -2840,12 +2900,19 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_large_nmalloc,
 CTL_RO_CGEN(config_stats, stats_arenas_i_large_ndalloc,
     ctl_arena_stats_read_u64(
     &arenas_i(mib[2])->astats->astats.ndalloc_large), uint64_t)
-/*
- * Note: "nmalloc" here instead of "nrequests" in the read.  This is intentional.
- */
 CTL_RO_CGEN(config_stats, stats_arenas_i_large_nrequests,
     ctl_arena_stats_read_u64(
-    &arenas_i(mib[2])->astats->astats.nmalloc_large), uint64_t) /* Intentional. */
+    &arenas_i(mib[2])->astats->astats.nrequests_large), uint64_t)
+/*
+ * Note: "nmalloc_large" here instead of "nfills" in the read.  This is
+ * intentional (large has no batch fill).
+ */
+CTL_RO_CGEN(config_stats, stats_arenas_i_large_nfills,
+    ctl_arena_stats_read_u64(
+    &arenas_i(mib[2])->astats->astats.nmalloc_large), uint64_t)
+CTL_RO_CGEN(config_stats, stats_arenas_i_large_nflushes,
+    ctl_arena_stats_read_u64(
+    &arenas_i(mib[2])->astats->astats.nflushes_large), uint64_t)
 
 /* Lock profiling related APIs below. */
 #define RO_MUTEX_CTL_GEN(n, l)						\
@@ -2955,6 +3022,8 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nreslabs,
     arenas_i(mib[2])->astats->bstats[mib[4]].reslabs, uint64_t)
 CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_curslabs,
     arenas_i(mib[2])->astats->bstats[mib[4]].curslabs, size_t)
+CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nonfull_slabs,
+    arenas_i(mib[2])->astats->bstats[mib[4]].nonfull_slabs, size_t)
 
 static const ctl_named_node_t *
 stats_arenas_i_bins_j_index(tsdn_t *tsdn, const size_t *mib,
@@ -3020,15 +3089,23 @@ stats_arenas_i_extents_j_index(tsdn_t *tsdn, const size_t *mib,
 	return super_stats_arenas_i_extents_j_node;
 }
 
+static bool
+ctl_arenas_i_verify(size_t i) {
+	size_t a = arenas_i2a_impl(i, true, true);
+	if (a == UINT_MAX || !ctl_arenas->arenas[a]->initialized) {
+		return true;
+	}
+
+	return false;
+}
+
 static const ctl_named_node_t *
 stats_arenas_i_index(tsdn_t *tsdn, const size_t *mib,
     size_t miblen, size_t i) {
 	const ctl_named_node_t *ret;
-	size_t a;
 
 	malloc_mutex_lock(tsdn, &ctl_mtx);
-	a = arenas_i2a_impl(i, true, true);
-	if (a == UINT_MAX || !ctl_arenas->arenas[a]->initialized) {
+	if (ctl_arenas_i_verify(i)) {
 		ret = NULL;
 		goto label_return;
 	}
@@ -3083,3 +3160,276 @@ experimental_hooks_remove_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,
 label_return:
 	return ret;
 }
+
+/*
+ * Output six memory utilization entries for an input pointer, the first one of
+ * type (void *) and the remaining five of type size_t, describing the following
+ * (in the same order):
+ *
+ * (a) memory address of the extent a potential reallocation would go into,
+ * == the five fields below describe about the extent the pointer resides in ==
+ * (b) number of free regions in the extent,
+ * (c) number of regions in the extent,
+ * (d) size of the extent in terms of bytes,
+ * (e) total number of free regions in the bin the extent belongs to, and
+ * (f) total number of regions in the bin the extent belongs to.
+ *
+ * Note that "(e)" and "(f)" are only available when stats are enabled;
+ * otherwise their values are undefined.
+ *
+ * This API is mainly intended for small class allocations, where extents are
+ * used as slab.
+ *
+ * In case of large class allocations, "(a)" will be NULL, and "(e)" and "(f)"
+ * will be zero (if stats are enabled; otherwise undefined).  The other three
+ * fields will be properly set though the values are trivial: "(b)" will be 0,
+ * "(c)" will be 1, and "(d)" will be the usable size.
+ *
+ * The input pointer and size are respectively passed in by newp and newlen,
+ * and the output fields and size are respectively oldp and *oldlenp.
+ *
+ * It can be beneficial to define the following macros to make it easier to
+ * access the output:
+ *
+ * #define SLABCUR_READ(out) (*(void **)out)
+ * #define COUNTS(out) ((size_t *)((void **)out + 1))
+ * #define NFREE_READ(out) COUNTS(out)[0]
+ * #define NREGS_READ(out) COUNTS(out)[1]
+ * #define SIZE_READ(out) COUNTS(out)[2]
+ * #define BIN_NFREE_READ(out) COUNTS(out)[3]
+ * #define BIN_NREGS_READ(out) COUNTS(out)[4]
+ *
+ * and then write e.g. NFREE_READ(oldp) to fetch the output.  See the unit test
+ * test_query in test/unit/extent_util.c for an example.
+ *
+ * For a typical defragmentation workflow making use of this API for
+ * understanding the fragmentation level, please refer to the comment for
+ * experimental_utilization_batch_query_ctl.
+ *
+ * It's up to the application how to determine the significance of
+ * fragmentation relying on the outputs returned.  Possible choices are:
+ *
+ * (a) if extent utilization ratio is below certain threshold,
+ * (b) if extent memory consumption is above certain threshold,
+ * (c) if extent utilization ratio is significantly below bin utilization ratio,
+ * (d) if input pointer deviates a lot from potential reallocation address, or
+ * (e) some selection/combination of the above.
+ *
+ * The caller needs to make sure that the input/output arguments are valid,
+ * in particular, that the size of the output is correct, i.e.:
+ *
+ *     *oldlenp = sizeof(void *) + sizeof(size_t) * 5
+ *
+ * Otherwise, the function immediately returns EINVAL without touching anything.
+ *
+ * In the rare case where there's no associated extent found for the input
+ * pointer, the function zeros out all output fields and return.  Please refer
+ * to the comment for experimental_utilization_batch_query_ctl to understand the
+ * motivation from C++.
+ */
+static int
+experimental_utilization_query_ctl(tsd_t *tsd, const size_t *mib,
+    size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) {
+	int ret;
+
+	assert(sizeof(extent_util_stats_verbose_t)
+	    == sizeof(void *) + sizeof(size_t) * 5);
+
+	if (oldp == NULL || oldlenp == NULL
+	    || *oldlenp != sizeof(extent_util_stats_verbose_t)
+	    || newp == NULL) {
+		ret = EINVAL;
+		goto label_return;
+	}
+
+	void *ptr = NULL;
+	WRITE(ptr, void *);
+	extent_util_stats_verbose_t *util_stats
+	    = (extent_util_stats_verbose_t *)oldp;
+	extent_util_stats_verbose_get(tsd_tsdn(tsd), ptr,
+	    &util_stats->nfree, &util_stats->nregs, &util_stats->size,
+	    &util_stats->bin_nfree, &util_stats->bin_nregs,
+	    &util_stats->slabcur_addr);
+	ret = 0;
+
+label_return:
+	return ret;
+}
+
+/*
+ * Given an input array of pointers, output three memory utilization entries of
+ * type size_t for each input pointer about the extent it resides in:
+ *
+ * (a) number of free regions in the extent,
+ * (b) number of regions in the extent, and
+ * (c) size of the extent in terms of bytes.
+ *
+ * This API is mainly intended for small class allocations, where extents are
+ * used as slab.  In case of large class allocations, the outputs are trivial:
+ * "(a)" will be 0, "(b)" will be 1, and "(c)" will be the usable size.
+ *
+ * Note that multiple input pointers may reside on a same extent so the output
+ * fields may contain duplicates.
+ *
+ * The format of the input/output looks like:
+ *
+ * input[0]:  1st_pointer_to_query	|  output[0]: 1st_extent_n_free_regions
+ *					|  output[1]: 1st_extent_n_regions
+ *					|  output[2]: 1st_extent_size
+ * input[1]:  2nd_pointer_to_query	|  output[3]: 2nd_extent_n_free_regions
+ *					|  output[4]: 2nd_extent_n_regions
+ *					|  output[5]: 2nd_extent_size
+ * ...					|  ...
+ *
+ * The input array and size are respectively passed in by newp and newlen, and
+ * the output array and size are respectively oldp and *oldlenp.
+ *
+ * It can be beneficial to define the following macros to make it easier to
+ * access the output:
+ *
+ * #define NFREE_READ(out, i) out[(i) * 3]
+ * #define NREGS_READ(out, i) out[(i) * 3 + 1]
+ * #define SIZE_READ(out, i) out[(i) * 3 + 2]
+ *
+ * and then write e.g. NFREE_READ(oldp, i) to fetch the output.  See the unit
+ * test test_batch in test/unit/extent_util.c for a concrete example.
+ *
+ * A typical workflow would be composed of the following steps:
+ *
+ * (1) flush tcache: mallctl("thread.tcache.flush", ...)
+ * (2) initialize input array of pointers to query fragmentation
+ * (3) allocate output array to hold utilization statistics
+ * (4) query utilization: mallctl("experimental.utilization.batch_query", ...)
+ * (5) (optional) decide if it's worthwhile to defragment; otherwise stop here
+ * (6) disable tcache: mallctl("thread.tcache.enabled", ...)
+ * (7) defragment allocations with significant fragmentation, e.g.:
+ *         for each allocation {
+ *             if it's fragmented {
+ *                 malloc(...);
+ *                 memcpy(...);
+ *                 free(...);
+ *             }
+ *         }
+ * (8) enable tcache: mallctl("thread.tcache.enabled", ...)
+ *
+ * The application can determine the significance of fragmentation themselves
+ * relying on the statistics returned, both at the overall level i.e. step "(5)"
+ * and at individual allocation level i.e. within step "(7)".  Possible choices
+ * are:
+ *
+ * (a) whether memory utilization ratio is below certain threshold,
+ * (b) whether memory consumption is above certain threshold, or
+ * (c) some combination of the two.
+ *
+ * The caller needs to make sure that the input/output arrays are valid and
+ * their sizes are proper as well as matched, meaning:
+ *
+ * (a) newlen = n_pointers * sizeof(const void *)
+ * (b) *oldlenp = n_pointers * sizeof(size_t) * 3
+ * (c) n_pointers > 0
+ *
+ * Otherwise, the function immediately returns EINVAL without touching anything.
+ *
+ * In the rare case where there's no associated extent found for some pointers,
+ * rather than immediately terminating the computation and raising an error,
+ * the function simply zeros out the corresponding output fields and continues
+ * the computation until all input pointers are handled.  The motivations of
+ * such a design are as follows:
+ *
+ * (a) The function always either processes nothing or processes everything, and
+ * never leaves the output half touched and half untouched.
+ *
+ * (b) It facilitates usage needs especially common in C++.  A vast variety of
+ * C++ objects are instantiated with multiple dynamic memory allocations.  For
+ * example, std::string and std::vector typically use at least two allocations,
+ * one for the metadata and one for the actual content.  Other types may use
+ * even more allocations.  When inquiring about utilization statistics, the
+ * caller often wants to examine into all such allocations, especially internal
+ * one(s), rather than just the topmost one.  The issue comes when some
+ * implementations do certain optimizations to reduce/aggregate some internal
+ * allocations, e.g. putting short strings directly into the metadata, and such
+ * decisions are not known to the caller.  Therefore, we permit pointers to
+ * memory usages that may not be returned by previous malloc calls, and we
+ * provide the caller a convenient way to identify such cases.
+ */
+static int
+experimental_utilization_batch_query_ctl(tsd_t *tsd, const size_t *mib,
+    size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) {
+	int ret;
+
+	assert(sizeof(extent_util_stats_t) == sizeof(size_t) * 3);
+
+	const size_t len = newlen / sizeof(const void *);
+	if (oldp == NULL || oldlenp == NULL || newp == NULL || newlen == 0
+	    || newlen != len * sizeof(const void *)
+	    || *oldlenp != len * sizeof(extent_util_stats_t)) {
+		ret = EINVAL;
+		goto label_return;
+	}
+
+	void **ptrs = (void **)newp;
+	extent_util_stats_t *util_stats = (extent_util_stats_t *)oldp;
+	size_t i;
+	for (i = 0; i < len; ++i) {
+		extent_util_stats_get(tsd_tsdn(tsd), ptrs[i],
+		    &util_stats[i].nfree, &util_stats[i].nregs,
+		    &util_stats[i].size);
+	}
+	ret = 0;
+
+label_return:
+	return ret;
+}
+
+static const ctl_named_node_t *
+experimental_arenas_i_index(tsdn_t *tsdn, const size_t *mib,
+    size_t miblen, size_t i) {
+	const ctl_named_node_t *ret;
+
+	malloc_mutex_lock(tsdn, &ctl_mtx);
+	if (ctl_arenas_i_verify(i)) {
+		ret = NULL;
+		goto label_return;
+	}
+	ret = super_experimental_arenas_i_node;
+label_return:
+	malloc_mutex_unlock(tsdn, &ctl_mtx);
+	return ret;
+}
+
+static int
+experimental_arenas_i_pactivep_ctl(tsd_t *tsd, const size_t *mib,
+    size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) {
+	if (!config_stats) {
+		return ENOENT;
+	}
+	if (oldp == NULL || oldlenp == NULL || *oldlenp != sizeof(size_t *)) {
+		return EINVAL;
+	}
+
+	unsigned arena_ind;
+	arena_t *arena;
+	int ret;
+	size_t *pactivep;
+
+	malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx);
+	READONLY();
+	MIB_UNSIGNED(arena_ind, 2);
+	if (arena_ind < narenas_total_get() && (arena =
+	    arena_get(tsd_tsdn(tsd), arena_ind, false)) != NULL) {
+#if defined(JEMALLOC_GCC_ATOMIC_ATOMICS) ||				\
+    defined(JEMALLOC_GCC_SYNC_ATOMICS) || defined(_MSC_VER)
+		/* Expose the underlying counter for fast read. */
+		pactivep = (size_t *)&(arena->nactive.repr);
+		READ(pactivep, size_t *);
+		ret = 0;
+#else
+		ret = EFAULT;
+#endif
+	} else {
+		ret = EFAULT;
+	}
+label_return:
+	malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx);
+	return ret;
+}
diff --git a/src/extent.c b/src/extent.c
index 62086c7d..9237f903 100644
--- a/src/extent.c
+++ b/src/extent.c
@@ -50,20 +50,16 @@ static bool extent_purge_forced_default(extent_hooks_t *extent_hooks,
 static bool extent_purge_forced_impl(tsdn_t *tsdn, arena_t *arena,
     extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset,
     size_t length, bool growing_retained);
-#ifdef JEMALLOC_MAPS_COALESCE
 static bool extent_split_default(extent_hooks_t *extent_hooks, void *addr,
     size_t size, size_t size_a, size_t size_b, bool committed,
     unsigned arena_ind);
-#endif
 static extent_t *extent_split_impl(tsdn_t *tsdn, arena_t *arena,
     extent_hooks_t **r_extent_hooks, extent_t *extent, size_t size_a,
     szind_t szind_a, bool slab_a, size_t size_b, szind_t szind_b, bool slab_b,
     bool growing_retained);
-#ifdef JEMALLOC_MAPS_COALESCE
 static bool extent_merge_default(extent_hooks_t *extent_hooks, void *addr_a,
     size_t size_a, void *addr_b, size_t size_b, bool committed,
     unsigned arena_ind);
-#endif
 static bool extent_merge_impl(tsdn_t *tsdn, arena_t *arena,
     extent_hooks_t **r_extent_hooks, extent_t *a, extent_t *b,
     bool growing_retained);
@@ -88,11 +84,9 @@ const extent_hooks_t	extent_hooks_default = {
 	,
 	NULL
 #endif
-#ifdef JEMALLOC_MAPS_COALESCE
 	,
 	extent_split_default,
 	extent_merge_default
-#endif
 };
 
 /* Used exclusively for gdump triggering. */
@@ -441,30 +435,6 @@ extents_fit_alignment(extents_t *extents, size_t min_size, size_t max_size,
 	return NULL;
 }
 
-/* Do any-best-fit extent selection, i.e. select any extent that best fits. */
-static extent_t *
-extents_best_fit_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents,
-    size_t size) {
-	pszind_t pind = sz_psz2ind(extent_size_quantize_ceil(size));
-	pszind_t i = (pszind_t)bitmap_ffu(extents->bitmap, &extents_bitmap_info,
-	    (size_t)pind);
-	if (i < SC_NPSIZES + 1) {
-		/*
-		 * In order to reduce fragmentation, avoid reusing and splitting
-		 * large extents for much smaller sizes.
-		 */
-		if ((sz_pind2sz(i) >> opt_lg_extent_max_active_fit) > size) {
-			return NULL;
-		}
-		assert(!extent_heap_empty(&extents->heaps[i]));
-		extent_t *extent = extent_heap_first(&extents->heaps[i]);
-		assert(extent_size_get(extent) >= size);
-		return extent;
-	}
-
-	return NULL;
-}
-
 /*
  * Do first-fit extent selection, i.e. select the oldest/lowest extent that is
  * large enough.
@@ -475,6 +445,16 @@ extents_first_fit_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents,
 	extent_t *ret = NULL;
 
 	pszind_t pind = sz_psz2ind(extent_size_quantize_ceil(size));
+
+	if (!maps_coalesce && !opt_retain) {
+		/*
+		 * No split / merge allowed (Windows w/o retain). Try exact fit
+		 * only.
+		 */
+		return extent_heap_empty(&extents->heaps[pind]) ? NULL :
+		    extent_heap_first(&extents->heaps[pind]);
+	}
+
 	for (pszind_t i = (pszind_t)bitmap_ffu(extents->bitmap,
 	    &extents_bitmap_info, (size_t)pind);
 	    i < SC_NPSIZES + 1;
@@ -483,6 +463,16 @@ extents_first_fit_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents,
 		assert(!extent_heap_empty(&extents->heaps[i]));
 		extent_t *extent = extent_heap_first(&extents->heaps[i]);
 		assert(extent_size_get(extent) >= size);
+		/*
+		 * In order to reduce fragmentation, avoid reusing and splitting
+		 * large extents for much smaller sizes.
+		 *
+		 * Only do check for dirty extents (delay_coalesce).
+		 */
+		if (extents->delay_coalesce &&
+		    (sz_pind2sz(i) >> opt_lg_extent_max_active_fit) > size) {
+			break;
+		}
 		if (ret == NULL || extent_snad_comp(extent, ret) < 0) {
 			ret = extent;
 		}
@@ -496,10 +486,8 @@ extents_first_fit_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents,
 }
 
 /*
- * Do {best,first}-fit extent selection, where the selection policy choice is
- * based on extents->delay_coalesce.  Best-fit selection requires less
- * searching, but its layout policy is less stable and may cause higher virtual
- * memory fragmentation as a side effect.
+ * Do first-fit extent selection, where the selection policy choice is
+ * based on extents->delay_coalesce.
  */
 static extent_t *
 extents_fit_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents,
@@ -512,8 +500,7 @@ extents_fit_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents,
 		return NULL;
 	}
 
-	extent_t *extent = extents->delay_coalesce ?
-	    extents_best_fit_locked(tsdn, arena, extents, max_size) :
+	extent_t *extent =
 	    extents_first_fit_locked(tsdn, arena, extents, max_size);
 
 	if (alignment > PAGE && extent == NULL) {
@@ -640,16 +627,24 @@ label_return:
 	return extent;
 }
 
+/*
+ * This can only happen when we fail to allocate a new extent struct (which
+ * indicates OOM), e.g. when trying to split an existing extent.
+ */
 static void
-extents_leak(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks,
+extents_abandon_vm(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks,
     extents_t *extents, extent_t *extent, bool growing_retained) {
+	size_t sz = extent_size_get(extent);
+	if (config_stats) {
+		arena_stats_accum_zu(&arena->stats.abandoned_vm, sz);
+	}
 	/*
 	 * Leak extent after making sure its pages have already been purged, so
 	 * that this is only a virtual memory leak.
 	 */
 	if (extents_state_get(extents) == extent_state_dirty) {
 		if (extent_purge_lazy_impl(tsdn, arena, r_extent_hooks,
-		    extent, 0, extent_size_get(extent), growing_retained)) {
+		    extent, 0, sz, growing_retained)) {
 			extent_purge_forced_impl(tsdn, arena, r_extent_hooks,
 			    extent, 0, extent_size_get(extent),
 			    growing_retained);
@@ -1073,6 +1068,17 @@ extent_recycle_split(tsdn_t *tsdn, arena_t *arena,
 	    &to_leak, &to_salvage, new_addr, size, pad, alignment, slab, szind,
 	    growing_retained);
 
+	if (!maps_coalesce && result != extent_split_interior_ok
+	    && !opt_retain) {
+		/*
+		 * Split isn't supported (implies Windows w/o retain).  Avoid
+		 * leaking the extents.
+		 */
+		assert(to_leak != NULL && lead == NULL && trail == NULL);
+		extent_deactivate(tsdn, arena, extents, to_leak);
+		return NULL;
+	}
+
 	if (result == extent_split_interior_ok) {
 		if (lead != NULL) {
 			extent_deactivate(tsdn, arena, extents, lead);
@@ -1093,7 +1099,7 @@ extent_recycle_split(tsdn_t *tsdn, arena_t *arena,
 		if (to_leak != NULL) {
 			void *leak = extent_base_get(to_leak);
 			extent_deregister_no_gdump_sub(tsdn, to_leak);
-			extents_leak(tsdn, arena, r_extent_hooks, extents,
+			extents_abandon_vm(tsdn, arena, r_extent_hooks, extents,
 			    to_leak, growing_retained);
 			assert(extent_lock_from_addr(tsdn, rtree_ctx, leak,
 			    false) == NULL);
@@ -1256,7 +1262,7 @@ extent_alloc_default(extent_hooks_t *extent_hooks, void *new_addr, size_t size,
 	assert(arena != NULL);
 
 	return extent_alloc_default_impl(tsdn, arena, new_addr, size,
-	    alignment, zero, commit);
+	    ALIGNMENT_CEILING(alignment, PAGE), zero, commit);
 }
 
 static void
@@ -1338,15 +1344,14 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena,
 
 	extent_init(extent, arena, ptr, alloc_size, false, SC_NSIZES,
 	    arena_extent_sn_next(arena), extent_state_active, zeroed,
-	    committed, true);
+	    committed, true, EXTENT_IS_HEAD);
 	if (ptr == NULL) {
 		extent_dalloc(tsdn, arena, extent);
 		goto label_err;
 	}
 
 	if (extent_register_no_gdump_add(tsdn, extent)) {
-		extents_leak(tsdn, arena, r_extent_hooks,
-		    &arena->extents_retained, extent, true);
+		extent_dalloc(tsdn, arena, extent);
 		goto label_err;
 	}
 
@@ -1393,7 +1398,7 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena,
 		}
 		if (to_leak != NULL) {
 			extent_deregister_no_gdump_sub(tsdn, to_leak);
-			extents_leak(tsdn, arena, r_extent_hooks,
+			extents_abandon_vm(tsdn, arena, r_extent_hooks,
 			    &arena->extents_retained, to_leak, true);
 		}
 		goto label_err;
@@ -1493,14 +1498,15 @@ extent_alloc_wrapper_hard(tsdn_t *tsdn, arena_t *arena,
 		return NULL;
 	}
 	void *addr;
+	size_t palignment = ALIGNMENT_CEILING(alignment, PAGE);
 	if (*r_extent_hooks == &extent_hooks_default) {
 		/* Call directly to propagate tsdn. */
 		addr = extent_alloc_default_impl(tsdn, arena, new_addr, esize,
-		    alignment, zero, commit);
+		    palignment, zero, commit);
 	} else {
 		extent_hook_pre_reentrancy(tsdn, arena);
 		addr = (*r_extent_hooks)->alloc(*r_extent_hooks, new_addr,
-		    esize, alignment, zero, commit, arena_ind_get(arena));
+		    esize, palignment, zero, commit, arena_ind_get(arena));
 		extent_hook_post_reentrancy(tsdn);
 	}
 	if (addr == NULL) {
@@ -1509,13 +1515,12 @@ extent_alloc_wrapper_hard(tsdn_t *tsdn, arena_t *arena,
 	}
 	extent_init(extent, arena, addr, esize, slab, szind,
 	    arena_extent_sn_next(arena), extent_state_active, *zero, *commit,
-	    true);
+	    true, EXTENT_NOT_HEAD);
 	if (pad != 0) {
 		extent_addr_randomize(tsdn, extent, alignment);
 	}
 	if (extent_register(tsdn, extent)) {
-		extents_leak(tsdn, arena, r_extent_hooks,
-		    &arena->extents_retained, extent, false);
+		extent_dalloc(tsdn, arena, extent);
 		return NULL;
 	}
 
@@ -1738,8 +1743,7 @@ extent_dalloc_gap(tsdn_t *tsdn, arena_t *arena, extent_t *extent) {
 	    WITNESS_RANK_CORE, 0);
 
 	if (extent_register(tsdn, extent)) {
-		extents_leak(tsdn, arena, &extent_hooks,
-		    &arena->extents_retained, extent, false);
+		extent_dalloc(tsdn, arena, extent);
 		return;
 	}
 	extent_dalloc_wrapper(tsdn, arena, &extent_hooks, extent);
@@ -2059,13 +2063,20 @@ extent_purge_forced_wrapper(tsdn_t *tsdn, arena_t *arena,
 	    offset, length, false);
 }
 
-#ifdef JEMALLOC_MAPS_COALESCE
 static bool
 extent_split_default(extent_hooks_t *extent_hooks, void *addr, size_t size,
     size_t size_a, size_t size_b, bool committed, unsigned arena_ind) {
-	return !maps_coalesce;
+	if (!maps_coalesce) {
+		/*
+		 * Without retain, only whole regions can be purged (required by
+		 * MEM_RELEASE on Windows) -- therefore disallow splitting.  See
+		 * comments in extent_head_no_merge().
+		 */
+		return !opt_retain;
+	}
+
+	return false;
 }
-#endif
 
 /*
  * Accepts the extent to split, and the characteristics of each side of the
@@ -2097,7 +2108,8 @@ extent_split_impl(tsdn_t *tsdn, arena_t *arena,
 	extent_init(trail, arena, (void *)((uintptr_t)extent_base_get(extent) +
 	    size_a), size_b, slab_b, szind_b, extent_sn_get(extent),
 	    extent_state_get(extent), extent_zeroed_get(extent),
-	    extent_committed_get(extent), extent_dumpable_get(extent));
+	    extent_committed_get(extent), extent_dumpable_get(extent),
+	    EXTENT_NOT_HEAD);
 
 	rtree_ctx_t rtree_ctx_fallback;
 	rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback);
@@ -2108,7 +2120,8 @@ extent_split_impl(tsdn_t *tsdn, arena_t *arena,
 		extent_init(&lead, arena, extent_addr_get(extent), size_a,
 		    slab_a, szind_a, extent_sn_get(extent),
 		    extent_state_get(extent), extent_zeroed_get(extent),
-		    extent_committed_get(extent), extent_dumpable_get(extent));
+		    extent_committed_get(extent), extent_dumpable_get(extent),
+		    EXTENT_NOT_HEAD);
 
 		extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, &lead, false,
 		    true, &lead_elm_a, &lead_elm_b);
@@ -2166,7 +2179,7 @@ extent_split_wrapper(tsdn_t *tsdn, arena_t *arena,
 
 static bool
 extent_merge_default_impl(void *addr_a, void *addr_b) {
-	if (!maps_coalesce) {
+	if (!maps_coalesce && !opt_retain) {
 		return true;
 	}
 	if (have_dss && !extent_dss_mergeable(addr_a, addr_b)) {
@@ -2176,13 +2189,51 @@ extent_merge_default_impl(void *addr_a, void *addr_b) {
 	return false;
 }
 
-#ifdef JEMALLOC_MAPS_COALESCE
+/*
+ * Returns true if the given extents can't be merged because of their head bit
+ * settings.  Assumes the second extent has the higher address.
+ */
+static bool
+extent_head_no_merge(extent_t *a, extent_t *b) {
+	assert(extent_base_get(a) < extent_base_get(b));
+	/*
+	 * When coalesce is not always allowed (Windows), only merge extents
+	 * from the same VirtualAlloc region under opt.retain (in which case
+	 * MEM_DECOMMIT is utilized for purging).
+	 */
+	if (maps_coalesce) {
+		return false;
+	}
+	if (!opt_retain) {
+		return true;
+	}
+	/* If b is a head extent, disallow the cross-region merge. */
+	if (extent_is_head_get(b)) {
+		/*
+		 * Additionally, sn should not overflow with retain; sanity
+		 * check that different regions have unique sn.
+		 */
+		assert(extent_sn_comp(a, b) != 0);
+		return true;
+	}
+	assert(extent_sn_comp(a, b) == 0);
+
+	return false;
+}
+
 static bool
 extent_merge_default(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a,
     void *addr_b, size_t size_b, bool committed, unsigned arena_ind) {
+	if (!maps_coalesce) {
+		tsdn_t *tsdn = tsdn_fetch();
+		extent_t *a = iealloc(tsdn, addr_a);
+		extent_t *b = iealloc(tsdn, addr_b);
+		if (extent_head_no_merge(a, b)) {
+			return true;
+		}
+	}
 	return extent_merge_default_impl(addr_a, addr_b);
 }
-#endif
 
 static bool
 extent_merge_impl(tsdn_t *tsdn, arena_t *arena,
@@ -2190,10 +2241,11 @@ extent_merge_impl(tsdn_t *tsdn, arena_t *arena,
     bool growing_retained) {
 	witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
 	    WITNESS_RANK_CORE, growing_retained ? 1 : 0);
+	assert(extent_base_get(a) < extent_base_get(b));
 
 	extent_hooks_assure_initialized(arena, r_extent_hooks);
 
-	if ((*r_extent_hooks)->merge == NULL) {
+	if ((*r_extent_hooks)->merge == NULL || extent_head_no_merge(a, b)) {
 		return true;
 	}
 
@@ -2280,3 +2332,72 @@ extent_boot(void) {
 
 	return false;
 }
+
+void
+extent_util_stats_get(tsdn_t *tsdn, const void *ptr,
+    size_t *nfree, size_t *nregs, size_t *size) {
+	assert(ptr != NULL && nfree != NULL && nregs != NULL && size != NULL);
+
+	const extent_t *extent = iealloc(tsdn, ptr);
+	if (unlikely(extent == NULL)) {
+		*nfree = *nregs = *size = 0;
+		return;
+	}
+
+	*size = extent_size_get(extent);
+	if (!extent_slab_get(extent)) {
+		*nfree = 0;
+		*nregs = 1;
+	} else {
+		*nfree = extent_nfree_get(extent);
+		*nregs = bin_infos[extent_szind_get(extent)].nregs;
+		assert(*nfree <= *nregs);
+		assert(*nfree * extent_usize_get(extent) <= *size);
+	}
+}
+
+void
+extent_util_stats_verbose_get(tsdn_t *tsdn, const void *ptr,
+    size_t *nfree, size_t *nregs, size_t *size,
+    size_t *bin_nfree, size_t *bin_nregs, void **slabcur_addr) {
+	assert(ptr != NULL && nfree != NULL && nregs != NULL && size != NULL
+	    && bin_nfree != NULL && bin_nregs != NULL && slabcur_addr != NULL);
+
+	const extent_t *extent = iealloc(tsdn, ptr);
+	if (unlikely(extent == NULL)) {
+		*nfree = *nregs = *size = *bin_nfree = *bin_nregs = 0;
+		*slabcur_addr = NULL;
+		return;
+	}
+
+	*size = extent_size_get(extent);
+	if (!extent_slab_get(extent)) {
+		*nfree = *bin_nfree = *bin_nregs = 0;
+		*nregs = 1;
+		*slabcur_addr = NULL;
+		return;
+	}
+
+	*nfree = extent_nfree_get(extent);
+	const szind_t szind = extent_szind_get(extent);
+	*nregs = bin_infos[szind].nregs;
+	assert(*nfree <= *nregs);
+	assert(*nfree * extent_usize_get(extent) <= *size);
+
+	const arena_t *arena = extent_arena_get(extent);
+	assert(arena != NULL);
+	const unsigned binshard = extent_binshard_get(extent);
+	bin_t *bin = &arena->bins[szind].bin_shards[binshard];
+
+	malloc_mutex_lock(tsdn, &bin->lock);
+	if (config_stats) {
+		*bin_nregs = *nregs * bin->stats.curslabs;
+		assert(*bin_nregs >= bin->stats.curregs);
+		*bin_nfree = *bin_nregs - bin->stats.curregs;
+	} else {
+		*bin_nfree = *bin_nregs = 0;
+	}
+	*slabcur_addr = extent_addr_get(bin->slabcur);
+	assert(*slabcur_addr != NULL);
+	malloc_mutex_unlock(tsdn, &bin->lock);
+}
diff --git a/src/extent_dss.c b/src/extent_dss.c
index 6c56cf65..85817891 100644
--- a/src/extent_dss.c
+++ b/src/extent_dss.c
@@ -113,7 +113,7 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size,
 
 	cassert(have_dss);
 	assert(size > 0);
-	assert(alignment > 0);
+	assert(alignment == ALIGNMENT_CEILING(alignment, PAGE));
 
 	/*
 	 * sbrk() uses a signed increment argument, so take care not to
@@ -156,7 +156,8 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size,
 				extent_init(gap, arena, gap_addr_page,
 				    gap_size_page, false, SC_NSIZES,
 				    arena_extent_sn_next(arena),
-				    extent_state_active, false, true, true);
+				    extent_state_active, false, true, true,
+				    EXTENT_NOT_HEAD);
 			}
 			/*
 			 * Compute the address just past the end of the desired
@@ -200,7 +201,7 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size,
 					extent_init(&extent, arena, ret, size,
 					    size, false, SC_NSIZES,
 					    extent_state_active, false, true,
-					    true);
+					    true, EXTENT_NOT_HEAD);
 					if (extent_purge_forced_wrapper(tsdn,
 					    arena, &extent_hooks, &extent, 0,
 					    size)) {
diff --git a/src/extent_mmap.c b/src/extent_mmap.c
index 8d607dc8..17fd1c8f 100644
--- a/src/extent_mmap.c
+++ b/src/extent_mmap.c
@@ -21,8 +21,8 @@ bool	opt_retain =
 void *
 extent_alloc_mmap(void *new_addr, size_t size, size_t alignment, bool *zero,
     bool *commit) {
-	void *ret = pages_map(new_addr, size, ALIGNMENT_CEILING(alignment,
-	    PAGE), commit);
+	assert(alignment == ALIGNMENT_CEILING(alignment, PAGE));
+	void *ret = pages_map(new_addr, size, alignment, commit);
 	if (ret == NULL) {
 		return NULL;
 	}
diff --git a/src/jemalloc.c b/src/jemalloc.c
index c8afa9c4..ed13718d 100644
--- a/src/jemalloc.c
+++ b/src/jemalloc.c
@@ -13,6 +13,7 @@
 #include "jemalloc/internal/malloc_io.h"
 #include "jemalloc/internal/mutex.h"
 #include "jemalloc/internal/rtree.h"
+#include "jemalloc/internal/safety_check.h"
 #include "jemalloc/internal/sc.h"
 #include "jemalloc/internal/spin.h"
 #include "jemalloc/internal/sz.h"
@@ -42,6 +43,8 @@ bool	opt_abort_conf =
     false
 #endif
     ;
+/* Intentionally default off, even with debug builds. */
+bool	opt_confirm_conf = false;
 const char	*opt_junk =
 #if (defined(JEMALLOC_DEBUG) && defined(JEMALLOC_FILL))
     "true"
@@ -928,93 +931,141 @@ malloc_slow_flag_init(void) {
 	malloc_slow = (malloc_slow_flags != 0);
 }
 
+/* Number of sources for initializing malloc_conf */
+#define MALLOC_CONF_NSOURCES 4
+
+static const char *
+obtain_malloc_conf(unsigned which_source, char buf[PATH_MAX + 1]) {
+	if (config_debug) {
+		static unsigned read_source = 0;
+		/*
+		 * Each source should only be read once, to minimize # of
+		 * syscalls on init.
+		 */
+		assert(read_source++ == which_source);
+	}
+	assert(which_source < MALLOC_CONF_NSOURCES);
+
+	const char *ret;
+	switch (which_source) {
+	case 0:
+		ret = config_malloc_conf;
+		break;
+	case 1:
+		if (je_malloc_conf != NULL) {
+			/* Use options that were compiled into the program. */
+			ret = je_malloc_conf;
+		} else {
+			/* No configuration specified. */
+			ret = NULL;
+		}
+		break;
+	case 2: {
+		ssize_t linklen = 0;
+#ifndef _WIN32
+		int saved_errno = errno;
+		const char *linkname =
+#  ifdef JEMALLOC_PREFIX
+		    "/etc/"JEMALLOC_PREFIX"malloc.conf"
+#  else
+		    "/etc/malloc.conf"
+#  endif
+		    ;
+
+		/*
+		 * Try to use the contents of the "/etc/malloc.conf" symbolic
+		 * link's name.
+		 */
+#ifndef JEMALLOC_READLINKAT
+		linklen = readlink(linkname, buf, PATH_MAX);
+#else
+		linklen = readlinkat(AT_FDCWD, linkname, buf, PATH_MAX);
+#endif
+		if (linklen == -1) {
+			/* No configuration specified. */
+			linklen = 0;
+			/* Restore errno. */
+			set_errno(saved_errno);
+		}
+#endif
+		buf[linklen] = '\0';
+		ret = buf;
+		break;
+	} case 3: {
+		const char *envname =
+#ifdef JEMALLOC_PREFIX
+		    JEMALLOC_CPREFIX"MALLOC_CONF"
+#else
+		    "MALLOC_CONF"
+#endif
+		    ;
+
+		if ((ret = jemalloc_secure_getenv(envname)) != NULL) {
+			/*
+			 * Do nothing; opts is already initialized to the value
+			 * of the MALLOC_CONF environment variable.
+			 */
+		} else {
+			/* No configuration specified. */
+			ret = NULL;
+		}
+		break;
+	} default:
+		not_reached();
+		ret = NULL;
+	}
+	return ret;
+}
+
 static void
-malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) {
+malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS],
+    bool initial_call, const char *opts_cache[MALLOC_CONF_NSOURCES],
+    char buf[PATH_MAX + 1]) {
+	static const char *opts_explain[MALLOC_CONF_NSOURCES] = {
+		"string specified via --with-malloc-conf",
+		"string pointed to by the global variable malloc_conf",
+		"\"name\" of the file referenced by the symbolic link named "
+		    "/etc/malloc.conf",
+		"value of the environment variable MALLOC_CONF"
+	};
 	unsigned i;
-	char buf[PATH_MAX + 1];
 	const char *opts, *k, *v;
 	size_t klen, vlen;
 
-	for (i = 0; i < 4; i++) {
+	for (i = 0; i < MALLOC_CONF_NSOURCES; i++) {
 		/* Get runtime configuration. */
-		switch (i) {
-		case 0:
-			opts = config_malloc_conf;
-			break;
-		case 1:
-			if (je_malloc_conf != NULL) {
-				/*
-				 * Use options that were compiled into the
-				 * program.
-				 */
-				opts = je_malloc_conf;
-			} else {
-				/* No configuration specified. */
-				buf[0] = '\0';
-				opts = buf;
-			}
-			break;
-		case 2: {
-			ssize_t linklen = 0;
-#ifndef _WIN32
-			int saved_errno = errno;
-			const char *linkname =
-#  ifdef JEMALLOC_PREFIX
-			    "/etc/"JEMALLOC_PREFIX"malloc.conf"
-#  else
-			    "/etc/malloc.conf"
-#  endif
-			    ;
-
-			/*
-			 * Try to use the contents of the "/etc/malloc.conf"
-			 * symbolic link's name.
-			 */
-#ifndef JEMALLOC_READLINKAT
-			linklen = readlink(linkname, buf, sizeof(buf) - 1);
-#else
-			linklen = readlinkat(AT_FDCWD, linkname, buf,
-			    sizeof(buf) - 1);
-#endif
-			if (linklen == -1) {
-				/* No configuration specified. */
-				linklen = 0;
-				/* Restore errno. */
-				set_errno(saved_errno);
-			}
-#endif
-			buf[linklen] = '\0';
-			opts = buf;
-			break;
-		} case 3: {
-			const char *envname =
-#ifdef JEMALLOC_PREFIX
-			    JEMALLOC_CPREFIX"MALLOC_CONF"
-#else
-			    "MALLOC_CONF"
-#endif
-			    ;
-
-			if ((opts = jemalloc_secure_getenv(envname)) != NULL) {
-				/*
-				 * Do nothing; opts is already initialized to
-				 * the value of the MALLOC_CONF environment
-				 * variable.
-				 */
-			} else {
-				/* No configuration specified. */
-				buf[0] = '\0';
-				opts = buf;
-			}
-			break;
-		} default:
-			not_reached();
-			buf[0] = '\0';
-			opts = buf;
+		if (initial_call) {
+			opts_cache[i] = obtain_malloc_conf(i, buf);
+		}
+		opts = opts_cache[i];
+		if (!initial_call && opt_confirm_conf) {
+			malloc_printf(
+			    "<jemalloc>: malloc_conf #%u (%s): \"%s\"\n",
+			    i + 1, opts_explain[i], opts != NULL ? opts : "");
+		}
+		if (opts == NULL) {
+			continue;
 		}
 
 		while (*opts != '\0' && !malloc_conf_next(&opts, &k, &klen, &v,
 		    &vlen)) {
+
+#define CONF_ERROR(msg, k, klen, v, vlen)				\
+			if (!initial_call) {				\
+				malloc_conf_error(			\
+				    msg, k, klen, v, vlen);		\
+				cur_opt_valid = false;			\
+			}
+#define CONF_CONTINUE	{						\
+				if (!initial_call && opt_confirm_conf	\
+				    && cur_opt_valid) {			\
+					malloc_printf("<jemalloc>: -- "	\
+					    "Set conf value: %.*s:%.*s"	\
+					    "\n", (int)klen, k,		\
+					    (int)vlen, v);		\
+				}					\
+				continue;				\
+			}
 #define CONF_MATCH(n)							\
 	(sizeof(n)-1 == klen && strncmp(n, k, klen) == 0)
 #define CONF_MATCH_VALUE(n)						\
@@ -1026,11 +1077,10 @@ malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) {
 				} else if (CONF_MATCH_VALUE("false")) {	\
 					o = false;			\
 				} else {				\
-					malloc_conf_error(		\
-					    "Invalid conf value",	\
+					CONF_ERROR("Invalid conf value",\
 					    k, klen, v, vlen);		\
 				}					\
-				continue;				\
+				CONF_CONTINUE;				\
 			}
       /*
        * One of the CONF_MIN macros below expands, in one of the use points,
@@ -1040,10 +1090,10 @@ malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) {
       JEMALLOC_DIAGNOSTIC_PUSH
       JEMALLOC_DIAGNOSTIC_IGNORE_TYPE_LIMITS
 
-#define CONF_MIN_no(um, min)	false
-#define CONF_MIN_yes(um, min)	((um) < (min))
-#define CONF_MAX_no(um, max)	false
-#define CONF_MAX_yes(um, max)	((um) > (max))
+#define CONF_DONT_CHECK_MIN(um, min)	false
+#define CONF_CHECK_MIN(um, min)	((um) < (min))
+#define CONF_DONT_CHECK_MAX(um, max)	false
+#define CONF_CHECK_MAX(um, max)	((um) > (max))
 #define CONF_HANDLE_T_U(t, o, n, min, max, check_min, check_max, clip)	\
 			if (CONF_MATCH(n)) {				\
 				uintmax_t um;				\
@@ -1053,26 +1103,21 @@ malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) {
 				um = malloc_strtoumax(v, &end, 0);	\
 				if (get_errno() != 0 || (uintptr_t)end -\
 				    (uintptr_t)v != vlen) {		\
-					malloc_conf_error(		\
-					    "Invalid conf value",	\
+					CONF_ERROR("Invalid conf value",\
 					    k, klen, v, vlen);		\
 				} else if (clip) {			\
-					if (CONF_MIN_##check_min(um,	\
-					    (t)(min))) {		\
+					if (check_min(um, (t)(min))) {	\
 						o = (t)(min);		\
 					} else if (			\
-					    CONF_MAX_##check_max(um,	\
-					    (t)(max))) {		\
+					    check_max(um, (t)(max))) {	\
 						o = (t)(max);		\
 					} else {			\
 						o = (t)um;		\
 					}				\
 				} else {				\
-					if (CONF_MIN_##check_min(um,	\
-					    (t)(min)) ||		\
-					    CONF_MAX_##check_max(um,	\
-					    (t)(max))) {		\
-						malloc_conf_error(	\
+					if (check_min(um, (t)(min)) ||	\
+					    check_max(um, (t)(max))) {	\
+						CONF_ERROR(		\
 						    "Out-of-range "	\
 						    "conf value",	\
 						    k, klen, v, vlen);	\
@@ -1080,7 +1125,7 @@ malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) {
 						o = (t)um;		\
 					}				\
 				}					\
-				continue;				\
+				CONF_CONTINUE;				\
 			}
 #define CONF_HANDLE_UNSIGNED(o, n, min, max, check_min, check_max,	\
     clip)								\
@@ -1098,18 +1143,17 @@ malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) {
 				l = strtol(v, &end, 0);			\
 				if (get_errno() != 0 || (uintptr_t)end -\
 				    (uintptr_t)v != vlen) {		\
-					malloc_conf_error(		\
-					    "Invalid conf value",	\
+					CONF_ERROR("Invalid conf value",\
 					    k, klen, v, vlen);		\
 				} else if (l < (ssize_t)(min) || l >	\
 				    (ssize_t)(max)) {			\
-					malloc_conf_error(		\
+					CONF_ERROR(			\
 					    "Out-of-range conf value",	\
 					    k, klen, v, vlen);		\
 				} else {				\
 					o = l;				\
 				}					\
-				continue;				\
+				CONF_CONTINUE;				\
 			}
 #define CONF_HANDLE_CHAR_P(o, n, d)					\
 			if (CONF_MATCH(n)) {				\
@@ -1118,7 +1162,14 @@ malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) {
 				    sizeof(o)-1;			\
 				strncpy(o, v, cpylen);			\
 				o[cpylen] = '\0';			\
-				continue;				\
+				CONF_CONTINUE;				\
+			}
+
+			bool cur_opt_valid = true;
+
+			CONF_HANDLE_BOOL(opt_confirm_conf, "confirm_conf")
+			if (initial_call) {
+				continue;
 			}
 
 			CONF_HANDLE_BOOL(opt_abort, "abort")
@@ -1135,10 +1186,10 @@ malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) {
 					}
 				}
 				if (!match) {
-					malloc_conf_error("Invalid conf value",
+					CONF_ERROR("Invalid conf value",
 					    k, klen, v, vlen);
 				}
-				continue;
+				CONF_CONTINUE;
 			}
 			CONF_HANDLE_BOOL(opt_retain, "retain")
 			if (strncmp("dss", k, klen) == 0) {
@@ -1148,7 +1199,7 @@ malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) {
 					if (strncmp(dss_prec_names[i], v, vlen)
 					    == 0) {
 						if (extent_dss_prec_set(i)) {
-							malloc_conf_error(
+							CONF_ERROR(
 							    "Error setting dss",
 							    k, klen, v, vlen);
 						} else {
@@ -1160,13 +1211,14 @@ malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) {
 					}
 				}
 				if (!match) {
-					malloc_conf_error("Invalid conf value",
+					CONF_ERROR("Invalid conf value",
 					    k, klen, v, vlen);
 				}
-				continue;
+				CONF_CONTINUE;
 			}
 			CONF_HANDLE_UNSIGNED(opt_narenas, "narenas", 1,
-			    UINT_MAX, yes, no, false)
+			    UINT_MAX, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX,
+			    false)
 			if (CONF_MATCH("bin_shards")) {
 				const char *bin_shards_segment_cur = v;
 				size_t vlen_left = vlen;
@@ -1180,14 +1232,14 @@ malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) {
 					if (err || bin_update_shard_size(
 					    bin_shard_sizes, size_start,
 					    size_end, nshards)) {
-						malloc_conf_error(
+						CONF_ERROR(
 						    "Invalid settings for "
 						    "bin_shards", k, klen, v,
 						    vlen);
 						break;
 					}
 				} while (vlen_left > 0);
-				continue;
+				CONF_CONTINUE;
 			}
 			CONF_HANDLE_SSIZE_T(opt_dirty_decay_ms,
 			    "dirty_decay_ms", -1, NSTIME_SEC_MAX * KQU(1000) <
@@ -1200,7 +1252,7 @@ malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) {
 			CONF_HANDLE_BOOL(opt_stats_print, "stats_print")
 			if (CONF_MATCH("stats_print_opts")) {
 				init_opt_stats_print_opts(v, vlen);
-				continue;
+				CONF_CONTINUE;
 			}
 			if (config_fill) {
 				if (CONF_MATCH("junk")) {
@@ -1221,11 +1273,11 @@ malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) {
 						opt_junk_alloc = false;
 						opt_junk_free = true;
 					} else {
-						malloc_conf_error(
-						    "Invalid conf value", k,
-						    klen, v, vlen);
+						CONF_ERROR(
+						    "Invalid conf value",
+						    k, klen, v, vlen);
 					}
-					continue;
+					CONF_CONTINUE;
 				}
 				CONF_HANDLE_BOOL(opt_zero, "zero")
 			}
@@ -1248,11 +1300,12 @@ malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) {
 			 * contention on the huge arena.
 			 */
 			CONF_HANDLE_SIZE_T(opt_oversize_threshold,
-			    "oversize_threshold", 0, SC_LARGE_MAXCLASS, no, yes,
-			    false)
+			    "oversize_threshold", 0, SC_LARGE_MAXCLASS,
+			    CONF_DONT_CHECK_MIN, CONF_CHECK_MAX, false)
 			CONF_HANDLE_SIZE_T(opt_lg_extent_max_active_fit,
 			    "lg_extent_max_active_fit", 0,
-			    (sizeof(size_t) << 3), no, yes, false)
+			    (sizeof(size_t) << 3), CONF_DONT_CHECK_MIN,
+			    CONF_CHECK_MAX, false)
 
 			if (strncmp("percpu_arena", k, klen) == 0) {
 				bool match = false;
@@ -1261,7 +1314,7 @@ malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) {
 					if (strncmp(percpu_arena_mode_names[i],
 					    v, vlen) == 0) {
 						if (!have_percpu_arena) {
-							malloc_conf_error(
+							CONF_ERROR(
 							    "No getcpu support",
 							    k, klen, v, vlen);
 						}
@@ -1271,16 +1324,17 @@ malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) {
 					}
 				}
 				if (!match) {
-					malloc_conf_error("Invalid conf value",
+					CONF_ERROR("Invalid conf value",
 					    k, klen, v, vlen);
 				}
-				continue;
+				CONF_CONTINUE;
 			}
 			CONF_HANDLE_BOOL(opt_background_thread,
 			    "background_thread");
 			CONF_HANDLE_SIZE_T(opt_max_background_threads,
 					   "max_background_threads", 1,
-					   opt_max_background_threads, yes, yes,
+					   opt_max_background_threads,
+					   CONF_CHECK_MIN, CONF_CHECK_MAX,
 					   true);
 			if (CONF_MATCH("slab_sizes")) {
 				bool err;
@@ -1299,13 +1353,12 @@ malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) {
 						    sc_data, slab_start,
 						    slab_end, (int)pgs);
 					} else {
-						malloc_conf_error(
-						    "Invalid settings for "
-						    "slab_sizes", k, klen, v,
-						    vlen);
+						CONF_ERROR("Invalid settings "
+						    "for slab_sizes",
+						    k, klen, v, vlen);
 					}
 				} while (!err && vlen_left > 0);
-				continue;
+				CONF_CONTINUE;
 			}
 			if (config_prof) {
 				CONF_HANDLE_BOOL(opt_prof, "prof")
@@ -1316,7 +1369,8 @@ malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) {
 				    "prof_thread_active_init")
 				CONF_HANDLE_SIZE_T(opt_lg_prof_sample,
 				    "lg_prof_sample", 0, (sizeof(uint64_t) << 3)
-				    - 1, no, yes, true)
+				    - 1, CONF_DONT_CHECK_MIN, CONF_CHECK_MAX,
+				    true)
 				CONF_HANDLE_BOOL(opt_prof_accum, "prof_accum")
 				CONF_HANDLE_SSIZE_T(opt_lg_prof_interval,
 				    "lg_prof_interval", -1,
@@ -1333,7 +1387,7 @@ malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) {
 					    vlen : sizeof(log_var_names) - 1);
 					strncpy(log_var_names, v, cpylen);
 					log_var_names[cpylen] = '\0';
-					continue;
+					CONF_CONTINUE;
 				}
 			}
 			if (CONF_MATCH("thp")) {
@@ -1342,7 +1396,7 @@ malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) {
 					if (strncmp(thp_mode_names[i],v, vlen)
 					    == 0) {
 						if (!have_madvise_huge) {
-							malloc_conf_error(
+							CONF_ERROR(
 							    "No THP support",
 							    k, klen, v, vlen);
 						}
@@ -1352,20 +1406,21 @@ malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) {
 					}
 				}
 				if (!match) {
-					malloc_conf_error("Invalid conf value",
+					CONF_ERROR("Invalid conf value",
 					    k, klen, v, vlen);
 				}
-				continue;
+				CONF_CONTINUE;
 			}
-			malloc_conf_error("Invalid conf pair", k, klen, v,
-			    vlen);
+			CONF_ERROR("Invalid conf pair", k, klen, v, vlen);
+#undef CONF_ERROR
+#undef CONF_CONTINUE
 #undef CONF_MATCH
 #undef CONF_MATCH_VALUE
 #undef CONF_HANDLE_BOOL
-#undef CONF_MIN_no
-#undef CONF_MIN_yes
-#undef CONF_MAX_no
-#undef CONF_MAX_yes
+#undef CONF_DONT_CHECK_MIN
+#undef CONF_CHECK_MIN
+#undef CONF_DONT_CHECK_MAX
+#undef CONF_CHECK_MAX
 #undef CONF_HANDLE_T_U
 #undef CONF_HANDLE_UNSIGNED
 #undef CONF_HANDLE_SIZE_T
@@ -1381,6 +1436,19 @@ malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) {
 	atomic_store_b(&log_init_done, true, ATOMIC_RELEASE);
 }
 
+static void
+malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) {
+	const char *opts_cache[MALLOC_CONF_NSOURCES] = {NULL, NULL, NULL, NULL};
+	char buf[PATH_MAX + 1];
+
+	/* The first call only set the confirm_conf option and opts_cache */
+	malloc_conf_init_helper(NULL, NULL, true, opts_cache, buf);
+	malloc_conf_init_helper(sc_data, bin_shard_sizes, false, opts_cache,
+	    NULL);
+}
+
+#undef MALLOC_CONF_NSOURCES
+
 static bool
 malloc_init_hard_needed(void) {
 	if (malloc_initialized() || (IS_INITIALIZER && malloc_init_state ==
@@ -1749,6 +1817,11 @@ struct static_opts_s {
 	/* Whether or not allocation size may overflow. */
 	bool may_overflow;
 
+	/*
+	 * Whether or not allocations (with alignment) of size 0 should be
+	 * treated as size 1.
+	 */
+	bool bump_empty_aligned_alloc;
 	/*
 	 * Whether to assert that allocations are not of size 0 (after any
 	 * bumping).
@@ -1790,6 +1863,7 @@ struct static_opts_s {
 JEMALLOC_ALWAYS_INLINE void
 static_opts_init(static_opts_t *static_opts) {
 	static_opts->may_overflow = false;
+	static_opts->bump_empty_aligned_alloc = false;
 	static_opts->assert_nonempty_alloc = false;
 	static_opts->null_out_result_on_error = false;
 	static_opts->set_errno_on_error = false;
@@ -1977,11 +2051,6 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) {
 		goto label_oom;
 	}
 
-	/* Validate the user input. */
-	if (sopts->assert_nonempty_alloc) {
-		assert (size != 0);
-	}
-
 	if (unlikely(dopts->alignment < sopts->min_alignment
 	    || (dopts->alignment & (dopts->alignment - 1)) != 0)) {
 		goto label_invalid_alignment;
@@ -2001,6 +2070,11 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) {
 			    <= SC_LARGE_MAXCLASS);
 		}
 	} else {
+		if (sopts->bump_empty_aligned_alloc) {
+			if (unlikely(size == 0)) {
+				size = 1;
+			}
+		}
 		usize = sz_sa2u(size, dopts->alignment);
 		dopts->usize = usize;
 		if (unlikely(usize == 0
@@ -2008,6 +2082,10 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) {
 			goto label_oom;
 		}
 	}
+	/* Validate the user input. */
+	if (sopts->assert_nonempty_alloc) {
+		assert (size != 0);
+	}
 
 	check_entry_exit_locking(tsd_tsdn(tsd));
 
@@ -2323,6 +2401,7 @@ je_posix_memalign(void **memptr, size_t alignment, size_t size) {
 	static_opts_init(&sopts);
 	dynamic_opts_init(&dopts);
 
+	sopts.bump_empty_aligned_alloc = true;
 	sopts.min_alignment = sizeof(void *);
 	sopts.oom_string =
 	    "<jemalloc>: Error allocating aligned memory: out of memory\n";
@@ -2363,6 +2442,7 @@ je_aligned_alloc(size_t alignment, size_t size) {
 	static_opts_init(&sopts);
 	dynamic_opts_init(&dopts);
 
+	sopts.bump_empty_aligned_alloc = true;
 	sopts.null_out_result_on_error = true;
 	sopts.set_errno_on_error = true;
 	sopts.min_alignment = 1;
@@ -2732,7 +2812,7 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) {
 	tcache_t *tcache = tsd_tcachep_get(tsd);
 
 	alloc_ctx_t alloc_ctx;
-	/* 
+	/*
 	 * If !config_cache_oblivious, we can check PAGE alignment to
 	 * detect sampled objects.  Otherwise addresses are
 	 * randomized, and we have to look it up in the rtree anyway.
@@ -2743,12 +2823,12 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) {
 		bool res = rtree_szind_slab_read_fast(tsd_tsdn(tsd), &extents_rtree,
 						      rtree_ctx, (uintptr_t)ptr,
 						      &alloc_ctx.szind, &alloc_ctx.slab);
-		assert(alloc_ctx.szind != SC_NSIZES);
 
 		/* Note: profiled objects will have alloc_ctx.slab set */
 		if (!res || !alloc_ctx.slab) {
 			return false;
 		}
+		assert(alloc_ctx.szind != SC_NSIZES);
 	} else {
 		/*
 		 * Check for both sizes that are too large, and for sampled objects.
@@ -3522,6 +3602,18 @@ je_sdallocx(void *ptr, size_t size, int flags) {
 	LOG("core.sdallocx.exit", "");
 }
 
+void JEMALLOC_NOTHROW
+je_sdallocx_noflags(void *ptr, size_t size) {
+	LOG("core.sdallocx.entry", "ptr: %p, size: %zu, flags: 0", ptr,
+		size);
+
+	if (!free_fastpath(ptr, size, true)) {
+		sdallocx_default(ptr, size, 0);
+	}
+
+	LOG("core.sdallocx.exit", "");
+}
+
 JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW
 JEMALLOC_ATTR(pure)
 je_nallocx(size_t size, int flags) {
diff --git a/src/jemalloc_cpp.cpp b/src/jemalloc_cpp.cpp
index f0ceddae..da0441a7 100644
--- a/src/jemalloc_cpp.cpp
+++ b/src/jemalloc_cpp.cpp
@@ -128,14 +128,14 @@ operator delete(void *ptr, std::size_t size) noexcept {
 	if (unlikely(ptr == nullptr)) {
 		return;
 	}
-	je_sdallocx(ptr, size, /*flags=*/0);
+	je_sdallocx_noflags(ptr, size);
 }
 
 void operator delete[](void *ptr, std::size_t size) noexcept {
 	if (unlikely(ptr == nullptr)) {
 		return;
 	}
-	je_sdallocx(ptr, size, /*flags=*/0);
+	je_sdallocx_noflags(ptr, size);
 }
 
 #endif  // __cpp_sized_deallocation
diff --git a/src/malloc_io.c b/src/malloc_io.c
index 7bdc13f9..d7cb0f52 100644
--- a/src/malloc_io.c
+++ b/src/malloc_io.c
@@ -362,7 +362,7 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) {
 	}								\
 } while (0)
 #define GET_ARG_NUMERIC(val, len) do {					\
-	switch (len) {							\
+	switch ((unsigned char)len) {					\
 	case '?':							\
 		val = va_arg(ap, int);					\
 		break;							\
@@ -632,7 +632,6 @@ malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque,
 		 */
 		write_cb = (je_malloc_message != NULL) ? je_malloc_message :
 		    wrtmessage;
-		cbopaque = NULL;
 	}
 
 	malloc_vsnprintf(buf, sizeof(buf), format, ap);
diff --git a/src/prof.c b/src/prof.c
index 4d7d65db..13334cb4 100644
--- a/src/prof.c
+++ b/src/prof.c
@@ -125,7 +125,7 @@ struct prof_thr_node_s {
 	uint64_t thr_uid;
 	/* Variable size based on thr_name_sz. */
 	char name[1];
-}; 
+};
 
 typedef struct prof_alloc_node_s prof_alloc_node_t;
 
@@ -388,7 +388,7 @@ prof_log_bt_index(tsd_t *tsd, prof_bt_t *bt) {
 
 		new_node->next = NULL;
 		new_node->index = log_bt_index;
-		/* 
+		/*
 		 * Copy the backtrace: bt is inside a tdata or gctx, which
 		 * might die before prof_log_stop is called.
 		 */
@@ -402,7 +402,7 @@ prof_log_bt_index(tsd_t *tsd, prof_bt_t *bt) {
 	} else {
 		return node->index;
 	}
-} 
+}
 static size_t
 prof_log_thr_index(tsd_t *tsd, uint64_t thr_uid, const char *name) {
 	assert(prof_logging_state == prof_logging_state_started);
@@ -452,7 +452,7 @@ prof_try_log(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx) {
 		 * it's being destroyed).
 		 */
 		return;
-	}	
+	}
 
 	malloc_mutex_lock(tsd_tsdn(tsd), &log_mtx);
 
@@ -514,11 +514,11 @@ prof_try_log(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx) {
 	}
 
 label_done:
-	malloc_mutex_unlock(tsd_tsdn(tsd), &log_mtx);	
+	malloc_mutex_unlock(tsd_tsdn(tsd), &log_mtx);
 }
 
 void
-prof_free_sampled_object(tsd_t *tsd, const void *ptr, size_t usize, 
+prof_free_sampled_object(tsd_t *tsd, const void *ptr, size_t usize,
     prof_tctx_t *tctx) {
 	malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock);
 
@@ -1292,7 +1292,7 @@ prof_dump_write(bool propagate_err, const char *s) {
 			}
 		}
 
-		if (prof_dump_buf_end + slen <= PROF_DUMP_BUFSIZE) {
+		if (prof_dump_buf_end + slen - i <= PROF_DUMP_BUFSIZE) {
 			/* Finish writing. */
 			n = slen - i;
 		} else {
@@ -1303,6 +1303,7 @@ prof_dump_write(bool propagate_err, const char *s) {
 		prof_dump_buf_end += n;
 		i += n;
 	}
+	assert(i == slen);
 
 	return false;
 }
@@ -2604,8 +2605,8 @@ static void
 prof_log_emit_traces(tsd_t *tsd, emitter_t *emitter) {
 	emitter_json_array_kv_begin(emitter, "stack_traces");
 	prof_bt_node_t *bt_node = log_bt_first;
-	prof_bt_node_t *bt_old_node; 
-	/* 
+	prof_bt_node_t *bt_old_node;
+	/*
 	 * Calculate how many hex digits we need: twice number of bytes, two for
 	 * "0x", and then one more for terminating '\0'.
 	 */
@@ -2744,12 +2745,12 @@ prof_log_stop(tsdn_t *tsdn) {
 	emitter_init(&emitter, emitter_output_json, &prof_emitter_write_cb,
 	    (void *)(&arg));
 
-	emitter_json_object_begin(&emitter);
+	emitter_begin(&emitter);
 	prof_log_emit_metadata(&emitter);
 	prof_log_emit_threads(tsd, &emitter);
 	prof_log_emit_traces(tsd, &emitter);
 	prof_log_emit_allocs(tsd, &emitter);
-	emitter_json_object_end(&emitter);
+	emitter_end(&emitter);
 
 	/* Reset global state. */
 	if (log_tables_initialized) {
diff --git a/src/safety_check.c b/src/safety_check.c
new file mode 100644
index 00000000..804155dc
--- /dev/null
+++ b/src/safety_check.c
@@ -0,0 +1,24 @@
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/jemalloc_internal_includes.h"
+
+static void (*safety_check_abort)(const char *message);
+
+void safety_check_set_abort(void (*abort_fn)(const char *)) {
+	safety_check_abort = abort_fn;
+}
+
+void safety_check_fail(const char *format, ...) {
+	char buf[MALLOC_PRINTF_BUFSIZE];
+
+	va_list ap;
+	va_start(ap, format);
+	malloc_vsnprintf(buf, MALLOC_PRINTF_BUFSIZE, format, ap);
+	va_end(ap);
+
+	if (safety_check_abort == NULL) {
+		malloc_write(buf);
+		abort();
+	} else {
+		safety_check_abort(buf);
+	}
+}
diff --git a/src/stats.c b/src/stats.c
index 4c427e0d..118e05d2 100644
--- a/src/stats.c
+++ b/src/stats.c
@@ -294,6 +294,7 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, uint64_t upti
 	COL_HDR(row, nshards, NULL, right, 9, unsigned)
 	COL_HDR(row, curregs, NULL, right, 13, size)
 	COL_HDR(row, curslabs, NULL, right, 13, size)
+	COL_HDR(row, nonfull_slabs, NULL, right, 15, size)
 	COL_HDR(row, regs, NULL, right, 5, unsigned)
 	COL_HDR(row, pgs, NULL, right, 4, size)
 	/* To buffer a right- and left-justified column. */
@@ -337,6 +338,7 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, uint64_t upti
 		uint64_t nslabs;
 		size_t reg_size, slab_size, curregs;
 		size_t curslabs;
+		size_t nonfull_slabs;
 		uint32_t nregs, nshards;
 		uint64_t nmalloc, ndalloc, nrequests, nfills, nflushes;
 		uint64_t nreslabs;
@@ -372,6 +374,8 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, uint64_t upti
 		    uint64_t);
 		CTL_M2_M4_GET("stats.arenas.0.bins.0.curslabs", i, j, &curslabs,
 		    size_t);
+		CTL_M2_M4_GET("stats.arenas.0.bins.0.nonfull_slabs", i, j, &nonfull_slabs,
+		    size_t);
 
 		if (mutex) {
 			mutex_stats_read_arena_bin(i, j, col_mutex64,
@@ -395,6 +399,8 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, uint64_t upti
 		    &nreslabs);
 		emitter_json_kv(emitter, "curslabs", emitter_type_size,
 		    &curslabs);
+		emitter_json_kv(emitter, "nonfull_slabs", emitter_type_size,
+		    &nonfull_slabs);
 		if (mutex) {
 			emitter_json_object_kv_begin(emitter, "mutex");
 			mutex_stats_emit(emitter, NULL, col_mutex64,
@@ -434,6 +440,7 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, uint64_t upti
 		col_nshards.unsigned_val = nshards;
 		col_curregs.size_val = curregs;
 		col_curslabs.size_val = curslabs;
+		col_nonfull_slabs.size_val = nonfull_slabs;
 		col_regs.unsigned_val = nregs;
 		col_pgs.size_val = slab_size / page;
 		col_util.str_val = util;
@@ -661,10 +668,12 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large,
 	uint64_t dirty_npurge, dirty_nmadvise, dirty_purged;
 	uint64_t muzzy_npurge, muzzy_nmadvise, muzzy_purged;
 	size_t small_allocated;
-	uint64_t small_nmalloc, small_ndalloc, small_nrequests;
+	uint64_t small_nmalloc, small_ndalloc, small_nrequests, small_nfills,
+	    small_nflushes;
 	size_t large_allocated;
-	uint64_t large_nmalloc, large_ndalloc, large_nrequests;
-	size_t tcache_bytes;
+	uint64_t large_nmalloc, large_ndalloc, large_nrequests, large_nfills,
+	    large_nflushes;
+	size_t tcache_bytes, abandoned_vm;
 	uint64_t uptime;
 
 	CTL_GET("arenas.page", &page, size_t);
@@ -821,11 +830,23 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large,
 	COL(alloc_count_row, count_nrequests_ps, right, 10, title);
 	col_count_nrequests_ps.str_val = "(#/sec)";
 
+	COL(alloc_count_row, count_nfills, right, 16, title);
+	col_count_nfills.str_val = "nfill";
+	COL(alloc_count_row, count_nfills_ps, right, 10, title);
+	col_count_nfills_ps.str_val = "(#/sec)";
+
+	COL(alloc_count_row, count_nflushes, right, 16, title);
+	col_count_nflushes.str_val = "nflush";
+	COL(alloc_count_row, count_nflushes_ps, right, 10, title);
+	col_count_nflushes_ps.str_val = "(#/sec)";
+
 	emitter_table_row(emitter, &alloc_count_row);
 
 	col_count_nmalloc_ps.type = emitter_type_uint64;
 	col_count_ndalloc_ps.type = emitter_type_uint64;
 	col_count_nrequests_ps.type = emitter_type_uint64;
+	col_count_nfills_ps.type = emitter_type_uint64;
+	col_count_nflushes_ps.type = emitter_type_uint64;
 
 #define GET_AND_EMIT_ALLOC_STAT(small_or_large, name, valtype)		\
 	CTL_M2_GET("stats.arenas.0." #small_or_large "." #name, i,	\
@@ -848,6 +869,12 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large,
 	GET_AND_EMIT_ALLOC_STAT(small, nrequests, uint64)
 	col_count_nrequests_ps.uint64_val =
 	    rate_per_second(col_count_nrequests.uint64_val, uptime);
+	GET_AND_EMIT_ALLOC_STAT(small, nfills, uint64)
+	col_count_nfills_ps.uint64_val =
+	    rate_per_second(col_count_nfills.uint64_val, uptime);
+	GET_AND_EMIT_ALLOC_STAT(small, nflushes, uint64)
+	col_count_nflushes_ps.uint64_val =
+	    rate_per_second(col_count_nflushes.uint64_val, uptime);
 
 	emitter_table_row(emitter, &alloc_count_row);
 	emitter_json_object_end(emitter); /* Close "small". */
@@ -865,6 +892,12 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large,
 	GET_AND_EMIT_ALLOC_STAT(large, nrequests, uint64)
 	col_count_nrequests_ps.uint64_val =
 	    rate_per_second(col_count_nrequests.uint64_val, uptime);
+	GET_AND_EMIT_ALLOC_STAT(large, nfills, uint64)
+	col_count_nfills_ps.uint64_val =
+	    rate_per_second(col_count_nfills.uint64_val, uptime);
+	GET_AND_EMIT_ALLOC_STAT(large, nflushes, uint64)
+	col_count_nflushes_ps.uint64_val =
+	    rate_per_second(col_count_nflushes.uint64_val, uptime);
 
 	emitter_table_row(emitter, &alloc_count_row);
 	emitter_json_object_end(emitter); /* Close "large". */
@@ -877,12 +910,18 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large,
 	col_count_nmalloc.uint64_val = small_nmalloc + large_nmalloc;
 	col_count_ndalloc.uint64_val = small_ndalloc + large_ndalloc;
 	col_count_nrequests.uint64_val = small_nrequests + large_nrequests;
+	col_count_nfills.uint64_val = small_nfills + large_nfills;
+	col_count_nflushes.uint64_val = small_nflushes + large_nflushes;
 	col_count_nmalloc_ps.uint64_val =
 	    rate_per_second(col_count_nmalloc.uint64_val, uptime);
 	col_count_ndalloc_ps.uint64_val =
 	    rate_per_second(col_count_ndalloc.uint64_val, uptime);
 	col_count_nrequests_ps.uint64_val =
 	    rate_per_second(col_count_nrequests.uint64_val, uptime);
+	col_count_nfills_ps.uint64_val =
+	    rate_per_second(col_count_nfills.uint64_val, uptime);
+	col_count_nflushes_ps.uint64_val =
+	    rate_per_second(col_count_nflushes.uint64_val, uptime);
 	emitter_table_row(emitter, &alloc_count_row);
 
 	emitter_row_t mem_count_row;
@@ -924,6 +963,7 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large,
 	GET_AND_EMIT_MEM_STAT(metadata_thp)
 	GET_AND_EMIT_MEM_STAT(tcache_bytes)
 	GET_AND_EMIT_MEM_STAT(resident)
+	GET_AND_EMIT_MEM_STAT(abandoned_vm)
 	GET_AND_EMIT_MEM_STAT(extent_avail)
 #undef GET_AND_EMIT_MEM_STAT
 
@@ -976,6 +1016,7 @@ stats_general_print(emitter_t *emitter) {
 	emitter_kv(emitter, "malloc_conf", "config.malloc_conf",
 	    emitter_type_string, &config_malloc_conf);
 
+	CONFIG_WRITE_BOOL(opt_safety_checks);
 	CONFIG_WRITE_BOOL(prof);
 	CONFIG_WRITE_BOOL(prof_libgcc);
 	CONFIG_WRITE_BOOL(prof_libunwind);
@@ -1025,6 +1066,7 @@ stats_general_print(emitter_t *emitter) {
 
 	OPT_WRITE_BOOL("abort")
 	OPT_WRITE_BOOL("abort_conf")
+	OPT_WRITE_BOOL("confirm_conf")
 	OPT_WRITE_BOOL("retain")
 	OPT_WRITE_CHAR_P("dss")
 	OPT_WRITE_UNSIGNED("narenas")
diff --git a/src/tcache.c b/src/tcache.c
index e7b970d9..50099a9f 100644
--- a/src/tcache.c
+++ b/src/tcache.c
@@ -4,6 +4,7 @@
 
 #include "jemalloc/internal/assert.h"
 #include "jemalloc/internal/mutex.h"
+#include "jemalloc/internal/safety_check.h"
 #include "jemalloc/internal/sc.h"
 
 /******************************************************************************/
@@ -101,7 +102,6 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache,
 }
 
 /* Enabled with --enable-extra-size-check. */
-#ifdef JEMALLOC_EXTRA_SIZE_CHECK
 static void
 tbin_extents_lookup_size_check(tsdn_t *tsdn, cache_bin_t *tbin, szind_t binind,
     size_t nflush, extent_t **extents){
@@ -123,13 +123,12 @@ tbin_extents_lookup_size_check(tsdn_t *tsdn, cache_bin_t *tbin, szind_t binind,
 		sz_sum -= szind;
 	}
 	if (sz_sum != 0) {
-		malloc_printf("<jemalloc>: size mismatch in thread cache "
+		safety_check_fail("<jemalloc>: size mismatch in thread cache "
 		    "detected, likely caused by sized deallocation bugs by "
 		    "application. Abort.\n");
 		abort();
 	}
 }
-#endif
 
 void
 tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin,
@@ -144,15 +143,16 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin,
 	unsigned nflush = tbin->ncached - rem;
 	VARIABLE_ARRAY(extent_t *, item_extent, nflush);
 
-#ifndef JEMALLOC_EXTRA_SIZE_CHECK
 	/* Look up extent once per item. */
-	for (unsigned i = 0 ; i < nflush; i++) {
-		item_extent[i] = iealloc(tsd_tsdn(tsd), *(tbin->avail - 1 - i));
+	if (config_opt_safety_checks) {
+		tbin_extents_lookup_size_check(tsd_tsdn(tsd), tbin, binind,
+		    nflush, item_extent);
+	} else {
+		for (unsigned i = 0 ; i < nflush; i++) {
+			item_extent[i] = iealloc(tsd_tsdn(tsd),
+			    *(tbin->avail - 1 - i));
+		}
 	}
-#else
-	tbin_extents_lookup_size_check(tsd_tsdn(tsd), tbin, binind, nflush,
-	    item_extent);
-#endif
 	while (nflush > 0) {
 		/* Lock the arena bin associated with the first object. */
 		extent_t *extent = item_extent[0];
@@ -282,8 +282,8 @@ tcache_bin_flush_large(tsd_t *tsd, cache_bin_t *tbin, szind_t binind,
 			}
 			if (config_stats) {
 				merged_stats = true;
-				arena_stats_large_nrequests_add(tsd_tsdn(tsd),
-				    &tcache_arena->stats, binind,
+				arena_stats_large_flush_nrequests_add(
+				    tsd_tsdn(tsd), &tcache_arena->stats, binind,
 				    tbin->tstats.nrequests);
 				tbin->tstats.nrequests = 0;
 			}
@@ -324,7 +324,7 @@ tcache_bin_flush_large(tsd_t *tsd, cache_bin_t *tbin, szind_t binind,
 		 * The flush loop didn't happen to flush to this thread's
 		 * arena, so the stats didn't get merged.  Manually do so now.
 		 */
-		arena_stats_large_nrequests_add(tsd_tsdn(tsd),
+		arena_stats_large_flush_nrequests_add(tsd_tsdn(tsd),
 		    &tcache_arena->stats, binind, tbin->tstats.nrequests);
 		tbin->tstats.nrequests = 0;
 	}
@@ -615,7 +615,7 @@ tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) {
 
 	for (; i < nhbins; i++) {
 		cache_bin_t *tbin = tcache_large_bin_get(tcache, i);
-		arena_stats_large_nrequests_add(tsdn, &arena->stats, i,
+		arena_stats_large_flush_nrequests_add(tsdn, &arena->stats, i,
 		    tbin->tstats.nrequests);
 		tbin->tstats.nrequests = 0;
 	}
diff --git a/src/tsd.c b/src/tsd.c
index d5fb4d6f..a31f6b96 100644
--- a/src/tsd.c
+++ b/src/tsd.c
@@ -17,11 +17,11 @@ JEMALLOC_DIAGNOSTIC_PUSH
 JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS
 
 #ifdef JEMALLOC_MALLOC_THREAD_CLEANUP
-__thread tsd_t JEMALLOC_TLS_MODEL tsd_tls = TSD_INITIALIZER;
-__thread bool JEMALLOC_TLS_MODEL tsd_initialized = false;
+JEMALLOC_TSD_TYPE_ATTR(tsd_t) tsd_tls = TSD_INITIALIZER;
+JEMALLOC_TSD_TYPE_ATTR(bool) JEMALLOC_TLS_MODEL tsd_initialized = false;
 bool tsd_booted = false;
 #elif (defined(JEMALLOC_TLS))
-__thread tsd_t JEMALLOC_TLS_MODEL tsd_tls = TSD_INITIALIZER;
+JEMALLOC_TSD_TYPE_ATTR(tsd_t) tsd_tls = TSD_INITIALIZER;
 pthread_key_t tsd_tsd;
 bool tsd_booted = false;
 #elif (defined(_WIN32))
diff --git a/test/integration/posix_memalign.c b/test/integration/posix_memalign.c
index 2c2726de..d992260a 100644
--- a/test/integration/posix_memalign.c
+++ b/test/integration/posix_memalign.c
@@ -85,9 +85,10 @@ TEST_BEGIN(test_alignment_and_size) {
 	    alignment <= MAXALIGN;
 	    alignment <<= 1) {
 		total = 0;
-		for (size = 1;
+		for (size = 0;
 		    size < 3 * alignment && size < (1U << 31);
-		    size += (alignment >> (LG_SIZEOF_PTR-1)) - 1) {
+		    size += ((size == 0) ? 1 :
+		    (alignment >> (LG_SIZEOF_PTR-1)) - 1)) {
 			for (i = 0; i < NITER; i++) {
 				err = posix_memalign(&ps[i],
 				    alignment, size);
diff --git a/test/unit/arena_reset.c b/test/unit/arena_reset.c
index 96b042dd..b182f31a 100644
--- a/test/unit/arena_reset.c
+++ b/test/unit/arena_reset.c
@@ -279,8 +279,11 @@ extent_dalloc_unmap(extent_hooks_t *extent_hooks, void *addr, size_t size,
 	if (!try_dalloc) {
 		return true;
 	}
-	pages_unmap(addr, size);
 	did_dalloc = true;
+	if (!maps_coalesce && opt_retain) {
+		return true;
+	}
+	pages_unmap(addr, size);
 	return false;
 }
 
@@ -304,7 +307,9 @@ TEST_BEGIN(test_arena_destroy_hooks_unmap) {
 	unsigned nptrs;
 
 	extent_hooks_prep();
-	try_decommit = false;
+	if (maps_coalesce) {
+		try_decommit = false;
+	}
 	memcpy(&hooks_orig, &hooks, sizeof(extent_hooks_t));
 	memcpy(&hooks, &hooks_unmap, sizeof(extent_hooks_t));
 
diff --git a/test/unit/extent_util.c b/test/unit/extent_util.c
new file mode 100644
index 00000000..97e55f0f
--- /dev/null
+++ b/test/unit/extent_util.c
@@ -0,0 +1,269 @@
+#include "test/jemalloc_test.h"
+
+#define TEST_UTIL_EINVAL(node, a, b, c, d, why_inval) do {		\
+	assert_d_eq(mallctl("experimental.utilization." node,		\
+	    a, b, c, d), EINVAL, "Should fail when " why_inval);	\
+	assert_zu_eq(out_sz, out_sz_ref,				\
+	    "Output size touched when given invalid arguments");	\
+	assert_d_eq(memcmp(out, out_ref, out_sz_ref), 0,		\
+	    "Output content touched when given invalid arguments");	\
+} while (0)
+
+#define TEST_UTIL_QUERY_EINVAL(a, b, c, d, why_inval)			\
+	TEST_UTIL_EINVAL("query", a, b, c, d, why_inval)
+#define TEST_UTIL_BATCH_EINVAL(a, b, c, d, why_inval)			\
+	TEST_UTIL_EINVAL("batch_query", a, b, c, d, why_inval)
+
+#define TEST_UTIL_VALID(node) do {					\
+        assert_d_eq(mallctl("experimental.utilization." node,		\
+	    out, &out_sz, in, in_sz), 0,				\
+	    "Should return 0 on correct arguments");			\
+        assert_zu_eq(out_sz, out_sz_ref, "incorrect output size");	\
+	assert_d_ne(memcmp(out, out_ref, out_sz_ref), 0,		\
+	    "Output content should be changed");			\
+} while (0)
+
+#define TEST_UTIL_BATCH_VALID TEST_UTIL_VALID("batch_query")
+
+#define TEST_MAX_SIZE (1 << 20)
+
+TEST_BEGIN(test_query) {
+	size_t sz;
+	/*
+	 * Select some sizes that can span both small and large sizes, and are
+	 * numerically unrelated to any size boundaries.
+	 */
+	for (sz = 7; sz <= TEST_MAX_SIZE && sz <= SC_LARGE_MAXCLASS;
+	    sz += (sz <= SC_SMALL_MAXCLASS ? 1009 : 99989)) {
+		void *p = mallocx(sz, 0);
+		void **in = &p;
+		size_t in_sz = sizeof(const void *);
+		size_t out_sz = sizeof(void *) + sizeof(size_t) * 5;
+		void *out = mallocx(out_sz, 0);
+		void *out_ref = mallocx(out_sz, 0);
+		size_t out_sz_ref = out_sz;
+
+		assert_ptr_not_null(p,
+		    "test pointer allocation failed");
+		assert_ptr_not_null(out,
+		    "test output allocation failed");
+		assert_ptr_not_null(out_ref,
+		    "test reference output allocation failed");
+
+#define SLABCUR_READ(out) (*(void **)out)
+#define COUNTS(out) ((size_t *)((void **)out + 1))
+#define NFREE_READ(out) COUNTS(out)[0]
+#define NREGS_READ(out) COUNTS(out)[1]
+#define SIZE_READ(out) COUNTS(out)[2]
+#define BIN_NFREE_READ(out) COUNTS(out)[3]
+#define BIN_NREGS_READ(out) COUNTS(out)[4]
+
+		SLABCUR_READ(out) = NULL;
+		NFREE_READ(out) = NREGS_READ(out) = SIZE_READ(out) = -1;
+		BIN_NFREE_READ(out) = BIN_NREGS_READ(out) = -1;
+		memcpy(out_ref, out, out_sz);
+
+		/* Test invalid argument(s) errors */
+		TEST_UTIL_QUERY_EINVAL(NULL, &out_sz, in, in_sz,
+		    "old is NULL");
+		TEST_UTIL_QUERY_EINVAL(out, NULL, in, in_sz,
+		    "oldlenp is NULL");
+		TEST_UTIL_QUERY_EINVAL(out, &out_sz, NULL, in_sz,
+		    "newp is NULL");
+		TEST_UTIL_QUERY_EINVAL(out, &out_sz, in, 0,
+		    "newlen is zero");
+		in_sz -= 1;
+		TEST_UTIL_QUERY_EINVAL(out, &out_sz, in, in_sz,
+		    "invalid newlen");
+		in_sz += 1;
+		out_sz_ref = out_sz -= 2 * sizeof(size_t);
+		TEST_UTIL_QUERY_EINVAL(out, &out_sz, in, in_sz,
+		    "invalid *oldlenp");
+		out_sz_ref = out_sz += 2 * sizeof(size_t);
+
+		/* Examine output for valid call */
+		TEST_UTIL_VALID("query");
+		assert_zu_le(sz, SIZE_READ(out),
+		    "Extent size should be at least allocation size");
+		assert_zu_eq(SIZE_READ(out) & (PAGE - 1), 0,
+		    "Extent size should be a multiple of page size");
+		if (sz <= SC_SMALL_MAXCLASS) {
+			assert_zu_le(NFREE_READ(out), NREGS_READ(out),
+			    "Extent free count exceeded region count");
+			assert_zu_le(NREGS_READ(out), SIZE_READ(out),
+			    "Extent region count exceeded size");
+			assert_zu_ne(NREGS_READ(out), 0,
+			    "Extent region count must be positive");
+			assert_ptr_not_null(SLABCUR_READ(out),
+			    "Current slab is null");
+			assert_true(NFREE_READ(out) == 0
+			    || SLABCUR_READ(out) <= p,
+			    "Allocation should follow first fit principle");
+			if (config_stats) {
+				assert_zu_le(BIN_NFREE_READ(out),
+				    BIN_NREGS_READ(out),
+				    "Bin free count exceeded region count");
+				assert_zu_ne(BIN_NREGS_READ(out), 0,
+				    "Bin region count must be positive");
+				assert_zu_le(NFREE_READ(out),
+				    BIN_NFREE_READ(out),
+				    "Extent free count exceeded bin free count");
+				assert_zu_le(NREGS_READ(out),
+				    BIN_NREGS_READ(out),
+				    "Extent region count exceeded "
+				    "bin region count");
+				assert_zu_eq(BIN_NREGS_READ(out)
+				    % NREGS_READ(out), 0,
+				    "Bin region count isn't a multiple of "
+				    "extent region count");
+				assert_zu_le(
+				    BIN_NFREE_READ(out) - NFREE_READ(out),
+				    BIN_NREGS_READ(out) - NREGS_READ(out),
+				    "Free count in other extents in the bin "
+				    "exceeded region count in other extents "
+				    "in the bin");
+				assert_zu_le(NREGS_READ(out) - NFREE_READ(out),
+				    BIN_NREGS_READ(out) - BIN_NFREE_READ(out),
+				    "Extent utilized count exceeded "
+				    "bin utilized count");
+			}
+		} else {
+			assert_zu_eq(NFREE_READ(out), 0,
+			    "Extent free count should be zero");
+			assert_zu_eq(NREGS_READ(out), 1,
+			    "Extent region count should be one");
+			assert_ptr_null(SLABCUR_READ(out),
+			    "Current slab must be null for large size classes");
+			if (config_stats) {
+				assert_zu_eq(BIN_NFREE_READ(out), 0,
+				    "Bin free count must be zero for "
+				    "large sizes");
+				assert_zu_eq(BIN_NREGS_READ(out), 0,
+				    "Bin region count must be zero for "
+				    "large sizes");
+			}
+		}
+
+#undef BIN_NREGS_READ
+#undef BIN_NFREE_READ
+#undef SIZE_READ
+#undef NREGS_READ
+#undef NFREE_READ
+#undef COUNTS
+#undef SLABCUR_READ
+
+		free(out_ref);
+		free(out);
+		free(p);
+	}
+}
+TEST_END
+
+TEST_BEGIN(test_batch) {
+	size_t sz;
+	/*
+	 * Select some sizes that can span both small and large sizes, and are
+	 * numerically unrelated to any size boundaries.
+	 */
+	for (sz = 17; sz <= TEST_MAX_SIZE && sz <= SC_LARGE_MAXCLASS;
+	    sz += (sz <= SC_SMALL_MAXCLASS ? 1019 : 99991)) {
+		void *p = mallocx(sz, 0);
+		void *q = mallocx(sz, 0);
+		void *in[] = {p, q};
+		size_t in_sz = sizeof(const void *) * 2;
+		size_t out[] = {-1, -1, -1, -1, -1, -1};
+		size_t out_sz = sizeof(size_t) * 6;
+		size_t out_ref[] = {-1, -1, -1, -1, -1, -1};
+		size_t out_sz_ref = out_sz;
+
+		assert_ptr_not_null(p, "test pointer allocation failed");
+		assert_ptr_not_null(q, "test pointer allocation failed");
+
+		/* Test invalid argument(s) errors */
+		TEST_UTIL_BATCH_EINVAL(NULL, &out_sz, in, in_sz,
+		    "old is NULL");
+		TEST_UTIL_BATCH_EINVAL(out, NULL, in, in_sz,
+		    "oldlenp is NULL");
+		TEST_UTIL_BATCH_EINVAL(out, &out_sz, NULL, in_sz,
+		    "newp is NULL");
+		TEST_UTIL_BATCH_EINVAL(out, &out_sz, in, 0,
+		    "newlen is zero");
+		in_sz -= 1;
+		TEST_UTIL_BATCH_EINVAL(out, &out_sz, in, in_sz,
+		    "newlen is not an exact multiple");
+		in_sz += 1;
+		out_sz_ref = out_sz -= 2 * sizeof(size_t);
+		TEST_UTIL_BATCH_EINVAL(out, &out_sz, in, in_sz,
+		    "*oldlenp is not an exact multiple");
+		out_sz_ref = out_sz += 2 * sizeof(size_t);
+		in_sz -= sizeof(const void *);
+		TEST_UTIL_BATCH_EINVAL(out, &out_sz, in, in_sz,
+		    "*oldlenp and newlen do not match");
+		in_sz += sizeof(const void *);
+
+	/* Examine output for valid calls */
+#define TEST_EQUAL_REF(i, message) \
+	assert_d_eq(memcmp(out + (i) * 3, out_ref + (i) * 3, 3), 0, message)
+
+#define NFREE_READ(out, i) out[(i) * 3]
+#define NREGS_READ(out, i) out[(i) * 3 + 1]
+#define SIZE_READ(out, i) out[(i) * 3 + 2]
+
+		out_sz_ref = out_sz /= 2;
+		in_sz /= 2;
+		TEST_UTIL_BATCH_VALID;
+		assert_zu_le(sz, SIZE_READ(out, 0),
+		    "Extent size should be at least allocation size");
+		assert_zu_eq(SIZE_READ(out, 0) & (PAGE - 1), 0,
+		    "Extent size should be a multiple of page size");
+		if (sz <= SC_SMALL_MAXCLASS) {
+			assert_zu_le(NFREE_READ(out, 0), NREGS_READ(out, 0),
+			    "Extent free count exceeded region count");
+			assert_zu_le(NREGS_READ(out, 0), SIZE_READ(out, 0),
+			    "Extent region count exceeded size");
+			assert_zu_ne(NREGS_READ(out, 0), 0,
+			    "Extent region count must be positive");
+		} else {
+			assert_zu_eq(NFREE_READ(out, 0), 0,
+			    "Extent free count should be zero");
+			assert_zu_eq(NREGS_READ(out, 0), 1,
+			    "Extent region count should be one");
+		}
+		TEST_EQUAL_REF(1,
+		    "Should not overwrite content beyond what's needed");
+		in_sz *= 2;
+		out_sz_ref = out_sz *= 2;
+
+		memcpy(out_ref, out, 3 * sizeof(size_t));
+		TEST_UTIL_BATCH_VALID;
+		TEST_EQUAL_REF(0, "Statistics should be stable across calls");
+		if (sz <= SC_SMALL_MAXCLASS) {
+			assert_zu_le(NFREE_READ(out, 1), NREGS_READ(out, 1),
+			    "Extent free count exceeded region count");
+		} else {
+			assert_zu_eq(NFREE_READ(out, 0), 0,
+			    "Extent free count should be zero");
+		}
+		assert_zu_eq(NREGS_READ(out, 0), NREGS_READ(out, 1),
+		    "Extent region count should be same for same region size");
+		assert_zu_eq(SIZE_READ(out, 0), SIZE_READ(out, 1),
+		    "Extent size should be same for same region size");
+
+#undef SIZE_READ
+#undef NREGS_READ
+#undef NFREE_READ
+
+#undef TEST_EQUAL_REF
+
+		free(q);
+		free(p);
+	}
+}
+TEST_END
+
+int
+main(void) {
+	assert_zu_lt(SC_SMALL_MAXCLASS, TEST_MAX_SIZE,
+	    "Test case cannot cover large classes");
+	return test(test_query, test_batch);
+}
diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c
index 498f9e06..3a75ac04 100644
--- a/test/unit/mallctl.c
+++ b/test/unit/mallctl.c
@@ -159,6 +159,7 @@ TEST_BEGIN(test_mallctl_opt) {
 
 	TEST_MALLCTL_OPT(bool, abort, always);
 	TEST_MALLCTL_OPT(bool, abort_conf, always);
+	TEST_MALLCTL_OPT(bool, confirm_conf, always);
 	TEST_MALLCTL_OPT(const char *, metadata_thp, always);
 	TEST_MALLCTL_OPT(bool, retain, always);
 	TEST_MALLCTL_OPT(const char *, dss, always);
diff --git a/test/unit/prof_log.c b/test/unit/prof_log.c
index 6a3464b4..92fbd7ce 100644
--- a/test/unit/prof_log.c
+++ b/test/unit/prof_log.c
@@ -125,12 +125,14 @@ TEST_BEGIN(test_prof_log_many_traces) {
 		assert_rep();
 	}
 	/*
-	 * There should be 8 total backtraces: two for malloc/free in f1(),
-	 * two for malloc/free in f2(), two for malloc/free in f3(), and then
-	 * two for malloc/free in f1()'s call to f3().
+	 * There should be 8 total backtraces: two for malloc/free in f1(), two
+	 * for malloc/free in f2(), two for malloc/free in f3(), and then two
+	 * for malloc/free in f1()'s call to f3().  However compiler
+	 * optimizations such as loop unrolling might generate more call sites.
+	 * So >= 8 traces are expected.
 	 */
-	assert_zu_eq(prof_log_bt_count(), 8,
-	    "Wrong number of backtraces given sample workload");
+	assert_zu_ge(prof_log_bt_count(), 8,
+	    "Expect at least 8 backtraces given sample workload");
 	assert_d_eq(mallctl("prof.log_stop", NULL, NULL, NULL, 0), 0,
 	    "Unexpected mallctl failure when stopping logging");
 }
diff --git a/test/unit/retained.c b/test/unit/retained.c
index d51a5981..7993fd3d 100644
--- a/test/unit/retained.c
+++ b/test/unit/retained.c
@@ -107,6 +107,9 @@ TEST_BEGIN(test_retained) {
 	atomic_store_u(&epoch, 0, ATOMIC_RELAXED);
 
 	unsigned nthreads = ncpus * 2;
+	if (LG_SIZEOF_PTR < 3 && nthreads > 16) {
+		nthreads = 16; /* 32-bit platform could run out of vaddr. */
+	}
 	VARIABLE_ARRAY(thd_t, threads, nthreads);
 	for (unsigned i = 0; i < nthreads; i++) {
 		thd_create(&threads[i], thd_start, NULL);
diff --git a/test/unit/rtree.c b/test/unit/rtree.c
index b017bc03..90adca13 100644
--- a/test/unit/rtree.c
+++ b/test/unit/rtree.c
@@ -87,9 +87,9 @@ TEST_BEGIN(test_rtree_extrema) {
 	extent_t extent_a, extent_b;
 	extent_init(&extent_a, NULL, NULL, SC_LARGE_MINCLASS, false,
 	    sz_size2index(SC_LARGE_MINCLASS), 0,
-	    extent_state_active, false, false, true);
+	    extent_state_active, false, false, true, EXTENT_NOT_HEAD);
 	extent_init(&extent_b, NULL, NULL, 0, false, SC_NSIZES, 0,
-	    extent_state_active, false, false, true);
+	    extent_state_active, false, false, true, EXTENT_NOT_HEAD);
 
 	tsdn_t *tsdn = tsdn_fetch();
 
@@ -126,7 +126,7 @@ TEST_BEGIN(test_rtree_bits) {
 
 	extent_t extent;
 	extent_init(&extent, NULL, NULL, 0, false, SC_NSIZES, 0,
-	    extent_state_active, false, false, true);
+	    extent_state_active, false, false, true, EXTENT_NOT_HEAD);
 
 	rtree_t *rtree = &test_rtree;
 	rtree_ctx_t rtree_ctx;
@@ -167,7 +167,7 @@ TEST_BEGIN(test_rtree_random) {
 
 	extent_t extent;
 	extent_init(&extent, NULL, NULL, 0, false, SC_NSIZES, 0,
-	    extent_state_active, false, false, true);
+	    extent_state_active, false, false, true, EXTENT_NOT_HEAD);
 
 	assert_false(rtree_new(rtree, false), "Unexpected rtree_new() failure");
 
diff --git a/test/unit/safety_check.c b/test/unit/safety_check.c
new file mode 100644
index 00000000..bf4bd86d
--- /dev/null
+++ b/test/unit/safety_check.c
@@ -0,0 +1,156 @@
+#include "test/jemalloc_test.h"
+
+#include "jemalloc/internal/safety_check.h"
+
+/*
+ * Note that we get called through safety_check.sh, which turns on sampling for
+ * everything.
+ */
+
+bool fake_abort_called;
+void fake_abort(const char *message) {
+	(void)message;
+	fake_abort_called = true;
+}
+
+TEST_BEGIN(test_malloc_free_overflow) {
+	test_skip_if(!config_prof);
+	test_skip_if(!config_opt_safety_checks);
+
+	safety_check_set_abort(&fake_abort);
+	/* Buffer overflow! */
+	char* ptr = malloc(128);
+	ptr[128] = 0;
+	free(ptr);
+	safety_check_set_abort(NULL);
+
+	assert_b_eq(fake_abort_called, true, "Redzone check didn't fire.");
+	fake_abort_called = false;
+}
+TEST_END
+
+TEST_BEGIN(test_mallocx_dallocx_overflow) {
+	test_skip_if(!config_prof);
+	test_skip_if(!config_opt_safety_checks);
+
+	safety_check_set_abort(&fake_abort);
+	/* Buffer overflow! */
+	char* ptr = mallocx(128, 0);
+	ptr[128] = 0;
+	dallocx(ptr, 0);
+	safety_check_set_abort(NULL);
+
+	assert_b_eq(fake_abort_called, true, "Redzone check didn't fire.");
+	fake_abort_called = false;
+}
+TEST_END
+
+TEST_BEGIN(test_malloc_sdallocx_overflow) {
+	test_skip_if(!config_prof);
+	test_skip_if(!config_opt_safety_checks);
+
+	safety_check_set_abort(&fake_abort);
+	/* Buffer overflow! */
+	char* ptr = malloc(128);
+	ptr[128] = 0;
+	sdallocx(ptr, 128, 0);
+	safety_check_set_abort(NULL);
+
+	assert_b_eq(fake_abort_called, true, "Redzone check didn't fire.");
+	fake_abort_called = false;
+}
+TEST_END
+
+TEST_BEGIN(test_realloc_overflow) {
+	test_skip_if(!config_prof);
+	test_skip_if(!config_opt_safety_checks);
+
+	safety_check_set_abort(&fake_abort);
+	/* Buffer overflow! */
+	char* ptr = malloc(128);
+	ptr[128] = 0;
+	ptr = realloc(ptr, 129);
+	safety_check_set_abort(NULL);
+	free(ptr);
+
+	assert_b_eq(fake_abort_called, true, "Redzone check didn't fire.");
+	fake_abort_called = false;
+}
+TEST_END
+
+TEST_BEGIN(test_rallocx_overflow) {
+	test_skip_if(!config_prof);
+	test_skip_if(!config_opt_safety_checks);
+
+	safety_check_set_abort(&fake_abort);
+	/* Buffer overflow! */
+	char* ptr = malloc(128);
+	ptr[128] = 0;
+	ptr = rallocx(ptr, 129, 0);
+	safety_check_set_abort(NULL);
+	free(ptr);
+
+	assert_b_eq(fake_abort_called, true, "Redzone check didn't fire.");
+	fake_abort_called = false;
+}
+TEST_END
+
+TEST_BEGIN(test_xallocx_overflow) {
+	test_skip_if(!config_prof);
+	test_skip_if(!config_opt_safety_checks);
+
+	safety_check_set_abort(&fake_abort);
+	/* Buffer overflow! */
+	char* ptr = malloc(128);
+	ptr[128] = 0;
+	size_t result = xallocx(ptr, 129, 0, 0);
+	assert_zu_eq(result, 128, "");
+	free(ptr);
+	assert_b_eq(fake_abort_called, true, "Redzone check didn't fire.");
+	fake_abort_called = false;
+	safety_check_set_abort(NULL);
+}
+TEST_END
+
+TEST_BEGIN(test_realloc_no_overflow) {
+	char* ptr = malloc(128);
+	ptr = realloc(ptr, 256);
+	ptr[128] = 0;
+	ptr[255] = 0;
+	free(ptr);
+
+	ptr = malloc(128);
+	ptr = realloc(ptr, 64);
+	ptr[63] = 0;
+	ptr[0] = 0;
+	free(ptr);
+}
+TEST_END
+
+TEST_BEGIN(test_rallocx_no_overflow) {
+	char* ptr = malloc(128);
+	ptr = rallocx(ptr, 256, 0);
+	ptr[128] = 0;
+	ptr[255] = 0;
+	free(ptr);
+
+	ptr = malloc(128);
+	ptr = rallocx(ptr, 64, 0);
+	ptr[63] = 0;
+	ptr[0] = 0;
+	free(ptr);
+}
+TEST_END
+
+int
+main(void) {
+	return test(
+	    test_malloc_free_overflow,
+	    test_mallocx_dallocx_overflow,
+	    test_malloc_sdallocx_overflow,
+	    test_realloc_overflow,
+	    test_rallocx_overflow,
+	    test_xallocx_overflow,
+	    test_realloc_no_overflow,
+	    test_rallocx_no_overflow);
+}
diff --git a/test/unit/safety_check.sh b/test/unit/safety_check.sh
new file mode 100644
index 00000000..8fcc7d8a
--- /dev/null
+++ b/test/unit/safety_check.sh
@@ -0,0 +1,5 @@
+#!/bin/sh
+
+if [ "x${enable_prof}" = "x1" ] ; then
+  export MALLOC_CONF="prof:true,lg_prof_sample:0"
+fi
diff --git a/test/unit/slab.c b/test/unit/slab.c
index ef718821..c56af25f 100644
--- a/test/unit/slab.c
+++ b/test/unit/slab.c
@@ -9,7 +9,8 @@ TEST_BEGIN(test_arena_slab_regind) {
 		const bin_info_t *bin_info = &bin_infos[binind];
 		extent_init(&slab, NULL, mallocx(bin_info->slab_size,
 		    MALLOCX_LG_ALIGN(LG_PAGE)), bin_info->slab_size, true,
-		    binind, 0, extent_state_active, false, true, true);
+		    binind, 0, extent_state_active, false, true, true,
+		    EXTENT_NOT_HEAD);
 		assert_ptr_not_null(extent_addr_get(&slab),
 		    "Unexpected malloc() failure");
 		for (regind = 0; regind < bin_info->nregs; regind++) {
diff --git a/test/unit/stats.c b/test/unit/stats.c
index 4323bfa3..646768e8 100644
--- a/test/unit/stats.c
+++ b/test/unit/stats.c
@@ -228,7 +228,7 @@ gen_mallctl_str(char *cmd, char *name, unsigned arena_ind) {
 
 TEST_BEGIN(test_stats_arenas_bins) {
 	void *p;
-	size_t sz, curslabs, curregs;
+	size_t sz, curslabs, curregs, nonfull_slabs;
 	uint64_t epoch, nmalloc, ndalloc, nrequests, nfills, nflushes;
 	uint64_t nslabs, nreslabs;
 	int expected = config_stats ? 0 : ENOENT;
@@ -289,6 +289,9 @@ TEST_BEGIN(test_stats_arenas_bins) {
 	gen_mallctl_str(cmd, "curslabs", arena_ind);
 	assert_d_eq(mallctl(cmd, (void *)&curslabs, &sz, NULL, 0), expected,
 	    "Unexpected mallctl() result");
+	gen_mallctl_str(cmd, "nonfull_slabs", arena_ind);
+	assert_d_eq(mallctl(cmd, (void *)&nonfull_slabs, &sz, NULL, 0),
+	    expected, "Unexpected mallctl() result");
 
 	if (config_stats) {
 		assert_u64_gt(nmalloc, 0,
@@ -309,6 +312,8 @@ TEST_BEGIN(test_stats_arenas_bins) {
 		    "At least one slab should have been allocated");
 		assert_zu_gt(curslabs, 0,
 		    "At least one slab should be currently allocated");
+		assert_zu_eq(nonfull_slabs, 0,
+		    "slabs_nonfull should be empty");
 	}
 
 	dallocx(p, 0);