From 994da4232621dd1210fcf39bdf0d6454cefda473 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 28 Feb 2016 15:20:40 -0800 Subject: [PATCH 01/82] Update copyright dates for 2016. --- COPYING | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/COPYING b/COPYING index 611968cd..104b1f8b 100644 --- a/COPYING +++ b/COPYING @@ -1,10 +1,10 @@ Unless otherwise specified, files in the jemalloc source distribution are subject to the following license: -------------------------------------------------------------------------------- -Copyright (C) 2002-2015 Jason Evans . +Copyright (C) 2002-2016 Jason Evans . All rights reserved. Copyright (C) 2007-2012 Mozilla Foundation. All rights reserved. -Copyright (C) 2009-2015 Facebook, Inc. All rights reserved. +Copyright (C) 2009-2016 Facebook, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: From 86478b29989075cfe7dcf5f0c104bac3fa584a17 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Fri, 26 Feb 2016 17:53:13 -0800 Subject: [PATCH 02/82] Remove errno overrides. --- include/msvc_compat/windows_extra.h | 22 +--------------------- 1 file changed, 1 insertion(+), 21 deletions(-) diff --git a/include/msvc_compat/windows_extra.h b/include/msvc_compat/windows_extra.h index 0c5e323f..3008faa3 100644 --- a/include/msvc_compat/windows_extra.h +++ b/include/msvc_compat/windows_extra.h @@ -1,26 +1,6 @@ #ifndef MSVC_COMPAT_WINDOWS_EXTRA_H #define MSVC_COMPAT_WINDOWS_EXTRA_H -#ifndef ENOENT -# define ENOENT ERROR_PATH_NOT_FOUND -#endif -#ifndef EINVAL -# define EINVAL ERROR_BAD_ARGUMENTS -#endif -#ifndef EAGAIN -# define EAGAIN ERROR_OUTOFMEMORY -#endif -#ifndef EPERM -# define EPERM ERROR_WRITE_FAULT -#endif -#ifndef EFAULT -# define EFAULT ERROR_INVALID_ADDRESS -#endif -#ifndef ENOMEM -# define ENOMEM ERROR_NOT_ENOUGH_MEMORY -#endif -#ifndef ERANGE -# define ERANGE ERROR_INVALID_DATA -#endif +#include #endif /* MSVC_COMPAT_WINDOWS_EXTRA_H */ From 0e1d5c25c677064ed81e3ec0f88b52f835557171 Mon Sep 17 00:00:00 2001 From: rustyx Date: Mon, 29 Feb 2016 21:04:29 +0100 Subject: [PATCH 03/82] Fix MSVC project and improve MSVC lib naming (v140 -> vc140) --- msvc/projects/vc2015/jemalloc/jemalloc.vcxproj | 8 ++++++-- .../vc2015/jemalloc/jemalloc.vcxproj.filters | 18 +++++++++++++++--- .../vc2015/test_threads/test_threads.vcxproj | 4 ++-- 3 files changed, 23 insertions(+), 7 deletions(-) diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index d8ad505b..f3f0260b 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -54,6 +54,7 @@ + @@ -69,6 +70,7 @@ + @@ -103,11 +105,13 @@ + + @@ -227,7 +231,7 @@ $(SolutionDir)$(Platform)\$(Configuration)\ $(Platform)\$(Configuration)\ - $(ProjectName)-$(PlatformToolset)-$(Configuration) + $(ProjectName)-vc$(PlatformToolsetVersion)-$(Configuration) $(SolutionDir)$(Platform)\$(Configuration)\ @@ -236,7 +240,7 @@ $(SolutionDir)$(Platform)\$(Configuration)\ $(Platform)\$(Configuration)\ - $(ProjectName)-$(PlatformToolset)-$(Configuration) + $(ProjectName)-vc$(PlatformToolsetVersion)-$(Configuration) diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 89a51f76..ce70632b 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -101,6 
+101,9 @@ Header Files\internal + + Header Files\internal + Header Files\internal @@ -146,6 +149,9 @@ Header Files\internal + + Header Files\internal + Header Files\internal @@ -214,9 +220,15 @@ Source Files + + Source Files + Source Files + + Source Files + Source Files @@ -232,14 +244,14 @@ Source Files + + Source Files + Source Files Source Files - - Source Files - \ No newline at end of file diff --git a/msvc/projects/vc2015/test_threads/test_threads.vcxproj b/msvc/projects/vc2015/test_threads/test_threads.vcxproj index b681e71e..f5e9898f 100644 --- a/msvc/projects/vc2015/test_threads/test_threads.vcxproj +++ b/msvc/projects/vc2015/test_threads/test_threads.vcxproj @@ -223,7 +223,7 @@ Console true - jemalloc-$(PlatformToolset)-$(Configuration).lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + jemalloc-vc$(PlatformToolsetVersion)-$(Configuration).lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) $(SolutionDir)$(Platform)\$(Configuration) @@ -306,7 +306,7 @@ true true $(SolutionDir)$(Platform)\$(Configuration) - jemalloc-$(PlatformToolset)-$(Configuration).lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + jemalloc-vc$(PlatformToolsetVersion)-$(Configuration).lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) From 33184bf69813087bf1885b0993685f9d03320c69 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Mon, 29 Feb 2016 14:30:19 -0800 Subject: [PATCH 04/82] Fix stack corruption and uninitialized var warning Stack corruption happens in x64 bit This resolves #347. --- src/arena.c | 2 +- test/unit/hash.c | 13 +++++++------ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/arena.c b/src/arena.c index 99e20fde..965c0fe2 100644 --- a/src/arena.c +++ b/src/arena.c @@ -2423,7 +2423,7 @@ arena_malloc_large(tsd_t *tsd, arena_t *arena, szind_t binind, bool zero) uintptr_t random_offset; arena_run_t *run; arena_chunk_map_misc_t *miscelm; - UNUSED bool idump; + UNUSED bool idump JEMALLOC_CC_SILENCE_INIT(false); /* Large allocation. */ usize = index2size(binind); diff --git a/test/unit/hash.c b/test/unit/hash.c index f50ba81b..010c9d76 100644 --- a/test/unit/hash.c +++ b/test/unit/hash.c @@ -64,14 +64,15 @@ static void hash_variant_verify_key(hash_variant_t variant, uint8_t *key) { const int hashbytes = hash_variant_bits(variant) / 8; - VARIABLE_ARRAY(uint8_t, hashes, hashbytes * 256); + const int hashes_size = hashbytes * 256; + VARIABLE_ARRAY(uint8_t, hashes, hashes_size); VARIABLE_ARRAY(uint8_t, final, hashbytes); unsigned i; uint32_t computed, expected; memset(key, 0, KEY_SIZE); - memset(hashes, 0, sizeof(hashes)); - memset(final, 0, sizeof(final)); + memset(hashes, 0, hashes_size); + memset(final, 0, hashbytes); /* * Hash keys of the form {0}, {0,1}, {0,1,2}, ..., {0,1,...,255} as the @@ -102,17 +103,17 @@ hash_variant_verify_key(hash_variant_t variant, uint8_t *key) /* Hash the result array. 
*/ switch (variant) { case hash_variant_x86_32: { - uint32_t out = hash_x86_32(hashes, hashbytes*256, 0); + uint32_t out = hash_x86_32(hashes, hashes_size, 0); memcpy(final, &out, sizeof(out)); break; } case hash_variant_x86_128: { uint64_t out[2]; - hash_x86_128(hashes, hashbytes*256, 0, out); + hash_x86_128(hashes, hashes_size, 0, out); memcpy(final, out, sizeof(out)); break; } case hash_variant_x64_128: { uint64_t out[2]; - hash_x64_128(hashes, hashbytes*256, 0, out); + hash_x64_128(hashes, hashes_size, 0, out); memcpy(final, out, sizeof(out)); break; } default: not_reached(); From 022f6891faf1fffa435f2bc613c25e8482a32702 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 2 Mar 2016 22:41:32 -0800 Subject: [PATCH 05/82] Avoid a potential innocuous compiler warning. Add a cast to avoid comparing a ssize_t value to a uint64_t value that is always larger than a 32-bit ssize_t. This silences an innocuous compiler warning from e.g. gcc 4.2.1 about the comparison always having the same result. --- include/jemalloc/internal/nstime.h | 2 +- src/arena.c | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/nstime.h b/include/jemalloc/internal/nstime.h index bd04f04b..dcb4b47f 100644 --- a/include/jemalloc/internal/nstime.h +++ b/include/jemalloc/internal/nstime.h @@ -7,7 +7,7 @@ typedef struct nstime_s nstime_t; /* Maximum supported number of seconds (~584 years). */ -#define NSTIME_SEC_MAX 18446744072 +#define NSTIME_SEC_MAX KQU(18446744072) #endif /* JEMALLOC_H_TYPES */ /******************************************************************************/ diff --git a/src/arena.c b/src/arena.c index 965c0fe2..f436959e 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1352,7 +1352,11 @@ static bool arena_decay_time_valid(ssize_t decay_time) { - return (decay_time >= -1 && decay_time <= NSTIME_SEC_MAX); + if (decay_time < -1) + return (false); + if (decay_time == -1 || (uint64_t)decay_time <= NSTIME_SEC_MAX) + return (true); + return (false); } ssize_t From e3998c681dec35fe0de25f693a39de6fb881134e Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 7 Mar 2016 17:55:55 -0800 Subject: [PATCH 06/82] Replace contributor name with github account. --- ChangeLog | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index 9cbfbf96..69f4dbb0 100644 --- a/ChangeLog +++ b/ChangeLog @@ -59,14 +59,14 @@ brevity. Much more detail can be found in the git revision history: Bug fixes: - Fix stats.cactive accounting regression. (@rustyx, @jasone) - Handle unaligned keys in hash(). This caused problems for some ARM systems. - (@jasone, Christopher Ferris) + (@jasone, @cferris1000) - Refactor arenas array. In addition to fixing a fork-related deadlock, this makes arena lookups faster and simpler. (@jasone) - Move retained memory allocation out of the default chunk allocation function, to a location that gets executed even if the application installs a custom chunk allocation function. This resolves a virtual memory leak. (@buchgr) - - Fix a potential tsd cleanup leak. (Christopher Ferris, @jasone) + - Fix a potential tsd cleanup leak. (@cferris1000, @jasone) - Fix run quantization. In practice this bug had no impact unless applications requested memory with alignment exceeding one page. (@jasone, @djwatson) From 6bafa6678fc36483e638f1c3a0a9bf79fb89bfc9 Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Mon, 29 Feb 2016 11:22:52 -0800 Subject: [PATCH 07/82] Pairing heap Initial implementation of a twopass pairing heap with aux list. 
Research papers linked in comments. Where search/nsearch/last aren't needed, this gives much faster first(), delete(), and insert(). Insert is O(1), and first/delete don't have to walk the whole tree. Also tested rb_old with parent pointers - it was better than the current rb.h for memory loads, but still much worse than a pairing heap. An array-based heap would be much faster if everything fits in memory, but on a cold cache it has many more memory loads for most operations. --- Makefile.in | 1 + .../jemalloc/internal/jemalloc_internal.h.in | 4 + include/jemalloc/internal/ph.h | 255 ++++++++++++++++++ include/jemalloc/internal/private_symbols.txt | 8 + src/ph.c | 2 + 5 files changed, 270 insertions(+) create mode 100644 include/jemalloc/internal/ph.h create mode 100644 src/ph.c diff --git a/Makefile.in b/Makefile.in index f60823f5..3d725be1 100644 --- a/Makefile.in +++ b/Makefile.in @@ -95,6 +95,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/mutex.c \ $(srcroot)src/nstime.c \ $(srcroot)src/pages.c \ + $(srcroot)src/ph.c \ $(srcroot)src/prng.c \ $(srcroot)src/prof.c \ $(srcroot)src/quarantine.c \ diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 3f54391f..d3b94c00 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -371,6 +371,7 @@ typedef unsigned szind_t; #include "jemalloc/internal/tsd.h" #include "jemalloc/internal/mb.h" #include "jemalloc/internal/extent.h" +#include "jemalloc/internal/ph.h" #include "jemalloc/internal/arena.h" #include "jemalloc/internal/bitmap.h" #include "jemalloc/internal/base.h" @@ -401,6 +402,7 @@ typedef unsigned szind_t; #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/mb.h" #include "jemalloc/internal/bitmap.h" +#include "jemalloc/internal/ph.h" #define JEMALLOC_ARENA_STRUCTS_A #include "jemalloc/internal/arena.h" #undef JEMALLOC_ARENA_STRUCTS_A @@ -494,6 +496,7 @@ void jemalloc_postfork_child(void); #include "jemalloc/internal/mb.h" #include "jemalloc/internal/bitmap.h" #include "jemalloc/internal/extent.h" +#include "jemalloc/internal/ph.h" #include "jemalloc/internal/arena.h" #include "jemalloc/internal/base.h" #include "jemalloc/internal/rtree.h" @@ -525,6 +528,7 @@ void jemalloc_postfork_child(void); #include "jemalloc/internal/tsd.h" #include "jemalloc/internal/mb.h" #include "jemalloc/internal/extent.h" +#include "jemalloc/internal/ph.h" #include "jemalloc/internal/base.h" #include "jemalloc/internal/rtree.h" #include "jemalloc/internal/pages.h" diff --git a/include/jemalloc/internal/ph.h b/include/jemalloc/internal/ph.h new file mode 100644 index 00000000..aeca693e --- /dev/null +++ b/include/jemalloc/internal/ph.h @@ -0,0 +1,255 @@ +/* + * A Pairing Heap implementation. + * + * "The Pairing Heap: A New Form of Self-Adjusting Heap" + * https://www.cs.cmu.edu/~sleator/papers/pairing-heaps.pdf + * + * With auxiliary list, described in a follow on paper + * + * "Pairing Heaps: Experiments and Analysis" + * http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.106.2988&rep=rep1&type=pdf + * + * Where search/nsearch/last are not needed, ph.h outperforms rb.h by ~7x fewer + * cpu cycles, and ~4x fewer memory references. + * + * Tagging parent/prev pointers on the next list was also described in the + * original paper, such that only two pointers are needed. 
This is not + * implemented here, as it substantially increases the memory references + * needed when ph_remove is called, almost overshadowing the other performance + * gains. + * + ******************************************************************************* + */ +#ifdef JEMALLOC_H_TYPES + +typedef struct ph_node_s ph_node_t; +typedef struct ph_heap_s ph_heap_t; + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +struct ph_node_s { + ph_node_t *subheaps; + ph_node_t *parent; + ph_node_t *next; + ph_node_t *prev; +}; + +struct ph_heap_s { + ph_node_t *root; +}; + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#ifndef JEMALLOC_ENABLE_INLINE +ph_node_t *ph_merge(ph_node_t *heap1, ph_node_t *heap2); +ph_node_t *ph_merge_pairs(ph_node_t *subheaps); +void ph_merge_aux_list(ph_heap_t *l); +void ph_new(ph_heap_t *n); +ph_node_t *ph_first(ph_heap_t *l); +void ph_insert(ph_heap_t *l, ph_node_t *n); +ph_node_t *ph_remove_first(ph_heap_t *l); +void ph_remove(ph_heap_t *l, ph_node_t *n); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PH_C_)) + +/* Helper routines ************************************************************/ + +JEMALLOC_INLINE ph_node_t * +ph_merge(ph_node_t *heap1, ph_node_t *heap2) +{ + + if (heap1 == NULL) + return (heap2); + if (heap2 == NULL) + return (heap1); + /* Optional: user-settable comparison function */ + if ((uintptr_t)heap1 < (uintptr_t)heap2) { + heap2->parent = heap1; + heap2->prev = NULL; + heap2->next = heap1->subheaps; + if (heap1->subheaps != NULL) + heap1->subheaps->prev = heap2; + heap1->subheaps = heap2; + return (heap1); + } else { + heap1->parent = heap2; + heap1->prev = NULL; + heap1->next = heap2->subheaps; + if (heap2->subheaps != NULL) + heap2->subheaps->prev = heap1; + heap2->subheaps = heap1; + return (heap2); + } +} + +JEMALLOC_INLINE ph_node_t * +ph_merge_pairs(ph_node_t *subheaps) +{ + + if (subheaps == NULL) + return (NULL); + if (subheaps->next == NULL) + return (subheaps); + { + ph_node_t *l0 = subheaps; + ph_node_t *l1 = l0->next; + ph_node_t *lrest = l1->next; + + if (lrest != NULL) + lrest->prev = NULL; + l1->next = NULL; + l1->prev = NULL; + l0->next = NULL; + l0->prev = NULL; + return (ph_merge(ph_merge(l0, l1), ph_merge_pairs(lrest))); + } +} + +/* + * Merge the aux list into the root node. + */ +JEMALLOC_INLINE void +ph_merge_aux_list(ph_heap_t *l) +{ + + if (l->root == NULL) + return; + if (l->root->next != NULL) { + ph_node_t *l0 = l->root->next; + ph_node_t *l1 = l0->next; + ph_node_t *lrest = NULL; + + /* Multipass merge. */ + while (l1 != NULL) { + lrest = l1->next; + if (lrest != NULL) + lrest->prev = NULL; + l1->next = NULL; + l1->prev = NULL; + l0->next = NULL; + l0->prev = NULL; + l0 = ph_merge(l0, l1); + l1 = lrest; + } + l->root->next = NULL; + l->root = ph_merge(l->root, l0); + } +} + +/* User API *******************************************************************/ + +JEMALLOC_INLINE void +ph_new(ph_heap_t *n) +{ + + memset(n, 0, sizeof(ph_heap_t)); +} + +JEMALLOC_INLINE ph_node_t * +ph_first(ph_heap_t *l) +{ + + /* + * For the cost of an extra pointer, a l->min could be stored instead of + * merging the aux list here. 
Current users always call ph_remove(l, + * ph_first(l)) though, and the aux list must always be merged for + * delete of the min node anyway. + */ + ph_merge_aux_list(l); + return (l->root); +} + +JEMALLOC_INLINE void +ph_insert(ph_heap_t *l, ph_node_t *n) +{ + + memset(n, 0, sizeof(ph_node_t)); + + /* + * Non-aux list insert: + * + * l->root = ph_merge(l->root, n); + * + * Aux list insert: + */ + if (l->root == NULL) + l->root = n; + else { + n->next = l->root->next; + if (l->root->next != NULL) + l->root->next->prev = n; + n->prev = l->root; + l->root->next = n; + } +} + +JEMALLOC_INLINE ph_node_t * +ph_remove_first(ph_heap_t *l) +{ + ph_node_t *ret; + + ph_merge_aux_list(l); + if (l->root == NULL) + return (NULL); + + ret = l->root; + + l->root = ph_merge_pairs(l->root->subheaps); + + return (ret); +} + +JEMALLOC_INLINE void +ph_remove(ph_heap_t *l, ph_node_t *n) +{ + ph_node_t *replace; + + /* + * We can delete from aux list without merging it, but we need to merge + * if we are dealing with the root node. + */ + if (l->root == n) { + ph_merge_aux_list(l); + if (l->root == n) { + ph_remove_first(l); + return; + } + } + + /* Find a possible replacement node, and link to parent. */ + replace = ph_merge_pairs(n->subheaps); + if (n->parent != NULL && n->parent->subheaps == n) { + if (replace != NULL) + n->parent->subheaps = replace; + else + n->parent->subheaps = n->next; + } + /* Set next/prev for sibling linked list. */ + if (replace != NULL) { + replace->parent = n->parent; + replace->prev = n->prev; + if (n->prev != NULL) + n->prev->next = replace; + replace->next = n->next; + if (n->next != NULL) + n->next->prev = replace; + } else { + if (n->prev != NULL) + n->prev->next = n->next; + if (n->next != NULL) + n->next->prev = n->prev; + } +} +#endif + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 5880996a..2de1d5f3 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -379,6 +379,14 @@ pages_map pages_purge pages_trim pages_unmap +ph_first +ph_insert +ph_merge +ph_merge_aux_list +ph_merge_pairs +ph_new +ph_remove_first +ph_remove pow2_ceil_u32 pow2_ceil_u64 pow2_ceil_zu diff --git a/src/ph.c b/src/ph.c new file mode 100644 index 00000000..051a20d7 --- /dev/null +++ b/src/ph.c @@ -0,0 +1,2 @@ +#define JEMALLOC_PH_C_ +#include "jemalloc/internal/jemalloc_internal.h" From 34dca5671fec8c592f1ca80ce11dc808cf6b83ed Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Mon, 29 Feb 2016 11:30:34 -0800 Subject: [PATCH 08/82] Unittest for pairing heap --- Makefile.in | 1 + test/unit/ph.c | 92 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 93 insertions(+) create mode 100644 test/unit/ph.c diff --git a/Makefile.in b/Makefile.in index 3d725be1..7f2d668a 100644 --- a/Makefile.in +++ b/Makefile.in @@ -148,6 +148,7 @@ TESTS_UNIT := $(srcroot)test/unit/atomic.c \ $(srcroot)test/unit/math.c \ $(srcroot)test/unit/mq.c \ $(srcroot)test/unit/mtx.c \ + $(srcroot)test/unit/ph.c \ $(srcroot)test/unit/prng.c \ $(srcroot)test/unit/prof_accum.c \ $(srcroot)test/unit/prof_active.c \ diff --git a/test/unit/ph.c b/test/unit/ph.c new file mode 100644 index 00000000..b0e44028 --- /dev/null +++ b/test/unit/ph.c @@ -0,0 +1,92 @@ +#include "test/jemalloc_test.h" + +typedef struct node_s node_t; + +struct node_s { + ph_node_t link; +}; + +TEST_BEGIN(test_ph_empty) +{ + 
ph_heap_t heap; + + ph_new(&heap); + + assert_ptr_null(ph_first(&heap), "Unexpected node"); +} +TEST_END + +TEST_BEGIN(test_ph_random) +{ +#define NNODES 25 +#define NBAGS 250 +#define SEED 42 + sfmt_t *sfmt; + uint64_t bag[NNODES]; + ph_heap_t heap; + node_t nodes[NNODES]; + unsigned i, j, k; + + sfmt = init_gen_rand(SEED); + for (i = 0; i < NBAGS; i++) { + switch (i) { + case 0: + /* Insert in order. */ + for (j = 0; j < NNODES; j++) + bag[j] = j; + break; + case 1: + /* Insert in reverse order. */ + for (j = 0; j < NNODES; j++) + bag[j] = NNODES - j - 1; + break; + default: + for (j = 0; j < NNODES; j++) + bag[j] = gen_rand64_range(sfmt, NNODES); + } + + for (j = 1; j <= NNODES; j++) { + /* Initialize heap and nodes. */ + ph_new(&heap); + + /* Insert nodes. */ + for (k = 0; k < j; k++) { + ph_insert(&heap, &nodes[k].link); + + assert_ptr_not_null(ph_first(&heap), + "Heap should not be empty"); + } + + /* Remove nodes. */ + switch (i % 2) { + case 0: + for (k = 0; k < j; k++) + ph_remove(&heap, &nodes[k].link); + break; + case 1: + for (k = j; k > 0; k--) + ph_remove(&heap, &nodes[k-1].link); + break; + default: + not_reached(); + } + + assert_ptr_null(ph_first(&heap), + "Heap should not be empty"); + } + } + fini_gen_rand(sfmt); +#undef NNODES +#undef NBAGS +#undef SEED +} +TEST_END + +int +main(void) +{ + + return (test( + test_ph_empty, + test_ph_random)); +} From f8d80d62a8765c54aaa9433148fd112f7c794734 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 8 Mar 2016 13:43:47 -0800 Subject: [PATCH 09/82] Refactor ph_merge_ordered() out of ph_merge(). --- include/jemalloc/internal/ph.h | 39 +++++++++++-------- include/jemalloc/internal/private_symbols.txt | 1 + 2 files changed, 23 insertions(+), 17 deletions(-) diff --git a/include/jemalloc/internal/ph.h b/include/jemalloc/internal/ph.h index aeca693e..519f0dda 100644 --- a/include/jemalloc/internal/ph.h +++ b/include/jemalloc/internal/ph.h @@ -49,6 +49,7 @@ struct ph_heap_s { #ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE +ph_node_t *ph_merge_ordered(ph_node_t *heap1, ph_node_t *heap2); ph_node_t *ph_merge(ph_node_t *heap1, ph_node_t *heap2); ph_node_t *ph_merge_pairs(ph_node_t *subheaps); void ph_merge_aux_list(ph_heap_t *l); @@ -63,6 +64,23 @@ void ph_remove(ph_heap_t *l, ph_node_t *n); /* Helper routines ************************************************************/ +JEMALLOC_INLINE ph_node_t * +ph_merge_ordered(ph_node_t *heap1, ph_node_t *heap2) +{ + + assert(heap1 != NULL); + assert(heap2 != NULL); + assert ((uintptr_t)heap1 <= (uintptr_t)heap2); + + heap2->parent = heap1; + heap2->prev = NULL; + heap2->next = heap1->subheaps; + if (heap1->subheaps != NULL) + heap1->subheaps->prev = heap2; + heap1->subheaps = heap2; + return (heap1); +} + JEMALLOC_INLINE ph_node_t * ph_merge(ph_node_t *heap1, ph_node_t *heap2) { @@ -72,23 +90,10 @@ ph_merge(ph_node_t *heap1, ph_node_t *heap2) if (heap2 == NULL) return (heap1); /* Optional: user-settable comparison function */ - if ((uintptr_t)heap1 < (uintptr_t)heap2) { - heap2->parent = heap1; - heap2->prev = NULL; - heap2->next = heap1->subheaps; - if (heap1->subheaps != NULL) - heap1->subheaps->prev = heap2; - heap1->subheaps = heap2; - return (heap1); - } else { - heap1->parent = heap2; - heap1->prev = NULL; - heap1->next = heap2->subheaps; - if (heap2->subheaps != NULL) - heap2->subheaps->prev = heap1; - heap2->subheaps = heap1; - return (heap2); - } + if ((uintptr_t)heap1 < (uintptr_t)heap2) + return (ph_merge_ordered(heap1, heap2)); + else + return (ph_merge_ordered(heap2, 
heap1)); } JEMALLOC_INLINE ph_node_t * diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 2de1d5f3..aeb43b1d 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -383,6 +383,7 @@ ph_first ph_insert ph_merge ph_merge_aux_list +ph_merge_ordered ph_merge_pairs ph_new ph_remove_first From 4a0dbb5ac844830ebd7f89af20203a574ce1b3da Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Mon, 29 Feb 2016 11:54:42 -0800 Subject: [PATCH 10/82] Use pairing heap for arena->runs_avail Use pairing heap instead of red black tree in arena runs_avail. The extra links are unioned with the bitmap_t, so this change doesn't use any extra memory. Canaries show this change to be a 1% cpu win, and 2% latency win. In particular, large free()s, and small bin frees are now O(1) (barring coalescing). I also tested changing bin->runs to be a pairing heap, but saw a much smaller win, and it would mean increasing the size of arena_run_s by two pointers, so I left that as an rb-tree for now. --- include/jemalloc/internal/arena.h | 29 +++++++++++++++---- include/jemalloc/internal/private_symbols.txt | 1 + src/arena.c | 28 +++++++++--------- 3 files changed, 40 insertions(+), 18 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 3519873c..babd5129 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -36,6 +36,7 @@ typedef enum { #define DECAY_NTICKS_PER_UPDATE 1000 typedef struct arena_runs_dirty_link_s arena_runs_dirty_link_t; +typedef struct arena_avail_links_s arena_avail_links_t; typedef struct arena_run_s arena_run_t; typedef struct arena_chunk_map_bits_s arena_chunk_map_bits_t; typedef struct arena_chunk_map_misc_s arena_chunk_map_misc_t; @@ -146,6 +147,11 @@ struct arena_runs_dirty_link_s { qr(arena_runs_dirty_link_t) rd_link; }; +struct arena_avail_links_s { + arena_runs_dirty_link_t rd; + ph_node_t ph_link; +}; + /* * Each arena_chunk_map_misc_t corresponds to one page within the chunk, just * like arena_chunk_map_bits_t. Two separate arrays are stored within each @@ -163,7 +169,7 @@ struct arena_chunk_map_misc_s { union { /* Linkage for list of dirty runs. */ - arena_runs_dirty_link_t rd; + arena_avail_links_t avail; /* Profile counters, used for large object runs. */ union { @@ -457,10 +463,10 @@ struct arena_s { arena_bin_t bins[NBINS]; /* - * Quantized address-ordered trees of this arena's available runs. The - * trees are used for first-best-fit run allocation. + * Quantized address-ordered heaps of this arena's available runs. The + * heaps are used for first-best-fit run allocation. */ - arena_run_tree_t runs_avail[1]; /* Dynamically sized. */ + ph_heap_t runs_avail[1]; /* Dynamically sized. */ }; /* Used in conjunction with tsd for fast arena-related context lookup. 
*/ @@ -600,6 +606,7 @@ arena_chunk_map_misc_t *arena_miscelm_get(arena_chunk_t *chunk, size_t arena_miscelm_to_pageind(const arena_chunk_map_misc_t *miscelm); void *arena_miscelm_to_rpages(arena_chunk_map_misc_t *miscelm); arena_chunk_map_misc_t *arena_rd_to_miscelm(arena_runs_dirty_link_t *rd); +arena_chunk_map_misc_t *arena_ph_to_miscelm(ph_node_t *ph); arena_chunk_map_misc_t *arena_run_to_miscelm(arena_run_t *run); size_t *arena_mapbitsp_get(arena_chunk_t *chunk, size_t pageind); size_t arena_mapbitsp_read(size_t *mapbitsp); @@ -702,7 +709,19 @@ JEMALLOC_ALWAYS_INLINE arena_chunk_map_misc_t * arena_rd_to_miscelm(arena_runs_dirty_link_t *rd) { arena_chunk_map_misc_t *miscelm = (arena_chunk_map_misc_t - *)((uintptr_t)rd - offsetof(arena_chunk_map_misc_t, rd)); + *)((uintptr_t)rd - offsetof(arena_chunk_map_misc_t, avail)); + + assert(arena_miscelm_to_pageind(miscelm) >= map_bias); + assert(arena_miscelm_to_pageind(miscelm) < chunk_npages); + + return (miscelm); +} + +JEMALLOC_ALWAYS_INLINE arena_chunk_map_misc_t * +arena_ph_to_miscelm(ph_node_t *ph) +{ + arena_chunk_map_misc_t *miscelm = (arena_chunk_map_misc_t *) + ((uintptr_t)ph - offsetof(arena_chunk_map_misc_t, avail.ph_link)); assert(arena_miscelm_to_pageind(miscelm) >= map_bias); assert(arena_miscelm_to_pageind(miscelm) < chunk_npages); diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index aeb43b1d..aed60cb1 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -79,6 +79,7 @@ arena_nthreads_dec arena_nthreads_get arena_nthreads_inc arena_palloc +arena_ph_to_miscelm arena_postfork_child arena_postfork_parent arena_prefork diff --git a/src/arena.c b/src/arena.c index f436959e..fc9852df 100644 --- a/src/arena.c +++ b/src/arena.c @@ -199,7 +199,7 @@ run_quantize_ceil(size_t size) run_quantize_t *run_quantize_ceil = JEMALLOC_N(run_quantize_ceil_impl); #endif -static arena_run_tree_t * +static ph_heap_t * arena_runs_avail_get(arena_t *arena, szind_t ind) { @@ -217,8 +217,8 @@ arena_avail_insert(arena_t *arena, arena_chunk_t *chunk, size_t pageind, arena_miscelm_get(chunk, pageind)))); assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> LG_PAGE)); - arena_run_tree_insert(arena_runs_avail_get(arena, ind), - arena_miscelm_get(chunk, pageind)); + ph_insert(arena_runs_avail_get(arena, ind), + &arena_miscelm_get(chunk, pageind)->avail.ph_link); } static void @@ -229,8 +229,8 @@ arena_avail_remove(arena_t *arena, arena_chunk_t *chunk, size_t pageind, arena_miscelm_get(chunk, pageind)))); assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> LG_PAGE)); - arena_run_tree_remove(arena_runs_avail_get(arena, ind), - arena_miscelm_get(chunk, pageind)); + ph_remove(arena_runs_avail_get(arena, ind), + &arena_miscelm_get(chunk, pageind)->avail.ph_link); } static void @@ -245,8 +245,8 @@ arena_run_dirty_insert(arena_t *arena, arena_chunk_t *chunk, size_t pageind, assert(arena_mapbits_dirty_get(chunk, pageind+npages-1) == CHUNK_MAP_DIRTY); - qr_new(&miscelm->rd, rd_link); - qr_meld(&arena->runs_dirty, &miscelm->rd, rd_link); + qr_new(&miscelm->avail.rd, rd_link); + qr_meld(&arena->runs_dirty, &miscelm->avail.rd, rd_link); arena->ndirty += npages; } @@ -262,7 +262,7 @@ arena_run_dirty_remove(arena_t *arena, arena_chunk_t *chunk, size_t pageind, assert(arena_mapbits_dirty_get(chunk, pageind+npages-1) == CHUNK_MAP_DIRTY); - qr_remove(&miscelm->rd, rd_link); + qr_remove(&miscelm->avail.rd, rd_link); 
assert(arena->ndirty >= npages); arena->ndirty -= npages; } @@ -1079,10 +1079,12 @@ arena_run_first_best_fit(arena_t *arena, size_t size) ind = size2index(run_quantize_ceil(size)); for (i = ind; i < runs_avail_nclasses + runs_avail_bias; i++) { - arena_chunk_map_misc_t *miscelm = arena_run_tree_first( - arena_runs_avail_get(arena, i)); - if (miscelm != NULL) + ph_node_t *node = ph_first(arena_runs_avail_get(arena, i)); + if (node != NULL) { + arena_chunk_map_misc_t *miscelm = + arena_ph_to_miscelm(node); return (&miscelm->run); + } } return (NULL); @@ -3323,7 +3325,7 @@ arena_new(unsigned ind) arena_bin_t *bin; /* Compute arena size to incorporate sufficient runs_avail elements. */ - arena_size = offsetof(arena_t, runs_avail) + (sizeof(arena_run_tree_t) * + arena_size = offsetof(arena_t, runs_avail) + (sizeof(ph_heap_t) * runs_avail_nclasses); /* * Allocate arena, arena->lstats, and arena->hstats contiguously, mainly @@ -3383,7 +3385,7 @@ arena_new(unsigned ind) arena->ndirty = 0; for(i = 0; i < runs_avail_nclasses; i++) - arena_run_tree_new(&arena->runs_avail[i]); + ph_new(&arena->runs_avail[i]); qr_new(&arena->runs_dirty, rd_link); qr_new(&arena->chunks_cache, cc_link); From 613cdc80f6b61f698b3b0c3f2d22442044473f9b Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 8 Mar 2016 01:04:48 -0800 Subject: [PATCH 11/82] Convert arena_bin_t's runs from a tree to a heap. --- include/jemalloc/internal/arena.h | 22 +++++--------- src/arena.c | 50 ++++++++++--------------------- 2 files changed, 23 insertions(+), 49 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index babd5129..c08a742f 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -147,11 +147,6 @@ struct arena_runs_dirty_link_s { qr(arena_runs_dirty_link_t) rd_link; }; -struct arena_avail_links_s { - arena_runs_dirty_link_t rd; - ph_node_t ph_link; -}; - /* * Each arena_chunk_map_misc_t corresponds to one page within the chunk, just * like arena_chunk_map_bits_t. Two separate arrays are stored within each @@ -159,17 +154,17 @@ struct arena_avail_links_s { */ struct arena_chunk_map_misc_s { /* - * Linkage for run trees. There are two disjoint uses: + * Linkage for run heaps. There are two disjoint uses: * - * 1) arena_t's runs_avail tree. + * 1) arena_t's runs_avail heaps. * 2) arena_run_t conceptually uses this linkage for in-use non-full * runs, rather than directly embedding linkage. */ - rb_node(arena_chunk_map_misc_t) rb_link; + ph_node_t ph_link; union { /* Linkage for list of dirty runs. */ - arena_avail_links_t avail; + arena_runs_dirty_link_t rd; /* Profile counters, used for large object runs. */ union { @@ -181,7 +176,6 @@ struct arena_chunk_map_misc_s { arena_run_t run; }; }; -typedef rb_tree(arena_chunk_map_misc_t) arena_run_tree_t; #endif /* JEMALLOC_ARENA_STRUCTS_A */ #ifdef JEMALLOC_ARENA_STRUCTS_B @@ -278,13 +272,13 @@ struct arena_bin_s { arena_run_t *runcur; /* - * Tree of non-full runs. This tree is used when looking for an + * Heap of non-full runs. This heap is used when looking for an * existing run when runcur is no longer usable. We choose the * non-full run that is lowest in memory; this policy tends to keep * objects packed well, and it can also help reduce the number of * almost-empty chunks. */ - arena_run_tree_t runs; + ph_heap_t runs; /* Bin statistics. 
*/ malloc_bin_stats_t stats; @@ -709,7 +703,7 @@ JEMALLOC_ALWAYS_INLINE arena_chunk_map_misc_t * arena_rd_to_miscelm(arena_runs_dirty_link_t *rd) { arena_chunk_map_misc_t *miscelm = (arena_chunk_map_misc_t - *)((uintptr_t)rd - offsetof(arena_chunk_map_misc_t, avail)); + *)((uintptr_t)rd - offsetof(arena_chunk_map_misc_t, rd)); assert(arena_miscelm_to_pageind(miscelm) >= map_bias); assert(arena_miscelm_to_pageind(miscelm) < chunk_npages); @@ -721,7 +715,7 @@ JEMALLOC_ALWAYS_INLINE arena_chunk_map_misc_t * arena_ph_to_miscelm(ph_node_t *ph) { arena_chunk_map_misc_t *miscelm = (arena_chunk_map_misc_t *) - ((uintptr_t)ph - offsetof(arena_chunk_map_misc_t, avail.ph_link)); + ((uintptr_t)ph - offsetof(arena_chunk_map_misc_t, ph_link)); assert(arena_miscelm_to_pageind(miscelm) >= map_bias); assert(arena_miscelm_to_pageind(miscelm) < chunk_npages); diff --git a/src/arena.c b/src/arena.c index fc9852df..0d232ff8 100644 --- a/src/arena.c +++ b/src/arena.c @@ -59,23 +59,6 @@ arena_miscelm_size_get(const arena_chunk_map_misc_t *miscelm) return (arena_mapbits_size_decode(mapbits)); } -JEMALLOC_INLINE_C int -arena_run_addr_comp(const arena_chunk_map_misc_t *a, - const arena_chunk_map_misc_t *b) -{ - uintptr_t a_miscelm = (uintptr_t)a; - uintptr_t b_miscelm = (uintptr_t)b; - - assert(a != NULL); - assert(b != NULL); - - return ((a_miscelm > b_miscelm) - (a_miscelm < b_miscelm)); -} - -/* Generate red-black tree functions. */ -rb_gen(static UNUSED, arena_run_tree_, arena_run_tree_t, arena_chunk_map_misc_t, - rb_link, arena_run_addr_comp) - static size_t run_quantize_floor_compute(size_t size) { @@ -218,7 +201,7 @@ arena_avail_insert(arena_t *arena, arena_chunk_t *chunk, size_t pageind, assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> LG_PAGE)); ph_insert(arena_runs_avail_get(arena, ind), - &arena_miscelm_get(chunk, pageind)->avail.ph_link); + &arena_miscelm_get(chunk, pageind)->ph_link); } static void @@ -230,7 +213,7 @@ arena_avail_remove(arena_t *arena, arena_chunk_t *chunk, size_t pageind, assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> LG_PAGE)); ph_remove(arena_runs_avail_get(arena, ind), - &arena_miscelm_get(chunk, pageind)->avail.ph_link); + &arena_miscelm_get(chunk, pageind)->ph_link); } static void @@ -245,8 +228,8 @@ arena_run_dirty_insert(arena_t *arena, arena_chunk_t *chunk, size_t pageind, assert(arena_mapbits_dirty_get(chunk, pageind+npages-1) == CHUNK_MAP_DIRTY); - qr_new(&miscelm->avail.rd, rd_link); - qr_meld(&arena->runs_dirty, &miscelm->avail.rd, rd_link); + qr_new(&miscelm->rd, rd_link); + qr_meld(&arena->runs_dirty, &miscelm->rd, rd_link); arena->ndirty += npages; } @@ -262,7 +245,7 @@ arena_run_dirty_remove(arena_t *arena, arena_chunk_t *chunk, size_t pageind, assert(arena_mapbits_dirty_get(chunk, pageind+npages-1) == CHUNK_MAP_DIRTY); - qr_remove(&miscelm->avail.rd, rd_link); + qr_remove(&miscelm->rd, rd_link); assert(arena->ndirty >= npages); arena->ndirty -= npages; } @@ -2069,11 +2052,14 @@ arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, static arena_run_t * arena_bin_runs_first(arena_bin_t *bin) { - arena_chunk_map_misc_t *miscelm = arena_run_tree_first(&bin->runs); - if (miscelm != NULL) - return (&miscelm->run); + ph_node_t *node; + arena_chunk_map_misc_t *miscelm; - return (NULL); + node = ph_first(&bin->runs); + if (node == NULL) + return (NULL); + miscelm = arena_ph_to_miscelm(node); + return (&miscelm->run); } static void @@ -2081,9 +2067,7 @@ arena_bin_runs_insert(arena_bin_t *bin, arena_run_t 
*run) { arena_chunk_map_misc_t *miscelm = arena_run_to_miscelm(run); - assert(arena_run_tree_search(&bin->runs, miscelm) == NULL); - - arena_run_tree_insert(&bin->runs, miscelm); + ph_insert(&bin->runs, &miscelm->ph_link); } static void @@ -2091,9 +2075,7 @@ arena_bin_runs_remove(arena_bin_t *bin, arena_run_t *run) { arena_chunk_map_misc_t *miscelm = arena_run_to_miscelm(run); - assert(arena_run_tree_search(&bin->runs, miscelm) != NULL); - - arena_run_tree_remove(&bin->runs, miscelm); + ph_remove(&bin->runs, &miscelm->ph_link); } static arena_run_t * @@ -2676,8 +2658,6 @@ arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, { assert(run != bin->runcur); - assert(arena_run_tree_search(&bin->runs, arena_run_to_miscelm(run)) == - NULL); malloc_mutex_unlock(&bin->lock); /******************************/ @@ -3414,7 +3394,7 @@ arena_new(unsigned ind) if (malloc_mutex_init(&bin->lock)) return (NULL); bin->runcur = NULL; - arena_run_tree_new(&bin->runs); + ph_new(&bin->runs); if (config_stats) memset(&bin->stats, 0, sizeof(malloc_bin_stats_t)); } From ca18f2834e17f31551f871cf4ca487aa9249614e Mon Sep 17 00:00:00 2001 From: Rajeev Misra Date: Thu, 10 Mar 2016 22:49:05 -0800 Subject: [PATCH 12/82] typecast address to pointer to byte to avoid unaligned memory access error --- include/jemalloc/internal/hash.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/hash.h b/include/jemalloc/internal/hash.h index 864fda81..1ff2d9a0 100644 --- a/include/jemalloc/internal/hash.h +++ b/include/jemalloc/internal/hash.h @@ -53,7 +53,7 @@ hash_get_block_32(const uint32_t *p, int i) if (unlikely((uintptr_t)p & (sizeof(uint32_t)-1)) != 0) { uint32_t ret; - memcpy(&ret, &p[i], sizeof(uint32_t)); + memcpy(&ret, (uint8_t *)(p + i), sizeof(uint32_t)); return (ret); } @@ -68,7 +68,7 @@ hash_get_block_64(const uint64_t *p, int i) if (unlikely((uintptr_t)p & (sizeof(uint64_t)-1)) != 0) { uint64_t ret; - memcpy(&ret, &p[i], sizeof(uint64_t)); + memcpy(&ret, (uint8_t *)(p + i), sizeof(uint64_t)); return (ret); } From 824b947be08e87e0c317f585c250731897c2aa2c Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 11 Mar 2016 10:11:56 -0800 Subject: [PATCH 13/82] Add (size_t) casts to MALLOCX_ALIGN(). Add (size_t) casts to MALLOCX_ALIGN() macros so that passing the integer constant 0x80000000 does not cause a compiler warning about invalid shift amount. This resolves #354. --- include/jemalloc/jemalloc_macros.h.in | 8 ++++---- test/integration/mallocx.c | 23 ++++++++++------------- 2 files changed, 14 insertions(+), 17 deletions(-) diff --git a/include/jemalloc/jemalloc_macros.h.in b/include/jemalloc/jemalloc_macros.h.in index 9f356f98..129240ed 100644 --- a/include/jemalloc/jemalloc_macros.h.in +++ b/include/jemalloc/jemalloc_macros.h.in @@ -13,11 +13,11 @@ # define MALLOCX_LG_ALIGN(la) ((int)(la)) # if LG_SIZEOF_PTR == 2 -# define MALLOCX_ALIGN(a) ((int)(ffs(a)-1)) +# define MALLOCX_ALIGN(a) ((int)(ffs((int)(a))-1)) # else # define MALLOCX_ALIGN(a) \ - ((int)(((a) < (size_t)INT_MAX) ? ffs((int)(a))-1 : \ - ffs((int)((a)>>32))+31)) + ((int)(((size_t)(a) < (size_t)INT_MAX) ? ffs((int)(a))-1 : \ + ffs((int)(((size_t)(a))>>32))+31)) # endif # define MALLOCX_ZERO ((int)0x40) /* @@ -29,7 +29,7 @@ /* * Bias arena index bits so that 0 encodes "use an automatically chosen arena". 
*/ -# define MALLOCX_ARENA(a) ((int)(((a)+1) << 20)) +# define MALLOCX_ARENA(a) ((((int)(a))+1) << 20) #if defined(__cplusplus) && defined(JEMALLOC_USE_CXX_THROW) # define JEMALLOC_CXX_THROW throw() diff --git a/test/integration/mallocx.c b/test/integration/mallocx.c index 42eee105..d82bf422 100644 --- a/test/integration/mallocx.c +++ b/test/integration/mallocx.c @@ -69,18 +69,14 @@ TEST_END TEST_BEGIN(test_oom) { - size_t hugemax, size, alignment; - - hugemax = get_huge_size(get_nhuge()-1); /* * It should be impossible to allocate two objects that each consume * more than half the virtual address space. */ { - void *p; - - p = mallocx(hugemax, 0); + size_t hugemax = get_huge_size(get_nhuge()-1); + void *p = mallocx(hugemax, 0); if (p != NULL) { assert_ptr_null(mallocx(hugemax, 0), "Expected OOM for mallocx(size=%#zx, 0)", hugemax); @@ -89,15 +85,16 @@ TEST_BEGIN(test_oom) } #if LG_SIZEOF_PTR == 3 - size = ZU(0x8000000000000000); - alignment = ZU(0x8000000000000000); + assert_ptr_null(mallocx(0x8000000000000000ULL, + MALLOCX_ALIGN(0x8000000000000000ULL)), + "Expected OOM for mallocx()"); + assert_ptr_null(mallocx(0x8000000000000000ULL, + MALLOCX_ALIGN(0x80000000)), + "Expected OOM for mallocx()"); #else - size = ZU(0x80000000); - alignment = ZU(0x80000000); + assert_ptr_null(mallocx(0x80000000UL, MALLOCX_ALIGN(0x80000000UL)), + "Expected OOM for mallocx()"); #endif - assert_ptr_null(mallocx(size, MALLOCX_ALIGN(alignment)), - "Expected OOM for mallocx(size=%#zx, MALLOCX_ALIGN(%#zx)", size, - alignment); } TEST_END From 434ea64b267e5e9e16a66ab1cccf9fab34302ff5 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 14 Mar 2016 20:19:11 -0700 Subject: [PATCH 14/82] Add --with-version. Also avoid deleting the VERSION file while trying to (re)generate it. This resolves #305. --- INSTALL | 4 ++++ configure.ac | 49 +++++++++++++++++++++++++++++-------------------- 2 files changed, 33 insertions(+), 20 deletions(-) diff --git a/INSTALL b/INSTALL index 5c25054a..68787165 100644 --- a/INSTALL +++ b/INSTALL @@ -35,6 +35,10 @@ any of the following arguments (not a definitive list) to 'configure': will cause files to be installed into /usr/local/include, /usr/local/lib, and /usr/local/man. +--with-version=..--g + Use the specified version string rather than trying to generate one (if in + a git repository) or use existing the VERSION file (if present). + --with-rpath= Embed one or more library paths, so that libjemalloc can find the libraries it is linked to. This works only on ELF-based systems. diff --git a/configure.ac b/configure.ac index eb387ed9..3082916b 100644 --- a/configure.ac +++ b/configure.ac @@ -1172,27 +1172,36 @@ dnl ============================================================================ dnl jemalloc configuration. dnl -dnl Set VERSION if source directory is inside a git repository. -if test "x`test ! \"${srcroot}\" && cd \"${srcroot}\"; git rev-parse --is-inside-work-tree 2>/dev/null`" = "xtrue" ; then - dnl Pattern globs aren't powerful enough to match both single- and - dnl double-digit version numbers, so iterate over patterns to support up to - dnl version 99.99.99 without any accidental matches. - rm -f "${objroot}VERSION" - for pattern in ['[0-9].[0-9].[0-9]' '[0-9].[0-9].[0-9][0-9]' \ - '[0-9].[0-9][0-9].[0-9]' '[0-9].[0-9][0-9].[0-9][0-9]' \ - '[0-9][0-9].[0-9].[0-9]' '[0-9][0-9].[0-9].[0-9][0-9]' \ - '[0-9][0-9].[0-9][0-9].[0-9]' \ - '[0-9][0-9].[0-9][0-9].[0-9][0-9]']; do - if test ! -e "${objroot}VERSION" ; then - (test ! 
"${srcroot}" && cd "${srcroot}"; git describe --long --abbrev=40 --match="${pattern}") > "${objroot}VERSION.tmp" 2>/dev/null - if test $? -eq 0 ; then - mv "${objroot}VERSION.tmp" "${objroot}VERSION" - break - fi +AC_ARG_WITH([version], + [AS_HELP_STRING([--with-version=..--g], + [Version string])], + [ + echo "${with_version}" | grep ['^[0-9]\+\.[0-9]\+\.[0-9]\+-[0-9]\+-g[0-9a-f]\+$'] 2>&1 1>/dev/null + if test $? -ne 0 ; then + AC_MSG_ERROR([${with_version} does not match ..--g]) fi - done -fi -rm -f "${objroot}VERSION.tmp" + echo "$with_version" > "${objroot}VERSION" + ], [ + dnl Set VERSION if source directory is inside a git repository. + if test "x`test ! \"${srcroot}\" && cd \"${srcroot}\"; git rev-parse --is-inside-work-tree 2>/dev/null`" = "xtrue" ; then + dnl Pattern globs aren't powerful enough to match both single- and + dnl double-digit version numbers, so iterate over patterns to support up + dnl to version 99.99.99 without any accidental matches. + for pattern in ['[0-9].[0-9].[0-9]' '[0-9].[0-9].[0-9][0-9]' \ + '[0-9].[0-9][0-9].[0-9]' '[0-9].[0-9][0-9].[0-9][0-9]' \ + '[0-9][0-9].[0-9].[0-9]' '[0-9][0-9].[0-9].[0-9][0-9]' \ + '[0-9][0-9].[0-9][0-9].[0-9]' \ + '[0-9][0-9].[0-9][0-9].[0-9][0-9]']; do + (test ! "${srcroot}" && cd "${srcroot}"; git describe --long --abbrev=40 --match="${pattern}") > "${objroot}VERSION.tmp" 2>/dev/null + if test $? -eq 0 ; then + mv "${objroot}VERSION.tmp" "${objroot}VERSION" + break + fi + done + fi + rm -f "${objroot}VERSION.tmp" + ]) + if test ! -e "${objroot}VERSION" ; then if test ! -e "${srcroot}VERSION" ; then AC_MSG_RESULT( From 22af74e10615ce6b6898ae38a378af27757f9e16 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 15 Mar 2016 09:35:14 -0700 Subject: [PATCH 15/82] Refactor out signed/unsigned comparisons. --- include/jemalloc/internal/util.h | 4 ++-- src/util.c | 11 ++++------- test/src/timer.c | 5 ++--- test/unit/bitmap.c | 4 ++-- test/unit/util.c | 8 ++++---- 5 files changed, 14 insertions(+), 18 deletions(-) diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index b8885bfa..6e214702 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -106,9 +106,9 @@ void malloc_write(const char *s); * malloc_vsnprintf() supports a subset of snprintf(3) that avoids floating * point math. */ -int malloc_vsnprintf(char *str, size_t size, const char *format, +size_t malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap); -int malloc_snprintf(char *str, size_t size, const char *format, ...) +size_t malloc_snprintf(char *str, size_t size, const char *format, ...) JEMALLOC_FORMAT_PRINTF(3, 4); void malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque, const char *format, va_list ap); diff --git a/src/util.c b/src/util.c index 02673c70..982a2e31 100644 --- a/src/util.c +++ b/src/util.c @@ -314,10 +314,9 @@ x2s(uintmax_t x, bool alt_form, bool uppercase, char *s, size_t *slen_p) return (s); } -int +size_t malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) { - int ret; size_t i; const char *f; @@ -585,21 +584,19 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) str[i] = '\0'; else str[size - 1] = '\0'; - assert(i < INT_MAX); - ret = (int)i; #undef APPEND_C #undef APPEND_S #undef APPEND_PADDED_S #undef GET_ARG_NUMERIC - return (ret); + return (i); } JEMALLOC_FORMAT_PRINTF(3, 4) -int +size_t malloc_snprintf(char *str, size_t size, const char *format, ...) 
{ - int ret; + size_t ret; va_list ap; va_start(ap, format); diff --git a/test/src/timer.c b/test/src/timer.c index e91b3cf2..3c7e63a2 100644 --- a/test/src/timer.c +++ b/test/src/timer.c @@ -32,9 +32,8 @@ timer_ratio(timedelta_t *a, timedelta_t *b, char *buf, size_t buflen) uint64_t t0 = timer_usec(a); uint64_t t1 = timer_usec(b); uint64_t mult; - unsigned i = 0; - unsigned j; - int n; + size_t i = 0; + size_t j, n; /* Whole. */ n = malloc_snprintf(&buf[i], buflen-i, "%"FMTu64, t0 / t1); diff --git a/test/unit/bitmap.c b/test/unit/bitmap.c index 1ab0bb8e..a2dd5463 100644 --- a/test/unit/bitmap.c +++ b/test/unit/bitmap.c @@ -101,7 +101,7 @@ TEST_BEGIN(test_bitmap_sfu) bitmap_info_t binfo; bitmap_info_init(&binfo, i); { - ssize_t j; + size_t j; bitmap_t *bitmap = (bitmap_t *)malloc( bitmap_size(&binfo)); bitmap_init(bitmap, &binfo); @@ -119,7 +119,7 @@ TEST_BEGIN(test_bitmap_sfu) * Iteratively unset bits starting at the end, and * verify that bitmap_sfu() reaches the unset bits. */ - for (j = i - 1; j >= 0; j--) { + for (j = i - 1; j < i; j--) { /* (i..0] */ bitmap_unset(bitmap, &binfo, j); assert_zd_eq(bitmap_sfu(bitmap, &binfo), j, "First unset bit should the bit previously " diff --git a/test/unit/util.c b/test/unit/util.c index 2f65aad2..d24c1c79 100644 --- a/test/unit/util.c +++ b/test/unit/util.c @@ -160,14 +160,14 @@ TEST_BEGIN(test_malloc_snprintf_truncated) { #define BUFLEN 15 char buf[BUFLEN]; - int result; + size_t result; size_t len; #define TEST(expected_str_untruncated, ...) do { \ result = malloc_snprintf(buf, len, __VA_ARGS__); \ assert_d_eq(strncmp(buf, expected_str_untruncated, len-1), 0, \ "Unexpected string inequality (\"%s\" vs \"%s\")", \ buf, expected_str_untruncated); \ - assert_d_eq(result, strlen(expected_str_untruncated), \ + assert_zu_eq(result, strlen(expected_str_untruncated), \ "Unexpected result"); \ } while (0) @@ -193,11 +193,11 @@ TEST_BEGIN(test_malloc_snprintf) { #define BUFLEN 128 char buf[BUFLEN]; - int result; + size_t result; #define TEST(expected_str, ...) do { \ result = malloc_snprintf(buf, sizeof(buf), __VA_ARGS__); \ assert_str_eq(buf, expected_str, "Unexpected output"); \ - assert_d_eq(result, strlen(expected_str), "Unexpected result"); \ + assert_zu_eq(result, strlen(expected_str), "Unexpected result");\ } while (0) TEST("hello", "hello"); From 18903c592fdbf2384b59051bd251d234e84647af Mon Sep 17 00:00:00 2001 From: Chris Peterson Date: Mon, 14 Mar 2016 21:44:32 -0700 Subject: [PATCH 16/82] Enable -Wsign-compare warnings. --- configure.ac | 1 + 1 file changed, 1 insertion(+) diff --git a/configure.ac b/configure.ac index 3082916b..275576bd 100644 --- a/configure.ac +++ b/configure.ac @@ -141,6 +141,7 @@ if test "x$CFLAGS" = "x" ; then JE_CFLAGS_APPEND([-Wall]) JE_CFLAGS_APPEND([-Werror=declaration-after-statement]) JE_CFLAGS_APPEND([-Wshorten-64-to-32]) + JE_CFLAGS_APPEND([-Wsign-compare]) JE_CFLAGS_APPEND([-pipe]) JE_CFLAGS_APPEND([-g3]) elif test "x$je_cv_msvc" = "xyes" ; then From 6c460ad91bf349ebac3b23e58d97769a982110fe Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 22 Mar 2016 17:54:35 -0700 Subject: [PATCH 17/82] Optimize rtree_get(). Specialize fast path to avoid code that cannot execute for dependent loads. Manually unroll. 
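The unrolling in this patch relies on the radix tree having a small, compile-time-bounded height: the per-level lookup loop is replaced by a switch whose cases fall through, so each level becomes straight-line code and the validity checks can be dropped entirely on the dependent-load fast path. Below is a minimal standalone sketch of that switch/fall-through pattern, assuming 4 bits per level and a depth of 4; example_node_t and example_walk_unrolled() are hypothetical illustrations, not the rtree code itself, which generates its cases with the RTREE_GET_SUBTREE()/RTREE_GET_LEAF() macros and biases the start level by RTREE_GET_BIAS.

#include <stddef.h>

#define EXAMPLE_BITS_PER_LEVEL	4	/* Assumes LG_RTREE_BITS_PER_LEVEL == 4. */
#define EXAMPLE_HEIGHT_MAX	4	/* Analogous to RTREE_HEIGHT_MAX. */

typedef struct example_node_s example_node_t;
struct example_node_s {
	example_node_t	*children[1U << EXAMPLE_BITS_PER_LEVEL];
	void		*vals[1U << EXAMPLE_BITS_PER_LEVEL];	/* Leaf level only. */
};

static void *
example_walk_unrolled(example_node_t *node, unsigned start_level, unsigned key)
{

	/*
	 * Jump into the switch at start_level, then fall through one case per
	 * remaining level; the compiler emits a fixed chain of dependent loads
	 * rather than a loop with trip-count and height checks.
	 */
	switch (start_level) {
	case 0:
		if (node == NULL)
			return (NULL);
		node = node->children[(key >> 12) & 0xf];
		/* Fall through. */
	case 1:
		if (node == NULL)
			return (NULL);
		node = node->children[(key >> 8) & 0xf];
		/* Fall through. */
	case 2:
		if (node == NULL)
			return (NULL);
		node = node->children[(key >> 4) & 0xf];
		/* Fall through. */
	case 3:
		/* Leaf level: contains values rather than child pointers. */
		if (node == NULL)
			return (NULL);
		return (node->vals[key & 0xf]);
	default:
		return (NULL);
	}
}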
--- include/jemalloc/internal/rtree.h | 166 +++++++++++++++++++++++------- src/rtree.c | 2 + src/util.c | 1 + 3 files changed, 134 insertions(+), 35 deletions(-) diff --git a/include/jemalloc/internal/rtree.h b/include/jemalloc/internal/rtree.h index 28ae9d1d..3f8db3ad 100644 --- a/include/jemalloc/internal/rtree.h +++ b/include/jemalloc/internal/rtree.h @@ -16,8 +16,34 @@ typedef struct rtree_s rtree_t; */ #define LG_RTREE_BITS_PER_LEVEL 4 #define RTREE_BITS_PER_LEVEL (ZU(1) << LG_RTREE_BITS_PER_LEVEL) -#define RTREE_HEIGHT_MAX \ - ((ZU(1) << (LG_SIZEOF_PTR+3)) / RTREE_BITS_PER_LEVEL) +/* + * Avoid math in RTREE_HEIGHT_MAX definition so that it can be used in cpp + * conditionals. The following defininitions are precomputed equivalents to: + * + * #define RTREE_HEIGHT_MAX \ + * ((ZU(1) << (LG_SIZEOF_PTR+3)) / RTREE_BITS_PER_LEVEL) + */ +#if LG_RTREE_BITS_PER_LEVEL == 2 +# if LG_SIZEOF_PTR == 3 +# define RTREE_HEIGHT_MAX 16 +# elif LG_SIZEOF_PTR == 2 +# define RTREE_HEIGHT_MAX 8 +# endif +#elif LG_RTREE_BITS_PER_LEVEL == 3 +# if LG_SIZEOF_PTR == 3 +# define RTREE_HEIGHT_MAX 8 +# elif LG_SIZEOF_PTR == 2 +# define RTREE_HEIGHT_MAX 4 +# endif +#elif LG_RTREE_BITS_PER_LEVEL == 4 +# if LG_SIZEOF_PTR == 3 +# define RTREE_HEIGHT_MAX 4 +# elif LG_SIZEOF_PTR == 2 +# define RTREE_HEIGHT_MAX 2 +# endif +#else +# error Unsupported LG_RTREE_BITS_PER_LEVEL +#endif /* Used for two-stage lock-free node initialization. */ #define RTREE_NODE_INITIALIZING ((rtree_node_elm_t *)0x1) @@ -111,15 +137,18 @@ unsigned rtree_start_level(rtree_t *rtree, uintptr_t key); uintptr_t rtree_subkey(rtree_t *rtree, uintptr_t key, unsigned level); bool rtree_node_valid(rtree_node_elm_t *node); -rtree_node_elm_t *rtree_child_tryread(rtree_node_elm_t *elm); +rtree_node_elm_t *rtree_child_tryread(rtree_node_elm_t *elm, + bool dependent); rtree_node_elm_t *rtree_child_read(rtree_t *rtree, rtree_node_elm_t *elm, - unsigned level); + unsigned level, bool dependent); extent_node_t *rtree_val_read(rtree_t *rtree, rtree_node_elm_t *elm, bool dependent); void rtree_val_write(rtree_t *rtree, rtree_node_elm_t *elm, const extent_node_t *val); -rtree_node_elm_t *rtree_subtree_tryread(rtree_t *rtree, unsigned level); -rtree_node_elm_t *rtree_subtree_read(rtree_t *rtree, unsigned level); +rtree_node_elm_t *rtree_subtree_tryread(rtree_t *rtree, unsigned level, + bool dependent); +rtree_node_elm_t *rtree_subtree_read(rtree_t *rtree, unsigned level, + bool dependent); extent_node_t *rtree_get(rtree_t *rtree, uintptr_t key, bool dependent); bool rtree_set(rtree_t *rtree, uintptr_t key, const extent_node_t *val); @@ -157,25 +186,28 @@ rtree_node_valid(rtree_node_elm_t *node) } JEMALLOC_INLINE rtree_node_elm_t * -rtree_child_tryread(rtree_node_elm_t *elm) +rtree_child_tryread(rtree_node_elm_t *elm, bool dependent) { rtree_node_elm_t *child; /* Double-checked read (first read may be stale. 
*/ child = elm->child; - if (!rtree_node_valid(child)) + if (!dependent && !rtree_node_valid(child)) child = atomic_read_p(&elm->pun); + assert(!dependent || child != NULL); return (child); } JEMALLOC_INLINE rtree_node_elm_t * -rtree_child_read(rtree_t *rtree, rtree_node_elm_t *elm, unsigned level) +rtree_child_read(rtree_t *rtree, rtree_node_elm_t *elm, unsigned level, + bool dependent) { rtree_node_elm_t *child; - child = rtree_child_tryread(elm); - if (unlikely(!rtree_node_valid(child))) + child = rtree_child_tryread(elm, dependent); + if (!dependent && unlikely(!rtree_node_valid(child))) child = rtree_child_read_hard(rtree, elm, level); + assert(!dependent || child != NULL); return (child); } @@ -209,25 +241,27 @@ rtree_val_write(rtree_t *rtree, rtree_node_elm_t *elm, const extent_node_t *val) } JEMALLOC_INLINE rtree_node_elm_t * -rtree_subtree_tryread(rtree_t *rtree, unsigned level) +rtree_subtree_tryread(rtree_t *rtree, unsigned level, bool dependent) { rtree_node_elm_t *subtree; /* Double-checked read (first read may be stale. */ subtree = rtree->levels[level].subtree; - if (!rtree_node_valid(subtree)) + if (!dependent && unlikely(!rtree_node_valid(subtree))) subtree = atomic_read_p(&rtree->levels[level].subtree_pun); + assert(!dependent || subtree != NULL); return (subtree); } JEMALLOC_INLINE rtree_node_elm_t * -rtree_subtree_read(rtree_t *rtree, unsigned level) +rtree_subtree_read(rtree_t *rtree, unsigned level, bool dependent) { rtree_node_elm_t *subtree; - subtree = rtree_subtree_tryread(rtree, level); - if (unlikely(!rtree_node_valid(subtree))) + subtree = rtree_subtree_tryread(rtree, level, dependent); + if (!dependent && unlikely(!rtree_node_valid(subtree))) subtree = rtree_subtree_read_hard(rtree, level); + assert(!dependent || subtree != NULL); return (subtree); } @@ -235,26 +269,88 @@ JEMALLOC_INLINE extent_node_t * rtree_get(rtree_t *rtree, uintptr_t key, bool dependent) { uintptr_t subkey; - unsigned i, start_level; - rtree_node_elm_t *node, *child; + unsigned start_level; + rtree_node_elm_t *node; start_level = rtree_start_level(rtree, key); - for (i = start_level, node = rtree_subtree_tryread(rtree, start_level); - /**/; i++, node = child) { - if (!dependent && unlikely(!rtree_node_valid(node))) - return (NULL); - subkey = rtree_subkey(rtree, key, i); - if (i == rtree->height - 1) { - /* - * node is a leaf, so it contains values rather than - * child pointers. - */ - return (rtree_val_read(rtree, &node[subkey], - dependent)); - } - assert(i < rtree->height - 1); - child = rtree_child_tryread(&node[subkey]); + node = rtree_subtree_tryread(rtree, start_level, dependent); +#define RTREE_GET_BIAS (RTREE_HEIGHT_MAX - rtree->height) + switch (start_level + RTREE_GET_BIAS) { +#define RTREE_GET_SUBTREE(level) \ + case level: \ + assert(level < (RTREE_HEIGHT_MAX-1)); \ + if (!dependent && unlikely(!rtree_node_valid(node))) \ + return (NULL); \ + subkey = rtree_subkey(rtree, key, level - \ + RTREE_GET_BIAS); \ + node = rtree_child_tryread(&node[subkey], dependent); \ + /* Fall through. */ +#define RTREE_GET_LEAF(level) \ + case level: \ + assert(level == (RTREE_HEIGHT_MAX-1)); \ + if (!dependent && unlikely(!rtree_node_valid(node))) \ + return (NULL); \ + subkey = rtree_subkey(rtree, key, level - \ + RTREE_GET_BIAS); \ + /* \ + * node is a leaf, so it contains values rather than \ + * child pointers. 
\ + */ \ + return (rtree_val_read(rtree, &node[subkey], \ + dependent)); +#if RTREE_HEIGHT_MAX > 1 + RTREE_GET_SUBTREE(0) +#endif +#if RTREE_HEIGHT_MAX > 2 + RTREE_GET_SUBTREE(1) +#endif +#if RTREE_HEIGHT_MAX > 3 + RTREE_GET_SUBTREE(2) +#endif +#if RTREE_HEIGHT_MAX > 4 + RTREE_GET_SUBTREE(3) +#endif +#if RTREE_HEIGHT_MAX > 5 + RTREE_GET_SUBTREE(4) +#endif +#if RTREE_HEIGHT_MAX > 6 + RTREE_GET_SUBTREE(5) +#endif +#if RTREE_HEIGHT_MAX > 7 + RTREE_GET_SUBTREE(6) +#endif +#if RTREE_HEIGHT_MAX > 8 + RTREE_GET_SUBTREE(7) +#endif +#if RTREE_HEIGHT_MAX > 9 + RTREE_GET_SUBTREE(8) +#endif +#if RTREE_HEIGHT_MAX > 10 + RTREE_GET_SUBTREE(9) +#endif +#if RTREE_HEIGHT_MAX > 11 + RTREE_GET_SUBTREE(10) +#endif +#if RTREE_HEIGHT_MAX > 12 + RTREE_GET_SUBTREE(11) +#endif +#if RTREE_HEIGHT_MAX > 13 + RTREE_GET_SUBTREE(12) +#endif +#if RTREE_HEIGHT_MAX > 14 + RTREE_GET_SUBTREE(13) +#endif +#if RTREE_HEIGHT_MAX > 15 + RTREE_GET_SUBTREE(14) +#endif +#if RTREE_HEIGHT_MAX > 16 +# error Unsupported RTREE_HEIGHT_MAX +#endif + RTREE_GET_LEAF(RTREE_HEIGHT_MAX-1) +#undef RTREE_GET_SUBTREE +#undef RTREE_GET_LEAF + default: not_reached(); } not_reached(); } @@ -268,7 +364,7 @@ rtree_set(rtree_t *rtree, uintptr_t key, const extent_node_t *val) start_level = rtree_start_level(rtree, key); - node = rtree_subtree_read(rtree, start_level); + node = rtree_subtree_read(rtree, start_level, false); if (node == NULL) return (true); for (i = start_level; /**/; i++, node = child) { @@ -282,7 +378,7 @@ rtree_set(rtree_t *rtree, uintptr_t key, const extent_node_t *val) return (false); } assert(i + 1 < rtree->height); - child = rtree_child_read(rtree, &node[subkey], i); + child = rtree_child_read(rtree, &node[subkey], i, false); if (child == NULL) return (true); } diff --git a/src/rtree.c b/src/rtree.c index af0d97e7..3166b45f 100644 --- a/src/rtree.c +++ b/src/rtree.c @@ -15,6 +15,8 @@ rtree_new(rtree_t *rtree, unsigned bits, rtree_node_alloc_t *alloc, { unsigned bits_in_leaf, height, i; + assert(RTREE_HEIGHT_MAX == ((ZU(1) << (LG_SIZEOF_PTR+3)) / + RTREE_BITS_PER_LEVEL)); assert(bits > 0 && bits <= (sizeof(uintptr_t) << 3)); bits_in_leaf = (bits % RTREE_BITS_PER_LEVEL) == 0 ? RTREE_BITS_PER_LEVEL diff --git a/src/util.c b/src/util.c index 982a2e31..581d540b 100644 --- a/src/util.c +++ b/src/util.c @@ -14,6 +14,7 @@ malloc_write(": Unreachable code reached\n"); \ abort(); \ } \ + unreachable(); \ } while (0) #define not_implemented() do { \ From 6a885198c2a27333f1fcfae5637dc2377189a3a3 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 23 Mar 2016 16:14:41 -0700 Subject: [PATCH 18/82] Always inline performance-critical rtree operations. 
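The switch-unrolled rtree_get() added in the previous commit only pays off if the
small helpers it calls are actually inlined, hence the JEMALLOC_ALWAYS_INLINE
annotations in this change.  For reference, the sketch below shows the general
biased-switch technique in isolation.  It is illustrative only: HEIGHT_MAX,
node_t, subkey() and lookup() are made-up names, and the real code generates the
cases with cpp macros guarded by #if RTREE_HEIGHT_MAX.  The switch dispatches on
the number of missing upper levels, and every case falls through to the next
deeper level, so the compiler can fully unroll the walk.

    #include <stddef.h>
    #include <stdint.h>

    #define HEIGHT_MAX 3   /* Compile-time maximum tree height. */

    typedef union node_u node_t;
    union node_u {
        node_t *child;   /* Interior levels point at the next level's array. */
        void *val;       /* The leaf level stores values. */
    };

    /* Per-level index bits extracted from the key (level 0 is the top). */
    static unsigned
    subkey(uintptr_t key, unsigned level, unsigned height, unsigned bits)
    {
        return ((unsigned)((key >> ((height - level - 1) * bits)) &
            (((uintptr_t)1 << bits) - 1)));
    }

    /* Walk a tree of 1 <= height <= HEIGHT_MAX levels. */
    static void *
    lookup(node_t *node, uintptr_t key, unsigned height, unsigned bits)
    {
        unsigned bias = HEIGHT_MAX - height;

        /* "case - bias" recovers the logical level within this tree. */
        switch (bias) {
        case 0:   /* Only reached when height == HEIGHT_MAX. */
            if (node == NULL)
                return (NULL);
            node = node[subkey(key, 0 - bias, height, bits)].child;
            /* Fall through. */
        case 1:
            if (node == NULL)
                return (NULL);
            node = node[subkey(key, 1 - bias, height, bits)].child;
            /* Fall through. */
        case 2:   /* Leaf level. */
            if (node == NULL)
                return (NULL);
            return (node[subkey(key, 2 - bias, height, bits)].val);
        default:
            return (NULL);
        }
    }

Forcing the per-level helpers inline keeps the unrolled lookup from paying a
function call per level, which is the point of this commit.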
--- include/jemalloc/internal/rtree.h | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/include/jemalloc/internal/rtree.h b/include/jemalloc/internal/rtree.h index 3f8db3ad..36aa002b 100644 --- a/include/jemalloc/internal/rtree.h +++ b/include/jemalloc/internal/rtree.h @@ -155,7 +155,7 @@ bool rtree_set(rtree_t *rtree, uintptr_t key, const extent_node_t *val); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_RTREE_C_)) -JEMALLOC_INLINE unsigned +JEMALLOC_ALWAYS_INLINE unsigned rtree_start_level(rtree_t *rtree, uintptr_t key) { unsigned start_level; @@ -169,7 +169,7 @@ rtree_start_level(rtree_t *rtree, uintptr_t key) return (start_level); } -JEMALLOC_INLINE uintptr_t +JEMALLOC_ALWAYS_INLINE uintptr_t rtree_subkey(rtree_t *rtree, uintptr_t key, unsigned level) { @@ -178,14 +178,14 @@ rtree_subkey(rtree_t *rtree, uintptr_t key, unsigned level) rtree->levels[level].bits) - 1)); } -JEMALLOC_INLINE bool +JEMALLOC_ALWAYS_INLINE bool rtree_node_valid(rtree_node_elm_t *node) { return ((uintptr_t)node > (uintptr_t)RTREE_NODE_INITIALIZING); } -JEMALLOC_INLINE rtree_node_elm_t * +JEMALLOC_ALWAYS_INLINE rtree_node_elm_t * rtree_child_tryread(rtree_node_elm_t *elm, bool dependent) { rtree_node_elm_t *child; @@ -198,7 +198,7 @@ rtree_child_tryread(rtree_node_elm_t *elm, bool dependent) return (child); } -JEMALLOC_INLINE rtree_node_elm_t * +JEMALLOC_ALWAYS_INLINE rtree_node_elm_t * rtree_child_read(rtree_t *rtree, rtree_node_elm_t *elm, unsigned level, bool dependent) { @@ -211,7 +211,7 @@ rtree_child_read(rtree_t *rtree, rtree_node_elm_t *elm, unsigned level, return (child); } -JEMALLOC_INLINE extent_node_t * +JEMALLOC_ALWAYS_INLINE extent_node_t * rtree_val_read(rtree_t *rtree, rtree_node_elm_t *elm, bool dependent) { @@ -240,7 +240,7 @@ rtree_val_write(rtree_t *rtree, rtree_node_elm_t *elm, const extent_node_t *val) atomic_write_p(&elm->pun, val); } -JEMALLOC_INLINE rtree_node_elm_t * +JEMALLOC_ALWAYS_INLINE rtree_node_elm_t * rtree_subtree_tryread(rtree_t *rtree, unsigned level, bool dependent) { rtree_node_elm_t *subtree; @@ -253,7 +253,7 @@ rtree_subtree_tryread(rtree_t *rtree, unsigned level, bool dependent) return (subtree); } -JEMALLOC_INLINE rtree_node_elm_t * +JEMALLOC_ALWAYS_INLINE rtree_node_elm_t * rtree_subtree_read(rtree_t *rtree, unsigned level, bool dependent) { rtree_node_elm_t *subtree; @@ -265,7 +265,7 @@ rtree_subtree_read(rtree_t *rtree, unsigned level, bool dependent) return (subtree); } -JEMALLOC_INLINE extent_node_t * +JEMALLOC_ALWAYS_INLINE extent_node_t * rtree_get(rtree_t *rtree, uintptr_t key, bool dependent) { uintptr_t subkey; @@ -352,6 +352,7 @@ rtree_get(rtree_t *rtree, uintptr_t key, bool dependent) #undef RTREE_GET_LEAF default: not_reached(); } +#undef RTREE_GET_BIAS not_reached(); } From f6bd2e5a178aed23398996f008feee5bf070a624 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 23 Mar 2016 15:32:07 -0700 Subject: [PATCH 19/82] Code formatting fixes. 
--- src/tcache.c | 3 ++- test/stress/microbench.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/tcache.c b/src/tcache.c index 6e32f404..c12727a6 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -23,7 +23,8 @@ static tcaches_t *tcaches_avail; /******************************************************************************/ -size_t tcache_salloc(const void *ptr) +size_t +tcache_salloc(const void *ptr) { return (arena_salloc(ptr, false)); diff --git a/test/stress/microbench.c b/test/stress/microbench.c index ee39fea7..7dc45f89 100644 --- a/test/stress/microbench.c +++ b/test/stress/microbench.c @@ -1,7 +1,8 @@ #include "test/jemalloc_test.h" JEMALLOC_INLINE_C void -time_func(timedelta_t *timer, uint64_t nwarmup, uint64_t niter, void (*func)(void)) +time_func(timedelta_t *timer, uint64_t nwarmup, uint64_t niter, + void (*func)(void)) { uint64_t i; From 61a6dfcd5fd89d21f04c99fabaf7269d05f61adf Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 23 Mar 2016 16:04:38 -0700 Subject: [PATCH 20/82] Constify various internal arena APIs. --- include/jemalloc/internal/arena.h | 124 +++++++++++------- include/jemalloc/internal/private_symbols.txt | 9 +- src/arena.c | 51 +++---- src/tcache.c | 2 +- 4 files changed, 112 insertions(+), 74 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index c08a742f..09ae6894 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -593,29 +593,38 @@ void arena_postfork_child(arena_t *arena); #ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE -arena_chunk_map_bits_t *arena_bitselm_get(arena_chunk_t *chunk, +arena_chunk_map_bits_t *arena_bitselm_get_mutable(arena_chunk_t *chunk, size_t pageind); -arena_chunk_map_misc_t *arena_miscelm_get(arena_chunk_t *chunk, +const arena_chunk_map_bits_t *arena_bitselm_get_const( + const arena_chunk_t *chunk, size_t pageind); +arena_chunk_map_misc_t *arena_miscelm_get_mutable(arena_chunk_t *chunk, size_t pageind); +const arena_chunk_map_misc_t *arena_miscelm_get_const( + const arena_chunk_t *chunk, size_t pageind); size_t arena_miscelm_to_pageind(const arena_chunk_map_misc_t *miscelm); -void *arena_miscelm_to_rpages(arena_chunk_map_misc_t *miscelm); +void *arena_miscelm_to_rpages(const arena_chunk_map_misc_t *miscelm); arena_chunk_map_misc_t *arena_rd_to_miscelm(arena_runs_dirty_link_t *rd); arena_chunk_map_misc_t *arena_ph_to_miscelm(ph_node_t *ph); arena_chunk_map_misc_t *arena_run_to_miscelm(arena_run_t *run); -size_t *arena_mapbitsp_get(arena_chunk_t *chunk, size_t pageind); -size_t arena_mapbitsp_read(size_t *mapbitsp); -size_t arena_mapbits_get(arena_chunk_t *chunk, size_t pageind); -size_t arena_mapbits_size_decode(size_t mapbits); -size_t arena_mapbits_unallocated_size_get(arena_chunk_t *chunk, +size_t *arena_mapbitsp_get_mutable(arena_chunk_t *chunk, size_t pageind); +const size_t *arena_mapbitsp_get_const(const arena_chunk_t *chunk, size_t pageind); -size_t arena_mapbits_large_size_get(arena_chunk_t *chunk, size_t pageind); -size_t arena_mapbits_small_runind_get(arena_chunk_t *chunk, size_t pageind); -szind_t arena_mapbits_binind_get(arena_chunk_t *chunk, size_t pageind); -size_t arena_mapbits_dirty_get(arena_chunk_t *chunk, size_t pageind); -size_t arena_mapbits_unzeroed_get(arena_chunk_t *chunk, size_t pageind); -size_t arena_mapbits_decommitted_get(arena_chunk_t *chunk, size_t pageind); -size_t arena_mapbits_large_get(arena_chunk_t *chunk, size_t pageind); -size_t arena_mapbits_allocated_get(arena_chunk_t *chunk, 
size_t pageind); +size_t arena_mapbitsp_read(const size_t *mapbitsp); +size_t arena_mapbits_get(const arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbits_size_decode(size_t mapbits); +size_t arena_mapbits_unallocated_size_get(const arena_chunk_t *chunk, + size_t pageind); +size_t arena_mapbits_large_size_get(const arena_chunk_t *chunk, + size_t pageind); +size_t arena_mapbits_small_runind_get(const arena_chunk_t *chunk, + size_t pageind); +szind_t arena_mapbits_binind_get(const arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbits_dirty_get(const arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbits_unzeroed_get(const arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbits_decommitted_get(const arena_chunk_t *chunk, + size_t pageind); +size_t arena_mapbits_large_get(const arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbits_allocated_get(const arena_chunk_t *chunk, size_t pageind); void arena_mapbitsp_write(size_t *mapbitsp, size_t mapbits); size_t arena_mapbits_size_encode(size_t size); void arena_mapbits_unallocated_set(arena_chunk_t *chunk, size_t pageind, @@ -657,7 +666,7 @@ void arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache); #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_)) # ifdef JEMALLOC_ARENA_INLINE_A JEMALLOC_ALWAYS_INLINE arena_chunk_map_bits_t * -arena_bitselm_get(arena_chunk_t *chunk, size_t pageind) +arena_bitselm_get_mutable(arena_chunk_t *chunk, size_t pageind) { assert(pageind >= map_bias); @@ -666,8 +675,15 @@ arena_bitselm_get(arena_chunk_t *chunk, size_t pageind) return (&chunk->map_bits[pageind-map_bias]); } +JEMALLOC_ALWAYS_INLINE const arena_chunk_map_bits_t * +arena_bitselm_get_const(const arena_chunk_t *chunk, size_t pageind) +{ + + return (arena_bitselm_get_mutable((arena_chunk_t *)chunk, pageind)); +} + JEMALLOC_ALWAYS_INLINE arena_chunk_map_misc_t * -arena_miscelm_get(arena_chunk_t *chunk, size_t pageind) +arena_miscelm_get_mutable(arena_chunk_t *chunk, size_t pageind) { assert(pageind >= map_bias); @@ -677,6 +693,13 @@ arena_miscelm_get(arena_chunk_t *chunk, size_t pageind) (uintptr_t)map_misc_offset) + pageind-map_bias); } +JEMALLOC_ALWAYS_INLINE const arena_chunk_map_misc_t * +arena_miscelm_get_const(const arena_chunk_t *chunk, size_t pageind) +{ + + return (arena_miscelm_get_mutable((arena_chunk_t *)chunk, pageind)); +} + JEMALLOC_ALWAYS_INLINE size_t arena_miscelm_to_pageind(const arena_chunk_map_misc_t *miscelm) { @@ -691,7 +714,7 @@ arena_miscelm_to_pageind(const arena_chunk_map_misc_t *miscelm) } JEMALLOC_ALWAYS_INLINE void * -arena_miscelm_to_rpages(arena_chunk_map_misc_t *miscelm) +arena_miscelm_to_rpages(const arena_chunk_map_misc_t *miscelm) { arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(miscelm); size_t pageind = arena_miscelm_to_pageind(miscelm); @@ -736,24 +759,31 @@ arena_run_to_miscelm(arena_run_t *run) } JEMALLOC_ALWAYS_INLINE size_t * -arena_mapbitsp_get(arena_chunk_t *chunk, size_t pageind) +arena_mapbitsp_get_mutable(arena_chunk_t *chunk, size_t pageind) { - return (&arena_bitselm_get(chunk, pageind)->bits); + return (&arena_bitselm_get_mutable(chunk, pageind)->bits); +} + +JEMALLOC_ALWAYS_INLINE const size_t * +arena_mapbitsp_get_const(const arena_chunk_t *chunk, size_t pageind) +{ + + return (arena_mapbitsp_get_mutable((arena_chunk_t *)chunk, pageind)); } JEMALLOC_ALWAYS_INLINE size_t -arena_mapbitsp_read(size_t *mapbitsp) +arena_mapbitsp_read(const size_t *mapbitsp) { return (*mapbitsp); } JEMALLOC_ALWAYS_INLINE size_t -arena_mapbits_get(arena_chunk_t 
*chunk, size_t pageind) +arena_mapbits_get(const arena_chunk_t *chunk, size_t pageind) { - return (arena_mapbitsp_read(arena_mapbitsp_get(chunk, pageind))); + return (arena_mapbitsp_read(arena_mapbitsp_get_const(chunk, pageind))); } JEMALLOC_ALWAYS_INLINE size_t @@ -773,7 +803,7 @@ arena_mapbits_size_decode(size_t mapbits) } JEMALLOC_ALWAYS_INLINE size_t -arena_mapbits_unallocated_size_get(arena_chunk_t *chunk, size_t pageind) +arena_mapbits_unallocated_size_get(const arena_chunk_t *chunk, size_t pageind) { size_t mapbits; @@ -783,7 +813,7 @@ arena_mapbits_unallocated_size_get(arena_chunk_t *chunk, size_t pageind) } JEMALLOC_ALWAYS_INLINE size_t -arena_mapbits_large_size_get(arena_chunk_t *chunk, size_t pageind) +arena_mapbits_large_size_get(const arena_chunk_t *chunk, size_t pageind) { size_t mapbits; @@ -794,7 +824,7 @@ arena_mapbits_large_size_get(arena_chunk_t *chunk, size_t pageind) } JEMALLOC_ALWAYS_INLINE size_t -arena_mapbits_small_runind_get(arena_chunk_t *chunk, size_t pageind) +arena_mapbits_small_runind_get(const arena_chunk_t *chunk, size_t pageind) { size_t mapbits; @@ -805,7 +835,7 @@ arena_mapbits_small_runind_get(arena_chunk_t *chunk, size_t pageind) } JEMALLOC_ALWAYS_INLINE szind_t -arena_mapbits_binind_get(arena_chunk_t *chunk, size_t pageind) +arena_mapbits_binind_get(const arena_chunk_t *chunk, size_t pageind) { size_t mapbits; szind_t binind; @@ -817,7 +847,7 @@ arena_mapbits_binind_get(arena_chunk_t *chunk, size_t pageind) } JEMALLOC_ALWAYS_INLINE size_t -arena_mapbits_dirty_get(arena_chunk_t *chunk, size_t pageind) +arena_mapbits_dirty_get(const arena_chunk_t *chunk, size_t pageind) { size_t mapbits; @@ -828,7 +858,7 @@ arena_mapbits_dirty_get(arena_chunk_t *chunk, size_t pageind) } JEMALLOC_ALWAYS_INLINE size_t -arena_mapbits_unzeroed_get(arena_chunk_t *chunk, size_t pageind) +arena_mapbits_unzeroed_get(const arena_chunk_t *chunk, size_t pageind) { size_t mapbits; @@ -839,7 +869,7 @@ arena_mapbits_unzeroed_get(arena_chunk_t *chunk, size_t pageind) } JEMALLOC_ALWAYS_INLINE size_t -arena_mapbits_decommitted_get(arena_chunk_t *chunk, size_t pageind) +arena_mapbits_decommitted_get(const arena_chunk_t *chunk, size_t pageind) { size_t mapbits; @@ -850,7 +880,7 @@ arena_mapbits_decommitted_get(arena_chunk_t *chunk, size_t pageind) } JEMALLOC_ALWAYS_INLINE size_t -arena_mapbits_large_get(arena_chunk_t *chunk, size_t pageind) +arena_mapbits_large_get(const arena_chunk_t *chunk, size_t pageind) { size_t mapbits; @@ -859,7 +889,7 @@ arena_mapbits_large_get(arena_chunk_t *chunk, size_t pageind) } JEMALLOC_ALWAYS_INLINE size_t -arena_mapbits_allocated_get(arena_chunk_t *chunk, size_t pageind) +arena_mapbits_allocated_get(const arena_chunk_t *chunk, size_t pageind) { size_t mapbits; @@ -895,7 +925,7 @@ JEMALLOC_ALWAYS_INLINE void arena_mapbits_unallocated_set(arena_chunk_t *chunk, size_t pageind, size_t size, size_t flags) { - size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind); + size_t *mapbitsp = arena_mapbitsp_get_mutable(chunk, pageind); assert((size & PAGE_MASK) == 0); assert((flags & CHUNK_MAP_FLAGS_MASK) == flags); @@ -909,7 +939,7 @@ JEMALLOC_ALWAYS_INLINE void arena_mapbits_unallocated_size_set(arena_chunk_t *chunk, size_t pageind, size_t size) { - size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind); + size_t *mapbitsp = arena_mapbitsp_get_mutable(chunk, pageind); size_t mapbits = arena_mapbitsp_read(mapbitsp); assert((size & PAGE_MASK) == 0); @@ -921,7 +951,7 @@ arena_mapbits_unallocated_size_set(arena_chunk_t *chunk, size_t pageind, JEMALLOC_ALWAYS_INLINE void 
arena_mapbits_internal_set(arena_chunk_t *chunk, size_t pageind, size_t flags) { - size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind); + size_t *mapbitsp = arena_mapbitsp_get_mutable(chunk, pageind); assert((flags & CHUNK_MAP_UNZEROED) == flags); arena_mapbitsp_write(mapbitsp, flags); @@ -931,7 +961,7 @@ JEMALLOC_ALWAYS_INLINE void arena_mapbits_large_set(arena_chunk_t *chunk, size_t pageind, size_t size, size_t flags) { - size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind); + size_t *mapbitsp = arena_mapbitsp_get_mutable(chunk, pageind); assert((size & PAGE_MASK) == 0); assert((flags & CHUNK_MAP_FLAGS_MASK) == flags); @@ -946,7 +976,7 @@ JEMALLOC_ALWAYS_INLINE void arena_mapbits_large_binind_set(arena_chunk_t *chunk, size_t pageind, szind_t binind) { - size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind); + size_t *mapbitsp = arena_mapbitsp_get_mutable(chunk, pageind); size_t mapbits = arena_mapbitsp_read(mapbitsp); assert(binind <= BININD_INVALID); @@ -960,7 +990,7 @@ JEMALLOC_ALWAYS_INLINE void arena_mapbits_small_set(arena_chunk_t *chunk, size_t pageind, size_t runind, szind_t binind, size_t flags) { - size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind); + size_t *mapbitsp = arena_mapbitsp_get_mutable(chunk, pageind); assert(binind < BININD_INVALID); assert(pageind - runind >= map_bias); @@ -1048,12 +1078,12 @@ arena_ptr_small_binind_get(const void *ptr, size_t mapbits) size_t pageind; size_t actual_mapbits; size_t rpages_ind; - arena_run_t *run; + const arena_run_t *run; arena_bin_t *bin; szind_t run_binind, actual_binind; arena_bin_info_t *bin_info; - arena_chunk_map_misc_t *miscelm; - void *rpages; + const arena_chunk_map_misc_t *miscelm; + const void *rpages; assert(binind != BININD_INVALID); assert(binind < NBINS); @@ -1066,7 +1096,7 @@ arena_ptr_small_binind_get(const void *ptr, size_t mapbits) assert(arena_mapbits_allocated_get(chunk, pageind) != 0); rpages_ind = pageind - arena_mapbits_small_runind_get(chunk, pageind); - miscelm = arena_miscelm_get(chunk, rpages_ind); + miscelm = arena_miscelm_get_const(chunk, rpages_ind); run = &miscelm->run; run_binind = run->binind; bin = &arena->bins[run_binind]; @@ -1182,8 +1212,8 @@ arena_prof_tctx_get(const void *ptr) if (likely((mapbits & CHUNK_MAP_LARGE) == 0)) ret = (prof_tctx_t *)(uintptr_t)1U; else { - arena_chunk_map_misc_t *elm = arena_miscelm_get(chunk, - pageind); + arena_chunk_map_misc_t *elm = + arena_miscelm_get_mutable(chunk, pageind); ret = atomic_read_p(&elm->prof_tctx_pun); } } else @@ -1212,7 +1242,7 @@ arena_prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx) assert(arena_mapbits_large_get(chunk, pageind) != 0); - elm = arena_miscelm_get(chunk, pageind); + elm = arena_miscelm_get_mutable(chunk, pageind); atomic_write_p(&elm->prof_tctx_pun, tctx); } else { /* @@ -1248,7 +1278,7 @@ arena_prof_tctx_reset(const void *ptr, size_t usize, const void *old_ptr, 0); assert(arena_mapbits_large_get(chunk, pageind) != 0); - elm = arena_miscelm_get(chunk, pageind); + elm = arena_miscelm_get_mutable(chunk, pageind); atomic_write_p(&elm->prof_tctx_pun, (prof_tctx_t *)(uintptr_t)1U); } else diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index aed60cb1..26066695 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -5,7 +5,8 @@ arena_alloc_junk_small arena_basic_stats_merge arena_bin_index arena_bin_info -arena_bitselm_get +arena_bitselm_get_const +arena_bitselm_get_mutable arena_boot arena_choose 
arena_choose_hard @@ -60,7 +61,8 @@ arena_mapbits_unallocated_set arena_mapbits_unallocated_size_get arena_mapbits_unallocated_size_set arena_mapbits_unzeroed_get -arena_mapbitsp_get +arena_mapbitsp_get_const +arena_mapbitsp_get_mutable arena_mapbitsp_read arena_mapbitsp_write arena_maxrun @@ -69,7 +71,8 @@ arena_metadata_allocated_add arena_metadata_allocated_get arena_metadata_allocated_sub arena_migrate -arena_miscelm_get +arena_miscelm_get_const +arena_miscelm_get_mutable arena_miscelm_to_pageind arena_miscelm_to_rpages arena_new diff --git a/src/arena.c b/src/arena.c index 0d232ff8..8291ab2a 100644 --- a/src/arena.c +++ b/src/arena.c @@ -197,11 +197,11 @@ arena_avail_insert(arena_t *arena, arena_chunk_t *chunk, size_t pageind, size_t npages) { szind_t ind = size2index(run_quantize_floor(arena_miscelm_size_get( - arena_miscelm_get(chunk, pageind)))); + arena_miscelm_get_const(chunk, pageind)))); assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> LG_PAGE)); ph_insert(arena_runs_avail_get(arena, ind), - &arena_miscelm_get(chunk, pageind)->ph_link); + &arena_miscelm_get_mutable(chunk, pageind)->ph_link); } static void @@ -209,18 +209,19 @@ arena_avail_remove(arena_t *arena, arena_chunk_t *chunk, size_t pageind, size_t npages) { szind_t ind = size2index(run_quantize_floor(arena_miscelm_size_get( - arena_miscelm_get(chunk, pageind)))); + arena_miscelm_get_const(chunk, pageind)))); assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> LG_PAGE)); ph_remove(arena_runs_avail_get(arena, ind), - &arena_miscelm_get(chunk, pageind)->ph_link); + &arena_miscelm_get_mutable(chunk, pageind)->ph_link); } static void arena_run_dirty_insert(arena_t *arena, arena_chunk_t *chunk, size_t pageind, size_t npages) { - arena_chunk_map_misc_t *miscelm = arena_miscelm_get(chunk, pageind); + arena_chunk_map_misc_t *miscelm = arena_miscelm_get_mutable(chunk, + pageind); assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> LG_PAGE)); @@ -237,7 +238,8 @@ static void arena_run_dirty_remove(arena_t *arena, arena_chunk_t *chunk, size_t pageind, size_t npages) { - arena_chunk_map_misc_t *miscelm = arena_miscelm_get(chunk, pageind); + arena_chunk_map_misc_t *miscelm = arena_miscelm_get_mutable(chunk, + pageind); assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> LG_PAGE)); @@ -679,17 +681,18 @@ arena_chunk_init_hard(arena_t *arena) */ if (!zero) { JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED( - (void *)arena_bitselm_get(chunk, map_bias+1), - (size_t)((uintptr_t) arena_bitselm_get(chunk, - chunk_npages-1) - (uintptr_t)arena_bitselm_get(chunk, - map_bias+1))); + (void *)arena_bitselm_get_const(chunk, map_bias+1), + (size_t)((uintptr_t)arena_bitselm_get_const(chunk, + chunk_npages-1) - + (uintptr_t)arena_bitselm_get_const(chunk, map_bias+1))); for (i = map_bias+1; i < chunk_npages-1; i++) arena_mapbits_internal_set(chunk, i, flag_unzeroed); } else { JEMALLOC_VALGRIND_MAKE_MEM_DEFINED((void - *)arena_bitselm_get(chunk, map_bias+1), (size_t)((uintptr_t) - arena_bitselm_get(chunk, chunk_npages-1) - - (uintptr_t)arena_bitselm_get(chunk, map_bias+1))); + *)arena_bitselm_get_const(chunk, map_bias+1), + (size_t)((uintptr_t)arena_bitselm_get_const(chunk, + chunk_npages-1) - + (uintptr_t)arena_bitselm_get_const(chunk, map_bias+1))); if (config_debug) { for (i = map_bias+1; i < chunk_npages-1; i++) { assert(arena_mapbits_unzeroed_get(chunk, i) == @@ -1103,7 +1106,7 @@ arena_run_alloc_large(arena_t *arena, size_t size, bool zero) */ chunk = arena_chunk_alloc(arena); 
if (chunk != NULL) { - run = &arena_miscelm_get(chunk, map_bias)->run; + run = &arena_miscelm_get_mutable(chunk, map_bias)->run; if (arena_run_split_large(arena, run, size, zero)) run = NULL; return (run); @@ -1148,7 +1151,7 @@ arena_run_alloc_small(arena_t *arena, size_t size, szind_t binind) */ chunk = arena_chunk_alloc(arena); if (chunk != NULL) { - run = &arena_miscelm_get(chunk, map_bias)->run; + run = &arena_miscelm_get_mutable(chunk, map_bias)->run; if (arena_run_split_small(arena, run, size, binind)) run = NULL; return (run); @@ -2043,7 +2046,7 @@ arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, flag_dirty | (flag_unzeroed_mask & arena_mapbits_unzeroed_get(chunk, pageind+head_npages))); - tail_miscelm = arena_miscelm_get(chunk, pageind + head_npages); + tail_miscelm = arena_miscelm_get_mutable(chunk, pageind + head_npages); tail_run = &tail_miscelm->run; arena_run_dalloc(arena, tail_run, dirty, false, (flag_decommitted != 0)); @@ -2520,7 +2523,7 @@ arena_palloc_large(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, arena_chunk_map_misc_t *head_miscelm = miscelm; arena_run_t *head_run = run; - miscelm = arena_miscelm_get(chunk, + miscelm = arena_miscelm_get_mutable(chunk, arena_miscelm_to_pageind(head_miscelm) + (leadsize >> LG_PAGE)); run = &miscelm->run; @@ -2703,7 +2706,7 @@ arena_dalloc_bin_locked_impl(arena_t *arena, arena_chunk_t *chunk, void *ptr, pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; rpages_ind = pageind - arena_mapbits_small_runind_get(chunk, pageind); - run = &arena_miscelm_get(chunk, rpages_ind)->run; + run = &arena_miscelm_get_mutable(chunk, rpages_ind)->run; binind = run->binind; bin = &arena->bins[binind]; bin_info = &arena_bin_info[binind]; @@ -2741,7 +2744,7 @@ arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, size_t rpages_ind; rpages_ind = pageind - arena_mapbits_small_runind_get(chunk, pageind); - run = &arena_miscelm_get(chunk, rpages_ind)->run; + run = &arena_miscelm_get_mutable(chunk, rpages_ind)->run; bin = &arena->bins[run->binind]; malloc_mutex_lock(&bin->lock); arena_dalloc_bin_locked_impl(arena, chunk, ptr, bitselm, false); @@ -2759,7 +2762,7 @@ arena_dalloc_small(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, void *ptr, assert(arena_ptr_small_binind_get(ptr, arena_mapbits_get(chunk, pageind)) != BININD_INVALID); } - bitselm = arena_bitselm_get(chunk, pageind); + bitselm = arena_bitselm_get_mutable(chunk, pageind); arena_dalloc_bin(arena, chunk, ptr, pageind, bitselm); arena_decay_tick(tsd, arena); } @@ -2787,7 +2790,8 @@ arena_dalloc_large_locked_impl(arena_t *arena, arena_chunk_t *chunk, void *ptr, bool junked) { size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - arena_chunk_map_misc_t *miscelm = arena_miscelm_get(chunk, pageind); + arena_chunk_map_misc_t *miscelm = arena_miscelm_get_mutable(chunk, + pageind); arena_run_t *run = &miscelm->run; if (config_fill || config_stats) { @@ -2832,7 +2836,8 @@ arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk, void *ptr, size_t oldsize, size_t size) { size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - arena_chunk_map_misc_t *miscelm = arena_miscelm_get(chunk, pageind); + arena_chunk_map_misc_t *miscelm = arena_miscelm_get_mutable(chunk, + pageind); arena_run_t *run = &miscelm->run; assert(size < oldsize); @@ -2898,7 +2903,7 @@ arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr, if (splitsize == 0) goto label_fail; - run = &arena_miscelm_get(chunk, pageind+npages)->run; + run 
= &arena_miscelm_get_mutable(chunk, pageind+npages)->run; if (arena_run_split_large(arena, run, splitsize, zero)) goto label_fail; diff --git a/src/tcache.c b/src/tcache.c index c12727a6..a8620c3d 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -129,7 +129,7 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; arena_chunk_map_bits_t *bitselm = - arena_bitselm_get(chunk, pageind); + arena_bitselm_get_mutable(chunk, pageind); arena_dalloc_bin_junked_locked(bin_arena, chunk, ptr, bitselm); } else { From ff63dca363021faf5ccacc6dce2cb05df0268214 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 23 Mar 2016 16:06:12 -0700 Subject: [PATCH 21/82] Avoid blindly enabling assertions for header code when testing. Restructure the test program master header to avoid blindly enabling assertions. Prior to this change, assertion code in e.g. arena.h was always enabled for tests, which could skew performance-related testing. --- test/include/test/jemalloc_test.h.in | 78 ++++++++++++++++------------ 1 file changed, 45 insertions(+), 33 deletions(-) diff --git a/test/include/test/jemalloc_test.h.in b/test/include/test/jemalloc_test.h.in index 0a3dbeac..1f36e469 100644 --- a/test/include/test/jemalloc_test.h.in +++ b/test/include/test/jemalloc_test.h.in @@ -19,39 +19,6 @@ # include #endif -/******************************************************************************/ -/* - * Define always-enabled assertion macros, so that test assertions execute even - * if assertions are disabled in the library code. These definitions must - * exist prior to including "jemalloc/internal/util.h". - */ -#define assert(e) do { \ - if (!(e)) { \ - malloc_printf( \ - ": %s:%d: Failed assertion: \"%s\"\n", \ - __FILE__, __LINE__, #e); \ - abort(); \ - } \ -} while (0) - -#define not_reached() do { \ - malloc_printf( \ - ": %s:%d: Unreachable code reached\n", \ - __FILE__, __LINE__); \ - abort(); \ -} while (0) - -#define not_implemented() do { \ - malloc_printf(": %s:%d: Not implemented\n", \ - __FILE__, __LINE__); \ - abort(); \ -} while (0) - -#define assert_not_implemented(e) do { \ - if (!(e)) \ - not_implemented(); \ -} while (0) - #include "test/jemalloc_test_defs.h" #ifdef JEMALLOC_OSSPIN @@ -86,6 +53,14 @@ # include "jemalloc/internal/jemalloc_internal_defs.h" # include "jemalloc/internal/jemalloc_internal_macros.h" +static const bool config_debug = +#ifdef JEMALLOC_DEBUG + true +#else + false +#endif + ; + # define JEMALLOC_N(n) @private_namespace@##n # include "jemalloc/internal/private_namespace.h" @@ -149,3 +124,40 @@ #include "test/thd.h" #define MEXP 19937 #include "test/SFMT.h" + +/******************************************************************************/ +/* + * Define always-enabled assertion macros, so that test assertions execute even + * if assertions are disabled in the library code. 
+ */ +#undef assert +#undef not_reached +#undef not_implemented +#undef assert_not_implemented + +#define assert(e) do { \ + if (!(e)) { \ + malloc_printf( \ + ": %s:%d: Failed assertion: \"%s\"\n", \ + __FILE__, __LINE__, #e); \ + abort(); \ + } \ +} while (0) + +#define not_reached() do { \ + malloc_printf( \ + ": %s:%d: Unreachable code reached\n", \ + __FILE__, __LINE__); \ + abort(); \ +} while (0) + +#define not_implemented() do { \ + malloc_printf(": %s:%d: Not implemented\n", \ + __FILE__, __LINE__); \ + abort(); \ +} while (0) + +#define assert_not_implemented(e) do { \ + if (!(e)) \ + not_implemented(); \ +} while (0) From 232b13d86298b9eafc36b0610d7965a95bda0679 Mon Sep 17 00:00:00 2001 From: rustyx Date: Wed, 23 Mar 2016 10:13:22 +0100 Subject: [PATCH 22/82] Fix MSVC project --- msvc/projects/vc2015/jemalloc/jemalloc.vcxproj | 2 ++ msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index f3f0260b..0a6c4e61 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -56,6 +56,7 @@ + @@ -105,6 +106,7 @@ + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index ce70632b..412c24d6 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -107,6 +107,9 @@ Header Files\internal + + Header Files\internal + Header Files\internal @@ -226,6 +229,9 @@ Source Files + + Source Files + Source Files From af3184cac0e0c70045d8158b9c176696f2ca1090 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 24 Mar 2016 01:42:08 -0700 Subject: [PATCH 23/82] Use abort() for fallback implementations of unreachable(). --- include/jemalloc/internal/util.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index 6e214702..228584a4 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -73,12 +73,12 @@ JEMALLOC_CLANG_HAS_BUILTIN(__builtin_unreachable) # define unreachable() __builtin_unreachable() # else -# define unreachable() +# define unreachable() abort() # endif #else # define likely(x) !!(x) # define unlikely(x) !!(x) -# define unreachable() +# define unreachable() abort() #endif #include "jemalloc/internal/assert.h" From f3060284c521cc74e333c5ab3a6c8fc0648defb5 Mon Sep 17 00:00:00 2001 From: Chris Peterson Date: Sat, 26 Mar 2016 00:30:11 -0700 Subject: [PATCH 24/82] Remove unused arenas_extend() function declaration. The arenas_extend() function was renamed to arenas_init() in commit 8bb3198f72fc7587dc93527f9f19fb5be52fa553, but its function declaration was not removed from jemalloc_internal.h.in. 
--- include/jemalloc/internal/jemalloc_internal.h.in | 1 - 1 file changed, 1 deletion(-) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index d3b94c00..c1cccd64 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -465,7 +465,6 @@ void a0dalloc(void *ptr); void *bootstrap_malloc(size_t size); void *bootstrap_calloc(size_t num, size_t size); void bootstrap_free(void *ptr); -arena_t *arenas_extend(unsigned ind); unsigned narenas_total_get(void); arena_t *arena_init(unsigned ind); arena_tdata_t *arena_tdata_get_hard(tsd_t *tsd, unsigned ind); From 0bc716ae27d1bd66faa8f165a2c4a4cf6bd8143f Mon Sep 17 00:00:00 2001 From: Chris Peterson Date: Sat, 26 Mar 2016 01:19:28 -0700 Subject: [PATCH 25/82] Fix -Wunreachable-code warning in malloc_vsnprintf(). Variables s and slen are declared inside a switch statement, but outside a case scope. clang reports these variable definitions as "unreachable", though this is not really meaningful in this case. This is the only -Wunreachable-code warning in jemalloc. src/util.c:501:5 [-Wunreachable-code] code will never be executed This resolves #364. --- src/util.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/util.c b/src/util.c index 581d540b..a1c4a2a4 100644 --- a/src/util.c +++ b/src/util.c @@ -408,6 +408,8 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) int prec = -1; int width = -1; unsigned char len = '?'; + char *s; + size_t slen; f++; /* Flags. */ @@ -498,8 +500,6 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) } /* Conversion specifier. */ switch (*f) { - char *s; - size_t slen; case '%': /* %% */ APPEND_C(*f); From ce7c0f999bf7634078ec759f3d13290dbb34170c Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 30 Mar 2016 18:36:04 -0700 Subject: [PATCH 26/82] Fix potential chunk leaks. Move chunk_dalloc_arena()'s implementation into chunk_dalloc_wrapper(), so that if the dalloc hook fails, proper decommit/purge/retain cascading occurs. This fixes three potential chunk leaks on OOM paths, one during dss-based chunk allocation, one during chunk header commit (currently relevant only on Windows), and one during rtree write (e.g. if rtree node allocation fails). Merge chunk_purge_arena() into chunk_purge_default() (refactor, no change to functionality). 
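For reference, the intended cascade is roughly the following.  This is a
simplified standalone sketch, not the chunk.c code: hooks_t, retain(), and
dalloc_cascade() are stand-in names; only the hook signatures mirror
chunk_hooks_t (hooks return false on success), and the real function records
the chunk in the arena's retained trees rather than printing.

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdio.h>

    /* Stand-ins mirroring the chunk hook signatures (see chunk_hooks_t). */
    typedef bool (chunk_dalloc_t)(void *, size_t, bool, unsigned);
    typedef bool (chunk_decommit_t)(void *, size_t, size_t, size_t, unsigned);
    typedef bool (chunk_purge_t)(void *, size_t, size_t, size_t, unsigned);

    typedef struct {
        chunk_dalloc_t *dalloc;
        chunk_decommit_t *decommit;
        chunk_purge_t *purge;
    } hooks_t;

    /* Hypothetical stand-in for the retained-chunk bookkeeping. */
    static void
    retain(void *chunk, size_t size, bool zeroed, bool committed)
    {
        printf("retaining %p (%zu bytes, zeroed=%d, committed=%d)\n",
            chunk, size, (int)zeroed, (int)committed);
    }

    static void
    dalloc_cascade(hooks_t *hooks, void *chunk, size_t size, bool committed,
        unsigned arena_ind)
    {
        bool zeroed;

        /* First choice: hand the chunk back via the dalloc hook. */
        if (!hooks->dalloc(chunk, size, committed, arena_ind))
            return;
        /* The hook refused; decommit if possible, otherwise purge. */
        if (committed)
            committed = hooks->decommit(chunk, size, 0, size, arena_ind);
        zeroed = !committed ||
            !hooks->purge(chunk, size, 0, size, arena_ind);
        /* Keep the chunk around for later reuse instead of leaking it. */
        retain(chunk, size, zeroed, committed);
    }

Previously, callers that went through the plain wrapper got no such fallback,
so a failing dalloc hook on one of the OOM paths could drop the chunk on the
floor.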
--- include/jemalloc/internal/chunk.h | 6 +-- include/jemalloc/internal/private_symbols.txt | 2 - src/arena.c | 16 +++--- src/chunk.c | 51 ++++++------------- src/chunk_dss.c | 2 +- 5 files changed, 26 insertions(+), 51 deletions(-) diff --git a/include/jemalloc/internal/chunk.h b/include/jemalloc/internal/chunk.h index 5d193835..d800478d 100644 --- a/include/jemalloc/internal/chunk.h +++ b/include/jemalloc/internal/chunk.h @@ -62,12 +62,8 @@ void *chunk_alloc_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit); void chunk_dalloc_cache(arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk, size_t size, bool committed); -void chunk_dalloc_arena(arena_t *arena, chunk_hooks_t *chunk_hooks, - void *chunk, size_t size, bool zeroed, bool committed); void chunk_dalloc_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks, - void *chunk, size_t size, bool committed); -bool chunk_purge_arena(arena_t *arena, void *chunk, size_t offset, - size_t length); + void *chunk, size_t size, bool zeroed, bool committed); bool chunk_purge_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk, size_t size, size_t offset, size_t length); bool chunk_boot(void); diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 26066695..969c73df 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -152,7 +152,6 @@ chunk_alloc_dss chunk_alloc_mmap chunk_alloc_wrapper chunk_boot -chunk_dalloc_arena chunk_dalloc_cache chunk_dalloc_mmap chunk_dalloc_wrapper @@ -172,7 +171,6 @@ chunk_npages chunk_postfork_child chunk_postfork_parent chunk_prefork -chunk_purge_arena chunk_purge_wrapper chunk_register chunks_rtree diff --git a/src/arena.c b/src/arena.c index 8291ab2a..45964787 100644 --- a/src/arena.c +++ b/src/arena.c @@ -602,8 +602,8 @@ arena_chunk_alloc_internal_hard(arena_t *arena, chunk_hooks_t *chunk_hooks, /* Commit header. 
*/ if (chunk_hooks->commit(chunk, chunksize, 0, map_bias << LG_PAGE, arena->ind)) { - chunk_dalloc_wrapper(arena, chunk_hooks, - (void *)chunk, chunksize, *commit); + chunk_dalloc_wrapper(arena, chunk_hooks, (void *)chunk, + chunksize, *zero, *commit); chunk = NULL; } } @@ -614,7 +614,7 @@ arena_chunk_alloc_internal_hard(arena_t *arena, chunk_hooks_t *chunk_hooks, LG_PAGE, arena->ind); } chunk_dalloc_wrapper(arena, chunk_hooks, (void *)chunk, - chunksize, *commit); + chunksize, *zero, *commit); chunk = NULL; } @@ -1010,7 +1010,7 @@ arena_chunk_ralloc_huge_expand_hard(arena_t *arena, chunk_hooks_t *chunk_hooks, malloc_mutex_unlock(&arena->lock); } else if (chunk_hooks->merge(chunk, CHUNK_CEILING(oldsize), nchunk, cdiff, true, arena->ind)) { - chunk_dalloc_arena(arena, chunk_hooks, nchunk, cdiff, *zero, + chunk_dalloc_wrapper(arena, chunk_hooks, nchunk, cdiff, *zero, true); err = true; } @@ -1036,8 +1036,8 @@ arena_chunk_ralloc_huge_expand(arena_t *arena, void *chunk, size_t oldsize, } arena_nactive_add(arena, udiff >> LG_PAGE); - err = (chunk_alloc_cache(arena, &arena->chunk_hooks, nchunk, cdiff, - chunksize, zero, true) == NULL); + err = (chunk_alloc_cache(arena, &chunk_hooks, nchunk, cdiff, chunksize, + zero, true) == NULL); malloc_mutex_unlock(&arena->lock); if (err) { err = arena_chunk_ralloc_huge_expand_hard(arena, &chunk_hooks, @@ -1045,7 +1045,7 @@ arena_chunk_ralloc_huge_expand(arena_t *arena, void *chunk, size_t oldsize, cdiff); } else if (chunk_hooks.merge(chunk, CHUNK_CEILING(oldsize), nchunk, cdiff, true, arena->ind)) { - chunk_dalloc_arena(arena, &chunk_hooks, nchunk, cdiff, *zero, + chunk_dalloc_wrapper(arena, &chunk_hooks, nchunk, cdiff, *zero, true); err = true; } @@ -1699,7 +1699,7 @@ arena_unstash_purged(arena_t *arena, chunk_hooks_t *chunk_hooks, extent_node_dirty_remove(chunkselm); arena_node_dalloc(arena, chunkselm); chunkselm = chunkselm_next; - chunk_dalloc_arena(arena, chunk_hooks, addr, size, + chunk_dalloc_wrapper(arena, chunk_hooks, addr, size, zeroed, committed); } else { arena_chunk_t *chunk = diff --git a/src/chunk.c b/src/chunk.c index b179d213..304d4e5a 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -425,8 +425,8 @@ chunk_alloc_default(void *new_addr, size_t size, size_t alignment, bool *zero, arena_t *arena; arena = chunk_arena_get(arena_ind); - ret = chunk_alloc_core(arena, new_addr, size, alignment, zero, - commit, arena->dss_prec); + ret = chunk_alloc_core(arena, new_addr, size, alignment, zero, commit, + arena->dss_prec); if (ret == NULL) return (NULL); if (config_valgrind) @@ -579,8 +579,18 @@ chunk_dalloc_cache(arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk, arena_maybe_purge(arena); } +static bool +chunk_dalloc_default(void *chunk, size_t size, bool committed, + unsigned arena_ind) +{ + + if (!have_dss || !chunk_in_dss(chunk)) + return (chunk_dalloc_mmap(chunk, size)); + return (true); +} + void -chunk_dalloc_arena(arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk, +chunk_dalloc_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk, size_t size, bool zeroed, bool committed) { @@ -604,27 +614,6 @@ chunk_dalloc_arena(arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk, &arena->chunks_ad_retained, false, chunk, size, zeroed, committed); } -static bool -chunk_dalloc_default(void *chunk, size_t size, bool committed, - unsigned arena_ind) -{ - - if (!have_dss || !chunk_in_dss(chunk)) - return (chunk_dalloc_mmap(chunk, size)); - return (true); -} - -void -chunk_dalloc_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks, void 
*chunk, - size_t size, bool committed) -{ - - chunk_hooks_assure_initialized(arena, chunk_hooks); - chunk_hooks->dalloc(chunk, size, committed, arena->ind); - if (config_valgrind && chunk_hooks->dalloc != chunk_dalloc_default) - JEMALLOC_VALGRIND_MAKE_MEM_NOACCESS(chunk, size); -} - static bool chunk_commit_default(void *chunk, size_t size, size_t offset, size_t length, unsigned arena_ind) @@ -643,8 +632,9 @@ chunk_decommit_default(void *chunk, size_t size, size_t offset, size_t length, length)); } -bool -chunk_purge_arena(arena_t *arena, void *chunk, size_t offset, size_t length) +static bool +chunk_purge_default(void *chunk, size_t size, size_t offset, size_t length, + unsigned arena_ind) { assert(chunk != NULL); @@ -657,15 +647,6 @@ chunk_purge_arena(arena_t *arena, void *chunk, size_t offset, size_t length) length)); } -static bool -chunk_purge_default(void *chunk, size_t size, size_t offset, size_t length, - unsigned arena_ind) -{ - - return (chunk_purge_arena(chunk_arena_get(arena_ind), chunk, offset, - length)); -} - bool chunk_purge_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk, size_t size, size_t offset, size_t length) diff --git a/src/chunk_dss.c b/src/chunk_dss.c index 61fc9169..943d0e98 100644 --- a/src/chunk_dss.c +++ b/src/chunk_dss.c @@ -136,7 +136,7 @@ chunk_alloc_dss(arena_t *arena, void *new_addr, size_t size, size_t alignment, CHUNK_HOOKS_INITIALIZER; chunk_dalloc_wrapper(arena, &chunk_hooks, cpad, cpad_size, - true); + false, true); } if (*zero) { JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED( From f86bc081d6190be14c64aeaae9d02863b440bfb3 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 31 Mar 2016 11:19:46 -0700 Subject: [PATCH 27/82] Update a comment. --- src/arena.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/arena.c b/src/arena.c index 45964787..38a1ce34 100644 --- a/src/arena.c +++ b/src/arena.c @@ -668,8 +668,8 @@ arena_chunk_init_hard(arena_t *arena) /* * Initialize the map to contain one maximal free untouched run. Mark - * the pages as zeroed if chunk_alloc() returned a zeroed or decommitted - * chunk. + * the pages as zeroed if arena_chunk_alloc_internal() returned a zeroed + * or decommitted chunk. */ flag_unzeroed = (zero || !commit) ? 0 : CHUNK_MAP_UNZEROED; flag_decommitted = commit ? 0 : CHUNK_MAP_DECOMMITTED; From a82070ef5fc3aa81fda43086cdcc22bfa826b894 Mon Sep 17 00:00:00 2001 From: Chris Peterson Date: Sun, 27 Mar 2016 23:28:39 -0700 Subject: [PATCH 28/82] Add JEMALLOC_ALLOC_JUNK and JEMALLOC_FREE_JUNK macros Replace hardcoded 0xa5 and 0x5a junk values with JEMALLOC_ALLOC_JUNK and JEMALLOC_FREE_JUNK macros, respectively. 
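Using named constants also makes intent obvious at call sites and in the junk
tests.  As a rough illustration of the check the tests rely on (a standalone
sketch, not the test/unit/junk.c code; region_is_filled() is a made-up helper):

    #include <stddef.h>

    /* Same values as the macros added to util.h by this change. */
    #define JEMALLOC_ALLOC_JUNK 0xa5
    #define JEMALLOC_FREE_JUNK  0x5a

    /* Return nonzero iff every byte of the region carries the pattern. */
    static int
    region_is_filled(const void *ptr, size_t usize, unsigned char junk)
    {
        const unsigned char *p = (const unsigned char *)ptr;
        size_t i;

        for (i = 0; i < usize; i++) {
            if (p[i] != junk)
                return (0);
        }
        return (1);
    }

With junk filling enabled, a test can assert
region_is_filled(ptr, usize, JEMALLOC_ALLOC_JUNK) right after allocation and
perform the analogous JEMALLOC_FREE_JUNK check from the dalloc-junk hooks,
which is essentially what test/unit/junk.c does byte by byte.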
--- include/jemalloc/internal/tcache.h | 7 +++--- include/jemalloc/internal/util.h | 4 ++++ src/arena.c | 36 ++++++++++++++++-------------- src/ckh.c | 2 +- src/huge.c | 15 +++++++------ src/quarantine.c | 2 +- test/unit/junk.c | 6 ++--- 7 files changed, 40 insertions(+), 32 deletions(-) diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index 8357820b..1edd39fd 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -381,9 +381,10 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, } if (likely(!zero)) { if (slow_path && config_fill) { - if (unlikely(opt_junk_alloc)) - memset(ret, 0xa5, usize); - else if (unlikely(opt_zero)) + if (unlikely(opt_junk_alloc)) { + memset(ret, JEMALLOC_ALLOC_JUNK, + usize); + } else if (unlikely(opt_zero)) memset(ret, 0, usize); } } else diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index 228584a4..949a0e0a 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -40,6 +40,10 @@ */ #define MALLOC_PRINTF_BUFSIZE 4096 +/* Junk fill patterns. */ +#define JEMALLOC_ALLOC_JUNK 0xa5 +#define JEMALLOC_FREE_JUNK 0x5a + /* * Wrap a cpp argument that contains commas such that it isn't broken up into * multiple arguments. diff --git a/src/arena.c b/src/arena.c index 38a1ce34..1d30de57 100644 --- a/src/arena.c +++ b/src/arena.c @@ -2249,15 +2249,16 @@ void arena_alloc_junk_small(void *ptr, arena_bin_info_t *bin_info, bool zero) { + size_t redzone_size = bin_info->redzone_size; + if (zero) { - size_t redzone_size = bin_info->redzone_size; - memset((void *)((uintptr_t)ptr - redzone_size), 0xa5, - redzone_size); - memset((void *)((uintptr_t)ptr + bin_info->reg_size), 0xa5, - redzone_size); + memset((void *)((uintptr_t)ptr - redzone_size), + JEMALLOC_ALLOC_JUNK, redzone_size); + memset((void *)((uintptr_t)ptr + bin_info->reg_size), + JEMALLOC_ALLOC_JUNK, redzone_size); } else { - memset((void *)((uintptr_t)ptr - bin_info->redzone_size), 0xa5, - bin_info->reg_interval); + memset((void *)((uintptr_t)ptr - redzone_size), + JEMALLOC_ALLOC_JUNK, bin_info->reg_interval); } } @@ -2293,22 +2294,22 @@ arena_redzones_validate(void *ptr, arena_bin_info_t *bin_info, bool reset) for (i = 1; i <= redzone_size; i++) { uint8_t *byte = (uint8_t *)((uintptr_t)ptr - i); - if (*byte != 0xa5) { + if (*byte != JEMALLOC_ALLOC_JUNK) { error = true; arena_redzone_corruption(ptr, size, false, i, *byte); if (reset) - *byte = 0xa5; + *byte = JEMALLOC_ALLOC_JUNK; } } for (i = 0; i < redzone_size; i++) { uint8_t *byte = (uint8_t *)((uintptr_t)ptr + size + i); - if (*byte != 0xa5) { + if (*byte != JEMALLOC_ALLOC_JUNK) { error = true; arena_redzone_corruption(ptr, size, true, i, *byte); if (reset) - *byte = 0xa5; + *byte = JEMALLOC_ALLOC_JUNK; } } } @@ -2327,7 +2328,7 @@ arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info) size_t redzone_size = bin_info->redzone_size; arena_redzones_validate(ptr, bin_info, false); - memset((void *)((uintptr_t)ptr - redzone_size), 0x5a, + memset((void *)((uintptr_t)ptr - redzone_size), JEMALLOC_FREE_JUNK, bin_info->reg_interval); } #ifdef JEMALLOC_JET @@ -2458,7 +2459,7 @@ arena_malloc_large(tsd_t *tsd, arena_t *arena, szind_t binind, bool zero) if (!zero) { if (config_fill) { if (unlikely(opt_junk_alloc)) - memset(ret, 0xa5, usize); + memset(ret, JEMALLOC_ALLOC_JUNK, usize); else if (unlikely(opt_zero)) memset(ret, 0, usize); } @@ -2563,7 +2564,7 @@ arena_palloc_large(tsd_t *tsd, arena_t *arena, size_t 
usize, size_t alignment, if (config_fill && !zero) { if (unlikely(opt_junk_alloc)) - memset(ret, 0xa5, usize); + memset(ret, JEMALLOC_ALLOC_JUNK, usize); else if (unlikely(opt_zero)) memset(ret, 0, usize); } @@ -2776,7 +2777,7 @@ arena_dalloc_junk_large(void *ptr, size_t usize) { if (config_fill && unlikely(opt_junk_free)) - memset(ptr, 0x5a, usize); + memset(ptr, JEMALLOC_FREE_JUNK, usize); } #ifdef JEMALLOC_JET #undef arena_dalloc_junk_large @@ -2977,7 +2978,7 @@ arena_ralloc_junk_large(void *ptr, size_t old_usize, size_t usize) { if (config_fill && unlikely(opt_junk_free)) { - memset((void *)((uintptr_t)ptr + usize), 0x5a, + memset((void *)((uintptr_t)ptr + usize), JEMALLOC_FREE_JUNK, old_usize - usize); } } @@ -3012,7 +3013,8 @@ arena_ralloc_large(void *ptr, size_t oldsize, size_t usize_min, usize_min, usize_max, zero); if (config_fill && !ret && !zero) { if (unlikely(opt_junk_alloc)) { - memset((void *)((uintptr_t)ptr + oldsize), 0xa5, + memset((void *)((uintptr_t)ptr + oldsize), + JEMALLOC_ALLOC_JUNK, isalloc(ptr, config_prof) - oldsize); } else if (unlikely(opt_zero)) { memset((void *)((uintptr_t)ptr + oldsize), 0, diff --git a/src/ckh.c b/src/ckh.c index 3b423aa2..07b49dd2 100644 --- a/src/ckh.c +++ b/src/ckh.c @@ -423,7 +423,7 @@ ckh_delete(tsd_t *tsd, ckh_t *ckh) idalloctm(tsd, ckh->tab, tcache_get(tsd, false), true, true); if (config_debug) - memset(ckh, 0x5a, sizeof(ckh_t)); + memset(ckh, JEMALLOC_FREE_JUNK, sizeof(ckh_t)); } size_t diff --git a/src/huge.c b/src/huge.c index 5f7ceaf1..a63c8258 100644 --- a/src/huge.c +++ b/src/huge.c @@ -92,7 +92,7 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, if (!is_zeroed) memset(ret, 0, usize); } else if (config_fill && unlikely(opt_junk_alloc)) - memset(ret, 0xa5, usize); + memset(ret, JEMALLOC_ALLOC_JUNK, usize); arena_decay_tick(tsd, arena); return (ret); @@ -112,7 +112,7 @@ huge_dalloc_junk(void *ptr, size_t usize) * unmapped. 
*/ if (!config_munmap || (have_dss && chunk_in_dss(ptr))) - memset(ptr, 0x5a, usize); + memset(ptr, JEMALLOC_FREE_JUNK, usize); } } #ifdef JEMALLOC_JET @@ -147,7 +147,8 @@ huge_ralloc_no_move_similar(void *ptr, size_t oldsize, size_t usize_min, if (oldsize > usize) { size_t sdiff = oldsize - usize; if (config_fill && unlikely(opt_junk_free)) { - memset((void *)((uintptr_t)ptr + usize), 0x5a, sdiff); + memset((void *)((uintptr_t)ptr + usize), + JEMALLOC_FREE_JUNK, sdiff); post_zeroed = false; } else { post_zeroed = !chunk_purge_wrapper(arena, &chunk_hooks, @@ -174,8 +175,8 @@ huge_ralloc_no_move_similar(void *ptr, size_t oldsize, size_t usize_min, usize - oldsize); } } else if (config_fill && unlikely(opt_junk_alloc)) { - memset((void *)((uintptr_t)ptr + oldsize), 0xa5, usize - - oldsize); + memset((void *)((uintptr_t)ptr + oldsize), + JEMALLOC_ALLOC_JUNK, usize - oldsize); } } } @@ -268,8 +269,8 @@ huge_ralloc_no_move_expand(void *ptr, size_t oldsize, size_t usize, bool zero) { CHUNK_CEILING(oldsize)); } } else if (config_fill && unlikely(opt_junk_alloc)) { - memset((void *)((uintptr_t)ptr + oldsize), 0xa5, usize - - oldsize); + memset((void *)((uintptr_t)ptr + oldsize), JEMALLOC_ALLOC_JUNK, + usize - oldsize); } return (false); diff --git a/src/quarantine.c b/src/quarantine.c index ff8801cb..c024deab 100644 --- a/src/quarantine.c +++ b/src/quarantine.c @@ -160,7 +160,7 @@ quarantine(tsd_t *tsd, void *ptr) && usize <= SMALL_MAXCLASS) arena_quarantine_junk_small(ptr, usize); else - memset(ptr, 0x5a, usize); + memset(ptr, JEMALLOC_FREE_JUNK, usize); } } else { assert(quarantine->curbytes == 0); diff --git a/test/unit/junk.c b/test/unit/junk.c index b23dd1e9..f4e62261 100644 --- a/test/unit/junk.c +++ b/test/unit/junk.c @@ -29,7 +29,7 @@ arena_dalloc_junk_small_intercept(void *ptr, arena_bin_info_t *bin_info) arena_dalloc_junk_small_orig(ptr, bin_info); for (i = 0; i < bin_info->reg_size; i++) { - assert_c_eq(((char *)ptr)[i], 0x5a, + assert_c_eq(((char *)ptr)[i], JEMALLOC_FREE_JUNK, "Missing junk fill for byte %zu/%zu of deallocated region", i, bin_info->reg_size); } @@ -44,7 +44,7 @@ arena_dalloc_junk_large_intercept(void *ptr, size_t usize) arena_dalloc_junk_large_orig(ptr, usize); for (i = 0; i < usize; i++) { - assert_c_eq(((char *)ptr)[i], 0x5a, + assert_c_eq(((char *)ptr)[i], JEMALLOC_FREE_JUNK, "Missing junk fill for byte %zu/%zu of deallocated region", i, usize); } @@ -98,7 +98,7 @@ test_junk(size_t sz_min, size_t sz_max) for (i = sz_prev; i < sz; i++) { if (opt_junk_alloc) { - assert_c_eq(s[i], 0xa5, + assert_c_eq(s[i], JEMALLOC_ALLOC_JUNK, "Newly allocated byte %zu/%zu isn't " "junk-filled", i, sz); } From b582d2ad9418630d65540ce8dfa9f96e69eb4df9 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 31 Mar 2016 12:31:10 -0700 Subject: [PATCH 29/82] Update implementation details docs re: PTRDIFF_MAX. Document that the maximum size class is limited by PTRDIFF_MAX, rather than the full address space. This reflects changes that were part of 0c516a00c4cb28cff55ce0995f756b5aae074c9e (Make *allocx() size class overflow behavior defined.). 
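The defined overflow behavior is easy to demonstrate (a hypothetical standalone
check, assuming the library built from this tree is linked in and
<jemalloc/jemalloc.h> is on the include path): a request that cannot fit any
size class below the PTRDIFF_MAX ceiling is expected to fail cleanly rather
than wrap.

    #include <stdint.h>
    #include <stdio.h>
    #include <jemalloc/jemalloc.h>

    int
    main(void)
    {
        /* Larger than any supported size class; expected to return NULL. */
        void *p = mallocx((size_t)PTRDIFF_MAX + 1, 0);

        printf("oversized mallocx() -> %p\n", p);
        if (p != NULL)
            dallocx(p, 0);
        return (0);
    }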
--- doc/jemalloc.xml.in | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index bc5dbd1d..63088cd1 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -540,8 +540,8 @@ for (i = 0; i < nbins; i++) { are smaller than four times the page size, large size classes are smaller than the chunk size (see the opt.lg_chunk option), and - huge size classes extend from the chunk size up to one size class less than - the full address space size. + huge size classes extend from the chunk size up to the largest size class + that does not exceed PTRDIFF_MAX. Allocations are packed tightly together, which can be an issue for multi-threaded applications. If you need to assure that allocations do not @@ -659,7 +659,7 @@ for (i = 0; i < nbins; i++) { [1280 KiB, 1536 KiB, 1792 KiB] - Huge + Huge 256 KiB [2 MiB] @@ -687,6 +687,14 @@ for (i = 0; i < nbins; i++) { ... ... + + 512 PiB + [2560 PiB, 3 EiB, 3584 PiB, 4 EiB] + + + 1 EiB + [5 EiB, 6 EiB, 7 EiB] + From a3c4193280b2fbd267b68f3bce091a53b5ea0b97 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 5 Apr 2016 16:32:32 -0700 Subject: [PATCH 30/82] Fix a compilation warning in the ph test code. --- test/unit/ph.c | 21 +-------------------- 1 file changed, 1 insertion(+), 20 deletions(-) diff --git a/test/unit/ph.c b/test/unit/ph.c index b0e44028..103475b4 100644 --- a/test/unit/ph.c +++ b/test/unit/ph.c @@ -19,32 +19,14 @@ TEST_END TEST_BEGIN(test_ph_random) { #define NNODES 25 -#define NBAGS 250 #define SEED 42 sfmt_t *sfmt; - uint64_t bag[NNODES]; ph_heap_t heap; node_t nodes[NNODES]; unsigned i, j, k; sfmt = init_gen_rand(SEED); - for (i = 0; i < NBAGS; i++) { - switch (i) { - case 0: - /* Insert in order. */ - for (j = 0; j < NNODES; j++) - bag[j] = j; - break; - case 1: - /* Insert in reverse order. */ - for (j = 0; j < NNODES; j++) - bag[j] = NNODES - j - 1; - break; - default: - for (j = 0; j < NNODES; j++) - bag[j] = gen_rand64_range(sfmt, NNODES); - } - + for (i = 0; i < 2; i++) { for (j = 1; j <= NNODES; j++) { /* Initialize heap and nodes. */ ph_new(&heap); @@ -77,7 +59,6 @@ TEST_BEGIN(test_ph_random) } fini_gen_rand(sfmt); #undef NNODES -#undef NBAGS #undef SEED } TEST_END From 4a8abbb400afe695f145a487380c04a946500bc6 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 6 Apr 2016 10:32:06 -0700 Subject: [PATCH 31/82] Fix bitmap_sfu() regression. Fix bitmap_sfu() to shift by LG_BITMAP_GROUP_NBITS rather than hard-coded 6 when using linear (non-USE_TREE) bitmap search. In practice this affects only 64-bit systems for which sizeof(long) is not 8 (i.e. Windows), since USE_TREE is defined for 32-bit systems. This regression was caused by b8823ab02607d6f03febd32ac504bb6188c54047 (Use linear scan for small bitmaps). This resolves #368. --- include/jemalloc/internal/bitmap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/jemalloc/internal/bitmap.h b/include/jemalloc/internal/bitmap.h index 2594e3a4..0e0d2476 100644 --- a/include/jemalloc/internal/bitmap.h +++ b/include/jemalloc/internal/bitmap.h @@ -223,7 +223,7 @@ bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo) i++; g = bitmap[i]; } - bit = (bit - 1) + (i << 6); + bit = (bit - 1) + (i << LG_BITMAP_GROUP_NBITS); #endif bitmap_set(bitmap, binfo, bit); return (bit); From 2ee2f1ec57d9094643db60210c28b989f2e7da83 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 6 Apr 2016 10:38:47 -0700 Subject: [PATCH 32/82] Reduce differences between alternative bitmap implementations. 
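The linear-scan path now writes the index computation as
(i << LG_BITMAP_GROUP_NBITS) + (bit - 1).  For reference, a worked example of
why the hard-coded 6 removed by the previous commit was wrong (an illustrative
standalone snippet; it assumes 32-bit bitmap groups, i.e.
LG_BITMAP_GROUP_NBITS == 5, which is the 64-bit Windows case where
sizeof(long) == 4):

    #include <stdio.h>

    int
    main(void)
    {
        /* First set bit: lowest bit (1-based ffs result 1) of group i == 2. */
        unsigned i = 2, ffs_result = 1;
        unsigned lg_group_nbits = 5;   /* 32-bit groups. */

        /* Absolute index = group * group size + offset within the group. */
        unsigned fixed = (i << lg_group_nbits) + (ffs_result - 1);
        unsigned hard_coded = (i << 6) + (ffs_result - 1);

        printf("correct index:     %u\n", fixed);      /* 64 */
        printf("with hard-coded 6: %u\n", hard_coded); /* 128 */
        return (0);
    }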
--- include/jemalloc/internal/bitmap.h | 2 +- src/bitmap.c | 11 ++++------- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/include/jemalloc/internal/bitmap.h b/include/jemalloc/internal/bitmap.h index 0e0d2476..894695f4 100644 --- a/include/jemalloc/internal/bitmap.h +++ b/include/jemalloc/internal/bitmap.h @@ -223,7 +223,7 @@ bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo) i++; g = bitmap[i]; } - bit = (bit - 1) + (i << LG_BITMAP_GROUP_NBITS); + bit = (i << LG_BITMAP_GROUP_NBITS) + (bit - 1); #endif bitmap_set(bitmap, binfo, bit); return (bit); diff --git a/src/bitmap.c b/src/bitmap.c index b1e66271..ac0f3b38 100644 --- a/src/bitmap.c +++ b/src/bitmap.c @@ -74,15 +74,11 @@ bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo) void bitmap_info_init(bitmap_info_t *binfo, size_t nbits) { - size_t i; assert(nbits > 0); assert(nbits <= (ZU(1) << LG_BITMAP_MAXBITS)); - i = nbits >> LG_BITMAP_GROUP_NBITS; - if (nbits % BITMAP_GROUP_NBITS != 0) - i++; - binfo->ngroups = i; + binfo->ngroups = BITMAP_BITS2GROUPS(nbits); binfo->nbits = nbits; } @@ -99,9 +95,10 @@ bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo) size_t extra; memset(bitmap, 0xffU, bitmap_size(binfo)); - extra = (binfo->nbits % (binfo->ngroups * BITMAP_GROUP_NBITS)); + extra = (BITMAP_GROUP_NBITS - (binfo->nbits & BITMAP_GROUP_NBITS_MASK)) + & BITMAP_GROUP_NBITS_MASK; if (extra != 0) - bitmap[binfo->ngroups - 1] >>= (BITMAP_GROUP_NBITS - extra); + bitmap[binfo->ngroups - 1] >>= extra; } #endif /* USE_TREE */ From c6a2c39404df9a3fb27735b93cf4cb3a76a2d4a7 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 26 Mar 2016 17:30:37 -0700 Subject: [PATCH 33/82] Refactor/fix ph. Refactor ph to support configurable comparison functions. Use a cpp macro code generation form equivalent to the rb macros so that pairing heaps can be used for both run heaps and chunk heaps. Remove per node parent pointers, and instead use leftmost siblings' prev pointers to track parents. Fix multi-pass sibling merging to iterate over intermediate results using a FIFO, rather than a LIFO. Use this fixed sibling merging implementation for both merge phases of the auxiliary twopass algorithm (first merging the aux list, then replacing the root with its merged children). This fixes both degenerate merge behavior and the potential for deep recursion. This regression was introduced by 6bafa6678fc36483e638f1c3a0a9bf79fb89bfc9 (Pairing heap). This resolves #371. --- Makefile.in | 1 - include/jemalloc/internal/arena.h | 20 +- .../jemalloc/internal/jemalloc_internal.h.in | 5 +- include/jemalloc/internal/ph.h | 552 ++++++++++-------- include/jemalloc/internal/private_symbols.txt | 16 +- src/arena.c | 95 ++- src/ph.c | 2 - test/unit/ph.c | 257 +++++++- 8 files changed, 613 insertions(+), 335 deletions(-) delete mode 100644 src/ph.c diff --git a/Makefile.in b/Makefile.in index 7f2d668a..480ce1a1 100644 --- a/Makefile.in +++ b/Makefile.in @@ -95,7 +95,6 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/mutex.c \ $(srcroot)src/nstime.c \ $(srcroot)src/pages.c \ - $(srcroot)src/ph.c \ $(srcroot)src/prng.c \ $(srcroot)src/prof.c \ $(srcroot)src/quarantine.c \ diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 09ae6894..6f0fa76a 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -160,7 +160,7 @@ struct arena_chunk_map_misc_s { * 2) arena_run_t conceptually uses this linkage for in-use non-full * runs, rather than directly embedding linkage. 
*/ - ph_node_t ph_link; + phn(arena_chunk_map_misc_t) ph_link; union { /* Linkage for list of dirty runs. */ @@ -176,6 +176,7 @@ struct arena_chunk_map_misc_s { arena_run_t run; }; }; +typedef ph(arena_chunk_map_misc_t) arena_run_heap_t; #endif /* JEMALLOC_ARENA_STRUCTS_A */ #ifdef JEMALLOC_ARENA_STRUCTS_B @@ -278,7 +279,7 @@ struct arena_bin_s { * objects packed well, and it can also help reduce the number of * almost-empty chunks. */ - ph_heap_t runs; + arena_run_heap_t runs; /* Bin statistics. */ malloc_bin_stats_t stats; @@ -460,7 +461,7 @@ struct arena_s { * Quantized address-ordered heaps of this arena's available runs. The * heaps are used for first-best-fit run allocation. */ - ph_heap_t runs_avail[1]; /* Dynamically sized. */ + arena_run_heap_t runs_avail[1]; /* Dynamically sized. */ }; /* Used in conjunction with tsd for fast arena-related context lookup. */ @@ -604,7 +605,6 @@ const arena_chunk_map_misc_t *arena_miscelm_get_const( size_t arena_miscelm_to_pageind(const arena_chunk_map_misc_t *miscelm); void *arena_miscelm_to_rpages(const arena_chunk_map_misc_t *miscelm); arena_chunk_map_misc_t *arena_rd_to_miscelm(arena_runs_dirty_link_t *rd); -arena_chunk_map_misc_t *arena_ph_to_miscelm(ph_node_t *ph); arena_chunk_map_misc_t *arena_run_to_miscelm(arena_run_t *run); size_t *arena_mapbitsp_get_mutable(arena_chunk_t *chunk, size_t pageind); const size_t *arena_mapbitsp_get_const(const arena_chunk_t *chunk, @@ -734,18 +734,6 @@ arena_rd_to_miscelm(arena_runs_dirty_link_t *rd) return (miscelm); } -JEMALLOC_ALWAYS_INLINE arena_chunk_map_misc_t * -arena_ph_to_miscelm(ph_node_t *ph) -{ - arena_chunk_map_misc_t *miscelm = (arena_chunk_map_misc_t *) - ((uintptr_t)ph - offsetof(arena_chunk_map_misc_t, ph_link)); - - assert(arena_miscelm_to_pageind(miscelm) >= map_bias); - assert(arena_miscelm_to_pageind(miscelm) < chunk_npages); - - return (miscelm); -} - JEMALLOC_ALWAYS_INLINE arena_chunk_map_misc_t * arena_run_to_miscelm(arena_run_t *run) { diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index c1cccd64..55ca7140 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -161,6 +161,7 @@ static const bool config_cache_oblivious = #include #endif +#include "jemalloc/internal/ph.h" #define RB_COMPACT #include "jemalloc/internal/rb.h" #include "jemalloc/internal/qr.h" @@ -371,7 +372,6 @@ typedef unsigned szind_t; #include "jemalloc/internal/tsd.h" #include "jemalloc/internal/mb.h" #include "jemalloc/internal/extent.h" -#include "jemalloc/internal/ph.h" #include "jemalloc/internal/arena.h" #include "jemalloc/internal/bitmap.h" #include "jemalloc/internal/base.h" @@ -402,7 +402,6 @@ typedef unsigned szind_t; #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/mb.h" #include "jemalloc/internal/bitmap.h" -#include "jemalloc/internal/ph.h" #define JEMALLOC_ARENA_STRUCTS_A #include "jemalloc/internal/arena.h" #undef JEMALLOC_ARENA_STRUCTS_A @@ -495,7 +494,6 @@ void jemalloc_postfork_child(void); #include "jemalloc/internal/mb.h" #include "jemalloc/internal/bitmap.h" #include "jemalloc/internal/extent.h" -#include "jemalloc/internal/ph.h" #include "jemalloc/internal/arena.h" #include "jemalloc/internal/base.h" #include "jemalloc/internal/rtree.h" @@ -527,7 +525,6 @@ void jemalloc_postfork_child(void); #include "jemalloc/internal/tsd.h" #include "jemalloc/internal/mb.h" #include "jemalloc/internal/extent.h" -#include "jemalloc/internal/ph.h" #include 
"jemalloc/internal/base.h" #include "jemalloc/internal/rtree.h" #include "jemalloc/internal/pages.h" diff --git a/include/jemalloc/internal/ph.h b/include/jemalloc/internal/ph.h index 519f0dda..70b6e2cd 100644 --- a/include/jemalloc/internal/ph.h +++ b/include/jemalloc/internal/ph.h @@ -4,257 +4,341 @@ * "The Pairing Heap: A New Form of Self-Adjusting Heap" * https://www.cs.cmu.edu/~sleator/papers/pairing-heaps.pdf * - * With auxiliary list, described in a follow on paper + * With auxiliary twopass list, described in a follow on paper. * * "Pairing Heaps: Experiments and Analysis" * http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.106.2988&rep=rep1&type=pdf * - * Where search/nsearch/last are not needed, ph.h outperforms rb.h by ~7x fewer - * cpu cycles, and ~4x fewer memory references. - * - * Tagging parent/prev pointers on the next list was also described in the - * original paper, such that only two pointers are needed. This is not - * implemented here, as it substantially increases the memory references - * needed when ph_remove is called, almost overshadowing the other performance - * gains. - * ******************************************************************************* */ -#ifdef JEMALLOC_H_TYPES -typedef struct ph_node_s ph_node_t; -typedef struct ph_heap_s ph_heap_t; +#ifndef PH_H_ +#define PH_H_ -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -struct ph_node_s { - ph_node_t *subheaps; - ph_node_t *parent; - ph_node_t *next; - ph_node_t *prev; -}; - -struct ph_heap_s { - ph_node_t *root; -}; - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#ifndef JEMALLOC_ENABLE_INLINE -ph_node_t *ph_merge_ordered(ph_node_t *heap1, ph_node_t *heap2); -ph_node_t *ph_merge(ph_node_t *heap1, ph_node_t *heap2); -ph_node_t *ph_merge_pairs(ph_node_t *subheaps); -void ph_merge_aux_list(ph_heap_t *l); -void ph_new(ph_heap_t *n); -ph_node_t *ph_first(ph_heap_t *l); -void ph_insert(ph_heap_t *l, ph_node_t *n); -ph_node_t *ph_remove_first(ph_heap_t *l); -void ph_remove(ph_heap_t *l, ph_node_t *n); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PH_C_)) - -/* Helper routines ************************************************************/ - -JEMALLOC_INLINE ph_node_t * -ph_merge_ordered(ph_node_t *heap1, ph_node_t *heap2) -{ - - assert(heap1 != NULL); - assert(heap2 != NULL); - assert ((uintptr_t)heap1 <= (uintptr_t)heap2); - - heap2->parent = heap1; - heap2->prev = NULL; - heap2->next = heap1->subheaps; - if (heap1->subheaps != NULL) - heap1->subheaps->prev = heap2; - heap1->subheaps = heap2; - return (heap1); +/* Node structure. */ +#define phn(a_type) \ +struct { \ + a_type *phn_prev; \ + a_type *phn_next; \ + a_type *phn_lchild; \ } -JEMALLOC_INLINE ph_node_t * -ph_merge(ph_node_t *heap1, ph_node_t *heap2) -{ - - if (heap1 == NULL) - return (heap2); - if (heap2 == NULL) - return (heap1); - /* Optional: user-settable comparison function */ - if ((uintptr_t)heap1 < (uintptr_t)heap2) - return (ph_merge_ordered(heap1, heap2)); - else - return (ph_merge_ordered(heap2, heap1)); +/* Root structure. */ +#define ph(a_type) \ +struct { \ + a_type *ph_root; \ } -JEMALLOC_INLINE ph_node_t * -ph_merge_pairs(ph_node_t *subheaps) -{ +/* Internal utility macros. 
*/ +#define phn_lchild_get(a_type, a_field, a_phn) \ + (a_phn->a_field.phn_lchild) +#define phn_lchild_set(a_type, a_field, a_phn, a_lchild) do { \ + a_phn->a_field.phn_lchild = a_lchild; \ +} while (0) - if (subheaps == NULL) - return (NULL); - if (subheaps->next == NULL) - return (subheaps); - { - ph_node_t *l0 = subheaps; - ph_node_t *l1 = l0->next; - ph_node_t *lrest = l1->next; +#define phn_next_get(a_type, a_field, a_phn) \ + (a_phn->a_field.phn_next) +#define phn_prev_set(a_type, a_field, a_phn, a_prev) do { \ + a_phn->a_field.phn_prev = a_prev; \ +} while (0) - if (lrest != NULL) - lrest->prev = NULL; - l1->next = NULL; - l1->prev = NULL; - l0->next = NULL; - l0->prev = NULL; - return (ph_merge(ph_merge(l0, l1), ph_merge_pairs(lrest))); - } -} +#define phn_prev_get(a_type, a_field, a_phn) \ + (a_phn->a_field.phn_prev) +#define phn_next_set(a_type, a_field, a_phn, a_next) do { \ + a_phn->a_field.phn_next = a_next; \ +} while (0) + +#define phn_merge_ordered(a_type, a_field, a_phn0, a_phn1, a_cmp) do { \ + a_type *phn0child; \ + \ + assert(a_phn0 != NULL); \ + assert(a_phn1 != NULL); \ + assert(a_cmp(a_phn0, a_phn1) <= 0); \ + \ + phn_prev_set(a_type, a_field, a_phn1, a_phn0); \ + phn0child = phn_lchild_get(a_type, a_field, a_phn0); \ + phn_next_set(a_type, a_field, a_phn1, phn0child); \ + if (phn0child != NULL) \ + phn_prev_set(a_type, a_field, phn0child, a_phn1); \ + phn_lchild_set(a_type, a_field, a_phn0, a_phn1); \ +} while (0) + +#define phn_merge(a_type, a_field, a_phn0, a_phn1, a_cmp, r_phn) do { \ + if (a_phn0 == NULL) \ + r_phn = a_phn1; \ + else if (a_phn1 == NULL) \ + r_phn = a_phn0; \ + else if (a_cmp(a_phn0, a_phn1) < 0) { \ + phn_merge_ordered(a_type, a_field, a_phn0, a_phn1, \ + a_cmp); \ + r_phn = a_phn0; \ + } else { \ + phn_merge_ordered(a_type, a_field, a_phn1, a_phn0, \ + a_cmp); \ + r_phn = a_phn1; \ + } \ +} while (0) + +#define ph_merge_siblings(a_type, a_field, a_phn, a_cmp, r_phn) do { \ + a_type *head = NULL; \ + a_type *tail = NULL; \ + a_type *phn0 = a_phn; \ + a_type *phn1 = phn_next_get(a_type, a_field, phn0); \ + \ + /* \ + * Multipass merge, wherein the first two elements of a FIFO \ + * are repeatedly merged, and each result is appended to the \ + * singly linked FIFO, until the FIFO contains only a single \ + * element. We start with a sibling list but no reference to \ + * its tail, so we do a single pass over the sibling list to \ + * populate the FIFO. 
\ + */ \ + if (phn1 != NULL) { \ + a_type *phnrest = phn_next_get(a_type, a_field, phn1); \ + if (phnrest != NULL) \ + phn_prev_set(a_type, a_field, phnrest, NULL); \ + phn_prev_set(a_type, a_field, phn0, NULL); \ + phn_next_set(a_type, a_field, phn0, NULL); \ + phn_prev_set(a_type, a_field, phn1, NULL); \ + phn_next_set(a_type, a_field, phn1, NULL); \ + phn_merge(a_type, a_field, phn0, phn1, a_cmp, phn0); \ + head = tail = phn0; \ + phn0 = phnrest; \ + while (phn0 != NULL) { \ + phn1 = phn_next_get(a_type, a_field, phn0); \ + if (phn1 != NULL) { \ + phnrest = phn_next_get(a_type, a_field, \ + phn1); \ + if (phnrest != NULL) { \ + phn_prev_set(a_type, a_field, \ + phnrest, NULL); \ + } \ + phn_prev_set(a_type, a_field, phn0, \ + NULL); \ + phn_next_set(a_type, a_field, phn0, \ + NULL); \ + phn_prev_set(a_type, a_field, phn1, \ + NULL); \ + phn_next_set(a_type, a_field, phn1, \ + NULL); \ + phn_merge(a_type, a_field, phn0, phn1, \ + a_cmp, phn0); \ + phn_next_set(a_type, a_field, tail, \ + phn0); \ + tail = phn0; \ + phn0 = phnrest; \ + } else { \ + phn_next_set(a_type, a_field, tail, \ + phn0); \ + tail = phn0; \ + phn0 = NULL; \ + } \ + } \ + phn0 = head; \ + phn1 = phn_next_get(a_type, a_field, phn0); \ + if (phn1 != NULL) { \ + while (true) { \ + head = phn_next_get(a_type, a_field, \ + phn1); \ + assert(phn_prev_get(a_type, a_field, \ + phn0) == NULL); \ + phn_next_set(a_type, a_field, phn0, \ + NULL); \ + assert(phn_prev_get(a_type, a_field, \ + phn1) == NULL); \ + phn_next_set(a_type, a_field, phn1, \ + NULL); \ + phn_merge(a_type, a_field, phn0, phn1, \ + a_cmp, phn0); \ + if (head == NULL) \ + break; \ + phn_next_set(a_type, a_field, tail, \ + phn0); \ + tail = phn0; \ + phn0 = head; \ + phn1 = phn_next_get(a_type, a_field, \ + phn0); \ + } \ + } \ + } \ + r_phn = phn0; \ +} while (0) + +#define ph_merge_aux(a_type, a_field, a_ph, a_cmp) do { \ + a_type *phn = phn_next_get(a_type, a_field, a_ph->ph_root); \ + if (phn != NULL) { \ + phn_prev_set(a_type, a_field, a_ph->ph_root, NULL); \ + phn_next_set(a_type, a_field, a_ph->ph_root, NULL); \ + phn_prev_set(a_type, a_field, phn, NULL); \ + ph_merge_siblings(a_type, a_field, phn, a_cmp, phn); \ + assert(phn_next_get(a_type, a_field, phn) == NULL); \ + phn_merge(a_type, a_field, a_ph->ph_root, phn, a_cmp, \ + a_ph->ph_root); \ + } \ +} while (0) + +#define ph_merge_children(a_type, a_field, a_phn, a_cmp, r_phn) do { \ + a_type *lchild = phn_lchild_get(a_type, a_field, a_phn); \ + if (lchild == NULL) \ + r_phn = NULL; \ + else { \ + ph_merge_siblings(a_type, a_field, lchild, a_cmp, \ + r_phn); \ + } \ +} while (0) /* - * Merge the aux list into the root node. + * The ph_proto() macro generates function prototypes that correspond to the + * functions generated by an equivalently parameterized call to ph_gen(). */ -JEMALLOC_INLINE void -ph_merge_aux_list(ph_heap_t *l) -{ +#define ph_proto(a_attr, a_prefix, a_ph_type, a_type) \ +a_attr void a_prefix##new(a_ph_type *ph); \ +a_attr bool a_prefix##empty(a_ph_type *ph); \ +a_attr a_type *a_prefix##first(a_ph_type *ph); \ +a_attr void a_prefix##insert(a_ph_type *ph, a_type *phn); \ +a_attr a_type *a_prefix##remove_first(a_ph_type *ph); \ +a_attr void a_prefix##remove(a_ph_type *ph, a_type *phn); - if (l->root == NULL) - return; - if (l->root->next != NULL) { - ph_node_t *l0 = l->root->next; - ph_node_t *l1 = l0->next; - ph_node_t *lrest = NULL; - - /* Multipass merge. 
*/ - while (l1 != NULL) { - lrest = l1->next; - if (lrest != NULL) - lrest->prev = NULL; - l1->next = NULL; - l1->prev = NULL; - l0->next = NULL; - l0->prev = NULL; - l0 = ph_merge(l0, l1); - l1 = lrest; - } - l->root->next = NULL; - l->root = ph_merge(l->root, l0); - } +/* + * The ph_gen() macro generates a type-specific pairing heap implementation, + * based on the above cpp macros. + */ +#define ph_gen(a_attr, a_prefix, a_ph_type, a_type, a_field, a_cmp) \ +a_attr void \ +a_prefix##new(a_ph_type *ph) \ +{ \ + \ + memset(ph, 0, sizeof(ph(a_type))); \ +} \ +a_attr bool \ +a_prefix##empty(a_ph_type *ph) { \ + \ + return (ph->ph_root == NULL); \ +} \ +a_attr a_type * \ +a_prefix##first(a_ph_type *ph) \ +{ \ + \ + if (ph->ph_root == NULL) \ + return (NULL); \ + ph_merge_aux(a_type, a_field, ph, a_cmp); \ + return (ph->ph_root); \ +} \ +a_attr void \ +a_prefix##insert(a_ph_type *ph, a_type *phn) \ +{ \ + \ + memset(&phn->a_field, 0, sizeof(phn(a_type))); \ + \ + /* \ + * Treat the root as an aux list during insertion, and lazily \ + * merge during a_prefix##remove_first(). For elements that \ + * are inserted, then removed via a_prefix##remove() before the \ + * aux list is ever processed, this makes insert/remove \ + * constant-time, whereas eager merging would make insert \ + * O(log n). \ + */ \ + if (ph->ph_root == NULL) \ + ph->ph_root = phn; \ + else { \ + phn_next_set(a_type, a_field, phn, phn_next_get(a_type, \ + a_field, ph->ph_root)); \ + if (phn_next_get(a_type, a_field, ph->ph_root) != \ + NULL) { \ + phn_prev_set(a_type, a_field, \ + phn_next_get(a_type, a_field, ph->ph_root), \ + phn); \ + } \ + phn_prev_set(a_type, a_field, phn, ph->ph_root); \ + phn_next_set(a_type, a_field, ph->ph_root, phn); \ + } \ +} \ +a_attr a_type * \ +a_prefix##remove_first(a_ph_type *ph) \ +{ \ + a_type *ret; \ + \ + if (ph->ph_root == NULL) \ + return (NULL); \ + ph_merge_aux(a_type, a_field, ph, a_cmp); \ + \ + ret = ph->ph_root; \ + \ + ph_merge_children(a_type, a_field, ph->ph_root, a_cmp, \ + ph->ph_root); \ + \ + return (ret); \ +} \ +a_attr void \ +a_prefix##remove(a_ph_type *ph, a_type *phn) \ +{ \ + a_type *replace, *parent; \ + \ + /* \ + * We can delete from aux list without merging it, but we need \ + * to merge if we are dealing with the root node. \ + */ \ + if (ph->ph_root == phn) { \ + ph_merge_aux(a_type, a_field, ph, a_cmp); \ + if (ph->ph_root == phn) { \ + ph_merge_children(a_type, a_field, ph->ph_root, \ + a_cmp, ph->ph_root); \ + return; \ + } \ + } \ + \ + /* Get parent (if phn is leftmost child) before mutating. */ \ + if ((parent = phn_prev_get(a_type, a_field, phn)) != NULL) { \ + if (phn_lchild_get(a_type, a_field, parent) != phn) \ + parent = NULL; \ + } \ + /* Find a possible replacement node, and link to parent. */ \ + ph_merge_children(a_type, a_field, phn, a_cmp, replace); \ + /* Set next/prev for sibling linked list. 
*/ \ + if (replace != NULL) { \ + if (parent != NULL) { \ + phn_prev_set(a_type, a_field, replace, parent); \ + phn_lchild_set(a_type, a_field, parent, \ + replace); \ + } else { \ + phn_prev_set(a_type, a_field, replace, \ + phn_prev_get(a_type, a_field, phn)); \ + if (phn_prev_get(a_type, a_field, phn) != \ + NULL) { \ + phn_next_set(a_type, a_field, \ + phn_prev_get(a_type, a_field, phn), \ + replace); \ + } \ + } \ + phn_next_set(a_type, a_field, replace, \ + phn_next_get(a_type, a_field, phn)); \ + if (phn_next_get(a_type, a_field, phn) != NULL) { \ + phn_prev_set(a_type, a_field, \ + phn_next_get(a_type, a_field, phn), \ + replace); \ + } \ + } else { \ + if (parent != NULL) { \ + a_type *next = phn_next_get(a_type, a_field, \ + phn); \ + phn_lchild_set(a_type, a_field, parent, next); \ + if (next != NULL) { \ + phn_prev_set(a_type, a_field, next, \ + parent); \ + } \ + } else { \ + assert(phn_prev_get(a_type, a_field, phn) != \ + NULL); \ + phn_next_set(a_type, a_field, \ + phn_prev_get(a_type, a_field, phn), \ + phn_next_get(a_type, a_field, phn)); \ + } \ + if (phn_next_get(a_type, a_field, phn) != NULL) { \ + phn_prev_set(a_type, a_field, \ + phn_next_get(a_type, a_field, phn), \ + phn_prev_get(a_type, a_field, phn)); \ + } \ + } \ } -/* User API *******************************************************************/ - -JEMALLOC_INLINE void -ph_new(ph_heap_t *n) -{ - - memset(n, 0, sizeof(ph_heap_t)); -} - -JEMALLOC_INLINE ph_node_t * -ph_first(ph_heap_t *l) -{ - - /* - * For the cost of an extra pointer, a l->min could be stored instead of - * merging the aux list here. Current users always call ph_remove(l, - * ph_first(l)) though, and the aux list must always be merged for - * delete of the min node anyway. - */ - ph_merge_aux_list(l); - return (l->root); -} - -JEMALLOC_INLINE void -ph_insert(ph_heap_t *l, ph_node_t *n) -{ - - memset(n, 0, sizeof(ph_node_t)); - - /* - * Non-aux list insert: - * - * l->root = ph_merge(l->root, n); - * - * Aux list insert: - */ - if (l->root == NULL) - l->root = n; - else { - n->next = l->root->next; - if (l->root->next != NULL) - l->root->next->prev = n; - n->prev = l->root; - l->root->next = n; - } -} - -JEMALLOC_INLINE ph_node_t * -ph_remove_first(ph_heap_t *l) -{ - ph_node_t *ret; - - ph_merge_aux_list(l); - if (l->root == NULL) - return (NULL); - - ret = l->root; - - l->root = ph_merge_pairs(l->root->subheaps); - - return (ret); -} - -JEMALLOC_INLINE void -ph_remove(ph_heap_t *l, ph_node_t *n) -{ - ph_node_t *replace; - - /* - * We can delete from aux list without merging it, but we need to merge - * if we are dealing with the root node. - */ - if (l->root == n) { - ph_merge_aux_list(l); - if (l->root == n) { - ph_remove_first(l); - return; - } - } - - /* Find a possible replacement node, and link to parent. */ - replace = ph_merge_pairs(n->subheaps); - if (n->parent != NULL && n->parent->subheaps == n) { - if (replace != NULL) - n->parent->subheaps = replace; - else - n->parent->subheaps = n->next; - } - /* Set next/prev for sibling linked list. 
*/ - if (replace != NULL) { - replace->parent = n->parent; - replace->prev = n->prev; - if (n->prev != NULL) - n->prev->next = replace; - replace->next = n->next; - if (n->next != NULL) - n->next->prev = replace; - } else { - if (n->prev != NULL) - n->prev->next = n->next; - if (n->next != NULL) - n->next->prev = n->prev; - } -} -#endif - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ +#endif /* PH_H_ */ diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 969c73df..551cb937 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -82,7 +82,6 @@ arena_nthreads_dec arena_nthreads_get arena_nthreads_inc arena_palloc -arena_ph_to_miscelm arena_postfork_child arena_postfork_parent arena_prefork @@ -101,6 +100,12 @@ arena_ralloc_junk_large arena_ralloc_no_move arena_rd_to_miscelm arena_redzone_corruption +arena_run_heap_empty +arena_run_heap_first +arena_run_heap_insert +arena_run_heap_new +arena_run_heap_remove_first +arena_run_heap_remove arena_run_regind arena_run_to_miscelm arena_salloc @@ -381,15 +386,6 @@ pages_map pages_purge pages_trim pages_unmap -ph_first -ph_insert -ph_merge -ph_merge_aux_list -ph_merge_ordered -ph_merge_pairs -ph_new -ph_remove_first -ph_remove pow2_ceil_u32 pow2_ceil_u64 pow2_ceil_zu diff --git a/src/arena.c b/src/arena.c index 1d30de57..d884dc4c 100644 --- a/src/arena.c +++ b/src/arena.c @@ -59,6 +59,23 @@ arena_miscelm_size_get(const arena_chunk_map_misc_t *miscelm) return (arena_mapbits_size_decode(mapbits)); } +JEMALLOC_INLINE_C int +arena_run_addr_comp(const arena_chunk_map_misc_t *a, + const arena_chunk_map_misc_t *b) +{ + uintptr_t a_miscelm = (uintptr_t)a; + uintptr_t b_miscelm = (uintptr_t)b; + + assert(a != NULL); + assert(b != NULL); + + return ((a_miscelm > b_miscelm) - (a_miscelm < b_miscelm)); +} + +/* Generate pairing heap functions. 
*/ +ph_gen(static UNUSED, arena_run_heap_, arena_run_heap_t, arena_chunk_map_misc_t, + ph_link, arena_run_addr_comp) + static size_t run_quantize_floor_compute(size_t size) { @@ -182,7 +199,7 @@ run_quantize_ceil(size_t size) run_quantize_t *run_quantize_ceil = JEMALLOC_N(run_quantize_ceil_impl); #endif -static ph_heap_t * +static arena_run_heap_t * arena_runs_avail_get(arena_t *arena, szind_t ind) { @@ -200,8 +217,8 @@ arena_avail_insert(arena_t *arena, arena_chunk_t *chunk, size_t pageind, arena_miscelm_get_const(chunk, pageind)))); assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> LG_PAGE)); - ph_insert(arena_runs_avail_get(arena, ind), - &arena_miscelm_get_mutable(chunk, pageind)->ph_link); + arena_run_heap_insert(arena_runs_avail_get(arena, ind), + arena_miscelm_get_mutable(chunk, pageind)); } static void @@ -212,8 +229,8 @@ arena_avail_remove(arena_t *arena, arena_chunk_t *chunk, size_t pageind, arena_miscelm_get_const(chunk, pageind)))); assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> LG_PAGE)); - ph_remove(arena_runs_avail_get(arena, ind), - &arena_miscelm_get_mutable(chunk, pageind)->ph_link); + arena_run_heap_remove(arena_runs_avail_get(arena, ind), + arena_miscelm_get_mutable(chunk, pageind)); } static void @@ -1065,12 +1082,10 @@ arena_run_first_best_fit(arena_t *arena, size_t size) ind = size2index(run_quantize_ceil(size)); for (i = ind; i < runs_avail_nclasses + runs_avail_bias; i++) { - ph_node_t *node = ph_first(arena_runs_avail_get(arena, i)); - if (node != NULL) { - arena_chunk_map_misc_t *miscelm = - arena_ph_to_miscelm(node); + arena_chunk_map_misc_t *miscelm = arena_run_heap_first( + arena_runs_avail_get(arena, i)); + if (miscelm != NULL) return (&miscelm->run); - } } return (NULL); @@ -2052,45 +2067,26 @@ arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, 0)); } -static arena_run_t * -arena_bin_runs_first(arena_bin_t *bin) -{ - ph_node_t *node; - arena_chunk_map_misc_t *miscelm; - - node = ph_first(&bin->runs); - if (node == NULL) - return (NULL); - miscelm = arena_ph_to_miscelm(node); - return (&miscelm->run); -} - static void arena_bin_runs_insert(arena_bin_t *bin, arena_run_t *run) { arena_chunk_map_misc_t *miscelm = arena_run_to_miscelm(run); - ph_insert(&bin->runs, &miscelm->ph_link); -} - -static void -arena_bin_runs_remove(arena_bin_t *bin, arena_run_t *run) -{ - arena_chunk_map_misc_t *miscelm = arena_run_to_miscelm(run); - - ph_remove(&bin->runs, &miscelm->ph_link); + arena_run_heap_insert(&bin->runs, miscelm); } static arena_run_t * arena_bin_nonfull_run_tryget(arena_bin_t *bin) { - arena_run_t *run = arena_bin_runs_first(bin); - if (run != NULL) { - arena_bin_runs_remove(bin, run); - if (config_stats) - bin->stats.reruns++; - } - return (run); + arena_chunk_map_misc_t *miscelm; + + miscelm = arena_run_heap_remove_first(&bin->runs); + if (miscelm == NULL) + return (NULL); + if (config_stats) + bin->stats.reruns++; + + return (&miscelm->run); } static arena_run_t * @@ -2645,13 +2641,16 @@ arena_dissociate_bin_run(arena_chunk_t *chunk, arena_run_t *run, &chunk->node), bin); arena_bin_info_t *bin_info = &arena_bin_info[binind]; + /* + * The following block's conditional is necessary because if the + * run only contains one region, then it never gets inserted + * into the non-full runs tree. + */ if (bin_info->nregs != 1) { - /* - * This block's conditional is necessary because if the - * run only contains one region, then it never gets - * inserted into the non-full runs tree. 
- */ - arena_bin_runs_remove(bin, run); + arena_chunk_map_misc_t *miscelm = + arena_run_to_miscelm(run); + + arena_run_heap_remove(&bin->runs, miscelm); } } } @@ -3312,7 +3311,7 @@ arena_new(unsigned ind) arena_bin_t *bin; /* Compute arena size to incorporate sufficient runs_avail elements. */ - arena_size = offsetof(arena_t, runs_avail) + (sizeof(ph_heap_t) * + arena_size = offsetof(arena_t, runs_avail) + (sizeof(arena_run_heap_t) * runs_avail_nclasses); /* * Allocate arena, arena->lstats, and arena->hstats contiguously, mainly @@ -3372,7 +3371,7 @@ arena_new(unsigned ind) arena->ndirty = 0; for(i = 0; i < runs_avail_nclasses; i++) - ph_new(&arena->runs_avail[i]); + arena_run_heap_new(&arena->runs_avail[i]); qr_new(&arena->runs_dirty, rd_link); qr_new(&arena->chunks_cache, cc_link); @@ -3401,7 +3400,7 @@ arena_new(unsigned ind) if (malloc_mutex_init(&bin->lock)) return (NULL); bin->runcur = NULL; - ph_new(&bin->runs); + arena_run_heap_new(&bin->runs); if (config_stats) memset(&bin->stats, 0, sizeof(malloc_bin_stats_t)); } diff --git a/src/ph.c b/src/ph.c deleted file mode 100644 index 051a20d7..00000000 --- a/src/ph.c +++ /dev/null @@ -1,2 +0,0 @@ -#define JEMALLOC_PH_C_ -#include "jemalloc/internal/jemalloc_internal.h" diff --git a/test/unit/ph.c b/test/unit/ph.c index 103475b4..da442f07 100644 --- a/test/unit/ph.c +++ b/test/unit/ph.c @@ -3,58 +3,275 @@ typedef struct node_s node_t; struct node_s { - ph_node_t link; +#define NODE_MAGIC 0x9823af7e + uint32_t magic; + phn(node_t) link; + uint64_t key; }; +static int +node_cmp(const node_t *a, const node_t *b) +{ + int ret; + + ret = (a->key > b->key) - (a->key < b->key); + if (ret == 0) { + /* + * Duplicates are not allowed in the heap, so force an + * arbitrary ordering for non-identical items with equal keys. 
+ */ + ret = (((uintptr_t)a) > ((uintptr_t)b)) + - (((uintptr_t)a) < ((uintptr_t)b)); + } + return (ret); +} + +static int +node_cmp_magic(const node_t *a, const node_t *b) { + + assert_u32_eq(a->magic, NODE_MAGIC, "Bad magic"); + assert_u32_eq(b->magic, NODE_MAGIC, "Bad magic"); + + return (node_cmp(a, b)); +} + +typedef ph(node_t) heap_t; +ph_gen(static, heap_, heap_t, node_t, link, node_cmp_magic); + +static void +node_print(const node_t *node, unsigned depth) +{ + unsigned i; + node_t *leftmost_child, *sibling; + + for (i = 0; i < depth; i++) + malloc_printf("\t"); + malloc_printf("%2"FMTu64"\n", node->key); + + leftmost_child = phn_lchild_get(node_t, link, node); + if (leftmost_child == NULL) + return; + node_print(leftmost_child, depth + 1); + + for (sibling = phn_next_get(node_t, link, leftmost_child); sibling != + NULL; sibling = phn_next_get(node_t, link, sibling)) { + node_print(sibling, depth + 1); + } +} + +static void +heap_print(const heap_t *heap) +{ + node_t *auxelm; + + malloc_printf("vvv heap %p vvv\n", heap); + if (heap->ph_root == NULL) + goto label_return; + + node_print(heap->ph_root, 0); + + for (auxelm = phn_next_get(node_t, link, heap->ph_root); auxelm != NULL; + auxelm = phn_next_get(node_t, link, auxelm)) { + assert_ptr_eq(phn_next_get(node_t, link, phn_prev_get(node_t, + link, auxelm)), auxelm, + "auxelm's prev doesn't link to auxelm"); + node_print(auxelm, 0); + } + +label_return: + malloc_printf("^^^ heap %p ^^^\n", heap); +} + +static unsigned +node_validate(const node_t *node, const node_t *parent) +{ + unsigned nnodes = 1; + node_t *leftmost_child, *sibling; + + if (parent != NULL) { + assert_d_ge(node_cmp_magic(node, parent), 0, + "Child is less than parent"); + } + + leftmost_child = phn_lchild_get(node_t, link, node); + if (leftmost_child == NULL) + return (nnodes); + assert_ptr_eq((void *)phn_prev_get(node_t, link, leftmost_child), + (void *)node, "Leftmost child does not link to node"); + nnodes += node_validate(leftmost_child, node); + + for (sibling = phn_next_get(node_t, link, leftmost_child); sibling != + NULL; sibling = phn_next_get(node_t, link, sibling)) { + assert_ptr_eq(phn_next_get(node_t, link, phn_prev_get(node_t, + link, sibling)), sibling, + "sibling's prev doesn't link to sibling"); + nnodes += node_validate(sibling, node); + } + return (nnodes); +} + +static unsigned +heap_validate(const heap_t *heap) +{ + unsigned nnodes = 0; + node_t *auxelm; + + if (heap->ph_root == NULL) + goto label_return; + + nnodes += node_validate(heap->ph_root, NULL); + + for (auxelm = phn_next_get(node_t, link, heap->ph_root); auxelm != NULL; + auxelm = phn_next_get(node_t, link, auxelm)) { + assert_ptr_eq(phn_next_get(node_t, link, phn_prev_get(node_t, + link, auxelm)), auxelm, + "auxelm's prev doesn't link to auxelm"); + nnodes += node_validate(auxelm, NULL); + } + +label_return: + if (false) + heap_print(heap); + return (nnodes); +} + TEST_BEGIN(test_ph_empty) { - ph_heap_t heap; + heap_t heap; - ph_new(&heap); - - assert_ptr_null(ph_first(&heap), "Unexpected node"); + heap_new(&heap); + assert_true(heap_empty(&heap), "Heap should be empty"); + assert_ptr_null(heap_first(&heap), "Unexpected node"); } TEST_END +static void +node_remove(heap_t *heap, node_t *node) +{ + + heap_remove(heap, node); + + node->magic = 0; +} + +static node_t * +node_remove_first(heap_t *heap) +{ + node_t *node = heap_remove_first(heap); + node->magic = 0; + return (node); +} + TEST_BEGIN(test_ph_random) { #define NNODES 25 +#define NBAGS 250 #define SEED 42 sfmt_t *sfmt; - 
ph_heap_t heap; + uint64_t bag[NNODES]; + heap_t heap; node_t nodes[NNODES]; unsigned i, j, k; sfmt = init_gen_rand(SEED); - for (i = 0; i < 2; i++) { + for (i = 0; i < NBAGS; i++) { + switch (i) { + case 0: + /* Insert in order. */ + for (j = 0; j < NNODES; j++) + bag[j] = j; + break; + case 1: + /* Insert in reverse order. */ + for (j = 0; j < NNODES; j++) + bag[j] = NNODES - j - 1; + break; + default: + for (j = 0; j < NNODES; j++) + bag[j] = gen_rand64_range(sfmt, NNODES); + } + for (j = 1; j <= NNODES; j++) { /* Initialize heap and nodes. */ - ph_new(&heap); + heap_new(&heap); + assert_u_eq(heap_validate(&heap), 0, + "Incorrect node count"); + for (k = 0; k < j; k++) { + nodes[k].magic = NODE_MAGIC; + nodes[k].key = bag[k]; + } /* Insert nodes. */ for (k = 0; k < j; k++) { - ph_insert(&heap, &nodes[k].link); - - assert_ptr_not_null(ph_first(&heap), - "Heap should not be empty"); + heap_insert(&heap, &nodes[k]); + if (i % 13 == 12) { + /* Trigger merging. */ + assert_ptr_not_null(heap_first(&heap), + "Heap should not be empty"); + } + assert_u_eq(heap_validate(&heap), k + 1, + "Incorrect node count"); } + assert_false(heap_empty(&heap), + "Heap should not be empty"); + /* Remove nodes. */ - switch (i % 2) { + switch (i % 4) { case 0: - for (k = 0; k < j; k++) - ph_remove(&heap, &nodes[k].link); + for (k = 0; k < j; k++) { + assert_u_eq(heap_validate(&heap), j - k, + "Incorrect node count"); + node_remove(&heap, &nodes[k]); + assert_u_eq(heap_validate(&heap), j - k + - 1, "Incorrect node count"); + } break; case 1: - for (k = j; k > 0; k--) - ph_remove(&heap, &nodes[k-1].link); + for (k = j; k > 0; k--) { + node_remove(&heap, &nodes[k-1]); + assert_u_eq(heap_validate(&heap), k - 1, + "Incorrect node count"); + } break; - default: + case 2: { + node_t *prev = NULL; + for (k = 0; k < j; k++) { + node_t *node = node_remove_first(&heap); + assert_u_eq(heap_validate(&heap), j - k + - 1, "Incorrect node count"); + if (prev != NULL) { + assert_d_ge(node_cmp(node, + prev), 0, + "Bad removal order"); + } + prev = node; + } + break; + } case 3: { + node_t *prev = NULL; + for (k = 0; k < j; k++) { + node_t *node = heap_first(&heap); + assert_u_eq(heap_validate(&heap), j - k, + "Incorrect node count"); + if (prev != NULL) { + assert_d_ge(node_cmp(node, + prev), 0, + "Bad removal order"); + } + node_remove(&heap, node); + assert_u_eq(heap_validate(&heap), j - k + - 1, "Incorrect node count"); + prev = node; + } + break; + } default: not_reached(); } - assert_ptr_null(ph_first(&heap), - "Heap should not be empty"); + assert_ptr_null(heap_first(&heap), + "Heap should be empty"); + assert_true(heap_empty(&heap), "Heap should be empty"); } } fini_gen_rand(sfmt); From 96aa67aca89725f0b1df3257421a3d0a48eb2700 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 4 Apr 2016 19:55:19 -0400 Subject: [PATCH 34/82] Clean up char vs. uint8_t in junk filling code. Consistently use uint8_t rather than char for junk filling code. --- include/jemalloc/internal/util.h | 4 ++-- test/unit/junk.c | 16 ++++++++-------- test/unit/zero.c | 16 +++++++++------- 3 files changed, 19 insertions(+), 17 deletions(-) diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index 949a0e0a..a0c2203d 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -41,8 +41,8 @@ #define MALLOC_PRINTF_BUFSIZE 4096 /* Junk fill patterns. 
*/ -#define JEMALLOC_ALLOC_JUNK 0xa5 -#define JEMALLOC_FREE_JUNK 0x5a +#define JEMALLOC_ALLOC_JUNK ((uint8_t)0xa5) +#define JEMALLOC_FREE_JUNK ((uint8_t)0x5a) /* * Wrap a cpp argument that contains commas such that it isn't broken up into diff --git a/test/unit/junk.c b/test/unit/junk.c index f4e62261..fecf6fae 100644 --- a/test/unit/junk.c +++ b/test/unit/junk.c @@ -29,7 +29,7 @@ arena_dalloc_junk_small_intercept(void *ptr, arena_bin_info_t *bin_info) arena_dalloc_junk_small_orig(ptr, bin_info); for (i = 0; i < bin_info->reg_size; i++) { - assert_c_eq(((char *)ptr)[i], JEMALLOC_FREE_JUNK, + assert_u_eq(((uint8_t *)ptr)[i], JEMALLOC_FREE_JUNK, "Missing junk fill for byte %zu/%zu of deallocated region", i, bin_info->reg_size); } @@ -44,7 +44,7 @@ arena_dalloc_junk_large_intercept(void *ptr, size_t usize) arena_dalloc_junk_large_orig(ptr, usize); for (i = 0; i < usize; i++) { - assert_c_eq(((char *)ptr)[i], JEMALLOC_FREE_JUNK, + assert_u_eq(((uint8_t *)ptr)[i], JEMALLOC_FREE_JUNK, "Missing junk fill for byte %zu/%zu of deallocated region", i, usize); } @@ -69,7 +69,7 @@ huge_dalloc_junk_intercept(void *ptr, size_t usize) static void test_junk(size_t sz_min, size_t sz_max) { - char *s; + uint8_t *s; size_t sz_prev, sz, i; if (opt_junk_free) { @@ -82,23 +82,23 @@ test_junk(size_t sz_min, size_t sz_max) } sz_prev = 0; - s = (char *)mallocx(sz_min, 0); + s = (uint8_t *)mallocx(sz_min, 0); assert_ptr_not_null((void *)s, "Unexpected mallocx() failure"); for (sz = sallocx(s, 0); sz <= sz_max; sz_prev = sz, sz = sallocx(s, 0)) { if (sz_prev > 0) { - assert_c_eq(s[0], 'a', + assert_u_eq(s[0], 'a', "Previously allocated byte %zu/%zu is corrupted", ZU(0), sz_prev); - assert_c_eq(s[sz_prev-1], 'a', + assert_u_eq(s[sz_prev-1], 'a', "Previously allocated byte %zu/%zu is corrupted", sz_prev-1, sz_prev); } for (i = sz_prev; i < sz; i++) { if (opt_junk_alloc) { - assert_c_eq(s[i], JEMALLOC_ALLOC_JUNK, + assert_u_eq(s[i], JEMALLOC_ALLOC_JUNK, "Newly allocated byte %zu/%zu isn't " "junk-filled", i, sz); } @@ -107,7 +107,7 @@ test_junk(size_t sz_min, size_t sz_max) if (xallocx(s, sz+1, 0, 0) == sz) { watch_junking(s); - s = (char *)rallocx(s, sz+1, 0); + s = (uint8_t *)rallocx(s, sz+1, 0); assert_ptr_not_null((void *)s, "Unexpected rallocx() failure"); assert_true(!opt_junk_free || saw_junking, diff --git a/test/unit/zero.c b/test/unit/zero.c index 93afc2b8..30ebe37a 100644 --- a/test/unit/zero.c +++ b/test/unit/zero.c @@ -8,39 +8,41 @@ const char *malloc_conf = static void test_zero(size_t sz_min, size_t sz_max) { - char *s; + uint8_t *s; size_t sz_prev, sz, i; +#define MAGIC ((uint8_t)0x61) sz_prev = 0; - s = (char *)mallocx(sz_min, 0); + s = (uint8_t *)mallocx(sz_min, 0); assert_ptr_not_null((void *)s, "Unexpected mallocx() failure"); for (sz = sallocx(s, 0); sz <= sz_max; sz_prev = sz, sz = sallocx(s, 0)) { if (sz_prev > 0) { - assert_c_eq(s[0], 'a', + assert_u_eq(s[0], MAGIC, "Previously allocated byte %zu/%zu is corrupted", ZU(0), sz_prev); - assert_c_eq(s[sz_prev-1], 'a', + assert_u_eq(s[sz_prev-1], MAGIC, "Previously allocated byte %zu/%zu is corrupted", sz_prev-1, sz_prev); } for (i = sz_prev; i < sz; i++) { - assert_c_eq(s[i], 0x0, + assert_u_eq(s[i], 0x0, "Newly allocated byte %zu/%zu isn't zero-filled", i, sz); - s[i] = 'a'; + s[i] = MAGIC; } if (xallocx(s, sz+1, 0, 0) == sz) { - s = (char *)rallocx(s, sz+1, 0); + s = (uint8_t *)rallocx(s, sz+1, 0); assert_ptr_not_null((void *)s, "Unexpected rallocx() failure"); } } dallocx(s, 0); +#undef MAGIC } TEST_BEGIN(test_zero_small) From 
245ae6036c09cc11a72fab4335495d95cddd5beb Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 6 Apr 2016 11:54:44 -0700 Subject: [PATCH 35/82] Support --with-lg-page values larger than actual page size. During over-allocation in preparation for creating aligned mappings, allocate one more page than necessary if PAGE is the actual page size, so that trimming still succeeds even if the system returns a mapping that has less than PAGE alignment. This allows compiling with e.g. 64 KiB "pages" on systems that actually use 4 KiB pages. Note that for e.g. --with-lg-page=21, it is also necessary to increase the chunk size (e.g. --with-malloc-conf=lg_chunk:22) so that there are at least two "pages" per chunk. In practice this isn't a particularly compelling configuration because so much (unusable) virtual memory is dedicated to chunk headers. --- include/jemalloc/internal/bitmap.h | 4 ++-- include/jemalloc/internal/jemalloc_internal.h.in | 4 ++-- src/arena.c | 2 +- src/chunk_mmap.c | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/include/jemalloc/internal/bitmap.h b/include/jemalloc/internal/bitmap.h index 894695f4..36f38b59 100644 --- a/include/jemalloc/internal/bitmap.h +++ b/include/jemalloc/internal/bitmap.h @@ -17,8 +17,8 @@ typedef unsigned long bitmap_t; /* * Do some analysis on how big the bitmap is before we use a tree. For a brute - * force linear search, if we would have to call ffsl more than 2^3 times, use a - * tree instead. + * force linear search, if we would have to call ffs_lu() more than 2^3 times, + * use a tree instead. */ #if LG_BITMAP_MAXBITS - LG_BITMAP_GROUP_NBITS > 3 # define USE_TREE diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 55ca7140..0b57b82a 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -741,7 +741,7 @@ sa2u(size_t size, size_t alignment) * Calculate the size of the over-size run that arena_palloc() * would need to allocate in order to guarantee the alignment. */ - if (usize + large_pad + alignment - PAGE <= arena_maxrun) + if (usize + large_pad + alignment <= arena_maxrun) return (usize); } @@ -771,7 +771,7 @@ sa2u(size_t size, size_t alignment) * Calculate the multi-chunk mapping that huge_palloc() would need in * order to guarantee the alignment. */ - if (usize + alignment - PAGE < usize) { + if (usize + alignment < usize) { /* size_t overflow. */ return (0); } diff --git a/src/arena.c b/src/arena.c index d884dc4c..3373e1d8 100644 --- a/src/arena.c +++ b/src/arena.c @@ -2500,7 +2500,7 @@ arena_palloc_large(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, return (NULL); alignment = PAGE_CEILING(alignment); - alloc_size = usize + large_pad + alignment - PAGE; + alloc_size = usize + large_pad + alignment; malloc_mutex_lock(&arena->lock); run = arena_run_alloc_large(arena, alloc_size, false); diff --git a/src/chunk_mmap.c b/src/chunk_mmap.c index 56b2ee42..e2e66bc9 100644 --- a/src/chunk_mmap.c +++ b/src/chunk_mmap.c @@ -9,7 +9,7 @@ chunk_alloc_mmap_slow(size_t size, size_t alignment, bool *zero, bool *commit) void *ret; size_t alloc_size; - alloc_size = size + alignment - PAGE; + alloc_size = size + alignment; /* Beware size_t wrap-around. */ if (alloc_size < size) return (NULL); From 667eca2ac215153855e62a75263df7accf25cdbc Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 6 Apr 2016 13:05:21 -0700 Subject: [PATCH 36/82] Simplify RTREE_HEIGHT_MAX definition. 
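As a quick sanity check on the simplified formula: with
LG_RTREE_BITS_PER_LEVEL == 4 and 64-bit pointers (LG_SIZEOF_PTR == 3),
RTREE_HEIGHT_MAX == (1U << (3+3)) / (1U << 4) == 64 / 16 == 4, and with
32-bit pointers it is 32 / 16 == 2, matching the constants in the
precomputed table removed below.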
Use 1U rather than ZU(1) in macro definitions, so that the preprocessor can evaluate the resulting expressions. --- include/jemalloc/internal/rtree.h | 33 ++++--------------------------- 1 file changed, 4 insertions(+), 29 deletions(-) diff --git a/include/jemalloc/internal/rtree.h b/include/jemalloc/internal/rtree.h index 36aa002b..8d0c584d 100644 --- a/include/jemalloc/internal/rtree.h +++ b/include/jemalloc/internal/rtree.h @@ -15,35 +15,10 @@ typedef struct rtree_s rtree_t; * machine address width. */ #define LG_RTREE_BITS_PER_LEVEL 4 -#define RTREE_BITS_PER_LEVEL (ZU(1) << LG_RTREE_BITS_PER_LEVEL) -/* - * Avoid math in RTREE_HEIGHT_MAX definition so that it can be used in cpp - * conditionals. The following defininitions are precomputed equivalents to: - * - * #define RTREE_HEIGHT_MAX \ - * ((ZU(1) << (LG_SIZEOF_PTR+3)) / RTREE_BITS_PER_LEVEL) - */ -#if LG_RTREE_BITS_PER_LEVEL == 2 -# if LG_SIZEOF_PTR == 3 -# define RTREE_HEIGHT_MAX 16 -# elif LG_SIZEOF_PTR == 2 -# define RTREE_HEIGHT_MAX 8 -# endif -#elif LG_RTREE_BITS_PER_LEVEL == 3 -# if LG_SIZEOF_PTR == 3 -# define RTREE_HEIGHT_MAX 8 -# elif LG_SIZEOF_PTR == 2 -# define RTREE_HEIGHT_MAX 4 -# endif -#elif LG_RTREE_BITS_PER_LEVEL == 4 -# if LG_SIZEOF_PTR == 3 -# define RTREE_HEIGHT_MAX 4 -# elif LG_SIZEOF_PTR == 2 -# define RTREE_HEIGHT_MAX 2 -# endif -#else -# error Unsupported LG_RTREE_BITS_PER_LEVEL -#endif +#define RTREE_BITS_PER_LEVEL (1U << LG_RTREE_BITS_PER_LEVEL) +/* Maximum rtree height. */ +#define RTREE_HEIGHT_MAX \ + ((1U << (LG_SIZEOF_PTR+3)) / RTREE_BITS_PER_LEVEL) /* Used for two-stage lock-free node initialization. */ #define RTREE_NODE_INITIALIZING ((rtree_node_elm_t *)0x1) From e7642715ac535cf88585d4e5ca191c8042cc2399 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 11 Apr 2016 18:47:18 -0700 Subject: [PATCH 37/82] Fix malloc_stats_print() to print correct opt.narenas value. This regression was caused by 8f683b94a751c65af8f9fa25970ccf2917b96bb8 (Make opt_narenas unsigned rather than size_t.). 
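For reference, a minimal stand-alone reader for the affected option, using
only the public mallctl() interface, shows the intended pairing of the
unsigned value with the %u conversion (illustrative, not part of the patch):

    #include <stdio.h>
    #include <jemalloc/jemalloc.h>

    int
    main(void)
    {
    	unsigned narenas;
    	size_t sz = sizeof(narenas);

    	/* opt.narenas is unsigned since 8f683b94, so pair it with %u. */
    	if (mallctl("opt.narenas", &narenas, &sz, NULL, 0) == 0)
    		printf("opt.narenas: %u\n", narenas);
    	return (0);
    }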
--- src/stats.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/stats.c b/src/stats.c index a7249479..87b09e58 100644 --- a/src/stats.c +++ b/src/stats.c @@ -468,7 +468,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, #define OPT_WRITE_UNSIGNED(n) \ if (je_mallctl("opt."#n, &uv, &usz, NULL, 0) == 0) { \ malloc_cprintf(write_cb, cbopaque, \ - " opt."#n": %zu\n", sv); \ + " opt."#n": %u\n", uv); \ } #define OPT_WRITE_SIZE_T(n) \ if (je_mallctl("opt."#n, &sv, &ssz, NULL, 0) == 0) { \ From bc26d7d99b3d3dc7633a28da622087ed3daa9a94 Mon Sep 17 00:00:00 2001 From: rustyx Date: Tue, 12 Apr 2016 09:50:10 +0200 Subject: [PATCH 38/82] Cleanup MSVC project, embed PDB data inside static .lib --- .../projects/vc2015/jemalloc/jemalloc.vcxproj | 22 +++++++++---------- .../vc2015/jemalloc/jemalloc.vcxproj.filters | 3 --- 2 files changed, 11 insertions(+), 14 deletions(-) diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 0a6c4e61..9315022d 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -106,7 +106,6 @@ - @@ -252,7 +251,7 @@ Disabled _REENTRANT;_WINDLL;DLLEXPORT;JEMALLOC_DEBUG;_DEBUG;%(PreprocessorDefinitions) ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) - 4090;4146;4244;4267;4334 + 4090;4146;4267;4334 $(OutputPath)$(TargetName).pdb @@ -269,7 +268,7 @@ JEMALLOC_DEBUG;_REENTRANT;JEMALLOC_EXPORT=;_DEBUG;_LIB;%(PreprocessorDefinitions) ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) MultiThreadedDebug - 4090;4146;4244;4267;4334 + 4090;4146;4267;4334 $(OutputPath)$(TargetName).pdb @@ -285,7 +284,7 @@ Disabled _REENTRANT;_WINDLL;DLLEXPORT;JEMALLOC_DEBUG;_DEBUG;%(PreprocessorDefinitions) ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) - 4090;4146;4244;4267;4334 + 4090;4146;4267;4334 $(OutputPath)$(TargetName).pdb @@ -302,8 +301,9 @@ JEMALLOC_DEBUG;_REENTRANT;JEMALLOC_EXPORT=;_DEBUG;_LIB;%(PreprocessorDefinitions) ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) MultiThreadedDebug - 4090;4146;4244;4267;4334 - $(OutputPath)$(TargetName).pdb + 4090;4146;4267;4334 + OldStyle + false Windows @@ -320,7 +320,7 @@ true _REENTRANT;_WINDLL;DLLEXPORT;NDEBUG;%(PreprocessorDefinitions) ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) - 4090;4146;4244;4267;4334 + 4090;4146;4267;4334 $(OutputPath)$(TargetName).pdb @@ -341,7 +341,7 @@ _REENTRANT;JEMALLOC_EXPORT=;NDEBUG;_LIB;%(PreprocessorDefinitions) ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) MultiThreaded - 4090;4146;4244;4267;4334 + 4090;4146;4267;4334 $(OutputPath)$(TargetName).pdb @@ -361,7 +361,7 @@ true ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) _REENTRANT;_WINDLL;DLLEXPORT;NDEBUG;%(PreprocessorDefinitions) - 4090;4146;4244;4267;4334 + 4090;4146;4267;4334 $(OutputPath)$(TargetName).pdb @@ -382,8 +382,8 @@ _REENTRANT;JEMALLOC_EXPORT=;NDEBUG;_LIB;%(PreprocessorDefinitions) ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) MultiThreaded - 4090;4146;4244;4267;4334 - $(OutputPath)$(TargetName).pdb + 4090;4146;4267;4334 + OldStyle Windows diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 412c24d6..88c15efa 100644 --- 
a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -229,9 +229,6 @@ Source Files - - Source Files - Source Files From 00432331b83526e3bb82f7c2aba493bf254cb9c0 Mon Sep 17 00:00:00 2001 From: rustyx Date: Tue, 12 Apr 2016 09:50:54 +0200 Subject: [PATCH 39/82] Fix 64-to-32 conversion warnings in 32-bit mode --- src/arena.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/src/arena.c b/src/arena.c index 3373e1d8..a9566af1 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1268,7 +1268,7 @@ arena_decay_backlog_npages_limit(const arena_t *arena) sum = 0; for (i = 0; i < SMOOTHSTEP_NSTEPS; i++) sum += arena->decay_backlog[i] * h_steps[i]; - npages_limit_backlog = (sum >> SMOOTHSTEP_BFP); + npages_limit_backlog = (size_t)(sum >> SMOOTHSTEP_BFP); return (npages_limit_backlog); } @@ -1276,7 +1276,7 @@ arena_decay_backlog_npages_limit(const arena_t *arena) static void arena_decay_epoch_advance(arena_t *arena, const nstime_t *time) { - uint64_t nadvance; + uint64_t nadvance_u64; nstime_t delta; size_t ndirty_delta; @@ -1285,27 +1285,31 @@ arena_decay_epoch_advance(arena_t *arena, const nstime_t *time) nstime_copy(&delta, time); nstime_subtract(&delta, &arena->decay_epoch); - nadvance = nstime_divide(&delta, &arena->decay_interval); - assert(nadvance > 0); + nadvance_u64 = nstime_divide(&delta, &arena->decay_interval); + assert(nadvance_u64 > 0); - /* Add nadvance decay intervals to epoch. */ + /* Add nadvance_u64 decay intervals to epoch. */ nstime_copy(&delta, &arena->decay_interval); - nstime_imultiply(&delta, nadvance); + nstime_imultiply(&delta, nadvance_u64); nstime_add(&arena->decay_epoch, &delta); /* Set a new deadline. */ arena_decay_deadline_init(arena); /* Update the backlog. */ - if (nadvance >= SMOOTHSTEP_NSTEPS) { + if (nadvance_u64 >= SMOOTHSTEP_NSTEPS) { memset(arena->decay_backlog, 0, (SMOOTHSTEP_NSTEPS-1) * sizeof(size_t)); } else { - memmove(arena->decay_backlog, &arena->decay_backlog[nadvance], - (SMOOTHSTEP_NSTEPS - nadvance) * sizeof(size_t)); - if (nadvance > 1) { + size_t nadvance_z = (size_t)nadvance_u64; + + assert((uint64_t)nadvance_z == nadvance_u64); + + memmove(arena->decay_backlog, &arena->decay_backlog[nadvance_z], + (SMOOTHSTEP_NSTEPS - nadvance_z) * sizeof(size_t)); + if (nadvance_z > 1) { memset(&arena->decay_backlog[SMOOTHSTEP_NSTEPS - - nadvance], 0, (nadvance-1) * sizeof(size_t)); + nadvance_z], 0, (nadvance_z-1) * sizeof(size_t)); } } ndirty_delta = (arena->ndirty > arena->decay_ndirty) ? arena->ndirty - From bab58ef401b0dec8230bd2d371e135009cd06924 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 12 Apr 2016 12:39:02 -0700 Subject: [PATCH 40/82] Fix more 64-to-32 conversion warnings. 
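This patch and the previous one both come down to keeping 64-bit counters
and size_t values distinct. Pulled out of context, the checked-narrowing
idiom the previous patch applies in arena_decay_epoch_advance() amounts to
the sketch below (invented names, not jemalloc code); the decay code only
narrows after bounding the value by SMOOTHSTEP_NSTEPS, so the assertion
documents an invariant rather than guarding an expected failure.

    #include <assert.h>
    #include <stddef.h>
    #include <stdint.h>

    /* Narrow a 64-bit count to size_t, asserting the value survives intact. */
    static size_t
    checked_narrow_zu(uint64_t v)
    {
    	size_t z = (size_t)v;

    	assert((uint64_t)z == v);
    	return (z);
    }

    int
    main(void)
    {
    	size_t nadvance_z = checked_narrow_zu(UINT64_C(3));

    	(void)nadvance_z;	/* Now safe to use as an array offset. */
    	return (0);
    }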
--- test/unit/stats.c | 10 +++++----- test/unit/util.c | 12 ++++++------ 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/test/unit/stats.c b/test/unit/stats.c index 6e803160..a9a3981f 100644 --- a/test/unit/stats.c +++ b/test/unit/stats.c @@ -220,11 +220,11 @@ TEST_BEGIN(test_stats_arenas_large) if (config_stats) { assert_zu_gt(allocated, 0, "allocated should be greater than zero"); - assert_zu_gt(nmalloc, 0, + assert_u64_gt(nmalloc, 0, "nmalloc should be greater than zero"); - assert_zu_ge(nmalloc, ndalloc, + assert_u64_ge(nmalloc, ndalloc, "nmalloc should be at least as large as ndalloc"); - assert_zu_gt(nrequests, 0, + assert_u64_gt(nrequests, 0, "nrequests should be greater than zero"); } @@ -262,9 +262,9 @@ TEST_BEGIN(test_stats_arenas_huge) if (config_stats) { assert_zu_gt(allocated, 0, "allocated should be greater than zero"); - assert_zu_gt(nmalloc, 0, + assert_u64_gt(nmalloc, 0, "nmalloc should be greater than zero"); - assert_zu_ge(nmalloc, ndalloc, + assert_u64_ge(nmalloc, ndalloc, "nmalloc should be at least as large as ndalloc"); } diff --git a/test/unit/util.c b/test/unit/util.c index d24c1c79..c4333d53 100644 --- a/test/unit/util.c +++ b/test/unit/util.c @@ -4,27 +4,27 @@ unsigned i, pow2; \ t x; \ \ - assert_zu_eq(pow2_ceil_##suf(0), 0, "Unexpected result"); \ + assert_##suf##_eq(pow2_ceil_##suf(0), 0, "Unexpected result"); \ \ for (i = 0; i < sizeof(t) * 8; i++) { \ - assert_zu_eq(pow2_ceil_##suf(((t)1) << i), ((t)1) << i, \ - "Unexpected result"); \ + assert_##suf##_eq(pow2_ceil_##suf(((t)1) << i), ((t)1) \ + << i, "Unexpected result"); \ } \ \ for (i = 2; i < sizeof(t) * 8; i++) { \ - assert_zu_eq(pow2_ceil_##suf((((t)1) << i) - 1), \ + assert_##suf##_eq(pow2_ceil_##suf((((t)1) << i) - 1), \ ((t)1) << i, "Unexpected result"); \ } \ \ for (i = 0; i < sizeof(t) * 8 - 1; i++) { \ - assert_zu_eq(pow2_ceil_##suf((((t)1) << i) + 1), \ + assert_##suf##_eq(pow2_ceil_##suf((((t)1) << i) + 1), \ ((t)1) << (i+1), "Unexpected result"); \ } \ \ for (pow2 = 1; pow2 < 25; pow2++) { \ for (x = (((t)1) << (pow2-1)) + 1; x <= ((t)1) << pow2; \ x++) { \ - assert_zu_eq(pow2_ceil_##suf(x), \ + assert_##suf##_eq(pow2_ceil_##suf(x), \ ((t)1) << pow2, \ "Unexpected result, x=%"pri, x); \ } \ From 8413463f3a334f14c55589e57d3e82dd594ef479 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 12 Apr 2016 23:18:25 -0700 Subject: [PATCH 41/82] Fix a style nit. --- include/jemalloc/internal/ph.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/ph.h b/include/jemalloc/internal/ph.h index 70b6e2cd..4f91c333 100644 --- a/include/jemalloc/internal/ph.h +++ b/include/jemalloc/internal/ph.h @@ -211,7 +211,8 @@ a_prefix##new(a_ph_type *ph) \ memset(ph, 0, sizeof(ph(a_type))); \ } \ a_attr bool \ -a_prefix##empty(a_ph_type *ph) { \ +a_prefix##empty(a_ph_type *ph) \ +{ \ \ return (ph->ph_root == NULL); \ } \ From b2c0d6322d2307458ae2b28545f8a5c9903d7ef5 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 13 Apr 2016 23:36:15 -0700 Subject: [PATCH 42/82] Add witness, a simple online locking validator. This resolves #358. 
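Witness-style validation works, in outline, by assigning every lock class a
rank in a global order, having each thread remember which witnesses it
currently holds, and asserting on each acquisition that the new rank is
consistent with everything already held, so a lock-order reversal fails
immediately at the offending call site instead of surfacing later as a
deadlock. The sketch below models only that bookkeeping; the names are
invented, and the real witness code added here is threaded through tsd and
malloc_mutex rather than using a bare thread-local pointer.

    #include <assert.h>
    #include <stddef.h>

    typedef struct lock_witness_s lock_witness_t;
    struct lock_witness_s {
    	const char	*name;
    	unsigned	rank;	/* Position in the global lock order. */
    	lock_witness_t	*link;	/* Next witness held by this thread. */
    };

    /* Witnesses currently held by this thread. */
    static __thread lock_witness_t *witnesses_held = NULL;

    static void
    witness_lock_sketch(lock_witness_t *w)
    {
    	lock_witness_t *it;

    	/* A lock may only be taken after all locks of lower rank. */
    	for (it = witnesses_held; it != NULL; it = it->link)
    		assert(it->rank < w->rank && "lock order reversal");
    	w->link = witnesses_held;
    	witnesses_held = w;
    }

    static void
    witness_unlock_sketch(lock_witness_t *w)
    {
    	lock_witness_t **itp;

    	for (itp = &witnesses_held; *itp != NULL; itp = &(*itp)->link) {
    		if (*itp == w) {
    			*itp = w->link;
    			return;
    		}
    	}
    	assert(0 && "unlocking a witness that is not held");
    }

    int
    main(void)
    {
    	lock_witness_t outer = {"outer", 1, NULL};
    	lock_witness_t inner = {"inner", 2, NULL};

    	/* Only the validation bookkeeping is modeled; no mutex is taken. */
    	witness_lock_sketch(&outer);
    	witness_lock_sketch(&inner);	/* OK: rank 2 after rank 1. */
    	witness_unlock_sketch(&inner);
    	witness_unlock_sketch(&outer);
    	/* Acquiring &outer while holding &inner would trip the assert. */
    	return (0);
    }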
--- Makefile.in | 4 +- include/jemalloc/internal/arena.h | 107 ++-- include/jemalloc/internal/base.h | 11 +- include/jemalloc/internal/chunk.h | 38 +- include/jemalloc/internal/chunk_dss.h | 16 +- include/jemalloc/internal/ctl.h | 24 +- include/jemalloc/internal/huge.h | 10 +- .../jemalloc/internal/jemalloc_internal.h.in | 36 +- include/jemalloc/internal/mb.h | 6 +- include/jemalloc/internal/mutex.h | 56 +- include/jemalloc/internal/private_symbols.txt | 15 +- include/jemalloc/internal/prof.h | 78 +-- include/jemalloc/internal/tcache.h | 22 +- include/jemalloc/internal/tsd.h | 6 +- include/jemalloc/internal/valgrind.h | 12 +- include/jemalloc/internal/witness.h | 103 ++++ src/arena.c | 568 +++++++++--------- src/base.c | 26 +- src/chunk.c | 186 +++--- src/chunk_dss.c | 46 +- src/ctl.c | 427 +++++++------ src/huge.c | 106 ++-- src/jemalloc.c | 385 +++++++----- src/mutex.c | 21 +- src/prof.c | 497 ++++++++------- src/quarantine.c | 4 +- src/tcache.c | 91 +-- src/tsd.c | 20 +- src/witness.c | 206 +++++++ src/zone.c | 8 +- test/unit/junk.c | 4 +- test/unit/prof_reset.c | 3 +- test/unit/witness.c | 278 +++++++++ 33 files changed, 2118 insertions(+), 1302 deletions(-) create mode 100644 include/jemalloc/internal/witness.h create mode 100644 src/witness.c create mode 100644 test/unit/witness.c diff --git a/Makefile.in b/Makefile.in index 480ce1a1..a872eb5f 100644 --- a/Makefile.in +++ b/Makefile.in @@ -103,7 +103,8 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/tcache.c \ $(srcroot)src/ticker.c \ $(srcroot)src/tsd.c \ - $(srcroot)src/util.c + $(srcroot)src/util.c \ + $(srcroot)src/witness.c ifeq ($(enable_valgrind), 1) C_SRCS += $(srcroot)src/valgrind.c endif @@ -169,6 +170,7 @@ TESTS_UNIT := $(srcroot)test/unit/atomic.c \ $(srcroot)test/unit/nstime.c \ $(srcroot)test/unit/tsd.c \ $(srcroot)test/unit/util.c \ + $(srcroot)test/unit/witness.c \ $(srcroot)test/unit/zero.c TESTS_INTEGRATION := $(srcroot)test/integration/aligned_alloc.c \ $(srcroot)test/integration/allocated.c \ diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 6f0fa76a..2130e9a0 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -506,23 +506,25 @@ void arena_chunk_cache_maybe_insert(arena_t *arena, extent_node_t *node, bool cache); void arena_chunk_cache_maybe_remove(arena_t *arena, extent_node_t *node, bool cache); -extent_node_t *arena_node_alloc(arena_t *arena); -void arena_node_dalloc(arena_t *arena, extent_node_t *node); -void *arena_chunk_alloc_huge(arena_t *arena, size_t usize, size_t alignment, - bool *zero); -void arena_chunk_dalloc_huge(arena_t *arena, void *chunk, size_t usize); -void arena_chunk_ralloc_huge_similar(arena_t *arena, void *chunk, +extent_node_t *arena_node_alloc(tsd_t *tsd, arena_t *arena); +void arena_node_dalloc(tsd_t *tsd, arena_t *arena, extent_node_t *node); +void *arena_chunk_alloc_huge(tsd_t *tsd, arena_t *arena, size_t usize, + size_t alignment, bool *zero); +void arena_chunk_dalloc_huge(tsd_t *tsd, arena_t *arena, void *chunk, + size_t usize); +void arena_chunk_ralloc_huge_similar(tsd_t *tsd, arena_t *arena, void *chunk, size_t oldsize, size_t usize); -void arena_chunk_ralloc_huge_shrink(arena_t *arena, void *chunk, +void arena_chunk_ralloc_huge_shrink(tsd_t *tsd, arena_t *arena, void *chunk, size_t oldsize, size_t usize); -bool arena_chunk_ralloc_huge_expand(arena_t *arena, void *chunk, +bool arena_chunk_ralloc_huge_expand(tsd_t *tsd, arena_t *arena, void *chunk, size_t oldsize, size_t usize, bool *zero); -ssize_t 
arena_lg_dirty_mult_get(arena_t *arena); -bool arena_lg_dirty_mult_set(arena_t *arena, ssize_t lg_dirty_mult); -ssize_t arena_decay_time_get(arena_t *arena); -bool arena_decay_time_set(arena_t *arena, ssize_t decay_time); -void arena_maybe_purge(arena_t *arena); -void arena_purge(arena_t *arena, bool all); +ssize_t arena_lg_dirty_mult_get(tsd_t *tsd, arena_t *arena); +bool arena_lg_dirty_mult_set(tsd_t *tsd, arena_t *arena, + ssize_t lg_dirty_mult); +ssize_t arena_decay_time_get(tsd_t *tsd, arena_t *arena); +bool arena_decay_time_set(tsd_t *tsd, arena_t *arena, ssize_t decay_time); +void arena_purge(tsd_t *tsd, arena_t *arena, bool all); +void arena_maybe_purge(tsd_t *tsd, arena_t *arena); void arena_tcache_fill_small(tsd_t *tsd, arena_t *arena, tcache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes); void arena_alloc_junk_small(void *ptr, arena_bin_info_t *bin_info, @@ -542,11 +544,11 @@ void *arena_malloc_hard(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind, bool zero, tcache_t *tcache); void *arena_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, bool zero, tcache_t *tcache); -void arena_prof_promoted(const void *ptr, size_t size); -void arena_dalloc_bin_junked_locked(arena_t *arena, arena_chunk_t *chunk, - void *ptr, arena_chunk_map_bits_t *bitselm); -void arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, - size_t pageind, arena_chunk_map_bits_t *bitselm); +void arena_prof_promoted(tsd_t *tsd, const void *ptr, size_t size); +void arena_dalloc_bin_junked_locked(tsd_t *tsd, arena_t *arena, + arena_chunk_t *chunk, void *ptr, arena_chunk_map_bits_t *bitselm); +void arena_dalloc_bin(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, + void *ptr, size_t pageind, arena_chunk_map_bits_t *bitselm); void arena_dalloc_small(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, void *ptr, size_t pageind); #ifdef JEMALLOC_JET @@ -555,8 +557,8 @@ extern arena_dalloc_junk_large_t *arena_dalloc_junk_large; #else void arena_dalloc_junk_large(void *ptr, size_t usize); #endif -void arena_dalloc_large_junked_locked(arena_t *arena, arena_chunk_t *chunk, - void *ptr); +void arena_dalloc_large_junked_locked(tsd_t *tsd, arena_t *arena, + arena_chunk_t *chunk, void *ptr); void arena_dalloc_large(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, void *ptr); #ifdef JEMALLOC_JET @@ -567,27 +569,28 @@ bool arena_ralloc_no_move(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t extra, bool zero); void *arena_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, size_t alignment, bool zero, tcache_t *tcache); -dss_prec_t arena_dss_prec_get(arena_t *arena); -bool arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec); +dss_prec_t arena_dss_prec_get(tsd_t *tsd, arena_t *arena); +bool arena_dss_prec_set(tsd_t *tsd, arena_t *arena, dss_prec_t dss_prec); ssize_t arena_lg_dirty_mult_default_get(void); bool arena_lg_dirty_mult_default_set(ssize_t lg_dirty_mult); ssize_t arena_decay_time_default_get(void); bool arena_decay_time_default_set(ssize_t decay_time); -void arena_basic_stats_merge(arena_t *arena, unsigned *nthreads, +void arena_basic_stats_merge(tsd_t *tsd, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *lg_dirty_mult, ssize_t *decay_time, size_t *nactive, size_t *ndirty); -void arena_stats_merge(arena_t *arena, unsigned *nthreads, const char **dss, - ssize_t *lg_dirty_mult, ssize_t *decay_time, size_t *nactive, - size_t *ndirty, arena_stats_t *astats, malloc_bin_stats_t *bstats, - malloc_large_stats_t *lstats, malloc_huge_stats_t 
*hstats); +void arena_stats_merge(tsd_t *tsd, arena_t *arena, unsigned *nthreads, + const char **dss, ssize_t *lg_dirty_mult, ssize_t *decay_time, + size_t *nactive, size_t *ndirty, arena_stats_t *astats, + malloc_bin_stats_t *bstats, malloc_large_stats_t *lstats, + malloc_huge_stats_t *hstats); unsigned arena_nthreads_get(arena_t *arena); void arena_nthreads_inc(arena_t *arena); void arena_nthreads_dec(arena_t *arena); -arena_t *arena_new(unsigned ind); +arena_t *arena_new(tsd_t *tsd, unsigned ind); bool arena_boot(void); -void arena_prefork(arena_t *arena); -void arena_postfork_parent(arena_t *arena); -void arena_postfork_child(arena_t *arena); +void arena_prefork(tsd_t *tsd, arena_t *arena); +void arena_postfork_parent(tsd_t *tsd, arena_t *arena); +void arena_postfork_child(tsd_t *tsd, arena_t *arena); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ @@ -644,21 +647,22 @@ void arena_metadata_allocated_sub(arena_t *arena, size_t size); size_t arena_metadata_allocated_get(arena_t *arena); bool arena_prof_accum_impl(arena_t *arena, uint64_t accumbytes); bool arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes); -bool arena_prof_accum(arena_t *arena, uint64_t accumbytes); +bool arena_prof_accum(tsd_t *tsd, arena_t *arena, uint64_t accumbytes); szind_t arena_ptr_small_binind_get(const void *ptr, size_t mapbits); szind_t arena_bin_index(arena_t *arena, arena_bin_t *bin); size_t arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr); -prof_tctx_t *arena_prof_tctx_get(const void *ptr); -void arena_prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx); -void arena_prof_tctx_reset(const void *ptr, size_t usize, +prof_tctx_t *arena_prof_tctx_get(tsd_t *tsd, const void *ptr); +void arena_prof_tctx_set(tsd_t *tsd, const void *ptr, size_t usize, + prof_tctx_t *tctx); +void arena_prof_tctx_reset(tsd_t *tsd, const void *ptr, size_t usize, const void *old_ptr, prof_tctx_t *old_tctx); void arena_decay_ticks(tsd_t *tsd, arena_t *arena, unsigned nticks); void arena_decay_tick(tsd_t *tsd, arena_t *arena); void *arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind, bool zero, tcache_t *tcache, bool slow_path); arena_t *arena_aalloc(const void *ptr); -size_t arena_salloc(const void *ptr, bool demote); +size_t arena_salloc(tsd_t *tsd, const void *ptr, bool demote); void arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path); void arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache); #endif @@ -1035,7 +1039,7 @@ arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes) } JEMALLOC_INLINE bool -arena_prof_accum(arena_t *arena, uint64_t accumbytes) +arena_prof_accum(tsd_t *tsd, arena_t *arena, uint64_t accumbytes) { cassert(config_prof); @@ -1046,9 +1050,9 @@ arena_prof_accum(arena_t *arena, uint64_t accumbytes) { bool ret; - malloc_mutex_lock(&arena->lock); + malloc_mutex_lock(tsd, &arena->lock); ret = arena_prof_accum_impl(arena, accumbytes); - malloc_mutex_unlock(&arena->lock); + malloc_mutex_unlock(tsd, &arena->lock); return (ret); } } @@ -1184,7 +1188,7 @@ arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr) } JEMALLOC_INLINE prof_tctx_t * -arena_prof_tctx_get(const void *ptr) +arena_prof_tctx_get(tsd_t *tsd, const void *ptr) { prof_tctx_t *ret; arena_chunk_t *chunk; @@ -1205,13 +1209,14 @@ arena_prof_tctx_get(const void *ptr) ret = atomic_read_p(&elm->prof_tctx_pun); } } else - ret = huge_prof_tctx_get(ptr); + ret = 
huge_prof_tctx_get(tsd, ptr); return (ret); } JEMALLOC_INLINE void -arena_prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx) +arena_prof_tctx_set(tsd_t *tsd, const void *ptr, size_t usize, + prof_tctx_t *tctx) { arena_chunk_t *chunk; @@ -1242,12 +1247,12 @@ arena_prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx) assert(arena_mapbits_large_get(chunk, pageind) == 0); } } else - huge_prof_tctx_set(ptr, tctx); + huge_prof_tctx_set(tsd, ptr, tctx); } JEMALLOC_INLINE void -arena_prof_tctx_reset(const void *ptr, size_t usize, const void *old_ptr, - prof_tctx_t *old_tctx) +arena_prof_tctx_reset(tsd_t *tsd, const void *ptr, size_t usize, + const void *old_ptr, prof_tctx_t *old_tctx) { cassert(config_prof); @@ -1270,7 +1275,7 @@ arena_prof_tctx_reset(const void *ptr, size_t usize, const void *old_ptr, atomic_write_p(&elm->prof_tctx_pun, (prof_tctx_t *)(uintptr_t)1U); } else - huge_prof_tctx_reset(ptr); + huge_prof_tctx_reset(tsd, ptr); } } @@ -1285,7 +1290,7 @@ arena_decay_ticks(tsd_t *tsd, arena_t *arena, unsigned nticks) if (unlikely(decay_ticker == NULL)) return; if (unlikely(ticker_ticks(decay_ticker, nticks))) - arena_purge(arena, false); + arena_purge(tsd, arena, false); } JEMALLOC_ALWAYS_INLINE void @@ -1332,7 +1337,7 @@ arena_aalloc(const void *ptr) /* Return the size of the allocation pointed to by ptr. */ JEMALLOC_ALWAYS_INLINE size_t -arena_salloc(const void *ptr, bool demote) +arena_salloc(tsd_t *tsd, const void *ptr, bool demote) { size_t ret; arena_chunk_t *chunk; @@ -1375,7 +1380,7 @@ arena_salloc(const void *ptr, bool demote) ret = index2size(binind); } } else - ret = huge_salloc(ptr); + ret = huge_salloc(tsd, ptr); return (ret); } @@ -1445,7 +1450,7 @@ arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache) pageind) - large_pad; } } - assert(s2u(size) == s2u(arena_salloc(ptr, false))); + assert(s2u(size) == s2u(arena_salloc(tsd, ptr, false))); if (likely(size <= SMALL_MAXCLASS)) { /* Small allocation. 
*/ diff --git a/include/jemalloc/internal/base.h b/include/jemalloc/internal/base.h index 39e46ee4..075a2a20 100644 --- a/include/jemalloc/internal/base.h +++ b/include/jemalloc/internal/base.h @@ -9,12 +9,13 @@ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -void *base_alloc(size_t size); -void base_stats_get(size_t *allocated, size_t *resident, size_t *mapped); +void *base_alloc(tsd_t *tsd, size_t size); +void base_stats_get(tsd_t *tsd, size_t *allocated, size_t *resident, + size_t *mapped); bool base_boot(void); -void base_prefork(void); -void base_postfork_parent(void); -void base_postfork_child(void); +void base_prefork(tsd_t *tsd); +void base_postfork_parent(tsd_t *tsd); +void base_postfork_child(tsd_t *tsd); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/include/jemalloc/internal/chunk.h b/include/jemalloc/internal/chunk.h index d800478d..6c3ad9bf 100644 --- a/include/jemalloc/internal/chunk.h +++ b/include/jemalloc/internal/chunk.h @@ -48,28 +48,32 @@ extern size_t chunk_npages; extern const chunk_hooks_t chunk_hooks_default; -chunk_hooks_t chunk_hooks_get(arena_t *arena); -chunk_hooks_t chunk_hooks_set(arena_t *arena, +chunk_hooks_t chunk_hooks_get(tsd_t *tsd, arena_t *arena); +chunk_hooks_t chunk_hooks_set(tsd_t *tsd, arena_t *arena, const chunk_hooks_t *chunk_hooks); -bool chunk_register(const void *chunk, const extent_node_t *node); +bool chunk_register(tsd_t *tsd, const void *chunk, + const extent_node_t *node); void chunk_deregister(const void *chunk, const extent_node_t *node); void *chunk_alloc_base(size_t size); -void *chunk_alloc_cache(arena_t *arena, chunk_hooks_t *chunk_hooks, - void *new_addr, size_t size, size_t alignment, bool *zero, - bool dalloc_node); -void *chunk_alloc_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks, - void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit); -void chunk_dalloc_cache(arena_t *arena, chunk_hooks_t *chunk_hooks, - void *chunk, size_t size, bool committed); -void chunk_dalloc_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks, - void *chunk, size_t size, bool zeroed, bool committed); -bool chunk_purge_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks, - void *chunk, size_t size, size_t offset, size_t length); +void *chunk_alloc_cache(tsd_t *tsd, arena_t *arena, + chunk_hooks_t *chunk_hooks, void *new_addr, size_t size, size_t alignment, + bool *zero, bool dalloc_node); +void *chunk_alloc_wrapper(tsd_t *tsd, arena_t *arena, + chunk_hooks_t *chunk_hooks, void *new_addr, size_t size, size_t alignment, + bool *zero, bool *commit); +void chunk_dalloc_cache(tsd_t *tsd, arena_t *arena, + chunk_hooks_t *chunk_hooks, void *chunk, size_t size, bool committed); +void chunk_dalloc_wrapper(tsd_t *tsd, arena_t *arena, + chunk_hooks_t *chunk_hooks, void *chunk, size_t size, bool zeroed, + bool committed); +bool chunk_purge_wrapper(tsd_t *tsd, arena_t *arena, + chunk_hooks_t *chunk_hooks, void *chunk, size_t size, size_t offset, + size_t length); bool chunk_boot(void); -void chunk_prefork(void); -void chunk_postfork_parent(void); -void chunk_postfork_child(void); +void chunk_prefork(tsd_t *tsd); +void chunk_postfork_parent(tsd_t *tsd); +void chunk_postfork_child(tsd_t *tsd); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/include/jemalloc/internal/chunk_dss.h b/include/jemalloc/internal/chunk_dss.h index 
388f46be..7f3a09c7 100644 --- a/include/jemalloc/internal/chunk_dss.h +++ b/include/jemalloc/internal/chunk_dss.h @@ -21,15 +21,15 @@ extern const char *dss_prec_names[]; /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -dss_prec_t chunk_dss_prec_get(void); -bool chunk_dss_prec_set(dss_prec_t dss_prec); -void *chunk_alloc_dss(arena_t *arena, void *new_addr, size_t size, - size_t alignment, bool *zero, bool *commit); -bool chunk_in_dss(void *chunk); +dss_prec_t chunk_dss_prec_get(tsd_t *tsd); +bool chunk_dss_prec_set(tsd_t *tsd, dss_prec_t dss_prec); +void *chunk_alloc_dss(tsd_t *tsd, arena_t *arena, void *new_addr, + size_t size, size_t alignment, bool *zero, bool *commit); +bool chunk_in_dss(tsd_t *tsd, void *chunk); bool chunk_dss_boot(void); -void chunk_dss_prefork(void); -void chunk_dss_postfork_parent(void); -void chunk_dss_postfork_child(void); +void chunk_dss_prefork(tsd_t *tsd); +void chunk_dss_postfork_parent(tsd_t *tsd); +void chunk_dss_postfork_child(tsd_t *tsd); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index 9c5e9328..ec856996 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -21,13 +21,14 @@ struct ctl_named_node_s { /* If (nchildren == 0), this is a terminal node. */ unsigned nchildren; const ctl_node_t *children; - int (*ctl)(const size_t *, size_t, void *, size_t *, - void *, size_t); + int (*ctl)(tsd_t *, const size_t *, size_t, void *, + size_t *, void *, size_t); }; struct ctl_indexed_node_s { struct ctl_node_s node; - const ctl_named_node_t *(*index)(const size_t *, size_t, size_t); + const ctl_named_node_t *(*index)(tsd_t *, const size_t *, size_t, + size_t); }; struct ctl_arena_stats_s { @@ -68,16 +69,17 @@ struct ctl_stats_s { /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -int ctl_byname(const char *name, void *oldp, size_t *oldlenp, void *newp, - size_t newlen); -int ctl_nametomib(const char *name, size_t *mibp, size_t *miblenp); - -int ctl_bymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, +int ctl_byname(tsd_t *tsd, const char *name, void *oldp, size_t *oldlenp, void *newp, size_t newlen); +int ctl_nametomib(tsd_t *tsd, const char *name, size_t *mibp, + size_t *miblenp); + +int ctl_bymib(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen); bool ctl_boot(void); -void ctl_prefork(void); -void ctl_postfork_parent(void); -void ctl_postfork_child(void); +void ctl_prefork(tsd_t *tsd); +void ctl_postfork_parent(tsd_t *tsd); +void ctl_postfork_child(tsd_t *tsd); #define xmallctl(name, oldp, oldlenp, newp, newlen) do { \ if (je_mallctl(name, oldp, oldlenp, newp, newlen) \ diff --git a/include/jemalloc/internal/huge.h b/include/jemalloc/internal/huge.h index cb6f69e6..f19d3368 100644 --- a/include/jemalloc/internal/huge.h +++ b/include/jemalloc/internal/huge.h @@ -18,15 +18,15 @@ bool huge_ralloc_no_move(tsd_t *tsd, void *ptr, size_t oldsize, void *huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t usize, size_t alignment, bool zero, tcache_t *tcache); #ifdef JEMALLOC_JET -typedef void (huge_dalloc_junk_t)(void *, size_t); +typedef void (huge_dalloc_junk_t)(tsd_t *, void *, size_t); extern huge_dalloc_junk_t *huge_dalloc_junk; #endif void huge_dalloc(tsd_t *tsd, void *ptr, tcache_t 
*tcache); arena_t *huge_aalloc(const void *ptr); -size_t huge_salloc(const void *ptr); -prof_tctx_t *huge_prof_tctx_get(const void *ptr); -void huge_prof_tctx_set(const void *ptr, prof_tctx_t *tctx); -void huge_prof_tctx_reset(const void *ptr); +size_t huge_salloc(tsd_t *tsd, const void *ptr); +prof_tctx_t *huge_prof_tctx_get(tsd_t *tsd, const void *ptr); +void huge_prof_tctx_set(tsd_t *tsd, const void *ptr, prof_tctx_t *tctx); +void huge_prof_tctx_reset(tsd_t *tsd, const void *ptr); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 0b57b82a..ddceabca 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -368,6 +368,7 @@ typedef unsigned szind_t; #include "jemalloc/internal/smoothstep.h" #include "jemalloc/internal/stats.h" #include "jemalloc/internal/ctl.h" +#include "jemalloc/internal/witness.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/tsd.h" #include "jemalloc/internal/mb.h" @@ -399,6 +400,7 @@ typedef unsigned szind_t; #include "jemalloc/internal/smoothstep.h" #include "jemalloc/internal/stats.h" #include "jemalloc/internal/ctl.h" +#include "jemalloc/internal/witness.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/mb.h" #include "jemalloc/internal/bitmap.h" @@ -465,7 +467,7 @@ void *bootstrap_malloc(size_t size); void *bootstrap_calloc(size_t num, size_t size); void bootstrap_free(void *ptr); unsigned narenas_total_get(void); -arena_t *arena_init(unsigned ind); +arena_t *arena_init(tsd_t *tsd, unsigned ind); arena_tdata_t *arena_tdata_get_hard(tsd_t *tsd, unsigned ind); arena_t *arena_choose_hard(tsd_t *tsd); void arena_migrate(tsd_t *tsd, unsigned oldind, unsigned newind); @@ -490,6 +492,7 @@ void jemalloc_postfork_child(void); #include "jemalloc/internal/smoothstep.h" #include "jemalloc/internal/stats.h" #include "jemalloc/internal/ctl.h" +#include "jemalloc/internal/witness.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/mb.h" #include "jemalloc/internal/bitmap.h" @@ -521,6 +524,7 @@ void jemalloc_postfork_child(void); #include "jemalloc/internal/smoothstep.h" #include "jemalloc/internal/stats.h" #include "jemalloc/internal/ctl.h" +#include "jemalloc/internal/witness.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/tsd.h" #include "jemalloc/internal/mb.h" @@ -545,7 +549,7 @@ size_t sa2u(size_t size, size_t alignment); arena_t *arena_choose(tsd_t *tsd, arena_t *arena); arena_tdata_t *arena_tdata_get(tsd_t *tsd, unsigned ind, bool refresh_if_missing); -arena_t *arena_get(unsigned ind, bool init_if_missing); +arena_t *arena_get(tsd_t *tsd, unsigned ind, bool init_if_missing); ticker_t *decay_ticker_get(tsd_t *tsd, unsigned ind); #endif @@ -819,7 +823,7 @@ arena_tdata_get(tsd_t *tsd, unsigned ind, bool refresh_if_missing) } JEMALLOC_INLINE arena_t * -arena_get(unsigned ind, bool init_if_missing) +arena_get(tsd_t *tsd, unsigned ind, bool init_if_missing) { arena_t *ret; @@ -829,7 +833,7 @@ arena_get(unsigned ind, bool init_if_missing) if (unlikely(ret == NULL)) { ret = atomic_read_p((void *)&arenas[ind]); if (init_if_missing && unlikely(ret == NULL)) - ret = arena_init(ind); + ret = arena_init(tsd, ind); } return (ret); } @@ -863,7 +867,7 @@ decay_ticker_get(tsd_t *tsd, unsigned ind) #ifndef JEMALLOC_ENABLE_INLINE arena_t *iaalloc(const void *ptr); -size_t 
isalloc(const void *ptr, bool demote); +size_t isalloc(tsd_t *tsd, const void *ptr, bool demote); void *iallocztm(tsd_t *tsd, size_t size, szind_t ind, bool zero, tcache_t *tcache, bool is_metadata, arena_t *arena, bool slow_path); void *imalloct(tsd_t *tsd, size_t size, szind_t ind, tcache_t *tcache, @@ -877,9 +881,9 @@ void *ipallocztm(tsd_t *tsd, size_t usize, size_t alignment, bool zero, void *ipalloct(tsd_t *tsd, size_t usize, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena); void *ipalloc(tsd_t *tsd, size_t usize, size_t alignment, bool zero); -size_t ivsalloc(const void *ptr, bool demote); +size_t ivsalloc(tsd_t *tsd, const void *ptr, bool demote); size_t u2rz(size_t usize); -size_t p2rz(const void *ptr); +size_t p2rz(tsd_t *tsd, const void *ptr); void idalloctm(tsd_t *tsd, void *ptr, tcache_t *tcache, bool is_metadata, bool slow_path); void idalloct(tsd_t *tsd, void *ptr, tcache_t *tcache); @@ -914,14 +918,14 @@ iaalloc(const void *ptr) * size_t sz = isalloc(ptr, config_prof); */ JEMALLOC_ALWAYS_INLINE size_t -isalloc(const void *ptr, bool demote) +isalloc(tsd_t *tsd, const void *ptr, bool demote) { assert(ptr != NULL); /* Demotion only makes sense if config_prof is true. */ assert(config_prof || !demote); - return (arena_salloc(ptr, demote)); + return (arena_salloc(tsd, ptr, demote)); } JEMALLOC_ALWAYS_INLINE void * @@ -934,7 +938,7 @@ iallocztm(tsd_t *tsd, size_t size, szind_t ind, bool zero, tcache_t *tcache, ret = arena_malloc(tsd, arena, size, ind, zero, tcache, slow_path); if (config_stats && is_metadata && likely(ret != NULL)) { - arena_metadata_allocated_add(iaalloc(ret), isalloc(ret, + arena_metadata_allocated_add(iaalloc(ret), isalloc(tsd, ret, config_prof)); } return (ret); @@ -982,7 +986,7 @@ ipallocztm(tsd_t *tsd, size_t usize, size_t alignment, bool zero, ret = arena_palloc(tsd, arena, usize, alignment, zero, tcache); assert(ALIGNMENT_ADDR2BASE(ret, alignment) == ret); if (config_stats && is_metadata && likely(ret != NULL)) { - arena_metadata_allocated_add(iaalloc(ret), isalloc(ret, + arena_metadata_allocated_add(iaalloc(ret), isalloc(tsd, ret, config_prof)); } return (ret); @@ -1005,7 +1009,7 @@ ipalloc(tsd_t *tsd, size_t usize, size_t alignment, bool zero) } JEMALLOC_ALWAYS_INLINE size_t -ivsalloc(const void *ptr, bool demote) +ivsalloc(tsd_t *tsd, const void *ptr, bool demote) { extent_node_t *node; @@ -1017,7 +1021,7 @@ ivsalloc(const void *ptr, bool demote) assert(extent_node_addr_get(node) == ptr || extent_node_achunk_get(node)); - return (isalloc(ptr, demote)); + return (isalloc(tsd, ptr, demote)); } JEMALLOC_INLINE size_t @@ -1035,9 +1039,9 @@ u2rz(size_t usize) } JEMALLOC_INLINE size_t -p2rz(const void *ptr) +p2rz(tsd_t *tsd, const void *ptr) { - size_t usize = isalloc(ptr, false); + size_t usize = isalloc(tsd, ptr, false); return (u2rz(usize)); } @@ -1049,7 +1053,7 @@ idalloctm(tsd_t *tsd, void *ptr, tcache_t *tcache, bool is_metadata, assert(ptr != NULL); if (config_stats && is_metadata) { - arena_metadata_allocated_sub(iaalloc(ptr), isalloc(ptr, + arena_metadata_allocated_sub(iaalloc(ptr), isalloc(tsd, ptr, config_prof)); } diff --git a/include/jemalloc/internal/mb.h b/include/jemalloc/internal/mb.h index 3cfa7872..de54f508 100644 --- a/include/jemalloc/internal/mb.h +++ b/include/jemalloc/internal/mb.h @@ -104,9 +104,9 @@ mb_write(void) { malloc_mutex_t mtx; - malloc_mutex_init(&mtx); - malloc_mutex_lock(&mtx); - malloc_mutex_unlock(&mtx); + malloc_mutex_init(&mtx, MALLOC_MUTEX_RANK_OMIT); + malloc_mutex_lock(NULL, &mtx); + 
malloc_mutex_unlock(NULL, &mtx); } #endif #endif diff --git a/include/jemalloc/internal/mutex.h b/include/jemalloc/internal/mutex.h index f051f291..7d19a0f4 100644 --- a/include/jemalloc/internal/mutex.h +++ b/include/jemalloc/internal/mutex.h @@ -6,17 +6,21 @@ typedef struct malloc_mutex_s malloc_mutex_t; #ifdef _WIN32 # define MALLOC_MUTEX_INITIALIZER #elif (defined(JEMALLOC_OSSPIN)) -# define MALLOC_MUTEX_INITIALIZER {0} +# define MALLOC_MUTEX_INITIALIZER {0, WITNESS_INITIALIZER(WITNESS_RANK_OMIT)} #elif (defined(JEMALLOC_MUTEX_INIT_CB)) -# define MALLOC_MUTEX_INITIALIZER {PTHREAD_MUTEX_INITIALIZER, NULL} +# define MALLOC_MUTEX_INITIALIZER \ + {PTHREAD_MUTEX_INITIALIZER, NULL, WITNESS_INITIALIZER(WITNESS_RANK_OMIT)} #else # if (defined(JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP) && \ defined(PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP)) # define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_ADAPTIVE_NP -# define MALLOC_MUTEX_INITIALIZER {PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP} +# define MALLOC_MUTEX_INITIALIZER \ + {PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP, \ + WITNESS_INITIALIZER(WITNESS_RANK_OMIT)} # else # define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_DEFAULT -# define MALLOC_MUTEX_INITIALIZER {PTHREAD_MUTEX_INITIALIZER} +# define MALLOC_MUTEX_INITIALIZER \ + {PTHREAD_MUTEX_INITIALIZER, WITNESS_INITIALIZER(WITNESS_RANK_OMIT)} # endif #endif @@ -39,6 +43,7 @@ struct malloc_mutex_s { #else pthread_mutex_t lock; #endif + witness_t witness; }; #endif /* JEMALLOC_H_STRUCTS */ @@ -52,27 +57,31 @@ extern bool isthreaded; # define isthreaded true #endif -bool malloc_mutex_init(malloc_mutex_t *mutex); -void malloc_mutex_prefork(malloc_mutex_t *mutex); -void malloc_mutex_postfork_parent(malloc_mutex_t *mutex); -void malloc_mutex_postfork_child(malloc_mutex_t *mutex); -bool mutex_boot(void); +bool malloc_mutex_init(malloc_mutex_t *mutex, const char *name, + witness_rank_t rank); +void malloc_mutex_prefork(tsd_t *tsd, malloc_mutex_t *mutex); +void malloc_mutex_postfork_parent(tsd_t *tsd, malloc_mutex_t *mutex); +void malloc_mutex_postfork_child(tsd_t *tsd, malloc_mutex_t *mutex); +bool malloc_mutex_boot(void); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ #ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE -void malloc_mutex_lock(malloc_mutex_t *mutex); -void malloc_mutex_unlock(malloc_mutex_t *mutex); +void malloc_mutex_lock(tsd_t *tsd, malloc_mutex_t *mutex); +void malloc_mutex_unlock(tsd_t *tsd, malloc_mutex_t *mutex); +void malloc_mutex_assert_owner(tsd_t *tsd, malloc_mutex_t *mutex); +void malloc_mutex_assert_not_owner(tsd_t *tsd, malloc_mutex_t *mutex); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_MUTEX_C_)) JEMALLOC_INLINE void -malloc_mutex_lock(malloc_mutex_t *mutex) +malloc_mutex_lock(tsd_t *tsd, malloc_mutex_t *mutex) { if (isthreaded) { + witness_assert_not_owner(tsd, &mutex->witness); #ifdef _WIN32 # if _WIN32_WINNT >= 0x0600 AcquireSRWLockExclusive(&mutex->lock); @@ -84,14 +93,19 @@ malloc_mutex_lock(malloc_mutex_t *mutex) #else pthread_mutex_lock(&mutex->lock); #endif + if (config_debug) + witness_lock(tsd, &mutex->witness); } } JEMALLOC_INLINE void -malloc_mutex_unlock(malloc_mutex_t *mutex) +malloc_mutex_unlock(tsd_t *tsd, malloc_mutex_t *mutex) { if (isthreaded) { + witness_assert_owner(tsd, &mutex->witness); + if (config_debug) + witness_unlock(tsd, &mutex->witness); #ifdef _WIN32 # if _WIN32_WINNT >= 0x0600 ReleaseSRWLockExclusive(&mutex->lock); @@ -105,6 +119,22 @@ malloc_mutex_unlock(malloc_mutex_t *mutex) #endif } } 
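
[Illustrative aside, not part of the patch: the hunk above threads tsd through malloc_mutex_lock()/malloc_mutex_unlock() so that each acquire and release can be checked against the calling thread's list of owned witnesses; witness_lock()/witness_unlock() are invoked only under config_debug, and the witness API itself is introduced in include/jemalloc/internal/witness.h further down in this patch. The standalone C sketch below shows just the rank-ordering idea. The toy_* names and the fixed-size per-thread array are invented for this sketch; the real code keeps a ql list in tsd and allows witnesses of equal rank to be ordered by a comp callback. The example ranks mirror WITNESS_RANK_ARENAS (2U) and WITNESS_RANK_ARENA (8U) from witness.h.

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

/* Toy witness: a name plus a rank that must increase along any lock path. */
typedef struct {
	const char *name;
	unsigned rank;
} toy_witness_t;

/* Per-thread stack of currently owned witnesses (stands in for tsd). */
#define TOY_MAX_HELD 8
static __thread toy_witness_t *toy_held[TOY_MAX_HELD];
static __thread unsigned toy_nheld;

static void
toy_lock(toy_witness_t *w)
{
	/* A lock order reversal is any acquire that does not increase rank. */
	if (toy_nheld > 0 && w->rank <= toy_held[toy_nheld - 1]->rank) {
		fprintf(stderr, "lock order reversal: %s after %s\n",
		    w->name, toy_held[toy_nheld - 1]->name);
		abort();
	}
	assert(toy_nheld < TOY_MAX_HELD);
	toy_held[toy_nheld++] = w;
	/* ... acquire the underlying mutex here ... */
}

static void
toy_unlock(toy_witness_t *w)
{
	/* ... release the underlying mutex here ... */
	assert(toy_nheld > 0 && toy_held[toy_nheld - 1] == w);
	toy_nheld--;
}

int
main(void)
{
	toy_witness_t arenas = {"arenas", 2};
	toy_witness_t arena = {"arena", 8};

	toy_lock(&arenas);
	toy_lock(&arena);	/* OK: rank increases (2 -> 8). */
	toy_unlock(&arena);
	toy_unlock(&arenas);
	return (0);
}

The sketch assumes strictly nested (LIFO) locking and a GCC/Clang toolchain for __thread. End of aside; the patch continues below.]
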
+ +JEMALLOC_INLINE void +malloc_mutex_assert_owner(tsd_t *tsd, malloc_mutex_t *mutex) +{ + + if (config_debug) + witness_assert_owner(tsd, &mutex->witness); +} + +JEMALLOC_INLINE void +malloc_mutex_assert_not_owner(tsd_t *tsd, malloc_mutex_t *mutex) +{ + + if (config_debug) + witness_assert_not_owner(tsd, &mutex->witness); +} #endif #endif /* JEMALLOC_H_INLINES */ diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 551cb937..be5d30e7 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -314,6 +314,9 @@ lg_floor malloc_cprintf malloc_mutex_init malloc_mutex_lock +malloc_mutex_assert_not_owner +malloc_mutex_assert_owner +malloc_mutex_boot malloc_mutex_postfork_child malloc_mutex_postfork_parent malloc_mutex_prefork @@ -333,7 +336,6 @@ malloc_write map_bias map_misc_offset mb_write -mutex_boot narenas_tdata_cleanup narenas_total_get ncpus @@ -548,3 +550,14 @@ valgrind_freelike_block valgrind_make_mem_defined valgrind_make_mem_noaccess valgrind_make_mem_undefined +witness_assert_lockless +witness_assert_not_owner +witness_assert_owner +witness_init +witness_lock +witness_lock_error +witness_lockless_error +witness_not_owner_error +witness_owner_error +witness_unlock +witnesses_cleanup diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index a25502a9..047bd0b7 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -281,7 +281,7 @@ extern uint64_t prof_interval; extern size_t lg_prof_sample; void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated); -void prof_malloc_sample_object(const void *ptr, size_t usize, +void prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx); void prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_tctx_t *tctx); void bt_init(prof_bt_t *bt, void **vec); @@ -293,32 +293,32 @@ size_t prof_bt_count(void); const prof_cnt_t *prof_cnt_all(void); typedef int (prof_dump_open_t)(bool, const char *); extern prof_dump_open_t *prof_dump_open; -typedef bool (prof_dump_header_t)(bool, const prof_cnt_t *); +typedef bool (prof_dump_header_t)(tsd_t *, bool, const prof_cnt_t *); extern prof_dump_header_t *prof_dump_header; #endif -void prof_idump(void); -bool prof_mdump(const char *filename); -void prof_gdump(void); +void prof_idump(tsd_t *tsd); +bool prof_mdump(tsd_t *tsd, const char *filename); +void prof_gdump(tsd_t *tsd); prof_tdata_t *prof_tdata_init(tsd_t *tsd); prof_tdata_t *prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata); void prof_reset(tsd_t *tsd, size_t lg_sample); void prof_tdata_cleanup(tsd_t *tsd); -const char *prof_thread_name_get(void); -bool prof_active_get(void); -bool prof_active_set(bool active); +const char *prof_thread_name_get(tsd_t *tsd); +bool prof_active_get(tsd_t *tsd); +bool prof_active_set(tsd_t *tsd, bool active); int prof_thread_name_set(tsd_t *tsd, const char *thread_name); -bool prof_thread_active_get(void); -bool prof_thread_active_set(bool active); -bool prof_thread_active_init_get(void); -bool prof_thread_active_init_set(bool active_init); -bool prof_gdump_get(void); -bool prof_gdump_set(bool active); +bool prof_thread_active_get(tsd_t *tsd); +bool prof_thread_active_set(tsd_t *tsd, bool active); +bool prof_thread_active_init_get(tsd_t *tsd); +bool prof_thread_active_init_set(tsd_t *tsd, bool active_init); +bool prof_gdump_get(tsd_t *tsd); +bool prof_gdump_set(tsd_t *tsd, bool active); void prof_boot0(void); void 
prof_boot1(void); -bool prof_boot2(void); -void prof_prefork(void); -void prof_postfork_parent(void); -void prof_postfork_child(void); +bool prof_boot2(tsd_t *tsd); +void prof_prefork(tsd_t *tsd); +void prof_postfork_parent(tsd_t *tsd); +void prof_postfork_child(tsd_t *tsd); void prof_sample_threshold_update(prof_tdata_t *tdata); #endif /* JEMALLOC_H_EXTERNS */ @@ -329,17 +329,17 @@ void prof_sample_threshold_update(prof_tdata_t *tdata); bool prof_active_get_unlocked(void); bool prof_gdump_get_unlocked(void); prof_tdata_t *prof_tdata_get(tsd_t *tsd, bool create); +prof_tctx_t *prof_tctx_get(tsd_t *tsd, const void *ptr); +void prof_tctx_set(tsd_t *tsd, const void *ptr, size_t usize, + prof_tctx_t *tctx); +void prof_tctx_reset(tsd_t *tsd, const void *ptr, size_t usize, + const void *old_ptr, prof_tctx_t *tctx); bool prof_sample_accum_update(tsd_t *tsd, size_t usize, bool commit, prof_tdata_t **tdata_out); prof_tctx_t *prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active, bool update); -prof_tctx_t *prof_tctx_get(const void *ptr); -void prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx); -void prof_tctx_reset(const void *ptr, size_t usize, const void *old_ptr, +void prof_malloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx); -void prof_malloc_sample_object(const void *ptr, size_t usize, - prof_tctx_t *tctx); -void prof_malloc(const void *ptr, size_t usize, prof_tctx_t *tctx); void prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, bool prof_active, bool updated, const void *old_ptr, size_t old_usize, prof_tctx_t *old_tctx); @@ -397,34 +397,34 @@ prof_tdata_get(tsd_t *tsd, bool create) } JEMALLOC_ALWAYS_INLINE prof_tctx_t * -prof_tctx_get(const void *ptr) +prof_tctx_get(tsd_t *tsd, const void *ptr) { cassert(config_prof); assert(ptr != NULL); - return (arena_prof_tctx_get(ptr)); + return (arena_prof_tctx_get(tsd, ptr)); } JEMALLOC_ALWAYS_INLINE void -prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx) +prof_tctx_set(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx) { cassert(config_prof); assert(ptr != NULL); - arena_prof_tctx_set(ptr, usize, tctx); + arena_prof_tctx_set(tsd, ptr, usize, tctx); } JEMALLOC_ALWAYS_INLINE void -prof_tctx_reset(const void *ptr, size_t usize, const void *old_ptr, +prof_tctx_reset(tsd_t *tsd, const void *ptr, size_t usize, const void *old_ptr, prof_tctx_t *old_tctx) { cassert(config_prof); assert(ptr != NULL); - arena_prof_tctx_reset(ptr, usize, old_ptr, old_tctx); + arena_prof_tctx_reset(tsd, ptr, usize, old_ptr, old_tctx); } JEMALLOC_ALWAYS_INLINE bool @@ -479,17 +479,17 @@ prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active, bool update) } JEMALLOC_ALWAYS_INLINE void -prof_malloc(const void *ptr, size_t usize, prof_tctx_t *tctx) +prof_malloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx) { cassert(config_prof); assert(ptr != NULL); - assert(usize == isalloc(ptr, true)); + assert(usize == isalloc(tsd, ptr, true)); if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) - prof_malloc_sample_object(ptr, usize, tctx); + prof_malloc_sample_object(tsd, ptr, usize, tctx); else - prof_tctx_set(ptr, usize, (prof_tctx_t *)(uintptr_t)1U); + prof_tctx_set(tsd, ptr, usize, (prof_tctx_t *)(uintptr_t)1U); } JEMALLOC_ALWAYS_INLINE void @@ -503,7 +503,7 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, assert(ptr != NULL || (uintptr_t)tctx <= (uintptr_t)1U); if (prof_active && !updated && ptr != NULL) { - assert(usize == isalloc(ptr, true)); + assert(usize == 
isalloc(tsd, ptr, true)); if (prof_sample_accum_update(tsd, usize, true, NULL)) { /* * Don't sample. The usize passed to prof_alloc_prep() @@ -520,9 +520,9 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, old_sampled = ((uintptr_t)old_tctx > (uintptr_t)1U); if (unlikely(sampled)) - prof_malloc_sample_object(ptr, usize, tctx); + prof_malloc_sample_object(tsd, ptr, usize, tctx); else - prof_tctx_reset(ptr, usize, old_ptr, old_tctx); + prof_tctx_reset(tsd, ptr, usize, old_ptr, old_tctx); if (unlikely(old_sampled)) prof_free_sampled_object(tsd, old_usize, old_tctx); @@ -531,10 +531,10 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, JEMALLOC_ALWAYS_INLINE void prof_free(tsd_t *tsd, const void *ptr, size_t usize) { - prof_tctx_t *tctx = prof_tctx_get(ptr); + prof_tctx_t *tctx = prof_tctx_get(tsd, ptr); cassert(config_prof); - assert(usize == isalloc(ptr, true)); + assert(usize == isalloc(tsd, ptr, true)); if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) prof_free_sampled_object(tsd, usize, tctx); diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index 1edd39fd..1aa64631 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -130,7 +130,7 @@ extern size_t tcache_maxclass; */ extern tcaches_t *tcaches; -size_t tcache_salloc(const void *ptr); +size_t tcache_salloc(tsd_t *tsd, const void *ptr); void tcache_event_hard(tsd_t *tsd, tcache_t *tcache); void *tcache_alloc_small_hard(tsd_t *tsd, arena_t *arena, tcache_t *tcache, tcache_bin_t *tbin, szind_t binind, bool *tcache_success); @@ -138,19 +138,19 @@ void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, szind_t binind, unsigned rem); void tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, unsigned rem, tcache_t *tcache); -void tcache_arena_associate(tcache_t *tcache, arena_t *arena); -void tcache_arena_reassociate(tcache_t *tcache, arena_t *oldarena, - arena_t *newarena); -void tcache_arena_dissociate(tcache_t *tcache, arena_t *arena); +void tcache_arena_associate(tsd_t *tsd, tcache_t *tcache, arena_t *arena); +void tcache_arena_reassociate(tsd_t *tsd, tcache_t *tcache, + arena_t *oldarena, arena_t *newarena); +void tcache_arena_dissociate(tsd_t *tsd, tcache_t *tcache, arena_t *arena); tcache_t *tcache_get_hard(tsd_t *tsd); tcache_t *tcache_create(tsd_t *tsd, arena_t *arena); void tcache_cleanup(tsd_t *tsd); void tcache_enabled_cleanup(tsd_t *tsd); -void tcache_stats_merge(tcache_t *tcache, arena_t *arena); +void tcache_stats_merge(tsd_t *tsd, tcache_t *tcache, arena_t *arena); bool tcaches_create(tsd_t *tsd, unsigned *r_ind); void tcaches_flush(tsd_t *tsd, unsigned ind); void tcaches_destroy(tsd_t *tsd, unsigned ind); -bool tcache_boot(void); +bool tcache_boot(tsd_t *tsd); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ @@ -310,7 +310,7 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, */ if (config_prof || (slow_path && config_fill) || unlikely(zero)) { usize = index2size(binind); - assert(tcache_salloc(ret) == usize); + assert(tcache_salloc(tsd, ret) == usize); } if (likely(!zero)) { @@ -407,7 +407,7 @@ tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, tcache_bin_t *tbin; tcache_bin_info_t *tbin_info; - assert(tcache_salloc(ptr) <= SMALL_MAXCLASS); + assert(tcache_salloc(tsd, ptr) <= SMALL_MAXCLASS); if (slow_path && config_fill && 
unlikely(opt_junk_free)) arena_dalloc_junk_small(ptr, &arena_bin_info[binind]); @@ -434,8 +434,8 @@ tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, size_t size, tcache_bin_info_t *tbin_info; assert((size & PAGE_MASK) == 0); - assert(tcache_salloc(ptr) > SMALL_MAXCLASS); - assert(tcache_salloc(ptr) <= tcache_maxclass); + assert(tcache_salloc(tsd, ptr) > SMALL_MAXCLASS); + assert(tcache_salloc(tsd, ptr) <= tcache_maxclass); binind = size2index(size); diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 16cc2f17..b23b3b4c 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -542,6 +542,7 @@ struct tsd_init_head_s { O(arenas_tdata_bypass, bool) \ O(tcache_enabled, tcache_enabled_t) \ O(quarantine, quarantine_t *) \ + O(witnesses, witness_list_t) \ #define TSD_INITIALIZER { \ tsd_state_uninitialized, \ @@ -554,7 +555,8 @@ struct tsd_init_head_s { 0, \ false, \ tcache_enabled_default, \ - NULL \ + NULL, \ + ql_head_initializer(witnesses) \ } struct tsd_s { @@ -577,7 +579,7 @@ void *malloc_tsd_malloc(size_t size); void malloc_tsd_dalloc(void *wrapper); void malloc_tsd_no_cleanup(void *arg); void malloc_tsd_cleanup_register(bool (*f)(void)); -bool malloc_tsd_boot0(void); +tsd_t *malloc_tsd_boot0(void); void malloc_tsd_boot1(void); #if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) && \ !defined(_WIN32)) diff --git a/include/jemalloc/internal/valgrind.h b/include/jemalloc/internal/valgrind.h index a3380df9..7c6a62fa 100644 --- a/include/jemalloc/internal/valgrind.h +++ b/include/jemalloc/internal/valgrind.h @@ -30,15 +30,17 @@ * calls must be embedded in macros rather than in functions so that when * Valgrind reports errors, there are no extra stack frames in the backtraces. */ -#define JEMALLOC_VALGRIND_MALLOC(cond, ptr, usize, zero) do { \ - if (unlikely(in_valgrind && cond)) \ - VALGRIND_MALLOCLIKE_BLOCK(ptr, usize, p2rz(ptr), zero); \ +#define JEMALLOC_VALGRIND_MALLOC(cond, tsd, ptr, usize, zero) do { \ + if (unlikely(in_valgrind && cond)) { \ + VALGRIND_MALLOCLIKE_BLOCK(ptr, usize, p2rz(tsd, ptr), \ + zero); \ + } \ } while (0) -#define JEMALLOC_VALGRIND_REALLOC(maybe_moved, ptr, usize, \ +#define JEMALLOC_VALGRIND_REALLOC(maybe_moved, tsd, ptr, usize, \ ptr_maybe_null, old_ptr, old_usize, old_rzsize, old_ptr_maybe_null, \ zero) do { \ if (unlikely(in_valgrind)) { \ - size_t rzsize = p2rz(ptr); \ + size_t rzsize = p2rz(tsd, ptr); \ \ if (!maybe_moved || ptr == old_ptr) { \ VALGRIND_RESIZEINPLACE_BLOCK(ptr, old_usize, \ diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h new file mode 100644 index 00000000..22f0b2c7 --- /dev/null +++ b/include/jemalloc/internal/witness.h @@ -0,0 +1,103 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +typedef struct witness_s witness_t; +typedef unsigned witness_rank_t; +typedef ql_head(witness_t) witness_list_t; +typedef int witness_comp_t (const witness_t *, const witness_t *); + +/* + * Lock ranks. Witnesses with rank WITNESS_RANK_OMIT are completely ignored by + * the witness machinery. 
+ */
+#define WITNESS_RANK_OMIT 0U
+
+#define WITNESS_RANK_INIT 1U
+#define WITNESS_RANK_CTL 1U
+#define WITNESS_RANK_ARENAS 2U
+
+#define WITNESS_RANK_PROF_DUMP 3U
+#define WITNESS_RANK_PROF_BT2GCTX 4U
+#define WITNESS_RANK_PROF_TDATAS 5U
+#define WITNESS_RANK_PROF_TDATA 6U
+#define WITNESS_RANK_PROF_GCTX 7U
+
+#define WITNESS_RANK_ARENA 8U
+#define WITNESS_RANK_ARENA_CHUNKS 9U
+#define WITNESS_RANK_ARENA_NODE_CACHE 10
+
+#define WITNESS_RANK_BASE 11U
+
+#define WITNESS_RANK_LEAF 0xffffffffU
+#define WITNESS_RANK_ARENA_BIN WITNESS_RANK_LEAF
+#define WITNESS_RANK_ARENA_HUGE WITNESS_RANK_LEAF
+#define WITNESS_RANK_DSS WITNESS_RANK_LEAF
+#define WITNESS_RANK_PROF_ACTIVE WITNESS_RANK_LEAF
+#define WITNESS_RANK_PROF_DUMP_SEQ WITNESS_RANK_LEAF
+#define WITNESS_RANK_PROF_GDUMP WITNESS_RANK_LEAF
+#define WITNESS_RANK_PROF_NEXT_THR_UID WITNESS_RANK_LEAF
+#define WITNESS_RANK_PROF_THREAD_ACTIVE_INIT WITNESS_RANK_LEAF
+
+#define WITNESS_INITIALIZER(rank) {"initializer", rank, NULL, {NULL, NULL}}
+
+#endif /* JEMALLOC_H_TYPES */
+/******************************************************************************/
+#ifdef JEMALLOC_H_STRUCTS
+
+struct witness_s {
+ /* Name, used for printing lock order reversal messages. */
+ const char *name;
+
+ /*
+ * Witness rank, where 0 is lowest and UINT_MAX is highest. Witnesses
+ * must be acquired in order of increasing rank.
+ */
+ witness_rank_t rank;
+
+ /*
+ * If two witnesses are of equal rank and they have the same comp
+ * function pointer, it is called as a last attempt to differentiate
+ * between witnesses of equal rank.
+ */
+ witness_comp_t *comp;
+
+ /* Linkage for thread's currently owned locks. */
+ ql_elm(witness_t) link;
+};
+
+#endif /* JEMALLOC_H_STRUCTS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_EXTERNS
+
+void witness_init(witness_t *witness, const char *name, witness_rank_t rank,
+ witness_comp_t *comp);
+#ifdef JEMALLOC_JET
+typedef void (witness_lock_error_t)(const witness_list_t *, const witness_t *);
+extern witness_lock_error_t *witness_lock_error;
+#endif
+void witness_lock(tsd_t *tsd, witness_t *witness);
+void witness_unlock(tsd_t *tsd, witness_t *witness);
+#ifdef JEMALLOC_JET
+typedef void (witness_owner_error_t)(const witness_t *);
+extern witness_owner_error_t *witness_owner_error;
+#endif
+void witness_assert_owner(tsd_t *tsd, const witness_t *witness);
+#ifdef JEMALLOC_JET
+typedef void (witness_not_owner_error_t)(const witness_t *);
+extern witness_not_owner_error_t *witness_not_owner_error;
+#endif
+void witness_assert_not_owner(tsd_t *tsd, const witness_t *witness);
+#ifdef JEMALLOC_JET
+typedef void (witness_lockless_error_t)(const witness_list_t *);
+extern witness_lockless_error_t *witness_lockless_error;
+#endif
+void witness_assert_lockless(tsd_t *tsd);
+
+void witnesses_cleanup(tsd_t *tsd);
+
+#endif /* JEMALLOC_H_EXTERNS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_INLINES
+
+#endif /* JEMALLOC_H_INLINES */
+/******************************************************************************/
diff --git a/src/arena.c b/src/arena.c
index a9566af1..cc648e31 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -37,11 +37,12 @@ static szind_t runs_avail_nclasses; /* Number of runs_avail trees. */
 * definition.
*/ -static void arena_purge_to_limit(arena_t *arena, size_t ndirty_limit); -static void arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, - bool cleaned, bool decommitted); -static void arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, - arena_run_t *run, arena_bin_t *bin); +static void arena_purge_to_limit(tsd_t *tsd, arena_t *arena, + size_t ndirty_limit); +static void arena_run_dalloc(tsd_t *tsd, arena_t *arena, arena_run_t *run, + bool dirty, bool cleaned, bool decommitted); +static void arena_dalloc_bin_run(tsd_t *tsd, arena_t *arena, + arena_chunk_t *chunk, arena_run_t *run, arena_bin_t *bin); static void arena_bin_lower_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, arena_bin_t *bin); @@ -591,7 +592,8 @@ arena_chunk_init_spare(arena_t *arena) } static bool -arena_chunk_register(arena_t *arena, arena_chunk_t *chunk, bool zero) +arena_chunk_register(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, + bool zero) { /* @@ -602,62 +604,62 @@ arena_chunk_register(arena_t *arena, arena_chunk_t *chunk, bool zero) */ extent_node_init(&chunk->node, arena, chunk, chunksize, zero, true); extent_node_achunk_set(&chunk->node, true); - return (chunk_register(chunk, &chunk->node)); + return (chunk_register(tsd, chunk, &chunk->node)); } static arena_chunk_t * -arena_chunk_alloc_internal_hard(arena_t *arena, chunk_hooks_t *chunk_hooks, - bool *zero, bool *commit) +arena_chunk_alloc_internal_hard(tsd_t *tsd, arena_t *arena, + chunk_hooks_t *chunk_hooks, bool *zero, bool *commit) { arena_chunk_t *chunk; - malloc_mutex_unlock(&arena->lock); + malloc_mutex_unlock(tsd, &arena->lock); - chunk = (arena_chunk_t *)chunk_alloc_wrapper(arena, chunk_hooks, NULL, - chunksize, chunksize, zero, commit); + chunk = (arena_chunk_t *)chunk_alloc_wrapper(tsd, arena, chunk_hooks, + NULL, chunksize, chunksize, zero, commit); if (chunk != NULL && !*commit) { /* Commit header. */ if (chunk_hooks->commit(chunk, chunksize, 0, map_bias << LG_PAGE, arena->ind)) { - chunk_dalloc_wrapper(arena, chunk_hooks, (void *)chunk, - chunksize, *zero, *commit); + chunk_dalloc_wrapper(tsd, arena, chunk_hooks, + (void *)chunk, chunksize, *zero, *commit); chunk = NULL; } } - if (chunk != NULL && arena_chunk_register(arena, chunk, *zero)) { + if (chunk != NULL && arena_chunk_register(tsd, arena, chunk, *zero)) { if (!*commit) { /* Undo commit of header. 
*/ chunk_hooks->decommit(chunk, chunksize, 0, map_bias << LG_PAGE, arena->ind); } - chunk_dalloc_wrapper(arena, chunk_hooks, (void *)chunk, + chunk_dalloc_wrapper(tsd, arena, chunk_hooks, (void *)chunk, chunksize, *zero, *commit); chunk = NULL; } - malloc_mutex_lock(&arena->lock); + malloc_mutex_lock(tsd, &arena->lock); return (chunk); } static arena_chunk_t * -arena_chunk_alloc_internal(arena_t *arena, bool *zero, bool *commit) +arena_chunk_alloc_internal(tsd_t *tsd, arena_t *arena, bool *zero, bool *commit) { arena_chunk_t *chunk; chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; - chunk = chunk_alloc_cache(arena, &chunk_hooks, NULL, chunksize, + chunk = chunk_alloc_cache(tsd, arena, &chunk_hooks, NULL, chunksize, chunksize, zero, true); if (chunk != NULL) { - if (arena_chunk_register(arena, chunk, *zero)) { - chunk_dalloc_cache(arena, &chunk_hooks, chunk, + if (arena_chunk_register(tsd, arena, chunk, *zero)) { + chunk_dalloc_cache(tsd, arena, &chunk_hooks, chunk, chunksize, true); return (NULL); } *commit = true; } if (chunk == NULL) { - chunk = arena_chunk_alloc_internal_hard(arena, &chunk_hooks, - zero, commit); + chunk = arena_chunk_alloc_internal_hard(tsd, arena, + &chunk_hooks, zero, commit); } if (config_stats && chunk != NULL) { @@ -669,7 +671,7 @@ arena_chunk_alloc_internal(arena_t *arena, bool *zero, bool *commit) } static arena_chunk_t * -arena_chunk_init_hard(arena_t *arena) +arena_chunk_init_hard(tsd_t *tsd, arena_t *arena) { arena_chunk_t *chunk; bool zero, commit; @@ -679,7 +681,7 @@ arena_chunk_init_hard(arena_t *arena) zero = false; commit = false; - chunk = arena_chunk_alloc_internal(arena, &zero, &commit); + chunk = arena_chunk_alloc_internal(tsd, arena, &zero, &commit); if (chunk == NULL) return (NULL); @@ -724,14 +726,14 @@ arena_chunk_init_hard(arena_t *arena) } static arena_chunk_t * -arena_chunk_alloc(arena_t *arena) +arena_chunk_alloc(tsd_t *tsd, arena_t *arena) { arena_chunk_t *chunk; if (arena->spare != NULL) chunk = arena_chunk_init_spare(arena); else { - chunk = arena_chunk_init_hard(arena); + chunk = arena_chunk_init_hard(tsd, arena); if (chunk == NULL) return (NULL); } @@ -742,7 +744,7 @@ arena_chunk_alloc(arena_t *arena) } static void -arena_chunk_dalloc(arena_t *arena, arena_chunk_t *chunk) +arena_chunk_dalloc(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk) { assert(arena_mapbits_allocated_get(chunk, map_bias) == 0); @@ -782,12 +784,12 @@ arena_chunk_dalloc(arena_t *arena, arena_chunk_t *chunk) * potential for causing later access of decommitted * memory. 
*/ - chunk_hooks = chunk_hooks_get(arena); + chunk_hooks = chunk_hooks_get(tsd, arena); chunk_hooks.decommit(spare, chunksize, 0, map_bias << LG_PAGE, arena->ind); } - chunk_dalloc_cache(arena, &chunk_hooks, (void *)spare, + chunk_dalloc_cache(tsd, arena, &chunk_hooks, (void *)spare, chunksize, committed); if (config_stats) { @@ -868,63 +870,64 @@ arena_huge_ralloc_stats_update_undo(arena_t *arena, size_t oldsize, } extent_node_t * -arena_node_alloc(arena_t *arena) +arena_node_alloc(tsd_t *tsd, arena_t *arena) { extent_node_t *node; - malloc_mutex_lock(&arena->node_cache_mtx); + malloc_mutex_lock(tsd, &arena->node_cache_mtx); node = ql_last(&arena->node_cache, ql_link); if (node == NULL) { - malloc_mutex_unlock(&arena->node_cache_mtx); - return (base_alloc(sizeof(extent_node_t))); + malloc_mutex_unlock(tsd, &arena->node_cache_mtx); + return (base_alloc(tsd, sizeof(extent_node_t))); } ql_tail_remove(&arena->node_cache, extent_node_t, ql_link); - malloc_mutex_unlock(&arena->node_cache_mtx); + malloc_mutex_unlock(tsd, &arena->node_cache_mtx); return (node); } void -arena_node_dalloc(arena_t *arena, extent_node_t *node) +arena_node_dalloc(tsd_t *tsd, arena_t *arena, extent_node_t *node) { - malloc_mutex_lock(&arena->node_cache_mtx); + malloc_mutex_lock(tsd, &arena->node_cache_mtx); ql_elm_new(node, ql_link); ql_tail_insert(&arena->node_cache, node, ql_link); - malloc_mutex_unlock(&arena->node_cache_mtx); + malloc_mutex_unlock(tsd, &arena->node_cache_mtx); } static void * -arena_chunk_alloc_huge_hard(arena_t *arena, chunk_hooks_t *chunk_hooks, - size_t usize, size_t alignment, bool *zero, size_t csize) +arena_chunk_alloc_huge_hard(tsd_t *tsd, arena_t *arena, + chunk_hooks_t *chunk_hooks, size_t usize, size_t alignment, bool *zero, + size_t csize) { void *ret; bool commit = true; - ret = chunk_alloc_wrapper(arena, chunk_hooks, NULL, csize, alignment, - zero, &commit); + ret = chunk_alloc_wrapper(tsd, arena, chunk_hooks, NULL, csize, + alignment, zero, &commit); if (ret == NULL) { /* Revert optimistic stats updates. */ - malloc_mutex_lock(&arena->lock); + malloc_mutex_lock(tsd, &arena->lock); if (config_stats) { arena_huge_malloc_stats_update_undo(arena, usize); arena->stats.mapped -= usize; } arena_nactive_sub(arena, usize >> LG_PAGE); - malloc_mutex_unlock(&arena->lock); + malloc_mutex_unlock(tsd, &arena->lock); } return (ret); } void * -arena_chunk_alloc_huge(arena_t *arena, size_t usize, size_t alignment, - bool *zero) +arena_chunk_alloc_huge(tsd_t *tsd, arena_t *arena, size_t usize, + size_t alignment, bool *zero) { void *ret; chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; size_t csize = CHUNK_CEILING(usize); - malloc_mutex_lock(&arena->lock); + malloc_mutex_lock(tsd, &arena->lock); /* Optimistically update stats. 
*/ if (config_stats) { @@ -933,61 +936,61 @@ arena_chunk_alloc_huge(arena_t *arena, size_t usize, size_t alignment, } arena_nactive_add(arena, usize >> LG_PAGE); - ret = chunk_alloc_cache(arena, &chunk_hooks, NULL, csize, alignment, - zero, true); - malloc_mutex_unlock(&arena->lock); + ret = chunk_alloc_cache(tsd, arena, &chunk_hooks, NULL, csize, + alignment, zero, true); + malloc_mutex_unlock(tsd, &arena->lock); if (ret == NULL) { - ret = arena_chunk_alloc_huge_hard(arena, &chunk_hooks, usize, - alignment, zero, csize); + ret = arena_chunk_alloc_huge_hard(tsd, arena, &chunk_hooks, + usize, alignment, zero, csize); } return (ret); } void -arena_chunk_dalloc_huge(arena_t *arena, void *chunk, size_t usize) +arena_chunk_dalloc_huge(tsd_t *tsd, arena_t *arena, void *chunk, size_t usize) { chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; size_t csize; csize = CHUNK_CEILING(usize); - malloc_mutex_lock(&arena->lock); + malloc_mutex_lock(tsd, &arena->lock); if (config_stats) { arena_huge_dalloc_stats_update(arena, usize); arena->stats.mapped -= usize; } arena_nactive_sub(arena, usize >> LG_PAGE); - chunk_dalloc_cache(arena, &chunk_hooks, chunk, csize, true); - malloc_mutex_unlock(&arena->lock); + chunk_dalloc_cache(tsd, arena, &chunk_hooks, chunk, csize, true); + malloc_mutex_unlock(tsd, &arena->lock); } void -arena_chunk_ralloc_huge_similar(arena_t *arena, void *chunk, size_t oldsize, - size_t usize) +arena_chunk_ralloc_huge_similar(tsd_t *tsd, arena_t *arena, void *chunk, + size_t oldsize, size_t usize) { assert(CHUNK_CEILING(oldsize) == CHUNK_CEILING(usize)); assert(oldsize != usize); - malloc_mutex_lock(&arena->lock); + malloc_mutex_lock(tsd, &arena->lock); if (config_stats) arena_huge_ralloc_stats_update(arena, oldsize, usize); if (oldsize < usize) arena_nactive_add(arena, (usize - oldsize) >> LG_PAGE); else arena_nactive_sub(arena, (oldsize - usize) >> LG_PAGE); - malloc_mutex_unlock(&arena->lock); + malloc_mutex_unlock(tsd, &arena->lock); } void -arena_chunk_ralloc_huge_shrink(arena_t *arena, void *chunk, size_t oldsize, - size_t usize) +arena_chunk_ralloc_huge_shrink(tsd_t *tsd, arena_t *arena, void *chunk, + size_t oldsize, size_t usize) { size_t udiff = oldsize - usize; size_t cdiff = CHUNK_CEILING(oldsize) - CHUNK_CEILING(usize); - malloc_mutex_lock(&arena->lock); + malloc_mutex_lock(tsd, &arena->lock); if (config_stats) { arena_huge_ralloc_stats_update(arena, oldsize, usize); if (cdiff != 0) @@ -1000,51 +1003,52 @@ arena_chunk_ralloc_huge_shrink(arena_t *arena, void *chunk, size_t oldsize, void *nchunk = (void *)((uintptr_t)chunk + CHUNK_CEILING(usize)); - chunk_dalloc_cache(arena, &chunk_hooks, nchunk, cdiff, true); + chunk_dalloc_cache(tsd, arena, &chunk_hooks, nchunk, cdiff, + true); } - malloc_mutex_unlock(&arena->lock); + malloc_mutex_unlock(tsd, &arena->lock); } static bool -arena_chunk_ralloc_huge_expand_hard(arena_t *arena, chunk_hooks_t *chunk_hooks, - void *chunk, size_t oldsize, size_t usize, bool *zero, void *nchunk, - size_t udiff, size_t cdiff) +arena_chunk_ralloc_huge_expand_hard(tsd_t *tsd, arena_t *arena, + chunk_hooks_t *chunk_hooks, void *chunk, size_t oldsize, size_t usize, + bool *zero, void *nchunk, size_t udiff, size_t cdiff) { bool err; bool commit = true; - err = (chunk_alloc_wrapper(arena, chunk_hooks, nchunk, cdiff, chunksize, - zero, &commit) == NULL); + err = (chunk_alloc_wrapper(tsd, arena, chunk_hooks, nchunk, cdiff, + chunksize, zero, &commit) == NULL); if (err) { /* Revert optimistic stats updates. 
*/ - malloc_mutex_lock(&arena->lock); + malloc_mutex_lock(tsd, &arena->lock); if (config_stats) { arena_huge_ralloc_stats_update_undo(arena, oldsize, usize); arena->stats.mapped -= cdiff; } arena_nactive_sub(arena, udiff >> LG_PAGE); - malloc_mutex_unlock(&arena->lock); + malloc_mutex_unlock(tsd, &arena->lock); } else if (chunk_hooks->merge(chunk, CHUNK_CEILING(oldsize), nchunk, cdiff, true, arena->ind)) { - chunk_dalloc_wrapper(arena, chunk_hooks, nchunk, cdiff, *zero, - true); + chunk_dalloc_wrapper(tsd, arena, chunk_hooks, nchunk, cdiff, + *zero, true); err = true; } return (err); } bool -arena_chunk_ralloc_huge_expand(arena_t *arena, void *chunk, size_t oldsize, - size_t usize, bool *zero) +arena_chunk_ralloc_huge_expand(tsd_t *tsd, arena_t *arena, void *chunk, + size_t oldsize, size_t usize, bool *zero) { bool err; - chunk_hooks_t chunk_hooks = chunk_hooks_get(arena); + chunk_hooks_t chunk_hooks = chunk_hooks_get(tsd, arena); void *nchunk = (void *)((uintptr_t)chunk + CHUNK_CEILING(oldsize)); size_t udiff = usize - oldsize; size_t cdiff = CHUNK_CEILING(usize) - CHUNK_CEILING(oldsize); - malloc_mutex_lock(&arena->lock); + malloc_mutex_lock(tsd, &arena->lock); /* Optimistically update stats. */ if (config_stats) { @@ -1053,17 +1057,17 @@ arena_chunk_ralloc_huge_expand(arena_t *arena, void *chunk, size_t oldsize, } arena_nactive_add(arena, udiff >> LG_PAGE); - err = (chunk_alloc_cache(arena, &chunk_hooks, nchunk, cdiff, chunksize, - zero, true) == NULL); - malloc_mutex_unlock(&arena->lock); + err = (chunk_alloc_cache(tsd, arena, &chunk_hooks, nchunk, cdiff, + chunksize, zero, true) == NULL); + malloc_mutex_unlock(tsd, &arena->lock); if (err) { - err = arena_chunk_ralloc_huge_expand_hard(arena, &chunk_hooks, - chunk, oldsize, usize, zero, nchunk, udiff, + err = arena_chunk_ralloc_huge_expand_hard(tsd, arena, + &chunk_hooks, chunk, oldsize, usize, zero, nchunk, udiff, cdiff); } else if (chunk_hooks.merge(chunk, CHUNK_CEILING(oldsize), nchunk, cdiff, true, arena->ind)) { - chunk_dalloc_wrapper(arena, &chunk_hooks, nchunk, cdiff, *zero, - true); + chunk_dalloc_wrapper(tsd, arena, &chunk_hooks, nchunk, cdiff, + *zero, true); err = true; } @@ -1103,7 +1107,7 @@ arena_run_alloc_large_helper(arena_t *arena, size_t size, bool zero) } static arena_run_t * -arena_run_alloc_large(arena_t *arena, size_t size, bool zero) +arena_run_alloc_large(tsd_t *tsd, arena_t *arena, size_t size, bool zero) { arena_chunk_t *chunk; arena_run_t *run; @@ -1119,7 +1123,7 @@ arena_run_alloc_large(arena_t *arena, size_t size, bool zero) /* * No usable runs. Create a new chunk from which to allocate the run. */ - chunk = arena_chunk_alloc(arena); + chunk = arena_chunk_alloc(tsd, arena); if (chunk != NULL) { run = &arena_miscelm_get_mutable(chunk, map_bias)->run; if (arena_run_split_large(arena, run, size, zero)) @@ -1147,7 +1151,7 @@ arena_run_alloc_small_helper(arena_t *arena, size_t size, szind_t binind) } static arena_run_t * -arena_run_alloc_small(arena_t *arena, size_t size, szind_t binind) +arena_run_alloc_small(tsd_t *tsd, arena_t *arena, size_t size, szind_t binind) { arena_chunk_t *chunk; arena_run_t *run; @@ -1164,7 +1168,7 @@ arena_run_alloc_small(arena_t *arena, size_t size, szind_t binind) /* * No usable runs. Create a new chunk from which to allocate the run. 
*/ - chunk = arena_chunk_alloc(arena); + chunk = arena_chunk_alloc(tsd, arena); if (chunk != NULL) { run = &arena_miscelm_get_mutable(chunk, map_bias)->run; if (arena_run_split_small(arena, run, size, binind)) @@ -1189,28 +1193,28 @@ arena_lg_dirty_mult_valid(ssize_t lg_dirty_mult) } ssize_t -arena_lg_dirty_mult_get(arena_t *arena) +arena_lg_dirty_mult_get(tsd_t *tsd, arena_t *arena) { ssize_t lg_dirty_mult; - malloc_mutex_lock(&arena->lock); + malloc_mutex_lock(tsd, &arena->lock); lg_dirty_mult = arena->lg_dirty_mult; - malloc_mutex_unlock(&arena->lock); + malloc_mutex_unlock(tsd, &arena->lock); return (lg_dirty_mult); } bool -arena_lg_dirty_mult_set(arena_t *arena, ssize_t lg_dirty_mult) +arena_lg_dirty_mult_set(tsd_t *tsd, arena_t *arena, ssize_t lg_dirty_mult) { if (!arena_lg_dirty_mult_valid(lg_dirty_mult)) return (true); - malloc_mutex_lock(&arena->lock); + malloc_mutex_lock(tsd, &arena->lock); arena->lg_dirty_mult = lg_dirty_mult; - arena_maybe_purge(arena); - malloc_mutex_unlock(&arena->lock); + arena_maybe_purge(tsd, arena); + malloc_mutex_unlock(tsd, &arena->lock); return (false); } @@ -1367,25 +1371,25 @@ arena_decay_time_valid(ssize_t decay_time) } ssize_t -arena_decay_time_get(arena_t *arena) +arena_decay_time_get(tsd_t *tsd, arena_t *arena) { ssize_t decay_time; - malloc_mutex_lock(&arena->lock); + malloc_mutex_lock(tsd, &arena->lock); decay_time = arena->decay_time; - malloc_mutex_unlock(&arena->lock); + malloc_mutex_unlock(tsd, &arena->lock); return (decay_time); } bool -arena_decay_time_set(arena_t *arena, ssize_t decay_time) +arena_decay_time_set(tsd_t *tsd, arena_t *arena, ssize_t decay_time) { if (!arena_decay_time_valid(decay_time)) return (true); - malloc_mutex_lock(&arena->lock); + malloc_mutex_lock(tsd, &arena->lock); /* * Restart decay backlog from scratch, which may cause many dirty pages * to be immediately purged. It would conceptually be possible to map @@ -1395,14 +1399,14 @@ arena_decay_time_set(arena_t *arena, ssize_t decay_time) * arbitrary change during initial arena configuration. */ arena_decay_init(arena, decay_time); - arena_maybe_purge(arena); - malloc_mutex_unlock(&arena->lock); + arena_maybe_purge(tsd, arena); + malloc_mutex_unlock(tsd, &arena->lock); return (false); } static void -arena_maybe_purge_ratio(arena_t *arena) +arena_maybe_purge_ratio(tsd_t *tsd, arena_t *arena) { assert(opt_purge == purge_mode_ratio); @@ -1425,12 +1429,12 @@ arena_maybe_purge_ratio(arena_t *arena) */ if (arena->ndirty <= threshold) return; - arena_purge_to_limit(arena, threshold); + arena_purge_to_limit(tsd, arena, threshold); } } static void -arena_maybe_purge_decay(arena_t *arena) +arena_maybe_purge_decay(tsd_t *tsd, arena_t *arena) { nstime_t time; size_t ndirty_limit; @@ -1440,7 +1444,7 @@ arena_maybe_purge_decay(arena_t *arena) /* Purge all or nothing if the option is disabled. */ if (arena->decay_time <= 0) { if (arena->decay_time == 0) - arena_purge_to_limit(arena, 0); + arena_purge_to_limit(tsd, arena, 0); return; } @@ -1461,11 +1465,11 @@ arena_maybe_purge_decay(arena_t *arena) */ if (arena->ndirty <= ndirty_limit) return; - arena_purge_to_limit(arena, ndirty_limit); + arena_purge_to_limit(tsd, arena, ndirty_limit); } void -arena_maybe_purge(arena_t *arena) +arena_maybe_purge(tsd_t *tsd, arena_t *arena) { /* Don't recursively purge. 
*/ @@ -1473,9 +1477,9 @@ arena_maybe_purge(arena_t *arena) return; if (opt_purge == purge_mode_ratio) - arena_maybe_purge_ratio(arena); + arena_maybe_purge_ratio(tsd, arena); else - arena_maybe_purge_decay(arena); + arena_maybe_purge_decay(tsd, arena); } static size_t @@ -1513,7 +1517,7 @@ arena_dirty_count(arena_t *arena) } static size_t -arena_stash_dirty(arena_t *arena, chunk_hooks_t *chunk_hooks, +arena_stash_dirty(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, size_t ndirty_limit, arena_runs_dirty_link_t *purge_runs_sentinel, extent_node_t *purge_chunks_sentinel) { @@ -1544,7 +1548,7 @@ arena_stash_dirty(arena_t *arena, chunk_hooks_t *chunk_hooks, * dalloc_node=false argument to chunk_alloc_cache(). */ zero = false; - chunk = chunk_alloc_cache(arena, chunk_hooks, + chunk = chunk_alloc_cache(tsd, arena, chunk_hooks, extent_node_addr_get(chunkselm), extent_node_size_get(chunkselm), chunksize, &zero, false); @@ -1579,7 +1583,7 @@ arena_stash_dirty(arena_t *arena, chunk_hooks_t *chunk_hooks, * prior to allocation. */ if (chunk == arena->spare) - arena_chunk_alloc(arena); + arena_chunk_alloc(tsd, arena); /* Temporarily allocate the free dirty run. */ arena_run_split_large(arena, run, run_size, false); @@ -1603,7 +1607,7 @@ arena_stash_dirty(arena_t *arena, chunk_hooks_t *chunk_hooks, } static size_t -arena_purge_stashed(arena_t *arena, chunk_hooks_t *chunk_hooks, +arena_purge_stashed(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, arena_runs_dirty_link_t *purge_runs_sentinel, extent_node_t *purge_chunks_sentinel) { @@ -1615,7 +1619,7 @@ arena_purge_stashed(arena_t *arena, chunk_hooks_t *chunk_hooks, nmadvise = 0; npurged = 0; - malloc_mutex_unlock(&arena->lock); + malloc_mutex_unlock(tsd, &arena->lock); for (rdelm = qr_next(purge_runs_sentinel, rd_link), chunkselm = qr_next(purge_chunks_sentinel, cc_link); rdelm != purge_runs_sentinel; rdelm = qr_next(rdelm, rd_link)) { @@ -1654,7 +1658,7 @@ arena_purge_stashed(arena_t *arena, chunk_hooks_t *chunk_hooks, flag_unzeroed = 0; flags = CHUNK_MAP_DECOMMITTED; } else { - flag_unzeroed = chunk_purge_wrapper(arena, + flag_unzeroed = chunk_purge_wrapper(tsd, arena, chunk_hooks, chunk, chunksize, pageind << LG_PAGE, run_size) ? 
CHUNK_MAP_UNZEROED : 0; flags = flag_unzeroed; @@ -1685,7 +1689,7 @@ arena_purge_stashed(arena_t *arena, chunk_hooks_t *chunk_hooks, if (config_stats) nmadvise++; } - malloc_mutex_lock(&arena->lock); + malloc_mutex_lock(tsd, &arena->lock); if (config_stats) { arena->stats.nmadvise += nmadvise; @@ -1696,7 +1700,7 @@ arena_purge_stashed(arena_t *arena, chunk_hooks_t *chunk_hooks, } static void -arena_unstash_purged(arena_t *arena, chunk_hooks_t *chunk_hooks, +arena_unstash_purged(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, arena_runs_dirty_link_t *purge_runs_sentinel, extent_node_t *purge_chunks_sentinel) { @@ -1716,10 +1720,10 @@ arena_unstash_purged(arena_t *arena, chunk_hooks_t *chunk_hooks, bool zeroed = extent_node_zeroed_get(chunkselm); bool committed = extent_node_committed_get(chunkselm); extent_node_dirty_remove(chunkselm); - arena_node_dalloc(arena, chunkselm); + arena_node_dalloc(tsd, arena, chunkselm); chunkselm = chunkselm_next; - chunk_dalloc_wrapper(arena, chunk_hooks, addr, size, - zeroed, committed); + chunk_dalloc_wrapper(tsd, arena, chunk_hooks, addr, + size, zeroed, committed); } else { arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(rdelm); @@ -1730,7 +1734,8 @@ arena_unstash_purged(arena_t *arena, chunk_hooks_t *chunk_hooks, pageind) != 0); arena_run_t *run = &miscelm->run; qr_remove(rdelm, rd_link); - arena_run_dalloc(arena, run, false, true, decommitted); + arena_run_dalloc(tsd, arena, run, false, true, + decommitted); } } } @@ -1745,9 +1750,9 @@ arena_unstash_purged(arena_t *arena, chunk_hooks_t *chunk_hooks, * (arena->ndirty >= ndirty_limit) */ static void -arena_purge_to_limit(arena_t *arena, size_t ndirty_limit) +arena_purge_to_limit(tsd_t *tsd, arena_t *arena, size_t ndirty_limit) { - chunk_hooks_t chunk_hooks = chunk_hooks_get(arena); + chunk_hooks_t chunk_hooks = chunk_hooks_get(tsd, arena); size_t npurge, npurged; arena_runs_dirty_link_t purge_runs_sentinel; extent_node_t purge_chunks_sentinel; @@ -1768,14 +1773,14 @@ arena_purge_to_limit(arena_t *arena, size_t ndirty_limit) qr_new(&purge_runs_sentinel, rd_link); extent_node_dirty_linkage_init(&purge_chunks_sentinel); - npurge = arena_stash_dirty(arena, &chunk_hooks, ndirty_limit, + npurge = arena_stash_dirty(tsd, arena, &chunk_hooks, ndirty_limit, &purge_runs_sentinel, &purge_chunks_sentinel); if (npurge == 0) goto label_return; - npurged = arena_purge_stashed(arena, &chunk_hooks, &purge_runs_sentinel, - &purge_chunks_sentinel); + npurged = arena_purge_stashed(tsd, arena, &chunk_hooks, + &purge_runs_sentinel, &purge_chunks_sentinel); assert(npurged == npurge); - arena_unstash_purged(arena, &chunk_hooks, &purge_runs_sentinel, + arena_unstash_purged(tsd, arena, &chunk_hooks, &purge_runs_sentinel, &purge_chunks_sentinel); if (config_stats) @@ -1786,15 +1791,15 @@ label_return: } void -arena_purge(arena_t *arena, bool all) +arena_purge(tsd_t *tsd, arena_t *arena, bool all) { - malloc_mutex_lock(&arena->lock); + malloc_mutex_lock(tsd, &arena->lock); if (all) - arena_purge_to_limit(arena, 0); + arena_purge_to_limit(tsd, arena, 0); else - arena_maybe_purge(arena); - malloc_mutex_unlock(&arena->lock); + arena_maybe_purge(tsd, arena); + malloc_mutex_unlock(tsd, &arena->lock); } static void @@ -1911,8 +1916,8 @@ arena_run_size_get(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, } static void -arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, bool cleaned, - bool decommitted) +arena_run_dalloc(tsd_t *tsd, arena_t *arena, arena_run_t *run, bool dirty, + bool cleaned, bool 
decommitted) { arena_chunk_t *chunk; arena_chunk_map_misc_t *miscelm; @@ -1972,7 +1977,7 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, bool cleaned, if (size == arena_maxrun) { assert(run_ind == map_bias); assert(run_pages == (arena_maxrun >> LG_PAGE)); - arena_chunk_dalloc(arena, chunk); + arena_chunk_dalloc(tsd, arena, chunk); } /* @@ -1983,12 +1988,12 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, bool cleaned, * chances of spuriously crossing the dirty page purging threshold. */ if (dirty) - arena_maybe_purge(arena); + arena_maybe_purge(tsd, arena); } static void -arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, - size_t oldsize, size_t newsize) +arena_run_trim_head(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, + arena_run_t *run, size_t oldsize, size_t newsize) { arena_chunk_map_misc_t *miscelm = arena_run_to_miscelm(run); size_t pageind = arena_miscelm_to_pageind(miscelm); @@ -2023,12 +2028,13 @@ arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, flag_dirty | (flag_unzeroed_mask & arena_mapbits_unzeroed_get(chunk, pageind+head_npages))); - arena_run_dalloc(arena, run, false, false, (flag_decommitted != 0)); + arena_run_dalloc(tsd, arena, run, false, false, (flag_decommitted != + 0)); } static void -arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, - size_t oldsize, size_t newsize, bool dirty) +arena_run_trim_tail(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, + arena_run_t *run, size_t oldsize, size_t newsize, bool dirty) { arena_chunk_map_misc_t *miscelm = arena_run_to_miscelm(run); size_t pageind = arena_miscelm_to_pageind(miscelm); @@ -2067,8 +2073,8 @@ arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, tail_miscelm = arena_miscelm_get_mutable(chunk, pageind + head_npages); tail_run = &tail_miscelm->run; - arena_run_dalloc(arena, tail_run, dirty, false, (flag_decommitted != - 0)); + arena_run_dalloc(tsd, arena, tail_run, dirty, false, (flag_decommitted + != 0)); } static void @@ -2094,7 +2100,7 @@ arena_bin_nonfull_run_tryget(arena_bin_t *bin) } static arena_run_t * -arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin) +arena_bin_nonfull_run_get(tsd_t *tsd, arena_t *arena, arena_bin_t *bin) { arena_run_t *run; szind_t binind; @@ -2110,19 +2116,19 @@ arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin) bin_info = &arena_bin_info[binind]; /* Allocate a new run. */ - malloc_mutex_unlock(&bin->lock); + malloc_mutex_unlock(tsd, &bin->lock); /******************************/ - malloc_mutex_lock(&arena->lock); - run = arena_run_alloc_small(arena, bin_info->run_size, binind); + malloc_mutex_lock(tsd, &arena->lock); + run = arena_run_alloc_small(tsd, arena, bin_info->run_size, binind); if (run != NULL) { /* Initialize run internals. */ run->binind = binind; run->nfree = bin_info->nregs; bitmap_init(run->bitmap, &bin_info->bitmap_info); } - malloc_mutex_unlock(&arena->lock); + malloc_mutex_unlock(tsd, &arena->lock); /********************************/ - malloc_mutex_lock(&bin->lock); + malloc_mutex_lock(tsd, &bin->lock); if (run != NULL) { if (config_stats) { bin->stats.nruns++; @@ -2145,7 +2151,7 @@ arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin) /* Re-fill bin->runcur, then call arena_run_reg_alloc(). 
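/*
 * Sketch of the drop-reacquire-revalidate pattern visible in
 * arena_bin_nonfull_run_get() above: bin->lock is released before
 * arena->lock is taken, and bin state must be rechecked once bin->lock is
 * reacquired because another thread may have refilled it in the meantime.
 * Standalone illustration only, not jemalloc code; cache_t, cache_get(), and
 * slow_alloc() are invented names.
 */
#include <pthread.h>
#include <stddef.h>
#include <stdlib.h>

typedef struct {
	pthread_mutex_t fast_lock;	/* Guards cached. */
	pthread_mutex_t slow_lock;	/* Guards the slow allocation path. */
	void *cached;
} cache_t;

static void *
slow_alloc(cache_t *c)
{
	void *item;

	pthread_mutex_lock(&c->slow_lock);
	item = malloc(1);		/* Stand-in for the expensive path. */
	pthread_mutex_unlock(&c->slow_lock);
	return (item);
}

static void *
cache_get(cache_t *c)
{
	void *item;

	pthread_mutex_lock(&c->fast_lock);
	item = c->cached;
	if (item != NULL)
		c->cached = NULL;
	else {
		/* Never hold fast_lock while taking slow_lock. */
		pthread_mutex_unlock(&c->fast_lock);
		item = slow_alloc(c);
		pthread_mutex_lock(&c->fast_lock);
		/*
		 * Revalidate here: another thread may have refilled the
		 * cache while fast_lock was dropped; in that case the refill
		 * stays cached and the freshly allocated item is returned.
		 */
	}
	pthread_mutex_unlock(&c->fast_lock);
	return (item);
}

int
main(void)
{
	cache_t c = {PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER,
	    NULL};
	void *p = cache_get(&c);

	free(p);
	return (0);
}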
*/ static void * -arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin) +arena_bin_malloc_hard(tsd_t *tsd, arena_t *arena, arena_bin_t *bin) { szind_t binind; arena_bin_info_t *bin_info; @@ -2154,7 +2160,7 @@ arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin) binind = arena_bin_index(arena, bin); bin_info = &arena_bin_info[binind]; bin->runcur = NULL; - run = arena_bin_nonfull_run_get(arena, bin); + run = arena_bin_nonfull_run_get(tsd, arena, bin); if (bin->runcur != NULL && bin->runcur->nfree > 0) { /* * Another thread updated runcur while this one ran without the @@ -2175,9 +2181,10 @@ arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin) * were just deallocated from the run. */ chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); - if (run->nfree == bin_info->nregs) - arena_dalloc_bin_run(arena, chunk, run, bin); - else + if (run->nfree == bin_info->nregs) { + arena_dalloc_bin_run(tsd, arena, chunk, run, + bin); + } else arena_bin_lower_run(arena, chunk, run, bin); } return (ret); @@ -2202,10 +2209,10 @@ arena_tcache_fill_small(tsd_t *tsd, arena_t *arena, tcache_bin_t *tbin, assert(tbin->ncached == 0); - if (config_prof && arena_prof_accum(arena, prof_accumbytes)) - prof_idump(); + if (config_prof && arena_prof_accum(tsd, arena, prof_accumbytes)) + prof_idump(tsd); bin = &arena->bins[binind]; - malloc_mutex_lock(&bin->lock); + malloc_mutex_lock(tsd, &bin->lock); for (i = 0, nfill = (tcache_bin_info[binind].ncached_max >> tbin->lg_fill_div); i < nfill; i++) { arena_run_t *run; @@ -2213,7 +2220,7 @@ arena_tcache_fill_small(tsd_t *tsd, arena_t *arena, tcache_bin_t *tbin, if ((run = bin->runcur) != NULL && run->nfree > 0) ptr = arena_run_reg_alloc(run, &arena_bin_info[binind]); else - ptr = arena_bin_malloc_hard(arena, bin); + ptr = arena_bin_malloc_hard(tsd, arena, bin); if (ptr == NULL) { /* * OOM. tbin->avail isn't yet filled down to its first @@ -2240,7 +2247,7 @@ arena_tcache_fill_small(tsd_t *tsd, arena_t *arena, tcache_bin_t *tbin, bin->stats.nfills++; tbin->tstats.nrequests = 0; } - malloc_mutex_unlock(&bin->lock); + malloc_mutex_unlock(tsd, &bin->lock); tbin->ncached = i; arena_decay_tick(tsd, arena); } @@ -2365,14 +2372,14 @@ arena_malloc_small(tsd_t *tsd, arena_t *arena, szind_t binind, bool zero) bin = &arena->bins[binind]; usize = index2size(binind); - malloc_mutex_lock(&bin->lock); + malloc_mutex_lock(tsd, &bin->lock); if ((run = bin->runcur) != NULL && run->nfree > 0) ret = arena_run_reg_alloc(run, &arena_bin_info[binind]); else - ret = arena_bin_malloc_hard(arena, bin); + ret = arena_bin_malloc_hard(tsd, arena, bin); if (ret == NULL) { - malloc_mutex_unlock(&bin->lock); + malloc_mutex_unlock(tsd, &bin->lock); return (NULL); } @@ -2381,9 +2388,9 @@ arena_malloc_small(tsd_t *tsd, arena_t *arena, szind_t binind, bool zero) bin->stats.nrequests++; bin->stats.curregs++; } - malloc_mutex_unlock(&bin->lock); - if (config_prof && !isthreaded && arena_prof_accum(arena, usize)) - prof_idump(); + malloc_mutex_unlock(tsd, &bin->lock); + if (config_prof && !isthreaded && arena_prof_accum(tsd, arena, usize)) + prof_idump(tsd); if (!zero) { if (config_fill) { @@ -2419,7 +2426,7 @@ arena_malloc_large(tsd_t *tsd, arena_t *arena, szind_t binind, bool zero) /* Large allocation. 
*/ usize = index2size(binind); - malloc_mutex_lock(&arena->lock); + malloc_mutex_lock(tsd, &arena->lock); if (config_cache_oblivious) { uint64_t r; @@ -2432,9 +2439,9 @@ arena_malloc_large(tsd_t *tsd, arena_t *arena, szind_t binind, bool zero) random_offset = ((uintptr_t)r) << LG_CACHELINE; } else random_offset = 0; - run = arena_run_alloc_large(arena, usize + large_pad, zero); + run = arena_run_alloc_large(tsd, arena, usize + large_pad, zero); if (run == NULL) { - malloc_mutex_unlock(&arena->lock); + malloc_mutex_unlock(tsd, &arena->lock); return (NULL); } miscelm = arena_run_to_miscelm(run); @@ -2452,9 +2459,9 @@ arena_malloc_large(tsd_t *tsd, arena_t *arena, szind_t binind, bool zero) } if (config_prof) idump = arena_prof_accum_locked(arena, usize); - malloc_mutex_unlock(&arena->lock); + malloc_mutex_unlock(tsd, &arena->lock); if (config_prof && idump) - prof_idump(); + prof_idump(tsd); if (!zero) { if (config_fill) { @@ -2506,10 +2513,10 @@ arena_palloc_large(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, alignment = PAGE_CEILING(alignment); alloc_size = usize + large_pad + alignment; - malloc_mutex_lock(&arena->lock); - run = arena_run_alloc_large(arena, alloc_size, false); + malloc_mutex_lock(tsd, &arena->lock); + run = arena_run_alloc_large(tsd, arena, alloc_size, false); if (run == NULL) { - malloc_mutex_unlock(&arena->lock); + malloc_mutex_unlock(tsd, &arena->lock); return (NULL); } chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); @@ -2529,11 +2536,11 @@ arena_palloc_large(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, LG_PAGE)); run = &miscelm->run; - arena_run_trim_head(arena, chunk, head_run, alloc_size, + arena_run_trim_head(tsd, arena, chunk, head_run, alloc_size, alloc_size - leadsize); } if (trailsize != 0) { - arena_run_trim_tail(arena, chunk, run, usize + large_pad + + arena_run_trim_tail(tsd, arena, chunk, run, usize + large_pad + trailsize, usize + large_pad, false); } if (arena_run_init_large(arena, run, usize + large_pad, zero)) { @@ -2544,8 +2551,8 @@ arena_palloc_large(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, run_ind) != 0); assert(decommitted); /* Cause of OOM. 
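/*
 * Sketch of the decide-under-lock, act-after-unlock pattern used in
 * arena_malloc_large() above: idump is computed while arena->lock is held,
 * but prof_idump() runs only after the lock is dropped, so heavyweight work
 * never executes under the arena lock.  Standalone illustration only;
 * counter_t, counter_add(), and flush() are invented names.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

typedef struct {
	pthread_mutex_t lock;
	unsigned long accum;
	unsigned long threshold;
} counter_t;

static void
flush(void)
{

	/* Expensive work; must never run with counter_t's lock held. */
	printf("flush\n");
}

static void
counter_add(counter_t *c, unsigned long n)
{
	bool do_flush;

	pthread_mutex_lock(&c->lock);
	c->accum += n;
	do_flush = (c->accum >= c->threshold);	/* Decide under the lock... */
	if (do_flush)
		c->accum -= c->threshold;
	pthread_mutex_unlock(&c->lock);
	if (do_flush)
		flush();			/* ...act after dropping it. */
}

int
main(void)
{
	counter_t c = {PTHREAD_MUTEX_INITIALIZER, 0, 4};
	unsigned i;

	for (i = 0; i < 10; i++)
		counter_add(&c, 1);
	return (0);
}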
*/ - arena_run_dalloc(arena, run, dirty, false, decommitted); - malloc_mutex_unlock(&arena->lock); + arena_run_dalloc(tsd, arena, run, dirty, false, decommitted); + malloc_mutex_unlock(tsd, &arena->lock); return (NULL); } ret = arena_miscelm_to_rpages(miscelm); @@ -2560,7 +2567,7 @@ arena_palloc_large(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, arena->stats.lstats[index].nrequests++; arena->stats.lstats[index].curruns++; } - malloc_mutex_unlock(&arena->lock); + malloc_mutex_unlock(tsd, &arena->lock); if (config_fill && !zero) { if (unlikely(opt_junk_alloc)) @@ -2609,7 +2616,7 @@ arena_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, } void -arena_prof_promoted(const void *ptr, size_t size) +arena_prof_promoted(tsd_t *tsd, const void *ptr, size_t size) { arena_chunk_t *chunk; size_t pageind; @@ -2618,8 +2625,8 @@ arena_prof_promoted(const void *ptr, size_t size) cassert(config_prof); assert(ptr != NULL); assert(CHUNK_ADDR2BASE(ptr) != ptr); - assert(isalloc(ptr, false) == LARGE_MINCLASS); - assert(isalloc(ptr, true) == LARGE_MINCLASS); + assert(isalloc(tsd, ptr, false) == LARGE_MINCLASS); + assert(isalloc(tsd, ptr, true) == LARGE_MINCLASS); assert(size <= SMALL_MAXCLASS); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); @@ -2628,8 +2635,8 @@ arena_prof_promoted(const void *ptr, size_t size) assert(binind < NBINS); arena_mapbits_large_binind_set(chunk, pageind, binind); - assert(isalloc(ptr, false) == LARGE_MINCLASS); - assert(isalloc(ptr, true) == size); + assert(isalloc(tsd, ptr, false) == LARGE_MINCLASS); + assert(isalloc(tsd, ptr, true) == size); } static void @@ -2660,19 +2667,19 @@ arena_dissociate_bin_run(arena_chunk_t *chunk, arena_run_t *run, } static void -arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, - arena_bin_t *bin) +arena_dalloc_bin_run(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, + arena_run_t *run, arena_bin_t *bin) { assert(run != bin->runcur); - malloc_mutex_unlock(&bin->lock); + malloc_mutex_unlock(tsd, &bin->lock); /******************************/ - malloc_mutex_lock(&arena->lock); - arena_run_dalloc(arena, run, true, false, false); - malloc_mutex_unlock(&arena->lock); + malloc_mutex_lock(tsd, &arena->lock); + arena_run_dalloc(tsd, arena, run, true, false, false); + malloc_mutex_unlock(tsd, &arena->lock); /****************************/ - malloc_mutex_lock(&bin->lock); + malloc_mutex_lock(tsd, &bin->lock); if (config_stats) bin->stats.curruns--; } @@ -2699,8 +2706,8 @@ arena_bin_lower_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, } static void -arena_dalloc_bin_locked_impl(arena_t *arena, arena_chunk_t *chunk, void *ptr, - arena_chunk_map_bits_t *bitselm, bool junked) +arena_dalloc_bin_locked_impl(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, + void *ptr, arena_chunk_map_bits_t *bitselm, bool junked) { size_t pageind, rpages_ind; arena_run_t *run; @@ -2721,7 +2728,7 @@ arena_dalloc_bin_locked_impl(arena_t *arena, arena_chunk_t *chunk, void *ptr, arena_run_reg_dalloc(run, ptr); if (run->nfree == bin_info->nregs) { arena_dissociate_bin_run(chunk, run, bin); - arena_dalloc_bin_run(arena, chunk, run, bin); + arena_dalloc_bin_run(tsd, arena, chunk, run, bin); } else if (run->nfree == 1 && run != bin->runcur) arena_bin_lower_run(arena, chunk, run, bin); @@ -2732,15 +2739,15 @@ arena_dalloc_bin_locked_impl(arena_t *arena, arena_chunk_t *chunk, void *ptr, } void -arena_dalloc_bin_junked_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr, - arena_chunk_map_bits_t *bitselm) 
+arena_dalloc_bin_junked_locked(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, + void *ptr, arena_chunk_map_bits_t *bitselm) { - arena_dalloc_bin_locked_impl(arena, chunk, ptr, bitselm, true); + arena_dalloc_bin_locked_impl(tsd, arena, chunk, ptr, bitselm, true); } void -arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, +arena_dalloc_bin(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, void *ptr, size_t pageind, arena_chunk_map_bits_t *bitselm) { arena_run_t *run; @@ -2750,9 +2757,9 @@ arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, rpages_ind = pageind - arena_mapbits_small_runind_get(chunk, pageind); run = &arena_miscelm_get_mutable(chunk, rpages_ind)->run; bin = &arena->bins[run->binind]; - malloc_mutex_lock(&bin->lock); - arena_dalloc_bin_locked_impl(arena, chunk, ptr, bitselm, false); - malloc_mutex_unlock(&bin->lock); + malloc_mutex_lock(tsd, &bin->lock); + arena_dalloc_bin_locked_impl(tsd, arena, chunk, ptr, bitselm, false); + malloc_mutex_unlock(tsd, &bin->lock); } void @@ -2767,7 +2774,7 @@ arena_dalloc_small(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, void *ptr, pageind)) != BININD_INVALID); } bitselm = arena_bitselm_get_mutable(chunk, pageind); - arena_dalloc_bin(arena, chunk, ptr, pageind, bitselm); + arena_dalloc_bin(tsd, arena, chunk, ptr, pageind, bitselm); arena_decay_tick(tsd, arena); } @@ -2790,7 +2797,7 @@ arena_dalloc_junk_large_t *arena_dalloc_junk_large = #endif static void -arena_dalloc_large_locked_impl(arena_t *arena, arena_chunk_t *chunk, +arena_dalloc_large_locked_impl(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, void *ptr, bool junked) { size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; @@ -2814,30 +2821,30 @@ arena_dalloc_large_locked_impl(arena_t *arena, arena_chunk_t *chunk, } } - arena_run_dalloc(arena, run, true, false, false); + arena_run_dalloc(tsd, arena, run, true, false, false); } void -arena_dalloc_large_junked_locked(arena_t *arena, arena_chunk_t *chunk, - void *ptr) +arena_dalloc_large_junked_locked(tsd_t *tsd, arena_t *arena, + arena_chunk_t *chunk, void *ptr) { - arena_dalloc_large_locked_impl(arena, chunk, ptr, true); + arena_dalloc_large_locked_impl(tsd, arena, chunk, ptr, true); } void arena_dalloc_large(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, void *ptr) { - malloc_mutex_lock(&arena->lock); - arena_dalloc_large_locked_impl(arena, chunk, ptr, false); - malloc_mutex_unlock(&arena->lock); + malloc_mutex_lock(tsd, &arena->lock); + arena_dalloc_large_locked_impl(tsd, arena, chunk, ptr, false); + malloc_mutex_unlock(tsd, &arena->lock); arena_decay_tick(tsd, arena); } static void -arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk, void *ptr, - size_t oldsize, size_t size) +arena_ralloc_large_shrink(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, + void *ptr, size_t oldsize, size_t size) { size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; arena_chunk_map_misc_t *miscelm = arena_miscelm_get_mutable(chunk, @@ -2850,8 +2857,8 @@ arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk, void *ptr, * Shrink the run, and make trailing pages available for other * allocations. 
*/ - malloc_mutex_lock(&arena->lock); - arena_run_trim_tail(arena, chunk, run, oldsize + large_pad, size + + malloc_mutex_lock(tsd, &arena->lock); + arena_run_trim_tail(tsd, arena, chunk, run, oldsize + large_pad, size + large_pad, true); if (config_stats) { szind_t oldindex = size2index(oldsize) - NBINS; @@ -2869,12 +2876,12 @@ arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk, void *ptr, arena->stats.lstats[index].nrequests++; arena->stats.lstats[index].curruns++; } - malloc_mutex_unlock(&arena->lock); + malloc_mutex_unlock(tsd, &arena->lock); } static bool -arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr, - size_t oldsize, size_t usize_min, size_t usize_max, bool zero) +arena_ralloc_large_grow(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, + void *ptr, size_t oldsize, size_t usize_min, size_t usize_max, bool zero) { size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; size_t npages = (oldsize + large_pad) >> LG_PAGE; @@ -2884,7 +2891,7 @@ arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr, large_pad); /* Try to extend the run. */ - malloc_mutex_lock(&arena->lock); + malloc_mutex_lock(tsd, &arena->lock); if (pageind+npages >= chunk_npages || arena_mapbits_allocated_get(chunk, pageind+npages) != 0) goto label_fail; @@ -2964,11 +2971,11 @@ arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr, arena->stats.lstats[index].nrequests++; arena->stats.lstats[index].curruns++; } - malloc_mutex_unlock(&arena->lock); + malloc_mutex_unlock(tsd, &arena->lock); return (false); } label_fail: - malloc_mutex_unlock(&arena->lock); + malloc_mutex_unlock(tsd, &arena->lock); return (true); } @@ -2997,7 +3004,7 @@ arena_ralloc_junk_large_t *arena_ralloc_junk_large = * always fail if growing an object, and the following run is already in use. */ static bool -arena_ralloc_large(void *ptr, size_t oldsize, size_t usize_min, +arena_ralloc_large(tsd_t *tsd, void *ptr, size_t oldsize, size_t usize_min, size_t usize_max, bool zero) { arena_chunk_t *chunk; @@ -3012,16 +3019,16 @@ arena_ralloc_large(void *ptr, size_t oldsize, size_t usize_min, arena = extent_node_arena_get(&chunk->node); if (oldsize < usize_max) { - bool ret = arena_ralloc_large_grow(arena, chunk, ptr, oldsize, - usize_min, usize_max, zero); + bool ret = arena_ralloc_large_grow(tsd, arena, chunk, ptr, + oldsize, usize_min, usize_max, zero); if (config_fill && !ret && !zero) { if (unlikely(opt_junk_alloc)) { memset((void *)((uintptr_t)ptr + oldsize), JEMALLOC_ALLOC_JUNK, - isalloc(ptr, config_prof) - oldsize); + isalloc(tsd, ptr, config_prof) - oldsize); } else if (unlikely(opt_zero)) { memset((void *)((uintptr_t)ptr + oldsize), 0, - isalloc(ptr, config_prof) - oldsize); + isalloc(tsd, ptr, config_prof) - oldsize); } } return (ret); @@ -3030,7 +3037,7 @@ arena_ralloc_large(void *ptr, size_t oldsize, size_t usize_min, assert(oldsize > usize_max); /* Fill before shrinking in order avoid a race. 
*/ arena_ralloc_junk_large(ptr, oldsize, usize_max); - arena_ralloc_large_shrink(arena, chunk, ptr, oldsize, usize_max); + arena_ralloc_large_shrink(tsd, arena, chunk, ptr, oldsize, usize_max); return (false); } @@ -3065,7 +3072,7 @@ arena_ralloc_no_move(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, } else { if (usize_max <= SMALL_MAXCLASS) return (true); - if (arena_ralloc_large(ptr, oldsize, usize_min, + if (arena_ralloc_large(tsd, ptr, oldsize, usize_min, usize_max, zero)) return (true); } @@ -3138,25 +3145,25 @@ arena_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, } dss_prec_t -arena_dss_prec_get(arena_t *arena) +arena_dss_prec_get(tsd_t *tsd, arena_t *arena) { dss_prec_t ret; - malloc_mutex_lock(&arena->lock); + malloc_mutex_lock(tsd, &arena->lock); ret = arena->dss_prec; - malloc_mutex_unlock(&arena->lock); + malloc_mutex_unlock(tsd, &arena->lock); return (ret); } bool -arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec) +arena_dss_prec_set(tsd_t *tsd, arena_t *arena, dss_prec_t dss_prec) { if (!have_dss) return (dss_prec != dss_prec_disabled); - malloc_mutex_lock(&arena->lock); + malloc_mutex_lock(tsd, &arena->lock); arena->dss_prec = dss_prec; - malloc_mutex_unlock(&arena->lock); + malloc_mutex_unlock(tsd, &arena->lock); return (false); } @@ -3213,28 +3220,29 @@ arena_basic_stats_merge_locked(arena_t *arena, unsigned *nthreads, } void -arena_basic_stats_merge(arena_t *arena, unsigned *nthreads, const char **dss, - ssize_t *lg_dirty_mult, ssize_t *decay_time, size_t *nactive, - size_t *ndirty) +arena_basic_stats_merge(tsd_t *tsd, arena_t *arena, unsigned *nthreads, + const char **dss, ssize_t *lg_dirty_mult, ssize_t *decay_time, + size_t *nactive, size_t *ndirty) { - malloc_mutex_lock(&arena->lock); + malloc_mutex_lock(tsd, &arena->lock); arena_basic_stats_merge_locked(arena, nthreads, dss, lg_dirty_mult, decay_time, nactive, ndirty); - malloc_mutex_unlock(&arena->lock); + malloc_mutex_unlock(tsd, &arena->lock); } void -arena_stats_merge(arena_t *arena, unsigned *nthreads, const char **dss, - ssize_t *lg_dirty_mult, ssize_t *decay_time, size_t *nactive, - size_t *ndirty, arena_stats_t *astats, malloc_bin_stats_t *bstats, - malloc_large_stats_t *lstats, malloc_huge_stats_t *hstats) +arena_stats_merge(tsd_t *tsd, arena_t *arena, unsigned *nthreads, + const char **dss, ssize_t *lg_dirty_mult, ssize_t *decay_time, + size_t *nactive, size_t *ndirty, arena_stats_t *astats, + malloc_bin_stats_t *bstats, malloc_large_stats_t *lstats, + malloc_huge_stats_t *hstats) { unsigned i; cassert(config_stats); - malloc_mutex_lock(&arena->lock); + malloc_mutex_lock(tsd, &arena->lock); arena_basic_stats_merge_locked(arena, nthreads, dss, lg_dirty_mult, decay_time, nactive, ndirty); @@ -3264,12 +3272,12 @@ arena_stats_merge(arena_t *arena, unsigned *nthreads, const char **dss, hstats[i].ndalloc += arena->stats.hstats[i].ndalloc; hstats[i].curhchunks += arena->stats.hstats[i].curhchunks; } - malloc_mutex_unlock(&arena->lock); + malloc_mutex_unlock(tsd, &arena->lock); for (i = 0; i < NBINS; i++) { arena_bin_t *bin = &arena->bins[i]; - malloc_mutex_lock(&bin->lock); + malloc_mutex_lock(tsd, &bin->lock); bstats[i].nmalloc += bin->stats.nmalloc; bstats[i].ndalloc += bin->stats.ndalloc; bstats[i].nrequests += bin->stats.nrequests; @@ -3281,7 +3289,7 @@ arena_stats_merge(arena_t *arena, unsigned *nthreads, const char **dss, bstats[i].nruns += bin->stats.nruns; bstats[i].reruns += bin->stats.reruns; bstats[i].curruns += bin->stats.curruns; - malloc_mutex_unlock(&bin->lock); 
+ malloc_mutex_unlock(tsd, &bin->lock); } } @@ -3307,7 +3315,7 @@ arena_nthreads_dec(arena_t *arena) } arena_t * -arena_new(unsigned ind) +arena_new(tsd_t *tsd, unsigned ind) { arena_t *arena; size_t arena_size; @@ -3322,17 +3330,17 @@ arena_new(unsigned ind) * because there is no way to clean up if base_alloc() OOMs. */ if (config_stats) { - arena = (arena_t *)base_alloc(CACHELINE_CEILING(arena_size) + - QUANTUM_CEILING(nlclasses * sizeof(malloc_large_stats_t) + + arena = (arena_t *)base_alloc(tsd, CACHELINE_CEILING(arena_size) + + QUANTUM_CEILING(nlclasses * sizeof(malloc_large_stats_t) + nhclasses) * sizeof(malloc_huge_stats_t)); } else - arena = (arena_t *)base_alloc(arena_size); + arena = (arena_t *)base_alloc(tsd, arena_size); if (arena == NULL) return (NULL); arena->ind = ind; arena->nthreads = 0; - if (malloc_mutex_init(&arena->lock)) + if (malloc_mutex_init(&arena->lock, "arena", WITNESS_RANK_ARENA)) return (NULL); if (config_stats) { @@ -3365,7 +3373,7 @@ arena_new(unsigned ind) (uint64_t)(uintptr_t)arena; } - arena->dss_prec = chunk_dss_prec_get(); + arena->dss_prec = chunk_dss_prec_get(tsd); arena->spare = NULL; @@ -3383,17 +3391,20 @@ arena_new(unsigned ind) arena_decay_init(arena, arena_decay_time_default_get()); ql_new(&arena->huge); - if (malloc_mutex_init(&arena->huge_mtx)) + if (malloc_mutex_init(&arena->huge_mtx, "arena_huge", + WITNESS_RANK_ARENA_HUGE)) return (NULL); extent_tree_szad_new(&arena->chunks_szad_cached); extent_tree_ad_new(&arena->chunks_ad_cached); extent_tree_szad_new(&arena->chunks_szad_retained); extent_tree_ad_new(&arena->chunks_ad_retained); - if (malloc_mutex_init(&arena->chunks_mtx)) + if (malloc_mutex_init(&arena->chunks_mtx, "arena_chunks", + WITNESS_RANK_ARENA_CHUNKS)) return (NULL); ql_new(&arena->node_cache); - if (malloc_mutex_init(&arena->node_cache_mtx)) + if (malloc_mutex_init(&arena->node_cache_mtx, "arena_node_cache", + WITNESS_RANK_ARENA_NODE_CACHE)) return (NULL); arena->chunk_hooks = chunk_hooks_default; @@ -3401,7 +3412,8 @@ arena_new(unsigned ind) /* Initialize bins. 
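/*
 * Sketch of what the new (name, rank) arguments to malloc_mutex_init() make
 * possible: with each mutex assigned a rank ("arena"/WITNESS_RANK_ARENA,
 * "arena_huge"/WITNESS_RANK_ARENA_HUGE, ... above), a thread can assert that
 * it only acquires locks in increasing rank order, catching lock-order bugs
 * deterministically.  This is a simplified standalone checker, not
 * jemalloc's witness code; ranked_mutex_t, MAX_HELD, and the rank values are
 * invented, and LIFO release order is assumed for simplicity.
 */
#include <assert.h>
#include <pthread.h>

#define	MAX_HELD	8

typedef struct {
	pthread_mutex_t lock;
	const char *name;
	unsigned rank;
} ranked_mutex_t;

static __thread unsigned held_ranks[MAX_HELD];
static __thread unsigned nheld;

static void
ranked_mutex_lock(ranked_mutex_t *mtx)
{

	/* Locks must be acquired in strictly increasing rank order. */
	assert(nheld == 0 || mtx->rank > held_ranks[nheld - 1]);
	assert(nheld < MAX_HELD);
	pthread_mutex_lock(&mtx->lock);
	held_ranks[nheld++] = mtx->rank;
}

static void
ranked_mutex_unlock(ranked_mutex_t *mtx)
{

	assert(nheld > 0 && held_ranks[nheld - 1] == mtx->rank);
	nheld--;
	pthread_mutex_unlock(&mtx->lock);
}

int
main(void)
{
	ranked_mutex_t a = {PTHREAD_MUTEX_INITIALIZER, "arena", 10};
	ranked_mutex_t b = {PTHREAD_MUTEX_INITIALIZER, "arena_bin", 20};

	ranked_mutex_lock(&a);
	ranked_mutex_lock(&b);		/* OK: rank 20 > rank 10. */
	ranked_mutex_unlock(&b);
	ranked_mutex_unlock(&a);
	return (0);
}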
*/ for (i = 0; i < NBINS; i++) { bin = &arena->bins[i]; - if (malloc_mutex_init(&bin->lock)) + if (malloc_mutex_init(&bin->lock, "arena_bin", + WITNESS_RANK_ARENA_BIN)) return (NULL); bin->runcur = NULL; arena_run_heap_new(&bin->runs); @@ -3533,7 +3545,7 @@ small_run_size_init(void) assert(small_maxrun != 0); - small_run_tab = (bool *)base_alloc(sizeof(bool) * (small_maxrun >> + small_run_tab = (bool *)base_alloc(NULL, sizeof(bool) * (small_maxrun >> LG_PAGE)); if (small_run_tab == NULL) return (true); @@ -3560,12 +3572,12 @@ run_quantize_init(void) run_quantize_max = chunksize + large_pad; - run_quantize_floor_tab = (size_t *)base_alloc(sizeof(size_t) * + run_quantize_floor_tab = (size_t *)base_alloc(NULL, sizeof(size_t) * (run_quantize_max >> LG_PAGE)); if (run_quantize_floor_tab == NULL) return (true); - run_quantize_ceil_tab = (size_t *)base_alloc(sizeof(size_t) * + run_quantize_ceil_tab = (size_t *)base_alloc(NULL, sizeof(size_t) * (run_quantize_max >> LG_PAGE)); if (run_quantize_ceil_tab == NULL) return (true); @@ -3642,40 +3654,40 @@ arena_boot(void) } void -arena_prefork(arena_t *arena) +arena_prefork(tsd_t *tsd, arena_t *arena) { unsigned i; - malloc_mutex_prefork(&arena->lock); - malloc_mutex_prefork(&arena->huge_mtx); - malloc_mutex_prefork(&arena->chunks_mtx); - malloc_mutex_prefork(&arena->node_cache_mtx); + malloc_mutex_prefork(tsd, &arena->lock); + malloc_mutex_prefork(tsd, &arena->huge_mtx); + malloc_mutex_prefork(tsd, &arena->chunks_mtx); + malloc_mutex_prefork(tsd, &arena->node_cache_mtx); for (i = 0; i < NBINS; i++) - malloc_mutex_prefork(&arena->bins[i].lock); + malloc_mutex_prefork(tsd, &arena->bins[i].lock); } void -arena_postfork_parent(arena_t *arena) +arena_postfork_parent(tsd_t *tsd, arena_t *arena) { unsigned i; for (i = 0; i < NBINS; i++) - malloc_mutex_postfork_parent(&arena->bins[i].lock); - malloc_mutex_postfork_parent(&arena->node_cache_mtx); - malloc_mutex_postfork_parent(&arena->chunks_mtx); - malloc_mutex_postfork_parent(&arena->huge_mtx); - malloc_mutex_postfork_parent(&arena->lock); + malloc_mutex_postfork_parent(tsd, &arena->bins[i].lock); + malloc_mutex_postfork_parent(tsd, &arena->node_cache_mtx); + malloc_mutex_postfork_parent(tsd, &arena->chunks_mtx); + malloc_mutex_postfork_parent(tsd, &arena->huge_mtx); + malloc_mutex_postfork_parent(tsd, &arena->lock); } void -arena_postfork_child(arena_t *arena) +arena_postfork_child(tsd_t *tsd, arena_t *arena) { unsigned i; for (i = 0; i < NBINS; i++) - malloc_mutex_postfork_child(&arena->bins[i].lock); - malloc_mutex_postfork_child(&arena->node_cache_mtx); - malloc_mutex_postfork_child(&arena->chunks_mtx); - malloc_mutex_postfork_child(&arena->huge_mtx); - malloc_mutex_postfork_child(&arena->lock); + malloc_mutex_postfork_child(tsd, &arena->bins[i].lock); + malloc_mutex_postfork_child(tsd, &arena->node_cache_mtx); + malloc_mutex_postfork_child(tsd, &arena->chunks_mtx); + malloc_mutex_postfork_child(tsd, &arena->huge_mtx); + malloc_mutex_postfork_child(tsd, &arena->lock); } diff --git a/src/base.c b/src/base.c index 7cdcfed8..87b376b8 100644 --- a/src/base.c +++ b/src/base.c @@ -76,7 +76,7 @@ base_chunk_alloc(size_t minsize) * physical memory usage. 
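/*
 * Sketch of why the arena_prefork()/arena_postfork_parent()/
 * arena_postfork_child() trio above exists: fork() must never duplicate a
 * locked mutex into the child, so every library mutex is acquired before
 * fork() and released afterwards in both parent and child.  Standalone shape
 * of the pattern via pthread_atfork(); lib_lock and the lib_* names are
 * invented for illustration.
 */
#include <pthread.h>

static pthread_mutex_t lib_lock = PTHREAD_MUTEX_INITIALIZER;

static void
lib_prefork(void)
{

	/* Acquire every library mutex, in a fixed order, before fork(). */
	pthread_mutex_lock(&lib_lock);
}

static void
lib_postfork_parent(void)
{

	/* Release in reverse order in the parent. */
	pthread_mutex_unlock(&lib_lock);
}

static void
lib_postfork_child(void)
{

	/* The child starts single-threaded; release (or reinitialize). */
	pthread_mutex_unlock(&lib_lock);
}

int
main(void)
{

	return (pthread_atfork(lib_prefork, lib_postfork_parent,
	    lib_postfork_child));
}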
*/ void * -base_alloc(size_t size) +base_alloc(tsd_t *tsd, size_t size) { void *ret; size_t csize, usize; @@ -91,7 +91,7 @@ base_alloc(size_t size) usize = s2u(csize); extent_node_init(&key, NULL, NULL, usize, false, false); - malloc_mutex_lock(&base_mtx); + malloc_mutex_lock(tsd, &base_mtx); node = extent_tree_szad_nsearch(&base_avail_szad, &key); if (node != NULL) { /* Use existing space. */ @@ -123,28 +123,28 @@ base_alloc(size_t size) } JEMALLOC_VALGRIND_MAKE_MEM_DEFINED(ret, csize); label_return: - malloc_mutex_unlock(&base_mtx); + malloc_mutex_unlock(tsd, &base_mtx); return (ret); } void -base_stats_get(size_t *allocated, size_t *resident, size_t *mapped) +base_stats_get(tsd_t *tsd, size_t *allocated, size_t *resident, size_t *mapped) { - malloc_mutex_lock(&base_mtx); + malloc_mutex_lock(tsd, &base_mtx); assert(base_allocated <= base_resident); assert(base_resident <= base_mapped); *allocated = base_allocated; *resident = base_resident; *mapped = base_mapped; - malloc_mutex_unlock(&base_mtx); + malloc_mutex_unlock(tsd, &base_mtx); } bool base_boot(void) { - if (malloc_mutex_init(&base_mtx)) + if (malloc_mutex_init(&base_mtx, "base", WITNESS_RANK_BASE)) return (true); extent_tree_szad_new(&base_avail_szad); base_nodes = NULL; @@ -153,22 +153,22 @@ base_boot(void) } void -base_prefork(void) +base_prefork(tsd_t *tsd) { - malloc_mutex_prefork(&base_mtx); + malloc_mutex_prefork(tsd, &base_mtx); } void -base_postfork_parent(void) +base_postfork_parent(tsd_t *tsd) { - malloc_mutex_postfork_parent(&base_mtx); + malloc_mutex_postfork_parent(tsd, &base_mtx); } void -base_postfork_child(void) +base_postfork_child(tsd_t *tsd) { - malloc_mutex_postfork_child(&base_mtx); + malloc_mutex_postfork_child(tsd, &base_mtx); } diff --git a/src/chunk.c b/src/chunk.c index 304d4e5a..0ee2a1a7 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -49,9 +49,10 @@ const chunk_hooks_t chunk_hooks_default = { * definition. 
*/ -static void chunk_record(arena_t *arena, chunk_hooks_t *chunk_hooks, - extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, bool cache, - void *chunk, size_t size, bool zeroed, bool committed); +static void chunk_record(tsd_t *tsd, arena_t *arena, + chunk_hooks_t *chunk_hooks, extent_tree_t *chunks_szad, + extent_tree_t *chunks_ad, bool cache, void *chunk, size_t size, bool zeroed, + bool committed); /******************************************************************************/ @@ -63,23 +64,23 @@ chunk_hooks_get_locked(arena_t *arena) } chunk_hooks_t -chunk_hooks_get(arena_t *arena) +chunk_hooks_get(tsd_t *tsd, arena_t *arena) { chunk_hooks_t chunk_hooks; - malloc_mutex_lock(&arena->chunks_mtx); + malloc_mutex_lock(tsd, &arena->chunks_mtx); chunk_hooks = chunk_hooks_get_locked(arena); - malloc_mutex_unlock(&arena->chunks_mtx); + malloc_mutex_unlock(tsd, &arena->chunks_mtx); return (chunk_hooks); } chunk_hooks_t -chunk_hooks_set(arena_t *arena, const chunk_hooks_t *chunk_hooks) +chunk_hooks_set(tsd_t *tsd, arena_t *arena, const chunk_hooks_t *chunk_hooks) { chunk_hooks_t old_chunk_hooks; - malloc_mutex_lock(&arena->chunks_mtx); + malloc_mutex_lock(tsd, &arena->chunks_mtx); old_chunk_hooks = arena->chunk_hooks; /* * Copy each field atomically so that it is impossible for readers to @@ -104,14 +105,14 @@ chunk_hooks_set(arena_t *arena, const chunk_hooks_t *chunk_hooks) ATOMIC_COPY_HOOK(split); ATOMIC_COPY_HOOK(merge); #undef ATOMIC_COPY_HOOK - malloc_mutex_unlock(&arena->chunks_mtx); + malloc_mutex_unlock(tsd, &arena->chunks_mtx); return (old_chunk_hooks); } static void -chunk_hooks_assure_initialized_impl(arena_t *arena, chunk_hooks_t *chunk_hooks, - bool locked) +chunk_hooks_assure_initialized_impl(tsd_t *tsd, arena_t *arena, + chunk_hooks_t *chunk_hooks, bool locked) { static const chunk_hooks_t uninitialized_hooks = CHUNK_HOOKS_INITIALIZER; @@ -119,27 +120,28 @@ chunk_hooks_assure_initialized_impl(arena_t *arena, chunk_hooks_t *chunk_hooks, if (memcmp(chunk_hooks, &uninitialized_hooks, sizeof(chunk_hooks_t)) == 0) { *chunk_hooks = locked ? 
chunk_hooks_get_locked(arena) : - chunk_hooks_get(arena); + chunk_hooks_get(tsd, arena); } } static void -chunk_hooks_assure_initialized_locked(arena_t *arena, +chunk_hooks_assure_initialized_locked(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks) { - chunk_hooks_assure_initialized_impl(arena, chunk_hooks, true); + chunk_hooks_assure_initialized_impl(tsd, arena, chunk_hooks, true); } static void -chunk_hooks_assure_initialized(arena_t *arena, chunk_hooks_t *chunk_hooks) +chunk_hooks_assure_initialized(tsd_t *tsd, arena_t *arena, + chunk_hooks_t *chunk_hooks) { - chunk_hooks_assure_initialized_impl(arena, chunk_hooks, false); + chunk_hooks_assure_initialized_impl(tsd, arena, chunk_hooks, false); } bool -chunk_register(const void *chunk, const extent_node_t *node) +chunk_register(tsd_t *tsd, const void *chunk, const extent_node_t *node) { assert(extent_node_addr_get(node) == chunk); @@ -159,7 +161,7 @@ chunk_register(const void *chunk, const extent_node_t *node) high = atomic_read_z(&highchunks); } if (cur > high && prof_gdump_get_unlocked()) - prof_gdump(); + prof_gdump(tsd); } return (false); @@ -197,7 +199,7 @@ chunk_first_best_fit(arena_t *arena, extent_tree_t *chunks_szad, } static void * -chunk_recycle(arena_t *arena, chunk_hooks_t *chunk_hooks, +chunk_recycle(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, bool cache, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit, bool dalloc_node) @@ -219,8 +221,8 @@ chunk_recycle(arena_t *arena, chunk_hooks_t *chunk_hooks, /* Beware size_t wrap-around. */ if (alloc_size < size) return (NULL); - malloc_mutex_lock(&arena->chunks_mtx); - chunk_hooks_assure_initialized_locked(arena, chunk_hooks); + malloc_mutex_lock(tsd, &arena->chunks_mtx); + chunk_hooks_assure_initialized_locked(tsd, arena, chunk_hooks); if (new_addr != NULL) { extent_node_t key; extent_node_init(&key, arena, new_addr, alloc_size, false, @@ -232,7 +234,7 @@ chunk_recycle(arena_t *arena, chunk_hooks_t *chunk_hooks, } if (node == NULL || (new_addr != NULL && extent_node_size_get(node) < size)) { - malloc_mutex_unlock(&arena->chunks_mtx); + malloc_mutex_unlock(tsd, &arena->chunks_mtx); return (NULL); } leadsize = ALIGNMENT_CEILING((uintptr_t)extent_node_addr_get(node), @@ -251,7 +253,7 @@ chunk_recycle(arena_t *arena, chunk_hooks_t *chunk_hooks, if (leadsize != 0 && chunk_hooks->split(extent_node_addr_get(node), extent_node_size_get(node), leadsize, size, false, arena->ind)) { - malloc_mutex_unlock(&arena->chunks_mtx); + malloc_mutex_unlock(tsd, &arena->chunks_mtx); return (NULL); } /* Remove node from the tree. */ @@ -271,20 +273,21 @@ chunk_recycle(arena_t *arena, chunk_hooks_t *chunk_hooks, if (chunk_hooks->split(ret, size + trailsize, size, trailsize, false, arena->ind)) { if (dalloc_node && node != NULL) - arena_node_dalloc(arena, node); - malloc_mutex_unlock(&arena->chunks_mtx); - chunk_record(arena, chunk_hooks, chunks_szad, chunks_ad, - cache, ret, size + trailsize, zeroed, committed); + arena_node_dalloc(tsd, arena, node); + malloc_mutex_unlock(tsd, &arena->chunks_mtx); + chunk_record(tsd, arena, chunk_hooks, chunks_szad, + chunks_ad, cache, ret, size + trailsize, zeroed, + committed); return (NULL); } /* Insert the trailing space as a smaller chunk. 
*/ if (node == NULL) { - node = arena_node_alloc(arena); + node = arena_node_alloc(tsd, arena); if (node == NULL) { - malloc_mutex_unlock(&arena->chunks_mtx); - chunk_record(arena, chunk_hooks, chunks_szad, - chunks_ad, cache, ret, size + trailsize, - zeroed, committed); + malloc_mutex_unlock(tsd, &arena->chunks_mtx); + chunk_record(tsd, arena, chunk_hooks, + chunks_szad, chunks_ad, cache, ret, size + + trailsize, zeroed, committed); return (NULL); } } @@ -296,16 +299,16 @@ chunk_recycle(arena_t *arena, chunk_hooks_t *chunk_hooks, node = NULL; } if (!committed && chunk_hooks->commit(ret, size, 0, size, arena->ind)) { - malloc_mutex_unlock(&arena->chunks_mtx); - chunk_record(arena, chunk_hooks, chunks_szad, chunks_ad, cache, - ret, size, zeroed, committed); + malloc_mutex_unlock(tsd, &arena->chunks_mtx); + chunk_record(tsd, arena, chunk_hooks, chunks_szad, chunks_ad, + cache, ret, size, zeroed, committed); return (NULL); } - malloc_mutex_unlock(&arena->chunks_mtx); + malloc_mutex_unlock(tsd, &arena->chunks_mtx); assert(dalloc_node || node != NULL); if (dalloc_node && node != NULL) - arena_node_dalloc(arena, node); + arena_node_dalloc(tsd, arena, node); if (*zero) { if (!zeroed) memset(ret, 0, size); @@ -328,8 +331,8 @@ chunk_recycle(arena_t *arena, chunk_hooks_t *chunk_hooks, * them if they are returned. */ static void * -chunk_alloc_core(arena_t *arena, void *new_addr, size_t size, size_t alignment, - bool *zero, bool *commit, dss_prec_t dss_prec) +chunk_alloc_core(tsd_t *tsd, arena_t *arena, void *new_addr, size_t size, + size_t alignment, bool *zero, bool *commit, dss_prec_t dss_prec) { void *ret; @@ -340,8 +343,8 @@ chunk_alloc_core(arena_t *arena, void *new_addr, size_t size, size_t alignment, /* "primary" dss. */ if (have_dss && dss_prec == dss_prec_primary && (ret = - chunk_alloc_dss(arena, new_addr, size, alignment, zero, commit)) != - NULL) + chunk_alloc_dss(tsd, arena, new_addr, size, alignment, zero, + commit)) != NULL) return (ret); /* mmap. */ if ((ret = chunk_alloc_mmap(new_addr, size, alignment, zero, commit)) != @@ -349,8 +352,8 @@ chunk_alloc_core(arena_t *arena, void *new_addr, size_t size, size_t alignment, return (ret); /* "secondary" dss. */ if (have_dss && dss_prec == dss_prec_secondary && (ret = - chunk_alloc_dss(arena, new_addr, size, alignment, zero, commit)) != - NULL) + chunk_alloc_dss(tsd, arena, new_addr, size, alignment, zero, + commit)) != NULL) return (ret); /* All strategies for allocation failed. 
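/*
 * Sketch of the precedence logic in chunk_alloc_core() above: try the
 * "primary" dss source, then mmap, then the "secondary" dss source, and
 * return the first one that succeeds.  Standalone illustration only; prec_t
 * and the try_* stand-ins are invented names.
 */
#include <stddef.h>
#include <stdlib.h>

typedef enum {
	PREC_DISABLED,
	PREC_PRIMARY,
	PREC_SECONDARY
} prec_t;

static void *
try_sbrk_like(size_t size)
{

	(void)size;
	return (NULL);		/* Pretend this source is exhausted. */
}

static void *
try_mmap_like(size_t size)
{

	return (malloc(size));	/* Stand-in for the mmap-style source. */
}

static void *
alloc_core(size_t size, prec_t prec)
{
	void *ret;

	if (prec == PREC_PRIMARY && (ret = try_sbrk_like(size)) != NULL)
		return (ret);
	if ((ret = try_mmap_like(size)) != NULL)
		return (ret);
	if (prec == PREC_SECONDARY && (ret = try_sbrk_like(size)) != NULL)
		return (ret);
	return (NULL);		/* All strategies failed. */
}

int
main(void)
{
	void *p = alloc_core(4096, PREC_PRIMARY);

	free(p);
	return (0);
}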
*/ @@ -380,8 +383,8 @@ chunk_alloc_base(size_t size) } void * -chunk_alloc_cache(arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr, - size_t size, size_t alignment, bool *zero, bool dalloc_node) +chunk_alloc_cache(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, + void *new_addr, size_t size, size_t alignment, bool *zero, bool dalloc_node) { void *ret; bool commit; @@ -392,7 +395,7 @@ chunk_alloc_cache(arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr, assert((alignment & chunksize_mask) == 0); commit = true; - ret = chunk_recycle(arena, chunk_hooks, &arena->chunks_szad_cached, + ret = chunk_recycle(tsd, arena, chunk_hooks, &arena->chunks_szad_cached, &arena->chunks_ad_cached, true, new_addr, size, alignment, zero, &commit, dalloc_node); if (ret == NULL) @@ -404,11 +407,11 @@ chunk_alloc_cache(arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr, } static arena_t * -chunk_arena_get(unsigned arena_ind) +chunk_arena_get(tsd_t *tsd, unsigned arena_ind) { arena_t *arena; - arena = arena_get(arena_ind, false); + arena = arena_get(tsd, arena_ind, false); /* * The arena we're allocating on behalf of must have been initialized * already. @@ -422,11 +425,13 @@ chunk_alloc_default(void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit, unsigned arena_ind) { void *ret; + tsd_t *tsd; arena_t *arena; - arena = chunk_arena_get(arena_ind); - ret = chunk_alloc_core(arena, new_addr, size, alignment, zero, commit, - arena->dss_prec); + tsd = tsd_fetch(); + arena = chunk_arena_get(tsd, arena_ind); + ret = chunk_alloc_core(tsd, arena, new_addr, size, alignment, zero, + commit, arena->dss_prec); if (ret == NULL) return (NULL); if (config_valgrind) @@ -436,8 +441,8 @@ chunk_alloc_default(void *new_addr, size_t size, size_t alignment, bool *zero, } static void * -chunk_alloc_retained(arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr, - size_t size, size_t alignment, bool *zero, bool *commit) +chunk_alloc_retained(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, + void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit) { assert(size != 0); @@ -445,20 +450,20 @@ chunk_alloc_retained(arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr, assert(alignment != 0); assert((alignment & chunksize_mask) == 0); - return (chunk_recycle(arena, chunk_hooks, &arena->chunks_szad_retained, - &arena->chunks_ad_retained, false, new_addr, size, alignment, zero, - commit, true)); + return (chunk_recycle(tsd, arena, chunk_hooks, + &arena->chunks_szad_retained, &arena->chunks_ad_retained, false, + new_addr, size, alignment, zero, commit, true)); } void * -chunk_alloc_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr, - size_t size, size_t alignment, bool *zero, bool *commit) +chunk_alloc_wrapper(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, + void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit) { void *ret; - chunk_hooks_assure_initialized(arena, chunk_hooks); + chunk_hooks_assure_initialized(tsd, arena, chunk_hooks); - ret = chunk_alloc_retained(arena, chunk_hooks, new_addr, size, + ret = chunk_alloc_retained(tsd, arena, chunk_hooks, new_addr, size, alignment, zero, commit); if (ret == NULL) { ret = chunk_hooks->alloc(new_addr, size, alignment, zero, @@ -473,7 +478,7 @@ chunk_alloc_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr, } static void -chunk_record(arena_t *arena, chunk_hooks_t *chunk_hooks, +chunk_record(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, extent_tree_t 
*chunks_szad, extent_tree_t *chunks_ad, bool cache, void *chunk, size_t size, bool zeroed, bool committed) { @@ -485,8 +490,8 @@ chunk_record(arena_t *arena, chunk_hooks_t *chunk_hooks, unzeroed = cache || !zeroed; JEMALLOC_VALGRIND_MAKE_MEM_NOACCESS(chunk, size); - malloc_mutex_lock(&arena->chunks_mtx); - chunk_hooks_assure_initialized_locked(arena, chunk_hooks); + malloc_mutex_lock(tsd, &arena->chunks_mtx); + chunk_hooks_assure_initialized_locked(tsd, arena, chunk_hooks); extent_node_init(&key, arena, (void *)((uintptr_t)chunk + size), 0, false, false); node = extent_tree_ad_nsearch(chunks_ad, &key); @@ -511,7 +516,7 @@ chunk_record(arena_t *arena, chunk_hooks_t *chunk_hooks, arena_chunk_cache_maybe_insert(arena, node, cache); } else { /* Coalescing forward failed, so insert a new node. */ - node = arena_node_alloc(arena); + node = arena_node_alloc(tsd, arena); if (node == NULL) { /* * Node allocation failed, which is an exceedingly @@ -520,8 +525,8 @@ chunk_record(arena_t *arena, chunk_hooks_t *chunk_hooks, * a virtual memory leak. */ if (cache) { - chunk_purge_wrapper(arena, chunk_hooks, chunk, - size, 0, size); + chunk_purge_wrapper(tsd, arena, chunk_hooks, + chunk, size, 0, size); } goto label_return; } @@ -557,16 +562,16 @@ chunk_record(arena_t *arena, chunk_hooks_t *chunk_hooks, extent_tree_szad_insert(chunks_szad, node); arena_chunk_cache_maybe_insert(arena, node, cache); - arena_node_dalloc(arena, prev); + arena_node_dalloc(tsd, arena, prev); } label_return: - malloc_mutex_unlock(&arena->chunks_mtx); + malloc_mutex_unlock(tsd, &arena->chunks_mtx); } void -chunk_dalloc_cache(arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk, - size_t size, bool committed) +chunk_dalloc_cache(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, + void *chunk, size_t size, bool committed) { assert(chunk != NULL); @@ -574,9 +579,9 @@ chunk_dalloc_cache(arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk, assert(size != 0); assert((size & chunksize_mask) == 0); - chunk_record(arena, chunk_hooks, &arena->chunks_szad_cached, + chunk_record(tsd, arena, chunk_hooks, &arena->chunks_szad_cached, &arena->chunks_ad_cached, true, chunk, size, false, committed); - arena_maybe_purge(arena); + arena_maybe_purge(tsd, arena); } static bool @@ -584,14 +589,14 @@ chunk_dalloc_default(void *chunk, size_t size, bool committed, unsigned arena_ind) { - if (!have_dss || !chunk_in_dss(chunk)) + if (!have_dss || !chunk_in_dss(tsd_fetch(), chunk)) return (chunk_dalloc_mmap(chunk, size)); return (true); } void -chunk_dalloc_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk, - size_t size, bool zeroed, bool committed) +chunk_dalloc_wrapper(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, + void *chunk, size_t size, bool zeroed, bool committed) { assert(chunk != NULL); @@ -599,7 +604,7 @@ chunk_dalloc_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk, assert(size != 0); assert((size & chunksize_mask) == 0); - chunk_hooks_assure_initialized(arena, chunk_hooks); + chunk_hooks_assure_initialized(tsd, arena, chunk_hooks); /* Try to deallocate. 
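/*
 * Sketch of the pattern used by chunk_dalloc_default() above: the chunk hook
 * signatures stay unchanged, so they cannot grow a tsd_t * parameter;
 * instead the default hooks call tsd_fetch() internally to obtain the
 * calling thread's context.  Standalone illustration only; ctx_t,
 * ctx_fetch(), and hook_dalloc() are invented names.
 */
#include <stdbool.h>
#include <stddef.h>

typedef struct {
	unsigned arena_ind;	/* Stand-in for per-thread allocator state. */
} ctx_t;

static __thread ctx_t thread_ctx;

static ctx_t *
ctx_fetch(void)
{

	return (&thread_ctx);	/* Cheap thread-local lookup. */
}

/* The hook signature is fixed, so the context is fetched, not passed. */
static bool
hook_dalloc(void *chunk, size_t size, bool committed, unsigned arena_ind)
{
	ctx_t *ctx = ctx_fetch();

	(void)ctx;
	(void)chunk;
	(void)size;
	(void)committed;
	(void)arena_ind;
	return (false);		/* false: the hook handled the request. */
}

int
main(void)
{

	return (hook_dalloc(NULL, 0, false, 0) ? 1 : 0);
}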
*/ if (!chunk_hooks->dalloc(chunk, size, committed, arena->ind)) return; @@ -610,7 +615,7 @@ chunk_dalloc_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk, } zeroed = !committed || !chunk_hooks->purge(chunk, size, 0, size, arena->ind); - chunk_record(arena, chunk_hooks, &arena->chunks_szad_retained, + chunk_record(tsd, arena, chunk_hooks, &arena->chunks_szad_retained, &arena->chunks_ad_retained, false, chunk, size, zeroed, committed); } @@ -648,11 +653,11 @@ chunk_purge_default(void *chunk, size_t size, size_t offset, size_t length, } bool -chunk_purge_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk, - size_t size, size_t offset, size_t length) +chunk_purge_wrapper(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, + void *chunk, size_t size, size_t offset, size_t length) { - chunk_hooks_assure_initialized(arena, chunk_hooks); + chunk_hooks_assure_initialized(tsd, arena, chunk_hooks); return (chunk_hooks->purge(chunk, size, offset, length, arena->ind)); } @@ -673,8 +678,11 @@ chunk_merge_default(void *chunk_a, size_t size_a, void *chunk_b, size_t size_b, if (!maps_coalesce) return (true); - if (have_dss && chunk_in_dss(chunk_a) != chunk_in_dss(chunk_b)) - return (true); + if (have_dss) { + tsd_t *tsd = tsd_fetch(); + if (chunk_in_dss(tsd, chunk_a) != chunk_in_dss(tsd, chunk_b)) + return (true); + } return (false); } @@ -683,7 +691,7 @@ static rtree_node_elm_t * chunks_rtree_node_alloc(size_t nelms) { - return ((rtree_node_elm_t *)base_alloc(nelms * + return ((rtree_node_elm_t *)base_alloc(tsd_fetch(), nelms * sizeof(rtree_node_elm_t))); } @@ -730,22 +738,22 @@ chunk_boot(void) } void -chunk_prefork(void) +chunk_prefork(tsd_t *tsd) { - chunk_dss_prefork(); + chunk_dss_prefork(tsd); } void -chunk_postfork_parent(void) +chunk_postfork_parent(tsd_t *tsd) { - chunk_dss_postfork_parent(); + chunk_dss_postfork_parent(tsd); } void -chunk_postfork_child(void) +chunk_postfork_child(tsd_t *tsd) { - chunk_dss_postfork_child(); + chunk_dss_postfork_child(tsd); } diff --git a/src/chunk_dss.c b/src/chunk_dss.c index 943d0e98..3b3f2433 100644 --- a/src/chunk_dss.c +++ b/src/chunk_dss.c @@ -41,33 +41,33 @@ chunk_dss_sbrk(intptr_t increment) } dss_prec_t -chunk_dss_prec_get(void) +chunk_dss_prec_get(tsd_t *tsd) { dss_prec_t ret; if (!have_dss) return (dss_prec_disabled); - malloc_mutex_lock(&dss_mtx); + malloc_mutex_lock(tsd, &dss_mtx); ret = dss_prec_default; - malloc_mutex_unlock(&dss_mtx); + malloc_mutex_unlock(tsd, &dss_mtx); return (ret); } bool -chunk_dss_prec_set(dss_prec_t dss_prec) +chunk_dss_prec_set(tsd_t *tsd, dss_prec_t dss_prec) { if (!have_dss) return (dss_prec != dss_prec_disabled); - malloc_mutex_lock(&dss_mtx); + malloc_mutex_lock(tsd, &dss_mtx); dss_prec_default = dss_prec; - malloc_mutex_unlock(&dss_mtx); + malloc_mutex_unlock(tsd, &dss_mtx); return (false); } void * -chunk_alloc_dss(arena_t *arena, void *new_addr, size_t size, size_t alignment, - bool *zero, bool *commit) +chunk_alloc_dss(tsd_t *tsd, arena_t *arena, void *new_addr, size_t size, + size_t alignment, bool *zero, bool *commit) { cassert(have_dss); assert(size > 0 && (size & chunksize_mask) == 0); @@ -80,7 +80,7 @@ chunk_alloc_dss(arena_t *arena, void *new_addr, size_t size, size_t alignment, if ((intptr_t)size < 0) return (NULL); - malloc_mutex_lock(&dss_mtx); + malloc_mutex_lock(tsd, &dss_mtx); if (dss_prev != (void *)-1) { /* @@ -122,7 +122,7 @@ chunk_alloc_dss(arena_t *arena, void *new_addr, size_t size, size_t alignment, if ((uintptr_t)ret < (uintptr_t)dss_max || (uintptr_t)dss_next < 
(uintptr_t)dss_max) { /* Wrap-around. */ - malloc_mutex_unlock(&dss_mtx); + malloc_mutex_unlock(tsd, &dss_mtx); return (NULL); } incr = gap_size + cpad_size + size; @@ -130,11 +130,11 @@ chunk_alloc_dss(arena_t *arena, void *new_addr, size_t size, size_t alignment, if (dss_prev == dss_max) { /* Success. */ dss_max = dss_next; - malloc_mutex_unlock(&dss_mtx); + malloc_mutex_unlock(tsd, &dss_mtx); if (cpad_size != 0) { chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; - chunk_dalloc_wrapper(arena, + chunk_dalloc_wrapper(tsd, arena, &chunk_hooks, cpad, cpad_size, false, true); } @@ -149,25 +149,25 @@ chunk_alloc_dss(arena_t *arena, void *new_addr, size_t size, size_t alignment, } } while (dss_prev != (void *)-1); } - malloc_mutex_unlock(&dss_mtx); + malloc_mutex_unlock(tsd, &dss_mtx); return (NULL); } bool -chunk_in_dss(void *chunk) +chunk_in_dss(tsd_t *tsd, void *chunk) { bool ret; cassert(have_dss); - malloc_mutex_lock(&dss_mtx); + malloc_mutex_lock(tsd, &dss_mtx); if ((uintptr_t)chunk >= (uintptr_t)dss_base && (uintptr_t)chunk < (uintptr_t)dss_max) ret = true; else ret = false; - malloc_mutex_unlock(&dss_mtx); + malloc_mutex_unlock(tsd, &dss_mtx); return (ret); } @@ -178,7 +178,7 @@ chunk_dss_boot(void) cassert(have_dss); - if (malloc_mutex_init(&dss_mtx)) + if (malloc_mutex_init(&dss_mtx, "dss", WITNESS_RANK_DSS)) return (true); dss_base = chunk_dss_sbrk(0); dss_prev = dss_base; @@ -188,27 +188,27 @@ chunk_dss_boot(void) } void -chunk_dss_prefork(void) +chunk_dss_prefork(tsd_t *tsd) { if (have_dss) - malloc_mutex_prefork(&dss_mtx); + malloc_mutex_prefork(tsd, &dss_mtx); } void -chunk_dss_postfork_parent(void) +chunk_dss_postfork_parent(tsd_t *tsd) { if (have_dss) - malloc_mutex_postfork_parent(&dss_mtx); + malloc_mutex_postfork_parent(tsd, &dss_mtx); } void -chunk_dss_postfork_child(void) +chunk_dss_postfork_child(tsd_t *tsd) { if (have_dss) - malloc_mutex_postfork_child(&dss_mtx); + malloc_mutex_postfork_child(tsd, &dss_mtx); } /******************************************************************************/ diff --git a/src/ctl.c b/src/ctl.c index 17bd0719..50faee7b 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -42,25 +42,25 @@ ctl_indexed_node(const ctl_node_t *node) /* Function prototypes for non-inline static functions. 
*/ #define CTL_PROTO(n) \ -static int n##_ctl(const size_t *mib, size_t miblen, void *oldp, \ - size_t *oldlenp, void *newp, size_t newlen); +static int n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, \ + void *oldp, size_t *oldlenp, void *newp, size_t newlen); #define INDEX_PROTO(n) \ -static const ctl_named_node_t *n##_index(const size_t *mib, \ - size_t miblen, size_t i); +static const ctl_named_node_t *n##_index(tsd_t *tsd, \ + const size_t *mib, size_t miblen, size_t i); static bool ctl_arena_init(ctl_arena_stats_t *astats); static void ctl_arena_clear(ctl_arena_stats_t *astats); -static void ctl_arena_stats_amerge(ctl_arena_stats_t *cstats, +static void ctl_arena_stats_amerge(tsd_t *tsd, ctl_arena_stats_t *cstats, arena_t *arena); static void ctl_arena_stats_smerge(ctl_arena_stats_t *sstats, ctl_arena_stats_t *astats); -static void ctl_arena_refresh(arena_t *arena, unsigned i); -static bool ctl_grow(void); -static void ctl_refresh(void); -static bool ctl_init(void); -static int ctl_lookup(const char *name, ctl_node_t const **nodesp, - size_t *mibp, size_t *depthp); +static void ctl_arena_refresh(tsd_t *tsd, arena_t *arena, unsigned i); +static bool ctl_grow(tsd_t *tsd); +static void ctl_refresh(tsd_t *tsd); +static bool ctl_init(tsd_t *tsd); +static int ctl_lookup(tsd_t *tsd, const char *name, + ctl_node_t const **nodesp, size_t *mibp, size_t *depthp); CTL_PROTO(version) CTL_PROTO(epoch) @@ -117,7 +117,7 @@ CTL_PROTO(opt_prof_accum) CTL_PROTO(tcache_create) CTL_PROTO(tcache_flush) CTL_PROTO(tcache_destroy) -static void arena_i_purge(unsigned arena_ind, bool all); +static void arena_i_purge(tsd_t *tsd, unsigned arena_ind, bool all); CTL_PROTO(arena_i_purge) CTL_PROTO(arena_i_decay) CTL_PROTO(arena_i_dss) @@ -554,12 +554,12 @@ ctl_arena_clear(ctl_arena_stats_t *astats) } static void -ctl_arena_stats_amerge(ctl_arena_stats_t *cstats, arena_t *arena) +ctl_arena_stats_amerge(tsd_t *tsd, ctl_arena_stats_t *cstats, arena_t *arena) { unsigned i; if (config_stats) { - arena_stats_merge(arena, &cstats->nthreads, &cstats->dss, + arena_stats_merge(tsd, arena, &cstats->nthreads, &cstats->dss, &cstats->lg_dirty_mult, &cstats->decay_time, &cstats->pactive, &cstats->pdirty, &cstats->astats, cstats->bstats, cstats->lstats, cstats->hstats); @@ -572,8 +572,8 @@ ctl_arena_stats_amerge(ctl_arena_stats_t *cstats, arena_t *arena) cstats->nrequests_small += cstats->bstats[i].nrequests; } } else { - arena_basic_stats_merge(arena, &cstats->nthreads, &cstats->dss, - &cstats->lg_dirty_mult, &cstats->decay_time, + arena_basic_stats_merge(tsd, arena, &cstats->nthreads, + &cstats->dss, &cstats->lg_dirty_mult, &cstats->decay_time, &cstats->pactive, &cstats->pdirty); } } @@ -649,24 +649,24 @@ ctl_arena_stats_smerge(ctl_arena_stats_t *sstats, ctl_arena_stats_t *astats) } static void -ctl_arena_refresh(arena_t *arena, unsigned i) +ctl_arena_refresh(tsd_t *tsd, arena_t *arena, unsigned i) { ctl_arena_stats_t *astats = &ctl_stats.arenas[i]; ctl_arena_stats_t *sstats = &ctl_stats.arenas[ctl_stats.narenas]; ctl_arena_clear(astats); - ctl_arena_stats_amerge(astats, arena); + ctl_arena_stats_amerge(tsd, astats, arena); /* Merge into sum stats as well. */ ctl_arena_stats_smerge(sstats, astats); } static bool -ctl_grow(void) +ctl_grow(tsd_t *tsd) { ctl_arena_stats_t *astats; /* Initialize new arena. */ - if (arena_init(ctl_stats.narenas) == NULL) + if (arena_init(tsd, ctl_stats.narenas) == NULL) return (true); /* Allocate extended arena stats. 
*/ @@ -701,7 +701,7 @@ ctl_grow(void) } static void -ctl_refresh(void) +ctl_refresh(tsd_t *tsd) { unsigned i; VARIABLE_ARRAY(arena_t *, tarenas, ctl_stats.narenas); @@ -713,19 +713,20 @@ ctl_refresh(void) ctl_arena_clear(&ctl_stats.arenas[ctl_stats.narenas]); for (i = 0; i < ctl_stats.narenas; i++) - tarenas[i] = arena_get(i, false); + tarenas[i] = arena_get(tsd, i, false); for (i = 0; i < ctl_stats.narenas; i++) { bool initialized = (tarenas[i] != NULL); ctl_stats.arenas[i].initialized = initialized; if (initialized) - ctl_arena_refresh(tarenas[i], i); + ctl_arena_refresh(tsd, tarenas[i], i); } if (config_stats) { size_t base_allocated, base_resident, base_mapped; - base_stats_get(&base_allocated, &base_resident, &base_mapped); + base_stats_get(tsd, &base_allocated, &base_resident, + &base_mapped); ctl_stats.allocated = ctl_stats.arenas[ctl_stats.narenas].allocated_small + ctl_stats.arenas[ctl_stats.narenas].astats.allocated_large + @@ -748,11 +749,11 @@ ctl_refresh(void) } static bool -ctl_init(void) +ctl_init(tsd_t *tsd) { bool ret; - malloc_mutex_lock(&ctl_mtx); + malloc_mutex_lock(tsd, &ctl_mtx); if (!ctl_initialized) { /* * Allocate space for one extra arena stats element, which @@ -794,19 +795,19 @@ ctl_init(void) ctl_stats.arenas[ctl_stats.narenas].initialized = true; ctl_epoch = 0; - ctl_refresh(); + ctl_refresh(tsd); ctl_initialized = true; } ret = false; label_return: - malloc_mutex_unlock(&ctl_mtx); + malloc_mutex_unlock(tsd, &ctl_mtx); return (ret); } static int -ctl_lookup(const char *name, ctl_node_t const **nodesp, size_t *mibp, - size_t *depthp) +ctl_lookup(tsd_t *tsd, const char *name, ctl_node_t const **nodesp, + size_t *mibp, size_t *depthp) { int ret; const char *elm, *tdot, *dot; @@ -858,7 +859,7 @@ ctl_lookup(const char *name, ctl_node_t const **nodesp, size_t *mibp, } inode = ctl_indexed_node(node->children); - node = inode->index(mibp, *depthp, (size_t)index); + node = inode->index(tsd, mibp, *depthp, (size_t)index); if (node == NULL) { ret = ENOENT; goto label_return; @@ -902,8 +903,8 @@ label_return: } int -ctl_byname(const char *name, void *oldp, size_t *oldlenp, void *newp, - size_t newlen) +ctl_byname(tsd_t *tsd, const char *name, void *oldp, size_t *oldlenp, + void *newp, size_t newlen) { int ret; size_t depth; @@ -911,19 +912,19 @@ ctl_byname(const char *name, void *oldp, size_t *oldlenp, void *newp, size_t mib[CTL_MAX_DEPTH]; const ctl_named_node_t *node; - if (!ctl_initialized && ctl_init()) { + if (!ctl_initialized && ctl_init(tsd)) { ret = EAGAIN; goto label_return; } depth = CTL_MAX_DEPTH; - ret = ctl_lookup(name, nodes, mib, &depth); + ret = ctl_lookup(tsd, name, nodes, mib, &depth); if (ret != 0) goto label_return; node = ctl_named_node(nodes[depth-1]); if (node != NULL && node->ctl) - ret = node->ctl(mib, depth, oldp, oldlenp, newp, newlen); + ret = node->ctl(tsd, mib, depth, oldp, oldlenp, newp, newlen); else { /* The name refers to a partial path through the ctl tree. 
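These lookups are the backends for the public mallctl(), mallctlnametomib() and mallctlbymib() entry points. A minimal caller-side sketch, assuming a stock build whose public symbols are unprefixed and exposed via <jemalloc/jemalloc.h>; the purge loop and the printed text are illustrative only, not part of this patch:

#include <stdio.h>
#include <jemalloc/jemalloc.h>

int
main(void)
{
	unsigned narenas, i;
	size_t sz = sizeof(narenas);
	size_t mib[3];
	size_t miblen = sizeof(mib) / sizeof(mib[0]);

	// Full-path lookup: one string parse per call (ctl_byname()).
	if (mallctl("arenas.narenas", &narenas, &sz, NULL, 0) != 0)
		return (1);

	// Translate the name once (ctl_nametomib()), then reuse the MIB,
	// patching the arena index into "arena.<i>.purge" (ctl_bymib()).
	if (mallctlnametomib("arena.0.purge", mib, &miblen) != 0)
		return (1);
	for (i = 0; i < narenas; i++) {
		mib[1] = (size_t)i;
		mallctlbymib(mib, miblen, NULL, NULL, NULL, 0);
	}
	printf("purged %u arenas\n", narenas);
	return (0);
}

Translating the name to a MIB once amortizes the string parsing when the same control is driven repeatedly, which is the case the by-MIB path above is meant for.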
*/ ret = ENOENT; @@ -934,29 +935,29 @@ label_return: } int -ctl_nametomib(const char *name, size_t *mibp, size_t *miblenp) +ctl_nametomib(tsd_t *tsd, const char *name, size_t *mibp, size_t *miblenp) { int ret; - if (!ctl_initialized && ctl_init()) { + if (!ctl_initialized && ctl_init(tsd)) { ret = EAGAIN; goto label_return; } - ret = ctl_lookup(name, NULL, mibp, miblenp); + ret = ctl_lookup(tsd, name, NULL, mibp, miblenp); label_return: return(ret); } int -ctl_bymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, - void *newp, size_t newlen) +ctl_bymib(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { int ret; const ctl_named_node_t *node; size_t i; - if (!ctl_initialized && ctl_init()) { + if (!ctl_initialized && ctl_init(tsd)) { ret = EAGAIN; goto label_return; } @@ -978,7 +979,7 @@ ctl_bymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, /* Indexed element. */ inode = ctl_indexed_node(node->children); - node = inode->index(mib, miblen, mib[i]); + node = inode->index(tsd, mib, miblen, mib[i]); if (node == NULL) { ret = ENOENT; goto label_return; @@ -988,7 +989,7 @@ ctl_bymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, /* Call the ctl function. */ if (node && node->ctl) - ret = node->ctl(mib, miblen, oldp, oldlenp, newp, newlen); + ret = node->ctl(tsd, mib, miblen, oldp, oldlenp, newp, newlen); else { /* Partial MIB. */ ret = ENOENT; @@ -1002,7 +1003,7 @@ bool ctl_boot(void) { - if (malloc_mutex_init(&ctl_mtx)) + if (malloc_mutex_init(&ctl_mtx, "ctl", WITNESS_RANK_CTL)) return (true); ctl_initialized = false; @@ -1011,24 +1012,24 @@ ctl_boot(void) } void -ctl_prefork(void) +ctl_prefork(tsd_t *tsd) { - malloc_mutex_prefork(&ctl_mtx); + malloc_mutex_prefork(tsd, &ctl_mtx); } void -ctl_postfork_parent(void) +ctl_postfork_parent(tsd_t *tsd) { - malloc_mutex_postfork_parent(&ctl_mtx); + malloc_mutex_postfork_parent(tsd, &ctl_mtx); } void -ctl_postfork_child(void) +ctl_postfork_child(tsd_t *tsd) { - malloc_mutex_postfork_child(&ctl_mtx); + malloc_mutex_postfork_child(tsd, &ctl_mtx); } /******************************************************************************/ @@ -1085,8 +1086,8 @@ ctl_postfork_child(void) */ #define CTL_RO_CLGEN(c, l, n, v, t) \ static int \ -n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ - void *newp, size_t newlen) \ +n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \ + size_t *oldlenp, void *newp, size_t newlen) \ { \ int ret; \ t oldval; \ @@ -1094,7 +1095,7 @@ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ if (!(c)) \ return (ENOENT); \ if (l) \ - malloc_mutex_lock(&ctl_mtx); \ + malloc_mutex_lock(tsd, &ctl_mtx); \ READONLY(); \ oldval = (v); \ READ(oldval, t); \ @@ -1102,47 +1103,47 @@ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ ret = 0; \ label_return: \ if (l) \ - malloc_mutex_unlock(&ctl_mtx); \ + malloc_mutex_unlock(tsd, &ctl_mtx); \ return (ret); \ } #define CTL_RO_CGEN(c, n, v, t) \ static int \ -n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ - void *newp, size_t newlen) \ +n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \ + size_t *oldlenp, void *newp, size_t newlen) \ { \ int ret; \ t oldval; \ \ if (!(c)) \ return (ENOENT); \ - malloc_mutex_lock(&ctl_mtx); \ + malloc_mutex_lock(tsd, &ctl_mtx); \ READONLY(); \ oldval = (v); \ READ(oldval, t); \ \ ret = 0; \ label_return: \ - malloc_mutex_unlock(&ctl_mtx); \ + 
malloc_mutex_unlock(tsd, &ctl_mtx); \ return (ret); \ } #define CTL_RO_GEN(n, v, t) \ static int \ -n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ - void *newp, size_t newlen) \ +n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \ + size_t *oldlenp, void *newp, size_t newlen) \ { \ int ret; \ t oldval; \ \ - malloc_mutex_lock(&ctl_mtx); \ + malloc_mutex_lock(tsd, &ctl_mtx); \ READONLY(); \ oldval = (v); \ READ(oldval, t); \ \ ret = 0; \ label_return: \ - malloc_mutex_unlock(&ctl_mtx); \ + malloc_mutex_unlock(tsd, &ctl_mtx); \ return (ret); \ } @@ -1152,8 +1153,8 @@ label_return: \ */ #define CTL_RO_NL_CGEN(c, n, v, t) \ static int \ -n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ - void *newp, size_t newlen) \ +n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \ + size_t *oldlenp, void *newp, size_t newlen) \ { \ int ret; \ t oldval; \ @@ -1171,8 +1172,8 @@ label_return: \ #define CTL_RO_NL_GEN(n, v, t) \ static int \ -n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ - void *newp, size_t newlen) \ +n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \ + size_t *oldlenp, void *newp, size_t newlen) \ { \ int ret; \ t oldval; \ @@ -1188,17 +1189,15 @@ label_return: \ #define CTL_TSD_RO_NL_CGEN(c, n, m, t) \ static int \ -n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ - void *newp, size_t newlen) \ +n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \ + size_t *oldlenp, void *newp, size_t newlen) \ { \ int ret; \ t oldval; \ - tsd_t *tsd; \ \ if (!(c)) \ return (ENOENT); \ READONLY(); \ - tsd = tsd_fetch(); \ oldval = (m(tsd)); \ READ(oldval, t); \ \ @@ -1209,8 +1208,8 @@ label_return: \ #define CTL_RO_CONFIG_GEN(n, t) \ static int \ -n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ - void *newp, size_t newlen) \ +n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \ + size_t *oldlenp, void *newp, size_t newlen) \ { \ int ret; \ t oldval; \ @@ -1229,21 +1228,21 @@ label_return: \ CTL_RO_NL_GEN(version, JEMALLOC_VERSION, const char *) static int -epoch_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, - void *newp, size_t newlen) +epoch_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { int ret; UNUSED uint64_t newval; - malloc_mutex_lock(&ctl_mtx); + malloc_mutex_lock(tsd, &ctl_mtx); WRITE(newval, uint64_t); if (newp != NULL) - ctl_refresh(); + ctl_refresh(tsd); READ(ctl_epoch, uint64_t); ret = 0; label_return: - malloc_mutex_unlock(&ctl_mtx); + malloc_mutex_unlock(tsd, &ctl_mtx); return (ret); } @@ -1298,20 +1297,18 @@ CTL_RO_NL_CGEN(config_prof, opt_prof_leak, opt_prof_leak, bool) /******************************************************************************/ static int -thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, - void *newp, size_t newlen) +thread_arena_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { int ret; - tsd_t *tsd; arena_t *oldarena; unsigned newind, oldind; - tsd = tsd_fetch(); oldarena = arena_choose(tsd, NULL); if (oldarena == NULL) return (EAGAIN); - malloc_mutex_lock(&ctl_mtx); + malloc_mutex_lock(tsd, &ctl_mtx); newind = oldind = oldarena->ind; WRITE(newind, unsigned); READ(oldind, unsigned); @@ -1325,7 +1322,7 @@ thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, } /* 
Initialize arena if necessary. */ - newarena = arena_get(newind, true); + newarena = arena_get(tsd, newind, true); if (newarena == NULL) { ret = EAGAIN; goto label_return; @@ -1335,7 +1332,7 @@ thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, if (config_tcache) { tcache_t *tcache = tsd_tcache_get(tsd); if (tcache != NULL) { - tcache_arena_reassociate(tcache, oldarena, + tcache_arena_reassociate(tsd, tcache, oldarena, newarena); } } @@ -1343,7 +1340,7 @@ thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, ret = 0; label_return: - malloc_mutex_unlock(&ctl_mtx); + malloc_mutex_unlock(tsd, &ctl_mtx); return (ret); } @@ -1357,8 +1354,8 @@ CTL_TSD_RO_NL_CGEN(config_stats, thread_deallocatedp, tsd_thread_deallocatedp_get, uint64_t *) static int -thread_tcache_enabled_ctl(const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) +thread_tcache_enabled_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; bool oldval; @@ -1382,8 +1379,8 @@ label_return: } static int -thread_tcache_flush_ctl(const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) +thread_tcache_flush_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; @@ -1401,7 +1398,7 @@ label_return: } static int -thread_prof_name_ctl(const size_t *mib, size_t miblen, void *oldp, +thread_prof_name_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; @@ -1412,20 +1409,16 @@ thread_prof_name_ctl(const size_t *mib, size_t miblen, void *oldp, READ_XOR_WRITE(); if (newp != NULL) { - tsd_t *tsd; - if (newlen != sizeof(const char *)) { ret = EINVAL; goto label_return; } - tsd = tsd_fetch(); - if ((ret = prof_thread_name_set(tsd, *(const char **)newp)) != 0) goto label_return; } else { - const char *oldname = prof_thread_name_get(); + const char *oldname = prof_thread_name_get(tsd); READ(oldname, const char *); } @@ -1435,7 +1428,7 @@ label_return: } static int -thread_prof_active_ctl(const size_t *mib, size_t miblen, void *oldp, +thread_prof_active_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; @@ -1444,13 +1437,13 @@ thread_prof_active_ctl(const size_t *mib, size_t miblen, void *oldp, if (!config_prof) return (ENOENT); - oldval = prof_thread_active_get(); + oldval = prof_thread_active_get(tsd); if (newp != NULL) { if (newlen != sizeof(bool)) { ret = EINVAL; goto label_return; } - if (prof_thread_active_set(*(bool *)newp)) { + if (prof_thread_active_set(tsd, *(bool *)newp)) { ret = EAGAIN; goto label_return; } @@ -1465,19 +1458,16 @@ label_return: /******************************************************************************/ static int -tcache_create_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, - void *newp, size_t newlen) +tcache_create_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { int ret; - tsd_t *tsd; unsigned tcache_ind; if (!config_tcache) return (ENOENT); - tsd = tsd_fetch(); - - malloc_mutex_lock(&ctl_mtx); + malloc_mutex_lock(tsd, &ctl_mtx); READONLY(); if (tcaches_create(tsd, &tcache_ind)) { ret = EFAULT; @@ -1487,23 +1477,20 @@ tcache_create_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, ret = 0; label_return: - malloc_mutex_unlock(&ctl_mtx); + 
malloc_mutex_unlock(tsd, &ctl_mtx); return (ret); } static int -tcache_flush_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, - void *newp, size_t newlen) +tcache_flush_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { int ret; - tsd_t *tsd; unsigned tcache_ind; if (!config_tcache) return (ENOENT); - tsd = tsd_fetch(); - WRITEONLY(); tcache_ind = UINT_MAX; WRITE(tcache_ind, unsigned); @@ -1519,18 +1506,15 @@ label_return: } static int -tcache_destroy_ctl(const size_t *mib, size_t miblen, void *oldp, +tcache_destroy_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; - tsd_t *tsd; unsigned tcache_ind; if (!config_tcache) return (ENOENT); - tsd = tsd_fetch(); - WRITEONLY(); tcache_ind = UINT_MAX; WRITE(tcache_ind, unsigned); @@ -1548,10 +1532,10 @@ label_return: /******************************************************************************/ static void -arena_i_purge(unsigned arena_ind, bool all) +arena_i_purge(tsd_t *tsd, unsigned arena_ind, bool all) { - malloc_mutex_lock(&ctl_mtx); + malloc_mutex_lock(tsd, &ctl_mtx); { unsigned narenas = ctl_stats.narenas; @@ -1560,43 +1544,43 @@ arena_i_purge(unsigned arena_ind, bool all) VARIABLE_ARRAY(arena_t *, tarenas, narenas); for (i = 0; i < narenas; i++) - tarenas[i] = arena_get(i, false); + tarenas[i] = arena_get(tsd, i, false); /* * No further need to hold ctl_mtx, since narenas and * tarenas contain everything needed below. */ - malloc_mutex_unlock(&ctl_mtx); + malloc_mutex_unlock(tsd, &ctl_mtx); for (i = 0; i < narenas; i++) { if (tarenas[i] != NULL) - arena_purge(tarenas[i], all); + arena_purge(tsd, tarenas[i], all); } } else { arena_t *tarena; assert(arena_ind < narenas); - tarena = arena_get(arena_ind, false); + tarena = arena_get(tsd, arena_ind, false); /* No further need to hold ctl_mtx. 
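The per-arena knobs threaded through here (arena.<i>.purge, arena.<i>.decay, arena.<i>.dss, arena.<i>.lg_dirty_mult, arena.<i>.decay_time) remain reachable through the same public interface. A rough caller-side sketch, assuming a 4.1-style build with unprefixed symbols; arena index 0 and the 30 s value are arbitrary examples, and the reads simply fail cleanly on builds where a control is absent:

#include <stdio.h>
#include <jemalloc/jemalloc.h>

int
main(void)
{
	ssize_t decay_time, new_decay = 30;
	const char *dss;
	size_t sz = sizeof(decay_time);
	size_t dss_sz = sizeof(dss);

	// Read, then lower, arena 0's decay time (seconds).
	if (mallctl("arena.0.decay_time", &decay_time, &sz, NULL, 0) == 0)
		printf("arena.0.decay_time: %zd\n", decay_time);
	mallctl("arena.0.decay_time", NULL, NULL, &new_decay,
	    sizeof(new_decay));

	// Query arena 0's dss (sbrk) precedence as a string.
	if (mallctl("arena.0.dss", &dss, &dss_sz, NULL, 0) == 0)
		printf("arena.0.dss: %s\n", dss);
	return (0);
}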
*/ - malloc_mutex_unlock(&ctl_mtx); + malloc_mutex_unlock(tsd, &ctl_mtx); if (tarena != NULL) - arena_purge(tarena, all); + arena_purge(tsd, tarena, all); } } } static int -arena_i_purge_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, - void *newp, size_t newlen) +arena_i_purge_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { int ret; READONLY(); WRITEONLY(); - arena_i_purge((unsigned)mib[1], true); + arena_i_purge(tsd, (unsigned)mib[1], true); ret = 0; label_return: @@ -1604,14 +1588,14 @@ label_return: } static int -arena_i_decay_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, - void *newp, size_t newlen) +arena_i_decay_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { int ret; READONLY(); WRITEONLY(); - arena_i_purge((unsigned)mib[1], false); + arena_i_purge(tsd, (unsigned)mib[1], false); ret = 0; label_return: @@ -1619,8 +1603,8 @@ label_return: } static int -arena_i_dss_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, - void *newp, size_t newlen) +arena_i_dss_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { int ret; const char *dss = NULL; @@ -1628,7 +1612,7 @@ arena_i_dss_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, dss_prec_t dss_prec_old = dss_prec_limit; dss_prec_t dss_prec = dss_prec_limit; - malloc_mutex_lock(&ctl_mtx); + malloc_mutex_lock(tsd, &ctl_mtx); WRITE(dss, const char *); if (dss != NULL) { int i; @@ -1649,20 +1633,20 @@ arena_i_dss_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, } if (arena_ind < ctl_stats.narenas) { - arena_t *arena = arena_get(arena_ind, false); + arena_t *arena = arena_get(tsd, arena_ind, false); if (arena == NULL || (dss_prec != dss_prec_limit && - arena_dss_prec_set(arena, dss_prec))) { + arena_dss_prec_set(tsd, arena, dss_prec))) { ret = EFAULT; goto label_return; } - dss_prec_old = arena_dss_prec_get(arena); + dss_prec_old = arena_dss_prec_get(tsd, arena); } else { if (dss_prec != dss_prec_limit && - chunk_dss_prec_set(dss_prec)) { + chunk_dss_prec_set(tsd, dss_prec)) { ret = EFAULT; goto label_return; } - dss_prec_old = chunk_dss_prec_get(); + dss_prec_old = chunk_dss_prec_get(tsd); } dss = dss_prec_names[dss_prec_old]; @@ -1670,26 +1654,26 @@ arena_i_dss_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, ret = 0; label_return: - malloc_mutex_unlock(&ctl_mtx); + malloc_mutex_unlock(tsd, &ctl_mtx); return (ret); } static int -arena_i_lg_dirty_mult_ctl(const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) +arena_i_lg_dirty_mult_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; unsigned arena_ind = (unsigned)mib[1]; arena_t *arena; - arena = arena_get(arena_ind, false); + arena = arena_get(tsd, arena_ind, false); if (arena == NULL) { ret = EFAULT; goto label_return; } if (oldp != NULL && oldlenp != NULL) { - size_t oldval = arena_lg_dirty_mult_get(arena); + size_t oldval = arena_lg_dirty_mult_get(tsd, arena); READ(oldval, ssize_t); } if (newp != NULL) { @@ -1697,7 +1681,7 @@ arena_i_lg_dirty_mult_ctl(const size_t *mib, size_t miblen, void *oldp, ret = EINVAL; goto label_return; } - if (arena_lg_dirty_mult_set(arena, *(ssize_t *)newp)) { + if (arena_lg_dirty_mult_set(tsd, arena, *(ssize_t *)newp)) { ret = EFAULT; goto label_return; } @@ -1709,21 +1693,21 
@@ label_return: } static int -arena_i_decay_time_ctl(const size_t *mib, size_t miblen, void *oldp, +arena_i_decay_time_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; unsigned arena_ind = (unsigned)mib[1]; arena_t *arena; - arena = arena_get(arena_ind, false); + arena = arena_get(tsd, arena_ind, false); if (arena == NULL) { ret = EFAULT; goto label_return; } if (oldp != NULL && oldlenp != NULL) { - size_t oldval = arena_decay_time_get(arena); + size_t oldval = arena_decay_time_get(tsd, arena); READ(oldval, ssize_t); } if (newp != NULL) { @@ -1731,7 +1715,7 @@ arena_i_decay_time_ctl(const size_t *mib, size_t miblen, void *oldp, ret = EINVAL; goto label_return; } - if (arena_decay_time_set(arena, *(ssize_t *)newp)) { + if (arena_decay_time_set(tsd, arena, *(ssize_t *)newp)) { ret = EFAULT; goto label_return; } @@ -1743,24 +1727,25 @@ label_return: } static int -arena_i_chunk_hooks_ctl(const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) +arena_i_chunk_hooks_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; unsigned arena_ind = (unsigned)mib[1]; arena_t *arena; - malloc_mutex_lock(&ctl_mtx); + malloc_mutex_lock(tsd, &ctl_mtx); if (arena_ind < narenas_total_get() && (arena = - arena_get(arena_ind, false)) != NULL) { + arena_get(tsd, arena_ind, false)) != NULL) { if (newp != NULL) { chunk_hooks_t old_chunk_hooks, new_chunk_hooks; WRITE(new_chunk_hooks, chunk_hooks_t); - old_chunk_hooks = chunk_hooks_set(arena, + old_chunk_hooks = chunk_hooks_set(tsd, arena, &new_chunk_hooks); READ(old_chunk_hooks, chunk_hooks_t); } else { - chunk_hooks_t old_chunk_hooks = chunk_hooks_get(arena); + chunk_hooks_t old_chunk_hooks = chunk_hooks_get(tsd, + arena); READ(old_chunk_hooks, chunk_hooks_t); } } else { @@ -1769,16 +1754,16 @@ arena_i_chunk_hooks_ctl(const size_t *mib, size_t miblen, void *oldp, } ret = 0; label_return: - malloc_mutex_unlock(&ctl_mtx); + malloc_mutex_unlock(tsd, &ctl_mtx); return (ret); } static const ctl_named_node_t * -arena_i_index(const size_t *mib, size_t miblen, size_t i) +arena_i_index(tsd_t *tsd, const size_t *mib, size_t miblen, size_t i) { - const ctl_named_node_t * ret; + const ctl_named_node_t *ret; - malloc_mutex_lock(&ctl_mtx); + malloc_mutex_lock(tsd, &ctl_mtx); if (i > ctl_stats.narenas) { ret = NULL; goto label_return; @@ -1786,20 +1771,20 @@ arena_i_index(const size_t *mib, size_t miblen, size_t i) ret = super_arena_i_node; label_return: - malloc_mutex_unlock(&ctl_mtx); + malloc_mutex_unlock(tsd, &ctl_mtx); return (ret); } /******************************************************************************/ static int -arenas_narenas_ctl(const size_t *mib, size_t miblen, void *oldp, +arenas_narenas_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; unsigned narenas; - malloc_mutex_lock(&ctl_mtx); + malloc_mutex_lock(tsd, &ctl_mtx); READONLY(); if (*oldlenp != sizeof(unsigned)) { ret = EINVAL; @@ -1810,18 +1795,18 @@ arenas_narenas_ctl(const size_t *mib, size_t miblen, void *oldp, ret = 0; label_return: - malloc_mutex_unlock(&ctl_mtx); + malloc_mutex_unlock(tsd, &ctl_mtx); return (ret); } static int -arenas_initialized_ctl(const size_t *mib, size_t miblen, void *oldp, +arenas_initialized_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; unsigned nread, i; - 
malloc_mutex_lock(&ctl_mtx); + malloc_mutex_lock(tsd, &ctl_mtx); READONLY(); if (*oldlenp != ctl_stats.narenas * sizeof(bool)) { ret = EINVAL; @@ -1836,13 +1821,13 @@ arenas_initialized_ctl(const size_t *mib, size_t miblen, void *oldp, ((bool *)oldp)[i] = ctl_stats.arenas[i].initialized; label_return: - malloc_mutex_unlock(&ctl_mtx); + malloc_mutex_unlock(tsd, &ctl_mtx); return (ret); } static int -arenas_lg_dirty_mult_ctl(const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) +arenas_lg_dirty_mult_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; @@ -1867,7 +1852,7 @@ label_return: } static int -arenas_decay_time_ctl(const size_t *mib, size_t miblen, void *oldp, +arenas_decay_time_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; @@ -1901,7 +1886,7 @@ CTL_RO_NL_GEN(arenas_bin_i_size, arena_bin_info[mib[2]].reg_size, size_t) CTL_RO_NL_GEN(arenas_bin_i_nregs, arena_bin_info[mib[2]].nregs, uint32_t) CTL_RO_NL_GEN(arenas_bin_i_run_size, arena_bin_info[mib[2]].run_size, size_t) static const ctl_named_node_t * -arenas_bin_i_index(const size_t *mib, size_t miblen, size_t i) +arenas_bin_i_index(tsd_t *tsd, const size_t *mib, size_t miblen, size_t i) { if (i > NBINS) @@ -1912,7 +1897,7 @@ arenas_bin_i_index(const size_t *mib, size_t miblen, size_t i) CTL_RO_NL_GEN(arenas_nlruns, nlclasses, unsigned) CTL_RO_NL_GEN(arenas_lrun_i_size, index2size(NBINS+(szind_t)mib[2]), size_t) static const ctl_named_node_t * -arenas_lrun_i_index(const size_t *mib, size_t miblen, size_t i) +arenas_lrun_i_index(tsd_t *tsd, const size_t *mib, size_t miblen, size_t i) { if (i > nlclasses) @@ -1924,7 +1909,7 @@ CTL_RO_NL_GEN(arenas_nhchunks, nhclasses, unsigned) CTL_RO_NL_GEN(arenas_hchunk_i_size, index2size(NBINS+nlclasses+(szind_t)mib[2]), size_t) static const ctl_named_node_t * -arenas_hchunk_i_index(const size_t *mib, size_t miblen, size_t i) +arenas_hchunk_i_index(tsd_t *tsd, const size_t *mib, size_t miblen, size_t i) { if (i > nhclasses) @@ -1933,15 +1918,15 @@ arenas_hchunk_i_index(const size_t *mib, size_t miblen, size_t i) } static int -arenas_extend_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, - void *newp, size_t newlen) +arenas_extend_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { int ret; unsigned narenas; - malloc_mutex_lock(&ctl_mtx); + malloc_mutex_lock(tsd, &ctl_mtx); READONLY(); - if (ctl_grow()) { + if (ctl_grow(tsd)) { ret = EAGAIN; goto label_return; } @@ -1950,14 +1935,39 @@ arenas_extend_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, ret = 0; label_return: - malloc_mutex_unlock(&ctl_mtx); + malloc_mutex_unlock(tsd, &ctl_mtx); return (ret); } /******************************************************************************/ static int -prof_thread_active_init_ctl(const size_t *mib, size_t miblen, void *oldp, +prof_thread_active_init_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) +{ + int ret; + bool oldval; + + if (!config_prof) + return (ENOENT); + + if (newp != NULL) { + if (newlen != sizeof(bool)) { + ret = EINVAL; + goto label_return; + } + oldval = prof_thread_active_init_set(tsd, *(bool *)newp); + } else + oldval = prof_thread_active_init_get(tsd); + READ(oldval, bool); + + ret = 0; +label_return: + return (ret); +} + +static int +prof_active_ctl(tsd_t *tsd, 
const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; @@ -1971,9 +1981,9 @@ prof_thread_active_init_ctl(const size_t *mib, size_t miblen, void *oldp, ret = EINVAL; goto label_return; } - oldval = prof_thread_active_init_set(*(bool *)newp); + oldval = prof_active_set(tsd, *(bool *)newp); } else - oldval = prof_thread_active_init_get(); + oldval = prof_active_get(tsd); READ(oldval, bool); ret = 0; @@ -1982,33 +1992,8 @@ label_return: } static int -prof_active_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, - void *newp, size_t newlen) -{ - int ret; - bool oldval; - - if (!config_prof) - return (ENOENT); - - if (newp != NULL) { - if (newlen != sizeof(bool)) { - ret = EINVAL; - goto label_return; - } - oldval = prof_active_set(*(bool *)newp); - } else - oldval = prof_active_get(); - READ(oldval, bool); - - ret = 0; -label_return: - return (ret); -} - -static int -prof_dump_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, - void *newp, size_t newlen) +prof_dump_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { int ret; const char *filename = NULL; @@ -2019,7 +2004,7 @@ prof_dump_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, WRITEONLY(); WRITE(filename, const char *); - if (prof_mdump(filename)) { + if (prof_mdump(tsd, filename)) { ret = EFAULT; goto label_return; } @@ -2030,8 +2015,8 @@ label_return: } static int -prof_gdump_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, - void *newp, size_t newlen) +prof_gdump_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { int ret; bool oldval; @@ -2044,9 +2029,9 @@ prof_gdump_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, ret = EINVAL; goto label_return; } - oldval = prof_gdump_set(*(bool *)newp); + oldval = prof_gdump_set(tsd, *(bool *)newp); } else - oldval = prof_gdump_get(); + oldval = prof_gdump_get(tsd); READ(oldval, bool); ret = 0; @@ -2055,12 +2040,11 @@ label_return: } static int -prof_reset_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, - void *newp, size_t newlen) +prof_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { int ret; size_t lg_sample = lg_prof_sample; - tsd_t *tsd; if (!config_prof) return (ENOENT); @@ -2070,8 +2054,6 @@ prof_reset_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, if (lg_sample >= (sizeof(uint64_t) << 3)) lg_sample = (sizeof(uint64_t) << 3) - 1; - tsd = tsd_fetch(); - prof_reset(tsd, lg_sample); ret = 0; @@ -2157,7 +2139,8 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_curruns, ctl_stats.arenas[mib[2]].bstats[mib[4]].curruns, size_t) static const ctl_named_node_t * -stats_arenas_i_bins_j_index(const size_t *mib, size_t miblen, size_t j) +stats_arenas_i_bins_j_index(tsd_t *tsd, const size_t *mib, size_t miblen, + size_t j) { if (j > NBINS) @@ -2175,7 +2158,8 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_lruns_j_curruns, ctl_stats.arenas[mib[2]].lstats[mib[4]].curruns, size_t) static const ctl_named_node_t * -stats_arenas_i_lruns_j_index(const size_t *mib, size_t miblen, size_t j) +stats_arenas_i_lruns_j_index(tsd_t *tsd, const size_t *mib, size_t miblen, + size_t j) { if (j > nlclasses) @@ -2194,7 +2178,8 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_hchunks_j_curhchunks, ctl_stats.arenas[mib[2]].hstats[mib[4]].curhchunks, size_t) static const 
ctl_named_node_t * -stats_arenas_i_hchunks_j_index(const size_t *mib, size_t miblen, size_t j) +stats_arenas_i_hchunks_j_index(tsd_t *tsd, const size_t *mib, size_t miblen, + size_t j) { if (j > nhclasses) @@ -2203,11 +2188,11 @@ stats_arenas_i_hchunks_j_index(const size_t *mib, size_t miblen, size_t j) } static const ctl_named_node_t * -stats_arenas_i_index(const size_t *mib, size_t miblen, size_t i) +stats_arenas_i_index(tsd_t *tsd, const size_t *mib, size_t miblen, size_t i) { const ctl_named_node_t * ret; - malloc_mutex_lock(&ctl_mtx); + malloc_mutex_lock(tsd, &ctl_mtx); if (i > ctl_stats.narenas || !ctl_stats.arenas[i].initialized) { ret = NULL; goto label_return; @@ -2215,6 +2200,6 @@ stats_arenas_i_index(const size_t *mib, size_t miblen, size_t i) ret = super_stats_arenas_i_node; label_return: - malloc_mutex_unlock(&ctl_mtx); + malloc_mutex_unlock(tsd, &ctl_mtx); return (ret); } diff --git a/src/huge.c b/src/huge.c index a63c8258..3a802dee 100644 --- a/src/huge.c +++ b/src/huge.c @@ -15,12 +15,12 @@ huge_node_get(const void *ptr) } static bool -huge_node_set(const void *ptr, extent_node_t *node) +huge_node_set(tsd_t *tsd, const void *ptr, extent_node_t *node) { assert(extent_node_addr_get(node) == ptr); assert(!extent_node_achunk_get(node)); - return (chunk_register(ptr, node)); + return (chunk_register(tsd, ptr, node)); } static void @@ -68,7 +68,7 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, */ is_zeroed = zero; arena = arena_choose(tsd, arena); - if (unlikely(arena == NULL) || (ret = arena_chunk_alloc_huge(arena, + if (unlikely(arena == NULL) || (ret = arena_chunk_alloc_huge(tsd, arena, usize, alignment, &is_zeroed)) == NULL) { idalloctm(tsd, node, tcache, true, true); return (NULL); @@ -76,17 +76,17 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, extent_node_init(node, arena, ret, usize, is_zeroed, true); - if (huge_node_set(ret, node)) { - arena_chunk_dalloc_huge(arena, ret, usize); + if (huge_node_set(tsd, ret, node)) { + arena_chunk_dalloc_huge(tsd, arena, ret, usize); idalloctm(tsd, node, tcache, true, true); return (NULL); } /* Insert node into huge. */ - malloc_mutex_lock(&arena->huge_mtx); + malloc_mutex_lock(tsd, &arena->huge_mtx); ql_elm_new(node, ql_link); ql_tail_insert(&arena->huge, node, ql_link); - malloc_mutex_unlock(&arena->huge_mtx); + malloc_mutex_unlock(tsd, &arena->huge_mtx); if (zero || (config_fill && unlikely(opt_zero))) { if (!is_zeroed) @@ -103,7 +103,7 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, #define huge_dalloc_junk JEMALLOC_N(huge_dalloc_junk_impl) #endif static void -huge_dalloc_junk(void *ptr, size_t usize) +huge_dalloc_junk(tsd_t *tsd, void *ptr, size_t usize) { if (config_fill && have_dss && unlikely(opt_junk_free)) { @@ -111,7 +111,7 @@ huge_dalloc_junk(void *ptr, size_t usize) * Only bother junk filling if the chunk isn't about to be * unmapped. 
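Whether that junk fill actually happens depends on the fill/dss/munmap configuration, which callers can inspect through mallctl(). A small sketch, assuming unprefixed public symbols and a build with fill support (the default); the names are as spelled in the 4.x MALLCTL NAMESPACE:

#include <stdio.h>
#include <stdbool.h>
#include <jemalloc/jemalloc.h>

int
main(void)
{
	const char *junk, *dss;
	bool munmap_cfg;
	size_t str_sz = sizeof(const char *);
	size_t bool_sz = sizeof(bool);

	// Run-time junk-fill mode ("true", "false", "alloc" or "free").
	if (mallctl("opt.junk", &junk, &str_sz, NULL, 0) == 0)
		printf("opt.junk: %s\n", junk);
	// Whether the build unmaps chunks, and the dss setting.
	if (mallctl("config.munmap", &munmap_cfg, &bool_sz, NULL, 0) == 0)
		printf("config.munmap: %d\n", (int)munmap_cfg);
	if (mallctl("opt.dss", &dss, &str_sz, NULL, 0) == 0)
		printf("opt.dss: %s\n", dss);
	return (0);
}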
*/ - if (!config_munmap || (have_dss && chunk_in_dss(ptr))) + if (!config_munmap || (have_dss && chunk_in_dss(tsd, ptr))) memset(ptr, JEMALLOC_FREE_JUNK, usize); } } @@ -122,8 +122,8 @@ huge_dalloc_junk_t *huge_dalloc_junk = JEMALLOC_N(huge_dalloc_junk_impl); #endif static void -huge_ralloc_no_move_similar(void *ptr, size_t oldsize, size_t usize_min, - size_t usize_max, bool zero) +huge_ralloc_no_move_similar(tsd_t *tsd, void *ptr, size_t oldsize, + size_t usize_min, size_t usize_max, bool zero) { size_t usize, usize_next; extent_node_t *node; @@ -151,21 +151,22 @@ huge_ralloc_no_move_similar(void *ptr, size_t oldsize, size_t usize_min, JEMALLOC_FREE_JUNK, sdiff); post_zeroed = false; } else { - post_zeroed = !chunk_purge_wrapper(arena, &chunk_hooks, - ptr, CHUNK_CEILING(oldsize), usize, sdiff); + post_zeroed = !chunk_purge_wrapper(tsd, arena, + &chunk_hooks, ptr, CHUNK_CEILING(oldsize), usize, + sdiff); } } else post_zeroed = pre_zeroed; - malloc_mutex_lock(&arena->huge_mtx); + malloc_mutex_lock(tsd, &arena->huge_mtx); /* Update the size of the huge allocation. */ assert(extent_node_size_get(node) != usize); extent_node_size_set(node, usize); /* Update zeroed. */ extent_node_zeroed_set(node, post_zeroed); - malloc_mutex_unlock(&arena->huge_mtx); + malloc_mutex_unlock(tsd, &arena->huge_mtx); - arena_chunk_ralloc_huge_similar(arena, ptr, oldsize, usize); + arena_chunk_ralloc_huge_similar(tsd, arena, ptr, oldsize, usize); /* Fill if necessary (growing). */ if (oldsize < usize) { @@ -182,7 +183,7 @@ huge_ralloc_no_move_similar(void *ptr, size_t oldsize, size_t usize_min, } static bool -huge_ralloc_no_move_shrink(void *ptr, size_t oldsize, size_t usize) +huge_ralloc_no_move_shrink(tsd_t *tsd, void *ptr, size_t oldsize, size_t usize) { extent_node_t *node; arena_t *arena; @@ -193,7 +194,7 @@ huge_ralloc_no_move_shrink(void *ptr, size_t oldsize, size_t usize) node = huge_node_get(ptr); arena = extent_node_arena_get(node); pre_zeroed = extent_node_zeroed_get(node); - chunk_hooks = chunk_hooks_get(arena); + chunk_hooks = chunk_hooks_get(tsd, arena); assert(oldsize > usize); @@ -206,42 +207,43 @@ huge_ralloc_no_move_shrink(void *ptr, size_t oldsize, size_t usize) if (oldsize > usize) { size_t sdiff = oldsize - usize; if (config_fill && unlikely(opt_junk_free)) { - huge_dalloc_junk((void *)((uintptr_t)ptr + usize), + huge_dalloc_junk(tsd, (void *)((uintptr_t)ptr + usize), sdiff); post_zeroed = false; } else { - post_zeroed = !chunk_purge_wrapper(arena, &chunk_hooks, - CHUNK_ADDR2BASE((uintptr_t)ptr + usize), - CHUNK_CEILING(oldsize), + post_zeroed = !chunk_purge_wrapper(tsd, arena, + &chunk_hooks, CHUNK_ADDR2BASE((uintptr_t)ptr + + usize), CHUNK_CEILING(oldsize), CHUNK_ADDR2OFFSET((uintptr_t)ptr + usize), sdiff); } } else post_zeroed = pre_zeroed; - malloc_mutex_lock(&arena->huge_mtx); + malloc_mutex_lock(tsd, &arena->huge_mtx); /* Update the size of the huge allocation. */ extent_node_size_set(node, usize); /* Update zeroed. */ extent_node_zeroed_set(node, post_zeroed); - malloc_mutex_unlock(&arena->huge_mtx); + malloc_mutex_unlock(tsd, &arena->huge_mtx); /* Zap the excess chunks. 
*/ - arena_chunk_ralloc_huge_shrink(arena, ptr, oldsize, usize); + arena_chunk_ralloc_huge_shrink(tsd, arena, ptr, oldsize, usize); return (false); } static bool -huge_ralloc_no_move_expand(void *ptr, size_t oldsize, size_t usize, bool zero) { +huge_ralloc_no_move_expand(tsd_t *tsd, void *ptr, size_t oldsize, size_t usize, + bool zero) { extent_node_t *node; arena_t *arena; bool is_zeroed_subchunk, is_zeroed_chunk; node = huge_node_get(ptr); arena = extent_node_arena_get(node); - malloc_mutex_lock(&arena->huge_mtx); + malloc_mutex_lock(tsd, &arena->huge_mtx); is_zeroed_subchunk = extent_node_zeroed_get(node); - malloc_mutex_unlock(&arena->huge_mtx); + malloc_mutex_unlock(tsd, &arena->huge_mtx); /* * Copy zero into is_zeroed_chunk and pass the copy to chunk_alloc(), so @@ -249,14 +251,14 @@ huge_ralloc_no_move_expand(void *ptr, size_t oldsize, size_t usize, bool zero) { */ is_zeroed_chunk = zero; - if (arena_chunk_ralloc_huge_expand(arena, ptr, oldsize, usize, + if (arena_chunk_ralloc_huge_expand(tsd, arena, ptr, oldsize, usize, &is_zeroed_chunk)) return (true); - malloc_mutex_lock(&arena->huge_mtx); + malloc_mutex_lock(tsd, &arena->huge_mtx); /* Update the size of the huge allocation. */ extent_node_size_set(node, usize); - malloc_mutex_unlock(&arena->huge_mtx); + malloc_mutex_unlock(tsd, &arena->huge_mtx); if (zero || (config_fill && unlikely(opt_zero))) { if (!is_zeroed_subchunk) { @@ -291,15 +293,15 @@ huge_ralloc_no_move(tsd_t *tsd, void *ptr, size_t oldsize, size_t usize_min, if (CHUNK_CEILING(usize_max) > CHUNK_CEILING(oldsize)) { /* Attempt to expand the allocation in-place. */ - if (!huge_ralloc_no_move_expand(ptr, oldsize, usize_max, + if (!huge_ralloc_no_move_expand(tsd, ptr, oldsize, usize_max, zero)) { arena_decay_tick(tsd, huge_aalloc(ptr)); return (false); } /* Try again, this time with usize_min. */ if (usize_min < usize_max && CHUNK_CEILING(usize_min) > - CHUNK_CEILING(oldsize) && huge_ralloc_no_move_expand(ptr, - oldsize, usize_min, zero)) { + CHUNK_CEILING(oldsize) && huge_ralloc_no_move_expand(tsd, + ptr, oldsize, usize_min, zero)) { arena_decay_tick(tsd, huge_aalloc(ptr)); return (false); } @@ -311,15 +313,15 @@ huge_ralloc_no_move(tsd_t *tsd, void *ptr, size_t oldsize, size_t usize_min, */ if (CHUNK_CEILING(oldsize) >= CHUNK_CEILING(usize_min) && CHUNK_CEILING(oldsize) <= CHUNK_CEILING(usize_max)) { - huge_ralloc_no_move_similar(ptr, oldsize, usize_min, usize_max, - zero); + huge_ralloc_no_move_similar(tsd, ptr, oldsize, usize_min, + usize_max, zero); arena_decay_tick(tsd, huge_aalloc(ptr)); return (false); } /* Attempt to shrink the allocation in-place. 
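From the application side, this in-place path is what xallocx() exercises, with rallocx() as the moving fallback. A sketch under stated assumptions: unprefixed public symbols and the default 2 MiB chunk size (opt.lg_chunk == 21); neither is guaranteed by this patch:

#include <stdio.h>
#include <jemalloc/jemalloc.h>

int
main(void)
{
	size_t chunk = 2u << 20;	// assumed 2 MiB chunk size
	void *p = mallocx(4 * chunk, 0);	// huge allocation
	size_t got;

	if (p == NULL)
		return (1);
	// Try to shrink in place; xallocx() never moves the object, it just
	// reports the usable size it ended up with.
	got = xallocx(p, 2 * chunk, 0, 0);
	printf("asked for %zu, now usable %zu\n", 2 * chunk, got);

	// Growing may or may not succeed in place; fall back to rallocx(),
	// which is allowed to move.
	if (xallocx(p, 8 * chunk, 0, 0) < 8 * chunk) {
		void *q = rallocx(p, 8 * chunk, 0);
		if (q != NULL)
			p = q;
	}
	dallocx(p, 0);
	return (0);
}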
*/ if (CHUNK_CEILING(oldsize) > CHUNK_CEILING(usize_max)) { - if (!huge_ralloc_no_move_shrink(ptr, oldsize, usize_max)) { + if (!huge_ralloc_no_move_shrink(tsd, ptr, oldsize, usize_max)) { arena_decay_tick(tsd, huge_aalloc(ptr)); return (false); } @@ -376,13 +378,13 @@ huge_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache) node = huge_node_get(ptr); arena = extent_node_arena_get(node); huge_node_unset(ptr, node); - malloc_mutex_lock(&arena->huge_mtx); + malloc_mutex_lock(tsd, &arena->huge_mtx); ql_remove(&arena->huge, node, ql_link); - malloc_mutex_unlock(&arena->huge_mtx); + malloc_mutex_unlock(tsd, &arena->huge_mtx); - huge_dalloc_junk(extent_node_addr_get(node), + huge_dalloc_junk(tsd, extent_node_addr_get(node), extent_node_size_get(node)); - arena_chunk_dalloc_huge(extent_node_arena_get(node), + arena_chunk_dalloc_huge(tsd, extent_node_arena_get(node), extent_node_addr_get(node), extent_node_size_get(node)); idalloctm(tsd, node, tcache, true, true); @@ -397,7 +399,7 @@ huge_aalloc(const void *ptr) } size_t -huge_salloc(const void *ptr) +huge_salloc(tsd_t *tsd, const void *ptr) { size_t size; extent_node_t *node; @@ -405,15 +407,15 @@ huge_salloc(const void *ptr) node = huge_node_get(ptr); arena = extent_node_arena_get(node); - malloc_mutex_lock(&arena->huge_mtx); + malloc_mutex_lock(tsd, &arena->huge_mtx); size = extent_node_size_get(node); - malloc_mutex_unlock(&arena->huge_mtx); + malloc_mutex_unlock(tsd, &arena->huge_mtx); return (size); } prof_tctx_t * -huge_prof_tctx_get(const void *ptr) +huge_prof_tctx_get(tsd_t *tsd, const void *ptr) { prof_tctx_t *tctx; extent_node_t *node; @@ -421,29 +423,29 @@ huge_prof_tctx_get(const void *ptr) node = huge_node_get(ptr); arena = extent_node_arena_get(node); - malloc_mutex_lock(&arena->huge_mtx); + malloc_mutex_lock(tsd, &arena->huge_mtx); tctx = extent_node_prof_tctx_get(node); - malloc_mutex_unlock(&arena->huge_mtx); + malloc_mutex_unlock(tsd, &arena->huge_mtx); return (tctx); } void -huge_prof_tctx_set(const void *ptr, prof_tctx_t *tctx) +huge_prof_tctx_set(tsd_t *tsd, const void *ptr, prof_tctx_t *tctx) { extent_node_t *node; arena_t *arena; node = huge_node_get(ptr); arena = extent_node_arena_get(node); - malloc_mutex_lock(&arena->huge_mtx); + malloc_mutex_lock(tsd, &arena->huge_mtx); extent_node_prof_tctx_set(node, tctx); - malloc_mutex_unlock(&arena->huge_mtx); + malloc_mutex_unlock(tsd, &arena->huge_mtx); } void -huge_prof_tctx_reset(const void *ptr) +huge_prof_tctx_reset(tsd_t *tsd, const void *ptr) { - huge_prof_tctx_set(ptr, (prof_tctx_t *)(uintptr_t)1U); + huge_prof_tctx_set(tsd, ptr, (prof_tctx_t *)(uintptr_t)1U); } diff --git a/src/jemalloc.c b/src/jemalloc.c index 0735376e..7543dff1 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -212,7 +212,7 @@ _init_init_lock(void) * really only matters early in the process creation, before any * separate thread normally starts doing anything. */ if (!init_lock_initialized) - malloc_mutex_init(&init_lock); + malloc_mutex_init(&init_lock, "init", WITNESS_RANK_INIT); init_lock_initialized = true; } @@ -254,7 +254,7 @@ typedef struct { * definition. 
*/ -static bool malloc_init_hard_a0(void); +static bool malloc_init_hard_a0(tsd_t *tsd); static bool malloc_init_hard(void); /******************************************************************************/ @@ -291,7 +291,7 @@ malloc_init_a0(void) { if (unlikely(malloc_init_state == malloc_init_uninitialized)) - return (malloc_init_hard_a0()); + return (malloc_init_hard_a0(NULL)); return (false); } @@ -319,7 +319,7 @@ a0ialloc(size_t size, bool zero, bool is_metadata) return (NULL); return (iallocztm(NULL, size, size2index(size), zero, false, - is_metadata, arena_get(0, false), true)); + is_metadata, arena_get(NULL, 0, false), true)); } static void @@ -413,7 +413,7 @@ narenas_total_get(void) /* Create a new arena and insert it into the arenas array at index ind. */ static arena_t * -arena_init_locked(unsigned ind) +arena_init_locked(tsd_t *tsd, unsigned ind) { arena_t *arena; @@ -427,26 +427,26 @@ arena_init_locked(unsigned ind) * Another thread may have already initialized arenas[ind] if it's an * auto arena. */ - arena = arena_get(ind, false); + arena = arena_get(tsd, ind, false); if (arena != NULL) { assert(ind < narenas_auto); return (arena); } /* Actually initialize the arena. */ - arena = arena_new(ind); + arena = arena_new(tsd, ind); arena_set(ind, arena); return (arena); } arena_t * -arena_init(unsigned ind) +arena_init(tsd_t *tsd, unsigned ind) { arena_t *arena; - malloc_mutex_lock(&arenas_lock); - arena = arena_init_locked(ind); - malloc_mutex_unlock(&arenas_lock); + malloc_mutex_lock(tsd, &arenas_lock); + arena = arena_init_locked(tsd, ind); + malloc_mutex_unlock(tsd, &arenas_lock); return (arena); } @@ -455,7 +455,7 @@ arena_bind(tsd_t *tsd, unsigned ind) { arena_t *arena; - arena = arena_get(ind, false); + arena = arena_get(tsd, ind, false); arena_nthreads_inc(arena); if (tsd_nominal(tsd)) @@ -467,8 +467,8 @@ arena_migrate(tsd_t *tsd, unsigned oldind, unsigned newind) { arena_t *oldarena, *newarena; - oldarena = arena_get(oldind, false); - newarena = arena_get(newind, false); + oldarena = arena_get(tsd, oldind, false); + newarena = arena_get(tsd, newind, false); arena_nthreads_dec(oldarena); arena_nthreads_inc(newarena); tsd_arena_set(tsd, newarena); @@ -479,7 +479,7 @@ arena_unbind(tsd_t *tsd, unsigned ind) { arena_t *arena; - arena = arena_get(ind, false); + arena = arena_get(tsd, ind, false); arena_nthreads_dec(arena); tsd_arena_set(tsd, NULL); } @@ -571,16 +571,16 @@ arena_choose_hard(tsd_t *tsd) choose = 0; first_null = narenas_auto; - malloc_mutex_lock(&arenas_lock); - assert(arena_get(0, false) != NULL); + malloc_mutex_lock(tsd, &arenas_lock); + assert(arena_get(tsd, 0, false) != NULL); for (i = 1; i < narenas_auto; i++) { - if (arena_get(i, false) != NULL) { + if (arena_get(tsd, i, false) != NULL) { /* * Choose the first arena that has the lowest * number of threads assigned to it. */ - if (arena_nthreads_get(arena_get(i, false)) < - arena_nthreads_get(arena_get(choose, + if (arena_nthreads_get(arena_get(tsd, i, false)) + < arena_nthreads_get(arena_get(tsd, choose, false))) choose = i; } else if (first_null == narenas_auto) { @@ -597,26 +597,26 @@ arena_choose_hard(tsd_t *tsd) } } - if (arena_nthreads_get(arena_get(choose, false)) == 0 + if (arena_nthreads_get(arena_get(tsd, choose, false)) == 0 || first_null == narenas_auto) { /* * Use an unloaded arena, or the least loaded arena if * all arenas are already initialized. */ - ret = arena_get(choose, false); + ret = arena_get(tsd, choose, false); } else { /* Initialize a new arena. 
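Applications can do the equivalent explicitly: create an arena with arenas.extend, then either tag individual allocations with MALLOCX_ARENA() or rebind the calling thread through thread.arena. A sketch under the usual assumptions (unprefixed public symbols, <jemalloc/jemalloc.h> available):

#include <stdio.h>
#include <jemalloc/jemalloc.h>

int
main(void)
{
	unsigned arena_ind;
	size_t sz = sizeof(arena_ind);
	void *p;

	// Create a fresh arena; its index comes back as the "old" value.
	if (mallctl("arenas.extend", &arena_ind, &sz, NULL, 0) != 0)
		return (1);

	// Option 1: route a single allocation to it.
	p = mallocx(4096, MALLOCX_ARENA(arena_ind));
	if (p != NULL)
		dallocx(p, MALLOCX_ARENA(arena_ind));

	// Option 2: rebind the calling thread so plain malloc() uses it too.
	if (mallctl("thread.arena", NULL, NULL, &arena_ind,
	    sizeof(arena_ind)) != 0)
		return (1);
	printf("now allocating from arena %u\n", arena_ind);
	return (0);
}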
*/ choose = first_null; - ret = arena_init_locked(choose); + ret = arena_init_locked(tsd, choose); if (ret == NULL) { - malloc_mutex_unlock(&arenas_lock); + malloc_mutex_unlock(tsd, &arenas_lock); return (NULL); } } arena_bind(tsd, choose); - malloc_mutex_unlock(&arenas_lock); + malloc_mutex_unlock(tsd, &arenas_lock); } else { - ret = arena_get(0, false); + ret = arena_get(tsd, 0, false); arena_bind(tsd, 0); } @@ -681,8 +681,11 @@ stats_print_atexit(void) { if (config_tcache && config_stats) { + tsd_t *tsd; unsigned narenas, i; + tsd = tsd_fetch(); + /* * Merge stats from extant threads. This is racy, since * individual threads do not lock when recording tcache stats @@ -691,7 +694,7 @@ stats_print_atexit(void) * continue to allocate. */ for (i = 0, narenas = narenas_total_get(); i < narenas; i++) { - arena_t *arena = arena_get(i, false); + arena_t *arena = arena_get(tsd, i, false); if (arena != NULL) { tcache_t *tcache; @@ -701,11 +704,11 @@ stats_print_atexit(void) * and bin locks in the opposite order, * deadlocks may result. */ - malloc_mutex_lock(&arena->lock); + malloc_mutex_lock(tsd, &arena->lock); ql_foreach(tcache, &arena->tcache_ql, link) { - tcache_stats_merge(tcache, arena); + tcache_stats_merge(tsd, tcache, arena); } - malloc_mutex_unlock(&arena->lock); + malloc_mutex_unlock(tsd, &arena->lock); } } } @@ -1056,7 +1059,8 @@ malloc_conf_init(void) for (i = 0; i < dss_prec_limit; i++) { if (strncmp(dss_prec_names[i], v, vlen) == 0) { - if (chunk_dss_prec_set(i)) { + if (chunk_dss_prec_set(NULL, + i)) { malloc_conf_error( "Error setting dss", k, klen, v, vlen); @@ -1186,7 +1190,6 @@ malloc_conf_init(void) } } -/* init_lock must be held. */ static bool malloc_init_hard_needed(void) { @@ -1204,9 +1207,9 @@ malloc_init_hard_needed(void) if (malloc_initializer != NO_INITIALIZER && !IS_INITIALIZER) { /* Busy-wait until the initializing thread completes. */ do { - malloc_mutex_unlock(&init_lock); + malloc_mutex_unlock(NULL, &init_lock); CPU_SPINWAIT; - malloc_mutex_lock(&init_lock); + malloc_mutex_lock(NULL, &init_lock); } while (!malloc_initialized()); return (false); } @@ -1214,9 +1217,8 @@ malloc_init_hard_needed(void) return (true); } -/* init_lock must be held. */ static bool -malloc_init_hard_a0_locked(void) +malloc_init_hard_a0_locked(tsd_t *tsd) { malloc_initializer = INITIALIZER; @@ -1242,9 +1244,9 @@ malloc_init_hard_a0_locked(void) prof_boot1(); if (arena_boot()) return (true); - if (config_tcache && tcache_boot()) + if (config_tcache && tcache_boot(tsd)) return (true); - if (malloc_mutex_init(&arenas_lock)) + if (malloc_mutex_init(&arenas_lock, "arenas", WITNESS_RANK_ARENAS)) return (true); /* * Create enough scaffolding to allow recursive allocation in @@ -1258,38 +1260,35 @@ malloc_init_hard_a0_locked(void) * Initialize one arena here. The rest are lazily created in * arena_choose_hard(). */ - if (arena_init(0) == NULL) + if (arena_init(tsd, 0) == NULL) return (true); malloc_init_state = malloc_init_a0_initialized; return (false); } static bool -malloc_init_hard_a0(void) +malloc_init_hard_a0(tsd_t *tsd) { bool ret; - malloc_mutex_lock(&init_lock); - ret = malloc_init_hard_a0_locked(); - malloc_mutex_unlock(&init_lock); + malloc_mutex_lock(tsd, &init_lock); + ret = malloc_init_hard_a0_locked(tsd); + malloc_mutex_unlock(tsd, &init_lock); return (ret); } -/* - * Initialize data structures which may trigger recursive allocation. - * - * init_lock must be held. - */ +/* Initialize data structures which may trigger recursive allocation. 
*/ static bool -malloc_init_hard_recursible(void) +malloc_init_hard_recursible(tsd_t **tsd) { - bool ret = false; + bool ret; malloc_init_state = malloc_init_recursible; - malloc_mutex_unlock(&init_lock); + malloc_mutex_unlock(*tsd, &init_lock); /* LinuxThreads' pthread_setspecific() allocates. */ - if (malloc_tsd_boot0()) { + *tsd = malloc_tsd_boot0(); + if (*tsd == NULL) { ret = true; goto label_return; } @@ -1308,17 +1307,17 @@ malloc_init_hard_recursible(void) } #endif + ret = false; label_return: - malloc_mutex_lock(&init_lock); + malloc_mutex_lock(*tsd, &init_lock); return (ret); } -/* init_lock must be held. */ static bool -malloc_init_hard_finish(void) +malloc_init_hard_finish(tsd_t *tsd) { - if (mutex_boot()) + if (malloc_mutex_boot()) return (true); if (opt_narenas == 0) { @@ -1343,7 +1342,7 @@ malloc_init_hard_finish(void) narenas_total_set(narenas_auto); /* Allocate and initialize arenas. */ - arenas = (arena_t **)base_alloc(sizeof(arena_t *) * + arenas = (arena_t **)base_alloc(tsd, sizeof(arena_t *) * (MALLOCX_ARENA_MAX+1)); if (arenas == NULL) return (true); @@ -1359,38 +1358,39 @@ malloc_init_hard_finish(void) static bool malloc_init_hard(void) { + tsd_t *tsd = NULL; #if defined(_WIN32) && _WIN32_WINNT < 0x0600 _init_init_lock(); #endif - malloc_mutex_lock(&init_lock); + malloc_mutex_lock(tsd, &init_lock); if (!malloc_init_hard_needed()) { - malloc_mutex_unlock(&init_lock); + malloc_mutex_unlock(tsd, &init_lock); return (false); } if (malloc_init_state != malloc_init_a0_initialized && - malloc_init_hard_a0_locked()) { - malloc_mutex_unlock(&init_lock); + malloc_init_hard_a0_locked(tsd)) { + malloc_mutex_unlock(tsd, &init_lock); return (true); } - if (malloc_init_hard_recursible()) { - malloc_mutex_unlock(&init_lock); + if (malloc_init_hard_recursible(&tsd)) { + malloc_mutex_unlock(tsd, &init_lock); return (true); } - if (config_prof && prof_boot2()) { - malloc_mutex_unlock(&init_lock); + if (config_prof && prof_boot2(tsd)) { + malloc_mutex_unlock(tsd, &init_lock); return (true); } - if (malloc_init_hard_finish()) { - malloc_mutex_unlock(&init_lock); + if (malloc_init_hard_finish(tsd)) { + malloc_mutex_unlock(tsd, &init_lock); return (true); } - malloc_mutex_unlock(&init_lock); + malloc_mutex_unlock(tsd, &init_lock); malloc_tsd_boot1(); return (false); } @@ -1416,7 +1416,7 @@ imalloc_prof_sample(tsd_t *tsd, size_t usize, szind_t ind, p = imalloc(tsd, LARGE_MINCLASS, ind_large, slow_path); if (p == NULL) return (NULL); - arena_prof_promoted(p, usize); + arena_prof_promoted(tsd, p, usize); } else p = imalloc(tsd, usize, ind, slow_path); @@ -1438,7 +1438,7 @@ imalloc_prof(tsd_t *tsd, size_t usize, szind_t ind, bool slow_path) prof_alloc_rollback(tsd, tctx, true); return (NULL); } - prof_malloc(p, usize, tctx); + prof_malloc(tsd, p, usize, tctx); return (p); } @@ -1450,7 +1450,11 @@ imalloc_body(size_t size, tsd_t **tsd, size_t *usize, bool slow_path) if (slow_path && unlikely(malloc_init())) return (NULL); + *tsd = tsd_fetch(); + + witness_assert_lockless(*tsd); + ind = size2index(size); if (unlikely(ind >= NSIZES)) return (NULL); @@ -1479,7 +1483,7 @@ imalloc_post_check(void *ret, tsd_t *tsd, size_t usize, bool slow_path) set_errno(ENOMEM); } if (config_stats && likely(ret != NULL)) { - assert(usize == isalloc(ret, config_prof)); + assert(usize == isalloc(tsd, ret, config_prof)); *tsd_thread_allocatedp_get(tsd) += usize; } } @@ -1507,9 +1511,10 @@ je_malloc(size_t size) ret = imalloc_body(size, &tsd, &usize, true); imalloc_post_check(ret, tsd, usize, true); UTRACE(0, size, 
ret); - JEMALLOC_VALGRIND_MALLOC(ret != NULL, ret, usize, false); + JEMALLOC_VALGRIND_MALLOC(ret != NULL, tsd, ret, usize, false); } + witness_assert_lockless(tsd); return (ret); } @@ -1526,7 +1531,7 @@ imemalign_prof_sample(tsd_t *tsd, size_t alignment, size_t usize, p = ipalloc(tsd, LARGE_MINCLASS, alignment, false); if (p == NULL) return (NULL); - arena_prof_promoted(p, usize); + arena_prof_promoted(tsd, p, usize); } else p = ipalloc(tsd, usize, alignment, false); @@ -1548,7 +1553,7 @@ imemalign_prof(tsd_t *tsd, size_t alignment, size_t usize) prof_alloc_rollback(tsd, tctx, true); return (NULL); } - prof_malloc(p, usize, tctx); + prof_malloc(tsd, p, usize, tctx); return (p); } @@ -1565,10 +1570,12 @@ imemalign(void **memptr, size_t alignment, size_t size, size_t min_alignment) assert(min_alignment != 0); if (unlikely(malloc_init())) { + tsd = NULL; result = NULL; goto label_oom; } tsd = tsd_fetch(); + witness_assert_lockless(tsd); if (size == 0) size = 1; @@ -1603,10 +1610,12 @@ imemalign(void **memptr, size_t alignment, size_t size, size_t min_alignment) ret = 0; label_return: if (config_stats && likely(result != NULL)) { - assert(usize == isalloc(result, config_prof)); + assert(usize == isalloc(tsd, result, config_prof)); *tsd_thread_allocatedp_get(tsd) += usize; } UTRACE(0, size, result); + JEMALLOC_VALGRIND_MALLOC(result != NULL, tsd, result, usize, false); + witness_assert_lockless(tsd); return (ret); label_oom: assert(result == NULL); @@ -1616,6 +1625,7 @@ label_oom: abort(); } ret = ENOMEM; + witness_assert_lockless(tsd); goto label_return; } @@ -1623,9 +1633,10 @@ JEMALLOC_EXPORT int JEMALLOC_NOTHROW JEMALLOC_ATTR(nonnull(1)) je_posix_memalign(void **memptr, size_t alignment, size_t size) { - int ret = imemalign(memptr, alignment, size, sizeof(void *)); - JEMALLOC_VALGRIND_MALLOC(ret == 0, *memptr, isalloc(*memptr, - config_prof), false); + int ret; + + ret = imemalign(memptr, alignment, size, sizeof(void *)); + return (ret); } @@ -1641,8 +1652,7 @@ je_aligned_alloc(size_t alignment, size_t size) ret = NULL; set_errno(err); } - JEMALLOC_VALGRIND_MALLOC(err == 0, ret, isalloc(ret, config_prof), - false); + return (ret); } @@ -1658,7 +1668,7 @@ icalloc_prof_sample(tsd_t *tsd, size_t usize, szind_t ind, prof_tctx_t *tctx) p = icalloc(tsd, LARGE_MINCLASS, ind_large); if (p == NULL) return (NULL); - arena_prof_promoted(p, usize); + arena_prof_promoted(tsd, p, usize); } else p = icalloc(tsd, usize, ind); @@ -1680,7 +1690,7 @@ icalloc_prof(tsd_t *tsd, size_t usize, szind_t ind) prof_alloc_rollback(tsd, tctx, true); return (NULL); } - prof_malloc(p, usize, tctx); + prof_malloc(tsd, p, usize, tctx); return (p); } @@ -1697,11 +1707,13 @@ je_calloc(size_t num, size_t size) size_t usize JEMALLOC_CC_SILENCE_INIT(0); if (unlikely(malloc_init())) { + tsd = NULL; num_size = 0; ret = NULL; goto label_return; } tsd = tsd_fetch(); + witness_assert_lockless(tsd); num_size = num * size; if (unlikely(num_size == 0)) { @@ -1747,11 +1759,12 @@ label_return: set_errno(ENOMEM); } if (config_stats && likely(ret != NULL)) { - assert(usize == isalloc(ret, config_prof)); + assert(usize == isalloc(tsd, ret, config_prof)); *tsd_thread_allocatedp_get(tsd) += usize; } UTRACE(0, num_size, ret); - JEMALLOC_VALGRIND_MALLOC(ret != NULL, ret, usize, true); + JEMALLOC_VALGRIND_MALLOC(ret != NULL, tsd, ret, usize, true); + witness_assert_lockless(tsd); return (ret); } @@ -1767,7 +1780,7 @@ irealloc_prof_sample(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t usize, p = iralloc(tsd, old_ptr, old_usize, 
LARGE_MINCLASS, 0, false); if (p == NULL) return (NULL); - arena_prof_promoted(p, usize); + arena_prof_promoted(tsd, p, usize); } else p = iralloc(tsd, old_ptr, old_usize, usize, 0, false); @@ -1782,7 +1795,7 @@ irealloc_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t usize) prof_tctx_t *old_tctx, *tctx; prof_active = prof_active_get_unlocked(); - old_tctx = prof_tctx_get(old_ptr); + old_tctx = prof_tctx_get(tsd, old_ptr); tctx = prof_alloc_prep(tsd, usize, prof_active, true); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) p = irealloc_prof_sample(tsd, old_ptr, old_usize, usize, tctx); @@ -1804,14 +1817,16 @@ ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) size_t usize; UNUSED size_t rzsize JEMALLOC_CC_SILENCE_INIT(0); + witness_assert_lockless(tsd); + assert(ptr != NULL); assert(malloc_initialized() || IS_INITIALIZER); if (config_prof && opt_prof) { - usize = isalloc(ptr, config_prof); + usize = isalloc(tsd, ptr, config_prof); prof_free(tsd, ptr, usize); } else if (config_stats || config_valgrind) - usize = isalloc(ptr, config_prof); + usize = isalloc(tsd, ptr, config_prof); if (config_stats) *tsd_thread_deallocatedp_get(tsd) += usize; @@ -1819,7 +1834,7 @@ ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) iqalloc(tsd, ptr, tcache, false); else { if (config_valgrind && unlikely(in_valgrind)) - rzsize = p2rz(ptr); + rzsize = p2rz(tsd, ptr); iqalloc(tsd, ptr, tcache, true); JEMALLOC_VALGRIND_FREE(ptr, rzsize); } @@ -1830,6 +1845,8 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache) { UNUSED size_t rzsize JEMALLOC_CC_SILENCE_INIT(0); + witness_assert_lockless(tsd); + assert(ptr != NULL); assert(malloc_initialized() || IS_INITIALIZER); @@ -1838,7 +1855,7 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache) if (config_stats) *tsd_thread_deallocatedp_get(tsd) += usize; if (config_valgrind && unlikely(in_valgrind)) - rzsize = p2rz(ptr); + rzsize = p2rz(tsd, ptr); isqalloc(tsd, ptr, usize, tcache); JEMALLOC_VALGRIND_FREE(ptr, rzsize); } @@ -1869,10 +1886,13 @@ je_realloc(void *ptr, size_t size) assert(malloc_initialized() || IS_INITIALIZER); malloc_thread_init(); tsd = tsd_fetch(); + witness_assert_lockless(tsd); - old_usize = isalloc(ptr, config_prof); - if (config_valgrind && unlikely(in_valgrind)) - old_rzsize = config_prof ? p2rz(ptr) : u2rz(old_usize); + old_usize = isalloc(tsd, ptr, config_prof); + if (config_valgrind && unlikely(in_valgrind)) { + old_rzsize = config_prof ? 
p2rz(tsd, ptr) : + u2rz(old_usize); + } if (config_prof && opt_prof) { usize = s2u(size); @@ -1901,13 +1921,14 @@ je_realloc(void *ptr, size_t size) set_errno(ENOMEM); } if (config_stats && likely(ret != NULL)) { - assert(usize == isalloc(ret, config_prof)); + assert(usize == isalloc(tsd, ret, config_prof)); *tsd_thread_allocatedp_get(tsd) += usize; *tsd_thread_deallocatedp_get(tsd) += old_usize; } UTRACE(ptr, size, ret); - JEMALLOC_VALGRIND_REALLOC(true, ret, usize, true, ptr, old_usize, + JEMALLOC_VALGRIND_REALLOC(true, tsd, ret, usize, true, ptr, old_usize, old_rzsize, true, false); + witness_assert_lockless(tsd); return (ret); } @@ -1922,6 +1943,7 @@ je_free(void *ptr) ifree(tsd, ptr, tcache_get(tsd, false), false); else ifree(tsd, ptr, tcache_get(tsd, false), true); + witness_assert_lockless(tsd); } } @@ -1942,7 +1964,6 @@ je_memalign(size_t alignment, size_t size) void *ret JEMALLOC_CC_SILENCE_INIT(NULL); if (unlikely(imemalign(&ret, alignment, size, 1) != 0)) ret = NULL; - JEMALLOC_VALGRIND_MALLOC(ret != NULL, ret, size, false); return (ret); } #endif @@ -1956,7 +1977,6 @@ je_valloc(size_t size) void *ret JEMALLOC_CC_SILENCE_INIT(NULL); if (unlikely(imemalign(&ret, PAGE, size, 1) != 0)) ret = NULL; - JEMALLOC_VALGRIND_MALLOC(ret != NULL, ret, size, false); return (ret); } #endif @@ -2020,7 +2040,7 @@ imallocx_flags_decode_hard(tsd_t *tsd, size_t size, int flags, size_t *usize, *tcache = tcache_get(tsd, true); if ((flags & MALLOCX_ARENA_MASK) != 0) { unsigned arena_ind = MALLOCX_ARENA_GET(flags); - *arena = arena_get(arena_ind, true); + *arena = arena_get(tsd, arena_ind, true); if (unlikely(*arena == NULL)) return (true); } else @@ -2076,7 +2096,7 @@ imallocx_prof_sample(tsd_t *tsd, size_t usize, size_t alignment, bool zero, arena); if (p == NULL) return (NULL); - arena_prof_promoted(p, usize); + arena_prof_promoted(tsd, p, usize); } else p = imallocx_flags(tsd, usize, alignment, zero, tcache, arena); @@ -2108,7 +2128,7 @@ imallocx_prof(tsd_t *tsd, size_t size, int flags, size_t *usize) prof_alloc_rollback(tsd, tctx, true); return (NULL); } - prof_malloc(p, *usize, tctx); + prof_malloc(tsd, p, *usize, tctx); assert(alignment == 0 || ((uintptr_t)p & (alignment - 1)) == ZU(0)); return (p); @@ -2154,9 +2174,12 @@ je_mallocx(size_t size, int flags) assert(size != 0); - if (unlikely(malloc_init())) + if (unlikely(malloc_init())) { + tsd = NULL; goto label_oom; + } tsd = tsd_fetch(); + witness_assert_lockless(tsd); if (config_prof && opt_prof) p = imallocx_prof(tsd, size, flags, &usize); @@ -2166,11 +2189,12 @@ je_mallocx(size_t size, int flags) goto label_oom; if (config_stats) { - assert(usize == isalloc(p, config_prof)); + assert(usize == isalloc(tsd, p, config_prof)); *tsd_thread_allocatedp_get(tsd) += usize; } UTRACE(0, size, p); - JEMALLOC_VALGRIND_MALLOC(true, p, usize, MALLOCX_ZERO_GET(flags)); + JEMALLOC_VALGRIND_MALLOC(true, tsd, p, usize, MALLOCX_ZERO_GET(flags)); + witness_assert_lockless(tsd); return (p); label_oom: if (config_xmalloc && unlikely(opt_xmalloc)) { @@ -2178,6 +2202,7 @@ label_oom: abort(); } UTRACE(0, size, 0); + witness_assert_lockless(tsd); return (NULL); } @@ -2195,7 +2220,7 @@ irallocx_prof_sample(tsd_t *tsd, void *old_ptr, size_t old_usize, zero, tcache, arena); if (p == NULL) return (NULL); - arena_prof_promoted(p, usize); + arena_prof_promoted(tsd, p, usize); } else { p = iralloct(tsd, old_ptr, old_usize, usize, alignment, zero, tcache, arena); @@ -2214,7 +2239,7 @@ irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size, prof_tctx_t 
*old_tctx, *tctx; prof_active = prof_active_get_unlocked(); - old_tctx = prof_tctx_get(old_ptr); + old_tctx = prof_tctx_get(tsd, old_ptr); tctx = prof_alloc_prep(tsd, *usize, prof_active, true); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { p = irallocx_prof_sample(tsd, old_ptr, old_usize, *usize, @@ -2237,7 +2262,7 @@ irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size, * be the same as the current usize because of in-place large * reallocation. Therefore, query the actual value of usize. */ - *usize = isalloc(p, config_prof); + *usize = isalloc(tsd, p, config_prof); } prof_realloc(tsd, p, *usize, tctx, prof_active, true, old_ptr, old_usize, old_tctx); @@ -2265,10 +2290,11 @@ je_rallocx(void *ptr, size_t size, int flags) assert(malloc_initialized() || IS_INITIALIZER); malloc_thread_init(); tsd = tsd_fetch(); + witness_assert_lockless(tsd); if (unlikely((flags & MALLOCX_ARENA_MASK) != 0)) { unsigned arena_ind = MALLOCX_ARENA_GET(flags); - arena = arena_get(arena_ind, true); + arena = arena_get(tsd, arena_ind, true); if (unlikely(arena == NULL)) goto label_oom; } else @@ -2282,7 +2308,7 @@ je_rallocx(void *ptr, size_t size, int flags) } else tcache = tcache_get(tsd, true); - old_usize = isalloc(ptr, config_prof); + old_usize = isalloc(tsd, ptr, config_prof); if (config_valgrind && unlikely(in_valgrind)) old_rzsize = u2rz(old_usize); @@ -2300,7 +2326,7 @@ je_rallocx(void *ptr, size_t size, int flags) if (unlikely(p == NULL)) goto label_oom; if (config_stats || (config_valgrind && unlikely(in_valgrind))) - usize = isalloc(p, config_prof); + usize = isalloc(tsd, p, config_prof); } assert(alignment == 0 || ((uintptr_t)p & (alignment - 1)) == ZU(0)); @@ -2309,8 +2335,9 @@ je_rallocx(void *ptr, size_t size, int flags) *tsd_thread_deallocatedp_get(tsd) += old_usize; } UTRACE(ptr, size, p); - JEMALLOC_VALGRIND_REALLOC(true, p, usize, false, ptr, old_usize, + JEMALLOC_VALGRIND_REALLOC(true, tsd, p, usize, false, ptr, old_usize, old_rzsize, false, zero); + witness_assert_lockless(tsd); return (p); label_oom: if (config_xmalloc && unlikely(opt_xmalloc)) { @@ -2318,6 +2345,7 @@ label_oom: abort(); } UTRACE(ptr, size, 0); + witness_assert_lockless(tsd); return (NULL); } @@ -2329,7 +2357,7 @@ ixallocx_helper(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, if (ixalloc(tsd, ptr, old_usize, size, extra, alignment, zero)) return (old_usize); - usize = isalloc(ptr, config_prof); + usize = isalloc(tsd, ptr, config_prof); return (usize); } @@ -2357,7 +2385,7 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, prof_tctx_t *old_tctx, *tctx; prof_active = prof_active_get_unlocked(); - old_tctx = prof_tctx_get(ptr); + old_tctx = prof_tctx_get(tsd, ptr); /* * usize isn't knowable before ixalloc() returns when extra is non-zero. 
* Therefore, compute its maximum possible value and use that in @@ -2413,8 +2441,9 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) assert(malloc_initialized() || IS_INITIALIZER); malloc_thread_init(); tsd = tsd_fetch(); + witness_assert_lockless(tsd); - old_usize = isalloc(ptr, config_prof); + old_usize = isalloc(tsd, ptr, config_prof); /* * The API explicitly absolves itself of protecting against (size + @@ -2449,10 +2478,11 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) *tsd_thread_allocatedp_get(tsd) += usize; *tsd_thread_deallocatedp_get(tsd) += old_usize; } - JEMALLOC_VALGRIND_REALLOC(false, ptr, usize, false, ptr, old_usize, + JEMALLOC_VALGRIND_REALLOC(false, tsd, ptr, usize, false, ptr, old_usize, old_rzsize, false, zero); label_not_resized: UTRACE(ptr, size, ptr); + witness_assert_lockless(tsd); return (usize); } @@ -2461,15 +2491,20 @@ JEMALLOC_ATTR(pure) je_sallocx(const void *ptr, int flags) { size_t usize; + tsd_t *tsd; assert(malloc_initialized() || IS_INITIALIZER); malloc_thread_init(); - if (config_ivsalloc) - usize = ivsalloc(ptr, config_prof); - else - usize = isalloc(ptr, config_prof); + tsd = tsd_fetch(); + witness_assert_lockless(tsd); + if (config_ivsalloc) + usize = ivsalloc(tsd, ptr, config_prof); + else + usize = isalloc(tsd, ptr, config_prof); + + witness_assert_lockless(tsd); return (usize); } @@ -2483,6 +2518,7 @@ je_dallocx(void *ptr, int flags) assert(malloc_initialized() || IS_INITIALIZER); tsd = tsd_fetch(); + witness_assert_lockless(tsd); if (unlikely((flags & MALLOCX_TCACHE_MASK) != 0)) { if ((flags & MALLOCX_TCACHE_MASK) == MALLOCX_TCACHE_NONE) tcache = NULL; @@ -2493,17 +2529,21 @@ je_dallocx(void *ptr, int flags) UTRACE(ptr, 0, 0); ifree(tsd_fetch(), ptr, tcache, true); + witness_assert_lockless(tsd); } JEMALLOC_ALWAYS_INLINE_C size_t -inallocx(size_t size, int flags) +inallocx(tsd_t *tsd, size_t size, int flags) { size_t usize; + witness_assert_lockless(tsd); + if (likely((flags & MALLOCX_LG_ALIGN_MASK) == 0)) usize = s2u(size); else usize = sa2u(size, MALLOCX_ALIGN_GET_SPECIFIED(flags)); + witness_assert_lockless(tsd); return (usize); } @@ -2516,10 +2556,11 @@ je_sdallocx(void *ptr, size_t size, int flags) assert(ptr != NULL); assert(malloc_initialized() || IS_INITIALIZER); - usize = inallocx(size, flags); - assert(usize == isalloc(ptr, config_prof)); - tsd = tsd_fetch(); + usize = inallocx(tsd, size, flags); + assert(usize == isalloc(tsd, ptr, config_prof)); + + witness_assert_lockless(tsd); if (unlikely((flags & MALLOCX_TCACHE_MASK) != 0)) { if ((flags & MALLOCX_TCACHE_MASK) == MALLOCX_TCACHE_NONE) tcache = NULL; @@ -2530,6 +2571,7 @@ je_sdallocx(void *ptr, size_t size, int flags) UTRACE(ptr, 0, 0); isfree(tsd, ptr, usize, tcache); + witness_assert_lockless(tsd); } JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW @@ -2537,16 +2579,21 @@ JEMALLOC_ATTR(pure) je_nallocx(size_t size, int flags) { size_t usize; + tsd_t *tsd; assert(size != 0); if (unlikely(malloc_init())) return (0); - usize = inallocx(size, flags); + tsd = tsd_fetch(); + witness_assert_lockless(tsd); + + usize = inallocx(tsd, size, flags); if (unlikely(usize > HUGE_MAXCLASS)) return (0); + witness_assert_lockless(tsd); return (usize); } @@ -2554,55 +2601,82 @@ JEMALLOC_EXPORT int JEMALLOC_NOTHROW je_mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + int ret; + tsd_t *tsd; if (unlikely(malloc_init())) return (EAGAIN); - return (ctl_byname(name, oldp, oldlenp, newp, newlen)); + tsd = tsd_fetch(); + witness_assert_lockless(tsd); + ret 
= ctl_byname(tsd, name, oldp, oldlenp, newp, newlen); + witness_assert_lockless(tsd); + return (ret); } JEMALLOC_EXPORT int JEMALLOC_NOTHROW je_mallctlnametomib(const char *name, size_t *mibp, size_t *miblenp) { + int ret; + tsd_t *tsd; if (unlikely(malloc_init())) return (EAGAIN); - return (ctl_nametomib(name, mibp, miblenp)); + tsd = tsd_fetch(); + witness_assert_lockless(tsd); + ret = ctl_nametomib(tsd, name, mibp, miblenp); + witness_assert_lockless(tsd); + return (ret); } JEMALLOC_EXPORT int JEMALLOC_NOTHROW je_mallctlbymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + int ret; + tsd_t *tsd; if (unlikely(malloc_init())) return (EAGAIN); - return (ctl_bymib(mib, miblen, oldp, oldlenp, newp, newlen)); + tsd = tsd_fetch(); + witness_assert_lockless(tsd); + ret = ctl_bymib(tsd, mib, miblen, oldp, oldlenp, newp, newlen); + witness_assert_lockless(tsd); + return (ret); } JEMALLOC_EXPORT void JEMALLOC_NOTHROW je_malloc_stats_print(void (*write_cb)(void *, const char *), void *cbopaque, const char *opts) { + tsd_t *tsd; + tsd = tsd_fetch(); + witness_assert_lockless(tsd); stats_print(write_cb, cbopaque, opts); + witness_assert_lockless(tsd); } JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) { size_t ret; + tsd_t *tsd; assert(malloc_initialized() || IS_INITIALIZER); malloc_thread_init(); - if (config_ivsalloc) - ret = ivsalloc(ptr, config_prof); - else - ret = (ptr == NULL) ? 0 : isalloc(ptr, config_prof); + tsd = tsd_fetch(); + witness_assert_lockless(tsd); + if (config_ivsalloc) + ret = ivsalloc(tsd, ptr, config_prof); + else + ret = (ptr == NULL) ? 0 : isalloc(tsd, ptr, config_prof); + + witness_assert_lockless(tsd); return (ret); } @@ -2644,6 +2718,7 @@ JEMALLOC_EXPORT void _malloc_prefork(void) #endif { + tsd_t *tsd; unsigned i, narenas; #ifdef JEMALLOC_MUTEX_INIT_CB @@ -2652,18 +2727,20 @@ _malloc_prefork(void) #endif assert(malloc_initialized()); + tsd = tsd_fetch(); + /* Acquire all mutexes in a safe order. */ - ctl_prefork(); - prof_prefork(); - malloc_mutex_prefork(&arenas_lock); + ctl_prefork(tsd); + prof_prefork(tsd); + malloc_mutex_prefork(tsd, &arenas_lock); for (i = 0, narenas = narenas_total_get(); i < narenas; i++) { arena_t *arena; - if ((arena = arena_get(i, false)) != NULL) - arena_prefork(arena); + if ((arena = arena_get(tsd, i, false)) != NULL) + arena_prefork(tsd, arena); } - chunk_prefork(); - base_prefork(); + chunk_prefork(tsd); + base_prefork(tsd); } #ifndef JEMALLOC_MUTEX_INIT_CB @@ -2674,6 +2751,7 @@ JEMALLOC_EXPORT void _malloc_postfork(void) #endif { + tsd_t *tsd; unsigned i, narenas; #ifdef JEMALLOC_MUTEX_INIT_CB @@ -2682,39 +2760,44 @@ _malloc_postfork(void) #endif assert(malloc_initialized()); + tsd = tsd_fetch(); + /* Release all mutexes, now that fork() has completed. 
*/ - base_postfork_parent(); - chunk_postfork_parent(); + base_postfork_parent(tsd); + chunk_postfork_parent(tsd); for (i = 0, narenas = narenas_total_get(); i < narenas; i++) { arena_t *arena; - if ((arena = arena_get(i, false)) != NULL) - arena_postfork_parent(arena); + if ((arena = arena_get(tsd, i, false)) != NULL) + arena_postfork_parent(tsd, arena); } - malloc_mutex_postfork_parent(&arenas_lock); - prof_postfork_parent(); - ctl_postfork_parent(); + malloc_mutex_postfork_parent(tsd, &arenas_lock); + prof_postfork_parent(tsd); + ctl_postfork_parent(tsd); } void jemalloc_postfork_child(void) { + tsd_t *tsd; unsigned i, narenas; assert(malloc_initialized()); + tsd = tsd_fetch(); + /* Release all mutexes, now that fork() has completed. */ - base_postfork_child(); - chunk_postfork_child(); + base_postfork_child(tsd); + chunk_postfork_child(tsd); for (i = 0, narenas = narenas_total_get(); i < narenas; i++) { arena_t *arena; - if ((arena = arena_get(i, false)) != NULL) - arena_postfork_child(arena); + if ((arena = arena_get(tsd, i, false)) != NULL) + arena_postfork_child(tsd, arena); } - malloc_mutex_postfork_child(&arenas_lock); - prof_postfork_child(); - ctl_postfork_child(); + malloc_mutex_postfork_child(tsd, &arenas_lock); + prof_postfork_child(tsd); + ctl_postfork_child(tsd); } /******************************************************************************/ diff --git a/src/mutex.c b/src/mutex.c index 2d47af97..4174f42e 100644 --- a/src/mutex.c +++ b/src/mutex.c @@ -69,7 +69,7 @@ JEMALLOC_EXPORT int _pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex, #endif bool -malloc_mutex_init(malloc_mutex_t *mutex) +malloc_mutex_init(malloc_mutex_t *mutex, const char *name, witness_rank_t rank) { #ifdef _WIN32 @@ -103,31 +103,34 @@ malloc_mutex_init(malloc_mutex_t *mutex) } pthread_mutexattr_destroy(&attr); #endif + if (config_debug) + witness_init(&mutex->witness, name, rank, NULL); return (false); } void -malloc_mutex_prefork(malloc_mutex_t *mutex) +malloc_mutex_prefork(tsd_t *tsd, malloc_mutex_t *mutex) { - malloc_mutex_lock(mutex); + malloc_mutex_lock(tsd, mutex); } void -malloc_mutex_postfork_parent(malloc_mutex_t *mutex) +malloc_mutex_postfork_parent(tsd_t *tsd, malloc_mutex_t *mutex) { - malloc_mutex_unlock(mutex); + malloc_mutex_unlock(tsd, mutex); } void -malloc_mutex_postfork_child(malloc_mutex_t *mutex) +malloc_mutex_postfork_child(tsd_t *tsd, malloc_mutex_t *mutex) { #ifdef JEMALLOC_MUTEX_INIT_CB - malloc_mutex_unlock(mutex); + malloc_mutex_unlock(tsd, mutex); #else - if (malloc_mutex_init(mutex)) { + if (malloc_mutex_init(mutex, mutex->witness.name, + mutex->witness.rank)) { malloc_printf("<jemalloc>: Error re-initializing mutex in " "child\n"); if (opt_abort) @@ -137,7 +140,7 @@ malloc_mutex_postfork_child(malloc_mutex_t *mutex) } bool -mutex_boot(void) +malloc_mutex_boot(void) { #ifdef JEMALLOC_MUTEX_INIT_CB diff --git a/src/prof.c b/src/prof.c index b3872277..520bf90a 100644 --- a/src/prof.c +++ b/src/prof.c @@ -121,9 +121,9 @@ static bool prof_booted = false; * definition.
*/ -static bool prof_tctx_should_destroy(prof_tctx_t *tctx); +static bool prof_tctx_should_destroy(tsd_t *tsd, prof_tctx_t *tctx); static void prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx); -static bool prof_tdata_should_destroy(prof_tdata_t *tdata, +static bool prof_tdata_should_destroy(tsd_t *tsd, prof_tdata_t *tdata, bool even_if_attached); static void prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata, bool even_if_attached); @@ -213,22 +213,23 @@ prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated) } if ((uintptr_t)tctx > (uintptr_t)1U) { - malloc_mutex_lock(tctx->tdata->lock); + malloc_mutex_lock(tsd, tctx->tdata->lock); tctx->prepared = false; - if (prof_tctx_should_destroy(tctx)) + if (prof_tctx_should_destroy(tsd, tctx)) prof_tctx_destroy(tsd, tctx); else - malloc_mutex_unlock(tctx->tdata->lock); + malloc_mutex_unlock(tsd, tctx->tdata->lock); } } void -prof_malloc_sample_object(const void *ptr, size_t usize, prof_tctx_t *tctx) +prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t usize, + prof_tctx_t *tctx) { - prof_tctx_set(ptr, usize, tctx); + prof_tctx_set(tsd, ptr, usize, tctx); - malloc_mutex_lock(tctx->tdata->lock); + malloc_mutex_lock(tsd, tctx->tdata->lock); tctx->cnts.curobjs++; tctx->cnts.curbytes += usize; if (opt_prof_accum) { @@ -236,23 +237,23 @@ prof_malloc_sample_object(const void *ptr, size_t usize, prof_tctx_t *tctx) tctx->cnts.accumbytes += usize; } tctx->prepared = false; - malloc_mutex_unlock(tctx->tdata->lock); + malloc_mutex_unlock(tsd, tctx->tdata->lock); } void prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_tctx_t *tctx) { - malloc_mutex_lock(tctx->tdata->lock); + malloc_mutex_lock(tsd, tctx->tdata->lock); assert(tctx->cnts.curobjs > 0); assert(tctx->cnts.curbytes >= usize); tctx->cnts.curobjs--; tctx->cnts.curbytes -= usize; - if (prof_tctx_should_destroy(tctx)) + if (prof_tctx_should_destroy(tsd, tctx)) prof_tctx_destroy(tsd, tctx); else - malloc_mutex_unlock(tctx->tdata->lock); + malloc_mutex_unlock(tsd, tctx->tdata->lock); } void @@ -277,7 +278,7 @@ prof_enter(tsd_t *tsd, prof_tdata_t *tdata) tdata->enq = true; } - malloc_mutex_lock(&bt2gctx_mtx); + malloc_mutex_lock(tsd, &bt2gctx_mtx); } JEMALLOC_INLINE_C void @@ -287,7 +288,7 @@ prof_leave(tsd_t *tsd, prof_tdata_t *tdata) cassert(config_prof); assert(tdata == prof_tdata_get(tsd, false)); - malloc_mutex_unlock(&bt2gctx_mtx); + malloc_mutex_unlock(tsd, &bt2gctx_mtx); if (tdata != NULL) { bool idump, gdump; @@ -300,9 +301,9 @@ prof_leave(tsd_t *tsd, prof_tdata_t *tdata) tdata->enq_gdump = false; if (idump) - prof_idump(); + prof_idump(tsd); if (gdump) - prof_gdump(); + prof_gdump(tsd); } } @@ -585,7 +586,7 @@ prof_gctx_try_destroy(tsd_t *tsd, prof_tdata_t *tdata_self, prof_gctx_t *gctx, * into this function. */ prof_enter(tsd, tdata_self); - malloc_mutex_lock(gctx->lock); + malloc_mutex_lock(tsd, gctx->lock); assert(gctx->nlimbo != 0); if (tctx_tree_empty(&gctx->tctxs) && gctx->nlimbo == 1) { /* Remove gctx from bt2gctx. */ @@ -593,7 +594,7 @@ prof_gctx_try_destroy(tsd_t *tsd, prof_tdata_t *tdata_self, prof_gctx_t *gctx, not_reached(); prof_leave(tsd, tdata_self); /* Destroy gctx. */ - malloc_mutex_unlock(gctx->lock); + malloc_mutex_unlock(tsd, gctx->lock); idalloctm(tsd, gctx, tcache_get(tsd, false), true, true); } else { /* @@ -601,16 +602,17 @@ prof_gctx_try_destroy(tsd_t *tsd, prof_tdata_t *tdata_self, prof_gctx_t *gctx, * prof_lookup(). 
*/ gctx->nlimbo--; - malloc_mutex_unlock(gctx->lock); + malloc_mutex_unlock(tsd, gctx->lock); prof_leave(tsd, tdata_self); } } -/* tctx->tdata->lock must be held. */ static bool -prof_tctx_should_destroy(prof_tctx_t *tctx) +prof_tctx_should_destroy(tsd_t *tsd, prof_tctx_t *tctx) { + malloc_mutex_assert_owner(tsd, tctx->tdata->lock); + if (opt_prof_accum) return (false); if (tctx->cnts.curobjs != 0) @@ -633,7 +635,6 @@ prof_gctx_should_destroy(prof_gctx_t *gctx) return (true); } -/* tctx->tdata->lock is held upon entry, and released before return. */ static void prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) { @@ -641,6 +642,8 @@ prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) prof_gctx_t *gctx = tctx->gctx; bool destroy_tdata, destroy_tctx, destroy_gctx; + malloc_mutex_assert_owner(tsd, tctx->tdata->lock); + assert(tctx->cnts.curobjs == 0); assert(tctx->cnts.curbytes == 0); assert(!opt_prof_accum); @@ -648,10 +651,10 @@ prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) assert(tctx->cnts.accumbytes == 0); ckh_remove(tsd, &tdata->bt2tctx, &gctx->bt, NULL, NULL); - destroy_tdata = prof_tdata_should_destroy(tdata, false); - malloc_mutex_unlock(tdata->lock); + destroy_tdata = prof_tdata_should_destroy(tsd, tdata, false); + malloc_mutex_unlock(tsd, tdata->lock); - malloc_mutex_lock(gctx->lock); + malloc_mutex_lock(tsd, gctx->lock); switch (tctx->state) { case prof_tctx_state_nominal: tctx_tree_remove(&gctx->tctxs, tctx); @@ -691,12 +694,14 @@ prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) destroy_tctx = false; destroy_gctx = false; } - malloc_mutex_unlock(gctx->lock); + malloc_mutex_unlock(tsd, gctx->lock); if (destroy_gctx) { prof_gctx_try_destroy(tsd, prof_tdata_get(tsd, false), gctx, tdata); } + malloc_mutex_assert_not_owner(tsd, tctx->tdata->lock); + if (destroy_tdata) prof_tdata_destroy(tsd, tdata, false); @@ -740,9 +745,9 @@ prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata, * Increment nlimbo, in order to avoid a race condition with * prof_tctx_destroy()/prof_gctx_try_destroy(). */ - malloc_mutex_lock(gctx.p->lock); + malloc_mutex_lock(tsd, gctx.p->lock); gctx.p->nlimbo++; - malloc_mutex_unlock(gctx.p->lock); + malloc_mutex_unlock(tsd, gctx.p->lock); new_gctx = false; } prof_leave(tsd, tdata); @@ -769,11 +774,11 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) if (tdata == NULL) return (NULL); - malloc_mutex_lock(tdata->lock); + malloc_mutex_lock(tsd, tdata->lock); not_found = ckh_search(&tdata->bt2tctx, bt, NULL, &ret.v); if (!not_found) /* Note double negative! 
*/ ret.p->prepared = true; - malloc_mutex_unlock(tdata->lock); + malloc_mutex_unlock(tsd, tdata->lock); if (not_found) { tcache_t *tcache; void *btkey; @@ -806,20 +811,20 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) ret.p->tctx_uid = tdata->tctx_uid_next++; ret.p->prepared = true; ret.p->state = prof_tctx_state_initializing; - malloc_mutex_lock(tdata->lock); + malloc_mutex_lock(tsd, tdata->lock); error = ckh_insert(tsd, &tdata->bt2tctx, btkey, ret.v); - malloc_mutex_unlock(tdata->lock); + malloc_mutex_unlock(tsd, tdata->lock); if (error) { if (new_gctx) prof_gctx_try_destroy(tsd, tdata, gctx, tdata); idalloctm(tsd, ret.v, tcache, true, true); return (NULL); } - malloc_mutex_lock(gctx->lock); + malloc_mutex_lock(tsd, gctx->lock); ret.p->state = prof_tctx_state_nominal; tctx_tree_insert(&gctx->tctxs, ret.p); gctx->nlimbo--; - malloc_mutex_unlock(gctx->lock); + malloc_mutex_unlock(tsd, gctx->lock); } return (ret.p); @@ -894,11 +899,13 @@ size_t prof_tdata_count(void) { size_t tdata_count = 0; + tsd_t *tsd; - malloc_mutex_lock(&tdatas_mtx); + tsd = tsd_fetch(); + malloc_mutex_lock(tsd, &tdatas_mtx); tdata_tree_iter(&tdatas, NULL, prof_tdata_count_iter, (void *)&tdata_count); - malloc_mutex_unlock(&tdatas_mtx); + malloc_mutex_unlock(tsd, &tdatas_mtx); return (tdata_count); } @@ -917,9 +924,9 @@ prof_bt_count(void) if (tdata == NULL) return (0); - malloc_mutex_lock(&bt2gctx_mtx); + malloc_mutex_lock(tsd, &bt2gctx_mtx); bt_count = ckh_count(&bt2gctx); - malloc_mutex_unlock(&bt2gctx_mtx); + malloc_mutex_unlock(tsd, &bt2gctx_mtx); return (bt_count); } @@ -1032,20 +1039,21 @@ prof_dump_printf(bool propagate_err, const char *format, ...) return (ret); } -/* tctx->tdata->lock is held. */ static void -prof_tctx_merge_tdata(prof_tctx_t *tctx, prof_tdata_t *tdata) +prof_tctx_merge_tdata(tsd_t *tsd, prof_tctx_t *tctx, prof_tdata_t *tdata) { - malloc_mutex_lock(tctx->gctx->lock); + malloc_mutex_assert_owner(tsd, tctx->tdata->lock); + + malloc_mutex_lock(tsd, tctx->gctx->lock); switch (tctx->state) { case prof_tctx_state_initializing: - malloc_mutex_unlock(tctx->gctx->lock); + malloc_mutex_unlock(tsd, tctx->gctx->lock); return; case prof_tctx_state_nominal: tctx->state = prof_tctx_state_dumping; - malloc_mutex_unlock(tctx->gctx->lock); + malloc_mutex_unlock(tsd, tctx->gctx->lock); memcpy(&tctx->dump_cnts, &tctx->cnts, sizeof(prof_cnt_t)); @@ -1064,11 +1072,12 @@ prof_tctx_merge_tdata(prof_tctx_t *tctx, prof_tdata_t *tdata) } } -/* gctx->lock is held. */ static void -prof_tctx_merge_gctx(prof_tctx_t *tctx, prof_gctx_t *gctx) +prof_tctx_merge_gctx(tsd_t *tsd, prof_tctx_t *tctx, prof_gctx_t *gctx) { + malloc_mutex_assert_owner(tsd, gctx->lock); + gctx->cnt_summed.curobjs += tctx->dump_cnts.curobjs; gctx->cnt_summed.curbytes += tctx->dump_cnts.curbytes; if (opt_prof_accum) { @@ -1077,10 +1086,12 @@ prof_tctx_merge_gctx(prof_tctx_t *tctx, prof_gctx_t *gctx) } } -/* tctx->gctx is held. 
*/ static prof_tctx_t * prof_tctx_merge_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) { + tsd_t *tsd = (tsd_t *)arg; + + malloc_mutex_assert_owner(tsd, tctx->gctx->lock); switch (tctx->state) { case prof_tctx_state_nominal: @@ -1088,7 +1099,7 @@ prof_tctx_merge_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) break; case prof_tctx_state_dumping: case prof_tctx_state_purgatory: - prof_tctx_merge_gctx(tctx, tctx->gctx); + prof_tctx_merge_gctx(tsd, tctx, tctx->gctx); break; default: not_reached(); @@ -1097,11 +1108,18 @@ prof_tctx_merge_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) return (NULL); } -/* gctx->lock is held. */ +struct prof_tctx_dump_iter_arg_s { + tsd_t *tsd; + bool propagate_err; +}; + static prof_tctx_t * -prof_tctx_dump_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) +prof_tctx_dump_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *opaque) { - bool propagate_err = *(bool *)arg; + struct prof_tctx_dump_iter_arg_s *arg = + (struct prof_tctx_dump_iter_arg_s *)opaque; + + malloc_mutex_assert_owner(arg->tsd, tctx->gctx->lock); switch (tctx->state) { case prof_tctx_state_initializing: @@ -1110,7 +1128,7 @@ prof_tctx_dump_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) break; case prof_tctx_state_dumping: case prof_tctx_state_purgatory: - if (prof_dump_printf(propagate_err, + if (prof_dump_printf(arg->propagate_err, " t%"FMTu64": %"FMTu64": %"FMTu64" [%"FMTu64": " "%"FMTu64"]\n", tctx->thr_uid, tctx->dump_cnts.curobjs, tctx->dump_cnts.curbytes, tctx->dump_cnts.accumobjs, @@ -1123,12 +1141,14 @@ prof_tctx_dump_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) return (NULL); } -/* tctx->gctx is held. */ static prof_tctx_t * prof_tctx_finish_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) { + tsd_t *tsd = (tsd_t *)arg; prof_tctx_t *ret; + malloc_mutex_assert_owner(tsd, tctx->gctx->lock); + switch (tctx->state) { case prof_tctx_state_nominal: /* New since dumping started; ignore. */ @@ -1149,12 +1169,12 @@ label_return: } static void -prof_dump_gctx_prep(prof_gctx_t *gctx, prof_gctx_tree_t *gctxs) +prof_dump_gctx_prep(tsd_t *tsd, prof_gctx_t *gctx, prof_gctx_tree_t *gctxs) { cassert(config_prof); - malloc_mutex_lock(gctx->lock); + malloc_mutex_lock(tsd, gctx->lock); /* * Increment nlimbo so that gctx won't go away before dump. 
@@ -1166,19 +1186,26 @@ prof_dump_gctx_prep(prof_gctx_t *gctx, prof_gctx_tree_t *gctxs) memset(&gctx->cnt_summed, 0, sizeof(prof_cnt_t)); - malloc_mutex_unlock(gctx->lock); + malloc_mutex_unlock(tsd, gctx->lock); } -static prof_gctx_t * -prof_gctx_merge_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *arg) -{ - size_t *leak_ngctx = (size_t *)arg; +struct prof_gctx_merge_iter_arg_s { + tsd_t *tsd; + size_t leak_ngctx; +}; - malloc_mutex_lock(gctx->lock); - tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_merge_iter, NULL); +static prof_gctx_t * +prof_gctx_merge_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque) +{ + struct prof_gctx_merge_iter_arg_s *arg = + (struct prof_gctx_merge_iter_arg_s *)opaque; + + malloc_mutex_lock(arg->tsd, gctx->lock); + tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_merge_iter, + (void *)arg->tsd); if (gctx->cnt_summed.curobjs != 0) - (*leak_ngctx)++; - malloc_mutex_unlock(gctx->lock); + arg->leak_ngctx++; + malloc_mutex_unlock(arg->tsd, gctx->lock); return (NULL); } @@ -1197,7 +1224,7 @@ prof_gctx_finish(tsd_t *tsd, prof_gctx_tree_t *gctxs) */ while ((gctx = gctx_tree_first(gctxs)) != NULL) { gctx_tree_remove(gctxs, gctx); - malloc_mutex_lock(gctx->lock); + malloc_mutex_lock(tsd, gctx->lock); { prof_tctx_t *next; @@ -1205,7 +1232,7 @@ prof_gctx_finish(tsd_t *tsd, prof_gctx_tree_t *gctxs) do { prof_tctx_t *to_destroy = tctx_tree_iter(&gctx->tctxs, next, - prof_tctx_finish_iter, NULL); + prof_tctx_finish_iter, (void *)tsd); if (to_destroy != NULL) { next = tctx_tree_next(&gctx->tctxs, to_destroy); @@ -1220,19 +1247,26 @@ prof_gctx_finish(tsd_t *tsd, prof_gctx_tree_t *gctxs) gctx->nlimbo--; if (prof_gctx_should_destroy(gctx)) { gctx->nlimbo++; - malloc_mutex_unlock(gctx->lock); + malloc_mutex_unlock(tsd, gctx->lock); prof_gctx_try_destroy(tsd, tdata, gctx, tdata); } else - malloc_mutex_unlock(gctx->lock); + malloc_mutex_unlock(tsd, gctx->lock); } } -static prof_tdata_t * -prof_tdata_merge_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, void *arg) -{ - prof_cnt_t *cnt_all = (prof_cnt_t *)arg; +struct prof_tdata_merge_iter_arg_s { + tsd_t *tsd; + prof_cnt_t cnt_all; +}; - malloc_mutex_lock(tdata->lock); +static prof_tdata_t * +prof_tdata_merge_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, + void *opaque) +{ + struct prof_tdata_merge_iter_arg_s *arg = + (struct prof_tdata_merge_iter_arg_s *)opaque; + + malloc_mutex_lock(arg->tsd, tdata->lock); if (!tdata->expired) { size_t tabind; union { @@ -1244,17 +1278,17 @@ prof_tdata_merge_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, void *arg) memset(&tdata->cnt_summed, 0, sizeof(prof_cnt_t)); for (tabind = 0; !ckh_iter(&tdata->bt2tctx, &tabind, NULL, &tctx.v);) - prof_tctx_merge_tdata(tctx.p, tdata); + prof_tctx_merge_tdata(arg->tsd, tctx.p, tdata); - cnt_all->curobjs += tdata->cnt_summed.curobjs; - cnt_all->curbytes += tdata->cnt_summed.curbytes; + arg->cnt_all.curobjs += tdata->cnt_summed.curobjs; + arg->cnt_all.curbytes += tdata->cnt_summed.curbytes; if (opt_prof_accum) { - cnt_all->accumobjs += tdata->cnt_summed.accumobjs; - cnt_all->accumbytes += tdata->cnt_summed.accumbytes; + arg->cnt_all.accumobjs += tdata->cnt_summed.accumobjs; + arg->cnt_all.accumbytes += tdata->cnt_summed.accumbytes; } } else tdata->dumping = false; - malloc_mutex_unlock(tdata->lock); + malloc_mutex_unlock(arg->tsd, tdata->lock); return (NULL); } @@ -1283,7 +1317,7 @@ prof_tdata_dump_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, void *arg) #define prof_dump_header JEMALLOC_N(prof_dump_header_impl) #endif 
static bool -prof_dump_header(bool propagate_err, const prof_cnt_t *cnt_all) +prof_dump_header(tsd_t *tsd, bool propagate_err, const prof_cnt_t *cnt_all) { bool ret; @@ -1294,10 +1328,10 @@ prof_dump_header(bool propagate_err, const prof_cnt_t *cnt_all) cnt_all->curbytes, cnt_all->accumobjs, cnt_all->accumbytes)) return (true); - malloc_mutex_lock(&tdatas_mtx); + malloc_mutex_lock(tsd, &tdatas_mtx); ret = (tdata_tree_iter(&tdatas, NULL, prof_tdata_dump_iter, (void *)&propagate_err) != NULL); - malloc_mutex_unlock(&tdatas_mtx); + malloc_mutex_unlock(tsd, &tdatas_mtx); return (ret); } #ifdef JEMALLOC_JET @@ -1306,15 +1340,16 @@ prof_dump_header(bool propagate_err, const prof_cnt_t *cnt_all) prof_dump_header_t *prof_dump_header = JEMALLOC_N(prof_dump_header_impl); #endif -/* gctx->lock is held. */ static bool -prof_dump_gctx(bool propagate_err, prof_gctx_t *gctx, const prof_bt_t *bt, - prof_gctx_tree_t *gctxs) +prof_dump_gctx(tsd_t *tsd, bool propagate_err, prof_gctx_t *gctx, + const prof_bt_t *bt, prof_gctx_tree_t *gctxs) { bool ret; unsigned i; + struct prof_tctx_dump_iter_arg_s prof_tctx_dump_iter_arg; cassert(config_prof); + malloc_mutex_assert_owner(tsd, gctx->lock); /* Avoid dumping such gctx's that have no useful data. */ if ((!opt_prof_accum && gctx->cnt_summed.curobjs == 0) || @@ -1348,8 +1383,10 @@ prof_dump_gctx(bool propagate_err, prof_gctx_t *gctx, const prof_bt_t *bt, goto label_return; } + prof_tctx_dump_iter_arg.tsd = tsd; + prof_tctx_dump_iter_arg.propagate_err = propagate_err; if (tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_dump_iter, - (void *)&propagate_err) != NULL) { + (void *)&prof_tctx_dump_iter_arg) != NULL) { ret = true; goto label_return; } @@ -1459,22 +1496,29 @@ prof_leakcheck(const prof_cnt_t *cnt_all, size_t leak_ngctx, } } +struct prof_gctx_dump_iter_arg_s { + tsd_t *tsd; + bool propagate_err; +}; + static prof_gctx_t * -prof_gctx_dump_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *arg) +prof_gctx_dump_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque) { prof_gctx_t *ret; - bool propagate_err = *(bool *)arg; + struct prof_gctx_dump_iter_arg_s *arg = + (struct prof_gctx_dump_iter_arg_s *)opaque; - malloc_mutex_lock(gctx->lock); + malloc_mutex_lock(arg->tsd, gctx->lock); - if (prof_dump_gctx(propagate_err, gctx, &gctx->bt, gctxs)) { + if (prof_dump_gctx(arg->tsd, arg->propagate_err, gctx, &gctx->bt, + gctxs)) { ret = gctx; goto label_return; } ret = NULL; label_return: - malloc_mutex_unlock(gctx->lock); + malloc_mutex_unlock(arg->tsd, gctx->lock); return (ret); } @@ -1482,13 +1526,14 @@ static bool prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, bool leakcheck) { prof_tdata_t *tdata; - prof_cnt_t cnt_all; + struct prof_tdata_merge_iter_arg_s prof_tdata_merge_iter_arg; size_t tabind; union { prof_gctx_t *p; void *v; } gctx; - size_t leak_ngctx; + struct prof_gctx_merge_iter_arg_s prof_gctx_merge_iter_arg; + struct prof_gctx_dump_iter_arg_s prof_gctx_dump_iter_arg; prof_gctx_tree_t gctxs; cassert(config_prof); @@ -1497,7 +1542,7 @@ prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, bool leakcheck) if (tdata == NULL) return (true); - malloc_mutex_lock(&prof_dump_mtx); + malloc_mutex_lock(tsd, &prof_dump_mtx); prof_enter(tsd, tdata); /* @@ -1506,20 +1551,24 @@ prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, bool leakcheck) */ gctx_tree_new(&gctxs); for (tabind = 0; !ckh_iter(&bt2gctx, &tabind, NULL, &gctx.v);) - prof_dump_gctx_prep(gctx.p, &gctxs); + prof_dump_gctx_prep(tsd, gctx.p, &gctxs); /* * 
Iterate over tdatas, and for the non-expired ones snapshot their tctx * stats and merge them into the associated gctx's. */ - memset(&cnt_all, 0, sizeof(prof_cnt_t)); - malloc_mutex_lock(&tdatas_mtx); - tdata_tree_iter(&tdatas, NULL, prof_tdata_merge_iter, (void *)&cnt_all); - malloc_mutex_unlock(&tdatas_mtx); + prof_tdata_merge_iter_arg.tsd = tsd; + memset(&prof_tdata_merge_iter_arg.cnt_all, 0, sizeof(prof_cnt_t)); + malloc_mutex_lock(tsd, &tdatas_mtx); + tdata_tree_iter(&tdatas, NULL, prof_tdata_merge_iter, + (void *)&prof_tdata_merge_iter_arg); + malloc_mutex_unlock(tsd, &tdatas_mtx); /* Merge tctx stats into gctx's. */ - leak_ngctx = 0; - gctx_tree_iter(&gctxs, NULL, prof_gctx_merge_iter, (void *)&leak_ngctx); + prof_gctx_merge_iter_arg.tsd = tsd; + prof_gctx_merge_iter_arg.leak_ngctx = 0; + gctx_tree_iter(&gctxs, NULL, prof_gctx_merge_iter, + (void *)&prof_gctx_merge_iter_arg); prof_leave(tsd, tdata); @@ -1528,12 +1577,15 @@ prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, bool leakcheck) goto label_open_close_error; /* Dump profile header. */ - if (prof_dump_header(propagate_err, &cnt_all)) + if (prof_dump_header(tsd, propagate_err, + &prof_tdata_merge_iter_arg.cnt_all)) goto label_write_error; /* Dump per gctx profile stats. */ + prof_gctx_dump_iter_arg.tsd = tsd; + prof_gctx_dump_iter_arg.propagate_err = propagate_err; if (gctx_tree_iter(&gctxs, NULL, prof_gctx_dump_iter, - (void *)&propagate_err) != NULL) + (void *)&prof_gctx_dump_iter_arg) != NULL) goto label_write_error; /* Dump /proc/<pid>/maps if possible. */ if (prof_dump_maps(propagate_err)) goto label_write_error; if (prof_dump_close(propagate_err)) goto label_open_close_error; prof_gctx_finish(tsd, &gctxs); - malloc_mutex_unlock(&prof_dump_mtx); - - if (leakcheck) - prof_leakcheck(&cnt_all, leak_ngctx, filename); + malloc_mutex_unlock(tsd, &prof_dump_mtx); + if (leakcheck) { + prof_leakcheck(&prof_tdata_merge_iter_arg.cnt_all, + prof_gctx_merge_iter_arg.leak_ngctx, filename); + } return (false); label_write_error: prof_dump_close(propagate_err); label_open_close_error: prof_gctx_finish(tsd, &gctxs); - malloc_mutex_unlock(&prof_dump_mtx); + malloc_mutex_unlock(tsd, &prof_dump_mtx); return (true); } @@ -1594,23 +1647,21 @@ prof_fdump(void) return; tsd = tsd_fetch(); - malloc_mutex_lock(&prof_dump_seq_mtx); + malloc_mutex_lock(tsd, &prof_dump_seq_mtx); prof_dump_filename(filename, 'f', VSEQ_INVALID); - malloc_mutex_unlock(&prof_dump_seq_mtx); + malloc_mutex_unlock(tsd, &prof_dump_seq_mtx); prof_dump(tsd, false, filename, opt_prof_leak); } void -prof_idump(void) +prof_idump(tsd_t *tsd) { - tsd_t *tsd; prof_tdata_t *tdata; cassert(config_prof); - if (!prof_booted) + if (!prof_booted || tsd == NULL) return; - tsd = tsd_fetch(); tdata = prof_tdata_get(tsd, false); if (tdata == NULL) return; @@ -1621,50 +1672,46 @@ prof_idump(void) if (opt_prof_prefix[0] != '\0') { char filename[PATH_MAX + 1]; - malloc_mutex_lock(&prof_dump_seq_mtx); + malloc_mutex_lock(tsd, &prof_dump_seq_mtx); prof_dump_filename(filename, 'i', prof_dump_iseq); prof_dump_iseq++; - malloc_mutex_unlock(&prof_dump_seq_mtx); + malloc_mutex_unlock(tsd, &prof_dump_seq_mtx); prof_dump(tsd, false, filename, false); } } bool -prof_mdump(const char *filename) +prof_mdump(tsd_t *tsd, const char *filename) { - tsd_t *tsd; char filename_buf[DUMP_FILENAME_BUFSIZE]; cassert(config_prof); if (!opt_prof || !prof_booted) return (true); - tsd = tsd_fetch(); if (filename == NULL) { /* No filename specified, so automatically generate one.
*/ if (opt_prof_prefix[0] == '\0') return (true); - malloc_mutex_lock(&prof_dump_seq_mtx); + malloc_mutex_lock(tsd, &prof_dump_seq_mtx); prof_dump_filename(filename_buf, 'm', prof_dump_mseq); prof_dump_mseq++; - malloc_mutex_unlock(&prof_dump_seq_mtx); + malloc_mutex_unlock(tsd, &prof_dump_seq_mtx); filename = filename_buf; } return (prof_dump(tsd, true, filename, false)); } void -prof_gdump(void) +prof_gdump(tsd_t *tsd) { - tsd_t *tsd; prof_tdata_t *tdata; cassert(config_prof); - if (!prof_booted) + if (!prof_booted || tsd == NULL) return; - tsd = tsd_fetch(); tdata = prof_tdata_get(tsd, false); if (tdata == NULL) return; @@ -1675,10 +1722,10 @@ prof_gdump(void) if (opt_prof_prefix[0] != '\0') { char filename[DUMP_FILENAME_BUFSIZE]; - malloc_mutex_lock(&prof_dump_seq_mtx); + malloc_mutex_lock(tsd, &prof_dump_seq_mtx); prof_dump_filename(filename, 'u', prof_dump_useq); prof_dump_useq++; - malloc_mutex_unlock(&prof_dump_seq_mtx); + malloc_mutex_unlock(tsd, &prof_dump_seq_mtx); prof_dump(tsd, false, filename, false); } } @@ -1707,14 +1754,14 @@ prof_bt_keycomp(const void *k1, const void *k2) } JEMALLOC_INLINE_C uint64_t -prof_thr_uid_alloc(void) +prof_thr_uid_alloc(tsd_t *tsd) { uint64_t thr_uid; - malloc_mutex_lock(&next_thr_uid_mtx); + malloc_mutex_lock(tsd, &next_thr_uid_mtx); thr_uid = next_thr_uid; next_thr_uid++; - malloc_mutex_unlock(&next_thr_uid_mtx); + malloc_mutex_unlock(tsd, &next_thr_uid_mtx); return (thr_uid); } @@ -1759,9 +1806,9 @@ prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, tdata->dumping = false; tdata->active = active; - malloc_mutex_lock(&tdatas_mtx); + malloc_mutex_lock(tsd, &tdatas_mtx); tdata_tree_insert(&tdatas, tdata); - malloc_mutex_unlock(&tdatas_mtx); + malloc_mutex_unlock(tsd, &tdatas_mtx); return (tdata); } @@ -1770,13 +1817,13 @@ prof_tdata_t * prof_tdata_init(tsd_t *tsd) { - return (prof_tdata_init_impl(tsd, prof_thr_uid_alloc(), 0, NULL, - prof_thread_active_init_get())); + return (prof_tdata_init_impl(tsd, prof_thr_uid_alloc(tsd), 0, NULL, + prof_thread_active_init_get(tsd))); } -/* tdata->lock must be held. */ static bool -prof_tdata_should_destroy(prof_tdata_t *tdata, bool even_if_attached) +prof_tdata_should_destroy_unlocked(tsd_t *tsd, prof_tdata_t *tdata, + bool even_if_attached) { if (tdata->attached && !even_if_attached) @@ -1786,18 +1833,32 @@ prof_tdata_should_destroy(prof_tdata_t *tdata, bool even_if_attached) return (true); } -/* tdatas_mtx must be held. 
*/ +static bool +prof_tdata_should_destroy(tsd_t *tsd, prof_tdata_t *tdata, + bool even_if_attached) +{ + + malloc_mutex_assert_owner(tsd, tdata->lock); + + return (prof_tdata_should_destroy_unlocked(tsd, tdata, + even_if_attached)); +} + static void prof_tdata_destroy_locked(tsd_t *tsd, prof_tdata_t *tdata, bool even_if_attached) { tcache_t *tcache; - assert(prof_tdata_should_destroy(tdata, even_if_attached)); + malloc_mutex_assert_owner(tsd, &tdatas_mtx); + assert(tsd_prof_tdata_get(tsd) != tdata); tdata_tree_remove(&tdatas, tdata); + assert(prof_tdata_should_destroy_unlocked(tsd, tdata, + even_if_attached)); + tcache = tcache_get(tsd, false); if (tdata->thread_name != NULL) idalloctm(tsd, tdata->thread_name, tcache, true, true); @@ -1809,9 +1870,9 @@ static void prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata, bool even_if_attached) { - malloc_mutex_lock(&tdatas_mtx); + malloc_mutex_lock(tsd, &tdatas_mtx); prof_tdata_destroy_locked(tsd, tdata, even_if_attached); - malloc_mutex_unlock(&tdatas_mtx); + malloc_mutex_unlock(tsd, &tdatas_mtx); } static void @@ -1819,9 +1880,9 @@ prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata) { bool destroy_tdata; - malloc_mutex_lock(tdata->lock); + malloc_mutex_lock(tsd, tdata->lock); if (tdata->attached) { - destroy_tdata = prof_tdata_should_destroy(tdata, true); + destroy_tdata = prof_tdata_should_destroy(tsd, tdata, true); /* * Only detach if !destroy_tdata, because detaching would allow * another thread to win the race to destroy tdata. @@ -1831,7 +1892,7 @@ prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata) tsd_prof_tdata_set(tsd, NULL); } else destroy_tdata = false; - malloc_mutex_unlock(tdata->lock); + malloc_mutex_unlock(tsd, tdata->lock); if (destroy_tdata) prof_tdata_destroy(tsd, tdata, true); } @@ -1851,18 +1912,18 @@ prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata) } static bool -prof_tdata_expire(prof_tdata_t *tdata) +prof_tdata_expire(tsd_t *tsd, prof_tdata_t *tdata) { bool destroy_tdata; - malloc_mutex_lock(tdata->lock); + malloc_mutex_lock(tsd, tdata->lock); if (!tdata->expired) { tdata->expired = true; destroy_tdata = tdata->attached ? false : - prof_tdata_should_destroy(tdata, false); + prof_tdata_should_destroy(tsd, tdata, false); } else destroy_tdata = false; - malloc_mutex_unlock(tdata->lock); + malloc_mutex_unlock(tsd, tdata->lock); return (destroy_tdata); } @@ -1870,8 +1931,9 @@ prof_tdata_expire(prof_tdata_t *tdata) static prof_tdata_t * prof_tdata_reset_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, void *arg) { + tsd_t *tsd = (tsd_t *)arg; - return (prof_tdata_expire(tdata) ? tdata : NULL); + return (prof_tdata_expire(tsd, tdata) ? 
tdata : NULL); } void @@ -1881,15 +1943,15 @@ prof_reset(tsd_t *tsd, size_t lg_sample) assert(lg_sample < (sizeof(uint64_t) << 3)); - malloc_mutex_lock(&prof_dump_mtx); - malloc_mutex_lock(&tdatas_mtx); + malloc_mutex_lock(tsd, &prof_dump_mtx); + malloc_mutex_lock(tsd, &tdatas_mtx); lg_prof_sample = lg_sample; next = NULL; do { prof_tdata_t *to_destroy = tdata_tree_iter(&tdatas, next, - prof_tdata_reset_iter, NULL); + prof_tdata_reset_iter, (void *)tsd); if (to_destroy != NULL) { next = tdata_tree_next(&tdatas, to_destroy); prof_tdata_destroy_locked(tsd, to_destroy, false); @@ -1897,8 +1959,8 @@ prof_reset(tsd_t *tsd, size_t lg_sample) next = NULL; } while (next != NULL); - malloc_mutex_unlock(&tdatas_mtx); - malloc_mutex_unlock(&prof_dump_mtx); + malloc_mutex_unlock(tsd, &tdatas_mtx); + malloc_mutex_unlock(tsd, &prof_dump_mtx); } void @@ -1915,35 +1977,33 @@ prof_tdata_cleanup(tsd_t *tsd) } bool -prof_active_get(void) +prof_active_get(tsd_t *tsd) { bool prof_active_current; - malloc_mutex_lock(&prof_active_mtx); + malloc_mutex_lock(tsd, &prof_active_mtx); prof_active_current = prof_active; - malloc_mutex_unlock(&prof_active_mtx); + malloc_mutex_unlock(tsd, &prof_active_mtx); return (prof_active_current); } bool -prof_active_set(bool active) +prof_active_set(tsd_t *tsd, bool active) { bool prof_active_old; - malloc_mutex_lock(&prof_active_mtx); + malloc_mutex_lock(tsd, &prof_active_mtx); prof_active_old = prof_active; prof_active = active; - malloc_mutex_unlock(&prof_active_mtx); + malloc_mutex_unlock(tsd, &prof_active_mtx); return (prof_active_old); } const char * -prof_thread_name_get(void) +prof_thread_name_get(tsd_t *tsd) { - tsd_t *tsd; prof_tdata_t *tdata; - tsd = tsd_fetch(); tdata = prof_tdata_get(tsd, true); if (tdata == NULL) return (""); @@ -2006,12 +2066,10 @@ prof_thread_name_set(tsd_t *tsd, const char *thread_name) } bool -prof_thread_active_get(void) +prof_thread_active_get(tsd_t *tsd) { - tsd_t *tsd; prof_tdata_t *tdata; - tsd = tsd_fetch(); tdata = prof_tdata_get(tsd, true); if (tdata == NULL) return (false); @@ -2019,12 +2077,10 @@ prof_thread_active_get(void) } bool -prof_thread_active_set(bool active) +prof_thread_active_set(tsd_t *tsd, bool active) { - tsd_t *tsd; prof_tdata_t *tdata; - tsd = tsd_fetch(); tdata = prof_tdata_get(tsd, true); if (tdata == NULL) return (true); @@ -2033,48 +2089,48 @@ prof_thread_active_set(bool active) } bool -prof_thread_active_init_get(void) +prof_thread_active_init_get(tsd_t *tsd) { bool active_init; - malloc_mutex_lock(&prof_thread_active_init_mtx); + malloc_mutex_lock(tsd, &prof_thread_active_init_mtx); active_init = prof_thread_active_init; - malloc_mutex_unlock(&prof_thread_active_init_mtx); + malloc_mutex_unlock(tsd, &prof_thread_active_init_mtx); return (active_init); } bool -prof_thread_active_init_set(bool active_init) +prof_thread_active_init_set(tsd_t *tsd, bool active_init) { bool active_init_old; - malloc_mutex_lock(&prof_thread_active_init_mtx); + malloc_mutex_lock(tsd, &prof_thread_active_init_mtx); active_init_old = prof_thread_active_init; prof_thread_active_init = active_init; - malloc_mutex_unlock(&prof_thread_active_init_mtx); + malloc_mutex_unlock(tsd, &prof_thread_active_init_mtx); return (active_init_old); } bool -prof_gdump_get(void) +prof_gdump_get(tsd_t *tsd) { bool prof_gdump_current; - malloc_mutex_lock(&prof_gdump_mtx); + malloc_mutex_lock(tsd, &prof_gdump_mtx); prof_gdump_current = prof_gdump_val; - malloc_mutex_unlock(&prof_gdump_mtx); + malloc_mutex_unlock(tsd, &prof_gdump_mtx); return 
(prof_gdump_current); } bool -prof_gdump_set(bool gdump) +prof_gdump_set(tsd_t *tsd, bool gdump) { bool prof_gdump_old; - malloc_mutex_lock(&prof_gdump_mtx); + malloc_mutex_lock(tsd, &prof_gdump_mtx); prof_gdump_old = prof_gdump_val; prof_gdump_val = gdump; - malloc_mutex_unlock(&prof_gdump_mtx); + malloc_mutex_unlock(tsd, &prof_gdump_mtx); return (prof_gdump_old); } @@ -2115,47 +2171,54 @@ prof_boot1(void) } bool -prof_boot2(void) +prof_boot2(tsd_t *tsd) { cassert(config_prof); if (opt_prof) { - tsd_t *tsd; unsigned i; lg_prof_sample = opt_lg_prof_sample; prof_active = opt_prof_active; - if (malloc_mutex_init(&prof_active_mtx)) + if (malloc_mutex_init(&prof_active_mtx, "prof_active", + WITNESS_RANK_PROF_ACTIVE)) return (true); prof_gdump_val = opt_prof_gdump; - if (malloc_mutex_init(&prof_gdump_mtx)) + if (malloc_mutex_init(&prof_gdump_mtx, "prof_gdump", + WITNESS_RANK_PROF_GDUMP)) return (true); prof_thread_active_init = opt_prof_thread_active_init; - if (malloc_mutex_init(&prof_thread_active_init_mtx)) + if (malloc_mutex_init(&prof_thread_active_init_mtx, + "prof_thread_active_init", + WITNESS_RANK_PROF_THREAD_ACTIVE_INIT)) return (true); - tsd = tsd_fetch(); if (ckh_new(tsd, &bt2gctx, PROF_CKH_MINITEMS, prof_bt_hash, prof_bt_keycomp)) return (true); - if (malloc_mutex_init(&bt2gctx_mtx)) + if (malloc_mutex_init(&bt2gctx_mtx, "prof_bt2gctx", + WITNESS_RANK_PROF_BT2GCTX)) return (true); tdata_tree_new(&tdatas); - if (malloc_mutex_init(&tdatas_mtx)) + if (malloc_mutex_init(&tdatas_mtx, "prof_tdatas", + WITNESS_RANK_PROF_TDATAS)) return (true); next_thr_uid = 0; - if (malloc_mutex_init(&next_thr_uid_mtx)) + if (malloc_mutex_init(&next_thr_uid_mtx, "prof_next_thr_uid", + WITNESS_RANK_PROF_NEXT_THR_UID)) return (true); - if (malloc_mutex_init(&prof_dump_seq_mtx)) + if (malloc_mutex_init(&prof_dump_seq_mtx, "prof_dump_seq", + WITNESS_RANK_PROF_DUMP_SEQ)) return (true); - if (malloc_mutex_init(&prof_dump_mtx)) + if (malloc_mutex_init(&prof_dump_mtx, "prof_dump", + WITNESS_RANK_PROF_DUMP)) return (true); if (opt_prof_final && opt_prof_prefix[0] != '\0' && @@ -2165,21 +2228,23 @@ prof_boot2(void) abort(); } - gctx_locks = (malloc_mutex_t *)base_alloc(PROF_NCTX_LOCKS * + gctx_locks = (malloc_mutex_t *)base_alloc(tsd, PROF_NCTX_LOCKS * sizeof(malloc_mutex_t)); if (gctx_locks == NULL) return (true); for (i = 0; i < PROF_NCTX_LOCKS; i++) { - if (malloc_mutex_init(&gctx_locks[i])) + if (malloc_mutex_init(&gctx_locks[i], "prof_gctx", + WITNESS_RANK_PROF_GCTX)) return (true); } - tdata_locks = (malloc_mutex_t *)base_alloc(PROF_NTDATA_LOCKS * - sizeof(malloc_mutex_t)); + tdata_locks = (malloc_mutex_t *)base_alloc(tsd, + PROF_NTDATA_LOCKS * sizeof(malloc_mutex_t)); if (tdata_locks == NULL) return (true); for (i = 0; i < PROF_NTDATA_LOCKS; i++) { - if (malloc_mutex_init(&tdata_locks[i])) + if (malloc_mutex_init(&tdata_locks[i], "prof_tdata", + WITNESS_RANK_PROF_TDATA)) return (true); } } @@ -2198,56 +2263,56 @@ prof_boot2(void) } void -prof_prefork(void) +prof_prefork(tsd_t *tsd) { if (opt_prof) { unsigned i; - malloc_mutex_prefork(&tdatas_mtx); - malloc_mutex_prefork(&bt2gctx_mtx); - malloc_mutex_prefork(&next_thr_uid_mtx); - malloc_mutex_prefork(&prof_dump_seq_mtx); + malloc_mutex_prefork(tsd, &tdatas_mtx); + malloc_mutex_prefork(tsd, &bt2gctx_mtx); + malloc_mutex_prefork(tsd, &next_thr_uid_mtx); + malloc_mutex_prefork(tsd, &prof_dump_seq_mtx); for (i = 0; i < PROF_NCTX_LOCKS; i++) - malloc_mutex_prefork(&gctx_locks[i]); + malloc_mutex_prefork(tsd, &gctx_locks[i]); for (i = 0; i < PROF_NTDATA_LOCKS; 
i++) - malloc_mutex_prefork(&tdata_locks[i]); + malloc_mutex_prefork(tsd, &tdata_locks[i]); } } void -prof_postfork_parent(void) +prof_postfork_parent(tsd_t *tsd) { if (opt_prof) { unsigned i; for (i = 0; i < PROF_NTDATA_LOCKS; i++) - malloc_mutex_postfork_parent(&tdata_locks[i]); + malloc_mutex_postfork_parent(tsd, &tdata_locks[i]); for (i = 0; i < PROF_NCTX_LOCKS; i++) - malloc_mutex_postfork_parent(&gctx_locks[i]); - malloc_mutex_postfork_parent(&prof_dump_seq_mtx); - malloc_mutex_postfork_parent(&next_thr_uid_mtx); - malloc_mutex_postfork_parent(&bt2gctx_mtx); - malloc_mutex_postfork_parent(&tdatas_mtx); + malloc_mutex_postfork_parent(tsd, &gctx_locks[i]); + malloc_mutex_postfork_parent(tsd, &prof_dump_seq_mtx); + malloc_mutex_postfork_parent(tsd, &next_thr_uid_mtx); + malloc_mutex_postfork_parent(tsd, &bt2gctx_mtx); + malloc_mutex_postfork_parent(tsd, &tdatas_mtx); } } void -prof_postfork_child(void) +prof_postfork_child(tsd_t *tsd) { if (opt_prof) { unsigned i; for (i = 0; i < PROF_NTDATA_LOCKS; i++) - malloc_mutex_postfork_child(&tdata_locks[i]); + malloc_mutex_postfork_child(tsd, &tdata_locks[i]); for (i = 0; i < PROF_NCTX_LOCKS; i++) - malloc_mutex_postfork_child(&gctx_locks[i]); - malloc_mutex_postfork_child(&prof_dump_seq_mtx); - malloc_mutex_postfork_child(&next_thr_uid_mtx); - malloc_mutex_postfork_child(&bt2gctx_mtx); - malloc_mutex_postfork_child(&tdatas_mtx); + malloc_mutex_postfork_child(tsd, &gctx_locks[i]); + malloc_mutex_postfork_child(tsd, &prof_dump_seq_mtx); + malloc_mutex_postfork_child(tsd, &next_thr_uid_mtx); + malloc_mutex_postfork_child(tsd, &bt2gctx_mtx); + malloc_mutex_postfork_child(tsd, &tdatas_mtx); } } diff --git a/src/quarantine.c b/src/quarantine.c index c024deab..6cb74b37 100644 --- a/src/quarantine.c +++ b/src/quarantine.c @@ -99,7 +99,7 @@ static void quarantine_drain_one(tsd_t *tsd, quarantine_t *quarantine) { quarantine_obj_t *obj = &quarantine->objs[quarantine->first]; - assert(obj->usize == isalloc(obj->ptr, config_prof)); + assert(obj->usize == isalloc(tsd, obj->ptr, config_prof)); idalloctm(tsd, obj->ptr, NULL, false, true); quarantine->curbytes -= obj->usize; quarantine->curobjs--; @@ -119,7 +119,7 @@ void quarantine(tsd_t *tsd, void *ptr) { quarantine_t *quarantine; - size_t usize = isalloc(ptr, config_prof); + size_t usize = isalloc(tsd, ptr, config_prof); cassert(config_fill); assert(opt_quarantine); diff --git a/src/tcache.c b/src/tcache.c index a8620c3d..a9539f64 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -24,10 +24,10 @@ static tcaches_t *tcaches_avail; /******************************************************************************/ size_t -tcache_salloc(const void *ptr) +tcache_salloc(tsd_t *tsd, const void *ptr) { - return (arena_salloc(ptr, false)); + return (arena_salloc(tsd, ptr, false)); } void @@ -107,12 +107,13 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, arena_bin_t *bin = &bin_arena->bins[binind]; if (config_prof && bin_arena == arena) { - if (arena_prof_accum(arena, tcache->prof_accumbytes)) - prof_idump(); + if (arena_prof_accum(tsd, arena, + tcache->prof_accumbytes)) + prof_idump(tsd); tcache->prof_accumbytes = 0; } - malloc_mutex_lock(&bin->lock); + malloc_mutex_lock(tsd, &bin->lock); if (config_stats && bin_arena == arena) { assert(!merged_stats); merged_stats = true; @@ -130,8 +131,8 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, (uintptr_t)chunk) >> LG_PAGE; arena_chunk_map_bits_t *bitselm = arena_bitselm_get_mutable(chunk, pageind); - 
arena_dalloc_bin_junked_locked(bin_arena, chunk, - ptr, bitselm); + arena_dalloc_bin_junked_locked(tsd, bin_arena, + chunk, ptr, bitselm); } else { /* * This object was allocated via a different @@ -143,7 +144,7 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, ndeferred++; } } - malloc_mutex_unlock(&bin->lock); + malloc_mutex_unlock(tsd, &bin->lock); arena_decay_ticks(tsd, bin_arena, nflush - ndeferred); } if (config_stats && !merged_stats) { @@ -152,11 +153,11 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, * arena, so the stats didn't get merged. Manually do so now. */ arena_bin_t *bin = &arena->bins[binind]; - malloc_mutex_lock(&bin->lock); + malloc_mutex_lock(tsd, &bin->lock); bin->stats.nflushes++; bin->stats.nrequests += tbin->tstats.nrequests; tbin->tstats.nrequests = 0; - malloc_mutex_unlock(&bin->lock); + malloc_mutex_unlock(tsd, &bin->lock); } memmove(tbin->avail - rem, tbin->avail - tbin->ncached, rem * @@ -189,7 +190,7 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, if (config_prof) idump = false; - malloc_mutex_lock(&locked_arena->lock); + malloc_mutex_lock(tsd, &locked_arena->lock); if ((config_prof || config_stats) && locked_arena == arena) { if (config_prof) { idump = arena_prof_accum_locked(arena, @@ -212,8 +213,8 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (extent_node_arena_get(&chunk->node) == locked_arena) { - arena_dalloc_large_junked_locked(locked_arena, - chunk, ptr); + arena_dalloc_large_junked_locked(tsd, + locked_arena, chunk, ptr); } else { /* * This object was allocated via a different @@ -225,9 +226,9 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, ndeferred++; } } - malloc_mutex_unlock(&locked_arena->lock); + malloc_mutex_unlock(tsd, &locked_arena->lock); if (config_prof && idump) - prof_idump(); + prof_idump(tsd); arena_decay_ticks(tsd, locked_arena, nflush - ndeferred); } if (config_stats && !merged_stats) { @@ -235,12 +236,12 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, * The flush loop didn't happen to flush to this thread's * arena, so the stats didn't get merged. Manually do so now. */ - malloc_mutex_lock(&arena->lock); + malloc_mutex_lock(tsd, &arena->lock); arena->stats.nrequests_large += tbin->tstats.nrequests; arena->stats.lstats[binind - NBINS].nrequests += tbin->tstats.nrequests; tbin->tstats.nrequests = 0; - malloc_mutex_unlock(&arena->lock); + malloc_mutex_unlock(tsd, &arena->lock); } memmove(tbin->avail - rem, tbin->avail - tbin->ncached, rem * @@ -251,33 +252,34 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, } void -tcache_arena_associate(tcache_t *tcache, arena_t *arena) +tcache_arena_associate(tsd_t *tsd, tcache_t *tcache, arena_t *arena) { if (config_stats) { /* Link into list of extant tcaches. 
*/ - malloc_mutex_lock(&arena->lock); + malloc_mutex_lock(tsd, &arena->lock); ql_elm_new(tcache, link); ql_tail_insert(&arena->tcache_ql, tcache, link); - malloc_mutex_unlock(&arena->lock); + malloc_mutex_unlock(tsd, &arena->lock); } } void -tcache_arena_reassociate(tcache_t *tcache, arena_t *oldarena, arena_t *newarena) +tcache_arena_reassociate(tsd_t *tsd, tcache_t *tcache, arena_t *oldarena, + arena_t *newarena) { - tcache_arena_dissociate(tcache, oldarena); - tcache_arena_associate(tcache, newarena); + tcache_arena_dissociate(tsd, tcache, oldarena); + tcache_arena_associate(tsd, tcache, newarena); } void -tcache_arena_dissociate(tcache_t *tcache, arena_t *arena) +tcache_arena_dissociate(tsd_t *tsd, tcache_t *tcache, arena_t *arena) { if (config_stats) { /* Unlink from list of extant tcaches. */ - malloc_mutex_lock(&arena->lock); + malloc_mutex_lock(tsd, &arena->lock); if (config_debug) { bool in_ql = false; tcache_t *iter; @@ -290,8 +292,8 @@ tcache_arena_dissociate(tcache_t *tcache, arena_t *arena) assert(in_ql); } ql_remove(&arena->tcache_ql, tcache, link); - tcache_stats_merge(tcache, arena); - malloc_mutex_unlock(&arena->lock); + tcache_stats_merge(tsd, tcache, arena); + malloc_mutex_unlock(tsd, &arena->lock); } } @@ -327,11 +329,11 @@ tcache_create(tsd_t *tsd, arena_t *arena) size = sa2u(size, CACHELINE); tcache = ipallocztm(tsd, size, CACHELINE, true, false, true, - arena_get(0, false)); + arena_get(tsd, 0, false)); if (tcache == NULL) return (NULL); - tcache_arena_associate(tcache, arena); + tcache_arena_associate(tsd, tcache, arena); ticker_init(&tcache->gc_ticker, TCACHE_GC_INCR); @@ -358,7 +360,7 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache) unsigned i; arena = arena_choose(tsd, NULL); - tcache_arena_dissociate(tcache, arena); + tcache_arena_dissociate(tsd, tcache, arena); for (i = 0; i < NBINS; i++) { tcache_bin_t *tbin = &tcache->tbins[i]; @@ -366,9 +368,9 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache) if (config_stats && tbin->tstats.nrequests != 0) { arena_bin_t *bin = &arena->bins[i]; - malloc_mutex_lock(&bin->lock); + malloc_mutex_lock(tsd, &bin->lock); bin->stats.nrequests += tbin->tstats.nrequests; - malloc_mutex_unlock(&bin->lock); + malloc_mutex_unlock(tsd, &bin->lock); } } @@ -377,17 +379,17 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache) tcache_bin_flush_large(tsd, tbin, i, 0, tcache); if (config_stats && tbin->tstats.nrequests != 0) { - malloc_mutex_lock(&arena->lock); + malloc_mutex_lock(tsd, &arena->lock); arena->stats.nrequests_large += tbin->tstats.nrequests; arena->stats.lstats[i - NBINS].nrequests += tbin->tstats.nrequests; - malloc_mutex_unlock(&arena->lock); + malloc_mutex_unlock(tsd, &arena->lock); } } if (config_prof && tcache->prof_accumbytes > 0 && - arena_prof_accum(arena, tcache->prof_accumbytes)) - prof_idump(); + arena_prof_accum(tsd, arena, tcache->prof_accumbytes)) + prof_idump(tsd); idalloctm(tsd, tcache, false, true, true); } @@ -413,21 +415,22 @@ tcache_enabled_cleanup(tsd_t *tsd) /* Do nothing. */ } -/* Caller must own arena->lock. */ void -tcache_stats_merge(tcache_t *tcache, arena_t *arena) +tcache_stats_merge(tsd_t *tsd, tcache_t *tcache, arena_t *arena) { unsigned i; cassert(config_stats); + malloc_mutex_assert_owner(tsd, &arena->lock); + /* Merge and reset tcache stats. 
*/ for (i = 0; i < NBINS; i++) { arena_bin_t *bin = &arena->bins[i]; tcache_bin_t *tbin = &tcache->tbins[i]; - malloc_mutex_lock(&bin->lock); + malloc_mutex_lock(tsd, &bin->lock); bin->stats.nrequests += tbin->tstats.nrequests; - malloc_mutex_unlock(&bin->lock); + malloc_mutex_unlock(tsd, &bin->lock); tbin->tstats.nrequests = 0; } @@ -447,7 +450,7 @@ tcaches_create(tsd_t *tsd, unsigned *r_ind) tcaches_t *elm; if (tcaches == NULL) { - tcaches = base_alloc(sizeof(tcache_t *) * + tcaches = base_alloc(tsd, sizeof(tcache_t *) * (MALLOCX_TCACHE_MAX+1)); if (tcaches == NULL) return (true); @@ -455,7 +458,7 @@ tcaches_create(tsd_t *tsd, unsigned *r_ind) if (tcaches_avail == NULL && tcaches_past > MALLOCX_TCACHE_MAX) return (true); - tcache = tcache_create(tsd, arena_get(0, false)); + tcache = tcache_create(tsd, arena_get(tsd, 0, false)); if (tcache == NULL) return (true); @@ -501,7 +504,7 @@ tcaches_destroy(tsd_t *tsd, unsigned ind) } bool -tcache_boot(void) +tcache_boot(tsd_t *tsd) { unsigned i; @@ -519,7 +522,7 @@ tcache_boot(void) nhbins = size2index(tcache_maxclass) + 1; /* Initialize tcache_bin_info. */ - tcache_bin_info = (tcache_bin_info_t *)base_alloc(nhbins * + tcache_bin_info = (tcache_bin_info_t *)base_alloc(tsd, nhbins * sizeof(tcache_bin_info_t)); if (tcache_bin_info == NULL) return (true); diff --git a/src/tsd.c b/src/tsd.c index 34c1573c..38d8bde4 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -106,15 +106,17 @@ MALLOC_TSD } } -bool +tsd_t * malloc_tsd_boot0(void) { + tsd_t *tsd; ncleanups = 0; if (tsd_boot0()) - return (true); - *tsd_arenas_tdata_bypassp_get(tsd_fetch()) = true; - return (false); + return (NULL); + tsd = tsd_fetch(); + *tsd_arenas_tdata_bypassp_get(tsd) = true; + return (tsd); } void @@ -169,10 +171,10 @@ tsd_init_check_recursion(tsd_init_head_t *head, tsd_init_block_t *block) tsd_init_block_t *iter; /* Check whether this thread has already inserted into the list. 
*/ - malloc_mutex_lock(&head->lock); + malloc_mutex_lock(NULL, &head->lock); ql_foreach(iter, &head->blocks, link) { if (iter->thread == self) { - malloc_mutex_unlock(&head->lock); + malloc_mutex_unlock(NULL, &head->lock); return (iter->data); } } @@ -180,7 +182,7 @@ tsd_init_check_recursion(tsd_init_head_t *head, tsd_init_block_t *block) ql_elm_new(block, link); block->thread = self; ql_tail_insert(&head->blocks, block, link); - malloc_mutex_unlock(&head->lock); + malloc_mutex_unlock(NULL, &head->lock); return (NULL); } @@ -188,8 +190,8 @@ void tsd_init_finish(tsd_init_head_t *head, tsd_init_block_t *block) { - malloc_mutex_lock(&head->lock); + malloc_mutex_lock(NULL, &head->lock); ql_remove(&head->blocks, block, link); - malloc_mutex_unlock(&head->lock); + malloc_mutex_unlock(NULL, &head->lock); } #endif diff --git a/src/witness.c b/src/witness.c new file mode 100644 index 00000000..b7b91aca --- /dev/null +++ b/src/witness.c @@ -0,0 +1,206 @@ +#define JEMALLOC_WITNESS_C_ +#include "jemalloc/internal/jemalloc_internal.h" + +void +witness_init(witness_t *witness, const char *name, witness_rank_t rank, + witness_comp_t *comp) +{ + + witness->name = name; + witness->rank = rank; + witness->comp = comp; +} + +#ifdef JEMALLOC_JET +#undef witness_lock_error +#define witness_lock_error JEMALLOC_N(witness_lock_error_impl) +#endif +static void +witness_lock_error(const witness_list_t *witnesses, const witness_t *witness) +{ + witness_t *w; + + malloc_printf(": Lock rank order reversal:"); + ql_foreach(w, witnesses, link) { + malloc_printf(" %s(%u)", w->name, w->rank); + } + malloc_printf(" %s(%u)\n", witness->name, witness->rank); + abort(); +} +#ifdef JEMALLOC_JET +#undef witness_lock_error +#define witness_lock_error JEMALLOC_N(witness_lock_error) +witness_lock_error_t *witness_lock_error = JEMALLOC_N(witness_lock_error_impl); +#endif + +void +witness_lock(tsd_t *tsd, witness_t *witness) +{ + witness_list_t *witnesses; + witness_t *w; + + cassert(config_debug); + + if (tsd == NULL) + return; + if (witness->rank == WITNESS_RANK_OMIT) + return; + + witness_assert_not_owner(tsd, witness); + + witnesses = tsd_witnessesp_get(tsd); + w = ql_last(witnesses, link); + if (w != NULL && w->rank >= witness->rank && (w->comp == NULL || + w->comp != witness->comp || w->comp(w, witness) > 0)) + witness_lock_error(witnesses, witness); + + ql_elm_new(witness, link); + ql_tail_insert(witnesses, witness, link); +} + +void +witness_unlock(tsd_t *tsd, witness_t *witness) +{ + witness_list_t *witnesses; + + cassert(config_debug); + + if (tsd == NULL) + return; + if (witness->rank == WITNESS_RANK_OMIT) + return; + + witness_assert_owner(tsd, witness); + + witnesses = tsd_witnessesp_get(tsd); + ql_remove(witnesses, witness, link); +} + +#ifdef JEMALLOC_JET +#undef witness_owner_error +#define witness_owner_error JEMALLOC_N(witness_owner_error_impl) +#endif +static void +witness_owner_error(const witness_t *witness) +{ + + malloc_printf(": Should own %s(%u)\n", witness->name, + witness->rank); + abort(); +} +#ifdef JEMALLOC_JET +#undef witness_owner_error +#define witness_owner_error JEMALLOC_N(witness_owner_error) +witness_owner_error_t *witness_owner_error = + JEMALLOC_N(witness_owner_error_impl); +#endif + +void +witness_assert_owner(tsd_t *tsd, const witness_t *witness) +{ + witness_list_t *witnesses; + witness_t *w; + + cassert(config_debug); + + if (tsd == NULL) + return; + if (witness->rank == WITNESS_RANK_OMIT) + return; + + witnesses = tsd_witnessesp_get(tsd); + ql_foreach(w, witnesses, link) { + if (w == 
witness) + return; + } + witness_owner_error(witness); +} + +#ifdef JEMALLOC_JET +#undef witness_not_owner_error +#define witness_not_owner_error JEMALLOC_N(witness_not_owner_error_impl) +#endif +static void +witness_not_owner_error(const witness_t *witness) +{ + + malloc_printf(": Should not own %s(%u)\n", witness->name, + witness->rank); + abort(); +} +#ifdef JEMALLOC_JET +#undef witness_not_owner_error +#define witness_not_owner_error JEMALLOC_N(witness_not_owner_error) +witness_not_owner_error_t *witness_not_owner_error = + JEMALLOC_N(witness_not_owner_error_impl); +#endif + +void +witness_assert_not_owner(tsd_t *tsd, const witness_t *witness) +{ + witness_list_t *witnesses; + witness_t *w; + + cassert(config_debug); + + if (tsd == NULL) + return; + if (witness->rank == WITNESS_RANK_OMIT) + return; + + witnesses = tsd_witnessesp_get(tsd); + ql_foreach(w, witnesses, link) { + if (w == witness) + witness_not_owner_error(witness); + } +} + +#ifdef JEMALLOC_JET +#undef witness_lockless_error +#define witness_lockless_error JEMALLOC_N(witness_lockless_error_impl) +#endif +static void +witness_lockless_error(const witness_list_t *witnesses) +{ + witness_t *w; + + malloc_printf(": Should not own any locks:"); + ql_foreach(w, witnesses, link) { + malloc_printf(" %s(%u)", w->name, w->rank); + } + malloc_printf("\n"); + abort(); +} +#ifdef JEMALLOC_JET +#undef witness_lockless_error +#define witness_lockless_error JEMALLOC_N(witness_lockless_error) +witness_lockless_error_t *witness_lockless_error = + JEMALLOC_N(witness_lockless_error_impl); +#endif + +void +witness_assert_lockless(tsd_t *tsd) +{ + witness_list_t *witnesses; + witness_t *w; + + cassert(config_debug); + + if (tsd == NULL) + return; + + witnesses = tsd_witnessesp_get(tsd); + w = ql_last(witnesses, link); + if (w != NULL) { + witness_lockless_error(witnesses); + } +} + +void +witnesses_cleanup(tsd_t *tsd) +{ + + witness_assert_lockless(tsd); + + /* Do nothing. */ +} diff --git a/src/zone.c b/src/zone.c index 6859b3fe..8f25051a 100644 --- a/src/zone.c +++ b/src/zone.c @@ -56,7 +56,7 @@ zone_size(malloc_zone_t *zone, void *ptr) * not work in practice, we must check all pointers to assure that they * reside within a mapped chunk before determining size. 
*/ - return (ivsalloc(ptr, config_prof)); + return (ivsalloc(tsd_fetch(), ptr, config_prof)); } static void * @@ -87,7 +87,7 @@ static void zone_free(malloc_zone_t *zone, void *ptr) { - if (ivsalloc(ptr, config_prof) != 0) { + if (ivsalloc(tsd_fetch(), ptr, config_prof) != 0) { je_free(ptr); return; } @@ -99,7 +99,7 @@ static void * zone_realloc(malloc_zone_t *zone, void *ptr, size_t size) { - if (ivsalloc(ptr, config_prof) != 0) + if (ivsalloc(tsd_fetch(), ptr, config_prof) != 0) return (je_realloc(ptr, size)); return (realloc(ptr, size)); @@ -123,7 +123,7 @@ zone_free_definite_size(malloc_zone_t *zone, void *ptr, size_t size) { size_t alloc_size; - alloc_size = ivsalloc(ptr, config_prof); + alloc_size = ivsalloc(tsd_fetch(), ptr, config_prof); if (alloc_size != 0) { assert(alloc_size == size); je_free(ptr); diff --git a/test/unit/junk.c b/test/unit/junk.c index fecf6fae..e251a124 100644 --- a/test/unit/junk.c +++ b/test/unit/junk.c @@ -53,10 +53,10 @@ arena_dalloc_junk_large_intercept(void *ptr, size_t usize) } static void -huge_dalloc_junk_intercept(void *ptr, size_t usize) +huge_dalloc_junk_intercept(tsd_t *tsd, void *ptr, size_t usize) { - huge_dalloc_junk_orig(ptr, usize); + huge_dalloc_junk_orig(tsd, ptr, usize); /* * The conditions under which junk filling actually occurs are nuanced * enough that it doesn't make sense to duplicate the decision logic in diff --git a/test/unit/prof_reset.c b/test/unit/prof_reset.c index 69983e5e..83f51df8 100644 --- a/test/unit/prof_reset.c +++ b/test/unit/prof_reset.c @@ -94,7 +94,8 @@ TEST_END bool prof_dump_header_intercepted = false; prof_cnt_t cnt_all_copy = {0, 0, 0, 0}; static bool -prof_dump_header_intercept(bool propagate_err, const prof_cnt_t *cnt_all) +prof_dump_header_intercept(tsd_t *tsd, bool propagate_err, + const prof_cnt_t *cnt_all) { prof_dump_header_intercepted = true; diff --git a/test/unit/witness.c b/test/unit/witness.c new file mode 100644 index 00000000..430d8203 --- /dev/null +++ b/test/unit/witness.c @@ -0,0 +1,278 @@ +#include "test/jemalloc_test.h" + +static witness_lock_error_t *witness_lock_error_orig; +static witness_owner_error_t *witness_owner_error_orig; +static witness_not_owner_error_t *witness_not_owner_error_orig; +static witness_lockless_error_t *witness_lockless_error_orig; + +static bool saw_lock_error; +static bool saw_owner_error; +static bool saw_not_owner_error; +static bool saw_lockless_error; + +static void +witness_lock_error_intercept(const witness_list_t *witnesses, + const witness_t *witness) +{ + + saw_lock_error = true; +} + +static void +witness_owner_error_intercept(const witness_t *witness) +{ + + saw_owner_error = true; +} + +static void +witness_not_owner_error_intercept(const witness_t *witness) +{ + + saw_not_owner_error = true; +} + +static void +witness_lockless_error_intercept(const witness_list_t *witnesses) +{ + + saw_lockless_error = true; +} + +static int +witness_comp(const witness_t *a, const witness_t *b) +{ + + assert_u_eq(a->rank, b->rank, "Witnesses should have equal rank"); + + return (strcmp(a->name, b->name)); +} + +static int +witness_comp_reverse(const witness_t *a, const witness_t *b) +{ + + assert_u_eq(a->rank, b->rank, "Witnesses should have equal rank"); + + return (-strcmp(a->name, b->name)); +} + +TEST_BEGIN(test_witness) +{ + witness_t a, b; + tsd_t *tsd; + + test_skip_if(!config_debug); + + tsd = tsd_fetch(); + + witness_assert_lockless(tsd); + + witness_init(&a, "a", 1, NULL); + witness_assert_not_owner(tsd, &a); + witness_lock(tsd, &a); + 
witness_assert_owner(tsd, &a); + + witness_init(&b, "b", 2, NULL); + witness_assert_not_owner(tsd, &b); + witness_lock(tsd, &b); + witness_assert_owner(tsd, &b); + + witness_unlock(tsd, &a); + witness_unlock(tsd, &b); + + witness_assert_lockless(tsd); +} +TEST_END + +TEST_BEGIN(test_witness_comp) +{ + witness_t a, b, c, d; + tsd_t *tsd; + + test_skip_if(!config_debug); + + tsd = tsd_fetch(); + + witness_assert_lockless(tsd); + + witness_init(&a, "a", 1, witness_comp); + witness_assert_not_owner(tsd, &a); + witness_lock(tsd, &a); + witness_assert_owner(tsd, &a); + + witness_init(&b, "b", 1, witness_comp); + witness_assert_not_owner(tsd, &b); + witness_lock(tsd, &b); + witness_assert_owner(tsd, &b); + witness_unlock(tsd, &b); + + witness_lock_error_orig = witness_lock_error; + witness_lock_error = witness_lock_error_intercept; + saw_lock_error = false; + + witness_init(&c, "c", 1, witness_comp_reverse); + witness_assert_not_owner(tsd, &c); + assert_false(saw_lock_error, "Unexpected witness lock error"); + witness_lock(tsd, &c); + assert_true(saw_lock_error, "Expected witness lock error"); + witness_unlock(tsd, &c); + + saw_lock_error = false; + + witness_init(&d, "d", 1, NULL); + witness_assert_not_owner(tsd, &d); + assert_false(saw_lock_error, "Unexpected witness lock error"); + witness_lock(tsd, &d); + assert_true(saw_lock_error, "Expected witness lock error"); + witness_unlock(tsd, &d); + + witness_unlock(tsd, &a); + + witness_assert_lockless(tsd); + + witness_lock_error = witness_lock_error_orig; +} +TEST_END + +TEST_BEGIN(test_witness_reversal) +{ + witness_t a, b; + tsd_t *tsd; + + test_skip_if(!config_debug); + + witness_lock_error_orig = witness_lock_error; + witness_lock_error = witness_lock_error_intercept; + saw_lock_error = false; + + tsd = tsd_fetch(); + + witness_assert_lockless(tsd); + + witness_init(&a, "a", 1, NULL); + witness_init(&b, "b", 2, NULL); + + witness_lock(tsd, &b); + assert_false(saw_lock_error, "Unexpected witness lock error"); + witness_lock(tsd, &a); + assert_true(saw_lock_error, "Expected witness lock error"); + + witness_unlock(tsd, &a); + witness_unlock(tsd, &b); + + witness_assert_lockless(tsd); + + witness_lock_error = witness_lock_error_orig; +} +TEST_END + +TEST_BEGIN(test_witness_recursive) +{ + witness_t a; + tsd_t *tsd; + + test_skip_if(!config_debug); + + witness_not_owner_error_orig = witness_not_owner_error; + witness_not_owner_error = witness_not_owner_error_intercept; + saw_not_owner_error = false; + + witness_lock_error_orig = witness_lock_error; + witness_lock_error = witness_lock_error_intercept; + saw_lock_error = false; + + tsd = tsd_fetch(); + + witness_assert_lockless(tsd); + + witness_init(&a, "a", 1, NULL); + + witness_lock(tsd, &a); + assert_false(saw_lock_error, "Unexpected witness lock error"); + assert_false(saw_not_owner_error, "Unexpected witness not owner error"); + witness_lock(tsd, &a); + assert_true(saw_lock_error, "Expected witness lock error"); + assert_true(saw_not_owner_error, "Expected witness not owner error"); + + witness_unlock(tsd, &a); + + witness_assert_lockless(tsd); + + witness_owner_error = witness_owner_error_orig; + witness_lock_error = witness_lock_error_orig; + +} +TEST_END + +TEST_BEGIN(test_witness_unlock_not_owned) +{ + witness_t a; + tsd_t *tsd; + + test_skip_if(!config_debug); + + witness_owner_error_orig = witness_owner_error; + witness_owner_error = witness_owner_error_intercept; + saw_owner_error = false; + + tsd = tsd_fetch(); + + witness_assert_lockless(tsd); + + witness_init(&a, "a", 1, NULL); + + 
assert_false(saw_owner_error, "Unexpected owner error"); + witness_unlock(tsd, &a); + assert_true(saw_owner_error, "Expected owner error"); + + witness_assert_lockless(tsd); + + witness_owner_error = witness_owner_error_orig; +} +TEST_END + +TEST_BEGIN(test_witness_lockful) +{ + witness_t a; + tsd_t *tsd; + + test_skip_if(!config_debug); + + witness_lockless_error_orig = witness_lockless_error; + witness_lockless_error = witness_lockless_error_intercept; + saw_lockless_error = false; + + tsd = tsd_fetch(); + + witness_assert_lockless(tsd); + + witness_init(&a, "a", 1, NULL); + + assert_false(saw_lockless_error, "Unexpected lockless error"); + witness_assert_lockless(tsd); + + witness_lock(tsd, &a); + witness_assert_lockless(tsd); + assert_true(saw_lockless_error, "Expected lockless error"); + + witness_unlock(tsd, &a); + + witness_assert_lockless(tsd); + + witness_lockless_error = witness_lockless_error_orig; +} +TEST_END + +int +main(void) +{ + + return (test( + test_witness, + test_witness_comp, + test_witness_reversal, + test_witness_recursive, + test_witness_unlock_not_owned, + test_witness_lockful)); +} From a15841cc7d7c60b2c72c091d8048223789f5c4e0 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 14 Apr 2016 02:12:33 -0700 Subject: [PATCH 43/82] Fix a compilation error. Fix a compilation error that occurs if Valgrind is not enabled. This regression was caused by b2c0d6322d2307458ae2b28545f8a5c9903d7ef5 (Add witness, a simple online locking validator.). --- include/jemalloc/internal/valgrind.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/valgrind.h b/include/jemalloc/internal/valgrind.h index 7c6a62fa..2667bf5e 100644 --- a/include/jemalloc/internal/valgrind.h +++ b/include/jemalloc/internal/valgrind.h @@ -83,8 +83,8 @@ #define JEMALLOC_VALGRIND_MAKE_MEM_NOACCESS(ptr, usize) do {} while (0) #define JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ptr, usize) do {} while (0) #define JEMALLOC_VALGRIND_MAKE_MEM_DEFINED(ptr, usize) do {} while (0) -#define JEMALLOC_VALGRIND_MALLOC(cond, ptr, usize, zero) do {} while (0) -#define JEMALLOC_VALGRIND_REALLOC(maybe_moved, ptr, usize, \ +#define JEMALLOC_VALGRIND_MALLOC(cond, tsd, ptr, usize, zero) do {} while (0) +#define JEMALLOC_VALGRIND_REALLOC(maybe_moved, tsd, ptr, usize, \ ptr_maybe_null, old_ptr, old_usize, old_rzsize, old_ptr_maybe_null, \ zero) do {} while (0) #define JEMALLOC_VALGRIND_FREE(ptr, rzsize) do {} while (0) From 22884243250a0f7d412ca745fbf7231d69abe771 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 14 Apr 2016 12:17:56 -0700 Subject: [PATCH 44/82] s/MALLOC_MUTEX_RANK_OMIT/WITNESS_RANK_OMIT/ This fixes a compilation error caused by b2c0d6322d2307458ae2b28545f8a5c9903d7ef5 (Add witness, a simple online locking validator.). This resolves #375. --- include/jemalloc/internal/mb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/jemalloc/internal/mb.h b/include/jemalloc/internal/mb.h index de54f508..81129d08 100644 --- a/include/jemalloc/internal/mb.h +++ b/include/jemalloc/internal/mb.h @@ -104,7 +104,7 @@ mb_write(void) { malloc_mutex_t mtx; - malloc_mutex_init(&mtx, MALLOC_MUTEX_RANK_OMIT); + malloc_mutex_init(&mtx, WITNESS_RANK_OMIT); malloc_mutex_lock(NULL, &mtx); malloc_mutex_unlock(NULL, &mtx); } From d9394d0ca870fc20b48ee7c57c69d48effe51041 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 17 Apr 2016 12:33:39 -0700 Subject: [PATCH 45/82] Convert base_mtx locking protocol comments to assertions. 
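This turns each former "base_mtx must be held." comment into an executable precondition. Rough sketch of the resulting shape of one routine (an excerpt that mirrors the src/base.c hunks below and assumes jemalloc's internal headers; it is not standalone code):

    static extent_node_t *
    base_node_try_alloc(tsd_t *tsd)
    {
        extent_node_t *node;

        /* Was the comment "base_mtx must be held."; now checked at runtime. */
        malloc_mutex_assert_owner(tsd, &base_mtx);

        /* Pop a node off the base_nodes free list, as before. */
        if (base_nodes == NULL)
            return (NULL);
        node = base_nodes;
        base_nodes = *(extent_node_t **)node;
        return (node);
    }

Under --enable-debug a caller that does not own base_mtx now aborts via the witness ownership check; in non-debug builds the assertion is a no-op.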
--- src/base.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/src/base.c b/src/base.c index 87b376b8..901553a1 100644 --- a/src/base.c +++ b/src/base.c @@ -13,12 +13,13 @@ static size_t base_mapped; /******************************************************************************/ -/* base_mtx must be held. */ static extent_node_t * -base_node_try_alloc(void) +base_node_try_alloc(tsd_t *tsd) { extent_node_t *node; + malloc_mutex_assert_owner(tsd, &base_mtx); + if (base_nodes == NULL) return (NULL); node = base_nodes; @@ -27,33 +28,34 @@ base_node_try_alloc(void) return (node); } -/* base_mtx must be held. */ static void -base_node_dalloc(extent_node_t *node) +base_node_dalloc(tsd_t *tsd, extent_node_t *node) { + malloc_mutex_assert_owner(tsd, &base_mtx); + JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(node, sizeof(extent_node_t)); *(extent_node_t **)node = base_nodes; base_nodes = node; } -/* base_mtx must be held. */ static extent_node_t * -base_chunk_alloc(size_t minsize) +base_chunk_alloc(tsd_t *tsd, size_t minsize) { extent_node_t *node; size_t csize, nsize; void *addr; + malloc_mutex_assert_owner(tsd, &base_mtx); assert(minsize != 0); - node = base_node_try_alloc(); + node = base_node_try_alloc(tsd); /* Allocate enough space to also carve a node out if necessary. */ nsize = (node == NULL) ? CACHELINE_CEILING(sizeof(extent_node_t)) : 0; csize = CHUNK_CEILING(minsize + nsize); addr = chunk_alloc_base(csize); if (addr == NULL) { if (node != NULL) - base_node_dalloc(node); + base_node_dalloc(tsd, node); return (NULL); } base_mapped += csize; @@ -98,7 +100,7 @@ base_alloc(tsd_t *tsd, size_t size) extent_tree_szad_remove(&base_avail_szad, node); } else { /* Try to allocate more space. */ - node = base_chunk_alloc(csize); + node = base_chunk_alloc(tsd, csize); } if (node == NULL) { ret = NULL; @@ -111,7 +113,7 @@ base_alloc(tsd_t *tsd, size_t size) extent_node_size_set(node, extent_node_size_get(node) - csize); extent_tree_szad_insert(&base_avail_szad, node); } else - base_node_dalloc(node); + base_node_dalloc(tsd, node); if (config_stats) { base_allocated += csize; /* From 1b5830178fe73d4018233fea6858fff87d2b19df Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 17 Apr 2016 12:53:48 -0700 Subject: [PATCH 46/82] Fix malloc_mutex_[un]lock() to conditionally check witness. Also remove tautological cassert(config_debug) calls. 
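With this change every witness call in the mutex fast paths sits behind the same config_debug guard that witness_unlock() already had, so the checks drop out of non-debug builds entirely. Rough sketch of the lock path (the platform-specific acquire is elided, and the trailing guarded witness_lock() call is assumed to mirror the guarded witness_unlock() in the unlock path; see the include/jemalloc/internal/mutex.h hunk below for the exact edit):

    JEMALLOC_INLINE void
    malloc_mutex_lock(tsd_t *tsd, malloc_mutex_t *mutex)
    {
        if (isthreaded) {
            if (config_debug)
                witness_assert_not_owner(tsd, &mutex->witness);
            /* ... platform-specific acquisition of mutex->lock ... */
            if (config_debug)
                witness_lock(tsd, &mutex->witness);
        }
    }

Once the call sites are guarded this way, the cassert(config_debug) calls inside the witness functions only restate what their callers already guarantee, hence their removal.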
--- include/jemalloc/internal/mutex.h | 4 ++-- src/witness.c | 10 ---------- 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/include/jemalloc/internal/mutex.h b/include/jemalloc/internal/mutex.h index 7d19a0f4..581aaf57 100644 --- a/include/jemalloc/internal/mutex.h +++ b/include/jemalloc/internal/mutex.h @@ -81,7 +81,8 @@ malloc_mutex_lock(tsd_t *tsd, malloc_mutex_t *mutex) { if (isthreaded) { - witness_assert_not_owner(tsd, &mutex->witness); + if (config_debug) + witness_assert_not_owner(tsd, &mutex->witness); #ifdef _WIN32 # if _WIN32_WINNT >= 0x0600 AcquireSRWLockExclusive(&mutex->lock); @@ -103,7 +104,6 @@ malloc_mutex_unlock(tsd_t *tsd, malloc_mutex_t *mutex) { if (isthreaded) { - witness_assert_owner(tsd, &mutex->witness); if (config_debug) witness_unlock(tsd, &mutex->witness); #ifdef _WIN32 diff --git a/src/witness.c b/src/witness.c index b7b91aca..444d200f 100644 --- a/src/witness.c +++ b/src/witness.c @@ -39,8 +39,6 @@ witness_lock(tsd_t *tsd, witness_t *witness) witness_list_t *witnesses; witness_t *w; - cassert(config_debug); - if (tsd == NULL) return; if (witness->rank == WITNESS_RANK_OMIT) @@ -63,8 +61,6 @@ witness_unlock(tsd_t *tsd, witness_t *witness) { witness_list_t *witnesses; - cassert(config_debug); - if (tsd == NULL) return; if (witness->rank == WITNESS_RANK_OMIT) @@ -101,8 +97,6 @@ witness_assert_owner(tsd_t *tsd, const witness_t *witness) witness_list_t *witnesses; witness_t *w; - cassert(config_debug); - if (tsd == NULL) return; if (witness->rank == WITNESS_RANK_OMIT) @@ -141,8 +135,6 @@ witness_assert_not_owner(tsd_t *tsd, const witness_t *witness) witness_list_t *witnesses; witness_t *w; - cassert(config_debug); - if (tsd == NULL) return; if (witness->rank == WITNESS_RANK_OMIT) @@ -184,8 +176,6 @@ witness_assert_lockless(tsd_t *tsd) witness_list_t *witnesses; witness_t *w; - cassert(config_debug); - if (tsd == NULL) return; From 1423ee9016f1e7cb0cf3302207bcc488ce4374fc Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 17 Apr 2016 10:30:25 -0700 Subject: [PATCH 47/82] Fix style nits. --- include/jemalloc/internal/nstime.h | 2 +- src/tsd.c | 2 +- test/unit/junk_alloc.c | 2 +- test/unit/junk_free.c | 2 +- test/unit/util.c | 4 ++-- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/include/jemalloc/internal/nstime.h b/include/jemalloc/internal/nstime.h index dcb4b47f..dc293b73 100644 --- a/include/jemalloc/internal/nstime.h +++ b/include/jemalloc/internal/nstime.h @@ -1,7 +1,7 @@ /******************************************************************************/ #ifdef JEMALLOC_H_TYPES -#define JEMALLOC_CLOCK_GETTIME defined(_POSIX_MONOTONIC_CLOCK) \ +#define JEMALLOC_CLOCK_GETTIME defined(_POSIX_MONOTONIC_CLOCK) \ && _POSIX_MONOTONIC_CLOCK >= 0 typedef struct nstime_s nstime_t; diff --git a/src/tsd.c b/src/tsd.c index 38d8bde4..aeaa5e18 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -77,7 +77,7 @@ tsd_cleanup(void *arg) /* Do nothing. 
*/ break; case tsd_state_nominal: -#define O(n, t) \ +#define O(n, t) \ n##_cleanup(tsd); MALLOC_TSD #undef O diff --git a/test/unit/junk_alloc.c b/test/unit/junk_alloc.c index 8db3331d..a5895b5c 100644 --- a/test/unit/junk_alloc.c +++ b/test/unit/junk_alloc.c @@ -1,3 +1,3 @@ -#define JEMALLOC_TEST_JUNK_OPT "junk:alloc" +#define JEMALLOC_TEST_JUNK_OPT "junk:alloc" #include "junk.c" #undef JEMALLOC_TEST_JUNK_OPT diff --git a/test/unit/junk_free.c b/test/unit/junk_free.c index 482a61d0..bb5183c9 100644 --- a/test/unit/junk_free.c +++ b/test/unit/junk_free.c @@ -1,3 +1,3 @@ -#define JEMALLOC_TEST_JUNK_OPT "junk:free" +#define JEMALLOC_TEST_JUNK_OPT "junk:free" #include "junk.c" #undef JEMALLOC_TEST_JUNK_OPT diff --git a/test/unit/util.c b/test/unit/util.c index c4333d53..c958dc0f 100644 --- a/test/unit/util.c +++ b/test/unit/util.c @@ -162,11 +162,11 @@ TEST_BEGIN(test_malloc_snprintf_truncated) char buf[BUFLEN]; size_t result; size_t len; -#define TEST(expected_str_untruncated, ...) do { \ +#define TEST(expected_str_untruncated, ...) do { \ result = malloc_snprintf(buf, len, __VA_ARGS__); \ assert_d_eq(strncmp(buf, expected_str_untruncated, len-1), 0, \ "Unexpected string inequality (\"%s\" vs \"%s\")", \ - buf, expected_str_untruncated); \ + buf, expected_str_untruncated); \ assert_zu_eq(result, strlen(expected_str_untruncated), \ "Unexpected result"); \ } while (0) From a0c632c9d5d3fb31189ee85440b52579e37c85c1 Mon Sep 17 00:00:00 2001 From: Rajat Goel Date: Mon, 18 Apr 2016 11:54:09 -0700 Subject: [PATCH 48/82] Update private_symbols.txt Add 4 missing symbols --- include/jemalloc/internal/private_symbols.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index be5d30e7..8cd88d29 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -217,6 +217,8 @@ extent_node_size_get extent_node_size_set extent_node_zeroed_get extent_node_zeroed_set +extent_tree_ad_destroy +extent_tree_ad_destroy_recurse extent_tree_ad_empty extent_tree_ad_first extent_tree_ad_insert @@ -234,6 +236,8 @@ extent_tree_ad_reverse_iter extent_tree_ad_reverse_iter_recurse extent_tree_ad_reverse_iter_start extent_tree_ad_search +extent_tree_szad_destroy +extent_tree_szad_destroy_recurse extent_tree_szad_empty extent_tree_szad_first extent_tree_szad_insert From ab0cfe01fa354597d28303952d3b0f87d932f6d6 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 18 Apr 2016 15:11:20 -0700 Subject: [PATCH 49/82] Update private_symbols.txt. Change test-related mangling to simplify symbol filtering. 
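Concretely, test hooks defined under JEMALLOC_JET now use an n_ prefix rather than an _impl suffix, matching the je_/jet_/jet_n_ prefix stripping performed by the commands below. For example (excerpt of the src/arena.c hunks in this patch, with the unchanged function body elided):

    #ifdef JEMALLOC_JET
    #undef run_quantize_floor
    #define run_quantize_floor JEMALLOC_N(n_run_quantize_floor)
    #endif
    static size_t
    run_quantize_floor(size_t size)
    ...
    #ifdef JEMALLOC_JET
    #undef run_quantize_floor
    #define run_quantize_floor JEMALLOC_N(run_quantize_floor)
    run_quantize_t *run_quantize_floor = JEMALLOC_N(n_run_quantize_floor);
    #endif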
The following commands can be used to detect missing/obsolete symbol mangling, with the caveat that the full set of symbols is based on the union of symbols generated by all configurations, some of which are platform-specific: ./autogen.sh --enable-debug --enable-prof --enable-lazy-lock make all tests nm -a lib/libjemalloc.a src/*.jet.o \ |grep " [TDBCR] " \ |awk '{print $3}' \ |sed -e 's/^\(je_\|jet_\(n_\)\?\)\([a-zA-Z0-9_]*\)/\3/g' \ |LC_COLLATE=C sort -u \ |grep -v \ -e '^\(malloc\|calloc\|posix_memalign\|aligned_alloc\|realloc\|free\)$' \ -e '^\(m\|r\|x\|s\|d\|sd\|n\)allocx$' \ -e '^mallctl\(\|nametomib\|bymib\)$' \ -e '^malloc_\(stats_print\|usable_size\|message\)$' \ -e '^\(memalign\|valloc\)$' \ -e '^__\(malloc\|memalign\|realloc\|free\)_hook$' \ -e '^pthread_create$' \ > /tmp/private_symbols.txt --- include/jemalloc/internal/private_symbols.txt | 39 +++++++++++++++---- src/arena.c | 24 ++++++------ src/nstime.c | 4 +- 3 files changed, 45 insertions(+), 22 deletions(-) diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 8cd88d29..c8799cba 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -100,12 +100,6 @@ arena_ralloc_junk_large arena_ralloc_no_move arena_rd_to_miscelm arena_redzone_corruption -arena_run_heap_empty -arena_run_heap_first -arena_run_heap_insert -arena_run_heap_new -arena_run_heap_remove_first -arena_run_heap_remove arena_run_regind arena_run_to_miscelm arena_salloc @@ -132,6 +126,11 @@ atomic_sub_u atomic_sub_uint32 atomic_sub_uint64 atomic_sub_z +atomic_write_p +atomic_write_u +atomic_write_uint32 +atomic_write_uint64 +atomic_write_z base_alloc base_boot base_postfork_child @@ -207,6 +206,8 @@ extent_node_addr_get extent_node_addr_set extent_node_arena_get extent_node_arena_set +extent_node_committed_get +extent_node_committed_set extent_node_dirty_insert extent_node_dirty_linkage_init extent_node_dirty_remove @@ -315,12 +316,13 @@ jemalloc_postfork_parent jemalloc_prefork large_maxclass lg_floor +lg_prof_sample malloc_cprintf -malloc_mutex_init -malloc_mutex_lock malloc_mutex_assert_not_owner malloc_mutex_assert_owner malloc_mutex_boot +malloc_mutex_init +malloc_mutex_lock malloc_mutex_postfork_child malloc_mutex_postfork_parent malloc_mutex_prefork @@ -344,6 +346,8 @@ narenas_tdata_cleanup narenas_total_get ncpus nhbins +nhclasses +nlclasses nstime_add nstime_compare nstime_copy @@ -397,6 +401,7 @@ pow2_ceil_u64 pow2_ceil_zu prng_lg_range prng_range +prof_active prof_active_get prof_active_get_unlocked prof_active_set @@ -406,6 +411,7 @@ prof_backtrace prof_boot0 prof_boot1 prof_boot2 +prof_bt_count prof_dump_header prof_dump_open prof_free @@ -432,6 +438,7 @@ prof_tctx_get prof_tctx_reset prof_tctx_set prof_tdata_cleanup +prof_tdata_count prof_tdata_get prof_tdata_init prof_tdata_reinit @@ -519,6 +526,13 @@ ticker_tick ticker_ticks tsd_arena_get tsd_arena_set +tsd_arenap_get +tsd_arenas_tdata_bypass_get +tsd_arenas_tdata_bypass_set +tsd_arenas_tdata_bypassp_get +tsd_arenas_tdata_get +tsd_arenas_tdata_set +tsd_arenas_tdatap_get tsd_boot tsd_boot0 tsd_boot1 @@ -536,19 +550,28 @@ tsd_init_head tsd_nominal tsd_prof_tdata_get tsd_prof_tdata_set +tsd_prof_tdatap_get tsd_quarantine_get tsd_quarantine_set +tsd_quarantinep_get tsd_set tsd_tcache_enabled_get tsd_tcache_enabled_set +tsd_tcache_enabledp_get tsd_tcache_get tsd_tcache_set +tsd_tcachep_get tsd_thread_allocated_get tsd_thread_allocated_set +tsd_thread_allocatedp_get 
tsd_thread_deallocated_get tsd_thread_deallocated_set +tsd_thread_deallocatedp_get tsd_tls tsd_tsd +tsd_witnesses_get +tsd_witnesses_set +tsd_witnessesp_get u2rz valgrind_freelike_block valgrind_make_mem_defined diff --git a/src/arena.c b/src/arena.c index cc648e31..48fa93cb 100644 --- a/src/arena.c +++ b/src/arena.c @@ -156,7 +156,7 @@ run_quantize_ceil_compute(size_t size) #ifdef JEMALLOC_JET #undef run_quantize_floor -#define run_quantize_floor JEMALLOC_N(run_quantize_floor_impl) +#define run_quantize_floor JEMALLOC_N(n_run_quantize_floor) #endif static size_t run_quantize_floor(size_t size) @@ -174,12 +174,12 @@ run_quantize_floor(size_t size) #ifdef JEMALLOC_JET #undef run_quantize_floor #define run_quantize_floor JEMALLOC_N(run_quantize_floor) -run_quantize_t *run_quantize_floor = JEMALLOC_N(run_quantize_floor_impl); +run_quantize_t *run_quantize_floor = JEMALLOC_N(n_run_quantize_floor); #endif #ifdef JEMALLOC_JET #undef run_quantize_ceil -#define run_quantize_ceil JEMALLOC_N(run_quantize_ceil_impl) +#define run_quantize_ceil JEMALLOC_N(n_run_quantize_ceil) #endif static size_t run_quantize_ceil(size_t size) @@ -197,7 +197,7 @@ run_quantize_ceil(size_t size) #ifdef JEMALLOC_JET #undef run_quantize_ceil #define run_quantize_ceil JEMALLOC_N(run_quantize_ceil) -run_quantize_t *run_quantize_ceil = JEMALLOC_N(run_quantize_ceil_impl); +run_quantize_t *run_quantize_ceil = JEMALLOC_N(n_run_quantize_ceil); #endif static arena_run_heap_t * @@ -2271,7 +2271,7 @@ arena_alloc_junk_small(void *ptr, arena_bin_info_t *bin_info, bool zero) #ifdef JEMALLOC_JET #undef arena_redzone_corruption -#define arena_redzone_corruption JEMALLOC_N(arena_redzone_corruption_impl) +#define arena_redzone_corruption JEMALLOC_N(n_arena_redzone_corruption) #endif static void arena_redzone_corruption(void *ptr, size_t usize, bool after, @@ -2286,7 +2286,7 @@ arena_redzone_corruption(void *ptr, size_t usize, bool after, #undef arena_redzone_corruption #define arena_redzone_corruption JEMALLOC_N(arena_redzone_corruption) arena_redzone_corruption_t *arena_redzone_corruption = - JEMALLOC_N(arena_redzone_corruption_impl); + JEMALLOC_N(n_arena_redzone_corruption); #endif static void @@ -2327,7 +2327,7 @@ arena_redzones_validate(void *ptr, arena_bin_info_t *bin_info, bool reset) #ifdef JEMALLOC_JET #undef arena_dalloc_junk_small -#define arena_dalloc_junk_small JEMALLOC_N(arena_dalloc_junk_small_impl) +#define arena_dalloc_junk_small JEMALLOC_N(n_arena_dalloc_junk_small) #endif void arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info) @@ -2342,7 +2342,7 @@ arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info) #undef arena_dalloc_junk_small #define arena_dalloc_junk_small JEMALLOC_N(arena_dalloc_junk_small) arena_dalloc_junk_small_t *arena_dalloc_junk_small = - JEMALLOC_N(arena_dalloc_junk_small_impl); + JEMALLOC_N(n_arena_dalloc_junk_small); #endif void @@ -2780,7 +2780,7 @@ arena_dalloc_small(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, void *ptr, #ifdef JEMALLOC_JET #undef arena_dalloc_junk_large -#define arena_dalloc_junk_large JEMALLOC_N(arena_dalloc_junk_large_impl) +#define arena_dalloc_junk_large JEMALLOC_N(n_arena_dalloc_junk_large) #endif void arena_dalloc_junk_large(void *ptr, size_t usize) @@ -2793,7 +2793,7 @@ arena_dalloc_junk_large(void *ptr, size_t usize) #undef arena_dalloc_junk_large #define arena_dalloc_junk_large JEMALLOC_N(arena_dalloc_junk_large) arena_dalloc_junk_large_t *arena_dalloc_junk_large = - JEMALLOC_N(arena_dalloc_junk_large_impl); + JEMALLOC_N(n_arena_dalloc_junk_large); 
#endif static void @@ -2981,7 +2981,7 @@ label_fail: #ifdef JEMALLOC_JET #undef arena_ralloc_junk_large -#define arena_ralloc_junk_large JEMALLOC_N(arena_ralloc_junk_large_impl) +#define arena_ralloc_junk_large JEMALLOC_N(n_arena_ralloc_junk_large) #endif static void arena_ralloc_junk_large(void *ptr, size_t old_usize, size_t usize) @@ -2996,7 +2996,7 @@ arena_ralloc_junk_large(void *ptr, size_t old_usize, size_t usize) #undef arena_ralloc_junk_large #define arena_ralloc_junk_large JEMALLOC_N(arena_ralloc_junk_large) arena_ralloc_junk_large_t *arena_ralloc_junk_large = - JEMALLOC_N(arena_ralloc_junk_large_impl); + JEMALLOC_N(n_arena_ralloc_junk_large); #endif /* diff --git a/src/nstime.c b/src/nstime.c index 4cf90b58..26e49dc5 100644 --- a/src/nstime.c +++ b/src/nstime.c @@ -99,7 +99,7 @@ nstime_divide(const nstime_t *time, const nstime_t *divisor) #ifdef JEMALLOC_JET #undef nstime_update -#define nstime_update JEMALLOC_N(nstime_update_impl) +#define nstime_update JEMALLOC_N(n_nstime_update) #endif bool nstime_update(nstime_t *time) @@ -144,5 +144,5 @@ nstime_update(nstime_t *time) #ifdef JEMALLOC_JET #undef nstime_update #define nstime_update JEMALLOC_N(nstime_update) -nstime_update_t *nstime_update = JEMALLOC_N(nstime_update_impl); +nstime_update_t *nstime_update = JEMALLOC_N(n_nstime_update); #endif From b6e07d2389d97c98c353fea4f04ec68a7d5a13a0 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 18 Apr 2016 15:42:09 -0700 Subject: [PATCH 50/82] Fix malloc_mutex_assert_[not_]owner() for --enable-lazy-lock case. --- include/jemalloc/internal/mutex.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/mutex.h b/include/jemalloc/internal/mutex.h index 581aaf57..5ddae11c 100644 --- a/include/jemalloc/internal/mutex.h +++ b/include/jemalloc/internal/mutex.h @@ -124,7 +124,7 @@ JEMALLOC_INLINE void malloc_mutex_assert_owner(tsd_t *tsd, malloc_mutex_t *mutex) { - if (config_debug) + if (isthreaded && config_debug) witness_assert_owner(tsd, &mutex->witness); } @@ -132,7 +132,7 @@ JEMALLOC_INLINE void malloc_mutex_assert_not_owner(tsd_t *tsd, malloc_mutex_t *mutex) { - if (config_debug) + if (isthreaded && config_debug) witness_assert_not_owner(tsd, &mutex->witness); } #endif From eb68842dafb6d86e8310b1db9ca0467d577091b1 Mon Sep 17 00:00:00 2001 From: hitstergtd Date: Mon, 18 Apr 2016 11:08:39 +0100 Subject: [PATCH 51/82] Doc typo fixes. --- doc/jemalloc.xml.in | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 63088cd1..9814c226 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1024,7 +1024,7 @@ for (i = 0; i < nbins; i++) { allocate memory during application initialization and then deadlock internally when jemalloc in turn calls atexit, so this option is not - univerally usable (though the application can register its own + universally usable (though the application can register its own atexit function with equivalent functionality). Therefore, this option should only be used with care; it is primarily intended as a performance tuning aid during application @@ -1328,7 +1328,7 @@ malloc_conf = "xmalloc:true";]]> option. Note that atexit may allocate memory during application initialization and then deadlock internally when jemalloc in turn calls atexit, so - this option is not univerally usable (though the application can + this option is not universally usable (though the application can register its own atexit function with equivalent functionality). 
This option is disabled by default. @@ -2070,7 +2070,7 @@ typedef struct { [] Average number of bytes allocated between - inverval-based profile dumps. See the + interval-based profile dumps. See the opt.lg_prof_interval option for additional information. From c9a4bf91702b351e73e2cd7cf9125afd076d59fe Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 22 Apr 2016 14:36:48 -0700 Subject: [PATCH 52/82] Reduce a variable scope. --- src/arena.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/arena.c b/src/arena.c index 48fa93cb..15023cf9 100644 --- a/src/arena.c +++ b/src/arena.c @@ -3320,7 +3320,6 @@ arena_new(tsd_t *tsd, unsigned ind) arena_t *arena; size_t arena_size; unsigned i; - arena_bin_t *bin; /* Compute arena size to incorporate sufficient runs_avail elements. */ arena_size = offsetof(arena_t, runs_avail) + (sizeof(arena_run_heap_t) * @@ -3411,7 +3410,7 @@ arena_new(tsd_t *tsd, unsigned ind) /* Initialize bins. */ for (i = 0; i < NBINS; i++) { - bin = &arena->bins[i]; + arena_bin_t *bin = &arena->bins[i]; if (malloc_mutex_init(&bin->lock, "arena_bin", WITNESS_RANK_ARENA_BIN)) return (NULL); From 66cd953514a18477eb49732e40d5c2ab5f1b12c5 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 22 Apr 2016 14:34:14 -0700 Subject: [PATCH 53/82] Do not allocate metadata via non-auto arenas, nor tcaches. This assures that all internally allocated metadata come from the first opt_narenas arenas, i.e. the automatically multiplexed arenas. --- include/jemalloc/internal/arena.h | 28 +++-- include/jemalloc/internal/huge.h | 7 +- .../jemalloc/internal/jemalloc_internal.h.in | 21 +++- include/jemalloc/internal/private_symbols.txt | 2 + include/jemalloc/internal/tcache.h | 10 +- include/jemalloc/internal/tsd.h | 2 + src/arena.c | 29 +++-- src/ckh.c | 16 +-- src/ctl.c | 2 +- src/huge.c | 28 ++--- src/jemalloc.c | 112 ++++++++++++------ src/prof.c | 42 +++---- src/quarantine.c | 8 +- src/tcache.c | 20 ++-- 14 files changed, 192 insertions(+), 135 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 2130e9a0..103a4c91 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -290,10 +290,18 @@ struct arena_s { unsigned ind; /* - * Number of threads currently assigned to this arena. This field is - * synchronized via atomic operations. + * Number of threads currently assigned to this arena, synchronized via + * atomic operations. Each thread has two distinct assignments, one for + * application-serving allocation, and the other for internal metadata + * allocation. Internal metadata must not be allocated from arenas + * created via the arenas.extend mallctl, because the arena..reset + * mallctl indiscriminately discards all allocations for the affected + * arena. + * + * 0: Application allocation. + * 1: Internal metadata allocation. 
*/ - unsigned nthreads; + unsigned nthreads[2]; /* * There are three classes of arena operations from a locking @@ -541,7 +549,7 @@ void arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info); void arena_quarantine_junk_small(void *ptr, size_t usize); void *arena_malloc_large(tsd_t *tsd, arena_t *arena, szind_t ind, bool zero); void *arena_malloc_hard(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind, - bool zero, tcache_t *tcache); + bool zero); void *arena_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, bool zero, tcache_t *tcache); void arena_prof_promoted(tsd_t *tsd, const void *ptr, size_t size); @@ -583,9 +591,9 @@ void arena_stats_merge(tsd_t *tsd, arena_t *arena, unsigned *nthreads, size_t *nactive, size_t *ndirty, arena_stats_t *astats, malloc_bin_stats_t *bstats, malloc_large_stats_t *lstats, malloc_huge_stats_t *hstats); -unsigned arena_nthreads_get(arena_t *arena); -void arena_nthreads_inc(arena_t *arena); -void arena_nthreads_dec(arena_t *arena); +unsigned arena_nthreads_get(arena_t *arena, bool internal); +void arena_nthreads_inc(arena_t *arena, bool internal); +void arena_nthreads_dec(arena_t *arena, bool internal); arena_t *arena_new(tsd_t *tsd, unsigned ind); bool arena_boot(void); void arena_prefork(tsd_t *tsd, arena_t *arena); @@ -1320,7 +1328,7 @@ arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind, bool zero, assert(size > tcache_maxclass); } - return (arena_malloc_hard(tsd, arena, size, ind, zero, tcache)); + return (arena_malloc_hard(tsd, arena, size, ind, zero)); } JEMALLOC_ALWAYS_INLINE arena_t * @@ -1426,7 +1434,7 @@ arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) } } } else - huge_dalloc(tsd, ptr, tcache); + huge_dalloc(tsd, ptr); } JEMALLOC_ALWAYS_INLINE void @@ -1477,7 +1485,7 @@ arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache) } } } else - huge_dalloc(tsd, ptr, tcache); + huge_dalloc(tsd, ptr); } # endif /* JEMALLOC_ARENA_INLINE_B */ #endif diff --git a/include/jemalloc/internal/huge.h b/include/jemalloc/internal/huge.h index f19d3368..9de2055d 100644 --- a/include/jemalloc/internal/huge.h +++ b/include/jemalloc/internal/huge.h @@ -9,10 +9,9 @@ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -void *huge_malloc(tsd_t *tsd, arena_t *arena, size_t usize, bool zero, - tcache_t *tcache); +void *huge_malloc(tsd_t *tsd, arena_t *arena, size_t usize, bool zero); void *huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, - bool zero, tcache_t *tcache); + bool zero); bool huge_ralloc_no_move(tsd_t *tsd, void *ptr, size_t oldsize, size_t usize_min, size_t usize_max, bool zero); void *huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, @@ -21,7 +20,7 @@ void *huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, typedef void (huge_dalloc_junk_t)(tsd_t *, void *, size_t); extern huge_dalloc_junk_t *huge_dalloc_junk; #endif -void huge_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache); +void huge_dalloc(tsd_t *tsd, void *ptr); arena_t *huge_aalloc(const void *ptr); size_t huge_salloc(tsd_t *tsd, const void *ptr); prof_tctx_t *huge_prof_tctx_get(tsd_t *tsd, const void *ptr); diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index ddceabca..fe58c1c6 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -443,6 +443,9 @@ extern bool in_valgrind; /* Number of CPUs. 
*/ extern unsigned ncpus; +/* Number of arenas used for automatic multiplexing of threads and arenas. */ +extern unsigned narenas_auto; + /* * Arenas that are used to service external requests. Not all elements of the * arenas array are necessarily used; arenas are created lazily as needed. @@ -469,10 +472,11 @@ void bootstrap_free(void *ptr); unsigned narenas_total_get(void); arena_t *arena_init(tsd_t *tsd, unsigned ind); arena_tdata_t *arena_tdata_get_hard(tsd_t *tsd, unsigned ind); -arena_t *arena_choose_hard(tsd_t *tsd); +arena_t *arena_choose_hard(tsd_t *tsd, bool internal); void arena_migrate(tsd_t *tsd, unsigned oldind, unsigned newind); void thread_allocated_cleanup(tsd_t *tsd); void thread_deallocated_cleanup(tsd_t *tsd); +void iarena_cleanup(tsd_t *tsd); void arena_cleanup(tsd_t *tsd); void arenas_tdata_cleanup(tsd_t *tsd); void narenas_tdata_cleanup(tsd_t *tsd); @@ -546,7 +550,7 @@ size_t s2u_compute(size_t size); size_t s2u_lookup(size_t size); size_t s2u(size_t size); size_t sa2u(size_t size, size_t alignment); -arena_t *arena_choose(tsd_t *tsd, arena_t *arena); +arena_t *arena_choose(tsd_t *tsd, arena_t *arena, bool internal); arena_tdata_t *arena_tdata_get(tsd_t *tsd, unsigned ind, bool refresh_if_missing); arena_t *arena_get(tsd_t *tsd, unsigned ind, bool init_if_missing); @@ -784,15 +788,16 @@ sa2u(size_t size, size_t alignment) /* Choose an arena based on a per-thread value. */ JEMALLOC_INLINE arena_t * -arena_choose(tsd_t *tsd, arena_t *arena) +arena_choose(tsd_t *tsd, arena_t *arena, bool internal) { arena_t *ret; if (arena != NULL) return (arena); - if (unlikely((ret = tsd_arena_get(tsd)) == NULL)) - ret = arena_choose_hard(tsd); + ret = internal ? tsd_iarena_get(tsd) : tsd_arena_get(tsd); + if (unlikely(ret == NULL)) + ret = arena_choose_hard(tsd, internal); return (ret); } @@ -935,6 +940,8 @@ iallocztm(tsd_t *tsd, size_t size, szind_t ind, bool zero, tcache_t *tcache, void *ret; assert(size != 0); + assert(!is_metadata || tcache == NULL); + assert(!is_metadata || arena == NULL || arena->ind < narenas_auto); ret = arena_malloc(tsd, arena, size, ind, zero, tcache, slow_path); if (config_stats && is_metadata && likely(ret != NULL)) { @@ -982,6 +989,8 @@ ipallocztm(tsd_t *tsd, size_t usize, size_t alignment, bool zero, assert(usize != 0); assert(usize == sa2u(usize, alignment)); + assert(!is_metadata || tcache == NULL); + assert(!is_metadata || arena == NULL || arena->ind < narenas_auto); ret = arena_palloc(tsd, arena, usize, alignment, zero, tcache); assert(ALIGNMENT_ADDR2BASE(ret, alignment) == ret); @@ -1052,6 +1061,8 @@ idalloctm(tsd_t *tsd, void *ptr, tcache_t *tcache, bool is_metadata, { assert(ptr != NULL); + assert(!is_metadata || tcache == NULL); + assert(!is_metadata || iaalloc(ptr)->ind < narenas_auto); if (config_stats && is_metadata) { arena_metadata_allocated_sub(iaalloc(ptr), isalloc(tsd, ptr, config_prof)); diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index c8799cba..eacc7c62 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -286,6 +286,7 @@ huge_ralloc_no_move huge_salloc iaalloc iallocztm +iarena_cleanup icalloc icalloct idalloc @@ -342,6 +343,7 @@ malloc_write map_bias map_misc_offset mb_write +narenas_auto narenas_tdata_cleanup narenas_total_get ncpus diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index 1aa64631..82724304 100644 --- a/include/jemalloc/internal/tcache.h +++ 
b/include/jemalloc/internal/tcache.h @@ -293,7 +293,7 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, assert(tcache_success == (ret != NULL)); if (unlikely(!tcache_success)) { bool tcache_hard_success; - arena = arena_choose(tsd, arena); + arena = arena_choose(tsd, arena, false); if (unlikely(arena == NULL)) return (NULL); @@ -354,7 +354,7 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, * Only allocate one large object at a time, because it's quite * expensive to create one and not use it. */ - arena = arena_choose(tsd, arena); + arena = arena_choose(tsd, arena, false); if (unlikely(arena == NULL)) return (NULL); @@ -459,8 +459,10 @@ JEMALLOC_ALWAYS_INLINE tcache_t * tcaches_get(tsd_t *tsd, unsigned ind) { tcaches_t *elm = &tcaches[ind]; - if (unlikely(elm->tcache == NULL)) - elm->tcache = tcache_create(tsd, arena_choose(tsd, NULL)); + if (unlikely(elm->tcache == NULL)) { + elm->tcache = tcache_create(tsd, arena_choose(tsd, NULL, + false)); + } return (elm->tcache); } #endif diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index b23b3b4c..1a1b5c32 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -536,6 +536,7 @@ struct tsd_init_head_s { O(thread_allocated, uint64_t) \ O(thread_deallocated, uint64_t) \ O(prof_tdata, prof_tdata_t *) \ + O(iarena, arena_t *) \ O(arena, arena_t *) \ O(arenas_tdata, arena_tdata_t *) \ O(narenas_tdata, unsigned) \ @@ -552,6 +553,7 @@ struct tsd_init_head_s { NULL, \ NULL, \ NULL, \ + NULL, \ 0, \ false, \ tcache_enabled_default, \ diff --git a/src/arena.c b/src/arena.c index 15023cf9..0da832e2 100644 --- a/src/arena.c +++ b/src/arena.c @@ -2478,10 +2478,10 @@ arena_malloc_large(tsd_t *tsd, arena_t *arena, szind_t binind, bool zero) void * arena_malloc_hard(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind, - bool zero, tcache_t *tcache) + bool zero) { - arena = arena_choose(tsd, arena); + arena = arena_choose(tsd, arena, false); if (unlikely(arena == NULL)) return (NULL); @@ -2489,7 +2489,7 @@ arena_malloc_hard(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind, return (arena_malloc_small(tsd, arena, ind, zero)); if (likely(size <= large_maxclass)) return (arena_malloc_large(tsd, arena, ind, zero)); - return (huge_malloc(tsd, arena, index2size(ind), zero, tcache)); + return (huge_malloc(tsd, arena, index2size(ind), zero)); } /* Only handles large allocations that require more than page alignment. 
*/ @@ -2506,7 +2506,7 @@ arena_palloc_large(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, assert(usize == PAGE_CEILING(usize)); - arena = arena_choose(tsd, arena); + arena = arena_choose(tsd, arena, false); if (unlikely(arena == NULL)) return (NULL); @@ -2606,10 +2606,9 @@ arena_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, ret = arena_palloc_large(tsd, arena, usize, alignment, zero); } else if (likely(alignment <= chunksize)) - ret = huge_malloc(tsd, arena, usize, zero, tcache); + ret = huge_malloc(tsd, arena, usize, zero); else { - ret = huge_palloc(tsd, arena, usize, alignment, zero, - tcache); + ret = huge_palloc(tsd, arena, usize, alignment, zero); } } return (ret); @@ -3211,7 +3210,7 @@ arena_basic_stats_merge_locked(arena_t *arena, unsigned *nthreads, size_t *nactive, size_t *ndirty) { - *nthreads += arena_nthreads_get(arena); + *nthreads += arena_nthreads_get(arena, false); *dss = dss_prec_names[arena->dss_prec]; *lg_dirty_mult = arena->lg_dirty_mult; *decay_time = arena->decay_time; @@ -3294,24 +3293,24 @@ arena_stats_merge(tsd_t *tsd, arena_t *arena, unsigned *nthreads, } unsigned -arena_nthreads_get(arena_t *arena) +arena_nthreads_get(arena_t *arena, bool internal) { - return (atomic_read_u(&arena->nthreads)); + return (atomic_read_u(&arena->nthreads[internal])); } void -arena_nthreads_inc(arena_t *arena) +arena_nthreads_inc(arena_t *arena, bool internal) { - atomic_add_u(&arena->nthreads, 1); + atomic_add_u(&arena->nthreads[internal], 1); } void -arena_nthreads_dec(arena_t *arena) +arena_nthreads_dec(arena_t *arena, bool internal) { - atomic_sub_u(&arena->nthreads, 1); + atomic_sub_u(&arena->nthreads[internal], 1); } arena_t * @@ -3338,7 +3337,7 @@ arena_new(tsd_t *tsd, unsigned ind) return (NULL); arena->ind = ind; - arena->nthreads = 0; + arena->nthreads[0] = arena->nthreads[1] = 0; if (malloc_mutex_init(&arena->lock, "arena", WITNESS_RANK_ARENA)) return (NULL); diff --git a/src/ckh.c b/src/ckh.c index 07b49dd2..aa9803e8 100644 --- a/src/ckh.c +++ b/src/ckh.c @@ -271,7 +271,7 @@ ckh_grow(tsd_t *tsd, ckh_t *ckh) goto label_return; } tab = (ckhc_t *)ipallocztm(tsd, usize, CACHELINE, true, NULL, - true, NULL); + true, arena_choose(tsd, NULL, true)); if (tab == NULL) { ret = true; goto label_return; @@ -283,12 +283,12 @@ ckh_grow(tsd_t *tsd, ckh_t *ckh) ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS; if (!ckh_rebuild(ckh, tab)) { - idalloctm(tsd, tab, tcache_get(tsd, false), true, true); + idalloctm(tsd, tab, NULL, true, true); break; } /* Rebuilding failed, so back out partially rebuilt table. */ - idalloctm(tsd, ckh->tab, tcache_get(tsd, false), true, true); + idalloctm(tsd, ckh->tab, NULL, true, true); ckh->tab = tab; ckh->lg_curbuckets = lg_prevbuckets; } @@ -315,7 +315,7 @@ ckh_shrink(tsd_t *tsd, ckh_t *ckh) if (unlikely(usize == 0 || usize > HUGE_MAXCLASS)) return; tab = (ckhc_t *)ipallocztm(tsd, usize, CACHELINE, true, NULL, true, - NULL); + arena_choose(tsd, NULL, true)); if (tab == NULL) { /* * An OOM error isn't worth propagating, since it doesn't @@ -330,7 +330,7 @@ ckh_shrink(tsd_t *tsd, ckh_t *ckh) ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS; if (!ckh_rebuild(ckh, tab)) { - idalloctm(tsd, tab, tcache_get(tsd, false), true, true); + idalloctm(tsd, tab, NULL, true, true); #ifdef CKH_COUNT ckh->nshrinks++; #endif @@ -338,7 +338,7 @@ ckh_shrink(tsd_t *tsd, ckh_t *ckh) } /* Rebuilding failed, so back out partially rebuilt table. 
*/ - idalloctm(tsd, ckh->tab, tcache_get(tsd, false), true, true); + idalloctm(tsd, ckh->tab, NULL, true, true); ckh->tab = tab; ckh->lg_curbuckets = lg_prevbuckets; #ifdef CKH_COUNT @@ -392,7 +392,7 @@ ckh_new(tsd_t *tsd, ckh_t *ckh, size_t minitems, ckh_hash_t *hash, goto label_return; } ckh->tab = (ckhc_t *)ipallocztm(tsd, usize, CACHELINE, true, NULL, true, - NULL); + arena_choose(tsd, NULL, true)); if (ckh->tab == NULL) { ret = true; goto label_return; @@ -421,7 +421,7 @@ ckh_delete(tsd_t *tsd, ckh_t *ckh) (unsigned long long)ckh->nrelocs); #endif - idalloctm(tsd, ckh->tab, tcache_get(tsd, false), true, true); + idalloctm(tsd, ckh->tab, NULL, true, true); if (config_debug) memset(ckh, JEMALLOC_FREE_JUNK, sizeof(ckh_t)); } diff --git a/src/ctl.c b/src/ctl.c index 50faee7b..fad2fdd7 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1304,7 +1304,7 @@ thread_arena_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, arena_t *oldarena; unsigned newind, oldind; - oldarena = arena_choose(tsd, NULL); + oldarena = arena_choose(tsd, NULL, false); if (oldarena == NULL) return (EAGAIN); diff --git a/src/huge.c b/src/huge.c index 3a802dee..bac2425f 100644 --- a/src/huge.c +++ b/src/huge.c @@ -31,18 +31,17 @@ huge_node_unset(const void *ptr, const extent_node_t *node) } void * -huge_malloc(tsd_t *tsd, arena_t *arena, size_t usize, bool zero, - tcache_t *tcache) +huge_malloc(tsd_t *tsd, arena_t *arena, size_t usize, bool zero) { assert(usize == s2u(usize)); - return (huge_palloc(tsd, arena, usize, chunksize, zero, tcache)); + return (huge_palloc(tsd, arena, usize, chunksize, zero)); } void * huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, - bool zero, tcache_t *tcache) + bool zero) { void *ret; size_t ausize; @@ -58,7 +57,7 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, /* Allocate an extent node with which to track the chunk. */ node = ipallocztm(tsd, CACHELINE_CEILING(sizeof(extent_node_t)), - CACHELINE, false, tcache, true, arena); + CACHELINE, false, NULL, true, arena_choose(tsd, NULL, true)); if (node == NULL) return (NULL); @@ -67,10 +66,10 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, * it is possible to make correct junk/zero fill decisions below. */ is_zeroed = zero; - arena = arena_choose(tsd, arena); + arena = arena_choose(tsd, arena, false); if (unlikely(arena == NULL) || (ret = arena_chunk_alloc_huge(tsd, arena, usize, alignment, &is_zeroed)) == NULL) { - idalloctm(tsd, node, tcache, true, true); + idalloctm(tsd, node, NULL, true, true); return (NULL); } @@ -78,7 +77,7 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, if (huge_node_set(tsd, ret, node)) { arena_chunk_dalloc_huge(tsd, arena, ret, usize); - idalloctm(tsd, node, tcache, true, true); + idalloctm(tsd, node, NULL, true, true); return (NULL); } @@ -331,12 +330,12 @@ huge_ralloc_no_move(tsd_t *tsd, void *ptr, size_t oldsize, size_t usize_min, static void * huge_ralloc_move_helper(tsd_t *tsd, arena_t *arena, size_t usize, - size_t alignment, bool zero, tcache_t *tcache) + size_t alignment, bool zero) { if (alignment <= chunksize) - return (huge_malloc(tsd, arena, usize, zero, tcache)); - return (huge_palloc(tsd, arena, usize, alignment, zero, tcache)); + return (huge_malloc(tsd, arena, usize, zero)); + return (huge_palloc(tsd, arena, usize, alignment, zero)); } void * @@ -358,8 +357,7 @@ huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t usize, * different size class. 
In that case, fall back to allocating new * space and copying. */ - ret = huge_ralloc_move_helper(tsd, arena, usize, alignment, zero, - tcache); + ret = huge_ralloc_move_helper(tsd, arena, usize, alignment, zero); if (ret == NULL) return (NULL); @@ -370,7 +368,7 @@ huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t usize, } void -huge_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache) +huge_dalloc(tsd_t *tsd, void *ptr) { extent_node_t *node; arena_t *arena; @@ -386,7 +384,7 @@ huge_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache) extent_node_size_get(node)); arena_chunk_dalloc_huge(tsd, extent_node_arena_get(node), extent_node_addr_get(node), extent_node_size_get(node)); - idalloctm(tsd, node, tcache, true, true); + idalloctm(tsd, node, NULL, true, true); arena_decay_tick(tsd, arena); } diff --git a/src/jemalloc.c b/src/jemalloc.c index 7543dff1..3bd39c3c 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -60,7 +60,7 @@ static malloc_mutex_t arenas_lock; arena_t **arenas; static unsigned narenas_total; /* Use narenas_total_*(). */ static arena_t *a0; /* arenas[0]; read-only after initialization. */ -static unsigned narenas_auto; /* Read-only after initialization. */ +unsigned narenas_auto; /* Read-only after initialization. */ typedef enum { malloc_init_uninitialized = 3, @@ -318,8 +318,8 @@ a0ialloc(size_t size, bool zero, bool is_metadata) if (unlikely(malloc_init_a0())) return (NULL); - return (iallocztm(NULL, size, size2index(size), zero, false, - is_metadata, arena_get(NULL, 0, false), true)); + return (iallocztm(NULL, size, size2index(size), zero, NULL, + is_metadata, arena_get(NULL, 0, true), true)); } static void @@ -451,15 +451,19 @@ arena_init(tsd_t *tsd, unsigned ind) } static void -arena_bind(tsd_t *tsd, unsigned ind) +arena_bind(tsd_t *tsd, unsigned ind, bool internal) { arena_t *arena; arena = arena_get(tsd, ind, false); - arena_nthreads_inc(arena); + arena_nthreads_inc(arena, internal); - if (tsd_nominal(tsd)) - tsd_arena_set(tsd, arena); + if (tsd_nominal(tsd)) { + if (internal) + tsd_iarena_set(tsd, arena); + else + tsd_arena_set(tsd, arena); + } } void @@ -469,19 +473,22 @@ arena_migrate(tsd_t *tsd, unsigned oldind, unsigned newind) oldarena = arena_get(tsd, oldind, false); newarena = arena_get(tsd, newind, false); - arena_nthreads_dec(oldarena); - arena_nthreads_inc(newarena); + arena_nthreads_dec(oldarena, false); + arena_nthreads_inc(newarena, false); tsd_arena_set(tsd, newarena); } static void -arena_unbind(tsd_t *tsd, unsigned ind) +arena_unbind(tsd_t *tsd, unsigned ind, bool internal) { arena_t *arena; arena = arena_get(tsd, ind, false); - arena_nthreads_dec(arena); - tsd_arena_set(tsd, NULL); + arena_nthreads_dec(arena, internal); + if (internal) + tsd_iarena_set(tsd, NULL); + else + tsd_arena_set(tsd, NULL); } arena_tdata_t * @@ -562,14 +569,24 @@ label_return: /* Slow path, called only by arena_choose(). */ arena_t * -arena_choose_hard(tsd_t *tsd) +arena_choose_hard(tsd_t *tsd, bool internal) { - arena_t *ret; + arena_t *ret JEMALLOC_CC_SILENCE_INIT(NULL); if (narenas_auto > 1) { - unsigned i, choose, first_null; + unsigned i, j, choose[2], first_null; + + /* + * Determine binding for both non-internal and internal + * allocation. + * + * choose[0]: For application allocation. + * choose[1]: For internal metadata allocation. 
+ */ + + for (j = 0; j < 2; j++) + choose[j] = 0; - choose = 0; first_null = narenas_auto; malloc_mutex_lock(tsd, &arenas_lock); assert(arena_get(tsd, 0, false) != NULL); @@ -579,10 +596,13 @@ arena_choose_hard(tsd_t *tsd) * Choose the first arena that has the lowest * number of threads assigned to it. */ - if (arena_nthreads_get(arena_get(tsd, i, false)) - < arena_nthreads_get(arena_get(tsd, choose, - false))) - choose = i; + for (j = 0; j < 2; j++) { + if (arena_nthreads_get(arena_get(tsd, i, + false), !!j) < + arena_nthreads_get(arena_get(tsd, + choose[j], false), !!j)) + choose[j] = i; + } } else if (first_null == narenas_auto) { /* * Record the index of the first uninitialized @@ -597,27 +617,35 @@ arena_choose_hard(tsd_t *tsd) } } - if (arena_nthreads_get(arena_get(tsd, choose, false)) == 0 - || first_null == narenas_auto) { - /* - * Use an unloaded arena, or the least loaded arena if - * all arenas are already initialized. - */ - ret = arena_get(tsd, choose, false); - } else { - /* Initialize a new arena. */ - choose = first_null; - ret = arena_init_locked(tsd, choose); - if (ret == NULL) { - malloc_mutex_unlock(tsd, &arenas_lock); - return (NULL); + for (j = 0; j < 2; j++) { + if (arena_nthreads_get(arena_get(tsd, choose[j], false), + !!j) == 0 || first_null != narenas_auto) { + /* + * Use an unloaded arena, or the least loaded + * arena if all arenas are already initialized. + */ + if (!!j == internal) + ret = arena_get(tsd, choose[j], false); + } else { + arena_t *arena; + + /* Initialize a new arena. */ + choose[j] = first_null; + arena = arena_init_locked(tsd, choose[j]); + if (arena == NULL) { + malloc_mutex_unlock(tsd, &arenas_lock); + return (NULL); + } + if (!!j == internal) + ret = arena; } + arena_bind(tsd, choose[j], !!j); } - arena_bind(tsd, choose); malloc_mutex_unlock(tsd, &arenas_lock); } else { ret = arena_get(tsd, 0, false); - arena_bind(tsd, 0); + arena_bind(tsd, 0, false); + arena_bind(tsd, 0, true); } return (ret); @@ -637,6 +665,16 @@ thread_deallocated_cleanup(tsd_t *tsd) /* Do nothing. */ } +void +iarena_cleanup(tsd_t *tsd) +{ + arena_t *iarena; + + iarena = tsd_iarena_get(tsd); + if (iarena != NULL) + arena_unbind(tsd, iarena->ind, true); +} + void arena_cleanup(tsd_t *tsd) { @@ -644,7 +682,7 @@ arena_cleanup(tsd_t *tsd) arena = tsd_arena_get(tsd); if (arena != NULL) - arena_unbind(tsd, arena->ind); + arena_unbind(tsd, arena->ind, false); } void diff --git a/src/prof.c b/src/prof.c index 520bf90a..82604632 100644 --- a/src/prof.c +++ b/src/prof.c @@ -554,7 +554,8 @@ prof_gctx_create(tsd_t *tsd, prof_bt_t *bt) */ size_t size = offsetof(prof_gctx_t, vec) + (bt->len * sizeof(void *)); prof_gctx_t *gctx = (prof_gctx_t *)iallocztm(tsd, size, - size2index(size), false, tcache_get(tsd, true), true, NULL, true); + size2index(size), false, NULL, true, arena_get(NULL, 0, true), + true); if (gctx == NULL) return (NULL); gctx->lock = prof_gctx_mutex_choose(); @@ -595,7 +596,7 @@ prof_gctx_try_destroy(tsd_t *tsd, prof_tdata_t *tdata_self, prof_gctx_t *gctx, prof_leave(tsd, tdata_self); /* Destroy gctx. 
*/ malloc_mutex_unlock(tsd, gctx->lock); - idalloctm(tsd, gctx, tcache_get(tsd, false), true, true); + idalloctm(tsd, gctx, NULL, true, true); } else { /* * Compensate for increment in prof_tctx_destroy() or @@ -706,7 +707,7 @@ prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) prof_tdata_destroy(tsd, tdata, false); if (destroy_tctx) - idalloctm(tsd, tctx, tcache_get(tsd, false), true, true); + idalloctm(tsd, tctx, NULL, true, true); } static bool @@ -735,8 +736,7 @@ prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata, if (ckh_insert(tsd, &bt2gctx, btkey.v, gctx.v)) { /* OOM. */ prof_leave(tsd, tdata); - idalloctm(tsd, gctx.v, tcache_get(tsd, false), true, - true); + idalloctm(tsd, gctx.v, NULL, true, true); return (true); } new_gctx = true; @@ -780,7 +780,6 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) ret.p->prepared = true; malloc_mutex_unlock(tsd, tdata->lock); if (not_found) { - tcache_t *tcache; void *btkey; prof_gctx_t *gctx; bool new_gctx, error; @@ -794,10 +793,9 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) return (NULL); /* Link a prof_tctx_t into gctx for this thread. */ - tcache = tcache_get(tsd, true); ret.v = iallocztm(tsd, sizeof(prof_tctx_t), - size2index(sizeof(prof_tctx_t)), false, tcache, true, NULL, - true); + size2index(sizeof(prof_tctx_t)), false, NULL, true, + arena_choose(tsd, NULL, true), true); if (ret.p == NULL) { if (new_gctx) prof_gctx_try_destroy(tsd, tdata, gctx, tdata); @@ -817,7 +815,7 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) if (error) { if (new_gctx) prof_gctx_try_destroy(tsd, tdata, gctx, tdata); - idalloctm(tsd, ret.v, tcache, true, true); + idalloctm(tsd, ret.v, NULL, true, true); return (NULL); } malloc_mutex_lock(tsd, gctx->lock); @@ -1238,8 +1236,8 @@ prof_gctx_finish(tsd_t *tsd, prof_gctx_tree_t *gctxs) to_destroy); tctx_tree_remove(&gctx->tctxs, to_destroy); - idalloctm(tsd, to_destroy, - tcache_get(tsd, false), true, true); + idalloctm(tsd, to_destroy, NULL, true, + true); } else next = NULL; } while (next != NULL); @@ -1771,14 +1769,13 @@ prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, char *thread_name, bool active) { prof_tdata_t *tdata; - tcache_t *tcache; cassert(config_prof); /* Initialize an empty cache for this thread. 
*/ - tcache = tcache_get(tsd, true); tdata = (prof_tdata_t *)iallocztm(tsd, sizeof(prof_tdata_t), - size2index(sizeof(prof_tdata_t)), false, tcache, true, NULL, true); + size2index(sizeof(prof_tdata_t)), false, NULL, true, arena_get(NULL, + 0, true), true); if (tdata == NULL) return (NULL); @@ -1792,7 +1789,7 @@ prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, if (ckh_new(tsd, &tdata->bt2tctx, PROF_CKH_MINITEMS, prof_bt_hash, prof_bt_keycomp)) { - idalloctm(tsd, tdata, tcache, true, true); + idalloctm(tsd, tdata, NULL, true, true); return (NULL); } @@ -1848,7 +1845,6 @@ static void prof_tdata_destroy_locked(tsd_t *tsd, prof_tdata_t *tdata, bool even_if_attached) { - tcache_t *tcache; malloc_mutex_assert_owner(tsd, &tdatas_mtx); @@ -1859,11 +1855,10 @@ prof_tdata_destroy_locked(tsd_t *tsd, prof_tdata_t *tdata, assert(prof_tdata_should_destroy_unlocked(tsd, tdata, even_if_attached)); - tcache = tcache_get(tsd, false); if (tdata->thread_name != NULL) - idalloctm(tsd, tdata->thread_name, tcache, true, true); + idalloctm(tsd, tdata->thread_name, NULL, true, true); ckh_delete(tsd, &tdata->bt2tctx); - idalloctm(tsd, tdata, tcache, true, true); + idalloctm(tsd, tdata, NULL, true, true); } static void @@ -2023,8 +2018,8 @@ prof_thread_name_alloc(tsd_t *tsd, const char *thread_name) if (size == 1) return (""); - ret = iallocztm(tsd, size, size2index(size), false, tcache_get(tsd, - true), true, NULL, true); + ret = iallocztm(tsd, size, size2index(size), false, NULL, true, + arena_get(NULL, 0, true), true); if (ret == NULL) return (NULL); memcpy(ret, thread_name, size); @@ -2056,8 +2051,7 @@ prof_thread_name_set(tsd_t *tsd, const char *thread_name) return (EAGAIN); if (tdata->thread_name != NULL) { - idalloctm(tsd, tdata->thread_name, tcache_get(tsd, false), - true, true); + idalloctm(tsd, tdata->thread_name, NULL, true, true); tdata->thread_name = NULL; } if (strlen(s) > 0) diff --git a/src/quarantine.c b/src/quarantine.c index 6cb74b37..ff1637ec 100644 --- a/src/quarantine.c +++ b/src/quarantine.c @@ -30,7 +30,7 @@ quarantine_init(tsd_t *tsd, size_t lg_maxobjs) size = offsetof(quarantine_t, objs) + ((ZU(1) << lg_maxobjs) * sizeof(quarantine_obj_t)); quarantine = (quarantine_t *)iallocztm(tsd, size, size2index(size), - false, tcache_get(tsd, true), true, NULL, true); + false, NULL, true, arena_get(NULL, 0, true), true); if (quarantine == NULL) return (NULL); quarantine->curbytes = 0; @@ -57,7 +57,7 @@ quarantine_alloc_hook_work(tsd_t *tsd) if (tsd_quarantine_get(tsd) == NULL) tsd_quarantine_set(tsd, quarantine); else - idalloctm(tsd, quarantine, tcache_get(tsd, false), true, true); + idalloctm(tsd, quarantine, NULL, true, true); } static quarantine_t * @@ -89,7 +89,7 @@ quarantine_grow(tsd_t *tsd, quarantine_t *quarantine) memcpy(&ret->objs[ncopy_a], quarantine->objs, ncopy_b * sizeof(quarantine_obj_t)); } - idalloctm(tsd, quarantine, tcache_get(tsd, false), true, true); + idalloctm(tsd, quarantine, NULL, true, true); tsd_quarantine_set(tsd, ret); return (ret); @@ -179,7 +179,7 @@ quarantine_cleanup(tsd_t *tsd) quarantine = tsd_quarantine_get(tsd); if (quarantine != NULL) { quarantine_drain(tsd, quarantine, 0); - idalloctm(tsd, quarantine, tcache_get(tsd, false), true, true); + idalloctm(tsd, quarantine, NULL, true, true); tsd_quarantine_set(tsd, NULL); } } diff --git a/src/tcache.c b/src/tcache.c index a9539f64..ca867c72 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -97,7 +97,7 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, assert(binind < 
NBINS); assert(rem <= tbin->ncached); - arena = arena_choose(tsd, NULL); + arena = arena_choose(tsd, NULL, false); assert(arena != NULL); for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) { /* Lock the arena bin associated with the first object. */ @@ -179,7 +179,7 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, assert(binind < nhbins); assert(rem <= tbin->ncached); - arena = arena_choose(tsd, NULL); + arena = arena_choose(tsd, NULL, false); assert(arena != NULL); for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) { /* Lock the arena associated with the first object. */ @@ -307,7 +307,7 @@ tcache_get_hard(tsd_t *tsd) tcache_enabled_set(false); /* Memoize. */ return (NULL); } - arena = arena_choose(tsd, NULL); + arena = arena_choose(tsd, NULL, false); if (unlikely(arena == NULL)) return (NULL); return (tcache_create(tsd, arena)); @@ -328,8 +328,8 @@ tcache_create(tsd_t *tsd, arena_t *arena) /* Avoid false cacheline sharing. */ size = sa2u(size, CACHELINE); - tcache = ipallocztm(tsd, size, CACHELINE, true, false, true, - arena_get(tsd, 0, false)); + tcache = ipallocztm(tsd, size, CACHELINE, true, NULL, true, + arena_get(NULL, 0, true)); if (tcache == NULL) return (NULL); @@ -359,7 +359,7 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache) arena_t *arena; unsigned i; - arena = arena_choose(tsd, NULL); + arena = arena_choose(tsd, NULL, false); tcache_arena_dissociate(tsd, tcache, arena); for (i = 0; i < NBINS; i++) { @@ -391,7 +391,7 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache) arena_prof_accum(tsd, arena, tcache->prof_accumbytes)) prof_idump(tsd); - idalloctm(tsd, tcache, false, true, true); + idalloctm(tsd, tcache, NULL, true, true); } void @@ -446,6 +446,7 @@ tcache_stats_merge(tsd_t *tsd, tcache_t *tcache, arena_t *arena) bool tcaches_create(tsd_t *tsd, unsigned *r_ind) { + arena_t *arena; tcache_t *tcache; tcaches_t *elm; @@ -458,7 +459,10 @@ tcaches_create(tsd_t *tsd, unsigned *r_ind) if (tcaches_avail == NULL && tcaches_past > MALLOCX_TCACHE_MAX) return (true); - tcache = tcache_create(tsd, arena_get(tsd, 0, false)); + arena = arena_choose(tsd, NULL, true); + if (unlikely(arena == NULL)) + return (true); + tcache = tcache_create(tsd, arena); if (tcache == NULL) return (true); From 19ff2cefba48d1ddab8fb52e3d78f309ca2553cf Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 22 Apr 2016 14:37:17 -0700 Subject: [PATCH 54/82] Implement the arena..reset mallctl. This makes it possible to discard all of an arena's allocations in a single operation. This resolves #146. 
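A minimal usage sketch, modeled on the unit test added below (error
checking omitted; per the documentation, the target arena must have been
created via the arenas.extend mallctl):

    #include <jemalloc/jemalloc.h>

    int
    main(void)
    {
        unsigned arena_ind;
        size_t sz, mib[3], miblen;

        /* arena.<i>.reset may only target arenas created this way. */
        sz = sizeof(unsigned);
        mallctl("arenas.extend", &arena_ind, &sz, NULL, 0);

        /* ... allocate/deallocate using MALLOCX_ARENA(arena_ind) ... */

        /* Discard all of the arena's extant allocations. */
        miblen = sizeof(mib)/sizeof(size_t);
        mallctlnametomib("arena.0.reset", mib, &miblen);
        mib[1] = (size_t)arena_ind;
        mallctlbymib(mib, miblen, NULL, NULL, NULL, 0);

        return (0);
    }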
--- Makefile.in | 4 +- doc/jemalloc.xml.in | 17 ++ include/jemalloc/internal/arena.h | 5 + include/jemalloc/internal/extent.h | 2 +- include/jemalloc/internal/private_symbols.txt | 1 + src/arena.c | 225 +++++++++++++++--- src/ctl.c | 36 +++ test/unit/arena_reset.c | 160 +++++++++++++ 8 files changed, 411 insertions(+), 39 deletions(-) create mode 100644 test/unit/arena_reset.c diff --git a/Makefile.in b/Makefile.in index a872eb5f..ddc89157 100644 --- a/Makefile.in +++ b/Makefile.in @@ -135,7 +135,9 @@ C_TESTLIB_SRCS := $(srcroot)test/src/btalloc.c $(srcroot)test/src/btalloc_0.c \ $(srcroot)test/src/SFMT.c $(srcroot)test/src/test.c \ $(srcroot)test/src/thd.c $(srcroot)test/src/timer.c C_UTIL_INTEGRATION_SRCS := $(srcroot)src/nstime.c $(srcroot)src/util.c -TESTS_UNIT := $(srcroot)test/unit/atomic.c \ +TESTS_UNIT := \ + $(srcroot)test/unit/arena_reset.c \ + $(srcroot)test/unit/atomic.c \ $(srcroot)test/unit/bitmap.c \ $(srcroot)test/unit/ckh.c \ $(srcroot)test/unit/decay.c \ diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 9814c226..7b602a51 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1558,6 +1558,23 @@ malloc_conf = "xmalloc:true";]]> details. + + + arena.<i>.reset + (void) + -- + + Discard all of the arena's extant allocations. This + interface can only be used with arenas created via arenas.extend. None + of the arena's discarded/cached allocations may accessed afterward. As + part of this requirement, all thread caches which were used to + allocate/deallocate in conjunction with the arena must be flushed + beforehand. This interface cannot be used if running inside Valgrind, + nor if the quarantine size is + non-zero. + + arena.<i>.dss diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 103a4c91..f2685f6f 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -330,6 +330,10 @@ struct arena_s { dss_prec_t dss_prec; + + /* Extant arena chunks. */ + ql_head(extent_node_t) achunks; + /* * In order to avoid rapid chunk allocation/deallocation when an arena * oscillates right on the cusp of needing a new chunk, cache the most @@ -533,6 +537,7 @@ ssize_t arena_decay_time_get(tsd_t *tsd, arena_t *arena); bool arena_decay_time_set(tsd_t *tsd, arena_t *arena, ssize_t decay_time); void arena_purge(tsd_t *tsd, arena_t *arena, bool all); void arena_maybe_purge(tsd_t *tsd, arena_t *arena); +void arena_reset(tsd_t *tsd, arena_t *arena); void arena_tcache_fill_small(tsd_t *tsd, arena_t *arena, tcache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes); void arena_alloc_junk_small(void *ptr, arena_bin_info_t *bin_info, diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index 386d50ef..49d76a57 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -48,7 +48,7 @@ struct extent_node_s { /* Linkage for the size/address-ordered tree. */ rb_node(extent_node_t) szad_link; - /* Linkage for arena's huge and node_cache lists. */ + /* Linkage for arena's achunks, huge, and node_cache lists. 
*/ ql_elm(extent_node_t) ql_link; }; diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index eacc7c62..c7ff8529 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -100,6 +100,7 @@ arena_ralloc_junk_large arena_ralloc_no_move arena_rd_to_miscelm arena_redzone_corruption +arena_reset arena_run_regind arena_run_to_miscelm arena_salloc diff --git a/src/arena.c b/src/arena.c index 0da832e2..f752acad 100644 --- a/src/arena.c +++ b/src/arena.c @@ -738,14 +738,61 @@ arena_chunk_alloc(tsd_t *tsd, arena_t *arena) return (NULL); } + ql_elm_new(&chunk->node, ql_link); + ql_tail_insert(&arena->achunks, &chunk->node, ql_link); arena_avail_insert(arena, chunk, map_bias, chunk_npages-map_bias); return (chunk); } +static void +arena_chunk_discard(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk) +{ + bool committed; + chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; + + chunk_deregister(chunk, &chunk->node); + + committed = (arena_mapbits_decommitted_get(chunk, map_bias) == 0); + if (!committed) { + /* + * Decommit the header. Mark the chunk as decommitted even if + * header decommit fails, since treating a partially committed + * chunk as committed has a high potential for causing later + * access of decommitted memory. + */ + chunk_hooks = chunk_hooks_get(tsd, arena); + chunk_hooks.decommit(chunk, chunksize, 0, map_bias << LG_PAGE, + arena->ind); + } + + chunk_dalloc_cache(tsd, arena, &chunk_hooks, (void *)chunk, chunksize, + committed); + + if (config_stats) { + arena->stats.mapped -= chunksize; + arena->stats.metadata_mapped -= (map_bias << LG_PAGE); + } +} + +static void +arena_spare_discard(tsd_t *tsd, arena_t *arena, arena_chunk_t *spare) +{ + + assert(arena->spare != spare); + + if (arena_mapbits_dirty_get(spare, map_bias) != 0) { + arena_run_dirty_remove(arena, spare, map_bias, + chunk_npages-map_bias); + } + + arena_chunk_discard(tsd, arena, spare); +} + static void arena_chunk_dalloc(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk) { + arena_chunk_t *spare; assert(arena_mapbits_allocated_get(chunk, map_bias) == 0); assert(arena_mapbits_allocated_get(chunk, chunk_npages-1) == 0); @@ -761,43 +808,11 @@ arena_chunk_dalloc(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk) /* Remove run from runs_avail, so that the arena does not use it. */ arena_avail_remove(arena, chunk, map_bias, chunk_npages-map_bias); - if (arena->spare != NULL) { - arena_chunk_t *spare = arena->spare; - chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; - bool committed; - - arena->spare = chunk; - if (arena_mapbits_dirty_get(spare, map_bias) != 0) { - arena_run_dirty_remove(arena, spare, map_bias, - chunk_npages-map_bias); - } - - chunk_deregister(spare, &spare->node); - - committed = (arena_mapbits_decommitted_get(spare, map_bias) == - 0); - if (!committed) { - /* - * Decommit the header. Mark the chunk as decommitted - * even if header decommit fails, since treating a - * partially committed chunk as committed has a high - * potential for causing later access of decommitted - * memory. 
- */ - chunk_hooks = chunk_hooks_get(tsd, arena); - chunk_hooks.decommit(spare, chunksize, 0, map_bias << - LG_PAGE, arena->ind); - } - - chunk_dalloc_cache(tsd, arena, &chunk_hooks, (void *)spare, - chunksize, committed); - - if (config_stats) { - arena->stats.mapped -= chunksize; - arena->stats.metadata_mapped -= (map_bias << LG_PAGE); - } - } else - arena->spare = chunk; + ql_remove(&arena->achunks, &chunk->node, ql_link); + spare = arena->spare; + arena->spare = chunk; + if (spare != NULL) + arena_spare_discard(tsd, arena, spare); } static void @@ -1802,6 +1817,140 @@ arena_purge(tsd_t *tsd, arena_t *arena, bool all) malloc_mutex_unlock(tsd, &arena->lock); } +static void +arena_achunk_prof_reset(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk) +{ + size_t pageind, npages; + + cassert(config_prof); + assert(opt_prof); + + /* + * Iterate over the allocated runs and remove profiled allocations from + * the sample set. + */ + for (pageind = map_bias; pageind < chunk_npages; pageind += npages) { + if (arena_mapbits_allocated_get(chunk, pageind) != 0) { + if (arena_mapbits_large_get(chunk, pageind) != 0) { + void *ptr = (void *)((uintptr_t)chunk + (pageind + << LG_PAGE)); + size_t usize = isalloc(tsd, ptr, config_prof); + + prof_free(tsd, ptr, usize); + npages = arena_mapbits_large_size_get(chunk, + pageind) >> LG_PAGE; + } else { + /* Skip small run. */ + size_t binind = arena_mapbits_binind_get(chunk, + pageind); + arena_bin_info_t *bin_info = + &arena_bin_info[binind]; + npages = bin_info->run_size >> LG_PAGE; + } + } else { + /* Skip unallocated run. */ + npages = arena_mapbits_unallocated_size_get(chunk, + pageind) >> LG_PAGE; + } + assert(pageind + npages <= chunk_npages); + } +} + +void +arena_reset(tsd_t *tsd, arena_t *arena) +{ + unsigned i; + extent_node_t *node; + + /* + * Locking in this function is unintuitive. The caller guarantees that + * no concurrent operations are happening in this arena, but there are + * still reasons that some locking is necessary: + * + * - Some of the functions in the transitive closure of calls assume + * appropriate locks are held, and in some cases these locks are + * temporarily dropped to avoid lock order reversal or deadlock due to + * reentry. + * - mallctl("epoch", ...) may concurrently refresh stats. While + * strictly speaking this is a "concurrent operation", disallowing + * stats refreshes would impose an inconvenient burden. + */ + + /* Remove large allocations from prof sample set. */ + if (config_prof && opt_prof) { + ql_foreach(node, &arena->achunks, ql_link) { + arena_achunk_prof_reset(tsd, arena, + extent_node_addr_get(node)); + } + } + + /* Huge allocations. */ + malloc_mutex_lock(tsd, &arena->huge_mtx); + for (node = ql_last(&arena->huge, ql_link); node != NULL; node = + ql_last(&arena->huge, ql_link)) { + void *ptr = extent_node_addr_get(node); + + malloc_mutex_unlock(tsd, &arena->huge_mtx); + /* Remove huge allocation from prof sample set. */ + if (config_prof && opt_prof) { + size_t usize; + + usize = isalloc(tsd, ptr, config_prof); + prof_free(tsd, ptr, usize); + } + huge_dalloc(tsd, ptr); + malloc_mutex_lock(tsd, &arena->huge_mtx); + } + malloc_mutex_unlock(tsd, &arena->huge_mtx); + + malloc_mutex_lock(tsd, &arena->lock); + + /* Bins. 
*/ + for (i = 0; i < NBINS; i++) { + arena_bin_t *bin = &arena->bins[i]; + malloc_mutex_lock(tsd, &bin->lock); + bin->runcur = NULL; + arena_run_heap_new(&bin->runs); + if (config_stats) { + bin->stats.curregs = 0; + bin->stats.curruns = 0; + } + malloc_mutex_unlock(tsd, &bin->lock); + } + + /* + * Re-initialize runs_dirty such that the chunks_cache and runs_dirty + * chains directly correspond. + */ + qr_new(&arena->runs_dirty, rd_link); + for (node = qr_next(&arena->chunks_cache, cc_link); + node != &arena->chunks_cache; node = qr_next(node, cc_link)) { + qr_new(&node->rd, rd_link); + qr_meld(&arena->runs_dirty, &node->rd, rd_link); + } + + /* Arena chunks. */ + for (node = ql_last(&arena->achunks, ql_link); node != NULL; node = + ql_last(&arena->achunks, ql_link)) { + ql_remove(&arena->achunks, node, ql_link); + arena_chunk_discard(tsd, arena, extent_node_addr_get(node)); + } + + /* Spare. */ + if (arena->spare != NULL) { + arena_chunk_discard(tsd, arena, arena->spare); + arena->spare = NULL; + } + + assert(!arena->purging); + arena->nactive = 0; + + for(i = 0; i < runs_avail_nclasses; i++) + arena_run_heap_new(&arena->runs_avail[i]); + + malloc_mutex_unlock(tsd, &arena->lock); +} + static void arena_run_coalesce(arena_t *arena, arena_chunk_t *chunk, size_t *p_size, size_t *p_run_ind, size_t *p_run_pages, size_t flag_dirty, @@ -3373,6 +3522,8 @@ arena_new(tsd_t *tsd, unsigned ind) arena->dss_prec = chunk_dss_prec_get(tsd); + ql_new(&arena->achunks); + arena->spare = NULL; arena->lg_dirty_mult = arena_lg_dirty_mult_default_get(); diff --git a/src/ctl.c b/src/ctl.c index fad2fdd7..2e811430 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -120,6 +120,7 @@ CTL_PROTO(tcache_destroy) static void arena_i_purge(tsd_t *tsd, unsigned arena_ind, bool all); CTL_PROTO(arena_i_purge) CTL_PROTO(arena_i_decay) +CTL_PROTO(arena_i_reset) CTL_PROTO(arena_i_dss) CTL_PROTO(arena_i_lg_dirty_mult) CTL_PROTO(arena_i_decay_time) @@ -299,6 +300,7 @@ static const ctl_named_node_t tcache_node[] = { static const ctl_named_node_t arena_i_node[] = { {NAME("purge"), CTL(arena_i_purge)}, {NAME("decay"), CTL(arena_i_decay)}, + {NAME("reset"), CTL(arena_i_reset)}, {NAME("dss"), CTL(arena_i_dss)}, {NAME("lg_dirty_mult"), CTL(arena_i_lg_dirty_mult)}, {NAME("decay_time"), CTL(arena_i_decay_time)}, @@ -1602,6 +1604,40 @@ label_return: return (ret); } +static int +arena_i_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) +{ + int ret; + unsigned arena_ind; + arena_t *arena; + + READONLY(); + WRITEONLY(); + + if ((config_valgrind && unlikely(in_valgrind)) || (config_fill && + unlikely(opt_quarantine))) { + ret = EFAULT; + goto label_return; + } + + arena_ind = (unsigned)mib[1]; + if (config_debug) { + malloc_mutex_lock(tsd, &ctl_mtx); + assert(arena_ind < ctl_stats.narenas); + malloc_mutex_unlock(tsd, &ctl_mtx); + } + assert(arena_ind >= opt_narenas); + + arena = arena_get(tsd, arena_ind, false); + + arena_reset(tsd, arena); + + ret = 0; +label_return: + return (ret); +} + static int arena_i_dss_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) diff --git a/test/unit/arena_reset.c b/test/unit/arena_reset.c new file mode 100644 index 00000000..52170cc4 --- /dev/null +++ b/test/unit/arena_reset.c @@ -0,0 +1,160 @@ +#include "test/jemalloc_test.h" + +#ifdef JEMALLOC_PROF +const char *malloc_conf = "prof:true,lg_prof_sample:0"; +#endif + +static unsigned +get_nsizes_impl(const char *cmd) +{ + unsigned ret; + size_t z; + 
+ z = sizeof(unsigned); + assert_d_eq(mallctl(cmd, &ret, &z, NULL, 0), 0, + "Unexpected mallctl(\"%s\", ...) failure", cmd); + + return (ret); +} + +static unsigned +get_nsmall(void) +{ + + return (get_nsizes_impl("arenas.nbins")); +} + +static unsigned +get_nlarge(void) +{ + + return (get_nsizes_impl("arenas.nlruns")); +} + +static unsigned +get_nhuge(void) +{ + + return (get_nsizes_impl("arenas.nhchunks")); +} + +static size_t +get_size_impl(const char *cmd, size_t ind) +{ + size_t ret; + size_t z; + size_t mib[4]; + size_t miblen = 4; + + z = sizeof(size_t); + assert_d_eq(mallctlnametomib(cmd, mib, &miblen), + 0, "Unexpected mallctlnametomib(\"%s\", ...) failure", cmd); + mib[2] = ind; + z = sizeof(size_t); + assert_d_eq(mallctlbymib(mib, miblen, &ret, &z, NULL, 0), + 0, "Unexpected mallctlbymib([\"%s\", %zu], ...) failure", cmd, ind); + + return (ret); +} + +static size_t +get_small_size(size_t ind) +{ + + return (get_size_impl("arenas.bin.0.size", ind)); +} + +static size_t +get_large_size(size_t ind) +{ + + return (get_size_impl("arenas.lrun.0.size", ind)); +} + +static size_t +get_huge_size(size_t ind) +{ + + return (get_size_impl("arenas.hchunk.0.size", ind)); +} + +TEST_BEGIN(test_arena_reset) +{ +#define NHUGE 4 + unsigned arena_ind, nsmall, nlarge, nhuge, nptrs, i; + size_t sz, miblen; + void **ptrs; + size_t mib[3]; + tsd_t *tsd; + + test_skip_if((config_valgrind && unlikely(in_valgrind)) || (config_fill + && unlikely(opt_quarantine))); + + sz = sizeof(unsigned); + assert_d_eq(mallctl("arenas.extend", &arena_ind, &sz, NULL, 0), 0, + "Unexpected mallctl() failure"); + + nsmall = get_nsmall(); + nlarge = get_nlarge(); + nhuge = get_nhuge() > NHUGE ? NHUGE : get_nhuge(); + nptrs = nsmall + nlarge + nhuge; + ptrs = (void **)malloc(nptrs * sizeof(void *)); + assert_ptr_not_null(ptrs, "Unexpected malloc() failure"); + + /* Allocate objects with a wide range of sizes. */ + for (i = 0; i < nsmall; i++) { + sz = get_small_size(i); + ptrs[i] = mallocx(sz, MALLOCX_ARENA(arena_ind)); + assert_ptr_not_null(ptrs[i], + "Unexpected mallocx(%zu, MALLOCX_ARENA(%u)) failure", sz, + arena_ind); + } + for (i = 0; i < nlarge; i++) { + sz = get_large_size(i); + ptrs[nsmall + i] = mallocx(sz, MALLOCX_ARENA(arena_ind)); + assert_ptr_not_null(ptrs[i], + "Unexpected mallocx(%zu, MALLOCX_ARENA(%u)) failure", sz, + arena_ind); + } + for (i = 0; i < nhuge; i++) { + sz = get_huge_size(i); + ptrs[nsmall + nlarge + i] = mallocx(sz, + MALLOCX_ARENA(arena_ind)); + assert_ptr_not_null(ptrs[i], + "Unexpected mallocx(%zu, MALLOCX_ARENA(%u)) failure", sz, + arena_ind); + } + + tsd = tsd_fetch(); + + /* Verify allocations. */ + for (i = 0; i < nptrs; i++) { + assert_zu_gt(ivsalloc(tsd, ptrs[i], false), 0, + "Allocation should have queryable size"); + } + + /* Reset. */ + miblen = sizeof(mib)/sizeof(size_t); + assert_d_eq(mallctlnametomib("arena.0.reset", mib, &miblen), 0, + "Unexpected mallctlnametomib() failure"); + mib[1] = (size_t)arena_ind; + assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, + "Unexpected mallctlbymib() failure"); + + /* Verify allocations no longer exist. */ + for (i = 0; i < nptrs; i++) { + assert_zu_eq(ivsalloc(tsd, ptrs[i], false), 0, + "Allocation should no longer exist"); + } + + free(ptrs); +} +TEST_END + +int +main(void) +{ + + return (test( + test_arena_reset)); +} From 71d94828a2fa807054ea8c01486667c4bd7649b1 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 22 Apr 2016 21:27:17 -0700 Subject: [PATCH 55/82] Fix degenerate mb_write() compilation error. 
This resolves #375. --- include/jemalloc/internal/mb.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/jemalloc/internal/mb.h b/include/jemalloc/internal/mb.h index 81129d08..437c86f7 100644 --- a/include/jemalloc/internal/mb.h +++ b/include/jemalloc/internal/mb.h @@ -42,7 +42,7 @@ mb_write(void) : /* Inputs. */ : "memory" /* Clobbers. */ ); -#else +# else /* * This is hopefully enough to keep the compiler from reordering * instructions around this one. @@ -52,7 +52,7 @@ mb_write(void) : /* Inputs. */ : "memory" /* Clobbers. */ ); -#endif +# endif } #elif (defined(__amd64__) || defined(__x86_64__)) JEMALLOC_INLINE void @@ -104,7 +104,7 @@ mb_write(void) { malloc_mutex_t mtx; - malloc_mutex_init(&mtx, WITNESS_RANK_OMIT); + malloc_mutex_init(&mtx, "mb", WITNESS_RANK_OMIT); malloc_mutex_lock(NULL, &mtx); malloc_mutex_unlock(NULL, &mtx); } From 259f8ebbfc025eec17695c3d14019f17e414791f Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 22 Apr 2016 22:21:31 -0700 Subject: [PATCH 56/82] Fix arena_choose_hard() regression. This regression was caused by 66cd953514a18477eb49732e40d5c2ab5f1b12c5 (Do not allocate metadata via non-auto arenas, nor tcaches.). --- src/jemalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 3bd39c3c..8b744e68 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -619,7 +619,7 @@ arena_choose_hard(tsd_t *tsd, bool internal) for (j = 0; j < 2; j++) { if (arena_nthreads_get(arena_get(tsd, choose[j], false), - !!j) == 0 || first_null != narenas_auto) { + !!j) == 0 || first_null == narenas_auto) { /* * Use an unloaded arena, or the least loaded * arena if all arenas are already initialized. From 2fe64d237cf65baa9f6056622e896949933355e5 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 25 Apr 2016 12:51:17 -0700 Subject: [PATCH 57/82] Fix arena_reset() test to avoid tcache. --- test/unit/arena_reset.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/test/unit/arena_reset.c b/test/unit/arena_reset.c index 52170cc4..8e769de6 100644 --- a/test/unit/arena_reset.c +++ b/test/unit/arena_reset.c @@ -84,6 +84,7 @@ TEST_BEGIN(test_arena_reset) unsigned arena_ind, nsmall, nlarge, nhuge, nptrs, i; size_t sz, miblen; void **ptrs; + int flags; size_t mib[3]; tsd_t *tsd; @@ -94,6 +95,8 @@ TEST_BEGIN(test_arena_reset) assert_d_eq(mallctl("arenas.extend", &arena_ind, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); + flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE; + nsmall = get_nsmall(); nlarge = get_nlarge(); nhuge = get_nhuge() > NHUGE ? NHUGE : get_nhuge(); @@ -104,25 +107,21 @@ TEST_BEGIN(test_arena_reset) /* Allocate objects with a wide range of sizes. 
*/ for (i = 0; i < nsmall; i++) { sz = get_small_size(i); - ptrs[i] = mallocx(sz, MALLOCX_ARENA(arena_ind)); + ptrs[i] = mallocx(sz, flags); assert_ptr_not_null(ptrs[i], - "Unexpected mallocx(%zu, MALLOCX_ARENA(%u)) failure", sz, - arena_ind); + "Unexpected mallocx(%zu, %#x) failure", sz, flags); } for (i = 0; i < nlarge; i++) { sz = get_large_size(i); - ptrs[nsmall + i] = mallocx(sz, MALLOCX_ARENA(arena_ind)); + ptrs[nsmall + i] = mallocx(sz, flags); assert_ptr_not_null(ptrs[i], - "Unexpected mallocx(%zu, MALLOCX_ARENA(%u)) failure", sz, - arena_ind); + "Unexpected mallocx(%zu, %#x) failure", sz, flags); } for (i = 0; i < nhuge; i++) { sz = get_huge_size(i); - ptrs[nsmall + nlarge + i] = mallocx(sz, - MALLOCX_ARENA(arena_ind)); + ptrs[nsmall + nlarge + i] = mallocx(sz, flags); assert_ptr_not_null(ptrs[i], - "Unexpected mallocx(%zu, MALLOCX_ARENA(%u)) failure", sz, - arena_ind); + "Unexpected mallocx(%zu, %#x) failure", sz, flags); } tsd = tsd_fetch(); From 7e6749595a570ed6686603a1bcfdf8cf49147f19 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 25 Apr 2016 13:26:54 -0700 Subject: [PATCH 58/82] Fix arena reset effects on large/huge stats. Reset large curruns to 0 during arena reset. Do not increase huge ndalloc stats during arena reset. --- src/arena.c | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/src/arena.c b/src/arena.c index f752acad..c6859e3b 100644 --- a/src/arena.c +++ b/src/arena.c @@ -854,6 +854,17 @@ arena_huge_dalloc_stats_update(arena_t *arena, size_t usize) arena->stats.hstats[index].curhchunks--; } +static void +arena_huge_reset_stats_cancel(arena_t *arena, size_t usize) +{ + szind_t index = size2index(usize) - nlclasses - NBINS; + + cassert(config_stats); + + arena->stats.ndalloc_huge++; + arena->stats.hstats[index].ndalloc--; +} + static void arena_huge_dalloc_stats_update_undo(arena_t *arena, size_t usize) { @@ -1884,22 +1895,30 @@ arena_reset(tsd_t *tsd, arena_t *arena) } } + /* Reset curruns for large size classes. */ + if (config_stats) { + for (i = 0; i < nlclasses; i++) + arena->stats.lstats[i].curruns = 0; + } + /* Huge allocations. */ malloc_mutex_lock(tsd, &arena->huge_mtx); for (node = ql_last(&arena->huge, ql_link); node != NULL; node = ql_last(&arena->huge, ql_link)) { void *ptr = extent_node_addr_get(node); + size_t usize; malloc_mutex_unlock(tsd, &arena->huge_mtx); - /* Remove huge allocation from prof sample set. */ - if (config_prof && opt_prof) { - size_t usize; - + if (config_stats || (config_prof && opt_prof)) usize = isalloc(tsd, ptr, config_prof); + /* Remove huge allocation from prof sample set. */ + if (config_prof && opt_prof) prof_free(tsd, ptr, usize); - } huge_dalloc(tsd, ptr); malloc_mutex_lock(tsd, &arena->huge_mtx); + /* Cancel out unwanted effects on stats. */ + if (config_stats) + arena_huge_reset_stats_cancel(arena, usize); } malloc_mutex_unlock(tsd, &arena->huge_mtx); From 0d970a054e5477cd6cf3639366bcc0a1a4f61b11 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 25 Apr 2016 20:26:03 -0700 Subject: [PATCH 59/82] Use separate arena for chunk tests. This assures that side effects of internal allocation don't impact tests. 
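In sketch form, the pattern the test now uses (per the diff below; error
checking omitted):

    #include <jemalloc/jemalloc.h>

    int
    main(void)
    {
        unsigned arena_ind;
        size_t sz = sizeof(unsigned);
        int flags;
        void *p;

        /* Dedicated arena for the test; allocations bypass the tcache. */
        mallctl("arenas.extend", &arena_ind, &sz, NULL, 0);
        flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE;

        p = mallocx(42, flags);
        dallocx(p, flags);

        return (0);
    }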
--- test/integration/chunk.c | 73 +++++++++++++++++++++++++--------------- 1 file changed, 45 insertions(+), 28 deletions(-) diff --git a/test/integration/chunk.c b/test/integration/chunk.c index af1c9a53..ff9bf967 100644 --- a/test/integration/chunk.c +++ b/test/integration/chunk.c @@ -121,6 +121,10 @@ TEST_BEGIN(test_chunk) { void *p; size_t old_size, new_size, large0, large1, huge0, huge1, huge2, sz; + unsigned arena_ind; + int flags; + size_t hooks_mib[3], purge_mib[3]; + size_t hooks_miblen, purge_miblen; chunk_hooks_t new_hooks = { chunk_alloc, chunk_dalloc, @@ -132,10 +136,19 @@ TEST_BEGIN(test_chunk) }; bool xallocx_success_a, xallocx_success_b, xallocx_success_c; + sz = sizeof(unsigned); + assert_d_eq(mallctl("arenas.extend", &arena_ind, &sz, NULL, 0), 0, + "Unexpected mallctl() failure"); + flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE; + /* Install custom chunk hooks. */ + hooks_miblen = sizeof(hooks_mib)/sizeof(size_t); + assert_d_eq(mallctlnametomib("arena.0.chunk_hooks", hooks_mib, + &hooks_miblen), 0, "Unexpected mallctlnametomib() failure"); + hooks_mib[1] = (size_t)arena_ind; old_size = sizeof(chunk_hooks_t); new_size = sizeof(chunk_hooks_t); - assert_d_eq(mallctl("arena.0.chunk_hooks", &old_hooks, &old_size, + assert_d_eq(mallctlbymib(hooks_mib, hooks_miblen, &old_hooks, &old_size, &new_hooks, new_size), 0, "Unexpected chunk_hooks error"); orig_hooks = old_hooks; assert_ptr_ne(old_hooks.alloc, chunk_alloc, "Unexpected alloc error"); @@ -165,45 +178,49 @@ TEST_BEGIN(test_chunk) "Unexpected arenas.hchunk.2.size failure"); /* Test dalloc/decommit/purge cascade. */ + purge_miblen = sizeof(purge_mib)/sizeof(size_t); + assert_d_eq(mallctlnametomib("arena.0.purge", purge_mib, &purge_miblen), + 0, "Unexpected mallctlnametomib() failure"); + purge_mib[1] = (size_t)arena_ind; do_dalloc = false; do_decommit = false; - p = mallocx(huge0 * 2, 0); + p = mallocx(huge0 * 2, flags); assert_ptr_not_null(p, "Unexpected mallocx() error"); did_dalloc = false; did_decommit = false; did_purge = false; did_split = false; - xallocx_success_a = (xallocx(p, huge0, 0, 0) == huge0); - assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, - "Unexpected arena.0.purge error"); + xallocx_success_a = (xallocx(p, huge0, 0, flags) == huge0); + assert_d_eq(mallctlbymib(purge_mib, purge_miblen, NULL, NULL, NULL, 0), + 0, "Unexpected arena.%u.purge error", arena_ind); if (xallocx_success_a) { assert_true(did_dalloc, "Expected dalloc"); assert_false(did_decommit, "Unexpected decommit"); assert_true(did_purge, "Expected purge"); } assert_true(did_split, "Expected split"); - dallocx(p, 0); + dallocx(p, flags); do_dalloc = true; /* Test decommit/commit and observe split/merge. 
*/ do_dalloc = false; do_decommit = true; - p = mallocx(huge0 * 2, 0); + p = mallocx(huge0 * 2, flags); assert_ptr_not_null(p, "Unexpected mallocx() error"); did_decommit = false; did_commit = false; did_split = false; did_merge = false; - xallocx_success_b = (xallocx(p, huge0, 0, 0) == huge0); - assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, - "Unexpected arena.0.purge error"); + xallocx_success_b = (xallocx(p, huge0, 0, flags) == huge0); + assert_d_eq(mallctlbymib(purge_mib, purge_miblen, NULL, NULL, NULL, 0), + 0, "Unexpected arena.%u.purge error", arena_ind); if (xallocx_success_b) assert_true(did_split, "Expected split"); - xallocx_success_c = (xallocx(p, huge0 * 2, 0, 0) == huge0 * 2); + xallocx_success_c = (xallocx(p, huge0 * 2, 0, flags) == huge0 * 2); assert_b_eq(did_decommit, did_commit, "Expected decommit/commit match"); if (xallocx_success_b && xallocx_success_c) assert_true(did_merge, "Expected merge"); - dallocx(p, 0); + dallocx(p, flags); do_dalloc = true; do_decommit = false; @@ -214,42 +231,42 @@ TEST_BEGIN(test_chunk) * successful xallocx() from size=huge2 to size=huge1 is * guaranteed to leave trailing purgeable memory. */ - p = mallocx(huge2, 0); + p = mallocx(huge2, flags); assert_ptr_not_null(p, "Unexpected mallocx() error"); did_purge = false; - assert_zu_eq(xallocx(p, huge1, 0, 0), huge1, + assert_zu_eq(xallocx(p, huge1, 0, flags), huge1, "Unexpected xallocx() failure"); assert_true(did_purge, "Expected purge"); - dallocx(p, 0); + dallocx(p, flags); } /* Test decommit for large allocations. */ do_decommit = true; - p = mallocx(large1, 0); + p = mallocx(large1, flags); assert_ptr_not_null(p, "Unexpected mallocx() error"); - assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, - "Unexpected arena.0.purge error"); + assert_d_eq(mallctlbymib(purge_mib, purge_miblen, NULL, NULL, NULL, 0), + 0, "Unexpected arena.%u.purge error", arena_ind); did_decommit = false; - assert_zu_eq(xallocx(p, large0, 0, 0), large0, + assert_zu_eq(xallocx(p, large0, 0, flags), large0, "Unexpected xallocx() failure"); - assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, - "Unexpected arena.0.purge error"); + assert_d_eq(mallctlbymib(purge_mib, purge_miblen, NULL, NULL, NULL, 0), + 0, "Unexpected arena.%u.purge error", arena_ind); did_commit = false; - assert_zu_eq(xallocx(p, large1, 0, 0), large1, + assert_zu_eq(xallocx(p, large1, 0, flags), large1, "Unexpected xallocx() failure"); assert_b_eq(did_decommit, did_commit, "Expected decommit/commit match"); - dallocx(p, 0); + dallocx(p, flags); do_decommit = false; /* Make sure non-huge allocation succeeds. */ - p = mallocx(42, 0); + p = mallocx(42, flags); assert_ptr_not_null(p, "Unexpected mallocx() error"); - dallocx(p, 0); + dallocx(p, flags); /* Restore chunk hooks. */ - assert_d_eq(mallctl("arena.0.chunk_hooks", NULL, NULL, &old_hooks, - new_size), 0, "Unexpected chunk_hooks error"); - assert_d_eq(mallctl("arena.0.chunk_hooks", &old_hooks, &old_size, + assert_d_eq(mallctlbymib(hooks_mib, hooks_miblen, NULL, NULL, + &old_hooks, new_size), 0, "Unexpected chunk_hooks error"); + assert_d_eq(mallctlbymib(hooks_mib, hooks_miblen, &old_hooks, &old_size, NULL, 0), 0, "Unexpected chunk_hooks error"); assert_ptr_eq(old_hooks.alloc, orig_hooks.alloc, "Unexpected alloc error"); From 174c0c3a9c63b3a0bfa32381148b537e9b9af96d Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 25 Apr 2016 23:14:40 -0700 Subject: [PATCH 60/82] Fix fork()-related lock rank ordering reversals. 
--- Makefile.in | 1 + include/jemalloc/internal/arena.h | 5 +- include/jemalloc/internal/private_symbols.txt | 11 +++- include/jemalloc/internal/prof.h | 3 +- include/jemalloc/internal/tsd.h | 4 +- include/jemalloc/internal/witness.h | 3 ++ src/arena.c | 32 +++++++++--- src/jemalloc.c | 45 +++++++++++----- src/prof.c | 52 +++++++++++++------ src/witness.c | 37 ++++++++++++- test/unit/fork.c | 39 ++++++++++++++ 11 files changed, 188 insertions(+), 44 deletions(-) create mode 100644 test/unit/fork.c diff --git a/Makefile.in b/Makefile.in index ddc89157..a98ebd62 100644 --- a/Makefile.in +++ b/Makefile.in @@ -141,6 +141,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/bitmap.c \ $(srcroot)test/unit/ckh.c \ $(srcroot)test/unit/decay.c \ + $(srcroot)test/unit/fork.c \ $(srcroot)test/unit/hash.c \ $(srcroot)test/unit/junk.c \ $(srcroot)test/unit/junk_alloc.c \ diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index f2685f6f..53e6b3ad 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -601,7 +601,10 @@ void arena_nthreads_inc(arena_t *arena, bool internal); void arena_nthreads_dec(arena_t *arena, bool internal); arena_t *arena_new(tsd_t *tsd, unsigned ind); bool arena_boot(void); -void arena_prefork(tsd_t *tsd, arena_t *arena); +void arena_prefork0(tsd_t *tsd, arena_t *arena); +void arena_prefork1(tsd_t *tsd, arena_t *arena); +void arena_prefork2(tsd_t *tsd, arena_t *arena); +void arena_prefork3(tsd_t *tsd, arena_t *arena); void arena_postfork_parent(tsd_t *tsd, arena_t *arena); void arena_postfork_child(tsd_t *tsd, arena_t *arena); diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index c7ff8529..0eb7778c 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -84,7 +84,10 @@ arena_nthreads_inc arena_palloc arena_postfork_child arena_postfork_parent -arena_prefork +arena_prefork0 +arena_prefork1 +arena_prefork2 +arena_prefork3 arena_prof_accum arena_prof_accum_impl arena_prof_accum_locked @@ -432,7 +435,8 @@ prof_malloc_sample_object prof_mdump prof_postfork_child prof_postfork_parent -prof_prefork +prof_prefork0 +prof_prefork1 prof_realloc prof_reset prof_sample_accum_update @@ -583,11 +587,14 @@ valgrind_make_mem_undefined witness_assert_lockless witness_assert_not_owner witness_assert_owner +witness_fork_cleanup witness_init witness_lock witness_lock_error witness_lockless_error witness_not_owner_error witness_owner_error +witness_postfork +witness_prefork witness_unlock witnesses_cleanup diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index 047bd0b7..4fe17875 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -316,7 +316,8 @@ bool prof_gdump_set(tsd_t *tsd, bool active); void prof_boot0(void); void prof_boot1(void); bool prof_boot2(tsd_t *tsd); -void prof_prefork(tsd_t *tsd); +void prof_prefork0(tsd_t *tsd); +void prof_prefork1(tsd_t *tsd); void prof_postfork_parent(tsd_t *tsd); void prof_postfork_child(tsd_t *tsd); void prof_sample_threshold_update(prof_tdata_t *tdata); diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 1a1b5c32..4a99ee6e 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -544,6 +544,7 @@ struct tsd_init_head_s { O(tcache_enabled, tcache_enabled_t) \ O(quarantine, quarantine_t *) \ O(witnesses, witness_list_t) \ + O(witness_fork, bool) \ #define TSD_INITIALIZER 
{ \ tsd_state_uninitialized, \ @@ -558,7 +559,8 @@ struct tsd_init_head_s { false, \ tcache_enabled_default, \ NULL, \ - ql_head_initializer(witnesses) \ + ql_head_initializer(witnesses), \ + false \ } struct tsd_s { diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h index 22f0b2c7..ecdc034a 100644 --- a/include/jemalloc/internal/witness.h +++ b/include/jemalloc/internal/witness.h @@ -94,6 +94,9 @@ extern witness_lockless_error_t *witness_lockless_error; void witness_assert_lockless(tsd_t *tsd); void witnesses_cleanup(tsd_t *tsd); +void witness_fork_cleanup(tsd_t *tsd); +void witness_prefork(tsd_t *tsd); +void witness_postfork(tsd_t *tsd); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/src/arena.c b/src/arena.c index c6859e3b..969ad85d 100644 --- a/src/arena.c +++ b/src/arena.c @@ -3822,16 +3822,34 @@ arena_boot(void) } void -arena_prefork(tsd_t *tsd, arena_t *arena) +arena_prefork0(tsd_t *tsd, arena_t *arena) +{ + + malloc_mutex_prefork(tsd, &arena->lock); +} + +void +arena_prefork1(tsd_t *tsd, arena_t *arena) +{ + + malloc_mutex_prefork(tsd, &arena->chunks_mtx); +} + +void +arena_prefork2(tsd_t *tsd, arena_t *arena) +{ + + malloc_mutex_prefork(tsd, &arena->node_cache_mtx); +} + +void +arena_prefork3(tsd_t *tsd, arena_t *arena) { unsigned i; - malloc_mutex_prefork(tsd, &arena->lock); - malloc_mutex_prefork(tsd, &arena->huge_mtx); - malloc_mutex_prefork(tsd, &arena->chunks_mtx); - malloc_mutex_prefork(tsd, &arena->node_cache_mtx); for (i = 0; i < NBINS; i++) malloc_mutex_prefork(tsd, &arena->bins[i].lock); + malloc_mutex_prefork(tsd, &arena->huge_mtx); } void @@ -3839,11 +3857,11 @@ arena_postfork_parent(tsd_t *tsd, arena_t *arena) { unsigned i; + malloc_mutex_postfork_parent(tsd, &arena->huge_mtx); for (i = 0; i < NBINS; i++) malloc_mutex_postfork_parent(tsd, &arena->bins[i].lock); malloc_mutex_postfork_parent(tsd, &arena->node_cache_mtx); malloc_mutex_postfork_parent(tsd, &arena->chunks_mtx); - malloc_mutex_postfork_parent(tsd, &arena->huge_mtx); malloc_mutex_postfork_parent(tsd, &arena->lock); } @@ -3852,10 +3870,10 @@ arena_postfork_child(tsd_t *tsd, arena_t *arena) { unsigned i; + malloc_mutex_postfork_child(tsd, &arena->huge_mtx); for (i = 0; i < NBINS; i++) malloc_mutex_postfork_child(tsd, &arena->bins[i].lock); malloc_mutex_postfork_child(tsd, &arena->node_cache_mtx); malloc_mutex_postfork_child(tsd, &arena->chunks_mtx); - malloc_mutex_postfork_child(tsd, &arena->huge_mtx); malloc_mutex_postfork_child(tsd, &arena->lock); } diff --git a/src/jemalloc.c b/src/jemalloc.c index 8b744e68..a7acf5f7 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2757,7 +2757,8 @@ _malloc_prefork(void) #endif { tsd_t *tsd; - unsigned i, narenas; + unsigned i, j, narenas; + arena_t *arena; #ifdef JEMALLOC_MUTEX_INIT_CB if (!malloc_initialized()) @@ -2767,18 +2768,32 @@ _malloc_prefork(void) tsd = tsd_fetch(); - /* Acquire all mutexes in a safe order. */ - ctl_prefork(tsd); - prof_prefork(tsd); - malloc_mutex_prefork(tsd, &arenas_lock); - for (i = 0, narenas = narenas_total_get(); i < narenas; i++) { - arena_t *arena; + narenas = narenas_total_get(); - if ((arena = arena_get(tsd, i, false)) != NULL) - arena_prefork(tsd, arena); + /* Acquire all mutexes in a safe order. 
*/ + witness_prefork(tsd); + ctl_prefork(tsd); + malloc_mutex_prefork(tsd, &arenas_lock); + prof_prefork0(tsd); + for (i = 0; i < 3; i++) { + for (j = 0; j < narenas; j++) { + if ((arena = arena_get(tsd, j, false)) != NULL) { + switch (i) { + case 0: arena_prefork0(tsd, arena); break; + case 1: arena_prefork1(tsd, arena); break; + case 2: arena_prefork2(tsd, arena); break; + default: not_reached(); + } + } + } } - chunk_prefork(tsd); base_prefork(tsd); + chunk_prefork(tsd); + for (i = 0; i < narenas; i++) { + if ((arena = arena_get(tsd, i, false)) != NULL) + arena_prefork3(tsd, arena); + } + prof_prefork1(tsd); } #ifndef JEMALLOC_MUTEX_INIT_CB @@ -2801,17 +2816,18 @@ _malloc_postfork(void) tsd = tsd_fetch(); /* Release all mutexes, now that fork() has completed. */ - base_postfork_parent(tsd); chunk_postfork_parent(tsd); + base_postfork_parent(tsd); for (i = 0, narenas = narenas_total_get(); i < narenas; i++) { arena_t *arena; if ((arena = arena_get(tsd, i, false)) != NULL) arena_postfork_parent(tsd, arena); } - malloc_mutex_postfork_parent(tsd, &arenas_lock); prof_postfork_parent(tsd); + malloc_mutex_postfork_parent(tsd, &arenas_lock); ctl_postfork_parent(tsd); + witness_postfork(tsd); } void @@ -2825,17 +2841,18 @@ jemalloc_postfork_child(void) tsd = tsd_fetch(); /* Release all mutexes, now that fork() has completed. */ - base_postfork_child(tsd); chunk_postfork_child(tsd); + base_postfork_child(tsd); for (i = 0, narenas = narenas_total_get(); i < narenas; i++) { arena_t *arena; if ((arena = arena_get(tsd, i, false)) != NULL) arena_postfork_child(tsd, arena); } - malloc_mutex_postfork_child(tsd, &arenas_lock); prof_postfork_child(tsd); + malloc_mutex_postfork_child(tsd, &arenas_lock); ctl_postfork_child(tsd); + witness_postfork(tsd); } /******************************************************************************/ diff --git a/src/prof.c b/src/prof.c index 82604632..92edba84 100644 --- a/src/prof.c +++ b/src/prof.c @@ -2257,20 +2257,32 @@ prof_boot2(tsd_t *tsd) } void -prof_prefork(tsd_t *tsd) +prof_prefork0(tsd_t *tsd) { if (opt_prof) { unsigned i; - malloc_mutex_prefork(tsd, &tdatas_mtx); + malloc_mutex_prefork(tsd, &prof_dump_mtx); malloc_mutex_prefork(tsd, &bt2gctx_mtx); - malloc_mutex_prefork(tsd, &next_thr_uid_mtx); - malloc_mutex_prefork(tsd, &prof_dump_seq_mtx); - for (i = 0; i < PROF_NCTX_LOCKS; i++) - malloc_mutex_prefork(tsd, &gctx_locks[i]); + malloc_mutex_prefork(tsd, &tdatas_mtx); for (i = 0; i < PROF_NTDATA_LOCKS; i++) malloc_mutex_prefork(tsd, &tdata_locks[i]); + for (i = 0; i < PROF_NCTX_LOCKS; i++) + malloc_mutex_prefork(tsd, &gctx_locks[i]); + } +} + +void +prof_prefork1(tsd_t *tsd) +{ + + if (opt_prof) { + malloc_mutex_prefork(tsd, &prof_active_mtx); + malloc_mutex_prefork(tsd, &prof_dump_seq_mtx); + malloc_mutex_prefork(tsd, &prof_gdump_mtx); + malloc_mutex_prefork(tsd, &next_thr_uid_mtx); + malloc_mutex_prefork(tsd, &prof_thread_active_init_mtx); } } @@ -2281,14 +2293,18 @@ prof_postfork_parent(tsd_t *tsd) if (opt_prof) { unsigned i; - for (i = 0; i < PROF_NTDATA_LOCKS; i++) - malloc_mutex_postfork_parent(tsd, &tdata_locks[i]); + malloc_mutex_postfork_parent(tsd, &prof_thread_active_init_mtx); + malloc_mutex_postfork_parent(tsd, &next_thr_uid_mtx); + malloc_mutex_postfork_parent(tsd, &prof_gdump_mtx); + malloc_mutex_postfork_parent(tsd, &prof_dump_seq_mtx); + malloc_mutex_postfork_parent(tsd, &prof_active_mtx); for (i = 0; i < PROF_NCTX_LOCKS; i++) malloc_mutex_postfork_parent(tsd, &gctx_locks[i]); - malloc_mutex_postfork_parent(tsd, &prof_dump_seq_mtx); - 
malloc_mutex_postfork_parent(tsd, &next_thr_uid_mtx); - malloc_mutex_postfork_parent(tsd, &bt2gctx_mtx); + for (i = 0; i < PROF_NTDATA_LOCKS; i++) + malloc_mutex_postfork_parent(tsd, &tdata_locks[i]); malloc_mutex_postfork_parent(tsd, &tdatas_mtx); + malloc_mutex_postfork_parent(tsd, &bt2gctx_mtx); + malloc_mutex_postfork_parent(tsd, &prof_dump_mtx); } } @@ -2299,14 +2315,18 @@ prof_postfork_child(tsd_t *tsd) if (opt_prof) { unsigned i; - for (i = 0; i < PROF_NTDATA_LOCKS; i++) - malloc_mutex_postfork_child(tsd, &tdata_locks[i]); + malloc_mutex_postfork_child(tsd, &prof_thread_active_init_mtx); + malloc_mutex_postfork_child(tsd, &next_thr_uid_mtx); + malloc_mutex_postfork_child(tsd, &prof_gdump_mtx); + malloc_mutex_postfork_child(tsd, &prof_dump_seq_mtx); + malloc_mutex_postfork_child(tsd, &prof_active_mtx); for (i = 0; i < PROF_NCTX_LOCKS; i++) malloc_mutex_postfork_child(tsd, &gctx_locks[i]); - malloc_mutex_postfork_child(tsd, &prof_dump_seq_mtx); - malloc_mutex_postfork_child(tsd, &next_thr_uid_mtx); - malloc_mutex_postfork_child(tsd, &bt2gctx_mtx); + for (i = 0; i < PROF_NTDATA_LOCKS; i++) + malloc_mutex_postfork_child(tsd, &tdata_locks[i]); malloc_mutex_postfork_child(tsd, &tdatas_mtx); + malloc_mutex_postfork_child(tsd, &bt2gctx_mtx); + malloc_mutex_postfork_child(tsd, &prof_dump_mtx); } } diff --git a/src/witness.c b/src/witness.c index 444d200f..b5384a29 100644 --- a/src/witness.c +++ b/src/witness.c @@ -48,9 +48,21 @@ witness_lock(tsd_t *tsd, witness_t *witness) witnesses = tsd_witnessesp_get(tsd); w = ql_last(witnesses, link); - if (w != NULL && w->rank >= witness->rank && (w->comp == NULL || - w->comp != witness->comp || w->comp(w, witness) > 0)) + if (w == NULL) { + /* No other locks; do nothing. */ + } else if (tsd_witness_fork_get(tsd) && w->rank <= witness->rank) { + /* Forking, and relaxed ranking satisfied. */ + } else if (w->rank > witness->rank) { + /* Not forking, rank order reversal. */ witness_lock_error(witnesses, witness); + } else if (w->rank == witness->rank && (w->comp == NULL || w->comp != + witness->comp || w->comp(w, witness) > 0)) { + /* + * Missing/incompatible comparison function, or comparison + * function indicates rank order reversal. + */ + witness_lock_error(witnesses, witness); + } ql_elm_new(witness, link); ql_tail_insert(witnesses, witness, link); @@ -194,3 +206,24 @@ witnesses_cleanup(tsd_t *tsd) /* Do nothing. */ } + +void +witness_fork_cleanup(tsd_t *tsd) +{ + + /* Do nothing. */ +} + +void +witness_prefork(tsd_t *tsd) +{ + + tsd_witness_fork_set(tsd, true); +} + +void +witness_postfork(tsd_t *tsd) +{ + + tsd_witness_fork_set(tsd, false); +} diff --git a/test/unit/fork.c b/test/unit/fork.c new file mode 100644 index 00000000..890bc869 --- /dev/null +++ b/test/unit/fork.c @@ -0,0 +1,39 @@ +#include "test/jemalloc_test.h" + +#include + +TEST_BEGIN(test_fork) +{ + void *p; + pid_t pid; + + p = malloc(1); + assert_ptr_not_null(p, "Unexpected malloc() failure"); + + pid = fork(); + if (pid == -1) { + /* Error. */ + test_fail("Unexpected fork() failure"); + } else if (pid == 0) { + /* Child. */ + exit(0); + } else { + int status; + + /* Parent. 
*/ + free(p); + do { + if (waitpid(pid, &status, 0) == -1) + test_fail("Unexpected waitpid() failure"); + } while (!WIFEXITED(status) && !WIFSIGNALED(status)); + } +} +TEST_END + +int +main(void) +{ + + return (test( + test_fork)); +} From 108c4a11e96d57fd71751efa23ab986a236a0c7d Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 26 Apr 2016 10:47:22 -0700 Subject: [PATCH 61/82] Fix witness/fork() interactions. Fix witness to clear its list of owned mutexes in the child if platform-specific malloc_mutex code re-initializes mutexes rather than unlocking them. --- include/jemalloc/internal/private_symbols.txt | 3 ++- include/jemalloc/internal/witness.h | 3 ++- src/jemalloc.c | 6 ++--- src/witness.c | 14 ++++++++++- test/unit/fork.c | 25 ++++++++++++++++--- 5 files changed, 42 insertions(+), 9 deletions(-) diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 0eb7778c..de884fcf 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -594,7 +594,8 @@ witness_lock_error witness_lockless_error witness_not_owner_error witness_owner_error -witness_postfork +witness_postfork_child +witness_postfork_parent witness_prefork witness_unlock witnesses_cleanup diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h index ecdc034a..b2e6e825 100644 --- a/include/jemalloc/internal/witness.h +++ b/include/jemalloc/internal/witness.h @@ -96,7 +96,8 @@ void witness_assert_lockless(tsd_t *tsd); void witnesses_cleanup(tsd_t *tsd); void witness_fork_cleanup(tsd_t *tsd); void witness_prefork(tsd_t *tsd); -void witness_postfork(tsd_t *tsd); +void witness_postfork_parent(tsd_t *tsd); +void witness_postfork_child(tsd_t *tsd); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/src/jemalloc.c b/src/jemalloc.c index a7acf5f7..cd97ea16 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2770,8 +2770,8 @@ _malloc_prefork(void) narenas = narenas_total_get(); - /* Acquire all mutexes in a safe order. */ witness_prefork(tsd); + /* Acquire all mutexes in a safe order. */ ctl_prefork(tsd); malloc_mutex_prefork(tsd, &arenas_lock); prof_prefork0(tsd); @@ -2815,6 +2815,7 @@ _malloc_postfork(void) tsd = tsd_fetch(); + witness_postfork_parent(tsd); /* Release all mutexes, now that fork() has completed. */ chunk_postfork_parent(tsd); base_postfork_parent(tsd); @@ -2827,7 +2828,6 @@ _malloc_postfork(void) prof_postfork_parent(tsd); malloc_mutex_postfork_parent(tsd, &arenas_lock); ctl_postfork_parent(tsd); - witness_postfork(tsd); } void @@ -2840,6 +2840,7 @@ jemalloc_postfork_child(void) tsd = tsd_fetch(); + witness_postfork_child(tsd); /* Release all mutexes, now that fork() has completed. 
*/ chunk_postfork_child(tsd); base_postfork_child(tsd); @@ -2852,7 +2853,6 @@ jemalloc_postfork_child(void) prof_postfork_child(tsd); malloc_mutex_postfork_child(tsd, &arenas_lock); ctl_postfork_child(tsd); - witness_postfork(tsd); } /******************************************************************************/ diff --git a/src/witness.c b/src/witness.c index b5384a29..31c36a24 100644 --- a/src/witness.c +++ b/src/witness.c @@ -222,8 +222,20 @@ witness_prefork(tsd_t *tsd) } void -witness_postfork(tsd_t *tsd) +witness_postfork_parent(tsd_t *tsd) { tsd_witness_fork_set(tsd, false); } + +void +witness_postfork_child(tsd_t *tsd) +{ +#ifndef JEMALLOC_MUTEX_INIT_CB + witness_list_t *witnesses; + + witnesses = tsd_witnessesp_get(tsd); + ql_new(witnesses); +#endif + tsd_witness_fork_set(tsd, false); +} diff --git a/test/unit/fork.c b/test/unit/fork.c index 890bc869..d64f2e09 100644 --- a/test/unit/fork.c +++ b/test/unit/fork.c @@ -11,6 +11,13 @@ TEST_BEGIN(test_fork) assert_ptr_not_null(p, "Unexpected malloc() failure"); pid = fork(); + + free(p); + + p = malloc(64); + assert_ptr_not_null(p, "Unexpected malloc() failure"); + free(p); + if (pid == -1) { /* Error. */ test_fail("Unexpected fork() failure"); @@ -21,11 +28,23 @@ TEST_BEGIN(test_fork) int status; /* Parent. */ - free(p); - do { + while (true) { if (waitpid(pid, &status, 0) == -1) test_fail("Unexpected waitpid() failure"); - } while (!WIFEXITED(status) && !WIFSIGNALED(status)); + if (WIFSIGNALED(status)) { + test_fail("Unexpected child termination due to " + "signal %d", WTERMSIG(status)); + break; + } + if (WIFEXITED(status)) { + if (WEXITSTATUS(status) != 0) { + test_fail( + "Unexpected child exit value %d", + WEXITSTATUS(status)); + } + break; + } + } } } TEST_END From 9aa1543e9c1cdd8373985e16e4610fd84caafd85 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 3 May 2016 09:37:54 -0700 Subject: [PATCH 62/82] Update mallocx() OOM test to deal with smaller hugemax. Depending on virtual memory resource limits, it is necessary to attempt allocating three maximally sized objects to trigger OOM rather than just two, since the maximum supported size is slightly less than half the total virtual memory address space. This fixes a test failure that was introduced by 0c516a00c4cb28cff55ce0995f756b5aae074c9e (Make *allocx() size class overflow behavior defined.). This resolves #379. --- test/integration/mallocx.c | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/test/integration/mallocx.c b/test/integration/mallocx.c index d82bf422..578c229a 100644 --- a/test/integration/mallocx.c +++ b/test/integration/mallocx.c @@ -69,19 +69,28 @@ TEST_END TEST_BEGIN(test_oom) { + size_t hugemax; + bool oom; + void *ptrs[3]; + unsigned i; /* - * It should be impossible to allocate two objects that each consume - * more than half the virtual address space. + * It should be impossible to allocate three objects that each consume + * nearly half the virtual address space. 
*/ - { - size_t hugemax = get_huge_size(get_nhuge()-1); - void *p = mallocx(hugemax, 0); - if (p != NULL) { - assert_ptr_null(mallocx(hugemax, 0), - "Expected OOM for mallocx(size=%#zx, 0)", hugemax); - dallocx(p, 0); - } + hugemax = get_huge_size(get_nhuge()-1); + oom = false; + for (i = 0; i < sizeof(ptrs) / sizeof(void *); i++) { + ptrs[i] = mallocx(hugemax, 0); + if (ptrs[i] == NULL) + oom = true; + } + assert_true(oom, + "Expected OOM during series of calls to mallocx(size=%zu, 0)", + hugemax); + for (i = 0; i < sizeof(ptrs) / sizeof(void *); i++) { + if (ptrs[i] != NULL) + dallocx(ptrs[i], 0); } #if LG_SIZEOF_PTR == 3 From 1eb46ab6e7be3db65131c7098635fe079eb5f9ed Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 3 May 2016 17:18:34 -0700 Subject: [PATCH 63/82] Don't test fork() on Windows. --- test/unit/fork.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/test/unit/fork.c b/test/unit/fork.c index d64f2e09..46c815ef 100644 --- a/test/unit/fork.c +++ b/test/unit/fork.c @@ -1,9 +1,12 @@ #include "test/jemalloc_test.h" +#ifndef _WIN32 #include +#endif TEST_BEGIN(test_fork) { +#ifndef _WIN32 void *p; pid_t pid; @@ -46,6 +49,9 @@ TEST_BEGIN(test_fork) } } } +#else + test_skip("fork(2) is irrelevant to Windows"); +#endif } TEST_END From 21cda0dc42bdcb1b5b6ecdb82157a0af84c9f0c4 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 3 May 2016 12:11:36 -0700 Subject: [PATCH 64/82] Update ChangeLog for 4.1.1. --- ChangeLog | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/ChangeLog b/ChangeLog index 69f4dbb0..9b924cdf 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,6 +4,27 @@ brevity. Much more detail can be found in the git revision history: https://github.com/jemalloc/jemalloc +* 4.1.1 (May 3, 2016) + + This bugfix release resolves a variety of mostly minor issues, though the + bitmap fix is critical for 64-bit Windows. + + Bug fixes: + - Fix the linear scan version of bitmap_sfu() to shift by the proper amount + even when sizeof(long) is not the same as sizeof(void *), as on 64-bit + Windows. (@jasone) + - Fix hashing functions to avoid unaligned memory accesses (and resulting + crashes). This is relevant at least to some ARM-based platforms. + (@rkmisra) + - Fix fork()-related lock rank ordering reversals. These reversals were + unlikely to cause deadlocks in practice except when heap profiling was + enabled and active. (@jasone) + - Fix various chunk leaks in OOM code paths. (@jasone) + - Fix malloc_stats_print() to print opt.narenas correctly. (@jasone) + - Fix MSVC-specific build/test issues. (@rustyx, yuslepukhin) + - Fix a variety of test failures that were due to test fragility rather than + core bugs. (@jasone) + * 4.1.0 (February 28, 2016) This release is primarily about optimizations, but it also incorporates a lot From 90827a3f3ef2099dcd480d542aacc9f44a0787e8 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 3 May 2016 15:00:42 -0700 Subject: [PATCH 65/82] Fix huge_palloc() regression. Split arena_choose() into arena_[i]choose() and use arena_ichoose() for arena lookup during internal allocation. This fixes huge_palloc() so that it always succeeds during extent node allocation. This regression was introduced by 66cd953514a18477eb49732e40d5c2ab5f1b12c5 (Do not allocate metadata via non-auto arenas, nor tcaches.). 
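For illustration, a minimal standalone sketch of the fallback rule that arena_ichoose() encodes, using hypothetical stand-in declarations (the authoritative definitions are in the diff below): internal (metadata) lookups prefer the arena implied by thread state, and otherwise reuse the arena the caller already holds, so huge_palloc()'s extent node allocation can no longer come up empty.

    #include <stdbool.h>

    /* Hypothetical stand-ins for the real types; illustration only. */
    typedef struct tsd_s tsd_t;
    typedef struct arena_s arena_t;
    arena_t *arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal);

    static arena_t *
    ichoose_sketch(tsd_t *tsd, arena_t *arena)
    {

            /* Thread state available: choose the internal arena. */
            if (tsd != NULL)
                    return (arena_choose_impl(tsd, NULL, true));
            /*
             * No tsd (as huge_palloc() metadata allocation permits): use the
             * caller's arena rather than failing.
             */
            return (arena);
    }
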
--- .../jemalloc/internal/jemalloc_internal.h.in | 24 +++++++++++++++++-- include/jemalloc/internal/private_symbols.txt | 2 ++ include/jemalloc/internal/tcache.h | 7 +++--- src/arena.c | 4 ++-- src/ckh.c | 6 ++--- src/ctl.c | 2 +- src/huge.c | 5 ++-- src/prof.c | 2 +- src/tcache.c | 10 ++++---- 9 files changed, 42 insertions(+), 20 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index fe58c1c6..62d5da29 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -550,7 +550,9 @@ size_t s2u_compute(size_t size); size_t s2u_lookup(size_t size); size_t s2u(size_t size); size_t sa2u(size_t size, size_t alignment); -arena_t *arena_choose(tsd_t *tsd, arena_t *arena, bool internal); +arena_t *arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal); +arena_t *arena_choose(tsd_t *tsd, arena_t *arena); +arena_t *arena_ichoose(tsd_t *tsd, arena_t *arena); arena_tdata_t *arena_tdata_get(tsd_t *tsd, unsigned ind, bool refresh_if_missing); arena_t *arena_get(tsd_t *tsd, unsigned ind, bool init_if_missing); @@ -788,7 +790,7 @@ sa2u(size_t size, size_t alignment) /* Choose an arena based on a per-thread value. */ JEMALLOC_INLINE arena_t * -arena_choose(tsd_t *tsd, arena_t *arena, bool internal) +arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal) { arena_t *ret; @@ -802,6 +804,24 @@ arena_choose(tsd_t *tsd, arena_t *arena, bool internal) return (ret); } +JEMALLOC_INLINE arena_t * +arena_choose(tsd_t *tsd, arena_t *arena) +{ + + return (arena_choose_impl(tsd, arena, false)); +} + +JEMALLOC_INLINE arena_t * +arena_ichoose(tsd_t *tsd, arena_t *arena) +{ + + assert(tsd != NULL || arena != NULL); + + if (tsd != NULL) + return (arena_choose_impl(tsd, NULL, true)); + return (arena); +} + JEMALLOC_INLINE arena_tdata_t * arena_tdata_get(tsd_t *tsd, unsigned ind, bool refresh_if_missing) { diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index de884fcf..7958a4ff 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -10,6 +10,7 @@ arena_bitselm_get_mutable arena_boot arena_choose arena_choose_hard +arena_choose_impl arena_chunk_alloc_huge arena_chunk_cache_maybe_insert arena_chunk_cache_maybe_remove @@ -35,6 +36,7 @@ arena_decay_time_set arena_dss_prec_get arena_dss_prec_set arena_get +arena_ichoose arena_init arena_lg_dirty_mult_default_get arena_lg_dirty_mult_default_set diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index 82724304..59f60235 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -293,7 +293,7 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, assert(tcache_success == (ret != NULL)); if (unlikely(!tcache_success)) { bool tcache_hard_success; - arena = arena_choose(tsd, arena, false); + arena = arena_choose(tsd, arena); if (unlikely(arena == NULL)) return (NULL); @@ -354,7 +354,7 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, * Only allocate one large object at a time, because it's quite * expensive to create one and not use it. 
*/ - arena = arena_choose(tsd, arena, false); + arena = arena_choose(tsd, arena); if (unlikely(arena == NULL)) return (NULL); @@ -460,8 +460,7 @@ tcaches_get(tsd_t *tsd, unsigned ind) { tcaches_t *elm = &tcaches[ind]; if (unlikely(elm->tcache == NULL)) { - elm->tcache = tcache_create(tsd, arena_choose(tsd, NULL, - false)); + elm->tcache = tcache_create(tsd, arena_choose(tsd, NULL)); } return (elm->tcache); } diff --git a/src/arena.c b/src/arena.c index 969ad85d..45c53c18 100644 --- a/src/arena.c +++ b/src/arena.c @@ -2649,7 +2649,7 @@ arena_malloc_hard(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind, bool zero) { - arena = arena_choose(tsd, arena, false); + arena = arena_choose(tsd, arena); if (unlikely(arena == NULL)) return (NULL); @@ -2674,7 +2674,7 @@ arena_palloc_large(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, assert(usize == PAGE_CEILING(usize)); - arena = arena_choose(tsd, arena, false); + arena = arena_choose(tsd, arena); if (unlikely(arena == NULL)) return (NULL); diff --git a/src/ckh.c b/src/ckh.c index aa9803e8..25185974 100644 --- a/src/ckh.c +++ b/src/ckh.c @@ -271,7 +271,7 @@ ckh_grow(tsd_t *tsd, ckh_t *ckh) goto label_return; } tab = (ckhc_t *)ipallocztm(tsd, usize, CACHELINE, true, NULL, - true, arena_choose(tsd, NULL, true)); + true, arena_ichoose(tsd, NULL)); if (tab == NULL) { ret = true; goto label_return; @@ -315,7 +315,7 @@ ckh_shrink(tsd_t *tsd, ckh_t *ckh) if (unlikely(usize == 0 || usize > HUGE_MAXCLASS)) return; tab = (ckhc_t *)ipallocztm(tsd, usize, CACHELINE, true, NULL, true, - arena_choose(tsd, NULL, true)); + arena_ichoose(tsd, NULL)); if (tab == NULL) { /* * An OOM error isn't worth propagating, since it doesn't @@ -392,7 +392,7 @@ ckh_new(tsd_t *tsd, ckh_t *ckh, size_t minitems, ckh_hash_t *hash, goto label_return; } ckh->tab = (ckhc_t *)ipallocztm(tsd, usize, CACHELINE, true, NULL, true, - arena_choose(tsd, NULL, true)); + arena_ichoose(tsd, NULL)); if (ckh->tab == NULL) { ret = true; goto label_return; diff --git a/src/ctl.c b/src/ctl.c index 2e811430..e0392d0e 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1306,7 +1306,7 @@ thread_arena_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, arena_t *oldarena; unsigned newind, oldind; - oldarena = arena_choose(tsd, NULL, false); + oldarena = arena_choose(tsd, NULL); if (oldarena == NULL) return (EAGAIN); diff --git a/src/huge.c b/src/huge.c index bac2425f..0b3aed0d 100644 --- a/src/huge.c +++ b/src/huge.c @@ -56,8 +56,9 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, assert(ausize >= chunksize); /* Allocate an extent node with which to track the chunk. */ + assert(tsd != NULL || arena != NULL); node = ipallocztm(tsd, CACHELINE_CEILING(sizeof(extent_node_t)), - CACHELINE, false, NULL, true, arena_choose(tsd, NULL, true)); + CACHELINE, false, NULL, true, arena_ichoose(tsd, arena)); if (node == NULL) return (NULL); @@ -66,7 +67,7 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, * it is possible to make correct junk/zero fill decisions below. */ is_zeroed = zero; - arena = arena_choose(tsd, arena, false); + arena = arena_choose(tsd, arena); if (unlikely(arena == NULL) || (ret = arena_chunk_alloc_huge(tsd, arena, usize, alignment, &is_zeroed)) == NULL) { idalloctm(tsd, node, NULL, true, true); diff --git a/src/prof.c b/src/prof.c index 92edba84..b21cd6be 100644 --- a/src/prof.c +++ b/src/prof.c @@ -795,7 +795,7 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) /* Link a prof_tctx_t into gctx for this thread. 
*/ ret.v = iallocztm(tsd, sizeof(prof_tctx_t), size2index(sizeof(prof_tctx_t)), false, NULL, true, - arena_choose(tsd, NULL, true), true); + arena_ichoose(tsd, NULL), true); if (ret.p == NULL) { if (new_gctx) prof_gctx_try_destroy(tsd, tdata, gctx, tdata); diff --git a/src/tcache.c b/src/tcache.c index ca867c72..88005f30 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -97,7 +97,7 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, assert(binind < NBINS); assert(rem <= tbin->ncached); - arena = arena_choose(tsd, NULL, false); + arena = arena_choose(tsd, NULL); assert(arena != NULL); for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) { /* Lock the arena bin associated with the first object. */ @@ -179,7 +179,7 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, assert(binind < nhbins); assert(rem <= tbin->ncached); - arena = arena_choose(tsd, NULL, false); + arena = arena_choose(tsd, NULL); assert(arena != NULL); for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) { /* Lock the arena associated with the first object. */ @@ -307,7 +307,7 @@ tcache_get_hard(tsd_t *tsd) tcache_enabled_set(false); /* Memoize. */ return (NULL); } - arena = arena_choose(tsd, NULL, false); + arena = arena_choose(tsd, NULL); if (unlikely(arena == NULL)) return (NULL); return (tcache_create(tsd, arena)); @@ -359,7 +359,7 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache) arena_t *arena; unsigned i; - arena = arena_choose(tsd, NULL, false); + arena = arena_choose(tsd, NULL); tcache_arena_dissociate(tsd, tcache, arena); for (i = 0; i < NBINS; i++) { @@ -459,7 +459,7 @@ tcaches_create(tsd_t *tsd, unsigned *r_ind) if (tcaches_avail == NULL && tcaches_past > MALLOCX_TCACHE_MAX) return (true); - arena = arena_choose(tsd, NULL, true); + arena = arena_ichoose(tsd, NULL); if (unlikely(arena == NULL)) return (true); tcache = tcache_create(tsd, arena); From 7ba6e742335117eca1268757add9adac808e4e38 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 3 May 2016 17:46:07 -0700 Subject: [PATCH 66/82] Fix a typo. --- ChangeLog | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 9b924cdf..3cb32be9 100644 --- a/ChangeLog +++ b/ChangeLog @@ -21,7 +21,7 @@ brevity. Much more detail can be found in the git revision history: enabled and active. (@jasone) - Fix various chunk leaks in OOM code paths. (@jasone) - Fix malloc_stats_print() to print opt.narenas correctly. (@jasone) - - Fix MSVC-specific build/test issues. (@rustyx, yuslepukhin) + - Fix MSVC-specific build/test issues. (@rustyx, @yuslepukhin) - Fix a variety of test failures that were due to test fragility rather than core bugs. (@jasone) From c1e9cf47f93713e9d9b7c28c13b53f90d19a2c9d Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 3 May 2016 21:28:20 -0700 Subject: [PATCH 67/82] Link against librt for clock_gettime(2) if glibc < 2.17. Link libjemalloc against librt if clock_gettime(2) is in librt rather than libc, as for versions of glibc prior to 2.17. This resolves #349. 
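For context, the link-time behavior that AC_SEARCH_LIBS([clock_gettime], [rt]) probes for can be pictured with a small test program (illustrative only): on glibc >= 2.17 it links against plain libc, while older glibc requires -lrt to be appended to LIBS, which the macro now does for the library itself rather than only for the test binaries.

    /* Illustrative configure-style probe for clock_gettime(2). */
    #include <time.h>

    int
    main(void)
    {
            struct timespec ts;

            return (clock_gettime(CLOCK_MONOTONIC, &ts) == 0 ? 0 : 1);
    }
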
--- Makefile.in | 7 +++---- configure.ac | 10 ++-------- 2 files changed, 5 insertions(+), 12 deletions(-) diff --git a/Makefile.in b/Makefile.in index a98ebd62..1cf4bf0f 100644 --- a/Makefile.in +++ b/Makefile.in @@ -28,7 +28,6 @@ CFLAGS := @CFLAGS@ LDFLAGS := @LDFLAGS@ EXTRA_LDFLAGS := @EXTRA_LDFLAGS@ LIBS := @LIBS@ -TESTLIBS := @TESTLIBS@ RPATH_EXTRA := @RPATH_EXTRA@ SO := @so@ IMPORTLIB := @importlib@ @@ -295,15 +294,15 @@ $(STATIC_LIBS): $(objroot)test/unit/%$(EXE): $(objroot)test/unit/%.$(O) $(TESTS_UNIT_LINK_OBJS) $(C_JET_OBJS) $(C_TESTLIB_UNIT_OBJS) @mkdir -p $(@D) - $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(LDFLAGS) $(filter-out -lm,$(LIBS)) -lm $(TESTLIBS) $(EXTRA_LDFLAGS) + $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(LDFLAGS) $(filter-out -lm,$(LIBS)) -lm $(EXTRA_LDFLAGS) $(objroot)test/integration/%$(EXE): $(objroot)test/integration/%.$(O) $(C_TESTLIB_INTEGRATION_OBJS) $(C_UTIL_INTEGRATION_OBJS) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) @mkdir -p $(@D) - $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(filter -lpthread,$(LIBS))) -lm $(TESTLIBS) $(EXTRA_LDFLAGS) + $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(filter -lpthread,$(LIBS))) -lm $(EXTRA_LDFLAGS) $(objroot)test/stress/%$(EXE): $(objroot)test/stress/%.$(O) $(C_JET_OBJS) $(C_TESTLIB_STRESS_OBJS) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) @mkdir -p $(@D) - $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(LIBS)) -lm $(TESTLIBS) $(EXTRA_LDFLAGS) + $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(LIBS)) -lm $(EXTRA_LDFLAGS) build_lib_shared: $(DSOS) build_lib_static: $(STATIC_LIBS) diff --git a/configure.ac b/configure.ac index 275576bd..1a1c9704 100644 --- a/configure.ac +++ b/configure.ac @@ -1239,13 +1239,8 @@ fi CPPFLAGS="$CPPFLAGS -D_REENTRANT" -dnl Check whether clock_gettime(2) is in libc or librt. This function is only -dnl used in test code, so save the result to TESTLIBS to avoid poluting LIBS. -SAVED_LIBS="${LIBS}" -LIBS= -AC_SEARCH_LIBS([clock_gettime], [rt], [TESTLIBS="${LIBS}"]) -AC_SUBST([TESTLIBS]) -LIBS="${SAVED_LIBS}" +dnl Check whether clock_gettime(2) is in libc or librt. +AC_SEARCH_LIBS([clock_gettime], [rt]) dnl Check if the GNU-specific secure_getenv function exists. AC_CHECK_FUNC([secure_getenv], @@ -1751,7 +1746,6 @@ AC_MSG_RESULT([CPPFLAGS : ${CPPFLAGS}]) AC_MSG_RESULT([LDFLAGS : ${LDFLAGS}]) AC_MSG_RESULT([EXTRA_LDFLAGS : ${EXTRA_LDFLAGS}]) AC_MSG_RESULT([LIBS : ${LIBS}]) -AC_MSG_RESULT([TESTLIBS : ${TESTLIBS}]) AC_MSG_RESULT([RPATH_EXTRA : ${RPATH_EXTRA}]) AC_MSG_RESULT([]) AC_MSG_RESULT([XSLTPROC : ${XSLTPROC}]) From 04c3c0f9a0c910589a75604d8d0405407f1f035d Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 3 May 2016 22:11:35 -0700 Subject: [PATCH 68/82] Add the stats.retained and stats.arenas..retained statistics. This resolves #367. 
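A consumer-side example of the new statistic (illustrative; assumes a jemalloc build with statistics enabled): write "epoch" to refresh the cached statistics, then read stats.retained like any other size_t statistic. Per-arena values follow the same pattern via names such as "stats.arenas.0.retained".

    #include <stdint.h>
    #include <stdio.h>
    #include <jemalloc/jemalloc.h>

    int
    main(void)
    {
            uint64_t epoch = 1;
            size_t sz = sizeof(epoch);
            size_t retained;

            /* Refresh the statistics snapshot. */
            mallctl("epoch", &epoch, &sz, &epoch, sz);

            sz = sizeof(retained);
            if (mallctl("stats.retained", &retained, &sz, NULL, 0) == 0)
                    printf("retained: %zu bytes\n", retained);
            return (0);
    }
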
--- doc/jemalloc.xml.in | 31 +++++++++++++++++++++++++++++++ include/jemalloc/internal/ctl.h | 1 + include/jemalloc/internal/stats.h | 8 ++++++++ src/arena.c | 1 + src/chunk.c | 13 +++++++++++-- src/ctl.c | 10 ++++++++++ src/stats.c | 12 ++++++++---- 7 files changed, 70 insertions(+), 6 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 7b602a51..c4a44e3c 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -2186,6 +2186,25 @@ typedef struct { linkend="stats.resident">stats.resident. + + + stats.retained + (size_t) + r- + [] + + Total number of bytes in virtual memory mappings that + were retained rather than being returned to the operating system via + e.g. munmap + 2. Retained virtual memory is + typically untouched, decommitted, or purged, so it has no strongly + associated physical memory (see chunk hooks for details). Retained + memory is excluded from mapped memory statistics, e.g. stats.mapped. + + + stats.arenas.<i>.dss @@ -2266,6 +2285,18 @@ typedef struct { Number of mapped bytes. + + + stats.arenas.<i>.retained + (size_t) + r- + [] + + Number of retained bytes. See stats.retained for + details. + + stats.arenas.<i>.metadata.mapped diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index ec856996..c84c0de9 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -61,6 +61,7 @@ struct ctl_stats_s { size_t metadata; size_t resident; size_t mapped; + size_t retained; unsigned narenas; ctl_arena_stats_t *arenas; /* (narenas + 1) elements. */ }; diff --git a/include/jemalloc/internal/stats.h b/include/jemalloc/internal/stats.h index 705903ad..b6218178 100644 --- a/include/jemalloc/internal/stats.h +++ b/include/jemalloc/internal/stats.h @@ -102,6 +102,14 @@ struct arena_stats_s { /* Number of bytes currently mapped. */ size_t mapped; + /* + * Number of bytes currently retained as a side effect of munmap() being + * disabled/bypassed. Retained bytes are technically mapped (though + * always decommitted or purged), but they are excluded from the mapped + * statistic (above). 
+ */ + size_t retained; + /* * Total number of purge sweeps, total number of madvise calls made, * and total pages purged in order to keep dirty unused memory under diff --git a/src/arena.c b/src/arena.c index 45c53c18..1172dc2c 100644 --- a/src/arena.c +++ b/src/arena.c @@ -3414,6 +3414,7 @@ arena_stats_merge(tsd_t *tsd, arena_t *arena, unsigned *nthreads, decay_time, nactive, ndirty); astats->mapped += arena->stats.mapped; + astats->retained += arena->stats.retained; astats->npurge += arena->stats.npurge; astats->nmadvise += arena->stats.nmadvise; astats->purged += arena->stats.purged; diff --git a/src/chunk.c b/src/chunk.c index 0ee2a1a7..1f2afd9d 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -444,15 +444,21 @@ static void * chunk_alloc_retained(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit) { + void *ret; assert(size != 0); assert((size & chunksize_mask) == 0); assert(alignment != 0); assert((alignment & chunksize_mask) == 0); - return (chunk_recycle(tsd, arena, chunk_hooks, + ret = chunk_recycle(tsd, arena, chunk_hooks, &arena->chunks_szad_retained, &arena->chunks_ad_retained, false, - new_addr, size, alignment, zero, commit, true)); + new_addr, size, alignment, zero, commit, true); + + if (config_stats && ret != NULL) + arena->stats.retained -= size; + + return (ret); } void * @@ -617,6 +623,9 @@ chunk_dalloc_wrapper(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, arena->ind); chunk_record(tsd, arena, chunk_hooks, &arena->chunks_szad_retained, &arena->chunks_ad_retained, false, chunk, size, zeroed, committed); + + if (config_stats) + arena->stats.retained += size; } static bool diff --git a/src/ctl.c b/src/ctl.c index e0392d0e..fd5561a3 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -192,6 +192,7 @@ CTL_PROTO(stats_arenas_i_decay_time) CTL_PROTO(stats_arenas_i_pactive) CTL_PROTO(stats_arenas_i_pdirty) CTL_PROTO(stats_arenas_i_mapped) +CTL_PROTO(stats_arenas_i_retained) CTL_PROTO(stats_arenas_i_npurge) CTL_PROTO(stats_arenas_i_nmadvise) CTL_PROTO(stats_arenas_i_purged) @@ -204,6 +205,7 @@ CTL_PROTO(stats_active) CTL_PROTO(stats_metadata) CTL_PROTO(stats_resident) CTL_PROTO(stats_mapped) +CTL_PROTO(stats_retained) /******************************************************************************/ /* mallctl tree. 
*/ @@ -458,6 +460,7 @@ static const ctl_named_node_t stats_arenas_i_node[] = { {NAME("pactive"), CTL(stats_arenas_i_pactive)}, {NAME("pdirty"), CTL(stats_arenas_i_pdirty)}, {NAME("mapped"), CTL(stats_arenas_i_mapped)}, + {NAME("retained"), CTL(stats_arenas_i_retained)}, {NAME("npurge"), CTL(stats_arenas_i_npurge)}, {NAME("nmadvise"), CTL(stats_arenas_i_nmadvise)}, {NAME("purged"), CTL(stats_arenas_i_purged)}, @@ -484,6 +487,7 @@ static const ctl_named_node_t stats_node[] = { {NAME("metadata"), CTL(stats_metadata)}, {NAME("resident"), CTL(stats_resident)}, {NAME("mapped"), CTL(stats_mapped)}, + {NAME("retained"), CTL(stats_retained)}, {NAME("arenas"), CHILD(indexed, stats_arenas)} }; @@ -591,6 +595,7 @@ ctl_arena_stats_smerge(ctl_arena_stats_t *sstats, ctl_arena_stats_t *astats) if (config_stats) { sstats->astats.mapped += astats->astats.mapped; + sstats->astats.retained += astats->astats.retained; sstats->astats.npurge += astats->astats.npurge; sstats->astats.nmadvise += astats->astats.nmadvise; sstats->astats.purged += astats->astats.purged; @@ -745,6 +750,8 @@ ctl_refresh(tsd_t *tsd) ctl_stats.arenas[ctl_stats.narenas].pdirty) << LG_PAGE); ctl_stats.mapped = base_mapped + ctl_stats.arenas[ctl_stats.narenas].astats.mapped; + ctl_stats.retained = + ctl_stats.arenas[ctl_stats.narenas].astats.retained; } ctl_epoch++; @@ -2108,6 +2115,7 @@ CTL_RO_CGEN(config_stats, stats_active, ctl_stats.active, size_t) CTL_RO_CGEN(config_stats, stats_metadata, ctl_stats.metadata, size_t) CTL_RO_CGEN(config_stats, stats_resident, ctl_stats.resident, size_t) CTL_RO_CGEN(config_stats, stats_mapped, ctl_stats.mapped, size_t) +CTL_RO_CGEN(config_stats, stats_retained, ctl_stats.retained, size_t) CTL_RO_GEN(stats_arenas_i_dss, ctl_stats.arenas[mib[2]].dss, const char *) CTL_RO_GEN(stats_arenas_i_lg_dirty_mult, ctl_stats.arenas[mib[2]].lg_dirty_mult, @@ -2119,6 +2127,8 @@ CTL_RO_GEN(stats_arenas_i_pactive, ctl_stats.arenas[mib[2]].pactive, size_t) CTL_RO_GEN(stats_arenas_i_pdirty, ctl_stats.arenas[mib[2]].pdirty, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_mapped, ctl_stats.arenas[mib[2]].astats.mapped, size_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_retained, + ctl_stats.arenas[mib[2]].astats.retained, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_npurge, ctl_stats.arenas[mib[2]].astats.npurge, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_nmadvise, diff --git a/src/stats.c b/src/stats.c index 87b09e58..073be4fe 100644 --- a/src/stats.c +++ b/src/stats.c @@ -259,7 +259,7 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, unsigned nthreads; const char *dss; ssize_t lg_dirty_mult, decay_time; - size_t page, pactive, pdirty, mapped; + size_t page, pactive, pdirty, mapped, retained; size_t metadata_mapped, metadata_allocated; uint64_t npurge, nmadvise, purged; size_t small_allocated; @@ -349,6 +349,9 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, CTL_M2_GET("stats.arenas.0.mapped", i, &mapped, size_t); malloc_cprintf(write_cb, cbopaque, "mapped: %12zu\n", mapped); + CTL_M2_GET("stats.arenas.0.retained", i, &retained, size_t); + malloc_cprintf(write_cb, cbopaque, + "retained: %12zu\n", retained); CTL_M2_GET("stats.arenas.0.metadata.mapped", i, &metadata_mapped, size_t); CTL_M2_GET("stats.arenas.0.metadata.allocated", i, &metadata_allocated, @@ -597,7 +600,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, if (config_stats) { size_t *cactive; - size_t allocated, active, metadata, resident, mapped; + size_t allocated, active, 
metadata, resident, mapped, retained; CTL_GET("stats.cactive", &cactive, size_t *); CTL_GET("stats.allocated", &allocated, size_t); @@ -605,10 +608,11 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, CTL_GET("stats.metadata", &metadata, size_t); CTL_GET("stats.resident", &resident, size_t); CTL_GET("stats.mapped", &mapped, size_t); + CTL_GET("stats.retained", &retained, size_t); malloc_cprintf(write_cb, cbopaque, "Allocated: %zu, active: %zu, metadata: %zu," - " resident: %zu, mapped: %zu\n", - allocated, active, metadata, resident, mapped); + " resident: %zu, mapped: %zu, retained: %zu\n", + allocated, active, metadata, resident, mapped, retained); malloc_cprintf(write_cb, cbopaque, "Current active ceiling: %zu\n", atomic_read_z(cactive)); From dc391adc6577b4ed0dac0ce3b1778473e67e4c17 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 4 May 2016 12:14:36 -0700 Subject: [PATCH 69/82] Scale leak report summary according to sampling probability. This makes the numbers reported in the leak report summary closely match those reported by jeprof. This resolves #356. --- src/prof.c | 56 ++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 38 insertions(+), 18 deletions(-) diff --git a/src/prof.c b/src/prof.c index b21cd6be..df7f1f9b 100644 --- a/src/prof.c +++ b/src/prof.c @@ -828,22 +828,22 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) return (ret.p); } +/* + * The bodies of this function and prof_leakcheck() are compiled out unless heap + * profiling is enabled, so that it is possible to compile jemalloc with + * floating point support completely disabled. Avoiding floating point code is + * important on memory-constrained systems, but it also enables a workaround for + * versions of glibc that don't properly save/restore floating point registers + * during dynamic lazy symbol loading (which internally calls into whatever + * malloc implementation happens to be integrated into the application). Note + * that some compilers (e.g. gcc 4.8) may use floating point registers for fast + * memory moves, so jemalloc must be compiled with such optimizations disabled + * (e.g. + * -mno-sse) in order for the workaround to be complete. + */ void prof_sample_threshold_update(prof_tdata_t *tdata) { - /* - * The body of this function is compiled out unless heap profiling is - * enabled, so that it is possible to compile jemalloc with floating - * point support completely disabled. Avoiding floating point code is - * important on memory-constrained systems, but it also enables a - * workaround for versions of glibc that don't properly save/restore - * floating point registers during dynamic lazy symbol loading (which - * internally calls into whatever malloc implementation happens to be - * integrated into the application). Note that some compilers (e.g. - * gcc 4.8) may use floating point registers for fast memory moves, so - * jemalloc must be compiled with such optimizations disabled (e.g. - * -mno-sse) in order for the workaround to be complete. - */ #ifdef JEMALLOC_PROF uint64_t r; double u; @@ -1477,21 +1477,41 @@ label_return: return (ret); } +/* + * See prof_sample_threshold_update() comment for why the body of this function + * is conditionally compiled. 
+ */ static void prof_leakcheck(const prof_cnt_t *cnt_all, size_t leak_ngctx, const char *filename) { +#ifdef JEMALLOC_PROF + /* + * Scaling is equivalent AdjustSamples() in jeprof, but the result may + * differ slightly from what jeprof reports, because here we scale the + * summary values, whereas jeprof scales each context individually and + * reports the sums of the scaled values. + */ if (cnt_all->curbytes != 0) { - malloc_printf(": Leak summary: %"FMTu64" byte%s, %" - FMTu64" object%s, %zu context%s\n", - cnt_all->curbytes, (cnt_all->curbytes != 1) ? "s" : "", - cnt_all->curobjs, (cnt_all->curobjs != 1) ? "s" : "", - leak_ngctx, (leak_ngctx != 1) ? "s" : ""); + double sample_period = (double)((uint64_t)1 << lg_prof_sample); + double ratio = (((double)cnt_all->curbytes) / + (double)cnt_all->curobjs) / sample_period; + double scale_factor = 1.0 / (1.0 - exp(-ratio)); + uint64_t curbytes = (uint64_t)round(((double)cnt_all->curbytes) + * scale_factor); + uint64_t curobjs = (uint64_t)round(((double)cnt_all->curobjs) * + scale_factor); + + malloc_printf(": Leak approximation summary: ~%"FMTu64 + " byte%s, ~%"FMTu64" object%s, >= %zu context%s\n", + curbytes, (curbytes != 1) ? "s" : "", curobjs, (curobjs != + 1) ? "s" : "", leak_ngctx, (leak_ngctx != 1) ? "s" : ""); malloc_printf( ": Run jeprof on \"%s\" for leak detail\n", filename); } +#endif } struct prof_gctx_dump_iter_arg_s { From c2f970c32b527660a33fa513a76d913c812dcf7c Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 5 May 2016 17:45:02 -0700 Subject: [PATCH 70/82] Modify pages_map() to support mapping uncommitted virtual memory. If the OS overcommits: - Commit all mappings in pages_map() regardless of whether the caller requested committed memory. - Linux-specific: Specify MAP_NORESERVE to avoid unfortunate interactions with heuristic overcommit mode during fork(2). This resolves #193. --- configure.ac | 2 + .../internal/jemalloc_internal_defs.h.in | 9 ++ include/jemalloc/internal/pages.h | 5 +- include/jemalloc/internal/private_symbols.txt | 1 + src/chunk_mmap.c | 10 +- src/jemalloc.c | 1 + src/pages.c | 116 +++++++++++++++--- 7 files changed, 117 insertions(+), 27 deletions(-) diff --git a/configure.ac b/configure.ac index 1a1c9704..7f19715d 100644 --- a/configure.ac +++ b/configure.ac @@ -305,6 +305,7 @@ case "${host}" in *-*-freebsd*) CFLAGS="$CFLAGS" abi="elf" + AC_DEFINE([JEMALLOC_SYSCTL_VM_OVERCOMMIT], [ ]) AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) force_lazy_lock="1" ;; @@ -329,6 +330,7 @@ case "${host}" in CPPFLAGS="$CPPFLAGS -D_GNU_SOURCE" abi="elf" AC_DEFINE([JEMALLOC_HAS_ALLOCA_H]) + AC_DEFINE([JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY], [ ]) AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED], [ ]) AC_DEFINE([JEMALLOC_THREADED_INIT], [ ]) AC_DEFINE([JEMALLOC_USE_CXX_THROW], [ ]) diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 2c753719..7de0cf7c 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -214,6 +214,15 @@ #undef JEMALLOC_ZONE #undef JEMALLOC_ZONE_VERSION +/* + * Methods for determining whether the OS overcommits. + * JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY: Linux's + * /proc/sys/vm.overcommit_memory file. + * JEMALLOC_SYSCTL_VM_OVERCOMMIT: FreeBSD's vm.overcommit sysctl. + */ +#undef JEMALLOC_SYSCTL_VM_OVERCOMMIT +#undef JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY + /* * Methods for purging unused pages differ between operating systems. 
* diff --git a/include/jemalloc/internal/pages.h b/include/jemalloc/internal/pages.h index da7eb968..e21effd1 100644 --- a/include/jemalloc/internal/pages.h +++ b/include/jemalloc/internal/pages.h @@ -9,13 +9,14 @@ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -void *pages_map(void *addr, size_t size); +void *pages_map(void *addr, size_t size, bool *commit); void pages_unmap(void *addr, size_t size); void *pages_trim(void *addr, size_t alloc_size, size_t leadsize, - size_t size); + size_t size, bool *commit); bool pages_commit(void *addr, size_t size); bool pages_decommit(void *addr, size_t size); bool pages_purge(void *addr, size_t size); +void pages_boot(void); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 7958a4ff..0f9b99e4 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -398,6 +398,7 @@ opt_utrace opt_xmalloc opt_zero p2rz +pages_boot pages_commit pages_decommit pages_map diff --git a/src/chunk_mmap.c b/src/chunk_mmap.c index e2e66bc9..f95ae756 100644 --- a/src/chunk_mmap.c +++ b/src/chunk_mmap.c @@ -16,18 +16,16 @@ chunk_alloc_mmap_slow(size_t size, size_t alignment, bool *zero, bool *commit) do { void *pages; size_t leadsize; - pages = pages_map(NULL, alloc_size); + pages = pages_map(NULL, alloc_size, commit); if (pages == NULL) return (NULL); leadsize = ALIGNMENT_CEILING((uintptr_t)pages, alignment) - (uintptr_t)pages; - ret = pages_trim(pages, alloc_size, leadsize, size); + ret = pages_trim(pages, alloc_size, leadsize, size, commit); } while (ret == NULL); assert(ret != NULL); *zero = true; - if (!*commit) - *commit = pages_decommit(ret, size); return (ret); } @@ -54,7 +52,7 @@ chunk_alloc_mmap(void *new_addr, size_t size, size_t alignment, bool *zero, assert(alignment != 0); assert((alignment & chunksize_mask) == 0); - ret = pages_map(new_addr, size); + ret = pages_map(new_addr, size, commit); if (ret == NULL || ret == new_addr) return (ret); assert(new_addr == NULL); @@ -66,8 +64,6 @@ chunk_alloc_mmap(void *new_addr, size_t size, size_t alignment, bool *zero, assert(ret != NULL); *zero = true; - if (!*commit) - *commit = pages_decommit(ret, size); return (ret); } diff --git a/src/jemalloc.c b/src/jemalloc.c index cd97ea16..1a26a44f 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1272,6 +1272,7 @@ malloc_init_hard_a0_locked(tsd_t *tsd) abort(); } } + pages_boot(); if (base_boot()) return (true); if (chunk_boot()) diff --git a/src/pages.c b/src/pages.c index 83a167f6..2a9b7e37 100644 --- a/src/pages.c +++ b/src/pages.c @@ -1,29 +1,49 @@ #define JEMALLOC_PAGES_C_ #include "jemalloc/internal/jemalloc_internal.h" +#ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT +#include +#endif + +/******************************************************************************/ +/* Data. 
*/ + +#ifndef _WIN32 +# define PAGES_PROT_COMMIT (PROT_READ | PROT_WRITE) +# define PAGES_PROT_DECOMMIT (PROT_NONE) +static int mmap_flags; +#endif +static bool os_overcommits; + /******************************************************************************/ void * -pages_map(void *addr, size_t size) +pages_map(void *addr, size_t size, bool *commit) { void *ret; assert(size != 0); + if (os_overcommits) + *commit = true; + #ifdef _WIN32 /* * If VirtualAlloc can't allocate at the given address when one is * given, it fails and returns NULL. */ - ret = VirtualAlloc(addr, size, MEM_COMMIT | MEM_RESERVE, + ret = VirtualAlloc(addr, size, MEM_RESERVE | (*commit ? MEM_COMMIT : 0), PAGE_READWRITE); #else /* * We don't use MAP_FIXED here, because it can cause the *replacement* * of existing mappings, and we only want to create new mappings. */ - ret = mmap(addr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, - -1, 0); + { + int prot = *commit ? PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT; + + ret = mmap(addr, size, prot, mmap_flags, -1, 0); + } assert(ret != NULL); if (ret == MAP_FAILED) @@ -67,7 +87,8 @@ pages_unmap(void *addr, size_t size) } void * -pages_trim(void *addr, size_t alloc_size, size_t leadsize, size_t size) +pages_trim(void *addr, size_t alloc_size, size_t leadsize, size_t size, + bool *commit) { void *ret = (void *)((uintptr_t)addr + leadsize); @@ -77,7 +98,7 @@ pages_trim(void *addr, size_t alloc_size, size_t leadsize, size_t size) void *new_addr; pages_unmap(addr, alloc_size); - new_addr = pages_map(ret, size); + new_addr = pages_map(ret, size, commit); if (new_addr == ret) return (ret); if (new_addr) @@ -101,17 +122,17 @@ static bool pages_commit_impl(void *addr, size_t size, bool commit) { -#ifndef _WIN32 - /* - * The following decommit/commit implementation is functional, but - * always disabled because it doesn't add value beyong improved - * debugging (at the cost of extra system calls) on systems that - * overcommit. - */ - if (false) { - int prot = commit ? (PROT_READ | PROT_WRITE) : PROT_NONE; - void *result = mmap(addr, size, prot, MAP_PRIVATE | MAP_ANON | - MAP_FIXED, -1, 0); + if (os_overcommits) + return (true); + +#ifdef _WIN32 + return (commit ? (addr != VirtualAlloc(addr, size, MEM_COMMIT, + PAGE_READWRITE)) : (!VirtualFree(addr, size, MEM_DECOMMIT))); +#else + { + int prot = commit ? PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT; + void *result = mmap(addr, size, prot, mmap_flags | MAP_FIXED, + -1, 0); if (result == MAP_FAILED) return (true); if (result != addr) { @@ -125,7 +146,6 @@ pages_commit_impl(void *addr, size_t size, bool commit) return (false); } #endif - return (true); } bool @@ -171,3 +191,63 @@ pages_purge(void *addr, size_t size) return (unzeroed); } +#ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT +static bool +os_overcommits_sysctl(void) +{ + int vm_overcommit; + size_t sz; + + sz = sizeof(vm_overcommit); + if (sysctlbyname("vm.overcommit", &vm_overcommit, &sz, NULL, 0) != 0) + return (false); /* Error. */ + + return ((vm_overcommit & 0x3) == 0); +} +#endif + +#ifdef JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY +static bool +os_overcommits_proc(void) +{ + int fd; + char buf[1]; + ssize_t nread; + + fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY); + if (fd == -1) + return (false); /* Error. */ + + nread = read(fd, &buf, sizeof(buf)); + if (nread < 1) + return (false); /* Error. */ + /* + * /proc/sys/vm/overcommit_memory meanings: + * 0: Heuristic overcommit. + * 1: Always overcommit. + * 2: Never overcommit. 
+ */ + return (buf[0] == '0' || buf[0] == '1'); +} +#endif + +void +pages_boot(void) +{ + +#ifndef _WIN32 + mmap_flags = MAP_PRIVATE | MAP_ANON; +#endif + +#ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT + os_overcommits = os_overcommits_sysctl(); +#elif defined(JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY) + os_overcommits = os_overcommits_proc(); +# ifdef MAP_NORESERVE + if (os_overcommits) + mmap_flags |= MAP_NORESERVE; +# endif +#else + os_overcommits = false; +#endif +} From 3ef51d7f733ac6432e80fa902a779ab5b98d74f6 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 6 May 2016 12:16:00 -0700 Subject: [PATCH 71/82] Optimize the fast paths of calloc() and [m,d,sd]allocx(). This is a broader application of optimizations to malloc() and free() in f4a0f32d340985de477bbe329ecdaecd69ed1055 (Fast-path improvement: reduce # of branches and unnecessary operations.). This resolves #321. --- include/jemalloc/internal/arena.h | 10 +- .../jemalloc/internal/jemalloc_internal.h.in | 60 +--- include/jemalloc/internal/private_symbols.txt | 6 +- src/arena.c | 2 +- src/huge.c | 2 +- src/jemalloc.c | 300 +++++++----------- 6 files changed, 137 insertions(+), 243 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 53e6b3ad..debb43f3 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -680,7 +680,8 @@ void *arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind, arena_t *arena_aalloc(const void *ptr); size_t arena_salloc(tsd_t *tsd, const void *ptr, bool demote); void arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path); -void arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache); +void arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache, + bool slow_path); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_)) @@ -1446,7 +1447,8 @@ arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) } JEMALLOC_ALWAYS_INLINE void -arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache) +arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache, + bool slow_path) { arena_chunk_t *chunk; @@ -1473,7 +1475,7 @@ arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache) if (likely(tcache != NULL)) { szind_t binind = size2index(size); tcache_dalloc_small(tsd, tcache, ptr, binind, - true); + slow_path); } else { size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; @@ -1486,7 +1488,7 @@ arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache) if (likely(tcache != NULL) && size <= tcache_maxclass) { tcache_dalloc_large(tsd, tcache, ptr, size, - true); + slow_path); } else { arena_dalloc_large(tsd, extent_node_arena_get( &chunk->node), chunk, ptr); diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 62d5da29..fe504d8d 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -895,12 +895,8 @@ arena_t *iaalloc(const void *ptr); size_t isalloc(tsd_t *tsd, const void *ptr, bool demote); void *iallocztm(tsd_t *tsd, size_t size, szind_t ind, bool zero, tcache_t *tcache, bool is_metadata, arena_t *arena, bool slow_path); -void *imalloct(tsd_t *tsd, size_t size, szind_t ind, tcache_t *tcache, - arena_t *arena); -void *imalloc(tsd_t *tsd, size_t size, szind_t ind, bool slow_path); -void *icalloct(tsd_t *tsd, size_t size, szind_t ind, tcache_t *tcache, - arena_t *arena); -void *icalloc(tsd_t *tsd, 
size_t size, szind_t ind); +void *ialloc(tsd_t *tsd, size_t size, szind_t ind, bool zero, + bool slow_path); void *ipallocztm(tsd_t *tsd, size_t usize, size_t alignment, bool zero, tcache_t *tcache, bool is_metadata, arena_t *arena); void *ipalloct(tsd_t *tsd, size_t usize, size_t alignment, bool zero, @@ -911,11 +907,12 @@ size_t u2rz(size_t usize); size_t p2rz(tsd_t *tsd, const void *ptr); void idalloctm(tsd_t *tsd, void *ptr, tcache_t *tcache, bool is_metadata, bool slow_path); -void idalloct(tsd_t *tsd, void *ptr, tcache_t *tcache); void idalloc(tsd_t *tsd, void *ptr); void iqalloc(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path); -void isdalloct(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache); -void isqalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache); +void isdalloct(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache, + bool slow_path); +void isqalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache, + bool slow_path); void *iralloct_realign(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t extra, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena); @@ -972,35 +969,13 @@ iallocztm(tsd_t *tsd, size_t size, szind_t ind, bool zero, tcache_t *tcache, } JEMALLOC_ALWAYS_INLINE void * -imalloct(tsd_t *tsd, size_t size, szind_t ind, tcache_t *tcache, arena_t *arena) +ialloc(tsd_t *tsd, size_t size, szind_t ind, bool zero, bool slow_path) { - return (iallocztm(tsd, size, ind, false, tcache, false, arena, true)); -} - -JEMALLOC_ALWAYS_INLINE void * -imalloc(tsd_t *tsd, size_t size, szind_t ind, bool slow_path) -{ - - return (iallocztm(tsd, size, ind, false, tcache_get(tsd, true), false, + return (iallocztm(tsd, size, ind, zero, tcache_get(tsd, true), false, NULL, slow_path)); } -JEMALLOC_ALWAYS_INLINE void * -icalloct(tsd_t *tsd, size_t size, szind_t ind, tcache_t *tcache, arena_t *arena) -{ - - return (iallocztm(tsd, size, ind, true, tcache, false, arena, true)); -} - -JEMALLOC_ALWAYS_INLINE void * -icalloc(tsd_t *tsd, size_t size, szind_t ind) -{ - - return (iallocztm(tsd, size, ind, true, tcache_get(tsd, true), false, - NULL, true)); -} - JEMALLOC_ALWAYS_INLINE void * ipallocztm(tsd_t *tsd, size_t usize, size_t alignment, bool zero, tcache_t *tcache, bool is_metadata, arena_t *arena) @@ -1091,13 +1066,6 @@ idalloctm(tsd_t *tsd, void *ptr, tcache_t *tcache, bool is_metadata, arena_dalloc(tsd, ptr, tcache, slow_path); } -JEMALLOC_ALWAYS_INLINE void -idalloct(tsd_t *tsd, void *ptr, tcache_t *tcache) -{ - - idalloctm(tsd, ptr, tcache, false, true); -} - JEMALLOC_ALWAYS_INLINE void idalloc(tsd_t *tsd, void *ptr) { @@ -1116,20 +1084,20 @@ iqalloc(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) } JEMALLOC_ALWAYS_INLINE void -isdalloct(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache) +isdalloct(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache, bool slow_path) { - arena_sdalloc(tsd, ptr, size, tcache); + arena_sdalloc(tsd, ptr, size, tcache, slow_path); } JEMALLOC_ALWAYS_INLINE void -isqalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache) +isqalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache, bool slow_path) { - if (config_fill && unlikely(opt_quarantine)) + if (slow_path && config_fill && unlikely(opt_quarantine)) quarantine(tsd, ptr); else - isdalloct(tsd, ptr, size, tcache); + isdalloct(tsd, ptr, size, tcache, slow_path); } JEMALLOC_ALWAYS_INLINE void * @@ -1160,7 +1128,7 @@ iralloct_realign(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, */ copysize = (size < oldsize) ? 
size : oldsize; memcpy(p, ptr, copysize); - isqalloc(tsd, ptr, oldsize, tcache); + isqalloc(tsd, ptr, oldsize, tcache, true); return (p); } diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 0f9b99e4..e47296ff 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -291,15 +291,11 @@ huge_ralloc huge_ralloc_no_move huge_salloc iaalloc +ialloc iallocztm iarena_cleanup -icalloc -icalloct idalloc -idalloct idalloctm -imalloc -imalloct in_valgrind index2size index2size_compute diff --git a/src/arena.c b/src/arena.c index 1172dc2c..992d96f5 100644 --- a/src/arena.c +++ b/src/arena.c @@ -3303,7 +3303,7 @@ arena_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, copysize = (usize < oldsize) ? usize : oldsize; JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, copysize); memcpy(ret, ptr, copysize); - isqalloc(tsd, ptr, oldsize, tcache); + isqalloc(tsd, ptr, oldsize, tcache, true); } else { ret = huge_ralloc(tsd, arena, ptr, oldsize, usize, alignment, zero, tcache); diff --git a/src/huge.c b/src/huge.c index 0b3aed0d..71fb50c5 100644 --- a/src/huge.c +++ b/src/huge.c @@ -364,7 +364,7 @@ huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t usize, copysize = (usize < oldsize) ? usize : oldsize; memcpy(ret, ptr, copysize); - isqalloc(tsd, ptr, oldsize, tcache); + isqalloc(tsd, ptr, oldsize, tcache, true); return (ret); } diff --git a/src/jemalloc.c b/src/jemalloc.c index 1a26a44f..259ab4f7 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -70,10 +70,10 @@ typedef enum { } malloc_init_t; static malloc_init_t malloc_init_state = malloc_init_uninitialized; -/* 0 should be the common case. Set to true to trigger initialization. */ +/* False should be the common case. Set to true to trigger initialization. */ static bool malloc_slow = true; -/* When malloc_slow != 0, set the corresponding bits for sanity check. */ +/* When malloc_slow is true, set the corresponding bits for sanity check. 
*/ enum { flag_opt_junk_alloc = (1U), flag_opt_junk_free = (1U << 1), @@ -1443,7 +1443,7 @@ malloc_init_hard(void) */ static void * -imalloc_prof_sample(tsd_t *tsd, size_t usize, szind_t ind, +ialloc_prof_sample(tsd_t *tsd, size_t usize, szind_t ind, bool zero, prof_tctx_t *tctx, bool slow_path) { void *p; @@ -1452,27 +1452,27 @@ imalloc_prof_sample(tsd_t *tsd, size_t usize, szind_t ind, return (NULL); if (usize <= SMALL_MAXCLASS) { szind_t ind_large = size2index(LARGE_MINCLASS); - p = imalloc(tsd, LARGE_MINCLASS, ind_large, slow_path); + p = ialloc(tsd, LARGE_MINCLASS, ind_large, zero, slow_path); if (p == NULL) return (NULL); arena_prof_promoted(tsd, p, usize); } else - p = imalloc(tsd, usize, ind, slow_path); + p = ialloc(tsd, usize, ind, zero, slow_path); return (p); } JEMALLOC_ALWAYS_INLINE_C void * -imalloc_prof(tsd_t *tsd, size_t usize, szind_t ind, bool slow_path) +ialloc_prof(tsd_t *tsd, size_t usize, szind_t ind, bool zero, bool slow_path) { void *p; prof_tctx_t *tctx; tctx = prof_alloc_prep(tsd, usize, prof_active_get_unlocked(), true); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) - p = imalloc_prof_sample(tsd, usize, ind, tctx, slow_path); + p = ialloc_prof_sample(tsd, usize, ind, zero, tctx, slow_path); else - p = imalloc(tsd, usize, ind, slow_path); + p = ialloc(tsd, usize, ind, zero, slow_path); if (unlikely(p == NULL)) { prof_alloc_rollback(tsd, tctx, true); return (NULL); @@ -1482,16 +1482,21 @@ imalloc_prof(tsd_t *tsd, size_t usize, szind_t ind, bool slow_path) return (p); } +/* + * ialloc_body() is inlined so that fast and slow paths are generated separately + * with statically known slow_path. + */ JEMALLOC_ALWAYS_INLINE_C void * -imalloc_body(size_t size, tsd_t **tsd, size_t *usize, bool slow_path) +ialloc_body(size_t size, bool zero, tsd_t **tsd, size_t *usize, bool slow_path) { szind_t ind; - if (slow_path && unlikely(malloc_init())) + if (slow_path && unlikely(malloc_init())) { + *tsd = NULL; return (NULL); + } *tsd = tsd_fetch(); - witness_assert_lockless(*tsd); ind = size2index(size); @@ -1505,26 +1510,30 @@ imalloc_body(size_t size, tsd_t **tsd, size_t *usize, bool slow_path) } if (config_prof && opt_prof) - return (imalloc_prof(*tsd, *usize, ind, slow_path)); + return (ialloc_prof(*tsd, *usize, ind, zero, slow_path)); - return (imalloc(*tsd, size, ind, slow_path)); + return (ialloc(*tsd, size, ind, zero, slow_path)); } JEMALLOC_ALWAYS_INLINE_C void -imalloc_post_check(void *ret, tsd_t *tsd, size_t usize, bool slow_path) +ialloc_post_check(void *ret, tsd_t *tsd, size_t usize, const char *func, + bool update_errno, bool slow_path) { + if (unlikely(ret == NULL)) { if (slow_path && config_xmalloc && unlikely(opt_xmalloc)) { - malloc_write(": Error in malloc(): " - "out of memory\n"); + malloc_printf(": Error in %s(): out of " + "memory\n", func); abort(); } - set_errno(ENOMEM); + if (update_errno) + set_errno(ENOMEM); } if (config_stats && likely(ret != NULL)) { assert(usize == isalloc(tsd, ret, config_prof)); *tsd_thread_allocatedp_get(tsd) += usize; } + witness_assert_lockless(tsd); } JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN @@ -1540,20 +1549,15 @@ je_malloc(size_t size) size = 1; if (likely(!malloc_slow)) { - /* - * imalloc_body() is inlined so that fast and slow paths are - * generated separately with statically known slow_path. 
- */ - ret = imalloc_body(size, &tsd, &usize, false); - imalloc_post_check(ret, tsd, usize, false); + ret = ialloc_body(size, false, &tsd, &usize, false); + ialloc_post_check(ret, tsd, usize, "malloc", true, false); } else { - ret = imalloc_body(size, &tsd, &usize, true); - imalloc_post_check(ret, tsd, usize, true); + ret = ialloc_body(size, false, &tsd, &usize, true); + ialloc_post_check(ret, tsd, usize, "malloc", true, true); UTRACE(0, size, ret); JEMALLOC_VALGRIND_MALLOC(ret != NULL, tsd, ret, usize, false); } - witness_assert_lockless(tsd); return (ret); } @@ -1695,45 +1699,6 @@ je_aligned_alloc(size_t alignment, size_t size) return (ret); } -static void * -icalloc_prof_sample(tsd_t *tsd, size_t usize, szind_t ind, prof_tctx_t *tctx) -{ - void *p; - - if (tctx == NULL) - return (NULL); - if (usize <= SMALL_MAXCLASS) { - szind_t ind_large = size2index(LARGE_MINCLASS); - p = icalloc(tsd, LARGE_MINCLASS, ind_large); - if (p == NULL) - return (NULL); - arena_prof_promoted(tsd, p, usize); - } else - p = icalloc(tsd, usize, ind); - - return (p); -} - -JEMALLOC_ALWAYS_INLINE_C void * -icalloc_prof(tsd_t *tsd, size_t usize, szind_t ind) -{ - void *p; - prof_tctx_t *tctx; - - tctx = prof_alloc_prep(tsd, usize, prof_active_get_unlocked(), true); - if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) - p = icalloc_prof_sample(tsd, usize, ind, tctx); - else - p = icalloc(tsd, usize, ind); - if (unlikely(p == NULL)) { - prof_alloc_rollback(tsd, tctx, true); - return (NULL); - } - prof_malloc(tsd, p, usize, tctx); - - return (p); -} - JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE2(1, 2) @@ -1742,68 +1707,33 @@ je_calloc(size_t num, size_t size) void *ret; tsd_t *tsd; size_t num_size; - szind_t ind; size_t usize JEMALLOC_CC_SILENCE_INIT(0); - if (unlikely(malloc_init())) { - tsd = NULL; - num_size = 0; - ret = NULL; - goto label_return; - } - tsd = tsd_fetch(); - witness_assert_lockless(tsd); - num_size = num * size; if (unlikely(num_size == 0)) { if (num == 0 || size == 0) num_size = 1; - else { - ret = NULL; - goto label_return; - } + else + num_size = HUGE_MAXCLASS + 1; /* Trigger OOM. */ /* * Try to avoid division here. We know that it isn't possible to * overflow during multiplication if neither operand uses any of the * most significant half of the bits in a size_t. */ } else if (unlikely(((num | size) & (SIZE_T_MAX << (sizeof(size_t) << - 2))) && (num_size / size != num))) { - /* size_t overflow. */ - ret = NULL; - goto label_return; - } + 2))) && (num_size / size != num))) + num_size = HUGE_MAXCLASS + 1; /* size_t overflow. 
*/ - ind = size2index(num_size); - if (unlikely(ind >= NSIZES)) { - ret = NULL; - goto label_return; - } - if (config_prof && opt_prof) { - usize = index2size(ind); - ret = icalloc_prof(tsd, usize, ind); + if (likely(!malloc_slow)) { + ret = ialloc_body(num_size, true, &tsd, &usize, false); + ialloc_post_check(ret, tsd, usize, "calloc", true, false); } else { - if (config_stats || (config_valgrind && unlikely(in_valgrind))) - usize = index2size(ind); - ret = icalloc(tsd, num_size, ind); + ret = ialloc_body(num_size, true, &tsd, &usize, true); + ialloc_post_check(ret, tsd, usize, "calloc", true, true); + UTRACE(0, num_size, ret); + JEMALLOC_VALGRIND_MALLOC(ret != NULL, tsd, ret, usize, false); } -label_return: - if (unlikely(ret == NULL)) { - if (config_xmalloc && unlikely(opt_xmalloc)) { - malloc_write(": Error in calloc(): out of " - "memory\n"); - abort(); - } - set_errno(ENOMEM); - } - if (config_stats && likely(ret != NULL)) { - assert(usize == isalloc(tsd, ret, config_prof)); - *tsd_thread_allocatedp_get(tsd) += usize; - } - UTRACE(0, num_size, ret); - JEMALLOC_VALGRIND_MALLOC(ret != NULL, tsd, ret, usize, true); - witness_assert_lockless(tsd); return (ret); } @@ -1880,7 +1810,7 @@ ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) } JEMALLOC_INLINE_C void -isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache) +isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) { UNUSED size_t rzsize JEMALLOC_CC_SILENCE_INIT(0); @@ -1895,7 +1825,7 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache) *tsd_thread_deallocatedp_get(tsd) += usize; if (config_valgrind && unlikely(in_valgrind)) rzsize = p2rz(tsd, ptr); - isqalloc(tsd, ptr, usize, tcache); + isqalloc(tsd, ptr, usize, tcache, slow_path); JEMALLOC_VALGRIND_FREE(ptr, rzsize); } @@ -1946,9 +1876,9 @@ je_realloc(void *ptr, size_t size) } else { /* realloc(NULL, size) is equivalent to malloc(size). 
*/ if (likely(!malloc_slow)) - ret = imalloc_body(size, &tsd, &usize, false); + ret = ialloc_body(size, false, &tsd, &usize, false); else - ret = imalloc_body(size, &tsd, &usize, true); + ret = ialloc_body(size, false, &tsd, &usize, true); } if (unlikely(ret == NULL)) { @@ -1978,6 +1908,7 @@ je_free(void *ptr) UTRACE(ptr, 0, 0); if (likely(ptr != NULL)) { tsd_t *tsd = tsd_fetch(); + witness_assert_lockless(tsd); if (likely(!malloc_slow)) ifree(tsd, ptr, tcache_get(tsd, false), false); else @@ -2056,7 +1987,7 @@ JEMALLOC_EXPORT void *(*__memalign_hook)(size_t alignment, size_t size) = */ JEMALLOC_ALWAYS_INLINE_C bool -imallocx_flags_decode_hard(tsd_t *tsd, size_t size, int flags, size_t *usize, +imallocx_flags_decode(tsd_t *tsd, size_t size, int flags, size_t *usize, size_t *alignment, bool *zero, tcache_t **tcache, arena_t **arena) { @@ -2087,29 +2018,9 @@ imallocx_flags_decode_hard(tsd_t *tsd, size_t size, int flags, size_t *usize, return (false); } -JEMALLOC_ALWAYS_INLINE_C bool -imallocx_flags_decode(tsd_t *tsd, size_t size, int flags, size_t *usize, - size_t *alignment, bool *zero, tcache_t **tcache, arena_t **arena) -{ - - if (likely(flags == 0)) { - *usize = s2u(size); - if (unlikely(*usize == 0 || *usize > HUGE_MAXCLASS)) - return (true); - *alignment = 0; - *zero = false; - *tcache = tcache_get(tsd, true); - *arena = NULL; - return (false); - } else { - return (imallocx_flags_decode_hard(tsd, size, flags, usize, - alignment, zero, tcache, arena)); - } -} - JEMALLOC_ALWAYS_INLINE_C void * imallocx_flags(tsd_t *tsd, size_t usize, size_t alignment, bool zero, - tcache_t *tcache, arena_t *arena) + tcache_t *tcache, arena_t *arena, bool slow_path) { szind_t ind; @@ -2117,14 +2028,13 @@ imallocx_flags(tsd_t *tsd, size_t usize, size_t alignment, bool zero, return (ipalloct(tsd, usize, alignment, zero, tcache, arena)); ind = size2index(usize); assert(ind < NSIZES); - if (unlikely(zero)) - return (icalloct(tsd, usize, ind, tcache, arena)); - return (imalloct(tsd, usize, ind, tcache, arena)); + return (iallocztm(tsd, usize, ind, zero, tcache, false, arena, + slow_path)); } static void * imallocx_prof_sample(tsd_t *tsd, size_t usize, size_t alignment, bool zero, - tcache_t *tcache, arena_t *arena) + tcache_t *tcache, arena_t *arena, bool slow_path) { void *p; @@ -2132,18 +2042,20 @@ imallocx_prof_sample(tsd_t *tsd, size_t usize, size_t alignment, bool zero, assert(((alignment == 0) ? 
s2u(LARGE_MINCLASS) : sa2u(LARGE_MINCLASS, alignment)) == LARGE_MINCLASS); p = imallocx_flags(tsd, LARGE_MINCLASS, alignment, zero, tcache, - arena); + arena, slow_path); if (p == NULL) return (NULL); arena_prof_promoted(tsd, p, usize); - } else - p = imallocx_flags(tsd, usize, alignment, zero, tcache, arena); + } else { + p = imallocx_flags(tsd, usize, alignment, zero, tcache, arena, + slow_path); + } return (p); } JEMALLOC_ALWAYS_INLINE_C void * -imallocx_prof(tsd_t *tsd, size_t size, int flags, size_t *usize) +imallocx_prof(tsd_t *tsd, size_t size, int flags, size_t *usize, bool slow_path) { void *p; size_t alignment; @@ -2157,10 +2069,11 @@ imallocx_prof(tsd_t *tsd, size_t size, int flags, size_t *usize) return (NULL); tctx = prof_alloc_prep(tsd, *usize, prof_active_get_unlocked(), true); if (likely((uintptr_t)tctx == (uintptr_t)1U)) - p = imallocx_flags(tsd, *usize, alignment, zero, tcache, arena); + p = imallocx_flags(tsd, *usize, alignment, zero, tcache, arena, + slow_path); else if ((uintptr_t)tctx > (uintptr_t)1U) { p = imallocx_prof_sample(tsd, *usize, alignment, zero, tcache, - arena); + arena, slow_path); } else p = NULL; if (unlikely(p == NULL)) { @@ -2174,7 +2087,8 @@ imallocx_prof(tsd_t *tsd, size_t size, int flags, size_t *usize) } JEMALLOC_ALWAYS_INLINE_C void * -imallocx_no_prof(tsd_t *tsd, size_t size, int flags, size_t *usize) +imallocx_no_prof(tsd_t *tsd, size_t size, int flags, size_t *usize, + bool slow_path) { void *p; size_t alignment; @@ -2182,24 +2096,50 @@ imallocx_no_prof(tsd_t *tsd, size_t size, int flags, size_t *usize) tcache_t *tcache; arena_t *arena; + if (unlikely(imallocx_flags_decode(tsd, size, flags, usize, &alignment, + &zero, &tcache, &arena))) + return (NULL); + p = imallocx_flags(tsd, *usize, alignment, zero, tcache, arena, + slow_path); + assert(alignment == 0 || ((uintptr_t)p & (alignment - 1)) == ZU(0)); + return (p); +} + +JEMALLOC_ALWAYS_INLINE_C void * +imallocx_body(size_t size, int flags, tsd_t **tsd, size_t *usize, + bool slow_path) +{ + + if (slow_path && unlikely(malloc_init())) { + *tsd = NULL; + return (NULL); + } + + *tsd = tsd_fetch(); + witness_assert_lockless(*tsd); + if (likely(flags == 0)) { szind_t ind = size2index(size); if (unlikely(ind >= NSIZES)) return (NULL); - if (config_stats || (config_valgrind && - unlikely(in_valgrind))) { + if (config_stats || (config_prof && opt_prof) || (slow_path && + config_valgrind && unlikely(in_valgrind))) { *usize = index2size(ind); assert(*usize > 0 && *usize <= HUGE_MAXCLASS); } - return (imalloc(tsd, size, ind, true)); + + if (config_prof && opt_prof) { + return (ialloc_prof(*tsd, *usize, ind, false, + slow_path)); + } + + return (ialloc(*tsd, size, ind, false, slow_path)); } - if (unlikely(imallocx_flags_decode_hard(tsd, size, flags, usize, - &alignment, &zero, &tcache, &arena))) - return (NULL); - p = imallocx_flags(tsd, *usize, alignment, zero, tcache, arena); - assert(alignment == 0 || ((uintptr_t)p & (alignment - 1)) == ZU(0)); - return (p); + if (config_prof && opt_prof) + return (imallocx_prof(*tsd, size, flags, usize, slow_path)); + + return (imallocx_no_prof(*tsd, size, flags, usize, slow_path)); } JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN @@ -2213,36 +2153,18 @@ je_mallocx(size_t size, int flags) assert(size != 0); - if (unlikely(malloc_init())) { - tsd = NULL; - goto label_oom; + if (likely(!malloc_slow)) { + p = imallocx_body(size, flags, &tsd, &usize, false); + ialloc_post_check(p, tsd, usize, "mallocx", false, false); + } else { + p = imallocx_body(size, 
flags, &tsd, &usize, true); + ialloc_post_check(p, tsd, usize, "mallocx", false, true); + UTRACE(0, size, p); + JEMALLOC_VALGRIND_MALLOC(p != NULL, tsd, p, usize, + MALLOCX_ZERO_GET(flags)); } - tsd = tsd_fetch(); - witness_assert_lockless(tsd); - if (config_prof && opt_prof) - p = imallocx_prof(tsd, size, flags, &usize); - else - p = imallocx_no_prof(tsd, size, flags, &usize); - if (unlikely(p == NULL)) - goto label_oom; - - if (config_stats) { - assert(usize == isalloc(tsd, p, config_prof)); - *tsd_thread_allocatedp_get(tsd) += usize; - } - UTRACE(0, size, p); - JEMALLOC_VALGRIND_MALLOC(true, tsd, p, usize, MALLOCX_ZERO_GET(flags)); - witness_assert_lockless(tsd); return (p); -label_oom: - if (config_xmalloc && unlikely(opt_xmalloc)) { - malloc_write(": Error in mallocx(): out of memory\n"); - abort(); - } - UTRACE(0, size, 0); - witness_assert_lockless(tsd); - return (NULL); } static void * @@ -2567,7 +2489,10 @@ je_dallocx(void *ptr, int flags) tcache = tcache_get(tsd, false); UTRACE(ptr, 0, 0); - ifree(tsd_fetch(), ptr, tcache, true); + if (likely(!malloc_slow)) + ifree(tsd, ptr, tcache, false); + else + ifree(tsd, ptr, tcache, true); witness_assert_lockless(tsd); } @@ -2609,7 +2534,10 @@ je_sdallocx(void *ptr, size_t size, int flags) tcache = tcache_get(tsd, false); UTRACE(ptr, 0, 0); - isfree(tsd, ptr, usize, tcache); + if (likely(!malloc_slow)) + isfree(tsd, ptr, usize, tcache, false); + else + isfree(tsd, ptr, usize, tcache, true); witness_assert_lockless(tsd); } From 1326010cf4a0faef7a0e8fd3e0cf62adcf56a398 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 6 May 2016 14:50:58 -0700 Subject: [PATCH 72/82] Update private_symbols.txt. --- include/jemalloc/internal/private_symbols.txt | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index e47296ff..28996206 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -547,12 +547,18 @@ tsd_cleanup tsd_cleanup_wrapper tsd_fetch tsd_get -tsd_wrapper_get -tsd_wrapper_set +tsd_iarena_get +tsd_iarena_set +tsd_iarenap_get tsd_initialized tsd_init_check_recursion tsd_init_finish tsd_init_head +tsd_narenas_tdata_get +tsd_narenas_tdata_set +tsd_narenas_tdatap_get +tsd_wrapper_get +tsd_wrapper_set tsd_nominal tsd_prof_tdata_get tsd_prof_tdata_set @@ -575,6 +581,9 @@ tsd_thread_deallocated_set tsd_thread_deallocatedp_get tsd_tls tsd_tsd +tsd_witness_fork_get +tsd_witness_fork_set +tsd_witness_forkp_get tsd_witnesses_get tsd_witnesses_set tsd_witnessesp_get From 62c217e6131b845a91fcbd6372151acb300d2193 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 6 May 2016 15:22:32 -0700 Subject: [PATCH 73/82] Update ChangeLog. --- ChangeLog | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/ChangeLog b/ChangeLog index 3cb32be9..68dedfa0 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,6 +4,28 @@ brevity. Much more detail can be found in the git revision history: https://github.com/jemalloc/jemalloc +* 4.2.0 (XXX) + + New features: + - Add the arena..reset mallctl, which makes it possible to discard all of + an arena's allocations in a single operation. (@jasone@) + - Add the stats.retained and stats.arenas..retained statistics. (@jasone) + - Add the --with-version configure option. (@jasone) + - Support --with-lg-page values larger than actual page size. 
(@jasone) + + Optimizations: + - Use pairing heaps rather than red-black trees for various hot data + structures. (@djwatson, @jasone) + - Streamline fast paths of rtree operations. (@jasone) + - Optimize the fast paths of calloc() and [m,d,sd]allocx(). (@jasone) + - Decommit unused virtual memory if the OS does not overcommit. (@jasone) + - Specify MAP_NORESERVE on Linux if [heuristic] overcommit is active, in order + to avoid unfortunate interactions during fork(2). (@jasone) + + Bug fixes: + - Link against librt for clock_gettime(2) if glibc < 2.17. (@jasone) + - Scale leak report summary according to sampling probability. (@jasone) + * 4.1.1 (May 3, 2016) This bugfix release resolves a variety of mostly minor issues, though the From 919e4a0ea92fbbf9b97efdf9f31a3c800f77cd8f Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 6 May 2016 17:15:32 -0700 Subject: [PATCH 74/82] Add LG_QUANTUM definition for the RISC-V architecture. --- include/jemalloc/internal/jemalloc_internal.h.in | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index fe504d8d..3ce36659 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -258,6 +258,9 @@ typedef unsigned szind_t; # ifdef __powerpc__ # define LG_QUANTUM 4 # endif +# ifdef __riscv__ +# define LG_QUANTUM 4 +# endif # ifdef __s390__ # define LG_QUANTUM 4 # endif From 0c12dcabc59ea9c95fc38197e7c4bc44663b0a26 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 7 May 2016 12:42:31 -0700 Subject: [PATCH 75/82] Fix tsd bootstrapping for a0malloc(). --- Makefile.in | 1 + src/jemalloc.c | 58 +++++++++++++++++++++------------------- test/include/test/test.h | 4 +++ test/src/test.c | 56 +++++++++++++++++++++++++++----------- test/unit/a0.c | 19 +++++++++++++ test/unit/junk.c | 1 - test/unit/tsd.c | 5 ++++ 7 files changed, 101 insertions(+), 43 deletions(-) create mode 100644 test/unit/a0.c diff --git a/Makefile.in b/Makefile.in index 1cf4bf0f..652f01f2 100644 --- a/Makefile.in +++ b/Makefile.in @@ -135,6 +135,7 @@ C_TESTLIB_SRCS := $(srcroot)test/src/btalloc.c $(srcroot)test/src/btalloc_0.c \ $(srcroot)test/src/thd.c $(srcroot)test/src/timer.c C_UTIL_INTEGRATION_SRCS := $(srcroot)src/nstime.c $(srcroot)src/util.c TESTS_UNIT := \ + $(srcroot)test/unit/a0.c \ $(srcroot)test/unit/arena_reset.c \ $(srcroot)test/unit/atomic.c \ $(srcroot)test/unit/bitmap.c \ diff --git a/src/jemalloc.c b/src/jemalloc.c index 259ab4f7..b1d691ed 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -254,7 +254,7 @@ typedef struct { * definition. */ -static bool malloc_init_hard_a0(tsd_t *tsd); +static bool malloc_init_hard_a0(void); static bool malloc_init_hard(void); /******************************************************************************/ @@ -291,7 +291,7 @@ malloc_init_a0(void) { if (unlikely(malloc_init_state == malloc_init_uninitialized)) - return (malloc_init_hard_a0(NULL)); + return (malloc_init_hard_a0()); return (false); } @@ -307,7 +307,7 @@ malloc_init(void) } /* - * The a0*() functions are used instead of i[mcd]alloc() in situations that + * The a0*() functions are used instead of i{d,}alloc() in situations that * cannot tolerate TLS variable access. 
*/ @@ -318,8 +318,8 @@ a0ialloc(size_t size, bool zero, bool is_metadata) if (unlikely(malloc_init_a0())) return (NULL); - return (iallocztm(NULL, size, size2index(size), zero, NULL, - is_metadata, arena_get(NULL, 0, true), true)); + return (iallocztm(NULL, size, size2index(size), zero, NULL, is_metadata, + arena_get(NULL, 0, true), true)); } static void @@ -1256,7 +1256,7 @@ malloc_init_hard_needed(void) } static bool -malloc_init_hard_a0_locked(tsd_t *tsd) +malloc_init_hard_a0_locked(tsd_t **tsd) { malloc_initializer = INITIALIZER; @@ -1283,7 +1283,7 @@ malloc_init_hard_a0_locked(tsd_t *tsd) prof_boot1(); if (arena_boot()) return (true); - if (config_tcache && tcache_boot(tsd)) + if (config_tcache && tcache_boot(*tsd)) return (true); if (malloc_mutex_init(&arenas_lock, "arenas", WITNESS_RANK_ARENAS)) return (true); @@ -1299,38 +1299,41 @@ malloc_init_hard_a0_locked(tsd_t *tsd) * Initialize one arena here. The rest are lazily created in * arena_choose_hard(). */ - if (arena_init(tsd, 0) == NULL) + if (arena_init(*tsd, 0) == NULL) return (true); + + /* + * Initialize tsd, since some code paths cause chunk allocation, which + * in turn depends on tsd. + */ + *tsd = malloc_tsd_boot0(); + if (*tsd == NULL) + return (true); + malloc_init_state = malloc_init_a0_initialized; + return (false); } static bool -malloc_init_hard_a0(tsd_t *tsd) +malloc_init_hard_a0(void) { bool ret; + tsd_t *tsd = NULL; malloc_mutex_lock(tsd, &init_lock); - ret = malloc_init_hard_a0_locked(tsd); + ret = malloc_init_hard_a0_locked(&tsd); malloc_mutex_unlock(tsd, &init_lock); return (ret); } /* Initialize data structures which may trigger recursive allocation. */ static bool -malloc_init_hard_recursible(tsd_t **tsd) +malloc_init_hard_recursible(tsd_t *tsd) { - bool ret; malloc_init_state = malloc_init_recursible; - malloc_mutex_unlock(*tsd, &init_lock); - - /* LinuxThreads' pthread_setspecific() allocates. */ - *tsd = malloc_tsd_boot0(); - if (*tsd == NULL) { - ret = true; - goto label_return; - } + malloc_mutex_unlock(tsd, &init_lock); ncpus = malloc_ncpus(); @@ -1339,17 +1342,16 @@ malloc_init_hard_recursible(tsd_t **tsd) /* LinuxThreads' pthread_atfork() allocates. */ if (pthread_atfork(jemalloc_prefork, jemalloc_postfork_parent, jemalloc_postfork_child) != 0) { - ret = true; malloc_write(": Error in pthread_atfork()\n"); if (opt_abort) abort(); + malloc_mutex_lock(tsd, &init_lock); + return (true); } #endif - ret = false; -label_return: - malloc_mutex_lock(*tsd, &init_lock); - return (ret); + malloc_mutex_lock(tsd, &init_lock); + return (false); } static bool @@ -1409,12 +1411,12 @@ malloc_init_hard(void) } if (malloc_init_state != malloc_init_a0_initialized && - malloc_init_hard_a0_locked(tsd)) { + malloc_init_hard_a0_locked(&tsd)) { malloc_mutex_unlock(tsd, &init_lock); return (true); } - if (malloc_init_hard_recursible(&tsd)) { + if (malloc_init_hard_recursible(tsd)) { malloc_mutex_unlock(tsd, &init_lock); return (true); } @@ -2669,6 +2671,7 @@ je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) * to trigger the deadlock described above, but doing so would involve forking * via a library constructor that runs before jemalloc's runs. 
*/ +#ifndef JEMALLOC_JET JEMALLOC_ATTR(constructor) static void jemalloc_constructor(void) @@ -2676,6 +2679,7 @@ jemalloc_constructor(void) malloc_init(); } +#endif #ifndef JEMALLOC_MUTEX_INIT_CB void diff --git a/test/include/test/test.h b/test/include/test/test.h index 3cf901fc..c8112eb8 100644 --- a/test/include/test/test.h +++ b/test/include/test/test.h @@ -311,6 +311,9 @@ label_test_end: \ #define test(...) \ p_test(__VA_ARGS__, NULL) +#define test_no_malloc_init(...) \ + p_test_no_malloc_init(__VA_ARGS__, NULL) + #define test_skip_if(e) do { \ if (e) { \ test_skip("%s:%s:%d: Test skipped: (%s)", \ @@ -324,6 +327,7 @@ void test_fail(const char *format, ...) JEMALLOC_FORMAT_PRINTF(1, 2); /* For private use by macros. */ test_status_t p_test(test_t *t, ...); +test_status_t p_test_no_malloc_init(test_t *t, ...); void p_test_init(const char *name); void p_test_fini(void); void p_test_fail(const char *prefix, const char *message); diff --git a/test/src/test.c b/test/src/test.c index 8173614c..d70cc750 100644 --- a/test/src/test.c +++ b/test/src/test.c @@ -60,32 +60,30 @@ p_test_fini(void) malloc_printf("%s: %s\n", test_name, test_status_string(test_status)); } -test_status_t -p_test(test_t *t, ...) +static test_status_t +p_test_impl(bool do_malloc_init, test_t *t, va_list ap) { test_status_t ret; - va_list ap; - /* - * Make sure initialization occurs prior to running tests. Tests are - * special because they may use internal facilities prior to triggering - * initialization as a side effect of calling into the public API. This - * is a final safety that works even if jemalloc_constructor() doesn't - * run, as for MSVC builds. - */ - if (nallocx(1, 0) == 0) { - malloc_printf("Initialization error"); - return (test_status_fail); + if (do_malloc_init) { + /* + * Make sure initialization occurs prior to running tests. + * Tests are special because they may use internal facilities + * prior to triggering initialization as a side effect of + * calling into the public API. + */ + if (nallocx(1, 0) == 0) { + malloc_printf("Initialization error"); + return (test_status_fail); + } } ret = test_status_pass; - va_start(ap, t); for (; t != NULL; t = va_arg(ap, test_t *)) { t(); if (test_status > ret) ret = test_status; } - va_end(ap); malloc_printf("--- %s: %u/%u, %s: %u/%u, %s: %u/%u ---\n", test_status_string(test_status_pass), @@ -98,6 +96,34 @@ p_test(test_t *t, ...) return (ret); } +test_status_t +p_test(test_t *t, ...) +{ + test_status_t ret; + va_list ap; + + ret = test_status_pass; + va_start(ap, t); + ret = p_test_impl(true, t, ap); + va_end(ap); + + return (ret); +} + +test_status_t +p_test_no_malloc_init(test_t *t, ...) 
+{ + test_status_t ret; + va_list ap; + + ret = test_status_pass; + va_start(ap, t); + ret = p_test_impl(false, t, ap); + va_end(ap); + + return (ret); +} + void p_test_fail(const char *prefix, const char *message) { diff --git a/test/unit/a0.c b/test/unit/a0.c new file mode 100644 index 00000000..b9ba45a3 --- /dev/null +++ b/test/unit/a0.c @@ -0,0 +1,19 @@ +#include "test/jemalloc_test.h" + +TEST_BEGIN(test_a0) +{ + void *p; + + p = a0malloc(1); + assert_ptr_not_null(p, "Unexpected a0malloc() error"); + a0dalloc(p); +} +TEST_END + +int +main(void) +{ + + return (test_no_malloc_init( + test_a0)); +} diff --git a/test/unit/junk.c b/test/unit/junk.c index e251a124..414874a0 100644 --- a/test/unit/junk.c +++ b/test/unit/junk.c @@ -244,7 +244,6 @@ int main(void) { - assert(!config_fill || opt_junk_alloc || opt_junk_free); return (test( test_junk_small, test_junk_large, diff --git a/test/unit/tsd.c b/test/unit/tsd.c index 8be787fd..7dde4b77 100644 --- a/test/unit/tsd.c +++ b/test/unit/tsd.c @@ -99,6 +99,11 @@ int main(void) { + /* Core tsd bootstrapping must happen prior to data_tsd_boot(). */ + if (nallocx(1, 0) == 0) { + malloc_printf("Initialization error"); + return (test_status_fail); + } data_tsd_boot(); return (test( From c1e00ef2a6442d1d047950247c757821560db329 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 10 May 2016 22:21:10 -0700 Subject: [PATCH 76/82] Resolve bootstrapping issues when embedded in FreeBSD libc. b2c0d6322d2307458ae2b28545f8a5c9903d7ef5 (Add witness, a simple online locking validator.) caused a broad propagation of tsd throughout the internal API, but tsd_fetch() was designed to fail prior to tsd bootstrapping. Fix this by splitting tsd_t into non-nullable tsd_t and nullable tsdn_t, and modifying all internal APIs that do not critically rely on tsd to take nullable pointers. Furthermore, add the tsd_booted_get() function so that tsdn_fetch() can probe whether tsd bootstrapping is complete and return NULL if not. All dangerous conversions of nullable pointers are tsdn_tsd() calls that assert-fail on invalid conversion. 
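
For reference, the following standalone sketch illustrates the usage pattern described above; it is not jemalloc source. The type and function names (tsd_t, tsdn_t, tsd_fetch(), tsdn_fetch(), tsd_booted_get(), tsd_tsdn(), tsdn_null(), tsdn_tsd()) mirror the ones introduced by this patch, but their bodies, the tsd_booted flag, and the internal_op() helper are hypothetical stand-ins used only to show how a nullable tsdn_t flows through an internal API before and after bootstrapping.

    #include <assert.h>
    #include <stdbool.h>
    #include <stddef.h>
    #include <stdio.h>

    typedef struct { int unused; } tsd_t;   /* non-nullable thread state handle */
    typedef struct tsdn_s tsdn_t;           /* nullable view; may be NULL pre-boot */

    static bool tsd_booted = false;         /* stand-in for the real boot flag */
    static tsd_t tsd_tls;                   /* stand-in for per-thread TLS data */

    static bool tsd_booted_get(void) { return tsd_booted; }
    static tsd_t *tsd_fetch(void) { assert(tsd_booted); return &tsd_tls; }

    /* tsdn_t is the same object viewed through a pointer that may be NULL. */
    static tsdn_t *tsd_tsdn(tsd_t *tsd) { return (tsdn_t *)tsd; }
    static bool tsdn_null(const tsdn_t *tsdn) { return tsdn == NULL; }

    /* The only conversion back to tsd_t; asserts rather than returning NULL. */
    static tsd_t *tsdn_tsd(tsdn_t *tsdn) {
        assert(!tsdn_null(tsdn));
        return (tsd_t *)tsdn;
    }

    /* Probe whether tsd bootstrapping is complete instead of assuming it. */
    static tsdn_t *tsdn_fetch(void) {
        if (!tsd_booted_get())
            return NULL;
        return tsd_tsdn(tsd_fetch());
    }

    /* An internal API that does not critically rely on tsd: takes tsdn_t. */
    static void internal_op(tsdn_t *tsdn) {
        if (tsdn_null(tsdn)) {
            printf("pre-boot path: skip tsd-dependent bookkeeping\n");
            return;
        }
        tsd_t *tsd = tsdn_tsd(tsdn); /* safe: non-NULL was just checked */
        (void)tsd;
        printf("post-boot path: full tsd available\n");
    }

    int main(void) {
        internal_op(tsdn_fetch());   /* before bootstrap: tsdn is NULL */
        tsd_booted = true;           /* pretend tsd bootstrapping finished */
        internal_op(tsdn_fetch());   /* after bootstrap: real tsd */
        return 0;
    }

The point of the split is visible in internal_op(): callers that may run before bootstrapping pass whatever tsdn_fetch() returns, branch on tsdn_null(), and only cross back into the non-nullable world through the asserting tsdn_tsd() conversion.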
--- include/jemalloc/internal/arena.h | 201 +++--- include/jemalloc/internal/base.h | 10 +- include/jemalloc/internal/chunk.h | 22 +- include/jemalloc/internal/chunk_dss.h | 14 +- include/jemalloc/internal/ckh.h | 8 +- include/jemalloc/internal/ctl.h | 10 +- include/jemalloc/internal/huge.h | 20 +- .../jemalloc/internal/jemalloc_internal.h.in | 103 +-- include/jemalloc/internal/mutex.h | 32 +- include/jemalloc/internal/private_symbols.txt | 7 +- include/jemalloc/internal/prof.h | 74 +-- include/jemalloc/internal/tcache.h | 33 +- include/jemalloc/internal/tsd.h | 76 +++ include/jemalloc/internal/valgrind.h | 12 +- include/jemalloc/internal/witness.h | 10 +- src/arena.c | 585 +++++++++--------- src/base.c | 45 +- src/chunk.c | 146 ++--- src/chunk_dss.c | 42 +- src/ckh.c | 42 +- src/ctl.c | 229 +++---- src/huge.c | 152 ++--- src/jemalloc.c | 518 ++++++++-------- src/mutex.c | 12 +- src/prof.c | 475 +++++++------- src/quarantine.c | 44 +- src/tcache.c | 123 ++-- src/witness.c | 36 +- src/zone.c | 8 +- test/unit/arena_reset.c | 8 +- test/unit/ckh.c | 46 +- test/unit/junk.c | 4 +- test/unit/prof_reset.c | 2 +- test/unit/witness.c | 116 ++-- 34 files changed, 1709 insertions(+), 1556 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index debb43f3..b1de2b61 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -518,28 +518,28 @@ void arena_chunk_cache_maybe_insert(arena_t *arena, extent_node_t *node, bool cache); void arena_chunk_cache_maybe_remove(arena_t *arena, extent_node_t *node, bool cache); -extent_node_t *arena_node_alloc(tsd_t *tsd, arena_t *arena); -void arena_node_dalloc(tsd_t *tsd, arena_t *arena, extent_node_t *node); -void *arena_chunk_alloc_huge(tsd_t *tsd, arena_t *arena, size_t usize, +extent_node_t *arena_node_alloc(tsdn_t *tsdn, arena_t *arena); +void arena_node_dalloc(tsdn_t *tsdn, arena_t *arena, extent_node_t *node); +void *arena_chunk_alloc_huge(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool *zero); -void arena_chunk_dalloc_huge(tsd_t *tsd, arena_t *arena, void *chunk, +void arena_chunk_dalloc_huge(tsdn_t *tsdn, arena_t *arena, void *chunk, size_t usize); -void arena_chunk_ralloc_huge_similar(tsd_t *tsd, arena_t *arena, void *chunk, - size_t oldsize, size_t usize); -void arena_chunk_ralloc_huge_shrink(tsd_t *tsd, arena_t *arena, void *chunk, - size_t oldsize, size_t usize); -bool arena_chunk_ralloc_huge_expand(tsd_t *tsd, arena_t *arena, void *chunk, - size_t oldsize, size_t usize, bool *zero); -ssize_t arena_lg_dirty_mult_get(tsd_t *tsd, arena_t *arena); -bool arena_lg_dirty_mult_set(tsd_t *tsd, arena_t *arena, +void arena_chunk_ralloc_huge_similar(tsdn_t *tsdn, arena_t *arena, + void *chunk, size_t oldsize, size_t usize); +void arena_chunk_ralloc_huge_shrink(tsdn_t *tsdn, arena_t *arena, + void *chunk, size_t oldsize, size_t usize); +bool arena_chunk_ralloc_huge_expand(tsdn_t *tsdn, arena_t *arena, + void *chunk, size_t oldsize, size_t usize, bool *zero); +ssize_t arena_lg_dirty_mult_get(tsdn_t *tsdn, arena_t *arena); +bool arena_lg_dirty_mult_set(tsdn_t *tsdn, arena_t *arena, ssize_t lg_dirty_mult); -ssize_t arena_decay_time_get(tsd_t *tsd, arena_t *arena); -bool arena_decay_time_set(tsd_t *tsd, arena_t *arena, ssize_t decay_time); -void arena_purge(tsd_t *tsd, arena_t *arena, bool all); -void arena_maybe_purge(tsd_t *tsd, arena_t *arena); +ssize_t arena_decay_time_get(tsdn_t *tsdn, arena_t *arena); +bool arena_decay_time_set(tsdn_t *tsdn, arena_t *arena, ssize_t 
decay_time); +void arena_purge(tsdn_t *tsdn, arena_t *arena, bool all); +void arena_maybe_purge(tsdn_t *tsdn, arena_t *arena); void arena_reset(tsd_t *tsd, arena_t *arena); -void arena_tcache_fill_small(tsd_t *tsd, arena_t *arena, tcache_bin_t *tbin, - szind_t binind, uint64_t prof_accumbytes); +void arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, + tcache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes); void arena_alloc_junk_small(void *ptr, arena_bin_info_t *bin_info, bool zero); #ifdef JEMALLOC_JET @@ -552,17 +552,18 @@ extern arena_dalloc_junk_small_t *arena_dalloc_junk_small; void arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info); #endif void arena_quarantine_junk_small(void *ptr, size_t usize); -void *arena_malloc_large(tsd_t *tsd, arena_t *arena, szind_t ind, bool zero); -void *arena_malloc_hard(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind, +void *arena_malloc_large(tsdn_t *tsdn, arena_t *arena, szind_t ind, bool zero); -void *arena_palloc(tsd_t *tsd, arena_t *arena, size_t usize, +void *arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, + szind_t ind, bool zero); +void *arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero, tcache_t *tcache); -void arena_prof_promoted(tsd_t *tsd, const void *ptr, size_t size); -void arena_dalloc_bin_junked_locked(tsd_t *tsd, arena_t *arena, +void arena_prof_promoted(tsdn_t *tsdn, const void *ptr, size_t size); +void arena_dalloc_bin_junked_locked(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, void *ptr, arena_chunk_map_bits_t *bitselm); -void arena_dalloc_bin(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, +void arena_dalloc_bin(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, void *ptr, size_t pageind, arena_chunk_map_bits_t *bitselm); -void arena_dalloc_small(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, +void arena_dalloc_small(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, void *ptr, size_t pageind); #ifdef JEMALLOC_JET typedef void (arena_dalloc_junk_large_t)(void *, size_t); @@ -570,28 +571,28 @@ extern arena_dalloc_junk_large_t *arena_dalloc_junk_large; #else void arena_dalloc_junk_large(void *ptr, size_t usize); #endif -void arena_dalloc_large_junked_locked(tsd_t *tsd, arena_t *arena, +void arena_dalloc_large_junked_locked(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, void *ptr); -void arena_dalloc_large(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, +void arena_dalloc_large(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, void *ptr); #ifdef JEMALLOC_JET typedef void (arena_ralloc_junk_large_t)(void *, size_t, size_t); extern arena_ralloc_junk_large_t *arena_ralloc_junk_large; #endif -bool arena_ralloc_no_move(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, - size_t extra, bool zero); +bool arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, + size_t size, size_t extra, bool zero); void *arena_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, size_t alignment, bool zero, tcache_t *tcache); -dss_prec_t arena_dss_prec_get(tsd_t *tsd, arena_t *arena); -bool arena_dss_prec_set(tsd_t *tsd, arena_t *arena, dss_prec_t dss_prec); +dss_prec_t arena_dss_prec_get(tsdn_t *tsdn, arena_t *arena); +bool arena_dss_prec_set(tsdn_t *tsdn, arena_t *arena, dss_prec_t dss_prec); ssize_t arena_lg_dirty_mult_default_get(void); bool arena_lg_dirty_mult_default_set(ssize_t lg_dirty_mult); ssize_t arena_decay_time_default_get(void); bool arena_decay_time_default_set(ssize_t decay_time); -void arena_basic_stats_merge(tsd_t 
*tsd, arena_t *arena, unsigned *nthreads, - const char **dss, ssize_t *lg_dirty_mult, ssize_t *decay_time, - size_t *nactive, size_t *ndirty); -void arena_stats_merge(tsd_t *tsd, arena_t *arena, unsigned *nthreads, +void arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, + unsigned *nthreads, const char **dss, ssize_t *lg_dirty_mult, + ssize_t *decay_time, size_t *nactive, size_t *ndirty); +void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *lg_dirty_mult, ssize_t *decay_time, size_t *nactive, size_t *ndirty, arena_stats_t *astats, malloc_bin_stats_t *bstats, malloc_large_stats_t *lstats, @@ -599,14 +600,14 @@ void arena_stats_merge(tsd_t *tsd, arena_t *arena, unsigned *nthreads, unsigned arena_nthreads_get(arena_t *arena, bool internal); void arena_nthreads_inc(arena_t *arena, bool internal); void arena_nthreads_dec(arena_t *arena, bool internal); -arena_t *arena_new(tsd_t *tsd, unsigned ind); +arena_t *arena_new(tsdn_t *tsdn, unsigned ind); bool arena_boot(void); -void arena_prefork0(tsd_t *tsd, arena_t *arena); -void arena_prefork1(tsd_t *tsd, arena_t *arena); -void arena_prefork2(tsd_t *tsd, arena_t *arena); -void arena_prefork3(tsd_t *tsd, arena_t *arena); -void arena_postfork_parent(tsd_t *tsd, arena_t *arena); -void arena_postfork_child(tsd_t *tsd, arena_t *arena); +void arena_prefork0(tsdn_t *tsdn, arena_t *arena); +void arena_prefork1(tsdn_t *tsdn, arena_t *arena); +void arena_prefork2(tsdn_t *tsdn, arena_t *arena); +void arena_prefork3(tsdn_t *tsdn, arena_t *arena); +void arena_postfork_parent(tsdn_t *tsdn, arena_t *arena); +void arena_postfork_child(tsdn_t *tsdn, arena_t *arena); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ @@ -663,24 +664,24 @@ void arena_metadata_allocated_sub(arena_t *arena, size_t size); size_t arena_metadata_allocated_get(arena_t *arena); bool arena_prof_accum_impl(arena_t *arena, uint64_t accumbytes); bool arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes); -bool arena_prof_accum(tsd_t *tsd, arena_t *arena, uint64_t accumbytes); +bool arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes); szind_t arena_ptr_small_binind_get(const void *ptr, size_t mapbits); szind_t arena_bin_index(arena_t *arena, arena_bin_t *bin); size_t arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr); -prof_tctx_t *arena_prof_tctx_get(tsd_t *tsd, const void *ptr); -void arena_prof_tctx_set(tsd_t *tsd, const void *ptr, size_t usize, +prof_tctx_t *arena_prof_tctx_get(tsdn_t *tsdn, const void *ptr); +void arena_prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize, prof_tctx_t *tctx); -void arena_prof_tctx_reset(tsd_t *tsd, const void *ptr, size_t usize, +void arena_prof_tctx_reset(tsdn_t *tsdn, const void *ptr, size_t usize, const void *old_ptr, prof_tctx_t *old_tctx); -void arena_decay_ticks(tsd_t *tsd, arena_t *arena, unsigned nticks); -void arena_decay_tick(tsd_t *tsd, arena_t *arena); -void *arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind, +void arena_decay_ticks(tsdn_t *tsdn, arena_t *arena, unsigned nticks); +void arena_decay_tick(tsdn_t *tsdn, arena_t *arena); +void *arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero, tcache_t *tcache, bool slow_path); arena_t *arena_aalloc(const void *ptr); -size_t arena_salloc(tsd_t *tsd, const void *ptr, bool demote); -void arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path); -void arena_sdalloc(tsd_t *tsd, 
void *ptr, size_t size, tcache_t *tcache, +size_t arena_salloc(tsdn_t *tsdn, const void *ptr, bool demote); +void arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path); +void arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, bool slow_path); #endif @@ -1056,7 +1057,7 @@ arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes) } JEMALLOC_INLINE bool -arena_prof_accum(tsd_t *tsd, arena_t *arena, uint64_t accumbytes) +arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes) { cassert(config_prof); @@ -1067,9 +1068,9 @@ arena_prof_accum(tsd_t *tsd, arena_t *arena, uint64_t accumbytes) { bool ret; - malloc_mutex_lock(tsd, &arena->lock); + malloc_mutex_lock(tsdn, &arena->lock); ret = arena_prof_accum_impl(arena, accumbytes); - malloc_mutex_unlock(tsd, &arena->lock); + malloc_mutex_unlock(tsdn, &arena->lock); return (ret); } } @@ -1205,7 +1206,7 @@ arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr) } JEMALLOC_INLINE prof_tctx_t * -arena_prof_tctx_get(tsd_t *tsd, const void *ptr) +arena_prof_tctx_get(tsdn_t *tsdn, const void *ptr) { prof_tctx_t *ret; arena_chunk_t *chunk; @@ -1226,13 +1227,13 @@ arena_prof_tctx_get(tsd_t *tsd, const void *ptr) ret = atomic_read_p(&elm->prof_tctx_pun); } } else - ret = huge_prof_tctx_get(tsd, ptr); + ret = huge_prof_tctx_get(tsdn, ptr); return (ret); } JEMALLOC_INLINE void -arena_prof_tctx_set(tsd_t *tsd, const void *ptr, size_t usize, +arena_prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize, prof_tctx_t *tctx) { arena_chunk_t *chunk; @@ -1264,11 +1265,11 @@ arena_prof_tctx_set(tsd_t *tsd, const void *ptr, size_t usize, assert(arena_mapbits_large_get(chunk, pageind) == 0); } } else - huge_prof_tctx_set(tsd, ptr, tctx); + huge_prof_tctx_set(tsdn, ptr, tctx); } JEMALLOC_INLINE void -arena_prof_tctx_reset(tsd_t *tsd, const void *ptr, size_t usize, +arena_prof_tctx_reset(tsdn_t *tsdn, const void *ptr, size_t usize, const void *old_ptr, prof_tctx_t *old_tctx) { @@ -1292,52 +1293,55 @@ arena_prof_tctx_reset(tsd_t *tsd, const void *ptr, size_t usize, atomic_write_p(&elm->prof_tctx_pun, (prof_tctx_t *)(uintptr_t)1U); } else - huge_prof_tctx_reset(tsd, ptr); + huge_prof_tctx_reset(tsdn, ptr); } } JEMALLOC_ALWAYS_INLINE void -arena_decay_ticks(tsd_t *tsd, arena_t *arena, unsigned nticks) +arena_decay_ticks(tsdn_t *tsdn, arena_t *arena, unsigned nticks) { + tsd_t *tsd; ticker_t *decay_ticker; - if (unlikely(tsd == NULL)) + if (unlikely(tsdn_null(tsdn))) return; + tsd = tsdn_tsd(tsdn); decay_ticker = decay_ticker_get(tsd, arena->ind); if (unlikely(decay_ticker == NULL)) return; if (unlikely(ticker_ticks(decay_ticker, nticks))) - arena_purge(tsd, arena, false); + arena_purge(tsdn, arena, false); } JEMALLOC_ALWAYS_INLINE void -arena_decay_tick(tsd_t *tsd, arena_t *arena) +arena_decay_tick(tsdn_t *tsdn, arena_t *arena) { - arena_decay_ticks(tsd, arena, 1); + arena_decay_ticks(tsdn, arena, 1); } JEMALLOC_ALWAYS_INLINE void * -arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind, bool zero, +arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero, tcache_t *tcache, bool slow_path) { + assert(!tsdn_null(tsdn) || tcache == NULL); assert(size != 0); if (likely(tcache != NULL)) { if (likely(size <= SMALL_MAXCLASS)) { - return (tcache_alloc_small(tsd, arena, tcache, size, - ind, zero, slow_path)); + return (tcache_alloc_small(tsdn_tsd(tsdn), arena, + tcache, size, ind, zero, slow_path)); } if (likely(size <= tcache_maxclass)) { - return (tcache_alloc_large(tsd, 
arena, tcache, size, - ind, zero, slow_path)); + return (tcache_alloc_large(tsdn_tsd(tsdn), arena, + tcache, size, ind, zero, slow_path)); } /* (size > tcache_maxclass) case falls through. */ assert(size > tcache_maxclass); } - return (arena_malloc_hard(tsd, arena, size, ind, zero)); + return (arena_malloc_hard(tsdn, arena, size, ind, zero)); } JEMALLOC_ALWAYS_INLINE arena_t * @@ -1354,7 +1358,7 @@ arena_aalloc(const void *ptr) /* Return the size of the allocation pointed to by ptr. */ JEMALLOC_ALWAYS_INLINE size_t -arena_salloc(tsd_t *tsd, const void *ptr, bool demote) +arena_salloc(tsdn_t *tsdn, const void *ptr, bool demote) { size_t ret; arena_chunk_t *chunk; @@ -1397,17 +1401,18 @@ arena_salloc(tsd_t *tsd, const void *ptr, bool demote) ret = index2size(binind); } } else - ret = huge_salloc(tsd, ptr); + ret = huge_salloc(tsdn, ptr); return (ret); } JEMALLOC_ALWAYS_INLINE void -arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) +arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path) { arena_chunk_t *chunk; size_t pageind, mapbits; + assert(!tsdn_null(tsdn) || tcache == NULL); assert(ptr != NULL); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); @@ -1420,11 +1425,12 @@ arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) if (likely(tcache != NULL)) { szind_t binind = arena_ptr_small_binind_get(ptr, mapbits); - tcache_dalloc_small(tsd, tcache, ptr, binind, - slow_path); + tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr, + binind, slow_path); } else { - arena_dalloc_small(tsd, extent_node_arena_get( - &chunk->node), chunk, ptr, pageind); + arena_dalloc_small(tsdn, + extent_node_arena_get(&chunk->node), chunk, + ptr, pageind); } } else { size_t size = arena_mapbits_large_size_get(chunk, @@ -1435,23 +1441,26 @@ arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) if (likely(tcache != NULL) && size - large_pad <= tcache_maxclass) { - tcache_dalloc_large(tsd, tcache, ptr, size - - large_pad, slow_path); + tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr, + size - large_pad, slow_path); } else { - arena_dalloc_large(tsd, extent_node_arena_get( - &chunk->node), chunk, ptr); + arena_dalloc_large(tsdn, + extent_node_arena_get(&chunk->node), chunk, + ptr); } } } else - huge_dalloc(tsd, ptr); + huge_dalloc(tsdn, ptr); } JEMALLOC_ALWAYS_INLINE void -arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache, +arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, bool slow_path) { arena_chunk_t *chunk; + assert(!tsdn_null(tsdn) || tcache == NULL); + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (likely(chunk != ptr)) { if (config_prof && opt_prof) { @@ -1468,34 +1477,36 @@ arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache, pageind) - large_pad; } } - assert(s2u(size) == s2u(arena_salloc(tsd, ptr, false))); + assert(s2u(size) == s2u(arena_salloc(tsdn, ptr, false))); if (likely(size <= SMALL_MAXCLASS)) { /* Small allocation. 
*/ if (likely(tcache != NULL)) { szind_t binind = size2index(size); - tcache_dalloc_small(tsd, tcache, ptr, binind, - slow_path); + tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr, + binind, slow_path); } else { size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - arena_dalloc_small(tsd, extent_node_arena_get( - &chunk->node), chunk, ptr, pageind); + arena_dalloc_small(tsdn, + extent_node_arena_get(&chunk->node), chunk, + ptr, pageind); } } else { assert(config_cache_oblivious || ((uintptr_t)ptr & PAGE_MASK) == 0); if (likely(tcache != NULL) && size <= tcache_maxclass) { - tcache_dalloc_large(tsd, tcache, ptr, size, - slow_path); + tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr, + size, slow_path); } else { - arena_dalloc_large(tsd, extent_node_arena_get( - &chunk->node), chunk, ptr); + arena_dalloc_large(tsdn, + extent_node_arena_get(&chunk->node), chunk, + ptr); } } } else - huge_dalloc(tsd, ptr); + huge_dalloc(tsdn, ptr); } # endif /* JEMALLOC_ARENA_INLINE_B */ #endif diff --git a/include/jemalloc/internal/base.h b/include/jemalloc/internal/base.h index 075a2a20..d6b81e16 100644 --- a/include/jemalloc/internal/base.h +++ b/include/jemalloc/internal/base.h @@ -9,13 +9,13 @@ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -void *base_alloc(tsd_t *tsd, size_t size); -void base_stats_get(tsd_t *tsd, size_t *allocated, size_t *resident, +void *base_alloc(tsdn_t *tsdn, size_t size); +void base_stats_get(tsdn_t *tsdn, size_t *allocated, size_t *resident, size_t *mapped); bool base_boot(void); -void base_prefork(tsd_t *tsd); -void base_postfork_parent(tsd_t *tsd); -void base_postfork_child(tsd_t *tsd); +void base_prefork(tsdn_t *tsdn); +void base_postfork_parent(tsdn_t *tsdn); +void base_postfork_child(tsdn_t *tsdn); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/include/jemalloc/internal/chunk.h b/include/jemalloc/internal/chunk.h index 6c3ad9bf..c9fd4ecb 100644 --- a/include/jemalloc/internal/chunk.h +++ b/include/jemalloc/internal/chunk.h @@ -48,32 +48,32 @@ extern size_t chunk_npages; extern const chunk_hooks_t chunk_hooks_default; -chunk_hooks_t chunk_hooks_get(tsd_t *tsd, arena_t *arena); -chunk_hooks_t chunk_hooks_set(tsd_t *tsd, arena_t *arena, +chunk_hooks_t chunk_hooks_get(tsdn_t *tsdn, arena_t *arena); +chunk_hooks_t chunk_hooks_set(tsdn_t *tsdn, arena_t *arena, const chunk_hooks_t *chunk_hooks); -bool chunk_register(tsd_t *tsd, const void *chunk, +bool chunk_register(tsdn_t *tsdn, const void *chunk, const extent_node_t *node); void chunk_deregister(const void *chunk, const extent_node_t *node); void *chunk_alloc_base(size_t size); -void *chunk_alloc_cache(tsd_t *tsd, arena_t *arena, +void *chunk_alloc_cache(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr, size_t size, size_t alignment, bool *zero, bool dalloc_node); -void *chunk_alloc_wrapper(tsd_t *tsd, arena_t *arena, +void *chunk_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit); -void chunk_dalloc_cache(tsd_t *tsd, arena_t *arena, +void chunk_dalloc_cache(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk, size_t size, bool committed); -void chunk_dalloc_wrapper(tsd_t *tsd, arena_t *arena, +void chunk_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk, size_t size, bool zeroed, bool committed); -bool 
chunk_purge_wrapper(tsd_t *tsd, arena_t *arena, +bool chunk_purge_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk, size_t size, size_t offset, size_t length); bool chunk_boot(void); -void chunk_prefork(tsd_t *tsd); -void chunk_postfork_parent(tsd_t *tsd); -void chunk_postfork_child(tsd_t *tsd); +void chunk_prefork(tsdn_t *tsdn); +void chunk_postfork_parent(tsdn_t *tsdn); +void chunk_postfork_child(tsdn_t *tsdn); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/include/jemalloc/internal/chunk_dss.h b/include/jemalloc/internal/chunk_dss.h index 7f3a09c7..724fa579 100644 --- a/include/jemalloc/internal/chunk_dss.h +++ b/include/jemalloc/internal/chunk_dss.h @@ -21,15 +21,15 @@ extern const char *dss_prec_names[]; /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -dss_prec_t chunk_dss_prec_get(tsd_t *tsd); -bool chunk_dss_prec_set(tsd_t *tsd, dss_prec_t dss_prec); -void *chunk_alloc_dss(tsd_t *tsd, arena_t *arena, void *new_addr, +dss_prec_t chunk_dss_prec_get(tsdn_t *tsdn); +bool chunk_dss_prec_set(tsdn_t *tsdn, dss_prec_t dss_prec); +void *chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit); -bool chunk_in_dss(tsd_t *tsd, void *chunk); +bool chunk_in_dss(tsdn_t *tsdn, void *chunk); bool chunk_dss_boot(void); -void chunk_dss_prefork(tsd_t *tsd); -void chunk_dss_postfork_parent(tsd_t *tsd); -void chunk_dss_postfork_child(tsd_t *tsd); +void chunk_dss_prefork(tsdn_t *tsdn); +void chunk_dss_postfork_parent(tsdn_t *tsdn); +void chunk_dss_postfork_child(tsdn_t *tsdn); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/include/jemalloc/internal/ckh.h b/include/jemalloc/internal/ckh.h index f75ad90b..46e151cd 100644 --- a/include/jemalloc/internal/ckh.h +++ b/include/jemalloc/internal/ckh.h @@ -64,13 +64,13 @@ struct ckh_s { /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -bool ckh_new(tsd_t *tsd, ckh_t *ckh, size_t minitems, ckh_hash_t *hash, +bool ckh_new(tsdn_t *tsdn, ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh_keycomp_t *keycomp); -void ckh_delete(tsd_t *tsd, ckh_t *ckh); +void ckh_delete(tsdn_t *tsdn, ckh_t *ckh); size_t ckh_count(ckh_t *ckh); bool ckh_iter(ckh_t *ckh, size_t *tabind, void **key, void **data); -bool ckh_insert(tsd_t *tsd, ckh_t *ckh, const void *key, const void *data); -bool ckh_remove(tsd_t *tsd, ckh_t *ckh, const void *searchkey, void **key, +bool ckh_insert(tsdn_t *tsdn, ckh_t *ckh, const void *key, const void *data); +bool ckh_remove(tsdn_t *tsdn, ckh_t *ckh, const void *searchkey, void **key, void **data); bool ckh_search(ckh_t *ckh, const void *searchkey, void **key, void **data); void ckh_string_hash(const void *key, size_t r_hash[2]); diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index c84c0de9..af0f6d7c 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -27,7 +27,7 @@ struct ctl_named_node_s { struct ctl_indexed_node_s { struct ctl_node_s node; - const ctl_named_node_t *(*index)(tsd_t *, const size_t *, size_t, + const ctl_named_node_t *(*index)(tsdn_t *, const size_t *, size_t, size_t); }; @@ -72,15 +72,15 @@ struct ctl_stats_s { int ctl_byname(tsd_t *tsd, const char *name, void *oldp, size_t *oldlenp, void *newp, size_t newlen); -int 
ctl_nametomib(tsd_t *tsd, const char *name, size_t *mibp, +int ctl_nametomib(tsdn_t *tsdn, const char *name, size_t *mibp, size_t *miblenp); int ctl_bymib(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen); bool ctl_boot(void); -void ctl_prefork(tsd_t *tsd); -void ctl_postfork_parent(tsd_t *tsd); -void ctl_postfork_child(tsd_t *tsd); +void ctl_prefork(tsdn_t *tsdn); +void ctl_postfork_parent(tsdn_t *tsdn); +void ctl_postfork_child(tsdn_t *tsdn); #define xmallctl(name, oldp, oldlenp, newp, newlen) do { \ if (je_mallctl(name, oldp, oldlenp, newp, newlen) \ diff --git a/include/jemalloc/internal/huge.h b/include/jemalloc/internal/huge.h index 9de2055d..b5fa9e63 100644 --- a/include/jemalloc/internal/huge.h +++ b/include/jemalloc/internal/huge.h @@ -9,23 +9,23 @@ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -void *huge_malloc(tsd_t *tsd, arena_t *arena, size_t usize, bool zero); -void *huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, - bool zero); -bool huge_ralloc_no_move(tsd_t *tsd, void *ptr, size_t oldsize, +void *huge_malloc(tsdn_t *tsdn, arena_t *arena, size_t usize, bool zero); +void *huge_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, + size_t alignment, bool zero); +bool huge_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t usize_min, size_t usize_max, bool zero); void *huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t usize, size_t alignment, bool zero, tcache_t *tcache); #ifdef JEMALLOC_JET -typedef void (huge_dalloc_junk_t)(tsd_t *, void *, size_t); +typedef void (huge_dalloc_junk_t)(tsdn_t *, void *, size_t); extern huge_dalloc_junk_t *huge_dalloc_junk; #endif -void huge_dalloc(tsd_t *tsd, void *ptr); +void huge_dalloc(tsdn_t *tsdn, void *ptr); arena_t *huge_aalloc(const void *ptr); -size_t huge_salloc(tsd_t *tsd, const void *ptr); -prof_tctx_t *huge_prof_tctx_get(tsd_t *tsd, const void *ptr); -void huge_prof_tctx_set(tsd_t *tsd, const void *ptr, prof_tctx_t *tctx); -void huge_prof_tctx_reset(tsd_t *tsd, const void *ptr); +size_t huge_salloc(tsdn_t *tsdn, const void *ptr); +prof_tctx_t *huge_prof_tctx_get(tsdn_t *tsdn, const void *ptr); +void huge_prof_tctx_set(tsdn_t *tsdn, const void *ptr, prof_tctx_t *tctx); +void huge_prof_tctx_reset(tsdn_t *tsdn, const void *ptr); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 3ce36659..69d94ec5 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -473,7 +473,7 @@ void *bootstrap_malloc(size_t size); void *bootstrap_calloc(size_t num, size_t size); void bootstrap_free(void *ptr); unsigned narenas_total_get(void); -arena_t *arena_init(tsd_t *tsd, unsigned ind); +arena_t *arena_init(tsdn_t *tsdn, unsigned ind); arena_tdata_t *arena_tdata_get_hard(tsd_t *tsd, unsigned ind); arena_t *arena_choose_hard(tsd_t *tsd, bool internal); void arena_migrate(tsd_t *tsd, unsigned oldind, unsigned newind); @@ -555,10 +555,10 @@ size_t s2u(size_t size); size_t sa2u(size_t size, size_t alignment); arena_t *arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal); arena_t *arena_choose(tsd_t *tsd, arena_t *arena); -arena_t *arena_ichoose(tsd_t *tsd, arena_t *arena); +arena_t *arena_ichoose(tsdn_t *tsdn, arena_t *arena); arena_tdata_t 
*arena_tdata_get(tsd_t *tsd, unsigned ind, bool refresh_if_missing); -arena_t *arena_get(tsd_t *tsd, unsigned ind, bool init_if_missing); +arena_t *arena_get(tsdn_t *tsdn, unsigned ind, bool init_if_missing); ticker_t *decay_ticker_get(tsd_t *tsd, unsigned ind); #endif @@ -815,13 +815,13 @@ arena_choose(tsd_t *tsd, arena_t *arena) } JEMALLOC_INLINE arena_t * -arena_ichoose(tsd_t *tsd, arena_t *arena) +arena_ichoose(tsdn_t *tsdn, arena_t *arena) { - assert(tsd != NULL || arena != NULL); + assert(!tsdn_null(tsdn) || arena != NULL); - if (tsd != NULL) - return (arena_choose_impl(tsd, NULL, true)); + if (!tsdn_null(tsdn)) + return (arena_choose_impl(tsdn_tsd(tsdn), NULL, true)); return (arena); } @@ -851,7 +851,7 @@ arena_tdata_get(tsd_t *tsd, unsigned ind, bool refresh_if_missing) } JEMALLOC_INLINE arena_t * -arena_get(tsd_t *tsd, unsigned ind, bool init_if_missing) +arena_get(tsdn_t *tsdn, unsigned ind, bool init_if_missing) { arena_t *ret; @@ -861,7 +861,7 @@ arena_get(tsd_t *tsd, unsigned ind, bool init_if_missing) if (unlikely(ret == NULL)) { ret = atomic_read_p((void *)&arenas[ind]); if (init_if_missing && unlikely(ret == NULL)) - ret = arena_init(tsd, ind); + ret = arena_init(tsdn, ind); } return (ret); } @@ -895,24 +895,24 @@ decay_ticker_get(tsd_t *tsd, unsigned ind) #ifndef JEMALLOC_ENABLE_INLINE arena_t *iaalloc(const void *ptr); -size_t isalloc(tsd_t *tsd, const void *ptr, bool demote); -void *iallocztm(tsd_t *tsd, size_t size, szind_t ind, bool zero, +size_t isalloc(tsdn_t *tsdn, const void *ptr, bool demote); +void *iallocztm(tsdn_t *tsdn, size_t size, szind_t ind, bool zero, tcache_t *tcache, bool is_metadata, arena_t *arena, bool slow_path); void *ialloc(tsd_t *tsd, size_t size, szind_t ind, bool zero, bool slow_path); -void *ipallocztm(tsd_t *tsd, size_t usize, size_t alignment, bool zero, +void *ipallocztm(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, tcache_t *tcache, bool is_metadata, arena_t *arena); -void *ipalloct(tsd_t *tsd, size_t usize, size_t alignment, bool zero, +void *ipalloct(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena); void *ipalloc(tsd_t *tsd, size_t usize, size_t alignment, bool zero); -size_t ivsalloc(tsd_t *tsd, const void *ptr, bool demote); +size_t ivsalloc(tsdn_t *tsdn, const void *ptr, bool demote); size_t u2rz(size_t usize); -size_t p2rz(tsd_t *tsd, const void *ptr); -void idalloctm(tsd_t *tsd, void *ptr, tcache_t *tcache, bool is_metadata, +size_t p2rz(tsdn_t *tsdn, const void *ptr); +void idalloctm(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool is_metadata, bool slow_path); void idalloc(tsd_t *tsd, void *ptr); void iqalloc(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path); -void isdalloct(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache, +void isdalloct(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, bool slow_path); void isqalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache, bool slow_path); @@ -923,7 +923,7 @@ void *iralloct(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena); void *iralloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t alignment, bool zero); -bool ixalloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, +bool ixalloc(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t extra, size_t alignment, bool zero); #endif @@ -939,22 +939,23 @@ iaalloc(const void *ptr) /* * Typical usage: + * tsdn_t *tsdn = [...] * void *ptr = [...] 
- * size_t sz = isalloc(ptr, config_prof); + * size_t sz = isalloc(tsdn, ptr, config_prof); */ JEMALLOC_ALWAYS_INLINE size_t -isalloc(tsd_t *tsd, const void *ptr, bool demote) +isalloc(tsdn_t *tsdn, const void *ptr, bool demote) { assert(ptr != NULL); /* Demotion only makes sense if config_prof is true. */ assert(config_prof || !demote); - return (arena_salloc(tsd, ptr, demote)); + return (arena_salloc(tsdn, ptr, demote)); } JEMALLOC_ALWAYS_INLINE void * -iallocztm(tsd_t *tsd, size_t size, szind_t ind, bool zero, tcache_t *tcache, +iallocztm(tsdn_t *tsdn, size_t size, szind_t ind, bool zero, tcache_t *tcache, bool is_metadata, arena_t *arena, bool slow_path) { void *ret; @@ -963,10 +964,10 @@ iallocztm(tsd_t *tsd, size_t size, szind_t ind, bool zero, tcache_t *tcache, assert(!is_metadata || tcache == NULL); assert(!is_metadata || arena == NULL || arena->ind < narenas_auto); - ret = arena_malloc(tsd, arena, size, ind, zero, tcache, slow_path); + ret = arena_malloc(tsdn, arena, size, ind, zero, tcache, slow_path); if (config_stats && is_metadata && likely(ret != NULL)) { - arena_metadata_allocated_add(iaalloc(ret), isalloc(tsd, ret, - config_prof)); + arena_metadata_allocated_add(iaalloc(ret), + isalloc(tsdn, ret, config_prof)); } return (ret); } @@ -975,12 +976,12 @@ JEMALLOC_ALWAYS_INLINE void * ialloc(tsd_t *tsd, size_t size, szind_t ind, bool zero, bool slow_path) { - return (iallocztm(tsd, size, ind, zero, tcache_get(tsd, true), false, - NULL, slow_path)); + return (iallocztm(tsd_tsdn(tsd), size, ind, zero, tcache_get(tsd, true), + false, NULL, slow_path)); } JEMALLOC_ALWAYS_INLINE void * -ipallocztm(tsd_t *tsd, size_t usize, size_t alignment, bool zero, +ipallocztm(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, tcache_t *tcache, bool is_metadata, arena_t *arena) { void *ret; @@ -990,33 +991,33 @@ ipallocztm(tsd_t *tsd, size_t usize, size_t alignment, bool zero, assert(!is_metadata || tcache == NULL); assert(!is_metadata || arena == NULL || arena->ind < narenas_auto); - ret = arena_palloc(tsd, arena, usize, alignment, zero, tcache); + ret = arena_palloc(tsdn, arena, usize, alignment, zero, tcache); assert(ALIGNMENT_ADDR2BASE(ret, alignment) == ret); if (config_stats && is_metadata && likely(ret != NULL)) { - arena_metadata_allocated_add(iaalloc(ret), isalloc(tsd, ret, + arena_metadata_allocated_add(iaalloc(ret), isalloc(tsdn, ret, config_prof)); } return (ret); } JEMALLOC_ALWAYS_INLINE void * -ipalloct(tsd_t *tsd, size_t usize, size_t alignment, bool zero, +ipalloct(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena) { - return (ipallocztm(tsd, usize, alignment, zero, tcache, false, arena)); + return (ipallocztm(tsdn, usize, alignment, zero, tcache, false, arena)); } JEMALLOC_ALWAYS_INLINE void * ipalloc(tsd_t *tsd, size_t usize, size_t alignment, bool zero) { - return (ipallocztm(tsd, usize, alignment, zero, tcache_get(tsd, true), - false, NULL)); + return (ipallocztm(tsd_tsdn(tsd), usize, alignment, zero, + tcache_get(tsd, true), false, NULL)); } JEMALLOC_ALWAYS_INLINE size_t -ivsalloc(tsd_t *tsd, const void *ptr, bool demote) +ivsalloc(tsdn_t *tsdn, const void *ptr, bool demote) { extent_node_t *node; @@ -1028,7 +1029,7 @@ ivsalloc(tsd_t *tsd, const void *ptr, bool demote) assert(extent_node_addr_get(node) == ptr || extent_node_achunk_get(node)); - return (isalloc(tsd, ptr, demote)); + return (isalloc(tsdn, ptr, demote)); } JEMALLOC_INLINE size_t @@ -1046,15 +1047,15 @@ u2rz(size_t usize) } JEMALLOC_INLINE size_t -p2rz(tsd_t *tsd, 
const void *ptr) +p2rz(tsdn_t *tsdn, const void *ptr) { - size_t usize = isalloc(tsd, ptr, false); + size_t usize = isalloc(tsdn, ptr, false); return (u2rz(usize)); } JEMALLOC_ALWAYS_INLINE void -idalloctm(tsd_t *tsd, void *ptr, tcache_t *tcache, bool is_metadata, +idalloctm(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool is_metadata, bool slow_path) { @@ -1062,18 +1063,18 @@ idalloctm(tsd_t *tsd, void *ptr, tcache_t *tcache, bool is_metadata, assert(!is_metadata || tcache == NULL); assert(!is_metadata || iaalloc(ptr)->ind < narenas_auto); if (config_stats && is_metadata) { - arena_metadata_allocated_sub(iaalloc(ptr), isalloc(tsd, ptr, + arena_metadata_allocated_sub(iaalloc(ptr), isalloc(tsdn, ptr, config_prof)); } - arena_dalloc(tsd, ptr, tcache, slow_path); + arena_dalloc(tsdn, ptr, tcache, slow_path); } JEMALLOC_ALWAYS_INLINE void idalloc(tsd_t *tsd, void *ptr) { - idalloctm(tsd, ptr, tcache_get(tsd, false), false, true); + idalloctm(tsd_tsdn(tsd), ptr, tcache_get(tsd, false), false, true); } JEMALLOC_ALWAYS_INLINE void @@ -1083,14 +1084,15 @@ iqalloc(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) if (slow_path && config_fill && unlikely(opt_quarantine)) quarantine(tsd, ptr); else - idalloctm(tsd, ptr, tcache, false, slow_path); + idalloctm(tsd_tsdn(tsd), ptr, tcache, false, slow_path); } JEMALLOC_ALWAYS_INLINE void -isdalloct(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache, bool slow_path) +isdalloct(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, + bool slow_path) { - arena_sdalloc(tsd, ptr, size, tcache, slow_path); + arena_sdalloc(tsdn, ptr, size, tcache, slow_path); } JEMALLOC_ALWAYS_INLINE void @@ -1100,7 +1102,7 @@ isqalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache, bool slow_path) if (slow_path && config_fill && unlikely(opt_quarantine)) quarantine(tsd, ptr); else - isdalloct(tsd, ptr, size, tcache, slow_path); + isdalloct(tsd_tsdn(tsd), ptr, size, tcache, slow_path); } JEMALLOC_ALWAYS_INLINE void * @@ -1113,7 +1115,7 @@ iralloct_realign(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, usize = sa2u(size + extra, alignment); if (unlikely(usize == 0 || usize > HUGE_MAXCLASS)) return (NULL); - p = ipalloct(tsd, usize, alignment, zero, tcache, arena); + p = ipalloct(tsd_tsdn(tsd), usize, alignment, zero, tcache, arena); if (p == NULL) { if (extra == 0) return (NULL); @@ -1121,7 +1123,8 @@ iralloct_realign(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, usize = sa2u(size, alignment); if (unlikely(usize == 0 || usize > HUGE_MAXCLASS)) return (NULL); - p = ipalloct(tsd, usize, alignment, zero, tcache, arena); + p = ipalloct(tsd_tsdn(tsd), usize, alignment, zero, tcache, + arena); if (p == NULL) return (NULL); } @@ -1167,7 +1170,7 @@ iralloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t alignment, } JEMALLOC_ALWAYS_INLINE bool -ixalloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t extra, +ixalloc(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t extra, size_t alignment, bool zero) { @@ -1180,7 +1183,7 @@ ixalloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t extra, return (true); } - return (arena_ralloc_no_move(tsd, ptr, oldsize, size, extra, zero)); + return (arena_ralloc_no_move(tsdn, ptr, oldsize, size, extra, zero)); } #endif diff --git a/include/jemalloc/internal/mutex.h b/include/jemalloc/internal/mutex.h index 5ddae11c..00f0b91c 100644 --- a/include/jemalloc/internal/mutex.h +++ b/include/jemalloc/internal/mutex.h @@ -59,9 +59,9 @@ extern bool isthreaded; bool 
malloc_mutex_init(malloc_mutex_t *mutex, const char *name, witness_rank_t rank); -void malloc_mutex_prefork(tsd_t *tsd, malloc_mutex_t *mutex); -void malloc_mutex_postfork_parent(tsd_t *tsd, malloc_mutex_t *mutex); -void malloc_mutex_postfork_child(tsd_t *tsd, malloc_mutex_t *mutex); +void malloc_mutex_prefork(tsdn_t *tsdn, malloc_mutex_t *mutex); +void malloc_mutex_postfork_parent(tsdn_t *tsdn, malloc_mutex_t *mutex); +void malloc_mutex_postfork_child(tsdn_t *tsdn, malloc_mutex_t *mutex); bool malloc_mutex_boot(void); #endif /* JEMALLOC_H_EXTERNS */ @@ -69,20 +69,20 @@ bool malloc_mutex_boot(void); #ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE -void malloc_mutex_lock(tsd_t *tsd, malloc_mutex_t *mutex); -void malloc_mutex_unlock(tsd_t *tsd, malloc_mutex_t *mutex); -void malloc_mutex_assert_owner(tsd_t *tsd, malloc_mutex_t *mutex); -void malloc_mutex_assert_not_owner(tsd_t *tsd, malloc_mutex_t *mutex); +void malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex); +void malloc_mutex_unlock(tsdn_t *tsdn, malloc_mutex_t *mutex); +void malloc_mutex_assert_owner(tsdn_t *tsdn, malloc_mutex_t *mutex); +void malloc_mutex_assert_not_owner(tsdn_t *tsdn, malloc_mutex_t *mutex); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_MUTEX_C_)) JEMALLOC_INLINE void -malloc_mutex_lock(tsd_t *tsd, malloc_mutex_t *mutex) +malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex) { if (isthreaded) { if (config_debug) - witness_assert_not_owner(tsd, &mutex->witness); + witness_assert_not_owner(tsdn, &mutex->witness); #ifdef _WIN32 # if _WIN32_WINNT >= 0x0600 AcquireSRWLockExclusive(&mutex->lock); @@ -95,17 +95,17 @@ malloc_mutex_lock(tsd_t *tsd, malloc_mutex_t *mutex) pthread_mutex_lock(&mutex->lock); #endif if (config_debug) - witness_lock(tsd, &mutex->witness); + witness_lock(tsdn, &mutex->witness); } } JEMALLOC_INLINE void -malloc_mutex_unlock(tsd_t *tsd, malloc_mutex_t *mutex) +malloc_mutex_unlock(tsdn_t *tsdn, malloc_mutex_t *mutex) { if (isthreaded) { if (config_debug) - witness_unlock(tsd, &mutex->witness); + witness_unlock(tsdn, &mutex->witness); #ifdef _WIN32 # if _WIN32_WINNT >= 0x0600 ReleaseSRWLockExclusive(&mutex->lock); @@ -121,19 +121,19 @@ malloc_mutex_unlock(tsd_t *tsd, malloc_mutex_t *mutex) } JEMALLOC_INLINE void -malloc_mutex_assert_owner(tsd_t *tsd, malloc_mutex_t *mutex) +malloc_mutex_assert_owner(tsdn_t *tsdn, malloc_mutex_t *mutex) { if (isthreaded && config_debug) - witness_assert_owner(tsd, &mutex->witness); + witness_assert_owner(tsdn, &mutex->witness); } JEMALLOC_INLINE void -malloc_mutex_assert_not_owner(tsd_t *tsd, malloc_mutex_t *mutex) +malloc_mutex_assert_not_owner(tsdn_t *tsdn, malloc_mutex_t *mutex) { if (isthreaded && config_debug) - witness_assert_not_owner(tsd, &mutex->witness); + witness_assert_not_owner(tsdn, &mutex->witness); } #endif diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 28996206..f2b6a55d 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -496,8 +496,6 @@ tcache_alloc_easy tcache_alloc_large tcache_alloc_small tcache_alloc_small_hard -tcache_arena_associate -tcache_arena_dissociate tcache_arena_reassociate tcache_bin_flush_large tcache_bin_flush_small @@ -543,6 +541,7 @@ tsd_boot tsd_boot0 tsd_boot1 tsd_booted +tsd_booted_get tsd_cleanup tsd_cleanup_wrapper tsd_fetch @@ -581,12 +580,16 @@ tsd_thread_deallocated_set tsd_thread_deallocatedp_get tsd_tls tsd_tsd +tsd_tsdn tsd_witness_fork_get tsd_witness_fork_set 
tsd_witness_forkp_get tsd_witnesses_get tsd_witnesses_set tsd_witnessesp_get +tsdn_fetch +tsdn_null +tsdn_tsd u2rz valgrind_freelike_block valgrind_make_mem_defined diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index 4fe17875..691e153d 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -281,7 +281,7 @@ extern uint64_t prof_interval; extern size_t lg_prof_sample; void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated); -void prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t usize, +void prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize, prof_tctx_t *tctx); void prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_tctx_t *tctx); void bt_init(prof_bt_t *bt, void **vec); @@ -293,33 +293,33 @@ size_t prof_bt_count(void); const prof_cnt_t *prof_cnt_all(void); typedef int (prof_dump_open_t)(bool, const char *); extern prof_dump_open_t *prof_dump_open; -typedef bool (prof_dump_header_t)(tsd_t *, bool, const prof_cnt_t *); +typedef bool (prof_dump_header_t)(tsdn_t *, bool, const prof_cnt_t *); extern prof_dump_header_t *prof_dump_header; #endif -void prof_idump(tsd_t *tsd); +void prof_idump(tsdn_t *tsdn); bool prof_mdump(tsd_t *tsd, const char *filename); -void prof_gdump(tsd_t *tsd); -prof_tdata_t *prof_tdata_init(tsd_t *tsd); +void prof_gdump(tsdn_t *tsdn); +prof_tdata_t *prof_tdata_init(tsdn_t *tsdn); prof_tdata_t *prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata); -void prof_reset(tsd_t *tsd, size_t lg_sample); +void prof_reset(tsdn_t *tsdn, size_t lg_sample); void prof_tdata_cleanup(tsd_t *tsd); +bool prof_active_get(tsdn_t *tsdn); +bool prof_active_set(tsdn_t *tsdn, bool active); const char *prof_thread_name_get(tsd_t *tsd); -bool prof_active_get(tsd_t *tsd); -bool prof_active_set(tsd_t *tsd, bool active); int prof_thread_name_set(tsd_t *tsd, const char *thread_name); bool prof_thread_active_get(tsd_t *tsd); bool prof_thread_active_set(tsd_t *tsd, bool active); -bool prof_thread_active_init_get(tsd_t *tsd); -bool prof_thread_active_init_set(tsd_t *tsd, bool active_init); -bool prof_gdump_get(tsd_t *tsd); -bool prof_gdump_set(tsd_t *tsd, bool active); +bool prof_thread_active_init_get(tsdn_t *tsdn); +bool prof_thread_active_init_set(tsdn_t *tsdn, bool active_init); +bool prof_gdump_get(tsdn_t *tsdn); +bool prof_gdump_set(tsdn_t *tsdn, bool active); void prof_boot0(void); void prof_boot1(void); -bool prof_boot2(tsd_t *tsd); -void prof_prefork0(tsd_t *tsd); -void prof_prefork1(tsd_t *tsd); -void prof_postfork_parent(tsd_t *tsd); -void prof_postfork_child(tsd_t *tsd); +bool prof_boot2(tsdn_t *tsdn); +void prof_prefork0(tsdn_t *tsdn); +void prof_prefork1(tsdn_t *tsdn); +void prof_postfork_parent(tsdn_t *tsdn); +void prof_postfork_child(tsdn_t *tsdn); void prof_sample_threshold_update(prof_tdata_t *tdata); #endif /* JEMALLOC_H_EXTERNS */ @@ -330,16 +330,16 @@ void prof_sample_threshold_update(prof_tdata_t *tdata); bool prof_active_get_unlocked(void); bool prof_gdump_get_unlocked(void); prof_tdata_t *prof_tdata_get(tsd_t *tsd, bool create); -prof_tctx_t *prof_tctx_get(tsd_t *tsd, const void *ptr); -void prof_tctx_set(tsd_t *tsd, const void *ptr, size_t usize, +prof_tctx_t *prof_tctx_get(tsdn_t *tsdn, const void *ptr); +void prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize, prof_tctx_t *tctx); -void prof_tctx_reset(tsd_t *tsd, const void *ptr, size_t usize, +void prof_tctx_reset(tsdn_t *tsdn, const void *ptr, size_t usize, const void *old_ptr, prof_tctx_t *tctx); bool 
prof_sample_accum_update(tsd_t *tsd, size_t usize, bool commit, prof_tdata_t **tdata_out); prof_tctx_t *prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active, bool update); -void prof_malloc(tsd_t *tsd, const void *ptr, size_t usize, +void prof_malloc(tsdn_t *tsdn, const void *ptr, size_t usize, prof_tctx_t *tctx); void prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, bool prof_active, bool updated, const void *old_ptr, @@ -384,7 +384,7 @@ prof_tdata_get(tsd_t *tsd, bool create) if (create) { if (unlikely(tdata == NULL)) { if (tsd_nominal(tsd)) { - tdata = prof_tdata_init(tsd); + tdata = prof_tdata_init(tsd_tsdn(tsd)); tsd_prof_tdata_set(tsd, tdata); } } else if (unlikely(tdata->expired)) { @@ -398,34 +398,34 @@ prof_tdata_get(tsd_t *tsd, bool create) } JEMALLOC_ALWAYS_INLINE prof_tctx_t * -prof_tctx_get(tsd_t *tsd, const void *ptr) +prof_tctx_get(tsdn_t *tsdn, const void *ptr) { cassert(config_prof); assert(ptr != NULL); - return (arena_prof_tctx_get(tsd, ptr)); + return (arena_prof_tctx_get(tsdn, ptr)); } JEMALLOC_ALWAYS_INLINE void -prof_tctx_set(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx) +prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize, prof_tctx_t *tctx) { cassert(config_prof); assert(ptr != NULL); - arena_prof_tctx_set(tsd, ptr, usize, tctx); + arena_prof_tctx_set(tsdn, ptr, usize, tctx); } JEMALLOC_ALWAYS_INLINE void -prof_tctx_reset(tsd_t *tsd, const void *ptr, size_t usize, const void *old_ptr, +prof_tctx_reset(tsdn_t *tsdn, const void *ptr, size_t usize, const void *old_ptr, prof_tctx_t *old_tctx) { cassert(config_prof); assert(ptr != NULL); - arena_prof_tctx_reset(tsd, ptr, usize, old_ptr, old_tctx); + arena_prof_tctx_reset(tsdn, ptr, usize, old_ptr, old_tctx); } JEMALLOC_ALWAYS_INLINE bool @@ -480,17 +480,17 @@ prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active, bool update) } JEMALLOC_ALWAYS_INLINE void -prof_malloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx) +prof_malloc(tsdn_t *tsdn, const void *ptr, size_t usize, prof_tctx_t *tctx) { cassert(config_prof); assert(ptr != NULL); - assert(usize == isalloc(tsd, ptr, true)); + assert(usize == isalloc(tsdn, ptr, true)); if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) - prof_malloc_sample_object(tsd, ptr, usize, tctx); + prof_malloc_sample_object(tsdn, ptr, usize, tctx); else - prof_tctx_set(tsd, ptr, usize, (prof_tctx_t *)(uintptr_t)1U); + prof_tctx_set(tsdn, ptr, usize, (prof_tctx_t *)(uintptr_t)1U); } JEMALLOC_ALWAYS_INLINE void @@ -504,7 +504,7 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, assert(ptr != NULL || (uintptr_t)tctx <= (uintptr_t)1U); if (prof_active && !updated && ptr != NULL) { - assert(usize == isalloc(tsd, ptr, true)); + assert(usize == isalloc(tsd_tsdn(tsd), ptr, true)); if (prof_sample_accum_update(tsd, usize, true, NULL)) { /* * Don't sample. 
The usize passed to prof_alloc_prep() @@ -521,9 +521,9 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, old_sampled = ((uintptr_t)old_tctx > (uintptr_t)1U); if (unlikely(sampled)) - prof_malloc_sample_object(tsd, ptr, usize, tctx); + prof_malloc_sample_object(tsd_tsdn(tsd), ptr, usize, tctx); else - prof_tctx_reset(tsd, ptr, usize, old_ptr, old_tctx); + prof_tctx_reset(tsd_tsdn(tsd), ptr, usize, old_ptr, old_tctx); if (unlikely(old_sampled)) prof_free_sampled_object(tsd, old_usize, old_tctx); @@ -532,10 +532,10 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, JEMALLOC_ALWAYS_INLINE void prof_free(tsd_t *tsd, const void *ptr, size_t usize) { - prof_tctx_t *tctx = prof_tctx_get(tsd, ptr); + prof_tctx_t *tctx = prof_tctx_get(tsd_tsdn(tsd), ptr); cassert(config_prof); - assert(usize == isalloc(tsd, ptr, true)); + assert(usize == isalloc(tsd_tsdn(tsd), ptr, true)); if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) prof_free_sampled_object(tsd, usize, tctx); diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index 59f60235..70883b1a 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -130,27 +130,25 @@ extern size_t tcache_maxclass; */ extern tcaches_t *tcaches; -size_t tcache_salloc(tsd_t *tsd, const void *ptr); +size_t tcache_salloc(tsdn_t *tsdn, const void *ptr); void tcache_event_hard(tsd_t *tsd, tcache_t *tcache); -void *tcache_alloc_small_hard(tsd_t *tsd, arena_t *arena, tcache_t *tcache, +void *tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, tcache_bin_t *tbin, szind_t binind, bool *tcache_success); void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, szind_t binind, unsigned rem); void tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, unsigned rem, tcache_t *tcache); -void tcache_arena_associate(tsd_t *tsd, tcache_t *tcache, arena_t *arena); -void tcache_arena_reassociate(tsd_t *tsd, tcache_t *tcache, +void tcache_arena_reassociate(tsdn_t *tsdn, tcache_t *tcache, arena_t *oldarena, arena_t *newarena); -void tcache_arena_dissociate(tsd_t *tsd, tcache_t *tcache, arena_t *arena); tcache_t *tcache_get_hard(tsd_t *tsd); -tcache_t *tcache_create(tsd_t *tsd, arena_t *arena); +tcache_t *tcache_create(tsdn_t *tsdn, arena_t *arena); void tcache_cleanup(tsd_t *tsd); void tcache_enabled_cleanup(tsd_t *tsd); -void tcache_stats_merge(tsd_t *tsd, tcache_t *tcache, arena_t *arena); -bool tcaches_create(tsd_t *tsd, unsigned *r_ind); +void tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena); +bool tcaches_create(tsdn_t *tsdn, unsigned *r_ind); void tcaches_flush(tsd_t *tsd, unsigned ind); void tcaches_destroy(tsd_t *tsd, unsigned ind); -bool tcache_boot(tsd_t *tsd); +bool tcache_boot(tsdn_t *tsdn); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ @@ -297,8 +295,8 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, if (unlikely(arena == NULL)) return (NULL); - ret = tcache_alloc_small_hard(tsd, arena, tcache, tbin, binind, - &tcache_hard_success); + ret = tcache_alloc_small_hard(tsd_tsdn(tsd), arena, tcache, + tbin, binind, &tcache_hard_success); if (tcache_hard_success == false) return (NULL); } @@ -310,7 +308,7 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, */ if (config_prof || (slow_path && config_fill) || unlikely(zero)) { usize = index2size(binind); - 
assert(tcache_salloc(tsd, ret) == usize); + assert(tcache_salloc(tsd_tsdn(tsd), ret) == usize); } if (likely(!zero)) { @@ -358,7 +356,7 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, if (unlikely(arena == NULL)) return (NULL); - ret = arena_malloc_large(tsd, arena, binind, zero); + ret = arena_malloc_large(tsd_tsdn(tsd), arena, binind, zero); if (ret == NULL) return (NULL); } else { @@ -407,7 +405,7 @@ tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, tcache_bin_t *tbin; tcache_bin_info_t *tbin_info; - assert(tcache_salloc(tsd, ptr) <= SMALL_MAXCLASS); + assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= SMALL_MAXCLASS); if (slow_path && config_fill && unlikely(opt_junk_free)) arena_dalloc_junk_small(ptr, &arena_bin_info[binind]); @@ -434,8 +432,8 @@ tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, size_t size, tcache_bin_info_t *tbin_info; assert((size & PAGE_MASK) == 0); - assert(tcache_salloc(tsd, ptr) > SMALL_MAXCLASS); - assert(tcache_salloc(tsd, ptr) <= tcache_maxclass); + assert(tcache_salloc(tsd_tsdn(tsd), ptr) > SMALL_MAXCLASS); + assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= tcache_maxclass); binind = size2index(size); @@ -460,7 +458,8 @@ tcaches_get(tsd_t *tsd, unsigned ind) { tcaches_t *elm = &tcaches[ind]; if (unlikely(elm->tcache == NULL)) { - elm->tcache = tcache_create(tsd, arena_choose(tsd, NULL)); + elm->tcache = tcache_create(tsd_tsdn(tsd), arena_choose(tsd, + NULL)); } return (elm->tcache); } diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 4a99ee6e..bf113411 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -13,6 +13,9 @@ typedef struct tsd_init_head_s tsd_init_head_t; #endif typedef struct tsd_s tsd_t; +typedef struct tsdn_s tsdn_t; + +#define TSDN_NULL ((tsdn_t *)0) typedef enum { tsd_state_uninitialized, @@ -44,6 +47,7 @@ typedef enum { * The result is a set of generated functions, e.g.: * * bool example_tsd_boot(void) {...} + * bool example_tsd_booted_get(void) {...} * example_t *example_tsd_get() {...} * void example_tsd_set(example_t *val) {...} * @@ -98,6 +102,8 @@ a_attr void \ a_name##tsd_boot1(void); \ a_attr bool \ a_name##tsd_boot(void); \ +a_attr bool \ +a_name##tsd_booted_get(void); \ a_attr a_type * \ a_name##tsd_get(void); \ a_attr void \ @@ -201,6 +207,12 @@ a_name##tsd_boot(void) \ \ return (a_name##tsd_boot0()); \ } \ +a_attr bool \ +a_name##tsd_booted_get(void) \ +{ \ + \ + return (a_name##tsd_booted); \ +} \ /* Get/set. */ \ a_attr a_type * \ a_name##tsd_get(void) \ @@ -246,6 +258,12 @@ a_name##tsd_boot(void) \ \ return (a_name##tsd_boot0()); \ } \ +a_attr bool \ +a_name##tsd_booted_get(void) \ +{ \ + \ + return (a_name##tsd_booted); \ +} \ /* Get/set. */ \ a_attr a_type * \ a_name##tsd_get(void) \ @@ -368,6 +386,12 @@ a_name##tsd_boot(void) \ a_name##tsd_boot1(); \ return (false); \ } \ +a_attr bool \ +a_name##tsd_booted_get(void) \ +{ \ + \ + return (a_name##tsd_booted); \ +} \ /* Get/set. */ \ a_attr a_type * \ a_name##tsd_get(void) \ @@ -490,6 +514,12 @@ a_name##tsd_boot(void) \ a_name##tsd_boot1(); \ return (false); \ } \ +a_attr bool \ +a_name##tsd_booted_get(void) \ +{ \ + \ + return (a_name##tsd_booted); \ +} \ /* Get/set. 
*/ \ a_attr a_type * \ a_name##tsd_get(void) \ @@ -571,6 +601,15 @@ MALLOC_TSD #undef O }; +/* + * Wrapper around tsd_t that makes it possible to avoid implicit conversion + * between tsd_t and tsdn_t, where tsdn_t is "nullable" and has to be + * explicitly converted to tsd_t, which is non-nullable. + */ +struct tsdn_s { + tsd_t tsd; +}; + static const tsd_t tsd_initializer = TSD_INITIALIZER; malloc_tsd_types(, tsd_t) @@ -601,6 +640,7 @@ void tsd_cleanup(void *arg); malloc_tsd_protos(JEMALLOC_ATTR(unused), , tsd_t) tsd_t *tsd_fetch(void); +tsdn_t *tsd_tsdn(tsd_t *tsd); bool tsd_nominal(tsd_t *tsd); #define O(n, t) \ t *tsd_##n##p_get(tsd_t *tsd); \ @@ -608,6 +648,9 @@ t tsd_##n##_get(tsd_t *tsd); \ void tsd_##n##_set(tsd_t *tsd, t n); MALLOC_TSD #undef O +tsdn_t *tsdn_fetch(void); +bool tsdn_null(const tsdn_t *tsdn); +tsd_t *tsdn_tsd(tsdn_t *tsdn); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_TSD_C_)) @@ -634,6 +677,13 @@ tsd_fetch(void) return (tsd); } +JEMALLOC_ALWAYS_INLINE tsdn_t * +tsd_tsdn(tsd_t *tsd) +{ + + return ((tsdn_t *)tsd); +} + JEMALLOC_INLINE bool tsd_nominal(tsd_t *tsd) { @@ -665,6 +715,32 @@ tsd_##n##_set(tsd_t *tsd, t n) \ } MALLOC_TSD #undef O + +JEMALLOC_ALWAYS_INLINE tsdn_t * +tsdn_fetch(void) +{ + + if (!tsd_booted_get()) + return (NULL); + + return (tsd_tsdn(tsd_fetch())); +} + +JEMALLOC_ALWAYS_INLINE bool +tsdn_null(const tsdn_t *tsdn) +{ + + return (tsdn == NULL); +} + +JEMALLOC_ALWAYS_INLINE tsd_t * +tsdn_tsd(tsdn_t *tsdn) +{ + + assert(!tsdn_null(tsdn)); + + return (&tsdn->tsd); +} #endif #endif /* JEMALLOC_H_INLINES */ diff --git a/include/jemalloc/internal/valgrind.h b/include/jemalloc/internal/valgrind.h index 2667bf5e..1a868082 100644 --- a/include/jemalloc/internal/valgrind.h +++ b/include/jemalloc/internal/valgrind.h @@ -30,17 +30,17 @@ * calls must be embedded in macros rather than in functions so that when * Valgrind reports errors, there are no extra stack frames in the backtraces. 
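/*
 * [Editorial annotation -- not part of the patch.]  A minimal sketch of the
 * calling convention introduced by the tsd.h hunks above, using only
 * functions this series defines (tsdn_fetch, tsdn_null, tsdn_tsd, tsd_tsdn,
 * malloc_mutex_lock); the surrounding caller and "some_mutex" are
 * hypothetical:
 *
 *	tsdn_t *tsdn = tsdn_fetch();		// NULL until TSD is booted
 *	if (!tsdn_null(tsdn)) {
 *		tsd_t *tsd = tsdn_tsd(tsdn);	// explicit, asserted conversion
 *		malloc_mutex_lock(tsd_tsdn(tsd), &some_mutex);
 *	}
 *
 * The wrapper struct exists so that tsdn_t -> tsd_t requires an explicit
 * tsdn_tsd() call (which asserts non-NULL), while tsd_t -> tsdn_t via
 * tsd_tsdn() is always safe.
 */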
*/ -#define JEMALLOC_VALGRIND_MALLOC(cond, tsd, ptr, usize, zero) do { \ +#define JEMALLOC_VALGRIND_MALLOC(cond, tsdn, ptr, usize, zero) do { \ if (unlikely(in_valgrind && cond)) { \ - VALGRIND_MALLOCLIKE_BLOCK(ptr, usize, p2rz(tsd, ptr), \ + VALGRIND_MALLOCLIKE_BLOCK(ptr, usize, p2rz(tsdn, ptr), \ zero); \ } \ } while (0) -#define JEMALLOC_VALGRIND_REALLOC(maybe_moved, tsd, ptr, usize, \ +#define JEMALLOC_VALGRIND_REALLOC(maybe_moved, tsdn, ptr, usize, \ ptr_maybe_null, old_ptr, old_usize, old_rzsize, old_ptr_maybe_null, \ zero) do { \ if (unlikely(in_valgrind)) { \ - size_t rzsize = p2rz(tsd, ptr); \ + size_t rzsize = p2rz(tsdn, ptr); \ \ if (!maybe_moved || ptr == old_ptr) { \ VALGRIND_RESIZEINPLACE_BLOCK(ptr, old_usize, \ @@ -83,8 +83,8 @@ #define JEMALLOC_VALGRIND_MAKE_MEM_NOACCESS(ptr, usize) do {} while (0) #define JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ptr, usize) do {} while (0) #define JEMALLOC_VALGRIND_MAKE_MEM_DEFINED(ptr, usize) do {} while (0) -#define JEMALLOC_VALGRIND_MALLOC(cond, tsd, ptr, usize, zero) do {} while (0) -#define JEMALLOC_VALGRIND_REALLOC(maybe_moved, tsd, ptr, usize, \ +#define JEMALLOC_VALGRIND_MALLOC(cond, tsdn, ptr, usize, zero) do {} while (0) +#define JEMALLOC_VALGRIND_REALLOC(maybe_moved, tsdn, ptr, usize, \ ptr_maybe_null, old_ptr, old_usize, old_rzsize, old_ptr_maybe_null, \ zero) do {} while (0) #define JEMALLOC_VALGRIND_FREE(ptr, rzsize) do {} while (0) diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h index b2e6e825..4d312eab 100644 --- a/include/jemalloc/internal/witness.h +++ b/include/jemalloc/internal/witness.h @@ -75,23 +75,23 @@ void witness_init(witness_t *witness, const char *name, witness_rank_t rank, typedef void (witness_lock_error_t)(const witness_list_t *, const witness_t *); extern witness_lock_error_t *witness_lock_error; #endif -void witness_lock(tsd_t *tsd, witness_t *witness); -void witness_unlock(tsd_t *tsd, witness_t *witness); +void witness_lock(tsdn_t *tsdn, witness_t *witness); +void witness_unlock(tsdn_t *tsdn, witness_t *witness); #ifdef JEMALLOC_JET typedef void (witness_owner_error_t)(const witness_t *); extern witness_owner_error_t *witness_owner_error; #endif -void witness_assert_owner(tsd_t *tsd, const witness_t *witness); +void witness_assert_owner(tsdn_t *tsdn, const witness_t *witness); #ifdef JEMALLOC_JET typedef void (witness_not_owner_error_t)(const witness_t *); extern witness_not_owner_error_t *witness_not_owner_error; #endif -void witness_assert_not_owner(tsd_t *tsd, const witness_t *witness); +void witness_assert_not_owner(tsdn_t *tsdn, const witness_t *witness); #ifdef JEMALLOC_JET typedef void (witness_lockless_error_t)(const witness_list_t *); extern witness_lockless_error_t *witness_lockless_error; #endif -void witness_assert_lockless(tsd_t *tsd); +void witness_assert_lockless(tsdn_t *tsdn); void witnesses_cleanup(tsd_t *tsd); void witness_fork_cleanup(tsd_t *tsd); diff --git a/src/arena.c b/src/arena.c index 992d96f5..c605bcd3 100644 --- a/src/arena.c +++ b/src/arena.c @@ -37,11 +37,11 @@ static szind_t runs_avail_nclasses; /* Number of runs_avail trees. */ * definition. 
*/ -static void arena_purge_to_limit(tsd_t *tsd, arena_t *arena, +static void arena_purge_to_limit(tsdn_t *tsdn, arena_t *arena, size_t ndirty_limit); -static void arena_run_dalloc(tsd_t *tsd, arena_t *arena, arena_run_t *run, +static void arena_run_dalloc(tsdn_t *tsdn, arena_t *arena, arena_run_t *run, bool dirty, bool cleaned, bool decommitted); -static void arena_dalloc_bin_run(tsd_t *tsd, arena_t *arena, +static void arena_dalloc_bin_run(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, arena_bin_t *bin); static void arena_bin_lower_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, arena_bin_t *bin); @@ -592,7 +592,7 @@ arena_chunk_init_spare(arena_t *arena) } static bool -arena_chunk_register(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, +arena_chunk_register(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, bool zero) { @@ -604,61 +604,62 @@ arena_chunk_register(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, */ extent_node_init(&chunk->node, arena, chunk, chunksize, zero, true); extent_node_achunk_set(&chunk->node, true); - return (chunk_register(tsd, chunk, &chunk->node)); + return (chunk_register(tsdn, chunk, &chunk->node)); } static arena_chunk_t * -arena_chunk_alloc_internal_hard(tsd_t *tsd, arena_t *arena, +arena_chunk_alloc_internal_hard(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, bool *zero, bool *commit) { arena_chunk_t *chunk; - malloc_mutex_unlock(tsd, &arena->lock); + malloc_mutex_unlock(tsdn, &arena->lock); - chunk = (arena_chunk_t *)chunk_alloc_wrapper(tsd, arena, chunk_hooks, + chunk = (arena_chunk_t *)chunk_alloc_wrapper(tsdn, arena, chunk_hooks, NULL, chunksize, chunksize, zero, commit); if (chunk != NULL && !*commit) { /* Commit header. */ if (chunk_hooks->commit(chunk, chunksize, 0, map_bias << LG_PAGE, arena->ind)) { - chunk_dalloc_wrapper(tsd, arena, chunk_hooks, + chunk_dalloc_wrapper(tsdn, arena, chunk_hooks, (void *)chunk, chunksize, *zero, *commit); chunk = NULL; } } - if (chunk != NULL && arena_chunk_register(tsd, arena, chunk, *zero)) { + if (chunk != NULL && arena_chunk_register(tsdn, arena, chunk, *zero)) { if (!*commit) { /* Undo commit of header. 
*/ chunk_hooks->decommit(chunk, chunksize, 0, map_bias << LG_PAGE, arena->ind); } - chunk_dalloc_wrapper(tsd, arena, chunk_hooks, (void *)chunk, + chunk_dalloc_wrapper(tsdn, arena, chunk_hooks, (void *)chunk, chunksize, *zero, *commit); chunk = NULL; } - malloc_mutex_lock(tsd, &arena->lock); + malloc_mutex_lock(tsdn, &arena->lock); return (chunk); } static arena_chunk_t * -arena_chunk_alloc_internal(tsd_t *tsd, arena_t *arena, bool *zero, bool *commit) +arena_chunk_alloc_internal(tsdn_t *tsdn, arena_t *arena, bool *zero, + bool *commit) { arena_chunk_t *chunk; chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; - chunk = chunk_alloc_cache(tsd, arena, &chunk_hooks, NULL, chunksize, + chunk = chunk_alloc_cache(tsdn, arena, &chunk_hooks, NULL, chunksize, chunksize, zero, true); if (chunk != NULL) { - if (arena_chunk_register(tsd, arena, chunk, *zero)) { - chunk_dalloc_cache(tsd, arena, &chunk_hooks, chunk, + if (arena_chunk_register(tsdn, arena, chunk, *zero)) { + chunk_dalloc_cache(tsdn, arena, &chunk_hooks, chunk, chunksize, true); return (NULL); } *commit = true; } if (chunk == NULL) { - chunk = arena_chunk_alloc_internal_hard(tsd, arena, + chunk = arena_chunk_alloc_internal_hard(tsdn, arena, &chunk_hooks, zero, commit); } @@ -671,7 +672,7 @@ arena_chunk_alloc_internal(tsd_t *tsd, arena_t *arena, bool *zero, bool *commit) } static arena_chunk_t * -arena_chunk_init_hard(tsd_t *tsd, arena_t *arena) +arena_chunk_init_hard(tsdn_t *tsdn, arena_t *arena) { arena_chunk_t *chunk; bool zero, commit; @@ -681,7 +682,7 @@ arena_chunk_init_hard(tsd_t *tsd, arena_t *arena) zero = false; commit = false; - chunk = arena_chunk_alloc_internal(tsd, arena, &zero, &commit); + chunk = arena_chunk_alloc_internal(tsdn, arena, &zero, &commit); if (chunk == NULL) return (NULL); @@ -726,14 +727,14 @@ arena_chunk_init_hard(tsd_t *tsd, arena_t *arena) } static arena_chunk_t * -arena_chunk_alloc(tsd_t *tsd, arena_t *arena) +arena_chunk_alloc(tsdn_t *tsdn, arena_t *arena) { arena_chunk_t *chunk; if (arena->spare != NULL) chunk = arena_chunk_init_spare(arena); else { - chunk = arena_chunk_init_hard(tsd, arena); + chunk = arena_chunk_init_hard(tsdn, arena); if (chunk == NULL) return (NULL); } @@ -746,7 +747,7 @@ arena_chunk_alloc(tsd_t *tsd, arena_t *arena) } static void -arena_chunk_discard(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk) +arena_chunk_discard(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk) { bool committed; chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; @@ -761,12 +762,12 @@ arena_chunk_discard(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk) * chunk as committed has a high potential for causing later * access of decommitted memory. 
*/ - chunk_hooks = chunk_hooks_get(tsd, arena); + chunk_hooks = chunk_hooks_get(tsdn, arena); chunk_hooks.decommit(chunk, chunksize, 0, map_bias << LG_PAGE, arena->ind); } - chunk_dalloc_cache(tsd, arena, &chunk_hooks, (void *)chunk, chunksize, + chunk_dalloc_cache(tsdn, arena, &chunk_hooks, (void *)chunk, chunksize, committed); if (config_stats) { @@ -776,7 +777,7 @@ arena_chunk_discard(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk) } static void -arena_spare_discard(tsd_t *tsd, arena_t *arena, arena_chunk_t *spare) +arena_spare_discard(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *spare) { assert(arena->spare != spare); @@ -786,11 +787,11 @@ arena_spare_discard(tsd_t *tsd, arena_t *arena, arena_chunk_t *spare) chunk_npages-map_bias); } - arena_chunk_discard(tsd, arena, spare); + arena_chunk_discard(tsdn, arena, spare); } static void -arena_chunk_dalloc(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk) +arena_chunk_dalloc(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk) { arena_chunk_t *spare; @@ -812,7 +813,7 @@ arena_chunk_dalloc(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk) spare = arena->spare; arena->spare = chunk; if (spare != NULL) - arena_spare_discard(tsd, arena, spare); + arena_spare_discard(tsdn, arena, spare); } static void @@ -896,64 +897,64 @@ arena_huge_ralloc_stats_update_undo(arena_t *arena, size_t oldsize, } extent_node_t * -arena_node_alloc(tsd_t *tsd, arena_t *arena) +arena_node_alloc(tsdn_t *tsdn, arena_t *arena) { extent_node_t *node; - malloc_mutex_lock(tsd, &arena->node_cache_mtx); + malloc_mutex_lock(tsdn, &arena->node_cache_mtx); node = ql_last(&arena->node_cache, ql_link); if (node == NULL) { - malloc_mutex_unlock(tsd, &arena->node_cache_mtx); - return (base_alloc(tsd, sizeof(extent_node_t))); + malloc_mutex_unlock(tsdn, &arena->node_cache_mtx); + return (base_alloc(tsdn, sizeof(extent_node_t))); } ql_tail_remove(&arena->node_cache, extent_node_t, ql_link); - malloc_mutex_unlock(tsd, &arena->node_cache_mtx); + malloc_mutex_unlock(tsdn, &arena->node_cache_mtx); return (node); } void -arena_node_dalloc(tsd_t *tsd, arena_t *arena, extent_node_t *node) +arena_node_dalloc(tsdn_t *tsdn, arena_t *arena, extent_node_t *node) { - malloc_mutex_lock(tsd, &arena->node_cache_mtx); + malloc_mutex_lock(tsdn, &arena->node_cache_mtx); ql_elm_new(node, ql_link); ql_tail_insert(&arena->node_cache, node, ql_link); - malloc_mutex_unlock(tsd, &arena->node_cache_mtx); + malloc_mutex_unlock(tsdn, &arena->node_cache_mtx); } static void * -arena_chunk_alloc_huge_hard(tsd_t *tsd, arena_t *arena, +arena_chunk_alloc_huge_hard(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, size_t usize, size_t alignment, bool *zero, size_t csize) { void *ret; bool commit = true; - ret = chunk_alloc_wrapper(tsd, arena, chunk_hooks, NULL, csize, + ret = chunk_alloc_wrapper(tsdn, arena, chunk_hooks, NULL, csize, alignment, zero, &commit); if (ret == NULL) { /* Revert optimistic stats updates. 
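/*
 * [Editorial annotation -- not part of the patch.]  Note the pattern in the
 * arena_chunk_*_hard() hunks above and below: stats are updated
 * optimistically while arena->lock is held, the lock is dropped around the
 * chunk_alloc_wrapper() call (which may invoke user-installed and therefore
 * potentially slow chunk hooks), and on failure the lock is re-acquired so
 * the optimistic update can be reverted (arena_huge_malloc_stats_update_undo
 * below).  Only the malloc_mutex_lock()/malloc_mutex_unlock() arguments
 * change from tsd to tsdn in this series; the locking protocol itself is
 * unchanged.
 */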
*/ - malloc_mutex_lock(tsd, &arena->lock); + malloc_mutex_lock(tsdn, &arena->lock); if (config_stats) { arena_huge_malloc_stats_update_undo(arena, usize); arena->stats.mapped -= usize; } arena_nactive_sub(arena, usize >> LG_PAGE); - malloc_mutex_unlock(tsd, &arena->lock); + malloc_mutex_unlock(tsdn, &arena->lock); } return (ret); } void * -arena_chunk_alloc_huge(tsd_t *tsd, arena_t *arena, size_t usize, +arena_chunk_alloc_huge(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool *zero) { void *ret; chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; size_t csize = CHUNK_CEILING(usize); - malloc_mutex_lock(tsd, &arena->lock); + malloc_mutex_lock(tsdn, &arena->lock); /* Optimistically update stats. */ if (config_stats) { @@ -962,11 +963,11 @@ arena_chunk_alloc_huge(tsd_t *tsd, arena_t *arena, size_t usize, } arena_nactive_add(arena, usize >> LG_PAGE); - ret = chunk_alloc_cache(tsd, arena, &chunk_hooks, NULL, csize, + ret = chunk_alloc_cache(tsdn, arena, &chunk_hooks, NULL, csize, alignment, zero, true); - malloc_mutex_unlock(tsd, &arena->lock); + malloc_mutex_unlock(tsdn, &arena->lock); if (ret == NULL) { - ret = arena_chunk_alloc_huge_hard(tsd, arena, &chunk_hooks, + ret = arena_chunk_alloc_huge_hard(tsdn, arena, &chunk_hooks, usize, alignment, zero, csize); } @@ -974,49 +975,49 @@ arena_chunk_alloc_huge(tsd_t *tsd, arena_t *arena, size_t usize, } void -arena_chunk_dalloc_huge(tsd_t *tsd, arena_t *arena, void *chunk, size_t usize) +arena_chunk_dalloc_huge(tsdn_t *tsdn, arena_t *arena, void *chunk, size_t usize) { chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; size_t csize; csize = CHUNK_CEILING(usize); - malloc_mutex_lock(tsd, &arena->lock); + malloc_mutex_lock(tsdn, &arena->lock); if (config_stats) { arena_huge_dalloc_stats_update(arena, usize); arena->stats.mapped -= usize; } arena_nactive_sub(arena, usize >> LG_PAGE); - chunk_dalloc_cache(tsd, arena, &chunk_hooks, chunk, csize, true); - malloc_mutex_unlock(tsd, &arena->lock); + chunk_dalloc_cache(tsdn, arena, &chunk_hooks, chunk, csize, true); + malloc_mutex_unlock(tsdn, &arena->lock); } void -arena_chunk_ralloc_huge_similar(tsd_t *tsd, arena_t *arena, void *chunk, +arena_chunk_ralloc_huge_similar(tsdn_t *tsdn, arena_t *arena, void *chunk, size_t oldsize, size_t usize) { assert(CHUNK_CEILING(oldsize) == CHUNK_CEILING(usize)); assert(oldsize != usize); - malloc_mutex_lock(tsd, &arena->lock); + malloc_mutex_lock(tsdn, &arena->lock); if (config_stats) arena_huge_ralloc_stats_update(arena, oldsize, usize); if (oldsize < usize) arena_nactive_add(arena, (usize - oldsize) >> LG_PAGE); else arena_nactive_sub(arena, (oldsize - usize) >> LG_PAGE); - malloc_mutex_unlock(tsd, &arena->lock); + malloc_mutex_unlock(tsdn, &arena->lock); } void -arena_chunk_ralloc_huge_shrink(tsd_t *tsd, arena_t *arena, void *chunk, +arena_chunk_ralloc_huge_shrink(tsdn_t *tsdn, arena_t *arena, void *chunk, size_t oldsize, size_t usize) { size_t udiff = oldsize - usize; size_t cdiff = CHUNK_CEILING(oldsize) - CHUNK_CEILING(usize); - malloc_mutex_lock(tsd, &arena->lock); + malloc_mutex_lock(tsdn, &arena->lock); if (config_stats) { arena_huge_ralloc_stats_update(arena, oldsize, usize); if (cdiff != 0) @@ -1029,35 +1030,35 @@ arena_chunk_ralloc_huge_shrink(tsd_t *tsd, arena_t *arena, void *chunk, void *nchunk = (void *)((uintptr_t)chunk + CHUNK_CEILING(usize)); - chunk_dalloc_cache(tsd, arena, &chunk_hooks, nchunk, cdiff, + chunk_dalloc_cache(tsdn, arena, &chunk_hooks, nchunk, cdiff, true); } - malloc_mutex_unlock(tsd, &arena->lock); + 
malloc_mutex_unlock(tsdn, &arena->lock); } static bool -arena_chunk_ralloc_huge_expand_hard(tsd_t *tsd, arena_t *arena, +arena_chunk_ralloc_huge_expand_hard(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk, size_t oldsize, size_t usize, bool *zero, void *nchunk, size_t udiff, size_t cdiff) { bool err; bool commit = true; - err = (chunk_alloc_wrapper(tsd, arena, chunk_hooks, nchunk, cdiff, + err = (chunk_alloc_wrapper(tsdn, arena, chunk_hooks, nchunk, cdiff, chunksize, zero, &commit) == NULL); if (err) { /* Revert optimistic stats updates. */ - malloc_mutex_lock(tsd, &arena->lock); + malloc_mutex_lock(tsdn, &arena->lock); if (config_stats) { arena_huge_ralloc_stats_update_undo(arena, oldsize, usize); arena->stats.mapped -= cdiff; } arena_nactive_sub(arena, udiff >> LG_PAGE); - malloc_mutex_unlock(tsd, &arena->lock); + malloc_mutex_unlock(tsdn, &arena->lock); } else if (chunk_hooks->merge(chunk, CHUNK_CEILING(oldsize), nchunk, cdiff, true, arena->ind)) { - chunk_dalloc_wrapper(tsd, arena, chunk_hooks, nchunk, cdiff, + chunk_dalloc_wrapper(tsdn, arena, chunk_hooks, nchunk, cdiff, *zero, true); err = true; } @@ -1065,16 +1066,16 @@ arena_chunk_ralloc_huge_expand_hard(tsd_t *tsd, arena_t *arena, } bool -arena_chunk_ralloc_huge_expand(tsd_t *tsd, arena_t *arena, void *chunk, +arena_chunk_ralloc_huge_expand(tsdn_t *tsdn, arena_t *arena, void *chunk, size_t oldsize, size_t usize, bool *zero) { bool err; - chunk_hooks_t chunk_hooks = chunk_hooks_get(tsd, arena); + chunk_hooks_t chunk_hooks = chunk_hooks_get(tsdn, arena); void *nchunk = (void *)((uintptr_t)chunk + CHUNK_CEILING(oldsize)); size_t udiff = usize - oldsize; size_t cdiff = CHUNK_CEILING(usize) - CHUNK_CEILING(oldsize); - malloc_mutex_lock(tsd, &arena->lock); + malloc_mutex_lock(tsdn, &arena->lock); /* Optimistically update stats. */ if (config_stats) { @@ -1083,16 +1084,16 @@ arena_chunk_ralloc_huge_expand(tsd_t *tsd, arena_t *arena, void *chunk, } arena_nactive_add(arena, udiff >> LG_PAGE); - err = (chunk_alloc_cache(tsd, arena, &chunk_hooks, nchunk, cdiff, + err = (chunk_alloc_cache(tsdn, arena, &chunk_hooks, nchunk, cdiff, chunksize, zero, true) == NULL); - malloc_mutex_unlock(tsd, &arena->lock); + malloc_mutex_unlock(tsdn, &arena->lock); if (err) { - err = arena_chunk_ralloc_huge_expand_hard(tsd, arena, + err = arena_chunk_ralloc_huge_expand_hard(tsdn, arena, &chunk_hooks, chunk, oldsize, usize, zero, nchunk, udiff, cdiff); } else if (chunk_hooks.merge(chunk, CHUNK_CEILING(oldsize), nchunk, cdiff, true, arena->ind)) { - chunk_dalloc_wrapper(tsd, arena, &chunk_hooks, nchunk, cdiff, + chunk_dalloc_wrapper(tsdn, arena, &chunk_hooks, nchunk, cdiff, *zero, true); err = true; } @@ -1133,7 +1134,7 @@ arena_run_alloc_large_helper(arena_t *arena, size_t size, bool zero) } static arena_run_t * -arena_run_alloc_large(tsd_t *tsd, arena_t *arena, size_t size, bool zero) +arena_run_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t size, bool zero) { arena_chunk_t *chunk; arena_run_t *run; @@ -1149,7 +1150,7 @@ arena_run_alloc_large(tsd_t *tsd, arena_t *arena, size_t size, bool zero) /* * No usable runs. Create a new chunk from which to allocate the run. 
*/ - chunk = arena_chunk_alloc(tsd, arena); + chunk = arena_chunk_alloc(tsdn, arena); if (chunk != NULL) { run = &arena_miscelm_get_mutable(chunk, map_bias)->run; if (arena_run_split_large(arena, run, size, zero)) @@ -1177,7 +1178,7 @@ arena_run_alloc_small_helper(arena_t *arena, size_t size, szind_t binind) } static arena_run_t * -arena_run_alloc_small(tsd_t *tsd, arena_t *arena, size_t size, szind_t binind) +arena_run_alloc_small(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t binind) { arena_chunk_t *chunk; arena_run_t *run; @@ -1194,7 +1195,7 @@ arena_run_alloc_small(tsd_t *tsd, arena_t *arena, size_t size, szind_t binind) /* * No usable runs. Create a new chunk from which to allocate the run. */ - chunk = arena_chunk_alloc(tsd, arena); + chunk = arena_chunk_alloc(tsdn, arena); if (chunk != NULL) { run = &arena_miscelm_get_mutable(chunk, map_bias)->run; if (arena_run_split_small(arena, run, size, binind)) @@ -1219,28 +1220,28 @@ arena_lg_dirty_mult_valid(ssize_t lg_dirty_mult) } ssize_t -arena_lg_dirty_mult_get(tsd_t *tsd, arena_t *arena) +arena_lg_dirty_mult_get(tsdn_t *tsdn, arena_t *arena) { ssize_t lg_dirty_mult; - malloc_mutex_lock(tsd, &arena->lock); + malloc_mutex_lock(tsdn, &arena->lock); lg_dirty_mult = arena->lg_dirty_mult; - malloc_mutex_unlock(tsd, &arena->lock); + malloc_mutex_unlock(tsdn, &arena->lock); return (lg_dirty_mult); } bool -arena_lg_dirty_mult_set(tsd_t *tsd, arena_t *arena, ssize_t lg_dirty_mult) +arena_lg_dirty_mult_set(tsdn_t *tsdn, arena_t *arena, ssize_t lg_dirty_mult) { if (!arena_lg_dirty_mult_valid(lg_dirty_mult)) return (true); - malloc_mutex_lock(tsd, &arena->lock); + malloc_mutex_lock(tsdn, &arena->lock); arena->lg_dirty_mult = lg_dirty_mult; - arena_maybe_purge(tsd, arena); - malloc_mutex_unlock(tsd, &arena->lock); + arena_maybe_purge(tsdn, arena); + malloc_mutex_unlock(tsdn, &arena->lock); return (false); } @@ -1397,25 +1398,25 @@ arena_decay_time_valid(ssize_t decay_time) } ssize_t -arena_decay_time_get(tsd_t *tsd, arena_t *arena) +arena_decay_time_get(tsdn_t *tsdn, arena_t *arena) { ssize_t decay_time; - malloc_mutex_lock(tsd, &arena->lock); + malloc_mutex_lock(tsdn, &arena->lock); decay_time = arena->decay_time; - malloc_mutex_unlock(tsd, &arena->lock); + malloc_mutex_unlock(tsdn, &arena->lock); return (decay_time); } bool -arena_decay_time_set(tsd_t *tsd, arena_t *arena, ssize_t decay_time) +arena_decay_time_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_time) { if (!arena_decay_time_valid(decay_time)) return (true); - malloc_mutex_lock(tsd, &arena->lock); + malloc_mutex_lock(tsdn, &arena->lock); /* * Restart decay backlog from scratch, which may cause many dirty pages * to be immediately purged. It would conceptually be possible to map @@ -1425,14 +1426,14 @@ arena_decay_time_set(tsd_t *tsd, arena_t *arena, ssize_t decay_time) * arbitrary change during initial arena configuration. 
*/ arena_decay_init(arena, decay_time); - arena_maybe_purge(tsd, arena); - malloc_mutex_unlock(tsd, &arena->lock); + arena_maybe_purge(tsdn, arena); + malloc_mutex_unlock(tsdn, &arena->lock); return (false); } static void -arena_maybe_purge_ratio(tsd_t *tsd, arena_t *arena) +arena_maybe_purge_ratio(tsdn_t *tsdn, arena_t *arena) { assert(opt_purge == purge_mode_ratio); @@ -1455,12 +1456,12 @@ arena_maybe_purge_ratio(tsd_t *tsd, arena_t *arena) */ if (arena->ndirty <= threshold) return; - arena_purge_to_limit(tsd, arena, threshold); + arena_purge_to_limit(tsdn, arena, threshold); } } static void -arena_maybe_purge_decay(tsd_t *tsd, arena_t *arena) +arena_maybe_purge_decay(tsdn_t *tsdn, arena_t *arena) { nstime_t time; size_t ndirty_limit; @@ -1470,7 +1471,7 @@ arena_maybe_purge_decay(tsd_t *tsd, arena_t *arena) /* Purge all or nothing if the option is disabled. */ if (arena->decay_time <= 0) { if (arena->decay_time == 0) - arena_purge_to_limit(tsd, arena, 0); + arena_purge_to_limit(tsdn, arena, 0); return; } @@ -1491,11 +1492,11 @@ arena_maybe_purge_decay(tsd_t *tsd, arena_t *arena) */ if (arena->ndirty <= ndirty_limit) return; - arena_purge_to_limit(tsd, arena, ndirty_limit); + arena_purge_to_limit(tsdn, arena, ndirty_limit); } void -arena_maybe_purge(tsd_t *tsd, arena_t *arena) +arena_maybe_purge(tsdn_t *tsdn, arena_t *arena) { /* Don't recursively purge. */ @@ -1503,9 +1504,9 @@ arena_maybe_purge(tsd_t *tsd, arena_t *arena) return; if (opt_purge == purge_mode_ratio) - arena_maybe_purge_ratio(tsd, arena); + arena_maybe_purge_ratio(tsdn, arena); else - arena_maybe_purge_decay(tsd, arena); + arena_maybe_purge_decay(tsdn, arena); } static size_t @@ -1543,7 +1544,7 @@ arena_dirty_count(arena_t *arena) } static size_t -arena_stash_dirty(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, +arena_stash_dirty(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, size_t ndirty_limit, arena_runs_dirty_link_t *purge_runs_sentinel, extent_node_t *purge_chunks_sentinel) { @@ -1574,7 +1575,7 @@ arena_stash_dirty(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, * dalloc_node=false argument to chunk_alloc_cache(). */ zero = false; - chunk = chunk_alloc_cache(tsd, arena, chunk_hooks, + chunk = chunk_alloc_cache(tsdn, arena, chunk_hooks, extent_node_addr_get(chunkselm), extent_node_size_get(chunkselm), chunksize, &zero, false); @@ -1609,7 +1610,7 @@ arena_stash_dirty(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, * prior to allocation. */ if (chunk == arena->spare) - arena_chunk_alloc(tsd, arena); + arena_chunk_alloc(tsdn, arena); /* Temporarily allocate the free dirty run. 
*/ arena_run_split_large(arena, run, run_size, false); @@ -1633,7 +1634,7 @@ arena_stash_dirty(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, } static size_t -arena_purge_stashed(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, +arena_purge_stashed(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, arena_runs_dirty_link_t *purge_runs_sentinel, extent_node_t *purge_chunks_sentinel) { @@ -1645,7 +1646,7 @@ arena_purge_stashed(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, nmadvise = 0; npurged = 0; - malloc_mutex_unlock(tsd, &arena->lock); + malloc_mutex_unlock(tsdn, &arena->lock); for (rdelm = qr_next(purge_runs_sentinel, rd_link), chunkselm = qr_next(purge_chunks_sentinel, cc_link); rdelm != purge_runs_sentinel; rdelm = qr_next(rdelm, rd_link)) { @@ -1684,7 +1685,7 @@ arena_purge_stashed(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, flag_unzeroed = 0; flags = CHUNK_MAP_DECOMMITTED; } else { - flag_unzeroed = chunk_purge_wrapper(tsd, arena, + flag_unzeroed = chunk_purge_wrapper(tsdn, arena, chunk_hooks, chunk, chunksize, pageind << LG_PAGE, run_size) ? CHUNK_MAP_UNZEROED : 0; flags = flag_unzeroed; @@ -1715,7 +1716,7 @@ arena_purge_stashed(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, if (config_stats) nmadvise++; } - malloc_mutex_lock(tsd, &arena->lock); + malloc_mutex_lock(tsdn, &arena->lock); if (config_stats) { arena->stats.nmadvise += nmadvise; @@ -1726,7 +1727,7 @@ arena_purge_stashed(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, } static void -arena_unstash_purged(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, +arena_unstash_purged(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, arena_runs_dirty_link_t *purge_runs_sentinel, extent_node_t *purge_chunks_sentinel) { @@ -1746,9 +1747,9 @@ arena_unstash_purged(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, bool zeroed = extent_node_zeroed_get(chunkselm); bool committed = extent_node_committed_get(chunkselm); extent_node_dirty_remove(chunkselm); - arena_node_dalloc(tsd, arena, chunkselm); + arena_node_dalloc(tsdn, arena, chunkselm); chunkselm = chunkselm_next; - chunk_dalloc_wrapper(tsd, arena, chunk_hooks, addr, + chunk_dalloc_wrapper(tsdn, arena, chunk_hooks, addr, size, zeroed, committed); } else { arena_chunk_t *chunk = @@ -1760,7 +1761,7 @@ arena_unstash_purged(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, pageind) != 0); arena_run_t *run = &miscelm->run; qr_remove(rdelm, rd_link); - arena_run_dalloc(tsd, arena, run, false, true, + arena_run_dalloc(tsdn, arena, run, false, true, decommitted); } } @@ -1776,9 +1777,9 @@ arena_unstash_purged(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, * (arena->ndirty >= ndirty_limit) */ static void -arena_purge_to_limit(tsd_t *tsd, arena_t *arena, size_t ndirty_limit) +arena_purge_to_limit(tsdn_t *tsdn, arena_t *arena, size_t ndirty_limit) { - chunk_hooks_t chunk_hooks = chunk_hooks_get(tsd, arena); + chunk_hooks_t chunk_hooks = chunk_hooks_get(tsdn, arena); size_t npurge, npurged; arena_runs_dirty_link_t purge_runs_sentinel; extent_node_t purge_chunks_sentinel; @@ -1799,14 +1800,14 @@ arena_purge_to_limit(tsd_t *tsd, arena_t *arena, size_t ndirty_limit) qr_new(&purge_runs_sentinel, rd_link); extent_node_dirty_linkage_init(&purge_chunks_sentinel); - npurge = arena_stash_dirty(tsd, arena, &chunk_hooks, ndirty_limit, + npurge = arena_stash_dirty(tsdn, arena, &chunk_hooks, ndirty_limit, &purge_runs_sentinel, &purge_chunks_sentinel); if (npurge == 0) goto label_return; - npurged = 
arena_purge_stashed(tsd, arena, &chunk_hooks, + npurged = arena_purge_stashed(tsdn, arena, &chunk_hooks, &purge_runs_sentinel, &purge_chunks_sentinel); assert(npurged == npurge); - arena_unstash_purged(tsd, arena, &chunk_hooks, &purge_runs_sentinel, + arena_unstash_purged(tsdn, arena, &chunk_hooks, &purge_runs_sentinel, &purge_chunks_sentinel); if (config_stats) @@ -1817,15 +1818,15 @@ label_return: } void -arena_purge(tsd_t *tsd, arena_t *arena, bool all) +arena_purge(tsdn_t *tsdn, arena_t *arena, bool all) { - malloc_mutex_lock(tsd, &arena->lock); + malloc_mutex_lock(tsdn, &arena->lock); if (all) - arena_purge_to_limit(tsd, arena, 0); + arena_purge_to_limit(tsdn, arena, 0); else - arena_maybe_purge(tsd, arena); - malloc_mutex_unlock(tsd, &arena->lock); + arena_maybe_purge(tsdn, arena); + malloc_mutex_unlock(tsdn, &arena->lock); } static void @@ -1845,7 +1846,8 @@ arena_achunk_prof_reset(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk) if (arena_mapbits_large_get(chunk, pageind) != 0) { void *ptr = (void *)((uintptr_t)chunk + (pageind << LG_PAGE)); - size_t usize = isalloc(tsd, ptr, config_prof); + size_t usize = isalloc(tsd_tsdn(tsd), ptr, + config_prof); prof_free(tsd, ptr, usize); npages = arena_mapbits_large_size_get(chunk, @@ -1902,39 +1904,39 @@ arena_reset(tsd_t *tsd, arena_t *arena) } /* Huge allocations. */ - malloc_mutex_lock(tsd, &arena->huge_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &arena->huge_mtx); for (node = ql_last(&arena->huge, ql_link); node != NULL; node = ql_last(&arena->huge, ql_link)) { void *ptr = extent_node_addr_get(node); size_t usize; - malloc_mutex_unlock(tsd, &arena->huge_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &arena->huge_mtx); if (config_stats || (config_prof && opt_prof)) - usize = isalloc(tsd, ptr, config_prof); + usize = isalloc(tsd_tsdn(tsd), ptr, config_prof); /* Remove huge allocation from prof sample set. */ if (config_prof && opt_prof) prof_free(tsd, ptr, usize); - huge_dalloc(tsd, ptr); - malloc_mutex_lock(tsd, &arena->huge_mtx); + huge_dalloc(tsd_tsdn(tsd), ptr); + malloc_mutex_lock(tsd_tsdn(tsd), &arena->huge_mtx); /* Cancel out unwanted effects on stats. */ if (config_stats) arena_huge_reset_stats_cancel(arena, usize); } - malloc_mutex_unlock(tsd, &arena->huge_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &arena->huge_mtx); - malloc_mutex_lock(tsd, &arena->lock); + malloc_mutex_lock(tsd_tsdn(tsd), &arena->lock); /* Bins. */ for (i = 0; i < NBINS; i++) { arena_bin_t *bin = &arena->bins[i]; - malloc_mutex_lock(tsd, &bin->lock); + malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); bin->runcur = NULL; arena_run_heap_new(&bin->runs); if (config_stats) { bin->stats.curregs = 0; bin->stats.curruns = 0; } - malloc_mutex_unlock(tsd, &bin->lock); + malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock); } /* @@ -1952,12 +1954,13 @@ arena_reset(tsd_t *tsd, arena_t *arena) for (node = ql_last(&arena->achunks, ql_link); node != NULL; node = ql_last(&arena->achunks, ql_link)) { ql_remove(&arena->achunks, node, ql_link); - arena_chunk_discard(tsd, arena, extent_node_addr_get(node)); + arena_chunk_discard(tsd_tsdn(tsd), arena, + extent_node_addr_get(node)); } /* Spare. 
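/*
 * [Editorial annotation -- not part of the patch.]  arena_reset() above is a
 * representative case of the split this series makes: it keeps a tsd_t
 * parameter because it calls code that genuinely needs thread state
 * (prof_free()), and it wraps that argument with tsd_tsdn(tsd) at each call
 * into tsdn-taking internals (isalloc(), huge_dalloc(),
 * malloc_mutex_lock()).  Functions that can run without thread state appear
 * to be the ones converted to take tsdn_t directly.
 */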
*/ if (arena->spare != NULL) { - arena_chunk_discard(tsd, arena, arena->spare); + arena_chunk_discard(tsd_tsdn(tsd), arena, arena->spare); arena->spare = NULL; } @@ -1967,7 +1970,7 @@ arena_reset(tsd_t *tsd, arena_t *arena) for(i = 0; i < runs_avail_nclasses; i++) arena_run_heap_new(&arena->runs_avail[i]); - malloc_mutex_unlock(tsd, &arena->lock); + malloc_mutex_unlock(tsd_tsdn(tsd), &arena->lock); } static void @@ -2084,7 +2087,7 @@ arena_run_size_get(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, } static void -arena_run_dalloc(tsd_t *tsd, arena_t *arena, arena_run_t *run, bool dirty, +arena_run_dalloc(tsdn_t *tsdn, arena_t *arena, arena_run_t *run, bool dirty, bool cleaned, bool decommitted) { arena_chunk_t *chunk; @@ -2145,7 +2148,7 @@ arena_run_dalloc(tsd_t *tsd, arena_t *arena, arena_run_t *run, bool dirty, if (size == arena_maxrun) { assert(run_ind == map_bias); assert(run_pages == (arena_maxrun >> LG_PAGE)); - arena_chunk_dalloc(tsd, arena, chunk); + arena_chunk_dalloc(tsdn, arena, chunk); } /* @@ -2156,11 +2159,11 @@ arena_run_dalloc(tsd_t *tsd, arena_t *arena, arena_run_t *run, bool dirty, * chances of spuriously crossing the dirty page purging threshold. */ if (dirty) - arena_maybe_purge(tsd, arena); + arena_maybe_purge(tsdn, arena); } static void -arena_run_trim_head(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, +arena_run_trim_head(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, size_t oldsize, size_t newsize) { arena_chunk_map_misc_t *miscelm = arena_run_to_miscelm(run); @@ -2196,12 +2199,12 @@ arena_run_trim_head(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, flag_dirty | (flag_unzeroed_mask & arena_mapbits_unzeroed_get(chunk, pageind+head_npages))); - arena_run_dalloc(tsd, arena, run, false, false, (flag_decommitted != + arena_run_dalloc(tsdn, arena, run, false, false, (flag_decommitted != 0)); } static void -arena_run_trim_tail(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, +arena_run_trim_tail(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, size_t oldsize, size_t newsize, bool dirty) { arena_chunk_map_misc_t *miscelm = arena_run_to_miscelm(run); @@ -2241,7 +2244,7 @@ arena_run_trim_tail(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, tail_miscelm = arena_miscelm_get_mutable(chunk, pageind + head_npages); tail_run = &tail_miscelm->run; - arena_run_dalloc(tsd, arena, tail_run, dirty, false, (flag_decommitted + arena_run_dalloc(tsdn, arena, tail_run, dirty, false, (flag_decommitted != 0)); } @@ -2268,7 +2271,7 @@ arena_bin_nonfull_run_tryget(arena_bin_t *bin) } static arena_run_t * -arena_bin_nonfull_run_get(tsd_t *tsd, arena_t *arena, arena_bin_t *bin) +arena_bin_nonfull_run_get(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin) { arena_run_t *run; szind_t binind; @@ -2284,19 +2287,19 @@ arena_bin_nonfull_run_get(tsd_t *tsd, arena_t *arena, arena_bin_t *bin) bin_info = &arena_bin_info[binind]; /* Allocate a new run. */ - malloc_mutex_unlock(tsd, &bin->lock); + malloc_mutex_unlock(tsdn, &bin->lock); /******************************/ - malloc_mutex_lock(tsd, &arena->lock); - run = arena_run_alloc_small(tsd, arena, bin_info->run_size, binind); + malloc_mutex_lock(tsdn, &arena->lock); + run = arena_run_alloc_small(tsdn, arena, bin_info->run_size, binind); if (run != NULL) { /* Initialize run internals. 
*/ run->binind = binind; run->nfree = bin_info->nregs; bitmap_init(run->bitmap, &bin_info->bitmap_info); } - malloc_mutex_unlock(tsd, &arena->lock); + malloc_mutex_unlock(tsdn, &arena->lock); /********************************/ - malloc_mutex_lock(tsd, &bin->lock); + malloc_mutex_lock(tsdn, &bin->lock); if (run != NULL) { if (config_stats) { bin->stats.nruns++; @@ -2319,7 +2322,7 @@ arena_bin_nonfull_run_get(tsd_t *tsd, arena_t *arena, arena_bin_t *bin) /* Re-fill bin->runcur, then call arena_run_reg_alloc(). */ static void * -arena_bin_malloc_hard(tsd_t *tsd, arena_t *arena, arena_bin_t *bin) +arena_bin_malloc_hard(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin) { szind_t binind; arena_bin_info_t *bin_info; @@ -2328,7 +2331,7 @@ arena_bin_malloc_hard(tsd_t *tsd, arena_t *arena, arena_bin_t *bin) binind = arena_bin_index(arena, bin); bin_info = &arena_bin_info[binind]; bin->runcur = NULL; - run = arena_bin_nonfull_run_get(tsd, arena, bin); + run = arena_bin_nonfull_run_get(tsdn, arena, bin); if (bin->runcur != NULL && bin->runcur->nfree > 0) { /* * Another thread updated runcur while this one ran without the @@ -2350,7 +2353,7 @@ arena_bin_malloc_hard(tsd_t *tsd, arena_t *arena, arena_bin_t *bin) */ chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); if (run->nfree == bin_info->nregs) { - arena_dalloc_bin_run(tsd, arena, chunk, run, + arena_dalloc_bin_run(tsdn, arena, chunk, run, bin); } else arena_bin_lower_run(arena, chunk, run, bin); @@ -2369,7 +2372,7 @@ arena_bin_malloc_hard(tsd_t *tsd, arena_t *arena, arena_bin_t *bin) } void -arena_tcache_fill_small(tsd_t *tsd, arena_t *arena, tcache_bin_t *tbin, +arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes) { unsigned i, nfill; @@ -2377,10 +2380,10 @@ arena_tcache_fill_small(tsd_t *tsd, arena_t *arena, tcache_bin_t *tbin, assert(tbin->ncached == 0); - if (config_prof && arena_prof_accum(tsd, arena, prof_accumbytes)) - prof_idump(tsd); + if (config_prof && arena_prof_accum(tsdn, arena, prof_accumbytes)) + prof_idump(tsdn); bin = &arena->bins[binind]; - malloc_mutex_lock(tsd, &bin->lock); + malloc_mutex_lock(tsdn, &bin->lock); for (i = 0, nfill = (tcache_bin_info[binind].ncached_max >> tbin->lg_fill_div); i < nfill; i++) { arena_run_t *run; @@ -2388,7 +2391,7 @@ arena_tcache_fill_small(tsd_t *tsd, arena_t *arena, tcache_bin_t *tbin, if ((run = bin->runcur) != NULL && run->nfree > 0) ptr = arena_run_reg_alloc(run, &arena_bin_info[binind]); else - ptr = arena_bin_malloc_hard(tsd, arena, bin); + ptr = arena_bin_malloc_hard(tsdn, arena, bin); if (ptr == NULL) { /* * OOM. 
tbin->avail isn't yet filled down to its first @@ -2415,9 +2418,9 @@ arena_tcache_fill_small(tsd_t *tsd, arena_t *arena, tcache_bin_t *tbin, bin->stats.nfills++; tbin->tstats.nrequests = 0; } - malloc_mutex_unlock(tsd, &bin->lock); + malloc_mutex_unlock(tsdn, &bin->lock); tbin->ncached = i; - arena_decay_tick(tsd, arena); + arena_decay_tick(tsdn, arena); } void @@ -2529,7 +2532,7 @@ arena_quarantine_junk_small(void *ptr, size_t usize) } static void * -arena_malloc_small(tsd_t *tsd, arena_t *arena, szind_t binind, bool zero) +arena_malloc_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) { void *ret; arena_bin_t *bin; @@ -2540,14 +2543,14 @@ arena_malloc_small(tsd_t *tsd, arena_t *arena, szind_t binind, bool zero) bin = &arena->bins[binind]; usize = index2size(binind); - malloc_mutex_lock(tsd, &bin->lock); + malloc_mutex_lock(tsdn, &bin->lock); if ((run = bin->runcur) != NULL && run->nfree > 0) ret = arena_run_reg_alloc(run, &arena_bin_info[binind]); else - ret = arena_bin_malloc_hard(tsd, arena, bin); + ret = arena_bin_malloc_hard(tsdn, arena, bin); if (ret == NULL) { - malloc_mutex_unlock(tsd, &bin->lock); + malloc_mutex_unlock(tsdn, &bin->lock); return (NULL); } @@ -2556,9 +2559,9 @@ arena_malloc_small(tsd_t *tsd, arena_t *arena, szind_t binind, bool zero) bin->stats.nrequests++; bin->stats.curregs++; } - malloc_mutex_unlock(tsd, &bin->lock); - if (config_prof && !isthreaded && arena_prof_accum(tsd, arena, usize)) - prof_idump(tsd); + malloc_mutex_unlock(tsdn, &bin->lock); + if (config_prof && !isthreaded && arena_prof_accum(tsdn, arena, usize)) + prof_idump(tsdn); if (!zero) { if (config_fill) { @@ -2578,12 +2581,12 @@ arena_malloc_small(tsd_t *tsd, arena_t *arena, szind_t binind, bool zero) memset(ret, 0, usize); } - arena_decay_tick(tsd, arena); + arena_decay_tick(tsdn, arena); return (ret); } void * -arena_malloc_large(tsd_t *tsd, arena_t *arena, szind_t binind, bool zero) +arena_malloc_large(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) { void *ret; size_t usize; @@ -2594,7 +2597,7 @@ arena_malloc_large(tsd_t *tsd, arena_t *arena, szind_t binind, bool zero) /* Large allocation. 
*/ usize = index2size(binind); - malloc_mutex_lock(tsd, &arena->lock); + malloc_mutex_lock(tsdn, &arena->lock); if (config_cache_oblivious) { uint64_t r; @@ -2607,9 +2610,9 @@ arena_malloc_large(tsd_t *tsd, arena_t *arena, szind_t binind, bool zero) random_offset = ((uintptr_t)r) << LG_CACHELINE; } else random_offset = 0; - run = arena_run_alloc_large(tsd, arena, usize + large_pad, zero); + run = arena_run_alloc_large(tsdn, arena, usize + large_pad, zero); if (run == NULL) { - malloc_mutex_unlock(tsd, &arena->lock); + malloc_mutex_unlock(tsdn, &arena->lock); return (NULL); } miscelm = arena_run_to_miscelm(run); @@ -2627,9 +2630,9 @@ arena_malloc_large(tsd_t *tsd, arena_t *arena, szind_t binind, bool zero) } if (config_prof) idump = arena_prof_accum_locked(arena, usize); - malloc_mutex_unlock(tsd, &arena->lock); + malloc_mutex_unlock(tsdn, &arena->lock); if (config_prof && idump) - prof_idump(tsd); + prof_idump(tsdn); if (!zero) { if (config_fill) { @@ -2640,29 +2643,32 @@ arena_malloc_large(tsd_t *tsd, arena_t *arena, szind_t binind, bool zero) } } - arena_decay_tick(tsd, arena); + arena_decay_tick(tsdn, arena); return (ret); } void * -arena_malloc_hard(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind, +arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero) { - arena = arena_choose(tsd, arena); + assert(!tsdn_null(tsdn) || arena != NULL); + + if (likely(!tsdn_null(tsdn))) + arena = arena_choose(tsdn_tsd(tsdn), arena); if (unlikely(arena == NULL)) return (NULL); if (likely(size <= SMALL_MAXCLASS)) - return (arena_malloc_small(tsd, arena, ind, zero)); + return (arena_malloc_small(tsdn, arena, ind, zero)); if (likely(size <= large_maxclass)) - return (arena_malloc_large(tsd, arena, ind, zero)); - return (huge_malloc(tsd, arena, index2size(ind), zero)); + return (arena_malloc_large(tsdn, arena, ind, zero)); + return (huge_malloc(tsdn, arena, index2size(ind), zero)); } /* Only handles large allocations that require more than page alignment. 
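/*
 * [Editorial annotation -- not part of the patch.]  The
 * assert(!tsdn_null(tsdn) || arena != NULL) guards added to
 * arena_malloc_hard() above and arena_palloc_large() below show the payoff
 * of the nullable type: a bootstrap-time caller with no TSD yet may pass a
 * NULL tsdn_t as long as it supplies an explicit arena, in which case the
 * arena_choose(tsdn_tsd(tsdn), arena) step is skipped and the supplied
 * arena is used directly.
 */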
*/ static void * -arena_palloc_large(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, +arena_palloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero) { void *ret; @@ -2672,19 +2678,21 @@ arena_palloc_large(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, arena_chunk_map_misc_t *miscelm; void *rpages; + assert(!tsdn_null(tsdn) || arena != NULL); assert(usize == PAGE_CEILING(usize)); - arena = arena_choose(tsd, arena); + if (likely(!tsdn_null(tsdn))) + arena = arena_choose(tsdn_tsd(tsdn), arena); if (unlikely(arena == NULL)) return (NULL); alignment = PAGE_CEILING(alignment); alloc_size = usize + large_pad + alignment; - malloc_mutex_lock(tsd, &arena->lock); - run = arena_run_alloc_large(tsd, arena, alloc_size, false); + malloc_mutex_lock(tsdn, &arena->lock); + run = arena_run_alloc_large(tsdn, arena, alloc_size, false); if (run == NULL) { - malloc_mutex_unlock(tsd, &arena->lock); + malloc_mutex_unlock(tsdn, &arena->lock); return (NULL); } chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); @@ -2704,11 +2712,11 @@ arena_palloc_large(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, LG_PAGE)); run = &miscelm->run; - arena_run_trim_head(tsd, arena, chunk, head_run, alloc_size, + arena_run_trim_head(tsdn, arena, chunk, head_run, alloc_size, alloc_size - leadsize); } if (trailsize != 0) { - arena_run_trim_tail(tsd, arena, chunk, run, usize + large_pad + + arena_run_trim_tail(tsdn, arena, chunk, run, usize + large_pad + trailsize, usize + large_pad, false); } if (arena_run_init_large(arena, run, usize + large_pad, zero)) { @@ -2719,8 +2727,8 @@ arena_palloc_large(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, run_ind) != 0); assert(decommitted); /* Cause of OOM. */ - arena_run_dalloc(tsd, arena, run, dirty, false, decommitted); - malloc_mutex_unlock(tsd, &arena->lock); + arena_run_dalloc(tsdn, arena, run, dirty, false, decommitted); + malloc_mutex_unlock(tsdn, &arena->lock); return (NULL); } ret = arena_miscelm_to_rpages(miscelm); @@ -2735,7 +2743,7 @@ arena_palloc_large(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, arena->stats.lstats[index].nrequests++; arena->stats.lstats[index].curruns++; } - malloc_mutex_unlock(tsd, &arena->lock); + malloc_mutex_unlock(tsdn, &arena->lock); if (config_fill && !zero) { if (unlikely(opt_junk_alloc)) @@ -2743,12 +2751,12 @@ arena_palloc_large(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, else if (unlikely(opt_zero)) memset(ret, 0, usize); } - arena_decay_tick(tsd, arena); + arena_decay_tick(tsdn, arena); return (ret); } void * -arena_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, +arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero, tcache_t *tcache) { void *ret; @@ -2756,7 +2764,7 @@ arena_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, if (usize <= SMALL_MAXCLASS && (alignment < PAGE || (alignment == PAGE && (usize & PAGE_MASK) == 0))) { /* Small; alignment doesn't require special run placement. */ - ret = arena_malloc(tsd, arena, usize, size2index(usize), zero, + ret = arena_malloc(tsdn, arena, usize, size2index(usize), zero, tcache, true); } else if (usize <= large_maxclass && alignment <= PAGE) { /* @@ -2765,25 +2773,25 @@ arena_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, * the base of the run, so do some bit manipulation to retrieve * the base. 
*/ - ret = arena_malloc(tsd, arena, usize, size2index(usize), zero, + ret = arena_malloc(tsdn, arena, usize, size2index(usize), zero, tcache, true); if (config_cache_oblivious) ret = (void *)((uintptr_t)ret & ~PAGE_MASK); } else { if (likely(usize <= large_maxclass)) { - ret = arena_palloc_large(tsd, arena, usize, alignment, + ret = arena_palloc_large(tsdn, arena, usize, alignment, zero); } else if (likely(alignment <= chunksize)) - ret = huge_malloc(tsd, arena, usize, zero); + ret = huge_malloc(tsdn, arena, usize, zero); else { - ret = huge_palloc(tsd, arena, usize, alignment, zero); + ret = huge_palloc(tsdn, arena, usize, alignment, zero); } } return (ret); } void -arena_prof_promoted(tsd_t *tsd, const void *ptr, size_t size) +arena_prof_promoted(tsdn_t *tsdn, const void *ptr, size_t size) { arena_chunk_t *chunk; size_t pageind; @@ -2792,8 +2800,8 @@ arena_prof_promoted(tsd_t *tsd, const void *ptr, size_t size) cassert(config_prof); assert(ptr != NULL); assert(CHUNK_ADDR2BASE(ptr) != ptr); - assert(isalloc(tsd, ptr, false) == LARGE_MINCLASS); - assert(isalloc(tsd, ptr, true) == LARGE_MINCLASS); + assert(isalloc(tsdn, ptr, false) == LARGE_MINCLASS); + assert(isalloc(tsdn, ptr, true) == LARGE_MINCLASS); assert(size <= SMALL_MAXCLASS); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); @@ -2802,8 +2810,8 @@ arena_prof_promoted(tsd_t *tsd, const void *ptr, size_t size) assert(binind < NBINS); arena_mapbits_large_binind_set(chunk, pageind, binind); - assert(isalloc(tsd, ptr, false) == LARGE_MINCLASS); - assert(isalloc(tsd, ptr, true) == size); + assert(isalloc(tsdn, ptr, false) == LARGE_MINCLASS); + assert(isalloc(tsdn, ptr, true) == size); } static void @@ -2834,19 +2842,19 @@ arena_dissociate_bin_run(arena_chunk_t *chunk, arena_run_t *run, } static void -arena_dalloc_bin_run(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, +arena_dalloc_bin_run(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, arena_bin_t *bin) { assert(run != bin->runcur); - malloc_mutex_unlock(tsd, &bin->lock); + malloc_mutex_unlock(tsdn, &bin->lock); /******************************/ - malloc_mutex_lock(tsd, &arena->lock); - arena_run_dalloc(tsd, arena, run, true, false, false); - malloc_mutex_unlock(tsd, &arena->lock); + malloc_mutex_lock(tsdn, &arena->lock); + arena_run_dalloc(tsdn, arena, run, true, false, false); + malloc_mutex_unlock(tsdn, &arena->lock); /****************************/ - malloc_mutex_lock(tsd, &bin->lock); + malloc_mutex_lock(tsdn, &bin->lock); if (config_stats) bin->stats.curruns--; } @@ -2873,7 +2881,7 @@ arena_bin_lower_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, } static void -arena_dalloc_bin_locked_impl(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, +arena_dalloc_bin_locked_impl(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, void *ptr, arena_chunk_map_bits_t *bitselm, bool junked) { size_t pageind, rpages_ind; @@ -2895,7 +2903,7 @@ arena_dalloc_bin_locked_impl(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, arena_run_reg_dalloc(run, ptr); if (run->nfree == bin_info->nregs) { arena_dissociate_bin_run(chunk, run, bin); - arena_dalloc_bin_run(tsd, arena, chunk, run, bin); + arena_dalloc_bin_run(tsdn, arena, chunk, run, bin); } else if (run->nfree == 1 && run != bin->runcur) arena_bin_lower_run(arena, chunk, run, bin); @@ -2906,15 +2914,15 @@ arena_dalloc_bin_locked_impl(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, } void -arena_dalloc_bin_junked_locked(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, - void *ptr, arena_chunk_map_bits_t *bitselm) 
+arena_dalloc_bin_junked_locked(tsdn_t *tsdn, arena_t *arena, + arena_chunk_t *chunk, void *ptr, arena_chunk_map_bits_t *bitselm) { - arena_dalloc_bin_locked_impl(tsd, arena, chunk, ptr, bitselm, true); + arena_dalloc_bin_locked_impl(tsdn, arena, chunk, ptr, bitselm, true); } void -arena_dalloc_bin(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, void *ptr, +arena_dalloc_bin(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, void *ptr, size_t pageind, arena_chunk_map_bits_t *bitselm) { arena_run_t *run; @@ -2924,14 +2932,14 @@ arena_dalloc_bin(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, void *ptr, rpages_ind = pageind - arena_mapbits_small_runind_get(chunk, pageind); run = &arena_miscelm_get_mutable(chunk, rpages_ind)->run; bin = &arena->bins[run->binind]; - malloc_mutex_lock(tsd, &bin->lock); - arena_dalloc_bin_locked_impl(tsd, arena, chunk, ptr, bitselm, false); - malloc_mutex_unlock(tsd, &bin->lock); + malloc_mutex_lock(tsdn, &bin->lock); + arena_dalloc_bin_locked_impl(tsdn, arena, chunk, ptr, bitselm, false); + malloc_mutex_unlock(tsdn, &bin->lock); } void -arena_dalloc_small(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, void *ptr, - size_t pageind) +arena_dalloc_small(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, + void *ptr, size_t pageind) { arena_chunk_map_bits_t *bitselm; @@ -2941,8 +2949,8 @@ arena_dalloc_small(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, void *ptr, pageind)) != BININD_INVALID); } bitselm = arena_bitselm_get_mutable(chunk, pageind); - arena_dalloc_bin(tsd, arena, chunk, ptr, pageind, bitselm); - arena_decay_tick(tsd, arena); + arena_dalloc_bin(tsdn, arena, chunk, ptr, pageind, bitselm); + arena_decay_tick(tsdn, arena); } #ifdef JEMALLOC_JET @@ -2964,8 +2972,8 @@ arena_dalloc_junk_large_t *arena_dalloc_junk_large = #endif static void -arena_dalloc_large_locked_impl(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, - void *ptr, bool junked) +arena_dalloc_large_locked_impl(tsdn_t *tsdn, arena_t *arena, + arena_chunk_t *chunk, void *ptr, bool junked) { size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; arena_chunk_map_misc_t *miscelm = arena_miscelm_get_mutable(chunk, @@ -2988,29 +2996,30 @@ arena_dalloc_large_locked_impl(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, } } - arena_run_dalloc(tsd, arena, run, true, false, false); + arena_run_dalloc(tsdn, arena, run, true, false, false); } void -arena_dalloc_large_junked_locked(tsd_t *tsd, arena_t *arena, +arena_dalloc_large_junked_locked(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, void *ptr) { - arena_dalloc_large_locked_impl(tsd, arena, chunk, ptr, true); + arena_dalloc_large_locked_impl(tsdn, arena, chunk, ptr, true); } void -arena_dalloc_large(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, void *ptr) +arena_dalloc_large(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, + void *ptr) { - malloc_mutex_lock(tsd, &arena->lock); - arena_dalloc_large_locked_impl(tsd, arena, chunk, ptr, false); - malloc_mutex_unlock(tsd, &arena->lock); - arena_decay_tick(tsd, arena); + malloc_mutex_lock(tsdn, &arena->lock); + arena_dalloc_large_locked_impl(tsdn, arena, chunk, ptr, false); + malloc_mutex_unlock(tsdn, &arena->lock); + arena_decay_tick(tsdn, arena); } static void -arena_ralloc_large_shrink(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, +arena_ralloc_large_shrink(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, void *ptr, size_t oldsize, size_t size) { size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; @@ -3024,8 +3033,8 @@ arena_ralloc_large_shrink(tsd_t 
*tsd, arena_t *arena, arena_chunk_t *chunk, * Shrink the run, and make trailing pages available for other * allocations. */ - malloc_mutex_lock(tsd, &arena->lock); - arena_run_trim_tail(tsd, arena, chunk, run, oldsize + large_pad, size + + malloc_mutex_lock(tsdn, &arena->lock); + arena_run_trim_tail(tsdn, arena, chunk, run, oldsize + large_pad, size + large_pad, true); if (config_stats) { szind_t oldindex = size2index(oldsize) - NBINS; @@ -3043,11 +3052,11 @@ arena_ralloc_large_shrink(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, arena->stats.lstats[index].nrequests++; arena->stats.lstats[index].curruns++; } - malloc_mutex_unlock(tsd, &arena->lock); + malloc_mutex_unlock(tsdn, &arena->lock); } static bool -arena_ralloc_large_grow(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, +arena_ralloc_large_grow(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, void *ptr, size_t oldsize, size_t usize_min, size_t usize_max, bool zero) { size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; @@ -3058,7 +3067,7 @@ arena_ralloc_large_grow(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, large_pad); /* Try to extend the run. */ - malloc_mutex_lock(tsd, &arena->lock); + malloc_mutex_lock(tsdn, &arena->lock); if (pageind+npages >= chunk_npages || arena_mapbits_allocated_get(chunk, pageind+npages) != 0) goto label_fail; @@ -3138,11 +3147,11 @@ arena_ralloc_large_grow(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, arena->stats.lstats[index].nrequests++; arena->stats.lstats[index].curruns++; } - malloc_mutex_unlock(tsd, &arena->lock); + malloc_mutex_unlock(tsdn, &arena->lock); return (false); } label_fail: - malloc_mutex_unlock(tsd, &arena->lock); + malloc_mutex_unlock(tsdn, &arena->lock); return (true); } @@ -3171,7 +3180,7 @@ arena_ralloc_junk_large_t *arena_ralloc_junk_large = * always fail if growing an object, and the following run is already in use. */ static bool -arena_ralloc_large(tsd_t *tsd, void *ptr, size_t oldsize, size_t usize_min, +arena_ralloc_large(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t usize_min, size_t usize_max, bool zero) { arena_chunk_t *chunk; @@ -3186,16 +3195,16 @@ arena_ralloc_large(tsd_t *tsd, void *ptr, size_t oldsize, size_t usize_min, arena = extent_node_arena_get(&chunk->node); if (oldsize < usize_max) { - bool ret = arena_ralloc_large_grow(tsd, arena, chunk, ptr, + bool ret = arena_ralloc_large_grow(tsdn, arena, chunk, ptr, oldsize, usize_min, usize_max, zero); if (config_fill && !ret && !zero) { if (unlikely(opt_junk_alloc)) { memset((void *)((uintptr_t)ptr + oldsize), JEMALLOC_ALLOC_JUNK, - isalloc(tsd, ptr, config_prof) - oldsize); + isalloc(tsdn, ptr, config_prof) - oldsize); } else if (unlikely(opt_zero)) { memset((void *)((uintptr_t)ptr + oldsize), 0, - isalloc(tsd, ptr, config_prof) - oldsize); + isalloc(tsdn, ptr, config_prof) - oldsize); } } return (ret); @@ -3204,12 +3213,12 @@ arena_ralloc_large(tsd_t *tsd, void *ptr, size_t oldsize, size_t usize_min, assert(oldsize > usize_max); /* Fill before shrinking in order avoid a race. 
*/ arena_ralloc_junk_large(ptr, oldsize, usize_max); - arena_ralloc_large_shrink(tsd, arena, chunk, ptr, oldsize, usize_max); + arena_ralloc_large_shrink(tsdn, arena, chunk, ptr, oldsize, usize_max); return (false); } bool -arena_ralloc_no_move(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, +arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t extra, bool zero) { size_t usize_min, usize_max; @@ -3239,32 +3248,32 @@ arena_ralloc_no_move(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, } else { if (usize_max <= SMALL_MAXCLASS) return (true); - if (arena_ralloc_large(tsd, ptr, oldsize, usize_min, + if (arena_ralloc_large(tsdn, ptr, oldsize, usize_min, usize_max, zero)) return (true); } chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - arena_decay_tick(tsd, extent_node_arena_get(&chunk->node)); + arena_decay_tick(tsdn, extent_node_arena_get(&chunk->node)); return (false); } else { - return (huge_ralloc_no_move(tsd, ptr, oldsize, usize_min, + return (huge_ralloc_no_move(tsdn, ptr, oldsize, usize_min, usize_max, zero)); } } static void * -arena_ralloc_move_helper(tsd_t *tsd, arena_t *arena, size_t usize, +arena_ralloc_move_helper(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero, tcache_t *tcache) { if (alignment == 0) - return (arena_malloc(tsd, arena, usize, size2index(usize), zero, - tcache, true)); + return (arena_malloc(tsdn, arena, usize, size2index(usize), + zero, tcache, true)); usize = sa2u(usize, alignment); if (unlikely(usize == 0 || usize > HUGE_MAXCLASS)) return (NULL); - return (ipalloct(tsd, usize, alignment, zero, tcache, arena)); + return (ipalloct(tsdn, usize, alignment, zero, tcache, arena)); } void * @@ -3282,7 +3291,8 @@ arena_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, size_t copysize; /* Try to avoid moving the allocation. */ - if (!arena_ralloc_no_move(tsd, ptr, oldsize, usize, 0, zero)) + if (!arena_ralloc_no_move(tsd_tsdn(tsd), ptr, oldsize, usize, 0, + zero)) return (ptr); /* @@ -3290,8 +3300,8 @@ arena_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, * the object. In that case, fall back to allocating new space * and copying. 
*/ - ret = arena_ralloc_move_helper(tsd, arena, usize, alignment, - zero, tcache); + ret = arena_ralloc_move_helper(tsd_tsdn(tsd), arena, usize, + alignment, zero, tcache); if (ret == NULL) return (NULL); @@ -3312,25 +3322,25 @@ arena_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, } dss_prec_t -arena_dss_prec_get(tsd_t *tsd, arena_t *arena) +arena_dss_prec_get(tsdn_t *tsdn, arena_t *arena) { dss_prec_t ret; - malloc_mutex_lock(tsd, &arena->lock); + malloc_mutex_lock(tsdn, &arena->lock); ret = arena->dss_prec; - malloc_mutex_unlock(tsd, &arena->lock); + malloc_mutex_unlock(tsdn, &arena->lock); return (ret); } bool -arena_dss_prec_set(tsd_t *tsd, arena_t *arena, dss_prec_t dss_prec) +arena_dss_prec_set(tsdn_t *tsdn, arena_t *arena, dss_prec_t dss_prec) { if (!have_dss) return (dss_prec != dss_prec_disabled); - malloc_mutex_lock(tsd, &arena->lock); + malloc_mutex_lock(tsdn, &arena->lock); arena->dss_prec = dss_prec; - malloc_mutex_unlock(tsd, &arena->lock); + malloc_mutex_unlock(tsdn, &arena->lock); return (false); } @@ -3387,19 +3397,19 @@ arena_basic_stats_merge_locked(arena_t *arena, unsigned *nthreads, } void -arena_basic_stats_merge(tsd_t *tsd, arena_t *arena, unsigned *nthreads, +arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *lg_dirty_mult, ssize_t *decay_time, size_t *nactive, size_t *ndirty) { - malloc_mutex_lock(tsd, &arena->lock); + malloc_mutex_lock(tsdn, &arena->lock); arena_basic_stats_merge_locked(arena, nthreads, dss, lg_dirty_mult, decay_time, nactive, ndirty); - malloc_mutex_unlock(tsd, &arena->lock); + malloc_mutex_unlock(tsdn, &arena->lock); } void -arena_stats_merge(tsd_t *tsd, arena_t *arena, unsigned *nthreads, +arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *lg_dirty_mult, ssize_t *decay_time, size_t *nactive, size_t *ndirty, arena_stats_t *astats, malloc_bin_stats_t *bstats, malloc_large_stats_t *lstats, @@ -3409,7 +3419,7 @@ arena_stats_merge(tsd_t *tsd, arena_t *arena, unsigned *nthreads, cassert(config_stats); - malloc_mutex_lock(tsd, &arena->lock); + malloc_mutex_lock(tsdn, &arena->lock); arena_basic_stats_merge_locked(arena, nthreads, dss, lg_dirty_mult, decay_time, nactive, ndirty); @@ -3440,12 +3450,12 @@ arena_stats_merge(tsd_t *tsd, arena_t *arena, unsigned *nthreads, hstats[i].ndalloc += arena->stats.hstats[i].ndalloc; hstats[i].curhchunks += arena->stats.hstats[i].curhchunks; } - malloc_mutex_unlock(tsd, &arena->lock); + malloc_mutex_unlock(tsdn, &arena->lock); for (i = 0; i < NBINS; i++) { arena_bin_t *bin = &arena->bins[i]; - malloc_mutex_lock(tsd, &bin->lock); + malloc_mutex_lock(tsdn, &bin->lock); bstats[i].nmalloc += bin->stats.nmalloc; bstats[i].ndalloc += bin->stats.ndalloc; bstats[i].nrequests += bin->stats.nrequests; @@ -3457,7 +3467,7 @@ arena_stats_merge(tsd_t *tsd, arena_t *arena, unsigned *nthreads, bstats[i].nruns += bin->stats.nruns; bstats[i].reruns += bin->stats.reruns; bstats[i].curruns += bin->stats.curruns; - malloc_mutex_unlock(tsd, &bin->lock); + malloc_mutex_unlock(tsdn, &bin->lock); } } @@ -3483,7 +3493,7 @@ arena_nthreads_dec(arena_t *arena, bool internal) } arena_t * -arena_new(tsd_t *tsd, unsigned ind) +arena_new(tsdn_t *tsdn, unsigned ind) { arena_t *arena; size_t arena_size; @@ -3497,11 +3507,12 @@ arena_new(tsd_t *tsd, unsigned ind) * because there is no way to clean up if base_alloc() OOMs. 
*/ if (config_stats) { - arena = (arena_t *)base_alloc(tsd, CACHELINE_CEILING(arena_size) - + QUANTUM_CEILING(nlclasses * sizeof(malloc_large_stats_t) + - nhclasses) * sizeof(malloc_huge_stats_t)); + arena = (arena_t *)base_alloc(tsdn, + CACHELINE_CEILING(arena_size) + QUANTUM_CEILING(nlclasses * + sizeof(malloc_large_stats_t) + nhclasses) * + sizeof(malloc_huge_stats_t)); } else - arena = (arena_t *)base_alloc(tsd, arena_size); + arena = (arena_t *)base_alloc(tsdn, arena_size); if (arena == NULL) return (NULL); @@ -3540,7 +3551,7 @@ arena_new(tsd_t *tsd, unsigned ind) (uint64_t)(uintptr_t)arena; } - arena->dss_prec = chunk_dss_prec_get(tsd); + arena->dss_prec = chunk_dss_prec_get(tsdn); ql_new(&arena->achunks); @@ -3823,58 +3834,58 @@ arena_boot(void) } void -arena_prefork0(tsd_t *tsd, arena_t *arena) +arena_prefork0(tsdn_t *tsdn, arena_t *arena) { - malloc_mutex_prefork(tsd, &arena->lock); + malloc_mutex_prefork(tsdn, &arena->lock); } void -arena_prefork1(tsd_t *tsd, arena_t *arena) +arena_prefork1(tsdn_t *tsdn, arena_t *arena) { - malloc_mutex_prefork(tsd, &arena->chunks_mtx); + malloc_mutex_prefork(tsdn, &arena->chunks_mtx); } void -arena_prefork2(tsd_t *tsd, arena_t *arena) +arena_prefork2(tsdn_t *tsdn, arena_t *arena) { - malloc_mutex_prefork(tsd, &arena->node_cache_mtx); + malloc_mutex_prefork(tsdn, &arena->node_cache_mtx); } void -arena_prefork3(tsd_t *tsd, arena_t *arena) +arena_prefork3(tsdn_t *tsdn, arena_t *arena) { unsigned i; for (i = 0; i < NBINS; i++) - malloc_mutex_prefork(tsd, &arena->bins[i].lock); - malloc_mutex_prefork(tsd, &arena->huge_mtx); + malloc_mutex_prefork(tsdn, &arena->bins[i].lock); + malloc_mutex_prefork(tsdn, &arena->huge_mtx); } void -arena_postfork_parent(tsd_t *tsd, arena_t *arena) +arena_postfork_parent(tsdn_t *tsdn, arena_t *arena) { unsigned i; - malloc_mutex_postfork_parent(tsd, &arena->huge_mtx); + malloc_mutex_postfork_parent(tsdn, &arena->huge_mtx); for (i = 0; i < NBINS; i++) - malloc_mutex_postfork_parent(tsd, &arena->bins[i].lock); - malloc_mutex_postfork_parent(tsd, &arena->node_cache_mtx); - malloc_mutex_postfork_parent(tsd, &arena->chunks_mtx); - malloc_mutex_postfork_parent(tsd, &arena->lock); + malloc_mutex_postfork_parent(tsdn, &arena->bins[i].lock); + malloc_mutex_postfork_parent(tsdn, &arena->node_cache_mtx); + malloc_mutex_postfork_parent(tsdn, &arena->chunks_mtx); + malloc_mutex_postfork_parent(tsdn, &arena->lock); } void -arena_postfork_child(tsd_t *tsd, arena_t *arena) +arena_postfork_child(tsdn_t *tsdn, arena_t *arena) { unsigned i; - malloc_mutex_postfork_child(tsd, &arena->huge_mtx); + malloc_mutex_postfork_child(tsdn, &arena->huge_mtx); for (i = 0; i < NBINS; i++) - malloc_mutex_postfork_child(tsd, &arena->bins[i].lock); - malloc_mutex_postfork_child(tsd, &arena->node_cache_mtx); - malloc_mutex_postfork_child(tsd, &arena->chunks_mtx); - malloc_mutex_postfork_child(tsd, &arena->lock); + malloc_mutex_postfork_child(tsdn, &arena->bins[i].lock); + malloc_mutex_postfork_child(tsdn, &arena->node_cache_mtx); + malloc_mutex_postfork_child(tsdn, &arena->chunks_mtx); + malloc_mutex_postfork_child(tsdn, &arena->lock); } diff --git a/src/base.c b/src/base.c index 901553a1..81b0801f 100644 --- a/src/base.c +++ b/src/base.c @@ -14,11 +14,11 @@ static size_t base_mapped; /******************************************************************************/ static extent_node_t * -base_node_try_alloc(tsd_t *tsd) +base_node_try_alloc(tsdn_t *tsdn) { extent_node_t *node; - malloc_mutex_assert_owner(tsd, &base_mtx); + malloc_mutex_assert_owner(tsdn, 
&base_mtx); if (base_nodes == NULL) return (NULL); @@ -29,10 +29,10 @@ base_node_try_alloc(tsd_t *tsd) } static void -base_node_dalloc(tsd_t *tsd, extent_node_t *node) +base_node_dalloc(tsdn_t *tsdn, extent_node_t *node) { - malloc_mutex_assert_owner(tsd, &base_mtx); + malloc_mutex_assert_owner(tsdn, &base_mtx); JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(node, sizeof(extent_node_t)); *(extent_node_t **)node = base_nodes; @@ -40,22 +40,22 @@ base_node_dalloc(tsd_t *tsd, extent_node_t *node) } static extent_node_t * -base_chunk_alloc(tsd_t *tsd, size_t minsize) +base_chunk_alloc(tsdn_t *tsdn, size_t minsize) { extent_node_t *node; size_t csize, nsize; void *addr; - malloc_mutex_assert_owner(tsd, &base_mtx); + malloc_mutex_assert_owner(tsdn, &base_mtx); assert(minsize != 0); - node = base_node_try_alloc(tsd); + node = base_node_try_alloc(tsdn); /* Allocate enough space to also carve a node out if necessary. */ nsize = (node == NULL) ? CACHELINE_CEILING(sizeof(extent_node_t)) : 0; csize = CHUNK_CEILING(minsize + nsize); addr = chunk_alloc_base(csize); if (addr == NULL) { if (node != NULL) - base_node_dalloc(tsd, node); + base_node_dalloc(tsdn, node); return (NULL); } base_mapped += csize; @@ -78,7 +78,7 @@ base_chunk_alloc(tsd_t *tsd, size_t minsize) * physical memory usage. */ void * -base_alloc(tsd_t *tsd, size_t size) +base_alloc(tsdn_t *tsdn, size_t size) { void *ret; size_t csize, usize; @@ -93,14 +93,14 @@ base_alloc(tsd_t *tsd, size_t size) usize = s2u(csize); extent_node_init(&key, NULL, NULL, usize, false, false); - malloc_mutex_lock(tsd, &base_mtx); + malloc_mutex_lock(tsdn, &base_mtx); node = extent_tree_szad_nsearch(&base_avail_szad, &key); if (node != NULL) { /* Use existing space. */ extent_tree_szad_remove(&base_avail_szad, node); } else { /* Try to allocate more space. 
*/ - node = base_chunk_alloc(tsd, csize); + node = base_chunk_alloc(tsdn, csize); } if (node == NULL) { ret = NULL; @@ -113,7 +113,7 @@ base_alloc(tsd_t *tsd, size_t size) extent_node_size_set(node, extent_node_size_get(node) - csize); extent_tree_szad_insert(&base_avail_szad, node); } else - base_node_dalloc(tsd, node); + base_node_dalloc(tsdn, node); if (config_stats) { base_allocated += csize; /* @@ -125,21 +125,22 @@ base_alloc(tsd_t *tsd, size_t size) } JEMALLOC_VALGRIND_MAKE_MEM_DEFINED(ret, csize); label_return: - malloc_mutex_unlock(tsd, &base_mtx); + malloc_mutex_unlock(tsdn, &base_mtx); return (ret); } void -base_stats_get(tsd_t *tsd, size_t *allocated, size_t *resident, size_t *mapped) +base_stats_get(tsdn_t *tsdn, size_t *allocated, size_t *resident, + size_t *mapped) { - malloc_mutex_lock(tsd, &base_mtx); + malloc_mutex_lock(tsdn, &base_mtx); assert(base_allocated <= base_resident); assert(base_resident <= base_mapped); *allocated = base_allocated; *resident = base_resident; *mapped = base_mapped; - malloc_mutex_unlock(tsd, &base_mtx); + malloc_mutex_unlock(tsdn, &base_mtx); } bool @@ -155,22 +156,22 @@ base_boot(void) } void -base_prefork(tsd_t *tsd) +base_prefork(tsdn_t *tsdn) { - malloc_mutex_prefork(tsd, &base_mtx); + malloc_mutex_prefork(tsdn, &base_mtx); } void -base_postfork_parent(tsd_t *tsd) +base_postfork_parent(tsdn_t *tsdn) { - malloc_mutex_postfork_parent(tsd, &base_mtx); + malloc_mutex_postfork_parent(tsdn, &base_mtx); } void -base_postfork_child(tsd_t *tsd) +base_postfork_child(tsdn_t *tsdn) { - malloc_mutex_postfork_child(tsd, &base_mtx); + malloc_mutex_postfork_child(tsdn, &base_mtx); } diff --git a/src/chunk.c b/src/chunk.c index 1f2afd9d..adc666ff 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -49,7 +49,7 @@ const chunk_hooks_t chunk_hooks_default = { * definition. 
*/ -static void chunk_record(tsd_t *tsd, arena_t *arena, +static void chunk_record(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, bool cache, void *chunk, size_t size, bool zeroed, bool committed); @@ -64,23 +64,23 @@ chunk_hooks_get_locked(arena_t *arena) } chunk_hooks_t -chunk_hooks_get(tsd_t *tsd, arena_t *arena) +chunk_hooks_get(tsdn_t *tsdn, arena_t *arena) { chunk_hooks_t chunk_hooks; - malloc_mutex_lock(tsd, &arena->chunks_mtx); + malloc_mutex_lock(tsdn, &arena->chunks_mtx); chunk_hooks = chunk_hooks_get_locked(arena); - malloc_mutex_unlock(tsd, &arena->chunks_mtx); + malloc_mutex_unlock(tsdn, &arena->chunks_mtx); return (chunk_hooks); } chunk_hooks_t -chunk_hooks_set(tsd_t *tsd, arena_t *arena, const chunk_hooks_t *chunk_hooks) +chunk_hooks_set(tsdn_t *tsdn, arena_t *arena, const chunk_hooks_t *chunk_hooks) { chunk_hooks_t old_chunk_hooks; - malloc_mutex_lock(tsd, &arena->chunks_mtx); + malloc_mutex_lock(tsdn, &arena->chunks_mtx); old_chunk_hooks = arena->chunk_hooks; /* * Copy each field atomically so that it is impossible for readers to @@ -105,13 +105,13 @@ chunk_hooks_set(tsd_t *tsd, arena_t *arena, const chunk_hooks_t *chunk_hooks) ATOMIC_COPY_HOOK(split); ATOMIC_COPY_HOOK(merge); #undef ATOMIC_COPY_HOOK - malloc_mutex_unlock(tsd, &arena->chunks_mtx); + malloc_mutex_unlock(tsdn, &arena->chunks_mtx); return (old_chunk_hooks); } static void -chunk_hooks_assure_initialized_impl(tsd_t *tsd, arena_t *arena, +chunk_hooks_assure_initialized_impl(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, bool locked) { static const chunk_hooks_t uninitialized_hooks = @@ -120,28 +120,28 @@ chunk_hooks_assure_initialized_impl(tsd_t *tsd, arena_t *arena, if (memcmp(chunk_hooks, &uninitialized_hooks, sizeof(chunk_hooks_t)) == 0) { *chunk_hooks = locked ? chunk_hooks_get_locked(arena) : - chunk_hooks_get(tsd, arena); + chunk_hooks_get(tsdn, arena); } } static void -chunk_hooks_assure_initialized_locked(tsd_t *tsd, arena_t *arena, +chunk_hooks_assure_initialized_locked(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks) { - chunk_hooks_assure_initialized_impl(tsd, arena, chunk_hooks, true); + chunk_hooks_assure_initialized_impl(tsdn, arena, chunk_hooks, true); } static void -chunk_hooks_assure_initialized(tsd_t *tsd, arena_t *arena, +chunk_hooks_assure_initialized(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks) { - chunk_hooks_assure_initialized_impl(tsd, arena, chunk_hooks, false); + chunk_hooks_assure_initialized_impl(tsdn, arena, chunk_hooks, false); } bool -chunk_register(tsd_t *tsd, const void *chunk, const extent_node_t *node) +chunk_register(tsdn_t *tsdn, const void *chunk, const extent_node_t *node) { assert(extent_node_addr_get(node) == chunk); @@ -161,7 +161,7 @@ chunk_register(tsd_t *tsd, const void *chunk, const extent_node_t *node) high = atomic_read_z(&highchunks); } if (cur > high && prof_gdump_get_unlocked()) - prof_gdump(tsd); + prof_gdump(tsdn); } return (false); @@ -199,7 +199,7 @@ chunk_first_best_fit(arena_t *arena, extent_tree_t *chunks_szad, } static void * -chunk_recycle(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, +chunk_recycle(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, bool cache, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit, bool dalloc_node) @@ -221,8 +221,8 @@ chunk_recycle(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, /* Beware size_t wrap-around. 
*/ if (alloc_size < size) return (NULL); - malloc_mutex_lock(tsd, &arena->chunks_mtx); - chunk_hooks_assure_initialized_locked(tsd, arena, chunk_hooks); + malloc_mutex_lock(tsdn, &arena->chunks_mtx); + chunk_hooks_assure_initialized_locked(tsdn, arena, chunk_hooks); if (new_addr != NULL) { extent_node_t key; extent_node_init(&key, arena, new_addr, alloc_size, false, @@ -234,7 +234,7 @@ chunk_recycle(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, } if (node == NULL || (new_addr != NULL && extent_node_size_get(node) < size)) { - malloc_mutex_unlock(tsd, &arena->chunks_mtx); + malloc_mutex_unlock(tsdn, &arena->chunks_mtx); return (NULL); } leadsize = ALIGNMENT_CEILING((uintptr_t)extent_node_addr_get(node), @@ -253,7 +253,7 @@ chunk_recycle(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, if (leadsize != 0 && chunk_hooks->split(extent_node_addr_get(node), extent_node_size_get(node), leadsize, size, false, arena->ind)) { - malloc_mutex_unlock(tsd, &arena->chunks_mtx); + malloc_mutex_unlock(tsdn, &arena->chunks_mtx); return (NULL); } /* Remove node from the tree. */ @@ -273,19 +273,19 @@ chunk_recycle(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, if (chunk_hooks->split(ret, size + trailsize, size, trailsize, false, arena->ind)) { if (dalloc_node && node != NULL) - arena_node_dalloc(tsd, arena, node); - malloc_mutex_unlock(tsd, &arena->chunks_mtx); - chunk_record(tsd, arena, chunk_hooks, chunks_szad, + arena_node_dalloc(tsdn, arena, node); + malloc_mutex_unlock(tsdn, &arena->chunks_mtx); + chunk_record(tsdn, arena, chunk_hooks, chunks_szad, chunks_ad, cache, ret, size + trailsize, zeroed, committed); return (NULL); } /* Insert the trailing space as a smaller chunk. */ if (node == NULL) { - node = arena_node_alloc(tsd, arena); + node = arena_node_alloc(tsdn, arena); if (node == NULL) { - malloc_mutex_unlock(tsd, &arena->chunks_mtx); - chunk_record(tsd, arena, chunk_hooks, + malloc_mutex_unlock(tsdn, &arena->chunks_mtx); + chunk_record(tsdn, arena, chunk_hooks, chunks_szad, chunks_ad, cache, ret, size + trailsize, zeroed, committed); return (NULL); @@ -299,16 +299,16 @@ chunk_recycle(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, node = NULL; } if (!committed && chunk_hooks->commit(ret, size, 0, size, arena->ind)) { - malloc_mutex_unlock(tsd, &arena->chunks_mtx); - chunk_record(tsd, arena, chunk_hooks, chunks_szad, chunks_ad, + malloc_mutex_unlock(tsdn, &arena->chunks_mtx); + chunk_record(tsdn, arena, chunk_hooks, chunks_szad, chunks_ad, cache, ret, size, zeroed, committed); return (NULL); } - malloc_mutex_unlock(tsd, &arena->chunks_mtx); + malloc_mutex_unlock(tsdn, &arena->chunks_mtx); assert(dalloc_node || node != NULL); if (dalloc_node && node != NULL) - arena_node_dalloc(tsd, arena, node); + arena_node_dalloc(tsdn, arena, node); if (*zero) { if (!zeroed) memset(ret, 0, size); @@ -331,7 +331,7 @@ chunk_recycle(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, * them if they are returned. */ static void * -chunk_alloc_core(tsd_t *tsd, arena_t *arena, void *new_addr, size_t size, +chunk_alloc_core(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit, dss_prec_t dss_prec) { void *ret; @@ -343,7 +343,7 @@ chunk_alloc_core(tsd_t *tsd, arena_t *arena, void *new_addr, size_t size, /* "primary" dss. 
*/ if (have_dss && dss_prec == dss_prec_primary && (ret = - chunk_alloc_dss(tsd, arena, new_addr, size, alignment, zero, + chunk_alloc_dss(tsdn, arena, new_addr, size, alignment, zero, commit)) != NULL) return (ret); /* mmap. */ @@ -352,7 +352,7 @@ chunk_alloc_core(tsd_t *tsd, arena_t *arena, void *new_addr, size_t size, return (ret); /* "secondary" dss. */ if (have_dss && dss_prec == dss_prec_secondary && (ret = - chunk_alloc_dss(tsd, arena, new_addr, size, alignment, zero, + chunk_alloc_dss(tsdn, arena, new_addr, size, alignment, zero, commit)) != NULL) return (ret); @@ -383,7 +383,7 @@ chunk_alloc_base(size_t size) } void * -chunk_alloc_cache(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, +chunk_alloc_cache(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr, size_t size, size_t alignment, bool *zero, bool dalloc_node) { void *ret; @@ -395,9 +395,9 @@ chunk_alloc_cache(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, assert((alignment & chunksize_mask) == 0); commit = true; - ret = chunk_recycle(tsd, arena, chunk_hooks, &arena->chunks_szad_cached, - &arena->chunks_ad_cached, true, new_addr, size, alignment, zero, - &commit, dalloc_node); + ret = chunk_recycle(tsdn, arena, chunk_hooks, + &arena->chunks_szad_cached, &arena->chunks_ad_cached, true, + new_addr, size, alignment, zero, &commit, dalloc_node); if (ret == NULL) return (NULL); assert(commit); @@ -407,11 +407,11 @@ chunk_alloc_cache(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, } static arena_t * -chunk_arena_get(tsd_t *tsd, unsigned arena_ind) +chunk_arena_get(tsdn_t *tsdn, unsigned arena_ind) { arena_t *arena; - arena = arena_get(tsd, arena_ind, false); + arena = arena_get(tsdn, arena_ind, false); /* * The arena we're allocating on behalf of must have been initialized * already. 
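Taken together, these hunks apply one mechanical convention: functions that may be reached before thread-specific data exists now take a nullable tsdn_t * handle rather than a tsd_t *, obtain it with tsdn_fetch() where no tsd is guaranteed (as in chunk_alloc_default below), and unwrap it with tsdn_tsd() only after a tsdn_null() check (as arena_malloc_hard and arena_palloc_large do). The standalone sketch that follows is not part of the patch; it only illustrates the shape of that guard, and the struct layouts and example_choose() helper are invented stand-ins, not jemalloc's real definitions.

/*
 * Minimal, self-contained sketch of the tsd -> tsdn ("tsd or NULL")
 * convention used throughout this patch.  Types and example_choose()
 * are illustrative only.
 */
#include <stddef.h>
#include <stdio.h>

typedef struct { int dummy; } tsd_t;	/* thread-specific data (illustrative) */
typedef struct { tsd_t *tsd; } tsdn_t;	/* nullable wrapper (illustrative) */

static int
tsdn_null(const tsdn_t *tsdn)		/* mirrors the check in arena_malloc_hard */
{

	return (tsdn->tsd == NULL);
}

static tsd_t *
tsdn_tsd(tsdn_t *tsdn)			/* only legal when !tsdn_null(tsdn) */
{

	return (tsdn->tsd);
}

/*
 * Callers that merely lock/unlock pass tsdn straight through; callers that
 * need per-thread state (e.g. arena_choose) must first prove tsd is present.
 */
static const char *
example_choose(tsdn_t *tsdn)
{

	if (!tsdn_null(tsdn))
		return ("per-thread arena chosen via tsdn_tsd(tsdn)");
	return ("no tsd: caller must supply an arena explicitly");
}

int
main(void)
{
	tsd_t tsd = {0};
	tsdn_t with_tsd = { &tsd };
	tsdn_t without_tsd = { NULL };

	printf("%s\n", example_choose(&with_tsd));
	printf("%s\n", example_choose(&without_tsd));
	return (0);
}
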
@@ -425,12 +425,12 @@ chunk_alloc_default(void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit, unsigned arena_ind) { void *ret; - tsd_t *tsd; + tsdn_t *tsdn; arena_t *arena; - tsd = tsd_fetch(); - arena = chunk_arena_get(tsd, arena_ind); - ret = chunk_alloc_core(tsd, arena, new_addr, size, alignment, zero, + tsdn = tsdn_fetch(); + arena = chunk_arena_get(tsdn, arena_ind); + ret = chunk_alloc_core(tsdn, arena, new_addr, size, alignment, zero, commit, arena->dss_prec); if (ret == NULL) return (NULL); @@ -441,7 +441,7 @@ chunk_alloc_default(void *new_addr, size_t size, size_t alignment, bool *zero, } static void * -chunk_alloc_retained(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, +chunk_alloc_retained(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit) { void *ret; @@ -451,7 +451,7 @@ chunk_alloc_retained(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, assert(alignment != 0); assert((alignment & chunksize_mask) == 0); - ret = chunk_recycle(tsd, arena, chunk_hooks, + ret = chunk_recycle(tsdn, arena, chunk_hooks, &arena->chunks_szad_retained, &arena->chunks_ad_retained, false, new_addr, size, alignment, zero, commit, true); @@ -462,14 +462,14 @@ chunk_alloc_retained(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, } void * -chunk_alloc_wrapper(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, +chunk_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit) { void *ret; - chunk_hooks_assure_initialized(tsd, arena, chunk_hooks); + chunk_hooks_assure_initialized(tsdn, arena, chunk_hooks); - ret = chunk_alloc_retained(tsd, arena, chunk_hooks, new_addr, size, + ret = chunk_alloc_retained(tsdn, arena, chunk_hooks, new_addr, size, alignment, zero, commit); if (ret == NULL) { ret = chunk_hooks->alloc(new_addr, size, alignment, zero, @@ -484,7 +484,7 @@ chunk_alloc_wrapper(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, } static void -chunk_record(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, +chunk_record(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, bool cache, void *chunk, size_t size, bool zeroed, bool committed) { @@ -496,8 +496,8 @@ chunk_record(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, unzeroed = cache || !zeroed; JEMALLOC_VALGRIND_MAKE_MEM_NOACCESS(chunk, size); - malloc_mutex_lock(tsd, &arena->chunks_mtx); - chunk_hooks_assure_initialized_locked(tsd, arena, chunk_hooks); + malloc_mutex_lock(tsdn, &arena->chunks_mtx); + chunk_hooks_assure_initialized_locked(tsdn, arena, chunk_hooks); extent_node_init(&key, arena, (void *)((uintptr_t)chunk + size), 0, false, false); node = extent_tree_ad_nsearch(chunks_ad, &key); @@ -522,7 +522,7 @@ chunk_record(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, arena_chunk_cache_maybe_insert(arena, node, cache); } else { /* Coalescing forward failed, so insert a new node. */ - node = arena_node_alloc(tsd, arena); + node = arena_node_alloc(tsdn, arena); if (node == NULL) { /* * Node allocation failed, which is an exceedingly @@ -531,7 +531,7 @@ chunk_record(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, * a virtual memory leak. 
*/ if (cache) { - chunk_purge_wrapper(tsd, arena, chunk_hooks, + chunk_purge_wrapper(tsdn, arena, chunk_hooks, chunk, size, 0, size); } goto label_return; @@ -568,15 +568,15 @@ chunk_record(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, extent_tree_szad_insert(chunks_szad, node); arena_chunk_cache_maybe_insert(arena, node, cache); - arena_node_dalloc(tsd, arena, prev); + arena_node_dalloc(tsdn, arena, prev); } label_return: - malloc_mutex_unlock(tsd, &arena->chunks_mtx); + malloc_mutex_unlock(tsdn, &arena->chunks_mtx); } void -chunk_dalloc_cache(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, +chunk_dalloc_cache(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk, size_t size, bool committed) { @@ -585,9 +585,9 @@ chunk_dalloc_cache(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, assert(size != 0); assert((size & chunksize_mask) == 0); - chunk_record(tsd, arena, chunk_hooks, &arena->chunks_szad_cached, + chunk_record(tsdn, arena, chunk_hooks, &arena->chunks_szad_cached, &arena->chunks_ad_cached, true, chunk, size, false, committed); - arena_maybe_purge(tsd, arena); + arena_maybe_purge(tsdn, arena); } static bool @@ -595,13 +595,13 @@ chunk_dalloc_default(void *chunk, size_t size, bool committed, unsigned arena_ind) { - if (!have_dss || !chunk_in_dss(tsd_fetch(), chunk)) + if (!have_dss || !chunk_in_dss(tsdn_fetch(), chunk)) return (chunk_dalloc_mmap(chunk, size)); return (true); } void -chunk_dalloc_wrapper(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, +chunk_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk, size_t size, bool zeroed, bool committed) { @@ -610,7 +610,7 @@ chunk_dalloc_wrapper(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, assert(size != 0); assert((size & chunksize_mask) == 0); - chunk_hooks_assure_initialized(tsd, arena, chunk_hooks); + chunk_hooks_assure_initialized(tsdn, arena, chunk_hooks); /* Try to deallocate. 
*/ if (!chunk_hooks->dalloc(chunk, size, committed, arena->ind)) return; @@ -621,7 +621,7 @@ chunk_dalloc_wrapper(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, } zeroed = !committed || !chunk_hooks->purge(chunk, size, 0, size, arena->ind); - chunk_record(tsd, arena, chunk_hooks, &arena->chunks_szad_retained, + chunk_record(tsdn, arena, chunk_hooks, &arena->chunks_szad_retained, &arena->chunks_ad_retained, false, chunk, size, zeroed, committed); if (config_stats) @@ -662,11 +662,11 @@ chunk_purge_default(void *chunk, size_t size, size_t offset, size_t length, } bool -chunk_purge_wrapper(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, +chunk_purge_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk, size_t size, size_t offset, size_t length) { - chunk_hooks_assure_initialized(tsd, arena, chunk_hooks); + chunk_hooks_assure_initialized(tsdn, arena, chunk_hooks); return (chunk_hooks->purge(chunk, size, offset, length, arena->ind)); } @@ -688,8 +688,8 @@ chunk_merge_default(void *chunk_a, size_t size_a, void *chunk_b, size_t size_b, if (!maps_coalesce) return (true); if (have_dss) { - tsd_t *tsd = tsd_fetch(); - if (chunk_in_dss(tsd, chunk_a) != chunk_in_dss(tsd, chunk_b)) + tsdn_t *tsdn = tsdn_fetch(); + if (chunk_in_dss(tsdn, chunk_a) != chunk_in_dss(tsdn, chunk_b)) return (true); } @@ -700,7 +700,7 @@ static rtree_node_elm_t * chunks_rtree_node_alloc(size_t nelms) { - return ((rtree_node_elm_t *)base_alloc(tsd_fetch(), nelms * + return ((rtree_node_elm_t *)base_alloc(tsdn_fetch(), nelms * sizeof(rtree_node_elm_t))); } @@ -747,22 +747,22 @@ chunk_boot(void) } void -chunk_prefork(tsd_t *tsd) +chunk_prefork(tsdn_t *tsdn) { - chunk_dss_prefork(tsd); + chunk_dss_prefork(tsdn); } void -chunk_postfork_parent(tsd_t *tsd) +chunk_postfork_parent(tsdn_t *tsdn) { - chunk_dss_postfork_parent(tsd); + chunk_dss_postfork_parent(tsdn); } void -chunk_postfork_child(tsd_t *tsd) +chunk_postfork_child(tsdn_t *tsdn) { - chunk_dss_postfork_child(tsd); + chunk_dss_postfork_child(tsdn); } diff --git a/src/chunk_dss.c b/src/chunk_dss.c index 3b3f2433..0b1f82bd 100644 --- a/src/chunk_dss.c +++ b/src/chunk_dss.c @@ -41,32 +41,32 @@ chunk_dss_sbrk(intptr_t increment) } dss_prec_t -chunk_dss_prec_get(tsd_t *tsd) +chunk_dss_prec_get(tsdn_t *tsdn) { dss_prec_t ret; if (!have_dss) return (dss_prec_disabled); - malloc_mutex_lock(tsd, &dss_mtx); + malloc_mutex_lock(tsdn, &dss_mtx); ret = dss_prec_default; - malloc_mutex_unlock(tsd, &dss_mtx); + malloc_mutex_unlock(tsdn, &dss_mtx); return (ret); } bool -chunk_dss_prec_set(tsd_t *tsd, dss_prec_t dss_prec) +chunk_dss_prec_set(tsdn_t *tsdn, dss_prec_t dss_prec) { if (!have_dss) return (dss_prec != dss_prec_disabled); - malloc_mutex_lock(tsd, &dss_mtx); + malloc_mutex_lock(tsdn, &dss_mtx); dss_prec_default = dss_prec; - malloc_mutex_unlock(tsd, &dss_mtx); + malloc_mutex_unlock(tsdn, &dss_mtx); return (false); } void * -chunk_alloc_dss(tsd_t *tsd, arena_t *arena, void *new_addr, size_t size, +chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit) { cassert(have_dss); @@ -80,7 +80,7 @@ chunk_alloc_dss(tsd_t *tsd, arena_t *arena, void *new_addr, size_t size, if ((intptr_t)size < 0) return (NULL); - malloc_mutex_lock(tsd, &dss_mtx); + malloc_mutex_lock(tsdn, &dss_mtx); if (dss_prev != (void *)-1) { /* @@ -122,7 +122,7 @@ chunk_alloc_dss(tsd_t *tsd, arena_t *arena, void *new_addr, size_t size, if ((uintptr_t)ret < (uintptr_t)dss_max || (uintptr_t)dss_next < (uintptr_t)dss_max) { 
/* Wrap-around. */ - malloc_mutex_unlock(tsd, &dss_mtx); + malloc_mutex_unlock(tsdn, &dss_mtx); return (NULL); } incr = gap_size + cpad_size + size; @@ -130,11 +130,11 @@ chunk_alloc_dss(tsd_t *tsd, arena_t *arena, void *new_addr, size_t size, if (dss_prev == dss_max) { /* Success. */ dss_max = dss_next; - malloc_mutex_unlock(tsd, &dss_mtx); + malloc_mutex_unlock(tsdn, &dss_mtx); if (cpad_size != 0) { chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; - chunk_dalloc_wrapper(tsd, arena, + chunk_dalloc_wrapper(tsdn, arena, &chunk_hooks, cpad, cpad_size, false, true); } @@ -149,25 +149,25 @@ chunk_alloc_dss(tsd_t *tsd, arena_t *arena, void *new_addr, size_t size, } } while (dss_prev != (void *)-1); } - malloc_mutex_unlock(tsd, &dss_mtx); + malloc_mutex_unlock(tsdn, &dss_mtx); return (NULL); } bool -chunk_in_dss(tsd_t *tsd, void *chunk) +chunk_in_dss(tsdn_t *tsdn, void *chunk) { bool ret; cassert(have_dss); - malloc_mutex_lock(tsd, &dss_mtx); + malloc_mutex_lock(tsdn, &dss_mtx); if ((uintptr_t)chunk >= (uintptr_t)dss_base && (uintptr_t)chunk < (uintptr_t)dss_max) ret = true; else ret = false; - malloc_mutex_unlock(tsd, &dss_mtx); + malloc_mutex_unlock(tsdn, &dss_mtx); return (ret); } @@ -188,27 +188,27 @@ chunk_dss_boot(void) } void -chunk_dss_prefork(tsd_t *tsd) +chunk_dss_prefork(tsdn_t *tsdn) { if (have_dss) - malloc_mutex_prefork(tsd, &dss_mtx); + malloc_mutex_prefork(tsdn, &dss_mtx); } void -chunk_dss_postfork_parent(tsd_t *tsd) +chunk_dss_postfork_parent(tsdn_t *tsdn) { if (have_dss) - malloc_mutex_postfork_parent(tsd, &dss_mtx); + malloc_mutex_postfork_parent(tsdn, &dss_mtx); } void -chunk_dss_postfork_child(tsd_t *tsd) +chunk_dss_postfork_child(tsdn_t *tsdn) { if (have_dss) - malloc_mutex_postfork_child(tsd, &dss_mtx); + malloc_mutex_postfork_child(tsdn, &dss_mtx); } /******************************************************************************/ diff --git a/src/ckh.c b/src/ckh.c index 25185974..747c1c86 100644 --- a/src/ckh.c +++ b/src/ckh.c @@ -40,8 +40,8 @@ /******************************************************************************/ /* Function prototypes for non-inline static functions. */ -static bool ckh_grow(tsd_t *tsd, ckh_t *ckh); -static void ckh_shrink(tsd_t *tsd, ckh_t *ckh); +static bool ckh_grow(tsdn_t *tsdn, ckh_t *ckh); +static void ckh_shrink(tsdn_t *tsdn, ckh_t *ckh); /******************************************************************************/ @@ -244,7 +244,7 @@ ckh_rebuild(ckh_t *ckh, ckhc_t *aTab) } static bool -ckh_grow(tsd_t *tsd, ckh_t *ckh) +ckh_grow(tsdn_t *tsdn, ckh_t *ckh) { bool ret; ckhc_t *tab, *ttab; @@ -270,8 +270,8 @@ ckh_grow(tsd_t *tsd, ckh_t *ckh) ret = true; goto label_return; } - tab = (ckhc_t *)ipallocztm(tsd, usize, CACHELINE, true, NULL, - true, arena_ichoose(tsd, NULL)); + tab = (ckhc_t *)ipallocztm(tsdn, usize, CACHELINE, true, NULL, + true, arena_ichoose(tsdn, NULL)); if (tab == NULL) { ret = true; goto label_return; @@ -283,12 +283,12 @@ ckh_grow(tsd_t *tsd, ckh_t *ckh) ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS; if (!ckh_rebuild(ckh, tab)) { - idalloctm(tsd, tab, NULL, true, true); + idalloctm(tsdn, tab, NULL, true, true); break; } /* Rebuilding failed, so back out partially rebuilt table. 
*/ - idalloctm(tsd, ckh->tab, NULL, true, true); + idalloctm(tsdn, ckh->tab, NULL, true, true); ckh->tab = tab; ckh->lg_curbuckets = lg_prevbuckets; } @@ -299,7 +299,7 @@ label_return: } static void -ckh_shrink(tsd_t *tsd, ckh_t *ckh) +ckh_shrink(tsdn_t *tsdn, ckh_t *ckh) { ckhc_t *tab, *ttab; size_t usize; @@ -314,8 +314,8 @@ ckh_shrink(tsd_t *tsd, ckh_t *ckh) usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE); if (unlikely(usize == 0 || usize > HUGE_MAXCLASS)) return; - tab = (ckhc_t *)ipallocztm(tsd, usize, CACHELINE, true, NULL, true, - arena_ichoose(tsd, NULL)); + tab = (ckhc_t *)ipallocztm(tsdn, usize, CACHELINE, true, NULL, true, + arena_ichoose(tsdn, NULL)); if (tab == NULL) { /* * An OOM error isn't worth propagating, since it doesn't @@ -330,7 +330,7 @@ ckh_shrink(tsd_t *tsd, ckh_t *ckh) ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS; if (!ckh_rebuild(ckh, tab)) { - idalloctm(tsd, tab, NULL, true, true); + idalloctm(tsdn, tab, NULL, true, true); #ifdef CKH_COUNT ckh->nshrinks++; #endif @@ -338,7 +338,7 @@ ckh_shrink(tsd_t *tsd, ckh_t *ckh) } /* Rebuilding failed, so back out partially rebuilt table. */ - idalloctm(tsd, ckh->tab, NULL, true, true); + idalloctm(tsdn, ckh->tab, NULL, true, true); ckh->tab = tab; ckh->lg_curbuckets = lg_prevbuckets; #ifdef CKH_COUNT @@ -347,7 +347,7 @@ ckh_shrink(tsd_t *tsd, ckh_t *ckh) } bool -ckh_new(tsd_t *tsd, ckh_t *ckh, size_t minitems, ckh_hash_t *hash, +ckh_new(tsdn_t *tsdn, ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh_keycomp_t *keycomp) { bool ret; @@ -391,8 +391,8 @@ ckh_new(tsd_t *tsd, ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ret = true; goto label_return; } - ckh->tab = (ckhc_t *)ipallocztm(tsd, usize, CACHELINE, true, NULL, true, - arena_ichoose(tsd, NULL)); + ckh->tab = (ckhc_t *)ipallocztm(tsdn, usize, CACHELINE, true, NULL, + true, arena_ichoose(tsdn, NULL)); if (ckh->tab == NULL) { ret = true; goto label_return; @@ -404,7 +404,7 @@ label_return: } void -ckh_delete(tsd_t *tsd, ckh_t *ckh) +ckh_delete(tsdn_t *tsdn, ckh_t *ckh) { assert(ckh != NULL); @@ -421,7 +421,7 @@ ckh_delete(tsd_t *tsd, ckh_t *ckh) (unsigned long long)ckh->nrelocs); #endif - idalloctm(tsd, ckh->tab, NULL, true, true); + idalloctm(tsdn, ckh->tab, NULL, true, true); if (config_debug) memset(ckh, JEMALLOC_FREE_JUNK, sizeof(ckh_t)); } @@ -456,7 +456,7 @@ ckh_iter(ckh_t *ckh, size_t *tabind, void **key, void **data) } bool -ckh_insert(tsd_t *tsd, ckh_t *ckh, const void *key, const void *data) +ckh_insert(tsdn_t *tsdn, ckh_t *ckh, const void *key, const void *data) { bool ret; @@ -468,7 +468,7 @@ ckh_insert(tsd_t *tsd, ckh_t *ckh, const void *key, const void *data) #endif while (ckh_try_insert(ckh, &key, &data)) { - if (ckh_grow(tsd, ckh)) { + if (ckh_grow(tsdn, ckh)) { ret = true; goto label_return; } @@ -480,7 +480,7 @@ label_return: } bool -ckh_remove(tsd_t *tsd, ckh_t *ckh, const void *searchkey, void **key, +ckh_remove(tsdn_t *tsdn, ckh_t *ckh, const void *searchkey, void **key, void **data) { size_t cell; @@ -502,7 +502,7 @@ ckh_remove(tsd_t *tsd, ckh_t *ckh, const void *searchkey, void **key, + LG_CKH_BUCKET_CELLS - 2)) && ckh->lg_curbuckets > ckh->lg_minbuckets) { /* Ignore error due to OOM. 
*/ - ckh_shrink(tsd, ckh); + ckh_shrink(tsdn, ckh); } return (false); diff --git a/src/ctl.c b/src/ctl.c index fd5561a3..dad80086 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -46,20 +46,20 @@ static int n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, \ void *oldp, size_t *oldlenp, void *newp, size_t newlen); #define INDEX_PROTO(n) \ -static const ctl_named_node_t *n##_index(tsd_t *tsd, \ +static const ctl_named_node_t *n##_index(tsdn_t *tsdn, \ const size_t *mib, size_t miblen, size_t i); static bool ctl_arena_init(ctl_arena_stats_t *astats); static void ctl_arena_clear(ctl_arena_stats_t *astats); -static void ctl_arena_stats_amerge(tsd_t *tsd, ctl_arena_stats_t *cstats, +static void ctl_arena_stats_amerge(tsdn_t *tsdn, ctl_arena_stats_t *cstats, arena_t *arena); static void ctl_arena_stats_smerge(ctl_arena_stats_t *sstats, ctl_arena_stats_t *astats); -static void ctl_arena_refresh(tsd_t *tsd, arena_t *arena, unsigned i); -static bool ctl_grow(tsd_t *tsd); -static void ctl_refresh(tsd_t *tsd); -static bool ctl_init(tsd_t *tsd); -static int ctl_lookup(tsd_t *tsd, const char *name, +static void ctl_arena_refresh(tsdn_t *tsdn, arena_t *arena, unsigned i); +static bool ctl_grow(tsdn_t *tsdn); +static void ctl_refresh(tsdn_t *tsdn); +static bool ctl_init(tsdn_t *tsdn); +static int ctl_lookup(tsdn_t *tsdn, const char *name, ctl_node_t const **nodesp, size_t *mibp, size_t *depthp); CTL_PROTO(version) @@ -117,7 +117,7 @@ CTL_PROTO(opt_prof_accum) CTL_PROTO(tcache_create) CTL_PROTO(tcache_flush) CTL_PROTO(tcache_destroy) -static void arena_i_purge(tsd_t *tsd, unsigned arena_ind, bool all); +static void arena_i_purge(tsdn_t *tsdn, unsigned arena_ind, bool all); CTL_PROTO(arena_i_purge) CTL_PROTO(arena_i_decay) CTL_PROTO(arena_i_reset) @@ -560,12 +560,12 @@ ctl_arena_clear(ctl_arena_stats_t *astats) } static void -ctl_arena_stats_amerge(tsd_t *tsd, ctl_arena_stats_t *cstats, arena_t *arena) +ctl_arena_stats_amerge(tsdn_t *tsdn, ctl_arena_stats_t *cstats, arena_t *arena) { unsigned i; if (config_stats) { - arena_stats_merge(tsd, arena, &cstats->nthreads, &cstats->dss, + arena_stats_merge(tsdn, arena, &cstats->nthreads, &cstats->dss, &cstats->lg_dirty_mult, &cstats->decay_time, &cstats->pactive, &cstats->pdirty, &cstats->astats, cstats->bstats, cstats->lstats, cstats->hstats); @@ -578,7 +578,7 @@ ctl_arena_stats_amerge(tsd_t *tsd, ctl_arena_stats_t *cstats, arena_t *arena) cstats->nrequests_small += cstats->bstats[i].nrequests; } } else { - arena_basic_stats_merge(tsd, arena, &cstats->nthreads, + arena_basic_stats_merge(tsdn, arena, &cstats->nthreads, &cstats->dss, &cstats->lg_dirty_mult, &cstats->decay_time, &cstats->pactive, &cstats->pdirty); } @@ -656,24 +656,24 @@ ctl_arena_stats_smerge(ctl_arena_stats_t *sstats, ctl_arena_stats_t *astats) } static void -ctl_arena_refresh(tsd_t *tsd, arena_t *arena, unsigned i) +ctl_arena_refresh(tsdn_t *tsdn, arena_t *arena, unsigned i) { ctl_arena_stats_t *astats = &ctl_stats.arenas[i]; ctl_arena_stats_t *sstats = &ctl_stats.arenas[ctl_stats.narenas]; ctl_arena_clear(astats); - ctl_arena_stats_amerge(tsd, astats, arena); + ctl_arena_stats_amerge(tsdn, astats, arena); /* Merge into sum stats as well. */ ctl_arena_stats_smerge(sstats, astats); } static bool -ctl_grow(tsd_t *tsd) +ctl_grow(tsdn_t *tsdn) { ctl_arena_stats_t *astats; /* Initialize new arena. */ - if (arena_init(tsd, ctl_stats.narenas) == NULL) + if (arena_init(tsdn, ctl_stats.narenas) == NULL) return (true); /* Allocate extended arena stats. 
*/ @@ -708,7 +708,7 @@ ctl_grow(tsd_t *tsd) } static void -ctl_refresh(tsd_t *tsd) +ctl_refresh(tsdn_t *tsdn) { unsigned i; VARIABLE_ARRAY(arena_t *, tarenas, ctl_stats.narenas); @@ -720,19 +720,19 @@ ctl_refresh(tsd_t *tsd) ctl_arena_clear(&ctl_stats.arenas[ctl_stats.narenas]); for (i = 0; i < ctl_stats.narenas; i++) - tarenas[i] = arena_get(tsd, i, false); + tarenas[i] = arena_get(tsdn, i, false); for (i = 0; i < ctl_stats.narenas; i++) { bool initialized = (tarenas[i] != NULL); ctl_stats.arenas[i].initialized = initialized; if (initialized) - ctl_arena_refresh(tsd, tarenas[i], i); + ctl_arena_refresh(tsdn, tarenas[i], i); } if (config_stats) { size_t base_allocated, base_resident, base_mapped; - base_stats_get(tsd, &base_allocated, &base_resident, + base_stats_get(tsdn, &base_allocated, &base_resident, &base_mapped); ctl_stats.allocated = ctl_stats.arenas[ctl_stats.narenas].allocated_small + @@ -758,11 +758,11 @@ ctl_refresh(tsd_t *tsd) } static bool -ctl_init(tsd_t *tsd) +ctl_init(tsdn_t *tsdn) { bool ret; - malloc_mutex_lock(tsd, &ctl_mtx); + malloc_mutex_lock(tsdn, &ctl_mtx); if (!ctl_initialized) { /* * Allocate space for one extra arena stats element, which @@ -804,18 +804,18 @@ ctl_init(tsd_t *tsd) ctl_stats.arenas[ctl_stats.narenas].initialized = true; ctl_epoch = 0; - ctl_refresh(tsd); + ctl_refresh(tsdn); ctl_initialized = true; } ret = false; label_return: - malloc_mutex_unlock(tsd, &ctl_mtx); + malloc_mutex_unlock(tsdn, &ctl_mtx); return (ret); } static int -ctl_lookup(tsd_t *tsd, const char *name, ctl_node_t const **nodesp, +ctl_lookup(tsdn_t *tsdn, const char *name, ctl_node_t const **nodesp, size_t *mibp, size_t *depthp) { int ret; @@ -868,7 +868,7 @@ ctl_lookup(tsd_t *tsd, const char *name, ctl_node_t const **nodesp, } inode = ctl_indexed_node(node->children); - node = inode->index(tsd, mibp, *depthp, (size_t)index); + node = inode->index(tsdn, mibp, *depthp, (size_t)index); if (node == NULL) { ret = ENOENT; goto label_return; @@ -921,13 +921,13 @@ ctl_byname(tsd_t *tsd, const char *name, void *oldp, size_t *oldlenp, size_t mib[CTL_MAX_DEPTH]; const ctl_named_node_t *node; - if (!ctl_initialized && ctl_init(tsd)) { + if (!ctl_initialized && ctl_init(tsd_tsdn(tsd))) { ret = EAGAIN; goto label_return; } depth = CTL_MAX_DEPTH; - ret = ctl_lookup(tsd, name, nodes, mib, &depth); + ret = ctl_lookup(tsd_tsdn(tsd), name, nodes, mib, &depth); if (ret != 0) goto label_return; @@ -944,16 +944,16 @@ label_return: } int -ctl_nametomib(tsd_t *tsd, const char *name, size_t *mibp, size_t *miblenp) +ctl_nametomib(tsdn_t *tsdn, const char *name, size_t *mibp, size_t *miblenp) { int ret; - if (!ctl_initialized && ctl_init(tsd)) { + if (!ctl_initialized && ctl_init(tsdn)) { ret = EAGAIN; goto label_return; } - ret = ctl_lookup(tsd, name, NULL, mibp, miblenp); + ret = ctl_lookup(tsdn, name, NULL, mibp, miblenp); label_return: return(ret); } @@ -966,7 +966,7 @@ ctl_bymib(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, const ctl_named_node_t *node; size_t i; - if (!ctl_initialized && ctl_init(tsd)) { + if (!ctl_initialized && ctl_init(tsd_tsdn(tsd))) { ret = EAGAIN; goto label_return; } @@ -988,7 +988,7 @@ ctl_bymib(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, /* Indexed element. 
*/ inode = ctl_indexed_node(node->children); - node = inode->index(tsd, mib, miblen, mib[i]); + node = inode->index(tsd_tsdn(tsd), mib, miblen, mib[i]); if (node == NULL) { ret = ENOENT; goto label_return; @@ -1021,24 +1021,24 @@ ctl_boot(void) } void -ctl_prefork(tsd_t *tsd) +ctl_prefork(tsdn_t *tsdn) { - malloc_mutex_prefork(tsd, &ctl_mtx); + malloc_mutex_prefork(tsdn, &ctl_mtx); } void -ctl_postfork_parent(tsd_t *tsd) +ctl_postfork_parent(tsdn_t *tsdn) { - malloc_mutex_postfork_parent(tsd, &ctl_mtx); + malloc_mutex_postfork_parent(tsdn, &ctl_mtx); } void -ctl_postfork_child(tsd_t *tsd) +ctl_postfork_child(tsdn_t *tsdn) { - malloc_mutex_postfork_child(tsd, &ctl_mtx); + malloc_mutex_postfork_child(tsdn, &ctl_mtx); } /******************************************************************************/ @@ -1104,7 +1104,7 @@ n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \ if (!(c)) \ return (ENOENT); \ if (l) \ - malloc_mutex_lock(tsd, &ctl_mtx); \ + malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); \ READONLY(); \ oldval = (v); \ READ(oldval, t); \ @@ -1112,7 +1112,7 @@ n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \ ret = 0; \ label_return: \ if (l) \ - malloc_mutex_unlock(tsd, &ctl_mtx); \ + malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); \ return (ret); \ } @@ -1126,14 +1126,14 @@ n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \ \ if (!(c)) \ return (ENOENT); \ - malloc_mutex_lock(tsd, &ctl_mtx); \ + malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); \ READONLY(); \ oldval = (v); \ READ(oldval, t); \ \ ret = 0; \ label_return: \ - malloc_mutex_unlock(tsd, &ctl_mtx); \ + malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); \ return (ret); \ } @@ -1145,14 +1145,14 @@ n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \ int ret; \ t oldval; \ \ - malloc_mutex_lock(tsd, &ctl_mtx); \ + malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); \ READONLY(); \ oldval = (v); \ READ(oldval, t); \ \ ret = 0; \ label_return: \ - malloc_mutex_unlock(tsd, &ctl_mtx); \ + malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); \ return (ret); \ } @@ -1243,15 +1243,15 @@ epoch_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, int ret; UNUSED uint64_t newval; - malloc_mutex_lock(tsd, &ctl_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); WRITE(newval, uint64_t); if (newp != NULL) - ctl_refresh(tsd); + ctl_refresh(tsd_tsdn(tsd)); READ(ctl_epoch, uint64_t); ret = 0; label_return: - malloc_mutex_unlock(tsd, &ctl_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); return (ret); } @@ -1317,7 +1317,7 @@ thread_arena_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, if (oldarena == NULL) return (EAGAIN); - malloc_mutex_lock(tsd, &ctl_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); newind = oldind = oldarena->ind; WRITE(newind, unsigned); READ(oldind, unsigned); @@ -1331,7 +1331,7 @@ thread_arena_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, } /* Initialize arena if necessary. 
*/ - newarena = arena_get(tsd, newind, true); + newarena = arena_get(tsd_tsdn(tsd), newind, true); if (newarena == NULL) { ret = EAGAIN; goto label_return; @@ -1341,15 +1341,15 @@ thread_arena_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, if (config_tcache) { tcache_t *tcache = tsd_tcache_get(tsd); if (tcache != NULL) { - tcache_arena_reassociate(tsd, tcache, oldarena, - newarena); + tcache_arena_reassociate(tsd_tsdn(tsd), tcache, + oldarena, newarena); } } } ret = 0; label_return: - malloc_mutex_unlock(tsd, &ctl_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); return (ret); } @@ -1476,9 +1476,9 @@ tcache_create_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, if (!config_tcache) return (ENOENT); - malloc_mutex_lock(tsd, &ctl_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); READONLY(); - if (tcaches_create(tsd, &tcache_ind)) { + if (tcaches_create(tsd_tsdn(tsd), &tcache_ind)) { ret = EFAULT; goto label_return; } @@ -1486,7 +1486,7 @@ tcache_create_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, ret = 0; label_return: - malloc_mutex_unlock(tsd, &ctl_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); return (ret); } @@ -1541,10 +1541,10 @@ label_return: /******************************************************************************/ static void -arena_i_purge(tsd_t *tsd, unsigned arena_ind, bool all) +arena_i_purge(tsdn_t *tsdn, unsigned arena_ind, bool all) { - malloc_mutex_lock(tsd, &ctl_mtx); + malloc_mutex_lock(tsdn, &ctl_mtx); { unsigned narenas = ctl_stats.narenas; @@ -1553,30 +1553,30 @@ arena_i_purge(tsd_t *tsd, unsigned arena_ind, bool all) VARIABLE_ARRAY(arena_t *, tarenas, narenas); for (i = 0; i < narenas; i++) - tarenas[i] = arena_get(tsd, i, false); + tarenas[i] = arena_get(tsdn, i, false); /* * No further need to hold ctl_mtx, since narenas and * tarenas contain everything needed below. */ - malloc_mutex_unlock(tsd, &ctl_mtx); + malloc_mutex_unlock(tsdn, &ctl_mtx); for (i = 0; i < narenas; i++) { if (tarenas[i] != NULL) - arena_purge(tsd, tarenas[i], all); + arena_purge(tsdn, tarenas[i], all); } } else { arena_t *tarena; assert(arena_ind < narenas); - tarena = arena_get(tsd, arena_ind, false); + tarena = arena_get(tsdn, arena_ind, false); /* No further need to hold ctl_mtx. 
*/ - malloc_mutex_unlock(tsd, &ctl_mtx); + malloc_mutex_unlock(tsdn, &ctl_mtx); if (tarena != NULL) - arena_purge(tsd, tarena, all); + arena_purge(tsdn, tarena, all); } } } @@ -1589,7 +1589,7 @@ arena_i_purge_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, READONLY(); WRITEONLY(); - arena_i_purge(tsd, (unsigned)mib[1], true); + arena_i_purge(tsd_tsdn(tsd), (unsigned)mib[1], true); ret = 0; label_return: @@ -1604,7 +1604,7 @@ arena_i_decay_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, READONLY(); WRITEONLY(); - arena_i_purge(tsd, (unsigned)mib[1], false); + arena_i_purge(tsd_tsdn(tsd), (unsigned)mib[1], false); ret = 0; label_return: @@ -1630,13 +1630,13 @@ arena_i_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, arena_ind = (unsigned)mib[1]; if (config_debug) { - malloc_mutex_lock(tsd, &ctl_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); assert(arena_ind < ctl_stats.narenas); - malloc_mutex_unlock(tsd, &ctl_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); } assert(arena_ind >= opt_narenas); - arena = arena_get(tsd, arena_ind, false); + arena = arena_get(tsd_tsdn(tsd), arena_ind, false); arena_reset(tsd, arena); @@ -1655,7 +1655,7 @@ arena_i_dss_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, dss_prec_t dss_prec_old = dss_prec_limit; dss_prec_t dss_prec = dss_prec_limit; - malloc_mutex_lock(tsd, &ctl_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); WRITE(dss, const char *); if (dss != NULL) { int i; @@ -1676,20 +1676,20 @@ arena_i_dss_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, } if (arena_ind < ctl_stats.narenas) { - arena_t *arena = arena_get(tsd, arena_ind, false); + arena_t *arena = arena_get(tsd_tsdn(tsd), arena_ind, false); if (arena == NULL || (dss_prec != dss_prec_limit && - arena_dss_prec_set(tsd, arena, dss_prec))) { + arena_dss_prec_set(tsd_tsdn(tsd), arena, dss_prec))) { ret = EFAULT; goto label_return; } - dss_prec_old = arena_dss_prec_get(tsd, arena); + dss_prec_old = arena_dss_prec_get(tsd_tsdn(tsd), arena); } else { if (dss_prec != dss_prec_limit && - chunk_dss_prec_set(tsd, dss_prec)) { + chunk_dss_prec_set(tsd_tsdn(tsd), dss_prec)) { ret = EFAULT; goto label_return; } - dss_prec_old = chunk_dss_prec_get(tsd); + dss_prec_old = chunk_dss_prec_get(tsd_tsdn(tsd)); } dss = dss_prec_names[dss_prec_old]; @@ -1697,7 +1697,7 @@ arena_i_dss_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, ret = 0; label_return: - malloc_mutex_unlock(tsd, &ctl_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); return (ret); } @@ -1709,14 +1709,14 @@ arena_i_lg_dirty_mult_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, unsigned arena_ind = (unsigned)mib[1]; arena_t *arena; - arena = arena_get(tsd, arena_ind, false); + arena = arena_get(tsd_tsdn(tsd), arena_ind, false); if (arena == NULL) { ret = EFAULT; goto label_return; } if (oldp != NULL && oldlenp != NULL) { - size_t oldval = arena_lg_dirty_mult_get(tsd, arena); + size_t oldval = arena_lg_dirty_mult_get(tsd_tsdn(tsd), arena); READ(oldval, ssize_t); } if (newp != NULL) { @@ -1724,7 +1724,8 @@ arena_i_lg_dirty_mult_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, ret = EINVAL; goto label_return; } - if (arena_lg_dirty_mult_set(tsd, arena, *(ssize_t *)newp)) { + if (arena_lg_dirty_mult_set(tsd_tsdn(tsd), arena, + *(ssize_t *)newp)) { ret = EFAULT; goto label_return; } @@ -1743,14 +1744,14 @@ arena_i_decay_time_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, unsigned arena_ind = (unsigned)mib[1]; arena_t *arena; - arena = 
arena_get(tsd, arena_ind, false); + arena = arena_get(tsd_tsdn(tsd), arena_ind, false); if (arena == NULL) { ret = EFAULT; goto label_return; } if (oldp != NULL && oldlenp != NULL) { - size_t oldval = arena_decay_time_get(tsd, arena); + size_t oldval = arena_decay_time_get(tsd_tsdn(tsd), arena); READ(oldval, ssize_t); } if (newp != NULL) { @@ -1758,7 +1759,8 @@ arena_i_decay_time_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, ret = EINVAL; goto label_return; } - if (arena_decay_time_set(tsd, arena, *(ssize_t *)newp)) { + if (arena_decay_time_set(tsd_tsdn(tsd), arena, + *(ssize_t *)newp)) { ret = EFAULT; goto label_return; } @@ -1777,18 +1779,18 @@ arena_i_chunk_hooks_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, unsigned arena_ind = (unsigned)mib[1]; arena_t *arena; - malloc_mutex_lock(tsd, &ctl_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); if (arena_ind < narenas_total_get() && (arena = - arena_get(tsd, arena_ind, false)) != NULL) { + arena_get(tsd_tsdn(tsd), arena_ind, false)) != NULL) { if (newp != NULL) { chunk_hooks_t old_chunk_hooks, new_chunk_hooks; WRITE(new_chunk_hooks, chunk_hooks_t); - old_chunk_hooks = chunk_hooks_set(tsd, arena, + old_chunk_hooks = chunk_hooks_set(tsd_tsdn(tsd), arena, &new_chunk_hooks); READ(old_chunk_hooks, chunk_hooks_t); } else { - chunk_hooks_t old_chunk_hooks = chunk_hooks_get(tsd, - arena); + chunk_hooks_t old_chunk_hooks = + chunk_hooks_get(tsd_tsdn(tsd), arena); READ(old_chunk_hooks, chunk_hooks_t); } } else { @@ -1797,16 +1799,16 @@ arena_i_chunk_hooks_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, } ret = 0; label_return: - malloc_mutex_unlock(tsd, &ctl_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); return (ret); } static const ctl_named_node_t * -arena_i_index(tsd_t *tsd, const size_t *mib, size_t miblen, size_t i) +arena_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) { const ctl_named_node_t *ret; - malloc_mutex_lock(tsd, &ctl_mtx); + malloc_mutex_lock(tsdn, &ctl_mtx); if (i > ctl_stats.narenas) { ret = NULL; goto label_return; @@ -1814,7 +1816,7 @@ arena_i_index(tsd_t *tsd, const size_t *mib, size_t miblen, size_t i) ret = super_arena_i_node; label_return: - malloc_mutex_unlock(tsd, &ctl_mtx); + malloc_mutex_unlock(tsdn, &ctl_mtx); return (ret); } @@ -1827,7 +1829,7 @@ arenas_narenas_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, int ret; unsigned narenas; - malloc_mutex_lock(tsd, &ctl_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); READONLY(); if (*oldlenp != sizeof(unsigned)) { ret = EINVAL; @@ -1838,7 +1840,7 @@ arenas_narenas_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, ret = 0; label_return: - malloc_mutex_unlock(tsd, &ctl_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); return (ret); } @@ -1849,7 +1851,7 @@ arenas_initialized_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, int ret; unsigned nread, i; - malloc_mutex_lock(tsd, &ctl_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); READONLY(); if (*oldlenp != ctl_stats.narenas * sizeof(bool)) { ret = EINVAL; @@ -1864,7 +1866,7 @@ arenas_initialized_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, ((bool *)oldp)[i] = ctl_stats.arenas[i].initialized; label_return: - malloc_mutex_unlock(tsd, &ctl_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); return (ret); } @@ -1929,7 +1931,7 @@ CTL_RO_NL_GEN(arenas_bin_i_size, arena_bin_info[mib[2]].reg_size, size_t) CTL_RO_NL_GEN(arenas_bin_i_nregs, arena_bin_info[mib[2]].nregs, uint32_t) CTL_RO_NL_GEN(arenas_bin_i_run_size, 
arena_bin_info[mib[2]].run_size, size_t) static const ctl_named_node_t * -arenas_bin_i_index(tsd_t *tsd, const size_t *mib, size_t miblen, size_t i) +arenas_bin_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) { if (i > NBINS) @@ -1940,7 +1942,7 @@ arenas_bin_i_index(tsd_t *tsd, const size_t *mib, size_t miblen, size_t i) CTL_RO_NL_GEN(arenas_nlruns, nlclasses, unsigned) CTL_RO_NL_GEN(arenas_lrun_i_size, index2size(NBINS+(szind_t)mib[2]), size_t) static const ctl_named_node_t * -arenas_lrun_i_index(tsd_t *tsd, const size_t *mib, size_t miblen, size_t i) +arenas_lrun_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) { if (i > nlclasses) @@ -1952,7 +1954,7 @@ CTL_RO_NL_GEN(arenas_nhchunks, nhclasses, unsigned) CTL_RO_NL_GEN(arenas_hchunk_i_size, index2size(NBINS+nlclasses+(szind_t)mib[2]), size_t) static const ctl_named_node_t * -arenas_hchunk_i_index(tsd_t *tsd, const size_t *mib, size_t miblen, size_t i) +arenas_hchunk_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) { if (i > nhclasses) @@ -1967,9 +1969,9 @@ arenas_extend_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, int ret; unsigned narenas; - malloc_mutex_lock(tsd, &ctl_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); READONLY(); - if (ctl_grow(tsd)) { + if (ctl_grow(tsd_tsdn(tsd))) { ret = EAGAIN; goto label_return; } @@ -1978,7 +1980,7 @@ arenas_extend_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, ret = 0; label_return: - malloc_mutex_unlock(tsd, &ctl_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); return (ret); } @@ -1999,9 +2001,10 @@ prof_thread_active_init_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, ret = EINVAL; goto label_return; } - oldval = prof_thread_active_init_set(tsd, *(bool *)newp); + oldval = prof_thread_active_init_set(tsd_tsdn(tsd), + *(bool *)newp); } else - oldval = prof_thread_active_init_get(tsd); + oldval = prof_thread_active_init_get(tsd_tsdn(tsd)); READ(oldval, bool); ret = 0; @@ -2024,9 +2027,9 @@ prof_active_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, ret = EINVAL; goto label_return; } - oldval = prof_active_set(tsd, *(bool *)newp); + oldval = prof_active_set(tsd_tsdn(tsd), *(bool *)newp); } else - oldval = prof_active_get(tsd); + oldval = prof_active_get(tsd_tsdn(tsd)); READ(oldval, bool); ret = 0; @@ -2072,9 +2075,9 @@ prof_gdump_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, ret = EINVAL; goto label_return; } - oldval = prof_gdump_set(tsd, *(bool *)newp); + oldval = prof_gdump_set(tsd_tsdn(tsd), *(bool *)newp); } else - oldval = prof_gdump_get(tsd); + oldval = prof_gdump_get(tsd_tsdn(tsd)); READ(oldval, bool); ret = 0; @@ -2097,7 +2100,7 @@ prof_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, if (lg_sample >= (sizeof(uint64_t) << 3)) lg_sample = (sizeof(uint64_t) << 3) - 1; - prof_reset(tsd, lg_sample); + prof_reset(tsd_tsdn(tsd), lg_sample); ret = 0; label_return: @@ -2185,7 +2188,7 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_curruns, ctl_stats.arenas[mib[2]].bstats[mib[4]].curruns, size_t) static const ctl_named_node_t * -stats_arenas_i_bins_j_index(tsd_t *tsd, const size_t *mib, size_t miblen, +stats_arenas_i_bins_j_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t j) { @@ -2204,7 +2207,7 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_lruns_j_curruns, ctl_stats.arenas[mib[2]].lstats[mib[4]].curruns, size_t) static const ctl_named_node_t * -stats_arenas_i_lruns_j_index(tsd_t *tsd, const size_t *mib, size_t miblen, 
+stats_arenas_i_lruns_j_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t j) { @@ -2224,7 +2227,7 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_hchunks_j_curhchunks, ctl_stats.arenas[mib[2]].hstats[mib[4]].curhchunks, size_t) static const ctl_named_node_t * -stats_arenas_i_hchunks_j_index(tsd_t *tsd, const size_t *mib, size_t miblen, +stats_arenas_i_hchunks_j_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t j) { @@ -2234,11 +2237,11 @@ stats_arenas_i_hchunks_j_index(tsd_t *tsd, const size_t *mib, size_t miblen, } static const ctl_named_node_t * -stats_arenas_i_index(tsd_t *tsd, const size_t *mib, size_t miblen, size_t i) +stats_arenas_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) { const ctl_named_node_t * ret; - malloc_mutex_lock(tsd, &ctl_mtx); + malloc_mutex_lock(tsdn, &ctl_mtx); if (i > ctl_stats.narenas || !ctl_stats.arenas[i].initialized) { ret = NULL; goto label_return; @@ -2246,6 +2249,6 @@ stats_arenas_i_index(tsd_t *tsd, const size_t *mib, size_t miblen, size_t i) ret = super_stats_arenas_i_node; label_return: - malloc_mutex_unlock(tsd, &ctl_mtx); + malloc_mutex_unlock(tsdn, &ctl_mtx); return (ret); } diff --git a/src/huge.c b/src/huge.c index 71fb50c5..0bf61622 100644 --- a/src/huge.c +++ b/src/huge.c @@ -15,12 +15,12 @@ huge_node_get(const void *ptr) } static bool -huge_node_set(tsd_t *tsd, const void *ptr, extent_node_t *node) +huge_node_set(tsdn_t *tsdn, const void *ptr, extent_node_t *node) { assert(extent_node_addr_get(node) == ptr); assert(!extent_node_achunk_get(node)); - return (chunk_register(tsd, ptr, node)); + return (chunk_register(tsdn, ptr, node)); } static void @@ -31,16 +31,16 @@ huge_node_unset(const void *ptr, const extent_node_t *node) } void * -huge_malloc(tsd_t *tsd, arena_t *arena, size_t usize, bool zero) +huge_malloc(tsdn_t *tsdn, arena_t *arena, size_t usize, bool zero) { assert(usize == s2u(usize)); - return (huge_palloc(tsd, arena, usize, chunksize, zero)); + return (huge_palloc(tsdn, arena, usize, chunksize, zero)); } void * -huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, +huge_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero) { void *ret; @@ -50,15 +50,17 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, /* Allocate one or more contiguous chunks for this request. */ + assert(!tsdn_null(tsdn) || arena != NULL); + ausize = sa2u(usize, alignment); if (unlikely(ausize == 0 || ausize > HUGE_MAXCLASS)) return (NULL); assert(ausize >= chunksize); /* Allocate an extent node with which to track the chunk. */ - assert(tsd != NULL || arena != NULL); - node = ipallocztm(tsd, CACHELINE_CEILING(sizeof(extent_node_t)), - CACHELINE, false, NULL, true, arena_ichoose(tsd, arena)); + assert(tsdn != NULL || arena != NULL); + node = ipallocztm(tsdn, CACHELINE_CEILING(sizeof(extent_node_t)), + CACHELINE, false, NULL, true, arena_ichoose(tsdn, arena)); if (node == NULL) return (NULL); @@ -67,26 +69,26 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, * it is possible to make correct junk/zero fill decisions below. 
*/ is_zeroed = zero; - arena = arena_choose(tsd, arena); - if (unlikely(arena == NULL) || (ret = arena_chunk_alloc_huge(tsd, arena, - usize, alignment, &is_zeroed)) == NULL) { - idalloctm(tsd, node, NULL, true, true); + arena = arena_choose(tsdn_tsd(tsdn), arena); + if (unlikely(arena == NULL) || (ret = arena_chunk_alloc_huge(tsdn, + arena, usize, alignment, &is_zeroed)) == NULL) { + idalloctm(tsdn, node, NULL, true, true); return (NULL); } extent_node_init(node, arena, ret, usize, is_zeroed, true); - if (huge_node_set(tsd, ret, node)) { - arena_chunk_dalloc_huge(tsd, arena, ret, usize); - idalloctm(tsd, node, NULL, true, true); + if (huge_node_set(tsdn, ret, node)) { + arena_chunk_dalloc_huge(tsdn, arena, ret, usize); + idalloctm(tsdn, node, NULL, true, true); return (NULL); } /* Insert node into huge. */ - malloc_mutex_lock(tsd, &arena->huge_mtx); + malloc_mutex_lock(tsdn, &arena->huge_mtx); ql_elm_new(node, ql_link); ql_tail_insert(&arena->huge, node, ql_link); - malloc_mutex_unlock(tsd, &arena->huge_mtx); + malloc_mutex_unlock(tsdn, &arena->huge_mtx); if (zero || (config_fill && unlikely(opt_zero))) { if (!is_zeroed) @@ -94,7 +96,7 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, } else if (config_fill && unlikely(opt_junk_alloc)) memset(ret, JEMALLOC_ALLOC_JUNK, usize); - arena_decay_tick(tsd, arena); + arena_decay_tick(tsdn, arena); return (ret); } @@ -103,7 +105,7 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, #define huge_dalloc_junk JEMALLOC_N(huge_dalloc_junk_impl) #endif static void -huge_dalloc_junk(tsd_t *tsd, void *ptr, size_t usize) +huge_dalloc_junk(tsdn_t *tsdn, void *ptr, size_t usize) { if (config_fill && have_dss && unlikely(opt_junk_free)) { @@ -111,7 +113,7 @@ huge_dalloc_junk(tsd_t *tsd, void *ptr, size_t usize) * Only bother junk filling if the chunk isn't about to be * unmapped. */ - if (!config_munmap || (have_dss && chunk_in_dss(tsd, ptr))) + if (!config_munmap || (have_dss && chunk_in_dss(tsdn, ptr))) memset(ptr, JEMALLOC_FREE_JUNK, usize); } } @@ -122,7 +124,7 @@ huge_dalloc_junk_t *huge_dalloc_junk = JEMALLOC_N(huge_dalloc_junk_impl); #endif static void -huge_ralloc_no_move_similar(tsd_t *tsd, void *ptr, size_t oldsize, +huge_ralloc_no_move_similar(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t usize_min, size_t usize_max, bool zero) { size_t usize, usize_next; @@ -151,22 +153,22 @@ huge_ralloc_no_move_similar(tsd_t *tsd, void *ptr, size_t oldsize, JEMALLOC_FREE_JUNK, sdiff); post_zeroed = false; } else { - post_zeroed = !chunk_purge_wrapper(tsd, arena, + post_zeroed = !chunk_purge_wrapper(tsdn, arena, &chunk_hooks, ptr, CHUNK_CEILING(oldsize), usize, sdiff); } } else post_zeroed = pre_zeroed; - malloc_mutex_lock(tsd, &arena->huge_mtx); + malloc_mutex_lock(tsdn, &arena->huge_mtx); /* Update the size of the huge allocation. */ assert(extent_node_size_get(node) != usize); extent_node_size_set(node, usize); /* Update zeroed. */ extent_node_zeroed_set(node, post_zeroed); - malloc_mutex_unlock(tsd, &arena->huge_mtx); + malloc_mutex_unlock(tsdn, &arena->huge_mtx); - arena_chunk_ralloc_huge_similar(tsd, arena, ptr, oldsize, usize); + arena_chunk_ralloc_huge_similar(tsdn, arena, ptr, oldsize, usize); /* Fill if necessary (growing). 
*/ if (oldsize < usize) { @@ -183,7 +185,8 @@ huge_ralloc_no_move_similar(tsd_t *tsd, void *ptr, size_t oldsize, } static bool -huge_ralloc_no_move_shrink(tsd_t *tsd, void *ptr, size_t oldsize, size_t usize) +huge_ralloc_no_move_shrink(tsdn_t *tsdn, void *ptr, size_t oldsize, + size_t usize) { extent_node_t *node; arena_t *arena; @@ -194,7 +197,7 @@ huge_ralloc_no_move_shrink(tsd_t *tsd, void *ptr, size_t oldsize, size_t usize) node = huge_node_get(ptr); arena = extent_node_arena_get(node); pre_zeroed = extent_node_zeroed_get(node); - chunk_hooks = chunk_hooks_get(tsd, arena); + chunk_hooks = chunk_hooks_get(tsdn, arena); assert(oldsize > usize); @@ -207,11 +210,11 @@ huge_ralloc_no_move_shrink(tsd_t *tsd, void *ptr, size_t oldsize, size_t usize) if (oldsize > usize) { size_t sdiff = oldsize - usize; if (config_fill && unlikely(opt_junk_free)) { - huge_dalloc_junk(tsd, (void *)((uintptr_t)ptr + usize), + huge_dalloc_junk(tsdn, (void *)((uintptr_t)ptr + usize), sdiff); post_zeroed = false; } else { - post_zeroed = !chunk_purge_wrapper(tsd, arena, + post_zeroed = !chunk_purge_wrapper(tsdn, arena, &chunk_hooks, CHUNK_ADDR2BASE((uintptr_t)ptr + usize), CHUNK_CEILING(oldsize), CHUNK_ADDR2OFFSET((uintptr_t)ptr + usize), sdiff); @@ -219,31 +222,31 @@ huge_ralloc_no_move_shrink(tsd_t *tsd, void *ptr, size_t oldsize, size_t usize) } else post_zeroed = pre_zeroed; - malloc_mutex_lock(tsd, &arena->huge_mtx); + malloc_mutex_lock(tsdn, &arena->huge_mtx); /* Update the size of the huge allocation. */ extent_node_size_set(node, usize); /* Update zeroed. */ extent_node_zeroed_set(node, post_zeroed); - malloc_mutex_unlock(tsd, &arena->huge_mtx); + malloc_mutex_unlock(tsdn, &arena->huge_mtx); /* Zap the excess chunks. */ - arena_chunk_ralloc_huge_shrink(tsd, arena, ptr, oldsize, usize); + arena_chunk_ralloc_huge_shrink(tsdn, arena, ptr, oldsize, usize); return (false); } static bool -huge_ralloc_no_move_expand(tsd_t *tsd, void *ptr, size_t oldsize, size_t usize, - bool zero) { +huge_ralloc_no_move_expand(tsdn_t *tsdn, void *ptr, size_t oldsize, + size_t usize, bool zero) { extent_node_t *node; arena_t *arena; bool is_zeroed_subchunk, is_zeroed_chunk; node = huge_node_get(ptr); arena = extent_node_arena_get(node); - malloc_mutex_lock(tsd, &arena->huge_mtx); + malloc_mutex_lock(tsdn, &arena->huge_mtx); is_zeroed_subchunk = extent_node_zeroed_get(node); - malloc_mutex_unlock(tsd, &arena->huge_mtx); + malloc_mutex_unlock(tsdn, &arena->huge_mtx); /* * Copy zero into is_zeroed_chunk and pass the copy to chunk_alloc(), so @@ -251,14 +254,14 @@ huge_ralloc_no_move_expand(tsd_t *tsd, void *ptr, size_t oldsize, size_t usize, */ is_zeroed_chunk = zero; - if (arena_chunk_ralloc_huge_expand(tsd, arena, ptr, oldsize, usize, + if (arena_chunk_ralloc_huge_expand(tsdn, arena, ptr, oldsize, usize, &is_zeroed_chunk)) return (true); - malloc_mutex_lock(tsd, &arena->huge_mtx); + malloc_mutex_lock(tsdn, &arena->huge_mtx); /* Update the size of the huge allocation. 
*/ extent_node_size_set(node, usize); - malloc_mutex_unlock(tsd, &arena->huge_mtx); + malloc_mutex_unlock(tsdn, &arena->huge_mtx); if (zero || (config_fill && unlikely(opt_zero))) { if (!is_zeroed_subchunk) { @@ -279,7 +282,7 @@ huge_ralloc_no_move_expand(tsd_t *tsd, void *ptr, size_t oldsize, size_t usize, } bool -huge_ralloc_no_move(tsd_t *tsd, void *ptr, size_t oldsize, size_t usize_min, +huge_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t usize_min, size_t usize_max, bool zero) { @@ -293,16 +296,16 @@ huge_ralloc_no_move(tsd_t *tsd, void *ptr, size_t oldsize, size_t usize_min, if (CHUNK_CEILING(usize_max) > CHUNK_CEILING(oldsize)) { /* Attempt to expand the allocation in-place. */ - if (!huge_ralloc_no_move_expand(tsd, ptr, oldsize, usize_max, + if (!huge_ralloc_no_move_expand(tsdn, ptr, oldsize, usize_max, zero)) { - arena_decay_tick(tsd, huge_aalloc(ptr)); + arena_decay_tick(tsdn, huge_aalloc(ptr)); return (false); } /* Try again, this time with usize_min. */ if (usize_min < usize_max && CHUNK_CEILING(usize_min) > - CHUNK_CEILING(oldsize) && huge_ralloc_no_move_expand(tsd, + CHUNK_CEILING(oldsize) && huge_ralloc_no_move_expand(tsdn, ptr, oldsize, usize_min, zero)) { - arena_decay_tick(tsd, huge_aalloc(ptr)); + arena_decay_tick(tsdn, huge_aalloc(ptr)); return (false); } } @@ -313,16 +316,17 @@ huge_ralloc_no_move(tsd_t *tsd, void *ptr, size_t oldsize, size_t usize_min, */ if (CHUNK_CEILING(oldsize) >= CHUNK_CEILING(usize_min) && CHUNK_CEILING(oldsize) <= CHUNK_CEILING(usize_max)) { - huge_ralloc_no_move_similar(tsd, ptr, oldsize, usize_min, + huge_ralloc_no_move_similar(tsdn, ptr, oldsize, usize_min, usize_max, zero); - arena_decay_tick(tsd, huge_aalloc(ptr)); + arena_decay_tick(tsdn, huge_aalloc(ptr)); return (false); } /* Attempt to shrink the allocation in-place. */ if (CHUNK_CEILING(oldsize) > CHUNK_CEILING(usize_max)) { - if (!huge_ralloc_no_move_shrink(tsd, ptr, oldsize, usize_max)) { - arena_decay_tick(tsd, huge_aalloc(ptr)); + if (!huge_ralloc_no_move_shrink(tsdn, ptr, oldsize, + usize_max)) { + arena_decay_tick(tsdn, huge_aalloc(ptr)); return (false); } } @@ -330,18 +334,18 @@ huge_ralloc_no_move(tsd_t *tsd, void *ptr, size_t oldsize, size_t usize_min, } static void * -huge_ralloc_move_helper(tsd_t *tsd, arena_t *arena, size_t usize, +huge_ralloc_move_helper(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero) { if (alignment <= chunksize) - return (huge_malloc(tsd, arena, usize, zero)); - return (huge_palloc(tsd, arena, usize, alignment, zero)); + return (huge_malloc(tsdn, arena, usize, zero)); + return (huge_palloc(tsdn, arena, usize, alignment, zero)); } void * -huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t usize, - size_t alignment, bool zero, tcache_t *tcache) +huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, + size_t usize, size_t alignment, bool zero, tcache_t *tcache) { void *ret; size_t copysize; @@ -350,7 +354,8 @@ huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t usize, assert(usize > 0 && usize <= HUGE_MAXCLASS); /* Try to avoid moving the allocation. */ - if (!huge_ralloc_no_move(tsd, ptr, oldsize, usize, usize, zero)) + if (!huge_ralloc_no_move(tsd_tsdn(tsd), ptr, oldsize, usize, usize, + zero)) return (ptr); /* @@ -358,7 +363,8 @@ huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t usize, * different size class. In that case, fall back to allocating new * space and copying. 
*/ - ret = huge_ralloc_move_helper(tsd, arena, usize, alignment, zero); + ret = huge_ralloc_move_helper(tsd_tsdn(tsd), arena, usize, alignment, + zero); if (ret == NULL) return (NULL); @@ -369,7 +375,7 @@ huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t usize, } void -huge_dalloc(tsd_t *tsd, void *ptr) +huge_dalloc(tsdn_t *tsdn, void *ptr) { extent_node_t *node; arena_t *arena; @@ -377,17 +383,17 @@ huge_dalloc(tsd_t *tsd, void *ptr) node = huge_node_get(ptr); arena = extent_node_arena_get(node); huge_node_unset(ptr, node); - malloc_mutex_lock(tsd, &arena->huge_mtx); + malloc_mutex_lock(tsdn, &arena->huge_mtx); ql_remove(&arena->huge, node, ql_link); - malloc_mutex_unlock(tsd, &arena->huge_mtx); + malloc_mutex_unlock(tsdn, &arena->huge_mtx); - huge_dalloc_junk(tsd, extent_node_addr_get(node), + huge_dalloc_junk(tsdn, extent_node_addr_get(node), extent_node_size_get(node)); - arena_chunk_dalloc_huge(tsd, extent_node_arena_get(node), + arena_chunk_dalloc_huge(tsdn, extent_node_arena_get(node), extent_node_addr_get(node), extent_node_size_get(node)); - idalloctm(tsd, node, NULL, true, true); + idalloctm(tsdn, node, NULL, true, true); - arena_decay_tick(tsd, arena); + arena_decay_tick(tsdn, arena); } arena_t * @@ -398,7 +404,7 @@ huge_aalloc(const void *ptr) } size_t -huge_salloc(tsd_t *tsd, const void *ptr) +huge_salloc(tsdn_t *tsdn, const void *ptr) { size_t size; extent_node_t *node; @@ -406,15 +412,15 @@ huge_salloc(tsd_t *tsd, const void *ptr) node = huge_node_get(ptr); arena = extent_node_arena_get(node); - malloc_mutex_lock(tsd, &arena->huge_mtx); + malloc_mutex_lock(tsdn, &arena->huge_mtx); size = extent_node_size_get(node); - malloc_mutex_unlock(tsd, &arena->huge_mtx); + malloc_mutex_unlock(tsdn, &arena->huge_mtx); return (size); } prof_tctx_t * -huge_prof_tctx_get(tsd_t *tsd, const void *ptr) +huge_prof_tctx_get(tsdn_t *tsdn, const void *ptr) { prof_tctx_t *tctx; extent_node_t *node; @@ -422,29 +428,29 @@ huge_prof_tctx_get(tsd_t *tsd, const void *ptr) node = huge_node_get(ptr); arena = extent_node_arena_get(node); - malloc_mutex_lock(tsd, &arena->huge_mtx); + malloc_mutex_lock(tsdn, &arena->huge_mtx); tctx = extent_node_prof_tctx_get(node); - malloc_mutex_unlock(tsd, &arena->huge_mtx); + malloc_mutex_unlock(tsdn, &arena->huge_mtx); return (tctx); } void -huge_prof_tctx_set(tsd_t *tsd, const void *ptr, prof_tctx_t *tctx) +huge_prof_tctx_set(tsdn_t *tsdn, const void *ptr, prof_tctx_t *tctx) { extent_node_t *node; arena_t *arena; node = huge_node_get(ptr); arena = extent_node_arena_get(node); - malloc_mutex_lock(tsd, &arena->huge_mtx); + malloc_mutex_lock(tsdn, &arena->huge_mtx); extent_node_prof_tctx_set(node, tctx); - malloc_mutex_unlock(tsd, &arena->huge_mtx); + malloc_mutex_unlock(tsdn, &arena->huge_mtx); } void -huge_prof_tctx_reset(tsd_t *tsd, const void *ptr) +huge_prof_tctx_reset(tsdn_t *tsdn, const void *ptr) { - huge_prof_tctx_set(tsd, ptr, (prof_tctx_t *)(uintptr_t)1U); + huge_prof_tctx_set(tsdn, ptr, (prof_tctx_t *)(uintptr_t)1U); } diff --git a/src/jemalloc.c b/src/jemalloc.c index b1d691ed..40eb2eaa 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -318,15 +318,15 @@ a0ialloc(size_t size, bool zero, bool is_metadata) if (unlikely(malloc_init_a0())) return (NULL); - return (iallocztm(NULL, size, size2index(size), zero, NULL, is_metadata, - arena_get(NULL, 0, true), true)); + return (iallocztm(TSDN_NULL, size, size2index(size), zero, NULL, + is_metadata, arena_get(TSDN_NULL, 0, true), true)); } static void a0idalloc(void *ptr, bool is_metadata) { 
- idalloctm(NULL, ptr, false, is_metadata, true); + idalloctm(TSDN_NULL, ptr, false, is_metadata, true); } void * @@ -413,7 +413,7 @@ narenas_total_get(void) /* Create a new arena and insert it into the arenas array at index ind. */ static arena_t * -arena_init_locked(tsd_t *tsd, unsigned ind) +arena_init_locked(tsdn_t *tsdn, unsigned ind) { arena_t *arena; @@ -427,26 +427,26 @@ arena_init_locked(tsd_t *tsd, unsigned ind) * Another thread may have already initialized arenas[ind] if it's an * auto arena. */ - arena = arena_get(tsd, ind, false); + arena = arena_get(tsdn, ind, false); if (arena != NULL) { assert(ind < narenas_auto); return (arena); } /* Actually initialize the arena. */ - arena = arena_new(tsd, ind); + arena = arena_new(tsdn, ind); arena_set(ind, arena); return (arena); } arena_t * -arena_init(tsd_t *tsd, unsigned ind) +arena_init(tsdn_t *tsdn, unsigned ind) { arena_t *arena; - malloc_mutex_lock(tsd, &arenas_lock); - arena = arena_init_locked(tsd, ind); - malloc_mutex_unlock(tsd, &arenas_lock); + malloc_mutex_lock(tsdn, &arenas_lock); + arena = arena_init_locked(tsdn, ind); + malloc_mutex_unlock(tsdn, &arenas_lock); return (arena); } @@ -455,7 +455,7 @@ arena_bind(tsd_t *tsd, unsigned ind, bool internal) { arena_t *arena; - arena = arena_get(tsd, ind, false); + arena = arena_get(tsd_tsdn(tsd), ind, false); arena_nthreads_inc(arena, internal); if (tsd_nominal(tsd)) { @@ -471,8 +471,8 @@ arena_migrate(tsd_t *tsd, unsigned oldind, unsigned newind) { arena_t *oldarena, *newarena; - oldarena = arena_get(tsd, oldind, false); - newarena = arena_get(tsd, newind, false); + oldarena = arena_get(tsd_tsdn(tsd), oldind, false); + newarena = arena_get(tsd_tsdn(tsd), newind, false); arena_nthreads_dec(oldarena, false); arena_nthreads_inc(newarena, false); tsd_arena_set(tsd, newarena); @@ -483,7 +483,7 @@ arena_unbind(tsd_t *tsd, unsigned ind, bool internal) { arena_t *arena; - arena = arena_get(tsd, ind, false); + arena = arena_get(tsd_tsdn(tsd), ind, false); arena_nthreads_dec(arena, internal); if (internal) tsd_iarena_set(tsd, NULL); @@ -588,19 +588,20 @@ arena_choose_hard(tsd_t *tsd, bool internal) choose[j] = 0; first_null = narenas_auto; - malloc_mutex_lock(tsd, &arenas_lock); - assert(arena_get(tsd, 0, false) != NULL); + malloc_mutex_lock(tsd_tsdn(tsd), &arenas_lock); + assert(arena_get(tsd_tsdn(tsd), 0, false) != NULL); for (i = 1; i < narenas_auto; i++) { - if (arena_get(tsd, i, false) != NULL) { + if (arena_get(tsd_tsdn(tsd), i, false) != NULL) { /* * Choose the first arena that has the lowest * number of threads assigned to it. */ for (j = 0; j < 2; j++) { - if (arena_nthreads_get(arena_get(tsd, i, - false), !!j) < - arena_nthreads_get(arena_get(tsd, - choose[j], false), !!j)) + if (arena_nthreads_get(arena_get( + tsd_tsdn(tsd), i, false), !!j) < + arena_nthreads_get(arena_get( + tsd_tsdn(tsd), choose[j], false), + !!j)) choose[j] = i; } } else if (first_null == narenas_auto) { @@ -618,22 +619,27 @@ arena_choose_hard(tsd_t *tsd, bool internal) } for (j = 0; j < 2; j++) { - if (arena_nthreads_get(arena_get(tsd, choose[j], false), - !!j) == 0 || first_null == narenas_auto) { + if (arena_nthreads_get(arena_get(tsd_tsdn(tsd), + choose[j], false), !!j) == 0 || first_null == + narenas_auto) { /* * Use an unloaded arena, or the least loaded * arena if all arenas are already initialized. */ - if (!!j == internal) - ret = arena_get(tsd, choose[j], false); + if (!!j == internal) { + ret = arena_get(tsd_tsdn(tsd), + choose[j], false); + } } else { arena_t *arena; /* Initialize a new arena. 
*/ choose[j] = first_null; - arena = arena_init_locked(tsd, choose[j]); + arena = arena_init_locked(tsd_tsdn(tsd), + choose[j]); if (arena == NULL) { - malloc_mutex_unlock(tsd, &arenas_lock); + malloc_mutex_unlock(tsd_tsdn(tsd), + &arenas_lock); return (NULL); } if (!!j == internal) @@ -641,9 +647,9 @@ arena_choose_hard(tsd_t *tsd, bool internal) } arena_bind(tsd, choose[j], !!j); } - malloc_mutex_unlock(tsd, &arenas_lock); + malloc_mutex_unlock(tsd_tsdn(tsd), &arenas_lock); } else { - ret = arena_get(tsd, 0, false); + ret = arena_get(tsd_tsdn(tsd), 0, false); arena_bind(tsd, 0, false); arena_bind(tsd, 0, true); } @@ -719,10 +725,10 @@ stats_print_atexit(void) { if (config_tcache && config_stats) { - tsd_t *tsd; + tsdn_t *tsdn; unsigned narenas, i; - tsd = tsd_fetch(); + tsdn = tsdn_fetch(); /* * Merge stats from extant threads. This is racy, since @@ -732,7 +738,7 @@ stats_print_atexit(void) * continue to allocate. */ for (i = 0, narenas = narenas_total_get(); i < narenas; i++) { - arena_t *arena = arena_get(tsd, i, false); + arena_t *arena = arena_get(tsdn, i, false); if (arena != NULL) { tcache_t *tcache; @@ -742,11 +748,11 @@ stats_print_atexit(void) * and bin locks in the opposite order, * deadlocks may result. */ - malloc_mutex_lock(tsd, &arena->lock); + malloc_mutex_lock(tsdn, &arena->lock); ql_foreach(tcache, &arena->tcache_ql, link) { - tcache_stats_merge(tsd, tcache, arena); + tcache_stats_merge(tsdn, tcache, arena); } - malloc_mutex_unlock(tsd, &arena->lock); + malloc_mutex_unlock(tsdn, &arena->lock); } } } @@ -1256,7 +1262,7 @@ malloc_init_hard_needed(void) } static bool -malloc_init_hard_a0_locked(tsd_t **tsd) +malloc_init_hard_a0_locked() { malloc_initializer = INITIALIZER; @@ -1283,7 +1289,7 @@ malloc_init_hard_a0_locked(tsd_t **tsd) prof_boot1(); if (arena_boot()) return (true); - if (config_tcache && tcache_boot(*tsd)) + if (config_tcache && tcache_boot(TSDN_NULL)) return (true); if (malloc_mutex_init(&arenas_lock, "arenas", WITNESS_RANK_ARENAS)) return (true); @@ -1299,15 +1305,7 @@ malloc_init_hard_a0_locked(tsd_t **tsd) * Initialize one arena here. The rest are lazily created in * arena_choose_hard(). */ - if (arena_init(*tsd, 0) == NULL) - return (true); - - /* - * Initialize tsd, since some code paths cause chunk allocation, which - * in turn depends on tsd. - */ - *tsd = malloc_tsd_boot0(); - if (*tsd == NULL) + if (arena_init(TSDN_NULL, 0) == NULL) return (true); malloc_init_state = malloc_init_a0_initialized; @@ -1319,21 +1317,19 @@ static bool malloc_init_hard_a0(void) { bool ret; - tsd_t *tsd = NULL; - malloc_mutex_lock(tsd, &init_lock); - ret = malloc_init_hard_a0_locked(&tsd); - malloc_mutex_unlock(tsd, &init_lock); + malloc_mutex_lock(TSDN_NULL, &init_lock); + ret = malloc_init_hard_a0_locked(); + malloc_mutex_unlock(TSDN_NULL, &init_lock); return (ret); } /* Initialize data structures which may trigger recursive allocation. 
*/ static bool -malloc_init_hard_recursible(tsd_t *tsd) +malloc_init_hard_recursible(void) { malloc_init_state = malloc_init_recursible; - malloc_mutex_unlock(tsd, &init_lock); ncpus = malloc_ncpus(); @@ -1345,17 +1341,15 @@ malloc_init_hard_recursible(tsd_t *tsd) malloc_write(": Error in pthread_atfork()\n"); if (opt_abort) abort(); - malloc_mutex_lock(tsd, &init_lock); return (true); } #endif - malloc_mutex_lock(tsd, &init_lock); return (false); } static bool -malloc_init_hard_finish(tsd_t *tsd) +malloc_init_hard_finish(tsdn_t *tsdn) { if (malloc_mutex_boot()) @@ -1383,7 +1377,7 @@ malloc_init_hard_finish(tsd_t *tsd) narenas_total_set(narenas_auto); /* Allocate and initialize arenas. */ - arenas = (arena_t **)base_alloc(tsd, sizeof(arena_t *) * + arenas = (arena_t **)base_alloc(tsdn, sizeof(arena_t *) * (MALLOCX_ARENA_MAX+1)); if (arenas == NULL) return (true); @@ -1399,39 +1393,43 @@ malloc_init_hard_finish(tsd_t *tsd) static bool malloc_init_hard(void) { - tsd_t *tsd = NULL; + tsd_t *tsd; #if defined(_WIN32) && _WIN32_WINNT < 0x0600 _init_init_lock(); #endif - malloc_mutex_lock(tsd, &init_lock); + malloc_mutex_lock(TSDN_NULL, &init_lock); if (!malloc_init_hard_needed()) { - malloc_mutex_unlock(tsd, &init_lock); + malloc_mutex_unlock(TSDN_NULL, &init_lock); return (false); } if (malloc_init_state != malloc_init_a0_initialized && - malloc_init_hard_a0_locked(&tsd)) { - malloc_mutex_unlock(tsd, &init_lock); + malloc_init_hard_a0_locked()) { + malloc_mutex_unlock(TSDN_NULL, &init_lock); return (true); } - if (malloc_init_hard_recursible(tsd)) { - malloc_mutex_unlock(tsd, &init_lock); + malloc_mutex_unlock(TSDN_NULL, &init_lock); + /* Recursive allocation relies on functional tsd. */ + tsd = malloc_tsd_boot0(); + if (tsd == NULL) + return (true); + if (malloc_init_hard_recursible()) + return (true); + malloc_mutex_lock(tsd_tsdn(tsd), &init_lock); + + if (config_prof && prof_boot2(tsd_tsdn(tsd))) { + malloc_mutex_unlock(tsd_tsdn(tsd), &init_lock); return (true); } - if (config_prof && prof_boot2(tsd)) { - malloc_mutex_unlock(tsd, &init_lock); + if (malloc_init_hard_finish(tsd_tsdn(tsd))) { + malloc_mutex_unlock(tsd_tsdn(tsd), &init_lock); return (true); } - if (malloc_init_hard_finish(tsd)) { - malloc_mutex_unlock(tsd, &init_lock); - return (true); - } - - malloc_mutex_unlock(tsd, &init_lock); + malloc_mutex_unlock(tsd_tsdn(tsd), &init_lock); malloc_tsd_boot1(); return (false); } @@ -1457,7 +1455,7 @@ ialloc_prof_sample(tsd_t *tsd, size_t usize, szind_t ind, bool zero, p = ialloc(tsd, LARGE_MINCLASS, ind_large, zero, slow_path); if (p == NULL) return (NULL); - arena_prof_promoted(tsd, p, usize); + arena_prof_promoted(tsd_tsdn(tsd), p, usize); } else p = ialloc(tsd, usize, ind, zero, slow_path); @@ -1479,7 +1477,7 @@ ialloc_prof(tsd_t *tsd, size_t usize, szind_t ind, bool zero, bool slow_path) prof_alloc_rollback(tsd, tctx, true); return (NULL); } - prof_malloc(tsd, p, usize, tctx); + prof_malloc(tsd_tsdn(tsd), p, usize, tctx); return (p); } @@ -1487,19 +1485,24 @@ ialloc_prof(tsd_t *tsd, size_t usize, szind_t ind, bool zero, bool slow_path) /* * ialloc_body() is inlined so that fast and slow paths are generated separately * with statically known slow_path. + * + * This function guarantees that *tsdn is non-NULL on success. 
*/ JEMALLOC_ALWAYS_INLINE_C void * -ialloc_body(size_t size, bool zero, tsd_t **tsd, size_t *usize, bool slow_path) +ialloc_body(size_t size, bool zero, tsdn_t **tsdn, size_t *usize, + bool slow_path) { + tsd_t *tsd; szind_t ind; if (slow_path && unlikely(malloc_init())) { - *tsd = NULL; + *tsdn = NULL; return (NULL); } - *tsd = tsd_fetch(); - witness_assert_lockless(*tsd); + tsd = tsd_fetch(); + *tsdn = tsd_tsdn(tsd); + witness_assert_lockless(tsd_tsdn(tsd)); ind = size2index(size); if (unlikely(ind >= NSIZES)) @@ -1512,16 +1515,18 @@ ialloc_body(size_t size, bool zero, tsd_t **tsd, size_t *usize, bool slow_path) } if (config_prof && opt_prof) - return (ialloc_prof(*tsd, *usize, ind, zero, slow_path)); + return (ialloc_prof(tsd, *usize, ind, zero, slow_path)); - return (ialloc(*tsd, size, ind, zero, slow_path)); + return (ialloc(tsd, size, ind, zero, slow_path)); } JEMALLOC_ALWAYS_INLINE_C void -ialloc_post_check(void *ret, tsd_t *tsd, size_t usize, const char *func, +ialloc_post_check(void *ret, tsdn_t *tsdn, size_t usize, const char *func, bool update_errno, bool slow_path) { + assert(!tsdn_null(tsdn) || ret == NULL); + if (unlikely(ret == NULL)) { if (slow_path && config_xmalloc && unlikely(opt_xmalloc)) { malloc_printf(": Error in %s(): out of " @@ -1532,10 +1537,10 @@ ialloc_post_check(void *ret, tsd_t *tsd, size_t usize, const char *func, set_errno(ENOMEM); } if (config_stats && likely(ret != NULL)) { - assert(usize == isalloc(tsd, ret, config_prof)); - *tsd_thread_allocatedp_get(tsd) += usize; + assert(usize == isalloc(tsdn, ret, config_prof)); + *tsd_thread_allocatedp_get(tsdn_tsd(tsdn)) += usize; } - witness_assert_lockless(tsd); + witness_assert_lockless(tsdn); } JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN @@ -1544,20 +1549,20 @@ JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1) je_malloc(size_t size) { void *ret; - tsd_t *tsd; + tsdn_t *tsdn; size_t usize JEMALLOC_CC_SILENCE_INIT(0); if (size == 0) size = 1; if (likely(!malloc_slow)) { - ret = ialloc_body(size, false, &tsd, &usize, false); - ialloc_post_check(ret, tsd, usize, "malloc", true, false); + ret = ialloc_body(size, false, &tsdn, &usize, false); + ialloc_post_check(ret, tsdn, usize, "malloc", true, false); } else { - ret = ialloc_body(size, false, &tsd, &usize, true); - ialloc_post_check(ret, tsd, usize, "malloc", true, true); + ret = ialloc_body(size, false, &tsdn, &usize, true); + ialloc_post_check(ret, tsdn, usize, "malloc", true, true); UTRACE(0, size, ret); - JEMALLOC_VALGRIND_MALLOC(ret != NULL, tsd, ret, usize, false); + JEMALLOC_VALGRIND_MALLOC(ret != NULL, tsdn, ret, usize, false); } return (ret); @@ -1576,7 +1581,7 @@ imemalign_prof_sample(tsd_t *tsd, size_t alignment, size_t usize, p = ipalloc(tsd, LARGE_MINCLASS, alignment, false); if (p == NULL) return (NULL); - arena_prof_promoted(tsd, p, usize); + arena_prof_promoted(tsd_tsdn(tsd), p, usize); } else p = ipalloc(tsd, usize, alignment, false); @@ -1598,7 +1603,7 @@ imemalign_prof(tsd_t *tsd, size_t alignment, size_t usize) prof_alloc_rollback(tsd, tctx, true); return (NULL); } - prof_malloc(tsd, p, usize, tctx); + prof_malloc(tsd_tsdn(tsd), p, usize, tctx); return (p); } @@ -1620,7 +1625,7 @@ imemalign(void **memptr, size_t alignment, size_t size, size_t min_alignment) goto label_oom; } tsd = tsd_fetch(); - witness_assert_lockless(tsd); + witness_assert_lockless(tsd_tsdn(tsd)); if (size == 0) size = 1; @@ -1655,12 +1660,13 @@ imemalign(void **memptr, size_t alignment, size_t size, size_t min_alignment) ret = 0; label_return: if (config_stats && 
likely(result != NULL)) { - assert(usize == isalloc(tsd, result, config_prof)); + assert(usize == isalloc(tsd_tsdn(tsd), result, config_prof)); *tsd_thread_allocatedp_get(tsd) += usize; } UTRACE(0, size, result); - JEMALLOC_VALGRIND_MALLOC(result != NULL, tsd, result, usize, false); - witness_assert_lockless(tsd); + JEMALLOC_VALGRIND_MALLOC(result != NULL, tsd_tsdn(tsd), result, usize, + false); + witness_assert_lockless(tsd_tsdn(tsd)); return (ret); label_oom: assert(result == NULL); @@ -1670,7 +1676,7 @@ label_oom: abort(); } ret = ENOMEM; - witness_assert_lockless(tsd); + witness_assert_lockless(tsd_tsdn(tsd)); goto label_return; } @@ -1707,7 +1713,7 @@ JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE2(1, 2) je_calloc(size_t num, size_t size) { void *ret; - tsd_t *tsd; + tsdn_t *tsdn; size_t num_size; size_t usize JEMALLOC_CC_SILENCE_INIT(0); @@ -1727,13 +1733,13 @@ je_calloc(size_t num, size_t size) num_size = HUGE_MAXCLASS + 1; /* size_t overflow. */ if (likely(!malloc_slow)) { - ret = ialloc_body(num_size, true, &tsd, &usize, false); - ialloc_post_check(ret, tsd, usize, "calloc", true, false); + ret = ialloc_body(num_size, true, &tsdn, &usize, false); + ialloc_post_check(ret, tsdn, usize, "calloc", true, false); } else { - ret = ialloc_body(num_size, true, &tsd, &usize, true); - ialloc_post_check(ret, tsd, usize, "calloc", true, true); + ret = ialloc_body(num_size, true, &tsdn, &usize, true); + ialloc_post_check(ret, tsdn, usize, "calloc", true, true); UTRACE(0, num_size, ret); - JEMALLOC_VALGRIND_MALLOC(ret != NULL, tsd, ret, usize, false); + JEMALLOC_VALGRIND_MALLOC(ret != NULL, tsdn, ret, usize, false); } return (ret); @@ -1751,7 +1757,7 @@ irealloc_prof_sample(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t usize, p = iralloc(tsd, old_ptr, old_usize, LARGE_MINCLASS, 0, false); if (p == NULL) return (NULL); - arena_prof_promoted(tsd, p, usize); + arena_prof_promoted(tsd_tsdn(tsd), p, usize); } else p = iralloc(tsd, old_ptr, old_usize, usize, 0, false); @@ -1766,7 +1772,7 @@ irealloc_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t usize) prof_tctx_t *old_tctx, *tctx; prof_active = prof_active_get_unlocked(); - old_tctx = prof_tctx_get(tsd, old_ptr); + old_tctx = prof_tctx_get(tsd_tsdn(tsd), old_ptr); tctx = prof_alloc_prep(tsd, usize, prof_active, true); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) p = irealloc_prof_sample(tsd, old_ptr, old_usize, usize, tctx); @@ -1788,16 +1794,16 @@ ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) size_t usize; UNUSED size_t rzsize JEMALLOC_CC_SILENCE_INIT(0); - witness_assert_lockless(tsd); + witness_assert_lockless(tsd_tsdn(tsd)); assert(ptr != NULL); assert(malloc_initialized() || IS_INITIALIZER); if (config_prof && opt_prof) { - usize = isalloc(tsd, ptr, config_prof); + usize = isalloc(tsd_tsdn(tsd), ptr, config_prof); prof_free(tsd, ptr, usize); } else if (config_stats || config_valgrind) - usize = isalloc(tsd, ptr, config_prof); + usize = isalloc(tsd_tsdn(tsd), ptr, config_prof); if (config_stats) *tsd_thread_deallocatedp_get(tsd) += usize; @@ -1805,7 +1811,7 @@ ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) iqalloc(tsd, ptr, tcache, false); else { if (config_valgrind && unlikely(in_valgrind)) - rzsize = p2rz(tsd, ptr); + rzsize = p2rz(tsd_tsdn(tsd), ptr); iqalloc(tsd, ptr, tcache, true); JEMALLOC_VALGRIND_FREE(ptr, rzsize); } @@ -1816,7 +1822,7 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) { UNUSED size_t rzsize JEMALLOC_CC_SILENCE_INIT(0); - 
witness_assert_lockless(tsd); + witness_assert_lockless(tsd_tsdn(tsd)); assert(ptr != NULL); assert(malloc_initialized() || IS_INITIALIZER); @@ -1826,7 +1832,7 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) if (config_stats) *tsd_thread_deallocatedp_get(tsd) += usize; if (config_valgrind && unlikely(in_valgrind)) - rzsize = p2rz(tsd, ptr); + rzsize = p2rz(tsd_tsdn(tsd), ptr); isqalloc(tsd, ptr, usize, tcache, slow_path); JEMALLOC_VALGRIND_FREE(ptr, rzsize); } @@ -1837,13 +1843,15 @@ JEMALLOC_ALLOC_SIZE(2) je_realloc(void *ptr, size_t size) { void *ret; - tsd_t *tsd JEMALLOC_CC_SILENCE_INIT(NULL); + tsdn_t *tsdn JEMALLOC_CC_SILENCE_INIT(NULL); size_t usize JEMALLOC_CC_SILENCE_INIT(0); size_t old_usize = 0; UNUSED size_t old_rzsize JEMALLOC_CC_SILENCE_INIT(0); if (unlikely(size == 0)) { if (ptr != NULL) { + tsd_t *tsd; + /* realloc(ptr, 0) is equivalent to free(ptr). */ UTRACE(ptr, 0, 0); tsd = tsd_fetch(); @@ -1854,14 +1862,17 @@ je_realloc(void *ptr, size_t size) } if (likely(ptr != NULL)) { + tsd_t *tsd; + assert(malloc_initialized() || IS_INITIALIZER); malloc_thread_init(); tsd = tsd_fetch(); - witness_assert_lockless(tsd); - old_usize = isalloc(tsd, ptr, config_prof); + witness_assert_lockless(tsd_tsdn(tsd)); + + old_usize = isalloc(tsd_tsdn(tsd), ptr, config_prof); if (config_valgrind && unlikely(in_valgrind)) { - old_rzsize = config_prof ? p2rz(tsd, ptr) : + old_rzsize = config_prof ? p2rz(tsd_tsdn(tsd), ptr) : u2rz(old_usize); } @@ -1875,12 +1886,14 @@ je_realloc(void *ptr, size_t size) usize = s2u(size); ret = iralloc(tsd, ptr, old_usize, size, 0, false); } + tsdn = tsd_tsdn(tsd); } else { /* realloc(NULL, size) is equivalent to malloc(size). */ if (likely(!malloc_slow)) - ret = ialloc_body(size, false, &tsd, &usize, false); + ret = ialloc_body(size, false, &tsdn, &usize, false); else - ret = ialloc_body(size, false, &tsd, &usize, true); + ret = ialloc_body(size, false, &tsdn, &usize, true); + assert(!tsdn_null(tsdn) || ret == NULL); } if (unlikely(ret == NULL)) { @@ -1892,14 +1905,17 @@ je_realloc(void *ptr, size_t size) set_errno(ENOMEM); } if (config_stats && likely(ret != NULL)) { - assert(usize == isalloc(tsd, ret, config_prof)); + tsd_t *tsd; + + assert(usize == isalloc(tsdn, ret, config_prof)); + tsd = tsdn_tsd(tsdn); *tsd_thread_allocatedp_get(tsd) += usize; *tsd_thread_deallocatedp_get(tsd) += old_usize; } UTRACE(ptr, size, ret); - JEMALLOC_VALGRIND_REALLOC(true, tsd, ret, usize, true, ptr, old_usize, + JEMALLOC_VALGRIND_REALLOC(true, tsdn, ret, usize, true, ptr, old_usize, old_rzsize, true, false); - witness_assert_lockless(tsd); + witness_assert_lockless(tsdn); return (ret); } @@ -1910,12 +1926,12 @@ je_free(void *ptr) UTRACE(ptr, 0, 0); if (likely(ptr != NULL)) { tsd_t *tsd = tsd_fetch(); - witness_assert_lockless(tsd); + witness_assert_lockless(tsd_tsdn(tsd)); if (likely(!malloc_slow)) ifree(tsd, ptr, tcache_get(tsd, false), false); else ifree(tsd, ptr, tcache_get(tsd, false), true); - witness_assert_lockless(tsd); + witness_assert_lockless(tsd_tsdn(tsd)); } } @@ -2012,7 +2028,7 @@ imallocx_flags_decode(tsd_t *tsd, size_t size, int flags, size_t *usize, *tcache = tcache_get(tsd, true); if ((flags & MALLOCX_ARENA_MASK) != 0) { unsigned arena_ind = MALLOCX_ARENA_GET(flags); - *arena = arena_get(tsd, arena_ind, true); + *arena = arena_get(tsd_tsdn(tsd), arena_ind, true); if (unlikely(*arena == NULL)) return (true); } else @@ -2021,21 +2037,21 @@ imallocx_flags_decode(tsd_t *tsd, size_t size, int flags, size_t *usize, } 
JEMALLOC_ALWAYS_INLINE_C void * -imallocx_flags(tsd_t *tsd, size_t usize, size_t alignment, bool zero, +imallocx_flags(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena, bool slow_path) { szind_t ind; if (unlikely(alignment != 0)) - return (ipalloct(tsd, usize, alignment, zero, tcache, arena)); + return (ipalloct(tsdn, usize, alignment, zero, tcache, arena)); ind = size2index(usize); assert(ind < NSIZES); - return (iallocztm(tsd, usize, ind, zero, tcache, false, arena, + return (iallocztm(tsdn, usize, ind, zero, tcache, false, arena, slow_path)); } static void * -imallocx_prof_sample(tsd_t *tsd, size_t usize, size_t alignment, bool zero, +imallocx_prof_sample(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena, bool slow_path) { void *p; @@ -2043,13 +2059,13 @@ imallocx_prof_sample(tsd_t *tsd, size_t usize, size_t alignment, bool zero, if (usize <= SMALL_MAXCLASS) { assert(((alignment == 0) ? s2u(LARGE_MINCLASS) : sa2u(LARGE_MINCLASS, alignment)) == LARGE_MINCLASS); - p = imallocx_flags(tsd, LARGE_MINCLASS, alignment, zero, tcache, - arena, slow_path); + p = imallocx_flags(tsdn, LARGE_MINCLASS, alignment, zero, + tcache, arena, slow_path); if (p == NULL) return (NULL); - arena_prof_promoted(tsd, p, usize); + arena_prof_promoted(tsdn, p, usize); } else { - p = imallocx_flags(tsd, usize, alignment, zero, tcache, arena, + p = imallocx_flags(tsdn, usize, alignment, zero, tcache, arena, slow_path); } @@ -2070,19 +2086,19 @@ imallocx_prof(tsd_t *tsd, size_t size, int flags, size_t *usize, bool slow_path) &zero, &tcache, &arena))) return (NULL); tctx = prof_alloc_prep(tsd, *usize, prof_active_get_unlocked(), true); - if (likely((uintptr_t)tctx == (uintptr_t)1U)) - p = imallocx_flags(tsd, *usize, alignment, zero, tcache, arena, - slow_path); - else if ((uintptr_t)tctx > (uintptr_t)1U) { - p = imallocx_prof_sample(tsd, *usize, alignment, zero, tcache, - arena, slow_path); + if (likely((uintptr_t)tctx == (uintptr_t)1U)) { + p = imallocx_flags(tsd_tsdn(tsd), *usize, alignment, zero, + tcache, arena, slow_path); + } else if ((uintptr_t)tctx > (uintptr_t)1U) { + p = imallocx_prof_sample(tsd_tsdn(tsd), *usize, alignment, zero, + tcache, arena, slow_path); } else p = NULL; if (unlikely(p == NULL)) { prof_alloc_rollback(tsd, tctx, true); return (NULL); } - prof_malloc(tsd, p, *usize, tctx); + prof_malloc(tsd_tsdn(tsd), p, *usize, tctx); assert(alignment == 0 || ((uintptr_t)p & (alignment - 1)) == ZU(0)); return (p); @@ -2101,24 +2117,27 @@ imallocx_no_prof(tsd_t *tsd, size_t size, int flags, size_t *usize, if (unlikely(imallocx_flags_decode(tsd, size, flags, usize, &alignment, &zero, &tcache, &arena))) return (NULL); - p = imallocx_flags(tsd, *usize, alignment, zero, tcache, arena, - slow_path); + p = imallocx_flags(tsd_tsdn(tsd), *usize, alignment, zero, tcache, + arena, slow_path); assert(alignment == 0 || ((uintptr_t)p & (alignment - 1)) == ZU(0)); return (p); } +/* This function guarantees that *tsdn is non-NULL on success. 
*/ JEMALLOC_ALWAYS_INLINE_C void * -imallocx_body(size_t size, int flags, tsd_t **tsd, size_t *usize, +imallocx_body(size_t size, int flags, tsdn_t **tsdn, size_t *usize, bool slow_path) { + tsd_t *tsd; if (slow_path && unlikely(malloc_init())) { - *tsd = NULL; + *tsdn = NULL; return (NULL); } - *tsd = tsd_fetch(); - witness_assert_lockless(*tsd); + tsd = tsd_fetch(); + *tsdn = tsd_tsdn(tsd); + witness_assert_lockless(tsd_tsdn(tsd)); if (likely(flags == 0)) { szind_t ind = size2index(size); @@ -2131,17 +2150,17 @@ imallocx_body(size_t size, int flags, tsd_t **tsd, size_t *usize, } if (config_prof && opt_prof) { - return (ialloc_prof(*tsd, *usize, ind, false, + return (ialloc_prof(tsd, *usize, ind, false, slow_path)); } - return (ialloc(*tsd, size, ind, false, slow_path)); + return (ialloc(tsd, size, ind, false, slow_path)); } if (config_prof && opt_prof) - return (imallocx_prof(*tsd, size, flags, usize, slow_path)); + return (imallocx_prof(tsd, size, flags, usize, slow_path)); - return (imallocx_no_prof(*tsd, size, flags, usize, slow_path)); + return (imallocx_no_prof(tsd, size, flags, usize, slow_path)); } JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN @@ -2149,20 +2168,20 @@ void JEMALLOC_NOTHROW * JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1) je_mallocx(size_t size, int flags) { - tsd_t *tsd; + tsdn_t *tsdn; void *p; size_t usize; assert(size != 0); if (likely(!malloc_slow)) { - p = imallocx_body(size, flags, &tsd, &usize, false); - ialloc_post_check(p, tsd, usize, "mallocx", false, false); + p = imallocx_body(size, flags, &tsdn, &usize, false); + ialloc_post_check(p, tsdn, usize, "mallocx", false, false); } else { - p = imallocx_body(size, flags, &tsd, &usize, true); - ialloc_post_check(p, tsd, usize, "mallocx", false, true); + p = imallocx_body(size, flags, &tsdn, &usize, true); + ialloc_post_check(p, tsdn, usize, "mallocx", false, true); UTRACE(0, size, p); - JEMALLOC_VALGRIND_MALLOC(p != NULL, tsd, p, usize, + JEMALLOC_VALGRIND_MALLOC(p != NULL, tsdn, p, usize, MALLOCX_ZERO_GET(flags)); } @@ -2183,7 +2202,7 @@ irallocx_prof_sample(tsd_t *tsd, void *old_ptr, size_t old_usize, zero, tcache, arena); if (p == NULL) return (NULL); - arena_prof_promoted(tsd, p, usize); + arena_prof_promoted(tsd_tsdn(tsd), p, usize); } else { p = iralloct(tsd, old_ptr, old_usize, usize, alignment, zero, tcache, arena); @@ -2202,7 +2221,7 @@ irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size, prof_tctx_t *old_tctx, *tctx; prof_active = prof_active_get_unlocked(); - old_tctx = prof_tctx_get(tsd, old_ptr); + old_tctx = prof_tctx_get(tsd_tsdn(tsd), old_ptr); tctx = prof_alloc_prep(tsd, *usize, prof_active, true); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { p = irallocx_prof_sample(tsd, old_ptr, old_usize, *usize, @@ -2225,7 +2244,7 @@ irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size, * be the same as the current usize because of in-place large * reallocation. Therefore, query the actual value of usize. 
*/ - *usize = isalloc(tsd, p, config_prof); + *usize = isalloc(tsd_tsdn(tsd), p, config_prof); } prof_realloc(tsd, p, *usize, tctx, prof_active, true, old_ptr, old_usize, old_tctx); @@ -2253,11 +2272,11 @@ je_rallocx(void *ptr, size_t size, int flags) assert(malloc_initialized() || IS_INITIALIZER); malloc_thread_init(); tsd = tsd_fetch(); - witness_assert_lockless(tsd); + witness_assert_lockless(tsd_tsdn(tsd)); if (unlikely((flags & MALLOCX_ARENA_MASK) != 0)) { unsigned arena_ind = MALLOCX_ARENA_GET(flags); - arena = arena_get(tsd, arena_ind, true); + arena = arena_get(tsd_tsdn(tsd), arena_ind, true); if (unlikely(arena == NULL)) goto label_oom; } else @@ -2271,7 +2290,7 @@ je_rallocx(void *ptr, size_t size, int flags) } else tcache = tcache_get(tsd, true); - old_usize = isalloc(tsd, ptr, config_prof); + old_usize = isalloc(tsd_tsdn(tsd), ptr, config_prof); if (config_valgrind && unlikely(in_valgrind)) old_rzsize = u2rz(old_usize); @@ -2289,7 +2308,7 @@ je_rallocx(void *ptr, size_t size, int flags) if (unlikely(p == NULL)) goto label_oom; if (config_stats || (config_valgrind && unlikely(in_valgrind))) - usize = isalloc(tsd, p, config_prof); + usize = isalloc(tsd_tsdn(tsd), p, config_prof); } assert(alignment == 0 || ((uintptr_t)p & (alignment - 1)) == ZU(0)); @@ -2298,9 +2317,9 @@ je_rallocx(void *ptr, size_t size, int flags) *tsd_thread_deallocatedp_get(tsd) += old_usize; } UTRACE(ptr, size, p); - JEMALLOC_VALGRIND_REALLOC(true, tsd, p, usize, false, ptr, old_usize, - old_rzsize, false, zero); - witness_assert_lockless(tsd); + JEMALLOC_VALGRIND_REALLOC(true, tsd_tsdn(tsd), p, usize, false, ptr, + old_usize, old_rzsize, false, zero); + witness_assert_lockless(tsd_tsdn(tsd)); return (p); label_oom: if (config_xmalloc && unlikely(opt_xmalloc)) { @@ -2308,32 +2327,32 @@ label_oom: abort(); } UTRACE(ptr, size, 0); - witness_assert_lockless(tsd); + witness_assert_lockless(tsd_tsdn(tsd)); return (NULL); } JEMALLOC_ALWAYS_INLINE_C size_t -ixallocx_helper(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, +ixallocx_helper(tsdn_t *tsdn, void *ptr, size_t old_usize, size_t size, size_t extra, size_t alignment, bool zero) { size_t usize; - if (ixalloc(tsd, ptr, old_usize, size, extra, alignment, zero)) + if (ixalloc(tsdn, ptr, old_usize, size, extra, alignment, zero)) return (old_usize); - usize = isalloc(tsd, ptr, config_prof); + usize = isalloc(tsdn, ptr, config_prof); return (usize); } static size_t -ixallocx_prof_sample(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, +ixallocx_prof_sample(tsdn_t *tsdn, void *ptr, size_t old_usize, size_t size, size_t extra, size_t alignment, bool zero, prof_tctx_t *tctx) { size_t usize; if (tctx == NULL) return (old_usize); - usize = ixallocx_helper(tsd, ptr, old_usize, size, extra, alignment, + usize = ixallocx_helper(tsdn, ptr, old_usize, size, extra, alignment, zero); return (usize); @@ -2348,7 +2367,7 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, prof_tctx_t *old_tctx, *tctx; prof_active = prof_active_get_unlocked(); - old_tctx = prof_tctx_get(tsd, ptr); + old_tctx = prof_tctx_get(tsd_tsdn(tsd), ptr); /* * usize isn't knowable before ixalloc() returns when extra is non-zero. 
* Therefore, compute its maximum possible value and use that in @@ -2373,11 +2392,11 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, tctx = prof_alloc_prep(tsd, usize_max, prof_active, false); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { - usize = ixallocx_prof_sample(tsd, ptr, old_usize, size, extra, - alignment, zero, tctx); + usize = ixallocx_prof_sample(tsd_tsdn(tsd), ptr, old_usize, + size, extra, alignment, zero, tctx); } else { - usize = ixallocx_helper(tsd, ptr, old_usize, size, extra, - alignment, zero); + usize = ixallocx_helper(tsd_tsdn(tsd), ptr, old_usize, size, + extra, alignment, zero); } if (usize == old_usize) { prof_alloc_rollback(tsd, tctx, false); @@ -2404,9 +2423,9 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) assert(malloc_initialized() || IS_INITIALIZER); malloc_thread_init(); tsd = tsd_fetch(); - witness_assert_lockless(tsd); + witness_assert_lockless(tsd_tsdn(tsd)); - old_usize = isalloc(tsd, ptr, config_prof); + old_usize = isalloc(tsd_tsdn(tsd), ptr, config_prof); /* * The API explicitly absolves itself of protecting against (size + @@ -2431,8 +2450,8 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) usize = ixallocx_prof(tsd, ptr, old_usize, size, extra, alignment, zero); } else { - usize = ixallocx_helper(tsd, ptr, old_usize, size, extra, - alignment, zero); + usize = ixallocx_helper(tsd_tsdn(tsd), ptr, old_usize, size, + extra, alignment, zero); } if (unlikely(usize == old_usize)) goto label_not_resized; @@ -2441,11 +2460,11 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) *tsd_thread_allocatedp_get(tsd) += usize; *tsd_thread_deallocatedp_get(tsd) += old_usize; } - JEMALLOC_VALGRIND_REALLOC(false, tsd, ptr, usize, false, ptr, old_usize, - old_rzsize, false, zero); + JEMALLOC_VALGRIND_REALLOC(false, tsd_tsdn(tsd), ptr, usize, false, ptr, + old_usize, old_rzsize, false, zero); label_not_resized: UTRACE(ptr, size, ptr); - witness_assert_lockless(tsd); + witness_assert_lockless(tsd_tsdn(tsd)); return (usize); } @@ -2454,20 +2473,20 @@ JEMALLOC_ATTR(pure) je_sallocx(const void *ptr, int flags) { size_t usize; - tsd_t *tsd; + tsdn_t *tsdn; assert(malloc_initialized() || IS_INITIALIZER); malloc_thread_init(); - tsd = tsd_fetch(); - witness_assert_lockless(tsd); + tsdn = tsdn_fetch(); + witness_assert_lockless(tsdn); if (config_ivsalloc) - usize = ivsalloc(tsd, ptr, config_prof); + usize = ivsalloc(tsdn, ptr, config_prof); else - usize = isalloc(tsd, ptr, config_prof); + usize = isalloc(tsdn, ptr, config_prof); - witness_assert_lockless(tsd); + witness_assert_lockless(tsdn); return (usize); } @@ -2481,7 +2500,7 @@ je_dallocx(void *ptr, int flags) assert(malloc_initialized() || IS_INITIALIZER); tsd = tsd_fetch(); - witness_assert_lockless(tsd); + witness_assert_lockless(tsd_tsdn(tsd)); if (unlikely((flags & MALLOCX_TCACHE_MASK) != 0)) { if ((flags & MALLOCX_TCACHE_MASK) == MALLOCX_TCACHE_NONE) tcache = NULL; @@ -2495,21 +2514,21 @@ je_dallocx(void *ptr, int flags) ifree(tsd, ptr, tcache, false); else ifree(tsd, ptr, tcache, true); - witness_assert_lockless(tsd); + witness_assert_lockless(tsd_tsdn(tsd)); } JEMALLOC_ALWAYS_INLINE_C size_t -inallocx(tsd_t *tsd, size_t size, int flags) +inallocx(tsdn_t *tsdn, size_t size, int flags) { size_t usize; - witness_assert_lockless(tsd); + witness_assert_lockless(tsdn); if (likely((flags & MALLOCX_LG_ALIGN_MASK) == 0)) usize = s2u(size); else usize = sa2u(size, MALLOCX_ALIGN_GET_SPECIFIED(flags)); - witness_assert_lockless(tsd); + 
witness_assert_lockless(tsdn); return (usize); } @@ -2523,10 +2542,10 @@ je_sdallocx(void *ptr, size_t size, int flags) assert(ptr != NULL); assert(malloc_initialized() || IS_INITIALIZER); tsd = tsd_fetch(); - usize = inallocx(tsd, size, flags); - assert(usize == isalloc(tsd, ptr, config_prof)); + usize = inallocx(tsd_tsdn(tsd), size, flags); + assert(usize == isalloc(tsd_tsdn(tsd), ptr, config_prof)); - witness_assert_lockless(tsd); + witness_assert_lockless(tsd_tsdn(tsd)); if (unlikely((flags & MALLOCX_TCACHE_MASK) != 0)) { if ((flags & MALLOCX_TCACHE_MASK) == MALLOCX_TCACHE_NONE) tcache = NULL; @@ -2540,7 +2559,7 @@ je_sdallocx(void *ptr, size_t size, int flags) isfree(tsd, ptr, usize, tcache, false); else isfree(tsd, ptr, usize, tcache, true); - witness_assert_lockless(tsd); + witness_assert_lockless(tsd_tsdn(tsd)); } JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW @@ -2548,21 +2567,21 @@ JEMALLOC_ATTR(pure) je_nallocx(size_t size, int flags) { size_t usize; - tsd_t *tsd; + tsdn_t *tsdn; assert(size != 0); if (unlikely(malloc_init())) return (0); - tsd = tsd_fetch(); - witness_assert_lockless(tsd); + tsdn = tsdn_fetch(); + witness_assert_lockless(tsdn); - usize = inallocx(tsd, size, flags); + usize = inallocx(tsdn, size, flags); if (unlikely(usize > HUGE_MAXCLASS)) return (0); - witness_assert_lockless(tsd); + witness_assert_lockless(tsdn); return (usize); } @@ -2577,9 +2596,9 @@ je_mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp, return (EAGAIN); tsd = tsd_fetch(); - witness_assert_lockless(tsd); + witness_assert_lockless(tsd_tsdn(tsd)); ret = ctl_byname(tsd, name, oldp, oldlenp, newp, newlen); - witness_assert_lockless(tsd); + witness_assert_lockless(tsd_tsdn(tsd)); return (ret); } @@ -2587,15 +2606,15 @@ JEMALLOC_EXPORT int JEMALLOC_NOTHROW je_mallctlnametomib(const char *name, size_t *mibp, size_t *miblenp) { int ret; - tsd_t *tsd; + tsdn_t *tsdn; if (unlikely(malloc_init())) return (EAGAIN); - tsd = tsd_fetch(); - witness_assert_lockless(tsd); - ret = ctl_nametomib(tsd, name, mibp, miblenp); - witness_assert_lockless(tsd); + tsdn = tsdn_fetch(); + witness_assert_lockless(tsdn); + ret = ctl_nametomib(tsdn, name, mibp, miblenp); + witness_assert_lockless(tsdn); return (ret); } @@ -2610,9 +2629,9 @@ je_mallctlbymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, return (EAGAIN); tsd = tsd_fetch(); - witness_assert_lockless(tsd); + witness_assert_lockless(tsd_tsdn(tsd)); ret = ctl_bymib(tsd, mib, miblen, oldp, oldlenp, newp, newlen); - witness_assert_lockless(tsd); + witness_assert_lockless(tsd_tsdn(tsd)); return (ret); } @@ -2620,32 +2639,32 @@ JEMALLOC_EXPORT void JEMALLOC_NOTHROW je_malloc_stats_print(void (*write_cb)(void *, const char *), void *cbopaque, const char *opts) { - tsd_t *tsd; + tsdn_t *tsdn; - tsd = tsd_fetch(); - witness_assert_lockless(tsd); + tsdn = tsdn_fetch(); + witness_assert_lockless(tsdn); stats_print(write_cb, cbopaque, opts); - witness_assert_lockless(tsd); + witness_assert_lockless(tsdn); } JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) { size_t ret; - tsd_t *tsd; + tsdn_t *tsdn; assert(malloc_initialized() || IS_INITIALIZER); malloc_thread_init(); - tsd = tsd_fetch(); - witness_assert_lockless(tsd); + tsdn = tsdn_fetch(); + witness_assert_lockless(tsdn); if (config_ivsalloc) - ret = ivsalloc(tsd, ptr, config_prof); + ret = ivsalloc(tsdn, ptr, config_prof); else - ret = (ptr == NULL) ? 0 : isalloc(tsd, ptr, config_prof); + ret = (ptr == NULL) ? 
0 : isalloc(tsdn, ptr, config_prof); - witness_assert_lockless(tsd); + witness_assert_lockless(tsdn); return (ret); } @@ -2705,28 +2724,35 @@ _malloc_prefork(void) witness_prefork(tsd); /* Acquire all mutexes in a safe order. */ - ctl_prefork(tsd); - malloc_mutex_prefork(tsd, &arenas_lock); - prof_prefork0(tsd); + ctl_prefork(tsd_tsdn(tsd)); + malloc_mutex_prefork(tsd_tsdn(tsd), &arenas_lock); + prof_prefork0(tsd_tsdn(tsd)); for (i = 0; i < 3; i++) { for (j = 0; j < narenas; j++) { - if ((arena = arena_get(tsd, j, false)) != NULL) { + if ((arena = arena_get(tsd_tsdn(tsd), j, false)) != + NULL) { switch (i) { - case 0: arena_prefork0(tsd, arena); break; - case 1: arena_prefork1(tsd, arena); break; - case 2: arena_prefork2(tsd, arena); break; + case 0: + arena_prefork0(tsd_tsdn(tsd), arena); + break; + case 1: + arena_prefork1(tsd_tsdn(tsd), arena); + break; + case 2: + arena_prefork2(tsd_tsdn(tsd), arena); + break; default: not_reached(); } } } } - base_prefork(tsd); - chunk_prefork(tsd); + base_prefork(tsd_tsdn(tsd)); + chunk_prefork(tsd_tsdn(tsd)); for (i = 0; i < narenas; i++) { - if ((arena = arena_get(tsd, i, false)) != NULL) - arena_prefork3(tsd, arena); + if ((arena = arena_get(tsd_tsdn(tsd), i, false)) != NULL) + arena_prefork3(tsd_tsdn(tsd), arena); } - prof_prefork1(tsd); + prof_prefork1(tsd_tsdn(tsd)); } #ifndef JEMALLOC_MUTEX_INIT_CB @@ -2750,17 +2776,17 @@ _malloc_postfork(void) witness_postfork_parent(tsd); /* Release all mutexes, now that fork() has completed. */ - chunk_postfork_parent(tsd); - base_postfork_parent(tsd); + chunk_postfork_parent(tsd_tsdn(tsd)); + base_postfork_parent(tsd_tsdn(tsd)); for (i = 0, narenas = narenas_total_get(); i < narenas; i++) { arena_t *arena; - if ((arena = arena_get(tsd, i, false)) != NULL) - arena_postfork_parent(tsd, arena); + if ((arena = arena_get(tsd_tsdn(tsd), i, false)) != NULL) + arena_postfork_parent(tsd_tsdn(tsd), arena); } - prof_postfork_parent(tsd); - malloc_mutex_postfork_parent(tsd, &arenas_lock); - ctl_postfork_parent(tsd); + prof_postfork_parent(tsd_tsdn(tsd)); + malloc_mutex_postfork_parent(tsd_tsdn(tsd), &arenas_lock); + ctl_postfork_parent(tsd_tsdn(tsd)); } void @@ -2775,17 +2801,17 @@ jemalloc_postfork_child(void) witness_postfork_child(tsd); /* Release all mutexes, now that fork() has completed. 
*/ - chunk_postfork_child(tsd); - base_postfork_child(tsd); + chunk_postfork_child(tsd_tsdn(tsd)); + base_postfork_child(tsd_tsdn(tsd)); for (i = 0, narenas = narenas_total_get(); i < narenas; i++) { arena_t *arena; - if ((arena = arena_get(tsd, i, false)) != NULL) - arena_postfork_child(tsd, arena); + if ((arena = arena_get(tsd_tsdn(tsd), i, false)) != NULL) + arena_postfork_child(tsd_tsdn(tsd), arena); } - prof_postfork_child(tsd); - malloc_mutex_postfork_child(tsd, &arenas_lock); - ctl_postfork_child(tsd); + prof_postfork_child(tsd_tsdn(tsd)); + malloc_mutex_postfork_child(tsd_tsdn(tsd), &arenas_lock); + ctl_postfork_child(tsd_tsdn(tsd)); } /******************************************************************************/ diff --git a/src/mutex.c b/src/mutex.c index 4174f42e..a1fac342 100644 --- a/src/mutex.c +++ b/src/mutex.c @@ -109,25 +109,25 @@ malloc_mutex_init(malloc_mutex_t *mutex, const char *name, witness_rank_t rank) } void -malloc_mutex_prefork(tsd_t *tsd, malloc_mutex_t *mutex) +malloc_mutex_prefork(tsdn_t *tsdn, malloc_mutex_t *mutex) { - malloc_mutex_lock(tsd, mutex); + malloc_mutex_lock(tsdn, mutex); } void -malloc_mutex_postfork_parent(tsd_t *tsd, malloc_mutex_t *mutex) +malloc_mutex_postfork_parent(tsdn_t *tsdn, malloc_mutex_t *mutex) { - malloc_mutex_unlock(tsd, mutex); + malloc_mutex_unlock(tsdn, mutex); } void -malloc_mutex_postfork_child(tsd_t *tsd, malloc_mutex_t *mutex) +malloc_mutex_postfork_child(tsdn_t *tsdn, malloc_mutex_t *mutex) { #ifdef JEMALLOC_MUTEX_INIT_CB - malloc_mutex_unlock(tsd, mutex); + malloc_mutex_unlock(tsdn, mutex); #else if (malloc_mutex_init(mutex, mutex->witness.name, mutex->witness.rank)) { diff --git a/src/prof.c b/src/prof.c index df7f1f9b..c1f58d46 100644 --- a/src/prof.c +++ b/src/prof.c @@ -121,13 +121,13 @@ static bool prof_booted = false; * definition. */ -static bool prof_tctx_should_destroy(tsd_t *tsd, prof_tctx_t *tctx); +static bool prof_tctx_should_destroy(tsdn_t *tsdn, prof_tctx_t *tctx); static void prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx); -static bool prof_tdata_should_destroy(tsd_t *tsd, prof_tdata_t *tdata, +static bool prof_tdata_should_destroy(tsdn_t *tsdn, prof_tdata_t *tdata, bool even_if_attached); -static void prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata, +static void prof_tdata_destroy(tsdn_t *tsdn, prof_tdata_t *tdata, bool even_if_attached); -static char *prof_thread_name_alloc(tsd_t *tsd, const char *thread_name); +static char *prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name); /******************************************************************************/ /* Red-black trees. 
*/ @@ -213,23 +213,23 @@ prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated) } if ((uintptr_t)tctx > (uintptr_t)1U) { - malloc_mutex_lock(tsd, tctx->tdata->lock); + malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock); tctx->prepared = false; - if (prof_tctx_should_destroy(tsd, tctx)) + if (prof_tctx_should_destroy(tsd_tsdn(tsd), tctx)) prof_tctx_destroy(tsd, tctx); else - malloc_mutex_unlock(tsd, tctx->tdata->lock); + malloc_mutex_unlock(tsd_tsdn(tsd), tctx->tdata->lock); } } void -prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t usize, +prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize, prof_tctx_t *tctx) { - prof_tctx_set(tsd, ptr, usize, tctx); + prof_tctx_set(tsdn, ptr, usize, tctx); - malloc_mutex_lock(tsd, tctx->tdata->lock); + malloc_mutex_lock(tsdn, tctx->tdata->lock); tctx->cnts.curobjs++; tctx->cnts.curbytes += usize; if (opt_prof_accum) { @@ -237,23 +237,23 @@ prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t usize, tctx->cnts.accumbytes += usize; } tctx->prepared = false; - malloc_mutex_unlock(tsd, tctx->tdata->lock); + malloc_mutex_unlock(tsdn, tctx->tdata->lock); } void prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_tctx_t *tctx) { - malloc_mutex_lock(tsd, tctx->tdata->lock); + malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock); assert(tctx->cnts.curobjs > 0); assert(tctx->cnts.curbytes >= usize); tctx->cnts.curobjs--; tctx->cnts.curbytes -= usize; - if (prof_tctx_should_destroy(tsd, tctx)) + if (prof_tctx_should_destroy(tsd_tsdn(tsd), tctx)) prof_tctx_destroy(tsd, tctx); else - malloc_mutex_unlock(tsd, tctx->tdata->lock); + malloc_mutex_unlock(tsd_tsdn(tsd), tctx->tdata->lock); } void @@ -278,7 +278,7 @@ prof_enter(tsd_t *tsd, prof_tdata_t *tdata) tdata->enq = true; } - malloc_mutex_lock(tsd, &bt2gctx_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &bt2gctx_mtx); } JEMALLOC_INLINE_C void @@ -288,7 +288,7 @@ prof_leave(tsd_t *tsd, prof_tdata_t *tdata) cassert(config_prof); assert(tdata == prof_tdata_get(tsd, false)); - malloc_mutex_unlock(tsd, &bt2gctx_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &bt2gctx_mtx); if (tdata != NULL) { bool idump, gdump; @@ -301,9 +301,9 @@ prof_leave(tsd_t *tsd, prof_tdata_t *tdata) tdata->enq_gdump = false; if (idump) - prof_idump(tsd); + prof_idump(tsd_tsdn(tsd)); if (gdump) - prof_gdump(tsd); + prof_gdump(tsd_tsdn(tsd)); } } @@ -547,14 +547,14 @@ prof_tdata_mutex_choose(uint64_t thr_uid) } static prof_gctx_t * -prof_gctx_create(tsd_t *tsd, prof_bt_t *bt) +prof_gctx_create(tsdn_t *tsdn, prof_bt_t *bt) { /* * Create a single allocation that has space for vec of length bt->len. */ size_t size = offsetof(prof_gctx_t, vec) + (bt->len * sizeof(void *)); - prof_gctx_t *gctx = (prof_gctx_t *)iallocztm(tsd, size, - size2index(size), false, NULL, true, arena_get(NULL, 0, true), + prof_gctx_t *gctx = (prof_gctx_t *)iallocztm(tsdn, size, + size2index(size), false, NULL, true, arena_get(TSDN_NULL, 0, true), true); if (gctx == NULL) return (NULL); @@ -587,32 +587,32 @@ prof_gctx_try_destroy(tsd_t *tsd, prof_tdata_t *tdata_self, prof_gctx_t *gctx, * into this function. */ prof_enter(tsd, tdata_self); - malloc_mutex_lock(tsd, gctx->lock); + malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock); assert(gctx->nlimbo != 0); if (tctx_tree_empty(&gctx->tctxs) && gctx->nlimbo == 1) { /* Remove gctx from bt2gctx. */ - if (ckh_remove(tsd, &bt2gctx, &gctx->bt, NULL, NULL)) + if (ckh_remove(tsd_tsdn(tsd), &bt2gctx, &gctx->bt, NULL, NULL)) not_reached(); prof_leave(tsd, tdata_self); /* Destroy gctx. 
*/ - malloc_mutex_unlock(tsd, gctx->lock); - idalloctm(tsd, gctx, NULL, true, true); + malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); + idalloctm(tsd_tsdn(tsd), gctx, NULL, true, true); } else { /* * Compensate for increment in prof_tctx_destroy() or * prof_lookup(). */ gctx->nlimbo--; - malloc_mutex_unlock(tsd, gctx->lock); + malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); prof_leave(tsd, tdata_self); } } static bool -prof_tctx_should_destroy(tsd_t *tsd, prof_tctx_t *tctx) +prof_tctx_should_destroy(tsdn_t *tsdn, prof_tctx_t *tctx) { - malloc_mutex_assert_owner(tsd, tctx->tdata->lock); + malloc_mutex_assert_owner(tsdn, tctx->tdata->lock); if (opt_prof_accum) return (false); @@ -643,7 +643,7 @@ prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) prof_gctx_t *gctx = tctx->gctx; bool destroy_tdata, destroy_tctx, destroy_gctx; - malloc_mutex_assert_owner(tsd, tctx->tdata->lock); + malloc_mutex_assert_owner(tsd_tsdn(tsd), tctx->tdata->lock); assert(tctx->cnts.curobjs == 0); assert(tctx->cnts.curbytes == 0); @@ -651,11 +651,11 @@ prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) assert(tctx->cnts.accumobjs == 0); assert(tctx->cnts.accumbytes == 0); - ckh_remove(tsd, &tdata->bt2tctx, &gctx->bt, NULL, NULL); - destroy_tdata = prof_tdata_should_destroy(tsd, tdata, false); - malloc_mutex_unlock(tsd, tdata->lock); + ckh_remove(tsd_tsdn(tsd), &tdata->bt2tctx, &gctx->bt, NULL, NULL); + destroy_tdata = prof_tdata_should_destroy(tsd_tsdn(tsd), tdata, false); + malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); - malloc_mutex_lock(tsd, gctx->lock); + malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock); switch (tctx->state) { case prof_tctx_state_nominal: tctx_tree_remove(&gctx->tctxs, tctx); @@ -695,19 +695,19 @@ prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) destroy_tctx = false; destroy_gctx = false; } - malloc_mutex_unlock(tsd, gctx->lock); + malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); if (destroy_gctx) { prof_gctx_try_destroy(tsd, prof_tdata_get(tsd, false), gctx, tdata); } - malloc_mutex_assert_not_owner(tsd, tctx->tdata->lock); + malloc_mutex_assert_not_owner(tsd_tsdn(tsd), tctx->tdata->lock); if (destroy_tdata) - prof_tdata_destroy(tsd, tdata, false); + prof_tdata_destroy(tsd_tsdn(tsd), tdata, false); if (destroy_tctx) - idalloctm(tsd, tctx, NULL, true, true); + idalloctm(tsd_tsdn(tsd), tctx, NULL, true, true); } static bool @@ -727,16 +727,16 @@ prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata, prof_enter(tsd, tdata); if (ckh_search(&bt2gctx, bt, &btkey.v, &gctx.v)) { /* bt has never been seen before. Insert it. */ - gctx.p = prof_gctx_create(tsd, bt); + gctx.p = prof_gctx_create(tsd_tsdn(tsd), bt); if (gctx.v == NULL) { prof_leave(tsd, tdata); return (true); } btkey.p = &gctx.p->bt; - if (ckh_insert(tsd, &bt2gctx, btkey.v, gctx.v)) { + if (ckh_insert(tsd_tsdn(tsd), &bt2gctx, btkey.v, gctx.v)) { /* OOM. */ prof_leave(tsd, tdata); - idalloctm(tsd, gctx.v, NULL, true, true); + idalloctm(tsd_tsdn(tsd), gctx.v, NULL, true, true); return (true); } new_gctx = true; @@ -745,9 +745,9 @@ prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata, * Increment nlimbo, in order to avoid a race condition with * prof_tctx_destroy()/prof_gctx_try_destroy(). 
*/ - malloc_mutex_lock(tsd, gctx.p->lock); + malloc_mutex_lock(tsd_tsdn(tsd), gctx.p->lock); gctx.p->nlimbo++; - malloc_mutex_unlock(tsd, gctx.p->lock); + malloc_mutex_unlock(tsd_tsdn(tsd), gctx.p->lock); new_gctx = false; } prof_leave(tsd, tdata); @@ -774,11 +774,11 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) if (tdata == NULL) return (NULL); - malloc_mutex_lock(tsd, tdata->lock); + malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock); not_found = ckh_search(&tdata->bt2tctx, bt, NULL, &ret.v); if (!not_found) /* Note double negative! */ ret.p->prepared = true; - malloc_mutex_unlock(tsd, tdata->lock); + malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); if (not_found) { void *btkey; prof_gctx_t *gctx; @@ -793,9 +793,9 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) return (NULL); /* Link a prof_tctx_t into gctx for this thread. */ - ret.v = iallocztm(tsd, sizeof(prof_tctx_t), + ret.v = iallocztm(tsd_tsdn(tsd), sizeof(prof_tctx_t), size2index(sizeof(prof_tctx_t)), false, NULL, true, - arena_ichoose(tsd, NULL), true); + arena_ichoose(tsd_tsdn(tsd), NULL), true); if (ret.p == NULL) { if (new_gctx) prof_gctx_try_destroy(tsd, tdata, gctx, tdata); @@ -809,20 +809,21 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) ret.p->tctx_uid = tdata->tctx_uid_next++; ret.p->prepared = true; ret.p->state = prof_tctx_state_initializing; - malloc_mutex_lock(tsd, tdata->lock); - error = ckh_insert(tsd, &tdata->bt2tctx, btkey, ret.v); - malloc_mutex_unlock(tsd, tdata->lock); + malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock); + error = ckh_insert(tsd_tsdn(tsd), &tdata->bt2tctx, btkey, + ret.v); + malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); if (error) { if (new_gctx) prof_gctx_try_destroy(tsd, tdata, gctx, tdata); - idalloctm(tsd, ret.v, NULL, true, true); + idalloctm(tsd_tsdn(tsd), ret.v, NULL, true, true); return (NULL); } - malloc_mutex_lock(tsd, gctx->lock); + malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock); ret.p->state = prof_tctx_state_nominal; tctx_tree_insert(&gctx->tctxs, ret.p); gctx->nlimbo--; - malloc_mutex_unlock(tsd, gctx->lock); + malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); } return (ret.p); @@ -897,13 +898,13 @@ size_t prof_tdata_count(void) { size_t tdata_count = 0; - tsd_t *tsd; + tsdn_t *tsdn; - tsd = tsd_fetch(); - malloc_mutex_lock(tsd, &tdatas_mtx); + tsdn = tsdn_fetch(); + malloc_mutex_lock(tsdn, &tdatas_mtx); tdata_tree_iter(&tdatas, NULL, prof_tdata_count_iter, (void *)&tdata_count); - malloc_mutex_unlock(tsd, &tdatas_mtx); + malloc_mutex_unlock(tsdn, &tdatas_mtx); return (tdata_count); } @@ -922,9 +923,9 @@ prof_bt_count(void) if (tdata == NULL) return (0); - malloc_mutex_lock(tsd, &bt2gctx_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &bt2gctx_mtx); bt_count = ckh_count(&bt2gctx); - malloc_mutex_unlock(tsd, &bt2gctx_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &bt2gctx_mtx); return (bt_count); } @@ -1038,20 +1039,20 @@ prof_dump_printf(bool propagate_err, const char *format, ...) 
} static void -prof_tctx_merge_tdata(tsd_t *tsd, prof_tctx_t *tctx, prof_tdata_t *tdata) +prof_tctx_merge_tdata(tsdn_t *tsdn, prof_tctx_t *tctx, prof_tdata_t *tdata) { - malloc_mutex_assert_owner(tsd, tctx->tdata->lock); + malloc_mutex_assert_owner(tsdn, tctx->tdata->lock); - malloc_mutex_lock(tsd, tctx->gctx->lock); + malloc_mutex_lock(tsdn, tctx->gctx->lock); switch (tctx->state) { case prof_tctx_state_initializing: - malloc_mutex_unlock(tsd, tctx->gctx->lock); + malloc_mutex_unlock(tsdn, tctx->gctx->lock); return; case prof_tctx_state_nominal: tctx->state = prof_tctx_state_dumping; - malloc_mutex_unlock(tsd, tctx->gctx->lock); + malloc_mutex_unlock(tsdn, tctx->gctx->lock); memcpy(&tctx->dump_cnts, &tctx->cnts, sizeof(prof_cnt_t)); @@ -1071,10 +1072,10 @@ prof_tctx_merge_tdata(tsd_t *tsd, prof_tctx_t *tctx, prof_tdata_t *tdata) } static void -prof_tctx_merge_gctx(tsd_t *tsd, prof_tctx_t *tctx, prof_gctx_t *gctx) +prof_tctx_merge_gctx(tsdn_t *tsdn, prof_tctx_t *tctx, prof_gctx_t *gctx) { - malloc_mutex_assert_owner(tsd, gctx->lock); + malloc_mutex_assert_owner(tsdn, gctx->lock); gctx->cnt_summed.curobjs += tctx->dump_cnts.curobjs; gctx->cnt_summed.curbytes += tctx->dump_cnts.curbytes; @@ -1087,9 +1088,9 @@ prof_tctx_merge_gctx(tsd_t *tsd, prof_tctx_t *tctx, prof_gctx_t *gctx) static prof_tctx_t * prof_tctx_merge_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) { - tsd_t *tsd = (tsd_t *)arg; + tsdn_t *tsdn = (tsdn_t *)arg; - malloc_mutex_assert_owner(tsd, tctx->gctx->lock); + malloc_mutex_assert_owner(tsdn, tctx->gctx->lock); switch (tctx->state) { case prof_tctx_state_nominal: @@ -1097,7 +1098,7 @@ prof_tctx_merge_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) break; case prof_tctx_state_dumping: case prof_tctx_state_purgatory: - prof_tctx_merge_gctx(tsd, tctx, tctx->gctx); + prof_tctx_merge_gctx(tsdn, tctx, tctx->gctx); break; default: not_reached(); @@ -1107,7 +1108,7 @@ prof_tctx_merge_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) } struct prof_tctx_dump_iter_arg_s { - tsd_t *tsd; + tsdn_t *tsdn; bool propagate_err; }; @@ -1117,7 +1118,7 @@ prof_tctx_dump_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *opaque) struct prof_tctx_dump_iter_arg_s *arg = (struct prof_tctx_dump_iter_arg_s *)opaque; - malloc_mutex_assert_owner(arg->tsd, tctx->gctx->lock); + malloc_mutex_assert_owner(arg->tsdn, tctx->gctx->lock); switch (tctx->state) { case prof_tctx_state_initializing: @@ -1142,10 +1143,10 @@ prof_tctx_dump_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *opaque) static prof_tctx_t * prof_tctx_finish_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) { - tsd_t *tsd = (tsd_t *)arg; + tsdn_t *tsdn = (tsdn_t *)arg; prof_tctx_t *ret; - malloc_mutex_assert_owner(tsd, tctx->gctx->lock); + malloc_mutex_assert_owner(tsdn, tctx->gctx->lock); switch (tctx->state) { case prof_tctx_state_nominal: @@ -1167,12 +1168,12 @@ label_return: } static void -prof_dump_gctx_prep(tsd_t *tsd, prof_gctx_t *gctx, prof_gctx_tree_t *gctxs) +prof_dump_gctx_prep(tsdn_t *tsdn, prof_gctx_t *gctx, prof_gctx_tree_t *gctxs) { cassert(config_prof); - malloc_mutex_lock(tsd, gctx->lock); + malloc_mutex_lock(tsdn, gctx->lock); /* * Increment nlimbo so that gctx won't go away before dump. 
@@ -1184,11 +1185,11 @@ prof_dump_gctx_prep(tsd_t *tsd, prof_gctx_t *gctx, prof_gctx_tree_t *gctxs) memset(&gctx->cnt_summed, 0, sizeof(prof_cnt_t)); - malloc_mutex_unlock(tsd, gctx->lock); + malloc_mutex_unlock(tsdn, gctx->lock); } struct prof_gctx_merge_iter_arg_s { - tsd_t *tsd; + tsdn_t *tsdn; size_t leak_ngctx; }; @@ -1198,12 +1199,12 @@ prof_gctx_merge_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque) struct prof_gctx_merge_iter_arg_s *arg = (struct prof_gctx_merge_iter_arg_s *)opaque; - malloc_mutex_lock(arg->tsd, gctx->lock); + malloc_mutex_lock(arg->tsdn, gctx->lock); tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_merge_iter, - (void *)arg->tsd); + (void *)arg->tsdn); if (gctx->cnt_summed.curobjs != 0) arg->leak_ngctx++; - malloc_mutex_unlock(arg->tsd, gctx->lock); + malloc_mutex_unlock(arg->tsdn, gctx->lock); return (NULL); } @@ -1222,7 +1223,7 @@ prof_gctx_finish(tsd_t *tsd, prof_gctx_tree_t *gctxs) */ while ((gctx = gctx_tree_first(gctxs)) != NULL) { gctx_tree_remove(gctxs, gctx); - malloc_mutex_lock(tsd, gctx->lock); + malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock); { prof_tctx_t *next; @@ -1230,14 +1231,15 @@ prof_gctx_finish(tsd_t *tsd, prof_gctx_tree_t *gctxs) do { prof_tctx_t *to_destroy = tctx_tree_iter(&gctx->tctxs, next, - prof_tctx_finish_iter, (void *)tsd); + prof_tctx_finish_iter, + (void *)tsd_tsdn(tsd)); if (to_destroy != NULL) { next = tctx_tree_next(&gctx->tctxs, to_destroy); tctx_tree_remove(&gctx->tctxs, to_destroy); - idalloctm(tsd, to_destroy, NULL, true, - true); + idalloctm(tsd_tsdn(tsd), to_destroy, + NULL, true, true); } else next = NULL; } while (next != NULL); @@ -1245,15 +1247,15 @@ prof_gctx_finish(tsd_t *tsd, prof_gctx_tree_t *gctxs) gctx->nlimbo--; if (prof_gctx_should_destroy(gctx)) { gctx->nlimbo++; - malloc_mutex_unlock(tsd, gctx->lock); + malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); prof_gctx_try_destroy(tsd, tdata, gctx, tdata); } else - malloc_mutex_unlock(tsd, gctx->lock); + malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); } } struct prof_tdata_merge_iter_arg_s { - tsd_t *tsd; + tsdn_t *tsdn; prof_cnt_t cnt_all; }; @@ -1264,7 +1266,7 @@ prof_tdata_merge_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, struct prof_tdata_merge_iter_arg_s *arg = (struct prof_tdata_merge_iter_arg_s *)opaque; - malloc_mutex_lock(arg->tsd, tdata->lock); + malloc_mutex_lock(arg->tsdn, tdata->lock); if (!tdata->expired) { size_t tabind; union { @@ -1276,7 +1278,7 @@ prof_tdata_merge_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, memset(&tdata->cnt_summed, 0, sizeof(prof_cnt_t)); for (tabind = 0; !ckh_iter(&tdata->bt2tctx, &tabind, NULL, &tctx.v);) - prof_tctx_merge_tdata(arg->tsd, tctx.p, tdata); + prof_tctx_merge_tdata(arg->tsdn, tctx.p, tdata); arg->cnt_all.curobjs += tdata->cnt_summed.curobjs; arg->cnt_all.curbytes += tdata->cnt_summed.curbytes; @@ -1286,7 +1288,7 @@ prof_tdata_merge_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, } } else tdata->dumping = false; - malloc_mutex_unlock(arg->tsd, tdata->lock); + malloc_mutex_unlock(arg->tsdn, tdata->lock); return (NULL); } @@ -1315,7 +1317,7 @@ prof_tdata_dump_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, void *arg) #define prof_dump_header JEMALLOC_N(prof_dump_header_impl) #endif static bool -prof_dump_header(tsd_t *tsd, bool propagate_err, const prof_cnt_t *cnt_all) +prof_dump_header(tsdn_t *tsdn, bool propagate_err, const prof_cnt_t *cnt_all) { bool ret; @@ -1326,10 +1328,10 @@ prof_dump_header(tsd_t *tsd, bool propagate_err, const prof_cnt_t *cnt_all) cnt_all->curbytes, 
cnt_all->accumobjs, cnt_all->accumbytes)) return (true); - malloc_mutex_lock(tsd, &tdatas_mtx); + malloc_mutex_lock(tsdn, &tdatas_mtx); ret = (tdata_tree_iter(&tdatas, NULL, prof_tdata_dump_iter, (void *)&propagate_err) != NULL); - malloc_mutex_unlock(tsd, &tdatas_mtx); + malloc_mutex_unlock(tsdn, &tdatas_mtx); return (ret); } #ifdef JEMALLOC_JET @@ -1339,7 +1341,7 @@ prof_dump_header_t *prof_dump_header = JEMALLOC_N(prof_dump_header_impl); #endif static bool -prof_dump_gctx(tsd_t *tsd, bool propagate_err, prof_gctx_t *gctx, +prof_dump_gctx(tsdn_t *tsdn, bool propagate_err, prof_gctx_t *gctx, const prof_bt_t *bt, prof_gctx_tree_t *gctxs) { bool ret; @@ -1347,7 +1349,7 @@ prof_dump_gctx(tsd_t *tsd, bool propagate_err, prof_gctx_t *gctx, struct prof_tctx_dump_iter_arg_s prof_tctx_dump_iter_arg; cassert(config_prof); - malloc_mutex_assert_owner(tsd, gctx->lock); + malloc_mutex_assert_owner(tsdn, gctx->lock); /* Avoid dumping such gctx's that have no useful data. */ if ((!opt_prof_accum && gctx->cnt_summed.curobjs == 0) || @@ -1381,7 +1383,7 @@ prof_dump_gctx(tsd_t *tsd, bool propagate_err, prof_gctx_t *gctx, goto label_return; } - prof_tctx_dump_iter_arg.tsd = tsd; + prof_tctx_dump_iter_arg.tsdn = tsdn; prof_tctx_dump_iter_arg.propagate_err = propagate_err; if (tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_dump_iter, (void *)&prof_tctx_dump_iter_arg) != NULL) { @@ -1515,7 +1517,7 @@ prof_leakcheck(const prof_cnt_t *cnt_all, size_t leak_ngctx, } struct prof_gctx_dump_iter_arg_s { - tsd_t *tsd; + tsdn_t *tsdn; bool propagate_err; }; @@ -1526,9 +1528,9 @@ prof_gctx_dump_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque) struct prof_gctx_dump_iter_arg_s *arg = (struct prof_gctx_dump_iter_arg_s *)opaque; - malloc_mutex_lock(arg->tsd, gctx->lock); + malloc_mutex_lock(arg->tsdn, gctx->lock); - if (prof_dump_gctx(arg->tsd, arg->propagate_err, gctx, &gctx->bt, + if (prof_dump_gctx(arg->tsdn, arg->propagate_err, gctx, &gctx->bt, gctxs)) { ret = gctx; goto label_return; @@ -1536,7 +1538,7 @@ prof_gctx_dump_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque) ret = NULL; label_return: - malloc_mutex_unlock(arg->tsd, gctx->lock); + malloc_mutex_unlock(arg->tsdn, gctx->lock); return (ret); } @@ -1560,7 +1562,7 @@ prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, bool leakcheck) if (tdata == NULL) return (true); - malloc_mutex_lock(tsd, &prof_dump_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_mtx); prof_enter(tsd, tdata); /* @@ -1569,21 +1571,21 @@ prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, bool leakcheck) */ gctx_tree_new(&gctxs); for (tabind = 0; !ckh_iter(&bt2gctx, &tabind, NULL, &gctx.v);) - prof_dump_gctx_prep(tsd, gctx.p, &gctxs); + prof_dump_gctx_prep(tsd_tsdn(tsd), gctx.p, &gctxs); /* * Iterate over tdatas, and for the non-expired ones snapshot their tctx * stats and merge them into the associated gctx's. */ - prof_tdata_merge_iter_arg.tsd = tsd; + prof_tdata_merge_iter_arg.tsdn = tsd_tsdn(tsd); memset(&prof_tdata_merge_iter_arg.cnt_all, 0, sizeof(prof_cnt_t)); - malloc_mutex_lock(tsd, &tdatas_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx); tdata_tree_iter(&tdatas, NULL, prof_tdata_merge_iter, (void *)&prof_tdata_merge_iter_arg); - malloc_mutex_unlock(tsd, &tdatas_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx); /* Merge tctx stats into gctx's. 
*/ - prof_gctx_merge_iter_arg.tsd = tsd; + prof_gctx_merge_iter_arg.tsdn = tsd_tsdn(tsd); prof_gctx_merge_iter_arg.leak_ngctx = 0; gctx_tree_iter(&gctxs, NULL, prof_gctx_merge_iter, (void *)&prof_gctx_merge_iter_arg); @@ -1595,12 +1597,12 @@ prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, bool leakcheck) goto label_open_close_error; /* Dump profile header. */ - if (prof_dump_header(tsd, propagate_err, + if (prof_dump_header(tsd_tsdn(tsd), propagate_err, &prof_tdata_merge_iter_arg.cnt_all)) goto label_write_error; /* Dump per gctx profile stats. */ - prof_gctx_dump_iter_arg.tsd = tsd; + prof_gctx_dump_iter_arg.tsdn = tsd_tsdn(tsd); prof_gctx_dump_iter_arg.propagate_err = propagate_err; if (gctx_tree_iter(&gctxs, NULL, prof_gctx_dump_iter, (void *)&prof_gctx_dump_iter_arg) != NULL) @@ -1614,7 +1616,7 @@ prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, bool leakcheck) goto label_open_close_error; prof_gctx_finish(tsd, &gctxs); - malloc_mutex_unlock(tsd, &prof_dump_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_mtx); if (leakcheck) { prof_leakcheck(&prof_tdata_merge_iter_arg.cnt_all, @@ -1625,7 +1627,7 @@ label_write_error: prof_dump_close(propagate_err); label_open_close_error: prof_gctx_finish(tsd, &gctxs); - malloc_mutex_unlock(tsd, &prof_dump_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_mtx); return (true); } @@ -1665,21 +1667,23 @@ prof_fdump(void) return; tsd = tsd_fetch(); - malloc_mutex_lock(tsd, &prof_dump_seq_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_seq_mtx); prof_dump_filename(filename, 'f', VSEQ_INVALID); - malloc_mutex_unlock(tsd, &prof_dump_seq_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_seq_mtx); prof_dump(tsd, false, filename, opt_prof_leak); } void -prof_idump(tsd_t *tsd) +prof_idump(tsdn_t *tsdn) { + tsd_t *tsd; prof_tdata_t *tdata; cassert(config_prof); - if (!prof_booted || tsd == NULL) + if (!prof_booted || tsdn_null(tsdn)) return; + tsd = tsdn_tsd(tsdn); tdata = prof_tdata_get(tsd, false); if (tdata == NULL) return; @@ -1690,10 +1694,10 @@ prof_idump(tsd_t *tsd) if (opt_prof_prefix[0] != '\0') { char filename[PATH_MAX + 1]; - malloc_mutex_lock(tsd, &prof_dump_seq_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_seq_mtx); prof_dump_filename(filename, 'i', prof_dump_iseq); prof_dump_iseq++; - malloc_mutex_unlock(tsd, &prof_dump_seq_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_seq_mtx); prof_dump(tsd, false, filename, false); } } @@ -1712,24 +1716,26 @@ prof_mdump(tsd_t *tsd, const char *filename) /* No filename specified, so automatically generate one. 
*/ if (opt_prof_prefix[0] == '\0') return (true); - malloc_mutex_lock(tsd, &prof_dump_seq_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_seq_mtx); prof_dump_filename(filename_buf, 'm', prof_dump_mseq); prof_dump_mseq++; - malloc_mutex_unlock(tsd, &prof_dump_seq_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_seq_mtx); filename = filename_buf; } return (prof_dump(tsd, true, filename, false)); } void -prof_gdump(tsd_t *tsd) +prof_gdump(tsdn_t *tsdn) { + tsd_t *tsd; prof_tdata_t *tdata; cassert(config_prof); - if (!prof_booted || tsd == NULL) + if (!prof_booted || tsdn_null(tsdn)) return; + tsd = tsdn_tsd(tsdn); tdata = prof_tdata_get(tsd, false); if (tdata == NULL) return; @@ -1740,10 +1746,10 @@ prof_gdump(tsd_t *tsd) if (opt_prof_prefix[0] != '\0') { char filename[DUMP_FILENAME_BUFSIZE]; - malloc_mutex_lock(tsd, &prof_dump_seq_mtx); + malloc_mutex_lock(tsdn, &prof_dump_seq_mtx); prof_dump_filename(filename, 'u', prof_dump_useq); prof_dump_useq++; - malloc_mutex_unlock(tsd, &prof_dump_seq_mtx); + malloc_mutex_unlock(tsdn, &prof_dump_seq_mtx); prof_dump(tsd, false, filename, false); } } @@ -1772,20 +1778,20 @@ prof_bt_keycomp(const void *k1, const void *k2) } JEMALLOC_INLINE_C uint64_t -prof_thr_uid_alloc(tsd_t *tsd) +prof_thr_uid_alloc(tsdn_t *tsdn) { uint64_t thr_uid; - malloc_mutex_lock(tsd, &next_thr_uid_mtx); + malloc_mutex_lock(tsdn, &next_thr_uid_mtx); thr_uid = next_thr_uid; next_thr_uid++; - malloc_mutex_unlock(tsd, &next_thr_uid_mtx); + malloc_mutex_unlock(tsdn, &next_thr_uid_mtx); return (thr_uid); } static prof_tdata_t * -prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, +prof_tdata_init_impl(tsdn_t *tsdn, uint64_t thr_uid, uint64_t thr_discrim, char *thread_name, bool active) { prof_tdata_t *tdata; @@ -1793,9 +1799,9 @@ prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, cassert(config_prof); /* Initialize an empty cache for this thread. 
*/ - tdata = (prof_tdata_t *)iallocztm(tsd, sizeof(prof_tdata_t), - size2index(sizeof(prof_tdata_t)), false, NULL, true, arena_get(NULL, - 0, true), true); + tdata = (prof_tdata_t *)iallocztm(tsdn, sizeof(prof_tdata_t), + size2index(sizeof(prof_tdata_t)), false, NULL, true, + arena_get(TSDN_NULL, 0, true), true); if (tdata == NULL) return (NULL); @@ -1807,9 +1813,9 @@ prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, tdata->expired = false; tdata->tctx_uid_next = 0; - if (ckh_new(tsd, &tdata->bt2tctx, PROF_CKH_MINITEMS, + if (ckh_new(tsdn, &tdata->bt2tctx, PROF_CKH_MINITEMS, prof_bt_hash, prof_bt_keycomp)) { - idalloctm(tsd, tdata, NULL, true, true); + idalloctm(tsdn, tdata, NULL, true, true); return (NULL); } @@ -1823,24 +1829,23 @@ prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, tdata->dumping = false; tdata->active = active; - malloc_mutex_lock(tsd, &tdatas_mtx); + malloc_mutex_lock(tsdn, &tdatas_mtx); tdata_tree_insert(&tdatas, tdata); - malloc_mutex_unlock(tsd, &tdatas_mtx); + malloc_mutex_unlock(tsdn, &tdatas_mtx); return (tdata); } prof_tdata_t * -prof_tdata_init(tsd_t *tsd) +prof_tdata_init(tsdn_t *tsdn) { - return (prof_tdata_init_impl(tsd, prof_thr_uid_alloc(tsd), 0, NULL, - prof_thread_active_init_get(tsd))); + return (prof_tdata_init_impl(tsdn, prof_thr_uid_alloc(tsdn), 0, NULL, + prof_thread_active_init_get(tsdn))); } static bool -prof_tdata_should_destroy_unlocked(tsd_t *tsd, prof_tdata_t *tdata, - bool even_if_attached) +prof_tdata_should_destroy_unlocked(prof_tdata_t *tdata, bool even_if_attached) { if (tdata->attached && !even_if_attached) @@ -1851,43 +1856,41 @@ prof_tdata_should_destroy_unlocked(tsd_t *tsd, prof_tdata_t *tdata, } static bool -prof_tdata_should_destroy(tsd_t *tsd, prof_tdata_t *tdata, +prof_tdata_should_destroy(tsdn_t *tsdn, prof_tdata_t *tdata, bool even_if_attached) { - malloc_mutex_assert_owner(tsd, tdata->lock); + malloc_mutex_assert_owner(tsdn, tdata->lock); - return (prof_tdata_should_destroy_unlocked(tsd, tdata, - even_if_attached)); + return (prof_tdata_should_destroy_unlocked(tdata, even_if_attached)); } static void -prof_tdata_destroy_locked(tsd_t *tsd, prof_tdata_t *tdata, +prof_tdata_destroy_locked(tsdn_t *tsdn, prof_tdata_t *tdata, bool even_if_attached) { - malloc_mutex_assert_owner(tsd, &tdatas_mtx); + malloc_mutex_assert_owner(tsdn, &tdatas_mtx); - assert(tsd_prof_tdata_get(tsd) != tdata); + assert(tsdn_null(tsdn) || tsd_prof_tdata_get(tsdn_tsd(tsdn)) != tdata); tdata_tree_remove(&tdatas, tdata); - assert(prof_tdata_should_destroy_unlocked(tsd, tdata, - even_if_attached)); + assert(prof_tdata_should_destroy_unlocked(tdata, even_if_attached)); if (tdata->thread_name != NULL) - idalloctm(tsd, tdata->thread_name, NULL, true, true); - ckh_delete(tsd, &tdata->bt2tctx); - idalloctm(tsd, tdata, NULL, true, true); + idalloctm(tsdn, tdata->thread_name, NULL, true, true); + ckh_delete(tsdn, &tdata->bt2tctx); + idalloctm(tsdn, tdata, NULL, true, true); } static void -prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata, bool even_if_attached) +prof_tdata_destroy(tsdn_t *tsdn, prof_tdata_t *tdata, bool even_if_attached) { - malloc_mutex_lock(tsd, &tdatas_mtx); - prof_tdata_destroy_locked(tsd, tdata, even_if_attached); - malloc_mutex_unlock(tsd, &tdatas_mtx); + malloc_mutex_lock(tsdn, &tdatas_mtx); + prof_tdata_destroy_locked(tsdn, tdata, even_if_attached); + malloc_mutex_unlock(tsdn, &tdatas_mtx); } static void @@ -1895,9 +1898,10 @@ prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata) { bool destroy_tdata; 
- malloc_mutex_lock(tsd, tdata->lock); + malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock); if (tdata->attached) { - destroy_tdata = prof_tdata_should_destroy(tsd, tdata, true); + destroy_tdata = prof_tdata_should_destroy(tsd_tsdn(tsd), tdata, + true); /* * Only detach if !destroy_tdata, because detaching would allow * another thread to win the race to destroy tdata. @@ -1907,9 +1911,9 @@ prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata) tsd_prof_tdata_set(tsd, NULL); } else destroy_tdata = false; - malloc_mutex_unlock(tsd, tdata->lock); + malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); if (destroy_tdata) - prof_tdata_destroy(tsd, tdata, true); + prof_tdata_destroy(tsd_tsdn(tsd), tdata, true); } prof_tdata_t * @@ -1918,27 +1922,27 @@ prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata) uint64_t thr_uid = tdata->thr_uid; uint64_t thr_discrim = tdata->thr_discrim + 1; char *thread_name = (tdata->thread_name != NULL) ? - prof_thread_name_alloc(tsd, tdata->thread_name) : NULL; + prof_thread_name_alloc(tsd_tsdn(tsd), tdata->thread_name) : NULL; bool active = tdata->active; prof_tdata_detach(tsd, tdata); - return (prof_tdata_init_impl(tsd, thr_uid, thr_discrim, thread_name, - active)); + return (prof_tdata_init_impl(tsd_tsdn(tsd), thr_uid, thr_discrim, + thread_name, active)); } static bool -prof_tdata_expire(tsd_t *tsd, prof_tdata_t *tdata) +prof_tdata_expire(tsdn_t *tsdn, prof_tdata_t *tdata) { bool destroy_tdata; - malloc_mutex_lock(tsd, tdata->lock); + malloc_mutex_lock(tsdn, tdata->lock); if (!tdata->expired) { tdata->expired = true; destroy_tdata = tdata->attached ? false : - prof_tdata_should_destroy(tsd, tdata, false); + prof_tdata_should_destroy(tsdn, tdata, false); } else destroy_tdata = false; - malloc_mutex_unlock(tsd, tdata->lock); + malloc_mutex_unlock(tsdn, tdata->lock); return (destroy_tdata); } @@ -1946,36 +1950,36 @@ prof_tdata_expire(tsd_t *tsd, prof_tdata_t *tdata) static prof_tdata_t * prof_tdata_reset_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, void *arg) { - tsd_t *tsd = (tsd_t *)arg; + tsdn_t *tsdn = (tsdn_t *)arg; - return (prof_tdata_expire(tsd, tdata) ? tdata : NULL); + return (prof_tdata_expire(tsdn, tdata) ? 
tdata : NULL); } void -prof_reset(tsd_t *tsd, size_t lg_sample) +prof_reset(tsdn_t *tsdn, size_t lg_sample) { prof_tdata_t *next; assert(lg_sample < (sizeof(uint64_t) << 3)); - malloc_mutex_lock(tsd, &prof_dump_mtx); - malloc_mutex_lock(tsd, &tdatas_mtx); + malloc_mutex_lock(tsdn, &prof_dump_mtx); + malloc_mutex_lock(tsdn, &tdatas_mtx); lg_prof_sample = lg_sample; next = NULL; do { prof_tdata_t *to_destroy = tdata_tree_iter(&tdatas, next, - prof_tdata_reset_iter, (void *)tsd); + prof_tdata_reset_iter, (void *)tsdn); if (to_destroy != NULL) { next = tdata_tree_next(&tdatas, to_destroy); - prof_tdata_destroy_locked(tsd, to_destroy, false); + prof_tdata_destroy_locked(tsdn, to_destroy, false); } else next = NULL; } while (next != NULL); - malloc_mutex_unlock(tsd, &tdatas_mtx); - malloc_mutex_unlock(tsd, &prof_dump_mtx); + malloc_mutex_unlock(tsdn, &tdatas_mtx); + malloc_mutex_unlock(tsdn, &prof_dump_mtx); } void @@ -1992,25 +1996,25 @@ prof_tdata_cleanup(tsd_t *tsd) } bool -prof_active_get(tsd_t *tsd) +prof_active_get(tsdn_t *tsdn) { bool prof_active_current; - malloc_mutex_lock(tsd, &prof_active_mtx); + malloc_mutex_lock(tsdn, &prof_active_mtx); prof_active_current = prof_active; - malloc_mutex_unlock(tsd, &prof_active_mtx); + malloc_mutex_unlock(tsdn, &prof_active_mtx); return (prof_active_current); } bool -prof_active_set(tsd_t *tsd, bool active) +prof_active_set(tsdn_t *tsdn, bool active) { bool prof_active_old; - malloc_mutex_lock(tsd, &prof_active_mtx); + malloc_mutex_lock(tsdn, &prof_active_mtx); prof_active_old = prof_active; prof_active = active; - malloc_mutex_unlock(tsd, &prof_active_mtx); + malloc_mutex_unlock(tsdn, &prof_active_mtx); return (prof_active_old); } @@ -2026,7 +2030,7 @@ prof_thread_name_get(tsd_t *tsd) } static char * -prof_thread_name_alloc(tsd_t *tsd, const char *thread_name) +prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name) { char *ret; size_t size; @@ -2038,8 +2042,8 @@ prof_thread_name_alloc(tsd_t *tsd, const char *thread_name) if (size == 1) return (""); - ret = iallocztm(tsd, size, size2index(size), false, NULL, true, - arena_get(NULL, 0, true), true); + ret = iallocztm(tsdn, size, size2index(size), false, NULL, true, + arena_get(TSDN_NULL, 0, true), true); if (ret == NULL) return (NULL); memcpy(ret, thread_name, size); @@ -2066,12 +2070,12 @@ prof_thread_name_set(tsd_t *tsd, const char *thread_name) return (EFAULT); } - s = prof_thread_name_alloc(tsd, thread_name); + s = prof_thread_name_alloc(tsd_tsdn(tsd), thread_name); if (s == NULL) return (EAGAIN); if (tdata->thread_name != NULL) { - idalloctm(tsd, tdata->thread_name, NULL, true, true); + idalloctm(tsd_tsdn(tsd), tdata->thread_name, NULL, true, true); tdata->thread_name = NULL; } if (strlen(s) > 0) @@ -2103,48 +2107,48 @@ prof_thread_active_set(tsd_t *tsd, bool active) } bool -prof_thread_active_init_get(tsd_t *tsd) +prof_thread_active_init_get(tsdn_t *tsdn) { bool active_init; - malloc_mutex_lock(tsd, &prof_thread_active_init_mtx); + malloc_mutex_lock(tsdn, &prof_thread_active_init_mtx); active_init = prof_thread_active_init; - malloc_mutex_unlock(tsd, &prof_thread_active_init_mtx); + malloc_mutex_unlock(tsdn, &prof_thread_active_init_mtx); return (active_init); } bool -prof_thread_active_init_set(tsd_t *tsd, bool active_init) +prof_thread_active_init_set(tsdn_t *tsdn, bool active_init) { bool active_init_old; - malloc_mutex_lock(tsd, &prof_thread_active_init_mtx); + malloc_mutex_lock(tsdn, &prof_thread_active_init_mtx); active_init_old = prof_thread_active_init; prof_thread_active_init = 
active_init; - malloc_mutex_unlock(tsd, &prof_thread_active_init_mtx); + malloc_mutex_unlock(tsdn, &prof_thread_active_init_mtx); return (active_init_old); } bool -prof_gdump_get(tsd_t *tsd) +prof_gdump_get(tsdn_t *tsdn) { bool prof_gdump_current; - malloc_mutex_lock(tsd, &prof_gdump_mtx); + malloc_mutex_lock(tsdn, &prof_gdump_mtx); prof_gdump_current = prof_gdump_val; - malloc_mutex_unlock(tsd, &prof_gdump_mtx); + malloc_mutex_unlock(tsdn, &prof_gdump_mtx); return (prof_gdump_current); } bool -prof_gdump_set(tsd_t *tsd, bool gdump) +prof_gdump_set(tsdn_t *tsdn, bool gdump) { bool prof_gdump_old; - malloc_mutex_lock(tsd, &prof_gdump_mtx); + malloc_mutex_lock(tsdn, &prof_gdump_mtx); prof_gdump_old = prof_gdump_val; prof_gdump_val = gdump; - malloc_mutex_unlock(tsd, &prof_gdump_mtx); + malloc_mutex_unlock(tsdn, &prof_gdump_mtx); return (prof_gdump_old); } @@ -2185,7 +2189,7 @@ prof_boot1(void) } bool -prof_boot2(tsd_t *tsd) +prof_boot2(tsdn_t *tsdn) { cassert(config_prof); @@ -2211,7 +2215,7 @@ prof_boot2(tsd_t *tsd) WITNESS_RANK_PROF_THREAD_ACTIVE_INIT)) return (true); - if (ckh_new(tsd, &bt2gctx, PROF_CKH_MINITEMS, prof_bt_hash, + if (ckh_new(tsdn, &bt2gctx, PROF_CKH_MINITEMS, prof_bt_hash, prof_bt_keycomp)) return (true); if (malloc_mutex_init(&bt2gctx_mtx, "prof_bt2gctx", @@ -2242,8 +2246,8 @@ prof_boot2(tsd_t *tsd) abort(); } - gctx_locks = (malloc_mutex_t *)base_alloc(tsd, PROF_NCTX_LOCKS * - sizeof(malloc_mutex_t)); + gctx_locks = (malloc_mutex_t *)base_alloc(tsdn, PROF_NCTX_LOCKS + * sizeof(malloc_mutex_t)); if (gctx_locks == NULL) return (true); for (i = 0; i < PROF_NCTX_LOCKS; i++) { @@ -2252,7 +2256,7 @@ prof_boot2(tsd_t *tsd) return (true); } - tdata_locks = (malloc_mutex_t *)base_alloc(tsd, + tdata_locks = (malloc_mutex_t *)base_alloc(tsdn, PROF_NTDATA_LOCKS * sizeof(malloc_mutex_t)); if (tdata_locks == NULL) return (true); @@ -2277,76 +2281,77 @@ prof_boot2(tsd_t *tsd) } void -prof_prefork0(tsd_t *tsd) +prof_prefork0(tsdn_t *tsdn) { if (opt_prof) { unsigned i; - malloc_mutex_prefork(tsd, &prof_dump_mtx); - malloc_mutex_prefork(tsd, &bt2gctx_mtx); - malloc_mutex_prefork(tsd, &tdatas_mtx); + malloc_mutex_prefork(tsdn, &prof_dump_mtx); + malloc_mutex_prefork(tsdn, &bt2gctx_mtx); + malloc_mutex_prefork(tsdn, &tdatas_mtx); for (i = 0; i < PROF_NTDATA_LOCKS; i++) - malloc_mutex_prefork(tsd, &tdata_locks[i]); + malloc_mutex_prefork(tsdn, &tdata_locks[i]); for (i = 0; i < PROF_NCTX_LOCKS; i++) - malloc_mutex_prefork(tsd, &gctx_locks[i]); + malloc_mutex_prefork(tsdn, &gctx_locks[i]); } } void -prof_prefork1(tsd_t *tsd) +prof_prefork1(tsdn_t *tsdn) { if (opt_prof) { - malloc_mutex_prefork(tsd, &prof_active_mtx); - malloc_mutex_prefork(tsd, &prof_dump_seq_mtx); - malloc_mutex_prefork(tsd, &prof_gdump_mtx); - malloc_mutex_prefork(tsd, &next_thr_uid_mtx); - malloc_mutex_prefork(tsd, &prof_thread_active_init_mtx); + malloc_mutex_prefork(tsdn, &prof_active_mtx); + malloc_mutex_prefork(tsdn, &prof_dump_seq_mtx); + malloc_mutex_prefork(tsdn, &prof_gdump_mtx); + malloc_mutex_prefork(tsdn, &next_thr_uid_mtx); + malloc_mutex_prefork(tsdn, &prof_thread_active_init_mtx); } } void -prof_postfork_parent(tsd_t *tsd) +prof_postfork_parent(tsdn_t *tsdn) { if (opt_prof) { unsigned i; - malloc_mutex_postfork_parent(tsd, &prof_thread_active_init_mtx); - malloc_mutex_postfork_parent(tsd, &next_thr_uid_mtx); - malloc_mutex_postfork_parent(tsd, &prof_gdump_mtx); - malloc_mutex_postfork_parent(tsd, &prof_dump_seq_mtx); - malloc_mutex_postfork_parent(tsd, &prof_active_mtx); + malloc_mutex_postfork_parent(tsdn, + 
&prof_thread_active_init_mtx); + malloc_mutex_postfork_parent(tsdn, &next_thr_uid_mtx); + malloc_mutex_postfork_parent(tsdn, &prof_gdump_mtx); + malloc_mutex_postfork_parent(tsdn, &prof_dump_seq_mtx); + malloc_mutex_postfork_parent(tsdn, &prof_active_mtx); for (i = 0; i < PROF_NCTX_LOCKS; i++) - malloc_mutex_postfork_parent(tsd, &gctx_locks[i]); + malloc_mutex_postfork_parent(tsdn, &gctx_locks[i]); for (i = 0; i < PROF_NTDATA_LOCKS; i++) - malloc_mutex_postfork_parent(tsd, &tdata_locks[i]); - malloc_mutex_postfork_parent(tsd, &tdatas_mtx); - malloc_mutex_postfork_parent(tsd, &bt2gctx_mtx); - malloc_mutex_postfork_parent(tsd, &prof_dump_mtx); + malloc_mutex_postfork_parent(tsdn, &tdata_locks[i]); + malloc_mutex_postfork_parent(tsdn, &tdatas_mtx); + malloc_mutex_postfork_parent(tsdn, &bt2gctx_mtx); + malloc_mutex_postfork_parent(tsdn, &prof_dump_mtx); } } void -prof_postfork_child(tsd_t *tsd) +prof_postfork_child(tsdn_t *tsdn) { if (opt_prof) { unsigned i; - malloc_mutex_postfork_child(tsd, &prof_thread_active_init_mtx); - malloc_mutex_postfork_child(tsd, &next_thr_uid_mtx); - malloc_mutex_postfork_child(tsd, &prof_gdump_mtx); - malloc_mutex_postfork_child(tsd, &prof_dump_seq_mtx); - malloc_mutex_postfork_child(tsd, &prof_active_mtx); + malloc_mutex_postfork_child(tsdn, &prof_thread_active_init_mtx); + malloc_mutex_postfork_child(tsdn, &next_thr_uid_mtx); + malloc_mutex_postfork_child(tsdn, &prof_gdump_mtx); + malloc_mutex_postfork_child(tsdn, &prof_dump_seq_mtx); + malloc_mutex_postfork_child(tsdn, &prof_active_mtx); for (i = 0; i < PROF_NCTX_LOCKS; i++) - malloc_mutex_postfork_child(tsd, &gctx_locks[i]); + malloc_mutex_postfork_child(tsdn, &gctx_locks[i]); for (i = 0; i < PROF_NTDATA_LOCKS; i++) - malloc_mutex_postfork_child(tsd, &tdata_locks[i]); - malloc_mutex_postfork_child(tsd, &tdatas_mtx); - malloc_mutex_postfork_child(tsd, &bt2gctx_mtx); - malloc_mutex_postfork_child(tsd, &prof_dump_mtx); + malloc_mutex_postfork_child(tsdn, &tdata_locks[i]); + malloc_mutex_postfork_child(tsdn, &tdatas_mtx); + malloc_mutex_postfork_child(tsdn, &bt2gctx_mtx); + malloc_mutex_postfork_child(tsdn, &prof_dump_mtx); } } diff --git a/src/quarantine.c b/src/quarantine.c index ff1637ec..18903fb5 100644 --- a/src/quarantine.c +++ b/src/quarantine.c @@ -13,24 +13,22 @@ /* Function prototypes for non-inline static functions. 
*/ static quarantine_t *quarantine_grow(tsd_t *tsd, quarantine_t *quarantine); -static void quarantine_drain_one(tsd_t *tsd, quarantine_t *quarantine); -static void quarantine_drain(tsd_t *tsd, quarantine_t *quarantine, +static void quarantine_drain_one(tsdn_t *tsdn, quarantine_t *quarantine); +static void quarantine_drain(tsdn_t *tsdn, quarantine_t *quarantine, size_t upper_bound); /******************************************************************************/ static quarantine_t * -quarantine_init(tsd_t *tsd, size_t lg_maxobjs) +quarantine_init(tsdn_t *tsdn, size_t lg_maxobjs) { quarantine_t *quarantine; size_t size; - assert(tsd_nominal(tsd)); - size = offsetof(quarantine_t, objs) + ((ZU(1) << lg_maxobjs) * sizeof(quarantine_obj_t)); - quarantine = (quarantine_t *)iallocztm(tsd, size, size2index(size), - false, NULL, true, arena_get(NULL, 0, true), true); + quarantine = (quarantine_t *)iallocztm(tsdn, size, size2index(size), + false, NULL, true, arena_get(TSDN_NULL, 0, true), true); if (quarantine == NULL) return (NULL); quarantine->curbytes = 0; @@ -49,7 +47,7 @@ quarantine_alloc_hook_work(tsd_t *tsd) if (!tsd_nominal(tsd)) return; - quarantine = quarantine_init(tsd, LG_MAXOBJS_INIT); + quarantine = quarantine_init(tsd_tsdn(tsd), LG_MAXOBJS_INIT); /* * Check again whether quarantine has been initialized, because * quarantine_init() may have triggered recursive initialization. @@ -57,7 +55,7 @@ quarantine_alloc_hook_work(tsd_t *tsd) if (tsd_quarantine_get(tsd) == NULL) tsd_quarantine_set(tsd, quarantine); else - idalloctm(tsd, quarantine, NULL, true, true); + idalloctm(tsd_tsdn(tsd), quarantine, NULL, true, true); } static quarantine_t * @@ -65,9 +63,9 @@ quarantine_grow(tsd_t *tsd, quarantine_t *quarantine) { quarantine_t *ret; - ret = quarantine_init(tsd, quarantine->lg_maxobjs + 1); + ret = quarantine_init(tsd_tsdn(tsd), quarantine->lg_maxobjs + 1); if (ret == NULL) { - quarantine_drain_one(tsd, quarantine); + quarantine_drain_one(tsd_tsdn(tsd), quarantine); return (quarantine); } @@ -89,18 +87,18 @@ quarantine_grow(tsd_t *tsd, quarantine_t *quarantine) memcpy(&ret->objs[ncopy_a], quarantine->objs, ncopy_b * sizeof(quarantine_obj_t)); } - idalloctm(tsd, quarantine, NULL, true, true); + idalloctm(tsd_tsdn(tsd), quarantine, NULL, true, true); tsd_quarantine_set(tsd, ret); return (ret); } static void -quarantine_drain_one(tsd_t *tsd, quarantine_t *quarantine) +quarantine_drain_one(tsdn_t *tsdn, quarantine_t *quarantine) { quarantine_obj_t *obj = &quarantine->objs[quarantine->first]; - assert(obj->usize == isalloc(tsd, obj->ptr, config_prof)); - idalloctm(tsd, obj->ptr, NULL, false, true); + assert(obj->usize == isalloc(tsdn, obj->ptr, config_prof)); + idalloctm(tsdn, obj->ptr, NULL, false, true); quarantine->curbytes -= obj->usize; quarantine->curobjs--; quarantine->first = (quarantine->first + 1) & ((ZU(1) << @@ -108,24 +106,24 @@ quarantine_drain_one(tsd_t *tsd, quarantine_t *quarantine) } static void -quarantine_drain(tsd_t *tsd, quarantine_t *quarantine, size_t upper_bound) +quarantine_drain(tsdn_t *tsdn, quarantine_t *quarantine, size_t upper_bound) { while (quarantine->curbytes > upper_bound && quarantine->curobjs > 0) - quarantine_drain_one(tsd, quarantine); + quarantine_drain_one(tsdn, quarantine); } void quarantine(tsd_t *tsd, void *ptr) { quarantine_t *quarantine; - size_t usize = isalloc(tsd, ptr, config_prof); + size_t usize = isalloc(tsd_tsdn(tsd), ptr, config_prof); cassert(config_fill); assert(opt_quarantine); if ((quarantine = tsd_quarantine_get(tsd)) == NULL) { - 
idalloctm(tsd, ptr, NULL, false, true); + idalloctm(tsd_tsdn(tsd), ptr, NULL, false, true); return; } /* @@ -135,7 +133,7 @@ quarantine(tsd_t *tsd, void *ptr) if (quarantine->curbytes + usize > opt_quarantine) { size_t upper_bound = (opt_quarantine >= usize) ? opt_quarantine - usize : 0; - quarantine_drain(tsd, quarantine, upper_bound); + quarantine_drain(tsd_tsdn(tsd), quarantine, upper_bound); } /* Grow the quarantine ring buffer if it's full. */ if (quarantine->curobjs == (ZU(1) << quarantine->lg_maxobjs)) @@ -164,7 +162,7 @@ quarantine(tsd_t *tsd, void *ptr) } } else { assert(quarantine->curbytes == 0); - idalloctm(tsd, ptr, NULL, false, true); + idalloctm(tsd_tsdn(tsd), ptr, NULL, false, true); } } @@ -178,8 +176,8 @@ quarantine_cleanup(tsd_t *tsd) quarantine = tsd_quarantine_get(tsd); if (quarantine != NULL) { - quarantine_drain(tsd, quarantine, 0); - idalloctm(tsd, quarantine, NULL, true, true); + quarantine_drain(tsd_tsdn(tsd), quarantine, 0); + idalloctm(tsd_tsdn(tsd), quarantine, NULL, true, true); tsd_quarantine_set(tsd, NULL); } } diff --git a/src/tcache.c b/src/tcache.c index 88005f30..175759c7 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -24,10 +24,10 @@ static tcaches_t *tcaches_avail; /******************************************************************************/ size_t -tcache_salloc(tsd_t *tsd, const void *ptr) +tcache_salloc(tsdn_t *tsdn, const void *ptr) { - return (arena_salloc(tsd, ptr, false)); + return (arena_salloc(tsdn, ptr, false)); } void @@ -71,12 +71,12 @@ tcache_event_hard(tsd_t *tsd, tcache_t *tcache) } void * -tcache_alloc_small_hard(tsd_t *tsd, arena_t *arena, tcache_t *tcache, +tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, tcache_bin_t *tbin, szind_t binind, bool *tcache_success) { void *ret; - arena_tcache_fill_small(tsd, arena, tbin, binind, config_prof ? + arena_tcache_fill_small(tsdn, arena, tbin, binind, config_prof ? tcache->prof_accumbytes : 0); if (config_prof) tcache->prof_accumbytes = 0; @@ -107,13 +107,13 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, arena_bin_t *bin = &bin_arena->bins[binind]; if (config_prof && bin_arena == arena) { - if (arena_prof_accum(tsd, arena, + if (arena_prof_accum(tsd_tsdn(tsd), arena, tcache->prof_accumbytes)) - prof_idump(tsd); + prof_idump(tsd_tsdn(tsd)); tcache->prof_accumbytes = 0; } - malloc_mutex_lock(tsd, &bin->lock); + malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); if (config_stats && bin_arena == arena) { assert(!merged_stats); merged_stats = true; @@ -131,8 +131,8 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, (uintptr_t)chunk) >> LG_PAGE; arena_chunk_map_bits_t *bitselm = arena_bitselm_get_mutable(chunk, pageind); - arena_dalloc_bin_junked_locked(tsd, bin_arena, - chunk, ptr, bitselm); + arena_dalloc_bin_junked_locked(tsd_tsdn(tsd), + bin_arena, chunk, ptr, bitselm); } else { /* * This object was allocated via a different @@ -144,8 +144,8 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, ndeferred++; } } - malloc_mutex_unlock(tsd, &bin->lock); - arena_decay_ticks(tsd, bin_arena, nflush - ndeferred); + malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock); + arena_decay_ticks(tsd_tsdn(tsd), bin_arena, nflush - ndeferred); } if (config_stats && !merged_stats) { /* @@ -153,11 +153,11 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, * arena, so the stats didn't get merged. Manually do so now. 
*/ arena_bin_t *bin = &arena->bins[binind]; - malloc_mutex_lock(tsd, &bin->lock); + malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); bin->stats.nflushes++; bin->stats.nrequests += tbin->tstats.nrequests; tbin->tstats.nrequests = 0; - malloc_mutex_unlock(tsd, &bin->lock); + malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock); } memmove(tbin->avail - rem, tbin->avail - tbin->ncached, rem * @@ -190,7 +190,7 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, if (config_prof) idump = false; - malloc_mutex_lock(tsd, &locked_arena->lock); + malloc_mutex_lock(tsd_tsdn(tsd), &locked_arena->lock); if ((config_prof || config_stats) && locked_arena == arena) { if (config_prof) { idump = arena_prof_accum_locked(arena, @@ -213,7 +213,7 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (extent_node_arena_get(&chunk->node) == locked_arena) { - arena_dalloc_large_junked_locked(tsd, + arena_dalloc_large_junked_locked(tsd_tsdn(tsd), locked_arena, chunk, ptr); } else { /* @@ -226,22 +226,23 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, ndeferred++; } } - malloc_mutex_unlock(tsd, &locked_arena->lock); + malloc_mutex_unlock(tsd_tsdn(tsd), &locked_arena->lock); if (config_prof && idump) - prof_idump(tsd); - arena_decay_ticks(tsd, locked_arena, nflush - ndeferred); + prof_idump(tsd_tsdn(tsd)); + arena_decay_ticks(tsd_tsdn(tsd), locked_arena, nflush - + ndeferred); } if (config_stats && !merged_stats) { /* * The flush loop didn't happen to flush to this thread's * arena, so the stats didn't get merged. Manually do so now. */ - malloc_mutex_lock(tsd, &arena->lock); + malloc_mutex_lock(tsd_tsdn(tsd), &arena->lock); arena->stats.nrequests_large += tbin->tstats.nrequests; arena->stats.lstats[binind - NBINS].nrequests += tbin->tstats.nrequests; tbin->tstats.nrequests = 0; - malloc_mutex_unlock(tsd, &arena->lock); + malloc_mutex_unlock(tsd_tsdn(tsd), &arena->lock); } memmove(tbin->avail - rem, tbin->avail - tbin->ncached, rem * @@ -251,35 +252,26 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, tbin->low_water = tbin->ncached; } -void -tcache_arena_associate(tsd_t *tsd, tcache_t *tcache, arena_t *arena) +static void +tcache_arena_associate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) { if (config_stats) { /* Link into list of extant tcaches. */ - malloc_mutex_lock(tsd, &arena->lock); + malloc_mutex_lock(tsdn, &arena->lock); ql_elm_new(tcache, link); ql_tail_insert(&arena->tcache_ql, tcache, link); - malloc_mutex_unlock(tsd, &arena->lock); + malloc_mutex_unlock(tsdn, &arena->lock); } } -void -tcache_arena_reassociate(tsd_t *tsd, tcache_t *tcache, arena_t *oldarena, - arena_t *newarena) -{ - - tcache_arena_dissociate(tsd, tcache, oldarena); - tcache_arena_associate(tsd, tcache, newarena); -} - -void -tcache_arena_dissociate(tsd_t *tsd, tcache_t *tcache, arena_t *arena) +static void +tcache_arena_dissociate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) { if (config_stats) { /* Unlink from list of extant tcaches. 
*/ - malloc_mutex_lock(tsd, &arena->lock); + malloc_mutex_lock(tsdn, &arena->lock); if (config_debug) { bool in_ql = false; tcache_t *iter; @@ -292,11 +284,20 @@ tcache_arena_dissociate(tsd_t *tsd, tcache_t *tcache, arena_t *arena) assert(in_ql); } ql_remove(&arena->tcache_ql, tcache, link); - tcache_stats_merge(tsd, tcache, arena); - malloc_mutex_unlock(tsd, &arena->lock); + tcache_stats_merge(tsdn, tcache, arena); + malloc_mutex_unlock(tsdn, &arena->lock); } } +void +tcache_arena_reassociate(tsdn_t *tsdn, tcache_t *tcache, arena_t *oldarena, + arena_t *newarena) +{ + + tcache_arena_dissociate(tsdn, tcache, oldarena); + tcache_arena_associate(tsdn, tcache, newarena); +} + tcache_t * tcache_get_hard(tsd_t *tsd) { @@ -310,11 +311,11 @@ tcache_get_hard(tsd_t *tsd) arena = arena_choose(tsd, NULL); if (unlikely(arena == NULL)) return (NULL); - return (tcache_create(tsd, arena)); + return (tcache_create(tsd_tsdn(tsd), arena)); } tcache_t * -tcache_create(tsd_t *tsd, arena_t *arena) +tcache_create(tsdn_t *tsdn, arena_t *arena) { tcache_t *tcache; size_t size, stack_offset; @@ -328,12 +329,12 @@ tcache_create(tsd_t *tsd, arena_t *arena) /* Avoid false cacheline sharing. */ size = sa2u(size, CACHELINE); - tcache = ipallocztm(tsd, size, CACHELINE, true, NULL, true, - arena_get(NULL, 0, true)); + tcache = ipallocztm(tsdn, size, CACHELINE, true, NULL, true, + arena_get(TSDN_NULL, 0, true)); if (tcache == NULL) return (NULL); - tcache_arena_associate(tsd, tcache, arena); + tcache_arena_associate(tsdn, tcache, arena); ticker_init(&tcache->gc_ticker, TCACHE_GC_INCR); @@ -360,7 +361,7 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache) unsigned i; arena = arena_choose(tsd, NULL); - tcache_arena_dissociate(tsd, tcache, arena); + tcache_arena_dissociate(tsd_tsdn(tsd), tcache, arena); for (i = 0; i < NBINS; i++) { tcache_bin_t *tbin = &tcache->tbins[i]; @@ -368,9 +369,9 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache) if (config_stats && tbin->tstats.nrequests != 0) { arena_bin_t *bin = &arena->bins[i]; - malloc_mutex_lock(tsd, &bin->lock); + malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); bin->stats.nrequests += tbin->tstats.nrequests; - malloc_mutex_unlock(tsd, &bin->lock); + malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock); } } @@ -379,19 +380,19 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache) tcache_bin_flush_large(tsd, tbin, i, 0, tcache); if (config_stats && tbin->tstats.nrequests != 0) { - malloc_mutex_lock(tsd, &arena->lock); + malloc_mutex_lock(tsd_tsdn(tsd), &arena->lock); arena->stats.nrequests_large += tbin->tstats.nrequests; arena->stats.lstats[i - NBINS].nrequests += tbin->tstats.nrequests; - malloc_mutex_unlock(tsd, &arena->lock); + malloc_mutex_unlock(tsd_tsdn(tsd), &arena->lock); } } if (config_prof && tcache->prof_accumbytes > 0 && - arena_prof_accum(tsd, arena, tcache->prof_accumbytes)) - prof_idump(tsd); + arena_prof_accum(tsd_tsdn(tsd), arena, tcache->prof_accumbytes)) + prof_idump(tsd_tsdn(tsd)); - idalloctm(tsd, tcache, NULL, true, true); + idalloctm(tsd_tsdn(tsd), tcache, NULL, true, true); } void @@ -416,21 +417,21 @@ tcache_enabled_cleanup(tsd_t *tsd) } void -tcache_stats_merge(tsd_t *tsd, tcache_t *tcache, arena_t *arena) +tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) { unsigned i; cassert(config_stats); - malloc_mutex_assert_owner(tsd, &arena->lock); + malloc_mutex_assert_owner(tsdn, &arena->lock); /* Merge and reset tcache stats. 
*/ for (i = 0; i < NBINS; i++) { arena_bin_t *bin = &arena->bins[i]; tcache_bin_t *tbin = &tcache->tbins[i]; - malloc_mutex_lock(tsd, &bin->lock); + malloc_mutex_lock(tsdn, &bin->lock); bin->stats.nrequests += tbin->tstats.nrequests; - malloc_mutex_unlock(tsd, &bin->lock); + malloc_mutex_unlock(tsdn, &bin->lock); tbin->tstats.nrequests = 0; } @@ -444,14 +445,14 @@ tcache_stats_merge(tsd_t *tsd, tcache_t *tcache, arena_t *arena) } bool -tcaches_create(tsd_t *tsd, unsigned *r_ind) +tcaches_create(tsdn_t *tsdn, unsigned *r_ind) { arena_t *arena; tcache_t *tcache; tcaches_t *elm; if (tcaches == NULL) { - tcaches = base_alloc(tsd, sizeof(tcache_t *) * + tcaches = base_alloc(tsdn, sizeof(tcache_t *) * (MALLOCX_TCACHE_MAX+1)); if (tcaches == NULL) return (true); @@ -459,10 +460,10 @@ tcaches_create(tsd_t *tsd, unsigned *r_ind) if (tcaches_avail == NULL && tcaches_past > MALLOCX_TCACHE_MAX) return (true); - arena = arena_ichoose(tsd, NULL); + arena = arena_ichoose(tsdn, NULL); if (unlikely(arena == NULL)) return (true); - tcache = tcache_create(tsd, arena); + tcache = tcache_create(tsdn, arena); if (tcache == NULL) return (true); @@ -508,7 +509,7 @@ tcaches_destroy(tsd_t *tsd, unsigned ind) } bool -tcache_boot(tsd_t *tsd) +tcache_boot(tsdn_t *tsdn) { unsigned i; @@ -526,7 +527,7 @@ tcache_boot(tsd_t *tsd) nhbins = size2index(tcache_maxclass) + 1; /* Initialize tcache_bin_info. */ - tcache_bin_info = (tcache_bin_info_t *)base_alloc(tsd, nhbins * + tcache_bin_info = (tcache_bin_info_t *)base_alloc(tsdn, nhbins * sizeof(tcache_bin_info_t)); if (tcache_bin_info == NULL) return (true); diff --git a/src/witness.c b/src/witness.c index 31c36a24..f5176b6f 100644 --- a/src/witness.c +++ b/src/witness.c @@ -34,17 +34,19 @@ witness_lock_error_t *witness_lock_error = JEMALLOC_N(witness_lock_error_impl); #endif void -witness_lock(tsd_t *tsd, witness_t *witness) +witness_lock(tsdn_t *tsdn, witness_t *witness) { + tsd_t *tsd; witness_list_t *witnesses; witness_t *w; - if (tsd == NULL) + if (tsdn_null(tsdn)) return; + tsd = tsdn_tsd(tsdn); if (witness->rank == WITNESS_RANK_OMIT) return; - witness_assert_not_owner(tsd, witness); + witness_assert_not_owner(tsdn, witness); witnesses = tsd_witnessesp_get(tsd); w = ql_last(witnesses, link); @@ -69,16 +71,18 @@ witness_lock(tsd_t *tsd, witness_t *witness) } void -witness_unlock(tsd_t *tsd, witness_t *witness) +witness_unlock(tsdn_t *tsdn, witness_t *witness) { + tsd_t *tsd; witness_list_t *witnesses; - if (tsd == NULL) + if (tsdn_null(tsdn)) return; + tsd = tsdn_tsd(tsdn); if (witness->rank == WITNESS_RANK_OMIT) return; - witness_assert_owner(tsd, witness); + witness_assert_owner(tsdn, witness); witnesses = tsd_witnessesp_get(tsd); ql_remove(witnesses, witness, link); @@ -104,13 +108,15 @@ witness_owner_error_t *witness_owner_error = #endif void -witness_assert_owner(tsd_t *tsd, const witness_t *witness) +witness_assert_owner(tsdn_t *tsdn, const witness_t *witness) { + tsd_t *tsd; witness_list_t *witnesses; witness_t *w; - if (tsd == NULL) + if (tsdn_null(tsdn)) return; + tsd = tsdn_tsd(tsdn); if (witness->rank == WITNESS_RANK_OMIT) return; @@ -142,13 +148,15 @@ witness_not_owner_error_t *witness_not_owner_error = #endif void -witness_assert_not_owner(tsd_t *tsd, const witness_t *witness) +witness_assert_not_owner(tsdn_t *tsdn, const witness_t *witness) { + tsd_t *tsd; witness_list_t *witnesses; witness_t *w; - if (tsd == NULL) + if (tsdn_null(tsdn)) return; + tsd = tsdn_tsd(tsdn); if (witness->rank == WITNESS_RANK_OMIT) return; @@ -183,13 +191,15 @@ 
witness_lockless_error_t *witness_lockless_error = #endif void -witness_assert_lockless(tsd_t *tsd) +witness_assert_lockless(tsdn_t *tsdn) { + tsd_t *tsd; witness_list_t *witnesses; witness_t *w; - if (tsd == NULL) + if (tsdn_null(tsdn)) return; + tsd = tsdn_tsd(tsdn); witnesses = tsd_witnessesp_get(tsd); w = ql_last(witnesses, link); @@ -202,7 +212,7 @@ void witnesses_cleanup(tsd_t *tsd) { - witness_assert_lockless(tsd); + witness_assert_lockless(tsd_tsdn(tsd)); /* Do nothing. */ } diff --git a/src/zone.c b/src/zone.c index 8f25051a..2c17123a 100644 --- a/src/zone.c +++ b/src/zone.c @@ -56,7 +56,7 @@ zone_size(malloc_zone_t *zone, void *ptr) * not work in practice, we must check all pointers to assure that they * reside within a mapped chunk before determining size. */ - return (ivsalloc(tsd_fetch(), ptr, config_prof)); + return (ivsalloc(tsdn_fetch(), ptr, config_prof)); } static void * @@ -87,7 +87,7 @@ static void zone_free(malloc_zone_t *zone, void *ptr) { - if (ivsalloc(tsd_fetch(), ptr, config_prof) != 0) { + if (ivsalloc(tsdn_fetch(), ptr, config_prof) != 0) { je_free(ptr); return; } @@ -99,7 +99,7 @@ static void * zone_realloc(malloc_zone_t *zone, void *ptr, size_t size) { - if (ivsalloc(tsd_fetch(), ptr, config_prof) != 0) + if (ivsalloc(tsdn_fetch(), ptr, config_prof) != 0) return (je_realloc(ptr, size)); return (realloc(ptr, size)); @@ -123,7 +123,7 @@ zone_free_definite_size(malloc_zone_t *zone, void *ptr, size_t size) { size_t alloc_size; - alloc_size = ivsalloc(tsd_fetch(), ptr, config_prof); + alloc_size = ivsalloc(tsdn_fetch(), ptr, config_prof); if (alloc_size != 0) { assert(alloc_size == size); je_free(ptr); diff --git a/test/unit/arena_reset.c b/test/unit/arena_reset.c index 8e769de6..8ba36c21 100644 --- a/test/unit/arena_reset.c +++ b/test/unit/arena_reset.c @@ -86,7 +86,7 @@ TEST_BEGIN(test_arena_reset) void **ptrs; int flags; size_t mib[3]; - tsd_t *tsd; + tsdn_t *tsdn; test_skip_if((config_valgrind && unlikely(in_valgrind)) || (config_fill && unlikely(opt_quarantine))); @@ -124,11 +124,11 @@ TEST_BEGIN(test_arena_reset) "Unexpected mallocx(%zu, %#x) failure", sz, flags); } - tsd = tsd_fetch(); + tsdn = tsdn_fetch(); /* Verify allocations. */ for (i = 0; i < nptrs; i++) { - assert_zu_gt(ivsalloc(tsd, ptrs[i], false), 0, + assert_zu_gt(ivsalloc(tsdn, ptrs[i], false), 0, "Allocation should have queryable size"); } @@ -142,7 +142,7 @@ TEST_BEGIN(test_arena_reset) /* Verify allocations no longer exist. 
*/ for (i = 0; i < nptrs; i++) { - assert_zu_eq(ivsalloc(tsd, ptrs[i], false), 0, + assert_zu_eq(ivsalloc(tsdn, ptrs[i], false), 0, "Allocation should no longer exist"); } diff --git a/test/unit/ckh.c b/test/unit/ckh.c index b1175959..961e2acb 100644 --- a/test/unit/ckh.c +++ b/test/unit/ckh.c @@ -2,24 +2,24 @@ TEST_BEGIN(test_new_delete) { - tsd_t *tsd; + tsdn_t *tsdn; ckh_t ckh; - tsd = tsd_fetch(); + tsdn = tsdn_fetch(); - assert_false(ckh_new(tsd, &ckh, 2, ckh_string_hash, ckh_string_keycomp), - "Unexpected ckh_new() error"); - ckh_delete(tsd, &ckh); + assert_false(ckh_new(tsdn, &ckh, 2, ckh_string_hash, + ckh_string_keycomp), "Unexpected ckh_new() error"); + ckh_delete(tsdn, &ckh); - assert_false(ckh_new(tsd, &ckh, 3, ckh_pointer_hash, + assert_false(ckh_new(tsdn, &ckh, 3, ckh_pointer_hash, ckh_pointer_keycomp), "Unexpected ckh_new() error"); - ckh_delete(tsd, &ckh); + ckh_delete(tsdn, &ckh); } TEST_END TEST_BEGIN(test_count_insert_search_remove) { - tsd_t *tsd; + tsdn_t *tsdn; ckh_t ckh; const char *strs[] = { "a string", @@ -30,17 +30,17 @@ TEST_BEGIN(test_count_insert_search_remove) const char *missing = "A string not in the hash table."; size_t i; - tsd = tsd_fetch(); + tsdn = tsdn_fetch(); - assert_false(ckh_new(tsd, &ckh, 2, ckh_string_hash, ckh_string_keycomp), - "Unexpected ckh_new() error"); + assert_false(ckh_new(tsdn, &ckh, 2, ckh_string_hash, + ckh_string_keycomp), "Unexpected ckh_new() error"); assert_zu_eq(ckh_count(&ckh), 0, "ckh_count() should return %zu, but it returned %zu", ZU(0), ckh_count(&ckh)); /* Insert. */ for (i = 0; i < sizeof(strs)/sizeof(const char *); i++) { - ckh_insert(tsd, &ckh, strs[i], strs[i]); + ckh_insert(tsdn, &ckh, strs[i], strs[i]); assert_zu_eq(ckh_count(&ckh), i+1, "ckh_count() should return %zu, but it returned %zu", i+1, ckh_count(&ckh)); @@ -85,7 +85,7 @@ TEST_BEGIN(test_count_insert_search_remove) vp = (i & 2) ? &v.p : NULL; k.p = NULL; v.p = NULL; - assert_false(ckh_remove(tsd, &ckh, strs[i], kp, vp), + assert_false(ckh_remove(tsdn, &ckh, strs[i], kp, vp), "Unexpected ckh_remove() error"); ks = (i & 1) ? 
strs[i] : (const char *)NULL; @@ -101,22 +101,22 @@ TEST_BEGIN(test_count_insert_search_remove) ckh_count(&ckh)); } - ckh_delete(tsd, &ckh); + ckh_delete(tsdn, &ckh); } TEST_END TEST_BEGIN(test_insert_iter_remove) { #define NITEMS ZU(1000) - tsd_t *tsd; + tsdn_t *tsdn; ckh_t ckh; void **p[NITEMS]; void *q, *r; size_t i; - tsd = tsd_fetch(); + tsdn = tsdn_fetch(); - assert_false(ckh_new(tsd, &ckh, 2, ckh_pointer_hash, + assert_false(ckh_new(tsdn, &ckh, 2, ckh_pointer_hash, ckh_pointer_keycomp), "Unexpected ckh_new() error"); for (i = 0; i < NITEMS; i++) { @@ -128,7 +128,7 @@ TEST_BEGIN(test_insert_iter_remove) size_t j; for (j = i; j < NITEMS; j++) { - assert_false(ckh_insert(tsd, &ckh, p[j], p[j]), + assert_false(ckh_insert(tsdn, &ckh, p[j], p[j]), "Unexpected ckh_insert() failure"); assert_false(ckh_search(&ckh, p[j], &q, &r), "Unexpected ckh_search() failure"); @@ -143,13 +143,13 @@ TEST_BEGIN(test_insert_iter_remove) for (j = i + 1; j < NITEMS; j++) { assert_false(ckh_search(&ckh, p[j], NULL, NULL), "Unexpected ckh_search() failure"); - assert_false(ckh_remove(tsd, &ckh, p[j], &q, &r), + assert_false(ckh_remove(tsdn, &ckh, p[j], &q, &r), "Unexpected ckh_remove() failure"); assert_ptr_eq(p[j], q, "Key pointer mismatch"); assert_ptr_eq(p[j], r, "Value pointer mismatch"); assert_true(ckh_search(&ckh, p[j], NULL, NULL), "Unexpected ckh_search() success"); - assert_true(ckh_remove(tsd, &ckh, p[j], &q, &r), + assert_true(ckh_remove(tsdn, &ckh, p[j], &q, &r), "Unexpected ckh_remove() success"); } @@ -184,13 +184,13 @@ TEST_BEGIN(test_insert_iter_remove) for (i = 0; i < NITEMS; i++) { assert_false(ckh_search(&ckh, p[i], NULL, NULL), "Unexpected ckh_search() failure"); - assert_false(ckh_remove(tsd, &ckh, p[i], &q, &r), + assert_false(ckh_remove(tsdn, &ckh, p[i], &q, &r), "Unexpected ckh_remove() failure"); assert_ptr_eq(p[i], q, "Key pointer mismatch"); assert_ptr_eq(p[i], r, "Value pointer mismatch"); assert_true(ckh_search(&ckh, p[i], NULL, NULL), "Unexpected ckh_search() success"); - assert_true(ckh_remove(tsd, &ckh, p[i], &q, &r), + assert_true(ckh_remove(tsdn, &ckh, p[i], &q, &r), "Unexpected ckh_remove() success"); dallocx(p[i], 0); } @@ -198,7 +198,7 @@ TEST_BEGIN(test_insert_iter_remove) assert_zu_eq(ckh_count(&ckh), 0, "ckh_count() should return %zu, but it returned %zu", ZU(0), ckh_count(&ckh)); - ckh_delete(tsd, &ckh); + ckh_delete(tsdn, &ckh); #undef NITEMS } TEST_END diff --git a/test/unit/junk.c b/test/unit/junk.c index 414874a0..acddc601 100644 --- a/test/unit/junk.c +++ b/test/unit/junk.c @@ -53,10 +53,10 @@ arena_dalloc_junk_large_intercept(void *ptr, size_t usize) } static void -huge_dalloc_junk_intercept(tsd_t *tsd, void *ptr, size_t usize) +huge_dalloc_junk_intercept(tsdn_t *tsdn, void *ptr, size_t usize) { - huge_dalloc_junk_orig(tsd, ptr, usize); + huge_dalloc_junk_orig(tsdn, ptr, usize); /* * The conditions under which junk filling actually occurs are nuanced * enough that it doesn't make sense to duplicate the decision logic in diff --git a/test/unit/prof_reset.c b/test/unit/prof_reset.c index 83f51df8..5ae45fd2 100644 --- a/test/unit/prof_reset.c +++ b/test/unit/prof_reset.c @@ -94,7 +94,7 @@ TEST_END bool prof_dump_header_intercepted = false; prof_cnt_t cnt_all_copy = {0, 0, 0, 0}; static bool -prof_dump_header_intercept(tsd_t *tsd, bool propagate_err, +prof_dump_header_intercept(tsdn_t *tsdn, bool propagate_err, const prof_cnt_t *cnt_all) { diff --git a/test/unit/witness.c b/test/unit/witness.c index 430d8203..ed172753 100644 --- a/test/unit/witness.c +++ 
b/test/unit/witness.c @@ -60,76 +60,76 @@ witness_comp_reverse(const witness_t *a, const witness_t *b) TEST_BEGIN(test_witness) { witness_t a, b; - tsd_t *tsd; + tsdn_t *tsdn; test_skip_if(!config_debug); - tsd = tsd_fetch(); + tsdn = tsdn_fetch(); - witness_assert_lockless(tsd); + witness_assert_lockless(tsdn); witness_init(&a, "a", 1, NULL); - witness_assert_not_owner(tsd, &a); - witness_lock(tsd, &a); - witness_assert_owner(tsd, &a); + witness_assert_not_owner(tsdn, &a); + witness_lock(tsdn, &a); + witness_assert_owner(tsdn, &a); witness_init(&b, "b", 2, NULL); - witness_assert_not_owner(tsd, &b); - witness_lock(tsd, &b); - witness_assert_owner(tsd, &b); + witness_assert_not_owner(tsdn, &b); + witness_lock(tsdn, &b); + witness_assert_owner(tsdn, &b); - witness_unlock(tsd, &a); - witness_unlock(tsd, &b); + witness_unlock(tsdn, &a); + witness_unlock(tsdn, &b); - witness_assert_lockless(tsd); + witness_assert_lockless(tsdn); } TEST_END TEST_BEGIN(test_witness_comp) { witness_t a, b, c, d; - tsd_t *tsd; + tsdn_t *tsdn; test_skip_if(!config_debug); - tsd = tsd_fetch(); + tsdn = tsdn_fetch(); - witness_assert_lockless(tsd); + witness_assert_lockless(tsdn); witness_init(&a, "a", 1, witness_comp); - witness_assert_not_owner(tsd, &a); - witness_lock(tsd, &a); - witness_assert_owner(tsd, &a); + witness_assert_not_owner(tsdn, &a); + witness_lock(tsdn, &a); + witness_assert_owner(tsdn, &a); witness_init(&b, "b", 1, witness_comp); - witness_assert_not_owner(tsd, &b); - witness_lock(tsd, &b); - witness_assert_owner(tsd, &b); - witness_unlock(tsd, &b); + witness_assert_not_owner(tsdn, &b); + witness_lock(tsdn, &b); + witness_assert_owner(tsdn, &b); + witness_unlock(tsdn, &b); witness_lock_error_orig = witness_lock_error; witness_lock_error = witness_lock_error_intercept; saw_lock_error = false; witness_init(&c, "c", 1, witness_comp_reverse); - witness_assert_not_owner(tsd, &c); + witness_assert_not_owner(tsdn, &c); assert_false(saw_lock_error, "Unexpected witness lock error"); - witness_lock(tsd, &c); + witness_lock(tsdn, &c); assert_true(saw_lock_error, "Expected witness lock error"); - witness_unlock(tsd, &c); + witness_unlock(tsdn, &c); saw_lock_error = false; witness_init(&d, "d", 1, NULL); - witness_assert_not_owner(tsd, &d); + witness_assert_not_owner(tsdn, &d); assert_false(saw_lock_error, "Unexpected witness lock error"); - witness_lock(tsd, &d); + witness_lock(tsdn, &d); assert_true(saw_lock_error, "Expected witness lock error"); - witness_unlock(tsd, &d); + witness_unlock(tsdn, &d); - witness_unlock(tsd, &a); + witness_unlock(tsdn, &a); - witness_assert_lockless(tsd); + witness_assert_lockless(tsdn); witness_lock_error = witness_lock_error_orig; } @@ -138,7 +138,7 @@ TEST_END TEST_BEGIN(test_witness_reversal) { witness_t a, b; - tsd_t *tsd; + tsdn_t *tsdn; test_skip_if(!config_debug); @@ -146,22 +146,22 @@ TEST_BEGIN(test_witness_reversal) witness_lock_error = witness_lock_error_intercept; saw_lock_error = false; - tsd = tsd_fetch(); + tsdn = tsdn_fetch(); - witness_assert_lockless(tsd); + witness_assert_lockless(tsdn); witness_init(&a, "a", 1, NULL); witness_init(&b, "b", 2, NULL); - witness_lock(tsd, &b); + witness_lock(tsdn, &b); assert_false(saw_lock_error, "Unexpected witness lock error"); - witness_lock(tsd, &a); + witness_lock(tsdn, &a); assert_true(saw_lock_error, "Expected witness lock error"); - witness_unlock(tsd, &a); - witness_unlock(tsd, &b); + witness_unlock(tsdn, &a); + witness_unlock(tsdn, &b); - witness_assert_lockless(tsd); + witness_assert_lockless(tsdn); 
witness_lock_error = witness_lock_error_orig; } @@ -170,7 +170,7 @@ TEST_END TEST_BEGIN(test_witness_recursive) { witness_t a; - tsd_t *tsd; + tsdn_t *tsdn; test_skip_if(!config_debug); @@ -182,22 +182,22 @@ TEST_BEGIN(test_witness_recursive) witness_lock_error = witness_lock_error_intercept; saw_lock_error = false; - tsd = tsd_fetch(); + tsdn = tsdn_fetch(); - witness_assert_lockless(tsd); + witness_assert_lockless(tsdn); witness_init(&a, "a", 1, NULL); - witness_lock(tsd, &a); + witness_lock(tsdn, &a); assert_false(saw_lock_error, "Unexpected witness lock error"); assert_false(saw_not_owner_error, "Unexpected witness not owner error"); - witness_lock(tsd, &a); + witness_lock(tsdn, &a); assert_true(saw_lock_error, "Expected witness lock error"); assert_true(saw_not_owner_error, "Expected witness not owner error"); - witness_unlock(tsd, &a); + witness_unlock(tsdn, &a); - witness_assert_lockless(tsd); + witness_assert_lockless(tsdn); witness_owner_error = witness_owner_error_orig; witness_lock_error = witness_lock_error_orig; @@ -208,7 +208,7 @@ TEST_END TEST_BEGIN(test_witness_unlock_not_owned) { witness_t a; - tsd_t *tsd; + tsdn_t *tsdn; test_skip_if(!config_debug); @@ -216,17 +216,17 @@ TEST_BEGIN(test_witness_unlock_not_owned) witness_owner_error = witness_owner_error_intercept; saw_owner_error = false; - tsd = tsd_fetch(); + tsdn = tsdn_fetch(); - witness_assert_lockless(tsd); + witness_assert_lockless(tsdn); witness_init(&a, "a", 1, NULL); assert_false(saw_owner_error, "Unexpected owner error"); - witness_unlock(tsd, &a); + witness_unlock(tsdn, &a); assert_true(saw_owner_error, "Expected owner error"); - witness_assert_lockless(tsd); + witness_assert_lockless(tsdn); witness_owner_error = witness_owner_error_orig; } @@ -235,7 +235,7 @@ TEST_END TEST_BEGIN(test_witness_lockful) { witness_t a; - tsd_t *tsd; + tsdn_t *tsdn; test_skip_if(!config_debug); @@ -243,22 +243,22 @@ TEST_BEGIN(test_witness_lockful) witness_lockless_error = witness_lockless_error_intercept; saw_lockless_error = false; - tsd = tsd_fetch(); + tsdn = tsdn_fetch(); - witness_assert_lockless(tsd); + witness_assert_lockless(tsdn); witness_init(&a, "a", 1, NULL); assert_false(saw_lockless_error, "Unexpected lockless error"); - witness_assert_lockless(tsd); + witness_assert_lockless(tsdn); - witness_lock(tsd, &a); - witness_assert_lockless(tsd); + witness_lock(tsdn, &a); + witness_assert_lockless(tsdn); assert_true(saw_lockless_error, "Expected lockless error"); - witness_unlock(tsd, &a); + witness_unlock(tsdn, &a); - witness_assert_lockless(tsd); + witness_assert_lockless(tsdn); witness_lockless_error = witness_lockless_error_orig; } From 3a9ec676267cf215ed2591a1060f870daced2472 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 11 May 2016 00:52:16 -0700 Subject: [PATCH 77/82] Disable junk filling for tests that could otherwise easily OOM. 
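
Junk filling memsets every byte of each allocation, so the large-size cases in
these tests touch (and therefore commit) far more memory than they logically
need, which is what makes them prone to OOM.  The hunks below opt the two test
programs out of junk filling via the application-visible malloc_conf option
string.  Condensed, the pattern added to each test file is (a sketch mirroring
the hunks below; nothing here is new beyond the comment):

    #include "test/jemalloc_test.h"

    /*
     * Disable junk filling for this test program only.  The guard matters
     * because the junk option is only meaningful when fill support
     * (JEMALLOC_FILL) is compiled in.
     */
    #ifdef JEMALLOC_FILL
    const char *malloc_conf = "junk:false";
    #endif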
--- test/integration/mallocx.c | 4 ++++ test/integration/xallocx.c | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/test/integration/mallocx.c b/test/integration/mallocx.c index 578c229a..55e1a090 100644 --- a/test/integration/mallocx.c +++ b/test/integration/mallocx.c @@ -1,5 +1,9 @@ #include "test/jemalloc_test.h" +#ifdef JEMALLOC_FILL +const char *malloc_conf = "junk:false"; +#endif + static unsigned get_nsizes_impl(const char *cmd) { diff --git a/test/integration/xallocx.c b/test/integration/xallocx.c index 5c4998b6..ad292bb5 100644 --- a/test/integration/xallocx.c +++ b/test/integration/xallocx.c @@ -1,5 +1,9 @@ #include "test/jemalloc_test.h" +#ifdef JEMALLOC_FILL +const char *malloc_conf = "junk:false"; +#endif + /* * Use a separate arena for xallocx() extension/contraction tests so that * internal allocation e.g. by heap profiling can't interpose allocations where From 7790a0ba403b02bcb8804534c8120d605b4dc5f4 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 11 May 2016 00:52:59 -0700 Subject: [PATCH 78/82] Fix chunk accounting related to triggering gdump profiles. Fix in place huge reallocation to update the chunk counters that are used for triggering gdump profiles. --- ChangeLog | 1 + src/huge.c | 15 +++++++++++++++ 2 files changed, 16 insertions(+) diff --git a/ChangeLog b/ChangeLog index 68dedfa0..a9390947 100644 --- a/ChangeLog +++ b/ChangeLog @@ -23,6 +23,7 @@ brevity. Much more detail can be found in the git revision history: to avoid unfortunate interactions during fork(2). (@jasone) Bug fixes: + - Fix chunk accounting related to triggering gdump profiles. (@jasone) - Link against librt for clock_gettime(2) if glibc < 2.17. (@jasone) - Scale leak report summary according to sampling probability. (@jasone) diff --git a/src/huge.c b/src/huge.c index 0bf61622..ba083684 100644 --- a/src/huge.c +++ b/src/huge.c @@ -23,6 +23,15 @@ huge_node_set(tsdn_t *tsdn, const void *ptr, extent_node_t *node) return (chunk_register(tsdn, ptr, node)); } +static void +huge_node_reset(tsdn_t *tsdn, const void *ptr, extent_node_t *node) +{ + bool err; + + err = huge_node_set(tsdn, ptr, node); + assert(!err); +} + static void huge_node_unset(const void *ptr, const extent_node_t *node) { @@ -162,8 +171,10 @@ huge_ralloc_no_move_similar(tsdn_t *tsdn, void *ptr, size_t oldsize, malloc_mutex_lock(tsdn, &arena->huge_mtx); /* Update the size of the huge allocation. */ + huge_node_unset(ptr, node); assert(extent_node_size_get(node) != usize); extent_node_size_set(node, usize); + huge_node_reset(tsdn, ptr, node); /* Update zeroed. */ extent_node_zeroed_set(node, post_zeroed); malloc_mutex_unlock(tsdn, &arena->huge_mtx); @@ -224,7 +235,9 @@ huge_ralloc_no_move_shrink(tsdn_t *tsdn, void *ptr, size_t oldsize, malloc_mutex_lock(tsdn, &arena->huge_mtx); /* Update the size of the huge allocation. */ + huge_node_unset(ptr, node); extent_node_size_set(node, usize); + huge_node_reset(tsdn, ptr, node); /* Update zeroed. */ extent_node_zeroed_set(node, post_zeroed); malloc_mutex_unlock(tsdn, &arena->huge_mtx); @@ -260,7 +273,9 @@ huge_ralloc_no_move_expand(tsdn_t *tsdn, void *ptr, size_t oldsize, malloc_mutex_lock(tsdn, &arena->huge_mtx); /* Update the size of the huge allocation. 
*/ + huge_node_unset(ptr, node); extent_node_size_set(node, usize); + huge_node_reset(tsdn, ptr, node); malloc_mutex_unlock(tsdn, &arena->huge_mtx); if (zero || (config_fill && unlikely(opt_zero))) { From 73d3d58dc234315214c0d73d6badd4fdc53cbeff Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 11 May 2016 15:33:28 -0700 Subject: [PATCH 79/82] Optimize witness fast path. Short-circuit commonly called witness functions so that they only execute in debug builds, and remove equivalent guards from mutex functions. This avoids pointless code execution in witness_assert_lockless(), which is typically called twice per allocation/deallocation function invocation. Inline commonly called witness functions so that optimized builds can completely remove calls as dead code. --- .../jemalloc/internal/jemalloc_internal.h.in | 2 +- include/jemalloc/internal/mutex.h | 13 +- include/jemalloc/internal/witness.h | 152 +++++++++++++++++- src/witness.c | 122 +------------- 4 files changed, 157 insertions(+), 132 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 69d94ec5..51bf8974 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -531,9 +531,9 @@ void jemalloc_postfork_child(void); #include "jemalloc/internal/smoothstep.h" #include "jemalloc/internal/stats.h" #include "jemalloc/internal/ctl.h" +#include "jemalloc/internal/tsd.h" #include "jemalloc/internal/witness.h" #include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/tsd.h" #include "jemalloc/internal/mb.h" #include "jemalloc/internal/extent.h" #include "jemalloc/internal/base.h" diff --git a/include/jemalloc/internal/mutex.h b/include/jemalloc/internal/mutex.h index 00f0b91c..52217991 100644 --- a/include/jemalloc/internal/mutex.h +++ b/include/jemalloc/internal/mutex.h @@ -81,8 +81,7 @@ malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex) { if (isthreaded) { - if (config_debug) - witness_assert_not_owner(tsdn, &mutex->witness); + witness_assert_not_owner(tsdn, &mutex->witness); #ifdef _WIN32 # if _WIN32_WINNT >= 0x0600 AcquireSRWLockExclusive(&mutex->lock); @@ -94,8 +93,7 @@ malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex) #else pthread_mutex_lock(&mutex->lock); #endif - if (config_debug) - witness_lock(tsdn, &mutex->witness); + witness_lock(tsdn, &mutex->witness); } } @@ -104,8 +102,7 @@ malloc_mutex_unlock(tsdn_t *tsdn, malloc_mutex_t *mutex) { if (isthreaded) { - if (config_debug) - witness_unlock(tsdn, &mutex->witness); + witness_unlock(tsdn, &mutex->witness); #ifdef _WIN32 # if _WIN32_WINNT >= 0x0600 ReleaseSRWLockExclusive(&mutex->lock); @@ -124,7 +121,7 @@ JEMALLOC_INLINE void malloc_mutex_assert_owner(tsdn_t *tsdn, malloc_mutex_t *mutex) { - if (isthreaded && config_debug) + if (isthreaded) witness_assert_owner(tsdn, &mutex->witness); } @@ -132,7 +129,7 @@ JEMALLOC_INLINE void malloc_mutex_assert_not_owner(tsdn_t *tsdn, malloc_mutex_t *mutex) { - if (isthreaded && config_debug) + if (isthreaded) witness_assert_not_owner(tsdn, &mutex->witness); } #endif diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h index 4d312eab..d78dca2d 100644 --- a/include/jemalloc/internal/witness.h +++ b/include/jemalloc/internal/witness.h @@ -74,24 +74,28 @@ void witness_init(witness_t *witness, const char *name, witness_rank_t rank, #ifdef JEMALLOC_JET typedef void (witness_lock_error_t)(const witness_list_t *, const witness_t *); extern witness_lock_error_t 
*witness_lock_error; +#else +void witness_lock_error(const witness_list_t *witnesses, + const witness_t *witness); #endif -void witness_lock(tsdn_t *tsdn, witness_t *witness); -void witness_unlock(tsdn_t *tsdn, witness_t *witness); #ifdef JEMALLOC_JET typedef void (witness_owner_error_t)(const witness_t *); extern witness_owner_error_t *witness_owner_error; +#else +void witness_owner_error(const witness_t *witness); #endif -void witness_assert_owner(tsdn_t *tsdn, const witness_t *witness); #ifdef JEMALLOC_JET typedef void (witness_not_owner_error_t)(const witness_t *); extern witness_not_owner_error_t *witness_not_owner_error; +#else +void witness_not_owner_error(const witness_t *witness); #endif -void witness_assert_not_owner(tsdn_t *tsdn, const witness_t *witness); #ifdef JEMALLOC_JET typedef void (witness_lockless_error_t)(const witness_list_t *); extern witness_lockless_error_t *witness_lockless_error; +#else +void witness_lockless_error(const witness_list_t *witnesses); #endif -void witness_assert_lockless(tsdn_t *tsdn); void witnesses_cleanup(tsd_t *tsd); void witness_fork_cleanup(tsd_t *tsd); @@ -103,5 +107,143 @@ void witness_postfork_child(tsd_t *tsd); /******************************************************************************/ #ifdef JEMALLOC_H_INLINES +#ifndef JEMALLOC_ENABLE_INLINE +void witness_assert_owner(tsdn_t *tsdn, const witness_t *witness); +void witness_assert_not_owner(tsdn_t *tsdn, const witness_t *witness); +void witness_assert_lockless(tsdn_t *tsdn); +void witness_lock(tsdn_t *tsdn, witness_t *witness); +void witness_unlock(tsdn_t *tsdn, witness_t *witness); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_MUTEX_C_)) +JEMALLOC_INLINE void +witness_assert_owner(tsdn_t *tsdn, const witness_t *witness) +{ + tsd_t *tsd; + witness_list_t *witnesses; + witness_t *w; + + if (!config_debug) + return; + + if (tsdn_null(tsdn)) + return; + tsd = tsdn_tsd(tsdn); + if (witness->rank == WITNESS_RANK_OMIT) + return; + + witnesses = tsd_witnessesp_get(tsd); + ql_foreach(w, witnesses, link) { + if (w == witness) + return; + } + witness_owner_error(witness); +} + +JEMALLOC_INLINE void +witness_assert_not_owner(tsdn_t *tsdn, const witness_t *witness) +{ + tsd_t *tsd; + witness_list_t *witnesses; + witness_t *w; + + if (!config_debug) + return; + + if (tsdn_null(tsdn)) + return; + tsd = tsdn_tsd(tsdn); + if (witness->rank == WITNESS_RANK_OMIT) + return; + + witnesses = tsd_witnessesp_get(tsd); + ql_foreach(w, witnesses, link) { + if (w == witness) + witness_not_owner_error(witness); + } +} + +JEMALLOC_INLINE void +witness_assert_lockless(tsdn_t *tsdn) +{ + tsd_t *tsd; + witness_list_t *witnesses; + witness_t *w; + + if (!config_debug) + return; + + if (tsdn_null(tsdn)) + return; + tsd = tsdn_tsd(tsdn); + + witnesses = tsd_witnessesp_get(tsd); + w = ql_last(witnesses, link); + if (w != NULL) + witness_lockless_error(witnesses); +} + +JEMALLOC_INLINE void +witness_lock(tsdn_t *tsdn, witness_t *witness) +{ + tsd_t *tsd; + witness_list_t *witnesses; + witness_t *w; + + if (!config_debug) + return; + + if (tsdn_null(tsdn)) + return; + tsd = tsdn_tsd(tsdn); + if (witness->rank == WITNESS_RANK_OMIT) + return; + + witness_assert_not_owner(tsdn, witness); + + witnesses = tsd_witnessesp_get(tsd); + w = ql_last(witnesses, link); + if (w == NULL) { + /* No other locks; do nothing. */ + } else if (tsd_witness_fork_get(tsd) && w->rank <= witness->rank) { + /* Forking, and relaxed ranking satisfied. 
*/ + } else if (w->rank > witness->rank) { + /* Not forking, rank order reversal. */ + witness_lock_error(witnesses, witness); + } else if (w->rank == witness->rank && (w->comp == NULL || w->comp != + witness->comp || w->comp(w, witness) > 0)) { + /* + * Missing/incompatible comparison function, or comparison + * function indicates rank order reversal. + */ + witness_lock_error(witnesses, witness); + } + + ql_elm_new(witness, link); + ql_tail_insert(witnesses, witness, link); +} + +JEMALLOC_INLINE void +witness_unlock(tsdn_t *tsdn, witness_t *witness) +{ + tsd_t *tsd; + witness_list_t *witnesses; + + if (!config_debug) + return; + + if (tsdn_null(tsdn)) + return; + tsd = tsdn_tsd(tsdn); + if (witness->rank == WITNESS_RANK_OMIT) + return; + + witness_assert_owner(tsdn, witness); + + witnesses = tsd_witnessesp_get(tsd); + ql_remove(witnesses, witness, link); +} +#endif + #endif /* JEMALLOC_H_INLINES */ /******************************************************************************/ diff --git a/src/witness.c b/src/witness.c index f5176b6f..24312031 100644 --- a/src/witness.c +++ b/src/witness.c @@ -15,7 +15,7 @@ witness_init(witness_t *witness, const char *name, witness_rank_t rank, #undef witness_lock_error #define witness_lock_error JEMALLOC_N(witness_lock_error_impl) #endif -static void +void witness_lock_error(const witness_list_t *witnesses, const witness_t *witness) { witness_t *w; @@ -33,66 +33,11 @@ witness_lock_error(const witness_list_t *witnesses, const witness_t *witness) witness_lock_error_t *witness_lock_error = JEMALLOC_N(witness_lock_error_impl); #endif -void -witness_lock(tsdn_t *tsdn, witness_t *witness) -{ - tsd_t *tsd; - witness_list_t *witnesses; - witness_t *w; - - if (tsdn_null(tsdn)) - return; - tsd = tsdn_tsd(tsdn); - if (witness->rank == WITNESS_RANK_OMIT) - return; - - witness_assert_not_owner(tsdn, witness); - - witnesses = tsd_witnessesp_get(tsd); - w = ql_last(witnesses, link); - if (w == NULL) { - /* No other locks; do nothing. */ - } else if (tsd_witness_fork_get(tsd) && w->rank <= witness->rank) { - /* Forking, and relaxed ranking satisfied. */ - } else if (w->rank > witness->rank) { - /* Not forking, rank order reversal. */ - witness_lock_error(witnesses, witness); - } else if (w->rank == witness->rank && (w->comp == NULL || w->comp != - witness->comp || w->comp(w, witness) > 0)) { - /* - * Missing/incompatible comparison function, or comparison - * function indicates rank order reversal. 
- */ - witness_lock_error(witnesses, witness); - } - - ql_elm_new(witness, link); - ql_tail_insert(witnesses, witness, link); -} - -void -witness_unlock(tsdn_t *tsdn, witness_t *witness) -{ - tsd_t *tsd; - witness_list_t *witnesses; - - if (tsdn_null(tsdn)) - return; - tsd = tsdn_tsd(tsdn); - if (witness->rank == WITNESS_RANK_OMIT) - return; - - witness_assert_owner(tsdn, witness); - - witnesses = tsd_witnessesp_get(tsd); - ql_remove(witnesses, witness, link); -} - #ifdef JEMALLOC_JET #undef witness_owner_error #define witness_owner_error JEMALLOC_N(witness_owner_error_impl) #endif -static void +void witness_owner_error(const witness_t *witness) { @@ -107,32 +52,11 @@ witness_owner_error_t *witness_owner_error = JEMALLOC_N(witness_owner_error_impl); #endif -void -witness_assert_owner(tsdn_t *tsdn, const witness_t *witness) -{ - tsd_t *tsd; - witness_list_t *witnesses; - witness_t *w; - - if (tsdn_null(tsdn)) - return; - tsd = tsdn_tsd(tsdn); - if (witness->rank == WITNESS_RANK_OMIT) - return; - - witnesses = tsd_witnessesp_get(tsd); - ql_foreach(w, witnesses, link) { - if (w == witness) - return; - } - witness_owner_error(witness); -} - #ifdef JEMALLOC_JET #undef witness_not_owner_error #define witness_not_owner_error JEMALLOC_N(witness_not_owner_error_impl) #endif -static void +void witness_not_owner_error(const witness_t *witness) { @@ -147,31 +71,11 @@ witness_not_owner_error_t *witness_not_owner_error = JEMALLOC_N(witness_not_owner_error_impl); #endif -void -witness_assert_not_owner(tsdn_t *tsdn, const witness_t *witness) -{ - tsd_t *tsd; - witness_list_t *witnesses; - witness_t *w; - - if (tsdn_null(tsdn)) - return; - tsd = tsdn_tsd(tsdn); - if (witness->rank == WITNESS_RANK_OMIT) - return; - - witnesses = tsd_witnessesp_get(tsd); - ql_foreach(w, witnesses, link) { - if (w == witness) - witness_not_owner_error(witness); - } -} - #ifdef JEMALLOC_JET #undef witness_lockless_error #define witness_lockless_error JEMALLOC_N(witness_lockless_error_impl) #endif -static void +void witness_lockless_error(const witness_list_t *witnesses) { witness_t *w; @@ -190,24 +94,6 @@ witness_lockless_error_t *witness_lockless_error = JEMALLOC_N(witness_lockless_error_impl); #endif -void -witness_assert_lockless(tsdn_t *tsdn) -{ - tsd_t *tsd; - witness_list_t *witnesses; - witness_t *w; - - if (tsdn_null(tsdn)) - return; - tsd = tsdn_tsd(tsdn); - - witnesses = tsd_witnessesp_get(tsd); - w = ql_last(witnesses, link); - if (w != NULL) { - witness_lockless_error(witnesses); - } -} - void witnesses_cleanup(tsd_t *tsd) { From 0fc1317fc6989e1090c5bcf1713b6a7482110ccc Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 11 May 2016 16:14:20 -0700 Subject: [PATCH 80/82] Mangle tested functions as n_witness_* rather than witness_*_impl. 
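
Under JEMALLOC_JET each of these error functions is compiled under a mangled
private name, and the public name is a function pointer that tests can point
at an interceptor.  Condensed from the hunks below (error-reporting body
elided), the pattern for witness_lock_error now reads:

    #ifdef JEMALLOC_JET
    #undef witness_lock_error
    #define witness_lock_error JEMALLOC_N(n_witness_lock_error)
    #endif
    void
    witness_lock_error(const witness_list_t *witnesses, const witness_t *witness)
    {
    	/* Report the witness lock order error (body elided in this sketch). */
    }
    #ifdef JEMALLOC_JET
    #undef witness_lock_error
    #define witness_lock_error JEMALLOC_N(witness_lock_error)
    /* Point the public hook at the real, n_-prefixed implementation. */
    witness_lock_error_t *witness_lock_error = JEMALLOC_N(n_witness_lock_error);
    #endif

Tests such as test/unit/witness.c interpose by saving witness_lock_error,
assigning witness_lock_error_intercept to it, and restoring the original
afterwards; the n_ prefix only changes what the canonical implementation is
mangled to, not that protocol.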
--- src/witness.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/src/witness.c b/src/witness.c index 24312031..23753f24 100644 --- a/src/witness.c +++ b/src/witness.c @@ -13,7 +13,7 @@ witness_init(witness_t *witness, const char *name, witness_rank_t rank, #ifdef JEMALLOC_JET #undef witness_lock_error -#define witness_lock_error JEMALLOC_N(witness_lock_error_impl) +#define witness_lock_error JEMALLOC_N(n_witness_lock_error) #endif void witness_lock_error(const witness_list_t *witnesses, const witness_t *witness) @@ -30,12 +30,12 @@ witness_lock_error(const witness_list_t *witnesses, const witness_t *witness) #ifdef JEMALLOC_JET #undef witness_lock_error #define witness_lock_error JEMALLOC_N(witness_lock_error) -witness_lock_error_t *witness_lock_error = JEMALLOC_N(witness_lock_error_impl); +witness_lock_error_t *witness_lock_error = JEMALLOC_N(n_witness_lock_error); #endif #ifdef JEMALLOC_JET #undef witness_owner_error -#define witness_owner_error JEMALLOC_N(witness_owner_error_impl) +#define witness_owner_error JEMALLOC_N(n_witness_owner_error) #endif void witness_owner_error(const witness_t *witness) @@ -48,13 +48,12 @@ witness_owner_error(const witness_t *witness) #ifdef JEMALLOC_JET #undef witness_owner_error #define witness_owner_error JEMALLOC_N(witness_owner_error) -witness_owner_error_t *witness_owner_error = - JEMALLOC_N(witness_owner_error_impl); +witness_owner_error_t *witness_owner_error = JEMALLOC_N(n_witness_owner_error); #endif #ifdef JEMALLOC_JET #undef witness_not_owner_error -#define witness_not_owner_error JEMALLOC_N(witness_not_owner_error_impl) +#define witness_not_owner_error JEMALLOC_N(n_witness_not_owner_error) #endif void witness_not_owner_error(const witness_t *witness) @@ -68,12 +67,12 @@ witness_not_owner_error(const witness_t *witness) #undef witness_not_owner_error #define witness_not_owner_error JEMALLOC_N(witness_not_owner_error) witness_not_owner_error_t *witness_not_owner_error = - JEMALLOC_N(witness_not_owner_error_impl); + JEMALLOC_N(n_witness_not_owner_error); #endif #ifdef JEMALLOC_JET #undef witness_lockless_error -#define witness_lockless_error JEMALLOC_N(witness_lockless_error_impl) +#define witness_lockless_error JEMALLOC_N(n_witness_lockless_error) #endif void witness_lockless_error(const witness_list_t *witnesses) @@ -91,7 +90,7 @@ witness_lockless_error(const witness_list_t *witnesses) #undef witness_lockless_error #define witness_lockless_error JEMALLOC_N(witness_lockless_error) witness_lockless_error_t *witness_lockless_error = - JEMALLOC_N(witness_lockless_error_impl); + JEMALLOC_N(n_witness_lockless_error); #endif void From 1c35f63797d63a1d08507ea724ec5d8898e8d76d Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 11 May 2016 16:52:58 -0700 Subject: [PATCH 81/82] Guard tsdn_tsd() call with tsdn_null() check. --- src/huge.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/huge.c b/src/huge.c index ba083684..1aa02a0f 100644 --- a/src/huge.c +++ b/src/huge.c @@ -67,7 +67,6 @@ huge_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, assert(ausize >= chunksize); /* Allocate an extent node with which to track the chunk. */ - assert(tsdn != NULL || arena != NULL); node = ipallocztm(tsdn, CACHELINE_CEILING(sizeof(extent_node_t)), CACHELINE, false, NULL, true, arena_ichoose(tsdn, arena)); if (node == NULL) @@ -78,7 +77,8 @@ huge_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, * it is possible to make correct junk/zero fill decisions below. 
*/ is_zeroed = zero; - arena = arena_choose(tsdn_tsd(tsdn), arena); + if (likely(!tsdn_null(tsdn))) + arena = arena_choose(tsdn_tsd(tsdn), arena); if (unlikely(arena == NULL) || (ret = arena_chunk_alloc_huge(tsdn, arena, usize, alignment, &is_zeroed)) == NULL) { idalloctm(tsdn, node, NULL, true, true); From 09f8585ce8a57baa387cc0327e51c0baffbdce6f Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 12 May 2016 14:23:50 -0700 Subject: [PATCH 82/82] Update ChangeLog for 4.2.0. --- ChangeLog | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index a9390947..926209e5 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,7 +4,7 @@ brevity. Much more detail can be found in the git revision history: https://github.com/jemalloc/jemalloc -* 4.2.0 (XXX) +* 4.2.0 (May 12, 2016) New features: - Add the arena.<i>.reset mallctl, which makes it possible to discard all of