From fc0b3b7383373d66cfed2cd4e2faa272a6868d32 Mon Sep 17 00:00:00 2001
From: Jason Evans
Date: Thu, 9 Oct 2014 17:54:06 -0700
Subject: [PATCH] Add configure options.

Add:
  --with-lg-page
  --with-lg-page-sizes
  --with-lg-size-class-group
  --with-lg-quantum

Get rid of STATIC_PAGE_SHIFT, in favor of directly setting LG_PAGE.

Fix various edge conditions exposed by the configure options.
---
 INSTALL                                       | 67 +++++++++++++++++++
 Makefile.in                                   |  1 +
 configure.ac                                  | 47 ++++++++++---
 include/jemalloc/internal/arena.h             |  8 ++-
 include/jemalloc/internal/huge.h              |  9 +--
 .../jemalloc/internal/jemalloc_internal.h.in  | 28 ++++----
 .../internal/jemalloc_internal_defs.h.in      | 10 ++-
 include/jemalloc/internal/private_symbols.txt |  4 +-
 include/jemalloc/internal/size_classes.sh     | 12 +++-
 include/jemalloc/internal/tcache.h            |  4 +-
 src/arena.c                                   | 42 +++++++++---
 src/huge.c                                    | 52 +++++++-------
 src/jemalloc.c                                | 50 ++++++++------
 src/tcache.c                                  | 53 ++++-----------
 test/unit/lg_chunk.c                          | 26 +++++++
 test/unit/mallctl.c                           |  2 +-
 16 files changed, 278 insertions(+), 137 deletions(-)
 create mode 100644 test/unit/lg_chunk.c

diff --git a/INSTALL b/INSTALL
index 9af23369..73bf7185 100644
--- a/INSTALL
+++ b/INSTALL
@@ -189,6 +189,73 @@ any of the following arguments (not a definitive list) to 'configure':
     Specify where to find DocBook XSL stylesheets when building the
     documentation.
 
+--with-lg-page=<lg-page>
+    Specify the base 2 log of the system page size.  This option is only useful
+    when cross compiling, since the configure script automatically determines the
+    host's page size by default.
+
+--with-lg-page-sizes=<lg-page-sizes>
+    Specify the comma-separated base 2 logs of the page sizes to support.  This
+    option may be useful when cross-compiling in combination with
+    --with-lg-page, but its primary use case is for integration with FreeBSD's
+    libc, wherein jemalloc is embedded.
+
+--with-lg-size-class-group=<lg-size-class-group>
+    Specify the base 2 log of how many size classes to use for each doubling in
+    size.  By default jemalloc uses <lg-size-class-group>=2, which results in
+    e.g. the following size classes:
+
+      [...], 64,
+      80, 96, 112, 128,
+      160, [...]
+
+    <lg-size-class-group>=3 results in e.g. the following size classes:
+
+      [...], 64,
+      72, 80, 88, 96, 104, 112, 120, 128,
+      144, [...]
+
+    The minimal <lg-size-class-group>=0 causes jemalloc to only provide size
+    classes that are powers of 2:
+
+      [...],
+      64,
+      128,
+      256,
+      [...]
+
+    An implementation detail currently limits the total number of small size
+    classes to 255, and a compilation error will result if the
+    <lg-size-class-group> you specify cannot be supported.  The limit is
+    roughly <lg-size-class-group>=4, depending on page size.
+
+--with-lg-quantum=<lg-quantum>
+    Specify the base 2 log of the minimum allocation alignment (only
+    <lg-quantum>=3 and <lg-quantum>=4 are supported).  jemalloc needs to know
+    the minimum alignment that meets the following C standard requirement
+    (quoted from the April 12, 2011 draft of the C11 standard):
+
+      The pointer returned if the allocation succeeds is suitably aligned so
+      that it may be assigned to a pointer to any type of object with a
+      fundamental alignment requirement and then used to access such an object
+      or an array of such objects in the space allocated [...]
+
+    This setting is architecture-specific, and although jemalloc includes known
+    safe values for the most commonly used modern architectures, there is a
+    wrinkle related to GNU libc (glibc) that may impact your choice of
+    <lg-quantum>.  On most modern architectures, this mandates 16-byte alignment
+    (<lg-quantum>=4), but the glibc developers chose not to meet this requirement
+    for performance reasons.  An old discussion can be found at
+    https://sourceware.org/bugzilla/show_bug.cgi?id=206 .  Unlike glibc,
+    jemalloc does follow the C standard by default (caveat: jemalloc technically
+    cheats by only providing 8-byte alignment for 8-byte allocation requests),
+    but the fact that Linux systems already work around this allocator
+    noncompliance means that it is generally safe in practice to let jemalloc's
+    minimum alignment follow glibc's lead.  If you specify --with-lg-quantum=3
+    during configuration, jemalloc will provide additional size classes that
+    are not 16-byte-aligned (24, 40, and 56, assuming
+    --with-lg-size-class-group=2).
+
 The following environment variables (not a definitive list) impact configure's
 behavior:
 
diff --git a/Makefile.in b/Makefile.in
index 50f6596a..40644ce8 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -118,6 +118,7 @@ TESTS_UNIT := $(srcroot)test/unit/atomic.c \
 	$(srcroot)test/unit/ckh.c \
 	$(srcroot)test/unit/hash.c \
 	$(srcroot)test/unit/junk.c \
+	$(srcroot)test/unit/lg_chunk.c \
 	$(srcroot)test/unit/mallctl.c \
 	$(srcroot)test/unit/math.c \
 	$(srcroot)test/unit/mq.c \
diff --git a/configure.ac b/configure.ac
index 1d79ded6..f8c09c46 100644
--- a/configure.ac
+++ b/configure.ac
@@ -969,8 +969,17 @@ else
   fi
 fi
 
-AC_CACHE_CHECK([STATIC_PAGE_SHIFT],
-               [je_cv_static_page_shift],
+AC_ARG_WITH([lg_quantum],
+  [AS_HELP_STRING([--with-lg-quantum=<lg-quantum>],
+   [Base 2 log of minimum allocation alignment])],
+  [AC_DEFINE_UNQUOTED([LG_QUANTUM], [$with_lg_quantum])])
+
+AC_ARG_WITH([lg_page],
+  [AS_HELP_STRING([--with-lg-page=<lg-page>], [Base 2 log of system page size])],
+  [LG_PAGE="$with_lg_page"], [LG_PAGE="detect"])
+if test "x$LG_PAGE" == "xdetect"; then
+  AC_CACHE_CHECK([LG_PAGE],
+               [je_cv_lg_page],
                AC_RUN_IFELSE([AC_LANG_PROGRAM(
 [[
 #include <strings.h>
@@ -1006,15 +1015,29 @@ AC_CACHE_CHECK([STATIC_PAGE_SHIFT],
 
     return 0;
 ]])],
-               [je_cv_static_page_shift=`cat conftest.out`],
-               [je_cv_static_page_shift=undefined],
-               [je_cv_static_page_shift=12]))
-
-if test "x$je_cv_static_page_shift" != "xundefined"; then
-  AC_DEFINE_UNQUOTED([STATIC_PAGE_SHIFT], [$je_cv_static_page_shift])
-else
-  AC_MSG_ERROR([cannot determine value for STATIC_PAGE_SHIFT])
+               [je_cv_lg_page=`cat conftest.out`],
+               [je_cv_lg_page=undefined],
+               [je_cv_lg_page=12]))
 fi
+if test "x${je_cv_lg_page}" != "x" ; then
+  LG_PAGE="${je_cv_lg_page}"
+fi
+if test "x${LG_PAGE}" != "xundefined" ; then
+  AC_DEFINE_UNQUOTED([LG_PAGE], [$LG_PAGE])
+else
+  AC_MSG_ERROR([cannot determine value for LG_PAGE])
+fi
+
+AC_ARG_WITH([lg_page_sizes],
+  [AS_HELP_STRING([--with-lg-page-sizes=<lg-page-sizes>],
+   [Base 2 logs of system page sizes to support])],
+  [LG_PAGE_SIZES="$with_lg_page_sizes"], [LG_PAGE_SIZES="$LG_PAGE"])
+
+AC_ARG_WITH([lg_size_class_group],
+  [AS_HELP_STRING([--with-lg-size-class-group=<lg-size-class-group>],
+   [Base 2 log of size classes per doubling])],
+  [LG_SIZE_CLASS_GROUP="$with_lg_size_class_group"],
+  [LG_SIZE_CLASS_GROUP="2"])
 
 dnl ============================================================================
 dnl jemalloc configuration.
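As a usage illustration of the INSTALL entries above (the target triple and the particular values below are hypothetical examples, not recommendations), the new options can be combined when cross compiling for a system whose page size cannot be probed at configure time:

    # Hypothetical cross-compile sketch: 64 KiB target pages (with 4 KiB page
    # size classes also generated), 16-byte quantum, and the default of
    # 2^2 = 4 size classes per doubling.
    ./configure --host=aarch64-linux-gnu \
        --with-lg-page=16 \
        --with-lg-page-sizes=12,16 \
        --with-lg-quantum=4 \
        --with-lg-size-class-group=2

Here --with-lg-page sets LG_PAGE directly (bypassing the run test above), while --with-lg-page-sizes only controls which page sizes size_classes.sh emits groups for, which is why the two are passed together when cross compiling.
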
@@ -1456,10 +1479,12 @@ AC_CONFIG_COMMANDS([include/jemalloc/internal/public_unnamespace.h], [ ]) AC_CONFIG_COMMANDS([include/jemalloc/internal/size_classes.h], [ mkdir -p "${objroot}include/jemalloc/internal" - "${srcdir}/include/jemalloc/internal/size_classes.sh" > "${objroot}include/jemalloc/internal/size_classes.h" + "${srcdir}/include/jemalloc/internal/size_classes.sh" ${LG_PAGE_SIZES} ${LG_SIZE_CLASS_GROUP} > "${objroot}include/jemalloc/internal/size_classes.h" ], [ srcdir="${srcdir}" objroot="${objroot}" + LG_PAGE_SIZES=${LG_PAGE_SIZES} + LG_SIZE_CLASS_GROUP=${LG_SIZE_CLASS_GROUP} ]) AC_CONFIG_COMMANDS([include/jemalloc/jemalloc_protos_jet.h], [ mkdir -p "${objroot}include/jemalloc" diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 894ce9af..f5b9fc62 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -362,8 +362,8 @@ void *arena_malloc_small(arena_t *arena, size_t size, bool zero); void *arena_malloc_large(arena_t *arena, size_t size, bool zero); void *arena_palloc(arena_t *arena, size_t size, size_t alignment, bool zero); void arena_prof_promoted(const void *ptr, size_t size); -void arena_dalloc_bin_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr, - arena_chunk_map_bits_t *bitselm); +void arena_dalloc_bin_junked_locked(arena_t *arena, arena_chunk_t *chunk, + void *ptr, arena_chunk_map_bits_t *bitselm); void arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, size_t pageind, arena_chunk_map_bits_t *bitselm); void arena_dalloc_small(arena_t *arena, arena_chunk_t *chunk, void *ptr, @@ -371,8 +371,10 @@ void arena_dalloc_small(arena_t *arena, arena_chunk_t *chunk, void *ptr, #ifdef JEMALLOC_JET typedef void (arena_dalloc_junk_large_t)(void *, size_t); extern arena_dalloc_junk_large_t *arena_dalloc_junk_large; +#else +void arena_dalloc_junk_large(void *ptr, size_t usize); #endif -void arena_dalloc_large_locked(arena_t *arena, arena_chunk_t *chunk, +void arena_dalloc_large_junked_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr); void arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr); #ifdef JEMALLOC_JET diff --git a/include/jemalloc/internal/huge.h b/include/jemalloc/internal/huge.h index 5d4d3a16..39d8aa50 100644 --- a/include/jemalloc/internal/huge.h +++ b/include/jemalloc/internal/huge.h @@ -9,19 +9,20 @@ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -void *huge_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero); +void *huge_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero, + bool try_tcache); void *huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, - bool zero); + bool zero, bool try_tcache); bool huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, bool zero); void *huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, size_t extra, size_t alignment, bool zero, - bool try_tcache_dalloc); + bool try_tcache_alloc, bool try_tcache_dalloc); #ifdef JEMALLOC_JET typedef void (huge_dalloc_junk_t)(void *, size_t); extern huge_dalloc_junk_t *huge_dalloc_junk; #endif -void huge_dalloc(tsd_t *tsd, void *ptr); +void huge_dalloc(tsd_t *tsd, void *ptr, bool try_tcache); size_t huge_salloc(const void *ptr); prof_tctx_t *huge_prof_tctx_get(const void *ptr); void huge_prof_tctx_set(const void *ptr, prof_tctx_t *tctx); diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 
f4d5de6a..3f65fad0 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -185,7 +185,7 @@ typedef unsigned index_t; #define TINY_MIN (1U << LG_TINY_MIN) /* - * Minimum alignment of allocations is 2^LG_QUANTUM bytes (ignoring tiny size + * Minimum allocation alignment is 2^LG_QUANTUM bytes (ignoring tiny size * classes). */ #ifndef LG_QUANTUM @@ -235,7 +235,8 @@ typedef unsigned index_t; # define LG_QUANTUM 4 # endif # ifndef LG_QUANTUM -# error "No LG_QUANTUM definition for architecture; specify via CPPFLAGS" +# error "Unknown minimum alignment for architecture; specify via " + "--with-lg-quantum" # endif #endif @@ -275,12 +276,11 @@ typedef unsigned index_t; #define CACHELINE_CEILING(s) \ (((s) + CACHELINE_MASK) & ~CACHELINE_MASK) -/* Page size. STATIC_PAGE_SHIFT is determined by the configure script. */ +/* Page size. LG_PAGE is determined by the configure script. */ #ifdef PAGE_MASK # undef PAGE_MASK #endif -#define LG_PAGE STATIC_PAGE_SHIFT -#define PAGE ((size_t)(1U << STATIC_PAGE_SHIFT)) +#define PAGE ((size_t)(1U << LG_PAGE)) #define PAGE_MASK ((size_t)(PAGE - 1)) /* Return the smallest pagesize multiple that is >= s. */ @@ -809,7 +809,7 @@ imalloct(tsd_t *tsd, size_t size, bool try_tcache, arena_t *arena) if (size <= arena_maxclass) return (arena_malloc(tsd, arena, size, false, try_tcache)); else - return (huge_malloc(tsd, arena, size, false)); + return (huge_malloc(tsd, arena, size, false, try_tcache)); } JEMALLOC_ALWAYS_INLINE void * @@ -826,7 +826,7 @@ icalloct(tsd_t *tsd, size_t size, bool try_tcache, arena_t *arena) if (size <= arena_maxclass) return (arena_malloc(tsd, arena, size, true, try_tcache)); else - return (huge_malloc(tsd, arena, size, true)); + return (huge_malloc(tsd, arena, size, true, try_tcache)); } JEMALLOC_ALWAYS_INLINE void * @@ -854,9 +854,11 @@ ipalloct(tsd_t *tsd, size_t usize, size_t alignment, bool zero, bool try_tcache, return (NULL); ret = arena_palloc(arena, usize, alignment, zero); } else if (alignment <= chunksize) - ret = huge_malloc(tsd, arena, usize, zero); - else - ret = huge_palloc(tsd, arena, usize, alignment, zero); + ret = huge_malloc(tsd, arena, usize, zero, try_tcache); + else { + ret = huge_palloc(tsd, arena, usize, alignment, zero, + try_tcache); + } } assert(ALIGNMENT_ADDR2BASE(ret, alignment) == ret); @@ -938,7 +940,7 @@ idalloct(tsd_t *tsd, void *ptr, bool try_tcache) if (chunk != ptr) arena_dalloc(tsd, chunk, ptr, try_tcache); else - huge_dalloc(tsd, ptr); + huge_dalloc(tsd, ptr, try_tcache); } JEMALLOC_ALWAYS_INLINE void @@ -952,7 +954,7 @@ isdalloct(tsd_t *tsd, void *ptr, size_t size, bool try_tcache) if (chunk != ptr) arena_sdalloc(tsd, chunk, ptr, size, try_tcache); else - huge_dalloc(tsd, ptr); + huge_dalloc(tsd, ptr, try_tcache); } JEMALLOC_ALWAYS_INLINE void @@ -1042,7 +1044,7 @@ iralloct(tsd_t *tsd, void *ptr, size_t size, size_t alignment, bool zero, alignment, zero, try_tcache_alloc, try_tcache_dalloc)); } else { return (huge_ralloc(tsd, arena, ptr, oldsize, size, 0, - alignment, zero, try_tcache_dalloc)); + alignment, zero, try_tcache_alloc, try_tcache_dalloc)); } } diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index fd85e5cf..0ff939c6 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -144,8 +144,14 @@ /* Support lazy locking (avoid locking unless a second thread is launched). 
*/ #undef JEMALLOC_LAZY_LOCK -/* One page is 2^STATIC_PAGE_SHIFT bytes. */ -#undef STATIC_PAGE_SHIFT +/* + * Minimum allocation alignment is 2^LG_QUANTUM bytes (ignoring tiny size + * classes). + */ +#undef LG_QUANTUM + +/* One page is 2^LG_PAGE bytes. */ +#undef LG_PAGE /* * If defined, use munmap() to unmap freed chunks, rather than storing them for diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index d5e6fdcf..66d48221 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -16,11 +16,11 @@ arena_chunk_dalloc_huge arena_cleanup arena_dalloc arena_dalloc_bin -arena_dalloc_bin_locked +arena_dalloc_bin_junked_locked arena_dalloc_junk_large arena_dalloc_junk_small arena_dalloc_large -arena_dalloc_large_locked +arena_dalloc_large_junked_locked arena_dalloc_small arena_dss_prec_get arena_dss_prec_set diff --git a/include/jemalloc/internal/size_classes.sh b/include/jemalloc/internal/size_classes.sh index 897570cc..733338c5 100755 --- a/include/jemalloc/internal/size_classes.sh +++ b/include/jemalloc/internal/size_classes.sh @@ -1,4 +1,6 @@ #!/bin/sh +# +# Usage: size_classes.sh # The following limits are chosen such that they cover all supported platforms. @@ -15,10 +17,10 @@ lg_tmin=3 lg_kmax=12 # Page sizes. -lg_parr="12 13 16" +lg_parr=`echo $1 | tr ',' ' '` # Size class group size (number of size classes for each size doubling). -lg_g=2 +lg_g=$2 pow2() { e=$1 @@ -159,7 +161,11 @@ size_classes() { nbins=$((${index} + 1)) # Final written value is correct: small_maxclass="((((size_t)1) << ${lg_grp}) + (((size_t)${ndelta}) << ${lg_delta}))" - lg_large_minclass=$((${lg_grp} + 1)) + if [ ${lg_g} -gt 0 ] ; then + lg_large_minclass=$((${lg_grp} + 1)) + else + lg_large_minclass=$((${lg_grp} + 2)) + fi fi index=$((${index} + 1)) ndelta=$((${ndelta} + 1)) diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index 02eec5db..fe9c47e8 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -112,7 +112,7 @@ void tcache_arena_associate(tcache_t *tcache, arena_t *arena); void tcache_arena_reassociate(tcache_t *tcache, arena_t *arena); void tcache_arena_dissociate(tcache_t *tcache); tcache_t *tcache_get_hard(tsd_t *tsd); -tcache_t *tcache_create(arena_t *arena); +tcache_t *tcache_create(tsd_t *tsd, arena_t *arena); void tcache_cleanup(tsd_t *tsd); void tcache_enabled_cleanup(tsd_t *tsd); void tcache_stats_merge(tcache_t *tcache, arena_t *arena); @@ -363,7 +363,7 @@ tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size) binind = size2index(size); if (config_fill && unlikely(opt_junk)) - memset(ptr, 0x5a, size); + arena_dalloc_junk_large(ptr, size); tbin = &tcache->tbins[binind]; tbin_info = &tcache_bin_info[binind]; diff --git a/src/arena.c b/src/arena.c index 86e54404..bbe58fa6 100644 --- a/src/arena.c +++ b/src/arena.c @@ -623,7 +623,7 @@ arena_run_alloc_large(arena_t *arena, size_t size, bool zero) arena_chunk_t *chunk; arena_run_t *run; - assert(size <= arena_maxclass); + assert(size <= arena_maxrun); assert((size & PAGE_MASK) == 0); /* Search the arena's chunks for the lowest best fit. 
*/ @@ -673,7 +673,7 @@ arena_run_alloc_small(arena_t *arena, size_t size, index_t binind) arena_chunk_t *chunk; arena_run_t *run; - assert(size <= arena_maxclass); + assert(size <= arena_maxrun); assert((size & PAGE_MASK) == 0); assert(binind != BININD_INVALID); @@ -1728,9 +1728,9 @@ arena_bin_lower_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, arena_bin_runs_insert(bin, run); } -void -arena_dalloc_bin_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr, - arena_chunk_map_bits_t *bitselm) +static void +arena_dalloc_bin_locked_impl(arena_t *arena, arena_chunk_t *chunk, void *ptr, + arena_chunk_map_bits_t *bitselm, bool junked) { size_t pageind, rpages_ind; arena_run_t *run; @@ -1749,7 +1749,7 @@ arena_dalloc_bin_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr, if (config_fill || config_stats) size = bin_info->reg_size; - if (config_fill && unlikely(opt_junk)) + if (!junked && config_fill && unlikely(opt_junk)) arena_dalloc_junk_small(ptr, bin_info); arena_run_reg_dalloc(run, ptr); @@ -1765,6 +1765,14 @@ arena_dalloc_bin_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr, } } +void +arena_dalloc_bin_junked_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr, + arena_chunk_map_bits_t *bitselm) +{ + + arena_dalloc_bin_locked_impl(arena, chunk, ptr, bitselm, true); +} + void arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, size_t pageind, arena_chunk_map_bits_t *bitselm) @@ -1777,7 +1785,7 @@ arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, run = &arena_miscelm_get(chunk, rpages_ind)->run; bin = run->bin; malloc_mutex_lock(&bin->lock); - arena_dalloc_bin_locked(arena, chunk, ptr, bitselm); + arena_dalloc_bin_locked_impl(arena, chunk, ptr, bitselm, false); malloc_mutex_unlock(&bin->lock); } @@ -1800,7 +1808,7 @@ arena_dalloc_small(arena_t *arena, arena_chunk_t *chunk, void *ptr, #undef arena_dalloc_junk_large #define arena_dalloc_junk_large JEMALLOC_N(arena_dalloc_junk_large_impl) #endif -static void +void arena_dalloc_junk_large(void *ptr, size_t usize) { @@ -1815,7 +1823,8 @@ arena_dalloc_junk_large_t *arena_dalloc_junk_large = #endif void -arena_dalloc_large_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr) +arena_dalloc_large_locked_impl(arena_t *arena, arena_chunk_t *chunk, + void *ptr, bool junked) { size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; arena_chunk_map_misc_t *miscelm = arena_miscelm_get(chunk, pageind); @@ -1824,7 +1833,8 @@ arena_dalloc_large_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr) if (config_fill || config_stats) { size_t usize = arena_mapbits_large_size_get(chunk, pageind); - arena_dalloc_junk_large(ptr, usize); + if (!junked) + arena_dalloc_junk_large(ptr, usize); if (config_stats) { index_t index = size2index(usize) - NBINS; @@ -1838,12 +1848,20 @@ arena_dalloc_large_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr) arena_run_dalloc(arena, run, true, false); } +void +arena_dalloc_large_junked_locked(arena_t *arena, arena_chunk_t *chunk, + void *ptr) +{ + + arena_dalloc_large_locked_impl(arena, chunk, ptr, true); +} + void arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr) { malloc_mutex_lock(&arena->lock); - arena_dalloc_large_locked(arena, chunk, ptr); + arena_dalloc_large_locked_impl(arena, chunk, ptr, false); malloc_mutex_unlock(&arena->lock); } @@ -2398,6 +2416,7 @@ arena_boot(void) sizeof(arena_chunk_map_bits_t) * (chunk_npages-map_bias); arena_maxrun = chunksize - (map_bias << LG_PAGE); + assert(arena_maxrun > 0); arena_maxclass = 
index2size(size2index(chunksize)-1); if (arena_maxclass > arena_maxrun) { /* @@ -2407,6 +2426,7 @@ arena_boot(void) */ arena_maxclass = arena_maxrun; } + assert(arena_maxclass > 0); nlclasses = size2index(arena_maxclass) - size2index(SMALL_MAXCLASS); bin_info_init(); diff --git a/src/huge.c b/src/huge.c index 541df60a..6c9b97bb 100644 --- a/src/huge.c +++ b/src/huge.c @@ -13,7 +13,7 @@ static malloc_mutex_t huge_mtx; static extent_tree_t huge; void * -huge_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero) +huge_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero, bool try_tcache) { size_t usize; @@ -23,12 +23,12 @@ huge_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero) return (NULL); } - return (huge_palloc(tsd, arena, usize, chunksize, zero)); + return (huge_palloc(tsd, arena, usize, chunksize, zero, try_tcache)); } void * huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, - bool zero) + bool zero, bool try_tcache) { void *ret; size_t csize; @@ -42,7 +42,7 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, /* Allocate an extent node with which to track the chunk. */ node = ipalloct(tsd, CACHELINE_CEILING(sizeof(extent_node_t)), - CACHELINE, false, tsd != NULL, NULL); + CACHELINE, false, try_tcache, NULL); if (node == NULL) return (NULL); @@ -58,7 +58,7 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, } ret = arena_chunk_alloc_huge(arena, NULL, csize, alignment, &is_zeroed); if (ret == NULL) { - idalloct(tsd, node, tsd != NULL); + idalloct(tsd, node, try_tcache); return (NULL); } @@ -122,6 +122,7 @@ huge_ralloc_no_move_expand(void *ptr, size_t oldsize, size_t size, bool zero) { expand_addr = ptr + CHUNK_CEILING(oldsize); expand_size = CHUNK_CEILING(usize) - CHUNK_CEILING(oldsize); + assert(expand_size > 0); malloc_mutex_lock(&huge_mtx); @@ -223,13 +224,8 @@ huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, return (false); } - if (CHUNK_CEILING(oldsize) >= CHUNK_CEILING(size) - && CHUNK_CEILING(oldsize) <= CHUNK_CEILING(size+extra)) { - return (false); - } - /* Shrink the allocation in-place. */ - if (CHUNK_CEILING(oldsize) > CHUNK_CEILING(usize)) { + if (CHUNK_CEILING(oldsize) >= CHUNK_CEILING(usize)) { extent_node_t *node, key; void *excess_addr; size_t excess_size; @@ -251,7 +247,10 @@ huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, /* Zap the excess chunks. */ huge_dalloc_junk(ptr + usize, oldsize - usize); - arena_chunk_dalloc_huge(node->arena, excess_addr, excess_size); + if (excess_size > 0) { + arena_chunk_dalloc_huge(node->arena, excess_addr, + excess_size); + } return (false); } @@ -269,7 +268,8 @@ huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, void * huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, - size_t extra, size_t alignment, bool zero, bool try_tcache_dalloc) + size_t extra, size_t alignment, bool zero, bool try_tcache_alloc, + bool try_tcache_dalloc) { void *ret; size_t copysize; @@ -283,19 +283,25 @@ huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, * different size class. In that case, fall back to allocating new * space and copying. 
*/ - if (alignment > chunksize) - ret = huge_palloc(tsd, arena, size + extra, alignment, zero); - else - ret = huge_malloc(tsd, arena, size + extra, zero); + if (alignment > chunksize) { + ret = huge_palloc(tsd, arena, size + extra, alignment, zero, + try_tcache_alloc); + } else { + ret = huge_malloc(tsd, arena, size + extra, zero, + try_tcache_alloc); + } if (ret == NULL) { if (extra == 0) return (NULL); /* Try again, this time without extra. */ - if (alignment > chunksize) - ret = huge_palloc(tsd, arena, size, alignment, zero); - else - ret = huge_malloc(tsd, arena, size, zero); + if (alignment > chunksize) { + ret = huge_palloc(tsd, arena, size, alignment, zero, + try_tcache_alloc); + } else { + ret = huge_malloc(tsd, arena, size, zero, + try_tcache_alloc); + } if (ret == NULL) return (NULL); @@ -312,7 +318,7 @@ huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, } void -huge_dalloc(tsd_t *tsd, void *ptr) +huge_dalloc(tsd_t *tsd, void *ptr, bool try_tcache) { extent_node_t *node, key; @@ -330,7 +336,7 @@ huge_dalloc(tsd_t *tsd, void *ptr) huge_dalloc_junk(node->addr, node->size); arena_chunk_dalloc_huge(node->arena, node->addr, CHUNK_CEILING(node->size)); - idalloct(tsd, node, tsd != NULL); + idalloct(tsd, node, try_tcache); } size_t diff --git a/src/jemalloc.c b/src/jemalloc.c index c62d8ce6..a862104a 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -67,6 +67,8 @@ const uint8_t size2index_tab[] = { #define S2B_7(i) S2B_6(i) S2B_6(i) #define S2B_8(i) S2B_7(i) S2B_7(i) #define S2B_9(i) S2B_8(i) S2B_8(i) +#define S2B_10(i) S2B_9(i) S2B_9(i) +#define S2B_11(i) S2B_10(i) S2B_10(i) #define S2B_no(i) #define SC(index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup) \ S2B_##lg_delta_lookup(index) @@ -78,6 +80,8 @@ const uint8_t size2index_tab[] = { #undef S2B_7 #undef S2B_8 #undef S2B_9 +#undef S2B_10 +#undef S2B_11 #undef S2B_no #undef SC }; @@ -199,6 +203,7 @@ static void * a0alloc(size_t size, bool zero) { void *ret; + tsd_t *tsd; if (unlikely(malloc_init())) return (NULL); @@ -206,10 +211,11 @@ a0alloc(size_t size, bool zero) if (size == 0) size = 1; + tsd = tsd_fetch(); if (size <= arena_maxclass) - ret = arena_malloc(NULL, a0get(), size, zero, false); + ret = arena_malloc(tsd, a0get(), size, zero, false); else - ret = huge_malloc(NULL, a0get(), size, zero); + ret = huge_malloc(tsd, a0get(), size, zero, false); return (ret); } @@ -231,16 +237,18 @@ a0calloc(size_t num, size_t size) void a0free(void *ptr) { + tsd_t *tsd; arena_chunk_t *chunk; if (ptr == NULL) return; + tsd = tsd_fetch(); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (chunk != ptr) - arena_dalloc(NULL, chunk, ptr, false); + arena_dalloc(tsd, chunk, ptr, false); else - huge_dalloc(NULL, ptr); + huge_dalloc(tsd, ptr, false); } /* Create a new arena and insert it into the arenas array at index ind. 
*/ @@ -817,15 +825,15 @@ malloc_conf_init(void) "Invalid conf value", \ k, klen, v, vlen); \ } else if (clip) { \ - if (min != 0 && um < min) \ - o = min; \ - else if (um > max) \ - o = max; \ + if ((min) != 0 && um < (min)) \ + o = (min); \ + else if (um > (max)) \ + o = (max); \ else \ o = um; \ } else { \ - if ((min != 0 && um < min) || \ - um > max) { \ + if (((min) != 0 && um < (min)) \ + || um > (max)) { \ malloc_conf_error( \ "Out-of-range " \ "conf value", \ @@ -847,8 +855,8 @@ malloc_conf_init(void) malloc_conf_error( \ "Invalid conf value", \ k, klen, v, vlen); \ - } else if (l < (ssize_t)min || l > \ - (ssize_t)max) { \ + } else if (l < (ssize_t)(min) || l > \ + (ssize_t)(max)) { \ malloc_conf_error( \ "Out-of-range conf value", \ k, klen, v, vlen); \ @@ -868,15 +876,16 @@ malloc_conf_init(void) CONF_HANDLE_BOOL(opt_abort, "abort", true) /* - * Chunks always require at least one header page, plus - * one data page in the absence of redzones, or three - * pages in the presence of redzones. In order to - * simplify options processing, fix the limit based on - * config_fill. + * Chunks always require at least one header page, + * as many as 2^(LG_SIZE_CLASS_GROUP+1) data pages, and + * possibly an additional page in the presence of + * redzones. In order to simplify options processing, + * use a conservative bound that accommodates all these + * constraints. */ CONF_HANDLE_SIZE_T(opt_lg_chunk, "lg_chunk", LG_PAGE + - (config_fill ? 2 : 1), (sizeof(size_t) << 3) - 1, - true) + LG_SIZE_CLASS_GROUP + (config_fill ? 2 : 1), + (sizeof(size_t) << 3) - 1, true) if (strncmp("dss", k, klen) == 0) { int i; bool match = false; @@ -2088,8 +2097,7 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) if (unlikely((flags & MALLOCX_ARENA_MASK) != 0)) { unsigned arena_ind = MALLOCX_ARENA_GET(flags); - // XX Dangerous arenas read. - arena = arenas[arena_ind]; + arena = arena_get(tsd, arena_ind, true, true); } else arena = NULL; diff --git a/src/tcache.c b/src/tcache.c index 1bf70269..34224ec4 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -117,8 +117,8 @@ tcache_bin_flush_small(tcache_bin_t *tbin, index_t binind, unsigned rem, (uintptr_t)chunk) >> LG_PAGE; arena_chunk_map_bits_t *bitselm = arena_bitselm_get(chunk, pageind); - arena_dalloc_bin_locked(arena, chunk, ptr, - bitselm); + arena_dalloc_bin_junked_locked(arena, chunk, + ptr, bitselm); } else { /* * This object was allocated via a different @@ -193,9 +193,10 @@ tcache_bin_flush_large(tcache_bin_t *tbin, index_t binind, unsigned rem, ptr = tbin->avail[i]; assert(ptr != NULL); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (chunk->arena == arena) - arena_dalloc_large_locked(arena, chunk, ptr); - else { + if (chunk->arena == arena) { + arena_dalloc_large_junked_locked(arena, chunk, + ptr); + } else { /* * This object was allocated via a different * arena than the one that is currently locked. @@ -279,11 +280,11 @@ tcache_get_hard(tsd_t *tsd) arena = arena_choose(tsd, NULL); if (unlikely(arena == NULL)) return (NULL); - return (tcache_create(arena)); + return (tcache_create(tsd, arena)); } tcache_t * -tcache_create(arena_t *arena) +tcache_create(tsd_t *tsd, arena_t *arena) { tcache_t *tcache; size_t size, stack_offset; @@ -294,23 +295,10 @@ tcache_create(arena_t *arena) size = PTR_CEILING(size); stack_offset = size; size += stack_nelms * sizeof(void *); - /* - * Round up to the nearest multiple of the cacheline size, in order to - * avoid the possibility of false cacheline sharing. 
- * - * That this works relies on the same logic as in ipalloc(), but we - * cannot directly call ipalloc() here due to tcache bootstrapping - * issues. - */ - size = (size + CACHELINE_MASK) & (-CACHELINE); - - if (size <= SMALL_MAXCLASS) - tcache = (tcache_t *)arena_malloc_small(arena, size, true); - else if (size <= tcache_maxclass) - tcache = (tcache_t *)arena_malloc_large(arena, size, true); - else - tcache = (tcache_t *)icalloct(NULL, size, false, arena); + /* Avoid false cacheline sharing. */ + size = sa2u(size, CACHELINE); + tcache = ipalloct(tsd, size, CACHELINE, true, false, arena); if (tcache == NULL) return (NULL); @@ -331,7 +319,6 @@ static void tcache_destroy(tsd_t *tsd, tcache_t *tcache) { unsigned i; - size_t tcache_size; tcache_arena_dissociate(tcache); @@ -366,23 +353,7 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache) arena_prof_accum(tcache->arena, tcache->prof_accumbytes)) prof_idump(); - tcache_size = arena_salloc(tcache, false); - if (tcache_size <= SMALL_MAXCLASS) { - arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache); - arena_t *arena = chunk->arena; - size_t pageind = ((uintptr_t)tcache - (uintptr_t)chunk) >> - LG_PAGE; - arena_chunk_map_bits_t *bitselm = arena_bitselm_get(chunk, - pageind); - - arena_dalloc_bin(arena, chunk, tcache, pageind, bitselm); - } else if (tcache_size <= tcache_maxclass) { - arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache); - arena_t *arena = chunk->arena; - - arena_dalloc_large(arena, chunk, tcache); - } else - idalloct(tsd, tcache, false); + idalloct(tsd, tcache, false); } void diff --git a/test/unit/lg_chunk.c b/test/unit/lg_chunk.c new file mode 100644 index 00000000..7f0b31ce --- /dev/null +++ b/test/unit/lg_chunk.c @@ -0,0 +1,26 @@ +#include "test/jemalloc_test.h" + +/* + * Make sure that opt.lg_chunk clamping is sufficient. In practice, this test + * program will fail a debug assertion during initialization and abort (rather + * than the test soft-failing) if clamping is insufficient. + */ +const char *malloc_conf = "lg_chunk:0"; + +TEST_BEGIN(test_lg_chunk_clamp) +{ + void *p; + + p = mallocx(1, 0); + assert_ptr_not_null(p, "Unexpected mallocx() failure"); + dallocx(p, 0); +} +TEST_END + +int +main(void) +{ + + return (test( + test_lg_chunk_clamp)); +} diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index e62e54f2..a8f7aed6 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -357,7 +357,7 @@ TEST_BEGIN(test_arenas_lrun_constants) assert_zu_eq(name, expected, "Incorrect "#name" size"); \ } while (0) - TEST_ARENAS_LRUN_CONSTANT(size_t, size, (1 << (LG_PAGE+2))); + TEST_ARENAS_LRUN_CONSTANT(size_t, size, LARGE_MINCLASS); #undef TEST_ARENAS_LRUN_CONSTANT }
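Earlier in the patch, the AC_CONFIG_COMMANDS block for size_classes.h passes ${LG_PAGE_SIZES} and ${LG_SIZE_CLASS_GROUP} to size_classes.sh instead of relying on the previously hard-coded page size list. A minimal sketch of driving the generator by hand under the same convention (run from a jemalloc source tree; the arguments and output path mirror what configure now does):

    # Generate size classes for 4 KiB and 64 KiB pages with the default
    # lg-size-class-group of 2 (four size classes per doubling).
    sh include/jemalloc/internal/size_classes.sh "12,16" 2 \
        > include/jemalloc/internal/size_classes.h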