From 155bfa7da18cab0d21d87aa2dce4554166836f5d Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 5 Oct 2014 17:54:10 -0700 Subject: [PATCH] Normalize size classes. Normalize size classes to use the same number of size classes per size doubling (currently hard coded to 4), across the entire range of size classes. Small size classes already used this spacing, but in order to support this change, additional small size classes now fill [4 KiB .. 16 KiB). Large size classes range from [16 KiB .. 4 MiB). Huge size classes now support non-multiples of the chunk size in order to fill (4 MiB .. 16 MiB). --- include/jemalloc/internal/arena.h | 231 +++----------- include/jemalloc/internal/chunk.h | 3 - include/jemalloc/internal/huge.h | 2 +- .../jemalloc/internal/jemalloc_internal.h.in | 299 +++++++++++++----- include/jemalloc/internal/private_symbols.txt | 22 +- include/jemalloc/internal/size_classes.sh | 15 +- include/jemalloc/internal/stats.h | 7 +- include/jemalloc/internal/tcache.h | 52 +-- src/arena.c | 223 ++++++------- src/chunk.c | 3 - src/ctl.c | 2 +- src/huge.c | 113 +++++-- src/jemalloc.c | 34 +- src/tcache.c | 8 +- test/unit/junk.c | 17 +- test/unit/mallctl.c | 2 +- 16 files changed, 558 insertions(+), 475 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 1f985723..681b5802 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -1,6 +1,8 @@ /******************************************************************************/ #ifdef JEMALLOC_H_TYPES +#define LARGE_MINCLASS (ZU(1) << LG_LARGE_MINCLASS) + /* Maximum number of regions in one run. */ #define LG_RUN_MAXREGS (LG_PAGE - LG_TINY_MIN) #define RUN_MAXREGS (1U << LG_RUN_MAXREGS) @@ -96,11 +98,15 @@ struct arena_chunk_map_bits_s { * xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx * -------- -------- ----++++ ++++D-LA * - * Large (sampled, size <= PAGE): + * Large (sampled, size <= LARGE_MINCLASS): * ssssssss ssssssss ssssnnnn nnnnD-LA + * xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx + * -------- -------- ----++++ ++++D-LA * - * Large (not sampled, size == PAGE): + * Large (not sampled, size == LARGE_MINCLASS): * ssssssss ssssssss ssss++++ ++++D-LA + * xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx + * -------- -------- ----++++ ++++D-LA */ size_t bits; #define CHUNK_MAP_BININD_SHIFT 4 @@ -325,30 +331,21 @@ struct arena_s { #ifdef JEMALLOC_H_EXTERNS extern ssize_t opt_lg_dirty_mult; -/* - * small_size2bin_tab is a compact lookup table that rounds request sizes up to - * size classes. In order to reduce cache footprint, the table is compressed, - * and all accesses are via small_size2bin(). - */ -extern uint8_t const small_size2bin_tab[]; -/* - * small_bin2size_tab duplicates information in arena_bin_info, but in a const - * array, for which it is easier for the compiler to optimize repeated - * dereferences. - */ -extern uint32_t const small_bin2size_tab[NBINS]; extern arena_bin_info_t arena_bin_info[NBINS]; -/* Number of large size classes. */ -#define nlclasses (chunk_npages - map_bias) +extern size_t map_bias; /* Number of arena chunk header pages. */ +extern size_t map_misc_offset; +extern size_t arena_maxrun; /* Max run size for arenas. */ +extern size_t arena_maxclass; /* Max size class for arenas. */ +extern size_t nlclasses; /* Number of large size classes. 
*/ void *arena_chunk_alloc_huge(arena_t *arena, void *new_addr, size_t size, size_t alignment, bool *zero); void arena_chunk_dalloc_huge(arena_t *arena, void *chunk, size_t size); void arena_purge_all(arena_t *arena); void arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, - size_t binind, uint64_t prof_accumbytes); + index_t binind, uint64_t prof_accumbytes); void arena_alloc_junk_small(void *ptr, arena_bin_info_t *bin_info, bool zero); #ifdef JEMALLOC_JET @@ -403,15 +400,6 @@ void arena_postfork_child(arena_t *arena); #ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE -size_t small_size2bin_compute(size_t size); -size_t small_size2bin_lookup(size_t size); -size_t small_size2bin(size_t size); -size_t small_bin2size_compute(size_t binind); -size_t small_bin2size_lookup(size_t binind); -size_t small_bin2size(size_t binind); -size_t small_s2u_compute(size_t size); -size_t small_s2u_lookup(size_t size); -size_t small_s2u(size_t size); arena_chunk_map_bits_t *arena_bitselm_get(arena_chunk_t *chunk, size_t pageind); arena_chunk_map_misc_t *arena_miscelm_get(arena_chunk_t *chunk, @@ -426,7 +414,7 @@ size_t arena_mapbits_unallocated_size_get(arena_chunk_t *chunk, size_t pageind); size_t arena_mapbits_large_size_get(arena_chunk_t *chunk, size_t pageind); size_t arena_mapbits_small_runind_get(arena_chunk_t *chunk, size_t pageind); -size_t arena_mapbits_binind_get(arena_chunk_t *chunk, size_t pageind); +index_t arena_mapbits_binind_get(arena_chunk_t *chunk, size_t pageind); size_t arena_mapbits_dirty_get(arena_chunk_t *chunk, size_t pageind); size_t arena_mapbits_unzeroed_get(arena_chunk_t *chunk, size_t pageind); size_t arena_mapbits_large_get(arena_chunk_t *chunk, size_t pageind); @@ -439,16 +427,16 @@ void arena_mapbits_unallocated_size_set(arena_chunk_t *chunk, size_t pageind, void arena_mapbits_large_set(arena_chunk_t *chunk, size_t pageind, size_t size, size_t flags); void arena_mapbits_large_binind_set(arena_chunk_t *chunk, size_t pageind, - size_t binind); + index_t binind); void arena_mapbits_small_set(arena_chunk_t *chunk, size_t pageind, - size_t runind, size_t binind, size_t flags); + size_t runind, index_t binind, size_t flags); void arena_mapbits_unzeroed_set(arena_chunk_t *chunk, size_t pageind, size_t unzeroed); bool arena_prof_accum_impl(arena_t *arena, uint64_t accumbytes); bool arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes); bool arena_prof_accum(arena_t *arena, uint64_t accumbytes); -size_t arena_ptr_small_binind_get(const void *ptr, size_t mapbits); -size_t arena_bin_index(arena_t *arena, arena_bin_t *bin); +index_t arena_ptr_small_binind_get(const void *ptr, size_t mapbits); +index_t arena_bin_index(arena_t *arena, arena_bin_t *bin); unsigned arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr); prof_tctx_t *arena_prof_tctx_get(const void *ptr); @@ -464,148 +452,6 @@ void arena_sdalloc(tsd_t *tsd, arena_chunk_t *chunk, void *ptr, size_t size, #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_)) # ifdef JEMALLOC_ARENA_INLINE_A -JEMALLOC_INLINE size_t -small_size2bin_compute(size_t size) -{ -#if (NTBINS != 0) - if (size <= (ZU(1) << LG_TINY_MAXCLASS)) { - size_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1; - size_t lg_ceil = lg_floor(pow2_ceil(size)); - return (lg_ceil < lg_tmin ? 0 : lg_ceil - lg_tmin); - } else -#endif - { - size_t x = lg_floor((size<<1)-1); - size_t shift = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM) ? 
0 : - x - (LG_SIZE_CLASS_GROUP + LG_QUANTUM); - size_t grp = shift << LG_SIZE_CLASS_GROUP; - - size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1) - ? LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1; - - size_t delta_inverse_mask = ZI(-1) << lg_delta; - size_t mod = ((((size-1) & delta_inverse_mask) >> lg_delta)) & - ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1); - - size_t bin = NTBINS + grp + mod; - return (bin); - } -} - -JEMALLOC_ALWAYS_INLINE size_t -small_size2bin_lookup(size_t size) -{ - - assert(size <= LOOKUP_MAXCLASS); - { - size_t ret = ((size_t)(small_size2bin_tab[(size-1) >> - LG_TINY_MIN])); - assert(ret == small_size2bin_compute(size)); - return (ret); - } -} - -JEMALLOC_ALWAYS_INLINE size_t -small_size2bin(size_t size) -{ - - assert(size > 0); - if (likely(size <= LOOKUP_MAXCLASS)) - return (small_size2bin_lookup(size)); - else - return (small_size2bin_compute(size)); -} - -JEMALLOC_INLINE size_t -small_bin2size_compute(size_t binind) -{ -#if (NTBINS > 0) - if (binind < NTBINS) - return (ZU(1) << (LG_TINY_MAXCLASS - NTBINS + 1 + binind)); - else -#endif - { - size_t reduced_binind = binind - NTBINS; - size_t grp = reduced_binind >> LG_SIZE_CLASS_GROUP; - size_t mod = reduced_binind & ((ZU(1) << LG_SIZE_CLASS_GROUP) - - 1); - - size_t grp_size_mask = ~((!!grp)-1); - size_t grp_size = ((ZU(1) << (LG_QUANTUM + - (LG_SIZE_CLASS_GROUP-1))) << grp) & grp_size_mask; - - size_t shift = (grp == 0) ? 1 : grp; - size_t lg_delta = shift + (LG_QUANTUM-1); - size_t mod_size = (mod+1) << lg_delta; - - size_t usize = grp_size + mod_size; - return (usize); - } -} - -JEMALLOC_ALWAYS_INLINE size_t -small_bin2size_lookup(size_t binind) -{ - - assert(binind < NBINS); - { - size_t ret = (size_t)small_bin2size_tab[binind]; - assert(ret == small_bin2size_compute(binind)); - return (ret); - } -} - -JEMALLOC_ALWAYS_INLINE size_t -small_bin2size(size_t binind) -{ - - return (small_bin2size_lookup(binind)); -} - -JEMALLOC_ALWAYS_INLINE size_t -small_s2u_compute(size_t size) -{ -#if (NTBINS > 0) - if (size <= (ZU(1) << LG_TINY_MAXCLASS)) { - size_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1; - size_t lg_ceil = lg_floor(pow2_ceil(size)); - return (lg_ceil < lg_tmin ? (ZU(1) << lg_tmin) : - (ZU(1) << lg_ceil)); - } else -#endif - { - size_t x = lg_floor((size<<1)-1); - size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1) - ? 
LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1; - size_t delta = ZU(1) << lg_delta; - size_t delta_mask = delta - 1; - size_t usize = (size + delta_mask) & ~delta_mask; - return (usize); - } -} - -JEMALLOC_ALWAYS_INLINE size_t -small_s2u_lookup(size_t size) -{ - size_t ret = small_bin2size(small_size2bin(size)); - - assert(ret == small_s2u_compute(size)); - return (ret); -} - -JEMALLOC_ALWAYS_INLINE size_t -small_s2u(size_t size) -{ - - assert(size > 0); - if (likely(size <= LOOKUP_MAXCLASS)) - return (small_s2u_lookup(size)); - else - return (small_s2u_compute(size)); -} -# endif /* JEMALLOC_ARENA_INLINE_A */ - -# ifdef JEMALLOC_ARENA_INLINE_B JEMALLOC_ALWAYS_INLINE arena_chunk_map_bits_t * arena_bitselm_get(arena_chunk_t *chunk, size_t pageind) { @@ -714,11 +560,11 @@ arena_mapbits_small_runind_get(arena_chunk_t *chunk, size_t pageind) return (mapbits >> LG_PAGE); } -JEMALLOC_ALWAYS_INLINE size_t +JEMALLOC_ALWAYS_INLINE index_t arena_mapbits_binind_get(arena_chunk_t *chunk, size_t pageind) { size_t mapbits; - size_t binind; + index_t binind; mapbits = arena_mapbits_get(chunk, pageind); binind = (mapbits & CHUNK_MAP_BININD_MASK) >> CHUNK_MAP_BININD_SHIFT; @@ -810,20 +656,20 @@ arena_mapbits_large_set(arena_chunk_t *chunk, size_t pageind, size_t size, JEMALLOC_ALWAYS_INLINE void arena_mapbits_large_binind_set(arena_chunk_t *chunk, size_t pageind, - size_t binind) + index_t binind) { size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind); size_t mapbits = arena_mapbitsp_read(mapbitsp); assert(binind <= BININD_INVALID); - assert(arena_mapbits_large_size_get(chunk, pageind) == PAGE); + assert(arena_mapbits_large_size_get(chunk, pageind) == LARGE_MINCLASS); arena_mapbitsp_write(mapbitsp, (mapbits & ~CHUNK_MAP_BININD_MASK) | (binind << CHUNK_MAP_BININD_SHIFT)); } JEMALLOC_ALWAYS_INLINE void arena_mapbits_small_set(arena_chunk_t *chunk, size_t pageind, size_t runind, - size_t binind, size_t flags) + index_t binind, size_t flags) { size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind); size_t mapbits = arena_mapbitsp_read(mapbitsp); @@ -893,10 +739,10 @@ arena_prof_accum(arena_t *arena, uint64_t accumbytes) } } -JEMALLOC_ALWAYS_INLINE size_t +JEMALLOC_ALWAYS_INLINE index_t arena_ptr_small_binind_get(const void *ptr, size_t mapbits) { - size_t binind; + index_t binind; binind = (mapbits & CHUNK_MAP_BININD_MASK) >> CHUNK_MAP_BININD_SHIFT; @@ -908,7 +754,7 @@ arena_ptr_small_binind_get(const void *ptr, size_t mapbits) size_t rpages_ind; arena_run_t *run; arena_bin_t *bin; - size_t actual_binind; + index_t actual_binind; arena_bin_info_t *bin_info; arena_chunk_map_misc_t *miscelm; void *rpages; @@ -938,13 +784,13 @@ arena_ptr_small_binind_get(const void *ptr, size_t mapbits) return (binind); } -# endif /* JEMALLOC_ARENA_INLINE_B */ +# endif /* JEMALLOC_ARENA_INLINE_A */ -# ifdef JEMALLOC_ARENA_INLINE_C -JEMALLOC_INLINE size_t +# ifdef JEMALLOC_ARENA_INLINE_B +JEMALLOC_INLINE index_t arena_bin_index(arena_t *arena, arena_bin_t *bin) { - size_t binind = bin - arena->bins; + index_t binind = bin - arena->bins; assert(binind < NBINS); return (binind); } @@ -1102,7 +948,8 @@ arena_salloc(const void *ptr, bool demote) { size_t ret; arena_chunk_t *chunk; - size_t pageind, binind; + size_t pageind; + index_t binind; assert(ptr != NULL); assert(CHUNK_ADDR2BASE(ptr) != ptr); @@ -1122,10 +969,6 @@ arena_salloc(const void *ptr, bool demote) ret = arena_mapbits_large_size_get(chunk, pageind); assert(ret != 0); assert(pageind + (ret>>LG_PAGE) <= chunk_npages); - assert(ret == PAGE || arena_mapbits_large_size_get(chunk, - 
pageind+(ret>>LG_PAGE)-1) == 0); - assert(binind == arena_mapbits_binind_get(chunk, - pageind+(ret>>LG_PAGE)-1)); assert(arena_mapbits_dirty_get(chunk, pageind) == arena_mapbits_dirty_get(chunk, pageind+(ret>>LG_PAGE)-1)); } else { @@ -1133,7 +976,7 @@ arena_salloc(const void *ptr, bool demote) assert(arena_mapbits_large_get(chunk, pageind) != 0 || arena_ptr_small_binind_get(ptr, arena_mapbits_get(chunk, pageind)) == binind); - ret = small_bin2size(binind); + ret = index2size(binind); } return (ret); @@ -1155,7 +998,7 @@ arena_dalloc(tsd_t *tsd, arena_chunk_t *chunk, void *ptr, bool try_tcache) /* Small allocation. */ if (likely(try_tcache) && likely((tcache = tcache_get(tsd, false)) != NULL)) { - size_t binind = arena_ptr_small_binind_get(ptr, + index_t binind = arena_ptr_small_binind_get(ptr, mapbits); tcache_dalloc_small(tcache, ptr, binind); } else @@ -1186,7 +1029,7 @@ arena_sdalloc(tsd_t *tsd, arena_chunk_t *chunk, void *ptr, size_t size, /* Small allocation. */ if (likely(try_tcache) && likely((tcache = tcache_get(tsd, false)) != NULL)) { - size_t binind = small_size2bin(size); + index_t binind = size2index(size); tcache_dalloc_small(tcache, ptr, binind); } else { size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> @@ -1203,7 +1046,7 @@ arena_sdalloc(tsd_t *tsd, arena_chunk_t *chunk, void *ptr, size_t size, arena_dalloc_large(chunk->arena, chunk, ptr); } } -# endif /* JEMALLOC_ARENA_INLINE_C */ +# endif /* JEMALLOC_ARENA_INLINE_B */ #endif #endif /* JEMALLOC_H_INLINES */ diff --git a/include/jemalloc/internal/chunk.h b/include/jemalloc/internal/chunk.h index 2e68a020..764b7aca 100644 --- a/include/jemalloc/internal/chunk.h +++ b/include/jemalloc/internal/chunk.h @@ -40,9 +40,6 @@ extern rtree_t *chunks_rtree; extern size_t chunksize; extern size_t chunksize_mask; /* (chunksize - 1). */ extern size_t chunk_npages; -extern size_t map_bias; /* Number of arena chunk header pages. */ -extern size_t map_misc_offset; -extern size_t arena_maxclass; /* Max size class for arenas. */ void *chunk_alloc_base(size_t size); void *chunk_alloc_arena(chunk_alloc_t *chunk_alloc, diff --git a/include/jemalloc/internal/huge.h b/include/jemalloc/internal/huge.h index 00d8c09d..939993f2 100644 --- a/include/jemalloc/internal/huge.h +++ b/include/jemalloc/internal/huge.h @@ -10,7 +10,7 @@ #ifdef JEMALLOC_H_EXTERNS void *huge_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero); -void *huge_palloc(tsd_t *tsd, arena_t *arena, size_t size, size_t alignment, +void *huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, bool zero); bool huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, bool zero); diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index a169221b..8f0beb9e 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -165,6 +165,9 @@ static const bool config_ivsalloc = #include "jemalloc/internal/jemalloc_internal_macros.h" +/* Size class index type. */ +typedef unsigned index_t; + #define MALLOCX_ARENA_MASK ((int)~0xff) #define MALLOCX_LG_ALIGN_MASK ((int)0x3f) /* Use MALLOCX_ALIGN_GET() if alignment may not be specified in flags. */ @@ -397,6 +400,18 @@ extern arena_t **arenas; extern unsigned narenas_total; extern unsigned narenas_auto; /* Read-only after initialization. */ +/* + * index2size_tab encodes the same information as could be computed (at + * unacceptable cost in some code paths) by index2size_compute(). 
+ */ +extern size_t const index2size_tab[NSIZES]; +/* + * size2index_tab is a compact lookup table that rounds request sizes up to + * size classes. In order to reduce cache footprint, the table is compressed, + * and all accesses are via size2index(). + */ +extern uint8_t const size2index_tab[]; + arena_t *arenas_extend(unsigned ind); arena_t *choose_arena_hard(tsd_t *tsd); void thread_allocated_cleanup(tsd_t *tsd); @@ -449,15 +464,15 @@ void jemalloc_postfork_child(void); #include "jemalloc/internal/chunk.h" #include "jemalloc/internal/huge.h" -/* - * Include arena.h the first time in order to provide inline functions for this - * header's inlines. - */ -#define JEMALLOC_ARENA_INLINE_A -#include "jemalloc/internal/arena.h" -#undef JEMALLOC_ARENA_INLINE_A - #ifndef JEMALLOC_ENABLE_INLINE +index_t size2index_compute(size_t size); +index_t size2index_lookup(size_t size); +index_t size2index(size_t size); +size_t index2size_compute(index_t index); +size_t index2size_lookup(index_t index); +size_t index2size(index_t index); +size_t s2u_compute(size_t size); +size_t s2u_lookup(size_t size); size_t s2u(size_t size); size_t sa2u(size_t size, size_t alignment); unsigned narenas_total_get(void); @@ -465,6 +480,135 @@ arena_t *choose_arena(tsd_t *tsd, arena_t *arena); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) +JEMALLOC_INLINE index_t +size2index_compute(size_t size) +{ + +#if (NTBINS != 0) + if (size <= (ZU(1) << LG_TINY_MAXCLASS)) { + size_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1; + size_t lg_ceil = lg_floor(pow2_ceil(size)); + return (lg_ceil < lg_tmin ? 0 : lg_ceil - lg_tmin); + } else +#endif + { + size_t x = lg_floor((size<<1)-1); + size_t shift = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM) ? 0 : + x - (LG_SIZE_CLASS_GROUP + LG_QUANTUM); + size_t grp = shift << LG_SIZE_CLASS_GROUP; + + size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1) + ? LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1; + + size_t delta_inverse_mask = ZI(-1) << lg_delta; + size_t mod = ((((size-1) & delta_inverse_mask) >> lg_delta)) & + ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1); + + size_t index = NTBINS + grp + mod; + return (index); + } +} + +JEMALLOC_ALWAYS_INLINE index_t +size2index_lookup(size_t size) +{ + + assert(size <= LOOKUP_MAXCLASS); + { + size_t ret = ((size_t)(size2index_tab[(size-1) >> + LG_TINY_MIN])); + assert(ret == size2index_compute(size)); + return (ret); + } +} + +JEMALLOC_ALWAYS_INLINE index_t +size2index(size_t size) +{ + + assert(size > 0); + if (likely(size <= LOOKUP_MAXCLASS)) + return (size2index_lookup(size)); + else + return (size2index_compute(size)); +} + +JEMALLOC_INLINE size_t +index2size_compute(index_t index) +{ + +#if (NTBINS > 0) + if (index < NTBINS) + return (ZU(1) << (LG_TINY_MAXCLASS - NTBINS + 1 + index)); + else +#endif + { + size_t reduced_index = index - NTBINS; + size_t grp = reduced_index >> LG_SIZE_CLASS_GROUP; + size_t mod = reduced_index & ((ZU(1) << LG_SIZE_CLASS_GROUP) - + 1); + + size_t grp_size_mask = ~((!!grp)-1); + size_t grp_size = ((ZU(1) << (LG_QUANTUM + + (LG_SIZE_CLASS_GROUP-1))) << grp) & grp_size_mask; + + size_t shift = (grp == 0) ? 
1 : grp; + size_t lg_delta = shift + (LG_QUANTUM-1); + size_t mod_size = (mod+1) << lg_delta; + + size_t usize = grp_size + mod_size; + return (usize); + } +} + +JEMALLOC_ALWAYS_INLINE size_t +index2size_lookup(index_t index) +{ + size_t ret = (size_t)index2size_tab[index]; + assert(ret == index2size_compute(index)); + return (ret); +} + +JEMALLOC_ALWAYS_INLINE size_t +index2size(index_t index) +{ + + assert(index < NSIZES); + return (index2size_lookup(index)); +} + +JEMALLOC_ALWAYS_INLINE size_t +s2u_compute(size_t size) +{ + +#if (NTBINS > 0) + if (size <= (ZU(1) << LG_TINY_MAXCLASS)) { + size_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1; + size_t lg_ceil = lg_floor(pow2_ceil(size)); + return (lg_ceil < lg_tmin ? (ZU(1) << lg_tmin) : + (ZU(1) << lg_ceil)); + } else +#endif + { + size_t x = lg_floor((size<<1)-1); + size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1) + ? LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1; + size_t delta = ZU(1) << lg_delta; + size_t delta_mask = delta - 1; + size_t usize = (size + delta_mask) & ~delta_mask; + return (usize); + } +} + +JEMALLOC_ALWAYS_INLINE size_t +s2u_lookup(size_t size) +{ + size_t ret = index2size_lookup(size2index_lookup(size)); + + assert(ret == s2u_compute(size)); + return (ret); +} + /* * Compute usable size that would result from allocating an object with the * specified size. @@ -473,11 +617,11 @@ JEMALLOC_ALWAYS_INLINE size_t s2u(size_t size) { - if (size <= SMALL_MAXCLASS) - return (small_s2u(size)); - if (size <= arena_maxclass) - return (PAGE_CEILING(size)); - return (CHUNK_CEILING(size)); + assert(size > 0); + if (likely(size <= LOOKUP_MAXCLASS)) + return (s2u_lookup(size)); + else + return (s2u_compute(size)); } /* @@ -491,71 +635,78 @@ sa2u(size_t size, size_t alignment) assert(alignment != 0 && ((alignment - 1) & alignment) == 0); - /* - * Round size up to the nearest multiple of alignment. - * - * This done, we can take advantage of the fact that for each small - * size class, every object is aligned at the smallest power of two - * that is non-zero in the base two representation of the size. For - * example: - * - * Size | Base 2 | Minimum alignment - * -----+----------+------------------ - * 96 | 1100000 | 32 - * 144 | 10100000 | 32 - * 192 | 11000000 | 64 - */ - usize = ALIGNMENT_CEILING(size, alignment); - /* - * (usize < size) protects against the combination of maximal - * alignment and size greater than maximal alignment. - */ - if (usize < size) { - /* size_t overflow. */ - return (0); + /* Try for a small size class. */ + if (size <= SMALL_MAXCLASS && alignment < PAGE) { + /* + * Round size up to the nearest multiple of alignment. + * + * This done, we can take advantage of the fact that for each + * small size class, every object is aligned at the smallest + * power of two that is non-zero in the base two representation + * of the size. For example: + * + * Size | Base 2 | Minimum alignment + * -----+----------+------------------ + * 96 | 1100000 | 32 + * 144 | 10100000 | 32 + * 192 | 11000000 | 64 + */ + usize = s2u(ALIGNMENT_CEILING(size, alignment)); + if (usize < LARGE_MINCLASS) + return (usize); } - if (usize <= arena_maxclass && alignment <= PAGE) { - if (usize <= SMALL_MAXCLASS) - return (small_s2u(usize)); - return (PAGE_CEILING(usize)); - } else { - size_t run_size; - + /* Try for a large size class. */ + if (size <= arena_maxclass && alignment < chunksize) { /* * We can't achieve subpage alignment, so round up alignment - * permanently; it makes later calculations simpler. 
+ * to the minimum that can actually be supported. */ alignment = PAGE_CEILING(alignment); - usize = PAGE_CEILING(size); - /* - * (usize < size) protects against very large sizes within - * PAGE of SIZE_T_MAX. - * - * (usize + alignment < usize) protects against the - * combination of maximal alignment and usize large enough - * to cause overflow. This is similar to the first overflow - * check above, but it needs to be repeated due to the new - * usize value, which may now be *equal* to maximal - * alignment, whereas before we only detected overflow if the - * original size was *greater* than maximal alignment. - */ - if (usize < size || usize + alignment < usize) { - /* size_t overflow. */ - return (0); - } + + /* Make sure result is a large size class. */ + usize = (size <= LARGE_MINCLASS) ? LARGE_MINCLASS : s2u(size); /* * Calculate the size of the over-size run that arena_palloc() * would need to allocate in order to guarantee the alignment. - * If the run wouldn't fit within a chunk, round up to a huge - * allocation size. */ - run_size = usize + alignment - PAGE; - if (run_size <= arena_maxclass) - return (PAGE_CEILING(usize)); - return (CHUNK_CEILING(usize)); + if (usize + alignment - PAGE <= arena_maxrun) + return (usize); } + + /* Huge size class. Beware of size_t overflow. */ + + /* + * We can't achieve subchunk alignment, so round up alignment to the + * minimum that can actually be supported. + */ + alignment = CHUNK_CEILING(alignment); + if (alignment == 0) { + /* size_t overflow. */ + return (0); + } + + /* Make sure result is a huge size class. */ + if (size <= chunksize) + usize = chunksize; + else { + usize = s2u(size); + if (usize < size) { + /* size_t overflow. */ + return (0); + } + } + + /* + * Calculate the multi-chunk mapping that huge_palloc() would need in + * order to guarantee the alignment. + */ + if (usize + alignment - PAGE < usize) { + /* size_t overflow. */ + return (0); + } + return (usize); } JEMALLOC_INLINE unsigned @@ -591,16 +742,16 @@ choose_arena(tsd_t *tsd, arena_t *arena) #include "jemalloc/internal/bitmap.h" #include "jemalloc/internal/rtree.h" /* - * Include arena.h the second and third times in order to resolve circular - * dependencies with tcache.h. + * Include portions of arena.h interleaved with tcache.h in order to resolve + * circular dependencies. 
*/ +#define JEMALLOC_ARENA_INLINE_A +#include "jemalloc/internal/arena.h" +#undef JEMALLOC_ARENA_INLINE_A +#include "jemalloc/internal/tcache.h" #define JEMALLOC_ARENA_INLINE_B #include "jemalloc/internal/arena.h" #undef JEMALLOC_ARENA_INLINE_B -#include "jemalloc/internal/tcache.h" -#define JEMALLOC_ARENA_INLINE_C -#include "jemalloc/internal/arena.h" -#undef JEMALLOC_ARENA_INLINE_C #include "jemalloc/internal/hash.h" #include "jemalloc/internal/quarantine.h" @@ -678,7 +829,7 @@ ipalloct(tsd_t *tsd, size_t usize, size_t alignment, bool zero, bool try_tcache, assert(usize != 0); assert(usize == sa2u(usize, alignment)); - if (usize <= arena_maxclass && alignment <= PAGE) + if (usize <= SMALL_MAXCLASS && alignment < PAGE) ret = arena_malloc(tsd, arena, usize, zero, try_tcache); else { if (usize <= arena_maxclass) { @@ -742,7 +893,7 @@ u2rz(size_t usize) size_t ret; if (usize <= SMALL_MAXCLASS) { - size_t binind = small_size2bin(usize); + index_t binind = size2index(usize); ret = arena_bin_info[binind].redzone_size; } else ret = 0; diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 4ea9a953..1a7fde4b 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -41,6 +41,7 @@ arena_mapbitsp_get arena_mapbitsp_read arena_mapbitsp_write arena_maxclass +arena_maxrun arena_miscelm_get arena_miscelm_to_pageind arena_miscelm_to_rpages @@ -216,6 +217,10 @@ idalloct imalloc imalloct in_valgrind +index2size +index2size_compute +index2size_lookup +index2size_tab ipalloc ipalloct iqalloc @@ -338,19 +343,14 @@ rtree_postfork_parent rtree_prefork rtree_set s2u +s2u_compute +s2u_lookup sa2u set_errno -small_bin2size -small_bin2size_compute -small_bin2size_lookup -small_bin2size_tab -small_s2u -small_s2u_compute -small_s2u_lookup -small_size2bin -small_size2bin_compute -small_size2bin_lookup -small_size2bin_tab +size2index +size2index_compute +size2index_lookup +size2index_tab stats_cactive stats_cactive_add stats_cactive_get diff --git a/include/jemalloc/internal/size_classes.sh b/include/jemalloc/internal/size_classes.sh index 0cfac72d..897570cc 100755 --- a/include/jemalloc/internal/size_classes.sh +++ b/include/jemalloc/internal/size_classes.sh @@ -61,7 +61,7 @@ size_class() { rem="yes" fi - if [ ${lg_size} -lt ${lg_p} ] ; then + if [ ${lg_size} -lt $((${lg_p} + ${lg_g})) ] ; then bin="yes" else bin="no" @@ -159,6 +159,7 @@ size_classes() { nbins=$((${index} + 1)) # Final written value is correct: small_maxclass="((((size_t)1) << ${lg_grp}) + (((size_t)${ndelta}) << ${lg_delta}))" + lg_large_minclass=$((${lg_grp} + 1)) fi index=$((${index} + 1)) ndelta=$((${ndelta} + 1)) @@ -167,14 +168,17 @@ size_classes() { lg_delta=$((${lg_delta} + 1)) done echo + nsizes=${index} # Defined upon completion: # - ntbins # - nlbins # - nbins + # - nsizes # - lg_tiny_maxclass # - lookup_maxclass # - small_maxclass + # - lg_large_minclass } cat <tbins[binind]; - size = small_bin2size(binind); + usize = index2size(binind); ret = tcache_alloc_easy(tbin); if (unlikely(ret == NULL)) { ret = tcache_alloc_small_hard(tcache, tbin, binind); if (ret == NULL) return (NULL); } - assert(tcache_salloc(ret) == size); + assert(tcache_salloc(ret) == usize); if (likely(!zero)) { if (config_fill) { @@ -254,20 +255,20 @@ tcache_alloc_small(tcache_t *tcache, size_t size, bool zero) arena_alloc_junk_small(ret, &arena_bin_info[binind], false); } else if (unlikely(opt_zero)) - memset(ret, 0, size); + memset(ret, 0, usize); } } else { 
if (config_fill && unlikely(opt_junk)) { arena_alloc_junk_small(ret, &arena_bin_info[binind], true); } - memset(ret, 0, size); + memset(ret, 0, usize); } if (config_stats) tbin->tstats.nrequests++; if (config_prof) - tcache->prof_accumbytes += size; + tcache->prof_accumbytes += usize; tcache_event(tcache); return (ret); } @@ -276,12 +277,13 @@ JEMALLOC_ALWAYS_INLINE void * tcache_alloc_large(tcache_t *tcache, size_t size, bool zero) { void *ret; - size_t binind; + index_t binind; + size_t usize; tcache_bin_t *tbin; - size = PAGE_CEILING(size); - assert(size <= tcache_maxclass); - binind = NBINS + (size >> LG_PAGE) - 1; + binind = size2index(size); + usize = index2size(binind); + assert(usize <= tcache_maxclass); assert(binind < nhbins); tbin = &tcache->tbins[binind]; ret = tcache_alloc_easy(tbin); @@ -290,11 +292,11 @@ tcache_alloc_large(tcache_t *tcache, size_t size, bool zero) * Only allocate one large object at a time, because it's quite * expensive to create one and not use it. */ - ret = arena_malloc_large(tcache->arena, size, zero); + ret = arena_malloc_large(tcache->arena, usize, zero); if (ret == NULL) return (NULL); } else { - if (config_prof && size == PAGE) { + if (config_prof && usize == LARGE_MINCLASS) { arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ret); size_t pageind = (((uintptr_t)ret - (uintptr_t)chunk) >> @@ -305,17 +307,17 @@ tcache_alloc_large(tcache_t *tcache, size_t size, bool zero) if (likely(!zero)) { if (config_fill) { if (unlikely(opt_junk)) - memset(ret, 0xa5, size); + memset(ret, 0xa5, usize); else if (unlikely(opt_zero)) - memset(ret, 0, size); + memset(ret, 0, usize); } } else - memset(ret, 0, size); + memset(ret, 0, usize); if (config_stats) tbin->tstats.nrequests++; if (config_prof) - tcache->prof_accumbytes += size; + tcache->prof_accumbytes += usize; } tcache_event(tcache); @@ -323,7 +325,7 @@ tcache_alloc_large(tcache_t *tcache, size_t size, bool zero) } JEMALLOC_ALWAYS_INLINE void -tcache_dalloc_small(tcache_t *tcache, void *ptr, size_t binind) +tcache_dalloc_small(tcache_t *tcache, void *ptr, index_t binind) { tcache_bin_t *tbin; tcache_bin_info_t *tbin_info; @@ -349,7 +351,7 @@ tcache_dalloc_small(tcache_t *tcache, void *ptr, size_t binind) JEMALLOC_ALWAYS_INLINE void tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size) { - size_t binind; + index_t binind; tcache_bin_t *tbin; tcache_bin_info_t *tbin_info; @@ -357,7 +359,7 @@ tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size) assert(tcache_salloc(ptr) > SMALL_MAXCLASS); assert(tcache_salloc(ptr) <= tcache_maxclass); - binind = NBINS + (size >> LG_PAGE) - 1; + binind = size2index(size); if (config_fill && unlikely(opt_junk)) memset(ptr, 0x5a, size); diff --git a/src/arena.c b/src/arena.c index b7300a92..49a30572 100644 --- a/src/arena.c +++ b/src/arena.c @@ -7,42 +7,11 @@ ssize_t opt_lg_dirty_mult = LG_DIRTY_MULT_DEFAULT; arena_bin_info_t arena_bin_info[NBINS]; -JEMALLOC_ALIGNED(CACHELINE) -const uint32_t small_bin2size_tab[NBINS] = { -#define B2S_bin_yes(size) \ - size, -#define B2S_bin_no(size) -#define SC(index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup) \ - B2S_bin_##bin((ZU(1)<> LG_PAGE; size_t mapbits = arena_mapbits_get(chunk, pageind); - size_t binind = arena_ptr_small_binind_get(ptr, mapbits); + index_t binind = arena_ptr_small_binind_get(ptr, mapbits); arena_bin_info_t *bin_info = &arena_bin_info[binind]; unsigned regind = arena_run_regind(run, bin_info, ptr); @@ -375,7 +344,7 @@ arena_run_init_large(arena_t *arena, arena_run_t *run, size_t size, bool zero) 
static void arena_run_split_small(arena_t *arena, arena_run_t *run, size_t size, - size_t binind) + index_t binind) { arena_chunk_t *chunk; arena_chunk_map_misc_t *miscelm; @@ -429,9 +398,9 @@ arena_chunk_init_spare(arena_t *arena) assert(arena_mapbits_allocated_get(chunk, map_bias) == 0); assert(arena_mapbits_allocated_get(chunk, chunk_npages-1) == 0); assert(arena_mapbits_unallocated_size_get(chunk, map_bias) == - arena_maxclass); + arena_maxrun); assert(arena_mapbits_unallocated_size_get(chunk, chunk_npages-1) == - arena_maxclass); + arena_maxrun); assert(arena_mapbits_dirty_get(chunk, map_bias) == arena_mapbits_dirty_get(chunk, chunk_npages-1)); @@ -518,8 +487,7 @@ arena_chunk_init_hard(arena_t *arena) * the pages as zeroed iff chunk_alloc() returned a zeroed chunk. */ unzeroed = zero ? 0 : CHUNK_MAP_UNZEROED; - arena_mapbits_unallocated_set(chunk, map_bias, arena_maxclass, - unzeroed); + arena_mapbits_unallocated_set(chunk, map_bias, arena_maxrun, unzeroed); /* * There is no need to initialize the internal page map entries unless * the chunk is not zeroed. @@ -544,7 +512,7 @@ arena_chunk_init_hard(arena_t *arena) } } } - arena_mapbits_unallocated_set(chunk, chunk_npages-1, arena_maxclass, + arena_mapbits_unallocated_set(chunk, chunk_npages-1, arena_maxrun, unzeroed); return (chunk); @@ -607,9 +575,9 @@ arena_chunk_dalloc(arena_t *arena, arena_chunk_t *chunk) assert(arena_mapbits_allocated_get(chunk, map_bias) == 0); assert(arena_mapbits_allocated_get(chunk, chunk_npages-1) == 0); assert(arena_mapbits_unallocated_size_get(chunk, map_bias) == - arena_maxclass); + arena_maxrun); assert(arena_mapbits_unallocated_size_get(chunk, chunk_npages-1) == - arena_maxclass); + arena_maxrun); assert(arena_mapbits_dirty_get(chunk, map_bias) == arena_mapbits_dirty_get(chunk, chunk_npages-1)); @@ -682,7 +650,7 @@ arena_run_alloc_large(arena_t *arena, size_t size, bool zero) } static arena_run_t * -arena_run_alloc_small_helper(arena_t *arena, size_t size, size_t binind) +arena_run_alloc_small_helper(arena_t *arena, size_t size, index_t binind) { arena_run_t *run; arena_chunk_map_misc_t *miscelm; @@ -700,7 +668,7 @@ arena_run_alloc_small_helper(arena_t *arena, size_t size, size_t binind) } static arena_run_t * -arena_run_alloc_small(arena_t *arena, size_t size, size_t binind) +arena_run_alloc_small(arena_t *arena, size_t size, index_t binind) { arena_chunk_t *chunk; arena_run_t *run; @@ -1034,7 +1002,7 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, bool cleaned) arena_mapbits_large_size_get(chunk, run_ind+(size>>LG_PAGE)-1) == 0); } else { - size_t binind = arena_bin_index(arena, run->bin); + index_t binind = arena_bin_index(arena, run->bin); arena_bin_info_t *bin_info = &arena_bin_info[binind]; size = bin_info->run_size; } @@ -1079,9 +1047,9 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, bool cleaned) arena_dirty_insert(arena, chunk, run_ind, run_pages); /* Deallocate chunk if it is now completely unused. */ - if (size == arena_maxclass) { + if (size == arena_maxrun) { assert(run_ind == map_bias); - assert(run_pages == (arena_maxclass >> LG_PAGE)); + assert(run_pages == (arena_maxrun >> LG_PAGE)); arena_chunk_dalloc(arena, chunk); } @@ -1212,7 +1180,7 @@ static arena_run_t * arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin) { arena_run_t *run; - size_t binind; + index_t binind; arena_bin_info_t *bin_info; /* Look for a usable run. 
*/ @@ -1264,7 +1232,7 @@ static void * arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin) { void *ret; - size_t binind; + index_t binind; arena_bin_info_t *bin_info; arena_run_t *run; @@ -1310,7 +1278,7 @@ arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin) } void -arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind, +arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, index_t binind, uint64_t prof_accumbytes) { unsigned i, nfill; @@ -1450,14 +1418,14 @@ arena_dalloc_junk_small_t *arena_dalloc_junk_small = void arena_quarantine_junk_small(void *ptr, size_t usize) { - size_t binind; + index_t binind; arena_bin_info_t *bin_info; cassert(config_fill); assert(opt_junk); assert(opt_quarantine); assert(usize <= SMALL_MAXCLASS); - binind = small_size2bin(usize); + binind = size2index(usize); bin_info = &arena_bin_info[binind]; arena_redzones_validate(ptr, bin_info, true); } @@ -1468,12 +1436,12 @@ arena_malloc_small(arena_t *arena, size_t size, bool zero) void *ret; arena_bin_t *bin; arena_run_t *run; - size_t binind; + index_t binind; - binind = small_size2bin(size); + binind = size2index(size); assert(binind < NBINS); bin = &arena->bins[binind]; - size = small_bin2size(binind); + size = index2size(binind); malloc_mutex_lock(&bin->lock); if ((run = bin->runcur) != NULL && run->nfree > 0) @@ -1520,14 +1488,15 @@ void * arena_malloc_large(arena_t *arena, size_t size, bool zero) { void *ret; + size_t usize; arena_run_t *run; arena_chunk_map_misc_t *miscelm; UNUSED bool idump; /* Large allocation. */ - size = PAGE_CEILING(size); + usize = s2u(size); malloc_mutex_lock(&arena->lock); - run = arena_run_alloc_large(arena, size, zero); + run = arena_run_alloc_large(arena, usize, zero); if (run == NULL) { malloc_mutex_unlock(&arena->lock); return (NULL); @@ -1535,15 +1504,17 @@ arena_malloc_large(arena_t *arena, size_t size, bool zero) miscelm = arena_run_to_miscelm(run); ret = arena_miscelm_to_rpages(miscelm); if (config_stats) { + index_t index = size2index(usize) - NBINS; + arena->stats.nmalloc_large++; arena->stats.nrequests_large++; - arena->stats.allocated_large += size; - arena->stats.lstats[(size >> LG_PAGE) - 1].nmalloc++; - arena->stats.lstats[(size >> LG_PAGE) - 1].nrequests++; - arena->stats.lstats[(size >> LG_PAGE) - 1].curruns++; + arena->stats.allocated_large += usize; + arena->stats.lstats[index].nmalloc++; + arena->stats.lstats[index].nrequests++; + arena->stats.lstats[index].curruns++; } if (config_prof) - idump = arena_prof_accum_locked(arena, size); + idump = arena_prof_accum_locked(arena, usize); malloc_mutex_unlock(&arena->lock); if (config_prof && idump) prof_idump(); @@ -1551,9 +1522,9 @@ arena_malloc_large(arena_t *arena, size_t size, bool zero) if (!zero) { if (config_fill) { if (unlikely(opt_junk)) - memset(ret, 0xa5, size); + memset(ret, 0xa5, usize); else if (unlikely(opt_zero)) - memset(ret, 0, size); + memset(ret, 0, usize); } } @@ -1610,12 +1581,14 @@ arena_palloc(arena_t *arena, size_t size, size_t alignment, bool zero) ret = arena_miscelm_to_rpages(miscelm); if (config_stats) { + index_t index = size2index(size) - NBINS; + arena->stats.nmalloc_large++; arena->stats.nrequests_large++; arena->stats.allocated_large += size; - arena->stats.lstats[(size >> LG_PAGE) - 1].nmalloc++; - arena->stats.lstats[(size >> LG_PAGE) - 1].nrequests++; - arena->stats.lstats[(size >> LG_PAGE) - 1].curruns++; + arena->stats.lstats[index].nmalloc++; + arena->stats.lstats[index].nrequests++; + arena->stats.lstats[index].curruns++; } 
malloc_mutex_unlock(&arena->lock); @@ -1632,22 +1605,23 @@ void arena_prof_promoted(const void *ptr, size_t size) { arena_chunk_t *chunk; - size_t pageind, binind; + size_t pageind; + index_t binind; cassert(config_prof); assert(ptr != NULL); assert(CHUNK_ADDR2BASE(ptr) != ptr); - assert(isalloc(ptr, false) == PAGE); - assert(isalloc(ptr, true) == PAGE); + assert(isalloc(ptr, false) == LARGE_MINCLASS); + assert(isalloc(ptr, true) == LARGE_MINCLASS); assert(size <= SMALL_MAXCLASS); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - binind = small_size2bin(size); + binind = size2index(size); assert(binind < NBINS); arena_mapbits_large_binind_set(chunk, pageind, binind); - assert(isalloc(ptr, false) == PAGE); + assert(isalloc(ptr, false) == LARGE_MINCLASS); assert(isalloc(ptr, true) == size); } @@ -1660,7 +1634,7 @@ arena_dissociate_bin_run(arena_chunk_t *chunk, arena_run_t *run, if (run == bin->runcur) bin->runcur = NULL; else { - size_t binind = arena_bin_index(chunk->arena, bin); + index_t binind = arena_bin_index(chunk->arena, bin); arena_bin_info_t *bin_info = &arena_bin_info[binind]; if (bin_info->nregs != 1) { @@ -1678,7 +1652,7 @@ static void arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, arena_bin_t *bin) { - size_t binind; + index_t binind; arena_bin_info_t *bin_info; size_t npages, run_ind, past; arena_chunk_map_misc_t *miscelm; @@ -1762,7 +1736,8 @@ arena_dalloc_bin_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr, arena_run_t *run; arena_bin_t *bin; arena_bin_info_t *bin_info; - size_t size, binind; + size_t size; + index_t binind; pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; rpages_ind = pageind - arena_mapbits_small_runind_get(chunk, pageind); @@ -1851,10 +1826,12 @@ arena_dalloc_large_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr) arena_dalloc_junk_large(ptr, usize); if (config_stats) { + index_t index = size2index(usize) - NBINS; + arena->stats.ndalloc_large++; arena->stats.allocated_large -= usize; - arena->stats.lstats[(usize >> LG_PAGE) - 1].ndalloc++; - arena->stats.lstats[(usize >> LG_PAGE) - 1].curruns--; + arena->stats.lstats[index].ndalloc++; + arena->stats.lstats[index].curruns--; } } @@ -1887,17 +1864,20 @@ arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk, void *ptr, malloc_mutex_lock(&arena->lock); arena_run_trim_tail(arena, chunk, run, oldsize, size, true); if (config_stats) { + index_t oldindex = size2index(oldsize) - NBINS; + index_t index = size2index(size) - NBINS; + arena->stats.ndalloc_large++; arena->stats.allocated_large -= oldsize; - arena->stats.lstats[(oldsize >> LG_PAGE) - 1].ndalloc++; - arena->stats.lstats[(oldsize >> LG_PAGE) - 1].curruns--; + arena->stats.lstats[oldindex].ndalloc++; + arena->stats.lstats[oldindex].curruns--; arena->stats.nmalloc_large++; arena->stats.nrequests_large++; arena->stats.allocated_large += size; - arena->stats.lstats[(size >> LG_PAGE) - 1].nmalloc++; - arena->stats.lstats[(size >> LG_PAGE) - 1].nrequests++; - arena->stats.lstats[(size >> LG_PAGE) - 1].curruns++; + arena->stats.lstats[index].nmalloc++; + arena->stats.lstats[index].nrequests++; + arena->stats.lstats[index].curruns++; } malloc_mutex_unlock(&arena->lock); } @@ -1909,24 +1889,30 @@ arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr, size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; size_t npages = oldsize >> LG_PAGE; size_t followsize; + size_t usize_min = s2u(size); assert(oldsize == 
arena_mapbits_large_size_get(chunk, pageind)); /* Try to extend the run. */ - assert(size + extra > oldsize); + assert(usize_min > oldsize); malloc_mutex_lock(&arena->lock); if (pageind + npages < chunk_npages && arena_mapbits_allocated_get(chunk, pageind+npages) == 0 && (followsize = arena_mapbits_unallocated_size_get(chunk, - pageind+npages)) >= size - oldsize) { + pageind+npages)) >= usize_min - oldsize) { /* * The next run is available and sufficiently large. Split the * following run, then merge the first part with the existing * allocation. */ - size_t flag_dirty; - size_t splitsize = (oldsize + followsize <= size + extra) - ? followsize : size + extra - oldsize; + size_t flag_dirty, splitsize, usize; + + usize = s2u(size + extra); + while (oldsize + followsize < usize) + usize = index2size(size2index(usize)-1); + assert(usize >= usize_min); + splitsize = usize - oldsize; + arena_run_t *run = &arena_miscelm_get(chunk, pageind+npages)->run; arena_run_split_large(arena, run, splitsize, zero); @@ -1948,17 +1934,20 @@ arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr, arena_mapbits_large_set(chunk, pageind+npages-1, 0, flag_dirty); if (config_stats) { + index_t oldindex = size2index(oldsize) - NBINS; + index_t index = size2index(size) - NBINS; + arena->stats.ndalloc_large++; arena->stats.allocated_large -= oldsize; - arena->stats.lstats[(oldsize >> LG_PAGE) - 1].ndalloc++; - arena->stats.lstats[(oldsize >> LG_PAGE) - 1].curruns--; + arena->stats.lstats[oldindex].ndalloc++; + arena->stats.lstats[oldindex].curruns--; arena->stats.nmalloc_large++; arena->stats.nrequests_large++; arena->stats.allocated_large += size; - arena->stats.lstats[(size >> LG_PAGE) - 1].nmalloc++; - arena->stats.lstats[(size >> LG_PAGE) - 1].nrequests++; - arena->stats.lstats[(size >> LG_PAGE) - 1].curruns++; + arena->stats.lstats[index].nmalloc++; + arena->stats.lstats[index].nrequests++; + arena->stats.lstats[index].curruns++; } malloc_mutex_unlock(&arena->lock); return (false); @@ -1996,10 +1985,14 @@ static bool arena_ralloc_large(void *ptr, size_t oldsize, size_t size, size_t extra, bool zero) { - size_t psize; + size_t usize; - psize = PAGE_CEILING(size + extra); - if (psize == oldsize) { + /* Make sure extra can't cause size_t overflow. */ + if (extra >= arena_maxclass) + return (true); + + usize = s2u(size + extra); + if (usize == oldsize) { /* Same size class. */ return (false); } else { @@ -2009,16 +2002,15 @@ arena_ralloc_large(void *ptr, size_t oldsize, size_t size, size_t extra, chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); arena = chunk->arena; - if (psize < oldsize) { + if (usize < oldsize) { /* Fill before shrinking in order avoid a race. 
*/ - arena_ralloc_junk_large(ptr, oldsize, psize); + arena_ralloc_junk_large(ptr, oldsize, usize); arena_ralloc_large_shrink(arena, chunk, ptr, oldsize, - psize); + usize); return (false); } else { bool ret = arena_ralloc_large_grow(arena, chunk, ptr, - oldsize, PAGE_CEILING(size), - psize - PAGE_CEILING(size), zero); + oldsize, size, extra, zero); if (config_fill && !ret && !zero) { if (unlikely(opt_junk)) { memset((void *)((uintptr_t)ptr + @@ -2045,12 +2037,11 @@ arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, */ if (oldsize <= arena_maxclass) { if (oldsize <= SMALL_MAXCLASS) { - assert(arena_bin_info[small_size2bin(oldsize)].reg_size + assert(arena_bin_info[size2index(oldsize)].reg_size == oldsize); - if ((size + extra <= SMALL_MAXCLASS && - small_size2bin(size + extra) == - small_size2bin(oldsize)) || (size <= oldsize && - size + extra >= oldsize)) + if ((size + extra <= SMALL_MAXCLASS && size2index(size + + extra) == size2index(oldsize)) || (size <= oldsize + && size + extra >= oldsize)) return (false); } else { assert(size <= arena_maxclass); @@ -2258,7 +2249,7 @@ arena_new(arena_t *arena, unsigned ind) /* * Calculate bin_info->run_size such that it meets the following constraints: * - * *) bin_info->run_size <= arena_maxclass + * *) bin_info->run_size <= arena_maxrun * *) bin_info->nregs <= RUN_MAXREGS * * bin_info->nregs and bin_info->reg0_offset are also calculated here, since @@ -2330,7 +2321,7 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info) /* * Make sure that the run will fit within an arena chunk. */ - while (actual_run_size > arena_maxclass) { + while (actual_run_size > arena_maxrun) { actual_run_size -= PAGE; actual_nregs = (actual_run_size - pad_size) / bin_info->reg_interval; @@ -2396,7 +2387,17 @@ arena_boot(void) map_misc_offset = offsetof(arena_chunk_t, map_bits) + sizeof(arena_chunk_map_bits_t) * (chunk_npages-map_bias); - arena_maxclass = chunksize - (map_bias << LG_PAGE); + arena_maxrun = chunksize - (map_bias << LG_PAGE); + arena_maxclass = index2size(size2index(chunksize)-1); + if (arena_maxclass > arena_maxrun) { + /* + * For small chunk sizes it's possible for there to be fewer + * non-header pages available than are necessary to serve the + * size classes just below chunksize. + */ + arena_maxclass = arena_maxrun; + } + nlclasses = size2index(arena_maxclass) - size2index(SMALL_MAXCLASS); bin_info_init(); } diff --git a/src/chunk.c b/src/chunk.c index 32b8b3a6..618aaca0 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -27,9 +27,6 @@ rtree_t *chunks_rtree; size_t chunksize; size_t chunksize_mask; /* (chunksize - 1). */ size_t chunk_npages; -size_t map_bias; -size_t map_misc_offset; -size_t arena_maxclass; /* Max size class for arenas. 
*/ /******************************************************************************/ /* diff --git a/src/ctl.c b/src/ctl.c index 309f1f65..f1f3234b 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1628,7 +1628,7 @@ arenas_bin_i_index(const size_t *mib, size_t miblen, size_t i) } CTL_RO_NL_GEN(arenas_nlruns, nlclasses, size_t) -CTL_RO_NL_GEN(arenas_lrun_i_size, ((mib[2]+1) << LG_PAGE), size_t) +CTL_RO_NL_GEN(arenas_lrun_i_size, index2size(NBINS+mib[2]), size_t) static const ctl_named_node_t * arenas_lrun_i_index(const size_t *mib, size_t miblen, size_t i) { diff --git a/src/huge.c b/src/huge.c index 6bdc0767..ae416253 100644 --- a/src/huge.c +++ b/src/huge.c @@ -15,12 +15,19 @@ static extent_tree_t huge; void * huge_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero) { + size_t usize; - return (huge_palloc(tsd, arena, size, chunksize, zero)); + usize = s2u(size); + if (usize == 0) { + /* size_t overflow. */ + return (NULL); + } + + return (huge_palloc(tsd, arena, usize, chunksize, zero)); } void * -huge_palloc(tsd_t *tsd, arena_t *arena, size_t size, size_t alignment, +huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, bool zero) { void *ret; @@ -30,11 +37,8 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t size, size_t alignment, /* Allocate one or more contiguous chunks for this request. */ - csize = CHUNK_CEILING(size); - if (csize == 0) { - /* size is large enough to cause size_t wrap-around. */ - return (NULL); - } + csize = CHUNK_CEILING(usize); + assert(csize >= usize); /* Allocate an extent node with which to track the chunk. */ node = base_node_alloc(); @@ -55,7 +59,7 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t size, size_t alignment, /* Insert node into huge. */ node->addr = ret; - node->size = csize; + node->size = usize; node->arena = arena; malloc_mutex_lock(&huge_mtx); @@ -64,9 +68,9 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t size, size_t alignment, if (config_fill && !zero) { if (unlikely(opt_junk)) - memset(ret, 0xa5, csize); + memset(ret, 0xa5, usize); else if (unlikely(opt_zero) && !is_zeroed) - memset(ret, 0, csize); + memset(ret, 0, usize); } return (ret); @@ -97,7 +101,7 @@ huge_dalloc_junk_t *huge_dalloc_junk = JEMALLOC_N(huge_dalloc_junk_impl); static bool huge_ralloc_no_move_expand(void *ptr, size_t oldsize, size_t size, bool zero) { - size_t csize; + size_t usize; void *expand_addr; size_t expand_size; extent_node_t *node, key; @@ -105,14 +109,14 @@ huge_ralloc_no_move_expand(void *ptr, size_t oldsize, size_t size, bool zero) { bool is_zeroed; void *ret; - csize = CHUNK_CEILING(size); - if (csize == 0) { - /* size is large enough to cause size_t wrap-around. */ + usize = s2u(size); + if (usize == 0) { + /* size_t overflow. */ return (true); } - expand_addr = ptr + oldsize; - expand_size = csize - oldsize; + expand_addr = ptr + CHUNK_CEILING(oldsize); + expand_size = CHUNK_CEILING(usize) - CHUNK_CEILING(oldsize); malloc_mutex_lock(&huge_mtx); @@ -140,14 +144,14 @@ huge_ralloc_no_move_expand(void *ptr, size_t oldsize, size_t size, bool zero) { malloc_mutex_lock(&huge_mtx); /* Update the size of the huge allocation. 
*/ - node->size = csize; + node->size = usize; malloc_mutex_unlock(&huge_mtx); if (config_fill && !zero) { if (unlikely(opt_junk)) - memset(expand_addr, 0xa5, expand_size); + memset(ptr + oldsize, 0xa5, usize - oldsize); else if (unlikely(opt_zero) && !is_zeroed) - memset(expand_addr, 0, expand_size); + memset(ptr + oldsize, 0, usize - oldsize); } return (false); } @@ -156,27 +160,71 @@ bool huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, bool zero) { + size_t usize; /* Both allocations must be huge to avoid a move. */ - if (oldsize <= arena_maxclass) + if (oldsize < chunksize) return (true); - assert(CHUNK_CEILING(oldsize) == oldsize); + assert(s2u(oldsize) == oldsize); + usize = s2u(size); + if (usize == 0) { + /* size_t overflow. */ + return (true); + } /* - * Avoid moving the allocation if the size class can be left the same. + * Avoid moving the allocation if the existing chunk size accommodates + * the new size. */ + if (CHUNK_CEILING(oldsize) >= CHUNK_CEILING(usize) + && CHUNK_CEILING(oldsize) <= CHUNK_CEILING(size+extra)) { + size_t usize_next; + + /* Increase usize to incorporate extra. */ + while (usize < s2u(size+extra) && (usize_next = s2u(usize+1)) < + oldsize) + usize = usize_next; + + /* Update the size of the huge allocation if it changed. */ + if (oldsize != usize) { + extent_node_t *node, key; + + malloc_mutex_lock(&huge_mtx); + + key.addr = ptr; + node = extent_tree_ad_search(&huge, &key); + assert(node != NULL); + assert(node->addr == ptr); + + assert(node->size != usize); + node->size = usize; + + malloc_mutex_unlock(&huge_mtx); + + if (oldsize < usize) { + if (zero || (config_fill && + unlikely(opt_zero))) { + memset(ptr + oldsize, 0, usize - + oldsize); + } else if (config_fill && unlikely(opt_junk)) { + memset(ptr + oldsize, 0xa5, usize - + oldsize); + } + } else if (config_fill && unlikely(opt_junk) && oldsize + > usize) + memset(ptr + usize, 0x5a, oldsize - usize); + } + return (false); + } + if (CHUNK_CEILING(oldsize) >= CHUNK_CEILING(size) && CHUNK_CEILING(oldsize) <= CHUNK_CEILING(size+extra)) { return (false); } - /* Overflow. */ - if (CHUNK_CEILING(size) == 0) - return (true); - /* Shrink the allocation in-place. */ - if (CHUNK_CEILING(oldsize) > CHUNK_CEILING(size)) { + if (CHUNK_CEILING(oldsize) > CHUNK_CEILING(usize)) { extent_node_t *node, key; void *excess_addr; size_t excess_size; @@ -189,15 +237,15 @@ huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, assert(node->addr == ptr); /* Update the size of the huge allocation. */ - node->size = CHUNK_CEILING(size); + node->size = usize; malloc_mutex_unlock(&huge_mtx); - excess_addr = node->addr + CHUNK_CEILING(size); - excess_size = CHUNK_CEILING(oldsize) - CHUNK_CEILING(size); + excess_addr = node->addr + CHUNK_CEILING(usize); + excess_size = CHUNK_CEILING(oldsize) - CHUNK_CEILING(usize); /* Zap the excess chunks. 
*/ - huge_dalloc_junk(excess_addr, excess_size); + huge_dalloc_junk(ptr + usize, oldsize - usize); arena_chunk_dalloc_huge(node->arena, excess_addr, excess_size); return (false); @@ -275,7 +323,8 @@ huge_dalloc(void *ptr) malloc_mutex_unlock(&huge_mtx); huge_dalloc_junk(node->addr, node->size); - arena_chunk_dalloc_huge(node->arena, node->addr, node->size); + arena_chunk_dalloc_huge(node->arena, node->addr, + CHUNK_CEILING(node->size)); base_node_dalloc(node); } diff --git a/src/jemalloc.c b/src/jemalloc.c index 3490ecdf..f3750b40 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -42,6 +42,38 @@ unsigned narenas_auto; /* Set to true once the allocator has been initialized. */ static bool malloc_initialized = false; +JEMALLOC_ALIGNED(CACHELINE) +const size_t index2size_tab[NSIZES] = { +#define SC(index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup) \ + ((ZU(1)<next_gc_bin; + index_t binind = tcache->next_gc_bin; tcache_bin_t *tbin = &tcache->tbins[binind]; tcache_bin_info_t *tbin_info = &tcache_bin_info[binind]; @@ -62,7 +62,7 @@ tcache_event_hard(tcache_t *tcache) } void * -tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin, size_t binind) +tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin, index_t binind) { void *ret; @@ -76,7 +76,7 @@ tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin, size_t binind) } void -tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem, +tcache_bin_flush_small(tcache_bin_t *tbin, index_t binind, unsigned rem, tcache_t *tcache) { void *ptr; @@ -153,7 +153,7 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem, } void -tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem, +tcache_bin_flush_large(tcache_bin_t *tbin, index_t binind, unsigned rem, tcache_t *tcache) { void *ptr; diff --git a/test/unit/junk.c b/test/unit/junk.c index 301428f2..5b35a879 100644 --- a/test/unit/junk.c +++ b/test/unit/junk.c @@ -88,7 +88,6 @@ test_junk(size_t sz_min, size_t sz_max) if (xallocx(s, sz+1, 0, 0) == sz) { void *junked = (void *)s; - s = (char *)rallocx(s, sz+1, 0); assert_ptr_not_null((void *)s, "Unexpected rallocx() failure"); @@ -134,13 +133,25 @@ TEST_END arena_ralloc_junk_large_t *arena_ralloc_junk_large_orig; static void *most_recently_trimmed; +static size_t +shrink_size(size_t size) +{ + size_t shrink_size; + + for (shrink_size = size - 1; nallocx(shrink_size, 0) == size; + shrink_size--) + ; /* Do nothing. 
*/ + + return (shrink_size); +} + static void arena_ralloc_junk_large_intercept(void *ptr, size_t old_usize, size_t usize) { arena_ralloc_junk_large_orig(ptr, old_usize, usize); assert_zu_eq(old_usize, arena_maxclass, "Unexpected old_usize"); - assert_zu_eq(usize, arena_maxclass-PAGE, "Unexpected usize"); + assert_zu_eq(usize, shrink_size(arena_maxclass), "Unexpected usize"); most_recently_trimmed = ptr; } @@ -154,7 +165,7 @@ TEST_BEGIN(test_junk_large_ralloc_shrink) arena_ralloc_junk_large_orig = arena_ralloc_junk_large; arena_ralloc_junk_large = arena_ralloc_junk_large_intercept; - p2 = rallocx(p1, arena_maxclass-PAGE, 0); + p2 = rallocx(p1, shrink_size(arena_maxclass), 0); assert_ptr_eq(p1, p2, "Unexpected move during shrink"); arena_ralloc_junk_large = arena_ralloc_junk_large_orig; diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index c70473cc..e62e54f2 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -357,7 +357,7 @@ TEST_BEGIN(test_arenas_lrun_constants) assert_zu_eq(name, expected, "Incorrect "#name" size"); \ } while (0) - TEST_ARENAS_LRUN_CONSTANT(size_t, size, (1 << LG_PAGE)); + TEST_ARENAS_LRUN_CONSTANT(size_t, size, (1 << (LG_PAGE+2))); #undef TEST_ARENAS_LRUN_CONSTANT }
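
The spacing the commit message describes -- 2^LG_SIZE_CLASS_GROUP (here 4) size classes per size doubling -- comes down to the arithmetic in index2size_compute() and s2u_compute() above. The standalone program below is only an illustrative sketch of that arithmetic, not part of the patch or of jemalloc's API. It assumes a common x86-64 configuration (LG_QUANTUM == 4, LG_SIZE_CLASS_GROUP == 2, no tiny classes, i.e. NTBINS == 0); the helpers class_size(), round_up_to_class(), and the simplified lg_floor() are hypothetical stand-ins for the real inline functions generated from size_classes.sh.

#include <stdio.h>
#include <stddef.h>

#define LG_QUANTUM		4	/* Assumed: 16-byte quantum (x86-64 default). */
#define LG_SIZE_CLASS_GROUP	2	/* 2^2 == 4 size classes per size doubling. */

static size_t
lg_floor(size_t x)
{
	size_t lg = 0;

	while (x >>= 1)
		lg++;
	return (lg);
}

/* Usable size for a class index (mirrors index2size_compute(), NTBINS == 0). */
static size_t
class_size(size_t index)
{
	size_t grp = index >> LG_SIZE_CLASS_GROUP;
	size_t mod = index & (((size_t)1 << LG_SIZE_CLASS_GROUP) - 1);
	size_t grp_size = (grp == 0) ? 0 :
	    ((size_t)1 << (LG_QUANTUM + LG_SIZE_CLASS_GROUP - 1)) << grp;
	size_t lg_delta = ((grp == 0) ? 1 : grp) + (LG_QUANTUM - 1);

	return (grp_size + ((mod + 1) << lg_delta));
}

/* Smallest size class that can hold a request (mirrors s2u_compute()). */
static size_t
round_up_to_class(size_t size)
{
	size_t x = lg_floor((size << 1) - 1);
	size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1) ?
	    LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1;
	size_t delta = (size_t)1 << lg_delta;

	return ((size + delta - 1) & ~(delta - 1));
}

int
main(void)
{
	size_t i;

	/* First four groups: every size doubling holds exactly four classes. */
	for (i = 0; i < 16; i++)
		printf("class %2zu -> %4zu bytes\n", i, class_size(i));

	/* E.g. a 4500 byte request rounds up to the 5 KiB class, one of the
	 * new small classes in [4 KiB .. 16 KiB). */
	printf("round_up_to_class(4500) = %zu\n", round_up_to_class(4500));
	return (0);
}

Under these assumed parameters, each doubling beyond the first group is split into four classes, so the gap between adjacent classes is a quarter of the group's base size and worst-case internal fragmentation stays just under 20%; the patch extends that uniform spacing across the small, large, and huge ranges instead of reserving it for small sizes only.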