From 12efefb1953062795f5a971c1a72706787c7895c Mon Sep 17 00:00:00 2001
From: Jason Evans <je@fb.com>
Date: Tue, 16 Oct 2012 22:06:56 -0700
Subject: [PATCH 1/7] Fix dss/mmap allocation precedence code.

Fix dss/mmap allocation precedence code to use recyclable mmap memory
only after primary dss allocation fails.
---
 ChangeLog   |  6 ++++++
 src/chunk.c | 40 ++++++++++++++--------------------------
 2 files changed, 20 insertions(+), 26 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 0ae4f118..9a618724 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -6,6 +6,12 @@ found in the git revision history:
     http://www.canonware.com/cgi-bin/gitweb.cgi?p=jemalloc.git
     git://canonware.com/jemalloc.git
 
+* 3.x.x (XXX Not released)
+
+  Bug fixes:
+  - Fix dss/mmap allocation precedence code to use recyclable mmap memory only
+    after primary dss allocation fails.
+
 * 3.1.0 (October 16, 2012)
 
   New features:
diff --git a/src/chunk.c b/src/chunk.c
index 5fc9e75c..1a3bb4f6 100644
--- a/src/chunk.c
+++ b/src/chunk.c
@@ -146,40 +146,28 @@ chunk_alloc(size_t size, size_t alignment, bool base, bool *zero,
 	assert(alignment != 0);
 	assert((alignment & chunksize_mask) == 0);
 
-	/*
-	 * Try to recycle an existing mapping.
-	 */
-
 	/* "primary" dss. */
-	if (config_dss && dss_prec == dss_prec_primary && (ret =
-	    chunk_recycle(&chunks_szad_dss, &chunks_ad_dss, size, alignment,
-	    base, zero)) != NULL)
-		goto label_return;
+	if (config_dss && dss_prec == dss_prec_primary) {
+		if ((ret = chunk_recycle(&chunks_szad_dss, &chunks_ad_dss, size,
+		    alignment, base, zero)) != NULL)
+			goto label_return;
+		if ((ret = chunk_alloc_dss(size, alignment, zero)) != NULL)
+			goto label_return;
+	}
 	/* mmap. */
 	if ((ret = chunk_recycle(&chunks_szad_mmap, &chunks_ad_mmap, size,
 	    alignment, base, zero)) != NULL)
 		goto label_return;
-	/* "secondary" dss. */
-	if (config_dss && dss_prec == dss_prec_secondary && (ret =
-	    chunk_recycle(&chunks_szad_dss, &chunks_ad_dss, size, alignment,
-	    base, zero)) != NULL)
-		goto label_return;
-
-	/*
-	 * Try to allocate a new mapping.
-	 */
-
-	/* "primary" dss. */
-	if (config_dss && dss_prec == dss_prec_primary && (ret =
-	    chunk_alloc_dss(size, alignment, zero)) != NULL)
-		goto label_return;
-	/* mmap. */
 	if ((ret = chunk_alloc_mmap(size, alignment, zero)) != NULL)
 		goto label_return;
 	/* "secondary" dss. */
-	if (config_dss && dss_prec == dss_prec_secondary && (ret =
-	    chunk_alloc_dss(size, alignment, zero)) != NULL)
-		goto label_return;
+	if (config_dss && dss_prec == dss_prec_secondary) {
+		if ((ret = chunk_recycle(&chunks_szad_dss, &chunks_ad_dss, size,
+		    alignment, base, zero)) != NULL)
+			goto label_return;
+		if ((ret = chunk_alloc_dss(size, alignment, zero)) != NULL)
+			goto label_return;
+	}
 
 	/* All strategies for allocation failed. */
 	ret = NULL;

From 34457f51448e81f32a1bff16bbf600b79dd9ec5a Mon Sep 17 00:00:00 2001
From: Jason Evans <je@fb.com>
Date: Sat, 3 Nov 2012 21:18:28 -0700
Subject: [PATCH 2/7] Fix deadlock in the arenas.purge mallctl.

Fix deadlock in the arenas.purge mallctl due to recursive mutex
acquisition.
---
 src/ctl.c | 48 ++++++++++++++++++++++--------------------------
 1 file changed, 22 insertions(+), 26 deletions(-)

diff --git a/src/ctl.c b/src/ctl.c
index e3ab9da9..6e01b1e2 100644
--- a/src/ctl.c
+++ b/src/ctl.c
@@ -113,7 +113,7 @@ CTL_PROTO(opt_prof_final)
 CTL_PROTO(opt_prof_leak)
 CTL_PROTO(opt_prof_accum)
 CTL_PROTO(arena_i_purge)
-static int	arena_purge(unsigned arena_ind);
+static void	arena_purge(unsigned arena_ind);
 CTL_PROTO(arena_i_dss)
 INDEX_PROTO(arena_i)
 CTL_PROTO(arenas_bin_i_size)
@@ -1274,35 +1274,27 @@ CTL_RO_NL_CGEN(config_prof, opt_prof_accum, opt_prof_accum, bool)
 
 /******************************************************************************/
 
-static int
+/* ctl_mutex must be held during execution of this function. */
+static void
 arena_purge(unsigned arena_ind)
 {
-	int ret;
+	VARIABLE_ARRAY(arena_t *, tarenas, ctl_stats.narenas);
 
-	malloc_mutex_lock(&ctl_mtx);
-	{
-		VARIABLE_ARRAY(arena_t *, tarenas, ctl_stats.narenas);
+	malloc_mutex_lock(&arenas_lock);
+	memcpy(tarenas, arenas, sizeof(arena_t *) * ctl_stats.narenas);
+	malloc_mutex_unlock(&arenas_lock);
 
-		malloc_mutex_lock(&arenas_lock);
-		memcpy(tarenas, arenas, sizeof(arena_t *) * ctl_stats.narenas);
-		malloc_mutex_unlock(&arenas_lock);
-
-		if (arena_ind == ctl_stats.narenas) {
-			unsigned i;
-			for (i = 0; i < ctl_stats.narenas; i++) {
-				if (tarenas[i] != NULL)
-					arena_purge_all(tarenas[i]);
-			}
-		} else {
-			assert(arena_ind < ctl_stats.narenas);
-			if (tarenas[arena_ind] != NULL)
-				arena_purge_all(tarenas[arena_ind]);
+	if (arena_ind == ctl_stats.narenas) {
+		unsigned i;
+		for (i = 0; i < ctl_stats.narenas; i++) {
+			if (tarenas[i] != NULL)
+				arena_purge_all(tarenas[i]);
 		}
+	} else {
+		assert(arena_ind < ctl_stats.narenas);
+		if (tarenas[arena_ind] != NULL)
+			arena_purge_all(tarenas[arena_ind]);
 	}
-
-	ret = 0;
-	malloc_mutex_unlock(&ctl_mtx);
-	return (ret);
 }
 
 static int
@@ -1313,8 +1305,11 @@ arena_i_purge_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
 
 	READONLY();
 	WRITEONLY();
-	ret = arena_purge(mib[1]);
+	malloc_mutex_lock(&ctl_mtx);
+	arena_purge(mib[1]);
+	malloc_mutex_unlock(&ctl_mtx);
 
+	ret = 0;
 label_return:
 	return (ret);
 }
@@ -1483,7 +1478,8 @@ arenas_purge_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
 	else {
 		if (arena_ind == UINT_MAX)
 			arena_ind = ctl_stats.narenas;
-		ret = arena_purge(arena_ind);
+		arena_purge(arena_ind);
+		ret = 0;
 	}
 
 label_return:

From e3d13060c8a04f08764b16b003169eb205fa09eb Mon Sep 17 00:00:00 2001
From: Jason Evans <je@fb.com>
Date: Tue, 30 Oct 2012 15:42:37 -0700
Subject: [PATCH 3/7] Purge unused dirty pages in a fragmentation-reducing
 order.

Purge unused dirty pages in an order that first performs clean/dirty run
defragmentation, in order to mitigate available run fragmentation.

Remove the limitation that prevented purging unless at least one chunk
worth of dirty pages had accumulated in an arena.  This limitation was
intended to avoid excessive purging for small applications, but the
threshold was arbitrary, and the effect of questionable utility.

Relax opt_lg_dirty_mult from 5 to 3.  This compensates for increased
likelihood of allocating clean runs, given the same ratio of clean:dirty
runs, and reduces the potential for repeated purging in pathological
large malloc/free loops that push the active:dirty page ratio just over
the purge threshold.
---
 ChangeLog                         |   5 +
 doc/jemalloc.xml.in               |   2 +-
 include/jemalloc/internal/arena.h |  53 ++--
 src/arena.c                       | 496 ++++++++++++++++++------------
 4 files changed, 336 insertions(+), 220 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 9a618724..edba5269 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -8,9 +8,14 @@ found in the git revision history:
 
 * 3.x.x (XXX Not released)
 
+  Incompatible changes:
+  - Change the "opt.lg_dirty_mult" from 5 to 3 (32:1 to 8:1).
+
   Bug fixes:
   - Fix dss/mmap allocation precedence code to use recyclable mmap memory only
     after primary dss allocation fails.
+  - Fix deadlock in the "arenas.purge" mallctl.  This regression was introduced
+    in 3.1.0 by the addition of the "arena.<i>.purge" mallctl.
 
 * 3.1.0 (October 16, 2012)
 
diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in
index 441c1a4a..754fdcc7 100644
--- a/doc/jemalloc.xml.in
+++ b/doc/jemalloc.xml.in
@@ -833,7 +833,7 @@ for (i = 0; i < nbins; i++) {
         <manvolnum>2</manvolnum></citerefentry> or a similar system call.  This
         provides the kernel with sufficient information to recycle dirty pages
         if physical memory becomes scarce and the pages remain unused.  The
-        default minimum ratio is 32:1 (2^5:1); an option value of -1 will
+        default minimum ratio is 8:1 (2^3:1); an option value of -1 will
         disable dirty page purging.</para></listitem>
       </varlistentry>
 
diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h
index 49213e32..561c9b6f 100644
--- a/include/jemalloc/internal/arena.h
+++ b/include/jemalloc/internal/arena.h
@@ -38,10 +38,10 @@
  *
  *   (nactive >> opt_lg_dirty_mult) >= ndirty
  *
- * So, supposing that opt_lg_dirty_mult is 5, there can be no less than 32
- * times as many active pages as dirty pages.
+ * So, supposing that opt_lg_dirty_mult is 3, there can be no less than 8 times
+ * as many active pages as dirty pages.
  */
-#define	LG_DIRTY_MULT_DEFAULT	5
+#define	LG_DIRTY_MULT_DEFAULT	3
 
 typedef struct arena_chunk_map_s arena_chunk_map_t;
 typedef struct arena_chunk_s arena_chunk_t;
@@ -69,7 +69,7 @@ struct arena_chunk_map_s {
 		/*
 		 * Linkage for run trees.  There are two disjoint uses:
 		 *
-		 * 1) arena_t's runs_avail_{clean,dirty} trees.
+		 * 1) arena_t's runs_avail tree.
 		 * 2) arena_run_t conceptually uses this linkage for in-use
 		 *    non-full runs, rather than directly embedding linkage.
 		 */
@@ -162,20 +162,24 @@ typedef rb_tree(arena_chunk_map_t) arena_run_tree_t;
 /* Arena chunk header. */
 struct arena_chunk_s {
 	/* Arena that owns the chunk. */
-	arena_t		*arena;
+	arena_t			*arena;
 
-	/* Linkage for the arena's chunks_dirty list. */
-	ql_elm(arena_chunk_t) link_dirty;
-
-	/*
-	 * True if the chunk is currently in the chunks_dirty list, due to
-	 * having at some point contained one or more dirty pages.  Removal
-	 * from chunks_dirty is lazy, so (dirtied && ndirty == 0) is possible.
-	 */
-	bool		dirtied;
+	/* Linkage for tree of arena chunks that contain dirty runs. */
+	rb_node(arena_chunk_t)	dirty_link;
 
 	/* Number of dirty pages. */
-	size_t		ndirty;
+	size_t			ndirty;
+
+	/* Number of available runs. */
+	size_t			nruns_avail;
+
+	/*
+	 * Number of available run adjacencies.  Clean and dirty available runs
+	 * are not coalesced, which causes virtual memory fragmentation.  The
+	 * ratio of (nruns_avail-nruns_adjac):nruns_adjac is used for tracking
+	 * this fragmentation.
+	 * */
+	size_t			nruns_adjac;
 
 	/*
 	 * Map of pages within chunk that keeps track of free/large/small.  The
@@ -183,7 +187,7 @@ struct arena_chunk_s {
 	 * need to be tracked in the map.  This omission saves a header page
 	 * for common chunk sizes (e.g. 4 MiB).
 	 */
-	arena_chunk_map_t map[1]; /* Dynamically sized. */
+	arena_chunk_map_t	map[1]; /* Dynamically sized. */
 };
 typedef rb_tree(arena_chunk_t) arena_chunk_tree_t;
 
@@ -333,8 +337,8 @@ struct arena_s {
 
 	dss_prec_t		dss_prec;
 
-	/* List of dirty-page-containing chunks this arena manages. */
-	ql_head(arena_chunk_t)	chunks_dirty;
+	/* Tree of dirty-page-containing chunks this arena manages. */
+	arena_chunk_tree_t	chunks_dirty;
 
 	/*
 	 * In order to avoid rapid chunk allocation/deallocation when an arena
@@ -369,18 +373,9 @@ struct arena_s {
 
 	/*
 	 * Size/address-ordered trees of this arena's available runs.  The trees
-	 * are used for first-best-fit run allocation.  The dirty tree contains
-	 * runs with dirty pages (i.e. very likely to have been touched and
-	 * therefore have associated physical pages), whereas the clean tree
-	 * contains runs with pages that either have no associated physical
-	 * pages, or have pages that the kernel may recycle at any time due to
-	 * previous madvise(2) calls.  The dirty tree is used in preference to
-	 * the clean tree for allocations, because using dirty pages reduces
-	 * the amount of dirty purging necessary to keep the active:dirty page
-	 * ratio below the purge threshold.
+	 * are used for first-best-fit run allocation.
 	 */
-	arena_avail_tree_t	runs_avail_clean;
-	arena_avail_tree_t	runs_avail_dirty;
+	arena_avail_tree_t	runs_avail;
 
 	/* bins is used to store trees of free regions. */
 	arena_bin_t		bins[NBINS];
diff --git a/src/arena.c b/src/arena.c
index 1e6964a8..b93a6799 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -40,6 +40,12 @@ const uint8_t	small_size2bin[] = {
 /******************************************************************************/
 /* Function prototypes for non-inline static functions. */
 
+static void	arena_avail_insert(arena_t *arena, arena_chunk_t *chunk,
+    size_t pageind, size_t npages, bool maybe_adjac_pred,
+    bool maybe_adjac_succ);
+static void	arena_avail_remove(arena_t *arena, arena_chunk_t *chunk,
+    size_t pageind, size_t npages, bool maybe_adjac_pred,
+    bool maybe_adjac_succ);
 static void	arena_run_split(arena_t *arena, arena_run_t *run, size_t size,
     bool large, size_t binind, bool zero);
 static arena_chunk_t *arena_chunk_alloc(arena_t *arena);
@@ -48,8 +54,11 @@ static arena_run_t	*arena_run_alloc_helper(arena_t *arena, size_t size,
     bool large, size_t binind, bool zero);
 static arena_run_t *arena_run_alloc(arena_t *arena, size_t size, bool large,
     size_t binind, bool zero);
+static arena_chunk_t	*chunks_dirty_iter_cb(arena_chunk_tree_t *tree,
+    arena_chunk_t *chunk, void *arg);
 static void	arena_purge(arena_t *arena, bool all);
-static void	arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty);
+static void	arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty,
+    bool cleaned);
 static void	arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk,
     arena_run_t *run, size_t oldsize, size_t newsize);
 static void	arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk,
@@ -101,9 +110,6 @@ arena_avail_comp(arena_chunk_map_t *a, arena_chunk_map_t *b)
 	size_t a_size = a->bits & ~PAGE_MASK;
 	size_t b_size = b->bits & ~PAGE_MASK;
 
-	assert((a->bits & CHUNK_MAP_KEY) == CHUNK_MAP_KEY || (a->bits &
-	    CHUNK_MAP_DIRTY) == (b->bits & CHUNK_MAP_DIRTY));
-
 	ret = (a_size > b_size) - (a_size < b_size);
 	if (ret == 0) {
 		uintptr_t a_mapelm, b_mapelm;
@@ -129,6 +135,159 @@ arena_avail_comp(arena_chunk_map_t *a, arena_chunk_map_t *b)
 rb_gen(static UNUSED, arena_avail_tree_, arena_avail_tree_t, arena_chunk_map_t,
     u.rb_link, arena_avail_comp)
 
+static inline int
+arena_chunk_dirty_comp(arena_chunk_t *a, arena_chunk_t *b)
+{
+	size_t a_val, b_val;
+
+	assert(a != NULL);
+	assert(b != NULL);
+
+	/*
+	 * Order such that chunks with higher fragmentation are "less than"
+	 * those with lower fragmentation.  Fragmentation is measured as:
+	 *
+	 *     mean current avail run size
+	 *   --------------------------------
+	 *   mean defragmented avail run size
+	 *
+	 *            navail
+	 *         -----------
+	 *         nruns_avail           nruns_avail-nruns_adjac
+	 * = ========================= = -----------------------
+	 *            navail                  nruns_avail
+	 *    -----------------------
+	 *    nruns_avail-nruns_adjac
+	 *
+	 * The following code multiplies away the denominator prior to
+	 * comparison, in order to avoid division.
+	 *
+	 */
+	a_val = (a->nruns_avail - a->nruns_adjac) * b->nruns_avail;
+	b_val = (b->nruns_avail - b->nruns_adjac) * a->nruns_avail;
+	if (a_val < b_val)
+		return (1);
+	if (a_val > b_val)
+		return (-1);
+	/* Break ties by chunk address. */
+	{
+		uintptr_t a_chunk = (uintptr_t)a;
+		uintptr_t b_chunk = (uintptr_t)b;
+
+		return ((a_chunk > b_chunk) - (a_chunk < b_chunk));
+	}
+}
+
+/* Generate red-black tree functions. */
+rb_gen(static UNUSED, arena_chunk_dirty_, arena_chunk_tree_t, arena_chunk_t,
+    dirty_link, arena_chunk_dirty_comp)
+
+static inline bool
+arena_avail_adjac_pred(arena_chunk_t *chunk, size_t pageind)
+{
+	bool ret;
+
+	if (pageind-1 < map_bias)
+		ret = false;
+	else {
+		ret = (arena_mapbits_allocated_get(chunk, pageind-1) == 0);
+		assert(ret == false || arena_mapbits_dirty_get(chunk,
+		    pageind-1) != arena_mapbits_dirty_get(chunk, pageind));
+	}
+	return (ret);
+}
+
+static inline bool
+arena_avail_adjac_succ(arena_chunk_t *chunk, size_t pageind, size_t npages)
+{
+	bool ret;
+
+	if (pageind+npages == chunk_npages)
+		ret = false;
+	else {
+		assert(pageind+npages < chunk_npages);
+		ret = (arena_mapbits_allocated_get(chunk, pageind+npages) == 0);
+		assert(ret == false || arena_mapbits_dirty_get(chunk, pageind)
+		    != arena_mapbits_dirty_get(chunk, pageind+npages));
+	}
+	return (ret);
+}
+
+static inline bool
+arena_avail_adjac(arena_chunk_t *chunk, size_t pageind, size_t npages)
+{
+
+	return (arena_avail_adjac_pred(chunk, pageind) ||
+	    arena_avail_adjac_succ(chunk, pageind, npages));
+}
+
+static void
+arena_avail_insert(arena_t *arena, arena_chunk_t *chunk, size_t pageind,
+    size_t npages, bool maybe_adjac_pred, bool maybe_adjac_succ)
+{
+
+	assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >>
+	    LG_PAGE));
+
+	/*
+	 * chunks_dirty is keyed by nruns_{avail,adjac}, so the chunk must be
+	 * removed and reinserted even if the run to be inserted is clean.
+	 */
+	if (chunk->ndirty != 0)
+		arena_chunk_dirty_remove(&arena->chunks_dirty, chunk);
+
+	if (maybe_adjac_pred && arena_avail_adjac_pred(chunk, pageind))
+		chunk->nruns_adjac++;
+	if (maybe_adjac_succ && arena_avail_adjac_succ(chunk, pageind, npages))
+		chunk->nruns_adjac++;
+	chunk->nruns_avail++;
+	assert(chunk->nruns_avail > chunk->nruns_adjac);
+
+	if (arena_mapbits_dirty_get(chunk, pageind) != 0) {
+		arena->ndirty += npages;
+		chunk->ndirty += npages;
+	}
+	if (chunk->ndirty != 0)
+		arena_chunk_dirty_insert(&arena->chunks_dirty, chunk);
+
+	arena_avail_tree_insert(&arena->runs_avail, arena_mapp_get(chunk,
+	    pageind));
+}
+
+static void
+arena_avail_remove(arena_t *arena, arena_chunk_t *chunk, size_t pageind,
+    size_t npages, bool maybe_adjac_pred, bool maybe_adjac_succ)
+{
+
+	assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >>
+	    LG_PAGE));
+
+	/*
+	 * chunks_dirty is keyed by nruns_{avail,adjac}, so the chunk must be
+	 * removed and reinserted even if the run to be removed is clean.
+	 */
+	if (chunk->ndirty != 0)
+		arena_chunk_dirty_remove(&arena->chunks_dirty, chunk);
+
+	if (maybe_adjac_pred && arena_avail_adjac_pred(chunk, pageind))
+		chunk->nruns_adjac--;
+	if (maybe_adjac_succ && arena_avail_adjac_succ(chunk, pageind, npages))
+		chunk->nruns_adjac--;
+	chunk->nruns_avail--;
+	assert(chunk->nruns_avail > chunk->nruns_adjac || (chunk->nruns_avail
+	    == 0 && chunk->nruns_adjac == 0));
+
+	if (arena_mapbits_dirty_get(chunk, pageind) != 0) {
+		arena->ndirty -= npages;
+		chunk->ndirty -= npages;
+	}
+	if (chunk->ndirty != 0)
+		arena_chunk_dirty_insert(&arena->chunks_dirty, chunk);
+
+	arena_avail_tree_remove(&arena->runs_avail, arena_mapp_get(chunk,
+	    pageind));
+}
+
 static inline void *
 arena_run_reg_alloc(arena_run_t *run, arena_bin_info_t *bin_info)
 {
@@ -193,7 +352,6 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large,
 	arena_chunk_t *chunk;
 	size_t run_ind, total_pages, need_pages, rem_pages, i;
 	size_t flag_dirty;
-	arena_avail_tree_t *runs_avail;
 
 	assert((large && binind == BININD_INVALID) || (large == false && binind
 	    != BININD_INVALID));
@@ -201,8 +359,6 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large,
 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
 	run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE);
 	flag_dirty = arena_mapbits_dirty_get(chunk, run_ind);
-	runs_avail = (flag_dirty != 0) ? &arena->runs_avail_dirty :
-	    &arena->runs_avail_clean;
 	total_pages = arena_mapbits_unallocated_size_get(chunk, run_ind) >>
 	    LG_PAGE;
 	assert(arena_mapbits_dirty_get(chunk, run_ind+total_pages-1) ==
@@ -212,7 +368,7 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large,
 	assert(need_pages <= total_pages);
 	rem_pages = total_pages - need_pages;
 
-	arena_avail_tree_remove(runs_avail, arena_mapp_get(chunk, run_ind));
+	arena_avail_remove(arena, chunk, run_ind, total_pages, true, true);
 	if (config_stats) {
 		/*
 		 * Update stats_cactive if nactive is crossing a chunk
@@ -244,14 +400,8 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large,
 			    arena_mapbits_unzeroed_get(chunk,
 			    run_ind+total_pages-1));
 		}
-		arena_avail_tree_insert(runs_avail, arena_mapp_get(chunk,
-		    run_ind+need_pages));
-	}
-
-	/* Update dirty page accounting. */
-	if (flag_dirty != 0) {
-		chunk->ndirty -= need_pages;
-		arena->ndirty -= need_pages;
+		arena_avail_insert(arena, chunk, run_ind+need_pages, rem_pages,
+		    false, true);
 	}
 
 	/*
@@ -344,8 +494,6 @@ arena_chunk_alloc(arena_t *arena)
 	size_t i;
 
 	if (arena->spare != NULL) {
-		arena_avail_tree_t *runs_avail;
-
 		chunk = arena->spare;
 		arena->spare = NULL;
 
@@ -357,14 +505,6 @@ arena_chunk_alloc(arena_t *arena)
 		    chunk_npages-1) == arena_maxclass);
 		assert(arena_mapbits_dirty_get(chunk, map_bias) ==
 		    arena_mapbits_dirty_get(chunk, chunk_npages-1));
-
-		/* Insert the run into the appropriate runs_avail_* tree. */
-		if (arena_mapbits_dirty_get(chunk, map_bias) == 0)
-			runs_avail = &arena->runs_avail_clean;
-		else
-			runs_avail = &arena->runs_avail_dirty;
-		arena_avail_tree_insert(runs_avail, arena_mapp_get(chunk,
-		    map_bias));
 	} else {
 		bool zero;
 		size_t unzeroed;
@@ -380,8 +520,6 @@ arena_chunk_alloc(arena_t *arena)
 			arena->stats.mapped += chunksize;
 
 		chunk->arena = arena;
-		ql_elm_new(chunk, link_dirty);
-		chunk->dirtied = false;
 
 		/*
 		 * Claim that no pages are in use, since the header is merely
@@ -389,6 +527,9 @@ arena_chunk_alloc(arena_t *arena)
 		 */
 		chunk->ndirty = 0;
 
+		chunk->nruns_avail = 0;
+		chunk->nruns_adjac = 0;
+
 		/*
 		 * Initialize the map to contain one maximal free untouched run.
 		 * Mark the pages as zeroed iff chunk_alloc() returned a zeroed
@@ -412,20 +553,18 @@ arena_chunk_alloc(arena_t *arena)
 		}
 		arena_mapbits_unallocated_set(chunk, chunk_npages-1,
 		    arena_maxclass, unzeroed);
-
-		/* Insert the run into the runs_avail_clean tree. */
-		arena_avail_tree_insert(&arena->runs_avail_clean,
-		    arena_mapp_get(chunk, map_bias));
 	}
 
+	/* Insert the run into the runs_avail tree. */
+	arena_avail_insert(arena, chunk, map_bias, chunk_npages-map_bias,
+	    false, false);
+
 	return (chunk);
 }
 
 static void
 arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk)
 {
-	arena_avail_tree_t *runs_avail;
-
 	assert(arena_mapbits_allocated_get(chunk, map_bias) == 0);
 	assert(arena_mapbits_allocated_get(chunk, chunk_npages-1) == 0);
 	assert(arena_mapbits_unallocated_size_get(chunk, map_bias) ==
@@ -436,24 +575,16 @@ arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk)
 	    arena_mapbits_dirty_get(chunk, chunk_npages-1));
 
 	/*
-	 * Remove run from the appropriate runs_avail_* tree, so that the arena
-	 * does not use it.
+	 * Remove run from the runs_avail tree, so that the arena does not use
+	 * it.
 	 */
-	if (arena_mapbits_dirty_get(chunk, map_bias) == 0)
-		runs_avail = &arena->runs_avail_clean;
-	else
-		runs_avail = &arena->runs_avail_dirty;
-	arena_avail_tree_remove(runs_avail, arena_mapp_get(chunk, map_bias));
+	arena_avail_remove(arena, chunk, map_bias, chunk_npages-map_bias,
+	    false, false);
 
 	if (arena->spare != NULL) {
 		arena_chunk_t *spare = arena->spare;
 
 		arena->spare = chunk;
-		if (spare->dirtied) {
-			ql_remove(&chunk->arena->chunks_dirty, spare,
-			    link_dirty);
-			arena->ndirty -= spare->ndirty;
-		}
 		malloc_mutex_unlock(&arena->lock);
 		chunk_dealloc((void *)spare, chunksize, true);
 		malloc_mutex_lock(&arena->lock);
@@ -471,19 +602,7 @@ arena_run_alloc_helper(arena_t *arena, size_t size, bool large, size_t binind,
 	arena_chunk_map_t *mapelm, key;
 
 	key.bits = size | CHUNK_MAP_KEY;
-	mapelm = arena_avail_tree_nsearch(&arena->runs_avail_dirty, &key);
-	if (mapelm != NULL) {
-		arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(mapelm);
-		size_t pageind = (((uintptr_t)mapelm -
-		    (uintptr_t)run_chunk->map) / sizeof(arena_chunk_map_t))
-		    + map_bias;
-
-		run = (arena_run_t *)((uintptr_t)run_chunk + (pageind <<
-		    LG_PAGE));
-		arena_run_split(arena, run, size, large, binind, zero);
-		return (run);
-	}
-	mapelm = arena_avail_tree_nsearch(&arena->runs_avail_clean, &key);
+	mapelm = arena_avail_tree_nsearch(&arena->runs_avail, &key);
 	if (mapelm != NULL) {
 		arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(mapelm);
 		size_t pageind = (((uintptr_t)mapelm -
@@ -537,29 +656,40 @@ arena_run_alloc(arena_t *arena, size_t size, bool large, size_t binind,
 static inline void
 arena_maybe_purge(arena_t *arena)
 {
+	size_t npurgeable, threshold;
 
-	/* Enforce opt_lg_dirty_mult. */
-	if (opt_lg_dirty_mult >= 0 && arena->ndirty > arena->npurgatory &&
-	    (arena->ndirty - arena->npurgatory) > chunk_npages &&
-	    (arena->nactive >> opt_lg_dirty_mult) < (arena->ndirty -
-	    arena->npurgatory))
-		arena_purge(arena, false);
+	/* Don't purge if the option is disabled. */
+	if (opt_lg_dirty_mult < 0)
+		return;
+	/* Don't purge if all dirty pages are already being purged. */
+	if (arena->ndirty <= arena->npurgatory)
+		return;
+	npurgeable = arena->ndirty - arena->npurgatory;
+	threshold = (arena->nactive >> opt_lg_dirty_mult);
+	/*
+	 * Don't purge unless the number of purgeable pages exceeds the
+	 * threshold.
+	 */
+	if (npurgeable <= threshold)
+		return;
+
+	arena_purge(arena, false);
 }
 
-static inline void
-arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk)
+static inline size_t
+arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk, bool all)
 {
+	size_t npurged;
 	ql_head(arena_chunk_map_t) mapelms;
 	arena_chunk_map_t *mapelm;
-	size_t pageind;
-	size_t ndirty;
+	size_t pageind, npages;
 	size_t nmadvise;
 
 	ql_new(&mapelms);
 
 	/*
 	 * If chunk is the spare, temporarily re-allocate it, 1) so that its
-	 * run is reinserted into runs_avail_dirty, and 2) so that it cannot be
+	 * run is reinserted into runs_avail, and 2) so that it cannot be
 	 * completely discarded by another thread while arena->lock is dropped
 	 * by this thread.  Note that the arena_run_dalloc() call will
 	 * implicitly deallocate the chunk, so no explicit action is required
@@ -579,54 +709,50 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk)
 		arena_chunk_alloc(arena);
 	}
 
-	/* Temporarily allocate all free dirty runs within chunk. */
-	for (pageind = map_bias; pageind < chunk_npages;) {
+	if (config_stats)
+		arena->stats.purged += chunk->ndirty;
+
+	/*
+	 * Operate on all dirty runs if there is no clean/dirty run
+	 * fragmentation.
+	 */
+	if (chunk->nruns_adjac == 0)
+		all = true;
+
+	/*
+	 * Temporarily allocate free dirty runs within chunk.  If all is false,
+	 * only operate on dirty runs that are fragments; otherwise operate on
+	 * all dirty runs.
+	 */
+	for (pageind = map_bias; pageind < chunk_npages; pageind += npages) {
 		mapelm = arena_mapp_get(chunk, pageind);
 		if (arena_mapbits_allocated_get(chunk, pageind) == 0) {
-			size_t npages;
+			size_t run_size =
+			    arena_mapbits_unallocated_size_get(chunk, pageind);
 
-			npages = arena_mapbits_unallocated_size_get(chunk,
-			    pageind) >> LG_PAGE;
+			npages = run_size >> LG_PAGE;
 			assert(pageind + npages <= chunk_npages);
 			assert(arena_mapbits_dirty_get(chunk, pageind) ==
 			    arena_mapbits_dirty_get(chunk, pageind+npages-1));
-			if (arena_mapbits_dirty_get(chunk, pageind) != 0) {
-				arena_avail_tree_remove(
-				    &arena->runs_avail_dirty, mapelm);
 
-				arena_mapbits_large_set(chunk, pageind,
-				    (npages << LG_PAGE), 0);
-				if (npages > 1) {
-					arena_mapbits_large_set(chunk,
-					    pageind+npages-1, 0, 0);
-				}
+			if (arena_mapbits_dirty_get(chunk, pageind) != 0 &&
+			    (all || arena_avail_adjac(chunk, pageind,
+			    npages))) {
+				arena_run_t *run = (arena_run_t *)((uintptr_t)
+				    chunk + (uintptr_t)(pageind << LG_PAGE));
 
-				if (config_stats) {
-					/*
-					 * Update stats_cactive if nactive is
-					 * crossing a chunk multiple.
-					 */
-					size_t cactive_diff =
-					    CHUNK_CEILING((arena->nactive +
-					    npages) << LG_PAGE) -
-					    CHUNK_CEILING(arena->nactive <<
-					    LG_PAGE);
-					if (cactive_diff != 0)
-						stats_cactive_add(cactive_diff);
-				}
-				arena->nactive += npages;
+				arena_run_split(arena, run, run_size, true,
+				    BININD_INVALID, false);
 				/* Append to list for later processing. */
 				ql_elm_new(mapelm, u.ql_link);
 				ql_tail_insert(&mapelms, mapelm, u.ql_link);
 			}
-
-			pageind += npages;
 		} else {
-			/* Skip allocated run. */
-			if (arena_mapbits_large_get(chunk, pageind))
-				pageind += arena_mapbits_large_size_get(chunk,
+			/* Skip run. */
+			if (arena_mapbits_large_get(chunk, pageind) != 0) {
+				npages = arena_mapbits_large_size_get(chunk,
 				    pageind) >> LG_PAGE;
-			else {
+			} else {
 				size_t binind;
 				arena_bin_info_t *bin_info;
 				arena_run_t *run = (arena_run_t *)((uintptr_t)
@@ -636,36 +762,27 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk)
 				    pageind) == 0);
 				binind = arena_bin_index(arena, run->bin);
 				bin_info = &arena_bin_info[binind];
-				pageind += bin_info->run_size >> LG_PAGE;
+				npages = bin_info->run_size >> LG_PAGE;
 			}
 		}
 	}
 	assert(pageind == chunk_npages);
-
-	if (config_debug)
-		ndirty = chunk->ndirty;
-	if (config_stats)
-		arena->stats.purged += chunk->ndirty;
-	arena->ndirty -= chunk->ndirty;
-	chunk->ndirty = 0;
-	ql_remove(&arena->chunks_dirty, chunk, link_dirty);
-	chunk->dirtied = false;
+	assert(chunk->ndirty == 0 || all == false);
+	assert(chunk->nruns_adjac == 0);
 
 	malloc_mutex_unlock(&arena->lock);
 	if (config_stats)
 		nmadvise = 0;
+	npurged = 0;
 	ql_foreach(mapelm, &mapelms, u.ql_link) {
-		size_t pageind = (((uintptr_t)mapelm - (uintptr_t)chunk->map) /
-		    sizeof(arena_chunk_map_t)) + map_bias;
-		size_t npages = arena_mapbits_large_size_get(chunk, pageind) >>
-		    LG_PAGE;
 		bool unzeroed;
 		size_t flag_unzeroed, i;
 
+		pageind = (((uintptr_t)mapelm - (uintptr_t)chunk->map) /
+		    sizeof(arena_chunk_map_t)) + map_bias;
+		npages = arena_mapbits_large_size_get(chunk, pageind) >>
+		    LG_PAGE;
 		assert(pageind + npages <= chunk_npages);
-		assert(ndirty >= npages);
-		if (config_debug)
-			ndirty -= npages;
 		unzeroed = pages_purge((void *)((uintptr_t)chunk + (pageind <<
 		    LG_PAGE)), (npages << LG_PAGE));
 		flag_unzeroed = unzeroed ? CHUNK_MAP_UNZEROED : 0;
@@ -683,10 +800,10 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk)
 			arena_mapbits_unzeroed_set(chunk, pageind+i,
 			    flag_unzeroed);
 		}
+		npurged += npages;
 		if (config_stats)
 			nmadvise++;
 	}
-	assert(ndirty == 0);
 	malloc_mutex_lock(&arena->lock);
 	if (config_stats)
 		arena->stats.nmadvise += nmadvise;
@@ -694,14 +811,27 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk)
 	/* Deallocate runs. */
 	for (mapelm = ql_first(&mapelms); mapelm != NULL;
 	    mapelm = ql_first(&mapelms)) {
-		size_t pageind = (((uintptr_t)mapelm - (uintptr_t)chunk->map) /
-		    sizeof(arena_chunk_map_t)) + map_bias;
-		arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
-		    (uintptr_t)(pageind << LG_PAGE));
+		arena_run_t *run;
 
+		pageind = (((uintptr_t)mapelm - (uintptr_t)chunk->map) /
+		    sizeof(arena_chunk_map_t)) + map_bias;
+		run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)(pageind <<
+		    LG_PAGE));
 		ql_remove(&mapelms, mapelm, u.ql_link);
-		arena_run_dalloc(arena, run, false);
+		arena_run_dalloc(arena, run, false, true);
 	}
+
+	return (npurged);
+}
+
+static arena_chunk_t *
+chunks_dirty_iter_cb(arena_chunk_tree_t *tree, arena_chunk_t *chunk, void *arg)
+{
+       size_t *ndirty = (size_t *)arg;
+
+       assert(chunk->ndirty != 0);
+       *ndirty += chunk->ndirty;
+       return (NULL);
 }
 
 static void
@@ -712,14 +842,11 @@ arena_purge(arena_t *arena, bool all)
 	if (config_debug) {
 		size_t ndirty = 0;
 
-		ql_foreach(chunk, &arena->chunks_dirty, link_dirty) {
-		    assert(chunk->dirtied);
-		    ndirty += chunk->ndirty;
-		}
+		arena_chunk_dirty_iter(&arena->chunks_dirty, NULL,
+		    chunks_dirty_iter_cb, (void *)&ndirty);
 		assert(ndirty == arena->ndirty);
 	}
 	assert(arena->ndirty > arena->npurgatory || all);
-	assert(arena->ndirty - arena->npurgatory > chunk_npages || all);
 	assert((arena->nactive >> opt_lg_dirty_mult) < (arena->ndirty -
 	    arena->npurgatory) || all);
 
@@ -731,16 +858,24 @@ arena_purge(arena_t *arena, bool all)
 	 * purge, and add the result to arena->npurgatory.  This will keep
 	 * multiple threads from racing to reduce ndirty below the threshold.
 	 */
-	npurgatory = arena->ndirty - arena->npurgatory;
-	if (all == false) {
-		assert(npurgatory >= arena->nactive >> opt_lg_dirty_mult);
-		npurgatory -= arena->nactive >> opt_lg_dirty_mult;
+	{
+		size_t npurgeable = arena->ndirty - arena->npurgatory;
+
+		if (all == false) {
+			size_t threshold = (arena->nactive >>
+			    opt_lg_dirty_mult);
+
+			npurgatory = npurgeable - threshold;
+		} else
+			npurgatory = npurgeable;
 	}
 	arena->npurgatory += npurgatory;
 
 	while (npurgatory > 0) {
+		size_t npurgeable, npurged, nunpurged;
+
 		/* Get next chunk with dirty pages. */
-		chunk = ql_first(&arena->chunks_dirty);
+		chunk = arena_chunk_dirty_first(&arena->chunks_dirty);
 		if (chunk == NULL) {
 			/*
 			 * This thread was unable to purge as many pages as
@@ -751,23 +886,15 @@ arena_purge(arena_t *arena, bool all)
 			arena->npurgatory -= npurgatory;
 			return;
 		}
-		while (chunk->ndirty == 0) {
-			ql_remove(&arena->chunks_dirty, chunk, link_dirty);
-			chunk->dirtied = false;
-			chunk = ql_first(&arena->chunks_dirty);
-			if (chunk == NULL) {
-				/* Same logic as for above. */
-				arena->npurgatory -= npurgatory;
-				return;
-			}
-		}
+		npurgeable = chunk->ndirty;
+		assert(npurgeable != 0);
 
-		if (chunk->ndirty > npurgatory) {
+		if (npurgeable > npurgatory && chunk->nruns_adjac == 0) {
 			/*
-			 * This thread will, at a minimum, purge all the dirty
-			 * pages in chunk, so set npurgatory to reflect this
-			 * thread's commitment to purge the pages.  This tends
-			 * to reduce the chances of the following scenario:
+			 * This thread will purge all the dirty pages in chunk,
+			 * so set npurgatory to reflect this thread's intent to
+			 * purge the pages.  This tends to reduce the chances
+			 * of the following scenario:
 			 *
 			 * 1) This thread sets arena->npurgatory such that
 			 *    (arena->ndirty - arena->npurgatory) is at the
@@ -781,13 +908,20 @@ arena_purge(arena_t *arena, bool all)
 			 * because all of the purging work being done really
 			 * needs to happen.
 			 */
-			arena->npurgatory += chunk->ndirty - npurgatory;
-			npurgatory = chunk->ndirty;
+			arena->npurgatory += npurgeable - npurgatory;
+			npurgatory = npurgeable;
 		}
 
-		arena->npurgatory -= chunk->ndirty;
-		npurgatory -= chunk->ndirty;
-		arena_chunk_purge(arena, chunk);
+		/*
+		 * Keep track of how many pages are purgeable, versus how many
+		 * actually get purged, and adjust counters accordingly.
+		 */
+		arena->npurgatory -= npurgeable;
+		npurgatory -= npurgeable;
+		npurged = arena_chunk_purge(arena, chunk, all);
+		nunpurged = npurgeable - npurged;
+		arena->npurgatory += nunpurged;
+		npurgatory += nunpurged;
 	}
 }
 
@@ -801,11 +935,10 @@ arena_purge_all(arena_t *arena)
 }
 
 static void
-arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty)
+arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, bool cleaned)
 {
 	arena_chunk_t *chunk;
 	size_t size, run_ind, run_pages, flag_dirty;
-	arena_avail_tree_t *runs_avail;
 
 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
 	run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE);
@@ -836,15 +969,14 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty)
 
 	/*
 	 * The run is dirty if the caller claims to have dirtied it, as well as
-	 * if it was already dirty before being allocated.
+	 * if it was already dirty before being allocated and the caller
+	 * doesn't claim to have cleaned it.
 	 */
 	assert(arena_mapbits_dirty_get(chunk, run_ind) ==
 	    arena_mapbits_dirty_get(chunk, run_ind+run_pages-1));
-	if (arena_mapbits_dirty_get(chunk, run_ind) != 0)
+	if (cleaned == false && arena_mapbits_dirty_get(chunk, run_ind) != 0)
 		dirty = true;
 	flag_dirty = dirty ? CHUNK_MAP_DIRTY : 0;
-	runs_avail = dirty ? &arena->runs_avail_dirty :
-	    &arena->runs_avail_clean;
 
 	/* Mark pages as unallocated in the chunk map. */
 	if (dirty) {
@@ -852,9 +984,6 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty)
 		    CHUNK_MAP_DIRTY);
 		arena_mapbits_unallocated_set(chunk, run_ind+run_pages-1, size,
 		    CHUNK_MAP_DIRTY);
-
-		chunk->ndirty += run_pages;
-		arena->ndirty += run_pages;
 	} else {
 		arena_mapbits_unallocated_set(chunk, run_ind, size,
 		    arena_mapbits_unzeroed_get(chunk, run_ind));
@@ -878,8 +1007,8 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty)
 		    run_ind+run_pages+nrun_pages-1) == nrun_size);
 		assert(arena_mapbits_dirty_get(chunk,
 		    run_ind+run_pages+nrun_pages-1) == flag_dirty);
-		arena_avail_tree_remove(runs_avail,
-		    arena_mapp_get(chunk, run_ind+run_pages));
+		arena_avail_remove(arena, chunk, run_ind+run_pages, nrun_pages,
+		    false, true);
 
 		size += nrun_size;
 		run_pages += nrun_pages;
@@ -905,8 +1034,8 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty)
 		assert(arena_mapbits_unallocated_size_get(chunk, run_ind) ==
 		    prun_size);
 		assert(arena_mapbits_dirty_get(chunk, run_ind) == flag_dirty);
-		arena_avail_tree_remove(runs_avail, arena_mapp_get(chunk,
-		    run_ind));
+		arena_avail_remove(arena, chunk, run_ind, prun_pages, true,
+		    false);
 
 		size += prun_size;
 		run_pages += prun_pages;
@@ -921,19 +1050,7 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty)
 	    arena_mapbits_unallocated_size_get(chunk, run_ind+run_pages-1));
 	assert(arena_mapbits_dirty_get(chunk, run_ind) ==
 	    arena_mapbits_dirty_get(chunk, run_ind+run_pages-1));
-	arena_avail_tree_insert(runs_avail, arena_mapp_get(chunk, run_ind));
-
-	if (dirty) {
-		/*
-		 * Insert into chunks_dirty before potentially calling
-		 * arena_chunk_dealloc(), so that chunks_dirty and
-		 * arena->ndirty are consistent.
-		 */
-		if (chunk->dirtied == false) {
-			ql_tail_insert(&arena->chunks_dirty, chunk, link_dirty);
-			chunk->dirtied = true;
-		}
-	}
+	arena_avail_insert(arena, chunk, run_ind, run_pages, true, true);
 
 	/* Deallocate chunk if it is now completely unused. */
 	if (size == arena_maxclass) {
@@ -982,7 +1099,7 @@ arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
 	arena_mapbits_large_set(chunk, pageind+head_npages, newsize,
 	    flag_dirty);
 
-	arena_run_dalloc(arena, run, false);
+	arena_run_dalloc(arena, run, false, false);
 }
 
 static void
@@ -1015,7 +1132,7 @@ arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
 	    flag_dirty);
 
 	arena_run_dalloc(arena, (arena_run_t *)((uintptr_t)run + newsize),
-	    dirty);
+	    dirty, false);
 }
 
 static arena_run_t *
@@ -1526,7 +1643,7 @@ arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
 		    ((past - run_ind) << LG_PAGE), false);
 		/* npages = past - run_ind; */
 	}
-	arena_run_dalloc(arena, run, true);
+	arena_run_dalloc(arena, run, true, false);
 	malloc_mutex_unlock(&arena->lock);
 	/****************************/
 	malloc_mutex_lock(&bin->lock);
@@ -1638,7 +1755,7 @@ arena_dalloc_large_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr)
 		}
 	}
 
-	arena_run_dalloc(arena, (arena_run_t *)ptr, true);
+	arena_run_dalloc(arena, (arena_run_t *)ptr, true, false);
 }
 
 void
@@ -1985,15 +2102,14 @@ arena_new(arena_t *arena, unsigned ind)
 	arena->dss_prec = chunk_dss_prec_get();
 
 	/* Initialize chunks. */
-	ql_new(&arena->chunks_dirty);
+	arena_chunk_dirty_new(&arena->chunks_dirty);
 	arena->spare = NULL;
 
 	arena->nactive = 0;
 	arena->ndirty = 0;
 	arena->npurgatory = 0;
 
-	arena_avail_tree_new(&arena->runs_avail_clean);
-	arena_avail_tree_new(&arena->runs_avail_dirty);
+	arena_avail_tree_new(&arena->runs_avail);
 
 	/* Initialize bins. */
 	for (i = 0; i < NBINS; i++) {

From 847ff223dedad6b0f5186f904c817c0306ce599f Mon Sep 17 00:00:00 2001
From: Mike Hommey <mh@glandium.org>
Date: Tue, 23 Oct 2012 08:42:48 +0200
Subject: [PATCH 4/7] Don't register jemalloc's zone allocator if something
 else already replaced the system default zone

---
 src/zone.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/src/zone.c b/src/zone.c
index cde5d49a..c62c183f 100644
--- a/src/zone.c
+++ b/src/zone.c
@@ -171,6 +171,16 @@ void
 register_zone(void)
 {
 
+	/*
+	 * If something else replaced the system default zone allocator, don't
+	 * register jemalloc's.
+	 */
+	malloc_zone_t *default_zone = malloc_default_zone();
+	if (!default_zone->zone_name ||
+	    strcmp(default_zone->zone_name, "DefaultMallocZone") != 0) {
+		return;
+	}
+
 	zone.size = (void *)zone_size;
 	zone.malloc = (void *)zone_malloc;
 	zone.calloc = (void *)zone_calloc;
@@ -241,7 +251,7 @@ register_zone(void)
 	 * then becomes the default.
 	 */
 	do {
-		malloc_zone_t *default_zone = malloc_default_zone();
+		default_zone = malloc_default_zone();
 		malloc_zone_unregister(default_zone);
 		malloc_zone_register(default_zone);
 	} while (malloc_default_zone() != &zone);

From ed90c97332e34e9552cdde102d8b4a9cd11bb5cb Mon Sep 17 00:00:00 2001
From: Jan Beich <jbeich@tormail.org>
Date: Thu, 18 Oct 2012 00:06:32 +0400
Subject: [PATCH 5/7] document what stats.active does not track

Based on http://www.canonware.com/pipermail/jemalloc-discuss/2012-March/000164.html
---
 doc/jemalloc.xml.in | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in
index 754fdcc7..54b87474 100644
--- a/doc/jemalloc.xml.in
+++ b/doc/jemalloc.xml.in
@@ -1512,7 +1512,9 @@ malloc_conf = "xmalloc:true";]]></programlisting>
         application.  This is a multiple of the page size, and greater than or
         equal to <link
         linkend="stats.allocated"><mallctl>stats.allocated</mallctl></link>.
-        </para></listitem>
+        This does not include <link linkend="stats.arenas.i.pdirty">
+        <mallctl>stats.arenas.&lt;i&gt;.pdirty</mallctl></link> and pages
+        entirely devoted to allocator metadata.</para></listitem>
       </varlistentry>
 
       <varlistentry>
@@ -1628,7 +1630,7 @@ malloc_conf = "xmalloc:true";]]></programlisting>
         <listitem><para>Number of pages in active runs.</para></listitem>
       </varlistentry>
 
-      <varlistentry>
+      <varlistentry id="stats.arenas.i.pdirty">
         <term>
           <mallctl>stats.arenas.&lt;i&gt;.pdirty</mallctl>
           (<type>size_t</type>)

From abf6739317742ca4677bf885178984a8757ee14a Mon Sep 17 00:00:00 2001
From: Jason Evans <je@fb.com>
Date: Wed, 7 Nov 2012 10:05:04 -0800
Subject: [PATCH 6/7] Tweak chunk purge order according to fragmentation.

Tweak chunk purge order to purge unfragmented chunks from high to low
memory.  This facilitates dirty run reuse.
---
 src/arena.c | 45 ++++++++++++++++++++++++++++++++++-----------
 1 file changed, 34 insertions(+), 11 deletions(-)

diff --git a/src/arena.c b/src/arena.c
index b93a6799..0c53b071 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -138,14 +138,22 @@ rb_gen(static UNUSED, arena_avail_tree_, arena_avail_tree_t, arena_chunk_map_t,
 static inline int
 arena_chunk_dirty_comp(arena_chunk_t *a, arena_chunk_t *b)
 {
-	size_t a_val, b_val;
 
 	assert(a != NULL);
 	assert(b != NULL);
 
+	/*
+	 * Short-circuit for self comparison.  The following comparison code
+	 * would come to the same result, but at the cost of executing the slow
+	 * path.
+	 */
+	if (a == b)
+		return (0);
+
 	/*
 	 * Order such that chunks with higher fragmentation are "less than"
-	 * those with lower fragmentation.  Fragmentation is measured as:
+	 * those with lower fragmentation -- purging order is from "least" to
+	 * "greatest".  Fragmentation is measured as:
 	 *
 	 *     mean current avail run size
 	 *   --------------------------------
@@ -163,18 +171,33 @@ arena_chunk_dirty_comp(arena_chunk_t *a, arena_chunk_t *b)
 	 * comparison, in order to avoid division.
 	 *
 	 */
-	a_val = (a->nruns_avail - a->nruns_adjac) * b->nruns_avail;
-	b_val = (b->nruns_avail - b->nruns_adjac) * a->nruns_avail;
-	if (a_val < b_val)
-		return (1);
-	if (a_val > b_val)
-		return (-1);
-	/* Break ties by chunk address. */
+	{
+		size_t a_val = (a->nruns_avail - a->nruns_adjac) *
+		    b->nruns_avail;
+		size_t b_val = (b->nruns_avail - b->nruns_adjac) *
+		    a->nruns_avail;
+
+		if (a_val < b_val)
+			return (1);
+		if (a_val > b_val)
+			return (-1);
+	}
+	/*
+	 * Break ties by chunk address.  For fragmented chunks, report lower
+	 * addresses as "lower", so that fragmentation reduction happens first
+	 * at lower addresses.  However, use the opposite ordering for
+	 * unfragmented chunks, in order to increase the chances of
+	 * re-allocating dirty runs.
+	 */
 	{
 		uintptr_t a_chunk = (uintptr_t)a;
 		uintptr_t b_chunk = (uintptr_t)b;
-
-		return ((a_chunk > b_chunk) - (a_chunk < b_chunk));
+		int ret = ((a_chunk > b_chunk) - (a_chunk < b_chunk));
+		if (a->nruns_adjac == 0) {
+			assert(b->nruns_adjac == 0);
+			ret = -ret;
+		}
+		return (ret);
 	}
 }
 

From 556ddc7fa94f13c388ec6c9d2d54ace250540f2c Mon Sep 17 00:00:00 2001
From: Jason Evans <je@fb.com>
Date: Wed, 7 Nov 2012 15:16:29 -0800
Subject: [PATCH 7/7] Update ChangeLog for 3.2.0.

---
 ChangeLog | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index edba5269..ab3476c6 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -6,10 +6,14 @@ found in the git revision history:
     http://www.canonware.com/cgi-bin/gitweb.cgi?p=jemalloc.git
     git://canonware.com/jemalloc.git
 
-* 3.x.x (XXX Not released)
+* 3.2.0 (November 9, 2012)
+
+  In addition to a couple of bug fixes, this version modifies page run
+  allocation and dirty page purging algorithms in order to better control
+  page-level virtual memory fragmentation.
 
   Incompatible changes:
-  - Change the "opt.lg_dirty_mult" from 5 to 3 (32:1 to 8:1).
+  - Change the "opt.lg_dirty_mult" default from 5 to 3 (32:1 to 8:1).
 
   Bug fixes:
   - Fix dss/mmap allocation precedence code to use recyclable mmap memory only