Merge branch 'dev'

Jason Evans 2011-01-31 20:12:32 -08:00
commit a73ebd946a
12 changed files with 97 additions and 89 deletions

View File

@ -6,6 +6,14 @@ found in the git revision history:
http://www.canonware.com/cgi-bin/gitweb.cgi?p=jemalloc.git
git://canonware.com/jemalloc.git
+ * 2.1.1
+ Bug fixes:
+ - Fix aligned huge reallocation (affected allocm()).
+ - Fix the ALLOCM_LG_ALIGN macro definition.
+ - Fix a heap dumping deadlock.
+ - Fix a "thread.arena" mallctl bug.
* 2.1.0
This version incorporates some optimizations that can't quite be considered
@ -27,7 +35,7 @@ found in the git revision history:
Bug fixes:
- Fix a race condition in heap profiling that could cause undefined behavior
- if opt.prof_accum were disabled.
+ if "opt.prof_accum" were disabled.
- Add missing mutex unlocks for some OOM error paths in the heap profiling
code.
- Fix a compilation error for non-C99 builds.

View File

@ -28,7 +28,7 @@ any of the following arguments (not a definitive list) to 'configure':
--with-jemalloc-prefix=<prefix>
Prefix all public APIs with <prefix>. For example, if <prefix> is
"prefix_", the API changes like the following occur:
"prefix_", API changes like the following occur:
malloc() --> prefix_malloc()
malloc_conf --> prefix_malloc_conf
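
For illustration, a program linked against a build configured with --with-jemalloc-prefix=je_ calls the renamed entry points directly. A minimal sketch (the "je_" prefix and the hand-written prototypes are assumptions following the renaming rule above):

    #include <stddef.h> /* size_t */

    /* Public API as renamed by --with-jemalloc-prefix=je_. */
    void *je_malloc(size_t size);
    void je_free(void *ptr);

    int
    main(void)
    {
        void *p = je_malloc(64); /* je_malloc() instead of malloc() */

        je_free(p);
        return (0);
    }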
@ -48,9 +48,9 @@ any of the following arguments (not a definitive list) to 'configure':
example, libjemalloc.so.0 becomes libjemalloc<suffix>.so.0.
--enable-cc-silence
- Enable code that silences unuseful compiler warnings. This is helpful when
- trying to tell serious warnings from those due to compiler limitations, but
- it potentially incurs a performance penalty.
+ Enable code that silences non-useful compiler warnings. This is helpful
+ when trying to tell serious warnings from those due to compiler
+ limitations, but it potentially incurs a performance penalty.
--enable-debug
Enable assertions and validation code. This incurs a substantial
@ -62,7 +62,7 @@ any of the following arguments (not a definitive list) to 'configure':
--enable-prof
Enable heap profiling and leak detection functionality. See the "opt.prof"
- option documention for usage details.
+ option documentation for usage details.
--disable-prof-libgcc
Disable the use of libgcc's backtracing functionality. Ordinarily, libgcc's
@ -89,7 +89,7 @@ any of the following arguments (not a definitive list) to 'configure':
--disable-tcache
Disable thread-specific caches for small objects. Objects are cached and
released in bulk, thus reducing the total number of mutex operations. See
the "opt.tcache" option for suage details.
the "opt.tcache" option for usage details.
--enable-swap
Enable mmap()ed swap file support. When this feature is built in, it is
@ -198,8 +198,8 @@ MANDIR="?"
Use this as the installation prefix for man pages.
DESTDIR="?"
- Prepend DESTDIR to INCLUDEDIR, LIBDIR, and MANDIR. This is useful when
- installing to a different path than was specified via --prefix.
+ Prepend DESTDIR to INCLUDEDIR, LIBDIR, DATADIR, and MANDIR. This is useful
+ when installing to a different path than was specified via --prefix.
CC="?"
Use this to invoke the C compiler.

View File

@ -45,9 +45,10 @@
* point is implicitly RUN_BFP bits to the left.
*
* Note that it is possible to set RUN_MAX_OVRHD low enough that it cannot be
- * honored for some/all object sizes, since there is one bit of header overhead
- * per object (plus a constant). This constraint is relaxed (ignored) for runs
- * that are so small that the per-region overhead is greater than:
+ * honored for some/all object sizes, since when heap profiling is enabled
+ * there is one pointer of header overhead per object (plus a constant). This
+ * constraint is relaxed (ignored) for runs that are so small that the
+ * per-region overhead is greater than:
*
* (RUN_MAX_OVRHD / (reg_size << (3+RUN_BFP))
*/
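
To make the fixed point concrete (note the quoted comment's unbalanced parentheses; the intended expression is RUN_MAX_OVRHD / (reg_size << (3 + RUN_BFP))), here is a worked sketch. The values 12 and 0x3d are what these macros are believed to be elsewhere in this header, but they are assumptions; check the build's arena.h:

    #include <stdio.h>

    #define RUN_BFP        12          /* Assumed from this header. */
    #define RUN_MAX_OVRHD  0x0000003dU /* Assumed from this header. */

    int
    main(void)
    {
        /* 0x3d / 2^12 == 61/4096: at most ~1.5% of a run may be header. */
        printf("max run overhead: %.2f%%\n",
            100.0 * RUN_MAX_OVRHD / (1U << RUN_BFP));
        return (0);
    }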
@ -105,7 +106,7 @@ struct arena_chunk_map_s {
* Run address (or size) and various flags are stored together. The bit
* layout looks like (assuming 32-bit system):
*
- * ???????? ???????? ????---- ----dzla
+ * ???????? ???????? ????---- ----dula
*
* ? : Unallocated: Run address for first/last pages, unset for internal
* pages.
@ -113,7 +114,7 @@ struct arena_chunk_map_s {
* Large: Run size for first page, unset for trailing pages.
* - : Unused.
* d : dirty?
- * z : zeroed?
+ * u : unzeroed?
* l : large?
* a : allocated?
*
@ -129,30 +130,30 @@ struct arena_chunk_map_s {
* [dula] : bit unset
*
* Unallocated (clean):
- * ssssssss ssssssss ssss---- ----du--
+ * ssssssss ssssssss ssss---- ----du-a
* xxxxxxxx xxxxxxxx xxxx---- -----Uxx
- * ssssssss ssssssss ssss---- ----dU--
+ * ssssssss ssssssss ssss---- ----dU-a
*
* Unallocated (dirty):
- * ssssssss ssssssss ssss---- ----D---
+ * ssssssss ssssssss ssss---- ----D--a
* xxxxxxxx xxxxxxxx xxxx---- ----xxxx
- * ssssssss ssssssss ssss---- ----D---
+ * ssssssss ssssssss ssss---- ----D--a
*
* Small:
- * pppppppp pppppppp pppp---- ----d--a
- * pppppppp pppppppp pppp---- -------a
- * pppppppp pppppppp pppp---- ----d--a
+ * pppppppp pppppppp pppp---- ----d--A
+ * pppppppp pppppppp pppp---- -------A
+ * pppppppp pppppppp pppp---- ----d--A
*
* Large:
- * ssssssss ssssssss ssss---- ----D-la
+ * ssssssss ssssssss ssss---- ----D-LA
* xxxxxxxx xxxxxxxx xxxx---- ----xxxx
- * -------- -------- -------- ----D-la
+ * -------- -------- -------- ----D-LA
*
* Large (sampled, size <= PAGE_SIZE):
- * ssssssss ssssssss sssscccc ccccD-la
+ * ssssssss ssssssss sssscccc ccccD-LA
*
* Large (not sampled, size == PAGE_SIZE):
- * ssssssss ssssssss ssss---- ----D-la
+ * ssssssss ssssssss ssss---- ----D-LA
*/
size_t bits;
#ifdef JEMALLOC_PROF
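
The renamed 'u' (unzeroed) bit inverts the old 'z' (zeroed) sense. As a sketch of how the low "dula" flag bits might be decoded, mirroring the legend above (the mask names and values are assumptions for illustration, not necessarily the macros this header defines):

    #include <stddef.h>
    #include <stdio.h>

    #define MAP_DIRTY     ((size_t)0x8U) /* d/D */
    #define MAP_UNZEROED  ((size_t)0x4U) /* u/U */
    #define MAP_LARGE     ((size_t)0x2U) /* l/L */
    #define MAP_ALLOCATED ((size_t)0x1U) /* a/A */

    static void
    describe_mapbits(size_t bits)
    {
        size_t size = bits & ~(size_t)0xfffU; /* High bits: size/offset. */

        if ((bits & MAP_ALLOCATED) == 0)
            printf("unallocated run, size %zu\n", size);
        else if (bits & MAP_LARGE)
            printf("large run, size %zu\n", size);
        else
            printf("small run, page offset %zu\n", size);
        printf("dirty=%d unzeroed=%d\n", (bits & MAP_DIRTY) != 0,
            (bits & MAP_UNZEROED) != 0);
    }

    int
    main(void)
    {
        /* First page of a dirty 8 KiB large run: D, L, and A set. */
        describe_mapbits((size_t)8192 | MAP_DIRTY | MAP_LARGE |
            MAP_ALLOCATED);
        return (0);
    }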
@ -347,45 +348,35 @@ struct arena_s {
/*
* bins is used to store trees of free regions of the following sizes,
- * assuming a 16-byte quantum, 4 KiB page size, and default
- * JEMALLOC_OPTIONS.
+ * assuming a 64-bit system with 16-byte quantum, 4 KiB page size, and
+ * default MALLOC_CONF.
*
* bins[i] | size |
* --------+--------+
- * 0 | 2 |
- * 1 | 4 |
- * 2 | 8 |
+ * 0 | 8 |
* --------+--------+
- * 3 | 16 |
- * 4 | 32 |
- * 5 | 48 |
+ * 1 | 16 |
+ * 2 | 32 |
+ * 3 | 48 |
* : :
- * 8 | 96 |
- * 9 | 112 |
- * 10 | 128 |
+ * 6 | 96 |
+ * 7 | 112 |
+ * 8 | 128 |
* --------+--------+
- * 11 | 192 |
- * 12 | 256 |
- * 13 | 320 |
- * 14 | 384 |
- * 15 | 448 |
- * 16 | 512 |
+ * 9 | 192 |
+ * 10 | 256 |
+ * 11 | 320 |
+ * 12 | 384 |
+ * 13 | 448 |
+ * 14 | 512 |
* --------+--------+
- * 17 | 768 |
- * 18 | 1024 |
- * 19 | 1280 |
+ * 15 | 768 |
+ * 16 | 1024 |
+ * 17 | 1280 |
* : :
- * 27 | 3328 |
- * 28 | 3584 |
- * 29 | 3840 |
- * --------+--------+
- * 30 | 4 KiB |
- * 31 | 6 KiB |
- * 33 | 8 KiB |
- * : :
- * 43 | 28 KiB |
- * 44 | 30 KiB |
- * 45 | 32 KiB |
+ * 25 | 3328 |
+ * 26 | 3584 |
+ * 27 | 3840 |
* --------+--------+
*/
arena_bin_t bins[1]; /* Dynamically sized. */
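
The renumbering follows from dropping the sub-quantum 2- and 4-byte classes and the stale >= 4 KiB rows. The new spacing can be reproduced mechanically; a sketch of the documented layout (not jemalloc's actual size-class machinery), assuming the 64-bit, 16-byte-quantum, 4 KiB-page configuration stated above:

    #include <stdio.h>

    static unsigned
    bin2size(unsigned i)
    {
        if (i == 0)
            return (8);                  /* Single sub-quantum bin. */
        if (i <= 8)
            return (16 * i);             /* Quantum-spaced: 16..128. */
        if (i <= 14)
            return (128 + 64 * (i - 8)); /* Cacheline-spaced: 192..512. */
        return (512 + 256 * (i - 14));   /* Subpage-spaced: 768..3840. */
    }

    int
    main(void)
    {
        unsigned i;

        for (i = 0; i < 28; i++)         /* Prints the table above. */
            printf("%2u | %4u\n", i, bin2size(i));
        return (0);
    }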

View File

@ -31,7 +31,7 @@ struct ckhc_s {
struct ckh_s {
#ifdef JEMALLOC_DEBUG
- #define CKH_MAGIG 0x3af2489d
+ #define CKH_MAGIC 0x3af2489d
uint32_t magic;
#endif

View File

@ -37,7 +37,6 @@ struct tcache_bin_s {
tcache_bin_stats_t tstats;
# endif
unsigned low_water; /* Min # cached since last GC. */
- unsigned high_water; /* Max # cached since last GC. */
unsigned ncached; /* # of cached objects. */
unsigned ncached_max; /* Upper limit on ncached. */
void *avail; /* Chain of available objects. */
@ -194,7 +193,6 @@ tcache_event(tcache_t *tcache)
}
}
tbin->low_water = tbin->ncached;
- tbin->high_water = tbin->ncached;
tcache->next_gc_bin++;
if (tcache->next_gc_bin == nhbins)
@ -348,8 +346,6 @@ tcache_dalloc_small(tcache_t *tcache, void *ptr)
*(void **)ptr = tbin->avail;
tbin->avail = ptr;
tbin->ncached++;
- if (tbin->ncached > tbin->high_water)
- tbin->high_water = tbin->ncached;
tcache_event(tcache);
}
@ -388,8 +384,6 @@ tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size)
*(void **)ptr = tbin->avail;
tbin->avail = ptr;
tbin->ncached++;
- if (tbin->ncached > tbin->high_water)
- tbin->high_water = tbin->ncached;
tcache_event(tcache);
}
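
With high_water gone, incremental GC keys off low_water alone: whatever stayed cached through an entire GC epoch is surplus. A rough sketch of that policy (hypothetical helper and a ~3/4 trim fraction as an assumption; the real logic lives in tcache_event()):

    /* Sketch: trim most of the objects that sat unused all epoch. */
    typedef struct {
        unsigned low_water; /* Min # cached since last GC. */
        unsigned ncached;   /* # of cached objects. */
    } tbin_sketch_t;

    static unsigned
    tbin_gc_trim(tbin_sketch_t *tbin)
    {
        unsigned ntrim = tbin->low_water - (tbin->low_water >> 2);

        tbin->ncached -= ntrim;          /* Flushing ntrim objects elided. */
        tbin->low_water = tbin->ncached; /* Start the next epoch. */
        return (ntrim);
    }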

View File

@ -19,7 +19,7 @@ extern "C" {
# define JEMALLOC_P(s) s
#endif
- #define ALLOCM_LG_ALIGN ((int)0x3f)
+ #define ALLOCM_LG_ALIGN(la) (la)
#if LG_SIZEOF_PTR == 2
#define ALLOCM_ALIGN(a) (ffs(a)-1)
#else
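
With the fix, ALLOCM_LG_ALIGN is function-like, so passing a base-2 logarithm of the alignment to the experimental allocm() interface compiles as documented; under the old object-like definition, ALLOCM_LG_ALIGN(6) expanded to ((int)0x3f)(6) and failed to compile. A sketch, assuming an unprefixed build (JEMALLOC_P(s) == s) and this header's experimental API:

    #include <stdio.h>
    #include <stdlib.h>
    #include <jemalloc/jemalloc.h>

    int
    main(void)
    {
        void *p;
        size_t rsize;

        /* Request 4 KiB with 64-byte (lg 6) alignment. */
        if (allocm(&p, &rsize, 4096, ALLOCM_LG_ALIGN(6)) != ALLOCM_SUCCESS)
            return (1);
        printf("got %zu bytes at %p\n", rsize, p);
        dallocm(p, 0);
        return (0);
    }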

View File

@ -1358,8 +1358,6 @@ arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind
#endif
malloc_mutex_unlock(&bin->lock);
tbin->ncached = i;
- if (tbin->ncached > tbin->high_water)
- tbin->high_water = tbin->ncached;
}
#endif
@ -1369,7 +1367,6 @@ arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind
* *) bin->run_size >= min_run_size
* *) bin->run_size <= arena_maxclass
* *) run header overhead <= RUN_MAX_OVRHD (or header overhead relaxed).
- * *) run header size < PAGE_SIZE
*
* bin->nregs and bin->reg0_offset are also calculated here, since these
* settings are all interdependent.
@ -1455,8 +1452,7 @@ arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
} while (try_run_size <= arena_maxclass
&& try_run_size <= arena_maxclass
&& RUN_MAX_OVRHD * (bin->reg_size << 3) > RUN_MAX_OVRHD_RELAX
- && (try_reg0_offset << RUN_BFP) > RUN_MAX_OVRHD * try_run_size
- && try_hdr_size < PAGE_SIZE);
+ && (try_reg0_offset << RUN_BFP) > RUN_MAX_OVRHD * try_run_size);
assert(good_hdr_size <= good_reg0_offset);

View File

@ -73,7 +73,7 @@ ckh_isearch(ckh_t *ckh, const void *key)
size_t hash1, hash2, bucket, cell;
assert(ckh != NULL);
- assert(ckh->magic = CKH_MAGIG);
+ assert(ckh->magic == CKH_MAGIC);
ckh->hash(key, ckh->lg_curbuckets, &hash1, &hash2);
@ -383,7 +383,7 @@ ckh_new(ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh_keycomp_t *keycomp)
}
#ifdef JEMALLOC_DEBUG
- ckh->magic = CKH_MAGIG;
+ ckh->magic = CKH_MAGIC;
#endif
ret = false;
@ -396,7 +396,7 @@ ckh_delete(ckh_t *ckh)
{
assert(ckh != NULL);
- assert(ckh->magic = CKH_MAGIG);
+ assert(ckh->magic == CKH_MAGIC);
#ifdef CKH_VERBOSE
malloc_printf(
@ -421,7 +421,7 @@ ckh_count(ckh_t *ckh)
{
assert(ckh != NULL);
- assert(ckh->magic = CKH_MAGIG);
+ assert(ckh->magic == CKH_MAGIC);
return (ckh->count);
}
@ -452,7 +452,7 @@ ckh_insert(ckh_t *ckh, const void *key, const void *data)
bool ret;
assert(ckh != NULL);
- assert(ckh->magic = CKH_MAGIG);
+ assert(ckh->magic == CKH_MAGIC);
assert(ckh_search(ckh, key, NULL, NULL));
#ifdef CKH_COUNT
@ -477,7 +477,7 @@ ckh_remove(ckh_t *ckh, const void *searchkey, void **key, void **data)
size_t cell;
assert(ckh != NULL);
- assert(ckh->magic = CKH_MAGIG);
+ assert(ckh->magic == CKH_MAGIC);
cell = ckh_isearch(ckh, searchkey);
if (cell != SIZE_T_MAX) {
@ -509,7 +509,7 @@ ckh_search(ckh_t *ckh, const void *searchkey, void **key, void **data)
size_t cell;
assert(ckh != NULL);
- assert(ckh->magic = CKH_MAGIG);
+ assert(ckh->magic == CKH_MAGIC);
cell = ckh_isearch(ckh, searchkey);
if (cell != SIZE_T_MAX) {
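
The ckh asserts stacked two bugs: the misspelled CKH_MAGIG constant, and = where == was intended, so each assert assigned the magic value and then trivially passed, silently "repairing" the very field it was meant to validate. A standalone demonstration (the corrected second assert below aborts, as a real corruption check should; compilers flag the first form with a "suggest parentheses around assignment used as truth value" warning):

    #include <assert.h>
    #include <stdint.h>

    #define CKH_MAGIC 0x3af2489d

    int
    main(void)
    {
        uint32_t magic = 0; /* Simulate a corrupted magic field. */

        /* Old form: assigns 0x3af2489d, sees nonzero, always passes. */
        assert(magic = CKH_MAGIC);

        /* Fixed form: actually detects the corruption and aborts. */
        magic = 0;
        assert(magic == CKH_MAGIC);
        return (0);
    }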

View File

@ -1137,6 +1137,11 @@ thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
/* Set new arena association. */
ARENA_SET(arena);
+ {
+ tcache_t *tcache = TCACHE_GET();
+ if (tcache != NULL)
+ tcache->arena = arena;
+ }
}
ret = 0;
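
The bug this fixes: rebinding a thread via the "thread.arena" mallctl updated the arena mapping but not the thread's tcache, so subsequent cache fills kept coming from the old arena. Usage, for reference (a sketch assuming an unprefixed build; mallctl and "thread.arena" are part of this version's public API):

    #include <stdio.h>
    #include <jemalloc/jemalloc.h>

    int
    main(void)
    {
        unsigned old_arena, new_arena = 0;
        size_t sz = sizeof(old_arena);

        /* Read the current binding and rebind this thread to arena 0. */
        if (mallctl("thread.arena", &old_arena, &sz, &new_arena,
            sizeof(new_arena)) != 0)
            return (1);
        printf("was bound to arena %u\n", old_arena);
        return (0);
    }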

View File

@ -83,7 +83,7 @@ huge_palloc(size_t size, size_t alignment, bool zero)
* alignment, in order to assure the alignment can be achieved, then
* unmap leading and trailing chunks.
*/
- assert(alignment >= chunksize);
+ assert(alignment > chunksize);
chunk_size = CHUNK_CEILING(size);
@ -192,7 +192,7 @@ huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra,
* different size class. In that case, fall back to allocating new
* space and copying.
*/
- if (alignment != 0)
+ if (alignment > chunksize)
ret = huge_palloc(size + extra, alignment, zero);
else
ret = huge_malloc(size + extra, zero);
@ -201,7 +201,7 @@ huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra,
if (extra == 0)
return (NULL);
/* Try again, this time without extra. */
- if (alignment != 0)
+ if (alignment > chunksize)
ret = huge_palloc(size, alignment, zero);
else
ret = huge_malloc(size, zero);
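
The tightened condition reflects that chunk-aligned allocation already satisfies any alignment up to chunksize, so huge_palloc()'s over-allocate-and-trim path is only needed for larger alignments. The arithmetic, as a sketch (4 MiB is an assumed chunksize; the alloc_size formula mirrors the one in huge_palloc()):

    #include <stdio.h>

    #define CHUNKSIZE        ((size_t)4 << 20) /* Assumed 4 MiB chunks. */
    #define CHUNK_CEILING(s) (((s) + CHUNKSIZE - 1) & ~(CHUNKSIZE - 1))

    int
    main(void)
    {
        size_t size = (size_t)6 << 20;      /* 6 MiB request... */
        size_t alignment = (size_t)8 << 20; /* ...aligned to 8 MiB. */
        size_t chunk_size = CHUNK_CEILING(size);
        size_t alloc_size = chunk_size + alignment - CHUNKSIZE;

        /*
         * A mapping of alloc_size bytes must contain an aligned run of
         * chunk_size bytes; the leading/trailing excess gets unmapped.
         */
        printf("map %zu MiB to carve %zu MiB aligned to %zu MiB\n",
            alloc_size >> 20, chunk_size >> 20, alignment >> 20);
        return (0);
    }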

View File

@ -421,8 +421,8 @@ malloc_conf_init(void)
if ((opts = getenv(envname)) != NULL) {
/*
* Do nothing; opts is already initialized to
- * the value of the JEMALLOC_OPTIONS
- * environment variable.
+ * the value of the MALLOC_CONF environment
+ * variable.
*/
} else {
/* No configuration specified. */
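
Since 2.0 the option string comes from MALLOC_CONF (the environment variable read here) or the malloc_conf compile-time global, rather than the old JEMALLOC_OPTIONS flag characters. For example (a sketch assuming an unprefixed build; the option names are taken from this version's documentation):

    #include <stdlib.h>

    /* Compile-time equivalent of MALLOC_CONF="narenas:2,tcache:false". */
    const char *malloc_conf = "narenas:2,tcache:false";

    int
    main(void)
    {
        void *p = malloc(1); /* First allocation initializes jemalloc. */

        free(p);
        return (0);
    }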

View File

@ -432,6 +432,7 @@ prof_lookup(prof_bt_t *bt)
prof_ctx_t *p;
void *v;
} ctx;
+ bool new_ctx;
/*
* This thread's cache lacks bt. Look for it in the global
@ -468,12 +469,14 @@ prof_lookup(prof_bt_t *bt)
idalloc(ctx.v);
return (NULL);
}
- }
- /*
- * Acquire ctx's lock before releasing bt2ctx_mtx, in order to
- * avoid a race condition with prof_ctx_destroy().
- */
- malloc_mutex_lock(&ctx.p->lock);
+ /*
+ * Artificially raise curobjs, in order to avoid a race
+ * condition with prof_ctx_merge()/prof_ctx_destroy().
+ */
+ ctx.p->cnt_merged.curobjs++;
+ new_ctx = true;
+ } else
+ new_ctx = false;
prof_leave();
/* Link a prof_thd_cnt_t into ctx for this thread. */
@ -498,7 +501,11 @@ prof_lookup(prof_bt_t *bt)
/* Allocate and partially initialize a new cnt. */
ret.v = imalloc(sizeof(prof_thr_cnt_t));
if (ret.p == NULL) {
- malloc_mutex_unlock(&ctx.p->lock);
+ if (new_ctx) {
+ malloc_mutex_lock(&ctx.p->lock);
+ ctx.p->cnt_merged.curobjs--;
+ malloc_mutex_unlock(&ctx.p->lock);
+ }
return (NULL);
}
ql_elm_new(ret.p, cnts_link);
@ -509,12 +516,19 @@ prof_lookup(prof_bt_t *bt)
ret.p->epoch = 0;
memset(&ret.p->cnts, 0, sizeof(prof_cnt_t));
if (ckh_insert(&prof_tdata->bt2cnt, btkey.v, ret.v)) {
- malloc_mutex_unlock(&ctx.p->lock);
+ if (new_ctx) {
+ malloc_mutex_lock(&ctx.p->lock);
+ ctx.p->cnt_merged.curobjs--;
+ malloc_mutex_unlock(&ctx.p->lock);
+ }
idalloc(ret.v);
return (NULL);
}
ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link);
+ malloc_mutex_lock(&ctx.p->lock);
ql_tail_insert(&ctx.p->cnts_ql, ret.p, cnts_link);
+ if (new_ctx)
+ ctx.p->cnt_merged.curobjs--;
malloc_mutex_unlock(&ctx.p->lock);
} else {
/* Move ret to the front of the LRU. */
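
The pattern behind all three hunks: before dropping bt2ctx_mtx, a newly inserted ctx gets an artificial curobjs reference so prof_ctx_destroy() cannot reap it while no thread-local counter is linked yet; every exit path then drops that reference under the ctx lock. Distilled as a generic sketch (hypothetical names, with pthreads standing in for jemalloc's mutexes):

    #include <pthread.h>

    typedef struct {
        pthread_mutex_t lock;
        unsigned curobjs; /* Stand-in for cnt_merged.curobjs. */
    } ctx_sketch_t;

    /* Artificial reference: taken while the ctx is globally visible... */
    static void
    ctx_hold(ctx_sketch_t *ctx)
    {
        pthread_mutex_lock(&ctx->lock);
        ctx->curobjs++; /* Nonzero curobjs blocks destruction. */
        pthread_mutex_unlock(&ctx->lock);
    }

    /* ...and dropped once a real per-thread counter is linked, or on error. */
    static void
    ctx_release(ctx_sketch_t *ctx)
    {
        pthread_mutex_lock(&ctx->lock);
        ctx->curobjs--;
        pthread_mutex_unlock(&ctx->lock);
    }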