diff --git a/jemalloc/ChangeLog b/jemalloc/ChangeLog index e32a5883..fc2f8946 100644 --- a/jemalloc/ChangeLog +++ b/jemalloc/ChangeLog @@ -6,6 +6,14 @@ found in the git revision history: http://www.canonware.com/cgi-bin/gitweb.cgi?p=jemalloc.git git://canonware.com/jemalloc.git +* 2.1.1 + + Bug fixes: + - Fix aligned huge reallocation (affected allocm()). + - Fix the ALLOCM_LG_ALIGN macro definition. + - Fix a heap dumping deadlock. + - Fix a "thread.arena" mallctl bug. + * 2.1.0 This version incorporates some optimizations that can't quite be considered @@ -27,7 +35,7 @@ found in the git revision history: Bug fixes: - Fix a race condition in heap profiling that could cause undefined behavior - if opt.prof_accum were disabled. + if "opt.prof_accum" were disabled. - Add missing mutex unlocks for some OOM error paths in the heap profiling code. - Fix a compilation error for non-C99 builds. diff --git a/jemalloc/INSTALL b/jemalloc/INSTALL index b77ebfd4..e0a5dc47 100644 --- a/jemalloc/INSTALL +++ b/jemalloc/INSTALL @@ -28,7 +28,7 @@ any of the following arguments (not a definitive list) to 'configure': --with-jemalloc-prefix=<prefix> Prefix all public APIs with <prefix>. For example, if <prefix> is - "prefix_", the API changes like the following occur: + "prefix_", API changes like the following occur: malloc() --> prefix_malloc() malloc_conf --> prefix_malloc_conf @@ -48,9 +48,9 @@ any of the following arguments (not a definitive list) to 'configure': example, libjemalloc.so.0 becomes libjemalloc<suffix>.so.0. --enable-cc-silence - Enable code that silences unuseful compiler warnings. This is helpful when - trying to tell serious warnings from those due to compiler limitations, but - it potentially incurs a performance penalty. + Enable code that silences non-useful compiler warnings. This is helpful + when trying to tell serious warnings from those due to compiler + limitations, but it potentially incurs a performance penalty. --enable-debug Enable assertions and validation code. This incurs a substantial @@ -62,7 +62,7 @@ any of the following arguments (not a definitive list) to 'configure': --enable-prof Enable heap profiling and leak detection functionality. See the "opt.prof" - option documention for usage details. + option documentation for usage details. --disable-prof-libgcc Disable the use of libgcc's backtracing functionality. Ordinarily, libgcc's @@ -89,7 +89,7 @@ any of the following arguments (not a definitive list) to 'configure': --disable-tcache Disable thread-specific caches for small objects. Objects are cached and released in bulk, thus reducing the total number of mutex operations. See - the "opt.tcache" option for suage details. + the "opt.tcache" option for usage details. --enable-swap Enable mmap()ed swap file support. When this feature is built in, it is @@ -198,8 +198,8 @@ MANDIR="?" Use this as the installation prefix for man pages. DESTDIR="?" - Prepend DESTDIR to INCLUDEDIR, LIBDIR, and MANDIR. This is useful when - installing to a different path than was specified via --prefix. + Prepend DESTDIR to INCLUDEDIR, LIBDIR, DATADIR, and MANDIR. This is useful + when installing to a different path than was specified via --prefix. CC="?" Use this to invoke the C compiler. diff --git a/jemalloc/include/jemalloc/internal/arena.h b/jemalloc/include/jemalloc/internal/arena.h index 9556c2c6..a43d1fa1 100644 --- a/jemalloc/include/jemalloc/internal/arena.h +++ b/jemalloc/include/jemalloc/internal/arena.h @@ -45,9 +45,10 @@ * point is implicitly RUN_BFP bits to the left.
* * Note that it is possible to set RUN_MAX_OVRHD low enough that it cannot be - * honored for some/all object sizes, since there is one bit of header overhead - * per object (plus a constant). This constraint is relaxed (ignored) for runs - * that are so small that the per-region overhead is greater than: + * honored for some/all object sizes, since when heap profiling is enabled + * there is one pointer of header overhead per object (plus a constant). This + * constraint is relaxed (ignored) for runs that are so small that the + * per-region overhead is greater than: * * (RUN_MAX_OVRHD / (reg_size << (3+RUN_BFP)) */ @@ -105,7 +106,7 @@ struct arena_chunk_map_s { * Run address (or size) and various flags are stored together. The bit * layout looks like (assuming 32-bit system): * - * ???????? ???????? ????---- ----dzla + * ???????? ???????? ????---- ----dula * * ? : Unallocated: Run address for first/last pages, unset for internal * pages. @@ -113,7 +114,7 @@ struct arena_chunk_map_s { * Large: Run size for first page, unset for trailing pages. * - : Unused. * d : dirty? - * z : zeroed? + * u : unzeroed? * l : large? * a : allocated? * @@ -129,30 +130,30 @@ struct arena_chunk_map_s { * [dula] : bit unset * * Unallocated (clean): - * ssssssss ssssssss ssss---- ----du-- + * ssssssss ssssssss ssss---- ----du-a * xxxxxxxx xxxxxxxx xxxx---- -----Uxx - * ssssssss ssssssss ssss---- ----dU-- + * ssssssss ssssssss ssss---- ----dU-a * * Unallocated (dirty): - * ssssssss ssssssss ssss---- ----D--- + * ssssssss ssssssss ssss---- ----D--a * xxxxxxxx xxxxxxxx xxxx---- ----xxxx - * ssssssss ssssssss ssss---- ----D--- + * ssssssss ssssssss ssss---- ----D--a * * Small: - * pppppppp pppppppp pppp---- ----d--a - * pppppppp pppppppp pppp---- -------a - * pppppppp pppppppp pppp---- ----d--a + * pppppppp pppppppp pppp---- ----d--A + * pppppppp pppppppp pppp---- -------A + * pppppppp pppppppp pppp---- ----d--A * * Large: - * ssssssss ssssssss ssss---- ----D-la + * ssssssss ssssssss ssss---- ----D-LA * xxxxxxxx xxxxxxxx xxxx---- ----xxxx - * -------- -------- -------- ----D-la + * -------- -------- -------- ----D-LA * * Large (sampled, size <= PAGE_SIZE): - * ssssssss ssssssss sssscccc ccccD-la + * ssssssss ssssssss sssscccc ccccD-LA * * Large (not sampled, size == PAGE_SIZE): - * ssssssss ssssssss ssss---- ----D-la + * ssssssss ssssssss ssss---- ----D-LA */ size_t bits; #ifdef JEMALLOC_PROF @@ -347,45 +348,35 @@ struct arena_s { /* * bins is used to store trees of free regions of the following sizes, - * assuming a 16-byte quantum, 4 KiB page size, and default - * JEMALLOC_OPTIONS. + * assuming a 64-bit system with 16-byte quantum, 4 KiB page size, and + * default MALLOC_CONF. 
* * bins[i] | size | * --------+--------+ - * 0 | 2 | - * 1 | 4 | - * 2 | 8 | + * 0 | 8 | * --------+--------+ - * 3 | 16 | - * 4 | 32 | - * 5 | 48 | + * 1 | 16 | + * 2 | 32 | + * 3 | 48 | * : : - * 8 | 96 | - * 9 | 112 | - * 10 | 128 | + * 6 | 96 | + * 7 | 112 | + * 8 | 128 | * --------+--------+ - * 11 | 192 | - * 12 | 256 | - * 13 | 320 | - * 14 | 384 | - * 15 | 448 | - * 16 | 512 | + * 9 | 192 | + * 10 | 256 | + * 11 | 320 | + * 12 | 384 | + * 13 | 448 | + * 14 | 512 | * --------+--------+ - * 17 | 768 | - * 18 | 1024 | - * 19 | 1280 | + * 15 | 768 | + * 16 | 1024 | + * 17 | 1280 | * : : - * 27 | 3328 | - * 28 | 3584 | - * 29 | 3840 | - * --------+--------+ - * 30 | 4 KiB | - * 31 | 6 KiB | - * 33 | 8 KiB | - * : : - * 43 | 28 KiB | - * 44 | 30 KiB | - * 45 | 32 KiB | + * 25 | 3328 | + * 26 | 3584 | + * 27 | 3840 | * --------+--------+ */ arena_bin_t bins[1]; /* Dynamically sized. */ diff --git a/jemalloc/include/jemalloc/internal/ckh.h b/jemalloc/include/jemalloc/internal/ckh.h index d4e391b6..3e4ad4c8 100644 --- a/jemalloc/include/jemalloc/internal/ckh.h +++ b/jemalloc/include/jemalloc/internal/ckh.h @@ -31,7 +31,7 @@ struct ckhc_s { struct ckh_s { #ifdef JEMALLOC_DEBUG -#define CKH_MAGIG 0x3af2489d +#define CKH_MAGIC 0x3af2489d uint32_t magic; #endif diff --git a/jemalloc/include/jemalloc/internal/tcache.h b/jemalloc/include/jemalloc/internal/tcache.h index 1ad91a9b..f431c667 100644 --- a/jemalloc/include/jemalloc/internal/tcache.h +++ b/jemalloc/include/jemalloc/internal/tcache.h @@ -37,7 +37,6 @@ struct tcache_bin_s { tcache_bin_stats_t tstats; # endif unsigned low_water; /* Min # cached since last GC. */ - unsigned high_water; /* Max # cached since last GC. */ unsigned ncached; /* # of cached objects. */ unsigned ncached_max; /* Upper limit on ncached. */ void *avail; /* Chain of available objects. */ @@ -194,7 +193,6 @@ tcache_event(tcache_t *tcache) } } tbin->low_water = tbin->ncached; - tbin->high_water = tbin->ncached; tcache->next_gc_bin++; if (tcache->next_gc_bin == nhbins) @@ -348,8 +346,6 @@ tcache_dalloc_small(tcache_t *tcache, void *ptr) *(void **)ptr = tbin->avail; tbin->avail = ptr; tbin->ncached++; - if (tbin->ncached > tbin->high_water) - tbin->high_water = tbin->ncached; tcache_event(tcache); } @@ -388,8 +384,6 @@ tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size) *(void **)ptr = tbin->avail; tbin->avail = ptr; tbin->ncached++; - if (tbin->ncached > tbin->high_water) - tbin->high_water = tbin->ncached; tcache_event(tcache); } diff --git a/jemalloc/include/jemalloc/jemalloc.h.in b/jemalloc/include/jemalloc/jemalloc.h.in index 4dd3981a..580a5ec5 100644 --- a/jemalloc/include/jemalloc/jemalloc.h.in +++ b/jemalloc/include/jemalloc/jemalloc.h.in @@ -19,7 +19,7 @@ extern "C" { # define JEMALLOC_P(s) s #endif -#define ALLOCM_LG_ALIGN ((int)0x3f) +#define ALLOCM_LG_ALIGN(la) (la) #if LG_SIZEOF_PTR == 2 #define ALLOCM_ALIGN(a) (ffs(a)-1) #else diff --git a/jemalloc/src/arena.c b/jemalloc/src/arena.c index 7f939b3c..3cf15ff2 100644 --- a/jemalloc/src/arena.c +++ b/jemalloc/src/arena.c @@ -1358,8 +1358,6 @@ arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind #endif malloc_mutex_unlock(&bin->lock); tbin->ncached = i; - if (tbin->ncached > tbin->high_water) - tbin->high_water = tbin->ncached; } #endif @@ -1369,7 +1367,6 @@ arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind * *) bin->run_size >= min_run_size * *) bin->run_size <= arena_maxclass * *) run header overhead <= RUN_MAX_OVRHD (or header overhead relaxed). 
- * *) run header size < PAGE_SIZE * * bin->nregs and bin->reg0_offset are also calculated here, since these * settings are all interdependent. @@ -1455,8 +1452,7 @@ arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size) } while (try_run_size <= arena_maxclass && try_run_size <= arena_maxclass && RUN_MAX_OVRHD * (bin->reg_size << 3) > RUN_MAX_OVRHD_RELAX - && (try_reg0_offset << RUN_BFP) > RUN_MAX_OVRHD * try_run_size - && try_hdr_size < PAGE_SIZE); + && (try_reg0_offset << RUN_BFP) > RUN_MAX_OVRHD * try_run_size); assert(good_hdr_size <= good_reg0_offset); diff --git a/jemalloc/src/ckh.c b/jemalloc/src/ckh.c index 682a8db6..e386a531 100644 --- a/jemalloc/src/ckh.c +++ b/jemalloc/src/ckh.c @@ -73,7 +73,7 @@ ckh_isearch(ckh_t *ckh, const void *key) size_t hash1, hash2, bucket, cell; assert(ckh != NULL); - assert(ckh->magic = CKH_MAGIG); + assert(ckh->magic == CKH_MAGIC); ckh->hash(key, ckh->lg_curbuckets, &hash1, &hash2); @@ -383,7 +383,7 @@ ckh_new(ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh_keycomp_t *keycomp) } #ifdef JEMALLOC_DEBUG - ckh->magic = CKH_MAGIG; + ckh->magic = CKH_MAGIC; #endif ret = false; @@ -396,7 +396,7 @@ ckh_delete(ckh_t *ckh) { assert(ckh != NULL); - assert(ckh->magic = CKH_MAGIG); + assert(ckh->magic == CKH_MAGIC); #ifdef CKH_VERBOSE malloc_printf( @@ -421,7 +421,7 @@ ckh_count(ckh_t *ckh) { assert(ckh != NULL); - assert(ckh->magic = CKH_MAGIG); + assert(ckh->magic == CKH_MAGIC); return (ckh->count); } @@ -452,7 +452,7 @@ ckh_insert(ckh_t *ckh, const void *key, const void *data) bool ret; assert(ckh != NULL); - assert(ckh->magic = CKH_MAGIG); + assert(ckh->magic == CKH_MAGIC); assert(ckh_search(ckh, key, NULL, NULL)); #ifdef CKH_COUNT @@ -477,7 +477,7 @@ ckh_remove(ckh_t *ckh, const void *searchkey, void **key, void **data) size_t cell; assert(ckh != NULL); - assert(ckh->magic = CKH_MAGIG); + assert(ckh->magic == CKH_MAGIC); cell = ckh_isearch(ckh, searchkey); if (cell != SIZE_T_MAX) { @@ -509,7 +509,7 @@ ckh_search(ckh_t *ckh, const void *searchkey, void **key, void **data) size_t cell; assert(ckh != NULL); - assert(ckh->magic = CKH_MAGIG); + assert(ckh->magic == CKH_MAGIC); cell = ckh_isearch(ckh, searchkey); if (cell != SIZE_T_MAX) { diff --git a/jemalloc/src/ctl.c b/jemalloc/src/ctl.c index 3c8adab9..0b8b06f3 100644 --- a/jemalloc/src/ctl.c +++ b/jemalloc/src/ctl.c @@ -1137,6 +1137,11 @@ thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, /* Set new arena association. */ ARENA_SET(arena); + { + tcache_t *tcache = TCACHE_GET(); + if (tcache != NULL) + tcache->arena = arena; + } } ret = 0; diff --git a/jemalloc/src/huge.c b/jemalloc/src/huge.c index 0aadc433..de09198e 100644 --- a/jemalloc/src/huge.c +++ b/jemalloc/src/huge.c @@ -83,7 +83,7 @@ huge_palloc(size_t size, size_t alignment, bool zero) * alignment, in order to assure the alignment can be achieved, then * unmap leading and trailing chunks. */ - assert(alignment >= chunksize); + assert(alignment > chunksize); chunk_size = CHUNK_CEILING(size); @@ -192,7 +192,7 @@ huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, * different size class. In that case, fall back to allocating new * space and copying. */ - if (alignment != 0) + if (alignment > chunksize) ret = huge_palloc(size + extra, alignment, zero); else ret = huge_malloc(size + extra, zero); @@ -201,7 +201,7 @@ huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, if (extra == 0) return (NULL); /* Try again, this time without extra. 
*/ - if (alignment != 0) + if (alignment > chunksize) ret = huge_palloc(size, alignment, zero); else ret = huge_malloc(size, zero); diff --git a/jemalloc/src/jemalloc.c b/jemalloc/src/jemalloc.c index 2aebc51d..f5434c7f 100644 --- a/jemalloc/src/jemalloc.c +++ b/jemalloc/src/jemalloc.c @@ -421,8 +421,8 @@ malloc_conf_init(void) if ((opts = getenv(envname)) != NULL) { /* * Do nothing; opts is already initialized to - * the value of the JEMALLOC_OPTIONS - * environment variable. + * the value of the MALLOC_CONF environment + * variable. */ } else { /* No configuration specified. */ diff --git a/jemalloc/src/prof.c b/jemalloc/src/prof.c index 636cccef..3566c6d8 100644 --- a/jemalloc/src/prof.c +++ b/jemalloc/src/prof.c @@ -432,6 +432,7 @@ prof_lookup(prof_bt_t *bt) prof_ctx_t *p; void *v; } ctx; + bool new_ctx; /* * This thread's cache lacks bt. Look for it in the global @@ -468,12 +469,14 @@ prof_lookup(prof_bt_t *bt) idalloc(ctx.v); return (NULL); } - } - /* - * Acquire ctx's lock before releasing bt2ctx_mtx, in order to - * avoid a race condition with prof_ctx_destroy(). - */ - malloc_mutex_lock(&ctx.p->lock); + /* + * Artificially raise curobjs, in order to avoid a race + * condition with prof_ctx_merge()/prof_ctx_destroy(). + */ + ctx.p->cnt_merged.curobjs++; + new_ctx = true; + } else + new_ctx = false; prof_leave(); /* Link a prof_thd_cnt_t into ctx for this thread. */ @@ -498,7 +501,11 @@ prof_lookup(prof_bt_t *bt) /* Allocate and partially initialize a new cnt. */ ret.v = imalloc(sizeof(prof_thr_cnt_t)); if (ret.p == NULL) { - malloc_mutex_unlock(&ctx.p->lock); + if (new_ctx) { + malloc_mutex_lock(&ctx.p->lock); + ctx.p->cnt_merged.curobjs--; + malloc_mutex_unlock(&ctx.p->lock); + } return (NULL); } ql_elm_new(ret.p, cnts_link); @@ -509,12 +516,19 @@ prof_lookup(prof_bt_t *bt) ret.p->epoch = 0; memset(&ret.p->cnts, 0, sizeof(prof_cnt_t)); if (ckh_insert(&prof_tdata->bt2cnt, btkey.v, ret.v)) { - malloc_mutex_unlock(&ctx.p->lock); + if (new_ctx) { + malloc_mutex_lock(&ctx.p->lock); + ctx.p->cnt_merged.curobjs--; + malloc_mutex_unlock(&ctx.p->lock); + } idalloc(ret.v); return (NULL); } ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link); + malloc_mutex_lock(&ctx.p->lock); ql_tail_insert(&ctx.p->cnts_ql, ret.p, cnts_link); + if (new_ctx) + ctx.p->cnt_merged.curobjs--; malloc_mutex_unlock(&ctx.p->lock); } else { /* Move ret to the front of the LRU. */
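
For reference on the ALLOCM_LG_ALIGN change in jemalloc.h.in above: the old object-like definition expanded to the constant mask ((int)0x3f), so the documented call form ALLOCM_LG_ALIGN(la) could not even compile; the fixed function-like macro passes lg(alignment) through in the low flag bits, which is what allocm() decodes. A minimal caller sketch, assuming this release's experimental allocm()/dallocm() API and a default build without --with-jemalloc-prefix (so JEMALLOC_P(allocm) is plain allocm()); the size and alignment values are illustrative:

    #include <stdio.h>
    #include <stdlib.h>
    #include <jemalloc/jemalloc.h>

    int
    main(void)
    {
        void *p;
        size_t rsize;
        int err;

        /*
         * Request at least 100 bytes aligned to 4 KiB; the flags
         * argument carries lg(alignment), so ALLOCM_LG_ALIGN(12)
         * asks for 1 << 12 == 4096-byte alignment.
         */
        err = allocm(&p, &rsize, 100, ALLOCM_LG_ALIGN(12));
        if (err != ALLOCM_SUCCESS) {
            fprintf(stderr, "allocm() error %d\n", err);
            return (EXIT_FAILURE);
        }
        printf("p=%p, usable size=%zu\n", p, rsize);

        dallocm(p, 0);
        return (EXIT_SUCCESS);
    }

(The huge.c hunks are the other half of the ChangeLog's "aligned huge reallocation" fix: huge chunks are always chunksize-aligned, so only requests with alignment > chunksize need the separate huge_palloc() path.)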
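
Similarly, the ctl.c hunk makes an explicit "thread.arena" rebinding retarget the thread's tcache as well, so cached allocations stop being filled from the previously associated arena. A sketch of the affected call pattern via mallctl(), under the same unprefixed-build assumption; arena index 0 is only an example:

    #include <stdio.h>
    #include <jemalloc/jemalloc.h>

    int
    main(void)
    {
        unsigned old_arena, new_arena = 0;
        size_t sz = sizeof(unsigned);

        /* Read the arena this thread is currently bound to. */
        if (mallctl("thread.arena", &old_arena, &sz, NULL, 0) != 0)
            return (1);

        /*
         * Rebind the thread to arena 0.  With the fix above, the
         * thread's tcache follows the new association instead of
         * continuing to fill from the previous arena.
         */
        if (mallctl("thread.arena", NULL, NULL, &new_arena,
            sizeof(new_arena)) != 0)
            return (1);

        printf("rebound from arena %u to arena %u\n", old_arena,
            new_arena);
        return (0);
    }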