From fb64ec29ec05fbcba09898a3c93211966a6fa985 Mon Sep 17 00:00:00 2001
From: Jason Evans
Date: Mon, 21 Sep 2015 18:37:18 -0700
Subject: [PATCH 1/6] Fix prof_tctx_dump_iter() to filter.

Fix prof_tctx_dump_iter() to filter out nodes that were created after
heap profile dumping started.  Prior to this fix, spurious entries with
arbitrary object/byte counts could appear in heap profiles, which
resulted in jeprof inaccuracies or failures.
---
 ChangeLog  |  6 ++++++
 src/prof.c | 22 +++++++++++++++++-----
 2 files changed, 23 insertions(+), 5 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 58e4462b..c6bd5562 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -4,6 +4,12 @@ brevity.  Much more detail can be found in the git revision history:
 
     https://github.com/jemalloc/jemalloc
 
+* 4.0.3 (XXX)
+
+  Bug fixes:
+  - Fix prof_tctx_dump_iter() to filter out nodes that were created after heap
+    profile dumping started.
+
 * 4.0.2 (September 21, 2015)
 
   This bugfix release addresses a few bugs specific to heap profiling.
diff --git a/src/prof.c b/src/prof.c
index 0a08062c..5d2b9598 100644
--- a/src/prof.c
+++ b/src/prof.c
@@ -1102,11 +1102,23 @@ prof_tctx_dump_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg)
 {
 	bool propagate_err = *(bool *)arg;
 
-	if (prof_dump_printf(propagate_err,
-	    "  t%"FMTu64": %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]\n",
-	    tctx->thr_uid, tctx->dump_cnts.curobjs, tctx->dump_cnts.curbytes,
-	    tctx->dump_cnts.accumobjs, tctx->dump_cnts.accumbytes))
-		return (tctx);
+	switch (tctx->state) {
+	case prof_tctx_state_initializing:
+	case prof_tctx_state_nominal:
+		/* Not captured by this dump. */
+		break;
+	case prof_tctx_state_dumping:
+	case prof_tctx_state_purgatory:
+		if (prof_dump_printf(propagate_err,
+		    "  t%"FMTu64": %"FMTu64": %"FMTu64" [%"FMTu64": "
+		    "%"FMTu64"]\n", tctx->thr_uid, tctx->dump_cnts.curobjs,
+		    tctx->dump_cnts.curbytes, tctx->dump_cnts.accumobjs,
+		    tctx->dump_cnts.accumbytes))
+			return (tctx);
+		break;
+	default:
+		not_reached();
+	}
 
 	return (NULL);
 }
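
For illustration only (not part of the patch series): a minimal sketch of how a heap profile dump of the kind affected by the fix above is typically forced.  It assumes an unprefixed jemalloc built with --enable-prof and run with MALLOC_CONF=prof:true; the output filename is arbitrary.  "prof.dump" is the documented mallctl for writing a profile that jeprof then post-processes.

    #include <stdio.h>
    #include <jemalloc/jemalloc.h>

    int
    main(void)
    {
        const char *filename = "jeprof.out.heap";

        /* "prof.dump" writes a heap profile for jeprof to post-process. */
        if (mallctl("prof.dump", NULL, NULL, (void *)&filename,
            sizeof(const char *)) != 0)
            fprintf(stderr, "prof.dump failed (is profiling enabled?)\n");
        return (0);
    }
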
From d260f442ce693de4351229027b37b3293fcbfd7d Mon Sep 17 00:00:00 2001
From: Jason Evans
Date: Thu, 24 Sep 2015 16:38:45 -0700
Subject: [PATCH 2/6] Fix xallocx(..., MALLOCX_ZERO) bugs.

Zero all trailing bytes of large allocations when --enable-cache-oblivious
configure option is enabled.  This regression was introduced by
8a03cf039cd06f9fa6972711195055d865673966 (Implement cache index
randomization for large allocations.).

Zero trailing bytes of huge allocations when resizing from/to a size
class that is not a multiple of the chunk size.
---
 ChangeLog                  |   4 ++
 src/arena.c                |  10 ++++
 src/huge.c                 |  30 +++++-----
 test/integration/xallocx.c | 119 ++++++++++++++++++++++++++++++++++++-
 4 files changed, 148 insertions(+), 15 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index c6bd5562..a9929f82 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -7,6 +7,10 @@ brevity.  Much more detail can be found in the git revision history:
 * 4.0.3 (XXX)
 
   Bug fixes:
+  - Fix xallocx(..., MALLOCX_ZERO) to zero all trailing bytes of large
+    allocations when --enable-cache-oblivious configure option is enabled.
+  - Fix xallocx(..., MALLOCX_ZERO) to zero trailing bytes of huge allocations
+    when resizing from/to a size class that is not a multiple of the chunk size.
   - Fix prof_tctx_dump_iter() to filter out nodes that were created after heap
     profile dumping started.
 
diff --git a/src/arena.c b/src/arena.c
index 7f4a6cae..3081519c 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -2679,6 +2679,16 @@ arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr,
 		if (arena_run_split_large(arena, run, splitsize, zero))
 			goto label_fail;
 
+		if (config_cache_oblivious && zero) {
+			/*
+			 * Zero the trailing bytes of the original allocation's
+			 * last page, since they are in an indeterminate state.
+			 */
+			assert(PAGE_CEILING(oldsize) == oldsize);
+			memset((void *)((uintptr_t)ptr + oldsize), 0,
+			    PAGE_CEILING((uintptr_t)ptr) - (uintptr_t)ptr);
+		}
+
 		size = oldsize + splitsize;
 		npages = (size + large_pad) >> LG_PAGE;
 
diff --git a/src/huge.c b/src/huge.c
index f8778db2..1e9a6651 100644
--- a/src/huge.c
+++ b/src/huge.c
@@ -133,7 +133,7 @@ huge_ralloc_no_move_similar(void *ptr, size_t oldsize, size_t usize_min,
 	extent_node_t *node;
 	arena_t *arena;
 	chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER;
-	bool zeroed;
+	bool pre_zeroed, post_zeroed;
 
 	/* Increase usize to incorporate extra. */
 	for (usize = usize_min; usize < usize_max && (usize_next = s2u(usize+1))
@@ -145,26 +145,27 @@ huge_ralloc_no_move_similar(void *ptr, size_t oldsize, size_t usize_min,
 
 	node = huge_node_get(ptr);
 	arena = extent_node_arena_get(node);
+	pre_zeroed = extent_node_zeroed_get(node);
 
 	/* Fill if necessary (shrinking). */
 	if (oldsize > usize) {
 		size_t sdiff = oldsize - usize;
 		if (config_fill && unlikely(opt_junk_free)) {
 			memset((void *)((uintptr_t)ptr + usize), 0x5a, sdiff);
-			zeroed = false;
+			post_zeroed = false;
 		} else {
-			zeroed = !chunk_purge_wrapper(arena, &chunk_hooks, ptr,
-			    CHUNK_CEILING(oldsize), usize, sdiff);
+			post_zeroed = !chunk_purge_wrapper(arena, &chunk_hooks,
+			    ptr, CHUNK_CEILING(oldsize), usize, sdiff);
 		}
 	} else
-		zeroed = true;
+		post_zeroed = pre_zeroed;
 
 	malloc_mutex_lock(&arena->huge_mtx);
 	/* Update the size of the huge allocation. */
 	assert(extent_node_size_get(node) != usize);
 	extent_node_size_set(node, usize);
-	/* Clear node's zeroed field if zeroing failed above. */
-	extent_node_zeroed_set(node, extent_node_zeroed_get(node) && zeroed);
+	/* Update zeroed. */
+	extent_node_zeroed_set(node, post_zeroed);
 	malloc_mutex_unlock(&arena->huge_mtx);
 
 	arena_chunk_ralloc_huge_similar(arena, ptr, oldsize, usize);
@@ -172,7 +173,7 @@ huge_ralloc_no_move_similar(void *ptr, size_t oldsize, size_t usize_min,
 	/* Fill if necessary (growing). */
 	if (oldsize < usize) {
 		if (zero || (config_fill && unlikely(opt_zero))) {
-			if (!zeroed) {
+			if (!pre_zeroed) {
 				memset((void *)((uintptr_t)ptr + oldsize), 0,
 				    usize - oldsize);
 			}
@@ -190,10 +191,11 @@ huge_ralloc_no_move_shrink(void *ptr, size_t oldsize, size_t usize)
 	arena_t *arena;
 	chunk_hooks_t chunk_hooks;
 	size_t cdiff;
-	bool zeroed;
+	bool pre_zeroed, post_zeroed;
 
 	node = huge_node_get(ptr);
 	arena = extent_node_arena_get(node);
+	pre_zeroed = extent_node_zeroed_get(node);
 	chunk_hooks = chunk_hooks_get(arena);
 
 	assert(oldsize > usize);
@@ -209,21 +211,21 @@ huge_ralloc_no_move_shrink(void *ptr, size_t oldsize, size_t usize)
 		if (config_fill && unlikely(opt_junk_free)) {
 			huge_dalloc_junk((void *)((uintptr_t)ptr + usize),
 			    sdiff);
-			zeroed = false;
+			post_zeroed = false;
 		} else {
-			zeroed = !chunk_purge_wrapper(arena, &chunk_hooks,
+			post_zeroed = !chunk_purge_wrapper(arena, &chunk_hooks,
 			    CHUNK_ADDR2BASE((uintptr_t)ptr + usize),
 			    CHUNK_CEILING(oldsize),
 			    CHUNK_ADDR2OFFSET((uintptr_t)ptr + usize), sdiff);
 		}
 	} else
-		zeroed = true;
+		post_zeroed = pre_zeroed;
 
 	malloc_mutex_lock(&arena->huge_mtx);
 	/* Update the size of the huge allocation. */
 	extent_node_size_set(node, usize);
-	/* Clear node's zeroed field if zeroing failed above. */
-	extent_node_zeroed_set(node, extent_node_zeroed_get(node) && zeroed);
+	/* Update zeroed. */
+	extent_node_zeroed_set(node, post_zeroed);
 	malloc_mutex_unlock(&arena->huge_mtx);
 
 	/* Zap the excess chunks. */
diff --git a/test/integration/xallocx.c b/test/integration/xallocx.c
index 058e27c5..f69d48d7 100644
--- a/test/integration/xallocx.c
+++ b/test/integration/xallocx.c
@@ -347,6 +347,121 @@ TEST_BEGIN(test_extra_huge)
 }
 TEST_END
 
+static void
+print_filled_extents(const void *p, uint8_t c, size_t len)
+{
+	const uint8_t *pc = (const uint8_t *)p;
+	size_t i, range0;
+	uint8_t c0;
+
+	malloc_printf("  p=%p, c=%#x, len=%zu:", p, c, len);
+	range0 = 0;
+	c0 = pc[0];
+	for (i = 0; i < len; i++) {
+		if (pc[i] != c0) {
+			malloc_printf(" %#x[%zu..%zu)", c0, range0, i);
+			range0 = i;
+			c0 = pc[i];
+		}
+	}
+	malloc_printf(" %#x[%zu..%zu)\n", c0, range0, i);
+}
+
+static bool
+validate_fill(const void *p, uint8_t c, size_t offset, size_t len)
+{
+	const uint8_t *pc = (const uint8_t *)p;
+	bool err;
+	size_t i;
+
+	for (i = offset, err = false; i < offset+len; i++) {
+		if (pc[i] != c)
+			err = true;
+	}
+
+	if (err)
+		print_filled_extents(p, c, offset + len);
+
+	return (err);
+}
+
+static void
+test_zero(size_t szmin, size_t szmax)
+{
+	size_t sz, nsz;
+	void *p;
+#define FILL_BYTE 0x7aU
+
+	sz = szmax;
+	p = mallocx(sz, MALLOCX_ZERO);
+	assert_ptr_not_null(p, "Unexpected mallocx() error");
+	assert_false(validate_fill(p, 0x00, 0, sz), "Memory not filled: sz=%zu",
+	    sz);
+
+	/*
+	 * Fill with non-zero so that non-debug builds are more likely to detect
+	 * errors.
+	 */
+	memset(p, FILL_BYTE, sz);
+	assert_false(validate_fill(p, FILL_BYTE, 0, sz),
+	    "Memory not filled: sz=%zu", sz);
+
+	/* Shrink in place so that we can expect growing in place to succeed. */
+	sz = szmin;
+	assert_zu_eq(xallocx(p, sz, 0, MALLOCX_ZERO), sz,
+	    "Unexpected xallocx() error");
+	assert_false(validate_fill(p, FILL_BYTE, 0, sz),
+	    "Memory not filled: sz=%zu", sz);
+
+	for (sz = szmin; sz < szmax; sz = nsz) {
+		nsz = nallocx(sz+1, MALLOCX_ZERO);
+		assert_zu_eq(xallocx(p, sz+1, 0, MALLOCX_ZERO), nsz,
+		    "Unexpected xallocx() failure");
+		assert_false(validate_fill(p, FILL_BYTE, 0, sz),
+		    "Memory not filled: sz=%zu", sz);
+		assert_false(validate_fill(p, 0x00, sz, nsz-sz),
+		    "Memory not filled: sz=%zu, nsz-sz=%zu", sz, nsz-sz);
+		memset((void *)((uintptr_t)p + sz), FILL_BYTE, nsz-sz);
+		assert_false(validate_fill(p, FILL_BYTE, 0, nsz),
+		    "Memory not filled: nsz=%zu", nsz);
+	}
+
+	dallocx(p, 0);
+}
+
+TEST_BEGIN(test_zero_large)
+{
+	size_t large0, largemax;
+
+	/* Get size classes. */
+	large0 = get_large_size(0);
+	largemax = get_large_size(get_nlarge()-1);
+
+	test_zero(large0, largemax);
+}
+TEST_END
+
+TEST_BEGIN(test_zero_huge)
+{
+	size_t huge0, huge1;
+	static const bool maps_coalesce =
+#ifdef JEMALLOC_MAPS_COALESCE
+	    true
+#else
+	    false
+#endif
+	    ;
+
+	/* Get size classes. */
+	huge0 = get_huge_size(0);
+	huge1 = get_huge_size(1);
+
+	if (maps_coalesce)
+		test_zero(huge0, huge0 * 4);
+	test_zero(huge1, huge0 * 2);
+}
+TEST_END
+
 int
 main(void)
 {
@@ -359,5 +474,7 @@ main(void)
 	    test_size_extra_overflow,
 	    test_extra_small,
 	    test_extra_large,
-	    test_extra_huge));
+	    test_extra_huge,
+	    test_zero_large,
+	    test_zero_huge));
 }
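
For illustration only (not part of the patch series): a minimal sketch of the caller-visible contract that the new test_zero() cases exercise — when xallocx() grows an allocation in place with MALLOCX_ZERO, the newly exposed trailing bytes must read as zero.  The 4096-byte starting size and the doubling are arbitrary choices for this sketch; xallocx() may decline to grow, in which case the check simply does not run.

    #include <assert.h>
    #include <string.h>
    #include <jemalloc/jemalloc.h>

    int
    main(void)
    {
        size_t i, oldsz = 4096, newsz;
        unsigned char *p = mallocx(oldsz, MALLOCX_ZERO);

        assert(p != NULL);
        memset(p, 0x7a, oldsz);  /* Dirty the original extent. */

        /* xallocx() resizes in place only; it never moves the object. */
        newsz = xallocx(p, oldsz * 2, 0, MALLOCX_ZERO);
        for (i = oldsz; i < newsz; i++)
            assert(p[i] == 0);   /* Newly exposed bytes must be zeroed. */

        dallocx(p, 0);
        return (0);
    }
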
From 03eb37e8fd35587b944f8cbc85cd81a08b0ed17a Mon Sep 17 00:00:00 2001
From: Jason Evans
Date: Thu, 24 Sep 2015 16:44:16 -0700
Subject: [PATCH 3/6] Make mallocx() OOM test more robust.

Make mallocx() OOM testing work correctly even on systems that can
allocate the majority of virtual address space in a single contiguous
region.
---
 test/integration/mallocx.c | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/test/integration/mallocx.c b/test/integration/mallocx.c
index 3973938b..6253175d 100644
--- a/test/integration/mallocx.c
+++ b/test/integration/mallocx.c
@@ -52,9 +52,20 @@ TEST_BEGIN(test_oom)
 
 	hugemax = get_huge_size(get_nhuge()-1);
 
-	/* In practice hugemax is too large to be allocated. */
-	assert_ptr_null(mallocx(hugemax, 0),
-	    "Expected OOM for mallocx(size=%#zx, 0)", hugemax);
+	/*
+	 * It should be impossible to allocate two objects that each consume
+	 * more than half the virtual address space.
+	 */
+	{
+		void *p;
+
+		p = mallocx(hugemax, 0);
+		if (p != NULL) {
+			assert_ptr_null(mallocx(hugemax, 0),
+			    "Expected OOM for mallocx(size=%#zx, 0)", hugemax);
+			dallocx(p, 0);
+		}
+	}
 
 #if LG_SIZEOF_PTR == 3
 	size = ZU(0x8000000000000000);
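
For illustration only (not part of the patch series): the property the revised test relies on, written as a standalone sketch with an arbitrary request of just over half the address space — two live objects that each span more than half of the virtual address space cannot coexist, so if the first request succeeds, an identical second request must fail.

    #include <stdint.h>
    #include <stdio.h>
    #include <jemalloc/jemalloc.h>

    int
    main(void)
    {
        /* Just over half of the virtual address space (illustrative). */
        size_t half_vas = (SIZE_MAX / 2) + 1;
        void *p = mallocx(half_vas, 0);

        if (p != NULL) {
            /* A second object of the same size cannot also fit. */
            void *q = mallocx(half_vas, 0);

            printf("second allocation %s\n",
                q == NULL ? "failed, as expected" : "succeeded (!)");
            if (q != NULL)
                dallocx(q, 0);
            dallocx(p, 0);
        } else
            printf("first allocation failed; nothing to conclude\n");
        return (0);
    }
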
From d36c7ebb004e73122c76276b854364f543458b8c Mon Sep 17 00:00:00 2001
From: Jason Evans
Date: Thu, 24 Sep 2015 16:53:18 -0700
Subject: [PATCH 4/6] Work around an NPTL-specific TSD issue.

Work around a potentially bad thread-specific data initialization
interaction with NPTL (glibc's pthreads implementation).

This resolves #283.
---
 ChangeLog | 2 ++
 src/tsd.c | 3 +++
 2 files changed, 5 insertions(+)

diff --git a/ChangeLog b/ChangeLog
index a9929f82..b7381a58 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -13,6 +13,8 @@ brevity.  Much more detail can be found in the git revision history:
     when resizing from/to a size class that is not a multiple of the chunk size.
   - Fix prof_tctx_dump_iter() to filter out nodes that were created after heap
     profile dumping started.
+  - Work around a potentially bad thread-specific data initialization
+    interaction with NPTL (glibc's pthreads implementation).
 
 * 4.0.2 (September 21, 2015)
 
diff --git a/src/tsd.c b/src/tsd.c
index 2100833a..9ffe9afe 100644
--- a/src/tsd.c
+++ b/src/tsd.c
@@ -73,6 +73,9 @@ tsd_cleanup(void *arg)
 	tsd_t *tsd = (tsd_t *)arg;
 
 	switch (tsd->state) {
+	case tsd_state_uninitialized:
+		/* Do nothing. */
+		break;
 	case tsd_state_nominal:
 #define O(n, t)								\
 		n##_cleanup(tsd);
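
For illustration only (not part of the patches): a sketch of the general pattern behind the tsd_cleanup() guard above, using plain pthreads types rather than jemalloc internals (all names here are hypothetical).  A pthreads TSD destructor can run for a slot whose initialization never completed, so it must tolerate the uninitialized state instead of tearing down bogus data.

    #include <pthread.h>
    #include <stdlib.h>

    typedef enum { SLOT_UNINITIALIZED, SLOT_NOMINAL } slot_state_t;

    typedef struct {
        slot_state_t state;
        void *data;
    } slot_t;

    static pthread_key_t slot_key;

    static void
    slot_cleanup(void *arg)
    {
        slot_t *slot = (slot_t *)arg;

        switch (slot->state) {
        case SLOT_UNINITIALIZED:
            /* Initialization never completed; nothing to tear down. */
            break;
        case SLOT_NOMINAL:
            free(slot->data);
            break;
        }
    }

    static void *
    thread_start(void *unused)
    {
        /* Simulate a thread whose slot never finishes initializing. */
        static slot_t slot = { SLOT_UNINITIALIZED, NULL };

        (void)unused;
        pthread_setspecific(slot_key, &slot);
        return (NULL);  /* slot_cleanup() runs at thread exit. */
    }

    int
    main(void)
    {
        pthread_t thd;

        pthread_key_create(&slot_key, slot_cleanup);
        pthread_create(&thd, NULL, thread_start, NULL);
        pthread_join(thd, NULL);
        return (0);
    }
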
From 044047fae122d3e4a22d8d6748b598922b3c3ccc Mon Sep 17 00:00:00 2001
From: Jason Evans
Date: Thu, 24 Sep 2015 19:52:28 -0700
Subject: [PATCH 5/6] Remove fragile xallocx() test case.

In addition to depending on map coalescing, the test depended on
munmap() being disabled so that chunk recycling would always succeed.
---
 test/integration/xallocx.c | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/test/integration/xallocx.c b/test/integration/xallocx.c
index f69d48d7..37362521 100644
--- a/test/integration/xallocx.c
+++ b/test/integration/xallocx.c
@@ -444,20 +444,11 @@ TEST_END
 TEST_BEGIN(test_zero_huge)
 {
 	size_t huge0, huge1;
-	static const bool maps_coalesce =
-#ifdef JEMALLOC_MAPS_COALESCE
-	    true
-#else
-	    false
-#endif
-	    ;
 
 	/* Get size classes. */
 	huge0 = get_huge_size(0);
 	huge1 = get_huge_size(1);
 
-	if (maps_coalesce)
-		test_zero(huge0, huge0 * 4);
 	test_zero(huge1, huge0 * 2);
 }
 TEST_END

From 02709688e09325026be402b63400f88e587293d7 Mon Sep 17 00:00:00 2001
From: Jason Evans
Date: Thu, 24 Sep 2015 20:05:26 -0700
Subject: [PATCH 6/6] Update ChangeLog for 4.0.3.

---
 ChangeLog | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/ChangeLog b/ChangeLog
index b7381a58..e3b0a519 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -4,7 +4,9 @@ brevity.  Much more detail can be found in the git revision history:
 
     https://github.com/jemalloc/jemalloc
 
-* 4.0.3 (XXX)
+* 4.0.3 (September 24, 2015)
+
+  This bugfix release continues the trend of xallocx() and heap profiling fixes.
 
   Bug fixes:
   - Fix xallocx(..., MALLOCX_ZERO) to zero all trailing bytes of large
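
For illustration only (not part of the patch series): a minimal sketch, assuming an unprefixed jemalloc build, of how an application can confirm at run time which jemalloc release it is linked against — for example, to verify that the 4.0.3 fixes above are present.  "version" is a standard read-only mallctl that yields the release string.

    #include <stdio.h>
    #include <jemalloc/jemalloc.h>

    int
    main(void)
    {
        const char *version;
        size_t len = sizeof(version);

        /* Read-only mallctl; returns the jemalloc release string. */
        if (mallctl("version", (void *)&version, &len, NULL, 0) == 0)
            printf("linked against jemalloc %s\n", version);
        return (0);
    }
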