From f22214a29ddd3bed005cbcc8f2aff7c61ef4940b Mon Sep 17 00:00:00 2001
From: Daniel Micay
Date: Mon, 6 Oct 2014 03:42:10 -0400
Subject: [PATCH] Use regular arena allocation for huge tree nodes.

This avoids grabbing the base mutex, as a step towards fine-grained
locking for huge allocations. The thread cache also provides a tiny
(~3%) improvement for serial huge allocations.
---
 include/jemalloc/internal/huge.h              |  2 +-
 .../jemalloc/internal/jemalloc_internal.h.in  |  4 +--
 src/huge.c                                    |  9 ++++---
 src/jemalloc.c                                |  2 +-
 test/unit/junk.c                              | 27 ++++++++++++++-----
 5 files changed, 29 insertions(+), 15 deletions(-)

diff --git a/include/jemalloc/internal/huge.h b/include/jemalloc/internal/huge.h
index 939993f2..5d4d3a16 100644
--- a/include/jemalloc/internal/huge.h
+++ b/include/jemalloc/internal/huge.h
@@ -21,7 +21,7 @@ void *huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize,
 typedef void (huge_dalloc_junk_t)(void *, size_t);
 extern huge_dalloc_junk_t *huge_dalloc_junk;
 #endif
-void huge_dalloc(void *ptr);
+void huge_dalloc(tsd_t *tsd, void *ptr);
 size_t huge_salloc(const void *ptr);
 prof_tctx_t *huge_prof_tctx_get(const void *ptr);
 void huge_prof_tctx_set(const void *ptr, prof_tctx_t *tctx);
diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in
index c7a5fd8a..f4d5de6a 100644
--- a/include/jemalloc/internal/jemalloc_internal.h.in
+++ b/include/jemalloc/internal/jemalloc_internal.h.in
@@ -938,7 +938,7 @@ idalloct(tsd_t *tsd, void *ptr, bool try_tcache)
 	if (chunk != ptr)
 		arena_dalloc(tsd, chunk, ptr, try_tcache);
 	else
-		huge_dalloc(ptr);
+		huge_dalloc(tsd, ptr);
 }
 
 JEMALLOC_ALWAYS_INLINE void
@@ -952,7 +952,7 @@ isdalloct(tsd_t *tsd, void *ptr, size_t size, bool try_tcache)
 	if (chunk != ptr)
 		arena_sdalloc(tsd, chunk, ptr, size, try_tcache);
 	else
-		huge_dalloc(ptr);
+		huge_dalloc(tsd, ptr);
 }
 
 JEMALLOC_ALWAYS_INLINE void
diff --git a/src/huge.c b/src/huge.c
index 1376729a..541df60a 100644
--- a/src/huge.c
+++ b/src/huge.c
@@ -41,7 +41,8 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment,
 	assert(csize >= usize);
 
 	/* Allocate an extent node with which to track the chunk. */
-	node = base_node_alloc();
+	node = ipalloct(tsd, CACHELINE_CEILING(sizeof(extent_node_t)),
+	    CACHELINE, false, tsd != NULL, NULL);
 	if (node == NULL)
 		return (NULL);
 
@@ -57,7 +58,7 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment,
 	}
 	ret = arena_chunk_alloc_huge(arena, NULL, csize, alignment, &is_zeroed);
 	if (ret == NULL) {
-		base_node_dalloc(node);
+		idalloct(tsd, node, tsd != NULL);
 		return (NULL);
 	}
 
@@ -311,7 +312,7 @@ huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size,
 }
 
 void
-huge_dalloc(void *ptr)
+huge_dalloc(tsd_t *tsd, void *ptr)
 {
 	extent_node_t *node, key;
 
@@ -329,7 +330,7 @@ huge_dalloc(void *ptr)
 	huge_dalloc_junk(node->addr, node->size);
 	arena_chunk_dalloc_huge(node->arena, node->addr,
 	    CHUNK_CEILING(node->size));
-	base_node_dalloc(node);
+	idalloct(tsd, node, tsd != NULL);
 }
 
 size_t
diff --git a/src/jemalloc.c b/src/jemalloc.c
index 3c889e8a..38b5aaf7 100644
--- a/src/jemalloc.c
+++ b/src/jemalloc.c
@@ -240,7 +240,7 @@ a0free(void *ptr)
 	if (chunk != ptr)
 		arena_dalloc(NULL, chunk, ptr, false);
 	else
-		huge_dalloc(ptr);
+		huge_dalloc(NULL, ptr);
 }
 
 /* Create a new arena and insert it into the arenas array at index ind. */
diff --git a/test/unit/junk.c b/test/unit/junk.c
index 5b35a879..1522a610 100644
--- a/test/unit/junk.c
+++ b/test/unit/junk.c
@@ -8,7 +8,16 @@ const char *malloc_conf =
 static arena_dalloc_junk_small_t *arena_dalloc_junk_small_orig;
 static arena_dalloc_junk_large_t *arena_dalloc_junk_large_orig;
 static huge_dalloc_junk_t *huge_dalloc_junk_orig;
-static void *most_recently_junked;
+static void *watch_for_junking;
+static bool saw_junking;
+
+static void
+watch_junking(void *p)
+{
+
+	watch_for_junking = p;
+	saw_junking = false;
+}
 
 static void
 arena_dalloc_junk_small_intercept(void *ptr, arena_bin_info_t *bin_info)
@@ -21,7 +30,8 @@ arena_dalloc_junk_small_intercept(void *ptr, arena_bin_info_t *bin_info)
 		    "Missing junk fill for byte %zu/%zu of deallocated region",
 		    i, bin_info->reg_size);
 	}
-	most_recently_junked = ptr;
+	if (ptr == watch_for_junking)
+		saw_junking = true;
 }
 
 static void
@@ -35,7 +45,8 @@ arena_dalloc_junk_large_intercept(void *ptr, size_t usize)
 		    "Missing junk fill for byte %zu/%zu of deallocated region",
 		    i, usize);
 	}
-	most_recently_junked = ptr;
+	if (ptr == watch_for_junking)
+		saw_junking = true;
 }
 
 static void
@@ -48,7 +59,8 @@ huge_dalloc_junk_intercept(void *ptr, size_t usize)
 	 * enough that it doesn't make sense to duplicate the decision logic in
 	 * test code, so don't actually check that the region is junk-filled.
 	 */
-	most_recently_junked = ptr;
+	if (ptr == watch_for_junking)
+		saw_junking = true;
 }
 
 static void
@@ -87,18 +99,19 @@ test_junk(size_t sz_min, size_t sz_max)
 		}
 
 		if (xallocx(s, sz+1, 0, 0) == sz) {
-			void *junked = (void *)s;
+			watch_junking(s);
 			s = (char *)rallocx(s, sz+1, 0);
 			assert_ptr_not_null((void *)s,
 			    "Unexpected rallocx() failure");
-			assert_ptr_eq(most_recently_junked, junked,
+			assert_true(saw_junking,
 			    "Expected region of size %zu to be junk-filled",
 			    sz);
 		}
 	}
 
+	watch_junking(s);
 	dallocx(s, 0);
-	assert_ptr_eq(most_recently_junked, (void *)s,
+	assert_true(saw_junking,
 	    "Expected region of size %zu to be junk-filled", sz);
 
 	arena_dalloc_junk_small = arena_dalloc_junk_small_orig;
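Editor's note: the following is a minimal, self-contained sketch, not part of the patch above. It only mirrors the shape of the huge_palloc()/huge_dalloc() change: the tree node is obtained from a regular, cacheline-aligned allocation (ipalloct() with the thread cache in the patch) instead of the base allocator that serializes on a global mutex. C11 aligned_alloc() stands in for ipalloct(), CACHELINE is assumed to be 64 bytes, and the struct is a simplified stand-in for jemalloc's extent_node_t.

/*
 * Illustrative sketch only -- not jemalloc code.
 */
#include <stddef.h>
#include <stdlib.h>

#define CACHELINE		64	/* assumed cacheline size */
#define CACHELINE_CEILING(s)						\
	(((s) + CACHELINE - 1) & ~((size_t)CACHELINE - 1))

/* Simplified stand-in for jemalloc's extent_node_t. */
typedef struct {
	void	*addr;	/* chunk address */
	size_t	size;	/* chunk size */
} extent_node_t;

static extent_node_t *
node_alloc(void)
{

	/* Round the node size up to a cacheline multiple and allocate it
	 * through the general-purpose allocator, as the patched
	 * huge_palloc() does via ipalloct(). */
	return (aligned_alloc(CACHELINE,
	    CACHELINE_CEILING(sizeof(extent_node_t))));
}

static void
node_dalloc(extent_node_t *node)
{

	/* The patch frees the node via idalloct(tsd, node, tsd != NULL). */
	free(node);
}

int
main(void)
{
	extent_node_t *node = node_alloc();

	if (node == NULL)
		return (1);
	node->addr = NULL;
	node->size = 0;
	node_dalloc(node);
	return (0);
}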