From 66cd953514a18477eb49732e40d5c2ab5f1b12c5 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 22 Apr 2016 14:34:14 -0700 Subject: [PATCH] Do not allocate metadata via non-auto arenas, nor tcaches. This assures that all internally allocated metadata come from the first opt_narenas arenas, i.e. the automatically multiplexed arenas. --- include/jemalloc/internal/arena.h | 28 +++-- include/jemalloc/internal/huge.h | 7 +- .../jemalloc/internal/jemalloc_internal.h.in | 21 +++- include/jemalloc/internal/private_symbols.txt | 2 + include/jemalloc/internal/tcache.h | 10 +- include/jemalloc/internal/tsd.h | 2 + src/arena.c | 29 +++-- src/ckh.c | 16 +-- src/ctl.c | 2 +- src/huge.c | 28 ++--- src/jemalloc.c | 112 ++++++++++++------ src/prof.c | 42 +++---- src/quarantine.c | 8 +- src/tcache.c | 20 ++-- 14 files changed, 192 insertions(+), 135 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 2130e9a0..103a4c91 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -290,10 +290,18 @@ struct arena_s { unsigned ind; /* - * Number of threads currently assigned to this arena. This field is - * synchronized via atomic operations. + * Number of threads currently assigned to this arena, synchronized via + * atomic operations. Each thread has two distinct assignments, one for + * application-serving allocation, and the other for internal metadata + * allocation. Internal metadata must not be allocated from arenas + * created via the arenas.extend mallctl, because the arena..reset + * mallctl indiscriminately discards all allocations for the affected + * arena. + * + * 0: Application allocation. + * 1: Internal metadata allocation. */ - unsigned nthreads; + unsigned nthreads[2]; /* * There are three classes of arena operations from a locking @@ -541,7 +549,7 @@ void arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info); void arena_quarantine_junk_small(void *ptr, size_t usize); void *arena_malloc_large(tsd_t *tsd, arena_t *arena, szind_t ind, bool zero); void *arena_malloc_hard(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind, - bool zero, tcache_t *tcache); + bool zero); void *arena_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, bool zero, tcache_t *tcache); void arena_prof_promoted(tsd_t *tsd, const void *ptr, size_t size); @@ -583,9 +591,9 @@ void arena_stats_merge(tsd_t *tsd, arena_t *arena, unsigned *nthreads, size_t *nactive, size_t *ndirty, arena_stats_t *astats, malloc_bin_stats_t *bstats, malloc_large_stats_t *lstats, malloc_huge_stats_t *hstats); -unsigned arena_nthreads_get(arena_t *arena); -void arena_nthreads_inc(arena_t *arena); -void arena_nthreads_dec(arena_t *arena); +unsigned arena_nthreads_get(arena_t *arena, bool internal); +void arena_nthreads_inc(arena_t *arena, bool internal); +void arena_nthreads_dec(arena_t *arena, bool internal); arena_t *arena_new(tsd_t *tsd, unsigned ind); bool arena_boot(void); void arena_prefork(tsd_t *tsd, arena_t *arena); @@ -1320,7 +1328,7 @@ arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind, bool zero, assert(size > tcache_maxclass); } - return (arena_malloc_hard(tsd, arena, size, ind, zero, tcache)); + return (arena_malloc_hard(tsd, arena, size, ind, zero)); } JEMALLOC_ALWAYS_INLINE arena_t * @@ -1426,7 +1434,7 @@ arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) } } } else - huge_dalloc(tsd, ptr, tcache); + huge_dalloc(tsd, ptr); } JEMALLOC_ALWAYS_INLINE void @@ -1477,7 +1485,7 @@ arena_sdalloc(tsd_t 
*tsd, void *ptr, size_t size, tcache_t *tcache) } } } else - huge_dalloc(tsd, ptr, tcache); + huge_dalloc(tsd, ptr); } # endif /* JEMALLOC_ARENA_INLINE_B */ #endif diff --git a/include/jemalloc/internal/huge.h b/include/jemalloc/internal/huge.h index f19d3368..9de2055d 100644 --- a/include/jemalloc/internal/huge.h +++ b/include/jemalloc/internal/huge.h @@ -9,10 +9,9 @@ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -void *huge_malloc(tsd_t *tsd, arena_t *arena, size_t usize, bool zero, - tcache_t *tcache); +void *huge_malloc(tsd_t *tsd, arena_t *arena, size_t usize, bool zero); void *huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, - bool zero, tcache_t *tcache); + bool zero); bool huge_ralloc_no_move(tsd_t *tsd, void *ptr, size_t oldsize, size_t usize_min, size_t usize_max, bool zero); void *huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, @@ -21,7 +20,7 @@ void *huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, typedef void (huge_dalloc_junk_t)(tsd_t *, void *, size_t); extern huge_dalloc_junk_t *huge_dalloc_junk; #endif -void huge_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache); +void huge_dalloc(tsd_t *tsd, void *ptr); arena_t *huge_aalloc(const void *ptr); size_t huge_salloc(tsd_t *tsd, const void *ptr); prof_tctx_t *huge_prof_tctx_get(tsd_t *tsd, const void *ptr); diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index ddceabca..fe58c1c6 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -443,6 +443,9 @@ extern bool in_valgrind; /* Number of CPUs. */ extern unsigned ncpus; +/* Number of arenas used for automatic multiplexing of threads and arenas. */ +extern unsigned narenas_auto; + /* * Arenas that are used to service external requests. Not all elements of the * arenas array are necessarily used; arenas are created lazily as needed. @@ -469,10 +472,11 @@ void bootstrap_free(void *ptr); unsigned narenas_total_get(void); arena_t *arena_init(tsd_t *tsd, unsigned ind); arena_tdata_t *arena_tdata_get_hard(tsd_t *tsd, unsigned ind); -arena_t *arena_choose_hard(tsd_t *tsd); +arena_t *arena_choose_hard(tsd_t *tsd, bool internal); void arena_migrate(tsd_t *tsd, unsigned oldind, unsigned newind); void thread_allocated_cleanup(tsd_t *tsd); void thread_deallocated_cleanup(tsd_t *tsd); +void iarena_cleanup(tsd_t *tsd); void arena_cleanup(tsd_t *tsd); void arenas_tdata_cleanup(tsd_t *tsd); void narenas_tdata_cleanup(tsd_t *tsd); @@ -546,7 +550,7 @@ size_t s2u_compute(size_t size); size_t s2u_lookup(size_t size); size_t s2u(size_t size); size_t sa2u(size_t size, size_t alignment); -arena_t *arena_choose(tsd_t *tsd, arena_t *arena); +arena_t *arena_choose(tsd_t *tsd, arena_t *arena, bool internal); arena_tdata_t *arena_tdata_get(tsd_t *tsd, unsigned ind, bool refresh_if_missing); arena_t *arena_get(tsd_t *tsd, unsigned ind, bool init_if_missing); @@ -784,15 +788,16 @@ sa2u(size_t size, size_t alignment) /* Choose an arena based on a per-thread value. */ JEMALLOC_INLINE arena_t * -arena_choose(tsd_t *tsd, arena_t *arena) +arena_choose(tsd_t *tsd, arena_t *arena, bool internal) { arena_t *ret; if (arena != NULL) return (arena); - if (unlikely((ret = tsd_arena_get(tsd)) == NULL)) - ret = arena_choose_hard(tsd); + ret = internal ? 
tsd_iarena_get(tsd) : tsd_arena_get(tsd); + if (unlikely(ret == NULL)) + ret = arena_choose_hard(tsd, internal); return (ret); } @@ -935,6 +940,8 @@ iallocztm(tsd_t *tsd, size_t size, szind_t ind, bool zero, tcache_t *tcache, void *ret; assert(size != 0); + assert(!is_metadata || tcache == NULL); + assert(!is_metadata || arena == NULL || arena->ind < narenas_auto); ret = arena_malloc(tsd, arena, size, ind, zero, tcache, slow_path); if (config_stats && is_metadata && likely(ret != NULL)) { @@ -982,6 +989,8 @@ ipallocztm(tsd_t *tsd, size_t usize, size_t alignment, bool zero, assert(usize != 0); assert(usize == sa2u(usize, alignment)); + assert(!is_metadata || tcache == NULL); + assert(!is_metadata || arena == NULL || arena->ind < narenas_auto); ret = arena_palloc(tsd, arena, usize, alignment, zero, tcache); assert(ALIGNMENT_ADDR2BASE(ret, alignment) == ret); @@ -1052,6 +1061,8 @@ idalloctm(tsd_t *tsd, void *ptr, tcache_t *tcache, bool is_metadata, { assert(ptr != NULL); + assert(!is_metadata || tcache == NULL); + assert(!is_metadata || iaalloc(ptr)->ind < narenas_auto); if (config_stats && is_metadata) { arena_metadata_allocated_sub(iaalloc(ptr), isalloc(tsd, ptr, config_prof)); diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index c8799cba..eacc7c62 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -286,6 +286,7 @@ huge_ralloc_no_move huge_salloc iaalloc iallocztm +iarena_cleanup icalloc icalloct idalloc @@ -342,6 +343,7 @@ malloc_write map_bias map_misc_offset mb_write +narenas_auto narenas_tdata_cleanup narenas_total_get ncpus diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index 1aa64631..82724304 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -293,7 +293,7 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, assert(tcache_success == (ret != NULL)); if (unlikely(!tcache_success)) { bool tcache_hard_success; - arena = arena_choose(tsd, arena); + arena = arena_choose(tsd, arena, false); if (unlikely(arena == NULL)) return (NULL); @@ -354,7 +354,7 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, * Only allocate one large object at a time, because it's quite * expensive to create one and not use it. 
*/ - arena = arena_choose(tsd, arena); + arena = arena_choose(tsd, arena, false); if (unlikely(arena == NULL)) return (NULL); @@ -459,8 +459,10 @@ JEMALLOC_ALWAYS_INLINE tcache_t * tcaches_get(tsd_t *tsd, unsigned ind) { tcaches_t *elm = &tcaches[ind]; - if (unlikely(elm->tcache == NULL)) - elm->tcache = tcache_create(tsd, arena_choose(tsd, NULL)); + if (unlikely(elm->tcache == NULL)) { + elm->tcache = tcache_create(tsd, arena_choose(tsd, NULL, + false)); + } return (elm->tcache); } #endif diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index b23b3b4c..1a1b5c32 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -536,6 +536,7 @@ struct tsd_init_head_s { O(thread_allocated, uint64_t) \ O(thread_deallocated, uint64_t) \ O(prof_tdata, prof_tdata_t *) \ + O(iarena, arena_t *) \ O(arena, arena_t *) \ O(arenas_tdata, arena_tdata_t *) \ O(narenas_tdata, unsigned) \ @@ -552,6 +553,7 @@ struct tsd_init_head_s { NULL, \ NULL, \ NULL, \ + NULL, \ 0, \ false, \ tcache_enabled_default, \ diff --git a/src/arena.c b/src/arena.c index 15023cf9..0da832e2 100644 --- a/src/arena.c +++ b/src/arena.c @@ -2478,10 +2478,10 @@ arena_malloc_large(tsd_t *tsd, arena_t *arena, szind_t binind, bool zero) void * arena_malloc_hard(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind, - bool zero, tcache_t *tcache) + bool zero) { - arena = arena_choose(tsd, arena); + arena = arena_choose(tsd, arena, false); if (unlikely(arena == NULL)) return (NULL); @@ -2489,7 +2489,7 @@ arena_malloc_hard(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind, return (arena_malloc_small(tsd, arena, ind, zero)); if (likely(size <= large_maxclass)) return (arena_malloc_large(tsd, arena, ind, zero)); - return (huge_malloc(tsd, arena, index2size(ind), zero, tcache)); + return (huge_malloc(tsd, arena, index2size(ind), zero)); } /* Only handles large allocations that require more than page alignment. 
*/ @@ -2506,7 +2506,7 @@ arena_palloc_large(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, assert(usize == PAGE_CEILING(usize)); - arena = arena_choose(tsd, arena); + arena = arena_choose(tsd, arena, false); if (unlikely(arena == NULL)) return (NULL); @@ -2606,10 +2606,9 @@ arena_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, ret = arena_palloc_large(tsd, arena, usize, alignment, zero); } else if (likely(alignment <= chunksize)) - ret = huge_malloc(tsd, arena, usize, zero, tcache); + ret = huge_malloc(tsd, arena, usize, zero); else { - ret = huge_palloc(tsd, arena, usize, alignment, zero, - tcache); + ret = huge_palloc(tsd, arena, usize, alignment, zero); } } return (ret); @@ -3211,7 +3210,7 @@ arena_basic_stats_merge_locked(arena_t *arena, unsigned *nthreads, size_t *nactive, size_t *ndirty) { - *nthreads += arena_nthreads_get(arena); + *nthreads += arena_nthreads_get(arena, false); *dss = dss_prec_names[arena->dss_prec]; *lg_dirty_mult = arena->lg_dirty_mult; *decay_time = arena->decay_time; @@ -3294,24 +3293,24 @@ arena_stats_merge(tsd_t *tsd, arena_t *arena, unsigned *nthreads, } unsigned -arena_nthreads_get(arena_t *arena) +arena_nthreads_get(arena_t *arena, bool internal) { - return (atomic_read_u(&arena->nthreads)); + return (atomic_read_u(&arena->nthreads[internal])); } void -arena_nthreads_inc(arena_t *arena) +arena_nthreads_inc(arena_t *arena, bool internal) { - atomic_add_u(&arena->nthreads, 1); + atomic_add_u(&arena->nthreads[internal], 1); } void -arena_nthreads_dec(arena_t *arena) +arena_nthreads_dec(arena_t *arena, bool internal) { - atomic_sub_u(&arena->nthreads, 1); + atomic_sub_u(&arena->nthreads[internal], 1); } arena_t * @@ -3338,7 +3337,7 @@ arena_new(tsd_t *tsd, unsigned ind) return (NULL); arena->ind = ind; - arena->nthreads = 0; + arena->nthreads[0] = arena->nthreads[1] = 0; if (malloc_mutex_init(&arena->lock, "arena", WITNESS_RANK_ARENA)) return (NULL); diff --git a/src/ckh.c b/src/ckh.c index 07b49dd2..aa9803e8 100644 --- a/src/ckh.c +++ b/src/ckh.c @@ -271,7 +271,7 @@ ckh_grow(tsd_t *tsd, ckh_t *ckh) goto label_return; } tab = (ckhc_t *)ipallocztm(tsd, usize, CACHELINE, true, NULL, - true, NULL); + true, arena_choose(tsd, NULL, true)); if (tab == NULL) { ret = true; goto label_return; @@ -283,12 +283,12 @@ ckh_grow(tsd_t *tsd, ckh_t *ckh) ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS; if (!ckh_rebuild(ckh, tab)) { - idalloctm(tsd, tab, tcache_get(tsd, false), true, true); + idalloctm(tsd, tab, NULL, true, true); break; } /* Rebuilding failed, so back out partially rebuilt table. */ - idalloctm(tsd, ckh->tab, tcache_get(tsd, false), true, true); + idalloctm(tsd, ckh->tab, NULL, true, true); ckh->tab = tab; ckh->lg_curbuckets = lg_prevbuckets; } @@ -315,7 +315,7 @@ ckh_shrink(tsd_t *tsd, ckh_t *ckh) if (unlikely(usize == 0 || usize > HUGE_MAXCLASS)) return; tab = (ckhc_t *)ipallocztm(tsd, usize, CACHELINE, true, NULL, true, - NULL); + arena_choose(tsd, NULL, true)); if (tab == NULL) { /* * An OOM error isn't worth propagating, since it doesn't @@ -330,7 +330,7 @@ ckh_shrink(tsd_t *tsd, ckh_t *ckh) ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS; if (!ckh_rebuild(ckh, tab)) { - idalloctm(tsd, tab, tcache_get(tsd, false), true, true); + idalloctm(tsd, tab, NULL, true, true); #ifdef CKH_COUNT ckh->nshrinks++; #endif @@ -338,7 +338,7 @@ ckh_shrink(tsd_t *tsd, ckh_t *ckh) } /* Rebuilding failed, so back out partially rebuilt table. 
*/ - idalloctm(tsd, ckh->tab, tcache_get(tsd, false), true, true); + idalloctm(tsd, ckh->tab, NULL, true, true); ckh->tab = tab; ckh->lg_curbuckets = lg_prevbuckets; #ifdef CKH_COUNT @@ -392,7 +392,7 @@ ckh_new(tsd_t *tsd, ckh_t *ckh, size_t minitems, ckh_hash_t *hash, goto label_return; } ckh->tab = (ckhc_t *)ipallocztm(tsd, usize, CACHELINE, true, NULL, true, - NULL); + arena_choose(tsd, NULL, true)); if (ckh->tab == NULL) { ret = true; goto label_return; @@ -421,7 +421,7 @@ ckh_delete(tsd_t *tsd, ckh_t *ckh) (unsigned long long)ckh->nrelocs); #endif - idalloctm(tsd, ckh->tab, tcache_get(tsd, false), true, true); + idalloctm(tsd, ckh->tab, NULL, true, true); if (config_debug) memset(ckh, JEMALLOC_FREE_JUNK, sizeof(ckh_t)); } diff --git a/src/ctl.c b/src/ctl.c index 50faee7b..fad2fdd7 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1304,7 +1304,7 @@ thread_arena_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, arena_t *oldarena; unsigned newind, oldind; - oldarena = arena_choose(tsd, NULL); + oldarena = arena_choose(tsd, NULL, false); if (oldarena == NULL) return (EAGAIN); diff --git a/src/huge.c b/src/huge.c index 3a802dee..bac2425f 100644 --- a/src/huge.c +++ b/src/huge.c @@ -31,18 +31,17 @@ huge_node_unset(const void *ptr, const extent_node_t *node) } void * -huge_malloc(tsd_t *tsd, arena_t *arena, size_t usize, bool zero, - tcache_t *tcache) +huge_malloc(tsd_t *tsd, arena_t *arena, size_t usize, bool zero) { assert(usize == s2u(usize)); - return (huge_palloc(tsd, arena, usize, chunksize, zero, tcache)); + return (huge_palloc(tsd, arena, usize, chunksize, zero)); } void * huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, - bool zero, tcache_t *tcache) + bool zero) { void *ret; size_t ausize; @@ -58,7 +57,7 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, /* Allocate an extent node with which to track the chunk. */ node = ipallocztm(tsd, CACHELINE_CEILING(sizeof(extent_node_t)), - CACHELINE, false, tcache, true, arena); + CACHELINE, false, NULL, true, arena_choose(tsd, NULL, true)); if (node == NULL) return (NULL); @@ -67,10 +66,10 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, * it is possible to make correct junk/zero fill decisions below. */ is_zeroed = zero; - arena = arena_choose(tsd, arena); + arena = arena_choose(tsd, arena, false); if (unlikely(arena == NULL) || (ret = arena_chunk_alloc_huge(tsd, arena, usize, alignment, &is_zeroed)) == NULL) { - idalloctm(tsd, node, tcache, true, true); + idalloctm(tsd, node, NULL, true, true); return (NULL); } @@ -78,7 +77,7 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, if (huge_node_set(tsd, ret, node)) { arena_chunk_dalloc_huge(tsd, arena, ret, usize); - idalloctm(tsd, node, tcache, true, true); + idalloctm(tsd, node, NULL, true, true); return (NULL); } @@ -331,12 +330,12 @@ huge_ralloc_no_move(tsd_t *tsd, void *ptr, size_t oldsize, size_t usize_min, static void * huge_ralloc_move_helper(tsd_t *tsd, arena_t *arena, size_t usize, - size_t alignment, bool zero, tcache_t *tcache) + size_t alignment, bool zero) { if (alignment <= chunksize) - return (huge_malloc(tsd, arena, usize, zero, tcache)); - return (huge_palloc(tsd, arena, usize, alignment, zero, tcache)); + return (huge_malloc(tsd, arena, usize, zero)); + return (huge_palloc(tsd, arena, usize, alignment, zero)); } void * @@ -358,8 +357,7 @@ huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t usize, * different size class. 
In that case, fall back to allocating new * space and copying. */ - ret = huge_ralloc_move_helper(tsd, arena, usize, alignment, zero, - tcache); + ret = huge_ralloc_move_helper(tsd, arena, usize, alignment, zero); if (ret == NULL) return (NULL); @@ -370,7 +368,7 @@ huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t usize, } void -huge_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache) +huge_dalloc(tsd_t *tsd, void *ptr) { extent_node_t *node; arena_t *arena; @@ -386,7 +384,7 @@ huge_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache) extent_node_size_get(node)); arena_chunk_dalloc_huge(tsd, extent_node_arena_get(node), extent_node_addr_get(node), extent_node_size_get(node)); - idalloctm(tsd, node, tcache, true, true); + idalloctm(tsd, node, NULL, true, true); arena_decay_tick(tsd, arena); } diff --git a/src/jemalloc.c b/src/jemalloc.c index 7543dff1..3bd39c3c 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -60,7 +60,7 @@ static malloc_mutex_t arenas_lock; arena_t **arenas; static unsigned narenas_total; /* Use narenas_total_*(). */ static arena_t *a0; /* arenas[0]; read-only after initialization. */ -static unsigned narenas_auto; /* Read-only after initialization. */ +unsigned narenas_auto; /* Read-only after initialization. */ typedef enum { malloc_init_uninitialized = 3, @@ -318,8 +318,8 @@ a0ialloc(size_t size, bool zero, bool is_metadata) if (unlikely(malloc_init_a0())) return (NULL); - return (iallocztm(NULL, size, size2index(size), zero, false, - is_metadata, arena_get(NULL, 0, false), true)); + return (iallocztm(NULL, size, size2index(size), zero, NULL, + is_metadata, arena_get(NULL, 0, true), true)); } static void @@ -451,15 +451,19 @@ arena_init(tsd_t *tsd, unsigned ind) } static void -arena_bind(tsd_t *tsd, unsigned ind) +arena_bind(tsd_t *tsd, unsigned ind, bool internal) { arena_t *arena; arena = arena_get(tsd, ind, false); - arena_nthreads_inc(arena); + arena_nthreads_inc(arena, internal); - if (tsd_nominal(tsd)) - tsd_arena_set(tsd, arena); + if (tsd_nominal(tsd)) { + if (internal) + tsd_iarena_set(tsd, arena); + else + tsd_arena_set(tsd, arena); + } } void @@ -469,19 +473,22 @@ arena_migrate(tsd_t *tsd, unsigned oldind, unsigned newind) oldarena = arena_get(tsd, oldind, false); newarena = arena_get(tsd, newind, false); - arena_nthreads_dec(oldarena); - arena_nthreads_inc(newarena); + arena_nthreads_dec(oldarena, false); + arena_nthreads_inc(newarena, false); tsd_arena_set(tsd, newarena); } static void -arena_unbind(tsd_t *tsd, unsigned ind) +arena_unbind(tsd_t *tsd, unsigned ind, bool internal) { arena_t *arena; arena = arena_get(tsd, ind, false); - arena_nthreads_dec(arena); - tsd_arena_set(tsd, NULL); + arena_nthreads_dec(arena, internal); + if (internal) + tsd_iarena_set(tsd, NULL); + else + tsd_arena_set(tsd, NULL); } arena_tdata_t * @@ -562,14 +569,24 @@ label_return: /* Slow path, called only by arena_choose(). */ arena_t * -arena_choose_hard(tsd_t *tsd) +arena_choose_hard(tsd_t *tsd, bool internal) { - arena_t *ret; + arena_t *ret JEMALLOC_CC_SILENCE_INIT(NULL); if (narenas_auto > 1) { - unsigned i, choose, first_null; + unsigned i, j, choose[2], first_null; + + /* + * Determine binding for both non-internal and internal + * allocation. + * + * choose[0]: For application allocation. + * choose[1]: For internal metadata allocation. 
+ */ + + for (j = 0; j < 2; j++) + choose[j] = 0; - choose = 0; first_null = narenas_auto; malloc_mutex_lock(tsd, &arenas_lock); assert(arena_get(tsd, 0, false) != NULL); @@ -579,10 +596,13 @@ arena_choose_hard(tsd_t *tsd) * Choose the first arena that has the lowest * number of threads assigned to it. */ - if (arena_nthreads_get(arena_get(tsd, i, false)) - < arena_nthreads_get(arena_get(tsd, choose, - false))) - choose = i; + for (j = 0; j < 2; j++) { + if (arena_nthreads_get(arena_get(tsd, i, + false), !!j) < + arena_nthreads_get(arena_get(tsd, + choose[j], false), !!j)) + choose[j] = i; + } } else if (first_null == narenas_auto) { /* * Record the index of the first uninitialized @@ -597,27 +617,35 @@ arena_choose_hard(tsd_t *tsd) } } - if (arena_nthreads_get(arena_get(tsd, choose, false)) == 0 - || first_null == narenas_auto) { - /* - * Use an unloaded arena, or the least loaded arena if - * all arenas are already initialized. - */ - ret = arena_get(tsd, choose, false); - } else { - /* Initialize a new arena. */ - choose = first_null; - ret = arena_init_locked(tsd, choose); - if (ret == NULL) { - malloc_mutex_unlock(tsd, &arenas_lock); - return (NULL); + for (j = 0; j < 2; j++) { + if (arena_nthreads_get(arena_get(tsd, choose[j], false), + !!j) == 0 || first_null != narenas_auto) { + /* + * Use an unloaded arena, or the least loaded + * arena if all arenas are already initialized. + */ + if (!!j == internal) + ret = arena_get(tsd, choose[j], false); + } else { + arena_t *arena; + + /* Initialize a new arena. */ + choose[j] = first_null; + arena = arena_init_locked(tsd, choose[j]); + if (arena == NULL) { + malloc_mutex_unlock(tsd, &arenas_lock); + return (NULL); + } + if (!!j == internal) + ret = arena; } + arena_bind(tsd, choose[j], !!j); } - arena_bind(tsd, choose); malloc_mutex_unlock(tsd, &arenas_lock); } else { ret = arena_get(tsd, 0, false); - arena_bind(tsd, 0); + arena_bind(tsd, 0, false); + arena_bind(tsd, 0, true); } return (ret); @@ -637,6 +665,16 @@ thread_deallocated_cleanup(tsd_t *tsd) /* Do nothing. */ } +void +iarena_cleanup(tsd_t *tsd) +{ + arena_t *iarena; + + iarena = tsd_iarena_get(tsd); + if (iarena != NULL) + arena_unbind(tsd, iarena->ind, true); +} + void arena_cleanup(tsd_t *tsd) { @@ -644,7 +682,7 @@ arena_cleanup(tsd_t *tsd) arena = tsd_arena_get(tsd); if (arena != NULL) - arena_unbind(tsd, arena->ind); + arena_unbind(tsd, arena->ind, false); } void diff --git a/src/prof.c b/src/prof.c index 520bf90a..82604632 100644 --- a/src/prof.c +++ b/src/prof.c @@ -554,7 +554,8 @@ prof_gctx_create(tsd_t *tsd, prof_bt_t *bt) */ size_t size = offsetof(prof_gctx_t, vec) + (bt->len * sizeof(void *)); prof_gctx_t *gctx = (prof_gctx_t *)iallocztm(tsd, size, - size2index(size), false, tcache_get(tsd, true), true, NULL, true); + size2index(size), false, NULL, true, arena_get(NULL, 0, true), + true); if (gctx == NULL) return (NULL); gctx->lock = prof_gctx_mutex_choose(); @@ -595,7 +596,7 @@ prof_gctx_try_destroy(tsd_t *tsd, prof_tdata_t *tdata_self, prof_gctx_t *gctx, prof_leave(tsd, tdata_self); /* Destroy gctx. 
*/ malloc_mutex_unlock(tsd, gctx->lock); - idalloctm(tsd, gctx, tcache_get(tsd, false), true, true); + idalloctm(tsd, gctx, NULL, true, true); } else { /* * Compensate for increment in prof_tctx_destroy() or @@ -706,7 +707,7 @@ prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) prof_tdata_destroy(tsd, tdata, false); if (destroy_tctx) - idalloctm(tsd, tctx, tcache_get(tsd, false), true, true); + idalloctm(tsd, tctx, NULL, true, true); } static bool @@ -735,8 +736,7 @@ prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata, if (ckh_insert(tsd, &bt2gctx, btkey.v, gctx.v)) { /* OOM. */ prof_leave(tsd, tdata); - idalloctm(tsd, gctx.v, tcache_get(tsd, false), true, - true); + idalloctm(tsd, gctx.v, NULL, true, true); return (true); } new_gctx = true; @@ -780,7 +780,6 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) ret.p->prepared = true; malloc_mutex_unlock(tsd, tdata->lock); if (not_found) { - tcache_t *tcache; void *btkey; prof_gctx_t *gctx; bool new_gctx, error; @@ -794,10 +793,9 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) return (NULL); /* Link a prof_tctx_t into gctx for this thread. */ - tcache = tcache_get(tsd, true); ret.v = iallocztm(tsd, sizeof(prof_tctx_t), - size2index(sizeof(prof_tctx_t)), false, tcache, true, NULL, - true); + size2index(sizeof(prof_tctx_t)), false, NULL, true, + arena_choose(tsd, NULL, true), true); if (ret.p == NULL) { if (new_gctx) prof_gctx_try_destroy(tsd, tdata, gctx, tdata); @@ -817,7 +815,7 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) if (error) { if (new_gctx) prof_gctx_try_destroy(tsd, tdata, gctx, tdata); - idalloctm(tsd, ret.v, tcache, true, true); + idalloctm(tsd, ret.v, NULL, true, true); return (NULL); } malloc_mutex_lock(tsd, gctx->lock); @@ -1238,8 +1236,8 @@ prof_gctx_finish(tsd_t *tsd, prof_gctx_tree_t *gctxs) to_destroy); tctx_tree_remove(&gctx->tctxs, to_destroy); - idalloctm(tsd, to_destroy, - tcache_get(tsd, false), true, true); + idalloctm(tsd, to_destroy, NULL, true, + true); } else next = NULL; } while (next != NULL); @@ -1771,14 +1769,13 @@ prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, char *thread_name, bool active) { prof_tdata_t *tdata; - tcache_t *tcache; cassert(config_prof); /* Initialize an empty cache for this thread. 
*/ - tcache = tcache_get(tsd, true); tdata = (prof_tdata_t *)iallocztm(tsd, sizeof(prof_tdata_t), - size2index(sizeof(prof_tdata_t)), false, tcache, true, NULL, true); + size2index(sizeof(prof_tdata_t)), false, NULL, true, arena_get(NULL, + 0, true), true); if (tdata == NULL) return (NULL); @@ -1792,7 +1789,7 @@ prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, if (ckh_new(tsd, &tdata->bt2tctx, PROF_CKH_MINITEMS, prof_bt_hash, prof_bt_keycomp)) { - idalloctm(tsd, tdata, tcache, true, true); + idalloctm(tsd, tdata, NULL, true, true); return (NULL); } @@ -1848,7 +1845,6 @@ static void prof_tdata_destroy_locked(tsd_t *tsd, prof_tdata_t *tdata, bool even_if_attached) { - tcache_t *tcache; malloc_mutex_assert_owner(tsd, &tdatas_mtx); @@ -1859,11 +1855,10 @@ prof_tdata_destroy_locked(tsd_t *tsd, prof_tdata_t *tdata, assert(prof_tdata_should_destroy_unlocked(tsd, tdata, even_if_attached)); - tcache = tcache_get(tsd, false); if (tdata->thread_name != NULL) - idalloctm(tsd, tdata->thread_name, tcache, true, true); + idalloctm(tsd, tdata->thread_name, NULL, true, true); ckh_delete(tsd, &tdata->bt2tctx); - idalloctm(tsd, tdata, tcache, true, true); + idalloctm(tsd, tdata, NULL, true, true); } static void @@ -2023,8 +2018,8 @@ prof_thread_name_alloc(tsd_t *tsd, const char *thread_name) if (size == 1) return (""); - ret = iallocztm(tsd, size, size2index(size), false, tcache_get(tsd, - true), true, NULL, true); + ret = iallocztm(tsd, size, size2index(size), false, NULL, true, + arena_get(NULL, 0, true), true); if (ret == NULL) return (NULL); memcpy(ret, thread_name, size); @@ -2056,8 +2051,7 @@ prof_thread_name_set(tsd_t *tsd, const char *thread_name) return (EAGAIN); if (tdata->thread_name != NULL) { - idalloctm(tsd, tdata->thread_name, tcache_get(tsd, false), - true, true); + idalloctm(tsd, tdata->thread_name, NULL, true, true); tdata->thread_name = NULL; } if (strlen(s) > 0) diff --git a/src/quarantine.c b/src/quarantine.c index 6cb74b37..ff1637ec 100644 --- a/src/quarantine.c +++ b/src/quarantine.c @@ -30,7 +30,7 @@ quarantine_init(tsd_t *tsd, size_t lg_maxobjs) size = offsetof(quarantine_t, objs) + ((ZU(1) << lg_maxobjs) * sizeof(quarantine_obj_t)); quarantine = (quarantine_t *)iallocztm(tsd, size, size2index(size), - false, tcache_get(tsd, true), true, NULL, true); + false, NULL, true, arena_get(NULL, 0, true), true); if (quarantine == NULL) return (NULL); quarantine->curbytes = 0; @@ -57,7 +57,7 @@ quarantine_alloc_hook_work(tsd_t *tsd) if (tsd_quarantine_get(tsd) == NULL) tsd_quarantine_set(tsd, quarantine); else - idalloctm(tsd, quarantine, tcache_get(tsd, false), true, true); + idalloctm(tsd, quarantine, NULL, true, true); } static quarantine_t * @@ -89,7 +89,7 @@ quarantine_grow(tsd_t *tsd, quarantine_t *quarantine) memcpy(&ret->objs[ncopy_a], quarantine->objs, ncopy_b * sizeof(quarantine_obj_t)); } - idalloctm(tsd, quarantine, tcache_get(tsd, false), true, true); + idalloctm(tsd, quarantine, NULL, true, true); tsd_quarantine_set(tsd, ret); return (ret); @@ -179,7 +179,7 @@ quarantine_cleanup(tsd_t *tsd) quarantine = tsd_quarantine_get(tsd); if (quarantine != NULL) { quarantine_drain(tsd, quarantine, 0); - idalloctm(tsd, quarantine, tcache_get(tsd, false), true, true); + idalloctm(tsd, quarantine, NULL, true, true); tsd_quarantine_set(tsd, NULL); } } diff --git a/src/tcache.c b/src/tcache.c index a9539f64..ca867c72 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -97,7 +97,7 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, assert(binind < 
NBINS); assert(rem <= tbin->ncached); - arena = arena_choose(tsd, NULL); + arena = arena_choose(tsd, NULL, false); assert(arena != NULL); for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) { /* Lock the arena bin associated with the first object. */ @@ -179,7 +179,7 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, assert(binind < nhbins); assert(rem <= tbin->ncached); - arena = arena_choose(tsd, NULL); + arena = arena_choose(tsd, NULL, false); assert(arena != NULL); for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) { /* Lock the arena associated with the first object. */ @@ -307,7 +307,7 @@ tcache_get_hard(tsd_t *tsd) tcache_enabled_set(false); /* Memoize. */ return (NULL); } - arena = arena_choose(tsd, NULL); + arena = arena_choose(tsd, NULL, false); if (unlikely(arena == NULL)) return (NULL); return (tcache_create(tsd, arena)); @@ -328,8 +328,8 @@ tcache_create(tsd_t *tsd, arena_t *arena) /* Avoid false cacheline sharing. */ size = sa2u(size, CACHELINE); - tcache = ipallocztm(tsd, size, CACHELINE, true, false, true, - arena_get(tsd, 0, false)); + tcache = ipallocztm(tsd, size, CACHELINE, true, NULL, true, + arena_get(NULL, 0, true)); if (tcache == NULL) return (NULL); @@ -359,7 +359,7 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache) arena_t *arena; unsigned i; - arena = arena_choose(tsd, NULL); + arena = arena_choose(tsd, NULL, false); tcache_arena_dissociate(tsd, tcache, arena); for (i = 0; i < NBINS; i++) { @@ -391,7 +391,7 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache) arena_prof_accum(tsd, arena, tcache->prof_accumbytes)) prof_idump(tsd); - idalloctm(tsd, tcache, false, true, true); + idalloctm(tsd, tcache, NULL, true, true); } void @@ -446,6 +446,7 @@ tcache_stats_merge(tsd_t *tsd, tcache_t *tcache, arena_t *arena) bool tcaches_create(tsd_t *tsd, unsigned *r_ind) { + arena_t *arena; tcache_t *tcache; tcaches_t *elm; @@ -458,7 +459,10 @@ tcaches_create(tsd_t *tsd, unsigned *r_ind) if (tcaches_avail == NULL && tcaches_past > MALLOCX_TCACHE_MAX) return (true); - tcache = tcache_create(tsd, arena_get(tsd, 0, false)); + arena = arena_choose(tsd, NULL, true); + if (unlikely(arena == NULL)) + return (true); + tcache = tcache_create(tsd, arena); if (tcache == NULL) return (true);
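
For illustration (these helpers are not part of the patch): every internal
metadata call site converted above follows the same shape: no tcache,
is_metadata == true, and an arena obtained with the new internal flag so that
it lies within the first opt_narenas (automatic) arenas.  A minimal sketch of
that pattern follows, assuming the jemalloc-internal declarations visible in
the hunks above (arena_choose(), iallocztm(), idalloctm(), size2index()) are
in scope; the imeta_* wrappers themselves are hypothetical.

#include "jemalloc/internal/jemalloc_internal.h"

/* Allocate internal metadata from an automatic arena, bypassing tcaches. */
static void *
imeta_alloc(tsd_t *tsd, size_t size)
{
	arena_t *iarena;

	/* internal == true: never selects an arenas.extend arena. */
	iarena = arena_choose(tsd, NULL, true);
	if (unlikely(iarena == NULL))
		return (NULL);
	/*
	 * tcache == NULL and is_metadata == true satisfy the new asserts in
	 * iallocztm(); iarena->ind < narenas_auto by construction.
	 */
	return (iallocztm(tsd, size, size2index(size), false, NULL, true,
	    iarena, true));
}

/* Free internal metadata; again no tcache is involved. */
static void
imeta_dalloc(tsd_t *tsd, void *ptr)
{

	idalloctm(tsd, ptr, NULL, true, true);
}

The same goal explains why huge_malloc(), huge_palloc(), and huge_dalloc()
lose their tcache parameter: a tcache fills itself from, and flushes back to,
its associated arena, which may be a manually created one, so routing huge
extent-node metadata through a tcache would defeat the ind < narenas_auto
guarantee stated in the commit message.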
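
The two-slot nthreads counter added to arena_s is read with the same boolean
index; the reporting helper below is hypothetical and exists only to
illustrate that index, while arena_nthreads_get() and malloc_printf() are
existing internals.

/* Report both load counters for one arena (illustration only). */
static void
arena_report_load(arena_t *arena)
{
	unsigned app, meta;

	app = arena_nthreads_get(arena, false);		/* nthreads[0] */
	meta = arena_nthreads_get(arena, true);		/* nthreads[1] */
	malloc_printf("arena %u: %u application thread(s), %u internal "
	    "metadata binding(s)\n", arena->ind, app, meta);
}

Because arena_choose_hard() now balances choose[0] and choose[1]
independently, a thread's application arena and its metadata arena may
differ, and the two bindings are torn down separately (arena_cleanup() for
the former, the new iarena_cleanup() for the latter).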