From 1cb181ed632e7573fb4eab194e4d216867222d27 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 29 Jan 2015 15:30:47 -0800 Subject: [PATCH] Implement explicit tcache support. Add the MALLOCX_TCACHE() and MALLOCX_TCACHE_NONE macros, which can be used in conjunction with the *allocx() API. Add the tcache.create, tcache.flush, and tcache.destroy mallctls. This resolves #145. --- doc/jemalloc.xml.in | 106 ++++++++--- include/jemalloc/internal/arena.h | 51 +++--- include/jemalloc/internal/huge.h | 8 +- .../jemalloc/internal/jemalloc_internal.h.in | 134 +++++++------- include/jemalloc/internal/private_symbols.txt | 5 + include/jemalloc/internal/tcache.h | 102 +++++++---- include/jemalloc/jemalloc_macros.h.in | 12 +- src/arena.c | 24 +-- src/ckh.c | 7 +- src/ctl.c | 113 +++++++++++- src/huge.c | 36 ++-- src/jemalloc.c | 158 ++++++++--------- src/prof.c | 35 ++-- src/quarantine.c | 10 +- src/tcache.c | 166 +++++++++++++----- test/unit/mallctl.c | 110 ++++++++++++ 16 files changed, 740 insertions(+), 337 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 739b33ac..da800ded 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -242,7 +242,7 @@ relevant. Use bitwise or (|) operations to specify one or more of the following: - + MALLOCX_LG_ALIGN(la) @@ -252,7 +252,7 @@ that la is within the valid range. - + MALLOCX_ALIGN(a) @@ -262,7 +262,7 @@ validate that a is a power of 2. - + MALLOCX_ZERO Initialize newly allocated memory to contain zero @@ -271,16 +271,38 @@ that are initialized to contain zero bytes. If this macro is absent, newly allocated memory is uninitialized. - + + MALLOCX_TCACHE(tc) + + + Use the thread-specific cache (tcache) specified by + the identifier tc, which must have been + acquired via the tcache.create + mallctl. This macro does not validate that + tc specifies a valid + identifier. + + + MALLOCX_TCACHE_NONE + + Do not use a thread-specific cache (tcache). Unless + MALLOCX_TCACHE(tc) or + MALLOCX_TCACHE_NONE is specified, an + automatically managed tcache will be used under many circumstances. + This macro cannot be used in the same flags + argument as + MALLOCX_TCACHE(tc). + + MALLOCX_ARENA(a) Use the arena specified by the index - a (and by necessity bypass the thread - cache). This macro has no effect for regions that were allocated - via an arena other than the one specified. This macro does not - validate that a specifies an arena index in - the valid range. + a. This macro has no effect for regions that + were allocated via an arena other than the one specified. This + macro does not validate that a specifies an + arena index in the valid range. @@ -1060,12 +1082,11 @@ malloc_conf = "xmalloc:true";]]> r- [] - Thread-specific caching enabled/disabled. When there - are multiple threads, each thread uses a thread-specific cache for - objects up to a certain size. Thread-specific caching allows many - allocations to be satisfied without performing any thread - synchronization, at the cost of increased memory use. See the - Thread-specific caching (tcache) enabled/disabled. When + there are multiple threads, each thread uses a tcache for objects up to + a certain size. Thread-specific caching allows many allocations to be + satisfied without performing any thread synchronization, at the cost of + increased memory use. See the opt.lg_tcache_max option for related tuning information. This option is enabled by default unless running inside [] Maximum size class (log base 2) to cache in the - thread-specific cache. 
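For illustration, a minimal caller-side sketch of the flags described above, assuming the standard unprefixed public names mallocx(), dallocx(), and mallctl(); error handling is abbreviated. Note that MALLOCX_TCACHE(tc) simply encodes ((tc)+2) << 8, biasing the identifier so that 0 still means automatic tcache management and 1 means MALLOCX_TCACHE_NONE (see jemalloc_macros.h.in below).

#include <stdio.h>
#include <jemalloc/jemalloc.h>

int
main(void)
{
	unsigned tc;
	size_t sz = sizeof(tc);
	void *p;

	/* Acquire an explicit tcache identifier (tcache.create is r-). */
	if (mallctl("tcache.create", &tc, &sz, NULL, 0) != 0) {
		fprintf(stderr, "tcache.create failed\n");
		return (1);
	}

	/* Allocate and deallocate through the explicit tcache. */
	p = mallocx(4096, MALLOCX_TCACHE(tc));
	if (p != NULL)
		dallocx(p, MALLOCX_TCACHE(tc));

	/* Or bypass thread-specific caching entirely. */
	p = mallocx(4096, MALLOCX_TCACHE_NONE);
	if (p != NULL)
		dallocx(p, MALLOCX_TCACHE_NONE);

	return (0);
}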
At a minimum, all small size classes are - cached, and at a maximum all large size classes are cached. The + thread-specific cache (tcache). At a minimum, all small size classes + are cached, and at a maximum all large size classes are cached. The default maximum is 32 KiB (2^15). @@ -1339,7 +1360,7 @@ malloc_conf = "xmalloc:true";]]> Enable/disable calling thread's tcache. The tcache is implicitly flushed as a side effect of becoming disabled (see thread.tcache.flush). + linkend="thread.tcache.flush">thread.tcache.flush). @@ -1350,9 +1371,9 @@ malloc_conf = "xmalloc:true";]]> -- [] - Flush calling thread's tcache. This interface releases - all cached objects and internal data structures associated with the - calling thread's thread-specific cache. Ordinarily, this interface + Flush calling thread's thread-specific cache (tcache). + This interface releases all cached objects and internal data structures + associated with the calling thread's tcache. Ordinarily, this interface need not be called, since automatic periodic incremental garbage collection occurs, and the thread cache is automatically discarded when a thread exits. However, garbage collection is triggered by allocation @@ -1399,6 +1420,49 @@ malloc_conf = "xmalloc:true";]]> default. + + + tcache.create + (unsigned) + r- + [] + + Create an explicit thread-specific cache (tcache) and + return an identifier that can be passed to the MALLOCX_TCACHE(tc) + macro to explicitly use the specified cache rather than the + automatically managed one that is used by default. Each explicit cache + can be used by only one thread at a time; the application must ensure + that this constraint holds. + + + + + + tcache.flush + (unsigned) + -w + [] + + Flush the specified thread-specific cache (tcache). The + same considerations apply to this interface as to thread.tcache.flush, + except that the tcache will never be automatically discarded. + + + + + + tcache.destroy + (unsigned) + -w + [] + + Flush the specified thread-specific cache (tcache) and + make the identifier available for use during a future tcache creation. + + + arena.<i>.purge diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 46367f68..5476899d 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -272,7 +272,8 @@ struct arena_s { arena_stats_t stats; /* * List of tcaches for extant threads associated with this arena. - * Stats from these are merged incrementally, and at exit if + * opt_stats_print is enabled.
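A lifecycle sketch for the three mallctls documented above: tcache.create reads an identifier out, while tcache.flush and tcache.destroy take one as write-only input. The helper name is illustrative, not part of this patch.

#include <jemalloc/jemalloc.h>

/* Illustrative helper: create, use, flush, and destroy an explicit tcache. */
static void
explicit_tcache_lifecycle(void)
{
	unsigned tc;
	size_t sz = sizeof(tc);
	void *p;

	if (mallctl("tcache.create", &tc, &sz, NULL, 0) != 0)
		return;

	p = mallocx(1, MALLOCX_TCACHE(tc));
	if (p != NULL)
		dallocx(p, MALLOCX_TCACHE(tc));

	/* Release cached objects; tc remains a valid identifier. */
	mallctl("tcache.flush", NULL, NULL, &tc, sizeof(tc));

	/* Flush and make the identifier reusable by a future tcache.create. */
	mallctl("tcache.destroy", NULL, NULL, &tc, sizeof(tc));
}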
*/ ql_head(tcache_t) tcache_ql; @@ -387,8 +388,7 @@ extern arena_ralloc_junk_large_t *arena_ralloc_junk_large; bool arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, bool zero); void *arena_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, - size_t size, size_t extra, size_t alignment, bool zero, - bool try_tcache_alloc, bool try_tcache_dalloc); + size_t size, size_t extra, size_t alignment, bool zero, tcache_t *tcache); dss_prec_t arena_dss_prec_get(arena_t *arena); bool arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec); void arena_stats_merge(arena_t *arena, const char **dss, size_t *nactive, @@ -450,13 +450,13 @@ unsigned arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, prof_tctx_t *arena_prof_tctx_get(const void *ptr); void arena_prof_tctx_set(const void *ptr, prof_tctx_t *tctx); void *arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero, - bool try_tcache); + tcache_t *tcache); arena_t *arena_aalloc(const void *ptr); size_t arena_salloc(const void *ptr, bool demote); void arena_dalloc(tsd_t *tsd, arena_chunk_t *chunk, void *ptr, - bool try_tcache); + tcache_t *tcache); void arena_sdalloc(tsd_t *tsd, arena_chunk_t *chunk, void *ptr, size_t size, - bool try_tcache); + tcache_t *tcache); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_)) @@ -943,17 +943,15 @@ arena_prof_tctx_set(const void *ptr, prof_tctx_t *tctx) JEMALLOC_ALWAYS_INLINE void * arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero, - bool try_tcache) + tcache_t *tcache) { - tcache_t *tcache; assert(size != 0); assert(size <= arena_maxclass); if (likely(size <= SMALL_MAXCLASS)) { - if (likely(try_tcache) && likely((tcache = tcache_get(tsd, - true)) != NULL)) - return (tcache_alloc_small(tcache, size, zero)); + if (likely(tcache != NULL)) + return (tcache_alloc_small(tsd, tcache, size, zero)); else { arena = arena_choose(tsd, arena); if (unlikely(arena == NULL)) @@ -965,9 +963,8 @@ arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero, * Initialize tcache after checking size in order to avoid * infinite recursion during tcache initialization. */ - if (try_tcache && size <= tcache_maxclass && likely((tcache = - tcache_get(tsd, true)) != NULL)) - return (tcache_alloc_large(tcache, size, zero)); + if (likely(tcache != NULL) && size <= tcache_maxclass) + return (tcache_alloc_large(tsd, tcache, size, zero)); else { arena = arena_choose(tsd, arena); if (unlikely(arena == NULL)) @@ -1027,10 +1024,9 @@ arena_salloc(const void *ptr, bool demote) } JEMALLOC_ALWAYS_INLINE void -arena_dalloc(tsd_t *tsd, arena_chunk_t *chunk, void *ptr, bool try_tcache) +arena_dalloc(tsd_t *tsd, arena_chunk_t *chunk, void *ptr, tcache_t *tcache) { size_t pageind, mapbits; - tcache_t *tcache; assert(ptr != NULL); assert(CHUNK_ADDR2BASE(ptr) != ptr); @@ -1040,11 +1036,10 @@ arena_dalloc(tsd_t *tsd, arena_chunk_t *chunk, void *ptr, bool try_tcache) assert(arena_mapbits_allocated_get(chunk, pageind) != 0); if (likely((mapbits & CHUNK_MAP_LARGE) == 0)) { /* Small allocation. 
*/ - if (likely(try_tcache) && likely((tcache = tcache_get(tsd, - false)) != NULL)) { + if (likely(tcache != NULL)) { index_t binind = arena_ptr_small_binind_get(ptr, mapbits); - tcache_dalloc_small(tcache, ptr, binind); + tcache_dalloc_small(tsd, tcache, ptr, binind); } else arena_dalloc_small(chunk->arena, chunk, ptr, pageind); } else { @@ -1052,9 +1047,8 @@ arena_dalloc(tsd_t *tsd, arena_chunk_t *chunk, void *ptr, bool try_tcache) assert(((uintptr_t)ptr & PAGE_MASK) == 0); - if (try_tcache && size <= tcache_maxclass && likely((tcache = - tcache_get(tsd, false)) != NULL)) - tcache_dalloc_large(tcache, ptr, size); + if (likely(tcache != NULL) && size <= tcache_maxclass) + tcache_dalloc_large(tsd, tcache, ptr, size); else arena_dalloc_large(chunk->arena, chunk, ptr); } @@ -1062,9 +1056,8 @@ arena_dalloc(tsd_t *tsd, arena_chunk_t *chunk, void *ptr, bool try_tcache) JEMALLOC_ALWAYS_INLINE void arena_sdalloc(tsd_t *tsd, arena_chunk_t *chunk, void *ptr, size_t size, - bool try_tcache) + tcache_t *tcache) { - tcache_t *tcache; assert(ptr != NULL); assert(CHUNK_ADDR2BASE(ptr) != ptr); @@ -1082,10 +1075,9 @@ arena_sdalloc(tsd_t *tsd, arena_chunk_t *chunk, void *ptr, size_t size, if (likely(size <= SMALL_MAXCLASS)) { /* Small allocation. */ - if (likely(try_tcache) && likely((tcache = tcache_get(tsd, - false)) != NULL)) { + if (likely(tcache != NULL)) { index_t binind = size2index(size); - tcache_dalloc_small(tcache, ptr, binind); + tcache_dalloc_small(tsd, tcache, ptr, binind); } else { size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; @@ -1094,9 +1086,8 @@ arena_sdalloc(tsd_t *tsd, arena_chunk_t *chunk, void *ptr, size_t size, } else { assert(((uintptr_t)ptr & PAGE_MASK) == 0); - if (try_tcache && size <= tcache_maxclass && (tcache = - tcache_get(tsd, false)) != NULL) - tcache_dalloc_large(tcache, ptr, size); + if (likely(tcache != NULL) && size <= tcache_maxclass) + tcache_dalloc_large(tsd, tcache, ptr, size); else arena_dalloc_large(chunk->arena, chunk, ptr); } diff --git a/include/jemalloc/internal/huge.h b/include/jemalloc/internal/huge.h index decb0249..231cc368 100644 --- a/include/jemalloc/internal/huge.h +++ b/include/jemalloc/internal/huge.h @@ -10,19 +10,19 @@ #ifdef JEMALLOC_H_EXTERNS void *huge_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero, - bool try_tcache); + tcache_t *tcache); void *huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, - bool zero, bool try_tcache); + bool zero, tcache_t *tcache); bool huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, bool zero); void *huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, size_t extra, size_t alignment, bool zero, - bool try_tcache_alloc, bool try_tcache_dalloc); + tcache_t *tcache); #ifdef JEMALLOC_JET typedef void (huge_dalloc_junk_t)(void *, size_t); extern huge_dalloc_junk_t *huge_dalloc_junk; #endif -void huge_dalloc(tsd_t *tsd, void *ptr, bool try_tcache); +void huge_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache); arena_t *huge_aalloc(const void *ptr); size_t huge_salloc(const void *ptr); prof_tctx_t *huge_prof_tctx_get(const void *ptr); diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 2b167420..b8c994cb 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -172,7 +172,21 @@ static const bool config_ivsalloc = /* Size class index type. 
*/ typedef unsigned index_t; -#define MALLOCX_ARENA_MASK ((int)~0xff) +/* + * Flags bits: + * + * a: arena + * t: tcache + * 0: unused + * z: zero + * n: alignment + * + * aaaaaaaa aaaatttt tttttttt 0znnnnnn + */ +#define MALLOCX_ARENA_MASK ((int)~0xfffff) +#define MALLOCX_ARENA_MAX 0xffe +#define MALLOCX_TCACHE_MASK ((int)~0xfff000ffU) +#define MALLOCX_TCACHE_MAX 0xffd #define MALLOCX_LG_ALIGN_MASK ((int)0x3f) /* Use MALLOCX_ALIGN_GET() if alignment may not be specified in flags. */ #define MALLOCX_ALIGN_GET_SPECIFIED(flags) \ @@ -181,8 +195,11 @@ typedef unsigned index_t; (MALLOCX_ALIGN_GET_SPECIFIED(flags) & (SIZE_T_MAX-1)) #define MALLOCX_ZERO_GET(flags) \ ((bool)(flags & MALLOCX_ZERO)) + +#define MALLOCX_TCACHE_GET(flags) \ + (((unsigned)((flags & MALLOCX_TCACHE_MASK) >> 8)) - 2) #define MALLOCX_ARENA_GET(flags) \ - (((unsigned)(flags >> 8)) - 1) + (((unsigned)(((unsigned)flags) >> 20)) - 1) /* Smallest size class to support. */ #define TINY_MIN (1U << LG_TINY_MIN) @@ -749,7 +766,7 @@ arena_get(tsd_t *tsd, unsigned ind, bool init_if_missing, * ind is invalid, cache is old (too small), or arena to be * initialized. */ - return (refresh_if_missing ? arena_get_hard(tsd, ind, + return (refresh_if_missing ? arena_get_hard(tsd, ind, init_if_missing) : NULL); } arena = arenas_cache[ind]; @@ -778,32 +795,31 @@ arena_get(tsd_t *tsd, unsigned ind, bool init_if_missing, #ifndef JEMALLOC_ENABLE_INLINE arena_t *iaalloc(const void *ptr); size_t isalloc(const void *ptr, bool demote); -void *iallocztm(tsd_t *tsd, size_t size, bool zero, bool try_tcache, +void *iallocztm(tsd_t *tsd, size_t size, bool zero, tcache_t *tcache, bool is_metadata, arena_t *arena); -void *imalloct(tsd_t *tsd, size_t size, bool try_tcache, arena_t *arena); +void *imalloct(tsd_t *tsd, size_t size, tcache_t *tcache, arena_t *arena); void *imalloc(tsd_t *tsd, size_t size); -void *icalloct(tsd_t *tsd, size_t size, bool try_tcache, arena_t *arena); +void *icalloct(tsd_t *tsd, size_t size, tcache_t *tcache, arena_t *arena); void *icalloc(tsd_t *tsd, size_t size); void *ipallocztm(tsd_t *tsd, size_t usize, size_t alignment, bool zero, - bool try_tcache, bool is_metadata, arena_t *arena); + tcache_t *tcache, bool is_metadata, arena_t *arena); void *ipalloct(tsd_t *tsd, size_t usize, size_t alignment, bool zero, - bool try_tcache, arena_t *arena); + tcache_t *tcache, arena_t *arena); void *ipalloc(tsd_t *tsd, size_t usize, size_t alignment, bool zero); size_t ivsalloc(const void *ptr, bool demote); size_t u2rz(size_t usize); size_t p2rz(const void *ptr); -void idalloctm(tsd_t *tsd, void *ptr, bool try_tcache, bool is_metadata); -void idalloct(tsd_t *tsd, void *ptr, bool try_tcache); +void idalloctm(tsd_t *tsd, void *ptr, tcache_t *tcache, bool is_metadata); +void idalloct(tsd_t *tsd, void *ptr, tcache_t *tcache); void idalloc(tsd_t *tsd, void *ptr); -void iqalloc(tsd_t *tsd, void *ptr, bool try_tcache); -void isdalloct(tsd_t *tsd, void *ptr, size_t size, bool try_tcache); -void isqalloc(tsd_t *tsd, void *ptr, size_t size, bool try_tcache); +void iqalloc(tsd_t *tsd, void *ptr, tcache_t *tcache); +void isdalloct(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache); +void isqalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache); void *iralloct_realign(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, - size_t extra, size_t alignment, bool zero, bool try_tcache_alloc, - bool try_tcache_dalloc, arena_t *arena); -void *iralloct(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, - size_t alignment, bool zero, bool try_tcache_alloc, 
bool try_tcache_dalloc, + size_t extra, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena); +void *iralloct(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, + size_t alignment, bool zero, tcache_t *tcache, arena_t *arena); void *iralloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t alignment, bool zero); bool ixalloc(void *ptr, size_t oldsize, size_t size, size_t extra, @@ -853,7 +869,7 @@ isalloc(const void *ptr, bool demote) } JEMALLOC_ALWAYS_INLINE void * -iallocztm(tsd_t *tsd, size_t size, bool zero, bool try_tcache, bool is_metadata, +iallocztm(tsd_t *tsd, size_t size, bool zero, tcache_t *tcache, bool is_metadata, arena_t *arena) { void *ret; @@ -861,9 +877,9 @@ iallocztm(tsd_t *tsd, size_t size, bool zero, bool try_tcache, bool is_metadata, assert(size != 0); if (likely(size <= arena_maxclass)) - ret = arena_malloc(tsd, arena, size, zero, try_tcache); + ret = arena_malloc(tsd, arena, size, zero, tcache); else - ret = huge_malloc(tsd, arena, size, zero, try_tcache); + ret = huge_malloc(tsd, arena, size, zero, tcache); if (config_stats && is_metadata && likely(ret != NULL)) { arena_metadata_allocated_add(iaalloc(ret), isalloc(ret, config_prof)); @@ -872,36 +888,36 @@ iallocztm(tsd_t *tsd, size_t size, bool zero, bool try_tcache, bool is_metadata, } JEMALLOC_ALWAYS_INLINE void * -imalloct(tsd_t *tsd, size_t size, bool try_tcache, arena_t *arena) +imalloct(tsd_t *tsd, size_t size, tcache_t *tcache, arena_t *arena) { - return (iallocztm(tsd, size, false, try_tcache, false, arena)); + return (iallocztm(tsd, size, false, tcache, false, arena)); } JEMALLOC_ALWAYS_INLINE void * imalloc(tsd_t *tsd, size_t size) { - return (iallocztm(tsd, size, false, true, false, NULL)); + return (iallocztm(tsd, size, false, tcache_get(tsd, true), false, NULL)); } JEMALLOC_ALWAYS_INLINE void * -icalloct(tsd_t *tsd, size_t size, bool try_tcache, arena_t *arena) +icalloct(tsd_t *tsd, size_t size, tcache_t *tcache, arena_t *arena) { - return (iallocztm(tsd, size, true, try_tcache, false, arena)); + return (iallocztm(tsd, size, true, tcache, false, arena)); } JEMALLOC_ALWAYS_INLINE void * icalloc(tsd_t *tsd, size_t size) { - return (iallocztm(tsd, size, true, true, false, NULL)); + return (iallocztm(tsd, size, true, tcache_get(tsd, true), false, NULL)); } JEMALLOC_ALWAYS_INLINE void * ipallocztm(tsd_t *tsd, size_t usize, size_t alignment, bool zero, - bool try_tcache, bool is_metadata, arena_t *arena) + tcache_t *tcache, bool is_metadata, arena_t *arena) { void *ret; @@ -909,7 +925,7 @@ ipallocztm(tsd_t *tsd, size_t usize, size_t alignment, bool zero, assert(usize == sa2u(usize, alignment)); if (usize <= SMALL_MAXCLASS && alignment < PAGE) - ret = arena_malloc(tsd, arena, usize, zero, try_tcache); + ret = arena_malloc(tsd, arena, usize, zero, tcache); else { if (likely(usize <= arena_maxclass)) { arena = arena_choose(tsd, arena); @@ -917,10 +933,10 @@ ipallocztm(tsd_t *tsd, size_t usize, size_t alignment, bool zero, return (NULL); ret = arena_palloc(arena, usize, alignment, zero); } else if (likely(alignment <= chunksize)) - ret = huge_malloc(tsd, arena, usize, zero, try_tcache); + ret = huge_malloc(tsd, arena, usize, zero, tcache); else { ret = huge_palloc(tsd, arena, usize, alignment, zero, - try_tcache); + tcache); } } assert(ALIGNMENT_ADDR2BASE(ret, alignment) == ret); @@ -932,19 +948,19 @@ ipallocztm(tsd_t *tsd, size_t usize, size_t alignment, bool zero, } JEMALLOC_ALWAYS_INLINE void * -ipalloct(tsd_t *tsd, size_t usize, size_t alignment, bool zero, bool try_tcache, - arena_t 
*arena) +ipalloct(tsd_t *tsd, size_t usize, size_t alignment, bool zero, + tcache_t *tcache, arena_t *arena) { - return (ipallocztm(tsd, usize, alignment, zero, try_tcache, false, - arena)); + return (ipallocztm(tsd, usize, alignment, zero, tcache, false, arena)); } JEMALLOC_ALWAYS_INLINE void * ipalloc(tsd_t *tsd, size_t usize, size_t alignment, bool zero) { - return (ipallocztm(tsd, usize, alignment, zero, true, false, NULL)); + return (ipallocztm(tsd, usize, alignment, zero, tcache_get(tsd, + true), false, NULL)); } JEMALLOC_ALWAYS_INLINE size_t @@ -981,7 +997,7 @@ p2rz(const void *ptr) } JEMALLOC_ALWAYS_INLINE void -idalloctm(tsd_t *tsd, void *ptr, bool try_tcache, bool is_metadata) +idalloctm(tsd_t *tsd, void *ptr, tcache_t *tcache, bool is_metadata) { arena_chunk_t *chunk; @@ -993,37 +1009,37 @@ idalloctm(tsd_t *tsd, void *ptr, bool try_tcache, bool is_metadata) chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (likely(chunk != ptr)) - arena_dalloc(tsd, chunk, ptr, try_tcache); + arena_dalloc(tsd, chunk, ptr, tcache); else - huge_dalloc(tsd, ptr, try_tcache); + huge_dalloc(tsd, ptr, tcache); } JEMALLOC_ALWAYS_INLINE void -idalloct(tsd_t *tsd, void *ptr, bool try_tcache) +idalloct(tsd_t *tsd, void *ptr, tcache_t *tcache) { - idalloctm(tsd, ptr, try_tcache, false); + idalloctm(tsd, ptr, tcache, false); } JEMALLOC_ALWAYS_INLINE void idalloc(tsd_t *tsd, void *ptr) { - idalloctm(tsd, ptr, true, false); + idalloctm(tsd, ptr, tcache_get(tsd, false), false); } JEMALLOC_ALWAYS_INLINE void -iqalloc(tsd_t *tsd, void *ptr, bool try_tcache) +iqalloc(tsd_t *tsd, void *ptr, tcache_t *tcache) { if (config_fill && unlikely(opt_quarantine)) quarantine(tsd, ptr); else - idalloctm(tsd, ptr, try_tcache, false); + idalloctm(tsd, ptr, tcache, false); } JEMALLOC_ALWAYS_INLINE void -isdalloct(tsd_t *tsd, void *ptr, size_t size, bool try_tcache) +isdalloct(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache) { arena_chunk_t *chunk; @@ -1031,25 +1047,24 @@ isdalloct(tsd_t *tsd, void *ptr, size_t size, bool try_tcache) chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (likely(chunk != ptr)) - arena_sdalloc(tsd, chunk, ptr, size, try_tcache); + arena_sdalloc(tsd, chunk, ptr, size, tcache); else - huge_dalloc(tsd, ptr, try_tcache); + huge_dalloc(tsd, ptr, tcache); } JEMALLOC_ALWAYS_INLINE void -isqalloc(tsd_t *tsd, void *ptr, size_t size, bool try_tcache) +isqalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache) { if (config_fill && unlikely(opt_quarantine)) quarantine(tsd, ptr); else - isdalloct(tsd, ptr, size, try_tcache); + isdalloct(tsd, ptr, size, tcache); } JEMALLOC_ALWAYS_INLINE void * iralloct_realign(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, - size_t extra, size_t alignment, bool zero, bool try_tcache_alloc, - bool try_tcache_dalloc, arena_t *arena) + size_t extra, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena) { void *p; size_t usize, copysize; @@ -1057,7 +1072,7 @@ iralloct_realign(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, usize = sa2u(size + extra, alignment); if (usize == 0) return (NULL); - p = ipalloct(tsd, usize, alignment, zero, try_tcache_alloc, arena); + p = ipalloct(tsd, usize, alignment, zero, tcache, arena); if (p == NULL) { if (extra == 0) return (NULL); @@ -1065,8 +1080,7 @@ usize = sa2u(size, alignment); if (usize == 0) return (NULL); - p = ipalloct(tsd, usize, alignment, zero, try_tcache_alloc, - arena); + p = ipalloct(tsd, usize, alignment, zero, tcache, arena); if (p == NULL)
return (NULL); } @@ -1076,13 +1090,13 @@ iralloct_realign(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, */ copysize = (size < oldsize) ? size : oldsize; memcpy(p, ptr, copysize); - isqalloc(tsd, ptr, oldsize, try_tcache_dalloc); + isqalloc(tsd, ptr, oldsize, tcache); return (p); } JEMALLOC_ALWAYS_INLINE void * iralloct(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t alignment, - bool zero, bool try_tcache_alloc, bool try_tcache_dalloc, arena_t *arena) + bool zero, tcache_t *tcache, arena_t *arena) { assert(ptr != NULL); @@ -1095,15 +1109,15 @@ iralloct(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t alignment, * and copy. */ return (iralloct_realign(tsd, ptr, oldsize, size, 0, alignment, - zero, try_tcache_alloc, try_tcache_dalloc, arena)); + zero, tcache, arena)); } if (likely(size <= arena_maxclass)) { return (arena_ralloc(tsd, arena, ptr, oldsize, size, 0, - alignment, zero, try_tcache_alloc, try_tcache_dalloc)); + alignment, zero, tcache)); } else { return (huge_ralloc(tsd, arena, ptr, oldsize, size, 0, - alignment, zero, try_tcache_alloc, try_tcache_dalloc)); + alignment, zero, tcache)); } } @@ -1112,8 +1126,8 @@ iralloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t alignment, bool zero) { - return (iralloct(tsd, ptr, oldsize, size, alignment, zero, true, true, - NULL)); + return (iralloct(tsd, ptr, oldsize, size, alignment, zero, + tcache_get(tsd, true), NULL)); } JEMALLOC_ALWAYS_INLINE bool diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 7a78f580..cf42bead 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -425,6 +425,11 @@ tcache_get_hard tcache_maxclass tcache_salloc tcache_stats_merge +tcaches +tcaches_create +tcaches_destroy +tcaches_flush +tcaches_get thread_allocated_cleanup thread_deallocated_cleanup tsd_booted diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index 6e97b3dd..2a3952be 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -4,6 +4,7 @@ typedef struct tcache_bin_info_s tcache_bin_info_t; typedef struct tcache_bin_s tcache_bin_t; typedef struct tcache_s tcache_t; +typedef struct tcaches_s tcaches_t; /* * tcache pointers close to NULL are used to encode state information that is @@ -70,7 +71,6 @@ struct tcache_bin_s { struct tcache_s { ql_elm(tcache_t) link; /* Used for aggregating stats. */ uint64_t prof_accumbytes;/* Cleared after arena_prof_accum(). */ - arena_t *arena; /* This thread's arena. */ unsigned ev_cnt; /* Event count since incremental GC. */ index_t next_gc_bin; /* Next bin to GC. */ tcache_bin_t tbins[1]; /* Dynamically sized. */ @@ -82,6 +82,14 @@ struct tcache_s { */ }; +/* Linkage for list of available (previously used) explicit tcache IDs. */ +struct tcaches_s { + union { + tcache_t *tcache; + tcaches_t *next; + }; +}; + #endif /* JEMALLOC_H_STRUCTS */ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS @@ -95,27 +103,41 @@ extern tcache_bin_info_t *tcache_bin_info; * Number of tcache bins. There are NBINS small-object bins, plus 0 or more * large-object bins. */ -extern size_t nhbins; +extern size_t nhbins; /* Maximum cached size class. */ -extern size_t tcache_maxclass; +extern size_t tcache_maxclass; + +/* + * Explicit tcaches, managed via the tcache.{create,flush,destroy} mallctls and + * usable via the MALLOCX_TCACHE() flag. 
The automatic per thread tcaches are + * completely disjoint from this data structure. tcaches starts off as a sparse + * array, so it has no physical memory footprint until individual pages are + * touched. This allows the entire array to be allocated the first time an + * explicit tcache is created without a disproportionate impact on memory usage. + */ +extern tcaches_t *tcaches; size_t tcache_salloc(const void *ptr); -void tcache_event_hard(tcache_t *tcache); -void *tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin, - index_t binind); -void tcache_bin_flush_small(tcache_bin_t *tbin, index_t binind, unsigned rem, - tcache_t *tcache); -void tcache_bin_flush_large(tcache_bin_t *tbin, index_t binind, unsigned rem, - tcache_t *tcache); +void tcache_event_hard(tsd_t *tsd, tcache_t *tcache); +void *tcache_alloc_small_hard(tsd_t *tsd, tcache_t *tcache, + tcache_bin_t *tbin, index_t binind); +void tcache_bin_flush_small(tsd_t *tsd, tcache_bin_t *tbin, index_t binind, + unsigned rem, tcache_t *tcache); +void tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, index_t binind, + unsigned rem, tcache_t *tcache); void tcache_arena_associate(tcache_t *tcache, arena_t *arena); -void tcache_arena_reassociate(tcache_t *tcache, arena_t *arena); -void tcache_arena_dissociate(tcache_t *tcache); +void tcache_arena_reassociate(tcache_t *tcache, arena_t *oldarena, + arena_t *newarena); +void tcache_arena_dissociate(tcache_t *tcache, arena_t *arena); tcache_t *tcache_get_hard(tsd_t *tsd); tcache_t *tcache_create(tsd_t *tsd, arena_t *arena); void tcache_cleanup(tsd_t *tsd); void tcache_enabled_cleanup(tsd_t *tsd); void tcache_stats_merge(tcache_t *tcache, arena_t *arena); +bool tcaches_create(tsd_t *tsd, unsigned *r_ind); +void tcaches_flush(tsd_t *tsd, unsigned ind); +void tcaches_destroy(tsd_t *tsd, unsigned ind); bool tcache_boot(void); #endif /* JEMALLOC_H_EXTERNS */ @@ -123,16 +145,21 @@ bool tcache_boot(void); #ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE -void tcache_event(tcache_t *tcache); +void tcache_event(tsd_t *tsd, tcache_t *tcache); void tcache_flush(void); bool tcache_enabled_get(void); tcache_t *tcache_get(tsd_t *tsd, bool create); void tcache_enabled_set(bool enabled); void *tcache_alloc_easy(tcache_bin_t *tbin); -void *tcache_alloc_small(tcache_t *tcache, size_t size, bool zero); -void *tcache_alloc_large(tcache_t *tcache, size_t size, bool zero); -void tcache_dalloc_small(tcache_t *tcache, void *ptr, index_t binind); -void tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size); +void *tcache_alloc_small(tsd_t *tsd, tcache_t *tcache, size_t size, + bool zero); +void *tcache_alloc_large(tsd_t *tsd, tcache_t *tcache, size_t size, + bool zero); +void tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, + index_t binind); +void tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, + size_t size); +tcache_t *tcaches_get(tsd_t *tsd, unsigned ind); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_TCACHE_C_)) @@ -202,7 +229,7 @@ tcache_get(tsd_t *tsd, bool create) } JEMALLOC_ALWAYS_INLINE void -tcache_event(tcache_t *tcache) +tcache_event(tsd_t *tsd, tcache_t *tcache) { if (TCACHE_GC_INCR == 0) @@ -211,7 +238,7 @@ tcache_event(tcache_t *tcache) tcache->ev_cnt++; assert(tcache->ev_cnt <= TCACHE_GC_INCR); if (unlikely(tcache->ev_cnt == TCACHE_GC_INCR)) - tcache_event_hard(tcache); + tcache_event_hard(tsd, tcache); } JEMALLOC_ALWAYS_INLINE void * @@ -231,7 +258,7 @@ tcache_alloc_easy(tcache_bin_t *tbin) } JEMALLOC_ALWAYS_INLINE void * 
-tcache_alloc_small(tcache_t *tcache, size_t size, bool zero) +tcache_alloc_small(tsd_t *tsd, tcache_t *tcache, size_t size, bool zero) { void *ret; index_t binind; @@ -244,7 +271,7 @@ tcache_alloc_small(tcache_t *tcache, size_t size, bool zero) usize = index2size(binind); ret = tcache_alloc_easy(tbin); if (unlikely(ret == NULL)) { - ret = tcache_alloc_small_hard(tcache, tbin, binind); + ret = tcache_alloc_small_hard(tsd, tcache, tbin, binind); if (ret == NULL) return (NULL); } @@ -270,12 +297,12 @@ tcache_alloc_small(tcache_t *tcache, size_t size, bool zero) tbin->tstats.nrequests++; if (config_prof) tcache->prof_accumbytes += usize; - tcache_event(tcache); + tcache_event(tsd, tcache); return (ret); } JEMALLOC_ALWAYS_INLINE void * -tcache_alloc_large(tcache_t *tcache, size_t size, bool zero) +tcache_alloc_large(tsd_t *tsd, tcache_t *tcache, size_t size, bool zero) { void *ret; index_t binind; @@ -293,7 +320,7 @@ tcache_alloc_large(tcache_t *tcache, size_t size, bool zero) * Only allocate one large object at a time, because it's quite * expensive to create one and not use it. */ - ret = arena_malloc_large(tcache->arena, usize, zero); + ret = arena_malloc_large(arena_choose(tsd, NULL), usize, zero); if (ret == NULL) return (NULL); } else { @@ -321,12 +348,12 @@ tcache_alloc_large(tcache_t *tcache, size_t size, bool zero) tcache->prof_accumbytes += usize; } - tcache_event(tcache); + tcache_event(tsd, tcache); return (ret); } JEMALLOC_ALWAYS_INLINE void -tcache_dalloc_small(tcache_t *tcache, void *ptr, index_t binind) +tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, index_t binind) { tcache_bin_t *tbin; tcache_bin_info_t *tbin_info; @@ -339,18 +366,18 @@ tcache_dalloc_small(tcache_t *tcache, void *ptr, index_t binind) tbin = &tcache->tbins[binind]; tbin_info = &tcache_bin_info[binind]; if (unlikely(tbin->ncached == tbin_info->ncached_max)) { - tcache_bin_flush_small(tbin, binind, (tbin_info->ncached_max >> - 1), tcache); + tcache_bin_flush_small(tsd, tbin, binind, + (tbin_info->ncached_max >> 1), tcache); } assert(tbin->ncached < tbin_info->ncached_max); tbin->avail[tbin->ncached] = ptr; tbin->ncached++; - tcache_event(tcache); + tcache_event(tsd, tcache); } JEMALLOC_ALWAYS_INLINE void -tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size) +tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, size_t size) { index_t binind; tcache_bin_t *tbin; @@ -368,14 +395,23 @@ tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size) tbin = &tcache->tbins[binind]; tbin_info = &tcache_bin_info[binind]; if (unlikely(tbin->ncached == tbin_info->ncached_max)) { - tcache_bin_flush_large(tbin, binind, (tbin_info->ncached_max >> - 1), tcache); + tcache_bin_flush_large(tsd, tbin, binind, + (tbin_info->ncached_max >> 1), tcache); } assert(tbin->ncached < tbin_info->ncached_max); tbin->avail[tbin->ncached] = ptr; tbin->ncached++; - tcache_event(tcache); + tcache_event(tsd, tcache); +} + +JEMALLOC_ALWAYS_INLINE tcache_t * +tcaches_get(tsd_t *tsd, unsigned ind) +{ + tcaches_t *elm = &tcaches[ind]; + if (unlikely(elm->tcache == NULL)) + elm->tcache = tcache_create(tsd, arena_choose(tsd, NULL)); + return (elm->tcache); } #endif diff --git a/include/jemalloc/jemalloc_macros.h.in b/include/jemalloc/jemalloc_macros.h.in index 99f12611..7d1dcf4a 100644 --- a/include/jemalloc/jemalloc_macros.h.in +++ b/include/jemalloc/jemalloc_macros.h.in @@ -19,8 +19,16 @@ ((a < (size_t)INT_MAX) ? 
ffs(a)-1 : ffs(a>>32)+31) # endif # define MALLOCX_ZERO ((int)0x40) -/* Bias arena index bits so that 0 encodes "MALLOCX_ARENA() unspecified". */ -# define MALLOCX_ARENA(a) ((int)(((a)+1) << 8)) +/* + * Bias tcache index bits so that 0 encodes "automatic tcache management", and 1 + * encodes MALLOCX_TCACHE_NONE. + */ +# define MALLOCX_TCACHE(tc) ((int)(((tc)+2) << 8)) +# define MALLOCX_TCACHE_NONE MALLOCX_TCACHE(-1) +/* + * Bias arena index bits so that 0 encodes "use an automatically chosen arena". + */ +# define MALLOCX_ARENA(a) ((int)(((a)+1) << 20)) #ifdef JEMALLOC_HAVE_ATTR # define JEMALLOC_ATTR(s) __attribute__((s)) diff --git a/src/arena.c b/src/arena.c index a5033bf8..907fbd7f 100644 --- a/src/arena.c +++ b/src/arena.c @@ -2182,8 +2182,7 @@ arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, void * arena_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, - size_t extra, size_t alignment, bool zero, bool try_tcache_alloc, - bool try_tcache_dalloc) + size_t extra, size_t alignment, bool zero, tcache_t *tcache) { void *ret; size_t copysize; @@ -2201,12 +2200,9 @@ arena_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, size_t usize = sa2u(size + extra, alignment); if (usize == 0) return (NULL); - ret = ipalloct(tsd, usize, alignment, zero, try_tcache_alloc, - arena); - } else { - ret = arena_malloc(tsd, arena, size + extra, zero, - try_tcache_alloc); - } + ret = ipalloct(tsd, usize, alignment, zero, tcache, arena); + } else + ret = arena_malloc(tsd, arena, size + extra, zero, tcache); if (ret == NULL) { if (extra == 0) @@ -2216,12 +2212,10 @@ arena_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, size_t usize = sa2u(size, alignment); if (usize == 0) return (NULL); - ret = ipalloct(tsd, usize, alignment, zero, - try_tcache_alloc, arena); - } else { - ret = arena_malloc(tsd, arena, size, zero, - try_tcache_alloc); - } + ret = ipalloct(tsd, usize, alignment, zero, tcache, + arena); + } else + ret = arena_malloc(tsd, arena, size, zero, tcache); if (ret == NULL) return (NULL); @@ -2236,7 +2230,7 @@ arena_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, copysize = (size < oldsize) ? 
size : oldsize; JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, copysize); memcpy(ret, ptr, copysize); - isqalloc(tsd, ptr, oldsize, try_tcache_dalloc); + isqalloc(tsd, ptr, oldsize, tcache); return (ret); } diff --git a/src/ckh.c b/src/ckh.c index db2ae392..ad075d60 100644 --- a/src/ckh.c +++ b/src/ckh.c @@ -270,7 +270,8 @@ ckh_grow(tsd_t *tsd, ckh_t *ckh) ret = true; goto label_return; } - tab = (ckhc_t *)ipalloc(tsd, usize, CACHELINE, true); + tab = (ckhc_t *)ipalloct(tsd, usize, CACHELINE, true, NULL, + NULL); if (tab == NULL) { ret = true; goto label_return; @@ -313,7 +314,7 @@ ckh_shrink(tsd_t *tsd, ckh_t *ckh) usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE); if (usize == 0) return; - tab = (ckhc_t *)ipalloc(tsd, usize, CACHELINE, true); + tab = (ckhc_t *)ipalloct(tsd, usize, CACHELINE, true, NULL, NULL); if (tab == NULL) { /* * An OOM error isn't worth propagating, since it doesn't @@ -389,7 +390,7 @@ ckh_new(tsd_t *tsd, ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ret = true; goto label_return; } - ckh->tab = (ckhc_t *)ipalloc(tsd, usize, CACHELINE, true); + ckh->tab = (ckhc_t *)ipalloct(tsd, usize, CACHELINE, true, NULL, NULL); if (ckh->tab == NULL) { ret = true; goto label_return; diff --git a/src/ctl.c b/src/ctl.c index 63a689a3..a2838032 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -110,6 +110,9 @@ CTL_PROTO(opt_prof_gdump) CTL_PROTO(opt_prof_final) CTL_PROTO(opt_prof_leak) CTL_PROTO(opt_prof_accum) +CTL_PROTO(tcache_create) +CTL_PROTO(tcache_flush) +CTL_PROTO(tcache_destroy) CTL_PROTO(arena_i_purge) static void arena_purge(unsigned arena_ind); CTL_PROTO(arena_i_dss) @@ -275,6 +278,12 @@ static const ctl_named_node_t opt_node[] = { {NAME("prof_accum"), CTL(opt_prof_accum)} }; +static const ctl_named_node_t tcache_node[] = { + {NAME("create"), CTL(tcache_create)}, + {NAME("flush"), CTL(tcache_flush)}, + {NAME("destroy"), CTL(tcache_destroy)} +}; + static const ctl_named_node_t chunk_node[] = { {NAME("alloc"), CTL(arena_i_chunk_alloc)}, {NAME("dalloc"), CTL(arena_i_chunk_dalloc)} @@ -474,6 +483,7 @@ static const ctl_named_node_t root_node[] = { {NAME("thread"), CHILD(named, thread)}, {NAME("config"), CHILD(named, config)}, {NAME("opt"), CHILD(named, opt)}, + {NAME("tcache"), CHILD(named, tcache)}, {NAME("arena"), CHILD(indexed, arena)}, {NAME("arenas"), CHILD(named, arenas)}, {NAME("prof"), CHILD(named, prof)}, @@ -1281,19 +1291,21 @@ thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, { int ret; tsd_t *tsd; - arena_t *arena; + arena_t *oldarena; unsigned newind, oldind; tsd = tsd_fetch(); - arena = arena_choose(tsd, NULL); - if (arena == NULL) + oldarena = arena_choose(tsd, NULL); + if (oldarena == NULL) return (EAGAIN); malloc_mutex_lock(&ctl_mtx); - newind = oldind = arena->ind; + newind = oldind = oldarena->ind; WRITE(newind, unsigned); READ(oldind, unsigned); if (newind != oldind) { + arena_t *newarena; + if (newind >= ctl_stats.narenas) { /* New arena index is out of range. */ ret = EFAULT; @@ -1301,8 +1313,8 @@ thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, } /* Initialize arena if necessary. 
*/ - arena = arena_get(tsd, newind, true, true); - if (arena == NULL) { + newarena = arena_get(tsd, newind, true, true); + if (newarena == NULL) { ret = EAGAIN; goto label_return; } @@ -1310,8 +1322,10 @@ thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, arena_migrate(tsd, oldind, newind); if (config_tcache) { tcache_t *tcache = tsd_tcache_get(tsd); - if (tcache != NULL) - tcache_arena_reassociate(tcache, arena); + if (tcache != NULL) { + tcache_arena_reassociate(tcache, oldarena, + newarena); + } } } @@ -1438,6 +1452,89 @@ label_return: /******************************************************************************/ +static int +tcache_create_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, + void *newp, size_t newlen) +{ + int ret; + tsd_t *tsd; + unsigned tcache_ind; + + if (!config_tcache) + return (ENOENT); + + tsd = tsd_fetch(); + + malloc_mutex_lock(&ctl_mtx); + READONLY(); + if (tcaches_create(tsd, &tcache_ind)) { + ret = EFAULT; + goto label_return; + } + READ(tcache_ind, unsigned); + + ret = 0; +label_return: + malloc_mutex_unlock(&ctl_mtx); + return (ret); +} + +static int +tcache_flush_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, + void *newp, size_t newlen) +{ + int ret; + tsd_t *tsd; + unsigned tcache_ind; + + if (!config_tcache) + return (ENOENT); + + tsd = tsd_fetch(); + + WRITEONLY(); + tcache_ind = UINT_MAX; + WRITE(tcache_ind, unsigned); + if (tcache_ind == UINT_MAX) { + ret = EFAULT; + goto label_return; + } + tcaches_flush(tsd, tcache_ind); + + ret = 0; +label_return: + return (ret); +} + +static int +tcache_destroy_ctl(const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) +{ + int ret; + tsd_t *tsd; + unsigned tcache_ind; + + if (!config_tcache) + return (ENOENT); + + tsd = tsd_fetch(); + + WRITEONLY(); + tcache_ind = UINT_MAX; + WRITE(tcache_ind, unsigned); + if (tcache_ind == UINT_MAX) { + ret = EFAULT; + goto label_return; + } + tcaches_destroy(tsd, tcache_ind); + + ret = 0; +label_return: + return (ret); +} + +/******************************************************************************/ + /* ctl_mutex must be held during execution of this function. */ static void arena_purge(unsigned arena_ind) diff --git a/src/huge.c b/src/huge.c index 84a1ab23..db0ecd51 100644 --- a/src/huge.c +++ b/src/huge.c @@ -13,7 +13,8 @@ static malloc_mutex_t huge_mtx; static extent_tree_t huge; void * -huge_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero, bool try_tcache) +huge_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero, + tcache_t *tcache) { size_t usize; @@ -23,12 +24,12 @@ huge_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero, bool try_tcache) return (NULL); } - return (huge_palloc(tsd, arena, usize, chunksize, zero, try_tcache)); + return (huge_palloc(tsd, arena, usize, chunksize, zero, tcache)); } void * huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, - bool zero, bool try_tcache) + bool zero, tcache_t *tcache) { void *ret; extent_node_t *node; @@ -38,7 +39,7 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, /* Allocate an extent node with which to track the chunk. 
*/ node = ipallocztm(tsd, CACHELINE_CEILING(sizeof(extent_node_t)), - CACHELINE, false, try_tcache, true, arena); + CACHELINE, false, tcache, true, arena); if (node == NULL) return (NULL); @@ -50,7 +51,7 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, arena = arena_choose(tsd, arena); if (unlikely(arena == NULL) || (ret = arena_chunk_alloc_huge(arena, usize, alignment, &is_zeroed)) == NULL) { - idalloctm(tsd, node, try_tcache, true); + idalloctm(tsd, node, tcache, true); return (NULL); } @@ -307,8 +308,7 @@ huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, void * huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, - size_t extra, size_t alignment, bool zero, bool try_tcache_alloc, - bool try_tcache_dalloc) + size_t extra, size_t alignment, bool zero, tcache_t *tcache) { void *ret; size_t copysize; @@ -324,11 +324,9 @@ huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, */ if (alignment > chunksize) { ret = huge_palloc(tsd, arena, size + extra, alignment, zero, - try_tcache_alloc); - } else { - ret = huge_malloc(tsd, arena, size + extra, zero, - try_tcache_alloc); - } + tcache); + } else + ret = huge_malloc(tsd, arena, size + extra, zero, tcache); if (ret == NULL) { if (extra == 0) @@ -336,11 +334,9 @@ huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, /* Try again, this time without extra. */ if (alignment > chunksize) { ret = huge_palloc(tsd, arena, size, alignment, zero, - try_tcache_alloc); - } else { - ret = huge_malloc(tsd, arena, size, zero, - try_tcache_alloc); - } + tcache); + } else + ret = huge_malloc(tsd, arena, size, zero, tcache); if (ret == NULL) return (NULL); @@ -352,12 +348,12 @@ huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, */ copysize = (size < oldsize) ? size : oldsize; memcpy(ret, ptr, copysize); - isqalloc(tsd, ptr, oldsize, try_tcache_dalloc); + isqalloc(tsd, ptr, oldsize, tcache); return (ret); } void -huge_dalloc(tsd_t *tsd, void *ptr, bool try_tcache) +huge_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache) { extent_node_t *node; @@ -368,7 +364,7 @@ huge_dalloc(tsd_t *tsd, void *ptr, bool try_tcache) huge_dalloc_junk(node->addr, node->size); arena_chunk_dalloc_huge(node->arena, node->addr, node->size); - idalloctm(tsd, node, try_tcache, true); + idalloctm(tsd, node, tcache, true); } arena_t * diff --git a/src/jemalloc.c b/src/jemalloc.c index d1fa674c..94477914 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -367,6 +367,8 @@ arena_init_locked(unsigned ind) /* Expand arenas if necessary. 
*/ assert(ind <= narenas_total); + if (ind > MALLOCX_ARENA_MAX) + return (NULL); if (ind == narenas_total) { unsigned narenas_new = narenas_total + 1; arena_t **arenas_new = @@ -1696,7 +1698,7 @@ irealloc_prof(tsd_t *tsd, void *oldptr, size_t old_usize, size_t usize) } JEMALLOC_INLINE_C void -ifree(tsd_t *tsd, void *ptr, bool try_tcache) +ifree(tsd_t *tsd, void *ptr, tcache_t *tcache) { size_t usize; UNUSED size_t rzsize JEMALLOC_CC_SILENCE_INIT(0); @@ -1713,12 +1715,12 @@ ifree(tsd_t *tsd, void *ptr, bool try_tcache) *tsd_thread_deallocatedp_get(tsd) += usize; if (config_valgrind && unlikely(in_valgrind)) rzsize = p2rz(ptr); - iqalloc(tsd, ptr, try_tcache); + iqalloc(tsd, ptr, tcache); JEMALLOC_VALGRIND_FREE(ptr, rzsize); } JEMALLOC_INLINE_C void -isfree(tsd_t *tsd, void *ptr, size_t usize, bool try_tcache) +isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache) { UNUSED size_t rzsize JEMALLOC_CC_SILENCE_INIT(0); @@ -1731,7 +1733,7 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, bool try_tcache) *tsd_thread_deallocatedp_get(tsd) += usize; if (config_valgrind && unlikely(in_valgrind)) rzsize = p2rz(ptr); - isqalloc(tsd, ptr, usize, try_tcache); + isqalloc(tsd, ptr, usize, tcache); JEMALLOC_VALGRIND_FREE(ptr, rzsize); } @@ -1749,7 +1751,7 @@ je_realloc(void *ptr, size_t size) /* realloc(ptr, 0) is equivalent to free(ptr). */ UTRACE(ptr, 0, 0); tsd = tsd_fetch(); - ifree(tsd, ptr, true); + ifree(tsd, ptr, tcache_get(tsd, false)); return (NULL); } size = 1; @@ -1802,8 +1804,10 @@ je_free(void *ptr) { UTRACE(ptr, 0, 0); - if (likely(ptr != NULL)) - ifree(tsd_fetch(), ptr, true); + if (likely(ptr != NULL)) { + tsd_t *tsd = tsd_fetch(); + ifree(tsd, ptr, tcache_get(tsd, false)); + } } /* @@ -1875,7 +1879,7 @@ JEMALLOC_EXPORT void *(*__memalign_hook)(size_t alignment, size_t size) = JEMALLOC_ALWAYS_INLINE_C bool imallocx_flags_decode_hard(tsd_t *tsd, size_t size, int flags, size_t *usize, - size_t *alignment, bool *zero, bool *try_tcache, arena_t **arena) + size_t *alignment, bool *zero, tcache_t **tcache, arena_t **arena) { if ((flags & MALLOCX_LG_ALIGN_MASK) == 0) { @@ -1886,22 +1890,26 @@ imallocx_flags_decode_hard(tsd_t *tsd, size_t size, int flags, size_t *usize, *usize = sa2u(size, *alignment); } *zero = MALLOCX_ZERO_GET(flags); + if ((flags & MALLOCX_TCACHE_MASK) != 0) { + if ((flags & MALLOCX_TCACHE_MASK) == MALLOCX_TCACHE_NONE) + *tcache = NULL; + else + *tcache = tcaches_get(tsd, MALLOCX_TCACHE_GET(flags)); + } else + *tcache = tcache_get(tsd, true); if ((flags & MALLOCX_ARENA_MASK) != 0) { unsigned arena_ind = MALLOCX_ARENA_GET(flags); - *try_tcache = false; *arena = arena_get(tsd, arena_ind, true, true); if (unlikely(*arena == NULL)) return (true); - } else { - *try_tcache = true; + } else *arena = NULL; - } return (false); } JEMALLOC_ALWAYS_INLINE_C bool imallocx_flags_decode(tsd_t *tsd, size_t size, int flags, size_t *usize, - size_t *alignment, bool *zero, bool *try_tcache, arena_t **arena) + size_t *alignment, bool *zero, tcache_t **tcache, arena_t **arena) { if (likely(flags == 0)) { @@ -1909,55 +1917,53 @@ imallocx_flags_decode(tsd_t *tsd, size_t size, int flags, size_t *usize, assert(usize != 0); *alignment = 0; *zero = false; - *try_tcache = true; + *tcache = tcache_get(tsd, true); *arena = NULL; return (false); } else { return (imallocx_flags_decode_hard(tsd, size, flags, usize, - alignment, zero, try_tcache, arena)); + alignment, zero, tcache, arena)); } } JEMALLOC_ALWAYS_INLINE_C void * imallocx_flags(tsd_t *tsd, size_t usize, size_t alignment, bool zero, - bool 
try_tcache, arena_t *arena) + tcache_t *tcache, arena_t *arena) { - if (alignment != 0) { - return (ipalloct(tsd, usize, alignment, zero, try_tcache, - arena)); - } + if (alignment != 0) + return (ipalloct(tsd, usize, alignment, zero, tcache, arena)); if (zero) - return (icalloct(tsd, usize, try_tcache, arena)); - return (imalloct(tsd, usize, try_tcache, arena)); + return (icalloct(tsd, usize, tcache, arena)); + return (imalloct(tsd, usize, tcache, arena)); } JEMALLOC_ALWAYS_INLINE_C void * imallocx_maybe_flags(tsd_t *tsd, size_t size, int flags, size_t usize, - size_t alignment, bool zero, bool try_tcache, arena_t *arena) + size_t alignment, bool zero, tcache_t *tcache, arena_t *arena) { if (likely(flags == 0)) return (imalloc(tsd, size)); - return (imallocx_flags(tsd, usize, alignment, zero, try_tcache, arena)); + return (imallocx_flags(tsd, usize, alignment, zero, tcache, arena)); } static void * imallocx_prof_sample(tsd_t *tsd, size_t size, int flags, size_t usize, - size_t alignment, bool zero, bool try_tcache, arena_t *arena) + size_t alignment, bool zero, tcache_t *tcache, arena_t *arena) { void *p; if (usize <= SMALL_MAXCLASS) { assert(((alignment == 0) ? s2u(LARGE_MINCLASS) : sa2u(LARGE_MINCLASS, alignment)) == LARGE_MINCLASS); - p = imalloct(tsd, LARGE_MINCLASS, try_tcache, arena); + p = imalloct(tsd, LARGE_MINCLASS, tcache, arena); if (p == NULL) return (NULL); arena_prof_promoted(p, usize); } else { p = imallocx_maybe_flags(tsd, size, flags, usize, alignment, - zero, try_tcache, arena); + zero, tcache, arena); } return (p); @@ -1969,20 +1975,20 @@ imallocx_prof(tsd_t *tsd, size_t size, int flags, size_t *usize) void *p; size_t alignment; bool zero; - bool try_tcache; + tcache_t *tcache; arena_t *arena; prof_tctx_t *tctx; if (unlikely(imallocx_flags_decode(tsd, size, flags, usize, &alignment, - &zero, &try_tcache, &arena))) + &zero, &tcache, &arena))) return (NULL); tctx = prof_alloc_prep(tsd, *usize, true); if (likely((uintptr_t)tctx == (uintptr_t)1U)) { p = imallocx_maybe_flags(tsd, size, flags, *usize, alignment, - zero, try_tcache, arena); + zero, tcache, arena); } else if ((uintptr_t)tctx > (uintptr_t)1U) { p = imallocx_prof_sample(tsd, size, flags, *usize, alignment, - zero, try_tcache, arena); + zero, tcache, arena); } else p = NULL; if (unlikely(p == NULL)) { @@ -1999,7 +2005,7 @@ imallocx_no_prof(tsd_t *tsd, size_t size, int flags, size_t *usize) { size_t alignment; bool zero; - bool try_tcache; + tcache_t *tcache; arena_t *arena; if (likely(flags == 0)) { @@ -2009,10 +2015,9 @@ imallocx_no_prof(tsd_t *tsd, size_t size, int flags, size_t *usize) } if (unlikely(imallocx_flags_decode_hard(tsd, size, flags, usize, - &alignment, &zero, &try_tcache, &arena))) + &alignment, &zero, &tcache, &arena))) return (NULL); - return (imallocx_flags(tsd, *usize, alignment, zero, try_tcache, - arena)); + return (imallocx_flags(tsd, *usize, alignment, zero, tcache, arena)); } void * @@ -2053,8 +2058,8 @@ label_oom: static void * irallocx_prof_sample(tsd_t *tsd, void *oldptr, size_t old_usize, size_t size, - size_t alignment, size_t usize, bool zero, bool try_tcache_alloc, - bool try_tcache_dalloc, arena_t *arena, prof_tctx_t *tctx) + size_t alignment, size_t usize, bool zero, tcache_t *tcache, arena_t *arena, + prof_tctx_t *tctx) { void *p; @@ -2062,13 +2067,13 @@ irallocx_prof_sample(tsd_t *tsd, void *oldptr, size_t old_usize, size_t size, return (NULL); if (usize <= SMALL_MAXCLASS) { p = iralloct(tsd, oldptr, old_usize, LARGE_MINCLASS, alignment, - zero, try_tcache_alloc, 
try_tcache_dalloc, arena); + zero, tcache, arena); if (p == NULL) return (NULL); arena_prof_promoted(p, usize); } else { p = iralloct(tsd, oldptr, old_usize, size, alignment, zero, - try_tcache_alloc, try_tcache_dalloc, arena); + tcache, arena); } return (p); @@ -2076,8 +2081,8 @@ irallocx_prof_sample(tsd_t *tsd, void *oldptr, size_t old_usize, size_t size, JEMALLOC_ALWAYS_INLINE_C void * irallocx_prof(tsd_t *tsd, void *oldptr, size_t old_usize, size_t size, - size_t alignment, size_t *usize, bool zero, bool try_tcache_alloc, - bool try_tcache_dalloc, arena_t *arena) + size_t alignment, size_t *usize, bool zero, tcache_t *tcache, + arena_t *arena) { void *p; prof_tctx_t *old_tctx, *tctx; @@ -2086,11 +2091,10 @@ irallocx_prof(tsd_t *tsd, void *oldptr, size_t old_usize, size_t size, tctx = prof_alloc_prep(tsd, *usize, false); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { p = irallocx_prof_sample(tsd, oldptr, old_usize, size, - alignment, *usize, zero, try_tcache_alloc, - try_tcache_dalloc, arena, tctx); + alignment, *usize, zero, tcache, arena, tctx); } else { p = iralloct(tsd, oldptr, old_usize, size, alignment, zero, - try_tcache_alloc, try_tcache_dalloc, arena); + tcache, arena); } if (unlikely(p == NULL)) { prof_alloc_rollback(tsd, tctx, false); @@ -2123,8 +2127,8 @@ je_rallocx(void *ptr, size_t size, int flags) UNUSED size_t old_rzsize JEMALLOC_CC_SILENCE_INIT(0); size_t alignment = MALLOCX_ALIGN_GET(flags); bool zero = flags & MALLOCX_ZERO; - bool try_tcache_alloc, try_tcache_dalloc; arena_t *arena; + tcache_t *tcache; assert(ptr != NULL); assert(size != 0); @@ -2134,18 +2138,19 @@ je_rallocx(void *ptr, size_t size, int flags) if (unlikely((flags & MALLOCX_ARENA_MASK) != 0)) { unsigned arena_ind = MALLOCX_ARENA_GET(flags); - arena_chunk_t *chunk; - try_tcache_alloc = false; - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); arena = arena_get(tsd, arena_ind, true, true); if (unlikely(arena == NULL)) goto label_oom; - try_tcache_dalloc = (chunk == ptr || chunk->arena != arena); - } else { - try_tcache_alloc = true; - try_tcache_dalloc = true; + } else arena = NULL; - } + + if (unlikely((flags & MALLOCX_TCACHE_MASK) != 0)) { + if ((flags & MALLOCX_TCACHE_MASK) == MALLOCX_TCACHE_NONE) + tcache = NULL; + else + tcache = tcaches_get(tsd, MALLOCX_TCACHE_GET(flags)); + } else + tcache = tcache_get(tsd, true); old_usize = isalloc(ptr, config_prof); if (config_valgrind && unlikely(in_valgrind)) @@ -2155,12 +2160,12 @@ je_rallocx(void *ptr, size_t size, int flags) usize = (alignment == 0) ? 
s2u(size) : sa2u(size, alignment); assert(usize != 0); p = irallocx_prof(tsd, ptr, old_usize, size, alignment, &usize, - zero, try_tcache_alloc, try_tcache_dalloc, arena); + zero, tcache, arena); if (unlikely(p == NULL)) goto label_oom; } else { p = iralloct(tsd, ptr, old_usize, size, alignment, zero, - try_tcache_alloc, try_tcache_dalloc, arena); + tcache, arena); if (unlikely(p == NULL)) goto label_oom; if (config_stats || (config_valgrind && unlikely(in_valgrind))) @@ -2319,28 +2324,22 @@ void je_dallocx(void *ptr, int flags) { tsd_t *tsd; - bool try_tcache; + tcache_t *tcache; assert(ptr != NULL); assert(malloc_initialized() || IS_INITIALIZER); tsd = tsd_fetch(); - if (unlikely((flags & MALLOCX_ARENA_MASK) != 0)) { - unsigned arena_ind = MALLOCX_ARENA_GET(flags); - arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - arena_t *arena = arena_get(tsd, arena_ind, true, true); - /* - * If arena is NULL, the application passed an arena that has - * never been used before, which is unsupported during - * deallocation. - */ - assert(arena != NULL); - try_tcache = (chunk == ptr || chunk->arena != arena); + if (unlikely((flags & MALLOCX_TCACHE_MASK) != 0)) { + if ((flags & MALLOCX_TCACHE_MASK) == MALLOCX_TCACHE_NONE) + tcache = NULL; + else + tcache = tcaches_get(tsd, MALLOCX_TCACHE_GET(flags)); } else - try_tcache = true; + tcache = tcache_get(tsd, false); UTRACE(ptr, 0, 0); - ifree(tsd_fetch(), ptr, try_tcache); + ifree(tsd_fetch(), ptr, tcache); } JEMALLOC_ALWAYS_INLINE_C size_t @@ -2360,7 +2359,7 @@ void je_sdallocx(void *ptr, size_t size, int flags) { tsd_t *tsd; - bool try_tcache; + tcache_t *tcache; size_t usize; assert(ptr != NULL); @@ -2369,21 +2368,16 @@ je_sdallocx(void *ptr, size_t size, int flags) assert(usize == isalloc(ptr, config_prof)); tsd = tsd_fetch(); - if (unlikely((flags & MALLOCX_ARENA_MASK) != 0)) { - unsigned arena_ind = MALLOCX_ARENA_GET(flags); - arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - arena_t *arena = arena_get(tsd, arena_ind, true, true); - /* - * If arena is NULL, the application passed an arena that has - * never been used before, which is unsupported during - * deallocation. - */ - try_tcache = (chunk == ptr || chunk->arena != arena); + if (unlikely((flags & MALLOCX_TCACHE_MASK) != 0)) { + if ((flags & MALLOCX_TCACHE_MASK) == MALLOCX_TCACHE_NONE) + tcache = NULL; + else + tcache = tcaches_get(tsd, MALLOCX_TCACHE_GET(flags)); } else - try_tcache = true; + tcache = tcache_get(tsd, false); UTRACE(ptr, 0, 0); - isfree(tsd, ptr, usize, try_tcache); + isfree(tsd, ptr, usize, tcache); } size_t diff --git a/src/prof.c b/src/prof.c index 04b2591c..4f1580b0 100644 --- a/src/prof.c +++ b/src/prof.c @@ -540,7 +540,8 @@ prof_gctx_create(tsd_t *tsd, prof_bt_t *bt) * Create a single allocation that has space for vec of length bt->len. */ prof_gctx_t *gctx = (prof_gctx_t *)iallocztm(tsd, offsetof(prof_gctx_t, - vec) + (bt->len * sizeof(void *)), false, true, true, NULL); + vec) + (bt->len * sizeof(void *)), false, tcache_get(tsd, true), + true, NULL); if (gctx == NULL) return (NULL); gctx->lock = prof_gctx_mutex_choose(); @@ -581,7 +582,7 @@ prof_gctx_try_destroy(tsd_t *tsd, prof_tdata_t *tdata_self, prof_gctx_t *gctx, prof_leave(tsd, tdata_self); /* Destroy gctx. 
 		malloc_mutex_unlock(gctx->lock);
-		idalloctm(tsd, gctx, true, true);
+		idalloctm(tsd, gctx, tcache_get(tsd, false), true);
 	} else {
 		/*
 		 * Compensate for increment in prof_tctx_destroy() or
@@ -681,7 +682,7 @@ prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx)
 		prof_tdata_destroy(tsd, tdata, false);
 
 	if (destroy_tctx)
-		idalloctm(tsd, tctx, true, true);
+		idalloctm(tsd, tctx, tcache_get(tsd, false), true);
 }
 
 static bool
@@ -710,7 +711,7 @@ prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata,
 		if (ckh_insert(tsd, &bt2gctx, btkey.v, gctx.v)) {
 			/* OOM. */
 			prof_leave(tsd, tdata);
-			idalloctm(tsd, gctx.v, true, true);
+			idalloctm(tsd, gctx.v, tcache_get(tsd, false), true);
 			return (true);
 		}
 		new_gctx = true;
@@ -754,6 +755,7 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt)
 	ret.p->prepared = true;
 	malloc_mutex_unlock(tdata->lock);
 	if (not_found) {
+		tcache_t *tcache;
 		void *btkey;
 		prof_gctx_t *gctx;
 		bool new_gctx, error;
@@ -767,7 +769,8 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt)
 			return (NULL);
 
 		/* Link a prof_tctx_t into gctx for this thread. */
-		ret.v = iallocztm(tsd, sizeof(prof_tctx_t), false, true, true,
+		tcache = tcache_get(tsd, true);
+		ret.v = iallocztm(tsd, sizeof(prof_tctx_t), false, tcache, true,
 		    NULL);
 		if (ret.p == NULL) {
 			if (new_gctx)
@@ -786,7 +789,7 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt)
 		if (error) {
 			if (new_gctx)
 				prof_gctx_try_destroy(tsd, tdata, gctx, tdata);
-			idalloctm(tsd, ret.v, true, true);
+			idalloctm(tsd, ret.v, tcache, true);
 			return (NULL);
 		}
 		malloc_mutex_lock(gctx->lock);
@@ -1166,7 +1169,8 @@ prof_gctx_finish(tsd_t *tsd, prof_gctx_tree_t *gctxs)
 					    to_destroy);
 					tctx_tree_remove(&gctx->tctxs,
 					    to_destroy);
-					idalloctm(tsd, to_destroy, true, true);
+					idalloctm(tsd, to_destroy,
+					    tcache_get(tsd, false), true);
 				} else
 					next = NULL;
 			} while (next != NULL);
@@ -1644,12 +1648,14 @@ prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim,
     char *thread_name, bool active)
 {
 	prof_tdata_t *tdata;
+	tcache_t *tcache;
 
 	cassert(config_prof);
 
 	/* Initialize an empty cache for this thread. */
+	tcache = tcache_get(tsd, true);
 	tdata = (prof_tdata_t *)iallocztm(tsd, sizeof(prof_tdata_t), false,
-	    true, true, NULL);
+	    tcache, true, NULL);
 	if (tdata == NULL)
 		return (NULL);
 
@@ -1662,7 +1668,7 @@ prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim,
 
 	if (ckh_new(tsd, &tdata->bt2tctx, PROF_CKH_MINITEMS, prof_bt_hash,
 	    prof_bt_keycomp)) {
-		idalloctm(tsd, tdata, true, true);
+		idalloctm(tsd, tdata, tcache, true);
 		return (NULL);
 	}
 
@@ -1708,16 +1714,18 @@ static void
 prof_tdata_destroy_locked(tsd_t *tsd, prof_tdata_t *tdata,
     bool even_if_attached)
 {
+	tcache_t *tcache;
 
 	assert(prof_tdata_should_destroy(tdata, even_if_attached));
 	assert(tsd_prof_tdata_get(tsd) != tdata);
 
 	tdata_tree_remove(&tdatas, tdata);
+	tcache = tcache_get(tsd, false);
 	if (tdata->thread_name != NULL)
-		idalloctm(tsd, tdata->thread_name, true, true);
+		idalloctm(tsd, tdata->thread_name, tcache, true);
 	ckh_delete(tsd, &tdata->bt2tctx);
-	idalloctm(tsd, tdata, true, true);
+	idalloctm(tsd, tdata, tcache, true);
 }
 
 static void
@@ -1878,7 +1886,7 @@ prof_thread_name_alloc(tsd_t *tsd, const char *thread_name)
 	if (size == 1)
 		return ("");
 
-	ret = iallocztm(tsd, size, false, true, true, NULL);
+	ret = iallocztm(tsd, size, false, tcache_get(tsd, true), true, NULL);
 	if (ret == NULL)
 		return (NULL);
 	memcpy(ret, thread_name, size);
@@ -1910,7 +1918,8 @@ prof_thread_name_set(tsd_t *tsd, const char *thread_name)
 		return (EAGAIN);
 
 	if (tdata->thread_name != NULL) {
-		idalloctm(tsd, tdata->thread_name, true, true);
+		idalloctm(tsd, tdata->thread_name, tcache_get(tsd, false),
+		    true);
 		tdata->thread_name = NULL;
 	}
 	if (strlen(s) > 0)
diff --git a/src/quarantine.c b/src/quarantine.c
index 094b44d3..adc7305d 100644
--- a/src/quarantine.c
+++ b/src/quarantine.c
@@ -27,8 +27,8 @@ quarantine_init(tsd_t *tsd, size_t lg_maxobjs)
 	assert(tsd_nominal(tsd));
 
 	quarantine = (quarantine_t *)iallocztm(tsd, offsetof(quarantine_t, objs)
-	    + ((ZU(1) << lg_maxobjs) * sizeof(quarantine_obj_t)), false, true,
-	    true, NULL);
+	    + ((ZU(1) << lg_maxobjs) * sizeof(quarantine_obj_t)), false,
+	    tcache_get(tsd, true), true, NULL);
 	if (quarantine == NULL)
 		return (NULL);
 	quarantine->curbytes = 0;
@@ -55,7 +55,7 @@ quarantine_alloc_hook_work(tsd_t *tsd)
 	if (tsd_quarantine_get(tsd) == NULL)
 		tsd_quarantine_set(tsd, quarantine);
 	else
-		idalloctm(tsd, quarantine, true, true);
+		idalloctm(tsd, quarantine, tcache_get(tsd, false), true);
 }
 
 static quarantine_t *
@@ -87,7 +87,7 @@ quarantine_grow(tsd_t *tsd, quarantine_t *quarantine)
 		memcpy(&ret->objs[ncopy_a], quarantine->objs, ncopy_b *
 		    sizeof(quarantine_obj_t));
 	}
-	idalloctm(tsd, quarantine, true, true);
+	idalloctm(tsd, quarantine, tcache_get(tsd, false), true);
 
 	tsd_quarantine_set(tsd, ret);
 	return (ret);
@@ -177,7 +177,7 @@ quarantine_cleanup(tsd_t *tsd)
 	quarantine = tsd_quarantine_get(tsd);
 	if (quarantine != NULL) {
 		quarantine_drain(tsd, quarantine, 0);
-		idalloctm(tsd, quarantine, true, true);
+		idalloctm(tsd, quarantine, tcache_get(tsd, false), true);
 		tsd_quarantine_set(tsd, NULL);
 	}
 }
diff --git a/src/tcache.c b/src/tcache.c
index d638015f..c7d4f784 100644
--- a/src/tcache.c
+++ b/src/tcache.c
@@ -13,6 +13,14 @@ static unsigned		stack_nelms; /* Total stack elms per tcache. */
 size_t			nhbins;
 size_t			tcache_maxclass;
 
+tcaches_t		*tcaches;
+
+/* Index of first element within tcaches that has never been used. */
+static unsigned		tcaches_past;
+
+/* Head of singly linked list tracking available tcaches elements. */
+static tcaches_t	*tcaches_avail;
+
 /******************************************************************************/
 
 size_t
 tcache_salloc(const void *ptr)
@@ -22,7 +30,7 @@ size_t tcache_salloc(const void *ptr)
 }
 
 void
-tcache_event_hard(tcache_t *tcache)
+tcache_event_hard(tsd_t *tsd, tcache_t *tcache)
 {
 	index_t binind = tcache->next_gc_bin;
 	tcache_bin_t *tbin = &tcache->tbins[binind];
@@ -33,11 +41,11 @@ tcache_event_hard(tcache_t *tcache)
 		 * Flush (ceiling) 3/4 of the objects below the low water mark.
 		 */
 		if (binind < NBINS) {
-			tcache_bin_flush_small(tbin, binind, tbin->ncached -
-			    tbin->low_water + (tbin->low_water >> 2), tcache);
+			tcache_bin_flush_small(tsd, tbin, binind, tbin->ncached
+			    - tbin->low_water + (tbin->low_water >> 2), tcache);
 		} else {
-			tcache_bin_flush_large(tbin, binind, tbin->ncached -
-			    tbin->low_water + (tbin->low_water >> 2), tcache);
+			tcache_bin_flush_large(tsd, tbin, binind, tbin->ncached
+			    - tbin->low_water + (tbin->low_water >> 2), tcache);
 		}
 		/*
 		 * Reduce fill count by 2X. Limit lg_fill_div such that the
@@ -62,11 +70,12 @@
 }
 
 void *
-tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin, index_t binind)
+tcache_alloc_small_hard(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin,
+    index_t binind)
 {
 	void *ret;
 
-	arena_tcache_fill_small(tcache->arena, tbin, binind,
+	arena_tcache_fill_small(arena_choose(tsd, NULL), tbin, binind,
 	    config_prof ? tcache->prof_accumbytes : 0);
 	if (config_prof)
 		tcache->prof_accumbytes = 0;
@@ -76,9 +85,10 @@
 }
 
 void
-tcache_bin_flush_small(tcache_bin_t *tbin, index_t binind, unsigned rem,
-    tcache_t *tcache)
+tcache_bin_flush_small(tsd_t *tsd, tcache_bin_t *tbin, index_t binind,
+    unsigned rem, tcache_t *tcache)
 {
+	arena_t *arena;
 	void *ptr;
 	unsigned i, nflush, ndeferred;
 	bool merged_stats = false;
@@ -86,21 +96,23 @@ tcache_bin_flush_small(tcache_bin_t *tbin, index_t binind, unsigned rem,
 	assert(binind < NBINS);
 	assert(rem <= tbin->ncached);
 
+	arena = arena_choose(tsd, NULL);
+	assert(arena != NULL);
 	for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) {
 		/* Lock the arena bin associated with the first object. */
 		arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(
 		    tbin->avail[0]);
-		arena_t *arena = chunk->arena;
-		arena_bin_t *bin = &arena->bins[binind];
+		arena_t *bin_arena = chunk->arena;
+		arena_bin_t *bin = &bin_arena->bins[binind];
 
-		if (config_prof && arena == tcache->arena) {
+		if (config_prof && bin_arena == arena) {
 			if (arena_prof_accum(arena, tcache->prof_accumbytes))
 				prof_idump();
 			tcache->prof_accumbytes = 0;
 		}
 
 		malloc_mutex_lock(&bin->lock);
-		if (config_stats && arena == tcache->arena) {
+		if (config_stats && bin_arena == arena) {
 			assert(!merged_stats);
 			merged_stats = true;
 			bin->stats.nflushes++;
@@ -112,12 +124,12 @@
 			ptr = tbin->avail[i];
 			assert(ptr != NULL);
 			chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
-			if (chunk->arena == arena) {
+			if (chunk->arena == bin_arena) {
 				size_t pageind = ((uintptr_t)ptr -
 				    (uintptr_t)chunk) >> LG_PAGE;
 				arena_chunk_map_bits_t *bitselm =
 				    arena_bitselm_get(chunk, pageind);
-				arena_dalloc_bin_junked_locked(arena, chunk,
+				arena_dalloc_bin_junked_locked(bin_arena, chunk,
 				    ptr, bitselm);
 			} else {
 				/*
@@ -137,7 +149,7 @@
 		 * The flush loop didn't happen to flush to this thread's
 		 * arena, so the stats didn't get merged. Manually do so now.
 		 */
-		arena_bin_t *bin = &tcache->arena->bins[binind];
+		arena_bin_t *bin = &arena->bins[binind];
 		malloc_mutex_lock(&bin->lock);
 		bin->stats.nflushes++;
 		bin->stats.nrequests += tbin->tstats.nrequests;
@@ -153,9 +165,10 @@
 }
 
 void
-tcache_bin_flush_large(tcache_bin_t *tbin, index_t binind, unsigned rem,
-    tcache_t *tcache)
+tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, index_t binind,
+    unsigned rem, tcache_t *tcache)
 {
+	arena_t *arena;
 	void *ptr;
 	unsigned i, nflush, ndeferred;
 	bool merged_stats = false;
@@ -163,17 +176,19 @@
 	assert(binind < nhbins);
 	assert(rem <= tbin->ncached);
 
+	arena = arena_choose(tsd, NULL);
+	assert(arena != NULL);
 	for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) {
 		/* Lock the arena associated with the first object. */
 		arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(
 		    tbin->avail[0]);
-		arena_t *arena = chunk->arena;
+		arena_t *locked_arena = chunk->arena;
 		UNUSED bool idump;
 
 		if (config_prof)
 			idump = false;
-		malloc_mutex_lock(&arena->lock);
-		if ((config_prof || config_stats) && arena == tcache->arena) {
+		malloc_mutex_lock(&locked_arena->lock);
+		if ((config_prof || config_stats) && locked_arena == arena) {
 			if (config_prof) {
 				idump = arena_prof_accum_locked(arena,
 				    tcache->prof_accumbytes);
@@ -193,9 +208,9 @@
 			ptr = tbin->avail[i];
 			assert(ptr != NULL);
 			chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
-			if (chunk->arena == arena) {
-				arena_dalloc_large_junked_locked(arena, chunk,
-				    ptr);
+			if (chunk->arena == locked_arena) {
+				arena_dalloc_large_junked_locked(locked_arena,
+				    chunk, ptr);
 			} else {
 				/*
 				 * This object was allocated via a different
@@ -207,7 +222,7 @@
 				ndeferred++;
 			}
 		}
-		malloc_mutex_unlock(&arena->lock);
+		malloc_mutex_unlock(&locked_arena->lock);
 		if (config_prof && idump)
 			prof_idump();
 	}
@@ -216,7 +231,6 @@
 		 * The flush loop didn't happen to flush to this thread's
 		 * arena, so the stats didn't get merged. Manually do so now.
 		 */
-		arena_t *arena = tcache->arena;
 		malloc_mutex_lock(&arena->lock);
 		arena->stats.nrequests_large += tbin->tstats.nrequests;
 		arena->stats.lstats[binind - NBINS].nrequests +=
@@ -243,27 +257,37 @@
 		ql_tail_insert(&arena->tcache_ql, tcache, link);
 		malloc_mutex_unlock(&arena->lock);
 	}
-	tcache->arena = arena;
 }
 
 void
-tcache_arena_reassociate(tcache_t *tcache, arena_t *arena)
+tcache_arena_reassociate(tcache_t *tcache, arena_t *oldarena, arena_t *newarena)
 {
 
-	tcache_arena_dissociate(tcache);
-	tcache_arena_associate(tcache, arena);
+	tcache_arena_dissociate(tcache, oldarena);
+	tcache_arena_associate(tcache, newarena);
 }
 
 void
-tcache_arena_dissociate(tcache_t *tcache)
+tcache_arena_dissociate(tcache_t *tcache, arena_t *arena)
 {
 
 	if (config_stats) {
 		/* Unlink from list of extant tcaches. */
-		malloc_mutex_lock(&tcache->arena->lock);
-		ql_remove(&tcache->arena->tcache_ql, tcache, link);
-		tcache_stats_merge(tcache, tcache->arena);
-		malloc_mutex_unlock(&tcache->arena->lock);
+		malloc_mutex_lock(&arena->lock);
+		if (config_debug) {
+			bool in_ql = false;
+			tcache_t *iter;
+			ql_foreach(iter, &arena->tcache_ql, link) {
+				if (iter == tcache) {
+					in_ql = true;
+					break;
+				}
+			}
+			assert(in_ql);
+		}
+		ql_remove(&arena->tcache_ql, tcache, link);
+		tcache_stats_merge(tcache, arena);
+		malloc_mutex_unlock(&arena->lock);
 	}
 }
 
@@ -298,7 +322,7 @@ tcache_create(tsd_t *tsd, arena_t *arena)
 
 	/* Avoid false cacheline sharing. */
 	size = sa2u(size, CACHELINE);
 
-	tcache = ipallocztm(tsd, size, CACHELINE, true, false, true, arena);
+	tcache = ipallocztm(tsd, size, CACHELINE, true, false, true, a0get());
 	if (tcache == NULL)
 		return (NULL);
@@ -318,16 +342,17 @@ tcache_create(tsd_t *tsd, arena_t *arena)
 static void
 tcache_destroy(tsd_t *tsd, tcache_t *tcache)
 {
+	arena_t *arena;
 	unsigned i;
 
-	tcache_arena_dissociate(tcache);
+	arena = arena_choose(tsd, NULL);
+	tcache_arena_dissociate(tcache, arena);
 
 	for (i = 0; i < NBINS; i++) {
 		tcache_bin_t *tbin = &tcache->tbins[i];
-		tcache_bin_flush_small(tbin, i, 0, tcache);
+		tcache_bin_flush_small(tsd, tbin, i, 0, tcache);
 
 		if (config_stats && tbin->tstats.nrequests != 0) {
-			arena_t *arena = tcache->arena;
 			arena_bin_t *bin = &arena->bins[i];
 			malloc_mutex_lock(&bin->lock);
 			bin->stats.nrequests += tbin->tstats.nrequests;
@@ -337,10 +362,9 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache)
 
 	for (; i < nhbins; i++) {
 		tcache_bin_t *tbin = &tcache->tbins[i];
-		tcache_bin_flush_large(tbin, i, 0, tcache);
+		tcache_bin_flush_large(tsd, tbin, i, 0, tcache);
 
 		if (config_stats && tbin->tstats.nrequests != 0) {
-			arena_t *arena = tcache->arena;
 			malloc_mutex_lock(&arena->lock);
 			arena->stats.nrequests_large += tbin->tstats.nrequests;
 			arena->stats.lstats[i - NBINS].nrequests +=
@@ -350,7 +374,7 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache)
 	}
 
 	if (config_prof && tcache->prof_accumbytes > 0 &&
-	    arena_prof_accum(tcache->arena, tcache->prof_accumbytes))
+	    arena_prof_accum(arena, tcache->prof_accumbytes))
 		prof_idump();
 
 	idalloctm(tsd, tcache, false, true);
@@ -404,6 +428,66 @@ tcache_stats_merge(tcache_t *tcache, arena_t *arena)
 	}
 }
 
+bool
+tcaches_create(tsd_t *tsd, unsigned *r_ind)
+{
+	tcache_t *tcache;
+	tcaches_t *elm;
+
+	if (tcaches == NULL) {
+		tcaches = base_alloc(sizeof(tcache_t *) *
+		    (MALLOCX_TCACHE_MAX+1));
+		if (tcaches == NULL)
+			return (true);
+	}
+
+	if (tcaches_avail == NULL && tcaches_past > MALLOCX_TCACHE_MAX)
+		return (true);
+	tcache = tcache_create(tsd, a0get());
+	if (tcache == NULL)
+		return (true);
+
+	if (tcaches_avail != NULL) {
+		elm = tcaches_avail;
+		tcaches_avail = tcaches_avail->next;
+		elm->tcache = tcache;
+		*r_ind = elm - tcaches;
+	} else {
+		elm = &tcaches[tcaches_past];
+		elm->tcache = tcache;
+		*r_ind = tcaches_past;
+		tcaches_past++;
+	}
+
+	return (false);
+}
+
+static void
+tcaches_elm_flush(tsd_t *tsd, tcaches_t *elm)
+{
+
+	if (elm->tcache == NULL)
+		return;
+	tcache_destroy(tsd, elm->tcache);
+	elm->tcache = NULL;
+}
+
+void
+tcaches_flush(tsd_t *tsd, unsigned ind)
+{
+
+	tcaches_elm_flush(tsd, &tcaches[ind]);
+}
+
+void
+tcaches_destroy(tsd_t *tsd, unsigned ind)
+{
+	tcaches_t *elm = &tcaches[ind];
+	tcaches_elm_flush(tsd, elm);
+	elm->next = tcaches_avail;
+	tcaches_avail = elm;
+}
+
 bool
 tcache_boot(void)
 {
diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c
index f4b7d1ab..10a6fcd6 100644
--- a/test/unit/mallctl.c
+++ b/test/unit/mallctl.c
@@ -211,6 +211,114 @@ TEST_BEGIN(test_manpage_example)
 }
 TEST_END
 
+TEST_BEGIN(test_tcache_none)
+{
+	void *p0, *q, *p1;
+
+	test_skip_if(!config_tcache);
+
+	/* Allocate p and q. */
+	p0 = mallocx(42, 0);
+	assert_ptr_not_null(p0, "Unexpected mallocx() failure");
+	q = mallocx(42, 0);
+	assert_ptr_not_null(q, "Unexpected mallocx() failure");
+
+	/* Deallocate p and q, but bypass the tcache for q. */
+	dallocx(p0, 0);
+	dallocx(q, MALLOCX_TCACHE_NONE);
+
+	/* Make sure that tcache-based allocation returns p, not q. */
+	p1 = mallocx(42, 0);
+	assert_ptr_not_null(p1, "Unexpected mallocx() failure");
+	assert_ptr_eq(p0, p1, "Expected tcache to allocate cached region");
+
+	/* Clean up. */
+	dallocx(p1, MALLOCX_TCACHE_NONE);
+}
+TEST_END
+
+TEST_BEGIN(test_tcache)
+{
+#define	NTCACHES	10
+	unsigned tis[NTCACHES];
+	void *ps[NTCACHES];
+	void *qs[NTCACHES];
+	unsigned i;
+	size_t sz, psz, qsz;
+
+	test_skip_if(!config_tcache);
+
+	psz = 42;
+	qsz = nallocx(psz, 0) + 1;
+
+	/* Create tcaches. */
+	for (i = 0; i < NTCACHES; i++) {
+		sz = sizeof(unsigned);
+		assert_d_eq(mallctl("tcache.create", &tis[i], &sz, NULL, 0), 0,
+		    "Unexpected mallctl() failure, i=%u", i);
+	}
+
+	/* Flush empty tcaches. */
+	for (i = 0; i < NTCACHES; i++) {
+		assert_d_eq(mallctl("tcache.flush", NULL, NULL, &tis[i],
+		    sizeof(unsigned)), 0, "Unexpected mallctl() failure, i=%u",
+		    i);
+	}
+
+	/* Cache some allocations. */
+	for (i = 0; i < NTCACHES; i++) {
+		ps[i] = mallocx(psz, MALLOCX_TCACHE(tis[i]));
+		assert_ptr_not_null(ps[i], "Unexpected mallocx() failure, i=%u",
+		    i);
+		dallocx(ps[i], MALLOCX_TCACHE(tis[i]));
+
+		qs[i] = mallocx(qsz, MALLOCX_TCACHE(tis[i]));
+		assert_ptr_not_null(qs[i], "Unexpected mallocx() failure, i=%u",
+		    i);
+		dallocx(qs[i], MALLOCX_TCACHE(tis[i]));
+	}
+
+	/* Verify that tcaches allocate cached regions. */
+	for (i = 0; i < NTCACHES; i++) {
+		void *p0 = ps[i];
+		ps[i] = mallocx(psz, MALLOCX_TCACHE(tis[i]));
+		assert_ptr_not_null(ps[i], "Unexpected mallocx() failure, i=%u",
+		    i);
+		assert_ptr_eq(ps[i], p0,
+		    "Expected mallocx() to allocate cached region, i=%u", i);
+	}
+
+	/* Verify that reallocation uses cached regions. */
+	for (i = 0; i < NTCACHES; i++) {
+		void *q0 = qs[i];
+		qs[i] = rallocx(ps[i], qsz, MALLOCX_TCACHE(tis[i]));
+		assert_ptr_not_null(qs[i], "Unexpected rallocx() failure, i=%u",
+		    i);
+		assert_ptr_eq(qs[i], q0,
+		    "Expected rallocx() to allocate cached region, i=%u", i);
+		/* Avoid undefined behavior in case of test failure. */
+		if (qs[i] == NULL)
+			qs[i] = ps[i];
+	}
+	for (i = 0; i < NTCACHES; i++)
+		dallocx(qs[i], MALLOCX_TCACHE(tis[i]));
+
+	/* Flush some non-empty tcaches. */
+	for (i = 0; i < NTCACHES/2; i++) {
+		assert_d_eq(mallctl("tcache.flush", NULL, NULL, &tis[i],
+		    sizeof(unsigned)), 0, "Unexpected mallctl() failure, i=%u",
+		    i);
+	}
+
+	/* Destroy tcaches. */
+	for (i = 0; i < NTCACHES; i++) {
+		assert_d_eq(mallctl("tcache.destroy", NULL, NULL, &tis[i],
+		    sizeof(unsigned)), 0, "Unexpected mallctl() failure, i=%u",
+		    i);
+	}
+}
+TEST_END
+
 TEST_BEGIN(test_thread_arena)
 {
 	unsigned arena_old, arena_new, narenas;
@@ -431,6 +539,8 @@ main(void)
 	    test_mallctl_config,
 	    test_mallctl_opt,
 	    test_manpage_example,
+	    test_tcache_none,
+	    test_tcache,
 	    test_thread_arena,
 	    test_arena_i_purge,
 	    test_arena_i_dss,
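
For reference, a minimal sketch of how the interfaces added by this patch fit
together from an application's point of view. This is illustrative only, not
part of the patch: it assumes a jemalloc build with tcache support enabled and
elides most error handling; the mallctl names and the MALLOCX_TCACHE()/
MALLOCX_TCACHE_NONE flags are the ones introduced above.

	#include <stdlib.h>
	#include <jemalloc/jemalloc.h>

	int
	main(void)
	{
		unsigned tc;
		size_t sz = sizeof(tc);
		void *p;

		/* Acquire an explicit tcache identifier. */
		if (mallctl("tcache.create", &tc, &sz, NULL, 0) != 0)
			abort();

		/* Allocate and deallocate through the explicit tcache. */
		p = mallocx(42, MALLOCX_TCACHE(tc));
		dallocx(p, MALLOCX_TCACHE(tc));

		/* Or bypass thread-specific caching entirely. */
		p = mallocx(42, MALLOCX_TCACHE_NONE);
		dallocx(p, MALLOCX_TCACHE_NONE);

		/* Release cached objects; tc remains valid for reuse. */
		mallctl("tcache.flush", NULL, NULL, &tc, sizeof(tc));

		/* Retire tc so a future tcache.create can recycle it. */
		mallctl("tcache.destroy", NULL, NULL, &tc, sizeof(tc));

		return (0);
	}

Note that each explicit tcache may be used by only one thread at a time; as
the tcache.create documentation above states, enforcing that constraint is the
application's responsibility.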