diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 681b5802..894ce9af 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -389,7 +389,7 @@ bool arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec); void arena_stats_merge(arena_t *arena, const char **dss, size_t *nactive, size_t *ndirty, arena_stats_t *astats, malloc_bin_stats_t *bstats, malloc_large_stats_t *lstats); -bool arena_new(arena_t *arena, unsigned ind); +arena_t *arena_new(unsigned ind); void arena_boot(void); void arena_prefork(arena_t *arena); void arena_postfork_parent(arena_t *arena); @@ -924,8 +924,10 @@ arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero, true)) != NULL)) return (tcache_alloc_small(tcache, size, zero)); else { - return (arena_malloc_small(choose_arena(tsd, arena), - size, zero)); + arena = arena_choose(tsd, arena); + if (unlikely(arena == NULL)) + return (NULL); + return (arena_malloc_small(arena, size, zero)); } } else { /* @@ -936,8 +938,10 @@ arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero, tcache_get(tsd, true)) != NULL)) return (tcache_alloc_large(tcache, size, zero)); else { - return (arena_malloc_large(choose_arena(tsd, arena), - size, zero)); + arena = arena_choose(tsd, arena); + if (unlikely(arena == NULL)) + return (NULL); + return (arena_malloc_large(arena, size, zero)); } } } diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 8f0beb9e..c7a5fd8a 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -386,20 +386,6 @@ extern bool in_valgrind; /* Number of CPUs. */ extern unsigned ncpus; -/* Protects arenas initialization (arenas, arenas_total). */ -extern malloc_mutex_t arenas_lock; -/* - * Arenas that are used to service external requests. Not all elements of the - * arenas array are necessarily used; arenas are created lazily as needed. - * - * arenas[0..narenas_auto) are used for automatic multiplexing of threads and - * arenas. arenas[narenas_auto..narenas_total) are only used if the application - * takes some action to create them and allocate from them. - */ -extern arena_t **arenas; -extern unsigned narenas_total; -extern unsigned narenas_auto; /* Read-only after initialization. */ - /* * index2size_tab encodes the same information as could be computed (at * unacceptable cost in some code paths) by index2size_compute(). 
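The arena.h hunk above changes the allocation path's failure model: arena_new() now returns an arena_t * that is NULL on OOM, so arena selection can fail, and arena_malloc() propagates that NULL to its caller instead of punting to arenas[0] the way the removed code did. Below is a minimal caller-side sketch of that pattern; my_arena_t, my_arena_choose(), and my_arena_malloc() are hypothetical stand-ins, not jemalloc API.

#include <stddef.h>
#include <stdlib.h>

typedef struct { unsigned ind; } my_arena_t;

/* Stand-in for arena_choose(): return the thread's arena, created lazily;
 * may return NULL if that lazy creation hits OOM. */
static my_arena_t *
my_arena_choose(void)
{
	static my_arena_t *cur = NULL;

	if (cur == NULL)
		cur = (my_arena_t *)calloc(1, sizeof(my_arena_t));
	return (cur);
}

/* Caller-side pattern from the new arena_malloc(): check the selection
 * result and report OOM rather than falling back to arena 0. */
static void *
my_arena_malloc(my_arena_t *arena, size_t size)
{

	if (arena == NULL) {
		arena = my_arena_choose();
		if (arena == NULL)
			return (NULL);
	}
	return (malloc(size));	/* Placeholder for the per-arena bin path. */
}

int
main(void)
{
	void *p = my_arena_malloc(NULL, 32);

	free(p);
	return (0);
}

The trade-off is one extra branch on the fast path in exchange for arena-initialization failures surfacing as an ordinary ENOMEM at the allocation boundary.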
@@ -412,11 +398,22 @@ extern size_t const index2size_tab[NSIZES]; */ extern uint8_t const size2index_tab[]; +arena_t *a0get(void); +void *a0malloc(size_t size); +void *a0calloc(size_t num, size_t size); +void a0free(void *ptr); -arena_t *arenas_extend(unsigned ind); -arena_t *choose_arena_hard(tsd_t *tsd); +arena_t *arena_init(unsigned ind); +unsigned narenas_total_get(void); +arena_t *arena_get_hard(tsd_t *tsd, unsigned ind, bool init_if_missing); +arena_t *arena_choose_hard(tsd_t *tsd); +void arena_migrate(tsd_t *tsd, unsigned oldind, unsigned newind); +unsigned arena_nbound(unsigned ind); void thread_allocated_cleanup(tsd_t *tsd); void thread_deallocated_cleanup(tsd_t *tsd); void arena_cleanup(tsd_t *tsd); +void arenas_cache_cleanup(tsd_t *tsd); +void narenas_cache_cleanup(tsd_t *tsd); +void arenas_cache_bypass_cleanup(tsd_t *tsd); void jemalloc_prefork(void); void jemalloc_postfork_parent(void); void jemalloc_postfork_child(void); @@ -475,8 +472,9 @@ size_t s2u_compute(size_t size); size_t s2u_lookup(size_t size); size_t s2u(size_t size); size_t sa2u(size_t size, size_t alignment); -unsigned narenas_total_get(void); -arena_t *choose_arena(tsd_t *tsd, arena_t *arena); +arena_t *arena_choose(tsd_t *tsd, arena_t *arena); +arena_t *arena_get(tsd_t *tsd, unsigned ind, bool init_if_missing, + bool refresh_if_missing); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) @@ -709,34 +707,48 @@ sa2u(size_t size, size_t alignment) return (usize); } -JEMALLOC_INLINE unsigned -narenas_total_get(void) -{ - unsigned narenas; - - malloc_mutex_lock(&arenas_lock); - narenas = narenas_total; - malloc_mutex_unlock(&arenas_lock); - - return (narenas); -} - /* Choose an arena based on a per-thread value. */ JEMALLOC_INLINE arena_t * -choose_arena(tsd_t *tsd, arena_t *arena) +arena_choose(tsd_t *tsd, arena_t *arena) { arena_t *ret; if (arena != NULL) return (arena); - if (unlikely((ret = tsd_arena_get(tsd)) == NULL)) { - ret = choose_arena_hard(tsd); - assert(ret != NULL); - } + if (unlikely((ret = tsd_arena_get(tsd)) == NULL)) + ret = arena_choose_hard(tsd); return (ret); } + +JEMALLOC_INLINE arena_t * +arena_get(tsd_t *tsd, unsigned ind, bool init_if_missing, + bool refresh_if_missing) +{ + arena_t *arena; + arena_t **arenas_cache = tsd_arenas_cache_get(tsd); + + /* init_if_missing requires refresh_if_missing. */ + assert(!init_if_missing || refresh_if_missing); + + if (unlikely(arenas_cache == NULL)) { + /* arenas_cache hasn't been initialized yet. */ + return (arena_get_hard(tsd, ind, init_if_missing)); + } + if (unlikely(ind >= tsd_narenas_cache_get(tsd))) { + /* + * ind is invalid, cache is old (too small), or arena to be + * initialized. + */ + return (refresh_if_missing ? 
arena_get_hard(tsd, ind, + init_if_missing) : NULL); + } + arena = arenas_cache[ind]; + if (likely(arena != NULL) || !refresh_if_missing) + return (arena); + return (arena_get_hard(tsd, ind, init_if_missing)); +} #endif #include "jemalloc/internal/bitmap.h" @@ -833,8 +845,10 @@ ipalloct(tsd_t *tsd, size_t usize, size_t alignment, bool zero, bool try_tcache, ret = arena_malloc(tsd, arena, usize, zero, try_tcache); else { if (usize <= arena_maxclass) { - ret = arena_palloc(choose_arena(tsd, arena), usize, - alignment, zero); + arena = arena_choose(tsd, arena); + if (unlikely(arena == NULL)) + return (NULL); + ret = arena_palloc(arena, usize, alignment, zero); } else if (alignment <= chunksize) ret = huge_malloc(tsd, arena, usize, zero); else diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 1a7fde4b..d5e6fdcf 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -1,11 +1,14 @@ a0calloc a0free +a0get a0malloc arena_alloc_junk_small arena_bin_index arena_bin_info arena_bitselm_get arena_boot +arena_choose +arena_choose_hard arena_chunk_alloc_huge arena_chunk_dalloc_huge arena_cleanup @@ -19,6 +22,9 @@ arena_dalloc_large_locked arena_dalloc_small arena_dss_prec_get arena_dss_prec_set +arena_get +arena_get_hard +arena_init arena_malloc arena_malloc_large arena_malloc_small @@ -42,9 +48,11 @@ arena_mapbitsp_read arena_mapbitsp_write arena_maxclass arena_maxrun +arena_migrate arena_miscelm_get arena_miscelm_to_pageind arena_miscelm_to_rpages +arena_nbound arena_new arena_palloc arena_postfork_child @@ -69,10 +77,8 @@ arena_salloc arena_sdalloc arena_stats_merge arena_tcache_fill_small -arenas -arenas_cleanup -arenas_extend -arenas_lock +arenas_cache_bypass_cleanup +arenas_cache_cleanup atomic_add_u atomic_add_uint32 atomic_add_uint64 @@ -100,8 +106,6 @@ bitmap_size bitmap_unset bt_init buferror -choose_arena -choose_arena_hard chunk_alloc_arena chunk_alloc_base chunk_alloc_default @@ -247,7 +251,8 @@ malloc_mutex_unlock malloc_printf malloc_snprintf malloc_strtoumax -malloc_tsd_boot +malloc_tsd_boot0 +malloc_tsd_boot1 malloc_tsd_cleanup_register malloc_tsd_dalloc malloc_tsd_malloc @@ -259,8 +264,7 @@ map_bias map_misc_offset mb_write mutex_boot -narenas_auto -narenas_total +narenas_cache_cleanup narenas_total_get ncpus nhbins @@ -363,6 +367,7 @@ tcache_alloc_small tcache_alloc_small_hard tcache_arena_associate tcache_arena_dissociate +tcache_arena_reassociate tcache_bin_flush_large tcache_bin_flush_small tcache_bin_info @@ -388,11 +393,14 @@ tsd_booted tsd_arena_get tsd_arena_set tsd_boot +tsd_boot0 +tsd_boot1 tsd_cleanup tsd_cleanup_wrapper tsd_fetch tsd_get -tsd_get_wrapper +tsd_wrapper_get +tsd_wrapper_set tsd_initialized tsd_init_check_recursion tsd_init_finish diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index da8e4ef4..02eec5db 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -109,6 +109,7 @@ void tcache_bin_flush_small(tcache_bin_t *tbin, index_t binind, unsigned rem, void tcache_bin_flush_large(tcache_bin_t *tbin, index_t binind, unsigned rem, tcache_t *tcache); void tcache_arena_associate(tcache_t *tcache, arena_t *arena); +void tcache_arena_reassociate(tcache_t *tcache, arena_t *arena); void tcache_arena_dissociate(tcache_t *tcache); tcache_t *tcache_get_hard(tsd_t *tsd); tcache_t *tcache_create(arena_t *arena); diff --git
a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 25450391..b5658f8e 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -2,7 +2,7 @@ #ifdef JEMALLOC_H_TYPES /* Maximum number of malloc_tsd users with cleanup functions. */ -#define MALLOC_TSD_CLEANUPS_MAX 8 +#define MALLOC_TSD_CLEANUPS_MAX 2 typedef bool (*malloc_tsd_cleanup_t)(void); @@ -23,7 +23,7 @@ typedef enum { /* * TLS/TSD-agnostic macro-based implementation of thread-specific data. There - * are four macros that support (at least) three use cases: file-private, + * are five macros that support (at least) three use cases: file-private, * library-private, and library-private inlined. Following is an example * library-private tsd variable: * @@ -33,18 +33,19 @@ typedef enum { * int y; * } example_t; * #define EX_INITIALIZER JEMALLOC_CONCAT({0, 0}) - * malloc_tsd_protos(, example_, example_t *) - * malloc_tsd_externs(example_, example_t *) + * malloc_tsd_types(example_, example_t) + * malloc_tsd_protos(, example_, example_t) + * malloc_tsd_externs(example_, example_t) * In example.c: - * malloc_tsd_data(, example_, example_t *, EX_INITIALIZER) - * malloc_tsd_funcs(, example_, example_t *, EX_INITIALIZER, + * malloc_tsd_data(, example_, example_t, EX_INITIALIZER) + * malloc_tsd_funcs(, example_, example_t, EX_INITIALIZER, * example_tsd_cleanup) * * The result is a set of generated functions, e.g.: * * bool example_tsd_boot(void) {...} - * example_t **example_tsd_get() {...} - * void example_tsd_set(example_t **val) {...} + * example_t *example_tsd_get() {...} + * void example_tsd_set(example_t *val) {...} * * Note that all of the functions deal in terms of (a_type *) rather than * (a_type) so that it is possible to support non-pointer types (unlike @@ -70,9 +71,32 @@ typedef enum { * non-NULL. */ +/* malloc_tsd_types(). */ +#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP +#define malloc_tsd_types(a_name, a_type) +#elif (defined(JEMALLOC_TLS)) +#define malloc_tsd_types(a_name, a_type) +#elif (defined(_WIN32)) +#define malloc_tsd_types(a_name, a_type) \ +typedef struct { \ + bool initialized; \ + a_type val; \ +} a_name##tsd_wrapper_t; +#else +#define malloc_tsd_types(a_name, a_type) \ +typedef struct { \ + bool initialized; \ + a_type val; \ +} a_name##tsd_wrapper_t; +#endif + /* malloc_tsd_protos(). 
*/ #define malloc_tsd_protos(a_attr, a_name, a_type) \ a_attr bool \ +a_name##tsd_boot0(void); \ +a_attr void \ +a_name##tsd_boot1(void); \ +a_attr bool \ a_name##tsd_boot(void); \ a_attr a_type * \ a_name##tsd_get(void); \ @@ -93,11 +117,13 @@ extern bool a_name##tsd_booted; #elif (defined(_WIN32)) #define malloc_tsd_externs(a_name, a_type) \ extern DWORD a_name##tsd_tsd; \ +extern a_name##tsd_wrapper_t a_name##tsd_boot_wrapper; \ extern bool a_name##tsd_booted; #else #define malloc_tsd_externs(a_name, a_type) \ extern pthread_key_t a_name##tsd_tsd; \ extern tsd_init_head_t a_name##tsd_init_head; \ +extern a_name##tsd_wrapper_t a_name##tsd_boot_wrapper; \ extern bool a_name##tsd_booted; #endif @@ -118,6 +144,10 @@ a_attr bool a_name##tsd_booted = false; #elif (defined(_WIN32)) #define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ a_attr DWORD a_name##tsd_tsd; \ +a_attr a_name##tsd_wrapper_t a_name##tsd_boot_wrapper = { \ + false, \ + a_initializer \ +}; \ a_attr bool a_name##tsd_booted = false; #else #define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ @@ -126,6 +156,10 @@ a_attr tsd_init_head_t a_name##tsd_init_head = { \ ql_head_initializer(blocks), \ MALLOC_MUTEX_INITIALIZER \ }; \ +a_attr a_name##tsd_wrapper_t a_name##tsd_boot_wrapper = { \ + false, \ + a_initializer \ +}; \ a_attr bool a_name##tsd_booted = false; #endif @@ -145,7 +179,7 @@ a_name##tsd_cleanup_wrapper(void) \ return (a_name##tsd_initialized); \ } \ a_attr bool \ -a_name##tsd_boot(void) \ +a_name##tsd_boot0(void) \ { \ \ if (a_cleanup != malloc_tsd_no_cleanup) { \ @@ -155,6 +189,18 @@ a_name##tsd_boot(void) \ a_name##tsd_booted = true; \ return (false); \ } \ +a_attr void \ +a_name##tsd_boot1(void) \ +{ \ + \ + /* Do nothing. */ \ +} \ +a_attr bool \ +a_name##tsd_boot(void) \ +{ \ + \ + return (a_name##tsd_boot0()); \ +} \ /* Get/set. */ \ a_attr a_type * \ a_name##tsd_get(void) \ @@ -177,7 +223,7 @@ a_name##tsd_set(a_type *val) \ a_cleanup) \ /* Initialization/cleanup. */ \ a_attr bool \ -a_name##tsd_boot(void) \ +a_name##tsd_boot0(void) \ { \ \ if (a_cleanup != malloc_tsd_no_cleanup) { \ @@ -188,6 +234,18 @@ a_name##tsd_boot(void) \ a_name##tsd_booted = true; \ return (false); \ } \ +a_attr void \ +a_name##tsd_boot1(void) \ +{ \ + \ + /* Do nothing. */ \ +} \ +a_attr bool \ +a_name##tsd_boot(void) \ +{ \ + \ + return (a_name##tsd_boot0()); \ +} \ /* Get/set. */ \ a_attr a_type * \ a_name##tsd_get(void) \ @@ -215,11 +273,6 @@ a_name##tsd_set(a_type *val) \ #elif (defined(_WIN32)) #define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \ a_cleanup) \ -/* Data structure. */ \ -typedef struct { \ - bool initialized; \ - a_type val; \ -} a_name##tsd_wrapper_t; \ /* Initialization/cleanup. */ \ a_attr bool \ a_name##tsd_cleanup_wrapper(void) \ @@ -241,23 +294,18 @@ a_name##tsd_cleanup_wrapper(void) \ malloc_tsd_dalloc(wrapper); \ return (false); \ } \ -a_attr bool \ -a_name##tsd_boot(void) \ +a_attr void \ +a_name##tsd_wrapper_set(a_name##tsd_wrapper_t *wrapper) \ { \ \ - a_name##tsd_tsd = TlsAlloc(); \ - if (a_name##tsd_tsd == TLS_OUT_OF_INDEXES) \ - return (true); \ - if (a_cleanup != malloc_tsd_no_cleanup) { \ - malloc_tsd_cleanup_register( \ - &a_name##tsd_cleanup_wrapper); \ + if (!TlsSetValue(a_name##tsd_tsd, (void *)wrapper)) { \ + malloc_write("<jemalloc>: Error setting" \ + " TSD for "#a_name"\n"); \ + abort(); \ } \ - a_name##tsd_booted = true; \ - return (false); \ } \ -/* Get/set. 
*/ \ a_attr a_name##tsd_wrapper_t * \ -a_name##tsd_get_wrapper(void) \ +a_name##tsd_wrapper_get(void) \ { \ a_name##tsd_wrapper_t *wrapper = (a_name##tsd_wrapper_t *) \ TlsGetValue(a_name##tsd_tsd); \ @@ -273,21 +321,57 @@ a_name##tsd_get_wrapper(void) \ wrapper->initialized = false; \ wrapper->val = a_initializer; \ } \ - if (!TlsSetValue(a_name##tsd_tsd, (void *)wrapper)) { \ - malloc_write("<jemalloc>: Error setting" \ - " TSD for "#a_name"\n"); \ - abort(); \ - } \ + a_name##tsd_wrapper_set(wrapper); \ } \ return (wrapper); \ } \ +a_attr bool \ +a_name##tsd_boot0(void) \ +{ \ + \ + a_name##tsd_tsd = TlsAlloc(); \ + if (a_name##tsd_tsd == TLS_OUT_OF_INDEXES) \ + return (true); \ + if (a_cleanup != malloc_tsd_no_cleanup) { \ + malloc_tsd_cleanup_register( \ + &a_name##tsd_cleanup_wrapper); \ + } \ + a_name##tsd_wrapper_set(&a_name##tsd_boot_wrapper); \ + a_name##tsd_booted = true; \ + return (false); \ +} \ +a_attr void \ +a_name##tsd_boot1(void) \ +{ \ + a_name##tsd_wrapper_t *wrapper; \ + wrapper = (a_name##tsd_wrapper_t *) \ + malloc_tsd_malloc(sizeof(a_name##tsd_wrapper_t)); \ + if (wrapper == NULL) { \ + malloc_write("<jemalloc>: Error allocating" \ + " TSD for "#a_name"\n"); \ + abort(); \ + } \ + memcpy(wrapper, &a_name##tsd_boot_wrapper, \ + sizeof(a_name##tsd_wrapper_t)); \ + a_name##tsd_wrapper_set(wrapper); \ +} \ +a_attr bool \ +a_name##tsd_boot(void) \ +{ \ + \ + if (a_name##tsd_boot0()) \ + return (true); \ + a_name##tsd_boot1(); \ + return (false); \ +} \ +/* Get/set. */ \ a_attr a_type * \ a_name##tsd_get(void) \ { \ a_name##tsd_wrapper_t *wrapper; \ \ assert(a_name##tsd_booted); \ - wrapper = a_name##tsd_get_wrapper(); \ + wrapper = a_name##tsd_wrapper_get(); \ return (&wrapper->val); \ } \ a_attr void \ @@ -296,7 +380,7 @@ a_name##tsd_set(a_type *val) \ a_name##tsd_wrapper_t *wrapper; \ \ assert(a_name##tsd_booted); \ - wrapper = a_name##tsd_get_wrapper(); \ + wrapper = a_name##tsd_wrapper_get(); \ wrapper->val = *(val); \ if (a_cleanup != malloc_tsd_no_cleanup) \ wrapper->initialized = true; \ @@ -304,11 +388,6 @@ a_name##tsd_set(a_type *val) \ #else #define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \ a_cleanup) \ -/* Data structure. */ \ -typedef struct { \ - bool initialized; \ - a_type val; \ -} a_name##tsd_wrapper_t; \ /* Initialization/cleanup. */ \ a_attr void \ a_name##tsd_cleanup_wrapper(void *arg) \ @@ -333,19 +412,19 @@ a_name##tsd_cleanup_wrapper(void *arg) \ } \ malloc_tsd_dalloc(wrapper); \ } \ -a_attr bool \ -a_name##tsd_boot(void) \ +a_attr void \ +a_name##tsd_wrapper_set(a_name##tsd_wrapper_t *wrapper) \ { \ \ - if (pthread_key_create(&a_name##tsd_tsd, \ - a_name##tsd_cleanup_wrapper) != 0) \ - return (true); \ - a_name##tsd_booted = true; \ - return (false); \ + if (pthread_setspecific(a_name##tsd_tsd, \ + (void *)wrapper)) { \ + malloc_write("<jemalloc>: Error setting" \ + " TSD for "#a_name"\n"); \ + abort(); \ + } \ } \ -/* Get/set. 
*/ \ a_attr a_name##tsd_wrapper_t * \ -a_name##tsd_get_wrapper(void) \ +a_name##tsd_wrapper_get(void) \ { \ a_name##tsd_wrapper_t *wrapper = (a_name##tsd_wrapper_t *) \ pthread_getspecific(a_name##tsd_tsd); \ @@ -367,23 +446,54 @@ a_name##tsd_get_wrapper(void) \ wrapper->initialized = false; \ wrapper->val = a_initializer; \ } \ - if (pthread_setspecific(a_name##tsd_tsd, \ - (void *)wrapper)) { \ - malloc_write("<jemalloc>: Error setting" \ - " TSD for "#a_name"\n"); \ - abort(); \ - } \ + a_name##tsd_wrapper_set(wrapper); \ tsd_init_finish(&a_name##tsd_init_head, &block); \ } \ return (wrapper); \ } \ +a_attr bool \ +a_name##tsd_boot0(void) \ +{ \ + \ + if (pthread_key_create(&a_name##tsd_tsd, \ + a_name##tsd_cleanup_wrapper) != 0) \ + return (true); \ + a_name##tsd_wrapper_set(&a_name##tsd_boot_wrapper); \ + a_name##tsd_booted = true; \ + return (false); \ +} \ +a_attr void \ +a_name##tsd_boot1(void) \ +{ \ + a_name##tsd_wrapper_t *wrapper; \ + wrapper = (a_name##tsd_wrapper_t *) \ + malloc_tsd_malloc(sizeof(a_name##tsd_wrapper_t)); \ + if (wrapper == NULL) { \ + malloc_write("<jemalloc>: Error allocating" \ + " TSD for "#a_name"\n"); \ + abort(); \ + } \ + memcpy(wrapper, &a_name##tsd_boot_wrapper, \ + sizeof(a_name##tsd_wrapper_t)); \ + a_name##tsd_wrapper_set(wrapper); \ +} \ +a_attr bool \ +a_name##tsd_boot(void) \ +{ \ + \ + if (a_name##tsd_boot0()) \ + return (true); \ + a_name##tsd_boot1(); \ + return (false); \ +} \ +/* Get/set. */ \ a_attr a_type * \ a_name##tsd_get(void) \ { \ a_name##tsd_wrapper_t *wrapper; \ \ assert(a_name##tsd_booted); \ - wrapper = a_name##tsd_get_wrapper(); \ + wrapper = a_name##tsd_wrapper_get(); \ return (&wrapper->val); \ } \ a_attr void \ @@ -392,7 +502,7 @@ a_name##tsd_set(a_type *val) \ a_name##tsd_wrapper_t *wrapper; \ \ assert(a_name##tsd_booted); \ - wrapper = a_name##tsd_get_wrapper(); \ + wrapper = a_name##tsd_wrapper_get(); \ wrapper->val = *(val); \ if (a_cleanup != malloc_tsd_no_cleanup) \ wrapper->initialized = true; \ @@ -423,6 +533,9 @@ struct tsd_init_head_s { O(thread_deallocated, uint64_t) \ O(prof_tdata, prof_tdata_t *) \ O(arena, arena_t *) \ + O(arenas_cache, arena_t **) \ + O(narenas_cache, unsigned) \ + O(arenas_cache_bypass, bool) \ O(tcache_enabled, tcache_enabled_t) \ O(quarantine, quarantine_t *) \ @@ -433,6 +546,9 @@ struct tsd_init_head_s { 0, \ NULL, \ NULL, \ + NULL, \ + 0, \ + false, \ tcache_enabled_default, \ NULL \ } @@ -447,6 +563,8 @@ MALLOC_TSD static const tsd_t tsd_initializer = TSD_INITIALIZER; +malloc_tsd_types(, tsd_t) + #endif /* JEMALLOC_H_STRUCTS */ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS @@ -455,7 +573,8 @@ void *malloc_tsd_malloc(size_t size); void malloc_tsd_dalloc(void *wrapper); void malloc_tsd_no_cleanup(void *arg); void malloc_tsd_cleanup_register(bool (*f)(void)); -bool malloc_tsd_boot(void); +bool malloc_tsd_boot0(void); +void malloc_tsd_boot1(void); #if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) && \ !defined(_WIN32)) void *tsd_init_check_recursion(tsd_init_head_t *head, diff --git a/src/arena.c b/src/arena.c index 49a30572..86e54404 100644 --- a/src/arena.c +++ b/src/arena.c @@ -2192,27 +2192,37 @@ arena_stats_merge(arena_t *arena, const char **dss, size_t *nactive, } } -bool -arena_new(arena_t *arena, unsigned ind) +arena_t * +arena_new(unsigned ind) { + arena_t *arena; unsigned i; arena_bin_t *bin; + /* + * Allocate arena and arena->lstats contiguously, mainly because there + * is no way to clean up if base_alloc() OOMs. 
+ */ + if (config_stats) { + arena = (arena_t *)base_alloc(CACHELINE_CEILING(sizeof(arena_t)) + + nlclasses * sizeof(malloc_large_stats_t)); + } else + arena = (arena_t *)base_alloc(sizeof(arena_t)); + if (arena == NULL) + return (NULL); + arena->ind = ind; arena->nthreads = 0; arena->chunk_alloc = chunk_alloc_default; arena->chunk_dalloc = chunk_dalloc_default; if (malloc_mutex_init(&arena->lock)) - return (true); + return (NULL); if (config_stats) { memset(&arena->stats, 0, sizeof(arena_stats_t)); - arena->stats.lstats = - (malloc_large_stats_t *)base_alloc(nlclasses * - sizeof(malloc_large_stats_t)); - if (arena->stats.lstats == NULL) - return (true); + arena->stats.lstats = (malloc_large_stats_t *)(((void *)arena) + + CACHELINE_CEILING(sizeof(arena_t))); memset(arena->stats.lstats, 0, nlclasses * sizeof(malloc_large_stats_t)); if (config_tcache) @@ -2236,14 +2246,14 @@ arena_new(arena_t *arena, unsigned ind) for (i = 0; i < NBINS; i++) { bin = &arena->bins[i]; if (malloc_mutex_init(&bin->lock)) - return (true); + return (NULL); bin->runcur = NULL; arena_run_tree_new(&bin->runs); if (config_stats) memset(&bin->stats, 0, sizeof(malloc_bin_stats_t)); } - return (false); + return (arena); } /* diff --git a/src/chunk.c b/src/chunk.c index 618aaca0..f65b67af 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -254,9 +254,17 @@ void * chunk_alloc_default(void *new_addr, size_t size, size_t alignment, bool *zero, unsigned arena_ind) { + arena_t *arena; + + arena = arena_get(tsd_fetch(), arena_ind, false, true); + /* + * The arena we're allocating on behalf of must have been initialized + * already. + */ + assert(arena != NULL); return (chunk_alloc_core(new_addr, size, alignment, false, zero, - arenas[arena_ind]->dss_prec)); + arena->dss_prec)); } static void diff --git a/src/ctl.c b/src/ctl.c index f1f3234b..37f8f42a 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -447,7 +447,7 @@ ctl_arena_init(ctl_arena_stats_t *astats) { if (astats->lstats == NULL) { - astats->lstats = (malloc_large_stats_t *)base_alloc(nlclasses * + astats->lstats = (malloc_large_stats_t *)a0malloc(nlclasses * sizeof(malloc_large_stats_t)); if (astats->lstats == NULL) return (true); @@ -567,31 +567,24 @@ ctl_arena_refresh(arena_t *arena, unsigned i) static bool ctl_grow(void) { - tsd_t *tsd; ctl_arena_stats_t *astats; - arena_t **tarenas; - tsd = tsd_fetch(); + /* Initialize new arena. */ + if (arena_init(ctl_stats.narenas) == NULL) + return (true); - /* Allocate extended arena stats and arenas arrays. */ - astats = (ctl_arena_stats_t *)imalloc(tsd, (ctl_stats.narenas + 2) * + /* Allocate extended arena stats. */ + astats = (ctl_arena_stats_t *)a0malloc((ctl_stats.narenas + 2) * sizeof(ctl_arena_stats_t)); if (astats == NULL) return (true); - tarenas = (arena_t **)imalloc(tsd, (ctl_stats.narenas + 1) * - sizeof(arena_t *)); - if (tarenas == NULL) { - idalloc(tsd, astats); - return (true); - } /* Initialize the new astats element. */ memcpy(astats, ctl_stats.arenas, (ctl_stats.narenas + 1) * sizeof(ctl_arena_stats_t)); memset(&astats[ctl_stats.narenas + 1], 0, sizeof(ctl_arena_stats_t)); if (ctl_arena_init(&astats[ctl_stats.narenas + 1])) { - idalloc(tsd, tarenas); - idalloc(tsd, astats); + a0free(astats); return (true); } /* Swap merged stats to their new location. */ @@ -604,32 +597,7 @@ ctl_grow(void) memcpy(&astats[ctl_stats.narenas + 1], &tstats, sizeof(ctl_arena_stats_t)); } - /* Initialize the new arenas element. 
*/ - tarenas[ctl_stats.narenas] = NULL; - { - arena_t **arenas_old = arenas; - /* - * Swap extended arenas array into place. Although ctl_mtx - * protects this function from other threads extending the - * array, it does not protect from other threads mutating it - * (i.e. initializing arenas and setting array elements to - * point to them). Therefore, array copying must happen under - * the protection of arenas_lock. - */ - malloc_mutex_lock(&arenas_lock); - arenas = tarenas; - memcpy(arenas, arenas_old, ctl_stats.narenas * - sizeof(arena_t *)); - narenas_total++; - arenas_extend(narenas_total - 1); - malloc_mutex_unlock(&arenas_lock); - /* - * Deallocate arenas_old only if it came from imalloc() (not - * base_alloc()). - */ - if (ctl_stats.narenas != narenas_auto) - idalloc(tsd, arenas_old); - } + a0free(ctl_stats.arenas); ctl_stats.arenas = astats; ctl_stats.narenas++; @@ -639,6 +607,7 @@ ctl_grow(void) static void ctl_refresh(void) { + tsd_t *tsd; unsigned i; VARIABLE_ARRAY(arena_t *, tarenas, ctl_stats.narenas); @@ -657,15 +626,17 @@ ctl_refresh(void) ctl_stats.arenas[ctl_stats.narenas].nthreads = 0; ctl_arena_clear(&ctl_stats.arenas[ctl_stats.narenas]); - malloc_mutex_lock(&arenas_lock); - memcpy(tarenas, arenas, sizeof(arena_t *) * ctl_stats.narenas); + tsd = tsd_fetch(); + for (i = 0; i < ctl_stats.narenas; i++) + tarenas[i] = arena_get(tsd, i, false, (i == 0)); + for (i = 0; i < ctl_stats.narenas; i++) { - if (arenas[i] != NULL) - ctl_stats.arenas[i].nthreads = arenas[i]->nthreads; + if (tarenas[i] != NULL) + ctl_stats.arenas[i].nthreads = arena_nbound(i); else ctl_stats.arenas[i].nthreads = 0; } - malloc_mutex_unlock(&arenas_lock); + for (i = 0; i < ctl_stats.narenas; i++) { bool initialized = (tarenas[i] != NULL); @@ -698,9 +669,8 @@ ctl_init(void) * Allocate space for one extra arena stats element, which * contains summed stats across all arenas. */ - assert(narenas_auto == narenas_total_get()); - ctl_stats.narenas = narenas_auto; - ctl_stats.arenas = (ctl_arena_stats_t *)base_alloc( + ctl_stats.narenas = narenas_total_get(); + ctl_stats.arenas = (ctl_arena_stats_t *)a0malloc( (ctl_stats.narenas + 1) * sizeof(ctl_arena_stats_t)); if (ctl_stats.arenas == NULL) { ret = true; @@ -718,6 +688,13 @@ ctl_init(void) unsigned i; for (i = 0; i <= ctl_stats.narenas; i++) { if (ctl_arena_init(&ctl_stats.arenas[i])) { + unsigned j; + for (j = 0; j < i; j++) { + a0free( + ctl_stats.arenas[j].lstats); + } + a0free(ctl_stats.arenas); + ctl_stats.arenas = NULL; ret = true; goto label_return; } @@ -1231,17 +1208,19 @@ thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, { int ret; tsd_t *tsd; + arena_t *arena; unsigned newind, oldind; tsd = tsd_fetch(); + arena = arena_choose(tsd, NULL); + if (arena == NULL) + return (EAGAIN); malloc_mutex_lock(&ctl_mtx); - newind = oldind = choose_arena(tsd, NULL)->ind; + newind = oldind = arena->ind; WRITE(newind, unsigned); READ(oldind, unsigned); if (newind != oldind) { - arena_t *arena; - if (newind >= ctl_stats.narenas) { /* New arena index is out of range. */ ret = EFAULT; @@ -1249,28 +1228,18 @@ thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, } /* Initialize arena if necessary. 
*/ - malloc_mutex_lock(&arenas_lock); - if ((arena = arenas[newind]) == NULL && (arena = - arenas_extend(newind)) == NULL) { - malloc_mutex_unlock(&arenas_lock); + arena = arena_get(tsd, newind, true, true); + if (arena == NULL) { ret = EAGAIN; goto label_return; } - assert(arena == arenas[newind]); - arenas[oldind]->nthreads--; - arenas[newind]->nthreads++; - malloc_mutex_unlock(&arenas_lock); - - /* Set new arena association. */ + /* Set new arena/tcache associations. */ + arena_migrate(tsd, oldind, newind); if (config_tcache) { tcache_t *tcache = tsd_tcache_get(tsd); - if (tcache != NULL) { - tcache_arena_dissociate(tcache); - tcache_arena_associate(tcache, arena); - } + if (tcache != NULL) + tcache_arena_reassociate(tcache, arena); } - - tsd_arena_set(tsd, arena); } ret = 0; @@ -1400,11 +1369,13 @@ label_return: static void arena_purge(unsigned arena_ind) { + tsd_t *tsd; + unsigned i; VARIABLE_ARRAY(arena_t *, tarenas, ctl_stats.narenas); - malloc_mutex_lock(&arenas_lock); - memcpy(tarenas, arenas, sizeof(arena_t *) * ctl_stats.narenas); - malloc_mutex_unlock(&arenas_lock); + tsd = tsd_fetch(); + for (i = 0; i < ctl_stats.narenas; i++) + tarenas[i] = arena_get(tsd, i, false, (i == 0)); if (arena_ind == ctl_stats.narenas) { unsigned i; @@ -1467,7 +1438,7 @@ arena_i_dss_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, } if (arena_ind < ctl_stats.narenas) { - arena_t *arena = arenas[arena_ind]; + arena_t *arena = arena_get(tsd_fetch(), arena_ind, false, true); if (arena == NULL || (dss_prec != dss_prec_limit && arena_dss_prec_set(arena, dss_prec))) { ret = EFAULT; @@ -1501,7 +1472,8 @@ arena_i_chunk_alloc_ctl(const size_t *mib, size_t miblen, void *oldp, arena_t *arena; malloc_mutex_lock(&ctl_mtx); - if (arena_ind < narenas_total && (arena = arenas[arena_ind]) != NULL) { + if (arena_ind < narenas_total_get() && (arena = arena_get(tsd_fetch(), + arena_ind, false, true)) != NULL) { malloc_mutex_lock(&arena->lock); READ(arena->chunk_alloc, chunk_alloc_t *); WRITE(arena->chunk_alloc, chunk_alloc_t *); @@ -1527,7 +1499,8 @@ arena_i_chunk_dalloc_ctl(const size_t *mib, size_t miblen, void *oldp, arena_t *arena; malloc_mutex_lock(&ctl_mtx); - if (arena_ind < narenas_total && (arena = arenas[arena_ind]) != NULL) { + if (arena_ind < narenas_total_get() && (arena = arena_get(tsd_fetch(), + arena_ind, false, true)) != NULL) { malloc_mutex_lock(&arena->lock); READ(arena->chunk_dalloc, chunk_dalloc_t *); WRITE(arena->chunk_dalloc, chunk_dalloc_t *); diff --git a/src/huge.c b/src/huge.c index ae416253..1376729a 100644 --- a/src/huge.c +++ b/src/huge.c @@ -50,7 +50,11 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, * it is possible to make correct junk/zero fill decisions below. */ is_zeroed = zero; - arena = choose_arena(tsd, arena); + arena = arena_choose(tsd, arena); + if (unlikely(arena == NULL)) { + base_node_dalloc(node); + return (NULL); + } ret = arena_chunk_alloc_huge(arena, NULL, csize, alignment, &is_zeroed); if (ret == NULL) { base_node_dalloc(node); diff --git a/src/jemalloc.c b/src/jemalloc.c index f3750b40..3c889e8a 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -4,8 +4,6 @@ /******************************************************************************/ /* Data. */ -malloc_tsd_data(, arenas, arena_t *, NULL) - /* Runtime configuration options. 
*/ const char *je_malloc_conf JEMALLOC_ATTR(weak); bool opt_abort = @@ -34,10 +32,20 @@ bool in_valgrind; unsigned ncpus; -malloc_mutex_t arenas_lock; -arena_t **arenas; -unsigned narenas_total; -unsigned narenas_auto; +/* Protects arenas initialization (arenas, narenas_total). */ +static malloc_mutex_t arenas_lock; +/* + * Arenas that are used to service external requests. Not all elements of the + * arenas array are necessarily used; arenas are created lazily as needed. + * + * arenas[0..narenas_auto) are used for automatic multiplexing of threads and + * arenas. arenas[narenas_auto..narenas_total) are only used if the application + * takes some action to create them and allocate from them. + */ +static arena_t **arenas; +static unsigned narenas_total; +static arena_t *a0; /* arenas[0]; read-only after initialization. */ +static unsigned narenas_auto; /* Read-only after initialization. */ /* Set to true once the allocator has been initialized. */ static bool malloc_initialized = false; @@ -144,35 +152,288 @@ static bool malloc_init_hard(void); * Begin miscellaneous support functions. */ -/* Create a new arena and insert it into the arenas array at index ind. */ -arena_t * -arenas_extend(unsigned ind) +JEMALLOC_ALWAYS_INLINE_C void +malloc_thread_init(void) { - arena_t *ret; - - ret = (arena_t *)base_alloc(sizeof(arena_t)); - if (ret != NULL && !arena_new(ret, ind)) { - arenas[ind] = ret; - return (ret); - } - /* Only reached if there is an OOM error. */ /* - * OOM here is quite inconvenient to propagate, since dealing with it - * would require a check for failure in the fast path. Instead, punt - * by using arenas[0]. In practice, this is an extremely unlikely - * failure. + * TSD initialization can't be safely done as a side effect of + * deallocation, because it is possible for a thread to do nothing but + * deallocate its TLS data via free(), in which case writing to TLS + * would cause write-after-free memory corruption. The quarantine + * facility *only* gets used as a side effect of deallocation, so make + * a best effort attempt at initializing its TSD by hooking all + * allocation events. */ - malloc_write("<jemalloc>: Error initializing arena\n"); - if (opt_abort) - abort(); - - return (arenas[0]); + if (config_fill && unlikely(opt_quarantine)) + quarantine_alloc_hook(); } -/* Slow path, called only by choose_arena(). */ +JEMALLOC_ALWAYS_INLINE_C bool +malloc_init(void) +{ + + if (unlikely(!malloc_initialized) && malloc_init_hard()) + return (true); + malloc_thread_init(); + + return (false); +} + +/* + * The a0*() functions are used instead of i[mcd]alloc() in bootstrap-sensitive + * situations that cannot tolerate TLS variable access. These functions are + * also exposed for use in static binaries on FreeBSD, hence the old-style + * malloc() API. 
+ */ + arena_t * -choose_arena_hard(tsd_t *tsd) +a0get(void) +{ + + assert(a0 != NULL); + return (a0); +} + +static void * +a0alloc(size_t size, bool zero) +{ + void *ret; + + if (unlikely(malloc_init())) + return (NULL); + + if (size == 0) + size = 1; + + if (size <= arena_maxclass) + ret = arena_malloc(NULL, a0get(), size, zero, false); + else + ret = huge_malloc(NULL, a0get(), size, zero); + + return (ret); +} + +void * +a0malloc(size_t size) +{ + + return (a0alloc(size, false)); +} + +void * +a0calloc(size_t num, size_t size) +{ + + return (a0alloc(num * size, true)); +} + +void +a0free(void *ptr) +{ + arena_chunk_t *chunk; + + if (ptr == NULL) + return; + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + if (chunk != ptr) + arena_dalloc(NULL, chunk, ptr, false); + else + huge_dalloc(ptr); +} + +/* Create a new arena and insert it into the arenas array at index ind. */ +arena_t * +arena_init(unsigned ind) +{ + arena_t *arena; + + malloc_mutex_lock(&arenas_lock); + + /* Expand arenas if necessary. */ + assert(ind <= narenas_total); + if (ind == narenas_total) { + unsigned narenas_new = narenas_total + 1; + arena_t **arenas_new = + (arena_t **)a0malloc(CACHELINE_CEILING(narenas_new * + sizeof(arena_t *))); + if (arenas_new == NULL) { + arena = NULL; + goto label_return; + } + memcpy(arenas_new, arenas, narenas_total * sizeof(arena_t *)); + arenas_new[ind] = NULL; + /* + * Deallocate only if arenas came from a0malloc() (not + * base_alloc()). + */ + if (narenas_total != narenas_auto) + a0free(arenas); + arenas = arenas_new; + narenas_total = narenas_new; + } + + /* + * Another thread may have already initialized arenas[ind] if it's an + * auto arena. + */ + arena = arenas[ind]; + if (arena != NULL) { + assert(ind < narenas_auto); + goto label_return; + } + + /* Actually initialize the arena. */ + arena = arenas[ind] = arena_new(ind); +label_return: + malloc_mutex_unlock(&arenas_lock); + return (arena); +} + +unsigned +narenas_total_get(void) +{ + unsigned narenas; + + malloc_mutex_lock(&arenas_lock); + narenas = narenas_total; + malloc_mutex_unlock(&arenas_lock); + + return (narenas); +} + +static void +arena_bind_locked(tsd_t *tsd, unsigned ind) +{ + arena_t *arena; + + arena = arenas[ind]; + arena->nthreads++; + + if (tsd_nominal(tsd)) + tsd_arena_set(tsd, arena); +} + +static void +arena_bind(tsd_t *tsd, unsigned ind) +{ + + malloc_mutex_lock(&arenas_lock); + arena_bind_locked(tsd, ind); + malloc_mutex_unlock(&arenas_lock); +} + +void +arena_migrate(tsd_t *tsd, unsigned oldind, unsigned newind) +{ + arena_t *oldarena, *newarena; + + malloc_mutex_lock(&arenas_lock); + oldarena = arenas[oldind]; + newarena = arenas[newind]; + oldarena->nthreads--; + newarena->nthreads++; + malloc_mutex_unlock(&arenas_lock); + tsd_arena_set(tsd, newarena); +} + +unsigned +arena_nbound(unsigned ind) +{ + unsigned nthreads; + + malloc_mutex_lock(&arenas_lock); + nthreads = arenas[ind]->nthreads; + malloc_mutex_unlock(&arenas_lock); + return (nthreads); +} + +static void +arena_unbind(tsd_t *tsd, unsigned ind) +{ + arena_t *arena; + + malloc_mutex_lock(&arenas_lock); + arena = arenas[ind]; + arena->nthreads--; + malloc_mutex_unlock(&arenas_lock); + tsd_arena_set(tsd, NULL); +} + +arena_t * +arena_get_hard(tsd_t *tsd, unsigned ind, bool init_if_missing) +{ + arena_t *arena; + arena_t **arenas_cache = tsd_arenas_cache_get(tsd); + unsigned narenas_cache = tsd_narenas_cache_get(tsd); + unsigned narenas_actual = narenas_total_get(); + + /* Deallocate old cache if it's too small. 
*/ + if (arenas_cache != NULL && narenas_cache < narenas_actual) { + a0free(arenas_cache); + arenas_cache = NULL; + narenas_cache = 0; + tsd_arenas_cache_set(tsd, arenas_cache); + tsd_narenas_cache_set(tsd, narenas_cache); + } + + /* Allocate cache if it's missing. */ + if (arenas_cache == NULL) { + bool *arenas_cache_bypassp = tsd_arenas_cache_bypassp_get(tsd); + assert(ind < narenas_actual || !init_if_missing); + narenas_cache = (ind < narenas_actual) ? narenas_actual : ind+1; + + if (!*arenas_cache_bypassp) { + *arenas_cache_bypassp = true; + arenas_cache = (arena_t **)a0malloc(sizeof(arena_t *) * + narenas_cache); + *arenas_cache_bypassp = false; + } else + arenas_cache = NULL; + if (arenas_cache == NULL) { + /* + * This function must always tell the truth, even if + * it's slow, so don't let OOM or recursive allocation + * avoidance (note arenas_cache_bypass check) get in the + * way. + */ + if (ind >= narenas_actual) + return (NULL); + malloc_mutex_lock(&arenas_lock); + arena = arenas[ind]; + malloc_mutex_unlock(&arenas_lock); + return (arena); + } + tsd_arenas_cache_set(tsd, arenas_cache); + tsd_narenas_cache_set(tsd, narenas_cache); + } + + /* + * Copy to cache. It's possible that the actual number of arenas has + * increased since narenas_total_get() was called above, but that causes + * no correctness issues unless two threads concurrently execute the + * arenas.extend mallctl, which we trust mallctl synchronization to + * prevent. + */ + malloc_mutex_lock(&arenas_lock); + memcpy(arenas_cache, arenas, sizeof(arena_t *) * narenas_actual); + malloc_mutex_unlock(&arenas_lock); + if (narenas_cache > narenas_actual) { + memset(&arenas_cache[narenas_actual], 0, sizeof(arena_t *) * + (narenas_cache - narenas_actual)); + } + + /* Read the refreshed cache, and init the arena if necessary. */ + arena = arenas_cache[ind]; + if (init_if_missing && arena == NULL) + arena = arenas_cache[ind] = arena_init(ind); + return (arena); +} + +/* Slow path, called only by arena_choose(). */ +arena_t * +arena_choose_hard(tsd_t *tsd) { arena_t *ret; @@ -182,7 +443,7 @@ choose_arena_hard(tsd_t *tsd) choose = 0; first_null = narenas_auto; malloc_mutex_lock(&arenas_lock); - assert(arenas[0] != NULL); + assert(a0get() != NULL); for (i = 1; i < narenas_auto; i++) { if (arenas[i] != NULL) { /* @@ -215,20 +476,20 @@ choose_arena_hard(tsd_t *tsd) ret = arenas[choose]; } else { /* Initialize a new arena. */ - ret = arenas_extend(first_null); + choose = first_null; + ret = arena_init(choose); + if (ret == NULL) { + malloc_mutex_unlock(&arenas_lock); + return (NULL); + } } - ret->nthreads++; + arena_bind_locked(tsd, choose); malloc_mutex_unlock(&arenas_lock); } else { - ret = arenas[0]; - malloc_mutex_lock(&arenas_lock); - ret->nthreads++; - malloc_mutex_unlock(&arenas_lock); + ret = a0get(); + arena_bind(tsd, 0); } - if (tsd_nominal(tsd)) - tsd_arena_set(tsd, ret); - return (ret); } @@ -248,6 +509,33 @@ thread_deallocated_cleanup(tsd_t *tsd) void arena_cleanup(tsd_t *tsd) +{ + arena_t *arena; + + arena = tsd_arena_get(tsd); + if (arena != NULL) + arena_unbind(tsd, arena->ind); +} + +void +arenas_cache_cleanup(tsd_t *tsd) +{ + arena_t **arenas_cache; + + arenas_cache = tsd_arenas_cache_get(tsd); + if (arenas_cache != NULL) + a0free(arenas_cache); +} + +void +narenas_cache_cleanup(tsd_t *tsd) +{ + + /* Do nothing. */ +} + +void +arenas_cache_bypass_cleanup(tsd_t *tsd) { /* Do nothing. */ @@ -312,44 +600,6 @@ malloc_ncpus(void) return ((result == -1) ? 
1 : (unsigned)result); } -void -arenas_cleanup(void *arg) -{ - arena_t *arena = *(arena_t **)arg; - - malloc_mutex_lock(&arenas_lock); - arena->nthreads--; - malloc_mutex_unlock(&arenas_lock); -} - -JEMALLOC_ALWAYS_INLINE_C void -malloc_thread_init(void) -{ - - /* - * TSD initialization can't be safely done as a side effect of - * deallocation, because it is possible for a thread to do nothing but - * deallocate its TLS data via free(), in which case writing to TLS - * would cause write-after-free memory corruption. The quarantine - * facility *only* gets used as a side effect of deallocation, so make - * a best effort attempt at initializing its TSD by hooking all - * allocation events. - */ - if (config_fill && unlikely(opt_quarantine)) - quarantine_alloc_hook(); -} - -JEMALLOC_ALWAYS_INLINE_C bool -malloc_init(void) -{ - - if (unlikely(!malloc_initialized) && malloc_init_hard()) - return (true); - malloc_thread_init(); - - return (false); -} - static bool malloc_conf_next(char const **opts_p, char const **k_p, size_t *klen_p, char const **v_p, size_t *vlen_p) @@ -745,7 +995,7 @@ malloc_init_hard(void) #endif malloc_initializer = INITIALIZER; - if (malloc_tsd_boot()) { + if (malloc_tsd_boot0()) { malloc_mutex_unlock(&init_lock); return (true); } @@ -809,10 +1059,10 @@ malloc_init_hard(void) /* * Initialize one arena here. The rest are lazily created in - * choose_arena_hard(). + * arena_choose_hard(). */ - arenas_extend(0); - if (arenas[0] == NULL) { + a0 = arena_init(0); + if (a0 == NULL) { malloc_mutex_unlock(&init_lock); return (true); } @@ -887,6 +1137,7 @@ malloc_init_hard(void) malloc_initialized = true; malloc_mutex_unlock(&init_lock); + malloc_tsd_boot1(); return (false); } @@ -1428,8 +1679,8 @@ JEMALLOC_EXPORT void *(*__memalign_hook)(size_t alignment, size_t size) = * Begin non-standard functions. 
*/ -JEMALLOC_ALWAYS_INLINE_C void -imallocx_flags_decode_hard(size_t size, int flags, size_t *usize, +JEMALLOC_ALWAYS_INLINE_C bool +imallocx_flags_decode_hard(tsd_t *tsd, size_t size, int flags, size_t *usize, size_t *alignment, bool *zero, bool *try_tcache, arena_t **arena) { @@ -1444,16 +1695,19 @@ imallocx_flags_decode_hard(size_t size, int flags, size_t *usize, if ((flags & MALLOCX_ARENA_MASK) != 0) { unsigned arena_ind = MALLOCX_ARENA_GET(flags); *try_tcache = false; - *arena = arenas[arena_ind]; + *arena = arena_get(tsd, arena_ind, true, true); + if (unlikely(*arena == NULL)) + return (true); } else { *try_tcache = true; *arena = NULL; } + return (false); } -JEMALLOC_ALWAYS_INLINE_C void -imallocx_flags_decode(size_t size, int flags, size_t *usize, size_t *alignment, - bool *zero, bool *try_tcache, arena_t **arena) +JEMALLOC_ALWAYS_INLINE_C bool +imallocx_flags_decode(tsd_t *tsd, size_t size, int flags, size_t *usize, + size_t *alignment, bool *zero, bool *try_tcache, arena_t **arena) { if (likely(flags == 0)) { @@ -1463,9 +1717,10 @@ imallocx_flags_decode(size_t size, int flags, size_t *usize, size_t *alignment, *zero = false; *try_tcache = true; *arena = NULL; + return (false); } else { - imallocx_flags_decode_hard(size, flags, usize, alignment, zero, - try_tcache, arena); + return (imallocx_flags_decode_hard(tsd, size, flags, usize, + alignment, zero, try_tcache, arena)); } } @@ -1524,8 +1779,9 @@ imallocx_prof(tsd_t *tsd, size_t size, int flags, size_t *usize) arena_t *arena; prof_tctx_t *tctx; - imallocx_flags_decode(size, flags, usize, &alignment, &zero, - &try_tcache, &arena); + if (unlikely(imallocx_flags_decode(tsd, size, flags, usize, &alignment, + &zero, &try_tcache, &arena))) + return (NULL); tctx = prof_alloc_prep(tsd, *usize, true); if (likely((uintptr_t)tctx == (uintptr_t)1U)) { p = imallocx_maybe_flags(tsd, size, flags, *usize, alignment, @@ -1558,8 +1814,9 @@ imallocx_no_prof(tsd_t *tsd, size_t size, int flags, size_t *usize) return (imalloc(tsd, size)); } - imallocx_flags_decode_hard(size, flags, usize, &alignment, &zero, - &try_tcache, &arena); + if (unlikely(imallocx_flags_decode_hard(tsd, size, flags, usize, + &alignment, &zero, &try_tcache, &arena))) + return (NULL); return (imallocx_flags(tsd, *usize, alignment, zero, try_tcache, arena)); } @@ -1685,9 +1942,10 @@ je_rallocx(void *ptr, size_t size, int flags) arena_chunk_t *chunk; try_tcache_alloc = false; chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - try_tcache_dalloc = (chunk == ptr || chunk->arena != - arenas[arena_ind]); - arena = arenas[arena_ind]; + arena = arena_get(tsd, arena_ind, true, true); + if (unlikely(arena == NULL)) + goto label_oom; + try_tcache_dalloc = (chunk == ptr || chunk->arena != arena); } else { try_tcache_alloc = true; try_tcache_dalloc = true; @@ -1825,6 +2083,7 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) if (unlikely((flags & MALLOCX_ARENA_MASK) != 0)) { unsigned arena_ind = MALLOCX_ARENA_GET(flags); + // XXX Dangerous arenas read. 
arena = arenas[arena_ind]; } else arena = NULL; @@ -1875,16 +2134,24 @@ je_sallocx(const void *ptr, int flags) void je_dallocx(void *ptr, int flags) { + tsd_t *tsd; bool try_tcache; assert(ptr != NULL); assert(malloc_initialized || IS_INITIALIZER); + tsd = tsd_fetch(); if (unlikely((flags & MALLOCX_ARENA_MASK) != 0)) { unsigned arena_ind = MALLOCX_ARENA_GET(flags); arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - try_tcache = (chunk == ptr || chunk->arena != - arenas[arena_ind]); + arena_t *arena = arena_get(tsd, arena_ind, true, true); + /* + * If arena is NULL, the application passed an arena that has + * never been used before, which is unsupported during + * deallocation. + */ + assert(arena != NULL); + try_tcache = (chunk == ptr || chunk->arena != arena); } else try_tcache = true; @@ -1908,6 +2175,7 @@ inallocx(size_t size, int flags) void je_sdallocx(void *ptr, size_t size, int flags) { + tsd_t *tsd; bool try_tcache; size_t usize; @@ -1916,16 +2184,23 @@ je_sdallocx(void *ptr, size_t size, int flags) usize = inallocx(size, flags); assert(usize == isalloc(ptr, config_prof)); + tsd = tsd_fetch(); if (unlikely((flags & MALLOCX_ARENA_MASK) != 0)) { unsigned arena_ind = MALLOCX_ARENA_GET(flags); arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + arena_t *arena = arena_get(tsd, arena_ind, true, true); + /* + * If arena is NULL, the application passed an arena that has + * never been used before, which is unsupported during + * deallocation. + */ + assert(arena != NULL); + try_tcache = (chunk == ptr || chunk->arena != arena); } else try_tcache = true; UTRACE(ptr, 0, 0); - isfree(tsd_fetch(), ptr, usize, try_tcache); + isfree(tsd, ptr, usize, try_tcache); } size_t @@ -2105,55 +2380,3 @@ jemalloc_postfork_child(void) } /******************************************************************************/ -/* - * The following functions are used for TLS allocation/deallocation in static - * binaries on FreeBSD. The primary difference between these and i[mcd]alloc() - * is that these avoid accessing TLS variables. - */ - -static void * -a0alloc(size_t size, bool zero) -{ - - if (unlikely(malloc_init())) - return (NULL); - - if (size == 0) - size = 1; - - if (size <= arena_maxclass) - return (arena_malloc(NULL, arenas[0], size, zero, false)); - else - return (huge_malloc(NULL, arenas[0], size, zero)); -} - -void * -a0malloc(size_t size) -{ - - return (a0alloc(size, false)); -} - -void * -a0calloc(size_t num, size_t size) -{ - - return (a0alloc(num * size, true)); -} - -void -a0free(void *ptr) -{ - arena_chunk_t *chunk; - - if (ptr == NULL) - return; - - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (chunk != ptr) - arena_dalloc(NULL, chunk, ptr, false); - else - huge_dalloc(ptr); -} - -/******************************************************************************/ diff --git a/src/tcache.c b/src/tcache.c index 2c968c68..1bf70269 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -245,6 +245,14 @@ tcache_arena_associate(tcache_t *tcache, arena_t *arena) tcache->arena = arena; } +void +tcache_arena_reassociate(tcache_t *tcache, arena_t *arena) +{ + + tcache_arena_dissociate(tcache); + tcache_arena_associate(tcache, arena); +} + void tcache_arena_dissociate(tcache_t *tcache) { @@ -261,13 +269,17 @@ tcache_arena_dissociate(tcache_t *tcache) tcache_t * tcache_get_hard(tsd_t *tsd) { + arena_t *arena; if (!tcache_enabled_get()) { if (tsd_nominal(tsd)) tcache_enabled_set(false); /* Memoize. 
*/ return (NULL); } - return (tcache_create(choose_arena(tsd, NULL))); + arena = arena_choose(tsd, NULL); + if (unlikely(arena == NULL)) + return (NULL); + return (tcache_create(arena)); } tcache_t * diff --git a/src/tsd.c b/src/tsd.c index cbc64e44..59253fe3 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -15,16 +15,14 @@ void * malloc_tsd_malloc(size_t size) { - /* Avoid choose_arena() in order to dodge bootstrapping issues. */ - return (arena_malloc(NULL, arenas[0], CACHELINE_CEILING(size), false, - false)); + return (a0malloc(CACHELINE_CEILING(size))); } void malloc_tsd_dalloc(void *wrapper) { - idalloct(NULL, wrapper, false); + a0free(wrapper); } void @@ -106,15 +104,24 @@ MALLOC_TSD } bool -malloc_tsd_boot(void) +malloc_tsd_boot0(void) { ncleanups = 0; - if (tsd_boot()) + if (tsd_boot0()) return (true); + *tsd_arenas_cache_bypassp_get(tsd_fetch()) = true; return (false); } +void +malloc_tsd_boot1(void) +{ + + tsd_boot1(); + *tsd_arenas_cache_bypassp_get(tsd_fetch()) = false; +} + #ifdef _WIN32 static BOOL WINAPI _tls_callback(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved) diff --git a/test/unit/tsd.c b/test/unit/tsd.c index eb1c5976..b031c484 100644 --- a/test/unit/tsd.c +++ b/test/unit/tsd.c @@ -6,6 +6,7 @@ typedef unsigned int data_t; static bool data_cleanup_executed; +malloc_tsd_types(data_, data_t) malloc_tsd_protos(, data_, data_t) void
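The tsd_boot0()/tsd_boot1() split in the tsd.h hunks exists because jemalloc needs working TSD before it can allocate: boot0 installs a statically allocated boot wrapper so tsd_get()/tsd_set() work during bootstrap, and boot1 migrates that state into a heap-allocated wrapper once malloc_init_hard() has an arena to allocate from. A self-contained sketch of the same two-phase idea using raw pthreads follows; all names are hypothetical and the key destructor is elided for brevity.

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef struct {
	int initialized;
	int val;
} wrapper_t;

static pthread_key_t key;
/* Phase 0 target: a static wrapper, usable before the heap exists. */
static wrapper_t boot_wrapper = {0, 42};

static int
boot0(void)
{

	if (pthread_key_create(&key, NULL) != 0)
		return (1);
	if (pthread_setspecific(key, &boot_wrapper) != 0)
		return (1);
	return (0);
}

/* Phase 1: the allocator works now, so copy the boot wrapper's contents
 * into an allocated wrapper that a cleanup function could free later. */
static void
boot1(void)
{
	wrapper_t *wrapper = (wrapper_t *)malloc(sizeof(*wrapper));

	if (wrapper == NULL)
		abort();
	memcpy(wrapper, &boot_wrapper, sizeof(*wrapper));
	if (pthread_setspecific(key, wrapper) != 0)
		abort();
}

int
main(void)
{

	if (boot0())
		return (1);
	/* ... allocator bootstrap would run here, on the boot wrapper ... */
	boot1();
	printf("val = %d\n", ((wrapper_t *)pthread_getspecific(key))->val);
	return (0);
}

As in jemalloc, boot1() migrates only the booting thread's wrapper; any other thread gets its wrapper allocated lazily in the wrapper-get path.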
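The a0*() family moved into src/jemalloc.c gives the allocator a path that never touches thread-local state: everything is routed through arena 0, which malloc_init_hard() guarantees exists before anything else runs, so these functions are safe during bootstrap and from TSD cleanup. A compact model of that routing with illustrative names (the real code dispatches to arena_malloc() or huge_malloc() by size class):

#include <stdlib.h>

typedef struct { unsigned ind; } my_arena_t;

static my_arena_t *my_a0;	/* Set once at bootstrap; read-only after. */

/* Bootstrap-safe allocation: no thread-local state is consulted, so this
 * is callable before TSD exists and from TSD destructors. */
static void *
my_a0malloc(size_t size)
{

	if (my_a0 == NULL)
		return (NULL);	/* Bootstrap failed earlier. */
	if (size == 0)
		size = 1;
	return (malloc(size));	/* Placeholder for per-arena allocation. */
}

int
main(void)
{
	static my_arena_t arena0 = {0};
	void *p;

	my_a0 = &arena0;	/* What arena_init(0) accomplishes above. */
	p = my_a0malloc(64);
	free(p);
	return (0);
}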
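The arena_new() hunk in src/arena.c packs arena_t and its stats.lstats array into a single base_alloc() request precisely because base_alloc() memory cannot be returned on partial failure: one allocation either fully succeeds or fully fails. A sketch of that layout trick, using malloc() in place of base_alloc() and illustrative names (header_t, stat_t, CACHELINE):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define CACHELINE	64
#define CEIL(s)		(((s) + (CACHELINE - 1)) & ~((size_t)CACHELINE - 1))

typedef struct { unsigned ind; void *stats; } header_t;
typedef struct { uint64_t nmalloc; } stat_t;

static header_t *
header_new(unsigned ind, size_t nstats)
{
	/* One allocation for header plus trailing stats array. */
	header_t *h = (header_t *)calloc(1, CEIL(sizeof(header_t)) +
	    nstats * sizeof(stat_t));

	if (h == NULL)
		return (NULL);	/* Nothing partially built to clean up. */
	h->ind = ind;
	/* The stats array starts right after the cacheline-ceilinged header. */
	h->stats = (void *)((uintptr_t)h + CEIL(sizeof(header_t)));
	return (h);
}

int
main(void)
{
	header_t *h = header_new(0, 8);

	if (h != NULL) {
		printf("header %p, stats %p\n", (void *)h, h->stats);
		free(h);
	}
	return (0);
}

Rounding the header up to a cacheline boundary also keeps the hot stats array from sharing a line with the header fields, which is presumably why CACHELINE_CEILING rather than plain sizeof is used above.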
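Finally, the new arena_get() fast path relies on each thread carrying a private snapshot of the global arenas array in TSD (arenas_cache/narenas_cache), so the common lookup takes no lock; arena_get_hard() rebuilds the snapshot under arenas_lock only when the cache is absent, too small, or has a NULL slot. A reduced model of that read path, with hypothetical types and helpers standing in for the tsd-based ones:

#include <stdio.h>
#include <stdlib.h>

typedef struct { void **cache; unsigned ncached; } thread_state_t;

/* Stand-in for the locked arena_get_hard() refresh path. */
static void *
slow_get(thread_state_t *ts, unsigned ind)
{

	(void)ts; (void)ind;
	return (NULL);
}

static void *
fast_get(thread_state_t *ts, unsigned ind)
{
	void *arena;

	if (ts->cache == NULL || ind >= ts->ncached)
		return (slow_get(ts, ind));	/* Cache missing or stale. */
	arena = ts->cache[ind];
	if (arena != NULL)
		return (arena);			/* Common, lock-free case. */
	return (slow_get(ts, ind));		/* Slot not yet populated. */
}

int
main(void)
{
	thread_state_t ts = {NULL, 0};

	printf("%p\n", fast_get(&ts, 0));
	return (0);
}

Note that a NULL slot always falls through to the slow path here, matching the corrected arena_get() above: whether a missing arena is then initialized or merely refreshed is the slow path's decision, so the function "always tells the truth" about arenas that already exist.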