From 209f2926b8e734317942231332f24b4bfd94587e Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 26 Apr 2017 18:37:44 -0700 Subject: [PATCH] Header refactoring: tsd - cleanup and dependency breaking. This removes the tsd macros (which are used only for tsd_t in real builds). We break up the circular dependencies involving tsd. We also move all tsd access through getters and setters. This allows us to assert that we only touch data when tsd is in a valid state. We simplify the usages of the x macro trick, removing all the customizability (get/set, init, cleanup), moving the lifetime logic to tsd_init and tsd_cleanup. This lets us make initialization order independent of order within tsd_t. --- include/jemalloc/internal/ckh.h | 2 +- .../internal/jemalloc_internal_externs.h | 2 + .../internal/jemalloc_internal_includes.h | 8 +- include/jemalloc/internal/mutex_externs.h | 2 + include/jemalloc/internal/mutex_inlines.h | 1 + include/jemalloc/internal/mutex_prof.h | 1 + include/jemalloc/internal/prof_inlines_b.h | 2 +- include/jemalloc/internal/rtree_ctx.h | 22 + include/jemalloc/internal/rtree_externs.h | 2 - include/jemalloc/internal/rtree_structs.h | 21 - include/jemalloc/internal/rtree_types.h | 4 - include/jemalloc/internal/rtree_witness.h | 19 + include/jemalloc/internal/stats.h | 3 + include/jemalloc/internal/ticker.h | 2 + include/jemalloc/internal/tsd.h | 298 ++++++++++ include/jemalloc/internal/tsd_externs.h | 20 - include/jemalloc/internal/tsd_generic.h | 160 ++++++ include/jemalloc/internal/tsd_inlines.h | 121 ---- .../internal/tsd_malloc_thread_cleanup.h | 60 ++ include/jemalloc/internal/tsd_structs.h | 114 ---- include/jemalloc/internal/tsd_tls.h | 59 ++ include/jemalloc/internal/tsd_types.h | 541 +----------------- include/jemalloc/internal/tsd_win.h | 139 +++++ src/jemalloc.c | 8 +- src/prof.c | 10 +- src/rtree.c | 7 - src/tcache.c | 5 +- src/tsd.c | 73 ++- test/unit/tsd.c | 77 ++- 29 files changed, 870 insertions(+), 913 deletions(-) create mode 
100644 include/jemalloc/internal/rtree_ctx.h create mode 100644 include/jemalloc/internal/rtree_witness.h create mode 100644 include/jemalloc/internal/tsd.h delete mode 100644 include/jemalloc/internal/tsd_externs.h create mode 100644 include/jemalloc/internal/tsd_generic.h delete mode 100644 include/jemalloc/internal/tsd_inlines.h create mode 100644 include/jemalloc/internal/tsd_malloc_thread_cleanup.h delete mode 100644 include/jemalloc/internal/tsd_structs.h create mode 100644 include/jemalloc/internal/tsd_tls.h create mode 100644 include/jemalloc/internal/tsd_win.h diff --git a/include/jemalloc/internal/ckh.h b/include/jemalloc/internal/ckh.h index 96922e04..7b3850bc 100644 --- a/include/jemalloc/internal/ckh.h +++ b/include/jemalloc/internal/ckh.h @@ -1,7 +1,7 @@ #ifndef JEMALLOC_INTERNAL_CKH_H #define JEMALLOC_INTERNAL_CKH_H -#include "jemalloc/internal/tsd_types.h" +#include "jemalloc/internal/tsd.h" /* Cuckoo hashing implementation. Skip to the end for the interface. */ diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index 45c119f8..9a431fc1 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -2,6 +2,8 @@ #define JEMALLOC_INTERNAL_EXTERNS_H #include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/tsd_types.h" /* TSD checks this to set thread local slow state accordingly. */ extern bool malloc_slow; diff --git a/include/jemalloc/internal/jemalloc_internal_includes.h b/include/jemalloc/internal/jemalloc_internal_includes.h index 340cb1ce..84917a70 100644 --- a/include/jemalloc/internal/jemalloc_internal_includes.h +++ b/include/jemalloc/internal/jemalloc_internal_includes.h @@ -26,8 +26,8 @@ * dependency information into the header files (i.e. 
we still rely on the * ordering in this file to ensure all a header's dependencies are available in * its translation unit). Each component is now broken up into multiple header - * files, corresponding to the sections above (e.g. instead of "tsd.h", we now - * have "tsd_types.h", "tsd_structs.h", "tsd_externs.h", "tsd_inlines.h"). + * files, corresponding to the sections above (e.g. instead of "foo.h", we now + * have "foo_types.h", "foo_structs.h", "foo_externs.h", "foo_inlines.h"). * * Those files which have been converted to explicitly include their * inter-component dependencies are now in the initial HERMETIC HEADERS @@ -42,7 +42,6 @@ #include "jemalloc/internal/witness_types.h" #include "jemalloc/internal/mutex_types.h" -#include "jemalloc/internal/tsd_types.h" #include "jemalloc/internal/extent_types.h" #include "jemalloc/internal/extent_dss_types.h" #include "jemalloc/internal/base_types.h" @@ -65,7 +64,6 @@ #include "jemalloc/internal/arena_structs_b.h" #include "jemalloc/internal/rtree_structs.h" #include "jemalloc/internal/tcache_structs.h" -#include "jemalloc/internal/tsd_structs.h" /******************************************************************************/ /* EXTERNS */ @@ -83,13 +81,11 @@ #include "jemalloc/internal/large_externs.h" #include "jemalloc/internal/tcache_externs.h" #include "jemalloc/internal/prof_externs.h" -#include "jemalloc/internal/tsd_externs.h" /******************************************************************************/ /* INLINES */ /******************************************************************************/ -#include "jemalloc/internal/tsd_inlines.h" #include "jemalloc/internal/witness_inlines.h" #include "jemalloc/internal/mutex_inlines.h" #include "jemalloc/internal/jemalloc_internal_inlines_a.h" diff --git a/include/jemalloc/internal/mutex_externs.h b/include/jemalloc/internal/mutex_externs.h index 5199d3cf..8e40cb34 100644 --- a/include/jemalloc/internal/mutex_externs.h +++ 
b/include/jemalloc/internal/mutex_externs.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_MUTEX_EXTERNS_H #define JEMALLOC_INTERNAL_MUTEX_EXTERNS_H +#include "jemalloc/internal/tsd_types.h" + #ifdef JEMALLOC_LAZY_LOCK extern bool isthreaded; #else diff --git a/include/jemalloc/internal/mutex_inlines.h b/include/jemalloc/internal/mutex_inlines.h index 6da21cf6..babe8d3a 100644 --- a/include/jemalloc/internal/mutex_inlines.h +++ b/include/jemalloc/internal/mutex_inlines.h @@ -2,6 +2,7 @@ #define JEMALLOC_INTERNAL_MUTEX_INLINES_H #include "jemalloc/internal/nstime.h" +#include "jemalloc/internal/tsd_types.h" void malloc_mutex_lock_slow(malloc_mutex_t *mutex); diff --git a/include/jemalloc/internal/mutex_prof.h b/include/jemalloc/internal/mutex_prof.h index 50c0af0a..f7301c88 100644 --- a/include/jemalloc/internal/mutex_prof.h +++ b/include/jemalloc/internal/mutex_prof.h @@ -3,6 +3,7 @@ #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/nstime.h" +#include "jemalloc/internal/tsd_types.h" #define MUTEX_PROF_GLOBAL_MUTEXES \ OP(ctl) \ diff --git a/include/jemalloc/internal/prof_inlines_b.h b/include/jemalloc/internal/prof_inlines_b.h index 8cdea615..fba7b998 100644 --- a/include/jemalloc/internal/prof_inlines_b.h +++ b/include/jemalloc/internal/prof_inlines_b.h @@ -96,7 +96,7 @@ prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update, } return true; } else { - if (tsd->reentrancy_level > 0) { + if (tsd_reentrancy_level_get(tsd) > 0) { return true; } /* Compute new sample threshold. 
*/ diff --git a/include/jemalloc/internal/rtree_ctx.h b/include/jemalloc/internal/rtree_ctx.h new file mode 100644 index 00000000..fe2c8bde --- /dev/null +++ b/include/jemalloc/internal/rtree_ctx.h @@ -0,0 +1,22 @@ +#ifndef JEMALLOC_INTERNAL_RTREE_CTX_H +#define JEMALLOC_INTERNAL_RTREE_CTX_H + +#include "jemalloc/internal/rtree_types.h" + +typedef struct rtree_ctx_cache_elm_s rtree_ctx_cache_elm_t; +struct rtree_ctx_cache_elm_s { + uintptr_t leafkey; + rtree_leaf_elm_t *leaf; +}; + +typedef struct rtree_ctx_s rtree_ctx_t; +struct rtree_ctx_s { + /* Direct mapped cache. */ + rtree_ctx_cache_elm_t cache[RTREE_CTX_NCACHE]; + /* L2 LRU cache. */ + rtree_ctx_cache_elm_t l2_cache[RTREE_CTX_NCACHE_L2]; +}; + +void rtree_ctx_data_init(rtree_ctx_t *ctx); + +#endif /* JEMALLOC_INTERNAL_RTREE_CTX_H */ diff --git a/include/jemalloc/internal/rtree_externs.h b/include/jemalloc/internal/rtree_externs.h index c8d1c376..5145c12c 100644 --- a/include/jemalloc/internal/rtree_externs.h +++ b/include/jemalloc/internal/rtree_externs.h @@ -43,7 +43,5 @@ void rtree_leaf_elm_witness_access(tsdn_t *tsdn, const rtree_t *rtree, const rtree_leaf_elm_t *elm); void rtree_leaf_elm_witness_release(tsdn_t *tsdn, const rtree_t *rtree, const rtree_leaf_elm_t *elm); -void rtree_ctx_data_init(rtree_ctx_t *ctx); -bool tsd_rtree_ctx_data_init(tsd_t *tsd); #endif /* JEMALLOC_INTERNAL_RTREE_EXTERNS_H */ diff --git a/include/jemalloc/internal/rtree_structs.h b/include/jemalloc/internal/rtree_structs.h index 7ff92e61..4418934f 100644 --- a/include/jemalloc/internal/rtree_structs.h +++ b/include/jemalloc/internal/rtree_structs.h @@ -30,15 +30,6 @@ struct rtree_leaf_elm_s { #endif }; -struct rtree_leaf_elm_witness_s { - const rtree_leaf_elm_t *elm; - witness_t witness; -}; - -struct rtree_leaf_elm_witness_tsd_s { - rtree_leaf_elm_witness_t witnesses[RTREE_ELM_ACQUIRE_MAX]; -}; - struct rtree_level_s { /* Number of key bits distinguished by this level. 
*/ unsigned bits; @@ -49,18 +40,6 @@ struct rtree_level_s { unsigned cumbits; }; -struct rtree_ctx_cache_elm_s { - uintptr_t leafkey; - rtree_leaf_elm_t *leaf; -}; - -struct rtree_ctx_s { - /* Direct mapped cache. */ - rtree_ctx_cache_elm_t cache[RTREE_CTX_NCACHE]; - /* L2 LRU cache. */ - rtree_ctx_cache_elm_t l2_cache[RTREE_CTX_NCACHE_L2]; -}; - struct rtree_s { malloc_mutex_t init_lock; /* Number of elements based on rtree_levels[0].bits. */ diff --git a/include/jemalloc/internal/rtree_types.h b/include/jemalloc/internal/rtree_types.h index 402f741c..b465086d 100644 --- a/include/jemalloc/internal/rtree_types.h +++ b/include/jemalloc/internal/rtree_types.h @@ -12,11 +12,7 @@ typedef struct rtree_node_elm_s rtree_node_elm_t; typedef struct rtree_leaf_elm_s rtree_leaf_elm_t; -typedef struct rtree_leaf_elm_witness_s rtree_leaf_elm_witness_t; -typedef struct rtree_leaf_elm_witness_tsd_s rtree_leaf_elm_witness_tsd_t; typedef struct rtree_level_s rtree_level_t; -typedef struct rtree_ctx_cache_elm_s rtree_ctx_cache_elm_t; -typedef struct rtree_ctx_s rtree_ctx_t; typedef struct rtree_s rtree_t; /* Number of high insignificant bits. 
*/ diff --git a/include/jemalloc/internal/rtree_witness.h b/include/jemalloc/internal/rtree_witness.h new file mode 100644 index 00000000..4a136203 --- /dev/null +++ b/include/jemalloc/internal/rtree_witness.h @@ -0,0 +1,19 @@ +#ifndef JEMALLOC_INTERNAL_RTREE_WITNESS_H +#define JEMALLOC_INTERNAL_RTREE_WITNESS_H + +#include "jemalloc/internal/rtree_types.h" +#include "jemalloc/internal/witness_types.h" +#include "jemalloc/internal/witness_structs.h" + +typedef struct rtree_leaf_elm_witness_s rtree_leaf_elm_witness_t; +struct rtree_leaf_elm_witness_s { + const rtree_leaf_elm_t *elm; + witness_t witness; +}; + +typedef struct rtree_leaf_elm_witness_tsd_s rtree_leaf_elm_witness_tsd_t; +struct rtree_leaf_elm_witness_tsd_s { + rtree_leaf_elm_witness_t witnesses[RTREE_ELM_ACQUIRE_MAX]; +}; + +#endif /* JEMALLOC_INTERNAL_RTREE_WITNESS_H */ diff --git a/include/jemalloc/internal/stats.h b/include/jemalloc/internal/stats.h index fd98422d..385a8514 100644 --- a/include/jemalloc/internal/stats.h +++ b/include/jemalloc/internal/stats.h @@ -2,6 +2,9 @@ #define JEMALLOC_INTERNAL_STATS_H #include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/mutex_prof.h" +#include "jemalloc/internal/mutex_types.h" +#include "jemalloc/internal/mutex_structs.h" #include "jemalloc/internal/size_classes.h" /* The opt.stats_print storage. */ diff --git a/include/jemalloc/internal/ticker.h b/include/jemalloc/internal/ticker.h index faaac91d..572b9645 100644 --- a/include/jemalloc/internal/ticker.h +++ b/include/jemalloc/internal/ticker.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_TICKER_H #define JEMALLOC_INTERNAL_TICKER_H +#include "jemalloc/internal/util.h" + /** * A ticker makes it easy to count-down events until some limit. You * ticker_init the ticker to trigger every nticks events. 
You then notify it diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h new file mode 100644 index 00000000..3d6576b4 --- /dev/null +++ b/include/jemalloc/internal/tsd.h @@ -0,0 +1,298 @@ +#ifndef JEMALLOC_INTERNAL_TSD_H +#define JEMALLOC_INTERNAL_TSD_H + +#include "jemalloc/internal/arena_types.h" +#include "jemalloc/internal/assert.h" +#include "jemalloc/internal/jemalloc_internal_externs.h" +#include "jemalloc/internal/prof_types.h" +#include "jemalloc/internal/ql.h" +#include "jemalloc/internal/rtree_ctx.h" +#include "jemalloc/internal/rtree_witness.h" +#include "jemalloc/internal/tcache_types.h" +#include "jemalloc/internal/tcache_structs.h" +#include "jemalloc/internal/util.h" +#include "jemalloc/internal/witness_types.h" +#include "jemalloc/internal/witness_structs.h" + +/* + * Thread-Specific-Data layout + * --- data accessed on tcache fast path: state, rtree_ctx, stats, prof --- + * s: state + * e: tcache_enabled + * m: thread_allocated (config_stats) + * f: thread_deallocated (config_stats) + * p: prof_tdata (config_prof) + * c: rtree_ctx (rtree cache accessed on deallocation) + * t: tcache + * --- data not accessed on tcache fast path: arena-related fields --- + * d: arenas_tdata_bypass + * r: reentrancy_level + * x: narenas_tdata + * i: iarena + * a: arena + * o: arenas_tdata + * Loading TSD data is on the critical path of basically all malloc operations. + * In particular, tcache and rtree_ctx rely on hot CPU cache to be effective. + * Use a compact layout to reduce cache footprint. + * +--- 64-bit and 64B cacheline; 1B each letter; First byte on the left. ---+ + * |---------------------------- 1st cacheline ----------------------------| + * | sedrxxxx mmmmmmmm ffffffff pppppppp [c * 32 ........ ........ .......] | + * |---------------------------- 2nd cacheline ----------------------------| + * | [c * 64 ........ ........ ........ ........ ........ ........ .......] 
| + * |---------------------------- 3rd cacheline ----------------------------| + * | [c * 32 ........ ........ .......] iiiiiiii aaaaaaaa oooooooo [t...... | + * +-------------------------------------------------------------------------+ + * Note: the entire tcache is embedded into TSD and spans multiple cachelines. + * + * The last 3 members (i, a and o) before tcache aren't really needed on tcache + * fast path. However we have a number of unused tcache bins and witnesses + * (never touched unless config_debug) at the end of tcache, so we place them + * there to avoid breaking the cachelines and possibly paging in an extra page. + */ +#ifdef JEMALLOC_JET +typedef void (*test_callback_t)(int *); +# define MALLOC_TSD_TEST_DATA_INIT 0x72b65c10 +# define MALLOC_TEST_TSD \ + O(test_data, int) \ + O(test_callback, test_callback_t) +# define MALLOC_TEST_TSD_INITIALIZER , MALLOC_TSD_TEST_DATA_INIT, NULL +#else +# define MALLOC_TEST_TSD +# define MALLOC_TEST_TSD_INITIALIZER +#endif + +#define MALLOC_TSD \ +/* O(name, type) */ \ + O(tcache_enabled, bool) \ + O(arenas_tdata_bypass, bool) \ + O(reentrancy_level, int8_t) \ + O(narenas_tdata, uint32_t) \ + O(thread_allocated, uint64_t) \ + O(thread_deallocated, uint64_t) \ + O(prof_tdata, prof_tdata_t *) \ + O(rtree_ctx, rtree_ctx_t) \ + O(iarena, arena_t *) \ + O(arena, arena_t *) \ + O(arenas_tdata, arena_tdata_t *) \ + O(tcache, tcache_t) \ + O(witnesses, witness_list_t) \ + O(rtree_leaf_elm_witnesses, rtree_leaf_elm_witness_tsd_t) \ + O(witness_fork, bool) \ + MALLOC_TEST_TSD + +#define TSD_INITIALIZER { \ + tsd_state_uninitialized, \ + TCACHE_ENABLED_ZERO_INITIALIZER, \ + false, \ + 0, \ + 0, \ + 0, \ + 0, \ + NULL, \ + RTREE_CTX_ZERO_INITIALIZER, \ + NULL, \ + NULL, \ + NULL, \ + TCACHE_ZERO_INITIALIZER, \ + ql_head_initializer(witnesses), \ + RTREE_ELM_WITNESS_TSD_INITIALIZER, \ + false \ + MALLOC_TEST_TSD_INITIALIZER \ +} + +enum { + tsd_state_nominal = 0, /* Common case --> jnz. 
*/ + tsd_state_nominal_slow = 1, /* Initialized but on slow path. */ + /* the above 2 nominal states should be lower values. */ + tsd_state_nominal_max = 1, /* used for comparison only. */ + tsd_state_purgatory = 2, + tsd_state_reincarnated = 3, + tsd_state_uninitialized = 4 +}; + +/* Manually limit tsd_state_t to a single byte. */ +typedef uint8_t tsd_state_t; + +/* The actual tsd. */ +typedef struct tsd_s tsd_t; +struct tsd_s { + /* + * The contents should be treated as totally opaque outside the tsd + * module. Access any thread-local state through the getters and + * setters below. + */ + tsd_state_t state; +#define O(n, t) \ + t use_a_getter_or_setter_instead_##n; +MALLOC_TSD +#undef O +}; + +/* + * Wrapper around tsd_t that makes it possible to avoid implicit conversion + * between tsd_t and tsdn_t, where tsdn_t is "nullable" and has to be + * explicitly converted to tsd_t, which is non-nullable. + */ +typedef struct tsdn_s tsdn_t; +struct tsdn_s { + tsd_t tsd; +}; +#define TSDN_NULL ((tsdn_t *)0) + +void *malloc_tsd_malloc(size_t size); +void malloc_tsd_dalloc(void *wrapper); +void malloc_tsd_cleanup_register(bool (*f)(void)); +tsd_t *malloc_tsd_boot0(void); +void malloc_tsd_boot1(void); +bool tsd_data_init(void *arg); +void tsd_cleanup(void *arg); +tsd_t *tsd_fetch_slow(tsd_t *tsd); +void tsd_slow_update(tsd_t *tsd); + +/* + * We put the platform-specific data declarations and inlines into their own + * header files to avoid cluttering this file. They define tsd_boot0, + * tsd_boot1, tsd_boot, tsd_booted_get, tsd_get_allocates, tsd_get, and tsd_set. + */ +#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP +#include "jemalloc/internal/tsd_malloc_thread_cleanup.h" +#elif (defined(JEMALLOC_TLS)) +#include "jemalloc/internal/tsd_tls.h" +#elif (defined(_WIN32)) +#include "jemalloc/internal/tsd_win.h" +#else +#include "jemalloc/internal/tsd_generic.h" +#endif + +/* + * tsd_foop_get_unsafe(tsd) returns a pointer to the thread-local instance of + * foo. 
This omits some safety checks, and so can be used during tsd + * initialization and cleanup. + */ +#define O(n, t) \ +JEMALLOC_ALWAYS_INLINE t * \ +tsd_##n##p_get_unsafe(tsd_t *tsd) { \ + return &tsd->use_a_getter_or_setter_instead_##n; \ +} +MALLOC_TSD +#undef O + +/* tsd_foop_get(tsd) returns a pointer to the thread-local instance of foo. */ +#define O(n, t) \ +JEMALLOC_ALWAYS_INLINE t * \ +tsd_##n##p_get(tsd_t *tsd) { \ + assert(tsd->state == tsd_state_nominal || \ + tsd->state == tsd_state_nominal_slow || \ + tsd->state == tsd_state_reincarnated); \ + return tsd_##n##p_get_unsafe(tsd); \ +} +MALLOC_TSD +#undef O + +/* tsd_foo_get(tsd) returns the value of the thread-local instance of foo. */ +#define O(n, t) \ +JEMALLOC_ALWAYS_INLINE t \ +tsd_##n##_get(tsd_t *tsd) { \ + return *tsd_##n##p_get(tsd); \ +} +MALLOC_TSD +#undef O + +/* tsd_foo_set(tsd, val) updates the thread-local instance of foo to be val. */ +#define O(n, t) \ +JEMALLOC_ALWAYS_INLINE void \ +tsd_##n##_set(tsd_t *tsd, t val) { \ + *tsd_##n##p_get(tsd) = val; \ +} +MALLOC_TSD +#undef O + +JEMALLOC_ALWAYS_INLINE void +tsd_assert_fast(tsd_t *tsd) { + assert(!malloc_slow && tsd_tcache_enabled_get(tsd) && + tsd_reentrancy_level_get(tsd) == 0); +} + +JEMALLOC_ALWAYS_INLINE bool +tsd_fast(tsd_t *tsd) { + bool fast = (tsd->state == tsd_state_nominal); + if (fast) { + tsd_assert_fast(tsd); + } + + return fast; +} + +JEMALLOC_ALWAYS_INLINE tsd_t * +tsd_fetch_impl(bool init) { + tsd_t *tsd = tsd_get(init); + + if (!init && tsd_get_allocates() && tsd == NULL) { + return NULL; + } + assert(tsd != NULL); + + if (unlikely(tsd->state != tsd_state_nominal)) { + return tsd_fetch_slow(tsd); + } + assert(tsd_fast(tsd)); + tsd_assert_fast(tsd); + + return tsd; +} + +JEMALLOC_ALWAYS_INLINE tsd_t * +tsd_fetch(void) { + return tsd_fetch_impl(true); +} + +JEMALLOC_ALWAYS_INLINE tsdn_t * +tsd_tsdn(tsd_t *tsd) { + return (tsdn_t *)tsd; +} + +static inline bool +tsd_nominal(tsd_t *tsd) { + return (tsd->state <= 
tsd_state_nominal_max); +} + +JEMALLOC_ALWAYS_INLINE tsdn_t * +tsdn_fetch(void) { + if (!tsd_booted_get()) { + return NULL; + } + + return tsd_tsdn(tsd_fetch_impl(false)); +} + +JEMALLOC_ALWAYS_INLINE bool +tsdn_null(const tsdn_t *tsdn) { + return tsdn == NULL; +} + +JEMALLOC_ALWAYS_INLINE tsd_t * +tsdn_tsd(tsdn_t *tsdn) { + assert(!tsdn_null(tsdn)); + + return &tsdn->tsd; +} + +JEMALLOC_ALWAYS_INLINE rtree_ctx_t * +tsd_rtree_ctx(tsd_t *tsd) { + return tsd_rtree_ctxp_get(tsd); +} + +JEMALLOC_ALWAYS_INLINE rtree_ctx_t * +tsdn_rtree_ctx(tsdn_t *tsdn, rtree_ctx_t *fallback) { + /* + * If tsd cannot be accessed, initialize the fallback rtree_ctx and + * return a pointer to it. + */ + if (unlikely(tsdn_null(tsdn))) { + rtree_ctx_data_init(fallback); + return fallback; + } + return tsd_rtree_ctx(tsdn_tsd(tsdn)); +} + +#endif /* JEMALLOC_INTERNAL_TSD_H */ diff --git a/include/jemalloc/internal/tsd_externs.h b/include/jemalloc/internal/tsd_externs.h deleted file mode 100644 index 6b9dfdc6..00000000 --- a/include/jemalloc/internal/tsd_externs.h +++ /dev/null @@ -1,20 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_TSD_EXTERNS_H -#define JEMALLOC_INTERNAL_TSD_EXTERNS_H - -void *malloc_tsd_malloc(size_t size); -void malloc_tsd_dalloc(void *wrapper); -void malloc_tsd_cleanup_register(bool (*f)(void)); -tsd_t *malloc_tsd_boot0(void); -void malloc_tsd_boot1(void); -#if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) && \ - !defined(_WIN32)) -void *tsd_init_check_recursion(tsd_init_head_t *head, - tsd_init_block_t *block); -void tsd_init_finish(tsd_init_head_t *head, tsd_init_block_t *block); -#endif -bool tsd_data_init(void *arg); -void tsd_cleanup(void *arg); -tsd_t *tsd_fetch_slow(tsd_t *tsd); -void tsd_slow_update(tsd_t *tsd); - -#endif /* JEMALLOC_INTERNAL_TSD_EXTERNS_H */ diff --git a/include/jemalloc/internal/tsd_generic.h b/include/jemalloc/internal/tsd_generic.h new file mode 100644 index 00000000..d59cb743 --- /dev/null +++ 
b/include/jemalloc/internal/tsd_generic.h @@ -0,0 +1,160 @@ +#ifdef JEMALLOC_INTERNAL_TSD_GENERIC_H +#error This file should be included only once, by tsd.h. +#endif +#define JEMALLOC_INTERNAL_TSD_GENERIC_H + +typedef struct tsd_init_block_s tsd_init_block_t; +struct tsd_init_block_s { + ql_elm(tsd_init_block_t) link; + pthread_t thread; + void *data; +}; + +typedef struct tsd_init_head_s tsd_init_head_t; +struct tsd_init_head_s { + ql_head(tsd_init_block_t) blocks; + malloc_mutex_t lock; +}; + +typedef struct { + bool initialized; + tsd_t val; +} tsd_wrapper_t; + +void *tsd_init_check_recursion(tsd_init_head_t *head, + tsd_init_block_t *block); +void tsd_init_finish(tsd_init_head_t *head, tsd_init_block_t *block); + +extern pthread_key_t tsd_tsd; +extern tsd_init_head_t tsd_init_head; +extern tsd_wrapper_t tsd_boot_wrapper; +extern bool tsd_booted; + +/* Initialization/cleanup. */ +JEMALLOC_ALWAYS_INLINE void +tsd_cleanup_wrapper(void *arg) { + tsd_wrapper_t *wrapper = (tsd_wrapper_t *)arg; + + if (wrapper->initialized) { + wrapper->initialized = false; + tsd_cleanup(&wrapper->val); + if (wrapper->initialized) { + /* Trigger another cleanup round. 
*/ + if (pthread_setspecific(tsd_tsd, (void *)wrapper) != 0) + { + malloc_write(": Error setting TSD\n"); + if (opt_abort) { + abort(); + } + } + return; + } + } + malloc_tsd_dalloc(wrapper); +} + +JEMALLOC_ALWAYS_INLINE void +tsd_wrapper_set(tsd_wrapper_t *wrapper) { + if (pthread_setspecific(tsd_tsd, (void *)wrapper) != 0) { + malloc_write(": Error setting TSD\n"); + abort(); + } +} + +JEMALLOC_ALWAYS_INLINE tsd_wrapper_t * +tsd_wrapper_get(bool init) { + tsd_wrapper_t *wrapper = (tsd_wrapper_t *)pthread_getspecific(tsd_tsd); + + if (init && unlikely(wrapper == NULL)) { + tsd_init_block_t block; + wrapper = (tsd_wrapper_t *) + tsd_init_check_recursion(&tsd_init_head, &block); + if (wrapper) { + return wrapper; + } + wrapper = (tsd_wrapper_t *) + malloc_tsd_malloc(sizeof(tsd_wrapper_t)); + block.data = (void *)wrapper; + if (wrapper == NULL) { + malloc_write(": Error allocating TSD\n"); + abort(); + } else { + wrapper->initialized = false; + tsd_t initializer = TSD_INITIALIZER; + wrapper->val = initializer; + } + tsd_wrapper_set(wrapper); + tsd_init_finish(&tsd_init_head, &block); + } + return wrapper; +} + +JEMALLOC_ALWAYS_INLINE bool +tsd_boot0(void) { + if (pthread_key_create(&tsd_tsd, tsd_cleanup_wrapper) != 0) { + return true; + } + tsd_wrapper_set(&tsd_boot_wrapper); + tsd_booted = true; + return false; +} + +JEMALLOC_ALWAYS_INLINE void +tsd_boot1(void) { + tsd_wrapper_t *wrapper; + wrapper = (tsd_wrapper_t *)malloc_tsd_malloc(sizeof(tsd_wrapper_t)); + if (wrapper == NULL) { + malloc_write(": Error allocating TSD\n"); + abort(); + } + tsd_boot_wrapper.initialized = false; + tsd_cleanup(&tsd_boot_wrapper.val); + wrapper->initialized = false; + tsd_t initializer = TSD_INITIALIZER; + wrapper->val = initializer; + tsd_wrapper_set(wrapper); +} + +JEMALLOC_ALWAYS_INLINE bool +tsd_boot(void) { + if (tsd_boot0()) { + return true; + } + tsd_boot1(); + return false; +} + +JEMALLOC_ALWAYS_INLINE bool +tsd_booted_get(void) { + return tsd_booted; +} + 
+JEMALLOC_ALWAYS_INLINE bool +tsd_get_allocates(void) { + return true; +} + +/* Get/set. */ +JEMALLOC_ALWAYS_INLINE tsd_t * +tsd_get(bool init) { + tsd_wrapper_t *wrapper; + + assert(tsd_booted); + wrapper = tsd_wrapper_get(init); + if (tsd_get_allocates() && !init && wrapper == NULL) { + return NULL; + } + return &wrapper->val; +} + +JEMALLOC_ALWAYS_INLINE void +tsd_set(tsd_t *val) { + tsd_wrapper_t *wrapper; + + assert(tsd_booted); + wrapper = tsd_wrapper_get(true); + if (likely(&wrapper->val != val)) { + wrapper->val = *(val); + } + wrapper->initialized = true; +} diff --git a/include/jemalloc/internal/tsd_inlines.h b/include/jemalloc/internal/tsd_inlines.h deleted file mode 100644 index f0f77e48..00000000 --- a/include/jemalloc/internal/tsd_inlines.h +++ /dev/null @@ -1,121 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_TSD_INLINES_H -#define JEMALLOC_INTERNAL_TSD_INLINES_H - -malloc_tsd_externs(, tsd_t) -malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, , tsd_t, tsd_initializer, tsd_cleanup) - -#define MALLOC_TSD_getset_yes(n, t) \ -JEMALLOC_ALWAYS_INLINE t \ -tsd_##n##_get(tsd_t *tsd) { \ - return *tsd_##n##p_get(tsd); \ -} \ -JEMALLOC_ALWAYS_INLINE void \ -tsd_##n##_set(tsd_t *tsd, t n) { \ - assert(tsd->state == tsd_state_nominal || \ - tsd->state == tsd_state_nominal_slow || \ - tsd->state == tsd_state_reincarnated); \ - tsd->n = n; \ -} -#define MALLOC_TSD_getset_no(n, t) -#define O(n, t, gs, i, c) \ -JEMALLOC_ALWAYS_INLINE t * \ -tsd_##n##p_get(tsd_t *tsd) { \ - return &tsd->n; \ -} \ - \ -MALLOC_TSD_getset_##gs(n, t) -MALLOC_TSD -#undef MALLOC_TSD_getset_yes -#undef MALLOC_TSD_getset_no -#undef O - -JEMALLOC_ALWAYS_INLINE bool -tsd_assert_fast(tsd_t *tsd) { - assert(!malloc_slow && tsd_tcache_enabled_get(tsd) && - tsd_reentrancy_level_get(tsd) == 0); - return true; -} - -JEMALLOC_ALWAYS_INLINE bool -tsd_fast(tsd_t *tsd) { - bool fast = (tsd->state == tsd_state_nominal); - if (fast) { - tsd_assert_fast(tsd); - } - - return fast; -} - -JEMALLOC_ALWAYS_INLINE tsd_t * 
-tsd_fetch_impl(bool init) { - tsd_t *tsd = tsd_get(init); - - if (!init && tsd_get_allocates() && tsd == NULL) { - return NULL; - } - assert(tsd != NULL); - - if (unlikely(tsd->state != tsd_state_nominal)) { - return tsd_fetch_slow(tsd); - } - assert(tsd_fast(tsd)); - tsd_assert_fast(tsd); - - return tsd; -} - -JEMALLOC_ALWAYS_INLINE tsd_t * -tsd_fetch(void) { - return tsd_fetch_impl(true); -} - -JEMALLOC_ALWAYS_INLINE tsdn_t * -tsd_tsdn(tsd_t *tsd) { - return (tsdn_t *)tsd; -} - -static inline bool -tsd_nominal(tsd_t *tsd) { - return (tsd->state <= tsd_state_nominal_max); -} - -JEMALLOC_ALWAYS_INLINE tsdn_t * -tsdn_fetch(void) { - if (!tsd_booted_get()) { - return NULL; - } - - return tsd_tsdn(tsd_fetch_impl(false)); -} - -JEMALLOC_ALWAYS_INLINE bool -tsdn_null(const tsdn_t *tsdn) { - return tsdn == NULL; -} - -JEMALLOC_ALWAYS_INLINE tsd_t * -tsdn_tsd(tsdn_t *tsdn) { - assert(!tsdn_null(tsdn)); - - return &tsdn->tsd; -} - -JEMALLOC_ALWAYS_INLINE rtree_ctx_t * -tsd_rtree_ctx(tsd_t *tsd) { - return tsd_rtree_ctxp_get(tsd); -} - -JEMALLOC_ALWAYS_INLINE rtree_ctx_t * -tsdn_rtree_ctx(tsdn_t *tsdn, rtree_ctx_t *fallback) { - /* - * If tsd cannot be accessed, initialize the fallback rtree_ctx and - * return a pointer to it. - */ - if (unlikely(tsdn_null(tsdn))) { - rtree_ctx_data_init(fallback); - return fallback; - } - return tsd_rtree_ctx(tsdn_tsd(tsdn)); -} - -#endif /* JEMALLOC_INTERNAL_TSD_INLINES_H */ diff --git a/include/jemalloc/internal/tsd_malloc_thread_cleanup.h b/include/jemalloc/internal/tsd_malloc_thread_cleanup.h new file mode 100644 index 00000000..beb467a6 --- /dev/null +++ b/include/jemalloc/internal/tsd_malloc_thread_cleanup.h @@ -0,0 +1,60 @@ +#ifdef JEMALLOC_INTERNAL_TSD_MALLOC_THREAD_CLEANUP_H +#error This file should be included only once, by tsd.h. +#endif +#define JEMALLOC_INTERNAL_TSD_MALLOC_THREAD_CLEANUP_H + +extern __thread tsd_t tsd_tls; +extern __thread bool tsd_initialized; +extern bool tsd_booted; + +/* Initialization/cleanup. 
*/ +JEMALLOC_ALWAYS_INLINE bool +tsd_cleanup_wrapper(void) { + if (tsd_initialized) { + tsd_initialized = false; + tsd_cleanup(&tsd_tls); + } + return tsd_initialized; +} + +JEMALLOC_ALWAYS_INLINE bool +tsd_boot0(void) { + malloc_tsd_cleanup_register(&tsd_cleanup_wrapper); + tsd_booted = true; + return false; +} + +JEMALLOC_ALWAYS_INLINE void +tsd_boot1(void) { + /* Do nothing. */ +} + +JEMALLOC_ALWAYS_INLINE bool +tsd_boot(void) { + return tsd_boot0(); +} + +JEMALLOC_ALWAYS_INLINE bool +tsd_booted_get(void) { + return tsd_booted; +} + +JEMALLOC_ALWAYS_INLINE bool +tsd_get_allocates(void) { + return false; +} + +/* Get/set. */ +JEMALLOC_ALWAYS_INLINE tsd_t * +tsd_get(bool init) { + assert(tsd_booted); + return &tsd_tls; +} +JEMALLOC_ALWAYS_INLINE void +tsd_set(tsd_t *val) { + assert(tsd_booted); + if (likely(&tsd_tls != val)) { + tsd_tls = (*val); + } + tsd_initialized = true; +} diff --git a/include/jemalloc/internal/tsd_structs.h b/include/jemalloc/internal/tsd_structs.h deleted file mode 100644 index 40fea97b..00000000 --- a/include/jemalloc/internal/tsd_structs.h +++ /dev/null @@ -1,114 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_TSD_STRUCTS_H -#define JEMALLOC_INTERNAL_TSD_STRUCTS_H - -#include "jemalloc/internal/ql.h" - -#if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) && \ - !defined(_WIN32)) -struct tsd_init_block_s { - ql_elm(tsd_init_block_t) link; - pthread_t thread; - void *data; -}; -struct tsd_init_head_s { - ql_head(tsd_init_block_t) blocks; - malloc_mutex_t lock; -}; -#endif - -/* - * Thread-Specific-Data layout - * --- data accessed on tcache fast path: state, rtree_ctx, stats, prof --- - * s: state - * e: tcache_enabled - * m: thread_allocated (config_stats) - * f: thread_deallocated (config_stats) - * p: prof_tdata (config_prof) - * c: rtree_ctx (rtree cache accessed on deallocation) - * t: tcache - * --- data not accessed on tcache fast path: arena related fields --- - * d: arenas_tdata_bypass - * r: reentrancy_level - * x: 
narenas_tdata - * i: iarena - * a: arena - * o: arenas_tdata - * Loading TSD data is on the critical path of basically all malloc operations. - * In particular, tcache and rtree_ctx rely on hot CPU cache to be effective. - * Use a compact layout to reduce cache footprint. - * +--- 64-bit and 64B cacheline; 1B each letter; First byte on the left. ---+ - * |---------------------------- 1st cacheline ----------------------------| - * | sedrxxxx mmmmmmmm ffffffff pppppppp [c * 32 ........ ........ .......] | - * |---------------------------- 2nd cacheline ----------------------------| - * | [c * 64 ........ ........ ........ ........ ........ ........ .......] | - * |---------------------------- 3nd cacheline ----------------------------| - * | [c * 32 ........ ........ .......] iiiiiiii aaaaaaaa oooooooo [t...... | - * +-------------------------------------------------------------------------+ - * Note: the entire tcache is embedded into TSD and spans multiple cachelines. - * - * The last 3 members (i, a and o) before tcache isn't really needed on tcache - * fast path. However we have a number of unused tcache bins and witnesses - * (never touched unless config_debug) at the end of tcache, so we place them - * there to avoid breaking the cachelines and possibly paging in an extra page. 
- */ -#define MALLOC_TSD \ -/* O(name, type, [gs]et, init, cleanup) */ \ - O(tcache_enabled, bool, yes, yes, no) \ - O(arenas_tdata_bypass, bool, no, no, no) \ - O(reentrancy_level, int8_t, yes, no, no) \ - O(narenas_tdata, uint32_t, yes, no, no) \ - O(thread_allocated, uint64_t, yes, no, no) \ - O(thread_deallocated, uint64_t, yes, no, no) \ - O(prof_tdata, prof_tdata_t *, yes, no, yes) \ - O(rtree_ctx, rtree_ctx_t, no, yes, no) \ - O(iarena, arena_t *, yes, no, yes) \ - O(arena, arena_t *, yes, no, yes) \ - O(arenas_tdata, arena_tdata_t *,yes, no, yes) \ - O(tcache, tcache_t, no, no, yes) \ - O(witnesses, witness_list_t, no, no, yes) \ - O(rtree_leaf_elm_witnesses, rtree_leaf_elm_witness_tsd_t, \ - no, no, no) \ - O(witness_fork, bool, yes, no, no) - -#define TSD_INITIALIZER { \ - tsd_state_uninitialized, \ - TCACHE_ENABLED_ZERO_INITIALIZER, \ - false, \ - 0, \ - 0, \ - 0, \ - 0, \ - NULL, \ - RTREE_CTX_ZERO_INITIALIZER, \ - NULL, \ - NULL, \ - NULL, \ - TCACHE_ZERO_INITIALIZER, \ - ql_head_initializer(witnesses), \ - RTREE_ELM_WITNESS_TSD_INITIALIZER, \ - false \ -} - -struct tsd_s { - tsd_state_t state; -#define O(n, t, gs, i, c) \ - t n; -MALLOC_TSD -#undef O -}; - -/* - * Wrapper around tsd_t that makes it possible to avoid implicit conversion - * between tsd_t and tsdn_t, where tsdn_t is "nullable" and has to be - * explicitly converted to tsd_t, which is non-nullable. - */ -struct tsdn_s { - tsd_t tsd; -}; - -static const tsd_t tsd_initializer = TSD_INITIALIZER; -UNUSED static const void *malloc_tsd_no_cleanup = (void *)0; - -malloc_tsd_types(, tsd_t) - -#endif /* JEMALLOC_INTERNAL_TSD_STRUCTS_H */ diff --git a/include/jemalloc/internal/tsd_tls.h b/include/jemalloc/internal/tsd_tls.h new file mode 100644 index 00000000..757aaa0e --- /dev/null +++ b/include/jemalloc/internal/tsd_tls.h @@ -0,0 +1,59 @@ +#ifdef JEMALLOC_INTERNAL_TSD_TLS_H +#error This file should be included only once, by tsd.h. 
+#endif +#define JEMALLOC_INTERNAL_TSD_TLS_H + +extern __thread tsd_t tsd_tls; +extern pthread_key_t tsd_tsd; +extern bool tsd_booted; + +/* Initialization/cleanup. */ +JEMALLOC_ALWAYS_INLINE bool +tsd_boot0(void) { + if (pthread_key_create(&tsd_tsd, &tsd_cleanup) != 0) { + return true; + } + tsd_booted = true; + return false; +} + +JEMALLOC_ALWAYS_INLINE void +tsd_boot1(void) { + /* Do nothing. */ +} + +JEMALLOC_ALWAYS_INLINE bool +tsd_boot(void) { + return tsd_boot0(); +} + +JEMALLOC_ALWAYS_INLINE bool +tsd_booted_get(void) { + return tsd_booted; +} + +JEMALLOC_ALWAYS_INLINE bool +tsd_get_allocates(void) { + return false; +} + +/* Get/set. */ +JEMALLOC_ALWAYS_INLINE tsd_t * +tsd_get(bool init) { + assert(tsd_booted); + return &tsd_tls; +} + +JEMALLOC_ALWAYS_INLINE void +tsd_set(tsd_t *val) { + assert(tsd_booted); + if (likely(&tsd_tls != val)) { + tsd_tls = (*val); + } + if (pthread_setspecific(tsd_tsd, (void *)(&tsd_tls)) != 0) { + malloc_write(": Error setting tsd.\n"); + if (opt_abort) { + abort(); + } + } +} diff --git a/include/jemalloc/internal/tsd_types.h b/include/jemalloc/internal/tsd_types.h index dc9efbb6..6200af61 100644 --- a/include/jemalloc/internal/tsd_types.h +++ b/include/jemalloc/internal/tsd_types.h @@ -1,549 +1,10 @@ #ifndef JEMALLOC_INTERNAL_TSD_TYPES_H #define JEMALLOC_INTERNAL_TSD_TYPES_H -#include "jemalloc/internal/ql.h" - -/* Maximum number of malloc_tsd users with cleanup functions. */ #define MALLOC_TSD_CLEANUPS_MAX 2 -typedef bool (*malloc_tsd_cleanup_t)(void); - -#if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) && \ - !defined(_WIN32)) -typedef struct tsd_init_block_s tsd_init_block_t; -typedef struct tsd_init_head_s tsd_init_head_t; -#endif - typedef struct tsd_s tsd_t; typedef struct tsdn_s tsdn_t; - -#define TSDN_NULL ((tsdn_t *)0) - -enum { - tsd_state_nominal = 0, /* Common case --> jnz. */ - tsd_state_nominal_slow = 1, /* Initialized but on slow path. 
*/ - /* the above 2 nominal states should be lower values. */ - tsd_state_nominal_max = 1, /* used for comparison only. */ - tsd_state_purgatory = 2, - tsd_state_reincarnated = 3, - tsd_state_uninitialized = 4 -}; - -/* Manually limit tsd_state_t to a single byte. */ -typedef uint8_t tsd_state_t; - -/* - * TLS/TSD-agnostic macro-based implementation of thread-specific data. There - * are five macros that support (at least) three use cases: file-private, - * library-private, and library-private inlined. Following is an example - * library-private tsd variable: - * - * In example.h: - * typedef struct { - * int x; - * int y; - * } example_t; - * #define EX_INITIALIZER JEMALLOC_CONCAT({0, 0}) - * malloc_tsd_types(example_, example_t) - * malloc_tsd_protos(, example_, example_t) - * malloc_tsd_externs(example_, example_t) - * In example.c: - * malloc_tsd_data(, example_, example_t, EX_INITIALIZER) - * malloc_tsd_funcs(, example_, example_t, EX_INITIALIZER, - * example_tsd_cleanup) - * - * The result is a set of generated functions, e.g.: - * - * bool example_tsd_boot(void) {...} - * bool example_tsd_booted_get(void) {...} - * example_t *example_tsd_get(bool init) {...} - * void example_tsd_set(example_t *val) {...} - * - * Note that all of the functions deal in terms of (a_type *) rather than - * (a_type) so that it is possible to support non-pointer types (unlike - * pthreads TSD). example_tsd_cleanup() is passed an (a_type *) pointer that is - * cast to (void *). This means that the cleanup function needs to cast the - * function argument to (a_type *), then dereference the resulting pointer to - * access fields, e.g. - * - * void - * example_tsd_cleanup(void *arg) - * { - * example_t *example = (example_t *)arg; - * - * example->x = 42; - * [...] - * if ([want the cleanup function to be called again]) - * example_tsd_set(example); - * } - * - * If example_tsd_set() is called within example_tsd_cleanup(), it will be - * called again. 
This is similar to how pthreads TSD destruction works, except - * that pthreads only calls the cleanup function again if the value was set to - * non-NULL. - */ - -/* malloc_tsd_types(). */ -#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP -#define malloc_tsd_types(a_name, a_type) -#elif (defined(JEMALLOC_TLS)) -#define malloc_tsd_types(a_name, a_type) -#elif (defined(_WIN32)) -#define malloc_tsd_types(a_name, a_type) \ -typedef struct { \ - bool initialized; \ - a_type val; \ -} a_name##tsd_wrapper_t; -#else -#define malloc_tsd_types(a_name, a_type) \ -typedef struct { \ - bool initialized; \ - a_type val; \ -} a_name##tsd_wrapper_t; -#endif - -/* malloc_tsd_protos(). */ -#define malloc_tsd_protos(a_attr, a_name, a_type) \ -a_attr bool \ -a_name##tsd_boot0(void); \ -a_attr void \ -a_name##tsd_boot1(void); \ -a_attr bool \ -a_name##tsd_boot(void); \ -a_attr bool \ -a_name##tsd_booted_get(void); \ -a_attr a_type * \ -a_name##tsd_get(bool init); \ -a_attr void \ -a_name##tsd_set(a_type *val); - -/* malloc_tsd_externs(). */ -#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP -#define malloc_tsd_externs(a_name, a_type) \ -extern __thread a_type a_name##tsd_tls; \ -extern __thread bool a_name##tsd_initialized; \ -extern bool a_name##tsd_booted; -#elif (defined(JEMALLOC_TLS)) -#define malloc_tsd_externs(a_name, a_type) \ -extern __thread a_type a_name##tsd_tls; \ -extern pthread_key_t a_name##tsd_tsd; \ -extern bool a_name##tsd_booted; -#elif (defined(_WIN32)) -#define malloc_tsd_externs(a_name, a_type) \ -extern DWORD a_name##tsd_tsd; \ -extern a_name##tsd_wrapper_t a_name##tsd_boot_wrapper; \ -extern bool a_name##tsd_booted; -#else -#define malloc_tsd_externs(a_name, a_type) \ -extern pthread_key_t a_name##tsd_tsd; \ -extern tsd_init_head_t a_name##tsd_init_head; \ -extern a_name##tsd_wrapper_t a_name##tsd_boot_wrapper; \ -extern bool a_name##tsd_booted; -#endif - -/* malloc_tsd_data(). 
*/ -#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP -#define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ -a_attr __thread a_type JEMALLOC_TLS_MODEL \ - a_name##tsd_tls = a_initializer; \ -a_attr __thread bool JEMALLOC_TLS_MODEL \ - a_name##tsd_initialized = false; \ -a_attr bool a_name##tsd_booted = false; -#elif (defined(JEMALLOC_TLS)) -#define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ -a_attr __thread a_type JEMALLOC_TLS_MODEL \ - a_name##tsd_tls = a_initializer; \ -a_attr pthread_key_t a_name##tsd_tsd; \ -a_attr bool a_name##tsd_booted = false; -#elif (defined(_WIN32)) -#define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ -a_attr DWORD a_name##tsd_tsd; \ -a_attr a_name##tsd_wrapper_t a_name##tsd_boot_wrapper = { \ - false, \ - a_initializer \ -}; \ -a_attr bool a_name##tsd_booted = false; -#else -#define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ -a_attr pthread_key_t a_name##tsd_tsd; \ -a_attr tsd_init_head_t a_name##tsd_init_head = { \ - ql_head_initializer(blocks), \ - MALLOC_MUTEX_INITIALIZER \ -}; \ -a_attr a_name##tsd_wrapper_t a_name##tsd_boot_wrapper = { \ - false, \ - a_initializer \ -}; \ -a_attr bool a_name##tsd_booted = false; -#endif - -/* malloc_tsd_funcs(). */ -#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP -#define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \ - a_cleanup) \ -/* Initialization/cleanup. */ \ -a_attr bool \ -a_name##tsd_cleanup_wrapper(void) { \ - if (a_name##tsd_initialized) { \ - a_name##tsd_initialized = false; \ - a_cleanup(&a_name##tsd_tls); \ - } \ - return a_name##tsd_initialized; \ -} \ -a_attr bool \ -a_name##tsd_boot0(void) { \ - if (a_cleanup != malloc_tsd_no_cleanup) { \ - malloc_tsd_cleanup_register( \ - &a_name##tsd_cleanup_wrapper); \ - } \ - a_name##tsd_booted = true; \ - return false; \ -} \ -a_attr void \ -a_name##tsd_boot1(void) { \ - /* Do nothing. 
*/ \ -} \ -a_attr bool \ -a_name##tsd_boot(void) { \ - return a_name##tsd_boot0(); \ -} \ -a_attr bool \ -a_name##tsd_booted_get(void) { \ - return a_name##tsd_booted; \ -} \ -a_attr bool \ -a_name##tsd_get_allocates(void) { \ - return false; \ -} \ -/* Get/set. */ \ -a_attr a_type * \ -a_name##tsd_get(bool init) { \ - assert(a_name##tsd_booted); \ - return &a_name##tsd_tls; \ -} \ -a_attr void \ -a_name##tsd_set(a_type *val) { \ - assert(a_name##tsd_booted); \ - if (likely(&a_name##tsd_tls != val)) { \ - a_name##tsd_tls = (*val); \ - } \ - if (a_cleanup != malloc_tsd_no_cleanup) { \ - a_name##tsd_initialized = true; \ - } \ -} -#elif (defined(JEMALLOC_TLS)) -#define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \ - a_cleanup) \ -/* Initialization/cleanup. */ \ -a_attr bool \ -a_name##tsd_boot0(void) { \ - if (a_cleanup != malloc_tsd_no_cleanup) { \ - if (pthread_key_create(&a_name##tsd_tsd, a_cleanup) != \ - 0) { \ - return true; \ - } \ - } \ - a_name##tsd_booted = true; \ - return false; \ -} \ -a_attr void \ -a_name##tsd_boot1(void) { \ - /* Do nothing. */ \ -} \ -a_attr bool \ -a_name##tsd_boot(void) { \ - return a_name##tsd_boot0(); \ -} \ -a_attr bool \ -a_name##tsd_booted_get(void) { \ - return a_name##tsd_booted; \ -} \ -a_attr bool \ -a_name##tsd_get_allocates(void) { \ - return false; \ -} \ -/* Get/set. 
*/ \ -a_attr a_type * \ -a_name##tsd_get(bool init) { \ - assert(a_name##tsd_booted); \ - return &a_name##tsd_tls; \ -} \ -a_attr void \ -a_name##tsd_set(a_type *val) { \ - assert(a_name##tsd_booted); \ - if (likely(&a_name##tsd_tls != val)) { \ - a_name##tsd_tls = (*val); \ - } \ - if (a_cleanup != malloc_tsd_no_cleanup) { \ - if (pthread_setspecific(a_name##tsd_tsd, \ - (void *)(&a_name##tsd_tls))) { \ - malloc_write(": Error" \ - " setting TSD for "#a_name"\n"); \ - if (opt_abort) { \ - abort(); \ - } \ - } \ - } \ -} -#elif (defined(_WIN32)) -#define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \ - a_cleanup) \ -/* Initialization/cleanup. */ \ -a_attr bool \ -a_name##tsd_cleanup_wrapper(void) { \ - DWORD error = GetLastError(); \ - a_name##tsd_wrapper_t *wrapper = (a_name##tsd_wrapper_t *) \ - TlsGetValue(a_name##tsd_tsd); \ - SetLastError(error); \ - \ - if (wrapper == NULL) { \ - return false; \ - } \ - if (a_cleanup != malloc_tsd_no_cleanup && \ - wrapper->initialized) { \ - wrapper->initialized = false; \ - a_cleanup(&wrapper->val); \ - if (wrapper->initialized) { \ - /* Trigger another cleanup round. 
*/ \ - return true; \ - } \ - } \ - malloc_tsd_dalloc(wrapper); \ - return false; \ -} \ -a_attr void \ -a_name##tsd_wrapper_set(a_name##tsd_wrapper_t *wrapper) { \ - if (!TlsSetValue(a_name##tsd_tsd, (void *)wrapper)) { \ - malloc_write(": Error setting" \ - " TSD for "#a_name"\n"); \ - abort(); \ - } \ -} \ -a_attr a_name##tsd_wrapper_t * \ -a_name##tsd_wrapper_get(bool init) { \ - DWORD error = GetLastError(); \ - a_name##tsd_wrapper_t *wrapper = (a_name##tsd_wrapper_t *) \ - TlsGetValue(a_name##tsd_tsd); \ - SetLastError(error); \ - \ - if (init && unlikely(wrapper == NULL)) { \ - wrapper = (a_name##tsd_wrapper_t *) \ - malloc_tsd_malloc(sizeof(a_name##tsd_wrapper_t)); \ - if (wrapper == NULL) { \ - malloc_write(": Error allocating" \ - " TSD for "#a_name"\n"); \ - abort(); \ - } else { \ - wrapper->initialized = false; \ - wrapper->val = a_initializer; \ - } \ - a_name##tsd_wrapper_set(wrapper); \ - } \ - return wrapper; \ -} \ -a_attr bool \ -a_name##tsd_boot0(void) { \ - a_name##tsd_tsd = TlsAlloc(); \ - if (a_name##tsd_tsd == TLS_OUT_OF_INDEXES) { \ - return true; \ - } \ - if (a_cleanup != malloc_tsd_no_cleanup) { \ - malloc_tsd_cleanup_register( \ - &a_name##tsd_cleanup_wrapper); \ - } \ - a_name##tsd_wrapper_set(&a_name##tsd_boot_wrapper); \ - a_name##tsd_booted = true; \ - return false; \ -} \ -a_attr void \ -a_name##tsd_boot1(void) { \ - a_name##tsd_wrapper_t *wrapper; \ - wrapper = (a_name##tsd_wrapper_t *) \ - malloc_tsd_malloc(sizeof(a_name##tsd_wrapper_t)); \ - if (wrapper == NULL) { \ - malloc_write(": Error allocating" \ - " TSD for "#a_name"\n"); \ - abort(); \ - } \ - a_name##tsd_boot_wrapper.initialized = false; \ - a_cleanup(&a_name##tsd_boot_wrapper.val); \ - wrapper->initialized = false; \ - wrapper->val = a_initializer; \ - a_name##tsd_wrapper_set(wrapper); \ -} \ -a_attr bool \ -a_name##tsd_boot(void) { \ - if (a_name##tsd_boot0()) { \ - return true; \ - } \ - a_name##tsd_boot1(); \ - return false; \ -} \ -a_attr bool \ 
-a_name##tsd_booted_get(void) { \ - return a_name##tsd_booted; \ -} \ -a_attr bool \ -a_name##tsd_get_allocates(void) { \ - return true; \ -} \ -/* Get/set. */ \ -a_attr a_type * \ -a_name##tsd_get(bool init) { \ - a_name##tsd_wrapper_t *wrapper; \ - \ - assert(a_name##tsd_booted); \ - wrapper = a_name##tsd_wrapper_get(init); \ - if (a_name##tsd_get_allocates() && !init && wrapper == NULL) { \ - return NULL; \ - } \ - return &wrapper->val; \ -} \ -a_attr void \ -a_name##tsd_set(a_type *val) { \ - a_name##tsd_wrapper_t *wrapper; \ - \ - assert(a_name##tsd_booted); \ - wrapper = a_name##tsd_wrapper_get(true); \ - if (likely(&wrapper->val != val)) { \ - wrapper->val = *(val); \ - } \ - if (a_cleanup != malloc_tsd_no_cleanup) { \ - wrapper->initialized = true; \ - } \ -} -#else -#define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \ - a_cleanup) \ -/* Initialization/cleanup. */ \ -a_attr void \ -a_name##tsd_cleanup_wrapper(void *arg) { \ - a_name##tsd_wrapper_t *wrapper = (a_name##tsd_wrapper_t *)arg; \ - \ - if (a_cleanup != malloc_tsd_no_cleanup && \ - wrapper->initialized) { \ - wrapper->initialized = false; \ - a_cleanup(&wrapper->val); \ - if (wrapper->initialized) { \ - /* Trigger another cleanup round. 
*/ \ - if (pthread_setspecific(a_name##tsd_tsd, \ - (void *)wrapper)) { \ - malloc_write(": Error" \ - " setting TSD for "#a_name"\n"); \ - if (opt_abort) { \ - abort(); \ - } \ - } \ - return; \ - } \ - } \ - malloc_tsd_dalloc(wrapper); \ -} \ -a_attr void \ -a_name##tsd_wrapper_set(a_name##tsd_wrapper_t *wrapper) { \ - if (pthread_setspecific(a_name##tsd_tsd, \ - (void *)wrapper)) { \ - malloc_write(": Error setting" \ - " TSD for "#a_name"\n"); \ - abort(); \ - } \ -} \ -a_attr a_name##tsd_wrapper_t * \ -a_name##tsd_wrapper_get(bool init) { \ - a_name##tsd_wrapper_t *wrapper = (a_name##tsd_wrapper_t *) \ - pthread_getspecific(a_name##tsd_tsd); \ - \ - if (init && unlikely(wrapper == NULL)) { \ - tsd_init_block_t block; \ - wrapper = (a_name##tsd_wrapper_t *) \ - tsd_init_check_recursion(&a_name##tsd_init_head, \ - &block); \ - if (wrapper) { \ - return wrapper; \ - } \ - wrapper = (a_name##tsd_wrapper_t *) \ - malloc_tsd_malloc(sizeof(a_name##tsd_wrapper_t)); \ - block.data = (void *)wrapper; \ - if (wrapper == NULL) { \ - malloc_write(": Error allocating" \ - " TSD for "#a_name"\n"); \ - abort(); \ - } else { \ - wrapper->initialized = false; \ - wrapper->val = a_initializer; \ - } \ - a_name##tsd_wrapper_set(wrapper); \ - tsd_init_finish(&a_name##tsd_init_head, &block); \ - } \ - return wrapper; \ -} \ -a_attr bool \ -a_name##tsd_boot0(void) { \ - if (pthread_key_create(&a_name##tsd_tsd, \ - a_name##tsd_cleanup_wrapper) != 0) { \ - return true; \ - } \ - a_name##tsd_wrapper_set(&a_name##tsd_boot_wrapper); \ - a_name##tsd_booted = true; \ - return false; \ -} \ -a_attr void \ -a_name##tsd_boot1(void) { \ - a_name##tsd_wrapper_t *wrapper; \ - wrapper = (a_name##tsd_wrapper_t *) \ - malloc_tsd_malloc(sizeof(a_name##tsd_wrapper_t)); \ - if (wrapper == NULL) { \ - malloc_write(": Error allocating" \ - " TSD for "#a_name"\n"); \ - abort(); \ - } \ - a_name##tsd_boot_wrapper.initialized = false; \ - a_cleanup(&a_name##tsd_boot_wrapper.val); \ - wrapper->initialized = 
false; \ - wrapper->val = a_initializer; \ - a_name##tsd_wrapper_set(wrapper); \ -} \ -a_attr bool \ -a_name##tsd_boot(void) { \ - if (a_name##tsd_boot0()) { \ - return true; \ - } \ - a_name##tsd_boot1(); \ - return false; \ -} \ -a_attr bool \ -a_name##tsd_booted_get(void) { \ - return a_name##tsd_booted; \ -} \ -a_attr bool \ -a_name##tsd_get_allocates(void) { \ - return true; \ -} \ -/* Get/set. */ \ -a_attr a_type * \ -a_name##tsd_get(bool init) { \ - a_name##tsd_wrapper_t *wrapper; \ - \ - assert(a_name##tsd_booted); \ - wrapper = a_name##tsd_wrapper_get(init); \ - if (a_name##tsd_get_allocates() && !init && wrapper == NULL) { \ - return NULL; \ - } \ - return &wrapper->val; \ -} \ -a_attr void \ -a_name##tsd_set(a_type *val) { \ - a_name##tsd_wrapper_t *wrapper; \ - \ - assert(a_name##tsd_booted); \ - wrapper = a_name##tsd_wrapper_get(true); \ - if (likely(&wrapper->val != val)) { \ - wrapper->val = *(val); \ - } \ - if (a_cleanup != malloc_tsd_no_cleanup) { \ - wrapper->initialized = true; \ - } \ -} -#endif +typedef bool (*malloc_tsd_cleanup_t)(void); #endif /* JEMALLOC_INTERNAL_TSD_TYPES_H */ diff --git a/include/jemalloc/internal/tsd_win.h b/include/jemalloc/internal/tsd_win.h new file mode 100644 index 00000000..cf30d18e --- /dev/null +++ b/include/jemalloc/internal/tsd_win.h @@ -0,0 +1,139 @@ +#ifdef JEMALLOC_INTERNAL_TSD_WIN_H +#error This file should be included only once, by tsd.h. +#endif +#define JEMALLOC_INTERNAL_TSD_WIN_H + +typedef struct { + bool initialized; + tsd_t val; +} tsd_wrapper_t; + +extern DWORD tsd_tsd; +extern tsd_wrapper_t tsd_boot_wrapper; +extern bool tsd_booted; + +/* Initialization/cleanup. 
*/ +JEMALLOC_ALWAYS_INLINE bool +tsd_cleanup_wrapper(void) { + DWORD error = GetLastError(); + tsd_wrapper_t *wrapper = (tsd_wrapper_t *)TlsGetValue(tsd_tsd); + SetLastError(error); + + if (wrapper == NULL) { + return false; + } + + if (wrapper->initialized) { + wrapper->initialized = false; + tsd_cleanup(&wrapper->val); + if (wrapper->initialized) { + /* Trigger another cleanup round. */ + return true; + } + } + malloc_tsd_dalloc(wrapper); + return false; +} + +JEMALLOC_ALWAYS_INLINE void +tsd_wrapper_set(tsd_wrapper_t *wrapper) { + if (!TlsSetValue(tsd_tsd, (void *)wrapper)) { + malloc_write(": Error setting TSD\n"); + abort(); + } +} + +JEMALLOC_ALWAYS_INLINE tsd_wrapper_t * +tsd_wrapper_get(bool init) { + DWORD error = GetLastError(); + tsd_wrapper_t *wrapper = (tsd_wrapper_t *) TlsGetValue(tsd_tsd); + SetLastError(error); + + if (init && unlikely(wrapper == NULL)) { + wrapper = (tsd_wrapper_t *) + malloc_tsd_malloc(sizeof(tsd_wrapper_t)); + if (wrapper == NULL) { + malloc_write(": Error allocating TSD\n"); + abort(); + } else { + wrapper->initialized = false; + /* MSVC is finicky about aggregate initialization. 
*/ + tsd_t tsd_initializer = TSD_INITIALIZER; + wrapper->val = tsd_initializer; + } + tsd_wrapper_set(wrapper); + } + return wrapper; +} + +JEMALLOC_ALWAYS_INLINE bool +tsd_boot0(void) { + tsd_tsd = TlsAlloc(); + if (tsd_tsd == TLS_OUT_OF_INDEXES) { + return true; + } + malloc_tsd_cleanup_register(&tsd_cleanup_wrapper); + tsd_wrapper_set(&tsd_boot_wrapper); + tsd_booted = true; + return false; +} + +JEMALLOC_ALWAYS_INLINE void +tsd_boot1(void) { + tsd_wrapper_t *wrapper; + wrapper = (tsd_wrapper_t *) + malloc_tsd_malloc(sizeof(tsd_wrapper_t)); + if (wrapper == NULL) { + malloc_write(": Error allocating TSD\n"); + abort(); + } + tsd_boot_wrapper.initialized = false; + tsd_cleanup(&tsd_boot_wrapper.val); + wrapper->initialized = false; + tsd_t initializer = TSD_INITIALIZER; + wrapper->val = initializer; + tsd_wrapper_set(wrapper); +} +JEMALLOC_ALWAYS_INLINE bool +tsd_boot(void) { + if (tsd_boot0()) { + return true; + } + tsd_boot1(); + return false; +} + +JEMALLOC_ALWAYS_INLINE bool +tsd_booted_get(void) { + return tsd_booted; +} + +JEMALLOC_ALWAYS_INLINE bool +tsd_get_allocates(void) { + return true; +} + +/* Get/set. 
*/ +JEMALLOC_ALWAYS_INLINE tsd_t * +tsd_get(bool init) { + tsd_wrapper_t *wrapper; + + assert(tsd_booted); + wrapper = tsd_wrapper_get(init); + if (tsd_get_allocates() && !init && wrapper == NULL) { + return NULL; + } + return &wrapper->val; +} + +JEMALLOC_ALWAYS_INLINE void +tsd_set(tsd_t *val) { + tsd_wrapper_t *wrapper; + + assert(tsd_booted); + wrapper = tsd_wrapper_get(true); + if (likely(&wrapper->val != val)) { + wrapper->val = *(val); + } + wrapper->initialized = true; +} diff --git a/src/jemalloc.c b/src/jemalloc.c index 97a64431..b8c94133 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2035,7 +2035,9 @@ irealloc_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t usize, JEMALLOC_ALWAYS_INLINE void ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) { - assert(slow_path || tsd_assert_fast(tsd)); + if (!slow_path) { + tsd_assert_fast(tsd); + } if (tsd_reentrancy_level_get(tsd) == 0) { witness_assert_lockless(tsd_tsdn(tsd)); } else { @@ -2073,7 +2075,9 @@ ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) { JEMALLOC_ALWAYS_INLINE void isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) { - assert(slow_path || tsd_assert_fast(tsd)); + if (!slow_path) { + tsd_assert_fast(tsd); + } if (tsd_reentrancy_level_get(tsd) == 0) { witness_assert_lockless(tsd_tsdn(tsd)); } else { diff --git a/src/prof.c b/src/prof.c index 1e818ab4..d60680c1 100644 --- a/src/prof.c +++ b/src/prof.c @@ -1642,7 +1642,7 @@ static bool prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, bool leakcheck) { cassert(config_prof); - assert(tsd->reentrancy_level == 0); + assert(tsd_reentrancy_level_get(tsd) == 0); prof_tdata_t * tdata = prof_tdata_get(tsd, true); if (tdata == NULL) { @@ -1757,7 +1757,7 @@ prof_fdump(void) { return; } tsd = tsd_fetch(); - assert(tsd->reentrancy_level == 0); + assert(tsd_reentrancy_level_get(tsd) == 0); malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_seq_mtx); prof_dump_filename(filename, 'f', 
VSEQ_INVALID); @@ -1792,7 +1792,7 @@ prof_idump(tsdn_t *tsdn) { return; } tsd = tsdn_tsd(tsdn); - if (tsd->reentrancy_level > 0) { + if (tsd_reentrancy_level_get(tsd) > 0) { return; } @@ -1818,7 +1818,7 @@ prof_idump(tsdn_t *tsdn) { bool prof_mdump(tsd_t *tsd, const char *filename) { cassert(config_prof); - assert(tsd->reentrancy_level == 0); + assert(tsd_reentrancy_level_get(tsd) == 0); if (!opt_prof || !prof_booted) { return true; @@ -1849,7 +1849,7 @@ prof_gdump(tsdn_t *tsdn) { return; } tsd = tsdn_tsd(tsdn); - if (tsd->reentrancy_level > 0) { + if (tsd_reentrancy_level_get(tsd) > 0) { return; } diff --git a/src/rtree.c b/src/rtree.c index ada6e9d5..72786ff5 100644 --- a/src/rtree.c +++ b/src/rtree.c @@ -461,10 +461,3 @@ rtree_ctx_data_init(rtree_ctx_t *ctx) { cache->leaf = NULL; } } - -bool -tsd_rtree_ctx_data_init(tsd_t *tsd) { - rtree_ctx_data_init(&tsd->rtree_ctx); - - return false; -} diff --git a/src/tcache.c b/src/tcache.c index a7e05b17..afb1faa6 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -453,15 +453,12 @@ tcache_init(tsd_t *tsd, tcache_t *tcache, void *avail_stack) { /* Initialize auto tcache (embedded in TSD). */ bool tsd_tcache_data_init(tsd_t *tsd) { - tcache_t *tcache = &tsd->tcache; + tcache_t *tcache = tsd_tcachep_get_unsafe(tsd); assert(tcache_small_bin_get(tcache, 0)->avail == NULL); size_t size = stack_nelms * sizeof(void *); /* Avoid false cacheline sharing. */ size = sa2u(size, CACHELINE); - /* Manually initialize rcache as we may need it for allocation. 
*/ - tsd_rtree_ctx_data_init(tsd); - void *avail_array = ipallocztm(tsd_tsdn(tsd), size, CACHELINE, true, NULL, true, arena_get(TSDN_NULL, 0, true)); if (avail_array == NULL) { diff --git a/src/tsd.c b/src/tsd.c index 686b4ef4..612f7523 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -10,14 +10,38 @@ static unsigned ncleanups; static malloc_tsd_cleanup_t cleanups[MALLOC_TSD_CLEANUPS_MAX]; -malloc_tsd_data(, , tsd_t, TSD_INITIALIZER) +#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP +__thread tsd_t JEMALLOC_TLS_MODEL tsd_tls = TSD_INITIALIZER; +__thread bool JEMALLOC_TLS_MODEL tsd_initialized = false; +bool tsd_booted = false; +#elif (defined(JEMALLOC_TLS)) +__thread tsd_t JEMALLOC_TLS_MODEL tsd_tls = TSD_INITIALIZER; +pthread_key_t tsd_tsd; +bool tsd_booted = false; +#elif (defined(_WIN32)) +DWORD tsd_tsd; +tsd_wrapper_t tsd_boot_wrapper = {false, TSD_INITIALIZER}; +bool tsd_booted = false; +#else +pthread_key_t tsd_tsd; +tsd_init_head_t tsd_init_head = { + ql_head_initializer(blocks), + MALLOC_MUTEX_INITIALIZER +}; +tsd_wrapper_t tsd_boot_wrapper = { + false, + TSD_INITIALIZER +}; +bool tsd_booted = false; +#endif + /******************************************************************************/ void tsd_slow_update(tsd_t *tsd) { if (tsd_nominal(tsd)) { - if (malloc_slow || !tsd->tcache_enabled || + if (malloc_slow || !tsd_tcache_enabled_get(tsd) || tsd_reentrancy_level_get(tsd) > 0) { tsd->state = tsd_state_nominal_slow; } else { @@ -97,20 +121,28 @@ malloc_tsd_cleanup_register(bool (*f)(void)) { bool tsd_data_init(void *arg) { tsd_t *tsd = (tsd_t *)arg; -#define MALLOC_TSD_init_yes(n, t) \ - if (tsd_##n##_data_init(tsd)) { \ - return true; \ + /* + * We initialize the rtree context first (before the tcache), since the + * tcache initialization depends on it. 
+ */ + rtree_ctx_data_init(tsd_rtree_ctxp_get_unsafe(tsd)); + + if (tsd_tcache_enabled_data_init(tsd)) { + return true; } -#define MALLOC_TSD_init_no(n, t) -#define O(n, t, gs, i, c) \ - MALLOC_TSD_init_##i(n, t) -MALLOC_TSD -#undef MALLOC_TSD_init_yes -#undef MALLOC_TSD_init_no -#undef O return false; } +static void +tsd_do_data_cleanup(tsd_t *tsd) { + prof_tdata_cleanup(tsd); + iarena_cleanup(tsd); + arena_cleanup(tsd); + arenas_tdata_cleanup(tsd); + tcache_cleanup(tsd); + witnesses_cleanup(tsd); +} + void tsd_cleanup(void *arg) { tsd_t *tsd = (tsd_t *)arg; @@ -127,15 +159,7 @@ tsd_cleanup(void *arg) { * after this destructor was called. Reset state to * tsd_state_purgatory and request another callback. */ -#define MALLOC_TSD_cleanup_yes(n, t) \ - n##_cleanup(tsd); -#define MALLOC_TSD_cleanup_no(n, t) -#define O(n, t, gs, i, c) \ - MALLOC_TSD_cleanup_##c(n, t) -MALLOC_TSD -#undef MALLOC_TSD_cleanup_yes -#undef MALLOC_TSD_cleanup_no -#undef O + tsd_do_data_cleanup(tsd); tsd->state = tsd_state_purgatory; tsd_set(tsd); break; @@ -150,6 +174,13 @@ MALLOC_TSD default: not_reached(); } +#ifdef JEMALLOC_JET + test_callback_t test_callback = *tsd_test_callbackp_get_unsafe(tsd); + int *data = tsd_test_datap_get_unsafe(tsd); + if (test_callback != NULL) { + test_callback(data); + } +#endif } tsd_t * diff --git a/test/unit/tsd.c b/test/unit/tsd.c index 38114674..c9a7d809 100644 --- a/test/unit/tsd.c +++ b/test/unit/tsd.c @@ -1,41 +1,29 @@ #include "test/jemalloc_test.h" -#define THREAD_DATA 0x72b65c10 - -typedef unsigned int data_t; - -static bool data_cleanup_executed; -static bool data_test_started; - -malloc_tsd_types(data_, data_t) -malloc_tsd_protos(, data_, data_t) +static int data_cleanup_count; void -data_cleanup(void *arg) { - data_t *data = (data_t *)arg; - - if (!data_test_started) { - return; - } - if (!data_cleanup_executed) { - assert_x_eq(*data, THREAD_DATA, +data_cleanup(int *data) { + if (data_cleanup_count == 0) { + assert_x_eq(*data, 
MALLOC_TSD_TEST_DATA_INIT, "Argument passed into cleanup function should match tsd " "value"); } - data_cleanup_executed = true; + ++data_cleanup_count; /* * Allocate during cleanup for two rounds, in order to assure that * jemalloc's internal tsd reinitialization happens. */ + bool reincarnate = false; switch (*data) { - case THREAD_DATA: + case MALLOC_TSD_TEST_DATA_INIT: *data = 1; - data_tsd_set(data); + reincarnate = true; break; case 1: *data = 2; - data_tsd_set(data); + reincarnate = true; break; case 2: return; @@ -43,37 +31,35 @@ data_cleanup(void *arg) { not_reached(); } - { + if (reincarnate) { void *p = mallocx(1, 0); assert_ptr_not_null(p, "Unexpeced mallocx() failure"); dallocx(p, 0); } } -malloc_tsd_externs(data_, data_t) -#define DATA_INIT 0x12345678 -malloc_tsd_data(, data_, data_t, DATA_INIT) -malloc_tsd_funcs(, data_, data_t, DATA_INIT, data_cleanup) - static void * thd_start(void *arg) { - data_t d = (data_t)(uintptr_t)arg; + int d = (int)(uintptr_t)arg; void *p; - assert_x_eq(*data_tsd_get(true), DATA_INIT, + tsd_t *tsd = tsd_fetch(); + assert_x_eq(tsd_test_data_get(tsd), MALLOC_TSD_TEST_DATA_INIT, "Initial tsd get should return initialization value"); p = malloc(1); assert_ptr_not_null(p, "Unexpected malloc() failure"); - data_tsd_set(&d); - assert_x_eq(*data_tsd_get(true), d, + tsd_test_data_set(tsd, d); + assert_x_eq(tsd_test_data_get(tsd), d, "After tsd set, tsd get should return value that was set"); d = 0; - assert_x_eq(*data_tsd_get(true), (data_t)(uintptr_t)arg, + assert_x_eq(tsd_test_data_get(tsd), (int)(uintptr_t)arg, "Resetting local data should have no effect on tsd"); + tsd_test_callback_set(tsd, &data_cleanup); + free(p); return NULL; } @@ -86,11 +72,15 @@ TEST_END TEST_BEGIN(test_tsd_sub_thread) { thd_t thd; - data_cleanup_executed = false; - thd_create(&thd, thd_start, (void *)THREAD_DATA); + data_cleanup_count = 0; + thd_create(&thd, thd_start, (void *)MALLOC_TSD_TEST_DATA_INIT); thd_join(thd, NULL); - 
assert_true(data_cleanup_executed, - "Cleanup function should have executed"); + /* + * We reincarnate twice in the data cleanup, so it should execute at + * least 3 times. + */ + assert_x_ge(data_cleanup_count, 3, + "Cleanup function should have executed multiple times."); } TEST_END @@ -103,9 +93,11 @@ thd_start_reincarnated(void *arg) { assert_ptr_not_null(p, "Unexpected malloc() failure"); /* Manually trigger reincarnation. */ - assert_ptr_not_null(tsd->arena, "Should have tsd arena set."); + assert_ptr_not_null(tsd_arena_get(tsd), + "Should have tsd arena set."); tsd_cleanup((void *)tsd); - assert_ptr_null(tsd->arena, "TSD arena should have been cleared."); + assert_ptr_null(*tsd_arenap_get_unsafe(tsd), + "TSD arena should have been cleared."); assert_u_eq(tsd->state, tsd_state_purgatory, "TSD state should be purgatory\n"); @@ -114,12 +106,12 @@ thd_start_reincarnated(void *arg) { "TSD state should be reincarnated\n"); p = mallocx(1, MALLOCX_TCACHE_NONE); assert_ptr_not_null(p, "Unexpected malloc() failure"); - assert_ptr_not_null(tsd->arena, + assert_ptr_not_null(*tsd_arenap_get_unsafe(tsd), "Should have tsd arena set after reincarnation."); free(p); tsd_cleanup((void *)tsd); - assert_ptr_null(tsd->arena, + assert_ptr_null(*tsd_arenap_get_unsafe(tsd), "TSD arena should have been cleared after 2nd cleanup."); return NULL; @@ -134,14 +126,11 @@ TEST_END int main(void) { - /* Core tsd bootstrapping must happen prior to data_tsd_boot(). */ + /* Ensure tsd bootstrapped. */ if (nallocx(1, 0) == 0) { malloc_printf("Initialization error"); return test_status_fail; } - data_test_started = false; - data_tsd_boot(); - data_test_started = true; return test_no_reentrancy( test_tsd_main_thread,