#ifndef JEMALLOC_INTERNAL_TSD_STRUCTS_H
#define JEMALLOC_INTERNAL_TSD_STRUCTS_H

#include "jemalloc/internal/ql.h"

#if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) && \
    !defined(_WIN32))
struct tsd_init_block_s {
	ql_elm(tsd_init_block_t) link;
	pthread_t thread;
	void *data;
};
struct tsd_init_head_s {
	ql_head(tsd_init_block_t) blocks;
	malloc_mutex_t lock;
};
#endif

/*
 * Thread-Specific-Data layout
 * --- data accessed on tcache fast path: state, rtree_ctx, stats, prof ---
 * s: state
 * e: tcache_enabled
 * m: thread_allocated (config_stats)
 * f: thread_deallocated (config_stats)
 * p: prof_tdata (config_prof)
 * c: rtree_ctx (rtree cache accessed on deallocation)
 * t: tcache
 * --- data not accessed on tcache fast path: arena-related fields ---
 * d: arenas_tdata_bypass
 * r: reentrancy_level
 * x: narenas_tdata
 * i: iarena
 * a: arena
 * o: arenas_tdata
 *
 * Loading TSD data is on the critical path of basically all malloc
 * operations.  In particular, tcache and rtree_ctx rely on hot CPU cache to
 * be effective.  Use a compact layout to reduce cache footprint.
 * +--- 64-bit and 64B cacheline; 1B each letter; First byte on the left. ---+
 * |---------------------------- 1st cacheline ----------------------------|
 * | sedrxxxx mmmmmmmm ffffffff pppppppp [c * 32 ........ ........ .......] |
 * |---------------------------- 2nd cacheline ----------------------------|
 * | [c * 64 ........ ........ ........ ........ ........ ........ .......] |
 * |---------------------------- 3rd cacheline ----------------------------|
 * | [c * 32 ........ ........ .......] iiiiiiii aaaaaaaa oooooooo [t...... |
 * +-------------------------------------------------------------------------+
 * Note: the entire tcache is embedded into TSD and spans multiple cachelines.
 *
 * The last 3 members (i, a and o) before tcache aren't really needed on the
 * tcache fast path.  However, we have a number of unused tcache bins and
 * witnesses (never touched unless config_debug) at the end of the tcache, so
 * we place them there to avoid breaking the cachelines and possibly paging in
 * an extra page.
 */
#define MALLOC_TSD                                                          \
/* O(name,                      type,              [gs]et, init, cleanup) */ \
    O(tcache_enabled,           bool,              yes,    yes,  no)        \
    O(arenas_tdata_bypass,      bool,              no,     no,   no)        \
    O(reentrancy_level,         int8_t,            no,     no,   no)        \
    O(narenas_tdata,            uint32_t,          yes,    no,   no)        \
    O(thread_allocated,         uint64_t,          yes,    no,   no)        \
    O(thread_deallocated,       uint64_t,          yes,    no,   no)        \
    O(prof_tdata,               prof_tdata_t *,    yes,    no,   yes)       \
    O(rtree_ctx,                rtree_ctx_t,       no,     yes,  no)        \
    O(iarena,                   arena_t *,         yes,    no,   yes)       \
    O(arena,                    arena_t *,         yes,    no,   yes)       \
    O(arenas_tdata,             arena_tdata_t *,   yes,    no,   yes)       \
    O(tcache,                   tcache_t,          no,     no,   yes)       \
    O(witnesses,                witness_list_t,    no,     no,   yes)       \
    O(rtree_leaf_elm_witnesses, rtree_leaf_elm_witness_tsd_t,               \
                                                   no,     no,   no)        \
    O(witness_fork,             bool,              yes,    no,   no)

#define TSD_INITIALIZER {                       \
    tsd_state_uninitialized,                    \
    TCACHE_ENABLED_ZERO_INITIALIZER,            \
    false,                                      \
    0,                                          \
    0,                                          \
    0,                                          \
    0,                                          \
    NULL,                                       \
    RTREE_CTX_ZERO_INITIALIZER,                 \
    NULL,                                       \
    NULL,                                       \
    NULL,                                       \
    TCACHE_ZERO_INITIALIZER,                    \
    ql_head_initializer(witnesses),             \
    RTREE_ELM_WITNESS_TSD_INITIALIZER,          \
    false                                       \
}

struct tsd_s {
	tsd_state_t state;
#define O(n, t, gs, i, c) \
	t n;
MALLOC_TSD
#undef O
};
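/*
 * Illustration only (not compiled): with the O() definition above, each
 * MALLOC_TSD row becomes one struct member.  E.g. the first row,
 *
 *   O(tcache_enabled, bool, yes, yes, no)
 *
 * expands to
 *
 *   bool tcache_enabled;
 *
 * so tsd_s holds one field per row, in MALLOC_TSD order, matching the
 * positional values in TSD_INITIALIZER above.
 */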
/*
 * Wrapper around tsd_t that makes it possible to avoid implicit conversion
 * between tsd_t and tsdn_t, where tsdn_t is "nullable" and has to be
 * explicitly converted to tsd_t, which is non-nullable.
 */
struct tsdn_s {
	tsd_t tsd;
};

static const tsd_t tsd_initializer = TSD_INITIALIZER;

UNUSED static const void *malloc_tsd_no_cleanup = (void *)0;

malloc_tsd_types(, tsd_t)

#endif /* JEMALLOC_INTERNAL_TSD_STRUCTS_H */
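/*
 * Illustrative sketch, not part of this header: the explicit conversions
 * between tsd_t and tsdn_t live in the companion inline-function header, and
 * look roughly like the following (names follow jemalloc's tsd_tsdn/tsdn_tsd
 * convention; treat the exact signatures as an assumption here):
 *
 *   static inline tsdn_t *
 *   tsd_tsdn(tsd_t *tsd) {
 *           return (tsdn_t *)tsd;   // widening to the nullable type: safe
 *   }
 *
 *   static inline tsd_t *
 *   tsdn_tsd(tsdn_t *tsdn) {
 *           assert(tsdn != NULL);   // narrowing: caller must rule out NULL
 *           return &tsdn->tsd;
 *   }
 *
 * Because the conversion to tsd_t is an explicit call that asserts non-NULL,
 * a nullable tsdn_t can never silently flow into code expecting a valid tsd_t.
 */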