diff --git a/include/jemalloc/internal/thread_event.h b/include/jemalloc/internal/thread_event.h index 383af30a..d528c051 100644 --- a/include/jemalloc/internal/thread_event.h +++ b/include/jemalloc/internal/thread_event.h @@ -3,39 +3,40 @@ #include "jemalloc/internal/tsd.h" +/* "te" is short for "thread_event" */ + /* * Maximum threshold on thread_(de)allocated_next_event_fast, so that there is * no need to check overflow in malloc fast path. (The allocation size in malloc * fast path never exceeds SC_LOOKUP_MAXCLASS.) */ -#define THREAD_NEXT_EVENT_FAST_MAX \ - (UINT64_MAX - SC_LOOKUP_MAXCLASS + 1U) +#define TE_NEXT_EVENT_FAST_MAX (UINT64_MAX - SC_LOOKUP_MAXCLASS + 1U) /* * The max interval helps make sure that malloc stays on the fast path in the * common case, i.e. thread_allocated < thread_allocated_next_event_fast. When - * thread_allocated is within an event's distance to THREAD_NEXT_EVENT_FAST_MAX + * thread_allocated is within an event's distance to TE_NEXT_EVENT_FAST_MAX * above, thread_allocated_next_event_fast is wrapped around and we fall back to * the medium-fast path. The max interval makes sure that we're not staying on * the fallback case for too long, even if there's no active event or if all * active events have long wait times. 
*/ -#define THREAD_EVENT_MAX_INTERVAL ((uint64_t)(4U << 20)) +#define TE_MAX_INTERVAL ((uint64_t)(4U << 20)) -typedef struct event_ctx_s { +typedef struct te_ctx_s { bool is_alloc; uint64_t *current; uint64_t *last_event; uint64_t *next_event; uint64_t *next_event_fast; -} event_ctx_t; +} te_ctx_t; -void thread_event_assert_invariants_debug(tsd_t *tsd); -void thread_event_trigger(tsd_t *tsd, event_ctx_t *ctx, bool delay_event); -void thread_alloc_event_rollback(tsd_t *tsd, size_t diff); -void thread_event_update(tsd_t *tsd, bool alloc_event); -void thread_event_recompute_fast_threshold(tsd_t *tsd); -void tsd_thread_event_init(tsd_t *tsd); +void te_assert_invariants_debug(tsd_t *tsd); +void te_event_trigger(tsd_t *tsd, te_ctx_t *ctx, bool delay_event); +void te_alloc_rollback(tsd_t *tsd, size_t diff); +void te_event_update(tsd_t *tsd, bool alloc_event); +void te_recompute_fast_threshold(tsd_t *tsd); +void tsd_te_init(tsd_t *tsd); /* * List of all events, in the following format: @@ -97,21 +98,16 @@ ITERATE_OVER_ALL_COUNTERS * * Note that these can only be used on the fastpath. 
*/ -JEMALLOC_ALWAYS_INLINE uint64_t -thread_allocated_malloc_fastpath(tsd_t *tsd) { - return *tsd_thread_allocatedp_get_unsafe(tsd); -} - -JEMALLOC_ALWAYS_INLINE uint64_t -thread_allocated_next_event_malloc_fastpath(tsd_t *tsd) { - uint64_t v = *tsd_thread_allocated_next_event_fastp_get_unsafe(tsd); - assert(v <= THREAD_NEXT_EVENT_FAST_MAX); - return v; +JEMALLOC_ALWAYS_INLINE void +te_malloc_fastpath_ctx(tsd_t *tsd, uint64_t *allocated, uint64_t *threshold) { + *allocated = *tsd_thread_allocatedp_get_unsafe(tsd); + *threshold = *tsd_thread_allocated_next_event_fastp_get_unsafe(tsd); + assert(*threshold <= TE_NEXT_EVENT_FAST_MAX); } JEMALLOC_ALWAYS_INLINE void -thread_event_free_fastpath_ctx(tsd_t *tsd, uint64_t *deallocated, - uint64_t *threshold, bool size_hint) { +te_free_fastpath_ctx(tsd_t *tsd, uint64_t *deallocated, uint64_t *threshold, + bool size_hint) { if (!size_hint) { *deallocated = tsd_thread_deallocated_get(tsd); *threshold = tsd_thread_deallocated_next_event_fast_get(tsd); @@ -121,50 +117,50 @@ thread_event_free_fastpath_ctx(tsd_t *tsd, uint64_t *deallocated, *threshold = *tsd_thread_deallocated_next_event_fastp_get_unsafe(tsd); } - assert(*threshold <= THREAD_NEXT_EVENT_FAST_MAX); + assert(*threshold <= TE_NEXT_EVENT_FAST_MAX); } JEMALLOC_ALWAYS_INLINE bool -event_ctx_is_alloc(event_ctx_t *ctx) { +te_ctx_is_alloc(te_ctx_t *ctx) { return ctx->is_alloc; } JEMALLOC_ALWAYS_INLINE uint64_t -event_ctx_current_bytes_get(event_ctx_t *ctx) { +te_ctx_current_bytes_get(te_ctx_t *ctx) { return *ctx->current; } JEMALLOC_ALWAYS_INLINE void -event_ctx_current_bytes_set(event_ctx_t *ctx, uint64_t v) { +te_ctx_current_bytes_set(te_ctx_t *ctx, uint64_t v) { *ctx->current = v; } JEMALLOC_ALWAYS_INLINE uint64_t -event_ctx_last_event_get(event_ctx_t *ctx) { +te_ctx_last_event_get(te_ctx_t *ctx) { return *ctx->last_event; } JEMALLOC_ALWAYS_INLINE void -event_ctx_last_event_set(event_ctx_t *ctx, uint64_t v) { +te_ctx_last_event_set(te_ctx_t *ctx, uint64_t v) { 
*ctx->last_event = v; } /* Below 3 for next_event_fast. */ JEMALLOC_ALWAYS_INLINE uint64_t -event_ctx_next_event_fast_get(event_ctx_t *ctx) { +te_ctx_next_event_fast_get(te_ctx_t *ctx) { uint64_t v = *ctx->next_event_fast; - assert(v <= THREAD_NEXT_EVENT_FAST_MAX); + assert(v <= TE_NEXT_EVENT_FAST_MAX); return v; } JEMALLOC_ALWAYS_INLINE void -event_ctx_next_event_fast_set(event_ctx_t *ctx, uint64_t v) { - assert(v <= THREAD_NEXT_EVENT_FAST_MAX); +te_ctx_next_event_fast_set(te_ctx_t *ctx, uint64_t v) { + assert(v <= TE_NEXT_EVENT_FAST_MAX); *ctx->next_event_fast = v; } JEMALLOC_ALWAYS_INLINE void -thread_next_event_fast_set_non_nominal(tsd_t *tsd) { +te_next_event_fast_set_non_nominal(tsd_t *tsd) { /* * Set the fast thresholds to zero when tsd is non-nominal. Use the * unsafe getter as this may get called during tsd init and clean up. @@ -175,14 +171,14 @@ thread_next_event_fast_set_non_nominal(tsd_t *tsd) { /* For next_event. Setter also updates the fast threshold. */ JEMALLOC_ALWAYS_INLINE uint64_t -event_ctx_next_event_get(event_ctx_t *ctx) { +te_ctx_next_event_get(te_ctx_t *ctx) { return *ctx->next_event; } JEMALLOC_ALWAYS_INLINE void -event_ctx_next_event_set(tsd_t *tsd, event_ctx_t *ctx, uint64_t v) { +te_ctx_next_event_set(tsd_t *tsd, te_ctx_t *ctx, uint64_t v) { *ctx->next_event = v; - thread_event_recompute_fast_threshold(tsd); + te_recompute_fast_threshold(tsd); } /* @@ -190,22 +186,22 @@ event_ctx_next_event_set(tsd_t *tsd, event_ctx_t *ctx, uint64_t v) { * a consistent state, which forms the invariants before and after each round * of thread event handling that we can rely on and need to promise. 
* The invariants are only temporarily violated in the middle of: - * (a) thread_event() if an event is triggered (the thread_event_trigger() call + * (a) te_event_advance() if an event is triggered (the te_event_trigger() call * at the end will restore the invariants), - * (b) thread_##event##_event_update() (the thread_event_update() call at the + * (b) te_##event##_event_update() (the te_event_update() call at the * end will restore the invariants), or - * (c) thread_alloc_event_rollback() if the rollback falls below the last_event - * (the thread_event_update() call at the end will restore the invariants). + * (c) te_alloc_rollback() if the rollback falls below the last_event + * (the te_event_update() call at the end will restore the invariants). */ JEMALLOC_ALWAYS_INLINE void -thread_event_assert_invariants(tsd_t *tsd) { +te_assert_invariants(tsd_t *tsd) { if (config_debug) { - thread_event_assert_invariants_debug(tsd); + te_assert_invariants_debug(tsd); } } JEMALLOC_ALWAYS_INLINE void -event_ctx_get(tsd_t *tsd, event_ctx_t *ctx, bool is_alloc) { +te_ctx_get(tsd_t *tsd, te_ctx_t *ctx, bool is_alloc) { ctx->is_alloc = is_alloc; if (is_alloc) { ctx->current = tsd_thread_allocatedp_get(tsd); @@ -223,51 +219,51 @@ event_ctx_get(tsd_t *tsd, event_ctx_t *ctx, bool is_alloc) { } JEMALLOC_ALWAYS_INLINE void -thread_event_advance(tsd_t *tsd, size_t usize, bool is_alloc) { - thread_event_assert_invariants(tsd); +te_event_advance(tsd_t *tsd, size_t usize, bool is_alloc) { + te_assert_invariants(tsd); - event_ctx_t ctx; - event_ctx_get(tsd, &ctx, is_alloc); + te_ctx_t ctx; + te_ctx_get(tsd, &ctx, is_alloc); - uint64_t bytes_before = event_ctx_current_bytes_get(&ctx); - event_ctx_current_bytes_set(&ctx, bytes_before + usize); + uint64_t bytes_before = te_ctx_current_bytes_get(&ctx); + te_ctx_current_bytes_set(&ctx, bytes_before + usize); /* The subtraction is intentionally susceptible to underflow. 
*/ - if (likely(usize < event_ctx_next_event_get(&ctx) - bytes_before)) { - thread_event_assert_invariants(tsd); + if (likely(usize < te_ctx_next_event_get(&ctx) - bytes_before)) { + te_assert_invariants(tsd); } else { - thread_event_trigger(tsd, &ctx, false); + te_event_trigger(tsd, &ctx, false); } } JEMALLOC_ALWAYS_INLINE void thread_dalloc_event(tsd_t *tsd, size_t usize) { - thread_event_advance(tsd, usize, false); + te_event_advance(tsd, usize, false); } JEMALLOC_ALWAYS_INLINE void thread_alloc_event(tsd_t *tsd, size_t usize) { - thread_event_advance(tsd, usize, true); + te_event_advance(tsd, usize, true); } #define E(event, condition, is_alloc) \ JEMALLOC_ALWAYS_INLINE void \ -thread_##event##_event_update(tsd_t *tsd, uint64_t event_wait) { \ - thread_event_assert_invariants(tsd); \ +te_##event##_event_update(tsd_t *tsd, uint64_t event_wait) { \ + te_assert_invariants(tsd); \ assert(condition); \ assert(tsd_nominal(tsd)); \ assert(tsd_reentrancy_level_get(tsd) == 0); \ assert(event_wait > 0U); \ - if (THREAD_EVENT_MIN_START_WAIT > 1U && \ - unlikely(event_wait < THREAD_EVENT_MIN_START_WAIT)) { \ - event_wait = THREAD_EVENT_MIN_START_WAIT; \ + if (TE_MIN_START_WAIT > 1U && \ + unlikely(event_wait < TE_MIN_START_WAIT)) { \ + event_wait = TE_MIN_START_WAIT; \ } \ - if (THREAD_EVENT_MAX_START_WAIT < UINT64_MAX && \ - unlikely(event_wait > THREAD_EVENT_MAX_START_WAIT)) { \ - event_wait = THREAD_EVENT_MAX_START_WAIT; \ + if (TE_MAX_START_WAIT < UINT64_MAX && \ + unlikely(event_wait > TE_MAX_START_WAIT)) { \ + event_wait = TE_MAX_START_WAIT; \ } \ event##_event_wait_set(tsd, event_wait); \ - thread_event_update(tsd, is_alloc); \ + te_event_update(tsd, is_alloc); \ } ITERATE_OVER_ALL_EVENTS diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 6868ce4b..163ffc4b 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -104,10 +104,10 @@ typedef void (*test_callback_t)(int *); MALLOC_TEST_TSD /* - * 
THREAD_EVENT_MIN_START_WAIT should not exceed the minimal allocation usize. + * TE_MIN_START_WAIT should not exceed the minimal allocation usize. */ -#define THREAD_EVENT_MIN_START_WAIT ((uint64_t)1U) -#define THREAD_EVENT_MAX_START_WAIT UINT64_MAX +#define TE_MIN_START_WAIT ((uint64_t)1U) +#define TE_MAX_START_WAIT UINT64_MAX #define TSD_INITIALIZER { \ /* state */ ATOMIC_INIT(tsd_state_uninitialized), \ @@ -121,14 +121,14 @@ typedef void (*test_callback_t)(int *); /* thread_deallocated_next_event_fast */ 0, \ /* rtree_ctx */ RTREE_CTX_ZERO_INITIALIZER, \ /* thread_allocated_last_event */ 0, \ - /* thread_allocated_next_event */ THREAD_EVENT_MIN_START_WAIT, \ + /* thread_allocated_next_event */ TE_MIN_START_WAIT, \ /* thread_deallocated_last_event */ 0, \ - /* thread_deallocated_next_event */ THREAD_EVENT_MIN_START_WAIT, \ - /* tcache_gc_event_wait */ THREAD_EVENT_MIN_START_WAIT, \ - /* tcache_gc_dalloc_event_wait */ THREAD_EVENT_MIN_START_WAIT, \ - /* prof_sample_event_wait */ THREAD_EVENT_MIN_START_WAIT, \ + /* thread_deallocated_next_event */ TE_MIN_START_WAIT, \ + /* tcache_gc_event_wait */ TE_MIN_START_WAIT, \ + /* tcache_gc_dalloc_event_wait */ TE_MIN_START_WAIT, \ + /* prof_sample_event_wait */ TE_MIN_START_WAIT, \ /* prof_sample_last_event */ 0, \ - /* stats_interval_event_wait */ THREAD_EVENT_MIN_START_WAIT, \ + /* stats_interval_event_wait */ TE_MIN_START_WAIT, \ /* stats_interval_last_event */ 0, \ /* prof_tdata */ NULL, \ /* prng_state */ 0, \ diff --git a/src/jemalloc.c b/src/jemalloc.c index e4ef7f3a..190b3a2a 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2180,7 +2180,7 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { } if (unlikely(allocation == NULL)) { - thread_alloc_event_rollback(tsd, usize); + te_alloc_rollback(tsd, usize); prof_alloc_rollback(tsd, tctx, true); goto label_oom; } @@ -2190,7 +2190,7 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { allocation = imalloc_no_sample(sopts, 
dopts, tsd, size, usize, ind); if (unlikely(allocation == NULL)) { - thread_alloc_event_rollback(tsd, usize); + te_alloc_rollback(tsd, usize); goto label_oom; } } @@ -2386,15 +2386,14 @@ je_malloc(size_t size) { * it's not always needed in the core allocation logic. */ size_t usize; - sz_size2index_usize_fastpath(size, &ind, &usize); /* Fast path relies on size being a bin. */ assert(ind < SC_NBINS); assert((SC_LOOKUP_MAXCLASS < SC_SMALL_MAXCLASS) && (size <= SC_SMALL_MAXCLASS)); - uint64_t allocated = thread_allocated_malloc_fastpath(tsd); - uint64_t threshold = thread_allocated_next_event_malloc_fastpath(tsd); + uint64_t allocated, threshold; + te_malloc_fastpath_ctx(tsd, &allocated, &threshold); uint64_t allocated_after = allocated + usize; /* * The ind and usize might be uninitialized (or partially) before @@ -2729,7 +2728,7 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) { szind = sz_size2index_lookup(size); } uint64_t deallocated, threshold; - thread_event_free_fastpath_ctx(tsd, &deallocated, &threshold, size_hint); + te_free_fastpath_ctx(tsd, &deallocated, &threshold, size_hint); size_t usize = sz_index2size(szind); uint64_t deallocated_after = deallocated + usize; @@ -3161,7 +3160,7 @@ do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) { p = irallocx_prof(tsd, ptr, old_usize, size, alignment, &usize, zero, tcache, arena, &alloc_ctx, &hook_args); if (unlikely(p == NULL)) { - thread_alloc_event_rollback(tsd, usize); + te_alloc_rollback(tsd, usize); goto label_oom; } } else { @@ -3362,7 +3361,7 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, extra, alignment, zero); } if (usize <= usize_max) { - thread_alloc_event_rollback(tsd, usize_max - usize); + te_alloc_rollback(tsd, usize_max - usize); } else { /* * For downsizing request, usize_max can be less than usize. 
@@ -3460,7 +3459,7 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { assert(iealloc(tsd_tsdn(tsd), ptr) == old_edata); if (unlikely(usize == old_usize)) { - thread_alloc_event_rollback(tsd, usize); + te_alloc_rollback(tsd, usize); goto label_not_resized; } thread_dalloc_event(tsd, old_usize); diff --git a/src/prof.c b/src/prof.c index 0d29c681..248532e8 100644 --- a/src/prof.c +++ b/src/prof.c @@ -444,8 +444,7 @@ prof_sample_threshold_update(tsd_t *tsd) { } if (lg_prof_sample == 0) { - thread_prof_sample_event_update(tsd, - THREAD_EVENT_MIN_START_WAIT); + te_prof_sample_event_update(tsd, TE_MIN_START_WAIT); return; } @@ -472,7 +471,7 @@ prof_sample_threshold_update(tsd_t *tsd) { uint64_t bytes_until_sample = (uint64_t)(log(u) / log(1.0 - (1.0 / (double)((uint64_t)1U << lg_prof_sample)))) + (uint64_t)1U; - thread_prof_sample_event_update(tsd, bytes_until_sample); + te_prof_sample_event_update(tsd, bytes_until_sample); #endif } diff --git a/src/thread_event.c b/src/thread_event.c index 0fbdebee..dadace38 100644 --- a/src/thread_event.c +++ b/src/thread_event.c @@ -6,46 +6,45 @@ /* TSD event init function signatures. */ #define E(event, condition_unused, is_alloc_event_unused) \ -static void tsd_thread_##event##_event_init(tsd_t *tsd); +static void te_tsd_##event##_event_init(tsd_t *tsd); ITERATE_OVER_ALL_EVENTS #undef E /* Event handler function signatures. */ #define E(event, condition_unused, is_alloc_event_unused) \ -static void thread_##event##_event_handler(tsd_t *tsd); +static void te_##event##_event_handler(tsd_t *tsd); ITERATE_OVER_ALL_EVENTS #undef E /* (Re)Init functions. 
*/ static void -tsd_thread_tcache_gc_event_init(tsd_t *tsd) { +te_tsd_tcache_gc_event_init(tsd_t *tsd) { assert(TCACHE_GC_INCR_BYTES > 0); - thread_tcache_gc_event_update(tsd, TCACHE_GC_INCR_BYTES); + te_tcache_gc_event_update(tsd, TCACHE_GC_INCR_BYTES); } static void -tsd_thread_tcache_gc_dalloc_event_init(tsd_t *tsd) { +te_tsd_tcache_gc_dalloc_event_init(tsd_t *tsd) { assert(TCACHE_GC_INCR_BYTES > 0); - thread_tcache_gc_dalloc_event_update(tsd, TCACHE_GC_INCR_BYTES); + te_tcache_gc_dalloc_event_update(tsd, TCACHE_GC_INCR_BYTES); } static void -tsd_thread_prof_sample_event_init(tsd_t *tsd) { +te_tsd_prof_sample_event_init(tsd_t *tsd) { assert(config_prof && opt_prof); prof_sample_threshold_update(tsd); } static void -tsd_thread_stats_interval_event_init(tsd_t *tsd) { +te_tsd_stats_interval_event_init(tsd_t *tsd) { assert(opt_stats_interval >= 0); uint64_t interval = stats_interval_accum_batch_size(); - thread_stats_interval_event_update(tsd, interval); + te_stats_interval_event_update(tsd, interval); } /* Handler functions. 
*/ - static void tcache_gc_event(tsd_t *tsd) { assert(TCACHE_GC_INCR_BYTES > 0); @@ -56,21 +55,21 @@ tcache_gc_event(tsd_t *tsd) { } static void -thread_tcache_gc_event_handler(tsd_t *tsd) { +te_tcache_gc_event_handler(tsd_t *tsd) { assert(tcache_gc_event_wait_get(tsd) == 0U); - tsd_thread_tcache_gc_event_init(tsd); + te_tsd_tcache_gc_event_init(tsd); tcache_gc_event(tsd); } static void -thread_tcache_gc_dalloc_event_handler(tsd_t *tsd) { +te_tcache_gc_dalloc_event_handler(tsd_t *tsd) { assert(tcache_gc_dalloc_event_wait_get(tsd) == 0U); - tsd_thread_tcache_gc_dalloc_event_init(tsd); + te_tsd_tcache_gc_dalloc_event_init(tsd); tcache_gc_event(tsd); } static void -thread_prof_sample_event_handler(tsd_t *tsd) { +te_prof_sample_event_handler(tsd_t *tsd) { assert(config_prof && opt_prof); assert(prof_sample_event_wait_get(tsd) == 0U); uint64_t last_event = thread_allocated_last_event_get(tsd); @@ -87,13 +86,13 @@ thread_prof_sample_event_handler(tsd_t *tsd) { * prof_active is turned on later, the counting for sampling * can immediately resume as normal. */ - thread_prof_sample_event_update(tsd, + te_prof_sample_event_update(tsd, (uint64_t)(1 << lg_prof_sample)); } } static void -thread_stats_interval_event_handler(tsd_t *tsd) { +te_stats_interval_event_handler(tsd_t *tsd) { assert(opt_stats_interval >= 0); assert(stats_interval_event_wait_get(tsd) == 0U); uint64_t last_event = thread_allocated_last_event_get(tsd); @@ -103,12 +102,12 @@ thread_stats_interval_event_handler(tsd_t *tsd) { if (stats_interval_accum(tsd, last_event - last_stats_event)) { je_malloc_stats_print(NULL, NULL, opt_stats_interval_opts); } - tsd_thread_stats_interval_event_init(tsd); + te_tsd_stats_interval_event_init(tsd); } /* Per event facilities done. 
*/ static bool -event_ctx_has_active_events(event_ctx_t *ctx) { +te_ctx_has_active_events(te_ctx_t *ctx) { assert(config_debug); #define E(event, condition, alloc_event) \ if (condition && alloc_event == ctx->is_alloc) { \ @@ -120,13 +119,13 @@ event_ctx_has_active_events(event_ctx_t *ctx) { } static uint64_t -thread_next_event_compute(tsd_t *tsd, bool is_alloc) { - uint64_t wait = THREAD_EVENT_MAX_START_WAIT; +te_next_event_compute(tsd_t *tsd, bool is_alloc) { + uint64_t wait = TE_MAX_START_WAIT; #define E(event, condition, alloc_event) \ if (is_alloc == alloc_event && condition) { \ uint64_t event_wait = \ event##_event_wait_get(tsd); \ - assert(event_wait <= THREAD_EVENT_MAX_START_WAIT); \ + assert(event_wait <= TE_MAX_START_WAIT); \ if (event_wait > 0U && event_wait < wait) { \ wait = event_wait; \ } \ @@ -134,20 +133,19 @@ thread_next_event_compute(tsd_t *tsd, bool is_alloc) { ITERATE_OVER_ALL_EVENTS #undef E - assert(wait <= THREAD_EVENT_MAX_START_WAIT); + assert(wait <= TE_MAX_START_WAIT); return wait; } static void -thread_event_assert_invariants_impl(tsd_t *tsd, event_ctx_t *ctx) { - uint64_t current_bytes = event_ctx_current_bytes_get(ctx); - uint64_t last_event = event_ctx_last_event_get(ctx); - uint64_t next_event = event_ctx_next_event_get(ctx); - uint64_t next_event_fast = event_ctx_next_event_fast_get(ctx); +te_assert_invariants_impl(tsd_t *tsd, te_ctx_t *ctx) { + uint64_t current_bytes = te_ctx_current_bytes_get(ctx); + uint64_t last_event = te_ctx_last_event_get(ctx); + uint64_t next_event = te_ctx_next_event_get(ctx); + uint64_t next_event_fast = te_ctx_next_event_fast_get(ctx); assert(last_event != next_event); - if (next_event > THREAD_NEXT_EVENT_FAST_MAX || - !tsd_fast(tsd)) { + if (next_event > TE_NEXT_EVENT_FAST_MAX || !tsd_fast(tsd)) { assert(next_event_fast == 0U); } else { assert(next_event_fast == next_event); @@ -158,27 +156,26 @@ thread_event_assert_invariants_impl(tsd_t *tsd, event_ctx_t *ctx) { /* The subtraction is intentionally 
susceptible to underflow. */ assert(current_bytes - last_event < interval); - uint64_t min_wait = thread_next_event_compute(tsd, - event_ctx_is_alloc(ctx)); + uint64_t min_wait = te_next_event_compute(tsd, te_ctx_is_alloc(ctx)); /* * next_event should have been pushed up only except when no event is * on and the TSD is just initialized. The last_event == 0U guard * below is stronger than needed, but having an exactly accurate guard * is more complicated to implement. */ - assert((!event_ctx_has_active_events(ctx) && last_event == 0U) || + assert((!te_ctx_has_active_events(ctx) && last_event == 0U) || interval == min_wait || - (interval < min_wait && interval == THREAD_EVENT_MAX_INTERVAL)); + (interval < min_wait && interval == TE_MAX_INTERVAL)); } void -thread_event_assert_invariants_debug(tsd_t *tsd) { - event_ctx_t ctx; - event_ctx_get(tsd, &ctx, true); - thread_event_assert_invariants_impl(tsd, &ctx); +te_assert_invariants_debug(tsd_t *tsd) { + te_ctx_t ctx; + te_ctx_get(tsd, &ctx, true); + te_assert_invariants_impl(tsd, &ctx); - event_ctx_get(tsd, &ctx, false); - thread_event_assert_invariants_impl(tsd, &ctx); + te_ctx_get(tsd, &ctx, false); + te_assert_invariants_impl(tsd, &ctx); } /* @@ -229,66 +226,65 @@ thread_event_assert_invariants_debug(tsd_t *tsd) { * it down the slow path earlier). */ static void -event_ctx_next_event_fast_update(event_ctx_t *ctx) { - uint64_t next_event = event_ctx_next_event_get(ctx); - uint64_t next_event_fast = (next_event <= - THREAD_NEXT_EVENT_FAST_MAX) ? next_event : 0U; - event_ctx_next_event_fast_set(ctx, next_event_fast); +te_ctx_next_event_fast_update(te_ctx_t *ctx) { + uint64_t next_event = te_ctx_next_event_get(ctx); + uint64_t next_event_fast = (next_event <= TE_NEXT_EVENT_FAST_MAX) ? 
+ next_event : 0U; + te_ctx_next_event_fast_set(ctx, next_event_fast); } void -thread_event_recompute_fast_threshold(tsd_t *tsd) { +te_recompute_fast_threshold(tsd_t *tsd) { if (tsd_state_get(tsd) != tsd_state_nominal) { /* Check first because this is also called on purgatory. */ - thread_next_event_fast_set_non_nominal(tsd); + te_next_event_fast_set_non_nominal(tsd); return; } - event_ctx_t ctx; - event_ctx_get(tsd, &ctx, true); - event_ctx_next_event_fast_update(&ctx); - event_ctx_get(tsd, &ctx, false); - event_ctx_next_event_fast_update(&ctx); + te_ctx_t ctx; + te_ctx_get(tsd, &ctx, true); + te_ctx_next_event_fast_update(&ctx); + te_ctx_get(tsd, &ctx, false); + te_ctx_next_event_fast_update(&ctx); atomic_fence(ATOMIC_SEQ_CST); if (tsd_state_get(tsd) != tsd_state_nominal) { - thread_next_event_fast_set_non_nominal(tsd); + te_next_event_fast_set_non_nominal(tsd); } } static void -thread_event_adjust_thresholds_helper(tsd_t *tsd, event_ctx_t *ctx, +te_adjust_thresholds_helper(tsd_t *tsd, te_ctx_t *ctx, uint64_t wait) { - assert(wait <= THREAD_EVENT_MAX_START_WAIT); - uint64_t next_event = event_ctx_last_event_get(ctx) + (wait <= - THREAD_EVENT_MAX_INTERVAL ? wait : THREAD_EVENT_MAX_INTERVAL); - event_ctx_next_event_set(tsd, ctx, next_event); + assert(wait <= TE_MAX_START_WAIT); + uint64_t next_event = te_ctx_last_event_get(ctx) + (wait <= + TE_MAX_INTERVAL ? 
wait : TE_MAX_INTERVAL); + te_ctx_next_event_set(tsd, ctx, next_event); } static uint64_t -thread_event_trigger_batch_update(tsd_t *tsd, uint64_t accumbytes, - bool is_alloc, bool allow_event_trigger) { - uint64_t wait = THREAD_EVENT_MAX_START_WAIT; +te_batch_accum(tsd_t *tsd, uint64_t accumbytes, bool is_alloc, + bool allow_event_trigger) { + uint64_t wait = TE_MAX_START_WAIT; #define E(event, condition, alloc_event) \ if (is_alloc == alloc_event && condition) { \ uint64_t event_wait = event##_event_wait_get(tsd); \ - assert(event_wait <= THREAD_EVENT_MAX_START_WAIT); \ + assert(event_wait <= TE_MAX_START_WAIT); \ if (event_wait > accumbytes) { \ event_wait -= accumbytes; \ } else { \ event_wait = 0U; \ if (!allow_event_trigger) { \ - event_wait = \ - THREAD_EVENT_MIN_START_WAIT; \ + event_wait = TE_MIN_START_WAIT; \ } \ } \ - assert(event_wait <= THREAD_EVENT_MAX_START_WAIT); \ + assert(event_wait <= TE_MAX_START_WAIT); \ event##_event_wait_set(tsd, event_wait); \ /* \ * If there is a single event, then the remaining wait \ * time may become zero, and we rely on either the \ - * event handler or a thread_event_update() call later \ + * event handler or a te_event_update() call later \ * to properly set next_event; if there are multiple \ * events, then here we can get the minimum remaining \ * wait time to the next already set event. \ @@ -301,72 +297,64 @@ thread_event_trigger_batch_update(tsd_t *tsd, uint64_t accumbytes, ITERATE_OVER_ALL_EVENTS #undef E - assert(wait <= THREAD_EVENT_MAX_START_WAIT); + assert(wait <= TE_MAX_START_WAIT); return wait; } void -thread_event_trigger(tsd_t *tsd, event_ctx_t *ctx, bool delay_event) { +te_event_trigger(tsd_t *tsd, te_ctx_t *ctx, bool delay_event) { /* usize has already been added to thread_allocated. */ - uint64_t bytes_after = event_ctx_current_bytes_get(ctx); - + uint64_t bytes_after = te_ctx_current_bytes_get(ctx); /* The subtraction is intentionally susceptible to underflow. 
*/ - uint64_t accumbytes = bytes_after - event_ctx_last_event_get(ctx); + uint64_t accumbytes = bytes_after - te_ctx_last_event_get(ctx); - /* Make sure that accumbytes cannot overflow uint64_t. */ - assert(THREAD_EVENT_MAX_INTERVAL <= UINT64_MAX - SC_LARGE_MAXCLASS + 1); - - event_ctx_last_event_set(ctx, bytes_after); + te_ctx_last_event_set(ctx, bytes_after); bool allow_event_trigger = !delay_event && tsd_nominal(tsd) && tsd_reentrancy_level_get(tsd) == 0; bool is_alloc = ctx->is_alloc; - uint64_t wait = thread_event_trigger_batch_update(tsd, accumbytes, - is_alloc, allow_event_trigger); - thread_event_adjust_thresholds_helper(tsd, ctx, wait); - - thread_event_assert_invariants(tsd); + uint64_t wait = te_batch_accum(tsd, accumbytes, is_alloc, + allow_event_trigger); + te_adjust_thresholds_helper(tsd, ctx, wait); + te_assert_invariants(tsd); #define E(event, condition, alloc_event) \ if (is_alloc == alloc_event && condition && \ event##_event_wait_get(tsd) == 0U) { \ assert(allow_event_trigger); \ - thread_##event##_event_handler(tsd); \ + te_##event##_event_handler(tsd); \ } ITERATE_OVER_ALL_EVENTS #undef E - - thread_event_assert_invariants(tsd); + te_assert_invariants(tsd); } void -thread_alloc_event_rollback(tsd_t *tsd, size_t diff) { - thread_event_assert_invariants(tsd); - +te_alloc_rollback(tsd_t *tsd, size_t diff) { + te_assert_invariants(tsd); if (diff == 0U) { return; } /* Rollback happens only on alloc events. */ - event_ctx_t ctx; - event_ctx_get(tsd, &ctx, true); + te_ctx_t ctx; + te_ctx_get(tsd, &ctx, true); - uint64_t thread_allocated = event_ctx_current_bytes_get(&ctx); + uint64_t thread_allocated = te_ctx_current_bytes_get(&ctx); /* The subtraction is intentionally susceptible to underflow. 
*/ uint64_t thread_allocated_rollback = thread_allocated - diff; - event_ctx_current_bytes_set(&ctx, thread_allocated_rollback); + te_ctx_current_bytes_set(&ctx, thread_allocated_rollback); - uint64_t last_event = event_ctx_last_event_get(&ctx); + uint64_t last_event = te_ctx_last_event_get(&ctx); /* Both subtractions are intentionally susceptible to underflow. */ if (thread_allocated_rollback - last_event <= thread_allocated - last_event) { - thread_event_assert_invariants(tsd); + te_assert_invariants(tsd); return; } - event_ctx_last_event_set(&ctx, thread_allocated_rollback); - + te_ctx_last_event_set(&ctx, thread_allocated_rollback); /* The subtraction is intentionally susceptible to underflow. */ uint64_t wait_diff = last_event - thread_allocated_rollback; assert(wait_diff <= diff); @@ -374,49 +362,48 @@ thread_alloc_event_rollback(tsd_t *tsd, size_t diff) { #define E(event, condition, alloc_event) \ if (alloc_event == true && condition) { \ uint64_t event_wait = event##_event_wait_get(tsd); \ - assert(event_wait <= THREAD_EVENT_MAX_START_WAIT); \ + assert(event_wait <= TE_MAX_START_WAIT); \ if (event_wait > 0U) { \ - if (wait_diff > \ - THREAD_EVENT_MAX_START_WAIT - event_wait) { \ - event_wait = \ - THREAD_EVENT_MAX_START_WAIT; \ + if (wait_diff > TE_MAX_START_WAIT - event_wait) {\ + event_wait = TE_MAX_START_WAIT; \ } else { \ event_wait += wait_diff; \ } \ - assert(event_wait <= \ - THREAD_EVENT_MAX_START_WAIT); \ + assert(event_wait <= TE_MAX_START_WAIT); \ event##_event_wait_set(tsd, event_wait); \ } \ } ITERATE_OVER_ALL_EVENTS #undef E - - thread_event_update(tsd, true); + te_event_update(tsd, true); } void -thread_event_update(tsd_t *tsd, bool is_alloc) { - event_ctx_t ctx; - event_ctx_get(tsd, &ctx, is_alloc); +te_event_update(tsd_t *tsd, bool is_alloc) { + te_ctx_t ctx; + te_ctx_get(tsd, &ctx, is_alloc); - uint64_t wait = thread_next_event_compute(tsd, is_alloc); - thread_event_adjust_thresholds_helper(tsd, &ctx, wait); + uint64_t wait = 
te_next_event_compute(tsd, is_alloc); + te_adjust_thresholds_helper(tsd, &ctx, wait); - uint64_t last_event = event_ctx_last_event_get(&ctx); + uint64_t last_event = te_ctx_last_event_get(&ctx); /* Both subtractions are intentionally susceptible to underflow. */ - if (event_ctx_current_bytes_get(&ctx) - last_event >= - event_ctx_next_event_get(&ctx) - last_event) { - thread_event_trigger(tsd, &ctx, true); + if (te_ctx_current_bytes_get(&ctx) - last_event >= + te_ctx_next_event_get(&ctx) - last_event) { + te_event_trigger(tsd, &ctx, true); } else { - thread_event_assert_invariants(tsd); + te_assert_invariants(tsd); } } -void tsd_thread_event_init(tsd_t *tsd) { +void tsd_te_init(tsd_t *tsd) { + /* Make sure no overflow for the bytes accumulated on event_trigger. */ + assert(TE_MAX_INTERVAL <= UINT64_MAX - SC_LARGE_MAXCLASS + 1); + #define E(event, condition, is_alloc_event_unused) \ if (condition) { \ - tsd_thread_##event##_event_init(tsd); \ + te_tsd_##event##_event_init(tsd); \ } ITERATE_OVER_ALL_EVENTS diff --git a/src/tsd.c b/src/tsd.c index 54e5b4af..38196c80 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -117,9 +117,9 @@ tsd_force_recompute(tsdn_t *tsdn) { <= tsd_state_nominal_max); tsd_atomic_store(&remote_tsd->state, tsd_state_nominal_recompute, ATOMIC_RELAXED); - /* See comments in thread_event_recompute_fast_threshold(). */ + /* See comments in te_recompute_fast_threshold(). 
*/ atomic_fence(ATOMIC_SEQ_CST); - thread_next_event_fast_set_non_nominal(remote_tsd); + te_next_event_fast_set_non_nominal(remote_tsd); } malloc_mutex_unlock(tsdn, &tsd_nominal_tsds_lock); } @@ -179,7 +179,7 @@ tsd_slow_update(tsd_t *tsd) { ATOMIC_ACQUIRE); } while (old_state == tsd_state_nominal_recompute); - thread_event_recompute_fast_threshold(tsd); + te_recompute_fast_threshold(tsd); } void @@ -218,7 +218,7 @@ tsd_state_set(tsd_t *tsd, uint8_t new_state) { tsd_slow_update(tsd); } } - thread_event_recompute_fast_threshold(tsd); + te_recompute_fast_threshold(tsd); } static bool @@ -240,7 +240,7 @@ tsd_data_init(tsd_t *tsd) { (uint64_t)(uintptr_t)tsd; /* event_init may use the prng state above. */ - tsd_thread_event_init(tsd); + tsd_te_init(tsd); return tsd_tcache_enabled_data_init(tsd); } diff --git a/test/unit/thread_event.c b/test/unit/thread_event.c index db2d637e..0855829c 100644 --- a/test/unit/thread_event.c +++ b/test/unit/thread_event.c @@ -2,17 +2,15 @@ TEST_BEGIN(test_next_event_fast_roll_back) { tsd_t *tsd = tsd_fetch(); - event_ctx_t ctx; - event_ctx_get(tsd, &ctx, true); + te_ctx_t ctx; + te_ctx_get(tsd, &ctx, true); - event_ctx_last_event_set(&ctx, 0); - event_ctx_current_bytes_set(&ctx, - THREAD_NEXT_EVENT_FAST_MAX - 8U); - event_ctx_next_event_set(tsd, &ctx, - THREAD_NEXT_EVENT_FAST_MAX); + te_ctx_last_event_set(&ctx, 0); + te_ctx_current_bytes_set(&ctx, TE_NEXT_EVENT_FAST_MAX - 8U); + te_ctx_next_event_set(tsd, &ctx, TE_NEXT_EVENT_FAST_MAX); #define E(event, condition, is_alloc) \ if (is_alloc && condition) { \ - event##_event_wait_set(tsd, THREAD_NEXT_EVENT_FAST_MAX);\ + event##_event_wait_set(tsd, TE_NEXT_EVENT_FAST_MAX); \ } ITERATE_OVER_ALL_EVENTS #undef E @@ -25,18 +23,16 @@ TEST_END TEST_BEGIN(test_next_event_fast_resume) { tsd_t *tsd = tsd_fetch(); - event_ctx_t ctx; - event_ctx_get(tsd, &ctx, true); + te_ctx_t ctx; + te_ctx_get(tsd, &ctx, true); - event_ctx_last_event_set(&ctx, 0); - event_ctx_current_bytes_set(&ctx, - 
THREAD_NEXT_EVENT_FAST_MAX + 8U); - event_ctx_next_event_set(tsd, &ctx, - THREAD_NEXT_EVENT_FAST_MAX + 16U); + te_ctx_last_event_set(&ctx, 0); + te_ctx_current_bytes_set(&ctx, TE_NEXT_EVENT_FAST_MAX + 8U); + te_ctx_next_event_set(tsd, &ctx, TE_NEXT_EVENT_FAST_MAX + 16U); #define E(event, condition, is_alloc) \ if (is_alloc && condition) { \ event##_event_wait_set(tsd, \ - THREAD_NEXT_EVENT_FAST_MAX + 16U); \ + TE_NEXT_EVENT_FAST_MAX + 16U); \ } ITERATE_OVER_ALL_EVENTS #undef E @@ -48,11 +44,11 @@ TEST_END TEST_BEGIN(test_event_rollback) { tsd_t *tsd = tsd_fetch(); - const uint64_t diff = THREAD_EVENT_MAX_INTERVAL >> 2; + const uint64_t diff = TE_MAX_INTERVAL >> 2; size_t count = 10; uint64_t thread_allocated = thread_allocated_get(tsd); while (count-- != 0) { - thread_alloc_event_rollback(tsd, diff); + te_alloc_rollback(tsd, diff); uint64_t thread_allocated_after = thread_allocated_get(tsd); assert_u64_eq(thread_allocated - thread_allocated_after, diff, "thread event counters are not properly rolled back");