Abbreviate thread-event to te.

This commit is contained in:
Qi Wang 2020-01-30 16:31:45 -08:00 committed by Qi Wang
parent 5e500523a0
commit e896522616
7 changed files with 201 additions and 224 deletions

View File

@ -3,39 +3,40 @@
#include "jemalloc/internal/tsd.h"
/* "te" is short for "thread_event" */
/*
* Maximum threshold on thread_(de)allocated_next_event_fast, so that there is
* no need to check overflow in malloc fast path. (The allocation size in malloc
* fast path never exceeds SC_LOOKUP_MAXCLASS.)
*/
#define THREAD_NEXT_EVENT_FAST_MAX \
(UINT64_MAX - SC_LOOKUP_MAXCLASS + 1U)
#define TE_NEXT_EVENT_FAST_MAX (UINT64_MAX - SC_LOOKUP_MAXCLASS + 1U)
/*
* The max interval helps make sure that malloc stays on the fast path in the
* common case, i.e. thread_allocated < thread_allocated_next_event_fast. When
* thread_allocated is within an event's distance to THREAD_NEXT_EVENT_FAST_MAX
* thread_allocated is within an event's distance to TE_NEXT_EVENT_FAST_MAX
* above, thread_allocated_next_event_fast is wrapped around and we fall back to
* the medium-fast path. The max interval makes sure that we're not staying on
* the fallback case for too long, even if there's no active event or if all
* active events have long wait times.
*/
#define THREAD_EVENT_MAX_INTERVAL ((uint64_t)(4U << 20))
#define TE_MAX_INTERVAL ((uint64_t)(4U << 20))
typedef struct event_ctx_s {
typedef struct te_ctx_s {
bool is_alloc;
uint64_t *current;
uint64_t *last_event;
uint64_t *next_event;
uint64_t *next_event_fast;
} event_ctx_t;
} te_ctx_t;
void thread_event_assert_invariants_debug(tsd_t *tsd);
void thread_event_trigger(tsd_t *tsd, event_ctx_t *ctx, bool delay_event);
void thread_alloc_event_rollback(tsd_t *tsd, size_t diff);
void thread_event_update(tsd_t *tsd, bool alloc_event);
void thread_event_recompute_fast_threshold(tsd_t *tsd);
void tsd_thread_event_init(tsd_t *tsd);
void te_assert_invariants_debug(tsd_t *tsd);
void te_event_trigger(tsd_t *tsd, te_ctx_t *ctx, bool delay_event);
void te_alloc_rollback(tsd_t *tsd, size_t diff);
void te_event_update(tsd_t *tsd, bool alloc_event);
void te_recompute_fast_threshold(tsd_t *tsd);
void tsd_te_init(tsd_t *tsd);
/*
* List of all events, in the following format:
@ -97,21 +98,16 @@ ITERATE_OVER_ALL_COUNTERS
*
* Note that these can only be used on the fastpath.
*/
JEMALLOC_ALWAYS_INLINE uint64_t
thread_allocated_malloc_fastpath(tsd_t *tsd) {
return *tsd_thread_allocatedp_get_unsafe(tsd);
}
JEMALLOC_ALWAYS_INLINE uint64_t
thread_allocated_next_event_malloc_fastpath(tsd_t *tsd) {
uint64_t v = *tsd_thread_allocated_next_event_fastp_get_unsafe(tsd);
assert(v <= THREAD_NEXT_EVENT_FAST_MAX);
return v;
JEMALLOC_ALWAYS_INLINE void
te_malloc_fastpath_ctx(tsd_t *tsd, uint64_t *allocated, uint64_t *threshold) {
*allocated = *tsd_thread_allocatedp_get_unsafe(tsd);
*threshold = *tsd_thread_allocated_next_event_fastp_get_unsafe(tsd);
assert(*threshold <= TE_NEXT_EVENT_FAST_MAX);
}
JEMALLOC_ALWAYS_INLINE void
thread_event_free_fastpath_ctx(tsd_t *tsd, uint64_t *deallocated,
uint64_t *threshold, bool size_hint) {
te_free_fastpath_ctx(tsd_t *tsd, uint64_t *deallocated, uint64_t *threshold,
bool size_hint) {
if (!size_hint) {
*deallocated = tsd_thread_deallocated_get(tsd);
*threshold = tsd_thread_deallocated_next_event_fast_get(tsd);
@ -121,50 +117,50 @@ thread_event_free_fastpath_ctx(tsd_t *tsd, uint64_t *deallocated,
*threshold =
*tsd_thread_deallocated_next_event_fastp_get_unsafe(tsd);
}
assert(*threshold <= THREAD_NEXT_EVENT_FAST_MAX);
assert(*threshold <= TE_NEXT_EVENT_FAST_MAX);
}
JEMALLOC_ALWAYS_INLINE bool
event_ctx_is_alloc(event_ctx_t *ctx) {
te_ctx_is_alloc(te_ctx_t *ctx) {
return ctx->is_alloc;
}
JEMALLOC_ALWAYS_INLINE uint64_t
event_ctx_current_bytes_get(event_ctx_t *ctx) {
te_ctx_current_bytes_get(te_ctx_t *ctx) {
return *ctx->current;
}
JEMALLOC_ALWAYS_INLINE void
event_ctx_current_bytes_set(event_ctx_t *ctx, uint64_t v) {
te_ctx_current_bytes_set(te_ctx_t *ctx, uint64_t v) {
*ctx->current = v;
}
JEMALLOC_ALWAYS_INLINE uint64_t
event_ctx_last_event_get(event_ctx_t *ctx) {
te_ctx_last_event_get(te_ctx_t *ctx) {
return *ctx->last_event;
}
JEMALLOC_ALWAYS_INLINE void
event_ctx_last_event_set(event_ctx_t *ctx, uint64_t v) {
te_ctx_last_event_set(te_ctx_t *ctx, uint64_t v) {
*ctx->last_event = v;
}
/* The 3 functions below are for next_event_fast. */
JEMALLOC_ALWAYS_INLINE uint64_t
event_ctx_next_event_fast_get(event_ctx_t *ctx) {
te_ctx_next_event_fast_get(te_ctx_t *ctx) {
uint64_t v = *ctx->next_event_fast;
assert(v <= THREAD_NEXT_EVENT_FAST_MAX);
assert(v <= TE_NEXT_EVENT_FAST_MAX);
return v;
}
JEMALLOC_ALWAYS_INLINE void
event_ctx_next_event_fast_set(event_ctx_t *ctx, uint64_t v) {
assert(v <= THREAD_NEXT_EVENT_FAST_MAX);
te_ctx_next_event_fast_set(te_ctx_t *ctx, uint64_t v) {
assert(v <= TE_NEXT_EVENT_FAST_MAX);
*ctx->next_event_fast = v;
}
JEMALLOC_ALWAYS_INLINE void
thread_next_event_fast_set_non_nominal(tsd_t *tsd) {
te_next_event_fast_set_non_nominal(tsd_t *tsd) {
/*
* Set the fast thresholds to zero when tsd is non-nominal. Use the
* unsafe getter as this may get called during tsd init and clean up.
@ -175,14 +171,14 @@ thread_next_event_fast_set_non_nominal(tsd_t *tsd) {
/* For next_event. Setter also updates the fast threshold. */
JEMALLOC_ALWAYS_INLINE uint64_t
event_ctx_next_event_get(event_ctx_t *ctx) {
te_ctx_next_event_get(te_ctx_t *ctx) {
return *ctx->next_event;
}
JEMALLOC_ALWAYS_INLINE void
event_ctx_next_event_set(tsd_t *tsd, event_ctx_t *ctx, uint64_t v) {
te_ctx_next_event_set(tsd_t *tsd, te_ctx_t *ctx, uint64_t v) {
*ctx->next_event = v;
thread_event_recompute_fast_threshold(tsd);
te_recompute_fast_threshold(tsd);
}
/*
@ -190,22 +186,22 @@ event_ctx_next_event_set(tsd_t *tsd, event_ctx_t *ctx, uint64_t v) {
* a consistent state, which forms the invariants before and after each round
* of thread event handling that we can rely on and need to promise.
* The invariants are only temporarily violated in the middle of:
* (a) thread_event() if an event is triggered (the thread_event_trigger() call
* (a) te_event_advance() if an event is triggered (the te_event_trigger() call
* at the end will restore the invariants),
* (b) thread_##event##_event_update() (the thread_event_update() call at the
* (b) te_##event##_event_update() (the te_event_update() call at the
* end will restore the invariants), or
* (c) thread_alloc_event_rollback() if the rollback falls below the last_event
* (the thread_event_update() call at the end will restore the invariants).
* (c) te_alloc_rollback() if the rollback falls below the last_event
* (the te_event_update() call at the end will restore the invariants).
*/
JEMALLOC_ALWAYS_INLINE void
thread_event_assert_invariants(tsd_t *tsd) {
te_assert_invariants(tsd_t *tsd) {
if (config_debug) {
thread_event_assert_invariants_debug(tsd);
te_assert_invariants_debug(tsd);
}
}
JEMALLOC_ALWAYS_INLINE void
event_ctx_get(tsd_t *tsd, event_ctx_t *ctx, bool is_alloc) {
te_ctx_get(tsd_t *tsd, te_ctx_t *ctx, bool is_alloc) {
ctx->is_alloc = is_alloc;
if (is_alloc) {
ctx->current = tsd_thread_allocatedp_get(tsd);
@ -223,51 +219,51 @@ event_ctx_get(tsd_t *tsd, event_ctx_t *ctx, bool is_alloc) {
}
JEMALLOC_ALWAYS_INLINE void
thread_event_advance(tsd_t *tsd, size_t usize, bool is_alloc) {
thread_event_assert_invariants(tsd);
te_event_advance(tsd_t *tsd, size_t usize, bool is_alloc) {
te_assert_invariants(tsd);
event_ctx_t ctx;
event_ctx_get(tsd, &ctx, is_alloc);
te_ctx_t ctx;
te_ctx_get(tsd, &ctx, is_alloc);
uint64_t bytes_before = event_ctx_current_bytes_get(&ctx);
event_ctx_current_bytes_set(&ctx, bytes_before + usize);
uint64_t bytes_before = te_ctx_current_bytes_get(&ctx);
te_ctx_current_bytes_set(&ctx, bytes_before + usize);
/* The subtraction is intentionally susceptible to underflow. */
if (likely(usize < event_ctx_next_event_get(&ctx) - bytes_before)) {
thread_event_assert_invariants(tsd);
if (likely(usize < te_ctx_next_event_get(&ctx) - bytes_before)) {
te_assert_invariants(tsd);
} else {
thread_event_trigger(tsd, &ctx, false);
te_event_trigger(tsd, &ctx, false);
}
}
JEMALLOC_ALWAYS_INLINE void
thread_dalloc_event(tsd_t *tsd, size_t usize) {
thread_event_advance(tsd, usize, false);
te_event_advance(tsd, usize, false);
}
JEMALLOC_ALWAYS_INLINE void
thread_alloc_event(tsd_t *tsd, size_t usize) {
thread_event_advance(tsd, usize, true);
te_event_advance(tsd, usize, true);
}
#define E(event, condition, is_alloc) \
JEMALLOC_ALWAYS_INLINE void \
thread_##event##_event_update(tsd_t *tsd, uint64_t event_wait) { \
thread_event_assert_invariants(tsd); \
te_##event##_event_update(tsd_t *tsd, uint64_t event_wait) { \
te_assert_invariants(tsd); \
assert(condition); \
assert(tsd_nominal(tsd)); \
assert(tsd_reentrancy_level_get(tsd) == 0); \
assert(event_wait > 0U); \
if (THREAD_EVENT_MIN_START_WAIT > 1U && \
unlikely(event_wait < THREAD_EVENT_MIN_START_WAIT)) { \
event_wait = THREAD_EVENT_MIN_START_WAIT; \
if (TE_MIN_START_WAIT > 1U && \
unlikely(event_wait < TE_MIN_START_WAIT)) { \
event_wait = TE_MIN_START_WAIT; \
} \
if (THREAD_EVENT_MAX_START_WAIT < UINT64_MAX && \
unlikely(event_wait > THREAD_EVENT_MAX_START_WAIT)) { \
event_wait = THREAD_EVENT_MAX_START_WAIT; \
if (TE_MAX_START_WAIT < UINT64_MAX && \
unlikely(event_wait > TE_MAX_START_WAIT)) { \
event_wait = TE_MAX_START_WAIT; \
} \
event##_event_wait_set(tsd, event_wait); \
thread_event_update(tsd, is_alloc); \
te_event_update(tsd, is_alloc); \
}
ITERATE_OVER_ALL_EVENTS

View File

@ -104,10 +104,10 @@ typedef void (*test_callback_t)(int *);
MALLOC_TEST_TSD
/*
* THREAD_EVENT_MIN_START_WAIT should not exceed the minimal allocation usize.
* TE_MIN_START_WAIT should not exceed the minimal allocation usize.
*/
#define THREAD_EVENT_MIN_START_WAIT ((uint64_t)1U)
#define THREAD_EVENT_MAX_START_WAIT UINT64_MAX
#define TE_MIN_START_WAIT ((uint64_t)1U)
#define TE_MAX_START_WAIT UINT64_MAX
#define TSD_INITIALIZER { \
/* state */ ATOMIC_INIT(tsd_state_uninitialized), \
@ -121,14 +121,14 @@ typedef void (*test_callback_t)(int *);
/* thread_deallocated_next_event_fast */ 0, \
/* rtree_ctx */ RTREE_CTX_ZERO_INITIALIZER, \
/* thread_allocated_last_event */ 0, \
/* thread_allocated_next_event */ THREAD_EVENT_MIN_START_WAIT, \
/* thread_allocated_next_event */ TE_MIN_START_WAIT, \
/* thread_deallocated_last_event */ 0, \
/* thread_deallocated_next_event */ THREAD_EVENT_MIN_START_WAIT, \
/* tcache_gc_event_wait */ THREAD_EVENT_MIN_START_WAIT, \
/* tcache_gc_dalloc_event_wait */ THREAD_EVENT_MIN_START_WAIT, \
/* prof_sample_event_wait */ THREAD_EVENT_MIN_START_WAIT, \
/* thread_deallocated_next_event */ TE_MIN_START_WAIT, \
/* tcache_gc_event_wait */ TE_MIN_START_WAIT, \
/* tcache_gc_dalloc_event_wait */ TE_MIN_START_WAIT, \
/* prof_sample_event_wait */ TE_MIN_START_WAIT, \
/* prof_sample_last_event */ 0, \
/* stats_interval_event_wait */ THREAD_EVENT_MIN_START_WAIT, \
/* stats_interval_event_wait */ TE_MIN_START_WAIT, \
/* stats_interval_last_event */ 0, \
/* prof_tdata */ NULL, \
/* prng_state */ 0, \

View File

@ -2180,7 +2180,7 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) {
}
if (unlikely(allocation == NULL)) {
thread_alloc_event_rollback(tsd, usize);
te_alloc_rollback(tsd, usize);
prof_alloc_rollback(tsd, tctx, true);
goto label_oom;
}
@ -2190,7 +2190,7 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) {
allocation = imalloc_no_sample(sopts, dopts, tsd, size, usize,
ind);
if (unlikely(allocation == NULL)) {
thread_alloc_event_rollback(tsd, usize);
te_alloc_rollback(tsd, usize);
goto label_oom;
}
}
@ -2386,15 +2386,14 @@ je_malloc(size_t size) {
* it's not always needed in the core allocation logic.
*/
size_t usize;
sz_size2index_usize_fastpath(size, &ind, &usize);
/* Fast path relies on size being a bin. */
assert(ind < SC_NBINS);
assert((SC_LOOKUP_MAXCLASS < SC_SMALL_MAXCLASS) &&
(size <= SC_SMALL_MAXCLASS));
uint64_t allocated = thread_allocated_malloc_fastpath(tsd);
uint64_t threshold = thread_allocated_next_event_malloc_fastpath(tsd);
uint64_t allocated, threshold;
te_malloc_fastpath_ctx(tsd, &allocated, &threshold);
uint64_t allocated_after = allocated + usize;
/*
* The ind and usize might be uninitialized (or partially) before
@ -2729,7 +2728,7 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) {
szind = sz_size2index_lookup(size);
}
uint64_t deallocated, threshold;
thread_event_free_fastpath_ctx(tsd, &deallocated, &threshold, size_hint);
te_free_fastpath_ctx(tsd, &deallocated, &threshold, size_hint);
size_t usize = sz_index2size(szind);
uint64_t deallocated_after = deallocated + usize;
@ -3161,7 +3160,7 @@ do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) {
p = irallocx_prof(tsd, ptr, old_usize, size, alignment, &usize,
zero, tcache, arena, &alloc_ctx, &hook_args);
if (unlikely(p == NULL)) {
thread_alloc_event_rollback(tsd, usize);
te_alloc_rollback(tsd, usize);
goto label_oom;
}
} else {
@ -3362,7 +3361,7 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size,
extra, alignment, zero);
}
if (usize <= usize_max) {
thread_alloc_event_rollback(tsd, usize_max - usize);
te_alloc_rollback(tsd, usize_max - usize);
} else {
/*
* For downsizing request, usize_max can be less than usize.
@ -3460,7 +3459,7 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) {
assert(iealloc(tsd_tsdn(tsd), ptr) == old_edata);
if (unlikely(usize == old_usize)) {
thread_alloc_event_rollback(tsd, usize);
te_alloc_rollback(tsd, usize);
goto label_not_resized;
}
thread_dalloc_event(tsd, old_usize);

View File

@ -444,8 +444,7 @@ prof_sample_threshold_update(tsd_t *tsd) {
}
if (lg_prof_sample == 0) {
thread_prof_sample_event_update(tsd,
THREAD_EVENT_MIN_START_WAIT);
te_prof_sample_event_update(tsd, TE_MIN_START_WAIT);
return;
}
@ -472,7 +471,7 @@ prof_sample_threshold_update(tsd_t *tsd) {
uint64_t bytes_until_sample = (uint64_t)(log(u) /
log(1.0 - (1.0 / (double)((uint64_t)1U << lg_prof_sample))))
+ (uint64_t)1U;
thread_prof_sample_event_update(tsd, bytes_until_sample);
te_prof_sample_event_update(tsd, bytes_until_sample);
#endif
}

View File

@ -6,46 +6,45 @@
/* TSD event init function signatures. */
#define E(event, condition_unused, is_alloc_event_unused) \
static void tsd_thread_##event##_event_init(tsd_t *tsd);
static void te_tsd_##event##_event_init(tsd_t *tsd);
ITERATE_OVER_ALL_EVENTS
#undef E
/* Event handler function signatures. */
#define E(event, condition_unused, is_alloc_event_unused) \
static void thread_##event##_event_handler(tsd_t *tsd);
static void te_##event##_event_handler(tsd_t *tsd);
ITERATE_OVER_ALL_EVENTS
#undef E
/* (Re)Init functions. */
static void
tsd_thread_tcache_gc_event_init(tsd_t *tsd) {
te_tsd_tcache_gc_event_init(tsd_t *tsd) {
assert(TCACHE_GC_INCR_BYTES > 0);
thread_tcache_gc_event_update(tsd, TCACHE_GC_INCR_BYTES);
te_tcache_gc_event_update(tsd, TCACHE_GC_INCR_BYTES);
}
static void
tsd_thread_tcache_gc_dalloc_event_init(tsd_t *tsd) {
te_tsd_tcache_gc_dalloc_event_init(tsd_t *tsd) {
assert(TCACHE_GC_INCR_BYTES > 0);
thread_tcache_gc_dalloc_event_update(tsd, TCACHE_GC_INCR_BYTES);
te_tcache_gc_dalloc_event_update(tsd, TCACHE_GC_INCR_BYTES);
}
static void
tsd_thread_prof_sample_event_init(tsd_t *tsd) {
te_tsd_prof_sample_event_init(tsd_t *tsd) {
assert(config_prof && opt_prof);
prof_sample_threshold_update(tsd);
}
static void
tsd_thread_stats_interval_event_init(tsd_t *tsd) {
te_tsd_stats_interval_event_init(tsd_t *tsd) {
assert(opt_stats_interval >= 0);
uint64_t interval = stats_interval_accum_batch_size();
thread_stats_interval_event_update(tsd, interval);
te_stats_interval_event_update(tsd, interval);
}
/* Handler functions. */
static void
tcache_gc_event(tsd_t *tsd) {
assert(TCACHE_GC_INCR_BYTES > 0);
@ -56,21 +55,21 @@ tcache_gc_event(tsd_t *tsd) {
}
static void
thread_tcache_gc_event_handler(tsd_t *tsd) {
te_tcache_gc_event_handler(tsd_t *tsd) {
assert(tcache_gc_event_wait_get(tsd) == 0U);
tsd_thread_tcache_gc_event_init(tsd);
te_tsd_tcache_gc_event_init(tsd);
tcache_gc_event(tsd);
}
static void
thread_tcache_gc_dalloc_event_handler(tsd_t *tsd) {
te_tcache_gc_dalloc_event_handler(tsd_t *tsd) {
assert(tcache_gc_dalloc_event_wait_get(tsd) == 0U);
tsd_thread_tcache_gc_dalloc_event_init(tsd);
te_tsd_tcache_gc_dalloc_event_init(tsd);
tcache_gc_event(tsd);
}
static void
thread_prof_sample_event_handler(tsd_t *tsd) {
te_prof_sample_event_handler(tsd_t *tsd) {
assert(config_prof && opt_prof);
assert(prof_sample_event_wait_get(tsd) == 0U);
uint64_t last_event = thread_allocated_last_event_get(tsd);
@ -87,13 +86,13 @@ thread_prof_sample_event_handler(tsd_t *tsd) {
* prof_active is turned on later, the counting for sampling
* can immediately resume as normal.
*/
thread_prof_sample_event_update(tsd,
te_prof_sample_event_update(tsd,
(uint64_t)(1 << lg_prof_sample));
}
}
static void
thread_stats_interval_event_handler(tsd_t *tsd) {
te_stats_interval_event_handler(tsd_t *tsd) {
assert(opt_stats_interval >= 0);
assert(stats_interval_event_wait_get(tsd) == 0U);
uint64_t last_event = thread_allocated_last_event_get(tsd);
@ -103,12 +102,12 @@ thread_stats_interval_event_handler(tsd_t *tsd) {
if (stats_interval_accum(tsd, last_event - last_stats_event)) {
je_malloc_stats_print(NULL, NULL, opt_stats_interval_opts);
}
tsd_thread_stats_interval_event_init(tsd);
te_tsd_stats_interval_event_init(tsd);
}
/* Per event facilities done. */
static bool
event_ctx_has_active_events(event_ctx_t *ctx) {
te_ctx_has_active_events(te_ctx_t *ctx) {
assert(config_debug);
#define E(event, condition, alloc_event) \
if (condition && alloc_event == ctx->is_alloc) { \
@ -120,13 +119,13 @@ event_ctx_has_active_events(event_ctx_t *ctx) {
}
static uint64_t
thread_next_event_compute(tsd_t *tsd, bool is_alloc) {
uint64_t wait = THREAD_EVENT_MAX_START_WAIT;
te_next_event_compute(tsd_t *tsd, bool is_alloc) {
uint64_t wait = TE_MAX_START_WAIT;
#define E(event, condition, alloc_event) \
if (is_alloc == alloc_event && condition) { \
uint64_t event_wait = \
event##_event_wait_get(tsd); \
assert(event_wait <= THREAD_EVENT_MAX_START_WAIT); \
assert(event_wait <= TE_MAX_START_WAIT); \
if (event_wait > 0U && event_wait < wait) { \
wait = event_wait; \
} \
@ -134,20 +133,19 @@ thread_next_event_compute(tsd_t *tsd, bool is_alloc) {
ITERATE_OVER_ALL_EVENTS
#undef E
assert(wait <= THREAD_EVENT_MAX_START_WAIT);
assert(wait <= TE_MAX_START_WAIT);
return wait;
}
static void
thread_event_assert_invariants_impl(tsd_t *tsd, event_ctx_t *ctx) {
uint64_t current_bytes = event_ctx_current_bytes_get(ctx);
uint64_t last_event = event_ctx_last_event_get(ctx);
uint64_t next_event = event_ctx_next_event_get(ctx);
uint64_t next_event_fast = event_ctx_next_event_fast_get(ctx);
te_assert_invariants_impl(tsd_t *tsd, te_ctx_t *ctx) {
uint64_t current_bytes = te_ctx_current_bytes_get(ctx);
uint64_t last_event = te_ctx_last_event_get(ctx);
uint64_t next_event = te_ctx_next_event_get(ctx);
uint64_t next_event_fast = te_ctx_next_event_fast_get(ctx);
assert(last_event != next_event);
if (next_event > THREAD_NEXT_EVENT_FAST_MAX ||
!tsd_fast(tsd)) {
if (next_event > TE_NEXT_EVENT_FAST_MAX || !tsd_fast(tsd)) {
assert(next_event_fast == 0U);
} else {
assert(next_event_fast == next_event);
@ -158,27 +156,26 @@ thread_event_assert_invariants_impl(tsd_t *tsd, event_ctx_t *ctx) {
/* The subtraction is intentionally susceptible to underflow. */
assert(current_bytes - last_event < interval);
uint64_t min_wait = thread_next_event_compute(tsd,
event_ctx_is_alloc(ctx));
uint64_t min_wait = te_next_event_compute(tsd, te_ctx_is_alloc(ctx));
/*
* next_event should always have been pushed up, except when no event is
* on and the TSD has just been initialized. The last_event == 0U guard
* below is stronger than needed, but having an exactly accurate guard
* is more complicated to implement.
*/
assert((!event_ctx_has_active_events(ctx) && last_event == 0U) ||
assert((!te_ctx_has_active_events(ctx) && last_event == 0U) ||
interval == min_wait ||
(interval < min_wait && interval == THREAD_EVENT_MAX_INTERVAL));
(interval < min_wait && interval == TE_MAX_INTERVAL));
}
void
thread_event_assert_invariants_debug(tsd_t *tsd) {
event_ctx_t ctx;
event_ctx_get(tsd, &ctx, true);
thread_event_assert_invariants_impl(tsd, &ctx);
te_assert_invariants_debug(tsd_t *tsd) {
te_ctx_t ctx;
te_ctx_get(tsd, &ctx, true);
te_assert_invariants_impl(tsd, &ctx);
event_ctx_get(tsd, &ctx, false);
thread_event_assert_invariants_impl(tsd, &ctx);
te_ctx_get(tsd, &ctx, false);
te_assert_invariants_impl(tsd, &ctx);
}
/*
@ -229,66 +226,65 @@ thread_event_assert_invariants_debug(tsd_t *tsd) {
* it down the slow path earlier).
*/
static void
event_ctx_next_event_fast_update(event_ctx_t *ctx) {
uint64_t next_event = event_ctx_next_event_get(ctx);
uint64_t next_event_fast = (next_event <=
THREAD_NEXT_EVENT_FAST_MAX) ? next_event : 0U;
event_ctx_next_event_fast_set(ctx, next_event_fast);
te_ctx_next_event_fast_update(te_ctx_t *ctx) {
uint64_t next_event = te_ctx_next_event_get(ctx);
uint64_t next_event_fast = (next_event <= TE_NEXT_EVENT_FAST_MAX) ?
next_event : 0U;
te_ctx_next_event_fast_set(ctx, next_event_fast);
}
void
thread_event_recompute_fast_threshold(tsd_t *tsd) {
te_recompute_fast_threshold(tsd_t *tsd) {
if (tsd_state_get(tsd) != tsd_state_nominal) {
/* Check first because this is also called on purgatory. */
thread_next_event_fast_set_non_nominal(tsd);
te_next_event_fast_set_non_nominal(tsd);
return;
}
event_ctx_t ctx;
event_ctx_get(tsd, &ctx, true);
event_ctx_next_event_fast_update(&ctx);
event_ctx_get(tsd, &ctx, false);
event_ctx_next_event_fast_update(&ctx);
te_ctx_t ctx;
te_ctx_get(tsd, &ctx, true);
te_ctx_next_event_fast_update(&ctx);
te_ctx_get(tsd, &ctx, false);
te_ctx_next_event_fast_update(&ctx);
atomic_fence(ATOMIC_SEQ_CST);
if (tsd_state_get(tsd) != tsd_state_nominal) {
thread_next_event_fast_set_non_nominal(tsd);
te_next_event_fast_set_non_nominal(tsd);
}
}
static void
thread_event_adjust_thresholds_helper(tsd_t *tsd, event_ctx_t *ctx,
te_adjust_thresholds_helper(tsd_t *tsd, te_ctx_t *ctx,
uint64_t wait) {
assert(wait <= THREAD_EVENT_MAX_START_WAIT);
uint64_t next_event = event_ctx_last_event_get(ctx) + (wait <=
THREAD_EVENT_MAX_INTERVAL ? wait : THREAD_EVENT_MAX_INTERVAL);
event_ctx_next_event_set(tsd, ctx, next_event);
assert(wait <= TE_MAX_START_WAIT);
uint64_t next_event = te_ctx_last_event_get(ctx) + (wait <=
TE_MAX_INTERVAL ? wait : TE_MAX_INTERVAL);
te_ctx_next_event_set(tsd, ctx, next_event);
}
static uint64_t
thread_event_trigger_batch_update(tsd_t *tsd, uint64_t accumbytes,
bool is_alloc, bool allow_event_trigger) {
uint64_t wait = THREAD_EVENT_MAX_START_WAIT;
te_batch_accum(tsd_t *tsd, uint64_t accumbytes, bool is_alloc,
bool allow_event_trigger) {
uint64_t wait = TE_MAX_START_WAIT;
#define E(event, condition, alloc_event) \
if (is_alloc == alloc_event && condition) { \
uint64_t event_wait = event##_event_wait_get(tsd); \
assert(event_wait <= THREAD_EVENT_MAX_START_WAIT); \
assert(event_wait <= TE_MAX_START_WAIT); \
if (event_wait > accumbytes) { \
event_wait -= accumbytes; \
} else { \
event_wait = 0U; \
if (!allow_event_trigger) { \
event_wait = \
THREAD_EVENT_MIN_START_WAIT; \
event_wait = TE_MIN_START_WAIT; \
} \
} \
assert(event_wait <= THREAD_EVENT_MAX_START_WAIT); \
assert(event_wait <= TE_MAX_START_WAIT); \
event##_event_wait_set(tsd, event_wait); \
/* \
* If there is a single event, then the remaining wait \
* time may become zero, and we rely on either the \
* event handler or a thread_event_update() call later \
* event handler or a te_event_update() call later \
* to properly set next_event; if there are multiple \
* events, then here we can get the minimum remaining \
* wait time to the next already set event. \
@ -301,72 +297,64 @@ thread_event_trigger_batch_update(tsd_t *tsd, uint64_t accumbytes,
ITERATE_OVER_ALL_EVENTS
#undef E
assert(wait <= THREAD_EVENT_MAX_START_WAIT);
assert(wait <= TE_MAX_START_WAIT);
return wait;
}
void
thread_event_trigger(tsd_t *tsd, event_ctx_t *ctx, bool delay_event) {
te_event_trigger(tsd_t *tsd, te_ctx_t *ctx, bool delay_event) {
/* usize has already been added to thread_allocated. */
uint64_t bytes_after = event_ctx_current_bytes_get(ctx);
uint64_t bytes_after = te_ctx_current_bytes_get(ctx);
/* The subtraction is intentionally susceptible to underflow. */
uint64_t accumbytes = bytes_after - event_ctx_last_event_get(ctx);
uint64_t accumbytes = bytes_after - te_ctx_last_event_get(ctx);
/* Make sure that accumbytes cannot overflow uint64_t. */
assert(THREAD_EVENT_MAX_INTERVAL <= UINT64_MAX - SC_LARGE_MAXCLASS + 1);
event_ctx_last_event_set(ctx, bytes_after);
te_ctx_last_event_set(ctx, bytes_after);
bool allow_event_trigger = !delay_event && tsd_nominal(tsd) &&
tsd_reentrancy_level_get(tsd) == 0;
bool is_alloc = ctx->is_alloc;
uint64_t wait = thread_event_trigger_batch_update(tsd, accumbytes,
is_alloc, allow_event_trigger);
thread_event_adjust_thresholds_helper(tsd, ctx, wait);
thread_event_assert_invariants(tsd);
uint64_t wait = te_batch_accum(tsd, accumbytes, is_alloc,
allow_event_trigger);
te_adjust_thresholds_helper(tsd, ctx, wait);
te_assert_invariants(tsd);
#define E(event, condition, alloc_event) \
if (is_alloc == alloc_event && condition && \
event##_event_wait_get(tsd) == 0U) { \
assert(allow_event_trigger); \
thread_##event##_event_handler(tsd); \
te_##event##_event_handler(tsd); \
}
ITERATE_OVER_ALL_EVENTS
#undef E
thread_event_assert_invariants(tsd);
te_assert_invariants(tsd);
}
void
thread_alloc_event_rollback(tsd_t *tsd, size_t diff) {
thread_event_assert_invariants(tsd);
te_alloc_rollback(tsd_t *tsd, size_t diff) {
te_assert_invariants(tsd);
if (diff == 0U) {
return;
}
/* Rollback happens only on alloc events. */
event_ctx_t ctx;
event_ctx_get(tsd, &ctx, true);
te_ctx_t ctx;
te_ctx_get(tsd, &ctx, true);
uint64_t thread_allocated = event_ctx_current_bytes_get(&ctx);
uint64_t thread_allocated = te_ctx_current_bytes_get(&ctx);
/* The subtraction is intentionally susceptible to underflow. */
uint64_t thread_allocated_rollback = thread_allocated - diff;
event_ctx_current_bytes_set(&ctx, thread_allocated_rollback);
te_ctx_current_bytes_set(&ctx, thread_allocated_rollback);
uint64_t last_event = event_ctx_last_event_get(&ctx);
uint64_t last_event = te_ctx_last_event_get(&ctx);
/* Both subtractions are intentionally susceptible to underflow. */
if (thread_allocated_rollback - last_event <=
thread_allocated - last_event) {
thread_event_assert_invariants(tsd);
te_assert_invariants(tsd);
return;
}
event_ctx_last_event_set(&ctx, thread_allocated_rollback);
te_ctx_last_event_set(&ctx, thread_allocated_rollback);
/* The subtraction is intentionally susceptible to underflow. */
uint64_t wait_diff = last_event - thread_allocated_rollback;
assert(wait_diff <= diff);
@ -374,49 +362,48 @@ thread_alloc_event_rollback(tsd_t *tsd, size_t diff) {
#define E(event, condition, alloc_event) \
if (alloc_event == true && condition) { \
uint64_t event_wait = event##_event_wait_get(tsd); \
assert(event_wait <= THREAD_EVENT_MAX_START_WAIT); \
assert(event_wait <= TE_MAX_START_WAIT); \
if (event_wait > 0U) { \
if (wait_diff > \
THREAD_EVENT_MAX_START_WAIT - event_wait) { \
event_wait = \
THREAD_EVENT_MAX_START_WAIT; \
if (wait_diff > TE_MAX_START_WAIT - event_wait) {\
event_wait = TE_MAX_START_WAIT; \
} else { \
event_wait += wait_diff; \
} \
assert(event_wait <= \
THREAD_EVENT_MAX_START_WAIT); \
assert(event_wait <= TE_MAX_START_WAIT); \
event##_event_wait_set(tsd, event_wait); \
} \
}
ITERATE_OVER_ALL_EVENTS
#undef E
thread_event_update(tsd, true);
te_event_update(tsd, true);
}
void
thread_event_update(tsd_t *tsd, bool is_alloc) {
event_ctx_t ctx;
event_ctx_get(tsd, &ctx, is_alloc);
te_event_update(tsd_t *tsd, bool is_alloc) {
te_ctx_t ctx;
te_ctx_get(tsd, &ctx, is_alloc);
uint64_t wait = thread_next_event_compute(tsd, is_alloc);
thread_event_adjust_thresholds_helper(tsd, &ctx, wait);
uint64_t wait = te_next_event_compute(tsd, is_alloc);
te_adjust_thresholds_helper(tsd, &ctx, wait);
uint64_t last_event = event_ctx_last_event_get(&ctx);
uint64_t last_event = te_ctx_last_event_get(&ctx);
/* Both subtractions are intentionally susceptible to underflow. */
if (event_ctx_current_bytes_get(&ctx) - last_event >=
event_ctx_next_event_get(&ctx) - last_event) {
thread_event_trigger(tsd, &ctx, true);
if (te_ctx_current_bytes_get(&ctx) - last_event >=
te_ctx_next_event_get(&ctx) - last_event) {
te_event_trigger(tsd, &ctx, true);
} else {
thread_event_assert_invariants(tsd);
te_assert_invariants(tsd);
}
}
void tsd_thread_event_init(tsd_t *tsd) {
void tsd_te_init(tsd_t *tsd) {
/* Make sure the bytes accumulated on event_trigger cannot overflow. */
assert(TE_MAX_INTERVAL <= UINT64_MAX - SC_LARGE_MAXCLASS + 1);
#define E(event, condition, is_alloc_event_unused) \
if (condition) { \
tsd_thread_##event##_event_init(tsd); \
te_tsd_##event##_event_init(tsd); \
}
ITERATE_OVER_ALL_EVENTS

View File

@ -117,9 +117,9 @@ tsd_force_recompute(tsdn_t *tsdn) {
<= tsd_state_nominal_max);
tsd_atomic_store(&remote_tsd->state,
tsd_state_nominal_recompute, ATOMIC_RELAXED);
/* See comments in thread_event_recompute_fast_threshold(). */
/* See comments in te_recompute_fast_threshold(). */
atomic_fence(ATOMIC_SEQ_CST);
thread_next_event_fast_set_non_nominal(remote_tsd);
te_next_event_fast_set_non_nominal(remote_tsd);
}
malloc_mutex_unlock(tsdn, &tsd_nominal_tsds_lock);
}
@ -179,7 +179,7 @@ tsd_slow_update(tsd_t *tsd) {
ATOMIC_ACQUIRE);
} while (old_state == tsd_state_nominal_recompute);
thread_event_recompute_fast_threshold(tsd);
te_recompute_fast_threshold(tsd);
}
void
@ -218,7 +218,7 @@ tsd_state_set(tsd_t *tsd, uint8_t new_state) {
tsd_slow_update(tsd);
}
}
thread_event_recompute_fast_threshold(tsd);
te_recompute_fast_threshold(tsd);
}
static bool
@ -240,7 +240,7 @@ tsd_data_init(tsd_t *tsd) {
(uint64_t)(uintptr_t)tsd;
/* event_init may use the prng state above. */
tsd_thread_event_init(tsd);
tsd_te_init(tsd);
return tsd_tcache_enabled_data_init(tsd);
}

View File

@ -2,17 +2,15 @@
TEST_BEGIN(test_next_event_fast_roll_back) {
tsd_t *tsd = tsd_fetch();
event_ctx_t ctx;
event_ctx_get(tsd, &ctx, true);
te_ctx_t ctx;
te_ctx_get(tsd, &ctx, true);
event_ctx_last_event_set(&ctx, 0);
event_ctx_current_bytes_set(&ctx,
THREAD_NEXT_EVENT_FAST_MAX - 8U);
event_ctx_next_event_set(tsd, &ctx,
THREAD_NEXT_EVENT_FAST_MAX);
te_ctx_last_event_set(&ctx, 0);
te_ctx_current_bytes_set(&ctx, TE_NEXT_EVENT_FAST_MAX - 8U);
te_ctx_next_event_set(tsd, &ctx, TE_NEXT_EVENT_FAST_MAX);
#define E(event, condition, is_alloc) \
if (is_alloc && condition) { \
event##_event_wait_set(tsd, THREAD_NEXT_EVENT_FAST_MAX);\
event##_event_wait_set(tsd, TE_NEXT_EVENT_FAST_MAX); \
}
ITERATE_OVER_ALL_EVENTS
#undef E
@ -25,18 +23,16 @@ TEST_END
TEST_BEGIN(test_next_event_fast_resume) {
tsd_t *tsd = tsd_fetch();
event_ctx_t ctx;
event_ctx_get(tsd, &ctx, true);
te_ctx_t ctx;
te_ctx_get(tsd, &ctx, true);
event_ctx_last_event_set(&ctx, 0);
event_ctx_current_bytes_set(&ctx,
THREAD_NEXT_EVENT_FAST_MAX + 8U);
event_ctx_next_event_set(tsd, &ctx,
THREAD_NEXT_EVENT_FAST_MAX + 16U);
te_ctx_last_event_set(&ctx, 0);
te_ctx_current_bytes_set(&ctx, TE_NEXT_EVENT_FAST_MAX + 8U);
te_ctx_next_event_set(tsd, &ctx, TE_NEXT_EVENT_FAST_MAX + 16U);
#define E(event, condition, is_alloc) \
if (is_alloc && condition) { \
event##_event_wait_set(tsd, \
THREAD_NEXT_EVENT_FAST_MAX + 16U); \
TE_NEXT_EVENT_FAST_MAX + 16U); \
}
ITERATE_OVER_ALL_EVENTS
#undef E
@ -48,11 +44,11 @@ TEST_END
TEST_BEGIN(test_event_rollback) {
tsd_t *tsd = tsd_fetch();
const uint64_t diff = THREAD_EVENT_MAX_INTERVAL >> 2;
const uint64_t diff = TE_MAX_INTERVAL >> 2;
size_t count = 10;
uint64_t thread_allocated = thread_allocated_get(tsd);
while (count-- != 0) {
thread_alloc_event_rollback(tsd, diff);
te_alloc_rollback(tsd, diff);
uint64_t thread_allocated_after = thread_allocated_get(tsd);
assert_u64_eq(thread_allocated - thread_allocated_after, diff,
"thread event counters are not properly rolled back");