diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index a4523ae0..a71f9446 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -49,7 +49,7 @@ void arena_decay(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, void arena_reset(tsd_t *tsd, arena_t *arena); void arena_destroy(tsd_t *tsd, arena_t *arena); void arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, - cache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes); + cache_bin_t *tbin, szind_t binind); void arena_alloc_junk_small(void *ptr, const bin_info_t *bin_info, bool zero); diff --git a/include/jemalloc/internal/arena_inlines_a.h b/include/jemalloc/internal/arena_inlines_a.h index 9abf7f6a..27434c30 100644 --- a/include/jemalloc/internal/arena_inlines_a.h +++ b/include/jemalloc/internal/arena_inlines_a.h @@ -21,17 +21,6 @@ arena_internal_get(arena_t *arena) { return atomic_load_zu(&arena->stats.internal, ATOMIC_RELAXED); } -static inline bool -arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes) { - cassert(config_prof); - - if (likely(prof_interval == 0 || !prof_active_get_unlocked())) { - return false; - } - - return prof_accum_add(tsdn, &arena->prof_accum, accumbytes); -} - static inline void percpu_arena_update(tsd_t *tsd, unsigned cpu) { assert(have_percpu_arena); diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_b.h b/include/jemalloc/internal/jemalloc_internal_inlines_b.h index 70d6e578..f0b73d02 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_b.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_b.h @@ -24,7 +24,7 @@ arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal) { if (tcache_available(tsd)) { tcache_t *tcache = tcache_get(tsd); if (tcache->arena != NULL) { - /* See comments in tcache_data_init().*/ + /* See comments in tsd_tcache_data_init().*/ assert(tcache->arena == arena_get(tsd_tsdn(tsd), 0, false)); if (tcache->arena != ret) { diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index 7befad64..94fbd752 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -33,13 +33,7 @@ extern bool prof_active; /* Accessed via prof_gdump_[gs]et{_unlocked,}(). */ extern bool prof_gdump_val; -/* - * Profile dump interval, measured in bytes allocated. Each arena triggers a - * profile dump when it reaches this threshold. The effect is that the - * interval between profile dumps averages prof_interval, though the actual - * interval between dumps will tend to be sporadic, and the interval will be a - * maximum of approximately (prof_interval * narenas). - */ +/* Profile dump interval, measured in bytes allocated. 
*/ extern uint64_t prof_interval; /* @@ -50,6 +44,10 @@ extern size_t lg_prof_sample; extern bool prof_booted; +/* Functions only accessed in prof_inlines_a.h */ +bool prof_idump_accum_impl(tsdn_t *tsdn, uint64_t accumbytes); +void prof_idump_rollback_impl(tsdn_t *tsdn, size_t usize); + void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated); void prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize, prof_tctx_t *tctx); @@ -73,7 +71,7 @@ void prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, uint64_t *accumobjs, #endif int prof_getpid(void); void prof_get_default_filename(tsdn_t *tsdn, char *filename, uint64_t ind); -bool prof_accum_init(tsdn_t *tsdn, prof_accum_t *prof_accum); +bool prof_accum_init(tsdn_t *tsdn); void prof_idump(tsdn_t *tsdn); bool prof_mdump(tsd_t *tsd, const char *filename); void prof_gdump(tsdn_t *tsdn); diff --git a/include/jemalloc/internal/prof_inlines_a.h b/include/jemalloc/internal/prof_inlines_a.h index 6716d2f4..61773a2b 100644 --- a/include/jemalloc/internal/prof_inlines_a.h +++ b/include/jemalloc/internal/prof_inlines_a.h @@ -3,74 +3,6 @@ #include "jemalloc/internal/mutex.h" -static inline bool -prof_accum_add(tsdn_t *tsdn, prof_accum_t *prof_accum, - uint64_t accumbytes) { - cassert(config_prof); - - bool overflow; - uint64_t a0, a1; - - /* - * If the application allocates fast enough (and/or if idump is slow - * enough), extreme overflow here (a1 >= prof_interval * 2) can cause - * idump trigger coalescing. This is an intentional mechanism that - * avoids rate-limiting allocation. - */ -#ifdef JEMALLOC_ATOMIC_U64 - a0 = atomic_load_u64(&prof_accum->accumbytes, ATOMIC_RELAXED); - do { - a1 = a0 + accumbytes; - assert(a1 >= a0); - overflow = (a1 >= prof_interval); - if (overflow) { - a1 %= prof_interval; - } - } while (!atomic_compare_exchange_weak_u64(&prof_accum->accumbytes, &a0, - a1, ATOMIC_RELAXED, ATOMIC_RELAXED)); -#else - malloc_mutex_lock(tsdn, &prof_accum->mtx); - a0 = prof_accum->accumbytes; - a1 = a0 + accumbytes; - overflow = (a1 >= prof_interval); - if (overflow) { - a1 %= prof_interval; - } - prof_accum->accumbytes = a1; - malloc_mutex_unlock(tsdn, &prof_accum->mtx); -#endif - return overflow; -} - -static inline void -prof_accum_cancel(tsdn_t *tsdn, prof_accum_t *prof_accum, - size_t usize) { - cassert(config_prof); - - /* - * Cancel out as much of the excessive prof_accumbytes increase as - * possible without underflowing. Interval-triggered dumps occur - * slightly more often than intended as a result of incomplete - * canceling. - */ - uint64_t a0, a1; -#ifdef JEMALLOC_ATOMIC_U64 - a0 = atomic_load_u64(&prof_accum->accumbytes, ATOMIC_RELAXED); - do { - a1 = (a0 >= SC_LARGE_MINCLASS - usize) - ? a0 - (SC_LARGE_MINCLASS - usize) : 0; - } while (!atomic_compare_exchange_weak_u64(&prof_accum->accumbytes, &a0, - a1, ATOMIC_RELAXED, ATOMIC_RELAXED)); -#else - malloc_mutex_lock(tsdn, &prof_accum->mtx); - a0 = prof_accum->accumbytes; - a1 = (a0 >= SC_LARGE_MINCLASS - usize) - ? 
a0 - (SC_LARGE_MINCLASS - usize) : 0; - prof_accum->accumbytes = a1; - malloc_mutex_unlock(tsdn, &prof_accum->mtx); -#endif -} - JEMALLOC_ALWAYS_INLINE void prof_active_assert() { cassert(config_prof); @@ -93,4 +25,26 @@ prof_active_get_unlocked(void) { return prof_active; } +JEMALLOC_ALWAYS_INLINE bool +prof_idump_accum(tsdn_t *tsdn, uint64_t accumbytes) { + cassert(config_prof); + + if (prof_interval == 0 || !prof_active_get_unlocked()) { + return false; + } + + return prof_idump_accum_impl(tsdn, accumbytes); +} + +JEMALLOC_ALWAYS_INLINE void +prof_idump_rollback(tsdn_t *tsdn, size_t usize) { + cassert(config_prof); + + if (prof_interval == 0 || !prof_active_get_unlocked()) { + return; + } + + prof_idump_rollback_impl(tsdn, usize); +} + #endif /* JEMALLOC_INTERNAL_PROF_INLINES_A_H */ diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index 8988ae9d..85c6cc46 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -93,9 +93,6 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, if (config_stats) { bin->tstats.nrequests++; } - if (config_prof) { - tcache->prof_accumbytes += usize; - } tcache_event(tsd, tcache); return ret; } @@ -151,9 +148,6 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, if (config_stats) { bin->tstats.nrequests++; } - if (config_prof) { - tcache->prof_accumbytes += usize; - } } tcache_event(tsd, tcache); diff --git a/include/jemalloc/internal/tcache_structs.h b/include/jemalloc/internal/tcache_structs.h index 008b1f73..98d3ef70 100644 --- a/include/jemalloc/internal/tcache_structs.h +++ b/include/jemalloc/internal/tcache_structs.h @@ -16,10 +16,9 @@ struct tcache_s { * together at the start of this struct. */ - /* Cleared after arena_prof_accum(). */ - uint64_t prof_accumbytes; /* Drives incremental GC. */ ticker_t gc_ticker; + /* * The pointer stacks associated with bins follow as a contiguous array. * During tcache initialization, the avail pointer in each element of diff --git a/include/jemalloc/internal/tcache_types.h b/include/jemalloc/internal/tcache_types.h index dce69382..60261fc0 100644 --- a/include/jemalloc/internal/tcache_types.h +++ b/include/jemalloc/internal/tcache_types.h @@ -47,8 +47,8 @@ typedef struct tcaches_s tcaches_t; #define TCACHE_GC_INCR \ ((TCACHE_GC_SWEEP / SC_NBINS) + ((TCACHE_GC_SWEEP / SC_NBINS == 0) ? 0 : 1)) -/* Used in TSD static initializer only. Real init in tcache_data_init(). */ -#define TCACHE_ZERO_INITIALIZER {0} +/* Used in TSD static initializer only. Real init in tsd_tcache_data_init(). */ +#define TCACHE_ZERO_INITIALIZER {{0}} /* Used in TSD static initializer only. Will be initialized to opt_tcache. */ #define TCACHE_ENABLED_ZERO_INITIALIZER false diff --git a/include/jemalloc/internal/thread_event.h b/include/jemalloc/internal/thread_event.h index 08678b74..6aa334fc 100644 --- a/include/jemalloc/internal/thread_event.h +++ b/include/jemalloc/internal/thread_event.h @@ -44,7 +44,8 @@ void thread_event_boot(); C(thread_allocated_next_event_fast) \ C(thread_allocated_last_event) \ C(thread_allocated_next_event) \ - ITERATE_OVER_ALL_EVENTS + ITERATE_OVER_ALL_EVENTS \ + C(prof_sample_last_event) /* Getters directly wrap TSD getters. 
*/ #define C(counter) \ diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 14ad53d7..60500df7 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -30,6 +30,7 @@ * l: thread_allocated_last_event * j: thread_allocated_next_event * w: prof_sample_event_wait (config_prof) + * x: prof_sample_last_event (config_prof) * p: prof_tdata (config_prof) * v: offset_state * i: iarena @@ -45,11 +46,11 @@ * |---------------------------- 2nd cacheline ----------------------------| * | [c * 64 ........ ........ ........ ........ ........ ........ .......] | * |---------------------------- 3nd cacheline ----------------------------| - * | [c * 32 ........ ........ .......] llllllll jjjjjjjj wwwwwwww pppppppp | + * | [c * 32 ........ ........ .......] llllllll jjjjjjjj wwwwwwww xxxxxxxx | * +---------------------------- 4th cacheline ----------------------------+ - * | vvvvvvvv iiiiiiii aaaaaaaa oooooooo [b...... ........ ........ ........ | + * | pppppppp vvvvvvvv iiiiiiii aaaaaaaa oooooooo [b...... ........ ........ | * +---------------------------- 5th cacheline ----------------------------+ - * | ..b][t.. ........ ........ ........ ........ ........ ........ ........ | + * | ........ ..b][t.. ........ ........ ........ ........ ........ ........ | * +-------------------------------------------------------------------------+ * Note: the entire tcache is embedded into TSD and spans multiple cachelines. * @@ -83,6 +84,7 @@ typedef void (*test_callback_t)(int *); O(thread_allocated_last_event, uint64_t, uint64_t) \ O(thread_allocated_next_event, uint64_t, uint64_t) \ O(prof_sample_event_wait, uint64_t, uint64_t) \ + O(prof_sample_last_event, uint64_t, uint64_t) \ O(prof_tdata, prof_tdata_t *, prof_tdata_t *) \ O(offset_state, uint64_t, uint64_t) \ O(iarena, arena_t *, arena_t *) \ @@ -109,9 +111,10 @@ typedef void (*test_callback_t)(int *); /* thread_allocated_next_event_fast */ THREAD_EVENT_MIN_START_WAIT, \ /* thread_deallocated */ 0, \ /* rtree_ctx */ RTREE_CTX_ZERO_INITIALIZER, \ - /* thread_allocated_last_event */ 0, \ + /* thread_allocated_last_event */ 0, \ /* thread_allocated_next_event */ THREAD_EVENT_MIN_START_WAIT, \ /* prof_sample_event_wait */ THREAD_EVENT_MIN_START_WAIT, \ + /* prof_sample_last_event */ 0, \ /* prof_tdata */ NULL, \ /* offset_state */ 0, \ /* iarena */ NULL, \ diff --git a/src/arena.c b/src/arena.c index e096f3a6..a60a6843 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1378,13 +1378,10 @@ arena_bin_choose_lock(tsdn_t *tsdn, arena_t *arena, szind_t binind, void arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, - cache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes) { + cache_bin_t *tbin, szind_t binind) { unsigned i, nfill, cnt; assert(cache_bin_ncached_get(tbin, binind) == 0); - if (config_prof && arena_prof_accum(tsdn, arena, prof_accumbytes)) { - prof_idump(tsdn); - } tcache->bin_refilled[binind] = true; unsigned binshard; @@ -1484,10 +1481,8 @@ arena_malloc_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) { bin->stats.nrequests++; bin->stats.curregs++; } + malloc_mutex_unlock(tsdn, &bin->lock); - if (config_prof && arena_prof_accum(tsdn, arena, usize)) { - prof_idump(tsdn); - } if (!zero) { if (config_fill) { @@ -1565,14 +1560,13 @@ arena_prof_promote(tsdn_t *tsdn, void *ptr, size_t usize) { extent_t *extent = rtree_extent_read(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)ptr, true); - arena_t *arena = arena_get_from_extent(extent); szind_t szind = 
sz_size2index(usize); extent_szind_set(extent, szind); rtree_szind_slab_update(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)ptr, szind, false); - prof_accum_cancel(tsdn, &arena->prof_accum, usize); + prof_idump_rollback(tsdn, usize); assert(isalloc(tsdn, ptr) == usize); } @@ -1982,7 +1976,7 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { } if (config_prof) { - if (prof_accum_init(tsdn, &arena->prof_accum)) { + if (prof_accum_init(tsdn)) { goto label_error; } } diff --git a/src/jemalloc.c b/src/jemalloc.c index 63a1e302..264b3f3f 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2386,9 +2386,6 @@ je_malloc(size_t size) { if (config_stats) { bin->tstats.nrequests++; } - if (config_prof) { - tcache->prof_accumbytes += usize; - } LOG("core.malloc.exit", "result: %p", ret); diff --git a/src/large.c b/src/large.c index 13d8e56c..8aaa3ce2 100644 --- a/src/large.c +++ b/src/large.c @@ -56,9 +56,6 @@ large_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, extent_list_append(&arena->large, extent); malloc_mutex_unlock(tsdn, &arena->large_mtx); } - if (config_prof && arena_prof_accum(tsdn, arena, usize)) { - prof_idump(tsdn); - } if (zero) { assert(is_zeroed); diff --git a/src/prof.c b/src/prof.c index 7e219dc3..5360662b 100644 --- a/src/prof.c +++ b/src/prof.c @@ -45,6 +45,9 @@ bool opt_prof_leak = false; bool opt_prof_accum = false; char opt_prof_prefix[PROF_DUMP_FILENAME_LEN]; +/* Accessed via prof_idump_[accum/rollback](). */ +static prof_accum_t prof_idump_accumulated; + /* * Initialized as opt_prof_active, and accessed via * prof_active_[gs]et{_unlocked,}(). @@ -586,21 +589,91 @@ prof_fdump(void) { } bool -prof_accum_init(tsdn_t *tsdn, prof_accum_t *prof_accum) { +prof_accum_init(tsdn_t *tsdn) { cassert(config_prof); #ifndef JEMALLOC_ATOMIC_U64 - if (malloc_mutex_init(&prof_accum->mtx, "prof_accum", + if (malloc_mutex_init(&prof_idump_accumulated.mtx, "prof_accum", WITNESS_RANK_PROF_ACCUM, malloc_mutex_rank_exclusive)) { return true; } - prof_accum->accumbytes = 0; + prof_idump_accumulated.accumbytes = 0; #else - atomic_store_u64(&prof_accum->accumbytes, 0, ATOMIC_RELAXED); + atomic_store_u64(&prof_idump_accumulated.accumbytes, 0, + ATOMIC_RELAXED); #endif return false; } +bool +prof_idump_accum_impl(tsdn_t *tsdn, uint64_t accumbytes) { + cassert(config_prof); + + bool overflow; + uint64_t a0, a1; + + /* + * If the application allocates fast enough (and/or if idump is slow + * enough), extreme overflow here (a1 >= prof_interval * 2) can cause + * idump trigger coalescing. This is an intentional mechanism that + * avoids rate-limiting allocation. + */ +#ifdef JEMALLOC_ATOMIC_U64 + a0 = atomic_load_u64(&prof_idump_accumulated.accumbytes, + ATOMIC_RELAXED); + do { + a1 = a0 + accumbytes; + assert(a1 >= a0); + overflow = (a1 >= prof_interval); + if (overflow) { + a1 %= prof_interval; + } + } while (!atomic_compare_exchange_weak_u64( + &prof_idump_accumulated.accumbytes, &a0, a1, ATOMIC_RELAXED, + ATOMIC_RELAXED)); +#else + malloc_mutex_lock(tsdn, &prof_idump_accumulated.mtx); + a0 = prof_idump_accumulated.accumbytes; + a1 = a0 + accumbytes; + overflow = (a1 >= prof_interval); + if (overflow) { + a1 %= prof_interval; + } + prof_idump_accumulated.accumbytes = a1; + malloc_mutex_unlock(tsdn, &prof_idump_accumulated.mtx); +#endif + return overflow; +} + +void +prof_idump_rollback_impl(tsdn_t *tsdn, size_t usize) { + cassert(config_prof); + + /* + * Cancel out as much of the excessive accumbytes increase as possible + * without underflowing. 
Interval-triggered dumps occur slightly more + * often than intended as a result of incomplete canceling. + */ + uint64_t a0, a1; +#ifdef JEMALLOC_ATOMIC_U64 + a0 = atomic_load_u64(&prof_idump_accumulated.accumbytes, + ATOMIC_RELAXED); + do { + a1 = (a0 >= SC_LARGE_MINCLASS - usize) + ? a0 - (SC_LARGE_MINCLASS - usize) : 0; + } while (!atomic_compare_exchange_weak_u64( + &prof_idump_accumulated.accumbytes, &a0, a1, ATOMIC_RELAXED, + ATOMIC_RELAXED)); +#else + malloc_mutex_lock(tsdn, &prof_idump_accumulated.mtx); + a0 = prof_idump_accumulated.accumbytes; + a1 = (a0 >= SC_LARGE_MINCLASS - usize) + ? a0 - (SC_LARGE_MINCLASS - usize) : 0; + prof_idump_accumulated.accumbytes = a1; + malloc_mutex_unlock(tsdn, &prof_idump_accumulated.mtx); +#endif +} + bool prof_dump_prefix_set(tsdn_t *tsdn, const char *prefix) { cassert(config_prof); @@ -641,7 +714,7 @@ prof_idump(tsdn_t *tsdn) { return; } - tdata = prof_tdata_get(tsd, false); + tdata = prof_tdata_get(tsd, true); if (tdata == NULL) { return; } diff --git a/src/tcache.c b/src/tcache.c index e17b67a3..7758c4f2 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -106,11 +106,7 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, void *ret; assert(tcache->arena != NULL); - arena_tcache_fill_small(tsdn, arena, tcache, tbin, binind, - config_prof ? tcache->prof_accumbytes : 0); - if (config_prof) { - tcache->prof_accumbytes = 0; - } + arena_tcache_fill_small(tsdn, arena, tcache, tbin, binind); ret = cache_bin_alloc_easy(tbin, tcache_success, binind); return ret; @@ -181,14 +177,6 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, assert(binshard < bin_infos[binind].n_shards); bin_t *bin = &bin_arena->bins[binind].bin_shards[binshard]; - if (config_prof && bin_arena == arena) { - if (arena_prof_accum(tsd_tsdn(tsd), arena, - tcache->prof_accumbytes)) { - prof_idump(tsd_tsdn(tsd)); - } - tcache->prof_accumbytes = 0; - } - malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); if (config_stats && bin_arena == arena && !merged_stats) { merged_stats = true; @@ -274,11 +262,6 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, szind_t unsigned locked_arena_ind = extent_arena_ind_get(extent); arena_t *locked_arena = arena_get(tsd_tsdn(tsd), locked_arena_ind, false); - bool idump; - - if (config_prof) { - idump = false; - } bool lock_large = !arena_is_auto(locked_arena); if (lock_large) { @@ -295,11 +278,6 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, szind_t } if ((config_prof || config_stats) && (locked_arena == tcache_arena)) { - if (config_prof) { - idump = arena_prof_accum(tsd_tsdn(tsd), - tcache_arena, tcache->prof_accumbytes); - tcache->prof_accumbytes = 0; - } if (config_stats) { merged_stats = true; arena_stats_large_flush_nrequests_add( @@ -332,9 +310,6 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, szind_t ndeferred++; } } - if (config_prof && idump) { - prof_idump(tsd_tsdn(tsd)); - } arena_decay_ticks(tsd_tsdn(tsd), locked_arena, nflush - ndeferred); nflush = ndeferred; @@ -462,7 +437,6 @@ tcache_init(tsd_t *tsd, tcache_t *tcache, void *avail_stack) { assert(!tcache_bin_lowbits_overflowable(avail_stack)); memset(&tcache->link, 0, sizeof(ql_elm(tcache_t))); - tcache->prof_accumbytes = 0; tcache->next_gc_bin = 0; tcache->arena = NULL; @@ -590,14 +564,6 @@ tcache_flush_cache(tsd_t *tsd, tcache_t *tcache) { assert(tbin->tstats.nrequests == 0); } } - - if (config_prof && tcache->prof_accumbytes > 0) { - if (arena_prof_accum(tsd_tsdn(tsd), 
tcache->arena, - tcache->prof_accumbytes)) { - prof_idump(tsd_tsdn(tsd)); - } - tcache->prof_accumbytes = 0; - } } void diff --git a/src/thread_event.c b/src/thread_event.c index c6542f46..312dff26 100644 --- a/src/thread_event.c +++ b/src/thread_event.c @@ -18,6 +18,29 @@ static void thread_##event##_event_handler(tsd_t *tsd); ITERATE_OVER_ALL_EVENTS #undef E +static void +thread_prof_sample_event_handler(tsd_t *tsd) { + assert(config_prof && opt_prof); + assert(prof_sample_event_wait_get(tsd) == 0U); + uint64_t last_event = thread_allocated_last_event_get(tsd); + uint64_t last_sample_event = prof_sample_last_event_get(tsd); + prof_sample_last_event_set(tsd, last_event); + if (prof_idump_accum(tsd_tsdn(tsd), last_event - last_sample_event)) { + prof_idump(tsd_tsdn(tsd)); + } + if (!prof_active_get_unlocked()) { + /* + * If prof_active is off, we reset prof_sample_event_wait to be + * the sample interval when it drops to 0, so that there won't + * be excessive routings to the slow path, and that when + * prof_active is turned on later, the counting for sampling + * can immediately resume as normal. + */ + thread_prof_sample_event_update(tsd, + (uint64_t)(1 << lg_prof_sample)); + } +} + static uint64_t thread_allocated_next_event_compute(tsd_t *tsd) { uint64_t wait = THREAD_EVENT_MAX_START_WAIT; @@ -86,23 +109,6 @@ thread_event_adjust_thresholds_helper(tsd_t *tsd, uint64_t wait) { thread_allocated_next_event_fast_set(tsd, next_event_fast); } -static void -thread_prof_sample_event_handler(tsd_t *tsd) { - assert(config_prof && opt_prof); - assert(prof_sample_event_wait_get(tsd) == 0U); - if (!prof_active_get_unlocked()) { - /* - * If prof_active is off, we reset prof_sample_event_wait to be - * the sample interval when it drops to 0, so that there won't - * be excessive routings to the slow path, and that when - * prof_active is turned on later, the counting for sampling - * can immediately resume as normal. - */ - thread_prof_sample_event_update(tsd, - (uint64_t)(1 << lg_prof_sample)); - } -} - static uint64_t thread_event_trigger_batch_update(tsd_t *tsd, uint64_t accumbytes, bool allow_event_trigger) {
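
Taken together, these hunks replace the per-arena `prof_accum` and per-tcache `prof_accumbytes` counters with a single process-wide accumulator (`prof_idump_accumulated`), fed from the per-thread event system: `thread_prof_sample_event_handler()` passes each thread's `thread_allocated` delta since its last sample event to `prof_idump_accum()`, and an interval dump fires when the running total wraps past `prof_interval`. The standalone sketch below is only an illustration of that technique, not code from the patch: the names are hypothetical, C11 atomics stand in for jemalloc's `atomic_u64` wrappers, and `printf` stands in for `prof_idump()`. It mirrors the wrap-on-overflow CAS loop in `prof_idump_accum_impl()`.

```c
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Stand-ins for prof_interval and the global prof_idump_accumulated counter. */
static const uint64_t dump_interval = 1 << 20;	/* 1 MiB between dumps */
static _Atomic uint64_t idump_accumbytes = 0;

/*
 * Mirrors prof_idump_accum_impl(): add the bytes allocated since the last
 * sample event, wrap the total modulo the interval, and report whether the
 * threshold was crossed (i.e. an interval dump is due).
 */
static bool
idump_accum(uint64_t accumbytes) {
	uint64_t a0 = atomic_load_explicit(&idump_accumbytes,
	    memory_order_relaxed);
	uint64_t a1;
	bool overflow;
	do {
		a1 = a0 + accumbytes;
		overflow = (a1 >= dump_interval);
		if (overflow) {
			/*
			 * Wrapping (rather than clamping) coalesces dump
			 * triggers when allocation outpaces dumping, so the
			 * accumulator never rate-limits allocation.
			 */
			a1 %= dump_interval;
		}
	} while (!atomic_compare_exchange_weak_explicit(&idump_accumbytes,
	    &a0, a1, memory_order_relaxed, memory_order_relaxed));
	return overflow;
}

int
main(void) {
	/*
	 * Each call plays the role of one thread's prof_sample event handler
	 * feeding its thread_allocated delta into the global counter.
	 */
	for (int i = 0; i < 5; i++) {
		if (idump_accum(400 * 1024)) {
			printf("interval dump due after batch %d\n", i);
		}
	}
	return 0;
}
```

The same structure explains `prof_idump_rollback_impl()`: when a sampled small allocation is promoted to `SC_LARGE_MINCLASS`, the over-counted `SC_LARGE_MINCLASS - usize` bytes are subtracted back out of the global counter by an analogous CAS loop that clamps at zero rather than underflowing.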