Pull prof_accumbytes into thread event handler
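
Before this change, each tcache kept a prof_accumbytes counter that was periodically flushed into its arena's prof_accum to drive interval-triggered profile dumps (idump). This commit deletes that plumbing (tcache->prof_accumbytes, arena_prof_accum(), prof_accum_add(), prof_accum_cancel()) and instead accounts allocated bytes in TSD, via a new prof_sample_last_event counter driven by the thread event handler. The inline entry points become prof_idump_accum() and prof_idump_rollback(), thin fast-path wrappers over impl functions declared in the prof externs header.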

@@ -49,7 +49,7 @@ void arena_decay(tsdn_t *tsdn, arena_t *arena, bool is_background_thread,
 void arena_reset(tsd_t *tsd, arena_t *arena);
 void arena_destroy(tsd_t *tsd, arena_t *arena);
 void arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache,
-    cache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes);
+    cache_bin_t *tbin, szind_t binind);
 void arena_alloc_junk_small(void *ptr, const bin_info_t *bin_info,
     bool zero);

@@ -21,17 +21,6 @@ arena_internal_get(arena_t *arena) {
         return atomic_load_zu(&arena->stats.internal, ATOMIC_RELAXED);
 }
 
-static inline bool
-arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes) {
-        cassert(config_prof);
-
-        if (likely(prof_interval == 0 || !prof_active_get_unlocked())) {
-                return false;
-        }
-
-        return prof_accum_add(tsdn, &arena->prof_accum, accumbytes);
-}
-
 static inline void
 percpu_arena_update(tsd_t *tsd, unsigned cpu) {
         assert(have_percpu_arena);
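
Note that the early-exit gate, prof_interval == 0 || !prof_active_get_unlocked(), is not lost: it reappears in the new prof_idump_accum() wrapper further down, so the common no-profiling case still returns without touching any shared state.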

@@ -24,7 +24,7 @@ arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal) {
         if (tcache_available(tsd)) {
                 tcache_t *tcache = tcache_get(tsd);
                 if (tcache->arena != NULL) {
-                        /* See comments in tcache_data_init().*/
+                        /* See comments in tsd_tcache_data_init().*/
                         assert(tcache->arena ==
                             arena_get(tsd_tsdn(tsd), 0, false));
                         if (tcache->arena != ret) {

@@ -33,13 +33,7 @@ extern bool prof_active;
 /* Accessed via prof_gdump_[gs]et{_unlocked,}(). */
 extern bool prof_gdump_val;
 
-/*
- * Profile dump interval, measured in bytes allocated. Each arena triggers a
- * profile dump when it reaches this threshold. The effect is that the
- * interval between profile dumps averages prof_interval, though the actual
- * interval between dumps will tend to be sporadic, and the interval will be a
- * maximum of approximately (prof_interval * narenas).
- */
+/* Profile dump interval, measured in bytes allocated. */
 extern uint64_t prof_interval;
 
 /*

@@ -50,6 +44,10 @@ extern size_t lg_prof_sample;
 
 extern bool prof_booted;
 
+/* Functions only accessed in prof_inlines_a.h */
+bool prof_idump_accum_impl(tsdn_t *tsdn, uint64_t accumbytes);
+void prof_idump_rollback_impl(tsdn_t *tsdn, size_t usize);
+
 void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated);
 void prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize,
     prof_tctx_t *tctx);

@@ -73,7 +71,7 @@ void prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, uint64_t *accumobjs,
 #endif
 int prof_getpid(void);
 void prof_get_default_filename(tsdn_t *tsdn, char *filename, uint64_t ind);
-bool prof_accum_init(tsdn_t *tsdn, prof_accum_t *prof_accum);
+bool prof_accum_init(tsdn_t *tsdn);
 void prof_idump(tsdn_t *tsdn);
 bool prof_mdump(tsd_t *tsd, const char *filename);
 void prof_gdump(tsdn_t *tsdn);

@@ -3,74 +3,6 @@
 
 #include "jemalloc/internal/mutex.h"
 
-static inline bool
-prof_accum_add(tsdn_t *tsdn, prof_accum_t *prof_accum,
-    uint64_t accumbytes) {
-        cassert(config_prof);
-
-        bool overflow;
-        uint64_t a0, a1;
-
-        /*
-         * If the application allocates fast enough (and/or if idump is slow
-         * enough), extreme overflow here (a1 >= prof_interval * 2) can cause
-         * idump trigger coalescing. This is an intentional mechanism that
-         * avoids rate-limiting allocation.
-         */
-#ifdef JEMALLOC_ATOMIC_U64
-        a0 = atomic_load_u64(&prof_accum->accumbytes, ATOMIC_RELAXED);
-        do {
-                a1 = a0 + accumbytes;
-                assert(a1 >= a0);
-                overflow = (a1 >= prof_interval);
-                if (overflow) {
-                        a1 %= prof_interval;
-                }
-        } while (!atomic_compare_exchange_weak_u64(&prof_accum->accumbytes, &a0,
-            a1, ATOMIC_RELAXED, ATOMIC_RELAXED));
-#else
-        malloc_mutex_lock(tsdn, &prof_accum->mtx);
-        a0 = prof_accum->accumbytes;
-        a1 = a0 + accumbytes;
-        overflow = (a1 >= prof_interval);
-        if (overflow) {
-                a1 %= prof_interval;
-        }
-        prof_accum->accumbytes = a1;
-        malloc_mutex_unlock(tsdn, &prof_accum->mtx);
-#endif
-        return overflow;
-}
-
-static inline void
-prof_accum_cancel(tsdn_t *tsdn, prof_accum_t *prof_accum,
-    size_t usize) {
-        cassert(config_prof);
-
-        /*
-         * Cancel out as much of the excessive prof_accumbytes increase as
-         * possible without underflowing. Interval-triggered dumps occur
-         * slightly more often than intended as a result of incomplete
-         * canceling.
-         */
-        uint64_t a0, a1;
-#ifdef JEMALLOC_ATOMIC_U64
-        a0 = atomic_load_u64(&prof_accum->accumbytes, ATOMIC_RELAXED);
-        do {
-                a1 = (a0 >= SC_LARGE_MINCLASS - usize)
-                    ? a0 - (SC_LARGE_MINCLASS - usize) : 0;
-        } while (!atomic_compare_exchange_weak_u64(&prof_accum->accumbytes, &a0,
-            a1, ATOMIC_RELAXED, ATOMIC_RELAXED));
-#else
-        malloc_mutex_lock(tsdn, &prof_accum->mtx);
-        a0 = prof_accum->accumbytes;
-        a1 = (a0 >= SC_LARGE_MINCLASS - usize)
-            ? a0 - (SC_LARGE_MINCLASS - usize) : 0;
-        prof_accum->accumbytes = a1;
-        malloc_mutex_unlock(tsdn, &prof_accum->mtx);
-#endif
-}
-
 JEMALLOC_ALWAYS_INLINE void
 prof_active_assert() {
         cassert(config_prof);
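
The CAS loop removed above is the heart of the idump accounting; judging by the new prof_idump_accum_impl() declaration earlier in this diff, it moves into prof.c (not part of this extract) rather than disappearing. For reference, a standalone C11 sketch of the same accumulate-and-wrap pattern; the names and the 1 MiB threshold here are ours, not jemalloc's:

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdint.h>

    static _Atomic uint64_t accumbytes;
    static const uint64_t interval = 1 << 20;   /* dump threshold, in bytes */

    /*
     * Returns true when the running total crosses the interval.  The total
     * wraps modulo the interval, so a burst that overshoots by several
     * intervals still yields a single trigger: the "trigger coalescing"
     * the removed comment describes.
     */
    static bool
    accum_add(uint64_t nbytes) {
            uint64_t a0 = atomic_load_explicit(&accumbytes,
                memory_order_relaxed);
            uint64_t a1;
            bool overflow;
            do {
                    a1 = a0 + nbytes;
                    overflow = (a1 >= interval);
                    if (overflow) {
                            a1 %= interval;
                    }
            } while (!atomic_compare_exchange_weak_explicit(&accumbytes,
                &a0, a1, memory_order_relaxed, memory_order_relaxed));
            return overflow;
    }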

@@ -93,4 +25,26 @@ prof_active_get_unlocked(void) {
         return prof_active;
 }
 
+JEMALLOC_ALWAYS_INLINE bool
+prof_idump_accum(tsdn_t *tsdn, uint64_t accumbytes) {
+        cassert(config_prof);
+
+        if (prof_interval == 0 || !prof_active_get_unlocked()) {
+                return false;
+        }
+
+        return prof_idump_accum_impl(tsdn, accumbytes);
+}
+
+JEMALLOC_ALWAYS_INLINE void
+prof_idump_rollback(tsdn_t *tsdn, size_t usize) {
+        cassert(config_prof);
+
+        if (prof_interval == 0 || !prof_active_get_unlocked()) {
+                return;
+        }
+
+        prof_idump_rollback_impl(tsdn, usize);
+}
+
 #endif /* JEMALLOC_INTERNAL_PROF_INLINES_A_H */
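
The new wrappers keep the old fast-path gate and defer the real work to the impl functions. A hypothetical call site, to show the intended shape (the actual hook lives in the thread event handler, which this extract does not include):

    /*
     * Sketch only: the function name and the elapsed-bytes bookkeeping are
     * illustrative, not this commit's code.
     */
    static void
    prof_sample_event_handler(tsd_t *tsd, uint64_t elapsed_bytes) {
            if (prof_idump_accum(tsd_tsdn(tsd), elapsed_bytes)) {
                    prof_idump(tsd_tsdn(tsd));
            }
    }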

@@ -93,9 +93,6 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache,
         if (config_stats) {
                 bin->tstats.nrequests++;
         }
-        if (config_prof) {
-                tcache->prof_accumbytes += usize;
-        }
         tcache_event(tsd, tcache);
         return ret;
 }

@@ -151,9 +148,6 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size,
                 if (config_stats) {
                         bin->tstats.nrequests++;
                 }
-                if (config_prof) {
-                        tcache->prof_accumbytes += usize;
-                }
         }
 
         tcache_event(tsd, tcache);

@@ -16,10 +16,9 @@ struct tcache_s {
          * together at the start of this struct.
          */
 
-        /* Cleared after arena_prof_accum(). */
-        uint64_t prof_accumbytes;
         /* Drives incremental GC. */
         ticker_t gc_ticker;
 
         /*
          * The pointer stacks associated with bins follow as a contiguous array.
          * During tcache initialization, the avail pointer in each element of

@@ -47,8 +47,8 @@ typedef struct tcaches_s tcaches_t;
 #define TCACHE_GC_INCR \
     ((TCACHE_GC_SWEEP / SC_NBINS) + ((TCACHE_GC_SWEEP / SC_NBINS == 0) ? 0 : 1))
 
-/* Used in TSD static initializer only. Real init in tcache_data_init(). */
-#define TCACHE_ZERO_INITIALIZER {0}
+/* Used in TSD static initializer only. Real init in tsd_tcache_data_init(). */
+#define TCACHE_ZERO_INITIALIZER {{0}}
 
 /* Used in TSD static initializer only. Will be initialized to opt_tcache. */
 #define TCACHE_ENABLED_ZERO_INITIALIZER false
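
The {0} to {{0}} change follows directly from the struct change above: with the uint64_t prof_accumbytes gone, the first member of the tcache struct is the ticker_t gc_ticker, itself a struct, so zero-initialization wants an inner brace level (otherwise compilers warn about missing braces). Illustratively, with made-up field names rather than the real ticker_t definition:

    typedef struct { int32_t tick; int32_t nticks; } ticker_sketch_t;
    typedef struct {
            ticker_sketch_t gc_ticker;   /* now the first member */
            /* ... cache bins follow ... */
    } tcache_sketch_t;
    static tcache_sketch_t t = {{0}};    /* inner braces: gc_ticker */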

@@ -44,7 +44,8 @@ void thread_event_boot();
         C(thread_allocated_next_event_fast) \
         C(thread_allocated_last_event) \
         C(thread_allocated_next_event) \
-        ITERATE_OVER_ALL_EVENTS
+        ITERATE_OVER_ALL_EVENTS \
+        C(prof_sample_last_event)
 
 /* Getters directly wrap TSD getters. */
 #define C(counter) \
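
ITERATE_OVER_ALL_EVENTS and the C() list form an X-macro: the counter list is written once and re-expanded under different definitions of C. A minimal sketch of the getter expansion (counter names are made up, and it assumes tsd_*_get() accessors generated by the TSD O() list, as in the tsd.h hunks below):

    #define SKETCH_COUNTERS \
            C(foo_bytes) \
            C(bar_bytes)

    /* Each listed counter expands into a getter wrapping its TSD slot. */
    #define C(counter) \
            static inline uint64_t \
            counter##_get(tsd_t *tsd) { \
                    return tsd_##counter##_get(tsd); \
            }
    SKETCH_COUNTERS
    #undef C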

@@ -30,6 +30,7 @@
 * l: thread_allocated_last_event
 * j: thread_allocated_next_event
 * w: prof_sample_event_wait (config_prof)
+* x: prof_sample_last_event (config_prof)
 * p: prof_tdata (config_prof)
 * v: offset_state
 * i: iarena

@@ -45,11 +46,11 @@
 * |---------------------------- 2nd cacheline ----------------------------|
 * | [c * 64 ........ ........ ........ ........ ........ ........ .......] |
 * |---------------------------- 3nd cacheline ----------------------------|
-* | [c * 32 ........ ........ .......] llllllll jjjjjjjj wwwwwwww pppppppp |
+* | [c * 32 ........ ........ .......] llllllll jjjjjjjj wwwwwwww xxxxxxxx |
 * +---------------------------- 4th cacheline ----------------------------+
-* | vvvvvvvv iiiiiiii aaaaaaaa oooooooo [b...... ........ ........ ........ |
+* | pppppppp vvvvvvvv iiiiiiii aaaaaaaa oooooooo [b...... ........ ........ |
 * +---------------------------- 5th cacheline ----------------------------+
-* | ..b][t.. ........ ........ ........ ........ ........ ........ ........ |
+* | ........ ..b][t.. ........ ........ ........ ........ ........ ........ |
 * +-------------------------------------------------------------------------+
 * Note: the entire tcache is embedded into TSD and spans multiple cachelines.
 *

@@ -83,6 +84,7 @@ typedef void (*test_callback_t)(int *);
         O(thread_allocated_last_event, uint64_t, uint64_t) \
         O(thread_allocated_next_event, uint64_t, uint64_t) \
         O(prof_sample_event_wait, uint64_t, uint64_t) \
+        O(prof_sample_last_event, uint64_t, uint64_t) \
         O(prof_tdata, prof_tdata_t *, prof_tdata_t *) \
         O(offset_state, uint64_t, uint64_t) \
         O(iarena, arena_t *, arena_t *) \

@@ -109,9 +111,10 @@
         /* thread_allocated_next_event_fast */ THREAD_EVENT_MIN_START_WAIT, \
         /* thread_deallocated */ 0, \
         /* rtree_ctx */ RTREE_CTX_ZERO_INITIALIZER, \
         /* thread_allocated_last_event */ 0, \
         /* thread_allocated_next_event */ THREAD_EVENT_MIN_START_WAIT, \
         /* prof_sample_event_wait */ THREAD_EVENT_MIN_START_WAIT, \
+        /* prof_sample_last_event */ 0, \
         /* prof_tdata */ NULL, \
         /* offset_state */ 0, \
         /* iarena */ NULL, \
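
Taken together, the tsd.h and thread_event.h hunks show the lockstep updates a new thread-local counter requires: the cacheline layout comment, the O() field list, the static initializer, and the C() event iteration list all gain a prof_sample_last_event entry.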