Pull prof_accumbytes into thread event handler

Yinan Zhang 2019-10-14 09:35:51 -07:00
parent 152c0ef954
commit 198f02e797
16 changed files with 148 additions and 177 deletions
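In brief (a condensed sketch of the change, assembled only from the hunks below; all identifiers come from the diff itself): each thread now records in TSD how many bytes it had allocated at its previous prof-sample event, and the thread event handler feeds the delta since then into a single process-wide accumulator, triggering an interval dump (idump) when prof_interval is crossed. This replaces the per-tcache prof_accumbytes counters and the per-arena prof_accum state, so the tcache allocation and flush paths no longer update prof_accumbytes.

static void
thread_prof_sample_event_handler(tsd_t *tsd) {
    /* Bytes this thread allocated since its previous sample event. */
    uint64_t last_event = thread_allocated_last_event_get(tsd);
    uint64_t last_sample_event = prof_sample_last_event_get(tsd);
    prof_sample_last_event_set(tsd, last_event);
    /* Push the delta into the global accumulator; dump on interval overflow. */
    if (prof_idump_accum(tsd_tsdn(tsd), last_event - last_sample_event)) {
        prof_idump(tsd_tsdn(tsd));
    }
}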

View File

@@ -49,7 +49,7 @@ void arena_decay(tsdn_t *tsdn, arena_t *arena, bool is_background_thread,
 void arena_reset(tsd_t *tsd, arena_t *arena);
 void arena_destroy(tsd_t *tsd, arena_t *arena);
 void arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache,
-    cache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes);
+    cache_bin_t *tbin, szind_t binind);
 void arena_alloc_junk_small(void *ptr, const bin_info_t *bin_info,
     bool zero);

View File

@@ -21,17 +21,6 @@ arena_internal_get(arena_t *arena) {
     return atomic_load_zu(&arena->stats.internal, ATOMIC_RELAXED);
 }
-static inline bool
-arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes) {
-    cassert(config_prof);
-    if (likely(prof_interval == 0 || !prof_active_get_unlocked())) {
-        return false;
-    }
-    return prof_accum_add(tsdn, &arena->prof_accum, accumbytes);
-}
 static inline void
 percpu_arena_update(tsd_t *tsd, unsigned cpu) {
     assert(have_percpu_arena);

View File

@@ -24,7 +24,7 @@ arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal) {
     if (tcache_available(tsd)) {
         tcache_t *tcache = tcache_get(tsd);
         if (tcache->arena != NULL) {
-            /* See comments in tcache_data_init().*/
+            /* See comments in tsd_tcache_data_init().*/
             assert(tcache->arena ==
                 arena_get(tsd_tsdn(tsd), 0, false));
             if (tcache->arena != ret) {

View File

@@ -33,13 +33,7 @@ extern bool prof_active;
 /* Accessed via prof_gdump_[gs]et{_unlocked,}(). */
 extern bool prof_gdump_val;
-/*
- * Profile dump interval, measured in bytes allocated. Each arena triggers a
- * profile dump when it reaches this threshold. The effect is that the
- * interval between profile dumps averages prof_interval, though the actual
- * interval between dumps will tend to be sporadic, and the interval will be a
- * maximum of approximately (prof_interval * narenas).
- */
+/* Profile dump interval, measured in bytes allocated. */
 extern uint64_t prof_interval;
 /*
@@ -50,6 +44,10 @@ extern size_t lg_prof_sample;
 extern bool prof_booted;
+/* Functions only accessed in prof_inlines_a.h */
+bool prof_idump_accum_impl(tsdn_t *tsdn, uint64_t accumbytes);
+void prof_idump_rollback_impl(tsdn_t *tsdn, size_t usize);
 void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated);
 void prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize,
     prof_tctx_t *tctx);
@@ -73,7 +71,7 @@ void prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, uint64_t *accumobjs,
 #endif
 int prof_getpid(void);
 void prof_get_default_filename(tsdn_t *tsdn, char *filename, uint64_t ind);
-bool prof_accum_init(tsdn_t *tsdn, prof_accum_t *prof_accum);
+bool prof_accum_init(tsdn_t *tsdn);
 void prof_idump(tsdn_t *tsdn);
 bool prof_mdump(tsd_t *tsd, const char *filename);
 void prof_gdump(tsdn_t *tsdn);

View File

@@ -3,74 +3,6 @@
 #include "jemalloc/internal/mutex.h"
-static inline bool
-prof_accum_add(tsdn_t *tsdn, prof_accum_t *prof_accum,
-    uint64_t accumbytes) {
-    cassert(config_prof);
-    bool overflow;
-    uint64_t a0, a1;
-    /*
-     * If the application allocates fast enough (and/or if idump is slow
-     * enough), extreme overflow here (a1 >= prof_interval * 2) can cause
-     * idump trigger coalescing. This is an intentional mechanism that
-     * avoids rate-limiting allocation.
-     */
-#ifdef JEMALLOC_ATOMIC_U64
-    a0 = atomic_load_u64(&prof_accum->accumbytes, ATOMIC_RELAXED);
-    do {
-        a1 = a0 + accumbytes;
-        assert(a1 >= a0);
-        overflow = (a1 >= prof_interval);
-        if (overflow) {
-            a1 %= prof_interval;
-        }
-    } while (!atomic_compare_exchange_weak_u64(&prof_accum->accumbytes, &a0,
-        a1, ATOMIC_RELAXED, ATOMIC_RELAXED));
-#else
-    malloc_mutex_lock(tsdn, &prof_accum->mtx);
-    a0 = prof_accum->accumbytes;
-    a1 = a0 + accumbytes;
-    overflow = (a1 >= prof_interval);
-    if (overflow) {
-        a1 %= prof_interval;
-    }
-    prof_accum->accumbytes = a1;
-    malloc_mutex_unlock(tsdn, &prof_accum->mtx);
-#endif
-    return overflow;
-}
-static inline void
-prof_accum_cancel(tsdn_t *tsdn, prof_accum_t *prof_accum,
-    size_t usize) {
-    cassert(config_prof);
-    /*
-     * Cancel out as much of the excessive prof_accumbytes increase as
-     * possible without underflowing. Interval-triggered dumps occur
-     * slightly more often than intended as a result of incomplete
-     * canceling.
-     */
-    uint64_t a0, a1;
-#ifdef JEMALLOC_ATOMIC_U64
-    a0 = atomic_load_u64(&prof_accum->accumbytes, ATOMIC_RELAXED);
-    do {
-        a1 = (a0 >= SC_LARGE_MINCLASS - usize)
-            ? a0 - (SC_LARGE_MINCLASS - usize) : 0;
-    } while (!atomic_compare_exchange_weak_u64(&prof_accum->accumbytes, &a0,
-        a1, ATOMIC_RELAXED, ATOMIC_RELAXED));
-#else
-    malloc_mutex_lock(tsdn, &prof_accum->mtx);
-    a0 = prof_accum->accumbytes;
-    a1 = (a0 >= SC_LARGE_MINCLASS - usize)
-        ? a0 - (SC_LARGE_MINCLASS - usize) : 0;
-    prof_accum->accumbytes = a1;
-    malloc_mutex_unlock(tsdn, &prof_accum->mtx);
-#endif
-}
 JEMALLOC_ALWAYS_INLINE void
 prof_active_assert() {
     cassert(config_prof);
@@ -93,4 +25,26 @@ prof_active_get_unlocked(void) {
     return prof_active;
 }
+JEMALLOC_ALWAYS_INLINE bool
+prof_idump_accum(tsdn_t *tsdn, uint64_t accumbytes) {
+    cassert(config_prof);
+    if (prof_interval == 0 || !prof_active_get_unlocked()) {
+        return false;
+    }
+    return prof_idump_accum_impl(tsdn, accumbytes);
+}
+JEMALLOC_ALWAYS_INLINE void
+prof_idump_rollback(tsdn_t *tsdn, size_t usize) {
+    cassert(config_prof);
+    if (prof_interval == 0 || !prof_active_get_unlocked()) {
+        return;
+    }
+    prof_idump_rollback_impl(tsdn, usize);
+}
 #endif /* JEMALLOC_INTERNAL_PROF_INLINES_A_H */

View File

@@ -93,9 +93,6 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache,
     if (config_stats) {
         bin->tstats.nrequests++;
     }
-    if (config_prof) {
-        tcache->prof_accumbytes += usize;
-    }
     tcache_event(tsd, tcache);
     return ret;
 }
@@ -151,9 +148,6 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size,
         if (config_stats) {
             bin->tstats.nrequests++;
         }
-        if (config_prof) {
-            tcache->prof_accumbytes += usize;
-        }
     }
     tcache_event(tsd, tcache);

View File

@@ -16,10 +16,9 @@ struct tcache_s {
      * together at the start of this struct.
      */
-    /* Cleared after arena_prof_accum(). */
-    uint64_t prof_accumbytes;
     /* Drives incremental GC. */
     ticker_t gc_ticker;
     /*
      * The pointer stacks associated with bins follow as a contiguous array.
      * During tcache initialization, the avail pointer in each element of

View File

@@ -47,8 +47,8 @@ typedef struct tcaches_s tcaches_t;
 #define TCACHE_GC_INCR \
     ((TCACHE_GC_SWEEP / SC_NBINS) + ((TCACHE_GC_SWEEP / SC_NBINS == 0) ? 0 : 1))
-/* Used in TSD static initializer only. Real init in tcache_data_init(). */
-#define TCACHE_ZERO_INITIALIZER {0}
+/* Used in TSD static initializer only. Real init in tsd_tcache_data_init(). */
+#define TCACHE_ZERO_INITIALIZER {{0}}
 /* Used in TSD static initializer only. Will be initialized to opt_tcache. */
 #define TCACHE_ENABLED_ZERO_INITIALIZER false

View File

@@ -44,7 +44,8 @@ void thread_event_boot();
     C(thread_allocated_next_event_fast) \
     C(thread_allocated_last_event) \
     C(thread_allocated_next_event) \
-    ITERATE_OVER_ALL_EVENTS
+    ITERATE_OVER_ALL_EVENTS \
+    C(prof_sample_last_event)
 /* Getters directly wrap TSD getters. */
 #define C(counter) \

View File

@@ -30,6 +30,7 @@
  * l: thread_allocated_last_event
  * j: thread_allocated_next_event
  * w: prof_sample_event_wait (config_prof)
+ * x: prof_sample_last_event (config_prof)
  * p: prof_tdata (config_prof)
  * v: offset_state
  * i: iarena
@@ -45,11 +46,11 @@
  * |---------------------------- 2nd cacheline ----------------------------|
  * | [c * 64 ........ ........ ........ ........ ........ ........ .......] |
  * |---------------------------- 3nd cacheline ----------------------------|
- * | [c * 32 ........ ........ .......] llllllll jjjjjjjj wwwwwwww pppppppp |
+ * | [c * 32 ........ ........ .......] llllllll jjjjjjjj wwwwwwww xxxxxxxx |
  * +---------------------------- 4th cacheline ----------------------------+
- * | vvvvvvvv iiiiiiii aaaaaaaa oooooooo [b...... ........ ........ ........ |
+ * | pppppppp vvvvvvvv iiiiiiii aaaaaaaa oooooooo [b...... ........ ........ |
  * +---------------------------- 5th cacheline ----------------------------+
- * | ..b][t.. ........ ........ ........ ........ ........ ........ ........ |
+ * | ........ ..b][t.. ........ ........ ........ ........ ........ ........ |
  * +-------------------------------------------------------------------------+
  * Note: the entire tcache is embedded into TSD and spans multiple cachelines.
  *
@@ -83,6 +84,7 @@ typedef void (*test_callback_t)(int *);
     O(thread_allocated_last_event, uint64_t, uint64_t) \
     O(thread_allocated_next_event, uint64_t, uint64_t) \
     O(prof_sample_event_wait, uint64_t, uint64_t) \
+    O(prof_sample_last_event, uint64_t, uint64_t) \
     O(prof_tdata, prof_tdata_t *, prof_tdata_t *) \
     O(offset_state, uint64_t, uint64_t) \
     O(iarena, arena_t *, arena_t *) \
@@ -109,9 +111,10 @@ typedef void (*test_callback_t)(int *);
     /* thread_allocated_next_event_fast */ THREAD_EVENT_MIN_START_WAIT, \
     /* thread_deallocated */ 0, \
     /* rtree_ctx */ RTREE_CTX_ZERO_INITIALIZER, \
     /* thread_allocated_last_event */ 0, \
     /* thread_allocated_next_event */ THREAD_EVENT_MIN_START_WAIT, \
     /* prof_sample_event_wait */ THREAD_EVENT_MIN_START_WAIT, \
+    /* prof_sample_last_event */ 0, \
     /* prof_tdata */ NULL, \
     /* offset_state */ 0, \
     /* iarena */ NULL, \

View File

@@ -1378,13 +1378,10 @@ arena_bin_choose_lock(tsdn_t *tsdn, arena_t *arena, szind_t binind,
 void
 arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache,
-    cache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes) {
+    cache_bin_t *tbin, szind_t binind) {
     unsigned i, nfill, cnt;
     assert(cache_bin_ncached_get(tbin, binind) == 0);
-    if (config_prof && arena_prof_accum(tsdn, arena, prof_accumbytes)) {
-        prof_idump(tsdn);
-    }
     tcache->bin_refilled[binind] = true;
     unsigned binshard;
@@ -1484,10 +1481,8 @@ arena_malloc_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) {
         bin->stats.nrequests++;
         bin->stats.curregs++;
     }
     malloc_mutex_unlock(tsdn, &bin->lock);
-    if (config_prof && arena_prof_accum(tsdn, arena, usize)) {
-        prof_idump(tsdn);
-    }
     if (!zero) {
         if (config_fill) {
@@ -1565,14 +1560,13 @@ arena_prof_promote(tsdn_t *tsdn, void *ptr, size_t usize) {
     extent_t *extent = rtree_extent_read(tsdn, &extents_rtree, rtree_ctx,
         (uintptr_t)ptr, true);
-    arena_t *arena = arena_get_from_extent(extent);
     szind_t szind = sz_size2index(usize);
     extent_szind_set(extent, szind);
     rtree_szind_slab_update(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)ptr,
         szind, false);
-    prof_accum_cancel(tsdn, &arena->prof_accum, usize);
+    prof_idump_rollback(tsdn, usize);
     assert(isalloc(tsdn, ptr) == usize);
 }
@@ -1982,7 +1976,7 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) {
     }
     if (config_prof) {
-        if (prof_accum_init(tsdn, &arena->prof_accum)) {
+        if (prof_accum_init(tsdn)) {
             goto label_error;
         }
     }

View File

@@ -2386,9 +2386,6 @@ je_malloc(size_t size) {
     if (config_stats) {
         bin->tstats.nrequests++;
     }
-    if (config_prof) {
-        tcache->prof_accumbytes += usize;
-    }
     LOG("core.malloc.exit", "result: %p", ret);

View File

@@ -56,9 +56,6 @@ large_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment,
         extent_list_append(&arena->large, extent);
         malloc_mutex_unlock(tsdn, &arena->large_mtx);
     }
-    if (config_prof && arena_prof_accum(tsdn, arena, usize)) {
-        prof_idump(tsdn);
-    }
     if (zero) {
         assert(is_zeroed);

View File

@@ -45,6 +45,9 @@ bool opt_prof_leak = false;
 bool opt_prof_accum = false;
 char opt_prof_prefix[PROF_DUMP_FILENAME_LEN];
+/* Accessed via prof_idump_[accum/rollback](). */
+static prof_accum_t prof_idump_accumulated;
 /*
  * Initialized as opt_prof_active, and accessed via
  * prof_active_[gs]et{_unlocked,}().
@@ -586,21 +589,91 @@ prof_fdump(void) {
 }
 bool
-prof_accum_init(tsdn_t *tsdn, prof_accum_t *prof_accum) {
+prof_accum_init(tsdn_t *tsdn) {
     cassert(config_prof);
 #ifndef JEMALLOC_ATOMIC_U64
-    if (malloc_mutex_init(&prof_accum->mtx, "prof_accum",
+    if (malloc_mutex_init(&prof_idump_accumulated.mtx, "prof_accum",
         WITNESS_RANK_PROF_ACCUM, malloc_mutex_rank_exclusive)) {
         return true;
     }
-    prof_accum->accumbytes = 0;
+    prof_idump_accumulated.accumbytes = 0;
 #else
-    atomic_store_u64(&prof_accum->accumbytes, 0, ATOMIC_RELAXED);
+    atomic_store_u64(&prof_idump_accumulated.accumbytes, 0,
+        ATOMIC_RELAXED);
 #endif
     return false;
 }
+bool
+prof_idump_accum_impl(tsdn_t *tsdn, uint64_t accumbytes) {
+    cassert(config_prof);
+    bool overflow;
+    uint64_t a0, a1;
+    /*
+     * If the application allocates fast enough (and/or if idump is slow
+     * enough), extreme overflow here (a1 >= prof_interval * 2) can cause
+     * idump trigger coalescing. This is an intentional mechanism that
+     * avoids rate-limiting allocation.
+     */
+#ifdef JEMALLOC_ATOMIC_U64
+    a0 = atomic_load_u64(&prof_idump_accumulated.accumbytes,
+        ATOMIC_RELAXED);
+    do {
+        a1 = a0 + accumbytes;
+        assert(a1 >= a0);
+        overflow = (a1 >= prof_interval);
+        if (overflow) {
+            a1 %= prof_interval;
+        }
+    } while (!atomic_compare_exchange_weak_u64(
+        &prof_idump_accumulated.accumbytes, &a0, a1, ATOMIC_RELAXED,
+        ATOMIC_RELAXED));
+#else
+    malloc_mutex_lock(tsdn, &prof_idump_accumulated.mtx);
+    a0 = prof_idump_accumulated.accumbytes;
+    a1 = a0 + accumbytes;
+    overflow = (a1 >= prof_interval);
+    if (overflow) {
+        a1 %= prof_interval;
+    }
+    prof_idump_accumulated.accumbytes = a1;
+    malloc_mutex_unlock(tsdn, &prof_idump_accumulated.mtx);
+#endif
+    return overflow;
+}
+void
+prof_idump_rollback_impl(tsdn_t *tsdn, size_t usize) {
+    cassert(config_prof);
+    /*
+     * Cancel out as much of the excessive accumbytes increase as possible
+     * without underflowing. Interval-triggered dumps occur slightly more
+     * often than intended as a result of incomplete canceling.
+     */
+    uint64_t a0, a1;
+#ifdef JEMALLOC_ATOMIC_U64
+    a0 = atomic_load_u64(&prof_idump_accumulated.accumbytes,
+        ATOMIC_RELAXED);
+    do {
+        a1 = (a0 >= SC_LARGE_MINCLASS - usize)
+            ? a0 - (SC_LARGE_MINCLASS - usize) : 0;
+    } while (!atomic_compare_exchange_weak_u64(
+        &prof_idump_accumulated.accumbytes, &a0, a1, ATOMIC_RELAXED,
+        ATOMIC_RELAXED));
+#else
+    malloc_mutex_lock(tsdn, &prof_idump_accumulated.mtx);
+    a0 = prof_idump_accumulated.accumbytes;
+    a1 = (a0 >= SC_LARGE_MINCLASS - usize)
+        ? a0 - (SC_LARGE_MINCLASS - usize) : 0;
+    prof_idump_accumulated.accumbytes = a1;
+    malloc_mutex_unlock(tsdn, &prof_idump_accumulated.mtx);
+#endif
+}
 bool
 prof_dump_prefix_set(tsdn_t *tsdn, const char *prefix) {
     cassert(config_prof);
@@ -641,7 +714,7 @@ prof_idump(tsdn_t *tsdn) {
         return;
     }
-    tdata = prof_tdata_get(tsd, false);
+    tdata = prof_tdata_get(tsd, true);
     if (tdata == NULL) {
         return;
     }

View File

@@ -106,11 +106,7 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache,
     void *ret;
     assert(tcache->arena != NULL);
-    arena_tcache_fill_small(tsdn, arena, tcache, tbin, binind,
-        config_prof ? tcache->prof_accumbytes : 0);
-    if (config_prof) {
-        tcache->prof_accumbytes = 0;
-    }
+    arena_tcache_fill_small(tsdn, arena, tcache, tbin, binind);
     ret = cache_bin_alloc_easy(tbin, tcache_success, binind);
     return ret;
@@ -181,14 +177,6 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin,
         assert(binshard < bin_infos[binind].n_shards);
         bin_t *bin = &bin_arena->bins[binind].bin_shards[binshard];
-        if (config_prof && bin_arena == arena) {
-            if (arena_prof_accum(tsd_tsdn(tsd), arena,
-                tcache->prof_accumbytes)) {
-                prof_idump(tsd_tsdn(tsd));
-            }
-            tcache->prof_accumbytes = 0;
-        }
         malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock);
         if (config_stats && bin_arena == arena && !merged_stats) {
             merged_stats = true;
@@ -274,11 +262,6 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, szind_t
         unsigned locked_arena_ind = extent_arena_ind_get(extent);
         arena_t *locked_arena = arena_get(tsd_tsdn(tsd),
             locked_arena_ind, false);
-        bool idump;
-        if (config_prof) {
-            idump = false;
-        }
         bool lock_large = !arena_is_auto(locked_arena);
         if (lock_large) {
@@ -295,11 +278,6 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, szind_t
         }
         if ((config_prof || config_stats) &&
             (locked_arena == tcache_arena)) {
-            if (config_prof) {
-                idump = arena_prof_accum(tsd_tsdn(tsd),
-                    tcache_arena, tcache->prof_accumbytes);
-                tcache->prof_accumbytes = 0;
-            }
             if (config_stats) {
                 merged_stats = true;
                 arena_stats_large_flush_nrequests_add(
@@ -332,9 +310,6 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, szind_t
             ndeferred++;
         }
     }
-    if (config_prof && idump) {
-        prof_idump(tsd_tsdn(tsd));
-    }
     arena_decay_ticks(tsd_tsdn(tsd), locked_arena, nflush -
         ndeferred);
     nflush = ndeferred;
@@ -462,7 +437,6 @@ tcache_init(tsd_t *tsd, tcache_t *tcache, void *avail_stack) {
     assert(!tcache_bin_lowbits_overflowable(avail_stack));
     memset(&tcache->link, 0, sizeof(ql_elm(tcache_t)));
-    tcache->prof_accumbytes = 0;
     tcache->next_gc_bin = 0;
     tcache->arena = NULL;
@@ -590,14 +564,6 @@ tcache_flush_cache(tsd_t *tsd, tcache_t *tcache) {
             assert(tbin->tstats.nrequests == 0);
         }
     }
-    if (config_prof && tcache->prof_accumbytes > 0) {
-        if (arena_prof_accum(tsd_tsdn(tsd), tcache->arena,
-            tcache->prof_accumbytes)) {
-            prof_idump(tsd_tsdn(tsd));
-        }
-        tcache->prof_accumbytes = 0;
-    }
 }
 void

View File

@@ -18,6 +18,29 @@ static void thread_##event##_event_handler(tsd_t *tsd);
 ITERATE_OVER_ALL_EVENTS
 #undef E
+static void
+thread_prof_sample_event_handler(tsd_t *tsd) {
+    assert(config_prof && opt_prof);
+    assert(prof_sample_event_wait_get(tsd) == 0U);
+    uint64_t last_event = thread_allocated_last_event_get(tsd);
+    uint64_t last_sample_event = prof_sample_last_event_get(tsd);
+    prof_sample_last_event_set(tsd, last_event);
+    if (prof_idump_accum(tsd_tsdn(tsd), last_event - last_sample_event)) {
+        prof_idump(tsd_tsdn(tsd));
+    }
+    if (!prof_active_get_unlocked()) {
+        /*
+         * If prof_active is off, we reset prof_sample_event_wait to be
+         * the sample interval when it drops to 0, so that there won't
+         * be excessive routings to the slow path, and that when
+         * prof_active is turned on later, the counting for sampling
+         * can immediately resume as normal.
+         */
+        thread_prof_sample_event_update(tsd,
+            (uint64_t)(1 << lg_prof_sample));
+    }
+}
 static uint64_t
 thread_allocated_next_event_compute(tsd_t *tsd) {
     uint64_t wait = THREAD_EVENT_MAX_START_WAIT;
@@ -86,23 +109,6 @@ thread_event_adjust_thresholds_helper(tsd_t *tsd, uint64_t wait) {
     thread_allocated_next_event_fast_set(tsd, next_event_fast);
 }
-static void
-thread_prof_sample_event_handler(tsd_t *tsd) {
-    assert(config_prof && opt_prof);
-    assert(prof_sample_event_wait_get(tsd) == 0U);
-    if (!prof_active_get_unlocked()) {
-        /*
-         * If prof_active is off, we reset prof_sample_event_wait to be
-         * the sample interval when it drops to 0, so that there won't
-         * be excessive routings to the slow path, and that when
-         * prof_active is turned on later, the counting for sampling
-         * can immediately resume as normal.
-         */
-        thread_prof_sample_event_update(tsd,
-            (uint64_t)(1 << lg_prof_sample));
-    }
-}
 static uint64_t
 thread_event_trigger_batch_update(tsd_t *tsd, uint64_t accumbytes,
     bool allow_event_trigger) {