Convert arena->prof_accumbytes synchronization to atomics.

Jason Evans 2017-02-12 17:03:46 -08:00
parent b779522b9b
commit fa2d64c94b
15 changed files with 128 additions and 59 deletions
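The heart of the change is the new prof_accum_add() in include/jemalloc/internal/prof_inlines_a.h below: the per-arena profiling byte counter is advanced with a compare-and-swap loop instead of under arena->lock, with a mutex fallback on platforms that lack 64-bit atomics. As a reader's aid, here is a minimal standalone sketch of the accumulate-and-wrap pattern, written against C11 <stdatomic.h> rather than jemalloc's internal atomic_read_u64/atomic_cas_u64 wrappers; the names accum_add and interval are illustrative, not jemalloc API.

/*
 * Sketch only: C11 equivalent of the CAS loop this commit introduces.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

static _Atomic uint64_t accumbytes;       /* stand-in for prof_accum->accumbytes */
static const uint64_t interval = 1 << 20; /* stand-in for prof_interval */

/* Returns true when the caller should trigger a profile dump. */
static bool
accum_add(uint64_t nbytes) {
    uint64_t a0 = atomic_load_explicit(&accumbytes, memory_order_relaxed);
    uint64_t a1;
    bool overflow;

    do {
        a1 = a0 + nbytes;
        overflow = (a1 >= interval);
        if (overflow) {
            a1 %= interval; /* keep the remainder; excess dumps coalesce */
        }
        /* On failure, a0 is refreshed with the current counter value. */
    } while (!atomic_compare_exchange_weak_explicit(&accumbytes, &a0, a1,
        memory_order_relaxed, memory_order_relaxed));
    return overflow;
}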

include/jemalloc/internal/arena_inlines_a.h

@@ -6,8 +6,6 @@ unsigned arena_ind_get(const arena_t *arena);
 void arena_internal_add(arena_t *arena, size_t size);
 void arena_internal_sub(arena_t *arena, size_t size);
 size_t arena_internal_get(arena_t *arena);
-bool arena_prof_accum_impl(arena_t *arena, uint64_t accumbytes);
-bool arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes);
 bool arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes);
 #endif /* JEMALLOC_ENABLE_INLINE */
@@ -33,29 +31,6 @@ arena_internal_get(arena_t *arena) {
 	return atomic_read_zu(&arena->stats.internal);
 }
-
-JEMALLOC_INLINE bool
-arena_prof_accum_impl(arena_t *arena, uint64_t accumbytes) {
-	cassert(config_prof);
-	assert(prof_interval != 0);
-
-	arena->prof_accumbytes += accumbytes;
-	if (arena->prof_accumbytes >= prof_interval) {
-		arena->prof_accumbytes %= prof_interval;
-		return true;
-	}
-	return false;
-}
-
-JEMALLOC_INLINE bool
-arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes) {
-	cassert(config_prof);
-
-	if (likely(prof_interval == 0)) {
-		return false;
-	}
-	return arena_prof_accum_impl(arena, accumbytes);
-}
 
 JEMALLOC_INLINE bool
 arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes) {
 	cassert(config_prof);
@@ -64,14 +39,7 @@ arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes) {
 		return false;
 	}
 
-	{
-		bool ret;
-
-		malloc_mutex_lock(tsdn, &arena->lock);
-		ret = arena_prof_accum_impl(arena, accumbytes);
-		malloc_mutex_unlock(tsdn, &arena->lock);
-		return ret;
-	}
+	return prof_accum_add(tsdn, &arena->prof_accum, accumbytes);
 }
 
 #endif /* (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_)) */

include/jemalloc/internal/arena_structs_b.h

@@ -138,7 +138,8 @@ struct arena_s {
 	 */
 	ql_head(tcache_t) tcache_ql;
 
-	/* Synchronization: lock. */
+	/* Synchronization: internal. */
+	prof_accum_t prof_accum;
 	uint64_t prof_accumbytes;
 
 	/*

include/jemalloc/internal/atomic_inlines.h

@@ -23,7 +23,7 @@
  */
 
 #ifndef JEMALLOC_ENABLE_INLINE
-# if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3)
+# ifdef JEMALLOC_ATOMIC_U64
 uint64_t atomic_add_u64(uint64_t *p, uint64_t x);
 uint64_t atomic_sub_u64(uint64_t *p, uint64_t x);
 bool atomic_cas_u64(uint64_t *p, uint64_t c, uint64_t s);
@@ -50,7 +50,7 @@ void atomic_write_u(unsigned *p, unsigned x);
 
 #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ATOMIC_C_))
 /******************************************************************************/
 /* 64-bit operations. */
-#if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3)
+#ifdef JEMALLOC_ATOMIC_U64
 # if (defined(__amd64__) || defined(__x86_64__))
 JEMALLOC_INLINE uint64_t
 atomic_add_u64(uint64_t *p, uint64_t x) {

include/jemalloc/internal/atomic_types.h (new file)

@@ -0,0 +1,8 @@
+#ifndef JEMALLOC_INTERNAL_ATOMIC_TYPES_H
+#define JEMALLOC_INTERNAL_ATOMIC_TYPES_H
+
+#if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3)
+# define JEMALLOC_ATOMIC_U64
+#endif
+
+#endif /* JEMALLOC_INTERNAL_ATOMIC_TYPES_H */

include/jemalloc/internal/jemalloc_internal.h.in

@@ -380,6 +380,7 @@ typedef unsigned szind_t;
 #include "jemalloc/internal/nstime_types.h"
 #include "jemalloc/internal/util_types.h"
+#include "jemalloc/internal/atomic_types.h"
 #include "jemalloc/internal/spin_types.h"
 #include "jemalloc/internal/prng_types.h"
 #include "jemalloc/internal/ticker_types.h"
@@ -419,10 +420,10 @@ typedef unsigned szind_t;
 #include "jemalloc/internal/extent_structs.h"
 #include "jemalloc/internal/extent_dss_structs.h"
 #include "jemalloc/internal/base_structs.h"
+#include "jemalloc/internal/prof_structs.h"
 #include "jemalloc/internal/arena_structs_b.h"
 #include "jemalloc/internal/rtree_structs.h"
 #include "jemalloc/internal/tcache_structs.h"
-#include "jemalloc/internal/prof_structs.h"
 #include "jemalloc/internal/tsd_structs.h"
@@ -902,6 +903,7 @@ decay_ticker_get(tsd_t *tsd, unsigned ind) {
  * Include portions of arena code interleaved with tcache code in order to
  * resolve circular dependencies.
  */
+#include "jemalloc/internal/prof_inlines_a.h"
 #include "jemalloc/internal/arena_inlines_a.h"
 
 #ifndef JEMALLOC_ENABLE_INLINE
@@ -1163,8 +1165,7 @@ ixalloc(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t oldsize, size_t size,
 }
 #endif
 
-#include "jemalloc/internal/prof_inlines.h"
+#include "jemalloc/internal/prof_inlines_b.h"
 
 #ifdef __cplusplus
 }

include/jemalloc/internal/private_symbols.txt

@@ -54,8 +54,6 @@ arena_prefork1
 arena_prefork2
 arena_prefork3
 arena_prof_accum
-arena_prof_accum_impl
-arena_prof_accum_locked
 arena_prof_promote
 arena_prof_tctx_get
 arena_prof_tctx_reset
@@ -364,6 +362,9 @@ prng_range_zu
 prng_state_next_u32
 prng_state_next_u64
 prng_state_next_zu
+prof_accum_add
+prof_accum_cancel
+prof_accum_init
 prof_active
 prof_active_get
 prof_active_get_unlocked

include/jemalloc/internal/prof_externs.h

@@ -55,6 +55,7 @@ extern prof_dump_header_t *prof_dump_header;
 void prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes,
     uint64_t *accumobjs, uint64_t *accumbytes);
 #endif
+bool prof_accum_init(tsdn_t *tsdn, prof_accum_t *prof_accum);
 void prof_idump(tsdn_t *tsdn);
 bool prof_mdump(tsd_t *tsd, const char *filename);
 void prof_gdump(tsdn_t *tsdn);

include/jemalloc/internal/prof_inlines_a.h (new file)

@@ -0,0 +1,76 @@
+#ifndef JEMALLOC_INTERNAL_PROF_INLINES_A_H
+#define JEMALLOC_INTERNAL_PROF_INLINES_A_H
+
+#ifndef JEMALLOC_ENABLE_INLINE
+bool prof_accum_add(tsdn_t *tsdn, prof_accum_t *prof_accum,
+    uint64_t accumbytes);
+void prof_accum_cancel(tsdn_t *tsdn, prof_accum_t *prof_accum, size_t usize);
+#endif
+
+#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PROF_C_))
+JEMALLOC_INLINE bool
+prof_accum_add(tsdn_t *tsdn, prof_accum_t *prof_accum, uint64_t accumbytes) {
+	cassert(config_prof);
+
+	bool overflow;
+	uint64_t a0, a1;
+
+	/*
+	 * If the application allocates fast enough (and/or if idump is slow
+	 * enough), extreme overflow here (a1 >= prof_interval * 2) can cause
+	 * idump trigger coalescing.  This is an intentional mechanism that
+	 * avoids rate-limiting allocation.
+	 */
+#ifdef JEMALLOC_ATOMIC_U64
+	do {
+		a0 = atomic_read_u64(&prof_accum->accumbytes);
+		a1 = a0 + accumbytes;
+		assert(a1 >= a0);
+		overflow = (a1 >= prof_interval);
+		if (overflow) {
+			a1 %= prof_interval;
+		}
+	} while (atomic_cas_u64(&prof_accum->accumbytes, a0, a1));
+#else
+	malloc_mutex_lock(tsdn, &prof_accum->mtx);
+	a0 = prof_accum->accumbytes;
+	a1 = a0 + accumbytes;
+	overflow = (a1 >= prof_interval);
+	if (overflow) {
+		a1 %= prof_interval;
+	}
+	prof_accum->accumbytes = a1;
+	malloc_mutex_unlock(tsdn, &prof_accum->mtx);
+#endif
+
+	return overflow;
+}
+
+JEMALLOC_INLINE void
+prof_accum_cancel(tsdn_t *tsdn, prof_accum_t *prof_accum, size_t usize) {
+	cassert(config_prof);
+
+	/*
+	 * Cancel out as much of the excessive prof_accumbytes increase as
+	 * possible without underflowing.  Interval-triggered dumps occur
+	 * slightly more often than intended as a result of incomplete
+	 * canceling.
+	 */
+	uint64_t a0, a1;
+#ifdef JEMALLOC_ATOMIC_U64
+	do {
+		a0 = atomic_read_u64(&prof_accum->accumbytes);
+		a1 = (a0 >= LARGE_MINCLASS - usize) ? a0 - (LARGE_MINCLASS -
+		    usize) : 0;
+	} while (atomic_cas_u64(&prof_accum->accumbytes, a0, a1));
+#else
+	malloc_mutex_lock(tsdn, &prof_accum->mtx);
+	a0 = prof_accum->accumbytes;
+	a1 = (a0 >= LARGE_MINCLASS - usize) ? a0 - (LARGE_MINCLASS - usize) :
+	    0;
+	prof_accum->accumbytes = a1;
+	malloc_mutex_unlock(tsdn, &prof_accum->mtx);
+#endif
+}
+#endif
+
+#endif /* JEMALLOC_INTERNAL_PROF_INLINES_A_H */
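The two comments above are doing real work. A small single-threaded illustration with plain integers and made-up values (interval = 100 standing in for prof_interval, excess = 80 standing in for LARGE_MINCLASS - usize):

#include <assert.h>
#include <stdint.h>

int
main(void) {
    const uint64_t interval = 100;
    uint64_t acc = 0;

    /*
     * Coalescing: a single 250-byte step crosses the interval twice but
     * yields only one overflow signal, and the counter keeps the
     * remainder. Two "would-be" dumps collapse into one.
     */
    acc += 250;
    int dumps = (acc >= interval) ? 1 : 0;
    acc %= interval;
    assert(dumps == 1 && acc == 50);

    /*
     * Cancellation: the subtraction saturates at zero rather than
     * underflowing, so 30 of the 80 bytes cannot be canceled here and a
     * later interval-triggered dump fires slightly early.
     */
    uint64_t excess = 80;
    acc = (acc >= excess) ? acc - excess : 0;
    assert(acc == 0);
    return 0;
}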

include/jemalloc/internal/prof_inlines.h → include/jemalloc/internal/prof_inlines_b.h

@@ -1,5 +1,5 @@
-#ifndef JEMALLOC_INTERNAL_PROF_INLINES_H
-#define JEMALLOC_INTERNAL_PROF_INLINES_H
+#ifndef JEMALLOC_INTERNAL_PROF_INLINES_B_H
+#define JEMALLOC_INTERNAL_PROF_INLINES_B_H
 
 #ifndef JEMALLOC_ENABLE_INLINE
 bool prof_active_get_unlocked(void);
@@ -237,4 +237,4 @@ prof_free(tsd_t *tsd, const extent_t *extent, const void *ptr, size_t usize) {
 }
 #endif
 
-#endif /* JEMALLOC_INTERNAL_PROF_INLINES_H */
+#endif /* JEMALLOC_INTERNAL_PROF_INLINES_B_H */

include/jemalloc/internal/prof_structs.h

@@ -15,6 +15,13 @@ typedef struct {
 } prof_unwind_data_t;
 #endif
 
+struct prof_accum_s {
+#ifndef JEMALLOC_ATOMIC_U64
+	malloc_mutex_t mtx;
+#endif
+	uint64_t accumbytes;
+};
+
 struct prof_cnt_s {
 	/* Profiling counters. */
 	uint64_t curobjs;

include/jemalloc/internal/prof_types.h

@@ -2,6 +2,7 @@
 #define JEMALLOC_INTERNAL_PROF_TYPES_H
 
 typedef struct prof_bt_s prof_bt_t;
+typedef struct prof_accum_s prof_accum_t;
 typedef struct prof_cnt_s prof_cnt_t;
 typedef struct prof_tctx_s prof_tctx_t;
 typedef struct prof_gctx_s prof_gctx_t;

include/jemalloc/internal/witness_types.h

@@ -47,6 +47,7 @@ typedef int witness_comp_t (const witness_t *, void *, const witness_t *,
 #define WITNESS_RANK_ARENA_LARGE WITNESS_RANK_LEAF
 #define WITNESS_RANK_DSS WITNESS_RANK_LEAF
 #define WITNESS_RANK_PROF_ACTIVE WITNESS_RANK_LEAF
+#define WITNESS_RANK_PROF_ACCUM WITNESS_RANK_LEAF
 #define WITNESS_RANK_PROF_DUMP_SEQ WITNESS_RANK_LEAF
 #define WITNESS_RANK_PROF_GDUMP WITNESS_RANK_LEAF
 #define WITNESS_RANK_PROF_NEXT_THR_UID WITNESS_RANK_LEAF

src/arena.c

@@ -1148,19 +1148,7 @@ arena_prof_promote(tsdn_t *tsdn, extent_t *extent, const void *ptr,
 	extent_usize_set(extent, usize);
 
-	/*
-	 * Cancel out as much of the excessive prof_accumbytes increase as
-	 * possible without underflowing.  Interval-triggered dumps occur
-	 * slightly more often than intended as a result of incomplete
-	 * canceling.
-	 */
-	malloc_mutex_lock(tsdn, &arena->lock);
-	if (arena->prof_accumbytes >= LARGE_MINCLASS - usize) {
-		arena->prof_accumbytes -= LARGE_MINCLASS - usize;
-	} else {
-		arena->prof_accumbytes = 0;
-	}
-	malloc_mutex_unlock(tsdn, &arena->lock);
+	prof_accum_cancel(tsdn, &arena->prof_accum, usize);
 
 	assert(isalloc(tsdn, extent, ptr) == usize);
 }
@@ -1574,7 +1562,9 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) {
 	}
 
 	if (config_prof) {
-		arena->prof_accumbytes = 0;
+		if (prof_accum_init(tsdn, &arena->prof_accum)) {
+			goto label_error;
+		}
 	}
 
 	if (config_cache_oblivious) {

src/prof.c

@@ -1753,6 +1753,20 @@ prof_fdump(void) {
 	prof_dump(tsd, false, filename, opt_prof_leak);
 }
 
+bool
+prof_accum_init(tsdn_t *tsdn, prof_accum_t *prof_accum) {
+	cassert(config_prof);
+
+#ifndef JEMALLOC_ATOMIC_U64
+	if (malloc_mutex_init(&prof_accum->mtx, "prof_accum",
+	    WITNESS_RANK_PROF_ACCUM)) {
+		return true;
+	}
+#endif
+	prof_accum->accumbytes = 0;
+	return false;
+}
+
 void
 prof_idump(tsdn_t *tsdn) {
 	tsd_t *tsd;

src/tcache.c

@@ -200,7 +200,7 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind,
 		}
 		if ((config_prof || config_stats) && locked_arena == arena) {
 			if (config_prof) {
-				idump = arena_prof_accum_locked(arena,
+				idump = arena_prof_accum(tsd_tsdn(tsd), arena,
 				    tcache->prof_accumbytes);
 				tcache->prof_accumbytes = 0;
 			}
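Why this call site could change: tcache_bin_flush_large() reaches this point with locked_arena's lock already held, which is why it previously had to use arena_prof_accum_locked(), the variant that assumes arena->lock is held, rather than the self-locking arena_prof_accum(). With accumulation now synchronized through atomics (or prof_accum's own mutex, ranked WITNESS_RANK_PROF_ACCUM), it no longer touches arena->lock at all, so the one remaining entry point is safe here, and the _impl/_locked variants could be deleted outright. A condensed before/after of the flush path, not the full function:

/* Before: arena->lock is held, so the _locked variant was mandatory. */
idump = arena_prof_accum_locked(arena, tcache->prof_accumbytes);

/* After: accumulation is independent of arena->lock. */
idump = arena_prof_accum(tsd_tsdn(tsd), arena, tcache->prof_accumbytes);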