mutex: fix trylock spin wait contention
If there are 3 or more threads spin-waiting on the same mutex, there will be excessive exclusive cacheline contention because pthread_trylock() immediately tries to CAS in a new value, instead of first checking if the lock is locked. This diff adds a 'locked' hint flag, and we will only spin wait without trylock()ing while set. I don't know of any other portable way to get the same behavior as pthread_mutex_lock(). This is pretty easy to test via ttest, e.g. ./ttest1 500 3 10000 1 100 Throughput is nearly 3x as fast. This blames to the mutex profiling changes, however, we almost never have 3 or more threads contending in properly configured production workloads, but still worth fixing.
This commit is contained in:
parent
c4063ce439
commit
b23336af96
@ -43,6 +43,11 @@ struct malloc_mutex_s {
|
|||||||
#else
|
#else
|
||||||
pthread_mutex_t lock;
|
pthread_mutex_t lock;
|
||||||
#endif
|
#endif
|
||||||
|
/*
|
||||||
|
* Hint flag to avoid exclusive cache line contention
|
||||||
|
* during spin waiting
|
||||||
|
*/
|
||||||
|
atomic_b_t locked;
|
||||||
};
|
};
|
||||||
/*
|
/*
|
||||||
* We only touch witness when configured w/ debug. However we
|
* We only touch witness when configured w/ debug. However we
|
||||||
@ -97,21 +102,21 @@ struct malloc_mutex_s {
|
|||||||
#elif (defined(JEMALLOC_OS_UNFAIR_LOCK))
|
#elif (defined(JEMALLOC_OS_UNFAIR_LOCK))
|
||||||
# if defined(JEMALLOC_DEBUG)
|
# if defined(JEMALLOC_DEBUG)
|
||||||
# define MALLOC_MUTEX_INITIALIZER \
|
# define MALLOC_MUTEX_INITIALIZER \
|
||||||
{{{LOCK_PROF_DATA_INITIALIZER, OS_UNFAIR_LOCK_INIT}}, \
|
{{{LOCK_PROF_DATA_INITIALIZER, OS_UNFAIR_LOCK_INIT, ATOMIC_INIT(false)}}, \
|
||||||
WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT), 0}
|
WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT), 0}
|
||||||
# else
|
# else
|
||||||
# define MALLOC_MUTEX_INITIALIZER \
|
# define MALLOC_MUTEX_INITIALIZER \
|
||||||
{{{LOCK_PROF_DATA_INITIALIZER, OS_UNFAIR_LOCK_INIT}}, \
|
{{{LOCK_PROF_DATA_INITIALIZER, OS_UNFAIR_LOCK_INIT, ATOMIC_INIT(false)}}, \
|
||||||
WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)}
|
WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)}
|
||||||
# endif
|
# endif
|
||||||
#elif (defined(JEMALLOC_MUTEX_INIT_CB))
|
#elif (defined(JEMALLOC_MUTEX_INIT_CB))
|
||||||
# if (defined(JEMALLOC_DEBUG))
|
# if (defined(JEMALLOC_DEBUG))
|
||||||
# define MALLOC_MUTEX_INITIALIZER \
|
# define MALLOC_MUTEX_INITIALIZER \
|
||||||
{{{LOCK_PROF_DATA_INITIALIZER, PTHREAD_MUTEX_INITIALIZER, NULL}}, \
|
{{{LOCK_PROF_DATA_INITIALIZER, PTHREAD_MUTEX_INITIALIZER, NULL, ATOMIC_INIT(false)}}, \
|
||||||
WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT), 0}
|
WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT), 0}
|
||||||
# else
|
# else
|
||||||
# define MALLOC_MUTEX_INITIALIZER \
|
# define MALLOC_MUTEX_INITIALIZER \
|
||||||
{{{LOCK_PROF_DATA_INITIALIZER, PTHREAD_MUTEX_INITIALIZER, NULL}}, \
|
{{{LOCK_PROF_DATA_INITIALIZER, PTHREAD_MUTEX_INITIALIZER, NULL, ATOMIC_INIT(false)}}, \
|
||||||
WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)}
|
WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)}
|
||||||
# endif
|
# endif
|
||||||
|
|
||||||
@ -119,11 +124,11 @@ struct malloc_mutex_s {
|
|||||||
# define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_DEFAULT
|
# define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_DEFAULT
|
||||||
# if defined(JEMALLOC_DEBUG)
|
# if defined(JEMALLOC_DEBUG)
|
||||||
# define MALLOC_MUTEX_INITIALIZER \
|
# define MALLOC_MUTEX_INITIALIZER \
|
||||||
{{{LOCK_PROF_DATA_INITIALIZER, PTHREAD_MUTEX_INITIALIZER}}, \
|
{{{LOCK_PROF_DATA_INITIALIZER, PTHREAD_MUTEX_INITIALIZER, ATOMIC_INIT(false)}}, \
|
||||||
WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT), 0}
|
WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT), 0}
|
||||||
# else
|
# else
|
||||||
# define MALLOC_MUTEX_INITIALIZER \
|
# define MALLOC_MUTEX_INITIALIZER \
|
||||||
{{{LOCK_PROF_DATA_INITIALIZER, PTHREAD_MUTEX_INITIALIZER}}, \
|
{{{LOCK_PROF_DATA_INITIALIZER, PTHREAD_MUTEX_INITIALIZER, ATOMIC_INIT(false)}}, \
|
||||||
WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)}
|
WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)}
|
||||||
# endif
|
# endif
|
||||||
#endif
|
#endif
|
||||||
@ -148,6 +153,7 @@ void malloc_mutex_lock_slow(malloc_mutex_t *mutex);
|
|||||||
static inline void
|
static inline void
|
||||||
malloc_mutex_lock_final(malloc_mutex_t *mutex) {
|
malloc_mutex_lock_final(malloc_mutex_t *mutex) {
|
||||||
MALLOC_MUTEX_LOCK(mutex);
|
MALLOC_MUTEX_LOCK(mutex);
|
||||||
|
atomic_store_b(&mutex->locked, true, ATOMIC_RELAXED);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline bool
|
static inline bool
|
||||||
@ -173,6 +179,7 @@ malloc_mutex_trylock(tsdn_t *tsdn, malloc_mutex_t *mutex) {
|
|||||||
witness_assert_not_owner(tsdn_witness_tsdp_get(tsdn), &mutex->witness);
|
witness_assert_not_owner(tsdn_witness_tsdp_get(tsdn), &mutex->witness);
|
||||||
if (isthreaded) {
|
if (isthreaded) {
|
||||||
if (malloc_mutex_trylock_final(mutex)) {
|
if (malloc_mutex_trylock_final(mutex)) {
|
||||||
|
atomic_store_b(&mutex->locked, true, ATOMIC_RELAXED);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
mutex_owner_stats_update(tsdn, mutex);
|
mutex_owner_stats_update(tsdn, mutex);
|
||||||
@ -212,6 +219,7 @@ malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex) {
|
|||||||
if (isthreaded) {
|
if (isthreaded) {
|
||||||
if (malloc_mutex_trylock_final(mutex)) {
|
if (malloc_mutex_trylock_final(mutex)) {
|
||||||
malloc_mutex_lock_slow(mutex);
|
malloc_mutex_lock_slow(mutex);
|
||||||
|
atomic_store_b(&mutex->locked, true, ATOMIC_RELAXED);
|
||||||
}
|
}
|
||||||
mutex_owner_stats_update(tsdn, mutex);
|
mutex_owner_stats_update(tsdn, mutex);
|
||||||
}
|
}
|
||||||
@ -220,6 +228,7 @@ malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex) {
|
|||||||
|
|
||||||
static inline void
|
static inline void
|
||||||
malloc_mutex_unlock(tsdn_t *tsdn, malloc_mutex_t *mutex) {
|
malloc_mutex_unlock(tsdn_t *tsdn, malloc_mutex_t *mutex) {
|
||||||
|
atomic_store_b(&mutex->locked, false, ATOMIC_RELAXED);
|
||||||
witness_unlock(tsdn_witness_tsdp_get(tsdn), &mutex->witness);
|
witness_unlock(tsdn_witness_tsdp_get(tsdn), &mutex->witness);
|
||||||
if (isthreaded) {
|
if (isthreaded) {
|
||||||
MALLOC_MUTEX_UNLOCK(mutex);
|
MALLOC_MUTEX_UNLOCK(mutex);
|
||||||
|
@ -55,7 +55,8 @@ malloc_mutex_lock_slow(malloc_mutex_t *mutex) {
|
|||||||
int cnt = 0, max_cnt = MALLOC_MUTEX_MAX_SPIN;
|
int cnt = 0, max_cnt = MALLOC_MUTEX_MAX_SPIN;
|
||||||
do {
|
do {
|
||||||
spin_cpu_spinwait();
|
spin_cpu_spinwait();
|
||||||
if (!malloc_mutex_trylock_final(mutex)) {
|
if (!atomic_load_b(&mutex->locked, ATOMIC_RELAXED)
|
||||||
|
&& !malloc_mutex_trylock_final(mutex)) {
|
||||||
data->n_spin_acquired++;
|
data->n_spin_acquired++;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user