First stage of mutex profiling.
Switched to trylock and update counters based on state.
parent 32e7cf51cd
commit 6309df628f
include/jemalloc/internal/mutex_inlines.h

@@ -1,31 +1,42 @@
 #ifndef JEMALLOC_INTERNAL_MUTEX_INLINES_H
 #define JEMALLOC_INTERNAL_MUTEX_INLINES_H
 
+void malloc_mutex_lock_slow(malloc_mutex_t *mutex);
+
 #ifndef JEMALLOC_ENABLE_INLINE
 void malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex);
+bool malloc_mutex_trylock(malloc_mutex_t *mutex);
 void malloc_mutex_unlock(tsdn_t *tsdn, malloc_mutex_t *mutex);
 void malloc_mutex_assert_owner(tsdn_t *tsdn, malloc_mutex_t *mutex);
 void malloc_mutex_assert_not_owner(tsdn_t *tsdn, malloc_mutex_t *mutex);
 #endif
 
 #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_MUTEX_C_))
+JEMALLOC_INLINE void
+malloc_mutex_lock_final(malloc_mutex_t *mutex) {
+	MALLOC_MUTEX_LOCK(mutex);
+}
+
+/* Trylock: return false if the lock is successfully acquired. */
+JEMALLOC_INLINE bool
+malloc_mutex_trylock(malloc_mutex_t *mutex) {
+	return MALLOC_MUTEX_TRYLOCK(mutex);
+}
+
 JEMALLOC_INLINE void
 malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex) {
 	witness_assert_not_owner(tsdn, &mutex->witness);
 	if (isthreaded) {
-#ifdef _WIN32
-#  if _WIN32_WINNT >= 0x0600
-		AcquireSRWLockExclusive(&mutex->lock);
-#  else
-		EnterCriticalSection(&mutex->lock);
-#  endif
-#elif (defined(JEMALLOC_OS_UNFAIR_LOCK))
-		os_unfair_lock_lock(&mutex->lock);
-#elif (defined(JEMALLOC_OSSPIN))
-		OSSpinLockLock(&mutex->lock);
-#else
-		pthread_mutex_lock(&mutex->lock);
-#endif
+		if (malloc_mutex_trylock(mutex)) {
+			malloc_mutex_lock_slow(mutex);
+		}
+		/* We own the lock now.  Update a few counters. */
+		lock_prof_data_t *data = &mutex->prof_data;
+		data->n_lock_ops++;
+		if (data->prev_owner != tsdn) {
+			data->prev_owner = tsdn;
+			data->n_owner_switches++;
+		}
 	}
 	witness_lock(tsdn, &mutex->witness);
 }
@@ -34,19 +45,7 @@ JEMALLOC_INLINE void
 malloc_mutex_unlock(tsdn_t *tsdn, malloc_mutex_t *mutex) {
 	witness_unlock(tsdn, &mutex->witness);
 	if (isthreaded) {
-#ifdef _WIN32
-#  if _WIN32_WINNT >= 0x0600
-		ReleaseSRWLockExclusive(&mutex->lock);
-#  else
-		LeaveCriticalSection(&mutex->lock);
-#  endif
-#elif (defined(JEMALLOC_OS_UNFAIR_LOCK))
-		os_unfair_lock_unlock(&mutex->lock);
-#elif (defined(JEMALLOC_OSSPIN))
-		OSSpinLockUnlock(&mutex->lock);
-#else
-		pthread_mutex_unlock(&mutex->lock);
-#endif
+		MALLOC_MUTEX_UNLOCK(mutex);
 	}
 }
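The trylock convention above is easy to misread: malloc_mutex_trylock() returns false when it acquires the lock, so the branch in malloc_mutex_lock() falls through to the slow path only under contention, and the counter updates run once the lock is held either way. A minimal sketch of the same split using plain pthreads (the demo_* names are made up for illustration, not jemalloc API):

/* Sketch only: demo_* names are illustrative stand-ins. */
#include <pthread.h>
#include <stdbool.h>
#include <stdint.h>

typedef struct {
	pthread_mutex_t lock;
	uint64_t n_lock_ops;	/* protected by lock */
} demo_mutex_t;

/* Same convention as malloc_mutex_trylock(): false means acquired. */
static bool
demo_trylock(demo_mutex_t *m) {
	return pthread_mutex_trylock(&m->lock) != 0;
}

static void
demo_lock(demo_mutex_t *m) {
	if (demo_trylock(m)) {
		/* Contended: block (stands in for malloc_mutex_lock_slow). */
		pthread_mutex_lock(&m->lock);
	}
	/* The lock is held on both paths; plain increments are safe. */
	m->n_lock_ops++;
}

On the uncontended path this typically costs one atomic operation inside the trylock plus a couple of plain stores, which is why the counters that must be touched on every acquisition live on the fast path rather than in the slow-path function.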
include/jemalloc/internal/mutex_structs.h

@@ -1,9 +1,50 @@
 #ifndef JEMALLOC_INTERNAL_MUTEX_STRUCTS_H
 #define JEMALLOC_INTERNAL_MUTEX_STRUCTS_H
 
+struct lock_prof_data_s {
+	/*
+	 * Counters touched on the slow path, i.e. when there is lock
+	 * contention.  We update them once we have the lock.
+	 */
+	/* Total time spent waiting on this lock. */
+	nstime_t tot_wait_time;
+	/* Max time spent on a single lock operation. */
+	nstime_t max_wait_time;
+	/* # of times we have to wait for this lock (after spinning). */
+	uint64_t n_wait_times;
+	/* # of times the lock was acquired through local spinning. */
+	uint64_t n_spin_acquired;
+	/* Max # of threads waiting for the lock at the same time. */
+	uint32_t max_n_thds;
+	/* Current # of threads waiting on the lock.  Atomically synced. */
+	uint32_t n_waiting_thds;
+
+	/*
+	 * Data touched on the fast path.  These are modified right after we
+	 * grab the lock, so they are placed closest to the end (i.e. right
+	 * before the lock) so that we have a higher chance of them being on
+	 * the same cacheline.
+	 */
+	/* # of times the new lock holder is different from the previous one. */
+	uint64_t n_owner_switches;
+	/* Previous lock holder, to facilitate n_owner_switches. */
+	tsdn_t *prev_owner;
+	/* # of lock() operations in total. */
+	uint64_t n_lock_ops;
+};
+
 struct malloc_mutex_s {
 	union {
 		struct {
+			/*
+			 * prof_data is defined first to reduce cacheline
+			 * bouncing: the data is not touched by the lock
+			 * holder during unlocking, but may be modified by
+			 * contenders.  Having it before the lock itself
+			 * could avoid prefetching a modified cacheline (for
+			 * the unlocking thread).
+			 */
+			lock_prof_data_t prof_data;
 #ifdef _WIN32
 #  if _WIN32_WINNT >= 0x0600
 			SRWLOCK lock;

@@ -22,7 +63,7 @@ struct malloc_mutex_s {
 #endif
 		};
 		/*
 		 * We only touch witness when configured w/ debug.  However we
 		 * keep the field in a union when !debug so that we don't have
 		 * to pollute the code base with #ifdefs, while avoiding the
 		 * memory cost.
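The placement argument in these comments (hot counters at the tail of prof_data, directly in front of the lock word) is a plain struct-layout property and can be inspected with offsetof. A standalone sketch with simplified stand-in types, not the real jemalloc structs; whether the tail fields actually share a cacheline with the lock depends on padding and a 64-byte line size:

#include <pthread.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Simplified stand-ins for lock_prof_data_t / malloc_mutex_t. */
typedef struct {
	uint64_t tot_wait_ns, max_wait_ns;
	uint64_t n_wait_times, n_spin_acquired;
	uint32_t max_n_thds, n_waiting_thds;
	uint64_t n_owner_switches;
	void *prev_owner;
	uint64_t n_lock_ops;
} demo_prof_data_t;

typedef struct {
	demo_prof_data_t prof_data;	/* profiling data first ... */
	pthread_mutex_t lock;		/* ... the lock itself right after */
} demo_mutex_t;

int
main(void) {
	/* Fast-path fields sit directly in front of the lock, so the tail
	 * of prof_data and the lock have a good chance of sharing a line. */
	printf("n_lock_ops offset: %zu\n",
	    offsetof(demo_mutex_t, prof_data.n_lock_ops));
	printf("lock offset:       %zu\n", offsetof(demo_mutex_t, lock));
	return 0;
}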
include/jemalloc/internal/mutex_types.h

@@ -1,31 +1,63 @@
 #ifndef JEMALLOC_INTERNAL_MUTEX_TYPES_H
 #define JEMALLOC_INTERNAL_MUTEX_TYPES_H
 
+typedef struct lock_prof_data_s lock_prof_data_t;
 typedef struct malloc_mutex_s malloc_mutex_t;
 
+#ifdef _WIN32
+#  if _WIN32_WINNT >= 0x0600
+#    define MALLOC_MUTEX_LOCK(m)    AcquireSRWLockExclusive(&(m)->lock)
+#    define MALLOC_MUTEX_UNLOCK(m)  ReleaseSRWLockExclusive(&(m)->lock)
+#    define MALLOC_MUTEX_TRYLOCK(m) (!TryAcquireSRWLockExclusive(&(m)->lock))
+#  else
+#    define MALLOC_MUTEX_LOCK(m)    EnterCriticalSection(&(m)->lock)
+#    define MALLOC_MUTEX_UNLOCK(m)  LeaveCriticalSection(&(m)->lock)
+#    define MALLOC_MUTEX_TRYLOCK(m) (!TryEnterCriticalSection(&(m)->lock))
+#  endif
+#elif (defined(JEMALLOC_OS_UNFAIR_LOCK))
+#  define MALLOC_MUTEX_LOCK(m)    os_unfair_lock_lock(&(m)->lock)
+#  define MALLOC_MUTEX_UNLOCK(m)  os_unfair_lock_unlock(&(m)->lock)
+#  define MALLOC_MUTEX_TRYLOCK(m) (!os_unfair_lock_trylock(&(m)->lock))
+#elif (defined(JEMALLOC_OSSPIN))
+#  define MALLOC_MUTEX_LOCK(m)    OSSpinLockLock(&(m)->lock)
+#  define MALLOC_MUTEX_UNLOCK(m)  OSSpinLockUnlock(&(m)->lock)
+#  define MALLOC_MUTEX_TRYLOCK(m) (!OSSpinLockTry(&(m)->lock))
+#else
+#  define MALLOC_MUTEX_LOCK(m)    pthread_mutex_lock(&(m)->lock)
+#  define MALLOC_MUTEX_UNLOCK(m)  pthread_mutex_unlock(&(m)->lock)
+#  define MALLOC_MUTEX_TRYLOCK(m) (pthread_mutex_trylock(&(m)->lock) != 0)
+#endif
+
+#define LOCK_PROF_DATA_INITIALIZER \
+    {NSTIME_ZERO_INITIALIZER, NSTIME_ZERO_INITIALIZER, 0, 0, 0, 0, 0, NULL, 0}
+
 #ifdef _WIN32
 #  define MALLOC_MUTEX_INITIALIZER
 #elif (defined(JEMALLOC_OS_UNFAIR_LOCK))
 #  define MALLOC_MUTEX_INITIALIZER \
-    {{{OS_UNFAIR_LOCK_INIT}}, WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)}
+    {{{LOCK_PROF_DATA_INITIALIZER, OS_UNFAIR_LOCK_INIT}}, \
+      WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)}
 #elif (defined(JEMALLOC_OSSPIN))
 #  define MALLOC_MUTEX_INITIALIZER \
-    {{{0}}, WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)}
+    {{{LOCK_PROF_DATA_INITIALIZER, 0}}, \
+      WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)}
 #elif (defined(JEMALLOC_MUTEX_INIT_CB))
 #  define MALLOC_MUTEX_INITIALIZER \
-    {{{PTHREAD_MUTEX_INITIALIZER, NULL}}, \
+    {{{LOCK_PROF_DATA_INITIALIZER, PTHREAD_MUTEX_INITIALIZER, NULL}}, \
       WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)}
 #else
+/* TODO: get rid of adaptive mutex once we do our own spin. */
 #  if (defined(JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP) && \
       defined(PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP))
 #    define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_ADAPTIVE_NP
 #    define MALLOC_MUTEX_INITIALIZER \
-      {{{PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP}}, \
+      {{{LOCK_PROF_DATA_INITIALIZER, \
+	  PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP}}, \
         WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)}
 #  else
 #    define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_DEFAULT
 #    define MALLOC_MUTEX_INITIALIZER \
-      {{{PTHREAD_MUTEX_INITIALIZER}}, \
+      {{{LOCK_PROF_DATA_INITIALIZER, PTHREAD_MUTEX_INITIALIZER}}, \
         WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)}
 #  endif
 #endif
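Note that the backends disagree natively: the Windows and Darwin trylock primitives return nonzero on success, while pthread_mutex_trylock() returns 0 on success. The macros negate the former and compare the latter against 0 so that every platform ends up with one convention: MALLOC_MUTEX_TRYLOCK(m) is true when the lock was NOT acquired. A small check of that convention for the pthread backend only (the DEMO_* names are illustrative, not jemalloc's):

#include <assert.h>
#include <pthread.h>

typedef struct {
	pthread_mutex_t lock;
} demo_mutex_t;

#define DEMO_MUTEX_TRYLOCK(m) (pthread_mutex_trylock(&(m)->lock) != 0)
#define DEMO_MUTEX_UNLOCK(m)  pthread_mutex_unlock(&(m)->lock)

int
main(void) {
	demo_mutex_t m = {PTHREAD_MUTEX_INITIALIZER};
	/* false: the lock was free and is now held by us. */
	assert(!DEMO_MUTEX_TRYLOCK(&m));
	/* true: POSIX specifies EBUSY when the mutex is already locked,
	 * even by the calling thread. */
	assert(DEMO_MUTEX_TRYLOCK(&m));
	DEMO_MUTEX_UNLOCK(&m);
	return 0;
}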
include/jemalloc/internal/nstime_types.h

@@ -6,4 +6,6 @@ typedef struct nstime_s nstime_t;
 /* Maximum supported number of seconds (~584 years). */
 #define NSTIME_SEC_MAX KQU(18446744072)
 
+#define NSTIME_ZERO_INITIALIZER {0}
+
 #endif /* JEMALLOC_INTERNAL_NSTIME_TYPES_H */
src/mutex.c (43 changes)
@@ -65,6 +65,49 @@ JEMALLOC_EXPORT int _pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex,
     void *(calloc_cb)(size_t, size_t));
 #endif
 
+void
+malloc_mutex_lock_slow(malloc_mutex_t *mutex) {
+	lock_prof_data_t *data = &mutex->prof_data;
+	bool spin_success = false;
+
+	{	/* TODO: a smart spin policy. */
+		if (!malloc_mutex_trylock(mutex)) {
+			spin_success = true;
+			goto label_locked;
+		}
+	}
+
+	nstime_t now, before;
+	uint32_t n_thds;
+	nstime_init(&now, 0);
+	nstime_update(&now);
+	n_thds = atomic_add_u32(&data->n_waiting_thds, 1);
+	/* One last try, as the above two calls may take quite some cycles. */
+	if (!malloc_mutex_trylock(mutex)) {
+		spin_success = true;
+		atomic_sub_u32(&data->n_waiting_thds, 1);
+		goto label_locked;
+	}
+	nstime_copy(&before, &now);
+	malloc_mutex_lock_final(mutex);
+	atomic_sub_u32(&data->n_waiting_thds, 1);
+	nstime_update(&now);
+	nstime_subtract(&now, &before);
+label_locked:
+	if (spin_success) {
+		data->n_spin_acquired++;
+	} else {
+		data->n_wait_times++;
+		nstime_add(&data->tot_wait_time, &now);
+		if (nstime_compare(&now, &data->max_wait_time) > 0) {
+			nstime_copy(&data->max_wait_time, &now);
+		}
+		if (n_thds > data->max_n_thds) {
+			data->max_n_thds = n_thds;
+		}
+	}
+}
+
 bool
 malloc_mutex_init(malloc_mutex_t *mutex, const char *name,
     witness_rank_t rank) {
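The shape of malloc_mutex_lock_slow() boils down to: retry once (a placeholder for the future spin policy), advertise yourself as a waiter, retry once more because the bookkeeping itself takes cycles, then block and charge the elapsed wait to the mutex. A self-contained approximation using C11 atomics and clock_gettime() in place of jemalloc's atomic_*_u32 and nstime primitives (all demo_* names are illustrative stand-ins):

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <time.h>

typedef struct {
	pthread_mutex_t lock;
	atomic_uint_fast32_t n_waiting_thds;	/* written by contenders */
	uint32_t max_n_thds;
	uint64_t n_spin_acquired, n_wait_times;
	uint64_t tot_wait_ns, max_wait_ns;	/* written holding lock */
} demo_mutex_t;

static uint64_t
now_ns(void) {
	struct timespec ts;
	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (uint64_t)ts.tv_sec * 1000000000u + (uint64_t)ts.tv_nsec;
}

/* false == acquired, matching the convention in the diff. */
static bool
demo_trylock(demo_mutex_t *m) {
	return pthread_mutex_trylock(&m->lock) != 0;
}

static void
demo_lock_slow(demo_mutex_t *m) {
	/* A single immediate retry stands in for the TODO spin policy. */
	if (!demo_trylock(m)) {
		m->n_spin_acquired++;	/* updated while holding the lock */
		return;
	}

	/* Advertise ourselves as a waiter, then try once more: the atomic
	 * add itself may take long enough for the holder to release. */
	uint32_t n_thds = (uint32_t)atomic_fetch_add(&m->n_waiting_thds, 1) + 1;
	if (!demo_trylock(m)) {
		atomic_fetch_sub(&m->n_waiting_thds, 1);
		m->n_spin_acquired++;
		return;
	}

	/* Block, and charge the elapsed time to this mutex. */
	uint64_t before = now_ns();
	pthread_mutex_lock(&m->lock);
	atomic_fetch_sub(&m->n_waiting_thds, 1);
	uint64_t waited = now_ns() - before;

	m->n_wait_times++;
	m->tot_wait_ns += waited;
	if (waited > m->max_wait_ns) {
		m->max_wait_ns = waited;
	}
	if (n_thds > m->max_n_thds) {
		m->max_n_thds = n_thds;
	}
}

Note that n_waiting_thds is the only field contenders update while not holding the lock, which is why it alone needs atomic operations; every other counter is written after acquisition.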
Loading…
Reference in New Issue
Block a user