diff --git a/include/jemalloc/internal/mutex_inlines.h b/include/jemalloc/internal/mutex_inlines.h
index 3a12a722..0552e190 100644
--- a/include/jemalloc/internal/mutex_inlines.h
+++ b/include/jemalloc/internal/mutex_inlines.h
@@ -40,7 +40,12 @@ malloc_mutex_prof_merge(mutex_prof_data_t *sum, mutex_prof_data_t *data) {
 	if (sum->max_n_thds < data->max_n_thds) {
 		sum->max_n_thds = data->max_n_thds;
 	}
-	sum->n_waiting_thds += data->n_waiting_thds;
+	uint32_t cur_n_waiting_thds = atomic_load_u32(&sum->n_waiting_thds,
+	    ATOMIC_RELAXED);
+	uint32_t new_n_waiting_thds = cur_n_waiting_thds + atomic_load_u32(
+	    &data->n_waiting_thds, ATOMIC_RELAXED);
+	atomic_store_u32(&sum->n_waiting_thds, new_n_waiting_thds,
+	    ATOMIC_RELAXED);
 	sum->n_owner_switches += data->n_owner_switches;
 	sum->n_lock_ops += data->n_lock_ops;
 }
@@ -91,9 +96,14 @@ malloc_mutex_prof_read(tsdn_t *tsdn, mutex_prof_data_t *data,
 	/* Can only read holding the mutex. */
 	malloc_mutex_assert_owner(tsdn, mutex);
 
+	/*
+	 * Not *really* allowed (we shouldn't be doing non-atomic loads of
+	 * atomic data), but the mutex protection makes this safe, and writing
+	 * a member-for-member copy is tedious for this situation.
+	 */
 	*data = *source;
 	/* n_wait_thds is not reported (modified w/o locking). */
-	data->n_waiting_thds = 0;
+	atomic_store_u32(&data->n_waiting_thds, 0, ATOMIC_RELAXED);
 }
 
 #endif
diff --git a/include/jemalloc/internal/mutex_structs.h b/include/jemalloc/internal/mutex_structs.h
index 5dddb846..ff090b22 100644
--- a/include/jemalloc/internal/mutex_structs.h
+++ b/include/jemalloc/internal/mutex_structs.h
@@ -17,7 +17,7 @@ struct mutex_prof_data_s {
 	/* Max # of threads waiting for the mutex at the same time. */
 	uint32_t		max_n_thds;
 	/* Current # of threads waiting on the lock.  Atomic synced. */
-	uint32_t		n_waiting_thds;
+	atomic_u32_t		n_waiting_thds;
 
 	/*
 	 * Data touched on the fast path.  These are modified right after we
diff --git a/include/jemalloc/internal/mutex_types.h b/include/jemalloc/internal/mutex_types.h
index bd261490..e6589374 100644
--- a/include/jemalloc/internal/mutex_types.h
+++ b/include/jemalloc/internal/mutex_types.h
@@ -35,7 +35,8 @@ typedef struct malloc_mutex_s malloc_mutex_t;
 #endif
 
 #define LOCK_PROF_DATA_INITIALIZER					\
-    {NSTIME_ZERO_INITIALIZER, NSTIME_ZERO_INITIALIZER, 0, 0, 0, 0, 0, NULL, 0}
+    {NSTIME_ZERO_INITIALIZER, NSTIME_ZERO_INITIALIZER, 0, 0, 0,	\
+     ATOMIC_INIT(0), 0, NULL, 0}
 
 #ifdef _WIN32
 #  define MALLOC_MUTEX_INITIALIZER
diff --git a/src/mutex.c b/src/mutex.c
index fa2770a3..8c593101 100644
--- a/src/mutex.c
+++ b/src/mutex.c
@@ -93,10 +93,11 @@ label_spin_done:
 	/* Copy before to after to avoid clock skews. */
 	nstime_t after;
 	nstime_copy(&after, &before);
-	uint32_t n_thds = atomic_add_u32(&data->n_waiting_thds, 1);
+	uint32_t n_thds = atomic_fetch_add_u32(&data->n_waiting_thds, 1,
+	    ATOMIC_RELAXED) + 1;
 	/* One last try as above two calls may take quite some cycles. */
 	if (!malloc_mutex_trylock(mutex)) {
-		atomic_sub_u32(&data->n_waiting_thds, 1);
+		atomic_fetch_sub_u32(&data->n_waiting_thds, 1, ATOMIC_RELAXED);
 		data->n_spin_acquired++;
 		return;
 	}
@@ -104,7 +105,7 @@
 	/* True slow path. */
 	malloc_mutex_lock_final(mutex);
 	/* Update more slow-path only counters. */
-	atomic_sub_u32(&data->n_waiting_thds, 1);
+	atomic_fetch_sub_u32(&data->n_waiting_thds, 1, ATOMIC_RELAXED);
 	nstime_update(&after);
 
 	nstime_t delta;
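
Note on the pattern: the n_waiting_thds field is a pure statistic, so every access in the patch uses ATOMIC_RELAXED; the mutex itself provides all the ordering the program logic depends on, and a transiently stale count is harmless. A minimal standalone sketch of the same idea, written against C11 <stdatomic.h> directly rather than jemalloc's atomic_u32_t wrappers (waiting_enter/waiting_exit are illustrative names, not part of this patch):

#include <stdatomic.h>
#include <stdint.h>

/* Statistic only; relaxed ordering, no synchronization implied. */
static _Atomic uint32_t n_waiting_thds = ATOMIC_VAR_INIT(0);

/* A thread is about to block on the lock; return the count including it. */
static uint32_t
waiting_enter(void) {
	/* fetch_add returns the pre-increment value, hence the + 1. */
	return atomic_fetch_add_explicit(&n_waiting_thds, 1,
	    memory_order_relaxed) + 1;
}

/* The thread now holds the lock (or a late trylock succeeded). */
static void
waiting_exit(void) {
	atomic_fetch_sub_explicit(&n_waiting_thds, 1, memory_order_relaxed);
}

The + 1 in both this sketch and the first mutex.c hunk compensates for fetch-add returning the old value, which preserves the semantics of the replaced atomic_add_u32 (as the hunk implies, that helper returned the post-increment value).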