Make the mutex n_waiting_thds field a C11-style atomic

David Goldblatt 2017-04-04 18:34:01 -07:00 committed by David Goldblatt
parent 492a941f49
commit 5dcc13b342
4 changed files with 19 additions and 7 deletions
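
For context, the pattern this commit moves to can be sketched with standard C11 <stdatomic.h>. jemalloc uses its own atomic_u32_t wrappers (atomic_load_u32, atomic_fetch_add_u32, ATOMIC_INIT, ...); the type and function names below are illustrative stand-ins, not code from this patch:

/*
 * Minimal sketch of a relaxed-atomic waiter counter.  Relaxed ordering is
 * enough because the counter only feeds mutex profiling statistics and is
 * never used to order access to the data the mutex protects.
 */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

typedef struct {
        _Atomic uint32_t n_waiting_thds;        /* purely statistical */
} waiter_stats_t;

#define WAITER_STATS_INITIALIZER {0}            /* analogue of ATOMIC_INIT(0) */

static uint32_t
waiter_enter(waiter_stats_t *s) {
        /* fetch_add returns the pre-increment value, hence the "+ 1". */
        return atomic_fetch_add_explicit(&s->n_waiting_thds, 1,
            memory_order_relaxed) + 1;
}

static void
waiter_exit(waiter_stats_t *s) {
        atomic_fetch_sub_explicit(&s->n_waiting_thds, 1, memory_order_relaxed);
}

int
main(void) {
        waiter_stats_t s = WAITER_STATS_INITIALIZER;
        printf("waiters: %u\n", waiter_enter(&s));      /* prints 1 */
        waiter_exit(&s);
        printf("waiters: %u\n",
            atomic_load_explicit(&s.n_waiting_thds, memory_order_relaxed));
        return 0;
}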


@@ -40,7 +40,12 @@ malloc_mutex_prof_merge(mutex_prof_data_t *sum, mutex_prof_data_t *data) {
         if (sum->max_n_thds < data->max_n_thds) {
                 sum->max_n_thds = data->max_n_thds;
         }
-        sum->n_waiting_thds += data->n_waiting_thds;
+        uint32_t cur_n_waiting_thds = atomic_load_u32(&sum->n_waiting_thds,
+            ATOMIC_RELAXED);
+        uint32_t new_n_waiting_thds = cur_n_waiting_thds + atomic_load_u32(
+            &data->n_waiting_thds, ATOMIC_RELAXED);
+        atomic_store_u32(&sum->n_waiting_thds, new_n_waiting_thds,
+            ATOMIC_RELAXED);
         sum->n_owner_switches += data->n_owner_switches;
         sum->n_lock_ops += data->n_lock_ops;
 }
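
The merge above uses two relaxed loads plus a relaxed store rather than a single atomic fetch-add. A hedged C11 rendering of that step, under the assumption (not stated in this diff) that only the stats-reading thread writes the sum accumulator, so the non-atomic read-modify-write sequence cannot lose updates:

#include <stdatomic.h>
#include <stdint.h>

/* Illustrative only; mirrors the load/add/store sequence in the hunk above. */
static void
stats_merge_waiters(_Atomic uint32_t *sum, _Atomic uint32_t *data) {
        uint32_t cur = atomic_load_explicit(sum, memory_order_relaxed);
        uint32_t add = atomic_load_explicit(data, memory_order_relaxed);
        atomic_store_explicit(sum, cur + add, memory_order_relaxed);
}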
@@ -91,9 +96,14 @@ malloc_mutex_prof_read(tsdn_t *tsdn, mutex_prof_data_t *data,
         /* Can only read holding the mutex. */
         malloc_mutex_assert_owner(tsdn, mutex);
+        /*
+         * Not *really* allowed (we shouldn't be doing non-atomic loads of
+         * atomic data), but the mutex protection makes this safe, and writing
+         * a member-for-member copy is tedious for this situation.
+         */
         *data = *source;
         /* n_wait_thds is not reported (modified w/o locking). */
-        data->n_waiting_thds = 0;
+        atomic_store_u32(&data->n_waiting_thds, 0, ATOMIC_RELAXED);
 }
 #endif
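
The comment added above captures the split this hunk relies on: every other field is only written with the mutex held, so a whole-struct copy under that mutex cannot race with those writers, while n_waiting_thds is bumped by threads that do not hold the lock, so its copied snapshot is discarded. A small standalone illustration of that split (all names hypothetical, pthreads standing in for malloc_mutex_t):

#include <pthread.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

typedef struct {
        uint64_t n_lock_ops;              /* only written with the lock held */
        _Atomic uint32_t n_waiting_thds;  /* written by threads w/o the lock */
} prof_demo_t;

static pthread_mutex_t demo_lock = PTHREAD_MUTEX_INITIALIZER;
static prof_demo_t demo_source = {0, 0};

static void
prof_demo_read(prof_demo_t *out) {
        pthread_mutex_lock(&demo_lock);
        /*
         * Plain struct copy: safe for n_lock_ops because its writers hold the
         * same lock; the n_waiting_thds snapshot is meaningless, so reset it.
         */
        *out = demo_source;
        atomic_store_explicit(&out->n_waiting_thds, 0, memory_order_relaxed);
        pthread_mutex_unlock(&demo_lock);
}

int
main(void) {
        prof_demo_t snap;
        prof_demo_read(&snap);
        printf("%llu %u\n", (unsigned long long)snap.n_lock_ops,
            atomic_load_explicit(&snap.n_waiting_thds, memory_order_relaxed));
        return 0;
}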


@@ -17,7 +17,7 @@ struct mutex_prof_data_s {
         /* Max # of threads waiting for the mutex at the same time. */
         uint32_t max_n_thds;
         /* Current # of threads waiting on the lock. Atomic synced. */
-        uint32_t n_waiting_thds;
+        atomic_u32_t n_waiting_thds;
         /*
          * Data touched on the fast path. These are modified right after we


@@ -35,7 +35,8 @@ typedef struct malloc_mutex_s malloc_mutex_t;
 #endif
 #define LOCK_PROF_DATA_INITIALIZER \
-    {NSTIME_ZERO_INITIALIZER, NSTIME_ZERO_INITIALIZER, 0, 0, 0, 0, 0, NULL, 0}
+    {NSTIME_ZERO_INITIALIZER, NSTIME_ZERO_INITIALIZER, 0, 0, 0, \
+     ATOMIC_INIT(0), 0, NULL, 0}
 #ifdef _WIN32
 # define MALLOC_MUTEX_INITIALIZER


@@ -93,10 +93,11 @@ label_spin_done:
         /* Copy before to after to avoid clock skews. */
         nstime_t after;
         nstime_copy(&after, &before);
-        uint32_t n_thds = atomic_add_u32(&data->n_waiting_thds, 1);
+        uint32_t n_thds = atomic_fetch_add_u32(&data->n_waiting_thds, 1,
+            ATOMIC_RELAXED) + 1;
         /* One last try as above two calls may take quite some cycles. */
         if (!malloc_mutex_trylock(mutex)) {
-                atomic_sub_u32(&data->n_waiting_thds, 1);
+                atomic_fetch_sub_u32(&data->n_waiting_thds, 1, ATOMIC_RELAXED);
                 data->n_spin_acquired++;
                 return;
         }
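
One subtlety in the hunk above: C11-style atomic_fetch_add_u32 returns the value the counter held before the increment, whereas (as the added "+ 1" implies) the previous atomic_add_u32 returned the post-increment value, so the "+ 1" keeps n_thds equal to what it used to be. A standalone C11 check of that semantics (illustrative, not jemalloc code):

#include <assert.h>
#include <stdatomic.h>
#include <stdint.h>

int
main(void) {
        _Atomic uint32_t ctr = 5;
        uint32_t old = atomic_fetch_add_explicit(&ctr, 1, memory_order_relaxed);
        assert(old == 5);         /* value before the increment */
        assert(old + 1 == 6);     /* the "+ 1" recovers the new count */
        assert(atomic_load_explicit(&ctr, memory_order_relaxed) == 6);
        return 0;
}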
@@ -104,7 +105,7 @@ label_spin_done:
         /* True slow path. */
         malloc_mutex_lock_final(mutex);
         /* Update more slow-path only counters. */
-        atomic_sub_u32(&data->n_waiting_thds, 1);
+        atomic_fetch_sub_u32(&data->n_waiting_thds, 1, ATOMIC_RELAXED);
         nstime_update(&after);
         nstime_t delta;