From 6f41ba55ee85ce505d61713650f49f8bbb5bee6b Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 4 Aug 2021 12:53:39 -0700 Subject: [PATCH] Mutex: Make spin count configurable. Don't document it since we don't want to support this as a "real" setting, but it's handy for testing. --- include/jemalloc/internal/mutex.h | 10 +++------- src/ctl.c | 3 +++ src/jemalloc.c | 3 +++ src/mutex.c | 10 ++++++++-- src/stats.c | 1 + 5 files changed, 18 insertions(+), 9 deletions(-) diff --git a/include/jemalloc/internal/mutex.h b/include/jemalloc/internal/mutex.h index f5b1163a..63a0b1b3 100644 --- a/include/jemalloc/internal/mutex.h +++ b/include/jemalloc/internal/mutex.h @@ -6,6 +6,8 @@ #include "jemalloc/internal/tsd.h" #include "jemalloc/internal/witness.h" +extern int64_t opt_mutex_max_spin; + typedef enum { /* Can only acquire one mutex of a given witness rank at a time. */ malloc_mutex_rank_exclusive, @@ -43,7 +45,7 @@ struct malloc_mutex_s { #else pthread_mutex_t lock; #endif - /* + /* * Hint flag to avoid exclusive cache line contention * during spin waiting */ @@ -67,12 +69,6 @@ struct malloc_mutex_s { #endif }; -/* - * Based on benchmark results, a fixed spin with this amount of retries works - * well for our critical sections. 
- */ -#define MALLOC_MUTEX_MAX_SPIN 250 - #ifdef _WIN32 # if _WIN32_WINNT >= 0x0600 # define MALLOC_MUTEX_LOCK(m) AcquireSRWLockExclusive(&(m)->lock) diff --git a/src/ctl.c b/src/ctl.c index b3e62dfa..3ed00072 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -110,6 +110,7 @@ CTL_PROTO(opt_narenas) CTL_PROTO(opt_percpu_arena) CTL_PROTO(opt_oversize_threshold) CTL_PROTO(opt_background_thread) +CTL_PROTO(opt_mutex_max_spin) CTL_PROTO(opt_max_background_threads) CTL_PROTO(opt_background_thread_hpa_interval_max_ms) CTL_PROTO(opt_dirty_decay_ms) @@ -421,6 +422,7 @@ static const ctl_named_node_t opt_node[] = { {NAME("narenas"), CTL(opt_narenas)}, {NAME("percpu_arena"), CTL(opt_percpu_arena)}, {NAME("oversize_threshold"), CTL(opt_oversize_threshold)}, + {NAME("mutex_max_spin"), CTL(opt_mutex_max_spin)}, {NAME("background_thread"), CTL(opt_background_thread)}, {NAME("max_background_threads"), CTL(opt_max_background_threads)}, {NAME("background_thread_hpa_interval_max_ms"), @@ -2138,6 +2140,7 @@ CTL_RO_NL_GEN(opt_dss, opt_dss, const char *) CTL_RO_NL_GEN(opt_narenas, opt_narenas, unsigned) CTL_RO_NL_GEN(opt_percpu_arena, percpu_arena_mode_names[opt_percpu_arena], const char *) +CTL_RO_NL_GEN(opt_mutex_max_spin, opt_mutex_max_spin, int64_t) CTL_RO_NL_GEN(opt_oversize_threshold, opt_oversize_threshold, size_t) CTL_RO_NL_GEN(opt_background_thread, opt_background_thread, bool) CTL_RO_NL_GEN(opt_max_background_threads, opt_max_background_threads, size_t) diff --git a/src/jemalloc.c b/src/jemalloc.c index 8d57180e..d5e886e7 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1256,6 +1256,9 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], } while (vlen_left > 0); CONF_CONTINUE; } + CONF_HANDLE_INT64_T(opt_mutex_max_spin, + "mutex_max_spin", -1, INT64_MAX, CONF_CHECK_MIN, + CONF_DONT_CHECK_MAX, false); CONF_HANDLE_SSIZE_T(opt_dirty_decay_ms, "dirty_decay_ms", -1, NSTIME_SEC_MAX * KQU(1000) < QU(SSIZE_MAX) ? 
NSTIME_SEC_MAX * KQU(1000) : diff --git a/src/mutex.c b/src/mutex.c index 83d9ce76..79b8f275 100644 --- a/src/mutex.c +++ b/src/mutex.c @@ -9,6 +9,12 @@ #define _CRT_SPINCOUNT 4000 #endif +/* + * Default spin retry limit, chosen from benchmark results on our critical + * sections. A value of -1 means spin without limit. + */ +int64_t opt_mutex_max_spin = 250; + /******************************************************************************/ /* Data. */ @@ -51,7 +57,7 @@ malloc_mutex_lock_slow(malloc_mutex_t *mutex) { goto label_spin_done; } - int cnt = 0, max_cnt = MALLOC_MUTEX_MAX_SPIN; + int64_t cnt = 0; do { spin_cpu_spinwait(); if (!atomic_load_b(&mutex->locked, ATOMIC_RELAXED) @@ -59,7 +65,7 @@ malloc_mutex_lock_slow(malloc_mutex_t *mutex) { data->n_spin_acquired++; return; } - } while (cnt++ < max_cnt); + } while (cnt++ < opt_mutex_max_spin || opt_mutex_max_spin == -1); if (!config_stats) { /* Only spin is useful when stats is off. */ diff --git a/src/stats.c b/src/stats.c index 16aa3fd4..3a2806ed 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1496,6 +1496,7 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_SIZE_T("hpa_sec_bytes_after_flush") OPT_WRITE_SIZE_T("hpa_sec_batch_fill_extra") OPT_WRITE_CHAR_P("metadata_thp") + OPT_WRITE_INT64("mutex_max_spin") OPT_WRITE_BOOL_MUTABLE("background_thread", "background_thread") OPT_WRITE_SSIZE_T("background_thread_hpa_interval_max_ms") OPT_WRITE_SSIZE_T_MUTABLE("dirty_decay_ms", "arenas.dirty_decay_ms")