Mutex: Make spin count configurable.

Don't document it since we don't want to support this as a "real" setting, but
it's handy for testing.
Author: David Goldblatt, 2021-08-04 12:53:39 -07:00 (committed by David Goldblatt)
parent dae24589bc
commit 6f41ba55ee
5 changed files with 18 additions and 9 deletions

include/jemalloc/internal/mutex.h

@@ -6,6 +6,8 @@
 #include "jemalloc/internal/tsd.h"
 #include "jemalloc/internal/witness.h"
 
+extern int64_t opt_mutex_max_spin;
+
 typedef enum {
 	/* Can only acquire one mutex of a given witness rank at a time. */
 	malloc_mutex_rank_exclusive,
@@ -43,7 +45,7 @@ struct malloc_mutex_s {
 #else
 	pthread_mutex_t lock;
 #endif
 	/*
 	 * Hint flag to avoid exclusive cache line contention
 	 * during spin waiting
 	 */
@@ -67,12 +69,6 @@
 #endif
 };
 
-/*
- * Based on benchmark results, a fixed spin with this amount of retries works
- * well for our critical sections.
- */
-#define MALLOC_MUTEX_MAX_SPIN 250
-
 #ifdef _WIN32
 # if _WIN32_WINNT >= 0x0600
 # define MALLOC_MUTEX_LOCK(m) AcquireSRWLockExclusive(&(m)->lock)

src/ctl.c

@@ -110,6 +110,7 @@ CTL_PROTO(opt_narenas)
 CTL_PROTO(opt_percpu_arena)
 CTL_PROTO(opt_oversize_threshold)
 CTL_PROTO(opt_background_thread)
+CTL_PROTO(opt_mutex_max_spin)
 CTL_PROTO(opt_max_background_threads)
 CTL_PROTO(opt_background_thread_hpa_interval_max_ms)
 CTL_PROTO(opt_dirty_decay_ms)
@@ -421,6 +422,7 @@ static const ctl_named_node_t opt_node[] = {
 	{NAME("narenas"), CTL(opt_narenas)},
 	{NAME("percpu_arena"), CTL(opt_percpu_arena)},
 	{NAME("oversize_threshold"), CTL(opt_oversize_threshold)},
+	{NAME("mutex_max_spin"), CTL(opt_mutex_max_spin)},
 	{NAME("background_thread"), CTL(opt_background_thread)},
 	{NAME("max_background_threads"), CTL(opt_max_background_threads)},
 	{NAME("background_thread_hpa_interval_max_ms"),
@@ -2138,6 +2140,7 @@ CTL_RO_NL_GEN(opt_dss, opt_dss, const char *)
 CTL_RO_NL_GEN(opt_narenas, opt_narenas, unsigned)
 CTL_RO_NL_GEN(opt_percpu_arena, percpu_arena_mode_names[opt_percpu_arena],
     const char *)
+CTL_RO_NL_GEN(opt_mutex_max_spin, opt_mutex_max_spin, int64_t)
 CTL_RO_NL_GEN(opt_oversize_threshold, opt_oversize_threshold, size_t)
 CTL_RO_NL_GEN(opt_background_thread, opt_background_thread, bool)
 CTL_RO_NL_GEN(opt_max_background_threads, opt_max_background_threads, size_t)
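
The CTL_RO_NL_GEN line above generates a read-only "opt.mutex_max_spin" mallctl
of type int64_t. A minimal sketch of reading it back from application code (the
mallctl name comes from the diff; the program around it is just illustration):

#include <stdint.h>
#include <stdio.h>
#include <jemalloc/jemalloc.h>

int
main(void) {
	int64_t max_spin;
	size_t sz = sizeof(max_spin);
	/* "opt.*" names are read-only snapshots of the parsed options. */
	if (mallctl("opt.mutex_max_spin", &max_spin, &sz, NULL, 0) == 0) {
		printf("opt.mutex_max_spin: %lld\n", (long long)max_spin);
	}
	return 0;
}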

src/jemalloc.c

@@ -1256,6 +1256,9 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS],
 			} while (vlen_left > 0);
 			CONF_CONTINUE;
 		}
+		CONF_HANDLE_INT64_T(opt_mutex_max_spin,
+		    "mutex_max_spin", -1, INT64_MAX, CONF_CHECK_MIN,
+		    CONF_DONT_CHECK_MAX, false);
 		CONF_HANDLE_SSIZE_T(opt_dirty_decay_ms,
 		    "dirty_decay_ms", -1, NSTIME_SEC_MAX * KQU(1000) <
 		    QU(SSIZE_MAX) ? NSTIME_SEC_MAX * KQU(1000) :
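
CONF_HANDLE_INT64_T wires the knob into the usual option-string parsing,
accepting any value from -1 upward (only the minimum is checked). So, although
undocumented, it can be set like any other option. A sketch using the
application-provided malloc_conf string; the MALLOC_CONF environment variable
takes the same "mutex_max_spin:<value>" syntax, and the value 400 here is
arbitrary:

/* Compiled into the application; jemalloc reads this string at init. */
const char *malloc_conf = "mutex_max_spin:400";

/* "mutex_max_spin:-1" would remove the bound, i.e. spin without blocking. */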

src/mutex.c

@@ -9,6 +9,12 @@
 #define _CRT_SPINCOUNT 4000
 #endif
 
+/*
+ * Based on benchmark results, a fixed spin with this amount of retries works
+ * well for our critical sections.
+ */
+int64_t opt_mutex_max_spin = 250;
+
 /******************************************************************************/
 /* Data. */
@@ -51,7 +57,7 @@ malloc_mutex_lock_slow(malloc_mutex_t *mutex) {
 		goto label_spin_done;
 	}
 
-	int cnt = 0, max_cnt = MALLOC_MUTEX_MAX_SPIN;
+	int cnt = 0;
 	do {
 		spin_cpu_spinwait();
 		if (!atomic_load_b(&mutex->locked, ATOMIC_RELAXED)
@@ -59,7 +65,7 @@ malloc_mutex_lock_slow(malloc_mutex_t *mutex) {
 			data->n_spin_acquired++;
 			return;
 		}
-	} while (cnt++ < max_cnt);
+	} while (cnt++ < opt_mutex_max_spin || opt_mutex_max_spin == -1);
 
 	if (!config_stats) {
 		/* Only spin is useful when stats is off. */
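
The loop above now rereads the global bound on every pass and treats -1 as
"spin forever". A simplified standalone sketch of the same spin-then-block
pattern, with a pthread mutex standing in for jemalloc's internal lock (the
names here are illustrative, not jemalloc's):

#include <pthread.h>
#include <stdint.h>

static int64_t opt_mutex_max_spin = 250; /* -1 disables the bound. */

static void
lock_slow(pthread_mutex_t *m) {
	int64_t cnt = 0;
	do {
		/* Stand-in for spin_cpu_spinwait(); busy retrying suffices. */
		if (pthread_mutex_trylock(m) == 0) {
			return; /* Acquired while spinning. */
		}
	} while (cnt++ < opt_mutex_max_spin || opt_mutex_max_spin == -1);
	/* Spin budget exhausted; fall back to a blocking acquire. */
	pthread_mutex_lock(m);
}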

src/stats.c

@@ -1496,6 +1496,7 @@ stats_general_print(emitter_t *emitter) {
 	OPT_WRITE_SIZE_T("hpa_sec_bytes_after_flush")
 	OPT_WRITE_SIZE_T("hpa_sec_batch_fill_extra")
 	OPT_WRITE_CHAR_P("metadata_thp")
+	OPT_WRITE_INT64("mutex_max_spin")
 	OPT_WRITE_BOOL_MUTABLE("background_thread", "background_thread")
 	OPT_WRITE_SSIZE_T("background_thread_hpa_interval_max_ms")
 	OPT_WRITE_SSIZE_T_MUTABLE("dirty_decay_ms", "arenas.dirty_decay_ms")
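
With the OPT_WRITE_INT64 entry, the setting also appears in the general section
of jemalloc's stats report, so a test can confirm which value actually took
effect. A minimal sketch:

#include <jemalloc/jemalloc.h>

int
main(void) {
	/* Dumps the full report (including opt.mutex_max_spin) to stderr. */
	malloc_stats_print(NULL, NULL, NULL);
	return 0;
}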