Optimize mutex and bin alignment / locality.

commit 3025b021b9
parent e2cd27132a
Author: Qi Wang (committed by Qi Wang)
Date:   2023-10-23 13:00:10 -07:00

3 changed files with 22 additions and 13 deletions

include/jemalloc/internal/arena_structs.h

@@ -98,10 +98,13 @@ struct arena_s {
 	/*
 	 * The arena is allocated alongside its bins; really this is a
 	 * dynamically sized array determined by the binshard settings.
+	 * Enforcing cacheline-alignment to minimize the number of cachelines
+	 * touched on the hot paths.
 	 */
 	JEMALLOC_WARN_ON_USAGE("Do not use this field directly. "
 	    "Use `arena_get_bin` instead.")
+	JEMALLOC_ALIGNED(CACHELINE)
 	bin_t all_bins[0];
 };
 #endif /* JEMALLOC_INTERNAL_ARENA_STRUCTS_H */
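
The effect of `JEMALLOC_ALIGNED(CACHELINE)` here is that the trailing bins array starts on a fresh cache line instead of sharing one with the tail of the arena header. A minimal standalone sketch of the idea, assuming a GCC/Clang-style aligned attribute in place of the `JEMALLOC_ALIGNED` macro and a 64-byte cache line (types and names below are illustrative, not jemalloc's):

#include <assert.h>
#include <stddef.h>

#define CACHELINE 64

typedef struct { long slots[8]; } bin_t;        /* stand-in for jemalloc's bin_t */

typedef struct {
	int header_field;                       /* arena metadata lives here */
	/*
	 * Aligning the flexible trailing array pads the header so the first
	 * bin begins on its own cache line; a hot-path access to bin 0 then
	 * never drags in the header's last cache line as well.
	 */
	__attribute__((aligned(CACHELINE))) bin_t all_bins[];
} arena_like_t;

int main(void) {
	/* The member offset itself is rounded up to the cache line. */
	assert(offsetof(arena_like_t, all_bins) % CACHELINE == 0);
	return 0;
}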

include/jemalloc/internal/mutex.h

@@ -32,6 +32,12 @@ struct malloc_mutex_s {
 	 * unlocking thread).
 	 */
 	mutex_prof_data_t	prof_data;
+	/*
+	 * Hint flag to avoid exclusive cache line contention
+	 * during spin waiting.  Placed along with prof_data
+	 * since it's always modified even with no contention.
+	 */
+	atomic_b_t		locked;
 #ifdef _WIN32
 #  if _WIN32_WINNT >= 0x0600
 	SRWLOCK			lock;
@@ -46,11 +52,6 @@ struct malloc_mutex_s {
 #else
 	pthread_mutex_t		lock;
 #endif
-	/*
-	 * Hint flag to avoid exclusive cache line contention
-	 * during spin waiting
-	 */
-	atomic_b_t		locked;
 };
 /*
  * We only touch witness when configured w/ debug.  However we
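
For context on the `locked` hint: spinning threads can poll it with plain atomic loads, which keep its cache line in the shared state across CPUs, and only attempt an exclusive atomic operation once the holder appears to have released. A minimal test-and-test-and-set sketch of that pattern in C11 atomics (not jemalloc's actual lock path; all names here are illustrative):

#include <stdatomic.h>
#include <stdbool.h>

#define MAX_SPINS 1024

typedef struct {
	atomic_bool locked;    /* hint flag; the real OS lock is omitted */
} spin_hint_mutex_t;

static bool
spin_try_lock(spin_hint_mutex_t *m) {
	for (int i = 0; i < MAX_SPINS; i++) {
		/*
		 * Relaxed read-only polling does not force exclusive cache
		 * line ownership; the exchange (an RMW that does) is only
		 * attempted once the flag looks clear.
		 */
		if (!atomic_load_explicit(&m->locked, memory_order_relaxed) &&
		    !atomic_exchange_explicit(&m->locked, true,
		    memory_order_acquire)) {
			return true;    /* acquired */
		}
	}
	return false;    /* give up; caller blocks on the real lock */
}

static void
spin_unlock(spin_hint_mutex_t *m) {
	atomic_store_explicit(&m->locked, false, memory_order_release);
}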
@@ -99,21 +100,21 @@ struct malloc_mutex_s {
 #elif (defined(JEMALLOC_OS_UNFAIR_LOCK))
 #  if defined(JEMALLOC_DEBUG)
 #    define MALLOC_MUTEX_INITIALIZER \
-     {{{LOCK_PROF_DATA_INITIALIZER, OS_UNFAIR_LOCK_INIT, ATOMIC_INIT(false)}}, \
+     {{{LOCK_PROF_DATA_INITIALIZER, ATOMIC_INIT(false), OS_UNFAIR_LOCK_INIT}}, \
       WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT), 0}
 #  else
 #    define MALLOC_MUTEX_INITIALIZER \
-     {{{LOCK_PROF_DATA_INITIALIZER, OS_UNFAIR_LOCK_INIT, ATOMIC_INIT(false)}}, \
+     {{{LOCK_PROF_DATA_INITIALIZER, ATOMIC_INIT(false), OS_UNFAIR_LOCK_INIT}}, \
       WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)}
 #  endif
 #elif (defined(JEMALLOC_MUTEX_INIT_CB))
 #  if (defined(JEMALLOC_DEBUG))
 #    define MALLOC_MUTEX_INITIALIZER \
-     {{{LOCK_PROF_DATA_INITIALIZER, PTHREAD_MUTEX_INITIALIZER, NULL, ATOMIC_INIT(false)}}, \
+     {{{LOCK_PROF_DATA_INITIALIZER, ATOMIC_INIT(false), PTHREAD_MUTEX_INITIALIZER, NULL}}, \
       WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT), 0}
 #  else
 #    define MALLOC_MUTEX_INITIALIZER \
-     {{{LOCK_PROF_DATA_INITIALIZER, PTHREAD_MUTEX_INITIALIZER, NULL, ATOMIC_INIT(false)}}, \
+     {{{LOCK_PROF_DATA_INITIALIZER, ATOMIC_INIT(false), PTHREAD_MUTEX_INITIALIZER, NULL}}, \
       WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)}
 #  endif
@@ -121,11 +122,11 @@ struct malloc_mutex_s {
 #  define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_DEFAULT
 #  if defined(JEMALLOC_DEBUG)
 #    define MALLOC_MUTEX_INITIALIZER \
-     {{{LOCK_PROF_DATA_INITIALIZER, PTHREAD_MUTEX_INITIALIZER, ATOMIC_INIT(false)}}, \
+     {{{LOCK_PROF_DATA_INITIALIZER, ATOMIC_INIT(false), PTHREAD_MUTEX_INITIALIZER}}, \
       WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT), 0}
 #  else
 #    define MALLOC_MUTEX_INITIALIZER \
-     {{{LOCK_PROF_DATA_INITIALIZER, PTHREAD_MUTEX_INITIALIZER, ATOMIC_INIT(false)}}, \
+     {{{LOCK_PROF_DATA_INITIALIZER, ATOMIC_INIT(false), PTHREAD_MUTEX_INITIALIZER}}, \
       WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)}
 #  endif
 #endif
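
Note why every `MALLOC_MUTEX_INITIALIZER` variant above had to change: the brace initializers are positional, so once `locked` moves ahead of the lock word in the struct declaration, each list must swap the corresponding entries to match. A toy illustration (field and macro names here are hypothetical):

#include <pthread.h>
#include <stdatomic.h>

typedef struct {
	int             prof_placeholder;   /* stands in for prof_data */
	atomic_bool     locked;             /* now declared before the lock */
	pthread_mutex_t lock;
} mini_mutex_t;

/* Entries must appear in declaration order: placeholder, locked, lock. */
#define MINI_MUTEX_INITIALIZER \
	{0, false, PTHREAD_MUTEX_INITIALIZER}

static mini_mutex_t global_mutex = MINI_MUTEX_INITIALIZER;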

src/arena.c

@@ -1666,11 +1666,16 @@ arena_new(tsdn_t *tsdn, unsigned ind, const arena_config_t *config) {
 		}
 	}
-	size_t arena_size = sizeof(arena_t) + sizeof(bin_t) * nbins_total;
+	size_t arena_size = ALIGNMENT_CEILING(sizeof(arena_t), CACHELINE) +
+	    sizeof(bin_t) * nbins_total;
 	arena = (arena_t *)base_alloc(tsdn, base, arena_size, CACHELINE);
 	if (arena == NULL) {
 		goto label_error;
 	}
+	JEMALLOC_SUPPRESS_WARN_ON_USAGE(
+	    assert((uintptr_t)&arena->all_bins[nbins_total - 1] + sizeof(bin_t) <=
+	        (uintptr_t)arena + arena_size);
+	)
 	atomic_store_u(&arena->nthreads[0], 0, ATOMIC_RELAXED);
 	atomic_store_u(&arena->nthreads[1], 0, ATOMIC_RELAXED);
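
On the size computation: since `all_bins` is now cacheline-aligned, the header's `sizeof(arena_t)` must be rounded up to the cache line before the bins are added, or the allocation could fall short by up to `CACHELINE - 1` bytes, which is what the new assert guards against. A sketch of the rounding, assuming the usual power-of-two formulation of `ALIGNMENT_CEILING` (the constants below are made up for the example):

#include <assert.h>
#include <stddef.h>

#define CACHELINE 64

/* Round size up to the next multiple of a power-of-two alignment. */
#define ALIGNMENT_CEILING(size, alignment) \
	(((size) + (alignment) - 1) & ~((size_t)(alignment) - 1))

int main(void) {
	size_t header_size = 100;   /* pretend sizeof(arena_t) */
	size_t bin_size = 48;       /* pretend sizeof(bin_t)   */
	size_t nbins_total = 3;

	/* 100 rounds up to 128, then three bins follow: 128 + 144 = 272. */
	size_t arena_size = ALIGNMENT_CEILING(header_size, CACHELINE) +
	    bin_size * nbins_total;
	assert(arena_size == 272);

	/* The last bin ends inside the allocation, mirroring the assert
	 * added in arena_new(). */
	assert(ALIGNMENT_CEILING(header_size, CACHELINE) +
	    (nbins_total - 1) * bin_size + bin_size <= arena_size);
	return 0;
}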