Refactor/fix background_thread/percpu_arena bootstrapping.

Refactor bootstrapping such that dlsym() is called during the
bootstrapping phase that can tolerate reentrant allocation.
This commit is contained in:
Jason Evans 2017-05-31 16:45:14 -07:00
parent 596b479d83
commit b511232fcd
12 changed files with 161 additions and 124 deletions

View File

@ -11,8 +11,7 @@ extern ssize_t opt_muzzy_decay_ms;
extern const arena_bin_info_t arena_bin_info[NBINS];
extern percpu_arena_mode_t percpu_arena_mode;
extern const char *opt_percpu_arena;
extern percpu_arena_mode_t opt_percpu_arena;
extern const char *percpu_arena_mode_names[];
extern const uint64_t h_steps[SMOOTHSTEP_NSTEPS];

View File

@ -20,14 +20,26 @@ typedef struct arena_tdata_s arena_tdata_t;
typedef struct alloc_ctx_s alloc_ctx_t;
typedef enum {
percpu_arena_disabled = 0,
percpu_arena = 1,
per_phycpu_arena = 2, /* i.e. hyper threads share arena. */
percpu_arena_mode_names_base = 0, /* Used for options processing. */
percpu_arena_mode_limit = 3
/*
* *_uninit are used only during bootstrapping, and must correspond
* to initialized variant plus percpu_arena_mode_enabled_base.
*/
percpu_arena_uninit = 0,
per_phycpu_arena_uninit = 1,
/* All non-disabled modes must come after percpu_arena_disabled. */
percpu_arena_disabled = 2,
percpu_arena_mode_names_limit = 3, /* Used for options processing. */
percpu_arena_mode_enabled_base = 3,
percpu_arena = 3,
per_phycpu_arena = 4 /* Hyper threads share arena. */
} percpu_arena_mode_t;
#define PERCPU_ARENA_MODE_DEFAULT percpu_arena_disabled
#define OPT_PERCPU_ARENA_DEFAULT "disabled"
#define PERCPU_ARENA_ENABLED(m) ((m) >= percpu_arena_mode_enabled_base)
#define PERCPU_ARENA_DEFAULT percpu_arena_disabled
#endif /* JEMALLOC_INTERNAL_ARENA_TYPES_H */

View File

@ -8,7 +8,6 @@ extern size_t n_background_threads;
extern background_thread_info_t *background_thread_info;
bool background_thread_create(tsd_t *tsd, unsigned arena_ind);
bool background_threads_init(tsd_t *tsd);
bool background_threads_enable(tsd_t *tsd);
bool background_threads_disable(tsd_t *tsd);
bool background_threads_disable_single(tsd_t *tsd,
@ -22,10 +21,11 @@ void background_thread_postfork_child(tsdn_t *tsdn);
bool background_thread_stats_read(tsdn_t *tsdn,
background_thread_stats_t *stats);
#if defined(JEMALLOC_BACKGROUND_THREAD) || defined(JEMALLOC_LAZY_LOCK)
extern int (*pthread_create_fptr)(pthread_t *__restrict, const pthread_attr_t *,
#ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER
extern int pthread_create_wrapper(pthread_t *__restrict, const pthread_attr_t *,
void *(*)(void *), void *__restrict);
void *load_pthread_create_fptr(void);
#endif
bool background_thread_boot0(void);
bool background_thread_boot1(tsdn_t *tsdn);
#endif /* JEMALLOC_INTERNAL_BACKGROUND_THREAD_EXTERNS_H */

View File

@ -3,6 +3,10 @@
/* This file really combines "structs" and "types", but only transitionally. */
#if defined(JEMALLOC_BACKGROUND_THREAD) || defined(JEMALLOC_LAZY_LOCK)
# define JEMALLOC_PTHREAD_CREATE_WRAPPER
#endif
#define BACKGROUND_THREAD_INDEFINITE_SLEEP UINT64_MAX
struct background_thread_info_s {

View File

@ -21,17 +21,17 @@ malloc_getcpu(void) {
/* Return the chosen arena index based on current cpu. */
JEMALLOC_ALWAYS_INLINE unsigned
percpu_arena_choose(void) {
unsigned arena_ind;
assert(have_percpu_arena && (percpu_arena_mode !=
percpu_arena_disabled));
assert(have_percpu_arena && PERCPU_ARENA_ENABLED(opt_percpu_arena));
malloc_cpuid_t cpuid = malloc_getcpu();
assert(cpuid >= 0);
if ((percpu_arena_mode == percpu_arena) ||
((unsigned)cpuid < ncpus / 2)) {
unsigned arena_ind;
if ((opt_percpu_arena == percpu_arena) || ((unsigned)cpuid < ncpus /
2)) {
arena_ind = cpuid;
} else {
assert(percpu_arena_mode == per_phycpu_arena);
assert(opt_percpu_arena == per_phycpu_arena);
/* Hyper threads on the same physical CPU share arena. */
arena_ind = cpuid - ncpus / 2;
}
@ -41,9 +41,9 @@ percpu_arena_choose(void) {
/* Return the limit of percpu auto arena range, i.e. arenas[0...ind_limit). */
JEMALLOC_ALWAYS_INLINE unsigned
percpu_arena_ind_limit(void) {
assert(have_percpu_arena && (percpu_arena_mode != percpu_arena_disabled));
if (percpu_arena_mode == per_phycpu_arena && ncpus > 1) {
percpu_arena_ind_limit(percpu_arena_mode_t mode) {
assert(have_percpu_arena && PERCPU_ARENA_ENABLED(mode));
if (mode == per_phycpu_arena && ncpus > 1) {
if (ncpus % 2) {
/* This likely means a misconfig. */
return ncpus / 2 + 1;

View File

@ -43,9 +43,10 @@ arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal) {
* auto percpu arena range, (i.e. thread is assigned to a manually
* managed arena), then percpu arena is skipped.
*/
if (have_percpu_arena && (percpu_arena_mode != percpu_arena_disabled) &&
!internal && (arena_ind_get(ret) < percpu_arena_ind_limit()) &&
(ret->last_thd != tsd_tsdn(tsd))) {
if (have_percpu_arena && PERCPU_ARENA_ENABLED(opt_percpu_arena) &&
!internal && (arena_ind_get(ret) <
percpu_arena_ind_limit(opt_percpu_arena)) && (ret->last_thd !=
tsd_tsdn(tsd))) {
unsigned ind = percpu_arena_choose();
if (arena_ind_get(ret) != ind) {
percpu_arena_update(tsd, ind);

View File

@ -13,13 +13,18 @@
/******************************************************************************/
/* Data. */
/*
* Define names for both unininitialized and initialized phases, so that
* options and mallctl processing are straightforward.
*/
const char *percpu_arena_mode_names[] = {
"percpu",
"phycpu",
"disabled",
"percpu",
"phycpu"
};
const char *opt_percpu_arena = OPT_PERCPU_ARENA_DEFAULT;
percpu_arena_mode_t percpu_arena_mode = PERCPU_ARENA_MODE_DEFAULT;
percpu_arena_mode_t opt_percpu_arena = PERCPU_ARENA_DEFAULT;
ssize_t opt_dirty_decay_ms = DIRTY_DECAY_MS_DEFAULT;
ssize_t opt_muzzy_decay_ms = MUZZY_DECAY_MS_DEFAULT;

View File

@ -25,7 +25,6 @@ background_thread_info_t *background_thread_info;
#ifndef JEMALLOC_BACKGROUND_THREAD
#define NOT_REACHED { not_reached(); }
bool background_thread_create(tsd_t *tsd, unsigned arena_ind) NOT_REACHED
bool background_threads_init(tsd_t *tsd) NOT_REACHED
bool background_threads_enable(tsd_t *tsd) NOT_REACHED
bool background_threads_disable(tsd_t *tsd) NOT_REACHED
bool background_threads_disable_single(tsd_t *tsd,
@ -53,44 +52,6 @@ background_thread_info_reinit(tsdn_t *tsdn, background_thread_info_t *info) {
}
}
bool
background_threads_init(tsd_t *tsd) {
assert(have_background_thread);
assert(narenas_total_get() > 0);
background_thread_enabled_set(tsd_tsdn(tsd), opt_background_thread);
if (malloc_mutex_init(&background_thread_lock,
"background_thread_global",
WITNESS_RANK_BACKGROUND_THREAD_GLOBAL,
malloc_mutex_rank_exclusive)) {
return true;
}
background_thread_info = (background_thread_info_t *)base_alloc(
tsd_tsdn(tsd), b0get(), ncpus * sizeof(background_thread_info_t),
CACHELINE);
if (background_thread_info == NULL) {
return true;
}
for (unsigned i = 0; i < ncpus; i++) {
background_thread_info_t *info = &background_thread_info[i];
if (malloc_mutex_init(&info->mtx, "background_thread",
WITNESS_RANK_BACKGROUND_THREAD,
malloc_mutex_rank_exclusive)) {
return true;
}
if (pthread_cond_init(&info->cond, NULL)) {
return true;
}
malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
info->started = false;
background_thread_info_reinit(tsd_tsdn(tsd), info);
malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
}
return false;
}
static inline bool
set_current_thread_affinity(UNUSED int cpu) {
#if defined(JEMALLOC_HAVE_SCHED_SETAFFINITY)
@ -363,12 +324,11 @@ background_thread_create(tsd_t *tsd, unsigned arena_ind) {
pre_reentrancy(tsd);
int err;
load_pthread_create_fptr();
/*
* To avoid complications (besides reentrancy), create internal
* background threads with the underlying pthread_create.
*/
if ((err = pthread_create_fptr(&info->thread, NULL,
if ((err = pthread_create_wrapper(&info->thread, NULL,
background_thread_entry, (void *)thread_ind)) != 0) {
malloc_printf("<jemalloc>: arena %u background thread creation "
"failed (%d).\n", arena_ind, err);
@ -638,28 +598,84 @@ background_thread_stats_read(tsdn_t *tsdn, background_thread_stats_t *stats) {
#endif /* defined(JEMALLOC_BACKGROUND_THREAD) */
#if defined(JEMALLOC_BACKGROUND_THREAD) || defined(JEMALLOC_LAZY_LOCK)
#ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER
#include <dlfcn.h>
int (*pthread_create_fptr)(pthread_t *__restrict, const pthread_attr_t *,
static int (*pthread_create_fptr)(pthread_t *__restrict, const pthread_attr_t *,
void *(*)(void *), void *__restrict);
void *
load_pthread_create_fptr(void) {
if (pthread_create_fptr) {
return pthread_create_fptr;
}
static void
pthread_create_wrapper_once(void) {
#ifdef JEMALLOC_LAZY_LOCK
isthreaded = true;
#endif
}
int
pthread_create_wrapper(pthread_t *__restrict thread, const pthread_attr_t *attr,
void *(*start_routine)(void *), void *__restrict arg) {
static pthread_once_t once_control = PTHREAD_ONCE_INIT;
pthread_once(&once_control, pthread_create_wrapper_once);
return pthread_create_fptr(thread, attr, start_routine, arg);
}
#endif
bool
background_thread_boot0(void) {
if (!have_background_thread && opt_background_thread) {
malloc_printf("<jemalloc>: option background_thread currently "
"supports pthread only. \n");
return true;
}
#ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER
pthread_create_fptr = dlsym(RTLD_NEXT, "pthread_create");
if (pthread_create_fptr == NULL) {
malloc_write("<jemalloc>: Error in dlsym(RTLD_NEXT, "
"\"pthread_create\")\n");
abort();
}
return pthread_create_fptr;
#endif
return false;
}
bool
background_thread_boot1(tsdn_t *tsdn) {
#ifdef JEMALLOC_BACKGROUND_THREAD
assert(have_background_thread);
assert(narenas_total_get() > 0);
background_thread_enabled_set(tsdn, opt_background_thread);
if (malloc_mutex_init(&background_thread_lock,
"background_thread_global",
WITNESS_RANK_BACKGROUND_THREAD_GLOBAL,
malloc_mutex_rank_exclusive)) {
return true;
}
background_thread_info = (background_thread_info_t *)base_alloc(tsdn,
b0get(), ncpus * sizeof(background_thread_info_t), CACHELINE);
if (background_thread_info == NULL) {
return true;
}
for (unsigned i = 0; i < ncpus; i++) {
background_thread_info_t *info = &background_thread_info[i];
if (malloc_mutex_init(&info->mtx, "background_thread",
WITNESS_RANK_BACKGROUND_THREAD,
malloc_mutex_rank_exclusive)) {
return true;
}
if (pthread_cond_init(&info->cond, NULL)) {
return true;
}
malloc_mutex_lock(tsdn, &info->mtx);
info->started = false;
background_thread_info_reinit(tsdn, info);
malloc_mutex_unlock(tsdn, &info->mtx);
}
#endif
return false;
}

View File

@ -1559,7 +1559,8 @@ CTL_RO_NL_GEN(opt_abort_conf, opt_abort_conf, bool)
CTL_RO_NL_GEN(opt_retain, opt_retain, bool)
CTL_RO_NL_GEN(opt_dss, opt_dss, const char *)
CTL_RO_NL_GEN(opt_narenas, opt_narenas, unsigned)
CTL_RO_NL_GEN(opt_percpu_arena, opt_percpu_arena, const char *)
CTL_RO_NL_GEN(opt_percpu_arena, percpu_arena_mode_names[opt_percpu_arena],
const char *)
CTL_RO_NL_GEN(opt_background_thread, opt_background_thread, bool)
CTL_RO_NL_GEN(opt_dirty_decay_ms, opt_dirty_decay_ms, ssize_t)
CTL_RO_NL_GEN(opt_muzzy_decay_ms, opt_muzzy_decay_ms, ssize_t)
@ -1610,8 +1611,8 @@ thread_arena_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
}
if (have_percpu_arena &&
(percpu_arena_mode != percpu_arena_disabled)) {
if (newind < percpu_arena_ind_limit()) {
PERCPU_ARENA_ENABLED(opt_percpu_arena)) {
if (newind < percpu_arena_ind_limit(opt_percpu_arena)) {
/*
* If perCPU arena is enabled, thread_arena
* control is not allowed for the auto arena

View File

@ -463,7 +463,7 @@ arena_t *
arena_choose_hard(tsd_t *tsd, bool internal) {
arena_t *ret JEMALLOC_CC_SILENCE_INIT(NULL);
if (have_percpu_arena && percpu_arena_mode != percpu_arena_disabled) {
if (have_percpu_arena && PERCPU_ARENA_ENABLED(opt_percpu_arena)) {
unsigned choose = percpu_arena_choose();
ret = arena_get(tsd_tsdn(tsd), choose, true);
assert(ret != NULL);
@ -1100,7 +1100,8 @@ malloc_conf_init(void) {
if (strncmp("percpu_arena", k, klen) == 0) {
int i;
bool match = false;
for (i = 0; i < percpu_arena_mode_limit; i++) {
for (i = percpu_arena_mode_names_base; i <
percpu_arena_mode_names_limit; i++) {
if (strncmp(percpu_arena_mode_names[i],
v, vlen) == 0) {
if (!have_percpu_arena) {
@ -1108,9 +1109,7 @@ malloc_conf_init(void) {
"No getcpu support",
k, klen, v, vlen);
}
percpu_arena_mode = i;
opt_percpu_arena =
percpu_arena_mode_names[i];
opt_percpu_arena = i;
match = true;
break;
}
@ -1276,6 +1275,10 @@ malloc_init_hard_recursible(void) {
}
#endif
if (background_thread_boot0()) {
return true;
}
return false;
}
@ -1293,13 +1296,25 @@ malloc_narenas_default(void) {
}
}
static percpu_arena_mode_t
percpu_arena_as_initialized(percpu_arena_mode_t mode) {
assert(!malloc_initialized());
assert(mode <= percpu_arena_disabled);
if (mode != percpu_arena_disabled) {
mode += percpu_arena_mode_enabled_base;
}
return mode;
}
static bool
malloc_init_narenas(void) {
assert(ncpus > 0);
if (percpu_arena_mode != percpu_arena_disabled) {
if (opt_percpu_arena != percpu_arena_disabled) {
if (!have_percpu_arena || malloc_getcpu() < 0) {
percpu_arena_mode = percpu_arena_disabled;
opt_percpu_arena = percpu_arena_disabled;
malloc_printf("<jemalloc>: perCPU arena getcpu() not "
"available. Setting narenas to %u.\n", opt_narenas ?
opt_narenas : malloc_narenas_default());
@ -1315,8 +1330,9 @@ malloc_init_narenas(void) {
}
return true;
}
if ((percpu_arena_mode == per_phycpu_arena) &&
(ncpus % 2 != 0)) {
/* NB: opt_percpu_arena isn't fully initialized yet. */
if (percpu_arena_as_initialized(opt_percpu_arena) ==
per_phycpu_arena && ncpus % 2 != 0) {
malloc_printf("<jemalloc>: invalid "
"configuration -- per physical CPU arena "
"with odd number (%u) of CPUs (no hyper "
@ -1324,7 +1340,8 @@ malloc_init_narenas(void) {
if (opt_abort)
abort();
}
unsigned n = percpu_arena_ind_limit();
unsigned n = percpu_arena_ind_limit(
percpu_arena_as_initialized(opt_percpu_arena));
if (opt_narenas < n) {
/*
* If narenas is specified with percpu_arena
@ -1363,26 +1380,16 @@ malloc_init_narenas(void) {
return false;
}
static bool
malloc_init_background_threads(tsd_t *tsd) {
malloc_mutex_assert_owner(tsd_tsdn(tsd), &init_lock);
if (!have_background_thread) {
if (opt_background_thread) {
malloc_printf("<jemalloc>: option background_thread "
"currently supports pthread only. \n");
return true;
} else {
return false;
}
}
return background_threads_init(tsd);
static void
malloc_init_percpu(void) {
opt_percpu_arena = percpu_arena_as_initialized(opt_percpu_arena);
}
static bool
malloc_init_hard_finish(void) {
if (malloc_mutex_boot())
if (malloc_mutex_boot()) {
return true;
}
malloc_init_state = malloc_init_initialized;
malloc_slow_flag_init();
@ -1421,7 +1428,7 @@ malloc_init_hard(void) {
malloc_mutex_lock(tsd_tsdn(tsd), &init_lock);
/* Initialize narenas before prof_boot2 (for allocation). */
if (malloc_init_narenas() || malloc_init_background_threads(tsd)) {
if (malloc_init_narenas() || background_thread_boot1(tsd_tsdn(tsd))) {
malloc_mutex_unlock(tsd_tsdn(tsd), &init_lock);
return true;
}
@ -1431,6 +1438,8 @@ malloc_init_hard(void) {
return true;
}
malloc_init_percpu();
if (malloc_init_hard_finish()) {
malloc_mutex_unlock(tsd_tsdn(tsd), &init_lock);
return true;

View File

@ -27,21 +27,11 @@ static malloc_mutex_t *postponed_mutexes = NULL;
*/
#if defined(JEMALLOC_LAZY_LOCK) && !defined(_WIN32)
static void
pthread_create_once(void) {
pthread_create_fptr = load_pthread_create_fptr();
assert(isthreaded);
}
JEMALLOC_EXPORT int
pthread_create(pthread_t *__restrict thread,
const pthread_attr_t *__restrict attr, void *(*start_routine)(void *),
void *__restrict arg) {
static pthread_once_t once_control = PTHREAD_ONCE_INIT;
pthread_once(&once_control, pthread_create_once);
return pthread_create_fptr(thread, attr, start_routine, arg);
return pthread_create_wrapper(thread, attr, start_routine, arg);
}
#endif

View File

@ -327,18 +327,18 @@ TEST_END
TEST_BEGIN(test_thread_arena) {
unsigned old_arena_ind, new_arena_ind, narenas;
const char *opt_percpu_arena;
size_t sz = sizeof(opt_percpu_arena);
assert_d_eq(mallctl("opt.percpu_arena", &opt_percpu_arena, &sz, NULL,
0), 0, "Unexpected mallctl() failure");
const char *opa;
size_t sz = sizeof(opa);
assert_d_eq(mallctl("opt.percpu_arena", &opa, &sz, NULL, 0), 0,
"Unexpected mallctl() failure");
sz = sizeof(unsigned);
assert_d_eq(mallctl("arenas.narenas", (void *)&narenas, &sz, NULL, 0),
0, "Unexpected mallctl() failure");
assert_u_eq(narenas, opt_narenas, "Number of arenas incorrect");
if (strcmp(opt_percpu_arena, "disabled") == 0) {
if (strcmp(opa, "disabled") == 0) {
new_arena_ind = narenas - 1;
assert_d_eq(mallctl("thread.arena", (void *)&old_arena_ind, &sz,
(void *)&new_arena_ind, sizeof(unsigned)), 0,
@ -350,7 +350,7 @@ TEST_BEGIN(test_thread_arena) {
} else {
assert_d_eq(mallctl("thread.arena", (void *)&old_arena_ind, &sz,
NULL, 0), 0, "Unexpected mallctl() failure");
new_arena_ind = percpu_arena_ind_limit() - 1;
new_arena_ind = percpu_arena_ind_limit(opt_percpu_arena) - 1;
if (old_arena_ind != new_arena_ind) {
assert_d_eq(mallctl("thread.arena",
(void *)&old_arena_ind, &sz, (void *)&new_arena_ind,