diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index cfb7c6fb..9ad9786f 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -11,9 +11,8 @@ extern ssize_t opt_muzzy_decay_ms; extern const arena_bin_info_t arena_bin_info[NBINS]; -extern percpu_arena_mode_t percpu_arena_mode; -extern const char *opt_percpu_arena; -extern const char *percpu_arena_mode_names[]; +extern percpu_arena_mode_t opt_percpu_arena; +extern const char *percpu_arena_mode_names[]; extern const uint64_t h_steps[SMOOTHSTEP_NSTEPS]; diff --git a/include/jemalloc/internal/arena_types.h b/include/jemalloc/internal/arena_types.h index 01b9096a..a691bd81 100644 --- a/include/jemalloc/internal/arena_types.h +++ b/include/jemalloc/internal/arena_types.h @@ -20,14 +20,26 @@ typedef struct arena_tdata_s arena_tdata_t; typedef struct alloc_ctx_s alloc_ctx_t; typedef enum { - percpu_arena_disabled = 0, - percpu_arena = 1, - per_phycpu_arena = 2, /* i.e. hyper threads share arena. */ + percpu_arena_mode_names_base = 0, /* Used for options processing. */ - percpu_arena_mode_limit = 3 + /* + * *_uninit are used only during bootstrapping, and must correspond + * to initialized variant plus percpu_arena_mode_enabled_base. + */ + percpu_arena_uninit = 0, + per_phycpu_arena_uninit = 1, + + /* All non-disabled modes must come after percpu_arena_disabled. */ + percpu_arena_disabled = 2, + + percpu_arena_mode_names_limit = 3, /* Used for options processing. */ + percpu_arena_mode_enabled_base = 3, + + percpu_arena = 3, + per_phycpu_arena = 4 /* Hyper threads share arena. 
*/ } percpu_arena_mode_t; -#define PERCPU_ARENA_MODE_DEFAULT percpu_arena_disabled -#define OPT_PERCPU_ARENA_DEFAULT "disabled" +#define PERCPU_ARENA_ENABLED(m) ((m) >= percpu_arena_mode_enabled_base) +#define PERCPU_ARENA_DEFAULT percpu_arena_disabled #endif /* JEMALLOC_INTERNAL_ARENA_TYPES_H */ diff --git a/include/jemalloc/internal/background_thread_externs.h b/include/jemalloc/internal/background_thread_externs.h index fe25acfe..a2d95a73 100644 --- a/include/jemalloc/internal/background_thread_externs.h +++ b/include/jemalloc/internal/background_thread_externs.h @@ -8,7 +8,6 @@ extern size_t n_background_threads; extern background_thread_info_t *background_thread_info; bool background_thread_create(tsd_t *tsd, unsigned arena_ind); -bool background_threads_init(tsd_t *tsd); bool background_threads_enable(tsd_t *tsd); bool background_threads_disable(tsd_t *tsd); bool background_threads_disable_single(tsd_t *tsd, @@ -22,10 +21,11 @@ void background_thread_postfork_child(tsdn_t *tsdn); bool background_thread_stats_read(tsdn_t *tsdn, background_thread_stats_t *stats); -#if defined(JEMALLOC_BACKGROUND_THREAD) || defined(JEMALLOC_LAZY_LOCK) -extern int (*pthread_create_fptr)(pthread_t *__restrict, const pthread_attr_t *, +#ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER +extern int pthread_create_wrapper(pthread_t *__restrict, const pthread_attr_t *, void *(*)(void *), void *__restrict); -void *load_pthread_create_fptr(void); #endif +bool background_thread_boot0(void); +bool background_thread_boot1(tsdn_t *tsdn); #endif /* JEMALLOC_INTERNAL_BACKGROUND_THREAD_EXTERNS_H */ diff --git a/include/jemalloc/internal/background_thread_structs.h b/include/jemalloc/internal/background_thread_structs.h index 9507abcd..edf90fe2 100644 --- a/include/jemalloc/internal/background_thread_structs.h +++ b/include/jemalloc/internal/background_thread_structs.h @@ -3,6 +3,10 @@ /* This file really combines "structs" and "types", but only transitionally. 
*/ +#if defined(JEMALLOC_BACKGROUND_THREAD) || defined(JEMALLOC_LAZY_LOCK) +# define JEMALLOC_PTHREAD_CREATE_WRAPPER +#endif + #define BACKGROUND_THREAD_INDEFINITE_SLEEP UINT64_MAX struct background_thread_info_s { diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_a.h b/include/jemalloc/internal/jemalloc_internal_inlines_a.h index d0bf2eee..854fb1e2 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_a.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_a.h @@ -21,17 +21,17 @@ malloc_getcpu(void) { /* Return the chosen arena index based on current cpu. */ JEMALLOC_ALWAYS_INLINE unsigned percpu_arena_choose(void) { - unsigned arena_ind; - assert(have_percpu_arena && (percpu_arena_mode != - percpu_arena_disabled)); + assert(have_percpu_arena && PERCPU_ARENA_ENABLED(opt_percpu_arena)); malloc_cpuid_t cpuid = malloc_getcpu(); assert(cpuid >= 0); - if ((percpu_arena_mode == percpu_arena) || - ((unsigned)cpuid < ncpus / 2)) { + + unsigned arena_ind; + if ((opt_percpu_arena == percpu_arena) || ((unsigned)cpuid < ncpus / + 2)) { arena_ind = cpuid; } else { - assert(percpu_arena_mode == per_phycpu_arena); + assert(opt_percpu_arena == per_phycpu_arena); /* Hyper threads on the same physical CPU share arena. */ arena_ind = cpuid - ncpus / 2; } @@ -41,9 +41,9 @@ percpu_arena_choose(void) { /* Return the limit of percpu auto arena range, i.e. arenas[0...ind_limit). */ JEMALLOC_ALWAYS_INLINE unsigned -percpu_arena_ind_limit(void) { - assert(have_percpu_arena && (percpu_arena_mode != percpu_arena_disabled)); - if (percpu_arena_mode == per_phycpu_arena && ncpus > 1) { +percpu_arena_ind_limit(percpu_arena_mode_t mode) { + assert(have_percpu_arena && PERCPU_ARENA_ENABLED(mode)); + if (mode == per_phycpu_arena && ncpus > 1) { if (ncpus % 2) { /* This likely means a misconfig. 
*/ return ncpus / 2 + 1; diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_b.h b/include/jemalloc/internal/jemalloc_internal_inlines_b.h index 37493160..2e76e5d8 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_b.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_b.h @@ -43,9 +43,10 @@ arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal) { * auto percpu arena range, (i.e. thread is assigned to a manually * managed arena), then percpu arena is skipped. */ - if (have_percpu_arena && (percpu_arena_mode != percpu_arena_disabled) && - !internal && (arena_ind_get(ret) < percpu_arena_ind_limit()) && - (ret->last_thd != tsd_tsdn(tsd))) { + if (have_percpu_arena && PERCPU_ARENA_ENABLED(opt_percpu_arena) && + !internal && (arena_ind_get(ret) < + percpu_arena_ind_limit(opt_percpu_arena)) && (ret->last_thd != + tsd_tsdn(tsd))) { unsigned ind = percpu_arena_choose(); if (arena_ind_get(ret) != ind) { percpu_arena_update(tsd, ind); diff --git a/src/arena.c b/src/arena.c index 151aad3e..dedbb3e3 100644 --- a/src/arena.c +++ b/src/arena.c @@ -13,13 +13,18 @@ /******************************************************************************/ /* Data. */ +/* + * Define names for both unininitialized and initialized phases, so that + * options and mallctl processing are straightforward. 
+ */ const char *percpu_arena_mode_names[] = { + "percpu", + "phycpu", "disabled", "percpu", "phycpu" }; -const char *opt_percpu_arena = OPT_PERCPU_ARENA_DEFAULT; -percpu_arena_mode_t percpu_arena_mode = PERCPU_ARENA_MODE_DEFAULT; +percpu_arena_mode_t opt_percpu_arena = PERCPU_ARENA_DEFAULT; ssize_t opt_dirty_decay_ms = DIRTY_DECAY_MS_DEFAULT; ssize_t opt_muzzy_decay_ms = MUZZY_DECAY_MS_DEFAULT; diff --git a/src/background_thread.c b/src/background_thread.c index ccb50a21..800526f5 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -25,7 +25,6 @@ background_thread_info_t *background_thread_info; #ifndef JEMALLOC_BACKGROUND_THREAD #define NOT_REACHED { not_reached(); } bool background_thread_create(tsd_t *tsd, unsigned arena_ind) NOT_REACHED -bool background_threads_init(tsd_t *tsd) NOT_REACHED bool background_threads_enable(tsd_t *tsd) NOT_REACHED bool background_threads_disable(tsd_t *tsd) NOT_REACHED bool background_threads_disable_single(tsd_t *tsd, @@ -53,44 +52,6 @@ background_thread_info_reinit(tsdn_t *tsdn, background_thread_info_t *info) { } } -bool -background_threads_init(tsd_t *tsd) { - assert(have_background_thread); - assert(narenas_total_get() > 0); - - background_thread_enabled_set(tsd_tsdn(tsd), opt_background_thread); - if (malloc_mutex_init(&background_thread_lock, - "background_thread_global", - WITNESS_RANK_BACKGROUND_THREAD_GLOBAL, - malloc_mutex_rank_exclusive)) { - return true; - } - background_thread_info = (background_thread_info_t *)base_alloc( - tsd_tsdn(tsd), b0get(), ncpus * sizeof(background_thread_info_t), - CACHELINE); - if (background_thread_info == NULL) { - return true; - } - - for (unsigned i = 0; i < ncpus; i++) { - background_thread_info_t *info = &background_thread_info[i]; - if (malloc_mutex_init(&info->mtx, "background_thread", - WITNESS_RANK_BACKGROUND_THREAD, - malloc_mutex_rank_exclusive)) { - return true; - } - if (pthread_cond_init(&info->cond, NULL)) { - return true; - } - 
malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx); - info->started = false; - background_thread_info_reinit(tsd_tsdn(tsd), info); - malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx); - } - - return false; -} - static inline bool set_current_thread_affinity(UNUSED int cpu) { #if defined(JEMALLOC_HAVE_SCHED_SETAFFINITY) @@ -363,12 +324,11 @@ background_thread_create(tsd_t *tsd, unsigned arena_ind) { pre_reentrancy(tsd); int err; - load_pthread_create_fptr(); /* * To avoid complications (besides reentrancy), create internal * background threads with the underlying pthread_create. */ - if ((err = pthread_create_fptr(&info->thread, NULL, + if ((err = pthread_create_wrapper(&info->thread, NULL, background_thread_entry, (void *)thread_ind)) != 0) { malloc_printf("<jemalloc>: arena %u background thread creation " "failed (%d).\n", arena_ind, err); @@ -638,28 +598,84 @@ background_thread_stats_read(tsdn_t *tsdn, background_thread_stats_t *stats) { #endif /* defined(JEMALLOC_BACKGROUND_THREAD) */ -#if defined(JEMALLOC_BACKGROUND_THREAD) || defined(JEMALLOC_LAZY_LOCK) +#ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER #include <dlfcn.h> -int (*pthread_create_fptr)(pthread_t *__restrict, const pthread_attr_t *, +static int (*pthread_create_fptr)(pthread_t *__restrict, const pthread_attr_t *, void *(*)(void *), void *__restrict); -void * -load_pthread_create_fptr(void) { - if (pthread_create_fptr) { - return pthread_create_fptr; - } +static void +pthread_create_wrapper_once(void) { #ifdef JEMALLOC_LAZY_LOCK isthreaded = true; #endif +} + +int +pthread_create_wrapper(pthread_t *__restrict thread, const pthread_attr_t *attr, + void *(*start_routine)(void *), void *__restrict arg) { + static pthread_once_t once_control = PTHREAD_ONCE_INIT; + + pthread_once(&once_control, pthread_create_wrapper_once); + + return pthread_create_fptr(thread, attr, start_routine, arg); +} +#endif + +bool +background_thread_boot0(void) { + if (!have_background_thread && opt_background_thread) { + malloc_printf("<jemalloc>: option background_thread 
currently " + "supports pthread only. \n"); + return true; + } + +#ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER pthread_create_fptr = dlsym(RTLD_NEXT, "pthread_create"); if (pthread_create_fptr == NULL) { malloc_write("<jemalloc>: Error in dlsym(RTLD_NEXT, " "\"pthread_create\")\n"); abort(); } - - return pthread_create_fptr; +#endif + return false; } +bool +background_thread_boot1(tsdn_t *tsdn) { +#ifdef JEMALLOC_BACKGROUND_THREAD + assert(have_background_thread); + assert(narenas_total_get() > 0); + + background_thread_enabled_set(tsdn, opt_background_thread); + if (malloc_mutex_init(&background_thread_lock, + "background_thread_global", + WITNESS_RANK_BACKGROUND_THREAD_GLOBAL, + malloc_mutex_rank_exclusive)) { + return true; + } + background_thread_info = (background_thread_info_t *)base_alloc(tsdn, + b0get(), ncpus * sizeof(background_thread_info_t), CACHELINE); + if (background_thread_info == NULL) { + return true; + } + + for (unsigned i = 0; i < ncpus; i++) { + background_thread_info_t *info = &background_thread_info[i]; + if (malloc_mutex_init(&info->mtx, "background_thread", + WITNESS_RANK_BACKGROUND_THREAD, + malloc_mutex_rank_exclusive)) { + return true; + } + if (pthread_cond_init(&info->cond, NULL)) { + return true; + } + malloc_mutex_lock(tsdn, &info->mtx); + info->started = false; + background_thread_info_reinit(tsdn, info); + malloc_mutex_unlock(tsdn, &info->mtx); + } #endif + + return false; +} diff --git a/src/ctl.c b/src/ctl.c index 1520c508..70059886 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1559,7 +1559,8 @@ CTL_RO_NL_GEN(opt_abort_conf, opt_abort_conf, bool) CTL_RO_NL_GEN(opt_retain, opt_retain, bool) CTL_RO_NL_GEN(opt_dss, opt_dss, const char *) CTL_RO_NL_GEN(opt_narenas, opt_narenas, unsigned) -CTL_RO_NL_GEN(opt_percpu_arena, opt_percpu_arena, const char *) +CTL_RO_NL_GEN(opt_percpu_arena, percpu_arena_mode_names[opt_percpu_arena], + const char *) CTL_RO_NL_GEN(opt_background_thread, opt_background_thread, bool) CTL_RO_NL_GEN(opt_dirty_decay_ms, 
opt_dirty_decay_ms, ssize_t) CTL_RO_NL_GEN(opt_muzzy_decay_ms, opt_muzzy_decay_ms, ssize_t) @@ -1610,8 +1611,8 @@ thread_arena_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, } if (have_percpu_arena && - (percpu_arena_mode != percpu_arena_disabled)) { - if (newind < percpu_arena_ind_limit()) { + PERCPU_ARENA_ENABLED(opt_percpu_arena)) { + if (newind < percpu_arena_ind_limit(opt_percpu_arena)) { /* * If perCPU arena is enabled, thread_arena * control is not allowed for the auto arena diff --git a/src/jemalloc.c b/src/jemalloc.c index 02e32dad..c3983a5d 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -463,7 +463,7 @@ arena_t * arena_choose_hard(tsd_t *tsd, bool internal) { arena_t *ret JEMALLOC_CC_SILENCE_INIT(NULL); - if (have_percpu_arena && percpu_arena_mode != percpu_arena_disabled) { + if (have_percpu_arena && PERCPU_ARENA_ENABLED(opt_percpu_arena)) { unsigned choose = percpu_arena_choose(); ret = arena_get(tsd_tsdn(tsd), choose, true); assert(ret != NULL); @@ -1100,17 +1100,16 @@ malloc_conf_init(void) { if (strncmp("percpu_arena", k, klen) == 0) { int i; bool match = false; - for (i = 0; i < percpu_arena_mode_limit; i++) { + for (i = percpu_arena_mode_names_base; i < + percpu_arena_mode_names_limit; i++) { if (strncmp(percpu_arena_mode_names[i], - v, vlen) == 0) { + v, vlen) == 0) { if (!have_percpu_arena) { malloc_conf_error( "No getcpu support", k, klen, v, vlen); } - percpu_arena_mode = i; - opt_percpu_arena = - percpu_arena_mode_names[i]; + opt_percpu_arena = i; match = true; break; } @@ -1276,6 +1275,10 @@ malloc_init_hard_recursible(void) { } #endif + if (background_thread_boot0()) { + return true; + } + return false; } @@ -1293,13 +1296,25 @@ malloc_narenas_default(void) { } } +static percpu_arena_mode_t +percpu_arena_as_initialized(percpu_arena_mode_t mode) { + assert(!malloc_initialized()); + assert(mode <= percpu_arena_disabled); + + if (mode != percpu_arena_disabled) { + mode += percpu_arena_mode_enabled_base; + } + + return mode; 
+} + static bool malloc_init_narenas(void) { assert(ncpus > 0); - if (percpu_arena_mode != percpu_arena_disabled) { + if (opt_percpu_arena != percpu_arena_disabled) { if (!have_percpu_arena || malloc_getcpu() < 0) { - percpu_arena_mode = percpu_arena_disabled; + opt_percpu_arena = percpu_arena_disabled; malloc_printf("<jemalloc>: perCPU arena getcpu() not " "available. Setting narenas to %u.\n", opt_narenas ? opt_narenas : malloc_narenas_default()); @@ -1315,8 +1330,9 @@ } return true; } - if ((percpu_arena_mode == per_phycpu_arena) && - (ncpus % 2 != 0)) { + /* NB: opt_percpu_arena isn't fully initialized yet. */ + if (percpu_arena_as_initialized(opt_percpu_arena) == + per_phycpu_arena && ncpus % 2 != 0) { malloc_printf("<jemalloc>: invalid " "configuration -- per physical CPU arena " "with odd number (%u) of CPUs (no hyper " @@ -1324,7 +1340,8 @@ if (opt_abort) abort(); } - unsigned n = percpu_arena_ind_limit(); + unsigned n = percpu_arena_ind_limit( + percpu_arena_as_initialized(opt_percpu_arena)); if (opt_narenas < n) { /* * If narenas is specified with percpu_arena @@ -1363,26 +1380,16 @@ return false; } -static bool -malloc_init_background_threads(tsd_t *tsd) { - malloc_mutex_assert_owner(tsd_tsdn(tsd), &init_lock); - if (!have_background_thread) { - if (opt_background_thread) { - malloc_printf("<jemalloc>: option background_thread " - "currently supports pthread only. \n"); - return true; - } else { - return false; - } - } - - return background_threads_init(tsd); +static void +malloc_init_percpu(void) { + opt_percpu_arena = percpu_arena_as_initialized(opt_percpu_arena); } static bool malloc_init_hard_finish(void) { - if (malloc_mutex_boot()) + if (malloc_mutex_boot()) { return true; + } malloc_init_state = malloc_init_initialized; malloc_slow_flag_init(); @@ -1421,7 +1428,7 @@ malloc_init_hard(void) { malloc_mutex_lock(tsd_tsdn(tsd), &init_lock); /* Initialize narenas before prof_boot2 (for allocation). 
*/ - if (malloc_init_narenas() || malloc_init_background_threads(tsd)) { + if (malloc_init_narenas() || background_thread_boot1(tsd_tsdn(tsd))) { malloc_mutex_unlock(tsd_tsdn(tsd), &init_lock); return true; } @@ -1431,6 +1438,8 @@ malloc_init_hard(void) { return true; } + malloc_init_percpu(); + if (malloc_init_hard_finish()) { malloc_mutex_unlock(tsd_tsdn(tsd), &init_lock); return true; diff --git a/src/mutex.c b/src/mutex.c index 24852226..a528ef0c 100644 --- a/src/mutex.c +++ b/src/mutex.c @@ -27,21 +27,11 @@ static malloc_mutex_t *postponed_mutexes = NULL; */ #if defined(JEMALLOC_LAZY_LOCK) && !defined(_WIN32) -static void -pthread_create_once(void) { - pthread_create_fptr = load_pthread_create_fptr(); - assert(isthreaded); -} - JEMALLOC_EXPORT int pthread_create(pthread_t *__restrict thread, const pthread_attr_t *__restrict attr, void *(*start_routine)(void *), void *__restrict arg) { - static pthread_once_t once_control = PTHREAD_ONCE_INIT; - - pthread_once(&once_control, pthread_create_once); - - return pthread_create_fptr(thread, attr, start_routine, arg); + return pthread_create_wrapper(thread, attr, start_routine, arg); } #endif diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 80b84a06..f6116549 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -327,18 +327,18 @@ TEST_END TEST_BEGIN(test_thread_arena) { unsigned old_arena_ind, new_arena_ind, narenas; - const char *opt_percpu_arena; - size_t sz = sizeof(opt_percpu_arena); - assert_d_eq(mallctl("opt.percpu_arena", &opt_percpu_arena, &sz, NULL, - 0), 0, "Unexpected mallctl() failure"); + const char *opa; + size_t sz = sizeof(opa); + assert_d_eq(mallctl("opt.percpu_arena", &opa, &sz, NULL, 0), 0, + "Unexpected mallctl() failure"); sz = sizeof(unsigned); assert_d_eq(mallctl("arenas.narenas", (void *)&narenas, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); assert_u_eq(narenas, opt_narenas, "Number of arenas incorrect"); - if (strcmp(opt_percpu_arena, "disabled") == 0) { + if 
(strcmp(opa, "disabled") == 0) { new_arena_ind = narenas - 1; assert_d_eq(mallctl("thread.arena", (void *)&old_arena_ind, &sz, (void *)&new_arena_ind, sizeof(unsigned)), 0, @@ -350,7 +350,7 @@ TEST_BEGIN(test_thread_arena) { } else { assert_d_eq(mallctl("thread.arena", (void *)&old_arena_ind, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); - new_arena_ind = percpu_arena_ind_limit() - 1; + new_arena_ind = percpu_arena_ind_limit(opt_percpu_arena) - 1; if (old_arena_ind != new_arena_ind) { assert_d_eq(mallctl("thread.arena", (void *)&old_arena_ind, &sz, (void *)&new_arena_ind,