From 8b14f3abc05f01419f9321a6a65ab9dd68dcebac Mon Sep 17 00:00:00 2001
From: Dave Watson <davejwatson@fb.com>
Date: Thu, 29 Mar 2018 12:58:13 -0700
Subject: [PATCH] background_thread: add max thread count config

Looking at the thread counts in our services, jemalloc's background thread
is useful, but mostly idle.  Add a config option to tune down the number of
threads.
---
 doc/jemalloc.xml.in                      | 23 ++++++
 .../internal/background_thread_externs.h |  2 +
 .../internal/background_thread_structs.h |  1 +
 src/background_thread.c                  | 49 +++++++------
 src/ctl.c                                | 70 +++++++++++++++++++
 src/jemalloc.c                           |  4 ++
 test/unit/background_thread_enable.c     | 50 ++++++++++++-
 7 files changed, 177 insertions(+), 22 deletions(-)

diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in
index 9ecd8a1f..2e7edc33 100644
--- a/doc/jemalloc.xml.in
+++ b/doc/jemalloc.xml.in
@@ -761,6 +761,18 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay",
         selected pthread-based platforms.</para></listitem>
       </varlistentry>
 
+      <varlistentry id="max_background_threads">
+        <term>
+          <mallctl>max_background_threads</mallctl>
+          (<type>size_t</type>)
+          <literal>rw</literal>
+        </term>
+        <listitem><para>Maximum number of background worker threads that will
+        be created.  This value is capped at <link
+        linkend="opt.max_background_threads"><mallctl>opt.max_background_threads</mallctl></link>
+        at startup.</para></listitem>
+      </varlistentry>
+
       <varlistentry id="config.cache_oblivious">
         <term>
           <mallctl>config.cache_oblivious</mallctl>
@@ -1009,6 +1021,17 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay",
         default.</para></listitem>
       </varlistentry>
 
+      <varlistentry id="opt.max_background_threads">
+        <term>
+          <mallctl>opt.max_background_threads</mallctl>
+          (<type>const size_t</type>)
+          <literal>r-</literal>
+        </term>
+        <listitem><para>Maximum number of background threads that will be created
+        if <link linkend="background_thread"><mallctl>background_thread</mallctl></link> is set.
+        Defaults to the number of CPUs.</para></listitem>
+      </varlistentry>
+
       <varlistentry id="opt.dirty_decay_ms">
         <term>
           <mallctl>opt.dirty_decay_ms</mallctl>
diff --git a/include/jemalloc/internal/background_thread_externs.h b/include/jemalloc/internal/background_thread_externs.h
index 8b4b8471..3209aa49 100644
--- a/include/jemalloc/internal/background_thread_externs.h
+++ b/include/jemalloc/internal/background_thread_externs.h
@@ -2,9 +2,11 @@
 #define JEMALLOC_INTERNAL_BACKGROUND_THREAD_EXTERNS_H
 
 extern bool opt_background_thread;
+extern size_t opt_max_background_threads;
 extern malloc_mutex_t background_thread_lock;
 extern atomic_b_t background_thread_enabled_state;
 extern size_t n_background_threads;
+extern size_t max_background_threads;
 extern background_thread_info_t *background_thread_info;
 
 extern bool can_enable_background_thread;
diff --git a/include/jemalloc/internal/background_thread_structs.h b/include/jemalloc/internal/background_thread_structs.h
index e69a7d02..c1107dfe 100644
--- a/include/jemalloc/internal/background_thread_structs.h
+++ b/include/jemalloc/internal/background_thread_structs.h
@@ -8,6 +8,7 @@
 #endif
 
 #define BACKGROUND_THREAD_INDEFINITE_SLEEP UINT64_MAX
+#define MAX_BACKGROUND_THREAD_LIMIT MALLOCX_ARENA_LIMIT
 
 typedef enum {
 	background_thread_stopped,
diff --git a/src/background_thread.c b/src/background_thread.c
index c16f0063..d2aa2745 100644
--- a/src/background_thread.c
+++ b/src/background_thread.c
@@ -11,12 +11,14 @@
 #define BACKGROUND_THREAD_DEFAULT false
 /* Read-only after initialization. */
 bool opt_background_thread = BACKGROUND_THREAD_DEFAULT;
+size_t opt_max_background_threads = MAX_BACKGROUND_THREAD_LIMIT;
 
 /* Used for thread creation, termination and stats. */
 malloc_mutex_t background_thread_lock;
 /* Indicates global state.  Atomic because decay reads this w/o locking. */
 atomic_b_t background_thread_enabled_state;
 size_t n_background_threads;
+size_t max_background_threads;
 /* Thread info per-index. */
 background_thread_info_t *background_thread_info;
 
@@ -287,7 +289,7 @@ background_work_sleep_once(tsdn_t *tsdn, background_thread_info_t *info, unsigne
 	uint64_t min_interval = BACKGROUND_THREAD_INDEFINITE_SLEEP;
 	unsigned narenas = narenas_total_get();
 
-	for (unsigned i = ind; i < narenas; i += ncpus) {
+	for (unsigned i = ind; i < narenas; i += max_background_threads) {
 		arena_t *arena = arena_get(tsdn, i, false);
 		if (!arena) {
 			continue;
@@ -390,7 +392,7 @@ check_background_thread_creation(tsd_t *tsd, unsigned *n_created,
 	tsdn_t *tsdn = tsd_tsdn(tsd);
 	malloc_mutex_unlock(tsdn, &background_thread_info[0].mtx);
 
-	for (unsigned i = 1; i < ncpus; i++) {
+	for (unsigned i = 1; i < max_background_threads; i++) {
 		if (created_threads[i]) {
 			continue;
 		}
@@ -430,9 +432,9 @@ check_background_thread_creation(tsd_t *tsd, unsigned *n_created,
 static void
 background_thread0_work(tsd_t *tsd) {
 	/* Thread0 is also responsible for launching / terminating threads. */
-	VARIABLE_ARRAY(bool, created_threads, ncpus);
+	VARIABLE_ARRAY(bool, created_threads, max_background_threads);
 	unsigned i;
-	for (i = 1; i < ncpus; i++) {
+	for (i = 1; i < max_background_threads; i++) {
 		created_threads[i] = false;
 	}
 	/* Start working, and create more threads when asked. */
@@ -455,7 +457,7 @@ background_thread0_work(tsd_t *tsd) {
 	 * the global background_thread mutex (and is waiting) for us.
 	 */
 	assert(!background_thread_enabled());
-	for (i = 1; i < ncpus; i++) {
+	for (i = 1; i < max_background_threads; i++) {
 		background_thread_info_t *info = &background_thread_info[i];
 		assert(info->state != background_thread_paused);
 		if (created_threads[i]) {
@@ -502,7 +504,7 @@ background_work(tsd_t *tsd, unsigned ind) {
 static void *
 background_thread_entry(void *ind_arg) {
 	unsigned thread_ind = (unsigned)(uintptr_t)ind_arg;
-	assert(thread_ind < ncpus);
+	assert(thread_ind < max_background_threads);
 #ifdef JEMALLOC_HAVE_PTHREAD_SETNAME_NP
 	pthread_setname_np(pthread_self(), "jemalloc_bg_thd");
 #endif
@@ -536,7 +538,7 @@ background_thread_create(tsd_t *tsd, unsigned arena_ind) {
 	malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock);
 
-	/* We create at most NCPUs threads. */
-	size_t thread_ind = arena_ind % ncpus;
+	/* We create at most max_background_threads threads. */
+	size_t thread_ind = arena_ind % max_background_threads;
 	background_thread_info_t *info = &background_thread_info[thread_ind];
 
 	bool need_new_thread;
@@ -590,9 +592,9 @@ background_threads_enable(tsd_t *tsd) {
 	assert(background_thread_enabled());
 	malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock);
 
-	VARIABLE_ARRAY(bool, marked, ncpus);
+	VARIABLE_ARRAY(bool, marked, max_background_threads);
 	unsigned i, nmarked;
-	for (i = 0; i < ncpus; i++) {
+	for (i = 0; i < max_background_threads; i++) {
 		marked[i] = false;
 	}
 	nmarked = 0;
@@ -601,18 +603,18 @@
 	/* Mark the threads we need to create for thread 0. */
 	unsigned n = narenas_total_get();
 	for (i = 1; i < n; i++) {
-		if (marked[i % ncpus] ||
+		if (marked[i % max_background_threads] ||
 		    arena_get(tsd_tsdn(tsd), i, false) == NULL) {
 			continue;
 		}
 		background_thread_info_t *info = &background_thread_info[
-		    i % ncpus];
+		    i % max_background_threads];
 		malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
 		assert(info->state == background_thread_stopped);
 		background_thread_init(tsd, info);
 		malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
-		marked[i % ncpus] = true;
-		if (++nmarked == ncpus) {
+		marked[i % max_background_threads] = true;
+		if (++nmarked == max_background_threads) {
 			break;
 		}
 	}
@@ -727,14 +729,14 @@ background_thread_prefork0(tsdn_t *tsdn) {
 
 void
 background_thread_prefork1(tsdn_t *tsdn) {
-	for (unsigned i = 0; i < ncpus; i++) {
+	for (unsigned i = 0; i < max_background_threads; i++) {
 		malloc_mutex_prefork(tsdn, &background_thread_info[i].mtx);
 	}
 }
 
 void
 background_thread_postfork_parent(tsdn_t *tsdn) {
-	for (unsigned i = 0; i < ncpus; i++) {
+	for (unsigned i = 0; i < max_background_threads; i++) {
 		malloc_mutex_postfork_parent(tsdn,
 		    &background_thread_info[i].mtx);
 	}
@@ -743,7 +745,7 @@ background_thread_postfork_parent(tsdn_t *tsdn) {
 
 void
 background_thread_postfork_child(tsdn_t *tsdn) {
-	for (unsigned i = 0; i < ncpus; i++) {
+	for (unsigned i = 0; i < max_background_threads; i++) {
 		malloc_mutex_postfork_child(tsdn,
 		    &background_thread_info[i].mtx);
 	}
@@ -756,7 +758,7 @@ background_thread_postfork_child(tsdn_t *tsdn) {
 	malloc_mutex_lock(tsdn, &background_thread_lock);
 	n_background_threads = 0;
 	background_thread_enabled_set(tsdn, false);
-	for (unsigned i = 0; i < ncpus; i++) {
+	for (unsigned i = 0; i < max_background_threads; i++) {
 		background_thread_info_t *info = &background_thread_info[i];
 		malloc_mutex_lock(tsdn, &info->mtx);
 		info->state = background_thread_stopped;
@@ -780,7 +782,7 @@ background_thread_stats_read(tsdn_t *tsdn, background_thread_stats_t *stats) {
 	stats->num_threads = n_background_threads;
 	uint64_t num_runs = 0;
 	nstime_init(&stats->run_interval, 0);
-	for (unsigned i = 0; i < ncpus; i++) {
+	for (unsigned i = 0; i < max_background_threads; i++) {
 		background_thread_info_t *info = &background_thread_info[i];
 		malloc_mutex_lock(tsdn, &info->mtx);
 		if (info->state != background_thread_stopped) {
@@ -848,6 +850,12 @@ background_thread_boot1(tsdn_t *tsdn) {
 	assert(have_background_thread);
 	assert(narenas_total_get() > 0);
 
+	if (opt_max_background_threads == MAX_BACKGROUND_THREAD_LIMIT &&
+	    ncpus < MAX_BACKGROUND_THREAD_LIMIT) {
+		opt_max_background_threads = ncpus;
+	}
+	max_background_threads = opt_max_background_threads;
+
 	background_thread_enabled_set(tsdn, opt_background_thread);
 	if (malloc_mutex_init(&background_thread_lock,
 	    "background_thread_global",
@@ -857,12 +865,13 @@ background_thread_boot1(tsdn_t *tsdn) {
 	}
 
 	background_thread_info = (background_thread_info_t *)base_alloc(tsdn,
-	    b0get(), ncpus * sizeof(background_thread_info_t), CACHELINE);
+	    b0get(), opt_max_background_threads *
+	    sizeof(background_thread_info_t), CACHELINE);
 	if (background_thread_info == NULL) {
 		return true;
 	}
 
-	for (unsigned i = 0; i < ncpus; i++) {
+	for (unsigned i = 0; i < max_background_threads; i++) {
 		background_thread_info_t *info = &background_thread_info[i];
 		/* Thread mutex is rank_inclusive because of thread0. */
 		if (malloc_mutex_init(&info->mtx, "background_thread",
diff --git a/src/ctl.c b/src/ctl.c
index aaf6e35a..02610cf0 100644
--- a/src/ctl.c
+++ b/src/ctl.c
@@ -57,6 +57,7 @@ static const ctl_named_node_t *n##_index(tsdn_t *tsdn,	\
 CTL_PROTO(version)
 CTL_PROTO(epoch)
 CTL_PROTO(background_thread)
+CTL_PROTO(max_background_threads)
 CTL_PROTO(thread_tcache_enabled)
 CTL_PROTO(thread_tcache_flush)
 CTL_PROTO(thread_prof_name)
@@ -85,6 +86,7 @@ CTL_PROTO(opt_dss)
 CTL_PROTO(opt_narenas)
 CTL_PROTO(opt_percpu_arena)
 CTL_PROTO(opt_background_thread)
+CTL_PROTO(opt_max_background_threads)
 CTL_PROTO(opt_dirty_decay_ms)
 CTL_PROTO(opt_muzzy_decay_ms)
 CTL_PROTO(opt_stats_print)
@@ -284,6 +286,7 @@ static const ctl_named_node_t opt_node[] = {
 	{NAME("narenas"),	CTL(opt_narenas)},
 	{NAME("percpu_arena"),	CTL(opt_percpu_arena)},
 	{NAME("background_thread"),	CTL(opt_background_thread)},
+	{NAME("max_background_threads"),	CTL(opt_max_background_threads)},
 	{NAME("dirty_decay_ms"),	CTL(opt_dirty_decay_ms)},
 	{NAME("muzzy_decay_ms"),	CTL(opt_muzzy_decay_ms)},
 	{NAME("stats_print"),	CTL(opt_stats_print)},
@@ -535,6 +538,7 @@ static const ctl_named_node_t root_node[] = {
 	{NAME("version"),	CTL(version)},
 	{NAME("epoch"),	CTL(epoch)},
 	{NAME("background_thread"),	CTL(background_thread)},
+	{NAME("max_background_threads"),	CTL(max_background_threads)},
 	{NAME("thread"),	CHILD(named, thread)},
 	{NAME("config"),	CHILD(named, config)},
 	{NAME("opt"),	CHILD(named, opt)},
@@ -1564,6 +1568,71 @@ label_return:
 	return ret;
 }
 
+static int
+max_background_threads_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,
+    void *oldp, size_t *oldlenp, void *newp, size_t newlen) {
+	int ret;
+	size_t oldval;
+
+	if (!have_background_thread) {
+		return ENOENT;
+	}
+	background_thread_ctl_init(tsd_tsdn(tsd));
+
+	malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx);
+	malloc_mutex_lock(tsd_tsdn(tsd), &background_thread_lock);
+	if (newp == NULL) {
+		oldval = max_background_threads;
+		READ(oldval, size_t);
+	} else {
+		if (newlen != sizeof(size_t)) {
+			ret = EINVAL;
+			goto label_return;
+		}
+		oldval = max_background_threads;
+		READ(oldval, size_t);
+
+		size_t newval = *(size_t *)newp;
+		if (newval == oldval) {
+			ret = 0;
+			goto label_return;
+		}
+		if (newval > opt_max_background_threads) {
+			ret = EINVAL;
+			goto label_return;
+		}
+
+		if (background_thread_enabled()) {
+			if (!can_enable_background_thread) {
+				malloc_printf("<jemalloc>: Error in dlsym("
+				    "RTLD_NEXT, \"pthread_create\"). Cannot "
+				    "enable background_thread\n");
+				ret = EFAULT;
+				goto label_return;
+			}
+			background_thread_enabled_set(tsd_tsdn(tsd), false);
+			if (background_threads_disable(tsd)) {
+				ret = EFAULT;
+				goto label_return;
+			}
+			max_background_threads = newval;
+			background_thread_enabled_set(tsd_tsdn(tsd), true);
+			if (background_threads_enable(tsd)) {
+				ret = EFAULT;
+				goto label_return;
+			}
+		} else {
+			max_background_threads = newval;
+		}
+	}
+	ret = 0;
+label_return:
+	malloc_mutex_unlock(tsd_tsdn(tsd), &background_thread_lock);
+	malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx);
+
+	return ret;
+}
+
 /******************************************************************************/
 
 CTL_RO_CONFIG_GEN(config_cache_oblivious, bool)
@@ -1590,6 +1659,7 @@ CTL_RO_NL_GEN(opt_narenas, opt_narenas, unsigned)
 CTL_RO_NL_GEN(opt_percpu_arena, percpu_arena_mode_names[opt_percpu_arena],
     const char *)
 CTL_RO_NL_GEN(opt_background_thread, opt_background_thread, bool)
+CTL_RO_NL_GEN(opt_max_background_threads, opt_max_background_threads, size_t)
 CTL_RO_NL_GEN(opt_dirty_decay_ms, opt_dirty_decay_ms, ssize_t)
 CTL_RO_NL_GEN(opt_muzzy_decay_ms, opt_muzzy_decay_ms, ssize_t)
 CTL_RO_NL_GEN(opt_stats_print, opt_stats_print, bool)
diff --git a/src/jemalloc.c b/src/jemalloc.c
index 4dde8fbc..912488d5 100644
--- a/src/jemalloc.c
+++ b/src/jemalloc.c
@@ -1175,6 +1175,10 @@ malloc_conf_init(void) {
 			}
 			CONF_HANDLE_BOOL(opt_background_thread,
 			    "background_thread");
+			CONF_HANDLE_SIZE_T(opt_max_background_threads,
+			    "max_background_threads", 1,
+			    opt_max_background_threads, yes, yes,
+			    true);
 			if (config_prof) {
 				CONF_HANDLE_BOOL(opt_prof, "prof")
 				CONF_HANDLE_CHAR_P(opt_prof_prefix,
diff --git a/test/unit/background_thread_enable.c b/test/unit/background_thread_enable.c
index 7e4f6ed7..ff95e672 100644
--- a/test/unit/background_thread_enable.c
+++ b/test/unit/background_thread_enable.c
@@ -1,6 +1,6 @@
 #include "test/jemalloc_test.h"
 
-const char *malloc_conf = "background_thread:false,narenas:1";
+const char *malloc_conf = "background_thread:false,narenas:1,max_background_threads:20";
 
 TEST_BEGIN(test_deferred) {
 	test_skip_if(!have_background_thread);
@@ -30,8 +30,54 @@ TEST_BEGIN(test_deferred) {
 }
 TEST_END
 
+TEST_BEGIN(test_max_background_threads) {
+	test_skip_if(!have_background_thread);
+
+	size_t maxt;
+	size_t opt_maxt;
+	size_t sz_m = sizeof(maxt);
+	assert_d_eq(mallctl("opt.max_background_threads",
+	    &opt_maxt, &sz_m, NULL, 0), 0,
+	    "Failed to get opt.max_background_threads");
+	assert_d_eq(mallctl("max_background_threads", &maxt, &sz_m, NULL, 0),
+	    0, "Failed to get max background threads");
+	assert_zu_eq(20, maxt, "should be 20 (set in malloc_conf)");
+	assert_zu_eq(opt_maxt, maxt,
+	    "max_background_threads and "
+	    "opt.max_background_threads should match");
+	assert_d_eq(mallctl("max_background_threads", NULL, NULL, &maxt,
+	    sz_m), 0, "Failed to set max background threads");
+
+	unsigned id;
+	size_t sz_u = sizeof(unsigned);
+
+	for (unsigned i = 0; i < 10 * ncpus; i++) {
+		assert_d_eq(mallctl("arenas.create", &id, &sz_u, NULL, 0), 0,
+		    "Failed to create arena");
+	}
+
+	bool enable = true;
+	size_t sz_b = sizeof(bool);
+	assert_d_eq(mallctl("background_thread", NULL, NULL, &enable, sz_b),
+	    0, "Failed to enable background threads");
+	assert_zu_eq(n_background_threads, maxt,
+	    "Number of background threads should be 20.\n");
+	maxt = 10;
+	assert_d_eq(mallctl("max_background_threads", NULL, NULL, &maxt,
+	    sz_m), 0, "Failed to set max background threads");
+	assert_zu_eq(n_background_threads, maxt,
+	    "Number of background threads should be 10.\n");
10.\n"); + maxt = 3; + assert_d_eq(mallctl("max_background_threads", NULL, NULL, &maxt, sz_m), + 0, "Failed to set max background threads"); + assert_zu_eq(n_background_threads, maxt, + "Number of background threads should be 3.\n"); +} +TEST_END + int main(void) { return test_no_reentrancy( - test_deferred); + test_deferred, + test_max_background_threads); }