Implement per-CPU arena.

The new feature, opt.percpu_arena, determines the thread-arena association
dynamically based on the CPU id. Three modes are supported: "percpu",
"phycpu", and "disabled".

"percpu" uses the current core id (with help from sched_getcpu())
directly as the arena index, while "phycpu" will assign threads on the
same physical CPU to the same arena. In other words, "percpu" means # of
arenas == # of CPUs, while "phycpu" has # of arenas == 1/2 * (# of
CPUs). Note that no runtime check on whether hyper threading is enabled
is added yet.
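
A minimal sketch of the implied mapping (illustrative names, not the
committed code), assuming the common Linux numbering in which hyperthread
siblings are cpu and cpu + ncpus / 2:

#define _GNU_SOURCE
#include <assert.h>
#include <sched.h>
#include <stdbool.h>

/* Illustrative only -- not the committed code. Maps the calling thread's
 * current CPU to an arena index. */
static unsigned
arena_ind_for_cpu(unsigned ncpus, bool phycpu_mode) {
	int cpu = sched_getcpu();
	assert(cpu >= 0);
	if (!phycpu_mode || (unsigned)cpu < ncpus / 2) {
		/* "percpu": arena index == CPU id. */
		return (unsigned)cpu;
	}
	/* "phycpu": fold the sibling hyperthread onto the same arena. */
	return (unsigned)cpu - ncpus / 2;
}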

When enabled, threads are migrated between arenas whenever a CPU change
is detected. In the current design, to reduce the overhead of reading the
CPU id, each arena tracks the thread that accessed it most recently; when
a different thread comes in, we read the CPU id and update the arena
binding if necessary.
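
A hedged sketch of that fast path, with hypothetical helper names
(arena_migrate() and the mode flags are stand-ins; arena_ind_get() does
appear in the diff below):

/* Illustrative fragment -- not the committed code. */
if (arena->last_thd != tsd) {
	/* A thread other than the most recent one: re-check the CPU. */
	unsigned ind = arena_ind_for_cpu(ncpus, phycpu_mode);
	if (ind != arena_ind_get(arena)) {
		/* The CPU changed since the arena was chosen: rebind. */
		arena = arena_migrate(tsd, ind);
	}
	arena->last_thd = tsd;
}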
Qi Wang authored and committed, 2017-02-02 17:02:05 -08:00
commit ec532e2c5c, parent 8721e19c04
16 changed files with 415 additions and 119 deletions

src/arena.c

@@ -4,6 +4,15 @@
/******************************************************************************/
/* Data. */
const char *percpu_arena_mode_names[] = {
"disabled",
"percpu",
"phycpu"
};
const char *opt_percpu_arena = OPT_PERCPU_ARENA_DEFAULT;
percpu_arena_mode_t percpu_arena_mode = PERCPU_ARENA_MODE_DEFAULT;
ssize_t opt_decay_time = DECAY_TIME_DEFAULT;
static ssize_t decay_time_default;
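
The names array parallels a mode enum. A hypothetical reconstruction of
that enum for orientation: percpu_arena_mode_t, percpu_arena_disabled,
per_phycpu_arena, and percpu_arena_mode_limit all appear elsewhere in
this diff; the value percpu_arena itself is a guess.

typedef enum {
	percpu_arena_disabled   = 0,	/* "disabled" */
	percpu_arena            = 1,	/* "percpu" */
	per_phycpu_arena        = 2,	/* "phycpu" */
	percpu_arena_mode_limit = 3
} percpu_arena_mode_t;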
@@ -1629,6 +1638,7 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) {
}
arena->nthreads[0] = arena->nthreads[1] = 0;
arena->last_thd = NULL;
if (config_stats) {
if (arena_stats_init(tsdn, &arena->stats)) {

src/ctl.c

@@ -72,6 +72,7 @@ CTL_PROTO(config_xmalloc)
CTL_PROTO(opt_abort)
CTL_PROTO(opt_dss)
CTL_PROTO(opt_narenas)
CTL_PROTO(opt_percpu_arena)
CTL_PROTO(opt_decay_time)
CTL_PROTO(opt_stats_print)
CTL_PROTO(opt_junk)
@@ -229,6 +230,7 @@ static const ctl_named_node_t opt_node[] = {
{NAME("abort"), CTL(opt_abort)},
{NAME("dss"), CTL(opt_dss)},
{NAME("narenas"), CTL(opt_narenas)},
{NAME("percpu_arena"), CTL(opt_percpu_arena)},
{NAME("decay_time"), CTL(opt_decay_time)},
{NAME("stats_print"), CTL(opt_stats_print)},
{NAME("junk"), CTL(opt_junk)},
@@ -1284,6 +1286,7 @@ CTL_RO_CONFIG_GEN(config_xmalloc, bool)
CTL_RO_NL_GEN(opt_abort, opt_abort, bool)
CTL_RO_NL_GEN(opt_dss, opt_dss, const char *)
CTL_RO_NL_GEN(opt_narenas, opt_narenas, unsigned)
CTL_RO_NL_GEN(opt_percpu_arena, opt_percpu_arena, const char *)
CTL_RO_NL_GEN(opt_decay_time, opt_decay_time, ssize_t)
CTL_RO_NL_GEN(opt_stats_print, opt_stats_print, bool)
CTL_RO_NL_CGEN(config_fill, opt_junk, opt_junk, const char *)
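
With the mallctl entries above in place, the chosen mode can be read back
like any other opt.* value, for example:

#include <stdio.h>
#include <jemalloc/jemalloc.h>

int
main(void) {
	const char *mode;
	size_t sz = sizeof(mode);

	/* Query the read-only option added above. */
	if (mallctl("opt.percpu_arena", &mode, &sz, NULL, 0) == 0) {
		printf("opt.percpu_arena: %s\n", mode);
	}
	return 0;
}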
@@ -1317,10 +1320,10 @@ thread_arena_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
if (oldarena == NULL) {
return EAGAIN;
}
newind = oldind = arena_ind_get(oldarena);
WRITE(newind, unsigned);
READ(oldind, unsigned);
if (newind != oldind) {
arena_t *newarena;
@@ -1330,6 +1333,19 @@ thread_arena_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
goto label_return;
}
if (have_percpu_arena &&
(percpu_arena_mode != percpu_arena_disabled)) {
if (newind < percpu_arena_ind_limit()) {
/*
* If perCPU arena is enabled, thread_arena
* control is not allowed for the auto arena
* range.
*/
ret = EPERM;
goto label_return;
}
}
/* Initialize arena if necessary. */
newarena = arena_get(tsd_tsdn(tsd), newind, true);
if (newarena == NULL) {
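
As the check above implies, once percpu_arena is active a manual
thread.arena write targeting the auto range should fail. A sketch of the
expected behavior (not tested output):

unsigned ind = 0;	/* An index inside the per-CPU auto range. */
int err = mallctl("thread.arena", NULL, NULL, &ind, sizeof(ind));
/* With percpu_arena enabled and ind < percpu_arena_ind_limit(), err is
 * expected to be EPERM per the check above. */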

src/jemalloc.c

@@ -56,7 +56,8 @@ static malloc_mutex_t arenas_lock;
* arenas. arenas[narenas_auto..narenas_total) are only used if the application
* takes some action to create them and allocate from them.
*/
-arena_t **arenas;
JEMALLOC_ALIGNED(CACHELINE)
arena_t *arenas[MALLOCX_ARENA_MAX + 1];
static unsigned narenas_total; /* Use narenas_total_*(). */
static arena_t *a0; /* arenas[0]; read-only after initialization. */
unsigned narenas_auto; /* Read-only after initialization. */
@@ -543,6 +544,16 @@ arena_t *
arena_choose_hard(tsd_t *tsd, bool internal) {
arena_t *ret JEMALLOC_CC_SILENCE_INIT(NULL);
if (have_percpu_arena && percpu_arena_mode != percpu_arena_disabled) {
unsigned choose = percpu_arena_choose();
ret = arena_get(tsd_tsdn(tsd), choose, true);
assert(ret != NULL);
arena_bind(tsd, arena_ind_get(ret), false);
arena_bind(tsd, arena_ind_get(ret), true);
return ret;
}
if (narenas_auto > 1) {
unsigned i, j, choose[2], first_null;
@@ -1095,6 +1106,30 @@ malloc_conf_init(void) {
"lg_tcache_max", -1,
(sizeof(size_t) << 3) - 1)
}
if (strncmp("percpu_arena", k, klen) == 0) {
int i;
bool match = false;
for (i = 0; i < percpu_arena_mode_limit; i++) {
if (strncmp(percpu_arena_mode_names[i],
v, vlen) == 0) {
if (!have_percpu_arena) {
malloc_conf_error(
"No getcpu support",
k, klen, v, vlen);
}
percpu_arena_mode = i;
opt_percpu_arena =
percpu_arena_mode_names[i];
match = true;
break;
}
}
if (!match) {
malloc_conf_error("Invalid conf value",
k, klen, v, vlen);
}
continue;
}
if (config_prof) {
CONF_HANDLE_BOOL(opt_prof, "prof", true)
CONF_HANDLE_CHAR_P(opt_prof_prefix,
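
Given the parsing above, the mode is selected with the usual conf syntax,
e.g. MALLOC_CONF=percpu_arena:phycpu in the environment. Any value outside
percpu_arena_mode_names[] is rejected with "Invalid conf value", and naming
a mode on a build without getcpu support triggers "No getcpu support".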
@@ -1204,8 +1239,6 @@ malloc_init_hard_a0_locked() {
* malloc_ncpus().
*/
narenas_auto = 1;
-narenas_total_set(narenas_auto);
-arenas = &a0;
memset(arenas, 0, sizeof(arena_t *) * narenas_auto);
/*
* Initialize one arena here. The rest are lazily created in
@@ -1215,7 +1248,7 @@ malloc_init_hard_a0_locked() {
== NULL) {
return true;
}
a0 = arena_get(TSDN_NULL, 0, false);
malloc_init_state = malloc_init_a0_initialized;
return false;
@@ -1255,23 +1288,76 @@ malloc_init_hard_recursible(void) {
return false;
}
-static bool
-malloc_init_hard_finish(tsdn_t *tsdn) {
-if (malloc_mutex_boot()) {
-return true;
static unsigned
malloc_narenas_default(void) {
assert(ncpus > 0);
/*
* For SMP systems, create more than one arena per CPU by
* default.
*/
if (ncpus > 1) {
return ncpus << 2;
} else {
return 1;
}
}
-if (opt_narenas == 0) {
-/*
- * For SMP systems, create more than one arena per CPU by
- * default.
- */
-if (ncpus > 1) {
-opt_narenas = ncpus << 2;
static bool
malloc_init_narenas(void) {
assert(ncpus > 0);
if (percpu_arena_mode != percpu_arena_disabled) {
if (!have_percpu_arena || malloc_getcpu() < 0) {
percpu_arena_mode = percpu_arena_disabled;
malloc_printf("<jemalloc>: perCPU arena getcpu() not "
"available. Setting narenas to %u.\n", opt_narenas ?
opt_narenas : malloc_narenas_default());
if (opt_abort) {
abort();
}
} else {
-opt_narenas = 1;
if (ncpus > MALLOCX_ARENA_MAX) {
malloc_printf("<jemalloc>: narenas w/ percpu"
"arena beyond limit (%d)\n", ncpus);
if (opt_abort) {
abort();
}
return true;
}
if ((percpu_arena_mode == per_phycpu_arena) &&
(ncpus % 2 != 0)) {
malloc_printf("<jemalloc>: invalid "
"configuration -- per physical CPU arena "
"with odd number (%u) of CPUs (no hyper "
"threading?).\n", ncpus);
if (opt_abort) {
	abort();
}
}
unsigned n = percpu_arena_ind_limit();
if (opt_narenas < n) {
/*
* If narenas is specified with percpu_arena
* enabled, actual narenas is set as the greater
* of the two. percpu_arena_choose will be free
* to use any of the arenas based on CPU
* id. This is conservative (at a small cost)
* but ensures correctness.
*
* If for some reason the ncpus determined at
* boot is not the actual number (e.g. because
* of affinity setting from numactl), reserving
* narenas this way provides a workaround for
* percpu_arena.
*/
opt_narenas = n;
}
}
}
if (opt_narenas == 0) {
opt_narenas = malloc_narenas_default();
}
assert(opt_narenas > 0);
narenas_auto = opt_narenas;
/*
* Limit the number of arenas to the indexing range of MALLOCX_ARENA().
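
Concretely: with 8 CPUs, "percpu" needs arena indices 0..7 (limit 8)
while "phycpu" needs 0..3 (limit 4), and an opt_narenas below that limit
is raised to it. A plausible sketch of the limit helper, which is not
part of this diff:

#include <stdbool.h>

/* Hypothetical sketch of percpu_arena_ind_limit(); the committed helper
 * lives outside this diff. */
static unsigned
percpu_arena_ind_limit_sketch(unsigned ncpus, bool phycpu_mode) {
	if (phycpu_mode && ncpus > 1) {
		/* Round up when the CPU count is odd (warned about above). */
		return ncpus / 2 + ncpus % 2;
	}
	/* "percpu" (or a single CPU): one index per CPU. */
	return ncpus;
}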
@@ -1283,14 +1369,13 @@ malloc_init_hard_finish(tsdn_t *tsdn) {
}
narenas_total_set(narenas_auto);
-/* Allocate and initialize arenas. */
-arenas = (arena_t **)base_alloc(tsdn, a0->base, sizeof(arena_t *) *
-    (MALLOCX_ARENA_MAX+1), CACHELINE);
-if (arenas == NULL) {
return false;
}
static bool
malloc_init_hard_finish(void) {
if (malloc_mutex_boot()) {
	return true;
}
-/* Copy the pointer to the one arena that was already initialized. */
-arena_set(0, a0);
malloc_init_state = malloc_init_initialized;
malloc_slow_flag_init();
@@ -1328,12 +1413,18 @@ malloc_init_hard(void) {
}
malloc_mutex_lock(tsd_tsdn(tsd), &init_lock);
/* Need this before prof_boot2 (for allocation). */
if (malloc_init_narenas()) {
malloc_mutex_unlock(tsd_tsdn(tsd), &init_lock);
return true;
}
if (config_prof && prof_boot2(tsd)) {
malloc_mutex_unlock(tsd_tsdn(tsd), &init_lock);
return true;
}
-if (malloc_init_hard_finish(tsd_tsdn(tsd))) {
if (malloc_init_hard_finish()) {
malloc_mutex_unlock(tsd_tsdn(tsd), &init_lock);
return true;
}

src/stats.c

@@ -621,6 +621,7 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque,
OPT_WRITE_BOOL(abort, ",")
OPT_WRITE_CHAR_P(dss, ",")
OPT_WRITE_UNSIGNED(narenas, ",")
OPT_WRITE_CHAR_P(percpu_arena, ",")
OPT_WRITE_SSIZE_T_MUTABLE(decay_time, arenas.decay_time, ",")
OPT_WRITE_CHAR_P(junk, ",")
OPT_WRITE_BOOL(zero, ",")

src/tcache.c

@@ -357,12 +357,8 @@ tcache_create(tsdn_t *tsdn, arena_t *arena) {
static void
tcache_destroy(tsd_t *tsd, tcache_t *tcache) {
-arena_t *arena;
unsigned i;
-arena = arena_choose(tsd, NULL);
-tcache_arena_dissociate(tsd_tsdn(tsd), tcache);
for (i = 0; i < NBINS; i++) {
tcache_bin_t *tbin = &tcache->tbins[i];
tcache_bin_flush_small(tsd, tcache, tbin, i, 0);
@@ -381,6 +377,13 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache) {
}
}
/*
* Get arena after flushing -- when using percpu arena, the associated
* arena could change during flush.
*/
arena_t *arena = arena_choose(tsd, NULL);
tcache_arena_dissociate(tsd_tsdn(tsd), tcache);
if (config_prof && tcache->prof_accumbytes > 0 &&
arena_prof_accum(tsd_tsdn(tsd), arena, tcache->prof_accumbytes)) {
prof_idump(tsd_tsdn(tsd));