Implement per-CPU arena.
The new feature, opt.percpu_arena, determines thread-arena association dynamically based on CPU id. Three modes are supported: "percpu", "phycpu" and disabled. "percpu" uses the current core id (with help from sched_getcpu()) directly as the arena index, while "phycpu" assigns threads on the same physical CPU to the same arena. In other words, "percpu" means # of arenas == # of CPUs, while "phycpu" has # of arenas == 1/2 * (# of CPUs). Note that no runtime check on whether hyper-threading is enabled has been added yet. When enabled, threads will be migrated between arenas when a CPU change is detected. In the current design, to reduce the overhead of reading the CPU id, each arena tracks the thread that accessed it most recently. When a different thread comes in, we read the CPU id and update the arena if necessary.
This commit is contained in:
parent
8721e19c04
commit
ec532e2c5c
@ -1598,6 +1598,15 @@ if test "x$have_secure_getenv" = "x1" ; then
|
||||
AC_DEFINE([JEMALLOC_HAVE_SECURE_GETENV], [ ])
|
||||
fi
|
||||
|
||||
dnl Check if the GNU-specific sched_getcpu function exists.
|
||||
AC_CHECK_FUNC([sched_getcpu],
|
||||
[have_sched_getcpu="1"],
|
||||
[have_sched_getcpu="0"]
|
||||
)
|
||||
if test "x$have_sched_getcpu" = "x1" ; then
|
||||
AC_DEFINE([JEMALLOC_HAVE_SCHED_GETCPU], [ ])
|
||||
fi
|
||||
|
||||
dnl Check if the Solaris/BSD issetugid function exists.
|
||||
AC_CHECK_FUNC([issetugid],
|
||||
[have_issetugid="1"],
|
||||
|
@ -13,6 +13,10 @@ extern ssize_t opt_decay_time;
|
||||
|
||||
extern const arena_bin_info_t arena_bin_info[NBINS];
|
||||
|
||||
extern percpu_arena_mode_t percpu_arena_mode;
|
||||
extern const char *opt_percpu_arena;
|
||||
extern const char *percpu_arena_mode_names[];
|
||||
|
||||
void arena_stats_large_nrequests_add(tsdn_t *tsdn, arena_stats_t *arena_stats,
|
||||
szind_t szind, uint64_t nrequests);
|
||||
void arena_stats_mapped_add(tsdn_t *tsdn, arena_stats_t *arena_stats,
|
||||
|
@ -7,6 +7,7 @@ void arena_internal_add(arena_t *arena, size_t size);
|
||||
void arena_internal_sub(arena_t *arena, size_t size);
|
||||
size_t arena_internal_get(arena_t *arena);
|
||||
bool arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes);
|
||||
void percpu_arena_update(tsd_t *tsd, unsigned cpu);
|
||||
#endif /* JEMALLOC_ENABLE_INLINE */
|
||||
|
||||
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_))
|
||||
@ -42,6 +43,30 @@ arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes) {
|
||||
return prof_accum_add(tsdn, &arena->prof_accum, accumbytes);
|
||||
}
|
||||
|
||||
JEMALLOC_INLINE void
|
||||
percpu_arena_update(tsd_t *tsd, unsigned cpu) {
|
||||
assert(have_percpu_arena);
|
||||
arena_t *oldarena = tsd_arena_get(tsd);
|
||||
assert(oldarena != NULL);
|
||||
unsigned oldind = arena_ind_get(oldarena);
|
||||
|
||||
if (oldind != cpu) {
|
||||
unsigned newind = cpu;
|
||||
arena_t *newarena = arena_get(tsd_tsdn(tsd), newind, true);
|
||||
assert(newarena != NULL);
|
||||
|
||||
/* Set new arena/tcache associations. */
|
||||
arena_migrate(tsd, oldind, newind);
|
||||
if (config_tcache) {
|
||||
tcache_t *tcache = tsd_tcache_get(tsd);
|
||||
if (tcache) {
|
||||
tcache_arena_reassociate(tsd_tsdn(tsd), tcache,
|
||||
newarena);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_)) */
|
||||
|
||||
#endif /* JEMALLOC_INTERNAL_ARENA_INLINES_A_H */
|
||||
|
@ -125,6 +125,13 @@ struct arena_s {
|
||||
*/
|
||||
unsigned nthreads[2];
|
||||
|
||||
/*
|
||||
* When percpu_arena is enabled, to amortize the cost of reading /
|
||||
* updating the current CPU id, track the most recent thread accessing
|
||||
* this arena, and only read CPU if there is a mismatch.
|
||||
*/
|
||||
tsdn_t *last_thd;
|
||||
|
||||
/* Synchronization: internal. */
|
||||
arena_stats_t stats;
|
||||
|
||||
|
@ -19,4 +19,20 @@ typedef struct arena_bin_s arena_bin_t;
|
||||
typedef struct arena_s arena_t;
|
||||
typedef struct arena_tdata_s arena_tdata_t;
|
||||
|
||||
/*
 * Per-CPU arena modes.  The numeric values double as indices into
 * percpu_arena_mode_names[], so the order here must match that table.
 */
typedef enum {
	/* Feature off. */
	percpu_arena_disabled = 0,
	/* One arena per CPU (value 1). */
	percpu_arena,
	/* One arena per physical CPU, i.e. hyper threads share arena (value 2). */
	per_phycpu_arena,

	/* Number of modes; used as an exclusive upper bound (value 3). */
	percpu_arena_mode_limit
} percpu_arena_mode_t;
|
||||
|
||||
#ifdef JEMALLOC_PERCPU_ARENA
|
||||
#define PERCPU_ARENA_MODE_DEFAULT percpu_arena
|
||||
#define OPT_PERCPU_ARENA_DEFAULT "percpu"
|
||||
#else
|
||||
#define PERCPU_ARENA_MODE_DEFAULT percpu_arena_disabled
|
||||
#define OPT_PERCPU_ARENA_DEFAULT "disabled"
|
||||
#endif
|
||||
|
||||
#endif /* JEMALLOC_INTERNAL_ARENA_TYPES_H */
|
||||
|
@ -145,6 +145,17 @@ static const bool have_thp =
|
||||
false
|
||||
#endif
|
||||
;
|
||||
#ifdef JEMALLOC_HAVE_SCHED_GETCPU
|
||||
/* Currently percpu_arena depends on sched_getcpu. */
|
||||
#define JEMALLOC_PERCPU_ARENA
|
||||
#endif
|
||||
static const bool have_percpu_arena =
|
||||
#ifdef JEMALLOC_PERCPU_ARENA
|
||||
true
|
||||
#else
|
||||
false
|
||||
#endif
|
||||
;
|
||||
|
||||
#if (defined(JEMALLOC_OSATOMIC) || defined(JEMALLOC_OSSPIN))
|
||||
#include <libkern/OSAtomic.h>
|
||||
@ -220,6 +231,9 @@ typedef unsigned pszind_t;
|
||||
/* Size class index type. */
|
||||
typedef unsigned szind_t;
|
||||
|
||||
/* Processor / core id type. */
|
||||
typedef int malloc_cpuid_t;
|
||||
|
||||
/*
|
||||
* Flags bits:
|
||||
*
|
||||
@ -455,7 +469,7 @@ extern unsigned narenas_auto;
|
||||
* Arenas that are used to service external requests. Not all elements of the
|
||||
* arenas array are necessarily used; arenas are created lazily as needed.
|
||||
*/
|
||||
extern arena_t **arenas;
|
||||
extern arena_t *arenas[];
|
||||
|
||||
/*
|
||||
* pind2sz_tab encodes the same information as could be computed by
|
||||
@ -548,6 +562,10 @@ arena_tdata_t *arena_tdata_get(tsd_t *tsd, unsigned ind,
|
||||
bool refresh_if_missing);
|
||||
arena_t *arena_get(tsdn_t *tsdn, unsigned ind, bool init_if_missing);
|
||||
ticker_t *decay_ticker_get(tsd_t *tsd, unsigned ind);
|
||||
malloc_cpuid_t malloc_getcpu(void);
|
||||
unsigned percpu_arena_choose(void);
|
||||
unsigned percpu_arena_ind_limit(void);
|
||||
|
||||
#endif
|
||||
|
||||
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_))
|
||||
@ -818,32 +836,53 @@ sa2u(size_t size, size_t alignment) {
|
||||
return usize;
|
||||
}
|
||||
|
||||
/* Choose an arena based on a per-thread value. */
|
||||
JEMALLOC_INLINE arena_t *
|
||||
arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal) {
|
||||
arena_t *ret;
|
||||
JEMALLOC_ALWAYS_INLINE malloc_cpuid_t
|
||||
malloc_getcpu(void) {
|
||||
assert(have_percpu_arena);
|
||||
#if defined(JEMALLOC_HAVE_SCHED_GETCPU)
|
||||
return (malloc_cpuid_t)sched_getcpu();
|
||||
#else
|
||||
not_reached();
|
||||
return -1;
|
||||
#endif
|
||||
}
|
||||
|
||||
if (arena != NULL) {
|
||||
return arena;
|
||||
/* Return the chosen arena index based on current cpu. */
|
||||
JEMALLOC_ALWAYS_INLINE unsigned
|
||||
percpu_arena_choose(void) {
|
||||
unsigned arena_ind;
|
||||
assert(have_percpu_arena && (percpu_arena_mode != percpu_arena_disabled));
|
||||
|
||||
malloc_cpuid_t cpuid = malloc_getcpu();
|
||||
assert(cpuid >= 0);
|
||||
if ((percpu_arena_mode == percpu_arena) ||
|
||||
((unsigned)cpuid < ncpus / 2)) {
|
||||
arena_ind = cpuid;
|
||||
} else {
|
||||
assert(percpu_arena_mode == per_phycpu_arena);
|
||||
/* Hyper threads on the same physical CPU share arena. */
|
||||
arena_ind = cpuid - ncpus / 2;
|
||||
}
|
||||
|
||||
ret = internal ? tsd_iarena_get(tsd) : tsd_arena_get(tsd);
|
||||
if (unlikely(ret == NULL)) {
|
||||
ret = arena_choose_hard(tsd, internal);
|
||||
return arena_ind;
|
||||
}
|
||||
|
||||
/* Return the limit of percpu auto arena range, i.e. arenas[0...ind_limit). */
|
||||
JEMALLOC_ALWAYS_INLINE unsigned
|
||||
percpu_arena_ind_limit(void) {
|
||||
assert(have_percpu_arena && (percpu_arena_mode != percpu_arena_disabled));
|
||||
if (percpu_arena_mode == per_phycpu_arena && ncpus > 1) {
|
||||
if (ncpus % 2) {
|
||||
/* This likely means a misconfig. */
|
||||
return ncpus / 2 + 1;
|
||||
}
|
||||
return ncpus / 2;
|
||||
} else {
|
||||
return ncpus;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
JEMALLOC_INLINE arena_t *
|
||||
arena_choose(tsd_t *tsd, arena_t *arena) {
|
||||
return arena_choose_impl(tsd, arena, false);
|
||||
}
|
||||
|
||||
JEMALLOC_INLINE arena_t *
|
||||
arena_ichoose(tsd_t *tsd, arena_t *arena) {
|
||||
return arena_choose_impl(tsd, arena, true);
|
||||
}
|
||||
|
||||
JEMALLOC_INLINE arena_tdata_t *
|
||||
arena_tdata_get(tsd_t *tsd, unsigned ind, bool refresh_if_missing) {
|
||||
@ -912,6 +951,50 @@ extent_t *iealloc(tsdn_t *tsdn, const void *ptr);
|
||||
#endif
|
||||
|
||||
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_))
|
||||
/* Choose an arena based on a per-thread value. */
|
||||
JEMALLOC_INLINE arena_t *
|
||||
arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal) {
|
||||
arena_t *ret;
|
||||
|
||||
if (arena != NULL) {
|
||||
return arena;
|
||||
}
|
||||
|
||||
ret = internal ? tsd_iarena_get(tsd) : tsd_arena_get(tsd);
|
||||
if (unlikely(ret == NULL)) {
|
||||
ret = arena_choose_hard(tsd, internal);
|
||||
}
|
||||
|
||||
assert(ret != NULL);
|
||||
/*
|
||||
* Note that for percpu arena, if the current arena is outside of the
|
||||
* auto percpu arena range, (i.e. thread is assigned to a manually
|
||||
* managed arena), then percpu arena is skipped.
|
||||
*/
|
||||
if (have_percpu_arena && (percpu_arena_mode != percpu_arena_disabled) &&
|
||||
(arena_ind_get(ret) < percpu_arena_ind_limit()) &&
|
||||
(ret->last_thd != tsd_tsdn(tsd))) {
|
||||
unsigned ind = percpu_arena_choose();
|
||||
if (arena_ind_get(ret) != ind) {
|
||||
percpu_arena_update(tsd, ind);
|
||||
ret = tsd_arena_get(tsd);
|
||||
}
|
||||
ret->last_thd = tsd_tsdn(tsd);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Choose an arena for the calling thread using its regular (non-internal)
 * arena binding; a non-NULL `arena` is returned as is.
 */
JEMALLOC_INLINE arena_t *
arena_choose(tsd_t *tsd, arena_t *arena) {
	const bool internal = false;

	return arena_choose_impl(tsd, arena, internal);
}
|
||||
|
||||
/*
 * Choose an arena for the calling thread using its internal arena binding;
 * a non-NULL `arena` is returned as is.
 */
JEMALLOC_INLINE arena_t *
arena_ichoose(tsd_t *tsd, arena_t *arena) {
	const bool internal = true;

	return arena_choose_impl(tsd, arena, internal);
}
|
||||
|
||||
JEMALLOC_ALWAYS_INLINE extent_t *
|
||||
iealloc(tsdn_t *tsdn, const void *ptr) {
|
||||
return extent_lookup(tsdn, ptr, true);
|
||||
|
@ -308,6 +308,9 @@
|
||||
/* Adaptive mutex support in pthreads. */
|
||||
#undef JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP
|
||||
|
||||
/* GNU specific sched_getcpu support */
|
||||
#undef JEMALLOC_HAVE_SCHED_GETCPU
|
||||
|
||||
/*
|
||||
* If defined, jemalloc symbols are not exported (doesn't work when
|
||||
* JEMALLOC_PREFIX is not defined).
|
||||
|
@ -258,6 +258,7 @@ large_salloc
|
||||
lg_floor
|
||||
lg_prof_sample
|
||||
malloc_cprintf
|
||||
malloc_getcpu
|
||||
malloc_mutex_assert_not_owner
|
||||
malloc_mutex_assert_owner
|
||||
malloc_mutex_boot
|
||||
@ -330,6 +331,9 @@ pages_purge_forced
|
||||
pages_purge_lazy
|
||||
pages_trim
|
||||
pages_unmap
|
||||
percpu_arena_choose
|
||||
percpu_arena_ind_limit
|
||||
percpu_arena_update
|
||||
pind2sz
|
||||
pind2sz_compute
|
||||
pind2sz_lookup
|
||||
|
10
src/arena.c
10
src/arena.c
@ -4,6 +4,15 @@
|
||||
/******************************************************************************/
|
||||
/* Data. */
|
||||
|
||||
/*
 * Option-string spelling of each per-CPU arena mode.  The option parser
 * stores the matching index as the mode, so entry order must follow the
 * percpu_arena_mode_t enumerators.
 */
const char *percpu_arena_mode_names[] = {
	"disabled",	/* index 0 */
	"percpu",	/* index 1 */
	"phycpu"	/* index 2 */
};
|
||||
|
||||
const char *opt_percpu_arena = OPT_PERCPU_ARENA_DEFAULT;
|
||||
percpu_arena_mode_t percpu_arena_mode = PERCPU_ARENA_MODE_DEFAULT;
|
||||
|
||||
ssize_t opt_decay_time = DECAY_TIME_DEFAULT;
|
||||
static ssize_t decay_time_default;
|
||||
|
||||
@ -1629,6 +1638,7 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) {
|
||||
}
|
||||
|
||||
arena->nthreads[0] = arena->nthreads[1] = 0;
|
||||
arena->last_thd = NULL;
|
||||
|
||||
if (config_stats) {
|
||||
if (arena_stats_init(tsdn, &arena->stats)) {
|
||||
|
18
src/ctl.c
18
src/ctl.c
@ -72,6 +72,7 @@ CTL_PROTO(config_xmalloc)
|
||||
CTL_PROTO(opt_abort)
|
||||
CTL_PROTO(opt_dss)
|
||||
CTL_PROTO(opt_narenas)
|
||||
CTL_PROTO(opt_percpu_arena)
|
||||
CTL_PROTO(opt_decay_time)
|
||||
CTL_PROTO(opt_stats_print)
|
||||
CTL_PROTO(opt_junk)
|
||||
@ -229,6 +230,7 @@ static const ctl_named_node_t opt_node[] = {
|
||||
{NAME("abort"), CTL(opt_abort)},
|
||||
{NAME("dss"), CTL(opt_dss)},
|
||||
{NAME("narenas"), CTL(opt_narenas)},
|
||||
{NAME("percpu_arena"), CTL(opt_percpu_arena)},
|
||||
{NAME("decay_time"), CTL(opt_decay_time)},
|
||||
{NAME("stats_print"), CTL(opt_stats_print)},
|
||||
{NAME("junk"), CTL(opt_junk)},
|
||||
@ -1284,6 +1286,7 @@ CTL_RO_CONFIG_GEN(config_xmalloc, bool)
|
||||
CTL_RO_NL_GEN(opt_abort, opt_abort, bool)
|
||||
CTL_RO_NL_GEN(opt_dss, opt_dss, const char *)
|
||||
CTL_RO_NL_GEN(opt_narenas, opt_narenas, unsigned)
|
||||
CTL_RO_NL_GEN(opt_percpu_arena, opt_percpu_arena, const char *)
|
||||
CTL_RO_NL_GEN(opt_decay_time, opt_decay_time, ssize_t)
|
||||
CTL_RO_NL_GEN(opt_stats_print, opt_stats_print, bool)
|
||||
CTL_RO_NL_CGEN(config_fill, opt_junk, opt_junk, const char *)
|
||||
@ -1317,10 +1320,10 @@ thread_arena_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
|
||||
if (oldarena == NULL) {
|
||||
return EAGAIN;
|
||||
}
|
||||
|
||||
newind = oldind = arena_ind_get(oldarena);
|
||||
WRITE(newind, unsigned);
|
||||
READ(oldind, unsigned);
|
||||
|
||||
if (newind != oldind) {
|
||||
arena_t *newarena;
|
||||
|
||||
@ -1330,6 +1333,19 @@ thread_arena_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
|
||||
goto label_return;
|
||||
}
|
||||
|
||||
if (have_percpu_arena &&
|
||||
(percpu_arena_mode != percpu_arena_disabled)) {
|
||||
if (newind < percpu_arena_ind_limit()) {
|
||||
/*
|
||||
* If perCPU arena is enabled, thread_arena
|
||||
* control is not allowed for the auto arena
|
||||
* range.
|
||||
*/
|
||||
ret = EPERM;
|
||||
goto label_return;
|
||||
}
|
||||
}
|
||||
|
||||
/* Initialize arena if necessary. */
|
||||
newarena = arena_get(tsd_tsdn(tsd), newind, true);
|
||||
if (newarena == NULL) {
|
||||
|
133
src/jemalloc.c
133
src/jemalloc.c
@ -56,7 +56,8 @@ static malloc_mutex_t arenas_lock;
|
||||
* arenas. arenas[narenas_auto..narenas_total) are only used if the application
|
||||
* takes some action to create them and allocate from them.
|
||||
*/
|
||||
arena_t **arenas;
|
||||
JEMALLOC_ALIGNED(CACHELINE)
|
||||
arena_t *arenas[MALLOCX_ARENA_MAX + 1];
|
||||
static unsigned narenas_total; /* Use narenas_total_*(). */
|
||||
static arena_t *a0; /* arenas[0]; read-only after initialization. */
|
||||
unsigned narenas_auto; /* Read-only after initialization. */
|
||||
@ -543,6 +544,16 @@ arena_t *
|
||||
arena_choose_hard(tsd_t *tsd, bool internal) {
|
||||
arena_t *ret JEMALLOC_CC_SILENCE_INIT(NULL);
|
||||
|
||||
if (have_percpu_arena && percpu_arena_mode != percpu_arena_disabled) {
|
||||
unsigned choose = percpu_arena_choose();
|
||||
ret = arena_get(tsd_tsdn(tsd), choose, true);
|
||||
assert(ret != NULL);
|
||||
arena_bind(tsd, arena_ind_get(ret), false);
|
||||
arena_bind(tsd, arena_ind_get(ret), true);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (narenas_auto > 1) {
|
||||
unsigned i, j, choose[2], first_null;
|
||||
|
||||
@ -1095,6 +1106,30 @@ malloc_conf_init(void) {
|
||||
"lg_tcache_max", -1,
|
||||
(sizeof(size_t) << 3) - 1)
|
||||
}
|
||||
/*
 * "percpu_arena:<mode>" -- select the per-CPU arena mode by name.
 *
 * Both the key and the value must match in full.  strncmp() bounded only by
 * klen / vlen is a prefix test, which would accept a truncated key such as
 * "p" or a truncated (even empty) value such as "dis", so the lengths are
 * compared explicitly as well.
 */
if (klen == sizeof("percpu_arena") - 1 &&
    strncmp("percpu_arena", k, klen) == 0) {
	int i;
	bool match = false;
	for (i = 0; i < percpu_arena_mode_limit; i++) {
		const char *mode_name = percpu_arena_mode_names[i];
		if (strlen(mode_name) == vlen &&
		    strncmp(mode_name, v, vlen) == 0) {
			if (!have_percpu_arena) {
				malloc_conf_error(
				    "No getcpu support",
				    k, klen, v, vlen);
			}
			/* Table index i is the enum value of the mode. */
			percpu_arena_mode = (percpu_arena_mode_t)i;
			opt_percpu_arena = mode_name;
			match = true;
			break;
		}
	}
	if (!match) {
		malloc_conf_error("Invalid conf value",
		    k, klen, v, vlen);
	}
	continue;
}
|
||||
if (config_prof) {
|
||||
CONF_HANDLE_BOOL(opt_prof, "prof", true)
|
||||
CONF_HANDLE_CHAR_P(opt_prof_prefix,
|
||||
@ -1204,8 +1239,6 @@ malloc_init_hard_a0_locked() {
|
||||
* malloc_ncpus().
|
||||
*/
|
||||
narenas_auto = 1;
|
||||
narenas_total_set(narenas_auto);
|
||||
arenas = &a0;
|
||||
memset(arenas, 0, sizeof(arena_t *) * narenas_auto);
|
||||
/*
|
||||
* Initialize one arena here. The rest are lazily created in
|
||||
@ -1215,7 +1248,7 @@ malloc_init_hard_a0_locked() {
|
||||
== NULL) {
|
||||
return true;
|
||||
}
|
||||
|
||||
a0 = arena_get(TSDN_NULL, 0, false);
|
||||
malloc_init_state = malloc_init_a0_initialized;
|
||||
|
||||
return false;
|
||||
@ -1255,23 +1288,76 @@ malloc_init_hard_recursible(void) {
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool
|
||||
malloc_init_hard_finish(tsdn_t *tsdn) {
|
||||
if (malloc_mutex_boot()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (opt_narenas == 0) {
|
||||
static unsigned
|
||||
malloc_narenas_default(void) {
|
||||
assert(ncpus > 0);
|
||||
/*
|
||||
* For SMP systems, create more than one arena per CPU by
|
||||
* default.
|
||||
*/
|
||||
if (ncpus > 1) {
|
||||
opt_narenas = ncpus << 2;
|
||||
return ncpus << 2;
|
||||
} else {
|
||||
opt_narenas = 1;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
static bool
|
||||
malloc_init_narenas(void) {
|
||||
assert(ncpus > 0);
|
||||
|
||||
if (percpu_arena_mode != percpu_arena_disabled) {
|
||||
if (!have_percpu_arena || malloc_getcpu() < 0) {
|
||||
percpu_arena_mode = percpu_arena_disabled;
|
||||
malloc_printf("<jemalloc>: perCPU arena getcpu() not "
|
||||
"available. Setting narenas to %u.\n", opt_narenas ?
|
||||
opt_narenas : malloc_narenas_default());
|
||||
if (opt_abort) {
|
||||
abort();
|
||||
}
|
||||
} else {
|
||||
/*
 * With per-CPU arenas the arena index is derived from the CPU id, so
 * more CPUs than MALLOCX_ARENA_MAX is reported as an error and
 * initialization fails (after aborting if opt_abort is set).
 */
if (ncpus > MALLOCX_ARENA_MAX) {
	/* ncpus is printed with %u, matching the message below. */
	malloc_printf("<jemalloc>: narenas w/ percpu "
	    "arena beyond limit (%u)\n", ncpus);
	if (opt_abort) {
		abort();
	}
	return true;
}
/*
 * An odd CPU count in "phycpu" mode is reported as a misconfiguration
 * but is not fatal unless opt_abort is set.
 */
if ((percpu_arena_mode == per_phycpu_arena) &&
    (ncpus % 2 != 0)) {
	malloc_printf("<jemalloc>: invalid "
	    "configuration -- per physical CPU arena "
	    "with odd number (%u) of CPUs (no hyper "
	    "threading?).\n", ncpus);
	if (opt_abort) {
		abort();
	}
}
|
||||
unsigned n = percpu_arena_ind_limit();
|
||||
if (opt_narenas < n) {
|
||||
/*
|
||||
* If narenas is specified with percpu_arena
|
||||
* enabled, actual narenas is set as the greater
|
||||
* of the two. percpu_arena_choose will be free
|
||||
* to use any of the arenas based on CPU
|
||||
* id. This is conservative (at a small cost)
|
||||
* but ensures correctness.
|
||||
*
|
||||
* If for some reason the ncpus determined at
|
||||
* boot is not the actual number (e.g. because
|
||||
* of affinity setting from numactl), reserving
|
||||
* narenas this way provides a workaround for
|
||||
* percpu_arena.
|
||||
*/
|
||||
opt_narenas = n;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (opt_narenas == 0) {
|
||||
opt_narenas = malloc_narenas_default();
|
||||
}
|
||||
assert(opt_narenas > 0);
|
||||
|
||||
narenas_auto = opt_narenas;
|
||||
/*
|
||||
* Limit the number of arenas to the indexing range of MALLOCX_ARENA().
|
||||
@ -1283,14 +1369,13 @@ malloc_init_hard_finish(tsdn_t *tsdn) {
|
||||
}
|
||||
narenas_total_set(narenas_auto);
|
||||
|
||||
/* Allocate and initialize arenas. */
|
||||
arenas = (arena_t **)base_alloc(tsdn, a0->base, sizeof(arena_t *) *
|
||||
(MALLOCX_ARENA_MAX+1), CACHELINE);
|
||||
if (arenas == NULL) {
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool
|
||||
malloc_init_hard_finish(void) {
|
||||
if (malloc_mutex_boot())
|
||||
return true;
|
||||
}
|
||||
/* Copy the pointer to the one arena that was already initialized. */
|
||||
arena_set(0, a0);
|
||||
|
||||
malloc_init_state = malloc_init_initialized;
|
||||
malloc_slow_flag_init();
|
||||
@ -1328,12 +1413,18 @@ malloc_init_hard(void) {
|
||||
}
|
||||
malloc_mutex_lock(tsd_tsdn(tsd), &init_lock);
|
||||
|
||||
/* Need this before prof_boot2 (for allocation). */
|
||||
if (malloc_init_narenas()) {
|
||||
malloc_mutex_unlock(tsd_tsdn(tsd), &init_lock);
|
||||
return true;
|
||||
}
|
||||
|
||||
if (config_prof && prof_boot2(tsd)) {
|
||||
malloc_mutex_unlock(tsd_tsdn(tsd), &init_lock);
|
||||
return true;
|
||||
}
|
||||
|
||||
if (malloc_init_hard_finish(tsd_tsdn(tsd))) {
|
||||
if (malloc_init_hard_finish()) {
|
||||
malloc_mutex_unlock(tsd_tsdn(tsd), &init_lock);
|
||||
return true;
|
||||
}
|
||||
|
@ -621,6 +621,7 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque,
|
||||
OPT_WRITE_BOOL(abort, ",")
|
||||
OPT_WRITE_CHAR_P(dss, ",")
|
||||
OPT_WRITE_UNSIGNED(narenas, ",")
|
||||
OPT_WRITE_CHAR_P(percpu_arena, ",")
|
||||
OPT_WRITE_SSIZE_T_MUTABLE(decay_time, arenas.decay_time, ",")
|
||||
OPT_WRITE_CHAR_P(junk, ",")
|
||||
OPT_WRITE_BOOL(zero, ",")
|
||||
|
11
src/tcache.c
11
src/tcache.c
@ -357,12 +357,8 @@ tcache_create(tsdn_t *tsdn, arena_t *arena) {
|
||||
|
||||
static void
|
||||
tcache_destroy(tsd_t *tsd, tcache_t *tcache) {
|
||||
arena_t *arena;
|
||||
unsigned i;
|
||||
|
||||
arena = arena_choose(tsd, NULL);
|
||||
tcache_arena_dissociate(tsd_tsdn(tsd), tcache);
|
||||
|
||||
for (i = 0; i < NBINS; i++) {
|
||||
tcache_bin_t *tbin = &tcache->tbins[i];
|
||||
tcache_bin_flush_small(tsd, tcache, tbin, i, 0);
|
||||
@ -381,6 +377,13 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache) {
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Get arena after flushing -- when using percpu arena, the associated
|
||||
* arena could change during flush.
|
||||
*/
|
||||
arena_t *arena = arena_choose(tsd, NULL);
|
||||
tcache_arena_dissociate(tsd_tsdn(tsd), tcache);
|
||||
|
||||
if (config_prof && tcache->prof_accumbytes > 0 &&
|
||||
arena_prof_accum(tsd_tsdn(tsd), arena, tcache->prof_accumbytes)) {
|
||||
prof_idump(tsd_tsdn(tsd));
|
||||
|
@ -37,10 +37,16 @@ thd_start(void *arg) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void
|
||||
mallctl_failure(int err) {
|
||||
char buf[BUFERROR_BUF];
|
||||
|
||||
buferror(err, buf, sizeof(buf));
|
||||
test_fail("Error in mallctl(): %s", buf);
|
||||
}
|
||||
|
||||
TEST_BEGIN(test_thread_arena) {
|
||||
void *p;
|
||||
unsigned arena_ind;
|
||||
size_t size;
|
||||
int err;
|
||||
thd_t thds[NTHREADS];
|
||||
unsigned i;
|
||||
@ -48,13 +54,15 @@ TEST_BEGIN(test_thread_arena) {
|
||||
p = malloc(1);
|
||||
assert_ptr_not_null(p, "Error in malloc()");
|
||||
|
||||
size = sizeof(arena_ind);
|
||||
if ((err = mallctl("thread.arena", (void *)&arena_ind, &size, NULL,
|
||||
0))) {
|
||||
char buf[BUFERROR_BUF];
|
||||
unsigned arena_ind, old_arena_ind;
|
||||
size_t sz = sizeof(unsigned);
|
||||
assert_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0),
|
||||
0, "Arena creation failure");
|
||||
|
||||
buferror(err, buf, sizeof(buf));
|
||||
test_fail("Error in mallctl(): %s", buf);
|
||||
size_t size = sizeof(arena_ind);
|
||||
if ((err = mallctl("thread.arena", (void *)&old_arena_ind, &size,
|
||||
(void *)&arena_ind, sizeof(arena_ind))) != 0) {
|
||||
mallctl_failure(err);
|
||||
}
|
||||
|
||||
for (i = 0; i < NTHREADS; i++) {
|
||||
@ -67,6 +75,7 @@ TEST_BEGIN(test_thread_arena) {
|
||||
thd_join(thds[i], (void *)&join_ret);
|
||||
assert_zd_eq(join_ret, 0, "Unexpected thread join error");
|
||||
}
|
||||
free(p);
|
||||
}
|
||||
TEST_END
|
||||
|
||||
|
@ -160,6 +160,7 @@ TEST_BEGIN(test_mallctl_opt) {
|
||||
TEST_MALLCTL_OPT(bool, abort, always);
|
||||
TEST_MALLCTL_OPT(const char *, dss, always);
|
||||
TEST_MALLCTL_OPT(unsigned, narenas, always);
|
||||
TEST_MALLCTL_OPT(const char *, percpu_arena, always);
|
||||
TEST_MALLCTL_OPT(ssize_t, decay_time, always);
|
||||
TEST_MALLCTL_OPT(bool, stats_print, always);
|
||||
TEST_MALLCTL_OPT(const char *, junk, fill);
|
||||
@ -327,20 +328,38 @@ TEST_BEGIN(test_tcache) {
|
||||
TEST_END
|
||||
|
||||
TEST_BEGIN(test_thread_arena) {
|
||||
unsigned arena_old, arena_new, narenas;
|
||||
size_t sz = sizeof(unsigned);
|
||||
unsigned old_arena_ind, new_arena_ind, narenas;
|
||||
const char *opt_percpu_arena;
|
||||
|
||||
size_t sz = sizeof(opt_percpu_arena);
|
||||
assert_d_eq(mallctl("opt.percpu_arena", &opt_percpu_arena, &sz, NULL,
|
||||
0), 0, "Unexpected mallctl() failure");
|
||||
|
||||
sz = sizeof(unsigned);
|
||||
assert_d_eq(mallctl("arenas.narenas", (void *)&narenas, &sz, NULL, 0),
|
||||
0, "Unexpected mallctl() failure");
|
||||
assert_u_eq(narenas, opt_narenas, "Number of arenas incorrect");
|
||||
arena_new = narenas - 1;
|
||||
assert_d_eq(mallctl("thread.arena", (void *)&arena_old, &sz,
|
||||
(void *)&arena_new, sizeof(unsigned)), 0,
|
||||
|
||||
if (strcmp(opt_percpu_arena, "disabled") == 0) {
|
||||
new_arena_ind = narenas - 1;
|
||||
assert_d_eq(mallctl("thread.arena", (void *)&old_arena_ind, &sz,
|
||||
(void *)&new_arena_ind, sizeof(unsigned)), 0,
|
||||
"Unexpected mallctl() failure");
|
||||
arena_new = 0;
|
||||
assert_d_eq(mallctl("thread.arena", (void *)&arena_old, &sz,
|
||||
(void *)&arena_new, sizeof(unsigned)), 0,
|
||||
new_arena_ind = 0;
|
||||
assert_d_eq(mallctl("thread.arena", (void *)&old_arena_ind, &sz,
|
||||
(void *)&new_arena_ind, sizeof(unsigned)), 0,
|
||||
"Unexpected mallctl() failure");
|
||||
} else {
|
||||
assert_d_eq(mallctl("thread.arena", (void *)&old_arena_ind, &sz,
|
||||
NULL, 0), 0, "Unexpected mallctl() failure");
|
||||
new_arena_ind = percpu_arena_ind_limit() - 1;
|
||||
if (old_arena_ind != new_arena_ind) {
|
||||
assert_d_eq(mallctl("thread.arena",
|
||||
(void *)&old_arena_ind, &sz, (void *)&new_arena_ind,
|
||||
sizeof(unsigned)), EPERM, "thread.arena ctl "
|
||||
"should not be allowed with percpu arena");
|
||||
}
|
||||
}
|
||||
}
|
||||
TEST_END
|
||||
|
||||
|
@ -33,7 +33,7 @@ TEST_BEGIN(test_stats_large) {
|
||||
size_t sz;
|
||||
int expected = config_stats ? 0 : ENOENT;
|
||||
|
||||
p = mallocx(SMALL_MAXCLASS+1, 0);
|
||||
p = mallocx(SMALL_MAXCLASS+1, MALLOCX_ARENA(0));
|
||||
assert_ptr_not_null(p, "Unexpected mallocx() failure");
|
||||
|
||||
assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)),
|
||||
@ -66,7 +66,6 @@ TEST_BEGIN(test_stats_large) {
|
||||
TEST_END
|
||||
|
||||
TEST_BEGIN(test_stats_arenas_summary) {
|
||||
unsigned arena;
|
||||
void *little, *large;
|
||||
uint64_t epoch;
|
||||
size_t sz;
|
||||
@ -74,13 +73,9 @@ TEST_BEGIN(test_stats_arenas_summary) {
|
||||
size_t mapped;
|
||||
uint64_t npurge, nmadvise, purged;
|
||||
|
||||
arena = 0;
|
||||
assert_d_eq(mallctl("thread.arena", NULL, NULL, (void *)&arena,
|
||||
sizeof(arena)), 0, "Unexpected mallctl() failure");
|
||||
|
||||
little = mallocx(SMALL_MAXCLASS, 0);
|
||||
little = mallocx(SMALL_MAXCLASS, MALLOCX_ARENA(0));
|
||||
assert_ptr_not_null(little, "Unexpected mallocx() failure");
|
||||
large = mallocx((1U << LG_LARGE_MINCLASS), 0);
|
||||
large = mallocx((1U << LG_LARGE_MINCLASS), MALLOCX_ARENA(0));
|
||||
assert_ptr_not_null(large, "Unexpected mallocx() failure");
|
||||
|
||||
dallocx(little, 0);
|
||||
@ -128,7 +123,6 @@ no_lazy_lock(void) {
|
||||
}
|
||||
|
||||
TEST_BEGIN(test_stats_arenas_small) {
|
||||
unsigned arena;
|
||||
void *p;
|
||||
size_t sz, allocated;
|
||||
uint64_t epoch, nmalloc, ndalloc, nrequests;
|
||||
@ -136,11 +130,7 @@ TEST_BEGIN(test_stats_arenas_small) {
|
||||
|
||||
no_lazy_lock(); /* Lazy locking would dodge tcache testing. */
|
||||
|
||||
arena = 0;
|
||||
assert_d_eq(mallctl("thread.arena", NULL, NULL, (void *)&arena,
|
||||
sizeof(arena)), 0, "Unexpected mallctl() failure");
|
||||
|
||||
p = mallocx(SMALL_MAXCLASS, 0);
|
||||
p = mallocx(SMALL_MAXCLASS, MALLOCX_ARENA(0));
|
||||
assert_ptr_not_null(p, "Unexpected mallocx() failure");
|
||||
|
||||
assert_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, 0),
|
||||
@ -178,17 +168,12 @@ TEST_BEGIN(test_stats_arenas_small) {
|
||||
TEST_END
|
||||
|
||||
TEST_BEGIN(test_stats_arenas_large) {
|
||||
unsigned arena;
|
||||
void *p;
|
||||
size_t sz, allocated;
|
||||
uint64_t epoch, nmalloc, ndalloc;
|
||||
int expected = config_stats ? 0 : ENOENT;
|
||||
|
||||
arena = 0;
|
||||
assert_d_eq(mallctl("thread.arena", NULL, NULL, (void *)&arena,
|
||||
sizeof(arena)), 0, "Unexpected mallctl() failure");
|
||||
|
||||
p = mallocx((1U << LG_LARGE_MINCLASS), 0);
|
||||
p = mallocx((1U << LG_LARGE_MINCLASS), MALLOCX_ARENA(0));
|
||||
assert_ptr_not_null(p, "Unexpected mallocx() failure");
|
||||
|
||||
assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)),
|
||||
@ -217,20 +202,29 @@ TEST_BEGIN(test_stats_arenas_large) {
|
||||
}
|
||||
TEST_END
|
||||
|
||||
/*
 * Build the mallctl name "stats.arenas.<arena_ind>.bins.0.<name>" into cmd.
 *
 * cmd must be large enough for the resulting string; no bound is enforced
 * here.  name is only read, so it is const-qualified and string literals can
 * be passed without discarding qualifiers.
 */
static void
gen_mallctl_str(char *cmd, const char *name, unsigned arena_ind) {
	sprintf(cmd, "stats.arenas.%u.bins.0.%s", arena_ind, name);
}
|
||||
|
||||
TEST_BEGIN(test_stats_arenas_bins) {
|
||||
unsigned arena;
|
||||
void *p;
|
||||
size_t sz, curslabs, curregs;
|
||||
uint64_t epoch, nmalloc, ndalloc, nrequests, nfills, nflushes;
|
||||
uint64_t nslabs, nreslabs;
|
||||
int expected = config_stats ? 0 : ENOENT;
|
||||
|
||||
arena = 0;
|
||||
assert_d_eq(mallctl("thread.arena", NULL, NULL, (void *)&arena,
|
||||
sizeof(arena)), 0, "Unexpected mallctl() failure");
|
||||
unsigned arena_ind, old_arena_ind;
|
||||
sz = sizeof(unsigned);
|
||||
assert_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0),
|
||||
0, "Arena creation failure");
|
||||
sz = sizeof(arena_ind);
|
||||
assert_d_eq(mallctl("thread.arena", (void *)&old_arena_ind, &sz,
|
||||
(void *)&arena_ind, sizeof(arena_ind)), 0,
|
||||
"Unexpected mallctl() failure");
|
||||
|
||||
p = mallocx(arena_bin_info[0].reg_size, 0);
|
||||
assert_ptr_not_null(p, "Unexpected mallocx() failure");
|
||||
p = malloc(arena_bin_info[0].reg_size);
|
||||
assert_ptr_not_null(p, "Unexpected malloc() failure");
|
||||
|
||||
assert_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, 0),
|
||||
config_tcache ? 0 : ENOENT, "Unexpected mallctl() result");
|
||||
@ -238,33 +232,40 @@ TEST_BEGIN(test_stats_arenas_bins) {
|
||||
assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)),
|
||||
0, "Unexpected mallctl() failure");
|
||||
|
||||
char cmd[128];
|
||||
sz = sizeof(uint64_t);
|
||||
assert_d_eq(mallctl("stats.arenas.0.bins.0.nmalloc", (void *)&nmalloc,
|
||||
&sz, NULL, 0), expected, "Unexpected mallctl() result");
|
||||
assert_d_eq(mallctl("stats.arenas.0.bins.0.ndalloc", (void *)&ndalloc,
|
||||
&sz, NULL, 0), expected, "Unexpected mallctl() result");
|
||||
assert_d_eq(mallctl("stats.arenas.0.bins.0.nrequests",
|
||||
(void *)&nrequests, &sz, NULL, 0), expected,
|
||||
gen_mallctl_str(cmd, "nmalloc", arena_ind);
|
||||
assert_d_eq(mallctl(cmd, (void *)&nmalloc, &sz, NULL, 0), expected,
|
||||
"Unexpected mallctl() result");
|
||||
gen_mallctl_str(cmd, "ndalloc", arena_ind);
|
||||
assert_d_eq(mallctl(cmd, (void *)&ndalloc, &sz, NULL, 0), expected,
|
||||
"Unexpected mallctl() result");
|
||||
gen_mallctl_str(cmd, "nrequests", arena_ind);
|
||||
assert_d_eq(mallctl(cmd, (void *)&nrequests, &sz, NULL, 0), expected,
|
||||
"Unexpected mallctl() result");
|
||||
sz = sizeof(size_t);
|
||||
assert_d_eq(mallctl("stats.arenas.0.bins.0.curregs", (void *)&curregs,
|
||||
&sz, NULL, 0), expected, "Unexpected mallctl() result");
|
||||
gen_mallctl_str(cmd, "curregs", arena_ind);
|
||||
assert_d_eq(mallctl(cmd, (void *)&curregs, &sz, NULL, 0), expected,
|
||||
"Unexpected mallctl() result");
|
||||
|
||||
sz = sizeof(uint64_t);
|
||||
assert_d_eq(mallctl("stats.arenas.0.bins.0.nfills", (void *)&nfills,
|
||||
&sz, NULL, 0), config_tcache ? expected : ENOENT,
|
||||
"Unexpected mallctl() result");
|
||||
assert_d_eq(mallctl("stats.arenas.0.bins.0.nflushes", (void *)&nflushes,
|
||||
&sz, NULL, 0), config_tcache ? expected : ENOENT,
|
||||
"Unexpected mallctl() result");
|
||||
gen_mallctl_str(cmd, "nfills", arena_ind);
|
||||
assert_d_eq(mallctl(cmd, (void *)&nfills, &sz, NULL, 0),
|
||||
config_tcache ? expected : ENOENT, "Unexpected mallctl() result");
|
||||
gen_mallctl_str(cmd, "nflushes", arena_ind);
|
||||
assert_d_eq(mallctl(cmd, (void *)&nflushes, &sz, NULL, 0),
|
||||
config_tcache ? expected : ENOENT, "Unexpected mallctl() result");
|
||||
|
||||
assert_d_eq(mallctl("stats.arenas.0.bins.0.nslabs", (void *)&nslabs,
|
||||
&sz, NULL, 0), expected, "Unexpected mallctl() result");
|
||||
assert_d_eq(mallctl("stats.arenas.0.bins.0.nreslabs", (void *)&nreslabs,
|
||||
&sz, NULL, 0), expected, "Unexpected mallctl() result");
|
||||
gen_mallctl_str(cmd, "nslabs", arena_ind);
|
||||
assert_d_eq(mallctl(cmd, (void *)&nslabs, &sz, NULL, 0), expected,
|
||||
"Unexpected mallctl() result");
|
||||
gen_mallctl_str(cmd, "nreslabs", arena_ind);
|
||||
assert_d_eq(mallctl(cmd, (void *)&nreslabs, &sz, NULL, 0), expected,
|
||||
"Unexpected mallctl() result");
|
||||
sz = sizeof(size_t);
|
||||
assert_d_eq(mallctl("stats.arenas.0.bins.0.curslabs", (void *)&curslabs,
|
||||
&sz, NULL, 0), expected, "Unexpected mallctl() result");
|
||||
gen_mallctl_str(cmd, "curslabs", arena_ind);
|
||||
assert_d_eq(mallctl(cmd, (void *)&curslabs, &sz, NULL, 0), expected,
|
||||
"Unexpected mallctl() result");
|
||||
|
||||
if (config_stats) {
|
||||
assert_u64_gt(nmalloc, 0,
|
||||
@ -292,21 +293,16 @@ TEST_BEGIN(test_stats_arenas_bins) {
|
||||
TEST_END
|
||||
|
||||
TEST_BEGIN(test_stats_arenas_lextents) {
|
||||
unsigned arena;
|
||||
void *p;
|
||||
uint64_t epoch, nmalloc, ndalloc;
|
||||
size_t curlextents, sz, hsize;
|
||||
int expected = config_stats ? 0 : ENOENT;
|
||||
|
||||
arena = 0;
|
||||
assert_d_eq(mallctl("thread.arena", NULL, NULL, (void *)&arena,
|
||||
sizeof(arena)), 0, "Unexpected mallctl() failure");
|
||||
|
||||
sz = sizeof(size_t);
|
||||
assert_d_eq(mallctl("arenas.lextent.0.size", (void *)&hsize, &sz, NULL,
|
||||
0), 0, "Unexpected mallctl() failure");
|
||||
|
||||
p = mallocx(hsize, 0);
|
||||
p = mallocx(hsize, MALLOCX_ARENA(0));
|
||||
assert_ptr_not_null(p, "Unexpected mallocx() failure");
|
||||
|
||||
assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)),
|
||||
|
Loading…
Reference in New Issue
Block a user