Disable percpu arena in case of non deterministic CPU count

A deterministic number of CPUs is important for percpu arena to work
correctly, since it uses the CPU index from sched_getcpu(); if that index
is greater than the number of CPUs, bad things will happen, or an
assertion will fail in a debug build:

    <jemalloc>: ../contrib/jemalloc/src/jemalloc.c:321: Failed assertion: "ind <= narenas_total_get()"
    Aborted (core dumped)

Number of CPUs can be obtained from the following places:
- sched_getaffinity()
- sysconf(_SC_NPROCESSORS_ONLN)
- sysconf(_SC_NPROCESSORS_CONF)

For sched_getaffinity(), you may simply use taskset(1) to run the program
on a different CPU; if that CPU is not the first one, percpu will work
incorrectly, i.e.:

    $ taskset --cpu-list $(( $(getconf _NPROCESSORS_ONLN)-1 )) <your_program>

_SC_NPROCESSORS_ONLN uses /sys/devices/system/cpu/online. LXD/LXC
virtualizes the /sys/devices/system/cpu/online file [1], so when you run
a container with a restricted limits.cpus, it will bind randomly selected
CPU(s) to it.

  [1]: https://github.com/lxc/lxcfs/issues/301

_SC_NPROCESSORS_CONF uses /sys/devices/system/cpu/cpu*, and AFAIK nobody
is playing with the dentries there.

So if all three of these are equal, percpu arenas should work correctly.

And a small note regarding _SC_NPROCESSORS_ONLN/_SC_NPROCESSORS_CONF:
musl uses sched_getaffinity() for both. So this will also increase the
entropy.

Also note that you can check whether percpu arena is really applied by
using abort_conf:true.

Refs: https://github.com/jemalloc/jemalloc/pull/1939
Refs: https://github.com/ClickHouse/ClickHouse/issues/32806

v2: move malloc_cpu_count_is_deterministic() into
    malloc_init_hard_recursible() since _SC_NPROCESSORS_CONF does
    allocations for readdir()
v3:
- mark cpu_count_is_deterministic static
- check only if percpu arena is enabled
- check narenas
This commit is contained in:
Azat Khuzhin 2021-12-17 21:00:21 +03:00 committed by Alexander Lapenkov
parent bb5052ce90
commit cafe9a3158

View File

@ -148,6 +148,8 @@ unsigned opt_narenas = 0;
fxp_t opt_narenas_ratio = FXP_INIT_INT(4);
unsigned ncpus;
/* Whether ncpus is deterministic, see malloc_cpu_count_is_deterministic() */
static int cpu_count_is_deterministic = -1;
/* Protects arenas initialization. */
malloc_mutex_t arenas_lock;
@ -741,6 +743,42 @@ malloc_ncpus(void) {
return ((result == -1) ? 1 : (unsigned)result);
}
/*
 * Ensure that the number of CPUs is deterministic, i.e. that it is the same
 * according to:
 * - sched_getaffinity() (or pthread_getaffinity_np())
 * - _SC_NPROCESSORS_ONLN
 * - _SC_NPROCESSORS_CONF
 * Otherwise tricky things are possible when percpu arenas are in use.
 *
 * Returns true when every available source reports the same CPU count.
 * Returns false when the counts differ or when any of the queries fails,
 * since a count that cannot be obtained cannot be trusted either.
 * Always returns true on Windows, where no check is performed.
 */
static bool
malloc_cpu_count_is_deterministic(void)
{
#ifdef _WIN32
	return true;
#else
	long cpu_onln = sysconf(_SC_NPROCESSORS_ONLN);
	long cpu_conf = sysconf(_SC_NPROCESSORS_CONF);
	/*
	 * sysconf() reports failure as -1; two failures would otherwise
	 * compare equal and be mistaken for a matching CPU count.
	 */
	if (cpu_onln == -1 || cpu_conf == -1) {
		return false;
	}
	if (cpu_onln != cpu_conf) {
		return false;
	}
#  if defined(CPU_COUNT)
#    if defined(__FreeBSD__)
	cpuset_t set;
#    else
	cpu_set_t set;
#    endif /* __FreeBSD__ */
#    if defined(JEMALLOC_HAVE_SCHED_SETAFFINITY)
	int err = sched_getaffinity(0, sizeof(set), &set);
#    else /* !JEMALLOC_HAVE_SCHED_SETAFFINITY */
	int err = pthread_getaffinity_np(pthread_self(), sizeof(set), &set);
#    endif /* JEMALLOC_HAVE_SCHED_SETAFFINITY */
	/*
	 * Both calls return non-zero on failure and leave the set unfilled;
	 * do not count bits in an uninitialized set.
	 */
	if (err != 0) {
		return false;
	}
	long cpu_affinity = CPU_COUNT(&set);
	if (cpu_affinity != cpu_conf) {
		return false;
	}
#  endif /* CPU_COUNT */
	return true;
#endif
}
static void
init_opt_stats_opts(const char *v, size_t vlen, char *dest) {
size_t opts_len = strlen(dest);
@ -1833,6 +1871,7 @@ malloc_init_hard_recursible(void) {
malloc_init_state = malloc_init_recursible;
ncpus = malloc_ncpus();
cpu_count_is_deterministic = malloc_cpu_count_is_deterministic();
#if (defined(JEMALLOC_HAVE_PTHREAD_ATFORK) && !defined(JEMALLOC_MUTEX_INIT_CB) \
&& !defined(JEMALLOC_ZONE) && !defined(_WIN32) && \
@ -1892,7 +1931,22 @@ malloc_init_narenas(void) {
assert(ncpus > 0);
if (opt_percpu_arena != percpu_arena_disabled) {
if (!have_percpu_arena || malloc_getcpu() < 0) {
if (!cpu_count_is_deterministic) {
if (opt_narenas) {
malloc_write("<jemalloc>: Number of CPUs is not deterministic, "
"but narenas is set. Hope you not what you are doing and "
"you have set narenas to largest possible CPU ID.\n");
if (opt_abort) {
abort();
}
} else {
opt_percpu_arena = percpu_arena_disabled;
if (opt_abort_conf) {
malloc_write("<jemalloc>: Number of CPUs is not deterministic\n");
malloc_abort_invalid_conf();
}
}
} else if (!have_percpu_arena || malloc_getcpu() < 0) {
opt_percpu_arena = percpu_arena_disabled;
malloc_printf("<jemalloc>: perCPU arena getcpu() not "
"available. Setting narenas to %u.\n", opt_narenas ?