Improve thread-->arena assignment.
Rather than blindly assigning threads to arenas in round-robin fashion, choose the lowest-numbered arena that currently has the smallest number of threads assigned to it. Add the "stats.arenas.<i>.nthreads" mallctl.
parent 9c43c13a35
commit 597632be18
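
For reference, the new statistic is readable through the standard mallctl() interface. A minimal sketch, not part of the commit: it assumes a build configured with --enable-stats and without a symbol prefix (so the entry point is exported as plain mallctl()), and it queries arena 0 arbitrarily.

#include <stdio.h>
#include <stdint.h>
#include <jemalloc/jemalloc.h>

int
main(void)
{
	uint64_t epoch = 1;
	size_t esz = sizeof(epoch);
	unsigned nthreads;
	size_t sz = sizeof(nthreads);

	/* Advance the epoch so that cached stats are refreshed. */
	mallctl("epoch", &epoch, &esz, &epoch, esz);

	/* "<i>" in the documented name is a literal arena index. */
	if (mallctl("stats.arenas.0.nthreads", &nthreads, &sz, NULL, 0) == 0)
		printf("arena 0: %u assigned thread(s)\n", nthreads);
	return (0);
}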
doc/jemalloc.xml.in
@@ -1642,6 +1642,16 @@ malloc_conf = "xmalloc:true";]]></programlisting>
         </para></listitem>
       </varlistentry>
 
+      <varlistentry>
+        <term>
+          <mallctl>stats.arenas.<i>.nthreads</mallctl>
+          (<type>unsigned</type>)
+          <literal>r-</literal>
+        </term>
+        <listitem><para>Number of threads currently assigned to
+        arena.</para></listitem>
+      </varlistentry>
+
       <varlistentry>
         <term>
           <mallctl>stats.arenas.<i>.pactive</mallctl>
include/jemalloc/internal/arena.h
@@ -295,8 +295,18 @@ struct arena_s {
 	unsigned		ind;
 
 	/*
-	 * All non-bin-related operations on this arena require that lock be
-	 * locked.
+	 * Number of threads currently assigned to this arena.  This field is
+	 * protected by arenas_lock.
 	 */
+	unsigned		nthreads;
+
+	/*
+	 * There are three classes of arena operations from a locking
+	 * perspective:
+	 * 1) Thread assignment (modifies nthreads) is protected by
+	 *    arenas_lock.
+	 * 2) Bin-related operations are protected by bin locks.
+	 * 3) Chunk- and run-related operations are protected by this mutex.
+	 */
 	malloc_mutex_t		lock;
 
include/jemalloc/internal/ctl.h
@@ -29,6 +29,7 @@ struct ctl_node_s {
 
 struct ctl_arena_stats_s {
 	bool			initialized;
+	unsigned		nthreads;
 	size_t			pactive;
 	size_t			pdirty;
 #ifdef JEMALLOC_STATS
include/jemalloc/internal/jemalloc_internal.h.in
@@ -293,6 +293,7 @@ extern size_t lg_pagesize;
 extern unsigned		ncpus;
 
 extern malloc_mutex_t	arenas_lock; /* Protects arenas initialization. */
+extern pthread_key_t	arenas_tsd;
 #ifndef NO_TLS
 /*
  * Map of pthread_self() --> arenas[???], used for selecting an arena to use
@@ -302,9 +303,9 @@ extern __thread arena_t *arenas_tls JEMALLOC_ATTR(tls_model("initial-exec"));
 #  define ARENA_GET()	arenas_tls
 #  define ARENA_SET(v)	do {						\
 	arenas_tls = (v);						\
+	pthread_setspecific(arenas_tsd, (void *)(v));			\
 } while (0)
 #else
-extern pthread_key_t	arenas_tsd;
 #  define ARENA_GET()	((arena_t *)pthread_getspecific(arenas_tsd))
 #  define ARENA_SET(v)	do {						\
 	pthread_setspecific(arenas_tsd, (void *)(v));			\
src/arena.c
@@ -2175,6 +2175,7 @@ arena_new(arena_t *arena, unsigned ind)
 	arena_bin_t *bin;
 
 	arena->ind = ind;
+	arena->nthreads = 0;
 
 	if (malloc_mutex_init(&arena->lock))
 		return (true);
src/ctl.c
@@ -182,6 +182,7 @@ CTL_PROTO(stats_arenas_i_lruns_j_highruns)
 CTL_PROTO(stats_arenas_i_lruns_j_curruns)
 INDEX_PROTO(stats_arenas_i_lruns_j)
 #endif
+CTL_PROTO(stats_arenas_i_nthreads)
 CTL_PROTO(stats_arenas_i_pactive)
 CTL_PROTO(stats_arenas_i_pdirty)
 #ifdef JEMALLOC_STATS
@@ -434,6 +435,7 @@ static const ctl_node_t stats_arenas_i_lruns_node[] = {
 #endif
 
 static const ctl_node_t stats_arenas_i_node[] = {
+	{NAME("nthreads"),	CTL(stats_arenas_i_nthreads)},
 	{NAME("pactive"),	CTL(stats_arenas_i_pactive)},
 	{NAME("pdirty"),	CTL(stats_arenas_i_pdirty)}
 #ifdef JEMALLOC_STATS
@@ -620,6 +622,7 @@ ctl_arena_refresh(arena_t *arena, unsigned i)
 
 	ctl_arena_clear(astats);
 
+	sstats->nthreads += astats->nthreads;
 #ifdef JEMALLOC_STATS
 	ctl_arena_stats_amerge(astats, arena);
 	/* Merge into sum stats as well. */
@@ -657,10 +660,17 @@ ctl_refresh(void)
 	 * Clear sum stats, since they will be merged into by
 	 * ctl_arena_refresh().
 	 */
+	ctl_stats.arenas[narenas].nthreads = 0;
 	ctl_arena_clear(&ctl_stats.arenas[narenas]);
 
 	malloc_mutex_lock(&arenas_lock);
 	memcpy(tarenas, arenas, sizeof(arena_t *) * narenas);
+	for (i = 0; i < narenas; i++) {
+		if (arenas[i] != NULL)
+			ctl_stats.arenas[i].nthreads = arenas[i]->nthreads;
+		else
+			ctl_stats.arenas[i].nthreads = 0;
+	}
 	malloc_mutex_unlock(&arenas_lock);
 	for (i = 0; i < narenas; i++) {
 		bool initialized = (tarenas[i] != NULL);
@@ -1129,6 +1139,8 @@ thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
 		malloc_mutex_lock(&arenas_lock);
 		if ((arena = arenas[newind]) == NULL)
 			arena = arenas_extend(newind);
+		arenas[oldind]->nthreads--;
+		arenas[newind]->nthreads++;
 		malloc_mutex_unlock(&arenas_lock);
 		if (arena == NULL) {
 			ret = EAGAIN;
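
The two added lines above keep nthreads consistent when a thread rebinds itself with the existing "thread.arena" mallctl. For illustration only (not from the commit), a thread could migrate itself to arena 1 like this, error handling elided:

	unsigned oldind, newind = 1;
	size_t sz = sizeof(unsigned);
	/* Read the current binding and request arena 1 in one call. */
	mallctl("thread.arena", &oldind, &sz, &newind, sizeof(newind));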
@@ -1536,6 +1548,7 @@ stats_arenas_i_lruns_j_index(const size_t *mib, size_t miblen, size_t j)
 }
 
 #endif
+CTL_RO_GEN(stats_arenas_i_nthreads, ctl_stats.arenas[mib[2]].nthreads, unsigned)
 CTL_RO_GEN(stats_arenas_i_pactive, ctl_stats.arenas[mib[2]].pactive, size_t)
 CTL_RO_GEN(stats_arenas_i_pdirty, ctl_stats.arenas[mib[2]].pdirty, size_t)
 #ifdef JEMALLOC_STATS
src/jemalloc.c
@@ -7,12 +7,10 @@
 malloc_mutex_t		arenas_lock;
 arena_t			**arenas;
 unsigned		narenas;
-static unsigned		next_arena;
 
+pthread_key_t		arenas_tsd;
 #ifndef NO_TLS
 __thread arena_t	*arenas_tls JEMALLOC_ATTR(tls_model("initial-exec"));
-#else
-pthread_key_t		arenas_tsd;
 #endif
 
 #ifdef JEMALLOC_STATS
@@ -70,6 +68,7 @@ size_t opt_narenas = 0;
 static void	wrtmessage(void *cbopaque, const char *s);
 static void	stats_print_atexit(void);
 static unsigned	malloc_ncpus(void);
+static void	arenas_cleanup(void *arg);
 #if (defined(JEMALLOC_STATS) && defined(NO_TLS))
 static void	thread_allocated_cleanup(void *arg);
 #endif
@@ -147,13 +146,53 @@ choose_arena_hard(void)
 	arena_t *ret;
 
 	if (narenas > 1) {
+		unsigned i, choose, first_null;
+
+		choose = 0;
+		first_null = narenas;
 		malloc_mutex_lock(&arenas_lock);
-		if ((ret = arenas[next_arena]) == NULL)
-			ret = arenas_extend(next_arena);
-		next_arena = (next_arena + 1) % narenas;
+		assert(arenas[0] != NULL);
+		for (i = 1; i < narenas; i++) {
+			if (arenas[i] != NULL) {
+				/*
+				 * Choose the first arena that has the lowest
+				 * number of threads assigned to it.
+				 */
+				if (arenas[i]->nthreads <
+				    arenas[choose]->nthreads)
+					choose = i;
+			} else if (first_null == narenas) {
+				/*
+				 * Record the index of the first uninitialized
+				 * arena, in case all extant arenas are in use.
+				 *
+				 * NB: It is possible for there to be
+				 * discontinuities in terms of initialized
+				 * versus uninitialized arenas, due to the
+				 * "thread.arena" mallctl.
+				 */
+				first_null = i;
+			}
+		}
+
+		if (arenas[choose]->nthreads == 0 || first_null == narenas) {
+			/*
+			 * Use an unloaded arena, or the least loaded arena if
+			 * all arenas are already initialized.
+			 */
+			ret = arenas[choose];
+		} else {
+			/* Initialize a new arena. */
+			ret = arenas_extend(first_null);
+		}
+		ret->nthreads++;
 		malloc_mutex_unlock(&arenas_lock);
-	} else
+	} else {
 		ret = arenas[0];
+		malloc_mutex_lock(&arenas_lock);
+		ret->nthreads++;
+		malloc_mutex_unlock(&arenas_lock);
+	}
 
 	ARENA_SET(ret);
 
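The scan above implements the policy from the commit message: prefer the least loaded arena, break ties toward lower indices, and remember the first uninitialized slot so a fresh arena can be created instead. Distilled into a hypothetical standalone helper (not part of the commit; arena_t is the struct from arena.h, and the caller is assumed to hold arenas_lock):

static unsigned
arena_choose_index(arena_t **arenas, unsigned narenas, unsigned *first_null)
{
	unsigned i, choose = 0;

	*first_null = narenas;	/* Sentinel: no uninitialized slot seen. */
	for (i = 1; i < narenas; i++) {
		if (arenas[i] != NULL) {
			/* Strict '<' breaks ties toward the lowest index. */
			if (arenas[i]->nthreads < arenas[choose]->nthreads)
				choose = i;
		} else if (*first_null == narenas)
			*first_null = i;	/* First uninitialized slot. */
	}
	return (choose);
}
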
@@ -259,6 +298,16 @@ malloc_ncpus(void)
 	return (ret);
 }
 
+static void
+arenas_cleanup(void *arg)
+{
+	arena_t *arena = (arena_t *)arg;
+
+	malloc_mutex_lock(&arenas_lock);
+	arena->nthreads--;
+	malloc_mutex_unlock(&arenas_lock);
+}
+
 #if (defined(JEMALLOC_STATS) && defined(NO_TLS))
 static void
 thread_allocated_cleanup(void *arg)
@@ -737,6 +786,7 @@ malloc_init_hard(void)
 	 * threaded mode.
 	 */
 	ARENA_SET(arenas[0]);
+	arenas[0]->nthreads++;
 
 	if (malloc_mutex_init(&arenas_lock))
 		return (true);
@@ -779,14 +829,10 @@ malloc_init_hard(void)
 		malloc_write(")\n");
 	}
 
-	next_arena = (narenas > 0) ? 1 : 0;
-
-#ifdef NO_TLS
-	if (pthread_key_create(&arenas_tsd, NULL) != 0) {
+	if (pthread_key_create(&arenas_tsd, arenas_cleanup) != 0) {
 		malloc_mutex_unlock(&init_lock);
 		return (true);
 	}
-#endif
 
 	/* Allocate and initialize arenas. */
 	arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas);
@@ -819,7 +865,6 @@ malloc_init_hard(void)
 	return (false);
 }
 
-
 #ifdef JEMALLOC_ZONE
 JEMALLOC_ATTR(constructor)
 void
src/stats.c
@@ -319,6 +319,7 @@ static void
 stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque,
     unsigned i)
 {
+	unsigned nthreads;
 	size_t pagesize, pactive, pdirty, mapped;
 	uint64_t npurge, nmadvise, purged;
 	size_t small_allocated;
@@ -328,6 +329,9 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque,
 
 	CTL_GET("arenas.pagesize", &pagesize, size_t);
 
+	CTL_I_GET("stats.arenas.0.nthreads", &nthreads, unsigned);
+	malloc_cprintf(write_cb, cbopaque,
+	    "assigned threads: %u\n", nthreads);
 	CTL_I_GET("stats.arenas.0.pactive", &pactive, size_t);
 	CTL_I_GET("stats.arenas.0.pdirty", &pdirty, size_t);
 	CTL_I_GET("stats.arenas.0.npurge", &npurge, uint64_t);
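
With this hunk, each per-arena block printed by malloc_stats_print() gains an "assigned threads: N" line. A minimal way to see the output (illustrative; assumes an unprefixed, stats-enabled build):

#include <stdlib.h>
#include <jemalloc/jemalloc.h>

int
main(void)
{
	free(malloc(1));			/* Touch the allocator. */
	malloc_stats_print(NULL, NULL, NULL);	/* Report goes to stderr. */
	return (0);
}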