Improve thread-->arena assignment.

Rather than blindly assigning threads to arenas in round-robin fashion,
choose the lowest-numbered arena that currently has the smallest number
of threads assigned to it.

Add the "stats.arenas.<i>.nthreads" mallctl.
Jason Evans 2011-03-18 13:41:33 -07:00
parent 9c43c13a35
commit 597632be18
8 changed files with 101 additions and 16 deletions
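
In outline, the new policy in choose_arena_hard() (see the src/jemalloc.c diff below) replaces the next_arena counter with a linear scan under arenas_lock. The following standalone C sketch restates that scan; select_arena() and its parameters are illustrative names, not part of the commit:

    /*
     * Sketch of the selection policy: prefer the lowest-numbered arena with
     * the fewest assigned threads, but initialize the first NULL slot when
     * every existing arena already has at least one thread.
     */
    static unsigned
    select_arena(arena_t **arenas, unsigned narenas)
    {
        unsigned i, choose = 0, first_null = narenas;

        for (i = 1; i < narenas; i++) {
            if (arenas[i] != NULL) {
                /* Strict '<' keeps the lowest index on ties. */
                if (arenas[i]->nthreads < arenas[choose]->nthreads)
                    choose = i;
            } else if (first_null == narenas)
                first_null = i; /* First uninitialized slot. */
        }

        /* Reuse an idle arena; otherwise spin up a fresh one if possible. */
        if (arenas[choose]->nthreads == 0 || first_null == narenas)
            return (choose);
        return (first_null);
    }

The caller must hold arenas_lock and increment nthreads on the chosen arena, exactly as the diff does.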

doc/jemalloc.xml.in

@@ -1642,6 +1642,16 @@ malloc_conf = "xmalloc:true";]]></programlisting>
         </para></listitem>
       </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>stats.arenas.&lt;i&gt;.nthreads</mallctl>
+          (<type>unsigned</type>)
+          <literal>r-</literal>
+        </term>
+        <listitem><para>Number of threads currently assigned to
+        arena.</para></listitem>
+      </varlistentry>
 
       <varlistentry>
         <term>
           <mallctl>stats.arenas.&lt;i&gt;.pactive</mallctl>
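
For reference, the new statistic is readable through jemalloc's public mallctl() interface. A minimal sketch, assuming a default (unprefixed) build with <jemalloc/jemalloc.h>; note that "stats.*" values are snapshots, refreshed by writing the "epoch" mallctl:

    #include <stdio.h>
    #include <stdint.h>
    #include <jemalloc/jemalloc.h>

    int
    main(void)
    {
        uint64_t epoch = 1;
        size_t sz = sizeof(epoch);
        unsigned nthreads;

        /* Advance the epoch so that "stats.*" reads return fresh values. */
        mallctl("epoch", &epoch, &sz, &epoch, sz);

        /* Read the thread count for arena 0. */
        sz = sizeof(nthreads);
        if (mallctl("stats.arenas.0.nthreads", &nthreads, &sz, NULL, 0) == 0)
            printf("arena 0: %u assigned thread(s)\n", nthreads);
        return (0);
    }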

include/jemalloc/internal/arena.h

@@ -295,8 +295,18 @@ struct arena_s {
 	unsigned		ind;
 
+	/*
+	 * Number of threads currently assigned to this arena.  This field is
+	 * protected by arenas_lock.
+	 */
+	unsigned		nthreads;
+
 	/*
-	 * All non-bin-related operations on this arena require that lock be
-	 * locked.
+	 * There are three classes of arena operations from a locking
+	 * perspective:
+	 * 1) Thread assignment (modifies nthreads) is protected by
+	 *    arenas_lock.
+	 * 2) Bin-related operations are protected by bin locks.
+	 * 3) Chunk- and run-related operations are protected by this mutex.
 	 */
 	malloc_mutex_t		lock;
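
A toy illustration of the three lock classes named in the new comment; migrate_thread() and touch_runs() are hypothetical helpers, not jemalloc functions:

    /* Class 1: thread assignment is serialized by the global arenas_lock. */
    static void
    migrate_thread(arena_t *from, arena_t *to)
    {
        malloc_mutex_lock(&arenas_lock);
        from->nthreads--;
        to->nthreads++;
        malloc_mutex_unlock(&arenas_lock);
    }

    /* Class 2: each arena_bin_t carries its own lock (not shown here). */

    /* Class 3: chunk- and run-related state is guarded by arena->lock. */
    static void
    touch_runs(arena_t *arena)
    {
        malloc_mutex_lock(&arena->lock);
        /* ... split/coalesce runs, map/unmap chunks ... */
        malloc_mutex_unlock(&arena->lock);
    }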

include/jemalloc/internal/ctl.h

@@ -29,6 +29,7 @@ struct ctl_node_s {
 struct ctl_arena_stats_s {
 	bool			initialized;
+	unsigned		nthreads;
 	size_t			pactive;
 	size_t			pdirty;
 #ifdef JEMALLOC_STATS

include/jemalloc/internal/jemalloc_internal.h

@@ -293,6 +293,7 @@ extern size_t	lg_pagesize;
 extern unsigned		ncpus;
 
 extern malloc_mutex_t	arenas_lock; /* Protects arenas initialization. */
+extern pthread_key_t	arenas_tsd;
 #ifndef NO_TLS
 /*
  * Map of pthread_self() --> arenas[???], used for selecting an arena to use
@@ -302,9 +303,9 @@ extern __thread arena_t	*arenas_tls JEMALLOC_ATTR(tls_model("initial-exec"));
 #  define ARENA_GET()	arenas_tls
 #  define ARENA_SET(v)	do {						\
 	arenas_tls = (v);						\
+	pthread_setspecific(arenas_tsd, (void *)(v));			\
 } while (0)
 #else
-extern pthread_key_t	arenas_tsd;
 #  define ARENA_GET()	((arena_t *)pthread_getspecific(arenas_tsd))
 #  define ARENA_SET(v)	do {						\
 	pthread_setspecific(arenas_tsd, (void *)(v));			\
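
Note that the TLS variant of ARENA_SET() now also mirrors the pointer into arenas_tsd even though reads still come from arenas_tls. The reason is the destructor semantics of POSIX TSD: pthread_key_create() runs a key's cleanup function at thread exit only for threads whose value for that key is non-NULL, which is what allows arenas_cleanup() (added in src/jemalloc.c below) to decrement nthreads. A self-contained demonstration of that pthread behavior, unrelated to jemalloc itself:

    #include <pthread.h>
    #include <stdio.h>

    static pthread_key_t key;

    /* Runs at thread exit, but only if the thread's value is non-NULL. */
    static void
    cleanup(void *arg)
    {
        printf("destructor saw %p\n", arg);
    }

    static void *
    worker(void *arg)
    {
        (void)arg;
        pthread_setspecific(key, (void *)0x1);  /* analogous to ARENA_SET() */
        return (NULL);
    }

    int
    main(void)
    {
        pthread_t thd;

        pthread_key_create(&key, cleanup);      /* analogous to arenas_tsd */
        pthread_create(&thd, NULL, worker, NULL);
        pthread_join(thd, NULL);
        return (0);
    }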

src/arena.c

@@ -2175,6 +2175,7 @@ arena_new(arena_t *arena, unsigned ind)
 	arena_bin_t *bin;
 
 	arena->ind = ind;
+	arena->nthreads = 0;
 
 	if (malloc_mutex_init(&arena->lock))
 		return (true);

src/ctl.c

@@ -182,6 +182,7 @@ CTL_PROTO(stats_arenas_i_lruns_j_highruns)
 CTL_PROTO(stats_arenas_i_lruns_j_curruns)
 INDEX_PROTO(stats_arenas_i_lruns_j)
 #endif
+CTL_PROTO(stats_arenas_i_nthreads)
 CTL_PROTO(stats_arenas_i_pactive)
 CTL_PROTO(stats_arenas_i_pdirty)
 #ifdef JEMALLOC_STATS
@@ -434,6 +435,7 @@ static const ctl_node_t stats_arenas_i_lruns_node[] = {
 #endif
 
 static const ctl_node_t stats_arenas_i_node[] = {
+	{NAME("nthreads"),	CTL(stats_arenas_i_nthreads)},
 	{NAME("pactive"),	CTL(stats_arenas_i_pactive)},
 	{NAME("pdirty"),	CTL(stats_arenas_i_pdirty)}
 #ifdef JEMALLOC_STATS
@@ -620,6 +622,7 @@ ctl_arena_refresh(arena_t *arena, unsigned i)
 
 	ctl_arena_clear(astats);
 
+	sstats->nthreads += astats->nthreads;
 #ifdef JEMALLOC_STATS
 	ctl_arena_stats_amerge(astats, arena);
 	/* Merge into sum stats as well. */
@@ -657,10 +660,17 @@ ctl_refresh(void)
 	 * Clear sum stats, since they will be merged into by
 	 * ctl_arena_refresh().
 	 */
+	ctl_stats.arenas[narenas].nthreads = 0;
 	ctl_arena_clear(&ctl_stats.arenas[narenas]);
 
 	malloc_mutex_lock(&arenas_lock);
 	memcpy(tarenas, arenas, sizeof(arena_t *) * narenas);
+	for (i = 0; i < narenas; i++) {
+		if (arenas[i] != NULL)
+			ctl_stats.arenas[i].nthreads = arenas[i]->nthreads;
+		else
+			ctl_stats.arenas[i].nthreads = 0;
+	}
 	malloc_mutex_unlock(&arenas_lock);
 	for (i = 0; i < narenas; i++) {
 		bool initialized = (tarenas[i] != NULL);
@@ -1129,6 +1139,8 @@ thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
 		malloc_mutex_lock(&arenas_lock);
 		if ((arena = arenas[newind]) == NULL)
 			arena = arenas_extend(newind);
+		arenas[oldind]->nthreads--;
+		arenas[newind]->nthreads++;
 		malloc_mutex_unlock(&arenas_lock);
 		if (arena == NULL) {
 			ret = EAGAIN;
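
The oldind/newind bookkeeping above keeps nthreads accurate when a thread migrates itself with the existing "thread.arena" mallctl. A usage sketch, assuming a default (unprefixed) build:

    #include <stdio.h>
    #include <jemalloc/jemalloc.h>

    int
    main(void)
    {
        unsigned ind = 1;
        size_t sz = sizeof(ind);

        /* Write: reassigns the calling thread to arena 1. */
        if (mallctl("thread.arena", NULL, NULL, &ind, sizeof(ind)) != 0)
            return (1);

        /* Read back: confirms the new assignment. */
        mallctl("thread.arena", &ind, &sz, NULL, 0);
        printf("now assigned to arena %u\n", ind);
        return (0);
    }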
@@ -1536,6 +1548,7 @@ stats_arenas_i_lruns_j_index(const size_t *mib, size_t miblen, size_t j)
 }
 #endif
 
+CTL_RO_GEN(stats_arenas_i_nthreads, ctl_stats.arenas[mib[2]].nthreads, unsigned)
 CTL_RO_GEN(stats_arenas_i_pactive, ctl_stats.arenas[mib[2]].pactive, size_t)
 CTL_RO_GEN(stats_arenas_i_pdirty, ctl_stats.arenas[mib[2]].pdirty, size_t)
 #ifdef JEMALLOC_STATS

src/jemalloc.c

@@ -7,12 +7,10 @@
 malloc_mutex_t		arenas_lock;
 arena_t			**arenas;
 unsigned		narenas;
-static unsigned		next_arena;
 
+pthread_key_t		arenas_tsd;
 #ifndef NO_TLS
 __thread arena_t	*arenas_tls JEMALLOC_ATTR(tls_model("initial-exec"));
-#else
-pthread_key_t		arenas_tsd;
 #endif
 
 #ifdef JEMALLOC_STATS
@@ -70,6 +68,7 @@ size_t	opt_narenas = 0;
 static void	wrtmessage(void *cbopaque, const char *s);
 static void	stats_print_atexit(void);
 static unsigned	malloc_ncpus(void);
+static void	arenas_cleanup(void *arg);
 #if (defined(JEMALLOC_STATS) && defined(NO_TLS))
 static void	thread_allocated_cleanup(void *arg);
 #endif
@@ -147,13 +146,53 @@ choose_arena_hard(void)
 	arena_t *ret;
 
 	if (narenas > 1) {
+		unsigned i, choose, first_null;
+
+		choose = 0;
+		first_null = narenas;
 		malloc_mutex_lock(&arenas_lock);
-		if ((ret = arenas[next_arena]) == NULL)
-			ret = arenas_extend(next_arena);
-		next_arena = (next_arena + 1) % narenas;
+		assert(arenas[0] != NULL);
+		for (i = 1; i < narenas; i++) {
+			if (arenas[i] != NULL) {
+				/*
+				 * Choose the first arena that has the lowest
+				 * number of threads assigned to it.
+				 */
+				if (arenas[i]->nthreads <
+				    arenas[choose]->nthreads)
+					choose = i;
+			} else if (first_null == narenas) {
+				/*
+				 * Record the index of the first uninitialized
+				 * arena, in case all extant arenas are in use.
+				 *
+				 * NB: It is possible for there to be
+				 * discontinuities in terms of initialized
+				 * versus uninitialized arenas, due to the
+				 * "thread.arena" mallctl.
+				 */
+				first_null = i;
+			}
+		}
+
+		if (arenas[choose]->nthreads == 0 || first_null == narenas) {
+			/*
+			 * Use an unloaded arena, or the least loaded arena if
+			 * all arenas are already initialized.
+			 */
+			ret = arenas[choose];
+		} else {
+			/* Initialize a new arena. */
+			ret = arenas_extend(first_null);
+		}
+		ret->nthreads++;
 		malloc_mutex_unlock(&arenas_lock);
-	} else
+	} else {
 		ret = arenas[0];
+		malloc_mutex_lock(&arenas_lock);
+		ret->nthreads++;
+		malloc_mutex_unlock(&arenas_lock);
+	}
 
 	ARENA_SET(ret);
@@ -259,6 +298,16 @@ malloc_ncpus(void)
 	return (ret);
 }
 
+static void
+arenas_cleanup(void *arg)
+{
+	arena_t *arena = (arena_t *)arg;
+
+	malloc_mutex_lock(&arenas_lock);
+	arena->nthreads--;
+	malloc_mutex_unlock(&arenas_lock);
+}
+
 #if (defined(JEMALLOC_STATS) && defined(NO_TLS))
 static void
 thread_allocated_cleanup(void *arg)
@@ -737,6 +786,7 @@ malloc_init_hard(void)
 	 * threaded mode.
 	 */
 	ARENA_SET(arenas[0]);
+	arenas[0]->nthreads++;
 
 	if (malloc_mutex_init(&arenas_lock))
 		return (true);
@@ -779,14 +829,10 @@ malloc_init_hard(void)
 		malloc_write(")\n");
 	}
 
-	next_arena = (narenas > 0) ? 1 : 0;
-
-#ifdef NO_TLS
-	if (pthread_key_create(&arenas_tsd, NULL) != 0) {
+	if (pthread_key_create(&arenas_tsd, arenas_cleanup) != 0) {
 		malloc_mutex_unlock(&init_lock);
 		return (true);
 	}
-#endif
 
 	/* Allocate and initialize arenas. */
 	arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas);
@@ -819,7 +865,6 @@ malloc_init_hard(void)
 	return (false);
 }
 
-
 #ifdef JEMALLOC_ZONE
 JEMALLOC_ATTR(constructor)
 void

src/stats.c

@@ -319,6 +319,7 @@ static void
 stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque,
     unsigned i)
 {
+	unsigned nthreads;
 	size_t pagesize, pactive, pdirty, mapped;
 	uint64_t npurge, nmadvise, purged;
 	size_t small_allocated;
@@ -328,6 +329,9 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque,
 
 	CTL_GET("arenas.pagesize", &pagesize, size_t);
 
+	CTL_I_GET("stats.arenas.0.nthreads", &nthreads, unsigned);
+	malloc_cprintf(write_cb, cbopaque,
+	    "assigned threads: %u\n", nthreads);
 	CTL_I_GET("stats.arenas.0.pactive", &pactive, size_t);
 	CTL_I_GET("stats.arenas.0.pdirty", &pdirty, size_t);
 	CTL_I_GET("stats.arenas.0.npurge", &npurge, uint64_t);