Improve thread-->arena assignment.
Rather than blindly assigning threads to arenas in round-robin fashion, choose the lowest-numbered arena that currently has the smallest number of threads assigned to it. Add the "stats.arenas.<i>.nthreads" mallctl.
commit 597632be18
parent 9c43c13a35
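For orientation, here is a minimal consumer of the new statistic (an illustrative sketch, not part of this commit; it assumes the unprefixed public mallctl() entry points and a stats-enabled build):

#include <stdio.h>
#include <stdint.h>
#include <jemalloc/jemalloc.h>

int
main(void)
{
    uint64_t epoch = 1;
    size_t esz = sizeof(epoch);
    unsigned narenas, nthreads, i;
    size_t usz = sizeof(unsigned);
    char name[64];

    /* Refresh the stats snapshot, then walk all arenas. */
    mallctl("epoch", &epoch, &esz, &epoch, esz);
    mallctl("arenas.narenas", &narenas, &usz, NULL, 0);
    for (i = 0; i < narenas; i++) {
        snprintf(name, sizeof(name), "stats.arenas.%u.nthreads", i);
        /* Uninitialized arenas report ENOENT; skip them. */
        if (mallctl(name, &nthreads, &usz, NULL, 0) == 0)
            printf("arena %u: %u thread(s) assigned\n", i, nthreads);
    }
    return (0);
}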
doc/jemalloc.xml.in
@@ -1642,6 +1642,16 @@ malloc_conf = "xmalloc:true";]]></programlisting>
         </para></listitem>
       </varlistentry>
 
+      <varlistentry>
+        <term>
+          <mallctl>stats.arenas.<i>.nthreads</mallctl>
+          (<type>unsigned</type>)
+          <literal>r-</literal>
+        </term>
+        <listitem><para>Number of threads currently assigned to
+        arena.</para></listitem>
+      </varlistentry>
+
       <varlistentry>
         <term>
           <mallctl>stats.arenas.<i>.pactive</mallctl>
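Since "<i>" nodes are normally resolved once into a MIB rather than via repeated string lookups, a caller might cache the path like this (hypothetical helper; mallctlnametomib() and mallctlbymib() are the standard jemalloc entry points for this):

#include <jemalloc/jemalloc.h>

/* Hypothetical helper: resolve the path once, then index arenas cheaply. */
static unsigned
arena_nthreads(unsigned i)
{
    size_t mib[4];
    size_t miblen = sizeof(mib) / sizeof(size_t);
    unsigned nthreads = 0;
    size_t sz = sizeof(nthreads);

    if (mallctlnametomib("stats.arenas.0.nthreads", mib, &miblen) == 0) {
        mib[2] = i;    /* Replace the "<i>" component. */
        mallctlbymib(mib, miblen, &nthreads, &sz, NULL, 0);
    }
    return (nthreads);
}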
include/jemalloc/internal/arena.h
@@ -295,8 +295,18 @@ struct arena_s {
     unsigned        ind;
 
     /*
-     * All non-bin-related operations on this arena require that lock be
-     * locked.
+     * Number of threads currently assigned to this arena.  This field is
+     * protected by arenas_lock.
+     */
+    unsigned        nthreads;
+
+    /*
+     * There are three classes of arena operations from a locking
+     * perspective:
+     * 1) Thread assignment (modifies nthreads) is protected by
+     *    arenas_lock.
+     * 2) Bin-related operations are protected by bin locks.
+     * 3) Chunk- and run-related operations are protected by this mutex.
     */
     malloc_mutex_t  lock;
 
include/jemalloc/internal/ctl.h
@@ -29,6 +29,7 @@ struct ctl_node_s {
 
 struct ctl_arena_stats_s {
     bool        initialized;
+    unsigned    nthreads;
     size_t      pactive;
     size_t      pdirty;
 #ifdef JEMALLOC_STATS
include/jemalloc/internal/jemalloc_internal.h
@@ -293,6 +293,7 @@ extern size_t lg_pagesize;
 extern unsigned     ncpus;
 
 extern malloc_mutex_t   arenas_lock; /* Protects arenas initialization. */
+extern pthread_key_t    arenas_tsd;
 #ifndef NO_TLS
 /*
  * Map of pthread_self() --> arenas[???], used for selecting an arena to use
@@ -302,9 +303,9 @@ extern __thread arena_t *arenas_tls JEMALLOC_ATTR(tls_model("initial-exec"));
 #  define ARENA_GET()   arenas_tls
 #  define ARENA_SET(v)  do {                        \
     arenas_tls = (v);                               \
+    pthread_setspecific(arenas_tsd, (void *)(v));   \
 } while (0)
 #else
-extern pthread_key_t    arenas_tsd;
 #  define ARENA_GET()   ((arena_t *)pthread_getspecific(arenas_tsd))
 #  define ARENA_SET(v)  do {                        \
     pthread_setspecific(arenas_tsd, (void *)(v));   \
src/arena.c
@@ -2175,6 +2175,7 @@ arena_new(arena_t *arena, unsigned ind)
     arena_bin_t *bin;
 
     arena->ind = ind;
+    arena->nthreads = 0;
 
     if (malloc_mutex_init(&arena->lock))
         return (true);
src/ctl.c
@@ -182,6 +182,7 @@ CTL_PROTO(stats_arenas_i_lruns_j_highruns)
 CTL_PROTO(stats_arenas_i_lruns_j_curruns)
 INDEX_PROTO(stats_arenas_i_lruns_j)
 #endif
+CTL_PROTO(stats_arenas_i_nthreads)
 CTL_PROTO(stats_arenas_i_pactive)
 CTL_PROTO(stats_arenas_i_pdirty)
 #ifdef JEMALLOC_STATS
@@ -434,6 +435,7 @@ static const ctl_node_t stats_arenas_i_lruns_node[] = {
 #endif
 
 static const ctl_node_t stats_arenas_i_node[] = {
+    {NAME("nthreads"),  CTL(stats_arenas_i_nthreads)},
     {NAME("pactive"),   CTL(stats_arenas_i_pactive)},
     {NAME("pdirty"),    CTL(stats_arenas_i_pdirty)}
 #ifdef JEMALLOC_STATS
@@ -620,6 +622,7 @@ ctl_arena_refresh(arena_t *arena, unsigned i)
 
     ctl_arena_clear(astats);
 
+    sstats->nthreads += astats->nthreads;
 #ifdef JEMALLOC_STATS
     ctl_arena_stats_amerge(astats, arena);
     /* Merge into sum stats as well. */
@@ -657,10 +660,17 @@ ctl_refresh(void)
      * Clear sum stats, since they will be merged into by
      * ctl_arena_refresh().
      */
+    ctl_stats.arenas[narenas].nthreads = 0;
     ctl_arena_clear(&ctl_stats.arenas[narenas]);
 
     malloc_mutex_lock(&arenas_lock);
     memcpy(tarenas, arenas, sizeof(arena_t *) * narenas);
+    for (i = 0; i < narenas; i++) {
+        if (arenas[i] != NULL)
+            ctl_stats.arenas[i].nthreads = arenas[i]->nthreads;
+        else
+            ctl_stats.arenas[i].nthreads = 0;
+    }
     malloc_mutex_unlock(&arenas_lock);
     for (i = 0; i < narenas; i++) {
         bool initialized = (tarenas[i] != NULL);
@@ -1129,6 +1139,8 @@ thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
     malloc_mutex_lock(&arenas_lock);
     if ((arena = arenas[newind]) == NULL)
         arena = arenas_extend(newind);
+    arenas[oldind]->nthreads--;
+    arenas[newind]->nthreads++;
     malloc_mutex_unlock(&arenas_lock);
     if (arena == NULL) {
         ret = EAGAIN;
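The bookkeeping above is exercised whenever a thread migrates itself with the existing "thread.arena" mallctl; an illustrative caller (not part of the diff, and assuming the unprefixed public API):

#include <jemalloc/jemalloc.h>

/*
 * Move the calling thread to arena `ind`.  The old arena's nthreads is
 * decremented and the new arena's incremented under arenas_lock, per
 * thread_arena_ctl() above.
 */
static int
pin_to_arena(unsigned ind)
{
    return (mallctl("thread.arena", NULL, NULL, &ind, sizeof(ind)));
}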
@@ -1536,6 +1548,7 @@ stats_arenas_i_lruns_j_index(const size_t *mib, size_t miblen, size_t j)
 }
 
 #endif
+CTL_RO_GEN(stats_arenas_i_nthreads, ctl_stats.arenas[mib[2]].nthreads, unsigned)
 CTL_RO_GEN(stats_arenas_i_pactive, ctl_stats.arenas[mib[2]].pactive, size_t)
 CTL_RO_GEN(stats_arenas_i_pdirty, ctl_stats.arenas[mib[2]].pdirty, size_t)
 #ifdef JEMALLOC_STATS
src/jemalloc.c
@@ -7,12 +7,10 @@
 malloc_mutex_t      arenas_lock;
 arena_t             **arenas;
 unsigned            narenas;
-static unsigned     next_arena;
 
+pthread_key_t       arenas_tsd;
 #ifndef NO_TLS
 __thread arena_t    *arenas_tls JEMALLOC_ATTR(tls_model("initial-exec"));
-#else
-pthread_key_t       arenas_tsd;
 #endif
 
 #ifdef JEMALLOC_STATS
@@ -70,6 +68,7 @@ size_t opt_narenas = 0;
 static void     wrtmessage(void *cbopaque, const char *s);
 static void     stats_print_atexit(void);
 static unsigned malloc_ncpus(void);
+static void     arenas_cleanup(void *arg);
 #if (defined(JEMALLOC_STATS) && defined(NO_TLS))
 static void     thread_allocated_cleanup(void *arg);
 #endif
@@ -147,13 +146,53 @@ choose_arena_hard(void)
     arena_t *ret;
 
     if (narenas > 1) {
+        unsigned i, choose, first_null;
+
+        choose = 0;
+        first_null = narenas;
         malloc_mutex_lock(&arenas_lock);
-        if ((ret = arenas[next_arena]) == NULL)
-            ret = arenas_extend(next_arena);
-        next_arena = (next_arena + 1) % narenas;
+        assert(arenas[0] != NULL);
+        for (i = 1; i < narenas; i++) {
+            if (arenas[i] != NULL) {
+                /*
+                 * Choose the first arena that has the lowest
+                 * number of threads assigned to it.
+                 */
+                if (arenas[i]->nthreads <
+                    arenas[choose]->nthreads)
+                    choose = i;
+            } else if (first_null == narenas) {
+                /*
+                 * Record the index of the first uninitialized
+                 * arena, in case all extant arenas are in use.
+                 *
+                 * NB: It is possible for there to be
+                 * discontinuities in terms of initialized
+                 * versus uninitialized arenas, due to the
+                 * "thread.arena" mallctl.
+                 */
+                first_null = i;
+            }
+        }
+
+        if (arenas[choose]->nthreads == 0 || first_null == narenas) {
+            /*
+             * Use an unloaded arena, or the least loaded arena if
+             * all arenas are already initialized.
+             */
+            ret = arenas[choose];
+        } else {
+            /* Initialize a new arena. */
+            ret = arenas_extend(first_null);
+        }
+        ret->nthreads++;
         malloc_mutex_unlock(&arenas_lock);
-    } else
+    } else {
         ret = arenas[0];
+        malloc_mutex_lock(&arenas_lock);
+        ret->nthreads++;
+        malloc_mutex_unlock(&arenas_lock);
+    }
 
     ARENA_SET(ret);
 
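In isolation, the selection policy reduces to the following (a hypothetical standalone distillation for clarity; it mirrors, but is not, the code above):

/*
 * Hypothetical distillation of the policy (not jemalloc code): nthreads[i]
 * holds each slot's thread count, with -1 marking an uninitialized slot;
 * slot 0 is assumed initialized, as asserted above.  Returns the slot a
 * newly seen thread should be assigned to.
 */
static unsigned
pick_slot(const int *nthreads, unsigned nslots)
{
    unsigned i, choose = 0, first_null = nslots;

    for (i = 1; i < nslots; i++) {
        if (nthreads[i] >= 0) {
            /* Strict '<' makes the lowest-numbered slot win ties. */
            if (nthreads[i] < nthreads[choose])
                choose = i;
        } else if (first_null == nslots)
            first_null = i;    /* Remember the first empty slot. */
    }
    /* Reuse an idle slot, or the least loaded one if none is free. */
    if (nthreads[choose] == 0 || first_null == nslots)
        return (choose);
    /* Otherwise bring a fresh slot into use. */
    return (first_null);
}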
@@ -259,6 +298,16 @@ malloc_ncpus(void)
     return (ret);
 }
 
+static void
+arenas_cleanup(void *arg)
+{
+    arena_t *arena = (arena_t *)arg;
+
+    malloc_mutex_lock(&arenas_lock);
+    arena->nthreads--;
+    malloc_mutex_unlock(&arenas_lock);
+}
+
 #if (defined(JEMALLOC_STATS) && defined(NO_TLS))
 static void
 thread_allocated_cleanup(void *arg)
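arenas_cleanup() relies on the pthreads TSD destructor mechanism, which runs once per exiting thread for each key whose value is non-NULL. A self-contained sketch of that pattern (generic pthreads, independent of jemalloc; compile with -lpthread):

#include <pthread.h>
#include <stdio.h>

static pthread_key_t key;

static void
cleanup(void *arg)
{
    /* Invoked at thread exit because the thread set a non-NULL value. */
    printf("destructor saw %p\n", arg);
}

static void *
worker(void *arg)
{
    pthread_setspecific(key, arg);    /* Analogous to ARENA_SET(). */
    return (NULL);
}

int
main(void)
{
    pthread_t thd;

    pthread_key_create(&key, cleanup);    /* Like arenas_tsd above. */
    pthread_create(&thd, NULL, worker, (void *)1);
    pthread_join(&thd, NULL);    /* cleanup() ran before join returns. */
    return (0);
}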
@@ -737,6 +786,7 @@ malloc_init_hard(void)
      * threaded mode.
      */
     ARENA_SET(arenas[0]);
+    arenas[0]->nthreads++;
 
     if (malloc_mutex_init(&arenas_lock))
         return (true);
@@ -779,14 +829,10 @@ malloc_init_hard(void)
         malloc_write(")\n");
     }
 
-    next_arena = (narenas > 0) ? 1 : 0;
-
-#ifdef NO_TLS
-    if (pthread_key_create(&arenas_tsd, NULL) != 0) {
+    if (pthread_key_create(&arenas_tsd, arenas_cleanup) != 0) {
         malloc_mutex_unlock(&init_lock);
         return (true);
     }
-#endif
 
     /* Allocate and initialize arenas. */
     arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas);
@@ -819,7 +865,6 @@ malloc_init_hard(void)
     return (false);
 }
 
-
 #ifdef JEMALLOC_ZONE
 JEMALLOC_ATTR(constructor)
 void
src/stats.c
@@ -319,6 +319,7 @@ static void
 stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque,
     unsigned i)
 {
+    unsigned nthreads;
     size_t pagesize, pactive, pdirty, mapped;
     uint64_t npurge, nmadvise, purged;
     size_t small_allocated;
@@ -328,6 +329,9 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque,
 
     CTL_GET("arenas.pagesize", &pagesize, size_t);
 
+    CTL_I_GET("stats.arenas.0.nthreads", &nthreads, unsigned);
+    malloc_cprintf(write_cb, cbopaque,
+        "assigned threads: %u\n", nthreads);
     CTL_I_GET("stats.arenas.0.pactive", &pactive, size_t);
     CTL_I_GET("stats.arenas.0.pdirty", &pdirty, size_t);
     CTL_I_GET("stats.arenas.0.npurge", &npurge, uint64_t);
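With a stats-enabled build, each per-arena section of the statistics output should now begin with the new line, e.g. "assigned threads: 1" for a single-threaded program. A minimal way to see it (illustrative; assumes the unprefixed public API):

#include <stdlib.h>
#include <jemalloc/jemalloc.h>

int
main(void)
{
    free(malloc(16));    /* Ensure this thread is assigned an arena. */
    malloc_stats_print(NULL, NULL, NULL);    /* Default stderr writer. */
    return (0);
}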