From 9c0549007dcb64f4ff35d37390a9a6a8d3cea880 Mon Sep 17 00:00:00 2001
From: David Goldblatt
Date: Fri, 11 Aug 2017 17:34:21 -0700
Subject: [PATCH] Make arena stats collection go through cache bins.

This eliminates the need for the arena stats code to "know" about
tcaches; all that it needs is a cache_bin_array_descriptor_t to tell it
where to find cache_bins whose stats it should aggregate.
---
 include/jemalloc/internal/arena_structs_b.h | 11 +++---
 include/jemalloc/internal/cache_bin.h       | 34 +++++++++++++++++-
 include/jemalloc/internal/tcache_structs.h  | 39 +++++++++++++++++----
 src/arena.c                                 |  8 ++---
 src/tcache.c                                |  9 +++++
 5 files changed, 84 insertions(+), 17 deletions(-)
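To see the shape of the decoupling in isolation, here is a minimal,
self-contained sketch of the pattern the patch adopts. It is illustrative
only: bin_t, descriptor_t, owner_t, cache_t, and the helper names are
invented for the example, and a hand-rolled singly-linked list stands in
for jemalloc's ql.h intrusive lists. The point to notice is that the stats
consumer walks descriptors and never names the cache type.

/*
 * Illustrative sketch only -- not jemalloc code.  All names here are
 * invented; jemalloc uses ql.h intrusive lists where this sketch uses
 * a plain "next" pointer.
 */
#include <stddef.h>
#include <stdio.h>

#define NBINS_EXAMPLE 4

typedef struct { int ncached; } bin_t;

/* The descriptor: exactly what the stats consumer needs, nothing more. */
typedef struct descriptor_s descriptor_t;
struct descriptor_s {
	descriptor_t *next;
	bin_t *bins_small;
	bin_t *bins_large;
};

/* The consumer (the "arena") sees only descriptors. */
typedef struct { descriptor_t *head; } owner_t;

/* The producer (the "tcache"); its layout is invisible to the consumer. */
typedef struct {
	descriptor_t descriptor;
	bin_t bins_small[NBINS_EXAMPLE];
	bin_t bins_large[NBINS_EXAMPLE];
} cache_t;

static void
cache_register(owner_t *owner, cache_t *cache) {
	cache->descriptor.bins_small = cache->bins_small;
	cache->descriptor.bins_large = cache->bins_large;
	cache->descriptor.next = owner->head;
	owner->head = &cache->descriptor;
}

/* Aggregation never mentions cache_t: the dependency is gone. */
static int
owner_count_cached(const owner_t *owner) {
	int total = 0;
	for (const descriptor_t *d = owner->head; d != NULL; d = d->next) {
		for (int i = 0; i < NBINS_EXAMPLE; i++) {
			total += d->bins_small[i].ncached;
			total += d->bins_large[i].ncached;
		}
	}
	return total;
}

int
main(void) {
	owner_t owner = {NULL};
	cache_t cache = {{NULL, NULL, NULL}, {{0}}, {{0}}};
	cache_register(&owner, &cache);
	cache.bins_small[0].ncached = 3;
	cache.bins_large[1].ncached = 2;
	printf("cached: %d\n", owner_count_cached(&owner)); /* prints 5 */
	return 0;
}

As with cache_bin_array_descriptor_t below, the descriptor is embedded in
the cache it describes, so registering a cache allocates nothing; with an
intrusive doubly-linked list such as ql.h provides, removal is O(1) too.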
diff --git a/include/jemalloc/internal/arena_structs_b.h b/include/jemalloc/internal/arena_structs_b.h
index d1fffec1..c4e4310d 100644
--- a/include/jemalloc/internal/arena_structs_b.h
+++ b/include/jemalloc/internal/arena_structs_b.h
@@ -162,14 +162,15 @@ struct arena_s {
 	arena_stats_t		stats;
 
 	/*
-	 * List of tcaches for extant threads associated with this arena.
-	 * Stats from these are merged incrementally, and at exit if
-	 * opt_stats_print is enabled.
+	 * Lists of tcaches and cache_bin_array_descriptors for extant threads
+	 * associated with this arena.  Stats from these are merged
+	 * incrementally, and at exit if opt_stats_print is enabled.
 	 *
 	 * Synchronization: tcache_ql_mtx.
 	 */
-	ql_head(tcache_t)	tcache_ql;
-	malloc_mutex_t		tcache_ql_mtx;
+	ql_head(tcache_t)			tcache_ql;
+	ql_head(cache_bin_array_descriptor_t)	cache_bin_array_descriptor_ql;
+	malloc_mutex_t				tcache_ql_mtx;
 
 	/* Synchronization: internal. */
 	prof_accum_t		prof_accum;
diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h
index 37025b5c..85d9de01 100644
--- a/include/jemalloc/internal/cache_bin.h
+++ b/include/jemalloc/internal/cache_bin.h
@@ -1,6 +1,19 @@
 #ifndef JEMALLOC_INTERNAL_CACHE_BIN_H
 #define JEMALLOC_INTERNAL_CACHE_BIN_H
 
+#include "jemalloc/internal/ql.h"
+
+/*
+ * The cache_bins are the mechanism that the tcache and the arena use to
+ * communicate.  The tcache fills from and flushes to the arena by passing a
+ * cache_bin_t to fill/flush.  When the arena needs to pull stats from the
+ * tcaches associated with it, it does so by iterating over its
+ * cache_bin_array_descriptor_t objects and reading out the per-bin stats
+ * they contain.  As a result, the arena need not know about the existence
+ * of the tcache at all.
+ */
+
+
 /*
  * The count of the number of cached allocations in a bin.  We make this signed
  * so that negative numbers can encode "invalid" states (e.g. a low water mark
@@ -51,6 +64,26 @@ struct cache_bin_s {
 	void **avail;
 };
 
+typedef struct cache_bin_array_descriptor_s cache_bin_array_descriptor_t;
+struct cache_bin_array_descriptor_s {
+	/*
+	 * The arena keeps a list of the cache bins associated with it, for
+	 * stats collection.
+	 */
+	ql_elm(cache_bin_array_descriptor_t) link;
+	/* Pointers to the tcache bins. */
+	cache_bin_t *bins_small;
+	cache_bin_t *bins_large;
+};
+
+static inline void
+cache_bin_array_descriptor_init(cache_bin_array_descriptor_t *descriptor,
+    cache_bin_t *bins_small, cache_bin_t *bins_large) {
+	ql_elm_new(descriptor, link);
+	descriptor->bins_small = bins_small;
+	descriptor->bins_large = bins_large;
+}
+
 JEMALLOC_ALWAYS_INLINE void *
 cache_alloc_easy(cache_bin_t *bin, bool *success) {
 	void *ret;
@@ -76,7 +109,6 @@ cache_alloc_easy(cache_bin_t *bin, bool *success) {
 	}
 
 	return ret;
-
 }
 
 #endif /* JEMALLOC_INTERNAL_CACHE_BIN_H */
diff --git a/include/jemalloc/internal/tcache_structs.h b/include/jemalloc/internal/tcache_structs.h
index ad0fe66c..07b73870 100644
--- a/include/jemalloc/internal/tcache_structs.h
+++ b/include/jemalloc/internal/tcache_structs.h
@@ -7,21 +7,46 @@
 #include "jemalloc/internal/ticker.h"
 
 struct tcache_s {
-	/* Data accessed frequently first: prof, ticker and small bins. */
-	uint64_t	prof_accumbytes;/* Cleared after arena_prof_accum(). */
-	ticker_t	gc_ticker;	/* Drives incremental GC. */
+	/*
+	 * To minimize our cache footprint, we put the frequently accessed
+	 * data together at the start of this struct.
+	 */
+
+	/* Cleared after arena_prof_accum(). */
+	uint64_t	prof_accumbytes;
+	/* Drives incremental GC. */
+	ticker_t	gc_ticker;
 	/*
 	 * The pointer stacks associated with bins follow as a contiguous array.
 	 * During tcache initialization, the avail pointer in each element of
 	 * tbins is initialized to point to the proper offset within this array.
 	 */
 	cache_bin_t	bins_small[NBINS];
-	/* Data accessed less often below. */
-	ql_elm(tcache_t) link;		/* Used for aggregating stats. */
-	arena_t		*arena;		/* Associated arena. */
-	szind_t		next_gc_bin;	/* Next bin to GC. */
+
+	/*
+	 * This data is less hot; we can be a little less careful with our
+	 * footprint here.
+	 */
+	/* Lets us track all the tcaches in an arena. */
+	ql_elm(tcache_t) link;
+	/*
+	 * The descriptor lets the arena find our cache bins without seeing the
+	 * tcache definition.  This enables arenas to aggregate stats across
+	 * tcaches without having a tcache dependency.
+	 */
+	cache_bin_array_descriptor_t cache_bin_array_descriptor;
+
+	/* The arena this tcache is associated with. */
+	arena_t		*arena;
+	/* Next bin to GC. */
+	szind_t		next_gc_bin;
 	/* For small bins, fill (ncached_max >> lg_fill_div). */
 	uint8_t		lg_fill_div[NBINS];
+	/*
+	 * We put the cache bins for large size classes at the end of the
+	 * struct, since some of them might not get used.  If they never are,
+	 * we can avoid ever touching an extra page of the struct's memory.
+	 */
 	cache_bin_t	bins_large[NSIZES-NBINS];
 };
 
diff --git a/src/arena.c b/src/arena.c
index 60b482e9..19aafaf0 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -303,16 +303,16 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads,
 	/* tcache_bytes counts currently cached bytes. */
 	atomic_store_zu(&astats->tcache_bytes, 0, ATOMIC_RELAXED);
 	malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx);
-	tcache_t *tcache;
-	ql_foreach(tcache, &arena->tcache_ql, link) {
+	cache_bin_array_descriptor_t *descriptor;
+	ql_foreach(descriptor, &arena->cache_bin_array_descriptor_ql, link) {
 		szind_t i = 0;
 		for (; i < NBINS; i++) {
-			cache_bin_t *tbin = tcache_small_bin_get(tcache, i);
+			cache_bin_t *tbin = &descriptor->bins_small[i];
 			arena_stats_accum_zu(&astats->tcache_bytes,
 			    tbin->ncached * sz_index2size(i));
 		}
 		for (; i < nhbins; i++) {
-			cache_bin_t *tbin = tcache_large_bin_get(tcache, i);
+			cache_bin_t *tbin = &descriptor->bins_large[i];
 			arena_stats_accum_zu(&astats->tcache_bytes,
 			    tbin->ncached * sz_index2size(i));
 		}
diff --git a/src/tcache.c b/src/tcache.c
index 7d32d4d5..e22f8067 100644
--- a/src/tcache.c
+++ b/src/tcache.c
@@ -291,8 +291,15 @@ tcache_arena_associate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) {
 	if (config_stats) {
 		/* Link into list of extant tcaches. */
 		malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx);
+
 		ql_elm_new(tcache, link);
 		ql_tail_insert(&arena->tcache_ql, tcache, link);
+		cache_bin_array_descriptor_init(
+		    &tcache->cache_bin_array_descriptor, tcache->bins_small,
+		    tcache->bins_large);
+		ql_tail_insert(&arena->cache_bin_array_descriptor_ql,
+		    &tcache->cache_bin_array_descriptor, link);
+
 		malloc_mutex_unlock(tsdn, &arena->tcache_ql_mtx);
 	}
 }
@@ -316,6 +323,8 @@ tcache_arena_dissociate(tsdn_t *tsdn, tcache_t *tcache) {
 			assert(in_ql);
 		}
 		ql_remove(&arena->tcache_ql, tcache, link);
+		ql_remove(&arena->cache_bin_array_descriptor_ql,
+		    &tcache->cache_bin_array_descriptor, link);
 		tcache_stats_merge(tsdn, tcache, arena);
 		malloc_mutex_unlock(tsdn, &arena->tcache_ql_mtx);
 	}
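
A note on the invariant this creates: arena->tcache_ql and
arena->cache_bin_array_descriptor_ql are parallel lists, kept coherent only
because every insertion (tcache_arena_associate) and removal
(tcache_arena_dissociate) updates both while holding tcache_ql_mtx. A
hypothetical debug-only helper, not part of this patch, could make that
explicit; arena_tcache_lists_sane is an invented name, but the macros and
fields are the ones the patch itself uses:

/*
 * Hypothetical debug check -- not part of this patch.  The caller must
 * hold arena->tcache_ql_mtx.  Both lists are only ever modified together
 * under that mutex, so their lengths must always agree.
 */
static void
arena_tcache_lists_sane(arena_t *arena) {
	size_t ntcaches = 0;
	size_t ndescriptors = 0;
	tcache_t *tcache;
	ql_foreach(tcache, &arena->tcache_ql, link) {
		ntcaches++;
	}
	cache_bin_array_descriptor_t *descriptor;
	ql_foreach(descriptor, &arena->cache_bin_array_descriptor_ql, link) {
		ndescriptors++;
	}
	assert(ntcaches == ndescriptors);
}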
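
Separately, the tcache_s reshuffle is an instance of a general hot/cold
struct-layout technique: frequently touched members go first so they share
cache lines, and a large, possibly unused tail (bins_large here) goes last
so its pages may never be faulted in. Where that kind of layout matters, it
can be pinned down at compile time; a sketch with invented names, assuming
C11 static_assert:

#include <assert.h>	/* static_assert (C11) */
#include <stddef.h>	/* offsetof */
#include <stdint.h>

/* Invented type for illustration: hot members first, cold tail last. */
typedef struct {
	uint64_t hot_counter;	/* touched on every operation */
	uint32_t hot_ticker;	/* likewise */
	/* ---- everything below is cold ---- */
	void *cold_link;
	uint8_t cold_table[4096];	/* possibly never touched at all */
} layout_example_t;

/* Pin the intent down: hot members fit in one 64-byte cache line. */
static_assert(offsetof(layout_example_t, cold_link) <= 64,
    "hot members spill past the first cache line");

int
main(void) {
	return 0;	/* the check above is purely compile-time */
}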