Optimizing TSD and thread cache layout.

1) Re-organize TSD so that frequently accessed fields are closer to the
beginning and more compact.  Assuming 64-bit, the first 2.5 cachelines now
contain everything needed on the tcache fast path, except the tcache struct itself.

2) Re-organize tcache and tbins.  Take lg_fill_div out of tbin, and reduce tbin
to 24 bytes (down from 32). Split tbins into tbins_small and tbins_large, and
place tbins_small close to the beginning.
This commit is contained in:
Qi Wang
2017-04-06 12:35:22 -07:00
committed by Qi Wang
parent 4dec507546
commit 36bd90b962
10 changed files with 189 additions and 103 deletions

View File

@@ -10,10 +10,14 @@ struct tcache_bin_info_s {
};
struct tcache_bin_s {
low_water_t low_water; /* Min # cached since last GC. */
uint32_t ncached; /* # of cached objects. */
/*
* ncached and stats are both modified frequently. Let's keep them
* close so that they have a higher chance of being on the same
* cacheline, thus less write-backs.
*/
tcache_bin_stats_t tstats;
int low_water; /* Min # cached since last GC. */
unsigned lg_fill_div; /* Fill (ncached_max >> lg_fill_div). */
unsigned ncached; /* # of cached objects. */
/*
* To make use of adjacent cacheline prefetch, the items in the avail
* stack goes to higher address for newer allocations. avail points
@@ -25,11 +29,9 @@ struct tcache_bin_s {
};
struct tcache_s {
ql_elm(tcache_t) link; /* Used for aggregating stats. */
/* Data accessed frequently first: prof, ticker and small bins. */
uint64_t prof_accumbytes;/* Cleared after arena_prof_accum(). */
ticker_t gc_ticker; /* Drives incremental GC. */
szind_t next_gc_bin; /* Next bin to GC. */
arena_t *arena; /* Associated arena. */
/*
* The pointer stacks associated with tbins follow as a contiguous
* array. During tcache initialization, the avail pointer in each
@@ -37,9 +39,21 @@ struct tcache_s {
* this array.
*/
#ifdef JEMALLOC_TCACHE
tcache_bin_t tbins[NSIZES];
tcache_bin_t tbins_small[NBINS];
#else
tcache_bin_t tbins[0];
tcache_bin_t tbins_small[0];
#endif
/* Data accessed less often below. */
ql_elm(tcache_t) link; /* Used for aggregating stats. */
arena_t *arena; /* Associated arena. */
szind_t next_gc_bin; /* Next bin to GC. */
#ifdef JEMALLOC_TCACHE
/* For small bins, fill (ncached_max >> lg_fill_div). */
uint8_t lg_fill_div[NBINS];
tcache_bin_t tbins_large[NSIZES-NBINS];
#else
uint8_t lg_fill_div[0];
tcache_bin_t tbins_large[0];
#endif
};