diff --git a/include/jemalloc/internal/bin.h b/include/jemalloc/internal/bin.h
index 9241ee73..63f97395 100644
--- a/include/jemalloc/internal/bin.h
+++ b/include/jemalloc/internal/bin.h
@@ -16,6 +16,12 @@ struct bin_s {
 	/* All operations on bin_t fields require lock ownership. */
 	malloc_mutex_t lock;
 
+	/*
+	 * Bin statistics. These get touched every time the lock is acquired,
+	 * so put them close by in the hopes of getting some cache locality.
+	 */
+	bin_stats_t stats;
+
 	/*
 	 * Current slab being used to service allocations of this bin's size
 	 * class. slabcur is independent of slabs_{nonfull,full}; whenever
@@ -33,9 +39,6 @@ struct bin_s {
 
 	/* List used to track full slabs. */
 	edata_list_active_t slabs_full;
-
-	/* Bin statistics. */
-	bin_stats_t stats;
 };
 
 /* A set of sharded bins of the same size class. */
diff --git a/src/tcache.c b/src/tcache.c
index 19e330a2..7c4047f4 100644
--- a/src/tcache.c
+++ b/src/tcache.c
@@ -347,6 +347,13 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin,
 			cur_bin = arena_get_bin(cur_arena, binind,
 			    cur_binshard);
 			assert(cur_binshard < bin_infos[binind].n_shards);
+			/*
+			 * If you're looking at profiles, you might think this
+			 * is a good place to prefetch the bin stats, which are
+			 * often a cache miss. This turns out not to be
+			 * helpful on the workloads we've looked at, with moving
+			 * the bin stats next to the lock seeming to do better.
+			 */
 		}
 
 		if (small) {
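
For reviewers curious what the prefetch variant rejected in the tcache.c comment would look like: a minimal sketch, assuming GCC/Clang's __builtin_prefetch. The type definitions and the bin_stats_prefetch helper below are simplified, hypothetical stand-ins for the real jemalloc structs, not part of this patch.

/*
 * Hypothetical sketch only -- NOT part of this patch.  Shows the
 * prefetch alternative the new tcache.c comment argues against, using
 * GCC/Clang's __builtin_prefetch.  The types below are simplified
 * stand-ins for jemalloc's malloc_mutex_t / bin_stats_t.
 */
#include <stddef.h>

typedef struct { int opaque; } malloc_mutex_t;           /* stand-in */
typedef struct { size_t nmalloc, ndalloc; } bin_stats_t; /* stand-in */

typedef struct bin_s {
	malloc_mutex_t lock;
	/* This patch's approach: stats placed adjacent to the lock. */
	bin_stats_t stats;
} bin_t;

/* The rejected alternative: prefetch the stats before taking the lock. */
static inline void
bin_stats_prefetch(bin_t *bin) {
	/* rw = 1 (stats will be written), locality = 3 (high reuse). */
	__builtin_prefetch(&bin->stats, 1, 3);
}

Per the comment, issuing such a prefetch from tcache_bin_flush_impl did not help on the workloads measured; co-locating the stats with the lock, so that the cache line(s) pulled in by the lock acquisition tend to cover the stats as well, did better.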