Pull out caching for a bin into its own file.

This is the first step towards breaking up the tcache and arena (since they
interact primarily at the bin level).  It should also make a future arena
caching implementation more straightforward.
This commit is contained in:
David Goldblatt 2017-08-10 14:27:58 -07:00 committed by David Goldblatt
parent b0825351d9
commit f3170baa30
11 changed files with 148 additions and 139 deletions

View File

@ -50,7 +50,7 @@ void arena_decay(tsdn_t *tsdn, arena_t *arena, bool is_background_thread,
void arena_reset(tsd_t *tsd, arena_t *arena);
void arena_destroy(tsd_t *tsd, arena_t *arena);
void arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache,
tcache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes);
cache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes);
void arena_alloc_junk_small(void *ptr, const arena_bin_info_t *bin_info,
bool zero);

View File

@ -0,0 +1,82 @@
#ifndef JEMALLOC_INTERNAL_CACHE_BIN_H
#define JEMALLOC_INTERNAL_CACHE_BIN_H
/*
* The count of the number of cached allocations in a bin. We make this signed
* so that negative numbers can encode "invalid" states (e.g. a low water mark
* for a bin that has never been filled).
*/
typedef int32_t cache_bin_sz_t;
typedef struct cache_bin_stats_s cache_bin_stats_t;
struct cache_bin_stats_s {
/*
* Number of allocation requests that corresponded to the size of this
* bin.
*/
uint64_t nrequests;
};
/*
* Read-only information associated with each element of tcache_t's tbins array
* is stored separately, mainly to reduce memory usage.
*/
typedef struct cache_bin_info_s cache_bin_info_t;
struct cache_bin_info_s {
/* Upper limit on ncached. */
cache_bin_sz_t ncached_max;
};
typedef struct cache_bin_s cache_bin_t;
struct cache_bin_s {
/* Min # cached since last GC. */
cache_bin_sz_t low_water;
/* # of cached objects. */
cache_bin_sz_t ncached;
/*
* ncached and stats are both modified frequently. Let's keep them
* close so that they have a higher chance of being on the same
* cacheline, thus less write-backs.
*/
cache_bin_stats_t tstats;
/*
* Stack of available objects.
*
* To make use of adjacent cacheline prefetch, the items in the avail
* stack goes to higher address for newer allocations. avail points
* just above the available space, which means that
* avail[-ncached, ... -1] are available items and the lowest item will
* be allocated first.
*/
void **avail;
};
JEMALLOC_ALWAYS_INLINE void *
cache_alloc_easy(cache_bin_t *bin, bool *success) {
void *ret;
if (unlikely(bin->ncached == 0)) {
bin->low_water = -1;
*success = false;
return NULL;
}
/*
* success (instead of ret) should be checked upon the return of this
* function. We avoid checking (ret == NULL) because there is never a
* null stored on the avail stack (which is unknown to the compiler),
* and eagerly checking ret would cause pipeline stall (waiting for the
* cacheline).
*/
*success = true;
ret = *(bin->avail - bin->ncached);
bin->ncached--;
if (unlikely(bin->ncached < bin->low_water)) {
bin->low_water = bin->ncached;
}
return ret;
}
#endif /* JEMALLOC_INTERNAL_CACHE_BIN_H */

View File

@ -106,16 +106,16 @@ decay_ticker_get(tsd_t *tsd, unsigned ind) {
return &tdata->decay_ticker;
}
JEMALLOC_ALWAYS_INLINE tcache_bin_t *
JEMALLOC_ALWAYS_INLINE cache_bin_t *
tcache_small_bin_get(tcache_t *tcache, szind_t binind) {
assert(binind < NBINS);
return &tcache->tbins_small[binind];
return &tcache->bins_small[binind];
}
JEMALLOC_ALWAYS_INLINE tcache_bin_t *
JEMALLOC_ALWAYS_INLINE cache_bin_t *
tcache_large_bin_get(tcache_t *tcache, szind_t binind) {
assert(binind >= NBINS &&binind < nhbins);
return &tcache->tbins_large[binind - NBINS];
return &tcache->bins_large[binind - NBINS];
}
JEMALLOC_ALWAYS_INLINE bool

View File

@ -5,7 +5,6 @@
#include "jemalloc/internal/mutex_prof.h"
#include "jemalloc/internal/mutex.h"
#include "jemalloc/internal/size_classes.h"
#include "jemalloc/internal/stats_tsd.h"
/* OPTION(opt, var_name, default, set_value_to) */
#define STATS_PRINT_OPTIONS \

View File

@ -1,12 +0,0 @@
#ifndef JEMALLOC_INTERNAL_STATS_TSD_H
#define JEMALLOC_INTERNAL_STATS_TSD_H
typedef struct tcache_bin_stats_s {
/*
* Number of allocation requests that corresponded to the size of this
* bin.
*/
uint64_t nrequests;
} tcache_bin_stats_t;
#endif /* JEMALLOC_INTERNAL_STATS_TSD_H */

View File

@ -6,7 +6,7 @@
extern bool opt_tcache;
extern ssize_t opt_lg_tcache_max;
extern tcache_bin_info_t *tcache_bin_info;
extern cache_bin_info_t *tcache_bin_info;
/*
* Number of tcache bins. There are NBINS small-object bins, plus 0 or more
@ -30,10 +30,10 @@ extern tcaches_t *tcaches;
size_t tcache_salloc(tsdn_t *tsdn, const void *ptr);
void tcache_event_hard(tsd_t *tsd, tcache_t *tcache);
void *tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache,
tcache_bin_t *tbin, szind_t binind, bool *tcache_success);
void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin,
cache_bin_t *tbin, szind_t binind, bool *tcache_success);
void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin,
szind_t binind, unsigned rem);
void tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind,
void tcache_bin_flush_large(tsd_t *tsd, cache_bin_t *tbin, szind_t binind,
unsigned rem, tcache_t *tcache);
void tcache_arena_reassociate(tsdn_t *tsdn, tcache_t *tcache,
arena_t *arena);

View File

@ -37,44 +37,17 @@ tcache_event(tsd_t *tsd, tcache_t *tcache) {
}
}
JEMALLOC_ALWAYS_INLINE void *
tcache_alloc_easy(tcache_bin_t *tbin, bool *tcache_success) {
void *ret;
if (unlikely(tbin->ncached == 0)) {
tbin->low_water = -1;
*tcache_success = false;
return NULL;
}
/*
* tcache_success (instead of ret) should be checked upon the return of
* this function. We avoid checking (ret == NULL) because there is
* never a null stored on the avail stack (which is unknown to the
* compiler), and eagerly checking ret would cause pipeline stall
* (waiting for the cacheline).
*/
*tcache_success = true;
ret = *(tbin->avail - tbin->ncached);
tbin->ncached--;
if (unlikely((low_water_t)tbin->ncached < tbin->low_water)) {
tbin->low_water = tbin->ncached;
}
return ret;
}
JEMALLOC_ALWAYS_INLINE void *
tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size,
szind_t binind, bool zero, bool slow_path) {
void *ret;
tcache_bin_t *tbin;
cache_bin_t *bin;
bool tcache_success;
size_t usize JEMALLOC_CC_SILENCE_INIT(0);
assert(binind < NBINS);
tbin = tcache_small_bin_get(tcache, binind);
ret = tcache_alloc_easy(tbin, &tcache_success);
bin = tcache_small_bin_get(tcache, binind);
ret = cache_alloc_easy(bin, &tcache_success);
assert(tcache_success == (ret != NULL));
if (unlikely(!tcache_success)) {
bool tcache_hard_success;
@ -84,7 +57,7 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size,
}
ret = tcache_alloc_small_hard(tsd_tsdn(tsd), arena, tcache,
tbin, binind, &tcache_hard_success);
bin, binind, &tcache_hard_success);
if (tcache_hard_success == false) {
return NULL;
}
@ -118,7 +91,7 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size,
}
if (config_stats) {
tbin->tstats.nrequests++;
bin->tstats.nrequests++;
}
if (config_prof) {
tcache->prof_accumbytes += usize;
@ -131,12 +104,12 @@ JEMALLOC_ALWAYS_INLINE void *
tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size,
szind_t binind, bool zero, bool slow_path) {
void *ret;
tcache_bin_t *tbin;
cache_bin_t *bin;
bool tcache_success;
assert(binind >= NBINS &&binind < nhbins);
tbin = tcache_large_bin_get(tcache, binind);
ret = tcache_alloc_easy(tbin, &tcache_success);
bin = tcache_large_bin_get(tcache, binind);
ret = cache_alloc_easy(bin, &tcache_success);
assert(tcache_success == (ret != NULL));
if (unlikely(!tcache_success)) {
/*
@ -176,7 +149,7 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size,
}
if (config_stats) {
tbin->tstats.nrequests++;
bin->tstats.nrequests++;
}
if (config_prof) {
tcache->prof_accumbytes += usize;
@ -190,8 +163,8 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size,
JEMALLOC_ALWAYS_INLINE void
tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind,
bool slow_path) {
tcache_bin_t *tbin;
tcache_bin_info_t *tbin_info;
cache_bin_t *bin;
cache_bin_info_t *bin_info;
assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= SMALL_MAXCLASS);
@ -199,15 +172,15 @@ tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind,
arena_dalloc_junk_small(ptr, &arena_bin_info[binind]);
}
tbin = tcache_small_bin_get(tcache, binind);
tbin_info = &tcache_bin_info[binind];
if (unlikely(tbin->ncached == tbin_info->ncached_max)) {
tcache_bin_flush_small(tsd, tcache, tbin, binind,
(tbin_info->ncached_max >> 1));
bin = tcache_small_bin_get(tcache, binind);
bin_info = &tcache_bin_info[binind];
if (unlikely(bin->ncached == bin_info->ncached_max)) {
tcache_bin_flush_small(tsd, tcache, bin, binind,
(bin_info->ncached_max >> 1));
}
assert(tbin->ncached < tbin_info->ncached_max);
tbin->ncached++;
*(tbin->avail - tbin->ncached) = ptr;
assert(bin->ncached < bin_info->ncached_max);
bin->ncached++;
*(bin->avail - bin->ncached) = ptr;
tcache_event(tsd, tcache);
}
@ -215,8 +188,8 @@ tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind,
JEMALLOC_ALWAYS_INLINE void
tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind,
bool slow_path) {
tcache_bin_t *tbin;
tcache_bin_info_t *tbin_info;
cache_bin_t *bin;
cache_bin_info_t *bin_info;
assert(tcache_salloc(tsd_tsdn(tsd), ptr) > SMALL_MAXCLASS);
assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= tcache_maxclass);
@ -225,15 +198,15 @@ tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind,
large_dalloc_junk(ptr, sz_index2size(binind));
}
tbin = tcache_large_bin_get(tcache, binind);
tbin_info = &tcache_bin_info[binind];
if (unlikely(tbin->ncached == tbin_info->ncached_max)) {
tcache_bin_flush_large(tsd, tbin, binind,
(tbin_info->ncached_max >> 1), tcache);
bin = tcache_large_bin_get(tcache, binind);
bin_info = &tcache_bin_info[binind];
if (unlikely(bin->ncached == bin_info->ncached_max)) {
tcache_bin_flush_large(tsd, bin, binind,
(bin_info->ncached_max >> 1), tcache);
}
assert(tbin->ncached < tbin_info->ncached_max);
tbin->ncached++;
*(tbin->avail - tbin->ncached) = ptr;
assert(bin->ncached < bin_info->ncached_max);
bin->ncached++;
*(bin->avail - bin->ncached) = ptr;
tcache_event(tsd, tcache);
}

View File

@ -3,54 +3,26 @@
#include "jemalloc/internal/ql.h"
#include "jemalloc/internal/size_classes.h"
#include "jemalloc/internal/stats_tsd.h"
#include "jemalloc/internal/cache_bin.h"
#include "jemalloc/internal/ticker.h"
/*
* Read-only information associated with each element of tcache_t's tbins array
* is stored separately, mainly to reduce memory usage.
*/
struct tcache_bin_info_s {
unsigned ncached_max; /* Upper limit on ncached. */
};
struct tcache_bin_s {
low_water_t low_water; /* Min # cached since last GC. */
uint32_t ncached; /* # of cached objects. */
/*
* ncached and stats are both modified frequently. Let's keep them
* close so that they have a higher chance of being on the same
* cacheline, thus less write-backs.
*/
tcache_bin_stats_t tstats;
/*
* To make use of adjacent cacheline prefetch, the items in the avail
* stack goes to higher address for newer allocations. avail points
* just above the available space, which means that
* avail[-ncached, ... -1] are available items and the lowest item will
* be allocated first.
*/
void **avail; /* Stack of available objects. */
};
struct tcache_s {
/* Data accessed frequently first: prof, ticker and small bins. */
uint64_t prof_accumbytes;/* Cleared after arena_prof_accum(). */
ticker_t gc_ticker; /* Drives incremental GC. */
/*
* The pointer stacks associated with tbins follow as a contiguous
* array. During tcache initialization, the avail pointer in each
* element of tbins is initialized to point to the proper offset within
* this array.
* The pointer stacks associated with bins follow as a contiguous array.
* During tcache initialization, the avail pointer in each element of
* tbins is initialized to point to the proper offset within this array.
*/
tcache_bin_t tbins_small[NBINS];
cache_bin_t bins_small[NBINS];
/* Data accessed less often below. */
ql_elm(tcache_t) link; /* Used for aggregating stats. */
arena_t *arena; /* Associated arena. */
szind_t next_gc_bin; /* Next bin to GC. */
/* For small bins, fill (ncached_max >> lg_fill_div). */
uint8_t lg_fill_div[NBINS];
tcache_bin_t tbins_large[NSIZES-NBINS];
cache_bin_t bins_large[NSIZES-NBINS];
};
/* Linkage for list of available (previously used) explicit tcache IDs. */

View File

@ -3,14 +3,9 @@
#include "jemalloc/internal/size_classes.h"
typedef struct tcache_bin_info_s tcache_bin_info_t;
typedef struct tcache_bin_s tcache_bin_t;
typedef struct tcache_s tcache_t;
typedef struct tcaches_s tcaches_t;
/* ncached is cast to this type for comparison. */
typedef int32_t low_water_t;
/*
* tcache pointers close to NULL are used to encode state information that is
* used for two purposes: preventing thread caching on a per thread basis and

View File

@ -307,12 +307,12 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads,
ql_foreach(tcache, &arena->tcache_ql, link) {
szind_t i = 0;
for (; i < NBINS; i++) {
tcache_bin_t *tbin = tcache_small_bin_get(tcache, i);
cache_bin_t *tbin = tcache_small_bin_get(tcache, i);
arena_stats_accum_zu(&astats->tcache_bytes,
tbin->ncached * sz_index2size(i));
}
for (; i < nhbins; i++) {
tcache_bin_t *tbin = tcache_large_bin_get(tcache, i);
cache_bin_t *tbin = tcache_large_bin_get(tcache, i);
arena_stats_accum_zu(&astats->tcache_bytes,
tbin->ncached * sz_index2size(i));
}
@ -1420,7 +1420,7 @@ arena_bin_malloc_hard(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin,
void
arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache,
tcache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes) {
cache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes) {
unsigned i, nfill;
arena_bin_t *bin;

View File

@ -12,7 +12,7 @@
bool opt_tcache = true;
ssize_t opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT;
tcache_bin_info_t *tcache_bin_info;
cache_bin_info_t *tcache_bin_info;
static unsigned stack_nelms; /* Total stack elms per tcache. */
unsigned nhbins;
@ -40,7 +40,7 @@ void
tcache_event_hard(tsd_t *tsd, tcache_t *tcache) {
szind_t binind = tcache->next_gc_bin;
tcache_bin_t *tbin;
cache_bin_t *tbin;
if (binind < NBINS) {
tbin = tcache_small_bin_get(tcache, binind);
} else {
@ -58,7 +58,7 @@ tcache_event_hard(tsd_t *tsd, tcache_t *tcache) {
* Reduce fill count by 2X. Limit lg_fill_div such that
* the fill count is always at least 1.
*/
tcache_bin_info_t *tbin_info = &tcache_bin_info[binind];
cache_bin_info_t *tbin_info = &tcache_bin_info[binind];
if ((tbin_info->ncached_max >>
(tcache->lg_fill_div[binind] + 1)) >= 1) {
tcache->lg_fill_div[binind]++;
@ -86,7 +86,7 @@ tcache_event_hard(tsd_t *tsd, tcache_t *tcache) {
void *
tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache,
tcache_bin_t *tbin, szind_t binind, bool *tcache_success) {
cache_bin_t *tbin, szind_t binind, bool *tcache_success) {
void *ret;
assert(tcache->arena != NULL);
@ -95,18 +95,18 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache,
if (config_prof) {
tcache->prof_accumbytes = 0;
}
ret = tcache_alloc_easy(tbin, tcache_success);
ret = cache_alloc_easy(tbin, tcache_success);
return ret;
}
void
tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin,
tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin,
szind_t binind, unsigned rem) {
bool merged_stats = false;
assert(binind < NBINS);
assert(rem <= tbin->ncached);
assert((cache_bin_sz_t)rem <= tbin->ncached);
arena_t *arena = tcache->arena;
assert(arena != NULL);
@ -180,18 +180,18 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin,
memmove(tbin->avail - rem, tbin->avail - tbin->ncached, rem *
sizeof(void *));
tbin->ncached = rem;
if ((low_water_t)tbin->ncached < tbin->low_water) {
if (tbin->ncached < tbin->low_water) {
tbin->low_water = tbin->ncached;
}
}
void
tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind,
tcache_bin_flush_large(tsd_t *tsd, cache_bin_t *tbin, szind_t binind,
unsigned rem, tcache_t *tcache) {
bool merged_stats = false;
assert(binind < nhbins);
assert(rem <= tbin->ncached);
assert((cache_bin_sz_t)rem <= tbin->ncached);
arena_t *arena = tcache->arena;
assert(arena != NULL);
@ -278,7 +278,7 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind,
memmove(tbin->avail - rem, tbin->avail - tbin->ncached, rem *
sizeof(void *));
tbin->ncached = rem;
if ((low_water_t)tbin->ncached < tbin->low_water) {
if (tbin->ncached < tbin->low_water) {
tbin->low_water = tbin->ncached;
}
}
@ -354,8 +354,8 @@ tcache_init(tsd_t *tsd, tcache_t *tcache, void *avail_stack) {
size_t stack_offset = 0;
assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0);
memset(tcache->tbins_small, 0, sizeof(tcache_bin_t) * NBINS);
memset(tcache->tbins_large, 0, sizeof(tcache_bin_t) * (nhbins - NBINS));
memset(tcache->bins_small, 0, sizeof(cache_bin_t) * NBINS);
memset(tcache->bins_large, 0, sizeof(cache_bin_t) * (nhbins - NBINS));
unsigned i = 0;
for (; i < NBINS; i++) {
tcache->lg_fill_div[i] = 1;
@ -450,7 +450,7 @@ tcache_flush_cache(tsd_t *tsd, tcache_t *tcache) {
assert(tcache->arena != NULL);
for (unsigned i = 0; i < NBINS; i++) {
tcache_bin_t *tbin = tcache_small_bin_get(tcache, i);
cache_bin_t *tbin = tcache_small_bin_get(tcache, i);
tcache_bin_flush_small(tsd, tcache, tbin, i, 0);
if (config_stats) {
@ -458,7 +458,7 @@ tcache_flush_cache(tsd_t *tsd, tcache_t *tcache) {
}
}
for (unsigned i = NBINS; i < nhbins; i++) {
tcache_bin_t *tbin = tcache_large_bin_get(tcache, i);
cache_bin_t *tbin = tcache_large_bin_get(tcache, i);
tcache_bin_flush_large(tsd, tbin, i, 0, tcache);
if (config_stats) {
@ -525,7 +525,7 @@ tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) {
/* Merge and reset tcache stats. */
for (i = 0; i < NBINS; i++) {
arena_bin_t *bin = &arena->bins[i];
tcache_bin_t *tbin = tcache_small_bin_get(tcache, i);
cache_bin_t *tbin = tcache_small_bin_get(tcache, i);
malloc_mutex_lock(tsdn, &bin->lock);
bin->stats.nrequests += tbin->tstats.nrequests;
malloc_mutex_unlock(tsdn, &bin->lock);
@ -533,7 +533,7 @@ tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) {
}
for (; i < nhbins; i++) {
tcache_bin_t *tbin = tcache_large_bin_get(tcache, i);
cache_bin_t *tbin = tcache_large_bin_get(tcache, i);
arena_stats_large_nrequests_add(tsdn, &arena->stats, i,
tbin->tstats.nrequests);
tbin->tstats.nrequests = 0;
@ -657,8 +657,8 @@ tcache_boot(tsdn_t *tsdn) {
nhbins = sz_size2index(tcache_maxclass) + 1;
/* Initialize tcache_bin_info. */
tcache_bin_info = (tcache_bin_info_t *)base_alloc(tsdn, b0get(), nhbins
* sizeof(tcache_bin_info_t), CACHELINE);
tcache_bin_info = (cache_bin_info_t *)base_alloc(tsdn, b0get(), nhbins
* sizeof(cache_bin_info_t), CACHELINE);
if (tcache_bin_info == NULL) {
return true;
}