Optimizing TSD and thread cache layout.
1) Re-organize TSD so that frequently accessed fields are closer to the beginning and more compact. Assuming 64-bit, the first 2.5 cachelines now contain everything needed on the tcache fast path, except for the tcache struct itself.
2) Re-organize tcache and tbins. Take lg_fill_div out of tbin, and reduce tbin to 24 bytes (down from 32). Split tbins into tbins_small and tbins_large, and place tbins_small close to the beginning.
commit 36bd90b962 (parent 4dec507546)
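For orientation before reading the diff, here is a minimal, self-contained C sketch of the reorganized thread-cache structures. It is a simplification, not the real definitions: NBINS/NHBINS are hard-coded stand-ins (in jemalloc, NBINS is a build-time constant and nhbins is a runtime variable), the stats type is stubbed, and several fields (gc_ticker, link, arena) are elided. The authoritative definitions are in the struct tcache_bin_s / struct tcache_s hunks below.

#include <assert.h>
#include <stdint.h>

/* Stand-ins so the sketch compiles on its own; the real jemalloc types differ. */
#define NBINS  36u                            /* # of small size classes (build-dependent). */
#define NHBINS 44u                            /* # of tcache-able size classes (build-dependent). */
typedef unsigned szind_t;
typedef int32_t low_water_t;                  /* ncached is cast to this type for comparison. */
typedef struct { uint64_t nrequests; } tcache_bin_stats_t;

/* 24-byte bin on 64-bit (down from 32): lg_fill_div no longer lives here. */
typedef struct {
    low_water_t        low_water;             /* Min # cached since last GC. */
    uint32_t           ncached;               /* # of cached objects. */
    tcache_bin_stats_t tstats;                /* Kept next to ncached (same cacheline). */
    void               **avail;               /* Pointer stack; grows toward higher addresses. */
} tcache_bin_t;

/* Frequently accessed data first (prof counter, small bins); colder
 * bookkeeping, the per-bin fill divisors and the large bins follow. */
typedef struct {
    uint64_t     prof_accumbytes;             /* gc_ticker, link, arena, etc. elided here. */
    tcache_bin_t tbins_small[NBINS];
    uint8_t      lg_fill_div[NBINS];          /* Fill ncached_max >> lg_fill_div. */
    tcache_bin_t tbins_large[NHBINS - NBINS];
} tcache_t;

static inline tcache_bin_t *
tcache_small_bin_get(tcache_t *tcache, szind_t binind) {
    assert(binind < NBINS);
    return &tcache->tbins_small[binind];
}

static inline tcache_bin_t *
tcache_large_bin_get(tcache_t *tcache, szind_t binind) {
    assert(binind >= NBINS && binind < NHBINS);
    return &tcache->tbins_large[binind - NBINS];
}

Most of the mechanical churn in the diff is callers switching from indexing a single tbins[] array to going through these two accessors.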
@@ -51,7 +51,7 @@ bool arena_muzzy_decay_time_set(tsdn_t *tsdn, arena_t *arena,
 void arena_decay(tsdn_t *tsdn, arena_t *arena, bool all);
 void arena_reset(tsd_t *tsd, arena_t *arena);
 void arena_destroy(tsd_t *tsd, arena_t *arena);
-void arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena,
+void arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache,
     tcache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes);
 void arena_alloc_junk_small(void *ptr, const arena_bin_info_t *bin_info,
     bool zero);
@@ -538,33 +538,35 @@ bool malloc_initialized(void);
 #include "jemalloc/internal/mutex_inlines.h"
 
 #ifndef JEMALLOC_ENABLE_INLINE
 pszind_t psz2ind(size_t psz);
 size_t pind2sz_compute(pszind_t pind);
 size_t pind2sz_lookup(pszind_t pind);
 size_t pind2sz(pszind_t pind);
 size_t psz2u(size_t psz);
 szind_t size2index_compute(size_t size);
 szind_t size2index_lookup(size_t size);
 szind_t size2index(size_t size);
 size_t index2size_compute(szind_t index);
 size_t index2size_lookup(szind_t index);
 size_t index2size(szind_t index);
 size_t s2u_compute(size_t size);
 size_t s2u_lookup(size_t size);
 size_t s2u(size_t size);
 size_t sa2u(size_t size, size_t alignment);
 arena_t *arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal);
 arena_t *arena_choose(tsd_t *tsd, arena_t *arena);
 arena_t *arena_ichoose(tsd_t *tsd, arena_t *arena);
 arena_tdata_t *arena_tdata_get(tsd_t *tsd, unsigned ind,
     bool refresh_if_missing);
 arena_t *arena_get(tsdn_t *tsdn, unsigned ind, bool init_if_missing);
 ticker_t *decay_ticker_get(tsd_t *tsd, unsigned ind);
 bool tcache_available(tsd_t *tsd);
+tcache_bin_t *tcache_small_bin_get(tcache_t *tcache, szind_t binind);
+tcache_bin_t *tcache_large_bin_get(tcache_t *tcache, szind_t binind);
 tcache_t *tcache_get(tsd_t *tsd);
 malloc_cpuid_t malloc_getcpu(void);
 unsigned percpu_arena_choose(void);
 unsigned percpu_arena_ind_limit(void);
 #endif
 
 #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_))
@@ -933,6 +935,18 @@ decay_ticker_get(tsd_t *tsd, unsigned ind) {
     return &tdata->decay_ticker;
 }
 
+JEMALLOC_ALWAYS_INLINE tcache_bin_t *
+tcache_small_bin_get(tcache_t *tcache, szind_t binind) {
+    assert(binind < NBINS);
+    return &tcache->tbins_small[binind];
+}
+
+JEMALLOC_ALWAYS_INLINE tcache_bin_t *
+tcache_large_bin_get(tcache_t *tcache, szind_t binind) {
+    assert(binind >= NBINS && binind < nhbins);
+    return &tcache->tbins_large[binind - NBINS];
+}
+
 JEMALLOC_ALWAYS_INLINE bool
 tcache_available(tsd_t *tsd) {
     cassert(config_tcache);
@@ -945,7 +959,8 @@ tcache_available(tsd_t *tsd) {
     if (likely(tsd_tcache_enabled_get(tsd) == true)) {
         /* Associated arena == null implies tcache init in progress. */
         if (tsd_tcachep_get(tsd)->arena != NULL) {
-            assert(tsd_tcachep_get(tsd)->tbins[0].avail != NULL);
+            assert(tcache_small_bin_get(tsd_tcachep_get(tsd),
+                0)->avail != NULL);
         }
         return true;
     }
@@ -53,9 +53,6 @@ struct rtree_ctx_cache_elm_s {
 };
 
 struct rtree_ctx_s {
-#ifndef _MSC_VER
-    JEMALLOC_ALIGNED(CACHELINE)
-#endif
     rtree_ctx_cache_elm_t cache[RTREE_CTX_NCACHE];
 };
 
@@ -73,7 +73,7 @@ tcache_alloc_easy(tcache_bin_t *tbin, bool *tcache_success) {
     ret = *(tbin->avail - tbin->ncached);
     tbin->ncached--;
 
-    if (unlikely((int)tbin->ncached < tbin->low_water)) {
+    if (unlikely((low_water_t)tbin->ncached < tbin->low_water)) {
         tbin->low_water = tbin->ncached;
     }
 
@@ -89,7 +89,7 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size,
     size_t usize JEMALLOC_CC_SILENCE_INIT(0);
 
     assert(binind < NBINS);
-    tbin = &tcache->tbins[binind];
+    tbin = tcache_small_bin_get(tcache, binind);
     ret = tcache_alloc_easy(tbin, &tcache_success);
     assert(tcache_success == (ret != NULL));
     if (unlikely(!tcache_success)) {
@@ -150,8 +150,8 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size,
     tcache_bin_t *tbin;
     bool tcache_success;
 
-    assert(binind < nhbins);
-    tbin = &tcache->tbins[binind];
+    assert(binind >= NBINS && binind < nhbins);
+    tbin = tcache_large_bin_get(tcache, binind);
     ret = tcache_alloc_easy(tbin, &tcache_success);
     assert(tcache_success == (ret != NULL));
     if (unlikely(!tcache_success)) {
@@ -215,7 +215,7 @@ tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind,
         arena_dalloc_junk_small(ptr, &arena_bin_info[binind]);
     }
 
-    tbin = &tcache->tbins[binind];
+    tbin = tcache_small_bin_get(tcache, binind);
     tbin_info = &tcache_bin_info[binind];
     if (unlikely(tbin->ncached == tbin_info->ncached_max)) {
         tcache_bin_flush_small(tsd, tcache, tbin, binind,
@@ -241,7 +241,7 @@ tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind,
         large_dalloc_junk(ptr, index2size(binind));
     }
 
-    tbin = &tcache->tbins[binind];
+    tbin = tcache_large_bin_get(tcache, binind);
     tbin_info = &tcache_bin_info[binind];
     if (unlikely(tbin->ncached == tbin_info->ncached_max)) {
         tcache_bin_flush_large(tsd, tbin, binind,
@@ -10,10 +10,14 @@ struct tcache_bin_info_s {
 };
 
 struct tcache_bin_s {
+    low_water_t low_water;  /* Min # cached since last GC. */
+    uint32_t ncached;       /* # of cached objects. */
+    /*
+     * ncached and stats are both modified frequently. Let's keep them
+     * close so that they have a higher chance of being on the same
+     * cacheline, thus less write-backs.
+     */
     tcache_bin_stats_t tstats;
-    int low_water;          /* Min # cached since last GC. */
-    unsigned lg_fill_div;   /* Fill (ncached_max >> lg_fill_div). */
-    unsigned ncached;       /* # of cached objects. */
     /*
      * To make use of adjacent cacheline prefetch, the items in the avail
      * stack goes to higher address for newer allocations. avail points
@@ -25,11 +29,9 @@ struct tcache_bin_s {
 };
 
 struct tcache_s {
-    ql_elm(tcache_t) link;      /* Used for aggregating stats. */
+    /* Data accessed frequently first: prof, ticker and small bins. */
     uint64_t prof_accumbytes;   /* Cleared after arena_prof_accum(). */
     ticker_t gc_ticker;         /* Drives incremental GC. */
-    szind_t next_gc_bin;        /* Next bin to GC. */
-    arena_t *arena;             /* Associated arena. */
     /*
      * The pointer stacks associated with tbins follow as a contiguous
      * array. During tcache initialization, the avail pointer in each
@@ -37,9 +39,21 @@ struct tcache_s {
      * this array.
      */
 #ifdef JEMALLOC_TCACHE
-    tcache_bin_t tbins[NSIZES];
+    tcache_bin_t tbins_small[NBINS];
 #else
-    tcache_bin_t tbins[0];
+    tcache_bin_t tbins_small[0];
+#endif
+    /* Data accessed less often below. */
+    ql_elm(tcache_t) link;      /* Used for aggregating stats. */
+    arena_t *arena;             /* Associated arena. */
+    szind_t next_gc_bin;        /* Next bin to GC. */
+#ifdef JEMALLOC_TCACHE
+    /* For small bins, fill (ncached_max >> lg_fill_div). */
+    uint8_t lg_fill_div[NBINS];
+    tcache_bin_t tbins_large[NSIZES-NBINS];
+#else
+    uint8_t lg_fill_div[0];
+    tcache_bin_t tbins_large[0];
 #endif
 };
 
@@ -6,6 +6,9 @@ typedef struct tcache_bin_s tcache_bin_t;
 typedef struct tcache_s tcache_t;
 typedef struct tcaches_s tcaches_t;
 
+/* ncached is cast to this type for comparison. */
+typedef int32_t low_water_t;
+
 /*
  * tcache pointers close to NULL are used to encode state information that is
  * used for two purposes: preventing thread caching on a per thread basis and
@@ -48,9 +51,9 @@ typedef struct tcaches_s tcaches_t;
     ((TCACHE_GC_SWEEP / NBINS) + ((TCACHE_GC_SWEEP / NBINS == 0) ? 0 : 1))
 
 /* Used in TSD static initializer only. Real init in tcache_data_init(). */
-#define TCACHE_ZERO_INITIALIZER {{NULL}}
+#define TCACHE_ZERO_INITIALIZER {0}
 
 /* Used in TSD static initializer only. Will be initialized to opt_tcache. */
-#define TCACHE_ENABLED_DEFAULT false
+#define TCACHE_ENABLED_ZERO_INITIALIZER false
 
 #endif /* JEMALLOC_INTERNAL_TCACHE_TYPES_H */
@@ -14,19 +14,54 @@ struct tsd_init_head_s {
 };
 #endif
 
+/*
+ * Thread-Specific-Data layout
+ * --- data accessed on tcache fast path: state, rtree_ctx, stats, prof ---
+ * s: state
+ * e: tcache_enabled
+ * m: thread_allocated (config_stats)
+ * f: thread_deallocated (config_stats)
+ * p: prof_tdata (config_prof)
+ * c: rtree_ctx (rtree cache accessed on deallocation)
+ * t: tcache
+ * --- data not accessed on tcache fast path: arena related fields ---
+ * d: arenas_tdata_bypass
+ * r: narenas_tdata
+ * x: blank space (1 byte)
+ * i: iarena
+ * a: arena
+ * o: arenas_tdata
+ * Loading TSD data is on the critical path of basically all malloc operations.
+ * In particular, tcache and rtree_ctx rely on hot CPU cache to be effective.
+ * Use a compact layout to reduce cache footprint.
+ * +--- 64-bit and 64B cacheline; 1B each letter; First byte on the left. ---+
+ * |---------------------------- 1st cacheline ----------------------------|
+ * | sedxrrrr mmmmmmmm ffffffff pppppppp [c * 32 ........ ........ .......] |
+ * |---------------------------- 2nd cacheline ----------------------------|
+ * | [c * 64 ........ ........ ........ ........ ........ ........ .......] |
+ * |---------------------------- 3nd cacheline ----------------------------|
+ * | [c * 32 ........ ........ .......] iiiiiiii aaaaaaaa oooooooo [t...... |
+ * +-------------------------------------------------------------------------+
+ * Note: the entire tcache is embedded into TSD and spans multiple cachelines.
+ *
+ * The last 3 members (i, a and o) before tcache isn't really needed on tcache
+ * fast path. However we have a number of unused tcache bins and witnesses
+ * (never touched unless config_debug) at the end of tcache, so we place them
+ * there to avoid breaking the cachelines and possibly paging in an extra page.
+ */
 #define MALLOC_TSD \
 /* O(name, type, [gs]et, init, cleanup) */ \
-    O(tcache, tcache_t, yes, no, yes) \
+    O(tcache_enabled, bool, yes, yes, no) \
+    O(arenas_tdata_bypass, bool, no, no, no) \
+    O(narenas_tdata, uint32_t, yes, no, no) \
     O(thread_allocated, uint64_t, yes, no, no) \
     O(thread_deallocated, uint64_t, yes, no, no) \
     O(prof_tdata, prof_tdata_t *, yes, no, yes) \
+    O(rtree_ctx, rtree_ctx_t, no, yes, no) \
     O(iarena, arena_t *, yes, no, yes) \
     O(arena, arena_t *, yes, no, yes) \
     O(arenas_tdata, arena_tdata_t *,yes, no, yes) \
-    O(narenas_tdata, unsigned, yes, no, no) \
-    O(arenas_tdata_bypass, bool, no, no, no) \
-    O(tcache_enabled, bool, yes, yes, no) \
-    O(rtree_ctx, rtree_ctx_t, no, yes, no) \
+    O(tcache, tcache_t, yes, no, yes) \
     O(witnesses, witness_list_t, no, no, yes) \
     O(rtree_leaf_elm_witnesses, rtree_leaf_elm_witness_tsd_t, \
         no, no, no) \
@@ -34,17 +69,17 @@ struct tsd_init_head_s {
 
 #define TSD_INITIALIZER { \
     tsd_state_uninitialized, \
-    TCACHE_ZERO_INITIALIZER, \
-    0, \
-    0, \
-    NULL, \
-    NULL, \
-    NULL, \
-    NULL, \
-    0, \
+    TCACHE_ENABLED_ZERO_INITIALIZER, \
     false, \
-    TCACHE_ENABLED_DEFAULT, \
+    0, \
+    0, \
+    0, \
+    NULL, \
     RTREE_CTX_ZERO_INITIALIZER, \
+    NULL, \
+    NULL, \
+    NULL, \
+    TCACHE_ZERO_INITIALIZER, \
     ql_head_initializer(witnesses), \
     RTREE_ELM_WITNESS_TSD_INITIALIZER, \
     false \
@@ -17,12 +17,14 @@ typedef struct tsdn_s tsdn_t;
 
 #define TSDN_NULL ((tsdn_t *)0)
 
-typedef enum {
-    tsd_state_uninitialized,
-    tsd_state_nominal,
-    tsd_state_purgatory,
-    tsd_state_reincarnated
-} tsd_state_t;
+enum {
+    tsd_state_uninitialized = 0,
+    tsd_state_nominal = 1,
+    tsd_state_purgatory = 2,
+    tsd_state_reincarnated = 3
+};
+/* Manually limit tsd_state_t to a single byte. */
+typedef uint8_t tsd_state_t;
 
 /*
  * TLS/TSD-agnostic macro-based implementation of thread-specific data. There
src/arena.c (16 lines changed)
@@ -287,8 +287,14 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads,
         atomic_store_zu(&astats->tcache_bytes, 0, ATOMIC_RELAXED);
         malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx);
         ql_foreach(tcache, &arena->tcache_ql, link) {
-            for (szind_t i = 0; i < nhbins; i++) {
-                tbin = &tcache->tbins[i];
+            szind_t i = 0;
+            for (; i < NBINS; i++) {
+                tbin = tcache_small_bin_get(tcache, i);
+                arena_stats_accum_zu(&astats->tcache_bytes,
+                    tbin->ncached * index2size(i));
+            }
+            for (; i < nhbins; i++) {
+                tbin = tcache_large_bin_get(tcache, i);
                 arena_stats_accum_zu(&astats->tcache_bytes,
                     tbin->ncached * index2size(i));
             }
@@ -1317,8 +1323,8 @@ arena_bin_malloc_hard(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin,
 }
 
 void
-arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_bin_t *tbin,
-    szind_t binind, uint64_t prof_accumbytes) {
+arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache,
+    tcache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes) {
     unsigned i, nfill;
     arena_bin_t *bin;
 
@@ -1330,7 +1336,7 @@ arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_bin_t *tbin,
     bin = &arena->bins[binind];
     malloc_mutex_lock(tsdn, &bin->lock);
     for (i = 0, nfill = (tcache_bin_info[binind].ncached_max >>
-        tbin->lg_fill_div); i < nfill; i++) {
+        tcache->lg_fill_div[binind]); i < nfill; i++) {
         extent_t *slab;
         void *ptr;
         if ((slab = bin->slabcur) != NULL && extent_nfree_get(slab) >
src/tcache.c (76 lines changed)
@@ -40,9 +40,13 @@ tcache_salloc(tsdn_t *tsdn, const void *ptr) {
 void
 tcache_event_hard(tsd_t *tsd, tcache_t *tcache) {
     szind_t binind = tcache->next_gc_bin;
-    tcache_bin_t *tbin = &tcache->tbins[binind];
-    tcache_bin_info_t *tbin_info = &tcache_bin_info[binind];
 
+    tcache_bin_t *tbin;
+    if (binind < NBINS) {
+        tbin = tcache_small_bin_get(tcache, binind);
+    } else {
+        tbin = tcache_large_bin_get(tcache, binind);
+    }
     if (tbin->low_water > 0) {
         /*
          * Flush (ceiling) 3/4 of the objects below the low water mark.
@@ -51,24 +55,26 @@ tcache_event_hard(tsd_t *tsd, tcache_t *tcache) {
             tcache_bin_flush_small(tsd, tcache, tbin, binind,
                 tbin->ncached - tbin->low_water + (tbin->low_water
                 >> 2));
+            /*
+             * Reduce fill count by 2X. Limit lg_fill_div such that
+             * the fill count is always at least 1.
+             */
+            tcache_bin_info_t *tbin_info = &tcache_bin_info[binind];
+            if ((tbin_info->ncached_max >>
+                (tcache->lg_fill_div[binind] + 1)) >= 1) {
+                tcache->lg_fill_div[binind]++;
+            }
         } else {
             tcache_bin_flush_large(tsd, tbin, binind, tbin->ncached
                 - tbin->low_water + (tbin->low_water >> 2), tcache);
         }
-        /*
-         * Reduce fill count by 2X. Limit lg_fill_div such that the
-         * fill count is always at least 1.
-         */
-        if ((tbin_info->ncached_max >> (tbin->lg_fill_div+1)) >= 1) {
-            tbin->lg_fill_div++;
-        }
     } else if (tbin->low_water < 0) {
         /*
-         * Increase fill count by 2X. Make sure lg_fill_div stays
-         * greater than 0.
+         * Increase fill count by 2X for small bins. Make sure
+         * lg_fill_div stays greater than 0.
          */
-        if (tbin->lg_fill_div > 1) {
-            tbin->lg_fill_div--;
+        if (binind < NBINS && tcache->lg_fill_div[binind] > 1) {
+            tcache->lg_fill_div[binind]--;
         }
     }
     tbin->low_water = tbin->ncached;
@@ -85,8 +91,8 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache,
     void *ret;
 
     assert(tcache->arena);
-    arena_tcache_fill_small(tsdn, arena, tbin, binind, config_prof ?
-        tcache->prof_accumbytes : 0);
+    arena_tcache_fill_small(tsdn, arena, tcache, tbin, binind,
+        config_prof ? tcache->prof_accumbytes : 0);
     if (config_prof) {
         tcache->prof_accumbytes = 0;
     }
@@ -175,7 +181,7 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin,
     memmove(tbin->avail - rem, tbin->avail - tbin->ncached, rem *
         sizeof(void *));
     tbin->ncached = rem;
-    if ((int)tbin->ncached < tbin->low_water) {
+    if ((low_water_t)tbin->ncached < tbin->low_water) {
         tbin->low_water = tbin->ncached;
     }
 }
@@ -273,7 +279,7 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind,
     memmove(tbin->avail - rem, tbin->avail - tbin->ncached, rem *
         sizeof(void *));
     tbin->ncached = rem;
-    if ((int)tbin->ncached < tbin->low_water) {
+    if ((low_water_t)tbin->ncached < tbin->low_water) {
         tbin->low_water = tbin->ncached;
     }
 }
@@ -347,17 +353,24 @@ tcache_init(tsd_t *tsd, tcache_t *tcache, void *avail_stack) {
 
     size_t stack_offset = 0;
     assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0);
-    memset(tcache->tbins, 0, sizeof(tcache_bin_t) * nhbins);
-    for (unsigned i = 0; i < nhbins; i++) {
-        tcache->tbins[i].lg_fill_div = 1;
+    memset(tcache->tbins_small, 0, sizeof(tcache_bin_t) * NBINS);
+    memset(tcache->tbins_large, 0, sizeof(tcache_bin_t) * (nhbins - NBINS));
+    unsigned i = 0;
+    for (; i < NBINS; i++) {
+        tcache->lg_fill_div[i] = 1;
         stack_offset += tcache_bin_info[i].ncached_max * sizeof(void *);
         /*
          * avail points past the available space. Allocations will
         * access the slots toward higher addresses (for the benefit of
          * prefetch).
          */
-        tcache->tbins[i].avail = (void **)((uintptr_t)avail_stack +
-            (uintptr_t)stack_offset);
+        tcache_small_bin_get(tcache, i)->avail =
+            (void **)((uintptr_t)avail_stack + (uintptr_t)stack_offset);
+    }
+    for (; i < nhbins; i++) {
+        stack_offset += tcache_bin_info[i].ncached_max * sizeof(void *);
+        tcache_large_bin_get(tcache, i)->avail =
+            (void **)((uintptr_t)avail_stack + (uintptr_t)stack_offset);
     }
     assert(stack_offset == stack_nelms * sizeof(void *));
 }
@@ -370,7 +383,7 @@ tsd_tcache_data_init(tsd_t *tsd) {
     }
 
     tcache_t *tcache = &tsd->tcache;
-    assert(tcache->tbins[0].avail == NULL);
+    assert(tcache_small_bin_get(tcache, 0)->avail == NULL);
     size_t size = stack_nelms * sizeof(void *);
     /* Avoid false cacheline sharing. */
     size = sa2u(size, CACHELINE);
@@ -443,7 +456,7 @@ tcache_flush_cache(tsd_t *tsd, tcache_t *tcache) {
     unsigned i;
 
     for (i = 0; i < NBINS; i++) {
-        tcache_bin_t *tbin = &tcache->tbins[i];
+        tcache_bin_t *tbin = tcache_small_bin_get(tcache, i);
         tcache_bin_flush_small(tsd, tcache, tbin, i, 0);
 
         if (config_stats) {
@@ -451,7 +464,7 @@ tcache_flush_cache(tsd_t *tsd, tcache_t *tcache) {
         }
     }
     for (; i < nhbins; i++) {
-        tcache_bin_t *tbin = &tcache->tbins[i];
+        tcache_bin_t *tbin = tcache_large_bin_get(tcache, i);
         tcache_bin_flush_large(tsd, tbin, i, 0, tcache);
 
         if (config_stats) {
@@ -483,7 +496,8 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache, bool tsd_tcache) {
 
     if (tsd_tcache) {
         /* Release the avail array for the TSD embedded auto tcache. */
-        void *avail_array = (void *)((uintptr_t)tcache->tbins[0].avail -
+        void *avail_array =
+            (void *)((uintptr_t)tcache_small_bin_get(tcache, 0)->avail -
             (uintptr_t)tcache_bin_info[0].ncached_max * sizeof(void *));
         idalloctm(tsd_tsdn(tsd), avail_array, NULL, true, true);
     } else {
@@ -503,16 +517,16 @@ tcache_cleanup(tsd_t *tsd) {
     if (!tcache_available(tsd)) {
         assert(tsd_tcache_enabled_get(tsd) == false);
         if (config_debug) {
-            assert(tcache->tbins[0].avail == NULL);
+            assert(tcache_small_bin_get(tcache, 0)->avail == NULL);
         }
         return;
     }
     assert(tsd_tcache_enabled_get(tsd));
-    assert(tcache->tbins[0].avail != NULL);
+    assert(tcache_small_bin_get(tcache, 0)->avail != NULL);
 
     tcache_destroy(tsd, tcache, true);
     if (config_debug) {
-        tcache->tbins[0].avail = NULL;
+        tcache_small_bin_get(tcache, 0)->avail = NULL;
     }
 }
 
@@ -525,7 +539,7 @@ tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) {
     /* Merge and reset tcache stats. */
     for (i = 0; i < NBINS; i++) {
         arena_bin_t *bin = &arena->bins[i];
-        tcache_bin_t *tbin = &tcache->tbins[i];
+        tcache_bin_t *tbin = tcache_small_bin_get(tcache, i);
         malloc_mutex_lock(tsdn, &bin->lock);
         bin->stats.nrequests += tbin->tstats.nrequests;
         malloc_mutex_unlock(tsdn, &bin->lock);
@@ -533,7 +547,7 @@ tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) {
     }
 
     for (; i < nhbins; i++) {
-        tcache_bin_t *tbin = &tcache->tbins[i];
+        tcache_bin_t *tbin = tcache_large_bin_get(tcache, i);
         arena_stats_large_nrequests_add(tsdn, &arena->stats, i,
             tbin->tstats.nrequests);
         tbin->tstats.nrequests = 0;