Simplify tcache object caching.

Use chains of cached objects, rather than using arrays of pointers.

Since tcache_bin_t is no longer dynamically sized, convert tcache_t's tbins
to an array of structures, rather than an array of pointers.  This implicitly
removes tcache_bin_{create,destroy}(), which further simplifies the fast path
for malloc/free.

Use cacheline alignment for tcache_t allocations.

Remove the runtime configuration option for the number of tcache bin slots,
and replace it with a boolean option for enabling/disabling tcache.

Limit the number of tcache objects to the lesser of TCACHE_NSLOTS_MAX and 2X
the number of regions per run for the size class.

For GC-triggered flush, discard 3/4 of the objects below the low water mark,
rather than 1/2.
parent 2caa4715ed
commit 3fa9a2fad8
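The central data-structure change is visible in the tcache_bin_s hunk below: each bin's dynamically sized `slots` array of pointers is replaced by a singly linked chain threaded through the cached objects themselves, with the first word of every free object holding the pointer to the next one. A minimal self-contained sketch of the resulting fast paths (illustrative only; the type and function names are invented for the example, and low/high water tracking is omitted):

	typedef struct {
		unsigned	ncached;	/* # of cached objects. */
		unsigned	ncached_max;	/* Upper limit on ncached. */
		void		*avail;		/* Chain of available objects. */
	} bin_sketch_t;

	/* Pop: the head object's first word points to the next object. */
	static void *
	sketch_alloc(bin_sketch_t *tbin)
	{
		void *ret;

		if (tbin->ncached == 0)
			return (NULL);
		tbin->ncached--;
		ret = tbin->avail;
		tbin->avail = *(void **)ret;
		return (ret);
	}

	/* Push: link the freed object onto the head of the chain. */
	static void
	sketch_dalloc(bin_sketch_t *tbin, void *ptr)
	{

		*(void **)ptr = tbin->avail;
		tbin->avail = ptr;
		tbin->ncached++;
	}

Because the chain borrows storage from the cached objects, a bin's memory footprint no longer depends on its slot count, which is what lets tcache_bin_t become a fixed-size structure embedded directly in tcache_t. It also explains the LG_TINY_MIN change below: when tcache is enabled, every cacheable object must be at least pointer-sized to hold the link.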
@@ -376,8 +376,7 @@ will disable dirty page purging.
 @roff_tcache@.Ev JEMALLOC_OPTIONS=14g
 @roff_tcache@will disable garbage collection.
 @roff_tcache@.It H
-@roff_tcache@Double/halve the number of thread-specific cache slots per size
-@roff_tcache@class.
+@roff_tcache@Enable/disable thread-specific caching.
 @roff_tcache@When there are multiple threads, each thread uses a
 @roff_tcache@thread-specific cache for small and medium objects.
 @roff_tcache@Thread-specific caching allows many allocations to be satisfied
@@ -386,11 +385,7 @@ will disable dirty page purging.
 @roff_tcache@See the
 @roff_tcache@.Dq G
 @roff_tcache@option for related tuning information.
-@roff_tcache@The default number of cache slots is 128;
-@roff_tcache@.Ev JEMALLOC_OPTIONS=7h
-@roff_tcache@will disable thread-specific caching.
-@roff_tcache@Note that one cache slot per size class is not a valid
-@roff_tcache@configuration due to implementation details.
+@roff_tcache@This option is enabled by default.
 @roff_prof@.It I
 @roff_prof@Double/halve the average interval between memory profile dumps, as
 @roff_prof@measured in bytes of allocation activity.
@@ -773,7 +768,7 @@ option.
 @roff_xmalloc@option.
 @roff_xmalloc@.Ed
 .\"-----------------------------------------------------------------------------
-@roff_tcache@.It Sy "opt.lg_tcache_nslots (size_t) r-"
+@roff_tcache@.It Sy "opt.tcache (bool) r-"
 @roff_tcache@.Bd -ragged -offset indent -compact
 @roff_tcache@See the
 @roff_tcache@.Dq H
@@ -18,7 +18,11 @@
 
 #ifdef JEMALLOC_TINY
    /* Smallest size class to support. */
-#  define LG_TINY_MIN		1
+#  ifdef JEMALLOC_TCACHE
+#    define LG_TINY_MIN		LG_SIZEOF_PTR
+#  else
+#    define LG_TINY_MIN		1
+#  endif
 #endif
 
 /*
@@ -6,10 +6,13 @@ typedef struct tcache_bin_s tcache_bin_t;
 typedef struct tcache_s tcache_t;
 
 /*
- * Default number of cache slots for each bin in the thread cache (0:
- * disabled).
+ * Absolute maximum number of cache slots for each bin in the thread cache.
+ * This is an additional constraint beyond that imposed as: twice the number of
+ * regions per run for this size class.
+ *
+ * This constant must be an even number.
  */
-#define	LG_TCACHE_NSLOTS_DEFAULT	7
+#define	TCACHE_NSLOTS_MAX		200
 /*
  * (1U << opt_lg_tcache_gc_sweep) is the approximate number of allocation
  * events between full GC sweeps (-1: disabled).  Integer rounding may cause
@@ -29,7 +32,8 @@ struct tcache_bin_s {
 	unsigned	low_water;	/* Min # cached since last GC. */
 	unsigned	high_water;	/* Max # cached since last GC. */
 	unsigned	ncached;	/* # of cached objects. */
-	void		*slots[1];	/* Dynamically sized. */
+	unsigned	ncached_max;	/* Upper limit on ncached. */
+	void		*avail;		/* Chain of available objects. */
 };
 
 struct tcache_s {
@@ -42,26 +46,20 @@ struct tcache_s {
 	arena_t		*arena;		/* This thread's arena. */
 	unsigned	ev_cnt;		/* Event count since incremental GC. */
 	unsigned	next_gc_bin;	/* Next bin to GC. */
-	tcache_bin_t	*tbins[1];	/* Dynamically sized. */
+	tcache_bin_t	tbins[1];	/* Dynamically sized. */
 };
 
 #endif /* JEMALLOC_H_STRUCTS */
 /******************************************************************************/
 #ifdef JEMALLOC_H_EXTERNS
 
-extern size_t	opt_lg_tcache_nslots;
+extern bool	opt_tcache;
 extern ssize_t	opt_lg_tcache_gc_sweep;
 
 /* Map of thread-specific caches. */
 extern __thread tcache_t	*tcache_tls
     JEMALLOC_ATTR(tls_model("initial-exec"));
 
-/*
- * Number of cache slots for each bin in the thread cache, or 0 if tcache is
- * disabled.
- */
-extern size_t	tcache_nslots;
-
 /* Number of tcache allocation/deallocation events between incremental GCs. */
 extern unsigned	tcache_gc_incr;
 
@@ -71,10 +69,7 @@ void	tcache_bin_flush(tcache_bin_t *tbin, size_t binind, unsigned rem
 #endif
     );
 tcache_t *tcache_create(arena_t *arena);
-void	tcache_bin_destroy(tcache_t *tcache, tcache_bin_t *tbin,
-    unsigned binind);
 void	*tcache_alloc_hard(tcache_t *tcache, tcache_bin_t *tbin, size_t binind);
-tcache_bin_t	*tcache_bin_create(arena_t *arena);
 void	tcache_destroy(tcache_t *tcache);
 #ifdef JEMALLOC_STATS
 void	tcache_stats_merge(tcache_t *tcache, arena_t *arena);
@@ -99,7 +94,7 @@ tcache_get(void)
 {
 	tcache_t *tcache;
 
-	if (isthreaded == false || tcache_nslots == 0)
+	if ((isthreaded & opt_tcache) == false)
 		return (NULL);
 
 	tcache = tcache_tls;
@@ -124,37 +119,24 @@ tcache_event(tcache_t *tcache)
 
 	tcache->ev_cnt++;
 	assert(tcache->ev_cnt <= tcache_gc_incr);
-	if (tcache->ev_cnt >= tcache_gc_incr) {
+	if (tcache->ev_cnt == tcache_gc_incr) {
 		size_t binind = tcache->next_gc_bin;
-		tcache_bin_t *tbin = tcache->tbins[binind];
+		tcache_bin_t *tbin = &tcache->tbins[binind];
 
-		if (tbin != NULL) {
-			if (tbin->high_water == 0) {
-				/*
-				 * This bin went completely unused for an
-				 * entire GC cycle, so throw away the tbin.
-				 */
-				assert(tbin->ncached == 0);
-				tcache_bin_destroy(tcache, tbin, binind);
-				tcache->tbins[binind] = NULL;
-			} else {
-				if (tbin->low_water > 0) {
-					/*
-					 * Flush (ceiling) half of the objects
-					 * below the low water mark.
-					 */
-					tcache_bin_flush(tbin, binind,
-					    tbin->ncached - (tbin->low_water >>
-					    1) - (tbin->low_water & 1)
+		if (tbin->low_water > 0) {
+			/*
+			 * Flush (ceiling) 3/4 of the objects below the low
+			 * water mark.
+			 */
+			tcache_bin_flush(tbin, binind, tbin->ncached -
+			    tbin->low_water + (tbin->low_water >> 2)
 #ifdef JEMALLOC_PROF
-					    , tcache
+			    , tcache
 #endif
-					    );
-				}
-				tbin->low_water = tbin->ncached;
-				tbin->high_water = tbin->ncached;
-			}
-		}
+			    );
+		}
+		tbin->low_water = tbin->ncached;
+		tbin->high_water = tbin->ncached;
 
 		tcache->next_gc_bin++;
 		if (tcache->next_gc_bin == nbins)
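The flush expression above keeps `tbin->ncached - tbin->low_water + (tbin->low_water >> 2)` objects, i.e. it discards `low_water - (low_water >> 2)` of them, which is 3/4 of the low-water count rounded up — the ceiling the comment promises. A standalone check of that arithmetic (illustrative only, not part of the commit):

	#include <assert.h>

	/* Number of objects a GC pass discards, per the expression above. */
	static unsigned
	gc_flush_count(unsigned low_water)
	{

		return (low_water - (low_water >> 2));
	}

	int
	main(void)
	{

		assert(gc_flush_count(7) == 6);	/* ceil(0.75 * 7) == 6. */
		assert(gc_flush_count(8) == 6);	/* 0.75 * 8 == 6. */
		assert(gc_flush_count(1) == 1);	/* ceil(0.75 * 1) == 1. */
		return (0);
	}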
@@ -166,21 +148,24 @@ tcache_event(tcache_t *tcache)
 JEMALLOC_INLINE void *
 tcache_bin_alloc(tcache_bin_t *tbin)
 {
+	void *ret;
 
 	if (tbin->ncached == 0)
 		return (NULL);
 	tbin->ncached--;
 	if (tbin->ncached < tbin->low_water)
 		tbin->low_water = tbin->ncached;
-	return (tbin->slots[tbin->ncached]);
+	ret = tbin->avail;
+	tbin->avail = *(void **)ret;
+	return (ret);
 }
 
 JEMALLOC_INLINE void *
 tcache_alloc(tcache_t *tcache, size_t size, bool zero)
 {
 	void *ret;
-	tcache_bin_t *tbin;
 	size_t binind;
+	tcache_bin_t *tbin;
 
 	if (size <= small_maxclass)
 		binind = small_size2bin[size];
@@ -189,14 +174,7 @@ tcache_alloc(tcache_t *tcache, size_t size, bool zero)
 		    lg_mspace);
 	}
 	assert(binind < nbins);
-	tbin = tcache->tbins[binind];
-	if (tbin == NULL) {
-		tbin = tcache_bin_create(tcache->arena);
-		if (tbin == NULL)
-			return (NULL);
-		tcache->tbins[binind] = tbin;
-	}
-
+	tbin = &tcache->tbins[binind];
 	ret = tcache_bin_alloc(tbin);
 	if (ret == NULL) {
 		ret = tcache_alloc_hard(tcache, tbin, binind);
@@ -250,29 +228,20 @@ tcache_dalloc(tcache_t *tcache, void *ptr)
 
 #ifdef JEMALLOC_FILL
 	if (opt_junk)
-		memset(ptr, 0x5a, arena->bins[binind].reg_size);
+		memset(ptr, 0x5a, bin->reg_size);
 #endif
 
-	tbin = tcache->tbins[binind];
-	if (tbin == NULL) {
-		tbin = tcache_bin_create(choose_arena());
-		if (tbin == NULL) {
-			malloc_mutex_lock(&arena->lock);
-			arena_dalloc_bin(arena, chunk, ptr, mapelm);
-			malloc_mutex_unlock(&arena->lock);
-			return;
-		}
-		tcache->tbins[binind] = tbin;
-	}
-
-	if (tbin->ncached == tcache_nslots)
-		tcache_bin_flush(tbin, binind, (tcache_nslots >> 1)
+	tbin = &tcache->tbins[binind];
+	if (tbin->ncached == tbin->ncached_max) {
+		tcache_bin_flush(tbin, binind, (tbin->ncached_max >> 1)
 #ifdef JEMALLOC_PROF
 		    , tcache
 #endif
 		    );
-	assert(tbin->ncached < tcache_nslots);
-	tbin->slots[tbin->ncached] = ptr;
+	}
+	assert(tbin->ncached < tbin->ncached_max);
+	*(void **)ptr = tbin->avail;
+	tbin->avail = ptr;
 	tbin->ncached++;
 	if (tbin->ncached > tbin->high_water)
 		tbin->high_water = tbin->ncached;
@@ -53,13 +53,26 @@ static malloc_mutex_t	purge_lock;
 static const uint8_t	const_small_size2bin[STATIC_PAGE_SIZE - 255] = {
 	S2B_1(0xffU)		/*    0 */
 #if (LG_QUANTUM == 4)
-/* 64-bit system ************************/
+/* 16-byte quantum **********************/
 #  ifdef JEMALLOC_TINY
-	S2B_2(0)		/*    2 */
-	S2B_2(1)		/*    4 */
-	S2B_4(2)		/*    8 */
-	S2B_8(3)		/*   16 */
-#    define S2B_QMIN 3
+#    if (LG_TINY_MIN == 1)
+	S2B_2(0)		/*    2 */
+	S2B_2(1)		/*    4 */
+	S2B_4(2)		/*    8 */
+	S2B_8(3)		/*   16 */
+#      define S2B_QMIN 3
+#    elif (LG_TINY_MIN == 2)
+	S2B_4(0)		/*    4 */
+	S2B_4(1)		/*    8 */
+	S2B_8(2)		/*   16 */
+#      define S2B_QMIN 2
+#    elif (LG_TINY_MIN == 3)
+	S2B_8(0)		/*    8 */
+	S2B_8(1)		/*   16 */
+#      define S2B_QMIN 1
+#    else
+#      error "Unsupported LG_TINY_MIN"
+#    endif
 #  else
 	S2B_16(0)		/*   16 */
 #    define S2B_QMIN 0
@@ -73,12 +86,20 @@ static const uint8_t	const_small_size2bin[STATIC_PAGE_SIZE - 255] = {
 	S2B_16(S2B_QMIN + 7)	/*  128 */
 #  define S2B_CMIN (S2B_QMIN + 8)
 #else
-/* 32-bit system ************************/
+/* 8-byte quantum ***********************/
 #  ifdef JEMALLOC_TINY
-	S2B_2(0)		/*    2 */
-	S2B_2(1)		/*    4 */
-	S2B_4(2)		/*    8 */
-#    define S2B_QMIN 2
+#    if (LG_TINY_MIN == 1)
+	S2B_2(0)		/*    2 */
+	S2B_2(1)		/*    4 */
+	S2B_4(2)		/*    8 */
+#      define S2B_QMIN 2
+#    elif (LG_TINY_MIN == 2)
+	S2B_4(0)		/*    4 */
+	S2B_4(1)		/*    8 */
+#      define S2B_QMIN 1
+#    else
+#      error "Unsupported LG_TINY_MIN"
+#    endif
 #  else
 	S2B_8(0)		/*    8 */
 #    define S2B_QMIN 0
@@ -1048,28 +1069,15 @@ arena_tcache_fill(arena_t *arena, tcache_bin_t *tbin, size_t binind
 #ifdef JEMALLOC_PROF
 	arena_prof_accum(arena, prof_accumbytes);
 #endif
-	for (i = 0, nfill = (tcache_nslots >> 1); i < nfill; i++) {
+	for (i = 0, nfill = (tbin->ncached_max >> 1); i < nfill; i++) {
 		if ((run = bin->runcur) != NULL && run->nfree > 0)
 			ptr = arena_bin_malloc_easy(arena, bin, run);
 		else
 			ptr = arena_bin_malloc_hard(arena, bin);
-		if (ptr == NULL) {
-			if (i > 0) {
-				/*
-				 * Move valid pointers to the base of
-				 * tbin->slots.
-				 */
-				memmove(&tbin->slots[0],
-				    &tbin->slots[nfill - i],
-				    i * sizeof(void *));
-			}
+		if (ptr == NULL)
 			break;
-		}
-		/*
-		 * Fill slots such that the objects lowest in memory come last.
-		 * This causes tcache to use low objects first.
-		 */
-		tbin->slots[nfill - 1 - i] = ptr;
+		*(void **)ptr = tbin->avail;
+		tbin->avail = ptr;
 	}
 #ifdef JEMALLOC_STATS
 	bin->stats.nfills++;
@@ -64,7 +64,7 @@ CTL_PROTO(opt_xmalloc)
 CTL_PROTO(opt_zero)
 #endif
 #ifdef JEMALLOC_TCACHE
-CTL_PROTO(opt_lg_tcache_nslots)
+CTL_PROTO(opt_tcache)
 CTL_PROTO(opt_lg_tcache_gc_sweep)
 #endif
 #ifdef JEMALLOC_PROF
@@ -230,7 +230,7 @@ static const ctl_node_t opt_node[] = {
 	{NAME("zero"),	CTL(opt_zero)},
 #endif
 #ifdef JEMALLOC_TCACHE
-	{NAME("lg_tcache_nslots"),	CTL(opt_lg_tcache_nslots)},
+	{NAME("tcache"),	CTL(opt_tcache)},
 	{NAME("lg_tcache_gc_sweep"),	CTL(opt_lg_tcache_gc_sweep)},
 #endif
 #ifdef JEMALLOC_PROF
@@ -1070,7 +1070,7 @@ CTL_RO_GEN(opt_xmalloc, opt_xmalloc, bool)
 CTL_RO_GEN(opt_zero, opt_zero, bool)
 #endif
 #ifdef JEMALLOC_TCACHE
-CTL_RO_GEN(opt_lg_tcache_nslots, opt_lg_tcache_nslots, size_t)
+CTL_RO_GEN(opt_tcache, opt_tcache, bool)
 CTL_RO_GEN(opt_lg_tcache_gc_sweep, opt_lg_tcache_gc_sweep, ssize_t)
 #endif
 #ifdef JEMALLOC_PROF
@@ -482,13 +482,10 @@ MALLOC_OUT:
 					opt_lg_tcache_gc_sweep++;
 				break;
 			case 'h':
-				if (opt_lg_tcache_nslots > 0)
-					opt_lg_tcache_nslots--;
+				opt_tcache = false;
 				break;
 			case 'H':
-				if (opt_lg_tcache_nslots + 1 <
-				    (sizeof(size_t) << 3))
-					opt_lg_tcache_nslots++;
+				opt_tcache = true;
 				break;
 #endif
 #ifdef JEMALLOC_PROF
@@ -729,7 +726,7 @@ MALLOC_OUT:
 	 * default.
 	 */
 #ifdef JEMALLOC_TCACHE
-	if (tcache_nslots
+	if (opt_tcache
 # ifdef JEMALLOC_PROF
 	/*
 	 * Profile data storage concurrency is directly linked to
@@ -440,6 +440,9 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque,
 		if ((err = JEMALLOC_P(mallctl)("opt.prof", &bv, &bsz, NULL, 0))
 		    == 0)
 			write_cb(cbopaque, bv ? "F" : "f");
+		if ((err = JEMALLOC_P(mallctl)("opt.tcache", &bv, &bsz, NULL,
+		    0)) == 0)
+			write_cb(cbopaque, bv ? "H" : "h");
 		if ((err = JEMALLOC_P(mallctl)("opt.junk", &bv, &bsz, NULL, 0))
 		    == 0)
 			write_cb(cbopaque, bv ? "J" : "j");
@@ -550,21 +553,13 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque,
 			write_cb(cbopaque,
 			    "Min active:dirty page ratio per arena: N/A\n");
 		}
-		if ((err = JEMALLOC_P(mallctl)("opt.lg_tcache_nslots", &sv,
+		if ((err = JEMALLOC_P(mallctl)("opt.lg_tcache_gc_sweep", &ssv,
 		    &ssz, NULL, 0)) == 0) {
-			size_t tcache_nslots, tcache_gc_sweep;
-
-			tcache_nslots = (1U << sv);
-			write_cb(cbopaque,
-			    "Thread cache slots per size class: ");
-			write_cb(cbopaque, tcache_nslots ?
-			    umax2s(tcache_nslots, 10, s) : "N/A");
-			write_cb(cbopaque, "\n");
-
-			CTL_GET("opt.lg_tcache_gc_sweep", &ssv, ssize_t);
-			tcache_gc_sweep = (1U << ssv);
+			size_t tcache_gc_sweep = (1U << ssv);
+			bool tcache_enabled;
+			CTL_GET("opt.tcache", &tcache_enabled, bool);
 			write_cb(cbopaque, "Thread cache GC sweep interval: ");
-			write_cb(cbopaque, tcache_nslots && ssv >= 0 ?
+			write_cb(cbopaque, tcache_enabled && ssv >= 0 ?
 			    umax2s(tcache_gc_sweep, 10, s) : "N/A");
 			write_cb(cbopaque, "\n");
 		}
@@ -4,7 +4,7 @@
 /******************************************************************************/
 /* Data. */
 
-size_t	opt_lg_tcache_nslots = LG_TCACHE_NSLOTS_DEFAULT;
+bool	opt_tcache = true;
 ssize_t	opt_lg_tcache_gc_sweep = LG_TCACHE_GC_SWEEP_DEFAULT;
 
 /* Map of thread-specific caches. */
@@ -16,7 +16,6 @@ __thread tcache_t	*tcache_tls JEMALLOC_ATTR(tls_model("initial-exec"));
  */
 static pthread_key_t		tcache_tsd;
 
-size_t				tcache_nslots;
 unsigned			tcache_gc_incr;
 
 /******************************************************************************/
@@ -51,16 +50,14 @@ tcache_bin_flush(tcache_bin_t *tbin, size_t binind, unsigned rem
 #endif
     )
 {
-	arena_chunk_t *chunk;
-	arena_t *arena;
-	void *ptr;
-	unsigned i, ndeferred, ncached;
+	void *flush, *deferred, *ptr;
+	unsigned i, nflush, ndeferred;
 
-	for (ndeferred = tbin->ncached - rem; ndeferred > 0;) {
-		ncached = ndeferred;
+	for (flush = tbin->avail, nflush = tbin->ncached - rem; flush != NULL;
+	    flush = deferred, nflush = ndeferred) {
 		/* Lock the arena associated with the first object. */
-		chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(tbin->slots[0]);
-		arena = chunk->arena;
+		arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(flush);
+		arena_t *arena = chunk->arena;
 		malloc_mutex_lock(&arena->lock);
 #ifdef JEMALLOC_PROF
 		if (arena == tcache->arena) {
@@ -68,9 +65,12 @@ tcache_bin_flush(tcache_bin_t *tbin, size_t binind, unsigned rem
 			tcache->prof_accumbytes = 0;
 		}
 #endif
-		/* Deallocate every object that belongs to the locked arena. */
-		for (i = ndeferred = 0; i < ncached; i++) {
-			ptr = tbin->slots[i];
+		deferred = NULL;
+		ndeferred = 0;
+		for (i = 0; i < nflush; i++) {
+			ptr = flush;
+			assert(ptr != NULL);
+			flush = *(void **)ptr;
 			chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
 			if (chunk->arena == arena) {
 				size_t pageind = (((uintptr_t)ptr -
@@ -85,7 +85,8 @@ tcache_bin_flush(tcache_bin_t *tbin, size_t binind, unsigned rem
 				 * Stash the object, so that it can be handled
 				 * in a future pass.
 				 */
-				tbin->slots[ndeferred] = ptr;
+				*(void **)ptr = deferred;
+				deferred = ptr;
 				ndeferred++;
 			}
 		}
@@ -105,98 +106,41 @@ tcache_bin_flush(tcache_bin_t *tbin, size_t binind, unsigned rem
 		}
 #endif
 		malloc_mutex_unlock(&arena->lock);
+
+		if (flush != NULL) {
+			/*
+			 * This was the first pass, and rem cached objects
+			 * remain.
+			 */
+			tbin->avail = flush;
+		}
 	}
 
-	if (rem > 0) {
-		/*
-		 * Shift the remaining valid pointers to the base of the slots
-		 * array.
-		 */
-		memmove(&tbin->slots[0], &tbin->slots[tbin->ncached - rem],
-		    rem * sizeof(void *));
-	}
 	tbin->ncached = rem;
 }
 
-tcache_bin_t *
-tcache_bin_create(arena_t *arena)
-{
-	tcache_bin_t *ret;
-	size_t tsize;
-
-	tsize = sizeof(tcache_bin_t) + (sizeof(void *) * (tcache_nslots - 1));
-	if (tsize <= small_maxclass)
-		ret = (tcache_bin_t *)arena_malloc_small(arena, tsize, false);
-	else if (tsize <= bin_maxclass)
-		ret = (tcache_bin_t *)arena_malloc_medium(arena, tsize, false);
-	else
-		ret = (tcache_bin_t *)imalloc(tsize);
-	if (ret == NULL)
-		return (NULL);
-#ifdef JEMALLOC_STATS
-	memset(&ret->tstats, 0, sizeof(tcache_bin_stats_t));
-#endif
-	ret->low_water = 0;
-	ret->high_water = 0;
-	ret->ncached = 0;
-
-	return (ret);
-}
-
-void
-tcache_bin_destroy(tcache_t *tcache, tcache_bin_t *tbin, unsigned binind)
-{
-	arena_t *arena;
-	arena_chunk_t *chunk;
-	size_t pageind, tsize;
-	arena_chunk_map_t *mapelm;
-
-	chunk = CHUNK_ADDR2BASE(tbin);
-	arena = chunk->arena;
-	pageind = (((uintptr_t)tbin - (uintptr_t)chunk) >> PAGE_SHIFT);
-	mapelm = &chunk->map[pageind];
-
-#ifdef JEMALLOC_STATS
-	if (tbin->tstats.nrequests != 0) {
-		arena_t *arena = tcache->arena;
-		arena_bin_t *bin = &arena->bins[binind];
-		malloc_mutex_lock(&arena->lock);
-		bin->stats.nrequests += tbin->tstats.nrequests;
-		if (bin->reg_size <= small_maxclass)
-			arena->stats.nmalloc_small += tbin->tstats.nrequests;
-		else
-			arena->stats.nmalloc_medium += tbin->tstats.nrequests;
-		malloc_mutex_unlock(&arena->lock);
-	}
-#endif
-
-	assert(tbin->ncached == 0);
-	tsize = sizeof(tcache_bin_t) + (sizeof(void *) * (tcache_nslots - 1));
-	if (tsize <= bin_maxclass) {
-		malloc_mutex_lock(&arena->lock);
-		arena_dalloc_bin(arena, chunk, tbin, mapelm);
-		malloc_mutex_unlock(&arena->lock);
-	} else
-		idalloc(tbin);
-}
-
 tcache_t *
 tcache_create(arena_t *arena)
 {
 	tcache_t *tcache;
+	size_t size;
+	unsigned i;
 
-	if (sizeof(tcache_t) + (sizeof(tcache_bin_t *) * (nbins - 1)) <=
-	    small_maxclass) {
-		tcache = (tcache_t *)arena_malloc_small(arena, sizeof(tcache_t)
-		    + (sizeof(tcache_bin_t *) * (nbins - 1)), true);
-	} else if (sizeof(tcache_t) + (sizeof(tcache_bin_t *) * (nbins - 1)) <=
-	    bin_maxclass) {
-		tcache = (tcache_t *)arena_malloc_medium(arena, sizeof(tcache_t)
-		    + (sizeof(tcache_bin_t *) * (nbins - 1)), true);
-	} else {
-		tcache = (tcache_t *)icalloc(sizeof(tcache_t) +
-		    (sizeof(tcache_bin_t *) * (nbins - 1)));
-	}
+	size = sizeof(tcache_t) + (sizeof(tcache_bin_t) * (nbins - 1));
+	/*
+	 * Round up to the nearest multiple of the cacheline size, in order to
+	 * avoid the possibility of false cacheline sharing.
+	 *
+	 * That this works relies on the same logic as in ipalloc().
+	 */
+	size = (size + CACHELINE_MASK) & (-CACHELINE);
+
+	if (size <= small_maxclass)
+		tcache = (tcache_t *)arena_malloc_small(arena, size, true);
+	else if (size <= bin_maxclass)
+		tcache = (tcache_t *)arena_malloc_medium(arena, size, true);
+	else
+		tcache = (tcache_t *)icalloc(size);
 
 	if (tcache == NULL)
 		return (NULL);
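The rounding in tcache_create() above relies on CACHELINE being a power of two and CACHELINE_MASK being CACHELINE - 1; `(size + CACHELINE_MASK) & (-CACHELINE)` then rounds size up to the next cacheline multiple. A standalone illustration (the constant values here are assumptions for the example; the real definitions live in jemalloc's internal headers):

	#include <assert.h>
	#include <stddef.h>

	#define	CACHELINE	((size_t)64)	/* Assumed power of two. */
	#define	CACHELINE_MASK	(CACHELINE - 1)

	int
	main(void)
	{
		size_t size;

		/* Round up to the nearest multiple of the cacheline size. */
		size = (200 + CACHELINE_MASK) & (-CACHELINE);
		assert(size == 256);	/* 200 rounds up to 256. */

		size = (128 + CACHELINE_MASK) & (-CACHELINE);
		assert(size == 128);	/* Exact multiples are unchanged. */
		return (0);
	}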
@@ -210,6 +154,14 @@ tcache_create(arena_t *arena)
 #endif
 
 	tcache->arena = arena;
+	assert((TCACHE_NSLOTS_MAX & 1U) == 0);
+	for (i = 0; i < nbins; i++) {
+		if ((arena->bins[i].nregs << 1) <= TCACHE_NSLOTS_MAX) {
+			tcache->tbins[i].ncached_max = (arena->bins[i].nregs <<
+			    1);
+		} else
+			tcache->tbins[i].ncached_max = TCACHE_NSLOTS_MAX;
+	}
 
 	tcache_tls = tcache;
 	pthread_setspecific(tcache_tsd, tcache);
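The loop above caps each bin at the lesser of TCACHE_NSLOTS_MAX and twice the number of regions per run for the size class, so bins with few regions per run never cache more than two runs' worth of objects. The same computation as a one-liner (a sketch; the helper name is invented, and the constant is the one defined earlier in the diff):

	#define	TCACHE_NSLOTS_MAX	200

	/* Per-bin cache limit: min(TCACHE_NSLOTS_MAX, 2 * nregs). */
	static unsigned
	tbin_ncached_max(unsigned nregs)
	{

		return ((nregs << 1) <= TCACHE_NSLOTS_MAX ?
		    (nregs << 1) : TCACHE_NSLOTS_MAX);
	}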
@@ -231,15 +183,29 @@ tcache_destroy(tcache_t *tcache)
 #endif
 
 	for (i = 0; i < nbins; i++) {
-		tcache_bin_t *tbin = tcache->tbins[i];
-		if (tbin != NULL) {
-			tcache_bin_flush(tbin, i, 0
+		tcache_bin_t *tbin = &tcache->tbins[i];
+		tcache_bin_flush(tbin, i, 0
 #ifdef JEMALLOC_PROF
-			    , tcache
+		    , tcache
 #endif
-			    );
-			tcache_bin_destroy(tcache, tbin, i);
-		}
+		    );
+
+#ifdef JEMALLOC_STATS
+		if (tbin->tstats.nrequests != 0) {
+			arena_t *arena = tcache->arena;
+			arena_bin_t *bin = &arena->bins[i];
+			malloc_mutex_lock(&arena->lock);
+			bin->stats.nrequests += tbin->tstats.nrequests;
+			if (bin->reg_size <= small_maxclass) {
+				arena->stats.nmalloc_small +=
+				    tbin->tstats.nrequests;
+			} else {
+				arena->stats.nmalloc_medium +=
+				    tbin->tstats.nrequests;
+			}
+			malloc_mutex_unlock(&arena->lock);
+		}
+#endif
 	}
 
 #ifdef JEMALLOC_PROF
@@ -286,21 +252,17 @@ tcache_stats_merge(tcache_t *tcache, arena_t *arena)
 	/* Merge and reset tcache stats. */
 	for (i = 0; i < mbin0; i++) {
 		arena_bin_t *bin = &arena->bins[i];
-		tcache_bin_t *tbin = tcache->tbins[i];
-		if (tbin != NULL) {
-			bin->stats.nrequests += tbin->tstats.nrequests;
-			arena->stats.nmalloc_small += tbin->tstats.nrequests;
-			tbin->tstats.nrequests = 0;
-		}
+		tcache_bin_t *tbin = &tcache->tbins[i];
+		bin->stats.nrequests += tbin->tstats.nrequests;
+		arena->stats.nmalloc_small += tbin->tstats.nrequests;
+		tbin->tstats.nrequests = 0;
 	}
 	for (; i < nbins; i++) {
 		arena_bin_t *bin = &arena->bins[i];
-		tcache_bin_t *tbin = tcache->tbins[i];
-		if (tbin != NULL) {
-			bin->stats.nrequests += tbin->tstats.nrequests;
-			arena->stats.nmalloc_medium += tbin->tstats.nrequests;
-			tbin->tstats.nrequests = 0;
-		}
+		tcache_bin_t *tbin = &tcache->tbins[i];
+		bin->stats.nrequests += tbin->tstats.nrequests;
+		arena->stats.nmalloc_medium += tbin->tstats.nrequests;
+		tbin->tstats.nrequests = 0;
 	}
 }
 #endif
@@ -309,9 +271,7 @@ void
 tcache_boot(void)
 {
 
-	if (opt_lg_tcache_nslots > 0) {
-		tcache_nslots = (1U << opt_lg_tcache_nslots);
-
+	if (opt_tcache) {
 		/* Compute incremental GC event threshold. */
 		if (opt_lg_tcache_gc_sweep >= 0) {
 			tcache_gc_incr = ((1U << opt_lg_tcache_gc_sweep) /
@@ -319,10 +279,7 @@ tcache_boot(void)
 			    0) ? 0 : 1);
 		} else
 			tcache_gc_incr = 0;
-	} else
-		tcache_nslots = 0;
 
-	if (tcache_nslots != 0) {
 		if (pthread_key_create(&tcache_tsd, tcache_thread_cleanup) !=
 		    0) {
 			malloc_write(