Enhance the H/h MALLOC_OPTIONS flags to control the number of tcache bin slots,
rather than just enabling/disabling the tcache. Fix an off-by-one bug in large object stats recording.
This commit is contained in:
parent
3f3ecfb8e8
commit
279e09d1ff
@ -254,15 +254,21 @@ will disable dirty page purging.
|
||||
@roff_tcache@.Ev JEMALLOC_OPTIONS=14g
|
||||
@roff_tcache@will disable garbage collection.
|
||||
@roff_tcache@.It H
|
||||
@roff_tcache@When there are multiple threads, use thread-specific caching for
|
||||
@roff_tcache@small and medium objects.
|
||||
@roff_tcache@This option is enabled by default.
|
||||
@roff_tcache@Double/halve the number of thread-specific cache slots per size
|
||||
@roff_tcache@class.
|
||||
@roff_tcache@When there are multiple threads, each thread uses a
|
||||
@roff_tcache@thread-specific cache for small and medium objects.
|
||||
@roff_tcache@Thread-specific caching allows many allocations to be satisfied
|
||||
@roff_tcache@without performing any thread synchronization, at the cost of
|
||||
@roff_tcache@increased memory use.
|
||||
@roff_tcache@See the
|
||||
@roff_tcache@.Dq G
|
||||
@roff_tcache@option for related tuning information.
|
||||
@roff_tcache@The default number of cache slots is 128;
|
||||
@roff_tcache@.Ev JEMALLOC_OPTIONS=7h
|
||||
@roff_tcache@will disable thread-specific caching.
|
||||
@roff_tcache@Note that one cache slot per size class is not a valid
|
||||
@roff_tcache@configuration due to implementation details.
|
||||
@roff_fill@.It J
|
||||
@roff_fill@Each byte of new memory allocated by
|
||||
@roff_fill@.Fn @jemalloc_prefix@malloc
|
||||
|
@ -272,9 +272,11 @@ __FBSDID("$FreeBSD: src/lib/libc/stdlib/malloc.c,v 1.183 2008/12/01 10:20:59 jas
|
||||
PAGE_SHIFT)))
|
||||
|
||||
#ifdef JEMALLOC_TCACHE
|
||||
/* Number of cache slots for each bin in the thread cache. */
|
||||
# define TCACHE_LG_NSLOTS 7
|
||||
# define TCACHE_NSLOTS (1U << TCACHE_LG_NSLOTS)
|
||||
/*
|
||||
* Default number of cache slots for each bin in the thread cache (0:
|
||||
* disabled).
|
||||
*/
|
||||
# define LG_TCACHE_NSLOTS_DEFAULT 7
|
||||
/*
|
||||
* (1U << opt_lg_tcache_gc_sweep) is the approximate number of
|
||||
* allocation events between full GC sweeps (-1: disabled). Integer
|
||||
@ -721,7 +723,7 @@ struct tcache_bin_s {
|
||||
unsigned low_water; /* Min # cached since last GC. */
|
||||
unsigned high_water; /* Max # cached since last GC. */
|
||||
unsigned ncached; /* # of cached objects. */
|
||||
void *slots[TCACHE_NSLOTS];
|
||||
void *slots[1]; /* Dynamically sized. */
|
||||
};
|
||||
|
||||
struct tcache_s {
|
||||
@ -1038,6 +1040,12 @@ static __thread tcache_t *tcache_tls
|
||||
*/
|
||||
static pthread_key_t tcache_tsd;
|
||||
|
||||
/*
|
||||
* Number of cache slots for each bin in the thread cache, or 0 if tcache is
|
||||
* disabled.
|
||||
*/
|
||||
size_t tcache_nslots;
|
||||
|
||||
/* Number of tcache allocation/deallocation events between incremental GCs. */
|
||||
unsigned tcache_gc_incr;
|
||||
#endif
|
||||
@ -1080,7 +1088,7 @@ static bool opt_junk = false;
|
||||
# endif
|
||||
#endif
|
||||
#ifdef JEMALLOC_TCACHE
|
||||
static bool opt_tcache = true;
|
||||
static size_t opt_lg_tcache_nslots = LG_TCACHE_NSLOTS_DEFAULT;
|
||||
static ssize_t opt_lg_tcache_gc_sweep = LG_TCACHE_GC_SWEEP_DEFAULT;
|
||||
#endif
|
||||
static ssize_t opt_lg_dirty_mult = LG_DIRTY_MULT_DEFAULT;
|
||||
@ -3174,7 +3182,7 @@ tcache_bin_fill(tcache_t *tcache, tcache_bin_t *tbin, size_t binind)
|
||||
arena = tcache->arena;
|
||||
bin = &arena->bins[binind];
|
||||
malloc_mutex_lock(&arena->lock);
|
||||
for (i = 0; i < (TCACHE_NSLOTS >> 1); i++) {
|
||||
for (i = 0; i < (tcache_nslots >> 1); i++) {
|
||||
if ((run = bin->runcur) != NULL && run->nfree > 0)
|
||||
ptr = arena_bin_malloc_easy(arena, bin, run);
|
||||
else
|
||||
@ -3185,7 +3193,7 @@ tcache_bin_fill(tcache_t *tcache, tcache_bin_t *tbin, size_t binind)
|
||||
* Fill tbin such that the objects lowest in memory are used
|
||||
* first.
|
||||
*/
|
||||
tbin->slots[(TCACHE_NSLOTS >> 1) - 1 - i] = ptr;
|
||||
tbin->slots[(tcache_nslots >> 1) - 1 - i] = ptr;
|
||||
}
|
||||
#ifdef JEMALLOC_STATS
|
||||
bin->stats.nfills++;
|
||||
@ -3384,12 +3392,12 @@ arena_malloc_large(arena_t *arena, size_t size, bool zero)
|
||||
#ifdef JEMALLOC_STATS
|
||||
arena->stats.nmalloc_large++;
|
||||
arena->stats.allocated_large += size;
|
||||
arena->stats.lstats[size >> PAGE_SHIFT].nrequests++;
|
||||
arena->stats.lstats[size >> PAGE_SHIFT].curruns++;
|
||||
if (arena->stats.lstats[size >> PAGE_SHIFT].curruns >
|
||||
arena->stats.lstats[size >> PAGE_SHIFT].highruns) {
|
||||
arena->stats.lstats[size >> PAGE_SHIFT].highruns =
|
||||
arena->stats.lstats[size >> PAGE_SHIFT].curruns;
|
||||
arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nrequests++;
|
||||
arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns++;
|
||||
if (arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns >
|
||||
arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns) {
|
||||
arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns =
|
||||
arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns;
|
||||
}
|
||||
#endif
|
||||
malloc_mutex_unlock(&arena->lock);
|
||||
@ -3415,7 +3423,7 @@ arena_malloc(size_t size, bool zero)
|
||||
|
||||
if (size <= bin_maxclass) {
|
||||
#ifdef JEMALLOC_TCACHE
|
||||
if (isthreaded && opt_tcache) {
|
||||
if (isthreaded && tcache_nslots) {
|
||||
tcache_t *tcache = tcache_tls;
|
||||
if (tcache == NULL) {
|
||||
tcache = tcache_create(choose_arena());
|
||||
@ -3508,12 +3516,12 @@ arena_palloc(arena_t *arena, size_t alignment, size_t size, size_t alloc_size)
|
||||
#ifdef JEMALLOC_STATS
|
||||
arena->stats.nmalloc_large++;
|
||||
arena->stats.allocated_large += size;
|
||||
arena->stats.lstats[size >> PAGE_SHIFT].nrequests++;
|
||||
arena->stats.lstats[size >> PAGE_SHIFT].curruns++;
|
||||
if (arena->stats.lstats[size >> PAGE_SHIFT].curruns >
|
||||
arena->stats.lstats[size >> PAGE_SHIFT].highruns) {
|
||||
arena->stats.lstats[size >> PAGE_SHIFT].highruns =
|
||||
arena->stats.lstats[size >> PAGE_SHIFT].curruns;
|
||||
arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nrequests++;
|
||||
arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns++;
|
||||
if (arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns >
|
||||
arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns) {
|
||||
arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns =
|
||||
arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns;
|
||||
}
|
||||
#endif
|
||||
malloc_mutex_unlock(&arena->lock);
|
||||
@ -4013,7 +4021,7 @@ tcache_bin_sort(tcache_bin_t *tbin)
|
||||
{
|
||||
unsigned e, i;
|
||||
void **fr, **to;
|
||||
void *mslots[TCACHE_NSLOTS];
|
||||
void *mslots[tcache_nslots];
|
||||
|
||||
/*
|
||||
* Perform iterative merge sort, swapping source and destination arrays
|
||||
@ -4153,9 +4161,9 @@ tcache_dalloc(tcache_t *tcache, void *ptr)
|
||||
tcache->tbins[binind] = tbin;
|
||||
}
|
||||
|
||||
if (tbin->ncached == TCACHE_NSLOTS)
|
||||
tcache_bin_flush(tbin, binind, (TCACHE_NSLOTS >> 1));
|
||||
assert(tbin->ncached < TCACHE_NSLOTS);
|
||||
if (tbin->ncached == tcache_nslots)
|
||||
tcache_bin_flush(tbin, binind, (tcache_nslots >> 1));
|
||||
assert(tbin->ncached < tcache_nslots);
|
||||
tbin->slots[tbin->ncached] = ptr;
|
||||
tbin->ncached++;
|
||||
if (tbin->ncached > tbin->high_water)
|
||||
@ -4220,7 +4228,7 @@ arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr)
|
||||
if ((mapelm->bits & CHUNK_MAP_LARGE) == 0) {
|
||||
/* Small allocation. */
|
||||
#ifdef JEMALLOC_TCACHE
|
||||
if (isthreaded && opt_tcache) {
|
||||
if (isthreaded && tcache_nslots) {
|
||||
tcache_t *tcache = tcache_tls;
|
||||
if ((uintptr_t)tcache > (uintptr_t)1)
|
||||
tcache_dalloc(tcache, ptr);
|
||||
@ -4701,15 +4709,15 @@ static tcache_bin_t *
|
||||
tcache_bin_create(arena_t *arena)
|
||||
{
|
||||
tcache_bin_t *ret;
|
||||
size_t tsize;
|
||||
|
||||
if (sizeof(tcache_bin_t) <= small_maxclass) {
|
||||
ret = (tcache_bin_t *)arena_malloc_small(arena,
|
||||
sizeof(tcache_bin_t), false);
|
||||
} else if (sizeof(tcache_bin_t) <= bin_maxclass) {
|
||||
ret = (tcache_bin_t *)arena_malloc_medium(arena,
|
||||
sizeof(tcache_bin_t), false);
|
||||
} else
|
||||
ret = imalloc(sizeof(tcache_bin_t));
|
||||
tsize = sizeof(tcache_bin_t) + (sizeof(void *) * (tcache_nslots - 1));
|
||||
if (tsize <= small_maxclass)
|
||||
ret = (tcache_bin_t *)arena_malloc_small(arena, tsize, false);
|
||||
else if (tsize <= bin_maxclass)
|
||||
ret = (tcache_bin_t *)arena_malloc_medium(arena, tsize, false);
|
||||
else
|
||||
ret = (tcache_bin_t *)imalloc(tsize);
|
||||
if (ret == NULL)
|
||||
return (NULL);
|
||||
#ifdef JEMALLOC_STATS
|
||||
@ -4727,7 +4735,7 @@ tcache_bin_destroy(tcache_t *tcache, tcache_bin_t *tbin, unsigned binind)
|
||||
{
|
||||
arena_t *arena;
|
||||
arena_chunk_t *chunk;
|
||||
size_t pageind;
|
||||
size_t pageind, tsize;
|
||||
arena_chunk_map_t *mapelm;
|
||||
|
||||
chunk = CHUNK_ADDR2BASE(tbin);
|
||||
@ -4750,7 +4758,8 @@ tcache_bin_destroy(tcache_t *tcache, tcache_bin_t *tbin, unsigned binind)
|
||||
#endif
|
||||
|
||||
assert(tbin->ncached == 0);
|
||||
if (sizeof(tcache_bin_t) <= bin_maxclass) {
|
||||
tsize = sizeof(tcache_bin_t) + (sizeof(void *) * (tcache_nslots - 1));
|
||||
if (tsize <= bin_maxclass) {
|
||||
malloc_mutex_lock(&arena->lock);
|
||||
arena_dalloc_bin(arena, chunk, tbin, mapelm);
|
||||
malloc_mutex_unlock(&arena->lock);
|
||||
@ -5622,10 +5631,13 @@ MALLOC_OUT:
|
||||
opt_lg_tcache_gc_sweep++;
|
||||
break;
|
||||
case 'h':
|
||||
opt_tcache = false;
|
||||
if (opt_lg_tcache_nslots > 0)
|
||||
opt_lg_tcache_nslots--;
|
||||
break;
|
||||
case 'H':
|
||||
opt_tcache = true;
|
||||
if (opt_lg_tcache_nslots + 1 <
|
||||
(sizeof(size_t) << 3))
|
||||
opt_lg_tcache_nslots++;
|
||||
break;
|
||||
#endif
|
||||
#ifdef JEMALLOC_FILL
|
||||
@ -5810,13 +5822,18 @@ MALLOC_OUT:
|
||||
}
|
||||
|
||||
#ifdef JEMALLOC_TCACHE
|
||||
/* Compute incremental GC event threshold. */
|
||||
if (opt_lg_tcache_gc_sweep >= 0) {
|
||||
tcache_gc_incr = ((1U << opt_lg_tcache_gc_sweep) /
|
||||
nbins) + (((1U << opt_lg_tcache_gc_sweep) % nbins == 0)
|
||||
? 0 : 1);
|
||||
if (opt_lg_tcache_nslots > 0) {
|
||||
tcache_nslots = (1U << opt_lg_tcache_nslots);
|
||||
|
||||
/* Compute incremental GC event threshold. */
|
||||
if (opt_lg_tcache_gc_sweep >= 0) {
|
||||
tcache_gc_incr = ((1U << opt_lg_tcache_gc_sweep) /
|
||||
nbins) + (((1U << opt_lg_tcache_gc_sweep) % nbins ==
|
||||
0) ? 0 : 1);
|
||||
} else
|
||||
tcache_gc_incr = 0;
|
||||
} else
|
||||
tcache_gc_incr = 0;
|
||||
tcache_nslots = 0;
|
||||
#endif
|
||||
|
||||
/* Set variables according to the value of opt_lg_chunk. */
|
||||
@ -5914,7 +5931,7 @@ MALLOC_OUT:
|
||||
#endif
|
||||
|
||||
#ifdef JEMALLOC_TCACHE
|
||||
if (opt_tcache) {
|
||||
if (tcache_nslots) {
|
||||
if (pthread_key_create(&tcache_tsd, tcache_thread_cleanup) !=
|
||||
0) {
|
||||
malloc_message("<jemalloc>",
|
||||
@ -5938,7 +5955,7 @@ MALLOC_OUT:
|
||||
* default.
|
||||
*/
|
||||
#ifdef JEMALLOC_TCACHE
|
||||
if (opt_tcache) {
|
||||
if (tcache_nslots) {
|
||||
/*
|
||||
* Only large object allocation/deallocation is
|
||||
* guaranteed to acquire an arena mutex, so we can get
|
||||
@ -6397,9 +6414,6 @@ malloc_stats_print(const char *opts)
|
||||
"\n", "");
|
||||
malloc_message("Boolean JEMALLOC_OPTIONS: ",
|
||||
opt_abort ? "A" : "a", "", "");
|
||||
#ifdef JEMALLOC_TCACHE
|
||||
malloc_message(opt_tcache ? "H" : "h", "", "", "");
|
||||
#endif
|
||||
#ifdef JEMALLOC_FILL
|
||||
malloc_message(opt_junk ? "J" : "j", "", "", "");
|
||||
#endif
|
||||
@ -6459,18 +6473,17 @@ malloc_stats_print(const char *opts)
|
||||
"", "", "");
|
||||
}
|
||||
#ifdef JEMALLOC_TCACHE
|
||||
if (opt_tcache) {
|
||||
malloc_message("Thread cache GC sweep interval: ",
|
||||
(tcache_gc_incr > 0) ?
|
||||
umax2s((1U << opt_lg_tcache_gc_sweep), 10, s)
|
||||
: "N/A",
|
||||
"", "");
|
||||
malloc_message(" (increment interval: ",
|
||||
(tcache_gc_incr > 0) ?
|
||||
umax2s(tcache_gc_incr, 10, s)
|
||||
: "N/A",
|
||||
")\n", "");
|
||||
}
|
||||
malloc_message("Thread cache slots per size class: ",
|
||||
tcache_nslots ? umax2s(tcache_nslots, 10, s) : "N/A",
|
||||
"\n", "");
|
||||
malloc_message("Thread cache GC sweep interval: ",
|
||||
(tcache_nslots && tcache_gc_incr > 0) ?
|
||||
umax2s((1U << opt_lg_tcache_gc_sweep), 10, s) : "N/A",
|
||||
"", "");
|
||||
malloc_message(" (increment interval: ",
|
||||
(tcache_nslots && tcache_gc_incr > 0) ?
|
||||
umax2s(tcache_gc_incr, 10, s) : "N/A",
|
||||
")\n", "");
|
||||
#endif
|
||||
malloc_message("Chunk size: ", umax2s(chunksize, 10, s), "",
|
||||
"");
|
||||
|
Loading…
Reference in New Issue
Block a user