Enhance the H/h MALLOC_OPTIONS flags to control the number of tcache bin slots,
rather than just enabling/disabling the tcache. Fix an off-by-one bug in large object stats recording.
This commit is contained in:
parent
3f3ecfb8e8
commit
279e09d1ff
@ -254,15 +254,21 @@ will disable dirty page purging.
|
|||||||
@roff_tcache@.Ev JEMALLOC_OPTIONS=14g
|
@roff_tcache@.Ev JEMALLOC_OPTIONS=14g
|
||||||
@roff_tcache@will disable garbage collection.
|
@roff_tcache@will disable garbage collection.
|
||||||
@roff_tcache@.It H
|
@roff_tcache@.It H
|
||||||
@roff_tcache@When there are multiple threads, use thread-specific caching for
|
@roff_tcache@Double/halve the number of thread-specific cache slots per size
|
||||||
@roff_tcache@small and medium objects.
|
@roff_tcache@class.
|
||||||
@roff_tcache@This option is enabled by default.
|
@roff_tcache@When there are multiple threads, each thread uses a
|
||||||
|
@roff_tcache@thread-specific cache for small and medium objects.
|
||||||
@roff_tcache@Thread-specific caching allows many allocations to be satisfied
|
@roff_tcache@Thread-specific caching allows many allocations to be satisfied
|
||||||
@roff_tcache@without performing any thread synchronization, at the cost of
|
@roff_tcache@without performing any thread synchronization, at the cost of
|
||||||
@roff_tcache@increased memory use.
|
@roff_tcache@increased memory use.
|
||||||
@roff_tcache@See the
|
@roff_tcache@See the
|
||||||
@roff_tcache@.Dq G
|
@roff_tcache@.Dq G
|
||||||
@roff_tcache@option for related tuning information.
|
@roff_tcache@option for related tuning information.
|
||||||
|
@roff_tcache@The default number of cache slots is 128;
|
||||||
|
@roff_tcache@.Ev JEMALLOC_OPTIONS=7h
|
||||||
|
@roff_tcache@will disable thread-specific caching.
|
||||||
|
@roff_tcache@Note that one cache slot per size class is not a valid
|
||||||
|
@roff_tcache@configuration due to implementation details.
|
||||||
@roff_fill@.It J
|
@roff_fill@.It J
|
||||||
@roff_fill@Each byte of new memory allocated by
|
@roff_fill@Each byte of new memory allocated by
|
||||||
@roff_fill@.Fn @jemalloc_prefix@malloc
|
@roff_fill@.Fn @jemalloc_prefix@malloc
|
||||||
|
@ -272,9 +272,11 @@ __FBSDID("$FreeBSD: src/lib/libc/stdlib/malloc.c,v 1.183 2008/12/01 10:20:59 jas
|
|||||||
PAGE_SHIFT)))
|
PAGE_SHIFT)))
|
||||||
|
|
||||||
#ifdef JEMALLOC_TCACHE
|
#ifdef JEMALLOC_TCACHE
|
||||||
/* Number of cache slots for each bin in the thread cache. */
|
/*
|
||||||
# define TCACHE_LG_NSLOTS 7
|
* Default number of cache slots for each bin in the thread cache (0:
|
||||||
# define TCACHE_NSLOTS (1U << TCACHE_LG_NSLOTS)
|
* disabled).
|
||||||
|
*/
|
||||||
|
# define LG_TCACHE_NSLOTS_DEFAULT 7
|
||||||
/*
|
/*
|
||||||
* (1U << opt_lg_tcache_gc_sweep) is the approximate number of
|
* (1U << opt_lg_tcache_gc_sweep) is the approximate number of
|
||||||
* allocation events between full GC sweeps (-1: disabled). Integer
|
* allocation events between full GC sweeps (-1: disabled). Integer
|
||||||
@ -721,7 +723,7 @@ struct tcache_bin_s {
|
|||||||
unsigned low_water; /* Min # cached since last GC. */
|
unsigned low_water; /* Min # cached since last GC. */
|
||||||
unsigned high_water; /* Max # cached since last GC. */
|
unsigned high_water; /* Max # cached since last GC. */
|
||||||
unsigned ncached; /* # of cached objects. */
|
unsigned ncached; /* # of cached objects. */
|
||||||
void *slots[TCACHE_NSLOTS];
|
void *slots[1]; /* Dynamically sized. */
|
||||||
};
|
};
|
||||||
|
|
||||||
struct tcache_s {
|
struct tcache_s {
|
||||||
@ -1038,6 +1040,12 @@ static __thread tcache_t *tcache_tls
|
|||||||
*/
|
*/
|
||||||
static pthread_key_t tcache_tsd;
|
static pthread_key_t tcache_tsd;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Number of cache slots for each bin in the thread cache, or 0 if tcache is
|
||||||
|
* disabled.
|
||||||
|
*/
|
||||||
|
size_t tcache_nslots;
|
||||||
|
|
||||||
/* Number of tcache allocation/deallocation events between incremental GCs. */
|
/* Number of tcache allocation/deallocation events between incremental GCs. */
|
||||||
unsigned tcache_gc_incr;
|
unsigned tcache_gc_incr;
|
||||||
#endif
|
#endif
|
||||||
@ -1080,7 +1088,7 @@ static bool opt_junk = false;
|
|||||||
# endif
|
# endif
|
||||||
#endif
|
#endif
|
||||||
#ifdef JEMALLOC_TCACHE
|
#ifdef JEMALLOC_TCACHE
|
||||||
static bool opt_tcache = true;
|
static size_t opt_lg_tcache_nslots = LG_TCACHE_NSLOTS_DEFAULT;
|
||||||
static ssize_t opt_lg_tcache_gc_sweep = LG_TCACHE_GC_SWEEP_DEFAULT;
|
static ssize_t opt_lg_tcache_gc_sweep = LG_TCACHE_GC_SWEEP_DEFAULT;
|
||||||
#endif
|
#endif
|
||||||
static ssize_t opt_lg_dirty_mult = LG_DIRTY_MULT_DEFAULT;
|
static ssize_t opt_lg_dirty_mult = LG_DIRTY_MULT_DEFAULT;
|
||||||
@ -3174,7 +3182,7 @@ tcache_bin_fill(tcache_t *tcache, tcache_bin_t *tbin, size_t binind)
|
|||||||
arena = tcache->arena;
|
arena = tcache->arena;
|
||||||
bin = &arena->bins[binind];
|
bin = &arena->bins[binind];
|
||||||
malloc_mutex_lock(&arena->lock);
|
malloc_mutex_lock(&arena->lock);
|
||||||
for (i = 0; i < (TCACHE_NSLOTS >> 1); i++) {
|
for (i = 0; i < (tcache_nslots >> 1); i++) {
|
||||||
if ((run = bin->runcur) != NULL && run->nfree > 0)
|
if ((run = bin->runcur) != NULL && run->nfree > 0)
|
||||||
ptr = arena_bin_malloc_easy(arena, bin, run);
|
ptr = arena_bin_malloc_easy(arena, bin, run);
|
||||||
else
|
else
|
||||||
@ -3185,7 +3193,7 @@ tcache_bin_fill(tcache_t *tcache, tcache_bin_t *tbin, size_t binind)
|
|||||||
* Fill tbin such that the objects lowest in memory are used
|
* Fill tbin such that the objects lowest in memory are used
|
||||||
* first.
|
* first.
|
||||||
*/
|
*/
|
||||||
tbin->slots[(TCACHE_NSLOTS >> 1) - 1 - i] = ptr;
|
tbin->slots[(tcache_nslots >> 1) - 1 - i] = ptr;
|
||||||
}
|
}
|
||||||
#ifdef JEMALLOC_STATS
|
#ifdef JEMALLOC_STATS
|
||||||
bin->stats.nfills++;
|
bin->stats.nfills++;
|
||||||
@ -3384,12 +3392,12 @@ arena_malloc_large(arena_t *arena, size_t size, bool zero)
|
|||||||
#ifdef JEMALLOC_STATS
|
#ifdef JEMALLOC_STATS
|
||||||
arena->stats.nmalloc_large++;
|
arena->stats.nmalloc_large++;
|
||||||
arena->stats.allocated_large += size;
|
arena->stats.allocated_large += size;
|
||||||
arena->stats.lstats[size >> PAGE_SHIFT].nrequests++;
|
arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nrequests++;
|
||||||
arena->stats.lstats[size >> PAGE_SHIFT].curruns++;
|
arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns++;
|
||||||
if (arena->stats.lstats[size >> PAGE_SHIFT].curruns >
|
if (arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns >
|
||||||
arena->stats.lstats[size >> PAGE_SHIFT].highruns) {
|
arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns) {
|
||||||
arena->stats.lstats[size >> PAGE_SHIFT].highruns =
|
arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns =
|
||||||
arena->stats.lstats[size >> PAGE_SHIFT].curruns;
|
arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
malloc_mutex_unlock(&arena->lock);
|
malloc_mutex_unlock(&arena->lock);
|
||||||
@ -3415,7 +3423,7 @@ arena_malloc(size_t size, bool zero)
|
|||||||
|
|
||||||
if (size <= bin_maxclass) {
|
if (size <= bin_maxclass) {
|
||||||
#ifdef JEMALLOC_TCACHE
|
#ifdef JEMALLOC_TCACHE
|
||||||
if (isthreaded && opt_tcache) {
|
if (isthreaded && tcache_nslots) {
|
||||||
tcache_t *tcache = tcache_tls;
|
tcache_t *tcache = tcache_tls;
|
||||||
if (tcache == NULL) {
|
if (tcache == NULL) {
|
||||||
tcache = tcache_create(choose_arena());
|
tcache = tcache_create(choose_arena());
|
||||||
@ -3508,12 +3516,12 @@ arena_palloc(arena_t *arena, size_t alignment, size_t size, size_t alloc_size)
|
|||||||
#ifdef JEMALLOC_STATS
|
#ifdef JEMALLOC_STATS
|
||||||
arena->stats.nmalloc_large++;
|
arena->stats.nmalloc_large++;
|
||||||
arena->stats.allocated_large += size;
|
arena->stats.allocated_large += size;
|
||||||
arena->stats.lstats[size >> PAGE_SHIFT].nrequests++;
|
arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nrequests++;
|
||||||
arena->stats.lstats[size >> PAGE_SHIFT].curruns++;
|
arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns++;
|
||||||
if (arena->stats.lstats[size >> PAGE_SHIFT].curruns >
|
if (arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns >
|
||||||
arena->stats.lstats[size >> PAGE_SHIFT].highruns) {
|
arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns) {
|
||||||
arena->stats.lstats[size >> PAGE_SHIFT].highruns =
|
arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns =
|
||||||
arena->stats.lstats[size >> PAGE_SHIFT].curruns;
|
arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
malloc_mutex_unlock(&arena->lock);
|
malloc_mutex_unlock(&arena->lock);
|
||||||
@ -4013,7 +4021,7 @@ tcache_bin_sort(tcache_bin_t *tbin)
|
|||||||
{
|
{
|
||||||
unsigned e, i;
|
unsigned e, i;
|
||||||
void **fr, **to;
|
void **fr, **to;
|
||||||
void *mslots[TCACHE_NSLOTS];
|
void *mslots[tcache_nslots];
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Perform iterative merge sort, swapping source and destination arrays
|
* Perform iterative merge sort, swapping source and destination arrays
|
||||||
@ -4153,9 +4161,9 @@ tcache_dalloc(tcache_t *tcache, void *ptr)
|
|||||||
tcache->tbins[binind] = tbin;
|
tcache->tbins[binind] = tbin;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (tbin->ncached == TCACHE_NSLOTS)
|
if (tbin->ncached == tcache_nslots)
|
||||||
tcache_bin_flush(tbin, binind, (TCACHE_NSLOTS >> 1));
|
tcache_bin_flush(tbin, binind, (tcache_nslots >> 1));
|
||||||
assert(tbin->ncached < TCACHE_NSLOTS);
|
assert(tbin->ncached < tcache_nslots);
|
||||||
tbin->slots[tbin->ncached] = ptr;
|
tbin->slots[tbin->ncached] = ptr;
|
||||||
tbin->ncached++;
|
tbin->ncached++;
|
||||||
if (tbin->ncached > tbin->high_water)
|
if (tbin->ncached > tbin->high_water)
|
||||||
@ -4220,7 +4228,7 @@ arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr)
|
|||||||
if ((mapelm->bits & CHUNK_MAP_LARGE) == 0) {
|
if ((mapelm->bits & CHUNK_MAP_LARGE) == 0) {
|
||||||
/* Small allocation. */
|
/* Small allocation. */
|
||||||
#ifdef JEMALLOC_TCACHE
|
#ifdef JEMALLOC_TCACHE
|
||||||
if (isthreaded && opt_tcache) {
|
if (isthreaded && tcache_nslots) {
|
||||||
tcache_t *tcache = tcache_tls;
|
tcache_t *tcache = tcache_tls;
|
||||||
if ((uintptr_t)tcache > (uintptr_t)1)
|
if ((uintptr_t)tcache > (uintptr_t)1)
|
||||||
tcache_dalloc(tcache, ptr);
|
tcache_dalloc(tcache, ptr);
|
||||||
@ -4701,15 +4709,15 @@ static tcache_bin_t *
|
|||||||
tcache_bin_create(arena_t *arena)
|
tcache_bin_create(arena_t *arena)
|
||||||
{
|
{
|
||||||
tcache_bin_t *ret;
|
tcache_bin_t *ret;
|
||||||
|
size_t tsize;
|
||||||
|
|
||||||
if (sizeof(tcache_bin_t) <= small_maxclass) {
|
tsize = sizeof(tcache_bin_t) + (sizeof(void *) * (tcache_nslots - 1));
|
||||||
ret = (tcache_bin_t *)arena_malloc_small(arena,
|
if (tsize <= small_maxclass)
|
||||||
sizeof(tcache_bin_t), false);
|
ret = (tcache_bin_t *)arena_malloc_small(arena, tsize, false);
|
||||||
} else if (sizeof(tcache_bin_t) <= bin_maxclass) {
|
else if (tsize <= bin_maxclass)
|
||||||
ret = (tcache_bin_t *)arena_malloc_medium(arena,
|
ret = (tcache_bin_t *)arena_malloc_medium(arena, tsize, false);
|
||||||
sizeof(tcache_bin_t), false);
|
else
|
||||||
} else
|
ret = (tcache_bin_t *)imalloc(tsize);
|
||||||
ret = imalloc(sizeof(tcache_bin_t));
|
|
||||||
if (ret == NULL)
|
if (ret == NULL)
|
||||||
return (NULL);
|
return (NULL);
|
||||||
#ifdef JEMALLOC_STATS
|
#ifdef JEMALLOC_STATS
|
||||||
@ -4727,7 +4735,7 @@ tcache_bin_destroy(tcache_t *tcache, tcache_bin_t *tbin, unsigned binind)
|
|||||||
{
|
{
|
||||||
arena_t *arena;
|
arena_t *arena;
|
||||||
arena_chunk_t *chunk;
|
arena_chunk_t *chunk;
|
||||||
size_t pageind;
|
size_t pageind, tsize;
|
||||||
arena_chunk_map_t *mapelm;
|
arena_chunk_map_t *mapelm;
|
||||||
|
|
||||||
chunk = CHUNK_ADDR2BASE(tbin);
|
chunk = CHUNK_ADDR2BASE(tbin);
|
||||||
@ -4750,7 +4758,8 @@ tcache_bin_destroy(tcache_t *tcache, tcache_bin_t *tbin, unsigned binind)
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
assert(tbin->ncached == 0);
|
assert(tbin->ncached == 0);
|
||||||
if (sizeof(tcache_bin_t) <= bin_maxclass) {
|
tsize = sizeof(tcache_bin_t) + (sizeof(void *) * (tcache_nslots - 1));
|
||||||
|
if (tsize <= bin_maxclass) {
|
||||||
malloc_mutex_lock(&arena->lock);
|
malloc_mutex_lock(&arena->lock);
|
||||||
arena_dalloc_bin(arena, chunk, tbin, mapelm);
|
arena_dalloc_bin(arena, chunk, tbin, mapelm);
|
||||||
malloc_mutex_unlock(&arena->lock);
|
malloc_mutex_unlock(&arena->lock);
|
||||||
@ -5622,10 +5631,13 @@ MALLOC_OUT:
|
|||||||
opt_lg_tcache_gc_sweep++;
|
opt_lg_tcache_gc_sweep++;
|
||||||
break;
|
break;
|
||||||
case 'h':
|
case 'h':
|
||||||
opt_tcache = false;
|
if (opt_lg_tcache_nslots > 0)
|
||||||
|
opt_lg_tcache_nslots--;
|
||||||
break;
|
break;
|
||||||
case 'H':
|
case 'H':
|
||||||
opt_tcache = true;
|
if (opt_lg_tcache_nslots + 1 <
|
||||||
|
(sizeof(size_t) << 3))
|
||||||
|
opt_lg_tcache_nslots++;
|
||||||
break;
|
break;
|
||||||
#endif
|
#endif
|
||||||
#ifdef JEMALLOC_FILL
|
#ifdef JEMALLOC_FILL
|
||||||
@ -5810,13 +5822,18 @@ MALLOC_OUT:
|
|||||||
}
|
}
|
||||||
|
|
||||||
#ifdef JEMALLOC_TCACHE
|
#ifdef JEMALLOC_TCACHE
|
||||||
/* Compute incremental GC event threshold. */
|
if (opt_lg_tcache_nslots > 0) {
|
||||||
if (opt_lg_tcache_gc_sweep >= 0) {
|
tcache_nslots = (1U << opt_lg_tcache_nslots);
|
||||||
tcache_gc_incr = ((1U << opt_lg_tcache_gc_sweep) /
|
|
||||||
nbins) + (((1U << opt_lg_tcache_gc_sweep) % nbins == 0)
|
/* Compute incremental GC event threshold. */
|
||||||
? 0 : 1);
|
if (opt_lg_tcache_gc_sweep >= 0) {
|
||||||
|
tcache_gc_incr = ((1U << opt_lg_tcache_gc_sweep) /
|
||||||
|
nbins) + (((1U << opt_lg_tcache_gc_sweep) % nbins ==
|
||||||
|
0) ? 0 : 1);
|
||||||
|
} else
|
||||||
|
tcache_gc_incr = 0;
|
||||||
} else
|
} else
|
||||||
tcache_gc_incr = 0;
|
tcache_nslots = 0;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* Set variables according to the value of opt_lg_chunk. */
|
/* Set variables according to the value of opt_lg_chunk. */
|
||||||
@ -5914,7 +5931,7 @@ MALLOC_OUT:
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef JEMALLOC_TCACHE
|
#ifdef JEMALLOC_TCACHE
|
||||||
if (opt_tcache) {
|
if (tcache_nslots) {
|
||||||
if (pthread_key_create(&tcache_tsd, tcache_thread_cleanup) !=
|
if (pthread_key_create(&tcache_tsd, tcache_thread_cleanup) !=
|
||||||
0) {
|
0) {
|
||||||
malloc_message("<jemalloc>",
|
malloc_message("<jemalloc>",
|
||||||
@ -5938,7 +5955,7 @@ MALLOC_OUT:
|
|||||||
* default.
|
* default.
|
||||||
*/
|
*/
|
||||||
#ifdef JEMALLOC_TCACHE
|
#ifdef JEMALLOC_TCACHE
|
||||||
if (opt_tcache) {
|
if (tcache_nslots) {
|
||||||
/*
|
/*
|
||||||
* Only large object allocation/deallocation is
|
* Only large object allocation/deallocation is
|
||||||
* guaranteed to acquire an arena mutex, so we can get
|
* guaranteed to acquire an arena mutex, so we can get
|
||||||
@ -6397,9 +6414,6 @@ malloc_stats_print(const char *opts)
|
|||||||
"\n", "");
|
"\n", "");
|
||||||
malloc_message("Boolean JEMALLOC_OPTIONS: ",
|
malloc_message("Boolean JEMALLOC_OPTIONS: ",
|
||||||
opt_abort ? "A" : "a", "", "");
|
opt_abort ? "A" : "a", "", "");
|
||||||
#ifdef JEMALLOC_TCACHE
|
|
||||||
malloc_message(opt_tcache ? "H" : "h", "", "", "");
|
|
||||||
#endif
|
|
||||||
#ifdef JEMALLOC_FILL
|
#ifdef JEMALLOC_FILL
|
||||||
malloc_message(opt_junk ? "J" : "j", "", "", "");
|
malloc_message(opt_junk ? "J" : "j", "", "", "");
|
||||||
#endif
|
#endif
|
||||||
@ -6459,18 +6473,17 @@ malloc_stats_print(const char *opts)
|
|||||||
"", "", "");
|
"", "", "");
|
||||||
}
|
}
|
||||||
#ifdef JEMALLOC_TCACHE
|
#ifdef JEMALLOC_TCACHE
|
||||||
if (opt_tcache) {
|
malloc_message("Thread cache slots per size class: ",
|
||||||
malloc_message("Thread cache GC sweep interval: ",
|
tcache_nslots ? umax2s(tcache_nslots, 10, s) : "N/A",
|
||||||
(tcache_gc_incr > 0) ?
|
"\n", "");
|
||||||
umax2s((1U << opt_lg_tcache_gc_sweep), 10, s)
|
malloc_message("Thread cache GC sweep interval: ",
|
||||||
: "N/A",
|
(tcache_nslots && tcache_gc_incr > 0) ?
|
||||||
"", "");
|
umax2s((1U << opt_lg_tcache_gc_sweep), 10, s) : "N/A",
|
||||||
malloc_message(" (increment interval: ",
|
"", "");
|
||||||
(tcache_gc_incr > 0) ?
|
malloc_message(" (increment interval: ",
|
||||||
umax2s(tcache_gc_incr, 10, s)
|
(tcache_nslots && tcache_gc_incr > 0) ?
|
||||||
: "N/A",
|
umax2s(tcache_gc_incr, 10, s) : "N/A",
|
||||||
")\n", "");
|
")\n", "");
|
||||||
}
|
|
||||||
#endif
|
#endif
|
||||||
malloc_message("Chunk size: ", umax2s(chunksize, 10, s), "",
|
malloc_message("Chunk size: ", umax2s(chunksize, 10, s), "",
|
||||||
"");
|
"");
|
||||||
|
Loading…
Reference in New Issue
Block a user