Remove tcache bin sorting during flush.
This feature caused significant performance degradation, and the fragmentation reduction benefits were difficult to quantify.
This commit is contained in:
parent
c66aaf1476
commit
d8f565f239
@ -414,15 +414,6 @@ Double/halve the size of the maximum size class that is a multiple of the
|
|||||||
quantum (8 or 16 bytes, depending on architecture).
|
quantum (8 or 16 bytes, depending on architecture).
|
||||||
Above this size, cacheline spacing is used for size classes.
|
Above this size, cacheline spacing is used for size classes.
|
||||||
The default value is 128 bytes.
|
The default value is 128 bytes.
|
||||||
@roff_tcache@.It S
|
|
||||||
@roff_tcache@Sort the objects of a particular size class that are stored in a
|
|
||||||
@roff_tcache@thread-specific cache just before flushing some of them from the
|
|
||||||
@roff_tcache@cache, such that the objects highest in memory are preferentially
|
|
||||||
@roff_tcache@freed.
|
|
||||||
@roff_tcache@This tends to reduce fragmentation, but sorting is O(n lg n), and in
|
|
||||||
@roff_tcache@practice it is expensive enough to have a moderate performance
|
|
||||||
@roff_tcache@impact.
|
|
||||||
@roff_tcache@This option is enabled by default.
|
|
||||||
@roff_trace@.It T
|
@roff_trace@.It T
|
||||||
@roff_trace@Write a verbose trace log to a set of files named according to the
|
@roff_trace@Write a verbose trace log to a set of files named according to the
|
||||||
@roff_trace@pattern
|
@roff_trace@pattern
|
||||||
@ -722,13 +713,6 @@ option.
|
|||||||
@roff_tcache@option.
|
@roff_tcache@option.
|
||||||
@roff_tcache@.Ed
|
@roff_tcache@.Ed
|
||||||
.\"-----------------------------------------------------------------------------
|
.\"-----------------------------------------------------------------------------
|
||||||
@roff_tcache@.It Sy "opt.tcache_sort (bool) r-"
|
|
||||||
@roff_tcache@.Bd -ragged -offset indent -compact
|
|
||||||
@roff_tcache@See the
|
|
||||||
@roff_tcache@.Dq S
|
|
||||||
@roff_tcache@option.
|
|
||||||
@roff_tcache@.Ed
|
|
||||||
.\"-----------------------------------------------------------------------------
|
|
||||||
.It Sy "opt.stats_print (bool) r-"
|
.It Sy "opt.stats_print (bool) r-"
|
||||||
.Bd -ragged -offset indent -compact
|
.Bd -ragged -offset indent -compact
|
||||||
See the
|
See the
|
||||||
|
@ -48,7 +48,6 @@ struct tcache_s {
|
|||||||
|
|
||||||
extern size_t opt_lg_tcache_nslots;
|
extern size_t opt_lg_tcache_nslots;
|
||||||
extern ssize_t opt_lg_tcache_gc_sweep;
|
extern ssize_t opt_lg_tcache_gc_sweep;
|
||||||
extern bool opt_tcache_sort;
|
|
||||||
|
|
||||||
/* Map of thread-specific caches. */
|
/* Map of thread-specific caches. */
|
||||||
extern __thread tcache_t *tcache_tls
|
extern __thread tcache_t *tcache_tls
|
||||||
|
@ -655,14 +655,6 @@ MALLOC_OUT:
|
|||||||
opt_lg_cspace_max)
|
opt_lg_cspace_max)
|
||||||
opt_lg_qspace_max++;
|
opt_lg_qspace_max++;
|
||||||
break;
|
break;
|
||||||
#ifdef JEMALLOC_TCACHE
|
|
||||||
case 's':
|
|
||||||
opt_tcache_sort = false;
|
|
||||||
break;
|
|
||||||
case 'S':
|
|
||||||
opt_tcache_sort = true;
|
|
||||||
break;
|
|
||||||
#endif
|
|
||||||
#ifdef JEMALLOC_TRACE
|
#ifdef JEMALLOC_TRACE
|
||||||
case 't':
|
case 't':
|
||||||
opt_trace = false;
|
opt_trace = false;
|
||||||
|
@ -63,7 +63,6 @@ CTL_PROTO(opt_zero)
|
|||||||
#ifdef JEMALLOC_TCACHE
|
#ifdef JEMALLOC_TCACHE
|
||||||
CTL_PROTO(opt_lg_tcache_nslots)
|
CTL_PROTO(opt_lg_tcache_nslots)
|
||||||
CTL_PROTO(opt_lg_tcache_gc_sweep)
|
CTL_PROTO(opt_lg_tcache_gc_sweep)
|
||||||
CTL_PROTO(opt_tcache_sort)
|
|
||||||
#endif
|
#endif
|
||||||
CTL_PROTO(opt_stats_print)
|
CTL_PROTO(opt_stats_print)
|
||||||
#ifdef JEMALLOC_TRACE
|
#ifdef JEMALLOC_TRACE
|
||||||
@ -219,7 +218,6 @@ static const ctl_node_t opt_node[] = {
|
|||||||
#ifdef JEMALLOC_TCACHE
|
#ifdef JEMALLOC_TCACHE
|
||||||
{NAME("lg_tcache_nslots"), CTL(opt_lg_tcache_nslots)},
|
{NAME("lg_tcache_nslots"), CTL(opt_lg_tcache_nslots)},
|
||||||
{NAME("lg_tcache_gc_sweep"), CTL(opt_lg_tcache_gc_sweep)},
|
{NAME("lg_tcache_gc_sweep"), CTL(opt_lg_tcache_gc_sweep)},
|
||||||
{NAME("tcache_sort"), CTL(opt_tcache_sort)},
|
|
||||||
#endif
|
#endif
|
||||||
{NAME("stats_print"), CTL(opt_stats_print)},
|
{NAME("stats_print"), CTL(opt_stats_print)},
|
||||||
#ifdef JEMALLOC_TRACE
|
#ifdef JEMALLOC_TRACE
|
||||||
@ -1026,7 +1024,6 @@ CTL_RO_GEN(opt_zero, opt_zero, bool)
|
|||||||
#ifdef JEMALLOC_TCACHE
|
#ifdef JEMALLOC_TCACHE
|
||||||
CTL_RO_GEN(opt_lg_tcache_nslots, opt_lg_tcache_nslots, size_t)
|
CTL_RO_GEN(opt_lg_tcache_nslots, opt_lg_tcache_nslots, size_t)
|
||||||
CTL_RO_GEN(opt_lg_tcache_gc_sweep, opt_lg_tcache_gc_sweep, ssize_t)
|
CTL_RO_GEN(opt_lg_tcache_gc_sweep, opt_lg_tcache_gc_sweep, ssize_t)
|
||||||
CTL_RO_GEN(opt_tcache_sort, opt_tcache_sort, bool)
|
|
||||||
#endif
|
#endif
|
||||||
CTL_RO_GEN(opt_stats_print, opt_stats_print, bool)
|
CTL_RO_GEN(opt_stats_print, opt_stats_print, bool)
|
||||||
#ifdef JEMALLOC_TRACE
|
#ifdef JEMALLOC_TRACE
|
||||||
|
@ -436,8 +436,6 @@ stats_print(void (*write4)(void *, const char *, const char *, const char *,
|
|||||||
if ((err = mallctl("opt.overcommit", &bv, &bsz, NULL, 0)) == 0)
|
if ((err = mallctl("opt.overcommit", &bv, &bsz, NULL, 0)) == 0)
|
||||||
write4(w4opaque, bv ? "O" : "o", "", "", "");
|
write4(w4opaque, bv ? "O" : "o", "", "", "");
|
||||||
write4(w4opaque, "P", "", "", "");
|
write4(w4opaque, "P", "", "", "");
|
||||||
if ((err = mallctl("opt.tcache_sort", &bv, &bsz, NULL, 0)) == 0)
|
|
||||||
write4(w4opaque, bv ? "S" : "s", "", "", "");
|
|
||||||
if ((err = mallctl("opt.trace", &bv, &bsz, NULL, 0)) == 0)
|
if ((err = mallctl("opt.trace", &bv, &bsz, NULL, 0)) == 0)
|
||||||
write4(w4opaque, bv ? "T" : "t", "", "", "");
|
write4(w4opaque, bv ? "T" : "t", "", "", "");
|
||||||
if ((err = mallctl("opt.sysv", &bv, &bsz, NULL, 0)) == 0)
|
if ((err = mallctl("opt.sysv", &bv, &bsz, NULL, 0)) == 0)
|
||||||
|
@ -6,7 +6,6 @@
|
|||||||
|
|
||||||
size_t opt_lg_tcache_nslots = LG_TCACHE_NSLOTS_DEFAULT;
|
size_t opt_lg_tcache_nslots = LG_TCACHE_NSLOTS_DEFAULT;
|
||||||
ssize_t opt_lg_tcache_gc_sweep = LG_TCACHE_GC_SWEEP_DEFAULT;
|
ssize_t opt_lg_tcache_gc_sweep = LG_TCACHE_GC_SWEEP_DEFAULT;
|
||||||
bool opt_tcache_sort = true;
|
|
||||||
|
|
||||||
/* Map of thread-specific caches. */
|
/* Map of thread-specific caches. */
|
||||||
__thread tcache_t *tcache_tls JEMALLOC_ATTR(tls_model("initial-exec"));
|
__thread tcache_t *tcache_tls JEMALLOC_ATTR(tls_model("initial-exec"));
|
||||||
@ -38,67 +37,6 @@ tcache_alloc_hard(tcache_t *tcache, tcache_bin_t *tbin, size_t binind)
|
|||||||
return (ret);
|
return (ret);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
 * Merge two adjacent runs held in fr -- fr[0..lcnt) followed immediately by
 * fr[lcnt..lcnt+rcnt) -- into to[0..lcnt+rcnt).
 *
 * Elements are compared by their uintptr_t value and the larger one is
 * emitted first, so two runs that are each in descending address order yield
 * a descending result.  On a tie the element from the right-hand run is
 * taken.  Either count may be zero, in which case the other run is simply
 * copied through.
 */
static inline void
tcache_bin_merge(void **to, void **fr, unsigned lcnt, unsigned rcnt)
{
	void **left = fr;
	void **right = fr + lcnt;
	void **out = to;
	unsigned nleft = lcnt;
	unsigned nright = rcnt;

	/* Interleave while both runs still have elements to offer. */
	for (; nleft > 0 && nright > 0; out++) {
		if ((uintptr_t)*left > (uintptr_t)*right) {
			*out = *left++;
			nleft--;
		} else {
			*out = *right++;
			nright--;
		}
	}

	/* At most one run is non-empty at this point; append it verbatim. */
	if (nleft > 0)
		memcpy(out, left, nleft * sizeof(void *));
	else if (nright > 0)
		memcpy(out, right, nright * sizeof(void *));
}
|
|
||||||
|
|
||||||
static inline void
|
|
||||||
tcache_bin_sort(tcache_bin_t *tbin)
|
|
||||||
{
|
|
||||||
unsigned e, i;
|
|
||||||
void **fr, **to;
|
|
||||||
void *mslots[tcache_nslots];
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Perform iterative merge sort, swapping source and destination arrays
|
|
||||||
* during each iteration.
|
|
||||||
*/
|
|
||||||
|
|
||||||
fr = mslots; to = tbin->slots;
|
|
||||||
for (e = 1; e < tbin->ncached; e <<= 1) {
|
|
||||||
void **tmp = fr; fr = to; to = tmp;
|
|
||||||
for (i = 0; i + (e << 1) <= tbin->ncached; i += (e << 1))
|
|
||||||
tcache_bin_merge(&to[i], &fr[i], e, e);
|
|
||||||
if (i + e <= tbin->ncached) {
|
|
||||||
tcache_bin_merge(&to[i], &fr[i],
|
|
||||||
e, tbin->ncached - (i + e));
|
|
||||||
} else if (i < tbin->ncached)
|
|
||||||
tcache_bin_merge(&to[i], &fr[i], tbin->ncached - i, 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Copy the final result out of mslots, if necessary. */
|
|
||||||
if (to == mslots)
|
|
||||||
memcpy(tbin->slots, mslots, sizeof(void *) * tbin->ncached);
|
|
||||||
|
|
||||||
#ifdef JEMALLOC_DEBUG
|
|
||||||
for (i = 1; i < tbin->ncached; i++)
|
|
||||||
assert(tbin->slots[i-1] > tbin->slots[i]);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
void
|
void
|
||||||
tcache_bin_flush(tcache_bin_t *tbin, size_t binind, unsigned rem)
|
tcache_bin_flush(tcache_bin_t *tbin, size_t binind, unsigned rem)
|
||||||
{
|
{
|
||||||
@ -107,12 +45,6 @@ tcache_bin_flush(tcache_bin_t *tbin, size_t binind, unsigned rem)
|
|||||||
void *ptr;
|
void *ptr;
|
||||||
unsigned i, ndeferred, ncached;
|
unsigned i, ndeferred, ncached;
|
||||||
|
|
||||||
if (opt_tcache_sort && rem > 0) {
|
|
||||||
assert(rem < tbin->ncached);
|
|
||||||
/* Sort pointers such that the highest objects will be freed. */
|
|
||||||
tcache_bin_sort(tbin);
|
|
||||||
}
|
|
||||||
|
|
||||||
for (ndeferred = tbin->ncached - rem; ndeferred > 0;) {
|
for (ndeferred = tbin->ncached - rem; ndeferred > 0;) {
|
||||||
ncached = ndeferred;
|
ncached = ndeferred;
|
||||||
/* Lock the arena associated with the first object. */
|
/* Lock the arena associated with the first object. */
|
||||||
|
Loading…
Reference in New Issue
Block a user