Remove tcache bin sorting during flush.

This feature caused significant performance degradation, and the
fragmentation reduction benefits were difficult to quantify.
Jason Evans 2010-01-29 13:37:31 -08:00
parent c66aaf1476
commit d8f565f239
6 changed files with 0 additions and 98 deletions

View File

@@ -414,15 +414,6 @@ Double/halve the size of the maximum size class that is a multiple of the
 quantum (8 or 16 bytes, depending on architecture).
 Above this size, cacheline spacing is used for size classes.
 The default value is 128 bytes.
-@roff_tcache@.It S
-@roff_tcache@Sort the objects of a particular size class that are stored in a
-@roff_tcache@thread-specific cache just before flushing some of them from the
-@roff_tcache@cache, such that the objects highest in memory are preferentially
-@roff_tcache@freed.
-@roff_tcache@This tends to reduce fragmentation, but sorting is O(n lg n), and in
-@roff_tcache@practice it is expensive enough to have a moderate performance
-@roff_tcache@impact.
-@roff_tcache@This option is enabled by default.
 @roff_trace@.It T
 @roff_trace@Write a verbose trace log to a set of files named according to the
 @roff_trace@pattern
@@ -722,13 +713,6 @@ option.
 @roff_tcache@option.
 @roff_tcache@.Ed
 .\"-----------------------------------------------------------------------------
-@roff_tcache@.It Sy "opt.tcache_sort (bool) r-"
-@roff_tcache@.Bd -ragged -offset indent -compact
-@roff_tcache@See the
-@roff_tcache@.Dq S
-@roff_tcache@option.
-@roff_tcache@.Ed
-.\"-----------------------------------------------------------------------------
 .It Sy "opt.stats_print (bool) r-"
 .Bd -ragged -offset indent -compact
 See the
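The removed "S" option text above describes the policy being dropped: sort a bin's cached pointers in descending address order just before a flush, so the objects highest in memory are returned to the arena first. A minimal sketch of that policy, not jemalloc code: qsort(3) stands in for the hand-rolled merge sort removed later in this commit, and the helper names are hypothetical.

#include <stdint.h>
#include <stdlib.h>

/* Order pointers high-to-low, matching the removed "S" option's policy. */
static int
ptr_desc_cmp(const void *a, const void *b)
{
	uintptr_t pa = (uintptr_t)*(void *const *)a;
	uintptr_t pb = (uintptr_t)*(void *const *)b;

	if (pa > pb)
		return (-1);
	return (pa < pb);
}

/* Hypothetical helper: sort the cached slots before flushing some of them. */
static void
sort_slots_desc(void **slots, unsigned ncached)
{
	qsort(slots, ncached, sizeof(void *), ptr_desc_cmp);
}

Freeing high addresses first tends to concentrate surviving objects toward low addresses, which is the fragmentation benefit the option text claims; the commit removes it because the sort cost outweighed that benefit in practice.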

View File

@@ -48,7 +48,6 @@ struct tcache_s {
 extern size_t opt_lg_tcache_nslots;
 extern ssize_t opt_lg_tcache_gc_sweep;
-extern bool opt_tcache_sort;

 /* Map of thread-specific caches. */
 extern __thread tcache_t *tcache_tls

View File

@@ -655,14 +655,6 @@ MALLOC_OUT:
 				    opt_lg_cspace_max)
 					opt_lg_qspace_max++;
 				break;
-#ifdef JEMALLOC_TCACHE
-			case 's':
-				opt_tcache_sort = false;
-				break;
-			case 'S':
-				opt_tcache_sort = true;
-				break;
-#endif
 #ifdef JEMALLOC_TRACE
 			case 't':
 				opt_trace = false;
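The deleted cases follow the parser's flag convention: each letter in the options string toggles one boolean, lowercase disabling and uppercase enabling, with later letters overriding earlier ones. A self-contained sketch of that convention (the function and variable names here are hypothetical stand-ins for the initialization code shown above):

#include <stdbool.h>
#include <stdio.h>

static bool opt_tcache_sort = true;	/* default in the removed code */

static void
parse_option_flags(const char *opts)
{
	for (const char *p = opts; *p != '\0'; p++) {
		switch (*p) {
		case 's':
			opt_tcache_sort = false;
			break;
		case 'S':
			opt_tcache_sort = true;
			break;
		default:
			break;
		}
	}
}

int
main(void)
{
	parse_option_flags("sS");	/* last flag wins: enabled */
	printf("tcache_sort: %s\n", opt_tcache_sort ? "enabled" : "disabled");
	return (0);
}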

View File

@@ -63,7 +63,6 @@ CTL_PROTO(opt_zero)
 #ifdef JEMALLOC_TCACHE
 CTL_PROTO(opt_lg_tcache_nslots)
 CTL_PROTO(opt_lg_tcache_gc_sweep)
-CTL_PROTO(opt_tcache_sort)
 #endif
 CTL_PROTO(opt_stats_print)
 #ifdef JEMALLOC_TRACE
@@ -219,7 +218,6 @@ static const ctl_node_t opt_node[] = {
 #ifdef JEMALLOC_TCACHE
 	{NAME("lg_tcache_nslots"),	CTL(opt_lg_tcache_nslots)},
 	{NAME("lg_tcache_gc_sweep"),	CTL(opt_lg_tcache_gc_sweep)},
-	{NAME("tcache_sort"),		CTL(opt_tcache_sort)},
 #endif
 	{NAME("stats_print"),	CTL(opt_stats_print)},
 #ifdef JEMALLOC_TRACE
@@ -1026,7 +1024,6 @@ CTL_RO_GEN(opt_zero, opt_zero, bool)
 #ifdef JEMALLOC_TCACHE
 CTL_RO_GEN(opt_lg_tcache_nslots, opt_lg_tcache_nslots, size_t)
 CTL_RO_GEN(opt_lg_tcache_gc_sweep, opt_lg_tcache_gc_sweep, ssize_t)
-CTL_RO_GEN(opt_tcache_sort, opt_tcache_sort, bool)
 #endif
 CTL_RO_GEN(opt_stats_print, opt_stats_print, bool)
 #ifdef JEMALLOC_TRACE
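These three deletions retire the "opt.tcache_sort" name from the mallctl namespace: CTL_PROTO declares the handler, the opt_node entry maps the name onto it, and CTL_RO_GEN generates the read-only accessor. A hedged sketch of how one of the surviving nodes is queried through mallctl() (assuming an unprefixed public symbol; link against jemalloc):

#include <stddef.h>
#include <stdio.h>

/* Public jemalloc control interface. */
int	mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp,
    size_t newlen);

int
main(void)
{
	size_t lg_nslots;
	size_t sz = sizeof(lg_nslots);

	if (mallctl("opt.lg_tcache_nslots", &lg_nslots, &sz, NULL, 0) == 0)
		printf("opt.lg_tcache_nslots: %zu\n", lg_nslots);
	return (0);
}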

View File

@@ -436,8 +436,6 @@ stats_print(void (*write4)(void *, const char *, const char *, const char *,
 	if ((err = mallctl("opt.overcommit", &bv, &bsz, NULL, 0)) == 0)
 		write4(w4opaque, bv ? "O" : "o", "", "", "");
 	write4(w4opaque, "P", "", "", "");
-	if ((err = mallctl("opt.tcache_sort", &bv, &bsz, NULL, 0)) == 0)
-		write4(w4opaque, bv ? "S" : "s", "", "", "");
 	if ((err = mallctl("opt.trace", &bv, &bsz, NULL, 0)) == 0)
 		write4(w4opaque, bv ? "T" : "t", "", "", "");
 	if ((err = mallctl("opt.sysv", &bv, &bsz, NULL, 0)) == 0)

View File

@@ -6,7 +6,6 @@
 size_t opt_lg_tcache_nslots = LG_TCACHE_NSLOTS_DEFAULT;
 ssize_t opt_lg_tcache_gc_sweep = LG_TCACHE_GC_SWEEP_DEFAULT;
-bool opt_tcache_sort = true;

 /* Map of thread-specific caches. */
 __thread tcache_t *tcache_tls JEMALLOC_ATTR(tls_model("initial-exec"));
@@ -38,67 +37,6 @@ tcache_alloc_hard(tcache_t *tcache, tcache_bin_t *tbin, size_t binind)
 	return (ret);
 }

-static inline void
-tcache_bin_merge(void **to, void **fr, unsigned lcnt, unsigned rcnt)
-{
-	void **l, **r;
-	unsigned li, ri, i;
-
-	l = fr;
-	r = &fr[lcnt];
-	li = ri = i = 0;
-	while (li < lcnt && ri < rcnt) {
-		/* High pointers come first in sorted result. */
-		if ((uintptr_t)l[li] > (uintptr_t)r[ri]) {
-			to[i] = l[li];
-			li++;
-		} else {
-			to[i] = r[ri];
-			ri++;
-		}
-		i++;
-	}
-
-	if (li < lcnt)
-		memcpy(&to[i], &l[li], sizeof(void *) * (lcnt - li));
-	else if (ri < rcnt)
-		memcpy(&to[i], &r[ri], sizeof(void *) * (rcnt - ri));
-}
-
-static inline void
-tcache_bin_sort(tcache_bin_t *tbin)
-{
-	unsigned e, i;
-	void **fr, **to;
-	void *mslots[tcache_nslots];
-
-	/*
-	 * Perform iterative merge sort, swapping source and destination arrays
-	 * during each iteration.
-	 */
-	fr = mslots; to = tbin->slots;
-	for (e = 1; e < tbin->ncached; e <<= 1) {
-		void **tmp = fr; fr = to; to = tmp;
-		for (i = 0; i + (e << 1) <= tbin->ncached; i += (e << 1))
-			tcache_bin_merge(&to[i], &fr[i], e, e);
-		if (i + e <= tbin->ncached) {
-			tcache_bin_merge(&to[i], &fr[i],
-			    e, tbin->ncached - (i + e));
-		} else if (i < tbin->ncached)
-			tcache_bin_merge(&to[i], &fr[i], tbin->ncached - i, 0);
-	}
-
-	/* Copy the final result out of mslots, if necessary. */
-	if (to == mslots)
-		memcpy(tbin->slots, mslots, sizeof(void *) * tbin->ncached);
-
-#ifdef JEMALLOC_DEBUG
-	for (i = 1; i < tbin->ncached; i++)
-		assert(tbin->slots[i-1] > tbin->slots[i]);
-#endif
-}
-
 void
 tcache_bin_flush(tcache_bin_t *tbin, size_t binind, unsigned rem)
 {
@@ -107,12 +45,6 @@ tcache_bin_flush(tcache_bin_t *tbin, size_t binind, unsigned rem)
 	void *ptr;
 	unsigned i, ndeferred, ncached;

-	if (opt_tcache_sort && rem > 0) {
-		assert(rem < tbin->ncached);
-		/* Sort pointers such that the highest objects will be freed. */
-		tcache_bin_sort(tbin);
-	}
-
 	for (ndeferred = tbin->ncached - rem; ndeferred > 0;) {
 		ncached = ndeferred;
 		/* Lock the arena associated with the first object. */
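The deleted block at the top of tcache_bin_flush() was the only caller of tcache_bin_sort(), so the whole merge-sort machinery above goes with it. The following standalone sketch adapts that removed sort to a plain pointer array so the descending invariant from the old JEMALLOC_DEBUG assertion can be checked in isolation; the harness and names are illustrative, not jemalloc's:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define NSLOTS 8	/* stands in for tcache_nslots */

/* Merge two adjacent descending runs of fr into to (mirrors tcache_bin_merge). */
static void
bin_merge(void **to, void **fr, unsigned lcnt, unsigned rcnt)
{
	void **l = fr, **r = &fr[lcnt];
	unsigned li = 0, ri = 0, i = 0;

	while (li < lcnt && ri < rcnt) {
		/* High pointers come first in the sorted result. */
		if ((uintptr_t)l[li] > (uintptr_t)r[ri])
			to[i++] = l[li++];
		else
			to[i++] = r[ri++];
	}
	if (li < lcnt)
		memcpy(&to[i], &l[li], sizeof(void *) * (lcnt - li));
	else if (ri < rcnt)
		memcpy(&to[i], &r[ri], sizeof(void *) * (rcnt - ri));
}

/* Iterative merge sort over slots, swapping source/destination each pass. */
static void
bin_sort(void **slots, unsigned ncached)
{
	unsigned e, i;
	void *mslots[NSLOTS];
	void **fr = mslots, **to = slots;

	for (e = 1; e < ncached; e <<= 1) {
		void **tmp = fr; fr = to; to = tmp;
		for (i = 0; i + (e << 1) <= ncached; i += (e << 1))
			bin_merge(&to[i], &fr[i], e, e);
		if (i + e <= ncached)
			bin_merge(&to[i], &fr[i], e, ncached - (i + e));
		else if (i < ncached)
			bin_merge(&to[i], &fr[i], ncached - i, 0);
	}
	/* Copy the final result back if it landed in the scratch array. */
	if (to == mslots)
		memcpy(slots, mslots, sizeof(void *) * ncached);
}

int
main(void)
{
	char base[NSLOTS];
	void *slots[NSLOTS] = {
		&base[2], &base[7], &base[0], &base[5],
		&base[1], &base[6], &base[3], &base[4]
	};
	unsigned i;

	bin_sort(slots, NSLOTS);
	for (i = 1; i < NSLOTS; i++)
		assert((uintptr_t)slots[i - 1] > (uintptr_t)slots[i]);
	printf("slots sorted high-to-low\n");
	return (0);
}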