Remove tcache bin sorting during flush.

This feature caused significant performance degradation, and the
fragmentation reduction benefits were difficult to quantify.
This commit is contained in:
Jason Evans 2010-01-29 13:37:31 -08:00
parent c66aaf1476
commit d8f565f239
6 changed files with 0 additions and 98 deletions

View File

@ -414,15 +414,6 @@ Double/halve the size of the maximum size class that is a multiple of the
quantum (8 or 16 bytes, depending on architecture).
Above this size, cacheline spacing is used for size classes.
The default value is 128 bytes.
@roff_tcache@.It S
@roff_tcache@Sort the objects of a particular size class that are stored in a
@roff_tcache@thread-specific cache just before flushing some of them from the
@roff_tcache@cache, such that the objects highest in memory are preferentially
@roff_tcache@freed.
@roff_tcache@This tends to reduce fragmentation, but sorting is O(n lg n), and in
@roff_tcache@practice it is expensive enough to have a moderate performance
@roff_tcache@impact.
@roff_tcache@This option is enabled by default.
@roff_trace@.It T
@roff_trace@Write a verbose trace log to a set of files named according to the
@roff_trace@pattern
@ -722,13 +713,6 @@ option.
@roff_tcache@option.
@roff_tcache@.Ed
.\"-----------------------------------------------------------------------------
@roff_tcache@.It Sy "opt.tcache_sort (bool) r-"
@roff_tcache@.Bd -ragged -offset indent -compact
@roff_tcache@See the
@roff_tcache@.Dq S
@roff_tcache@option.
@roff_tcache@.Ed
.\"-----------------------------------------------------------------------------
.It Sy "opt.stats_print (bool) r-"
.Bd -ragged -offset indent -compact
See the

View File

@ -48,7 +48,6 @@ struct tcache_s {
extern size_t opt_lg_tcache_nslots;
extern ssize_t opt_lg_tcache_gc_sweep;
extern bool opt_tcache_sort;
/* Map of thread-specific caches. */
extern __thread tcache_t *tcache_tls

View File

@ -655,14 +655,6 @@ MALLOC_OUT:
opt_lg_cspace_max)
opt_lg_qspace_max++;
break;
#ifdef JEMALLOC_TCACHE
case 's':
opt_tcache_sort = false;
break;
case 'S':
opt_tcache_sort = true;
break;
#endif
#ifdef JEMALLOC_TRACE
case 't':
opt_trace = false;

View File

@ -63,7 +63,6 @@ CTL_PROTO(opt_zero)
#ifdef JEMALLOC_TCACHE
CTL_PROTO(opt_lg_tcache_nslots)
CTL_PROTO(opt_lg_tcache_gc_sweep)
CTL_PROTO(opt_tcache_sort)
#endif
CTL_PROTO(opt_stats_print)
#ifdef JEMALLOC_TRACE
@ -219,7 +218,6 @@ static const ctl_node_t opt_node[] = {
#ifdef JEMALLOC_TCACHE
{NAME("lg_tcache_nslots"), CTL(opt_lg_tcache_nslots)},
{NAME("lg_tcache_gc_sweep"), CTL(opt_lg_tcache_gc_sweep)},
{NAME("tcache_sort"), CTL(opt_tcache_sort)},
#endif
{NAME("stats_print"), CTL(opt_stats_print)},
#ifdef JEMALLOC_TRACE
@ -1026,7 +1024,6 @@ CTL_RO_GEN(opt_zero, opt_zero, bool)
#ifdef JEMALLOC_TCACHE
CTL_RO_GEN(opt_lg_tcache_nslots, opt_lg_tcache_nslots, size_t)
CTL_RO_GEN(opt_lg_tcache_gc_sweep, opt_lg_tcache_gc_sweep, ssize_t)
CTL_RO_GEN(opt_tcache_sort, opt_tcache_sort, bool)
#endif
CTL_RO_GEN(opt_stats_print, opt_stats_print, bool)
#ifdef JEMALLOC_TRACE

View File

@ -436,8 +436,6 @@ stats_print(void (*write4)(void *, const char *, const char *, const char *,
if ((err = mallctl("opt.overcommit", &bv, &bsz, NULL, 0)) == 0)
write4(w4opaque, bv ? "O" : "o", "", "", "");
write4(w4opaque, "P", "", "", "");
if ((err = mallctl("opt.tcache_sort", &bv, &bsz, NULL, 0)) == 0)
write4(w4opaque, bv ? "S" : "s", "", "", "");
if ((err = mallctl("opt.trace", &bv, &bsz, NULL, 0)) == 0)
write4(w4opaque, bv ? "T" : "t", "", "", "");
if ((err = mallctl("opt.sysv", &bv, &bsz, NULL, 0)) == 0)

View File

@ -6,7 +6,6 @@
size_t opt_lg_tcache_nslots = LG_TCACHE_NSLOTS_DEFAULT;
ssize_t opt_lg_tcache_gc_sweep = LG_TCACHE_GC_SWEEP_DEFAULT;
bool opt_tcache_sort = true;
/* Map of thread-specific caches. */
__thread tcache_t *tcache_tls JEMALLOC_ATTR(tls_model("initial-exec"));
@ -38,67 +37,6 @@ tcache_alloc_hard(tcache_t *tcache, tcache_bin_t *tbin, size_t binind)
return (ret);
}
/*
 * Merge two descending-sorted runs of object pointers into a destination
 * array.  fr holds both runs back to back: the first lcnt elements form
 * the left run and the following rcnt elements form the right run.  The
 * merged result (highest addresses first) is written to to[0..lcnt+rcnt).
 */
static inline void
tcache_bin_merge(void **to, void **fr, unsigned lcnt, unsigned rcnt)
{
	void **left = fr;
	void **right = &fr[lcnt];
	unsigned lpos = 0, rpos = 0, opos = 0;

	/* Take the higher head pointer until one run is exhausted. */
	while (lpos < lcnt && rpos < rcnt) {
		if ((uintptr_t)left[lpos] > (uintptr_t)right[rpos])
			to[opos++] = left[lpos++];
		else
			to[opos++] = right[rpos++];
	}

	/* Bulk-copy whatever remains of the non-exhausted run. */
	if (lpos < lcnt)
		memcpy(&to[opos], &left[lpos], sizeof(void *) * (lcnt - lpos));
	else if (rpos < rcnt)
		memcpy(&to[opos], &right[rpos], sizeof(void *) * (rcnt - rpos));
}
/*
 * Sort tbin's cached object pointers into descending address order so that
 * the objects highest in memory are preferentially flushed (freed).  Uses a
 * bottom-up (iterative) merge sort over tbin->slots, with mslots as the
 * alternate scratch array.
 *
 * NOTE(review): the VLA assumes tcache_nslots >= tbin->ncached so the
 * scratch array can hold every cached pointer — confirm against the
 * tcache fill/flush invariants.
 */
static inline void
tcache_bin_sort(tcache_bin_t *tbin)
{
unsigned e, i;
void **fr, **to;
/* Stack-allocated scratch; tcache_nslots is a file-scope global. */
void *mslots[tcache_nslots];
/*
 * Perform iterative merge sort, swapping source and destination arrays
 * during each iteration.
 */
fr = mslots; to = tbin->slots;
/* e is the length of the sorted runs being merged; it doubles per pass. */
for (e = 1; e < tbin->ncached; e <<= 1) {
/* Swap roles so this pass reads from last pass's output. */
void **tmp = fr; fr = to; to = tmp;
/* Merge full pairs of e-length runs. */
for (i = 0; i + (e << 1) <= tbin->ncached; i += (e << 1))
tcache_bin_merge(&to[i], &fr[i], e, e);
if (i + e <= tbin->ncached) {
/* One full run remains, paired with a short tail run. */
tcache_bin_merge(&to[i], &fr[i],
e, tbin->ncached - (i + e));
} else if (i < tbin->ncached)
/* Lone short run: merge against an empty right run (a copy). */
tcache_bin_merge(&to[i], &fr[i], tbin->ncached - i, 0);
}
/* Copy the final result out of mslots, if necessary. */
if (to == mslots)
memcpy(tbin->slots, mslots, sizeof(void *) * tbin->ncached);
#ifdef JEMALLOC_DEBUG
/* Verify strictly descending order; cached pointers are distinct. */
for (i = 1; i < tbin->ncached; i++)
assert(tbin->slots[i-1] > tbin->slots[i]);
#endif
}
void
tcache_bin_flush(tcache_bin_t *tbin, size_t binind, unsigned rem)
{
@ -107,12 +45,6 @@ tcache_bin_flush(tcache_bin_t *tbin, size_t binind, unsigned rem)
void *ptr;
unsigned i, ndeferred, ncached;
if (opt_tcache_sort && rem > 0) {
assert(rem < tbin->ncached);
/* Sort pointers such that the highest objects will be freed. */
tcache_bin_sort(tbin);
}
for (ndeferred = tbin->ncached - rem; ndeferred > 0;) {
ncached = ndeferred;
/* Lock the arena associated with the first object. */