Fix and simplify decay-based purging.

Simplify decay-based purging attempts to only be triggered when the
epoch is advanced, rather than every time purgeable memory increases.
In a correctly functioning system (not previously the case; see below),
this only causes a behavior difference if during subsequent purge
attempts the least recently used (LRU) purgeable memory extent is
initially too large to be purged, but that memory is reused between
attempts and one or more of the next LRU purgeable memory extents are
small enough to be purged.  In practice this is an arbitrary behavior
change that is within the set of acceptable behaviors.

As for the purging fix, assure that arena->decay.ndirty is recorded
*after* the epoch advance and associated purging occurs.  Prior to this
fix, it was possible for purging during epoch advance to cause a
substantially underrepresentative (arena->ndirty - arena->decay.ndirty),
i.e. the number of dirty pages attributed to the current epoch was too
low, and a series of unintended purges could result.  This fix is also
relevant in the context of the simplification described above, but the
bug's impact would be limited to over-purging at epoch advances.
This commit is contained in:
Jason Evans 2016-10-11 15:30:01 -07:00
parent 48993ed536
commit b4b4a77848
2 changed files with 70 additions and 70 deletions

View File

@@ -97,7 +97,7 @@ struct arena_decay_s {
* and/or reused. * and/or reused.
*/ */
ssize_t time; ssize_t time;
/* decay_time / SMOOTHSTEP_NSTEPS. */ /* time / SMOOTHSTEP_NSTEPS. */
nstime_t interval; nstime_t interval;
/* /*
* Time at which the current decay interval logically started. We do * Time at which the current decay interval logically started. We do
@@ -107,37 +107,30 @@ struct arena_decay_s {
* merge all relevant activity into the most recently recorded epoch. * merge all relevant activity into the most recently recorded epoch.
*/ */
nstime_t epoch; nstime_t epoch;
/* decay_deadline randomness generator. */ /* Deadline randomness generator. */
uint64_t jitter_state; uint64_t jitter_state;
/* /*
* Deadline for current epoch. This is the sum of decay_interval and * Deadline for current epoch. This is the sum of interval and per
* per epoch jitter which is a uniform random variable in * epoch jitter which is a uniform random variable in [0..interval).
* [0..decay_interval). Epochs always advance by precise multiples of * Epochs always advance by precise multiples of interval, but we
* decay_interval, but we randomize the deadline to reduce the * randomize the deadline to reduce the likelihood of arenas purging in
* likelihood of arenas purging in lockstep. * lockstep.
*/ */
nstime_t deadline; nstime_t deadline;
/* /*
* Number of dirty pages at beginning of current epoch. During epoch * Number of dirty pages at beginning of current epoch. During epoch
* advancement we use the delta between decay_ndirty and ndirty to * advancement we use the delta between arena->decay.ndirty and
* determine how many dirty pages, if any, were generated, and record * arena->ndirty to determine how many dirty pages, if any, were
* the result in decay_backlog. * generated.
*/ */
size_t ndirty; size_t ndirty;
/*
* Memoized result of arena_decay_backlog_npages_limit() corresponding
* to the current contents of decay_backlog, i.e. the limit on how many
* pages are allowed to exist for the decay epochs.
*/
size_t backlog_npages_limit;
/* /*
* Trailing log of how many unused dirty pages were generated during * Trailing log of how many unused dirty pages were generated during
* each of the past SMOOTHSTEP_NSTEPS decay epochs, where the last * each of the past SMOOTHSTEP_NSTEPS decay epochs, where the last
* element is the most recent epoch. Corresponding epoch times are * element is the most recent epoch. Corresponding epoch times are
* relative to decay_epoch. * relative to epoch.
*/ */
size_t backlog[SMOOTHSTEP_NSTEPS]; size_t backlog[SMOOTHSTEP_NSTEPS];
}; };
struct arena_bin_s { struct arena_bin_s {

View File

@@ -523,11 +523,41 @@ arena_decay_backlog_npages_limit(const arena_t *arena)
} }
static void static void
arena_decay_epoch_advance(arena_t *arena, const nstime_t *time) arena_decay_backlog_update_last(arena_t *arena)
{
size_t ndirty_delta = (arena->ndirty > arena->decay.ndirty) ?
arena->ndirty - arena->decay.ndirty : 0;
arena->decay.backlog[SMOOTHSTEP_NSTEPS-1] = ndirty_delta;
}
static void
arena_decay_backlog_update(arena_t *arena, uint64_t nadvance_u64)
{
if (nadvance_u64 >= SMOOTHSTEP_NSTEPS) {
memset(arena->decay.backlog, 0, (SMOOTHSTEP_NSTEPS-1) *
sizeof(size_t));
} else {
size_t nadvance_z = (size_t)nadvance_u64;
assert((uint64_t)nadvance_z == nadvance_u64);
memmove(arena->decay.backlog, &arena->decay.backlog[nadvance_z],
(SMOOTHSTEP_NSTEPS - nadvance_z) * sizeof(size_t));
if (nadvance_z > 1) {
memset(&arena->decay.backlog[SMOOTHSTEP_NSTEPS -
nadvance_z], 0, (nadvance_z-1) * sizeof(size_t));
}
}
arena_decay_backlog_update_last(arena);
}
static void
arena_decay_epoch_advance_helper(arena_t *arena, const nstime_t *time)
{ {
uint64_t nadvance_u64; uint64_t nadvance_u64;
nstime_t delta; nstime_t delta;
size_t ndirty_delta;
assert(opt_purge == purge_mode_decay); assert(opt_purge == purge_mode_decay);
assert(arena_decay_deadline_reached(arena, time)); assert(arena_decay_deadline_reached(arena, time));
@@ -546,43 +576,25 @@ arena_decay_epoch_advance(arena_t *arena, const nstime_t *time)
arena_decay_deadline_init(arena); arena_decay_deadline_init(arena);
/* Update the backlog. */ /* Update the backlog. */
if (nadvance_u64 >= SMOOTHSTEP_NSTEPS) { arena_decay_backlog_update(arena, nadvance_u64);
memset(arena->decay.backlog, 0, (SMOOTHSTEP_NSTEPS-1) *
sizeof(size_t));
} else {
size_t nadvance_z = (size_t)nadvance_u64;
assert((uint64_t)nadvance_z == nadvance_u64);
memmove(arena->decay.backlog, &arena->decay.backlog[nadvance_z],
(SMOOTHSTEP_NSTEPS - nadvance_z) * sizeof(size_t));
if (nadvance_z > 1) {
memset(&arena->decay.backlog[SMOOTHSTEP_NSTEPS -
nadvance_z], 0, (nadvance_z-1) * sizeof(size_t));
}
}
ndirty_delta = (arena->ndirty > arena->decay.ndirty) ? arena->ndirty -
arena->decay.ndirty : 0;
arena->decay.ndirty = arena->ndirty;
arena->decay.backlog[SMOOTHSTEP_NSTEPS-1] = ndirty_delta;
arena->decay.backlog_npages_limit =
arena_decay_backlog_npages_limit(arena);
} }
static size_t static void
arena_decay_npages_limit(arena_t *arena) arena_decay_epoch_advance_purge(tsdn_t *tsdn, arena_t *arena)
{ {
size_t npages_limit; size_t ndirty_limit = arena_decay_backlog_npages_limit(arena);
assert(opt_purge == purge_mode_decay); if (arena->ndirty > ndirty_limit)
arena_purge_to_limit(tsdn, arena, ndirty_limit);
arena->decay.ndirty = arena->ndirty;
}
npages_limit = arena->decay.backlog_npages_limit; static void
arena_decay_epoch_advance(tsdn_t *tsdn, arena_t *arena, const nstime_t *time)
{
/* Add in any dirty pages created during the current epoch. */ arena_decay_epoch_advance_helper(arena, time);
if (arena->ndirty > arena->decay.ndirty) arena_decay_epoch_advance_purge(tsdn, arena);
npages_limit += arena->ndirty - arena->decay.ndirty;
return (npages_limit);
} }
static void static void
@@ -600,7 +612,6 @@ arena_decay_init(arena_t *arena, ssize_t decay_time)
arena->decay.jitter_state = (uint64_t)(uintptr_t)arena; arena->decay.jitter_state = (uint64_t)(uintptr_t)arena;
arena_decay_deadline_init(arena); arena_decay_deadline_init(arena);
arena->decay.ndirty = arena->ndirty; arena->decay.ndirty = arena->ndirty;
arena->decay.backlog_npages_limit = 0;
memset(arena->decay.backlog, 0, SMOOTHSTEP_NSTEPS * sizeof(size_t)); memset(arena->decay.backlog, 0, SMOOTHSTEP_NSTEPS * sizeof(size_t));
} }
@@ -682,7 +693,6 @@ static void
arena_maybe_purge_decay(tsdn_t *tsdn, arena_t *arena) arena_maybe_purge_decay(tsdn_t *tsdn, arena_t *arena)
{ {
nstime_t time; nstime_t time;
size_t ndirty_limit;
assert(opt_purge == purge_mode_decay); assert(opt_purge == purge_mode_decay);
@@ -698,32 +708,29 @@ arena_maybe_purge_decay(tsdn_t *tsdn, arena_t *arena)
if (unlikely(!nstime_monotonic() && nstime_compare(&arena->decay.epoch, if (unlikely(!nstime_monotonic() && nstime_compare(&arena->decay.epoch,
&time) > 0)) { &time) > 0)) {
/* /*
* Time went backwards. Move the epoch back in time, with the * Time went backwards. Move the epoch back in time and
* expectation that time typically flows forward for long enough * generate a new deadline, with the expectation that time
* periods of time that epochs complete. Unfortunately, * typically flows forward for long enough periods of time that
* this strategy is susceptible to clock jitter triggering * epochs complete. Unfortunately, this strategy is susceptible
* premature epoch advances, but clock jitter estimation and * to clock jitter triggering premature epoch advances, but
* compensation isn't feasible here because calls into this code * clock jitter estimation and compensation isn't feasible here
* are event-driven. * because calls into this code are event-driven.
*/ */
nstime_copy(&arena->decay.epoch, &time); nstime_copy(&arena->decay.epoch, &time);
arena_decay_deadline_init(arena);
} else { } else {
/* Verify that time does not go backwards. */ /* Verify that time does not go backwards. */
assert(nstime_compare(&arena->decay.epoch, &time) <= 0); assert(nstime_compare(&arena->decay.epoch, &time) <= 0);
} }
if (arena_decay_deadline_reached(arena, &time))
arena_decay_epoch_advance(arena, &time);
ndirty_limit = arena_decay_npages_limit(arena);
/* /*
* Don't try to purge unless the number of purgeable pages exceeds the * If the deadline has been reached, advance to the current epoch and
* current limit. * purge to the new limit if necessary. Note that dirty pages created
* during the current epoch are not subject to purge until a future
* epoch, so as a result purging only happens during epoch advances.
*/ */
if (arena->ndirty <= ndirty_limit) if (arena_decay_deadline_reached(arena, &time))
return; arena_decay_epoch_advance(tsdn, arena, &time);
arena_purge_to_limit(tsdn, arena, ndirty_limit);
} }
void void