From 8f2193dc8db26eba40f7948f7ce60c8584ab31a9 Mon Sep 17 00:00:00 2001
From: David Goldblatt
Date: Mon, 9 Mar 2020 14:44:11 -0700
Subject: [PATCH] Decay: Move in arena decay functions.

---
 include/jemalloc/internal/decay.h |  54 +++++++
 src/arena.c                       | 244 +++---------------------------
 src/decay.c                       | 174 +++++++++++++++++++++
 3 files changed, 249 insertions(+), 223 deletions(-)

diff --git a/include/jemalloc/internal/decay.h b/include/jemalloc/internal/decay.h
index 28fe54d4..ef336f07 100644
--- a/include/jemalloc/internal/decay.h
+++ b/include/jemalloc/internal/decay.h
@@ -8,6 +8,15 @@
  * Page allocators inform a decay object when pages enter a decay-able state
  * (i.e. dirty or muzzy), and query it to determine how many pages should be
  * purged at any given time.
+ *
+ * This is mostly a single-threaded data structure and doesn't care about
+ * synchronization at all; it's the caller's responsibility to manage
+ * synchronization on its own.  There are two exceptions:
+ * 1) It's OK to racily call decay_ms_read (i.e. just the simplest state query).
+ * 2) The mtx and purging fields live (and are initialized) here, but are
+ *    logically owned by the page allocator.  This is just a convenience
+ *    (since those fields would otherwise be duplicated for both the dirty
+ *    and muzzy states).
  */
 typedef struct decay_s decay_t;
 struct decay_s {
@@ -44,6 +53,12 @@ struct decay_s {
	 * lockstep.
	 */
	nstime_t deadline;
+	/*
+	 * The number of pages we cap ourselves at in the current epoch, per
+	 * decay policies.  Updated on an epoch change.  After an epoch change,
+	 * the caller should take steps to try to purge down to this amount.
+	 */
+	size_t npages_limit;
	/*
	 * Number of unpurged pages at beginning of current epoch.  During epoch
	 * advancement we use the delta between arena->decay_*.nunpurged and
@@ -56,6 +71,9 @@ struct decay_s {
	 * each of the past SMOOTHSTEP_NSTEPS decay epochs, where the last
	 * element is the most recent epoch.  Corresponding epoch times are
	 * relative to epoch.
+	 *
+	 * Updated only on epoch advance, triggered by
+	 * decay_maybe_advance_epoch, below.
	 */
	size_t backlog[SMOOTHSTEP_NSTEPS];
 
@@ -63,4 +81,40 @@ struct decay_s {
	uint64_t ceil_npages;
 };
 
+static inline ssize_t
+decay_ms_read(const decay_t *decay) {
+	return atomic_load_zd(&decay->time_ms, ATOMIC_RELAXED);
+}
+
+static inline size_t
+decay_npages_limit_get(const decay_t *decay) {
+	return decay->npages_limit;
+}
+
+/* How many unused dirty pages were generated during the last epoch. */
+static inline size_t
+decay_epoch_npages_delta(const decay_t *decay) {
+	return decay->backlog[SMOOTHSTEP_NSTEPS - 1];
+}
+
+bool decay_ms_valid(ssize_t decay_ms);
+
+/*
+ * As a precondition, the decay_t must be zeroed out (as if with memset).
+ *
+ * Returns true on error.
+ */
+bool decay_init(decay_t *decay, ssize_t decay_ms);
+
+/*
+ * Given an already-initialized decay_t, reinitialize it with the given decay
+ * time.  The decay_t must have previously been initialized (and should not
+ * then be zeroed).
+ */
+void decay_reinit(decay_t *decay, ssize_t decay_ms);
+
+/* Returns true if the epoch advanced and there are pages to purge. */
+bool decay_maybe_advance_epoch(decay_t *decay, nstime_t *new_time,
+    size_t current_npages);
+
 #endif /* JEMALLOC_INTERNAL_DECAY_H */
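The header comment above describes a contract rather than an implementation: the caller owns synchronization and drives the decay object from its own events. The following standalone toy (not part of the patch; all toy_* names are invented, and the epoch rule is simplified to fixed ten-tick deadlines with a halving page cap) shows the intended call pattern: feed in the current time and page count, then purge down to the refreshed limit whenever the epoch turns over.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

typedef struct {
        long time_ms;          /* decay time: -1 disables, 0 purges eagerly */
        size_t npages_limit;   /* refreshed on each epoch advance */
        unsigned deadline;     /* toy "time" at which the epoch next turns */
} toy_decay_t;

/* Toy epoch rule: advance every 10 ticks, halving the page cap. */
static bool
toy_maybe_advance_epoch(toy_decay_t *d, unsigned now, size_t npages) {
        if (now < d->deadline) {
                return false;
        }
        d->deadline = now + 10;
        d->npages_limit = npages / 2;
        return true;
}

int
main(void) {
        toy_decay_t d = {10000, 0, 10};
        size_t npages = 64;     /* decayable (dirty) pages */

        for (unsigned now = 0; now < 40; now++) {
                /* The page allocator's event hook: advance, then purge. */
                if (toy_maybe_advance_epoch(&d, now, npages)) {
                        while (npages > d.npages_limit) {
                                npages--;       /* madvise() in real life */
                        }
                        printf("t=%u: purged down to %zu pages\n", now, npages);
                }
        }
        return 0;
}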
diff --git a/src/arena.c b/src/arena.c
index ce0b57cc..055b36f1 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -3,6 +3,7 @@
 #include "jemalloc/internal/jemalloc_internal_includes.h"
 
 #include "jemalloc/internal/assert.h"
+#include "jemalloc/internal/decay.h"
 #include "jemalloc/internal/div.h"
 #include "jemalloc/internal/ehooks.h"
 #include "jemalloc/internal/extent_dss.h"
@@ -542,98 +543,6 @@ arena_extent_ralloc_large_expand(tsdn_t *tsdn, arena_t *arena, edata_t *edata,
	arena_nactive_add(arena, udiff >> LG_PAGE);
 }
 
-static ssize_t
-arena_decay_ms_read(decay_t *decay) {
-	return atomic_load_zd(&decay->time_ms, ATOMIC_RELAXED);
-}
-
-static void
-arena_decay_ms_write(decay_t *decay, ssize_t decay_ms) {
-	atomic_store_zd(&decay->time_ms, decay_ms, ATOMIC_RELAXED);
-}
-
-static void
-arena_decay_deadline_init(decay_t *decay) {
-	/*
-	 * Generate a new deadline that is uniformly random within the next
-	 * epoch after the current one.
-	 */
-	nstime_copy(&decay->deadline, &decay->epoch);
-	nstime_add(&decay->deadline, &decay->interval);
-	if (arena_decay_ms_read(decay) > 0) {
-		nstime_t jitter;
-
-		nstime_init(&jitter, prng_range_u64(&decay->jitter_state,
-		    nstime_ns(&decay->interval)));
-		nstime_add(&decay->deadline, &jitter);
-	}
-}
-
-static bool
-arena_decay_deadline_reached(const decay_t *decay, const nstime_t *time) {
-	return (nstime_compare(&decay->deadline, time) <= 0);
-}
-
-static size_t
-arena_decay_backlog_npages_limit(const decay_t *decay) {
-	uint64_t sum;
-	size_t npages_limit_backlog;
-	unsigned i;
-
-	/*
-	 * For each element of decay_backlog, multiply by the corresponding
-	 * fixed-point smoothstep decay factor.  Sum the products, then divide
-	 * to round down to the nearest whole number of pages.
-	 */
-	sum = 0;
-	for (i = 0; i < SMOOTHSTEP_NSTEPS; i++) {
-		sum += decay->backlog[i] * h_steps[i];
-	}
-	npages_limit_backlog = (size_t)(sum >> SMOOTHSTEP_BFP);
-
-	return npages_limit_backlog;
-}
-
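The just-removed arena_decay_backlog_npages_limit reappears essentially unchanged as decay_backlog_npages_limit in src/decay.c below. To make the fixed-point arithmetic concrete, here is a compilable miniature under stated assumptions: 4 steps instead of SMOOTHSTEP_NSTEPS, 8 fractional bits instead of SMOOTHSTEP_BFP, and invented weights that only roughly follow the smoothstep curve (the real h_steps table is much larger and uses more fractional bits). Weights grow toward the most recent epoch, so freshly dirtied pages count almost fully toward the allowed cap while older ones age out of it.

#include <stdint.h>
#include <stdio.h>

#define TOY_NSTEPS 4
#define TOY_BFP 8       /* weights in 8.8 fixed point: 256 == 1.0 */

/* Roughly smoothstep((i + 1) / 4), scaled by 256; newest epoch last. */
static const uint64_t toy_h_steps[TOY_NSTEPS] = {40, 128, 216, 256};

static size_t
toy_backlog_npages_limit(const size_t backlog[TOY_NSTEPS]) {
        uint64_t sum = 0;
        for (unsigned i = 0; i < TOY_NSTEPS; i++) {
                sum += backlog[i] * toy_h_steps[i];
        }
        return (size_t)(sum >> TOY_BFP);        /* round down to whole pages */
}

int
main(void) {
        /* 100 pages dirtied two epochs ago, 100 in the newest epoch. */
        size_t backlog[TOY_NSTEPS] = {0, 0, 100, 100};
        /* (100*216 + 100*256) >> 8 == 184: older pages count for less. */
        printf("limit = %zu pages\n", toy_backlog_npages_limit(backlog));
        return 0;
}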
-static void
-arena_decay_backlog_update_last(decay_t *decay, size_t current_npages) {
-	size_t npages_delta = (current_npages > decay->nunpurged) ?
-	    current_npages - decay->nunpurged : 0;
-	decay->backlog[SMOOTHSTEP_NSTEPS-1] = npages_delta;
-
-	if (config_debug) {
-		if (current_npages > decay->ceil_npages) {
-			decay->ceil_npages = current_npages;
-		}
-		size_t npages_limit = arena_decay_backlog_npages_limit(decay);
-		assert(decay->ceil_npages >= npages_limit);
-		if (decay->ceil_npages > npages_limit) {
-			decay->ceil_npages = npages_limit;
-		}
-	}
-}
-
-static void
-arena_decay_backlog_update(decay_t *decay, uint64_t nadvance_u64,
-    size_t current_npages) {
-	if (nadvance_u64 >= SMOOTHSTEP_NSTEPS) {
-		memset(decay->backlog, 0, (SMOOTHSTEP_NSTEPS-1) *
-		    sizeof(size_t));
-	} else {
-		size_t nadvance_z = (size_t)nadvance_u64;
-
-		assert((uint64_t)nadvance_z == nadvance_u64);
-
-		memmove(decay->backlog, &decay->backlog[nadvance_z],
-		    (SMOOTHSTEP_NSTEPS - nadvance_z) * sizeof(size_t));
-		if (nadvance_z > 1) {
-			memset(&decay->backlog[SMOOTHSTEP_NSTEPS -
-			    nadvance_z], 0, (nadvance_z-1) * sizeof(size_t));
-		}
-	}
-
-	arena_decay_backlog_update_last(decay, current_npages);
-}
-
 static void
 arena_decay_try_purge(tsdn_t *tsdn, arena_t *arena, decay_t *decay,
     pa_shard_decay_stats_t *decay_stats, ecache_t *ecache,
@@ -645,93 +554,6 @@ arena_decay_try_purge(tsdn_t *tsdn, arena_t *arena, decay_t *decay,
	}
 }
 
-static void
-arena_decay_epoch_advance_helper(decay_t *decay, const nstime_t *time,
-    size_t current_npages) {
-	assert(arena_decay_deadline_reached(decay, time));
-
-	nstime_t delta;
-	nstime_copy(&delta, time);
-	nstime_subtract(&delta, &decay->epoch);
-
-	uint64_t nadvance_u64 = nstime_divide(&delta, &decay->interval);
-	assert(nadvance_u64 > 0);
-
-	/* Add nadvance_u64 decay intervals to epoch. */
-	nstime_copy(&delta, &decay->interval);
-	nstime_imultiply(&delta, nadvance_u64);
-	nstime_add(&decay->epoch, &delta);
-
-	/* Set a new deadline. */
-	arena_decay_deadline_init(decay);
-
-	/* Update the backlog. */
-	arena_decay_backlog_update(decay, nadvance_u64, current_npages);
-}
-
-static void
-arena_decay_epoch_advance(tsdn_t *tsdn, arena_t *arena, decay_t *decay,
-    pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, const nstime_t *time,
-    bool is_background_thread) {
-	size_t current_npages = ecache_npages_get(ecache);
-	arena_decay_epoch_advance_helper(decay, time, current_npages);
-
-	size_t npages_limit = arena_decay_backlog_npages_limit(decay);
-	/* We may unlock decay->mtx when try_purge().  Finish logging first. */
-	decay->nunpurged = (npages_limit > current_npages) ?
-	    npages_limit : current_npages;
-
-	if (!background_thread_enabled() || is_background_thread) {
-		arena_decay_try_purge(tsdn, arena, decay, decay_stats, ecache,
-		    current_npages, npages_limit, is_background_thread);
-	}
-}
-
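arena_decay_backlog_update (also moving to src/decay.c) treats the backlog as a sliding window over the last SMOOTHSTEP_NSTEPS epochs. A toy version of just the shift logic with a 4-entry window, mirroring the memmove/memset dance above (toy_* names are invented):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define TOY_NSTEPS 4

/*
 * Advancing nadvance epochs ages every slot by nadvance positions: entries
 * shifted past slot 0 stop contributing, vacated recent slots are zeroed,
 * and the newest delta lands in the last slot.
 */
static void
toy_backlog_advance(size_t backlog[TOY_NSTEPS], uint64_t nadvance,
    size_t npages_delta) {
        if (nadvance >= TOY_NSTEPS) {
                memset(backlog, 0, (TOY_NSTEPS - 1) * sizeof(size_t));
        } else {
                size_t n = (size_t)nadvance;
                memmove(backlog, &backlog[n],
                    (TOY_NSTEPS - n) * sizeof(size_t));
                if (n > 1) {
                        memset(&backlog[TOY_NSTEPS - n], 0,
                            (n - 1) * sizeof(size_t));
                }
        }
        backlog[TOY_NSTEPS - 1] = npages_delta;
}

int
main(void) {
        size_t backlog[TOY_NSTEPS] = {1, 2, 3, 4};
        /* Two epochs pass at once; 9 new pages were dirtied meanwhile. */
        toy_backlog_advance(backlog, 2, 9);
        printf("%zu %zu %zu %zu\n",     /* prints: 3 4 0 9 */
            backlog[0], backlog[1], backlog[2], backlog[3]);
        return 0;
}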
-static void
-arena_decay_reinit(decay_t *decay, ssize_t decay_ms) {
-	arena_decay_ms_write(decay, decay_ms);
-	if (decay_ms > 0) {
-		nstime_init(&decay->interval, (uint64_t)decay_ms *
-		    KQU(1000000));
-		nstime_idivide(&decay->interval, SMOOTHSTEP_NSTEPS);
-	}
-
-	nstime_init_update(&decay->epoch);
-	decay->jitter_state = (uint64_t)(uintptr_t)decay;
-	arena_decay_deadline_init(decay);
-	decay->nunpurged = 0;
-	memset(decay->backlog, 0, SMOOTHSTEP_NSTEPS * sizeof(size_t));
-}
-
-static bool
-arena_decay_init(decay_t *decay, ssize_t decay_ms) {
-	if (config_debug) {
-		for (size_t i = 0; i < sizeof(decay_t); i++) {
-			assert(((char *)decay)[i] == 0);
-		}
-		decay->ceil_npages = 0;
-	}
-	if (malloc_mutex_init(&decay->mtx, "decay", WITNESS_RANK_DECAY,
-	    malloc_mutex_rank_exclusive)) {
-		return true;
-	}
-	decay->purging = false;
-	arena_decay_reinit(decay, decay_ms);
-	return false;
-}
-
-static bool
-arena_decay_ms_valid(ssize_t decay_ms) {
-	if (decay_ms < -1) {
-		return false;
-	}
-	if (decay_ms == -1 || (uint64_t)decay_ms <= NSTIME_SEC_MAX *
-	    KQU(1000)) {
-		return true;
-	}
-	return false;
-}
-
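arena_decay_ms_valid moves to src/decay.c as decay_ms_valid, keeping the same three-way contract: -1 means never purge, 0 means purge eagerly, and any positive decay time must still be representable in nanoseconds. A standalone restatement, where TOY_NSTIME_SEC_MAX is only a stand-in for jemalloc's NSTIME_SEC_MAX (the largest whole-seconds value an nstime_t can hold):

#include <stdbool.h>
#include <stdio.h>
#include <sys/types.h>  /* ssize_t */

/* Stand-in for jemalloc's NSTIME_SEC_MAX. */
#define TOY_NSTIME_SEC_MAX 18446744072ULL

static bool
toy_decay_ms_valid(ssize_t decay_ms) {
        if (decay_ms < -1) {
                return false;   /* -1 is the only meaningful negative value */
        }
        /* -1: never purge; 0: purge eagerly; >0: must fit in nanoseconds. */
        return decay_ms == -1 ||
            (unsigned long long)decay_ms <= TOY_NSTIME_SEC_MAX * 1000ULL;
}

int
main(void) {
        printf("%d %d %d %d\n", /* prints: 0 1 1 1 */
            toy_decay_ms_valid(-2), toy_decay_ms_valid(-1),
            toy_decay_ms_valid(0), toy_decay_ms_valid(10 * 1000));
        return 0;
}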
 static bool
 arena_maybe_decay(tsdn_t *tsdn, arena_t *arena, decay_t *decay,
     pa_shard_decay_stats_t *decay_stats, ecache_t *ecache,
@@ -739,7 +561,7 @@ arena_maybe_decay(tsdn_t *tsdn, arena_t *arena, decay_t *decay,
	malloc_mutex_assert_owner(tsdn, &decay->mtx);
 
	/* Purge all or nothing if the option is disabled. */
-	ssize_t decay_ms = arena_decay_ms_read(decay);
+	ssize_t decay_ms = decay_ms_read(decay);
	if (decay_ms <= 0) {
		if (decay_ms == 0) {
			arena_decay_to_limit(tsdn, arena, decay, decay_stats,
@@ -749,26 +571,6 @@ arena_maybe_decay(tsdn_t *tsdn, arena_t *arena, decay_t *decay,
		return false;
	}
 
-	nstime_t time;
-	nstime_init_update(&time);
-	if (unlikely(!nstime_monotonic() && nstime_compare(&decay->epoch, &time)
-	    > 0)) {
-		/*
-		 * Time went backwards.  Move the epoch back in time and
-		 * generate a new deadline, with the expectation that time
-		 * typically flows forward for long enough periods of time that
-		 * epochs complete.  Unfortunately, this strategy is susceptible
-		 * to clock jitter triggering premature epoch advances, but
-		 * clock jitter estimation and compensation isn't feasible here
-		 * because calls into this code are event-driven.
-		 */
-		nstime_copy(&decay->epoch, &time);
-		arena_decay_deadline_init(decay);
-	} else {
-		/* Verify that time does not go backwards. */
-		assert(nstime_compare(&decay->epoch, &time) <= 0);
-	}
-
	/*
	 * If the deadline has been reached, advance to the current epoch and
	 * purge to the new limit if necessary.  Note that dirty pages created
@@ -776,39 +578,35 @@ arena_maybe_decay(tsdn_t *tsdn, arena_t *arena, decay_t *decay,
	 * epoch, so as a result purging only happens during epoch advances, or
	 * being triggered by background threads (scheduled event).
	 */
-	bool advance_epoch = arena_decay_deadline_reached(decay, &time);
-	if (advance_epoch) {
-		arena_decay_epoch_advance(tsdn, arena, decay, decay_stats,
-		    ecache, &time, is_background_thread);
-	} else if (is_background_thread) {
+	nstime_t time;
+	nstime_init_update(&time);
+	size_t npages_current = ecache_npages_get(ecache);
+	bool epoch_advanced = decay_maybe_advance_epoch(decay, &time,
+	    npages_current);
+	if (is_background_thread ||
+	    (epoch_advanced && !background_thread_enabled())) {
+		size_t npages_limit = decay_npages_limit_get(decay);
		arena_decay_try_purge(tsdn, arena, decay, decay_stats, ecache,
-		    ecache_npages_get(ecache),
-		    arena_decay_backlog_npages_limit(decay),
-		    is_background_thread);
+		    npages_current, npages_limit, is_background_thread);
	}
 
-	return advance_epoch;
-}
-
-static ssize_t
-arena_decay_ms_get(decay_t *decay) {
-	return arena_decay_ms_read(decay);
+	return epoch_advanced;
 }
 
 ssize_t
 arena_dirty_decay_ms_get(arena_t *arena) {
-	return arena_decay_ms_get(&arena->pa_shard.decay_dirty);
+	return decay_ms_read(&arena->pa_shard.decay_dirty);
 }
 
 ssize_t
 arena_muzzy_decay_ms_get(arena_t *arena) {
-	return arena_decay_ms_get(&arena->pa_shard.decay_muzzy);
+	return decay_ms_read(&arena->pa_shard.decay_muzzy);
 }
 
 static bool
 arena_decay_ms_set(tsdn_t *tsdn, arena_t *arena, decay_t *decay,
     pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, ssize_t decay_ms) {
-	if (!arena_decay_ms_valid(decay_ms)) {
+	if (!decay_ms_valid(decay_ms)) {
		return true;
	}
 
@@ -821,7 +619,7 @@ arena_decay_ms_set(tsdn_t *tsdn, arena_t *arena, decay_t *decay,
	 * infrequent, either between the {-1, 0, >0} states, or a one-time
	 * arbitrary change during initial arena configuration.
	 */
-	arena_decay_reinit(decay, decay_ms);
+	decay_reinit(decay, decay_ms);
	arena_maybe_decay(tsdn, arena, decay, decay_stats, ecache, false);
	malloc_mutex_unlock(tsdn, &decay->mtx);
 
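The rewritten arena_maybe_decay folds what used to be two code paths (advance-then-purge vs. background-thread polling) into a single predicate. A minimal restatement of just that condition, with its three interesting cases:

#include <stdbool.h>
#include <stdio.h>

static bool
toy_should_try_purge(bool is_background_thread, bool epoch_advanced,
    bool background_thread_enabled) {
        return is_background_thread ||
            (epoch_advanced && !background_thread_enabled);
}

int
main(void) {
        /* Foreground call, background threads on: leave work for them. */
        printf("%d\n", toy_should_try_purge(false, true, true));        /* 0 */
        /* Foreground call, background threads off: purge inline. */
        printf("%d\n", toy_should_try_purge(false, true, false));       /* 1 */
        /* Background thread: always try, even without an epoch advance. */
        printf("%d\n", toy_should_try_purge(true, false, true));        /* 1 */
        return 0;
}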
@@ -989,7 +787,7 @@ arena_decay_impl(tsdn_t *tsdn, arena_t *arena, decay_t *decay,
	size_t npages_new;
	if (epoch_advanced) {
		/* Backlog is updated on epoch advance. */
-		npages_new = decay->backlog[SMOOTHSTEP_NSTEPS-1];
+		npages_new = decay_epoch_npages_delta(decay);
	}
	malloc_mutex_unlock(tsdn, &decay->mtx);
 
@@ -1922,7 +1720,7 @@ arena_dirty_decay_ms_default_get(void) {
 
 bool
 arena_dirty_decay_ms_default_set(ssize_t decay_ms) {
-	if (!arena_decay_ms_valid(decay_ms)) {
+	if (!decay_ms_valid(decay_ms)) {
		return true;
	}
	atomic_store_zd(&dirty_decay_ms_default, decay_ms, ATOMIC_RELAXED);
@@ -1936,7 +1734,7 @@ arena_muzzy_decay_ms_default_get(void) {
 
 bool
 arena_muzzy_decay_ms_default_set(ssize_t decay_ms) {
-	if (!arena_decay_ms_valid(decay_ms)) {
+	if (!decay_ms_valid(decay_ms)) {
		return true;
	}
	atomic_store_zd(&muzzy_decay_ms_default, decay_ms, ATOMIC_RELAXED);
@@ -2048,11 +1846,11 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) {
		goto label_error;
	}
 
-	if (arena_decay_init(&arena->pa_shard.decay_dirty,
+	if (decay_init(&arena->pa_shard.decay_dirty,
	    arena_dirty_decay_ms_default_get())) {
		goto label_error;
	}
-	if (arena_decay_init(&arena->pa_shard.decay_muzzy,
+	if (decay_init(&arena->pa_shard.decay_muzzy,
	    arena_muzzy_decay_ms_default_get())) {
		goto label_error;
	}
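One subtlety worth pausing on before the decay.c hunk: deadlines are jittered so that arenas sharing a decay setting don't all purge in lockstep (see the deadline comment in the struct above, and decay_deadline_init below). A sketch of the same idea, with rand() standing in for jemalloc's prng_range_u64:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* deadline = epoch + interval + jitter, with jitter in [0, interval). */
static uint64_t
toy_deadline(uint64_t epoch_ns, uint64_t interval_ns) {
        uint64_t jitter_ns = (uint64_t)rand() % interval_ns;
        return epoch_ns + interval_ns + jitter_ns;
}

int
main(void) {
        /* A 10s decay time split over 200 steps: 50ms per epoch. */
        uint64_t interval_ns = 10000000000ULL / 200;
        for (int i = 0; i < 3; i++) {
                /* Each arena lands somewhere in [50ms, 100ms). */
                printf("deadline in %.1f ms\n",
                    toy_deadline(0, interval_ns) / 1e6);
        }
        return 0;
}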
diff --git a/src/decay.c b/src/decay.c
index 454cb475..462b9bfe 100644
--- a/src/decay.c
+++ b/src/decay.c
@@ -1,3 +1,177 @@
 #include "jemalloc/internal/jemalloc_preamble.h"
 #include "jemalloc/internal/jemalloc_internal_includes.h"
 
+#include "jemalloc/internal/decay.h"
+
+/*
+ * Generate a new deadline that is uniformly random within the next epoch after
+ * the current one.
+ */
+void
+decay_deadline_init(decay_t *decay) {
+	nstime_copy(&decay->deadline, &decay->epoch);
+	nstime_add(&decay->deadline, &decay->interval);
+	if (decay_ms_read(decay) > 0) {
+		nstime_t jitter;
+
+		nstime_init(&jitter, prng_range_u64(&decay->jitter_state,
+		    nstime_ns(&decay->interval)));
+		nstime_add(&decay->deadline, &jitter);
+	}
+}
+
+void
+decay_reinit(decay_t *decay, ssize_t decay_ms) {
+	atomic_store_zd(&decay->time_ms, decay_ms, ATOMIC_RELAXED);
+	if (decay_ms > 0) {
+		nstime_init(&decay->interval, (uint64_t)decay_ms *
+		    KQU(1000000));
+		nstime_idivide(&decay->interval, SMOOTHSTEP_NSTEPS);
+	}
+
+	nstime_init_update(&decay->epoch);
+	decay->jitter_state = (uint64_t)(uintptr_t)decay;
+	decay_deadline_init(decay);
+	decay->nunpurged = 0;
+	memset(decay->backlog, 0, SMOOTHSTEP_NSTEPS * sizeof(size_t));
+}
+
+bool
+decay_init(decay_t *decay, ssize_t decay_ms) {
+	if (config_debug) {
+		for (size_t i = 0; i < sizeof(decay_t); i++) {
+			assert(((char *)decay)[i] == 0);
+		}
+		decay->ceil_npages = 0;
+	}
+	if (malloc_mutex_init(&decay->mtx, "decay", WITNESS_RANK_DECAY,
+	    malloc_mutex_rank_exclusive)) {
+		return true;
+	}
+	decay->purging = false;
+	decay_reinit(decay, decay_ms);
+	return false;
+}
+
+bool
+decay_ms_valid(ssize_t decay_ms) {
+	if (decay_ms < -1) {
+		return false;
+	}
+	if (decay_ms == -1 || (uint64_t)decay_ms <= NSTIME_SEC_MAX *
+	    KQU(1000)) {
+		return true;
+	}
+	return false;
+}
+
+static void
+decay_maybe_update_time(decay_t *decay, nstime_t *new_time) {
+	if (unlikely(!nstime_monotonic() && nstime_compare(&decay->epoch,
+	    new_time) > 0)) {
+		/*
+		 * Time went backwards.  Move the epoch back in time and
+		 * generate a new deadline, with the expectation that time
+		 * typically flows forward for long enough periods of time that
+		 * epochs complete.  Unfortunately, this strategy is susceptible
+		 * to clock jitter triggering premature epoch advances, but
+		 * clock jitter estimation and compensation isn't feasible here
+		 * because calls into this code are event-driven.
+		 */
+		nstime_copy(&decay->epoch, new_time);
+		decay_deadline_init(decay);
+	} else {
+		/* Verify that time does not go backwards. */
+		assert(nstime_compare(&decay->epoch, new_time) <= 0);
+	}
+}
+
+static size_t
+decay_backlog_npages_limit(const decay_t *decay) {
+	/*
+	 * For each element of decay_backlog, multiply by the corresponding
+	 * fixed-point smoothstep decay factor.  Sum the products, then divide
+	 * to round down to the nearest whole number of pages.
+	 */
+	uint64_t sum = 0;
+	for (unsigned i = 0; i < SMOOTHSTEP_NSTEPS; i++) {
+		sum += decay->backlog[i] * h_steps[i];
+	}
+	size_t npages_limit_backlog = (size_t)(sum >> SMOOTHSTEP_BFP);
+
+	return npages_limit_backlog;
+}
+
+static void
+decay_backlog_update(decay_t *decay, uint64_t nadvance_u64,
+    size_t current_npages) {
+	if (nadvance_u64 >= SMOOTHSTEP_NSTEPS) {
+		memset(decay->backlog, 0, (SMOOTHSTEP_NSTEPS-1) *
+		    sizeof(size_t));
+	} else {
+		size_t nadvance_z = (size_t)nadvance_u64;
+
+		assert((uint64_t)nadvance_z == nadvance_u64);
+
+		memmove(decay->backlog, &decay->backlog[nadvance_z],
+		    (SMOOTHSTEP_NSTEPS - nadvance_z) * sizeof(size_t));
+		if (nadvance_z > 1) {
+			memset(&decay->backlog[SMOOTHSTEP_NSTEPS -
+			    nadvance_z], 0, (nadvance_z-1) * sizeof(size_t));
+		}
+	}
+
+	size_t npages_delta = (current_npages > decay->nunpurged) ?
+	    current_npages - decay->nunpurged : 0;
+	decay->backlog[SMOOTHSTEP_NSTEPS-1] = npages_delta;
+
+	if (config_debug) {
+		if (current_npages > decay->ceil_npages) {
+			decay->ceil_npages = current_npages;
+		}
+		size_t npages_limit = decay_backlog_npages_limit(decay);
+		assert(decay->ceil_npages >= npages_limit);
+		if (decay->ceil_npages > npages_limit) {
+			decay->ceil_npages = npages_limit;
+		}
+	}
+}
+
+static inline bool
+decay_deadline_reached(const decay_t *decay, const nstime_t *time) {
+	return (nstime_compare(&decay->deadline, time) <= 0);
+}
+
+bool
+decay_maybe_advance_epoch(decay_t *decay, nstime_t *new_time,
+    size_t npages_current) {
+	/* Handle possible non-monotonicity of time. */
+	decay_maybe_update_time(decay, new_time);
+
+	if (!decay_deadline_reached(decay, new_time)) {
+		return false;
+	}
+	nstime_t delta;
+	nstime_copy(&delta, new_time);
+	nstime_subtract(&delta, &decay->epoch);
+
+	uint64_t nadvance_u64 = nstime_divide(&delta, &decay->interval);
+	assert(nadvance_u64 > 0);
+
+	/* Add nadvance_u64 decay intervals to epoch. */
+	nstime_copy(&delta, &decay->interval);
+	nstime_imultiply(&delta, nadvance_u64);
+	nstime_add(&decay->epoch, &delta);
+
+	/* Set a new deadline. */
+	decay_deadline_init(decay);
+
+	/* Update the backlog. */
+	decay_backlog_update(decay, nadvance_u64, npages_current);
+
+	decay->npages_limit = decay_backlog_npages_limit(decay);
+	decay->nunpurged = (decay->npages_limit > npages_current) ?
+	    decay->npages_limit : npages_current;
+
+	return true;
+}
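Finally, the epoch-advance arithmetic at the heart of decay_maybe_advance_epoch: elapsed time is divided into whole intervals, and the epoch moves forward by exactly that many intervals, so it stays on an interval boundary even after a long sleep. With plain integers standing in for nstime_t:

#include <stdint.h>
#include <stdio.h>

int
main(void) {
        /* Toy time units; nstime_t arithmetic reduced to integers. */
        uint64_t interval = 50, epoch = 0, deadline = 50;
        uint64_t now = 237;     /* woke up long after the deadline */

        if (now >= deadline) {
                uint64_t nadvance = (now - epoch) / interval;   /* 4 */
                epoch += nadvance * interval;   /* 200, an interval boundary */
                deadline = epoch + interval;    /* 250 (plus jitter, really) */
                printf("advanced %llu epochs: epoch=%llu deadline=%llu\n",
                    (unsigned long long)nadvance, (unsigned long long)epoch,
                    (unsigned long long)deadline);
        }
        return 0;
}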