Allow PAI to calculate time until deferred work

Previously, the calculation of sleep time between wakeups was implemented within
background_thread. This caused decay- and HPA-specific logic to mix with the
background thread implementation. In this change, background_thread delegates
the calculation to the arena, which in turn delegates it to PAI.
The next step is to implement the actual calculation of time until deferred work
in HPA.
This commit is contained in:
Alex Lapenkou 2021-08-06 14:53:05 -07:00 committed by Alexander Lapenkov
parent 26140dd246
commit b8b8027f19
14 changed files with 298 additions and 169 deletions

View File

@ -8,6 +8,12 @@
#include "jemalloc/internal/pages.h" #include "jemalloc/internal/pages.h"
#include "jemalloc/internal/stats.h" #include "jemalloc/internal/stats.h"
/*
* When the amount of pages to be purged exceeds this amount, deferred purge
* should happen.
*/
#define ARENA_DEFERRED_PURGE_NPAGES_THRESHOLD UINT64_C(1024)
extern ssize_t opt_dirty_decay_ms; extern ssize_t opt_dirty_decay_ms;
extern ssize_t opt_muzzy_decay_ms; extern ssize_t opt_muzzy_decay_ms;
@ -16,7 +22,6 @@ extern const char *percpu_arena_mode_names[];
extern div_info_t arena_binind_div_info[SC_NBINS]; extern div_info_t arena_binind_div_info[SC_NBINS];
extern const uint64_t h_steps[SMOOTHSTEP_NSTEPS];
extern malloc_mutex_t arenas_lock; extern malloc_mutex_t arenas_lock;
extern emap_t arena_emap_global; extern emap_t arena_emap_global;
@ -51,6 +56,7 @@ bool arena_decay_ms_set(tsdn_t *tsdn, arena_t *arena, extent_state_t state,
ssize_t arena_decay_ms_get(arena_t *arena, extent_state_t state); ssize_t arena_decay_ms_get(arena_t *arena, extent_state_t state);
void arena_decay(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, void arena_decay(tsdn_t *tsdn, arena_t *arena, bool is_background_thread,
bool all); bool all);
uint64_t arena_time_until_deferred(tsdn_t *tsdn, arena_t *arena);
void arena_do_deferred_work(tsdn_t *tsdn, arena_t *arena); void arena_do_deferred_work(tsdn_t *tsdn, arena_t *arena);
void arena_reset(tsd_t *tsd, arena_t *arena); void arena_reset(tsd_t *tsd, arena_t *arena);
void arena_destroy(tsd_t *tsd, arena_t *arena); void arena_destroy(tsd_t *tsd, arena_t *arena);

View File

@ -13,6 +13,9 @@ extern background_thread_info_t *background_thread_info;
bool background_thread_create(tsd_t *tsd, unsigned arena_ind); bool background_thread_create(tsd_t *tsd, unsigned arena_ind);
bool background_threads_enable(tsd_t *tsd); bool background_threads_enable(tsd_t *tsd);
bool background_threads_disable(tsd_t *tsd); bool background_threads_disable(tsd_t *tsd);
bool background_thread_running(background_thread_info_t* info);
void background_thread_wakeup_early(background_thread_info_t *info,
nstime_t *remaining_sleep);
void background_thread_interval_check(tsdn_t *tsdn, arena_t *arena, void background_thread_interval_check(tsdn_t *tsdn, arena_t *arena,
decay_t *decay, size_t npages_new); decay_t *decay, size_t npages_new);
void background_thread_prefork0(tsdn_t *tsdn); void background_thread_prefork0(tsdn_t *tsdn);

View File

@ -45,18 +45,4 @@ background_thread_indefinite_sleep(background_thread_info_t *info) {
return atomic_load_b(&info->indefinite_sleep, ATOMIC_ACQUIRE); return atomic_load_b(&info->indefinite_sleep, ATOMIC_ACQUIRE);
} }
/*
 * (Removed by this change; replaced by the static version in arena.c that
 * calls arena_maybe_do_deferred_work.)
 *
 * If the background thread serving this arena is sleeping indefinitely, give
 * it a chance to re-evaluate dirty-decay state so deferred purging does not
 * stall.  No-op when background threads are disabled or when the caller is
 * itself a background thread.
 */
JEMALLOC_ALWAYS_INLINE void
arena_background_thread_inactivity_check(tsdn_t *tsdn, arena_t *arena,
bool is_background_thread) {
if (!background_thread_enabled() || is_background_thread) {
return;
}
background_thread_info_t *info =
arena_background_thread_info_get(arena);
if (background_thread_indefinite_sleep(info)) {
background_thread_interval_check(tsdn, arena,
&arena->pa_shard.pac.decay_dirty, 0);
}
}
#endif /* JEMALLOC_INTERNAL_BACKGROUND_THREAD_INLINES_H */ #endif /* JEMALLOC_INTERNAL_BACKGROUND_THREAD_INLINES_H */

View File

@ -19,6 +19,9 @@
#define BACKGROUND_THREAD_HPA_INTERVAL_MAX_UNINITIALIZED (-2) #define BACKGROUND_THREAD_HPA_INTERVAL_MAX_UNINITIALIZED (-2)
#define BACKGROUND_THREAD_HPA_INTERVAL_MAX_DEFAULT_WHEN_ENABLED 5000 #define BACKGROUND_THREAD_HPA_INTERVAL_MAX_DEFAULT_WHEN_ENABLED 5000
#define BACKGROUND_THREAD_DEFERRED_MIN UINT64_C(0)
#define BACKGROUND_THREAD_DEFERRED_MAX UINT64_C(-1)
typedef enum { typedef enum {
background_thread_stopped, background_thread_stopped,
background_thread_started, background_thread_started,

View File

@ -118,6 +118,25 @@ decay_epoch_duration_ns(const decay_t *decay) {
return nstime_ns(&decay->interval); return nstime_ns(&decay->interval);
} }
/* Returns true if decay is enabled and pages purge immediately (decay_ms == 0). */
static inline bool
decay_immediately(const decay_t *decay) {
ssize_t decay_ms = decay_ms_read(decay);
return decay_ms == 0;
}
/* Returns true if decay is disabled entirely (negative decay_ms setting). */
static inline bool
decay_disabled(const decay_t *decay) {
ssize_t decay_ms = decay_ms_read(decay);
return decay_ms < 0;
}
/* Returns true if decay is enabled and done gradually (decay_ms > 0). */
static inline bool
decay_gradually(const decay_t *decay) {
ssize_t decay_ms = decay_ms_read(decay);
return decay_ms > 0;
}
/* /*
* Returns true if the passed in decay time setting is valid. * Returns true if the passed in decay time setting is valid.
* < -1 : invalid * < -1 : invalid
@ -144,6 +163,12 @@ bool decay_init(decay_t *decay, nstime_t *cur_time, ssize_t decay_ms);
*/ */
void decay_reinit(decay_t *decay, nstime_t *cur_time, ssize_t decay_ms); void decay_reinit(decay_t *decay, nstime_t *cur_time, ssize_t decay_ms);
/*
* Compute how many of 'npages_new' pages we would need to purge in 'time'.
*/
uint64_t decay_npages_purge_in(decay_t *decay, nstime_t *time,
size_t npages_new);
/* Returns true if the epoch advanced and there are pages to purge. */ /* Returns true if the epoch advanced and there are pages to purge. */
bool decay_maybe_advance_epoch(decay_t *decay, nstime_t *new_time, bool decay_maybe_advance_epoch(decay_t *decay, nstime_t *new_time,
size_t current_npages); size_t current_npages);

View File

@ -200,6 +200,8 @@ ssize_t pa_decay_ms_get(pa_shard_t *shard, extent_state_t state);
void pa_shard_set_deferral_allowed(tsdn_t *tsdn, pa_shard_t *shard, void pa_shard_set_deferral_allowed(tsdn_t *tsdn, pa_shard_t *shard,
bool deferral_allowed); bool deferral_allowed);
void pa_shard_do_deferred_work(tsdn_t *tsdn, pa_shard_t *shard); void pa_shard_do_deferred_work(tsdn_t *tsdn, pa_shard_t *shard);
void pa_shard_try_deferred_work(tsdn_t *tsdn, pa_shard_t *shard);
uint64_t pa_shard_time_until_deferred_work(tsdn_t *tsdn, pa_shard_t *shard);
/******************************************************************************/ /******************************************************************************/
/* /*

View File

@ -24,6 +24,7 @@ struct pai_s {
/* This function empties out list as a side-effect of being called. */ /* This function empties out list as a side-effect of being called. */
void (*dalloc_batch)(tsdn_t *tsdn, pai_t *self, void (*dalloc_batch)(tsdn_t *tsdn, pai_t *self,
edata_list_active_t *list); edata_list_active_t *list);
uint64_t (*time_until_deferred_work)(tsdn_t *tsdn, pai_t *self);
}; };
/* /*
@ -64,6 +65,11 @@ pai_dalloc_batch(tsdn_t *tsdn, pai_t *self, edata_list_active_t *list) {
self->dalloc_batch(tsdn, self, list); self->dalloc_batch(tsdn, self, list);
} }
/*
 * Time until this page-allocator interface has deferred work that ought to be
 * done.  Delegates to the implementation-specific hook.
 */
static inline uint64_t
pai_time_until_deferred_work(tsdn_t *tsdn, pai_t *self) {
return self->time_until_deferred_work(tsdn, self);
}
/* /*
* An implementation of batch allocation that simply calls alloc once for * An implementation of batch allocation that simply calls alloc once for
* each item in the list. * each item in the list.

View File

@ -38,13 +38,6 @@ static atomic_zd_t muzzy_decay_ms_default;
emap_t arena_emap_global; emap_t arena_emap_global;
pa_central_t arena_pa_central_global; pa_central_t arena_pa_central_global;
const uint64_t h_steps[SMOOTHSTEP_NSTEPS] = {
#define STEP(step, h, x, y) \
h,
SMOOTHSTEP
#undef STEP
};
div_info_t arena_binind_div_info[SC_NBINS]; div_info_t arena_binind_div_info[SC_NBINS];
size_t opt_oversize_threshold = OVERSIZE_THRESHOLD_DEFAULT; size_t opt_oversize_threshold = OVERSIZE_THRESHOLD_DEFAULT;
@ -65,6 +58,9 @@ static bool arena_decay_dirty(tsdn_t *tsdn, arena_t *arena,
bool is_background_thread, bool all); bool is_background_thread, bool all);
static void arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, edata_t *slab, static void arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, edata_t *slab,
bin_t *bin); bin_t *bin);
static void
arena_maybe_do_deferred_work(tsdn_t *tsdn, arena_t *arena, decay_t *decay,
size_t npages_new);
/******************************************************************************/ /******************************************************************************/
@ -189,6 +185,20 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads,
} }
} }
/*
 * If the background thread serving this arena is sleeping indefinitely, check
 * whether it should be woken up early for deferred work.  No-op when
 * background threads are disabled or when the caller is itself a background
 * thread (it manages its own wakeups).
 */
static void
arena_background_thread_inactivity_check(tsdn_t *tsdn, arena_t *arena,
bool is_background_thread) {
if (!background_thread_enabled() || is_background_thread) {
return;
}
background_thread_info_t *info =
arena_background_thread_info_get(arena);
if (background_thread_indefinite_sleep(info)) {
arena_maybe_do_deferred_work(tsdn, arena,
&arena->pa_shard.pac.decay_dirty, 0);
}
}
void arena_handle_new_dirty_pages(tsdn_t *tsdn, arena_t *arena) { void arena_handle_new_dirty_pages(tsdn_t *tsdn, arena_t *arena) {
witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
WITNESS_RANK_CORE, 0); WITNESS_RANK_CORE, 0);
@ -420,8 +430,7 @@ arena_decay_impl(tsdn_t *tsdn, arena_t *arena, decay_t *decay,
if (have_background_thread && background_thread_enabled() && if (have_background_thread && background_thread_enabled() &&
epoch_advanced && !is_background_thread) { epoch_advanced && !is_background_thread) {
background_thread_interval_check(tsdn, arena, decay, arena_maybe_do_deferred_work(tsdn, arena, decay, npages_new);
npages_new);
} }
return false; return false;
@ -462,6 +471,65 @@ arena_decay(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all) {
arena_decay_muzzy(tsdn, arena, is_background_thread, all); arena_decay_muzzy(tsdn, arena, is_background_thread, all);
} }
/*
 * Decide whether the background thread should be signaled to wake up early
 * and purge on behalf of this arena.  Estimates how many pages would need
 * purging by the thread's next wakeup and signals once the accumulated
 * estimate crosses ARENA_DEFERRED_PURGE_NPAGES_THRESHOLD, or if the thread
 * is sleeping indefinitely while any work is pending.  Entirely
 * non-blocking: bails out if either mutex is contended.
 */
static void
arena_maybe_do_deferred_work(tsdn_t *tsdn, arena_t *arena, decay_t *decay,
size_t npages_new) {
background_thread_info_t *info = arena_background_thread_info_get(
arena);
if (malloc_mutex_trylock(tsdn, &info->mtx)) {
/*
 * Background thread may hold the mutex for a long period of
 * time. We'd like to avoid the variance on application
 * threads. So keep this non-blocking, and leave the work to a
 * future epoch.
 */
return;
}
if (!background_thread_running(info)) {
/* Nobody to signal. */
goto label_done;
}
if (malloc_mutex_trylock(tsdn, &decay->mtx)) {
goto label_done;
}
if (!decay_gradually(decay)) {
/* Purging is eager or disabled; nothing is deferred. */
goto label_done_unlock2;
}
/*
 * diff = scheduled wakeup time minus the decay epoch — presumably the
 * window over which newly dirtied pages will be decaying before the
 * thread next runs (NOTE(review): confirm wakeup_time_get semantics).
 */
nstime_t diff;
nstime_init(&diff, background_thread_wakeup_time_get(info));
if (nstime_compare(&diff, &decay->epoch) <= 0) {
goto label_done_unlock2;
}
nstime_subtract(&diff, &decay->epoch);
if (npages_new > 0) {
uint64_t npurge_new = decay_npages_purge_in(decay, &diff,
npages_new);
info->npages_to_purge_new += npurge_new;
}
bool should_signal;
if (info->npages_to_purge_new > ARENA_DEFERRED_PURGE_NPAGES_THRESHOLD) {
/* Enough deferred purging accumulated to justify a wakeup. */
should_signal = true;
} else if (unlikely(background_thread_indefinite_sleep(info)) &&
(ecache_npages_get(&arena->pa_shard.pac.ecache_dirty) > 0 ||
ecache_npages_get(&arena->pa_shard.pac.ecache_muzzy) > 0 ||
info->npages_to_purge_new > 0)) {
/* The thread would otherwise never wake on its own. */
should_signal = true;
} else {
should_signal = false;
}
if (should_signal) {
info->npages_to_purge_new = 0;
background_thread_wakeup_early(info, &diff);
}
label_done_unlock2:
malloc_mutex_unlock(tsdn, &decay->mtx);
label_done:
malloc_mutex_unlock(tsdn, &info->mtx);
}
/* Called from background threads. */ /* Called from background threads. */
void void
arena_do_deferred_work(tsdn_t *tsdn, arena_t *arena) { arena_do_deferred_work(tsdn_t *tsdn, arena_t *arena) {

View File

@ -60,8 +60,9 @@ pthread_create_wrapper(pthread_t *__restrict thread, const pthread_attr_t *attr,
bool background_thread_create(tsd_t *tsd, unsigned arena_ind) NOT_REACHED bool background_thread_create(tsd_t *tsd, unsigned arena_ind) NOT_REACHED
bool background_threads_enable(tsd_t *tsd) NOT_REACHED bool background_threads_enable(tsd_t *tsd) NOT_REACHED
bool background_threads_disable(tsd_t *tsd) NOT_REACHED bool background_threads_disable(tsd_t *tsd) NOT_REACHED
void background_thread_interval_check(tsdn_t *tsdn, arena_t *arena, bool background_thread_running(background_thread_info_t *info) NOT_REACHED
decay_t *decay, size_t npages_new) NOT_REACHED void background_thread_wakeup_early(background_thread_info_t *info,
nstime_t *remaining_sleep) NOT_REACHED
void background_thread_prefork0(tsdn_t *tsdn) NOT_REACHED void background_thread_prefork0(tsdn_t *tsdn) NOT_REACHED
void background_thread_prefork1(tsdn_t *tsdn) NOT_REACHED void background_thread_prefork1(tsdn_t *tsdn) NOT_REACHED
void background_thread_postfork_parent(tsdn_t *tsdn) NOT_REACHED void background_thread_postfork_parent(tsdn_t *tsdn) NOT_REACHED
@ -98,8 +99,6 @@ set_current_thread_affinity(int cpu) {
#endif #endif
} }
/* Threshold for determining when to wake up the background thread. */
#define BACKGROUND_THREAD_NPAGES_THRESHOLD UINT64_C(1024)
#define BILLION UINT64_C(1000000000) #define BILLION UINT64_C(1000000000)
/* Minimal sleep interval 100 ms. */ /* Minimal sleep interval 100 ms. */
#define BACKGROUND_THREAD_MIN_INTERVAL_NS (BILLION / 10) #define BACKGROUND_THREAD_MIN_INTERVAL_NS (BILLION / 10)
@ -173,55 +172,10 @@ background_thread_pause_check(tsdn_t *tsdn, background_thread_info_t *info) {
return false; return false;
} }
/*
 * (Removed by this change; superseded by pa_shard_ns_until_purge in pa.c.)
 * Nanoseconds until enough pages accumulate in 'decay' to warrant a purge,
 * clamped below by the minimal interval; the minimal interval is also
 * returned when the decay mutex is contended, to stay non-blocking.
 */
static inline uint64_t
arena_decay_compute_purge_interval(tsdn_t *tsdn, decay_t *decay,
size_t npages) {
if (malloc_mutex_trylock(tsdn, &decay->mtx)) {
/* Use minimal interval if decay is contended. */
return BACKGROUND_THREAD_MIN_INTERVAL_NS;
}
uint64_t decay_ns = decay_ns_until_purge(decay, npages,
BACKGROUND_THREAD_NPAGES_THRESHOLD);
malloc_mutex_unlock(tsdn, &decay->mtx);
return decay_ns < BACKGROUND_THREAD_MIN_INTERVAL_NS ?
BACKGROUND_THREAD_MIN_INTERVAL_NS :
decay_ns;
}
/*
 * (Removed by this change; superseded by pa_shard_time_until_deferred_work.)
 * Minimum of the dirty-decay, muzzy-decay, and HPA purge intervals for this
 * arena, each clamped at the minimal sleep interval.  Short-circuits when
 * the dirty interval already equals the minimum.
 */
static inline uint64_t
arena_decay_compute_min_purge_interval(tsdn_t *tsdn, arena_t *arena) {
uint64_t dirty, muzzy;
dirty = arena_decay_compute_purge_interval(tsdn,
&arena->pa_shard.pac.decay_dirty,
ecache_npages_get(&arena->pa_shard.pac.ecache_dirty));
if (dirty == BACKGROUND_THREAD_MIN_INTERVAL_NS) {
return dirty;
}
muzzy = arena_decay_compute_purge_interval(tsdn,
&arena->pa_shard.pac.decay_muzzy,
ecache_npages_get(&arena->pa_shard.pac.ecache_muzzy));
uint64_t min_so_far = dirty < muzzy ? dirty : muzzy;
if (opt_background_thread_hpa_interval_max_ms >= 0) {
/* Option is in ms; convert to ns before comparing. */
uint64_t hpa_interval = 1000 * 1000 *
(uint64_t)opt_background_thread_hpa_interval_max_ms;
if (hpa_interval < min_so_far) {
if (hpa_interval < BACKGROUND_THREAD_MIN_INTERVAL_NS) {
min_so_far = BACKGROUND_THREAD_MIN_INTERVAL_NS;
} else {
min_so_far = hpa_interval;
}
}
}
return min_so_far;
}
static inline void static inline void
background_work_sleep_once(tsdn_t *tsdn, background_thread_info_t *info, unsigned ind) { background_work_sleep_once(tsdn_t *tsdn, background_thread_info_t *info,
uint64_t min_interval = BACKGROUND_THREAD_INDEFINITE_SLEEP; unsigned ind) {
uint64_t ns_until_deferred = BACKGROUND_THREAD_DEFERRED_MAX;
unsigned narenas = narenas_total_get(); unsigned narenas = narenas_total_get();
for (unsigned i = ind; i < narenas; i += max_background_threads) { for (unsigned i = ind; i < narenas; i += max_background_threads) {
@ -230,19 +184,29 @@ background_work_sleep_once(tsdn_t *tsdn, background_thread_info_t *info, unsigne
continue; continue;
} }
arena_do_deferred_work(tsdn, arena); arena_do_deferred_work(tsdn, arena);
if (min_interval == BACKGROUND_THREAD_MIN_INTERVAL_NS) { if (ns_until_deferred <= BACKGROUND_THREAD_MIN_INTERVAL_NS) {
/* Min interval will be used. */ /* Min interval will be used. */
continue; continue;
} }
uint64_t interval = arena_decay_compute_min_purge_interval(tsdn, uint64_t ns_arena_deferred = pa_shard_time_until_deferred_work(
arena); tsdn, &arena->pa_shard);
assert(interval >= BACKGROUND_THREAD_MIN_INTERVAL_NS); if (ns_arena_deferred < ns_until_deferred) {
if (interval != DECAY_UNBOUNDED_TIME_TO_PURGE && ns_until_deferred = ns_arena_deferred;
min_interval > interval) {
min_interval = interval;
} }
} }
background_thread_sleep(tsdn, info, min_interval);
uint64_t sleep_ns;
if (ns_until_deferred == BACKGROUND_THREAD_DEFERRED_MAX) {
sleep_ns = BACKGROUND_THREAD_INDEFINITE_SLEEP;
} else {
sleep_ns =
(ns_until_deferred < BACKGROUND_THREAD_MIN_INTERVAL_NS)
? BACKGROUND_THREAD_MIN_INTERVAL_NS
: ns_until_deferred;
}
background_thread_sleep(tsdn, info, sleep_ns);
} }
static bool static bool
@ -609,89 +573,24 @@ background_threads_disable(tsd_t *tsd) {
return false; return false;
} }
/* Check if we need to signal the background thread early. */ bool
background_thread_running(background_thread_info_t *info) {
return info->state == background_thread_started;
}
void void
background_thread_interval_check(tsdn_t *tsdn, arena_t *arena, decay_t *decay, background_thread_wakeup_early(background_thread_info_t *info,
size_t npages_new) { nstime_t *remaining_sleep) {
background_thread_info_t *info = arena_background_thread_info_get(
arena);
if (malloc_mutex_trylock(tsdn, &info->mtx)) {
/* /*
* Background thread may hold the mutex for a long period of * This is an optimization to increase batching. At this point
* time. We'd like to avoid the variance on application * we know that background thread wakes up soon, so the time to cache
* threads. So keep this non-blocking, and leave the work to a * the just freed memory is bounded and low.
* future epoch.
*/ */
if (nstime_ns(remaining_sleep) < BACKGROUND_THREAD_MIN_INTERVAL_NS) {
return; return;
} }
if (info->state != background_thread_started) {
goto label_done;
}
if (malloc_mutex_trylock(tsdn, &decay->mtx)) {
goto label_done;
}
ssize_t decay_time = decay_ms_read(decay);
if (decay_time <= 0) {
/* Purging is eagerly done or disabled currently. */
goto label_done_unlock2;
}
uint64_t decay_interval_ns = decay_epoch_duration_ns(decay);
assert(decay_interval_ns > 0);
nstime_t diff;
nstime_init(&diff, background_thread_wakeup_time_get(info));
if (nstime_compare(&diff, &decay->epoch) <= 0) {
goto label_done_unlock2;
}
nstime_subtract(&diff, &decay->epoch);
if (nstime_ns(&diff) < BACKGROUND_THREAD_MIN_INTERVAL_NS) {
goto label_done_unlock2;
}
if (npages_new > 0) {
size_t n_epoch = (size_t)(nstime_ns(&diff) / decay_interval_ns);
/*
* Compute how many new pages we would need to purge by the next
* wakeup, which is used to determine if we should signal the
* background thread.
*/
uint64_t npurge_new;
if (n_epoch >= SMOOTHSTEP_NSTEPS) {
npurge_new = npages_new;
} else {
uint64_t h_steps_max = h_steps[SMOOTHSTEP_NSTEPS - 1];
assert(h_steps_max >=
h_steps[SMOOTHSTEP_NSTEPS - 1 - n_epoch]);
npurge_new = npages_new * (h_steps_max -
h_steps[SMOOTHSTEP_NSTEPS - 1 - n_epoch]);
npurge_new >>= SMOOTHSTEP_BFP;
}
info->npages_to_purge_new += npurge_new;
}
bool should_signal;
if (info->npages_to_purge_new > BACKGROUND_THREAD_NPAGES_THRESHOLD) {
should_signal = true;
} else if (unlikely(background_thread_indefinite_sleep(info)) &&
(ecache_npages_get(&arena->pa_shard.pac.ecache_dirty) > 0 ||
ecache_npages_get(&arena->pa_shard.pac.ecache_muzzy) > 0 ||
info->npages_to_purge_new > 0)) {
should_signal = true;
} else {
should_signal = false;
}
if (should_signal) {
info->npages_to_purge_new = 0;
pthread_cond_signal(&info->cond); pthread_cond_signal(&info->cond);
} }
label_done_unlock2:
malloc_mutex_unlock(tsdn, &decay->mtx);
label_done:
malloc_mutex_unlock(tsdn, &info->mtx);
}
void void
background_thread_prefork0(tsdn_t *tsdn) { background_thread_prefork0(tsdn_t *tsdn) {

View File

@ -3,6 +3,13 @@
#include "jemalloc/internal/decay.h" #include "jemalloc/internal/decay.h"
/*
 * Precomputed smoothstep curve values in fixed point (scaled by
 * 2^SMOOTHSTEP_BFP); consumed by decay_npages_purge_in below.
 */
const uint64_t h_steps[SMOOTHSTEP_NSTEPS] = {
#define STEP(step, h, x, y) \
h,
SMOOTHSTEP
#undef STEP
};
/* /*
* Generate a new deadline that is uniformly random within the next epoch after * Generate a new deadline that is uniformly random within the next epoch after
* the current one. * the current one.
@ -147,6 +154,25 @@ decay_deadline_reached(const decay_t *decay, const nstime_t *time) {
return (nstime_compare(&decay->deadline, time) <= 0); return (nstime_compare(&decay->deadline, time) <= 0);
} }
/*
 * Compute how many of 'npages_new' pages would be purged within 'time'.
 *
 * NOTE(review): assumes gradual decay (decay_gradually), i.e. a nonzero
 * epoch duration — otherwise the division below is by zero.  Both callers
 * in this change satisfy this; confirm for future callers.
 */
uint64_t
decay_npages_purge_in(decay_t *decay, nstime_t *time, size_t npages_new) {
uint64_t decay_interval_ns = decay_epoch_duration_ns(decay);
size_t n_epoch = (size_t)(nstime_ns(time) / decay_interval_ns);
uint64_t npages_purge;
if (n_epoch >= SMOOTHSTEP_NSTEPS) {
/* 'time' spans the whole decay horizon: everything purges. */
npages_purge = npages_new;
} else {
/* Fixed-point fraction of the smoothstep curve covered by n_epoch. */
uint64_t h_steps_max = h_steps[SMOOTHSTEP_NSTEPS - 1];
assert(h_steps_max >=
h_steps[SMOOTHSTEP_NSTEPS - 1 - n_epoch]);
npages_purge = npages_new * (h_steps_max -
h_steps[SMOOTHSTEP_NSTEPS - 1 - n_epoch]);
npages_purge >>= SMOOTHSTEP_BFP;
}
return npages_purge;
}
bool bool
decay_maybe_advance_epoch(decay_t *decay, nstime_t *new_time, decay_maybe_advance_epoch(decay_t *decay, nstime_t *new_time,
size_t npages_current) { size_t npages_current) {
@ -214,9 +240,7 @@ decay_npurge_after_interval(decay_t *decay, size_t interval) {
uint64_t decay_ns_until_purge(decay_t *decay, size_t npages_current, uint64_t decay_ns_until_purge(decay_t *decay, size_t npages_current,
uint64_t npages_threshold) { uint64_t npages_threshold) {
ssize_t decay_time = decay_ms_read(decay); if (!decay_gradually(decay)) {
if (decay_time <= 0) {
/* Purging is eagerly done or disabled currently. */
return DECAY_UNBOUNDED_TIME_TO_PURGE; return DECAY_UNBOUNDED_TIME_TO_PURGE;
} }
uint64_t decay_interval_ns = decay_epoch_duration_ns(decay); uint64_t decay_interval_ns = decay_epoch_duration_ns(decay);

View File

@ -19,6 +19,7 @@ static bool hpa_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata,
static void hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata); static void hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata);
static void hpa_dalloc_batch(tsdn_t *tsdn, pai_t *self, static void hpa_dalloc_batch(tsdn_t *tsdn, pai_t *self,
edata_list_active_t *list); edata_list_active_t *list);
static uint64_t hpa_time_until_deferred_work(tsdn_t *tsdn, pai_t *self);
bool bool
hpa_supported() { hpa_supported() {
@ -218,6 +219,7 @@ hpa_shard_init(hpa_shard_t *shard, hpa_central_t *central, emap_t *emap,
shard->pai.shrink = &hpa_shrink; shard->pai.shrink = &hpa_shrink;
shard->pai.dalloc = &hpa_dalloc; shard->pai.dalloc = &hpa_dalloc;
shard->pai.dalloc_batch = &hpa_dalloc_batch; shard->pai.dalloc_batch = &hpa_dalloc_batch;
shard->pai.time_until_deferred_work = &hpa_time_until_deferred_work;
hpa_do_consistency_checks(shard); hpa_do_consistency_checks(shard);
@ -850,6 +852,11 @@ hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) {
hpa_dalloc_batch(tsdn, self, &dalloc_list); hpa_dalloc_batch(tsdn, self, &dalloc_list);
} }
/*
 * Time until the HPA needs its deferred work done.
 *
 * The values aggregated by pa_shard_time_until_deferred_work are in
 * nanoseconds (cf. decay_ns_until_purge), but the option is in
 * milliseconds, so convert — the removed background_thread code did the
 * same (1000 * 1000 * ms).  A negative option value (uninitialized -2 or
 * disabled) means HPA never requires an early wakeup.
 */
static uint64_t
hpa_time_until_deferred_work(tsdn_t *tsdn, pai_t *self) {
	if (opt_background_thread_hpa_interval_max_ms < 0) {
		return BACKGROUND_THREAD_DEFERRED_MAX;
	}
	return (uint64_t)opt_background_thread_hpa_interval_max_ms *
	    1000 * 1000;
}
void void
hpa_shard_disable(tsdn_t *tsdn, hpa_shard_t *shard) { hpa_shard_disable(tsdn_t *tsdn, hpa_shard_t *shard) {
hpa_do_consistency_checks(shard); hpa_do_consistency_checks(shard);

View File

@ -96,6 +96,11 @@ pa_shard_reset(tsdn_t *tsdn, pa_shard_t *shard) {
} }
} }
/* Whether this shard routes allocations through the HPA. */
static bool
pa_shard_uses_hpa(pa_shard_t *shard) {
return atomic_load_b(&shard->use_hpa, ATOMIC_RELAXED);
}
void void
pa_shard_destroy(tsdn_t *tsdn, pa_shard_t *shard) { pa_shard_destroy(tsdn_t *tsdn, pa_shard_t *shard) {
pac_destroy(tsdn, &shard->pac); pac_destroy(tsdn, &shard->pac);
@ -118,7 +123,7 @@ pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment,
WITNESS_RANK_CORE, 0); WITNESS_RANK_CORE, 0);
edata_t *edata = NULL; edata_t *edata = NULL;
if (atomic_load_b(&shard->use_hpa, ATOMIC_RELAXED)) { if (pa_shard_uses_hpa(shard)) {
edata = pai_alloc(tsdn, &shard->hpa_sec.pai, size, alignment, edata = pai_alloc(tsdn, &shard->hpa_sec.pai, size, alignment,
zero); zero);
} }
@ -226,7 +231,7 @@ pa_decay_ms_get(pa_shard_t *shard, extent_state_t state) {
void void
pa_shard_set_deferral_allowed(tsdn_t *tsdn, pa_shard_t *shard, pa_shard_set_deferral_allowed(tsdn_t *tsdn, pa_shard_t *shard,
bool deferral_allowed) { bool deferral_allowed) {
if (atomic_load_b(&shard->use_hpa, ATOMIC_RELAXED)) { if (pa_shard_uses_hpa(shard)) {
hpa_shard_set_deferral_allowed(tsdn, &shard->hpa_shard, hpa_shard_set_deferral_allowed(tsdn, &shard->hpa_shard,
deferral_allowed); deferral_allowed);
} }
@ -234,7 +239,63 @@ pa_shard_set_deferral_allowed(tsdn_t *tsdn, pa_shard_t *shard,
void void
pa_shard_do_deferred_work(tsdn_t *tsdn, pa_shard_t *shard) { pa_shard_do_deferred_work(tsdn_t *tsdn, pa_shard_t *shard) {
if (atomic_load_b(&shard->use_hpa, ATOMIC_RELAXED)) { if (pa_shard_uses_hpa(shard)) {
hpa_shard_do_deferred_work(tsdn, &shard->hpa_shard); hpa_shard_do_deferred_work(tsdn, &shard->hpa_shard);
} }
} }
/*
 * Nanoseconds until enough pages accumulate in 'decay' to warrant a purge.
 * Returns the minimal deferral (wake up as soon as possible) when the decay
 * mutex is contended, rather than blocking the caller.
 */
static inline uint64_t
pa_shard_ns_until_purge(tsdn_t *tsdn, decay_t *decay, size_t npages) {
if (malloc_mutex_trylock(tsdn, &decay->mtx)) {
/* Use minimal interval if decay is contended. */
return BACKGROUND_THREAD_DEFERRED_MIN;
}
uint64_t result = decay_ns_until_purge(decay, npages,
ARENA_DEFERRED_PURGE_NPAGES_THRESHOLD);
malloc_mutex_unlock(tsdn, &decay->mtx);
return result;
}
/*
 * Get time until next deferred work ought to happen. If there are multiple
 * things that have been deferred, this function calculates the time until
 * the soonest of those things.  Each source is min-folded into 'time', with
 * an early return once the minimum possible deferral
 * (BACKGROUND_THREAD_DEFERRED_MIN) is reached, since no later source can
 * lower it further.
 */
uint64_t
pa_shard_time_until_deferred_work(tsdn_t *tsdn, pa_shard_t *shard) {
uint64_t time;
/* Dirty-page decay. */
time = pa_shard_ns_until_purge(tsdn,
&shard->pac.decay_dirty,
ecache_npages_get(&shard->pac.ecache_dirty));
if (time == BACKGROUND_THREAD_DEFERRED_MIN) {
return time;
}
/* Muzzy-page decay. */
uint64_t muzzy = pa_shard_ns_until_purge(tsdn,
&shard->pac.decay_muzzy,
ecache_npages_get(&shard->pac.ecache_muzzy));
if (muzzy < time) {
time = muzzy;
if (time == BACKGROUND_THREAD_DEFERRED_MIN) {
return time;
}
}
/* PAC-specific deferred work (currently always DEFERRED_MAX). */
uint64_t pac = pai_time_until_deferred_work(tsdn, &shard->pac.pai);
if (pac < time) {
time = pac;
if (time == BACKGROUND_THREAD_DEFERRED_MIN) {
return time;
}
}
/* HPA deferred work, only if this shard uses the HPA. */
if (pa_shard_uses_hpa(shard)) {
uint64_t hpa =
pai_time_until_deferred_work(tsdn, &shard->hpa_shard.pai);
if (hpa < time) {
time = hpa;
}
}
return time;
}

View File

@ -10,6 +10,7 @@ static bool pac_expand_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata,
static bool pac_shrink_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, static bool pac_shrink_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata,
size_t old_size, size_t new_size); size_t old_size, size_t new_size);
static void pac_dalloc_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata); static void pac_dalloc_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata);
static uint64_t pac_time_until_deferred_work(tsdn_t *tsdn, pai_t *self);
static ehooks_t * static ehooks_t *
pac_ehooks_get(pac_t *pac) { pac_ehooks_get(pac_t *pac) {
@ -96,6 +97,7 @@ pac_init(tsdn_t *tsdn, pac_t *pac, base_t *base, emap_t *emap,
pac->pai.shrink = &pac_shrink_impl; pac->pai.shrink = &pac_shrink_impl;
pac->pai.dalloc = &pac_dalloc_impl; pac->pai.dalloc = &pac_dalloc_impl;
pac->pai.dalloc_batch = &pai_dalloc_batch_default; pac->pai.dalloc_batch = &pai_dalloc_batch_default;
pac->pai.time_until_deferred_work = &pac_time_until_deferred_work;
return false; return false;
} }
@ -196,6 +198,11 @@ pac_dalloc_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata) {
ecache_dalloc(tsdn, pac, ehooks, &pac->ecache_dirty, edata); ecache_dalloc(tsdn, pac, ehooks, &pac->ecache_dirty, edata);
} }
/* The PAC itself defers no work; never request an early wakeup for it. */
static uint64_t
pac_time_until_deferred_work(tsdn_t *tsdn, pai_t *self) {
return BACKGROUND_THREAD_DEFERRED_MAX;
}
bool bool
pac_retain_grow_limit_get_set(tsdn_t *tsdn, pac_t *pac, size_t *old_limit, pac_retain_grow_limit_get_set(tsdn_t *tsdn, pac_t *pac, size_t *old_limit,
size_t *new_limit) { size_t *new_limit) {

View File

@ -36,6 +36,37 @@ TEST_BEGIN(test_decay_ms_valid) {
} }
TEST_END TEST_END
/*
 * Sanity-check decay_npages_purge_in(): with a 1s gradual decay, all pages
 * purge within a full decay interval, none purge instantly, and half purge
 * in half the interval (smoothstep is symmetric about its midpoint).
 */
TEST_BEGIN(test_decay_npages_purge_in) {
decay_t decay;
memset(&decay, 0, sizeof(decay));
nstime_t curtime;
nstime_init(&curtime, 0);
uint64_t decay_ms = 1000;
nstime_t decay_nstime;
/* The full decay interval, expressed in nanoseconds. */
nstime_init(&decay_nstime, decay_ms * 1000 * 1000);
expect_false(decay_init(&decay, &curtime, (ssize_t)decay_ms),
"Failed to initialize decay");
const size_t new_pages = 100;
nstime_t time;
nstime_copy(&time, &decay_nstime);
expect_u64_eq(decay_npages_purge_in(&decay, &time, new_pages),
new_pages, "Not all pages are expected to decay in decay_ms");
nstime_init(&time, 0);
expect_u64_eq(decay_npages_purge_in(&decay, &time, new_pages), 0,
"More than zero pages are expected to instantly decay");
nstime_copy(&time, &decay_nstime);
nstime_idivide(&time, 2);
expect_u64_eq(decay_npages_purge_in(&decay, &time, new_pages),
new_pages / 2, "Not half of pages decay in half the decay period");
}
TEST_END
TEST_BEGIN(test_decay_maybe_advance_epoch) { TEST_BEGIN(test_decay_maybe_advance_epoch) {
decay_t decay; decay_t decay;
memset(&decay, 0, sizeof(decay)); memset(&decay, 0, sizeof(decay));
@ -244,6 +275,7 @@ main(void) {
return test( return test(
test_decay_init, test_decay_init,
test_decay_ms_valid, test_decay_ms_valid,
test_decay_npages_purge_in,
test_decay_maybe_advance_epoch, test_decay_maybe_advance_epoch,
test_decay_empty, test_decay_empty,
test_decay, test_decay,