diff --git a/Makefile.in b/Makefile.in index 264b077c..aa6f3f62 100644 --- a/Makefile.in +++ b/Makefile.in @@ -91,6 +91,7 @@ BINS := $(objroot)bin/jemalloc-config $(objroot)bin/jemalloc.sh $(objroot)bin/je C_HDRS := $(objroot)include/jemalloc/jemalloc$(install_suffix).h C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/arena.c \ + $(srcroot)src/background_thread.c \ $(srcroot)src/base.c \ $(srcroot)src/bitmap.c \ $(srcroot)src/ckh.c \ diff --git a/configure.ac b/configure.ac index 6c1d4ffc..8be4be45 100644 --- a/configure.ac +++ b/configure.ac @@ -1443,12 +1443,23 @@ dnl ============================================================================ dnl Configure pthreads. if test "x$abi" != "xpecoff" ; then + AC_DEFINE([JEMALLOC_HAVE_PTHREAD], [ ]) AC_CHECK_HEADERS([pthread.h], , [AC_MSG_ERROR([pthread.h is missing])]) dnl Some systems may embed pthreads functionality in libc; check for libpthread dnl first, but try libc too before failing. AC_CHECK_LIB([pthread], [pthread_create], [JE_APPEND_VS(LIBS, -lpthread)], [AC_SEARCH_LIBS([pthread_create], , , AC_MSG_ERROR([libpthread is missing]))]) + wrap_syms="${wrap_syms} pthread_create" + dnl Check if we have dlsym support. + have_dlsym="1" + AC_CHECK_HEADERS([dlfcn.h], + AC_CHECK_FUNC([dlsym], [], + [AC_CHECK_LIB([dl], [dlsym], [LIBS="$LIBS -ldl"], [have_dlsym="0"])]), + [have_dlsym="0"]) + if test "x$have_dlsym" = "x1" ; then + AC_DEFINE([JEMALLOC_HAVE_DLSYM], [ ]) + fi JE_COMPILABLE([pthread_atfork(3)], [ #include <pthread.h> ], [ @@ -1563,6 +1574,15 @@ if test "x$have_sched_getcpu" = "x1" ; then AC_DEFINE([JEMALLOC_HAVE_SCHED_GETCPU], [ ]) fi +dnl Check if the GNU-specific sched_setaffinity function exists. +AC_CHECK_FUNC([sched_setaffinity], + [have_sched_setaffinity="1"], + [have_sched_setaffinity="0"] + ) +if test "x$have_sched_setaffinity" = "x1" ; then + AC_DEFINE([JEMALLOC_HAVE_SCHED_SETAFFINITY], [ ]) +fi + dnl Check if the Solaris/BSD issetugid function exists. 
AC_CHECK_FUNC([issetugid], [have_issetugid="1"], @@ -1623,15 +1643,11 @@ if test "x${enable_lazy_lock}" = "x1" -a "x${abi}" = "xpecoff" ; then enable_lazy_lock="0" fi if test "x$enable_lazy_lock" = "x1" ; then - if test "x$abi" != "xpecoff" ; then - AC_CHECK_HEADERS([dlfcn.h], , [AC_MSG_ERROR([dlfcn.h is missing])]) - AC_CHECK_FUNC([dlsym], [], - [AC_CHECK_LIB([dl], [dlsym], [JE_APPEND_VS(LIBS, -ldl)], - [AC_MSG_ERROR([libdl is missing])]) - ]) + if test "x$have_dlsym" = "x1" ; then + AC_DEFINE([JEMALLOC_LAZY_LOCK], [ ]) + else + AC_MSG_ERROR([Missing dlsym support: lazy-lock cannot be enabled.]) fi - AC_DEFINE([JEMALLOC_LAZY_LOCK], [ ]) - wrap_syms="${wrap_syms} pthread_create" fi AC_SUBST([enable_lazy_lock]) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 292b8d6d..273705f7 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -5,7 +5,7 @@ #include "jemalloc/internal/size_classes.h" #include "jemalloc/internal/stats.h" -static const size_t large_pad = +static const size_t large_pad = #ifdef JEMALLOC_CACHE_OBLIVIOUS PAGE #else @@ -13,88 +13,91 @@ static const size_t large_pad = #endif ; -extern ssize_t opt_dirty_decay_ms; -extern ssize_t opt_muzzy_decay_ms; +extern ssize_t opt_dirty_decay_ms; +extern ssize_t opt_muzzy_decay_ms; -extern const arena_bin_info_t arena_bin_info[NBINS]; +extern const arena_bin_info_t arena_bin_info[NBINS]; -extern percpu_arena_mode_t percpu_arena_mode; +extern percpu_arena_mode_t percpu_arena_mode; extern const char *opt_percpu_arena; extern const char *percpu_arena_mode_names[]; +extern const uint64_t h_steps[SMOOTHSTEP_NSTEPS]; + void arena_stats_large_nrequests_add(tsdn_t *tsdn, arena_stats_t *arena_stats, szind_t szind, uint64_t nrequests); void arena_stats_mapped_add(tsdn_t *tsdn, arena_stats_t *arena_stats, size_t size); -void arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, +void arena_basic_stats_merge(tsdn_t *tsdn, 
arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms, size_t *nactive, size_t *ndirty, size_t *nmuzzy); void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms, size_t *nactive, size_t *ndirty, size_t *nmuzzy, arena_stats_t *astats, malloc_bin_stats_t *bstats, malloc_large_stats_t *lstats); -void arena_extents_dirty_dalloc(tsdn_t *tsdn, arena_t *arena, +void arena_extents_dirty_dalloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent); #ifdef JEMALLOC_JET -size_t arena_slab_regind(extent_t *slab, szind_t binind, const void *ptr); +size_t arena_slab_regind(extent_t *slab, szind_t binind, const void *ptr); #endif -extent_t *arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, +extent_t *arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool *zero); -void arena_extent_dalloc_large_prep(tsdn_t *tsdn, arena_t *arena, +void arena_extent_dalloc_large_prep(tsdn_t *tsdn, arena_t *arena, extent_t *extent); -void arena_extent_ralloc_large_shrink(tsdn_t *tsdn, arena_t *arena, +void arena_extent_ralloc_large_shrink(tsdn_t *tsdn, arena_t *arena, extent_t *extent, size_t oldsize); -void arena_extent_ralloc_large_expand(tsdn_t *tsdn, arena_t *arena, +void arena_extent_ralloc_large_expand(tsdn_t *tsdn, arena_t *arena, extent_t *extent, size_t oldsize); ssize_t arena_dirty_decay_ms_get(arena_t *arena); bool arena_dirty_decay_ms_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_ms); ssize_t arena_muzzy_decay_ms_get(arena_t *arena); bool arena_muzzy_decay_ms_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_ms); -void arena_decay(tsdn_t *tsdn, arena_t *arena, bool all); -void arena_reset(tsd_t *tsd, arena_t *arena); -void arena_destroy(tsd_t *tsd, arena_t *arena); -void arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, +void arena_decay(tsdn_t *tsdn, arena_t *arena, 
bool is_background_thread, + bool all); +void arena_reset(tsd_t *tsd, arena_t *arena); +void arena_destroy(tsd_t *tsd, arena_t *arena); +void arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, tcache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes); -void arena_alloc_junk_small(void *ptr, const arena_bin_info_t *bin_info, +void arena_alloc_junk_small(void *ptr, const arena_bin_info_t *bin_info, bool zero); typedef void (arena_dalloc_junk_small_t)(void *, const arena_bin_info_t *); extern arena_dalloc_junk_small_t *JET_MUTABLE arena_dalloc_junk_small; -void *arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, +void *arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero); -void *arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, +void *arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero, tcache_t *tcache); -void arena_prof_promote(tsdn_t *tsdn, const void *ptr, size_t usize); -void arena_dalloc_promoted(tsdn_t *tsdn, void *ptr, tcache_t *tcache, +void arena_prof_promote(tsdn_t *tsdn, const void *ptr, size_t usize); +void arena_dalloc_promoted(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path); -void arena_dalloc_bin_junked_locked(tsdn_t *tsdn, arena_t *arena, +void arena_dalloc_bin_junked_locked(tsdn_t *tsdn, arena_t *arena, extent_t *extent, void *ptr); -void arena_dalloc_small(tsdn_t *tsdn, void *ptr); +void arena_dalloc_small(tsdn_t *tsdn, void *ptr); bool arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t extra, bool zero); void *arena_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize, size_t size, size_t alignment, bool zero, tcache_t *tcache); -dss_prec_t arena_dss_prec_get(arena_t *arena); -bool arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec); +dss_prec_t arena_dss_prec_get(arena_t *arena); +bool arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec); ssize_t 
arena_dirty_decay_ms_default_get(void); bool arena_dirty_decay_ms_default_set(ssize_t decay_ms); ssize_t arena_muzzy_decay_ms_default_get(void); bool arena_muzzy_decay_ms_default_set(ssize_t decay_ms); -unsigned arena_nthreads_get(arena_t *arena, bool internal); -void arena_nthreads_inc(arena_t *arena, bool internal); -void arena_nthreads_dec(arena_t *arena, bool internal); -size_t arena_extent_sn_next(arena_t *arena); -arena_t *arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks); -void arena_boot(void); -void arena_prefork0(tsdn_t *tsdn, arena_t *arena); -void arena_prefork1(tsdn_t *tsdn, arena_t *arena); -void arena_prefork2(tsdn_t *tsdn, arena_t *arena); -void arena_prefork3(tsdn_t *tsdn, arena_t *arena); -void arena_prefork4(tsdn_t *tsdn, arena_t *arena); -void arena_prefork5(tsdn_t *tsdn, arena_t *arena); -void arena_prefork6(tsdn_t *tsdn, arena_t *arena); -void arena_postfork_parent(tsdn_t *tsdn, arena_t *arena); -void arena_postfork_child(tsdn_t *tsdn, arena_t *arena); +unsigned arena_nthreads_get(arena_t *arena, bool internal); +void arena_nthreads_inc(arena_t *arena, bool internal); +void arena_nthreads_dec(arena_t *arena, bool internal); +size_t arena_extent_sn_next(arena_t *arena); +arena_t *arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks); +void arena_boot(void); +void arena_prefork0(tsdn_t *tsdn, arena_t *arena); +void arena_prefork1(tsdn_t *tsdn, arena_t *arena); +void arena_prefork2(tsdn_t *tsdn, arena_t *arena); +void arena_prefork3(tsdn_t *tsdn, arena_t *arena); +void arena_prefork4(tsdn_t *tsdn, arena_t *arena); +void arena_prefork5(tsdn_t *tsdn, arena_t *arena); +void arena_prefork6(tsdn_t *tsdn, arena_t *arena); +void arena_postfork_parent(tsdn_t *tsdn, arena_t *arena); +void arena_postfork_child(tsdn_t *tsdn, arena_t *arena); #endif /* JEMALLOC_INTERNAL_ARENA_EXTERNS_H */ diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index ca7af7fd..a1057184 100644 --- 
a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -75,7 +75,7 @@ arena_decay_ticks(tsdn_t *tsdn, arena_t *arena, unsigned nticks) { return; } if (unlikely(ticker_ticks(decay_ticker, nticks))) { - arena_decay(tsdn, arena, false); + arena_decay(tsdn, arena, false, false); } } diff --git a/include/jemalloc/internal/background_thread_externs.h b/include/jemalloc/internal/background_thread_externs.h new file mode 100644 index 00000000..993f0e3b --- /dev/null +++ b/include/jemalloc/internal/background_thread_externs.h @@ -0,0 +1,29 @@ +#ifndef JEMALLOC_INTERNAL_BACKGROUND_THREAD_EXTERNS_H +#define JEMALLOC_INTERNAL_BACKGROUND_THREAD_EXTERNS_H + +extern bool opt_background_thread; +extern malloc_mutex_t background_thread_lock; +extern atomic_b_t background_thread_enabled_state; +extern size_t n_background_threads; +extern background_thread_info_t *background_thread_info; + +bool background_thread_create(tsd_t *tsd, unsigned arena_ind); +bool background_threads_init(tsd_t *tsd); +bool background_threads_enable(tsd_t *tsd); +bool background_threads_disable(tsd_t *tsd); +bool background_threads_disable_single(tsd_t *tsd, + background_thread_info_t *info); +void background_thread_interval_check(tsdn_t *tsdn, arena_t *arena, + arena_decay_t *decay, size_t npages_new); +void background_thread_prefork0(tsdn_t *tsdn); +void background_thread_prefork1(tsdn_t *tsdn); +void background_thread_postfork_parent(tsdn_t *tsdn); +void background_thread_postfork_child(tsdn_t *tsdn); + +#if defined(JEMALLOC_BACKGROUND_THREAD) || defined(JEMALLOC_LAZY_LOCK) +extern int (*pthread_create_fptr)(pthread_t *__restrict, const pthread_attr_t *, + void *(*)(void *), void *__restrict); +void *load_pthread_create_fptr(void); +#endif + +#endif /* JEMALLOC_INTERNAL_BACKGROUND_THREAD_EXTERNS_H */ diff --git a/include/jemalloc/internal/background_thread_inlines.h b/include/jemalloc/internal/background_thread_inlines.h new file mode 100644 index 
00000000..2709ae31 --- /dev/null +++ b/include/jemalloc/internal/background_thread_inlines.h @@ -0,0 +1,21 @@ +#ifndef JEMALLOC_INTERNAL_BACKGROUND_THREAD_INLINES_H +#define JEMALLOC_INTERNAL_BACKGROUND_THREAD_INLINES_H + +JEMALLOC_ALWAYS_INLINE bool +background_thread_enabled(void) { + return atomic_load_b(&background_thread_enabled_state, ATOMIC_RELAXED); +} + +JEMALLOC_ALWAYS_INLINE void +background_thread_enabled_set(tsdn_t *tsdn, bool state) { + malloc_mutex_assert_owner(tsdn, &background_thread_lock); + atomic_store_b(&background_thread_enabled_state, state, ATOMIC_RELAXED); +} + +JEMALLOC_ALWAYS_INLINE background_thread_info_t * +arena_background_thread_info_get(arena_t *arena) { + unsigned arena_ind = arena_ind_get(arena); + return &background_thread_info[arena_ind % ncpus]; +} + +#endif /* JEMALLOC_INTERNAL_BACKGROUND_THREAD_INLINES_H */ diff --git a/include/jemalloc/internal/background_thread_structs.h b/include/jemalloc/internal/background_thread_structs.h new file mode 100644 index 00000000..a43d600d --- /dev/null +++ b/include/jemalloc/internal/background_thread_structs.h @@ -0,0 +1,25 @@ +#ifndef JEMALLOC_INTERNAL_BACKGROUND_THREAD_STRUCTS_H +#define JEMALLOC_INTERNAL_BACKGROUND_THREAD_STRUCTS_H + +struct background_thread_info_s { + malloc_mutex_t mtx; +#ifdef JEMALLOC_BACKGROUND_THREAD + /* Background thread is pthread specific. */ + pthread_cond_t cond; + pthread_t thread; + /* Whether the thread has been created. */ + bool started; + /* Next scheduled wakeup time (absolute time). */ + nstime_t next_wakeup; + /* + * Since the last background thread run, newly added number of pages + * that need to be purged by the next wakeup. This is adjusted on + * epoch advance, and is used to determine whether we should signal the + * background thread to wake up earlier. 
+ */ + size_t npages_to_purge_new; +#endif /* ifdef JEMALLOC_BACKGROUND_THREAD */ +}; +typedef struct background_thread_info_s background_thread_info_t; + +#endif /* JEMALLOC_INTERNAL_BACKGROUND_THREAD_STRUCTS_H */ diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 78ddd376..75576a56 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -301,12 +301,21 @@ /* glibc memalign hook. */ #undef JEMALLOC_GLIBC_MEMALIGN_HOOK +/* pthread support */ +#undef JEMALLOC_HAVE_PTHREAD + +/* dlsym() support */ +#undef JEMALLOC_HAVE_DLSYM + /* Adaptive mutex support in pthreads. */ #undef JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP /* GNU specific sched_getcpu support */ #undef JEMALLOC_HAVE_SCHED_GETCPU +/* GNU specific sched_setaffinity support */ +#undef JEMALLOC_HAVE_SCHED_SETAFFINITY + /* * If defined, jemalloc symbols are not exported (doesn't work when * JEMALLOC_PREFIX is not defined). 
diff --git a/include/jemalloc/internal/jemalloc_internal_includes.h b/include/jemalloc/internal/jemalloc_internal_includes.h index cf321c12..45e648bc 100644 --- a/include/jemalloc/internal/jemalloc_internal_includes.h +++ b/include/jemalloc/internal/jemalloc_internal_includes.h @@ -65,6 +65,7 @@ #include "jemalloc/internal/arena_structs_b.h" #include "jemalloc/internal/rtree_structs.h" #include "jemalloc/internal/tcache_structs.h" +#include "jemalloc/internal/background_thread_structs.h" /******************************************************************************/ /* EXTERNS */ @@ -82,6 +83,7 @@ #include "jemalloc/internal/large_externs.h" #include "jemalloc/internal/tcache_externs.h" #include "jemalloc/internal/prof_externs.h" +#include "jemalloc/internal/background_thread_externs.h" /******************************************************************************/ /* INLINES */ @@ -105,5 +107,6 @@ #include "jemalloc/internal/arena_inlines_b.h" #include "jemalloc/internal/jemalloc_internal_inlines_c.h" #include "jemalloc/internal/prof_inlines_b.h" +#include "jemalloc/internal/background_thread_inlines.h" #endif /* JEMALLOC_INTERNAL_INCLUDES_H */ diff --git a/include/jemalloc/internal/jemalloc_preamble.h.in b/include/jemalloc/internal/jemalloc_preamble.h.in index 9e9225ef..0e876103 100644 --- a/include/jemalloc/internal/jemalloc_preamble.h.in +++ b/include/jemalloc/internal/jemalloc_preamble.h.in @@ -169,4 +169,17 @@ static const bool force_ivsalloc = #endif ; +#if (defined(JEMALLOC_HAVE_PTHREAD) && defined(JEMALLOC_HAVE_DLSYM) \ + && !defined(JEMALLOC_OSSPIN) && !defined(JEMALLOC_OS_UNFAIR_LOCK)) +/* Currently background thread supports pthread only. 
*/ +#define JEMALLOC_BACKGROUND_THREAD +#endif +static const bool have_background_thread = +#ifdef JEMALLOC_BACKGROUND_THREAD + true +#else + false +#endif + ; + #endif /* JEMALLOC_PREAMBLE_H */ diff --git a/include/jemalloc/internal/smoothstep.h b/include/jemalloc/internal/smoothstep.h index 2e14430f..5bca6e8c 100644 --- a/include/jemalloc/internal/smoothstep.h +++ b/include/jemalloc/internal/smoothstep.h @@ -27,206 +27,206 @@ #define SMOOTHSTEP_NSTEPS 200 #define SMOOTHSTEP_BFP 24 #define SMOOTHSTEP \ - /* STEP(step, h, x, y) */ \ - STEP( 1, UINT64_C(0x0000000000000014), 0.005, 0.000001240643750) \ - STEP( 2, UINT64_C(0x00000000000000a5), 0.010, 0.000009850600000) \ - STEP( 3, UINT64_C(0x0000000000000229), 0.015, 0.000032995181250) \ - STEP( 4, UINT64_C(0x0000000000000516), 0.020, 0.000077619200000) \ - STEP( 5, UINT64_C(0x00000000000009dc), 0.025, 0.000150449218750) \ - STEP( 6, UINT64_C(0x00000000000010e8), 0.030, 0.000257995800000) \ - STEP( 7, UINT64_C(0x0000000000001aa4), 0.035, 0.000406555756250) \ - STEP( 8, UINT64_C(0x0000000000002777), 0.040, 0.000602214400000) \ - STEP( 9, UINT64_C(0x00000000000037c2), 0.045, 0.000850847793750) \ - STEP( 10, UINT64_C(0x0000000000004be6), 0.050, 0.001158125000000) \ - STEP( 11, UINT64_C(0x000000000000643c), 0.055, 0.001529510331250) \ - STEP( 12, UINT64_C(0x000000000000811f), 0.060, 0.001970265600000) \ - STEP( 13, UINT64_C(0x000000000000a2e2), 0.065, 0.002485452368750) \ - STEP( 14, UINT64_C(0x000000000000c9d8), 0.070, 0.003079934200000) \ - STEP( 15, UINT64_C(0x000000000000f64f), 0.075, 0.003758378906250) \ - STEP( 16, UINT64_C(0x0000000000012891), 0.080, 0.004525260800000) \ - STEP( 17, UINT64_C(0x00000000000160e7), 0.085, 0.005384862943750) \ - STEP( 18, UINT64_C(0x0000000000019f95), 0.090, 0.006341279400000) \ - STEP( 19, UINT64_C(0x000000000001e4dc), 0.095, 0.007398417481250) \ - STEP( 20, UINT64_C(0x00000000000230fc), 0.100, 0.008560000000000) \ - STEP( 21, UINT64_C(0x0000000000028430), 0.105, 0.009829567518750) \ 
- STEP( 22, UINT64_C(0x000000000002deb0), 0.110, 0.011210480600000) \ - STEP( 23, UINT64_C(0x00000000000340b1), 0.115, 0.012705922056250) \ - STEP( 24, UINT64_C(0x000000000003aa67), 0.120, 0.014318899200000) \ - STEP( 25, UINT64_C(0x0000000000041c00), 0.125, 0.016052246093750) \ - STEP( 26, UINT64_C(0x00000000000495a8), 0.130, 0.017908625800000) \ - STEP( 27, UINT64_C(0x000000000005178b), 0.135, 0.019890532631250) \ - STEP( 28, UINT64_C(0x000000000005a1cf), 0.140, 0.022000294400000) \ - STEP( 29, UINT64_C(0x0000000000063498), 0.145, 0.024240074668750) \ - STEP( 30, UINT64_C(0x000000000006d009), 0.150, 0.026611875000000) \ - STEP( 31, UINT64_C(0x000000000007743f), 0.155, 0.029117537206250) \ - STEP( 32, UINT64_C(0x0000000000082157), 0.160, 0.031758745600000) \ - STEP( 33, UINT64_C(0x000000000008d76b), 0.165, 0.034537029243750) \ - STEP( 34, UINT64_C(0x0000000000099691), 0.170, 0.037453764200000) \ - STEP( 35, UINT64_C(0x00000000000a5edf), 0.175, 0.040510175781250) \ - STEP( 36, UINT64_C(0x00000000000b3067), 0.180, 0.043707340800000) \ - STEP( 37, UINT64_C(0x00000000000c0b38), 0.185, 0.047046189818750) \ - STEP( 38, UINT64_C(0x00000000000cef5e), 0.190, 0.050527509400000) \ - STEP( 39, UINT64_C(0x00000000000ddce6), 0.195, 0.054151944356250) \ - STEP( 40, UINT64_C(0x00000000000ed3d8), 0.200, 0.057920000000000) \ - STEP( 41, UINT64_C(0x00000000000fd439), 0.205, 0.061832044393750) \ - STEP( 42, UINT64_C(0x000000000010de0e), 0.210, 0.065888310600000) \ - STEP( 43, UINT64_C(0x000000000011f158), 0.215, 0.070088898931250) \ - STEP( 44, UINT64_C(0x0000000000130e17), 0.220, 0.074433779200000) \ - STEP( 45, UINT64_C(0x0000000000143448), 0.225, 0.078922792968750) \ - STEP( 46, UINT64_C(0x00000000001563e7), 0.230, 0.083555655800000) \ - STEP( 47, UINT64_C(0x0000000000169cec), 0.235, 0.088331959506250) \ - STEP( 48, UINT64_C(0x000000000017df4f), 0.240, 0.093251174400000) \ - STEP( 49, UINT64_C(0x0000000000192b04), 0.245, 0.098312651543750) \ - STEP( 50, 
UINT64_C(0x00000000001a8000), 0.250, 0.103515625000000) \ - STEP( 51, UINT64_C(0x00000000001bde32), 0.255, 0.108859214081250) \ - STEP( 52, UINT64_C(0x00000000001d458b), 0.260, 0.114342425600000) \ - STEP( 53, UINT64_C(0x00000000001eb5f8), 0.265, 0.119964156118750) \ - STEP( 54, UINT64_C(0x0000000000202f65), 0.270, 0.125723194200000) \ - STEP( 55, UINT64_C(0x000000000021b1bb), 0.275, 0.131618222656250) \ - STEP( 56, UINT64_C(0x0000000000233ce3), 0.280, 0.137647820800000) \ - STEP( 57, UINT64_C(0x000000000024d0c3), 0.285, 0.143810466693750) \ - STEP( 58, UINT64_C(0x0000000000266d40), 0.290, 0.150104539400000) \ - STEP( 59, UINT64_C(0x000000000028123d), 0.295, 0.156528321231250) \ - STEP( 60, UINT64_C(0x000000000029bf9c), 0.300, 0.163080000000000) \ - STEP( 61, UINT64_C(0x00000000002b753d), 0.305, 0.169757671268750) \ - STEP( 62, UINT64_C(0x00000000002d32fe), 0.310, 0.176559340600000) \ - STEP( 63, UINT64_C(0x00000000002ef8bc), 0.315, 0.183482925806250) \ - STEP( 64, UINT64_C(0x000000000030c654), 0.320, 0.190526259200000) \ - STEP( 65, UINT64_C(0x0000000000329b9f), 0.325, 0.197687089843750) \ - STEP( 66, UINT64_C(0x0000000000347875), 0.330, 0.204963085800000) \ - STEP( 67, UINT64_C(0x0000000000365cb0), 0.335, 0.212351836381250) \ - STEP( 68, UINT64_C(0x0000000000384825), 0.340, 0.219850854400000) \ - STEP( 69, UINT64_C(0x00000000003a3aa8), 0.345, 0.227457578418750) \ - STEP( 70, UINT64_C(0x00000000003c340f), 0.350, 0.235169375000000) \ - STEP( 71, UINT64_C(0x00000000003e342b), 0.355, 0.242983540956250) \ - STEP( 72, UINT64_C(0x0000000000403ace), 0.360, 0.250897305600000) \ - STEP( 73, UINT64_C(0x00000000004247c8), 0.365, 0.258907832993750) \ - STEP( 74, UINT64_C(0x0000000000445ae9), 0.370, 0.267012224200000) \ - STEP( 75, UINT64_C(0x0000000000467400), 0.375, 0.275207519531250) \ - STEP( 76, UINT64_C(0x00000000004892d8), 0.380, 0.283490700800000) \ - STEP( 77, UINT64_C(0x00000000004ab740), 0.385, 0.291858693568750) \ - STEP( 78, UINT64_C(0x00000000004ce102), 0.390, 
0.300308369400000) \ - STEP( 79, UINT64_C(0x00000000004f0fe9), 0.395, 0.308836548106250) \ - STEP( 80, UINT64_C(0x00000000005143bf), 0.400, 0.317440000000000) \ - STEP( 81, UINT64_C(0x0000000000537c4d), 0.405, 0.326115448143750) \ - STEP( 82, UINT64_C(0x000000000055b95b), 0.410, 0.334859570600000) \ - STEP( 83, UINT64_C(0x000000000057fab1), 0.415, 0.343669002681250) \ - STEP( 84, UINT64_C(0x00000000005a4015), 0.420, 0.352540339200000) \ - STEP( 85, UINT64_C(0x00000000005c894e), 0.425, 0.361470136718750) \ - STEP( 86, UINT64_C(0x00000000005ed622), 0.430, 0.370454915800000) \ - STEP( 87, UINT64_C(0x0000000000612655), 0.435, 0.379491163256250) \ - STEP( 88, UINT64_C(0x00000000006379ac), 0.440, 0.388575334400000) \ - STEP( 89, UINT64_C(0x000000000065cfeb), 0.445, 0.397703855293750) \ - STEP( 90, UINT64_C(0x00000000006828d6), 0.450, 0.406873125000000) \ - STEP( 91, UINT64_C(0x00000000006a842f), 0.455, 0.416079517831250) \ - STEP( 92, UINT64_C(0x00000000006ce1bb), 0.460, 0.425319385600000) \ - STEP( 93, UINT64_C(0x00000000006f413a), 0.465, 0.434589059868750) \ - STEP( 94, UINT64_C(0x000000000071a270), 0.470, 0.443884854200000) \ - STEP( 95, UINT64_C(0x000000000074051d), 0.475, 0.453203066406250) \ - STEP( 96, UINT64_C(0x0000000000766905), 0.480, 0.462539980800000) \ - STEP( 97, UINT64_C(0x000000000078cde7), 0.485, 0.471891870443750) \ - STEP( 98, UINT64_C(0x00000000007b3387), 0.490, 0.481254999400000) \ - STEP( 99, UINT64_C(0x00000000007d99a4), 0.495, 0.490625624981250) \ - STEP( 100, UINT64_C(0x0000000000800000), 0.500, 0.500000000000000) \ - STEP( 101, UINT64_C(0x000000000082665b), 0.505, 0.509374375018750) \ - STEP( 102, UINT64_C(0x000000000084cc78), 0.510, 0.518745000600000) \ - STEP( 103, UINT64_C(0x0000000000873218), 0.515, 0.528108129556250) \ - STEP( 104, UINT64_C(0x00000000008996fa), 0.520, 0.537460019200000) \ - STEP( 105, UINT64_C(0x00000000008bfae2), 0.525, 0.546796933593750) \ - STEP( 106, UINT64_C(0x00000000008e5d8f), 0.530, 0.556115145800000) \ - STEP( 
107, UINT64_C(0x000000000090bec5), 0.535, 0.565410940131250) \ - STEP( 108, UINT64_C(0x0000000000931e44), 0.540, 0.574680614400000) \ - STEP( 109, UINT64_C(0x0000000000957bd0), 0.545, 0.583920482168750) \ - STEP( 110, UINT64_C(0x000000000097d729), 0.550, 0.593126875000000) \ - STEP( 111, UINT64_C(0x00000000009a3014), 0.555, 0.602296144706250) \ - STEP( 112, UINT64_C(0x00000000009c8653), 0.560, 0.611424665600000) \ - STEP( 113, UINT64_C(0x00000000009ed9aa), 0.565, 0.620508836743750) \ - STEP( 114, UINT64_C(0x0000000000a129dd), 0.570, 0.629545084200000) \ - STEP( 115, UINT64_C(0x0000000000a376b1), 0.575, 0.638529863281250) \ - STEP( 116, UINT64_C(0x0000000000a5bfea), 0.580, 0.647459660800000) \ - STEP( 117, UINT64_C(0x0000000000a8054e), 0.585, 0.656330997318750) \ - STEP( 118, UINT64_C(0x0000000000aa46a4), 0.590, 0.665140429400000) \ - STEP( 119, UINT64_C(0x0000000000ac83b2), 0.595, 0.673884551856250) \ - STEP( 120, UINT64_C(0x0000000000aebc40), 0.600, 0.682560000000000) \ - STEP( 121, UINT64_C(0x0000000000b0f016), 0.605, 0.691163451893750) \ - STEP( 122, UINT64_C(0x0000000000b31efd), 0.610, 0.699691630600000) \ - STEP( 123, UINT64_C(0x0000000000b548bf), 0.615, 0.708141306431250) \ - STEP( 124, UINT64_C(0x0000000000b76d27), 0.620, 0.716509299200000) \ - STEP( 125, UINT64_C(0x0000000000b98c00), 0.625, 0.724792480468750) \ - STEP( 126, UINT64_C(0x0000000000bba516), 0.630, 0.732987775800000) \ - STEP( 127, UINT64_C(0x0000000000bdb837), 0.635, 0.741092167006250) \ - STEP( 128, UINT64_C(0x0000000000bfc531), 0.640, 0.749102694400000) \ - STEP( 129, UINT64_C(0x0000000000c1cbd4), 0.645, 0.757016459043750) \ - STEP( 130, UINT64_C(0x0000000000c3cbf0), 0.650, 0.764830625000000) \ - STEP( 131, UINT64_C(0x0000000000c5c557), 0.655, 0.772542421581250) \ - STEP( 132, UINT64_C(0x0000000000c7b7da), 0.660, 0.780149145600000) \ - STEP( 133, UINT64_C(0x0000000000c9a34f), 0.665, 0.787648163618750) \ - STEP( 134, UINT64_C(0x0000000000cb878a), 0.670, 0.795036914200000) \ - STEP( 135, 
UINT64_C(0x0000000000cd6460), 0.675, 0.802312910156250) \ - STEP( 136, UINT64_C(0x0000000000cf39ab), 0.680, 0.809473740800000) \ - STEP( 137, UINT64_C(0x0000000000d10743), 0.685, 0.816517074193750) \ - STEP( 138, UINT64_C(0x0000000000d2cd01), 0.690, 0.823440659400000) \ - STEP( 139, UINT64_C(0x0000000000d48ac2), 0.695, 0.830242328731250) \ - STEP( 140, UINT64_C(0x0000000000d64063), 0.700, 0.836920000000000) \ - STEP( 141, UINT64_C(0x0000000000d7edc2), 0.705, 0.843471678768750) \ - STEP( 142, UINT64_C(0x0000000000d992bf), 0.710, 0.849895460600000) \ - STEP( 143, UINT64_C(0x0000000000db2f3c), 0.715, 0.856189533306250) \ - STEP( 144, UINT64_C(0x0000000000dcc31c), 0.720, 0.862352179200000) \ - STEP( 145, UINT64_C(0x0000000000de4e44), 0.725, 0.868381777343750) \ - STEP( 146, UINT64_C(0x0000000000dfd09a), 0.730, 0.874276805800000) \ - STEP( 147, UINT64_C(0x0000000000e14a07), 0.735, 0.880035843881250) \ - STEP( 148, UINT64_C(0x0000000000e2ba74), 0.740, 0.885657574400000) \ - STEP( 149, UINT64_C(0x0000000000e421cd), 0.745, 0.891140785918750) \ - STEP( 150, UINT64_C(0x0000000000e58000), 0.750, 0.896484375000000) \ - STEP( 151, UINT64_C(0x0000000000e6d4fb), 0.755, 0.901687348456250) \ - STEP( 152, UINT64_C(0x0000000000e820b0), 0.760, 0.906748825600000) \ - STEP( 153, UINT64_C(0x0000000000e96313), 0.765, 0.911668040493750) \ - STEP( 154, UINT64_C(0x0000000000ea9c18), 0.770, 0.916444344200000) \ - STEP( 155, UINT64_C(0x0000000000ebcbb7), 0.775, 0.921077207031250) \ - STEP( 156, UINT64_C(0x0000000000ecf1e8), 0.780, 0.925566220800000) \ - STEP( 157, UINT64_C(0x0000000000ee0ea7), 0.785, 0.929911101068750) \ - STEP( 158, UINT64_C(0x0000000000ef21f1), 0.790, 0.934111689400000) \ - STEP( 159, UINT64_C(0x0000000000f02bc6), 0.795, 0.938167955606250) \ - STEP( 160, UINT64_C(0x0000000000f12c27), 0.800, 0.942080000000000) \ - STEP( 161, UINT64_C(0x0000000000f22319), 0.805, 0.945848055643750) \ - STEP( 162, UINT64_C(0x0000000000f310a1), 0.810, 0.949472490600000) \ - STEP( 163, 
UINT64_C(0x0000000000f3f4c7), 0.815, 0.952953810181250) \ - STEP( 164, UINT64_C(0x0000000000f4cf98), 0.820, 0.956292659200000) \ - STEP( 165, UINT64_C(0x0000000000f5a120), 0.825, 0.959489824218750) \ - STEP( 166, UINT64_C(0x0000000000f6696e), 0.830, 0.962546235800000) \ - STEP( 167, UINT64_C(0x0000000000f72894), 0.835, 0.965462970756250) \ - STEP( 168, UINT64_C(0x0000000000f7dea8), 0.840, 0.968241254400000) \ - STEP( 169, UINT64_C(0x0000000000f88bc0), 0.845, 0.970882462793750) \ - STEP( 170, UINT64_C(0x0000000000f92ff6), 0.850, 0.973388125000000) \ - STEP( 171, UINT64_C(0x0000000000f9cb67), 0.855, 0.975759925331250) \ - STEP( 172, UINT64_C(0x0000000000fa5e30), 0.860, 0.977999705600000) \ - STEP( 173, UINT64_C(0x0000000000fae874), 0.865, 0.980109467368750) \ - STEP( 174, UINT64_C(0x0000000000fb6a57), 0.870, 0.982091374200000) \ - STEP( 175, UINT64_C(0x0000000000fbe400), 0.875, 0.983947753906250) \ - STEP( 176, UINT64_C(0x0000000000fc5598), 0.880, 0.985681100800000) \ - STEP( 177, UINT64_C(0x0000000000fcbf4e), 0.885, 0.987294077943750) \ - STEP( 178, UINT64_C(0x0000000000fd214f), 0.890, 0.988789519400000) \ - STEP( 179, UINT64_C(0x0000000000fd7bcf), 0.895, 0.990170432481250) \ - STEP( 180, UINT64_C(0x0000000000fdcf03), 0.900, 0.991440000000000) \ - STEP( 181, UINT64_C(0x0000000000fe1b23), 0.905, 0.992601582518750) \ - STEP( 182, UINT64_C(0x0000000000fe606a), 0.910, 0.993658720600000) \ - STEP( 183, UINT64_C(0x0000000000fe9f18), 0.915, 0.994615137056250) \ - STEP( 184, UINT64_C(0x0000000000fed76e), 0.920, 0.995474739200000) \ - STEP( 185, UINT64_C(0x0000000000ff09b0), 0.925, 0.996241621093750) \ - STEP( 186, UINT64_C(0x0000000000ff3627), 0.930, 0.996920065800000) \ - STEP( 187, UINT64_C(0x0000000000ff5d1d), 0.935, 0.997514547631250) \ - STEP( 188, UINT64_C(0x0000000000ff7ee0), 0.940, 0.998029734400000) \ - STEP( 189, UINT64_C(0x0000000000ff9bc3), 0.945, 0.998470489668750) \ - STEP( 190, UINT64_C(0x0000000000ffb419), 0.950, 0.998841875000000) \ - STEP( 191, 
UINT64_C(0x0000000000ffc83d), 0.955, 0.999149152206250) \ - STEP( 192, UINT64_C(0x0000000000ffd888), 0.960, 0.999397785600000) \ - STEP( 193, UINT64_C(0x0000000000ffe55b), 0.965, 0.999593444243750) \ - STEP( 194, UINT64_C(0x0000000000ffef17), 0.970, 0.999742004200000) \ - STEP( 195, UINT64_C(0x0000000000fff623), 0.975, 0.999849550781250) \ - STEP( 196, UINT64_C(0x0000000000fffae9), 0.980, 0.999922380800000) \ - STEP( 197, UINT64_C(0x0000000000fffdd6), 0.985, 0.999967004818750) \ - STEP( 198, UINT64_C(0x0000000000ffff5a), 0.990, 0.999990149400000) \ - STEP( 199, UINT64_C(0x0000000000ffffeb), 0.995, 0.999998759356250) \ - STEP( 200, UINT64_C(0x0000000001000000), 1.000, 1.000000000000000) \ + /* STEP(step, h, x, y, h_sum) */ \ + STEP( 1, UINT64_C(0x0000000000000014), 0.005, 0.000001240643750, UINT64_C(0x0000000000000014)) \ + STEP( 2, UINT64_C(0x00000000000000a5), 0.010, 0.000009850600000, UINT64_C(0x00000000000000b9)) \ + STEP( 3, UINT64_C(0x0000000000000229), 0.015, 0.000032995181250, UINT64_C(0x00000000000002e2)) \ + STEP( 4, UINT64_C(0x0000000000000516), 0.020, 0.000077619200000, UINT64_C(0x00000000000007f8)) \ + STEP( 5, UINT64_C(0x00000000000009dc), 0.025, 0.000150449218750, UINT64_C(0x00000000000011d4)) \ + STEP( 6, UINT64_C(0x00000000000010e8), 0.030, 0.000257995800000, UINT64_C(0x00000000000022bc)) \ + STEP( 7, UINT64_C(0x0000000000001aa4), 0.035, 0.000406555756250, UINT64_C(0x0000000000003d60)) \ + STEP( 8, UINT64_C(0x0000000000002777), 0.040, 0.000602214400000, UINT64_C(0x00000000000064d7)) \ + STEP( 9, UINT64_C(0x00000000000037c2), 0.045, 0.000850847793750, UINT64_C(0x0000000000009c99)) \ + STEP( 10, UINT64_C(0x0000000000004be6), 0.050, 0.001158125000000, UINT64_C(0x000000000000e87f)) \ + STEP( 11, UINT64_C(0x000000000000643c), 0.055, 0.001529510331250, UINT64_C(0x0000000000014cbb)) \ + STEP( 12, UINT64_C(0x000000000000811f), 0.060, 0.001970265600000, UINT64_C(0x000000000001cdda)) \ + STEP( 13, UINT64_C(0x000000000000a2e2), 0.065, 0.002485452368750, 
UINT64_C(0x00000000000270bc)) \ + STEP( 14, UINT64_C(0x000000000000c9d8), 0.070, 0.003079934200000, UINT64_C(0x0000000000033a94)) \ + STEP( 15, UINT64_C(0x000000000000f64f), 0.075, 0.003758378906250, UINT64_C(0x00000000000430e3)) \ + STEP( 16, UINT64_C(0x0000000000012891), 0.080, 0.004525260800000, UINT64_C(0x0000000000055974)) \ + STEP( 17, UINT64_C(0x00000000000160e7), 0.085, 0.005384862943750, UINT64_C(0x000000000006ba5b)) \ + STEP( 18, UINT64_C(0x0000000000019f95), 0.090, 0.006341279400000, UINT64_C(0x00000000000859f0)) \ + STEP( 19, UINT64_C(0x000000000001e4dc), 0.095, 0.007398417481250, UINT64_C(0x00000000000a3ecc)) \ + STEP( 20, UINT64_C(0x00000000000230fc), 0.100, 0.008560000000000, UINT64_C(0x00000000000c6fc8)) \ + STEP( 21, UINT64_C(0x0000000000028430), 0.105, 0.009829567518750, UINT64_C(0x00000000000ef3f8)) \ + STEP( 22, UINT64_C(0x000000000002deb0), 0.110, 0.011210480600000, UINT64_C(0x000000000011d2a8)) \ + STEP( 23, UINT64_C(0x00000000000340b1), 0.115, 0.012705922056250, UINT64_C(0x0000000000151359)) \ + STEP( 24, UINT64_C(0x000000000003aa67), 0.120, 0.014318899200000, UINT64_C(0x000000000018bdc0)) \ + STEP( 25, UINT64_C(0x0000000000041c00), 0.125, 0.016052246093750, UINT64_C(0x00000000001cd9c0)) \ + STEP( 26, UINT64_C(0x00000000000495a8), 0.130, 0.017908625800000, UINT64_C(0x0000000000216f68)) \ + STEP( 27, UINT64_C(0x000000000005178b), 0.135, 0.019890532631250, UINT64_C(0x00000000002686f3)) \ + STEP( 28, UINT64_C(0x000000000005a1cf), 0.140, 0.022000294400000, UINT64_C(0x00000000002c28c2)) \ + STEP( 29, UINT64_C(0x0000000000063498), 0.145, 0.024240074668750, UINT64_C(0x0000000000325d5a)) \ + STEP( 30, UINT64_C(0x000000000006d009), 0.150, 0.026611875000000, UINT64_C(0x0000000000392d63)) \ + STEP( 31, UINT64_C(0x000000000007743f), 0.155, 0.029117537206250, UINT64_C(0x000000000040a1a2)) \ + STEP( 32, UINT64_C(0x0000000000082157), 0.160, 0.031758745600000, UINT64_C(0x000000000048c2f9)) \ + STEP( 33, UINT64_C(0x000000000008d76b), 0.165, 0.034537029243750, 
UINT64_C(0x0000000000519a64)) \ + STEP( 34, UINT64_C(0x0000000000099691), 0.170, 0.037453764200000, UINT64_C(0x00000000005b30f5)) \ + STEP( 35, UINT64_C(0x00000000000a5edf), 0.175, 0.040510175781250, UINT64_C(0x0000000000658fd4)) \ + STEP( 36, UINT64_C(0x00000000000b3067), 0.180, 0.043707340800000, UINT64_C(0x000000000070c03b)) \ + STEP( 37, UINT64_C(0x00000000000c0b38), 0.185, 0.047046189818750, UINT64_C(0x00000000007ccb73)) \ + STEP( 38, UINT64_C(0x00000000000cef5e), 0.190, 0.050527509400000, UINT64_C(0x000000000089bad1)) \ + STEP( 39, UINT64_C(0x00000000000ddce6), 0.195, 0.054151944356250, UINT64_C(0x00000000009797b7)) \ + STEP( 40, UINT64_C(0x00000000000ed3d8), 0.200, 0.057920000000000, UINT64_C(0x0000000000a66b8f)) \ + STEP( 41, UINT64_C(0x00000000000fd439), 0.205, 0.061832044393750, UINT64_C(0x0000000000b63fc8)) \ + STEP( 42, UINT64_C(0x000000000010de0e), 0.210, 0.065888310600000, UINT64_C(0x0000000000c71dd6)) \ + STEP( 43, UINT64_C(0x000000000011f158), 0.215, 0.070088898931250, UINT64_C(0x0000000000d90f2e)) \ + STEP( 44, UINT64_C(0x0000000000130e17), 0.220, 0.074433779200000, UINT64_C(0x0000000000ec1d45)) \ + STEP( 45, UINT64_C(0x0000000000143448), 0.225, 0.078922792968750, UINT64_C(0x000000000100518d)) \ + STEP( 46, UINT64_C(0x00000000001563e7), 0.230, 0.083555655800000, UINT64_C(0x000000000115b574)) \ + STEP( 47, UINT64_C(0x0000000000169cec), 0.235, 0.088331959506250, UINT64_C(0x00000000012c5260)) \ + STEP( 48, UINT64_C(0x000000000017df4f), 0.240, 0.093251174400000, UINT64_C(0x00000000014431af)) \ + STEP( 49, UINT64_C(0x0000000000192b04), 0.245, 0.098312651543750, UINT64_C(0x00000000015d5cb3)) \ + STEP( 50, UINT64_C(0x00000000001a8000), 0.250, 0.103515625000000, UINT64_C(0x000000000177dcb3)) \ + STEP( 51, UINT64_C(0x00000000001bde32), 0.255, 0.108859214081250, UINT64_C(0x000000000193bae5)) \ + STEP( 52, UINT64_C(0x00000000001d458b), 0.260, 0.114342425600000, UINT64_C(0x0000000001b10070)) \ + STEP( 53, UINT64_C(0x00000000001eb5f8), 0.265, 0.119964156118750, 
UINT64_C(0x0000000001cfb668)) \ + STEP( 54, UINT64_C(0x0000000000202f65), 0.270, 0.125723194200000, UINT64_C(0x0000000001efe5cd)) \ + STEP( 55, UINT64_C(0x000000000021b1bb), 0.275, 0.131618222656250, UINT64_C(0x0000000002119788)) \ + STEP( 56, UINT64_C(0x0000000000233ce3), 0.280, 0.137647820800000, UINT64_C(0x000000000234d46b)) \ + STEP( 57, UINT64_C(0x000000000024d0c3), 0.285, 0.143810466693750, UINT64_C(0x000000000259a52e)) \ + STEP( 58, UINT64_C(0x0000000000266d40), 0.290, 0.150104539400000, UINT64_C(0x000000000280126e)) \ + STEP( 59, UINT64_C(0x000000000028123d), 0.295, 0.156528321231250, UINT64_C(0x0000000002a824ab)) \ + STEP( 60, UINT64_C(0x000000000029bf9c), 0.300, 0.163080000000000, UINT64_C(0x0000000002d1e447)) \ + STEP( 61, UINT64_C(0x00000000002b753d), 0.305, 0.169757671268750, UINT64_C(0x0000000002fd5984)) \ + STEP( 62, UINT64_C(0x00000000002d32fe), 0.310, 0.176559340600000, UINT64_C(0x00000000032a8c82)) \ + STEP( 63, UINT64_C(0x00000000002ef8bc), 0.315, 0.183482925806250, UINT64_C(0x000000000359853e)) \ + STEP( 64, UINT64_C(0x000000000030c654), 0.320, 0.190526259200000, UINT64_C(0x00000000038a4b92)) \ + STEP( 65, UINT64_C(0x0000000000329b9f), 0.325, 0.197687089843750, UINT64_C(0x0000000003bce731)) \ + STEP( 66, UINT64_C(0x0000000000347875), 0.330, 0.204963085800000, UINT64_C(0x0000000003f15fa6)) \ + STEP( 67, UINT64_C(0x0000000000365cb0), 0.335, 0.212351836381250, UINT64_C(0x000000000427bc56)) \ + STEP( 68, UINT64_C(0x0000000000384825), 0.340, 0.219850854400000, UINT64_C(0x000000000460047b)) \ + STEP( 69, UINT64_C(0x00000000003a3aa8), 0.345, 0.227457578418750, UINT64_C(0x00000000049a3f23)) \ + STEP( 70, UINT64_C(0x00000000003c340f), 0.350, 0.235169375000000, UINT64_C(0x0000000004d67332)) \ + STEP( 71, UINT64_C(0x00000000003e342b), 0.355, 0.242983540956250, UINT64_C(0x000000000514a75d)) \ + STEP( 72, UINT64_C(0x0000000000403ace), 0.360, 0.250897305600000, UINT64_C(0x000000000554e22b)) \ + STEP( 73, UINT64_C(0x00000000004247c8), 0.365, 0.258907832993750, 
UINT64_C(0x00000000059729f3)) \ + STEP( 74, UINT64_C(0x0000000000445ae9), 0.370, 0.267012224200000, UINT64_C(0x0000000005db84dc)) \ + STEP( 75, UINT64_C(0x0000000000467400), 0.375, 0.275207519531250, UINT64_C(0x000000000621f8dc)) \ + STEP( 76, UINT64_C(0x00000000004892d8), 0.380, 0.283490700800000, UINT64_C(0x00000000066a8bb4)) \ + STEP( 77, UINT64_C(0x00000000004ab740), 0.385, 0.291858693568750, UINT64_C(0x0000000006b542f4)) \ + STEP( 78, UINT64_C(0x00000000004ce102), 0.390, 0.300308369400000, UINT64_C(0x00000000070223f6)) \ + STEP( 79, UINT64_C(0x00000000004f0fe9), 0.395, 0.308836548106250, UINT64_C(0x00000000075133df)) \ + STEP( 80, UINT64_C(0x00000000005143bf), 0.400, 0.317440000000000, UINT64_C(0x0000000007a2779e)) \ + STEP( 81, UINT64_C(0x0000000000537c4d), 0.405, 0.326115448143750, UINT64_C(0x0000000007f5f3eb)) \ + STEP( 82, UINT64_C(0x000000000055b95b), 0.410, 0.334859570600000, UINT64_C(0x00000000084bad46)) \ + STEP( 83, UINT64_C(0x000000000057fab1), 0.415, 0.343669002681250, UINT64_C(0x0000000008a3a7f7)) \ + STEP( 84, UINT64_C(0x00000000005a4015), 0.420, 0.352540339200000, UINT64_C(0x0000000008fde80c)) \ + STEP( 85, UINT64_C(0x00000000005c894e), 0.425, 0.361470136718750, UINT64_C(0x00000000095a715a)) \ + STEP( 86, UINT64_C(0x00000000005ed622), 0.430, 0.370454915800000, UINT64_C(0x0000000009b9477c)) \ + STEP( 87, UINT64_C(0x0000000000612655), 0.435, 0.379491163256250, UINT64_C(0x000000000a1a6dd1)) \ + STEP( 88, UINT64_C(0x00000000006379ac), 0.440, 0.388575334400000, UINT64_C(0x000000000a7de77d)) \ + STEP( 89, UINT64_C(0x000000000065cfeb), 0.445, 0.397703855293750, UINT64_C(0x000000000ae3b768)) \ + STEP( 90, UINT64_C(0x00000000006828d6), 0.450, 0.406873125000000, UINT64_C(0x000000000b4be03e)) \ + STEP( 91, UINT64_C(0x00000000006a842f), 0.455, 0.416079517831250, UINT64_C(0x000000000bb6646d)) \ + STEP( 92, UINT64_C(0x00000000006ce1bb), 0.460, 0.425319385600000, UINT64_C(0x000000000c234628)) \ + STEP( 93, UINT64_C(0x00000000006f413a), 0.465, 0.434589059868750, 
UINT64_C(0x000000000c928762)) \ + STEP( 94, UINT64_C(0x000000000071a270), 0.470, 0.443884854200000, UINT64_C(0x000000000d0429d2)) \ + STEP( 95, UINT64_C(0x000000000074051d), 0.475, 0.453203066406250, UINT64_C(0x000000000d782eef)) \ + STEP( 96, UINT64_C(0x0000000000766905), 0.480, 0.462539980800000, UINT64_C(0x000000000dee97f4)) \ + STEP( 97, UINT64_C(0x000000000078cde7), 0.485, 0.471891870443750, UINT64_C(0x000000000e6765db)) \ + STEP( 98, UINT64_C(0x00000000007b3387), 0.490, 0.481254999400000, UINT64_C(0x000000000ee29962)) \ + STEP( 99, UINT64_C(0x00000000007d99a4), 0.495, 0.490625624981250, UINT64_C(0x000000000f603306)) \ + STEP( 100, UINT64_C(0x0000000000800000), 0.500, 0.500000000000000, UINT64_C(0x000000000fe03306)) \ + STEP( 101, UINT64_C(0x000000000082665b), 0.505, 0.509374375018750, UINT64_C(0x0000000010629961)) \ + STEP( 102, UINT64_C(0x000000000084cc78), 0.510, 0.518745000600000, UINT64_C(0x0000000010e765d9)) \ + STEP( 103, UINT64_C(0x0000000000873218), 0.515, 0.528108129556250, UINT64_C(0x00000000116e97f1)) \ + STEP( 104, UINT64_C(0x00000000008996fa), 0.520, 0.537460019200000, UINT64_C(0x0000000011f82eeb)) \ + STEP( 105, UINT64_C(0x00000000008bfae2), 0.525, 0.546796933593750, UINT64_C(0x00000000128429cd)) \ + STEP( 106, UINT64_C(0x00000000008e5d8f), 0.530, 0.556115145800000, UINT64_C(0x000000001312875c)) \ + STEP( 107, UINT64_C(0x000000000090bec5), 0.535, 0.565410940131250, UINT64_C(0x0000000013a34621)) \ + STEP( 108, UINT64_C(0x0000000000931e44), 0.540, 0.574680614400000, UINT64_C(0x0000000014366465)) \ + STEP( 109, UINT64_C(0x0000000000957bd0), 0.545, 0.583920482168750, UINT64_C(0x0000000014cbe035)) \ + STEP( 110, UINT64_C(0x000000000097d729), 0.550, 0.593126875000000, UINT64_C(0x000000001563b75e)) \ + STEP( 111, UINT64_C(0x00000000009a3014), 0.555, 0.602296144706250, UINT64_C(0x0000000015fde772)) \ + STEP( 112, UINT64_C(0x00000000009c8653), 0.560, 0.611424665600000, UINT64_C(0x00000000169a6dc5)) \ + STEP( 113, UINT64_C(0x00000000009ed9aa), 0.565, 
0.620508836743750, UINT64_C(0x000000001739476f)) \ + STEP( 114, UINT64_C(0x0000000000a129dd), 0.570, 0.629545084200000, UINT64_C(0x0000000017da714c)) \ + STEP( 115, UINT64_C(0x0000000000a376b1), 0.575, 0.638529863281250, UINT64_C(0x00000000187de7fd)) \ + STEP( 116, UINT64_C(0x0000000000a5bfea), 0.580, 0.647459660800000, UINT64_C(0x000000001923a7e7)) \ + STEP( 117, UINT64_C(0x0000000000a8054e), 0.585, 0.656330997318750, UINT64_C(0x0000000019cbad35)) \ + STEP( 118, UINT64_C(0x0000000000aa46a4), 0.590, 0.665140429400000, UINT64_C(0x000000001a75f3d9)) \ + STEP( 119, UINT64_C(0x0000000000ac83b2), 0.595, 0.673884551856250, UINT64_C(0x000000001b22778b)) \ + STEP( 120, UINT64_C(0x0000000000aebc40), 0.600, 0.682560000000000, UINT64_C(0x000000001bd133cb)) \ + STEP( 121, UINT64_C(0x0000000000b0f016), 0.605, 0.691163451893750, UINT64_C(0x000000001c8223e1)) \ + STEP( 122, UINT64_C(0x0000000000b31efd), 0.610, 0.699691630600000, UINT64_C(0x000000001d3542de)) \ + STEP( 123, UINT64_C(0x0000000000b548bf), 0.615, 0.708141306431250, UINT64_C(0x000000001dea8b9d)) \ + STEP( 124, UINT64_C(0x0000000000b76d27), 0.620, 0.716509299200000, UINT64_C(0x000000001ea1f8c4)) \ + STEP( 125, UINT64_C(0x0000000000b98c00), 0.625, 0.724792480468750, UINT64_C(0x000000001f5b84c4)) \ + STEP( 126, UINT64_C(0x0000000000bba516), 0.630, 0.732987775800000, UINT64_C(0x00000000201729da)) \ + STEP( 127, UINT64_C(0x0000000000bdb837), 0.635, 0.741092167006250, UINT64_C(0x0000000020d4e211)) \ + STEP( 128, UINT64_C(0x0000000000bfc531), 0.640, 0.749102694400000, UINT64_C(0x000000002194a742)) \ + STEP( 129, UINT64_C(0x0000000000c1cbd4), 0.645, 0.757016459043750, UINT64_C(0x0000000022567316)) \ + STEP( 130, UINT64_C(0x0000000000c3cbf0), 0.650, 0.764830625000000, UINT64_C(0x00000000231a3f06)) \ + STEP( 131, UINT64_C(0x0000000000c5c557), 0.655, 0.772542421581250, UINT64_C(0x0000000023e0045d)) \ + STEP( 132, UINT64_C(0x0000000000c7b7da), 0.660, 0.780149145600000, UINT64_C(0x0000000024a7bc37)) \ + STEP( 133, 
UINT64_C(0x0000000000c9a34f), 0.665, 0.787648163618750, UINT64_C(0x0000000025715f86)) \ + STEP( 134, UINT64_C(0x0000000000cb878a), 0.670, 0.795036914200000, UINT64_C(0x00000000263ce710)) \ + STEP( 135, UINT64_C(0x0000000000cd6460), 0.675, 0.802312910156250, UINT64_C(0x00000000270a4b70)) \ + STEP( 136, UINT64_C(0x0000000000cf39ab), 0.680, 0.809473740800000, UINT64_C(0x0000000027d9851b)) \ + STEP( 137, UINT64_C(0x0000000000d10743), 0.685, 0.816517074193750, UINT64_C(0x0000000028aa8c5e)) \ + STEP( 138, UINT64_C(0x0000000000d2cd01), 0.690, 0.823440659400000, UINT64_C(0x00000000297d595f)) \ + STEP( 139, UINT64_C(0x0000000000d48ac2), 0.695, 0.830242328731250, UINT64_C(0x000000002a51e421)) \ + STEP( 140, UINT64_C(0x0000000000d64063), 0.700, 0.836920000000000, UINT64_C(0x000000002b282484)) \ + STEP( 141, UINT64_C(0x0000000000d7edc2), 0.705, 0.843471678768750, UINT64_C(0x000000002c001246)) \ + STEP( 142, UINT64_C(0x0000000000d992bf), 0.710, 0.849895460600000, UINT64_C(0x000000002cd9a505)) \ + STEP( 143, UINT64_C(0x0000000000db2f3c), 0.715, 0.856189533306250, UINT64_C(0x000000002db4d441)) \ + STEP( 144, UINT64_C(0x0000000000dcc31c), 0.720, 0.862352179200000, UINT64_C(0x000000002e91975d)) \ + STEP( 145, UINT64_C(0x0000000000de4e44), 0.725, 0.868381777343750, UINT64_C(0x000000002f6fe5a1)) \ + STEP( 146, UINT64_C(0x0000000000dfd09a), 0.730, 0.874276805800000, UINT64_C(0x00000000304fb63b)) \ + STEP( 147, UINT64_C(0x0000000000e14a07), 0.735, 0.880035843881250, UINT64_C(0x0000000031310042)) \ + STEP( 148, UINT64_C(0x0000000000e2ba74), 0.740, 0.885657574400000, UINT64_C(0x000000003213bab6)) \ + STEP( 149, UINT64_C(0x0000000000e421cd), 0.745, 0.891140785918750, UINT64_C(0x0000000032f7dc83)) \ + STEP( 150, UINT64_C(0x0000000000e58000), 0.750, 0.896484375000000, UINT64_C(0x0000000033dd5c83)) \ + STEP( 151, UINT64_C(0x0000000000e6d4fb), 0.755, 0.901687348456250, UINT64_C(0x0000000034c4317e)) \ + STEP( 152, UINT64_C(0x0000000000e820b0), 0.760, 0.906748825600000, 
UINT64_C(0x0000000035ac522e)) \ + STEP( 153, UINT64_C(0x0000000000e96313), 0.765, 0.911668040493750, UINT64_C(0x000000003695b541)) \ + STEP( 154, UINT64_C(0x0000000000ea9c18), 0.770, 0.916444344200000, UINT64_C(0x0000000037805159)) \ + STEP( 155, UINT64_C(0x0000000000ebcbb7), 0.775, 0.921077207031250, UINT64_C(0x00000000386c1d10)) \ + STEP( 156, UINT64_C(0x0000000000ecf1e8), 0.780, 0.925566220800000, UINT64_C(0x0000000039590ef8)) \ + STEP( 157, UINT64_C(0x0000000000ee0ea7), 0.785, 0.929911101068750, UINT64_C(0x000000003a471d9f)) \ + STEP( 158, UINT64_C(0x0000000000ef21f1), 0.790, 0.934111689400000, UINT64_C(0x000000003b363f90)) \ + STEP( 159, UINT64_C(0x0000000000f02bc6), 0.795, 0.938167955606250, UINT64_C(0x000000003c266b56)) \ + STEP( 160, UINT64_C(0x0000000000f12c27), 0.800, 0.942080000000000, UINT64_C(0x000000003d17977d)) \ + STEP( 161, UINT64_C(0x0000000000f22319), 0.805, 0.945848055643750, UINT64_C(0x000000003e09ba96)) \ + STEP( 162, UINT64_C(0x0000000000f310a1), 0.810, 0.949472490600000, UINT64_C(0x000000003efccb37)) \ + STEP( 163, UINT64_C(0x0000000000f3f4c7), 0.815, 0.952953810181250, UINT64_C(0x000000003ff0bffe)) \ + STEP( 164, UINT64_C(0x0000000000f4cf98), 0.820, 0.956292659200000, UINT64_C(0x0000000040e58f96)) \ + STEP( 165, UINT64_C(0x0000000000f5a120), 0.825, 0.959489824218750, UINT64_C(0x0000000041db30b6)) \ + STEP( 166, UINT64_C(0x0000000000f6696e), 0.830, 0.962546235800000, UINT64_C(0x0000000042d19a24)) \ + STEP( 167, UINT64_C(0x0000000000f72894), 0.835, 0.965462970756250, UINT64_C(0x0000000043c8c2b8)) \ + STEP( 168, UINT64_C(0x0000000000f7dea8), 0.840, 0.968241254400000, UINT64_C(0x0000000044c0a160)) \ + STEP( 169, UINT64_C(0x0000000000f88bc0), 0.845, 0.970882462793750, UINT64_C(0x0000000045b92d20)) \ + STEP( 170, UINT64_C(0x0000000000f92ff6), 0.850, 0.973388125000000, UINT64_C(0x0000000046b25d16)) \ + STEP( 171, UINT64_C(0x0000000000f9cb67), 0.855, 0.975759925331250, UINT64_C(0x0000000047ac287d)) \ + STEP( 172, UINT64_C(0x0000000000fa5e30), 
0.860, 0.977999705600000, UINT64_C(0x0000000048a686ad)) \ + STEP( 173, UINT64_C(0x0000000000fae874), 0.865, 0.980109467368750, UINT64_C(0x0000000049a16f21)) \ + STEP( 174, UINT64_C(0x0000000000fb6a57), 0.870, 0.982091374200000, UINT64_C(0x000000004a9cd978)) \ + STEP( 175, UINT64_C(0x0000000000fbe400), 0.875, 0.983947753906250, UINT64_C(0x000000004b98bd78)) \ + STEP( 176, UINT64_C(0x0000000000fc5598), 0.880, 0.985681100800000, UINT64_C(0x000000004c951310)) \ + STEP( 177, UINT64_C(0x0000000000fcbf4e), 0.885, 0.987294077943750, UINT64_C(0x000000004d91d25e)) \ + STEP( 178, UINT64_C(0x0000000000fd214f), 0.890, 0.988789519400000, UINT64_C(0x000000004e8ef3ad)) \ + STEP( 179, UINT64_C(0x0000000000fd7bcf), 0.895, 0.990170432481250, UINT64_C(0x000000004f8c6f7c)) \ + STEP( 180, UINT64_C(0x0000000000fdcf03), 0.900, 0.991440000000000, UINT64_C(0x00000000508a3e7f)) \ + STEP( 181, UINT64_C(0x0000000000fe1b23), 0.905, 0.992601582518750, UINT64_C(0x00000000518859a2)) \ + STEP( 182, UINT64_C(0x0000000000fe606a), 0.910, 0.993658720600000, UINT64_C(0x000000005286ba0c)) \ + STEP( 183, UINT64_C(0x0000000000fe9f18), 0.915, 0.994615137056250, UINT64_C(0x0000000053855924)) \ + STEP( 184, UINT64_C(0x0000000000fed76e), 0.920, 0.995474739200000, UINT64_C(0x0000000054843092)) \ + STEP( 185, UINT64_C(0x0000000000ff09b0), 0.925, 0.996241621093750, UINT64_C(0x0000000055833a42)) \ + STEP( 186, UINT64_C(0x0000000000ff3627), 0.930, 0.996920065800000, UINT64_C(0x0000000056827069)) \ + STEP( 187, UINT64_C(0x0000000000ff5d1d), 0.935, 0.997514547631250, UINT64_C(0x000000005781cd86)) \ + STEP( 188, UINT64_C(0x0000000000ff7ee0), 0.940, 0.998029734400000, UINT64_C(0x0000000058814c66)) \ + STEP( 189, UINT64_C(0x0000000000ff9bc3), 0.945, 0.998470489668750, UINT64_C(0x000000005980e829)) \ + STEP( 190, UINT64_C(0x0000000000ffb419), 0.950, 0.998841875000000, UINT64_C(0x000000005a809c42)) \ + STEP( 191, UINT64_C(0x0000000000ffc83d), 0.955, 0.999149152206250, UINT64_C(0x000000005b80647f)) \ + STEP( 192, 
UINT64_C(0x0000000000ffd888), 0.960, 0.999397785600000, UINT64_C(0x000000005c803d07)) \ + STEP( 193, UINT64_C(0x0000000000ffe55b), 0.965, 0.999593444243750, UINT64_C(0x000000005d802262)) \ + STEP( 194, UINT64_C(0x0000000000ffef17), 0.970, 0.999742004200000, UINT64_C(0x000000005e801179)) \ + STEP( 195, UINT64_C(0x0000000000fff623), 0.975, 0.999849550781250, UINT64_C(0x000000005f80079c)) \ + STEP( 196, UINT64_C(0x0000000000fffae9), 0.980, 0.999922380800000, UINT64_C(0x0000000060800285)) \ + STEP( 197, UINT64_C(0x0000000000fffdd6), 0.985, 0.999967004818750, UINT64_C(0x000000006180005b)) \ + STEP( 198, UINT64_C(0x0000000000ffff5a), 0.990, 0.999990149400000, UINT64_C(0x00000000627fffb5)) \ + STEP( 199, UINT64_C(0x0000000000ffffeb), 0.995, 0.999998759356250, UINT64_C(0x00000000637fffa0)) \ + STEP( 200, UINT64_C(0x0000000001000000), 1.000, 1.000000000000000, UINT64_C(0x00000000647fffa0)) \ #endif /* JEMALLOC_INTERNAL_SMOOTHSTEP_H */ diff --git a/include/jemalloc/internal/smoothstep.sh b/include/jemalloc/internal/smoothstep.sh index 65de97bf..41164615 100755 --- a/include/jemalloc/internal/smoothstep.sh +++ b/include/jemalloc/internal/smoothstep.sh @@ -83,14 +83,16 @@ cat <extents_dirty, extent); if (arena_dirty_decay_ms_get(arena) == 0) { - arena_decay_dirty(tsdn, arena, true); + arena_decay_dirty(tsdn, arena, false, true); } } @@ -606,12 +613,6 @@ arena_decay_deadline_reached(const arena_decay_t *decay, const nstime_t *time) { static size_t arena_decay_backlog_npages_limit(const arena_decay_t *decay) { - static const uint64_t h_steps[] = { -#define STEP(step, h, x, y) \ - h, - SMOOTHSTEP -#undef STEP - }; uint64_t sum; size_t npages_limit_backlog; unsigned i; @@ -660,17 +661,27 @@ arena_decay_backlog_update(arena_decay_t *decay, extents_t *extents, arena_decay_backlog_update_last(decay, extents); } +static void +arena_decay_try_purge(tsdn_t *tsdn, arena_t *arena, + arena_decay_t *decay, extents_t *extents) { + size_t npages_limit = 
arena_decay_backlog_npages_limit(decay); + + if (extents_npages_get(extents) > npages_limit) { + arena_decay_to_limit(tsdn, arena, decay, extents, false, + npages_limit); + } +} + static void arena_decay_epoch_advance_helper(arena_decay_t *decay, extents_t *extents, const nstime_t *time) { - uint64_t nadvance_u64; - nstime_t delta; - assert(arena_decay_deadline_reached(decay, time)); + nstime_t delta; nstime_copy(&delta, time); nstime_subtract(&delta, &decay->epoch); - nadvance_u64 = nstime_divide(&delta, &decay->interval); + + uint64_t nadvance_u64 = nstime_divide(&delta, &decay->interval); assert(nadvance_u64 > 0); /* Add nadvance_u64 decay intervals to epoch. */ @@ -686,14 +697,13 @@ arena_decay_epoch_advance_helper(arena_decay_t *decay, extents_t *extents, } static void -arena_decay_epoch_advance_purge(tsdn_t *tsdn, arena_t *arena, - arena_decay_t *decay, extents_t *extents) { - size_t npages_limit = arena_decay_backlog_npages_limit(decay); - - if (extents_npages_get(extents) > npages_limit) { - arena_decay_to_limit(tsdn, arena, decay, extents, false, - npages_limit); +arena_decay_epoch_advance(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, + extents_t *extents, const nstime_t *time, bool purge) { + arena_decay_epoch_advance_helper(decay, extents, time); + if (purge) { + arena_decay_try_purge(tsdn, arena, decay, extents); } + /* * There may be concurrent ndirty fluctuation between the purge above * and the nunpurged update below, but this is inconsequential to decay @@ -702,13 +712,6 @@ arena_decay_epoch_advance_purge(tsdn_t *tsdn, arena_t *arena, decay->nunpurged = extents_npages_get(extents); } -static void -arena_decay_epoch_advance(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, - extents_t *extents, const nstime_t *time) { - arena_decay_epoch_advance_helper(decay, extents, time); - arena_decay_epoch_advance_purge(tsdn, arena, decay, extents); -} - static void arena_decay_reinit(arena_decay_t *decay, extents_t *extents, ssize_t decay_ms) { 
arena_decay_ms_write(decay, decay_ms); @@ -759,9 +762,9 @@ arena_decay_ms_valid(ssize_t decay_ms) { return false; } -static void +static bool arena_maybe_decay(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, - extents_t *extents) { + extents_t *extents, bool is_background_thread) { malloc_mutex_assert_owner(tsdn, &decay->mtx); /* Purge all or nothing if the option is disabled. */ @@ -771,7 +774,7 @@ arena_maybe_decay(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, arena_decay_to_limit(tsdn, arena, decay, extents, false, 0); } - return; + return false; } nstime_t time; @@ -799,11 +802,20 @@ arena_maybe_decay(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, * If the deadline has been reached, advance to the current epoch and * purge to the new limit if necessary. Note that dirty pages created * during the current epoch are not subject to purge until a future - * epoch, so as a result purging only happens during epoch advances. + * epoch, so as a result purging only happens during epoch advances, or + * being triggered by background threads (scheduled event). */ - if (arena_decay_deadline_reached(decay, &time)) { - arena_decay_epoch_advance(tsdn, arena, decay, extents, &time); + bool advance_epoch = arena_decay_deadline_reached(decay, &time); + if (advance_epoch) { + bool should_purge = is_background_thread || + !background_thread_enabled(); + arena_decay_epoch_advance(tsdn, arena, decay, extents, &time, + should_purge); + } else if (is_background_thread) { + arena_decay_try_purge(tsdn, arena, decay, extents); } + + return advance_epoch; } static ssize_t @@ -838,7 +850,7 @@ arena_decay_ms_set(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, * arbitrary change during initial arena configuration. 
*/ arena_decay_reinit(decay, extents, decay_ms); - arena_maybe_decay(tsdn, arena, decay, extents); + arena_maybe_decay(tsdn, arena, decay, extents, false); malloc_mutex_unlock(tsdn, &decay->mtx); return false; @@ -974,40 +986,57 @@ arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, static bool arena_decay_impl(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, - extents_t *extents, bool all) { + extents_t *extents, bool is_background_thread, bool all) { if (all) { malloc_mutex_lock(tsdn, &decay->mtx); arena_decay_to_limit(tsdn, arena, decay, extents, all, 0); - } else { - if (malloc_mutex_trylock(tsdn, &decay->mtx)) { - /* No need to wait if another thread is in progress. */ - return true; - } - arena_maybe_decay(tsdn, arena, decay, extents); + malloc_mutex_unlock(tsdn, &decay->mtx); + + return false; + } + + if (malloc_mutex_trylock(tsdn, &decay->mtx)) { + /* No need to wait if another thread is in progress. */ + return true; + } + + bool epoch_advanced = arena_maybe_decay(tsdn, arena, decay, extents, + is_background_thread); + size_t npages_new; + if (epoch_advanced) { + /* Backlog is updated on epoch advance. 
*/ + npages_new = decay->backlog[SMOOTHSTEP_NSTEPS-1]; } malloc_mutex_unlock(tsdn, &decay->mtx); + if (have_background_thread && background_thread_enabled() && + epoch_advanced && !is_background_thread) { + background_thread_interval_check(tsdn, arena, decay, npages_new); + } + return false; } static bool -arena_decay_dirty(tsdn_t *tsdn, arena_t *arena, bool all) { +arena_decay_dirty(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, + bool all) { return arena_decay_impl(tsdn, arena, &arena->decay_dirty, - &arena->extents_dirty, all); + &arena->extents_dirty, is_background_thread, all); } static bool -arena_decay_muzzy(tsdn_t *tsdn, arena_t *arena, bool all) { +arena_decay_muzzy(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, + bool all) { return arena_decay_impl(tsdn, arena, &arena->decay_muzzy, - &arena->extents_muzzy, all); + &arena->extents_muzzy, is_background_thread, all); } void -arena_decay(tsdn_t *tsdn, arena_t *arena, bool all) { - if (arena_decay_dirty(tsdn, arena, all)) { +arena_decay(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all) { + if (arena_decay_dirty(tsdn, arena, is_background_thread, all)) { return; } - arena_decay_muzzy(tsdn, arena, all); + arena_decay_muzzy(tsdn, arena, is_background_thread, all); } static void @@ -1173,6 +1202,7 @@ arena_destroy(tsd_t *tsd, arena_t *arena) { * extents, so only retained extents may remain. */ assert(extents_npages_get(&arena->extents_dirty) == 0); + assert(extents_npages_get(&arena->extents_muzzy) == 0); /* Deallocate retained memory. */ arena_destroy_retained(tsd_tsdn(tsd), arena); @@ -1971,19 +2001,35 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { } arena->base = base; + /* Set arena before creating background threads. */ + arena_set(ind, arena); nstime_init(&arena->create_time, 0); nstime_update(&arena->create_time); - /* We don't support reetrancy for arena 0 bootstrapping. 
*/ - if (ind != 0 && hooks_arena_new_hook) { + /* We don't support reentrancy for arena 0 bootstrapping. */ + if (ind != 0) { /* * If we're here, then arena 0 already exists, so bootstrapping * is done enough that we should have tsd. */ + assert(!tsdn_null(tsdn)); pre_reentrancy(tsdn_tsd(tsdn)); - hooks_arena_new_hook(); + if (hooks_arena_new_hook) { + hooks_arena_new_hook(); + } post_reentrancy(tsdn_tsd(tsdn)); + + /* background_thread_create() handles reentrancy internally. */ + if (have_background_thread) { + bool err; + malloc_mutex_lock(tsdn, &background_thread_lock); + err = background_thread_create(tsdn_tsd(tsdn), ind); + malloc_mutex_unlock(tsdn, &background_thread_lock); + if (err) { + goto label_error; + } + } } return arena; diff --git a/src/background_thread.c b/src/background_thread.c new file mode 100644 index 00000000..671e57f7 --- /dev/null +++ b/src/background_thread.c @@ -0,0 +1,572 @@ +#define JEMALLOC_BACKGROUND_THREAD_C_ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/assert.h" + +/******************************************************************************/ +/* Data. */ + +/* This option should be opt-in only. */ +#define BACKGROUND_THREAD_DEFAULT false +/* Read-only after initialization. */ +bool opt_background_thread = BACKGROUND_THREAD_DEFAULT; + +/* Used for thread creation, termination and stats. */ +malloc_mutex_t background_thread_lock; +/* Indicates global state. Atomic because decay reads this w/o locking. */ +atomic_b_t background_thread_enabled_state; +size_t n_background_threads; +/* Thread info per-index. 
*/ +background_thread_info_t *background_thread_info; + +/******************************************************************************/ + +#ifndef JEMALLOC_BACKGROUND_THREAD +#define NOT_REACHED { not_reached(); } +bool background_thread_create(tsd_t *tsd, unsigned arena_ind) NOT_REACHED +bool background_threads_init(tsd_t *tsd) NOT_REACHED +bool background_threads_enable(tsd_t *tsd) NOT_REACHED +bool background_threads_disable(tsd_t *tsd) NOT_REACHED +bool background_threads_disable_single(tsd_t *tsd, + background_thread_info_t *info) NOT_REACHED +void background_thread_interval_check(tsdn_t *tsdn, arena_t *arena, + arena_decay_t *decay, size_t npages_new) NOT_REACHED +void background_thread_prefork0(tsdn_t *tsdn) NOT_REACHED +void background_thread_prefork1(tsdn_t *tsdn) NOT_REACHED +void background_thread_postfork_parent(tsdn_t *tsdn) NOT_REACHED +void background_thread_postfork_child(tsdn_t *tsdn) NOT_REACHED +#undef NOT_REACHED +#else +bool +background_threads_init(tsd_t *tsd) { + assert(have_background_thread); + assert(narenas_total_get() > 0); + + background_thread_enabled_set(tsd_tsdn(tsd), opt_background_thread); + if (malloc_mutex_init(&background_thread_lock, + "background_thread_global", + WITNESS_RANK_BACKGROUND_THREAD_GLOBAL, + malloc_mutex_rank_exclusive)) { + return true; + } + background_thread_info = (background_thread_info_t *)base_alloc( + tsd_tsdn(tsd), b0get(), ncpus * sizeof(background_thread_info_t), + CACHELINE); + if (background_thread_info == NULL) { + return true; + } + + for (unsigned i = 0; i < ncpus; i++) { + background_thread_info_t *info = &background_thread_info[i]; + if (malloc_mutex_init(&info->mtx, "background_thread", + WITNESS_RANK_BACKGROUND_THREAD, + malloc_mutex_rank_exclusive)) { + return true; + } + if (pthread_cond_init(&info->cond, NULL)) { + return true; + } + info->started = false; + nstime_init(&info->next_wakeup, 0); + info->npages_to_purge_new = 0; + } + + return false; +} + +static inline bool 
+set_current_thread_affinity(UNUSED int cpu) { +#if defined(JEMALLOC_HAVE_SCHED_SETAFFINITY) + cpu_set_t cpuset; + CPU_ZERO(&cpuset); + CPU_SET(cpu, &cpuset); + int ret = sched_setaffinity(0, sizeof(cpu_set_t), &cpuset); + + return (ret != 0); +#else + return false; +#endif +} + +/* Threshold for determining when to wake up the background thread. */ +#define BACKGROUND_THREAD_NPAGES_THRESHOLD UINT64_C(1024) +#define BILLION UINT64_C(1000000000) +/* Minimal sleep interval 100 ms. */ +#define BACKGROUND_THREAD_MIN_INTERVAL_NS (BILLION / 10) +#define BACKGROUND_THREAD_INDEFINITE_SLEEP UINT64_MAX + +static inline size_t +decay_npurge_after_interval(arena_decay_t *decay, size_t interval) { + size_t i; + uint64_t sum = 0; + for (i = 0; i < interval; i++) { + sum += decay->backlog[i] * h_steps[i]; + } + for (; i < SMOOTHSTEP_NSTEPS; i++) { + sum += decay->backlog[i] * (h_steps[i] - h_steps[i - interval]); + } + + return (size_t)(sum >> SMOOTHSTEP_BFP); +} + +static uint64_t +arena_decay_compute_purge_interval_impl(tsdn_t *tsdn, arena_decay_t *decay, + extents_t *extents) { + if (malloc_mutex_trylock(tsdn, &decay->mtx)) { + /* Use minimal interval if decay is contended. */ + return BACKGROUND_THREAD_MIN_INTERVAL_NS; + } + + uint64_t interval; + ssize_t decay_time = atomic_load_zd(&decay->time_ms, ATOMIC_RELAXED); + if (decay_time <= 0) { + /* Purging is eagerly done or disabled currently. */ + interval = BACKGROUND_THREAD_INDEFINITE_SLEEP; + goto label_done; + } + + uint64_t decay_interval_ns = nstime_ns(&decay->interval); + assert(decay_interval_ns > 0); + size_t npages = extents_npages_get(extents); + if (npages == 0) { + unsigned i; + for (i = 0; i < SMOOTHSTEP_NSTEPS; i++) { + if (decay->backlog[i] > 0) { + break; + } + } + if (i == SMOOTHSTEP_NSTEPS) { + /* No dirty pages recorded. Sleep indefinitely. */ + interval = BACKGROUND_THREAD_INDEFINITE_SLEEP; + goto label_done; + } + } + if (npages <= BACKGROUND_THREAD_NPAGES_THRESHOLD) { + /* Use max interval. 
*/ + interval = decay_interval_ns * SMOOTHSTEP_NSTEPS; + goto label_done; + } + + size_t lb = BACKGROUND_THREAD_MIN_INTERVAL_NS / decay_interval_ns; + size_t ub = SMOOTHSTEP_NSTEPS; + /* Minimal 2 intervals to ensure reaching next epoch deadline. */ + lb = (lb < 2) ? 2 : lb; + if ((decay_interval_ns * ub <= BACKGROUND_THREAD_MIN_INTERVAL_NS) || + (lb + 2 > ub)) { + interval = BACKGROUND_THREAD_MIN_INTERVAL_NS; + goto label_done; + } + + assert(lb + 2 <= ub); + size_t npurge_lb, npurge_ub; + npurge_lb = decay_npurge_after_interval(decay, lb); + if (npurge_lb > BACKGROUND_THREAD_NPAGES_THRESHOLD) { + interval = decay_interval_ns * lb; + goto label_done; + } + npurge_ub = decay_npurge_after_interval(decay, ub); + if (npurge_ub < BACKGROUND_THREAD_NPAGES_THRESHOLD) { + interval = decay_interval_ns * ub; + goto label_done; + } + + unsigned n_search = 0; + size_t target, npurge; + while ((npurge_lb + BACKGROUND_THREAD_NPAGES_THRESHOLD < npurge_ub) + && (lb + 2 < ub)) { + target = (lb + ub) / 2; + npurge = decay_npurge_after_interval(decay, target); + if (npurge > BACKGROUND_THREAD_NPAGES_THRESHOLD) { + ub = target; + npurge_ub = npurge; + } else { + lb = target; + npurge_lb = npurge; + } + assert(n_search++ < lg_floor(SMOOTHSTEP_NSTEPS) + 1); + } + interval = decay_interval_ns * (ub + lb) / 2; +label_done: + interval = (interval < BACKGROUND_THREAD_MIN_INTERVAL_NS) ? + BACKGROUND_THREAD_MIN_INTERVAL_NS : interval; + malloc_mutex_unlock(tsdn, &decay->mtx); + + return interval; +} + +/* Compute purge interval for background threads. */ +static uint64_t +arena_decay_compute_purge_interval(tsdn_t *tsdn, arena_t *arena) { + uint64_t i1, i2; + i1 = arena_decay_compute_purge_interval_impl(tsdn, &arena->decay_dirty, + &arena->extents_dirty); + if (i1 == BACKGROUND_THREAD_MIN_INTERVAL_NS) { + return i1; + } + i2 = arena_decay_compute_purge_interval_impl(tsdn, &arena->decay_muzzy, + &arena->extents_muzzy); + + return i1 < i2 ? 
i1 : i2; +} + +static inline uint64_t +background_work_once(tsdn_t *tsdn, unsigned ind) { + arena_t *arena; + unsigned i, narenas; + uint64_t min_interval; + + min_interval = BACKGROUND_THREAD_INDEFINITE_SLEEP; + narenas = narenas_total_get(); + for (i = ind; i < narenas; i += ncpus) { + arena = arena_get(tsdn, i, false); + if (!arena) { + continue; + } + + arena_decay(tsdn, arena, true, false); + uint64_t interval = arena_decay_compute_purge_interval(tsdn, + arena); + if (interval == BACKGROUND_THREAD_MIN_INTERVAL_NS) { + return interval; + } + + assert(interval > BACKGROUND_THREAD_MIN_INTERVAL_NS); + if (min_interval > interval) { + min_interval = interval; + } + } + + return min_interval; +} + +static void +background_work(tsdn_t *tsdn, unsigned ind) { + int ret; + background_thread_info_t *info = &background_thread_info[ind]; + + malloc_mutex_lock(tsdn, &info->mtx); + while (info->started) { + uint64_t interval = background_work_once(tsdn, ind); + info->npages_to_purge_new = 0; + + if (interval == BACKGROUND_THREAD_INDEFINITE_SLEEP) { + nstime_init(&info->next_wakeup, + BACKGROUND_THREAD_INDEFINITE_SLEEP); + ret = pthread_cond_wait(&info->cond, &info->mtx.lock); + assert(ret == 0); + continue; + } + + assert(interval >= BACKGROUND_THREAD_MIN_INTERVAL_NS && + interval <= BACKGROUND_THREAD_INDEFINITE_SLEEP); + nstime_init(&info->next_wakeup, 0); + nstime_update(&info->next_wakeup); + info->next_wakeup.ns += interval; + + nstime_t ts_wakeup; + struct timeval tv; + gettimeofday(&tv, NULL); + nstime_init2(&ts_wakeup, tv.tv_sec, + tv.tv_usec * 1000 + interval); + struct timespec ts; + ts.tv_sec = (size_t)nstime_sec(&ts_wakeup); + ts.tv_nsec = (size_t)nstime_nsec(&ts_wakeup); + ret = pthread_cond_timedwait(&info->cond, &info->mtx.lock, + &ts); + assert(ret == ETIMEDOUT || ret == 0); + } + malloc_mutex_unlock(tsdn, &info->mtx); +} + +static void * +background_thread_entry(void *ind_arg) { + unsigned thread_ind = (unsigned)(uintptr_t)ind_arg; + assert(thread_ind < 
narenas_total_get() && thread_ind < ncpus); + + if (opt_percpu_arena != percpu_arena_disabled) { + set_current_thread_affinity((int)thread_ind); + } + /* + * Start periodic background work. We avoid fetching tsd to keep the + * background thread "outside", since there may be side effects, for + * example triggering new arena creation (which in turn triggers + * background thread creation). + */ + background_work(TSDN_NULL, thread_ind); + assert(pthread_equal(pthread_self(), + background_thread_info[thread_ind].thread)); + + return NULL; +} + +/* Create a new background thread if needed. */ +bool +background_thread_create(tsd_t *tsd, unsigned arena_ind) { + assert(have_background_thread); + malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock); + + /* We create at most NCPUs threads. */ + size_t thread_ind = arena_ind % ncpus; + background_thread_info_t *info = &background_thread_info[thread_ind]; + + bool need_new_thread; + malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx); + need_new_thread = background_thread_enabled() && !info->started; + malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx); + if (!need_new_thread) { + return false; + } + + pre_reentrancy(tsd); + int err; + load_pthread_create_fptr(); + if ((err = pthread_create(&info->thread, NULL, + background_thread_entry, (void *)thread_ind)) != 0) { + malloc_printf(": arena %u background thread creation " + "failed (%d).\n", arena_ind, err); + } + post_reentrancy(tsd); + + malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx); + assert(info->started == false); + if (err == 0) { + info->started = true; + n_background_threads++; + } + malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx); + + return (err != 0); +} + +bool +background_threads_enable(tsd_t *tsd) { + assert(n_background_threads == 0); + assert(background_thread_enabled()); + malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock); + + VARIABLE_ARRAY(bool, created, ncpus); + unsigned i, ncreated; + for (i = 0; i < ncpus; i++) { + created[i] = false; + 
} + ncreated = 0; + + unsigned n = narenas_total_get(); + for (i = 0; i < n; i++) { + if (created[i % ncpus] || + arena_get(tsd_tsdn(tsd), i, false) == NULL) { + continue; + } + if (background_thread_create(tsd, i)) { + return true; + } + created[i % ncpus] = true; + if (++ncreated == ncpus) { + break; + } + } + + return false; +} + +bool +background_threads_disable_single(tsd_t *tsd, background_thread_info_t *info) { + malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock); + pre_reentrancy(tsd); + + bool has_thread; + malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx); + if (info->started) { + has_thread = true; + info->started = false; + pthread_cond_signal(&info->cond); + } else { + has_thread = false; + } + malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx); + + if (!has_thread) { + post_reentrancy(tsd); + return false; + } + void *ret; + if (pthread_join(info->thread, &ret)) { + post_reentrancy(tsd); + return true; + } + assert(ret == NULL); + n_background_threads--; + post_reentrancy(tsd); + + return false; +} + +bool +background_threads_disable(tsd_t *tsd) { + assert(!background_thread_enabled()); + for (unsigned i = 0; i < ncpus; i++) { + background_thread_info_t *info = &background_thread_info[i]; + if (background_threads_disable_single(tsd, info)) { + return true; + } + } + assert(n_background_threads == 0); + + return false; +} + +/* Check if we need to signal the background thread early. */ +void +background_thread_interval_check(tsdn_t *tsdn, arena_t *arena, + arena_decay_t *decay, size_t npages_new) { + background_thread_info_t *info = arena_background_thread_info_get( + arena); + + if (malloc_mutex_trylock(tsdn, &info->mtx)) { + /* + * Background thread may hold the mutex for a long period of + * time. We'd like to avoid the variance on application + * threads. So keep this non-blocking, and leave the work to a + * future epoch. 
+ */ + return; + } + + if (!info->started) { + goto label_done; + } + if (malloc_mutex_trylock(tsdn, &decay->mtx)) { + goto label_done; + } + + ssize_t decay_time = atomic_load_zd(&decay->time_ms, ATOMIC_RELAXED); + if (decay_time <= 0) { + /* Purging is eagerly done or disabled currently. */ + goto label_done_unlock2; + } + if (nstime_compare(&info->next_wakeup, &decay->epoch) <= 0) { + goto label_done_unlock2; + } + + uint64_t decay_interval_ns = nstime_ns(&decay->interval); + assert(decay_interval_ns > 0); + nstime_t diff; + nstime_copy(&diff, &info->next_wakeup); + nstime_subtract(&diff, &decay->epoch); + if (nstime_ns(&diff) < BACKGROUND_THREAD_MIN_INTERVAL_NS) { + goto label_done_unlock2; + } + + if (npages_new > 0) { + size_t n_epoch = (size_t)(nstime_ns(&diff) / decay_interval_ns); + /* + * Compute how many new pages we would need to purge by the next + * wakeup, which is used to determine if we should signal the + * background thread. + */ + uint64_t npurge_new; + if (n_epoch >= SMOOTHSTEP_NSTEPS) { + npurge_new = npages_new; + } else { + uint64_t h_steps_max = h_steps[SMOOTHSTEP_NSTEPS - 1]; + assert(h_steps_max >= + h_steps[SMOOTHSTEP_NSTEPS - 1 - n_epoch]); + npurge_new = npages_new * (h_steps_max - + h_steps[SMOOTHSTEP_NSTEPS - 1 - n_epoch]); + npurge_new >>= SMOOTHSTEP_BFP; + } + info->npages_to_purge_new += npurge_new; + } + + if (info->npages_to_purge_new > BACKGROUND_THREAD_NPAGES_THRESHOLD || + (nstime_ns(&info->next_wakeup) == + BACKGROUND_THREAD_INDEFINITE_SLEEP && info->npages_to_purge_new > 0)) { + info->npages_to_purge_new = 0; + pthread_cond_signal(&info->cond); + } +label_done_unlock2: + malloc_mutex_unlock(tsdn, &decay->mtx); +label_done: + malloc_mutex_unlock(tsdn, &info->mtx); +} + +void +background_thread_prefork0(tsdn_t *tsdn) { + malloc_mutex_prefork(tsdn, &background_thread_lock); + if (background_thread_enabled()) { + background_thread_enabled_set(tsdn, false); + background_threads_disable(tsdn_tsd(tsdn)); + /* Enable again to 
re-create threads after fork. */ + background_thread_enabled_set(tsdn, true); + } + assert(n_background_threads == 0); +} + +void +background_thread_prefork1(tsdn_t *tsdn) { + for (unsigned i = 0; i < ncpus; i++) { + malloc_mutex_prefork(tsdn, &background_thread_info[i].mtx); + } +} + +static void +background_thread_postfork_init(tsdn_t *tsdn) { + if (background_thread_enabled()) { + background_threads_enable(tsdn_tsd(tsdn)); + } +} + +void +background_thread_postfork_parent(tsdn_t *tsdn) { + for (unsigned i = 0; i < ncpus; i++) { + malloc_mutex_postfork_parent(tsdn, + &background_thread_info[i].mtx); + } + background_thread_postfork_init(tsdn); + malloc_mutex_postfork_parent(tsdn, &background_thread_lock); +} + +void +background_thread_postfork_child(tsdn_t *tsdn) { + for (unsigned i = 0; i < ncpus; i++) { + malloc_mutex_postfork_child(tsdn, + &background_thread_info[i].mtx); + } + malloc_mutex_postfork_child(tsdn, &background_thread_lock); + + malloc_mutex_lock(tsdn, &background_thread_lock); + background_thread_postfork_init(tsdn); + malloc_mutex_unlock(tsdn, &background_thread_lock); +} + +#undef BACKGROUND_THREAD_NPAGES_THRESHOLD +#undef BILLION +#undef BACKGROUND_THREAD_MIN_INTERVAL_NS +#undef BACKGROUND_THREAD_INDEFINITE_SLEEP + +#endif /* defined(JEMALLOC_BACKGROUND_THREAD) */ + +#if defined(JEMALLOC_BACKGROUND_THREAD) || defined(JEMALLOC_LAZY_LOCK) +#include <dlfcn.h> + +int (*pthread_create_fptr)(pthread_t *__restrict, const pthread_attr_t *, + void *(*)(void *), void *__restrict); + +void * +load_pthread_create_fptr(void) { + if (pthread_create_fptr) { + return pthread_create_fptr; + } + + pthread_create_fptr = dlsym(RTLD_NEXT, "pthread_create"); + if (pthread_create_fptr == NULL) { + malloc_write(": Error in dlsym(RTLD_NEXT, " + "\"pthread_create\")\n"); + abort(); + } + + return pthread_create_fptr; +} + +#endif diff --git a/src/ctl.c b/src/ctl.c index 7f69f151..ee0979a8 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -53,6 +53,7 @@ static const ctl_named_node_t
*n##_index(tsdn_t *tsdn, \ CTL_PROTO(version) CTL_PROTO(epoch) +CTL_PROTO(background_thread) CTL_PROTO(thread_tcache_enabled) CTL_PROTO(thread_tcache_flush) CTL_PROTO(thread_prof_name) @@ -78,6 +79,7 @@ CTL_PROTO(opt_retain) CTL_PROTO(opt_dss) CTL_PROTO(opt_narenas) CTL_PROTO(opt_percpu_arena) +CTL_PROTO(opt_background_thread) CTL_PROTO(opt_dirty_decay_ms) CTL_PROTO(opt_muzzy_decay_ms) CTL_PROTO(opt_stats_print) @@ -265,6 +267,7 @@ static const ctl_named_node_t opt_node[] = { {NAME("dss"), CTL(opt_dss)}, {NAME("narenas"), CTL(opt_narenas)}, {NAME("percpu_arena"), CTL(opt_percpu_arena)}, + {NAME("background_thread"), CTL(opt_background_thread)}, {NAME("dirty_decay_ms"), CTL(opt_dirty_decay_ms)}, {NAME("muzzy_decay_ms"), CTL(opt_muzzy_decay_ms)}, {NAME("stats_print"), CTL(opt_stats_print)}, @@ -501,6 +504,7 @@ static const ctl_named_node_t stats_node[] = { static const ctl_named_node_t root_node[] = { {NAME("version"), CTL(version)}, {NAME("epoch"), CTL(epoch)}, + {NAME("background_thread"), CTL(background_thread)}, {NAME("thread"), CHILD(named, thread)}, {NAME("config"), CHILD(named, config)}, {NAME("opt"), CHILD(named, opt)}, @@ -1445,6 +1449,53 @@ label_return: return ret; } +static int +background_thread_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + int ret; + bool oldval; + + if (!have_background_thread) { + return ENOENT; + } + + malloc_mutex_lock(tsd_tsdn(tsd), &background_thread_lock); + if (newp == NULL) { + oldval = background_thread_enabled(); + READ(oldval, bool); + } else { + if (newlen != sizeof(bool)) { + ret = EINVAL; + goto label_return; + } + oldval = background_thread_enabled(); + READ(oldval, bool); + + bool newval = *(bool *)newp; + if (newval == oldval) { + ret = 0; + goto label_return; + } + + background_thread_enabled_set(tsd_tsdn(tsd), newval); + if (newval) { + if (background_threads_enable(tsd)) { + ret = EFAULT; + goto label_return; + } + } else { + if 
(background_threads_disable(tsd)) { + ret = EFAULT; + goto label_return; + } + } + } + ret = 0; +label_return: + malloc_mutex_unlock(tsd_tsdn(tsd), &background_thread_lock); + return ret; +} + /******************************************************************************/ CTL_RO_CONFIG_GEN(config_cache_oblivious, bool) @@ -1466,6 +1517,7 @@ CTL_RO_NL_GEN(opt_retain, opt_retain, bool) CTL_RO_NL_GEN(opt_dss, opt_dss, const char *) CTL_RO_NL_GEN(opt_narenas, opt_narenas, unsigned) CTL_RO_NL_GEN(opt_percpu_arena, opt_percpu_arena, const char *) +CTL_RO_NL_GEN(opt_background_thread, opt_background_thread, bool) CTL_RO_NL_GEN(opt_dirty_decay_ms, opt_dirty_decay_ms, ssize_t) CTL_RO_NL_GEN(opt_muzzy_decay_ms, opt_muzzy_decay_ms, ssize_t) CTL_RO_NL_GEN(opt_stats_print, opt_stats_print, bool) @@ -1764,7 +1816,8 @@ arena_i_decay(tsdn_t *tsdn, unsigned arena_ind, bool all) { for (i = 0; i < narenas; i++) { if (tarenas[i] != NULL) { - arena_decay(tsdn, tarenas[i], all); + arena_decay(tsdn, tarenas[i], false, + all); } } } else { @@ -1778,7 +1831,7 @@ arena_i_decay(tsdn_t *tsdn, unsigned arena_ind, bool all) { malloc_mutex_unlock(tsdn, &ctl_mtx); if (tarena != NULL) { - arena_decay(tsdn, tarena, all); + arena_decay(tsdn, tarena, false, all); } } } @@ -1837,6 +1890,35 @@ label_return: return ret; } +static void +arena_reset_prepare_background_thread(tsd_t *tsd, unsigned arena_ind) { + /* Temporarily disable the background thread during arena reset. 
*/ + if (have_background_thread) { + malloc_mutex_lock(tsd_tsdn(tsd), &background_thread_lock); + if (background_thread_enabled()) { + unsigned ind = arena_ind % ncpus; + background_thread_info_t *info = + &background_thread_info[ind]; + assert(info->started); + background_threads_disable_single(tsd, info); + } + } +} + +static void +arena_reset_finish_background_thread(tsd_t *tsd, unsigned arena_ind) { + if (have_background_thread) { + if (background_thread_enabled()) { + unsigned ind = arena_ind % ncpus; + background_thread_info_t *info = + &background_thread_info[ind]; + assert(!info->started); + background_thread_create(tsd, ind); + } + malloc_mutex_unlock(tsd_tsdn(tsd), &background_thread_lock); + } +} + static int arena_i_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { @@ -1850,7 +1932,9 @@ arena_i_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, return ret; } + arena_reset_prepare_background_thread(tsd, arena_ind); arena_reset(tsd, arena); + arena_reset_finish_background_thread(tsd, arena_ind); return ret; } @@ -1875,9 +1959,10 @@ arena_i_destroy_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, goto label_return; } + arena_reset_prepare_background_thread(tsd, arena_ind); /* Merge stats after resetting and purging arena. */ arena_reset(tsd, arena); - arena_decay(tsd_tsdn(tsd), arena, true); + arena_decay(tsd_tsdn(tsd), arena, false, true); ctl_darena = arenas_i(MALLCTL_ARENAS_DESTROYED); ctl_darena->initialized = true; ctl_arena_refresh(tsd_tsdn(tsd), arena, ctl_darena, arena_ind, true); @@ -1888,6 +1973,7 @@ arena_i_destroy_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, /* Record arena index for later recycling via arenas.create. 
*/ ql_elm_new(ctl_arena, destroyed_link); ql_tail_insert(&ctl_arenas->destroyed, ctl_arena, destroyed_link); + arena_reset_finish_background_thread(tsd, arena_ind); assert(ret == 0); label_return: diff --git a/src/jemalloc.c b/src/jemalloc.c index 56aef5b0..b03e5f48 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -420,7 +420,7 @@ arena_init_locked(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { /* Actually initialize the arena. */ arena = arena_new(tsdn, ind, extent_hooks); - arena_set(ind, arena); + return arena; } @@ -1140,6 +1140,8 @@ malloc_conf_init(void) { } continue; } + CONF_HANDLE_BOOL(opt_background_thread, + "background_thread"); if (config_prof) { CONF_HANDLE_BOOL(opt_prof, "prof") CONF_HANDLE_CHAR_P(opt_prof_prefix, @@ -1380,6 +1382,22 @@ malloc_init_narenas(void) { return false; } +static bool +malloc_init_background_threads(tsd_t *tsd) { + malloc_mutex_assert_owner(tsd_tsdn(tsd), &init_lock); + if (!have_background_thread) { + if (opt_background_thread) { + malloc_printf(": option background_thread " + "currently supports pthread only. \n"); + return true; + } else { + return false; + } + } + + return background_threads_init(tsd); +} + static bool malloc_init_hard_finish(void) { if (malloc_mutex_boot()) @@ -1421,8 +1439,8 @@ malloc_init_hard(void) { } malloc_mutex_lock(tsd_tsdn(tsd), &init_lock); - /* Need this before prof_boot2 (for allocation). */ - if (malloc_init_narenas()) { + /* Initialize narenas before prof_boot2 (for allocation). */ + if (malloc_init_narenas() || malloc_init_background_threads(tsd)) { malloc_mutex_unlock(tsd_tsdn(tsd), &init_lock); return true; } @@ -1439,6 +1457,23 @@ malloc_init_hard(void) { malloc_mutex_unlock(tsd_tsdn(tsd), &init_lock); malloc_tsd_boot1(); + + /* Update TSD after tsd_boot1. */ + tsd = tsd_fetch(); + if (opt_background_thread) { + assert(have_background_thread); + /* + * Need to finish init & unlock first before creating background + * threads (pthread_create depends on malloc). 
+ */ + malloc_mutex_lock(tsd_tsdn(tsd), &background_thread_lock); + bool err = background_thread_create(tsd, 0); + malloc_mutex_unlock(tsd_tsdn(tsd), &background_thread_lock); + if (err) { + return true; + } + } + return false; } @@ -2970,7 +3005,13 @@ _malloc_prefork(void) ctl_prefork(tsd_tsdn(tsd)); tcache_prefork(tsd_tsdn(tsd)); malloc_mutex_prefork(tsd_tsdn(tsd), &arenas_lock); + if (have_background_thread) { + background_thread_prefork0(tsd_tsdn(tsd)); + } prof_prefork0(tsd_tsdn(tsd)); + if (have_background_thread) { + background_thread_prefork1(tsd_tsdn(tsd)); + } /* Break arena prefork into stages to preserve lock order. */ for (i = 0; i < 7; i++) { for (j = 0; j < narenas; j++) { @@ -3036,6 +3077,9 @@ _malloc_postfork(void) } } prof_postfork_parent(tsd_tsdn(tsd)); + if (have_background_thread) { + background_thread_postfork_parent(tsd_tsdn(tsd)); + } malloc_mutex_postfork_parent(tsd_tsdn(tsd), &arenas_lock); tcache_postfork_parent(tsd_tsdn(tsd)); ctl_postfork_parent(tsd_tsdn(tsd)); @@ -3060,6 +3104,9 @@ jemalloc_postfork_child(void) { } } prof_postfork_child(tsd_tsdn(tsd)); + if (have_background_thread) { + background_thread_postfork_child(tsd_tsdn(tsd)); + } malloc_mutex_postfork_child(tsd_tsdn(tsd), &arenas_lock); tcache_postfork_child(tsd_tsdn(tsd)); ctl_postfork_child(tsd_tsdn(tsd)); diff --git a/src/mutex.c b/src/mutex.c index b15bbf6e..c92ddd72 100644 --- a/src/mutex.c +++ b/src/mutex.c @@ -5,10 +5,6 @@ #include "jemalloc/internal/assert.h" #include "jemalloc/internal/malloc_io.h" -#if defined(JEMALLOC_LAZY_LOCK) && !defined(_WIN32) -#include <dlfcn.h> -#endif - #ifndef _CRT_SPINCOUNT #define _CRT_SPINCOUNT 4000 #endif @@ -24,10 +20,6 @@ static bool postpone_init = true; static malloc_mutex_t *postponed_mutexes = NULL; #endif -#if defined(JEMALLOC_LAZY_LOCK) && !defined(_WIN32) -static void pthread_create_once(void); -#endif - /******************************************************************************/ /* * We intercept pthread_create() calls in order to
toggle isthreaded if the @@ -35,18 +27,9 @@ static void pthread_create_once(void); */ #if defined(JEMALLOC_LAZY_LOCK) && !defined(_WIN32) -static int (*pthread_create_fptr)(pthread_t *__restrict, const pthread_attr_t *, - void *(*)(void *), void *__restrict); - static void pthread_create_once(void) { - pthread_create_fptr = dlsym(RTLD_NEXT, "pthread_create"); - if (pthread_create_fptr == NULL) { - malloc_write(": Error in dlsym(RTLD_NEXT, " - "\"pthread_create\")\n"); - abort(); - } - + pthread_create_fptr = load_pthread_create_fptr(); isthreaded = true; } diff --git a/src/stats.c b/src/stats.c index 3c9eb35a..f98b8ece 100644 --- a/src/stats.c +++ b/src/stats.c @@ -816,6 +816,7 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, OPT_WRITE_CHAR_P(dss, ",") OPT_WRITE_UNSIGNED(narenas, ",") OPT_WRITE_CHAR_P(percpu_arena, ",") + OPT_WRITE_BOOL_MUTABLE(background_thread, background_thread, ",") OPT_WRITE_SSIZE_T_MUTABLE(dirty_decay_ms, arenas.dirty_decay_ms, ",") OPT_WRITE_SSIZE_T_MUTABLE(muzzy_decay_ms, arenas.muzzy_decay_ms, ",") OPT_WRITE_CHAR_P(junk, ",") diff --git a/test/integration/extent.c b/test/integration/extent.c index 32432af9..7262b803 100644 --- a/test/integration/extent.c +++ b/test/integration/extent.c @@ -2,6 +2,18 @@ #include "test/extent_hooks.h" +static bool +check_background_thread_enabled(void) { + bool enabled; + size_t sz = sizeof(bool); + int ret = mallctl("background_thread", (void *)&enabled, &sz, NULL,0); + if (ret == ENOENT) { + return false; + } + assert_d_eq(ret, 0, "Unexpected mallctl error"); + return enabled; +} + static void test_extent_body(unsigned arena_ind) { void *p; @@ -124,6 +136,7 @@ TEST_BEGIN(test_extent_manual_hook) { assert_ptr_ne(old_hooks->merge, extent_merge_hook, "Unexpected extent_hooks error"); + test_skip_if(check_background_thread_enabled()); test_extent_body(arena_ind); /* Restore extent hooks. 
*/ @@ -164,6 +177,7 @@ TEST_BEGIN(test_extent_auto_hook) { assert_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, (void *)&new_hooks, new_size), 0, "Unexpected mallctl() failure"); + test_skip_if(check_background_thread_enabled()); test_extent_body(arena_ind); } TEST_END diff --git a/test/unit/decay.c b/test/unit/decay.c index 19f76fa5..f727bf93 100644 --- a/test/unit/decay.c +++ b/test/unit/decay.c @@ -9,6 +9,18 @@ static unsigned nupdates_mock; static nstime_t time_mock; static bool monotonic_mock; +static bool +check_background_thread_enabled(void) { + bool enabled; + size_t sz = sizeof(bool); + int ret = mallctl("background_thread", (void *)&enabled, &sz, NULL,0); + if (ret == ENOENT) { + return false; + } + assert_d_eq(ret, 0, "Unexpected mallctl error"); + return enabled; +} + static bool nstime_monotonic_mock(void) { return monotonic_mock; @@ -167,6 +179,8 @@ generate_dirty(unsigned arena_ind, size_t size) { } TEST_BEGIN(test_decay_ticks) { + test_skip_if(check_background_thread_enabled()); + ticker_t *decay_ticker; unsigned tick0, tick1, arena_ind; size_t sz, large0; @@ -405,6 +419,7 @@ decay_ticker_helper(unsigned arena_ind, int flags, bool dirty, ssize_t dt, } TEST_BEGIN(test_decay_ticker) { + test_skip_if(check_background_thread_enabled()); #define NPS 2048 ssize_t ddt = opt_dirty_decay_ms; ssize_t mdt = opt_muzzy_decay_ms; @@ -466,6 +481,7 @@ TEST_BEGIN(test_decay_ticker) { TEST_END TEST_BEGIN(test_decay_nonmonotonic) { + test_skip_if(check_background_thread_enabled()); #define NPS (SMOOTHSTEP_NSTEPS + 1) int flags = (MALLOCX_ARENA(0) | MALLOCX_TCACHE_NONE); void *ps[NPS]; @@ -523,6 +539,8 @@ TEST_BEGIN(test_decay_nonmonotonic) { TEST_END TEST_BEGIN(test_decay_now) { + test_skip_if(check_background_thread_enabled()); + unsigned arena_ind = do_arena_create(0, 0); assert_zu_eq(get_arena_pdirty(arena_ind), 0, "Unexpected dirty pages"); assert_zu_eq(get_arena_pmuzzy(arena_ind), 0, "Unexpected muzzy pages"); @@ -541,6 +559,8 @@ 
TEST_BEGIN(test_decay_now) { TEST_END TEST_BEGIN(test_decay_never) { + test_skip_if(check_background_thread_enabled()); + unsigned arena_ind = do_arena_create(-1, -1); int flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE; assert_zu_eq(get_arena_pdirty(arena_ind), 0, "Unexpected dirty pages"); diff --git a/test/unit/smoothstep.c b/test/unit/smoothstep.c index 6e3eb0f9..549aed12 100644 --- a/test/unit/smoothstep.c +++ b/test/unit/smoothstep.c @@ -1,7 +1,7 @@ #include "test/jemalloc_test.h" static const uint64_t smoothstep_tab[] = { -#define STEP(step, h, x, y) \ +#define STEP(step, h, x, y, h_sum) \ h, SMOOTHSTEP #undef STEP diff --git a/test/unit/stats.c b/test/unit/stats.c index f5ee1287..d9849d80 100644 --- a/test/unit/stats.c +++ b/test/unit/stats.c @@ -115,8 +115,10 @@ TEST_BEGIN(test_stats_arenas_summary) { "Unexepected mallctl() result"); if (config_stats) { - assert_u64_gt(dirty_npurge + muzzy_npurge, 0, - "At least one purge should have occurred"); + if (!background_thread_enabled()) { + assert_u64_gt(dirty_npurge + muzzy_npurge, 0, + "At least one purge should have occurred"); + } assert_u64_le(dirty_nmadvise, dirty_purged, "dirty_nmadvise should be no greater than dirty_purged"); assert_u64_le(muzzy_nmadvise, muzzy_purged,