Implementing opt.background_thread.

Added opt.background_thread to enable background threads, which handles purging
currently.  When enabled, decay ticks will not trigger purging (which will be
left to the background threads).  We limit the max number of threads to NCPUs.
When percpu arena is enabled, set CPU affinity for the background threads as
well.

The sleep interval of background threads is dynamic and determined by computing
number of pages to purge in the future (based on backlog).
This commit is contained in:
Qi Wang 2017-03-17 12:42:33 -07:00 committed by Qi Wang
parent 3f685e8824
commit b693c7868e
23 changed files with 1245 additions and 348 deletions

View File

@ -91,6 +91,7 @@ BINS := $(objroot)bin/jemalloc-config $(objroot)bin/jemalloc.sh $(objroot)bin/je
C_HDRS := $(objroot)include/jemalloc/jemalloc$(install_suffix).h C_HDRS := $(objroot)include/jemalloc/jemalloc$(install_suffix).h
C_SRCS := $(srcroot)src/jemalloc.c \ C_SRCS := $(srcroot)src/jemalloc.c \
$(srcroot)src/arena.c \ $(srcroot)src/arena.c \
$(srcroot)src/background_thread.c \
$(srcroot)src/base.c \ $(srcroot)src/base.c \
$(srcroot)src/bitmap.c \ $(srcroot)src/bitmap.c \
$(srcroot)src/ckh.c \ $(srcroot)src/ckh.c \

View File

@ -1443,12 +1443,23 @@ dnl ============================================================================
dnl Configure pthreads. dnl Configure pthreads.
if test "x$abi" != "xpecoff" ; then if test "x$abi" != "xpecoff" ; then
AC_DEFINE([JEMALLOC_HAVE_PTHREAD], [ ])
AC_CHECK_HEADERS([pthread.h], , [AC_MSG_ERROR([pthread.h is missing])]) AC_CHECK_HEADERS([pthread.h], , [AC_MSG_ERROR([pthread.h is missing])])
dnl Some systems may embed pthreads functionality in libc; check for libpthread dnl Some systems may embed pthreads functionality in libc; check for libpthread
dnl first, but try libc too before failing. dnl first, but try libc too before failing.
AC_CHECK_LIB([pthread], [pthread_create], [JE_APPEND_VS(LIBS, -lpthread)], AC_CHECK_LIB([pthread], [pthread_create], [JE_APPEND_VS(LIBS, -lpthread)],
[AC_SEARCH_LIBS([pthread_create], , , [AC_SEARCH_LIBS([pthread_create], , ,
AC_MSG_ERROR([libpthread is missing]))]) AC_MSG_ERROR([libpthread is missing]))])
wrap_syms="${wrap_syms} pthread_create"
dnl Check if we have dlsym support.
have_dlsym="1"
AC_CHECK_HEADERS([dlfcn.h],
AC_CHECK_FUNC([dlsym], [],
[AC_CHECK_LIB([dl], [dlsym], [LIBS="$LIBS -ldl"], [have_dlsym="0"])]),
[have_dlsym="0"])
if test "x$have_dlsym" = "x1" ; then
AC_DEFINE([JEMALLOC_HAVE_DLSYM], [ ])
fi
JE_COMPILABLE([pthread_atfork(3)], [ JE_COMPILABLE([pthread_atfork(3)], [
#include <pthread.h> #include <pthread.h>
], [ ], [
@ -1563,6 +1574,15 @@ if test "x$have_sched_getcpu" = "x1" ; then
AC_DEFINE([JEMALLOC_HAVE_SCHED_GETCPU], [ ]) AC_DEFINE([JEMALLOC_HAVE_SCHED_GETCPU], [ ])
fi fi
dnl Check if the GNU-specific sched_setaffinity function exists.
AC_CHECK_FUNC([sched_setaffinity],
[have_sched_setaffinity="1"],
[have_sched_setaffinity="0"]
)
if test "x$have_sched_setaffinity" = "x1" ; then
AC_DEFINE([JEMALLOC_HAVE_SCHED_SETAFFINITY], [ ])
fi
dnl Check if the Solaris/BSD issetugid function exists. dnl Check if the Solaris/BSD issetugid function exists.
AC_CHECK_FUNC([issetugid], AC_CHECK_FUNC([issetugid],
[have_issetugid="1"], [have_issetugid="1"],
@ -1623,15 +1643,11 @@ if test "x${enable_lazy_lock}" = "x1" -a "x${abi}" = "xpecoff" ; then
enable_lazy_lock="0" enable_lazy_lock="0"
fi fi
if test "x$enable_lazy_lock" = "x1" ; then if test "x$enable_lazy_lock" = "x1" ; then
if test "x$abi" != "xpecoff" ; then if test "x$have_dlsym" = "x1" ; then
AC_CHECK_HEADERS([dlfcn.h], , [AC_MSG_ERROR([dlfcn.h is missing])]) AC_DEFINE([JEMALLOC_LAZY_LOCK], [ ])
AC_CHECK_FUNC([dlsym], [], else
[AC_CHECK_LIB([dl], [dlsym], [JE_APPEND_VS(LIBS, -ldl)], AC_MSG_ERROR([Missing dlsym support: lazy-lock cannot be enabled.])
[AC_MSG_ERROR([libdl is missing])])
])
fi fi
AC_DEFINE([JEMALLOC_LAZY_LOCK], [ ])
wrap_syms="${wrap_syms} pthread_create"
fi fi
AC_SUBST([enable_lazy_lock]) AC_SUBST([enable_lazy_lock])

View File

@ -5,7 +5,7 @@
#include "jemalloc/internal/size_classes.h" #include "jemalloc/internal/size_classes.h"
#include "jemalloc/internal/stats.h" #include "jemalloc/internal/stats.h"
static const size_t large_pad = static const size_t large_pad =
#ifdef JEMALLOC_CACHE_OBLIVIOUS #ifdef JEMALLOC_CACHE_OBLIVIOUS
PAGE PAGE
#else #else
@ -13,88 +13,91 @@ static const size_t large_pad =
#endif #endif
; ;
extern ssize_t opt_dirty_decay_ms; extern ssize_t opt_dirty_decay_ms;
extern ssize_t opt_muzzy_decay_ms; extern ssize_t opt_muzzy_decay_ms;
extern const arena_bin_info_t arena_bin_info[NBINS]; extern const arena_bin_info_t arena_bin_info[NBINS];
extern percpu_arena_mode_t percpu_arena_mode; extern percpu_arena_mode_t percpu_arena_mode;
extern const char *opt_percpu_arena; extern const char *opt_percpu_arena;
extern const char *percpu_arena_mode_names[]; extern const char *percpu_arena_mode_names[];
extern const uint64_t h_steps[SMOOTHSTEP_NSTEPS];
void arena_stats_large_nrequests_add(tsdn_t *tsdn, arena_stats_t *arena_stats, void arena_stats_large_nrequests_add(tsdn_t *tsdn, arena_stats_t *arena_stats,
szind_t szind, uint64_t nrequests); szind_t szind, uint64_t nrequests);
void arena_stats_mapped_add(tsdn_t *tsdn, arena_stats_t *arena_stats, void arena_stats_mapped_add(tsdn_t *tsdn, arena_stats_t *arena_stats,
size_t size); size_t size);
void arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, void arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena,
unsigned *nthreads, const char **dss, ssize_t *dirty_decay_ms, unsigned *nthreads, const char **dss, ssize_t *dirty_decay_ms,
ssize_t *muzzy_decay_ms, size_t *nactive, size_t *ndirty, size_t *nmuzzy); ssize_t *muzzy_decay_ms, size_t *nactive, size_t *ndirty, size_t *nmuzzy);
void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads,
const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms, const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms,
size_t *nactive, size_t *ndirty, size_t *nmuzzy, arena_stats_t *astats, size_t *nactive, size_t *ndirty, size_t *nmuzzy, arena_stats_t *astats,
malloc_bin_stats_t *bstats, malloc_large_stats_t *lstats); malloc_bin_stats_t *bstats, malloc_large_stats_t *lstats);
void arena_extents_dirty_dalloc(tsdn_t *tsdn, arena_t *arena, void arena_extents_dirty_dalloc(tsdn_t *tsdn, arena_t *arena,
extent_hooks_t **r_extent_hooks, extent_t *extent); extent_hooks_t **r_extent_hooks, extent_t *extent);
#ifdef JEMALLOC_JET #ifdef JEMALLOC_JET
size_t arena_slab_regind(extent_t *slab, szind_t binind, const void *ptr); size_t arena_slab_regind(extent_t *slab, szind_t binind, const void *ptr);
#endif #endif
extent_t *arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, extent_t *arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena,
size_t usize, size_t alignment, bool *zero); size_t usize, size_t alignment, bool *zero);
void arena_extent_dalloc_large_prep(tsdn_t *tsdn, arena_t *arena, void arena_extent_dalloc_large_prep(tsdn_t *tsdn, arena_t *arena,
extent_t *extent); extent_t *extent);
void arena_extent_ralloc_large_shrink(tsdn_t *tsdn, arena_t *arena, void arena_extent_ralloc_large_shrink(tsdn_t *tsdn, arena_t *arena,
extent_t *extent, size_t oldsize); extent_t *extent, size_t oldsize);
void arena_extent_ralloc_large_expand(tsdn_t *tsdn, arena_t *arena, void arena_extent_ralloc_large_expand(tsdn_t *tsdn, arena_t *arena,
extent_t *extent, size_t oldsize); extent_t *extent, size_t oldsize);
ssize_t arena_dirty_decay_ms_get(arena_t *arena); ssize_t arena_dirty_decay_ms_get(arena_t *arena);
bool arena_dirty_decay_ms_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_ms); bool arena_dirty_decay_ms_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_ms);
ssize_t arena_muzzy_decay_ms_get(arena_t *arena); ssize_t arena_muzzy_decay_ms_get(arena_t *arena);
bool arena_muzzy_decay_ms_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_ms); bool arena_muzzy_decay_ms_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_ms);
void arena_decay(tsdn_t *tsdn, arena_t *arena, bool all); void arena_decay(tsdn_t *tsdn, arena_t *arena, bool is_background_thread,
void arena_reset(tsd_t *tsd, arena_t *arena); bool all);
void arena_destroy(tsd_t *tsd, arena_t *arena); void arena_reset(tsd_t *tsd, arena_t *arena);
void arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, void arena_destroy(tsd_t *tsd, arena_t *arena);
void arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache,
tcache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes); tcache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes);
void arena_alloc_junk_small(void *ptr, const arena_bin_info_t *bin_info, void arena_alloc_junk_small(void *ptr, const arena_bin_info_t *bin_info,
bool zero); bool zero);
typedef void (arena_dalloc_junk_small_t)(void *, const arena_bin_info_t *); typedef void (arena_dalloc_junk_small_t)(void *, const arena_bin_info_t *);
extern arena_dalloc_junk_small_t *JET_MUTABLE arena_dalloc_junk_small; extern arena_dalloc_junk_small_t *JET_MUTABLE arena_dalloc_junk_small;
void *arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, void *arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size,
szind_t ind, bool zero); szind_t ind, bool zero);
void *arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, void *arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize,
size_t alignment, bool zero, tcache_t *tcache); size_t alignment, bool zero, tcache_t *tcache);
void arena_prof_promote(tsdn_t *tsdn, const void *ptr, size_t usize); void arena_prof_promote(tsdn_t *tsdn, const void *ptr, size_t usize);
void arena_dalloc_promoted(tsdn_t *tsdn, void *ptr, tcache_t *tcache, void arena_dalloc_promoted(tsdn_t *tsdn, void *ptr, tcache_t *tcache,
bool slow_path); bool slow_path);
void arena_dalloc_bin_junked_locked(tsdn_t *tsdn, arena_t *arena, void arena_dalloc_bin_junked_locked(tsdn_t *tsdn, arena_t *arena,
extent_t *extent, void *ptr); extent_t *extent, void *ptr);
void arena_dalloc_small(tsdn_t *tsdn, void *ptr); void arena_dalloc_small(tsdn_t *tsdn, void *ptr);
bool arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, bool arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size,
size_t extra, bool zero); size_t extra, bool zero);
void *arena_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize, void *arena_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize,
size_t size, size_t alignment, bool zero, tcache_t *tcache); size_t size, size_t alignment, bool zero, tcache_t *tcache);
dss_prec_t arena_dss_prec_get(arena_t *arena); dss_prec_t arena_dss_prec_get(arena_t *arena);
bool arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec); bool arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec);
ssize_t arena_dirty_decay_ms_default_get(void); ssize_t arena_dirty_decay_ms_default_get(void);
bool arena_dirty_decay_ms_default_set(ssize_t decay_ms); bool arena_dirty_decay_ms_default_set(ssize_t decay_ms);
ssize_t arena_muzzy_decay_ms_default_get(void); ssize_t arena_muzzy_decay_ms_default_get(void);
bool arena_muzzy_decay_ms_default_set(ssize_t decay_ms); bool arena_muzzy_decay_ms_default_set(ssize_t decay_ms);
unsigned arena_nthreads_get(arena_t *arena, bool internal); unsigned arena_nthreads_get(arena_t *arena, bool internal);
void arena_nthreads_inc(arena_t *arena, bool internal); void arena_nthreads_inc(arena_t *arena, bool internal);
void arena_nthreads_dec(arena_t *arena, bool internal); void arena_nthreads_dec(arena_t *arena, bool internal);
size_t arena_extent_sn_next(arena_t *arena); size_t arena_extent_sn_next(arena_t *arena);
arena_t *arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks); arena_t *arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks);
void arena_boot(void); void arena_boot(void);
void arena_prefork0(tsdn_t *tsdn, arena_t *arena); void arena_prefork0(tsdn_t *tsdn, arena_t *arena);
void arena_prefork1(tsdn_t *tsdn, arena_t *arena); void arena_prefork1(tsdn_t *tsdn, arena_t *arena);
void arena_prefork2(tsdn_t *tsdn, arena_t *arena); void arena_prefork2(tsdn_t *tsdn, arena_t *arena);
void arena_prefork3(tsdn_t *tsdn, arena_t *arena); void arena_prefork3(tsdn_t *tsdn, arena_t *arena);
void arena_prefork4(tsdn_t *tsdn, arena_t *arena); void arena_prefork4(tsdn_t *tsdn, arena_t *arena);
void arena_prefork5(tsdn_t *tsdn, arena_t *arena); void arena_prefork5(tsdn_t *tsdn, arena_t *arena);
void arena_prefork6(tsdn_t *tsdn, arena_t *arena); void arena_prefork6(tsdn_t *tsdn, arena_t *arena);
void arena_postfork_parent(tsdn_t *tsdn, arena_t *arena); void arena_postfork_parent(tsdn_t *tsdn, arena_t *arena);
void arena_postfork_child(tsdn_t *tsdn, arena_t *arena); void arena_postfork_child(tsdn_t *tsdn, arena_t *arena);
#endif /* JEMALLOC_INTERNAL_ARENA_EXTERNS_H */ #endif /* JEMALLOC_INTERNAL_ARENA_EXTERNS_H */

View File

@ -75,7 +75,7 @@ arena_decay_ticks(tsdn_t *tsdn, arena_t *arena, unsigned nticks) {
return; return;
} }
if (unlikely(ticker_ticks(decay_ticker, nticks))) { if (unlikely(ticker_ticks(decay_ticker, nticks))) {
arena_decay(tsdn, arena, false); arena_decay(tsdn, arena, false, false);
} }
} }

View File

@ -0,0 +1,29 @@
#ifndef JEMALLOC_INTERNAL_BACKGROUND_THREAD_EXTERNS_H
#define JEMALLOC_INTERNAL_BACKGROUND_THREAD_EXTERNS_H
extern bool opt_background_thread;
extern malloc_mutex_t background_thread_lock;
extern atomic_b_t background_thread_enabled_state;
extern size_t n_background_threads;
extern background_thread_info_t *background_thread_info;
bool background_thread_create(tsd_t *tsd, unsigned arena_ind);
bool background_threads_init(tsd_t *tsd);
bool background_threads_enable(tsd_t *tsd);
bool background_threads_disable(tsd_t *tsd);
bool background_threads_disable_single(tsd_t *tsd,
background_thread_info_t *info);
void background_thread_interval_check(tsdn_t *tsdn, arena_t *arena,
arena_decay_t *decay, size_t npages_new);
void background_thread_prefork0(tsdn_t *tsdn);
void background_thread_prefork1(tsdn_t *tsdn);
void background_thread_postfork_parent(tsdn_t *tsdn);
void background_thread_postfork_child(tsdn_t *tsdn);
#if defined(JEMALLOC_BACKGROUND_THREAD) || defined(JEMALLOC_LAZY_LOCK)
extern int (*pthread_create_fptr)(pthread_t *__restrict, const pthread_attr_t *,
void *(*)(void *), void *__restrict);
void *load_pthread_create_fptr(void);
#endif
#endif /* JEMALLOC_INTERNAL_BACKGROUND_THREAD_EXTERNS_H */

View File

@ -0,0 +1,21 @@
#ifndef JEMALLOC_INTERNAL_BACKGROUND_THREAD_INLINES_H
#define JEMALLOC_INTERNAL_BACKGROUND_THREAD_INLINES_H
JEMALLOC_ALWAYS_INLINE bool
background_thread_enabled(void) {
return atomic_load_b(&background_thread_enabled_state, ATOMIC_RELAXED);
}
JEMALLOC_ALWAYS_INLINE void
background_thread_enabled_set(tsdn_t *tsdn, bool state) {
malloc_mutex_assert_owner(tsdn, &background_thread_lock);
atomic_store_b(&background_thread_enabled_state, state, ATOMIC_RELAXED);
}
JEMALLOC_ALWAYS_INLINE background_thread_info_t *
arena_background_thread_info_get(arena_t *arena) {
unsigned arena_ind = arena_ind_get(arena);
return &background_thread_info[arena_ind % ncpus];
}
#endif /* JEMALLOC_INTERNAL_BACKGROUND_THREAD_INLINES_H */

View File

@ -0,0 +1,25 @@
#ifndef JEMALLOC_INTERNAL_BACKGROUND_THREAD_STRUCTS_H
#define JEMALLOC_INTERNAL_BACKGROUND_THREAD_STRUCTS_H
struct background_thread_info_s {
malloc_mutex_t mtx;
#ifdef JEMALLOC_BACKGROUND_THREAD
/* Background thread is pthread specific. */
pthread_cond_t cond;
pthread_t thread;
/* Whether the thread has been created. */
bool started;
/* Next scheduled wakeup time (absolute time). */
nstime_t next_wakeup;
/*
* Since the last background thread run, newly added number of pages
* that need to be purged by the next wakeup. This is adjusted on
* epoch advance, and is used to determine whether we should signal the
* background thread to wake up earlier.
*/
size_t npages_to_purge_new;
#endif /* ifdef JEMALLOC_BACKGROUND_THREAD */
};
typedef struct background_thread_info_s background_thread_info_t;
#endif /* JEMALLOC_INTERNAL_BACKGROUND_THREAD_STRUCTS_H */

View File

@ -301,12 +301,21 @@
/* glibc memalign hook. */ /* glibc memalign hook. */
#undef JEMALLOC_GLIBC_MEMALIGN_HOOK #undef JEMALLOC_GLIBC_MEMALIGN_HOOK
/* pthread support */
#undef JEMALLOC_HAVE_PTHREAD
/* dlsym() support */
#undef JEMALLOC_HAVE_DLSYM
/* Adaptive mutex support in pthreads. */ /* Adaptive mutex support in pthreads. */
#undef JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP #undef JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP
/* GNU specific sched_getcpu support */ /* GNU specific sched_getcpu support */
#undef JEMALLOC_HAVE_SCHED_GETCPU #undef JEMALLOC_HAVE_SCHED_GETCPU
/* GNU specific sched_setaffinity support */
#undef JEMALLOC_HAVE_SCHED_SETAFFINITY
/* /*
* If defined, jemalloc symbols are not exported (doesn't work when * If defined, jemalloc symbols are not exported (doesn't work when
* JEMALLOC_PREFIX is not defined). * JEMALLOC_PREFIX is not defined).

View File

@ -65,6 +65,7 @@
#include "jemalloc/internal/arena_structs_b.h" #include "jemalloc/internal/arena_structs_b.h"
#include "jemalloc/internal/rtree_structs.h" #include "jemalloc/internal/rtree_structs.h"
#include "jemalloc/internal/tcache_structs.h" #include "jemalloc/internal/tcache_structs.h"
#include "jemalloc/internal/background_thread_structs.h"
/******************************************************************************/ /******************************************************************************/
/* EXTERNS */ /* EXTERNS */
@ -82,6 +83,7 @@
#include "jemalloc/internal/large_externs.h" #include "jemalloc/internal/large_externs.h"
#include "jemalloc/internal/tcache_externs.h" #include "jemalloc/internal/tcache_externs.h"
#include "jemalloc/internal/prof_externs.h" #include "jemalloc/internal/prof_externs.h"
#include "jemalloc/internal/background_thread_externs.h"
/******************************************************************************/ /******************************************************************************/
/* INLINES */ /* INLINES */
@ -105,5 +107,6 @@
#include "jemalloc/internal/arena_inlines_b.h" #include "jemalloc/internal/arena_inlines_b.h"
#include "jemalloc/internal/jemalloc_internal_inlines_c.h" #include "jemalloc/internal/jemalloc_internal_inlines_c.h"
#include "jemalloc/internal/prof_inlines_b.h" #include "jemalloc/internal/prof_inlines_b.h"
#include "jemalloc/internal/background_thread_inlines.h"
#endif /* JEMALLOC_INTERNAL_INCLUDES_H */ #endif /* JEMALLOC_INTERNAL_INCLUDES_H */

View File

@ -169,4 +169,17 @@ static const bool force_ivsalloc =
#endif #endif
; ;
#if (defined(JEMALLOC_HAVE_PTHREAD) && defined(JEMALLOC_HAVE_DLSYM) \
&& !defined(JEMALLOC_OSSPIN) && !defined(JEMALLOC_OS_UNFAIR_LOCK))
/* Currently background thread supports pthread only. */
#define JEMALLOC_BACKGROUND_THREAD
#endif
static const bool have_background_thread =
#ifdef JEMALLOC_BACKGROUND_THREAD
true
#else
false
#endif
;
#endif /* JEMALLOC_PREAMBLE_H */ #endif /* JEMALLOC_PREAMBLE_H */

View File

@ -27,206 +27,206 @@
#define SMOOTHSTEP_NSTEPS 200 #define SMOOTHSTEP_NSTEPS 200
#define SMOOTHSTEP_BFP 24 #define SMOOTHSTEP_BFP 24
#define SMOOTHSTEP \ #define SMOOTHSTEP \
/* STEP(step, h, x, y) */ \ /* STEP(step, h, x, y, h_sum) */ \
STEP( 1, UINT64_C(0x0000000000000014), 0.005, 0.000001240643750) \ STEP( 1, UINT64_C(0x0000000000000014), 0.005, 0.000001240643750, UINT64_C(0x0000000000000014)) \
STEP( 2, UINT64_C(0x00000000000000a5), 0.010, 0.000009850600000) \ STEP( 2, UINT64_C(0x00000000000000a5), 0.010, 0.000009850600000, UINT64_C(0x00000000000000b9)) \
STEP( 3, UINT64_C(0x0000000000000229), 0.015, 0.000032995181250) \ STEP( 3, UINT64_C(0x0000000000000229), 0.015, 0.000032995181250, UINT64_C(0x00000000000002e2)) \
STEP( 4, UINT64_C(0x0000000000000516), 0.020, 0.000077619200000) \ STEP( 4, UINT64_C(0x0000000000000516), 0.020, 0.000077619200000, UINT64_C(0x00000000000007f8)) \
STEP( 5, UINT64_C(0x00000000000009dc), 0.025, 0.000150449218750) \ STEP( 5, UINT64_C(0x00000000000009dc), 0.025, 0.000150449218750, UINT64_C(0x00000000000011d4)) \
STEP( 6, UINT64_C(0x00000000000010e8), 0.030, 0.000257995800000) \ STEP( 6, UINT64_C(0x00000000000010e8), 0.030, 0.000257995800000, UINT64_C(0x00000000000022bc)) \
STEP( 7, UINT64_C(0x0000000000001aa4), 0.035, 0.000406555756250) \ STEP( 7, UINT64_C(0x0000000000001aa4), 0.035, 0.000406555756250, UINT64_C(0x0000000000003d60)) \
STEP( 8, UINT64_C(0x0000000000002777), 0.040, 0.000602214400000) \ STEP( 8, UINT64_C(0x0000000000002777), 0.040, 0.000602214400000, UINT64_C(0x00000000000064d7)) \
STEP( 9, UINT64_C(0x00000000000037c2), 0.045, 0.000850847793750) \ STEP( 9, UINT64_C(0x00000000000037c2), 0.045, 0.000850847793750, UINT64_C(0x0000000000009c99)) \
STEP( 10, UINT64_C(0x0000000000004be6), 0.050, 0.001158125000000) \ STEP( 10, UINT64_C(0x0000000000004be6), 0.050, 0.001158125000000, UINT64_C(0x000000000000e87f)) \
STEP( 11, UINT64_C(0x000000000000643c), 0.055, 0.001529510331250) \ STEP( 11, UINT64_C(0x000000000000643c), 0.055, 0.001529510331250, UINT64_C(0x0000000000014cbb)) \
STEP( 12, UINT64_C(0x000000000000811f), 0.060, 0.001970265600000) \ STEP( 12, UINT64_C(0x000000000000811f), 0.060, 0.001970265600000, UINT64_C(0x000000000001cdda)) \
STEP( 13, UINT64_C(0x000000000000a2e2), 0.065, 0.002485452368750) \ STEP( 13, UINT64_C(0x000000000000a2e2), 0.065, 0.002485452368750, UINT64_C(0x00000000000270bc)) \
STEP( 14, UINT64_C(0x000000000000c9d8), 0.070, 0.003079934200000) \ STEP( 14, UINT64_C(0x000000000000c9d8), 0.070, 0.003079934200000, UINT64_C(0x0000000000033a94)) \
STEP( 15, UINT64_C(0x000000000000f64f), 0.075, 0.003758378906250) \ STEP( 15, UINT64_C(0x000000000000f64f), 0.075, 0.003758378906250, UINT64_C(0x00000000000430e3)) \
STEP( 16, UINT64_C(0x0000000000012891), 0.080, 0.004525260800000) \ STEP( 16, UINT64_C(0x0000000000012891), 0.080, 0.004525260800000, UINT64_C(0x0000000000055974)) \
STEP( 17, UINT64_C(0x00000000000160e7), 0.085, 0.005384862943750) \ STEP( 17, UINT64_C(0x00000000000160e7), 0.085, 0.005384862943750, UINT64_C(0x000000000006ba5b)) \
STEP( 18, UINT64_C(0x0000000000019f95), 0.090, 0.006341279400000) \ STEP( 18, UINT64_C(0x0000000000019f95), 0.090, 0.006341279400000, UINT64_C(0x00000000000859f0)) \
STEP( 19, UINT64_C(0x000000000001e4dc), 0.095, 0.007398417481250) \ STEP( 19, UINT64_C(0x000000000001e4dc), 0.095, 0.007398417481250, UINT64_C(0x00000000000a3ecc)) \
STEP( 20, UINT64_C(0x00000000000230fc), 0.100, 0.008560000000000) \ STEP( 20, UINT64_C(0x00000000000230fc), 0.100, 0.008560000000000, UINT64_C(0x00000000000c6fc8)) \
STEP( 21, UINT64_C(0x0000000000028430), 0.105, 0.009829567518750) \ STEP( 21, UINT64_C(0x0000000000028430), 0.105, 0.009829567518750, UINT64_C(0x00000000000ef3f8)) \
STEP( 22, UINT64_C(0x000000000002deb0), 0.110, 0.011210480600000) \ STEP( 22, UINT64_C(0x000000000002deb0), 0.110, 0.011210480600000, UINT64_C(0x000000000011d2a8)) \
STEP( 23, UINT64_C(0x00000000000340b1), 0.115, 0.012705922056250) \ STEP( 23, UINT64_C(0x00000000000340b1), 0.115, 0.012705922056250, UINT64_C(0x0000000000151359)) \
STEP( 24, UINT64_C(0x000000000003aa67), 0.120, 0.014318899200000) \ STEP( 24, UINT64_C(0x000000000003aa67), 0.120, 0.014318899200000, UINT64_C(0x000000000018bdc0)) \
STEP( 25, UINT64_C(0x0000000000041c00), 0.125, 0.016052246093750) \ STEP( 25, UINT64_C(0x0000000000041c00), 0.125, 0.016052246093750, UINT64_C(0x00000000001cd9c0)) \
STEP( 26, UINT64_C(0x00000000000495a8), 0.130, 0.017908625800000) \ STEP( 26, UINT64_C(0x00000000000495a8), 0.130, 0.017908625800000, UINT64_C(0x0000000000216f68)) \
STEP( 27, UINT64_C(0x000000000005178b), 0.135, 0.019890532631250) \ STEP( 27, UINT64_C(0x000000000005178b), 0.135, 0.019890532631250, UINT64_C(0x00000000002686f3)) \
STEP( 28, UINT64_C(0x000000000005a1cf), 0.140, 0.022000294400000) \ STEP( 28, UINT64_C(0x000000000005a1cf), 0.140, 0.022000294400000, UINT64_C(0x00000000002c28c2)) \
STEP( 29, UINT64_C(0x0000000000063498), 0.145, 0.024240074668750) \ STEP( 29, UINT64_C(0x0000000000063498), 0.145, 0.024240074668750, UINT64_C(0x0000000000325d5a)) \
STEP( 30, UINT64_C(0x000000000006d009), 0.150, 0.026611875000000) \ STEP( 30, UINT64_C(0x000000000006d009), 0.150, 0.026611875000000, UINT64_C(0x0000000000392d63)) \
STEP( 31, UINT64_C(0x000000000007743f), 0.155, 0.029117537206250) \ STEP( 31, UINT64_C(0x000000000007743f), 0.155, 0.029117537206250, UINT64_C(0x000000000040a1a2)) \
STEP( 32, UINT64_C(0x0000000000082157), 0.160, 0.031758745600000) \ STEP( 32, UINT64_C(0x0000000000082157), 0.160, 0.031758745600000, UINT64_C(0x000000000048c2f9)) \
STEP( 33, UINT64_C(0x000000000008d76b), 0.165, 0.034537029243750) \ STEP( 33, UINT64_C(0x000000000008d76b), 0.165, 0.034537029243750, UINT64_C(0x0000000000519a64)) \
STEP( 34, UINT64_C(0x0000000000099691), 0.170, 0.037453764200000) \ STEP( 34, UINT64_C(0x0000000000099691), 0.170, 0.037453764200000, UINT64_C(0x00000000005b30f5)) \
STEP( 35, UINT64_C(0x00000000000a5edf), 0.175, 0.040510175781250) \ STEP( 35, UINT64_C(0x00000000000a5edf), 0.175, 0.040510175781250, UINT64_C(0x0000000000658fd4)) \
STEP( 36, UINT64_C(0x00000000000b3067), 0.180, 0.043707340800000) \ STEP( 36, UINT64_C(0x00000000000b3067), 0.180, 0.043707340800000, UINT64_C(0x000000000070c03b)) \
STEP( 37, UINT64_C(0x00000000000c0b38), 0.185, 0.047046189818750) \ STEP( 37, UINT64_C(0x00000000000c0b38), 0.185, 0.047046189818750, UINT64_C(0x00000000007ccb73)) \
STEP( 38, UINT64_C(0x00000000000cef5e), 0.190, 0.050527509400000) \ STEP( 38, UINT64_C(0x00000000000cef5e), 0.190, 0.050527509400000, UINT64_C(0x000000000089bad1)) \
STEP( 39, UINT64_C(0x00000000000ddce6), 0.195, 0.054151944356250) \ STEP( 39, UINT64_C(0x00000000000ddce6), 0.195, 0.054151944356250, UINT64_C(0x00000000009797b7)) \
STEP( 40, UINT64_C(0x00000000000ed3d8), 0.200, 0.057920000000000) \ STEP( 40, UINT64_C(0x00000000000ed3d8), 0.200, 0.057920000000000, UINT64_C(0x0000000000a66b8f)) \
STEP( 41, UINT64_C(0x00000000000fd439), 0.205, 0.061832044393750) \ STEP( 41, UINT64_C(0x00000000000fd439), 0.205, 0.061832044393750, UINT64_C(0x0000000000b63fc8)) \
STEP( 42, UINT64_C(0x000000000010de0e), 0.210, 0.065888310600000) \ STEP( 42, UINT64_C(0x000000000010de0e), 0.210, 0.065888310600000, UINT64_C(0x0000000000c71dd6)) \
STEP( 43, UINT64_C(0x000000000011f158), 0.215, 0.070088898931250) \ STEP( 43, UINT64_C(0x000000000011f158), 0.215, 0.070088898931250, UINT64_C(0x0000000000d90f2e)) \
STEP( 44, UINT64_C(0x0000000000130e17), 0.220, 0.074433779200000) \ STEP( 44, UINT64_C(0x0000000000130e17), 0.220, 0.074433779200000, UINT64_C(0x0000000000ec1d45)) \
STEP( 45, UINT64_C(0x0000000000143448), 0.225, 0.078922792968750) \ STEP( 45, UINT64_C(0x0000000000143448), 0.225, 0.078922792968750, UINT64_C(0x000000000100518d)) \
STEP( 46, UINT64_C(0x00000000001563e7), 0.230, 0.083555655800000) \ STEP( 46, UINT64_C(0x00000000001563e7), 0.230, 0.083555655800000, UINT64_C(0x000000000115b574)) \
STEP( 47, UINT64_C(0x0000000000169cec), 0.235, 0.088331959506250) \ STEP( 47, UINT64_C(0x0000000000169cec), 0.235, 0.088331959506250, UINT64_C(0x00000000012c5260)) \
STEP( 48, UINT64_C(0x000000000017df4f), 0.240, 0.093251174400000) \ STEP( 48, UINT64_C(0x000000000017df4f), 0.240, 0.093251174400000, UINT64_C(0x00000000014431af)) \
STEP( 49, UINT64_C(0x0000000000192b04), 0.245, 0.098312651543750) \ STEP( 49, UINT64_C(0x0000000000192b04), 0.245, 0.098312651543750, UINT64_C(0x00000000015d5cb3)) \
STEP( 50, UINT64_C(0x00000000001a8000), 0.250, 0.103515625000000) \ STEP( 50, UINT64_C(0x00000000001a8000), 0.250, 0.103515625000000, UINT64_C(0x000000000177dcb3)) \
STEP( 51, UINT64_C(0x00000000001bde32), 0.255, 0.108859214081250) \ STEP( 51, UINT64_C(0x00000000001bde32), 0.255, 0.108859214081250, UINT64_C(0x000000000193bae5)) \
STEP( 52, UINT64_C(0x00000000001d458b), 0.260, 0.114342425600000) \ STEP( 52, UINT64_C(0x00000000001d458b), 0.260, 0.114342425600000, UINT64_C(0x0000000001b10070)) \
STEP( 53, UINT64_C(0x00000000001eb5f8), 0.265, 0.119964156118750) \ STEP( 53, UINT64_C(0x00000000001eb5f8), 0.265, 0.119964156118750, UINT64_C(0x0000000001cfb668)) \
STEP( 54, UINT64_C(0x0000000000202f65), 0.270, 0.125723194200000) \ STEP( 54, UINT64_C(0x0000000000202f65), 0.270, 0.125723194200000, UINT64_C(0x0000000001efe5cd)) \
STEP( 55, UINT64_C(0x000000000021b1bb), 0.275, 0.131618222656250) \ STEP( 55, UINT64_C(0x000000000021b1bb), 0.275, 0.131618222656250, UINT64_C(0x0000000002119788)) \
STEP( 56, UINT64_C(0x0000000000233ce3), 0.280, 0.137647820800000) \ STEP( 56, UINT64_C(0x0000000000233ce3), 0.280, 0.137647820800000, UINT64_C(0x000000000234d46b)) \
STEP( 57, UINT64_C(0x000000000024d0c3), 0.285, 0.143810466693750) \ STEP( 57, UINT64_C(0x000000000024d0c3), 0.285, 0.143810466693750, UINT64_C(0x000000000259a52e)) \
STEP( 58, UINT64_C(0x0000000000266d40), 0.290, 0.150104539400000) \ STEP( 58, UINT64_C(0x0000000000266d40), 0.290, 0.150104539400000, UINT64_C(0x000000000280126e)) \
STEP( 59, UINT64_C(0x000000000028123d), 0.295, 0.156528321231250) \ STEP( 59, UINT64_C(0x000000000028123d), 0.295, 0.156528321231250, UINT64_C(0x0000000002a824ab)) \
STEP( 60, UINT64_C(0x000000000029bf9c), 0.300, 0.163080000000000) \ STEP( 60, UINT64_C(0x000000000029bf9c), 0.300, 0.163080000000000, UINT64_C(0x0000000002d1e447)) \
STEP( 61, UINT64_C(0x00000000002b753d), 0.305, 0.169757671268750) \ STEP( 61, UINT64_C(0x00000000002b753d), 0.305, 0.169757671268750, UINT64_C(0x0000000002fd5984)) \
STEP( 62, UINT64_C(0x00000000002d32fe), 0.310, 0.176559340600000) \ STEP( 62, UINT64_C(0x00000000002d32fe), 0.310, 0.176559340600000, UINT64_C(0x00000000032a8c82)) \
STEP( 63, UINT64_C(0x00000000002ef8bc), 0.315, 0.183482925806250) \ STEP( 63, UINT64_C(0x00000000002ef8bc), 0.315, 0.183482925806250, UINT64_C(0x000000000359853e)) \
STEP( 64, UINT64_C(0x000000000030c654), 0.320, 0.190526259200000) \ STEP( 64, UINT64_C(0x000000000030c654), 0.320, 0.190526259200000, UINT64_C(0x00000000038a4b92)) \
STEP( 65, UINT64_C(0x0000000000329b9f), 0.325, 0.197687089843750) \ STEP( 65, UINT64_C(0x0000000000329b9f), 0.325, 0.197687089843750, UINT64_C(0x0000000003bce731)) \
STEP( 66, UINT64_C(0x0000000000347875), 0.330, 0.204963085800000) \ STEP( 66, UINT64_C(0x0000000000347875), 0.330, 0.204963085800000, UINT64_C(0x0000000003f15fa6)) \
STEP( 67, UINT64_C(0x0000000000365cb0), 0.335, 0.212351836381250) \ STEP( 67, UINT64_C(0x0000000000365cb0), 0.335, 0.212351836381250, UINT64_C(0x000000000427bc56)) \
STEP( 68, UINT64_C(0x0000000000384825), 0.340, 0.219850854400000) \ STEP( 68, UINT64_C(0x0000000000384825), 0.340, 0.219850854400000, UINT64_C(0x000000000460047b)) \
STEP( 69, UINT64_C(0x00000000003a3aa8), 0.345, 0.227457578418750) \ STEP( 69, UINT64_C(0x00000000003a3aa8), 0.345, 0.227457578418750, UINT64_C(0x00000000049a3f23)) \
STEP( 70, UINT64_C(0x00000000003c340f), 0.350, 0.235169375000000) \ STEP( 70, UINT64_C(0x00000000003c340f), 0.350, 0.235169375000000, UINT64_C(0x0000000004d67332)) \
STEP( 71, UINT64_C(0x00000000003e342b), 0.355, 0.242983540956250) \ STEP( 71, UINT64_C(0x00000000003e342b), 0.355, 0.242983540956250, UINT64_C(0x000000000514a75d)) \
STEP( 72, UINT64_C(0x0000000000403ace), 0.360, 0.250897305600000) \ STEP( 72, UINT64_C(0x0000000000403ace), 0.360, 0.250897305600000, UINT64_C(0x000000000554e22b)) \
STEP( 73, UINT64_C(0x00000000004247c8), 0.365, 0.258907832993750) \ STEP( 73, UINT64_C(0x00000000004247c8), 0.365, 0.258907832993750, UINT64_C(0x00000000059729f3)) \
STEP( 74, UINT64_C(0x0000000000445ae9), 0.370, 0.267012224200000) \ STEP( 74, UINT64_C(0x0000000000445ae9), 0.370, 0.267012224200000, UINT64_C(0x0000000005db84dc)) \
STEP( 75, UINT64_C(0x0000000000467400), 0.375, 0.275207519531250) \ STEP( 75, UINT64_C(0x0000000000467400), 0.375, 0.275207519531250, UINT64_C(0x000000000621f8dc)) \
STEP( 76, UINT64_C(0x00000000004892d8), 0.380, 0.283490700800000) \ STEP( 76, UINT64_C(0x00000000004892d8), 0.380, 0.283490700800000, UINT64_C(0x00000000066a8bb4)) \
STEP( 77, UINT64_C(0x00000000004ab740), 0.385, 0.291858693568750) \ STEP( 77, UINT64_C(0x00000000004ab740), 0.385, 0.291858693568750, UINT64_C(0x0000000006b542f4)) \
STEP( 78, UINT64_C(0x00000000004ce102), 0.390, 0.300308369400000) \ STEP( 78, UINT64_C(0x00000000004ce102), 0.390, 0.300308369400000, UINT64_C(0x00000000070223f6)) \
STEP( 79, UINT64_C(0x00000000004f0fe9), 0.395, 0.308836548106250) \ STEP( 79, UINT64_C(0x00000000004f0fe9), 0.395, 0.308836548106250, UINT64_C(0x00000000075133df)) \
STEP( 80, UINT64_C(0x00000000005143bf), 0.400, 0.317440000000000) \ STEP( 80, UINT64_C(0x00000000005143bf), 0.400, 0.317440000000000, UINT64_C(0x0000000007a2779e)) \
STEP( 81, UINT64_C(0x0000000000537c4d), 0.405, 0.326115448143750) \ STEP( 81, UINT64_C(0x0000000000537c4d), 0.405, 0.326115448143750, UINT64_C(0x0000000007f5f3eb)) \
STEP( 82, UINT64_C(0x000000000055b95b), 0.410, 0.334859570600000) \ STEP( 82, UINT64_C(0x000000000055b95b), 0.410, 0.334859570600000, UINT64_C(0x00000000084bad46)) \
STEP( 83, UINT64_C(0x000000000057fab1), 0.415, 0.343669002681250) \ STEP( 83, UINT64_C(0x000000000057fab1), 0.415, 0.343669002681250, UINT64_C(0x0000000008a3a7f7)) \
STEP( 84, UINT64_C(0x00000000005a4015), 0.420, 0.352540339200000) \ STEP( 84, UINT64_C(0x00000000005a4015), 0.420, 0.352540339200000, UINT64_C(0x0000000008fde80c)) \
STEP( 85, UINT64_C(0x00000000005c894e), 0.425, 0.361470136718750) \ STEP( 85, UINT64_C(0x00000000005c894e), 0.425, 0.361470136718750, UINT64_C(0x00000000095a715a)) \
STEP( 86, UINT64_C(0x00000000005ed622), 0.430, 0.370454915800000) \ STEP( 86, UINT64_C(0x00000000005ed622), 0.430, 0.370454915800000, UINT64_C(0x0000000009b9477c)) \
STEP( 87, UINT64_C(0x0000000000612655), 0.435, 0.379491163256250) \ STEP( 87, UINT64_C(0x0000000000612655), 0.435, 0.379491163256250, UINT64_C(0x000000000a1a6dd1)) \
STEP( 88, UINT64_C(0x00000000006379ac), 0.440, 0.388575334400000) \ STEP( 88, UINT64_C(0x00000000006379ac), 0.440, 0.388575334400000, UINT64_C(0x000000000a7de77d)) \
STEP( 89, UINT64_C(0x000000000065cfeb), 0.445, 0.397703855293750) \ STEP( 89, UINT64_C(0x000000000065cfeb), 0.445, 0.397703855293750, UINT64_C(0x000000000ae3b768)) \
STEP( 90, UINT64_C(0x00000000006828d6), 0.450, 0.406873125000000) \ STEP( 90, UINT64_C(0x00000000006828d6), 0.450, 0.406873125000000, UINT64_C(0x000000000b4be03e)) \
STEP( 91, UINT64_C(0x00000000006a842f), 0.455, 0.416079517831250) \ STEP( 91, UINT64_C(0x00000000006a842f), 0.455, 0.416079517831250, UINT64_C(0x000000000bb6646d)) \
STEP( 92, UINT64_C(0x00000000006ce1bb), 0.460, 0.425319385600000) \ STEP( 92, UINT64_C(0x00000000006ce1bb), 0.460, 0.425319385600000, UINT64_C(0x000000000c234628)) \
STEP( 93, UINT64_C(0x00000000006f413a), 0.465, 0.434589059868750) \ STEP( 93, UINT64_C(0x00000000006f413a), 0.465, 0.434589059868750, UINT64_C(0x000000000c928762)) \
STEP( 94, UINT64_C(0x000000000071a270), 0.470, 0.443884854200000) \ STEP( 94, UINT64_C(0x000000000071a270), 0.470, 0.443884854200000, UINT64_C(0x000000000d0429d2)) \
STEP( 95, UINT64_C(0x000000000074051d), 0.475, 0.453203066406250) \ STEP( 95, UINT64_C(0x000000000074051d), 0.475, 0.453203066406250, UINT64_C(0x000000000d782eef)) \
STEP( 96, UINT64_C(0x0000000000766905), 0.480, 0.462539980800000) \ STEP( 96, UINT64_C(0x0000000000766905), 0.480, 0.462539980800000, UINT64_C(0x000000000dee97f4)) \
STEP( 97, UINT64_C(0x000000000078cde7), 0.485, 0.471891870443750) \ STEP( 97, UINT64_C(0x000000000078cde7), 0.485, 0.471891870443750, UINT64_C(0x000000000e6765db)) \
STEP( 98, UINT64_C(0x00000000007b3387), 0.490, 0.481254999400000) \ STEP( 98, UINT64_C(0x00000000007b3387), 0.490, 0.481254999400000, UINT64_C(0x000000000ee29962)) \
STEP( 99, UINT64_C(0x00000000007d99a4), 0.495, 0.490625624981250) \ STEP( 99, UINT64_C(0x00000000007d99a4), 0.495, 0.490625624981250, UINT64_C(0x000000000f603306)) \
STEP( 100, UINT64_C(0x0000000000800000), 0.500, 0.500000000000000) \ STEP( 100, UINT64_C(0x0000000000800000), 0.500, 0.500000000000000, UINT64_C(0x000000000fe03306)) \
STEP( 101, UINT64_C(0x000000000082665b), 0.505, 0.509374375018750) \ STEP( 101, UINT64_C(0x000000000082665b), 0.505, 0.509374375018750, UINT64_C(0x0000000010629961)) \
STEP( 102, UINT64_C(0x000000000084cc78), 0.510, 0.518745000600000) \ STEP( 102, UINT64_C(0x000000000084cc78), 0.510, 0.518745000600000, UINT64_C(0x0000000010e765d9)) \
STEP( 103, UINT64_C(0x0000000000873218), 0.515, 0.528108129556250) \ STEP( 103, UINT64_C(0x0000000000873218), 0.515, 0.528108129556250, UINT64_C(0x00000000116e97f1)) \
STEP( 104, UINT64_C(0x00000000008996fa), 0.520, 0.537460019200000) \ STEP( 104, UINT64_C(0x00000000008996fa), 0.520, 0.537460019200000, UINT64_C(0x0000000011f82eeb)) \
STEP( 105, UINT64_C(0x00000000008bfae2), 0.525, 0.546796933593750) \ STEP( 105, UINT64_C(0x00000000008bfae2), 0.525, 0.546796933593750, UINT64_C(0x00000000128429cd)) \
STEP( 106, UINT64_C(0x00000000008e5d8f), 0.530, 0.556115145800000) \ STEP( 106, UINT64_C(0x00000000008e5d8f), 0.530, 0.556115145800000, UINT64_C(0x000000001312875c)) \
STEP( 107, UINT64_C(0x000000000090bec5), 0.535, 0.565410940131250) \ STEP( 107, UINT64_C(0x000000000090bec5), 0.535, 0.565410940131250, UINT64_C(0x0000000013a34621)) \
STEP( 108, UINT64_C(0x0000000000931e44), 0.540, 0.574680614400000) \ STEP( 108, UINT64_C(0x0000000000931e44), 0.540, 0.574680614400000, UINT64_C(0x0000000014366465)) \
STEP( 109, UINT64_C(0x0000000000957bd0), 0.545, 0.583920482168750) \ STEP( 109, UINT64_C(0x0000000000957bd0), 0.545, 0.583920482168750, UINT64_C(0x0000000014cbe035)) \
STEP( 110, UINT64_C(0x000000000097d729), 0.550, 0.593126875000000) \ STEP( 110, UINT64_C(0x000000000097d729), 0.550, 0.593126875000000, UINT64_C(0x000000001563b75e)) \
STEP( 111, UINT64_C(0x00000000009a3014), 0.555, 0.602296144706250) \ STEP( 111, UINT64_C(0x00000000009a3014), 0.555, 0.602296144706250, UINT64_C(0x0000000015fde772)) \
STEP( 112, UINT64_C(0x00000000009c8653), 0.560, 0.611424665600000) \ STEP( 112, UINT64_C(0x00000000009c8653), 0.560, 0.611424665600000, UINT64_C(0x00000000169a6dc5)) \
STEP( 113, UINT64_C(0x00000000009ed9aa), 0.565, 0.620508836743750) \ STEP( 113, UINT64_C(0x00000000009ed9aa), 0.565, 0.620508836743750, UINT64_C(0x000000001739476f)) \
STEP( 114, UINT64_C(0x0000000000a129dd), 0.570, 0.629545084200000) \ STEP( 114, UINT64_C(0x0000000000a129dd), 0.570, 0.629545084200000, UINT64_C(0x0000000017da714c)) \
STEP( 115, UINT64_C(0x0000000000a376b1), 0.575, 0.638529863281250) \ STEP( 115, UINT64_C(0x0000000000a376b1), 0.575, 0.638529863281250, UINT64_C(0x00000000187de7fd)) \
STEP( 116, UINT64_C(0x0000000000a5bfea), 0.580, 0.647459660800000) \ STEP( 116, UINT64_C(0x0000000000a5bfea), 0.580, 0.647459660800000, UINT64_C(0x000000001923a7e7)) \
STEP( 117, UINT64_C(0x0000000000a8054e), 0.585, 0.656330997318750) \ STEP( 117, UINT64_C(0x0000000000a8054e), 0.585, 0.656330997318750, UINT64_C(0x0000000019cbad35)) \
STEP( 118, UINT64_C(0x0000000000aa46a4), 0.590, 0.665140429400000) \ STEP( 118, UINT64_C(0x0000000000aa46a4), 0.590, 0.665140429400000, UINT64_C(0x000000001a75f3d9)) \
STEP( 119, UINT64_C(0x0000000000ac83b2), 0.595, 0.673884551856250) \ STEP( 119, UINT64_C(0x0000000000ac83b2), 0.595, 0.673884551856250, UINT64_C(0x000000001b22778b)) \
STEP( 120, UINT64_C(0x0000000000aebc40), 0.600, 0.682560000000000) \ STEP( 120, UINT64_C(0x0000000000aebc40), 0.600, 0.682560000000000, UINT64_C(0x000000001bd133cb)) \
STEP( 121, UINT64_C(0x0000000000b0f016), 0.605, 0.691163451893750) \ STEP( 121, UINT64_C(0x0000000000b0f016), 0.605, 0.691163451893750, UINT64_C(0x000000001c8223e1)) \
STEP( 122, UINT64_C(0x0000000000b31efd), 0.610, 0.699691630600000) \ STEP( 122, UINT64_C(0x0000000000b31efd), 0.610, 0.699691630600000, UINT64_C(0x000000001d3542de)) \
STEP( 123, UINT64_C(0x0000000000b548bf), 0.615, 0.708141306431250) \ STEP( 123, UINT64_C(0x0000000000b548bf), 0.615, 0.708141306431250, UINT64_C(0x000000001dea8b9d)) \
STEP( 124, UINT64_C(0x0000000000b76d27), 0.620, 0.716509299200000) \ STEP( 124, UINT64_C(0x0000000000b76d27), 0.620, 0.716509299200000, UINT64_C(0x000000001ea1f8c4)) \
STEP( 125, UINT64_C(0x0000000000b98c00), 0.625, 0.724792480468750) \ STEP( 125, UINT64_C(0x0000000000b98c00), 0.625, 0.724792480468750, UINT64_C(0x000000001f5b84c4)) \
STEP( 126, UINT64_C(0x0000000000bba516), 0.630, 0.732987775800000) \ STEP( 126, UINT64_C(0x0000000000bba516), 0.630, 0.732987775800000, UINT64_C(0x00000000201729da)) \
STEP( 127, UINT64_C(0x0000000000bdb837), 0.635, 0.741092167006250) \ STEP( 127, UINT64_C(0x0000000000bdb837), 0.635, 0.741092167006250, UINT64_C(0x0000000020d4e211)) \
STEP( 128, UINT64_C(0x0000000000bfc531), 0.640, 0.749102694400000) \ STEP( 128, UINT64_C(0x0000000000bfc531), 0.640, 0.749102694400000, UINT64_C(0x000000002194a742)) \
STEP( 129, UINT64_C(0x0000000000c1cbd4), 0.645, 0.757016459043750) \ STEP( 129, UINT64_C(0x0000000000c1cbd4), 0.645, 0.757016459043750, UINT64_C(0x0000000022567316)) \
STEP( 130, UINT64_C(0x0000000000c3cbf0), 0.650, 0.764830625000000) \ STEP( 130, UINT64_C(0x0000000000c3cbf0), 0.650, 0.764830625000000, UINT64_C(0x00000000231a3f06)) \
STEP( 131, UINT64_C(0x0000000000c5c557), 0.655, 0.772542421581250) \ STEP( 131, UINT64_C(0x0000000000c5c557), 0.655, 0.772542421581250, UINT64_C(0x0000000023e0045d)) \
STEP( 132, UINT64_C(0x0000000000c7b7da), 0.660, 0.780149145600000) \ STEP( 132, UINT64_C(0x0000000000c7b7da), 0.660, 0.780149145600000, UINT64_C(0x0000000024a7bc37)) \
STEP( 133, UINT64_C(0x0000000000c9a34f), 0.665, 0.787648163618750) \ STEP( 133, UINT64_C(0x0000000000c9a34f), 0.665, 0.787648163618750, UINT64_C(0x0000000025715f86)) \
STEP( 134, UINT64_C(0x0000000000cb878a), 0.670, 0.795036914200000) \ STEP( 134, UINT64_C(0x0000000000cb878a), 0.670, 0.795036914200000, UINT64_C(0x00000000263ce710)) \
STEP( 135, UINT64_C(0x0000000000cd6460), 0.675, 0.802312910156250) \ STEP( 135, UINT64_C(0x0000000000cd6460), 0.675, 0.802312910156250, UINT64_C(0x00000000270a4b70)) \
STEP( 136, UINT64_C(0x0000000000cf39ab), 0.680, 0.809473740800000) \ STEP( 136, UINT64_C(0x0000000000cf39ab), 0.680, 0.809473740800000, UINT64_C(0x0000000027d9851b)) \
STEP( 137, UINT64_C(0x0000000000d10743), 0.685, 0.816517074193750) \ STEP( 137, UINT64_C(0x0000000000d10743), 0.685, 0.816517074193750, UINT64_C(0x0000000028aa8c5e)) \
STEP( 138, UINT64_C(0x0000000000d2cd01), 0.690, 0.823440659400000) \ STEP( 138, UINT64_C(0x0000000000d2cd01), 0.690, 0.823440659400000, UINT64_C(0x00000000297d595f)) \
STEP( 139, UINT64_C(0x0000000000d48ac2), 0.695, 0.830242328731250) \ STEP( 139, UINT64_C(0x0000000000d48ac2), 0.695, 0.830242328731250, UINT64_C(0x000000002a51e421)) \
STEP( 140, UINT64_C(0x0000000000d64063), 0.700, 0.836920000000000) \ STEP( 140, UINT64_C(0x0000000000d64063), 0.700, 0.836920000000000, UINT64_C(0x000000002b282484)) \
STEP( 141, UINT64_C(0x0000000000d7edc2), 0.705, 0.843471678768750) \ STEP( 141, UINT64_C(0x0000000000d7edc2), 0.705, 0.843471678768750, UINT64_C(0x000000002c001246)) \
STEP( 142, UINT64_C(0x0000000000d992bf), 0.710, 0.849895460600000) \ STEP( 142, UINT64_C(0x0000000000d992bf), 0.710, 0.849895460600000, UINT64_C(0x000000002cd9a505)) \
STEP( 143, UINT64_C(0x0000000000db2f3c), 0.715, 0.856189533306250) \ STEP( 143, UINT64_C(0x0000000000db2f3c), 0.715, 0.856189533306250, UINT64_C(0x000000002db4d441)) \
STEP( 144, UINT64_C(0x0000000000dcc31c), 0.720, 0.862352179200000) \ STEP( 144, UINT64_C(0x0000000000dcc31c), 0.720, 0.862352179200000, UINT64_C(0x000000002e91975d)) \
STEP( 145, UINT64_C(0x0000000000de4e44), 0.725, 0.868381777343750) \ STEP( 145, UINT64_C(0x0000000000de4e44), 0.725, 0.868381777343750, UINT64_C(0x000000002f6fe5a1)) \
STEP( 146, UINT64_C(0x0000000000dfd09a), 0.730, 0.874276805800000) \ STEP( 146, UINT64_C(0x0000000000dfd09a), 0.730, 0.874276805800000, UINT64_C(0x00000000304fb63b)) \
STEP( 147, UINT64_C(0x0000000000e14a07), 0.735, 0.880035843881250) \ STEP( 147, UINT64_C(0x0000000000e14a07), 0.735, 0.880035843881250, UINT64_C(0x0000000031310042)) \
STEP( 148, UINT64_C(0x0000000000e2ba74), 0.740, 0.885657574400000) \ STEP( 148, UINT64_C(0x0000000000e2ba74), 0.740, 0.885657574400000, UINT64_C(0x000000003213bab6)) \
STEP( 149, UINT64_C(0x0000000000e421cd), 0.745, 0.891140785918750) \ STEP( 149, UINT64_C(0x0000000000e421cd), 0.745, 0.891140785918750, UINT64_C(0x0000000032f7dc83)) \
STEP( 150, UINT64_C(0x0000000000e58000), 0.750, 0.896484375000000) \ STEP( 150, UINT64_C(0x0000000000e58000), 0.750, 0.896484375000000, UINT64_C(0x0000000033dd5c83)) \
STEP( 151, UINT64_C(0x0000000000e6d4fb), 0.755, 0.901687348456250) \ STEP( 151, UINT64_C(0x0000000000e6d4fb), 0.755, 0.901687348456250, UINT64_C(0x0000000034c4317e)) \
STEP( 152, UINT64_C(0x0000000000e820b0), 0.760, 0.906748825600000) \ STEP( 152, UINT64_C(0x0000000000e820b0), 0.760, 0.906748825600000, UINT64_C(0x0000000035ac522e)) \
STEP( 153, UINT64_C(0x0000000000e96313), 0.765, 0.911668040493750) \ STEP( 153, UINT64_C(0x0000000000e96313), 0.765, 0.911668040493750, UINT64_C(0x000000003695b541)) \
STEP( 154, UINT64_C(0x0000000000ea9c18), 0.770, 0.916444344200000) \ STEP( 154, UINT64_C(0x0000000000ea9c18), 0.770, 0.916444344200000, UINT64_C(0x0000000037805159)) \
STEP( 155, UINT64_C(0x0000000000ebcbb7), 0.775, 0.921077207031250) \ STEP( 155, UINT64_C(0x0000000000ebcbb7), 0.775, 0.921077207031250, UINT64_C(0x00000000386c1d10)) \
STEP( 156, UINT64_C(0x0000000000ecf1e8), 0.780, 0.925566220800000) \ STEP( 156, UINT64_C(0x0000000000ecf1e8), 0.780, 0.925566220800000, UINT64_C(0x0000000039590ef8)) \
STEP( 157, UINT64_C(0x0000000000ee0ea7), 0.785, 0.929911101068750) \ STEP( 157, UINT64_C(0x0000000000ee0ea7), 0.785, 0.929911101068750, UINT64_C(0x000000003a471d9f)) \
STEP( 158, UINT64_C(0x0000000000ef21f1), 0.790, 0.934111689400000) \ STEP( 158, UINT64_C(0x0000000000ef21f1), 0.790, 0.934111689400000, UINT64_C(0x000000003b363f90)) \
STEP( 159, UINT64_C(0x0000000000f02bc6), 0.795, 0.938167955606250) \ STEP( 159, UINT64_C(0x0000000000f02bc6), 0.795, 0.938167955606250, UINT64_C(0x000000003c266b56)) \
STEP( 160, UINT64_C(0x0000000000f12c27), 0.800, 0.942080000000000) \ STEP( 160, UINT64_C(0x0000000000f12c27), 0.800, 0.942080000000000, UINT64_C(0x000000003d17977d)) \
STEP( 161, UINT64_C(0x0000000000f22319), 0.805, 0.945848055643750) \ STEP( 161, UINT64_C(0x0000000000f22319), 0.805, 0.945848055643750, UINT64_C(0x000000003e09ba96)) \
STEP( 162, UINT64_C(0x0000000000f310a1), 0.810, 0.949472490600000) \ STEP( 162, UINT64_C(0x0000000000f310a1), 0.810, 0.949472490600000, UINT64_C(0x000000003efccb37)) \
STEP( 163, UINT64_C(0x0000000000f3f4c7), 0.815, 0.952953810181250) \ STEP( 163, UINT64_C(0x0000000000f3f4c7), 0.815, 0.952953810181250, UINT64_C(0x000000003ff0bffe)) \
STEP( 164, UINT64_C(0x0000000000f4cf98), 0.820, 0.956292659200000) \ STEP( 164, UINT64_C(0x0000000000f4cf98), 0.820, 0.956292659200000, UINT64_C(0x0000000040e58f96)) \
STEP( 165, UINT64_C(0x0000000000f5a120), 0.825, 0.959489824218750) \ STEP( 165, UINT64_C(0x0000000000f5a120), 0.825, 0.959489824218750, UINT64_C(0x0000000041db30b6)) \
STEP( 166, UINT64_C(0x0000000000f6696e), 0.830, 0.962546235800000) \ STEP( 166, UINT64_C(0x0000000000f6696e), 0.830, 0.962546235800000, UINT64_C(0x0000000042d19a24)) \
STEP( 167, UINT64_C(0x0000000000f72894), 0.835, 0.965462970756250) \ STEP( 167, UINT64_C(0x0000000000f72894), 0.835, 0.965462970756250, UINT64_C(0x0000000043c8c2b8)) \
STEP( 168, UINT64_C(0x0000000000f7dea8), 0.840, 0.968241254400000) \ STEP( 168, UINT64_C(0x0000000000f7dea8), 0.840, 0.968241254400000, UINT64_C(0x0000000044c0a160)) \
STEP( 169, UINT64_C(0x0000000000f88bc0), 0.845, 0.970882462793750) \ STEP( 169, UINT64_C(0x0000000000f88bc0), 0.845, 0.970882462793750, UINT64_C(0x0000000045b92d20)) \
STEP( 170, UINT64_C(0x0000000000f92ff6), 0.850, 0.973388125000000) \ STEP( 170, UINT64_C(0x0000000000f92ff6), 0.850, 0.973388125000000, UINT64_C(0x0000000046b25d16)) \
STEP( 171, UINT64_C(0x0000000000f9cb67), 0.855, 0.975759925331250) \ STEP( 171, UINT64_C(0x0000000000f9cb67), 0.855, 0.975759925331250, UINT64_C(0x0000000047ac287d)) \
STEP( 172, UINT64_C(0x0000000000fa5e30), 0.860, 0.977999705600000) \ STEP( 172, UINT64_C(0x0000000000fa5e30), 0.860, 0.977999705600000, UINT64_C(0x0000000048a686ad)) \
STEP( 173, UINT64_C(0x0000000000fae874), 0.865, 0.980109467368750) \ STEP( 173, UINT64_C(0x0000000000fae874), 0.865, 0.980109467368750, UINT64_C(0x0000000049a16f21)) \
STEP( 174, UINT64_C(0x0000000000fb6a57), 0.870, 0.982091374200000) \ STEP( 174, UINT64_C(0x0000000000fb6a57), 0.870, 0.982091374200000, UINT64_C(0x000000004a9cd978)) \
STEP( 175, UINT64_C(0x0000000000fbe400), 0.875, 0.983947753906250) \ STEP( 175, UINT64_C(0x0000000000fbe400), 0.875, 0.983947753906250, UINT64_C(0x000000004b98bd78)) \
STEP( 176, UINT64_C(0x0000000000fc5598), 0.880, 0.985681100800000) \ STEP( 176, UINT64_C(0x0000000000fc5598), 0.880, 0.985681100800000, UINT64_C(0x000000004c951310)) \
STEP( 177, UINT64_C(0x0000000000fcbf4e), 0.885, 0.987294077943750) \ STEP( 177, UINT64_C(0x0000000000fcbf4e), 0.885, 0.987294077943750, UINT64_C(0x000000004d91d25e)) \
STEP( 178, UINT64_C(0x0000000000fd214f), 0.890, 0.988789519400000) \ STEP( 178, UINT64_C(0x0000000000fd214f), 0.890, 0.988789519400000, UINT64_C(0x000000004e8ef3ad)) \
STEP( 179, UINT64_C(0x0000000000fd7bcf), 0.895, 0.990170432481250) \ STEP( 179, UINT64_C(0x0000000000fd7bcf), 0.895, 0.990170432481250, UINT64_C(0x000000004f8c6f7c)) \
STEP( 180, UINT64_C(0x0000000000fdcf03), 0.900, 0.991440000000000) \ STEP( 180, UINT64_C(0x0000000000fdcf03), 0.900, 0.991440000000000, UINT64_C(0x00000000508a3e7f)) \
STEP( 181, UINT64_C(0x0000000000fe1b23), 0.905, 0.992601582518750) \ STEP( 181, UINT64_C(0x0000000000fe1b23), 0.905, 0.992601582518750, UINT64_C(0x00000000518859a2)) \
STEP( 182, UINT64_C(0x0000000000fe606a), 0.910, 0.993658720600000) \ STEP( 182, UINT64_C(0x0000000000fe606a), 0.910, 0.993658720600000, UINT64_C(0x000000005286ba0c)) \
STEP( 183, UINT64_C(0x0000000000fe9f18), 0.915, 0.994615137056250) \ STEP( 183, UINT64_C(0x0000000000fe9f18), 0.915, 0.994615137056250, UINT64_C(0x0000000053855924)) \
STEP( 184, UINT64_C(0x0000000000fed76e), 0.920, 0.995474739200000) \ STEP( 184, UINT64_C(0x0000000000fed76e), 0.920, 0.995474739200000, UINT64_C(0x0000000054843092)) \
STEP( 185, UINT64_C(0x0000000000ff09b0), 0.925, 0.996241621093750) \ STEP( 185, UINT64_C(0x0000000000ff09b0), 0.925, 0.996241621093750, UINT64_C(0x0000000055833a42)) \
STEP( 186, UINT64_C(0x0000000000ff3627), 0.930, 0.996920065800000) \ STEP( 186, UINT64_C(0x0000000000ff3627), 0.930, 0.996920065800000, UINT64_C(0x0000000056827069)) \
STEP( 187, UINT64_C(0x0000000000ff5d1d), 0.935, 0.997514547631250) \ STEP( 187, UINT64_C(0x0000000000ff5d1d), 0.935, 0.997514547631250, UINT64_C(0x000000005781cd86)) \
STEP( 188, UINT64_C(0x0000000000ff7ee0), 0.940, 0.998029734400000) \ STEP( 188, UINT64_C(0x0000000000ff7ee0), 0.940, 0.998029734400000, UINT64_C(0x0000000058814c66)) \
STEP( 189, UINT64_C(0x0000000000ff9bc3), 0.945, 0.998470489668750) \ STEP( 189, UINT64_C(0x0000000000ff9bc3), 0.945, 0.998470489668750, UINT64_C(0x000000005980e829)) \
STEP( 190, UINT64_C(0x0000000000ffb419), 0.950, 0.998841875000000) \ STEP( 190, UINT64_C(0x0000000000ffb419), 0.950, 0.998841875000000, UINT64_C(0x000000005a809c42)) \
STEP( 191, UINT64_C(0x0000000000ffc83d), 0.955, 0.999149152206250) \ STEP( 191, UINT64_C(0x0000000000ffc83d), 0.955, 0.999149152206250, UINT64_C(0x000000005b80647f)) \
STEP( 192, UINT64_C(0x0000000000ffd888), 0.960, 0.999397785600000) \ STEP( 192, UINT64_C(0x0000000000ffd888), 0.960, 0.999397785600000, UINT64_C(0x000000005c803d07)) \
STEP( 193, UINT64_C(0x0000000000ffe55b), 0.965, 0.999593444243750) \ STEP( 193, UINT64_C(0x0000000000ffe55b), 0.965, 0.999593444243750, UINT64_C(0x000000005d802262)) \
STEP( 194, UINT64_C(0x0000000000ffef17), 0.970, 0.999742004200000) \ STEP( 194, UINT64_C(0x0000000000ffef17), 0.970, 0.999742004200000, UINT64_C(0x000000005e801179)) \
STEP( 195, UINT64_C(0x0000000000fff623), 0.975, 0.999849550781250) \ STEP( 195, UINT64_C(0x0000000000fff623), 0.975, 0.999849550781250, UINT64_C(0x000000005f80079c)) \
STEP( 196, UINT64_C(0x0000000000fffae9), 0.980, 0.999922380800000) \ STEP( 196, UINT64_C(0x0000000000fffae9), 0.980, 0.999922380800000, UINT64_C(0x0000000060800285)) \
STEP( 197, UINT64_C(0x0000000000fffdd6), 0.985, 0.999967004818750) \ STEP( 197, UINT64_C(0x0000000000fffdd6), 0.985, 0.999967004818750, UINT64_C(0x000000006180005b)) \
STEP( 198, UINT64_C(0x0000000000ffff5a), 0.990, 0.999990149400000) \ STEP( 198, UINT64_C(0x0000000000ffff5a), 0.990, 0.999990149400000, UINT64_C(0x00000000627fffb5)) \
STEP( 199, UINT64_C(0x0000000000ffffeb), 0.995, 0.999998759356250) \ STEP( 199, UINT64_C(0x0000000000ffffeb), 0.995, 0.999998759356250, UINT64_C(0x00000000637fffa0)) \
STEP( 200, UINT64_C(0x0000000001000000), 1.000, 1.000000000000000) \ STEP( 200, UINT64_C(0x0000000001000000), 1.000, 1.000000000000000, UINT64_C(0x00000000647fffa0)) \
#endif /* JEMALLOC_INTERNAL_SMOOTHSTEP_H */ #endif /* JEMALLOC_INTERNAL_SMOOTHSTEP_H */

View File

@ -83,14 +83,16 @@ cat <<EOF
#define SMOOTHSTEP_NSTEPS ${nsteps} #define SMOOTHSTEP_NSTEPS ${nsteps}
#define SMOOTHSTEP_BFP ${bfp} #define SMOOTHSTEP_BFP ${bfp}
#define SMOOTHSTEP \\ #define SMOOTHSTEP \\
/* STEP(step, h, x, y) */ \\ /* STEP(step, h, x, y, h_sum) */ \\
EOF EOF
s=1 s=1
h_sum=0
while [ $s -le $nsteps ] ; do while [ $s -le $nsteps ] ; do
$variant ${s} $variant ${s}
x=`echo ${xprec} k ${s} ${nsteps} / p | dc | tr -d '\\\\\n' | sed -e 's#^\.#0.#g'` x=`echo ${xprec} k ${s} ${nsteps} / p | dc | tr -d '\\\\\n' | sed -e 's#^\.#0.#g'`
printf ' STEP(%4d, UINT64_C(0x%016x), %s, %s) \\\n' ${s} ${h} ${x} ${y} h_sum=$((h_sum+h))
printf ' STEP(%4d, UINT64_C(0x%016x), %s, %s, UINT64_C(0x%016x)) \\\n' ${s} ${h} ${x} ${y} ${h_sum}
s=$((s+1)) s=$((s+1))
done done

View File

@ -22,11 +22,15 @@ typedef int witness_comp_t (const witness_t *, void *, const witness_t *,
#define WITNESS_RANK_TCACHES 2U #define WITNESS_RANK_TCACHES 2U
#define WITNESS_RANK_ARENAS 3U #define WITNESS_RANK_ARENAS 3U
#define WITNESS_RANK_PROF_DUMP 4U #define WITNESS_RANK_BACKGROUND_THREAD_GLOBAL 4U
#define WITNESS_RANK_PROF_BT2GCTX 5U
#define WITNESS_RANK_PROF_TDATAS 6U #define WITNESS_RANK_PROF_DUMP 5U
#define WITNESS_RANK_PROF_TDATA 7U #define WITNESS_RANK_PROF_BT2GCTX 6U
#define WITNESS_RANK_PROF_GCTX 8U #define WITNESS_RANK_PROF_TDATAS 7U
#define WITNESS_RANK_PROF_TDATA 8U
#define WITNESS_RANK_PROF_GCTX 9U
#define WITNESS_RANK_BACKGROUND_THREAD 10U
/* /*
* Used as an argument to witness_assert_depth_to_rank() in order to validate * Used as an argument to witness_assert_depth_to_rank() in order to validate
@ -34,17 +38,17 @@ typedef int witness_comp_t (const witness_t *, void *, const witness_t *,
* witness_assert_depth_to_rank() is inclusive rather than exclusive, this * witness_assert_depth_to_rank() is inclusive rather than exclusive, this
* definition can have the same value as the minimally ranked core lock. * definition can have the same value as the minimally ranked core lock.
*/ */
#define WITNESS_RANK_CORE 9U #define WITNESS_RANK_CORE 11U
#define WITNESS_RANK_DECAY 9U #define WITNESS_RANK_DECAY 11U
#define WITNESS_RANK_TCACHE_QL 10U #define WITNESS_RANK_TCACHE_QL 12U
#define WITNESS_RANK_EXTENTS 11U #define WITNESS_RANK_EXTENTS 13U
#define WITNESS_RANK_EXTENT_FREELIST 12U #define WITNESS_RANK_EXTENT_FREELIST 14U
#define WITNESS_RANK_EXTENT_POOL 13U #define WITNESS_RANK_EXTENT_POOL 15U
#define WITNESS_RANK_RTREE 14U #define WITNESS_RANK_RTREE 16U
#define WITNESS_RANK_BASE 15U #define WITNESS_RANK_BASE 17U
#define WITNESS_RANK_ARENA_LARGE 16U #define WITNESS_RANK_ARENA_LARGE 18U
#define WITNESS_RANK_LEAF 0xffffffffU #define WITNESS_RANK_LEAF 0xffffffffU
#define WITNESS_RANK_ARENA_BIN WITNESS_RANK_LEAF #define WITNESS_RANK_ARENA_BIN WITNESS_RANK_LEAF

View File

@ -9,14 +9,13 @@
/******************************************************************************/ /******************************************************************************/
/* Data. */ /* Data. */
const char *percpu_arena_mode_names[] = { const char *percpu_arena_mode_names[] = {
"disabled", "disabled",
"percpu", "percpu",
"phycpu" "phycpu"
}; };
const char *opt_percpu_arena = OPT_PERCPU_ARENA_DEFAULT;
const char *opt_percpu_arena = OPT_PERCPU_ARENA_DEFAULT; percpu_arena_mode_t percpu_arena_mode = PERCPU_ARENA_MODE_DEFAULT;
percpu_arena_mode_t percpu_arena_mode = PERCPU_ARENA_MODE_DEFAULT;
ssize_t opt_dirty_decay_ms = DIRTY_DECAY_MS_DEFAULT; ssize_t opt_dirty_decay_ms = DIRTY_DECAY_MS_DEFAULT;
ssize_t opt_muzzy_decay_ms = MUZZY_DECAY_MS_DEFAULT; ssize_t opt_muzzy_decay_ms = MUZZY_DECAY_MS_DEFAULT;
@ -24,7 +23,7 @@ ssize_t opt_muzzy_decay_ms = MUZZY_DECAY_MS_DEFAULT;
static atomic_zd_t dirty_decay_ms_default; static atomic_zd_t dirty_decay_ms_default;
static atomic_zd_t muzzy_decay_ms_default; static atomic_zd_t muzzy_decay_ms_default;
const arena_bin_info_t arena_bin_info[NBINS] = { const arena_bin_info_t arena_bin_info[NBINS] = {
#define BIN_INFO_bin_yes(reg_size, slab_size, nregs) \ #define BIN_INFO_bin_yes(reg_size, slab_size, nregs) \
{reg_size, slab_size, nregs, BITMAP_INFO_INITIALIZER(nregs)}, {reg_size, slab_size, nregs, BITMAP_INFO_INITIALIZER(nregs)},
#define BIN_INFO_bin_no(reg_size, slab_size, nregs) #define BIN_INFO_bin_no(reg_size, slab_size, nregs)
@ -39,6 +38,13 @@ const arena_bin_info_t arena_bin_info[NBINS] = {
#undef SC #undef SC
}; };
const uint64_t h_steps[SMOOTHSTEP_NSTEPS] = {
#define STEP(step, h, x, y, h_sum) \
h,
SMOOTHSTEP
#undef STEP
};
/******************************************************************************/ /******************************************************************************/
/* /*
* Function prototypes for static functions that are referenced prior to * Function prototypes for static functions that are referenced prior to
@ -47,7 +53,8 @@ const arena_bin_info_t arena_bin_info[NBINS] = {
static void arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, static void arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena,
arena_decay_t *decay, extents_t *extents, bool all, size_t npages_limit); arena_decay_t *decay, extents_t *extents, bool all, size_t npages_limit);
static bool arena_decay_dirty(tsdn_t *tsdn, arena_t *arena, bool all); static bool arena_decay_dirty(tsdn_t *tsdn, arena_t *arena,
bool is_background_thread, bool all);
static void arena_dalloc_bin_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab, static void arena_dalloc_bin_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab,
arena_bin_t *bin); arena_bin_t *bin);
static void arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab, static void arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab,
@ -359,7 +366,7 @@ arena_extents_dirty_dalloc(tsdn_t *tsdn, arena_t *arena,
extents_dalloc(tsdn, arena, r_extent_hooks, &arena->extents_dirty, extents_dalloc(tsdn, arena, r_extent_hooks, &arena->extents_dirty,
extent); extent);
if (arena_dirty_decay_ms_get(arena) == 0) { if (arena_dirty_decay_ms_get(arena) == 0) {
arena_decay_dirty(tsdn, arena, true); arena_decay_dirty(tsdn, arena, false, true);
} }
} }
@ -606,12 +613,6 @@ arena_decay_deadline_reached(const arena_decay_t *decay, const nstime_t *time) {
static size_t static size_t
arena_decay_backlog_npages_limit(const arena_decay_t *decay) { arena_decay_backlog_npages_limit(const arena_decay_t *decay) {
static const uint64_t h_steps[] = {
#define STEP(step, h, x, y) \
h,
SMOOTHSTEP
#undef STEP
};
uint64_t sum; uint64_t sum;
size_t npages_limit_backlog; size_t npages_limit_backlog;
unsigned i; unsigned i;
@ -660,17 +661,27 @@ arena_decay_backlog_update(arena_decay_t *decay, extents_t *extents,
arena_decay_backlog_update_last(decay, extents); arena_decay_backlog_update_last(decay, extents);
} }
static void
arena_decay_try_purge(tsdn_t *tsdn, arena_t *arena,
arena_decay_t *decay, extents_t *extents) {
size_t npages_limit = arena_decay_backlog_npages_limit(decay);
if (extents_npages_get(extents) > npages_limit) {
arena_decay_to_limit(tsdn, arena, decay, extents, false,
npages_limit);
}
}
static void static void
arena_decay_epoch_advance_helper(arena_decay_t *decay, extents_t *extents, arena_decay_epoch_advance_helper(arena_decay_t *decay, extents_t *extents,
const nstime_t *time) { const nstime_t *time) {
uint64_t nadvance_u64;
nstime_t delta;
assert(arena_decay_deadline_reached(decay, time)); assert(arena_decay_deadline_reached(decay, time));
nstime_t delta;
nstime_copy(&delta, time); nstime_copy(&delta, time);
nstime_subtract(&delta, &decay->epoch); nstime_subtract(&delta, &decay->epoch);
nadvance_u64 = nstime_divide(&delta, &decay->interval);
uint64_t nadvance_u64 = nstime_divide(&delta, &decay->interval);
assert(nadvance_u64 > 0); assert(nadvance_u64 > 0);
/* Add nadvance_u64 decay intervals to epoch. */ /* Add nadvance_u64 decay intervals to epoch. */
@ -686,14 +697,13 @@ arena_decay_epoch_advance_helper(arena_decay_t *decay, extents_t *extents,
} }
static void static void
arena_decay_epoch_advance_purge(tsdn_t *tsdn, arena_t *arena, arena_decay_epoch_advance(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay,
arena_decay_t *decay, extents_t *extents) { extents_t *extents, const nstime_t *time, bool purge) {
size_t npages_limit = arena_decay_backlog_npages_limit(decay); arena_decay_epoch_advance_helper(decay, extents, time);
if (purge) {
if (extents_npages_get(extents) > npages_limit) { arena_decay_try_purge(tsdn, arena, decay, extents);
arena_decay_to_limit(tsdn, arena, decay, extents, false,
npages_limit);
} }
/* /*
* There may be concurrent ndirty fluctuation between the purge above * There may be concurrent ndirty fluctuation between the purge above
* and the nunpurged update below, but this is inconsequential to decay * and the nunpurged update below, but this is inconsequential to decay
@ -702,13 +712,6 @@ arena_decay_epoch_advance_purge(tsdn_t *tsdn, arena_t *arena,
decay->nunpurged = extents_npages_get(extents); decay->nunpurged = extents_npages_get(extents);
} }
static void
arena_decay_epoch_advance(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay,
extents_t *extents, const nstime_t *time) {
arena_decay_epoch_advance_helper(decay, extents, time);
arena_decay_epoch_advance_purge(tsdn, arena, decay, extents);
}
static void static void
arena_decay_reinit(arena_decay_t *decay, extents_t *extents, ssize_t decay_ms) { arena_decay_reinit(arena_decay_t *decay, extents_t *extents, ssize_t decay_ms) {
arena_decay_ms_write(decay, decay_ms); arena_decay_ms_write(decay, decay_ms);
@ -759,9 +762,9 @@ arena_decay_ms_valid(ssize_t decay_ms) {
return false; return false;
} }
static void static bool
arena_maybe_decay(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, arena_maybe_decay(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay,
extents_t *extents) { extents_t *extents, bool is_background_thread) {
malloc_mutex_assert_owner(tsdn, &decay->mtx); malloc_mutex_assert_owner(tsdn, &decay->mtx);
/* Purge all or nothing if the option is disabled. */ /* Purge all or nothing if the option is disabled. */
@ -771,7 +774,7 @@ arena_maybe_decay(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay,
arena_decay_to_limit(tsdn, arena, decay, extents, false, arena_decay_to_limit(tsdn, arena, decay, extents, false,
0); 0);
} }
return; return false;
} }
nstime_t time; nstime_t time;
@ -799,11 +802,20 @@ arena_maybe_decay(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay,
* If the deadline has been reached, advance to the current epoch and * If the deadline has been reached, advance to the current epoch and
* purge to the new limit if necessary. Note that dirty pages created * purge to the new limit if necessary. Note that dirty pages created
* during the current epoch are not subject to purge until a future * during the current epoch are not subject to purge until a future
* epoch, so as a result purging only happens during epoch advances. * epoch, so as a result purging only happens during epoch advances, or
* being triggered by background threads (scheduled event).
*/ */
if (arena_decay_deadline_reached(decay, &time)) { bool advance_epoch = arena_decay_deadline_reached(decay, &time);
arena_decay_epoch_advance(tsdn, arena, decay, extents, &time); if (advance_epoch) {
bool should_purge = is_background_thread ||
!background_thread_enabled();
arena_decay_epoch_advance(tsdn, arena, decay, extents, &time,
should_purge);
} else if (is_background_thread) {
arena_decay_try_purge(tsdn, arena, decay, extents);
} }
return advance_epoch;
} }
static ssize_t static ssize_t
@ -838,7 +850,7 @@ arena_decay_ms_set(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay,
* arbitrary change during initial arena configuration. * arbitrary change during initial arena configuration.
*/ */
arena_decay_reinit(decay, extents, decay_ms); arena_decay_reinit(decay, extents, decay_ms);
arena_maybe_decay(tsdn, arena, decay, extents); arena_maybe_decay(tsdn, arena, decay, extents, false);
malloc_mutex_unlock(tsdn, &decay->mtx); malloc_mutex_unlock(tsdn, &decay->mtx);
return false; return false;
@ -974,40 +986,57 @@ arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay,
static bool static bool
arena_decay_impl(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, arena_decay_impl(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay,
extents_t *extents, bool all) { extents_t *extents, bool is_background_thread, bool all) {
if (all) { if (all) {
malloc_mutex_lock(tsdn, &decay->mtx); malloc_mutex_lock(tsdn, &decay->mtx);
arena_decay_to_limit(tsdn, arena, decay, extents, all, 0); arena_decay_to_limit(tsdn, arena, decay, extents, all, 0);
} else { malloc_mutex_unlock(tsdn, &decay->mtx);
if (malloc_mutex_trylock(tsdn, &decay->mtx)) {
/* No need to wait if another thread is in progress. */ return false;
return true; }
}
arena_maybe_decay(tsdn, arena, decay, extents); if (malloc_mutex_trylock(tsdn, &decay->mtx)) {
/* No need to wait if another thread is in progress. */
return true;
}
bool epoch_advanced = arena_maybe_decay(tsdn, arena, decay, extents,
is_background_thread);
size_t npages_new;
if (epoch_advanced) {
/* Backlog is updated on epoch advance. */
npages_new = decay->backlog[SMOOTHSTEP_NSTEPS-1];
} }
malloc_mutex_unlock(tsdn, &decay->mtx); malloc_mutex_unlock(tsdn, &decay->mtx);
if (have_background_thread && background_thread_enabled() &&
epoch_advanced && !is_background_thread) {
background_thread_interval_check(tsdn, arena, decay, npages_new);
}
return false; return false;
} }
static bool static bool
arena_decay_dirty(tsdn_t *tsdn, arena_t *arena, bool all) { arena_decay_dirty(tsdn_t *tsdn, arena_t *arena, bool is_background_thread,
bool all) {
return arena_decay_impl(tsdn, arena, &arena->decay_dirty, return arena_decay_impl(tsdn, arena, &arena->decay_dirty,
&arena->extents_dirty, all); &arena->extents_dirty, is_background_thread, all);
} }
static bool static bool
arena_decay_muzzy(tsdn_t *tsdn, arena_t *arena, bool all) { arena_decay_muzzy(tsdn_t *tsdn, arena_t *arena, bool is_background_thread,
bool all) {
return arena_decay_impl(tsdn, arena, &arena->decay_muzzy, return arena_decay_impl(tsdn, arena, &arena->decay_muzzy,
&arena->extents_muzzy, all); &arena->extents_muzzy, is_background_thread, all);
} }
void void
arena_decay(tsdn_t *tsdn, arena_t *arena, bool all) { arena_decay(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all) {
if (arena_decay_dirty(tsdn, arena, all)) { if (arena_decay_dirty(tsdn, arena, is_background_thread, all)) {
return; return;
} }
arena_decay_muzzy(tsdn, arena, all); arena_decay_muzzy(tsdn, arena, is_background_thread, all);
} }
static void static void
@ -1173,6 +1202,7 @@ arena_destroy(tsd_t *tsd, arena_t *arena) {
* extents, so only retained extents may remain. * extents, so only retained extents may remain.
*/ */
assert(extents_npages_get(&arena->extents_dirty) == 0); assert(extents_npages_get(&arena->extents_dirty) == 0);
assert(extents_npages_get(&arena->extents_muzzy) == 0);
/* Deallocate retained memory. */ /* Deallocate retained memory. */
arena_destroy_retained(tsd_tsdn(tsd), arena); arena_destroy_retained(tsd_tsdn(tsd), arena);
@ -1971,19 +2001,35 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) {
} }
arena->base = base; arena->base = base;
/* Set arena before creating background threads. */
arena_set(ind, arena);
nstime_init(&arena->create_time, 0); nstime_init(&arena->create_time, 0);
nstime_update(&arena->create_time); nstime_update(&arena->create_time);
/* We don't support reetrancy for arena 0 bootstrapping. */ /* We don't support reentrancy for arena 0 bootstrapping. */
if (ind != 0 && hooks_arena_new_hook) { if (ind != 0) {
/* /*
* If we're here, then arena 0 already exists, so bootstrapping * If we're here, then arena 0 already exists, so bootstrapping
* is done enough that we should have tsd. * is done enough that we should have tsd.
*/ */
assert(!tsdn_null(tsdn));
pre_reentrancy(tsdn_tsd(tsdn)); pre_reentrancy(tsdn_tsd(tsdn));
hooks_arena_new_hook(); if (hooks_arena_new_hook) {
hooks_arena_new_hook();
}
post_reentrancy(tsdn_tsd(tsdn)); post_reentrancy(tsdn_tsd(tsdn));
/* background_thread_create() handles reentrancy internally. */
if (have_background_thread) {
bool err;
malloc_mutex_lock(tsdn, &background_thread_lock);
err = background_thread_create(tsdn_tsd(tsdn), ind);
malloc_mutex_unlock(tsdn, &background_thread_lock);
if (err) {
goto label_error;
}
}
} }
return arena; return arena;

572
src/background_thread.c Normal file
View File

@ -0,0 +1,572 @@
#define JEMALLOC_BACKGROUND_THREAD_C_
#include "jemalloc/internal/jemalloc_preamble.h"
#include "jemalloc/internal/jemalloc_internal_includes.h"
#include "jemalloc/internal/assert.h"
/******************************************************************************/
/* Data. */
/* This option should be opt-in only. */
#define BACKGROUND_THREAD_DEFAULT false
/* Read-only after initialization. */
bool opt_background_thread = BACKGROUND_THREAD_DEFAULT;
/* Used for thread creation, termination and stats. */
malloc_mutex_t background_thread_lock;
/* Indicates global state. Atomic because decay reads this w/o locking. */
atomic_b_t background_thread_enabled_state;
size_t n_background_threads;
/* Thread info per-index. */
background_thread_info_t *background_thread_info;
/******************************************************************************/
#ifndef JEMALLOC_BACKGROUND_THREAD
#define NOT_REACHED { not_reached(); }
bool background_thread_create(tsd_t *tsd, unsigned arena_ind) NOT_REACHED
bool background_threads_init(tsd_t *tsd) NOT_REACHED
bool background_threads_enable(tsd_t *tsd) NOT_REACHED
bool background_threads_disable(tsd_t *tsd) NOT_REACHED
bool background_threads_disable_single(tsd_t *tsd,
background_thread_info_t *info) NOT_REACHED
void background_thread_interval_check(tsdn_t *tsdn, arena_t *arena,
arena_decay_t *decay, size_t npages_new) NOT_REACHED
void background_thread_prefork0(tsdn_t *tsdn) NOT_REACHED
void background_thread_prefork1(tsdn_t *tsdn) NOT_REACHED
void background_thread_postfork_parent(tsdn_t *tsdn) NOT_REACHED
void background_thread_postfork_child(tsdn_t *tsdn) NOT_REACHED
#undef NOT_REACHED
#else
bool
background_threads_init(tsd_t *tsd) {
assert(have_background_thread);
assert(narenas_total_get() > 0);
background_thread_enabled_set(tsd_tsdn(tsd), opt_background_thread);
if (malloc_mutex_init(&background_thread_lock,
"background_thread_global",
WITNESS_RANK_BACKGROUND_THREAD_GLOBAL,
malloc_mutex_rank_exclusive)) {
return true;
}
background_thread_info = (background_thread_info_t *)base_alloc(
tsd_tsdn(tsd), b0get(), ncpus * sizeof(background_thread_info_t),
CACHELINE);
if (background_thread_info == NULL) {
return true;
}
for (unsigned i = 0; i < ncpus; i++) {
background_thread_info_t *info = &background_thread_info[i];
if (malloc_mutex_init(&info->mtx, "background_thread",
WITNESS_RANK_BACKGROUND_THREAD,
malloc_mutex_rank_exclusive)) {
return true;
}
if (pthread_cond_init(&info->cond, NULL)) {
return true;
}
info->started = false;
nstime_init(&info->next_wakeup, 0);
info->npages_to_purge_new = 0;
}
return false;
}
static inline bool
set_current_thread_affinity(UNUSED int cpu) {
#if defined(JEMALLOC_HAVE_SCHED_SETAFFINITY)
cpu_set_t cpuset;
CPU_ZERO(&cpuset);
CPU_SET(cpu, &cpuset);
int ret = sched_setaffinity(0, sizeof(cpu_set_t), &cpuset);
return (ret != 0);
#else
return false;
#endif
}
/* Threshold for determining when to wake up the background thread. */
#define BACKGROUND_THREAD_NPAGES_THRESHOLD UINT64_C(1024)
#define BILLION UINT64_C(1000000000)
/* Minimal sleep interval 100 ms. */
#define BACKGROUND_THREAD_MIN_INTERVAL_NS (BILLION / 10)
#define BACKGROUND_THREAD_INDEFINITE_SLEEP UINT64_MAX
static inline size_t
decay_npurge_after_interval(arena_decay_t *decay, size_t interval) {
size_t i;
uint64_t sum = 0;
for (i = 0; i < interval; i++) {
sum += decay->backlog[i] * h_steps[i];
}
for (; i < SMOOTHSTEP_NSTEPS; i++) {
sum += decay->backlog[i] * (h_steps[i] - h_steps[i - interval]);
}
return (size_t)(sum >> SMOOTHSTEP_BFP);
}
static uint64_t
arena_decay_compute_purge_interval_impl(tsdn_t *tsdn, arena_decay_t *decay,
extents_t *extents) {
if (malloc_mutex_trylock(tsdn, &decay->mtx)) {
/* Use minimal interval if decay is contended. */
return BACKGROUND_THREAD_MIN_INTERVAL_NS;
}
uint64_t interval;
ssize_t decay_time = atomic_load_zd(&decay->time_ms, ATOMIC_RELAXED);
if (decay_time <= 0) {
/* Purging is eagerly done or disabled currently. */
interval = BACKGROUND_THREAD_INDEFINITE_SLEEP;
goto label_done;
}
uint64_t decay_interval_ns = nstime_ns(&decay->interval);
assert(decay_interval_ns > 0);
size_t npages = extents_npages_get(extents);
if (npages == 0) {
unsigned i;
for (i = 0; i < SMOOTHSTEP_NSTEPS; i++) {
if (decay->backlog[i] > 0) {
break;
}
}
if (i == SMOOTHSTEP_NSTEPS) {
/* No dirty pages recorded. Sleep indefinitely. */
interval = BACKGROUND_THREAD_INDEFINITE_SLEEP;
goto label_done;
}
}
if (npages <= BACKGROUND_THREAD_NPAGES_THRESHOLD) {
/* Use max interval. */
interval = decay_interval_ns * SMOOTHSTEP_NSTEPS;
goto label_done;
}
size_t lb = BACKGROUND_THREAD_MIN_INTERVAL_NS / decay_interval_ns;
size_t ub = SMOOTHSTEP_NSTEPS;
/* Minimal 2 intervals to ensure reaching next epoch deadline. */
lb = (lb < 2) ? 2 : lb;
if ((decay_interval_ns * ub <= BACKGROUND_THREAD_MIN_INTERVAL_NS) ||
(lb + 2 > ub)) {
interval = BACKGROUND_THREAD_MIN_INTERVAL_NS;
goto label_done;
}
assert(lb + 2 <= ub);
size_t npurge_lb, npurge_ub;
npurge_lb = decay_npurge_after_interval(decay, lb);
if (npurge_lb > BACKGROUND_THREAD_NPAGES_THRESHOLD) {
interval = decay_interval_ns * lb;
goto label_done;
}
npurge_ub = decay_npurge_after_interval(decay, ub);
if (npurge_ub < BACKGROUND_THREAD_NPAGES_THRESHOLD) {
interval = decay_interval_ns * ub;
goto label_done;
}
unsigned n_search = 0;
size_t target, npurge;
while ((npurge_lb + BACKGROUND_THREAD_NPAGES_THRESHOLD < npurge_ub)
&& (lb + 2 < ub)) {
target = (lb + ub) / 2;
npurge = decay_npurge_after_interval(decay, target);
if (npurge > BACKGROUND_THREAD_NPAGES_THRESHOLD) {
ub = target;
npurge_ub = npurge;
} else {
lb = target;
npurge_lb = npurge;
}
assert(n_search++ < lg_floor(SMOOTHSTEP_NSTEPS) + 1);
}
interval = decay_interval_ns * (ub + lb) / 2;
label_done:
interval = (interval < BACKGROUND_THREAD_MIN_INTERVAL_NS) ?
BACKGROUND_THREAD_MIN_INTERVAL_NS : interval;
malloc_mutex_unlock(tsdn, &decay->mtx);
return interval;
}
/* Compute purge interval for background threads. */
static uint64_t
arena_decay_compute_purge_interval(tsdn_t *tsdn, arena_t *arena) {
uint64_t i1, i2;
i1 = arena_decay_compute_purge_interval_impl(tsdn, &arena->decay_dirty,
&arena->extents_dirty);
if (i1 == BACKGROUND_THREAD_MIN_INTERVAL_NS) {
return i1;
}
i2 = arena_decay_compute_purge_interval_impl(tsdn, &arena->decay_muzzy,
&arena->extents_muzzy);
return i1 < i2 ? i1 : i2;
}
static inline uint64_t
background_work_once(tsdn_t *tsdn, unsigned ind) {
arena_t *arena;
unsigned i, narenas;
uint64_t min_interval;
min_interval = BACKGROUND_THREAD_INDEFINITE_SLEEP;
narenas = narenas_total_get();
for (i = ind; i < narenas; i += ncpus) {
arena = arena_get(tsdn, i, false);
if (!arena) {
continue;
}
arena_decay(tsdn, arena, true, false);
uint64_t interval = arena_decay_compute_purge_interval(tsdn,
arena);
if (interval == BACKGROUND_THREAD_MIN_INTERVAL_NS) {
return interval;
}
assert(interval > BACKGROUND_THREAD_MIN_INTERVAL_NS);
if (min_interval > interval) {
min_interval = interval;
}
}
return min_interval;
}
static void
background_work(tsdn_t *tsdn, unsigned ind) {
int ret;
background_thread_info_t *info = &background_thread_info[ind];
malloc_mutex_lock(tsdn, &info->mtx);
while (info->started) {
uint64_t interval = background_work_once(tsdn, ind);
info->npages_to_purge_new = 0;
if (interval == BACKGROUND_THREAD_INDEFINITE_SLEEP) {
nstime_init(&info->next_wakeup,
BACKGROUND_THREAD_INDEFINITE_SLEEP);
ret = pthread_cond_wait(&info->cond, &info->mtx.lock);
assert(ret == 0);
continue;
}
assert(interval >= BACKGROUND_THREAD_MIN_INTERVAL_NS &&
interval <= BACKGROUND_THREAD_INDEFINITE_SLEEP);
nstime_init(&info->next_wakeup, 0);
nstime_update(&info->next_wakeup);
info->next_wakeup.ns += interval;
nstime_t ts_wakeup;
struct timeval tv;
gettimeofday(&tv, NULL);
nstime_init2(&ts_wakeup, tv.tv_sec,
tv.tv_usec * 1000 + interval);
struct timespec ts;
ts.tv_sec = (size_t)nstime_sec(&ts_wakeup);
ts.tv_nsec = (size_t)nstime_nsec(&ts_wakeup);
ret = pthread_cond_timedwait(&info->cond, &info->mtx.lock,
&ts);
assert(ret == ETIMEDOUT || ret == 0);
}
malloc_mutex_unlock(tsdn, &info->mtx);
}
static void *
background_thread_entry(void *ind_arg) {
unsigned thread_ind = (unsigned)(uintptr_t)ind_arg;
assert(thread_ind < narenas_total_get() && thread_ind < ncpus);
if (opt_percpu_arena != percpu_arena_disabled) {
set_current_thread_affinity((int)thread_ind);
}
/*
* Start periodic background work. We avoid fetching tsd to keep the
* background thread "outside", since there may be side effects, for
* example triggering new arena creation (which in turn triggers
* background thread creation).
*/
background_work(TSDN_NULL, thread_ind);
assert(pthread_equal(pthread_self(),
background_thread_info[thread_ind].thread));
return NULL;
}
/* Create a new background thread if needed. */
bool
background_thread_create(tsd_t *tsd, unsigned arena_ind) {
assert(have_background_thread);
malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock);
/* We create at most NCPUs threads. */
size_t thread_ind = arena_ind % ncpus;
background_thread_info_t *info = &background_thread_info[thread_ind];
bool need_new_thread;
malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
need_new_thread = background_thread_enabled() && !info->started;
malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
if (!need_new_thread) {
return false;
}
pre_reentrancy(tsd);
int err;
load_pthread_create_fptr();
if ((err = pthread_create(&info->thread, NULL,
background_thread_entry, (void *)thread_ind)) != 0) {
malloc_printf("<jemalloc>: arena %u background thread creation "
"failed (%d).\n", arena_ind, err);
}
post_reentrancy(tsd);
malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
assert(info->started == false);
if (err == 0) {
info->started = true;
n_background_threads++;
}
malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
return (err != 0);
}
bool
background_threads_enable(tsd_t *tsd) {
assert(n_background_threads == 0);
assert(background_thread_enabled());
malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock);
VARIABLE_ARRAY(bool, created, ncpus);
unsigned i, ncreated;
for (i = 0; i < ncpus; i++) {
created[i] = false;
}
ncreated = 0;
unsigned n = narenas_total_get();
for (i = 0; i < n; i++) {
if (created[i % ncpus] ||
arena_get(tsd_tsdn(tsd), i, false) == NULL) {
continue;
}
if (background_thread_create(tsd, i)) {
return true;
}
created[i % ncpus] = true;
if (++ncreated == ncpus) {
break;
}
}
return false;
}
bool
background_threads_disable_single(tsd_t *tsd, background_thread_info_t *info) {
malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock);
pre_reentrancy(tsd);
bool has_thread;
malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
if (info->started) {
has_thread = true;
info->started = false;
pthread_cond_signal(&info->cond);
} else {
has_thread = false;
}
malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
if (!has_thread) {
post_reentrancy(tsd);
return false;
}
void *ret;
if (pthread_join(info->thread, &ret)) {
post_reentrancy(tsd);
return true;
}
assert(ret == NULL);
n_background_threads--;
post_reentrancy(tsd);
return false;
}
bool
background_threads_disable(tsd_t *tsd) {
assert(!background_thread_enabled());
for (unsigned i = 0; i < ncpus; i++) {
background_thread_info_t *info = &background_thread_info[i];
if (background_threads_disable_single(tsd, info)) {
return true;
}
}
assert(n_background_threads == 0);
return false;
}
/* Check if we need to signal the background thread early. */
void
background_thread_interval_check(tsdn_t *tsdn, arena_t *arena,
arena_decay_t *decay, size_t npages_new) {
background_thread_info_t *info = arena_background_thread_info_get(
arena);
if (malloc_mutex_trylock(tsdn, &info->mtx)) {
/*
* Background thread may hold the mutex for a long period of
* time. We'd like to avoid the variance on application
* threads. So keep this non-blocking, and leave the work to a
* future epoch.
*/
return;
}
if (!info->started) {
goto label_done;
}
if (malloc_mutex_trylock(tsdn, &decay->mtx)) {
goto label_done;
}
ssize_t decay_time = atomic_load_zd(&decay->time_ms, ATOMIC_RELAXED);
if (decay_time <= 0) {
/* Purging is eagerly done or disabled currently. */
goto label_done_unlock2;
}
if (nstime_compare(&info->next_wakeup, &decay->epoch) <= 0) {
goto label_done_unlock2;
}
uint64_t decay_interval_ns = nstime_ns(&decay->interval);
assert(decay_interval_ns > 0);
nstime_t diff;
nstime_copy(&diff, &info->next_wakeup);
nstime_subtract(&diff, &decay->epoch);
if (nstime_ns(&diff) < BACKGROUND_THREAD_MIN_INTERVAL_NS) {
goto label_done_unlock2;
}
if (npages_new > 0) {
size_t n_epoch = (size_t)(nstime_ns(&diff) / decay_interval_ns);
/*
* Compute how many new pages we would need to purge by the next
* wakeup, which is used to determine if we should signal the
* background thread.
*/
uint64_t npurge_new;
if (n_epoch >= SMOOTHSTEP_NSTEPS) {
npurge_new = npages_new;
} else {
uint64_t h_steps_max = h_steps[SMOOTHSTEP_NSTEPS - 1];
assert(h_steps_max >=
h_steps[SMOOTHSTEP_NSTEPS - 1 - n_epoch]);
npurge_new = npages_new * (h_steps_max -
h_steps[SMOOTHSTEP_NSTEPS - 1 - n_epoch]);
npurge_new >>= SMOOTHSTEP_BFP;
}
info->npages_to_purge_new += npurge_new;
}
if (info->npages_to_purge_new > BACKGROUND_THREAD_NPAGES_THRESHOLD ||
(nstime_ns(&info->next_wakeup) ==
BACKGROUND_THREAD_INDEFINITE_SLEEP && info->npages_to_purge_new > 0)) {
info->npages_to_purge_new = 0;
pthread_cond_signal(&info->cond);
}
label_done_unlock2:
malloc_mutex_unlock(tsdn, &decay->mtx);
label_done:
malloc_mutex_unlock(tsdn, &info->mtx);
}
void
background_thread_prefork0(tsdn_t *tsdn) {
malloc_mutex_prefork(tsdn, &background_thread_lock);
if (background_thread_enabled()) {
background_thread_enabled_set(tsdn, false);
background_threads_disable(tsdn_tsd(tsdn));
/* Enable again to re-create threads after fork. */
background_thread_enabled_set(tsdn, true);
}
assert(n_background_threads == 0);
}
void
background_thread_prefork1(tsdn_t *tsdn) {
for (unsigned i = 0; i < ncpus; i++) {
malloc_mutex_prefork(tsdn, &background_thread_info[i].mtx);
}
}
static void
background_thread_postfork_init(tsdn_t *tsdn) {
if (background_thread_enabled()) {
background_threads_enable(tsdn_tsd(tsdn));
}
}
void
background_thread_postfork_parent(tsdn_t *tsdn) {
for (unsigned i = 0; i < ncpus; i++) {
malloc_mutex_postfork_parent(tsdn,
&background_thread_info[i].mtx);
}
background_thread_postfork_init(tsdn);
malloc_mutex_postfork_parent(tsdn, &background_thread_lock);
}
void
background_thread_postfork_child(tsdn_t *tsdn) {
for (unsigned i = 0; i < ncpus; i++) {
malloc_mutex_postfork_child(tsdn,
&background_thread_info[i].mtx);
}
malloc_mutex_postfork_child(tsdn, &background_thread_lock);
malloc_mutex_lock(tsdn, &background_thread_lock);
background_thread_postfork_init(tsdn);
malloc_mutex_unlock(tsdn, &background_thread_lock);
}
#undef BACKGROUND_THREAD_NPAGES_THRESHOLD
#undef BILLION
#undef BACKGROUND_THREAD_MIN_INTERVAL_NS
#undef BACKGROUND_THREAD_INDEFINITE_SLEEP
#endif /* defined(JEMALLOC_BACKGROUND_THREAD) */
#if defined(JEMALLOC_BACKGROUND_THREAD) || defined(JEMALLOC_LAZY_LOCK)
#include <dlfcn.h>
int (*pthread_create_fptr)(pthread_t *__restrict, const pthread_attr_t *,
void *(*)(void *), void *__restrict);
void *
load_pthread_create_fptr(void) {
if (pthread_create_fptr) {
return pthread_create_fptr;
}
pthread_create_fptr = dlsym(RTLD_NEXT, "pthread_create");
if (pthread_create_fptr == NULL) {
malloc_write("<jemalloc>: Error in dlsym(RTLD_NEXT, "
"\"pthread_create\")\n");
abort();
}
return pthread_create_fptr;
}
#endif

View File

@ -53,6 +53,7 @@ static const ctl_named_node_t *n##_index(tsdn_t *tsdn, \
CTL_PROTO(version) CTL_PROTO(version)
CTL_PROTO(epoch) CTL_PROTO(epoch)
CTL_PROTO(background_thread)
CTL_PROTO(thread_tcache_enabled) CTL_PROTO(thread_tcache_enabled)
CTL_PROTO(thread_tcache_flush) CTL_PROTO(thread_tcache_flush)
CTL_PROTO(thread_prof_name) CTL_PROTO(thread_prof_name)
@ -78,6 +79,7 @@ CTL_PROTO(opt_retain)
CTL_PROTO(opt_dss) CTL_PROTO(opt_dss)
CTL_PROTO(opt_narenas) CTL_PROTO(opt_narenas)
CTL_PROTO(opt_percpu_arena) CTL_PROTO(opt_percpu_arena)
CTL_PROTO(opt_background_thread)
CTL_PROTO(opt_dirty_decay_ms) CTL_PROTO(opt_dirty_decay_ms)
CTL_PROTO(opt_muzzy_decay_ms) CTL_PROTO(opt_muzzy_decay_ms)
CTL_PROTO(opt_stats_print) CTL_PROTO(opt_stats_print)
@ -265,6 +267,7 @@ static const ctl_named_node_t opt_node[] = {
{NAME("dss"), CTL(opt_dss)}, {NAME("dss"), CTL(opt_dss)},
{NAME("narenas"), CTL(opt_narenas)}, {NAME("narenas"), CTL(opt_narenas)},
{NAME("percpu_arena"), CTL(opt_percpu_arena)}, {NAME("percpu_arena"), CTL(opt_percpu_arena)},
{NAME("background_thread"), CTL(opt_background_thread)},
{NAME("dirty_decay_ms"), CTL(opt_dirty_decay_ms)}, {NAME("dirty_decay_ms"), CTL(opt_dirty_decay_ms)},
{NAME("muzzy_decay_ms"), CTL(opt_muzzy_decay_ms)}, {NAME("muzzy_decay_ms"), CTL(opt_muzzy_decay_ms)},
{NAME("stats_print"), CTL(opt_stats_print)}, {NAME("stats_print"), CTL(opt_stats_print)},
@ -501,6 +504,7 @@ static const ctl_named_node_t stats_node[] = {
static const ctl_named_node_t root_node[] = { static const ctl_named_node_t root_node[] = {
{NAME("version"), CTL(version)}, {NAME("version"), CTL(version)},
{NAME("epoch"), CTL(epoch)}, {NAME("epoch"), CTL(epoch)},
{NAME("background_thread"), CTL(background_thread)},
{NAME("thread"), CHILD(named, thread)}, {NAME("thread"), CHILD(named, thread)},
{NAME("config"), CHILD(named, config)}, {NAME("config"), CHILD(named, config)},
{NAME("opt"), CHILD(named, opt)}, {NAME("opt"), CHILD(named, opt)},
@ -1445,6 +1449,53 @@ label_return:
return ret; return ret;
} }
static int
background_thread_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,
void *oldp, size_t *oldlenp, void *newp, size_t newlen) {
int ret;
bool oldval;
if (!have_background_thread) {
return ENOENT;
}
malloc_mutex_lock(tsd_tsdn(tsd), &background_thread_lock);
if (newp == NULL) {
oldval = background_thread_enabled();
READ(oldval, bool);
} else {
if (newlen != sizeof(bool)) {
ret = EINVAL;
goto label_return;
}
oldval = background_thread_enabled();
READ(oldval, bool);
bool newval = *(bool *)newp;
if (newval == oldval) {
ret = 0;
goto label_return;
}
background_thread_enabled_set(tsd_tsdn(tsd), newval);
if (newval) {
if (background_threads_enable(tsd)) {
ret = EFAULT;
goto label_return;
}
} else {
if (background_threads_disable(tsd)) {
ret = EFAULT;
goto label_return;
}
}
}
ret = 0;
label_return:
malloc_mutex_unlock(tsd_tsdn(tsd), &background_thread_lock);
return ret;
}
/******************************************************************************/ /******************************************************************************/
CTL_RO_CONFIG_GEN(config_cache_oblivious, bool) CTL_RO_CONFIG_GEN(config_cache_oblivious, bool)
@ -1466,6 +1517,7 @@ CTL_RO_NL_GEN(opt_retain, opt_retain, bool)
CTL_RO_NL_GEN(opt_dss, opt_dss, const char *) CTL_RO_NL_GEN(opt_dss, opt_dss, const char *)
CTL_RO_NL_GEN(opt_narenas, opt_narenas, unsigned) CTL_RO_NL_GEN(opt_narenas, opt_narenas, unsigned)
CTL_RO_NL_GEN(opt_percpu_arena, opt_percpu_arena, const char *) CTL_RO_NL_GEN(opt_percpu_arena, opt_percpu_arena, const char *)
CTL_RO_NL_GEN(opt_background_thread, opt_background_thread, bool)
CTL_RO_NL_GEN(opt_dirty_decay_ms, opt_dirty_decay_ms, ssize_t) CTL_RO_NL_GEN(opt_dirty_decay_ms, opt_dirty_decay_ms, ssize_t)
CTL_RO_NL_GEN(opt_muzzy_decay_ms, opt_muzzy_decay_ms, ssize_t) CTL_RO_NL_GEN(opt_muzzy_decay_ms, opt_muzzy_decay_ms, ssize_t)
CTL_RO_NL_GEN(opt_stats_print, opt_stats_print, bool) CTL_RO_NL_GEN(opt_stats_print, opt_stats_print, bool)
@ -1764,7 +1816,8 @@ arena_i_decay(tsdn_t *tsdn, unsigned arena_ind, bool all) {
for (i = 0; i < narenas; i++) { for (i = 0; i < narenas; i++) {
if (tarenas[i] != NULL) { if (tarenas[i] != NULL) {
arena_decay(tsdn, tarenas[i], all); arena_decay(tsdn, tarenas[i], false,
all);
} }
} }
} else { } else {
@ -1778,7 +1831,7 @@ arena_i_decay(tsdn_t *tsdn, unsigned arena_ind, bool all) {
malloc_mutex_unlock(tsdn, &ctl_mtx); malloc_mutex_unlock(tsdn, &ctl_mtx);
if (tarena != NULL) { if (tarena != NULL) {
arena_decay(tsdn, tarena, all); arena_decay(tsdn, tarena, false, all);
} }
} }
} }
@ -1837,6 +1890,35 @@ label_return:
return ret; return ret;
} }
static void
arena_reset_prepare_background_thread(tsd_t *tsd, unsigned arena_ind) {
/* Temporarily disable the background thread during arena reset. */
if (have_background_thread) {
malloc_mutex_lock(tsd_tsdn(tsd), &background_thread_lock);
if (background_thread_enabled()) {
unsigned ind = arena_ind % ncpus;
background_thread_info_t *info =
&background_thread_info[ind];
assert(info->started);
background_threads_disable_single(tsd, info);
}
}
}
static void
arena_reset_finish_background_thread(tsd_t *tsd, unsigned arena_ind) {
if (have_background_thread) {
if (background_thread_enabled()) {
unsigned ind = arena_ind % ncpus;
background_thread_info_t *info =
&background_thread_info[ind];
assert(!info->started);
background_thread_create(tsd, ind);
}
malloc_mutex_unlock(tsd_tsdn(tsd), &background_thread_lock);
}
}
static int static int
arena_i_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, arena_i_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
size_t *oldlenp, void *newp, size_t newlen) { size_t *oldlenp, void *newp, size_t newlen) {
@ -1850,7 +1932,9 @@ arena_i_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
return ret; return ret;
} }
arena_reset_prepare_background_thread(tsd, arena_ind);
arena_reset(tsd, arena); arena_reset(tsd, arena);
arena_reset_finish_background_thread(tsd, arena_ind);
return ret; return ret;
} }
@ -1875,9 +1959,10 @@ arena_i_destroy_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
goto label_return; goto label_return;
} }
arena_reset_prepare_background_thread(tsd, arena_ind);
/* Merge stats after resetting and purging arena. */ /* Merge stats after resetting and purging arena. */
arena_reset(tsd, arena); arena_reset(tsd, arena);
arena_decay(tsd_tsdn(tsd), arena, true); arena_decay(tsd_tsdn(tsd), arena, false, true);
ctl_darena = arenas_i(MALLCTL_ARENAS_DESTROYED); ctl_darena = arenas_i(MALLCTL_ARENAS_DESTROYED);
ctl_darena->initialized = true; ctl_darena->initialized = true;
ctl_arena_refresh(tsd_tsdn(tsd), arena, ctl_darena, arena_ind, true); ctl_arena_refresh(tsd_tsdn(tsd), arena, ctl_darena, arena_ind, true);
@ -1888,6 +1973,7 @@ arena_i_destroy_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
/* Record arena index for later recycling via arenas.create. */ /* Record arena index for later recycling via arenas.create. */
ql_elm_new(ctl_arena, destroyed_link); ql_elm_new(ctl_arena, destroyed_link);
ql_tail_insert(&ctl_arenas->destroyed, ctl_arena, destroyed_link); ql_tail_insert(&ctl_arenas->destroyed, ctl_arena, destroyed_link);
arena_reset_finish_background_thread(tsd, arena_ind);
assert(ret == 0); assert(ret == 0);
label_return: label_return:

View File

@ -420,7 +420,7 @@ arena_init_locked(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) {
/* Actually initialize the arena. */ /* Actually initialize the arena. */
arena = arena_new(tsdn, ind, extent_hooks); arena = arena_new(tsdn, ind, extent_hooks);
arena_set(ind, arena);
return arena; return arena;
} }
@ -1140,6 +1140,8 @@ malloc_conf_init(void) {
} }
continue; continue;
} }
CONF_HANDLE_BOOL(opt_background_thread,
"background_thread");
if (config_prof) { if (config_prof) {
CONF_HANDLE_BOOL(opt_prof, "prof") CONF_HANDLE_BOOL(opt_prof, "prof")
CONF_HANDLE_CHAR_P(opt_prof_prefix, CONF_HANDLE_CHAR_P(opt_prof_prefix,
@ -1380,6 +1382,22 @@ malloc_init_narenas(void) {
return false; return false;
} }
static bool
malloc_init_background_threads(tsd_t *tsd) {
malloc_mutex_assert_owner(tsd_tsdn(tsd), &init_lock);
if (!have_background_thread) {
if (opt_background_thread) {
malloc_printf("<jemalloc>: option background_thread "
"currently supports pthread only. \n");
return true;
} else {
return false;
}
}
return background_threads_init(tsd);
}
static bool static bool
malloc_init_hard_finish(void) { malloc_init_hard_finish(void) {
if (malloc_mutex_boot()) if (malloc_mutex_boot())
@ -1421,8 +1439,8 @@ malloc_init_hard(void) {
} }
malloc_mutex_lock(tsd_tsdn(tsd), &init_lock); malloc_mutex_lock(tsd_tsdn(tsd), &init_lock);
/* Need this before prof_boot2 (for allocation). */ /* Initialize narenas before prof_boot2 (for allocation). */
if (malloc_init_narenas()) { if (malloc_init_narenas() || malloc_init_background_threads(tsd)) {
malloc_mutex_unlock(tsd_tsdn(tsd), &init_lock); malloc_mutex_unlock(tsd_tsdn(tsd), &init_lock);
return true; return true;
} }
@ -1439,6 +1457,23 @@ malloc_init_hard(void) {
malloc_mutex_unlock(tsd_tsdn(tsd), &init_lock); malloc_mutex_unlock(tsd_tsdn(tsd), &init_lock);
malloc_tsd_boot1(); malloc_tsd_boot1();
/* Update TSD after tsd_boot1. */
tsd = tsd_fetch();
if (opt_background_thread) {
assert(have_background_thread);
/*
* Need to finish init & unlock first before creating background
* threads (pthread_create depends on malloc).
*/
malloc_mutex_lock(tsd_tsdn(tsd), &background_thread_lock);
bool err = background_thread_create(tsd, 0);
malloc_mutex_unlock(tsd_tsdn(tsd), &background_thread_lock);
if (err) {
return true;
}
}
return false; return false;
} }
@ -2970,7 +3005,13 @@ _malloc_prefork(void)
ctl_prefork(tsd_tsdn(tsd)); ctl_prefork(tsd_tsdn(tsd));
tcache_prefork(tsd_tsdn(tsd)); tcache_prefork(tsd_tsdn(tsd));
malloc_mutex_prefork(tsd_tsdn(tsd), &arenas_lock); malloc_mutex_prefork(tsd_tsdn(tsd), &arenas_lock);
if (have_background_thread) {
background_thread_prefork0(tsd_tsdn(tsd));
}
prof_prefork0(tsd_tsdn(tsd)); prof_prefork0(tsd_tsdn(tsd));
if (have_background_thread) {
background_thread_prefork1(tsd_tsdn(tsd));
}
/* Break arena prefork into stages to preserve lock order. */ /* Break arena prefork into stages to preserve lock order. */
for (i = 0; i < 7; i++) { for (i = 0; i < 7; i++) {
for (j = 0; j < narenas; j++) { for (j = 0; j < narenas; j++) {
@ -3036,6 +3077,9 @@ _malloc_postfork(void)
} }
} }
prof_postfork_parent(tsd_tsdn(tsd)); prof_postfork_parent(tsd_tsdn(tsd));
if (have_background_thread) {
background_thread_postfork_parent(tsd_tsdn(tsd));
}
malloc_mutex_postfork_parent(tsd_tsdn(tsd), &arenas_lock); malloc_mutex_postfork_parent(tsd_tsdn(tsd), &arenas_lock);
tcache_postfork_parent(tsd_tsdn(tsd)); tcache_postfork_parent(tsd_tsdn(tsd));
ctl_postfork_parent(tsd_tsdn(tsd)); ctl_postfork_parent(tsd_tsdn(tsd));
@ -3060,6 +3104,9 @@ jemalloc_postfork_child(void) {
} }
} }
prof_postfork_child(tsd_tsdn(tsd)); prof_postfork_child(tsd_tsdn(tsd));
if (have_background_thread) {
background_thread_postfork_child(tsd_tsdn(tsd));
}
malloc_mutex_postfork_child(tsd_tsdn(tsd), &arenas_lock); malloc_mutex_postfork_child(tsd_tsdn(tsd), &arenas_lock);
tcache_postfork_child(tsd_tsdn(tsd)); tcache_postfork_child(tsd_tsdn(tsd));
ctl_postfork_child(tsd_tsdn(tsd)); ctl_postfork_child(tsd_tsdn(tsd));

View File

@ -5,10 +5,6 @@
#include "jemalloc/internal/assert.h" #include "jemalloc/internal/assert.h"
#include "jemalloc/internal/malloc_io.h" #include "jemalloc/internal/malloc_io.h"
#if defined(JEMALLOC_LAZY_LOCK) && !defined(_WIN32)
#include <dlfcn.h>
#endif
#ifndef _CRT_SPINCOUNT #ifndef _CRT_SPINCOUNT
#define _CRT_SPINCOUNT 4000 #define _CRT_SPINCOUNT 4000
#endif #endif
@ -24,10 +20,6 @@ static bool postpone_init = true;
static malloc_mutex_t *postponed_mutexes = NULL; static malloc_mutex_t *postponed_mutexes = NULL;
#endif #endif
#if defined(JEMALLOC_LAZY_LOCK) && !defined(_WIN32)
static void pthread_create_once(void);
#endif
/******************************************************************************/ /******************************************************************************/
/* /*
* We intercept pthread_create() calls in order to toggle isthreaded if the * We intercept pthread_create() calls in order to toggle isthreaded if the
@ -35,18 +27,9 @@ static void pthread_create_once(void);
*/ */
#if defined(JEMALLOC_LAZY_LOCK) && !defined(_WIN32) #if defined(JEMALLOC_LAZY_LOCK) && !defined(_WIN32)
static int (*pthread_create_fptr)(pthread_t *__restrict, const pthread_attr_t *,
void *(*)(void *), void *__restrict);
static void static void
pthread_create_once(void) { pthread_create_once(void) {
pthread_create_fptr = dlsym(RTLD_NEXT, "pthread_create"); pthread_create_fptr = load_pthread_create_fptr();
if (pthread_create_fptr == NULL) {
malloc_write("<jemalloc>: Error in dlsym(RTLD_NEXT, "
"\"pthread_create\")\n");
abort();
}
isthreaded = true; isthreaded = true;
} }

View File

@ -816,6 +816,7 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque,
OPT_WRITE_CHAR_P(dss, ",") OPT_WRITE_CHAR_P(dss, ",")
OPT_WRITE_UNSIGNED(narenas, ",") OPT_WRITE_UNSIGNED(narenas, ",")
OPT_WRITE_CHAR_P(percpu_arena, ",") OPT_WRITE_CHAR_P(percpu_arena, ",")
OPT_WRITE_BOOL_MUTABLE(background_thread, background_thread, ",")
OPT_WRITE_SSIZE_T_MUTABLE(dirty_decay_ms, arenas.dirty_decay_ms, ",") OPT_WRITE_SSIZE_T_MUTABLE(dirty_decay_ms, arenas.dirty_decay_ms, ",")
OPT_WRITE_SSIZE_T_MUTABLE(muzzy_decay_ms, arenas.muzzy_decay_ms, ",") OPT_WRITE_SSIZE_T_MUTABLE(muzzy_decay_ms, arenas.muzzy_decay_ms, ",")
OPT_WRITE_CHAR_P(junk, ",") OPT_WRITE_CHAR_P(junk, ",")

View File

@ -2,6 +2,18 @@
#include "test/extent_hooks.h" #include "test/extent_hooks.h"
static bool
check_background_thread_enabled(void) {
bool enabled;
size_t sz = sizeof(bool);
int ret = mallctl("background_thread", (void *)&enabled, &sz, NULL,0);
if (ret == ENOENT) {
return false;
}
assert_d_eq(ret, 0, "Unexpected mallctl error");
return enabled;
}
static void static void
test_extent_body(unsigned arena_ind) { test_extent_body(unsigned arena_ind) {
void *p; void *p;
@ -124,6 +136,7 @@ TEST_BEGIN(test_extent_manual_hook) {
assert_ptr_ne(old_hooks->merge, extent_merge_hook, assert_ptr_ne(old_hooks->merge, extent_merge_hook,
"Unexpected extent_hooks error"); "Unexpected extent_hooks error");
test_skip_if(check_background_thread_enabled());
test_extent_body(arena_ind); test_extent_body(arena_ind);
/* Restore extent hooks. */ /* Restore extent hooks. */
@ -164,6 +177,7 @@ TEST_BEGIN(test_extent_auto_hook) {
assert_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, assert_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz,
(void *)&new_hooks, new_size), 0, "Unexpected mallctl() failure"); (void *)&new_hooks, new_size), 0, "Unexpected mallctl() failure");
test_skip_if(check_background_thread_enabled());
test_extent_body(arena_ind); test_extent_body(arena_ind);
} }
TEST_END TEST_END

View File

@ -9,6 +9,18 @@ static unsigned nupdates_mock;
static nstime_t time_mock; static nstime_t time_mock;
static bool monotonic_mock; static bool monotonic_mock;
static bool
check_background_thread_enabled(void) {
bool enabled;
size_t sz = sizeof(bool);
int ret = mallctl("background_thread", (void *)&enabled, &sz, NULL,0);
if (ret == ENOENT) {
return false;
}
assert_d_eq(ret, 0, "Unexpected mallctl error");
return enabled;
}
static bool static bool
nstime_monotonic_mock(void) { nstime_monotonic_mock(void) {
return monotonic_mock; return monotonic_mock;
@ -167,6 +179,8 @@ generate_dirty(unsigned arena_ind, size_t size) {
} }
TEST_BEGIN(test_decay_ticks) { TEST_BEGIN(test_decay_ticks) {
test_skip_if(check_background_thread_enabled());
ticker_t *decay_ticker; ticker_t *decay_ticker;
unsigned tick0, tick1, arena_ind; unsigned tick0, tick1, arena_ind;
size_t sz, large0; size_t sz, large0;
@ -405,6 +419,7 @@ decay_ticker_helper(unsigned arena_ind, int flags, bool dirty, ssize_t dt,
} }
TEST_BEGIN(test_decay_ticker) { TEST_BEGIN(test_decay_ticker) {
test_skip_if(check_background_thread_enabled());
#define NPS 2048 #define NPS 2048
ssize_t ddt = opt_dirty_decay_ms; ssize_t ddt = opt_dirty_decay_ms;
ssize_t mdt = opt_muzzy_decay_ms; ssize_t mdt = opt_muzzy_decay_ms;
@ -466,6 +481,7 @@ TEST_BEGIN(test_decay_ticker) {
TEST_END TEST_END
TEST_BEGIN(test_decay_nonmonotonic) { TEST_BEGIN(test_decay_nonmonotonic) {
test_skip_if(check_background_thread_enabled());
#define NPS (SMOOTHSTEP_NSTEPS + 1) #define NPS (SMOOTHSTEP_NSTEPS + 1)
int flags = (MALLOCX_ARENA(0) | MALLOCX_TCACHE_NONE); int flags = (MALLOCX_ARENA(0) | MALLOCX_TCACHE_NONE);
void *ps[NPS]; void *ps[NPS];
@ -523,6 +539,8 @@ TEST_BEGIN(test_decay_nonmonotonic) {
TEST_END TEST_END
TEST_BEGIN(test_decay_now) { TEST_BEGIN(test_decay_now) {
test_skip_if(check_background_thread_enabled());
unsigned arena_ind = do_arena_create(0, 0); unsigned arena_ind = do_arena_create(0, 0);
assert_zu_eq(get_arena_pdirty(arena_ind), 0, "Unexpected dirty pages"); assert_zu_eq(get_arena_pdirty(arena_ind), 0, "Unexpected dirty pages");
assert_zu_eq(get_arena_pmuzzy(arena_ind), 0, "Unexpected muzzy pages"); assert_zu_eq(get_arena_pmuzzy(arena_ind), 0, "Unexpected muzzy pages");
@ -541,6 +559,8 @@ TEST_BEGIN(test_decay_now) {
TEST_END TEST_END
TEST_BEGIN(test_decay_never) { TEST_BEGIN(test_decay_never) {
test_skip_if(check_background_thread_enabled());
unsigned arena_ind = do_arena_create(-1, -1); unsigned arena_ind = do_arena_create(-1, -1);
int flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE; int flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE;
assert_zu_eq(get_arena_pdirty(arena_ind), 0, "Unexpected dirty pages"); assert_zu_eq(get_arena_pdirty(arena_ind), 0, "Unexpected dirty pages");

View File

@ -1,7 +1,7 @@
#include "test/jemalloc_test.h" #include "test/jemalloc_test.h"
static const uint64_t smoothstep_tab[] = { static const uint64_t smoothstep_tab[] = {
#define STEP(step, h, x, y) \ #define STEP(step, h, x, y, h_sum) \
h, h,
SMOOTHSTEP SMOOTHSTEP
#undef STEP #undef STEP

View File

@ -115,8 +115,10 @@ TEST_BEGIN(test_stats_arenas_summary) {
"Unexepected mallctl() result"); "Unexepected mallctl() result");
if (config_stats) { if (config_stats) {
assert_u64_gt(dirty_npurge + muzzy_npurge, 0, if (!background_thread_enabled()) {
"At least one purge should have occurred"); assert_u64_gt(dirty_npurge + muzzy_npurge, 0,
"At least one purge should have occurred");
}
assert_u64_le(dirty_nmadvise, dirty_purged, assert_u64_le(dirty_nmadvise, dirty_purged,
"dirty_nmadvise should be no greater than dirty_purged"); "dirty_nmadvise should be no greater than dirty_purged");
assert_u64_le(muzzy_nmadvise, muzzy_purged, assert_u64_le(muzzy_nmadvise, muzzy_purged,