From d4ac7582f32f506d5203bea2f0115076202add38 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 25 Jan 2017 09:54:27 -0800 Subject: [PATCH] Introduce a backport of C11 atomics This introduces a backport of C11 atomics. It has four implementations; ranked in order of preference, they are: - GCC/Clang __atomic builtins - GCC/Clang __sync builtins - MSVC _Interlocked builtins - C11 atomics, from <stdatomic.h> The primary advantages are: - Close adherence to the standard API gives us a defined memory model. - Type safety: atomic objects are now separate types from non-atomic ones, so that it's impossible to mix up atomic and non-atomic updates (which is undefined behavior that compilers are starting to take advantage of). - Efficiency: we can specify ordering for operations, avoiding fences and atomic operations on strongly ordered architectures (example: `atomic_write_u32(ptr, val);` involves a CAS loop, whereas `atomic_store(ptr, val, ATOMIC_RELEASE);` is a plain store). This diff leaves in the current atomics API (implementing them in terms of the backport). This lets us transition uses over piecemeal. Testing: This is by nature hard to test. I've manually tested the first three options on Linux on gcc by futzing with the #defines manually, on FreeBSD with gcc and clang, on MSVC, and on OS X with clang. All of these were x86 machines though, and we don't have any test infrastructure set up for non-x86 platforms.
--- Makefile.in | 1 - configure.ac | 52 +- include/jemalloc/internal/atomic.h | 111 ++++ include/jemalloc/internal/atomic_c11.h | 97 ++++ include/jemalloc/internal/atomic_externs.h | 12 - include/jemalloc/internal/atomic_gcc_atomic.h | 125 +++++ include/jemalloc/internal/atomic_gcc_sync.h | 191 +++++++ include/jemalloc/internal/atomic_inlines.h | 525 ------------------ include/jemalloc/internal/atomic_msvc.h | 158 ++++++ include/jemalloc/internal/atomic_types.h | 8 - .../jemalloc/internal/jemalloc_internal.h.in | 22 +- .../internal/jemalloc_internal_defs.h.in | 13 +- include/jemalloc/internal/private_symbols.txt | 20 - src/atomic.c | 2 - test/unit/atomic.c | 282 +++++++--- 15 files changed, 947 insertions(+), 672 deletions(-) create mode 100644 include/jemalloc/internal/atomic.h create mode 100644 include/jemalloc/internal/atomic_c11.h delete mode 100644 include/jemalloc/internal/atomic_externs.h create mode 100644 include/jemalloc/internal/atomic_gcc_atomic.h create mode 100644 include/jemalloc/internal/atomic_gcc_sync.h delete mode 100644 include/jemalloc/internal/atomic_inlines.h create mode 100644 include/jemalloc/internal/atomic_msvc.h delete mode 100644 include/jemalloc/internal/atomic_types.h delete mode 100644 src/atomic.c diff --git a/Makefile.in b/Makefile.in index 76a73b76..53ebe32e 100644 --- a/Makefile.in +++ b/Makefile.in @@ -90,7 +90,6 @@ BINS := $(objroot)bin/jemalloc-config $(objroot)bin/jemalloc.sh $(objroot)bin/je C_HDRS := $(objroot)include/jemalloc/jemalloc$(install_suffix).h C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/arena.c \ - $(srcroot)src/atomic.c \ $(srcroot)src/base.c \ $(srcroot)src/bitmap.c \ $(srcroot)src/ckh.c \ diff --git a/configure.ac b/configure.ac index 1653fe7f..0095caf1 100644 --- a/configure.ac +++ b/configure.ac @@ -550,7 +550,7 @@ case "${host}" in AC_DEFINE([JEMALLOC_HAS_ALLOCA_H]) AC_DEFINE([JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY], [ ]) AC_DEFINE([JEMALLOC_THREADED_INIT], [ ]) - AC_DEFINE([JEMALLOC_C11ATOMICS]) + 
AC_DEFINE([JEMALLOC_C11_ATOMICS]) force_tls="0" default_munmap="0" ;; @@ -1730,36 +1730,44 @@ JE_COMPILABLE([C11 atomics], [ volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p; uint64_t r = atomic_fetch_add(a, x) + x; return r == 0; -], [je_cv_c11atomics]) -if test "x${je_cv_c11atomics}" = "xyes" ; then - AC_DEFINE([JEMALLOC_C11ATOMICS]) +], [je_cv_c11_atomics]) +if test "x${je_cv_c11_atomics}" = "xyes" ; then + AC_DEFINE([JEMALLOC_C11_ATOMICS]) fi dnl ============================================================================ -dnl Check for atomic(9) operations as provided on FreeBSD. +dnl Check for GCC-style __atomic atomics. -JE_COMPILABLE([atomic(9)], [ -#include -#include -#include +JE_COMPILABLE([GCC __atomic atomics], [ ], [ - { - uint32_t x32 = 0; - volatile uint32_t *x32p = &x32; - atomic_fetchadd_32(x32p, 1); - } - { - unsigned long xlong = 0; - volatile unsigned long *xlongp = &xlong; - atomic_fetchadd_long(xlongp, 1); - } -], [je_cv_atomic9]) -if test "x${je_cv_atomic9}" = "xyes" ; then - AC_DEFINE([JEMALLOC_ATOMIC9]) + int x = 0; + int val = 1; + int y = __atomic_fetch_add(&x, val, __ATOMIC_RELAXED); + int after_add = x; + return after_add == 1; +], [je_cv_gcc_atomic_atomics]) +if test "x${je_cv_gcc_atomic_atomics}" = "xyes" ; then + AC_DEFINE([JEMALLOC_GCC_ATOMIC_ATOMICS]) +fi + +dnl ============================================================================ +dnl Check for GCC-style __sync atomics. + +JE_COMPILABLE([GCC __sync atomics], [ +], [ + int x = 0; + int before_add = __sync_fetch_and_add(&x, 1); + int after_add = x; + return (before_add == 0) && (after_add == 1); +], [je_cv_gcc_sync_atomics]) +if test "x${je_cv_gcc_sync_atomics}" = "xyes" ; then + AC_DEFINE([JEMALLOC_GCC_SYNC_ATOMICS]) fi dnl ============================================================================ dnl Check for atomic(3) operations as provided on Darwin. 
+dnl We need this not for the atomic operations (which are provided above), but +dnl rather for the OSSpinLock type it exposes. JE_COMPILABLE([Darwin OSAtomic*()], [ #include <libkern/OSAtomic.h> diff --git a/include/jemalloc/internal/atomic.h b/include/jemalloc/internal/atomic.h new file mode 100644 index 00000000..84fbbdfb --- /dev/null +++ b/include/jemalloc/internal/atomic.h @@ -0,0 +1,111 @@ +#ifndef JEMALLOC_INTERNAL_ATOMIC_H +#define JEMALLOC_INTERNAL_ATOMIC_H + +#define ATOMIC_INLINE static inline + +#if defined(JEMALLOC_GCC_ATOMIC_ATOMICS) +# include "jemalloc/internal/atomic_gcc_atomic.h" +#elif defined(JEMALLOC_GCC_SYNC_ATOMICS) +# include "jemalloc/internal/atomic_gcc_sync.h" +#elif defined(_MSC_VER) +# include "jemalloc/internal/atomic_msvc.h" +#elif defined(JEMALLOC_C11_ATOMICS) +# include "jemalloc/internal/atomic_c11.h" +#else +# error "Don't have atomics implemented on this platform." +#endif + +/* + * This header gives more or less a backport of C11 atomics. The user can write + * JEMALLOC_GENERATE_ATOMICS(type, short_type, lg_sizeof_type); to generate + * counterparts of the C11 atomic functions for type, as so: + * JEMALLOC_GENERATE_ATOMICS(int *, pi, 3); + * and then write things like: + * int *some_ptr; + * atomic_pi_t atomic_ptr_to_int; + * atomic_store_pi(&atomic_ptr_to_int, some_ptr, ATOMIC_RELAXED); + * int *prev_value = atomic_exchange_pi(&atomic_ptr_to_int, NULL, ATOMIC_ACQ_REL); + * assert(some_ptr == prev_value); + * and expect things to work in the obvious way. + * + * Also included (with naming differences to avoid conflicts with the standard + * library): + * atomic_fence(atomic_memory_order_t) (mimics C11's atomic_thread_fence). + * ATOMIC_INIT (mimics C11's ATOMIC_VAR_INIT). + */ + +/* + * Pure convenience, so that we don't have to type "atomic_memory_order_" + * quite so often.
+ */ +#define ATOMIC_RELAXED atomic_memory_order_relaxed +#define ATOMIC_ACQUIRE atomic_memory_order_acquire +#define ATOMIC_RELEASE atomic_memory_order_release +#define ATOMIC_ACQ_REL atomic_memory_order_acq_rel +#define ATOMIC_SEQ_CST atomic_memory_order_seq_cst + +/* + * In order to let us transition atomics usage piecemeal (and reason locally + * about memory orders), we'll support the previous API for a while. + */ +#define JEMALLOC_GENERATE_COMPATABILITY_ATOMICS(type, short_type) \ +ATOMIC_INLINE type \ +atomic_read_##short_type(type *p) { \ + return atomic_load_##short_type((atomic_##short_type##_t *)p, \ + ATOMIC_SEQ_CST); \ +} \ + \ +ATOMIC_INLINE void \ +atomic_write_##short_type(type *p, const type val) { \ + atomic_store_##short_type((atomic_##short_type##_t *)p, \ + (type)val, ATOMIC_SEQ_CST); \ +} \ +ATOMIC_INLINE bool \ +atomic_cas_##short_type(type *p, type c, type s) { \ + /* Note the '!' -- legacy atomic_cas returns true on failure. */ \ + return !atomic_compare_exchange_strong_##short_type( \ + (atomic_##short_type##_t *)p, &c, s, ATOMIC_SEQ_CST, \ + ATOMIC_SEQ_CST); \ +} + +#define JEMALLOC_GENERATE_COMPATABILITY_INT_ATOMICS(type, short_type) \ +JEMALLOC_GENERATE_COMPATABILITY_ATOMICS(type, short_type) \ + \ +ATOMIC_INLINE type \ +atomic_add_##short_type(type *p, type x) { \ + return atomic_fetch_add_##short_type( \ + (atomic_##short_type##_t *)p, x, ATOMIC_SEQ_CST) + x; \ +} \ +ATOMIC_INLINE type \ +atomic_sub_##short_type(type *p, type x) { \ + return atomic_fetch_sub_##short_type( \ + (atomic_##short_type##_t *)p, x, ATOMIC_SEQ_CST) - x; \ +} + +JEMALLOC_GENERATE_ATOMICS(void *, p, LG_SIZEOF_PTR) +JEMALLOC_GENERATE_COMPATABILITY_ATOMICS(void *, p) + +/* + * There's no actual guarantee that sizeof(bool) == 1, but it's true on the only + * platform that actually needs to know the size, MSVC.
+ */ +JEMALLOC_GENERATE_ATOMICS(bool, b, 0) +JEMALLOC_GENERATE_COMPATABILITY_ATOMICS(bool, b) + +JEMALLOC_GENERATE_INT_ATOMICS(unsigned, u, LG_SIZEOF_INT) +JEMALLOC_GENERATE_COMPATABILITY_INT_ATOMICS(unsigned, u) + +JEMALLOC_GENERATE_INT_ATOMICS(size_t, zu, LG_SIZEOF_PTR) +JEMALLOC_GENERATE_COMPATABILITY_INT_ATOMICS(size_t, zu) + +JEMALLOC_GENERATE_INT_ATOMICS(uint32_t, u32, 2) +JEMALLOC_GENERATE_COMPATABILITY_INT_ATOMICS(uint32_t, u32) + +# if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3) +JEMALLOC_GENERATE_INT_ATOMICS(uint64_t, u64, 3) +JEMALLOC_GENERATE_COMPATABILITY_INT_ATOMICS(uint64_t, u64) +# endif + +#undef ATOMIC_INLINE + +#endif /* JEMALLOC_INTERNAL_ATOMIC_H */ diff --git a/include/jemalloc/internal/atomic_c11.h b/include/jemalloc/internal/atomic_c11.h new file mode 100644 index 00000000..a5f9313a --- /dev/null +++ b/include/jemalloc/internal/atomic_c11.h @@ -0,0 +1,97 @@ +#ifndef JEMALLOC_INTERNAL_ATOMIC_C11_H +#define JEMALLOC_INTERNAL_ATOMIC_C11_H + +#include <stdatomic.h> + +#define ATOMIC_INIT(...) ATOMIC_VAR_INIT(__VA_ARGS__) + +#define atomic_memory_order_t memory_order +#define atomic_memory_order_relaxed memory_order_relaxed +#define atomic_memory_order_acquire memory_order_acquire +#define atomic_memory_order_release memory_order_release +#define atomic_memory_order_acq_rel memory_order_acq_rel +#define atomic_memory_order_seq_cst memory_order_seq_cst + +#define atomic_fence atomic_thread_fence + +#define JEMALLOC_GENERATE_ATOMICS(type, short_type, \ + /* unused */ lg_size) \ +typedef _Atomic(type) atomic_##short_type##_t; \ + \ +ATOMIC_INLINE type \ +atomic_load_##short_type(const atomic_##short_type##_t *a, \ + atomic_memory_order_t mo) { \ + /* \ + * A strict interpretation of the C standard prevents \ + * atomic_load from taking a const argument, but it's \ + * convenient for our purposes. This cast is a workaround.
\ + */ \ + atomic_##short_type##_t* a_nonconst = \ + (atomic_##short_type##_t*)a; \ + return atomic_load_explicit(a_nonconst, mo); \ +} \ + \ +ATOMIC_INLINE void \ +atomic_store_##short_type(atomic_##short_type##_t *a, \ + type val, atomic_memory_order_t mo) { \ + atomic_store_explicit(a, val, mo); \ +} \ + \ +ATOMIC_INLINE type \ +atomic_exchange_##short_type(atomic_##short_type##_t *a, type val, \ + atomic_memory_order_t mo) { \ + return atomic_exchange_explicit(a, val, mo); \ +} \ + \ +ATOMIC_INLINE bool \ +atomic_compare_exchange_weak_##short_type(atomic_##short_type##_t *a, \ + type *expected, type desired, atomic_memory_order_t success_mo, \ + atomic_memory_order_t failure_mo) { \ + return atomic_compare_exchange_weak_explicit(a, expected, \ + desired, success_mo, failure_mo); \ +} \ + \ +ATOMIC_INLINE bool \ +atomic_compare_exchange_strong_##short_type(atomic_##short_type##_t *a, \ + type *expected, type desired, atomic_memory_order_t success_mo, \ + atomic_memory_order_t failure_mo) { \ + return atomic_compare_exchange_strong_explicit(a, expected, \ + desired, success_mo, failure_mo); \ +} + +/* + * Integral types have some special operations available that non-integral ones + * lack. 
+ */ +#define JEMALLOC_GENERATE_INT_ATOMICS(type, short_type, \ + /* unused */ lg_size) \ +JEMALLOC_GENERATE_ATOMICS(type, short_type, /* unused */ lg_size) \ + \ +ATOMIC_INLINE type \ +atomic_fetch_add_##short_type(atomic_##short_type##_t *a, \ + type val, atomic_memory_order_t mo) { \ + return atomic_fetch_add_explicit(a, val, mo); \ +} \ + \ +ATOMIC_INLINE type \ +atomic_fetch_sub_##short_type(atomic_##short_type##_t *a, \ + type val, atomic_memory_order_t mo) { \ + return atomic_fetch_sub_explicit(a, val, mo); \ +} \ +ATOMIC_INLINE type \ +atomic_fetch_and_##short_type(atomic_##short_type##_t *a, \ + type val, atomic_memory_order_t mo) { \ + return atomic_fetch_and_explicit(a, val, mo); \ +} \ +ATOMIC_INLINE type \ +atomic_fetch_or_##short_type(atomic_##short_type##_t *a, \ + type val, atomic_memory_order_t mo) { \ + return atomic_fetch_or_explicit(a, val, mo); \ +} \ +ATOMIC_INLINE type \ +atomic_fetch_xor_##short_type(atomic_##short_type##_t *a, \ + type val, atomic_memory_order_t mo) { \ + return atomic_fetch_xor_explicit(a, val, mo); \ +} + +#endif /* JEMALLOC_INTERNAL_ATOMIC_C11_H */ diff --git a/include/jemalloc/internal/atomic_externs.h b/include/jemalloc/internal/atomic_externs.h deleted file mode 100644 index 09f06408..00000000 --- a/include/jemalloc/internal/atomic_externs.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_ATOMIC_EXTERNS_H -#define JEMALLOC_INTERNAL_ATOMIC_EXTERNS_H - -#if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3) -#define atomic_read_u64(p) atomic_add_u64(p, 0) -#endif -#define atomic_read_u32(p) atomic_add_u32(p, 0) -#define atomic_read_p(p) atomic_add_p(p, NULL) -#define atomic_read_zu(p) atomic_add_zu(p, 0) -#define atomic_read_u(p) atomic_add_u(p, 0) - -#endif /* JEMALLOC_INTERNAL_ATOMIC_EXTERNS_H */ diff --git a/include/jemalloc/internal/atomic_gcc_atomic.h b/include/jemalloc/internal/atomic_gcc_atomic.h new file mode 100644 index 00000000..3d13b4a6 --- /dev/null +++ b/include/jemalloc/internal/atomic_gcc_atomic.h @@ 
-0,0 +1,125 @@ +#ifndef JEMALLOC_INTERNAL_ATOMIC_GCC_ATOMIC_H +#define JEMALLOC_INTERNAL_ATOMIC_GCC_ATOMIC_H + +#define ATOMIC_INIT(...) {__VA_ARGS__} + +typedef enum { + atomic_memory_order_relaxed, + atomic_memory_order_acquire, + atomic_memory_order_release, + atomic_memory_order_acq_rel, + atomic_memory_order_seq_cst +} atomic_memory_order_t; + +ATOMIC_INLINE int +atomic_enum_to_builtin(atomic_memory_order_t mo) { + switch (mo) { + case atomic_memory_order_relaxed: + return __ATOMIC_RELAXED; + case atomic_memory_order_acquire: + return __ATOMIC_ACQUIRE; + case atomic_memory_order_release: + return __ATOMIC_RELEASE; + case atomic_memory_order_acq_rel: + return __ATOMIC_ACQ_REL; + case atomic_memory_order_seq_cst: + return __ATOMIC_SEQ_CST; + } + /* Can't actually happen; the switch is exhaustive. */ + return __ATOMIC_SEQ_CST; +} + +ATOMIC_INLINE void +atomic_fence(atomic_memory_order_t mo) { + __atomic_thread_fence(atomic_enum_to_builtin(mo)); +} + +#define JEMALLOC_GENERATE_ATOMICS(type, short_type, \ + /* unused */ lg_size) \ +typedef struct { \ + type repr; \ +} atomic_##short_type##_t; \ + \ +ATOMIC_INLINE type \ +atomic_load_##short_type(const atomic_##short_type##_t *a, \ + atomic_memory_order_t mo) { \ + type result; \ + __atomic_load(&a->repr, &result, atomic_enum_to_builtin(mo)); \ + return result; \ +} \ + \ +ATOMIC_INLINE void \ +atomic_store_##short_type(atomic_##short_type##_t *a, type val, \ + atomic_memory_order_t mo) { \ + __atomic_store(&a->repr, &val, atomic_enum_to_builtin(mo)); \ +} \ + \ +ATOMIC_INLINE type \ +atomic_exchange_##short_type(atomic_##short_type##_t *a, type val, \ + atomic_memory_order_t mo) { \ + type result; \ + __atomic_exchange(&a->repr, &val, &result, \ + atomic_enum_to_builtin(mo)); \ + return result; \ +} \ + \ +ATOMIC_INLINE bool \ +atomic_compare_exchange_weak_##short_type(atomic_##short_type##_t *a, \ + type *expected, type desired, atomic_memory_order_t success_mo, \ + atomic_memory_order_t failure_mo) { \ + return 
__atomic_compare_exchange(&a->repr, expected, &desired, \ + true, atomic_enum_to_builtin(success_mo), \ + atomic_enum_to_builtin(failure_mo)); \ +} \ + \ +ATOMIC_INLINE bool \ +atomic_compare_exchange_strong_##short_type(atomic_##short_type##_t *a, \ + type *expected, type desired, atomic_memory_order_t success_mo, \ + atomic_memory_order_t failure_mo) { \ + return __atomic_compare_exchange(&a->repr, expected, &desired, \ + false, \ + atomic_enum_to_builtin(success_mo), \ + atomic_enum_to_builtin(failure_mo)); \ +} + + +#define JEMALLOC_GENERATE_INT_ATOMICS(type, short_type, \ + /* unused */ lg_size) \ +JEMALLOC_GENERATE_ATOMICS(type, short_type, /* unused */ lg_size) \ + \ +ATOMIC_INLINE type \ +atomic_fetch_add_##short_type(atomic_##short_type##_t *a, type val, \ + atomic_memory_order_t mo) { \ + return __atomic_fetch_add(&a->repr, val, \ + atomic_enum_to_builtin(mo)); \ +} \ + \ +ATOMIC_INLINE type \ +atomic_fetch_sub_##short_type(atomic_##short_type##_t *a, type val, \ + atomic_memory_order_t mo) { \ + return __atomic_fetch_sub(&a->repr, val, \ + atomic_enum_to_builtin(mo)); \ +} \ + \ +ATOMIC_INLINE type \ +atomic_fetch_and_##short_type(atomic_##short_type##_t *a, type val, \ + atomic_memory_order_t mo) { \ + return __atomic_fetch_and(&a->repr, val, \ + atomic_enum_to_builtin(mo)); \ +} \ + \ +ATOMIC_INLINE type \ +atomic_fetch_or_##short_type(atomic_##short_type##_t *a, type val, \ + atomic_memory_order_t mo) { \ + return __atomic_fetch_or(&a->repr, val, \ + atomic_enum_to_builtin(mo)); \ +} \ + \ +ATOMIC_INLINE type \ +atomic_fetch_xor_##short_type(atomic_##short_type##_t *a, type val, \ + atomic_memory_order_t mo) { \ + return __atomic_fetch_xor(&a->repr, val, \ + atomic_enum_to_builtin(mo)); \ +} + +#endif /* JEMALLOC_INTERNAL_ATOMIC_GCC_ATOMIC_H */ diff --git a/include/jemalloc/internal/atomic_gcc_sync.h b/include/jemalloc/internal/atomic_gcc_sync.h new file mode 100644 index 00000000..30846e4d --- /dev/null +++ 
b/include/jemalloc/internal/atomic_gcc_sync.h @@ -0,0 +1,191 @@ +#ifndef JEMALLOC_INTERNAL_ATOMIC_GCC_SYNC_H +#define JEMALLOC_INTERNAL_ATOMIC_GCC_SYNC_H + +#define ATOMIC_INIT(...) {__VA_ARGS__} + +typedef enum { + atomic_memory_order_relaxed, + atomic_memory_order_acquire, + atomic_memory_order_release, + atomic_memory_order_acq_rel, + atomic_memory_order_seq_cst +} atomic_memory_order_t; + +ATOMIC_INLINE void +atomic_fence(atomic_memory_order_t mo) { + /* Easy cases first: no barrier, and full barrier. */ + if (mo == atomic_memory_order_relaxed) { + asm volatile("" ::: "memory"); + return; + } + if (mo == atomic_memory_order_seq_cst) { + asm volatile("" ::: "memory"); + __sync_synchronize(); + asm volatile("" ::: "memory"); + return; + } + asm volatile("" ::: "memory"); +# if defined(__i386__) || defined(__x86_64__) + /* This is implicit on x86. */ +# elif defined(__ppc__) + asm volatile("lwsync"); +# elif defined(__sparc__) && defined(__arch64__) + if (mo == atomic_memory_order_acquire) { + asm volatile("membar #LoadLoad | #LoadStore"); + } else if (mo == atomic_memory_order_release) { + asm volatile("membar #LoadStore | #StoreStore"); + } else { + asm volatile("membar #LoadLoad | #LoadStore | #StoreStore"); + } +# else + __sync_synchronize(); +# endif + asm volatile("" ::: "memory"); +} + +/* + * A correct implementation of seq_cst loads and stores on weakly ordered + * architectures could do either of the following: + * 1. store() is weak-fence -> store -> strong fence, load() is load -> + * strong-fence. + * 2. store() is strong-fence -> store, load() is strong-fence -> load -> + * weak-fence. + * The tricky thing is, load() and store() above can be the load or store + * portions of a gcc __sync builtin, so we have to follow GCC's lead, which + * means going with strategy 2. + * On strongly ordered architectures, the natural strategy is to stick a strong + * fence after seq_cst stores, and have naked loads. 
So we want the strong + * fences in different places on different architectures. + * atomic_pre_sc_load_fence and atomic_post_sc_store_fence allow us to + * accomplish this. + */ + +ATOMIC_INLINE void +atomic_pre_sc_load_fence(void) { +# if defined(__i386__) || defined(__x86_64__) || \ + (defined(__sparc__) && defined(__arch64__)) + atomic_fence(atomic_memory_order_relaxed); +# else + atomic_fence(atomic_memory_order_seq_cst); +# endif +} + +ATOMIC_INLINE void +atomic_post_sc_store_fence(void) { +# if defined(__i386__) || defined(__x86_64__) || \ + (defined(__sparc__) && defined(__arch64__)) + atomic_fence(atomic_memory_order_seq_cst); +# else + atomic_fence(atomic_memory_order_relaxed); +# endif + +} + +#define JEMALLOC_GENERATE_ATOMICS(type, short_type, \ + /* unused */ lg_size) \ +typedef struct { \ + type volatile repr; \ +} atomic_##short_type##_t; \ + \ +ATOMIC_INLINE type \ +atomic_load_##short_type(const atomic_##short_type##_t *a, \ + atomic_memory_order_t mo) { \ + if (mo == atomic_memory_order_seq_cst) { \ + atomic_pre_sc_load_fence(); \ + } \ + type result = a->repr; \ + if (mo != atomic_memory_order_relaxed) { \ + atomic_fence(atomic_memory_order_acquire); \ + } \ + return result; \ +} \ + \ +ATOMIC_INLINE void \ +atomic_store_##short_type(atomic_##short_type##_t *a, \ + type val, atomic_memory_order_t mo) { \ + if (mo != atomic_memory_order_relaxed) { \ + atomic_fence(atomic_memory_order_release); \ + } \ + a->repr = val; \ + if (mo == atomic_memory_order_seq_cst) { \ + atomic_post_sc_store_fence(); \ + } \ +} \ + \ +ATOMIC_INLINE type \ +atomic_exchange_##short_type(atomic_##short_type##_t *a, type val, \ + atomic_memory_order_t mo) { \ + /* \ + * Because of FreeBSD, we care about gcc 4.2, which doesn't have\ + * an atomic exchange builtin. We fake it with a CAS loop.
\ + */ \ + while (true) { \ + type old = a->repr; \ + if (__sync_bool_compare_and_swap(&a->repr, old, val)) { \ + return old; \ + } \ + } \ +} \ + \ +ATOMIC_INLINE bool \ +atomic_compare_exchange_weak_##short_type(atomic_##short_type##_t *a, \ + type *expected, type desired, atomic_memory_order_t success_mo, \ + atomic_memory_order_t failure_mo) { \ + type prev = __sync_val_compare_and_swap(&a->repr, *expected, \ + desired); \ + if (prev == *expected) { \ + return true; \ + } else { \ + *expected = prev; \ + return false; \ + } \ +} \ +ATOMIC_INLINE bool \ +atomic_compare_exchange_strong_##short_type(atomic_##short_type##_t *a, \ + type *expected, type desired, atomic_memory_order_t success_mo, \ + atomic_memory_order_t failure_mo) { \ + type prev = __sync_val_compare_and_swap(&a->repr, *expected, \ + desired); \ + if (prev == *expected) { \ + return true; \ + } else { \ + *expected = prev; \ + return false; \ + } \ +} + +#define JEMALLOC_GENERATE_INT_ATOMICS(type, short_type, \ + /* unused */ lg_size) \ +JEMALLOC_GENERATE_ATOMICS(type, short_type, /* unused */ lg_size) \ + \ +ATOMIC_INLINE type \ +atomic_fetch_add_##short_type(atomic_##short_type##_t *a, type val, \ + atomic_memory_order_t mo) { \ + return __sync_fetch_and_add(&a->repr, val); \ +} \ + \ +ATOMIC_INLINE type \ +atomic_fetch_sub_##short_type(atomic_##short_type##_t *a, type val, \ + atomic_memory_order_t mo) { \ + return __sync_fetch_and_sub(&a->repr, val); \ +} \ + \ +ATOMIC_INLINE type \ +atomic_fetch_and_##short_type(atomic_##short_type##_t *a, type val, \ + atomic_memory_order_t mo) { \ + return __sync_fetch_and_and(&a->repr, val); \ +} \ + \ +ATOMIC_INLINE type \ +atomic_fetch_or_##short_type(atomic_##short_type##_t *a, type val, \ + atomic_memory_order_t mo) { \ + return __sync_fetch_and_or(&a->repr, val); \ +} \ + \ +ATOMIC_INLINE type \ +atomic_fetch_xor_##short_type(atomic_##short_type##_t *a, type val, \ + atomic_memory_order_t mo) { \ + return __sync_fetch_and_xor(&a->repr, val); \ +} + 
+#endif /* JEMALLOC_INTERNAL_ATOMIC_GCC_SYNC_H */ diff --git a/include/jemalloc/internal/atomic_inlines.h b/include/jemalloc/internal/atomic_inlines.h deleted file mode 100644 index de66d57d..00000000 --- a/include/jemalloc/internal/atomic_inlines.h +++ /dev/null @@ -1,525 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_ATOMIC_INLINES_H -#define JEMALLOC_INTERNAL_ATOMIC_INLINES_H - -/* - * All arithmetic functions return the arithmetic result of the atomic - * operation. Some atomic operation APIs return the value prior to mutation, in - * which case the following functions must redundantly compute the result so - * that it can be returned. These functions are normally inlined, so the extra - * operations can be optimized away if the return values aren't used by the - * callers. - * - * atomic_read_( *p) { return *p; } - * atomic_add_( *p, x) { return *p += x; } - * atomic_sub_( *p, x) { return *p -= x; } - * bool atomic_cas_( *p, c, s) - * { - * if (*p != c) - * return true; - * *p = s; - * return false; - * } - * void atomic_write_( *p, x) { *p = x; } - */ - -#ifndef JEMALLOC_ENABLE_INLINE -# ifdef JEMALLOC_ATOMIC_U64 -uint64_t atomic_add_u64(uint64_t *p, uint64_t x); -uint64_t atomic_sub_u64(uint64_t *p, uint64_t x); -bool atomic_cas_u64(uint64_t *p, uint64_t c, uint64_t s); -void atomic_write_u64(uint64_t *p, uint64_t x); -# endif -uint32_t atomic_add_u32(uint32_t *p, uint32_t x); -uint32_t atomic_sub_u32(uint32_t *p, uint32_t x); -bool atomic_cas_u32(uint32_t *p, uint32_t c, uint32_t s); -void atomic_write_u32(uint32_t *p, uint32_t x); -void *atomic_add_p(void **p, void *x); -void *atomic_sub_p(void **p, void *x); -bool atomic_cas_p(void **p, void *c, void *s); -void atomic_write_p(void **p, const void *x); -size_t atomic_add_zu(size_t *p, size_t x); -size_t atomic_sub_zu(size_t *p, size_t x); -bool atomic_cas_zu(size_t *p, size_t c, size_t s); -void atomic_write_zu(size_t *p, size_t x); -unsigned atomic_add_u(unsigned *p, unsigned x); -unsigned atomic_sub_u(unsigned *p, 
unsigned x); -bool atomic_cas_u(unsigned *p, unsigned c, unsigned s); -void atomic_write_u(unsigned *p, unsigned x); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ATOMIC_C_)) -/******************************************************************************/ -/* 64-bit operations. */ -#ifdef JEMALLOC_ATOMIC_U64 -# if (defined(__amd64__) || defined(__x86_64__)) -JEMALLOC_INLINE uint64_t -atomic_add_u64(uint64_t *p, uint64_t x) { - uint64_t t = x; - - asm volatile ( - "lock; xaddq %0, %1;" - : "+r" (t), "=m" (*p) /* Outputs. */ - : "m" (*p) /* Inputs. */ - ); - - return t + x; -} - -JEMALLOC_INLINE uint64_t -atomic_sub_u64(uint64_t *p, uint64_t x) { - uint64_t t; - - x = (uint64_t)(-(int64_t)x); - t = x; - asm volatile ( - "lock; xaddq %0, %1;" - : "+r" (t), "=m" (*p) /* Outputs. */ - : "m" (*p) /* Inputs. */ - ); - - return t + x; -} - -JEMALLOC_INLINE bool -atomic_cas_u64(uint64_t *p, uint64_t c, uint64_t s) { - uint8_t success; - - asm volatile ( - "lock; cmpxchgq %4, %0;" - "sete %1;" - : "=m" (*p), "=a" (success) /* Outputs. */ - : "m" (*p), "a" (c), "r" (s) /* Inputs. */ - : "memory" /* Clobbers. */ - ); - - return !(bool)success; -} - -JEMALLOC_INLINE void -atomic_write_u64(uint64_t *p, uint64_t x) { - asm volatile ( - "xchgq %1, %0;" /* Lock is implied by xchgq. */ - : "=m" (*p), "+r" (x) /* Outputs. */ - : "m" (*p) /* Inputs. */ - : "memory" /* Clobbers. 
*/ - ); -} -# elif (defined(JEMALLOC_C11ATOMICS)) -JEMALLOC_INLINE uint64_t -atomic_add_u64(uint64_t *p, uint64_t x) { - volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p; - return atomic_fetch_add(a, x) + x; -} - -JEMALLOC_INLINE uint64_t -atomic_sub_u64(uint64_t *p, uint64_t x) { - volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p; - return atomic_fetch_sub(a, x) - x; -} - -JEMALLOC_INLINE bool -atomic_cas_u64(uint64_t *p, uint64_t c, uint64_t s) { - volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p; - return !atomic_compare_exchange_strong(a, &c, s); -} - -JEMALLOC_INLINE void -atomic_write_u64(uint64_t *p, uint64_t x) { - volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p; - atomic_store(a, x); -} -# elif (defined(JEMALLOC_ATOMIC9)) -JEMALLOC_INLINE uint64_t -atomic_add_u64(uint64_t *p, uint64_t x) { - /* - * atomic_fetchadd_64() doesn't exist, but we only ever use this - * function on LP64 systems, so atomic_fetchadd_long() will do. 
- */ - assert(sizeof(uint64_t) == sizeof(unsigned long)); - - return atomic_fetchadd_long(p, (unsigned long)x) + x; -} - -JEMALLOC_INLINE uint64_t -atomic_sub_u64(uint64_t *p, uint64_t x) { - assert(sizeof(uint64_t) == sizeof(unsigned long)); - - return atomic_fetchadd_long(p, (unsigned long)(-(long)x)) - x; -} - -JEMALLOC_INLINE bool -atomic_cas_u64(uint64_t *p, uint64_t c, uint64_t s) { - assert(sizeof(uint64_t) == sizeof(unsigned long)); - - return !atomic_cmpset_long(p, (unsigned long)c, (unsigned long)s); -} - -JEMALLOC_INLINE void -atomic_write_u64(uint64_t *p, uint64_t x) { - assert(sizeof(uint64_t) == sizeof(unsigned long)); - - atomic_store_rel_long(p, x); -} -# elif (defined(JEMALLOC_OSATOMIC)) -JEMALLOC_INLINE uint64_t -atomic_add_u64(uint64_t *p, uint64_t x) { - return OSAtomicAdd64((int64_t)x, (int64_t *)p); -} - -JEMALLOC_INLINE uint64_t -atomic_sub_u64(uint64_t *p, uint64_t x) { - return OSAtomicAdd64(-((int64_t)x), (int64_t *)p); -} - -JEMALLOC_INLINE bool -atomic_cas_u64(uint64_t *p, uint64_t c, uint64_t s) { - return !OSAtomicCompareAndSwap64(c, s, (int64_t *)p); -} - -JEMALLOC_INLINE void -atomic_write_u64(uint64_t *p, uint64_t x) { - uint64_t o; - - /*The documented OSAtomic*() API does not expose an atomic exchange. 
*/ - do { - o = atomic_read_u64(p); - } while (atomic_cas_u64(p, o, x)); -} -# elif (defined(_MSC_VER)) -JEMALLOC_INLINE uint64_t -atomic_add_u64(uint64_t *p, uint64_t x) { - return InterlockedExchangeAdd64(p, x) + x; -} - -JEMALLOC_INLINE uint64_t -atomic_sub_u64(uint64_t *p, uint64_t x) { - return InterlockedExchangeAdd64(p, -((int64_t)x)) - x; -} - -JEMALLOC_INLINE bool -atomic_cas_u64(uint64_t *p, uint64_t c, uint64_t s) { - uint64_t o; - - o = InterlockedCompareExchange64(p, s, c); - return o != c; -} - -JEMALLOC_INLINE void -atomic_write_u64(uint64_t *p, uint64_t x) { - InterlockedExchange64(p, x); -} -# elif (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8) || \ - defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_8)) -JEMALLOC_INLINE uint64_t -atomic_add_u64(uint64_t *p, uint64_t x) { - return __sync_add_and_fetch(p, x); -} - -JEMALLOC_INLINE uint64_t -atomic_sub_u64(uint64_t *p, uint64_t x) { - return __sync_sub_and_fetch(p, x); -} - -JEMALLOC_INLINE bool -atomic_cas_u64(uint64_t *p, uint64_t c, uint64_t s) { - return !__sync_bool_compare_and_swap(p, c, s); -} - -JEMALLOC_INLINE void -atomic_write_u64(uint64_t *p, uint64_t x) { - __sync_lock_test_and_set(p, x); -} -# else -# error "Missing implementation for 64-bit atomic operations" -# endif -#endif - -/******************************************************************************/ -/* 32-bit operations. */ -#if (defined(__i386__) || defined(__amd64__) || defined(__x86_64__)) -JEMALLOC_INLINE uint32_t -atomic_add_u32(uint32_t *p, uint32_t x) { - uint32_t t = x; - - asm volatile ( - "lock; xaddl %0, %1;" - : "+r" (t), "=m" (*p) /* Outputs. */ - : "m" (*p) /* Inputs. */ - ); - - return t + x; -} - -JEMALLOC_INLINE uint32_t -atomic_sub_u32(uint32_t *p, uint32_t x) { - uint32_t t; - - x = (uint32_t)(-(int32_t)x); - t = x; - asm volatile ( - "lock; xaddl %0, %1;" - : "+r" (t), "=m" (*p) /* Outputs. */ - : "m" (*p) /* Inputs. 
*/ - ); - - return t + x; -} - -JEMALLOC_INLINE bool -atomic_cas_u32(uint32_t *p, uint32_t c, uint32_t s) { - uint8_t success; - - asm volatile ( - "lock; cmpxchgl %4, %0;" - "sete %1;" - : "=m" (*p), "=a" (success) /* Outputs. */ - : "m" (*p), "a" (c), "r" (s) /* Inputs. */ - : "memory" - ); - - return !(bool)success; -} - -JEMALLOC_INLINE void -atomic_write_u32(uint32_t *p, uint32_t x) { - asm volatile ( - "xchgl %1, %0;" /* Lock is implied by xchgl. */ - : "=m" (*p), "+r" (x) /* Outputs. */ - : "m" (*p) /* Inputs. */ - : "memory" /* Clobbers. */ - ); -} -# elif (defined(JEMALLOC_C11ATOMICS)) -JEMALLOC_INLINE uint32_t -atomic_add_u32(uint32_t *p, uint32_t x) { - volatile atomic_uint_least32_t *a = (volatile atomic_uint_least32_t *)p; - return atomic_fetch_add(a, x) + x; -} - -JEMALLOC_INLINE uint32_t -atomic_sub_u32(uint32_t *p, uint32_t x) { - volatile atomic_uint_least32_t *a = (volatile atomic_uint_least32_t *)p; - return atomic_fetch_sub(a, x) - x; -} - -JEMALLOC_INLINE bool -atomic_cas_u32(uint32_t *p, uint32_t c, uint32_t s) { - volatile atomic_uint_least32_t *a = (volatile atomic_uint_least32_t *)p; - return !atomic_compare_exchange_strong(a, &c, s); -} - -JEMALLOC_INLINE void -atomic_write_u32(uint32_t *p, uint32_t x) { - volatile atomic_uint_least32_t *a = (volatile atomic_uint_least32_t *)p; - atomic_store(a, x); -} -#elif (defined(JEMALLOC_ATOMIC9)) -JEMALLOC_INLINE uint32_t -atomic_add_u32(uint32_t *p, uint32_t x) { - return atomic_fetchadd_32(p, x) + x; -} - -JEMALLOC_INLINE uint32_t -atomic_sub_u32(uint32_t *p, uint32_t x) { - return atomic_fetchadd_32(p, (uint32_t)(-(int32_t)x)) - x; -} - -JEMALLOC_INLINE bool -atomic_cas_u32(uint32_t *p, uint32_t c, uint32_t s) { - return !atomic_cmpset_32(p, c, s); -} - -JEMALLOC_INLINE void -atomic_write_u32(uint32_t *p, uint32_t x) { - atomic_store_rel_32(p, x); -} -#elif (defined(JEMALLOC_OSATOMIC)) -JEMALLOC_INLINE uint32_t -atomic_add_u32(uint32_t *p, uint32_t x) { - return OSAtomicAdd32((int32_t)x, (int32_t 
*)p); -} - -JEMALLOC_INLINE uint32_t -atomic_sub_u32(uint32_t *p, uint32_t x) { - return OSAtomicAdd32(-((int32_t)x), (int32_t *)p); -} - -JEMALLOC_INLINE bool -atomic_cas_u32(uint32_t *p, uint32_t c, uint32_t s) { - return !OSAtomicCompareAndSwap32(c, s, (int32_t *)p); -} - -JEMALLOC_INLINE void -atomic_write_u32(uint32_t *p, uint32_t x) { - uint32_t o; - - /*The documented OSAtomic*() API does not expose an atomic exchange. */ - do { - o = atomic_read_u32(p); - } while (atomic_cas_u32(p, o, x)); -} -#elif (defined(_MSC_VER)) -JEMALLOC_INLINE uint32_t -atomic_add_u32(uint32_t *p, uint32_t x) { - return InterlockedExchangeAdd(p, x) + x; -} - -JEMALLOC_INLINE uint32_t -atomic_sub_u32(uint32_t *p, uint32_t x) { - return InterlockedExchangeAdd(p, -((int32_t)x)) - x; -} - -JEMALLOC_INLINE bool -atomic_cas_u32(uint32_t *p, uint32_t c, uint32_t s) { - uint32_t o; - - o = InterlockedCompareExchange(p, s, c); - return o != c; -} - -JEMALLOC_INLINE void -atomic_write_u32(uint32_t *p, uint32_t x) { - InterlockedExchange(p, x); -} -#elif (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4) || \ - defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_4)) -JEMALLOC_INLINE uint32_t -atomic_add_u32(uint32_t *p, uint32_t x) { - return __sync_add_and_fetch(p, x); -} - -JEMALLOC_INLINE uint32_t -atomic_sub_u32(uint32_t *p, uint32_t x) { - return __sync_sub_and_fetch(p, x); -} - -JEMALLOC_INLINE bool -atomic_cas_u32(uint32_t *p, uint32_t c, uint32_t s) { - return !__sync_bool_compare_and_swap(p, c, s); -} - -JEMALLOC_INLINE void -atomic_write_u32(uint32_t *p, uint32_t x) { - __sync_lock_test_and_set(p, x); -} -#else -# error "Missing implementation for 32-bit atomic operations" -#endif - -/******************************************************************************/ -/* Pointer operations. 
*/ -JEMALLOC_INLINE void * -atomic_add_p(void **p, void *x) { -#if (LG_SIZEOF_PTR == 3) - return (void *)atomic_add_u64((uint64_t *)p, (uint64_t)x); -#elif (LG_SIZEOF_PTR == 2) - return (void *)atomic_add_u32((uint32_t *)p, (uint32_t)x); -#endif -} - -JEMALLOC_INLINE void * -atomic_sub_p(void **p, void *x) { -#if (LG_SIZEOF_PTR == 3) - return (void *)atomic_add_u64((uint64_t *)p, (uint64_t)-((int64_t)x)); -#elif (LG_SIZEOF_PTR == 2) - return (void *)atomic_add_u32((uint32_t *)p, (uint32_t)-((int32_t)x)); -#endif -} - -JEMALLOC_INLINE bool -atomic_cas_p(void **p, void *c, void *s) { -#if (LG_SIZEOF_PTR == 3) - return atomic_cas_u64((uint64_t *)p, (uint64_t)c, (uint64_t)s); -#elif (LG_SIZEOF_PTR == 2) - return atomic_cas_u32((uint32_t *)p, (uint32_t)c, (uint32_t)s); -#endif -} - -JEMALLOC_INLINE void -atomic_write_p(void **p, const void *x) { -#if (LG_SIZEOF_PTR == 3) - atomic_write_u64((uint64_t *)p, (uint64_t)x); -#elif (LG_SIZEOF_PTR == 2) - atomic_write_u32((uint32_t *)p, (uint32_t)x); -#endif -} - -/******************************************************************************/ -/* size_t operations. 
*/ -JEMALLOC_INLINE size_t -atomic_add_zu(size_t *p, size_t x) { -#if (LG_SIZEOF_PTR == 3) - return (size_t)atomic_add_u64((uint64_t *)p, (uint64_t)x); -#elif (LG_SIZEOF_PTR == 2) - return (size_t)atomic_add_u32((uint32_t *)p, (uint32_t)x); -#endif -} - -JEMALLOC_INLINE size_t -atomic_sub_zu(size_t *p, size_t x) { -#if (LG_SIZEOF_PTR == 3) - return (size_t)atomic_add_u64((uint64_t *)p, (uint64_t)-((int64_t)x)); -#elif (LG_SIZEOF_PTR == 2) - return (size_t)atomic_add_u32((uint32_t *)p, (uint32_t)-((int32_t)x)); -#endif -} - -JEMALLOC_INLINE bool -atomic_cas_zu(size_t *p, size_t c, size_t s) { -#if (LG_SIZEOF_PTR == 3) - return atomic_cas_u64((uint64_t *)p, (uint64_t)c, (uint64_t)s); -#elif (LG_SIZEOF_PTR == 2) - return atomic_cas_u32((uint32_t *)p, (uint32_t)c, (uint32_t)s); -#endif -} - -JEMALLOC_INLINE void -atomic_write_zu(size_t *p, size_t x) { -#if (LG_SIZEOF_PTR == 3) - atomic_write_u64((uint64_t *)p, (uint64_t)x); -#elif (LG_SIZEOF_PTR == 2) - atomic_write_u32((uint32_t *)p, (uint32_t)x); -#endif -} - -/******************************************************************************/ -/* unsigned operations. 
*/ -JEMALLOC_INLINE unsigned -atomic_add_u(unsigned *p, unsigned x) { -#if (LG_SIZEOF_INT == 3) - return (unsigned)atomic_add_u64((uint64_t *)p, (uint64_t)x); -#elif (LG_SIZEOF_INT == 2) - return (unsigned)atomic_add_u32((uint32_t *)p, (uint32_t)x); -#endif -} - -JEMALLOC_INLINE unsigned -atomic_sub_u(unsigned *p, unsigned x) { -#if (LG_SIZEOF_INT == 3) - return (unsigned)atomic_add_u64((uint64_t *)p, (uint64_t)-((int64_t)x)); -#elif (LG_SIZEOF_INT == 2) - return (unsigned)atomic_add_u32((uint32_t *)p, (uint32_t)-((int32_t)x)); -#endif -} - -JEMALLOC_INLINE bool -atomic_cas_u(unsigned *p, unsigned c, unsigned s) { -#if (LG_SIZEOF_INT == 3) - return atomic_cas_u64((uint64_t *)p, (uint64_t)c, (uint64_t)s); -#elif (LG_SIZEOF_INT == 2) - return atomic_cas_u32((uint32_t *)p, (uint32_t)c, (uint32_t)s); -#endif -} - -JEMALLOC_INLINE void -atomic_write_u(unsigned *p, unsigned x) { -#if (LG_SIZEOF_INT == 3) - atomic_write_u64((uint64_t *)p, (uint64_t)x); -#elif (LG_SIZEOF_INT == 2) - atomic_write_u32((uint32_t *)p, (uint32_t)x); -#endif -} - -/******************************************************************************/ -#endif -#endif /* JEMALLOC_INTERNAL_ATOMIC_INLINES_H */ diff --git a/include/jemalloc/internal/atomic_msvc.h b/include/jemalloc/internal/atomic_msvc.h new file mode 100644 index 00000000..67057ce5 --- /dev/null +++ b/include/jemalloc/internal/atomic_msvc.h @@ -0,0 +1,158 @@ +#ifndef JEMALLOC_INTERNAL_ATOMIC_MSVC_H +#define JEMALLOC_INTERNAL_ATOMIC_MSVC_H + +#define ATOMIC_INIT(...) 
{__VA_ARGS__} + +typedef enum { + atomic_memory_order_relaxed, + atomic_memory_order_acquire, + atomic_memory_order_release, + atomic_memory_order_acq_rel, + atomic_memory_order_seq_cst +} atomic_memory_order_t; + +typedef char atomic_repr_0_t; +typedef short atomic_repr_1_t; +typedef long atomic_repr_2_t; +typedef __int64 atomic_repr_3_t; + +ATOMIC_INLINE void +atomic_fence(atomic_memory_order_t mo) { + _ReadWriteBarrier(); +# if defined(_M_ARM) || defined(_M_ARM64) + /* ARM needs a barrier for everything but relaxed. */ + if (mo != atomic_memory_order_relaxed) { + MemoryBarrier(); + } +# elif defined(_M_IX86) || defined (_M_X64) + /* x86 needs a barrier only for seq_cst. */ + if (mo == atomic_memory_order_seq_cst) { + MemoryBarrier(); + } +# else +# error "Don't know how to create atomics for this platform for MSVC." +# endif + _ReadWriteBarrier(); +} + +#define ATOMIC_INTERLOCKED_REPR(lg_size) atomic_repr_ ## lg_size ## _t + +#define ATOMIC_CONCAT(a, b) ATOMIC_RAW_CONCAT(a, b) +#define ATOMIC_RAW_CONCAT(a, b) a ## b + +#define ATOMIC_INTERLOCKED_NAME(base_name, lg_size) ATOMIC_CONCAT( \ + base_name, ATOMIC_INTERLOCKED_SUFFIX(lg_size)) + +#define ATOMIC_INTERLOCKED_SUFFIX(lg_size) \ + ATOMIC_CONCAT(ATOMIC_INTERLOCKED_SUFFIX_, lg_size) + +#define ATOMIC_INTERLOCKED_SUFFIX_0 8 +#define ATOMIC_INTERLOCKED_SUFFIX_1 16 +#define ATOMIC_INTERLOCKED_SUFFIX_2 +#define ATOMIC_INTERLOCKED_SUFFIX_3 64 + +#define JEMALLOC_GENERATE_ATOMICS(type, short_type, lg_size) \ +typedef struct { \ + ATOMIC_INTERLOCKED_REPR(lg_size) repr; \ +} atomic_##short_type##_t; \ + \ +ATOMIC_INLINE type \ +atomic_load_##short_type(const atomic_##short_type##_t *a, \ + atomic_memory_order_t mo) { \ + ATOMIC_INTERLOCKED_REPR(lg_size) ret = a->repr; \ + if (mo != atomic_memory_order_relaxed) { \ + atomic_fence(atomic_memory_order_acquire); \ + } \ + return (type) ret; \ +} \ + \ +ATOMIC_INLINE void \ +atomic_store_##short_type(atomic_##short_type##_t *a, \ + type val, atomic_memory_order_t mo) { \ + 
if (mo != atomic_memory_order_relaxed) { \ + atomic_fence(atomic_memory_order_release); \ + } \ + a->repr = (ATOMIC_INTERLOCKED_REPR(lg_size)) val; \ + if (mo == atomic_memory_order_seq_cst) { \ + atomic_fence(atomic_memory_order_seq_cst); \ + } \ +} \ + \ +ATOMIC_INLINE type \ +atomic_exchange_##short_type(atomic_##short_type##_t *a, type val, \ + atomic_memory_order_t mo) { \ + return (type)ATOMIC_INTERLOCKED_NAME(_InterlockedExchange, \ + lg_size)(&a->repr, (ATOMIC_INTERLOCKED_REPR(lg_size))val); \ +} \ + \ +ATOMIC_INLINE bool \ +atomic_compare_exchange_weak_##short_type(atomic_##short_type##_t *a, \ + type *expected, type desired, atomic_memory_order_t success_mo, \ + atomic_memory_order_t failure_mo) { \ + ATOMIC_INTERLOCKED_REPR(lg_size) e = \ + (ATOMIC_INTERLOCKED_REPR(lg_size))*expected; \ + ATOMIC_INTERLOCKED_REPR(lg_size) d = \ + (ATOMIC_INTERLOCKED_REPR(lg_size))desired; \ + ATOMIC_INTERLOCKED_REPR(lg_size) old = \ + ATOMIC_INTERLOCKED_NAME(_InterlockedCompareExchange, \ + lg_size)(&a->repr, d, e); \ + if (old == e) { \ + return true; \ + } else { \ + *expected = (type)old; \ + return false; \ + } \ +} \ + \ +ATOMIC_INLINE bool \ +atomic_compare_exchange_strong_##short_type(atomic_##short_type##_t *a, \ + type *expected, type desired, atomic_memory_order_t success_mo, \ + atomic_memory_order_t failure_mo) { \ + /* We implement the weak version with strong semantics. 
*/ \ + return atomic_compare_exchange_weak_##short_type(a, expected, \ + desired, success_mo, failure_mo); \ +} + + +#define JEMALLOC_GENERATE_INT_ATOMICS(type, short_type, lg_size) \ +JEMALLOC_GENERATE_ATOMICS(type, short_type, lg_size) \ + \ +ATOMIC_INLINE type \ +atomic_fetch_add_##short_type(atomic_##short_type##_t *a, \ + type val, atomic_memory_order_t mo) { \ + return (type)ATOMIC_INTERLOCKED_NAME(_InterlockedExchangeAdd, \ + lg_size)(&a->repr, (ATOMIC_INTERLOCKED_REPR(lg_size))val); \ +} \ + \ +ATOMIC_INLINE type \ +atomic_fetch_sub_##short_type(atomic_##short_type##_t *a, \ + type val, atomic_memory_order_t mo) { \ + /* \ + * MSVC warns on negation of unsigned operands, but for us it \ + * gives exactly the right semantics (MAX_TYPE + 1 - operand). \ + */ \ + __pragma(warning(push)) \ + __pragma(warning(disable: 4146)) \ + return atomic_fetch_add_##short_type(a, -val, mo); \ + __pragma(warning(pop)) \ +} \ +ATOMIC_INLINE type \ +atomic_fetch_and_##short_type(atomic_##short_type##_t *a, \ + type val, atomic_memory_order_t mo) { \ + return (type)ATOMIC_INTERLOCKED_NAME(_InterlockedAnd, lg_size)( \ + &a->repr, (ATOMIC_INTERLOCKED_REPR(lg_size))val); \ +} \ +ATOMIC_INLINE type \ +atomic_fetch_or_##short_type(atomic_##short_type##_t *a, \ + type val, atomic_memory_order_t mo) { \ + return (type)ATOMIC_INTERLOCKED_NAME(_InterlockedOr, lg_size)( \ + &a->repr, (ATOMIC_INTERLOCKED_REPR(lg_size))val); \ +} \ +ATOMIC_INLINE type \ +atomic_fetch_xor_##short_type(atomic_##short_type##_t *a, \ + type val, atomic_memory_order_t mo) { \ + return (type)ATOMIC_INTERLOCKED_NAME(_InterlockedXor, lg_size)( \ + &a->repr, (ATOMIC_INTERLOCKED_REPR(lg_size))val); \ +} + +#endif /* JEMALLOC_INTERNAL_ATOMIC_MSVC_H */ diff --git a/include/jemalloc/internal/atomic_types.h b/include/jemalloc/internal/atomic_types.h deleted file mode 100644 index 0fd5e5b5..00000000 --- a/include/jemalloc/internal/atomic_types.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_ATOMIC_TYPES_H 
-#define JEMALLOC_INTERNAL_ATOMIC_TYPES_H - -#if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3) -# define JEMALLOC_ATOMIC_U64 -#endif - -#endif /* JEMALLOC_INTERNAL_ATOMIC_TYPES_H */ diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 0d0440b5..f18acabb 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -146,14 +146,6 @@ static const bool have_thp = #endif ; -#if defined(JEMALLOC_C11ATOMICS) && !defined(__cplusplus) -#include <stdatomic.h> -#endif - -#ifdef JEMALLOC_ATOMIC9 -#include <machine/atomic.h> -#endif - #if (defined(JEMALLOC_OSATOMIC) || defined(JEMALLOC_OSSPIN)) #include <libkern/OSAtomic.h> #endif @@ -199,10 +191,21 @@ static const bool have_thp = * its translation unit). Each component is now broken up into multiple header * files, corresponding to the sections above (e.g. instead of "tsd.h", we now * have "tsd_types.h", "tsd_structs.h", "tsd_externs.h", "tsd_inlines.h"). + * + * Those files which have been converted to explicitly include their + * inter-component dependencies are now in the initial HERMETIC HEADERS + * section. These headers may still rely on this file for system headers and + * global jemalloc headers, however.
*/ #include "jemalloc/internal/jemalloc_internal_macros.h" +/******************************************************************************/ +/* HERMETIC HEADERS */ +/******************************************************************************/ + +#include "jemalloc/internal/atomic.h" + /******************************************************************************/ /* TYPES */ /******************************************************************************/ @@ -380,7 +383,6 @@ typedef unsigned szind_t; #include "jemalloc/internal/nstime_types.h" #include "jemalloc/internal/util_types.h" -#include "jemalloc/internal/atomic_types.h" #include "jemalloc/internal/spin_types.h" #include "jemalloc/internal/prng_types.h" #include "jemalloc/internal/ticker_types.h" @@ -489,7 +491,6 @@ void jemalloc_postfork_child(void); #include "jemalloc/internal/nstime_externs.h" #include "jemalloc/internal/util_externs.h" -#include "jemalloc/internal/atomic_externs.h" #include "jemalloc/internal/ckh_externs.h" #include "jemalloc/internal/stats_externs.h" #include "jemalloc/internal/ctl_externs.h" @@ -513,7 +514,6 @@ void jemalloc_postfork_child(void); /******************************************************************************/ #include "jemalloc/internal/util_inlines.h" -#include "jemalloc/internal/atomic_inlines.h" #include "jemalloc/internal/spin_inlines.h" #include "jemalloc/internal/prng_inlines.h" #include "jemalloc/internal/ticker_inlines.h" diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 6c70e167..b2e0077e 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -30,16 +30,13 @@ #undef LG_VADDR /* Defined if C11 atomics are available. */ -#undef JEMALLOC_C11ATOMICS +#undef JEMALLOC_C11_ATOMICS -/* Defined if the equivalent of FreeBSD's atomic(9) functions are available. 
*/ -#undef JEMALLOC_ATOMIC9 +/* Defined if GCC __atomic atomics are available. */ +#undef JEMALLOC_GCC_ATOMIC_ATOMICS -/* - * Defined if OSAtomic*() functions are available, as provided by Darwin, and - * documented in the atomic(3) manual page. - */ -#undef JEMALLOC_OSATOMIC +/* Defined if GCC __sync atomics are available. */ +#undef JEMALLOC_GCC_SYNC_ATOMICS /* * Defined if __sync_add_and_fetch(uint32_t *, uint32_t) and diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 0234181e..b122dae6 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -72,26 +72,6 @@ arena_tdata_get arena_tdata_get_hard arenas arenas_tdata_cleanup -atomic_add_p -atomic_add_u -atomic_add_u32 -atomic_add_u64 -atomic_add_zu -atomic_cas_p -atomic_cas_u -atomic_cas_u32 -atomic_cas_u64 -atomic_cas_zu -atomic_sub_p -atomic_sub_u -atomic_sub_u32 -atomic_sub_u64 -atomic_sub_zu -atomic_write_p -atomic_write_u -atomic_write_u32 -atomic_write_u64 -atomic_write_zu b0get base_alloc base_boot diff --git a/src/atomic.c b/src/atomic.c deleted file mode 100644 index 9871390d..00000000 --- a/src/atomic.c +++ /dev/null @@ -1,2 +0,0 @@ -#define JEMALLOC_ATOMIC_C_ -#include "jemalloc/internal/jemalloc_internal.h" diff --git a/test/unit/atomic.c b/test/unit/atomic.c index 78661597..237c7474 100644 --- a/test/unit/atomic.c +++ b/test/unit/atomic.c @@ -1,101 +1,257 @@ #include "test/jemalloc_test.h" -#define TEST_STRUCT(p, t) \ -struct p##_test_s { \ - t accum0; \ - t x; \ - t s; \ -}; \ -typedef struct p##_test_s p##_test_t; +/* + * We *almost* have consistent short names (e.g. "u32" for uint32_t, "b" for + * bool, etc.). The one exception is that the short name for void * is "p" in + * some places and "ptr" in others. In the long run it would be nice to unify + * these, but in the short run we'll use this shim.
+ */ +#define assert_p_eq assert_ptr_eq -#define TEST_BODY(p, t, tc, ta, FMT) do { \ - const p##_test_t tests[] = { \ - {(t)-1, (t)-1, (t)-2}, \ - {(t)-1, (t) 0, (t)-2}, \ - {(t)-1, (t) 1, (t)-2}, \ +/* + * t: the non-atomic type, like "uint32_t". + * ta: the short name for the type, like "u32". + * val[1,2,3]: Values of the given type. The CAS tests use val2 for expected, + * and val3 for desired. + */ + +#define DO_TESTS(t, ta, val1, val2, val3) do { \ + t val; \ + t raw_atomic; \ + t expected; \ + bool success; \ + /* This (along with the load below) also tests ATOMIC_INIT. */ \ + atomic_##ta##_t atom = ATOMIC_INIT(val1); \ \ - {(t) 0, (t)-1, (t)-2}, \ - {(t) 0, (t) 0, (t)-2}, \ - {(t) 0, (t) 1, (t)-2}, \ + /* ATOMIC_INIT and load. */ \ + val = atomic_load_##ta(&atom, ATOMIC_RELAXED); \ + assert_##ta##_eq(val1, val, "Load or init failed"); \ \ - {(t) 1, (t)-1, (t)-2}, \ - {(t) 1, (t) 0, (t)-2}, \ - {(t) 1, (t) 1, (t)-2}, \ + /* Store. */ \ + atomic_store_##ta(&atom, val1, ATOMIC_RELAXED); \ + atomic_store_##ta(&atom, val2, ATOMIC_RELAXED); \ + val = atomic_load_##ta(&atom, ATOMIC_RELAXED); \ + assert_##ta##_eq(val2, val, "Store failed"); \ \ - {(t)0, (t)-(1 << 22), (t)-2}, \ - {(t)0, (t)(1 << 22), (t)-2}, \ - {(t)(1 << 22), (t)-(1 << 22), (t)-2}, \ - {(t)(1 << 22), (t)(1 << 22), (t)-2} \ - }; \ - unsigned i; \ + /* Exchange. */ \ + atomic_store_##ta(&atom, val1, ATOMIC_RELAXED); \ + val = atomic_exchange_##ta(&atom, val2, ATOMIC_RELAXED); \ + assert_##ta##_eq(val1, val, "Exchange returned invalid value"); \ + val = atomic_load_##ta(&atom, ATOMIC_RELAXED); \ + assert_##ta##_eq(val2, val, "Exchange store invalid value"); \ \ - for (i = 0; i < sizeof(tests)/sizeof(p##_test_t); i++) { \ - bool err; \ - t accum = tests[i].accum0; \ - assert_##ta##_eq(atomic_read_##p(&accum), \ - tests[i].accum0, \ - "Erroneous read, i=%u", i); \ + /* \ + * Weak CAS. Spurious failures are allowed, so we loop a few \ + * times.
\ + */ \ + atomic_store_##ta(&atom, val1, ATOMIC_RELAXED); \ + success = false; \ + for (int i = 0; i < 10 && !success; i++) { \ + expected = val2; \ + success = atomic_compare_exchange_weak_##ta(&atom, \ + &expected, val3, ATOMIC_RELAXED, ATOMIC_RELAXED); \ + assert_##ta##_eq(val1, expected, \ + "CAS should update expected"); \ + } \ + assert_b_eq(val1 == val2, success, \ + "Weak CAS did the wrong state update"); \ + val = atomic_load_##ta(&atom, ATOMIC_RELAXED); \ + if (success) { \ + assert_##ta##_eq(val3, val, \ + "Successful CAS should update atomic"); \ + } else { \ + assert_##ta##_eq(val1, val, \ + "Unsuccessful CAS should not update atomic"); \ + } \ \ - assert_##ta##_eq(atomic_add_##p(&accum, tests[i].x), \ - (t)((tc)tests[i].accum0 + (tc)tests[i].x), \ - "i=%u, accum=%"FMT", x=%"FMT, \ - i, tests[i].accum0, tests[i].x); \ - assert_##ta##_eq(atomic_read_##p(&accum), accum, \ - "Erroneous add, i=%u", i); \ + /* Strong CAS. */ \ + atomic_store_##ta(&atom, val1, ATOMIC_RELAXED); \ + expected = val2; \ + success = atomic_compare_exchange_strong_##ta(&atom, &expected, \ + val3, ATOMIC_RELAXED, ATOMIC_RELAXED); \ + assert_b_eq(val1 == val2, success, \ + "Strong CAS did the wrong state update"); \ + val = atomic_load_##ta(&atom, ATOMIC_RELAXED); \ + if (success) { \ + assert_##ta##_eq(val3, val, \ + "Successful CAS should update atomic"); \ + } else { \ + assert_##ta##_eq(val1, val, \ + "Unsuccessful CAS should not update atomic"); \ + } \ \ - accum = tests[i].accum0; \ - assert_##ta##_eq(atomic_sub_##p(&accum, tests[i].x), \ - (t)((tc)tests[i].accum0 - (tc)tests[i].x), \ - "i=%u, accum=%"FMT", x=%"FMT, \ - i, tests[i].accum0, tests[i].x); \ - assert_##ta##_eq(atomic_read_##p(&accum), accum, \ - "Erroneous sub, i=%u", i); \ \ - accum = tests[i].accum0; \ - err = atomic_cas_##p(&accum, tests[i].x, tests[i].s); \ - assert_b_eq(err, tests[i].accum0 != tests[i].x, \ - "Erroneous cas success/failure result"); \ - assert_##ta##_eq(accum, err ? 
tests[i].accum0 : \ - tests[i].s, "Erroneous cas effect, i=%u", i); \ + /* Previous atomics API. */ \ \ - accum = tests[i].accum0; \ - atomic_write_##p(&accum, tests[i].s); \ - assert_##ta##_eq(accum, tests[i].s, \ - "Erroneous write, i=%u", i); \ + /* Read. */ \ + raw_atomic = val1; \ + val = atomic_read_##ta(&raw_atomic); \ + assert_##ta##_eq(val1, val, "Read failed"); \ + \ + /* Write. */ \ + raw_atomic = val1; \ + atomic_write_##ta(&raw_atomic, val2); \ + assert_##ta##_eq(val2, raw_atomic, "Write failed"); \ + \ + /* CAS. */ \ + raw_atomic = val1; \ + success = !atomic_cas_##ta(&raw_atomic, val2, val3); \ + assert_b_eq(val1 == val2, success, \ + "CAS did the wrong state update"); \ + val = raw_atomic; \ + if (success) { \ + assert_##ta##_eq(val3, val, \ + "Successful CAS should update atomic"); \ + } else { \ + assert_##ta##_eq(val1, val, \ + "Unsuccessful CAS should not update atomic"); \ } \ } while (0) -TEST_STRUCT(u64, uint64_t) +#define DO_INTEGER_TESTS(t, ta, val1, val2) do { \ + atomic_##ta##_t atom; \ + t val; \ + t raw_atomic; \ + \ + /* Fetch-add. */ \ + atomic_store_##ta(&atom, val1, ATOMIC_RELAXED); \ + val = atomic_fetch_add_##ta(&atom, val2, ATOMIC_RELAXED); \ + assert_##ta##_eq(val1, val, \ + "Fetch-add should return previous value"); \ + val = atomic_load_##ta(&atom, ATOMIC_RELAXED); \ + assert_##ta##_eq(val1 + val2, val, \ + "Fetch-add should update atomic"); \ + \ + /* Fetch-sub. */ \ + atomic_store_##ta(&atom, val1, ATOMIC_RELAXED); \ + val = atomic_fetch_sub_##ta(&atom, val2, ATOMIC_RELAXED); \ + assert_##ta##_eq(val1, val, \ + "Fetch-sub should return previous value"); \ + val = atomic_load_##ta(&atom, ATOMIC_RELAXED); \ + assert_##ta##_eq(val1 - val2, val, \ + "Fetch-sub should update atomic"); \ + \ + /* Fetch-and. 
*/ \ + atomic_store_##ta(&atom, val1, ATOMIC_RELAXED); \ + val = atomic_fetch_and_##ta(&atom, val2, ATOMIC_RELAXED); \ + assert_##ta##_eq(val1, val, \ + "Fetch-and should return previous value"); \ + val = atomic_load_##ta(&atom, ATOMIC_RELAXED); \ + assert_##ta##_eq(val1 & val2, val, \ + "Fetch-and should update atomic"); \ + \ + /* Fetch-or. */ \ + atomic_store_##ta(&atom, val1, ATOMIC_RELAXED); \ + val = atomic_fetch_or_##ta(&atom, val2, ATOMIC_RELAXED); \ + assert_##ta##_eq(val1, val, \ + "Fetch-or should return previous value"); \ + val = atomic_load_##ta(&atom, ATOMIC_RELAXED); \ + assert_##ta##_eq(val1 | val2, val, \ + "Fetch-or should update atomic"); \ + \ + /* Fetch-xor. */ \ + atomic_store_##ta(&atom, val1, ATOMIC_RELAXED); \ + val = atomic_fetch_xor_##ta(&atom, val2, ATOMIC_RELAXED); \ + assert_##ta##_eq(val1, val, \ + "Fetch-xor should return previous value"); \ + val = atomic_load_##ta(&atom, ATOMIC_RELAXED); \ + assert_##ta##_eq(val1 ^ val2, val, \ + "Fetch-xor should update atomic"); \ + \ + /* Previous atomics API. */ \ + \ + /* Add. */ \ + raw_atomic = val1; \ + val = atomic_add_##ta(&raw_atomic, val2); \ + assert_##ta##_eq(val1 + val2, val, \ + "atomic_add should return new value"); \ + assert_##ta##_eq(val1 + val2, raw_atomic, \ + "atomic_add should update atomic"); \ + \ + /* Sub. 
*/ \ + raw_atomic = val1; \ + val = atomic_sub_##ta(&raw_atomic, val2); \ + assert_##ta##_eq(val1 - val2, val, \ + "atomic_sub should return new value"); \ + assert_##ta##_eq(val1 - val2, raw_atomic, \ + "atomic_sub should update atomic"); \ +} while (0) + +#define TEST_STRUCT(t, ta) \ +typedef struct { \ + t val1; \ + t val2; \ + t val3; \ +} ta##_test_t + +#define TEST_CASES(t) { \ + {(t)-1, (t)-1, (t)-2}, \ + {(t)-1, (t) 0, (t)-2}, \ + {(t)-1, (t) 1, (t)-2}, \ + \ + {(t) 0, (t)-1, (t)-2}, \ + {(t) 0, (t) 0, (t)-2}, \ + {(t) 0, (t) 1, (t)-2}, \ + \ + {(t) 1, (t)-1, (t)-2}, \ + {(t) 1, (t) 0, (t)-2}, \ + {(t) 1, (t) 1, (t)-2}, \ + \ + {(t)0, (t)-(1 << 22), (t)-2}, \ + {(t)0, (t)(1 << 22), (t)-2}, \ + {(t)(1 << 22), (t)-(1 << 22), (t)-2}, \ + {(t)(1 << 22), (t)(1 << 22), (t)-2} \ +} + +#define TEST_BODY(t, ta) do { \ + const ta##_test_t tests[] = TEST_CASES(t); \ + for (unsigned i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) { \ + ta##_test_t test = tests[i]; \ + DO_TESTS(t, ta, test.val1, test.val2, test.val3); \ + } \ +} while (0) + +#define INTEGER_TEST_BODY(t, ta) do { \ + const ta##_test_t tests[] = TEST_CASES(t); \ + for (unsigned i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) { \ + ta##_test_t test = tests[i]; \ + DO_TESTS(t, ta, test.val1, test.val2, test.val3); \ + DO_INTEGER_TESTS(t, ta, test.val1, test.val2); \ + } \ +} while (0) + +TEST_STRUCT(uint64_t, u64); TEST_BEGIN(test_atomic_u64) { #if !(LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3) test_skip("64-bit atomic operations not supported"); #else - TEST_BODY(u64, uint64_t, uint64_t, u64, FMTx64); + INTEGER_TEST_BODY(uint64_t, u64); #endif } TEST_END -TEST_STRUCT(u32, uint32_t) + +TEST_STRUCT(uint32_t, u32); TEST_BEGIN(test_atomic_u32) { - TEST_BODY(u32, uint32_t, uint32_t, u32, "#"FMTx32); + INTEGER_TEST_BODY(uint32_t, u32); } TEST_END -TEST_STRUCT(p, void *) +TEST_STRUCT(void *, p); TEST_BEGIN(test_atomic_p) { - TEST_BODY(p, void *, uintptr_t, ptr, "p"); + TEST_BODY(void *, p); } TEST_END
-TEST_STRUCT(zu, size_t) +TEST_STRUCT(size_t, zu); TEST_BEGIN(test_atomic_zu) { - TEST_BODY(zu, size_t, size_t, zu, "#zx"); + INTEGER_TEST_BODY(size_t, zu); } TEST_END -TEST_STRUCT(u, unsigned) +TEST_STRUCT(unsigned, u); TEST_BEGIN(test_atomic_u) { - TEST_BODY(u, unsigned, unsigned, u, "#x"); + INTEGER_TEST_BODY(unsigned, u); } TEST_END