Introduce a backport of C11 atomics

This introduces a backport of C11 atomics.  It has four implementations, ranked
in order of preference:
- GCC/Clang __atomic builtins
- GCC/Clang __sync builtins
- MSVC _Interlocked builtins
- C11 atomics, from <stdatomic.h>

The primary advantages are:
- Close adherence to the standard API gives us a defined memory model.
- Type safety: atomic objects are now separate types from non-atomic ones, so
  that it's impossible to mix up atomic and non-atomic updates (which is
  undefined behavior that compilers are starting to take advantage of).
- Efficiency: we can specify ordering for operations, avoiding fences and
  atomic operations on strongly ordered architectures (for example,
  `atomic_write_u32(ptr, val);` involves a CAS loop, whereas
  `atomic_store(ptr, val, ATOMIC_RELEASE);` is a plain store); a short sketch
  follows the list.
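
A rough illustration of the difference (not part of the diff; the function and
variable names here are invented for the example):

    void
    refcount_example(uint32_t *old_style, atomic_u32_t *new_style) {
        /* Old API: effectively seq_cst; may need a fence, CAS, or xchg. */
        atomic_write_u32(old_style, 1);
        /* New API: the caller states the ordering it actually needs. */
        atomic_store_u32(new_style, 1, ATOMIC_RELEASE);  /* on x86: a plain store */
        uint32_t cur = atomic_load_u32(new_style, ATOMIC_ACQUIRE);
        (void)cur;
    }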

This diff leaves the current atomics API in place (implementing it in terms of
the backport).  This lets us transition uses over piecemeal; a sketch of how an
existing call maps onto the backport follows.
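
For instance (hypothetical call site), existing code such as

    atomic_add_u32(&nrequests, 1);

keeps compiling against a plain uint32_t, because the compatibility layer
generates roughly

    ATOMIC_INLINE uint32_t
    atomic_add_u32(uint32_t *p, uint32_t x) {
        return atomic_fetch_add_u32((atomic_u32_t *)p, x, ATOMIC_SEQ_CST) + x;
    }

on top of the new primitives.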

Testing:
This is by nature hard to test. I've manually tested the first three options by
futzing with the #defines: on Linux with gcc, on FreeBSD with gcc and clang, on
MSVC, and on OS X with clang.  All of these were x86 machines, though, and we
don't have any test infrastructure set up for non-x86 platforms.
David Goldblatt, 2017-01-25 09:54:27 -08:00 (committed by David Goldblatt)
commit d4ac7582f3, parent 957b8c5f21
15 changed files with 947 additions and 672 deletions


@ -90,7 +90,6 @@ BINS := $(objroot)bin/jemalloc-config $(objroot)bin/jemalloc.sh $(objroot)bin/je
 C_HDRS := $(objroot)include/jemalloc/jemalloc$(install_suffix).h
 C_SRCS := $(srcroot)src/jemalloc.c \
 	$(srcroot)src/arena.c \
-	$(srcroot)src/atomic.c \
 	$(srcroot)src/base.c \
 	$(srcroot)src/bitmap.c \
 	$(srcroot)src/ckh.c \


@ -550,7 +550,7 @@ case "${host}" in
 	AC_DEFINE([JEMALLOC_HAS_ALLOCA_H])
 	AC_DEFINE([JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY], [ ])
 	AC_DEFINE([JEMALLOC_THREADED_INIT], [ ])
-	AC_DEFINE([JEMALLOC_C11ATOMICS])
+	AC_DEFINE([JEMALLOC_C11_ATOMICS])
 	force_tls="0"
 	default_munmap="0"
 	;;
@ -1730,36 +1730,44 @@ JE_COMPILABLE([C11 atomics], [
 	volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p;
 	uint64_t r = atomic_fetch_add(a, x) + x;
 	return r == 0;
-], [je_cv_c11atomics])
-if test "x${je_cv_c11atomics}" = "xyes" ; then
-  AC_DEFINE([JEMALLOC_C11ATOMICS])
+], [je_cv_c11_atomics])
+if test "x${je_cv_c11_atomics}" = "xyes" ; then
+  AC_DEFINE([JEMALLOC_C11_ATOMICS])
 fi
 dnl ============================================================================
-dnl Check for atomic(9) operations as provided on FreeBSD.
-JE_COMPILABLE([atomic(9)], [
-#include <sys/types.h>
-#include <machine/atomic.h>
-#include <inttypes.h>
+dnl Check for GCC-style __atomic atomics.
+JE_COMPILABLE([GCC __atomic atomics], [
 ], [
-	{
-		uint32_t x32 = 0;
-		volatile uint32_t *x32p = &x32;
-		atomic_fetchadd_32(x32p, 1);
-	}
-	{
-		unsigned long xlong = 0;
-		volatile unsigned long *xlongp = &xlong;
-		atomic_fetchadd_long(xlongp, 1);
-	}
-], [je_cv_atomic9])
-if test "x${je_cv_atomic9}" = "xyes" ; then
-  AC_DEFINE([JEMALLOC_ATOMIC9])
+	int x = 0;
+	int val = 1;
+	int y = __atomic_fetch_add(&x, val, __ATOMIC_RELAXED);
+	int after_add = x;
+	return after_add == 1;
+], [je_cv_gcc_atomic_atomics])
+if test "x${je_cv_gcc_atomic_atomics}" = "xyes" ; then
+  AC_DEFINE([JEMALLOC_GCC_ATOMIC_ATOMICS])
+fi
+dnl ============================================================================
+dnl Check for GCC-style __sync atomics.
+JE_COMPILABLE([GCC __sync atomics], [
+], [
+	int x = 0;
+	int before_add = __sync_fetch_and_add(&x, 1);
+	int after_add = x;
+	return (before_add == 0) && (after_add == 1);
+], [je_cv_gcc_sync_atomics])
+if test "x${je_cv_gcc_sync_atomics}" = "xyes" ; then
+  AC_DEFINE([JEMALLOC_GCC_SYNC_ATOMICS])
 fi
 dnl ============================================================================
 dnl Check for atomic(3) operations as provided on Darwin.
+dnl We need this not for the atomic operations (which are provided above), but
+dnl rather for the OSSpinLock type it exposes.
 JE_COMPILABLE([Darwin OSAtomic*()], [
 #include <libkern/OSAtomic.h>
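
For reference, the __atomic check above amounts to compiling and linking
roughly the following probe (JE_COMPILABLE splices the two bracketed fragments
around a main(); the probe only has to build, not run):

    int main(void) {
        int x = 0;
        int val = 1;
        int y = __atomic_fetch_add(&x, val, __ATOMIC_RELAXED);
        int after_add = x;
        return after_add == 1;
    }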


@ -0,0 +1,111 @@
#ifndef JEMALLOC_INTERNAL_ATOMIC_H
#define JEMALLOC_INTERNAL_ATOMIC_H
#define ATOMIC_INLINE static inline
#if defined(JEMALLOC_GCC_ATOMIC_ATOMICS)
# include "jemalloc/internal/atomic_gcc_atomic.h"
#elif defined(JEMALLOC_GCC_SYNC_ATOMICS)
# include "jemalloc/internal/atomic_gcc_sync.h"
#elif defined(_MSC_VER)
# include "jemalloc/internal/atomic_msvc.h"
#elif defined(JEMALLOC_C11_ATOMICS)
# include "jemalloc/internal/atomic_c11.h"
#else
# error "Don't have atomics implemented on this platform."
#endif
/*
* This header gives more or less a backport of C11 atomics. The user can write
* JEMALLOC_GENERATE_ATOMICS(type, short_type, lg_sizeof_type); to generate
* counterparts of the C11 atomic functions for type, as so:
* JEMALLOC_GENERATE_ATOMICS(int *, pi, 3);
* and then write things like:
* int *some_ptr;
* atomic_pi_t atomic_ptr_to_int;
* atomic_store_pi(&atomic_ptr_to_int, some_ptr, ATOMIC_RELAXED);
 * int *prev_value = atomic_exchange_pi(&atomic_ptr_to_int, NULL, ATOMIC_ACQ_REL);
* assert(some_ptr == prev_value);
* and expect things to work in the obvious way.
*
* Also included (with naming differences to avoid conflicts with the standard
* library):
* atomic_fence(atomic_memory_order_t) (mimics C11's atomic_thread_fence).
* ATOMIC_INIT (mimics C11's ATOMIC_VAR_INIT).
*/
/*
* Pure convenience, so that we don't have to type "atomic_memory_order_"
* quite so often.
*/
#define ATOMIC_RELAXED atomic_memory_order_relaxed
#define ATOMIC_ACQUIRE atomic_memory_order_acquire
#define ATOMIC_RELEASE atomic_memory_order_release
#define ATOMIC_ACQ_REL atomic_memory_order_acq_rel
#define ATOMIC_SEQ_CST atomic_memory_order_seq_cst
/*
* In order to let us transition atomics usage piecemeal (and reason locally
* about memory orders), we'll support the previous API for a while.
*/
#define JEMALLOC_GENERATE_COMPATABILITY_ATOMICS(type, short_type) \
ATOMIC_INLINE type \
atomic_read_##short_type(type *p) { \
return atomic_load_##short_type ((atomic_##short_type##_t *)p, \
ATOMIC_SEQ_CST); \
} \
\
ATOMIC_INLINE void \
atomic_write_##short_type(type *p, const type val) { \
atomic_store_##short_type((atomic_##short_type##_t *)p, \
(type)val, ATOMIC_SEQ_CST); \
} \
ATOMIC_INLINE bool \
atomic_cas_##short_type(type *p, type c, type s) { \
/* Note the '!' -- atomic_cas inverts the usual semantics. */ \
return !atomic_compare_exchange_strong_##short_type( \
(atomic_##short_type##_t *)p, &c, s, ATOMIC_SEQ_CST, \
ATOMIC_SEQ_CST); \
}
#define JEMALLOC_GENERATE_COMPATABILITY_INT_ATOMICS(type, short_type) \
JEMALLOC_GENERATE_COMPATABILITY_ATOMICS(type, short_type) \
\
ATOMIC_INLINE type \
atomic_add_##short_type(type *p, type x) { \
return atomic_fetch_add_##short_type( \
(atomic_##short_type##_t *)p, x, ATOMIC_SEQ_CST) + x; \
} \
ATOMIC_INLINE type \
atomic_sub_##short_type(type *p, type x) { \
return atomic_fetch_sub_##short_type( \
(atomic_##short_type##_t *)p, x, ATOMIC_SEQ_CST) - x; \
}
JEMALLOC_GENERATE_ATOMICS(void *, p, LG_SIZEOF_PTR)
JEMALLOC_GENERATE_COMPATABILITY_ATOMICS(void *, p)
/*
* There's no actual guarantee that sizeof(bool) == 1, but it's true on the only
* platform that actually needs to know the size, MSVC.
*/
JEMALLOC_GENERATE_ATOMICS(bool, b, 0)
JEMALLOC_GENERATE_COMPATABILITY_ATOMICS(bool, b)
JEMALLOC_GENERATE_INT_ATOMICS(unsigned, u, LG_SIZEOF_INT)
JEMALLOC_GENERATE_COMPATABILITY_INT_ATOMICS(unsigned, u)
JEMALLOC_GENERATE_INT_ATOMICS(size_t, zu, LG_SIZEOF_PTR)
JEMALLOC_GENERATE_COMPATABILITY_INT_ATOMICS(size_t, zu)
JEMALLOC_GENERATE_INT_ATOMICS(uint32_t, u32, 2)
JEMALLOC_GENERATE_COMPATABILITY_INT_ATOMICS(uint32_t, u32)
# if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3)
JEMALLOC_GENERATE_INT_ATOMICS(uint64_t, u64, 3)
JEMALLOC_GENERATE_COMPATABILITY_INT_ATOMICS(uint64_t, u64)
# endif
#undef ATOMIC_INLINE
#endif /* JEMALLOC_INTERNAL_ATOMIC_H */
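
To make the generated API concrete, a caller built on the types this header
instantiates might look like the following (illustrative only; the variable and
function names are invented):

    static atomic_zu_t dirty_npages;  /* from JEMALLOC_GENERATE_INT_ATOMICS(size_t, zu, ...) */

    static void
    dirty_npages_add(size_t n) {
        atomic_fetch_add_zu(&dirty_npages, n, ATOMIC_RELAXED);
    }

    static size_t
    dirty_npages_get(void) {
        return atomic_load_zu(&dirty_npages, ATOMIC_ACQUIRE);
    }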


@ -0,0 +1,97 @@
#ifndef JEMALLOC_INTERNAL_ATOMIC_C11_H
#define JEMALLOC_INTERNAL_ATOMIC_C11_H
#include <stdatomic.h>
#define ATOMIC_INIT(...) ATOMIC_VAR_INIT(__VA_ARGS__)
#define atomic_memory_order_t memory_order
#define atomic_memory_order_relaxed memory_order_relaxed
#define atomic_memory_order_acquire memory_order_acquire
#define atomic_memory_order_release memory_order_release
#define atomic_memory_order_acq_rel memory_order_acq_rel
#define atomic_memory_order_seq_cst memory_order_seq_cst
#define atomic_fence atomic_thread_fence
#define JEMALLOC_GENERATE_ATOMICS(type, short_type, \
/* unused */ lg_size) \
typedef _Atomic(type) atomic_##short_type##_t; \
\
ATOMIC_INLINE type \
atomic_load_##short_type(const atomic_##short_type##_t *a, \
atomic_memory_order_t mo) { \
/* \
* A strict interpretation of the C standard prevents \
* atomic_load from taking a const argument, but it's \
* convenient for our purposes. This cast is a workaround. \
*/ \
atomic_##short_type##_t* a_nonconst = \
(atomic_##short_type##_t*)a; \
return atomic_load_explicit(a_nonconst, mo); \
} \
\
ATOMIC_INLINE void \
atomic_store_##short_type(atomic_##short_type##_t *a, \
type val, atomic_memory_order_t mo) { \
atomic_store_explicit(a, val, mo); \
} \
\
ATOMIC_INLINE type \
atomic_exchange_##short_type(atomic_##short_type##_t *a, type val, \
atomic_memory_order_t mo) { \
return atomic_exchange_explicit(a, val, mo); \
} \
\
ATOMIC_INLINE bool \
atomic_compare_exchange_weak_##short_type(atomic_##short_type##_t *a, \
type *expected, type desired, atomic_memory_order_t success_mo, \
atomic_memory_order_t failure_mo) { \
return atomic_compare_exchange_weak_explicit(a, expected, \
desired, success_mo, failure_mo); \
} \
\
ATOMIC_INLINE bool \
atomic_compare_exchange_strong_##short_type(atomic_##short_type##_t *a, \
type *expected, type desired, atomic_memory_order_t success_mo, \
atomic_memory_order_t failure_mo) { \
return atomic_compare_exchange_strong_explicit(a, expected, \
desired, success_mo, failure_mo); \
}
/*
* Integral types have some special operations available that non-integral ones
* lack.
*/
#define JEMALLOC_GENERATE_INT_ATOMICS(type, short_type, \
/* unused */ lg_size) \
JEMALLOC_GENERATE_ATOMICS(type, short_type, /* unused */ lg_size) \
\
ATOMIC_INLINE type \
atomic_fetch_add_##short_type(atomic_##short_type##_t *a, \
type val, atomic_memory_order_t mo) { \
return atomic_fetch_add_explicit(a, val, mo); \
} \
\
ATOMIC_INLINE type \
atomic_fetch_sub_##short_type(atomic_##short_type##_t *a, \
type val, atomic_memory_order_t mo) { \
return atomic_fetch_sub_explicit(a, val, mo); \
} \
ATOMIC_INLINE type \
atomic_fetch_and_##short_type(atomic_##short_type##_t *a, \
type val, atomic_memory_order_t mo) { \
return atomic_fetch_and_explicit(a, val, mo); \
} \
ATOMIC_INLINE type \
atomic_fetch_or_##short_type(atomic_##short_type##_t *a, \
type val, atomic_memory_order_t mo) { \
return atomic_fetch_or_explicit(a, val, mo); \
} \
ATOMIC_INLINE type \
atomic_fetch_xor_##short_type(atomic_##short_type##_t *a, \
type val, atomic_memory_order_t mo) { \
return atomic_fetch_xor_explicit(a, val, mo); \
}
#endif /* JEMALLOC_INTERNAL_ATOMIC_C11_H */
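
Concretely, under this backend JEMALLOC_GENERATE_ATOMICS(size_t, zu,
LG_SIZEOF_PTR) expands to roughly the following, i.e. a thin renaming layer
over <stdatomic.h> (sketch; only two of the generated functions are shown):

    typedef _Atomic(size_t) atomic_zu_t;

    ATOMIC_INLINE void
    atomic_store_zu(atomic_zu_t *a, size_t val, atomic_memory_order_t mo) {
        atomic_store_explicit(a, val, mo);
    }

    ATOMIC_INLINE size_t
    atomic_exchange_zu(atomic_zu_t *a, size_t val, atomic_memory_order_t mo) {
        return atomic_exchange_explicit(a, val, mo);
    }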


@ -1,12 +0,0 @@
#ifndef JEMALLOC_INTERNAL_ATOMIC_EXTERNS_H
#define JEMALLOC_INTERNAL_ATOMIC_EXTERNS_H
#if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3)
#define atomic_read_u64(p) atomic_add_u64(p, 0)
#endif
#define atomic_read_u32(p) atomic_add_u32(p, 0)
#define atomic_read_p(p) atomic_add_p(p, NULL)
#define atomic_read_zu(p) atomic_add_zu(p, 0)
#define atomic_read_u(p) atomic_add_u(p, 0)
#endif /* JEMALLOC_INTERNAL_ATOMIC_EXTERNS_H */


@ -0,0 +1,125 @@
#ifndef JEMALLOC_INTERNAL_ATOMIC_GCC_ATOMIC_H
#define JEMALLOC_INTERNAL_ATOMIC_GCC_ATOMIC_H
#define ATOMIC_INIT(...) {__VA_ARGS__}
typedef enum {
atomic_memory_order_relaxed,
atomic_memory_order_acquire,
atomic_memory_order_release,
atomic_memory_order_acq_rel,
atomic_memory_order_seq_cst
} atomic_memory_order_t;
ATOMIC_INLINE int
atomic_enum_to_builtin(atomic_memory_order_t mo) {
switch (mo) {
case atomic_memory_order_relaxed:
return __ATOMIC_RELAXED;
case atomic_memory_order_acquire:
return __ATOMIC_ACQUIRE;
case atomic_memory_order_release:
return __ATOMIC_RELEASE;
case atomic_memory_order_acq_rel:
return __ATOMIC_ACQ_REL;
case atomic_memory_order_seq_cst:
return __ATOMIC_SEQ_CST;
}
/* Can't actually happen; the switch is exhaustive. */
return __ATOMIC_SEQ_CST;
}
ATOMIC_INLINE void
atomic_fence(atomic_memory_order_t mo) {
__atomic_thread_fence(atomic_enum_to_builtin(mo));
}
#define JEMALLOC_GENERATE_ATOMICS(type, short_type, \
/* unused */ lg_size) \
typedef struct { \
type repr; \
} atomic_##short_type##_t; \
\
ATOMIC_INLINE type \
atomic_load_##short_type(const atomic_##short_type##_t *a, \
atomic_memory_order_t mo) { \
type result; \
__atomic_load(&a->repr, &result, atomic_enum_to_builtin(mo)); \
return result; \
} \
\
ATOMIC_INLINE void \
atomic_store_##short_type(atomic_##short_type##_t *a, type val, \
atomic_memory_order_t mo) { \
__atomic_store(&a->repr, &val, atomic_enum_to_builtin(mo)); \
} \
\
ATOMIC_INLINE type \
atomic_exchange_##short_type(atomic_##short_type##_t *a, type val, \
atomic_memory_order_t mo) { \
type result; \
__atomic_exchange(&a->repr, &val, &result, \
atomic_enum_to_builtin(mo)); \
return result; \
} \
\
ATOMIC_INLINE bool \
atomic_compare_exchange_weak_##short_type(atomic_##short_type##_t *a, \
type *expected, type desired, atomic_memory_order_t success_mo, \
atomic_memory_order_t failure_mo) { \
return __atomic_compare_exchange(&a->repr, expected, &desired, \
true, atomic_enum_to_builtin(success_mo), \
atomic_enum_to_builtin(failure_mo)); \
} \
\
ATOMIC_INLINE bool \
atomic_compare_exchange_strong_##short_type(atomic_##short_type##_t *a, \
type *expected, type desired, atomic_memory_order_t success_mo, \
atomic_memory_order_t failure_mo) { \
return __atomic_compare_exchange(&a->repr, expected, &desired, \
false, \
atomic_enum_to_builtin(success_mo), \
atomic_enum_to_builtin(failure_mo)); \
}
#define JEMALLOC_GENERATE_INT_ATOMICS(type, short_type, \
/* unused */ lg_size) \
JEMALLOC_GENERATE_ATOMICS(type, short_type, /* unused */ lg_size) \
\
ATOMIC_INLINE type \
atomic_fetch_add_##short_type(atomic_##short_type##_t *a, type val, \
atomic_memory_order_t mo) { \
return __atomic_fetch_add(&a->repr, val, \
atomic_enum_to_builtin(mo)); \
} \
\
ATOMIC_INLINE type \
atomic_fetch_sub_##short_type(atomic_##short_type##_t *a, type val, \
atomic_memory_order_t mo) { \
return __atomic_fetch_sub(&a->repr, val, \
atomic_enum_to_builtin(mo)); \
} \
\
ATOMIC_INLINE type \
atomic_fetch_and_##short_type(atomic_##short_type##_t *a, type val, \
atomic_memory_order_t mo) { \
return __atomic_fetch_and(&a->repr, val, \
atomic_enum_to_builtin(mo)); \
} \
\
ATOMIC_INLINE type \
atomic_fetch_or_##short_type(atomic_##short_type##_t *a, type val, \
atomic_memory_order_t mo) { \
return __atomic_fetch_or(&a->repr, val, \
atomic_enum_to_builtin(mo)); \
} \
\
ATOMIC_INLINE type \
atomic_fetch_xor_##short_type(atomic_##short_type##_t *a, type val, \
atomic_memory_order_t mo) { \
return __atomic_fetch_xor(&a->repr, val, \
atomic_enum_to_builtin(mo)); \
}
#endif /* JEMALLOC_INTERNAL_ATOMIC_GCC_ATOMIC_H */
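
For comparison with the C11 backend, the same fetch-add under this backend
routes the portable memory order through atomic_enum_to_builtin(); with a
constant order the switch typically folds away, so a relaxed increment becomes
a single RMW instruction (e.g. lock xadd on x86). Sketch of the expansion:

    ATOMIC_INLINE size_t
    atomic_fetch_add_zu(atomic_zu_t *a, size_t val, atomic_memory_order_t mo) {
        return __atomic_fetch_add(&a->repr, val, atomic_enum_to_builtin(mo));
    }

    /* e.g. (hypothetical counter): atomic_fetch_add_zu(&ndirty, 1, ATOMIC_RELAXED); */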


@ -0,0 +1,191 @@
#ifndef JEMALLOC_INTERNAL_ATOMIC_GCC_SYNC_H
#define JEMALLOC_INTERNAL_ATOMIC_GCC_SYNC_H
#define ATOMIC_INIT(...) {__VA_ARGS__}
typedef enum {
atomic_memory_order_relaxed,
atomic_memory_order_acquire,
atomic_memory_order_release,
atomic_memory_order_acq_rel,
atomic_memory_order_seq_cst
} atomic_memory_order_t;
ATOMIC_INLINE void
atomic_fence(atomic_memory_order_t mo) {
/* Easy cases first: no barrier, and full barrier. */
if (mo == atomic_memory_order_relaxed) {
asm volatile("" ::: "memory");
return;
}
if (mo == atomic_memory_order_seq_cst) {
asm volatile("" ::: "memory");
__sync_synchronize();
asm volatile("" ::: "memory");
return;
}
asm volatile("" ::: "memory");
# if defined(__i386__) || defined(__x86_64__)
/* This is implicit on x86. */
# elif defined(__ppc__)
asm volatile("lwsync");
# elif defined(__sparc__) && defined(__arch64__)
if (mo == atomic_memory_order_acquire) {
asm volatile("membar #LoadLoad | #LoadStore");
} else if (mo == atomic_memory_order_release) {
asm volatile("membar #LoadStore | #StoreStore");
} else {
asm volatile("membar #LoadLoad | #LoadStore | #StoreStore");
}
# else
__sync_synchronize();
# endif
asm volatile("" ::: "memory");
}
/*
* A correct implementation of seq_cst loads and stores on weakly ordered
* architectures could do either of the following:
* 1. store() is weak-fence -> store -> strong fence, load() is load ->
* strong-fence.
* 2. store() is strong-fence -> store, load() is strong-fence -> load ->
* weak-fence.
* The tricky thing is, load() and store() above can be the load or store
* portions of a gcc __sync builtin, so we have to follow GCC's lead, which
* means going with strategy 2.
* On strongly ordered architectures, the natural strategy is to stick a strong
* fence after seq_cst stores, and have naked loads. So we want the strong
* fences in different places on different architectures.
* atomic_pre_sc_load_fence and atomic_post_sc_store_fence allow us to
* accomplish this.
*/
ATOMIC_INLINE void
atomic_pre_sc_load_fence() {
# if defined(__i386__) || defined(__x86_64__) || \
(defined(__sparc__) && defined(__arch64__))
atomic_fence(atomic_memory_order_relaxed);
# else
atomic_fence(atomic_memory_order_seq_cst);
# endif
}
ATOMIC_INLINE void
atomic_post_sc_store_fence() {
# if defined(__i386__) || defined(__x86_64__) || \
(defined(__sparc__) && defined(__arch64__))
atomic_fence(atomic_memory_order_seq_cst);
# else
atomic_fence(atomic_memory_order_relaxed);
# endif
}
#define JEMALLOC_GENERATE_ATOMICS(type, short_type, \
/* unused */ lg_size) \
typedef struct { \
type volatile repr; \
} atomic_##short_type##_t; \
\
ATOMIC_INLINE type \
atomic_load_##short_type(const atomic_##short_type##_t *a, \
atomic_memory_order_t mo) { \
if (mo == atomic_memory_order_seq_cst) { \
atomic_pre_sc_load_fence(); \
} \
type result = a->repr; \
if (mo != atomic_memory_order_relaxed) { \
atomic_fence(atomic_memory_order_acquire); \
} \
return result; \
} \
\
ATOMIC_INLINE void \
atomic_store_##short_type(atomic_##short_type##_t *a, \
type val, atomic_memory_order_t mo) { \
if (mo != atomic_memory_order_relaxed) { \
atomic_fence(atomic_memory_order_release); \
} \
a->repr = val; \
if (mo == atomic_memory_order_seq_cst) { \
atomic_post_sc_store_fence(); \
} \
} \
\
ATOMIC_INLINE type \
atomic_exchange_##short_type(atomic_##short_type##_t *a, type val, \
atomic_memory_order_t mo) { \
/* \
* Because of FreeBSD, we care about gcc 4.2, which doesn't have\
* an atomic exchange builtin. We fake it with a CAS loop. \
*/ \
while (true) { \
type old = a->repr; \
if (__sync_bool_compare_and_swap(&a->repr, old, val)) { \
return old; \
} \
} \
} \
\
ATOMIC_INLINE bool \
atomic_compare_exchange_weak_##short_type(atomic_##short_type##_t *a, \
type *expected, type desired, atomic_memory_order_t success_mo, \
atomic_memory_order_t failure_mo) { \
type prev = __sync_val_compare_and_swap(&a->repr, *expected, \
desired); \
if (prev == *expected) { \
return true; \
} else { \
*expected = prev; \
return false; \
} \
} \
ATOMIC_INLINE bool \
atomic_compare_exchange_strong_##short_type(atomic_##short_type##_t *a, \
type *expected, type desired, atomic_memory_order_t success_mo, \
atomic_memory_order_t failure_mo) { \
type prev = __sync_val_compare_and_swap(&a->repr, *expected, \
desired); \
if (prev == *expected) { \
return true; \
} else { \
*expected = prev; \
return false; \
} \
}
#define JEMALLOC_GENERATE_INT_ATOMICS(type, short_type, \
/* unused */ lg_size) \
JEMALLOC_GENERATE_ATOMICS(type, short_type, /* unused */ lg_size) \
\
ATOMIC_INLINE type \
atomic_fetch_add_##short_type(atomic_##short_type##_t *a, type val, \
atomic_memory_order_t mo) { \
return __sync_fetch_and_add(&a->repr, val); \
} \
\
ATOMIC_INLINE type \
atomic_fetch_sub_##short_type(atomic_##short_type##_t *a, type val, \
atomic_memory_order_t mo) { \
return __sync_fetch_and_sub(&a->repr, val); \
} \
\
ATOMIC_INLINE type \
atomic_fetch_and_##short_type(atomic_##short_type##_t *a, type val, \
atomic_memory_order_t mo) { \
return __sync_fetch_and_and(&a->repr, val); \
} \
\
ATOMIC_INLINE type \
atomic_fetch_or_##short_type(atomic_##short_type##_t *a, type val, \
atomic_memory_order_t mo) { \
return __sync_fetch_and_or(&a->repr, val); \
} \
\
ATOMIC_INLINE type \
atomic_fetch_xor_##short_type(atomic_##short_type##_t *a, type val, \
atomic_memory_order_t mo) { \
return __sync_fetch_and_xor(&a->repr, val); \
}
#endif /* JEMALLOC_INTERNAL_ATOMIC_GCC_SYNC_H */
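
Restating the seq_cst strategy above concretely (a sketch; the strongly
ordered row is written for x86, and "weak" stands for an architecture that
falls through to __sync_synchronize()):

    /* atomic_store_*(..., ATOMIC_SEQ_CST):
     *   x86:  compiler barrier; plain store; __sync_synchronize();
     *   weak: release fence; plain store; compiler barrier;
     *
     * atomic_load_*(..., ATOMIC_SEQ_CST):
     *   x86:  compiler barrier; plain load; acquire (compiler barrier only);
     *   weak: __sync_synchronize(); plain load; acquire fence;
     */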


@ -1,525 +0,0 @@
#ifndef JEMALLOC_INTERNAL_ATOMIC_INLINES_H
#define JEMALLOC_INTERNAL_ATOMIC_INLINES_H
/*
* All arithmetic functions return the arithmetic result of the atomic
* operation. Some atomic operation APIs return the value prior to mutation, in
* which case the following functions must redundantly compute the result so
* that it can be returned. These functions are normally inlined, so the extra
* operations can be optimized away if the return values aren't used by the
* callers.
*
* <t> atomic_read_<t>(<t> *p) { return *p; }
* <t> atomic_add_<t>(<t> *p, <t> x) { return *p += x; }
* <t> atomic_sub_<t>(<t> *p, <t> x) { return *p -= x; }
* bool atomic_cas_<t>(<t> *p, <t> c, <t> s)
* {
* if (*p != c)
* return true;
* *p = s;
* return false;
* }
* void atomic_write_<t>(<t> *p, <t> x) { *p = x; }
*/
#ifndef JEMALLOC_ENABLE_INLINE
# ifdef JEMALLOC_ATOMIC_U64
uint64_t atomic_add_u64(uint64_t *p, uint64_t x);
uint64_t atomic_sub_u64(uint64_t *p, uint64_t x);
bool atomic_cas_u64(uint64_t *p, uint64_t c, uint64_t s);
void atomic_write_u64(uint64_t *p, uint64_t x);
# endif
uint32_t atomic_add_u32(uint32_t *p, uint32_t x);
uint32_t atomic_sub_u32(uint32_t *p, uint32_t x);
bool atomic_cas_u32(uint32_t *p, uint32_t c, uint32_t s);
void atomic_write_u32(uint32_t *p, uint32_t x);
void *atomic_add_p(void **p, void *x);
void *atomic_sub_p(void **p, void *x);
bool atomic_cas_p(void **p, void *c, void *s);
void atomic_write_p(void **p, const void *x);
size_t atomic_add_zu(size_t *p, size_t x);
size_t atomic_sub_zu(size_t *p, size_t x);
bool atomic_cas_zu(size_t *p, size_t c, size_t s);
void atomic_write_zu(size_t *p, size_t x);
unsigned atomic_add_u(unsigned *p, unsigned x);
unsigned atomic_sub_u(unsigned *p, unsigned x);
bool atomic_cas_u(unsigned *p, unsigned c, unsigned s);
void atomic_write_u(unsigned *p, unsigned x);
#endif
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ATOMIC_C_))
/******************************************************************************/
/* 64-bit operations. */
#ifdef JEMALLOC_ATOMIC_U64
# if (defined(__amd64__) || defined(__x86_64__))
JEMALLOC_INLINE uint64_t
atomic_add_u64(uint64_t *p, uint64_t x) {
uint64_t t = x;
asm volatile (
"lock; xaddq %0, %1;"
: "+r" (t), "=m" (*p) /* Outputs. */
: "m" (*p) /* Inputs. */
);
return t + x;
}
JEMALLOC_INLINE uint64_t
atomic_sub_u64(uint64_t *p, uint64_t x) {
uint64_t t;
x = (uint64_t)(-(int64_t)x);
t = x;
asm volatile (
"lock; xaddq %0, %1;"
: "+r" (t), "=m" (*p) /* Outputs. */
: "m" (*p) /* Inputs. */
);
return t + x;
}
JEMALLOC_INLINE bool
atomic_cas_u64(uint64_t *p, uint64_t c, uint64_t s) {
uint8_t success;
asm volatile (
"lock; cmpxchgq %4, %0;"
"sete %1;"
: "=m" (*p), "=a" (success) /* Outputs. */
: "m" (*p), "a" (c), "r" (s) /* Inputs. */
: "memory" /* Clobbers. */
);
return !(bool)success;
}
JEMALLOC_INLINE void
atomic_write_u64(uint64_t *p, uint64_t x) {
asm volatile (
"xchgq %1, %0;" /* Lock is implied by xchgq. */
: "=m" (*p), "+r" (x) /* Outputs. */
: "m" (*p) /* Inputs. */
: "memory" /* Clobbers. */
);
}
# elif (defined(JEMALLOC_C11ATOMICS))
JEMALLOC_INLINE uint64_t
atomic_add_u64(uint64_t *p, uint64_t x) {
volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p;
return atomic_fetch_add(a, x) + x;
}
JEMALLOC_INLINE uint64_t
atomic_sub_u64(uint64_t *p, uint64_t x) {
volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p;
return atomic_fetch_sub(a, x) - x;
}
JEMALLOC_INLINE bool
atomic_cas_u64(uint64_t *p, uint64_t c, uint64_t s) {
volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p;
return !atomic_compare_exchange_strong(a, &c, s);
}
JEMALLOC_INLINE void
atomic_write_u64(uint64_t *p, uint64_t x) {
volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p;
atomic_store(a, x);
}
# elif (defined(JEMALLOC_ATOMIC9))
JEMALLOC_INLINE uint64_t
atomic_add_u64(uint64_t *p, uint64_t x) {
/*
* atomic_fetchadd_64() doesn't exist, but we only ever use this
* function on LP64 systems, so atomic_fetchadd_long() will do.
*/
assert(sizeof(uint64_t) == sizeof(unsigned long));
return atomic_fetchadd_long(p, (unsigned long)x) + x;
}
JEMALLOC_INLINE uint64_t
atomic_sub_u64(uint64_t *p, uint64_t x) {
assert(sizeof(uint64_t) == sizeof(unsigned long));
return atomic_fetchadd_long(p, (unsigned long)(-(long)x)) - x;
}
JEMALLOC_INLINE bool
atomic_cas_u64(uint64_t *p, uint64_t c, uint64_t s) {
assert(sizeof(uint64_t) == sizeof(unsigned long));
return !atomic_cmpset_long(p, (unsigned long)c, (unsigned long)s);
}
JEMALLOC_INLINE void
atomic_write_u64(uint64_t *p, uint64_t x) {
assert(sizeof(uint64_t) == sizeof(unsigned long));
atomic_store_rel_long(p, x);
}
# elif (defined(JEMALLOC_OSATOMIC))
JEMALLOC_INLINE uint64_t
atomic_add_u64(uint64_t *p, uint64_t x) {
return OSAtomicAdd64((int64_t)x, (int64_t *)p);
}
JEMALLOC_INLINE uint64_t
atomic_sub_u64(uint64_t *p, uint64_t x) {
return OSAtomicAdd64(-((int64_t)x), (int64_t *)p);
}
JEMALLOC_INLINE bool
atomic_cas_u64(uint64_t *p, uint64_t c, uint64_t s) {
return !OSAtomicCompareAndSwap64(c, s, (int64_t *)p);
}
JEMALLOC_INLINE void
atomic_write_u64(uint64_t *p, uint64_t x) {
uint64_t o;
/*The documented OSAtomic*() API does not expose an atomic exchange. */
do {
o = atomic_read_u64(p);
} while (atomic_cas_u64(p, o, x));
}
# elif (defined(_MSC_VER))
JEMALLOC_INLINE uint64_t
atomic_add_u64(uint64_t *p, uint64_t x) {
return InterlockedExchangeAdd64(p, x) + x;
}
JEMALLOC_INLINE uint64_t
atomic_sub_u64(uint64_t *p, uint64_t x) {
return InterlockedExchangeAdd64(p, -((int64_t)x)) - x;
}
JEMALLOC_INLINE bool
atomic_cas_u64(uint64_t *p, uint64_t c, uint64_t s) {
uint64_t o;
o = InterlockedCompareExchange64(p, s, c);
return o != c;
}
JEMALLOC_INLINE void
atomic_write_u64(uint64_t *p, uint64_t x) {
InterlockedExchange64(p, x);
}
# elif (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8) || \
defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_8))
JEMALLOC_INLINE uint64_t
atomic_add_u64(uint64_t *p, uint64_t x) {
return __sync_add_and_fetch(p, x);
}
JEMALLOC_INLINE uint64_t
atomic_sub_u64(uint64_t *p, uint64_t x) {
return __sync_sub_and_fetch(p, x);
}
JEMALLOC_INLINE bool
atomic_cas_u64(uint64_t *p, uint64_t c, uint64_t s) {
return !__sync_bool_compare_and_swap(p, c, s);
}
JEMALLOC_INLINE void
atomic_write_u64(uint64_t *p, uint64_t x) {
__sync_lock_test_and_set(p, x);
}
# else
# error "Missing implementation for 64-bit atomic operations"
# endif
#endif
/******************************************************************************/
/* 32-bit operations. */
#if (defined(__i386__) || defined(__amd64__) || defined(__x86_64__))
JEMALLOC_INLINE uint32_t
atomic_add_u32(uint32_t *p, uint32_t x) {
uint32_t t = x;
asm volatile (
"lock; xaddl %0, %1;"
: "+r" (t), "=m" (*p) /* Outputs. */
: "m" (*p) /* Inputs. */
);
return t + x;
}
JEMALLOC_INLINE uint32_t
atomic_sub_u32(uint32_t *p, uint32_t x) {
uint32_t t;
x = (uint32_t)(-(int32_t)x);
t = x;
asm volatile (
"lock; xaddl %0, %1;"
: "+r" (t), "=m" (*p) /* Outputs. */
: "m" (*p) /* Inputs. */
);
return t + x;
}
JEMALLOC_INLINE bool
atomic_cas_u32(uint32_t *p, uint32_t c, uint32_t s) {
uint8_t success;
asm volatile (
"lock; cmpxchgl %4, %0;"
"sete %1;"
: "=m" (*p), "=a" (success) /* Outputs. */
: "m" (*p), "a" (c), "r" (s) /* Inputs. */
: "memory"
);
return !(bool)success;
}
JEMALLOC_INLINE void
atomic_write_u32(uint32_t *p, uint32_t x) {
asm volatile (
"xchgl %1, %0;" /* Lock is implied by xchgl. */
: "=m" (*p), "+r" (x) /* Outputs. */
: "m" (*p) /* Inputs. */
: "memory" /* Clobbers. */
);
}
# elif (defined(JEMALLOC_C11ATOMICS))
JEMALLOC_INLINE uint32_t
atomic_add_u32(uint32_t *p, uint32_t x) {
volatile atomic_uint_least32_t *a = (volatile atomic_uint_least32_t *)p;
return atomic_fetch_add(a, x) + x;
}
JEMALLOC_INLINE uint32_t
atomic_sub_u32(uint32_t *p, uint32_t x) {
volatile atomic_uint_least32_t *a = (volatile atomic_uint_least32_t *)p;
return atomic_fetch_sub(a, x) - x;
}
JEMALLOC_INLINE bool
atomic_cas_u32(uint32_t *p, uint32_t c, uint32_t s) {
volatile atomic_uint_least32_t *a = (volatile atomic_uint_least32_t *)p;
return !atomic_compare_exchange_strong(a, &c, s);
}
JEMALLOC_INLINE void
atomic_write_u32(uint32_t *p, uint32_t x) {
volatile atomic_uint_least32_t *a = (volatile atomic_uint_least32_t *)p;
atomic_store(a, x);
}
#elif (defined(JEMALLOC_ATOMIC9))
JEMALLOC_INLINE uint32_t
atomic_add_u32(uint32_t *p, uint32_t x) {
return atomic_fetchadd_32(p, x) + x;
}
JEMALLOC_INLINE uint32_t
atomic_sub_u32(uint32_t *p, uint32_t x) {
return atomic_fetchadd_32(p, (uint32_t)(-(int32_t)x)) - x;
}
JEMALLOC_INLINE bool
atomic_cas_u32(uint32_t *p, uint32_t c, uint32_t s) {
return !atomic_cmpset_32(p, c, s);
}
JEMALLOC_INLINE void
atomic_write_u32(uint32_t *p, uint32_t x) {
atomic_store_rel_32(p, x);
}
#elif (defined(JEMALLOC_OSATOMIC))
JEMALLOC_INLINE uint32_t
atomic_add_u32(uint32_t *p, uint32_t x) {
return OSAtomicAdd32((int32_t)x, (int32_t *)p);
}
JEMALLOC_INLINE uint32_t
atomic_sub_u32(uint32_t *p, uint32_t x) {
return OSAtomicAdd32(-((int32_t)x), (int32_t *)p);
}
JEMALLOC_INLINE bool
atomic_cas_u32(uint32_t *p, uint32_t c, uint32_t s) {
return !OSAtomicCompareAndSwap32(c, s, (int32_t *)p);
}
JEMALLOC_INLINE void
atomic_write_u32(uint32_t *p, uint32_t x) {
uint32_t o;
/*The documented OSAtomic*() API does not expose an atomic exchange. */
do {
o = atomic_read_u32(p);
} while (atomic_cas_u32(p, o, x));
}
#elif (defined(_MSC_VER))
JEMALLOC_INLINE uint32_t
atomic_add_u32(uint32_t *p, uint32_t x) {
return InterlockedExchangeAdd(p, x) + x;
}
JEMALLOC_INLINE uint32_t
atomic_sub_u32(uint32_t *p, uint32_t x) {
return InterlockedExchangeAdd(p, -((int32_t)x)) - x;
}
JEMALLOC_INLINE bool
atomic_cas_u32(uint32_t *p, uint32_t c, uint32_t s) {
uint32_t o;
o = InterlockedCompareExchange(p, s, c);
return o != c;
}
JEMALLOC_INLINE void
atomic_write_u32(uint32_t *p, uint32_t x) {
InterlockedExchange(p, x);
}
#elif (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4) || \
defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_4))
JEMALLOC_INLINE uint32_t
atomic_add_u32(uint32_t *p, uint32_t x) {
return __sync_add_and_fetch(p, x);
}
JEMALLOC_INLINE uint32_t
atomic_sub_u32(uint32_t *p, uint32_t x) {
return __sync_sub_and_fetch(p, x);
}
JEMALLOC_INLINE bool
atomic_cas_u32(uint32_t *p, uint32_t c, uint32_t s) {
return !__sync_bool_compare_and_swap(p, c, s);
}
JEMALLOC_INLINE void
atomic_write_u32(uint32_t *p, uint32_t x) {
__sync_lock_test_and_set(p, x);
}
#else
# error "Missing implementation for 32-bit atomic operations"
#endif
/******************************************************************************/
/* Pointer operations. */
JEMALLOC_INLINE void *
atomic_add_p(void **p, void *x) {
#if (LG_SIZEOF_PTR == 3)
return (void *)atomic_add_u64((uint64_t *)p, (uint64_t)x);
#elif (LG_SIZEOF_PTR == 2)
return (void *)atomic_add_u32((uint32_t *)p, (uint32_t)x);
#endif
}
JEMALLOC_INLINE void *
atomic_sub_p(void **p, void *x) {
#if (LG_SIZEOF_PTR == 3)
return (void *)atomic_add_u64((uint64_t *)p, (uint64_t)-((int64_t)x));
#elif (LG_SIZEOF_PTR == 2)
return (void *)atomic_add_u32((uint32_t *)p, (uint32_t)-((int32_t)x));
#endif
}
JEMALLOC_INLINE bool
atomic_cas_p(void **p, void *c, void *s) {
#if (LG_SIZEOF_PTR == 3)
return atomic_cas_u64((uint64_t *)p, (uint64_t)c, (uint64_t)s);
#elif (LG_SIZEOF_PTR == 2)
return atomic_cas_u32((uint32_t *)p, (uint32_t)c, (uint32_t)s);
#endif
}
JEMALLOC_INLINE void
atomic_write_p(void **p, const void *x) {
#if (LG_SIZEOF_PTR == 3)
atomic_write_u64((uint64_t *)p, (uint64_t)x);
#elif (LG_SIZEOF_PTR == 2)
atomic_write_u32((uint32_t *)p, (uint32_t)x);
#endif
}
/******************************************************************************/
/* size_t operations. */
JEMALLOC_INLINE size_t
atomic_add_zu(size_t *p, size_t x) {
#if (LG_SIZEOF_PTR == 3)
return (size_t)atomic_add_u64((uint64_t *)p, (uint64_t)x);
#elif (LG_SIZEOF_PTR == 2)
return (size_t)atomic_add_u32((uint32_t *)p, (uint32_t)x);
#endif
}
JEMALLOC_INLINE size_t
atomic_sub_zu(size_t *p, size_t x) {
#if (LG_SIZEOF_PTR == 3)
return (size_t)atomic_add_u64((uint64_t *)p, (uint64_t)-((int64_t)x));
#elif (LG_SIZEOF_PTR == 2)
return (size_t)atomic_add_u32((uint32_t *)p, (uint32_t)-((int32_t)x));
#endif
}
JEMALLOC_INLINE bool
atomic_cas_zu(size_t *p, size_t c, size_t s) {
#if (LG_SIZEOF_PTR == 3)
return atomic_cas_u64((uint64_t *)p, (uint64_t)c, (uint64_t)s);
#elif (LG_SIZEOF_PTR == 2)
return atomic_cas_u32((uint32_t *)p, (uint32_t)c, (uint32_t)s);
#endif
}
JEMALLOC_INLINE void
atomic_write_zu(size_t *p, size_t x) {
#if (LG_SIZEOF_PTR == 3)
atomic_write_u64((uint64_t *)p, (uint64_t)x);
#elif (LG_SIZEOF_PTR == 2)
atomic_write_u32((uint32_t *)p, (uint32_t)x);
#endif
}
/******************************************************************************/
/* unsigned operations. */
JEMALLOC_INLINE unsigned
atomic_add_u(unsigned *p, unsigned x) {
#if (LG_SIZEOF_INT == 3)
return (unsigned)atomic_add_u64((uint64_t *)p, (uint64_t)x);
#elif (LG_SIZEOF_INT == 2)
return (unsigned)atomic_add_u32((uint32_t *)p, (uint32_t)x);
#endif
}
JEMALLOC_INLINE unsigned
atomic_sub_u(unsigned *p, unsigned x) {
#if (LG_SIZEOF_INT == 3)
return (unsigned)atomic_add_u64((uint64_t *)p, (uint64_t)-((int64_t)x));
#elif (LG_SIZEOF_INT == 2)
return (unsigned)atomic_add_u32((uint32_t *)p, (uint32_t)-((int32_t)x));
#endif
}
JEMALLOC_INLINE bool
atomic_cas_u(unsigned *p, unsigned c, unsigned s) {
#if (LG_SIZEOF_INT == 3)
return atomic_cas_u64((uint64_t *)p, (uint64_t)c, (uint64_t)s);
#elif (LG_SIZEOF_INT == 2)
return atomic_cas_u32((uint32_t *)p, (uint32_t)c, (uint32_t)s);
#endif
}
JEMALLOC_INLINE void
atomic_write_u(unsigned *p, unsigned x) {
#if (LG_SIZEOF_INT == 3)
atomic_write_u64((uint64_t *)p, (uint64_t)x);
#elif (LG_SIZEOF_INT == 2)
atomic_write_u32((uint32_t *)p, (uint32_t)x);
#endif
}
/******************************************************************************/
#endif
#endif /* JEMALLOC_INTERNAL_ATOMIC_INLINES_H */


@ -0,0 +1,158 @@
#ifndef JEMALLOC_INTERNAL_ATOMIC_MSVC_H
#define JEMALLOC_INTERNAL_ATOMIC_MSVC_H
#define ATOMIC_INIT(...) {__VA_ARGS__}
typedef enum {
atomic_memory_order_relaxed,
atomic_memory_order_acquire,
atomic_memory_order_release,
atomic_memory_order_acq_rel,
atomic_memory_order_seq_cst
} atomic_memory_order_t;
typedef char atomic_repr_0_t;
typedef short atomic_repr_1_t;
typedef long atomic_repr_2_t;
typedef __int64 atomic_repr_3_t;
ATOMIC_INLINE void
atomic_fence(atomic_memory_order_t mo) {
_ReadWriteBarrier();
# if defined(_M_ARM) || defined(_M_ARM64)
/* ARM needs a barrier for everything but relaxed. */
if (mo != atomic_memory_order_relaxed) {
MemoryBarrier();
}
# elif defined(_M_IX86) || defined (_M_X64)
/* x86 needs a barrier only for seq_cst. */
if (mo == atomic_memory_order_seq_cst) {
MemoryBarrier();
}
# else
# error "Don't know how to create atomics for this platform for MSVC."
# endif
_ReadWriteBarrier();
}
#define ATOMIC_INTERLOCKED_REPR(lg_size) atomic_repr_ ## lg_size ## _t
#define ATOMIC_CONCAT(a, b) ATOMIC_RAW_CONCAT(a, b)
#define ATOMIC_RAW_CONCAT(a, b) a ## b
#define ATOMIC_INTERLOCKED_NAME(base_name, lg_size) ATOMIC_CONCAT( \
base_name, ATOMIC_INTERLOCKED_SUFFIX(lg_size))
#define ATOMIC_INTERLOCKED_SUFFIX(lg_size) \
ATOMIC_CONCAT(ATOMIC_INTERLOCKED_SUFFIX_, lg_size)
#define ATOMIC_INTERLOCKED_SUFFIX_0 8
#define ATOMIC_INTERLOCKED_SUFFIX_1 16
#define ATOMIC_INTERLOCKED_SUFFIX_2
#define ATOMIC_INTERLOCKED_SUFFIX_3 64
#define JEMALLOC_GENERATE_ATOMICS(type, short_type, lg_size) \
typedef struct { \
ATOMIC_INTERLOCKED_REPR(lg_size) repr; \
} atomic_##short_type##_t; \
\
ATOMIC_INLINE type \
atomic_load_##short_type(const atomic_##short_type##_t *a, \
atomic_memory_order_t mo) { \
ATOMIC_INTERLOCKED_REPR(lg_size) ret = a->repr; \
if (mo != atomic_memory_order_relaxed) { \
atomic_fence(atomic_memory_order_acquire); \
} \
return (type) ret; \
} \
\
ATOMIC_INLINE void \
atomic_store_##short_type(atomic_##short_type##_t *a, \
type val, atomic_memory_order_t mo) { \
if (mo != atomic_memory_order_relaxed) { \
atomic_fence(atomic_memory_order_release); \
} \
a->repr = (ATOMIC_INTERLOCKED_REPR(lg_size)) val; \
if (mo == atomic_memory_order_seq_cst) { \
atomic_fence(atomic_memory_order_seq_cst); \
} \
} \
\
ATOMIC_INLINE type \
atomic_exchange_##short_type(atomic_##short_type##_t *a, type val, \
atomic_memory_order_t mo) { \
return (type)ATOMIC_INTERLOCKED_NAME(_InterlockedExchange, \
lg_size)(&a->repr, (ATOMIC_INTERLOCKED_REPR(lg_size))val); \
} \
\
ATOMIC_INLINE bool \
atomic_compare_exchange_weak_##short_type(atomic_##short_type##_t *a, \
type *expected, type desired, atomic_memory_order_t success_mo, \
atomic_memory_order_t failure_mo) { \
ATOMIC_INTERLOCKED_REPR(lg_size) e = \
(ATOMIC_INTERLOCKED_REPR(lg_size))*expected; \
ATOMIC_INTERLOCKED_REPR(lg_size) d = \
(ATOMIC_INTERLOCKED_REPR(lg_size))desired; \
ATOMIC_INTERLOCKED_REPR(lg_size) old = \
ATOMIC_INTERLOCKED_NAME(_InterlockedCompareExchange, \
lg_size)(&a->repr, d, e); \
if (old == e) { \
return true; \
} else { \
*expected = (type)old; \
return false; \
} \
} \
\
ATOMIC_INLINE bool \
atomic_compare_exchange_strong_##short_type(atomic_##short_type##_t *a, \
type *expected, type desired, atomic_memory_order_t success_mo, \
atomic_memory_order_t failure_mo) { \
/* We implement the weak version with strong semantics. */ \
return atomic_compare_exchange_weak_##short_type(a, expected, \
desired, success_mo, failure_mo); \
}
#define JEMALLOC_GENERATE_INT_ATOMICS(type, short_type, lg_size) \
JEMALLOC_GENERATE_ATOMICS(type, short_type, lg_size) \
\
ATOMIC_INLINE type \
atomic_fetch_add_##short_type(atomic_##short_type##_t *a, \
type val, atomic_memory_order_t mo) { \
return (type)ATOMIC_INTERLOCKED_NAME(_InterlockedExchangeAdd, \
lg_size)(&a->repr, (ATOMIC_INTERLOCKED_REPR(lg_size))val); \
} \
\
ATOMIC_INLINE type \
atomic_fetch_sub_##short_type(atomic_##short_type##_t *a, \
type val, atomic_memory_order_t mo) { \
/* \
* MSVC warns on negation of unsigned operands, but for us it \
* gives exactly the right semantics (MAX_TYPE + 1 - operand). \
*/ \
__pragma(warning(push)) \
__pragma(warning(disable: 4146)) \
return atomic_fetch_add_##short_type(a, -val, mo); \
__pragma(warning(pop)) \
} \
ATOMIC_INLINE type \
atomic_fetch_and_##short_type(atomic_##short_type##_t *a, \
type val, atomic_memory_order_t mo) { \
return (type)ATOMIC_INTERLOCKED_NAME(_InterlockedAnd, lg_size)( \
&a->repr, (ATOMIC_INTERLOCKED_REPR(lg_size))val); \
} \
ATOMIC_INLINE type \
atomic_fetch_or_##short_type(atomic_##short_type##_t *a, \
type val, atomic_memory_order_t mo) { \
return (type)ATOMIC_INTERLOCKED_NAME(_InterlockedOr, lg_size)( \
&a->repr, (ATOMIC_INTERLOCKED_REPR(lg_size))val); \
} \
ATOMIC_INLINE type \
atomic_fetch_xor_##short_type(atomic_##short_type##_t *a, \
type val, atomic_memory_order_t mo) { \
return (type)ATOMIC_INTERLOCKED_NAME(_InterlockedXor, lg_size)( \
&a->repr, (ATOMIC_INTERLOCKED_REPR(lg_size))val); \
}
#endif /* JEMALLOC_INTERNAL_ATOMIC_MSVC_H */
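
As a concrete example of the token pasting above: lg_size == 2 selects an
empty suffix and a long representation, while lg_size == 3 selects the 64
suffix and an __int64 representation, so the fetch-add wrapper resolves to the
corresponding Interlocked intrinsics:

    /* lg_size == 2:
     *   ATOMIC_INTERLOCKED_NAME(_InterlockedExchangeAdd, 2)
     *     -> _InterlockedExchangeAdd(&a->repr, (long)val)
     * lg_size == 3:
     *   ATOMIC_INTERLOCKED_NAME(_InterlockedExchangeAdd, 3)
     *     -> _InterlockedExchangeAdd64(&a->repr, (__int64)val)
     */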


@ -1,8 +0,0 @@
#ifndef JEMALLOC_INTERNAL_ATOMIC_TYPES_H
#define JEMALLOC_INTERNAL_ATOMIC_TYPES_H
#if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3)
# define JEMALLOC_ATOMIC_U64
#endif
#endif /* JEMALLOC_INTERNAL_ATOMIC_TYPES_H */


@ -146,14 +146,6 @@ static const bool have_thp =
 #endif
 ;
-#if defined(JEMALLOC_C11ATOMICS) && !defined(__cplusplus)
-#include <stdatomic.h>
-#endif
-#ifdef JEMALLOC_ATOMIC9
-#include <machine/atomic.h>
-#endif
 #if (defined(JEMALLOC_OSATOMIC) || defined(JEMALLOC_OSSPIN))
 #include <libkern/OSAtomic.h>
 #endif
@ -199,10 +191,21 @@ static const bool have_thp =
  * its translation unit). Each component is now broken up into multiple header
  * files, corresponding to the sections above (e.g. instead of "tsd.h", we now
  * have "tsd_types.h", "tsd_structs.h", "tsd_externs.h", "tsd_inlines.h").
+ *
+ * Those files which have been converted to explicitly include their
+ * inter-component dependencies are now in the initial HERMETIC HEADERS
+ * section. These headers may still rely on this file for system headers and
+ * global jemalloc headers, however.
  */
 #include "jemalloc/internal/jemalloc_internal_macros.h"
+/******************************************************************************/
+/* HERMETIC HEADERS */
+/******************************************************************************/
+#include "jemalloc/internal/atomic.h"
 /******************************************************************************/
 /* TYPES */
 /******************************************************************************/
@ -380,7 +383,6 @@ typedef unsigned szind_t;
 #include "jemalloc/internal/nstime_types.h"
 #include "jemalloc/internal/util_types.h"
-#include "jemalloc/internal/atomic_types.h"
 #include "jemalloc/internal/spin_types.h"
 #include "jemalloc/internal/prng_types.h"
 #include "jemalloc/internal/ticker_types.h"
@ -489,7 +491,6 @@ void jemalloc_postfork_child(void);
 #include "jemalloc/internal/nstime_externs.h"
 #include "jemalloc/internal/util_externs.h"
-#include "jemalloc/internal/atomic_externs.h"
 #include "jemalloc/internal/ckh_externs.h"
 #include "jemalloc/internal/stats_externs.h"
 #include "jemalloc/internal/ctl_externs.h"
@ -513,7 +514,6 @@ void jemalloc_postfork_child(void);
 /******************************************************************************/
 #include "jemalloc/internal/util_inlines.h"
-#include "jemalloc/internal/atomic_inlines.h"
 #include "jemalloc/internal/spin_inlines.h"
 #include "jemalloc/internal/prng_inlines.h"
 #include "jemalloc/internal/ticker_inlines.h"


@ -30,16 +30,13 @@
 #undef LG_VADDR
 /* Defined if C11 atomics are available. */
-#undef JEMALLOC_C11ATOMICS
+#undef JEMALLOC_C11_ATOMICS
-/* Defined if the equivalent of FreeBSD's atomic(9) functions are available. */
-#undef JEMALLOC_ATOMIC9
+/* Defined if GCC __atomic atomics are available. */
+#undef JEMALLOC_GCC_ATOMIC_ATOMICS
-/*
- * Defined if OSAtomic*() functions are available, as provided by Darwin, and
- * documented in the atomic(3) manual page.
- */
-#undef JEMALLOC_OSATOMIC
+/* Defined if GCC __sync atomics are available. */
+#undef JEMALLOC_GCC_SYNC_ATOMICS
 /*
  * Defined if __sync_add_and_fetch(uint32_t *, uint32_t) and


@ -72,26 +72,6 @@ arena_tdata_get
 arena_tdata_get_hard
 arenas
 arenas_tdata_cleanup
-atomic_add_p
-atomic_add_u
-atomic_add_u32
-atomic_add_u64
-atomic_add_zu
-atomic_cas_p
-atomic_cas_u
-atomic_cas_u32
-atomic_cas_u64
-atomic_cas_zu
-atomic_sub_p
-atomic_sub_u
-atomic_sub_u32
-atomic_sub_u64
-atomic_sub_zu
-atomic_write_p
-atomic_write_u
-atomic_write_u32
-atomic_write_u64
-atomic_write_zu
 b0get
 base_alloc
 base_boot


@ -1,2 +0,0 @@
#define JEMALLOC_ATOMIC_C_
#include "jemalloc/internal/jemalloc_internal.h"


@ -1,101 +1,257 @@
#include "test/jemalloc_test.h"
-#define TEST_STRUCT(p, t) \
-struct p##_test_s { \
-	t accum0; \
-	t x; \
-	t s; \
-}; \
-typedef struct p##_test_s p##_test_t;
-#define TEST_BODY(p, t, tc, ta, FMT) do { \
-	const p##_test_t tests[] = { \
-		{(t)-1, (t)-1, (t)-2}, \
-		{(t)-1, (t) 0, (t)-2}, \
-		{(t)-1, (t) 1, (t)-2}, \
-		\
-		{(t) 0, (t)-1, (t)-2}, \
-		{(t) 0, (t) 0, (t)-2}, \
-		{(t) 0, (t) 1, (t)-2}, \
-		\
-		{(t) 1, (t)-1, (t)-2}, \
-		{(t) 1, (t) 0, (t)-2}, \
-		{(t) 1, (t) 1, (t)-2}, \
-		\
-		{(t)0, (t)-(1 << 22), (t)-2}, \
-		{(t)0, (t)(1 << 22), (t)-2}, \
-		{(t)(1 << 22), (t)-(1 << 22), (t)-2}, \
-		{(t)(1 << 22), (t)(1 << 22), (t)-2} \
-	}; \
-	unsigned i; \
-	\
-	for (i = 0; i < sizeof(tests)/sizeof(p##_test_t); i++) { \
-		bool err; \
-		t accum = tests[i].accum0; \
-		assert_##ta##_eq(atomic_read_##p(&accum), \
-		    tests[i].accum0, \
-		    "Erroneous read, i=%u", i); \
-		\
-		assert_##ta##_eq(atomic_add_##p(&accum, tests[i].x), \
-		    (t)((tc)tests[i].accum0 + (tc)tests[i].x), \
-		    "i=%u, accum=%"FMT", x=%"FMT, \
-		    i, tests[i].accum0, tests[i].x); \
-		assert_##ta##_eq(atomic_read_##p(&accum), accum, \
-		    "Erroneous add, i=%u", i); \
-		\
-		accum = tests[i].accum0; \
-		assert_##ta##_eq(atomic_sub_##p(&accum, tests[i].x), \
-		    (t)((tc)tests[i].accum0 - (tc)tests[i].x), \
-		    "i=%u, accum=%"FMT", x=%"FMT, \
-		    i, tests[i].accum0, tests[i].x); \
-		assert_##ta##_eq(atomic_read_##p(&accum), accum, \
-		    "Erroneous sub, i=%u", i); \
-		\
-		accum = tests[i].accum0; \
-		err = atomic_cas_##p(&accum, tests[i].x, tests[i].s); \
-		assert_b_eq(err, tests[i].accum0 != tests[i].x, \
-		    "Erroneous cas success/failure result"); \
-		assert_##ta##_eq(accum, err ? tests[i].accum0 : \
-		    tests[i].s, "Erroneous cas effect, i=%u", i); \
-		\
-		accum = tests[i].accum0; \
-		atomic_write_##p(&accum, tests[i].s); \
-		assert_##ta##_eq(accum, tests[i].s, \
-		    "Erroneous write, i=%u", i); \
-	} \
-} while (0)
-TEST_STRUCT(u64, uint64_t)
/*
 * We *almost* have consistent short names (e.g. "u32" for uint32_t, "b" for
 * bool, etc. The one exception is that the short name for void * is "p" in
 * some places and "ptr" in others. In the long run it would be nice to unify
 * these, but in the short run we'll use this shim.
 */
#define assert_p_eq assert_ptr_eq
/*
 * t: the non-atomic type, like "uint32_t".
 * ta: the short name for the type, like "u32".
 * val[1,2,3]: Values of the given type. The CAS tests use val2 for expected,
 * and val3 for desired.
 */
#define DO_TESTS(t, ta, val1, val2, val3) do { \
	t val; \
	t raw_atomic; \
	t expected; \
	bool success; \
	/* This (along with the load below) also tests ATOMIC_LOAD. */ \
	atomic_##ta##_t atom = ATOMIC_INIT(val1); \
	\
	/* ATOMIC_INIT and load. */ \
	val = atomic_load_##ta(&atom, ATOMIC_RELAXED); \
	assert_##ta##_eq(val1, val, "Load or init failed"); \
	\
	/* Store. */ \
	atomic_store_##ta(&atom, val1, ATOMIC_RELAXED); \
	atomic_store_##ta(&atom, val2, ATOMIC_RELAXED); \
	val = atomic_load_##ta(&atom, ATOMIC_RELAXED); \
	assert_##ta##_eq(val2, val, "Store failed"); \
	\
	/* Exchange. */ \
	atomic_store_##ta(&atom, val1, ATOMIC_RELAXED); \
	val = atomic_exchange_##ta(&atom, val2, ATOMIC_RELAXED); \
	assert_##ta##_eq(val1, val, "Exchange returned invalid value"); \
	val = atomic_load_##ta(&atom, ATOMIC_RELAXED); \
	assert_##ta##_eq(val2, val, "Exchange store invalid value"); \
	\
	/* \
	 * Weak CAS. Spurious failures are allowed, so we loop a few \
	 * times. \
	 */ \
	atomic_store_##ta(&atom, val1, ATOMIC_RELAXED); \
	success = false; \
	for (int i = 0; i < 10 && !success; i++) { \
		expected = val2; \
		success = atomic_compare_exchange_weak_##ta(&atom, \
		    &expected, val3, ATOMIC_RELAXED, ATOMIC_RELAXED); \
		assert_##ta##_eq(val1, expected, \
		    "CAS should update expected"); \
	} \
	assert_b_eq(val1 == val2, success, \
	    "Weak CAS did the wrong state update"); \
	val = atomic_load_##ta(&atom, ATOMIC_RELAXED); \
	if (success) { \
		assert_##ta##_eq(val3, val, \
		    "Successful CAS should update atomic"); \
	} else { \
		assert_##ta##_eq(val1, val, \
		    "Unsuccessful CAS should not update atomic"); \
	} \
	\
	/* Strong CAS. */ \
	atomic_store_##ta(&atom, val1, ATOMIC_RELAXED); \
	expected = val2; \
	success = atomic_compare_exchange_strong_##ta(&atom, &expected, \
	    val3, ATOMIC_RELAXED, ATOMIC_RELAXED); \
	assert_b_eq(val1 == val2, success, \
	    "Strong CAS did the wrong state update"); \
	val = atomic_load_##ta(&atom, ATOMIC_RELAXED); \
	if (success) { \
		assert_##ta##_eq(val3, val, \
		    "Successful CAS should update atomic"); \
	} else { \
		assert_##ta##_eq(val1, val, \
		    "Unsuccessful CAS should not update atomic"); \
	} \
	\
	/* Previous atomics API. */ \
	\
	/* Read. */ \
	raw_atomic = val1; \
	val = atomic_read_##ta(&raw_atomic); \
	assert_##ta##_eq(val1, val, "Read failed"); \
	\
	/* Write. */ \
	raw_atomic = val1; \
	atomic_write_##ta(&raw_atomic, val2); \
	assert_##ta##_eq(val2, raw_atomic, "Write failed"); \
	\
	/* CAS. */ \
	raw_atomic = val1; \
	success = !atomic_cas_##ta(&raw_atomic, val2, val3); \
	assert_b_eq(val1 == val2, success, \
	    "CAS did the wrong state update"); \
	val = raw_atomic; \
	if (success) { \
		assert_##ta##_eq(val3, val, \
		    "Successful CAS should update atomic"); \
	} else { \
		assert_##ta##_eq(val1, val, \
		    "Unsuccessful CAS should not update atomic"); \
	} \
} while (0)
#define DO_INTEGER_TESTS(t, ta, val1, val2) do { \
atomic_##ta##_t atom; \
t val; \
t raw_atomic; \
\
/* Fetch-add. */ \
atomic_store_##ta(&atom, val1, ATOMIC_RELAXED); \
val = atomic_fetch_add_##ta(&atom, val2, ATOMIC_RELAXED); \
assert_##ta##_eq(val1, val, \
"Fetch-add should return previous value"); \
val = atomic_load_##ta(&atom, ATOMIC_RELAXED); \
assert_##ta##_eq(val1 + val2, val, \
"Fetch-add should update atomic"); \
\
/* Fetch-sub. */ \
atomic_store_##ta(&atom, val1, ATOMIC_RELAXED); \
val = atomic_fetch_sub_##ta(&atom, val2, ATOMIC_RELAXED); \
assert_##ta##_eq(val1, val, \
"Fetch-sub should return previous value"); \
val = atomic_load_##ta(&atom, ATOMIC_RELAXED); \
assert_##ta##_eq(val1 - val2, val, \
"Fetch-sub should update atomic"); \
\
/* Fetch-and. */ \
atomic_store_##ta(&atom, val1, ATOMIC_RELAXED); \
val = atomic_fetch_and_##ta(&atom, val2, ATOMIC_RELAXED); \
assert_##ta##_eq(val1, val, \
"Fetch-and should return previous value"); \
val = atomic_load_##ta(&atom, ATOMIC_RELAXED); \
assert_##ta##_eq(val1 & val2, val, \
"Fetch-and should update atomic"); \
\
/* Fetch-or. */ \
atomic_store_##ta(&atom, val1, ATOMIC_RELAXED); \
val = atomic_fetch_or_##ta(&atom, val2, ATOMIC_RELAXED); \
assert_##ta##_eq(val1, val, \
"Fetch-or should return previous value"); \
val = atomic_load_##ta(&atom, ATOMIC_RELAXED); \
assert_##ta##_eq(val1 | val2, val, \
"Fetch-or should update atomic"); \
\
/* Fetch-xor. */ \
atomic_store_##ta(&atom, val1, ATOMIC_RELAXED); \
val = atomic_fetch_xor_##ta(&atom, val2, ATOMIC_RELAXED); \
assert_##ta##_eq(val1, val, \
"Fetch-xor should return previous value"); \
val = atomic_load_##ta(&atom, ATOMIC_RELAXED); \
assert_##ta##_eq(val1 ^ val2, val, \
"Fetch-xor should update atomic"); \
\
/* Previous atomics API. */ \
\
/* Add. */ \
raw_atomic = val1; \
val = atomic_add_##ta(&raw_atomic, val2); \
assert_##ta##_eq(val1 + val2, val, \
"atomic_add should return new value"); \
assert_##ta##_eq(val1 + val2, raw_atomic, \
"atomic_add should update atomic"); \
\
/* Sub. */ \
raw_atomic = val1; \
val = atomic_sub_##ta(&raw_atomic, val2); \
assert_##ta##_eq(val1 - val2, val, \
"atomic_sub should return new value"); \
assert_##ta##_eq(val1 - val2, raw_atomic, \
"atomic_add should update atomic"); \
} while (0)
#define TEST_STRUCT(t, ta) \
typedef struct { \
t val1; \
t val2; \
t val3; \
} ta##_test_t;
#define TEST_CASES(t) { \
{(t)-1, (t)-1, (t)-2}, \
{(t)-1, (t) 0, (t)-2}, \
{(t)-1, (t) 1, (t)-2}, \
\
{(t) 0, (t)-1, (t)-2}, \
{(t) 0, (t) 0, (t)-2}, \
{(t) 0, (t) 1, (t)-2}, \
\
{(t) 1, (t)-1, (t)-2}, \
{(t) 1, (t) 0, (t)-2}, \
{(t) 1, (t) 1, (t)-2}, \
\
{(t)0, (t)-(1 << 22), (t)-2}, \
{(t)0, (t)(1 << 22), (t)-2}, \
{(t)(1 << 22), (t)-(1 << 22), (t)-2}, \
{(t)(1 << 22), (t)(1 << 22), (t)-2} \
}
#define TEST_BODY(t, ta) do { \
const ta##_test_t tests[] = TEST_CASES(t); \
for (unsigned i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) { \
ta##_test_t test = tests[i]; \
DO_TESTS(t, ta, test.val1, test.val2, test.val3); \
} \
} while (0)
#define INTEGER_TEST_BODY(t, ta) do { \
const ta##_test_t tests[] = TEST_CASES(t); \
for (unsigned i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) { \
ta##_test_t test = tests[i]; \
DO_TESTS(t, ta, test.val1, test.val2, test.val3); \
DO_INTEGER_TESTS(t, ta, test.val1, test.val2); \
} \
} while (0)
TEST_STRUCT(uint64_t, u64);
TEST_BEGIN(test_atomic_u64) {
#if !(LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3)
	test_skip("64-bit atomic operations not supported");
#else
-	TEST_BODY(u64, uint64_t, uint64_t, u64, FMTx64);
	INTEGER_TEST_BODY(uint64_t, u64);
#endif
}
TEST_END
-TEST_STRUCT(u32, uint32_t)
TEST_STRUCT(uint32_t, u32);
TEST_BEGIN(test_atomic_u32) {
-	TEST_BODY(u32, uint32_t, uint32_t, u32, "#"FMTx32);
	INTEGER_TEST_BODY(uint32_t, u32);
}
TEST_END
-TEST_STRUCT(p, void *)
TEST_STRUCT(void *, p);
TEST_BEGIN(test_atomic_p) {
-	TEST_BODY(p, void *, uintptr_t, ptr, "p");
	TEST_BODY(void *, p);
}
TEST_END
-TEST_STRUCT(zu, size_t)
TEST_STRUCT(size_t, zu);
TEST_BEGIN(test_atomic_zu) {
-	TEST_BODY(zu, size_t, size_t, zu, "#zx");
	INTEGER_TEST_BODY(size_t, zu);
}
TEST_END
-TEST_STRUCT(u, unsigned)
TEST_STRUCT(unsigned, u);
TEST_BEGIN(test_atomic_u) {
-	TEST_BODY(u, unsigned, unsigned, u, "#x");
	INTEGER_TEST_BODY(unsigned, u);
}
TEST_END
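
The same macros extend to any further type the backport generates; for example,
if a u8 variant were ever added (hypothetical: it would also require an
assert_u8_eq() helper and a JEMALLOC_GENERATE_INT_ATOMICS(uint8_t, u8, 0)
instantiation), its tests would reduce to:

    TEST_STRUCT(uint8_t, u8);
    TEST_BEGIN(test_atomic_u8) {
        INTEGER_TEST_BODY(uint8_t, u8);
    }
    TEST_END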