Introduce a backport of C11 atomics
This introduces a backport of C11 atomics. It has four implementations; ranked
in order of preference, they are:
- GCC/Clang __atomic builtins
- GCC/Clang __sync builtins
- MSVC _Interlocked builtins
- C11 atomics, from <stdatomic.h>
The primary advantages are:
- Close adherence to the standard API gives us a defined memory model.
- Type safety: atomic objects are now separate types from non-atomic ones, so
that it's impossible to mix up atomic and non-atomic updates (which is
undefined behavior that compilers are starting to take advantage of).
- Efficiency: we can specify ordering for operations, avoiding fences and
atomic operations on strongly ordered architectures (example:
`atomic_write_u32(ptr, val);` involves a CAS loop, whereas
`atomic_store(ptr, val, ATOMIC_RELEASE);` is a plain store).
This diff leaves in the current atomics API (implementing them in terms of the
backport). This lets us transition uses over piecemeal.
Testing:
This is by nature hard to test. I've manually tested the first three options on
Linux on gcc by futzing with the #defines manually, on freebsd with gcc and
clang, on MSVC, and on OS X with clang. All of these were x86 machines though,
and we don't have any test infrastructure set up for non-x86 platforms.
2017-01-26 01:54:27 +08:00
|
|
|
#ifndef JEMALLOC_INTERNAL_ATOMIC_GCC_ATOMIC_H
|
|
|
|
#define JEMALLOC_INTERNAL_ATOMIC_GCC_ATOMIC_H
|
|
|
|
|
2023-06-10 08:37:47 +08:00
|
|
|
#include "jemalloc/internal/jemalloc_preamble.h"
|
2017-03-04 08:43:47 +08:00
|
|
|
#include "jemalloc/internal/assert.h"
|
|
|
|
|
2023-06-10 08:37:47 +08:00
|
|
|
/*
 * Qualifier placed on every function this header defines or generates.  It is
 * #undef'd again at the end of the header.
 */
#define ATOMIC_INLINE JEMALLOC_ALWAYS_INLINE
|
|
|
|
|
Introduce a backport of C11 atomics
This introduces a backport of C11 atomics. It has four implementations; ranked
in order of preference, they are:
- GCC/Clang __atomic builtins
- GCC/Clang __sync builtins
- MSVC _Interlocked builtins
- C11 atomics, from <stdatomic.h>
The primary advantages are:
- Close adherence to the standard API gives us a defined memory model.
- Type safety: atomic objects are now separate types from non-atomic ones, so
that it's impossible to mix up atomic and non-atomic updates (which is
undefined behavior that compilers are starting to take advantage of).
- Efficiency: we can specify ordering for operations, avoiding fences and
atomic operations on strongly ordered architectures (example:
`atomic_write_u32(ptr, val);` involves a CAS loop, whereas
`atomic_store(ptr, val, ATOMIC_RELEASE);` is a plain store).
This diff leaves in the current atomics API (implementing them in terms of the
backport). This lets us transition uses over piecemeal.
Testing:
This is by nature hard to test. I've manually tested the first three options on
Linux on gcc by futzing with the #defines manually, on freebsd with gcc and
clang, on MSVC, and on OS X with clang. All of these were x86 machines though,
and we don't have any test infrastructure set up for non-x86 platforms.
2017-01-26 01:54:27 +08:00
|
|
|
/*
 * Wraps its arguments in braces, yielding an aggregate initializer.  The
 * atomic_<short_type>_t structs generated in this header have a single member
 * (repr), so ATOMIC_INIT(v) initializes that member to v.
 */
#define ATOMIC_INIT(...) {__VA_ARGS__}
|
|
|
|
|
|
|
|
/*
 * Memory orderings accepted by the atomic operations in this header.
 * atomic_enum_to_builtin() maps each enumerator onto the __ATOMIC_* constant
 * of the same name (relaxed -> __ATOMIC_RELAXED, and so on).
 */
typedef enum {
	atomic_memory_order_relaxed,
	atomic_memory_order_acquire,
	atomic_memory_order_release,
	atomic_memory_order_acq_rel,
	atomic_memory_order_seq_cst
} atomic_memory_order_t;
|
|
|
|
|
|
|
|
ATOMIC_INLINE int
|
|
|
|
atomic_enum_to_builtin(atomic_memory_order_t mo) {
|
|
|
|
switch (mo) {
|
|
|
|
case atomic_memory_order_relaxed:
|
|
|
|
return __ATOMIC_RELAXED;
|
|
|
|
case atomic_memory_order_acquire:
|
|
|
|
return __ATOMIC_ACQUIRE;
|
|
|
|
case atomic_memory_order_release:
|
|
|
|
return __ATOMIC_RELEASE;
|
|
|
|
case atomic_memory_order_acq_rel:
|
|
|
|
return __ATOMIC_ACQ_REL;
|
|
|
|
case atomic_memory_order_seq_cst:
|
|
|
|
return __ATOMIC_SEQ_CST;
|
|
|
|
}
|
2017-03-04 08:43:47 +08:00
|
|
|
/* Can't happen; the switch is exhaustive. */
|
|
|
|
not_reached();
|
Introduce a backport of C11 atomics
This introduces a backport of C11 atomics. It has four implementations; ranked
in order of preference, they are:
- GCC/Clang __atomic builtins
- GCC/Clang __sync builtins
- MSVC _Interlocked builtins
- C11 atomics, from <stdatomic.h>
The primary advantages are:
- Close adherence to the standard API gives us a defined memory model.
- Type safety: atomic objects are now separate types from non-atomic ones, so
that it's impossible to mix up atomic and non-atomic updates (which is
undefined behavior that compilers are starting to take advantage of).
- Efficiency: we can specify ordering for operations, avoiding fences and
atomic operations on strongly ordered architectures (example:
`atomic_write_u32(ptr, val);` involves a CAS loop, whereas
`atomic_store(ptr, val, ATOMIC_RELEASE);` is a plain store.
This diff leaves in the current atomics API (implementing them in terms of the
backport). This lets us transition uses over piecemeal.
Testing:
This is by nature hard to test. I've manually tested the first three options on
Linux on gcc by futzing with the #defines manually, on freebsd with gcc and
clang, on MSVC, and on OS X with clang. All of these were x86 machines though,
and we don't have any test infrastructure set up for non-x86 platforms.
2017-01-26 01:54:27 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
ATOMIC_INLINE void
|
|
|
|
atomic_fence(atomic_memory_order_t mo) {
|
|
|
|
__atomic_thread_fence(atomic_enum_to_builtin(mo));
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * JEMALLOC_GENERATE_ATOMICS(type, short_type, lg_size)
 *
 * Generates the atomic wrapper type for `type` plus its basic operations, all
 * implemented with the generic GCC/Clang __atomic builtins:
 *
 *   atomic_<short_type>_t                        struct with one member, repr
 *   atomic_load_<short_type>(a, mo)              returns the loaded value
 *   atomic_store_<short_type>(a, val, mo)        stores val
 *   atomic_exchange_<short_type>(a, val, mo)     stores val, returns old value
 *   atomic_compare_exchange_weak_<short_type>(a, expected, desired,
 *       success_mo, failure_mo)
 *   atomic_compare_exchange_strong_<short_type>(a, expected, desired,
 *       success_mo, failure_mo)
 *
 * The two compare-exchange functions return the bool result of
 * __atomic_compare_exchange and differ only in the builtin's `weak` argument
 * (true for _weak, false for _strong).  `expected` is handed straight to the
 * builtin, which per the GCC documentation writes the observed value back
 * through it on failure.
 *
 * lg_size is accepted for signature compatibility but is not used by this
 * implementation.
 */
#define JEMALLOC_GENERATE_ATOMICS(type, short_type, \
    /* unused */ lg_size) \
typedef struct { \
	type repr; \
} atomic_##short_type##_t; \
\
ATOMIC_INLINE type \
atomic_load_##short_type(const atomic_##short_type##_t *a, \
    atomic_memory_order_t mo) { \
	type result; \
	__atomic_load(&a->repr, &result, atomic_enum_to_builtin(mo)); \
	return result; \
} \
\
ATOMIC_INLINE void \
atomic_store_##short_type(atomic_##short_type##_t *a, type val, \
    atomic_memory_order_t mo) { \
	__atomic_store(&a->repr, &val, atomic_enum_to_builtin(mo)); \
} \
\
ATOMIC_INLINE type \
atomic_exchange_##short_type(atomic_##short_type##_t *a, type val, \
    atomic_memory_order_t mo) { \
	type result; \
	__atomic_exchange(&a->repr, &val, &result, \
	    atomic_enum_to_builtin(mo)); \
	return result; \
} \
\
ATOMIC_INLINE bool \
atomic_compare_exchange_weak_##short_type(atomic_##short_type##_t *a, \
    UNUSED type *expected, type desired, \
    atomic_memory_order_t success_mo, \
    atomic_memory_order_t failure_mo) { \
	return __atomic_compare_exchange(&a->repr, expected, &desired, \
	    true, atomic_enum_to_builtin(success_mo), \
	    atomic_enum_to_builtin(failure_mo)); \
} \
\
ATOMIC_INLINE bool \
atomic_compare_exchange_strong_##short_type(atomic_##short_type##_t *a, \
    UNUSED type *expected, type desired, \
    atomic_memory_order_t success_mo, \
    atomic_memory_order_t failure_mo) { \
	return __atomic_compare_exchange(&a->repr, expected, &desired, \
	    false, \
	    atomic_enum_to_builtin(success_mo), \
	    atomic_enum_to_builtin(failure_mo)); \
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * JEMALLOC_GENERATE_INT_ATOMICS(type, short_type, lg_size)
 *
 * Expands JEMALLOC_GENERATE_ATOMICS for `type` and additionally generates the
 * read-modify-write operations
 *
 *   atomic_fetch_add_<short_type>(a, val, mo)
 *   atomic_fetch_sub_<short_type>(a, val, mo)
 *   atomic_fetch_and_<short_type>(a, val, mo)
 *   atomic_fetch_or_<short_type>(a, val, mo)
 *   atomic_fetch_xor_<short_type>(a, val, mo)
 *
 * Each one returns the result of the matching __atomic_fetch_<op> builtin
 * applied to a->repr, which per the GCC documentation is the value held
 * before the operation.
 *
 * lg_size is only forwarded to JEMALLOC_GENERATE_ATOMICS; it is otherwise
 * unused here.
 */
#define JEMALLOC_GENERATE_INT_ATOMICS(type, short_type, \
    /* unused */ lg_size) \
JEMALLOC_GENERATE_ATOMICS(type, short_type, /* unused */ lg_size) \
\
ATOMIC_INLINE type \
atomic_fetch_add_##short_type(atomic_##short_type##_t *a, type val, \
    atomic_memory_order_t mo) { \
	return __atomic_fetch_add(&a->repr, val, \
	    atomic_enum_to_builtin(mo)); \
} \
\
ATOMIC_INLINE type \
atomic_fetch_sub_##short_type(atomic_##short_type##_t *a, type val, \
    atomic_memory_order_t mo) { \
	return __atomic_fetch_sub(&a->repr, val, \
	    atomic_enum_to_builtin(mo)); \
} \
\
ATOMIC_INLINE type \
atomic_fetch_and_##short_type(atomic_##short_type##_t *a, type val, \
    atomic_memory_order_t mo) { \
	return __atomic_fetch_and(&a->repr, val, \
	    atomic_enum_to_builtin(mo)); \
} \
\
ATOMIC_INLINE type \
atomic_fetch_or_##short_type(atomic_##short_type##_t *a, type val, \
    atomic_memory_order_t mo) { \
	return __atomic_fetch_or(&a->repr, val, \
	    atomic_enum_to_builtin(mo)); \
} \
\
ATOMIC_INLINE type \
atomic_fetch_xor_##short_type(atomic_##short_type##_t *a, type val, \
    atomic_memory_order_t mo) { \
	return __atomic_fetch_xor(&a->repr, val, \
	    atomic_enum_to_builtin(mo)); \
}
|
|
|
|
|
2023-06-10 08:37:47 +08:00
|
|
|
/* ATOMIC_INLINE is only meant for use inside this header; remove it again. */
#undef ATOMIC_INLINE
|
|
|
|
|
Introduce a backport of C11 atomics
This introduces a backport of C11 atomics. It has four implementations; ranked
in order of preference, they are:
- GCC/Clang __atomic builtins
- GCC/Clang __sync builtins
- MSVC _Interlocked builtins
- C11 atomics, from <stdatomic.h>
The primary advantages are:
- Close adherence to the standard API gives us a defined memory model.
- Type safety: atomic objects are now separate types from non-atomic ones, so
that it's impossible to mix up atomic and non-atomic updates (which is
undefined behavior that compilers are starting to take advantage of).
- Efficiency: we can specify ordering for operations, avoiding fences and
atomic operations on strongly ordered architectures (example:
`atomic_write_u32(ptr, val);` involves a CAS loop, whereas
`atomic_store(ptr, val, ATOMIC_RELEASE);` is a plain store).
This diff leaves in the current atomics API (implementing them in terms of the
backport). This lets us transition uses over piecemeal.
Testing:
This is by nature hard to test. I've manually tested the first three options on
Linux on gcc by futzing with the #defines manually, on freebsd with gcc and
clang, on MSVC, and on OS X with clang. All of these were x86 machines though,
and we don't have any test infrastructure set up for non-x86 platforms.
2017-01-26 01:54:27 +08:00
|
|
|
#endif /* JEMALLOC_INTERNAL_ATOMIC_GCC_ATOMIC_H */
|