/*
 * Note on 32-bit PowerPC: nearly all 32-bit powerpc hardware treats lwsync as
 * sync, and some cores (Freescale e500) trap lwsync as an illegal instruction,
 * which then gets emulated in the kernel.  To avoid unnecessary traps on the
 * e500, atomic_fence() uses sync on all 32-bit powerpc.  This pessimizes
 * 32-bit software running on 64-bit hardware, but that case should be rare.
 */
#ifndef JEMALLOC_INTERNAL_ATOMIC_GCC_SYNC_H
#define JEMALLOC_INTERNAL_ATOMIC_GCC_SYNC_H

#define ATOMIC_INIT(...) {__VA_ARGS__}

typedef enum {
	atomic_memory_order_relaxed,
	atomic_memory_order_acquire,
	atomic_memory_order_release,
	atomic_memory_order_acq_rel,
	atomic_memory_order_seq_cst
} atomic_memory_order_t;

ATOMIC_INLINE void
atomic_fence(atomic_memory_order_t mo) {
	/* Easy cases first: no barrier, and full barrier. */
	if (mo == atomic_memory_order_relaxed) {
		asm volatile("" ::: "memory");
		return;
	}
	if (mo == atomic_memory_order_seq_cst) {
		asm volatile("" ::: "memory");
		__sync_synchronize();
		asm volatile("" ::: "memory");
		return;
	}
	asm volatile("" ::: "memory");
# if defined(__i386__) || defined(__x86_64__)
	/* This is implicit on x86. */
# elif defined(__ppc64__)
	asm volatile("lwsync");
# elif defined(__ppc__)
	asm volatile("sync");
# elif defined(__sparc__) && defined(__arch64__)
	if (mo == atomic_memory_order_acquire) {
		asm volatile("membar #LoadLoad | #LoadStore");
	} else if (mo == atomic_memory_order_release) {
		asm volatile("membar #LoadStore | #StoreStore");
	} else {
		asm volatile("membar #LoadLoad | #LoadStore | #StoreStore");
	}
# else
	__sync_synchronize();
# endif
	asm volatile("" ::: "memory");
}

/*
 * A correct implementation of seq_cst loads and stores on weakly ordered
 * architectures could do either of the following:
 *   1. store() is weak-fence -> store -> strong fence, load() is load ->
 *      strong-fence.
 *   2. store() is strong-fence -> store, load() is strong-fence -> load ->
 *      weak-fence.
 * The tricky thing is, load() and store() above can be the load or store
 * portions of a gcc __sync builtin, so we have to follow GCC's lead, which
 * means going with strategy 2.
 * On strongly ordered architectures, the natural strategy is to stick a strong
 * fence after seq_cst stores, and have naked loads.  So we want the strong
 * fences in different places on different architectures.
 * atomic_pre_sc_load_fence and atomic_post_sc_store_fence allow us to
 * accomplish this.
 */

ATOMIC_INLINE void
atomic_pre_sc_load_fence() {
# if defined(__i386__) || defined(__x86_64__) || \
    (defined(__sparc__) && defined(__arch64__))
	atomic_fence(atomic_memory_order_relaxed);
# else
	atomic_fence(atomic_memory_order_seq_cst);
# endif
}

ATOMIC_INLINE void
atomic_post_sc_store_fence() {
# if defined(__i386__) || defined(__x86_64__) || \
    (defined(__sparc__) && defined(__arch64__))
	atomic_fence(atomic_memory_order_seq_cst);
# else
	atomic_fence(atomic_memory_order_relaxed);
# endif
}
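
/*
 * Illustrative sketch only, kept out of the build with #if 0: it shows where
 * the fence helpers above end up around a seq_cst store/load pair produced by
 * the macros below.  The atomic_u32_t type and the *_u32 functions are assumed
 * to come from an instantiation elsewhere (e.g. JEMALLOC_GENERATE_INT_ATOMICS
 * with uint32_t/u32); they are not defined by this header itself.
 */
#if 0
static void
seq_cst_pair_example(atomic_u32_t *flag) {
	/*
	 * The seq_cst store expands to: release fence, plain store, then
	 * atomic_post_sc_store_fence() (the full barrier on strongly ordered
	 * architectures, only a compiler barrier elsewhere).
	 */
	atomic_store_u32(flag, 1, atomic_memory_order_seq_cst);
	/*
	 * The seq_cst load expands to: atomic_pre_sc_load_fence() (the full
	 * barrier on weakly ordered architectures), plain load, then an
	 * acquire fence.
	 */
	uint32_t observed = atomic_load_u32(flag, atomic_memory_order_seq_cst);
	(void)observed;
}
#endif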

#define JEMALLOC_GENERATE_ATOMICS(type, short_type,			\
    /* unused */ lg_size)						\
typedef struct {							\
	type volatile repr;						\
} atomic_##short_type##_t;						\
									\
ATOMIC_INLINE type							\
atomic_load_##short_type(const atomic_##short_type##_t *a,		\
    atomic_memory_order_t mo) {						\
	if (mo == atomic_memory_order_seq_cst) {			\
		atomic_pre_sc_load_fence();				\
	}								\
	type result = a->repr;						\
	if (mo != atomic_memory_order_relaxed) {			\
		atomic_fence(atomic_memory_order_acquire);		\
	}								\
	return result;							\
}									\
									\
ATOMIC_INLINE void							\
atomic_store_##short_type(atomic_##short_type##_t *a,			\
    type val, atomic_memory_order_t mo) {				\
	if (mo != atomic_memory_order_relaxed) {			\
		atomic_fence(atomic_memory_order_release);		\
	}								\
	a->repr = val;							\
	if (mo == atomic_memory_order_seq_cst) {			\
		atomic_post_sc_store_fence();				\
	}								\
}									\
									\
ATOMIC_INLINE type							\
atomic_exchange_##short_type(atomic_##short_type##_t *a, type val,	\
    atomic_memory_order_t mo) {						\
	/*								\
	 * Because of FreeBSD, we care about gcc 4.2, which doesn't have\
	 * an atomic exchange builtin.  We fake it with a CAS loop.	\
	 */								\
	while (true) {							\
		type old = a->repr;					\
		if (__sync_bool_compare_and_swap(&a->repr, old, val)) {	\
			return old;					\
		}							\
	}								\
}									\
									\
ATOMIC_INLINE bool							\
atomic_compare_exchange_weak_##short_type(atomic_##short_type##_t *a,	\
    type *expected, type desired,					\
    atomic_memory_order_t success_mo,					\
    atomic_memory_order_t failure_mo) {					\
	type prev = __sync_val_compare_and_swap(&a->repr, *expected,	\
	    desired);							\
	if (prev == *expected) {					\
		return true;						\
	} else {							\
		*expected = prev;					\
		return false;						\
	}								\
}									\
ATOMIC_INLINE bool							\
atomic_compare_exchange_strong_##short_type(atomic_##short_type##_t *a,	\
    type *expected, type desired,					\
    atomic_memory_order_t success_mo,					\
    atomic_memory_order_t failure_mo) {					\
	type prev = __sync_val_compare_and_swap(&a->repr, *expected,	\
	    desired);							\
	if (prev == *expected) {					\
		return true;						\
	} else {							\
		*expected = prev;					\
		return false;						\
	}								\
}

#define JEMALLOC_GENERATE_INT_ATOMICS(type, short_type,		\
    /* unused */ lg_size)						\
JEMALLOC_GENERATE_ATOMICS(type, short_type, /* unused */ lg_size)	\
									\
ATOMIC_INLINE type							\
atomic_fetch_add_##short_type(atomic_##short_type##_t *a, type val,	\
    atomic_memory_order_t mo) {						\
	return __sync_fetch_and_add(&a->repr, val);			\
}									\
									\
ATOMIC_INLINE type							\
atomic_fetch_sub_##short_type(atomic_##short_type##_t *a, type val,	\
    atomic_memory_order_t mo) {						\
	return __sync_fetch_and_sub(&a->repr, val);			\
}									\
									\
ATOMIC_INLINE type							\
atomic_fetch_and_##short_type(atomic_##short_type##_t *a, type val,	\
    atomic_memory_order_t mo) {						\
	return __sync_fetch_and_and(&a->repr, val);			\
}									\
									\
ATOMIC_INLINE type							\
atomic_fetch_or_##short_type(atomic_##short_type##_t *a, type val,	\
    atomic_memory_order_t mo) {						\
	return __sync_fetch_and_or(&a->repr, val);			\
}									\
									\
ATOMIC_INLINE type							\
atomic_fetch_xor_##short_type(atomic_##short_type##_t *a, type val,	\
    atomic_memory_order_t mo) {						\
	return __sync_fetch_and_xor(&a->repr, val);			\
}
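
/*
 * Usage sketch only, kept out of the build with #if 0: this header never
 * instantiates the macros above; a consumer (jemalloc's atomic.h) expands them
 * roughly along these lines.  The exact type/short_type/lg_size triples here
 * are illustrative assumptions, and lg_size is unused by this backend.
 */
#if 0
JEMALLOC_GENERATE_INT_ATOMICS(uint32_t, u32, 2)
JEMALLOC_GENERATE_INT_ATOMICS(uint64_t, u64, 3)
JEMALLOC_GENERATE_INT_ATOMICS(size_t, zu, LG_SIZEOF_PTR)
JEMALLOC_GENERATE_ATOMICS(bool, b, 0)
/*
 * Each expansion defines, e.g., atomic_u32_t, atomic_load_u32,
 * atomic_store_u32, atomic_exchange_u32, atomic_compare_exchange_weak_u32,
 * and atomic_fetch_add_u32, all built on the __sync primitives above.
 */
#endif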

#endif /* JEMALLOC_INTERNAL_ATOMIC_GCC_SYNC_H */