Tweak the ticker paths to help GCC generate better code.
On its own, GCC isn't quite able to turn the ticker subtraction into a single subtract on the memory operand followed by a `js` (jump-if-sign) branch.
This commit is contained in:
parent
ae0f5d5c3f
commit
dd7e283b6f
@ -32,14 +32,42 @@ ticker_read(const ticker_t *ticker) {
|
||||
return ticker->tick;
|
||||
}
|
||||
|
||||
/*
 * Not intended to be a public API. Unfortunately, on x86, neither gcc nor
 * clang seems smart enough to turn
 *   ticker->tick -= nticks;
 *   if (unlikely(ticker->tick < 0)) {
 *     fixup ticker
 *     return true;
 *   }
 *   return false;
 * into
 *   subq %nticks_reg, (%ticker_reg)
 *   js fixup ticker
 *
 * unless we force "fixup ticker" out of line. In that case, gcc gets it right,
 * but clang now does worse than before. So, on x86 with gcc, we force it out
 * of line, but otherwise let the inlining occur. Ordinarily this wouldn't be
 * worth the hassle, but this is on the fast path of both malloc and free (via
 * tcache_event).
 */
|
||||
#if defined(__GNUC__) && !defined(__clang__) \
|
||||
&& (defined(__x86_64__) || defined(__i386__))
|
||||
JEMALLOC_NOINLINE
|
||||
#endif
|
||||
static bool
|
||||
ticker_fixup(ticker_t *ticker) {
|
||||
ticker->tick = ticker->nticks;
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
ticker_ticks(ticker_t *ticker, int32_t nticks) {
|
||||
if (unlikely(ticker->tick < nticks)) {
|
||||
ticker->tick = ticker->nticks;
|
||||
return true;
|
||||
}
|
||||
ticker->tick -= nticks;
|
||||
return(false);
|
||||
if (unlikely(ticker->tick < 0)) {
|
||||
return ticker_fixup(ticker);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
|
Loading…
Reference in New Issue
Block a user