Tweak the ticker paths to help GCC generate better code.

GCC on its own isn't quite able to turn the ticker subtract into a memory
operation followed by a js.
This commit is contained in:
David T. Goldblatt 2018-02-02 19:18:18 -08:00 committed by David Goldblatt
parent ae0f5d5c3f
commit dd7e283b6f

View File

@ -32,14 +32,42 @@ ticker_read(const ticker_t *ticker) {
return ticker->tick;
}
/*
* Not intended to be a public API. Unfortunately, on x86, neither gcc nor
* clang seems smart enough to turn
* ticker->tick -= nticks;
* if (unlikely(ticker->tick < 0)) {
* fixup ticker
* return true;
* }
* return false;
* into
* subq %nticks_reg, (%ticker_reg)
* js fixup ticker
*
* unless we force "fixup ticker" out of line. In that case, gcc gets it right,
* but clang now does worse than before. So, on x86 with gcc, we force it out
* of line, but otherwise let the inlining occur. Ordinarily this wouldn't be
* worth the hassle, but this is on the fast path of both malloc and free (via
* tcache_event).
*/
#if defined(__GNUC__) && !defined(__clang__) \
&& (defined(__x86_64__) || defined(__i386__))
JEMALLOC_NOINLINE
#endif
static bool
ticker_fixup(ticker_t *ticker) {
ticker->tick = ticker->nticks;
return true;
}
static inline bool
ticker_ticks(ticker_t *ticker, int32_t nticks) {
if (unlikely(ticker->tick < nticks)) {
ticker->tick = ticker->nticks;
return true;
}
ticker->tick -= nticks;
return(false);
if (unlikely(ticker->tick < 0)) {
return ticker_fixup(ticker);
}
return false;
}
static inline bool