diff --git a/Makefile.in b/Makefile.in
index 3cb3161e..40c41442 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -151,6 +151,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \
$(srcroot)src/tcache.c \
$(srcroot)src/test_hooks.c \
$(srcroot)src/thread_event.c \
+ $(srcroot)src/ticker.c \
$(srcroot)src/tsd.c \
$(srcroot)src/witness.c
ifeq ($(enable_zone_allocator), 1)
@@ -188,7 +189,8 @@ ifeq (1, $(link_whole_archive))
C_UTIL_INTEGRATION_SRCS :=
C_UTIL_CPP_SRCS :=
else
-C_UTIL_INTEGRATION_SRCS := $(srcroot)src/nstime.c $(srcroot)src/malloc_io.c
+C_UTIL_INTEGRATION_SRCS := $(srcroot)src/nstime.c $(srcroot)src/malloc_io.c \
+ $(srcroot)src/ticker.c
C_UTIL_CPP_SRCS := $(srcroot)src/nstime.c $(srcroot)src/malloc_io.c
endif
TESTS_UNIT := \
diff --git a/include/jemalloc/internal/ticker.h b/include/jemalloc/internal/ticker.h
index 52d0db4c..6b51ddec 100644
--- a/include/jemalloc/internal/ticker.h
+++ b/include/jemalloc/internal/ticker.h
@@ -1,6 +1,7 @@
#ifndef JEMALLOC_INTERNAL_TICKER_H
#define JEMALLOC_INTERNAL_TICKER_H
+#include "jemalloc/internal/prng.h"
#include "jemalloc/internal/util.h"
/**
@@ -10,11 +11,11 @@
* have occurred with a call to ticker_ticks), which will return true (and reset
* the counter) if the countdown hit zero.
*/
-
-typedef struct {
+typedef struct ticker_s ticker_t;
+struct ticker_s {
int32_t tick;
int32_t nticks;
-} ticker_t;
+};
static inline void
ticker_init(ticker_t *ticker, int32_t nticks) {
@@ -75,7 +76,7 @@ ticker_tick(ticker_t *ticker) {
return ticker_ticks(ticker, 1);
}
-/*
+/*
* Try to tick. If ticker would fire, return true, but rely on
* slowpath to reset ticker.
*/
@@ -88,4 +89,87 @@ ticker_trytick(ticker_t *ticker) {
return false;
}
+/*
+ * The ticker_geom_t is much like the ticker_t, except that instead of the
+ * ticker having a constant countdown, it has an approximate one; each tick has
+ * approximately a 1/nticks chance of firing.
+ *
+ * The motivation is in triggering arena decay. With a naive strategy, each
+ * thread would maintain a ticker per arena, and check if decay is necessary
+ * each time that the arena's ticker fires. This has two costs:
+ * - Since under reasonable assumptions both threads and arenas can scale
+ * linearly with the number of CPUs, maintaining per-arena data in each thread
+ * scales quadratically with the number of CPUs.
+ * - Reading these tickers is often a cache miss on tcache flush pathways.
+ *
+ * By giving each tick a 1/nticks chance of firing, we still maintain the same
+ * average number of ticks-until-firing per arena, with only a single ticker's
+ * worth of metadata.
+ */
+
+/* See ticker.c for an explanation of these constants. */
+#define TICKER_GEOM_NBITS 6
+#define TICKER_GEOM_MUL 61
+extern const uint8_t ticker_geom_table[1 << TICKER_GEOM_NBITS];
+
+/* Not actually any different from ticker_t; just for type safety. */
+typedef struct ticker_geom_s ticker_geom_t;
+struct ticker_geom_s {
+ int32_t tick;
+ int32_t nticks;
+};
+
+/*
+ * Just pick the average delay for the first counter value. We're more
+ * concerned with the behavior over long periods of time than with the exact
+ * timing of the initial ticks.
+ */
+#define TICKER_GEOM_INIT(nticks) {nticks, nticks}
+
+static inline void
+ticker_geom_init(ticker_geom_t *ticker, int32_t nticks) {
+ /*
+ * Make sure there's no overflow possible. This shouldn't really be a
+ * problem for reasonable nticks choices, which are all static and
+ * relatively small.
+ */
+ assert((uint64_t)nticks * (uint64_t)255 / (uint64_t)TICKER_GEOM_MUL
+ <= (uint64_t)INT32_MAX);
+ ticker->tick = nticks;
+ ticker->nticks = nticks;
+}
+
+static inline int32_t
+ticker_geom_read(const ticker_geom_t *ticker) {
+ return ticker->tick;
+}
+
+/* Same noinline workaround as for ticker_fixup above. */
+#if defined(__GNUC__) && !defined(__clang__) \
+ && (defined(__x86_64__) || defined(__i386__))
+JEMALLOC_NOINLINE
+#endif
+static bool
+ticker_geom_fixup(ticker_geom_t *ticker, uint64_t *prng_state) {
+ uint64_t idx = prng_lg_range_u64(prng_state, TICKER_GEOM_NBITS);
+ ticker->tick = (uint32_t)(
+ (uint64_t)ticker->nticks * (uint64_t)ticker_geom_table[idx]
+ / (uint64_t)TICKER_GEOM_MUL);
+ return true;
+}
+
+static inline bool
+ticker_geom_ticks(ticker_geom_t *ticker, uint64_t *prng_state, int32_t nticks) {
+ ticker->tick -= nticks;
+ if (unlikely(ticker->tick < 0)) {
+ return ticker_geom_fixup(ticker, prng_state);
+ }
+ return false;
+}
+
+static inline bool
+ticker_geom_tick(ticker_geom_t *ticker, uint64_t *prng_state) {
+ return ticker_geom_ticks(ticker, prng_state, 1);
+}
+
#endif /* JEMALLOC_INTERNAL_TICKER_H */
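To make the intended use concrete, here is a minimal usage sketch of the new API; it is illustrative only and assumes compilation inside the jemalloc tree, and `decay_event()`, `on_alloc_event()`, `DECAY_NTICKS`, and the seed constant are hypothetical placeholders rather than anything introduced by this patch:

```c
#include "jemalloc/internal/ticker.h"

#define DECAY_NTICKS 1000

/* One ticker (and PRNG state) per thread, regardless of the number of arenas. */
static __thread ticker_geom_t decay_ticker = TICKER_GEOM_INIT(DECAY_NTICKS);
static __thread uint64_t decay_prng_state = 0x27a5bd29cd8e3a21ULL;

static void decay_event(void);	/* hypothetical slow-path work */

static void
on_alloc_event(void) {
	/*
	 * Fires on average once per DECAY_NTICKS calls, with geometrically
	 * distributed gaps; the slow path resets the countdown via
	 * ticker_geom_fixup internally.
	 */
	if (ticker_geom_tick(&decay_ticker, &decay_prng_state)) {
		decay_event();
	}
}
```

Where the ticker actually lives (TSD or elsewhere) is up to the caller; the only requirement is a per-thread uint64_t of PRNG state passed alongside it.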
diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj
index 9443ac55..a93511d1 100644
--- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj
+++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj
@@ -92,6 +92,7 @@
+    <ClCompile Include="..\..\..\..\src\ticker.c" />
diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters
index 3c4bff62..06460e5a 100644
--- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters
+++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters
@@ -160,6 +160,9 @@
       <Filter>Source Files</Filter>
+    <ClCompile Include="..\..\..\..\src\ticker.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
       <Filter>Source Files</Filter>
diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj
index fafb4914..916460a7 100644
--- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj
+++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj
@@ -92,6 +92,7 @@
+    <ClCompile Include="..\..\..\..\src\ticker.c" />
diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters
index 3c4bff62..06460e5a 100644
--- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters
+++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters
@@ -160,6 +160,9 @@
       <Filter>Source Files</Filter>
+    <ClCompile Include="..\..\..\..\src\ticker.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
       <Filter>Source Files</Filter>
diff --git a/src/ticker.c b/src/ticker.c
new file mode 100644
index 00000000..790b5c20
--- /dev/null
+++ b/src/ticker.c
@@ -0,0 +1,32 @@
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/jemalloc_internal_includes.h"
+
+/*
+ * To avoid using floating point math down core paths (still necessary because
+ * versions of the glibc dynamic loader that did not preserve xmm registers are
+ * still somewhat common, requiring us to be compilable with -mno-sse), and also
+ * to avoid generally expensive library calls, we use a precomputed table of
+ * values. We want to sample u uniformly on (0, 1], and then compute
+ * ceil(log(u)/log(1-1/nticks)). We're mostly interested in the case where
+ * nticks is reasonably big, so 1/log(1-1/nticks) is well-approximated by
+ * -nticks.
+ *
+ * To compute log(u), we sample an integer in [1, 64], divide it by 64 (the
+ * table size) to get u, and then look up the precomputed result in a table. As
+ * a space-compression mechanism, we store these as uint8_t, dividing the range
+ * (255) by the highest-magnitude value the log can take on and using that as a
+ * multiplier. We then have to divide by that multiplier at the end of the
+ * computation.
+ *
+ * The values here are computed by src/ticker.py.
+ */
+
+const uint8_t ticker_geom_table[1 << TICKER_GEOM_NBITS] = {
+ 254, 211, 187, 169, 156, 144, 135, 127,
+ 120, 113, 107, 102, 97, 93, 89, 85,
+ 81, 77, 74, 71, 68, 65, 62, 60,
+ 57, 55, 53, 50, 48, 46, 44, 42,
+ 40, 39, 37, 35, 33, 32, 30, 29,
+ 27, 26, 24, 23, 21, 20, 19, 18,
+ 16, 15, 14, 13, 12, 10, 9, 8,
+ 7, 6, 5, 4, 3, 2, 1, 0
+};
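As a quick cross-check of these constants (my own worked numbers, not part of the patch): ticker_geom_fixup computes nticks * ticker_geom_table[idx] / TICKER_GEOM_MUL, which should approximate -nticks * ln(u) for u = (idx + 1)/64. A tiny standalone snippet comparing two entries against the exact expression:

```c
#include <math.h>
#include <stdio.h>

/* Constants copied from the patch: TICKER_GEOM_MUL and two table entries. */
#define MUL 61

int main(void) {
	const int nticks = 100;
	/* idx == 0: u = 1/64, table value 254. */
	printf("%d ~ %.1f\n", nticks * 254 / MUL, -nticks * log(1.0 / 64));
	/* idx == 31: u = 32/64, table value 42. */
	printf("%d ~ %.1f\n", nticks * 42 / MUL, -nticks * log(32.0 / 64));
	return 0;
}
```

This prints roughly 416 ~ 415.9 and 68 ~ 69.3, so the integer arithmetic stays within a tick or two of the true logarithm; and since the mean of -ln(u) over (0, 1] is 1, the average countdown stays close to nticks.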
diff --git a/src/ticker.py b/src/ticker.py
new file mode 100755
index 00000000..3807740c
--- /dev/null
+++ b/src/ticker.py
@@ -0,0 +1,15 @@
+#!/usr/bin/env python3
+
+import math
+
+# Must match TICKER_GEOM_NBITS
+lg_table_size = 6
+table_size = 2**lg_table_size
+byte_max = 255
+mul = math.floor(-byte_max/math.log(1 / table_size))
+values = [round(-mul * math.log(i / table_size))
+ for i in range(1, table_size+1)]
+print("mul =", mul)
+print("values:")
+for i in range(table_size // 8):
+ print(", ".join((str(x) for x in values[i*8 : i*8 + 8])))
diff --git a/test/unit/ticker.c b/test/unit/ticker.c
index 1cf10b0c..0dd77861 100644
--- a/test/unit/ticker.c
+++ b/test/unit/ticker.c
@@ -64,10 +64,37 @@ TEST_BEGIN(test_ticker_copy) {
}
TEST_END
+TEST_BEGIN(test_ticker_geom) {
+ const int32_t ticks = 100;
+ const uint64_t niters = 100 * 1000;
+
+ ticker_geom_t ticker;
+ ticker_geom_init(&ticker, ticks);
+ uint64_t total_ticks = 0;
+ /* Just some random constant. */
+ uint64_t prng_state = 0x343219f93496db9fULL;
+ for (uint64_t i = 0; i < niters; i++) {
+ while (!ticker_geom_tick(&ticker, &prng_state)) {
+ total_ticks++;
+ }
+ }
+ /*
+ * In fact, with this choice of random seed and the PRNG implementation
+ * used at the time this was tested, total_ticks is 95.1% of the
+ * expected ticks.
+ */
+ expect_u64_ge(total_ticks, niters * ticks * 9 / 10,
+     "Mean off by > 10%%");
+ expect_u64_le(total_ticks, niters * ticks * 11 / 10,
+     "Mean off by > 10%%");
+}
+TEST_END
+
int
main(void) {
return test(
test_ticker_tick,
test_ticker_ticks,
- test_ticker_copy);
+ test_ticker_copy,
+ test_ticker_geom);
}
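For context on the tolerance in test_ticker_geom: the expected total is roughly niters * ticks = 100,000 * 100 = 10,000,000 ticks, so the two assertions accept anything in [9,000,000, 11,000,000]; the 95.1% figure quoted in the comment (about 9.51 million) falls comfortably inside that window.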