Add ticker_geom_t.

This lets a single ticker object drive events across a large number of different
tick streams while sharing state.
This commit is contained in:
David Goldblatt 2021-01-31 11:55:45 -08:00 committed by David Goldblatt
parent 3967329813
commit 8edfc5b170
9 changed files with 174 additions and 6 deletions

View File

@ -151,6 +151,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \
$(srcroot)src/tcache.c \
$(srcroot)src/test_hooks.c \
$(srcroot)src/thread_event.c \
$(srcroot)src/ticker.c \
$(srcroot)src/tsd.c \
$(srcroot)src/witness.c
ifeq ($(enable_zone_allocator), 1)
@ -188,7 +189,8 @@ ifeq (1, $(link_whole_archive))
C_UTIL_INTEGRATION_SRCS :=
C_UTIL_CPP_SRCS :=
else
C_UTIL_INTEGRATION_SRCS := $(srcroot)src/nstime.c $(srcroot)src/malloc_io.c
C_UTIL_INTEGRATION_SRCS := $(srcroot)src/nstime.c $(srcroot)src/malloc_io.c \
$(srcroot)src/ticker.c
C_UTIL_CPP_SRCS := $(srcroot)src/nstime.c $(srcroot)src/malloc_io.c
endif
TESTS_UNIT := \

View File

@ -1,6 +1,7 @@
#ifndef JEMALLOC_INTERNAL_TICKER_H
#define JEMALLOC_INTERNAL_TICKER_H
#include "jemalloc/internal/prng.h"
#include "jemalloc/internal/util.h"
/**
@ -10,11 +11,11 @@
* have occurred with a call to ticker_ticks), which will return true (and reset
* the counter) if the countdown hit zero.
*/
typedef struct {
typedef struct ticker_s ticker_t;
struct ticker_s {
int32_t tick;
int32_t nticks;
} ticker_t;
};
static inline void
ticker_init(ticker_t *ticker, int32_t nticks) {
@ -88,4 +89,87 @@ ticker_trytick(ticker_t *ticker) {
return false;
}
/*
* The ticker_geom_t is much like the ticker_t, except that instead of having a
* constant countdown, it has an approximate one; each tick has approximately
* a 1/nticks chance of causing the ticker to fire.
*
* The motivation is in triggering arena decay. With a naive strategy, each
* thread would maintain a ticker per arena, and check if decay is necessary
* each time that the arena's ticker fires. This has two costs:
* - Since under reasonable assumptions both threads and arenas can scale
* linearly with the number of CPUs, maintaining per-arena data in each thread
* scales quadratically with the number of CPUs.
* - These tickers are often a cache miss down tcache flush pathways.
*
* By giving each tick a 1/nticks chance of firing, we still maintain the same
* average number of ticks-until-firing per arena, with only a single ticker's
* worth of metadata.
*/
/* See ticker.c for an explanation of these constants. */
#define TICKER_GEOM_NBITS 6
#define TICKER_GEOM_MUL 61
extern const uint8_t ticker_geom_table[1 << TICKER_GEOM_NBITS];
/* Not actually any different from ticker_t; just for type safety. */
typedef struct ticker_geom_s ticker_geom_t;
struct ticker_geom_s {
/*
 * Current countdown.  Decremented by ticker_geom_ticks(); once it drops
 * below zero the ticker fires and a fresh value is drawn.
 */
int32_t tick;
/* Scale for freshly drawn countdowns; also the initial countdown value. */
int32_t nticks;
};
/*
 * Static initializer for a ticker_geom_t.
 *
 * Just pick the average delay for the first counter. We're more concerned with
 * the behavior over long periods of time rather than the exact timing of the
 * initial ticks.
 *
 * Note that the argument is expanded (and therefore evaluated) twice, once per
 * field, so it should be a side-effect-free expression.
 */
#define TICKER_GEOM_INIT(nticks) {(nticks), (nticks)}
/*
 * Prepare a geometric ticker.  Both the scale (nticks) and the very first
 * countdown are set to nticks.
 */
static inline void
ticker_geom_init(ticker_geom_t *ticker, int32_t nticks) {
	/*
	 * Overflow guard: nticks scaled by the largest possible byte value
	 * (255) and divided by TICKER_GEOM_MUL has to remain representable as
	 * an int32_t.  Realistic nticks choices are small, static values, so
	 * this is not expected to trip in practice.
	 */
	assert((uint64_t)INT32_MAX >= (uint64_t)nticks * (uint64_t)255
	    / (uint64_t)TICKER_GEOM_MUL);
	ticker->nticks = nticks;
	ticker->tick = nticks;
}
/* Report the ticker's current countdown value without modifying the ticker. */
static inline int32_t
ticker_geom_read(const ticker_geom_t *ticker) {
	const int32_t remaining = ticker->tick;
	return remaining;
}
/*
 * Same deal as above.  On GCC (but not clang) builds targeting x86 this helper
 * is marked JEMALLOC_NOINLINE.
 *
 * Draws a fresh countdown for the ticker: a TICKER_GEOM_NBITS-bit index taken
 * from the PRNG selects an entry of ticker_geom_table, which is then scaled by
 * nticks / TICKER_GEOM_MUL.  Always returns true, so callers can hand the
 * result straight back as "the ticker fired".
 */
#if defined(__GNUC__) && !defined(__clang__) \
    && (defined(__x86_64__) || defined(__i386__))
JEMALLOC_NOINLINE
#endif
static bool
ticker_geom_fixup(ticker_geom_t *ticker, uint64_t *prng_state) {
	/*
	 * Presumably in [0, 1 << TICKER_GEOM_NBITS), which is exactly the
	 * size of ticker_geom_table -- TODO confirm against prng.h.
	 */
	uint64_t idx = prng_lg_range_u64(prng_state, TICKER_GEOM_NBITS);
	/*
	 * Multiply before dividing, all in 64 bits, so that the intermediate
	 * product cannot overflow.  The cast matches the (signed) type of the
	 * tick field; ticker_geom_init() asserts that the largest possible
	 * result fits in an int32_t.
	 */
	ticker->tick = (int32_t)(
	    (uint64_t)ticker->nticks * (uint64_t)ticker_geom_table[idx]
	    / (uint64_t)TICKER_GEOM_MUL);
	return true;
}
/*
 * Charge nticks ticks against the ticker.  Returns true (after drawing a new
 * countdown via ticker_geom_fixup()) if the countdown dropped below zero, and
 * false otherwise.
 */
static inline bool
ticker_geom_ticks(ticker_geom_t *ticker, uint64_t *prng_state, int32_t nticks) {
	bool fired = false;

	ticker->tick -= nticks;
	/* Firing is the rare case; the refill is kept off the fast path. */
	if (unlikely(ticker->tick < 0)) {
		fired = ticker_geom_fixup(ticker, prng_state);
	}
	return fired;
}
/* Single-tick convenience wrapper around ticker_geom_ticks(). */
static inline bool
ticker_geom_tick(ticker_geom_t *ticker, uint64_t *prng_state) {
	const int32_t one_tick = 1;
	return ticker_geom_ticks(ticker, prng_state, one_tick);
}
#endif /* JEMALLOC_INTERNAL_TICKER_H */

View File

@ -92,6 +92,7 @@
<ClCompile Include="..\..\..\..\src\tcache.c" />
<ClCompile Include="..\..\..\..\src\test_hooks.c" />
<ClCompile Include="..\..\..\..\src\thread_event.c" />
<ClCompile Include="..\..\..\..\src\ticker.c" />
<ClCompile Include="..\..\..\..\src\tsd.c" />
<ClCompile Include="..\..\..\..\src\witness.c" />
</ItemGroup>

View File

@ -160,6 +160,9 @@
<ClCompile Include="..\..\..\..\src\thread_event.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\..\..\src\ticker.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\..\..\src\tsd.c">
<Filter>Source Files</Filter>
</ClCompile>

View File

@ -92,6 +92,7 @@
<ClCompile Include="..\..\..\..\src\tcache.c" />
<ClCompile Include="..\..\..\..\src\test_hooks.c" />
<ClCompile Include="..\..\..\..\src\thread_event.c" />
<ClCompile Include="..\..\..\..\src\ticker.c" />
<ClCompile Include="..\..\..\..\src\tsd.c" />
<ClCompile Include="..\..\..\..\src\witness.c" />
</ItemGroup>

View File

@ -160,6 +160,9 @@
<ClCompile Include="..\..\..\..\src\thread_event.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\..\..\src\ticker.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\..\..\src\tsd.c">
<Filter>Source Files</Filter>
</ClCompile>

32
src/ticker.c Normal file
View File

@ -0,0 +1,32 @@
#include "jemalloc/internal/jemalloc_preamble.h"
#include "jemalloc/internal/jemalloc_internal_includes.h"
/*
* To avoid using floating point math down core paths (still necessary because
* versions of the glibc dynamic loader that did not preserve xmm registers are
* still somewhat common, requiring us to be compilable with -mno-sse), and also
* to avoid generally expensive library calls, we use a precomputed table of
* values. We want to sample u uniformly on (0, 1], and then compute
* ceil(log(u)/log(1-1/nticks)). We're mostly interested in the case where
* nticks is reasonably big, so 1/log(1-1/nticks) is well-approximated by
* -nticks.
*
* To compute log(u), we sample an integer in [1, 64] and divide, then just look
* up results in a table. As a space-compression mechanism, we store these as
* uint8_t by dividing the range (255) by the highest-magnitude value the log
* can take on, and using that as a multiplier. We then have to divide by that
* multiplier at the end of the computation.
*
* The values here are computed in src/ticker.py
*/
/*
 * Scaled -log() lookup table, one entry per possible PRNG draw.  As produced
 * by src/ticker.py, entry idx holds round(-mul * ln((idx + 1) / table_size))
 * with table_size = 64 and mul = 61, so the values fall from 254 at index 0
 * to 0 at the last index.  Each row below covers eight consecutive indices.
 */
const uint8_t ticker_geom_table[1 << TICKER_GEOM_NBITS] = {
254, 211, 187, 169, 156, 144, 135, 127,
120, 113, 107, 102, 97, 93, 89, 85,
81, 77, 74, 71, 68, 65, 62, 60,
57, 55, 53, 50, 48, 46, 44, 42,
40, 39, 37, 35, 33, 32, 30, 29,
27, 26, 24, 23, 21, 20, 19, 18,
16, 15, 14, 13, 12, 10, 9, 8,
7, 6, 5, 4, 3, 2, 1, 0
};

15
src/ticker.py Executable file
View File

@ -0,0 +1,15 @@
#!/usr/bin/env python3
# Prints the multiplier and the table values used for ticker_geom_table in
# src/ticker.c.
import math

# Must match TICKER_GEOM_NBITS
lg_table_size = 6
table_size = 2**lg_table_size
byte_max = 255

# Largest integer multiplier that keeps the biggest-magnitude log value,
# -log(1 / table_size), within a single byte once scaled.
mul = math.floor(-byte_max/math.log(1 / table_size))

# One scaled -log(i / table_size) entry for every i in [1, table_size].
values = []
for numerator in range(1, table_size + 1):
    values.append(round(-mul * math.log(numerator / table_size)))

print("mul =", mul)
print("values:")
# Emit the table eight entries per row, ready to paste into the C source.
row_len = 8
for row in range(table_size // row_len):
    start = row * row_len
    print(", ".join(map(str, values[start : start + row_len])))

View File

@ -64,10 +64,37 @@ TEST_BEGIN(test_ticker_copy) {
}
TEST_END
/*
 * Statistical sanity check for ticker_geom_t: over many firings, the number of
 * non-firing ticks must stay within 10% of niters * ticks.
 */
TEST_BEGIN(test_ticker_geom) {
	/* Just some random constant. */
	uint64_t rng = 0x343219f93496db9fULL;
	const uint64_t niters = 100 * 1000;
	const int32_t ticks = 100;
	uint64_t total_ticks = 0;
	ticker_geom_t geom;

	ticker_geom_init(&geom, ticks);
	for (uint64_t firing = 0; firing < niters; firing++) {
		/* Tally every tick that does not fire, up to the next firing. */
		for (;;) {
			if (ticker_geom_tick(&geom, &rng)) {
				break;
			}
			total_ticks++;
		}
	}
	/*
	 * For the record: with this seed and the PRNG implementation in use
	 * when this test was written, total_ticks comes out at 95.1% of the
	 * expected number of ticks.
	 */
	expect_u64_ge(total_ticks, niters * ticks * 9 / 10,
	    "Mean off by > 10%%");
	expect_u64_le(total_ticks, niters * ticks * 11 / 10,
	    "Mean off by > 10%%");
}
TEST_END
int
main(void) {
return test(
test_ticker_tick,
test_ticker_ticks,
test_ticker_copy);
test_ticker_copy,
test_ticker_geom);
}