Implement cache index randomization for large allocations.

Extract szad size quantization into {extent,run}_quantize(), and
quantize szad run sizes to the union of valid small region run sizes and
large run sizes.
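
A minimal sketch of this kind of ceiling quantization, assuming an
illustrative table of valid run sizes (the real table is derived from
jemalloc's size classes, not hard-coded):

#include <stddef.h>

/*
 * Hypothetical stand-in for run_quantize(): round a requested run size
 * up to the smallest member of a sorted table of valid run sizes.  The
 * table here is illustrative only; the commit builds it as the union of
 * valid small region run sizes and large run sizes.
 */
static const size_t valid_run_sizes[] = {
	4096, 8192, 12288, 16384, 20480, 24576, 28672, 32768
};
#define NSIZES (sizeof(valid_run_sizes) / sizeof(valid_run_sizes[0]))

static size_t
run_quantize_sketch(size_t size)
{
	size_t i;

	for (i = 0; i < NSIZES; i++) {
		if (valid_run_sizes[i] >= size)
			return (valid_run_sizes[i]);
	}
	return (0); /* Larger than any size in this sketch's table. */
}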

Refactor iteration in arena_run_first_fit() to use
run_quantize{,_first,_next}(), and add support for padded large runs.
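
A rough sketch of the resulting search order, with hypothetical bodies
for the run_quantize_first()-style helpers (the names follow the commit
message; the table and availability state are illustrative stand-ins
for the arena's internal structures):

#include <stddef.h>

/* Illustrative table of valid (quantized) run sizes, smallest first. */
static const size_t run_sizes[] = {4096, 8192, 12288, 16384};
#define NRUN_SIZES (sizeof(run_sizes) / sizeof(run_sizes[0]))
/* Nonzero if a free run of the corresponding size is available. */
static int run_avail[NRUN_SIZES];

/* Index of the smallest valid run size >= size (NRUN_SIZES if none). */
static size_t
run_quantize_first_sketch(size_t size)
{
	size_t i;

	for (i = 0; i < NRUN_SIZES && run_sizes[i] < size; i++)
		;
	return (i);
}

/*
 * First fit over quantized sizes: probe each successively larger valid
 * run size until an available run is found.
 */
static size_t
run_first_fit_sketch(size_t size)
{
	size_t i;

	for (i = run_quantize_first_sketch(size); i < NRUN_SIZES; i++) {
		if (run_avail[i])
			return (run_sizes[i]);
	}
	return (0); /* No available run can satisfy the request. */
}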

For large allocations that have no specified alignment constraints,
compute a pseudo-random offset from the beginning of the first backing
page that is a multiple of the cache line size.  Under typical
configurations with 4-KiB pages and 64-byte cache lines this results in
a uniform distribution among 64 page boundary offsets.
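
A self-contained sketch of that computation under those typical
parameters; the seed and LCG constants are illustrative assumptions,
not necessarily the values the commit wires into prng64():

#include <stdint.h>

#define PAGE		4096	/* Assumed page size. */
#define CACHELINE	64	/* Assumed cache line size. */

static uint64_t offset_state = 0x9E3779B97F4A7C15ULL; /* Arbitrary seed. */

/*
 * Return a pseudo-random, cache-line-multiple offset in [0, PAGE).
 * With 4-KiB pages and 64-byte lines there are PAGE/CACHELINE == 64
 * == 2^6 slots, so 6 bits of pseudo-randomness suffice.
 */
static uint64_t
random_run_offset(void)
{
	uint64_t r;

	offset_state = (offset_state * 6364136223846793005ULL) +
	    1442695040888963407ULL;	/* Illustrative LCG constants. */
	r = offset_state >> (64 - 6);	/* Keep the top 6 bits. */
	return (r * CACHELINE);
}

An unconstrained large allocation would then begin at the first backing
page's address plus random_run_offset(), while requests with explicit
alignment skip the randomization.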

Add the --disable-cache-oblivious option, primarily intended for
performance testing.
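
The option presumably compiles the randomization out; a sketch of such
a gate, assuming a JEMALLOC_CACHE_OBLIVIOUS define that
--disable-cache-oblivious suppresses (the macro and variable names
follow jemalloc's conventions but are assumptions here):

#include <stdbool.h>
#include <stdint.h>

#ifdef JEMALLOC_CACHE_OBLIVIOUS
static const bool config_cache_oblivious = true;
#else
static const bool config_cache_oblivious = false;
#endif

/*
 * With the feature disabled, or when the caller requested explicit
 * alignment, large runs keep their usual page-aligned start.
 */
static uint64_t
large_run_offset(uint64_t random_offset, uint64_t alignment)
{
	if (!config_cache_oblivious || alignment != 0)
		return (0);
	return (random_offset);
}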

This resolves #13.

commit 8a03cf039c (parent 6bb54cb9da)
Jason Evans, 2015-05-04 09:58:36 -07:00
10 changed files with 280 additions and 74 deletions

include/jemalloc/internal/prng.h
@@ -26,22 +26,22 @@
  * const uint32_t a, c : See above discussion.
  */
 #define prng32(r, lg_range, state, a, c) do {		\
-	assert(lg_range > 0);				\
-	assert(lg_range <= 32);				\
+	assert((lg_range) > 0);				\
+	assert((lg_range) <= 32);			\
 							\
 	r = (state * (a)) + (c);			\
 	state = r;					\
-	r >>= (32 - lg_range);				\
+	r >>= (32 - (lg_range));			\
 } while (false)
 /* Same as prng32(), but 64 bits of pseudo-randomness, using uint64_t. */
 #define prng64(r, lg_range, state, a, c) do {		\
-	assert(lg_range > 0);				\
-	assert(lg_range <= 64);				\
+	assert((lg_range) > 0);				\
+	assert((lg_range) <= 64);			\
 							\
 	r = (state * (a)) + (c);			\
 	state = r;					\
-	r >>= (64 - lg_range);				\
+	r >>= (64 - (lg_range));			\
 } while (false)
 #endif /* JEMALLOC_H_TYPES */
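
For context, the hunk parenthesizes the lg_range macro parameter; a
small example of the hazard it removes (the LCG constants here are
illustrative, not the ones jemalloc uses):

#include <assert.h>
#include <stdint.h>

/* prng32() as patched above (whitespace condensed). */
#define prng32(r, lg_range, state, a, c) do {		\
	assert((lg_range) > 0);				\
	assert((lg_range) <= 32);			\
							\
	r = (state * (a)) + (c);			\
	state = r;					\
	r >>= (32 - (lg_range));			\
} while (false)

int
main(void)
{
	uint32_t r, state = 42;
	unsigned bits = 5;

	/*
	 * With the old, unparenthesized macro, passing the expression
	 * "bits + 1" would expand the shift to (32 - bits + 1) == 28
	 * rather than the intended (32 - (bits + 1)) == 26, silently
	 * yielding the wrong number of random bits.
	 */
	prng32(r, bits + 1, state, 1103515245U, 12345U);
	assert(r < (1U << (bits + 1)));
	return (0);
}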