From 997d86acc6d2cc632b79669ebf3f938290e9f5da Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Tue, 9 Oct 2018 11:25:36 -0700 Subject: [PATCH] restrict bytes_until_sample to int64_t. This allows optimal asm generation of sub bytes_until_sample, usize; je; on x86. The subtraction is unconditional and only the flags are checked for the jump, so no extra compare is necessary. This also reduces register pressure. --- include/jemalloc/internal/prof_inlines_b.h | 14 +++++++++----- include/jemalloc/internal/tsd.h | 2 +- src/prof.c | 3 +++ 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/include/jemalloc/internal/prof_inlines_b.h b/include/jemalloc/internal/prof_inlines_b.h index b2f5a04e..085111f4 100644 --- a/include/jemalloc/internal/prof_inlines_b.h +++ b/include/jemalloc/internal/prof_inlines_b.h @@ -82,17 +82,21 @@ JEMALLOC_ALWAYS_INLINE bool prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update, prof_tdata_t **tdata_out) { prof_tdata_t *tdata; - uint64_t bytes_until_sample; + int64_t bytes_until_sample; cassert(config_prof); + ssize_t check = update ? 
0 : usize; bytes_until_sample = tsd_bytes_until_sample_get(tsd); - if (likely(bytes_until_sample >= usize)) { - if (update && tsd_nominal(tsd)) { - tsd_bytes_until_sample_set(tsd, bytes_until_sample - usize); + if (update) { + bytes_until_sample -= usize; + if (tsd_nominal(tsd)) { + tsd_bytes_until_sample_set(tsd, bytes_until_sample); } - return true; } + if (likely(bytes_until_sample >= check)) { + return true; + } tdata = prof_tdata_get(tsd, true); if (unlikely((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)) { diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 69fb05cb..c931441b 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -68,7 +68,7 @@ typedef void (*test_callback_t)(int *); O(offset_state, uint64_t, uint64_t) \ O(thread_allocated, uint64_t, uint64_t) \ O(thread_deallocated, uint64_t, uint64_t) \ - O(bytes_until_sample, uint64_t, uint64_t) \ + O(bytes_until_sample, int64_t, int64_t) \ O(prof_tdata, prof_tdata_t *, prof_tdata_t *) \ O(rtree_ctx, rtree_ctx_t, rtree_ctx_t) \ O(iarena, arena_t *, arena_t *) \ diff --git a/src/prof.c b/src/prof.c index 83d492d4..71de2d34 100644 --- a/src/prof.c +++ b/src/prof.c @@ -1168,6 +1168,9 @@ prof_sample_threshold_update(prof_tdata_t *tdata) { uint64_t bytes_until_sample = (uint64_t)(log(u) / log(1.0 - (1.0 / (double)((uint64_t)1U << lg_prof_sample)))) + (uint64_t)1U; + if (bytes_until_sample > SSIZE_MAX) { + bytes_until_sample = SSIZE_MAX; + } tsd_bytes_until_sample_set(tsd_fetch(), bytes_until_sample); #endif