Fix all optimization-inhibiting integer-to-pointer casts

Following from PR #2481, we replace all integer-to-pointer casts [which
hide pointer provenance information (and thus inhibit
optimizations)](https://clang.llvm.org/extra/clang-tidy/checks/performance/no-int-to-ptr.html)
with equivalent operations that preserve this information. I have
enabled the corresponding clang-tidy check in our static analysis CI so
that we do not get bitten by this again in the future.
This commit is contained in:
Kevin Svetlitski
2023-07-24 10:33:36 -07:00
committed by Qi Wang
parent 4827bb17bd
commit 3e82f357bb
27 changed files with 116 additions and 66 deletions

View File

@@ -513,7 +513,7 @@ arena_cache_oblivious_randomize(tsdn_t *tsdn, arena_t *arena, edata_t *edata,
}
uintptr_t random_offset = ((uintptr_t)r) << (LG_PAGE -
lg_range);
edata->e_addr = (void *)((uintptr_t)edata->e_addr +
edata->e_addr = (void *)((byte_t *)edata->e_addr +
random_offset);
assert(ALIGNMENT_ADDR2BASE(edata->e_addr, alignment) ==
edata->e_addr);
@@ -599,7 +599,7 @@ arena_dalloc_bin_locked_finish(tsdn_t *tsdn, arena_t *arena, bin_t *bin,
static inline bin_t *
arena_get_bin(arena_t *arena, szind_t binind, unsigned binshard) {
bin_t *shard0 = (bin_t *)((uintptr_t)arena + arena_bin_offsets[binind]);
bin_t *shard0 = (bin_t *)((byte_t *)arena + arena_bin_offsets[binind]);
return shard0 + binshard;
}

View File

@@ -247,7 +247,7 @@ static inline void **
cache_bin_empty_position_get(cache_bin_t *bin) {
cache_bin_sz_t diff = cache_bin_diff(bin,
(uint16_t)(uintptr_t)bin->stack_head, bin->low_bits_empty);
uintptr_t empty_bits = (uintptr_t)bin->stack_head + diff;
byte_t *empty_bits = (byte_t *)bin->stack_head + diff;
void **ret = (void **)empty_bits;
assert(ret >= bin->stack_head);
@@ -479,7 +479,7 @@ cache_bin_stash(cache_bin_t *bin, void *ptr) {
uint16_t low_bits_head = (uint16_t)(uintptr_t)bin->stack_head;
/* Wraparound handled as well. */
uint16_t diff = cache_bin_diff(bin, bin->low_bits_full, low_bits_head);
*(void **)((uintptr_t)bin->stack_head - diff) = ptr;
*(void **)((byte_t *)bin->stack_head - diff) = ptr;
assert(!cache_bin_full(bin));
bin->low_bits_full += sizeof(void *);

View File

@@ -377,18 +377,18 @@ edata_ps_get(const edata_t *edata) {
static inline void *
edata_before_get(const edata_t *edata) {
return (void *)((uintptr_t)edata_base_get(edata) - PAGE);
return (void *)((byte_t *)edata_base_get(edata) - PAGE);
}
static inline void *
edata_last_get(const edata_t *edata) {
return (void *)((uintptr_t)edata_base_get(edata) +
return (void *)((byte_t *)edata_base_get(edata) +
edata_size_get(edata) - PAGE);
}
static inline void *
edata_past_get(const edata_t *edata) {
return (void *)((uintptr_t)edata_base_get(edata) +
return (void *)((byte_t *)edata_base_get(edata) +
edata_size_get(edata));
}

View File

@@ -105,4 +105,21 @@ isblank(int c) {
# undef small
#endif
/*
* Oftentimes we'd like to perform some kind of arithmetic to obtain
* a pointer from another pointer but with some offset or mask applied.
* Naively you would accomplish this by casting the source pointer to
* `uintptr_t`, performing all of the relevant arithmetic, and then casting
* the result to the desired pointer type. However, this has the unfortunate
* side-effect of concealing pointer provenance, hiding useful information for
* optimization from the compiler. For details, see:
* https://clang.llvm.org/extra/clang-tidy/checks/performance/no-int-to-ptr.html
* Instead what one should do is cast the source pointer to `char *` and perform
* the equivalent arithmetic (since `char` of course represents one byte). But
* because `char *` has the semantic meaning of "string", we define this typedef
* simply to make it clearer where we are performing such pointer arithmetic.
*/
typedef char byte_t;
#endif /* JEMALLOC_INTERNAL_H */

View File

@@ -99,7 +99,8 @@ typedef enum malloc_init_e malloc_init_t;
/* Return the nearest aligned address at or below a. */
#define ALIGNMENT_ADDR2BASE(a, alignment) \
((void *)((uintptr_t)(a) & ((~(alignment)) + 1)))
((void *)(((byte_t *)(a)) - (((uintptr_t)(a)) - \
((uintptr_t)(a) & ((~(alignment)) + 1)))))
/* Return the offset between a and the nearest aligned address at or below a. */
#define ALIGNMENT_ADDR2OFFSET(a, alignment) \
@@ -109,6 +110,19 @@ typedef enum malloc_init_e malloc_init_t;
#define ALIGNMENT_CEILING(s, alignment) \
(((s) + (alignment - 1)) & ((~(alignment)) + 1))
/*
* Return the nearest aligned address at or above a.
*
* While at first glance this would appear to be merely a more complicated
* way to perform the same computation as `ALIGNMENT_CEILING`,
* this has the important additional property of not concealing pointer
* provenance from the compiler. See the block-comment on the
* definition of `byte_t` for more details.
*/
#define ALIGNMENT_ADDR2CEILING(a, alignment) \
((void *)(((byte_t *)(a)) + (((((uintptr_t)(a)) + \
(alignment - 1)) & ((~(alignment)) + 1)) - ((uintptr_t)(a)))))
/* Declare a variable-length array. */
#if __STDC_VERSION__ < 199901L || defined(__STDC_NO_VLA__)
# ifdef _MSC_VER

View File

@@ -2,6 +2,7 @@
#define JEMALLOC_INTERNAL_PAGES_EXTERNS_H
#include "jemalloc/internal/jemalloc_preamble.h"
#include "jemalloc/internal/jemalloc_internal_types.h"
/* Actual operating system page size, detected during bootstrap, <= PAGE. */
extern size_t os_page;
@@ -14,7 +15,7 @@ extern size_t os_page;
#define PAGE_MASK ((size_t)(PAGE - 1))
/* Return the page base address for the page containing address a. */
#define PAGE_ADDR2BASE(a) \
((void *)((uintptr_t)(a) & ~PAGE_MASK))
ALIGNMENT_ADDR2BASE(a, PAGE)
/* Return the smallest pagesize multiple that is >= s. */
#define PAGE_CEILING(s) \
(((s) + PAGE_MASK) & ~PAGE_MASK)
@@ -41,7 +42,7 @@ extern size_t os_page;
/* Return the huge page base address for the huge page containing address a. */
#define HUGEPAGE_ADDR2BASE(a) \
((void *)((uintptr_t)(a) & ~HUGEPAGE_MASK))
ALIGNMENT_ADDR2BASE(a, HUGEPAGE)
/* Return the smallest huge page size multiple that is >= s. */
#define HUGEPAGE_CEILING(s) \
(((s) + HUGEPAGE_MASK) & ~HUGEPAGE_MASK)

View File

@@ -88,6 +88,7 @@ typedef struct prof_recent_s prof_recent_t;
#define PROF_SAMPLE_ALIGNMENT PAGE
#define PROF_SAMPLE_ALIGNMENT_MASK PAGE_MASK
/* NOLINTNEXTLINE(performance-no-int-to-ptr) */
#define PROF_TCTX_SENTINEL ((prof_tctx_t *)((uintptr_t)1U))
#endif /* JEMALLOC_INTERNAL_PROF_TYPES_H */

View File

@@ -226,9 +226,11 @@ rtree_leaf_elm_bits_decode(uintptr_t bits) {
uintptr_t high_bit_mask = ((uintptr_t)1 << LG_VADDR) - 1;
/* Mask off metadata. */
uintptr_t mask = high_bit_mask & low_bit_mask;
/* NOLINTNEXTLINE(performance-no-int-to-ptr) */
contents.edata = (edata_t *)(bits & mask);
# else
/* Restore sign-extended high bits, mask metadata bits. */
/* NOLINTNEXTLINE(performance-no-int-to-ptr) */
contents.edata = (edata_t *)((uintptr_t)((intptr_t)(bits << RTREE_NHIB)
>> RTREE_NHIB) & low_bit_mask);
# endif
@@ -270,6 +272,7 @@ JEMALLOC_ALWAYS_INLINE void
rtree_contents_encode(rtree_contents_t contents, void **bits,
unsigned *additional) {
#ifdef RTREE_LEAF_COMPACT
/* NOLINTNEXTLINE(performance-no-int-to-ptr) */
*bits = (void *)rtree_leaf_elm_bits_encode(contents);
/* Suppress spurious warning from static analysis */
if (config_debug) {
@@ -320,8 +323,10 @@ rtree_leaf_elm_state_update(tsdn_t *tsdn, rtree_t *rtree,
/* dependent */ true);
bits &= ~RTREE_LEAF_STATE_MASK;
bits |= state << RTREE_LEAF_STATE_SHIFT;
/* NOLINTNEXTLINE(performance-no-int-to-ptr) */
atomic_store_p(&elm1->le_bits, (void *)bits, ATOMIC_RELEASE);
if (elm2 != NULL) {
/* NOLINTNEXTLINE(performance-no-int-to-ptr) */
atomic_store_p(&elm2->le_bits, (void *)bits, ATOMIC_RELEASE);
}
#else

View File

@@ -31,7 +31,7 @@ compute_redzone_end(const void *_ptr, size_t usize, size_t bumped_usize) {
const unsigned char *redzone_end = usize + REDZONE_SIZE < bumped_usize ?
&ptr[usize + REDZONE_SIZE] : &ptr[bumped_usize];
const unsigned char *page_end = (const unsigned char *)
ALIGNMENT_CEILING(((uintptr_t) (&ptr[usize])), os_page);
ALIGNMENT_ADDR2CEILING(&ptr[usize], os_page);
return redzone_end < page_end ? redzone_end : page_end;
}

View File

@@ -140,7 +140,7 @@ san_junk_ptr_locations(void *ptr, size_t usize, void **first, void **mid,
*first = ptr;
*mid = (void *)((uintptr_t)ptr + ((usize >> 1) & ~(ptr_sz - 1)));
*mid = (void *)((byte_t *)ptr + ((usize >> 1) & ~(ptr_sz - 1)));
assert(*first != *mid || usize == ptr_sz);
assert((uintptr_t)*first <= (uintptr_t)*mid);
@@ -151,7 +151,7 @@ san_junk_ptr_locations(void *ptr, size_t usize, void **first, void **mid,
* default the tcache only goes up to the 32K size class, and is usually
* tuned lower instead of higher, which makes it less of a concern.
*/
*last = (void *)((uintptr_t)ptr + usize - sizeof(uaf_detect_junk));
*last = (void *)((byte_t *)ptr + usize - sizeof(uaf_detect_junk));
assert(*first != *last || usize == ptr_sz);
assert(*mid != *last || usize <= ptr_sz * 2);
assert((uintptr_t)*mid <= (uintptr_t)*last);

View File

@@ -16,6 +16,7 @@ typedef struct tcaches_s tcaches_t;
#define TCACHE_ENABLED_ZERO_INITIALIZER false
/* Used for explicit tcache only. Means flushed but not destroyed. */
/* NOLINTNEXTLINE(performance-no-int-to-ptr) */
#define TCACHES_ELM_NEED_REINIT ((tcache_t *)(uintptr_t)1)
#define TCACHE_LG_MAXCLASS_LIMIT 23 /* tcache_maxclass = 8M */

View File

@@ -110,14 +110,14 @@ util_prefetch_write(void *ptr) {
JEMALLOC_ALWAYS_INLINE void
util_prefetch_read_range(void *ptr, size_t sz) {
for (size_t i = 0; i < sz; i += CACHELINE) {
util_prefetch_read((void *)((uintptr_t)ptr + i));
util_prefetch_read((void *)((byte_t *)ptr + i));
}
}
JEMALLOC_ALWAYS_INLINE void
util_prefetch_write_range(void *ptr, size_t sz) {
for (size_t i = 0; i < sz; i += CACHELINE) {
util_prefetch_write((void *)((uintptr_t)ptr + i));
util_prefetch_write((void *)((byte_t *)ptr + i));
}
}