Fix a heap profiling regression.
Call prof_ctx_set() in all paths through prof_{m,re}alloc(). Inline arena_prof_ctx_get().
commit 81b4e6eb6f
parent 4d6a134e13
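
The hunks below follow the commit message: prof_malloc() and prof_realloc() gain a prof_ctx_set() call on the paths that decline to sample, and arena_prof_ctx_get() (together with its arena_run_regind() helper) moves into header-inline form in the large addition hunk, with the out-of-line definitions removed in the last hunk. As orientation, here is a minimal self-contained sketch of the prof_ctx_set() pattern being restored; the stub types and the sampling policy below are stand-ins for illustration, not jemalloc's real definitions. The point is that every early return out of the sampling bookkeeping must stamp the allocation with the "not sampled" sentinel, (prof_ctx_t *)(uintptr_t)1U, rather than leaving the context slot unset.

#include <stdint.h>
#include <stdio.h>

typedef struct prof_ctx_s prof_ctx_t;   /* opaque stand-in */

static prof_ctx_t *ctx_slot;            /* stand-in for the per-region ctx slot */

static void
prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
{
    (void)ptr;
    ctx_slot = ctx;
}

/* Stand-in policy: pretend the accumulator declines to sample small requests. */
static int
prof_sample_accum_update(size_t size)
{
    return (size < 4096);
}

static void
prof_malloc_sketch(const void *ptr, size_t size, prof_ctx_t *sampled_ctx)
{
    if (prof_sample_accum_update(size)) {
        /* The call this commit adds: record the sentinel before bailing. */
        prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U);
        return;
    }
    prof_ctx_set(ptr, sampled_ctx);     /* sampled: attach the real context */
}

int
main(void)
{
    char buf[8];

    prof_malloc_sketch(buf, sizeof(buf), NULL);
    printf("ctx after a non-sampled allocation: %p\n", (void *)ctx_slot);
    return (0);
}
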
@@ -444,7 +444,6 @@ size_t arena_salloc(const void *ptr);
 #ifdef JEMALLOC_PROF
 void arena_prof_promoted(const void *ptr, size_t size);
 size_t arena_salloc_demote(const void *ptr);
-prof_ctx_t *arena_prof_ctx_get(const void *ptr);
 void arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx);
 #endif
 void arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr,
@@ -467,10 +466,116 @@ bool arena_boot(void);
 #ifdef JEMALLOC_H_INLINES
 
 #ifndef JEMALLOC_ENABLE_INLINE
+unsigned arena_run_regind(arena_run_t *run, arena_bin_t *bin,
+    const void *ptr, size_t size);
+# ifdef JEMALLOC_PROF
+prof_ctx_t *arena_prof_ctx_get(const void *ptr);
+# endif
 void arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr);
 #endif
 
 #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_))
+JEMALLOC_INLINE unsigned
+arena_run_regind(arena_run_t *run, arena_bin_t *bin, const void *ptr,
+    size_t size)
+{
+    unsigned shift, diff, regind;
+
+    assert(run->magic == ARENA_RUN_MAGIC);
+
+    /*
+     * Avoid doing division with a variable divisor if possible.  Using
+     * actual division here can reduce allocator throughput by over 20%!
+     */
+    diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run - bin->reg0_offset);
+
+    /* Rescale (factor powers of 2 out of the numerator and denominator). */
+    shift = ffs(size) - 1;
+    diff >>= shift;
+    size >>= shift;
+
+    if (size == 1) {
+        /* The divisor was a power of 2. */
+        regind = diff;
+    } else {
+        /*
+         * To divide by a number D that is not a power of two we
+         * multiply by (2^21 / D) and then right shift by 21 positions.
+         *
+         *   X / D
+         *
+         * becomes
+         *
+         *   (X * size_invs[D - 3]) >> SIZE_INV_SHIFT
+         *
+         * We can omit the first three elements, because we never
+         * divide by 0, and 1 and 2 are both powers of two, which are
+         * handled above.
+         */
+#define SIZE_INV_SHIFT 21
+#define SIZE_INV(s) (((1U << SIZE_INV_SHIFT) / (s)) + 1)
+        static const unsigned size_invs[] = {
+            SIZE_INV(3),
+            SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7),
+            SIZE_INV(8), SIZE_INV(9), SIZE_INV(10), SIZE_INV(11),
+            SIZE_INV(12), SIZE_INV(13), SIZE_INV(14), SIZE_INV(15),
+            SIZE_INV(16), SIZE_INV(17), SIZE_INV(18), SIZE_INV(19),
+            SIZE_INV(20), SIZE_INV(21), SIZE_INV(22), SIZE_INV(23),
+            SIZE_INV(24), SIZE_INV(25), SIZE_INV(26), SIZE_INV(27),
+            SIZE_INV(28), SIZE_INV(29), SIZE_INV(30), SIZE_INV(31)
+        };
+
+        if (size <= ((sizeof(size_invs) / sizeof(unsigned)) + 2))
+            regind = (diff * size_invs[size - 3]) >> SIZE_INV_SHIFT;
+        else
+            regind = diff / size;
+#undef SIZE_INV
+#undef SIZE_INV_SHIFT
+    }
+    assert(diff == regind * size);
+    assert(regind < bin->nregs);
+
+    return (regind);
+}
+
+#ifdef JEMALLOC_PROF
+JEMALLOC_INLINE prof_ctx_t *
+arena_prof_ctx_get(const void *ptr)
+{
+    prof_ctx_t *ret;
+    arena_chunk_t *chunk;
+    size_t pageind, mapbits;
+
+    assert(ptr != NULL);
+    assert(CHUNK_ADDR2BASE(ptr) != ptr);
+
+    chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
+    pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
+    mapbits = chunk->map[pageind-map_bias].bits;
+    assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
+    if ((mapbits & CHUNK_MAP_LARGE) == 0) {
+        if (prof_promote)
+            ret = (prof_ctx_t *)(uintptr_t)1U;
+        else {
+            arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
+                (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) <<
+                PAGE_SHIFT));
+            arena_bin_t *bin = run->bin;
+            unsigned regind;
+
+            assert(run->magic == ARENA_RUN_MAGIC);
+            regind = arena_run_regind(run, bin, ptr, bin->reg_size);
+            ret = *(prof_ctx_t **)((uintptr_t)run +
+                bin->ctx0_offset + (regind *
+                sizeof(prof_ctx_t *)));
+        }
+    } else
+        ret = chunk->map[pageind-map_bias].prof_ctx;
+
+    return (ret);
+}
+#endif
+
 JEMALLOC_INLINE void
 arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr)
 {
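
The reciprocal-multiplication comment in arena_run_regind() above can be checked numerically. With SIZE_INV_SHIFT = 21, SIZE_INV(3) = floor(2^21 / 3) + 1 = 699051, so for diff = 96 the expression (96 * 699051) >> 21 = 67108896 >> 21 = 32 = 96 / 3. The small standalone check below exercises the same identity for every divisor the table covers; the bound of 4096 on diff is an assumption chosen so the 32-bit product cannot overflow, reflecting that rescaled offsets within a small run are far below 2^21.

#include <assert.h>
#include <stdio.h>

#define SIZE_INV_SHIFT 21
#define SIZE_INV(s) (((1U << SIZE_INV_SHIFT) / (s)) + 1)

int
main(void)
{
    unsigned d, diff;

    for (d = 3; d <= 31; d++) {
        if ((d & (d - 1)) == 0)
            continue;   /* Powers of two take the shift path instead. */
        for (diff = 0; diff < 4096; diff += d) {
            /* Exact division of a multiple of d, done without a divide. */
            assert(((diff * SIZE_INV(d)) >> SIZE_INV_SHIFT) == diff / d);
        }
    }
    printf("reciprocal multiplication matches exact division\n");
    return (0);
}
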
@@ -389,7 +389,7 @@ prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt)
 {
 
     assert(ptr != NULL);
-    assert(size == s2u(size));
+    assert(size == isalloc(ptr));
 
     if (opt_lg_prof_sample != 0) {
         if (prof_sample_accum_update(size)) {
@@ -401,6 +401,7 @@ prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt)
              * prof_alloc_prep() and prof_malloc().
              */
             assert(false);
+            prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U);
             return;
         }
     }
@@ -438,6 +439,7 @@ prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt,
     assert(ptr != NULL || (uintptr_t)cnt <= (uintptr_t)1U);
 
     if (ptr != NULL) {
+        assert(size == isalloc(ptr));
         if (opt_lg_prof_sample != 0) {
             if (prof_sample_accum_update(size)) {
                 /*
@@ -448,6 +450,7 @@ prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt,
                  * its actual size was insufficient to cross
                  * the sample threshold.
                  */
+                prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U);
                 return;
             }
         }
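
Both prof_malloc() and prof_realloc() now store (prof_ctx_t *)(uintptr_t)1U on the paths that decline to sample. Purely for orientation, the hedged sketch below (not code from this commit; the helper is hypothetical) shows how a consumer of that convention can distinguish a real context from the sentinel: any ctx value numerically greater than 1 is treated as a live prof_ctx_t, while 0 and 1 mean there is nothing to account for.

#include <assert.h>
#include <stdint.h>
#include <stddef.h>

typedef struct prof_ctx_s prof_ctx_t;   /* opaque stand-in, as in the sketch above */

/* Hypothetical check, not jemalloc source: does this ctx need accounting? */
static int
ctx_is_real(const prof_ctx_t *ctx)
{
    return ((uintptr_t)ctx > (uintptr_t)1U);
}

int
main(void)
{
    assert(!ctx_is_real(NULL));
    assert(!ctx_is_real((prof_ctx_t *)(uintptr_t)1U));  /* the sentinel stored above */
    return (0);
}
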
@@ -1766,105 +1766,6 @@ arena_salloc_demote(const void *ptr)
     return (ret);
 }
 
-static inline unsigned
-arena_run_regind(arena_run_t *run, arena_bin_t *bin, const void *ptr,
-    size_t size)
-{
-    unsigned shift, diff, regind;
-
-    assert(run->magic == ARENA_RUN_MAGIC);
-
-    /*
-     * Avoid doing division with a variable divisor if possible.  Using
-     * actual division here can reduce allocator throughput by over 20%!
-     */
-    diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run - bin->reg0_offset);
-
-    /* Rescale (factor powers of 2 out of the numerator and denominator). */
-    shift = ffs(size) - 1;
-    diff >>= shift;
-    size >>= shift;
-
-    if (size == 1) {
-        /* The divisor was a power of 2. */
-        regind = diff;
-    } else {
-        /*
-         * To divide by a number D that is not a power of two we
-         * multiply by (2^21 / D) and then right shift by 21 positions.
-         *
-         *   X / D
-         *
-         * becomes
-         *
-         *   (X * size_invs[D - 3]) >> SIZE_INV_SHIFT
-         *
-         * We can omit the first three elements, because we never
-         * divide by 0, and 1 and 2 are both powers of two, which are
-         * handled above.
-         */
-#define SIZE_INV_SHIFT 21
-#define SIZE_INV(s) (((1U << SIZE_INV_SHIFT) / (s)) + 1)
-        static const unsigned size_invs[] = {
-            SIZE_INV(3),
-            SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7),
-            SIZE_INV(8), SIZE_INV(9), SIZE_INV(10), SIZE_INV(11),
-            SIZE_INV(12), SIZE_INV(13), SIZE_INV(14), SIZE_INV(15),
-            SIZE_INV(16), SIZE_INV(17), SIZE_INV(18), SIZE_INV(19),
-            SIZE_INV(20), SIZE_INV(21), SIZE_INV(22), SIZE_INV(23),
-            SIZE_INV(24), SIZE_INV(25), SIZE_INV(26), SIZE_INV(27),
-            SIZE_INV(28), SIZE_INV(29), SIZE_INV(30), SIZE_INV(31)
-        };
-
-        if (size <= ((sizeof(size_invs) / sizeof(unsigned)) + 2))
-            regind = (diff * size_invs[size - 3]) >> SIZE_INV_SHIFT;
-        else
-            regind = diff / size;
-#undef SIZE_INV
-#undef SIZE_INV_SHIFT
-    }
-    assert(diff == regind * size);
-    assert(regind < bin->nregs);
-
-    return (regind);
-}
-
-prof_ctx_t *
-arena_prof_ctx_get(const void *ptr)
-{
-    prof_ctx_t *ret;
-    arena_chunk_t *chunk;
-    size_t pageind, mapbits;
-
-    assert(ptr != NULL);
-    assert(CHUNK_ADDR2BASE(ptr) != ptr);
-
-    chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
-    pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
-    mapbits = chunk->map[pageind-map_bias].bits;
-    assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
-    if ((mapbits & CHUNK_MAP_LARGE) == 0) {
-        if (prof_promote)
-            ret = (prof_ctx_t *)(uintptr_t)1U;
-        else {
-            arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
-                (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) <<
-                PAGE_SHIFT));
-            arena_bin_t *bin = run->bin;
-            unsigned regind;
-
-            assert(run->magic == ARENA_RUN_MAGIC);
-            regind = arena_run_regind(run, bin, ptr, bin->reg_size);
-            ret = *(prof_ctx_t **)((uintptr_t)run +
-                bin->ctx0_offset + (regind *
-                sizeof(prof_ctx_t *)));
-        }
-    } else
-        ret = chunk->map[pageind-map_bias].prof_ctx;
-
-    return (ret);
-}
-
 void
 arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
 {