Reduce statistical heap sampling memory overhead.
If the mean heap sampling interval is larger than one page, simulate sampled small objects with large objects. This allows profiling context pointers to be omitted for small objects, so the memory overhead of sampling decreases as the sampling interval is increased. Additionally, fix an unrelated compilation error in the profiling code.
This commit is contained in:
parent
169cbc1ef7
commit
0b270a991d
@ -121,8 +121,10 @@ struct arena_chunk_map_s {
|
||||
*
|
||||
* p : run page offset
|
||||
* s : run size
|
||||
* c : size class (used only if prof_promote is true)
|
||||
* x : don't care
|
||||
* - : 0
|
||||
* + : 1
|
||||
* [DZLA] : bit set
|
||||
* [dzla] : bit unset
|
||||
*
|
||||
@ -142,17 +144,27 @@ struct arena_chunk_map_s {
|
||||
* pppppppp pppppppp pppp---- ----d--a
|
||||
*
|
||||
* Large:
|
||||
* ssssssss ssssssss ssss---- ----D-la
|
||||
* ssssssss ssssssss ssss++++ ++++D-la
|
||||
* xxxxxxxx xxxxxxxx xxxx---- ----xxxx
|
||||
* -------- -------- -------- ----D-la
|
||||
*
|
||||
* Large (sampled, size <= PAGE_SIZE):
|
||||
* ssssssss ssssssss sssscccc ccccD-la
|
||||
*
|
||||
* Large (not sampled, size == PAGE_SIZE):
|
||||
* ssssssss ssssssss ssss++++ ++++D-la
|
||||
*/
|
||||
size_t bits;
|
||||
#define CHUNK_MAP_FLAGS_MASK ((size_t)0x1fU)
|
||||
#define CHUNK_MAP_KEY ((size_t)0x10U)
|
||||
#define CHUNK_MAP_DIRTY ((size_t)0x08U)
|
||||
#define CHUNK_MAP_ZEROED ((size_t)0x04U)
|
||||
#define CHUNK_MAP_LARGE ((size_t)0x02U)
|
||||
#define CHUNK_MAP_ALLOCATED ((size_t)0x01U)
|
||||
#ifdef JEMALLOC_PROF
|
||||
#define CHUNK_MAP_CLASS_SHIFT 4
|
||||
#define CHUNK_MAP_CLASS_MASK ((size_t)0xff0U)
|
||||
#endif
|
||||
#define CHUNK_MAP_FLAGS_MASK ((size_t)0xfU)
|
||||
#define CHUNK_MAP_DIRTY ((size_t)0x8U)
|
||||
#define CHUNK_MAP_ZEROED ((size_t)0x4U)
|
||||
#define CHUNK_MAP_LARGE ((size_t)0x2U)
|
||||
#define CHUNK_MAP_ALLOCATED ((size_t)0x1U)
|
||||
#define CHUNK_MAP_KEY CHUNK_MAP_ALLOCATED
|
||||
};
|
||||
typedef rb_tree(arena_chunk_map_t) arena_avail_tree_t;
|
||||
typedef rb_tree(arena_chunk_map_t) arena_run_tree_t;
|
||||
@ -421,6 +433,8 @@ void *arena_palloc(arena_t *arena, size_t alignment, size_t size,
|
||||
size_t alloc_size);
|
||||
size_t arena_salloc(const void *ptr);
|
||||
#ifdef JEMALLOC_PROF
|
||||
void arena_prof_promoted(const void *ptr, size_t size);
|
||||
size_t arena_salloc_demote(const void *ptr);
|
||||
prof_thr_cnt_t *arena_prof_cnt_get(const void *ptr);
|
||||
void arena_prof_cnt_set(const void *ptr, prof_thr_cnt_t *cnt);
|
||||
#endif
|
||||
|
@ -515,7 +515,11 @@ isalloc(const void *ptr)
|
||||
/* Region. */
|
||||
assert(chunk->arena->magic == ARENA_MAGIC);
|
||||
|
||||
#ifdef JEMALLOC_PROF
|
||||
ret = arena_salloc_demote(ptr);
|
||||
#else
|
||||
ret = arena_salloc(ptr);
|
||||
#endif
|
||||
} else
|
||||
ret = huge_salloc(ptr);
|
||||
|
||||
|
@ -134,6 +134,12 @@ extern bool opt_prof_leak; /* Dump leak summary at exit. */
|
||||
*/
|
||||
extern uint64_t prof_interval;
|
||||
|
||||
/*
|
||||
* If true, promote small sampled objects to large objects, since small run
|
||||
* headers do not have embedded profile context pointers.
|
||||
*/
|
||||
extern bool prof_promote;
|
||||
|
||||
bool prof_init(prof_t *prof, bool master);
|
||||
void prof_destroy(prof_t *prof);
|
||||
|
||||
|
@ -256,6 +256,12 @@ tcache_alloc_large(tcache_t *tcache, size_t size, bool zero)
|
||||
if (ret == NULL)
|
||||
return (NULL);
|
||||
} else {
|
||||
#ifdef JEMALLOC_PROF
|
||||
arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ret);
|
||||
size_t pageind = (unsigned)(((uintptr_t)ret - (uintptr_t)chunk)
|
||||
>> PAGE_SHIFT);
|
||||
chunk->map[pageind].bits |= CHUNK_MAP_CLASS_MASK;
|
||||
#endif
|
||||
if (zero == false) {
|
||||
#ifdef JEMALLOC_FILL
|
||||
if (opt_junk)
|
||||
@ -289,6 +295,8 @@ tcache_dalloc_small(tcache_t *tcache, void *ptr)
|
||||
size_t pageind, binind;
|
||||
arena_chunk_map_t *mapelm;
|
||||
|
||||
assert(arena_salloc(ptr) <= small_maxclass);
|
||||
|
||||
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
|
||||
arena = chunk->arena;
|
||||
pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT);
|
||||
@ -334,6 +342,8 @@ tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size)
|
||||
arena_chunk_map_t *mapelm;
|
||||
|
||||
assert((size & PAGE_MASK) == 0);
|
||||
assert(arena_salloc(ptr) > small_maxclass);
|
||||
assert(arena_salloc(ptr) <= tcache_maxclass);
|
||||
|
||||
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
|
||||
arena = chunk->arena;
|
||||
|
@ -218,8 +218,8 @@ arena_avail_comp(arena_chunk_map_t *a, arena_chunk_map_t *b)
|
||||
size_t a_size = a->bits & ~PAGE_MASK;
|
||||
size_t b_size = b->bits & ~PAGE_MASK;
|
||||
|
||||
assert(a->bits & CHUNK_MAP_KEY || (a->bits & CHUNK_MAP_DIRTY) ==
|
||||
(b->bits & CHUNK_MAP_DIRTY));
|
||||
assert((a->bits & CHUNK_MAP_KEY) == CHUNK_MAP_KEY || (a->bits &
|
||||
CHUNK_MAP_DIRTY) == (b->bits & CHUNK_MAP_DIRTY));
|
||||
|
||||
ret = (a_size > b_size) - (a_size < b_size);
|
||||
if (ret == 0) {
|
||||
@ -382,6 +382,9 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large,
|
||||
chunk->map[run_ind+need_pages-1].bits = CHUNK_MAP_LARGE |
|
||||
CHUNK_MAP_ALLOCATED | flag_dirty;
|
||||
chunk->map[run_ind].bits = size | CHUNK_MAP_LARGE |
|
||||
#ifdef JEMALLOC_PROF
|
||||
CHUNK_MAP_CLASS_MASK |
|
||||
#endif
|
||||
CHUNK_MAP_ALLOCATED | flag_dirty;
|
||||
} else {
|
||||
assert(zero == false);
|
||||
@ -1210,7 +1213,7 @@ arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
|
||||
try_nregs--;
|
||||
try_hdr_size = sizeof(arena_run_t);
|
||||
#ifdef JEMALLOC_PROF
|
||||
if (opt_prof) {
|
||||
if (opt_prof && prof_promote == false) {
|
||||
/* Pad to a quantum boundary. */
|
||||
try_hdr_size = QUANTUM_CEILING(try_hdr_size);
|
||||
try_cnt0_offset = try_hdr_size;
|
||||
@ -1243,7 +1246,7 @@ arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
|
||||
try_nregs--;
|
||||
try_hdr_size = sizeof(arena_run_t);
|
||||
#ifdef JEMALLOC_PROF
|
||||
if (opt_prof) {
|
||||
if (opt_prof && prof_promote == false) {
|
||||
/* Pad to a quantum boundary. */
|
||||
try_hdr_size = QUANTUM_CEILING(try_hdr_size);
|
||||
try_cnt0_offset = try_hdr_size;
|
||||
@ -1507,6 +1510,63 @@ arena_salloc(const void *ptr)
|
||||
}
|
||||
|
||||
#ifdef JEMALLOC_PROF
|
||||
void
|
||||
arena_prof_promoted(const void *ptr, size_t size)
|
||||
{
|
||||
arena_chunk_t *chunk;
|
||||
size_t pageind, binind;
|
||||
|
||||
assert(ptr != NULL);
|
||||
assert(CHUNK_ADDR2BASE(ptr) != ptr);
|
||||
assert(isalloc(ptr) == PAGE_SIZE);
|
||||
|
||||
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
|
||||
pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT);
|
||||
binind = small_size2bin[size];
|
||||
assert(binind < nbins);
|
||||
chunk->map[pageind].bits = (chunk->map[pageind].bits &
|
||||
~CHUNK_MAP_CLASS_MASK) | (binind << CHUNK_MAP_CLASS_SHIFT);
|
||||
}
|
||||
|
||||
size_t
|
||||
arena_salloc_demote(const void *ptr)
|
||||
{
|
||||
size_t ret;
|
||||
arena_chunk_t *chunk;
|
||||
size_t pageind, mapbits;
|
||||
|
||||
assert(ptr != NULL);
|
||||
assert(CHUNK_ADDR2BASE(ptr) != ptr);
|
||||
|
||||
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
|
||||
pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT);
|
||||
mapbits = chunk->map[pageind].bits;
|
||||
assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
|
||||
if ((mapbits & CHUNK_MAP_LARGE) == 0) {
|
||||
arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
|
||||
(uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) <<
|
||||
PAGE_SHIFT));
|
||||
assert(run->magic == ARENA_RUN_MAGIC);
|
||||
assert(((uintptr_t)ptr - ((uintptr_t)run +
|
||||
(uintptr_t)run->bin->reg0_offset)) % run->bin->reg_size ==
|
||||
0);
|
||||
ret = run->bin->reg_size;
|
||||
} else {
|
||||
assert(((uintptr_t)ptr & PAGE_MASK) == 0);
|
||||
ret = mapbits & ~PAGE_MASK;
|
||||
if (prof_promote && ret == PAGE_SIZE && (mapbits &
|
||||
CHUNK_MAP_CLASS_MASK) != CHUNK_MAP_CLASS_MASK) {
|
||||
size_t binind = ((mapbits & CHUNK_MAP_CLASS_MASK) >>
|
||||
CHUNK_MAP_CLASS_SHIFT);
|
||||
assert(binind < nbins);
|
||||
ret = chunk->arena->bins[binind].reg_size;
|
||||
}
|
||||
assert(ret != 0);
|
||||
}
|
||||
|
||||
return (ret);
|
||||
}
|
||||
|
||||
static inline unsigned
|
||||
arena_run_regind(arena_run_t *run, arena_bin_t *bin, const void *ptr,
|
||||
size_t size)
|
||||
@ -1585,6 +1645,9 @@ arena_prof_cnt_get(const void *ptr)
|
||||
mapbits = chunk->map[pageind].bits;
|
||||
assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
|
||||
if ((mapbits & CHUNK_MAP_LARGE) == 0) {
|
||||
if (prof_promote)
|
||||
ret = (prof_thr_cnt_t *)(uintptr_t)1U;
|
||||
else {
|
||||
arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
|
||||
(uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) <<
|
||||
PAGE_SHIFT));
|
||||
@ -1593,11 +1656,12 @@ arena_prof_cnt_get(const void *ptr)
|
||||
|
||||
assert(run->magic == ARENA_RUN_MAGIC);
|
||||
regind = arena_run_regind(run, bin, ptr, bin->reg_size);
|
||||
ret = *(prof_thr_cnt_t **)((uintptr_t)run + bin->cnt0_offset +
|
||||
(regind * sizeof(prof_thr_cnt_t *)));
|
||||
} else {
|
||||
ret = chunk->map[pageind].prof_cnt;
|
||||
ret = *(prof_thr_cnt_t **)((uintptr_t)run +
|
||||
bin->cnt0_offset + (regind *
|
||||
sizeof(prof_thr_cnt_t *)));
|
||||
}
|
||||
} else
|
||||
ret = chunk->map[pageind].prof_cnt;
|
||||
|
||||
return (ret);
|
||||
}
|
||||
@ -1616,8 +1680,9 @@ arena_prof_cnt_set(const void *ptr, prof_thr_cnt_t *cnt)
|
||||
mapbits = chunk->map[pageind].bits;
|
||||
assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
|
||||
if ((mapbits & CHUNK_MAP_LARGE) == 0) {
|
||||
if (prof_promote == false) {
|
||||
arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
|
||||
(uintptr_t)((pageind - (mapbits & >> PAGE_SHIFT)) <<
|
||||
(uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) <<
|
||||
PAGE_SHIFT));
|
||||
arena_bin_t *bin = run->bin;
|
||||
unsigned regind;
|
||||
@ -1625,11 +1690,12 @@ arena_prof_cnt_set(const void *ptr, prof_thr_cnt_t *cnt)
|
||||
assert(run->magic == ARENA_RUN_MAGIC);
|
||||
regind = arena_run_regind(run, bin, ptr, bin->reg_size);
|
||||
|
||||
*((prof_thr_cnt_t **)((uintptr_t)run + bin->cnt0_offset +
|
||||
(regind * sizeof(prof_thr_cnt_t *)))) = cnt;
|
||||
} else {
|
||||
*((prof_thr_cnt_t **)((uintptr_t)run + bin->cnt0_offset
|
||||
+ (regind * sizeof(prof_thr_cnt_t *)))) = cnt;
|
||||
} else
|
||||
assert((uintptr_t)cnt == (uintptr_t)1U);
|
||||
} else
|
||||
chunk->map[pageind].prof_cnt = cnt;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -2330,7 +2396,22 @@ arena_boot(void)
|
||||
* 4KiB pages), and such configurations are impractical, but
|
||||
* nonetheless we need to protect against this case in order to avoid
|
||||
* undefined behavior.
|
||||
*
|
||||
* Further constrain nbins to 255 if prof_promote is true, since all
|
||||
* small size classes, plus a "not small" size class must be stored in
|
||||
* 8 bits of arena_chunk_map_t's bits field.
|
||||
*/
|
||||
#ifdef JEMALLOC_PROF
|
||||
if (opt_prof && prof_promote) {
|
||||
if (nbins > 255) {
|
||||
char line_buf[UMAX2S_BUFSIZE];
|
||||
malloc_write("<jemalloc>: Too many small size classes (");
|
||||
malloc_write(umax2s(nbins, 10, line_buf));
|
||||
malloc_write(" > max 255)\n");
|
||||
abort();
|
||||
}
|
||||
} else
|
||||
#endif
|
||||
if (nbins > 256) {
|
||||
char line_buf[UMAX2S_BUFSIZE];
|
||||
malloc_write("<jemalloc>: Too many small size classes (");
|
||||
|
@ -835,12 +835,20 @@ JEMALLOC_P(malloc)(size_t size)
|
||||
}
|
||||
|
||||
#ifdef JEMALLOC_PROF
|
||||
if (opt_prof && (cnt = prof_alloc_prep(size)) == NULL) {
|
||||
if (opt_prof) {
|
||||
if ((cnt = prof_alloc_prep(size)) == NULL) {
|
||||
ret = NULL;
|
||||
goto OOM;
|
||||
}
|
||||
if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && size <=
|
||||
small_maxclass) {
|
||||
ret = imalloc(small_maxclass+1);
|
||||
if (ret != NULL)
|
||||
arena_prof_promoted(ret, size);
|
||||
} else
|
||||
ret = imalloc(size);
|
||||
} else
|
||||
#endif
|
||||
|
||||
ret = imalloc(size);
|
||||
|
||||
OOM:
|
||||
@ -918,12 +926,24 @@ JEMALLOC_P(posix_memalign)(void **memptr, size_t alignment, size_t size)
|
||||
}
|
||||
|
||||
#ifdef JEMALLOC_PROF
|
||||
if (opt_prof && (cnt = prof_alloc_prep(size)) == NULL) {
|
||||
if (opt_prof) {
|
||||
if ((cnt = prof_alloc_prep(size)) == NULL) {
|
||||
result = NULL;
|
||||
ret = EINVAL;
|
||||
} else {
|
||||
if (prof_promote && (uintptr_t)cnt !=
|
||||
(uintptr_t)1U && size <= small_maxclass) {
|
||||
result = ipalloc(alignment,
|
||||
small_maxclass+1);
|
||||
if (result != NULL) {
|
||||
arena_prof_promoted(result,
|
||||
size);
|
||||
}
|
||||
} else
|
||||
result = ipalloc(alignment, size);
|
||||
}
|
||||
} else
|
||||
#endif
|
||||
|
||||
result = ipalloc(alignment, size);
|
||||
}
|
||||
|
||||
@ -992,12 +1012,20 @@ JEMALLOC_P(calloc)(size_t num, size_t size)
|
||||
}
|
||||
|
||||
#ifdef JEMALLOC_PROF
|
||||
if (opt_prof && (cnt = prof_alloc_prep(num_size)) == NULL) {
|
||||
if (opt_prof) {
|
||||
if ((cnt = prof_alloc_prep(num_size)) == NULL) {
|
||||
ret = NULL;
|
||||
goto RETURN;
|
||||
}
|
||||
if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && num_size
|
||||
<= small_maxclass) {
|
||||
ret = icalloc(small_maxclass+1);
|
||||
if (ret != NULL)
|
||||
arena_prof_promoted(ret, num_size);
|
||||
} else
|
||||
ret = icalloc(num_size);
|
||||
} else
|
||||
#endif
|
||||
|
||||
ret = icalloc(num_size);
|
||||
|
||||
RETURN:
|
||||
@ -1071,9 +1099,15 @@ JEMALLOC_P(realloc)(void *ptr, size_t size)
|
||||
ret = NULL;
|
||||
goto OOM;
|
||||
}
|
||||
}
|
||||
if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U &&
|
||||
size <= small_maxclass) {
|
||||
ret = iralloc(ptr, small_maxclass+1);
|
||||
if (ret != NULL)
|
||||
arena_prof_promoted(ret, size);
|
||||
} else
|
||||
ret = iralloc(ptr, size);
|
||||
} else
|
||||
#endif
|
||||
|
||||
ret = iralloc(ptr, size);
|
||||
|
||||
#ifdef JEMALLOC_PROF
|
||||
@ -1104,8 +1138,21 @@ OOM:
|
||||
ret = NULL;
|
||||
} else {
|
||||
#ifdef JEMALLOC_PROF
|
||||
if (opt_prof && (cnt = prof_alloc_prep(size)) == NULL) {
|
||||
if (opt_prof) {
|
||||
if ((cnt = prof_alloc_prep(size)) == NULL)
|
||||
ret = NULL;
|
||||
else {
|
||||
if (prof_promote && (uintptr_t)cnt !=
|
||||
(uintptr_t)1U && size <=
|
||||
small_maxclass) {
|
||||
ret = imalloc(small_maxclass+1);
|
||||
if (ret != NULL) {
|
||||
arena_prof_promoted(ret,
|
||||
size);
|
||||
}
|
||||
} else
|
||||
ret = imalloc(size);
|
||||
}
|
||||
} else
|
||||
#endif
|
||||
ret = imalloc(size);
|
||||
|
@ -25,6 +25,7 @@ bool opt_prof_udump = false;
|
||||
bool opt_prof_leak = false;
|
||||
|
||||
uint64_t prof_interval;
|
||||
bool prof_promote;
|
||||
|
||||
/*
|
||||
* Global hash of (prof_bt_t *)-->(prof_ctx_t *). This is the master data
|
||||
@ -1250,8 +1251,8 @@ prof_boot0(void)
|
||||
{
|
||||
|
||||
/*
|
||||
* opt_prof must be in its final state before any arenas are
|
||||
* initialized, so this function must be executed early.
|
||||
* opt_prof and prof_promote must be in their final state before any
|
||||
* arenas are initialized, so this function must be executed early.
|
||||
*/
|
||||
|
||||
if (opt_lg_prof_sample > 0) {
|
||||
@ -1272,6 +1273,8 @@ prof_boot0(void)
|
||||
prof_interval = 0;
|
||||
} else if (opt_prof)
|
||||
prof_interval = (((uint64_t)1U) << opt_lg_prof_interval);
|
||||
|
||||
prof_promote = (opt_prof && opt_lg_prof_sample > PAGE_SHIFT);
|
||||
}
|
||||
|
||||
bool
|
||||
|
Loading…
Reference in New Issue
Block a user