Reduce statistical heap sampling memory overhead.

If the mean heap sampling interval is larger than one page, simulate
sampled small objects with large objects.  This allows profiling context
pointers to be omitted for small objects.  As a result, the memory
overhead for sampling decreases as the sampling interval is increased.

Fix a compilation error in the profiling code.
This commit is contained in:
Jason Evans 2010-03-31 16:45:04 -07:00
parent 169cbc1ef7
commit 0b270a991d
7 changed files with 221 additions and 56 deletions

View File

@ -121,8 +121,10 @@ struct arena_chunk_map_s {
* *
* p : run page offset * p : run page offset
* s : run size * s : run size
* c : size class (used only if prof_promote is true)
* x : don't care * x : don't care
* - : 0 * - : 0
* + : 1
* [DZLA] : bit set * [DZLA] : bit set
* [dzla] : bit unset * [dzla] : bit unset
* *
@ -142,17 +144,27 @@ struct arena_chunk_map_s {
* pppppppp pppppppp pppp---- ----d--a * pppppppp pppppppp pppp---- ----d--a
* *
* Large: * Large:
* ssssssss ssssssss ssss---- ----D-la * ssssssss ssssssss ssss++++ ++++D-la
* xxxxxxxx xxxxxxxx xxxx---- ----xxxx * xxxxxxxx xxxxxxxx xxxx---- ----xxxx
* -------- -------- -------- ----D-la * -------- -------- -------- ----D-la
*
* Large (sampled, size <= PAGE_SIZE):
* ssssssss ssssssss sssscccc ccccD-la
*
* Large (not sampled, size == PAGE_SIZE):
* ssssssss ssssssss ssss++++ ++++D-la
*/ */
size_t bits; size_t bits;
#define CHUNK_MAP_FLAGS_MASK ((size_t)0x1fU) #ifdef JEMALLOC_PROF
#define CHUNK_MAP_KEY ((size_t)0x10U) #define CHUNK_MAP_CLASS_SHIFT 4
#define CHUNK_MAP_DIRTY ((size_t)0x08U) #define CHUNK_MAP_CLASS_MASK ((size_t)0xff0U)
#define CHUNK_MAP_ZEROED ((size_t)0x04U) #endif
#define CHUNK_MAP_LARGE ((size_t)0x02U) #define CHUNK_MAP_FLAGS_MASK ((size_t)0xfU)
#define CHUNK_MAP_ALLOCATED ((size_t)0x01U) #define CHUNK_MAP_DIRTY ((size_t)0x8U)
#define CHUNK_MAP_ZEROED ((size_t)0x4U)
#define CHUNK_MAP_LARGE ((size_t)0x2U)
#define CHUNK_MAP_ALLOCATED ((size_t)0x1U)
#define CHUNK_MAP_KEY CHUNK_MAP_ALLOCATED
}; };
typedef rb_tree(arena_chunk_map_t) arena_avail_tree_t; typedef rb_tree(arena_chunk_map_t) arena_avail_tree_t;
typedef rb_tree(arena_chunk_map_t) arena_run_tree_t; typedef rb_tree(arena_chunk_map_t) arena_run_tree_t;
@ -421,6 +433,8 @@ void *arena_palloc(arena_t *arena, size_t alignment, size_t size,
size_t alloc_size); size_t alloc_size);
size_t arena_salloc(const void *ptr); size_t arena_salloc(const void *ptr);
#ifdef JEMALLOC_PROF #ifdef JEMALLOC_PROF
void arena_prof_promoted(const void *ptr, size_t size);
size_t arena_salloc_demote(const void *ptr);
prof_thr_cnt_t *arena_prof_cnt_get(const void *ptr); prof_thr_cnt_t *arena_prof_cnt_get(const void *ptr);
void arena_prof_cnt_set(const void *ptr, prof_thr_cnt_t *cnt); void arena_prof_cnt_set(const void *ptr, prof_thr_cnt_t *cnt);
#endif #endif

View File

@ -515,7 +515,11 @@ isalloc(const void *ptr)
/* Region. */ /* Region. */
assert(chunk->arena->magic == ARENA_MAGIC); assert(chunk->arena->magic == ARENA_MAGIC);
#ifdef JEMALLOC_PROF
ret = arena_salloc_demote(ptr);
#else
ret = arena_salloc(ptr); ret = arena_salloc(ptr);
#endif
} else } else
ret = huge_salloc(ptr); ret = huge_salloc(ptr);

View File

@ -134,6 +134,12 @@ extern bool opt_prof_leak; /* Dump leak summary at exit. */
*/ */
extern uint64_t prof_interval; extern uint64_t prof_interval;
/*
* If true, promote small sampled objects to large objects, since small run
* headers do not have embedded profile context pointers.
*/
extern bool prof_promote;
bool prof_init(prof_t *prof, bool master); bool prof_init(prof_t *prof, bool master);
void prof_destroy(prof_t *prof); void prof_destroy(prof_t *prof);

View File

@ -256,6 +256,12 @@ tcache_alloc_large(tcache_t *tcache, size_t size, bool zero)
if (ret == NULL) if (ret == NULL)
return (NULL); return (NULL);
} else { } else {
#ifdef JEMALLOC_PROF
arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ret);
size_t pageind = (unsigned)(((uintptr_t)ret - (uintptr_t)chunk)
>> PAGE_SHIFT);
chunk->map[pageind].bits |= CHUNK_MAP_CLASS_MASK;
#endif
if (zero == false) { if (zero == false) {
#ifdef JEMALLOC_FILL #ifdef JEMALLOC_FILL
if (opt_junk) if (opt_junk)
@ -289,6 +295,8 @@ tcache_dalloc_small(tcache_t *tcache, void *ptr)
size_t pageind, binind; size_t pageind, binind;
arena_chunk_map_t *mapelm; arena_chunk_map_t *mapelm;
assert(arena_salloc(ptr) <= small_maxclass);
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
arena = chunk->arena; arena = chunk->arena;
pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT); pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT);
@ -334,6 +342,8 @@ tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size)
arena_chunk_map_t *mapelm; arena_chunk_map_t *mapelm;
assert((size & PAGE_MASK) == 0); assert((size & PAGE_MASK) == 0);
assert(arena_salloc(ptr) > small_maxclass);
assert(arena_salloc(ptr) <= tcache_maxclass);
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
arena = chunk->arena; arena = chunk->arena;

View File

@ -218,8 +218,8 @@ arena_avail_comp(arena_chunk_map_t *a, arena_chunk_map_t *b)
size_t a_size = a->bits & ~PAGE_MASK; size_t a_size = a->bits & ~PAGE_MASK;
size_t b_size = b->bits & ~PAGE_MASK; size_t b_size = b->bits & ~PAGE_MASK;
assert(a->bits & CHUNK_MAP_KEY || (a->bits & CHUNK_MAP_DIRTY) == assert((a->bits & CHUNK_MAP_KEY) == CHUNK_MAP_KEY || (a->bits &
(b->bits & CHUNK_MAP_DIRTY)); CHUNK_MAP_DIRTY) == (b->bits & CHUNK_MAP_DIRTY));
ret = (a_size > b_size) - (a_size < b_size); ret = (a_size > b_size) - (a_size < b_size);
if (ret == 0) { if (ret == 0) {
@ -382,6 +382,9 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large,
chunk->map[run_ind+need_pages-1].bits = CHUNK_MAP_LARGE | chunk->map[run_ind+need_pages-1].bits = CHUNK_MAP_LARGE |
CHUNK_MAP_ALLOCATED | flag_dirty; CHUNK_MAP_ALLOCATED | flag_dirty;
chunk->map[run_ind].bits = size | CHUNK_MAP_LARGE | chunk->map[run_ind].bits = size | CHUNK_MAP_LARGE |
#ifdef JEMALLOC_PROF
CHUNK_MAP_CLASS_MASK |
#endif
CHUNK_MAP_ALLOCATED | flag_dirty; CHUNK_MAP_ALLOCATED | flag_dirty;
} else { } else {
assert(zero == false); assert(zero == false);
@ -1210,7 +1213,7 @@ arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
try_nregs--; try_nregs--;
try_hdr_size = sizeof(arena_run_t); try_hdr_size = sizeof(arena_run_t);
#ifdef JEMALLOC_PROF #ifdef JEMALLOC_PROF
if (opt_prof) { if (opt_prof && prof_promote == false) {
/* Pad to a quantum boundary. */ /* Pad to a quantum boundary. */
try_hdr_size = QUANTUM_CEILING(try_hdr_size); try_hdr_size = QUANTUM_CEILING(try_hdr_size);
try_cnt0_offset = try_hdr_size; try_cnt0_offset = try_hdr_size;
@ -1243,7 +1246,7 @@ arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
try_nregs--; try_nregs--;
try_hdr_size = sizeof(arena_run_t); try_hdr_size = sizeof(arena_run_t);
#ifdef JEMALLOC_PROF #ifdef JEMALLOC_PROF
if (opt_prof) { if (opt_prof && prof_promote == false) {
/* Pad to a quantum boundary. */ /* Pad to a quantum boundary. */
try_hdr_size = QUANTUM_CEILING(try_hdr_size); try_hdr_size = QUANTUM_CEILING(try_hdr_size);
try_cnt0_offset = try_hdr_size; try_cnt0_offset = try_hdr_size;
@ -1507,6 +1510,63 @@ arena_salloc(const void *ptr)
} }
#ifdef JEMALLOC_PROF #ifdef JEMALLOC_PROF
/*
 * Tag a page-sized large allocation as standing in for a small object.
 *
 * ptr must refer to an allocation for which isalloc() currently reports
 * PAGE_SIZE, and must not be chunk-aligned.  The bin index that
 * small_size2bin[] yields for the requested size is written into the
 * CHUNK_MAP_CLASS_MASK field of the page's map entry; every other bit of the
 * entry is left untouched.
 */
void
arena_prof_promoted(const void *ptr, size_t size)
{
	arena_chunk_t *chunk;
	size_t pageind, binind, mapbits;

	assert(ptr != NULL);
	assert(CHUNK_ADDR2BASE(ptr) != ptr);
	assert(isalloc(ptr) == PAGE_SIZE);

	/* Locate the chunk map entry that describes ptr's page. */
	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
	pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT);

	/* Translate the requested size into its small bin index. */
	binind = small_size2bin[size];
	assert(binind < nbins);

	/* Swap the old class field for the bin index in a single store. */
	mapbits = chunk->map[pageind].bits;
	mapbits &= ~CHUNK_MAP_CLASS_MASK;
	mapbits |= (binind << CHUNK_MAP_CLASS_SHIFT);
	chunk->map[pageind].bits = mapbits;
}
/*
 * Return the size of the arena allocation that ptr refers to, reporting the
 * recorded small size class for promoted objects.
 *
 * - Small region (CHUNK_MAP_LARGE clear): the region size of the owning
 *   run's bin.
 * - Large run: the size stored in the map entry's bits above PAGE_MASK.  If
 *   prof_promote is set, that size is exactly PAGE_SIZE, and the
 *   CHUNK_MAP_CLASS_MASK field is not all ones, the field is read as a bin
 *   index and that bin's region size is returned instead.
 */
size_t
arena_salloc_demote(const void *ptr)
{
	arena_chunk_t *chunk;
	size_t pageind, mapbits, size;

	assert(ptr != NULL);
	assert(CHUNK_ADDR2BASE(ptr) != ptr);

	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
	pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT);
	mapbits = chunk->map[pageind].bits;
	assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);

	if ((mapbits & CHUNK_MAP_LARGE) == 0) {
		/*
		 * Small region: the upper map bits hold this page's offset
		 * within its run, so step back that many pages to reach the
		 * run header.
		 */
		size_t run_pageind = pageind - (mapbits >> PAGE_SHIFT);
		arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
		    (uintptr_t)(run_pageind << PAGE_SHIFT));

		assert(run->magic == ARENA_RUN_MAGIC);
		assert(((uintptr_t)ptr - ((uintptr_t)run +
		    (uintptr_t)run->bin->reg0_offset)) % run->bin->reg_size ==
		    0);
		return (run->bin->reg_size);
	}

	/* Large run: ptr must be page-aligned. */
	assert(((uintptr_t)ptr & PAGE_MASK) == 0);
	size = mapbits & ~PAGE_MASK;
	if (prof_promote && size == PAGE_SIZE) {
		size_t classbits = mapbits & CHUNK_MAP_CLASS_MASK;

		/* An all-ones class field means no bin index was recorded. */
		if (classbits != CHUNK_MAP_CLASS_MASK) {
			size_t binind = (classbits >> CHUNK_MAP_CLASS_SHIFT);

			assert(binind < nbins);
			size = chunk->arena->bins[binind].reg_size;
		}
	}
	assert(size != 0);

	return (size);
}
static inline unsigned static inline unsigned
arena_run_regind(arena_run_t *run, arena_bin_t *bin, const void *ptr, arena_run_regind(arena_run_t *run, arena_bin_t *bin, const void *ptr,
size_t size) size_t size)
@ -1585,6 +1645,9 @@ arena_prof_cnt_get(const void *ptr)
mapbits = chunk->map[pageind].bits; mapbits = chunk->map[pageind].bits;
assert((mapbits & CHUNK_MAP_ALLOCATED) != 0); assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
if ((mapbits & CHUNK_MAP_LARGE) == 0) { if ((mapbits & CHUNK_MAP_LARGE) == 0) {
if (prof_promote)
ret = (prof_thr_cnt_t *)(uintptr_t)1U;
else {
arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
(uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) << (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) <<
PAGE_SHIFT)); PAGE_SHIFT));
@ -1593,11 +1656,12 @@ arena_prof_cnt_get(const void *ptr)
assert(run->magic == ARENA_RUN_MAGIC); assert(run->magic == ARENA_RUN_MAGIC);
regind = arena_run_regind(run, bin, ptr, bin->reg_size); regind = arena_run_regind(run, bin, ptr, bin->reg_size);
ret = *(prof_thr_cnt_t **)((uintptr_t)run + bin->cnt0_offset + ret = *(prof_thr_cnt_t **)((uintptr_t)run +
(regind * sizeof(prof_thr_cnt_t *))); bin->cnt0_offset + (regind *
} else { sizeof(prof_thr_cnt_t *)));
ret = chunk->map[pageind].prof_cnt;
} }
} else
ret = chunk->map[pageind].prof_cnt;
return (ret); return (ret);
} }
@ -1616,8 +1680,9 @@ arena_prof_cnt_set(const void *ptr, prof_thr_cnt_t *cnt)
mapbits = chunk->map[pageind].bits; mapbits = chunk->map[pageind].bits;
assert((mapbits & CHUNK_MAP_ALLOCATED) != 0); assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
if ((mapbits & CHUNK_MAP_LARGE) == 0) { if ((mapbits & CHUNK_MAP_LARGE) == 0) {
if (prof_promote == false) {
arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
(uintptr_t)((pageind - (mapbits & >> PAGE_SHIFT)) << (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) <<
PAGE_SHIFT)); PAGE_SHIFT));
arena_bin_t *bin = run->bin; arena_bin_t *bin = run->bin;
unsigned regind; unsigned regind;
@ -1625,11 +1690,12 @@ arena_prof_cnt_set(const void *ptr, prof_thr_cnt_t *cnt)
assert(run->magic == ARENA_RUN_MAGIC); assert(run->magic == ARENA_RUN_MAGIC);
regind = arena_run_regind(run, bin, ptr, bin->reg_size); regind = arena_run_regind(run, bin, ptr, bin->reg_size);
*((prof_thr_cnt_t **)((uintptr_t)run + bin->cnt0_offset + *((prof_thr_cnt_t **)((uintptr_t)run + bin->cnt0_offset
(regind * sizeof(prof_thr_cnt_t *)))) = cnt; + (regind * sizeof(prof_thr_cnt_t *)))) = cnt;
} else { } else
assert((uintptr_t)cnt == (uintptr_t)1U);
} else
chunk->map[pageind].prof_cnt = cnt; chunk->map[pageind].prof_cnt = cnt;
}
} }
#endif #endif
@ -2330,7 +2396,22 @@ arena_boot(void)
* 4KiB pages), and such configurations are impractical, but * 4KiB pages), and such configurations are impractical, but
* nonetheless we need to protect against this case in order to avoid * nonetheless we need to protect against this case in order to avoid
* undefined behavior. * undefined behavior.
*
* Further constrain nbins to 255 if prof_promote is true, since all
* small size classes, plus a "not small" size class must be stored in
* 8 bits of arena_chunk_map_t's bits field.
*/ */
#ifdef JEMALLOC_PROF
if (opt_prof && prof_promote) {
if (nbins > 255) {
char line_buf[UMAX2S_BUFSIZE];
malloc_write("<jemalloc>: Too many small size classes (");
malloc_write(umax2s(nbins, 10, line_buf));
malloc_write(" > max 255)\n");
abort();
}
} else
#endif
if (nbins > 256) { if (nbins > 256) {
char line_buf[UMAX2S_BUFSIZE]; char line_buf[UMAX2S_BUFSIZE];
malloc_write("<jemalloc>: Too many small size classes ("); malloc_write("<jemalloc>: Too many small size classes (");

View File

@ -835,12 +835,20 @@ JEMALLOC_P(malloc)(size_t size)
} }
#ifdef JEMALLOC_PROF #ifdef JEMALLOC_PROF
if (opt_prof && (cnt = prof_alloc_prep(size)) == NULL) { if (opt_prof) {
if ((cnt = prof_alloc_prep(size)) == NULL) {
ret = NULL; ret = NULL;
goto OOM; goto OOM;
} }
if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && size <=
small_maxclass) {
ret = imalloc(small_maxclass+1);
if (ret != NULL)
arena_prof_promoted(ret, size);
} else
ret = imalloc(size);
} else
#endif #endif
ret = imalloc(size); ret = imalloc(size);
OOM: OOM:
@ -918,12 +926,24 @@ JEMALLOC_P(posix_memalign)(void **memptr, size_t alignment, size_t size)
} }
#ifdef JEMALLOC_PROF #ifdef JEMALLOC_PROF
if (opt_prof && (cnt = prof_alloc_prep(size)) == NULL) { if (opt_prof) {
if ((cnt = prof_alloc_prep(size)) == NULL) {
result = NULL; result = NULL;
ret = EINVAL; ret = EINVAL;
} else {
if (prof_promote && (uintptr_t)cnt !=
(uintptr_t)1U && size <= small_maxclass) {
result = ipalloc(alignment,
small_maxclass+1);
if (result != NULL) {
arena_prof_promoted(result,
size);
}
} else
result = ipalloc(alignment, size);
}
} else } else
#endif #endif
result = ipalloc(alignment, size); result = ipalloc(alignment, size);
} }
@ -992,12 +1012,20 @@ JEMALLOC_P(calloc)(size_t num, size_t size)
} }
#ifdef JEMALLOC_PROF #ifdef JEMALLOC_PROF
if (opt_prof && (cnt = prof_alloc_prep(num_size)) == NULL) { if (opt_prof) {
if ((cnt = prof_alloc_prep(num_size)) == NULL) {
ret = NULL; ret = NULL;
goto RETURN; goto RETURN;
} }
if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && num_size
<= small_maxclass) {
ret = icalloc(small_maxclass+1);
if (ret != NULL)
arena_prof_promoted(ret, num_size);
} else
ret = icalloc(num_size);
} else
#endif #endif
ret = icalloc(num_size); ret = icalloc(num_size);
RETURN: RETURN:
@ -1071,9 +1099,15 @@ JEMALLOC_P(realloc)(void *ptr, size_t size)
ret = NULL; ret = NULL;
goto OOM; goto OOM;
} }
} if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U &&
size <= small_maxclass) {
ret = iralloc(ptr, small_maxclass+1);
if (ret != NULL)
arena_prof_promoted(ret, size);
} else
ret = iralloc(ptr, size);
} else
#endif #endif
ret = iralloc(ptr, size); ret = iralloc(ptr, size);
#ifdef JEMALLOC_PROF #ifdef JEMALLOC_PROF
@ -1104,8 +1138,21 @@ OOM:
ret = NULL; ret = NULL;
} else { } else {
#ifdef JEMALLOC_PROF #ifdef JEMALLOC_PROF
if (opt_prof && (cnt = prof_alloc_prep(size)) == NULL) { if (opt_prof) {
if ((cnt = prof_alloc_prep(size)) == NULL)
ret = NULL; ret = NULL;
else {
if (prof_promote && (uintptr_t)cnt !=
(uintptr_t)1U && size <=
small_maxclass) {
ret = imalloc(small_maxclass+1);
if (ret != NULL) {
arena_prof_promoted(ret,
size);
}
} else
ret = imalloc(size);
}
} else } else
#endif #endif
ret = imalloc(size); ret = imalloc(size);

View File

@ -25,6 +25,7 @@ bool opt_prof_udump = false;
bool opt_prof_leak = false; bool opt_prof_leak = false;
uint64_t prof_interval; uint64_t prof_interval;
bool prof_promote;
/* /*
* Global hash of (prof_bt_t *)-->(prof_ctx_t *). This is the master data * Global hash of (prof_bt_t *)-->(prof_ctx_t *). This is the master data
@ -1250,8 +1251,8 @@ prof_boot0(void)
{ {
/* /*
* opt_prof must be in its final state before any arenas are * opt_prof and prof_promote must be in their final state before any
* initialized, so this function must be executed early. * arenas are initialized, so this function must be executed early.
*/ */
if (opt_lg_prof_sample > 0) { if (opt_lg_prof_sample > 0) {
@ -1272,6 +1273,8 @@ prof_boot0(void)
prof_interval = 0; prof_interval = 0;
} else if (opt_prof) } else if (opt_prof)
prof_interval = (((uint64_t)1U) << opt_lg_prof_interval); prof_interval = (((uint64_t)1U) << opt_lg_prof_interval);
prof_promote = (opt_prof && opt_lg_prof_sample > PAGE_SHIFT);
} }
bool bool