Add per-thread allocation counters and enhance heap sampling.

Add the "thread.allocated" and "thread.deallocated" mallctls, which can
be used to query the total number of bytes ever allocated/deallocated by
the calling thread.
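
For example, a thread can read its own counters through the mallctl()
interface.  This is a minimal sketch (the helper name is illustrative):
the counters only exist when jemalloc is built with --enable-stats, and
depending on the build the public symbols may carry a prefix, which is
why the test suite wraps calls in JEMALLOC_P().

    #include <stdio.h>
    #include <stdint.h>
    #include <jemalloc/jemalloc.h>

    static void
    print_thread_counters(void)
    {
        uint64_t allocated, deallocated;
        size_t sz = sizeof(uint64_t);

        /* Both mallctls are read-only ("r-") and report bytes for the calling thread. */
        if (mallctl("thread.allocated", &allocated, &sz, NULL, 0) == 0 &&
            mallctl("thread.deallocated", &deallocated, &sz, NULL, 0) == 0) {
            printf("allocated: %llu, deallocated: %llu\n",
                (unsigned long long)allocated,
                (unsigned long long)deallocated);
        }
    }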

Add s2u() and sa2u(), which can be used to compute the usable size that
will result from an allocation request of a particular size/alignment.
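
Both helpers are internal inline functions rather than public API.  The
sketch below (with an illustrative helper name) shows the calling
pattern that the refactored ipalloc() and allocm() follow; the exact
size-class boundaries the helpers consult depend on configuration.

    /* Sketch: map a request to the usable size that isalloc() will later report. */
    static size_t
    request_usable_size(size_t size, size_t alignment)
    {
        size_t usize;

        if (alignment == 0)
            usize = s2u(size);    /* e.g. a 17-byte request maps to 32 usable bytes */
        else
            usize = sa2u(size, alignment, NULL);
        /* A return value of 0 signals size_t overflow; callers must fail the request. */
        return (usize);
    }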

Refactor ipalloc() to use sa2u().

Enhance the heap profiler to trigger samples based on usable size rather
than request size.  This has a subtle but important impact on the
accuracy of heap sampling.  For example, prior to this change, 16- and
17-byte objects were sampled at nearly the same rate, even though
17-byte objects actually consume 32 bytes each.  As a result, the sample
could be somewhat skewed relative to the actual memory usage of the
allocated objects.
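
The sampling machinery itself is unchanged in structure; only its input
switches from the requested size to the usable size.  Roughly, the
bytes-based sampler behaves like the simplified sketch below (the
function name is illustrative, and per-thread state plus threshold
re-randomization are elided; the real logic lives in
prof_sample_accum_update()).

    /* Accumulate usable bytes; sample when the running total crosses the threshold. */
    static bool
    sample_skip(size_t usize, uint64_t *accum, uint64_t threshold)
    {
        if (*accum + usize >= threshold) {
            *accum = *accum + usize - threshold;  /* carry the overshoot forward */
            return (false);                       /* take a sample */
        }
        *accum += usize;
        return (true);                            /* skip this allocation */
    }
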
Jason Evans, 2010-10-20 17:39:18 -07:00
commit 93443689a4 (parent 21fb95bba6)
10 changed files with 563 additions and 155 deletions


@ -58,7 +58,8 @@ DSOS := @objroot@lib/libjemalloc@install_suffix@.$(SO).$(REV) \
@objroot@lib/libjemalloc@install_suffix@.$(SO) \
@objroot@lib/libjemalloc@install_suffix@_pic.a
MAN3 := @objroot@doc/jemalloc@install_suffix@.3
-CTESTS := @srcroot@test/allocm.c @srcroot@test/posix_memalign.c \
+CTESTS := @srcroot@test/allocated.c @srcroot@test/allocm.c \
+	@srcroot@test/posix_memalign.c \
@srcroot@test/rallocm.c @srcroot@test/thread_arena.c
.PHONY: all dist install check clean distclean relclean


@ -888,6 +888,21 @@ mallctl), it will be automatically initialized as a side effect of calling this
interface.
.Ed
.\"-----------------------------------------------------------------------------
@roff_stats@.It Sy "thread.allocated (uint64_t) r-"
@roff_stats@.Bd -ragged -offset indent -compact
@roff_stats@Get the total number of bytes ever allocated by the calling thread.
@roff_stats@This counter has the potential to wrap around; it is up to the
@roff_stats@application to appropriately interpret the counter in such cases.
@roff_stats@.Ed
.\"-----------------------------------------------------------------------------
@roff_stats@.It Sy "thread.deallocated (uint64_t) r-"
@roff_stats@.Bd -ragged -offset indent -compact
@roff_stats@Get the total number of bytes ever deallocated by the calling
@roff_stats@thread.
@roff_stats@This counter has the potential to wrap around; it is up to the
@roff_stats@application to appropriately interpret the counter in such cases.
@roff_stats@.Ed
.\"-----------------------------------------------------------------------------
.It Sy "config.debug (bool) r-" .It Sy "config.debug (bool) r-"
.Bd -ragged -offset indent -compact .Bd -ragged -offset indent -compact
--enable-debug was specified during build configuration. --enable-debug was specified during build configuration.


@ -291,6 +291,50 @@ extern pthread_key_t arenas_tsd;
extern arena_t **arenas;
extern unsigned narenas;
#ifdef JEMALLOC_STATS
typedef struct {
uint64_t allocated;
uint64_t deallocated;
} thread_allocated_t;
# ifndef NO_TLS
extern __thread thread_allocated_t thread_allocated_tls;
# define ALLOCATED_GET() thread_allocated_tls.allocated
# define DEALLOCATED_GET() thread_allocated_tls.deallocated
# define ALLOCATED_ADD(a, d) do { \
thread_allocated_tls.allocated += a; \
thread_allocated_tls.deallocated += d; \
} while (0)
# else
extern pthread_key_t thread_allocated_tsd;
# define ALLOCATED_GET() \
(uint64_t)((pthread_getspecific(thread_allocated_tsd) != NULL) \
? ((thread_allocated_t *) \
pthread_getspecific(thread_allocated_tsd))->allocated : 0)
# define DEALLOCATED_GET() \
(uint64_t)((pthread_getspecific(thread_allocated_tsd) != NULL) \
? ((thread_allocated_t \
*)pthread_getspecific(thread_allocated_tsd))->deallocated : \
0)
# define ALLOCATED_ADD(a, d) do { \
thread_allocated_t *thread_allocated = (thread_allocated_t *) \
pthread_getspecific(thread_allocated_tsd); \
if (thread_allocated != NULL) { \
thread_allocated->allocated += (a); \
thread_allocated->deallocated += (d); \
} else { \
thread_allocated = (thread_allocated_t *) \
imalloc(sizeof(thread_allocated_t)); \
if (thread_allocated != NULL) { \
pthread_setspecific(thread_allocated_tsd, \
thread_allocated); \
thread_allocated->allocated = (a); \
thread_allocated->deallocated = (d); \
} \
} \
} while (0)
# endif
#endif
arena_t *arenas_extend(unsigned ind);
arena_t *choose_arena_hard(void);
int buferror(int errnum, char *buf, size_t buflen);
@ -333,6 +377,8 @@ void jemalloc_postfork(void);
#ifndef JEMALLOC_ENABLE_INLINE
size_t pow2_ceil(size_t x);
size_t s2u(size_t size);
size_t sa2u(size_t size, size_t alignment, size_t *run_size_p);
void malloc_write(const char *s);
arena_t *choose_arena(void);
#endif
@ -356,6 +402,117 @@ pow2_ceil(size_t x)
return (x);
}
/*
* Compute usable size that would result from allocating an object with the
* specified size.
*/
JEMALLOC_INLINE size_t
s2u(size_t size)
{
if (size <= small_maxclass)
return arenas[0]->bins[small_size2bin[size]].reg_size;
if (size <= arena_maxclass)
return PAGE_CEILING(size);
return CHUNK_CEILING(size);
}
/*
* Compute usable size that would result from allocating an object with the
* specified size and alignment.
*/
JEMALLOC_INLINE size_t
sa2u(size_t size, size_t alignment, size_t *run_size_p)
{
size_t usize;
/*
* Round size up to the nearest multiple of alignment.
*
* This done, we can take advantage of the fact that for each small
* size class, every object is aligned at the smallest power of two
* that is non-zero in the base two representation of the size. For
* example:
*
* Size | Base 2 | Minimum alignment
* -----+----------+------------------
* 96 | 1100000 | 32
* 144 | 10100000 | 32
* 192 | 11000000 | 64
*
* Depending on runtime settings, it is possible that arena_malloc()
* will further round up to a power of two, but that never causes
* correctness issues.
*/
usize = (size + (alignment - 1)) & (-alignment);
/*
* (usize < size) protects against the combination of maximal
* alignment and size greater than maximal alignment.
*/
if (usize < size) {
/* size_t overflow. */
return (0);
}
if (usize <= arena_maxclass && alignment <= PAGE_SIZE) {
if (usize <= small_maxclass) {
return
(arenas[0]->bins[small_size2bin[usize]].reg_size);
}
return (PAGE_CEILING(usize));
} else {
size_t run_size;
/*
* We can't achieve subpage alignment, so round up alignment
* permanently; it makes later calculations simpler.
*/
alignment = PAGE_CEILING(alignment);
usize = PAGE_CEILING(size);
/*
* (usize < size) protects against very large sizes within
* PAGE_SIZE of SIZE_T_MAX.
*
* (usize + alignment < usize) protects against the
* combination of maximal alignment and usize large enough
* to cause overflow. This is similar to the first overflow
* check above, but it needs to be repeated due to the new
* usize value, which may now be *equal* to maximal
* alignment, whereas before we only detected overflow if the
* original size was *greater* than maximal alignment.
*/
if (usize < size || usize + alignment < usize) {
/* size_t overflow. */
return (0);
}
/*
* Calculate the size of the over-size run that arena_palloc()
* would need to allocate in order to guarantee the alignment.
*/
if (usize >= alignment)
run_size = usize + alignment - PAGE_SIZE;
else {
/*
* It is possible that (alignment << 1) will cause
* overflow, but it doesn't matter because we also
* subtract PAGE_SIZE, which in the case of overflow
* leaves us with a very large run_size. That causes
* the first conditional below to fail, which means
* that the bogus run_size value never gets used for
* anything important.
*/
run_size = (alignment << 1) - PAGE_SIZE;
}
if (run_size_p != NULL)
*run_size_p = run_size;
if (run_size <= arena_maxclass)
return (PAGE_CEILING(usize));
return (CHUNK_CEILING(usize));
}
}
/*
* Wrapper around malloc_message() that avoids the need for
* JEMALLOC_P(malloc_message)(...) throughout the code.
@ -435,92 +592,25 @@ JEMALLOC_INLINE void *
ipalloc(size_t size, size_t alignment, bool zero)
{
    void *ret;
-   size_t ceil_size;
+   size_t usize;
+   size_t run_size
+# ifdef JEMALLOC_CC_SILENCE
+       = 0
+# endif
+       ;
-   /*
-    * Round size up to the nearest multiple of alignment.
-    *
-    * This done, we can take advantage of the fact that for each small
-    * size class, every object is aligned at the smallest power of two
-    * that is non-zero in the base two representation of the size.  For
-    * example:
-    *
-    *   Size |   Base 2 | Minimum alignment
-    *   -----+----------+------------------
-    *     96 |  1100000 |                32
-    *    144 | 10100000 |                32
-    *    192 | 11000000 |                64
-    *
-    * Depending on runtime settings, it is possible that arena_malloc()
-    * will further round up to a power of two, but that never causes
-    * correctness issues.
-    */
-   ceil_size = (size + (alignment - 1)) & (-alignment);
-   /*
-    * (ceil_size < size) protects against the combination of maximal
-    * alignment and size greater than maximal alignment.
-    */
-   if (ceil_size < size) {
-       /* size_t overflow. */
-       return (NULL);
-   }
-   if (ceil_size <= PAGE_SIZE || (alignment <= PAGE_SIZE
-       && ceil_size <= arena_maxclass))
-       ret = arena_malloc(ceil_size, zero);
-   else {
-       size_t run_size;
-       /*
-        * We can't achieve subpage alignment, so round up alignment
-        * permanently; it makes later calculations simpler.
-        */
-       alignment = PAGE_CEILING(alignment);
-       ceil_size = PAGE_CEILING(size);
-       /*
-        * (ceil_size < size) protects against very large sizes within
-        * PAGE_SIZE of SIZE_T_MAX.
-        *
-        * (ceil_size + alignment < ceil_size) protects against the
-        * combination of maximal alignment and ceil_size large enough
-        * to cause overflow.  This is similar to the first overflow
-        * check above, but it needs to be repeated due to the new
-        * ceil_size value, which may now be *equal* to maximal
-        * alignment, whereas before we only detected overflow if the
-        * original size was *greater* than maximal alignment.
-        */
-       if (ceil_size < size || ceil_size + alignment < ceil_size) {
-           /* size_t overflow. */
-           return (NULL);
-       }
-       /*
-        * Calculate the size of the over-size run that arena_palloc()
-        * would need to allocate in order to guarantee the alignment.
-        */
-       if (ceil_size >= alignment)
-           run_size = ceil_size + alignment - PAGE_SIZE;
-       else {
-           /*
-            * It is possible that (alignment << 1) will cause
-            * overflow, but it doesn't matter because we also
-            * subtract PAGE_SIZE, which in the case of overflow
-            * leaves us with a very large run_size.  That causes
-            * the first conditional below to fail, which means
-            * that the bogus run_size value never gets used for
-            * anything important.
-            */
-           run_size = (alignment << 1) - PAGE_SIZE;
-       }
-       if (run_size <= arena_maxclass) {
-           ret = arena_palloc(choose_arena(), ceil_size, run_size,
-               alignment, zero);
-       } else if (alignment <= chunksize)
-           ret = huge_malloc(ceil_size, zero);
-       else
-           ret = huge_palloc(ceil_size, alignment, zero);
-   }
+   usize = sa2u(size, alignment, &run_size);
+   if (usize == 0)
+       return (NULL);
+   if (usize <= arena_maxclass && alignment <= PAGE_SIZE)
+       ret = arena_malloc(usize, zero);
+   else if (run_size <= arena_maxclass) {
+       ret = arena_palloc(choose_arena(), usize, run_size, alignment,
+           zero);
+   } else if (alignment <= chunksize)
+       ret = huge_malloc(usize, zero);
+   else
+       ret = huge_palloc(usize, alignment, zero);

    assert(((uintptr_t)ret & (alignment - 1)) == 0);
    return (ret);


@ -179,9 +179,9 @@ extern bool prof_promote;
prof_thr_cnt_t *prof_alloc_prep(size_t size);
prof_ctx_t *prof_ctx_get(const void *ptr);
-void prof_malloc(const void *ptr, prof_thr_cnt_t *cnt);
-void prof_realloc(const void *ptr, prof_thr_cnt_t *cnt, const void *old_ptr,
-    size_t old_size, prof_ctx_t *old_ctx);
+void prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt);
+void prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt,
+    const void *old_ptr, size_t old_size, prof_ctx_t *old_ctx);
void prof_free(const void *ptr);
void prof_idump(void);
bool prof_mdump(const char *filename);


@ -1613,7 +1613,8 @@ arena_palloc(arena_t *arena, size_t size, size_t alloc_size, size_t alignment,
arena_chunk_t *chunk;
assert((size & PAGE_MASK) == 0);
assert((alignment & PAGE_MASK) == 0);
alignment = PAGE_CEILING(alignment);
malloc_mutex_lock(&arena->lock);
ret = (void *)arena_run_alloc(arena, alloc_size, true, zero);


@ -42,6 +42,10 @@ CTL_PROTO(epoch)
CTL_PROTO(tcache_flush)
#endif
CTL_PROTO(thread_arena) CTL_PROTO(thread_arena)
#ifdef JEMALLOC_STATS
CTL_PROTO(thread_allocated)
CTL_PROTO(thread_deallocated)
#endif
CTL_PROTO(config_debug)
CTL_PROTO(config_dss)
CTL_PROTO(config_dynamic_page_shift)
@ -216,6 +220,11 @@ static const ctl_node_t tcache_node[] = {
static const ctl_node_t thread_node[] = {
{NAME("arena"), CTL(thread_arena)}
#ifdef JEMALLOC_STATS
,
{NAME("allocated"), CTL(thread_allocated)},
{NAME("deallocated"), CTL(thread_deallocated)}
#endif
};
static const ctl_node_t config_node[] = {
@ -1092,6 +1101,11 @@ RETURN:
return (ret);
}
#ifdef JEMALLOC_STATS
CTL_RO_GEN(thread_allocated, ALLOCATED_GET(), uint64_t);
CTL_RO_GEN(thread_deallocated, DEALLOCATED_GET(), uint64_t);
#endif
/******************************************************************************/
#ifdef JEMALLOC_DEBUG


@ -15,14 +15,22 @@ __thread arena_t *arenas_tls JEMALLOC_ATTR(tls_model("initial-exec"));
pthread_key_t arenas_tsd; pthread_key_t arenas_tsd;
#endif #endif
#ifdef JEMALLOC_STATS
# ifndef NO_TLS
__thread thread_allocated_t thread_allocated_tls;
# else
pthread_key_t thread_allocated_tsd;
# endif
#endif
/* Set to true once the allocator has been initialized. */ /* Set to true once the allocator has been initialized. */
static bool malloc_initialized = false; static bool malloc_initialized = false;
/* Used to let the initializing thread recursively allocate. */ /* Used to let the initializing thread recursively allocate. */
static pthread_t malloc_initializer = (unsigned long)0; static pthread_t malloc_initializer = (unsigned long)0;
/* Used to avoid initialization races. */ /* Used to avoid initialization races. */
static malloc_mutex_t init_lock = MALLOC_MUTEX_INITIALIZER; static malloc_mutex_t init_lock = MALLOC_MUTEX_INITIALIZER;
#ifdef DYNAMIC_PAGE_SHIFT #ifdef DYNAMIC_PAGE_SHIFT
size_t pagesize; size_t pagesize;
@ -63,6 +71,9 @@ static int opt_narenas_lshift = 0;
static void wrtmessage(void *cbopaque, const char *s); static void wrtmessage(void *cbopaque, const char *s);
static void stats_print_atexit(void); static void stats_print_atexit(void);
static unsigned malloc_ncpus(void); static unsigned malloc_ncpus(void);
#if (defined(JEMALLOC_STATS) && defined(NO_TLS))
static void thread_allocated_cleanup(void *arg);
#endif
static bool malloc_init_hard(void); static bool malloc_init_hard(void);
/******************************************************************************/ /******************************************************************************/
@ -222,6 +233,17 @@ malloc_ncpus(void)
return (ret); return (ret);
} }
#if (defined(JEMALLOC_STATS) && defined(NO_TLS))
static void
thread_allocated_cleanup(void *arg)
{
uint64_t *allocated = (uint64_t *)arg;
if (allocated != NULL)
idalloc(allocated);
}
#endif
/* /*
* FreeBSD's pthreads implementation calls malloc(3), so the malloc * FreeBSD's pthreads implementation calls malloc(3), so the malloc
* implementation has to take pains to avoid infinite recursion during * implementation has to take pains to avoid infinite recursion during
@ -633,6 +655,15 @@ MALLOC_OUT:
return (true); return (true);
} }
#if (defined(JEMALLOC_STATS) && defined(NO_TLS))
/* Initialize allocation counters before any allocations can occur. */
if (pthread_key_create(&thread_allocated_tsd, thread_allocated_cleanup)
!= 0) {
malloc_mutex_unlock(&init_lock);
return (true);
}
#endif
/* /*
* Create enough scaffolding to allow recursive allocation in * Create enough scaffolding to allow recursive allocation in
* malloc_ncpus(). * malloc_ncpus().
@ -766,6 +797,13 @@ void *
JEMALLOC_P(malloc)(size_t size) JEMALLOC_P(malloc)(size_t size)
{ {
void *ret; void *ret;
#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS))
size_t usize
# ifdef JEMALLOC_CC_SILENCE
= 0
# endif
;
#endif
#ifdef JEMALLOC_PROF #ifdef JEMALLOC_PROF
prof_thr_cnt_t *cnt prof_thr_cnt_t *cnt
# ifdef JEMALLOC_CC_SILENCE # ifdef JEMALLOC_CC_SILENCE
@ -801,20 +839,26 @@ JEMALLOC_P(malloc)(size_t size)
#ifdef JEMALLOC_PROF #ifdef JEMALLOC_PROF
if (opt_prof) { if (opt_prof) {
if ((cnt = prof_alloc_prep(size)) == NULL) { usize = s2u(size);
if ((cnt = prof_alloc_prep(usize)) == NULL) {
ret = NULL; ret = NULL;
goto OOM; goto OOM;
} }
if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && size <= if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize <=
small_maxclass) { small_maxclass) {
ret = imalloc(small_maxclass+1); ret = imalloc(small_maxclass+1);
if (ret != NULL) if (ret != NULL)
arena_prof_promoted(ret, size); arena_prof_promoted(ret, usize);
} else } else
ret = imalloc(size); ret = imalloc(size);
} else } else
#endif
{
#ifdef JEMALLOC_STATS
usize = s2u(size);
#endif #endif
ret = imalloc(size); ret = imalloc(size);
}
OOM: OOM:
if (ret == NULL) { if (ret == NULL) {
@ -833,7 +877,13 @@ RETURN:
#endif #endif
#ifdef JEMALLOC_PROF #ifdef JEMALLOC_PROF
if (opt_prof && ret != NULL) if (opt_prof && ret != NULL)
prof_malloc(ret, cnt); prof_malloc(ret, usize, cnt);
#endif
#ifdef JEMALLOC_STATS
if (ret != NULL) {
assert(usize == isalloc(ret));
ALLOCATED_ADD(usize, 0);
}
#endif #endif
return (ret); return (ret);
} }
@ -845,6 +895,13 @@ JEMALLOC_P(posix_memalign)(void **memptr, size_t alignment, size_t size)
{ {
int ret; int ret;
void *result; void *result;
#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS))
size_t usize
# ifdef JEMALLOC_CC_SILENCE
= 0
# endif
;
#endif
#ifdef JEMALLOC_PROF #ifdef JEMALLOC_PROF
prof_thr_cnt_t *cnt prof_thr_cnt_t *cnt
# ifdef JEMALLOC_CC_SILENCE # ifdef JEMALLOC_CC_SILENCE
@ -896,17 +953,18 @@ JEMALLOC_P(posix_memalign)(void **memptr, size_t alignment, size_t size)
#ifdef JEMALLOC_PROF #ifdef JEMALLOC_PROF
if (opt_prof) { if (opt_prof) {
if ((cnt = prof_alloc_prep(size)) == NULL) { usize = sa2u(size, alignment, NULL);
if ((cnt = prof_alloc_prep(usize)) == NULL) {
result = NULL; result = NULL;
ret = EINVAL; ret = EINVAL;
} else { } else {
if (prof_promote && (uintptr_t)cnt != if (prof_promote && (uintptr_t)cnt !=
(uintptr_t)1U && size <= small_maxclass) { (uintptr_t)1U && usize <= small_maxclass) {
result = ipalloc(small_maxclass+1, result = ipalloc(small_maxclass+1,
alignment, false); alignment, false);
if (result != NULL) { if (result != NULL) {
arena_prof_promoted(result, arena_prof_promoted(result,
size); usize);
} }
} else { } else {
result = ipalloc(size, alignment, result = ipalloc(size, alignment,
@ -914,8 +972,13 @@ JEMALLOC_P(posix_memalign)(void **memptr, size_t alignment, size_t size)
} }
} }
} else } else
#endif
{
#ifdef JEMALLOC_STATS
usize = sa2u(size, alignment, NULL);
#endif #endif
result = ipalloc(size, alignment, false); result = ipalloc(size, alignment, false);
}
} }
if (result == NULL) { if (result == NULL) {
@ -934,9 +997,15 @@ JEMALLOC_P(posix_memalign)(void **memptr, size_t alignment, size_t size)
ret = 0; ret = 0;
RETURN: RETURN:
#ifdef JEMALLOC_STATS
if (result != NULL) {
assert(usize == isalloc(result));
ALLOCATED_ADD(usize, 0);
}
#endif
#ifdef JEMALLOC_PROF #ifdef JEMALLOC_PROF
if (opt_prof && result != NULL) if (opt_prof && result != NULL)
prof_malloc(result, cnt); prof_malloc(result, usize, cnt);
#endif #endif
return (ret); return (ret);
} }
@ -948,6 +1017,13 @@ JEMALLOC_P(calloc)(size_t num, size_t size)
{ {
void *ret; void *ret;
size_t num_size; size_t num_size;
#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS))
size_t usize
# ifdef JEMALLOC_CC_SILENCE
= 0
# endif
;
#endif
#ifdef JEMALLOC_PROF #ifdef JEMALLOC_PROF
prof_thr_cnt_t *cnt prof_thr_cnt_t *cnt
# ifdef JEMALLOC_CC_SILENCE # ifdef JEMALLOC_CC_SILENCE
@ -988,20 +1064,26 @@ JEMALLOC_P(calloc)(size_t num, size_t size)
#ifdef JEMALLOC_PROF #ifdef JEMALLOC_PROF
if (opt_prof) { if (opt_prof) {
if ((cnt = prof_alloc_prep(num_size)) == NULL) { usize = s2u(num_size);
if ((cnt = prof_alloc_prep(usize)) == NULL) {
ret = NULL; ret = NULL;
goto RETURN; goto RETURN;
} }
if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && num_size if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize
<= small_maxclass) { <= small_maxclass) {
ret = icalloc(small_maxclass+1); ret = icalloc(small_maxclass+1);
if (ret != NULL) if (ret != NULL)
arena_prof_promoted(ret, num_size); arena_prof_promoted(ret, usize);
} else } else
ret = icalloc(num_size); ret = icalloc(num_size);
} else } else
#endif
{
#ifdef JEMALLOC_STATS
usize = s2u(num_size);
#endif #endif
ret = icalloc(num_size); ret = icalloc(num_size);
}
RETURN: RETURN:
if (ret == NULL) { if (ret == NULL) {
@ -1017,7 +1099,13 @@ RETURN:
#ifdef JEMALLOC_PROF #ifdef JEMALLOC_PROF
if (opt_prof && ret != NULL) if (opt_prof && ret != NULL)
prof_malloc(ret, cnt); prof_malloc(ret, usize, cnt);
#endif
#ifdef JEMALLOC_STATS
if (ret != NULL) {
assert(usize == isalloc(ret));
ALLOCATED_ADD(usize, 0);
}
#endif #endif
return (ret); return (ret);
} }
@ -1027,12 +1115,15 @@ void *
JEMALLOC_P(realloc)(void *ptr, size_t size) JEMALLOC_P(realloc)(void *ptr, size_t size)
{ {
void *ret; void *ret;
#ifdef JEMALLOC_PROF #if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS))
size_t old_size size_t usize
# ifdef JEMALLOC_CC_SILENCE # ifdef JEMALLOC_CC_SILENCE
= 0 = 0
# endif # endif
; ;
size_t old_size = 0;
#endif
#ifdef JEMALLOC_PROF
prof_thr_cnt_t *cnt prof_thr_cnt_t *cnt
# ifdef JEMALLOC_CC_SILENCE # ifdef JEMALLOC_CC_SILENCE
= NULL = NULL
@ -1053,9 +1144,11 @@ JEMALLOC_P(realloc)(void *ptr, size_t size)
#ifdef JEMALLOC_SYSV #ifdef JEMALLOC_SYSV
else { else {
if (ptr != NULL) { if (ptr != NULL) {
#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS))
old_size = isalloc(ptr);
#endif
#ifdef JEMALLOC_PROF #ifdef JEMALLOC_PROF
if (opt_prof) { if (opt_prof) {
old_size = isalloc(ptr);
old_ctx = prof_ctx_get(ptr); old_ctx = prof_ctx_get(ptr);
cnt = NULL; cnt = NULL;
} }
@ -1064,7 +1157,6 @@ JEMALLOC_P(realloc)(void *ptr, size_t size)
} }
#ifdef JEMALLOC_PROF #ifdef JEMALLOC_PROF
else if (opt_prof) { else if (opt_prof) {
old_size = 0;
old_ctx = NULL; old_ctx = NULL;
cnt = NULL; cnt = NULL;
} }
@ -1079,25 +1171,33 @@ JEMALLOC_P(realloc)(void *ptr, size_t size)
assert(malloc_initialized || malloc_initializer == assert(malloc_initialized || malloc_initializer ==
pthread_self()); pthread_self());
#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS))
old_size = isalloc(ptr);
#endif
#ifdef JEMALLOC_PROF #ifdef JEMALLOC_PROF
if (opt_prof) { if (opt_prof) {
old_size = isalloc(ptr); usize = s2u(size);
old_ctx = prof_ctx_get(ptr); old_ctx = prof_ctx_get(ptr);
if ((cnt = prof_alloc_prep(size)) == NULL) { if ((cnt = prof_alloc_prep(usize)) == NULL) {
ret = NULL; ret = NULL;
goto OOM; goto OOM;
} }
if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U &&
size <= small_maxclass) { usize <= small_maxclass) {
ret = iralloc(ptr, small_maxclass+1, 0, 0, ret = iralloc(ptr, small_maxclass+1, 0, 0,
false, false); false, false);
if (ret != NULL) if (ret != NULL)
arena_prof_promoted(ret, size); arena_prof_promoted(ret, usize);
} else } else
ret = iralloc(ptr, size, 0, 0, false, false); ret = iralloc(ptr, size, 0, 0, false, false);
} else } else
#endif
{
#ifdef JEMALLOC_STATS
usize = s2u(size);
#endif #endif
ret = iralloc(ptr, size, 0, 0, false, false); ret = iralloc(ptr, size, 0, 0, false, false);
}
#ifdef JEMALLOC_PROF #ifdef JEMALLOC_PROF
OOM: OOM:
@ -1114,10 +1214,8 @@ OOM:
} }
} else { } else {
#ifdef JEMALLOC_PROF #ifdef JEMALLOC_PROF
if (opt_prof) { if (opt_prof)
old_size = 0;
old_ctx = NULL; old_ctx = NULL;
}
#endif #endif
if (malloc_init()) { if (malloc_init()) {
#ifdef JEMALLOC_PROF #ifdef JEMALLOC_PROF
@ -1128,23 +1226,29 @@ OOM:
} else { } else {
#ifdef JEMALLOC_PROF #ifdef JEMALLOC_PROF
if (opt_prof) { if (opt_prof) {
if ((cnt = prof_alloc_prep(size)) == NULL) usize = s2u(size);
if ((cnt = prof_alloc_prep(usize)) == NULL)
ret = NULL; ret = NULL;
else { else {
if (prof_promote && (uintptr_t)cnt != if (prof_promote && (uintptr_t)cnt !=
(uintptr_t)1U && size <= (uintptr_t)1U && usize <=
small_maxclass) { small_maxclass) {
ret = imalloc(small_maxclass+1); ret = imalloc(small_maxclass+1);
if (ret != NULL) { if (ret != NULL) {
arena_prof_promoted(ret, arena_prof_promoted(ret,
size); usize);
} }
} else } else
ret = imalloc(size); ret = imalloc(size);
} }
} else } else
#endif
{
#ifdef JEMALLOC_STATS
usize = s2u(size);
#endif #endif
ret = imalloc(size); ret = imalloc(size);
}
} }
if (ret == NULL) { if (ret == NULL) {
@ -1164,7 +1268,13 @@ RETURN:
#endif #endif
#ifdef JEMALLOC_PROF #ifdef JEMALLOC_PROF
if (opt_prof) if (opt_prof)
prof_realloc(ret, cnt, ptr, old_size, old_ctx); prof_realloc(ret, usize, cnt, ptr, old_size, old_ctx);
#endif
#ifdef JEMALLOC_STATS
if (ret != NULL) {
assert(usize == isalloc(ret));
ALLOCATED_ADD(usize, old_size);
}
#endif #endif
return (ret); return (ret);
} }
@ -1181,6 +1291,9 @@ JEMALLOC_P(free)(void *ptr)
#ifdef JEMALLOC_PROF #ifdef JEMALLOC_PROF
if (opt_prof) if (opt_prof)
prof_free(ptr); prof_free(ptr);
#endif
#ifdef JEMALLOC_STATS
ALLOCATED_ADD(0, isalloc(ptr));
#endif #endif
idalloc(ptr); idalloc(ptr);
} }
@ -1325,6 +1438,7 @@ int
JEMALLOC_P(allocm)(void **ptr, size_t *rsize, size_t size, int flags) JEMALLOC_P(allocm)(void **ptr, size_t *rsize, size_t size, int flags)
{ {
void *p; void *p;
size_t usize;
size_t alignment = (ZU(1) << (flags & ALLOCM_LG_ALIGN_MASK) size_t alignment = (ZU(1) << (flags & ALLOCM_LG_ALIGN_MASK)
& (SIZE_T_MAX-1)); & (SIZE_T_MAX-1));
bool zero = flags & ALLOCM_ZERO; bool zero = flags & ALLOCM_ZERO;
@ -1340,30 +1454,48 @@ JEMALLOC_P(allocm)(void **ptr, size_t *rsize, size_t size, int flags)
#ifdef JEMALLOC_PROF #ifdef JEMALLOC_PROF
if (opt_prof) { if (opt_prof) {
if ((cnt = prof_alloc_prep(size)) == NULL) usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment,
NULL);
if ((cnt = prof_alloc_prep(usize)) == NULL)
goto OOM; goto OOM;
if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && size <= if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize <=
small_maxclass) { small_maxclass) {
p = iallocm(small_maxclass+1, alignment, zero); p = iallocm(small_maxclass+1, alignment, zero);
if (p == NULL) if (p == NULL)
goto OOM; goto OOM;
arena_prof_promoted(p, size); arena_prof_promoted(p, usize);
} else { } else {
p = iallocm(size, alignment, zero); p = iallocm(size, alignment, zero);
if (p == NULL) if (p == NULL)
goto OOM; goto OOM;
} }
if (rsize != NULL)
*rsize = usize;
} else } else
#endif #endif
{ {
p = iallocm(size, alignment, zero); p = iallocm(size, alignment, zero);
if (p == NULL) if (p == NULL)
goto OOM; goto OOM;
#ifndef JEMALLOC_STATS
if (rsize != NULL)
#endif
{
usize = (alignment == 0) ? s2u(size) : sa2u(size,
alignment, NULL);
#ifdef JEMALLOC_STATS
if (rsize != NULL)
#endif
*rsize = usize;
}
} }
*ptr = p; *ptr = p;
if (rsize != NULL) #ifdef JEMALLOC_STATS
*rsize = isalloc(p); assert(usize == isalloc(p));
ALLOCATED_ADD(usize, 0);
#endif
return (ALLOCM_SUCCESS); return (ALLOCM_SUCCESS);
OOM: OOM:
#ifdef JEMALLOC_XMALLOC #ifdef JEMALLOC_XMALLOC
@ -1384,12 +1516,15 @@ JEMALLOC_P(rallocm)(void **ptr, size_t *rsize, size_t size, size_t extra,
int flags) int flags)
{ {
void *p, *q; void *p, *q;
size_t usize;
#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS))
size_t old_size;
#endif
size_t alignment = (ZU(1) << (flags & ALLOCM_LG_ALIGN_MASK) size_t alignment = (ZU(1) << (flags & ALLOCM_LG_ALIGN_MASK)
& (SIZE_T_MAX-1)); & (SIZE_T_MAX-1));
bool zero = flags & ALLOCM_ZERO; bool zero = flags & ALLOCM_ZERO;
bool no_move = flags & ALLOCM_NO_MOVE; bool no_move = flags & ALLOCM_NO_MOVE;
#ifdef JEMALLOC_PROF #ifdef JEMALLOC_PROF
size_t old_size;
prof_thr_cnt_t *cnt; prof_thr_cnt_t *cnt;
prof_ctx_t *old_ctx; prof_ctx_t *old_ctx;
#endif #endif
@ -1403,36 +1538,60 @@ JEMALLOC_P(rallocm)(void **ptr, size_t *rsize, size_t size, size_t extra,
p = *ptr; p = *ptr;
#ifdef JEMALLOC_PROF #ifdef JEMALLOC_PROF
if (opt_prof) { if (opt_prof) {
/*
* usize isn't knowable before iralloc() returns when extra is
* non-zero. Therefore, compute its maximum possible value and
* use that in prof_alloc_prep() to decide whether to capture a
* backtrace. prof_realloc() will use the actual usize to
* decide whether to sample.
*/
size_t max_usize = (alignment == 0) ? s2u(size+extra) :
sa2u(size+extra, alignment, NULL);
old_size = isalloc(p); old_size = isalloc(p);
old_ctx = prof_ctx_get(p); old_ctx = prof_ctx_get(p);
if ((cnt = prof_alloc_prep(size)) == NULL) if ((cnt = prof_alloc_prep(max_usize)) == NULL)
goto OOM; goto OOM;
if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && size <= if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && max_usize
small_maxclass) { <= small_maxclass) {
q = iralloc(p, small_maxclass+1, (small_maxclass+1 >= q = iralloc(p, small_maxclass+1, (small_maxclass+1 >=
size+extra) ? 0 : size+extra - (small_maxclass+1), size+extra) ? 0 : size+extra - (small_maxclass+1),
alignment, zero, no_move); alignment, zero, no_move);
if (q == NULL) if (q == NULL)
goto ERR; goto ERR;
arena_prof_promoted(q, size); usize = isalloc(q);
arena_prof_promoted(q, usize);
} else { } else {
q = iralloc(p, size, extra, alignment, zero, no_move); q = iralloc(p, size, extra, alignment, zero, no_move);
if (q == NULL) if (q == NULL)
goto ERR; goto ERR;
usize = isalloc(q);
} }
prof_realloc(q, cnt, p, old_size, old_ctx); prof_realloc(q, usize, cnt, p, old_size, old_ctx);
} else } else
#endif #endif
{ {
#ifdef JEMALLOC_STATS
old_size = isalloc(p);
#endif
q = iralloc(p, size, extra, alignment, zero, no_move); q = iralloc(p, size, extra, alignment, zero, no_move);
if (q == NULL) if (q == NULL)
goto ERR; goto ERR;
#ifndef JEMALLOC_STATS
if (rsize != NULL)
#endif
{
usize = isalloc(q);
#ifdef JEMALLOC_STATS
if (rsize != NULL)
#endif
*rsize = usize;
}
} }
*ptr = q; *ptr = q;
if (rsize != NULL) #ifdef JEMALLOC_STATS
*rsize = isalloc(q); ALLOCATED_ADD(usize, old_size);
#endif
return (ALLOCM_SUCCESS); return (ALLOCM_SUCCESS);
ERR: ERR:
if (no_move) if (no_move)
@ -1483,6 +1642,9 @@ JEMALLOC_P(dallocm)(void *ptr, int flags)
#ifdef JEMALLOC_PROF #ifdef JEMALLOC_PROF
if (opt_prof) if (opt_prof)
prof_free(ptr); prof_free(ptr);
#endif
#ifdef JEMALLOC_STATS
ALLOCATED_ADD(0, isalloc(ptr));
#endif #endif
idalloc(ptr); idalloc(ptr);


@ -47,7 +47,8 @@ static __thread prof_tcache_t *prof_tcache_tls
pthread_setspecific(prof_tcache_tsd, (void *)(v)); \ pthread_setspecific(prof_tcache_tsd, (void *)(v)); \
} while (0) } while (0)
#else #else
# define PROF_TCACHE_GET() ((ckh_t *)pthread_getspecific(prof_tcache_tsd)) # define PROF_TCACHE_GET() \
((prof_tcache_t *)pthread_getspecific(prof_tcache_tsd))
# define PROF_TCACHE_SET(v) do { \ # define PROF_TCACHE_SET(v) do { \
pthread_setspecific(prof_tcache_tsd, (void *)(v)); \ pthread_setspecific(prof_tcache_tsd, (void *)(v)); \
} while (0) } while (0)
@ -69,7 +70,7 @@ static __thread void **vec_tls
pthread_setspecific(vec_tsd, (void *)(v)); \ pthread_setspecific(vec_tsd, (void *)(v)); \
} while (0) } while (0)
#else #else
# define VEC_GET() ((ckh_t *)pthread_getspecific(vec_tsd)) # define VEC_GET() ((void **)pthread_getspecific(vec_tsd))
# define VEC_SET(v) do { \ # define VEC_SET(v) do { \
pthread_setspecific(vec_tsd, (void *)(v)); \ pthread_setspecific(vec_tsd, (void *)(v)); \
} while (0) } while (0)
@ -106,7 +107,8 @@ prof_sample_state_t prof_sample_state_oom;
r = (prof_sample_state_t *)pthread_getspecific( \ r = (prof_sample_state_t *)pthread_getspecific( \
prof_sample_state_tsd); \ prof_sample_state_tsd); \
if (r == NULL) { \ if (r == NULL) { \
r = ipalloc(sizeof(prof_sample_state_t), CACHELINE); \ r = ipalloc(sizeof(prof_sample_state_t), CACHELINE, \
false); \
if (r == NULL) { \ if (r == NULL) { \
malloc_write("<jemalloc>: Error in heap " \ malloc_write("<jemalloc>: Error in heap " \
"profiler: out of memory; subsequent heap " \ "profiler: out of memory; subsequent heap " \
@ -658,6 +660,8 @@ prof_alloc_prep(size_t size)
void **vec; void **vec;
prof_bt_t bt; prof_bt_t bt;
assert(size == s2u(size));
vec = VEC_GET(); vec = VEC_GET();
if (vec == NULL) { if (vec == NULL) {
vec = imalloc(sizeof(void *) * prof_bt_max); vec = imalloc(sizeof(void *) * prof_bt_max);
@ -750,7 +754,7 @@ prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
huge_prof_ctx_set(ptr, ctx); huge_prof_ctx_set(ptr, ctx);
} }
static inline void static inline bool
prof_sample_accum_update(size_t size) prof_sample_accum_update(size_t size)
{ {
prof_sample_state_t *prof_sample_state; prof_sample_state_t *prof_sample_state;
@ -771,22 +775,33 @@ prof_sample_accum_update(size_t size)
prof_sample_state->threshold; prof_sample_state->threshold;
prof_sample_threshold_update(); prof_sample_threshold_update();
} }
} else return (false);
} else {
prof_sample_state->accum += size; prof_sample_state->accum += size;
return (true);
}
} }
void void
prof_malloc(const void *ptr, prof_thr_cnt_t *cnt) prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt)
{ {
size_t size;
assert(ptr != NULL); assert(ptr != NULL);
assert(size == s2u(size));
if (opt_lg_prof_sample != 0) { if (opt_lg_prof_sample != 0) {
size = isalloc(ptr); if (prof_sample_accum_update(size)) {
prof_sample_accum_update(size); /*
} else if ((uintptr_t)cnt > (uintptr_t)1U) * Don't sample. For malloc()-like allocation, it is
size = isalloc(ptr); * always possible to tell in advance how large an
* object's usable size will be, so there should never
* be a difference between the size passed to
* prof_alloc_prep() and prof_malloc().
*/
assert(false);
return;
}
}
if ((uintptr_t)cnt > (uintptr_t)1U) { if ((uintptr_t)cnt > (uintptr_t)1U) {
prof_ctx_set(ptr, cnt->ctx); prof_ctx_set(ptr, cnt->ctx);
@ -813,24 +828,27 @@ prof_malloc(const void *ptr, prof_thr_cnt_t *cnt)
} }
void void
prof_realloc(const void *ptr, prof_thr_cnt_t *cnt, const void *old_ptr, prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt,
size_t old_size, prof_ctx_t *old_ctx) const void *old_ptr, size_t old_size, prof_ctx_t *old_ctx)
{ {
size_t size
#ifdef JEMALLOC_CC_SILENCE
= 0
#endif
;
prof_thr_cnt_t *told_cnt; prof_thr_cnt_t *told_cnt;
assert(ptr != NULL || (uintptr_t)cnt <= (uintptr_t)1U); assert(ptr != NULL || (uintptr_t)cnt <= (uintptr_t)1U);
if (ptr != NULL) { if (ptr != NULL) {
if (opt_lg_prof_sample != 0) { if (opt_lg_prof_sample != 0) {
size = isalloc(ptr); if (prof_sample_accum_update(size)) {
prof_sample_accum_update(size); /*
} else if ((uintptr_t)cnt > (uintptr_t)1U) * Don't sample. The size passed to
size = isalloc(ptr); * prof_alloc_prep() was larger than what
* actually got allocated., so a backtrace was
* captured for this allocation, even though
* its actual size was insufficient to cross
* the sample threshold.
*/
return;
}
}
} }
if ((uintptr_t)old_ctx > (uintptr_t)1U) { if ((uintptr_t)old_ctx > (uintptr_t)1U) {

jemalloc/test/allocated.c (new file, 105 lines)

@ -0,0 +1,105 @@
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <stdbool.h>
#include <pthread.h>
#include <assert.h>
#include <errno.h>
#include <string.h>
#define JEMALLOC_MANGLE
#include "jemalloc_test.h"
void *
thread_start(void *arg)
{
int err;
void *p;
uint64_t a0, a1, d0, d1;
size_t sz, usize;
sz = sizeof(a0);
if ((err = JEMALLOC_P(mallctl)("thread.allocated", &a0, &sz, NULL,
0))) {
if (err == ENOENT) {
#ifdef JEMALLOC_STATS
assert(false);
#endif
goto RETURN;
}
fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__,
strerror(err));
exit(1);
}
sz = sizeof(d0);
if ((err = JEMALLOC_P(mallctl)("thread.deallocated", &d0, &sz, NULL,
0))) {
if (err == ENOENT) {
#ifdef JEMALLOC_STATS
assert(false);
#endif
goto RETURN;
}
fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__,
strerror(err));
exit(1);
}
p = JEMALLOC_P(malloc)(1);
if (p == NULL) {
fprintf(stderr, "%s(): Error in malloc()\n", __func__);
exit(1);
}
sz = sizeof(a1);
JEMALLOC_P(mallctl)("thread.allocated", &a1, &sz, NULL, 0);
usize = JEMALLOC_P(malloc_usable_size)(p);
assert(a0 + usize <= a1);
JEMALLOC_P(free)(p);
sz = sizeof(d1);
JEMALLOC_P(mallctl)("thread.deallocated", &d1, &sz, NULL, 0);
assert(d0 + usize <= d1);
RETURN:
return (NULL);
}
int
main(void)
{
int ret = 0;
pthread_t thread;
fprintf(stderr, "Test begin\n");
thread_start(NULL);
if (pthread_create(&thread, NULL, thread_start, NULL)
!= 0) {
fprintf(stderr, "%s(): Error in pthread_create()\n", __func__);
ret = 1;
goto RETURN;
}
pthread_join(thread, (void *)&ret);
thread_start(NULL);
if (pthread_create(&thread, NULL, thread_start, NULL)
!= 0) {
fprintf(stderr, "%s(): Error in pthread_create()\n", __func__);
ret = 1;
goto RETURN;
}
pthread_join(thread, (void *)&ret);
thread_start(NULL);
RETURN:
fprintf(stderr, "Test end\n");
return (ret);
}


@ -0,0 +1,2 @@
Test begin
Test end