diff --git a/jemalloc/Makefile.in b/jemalloc/Makefile.in index 7863c1b7..ca807fda 100644 --- a/jemalloc/Makefile.in +++ b/jemalloc/Makefile.in @@ -58,7 +58,8 @@ DSOS := @objroot@lib/libjemalloc@install_suffix@.$(SO).$(REV) \ @objroot@lib/libjemalloc@install_suffix@.$(SO) \ @objroot@lib/libjemalloc@install_suffix@_pic.a MAN3 := @objroot@doc/jemalloc@install_suffix@.3 -CTESTS := @srcroot@test/allocm.c @srcroot@test/posix_memalign.c \ +CTESTS := @srcroot@test/allocated.c @srcroot@test/allocm.c \ + @srcroot@test/posix_memalign.c \ @srcroot@test/rallocm.c @srcroot@test/thread_arena.c .PHONY: all dist install check clean distclean relclean diff --git a/jemalloc/doc/jemalloc.3.in b/jemalloc/doc/jemalloc.3.in index c8d2f29c..5202a2bf 100644 --- a/jemalloc/doc/jemalloc.3.in +++ b/jemalloc/doc/jemalloc.3.in @@ -888,6 +888,21 @@ mallctl), it will be automatically initialized as a side effect of calling this interface. .Ed .\"----------------------------------------------------------------------------- +@roff_stats@.It Sy "thread.allocated (uint64_t) r-" +@roff_stats@.Bd -ragged -offset indent -compact +@roff_stats@Get the total number of bytes ever allocated by the calling thread. +@roff_stats@This counter has the potential to wrap around; it is up to the +@roff_stats@application to appropriately interpret the counter in such cases. +@roff_stats@.Ed +.\"----------------------------------------------------------------------------- +@roff_stats@.It Sy "thread.deallocated (uint64_t) r-" +@roff_stats@.Bd -ragged -offset indent -compact +@roff_stats@Get the total number of bytes ever deallocated by the calling +@roff_stats@thread. +@roff_stats@This counter has the potential to wrap around; it is up to the +@roff_stats@application to appropriately interpret the counter in such cases. +@roff_stats@.Ed +.\"----------------------------------------------------------------------------- .It Sy "config.debug (bool) r-" .Bd -ragged -offset indent -compact --enable-debug was specified during build configuration. diff --git a/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in b/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in index 6ad7b064..eb609624 100644 --- a/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in +++ b/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in @@ -291,6 +291,50 @@ extern pthread_key_t arenas_tsd; extern arena_t **arenas; extern unsigned narenas; +#ifdef JEMALLOC_STATS +typedef struct { + uint64_t allocated; + uint64_t deallocated; +} thread_allocated_t; +# ifndef NO_TLS +extern __thread thread_allocated_t thread_allocated_tls; +# define ALLOCATED_GET() thread_allocated_tls.allocated +# define DEALLOCATED_GET() thread_allocated_tls.deallocated +# define ALLOCATED_ADD(a, d) do { \ + thread_allocated_tls.allocated += a; \ + thread_allocated_tls.deallocated += d; \ +} while (0) +# else +extern pthread_key_t thread_allocated_tsd; +# define ALLOCATED_GET() \ + (uint64_t)((pthread_getspecific(thread_allocated_tsd) != NULL) \ + ? ((thread_allocated_t *) \ + pthread_getspecific(thread_allocated_tsd))->allocated : 0) +# define DEALLOCATED_GET() \ + (uint64_t)((pthread_getspecific(thread_allocated_tsd) != NULL) \ + ? 
((thread_allocated_t \ + *)pthread_getspecific(thread_allocated_tsd))->deallocated : \ + 0) +# define ALLOCATED_ADD(a, d) do { \ + thread_allocated_t *thread_allocated = (thread_allocated_t *) \ + pthread_getspecific(thread_allocated_tsd); \ + if (thread_allocated != NULL) { \ + thread_allocated->allocated += (a); \ + thread_allocated->deallocated += (d); \ + } else { \ + thread_allocated = (thread_allocated_t *) \ + imalloc(sizeof(thread_allocated_t)); \ + if (thread_allocated != NULL) { \ + pthread_setspecific(thread_allocated_tsd, \ + thread_allocated); \ + thread_allocated->allocated = (a); \ + thread_allocated->deallocated = (d); \ + } \ + } \ +} while (0) +# endif +#endif + arena_t *arenas_extend(unsigned ind); arena_t *choose_arena_hard(void); int buferror(int errnum, char *buf, size_t buflen); @@ -333,6 +377,8 @@ void jemalloc_postfork(void); #ifndef JEMALLOC_ENABLE_INLINE size_t pow2_ceil(size_t x); +size_t s2u(size_t size); +size_t sa2u(size_t size, size_t alignment, size_t *run_size_p); void malloc_write(const char *s); arena_t *choose_arena(void); #endif @@ -356,6 +402,117 @@ pow2_ceil(size_t x) return (x); } +/* + * Compute usable size that would result from allocating an object with the + * specified size. + */ +JEMALLOC_INLINE size_t +s2u(size_t size) +{ + + if (size <= small_maxclass) + return arenas[0]->bins[small_size2bin[size]].reg_size; + if (size <= arena_maxclass) + return PAGE_CEILING(size); + return CHUNK_CEILING(size); +} + +/* + * Compute usable size that would result from allocating an object with the + * specified size and alignment. + */ +JEMALLOC_INLINE size_t +sa2u(size_t size, size_t alignment, size_t *run_size_p) +{ + size_t usize; + + /* + * Round size up to the nearest multiple of alignment. + * + * This done, we can take advantage of the fact that for each small + * size class, every object is aligned at the smallest power of two + * that is non-zero in the base two representation of the size. For + * example: + * + * Size | Base 2 | Minimum alignment + * -----+----------+------------------ + * 96 | 1100000 | 32 + * 144 | 10100000 | 32 + * 192 | 11000000 | 64 + * + * Depending on runtime settings, it is possible that arena_malloc() + * will further round up to a power of two, but that never causes + * correctness issues. + */ + usize = (size + (alignment - 1)) & (-alignment); + /* + * (usize < size) protects against the combination of maximal + * alignment and size greater than maximal alignment. + */ + if (usize < size) { + /* size_t overflow. */ + return (0); + } + + if (usize <= arena_maxclass && alignment <= PAGE_SIZE) { + if (usize <= small_maxclass) { + return + (arenas[0]->bins[small_size2bin[usize]].reg_size); + } + return (PAGE_CEILING(usize)); + } else { + size_t run_size; + + /* + * We can't achieve subpage alignment, so round up alignment + * permanently; it makes later calculations simpler. + */ + alignment = PAGE_CEILING(alignment); + usize = PAGE_CEILING(size); + /* + * (usize < size) protects against very large sizes within + * PAGE_SIZE of SIZE_T_MAX. + * + * (usize + alignment < usize) protects against the + * combination of maximal alignment and usize large enough + * to cause overflow. This is similar to the first overflow + * check above, but it needs to be repeated due to the new + * usize value, which may now be *equal* to maximal + * alignment, whereas before we only detected overflow if the + * original size was *greater* than maximal alignment. + */ + if (usize < size || usize + alignment < usize) { + /* size_t overflow. 
*/ + return (0); + } + + /* + * Calculate the size of the over-size run that arena_palloc() + * would need to allocate in order to guarantee the alignment. + */ + if (usize >= alignment) + run_size = usize + alignment - PAGE_SIZE; + else { + /* + * It is possible that (alignment << 1) will cause + * overflow, but it doesn't matter because we also + * subtract PAGE_SIZE, which in the case of overflow + * leaves us with a very large run_size. That causes + * the first conditional below to fail, which means + * that the bogus run_size value never gets used for + * anything important. + */ + run_size = (alignment << 1) - PAGE_SIZE; + } + if (run_size_p != NULL) + *run_size_p = run_size; + + if (run_size <= arena_maxclass) + return (PAGE_CEILING(usize)); + return (CHUNK_CEILING(usize)); + } +} + /* * Wrapper around malloc_message() that avoids the need for * JEMALLOC_P(malloc_message)(...) throughout the code. @@ -435,92 +592,25 @@ JEMALLOC_INLINE void * ipalloc(size_t size, size_t alignment, bool zero) { void *ret; - size_t ceil_size; + size_t usize; + size_t run_size +# ifdef JEMALLOC_CC_SILENCE + = 0 +# endif + ; - /* - * Round size up to the nearest multiple of alignment. - * - * This done, we can take advantage of the fact that for each small - * size class, every object is aligned at the smallest power of two - * that is non-zero in the base two representation of the size. For - * example: - * - * Size | Base 2 | Minimum alignment - * -----+----------+------------------ - * 96 | 1100000 | 32 - * 144 | 10100000 | 32 - * 192 | 11000000 | 64 - * - * Depending on runtime settings, it is possible that arena_malloc() - * will further round up to a power of two, but that never causes - * correctness issues. - */ - ceil_size = (size + (alignment - 1)) & (-alignment); - /* - * (ceil_size < size) protects against the combination of maximal - * alignment and size greater than maximal alignment. - */ - if (ceil_size < size) { - /* size_t overflow. */ + usize = sa2u(size, alignment, &run_size); + if (usize == 0) return (NULL); - } - - if (ceil_size <= PAGE_SIZE || (alignment <= PAGE_SIZE - && ceil_size <= arena_maxclass)) - ret = arena_malloc(ceil_size, zero); - else { - size_t run_size; - - /* - * We can't achieve subpage alignment, so round up alignment - * permanently; it makes later calculations simpler. - */ - alignment = PAGE_CEILING(alignment); - ceil_size = PAGE_CEILING(size); - /* - * (ceil_size < size) protects against very large sizes within - * PAGE_SIZE of SIZE_T_MAX. - * - * (ceil_size + alignment < ceil_size) protects against the - * combination of maximal alignment and ceil_size large enough - * to cause overflow. This is similar to the first overflow - * check above, but it needs to be repeated due to the new - * ceil_size value, which may now be *equal* to maximal - * alignment, whereas before we only detected overflow if the - * original size was *greater* than maximal alignment. - */ - if (ceil_size < size || ceil_size + alignment < ceil_size) { - /* size_t overflow. */ - return (NULL); - } - - /* - * Calculate the size of the over-size run that arena_palloc() - * would need to allocate in order to guarantee the alignment. - */ - if (ceil_size >= alignment) - run_size = ceil_size + alignment - PAGE_SIZE; - else { - /* - * It is possible that (alignment << 1) will cause - * overflow, but it doesn't matter because we also - * subtract PAGE_SIZE, which in the case of overflow - * leaves us with a very large run_size. 
That causes - * the first conditional below to fail, which means - * that the bogus run_size value never gets used for - * anything important. - */ - run_size = (alignment << 1) - PAGE_SIZE; - } - - if (run_size <= arena_maxclass) { - ret = arena_palloc(choose_arena(), ceil_size, run_size, - alignment, zero); - } else if (alignment <= chunksize) - ret = huge_malloc(ceil_size, zero); - else - ret = huge_palloc(ceil_size, alignment, zero); - } + if (usize <= arena_maxclass && alignment <= PAGE_SIZE) + ret = arena_malloc(usize, zero); + else if (run_size <= arena_maxclass) { + ret = arena_palloc(choose_arena(), usize, run_size, alignment, + zero); + } else if (alignment <= chunksize) + ret = huge_malloc(usize, zero); + else + ret = huge_palloc(usize, alignment, zero); assert(((uintptr_t)ret & (alignment - 1)) == 0); return (ret); diff --git a/jemalloc/include/jemalloc/internal/prof.h b/jemalloc/include/jemalloc/internal/prof.h index 1aa85bb5..0d139da7 100644 --- a/jemalloc/include/jemalloc/internal/prof.h +++ b/jemalloc/include/jemalloc/internal/prof.h @@ -179,9 +179,9 @@ extern bool prof_promote; prof_thr_cnt_t *prof_alloc_prep(size_t size); prof_ctx_t *prof_ctx_get(const void *ptr); -void prof_malloc(const void *ptr, prof_thr_cnt_t *cnt); -void prof_realloc(const void *ptr, prof_thr_cnt_t *cnt, const void *old_ptr, - size_t old_size, prof_ctx_t *old_ctx); +void prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt); +void prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt, + const void *old_ptr, size_t old_size, prof_ctx_t *old_ctx); void prof_free(const void *ptr); void prof_idump(void); bool prof_mdump(const char *filename); diff --git a/jemalloc/src/arena.c b/jemalloc/src/arena.c index 52f3d661..a54a0905 100644 --- a/jemalloc/src/arena.c +++ b/jemalloc/src/arena.c @@ -1613,7 +1613,8 @@ arena_palloc(arena_t *arena, size_t size, size_t alloc_size, size_t alignment, arena_chunk_t *chunk; assert((size & PAGE_MASK) == 0); - assert((alignment & PAGE_MASK) == 0); + + alignment = PAGE_CEILING(alignment); malloc_mutex_lock(&arena->lock); ret = (void *)arena_run_alloc(arena, alloc_size, true, zero); diff --git a/jemalloc/src/ctl.c b/jemalloc/src/ctl.c index edbbb209..dbc5cd42 100644 --- a/jemalloc/src/ctl.c +++ b/jemalloc/src/ctl.c @@ -42,6 +42,10 @@ CTL_PROTO(epoch) CTL_PROTO(tcache_flush) #endif CTL_PROTO(thread_arena) +#ifdef JEMALLOC_STATS +CTL_PROTO(thread_allocated) +CTL_PROTO(thread_deallocated) +#endif CTL_PROTO(config_debug) CTL_PROTO(config_dss) CTL_PROTO(config_dynamic_page_shift) @@ -216,6 +220,11 @@ static const ctl_node_t tcache_node[] = { static const ctl_node_t thread_node[] = { {NAME("arena"), CTL(thread_arena)} +#ifdef JEMALLOC_STATS + , + {NAME("allocated"), CTL(thread_allocated)}, + {NAME("deallocated"), CTL(thread_deallocated)} +#endif }; static const ctl_node_t config_node[] = { @@ -1092,6 +1101,11 @@ RETURN: return (ret); } +#ifdef JEMALLOC_STATS +CTL_RO_GEN(thread_allocated, ALLOCATED_GET(), uint64_t); +CTL_RO_GEN(thread_deallocated, DEALLOCATED_GET(), uint64_t); +#endif + /******************************************************************************/ #ifdef JEMALLOC_DEBUG diff --git a/jemalloc/src/jemalloc.c b/jemalloc/src/jemalloc.c index 6f9ec762..f3cba15a 100644 --- a/jemalloc/src/jemalloc.c +++ b/jemalloc/src/jemalloc.c @@ -15,14 +15,22 @@ __thread arena_t *arenas_tls JEMALLOC_ATTR(tls_model("initial-exec")); pthread_key_t arenas_tsd; #endif +#ifdef JEMALLOC_STATS +# ifndef NO_TLS +__thread thread_allocated_t thread_allocated_tls; +# else 
+pthread_key_t thread_allocated_tsd; +# endif +#endif + /* Set to true once the allocator has been initialized. */ -static bool malloc_initialized = false; +static bool malloc_initialized = false; /* Used to let the initializing thread recursively allocate. */ -static pthread_t malloc_initializer = (unsigned long)0; +static pthread_t malloc_initializer = (unsigned long)0; /* Used to avoid initialization races. */ -static malloc_mutex_t init_lock = MALLOC_MUTEX_INITIALIZER; +static malloc_mutex_t init_lock = MALLOC_MUTEX_INITIALIZER; #ifdef DYNAMIC_PAGE_SHIFT size_t pagesize; @@ -63,6 +71,9 @@ static int opt_narenas_lshift = 0; static void wrtmessage(void *cbopaque, const char *s); static void stats_print_atexit(void); static unsigned malloc_ncpus(void); +#if (defined(JEMALLOC_STATS) && defined(NO_TLS)) +static void thread_allocated_cleanup(void *arg); +#endif static bool malloc_init_hard(void); /******************************************************************************/ @@ -222,6 +233,17 @@ malloc_ncpus(void) return (ret); } +#if (defined(JEMALLOC_STATS) && defined(NO_TLS)) +static void +thread_allocated_cleanup(void *arg) +{ + uint64_t *allocated = (uint64_t *)arg; + + if (allocated != NULL) + idalloc(allocated); +} +#endif + /* * FreeBSD's pthreads implementation calls malloc(3), so the malloc * implementation has to take pains to avoid infinite recursion during @@ -633,6 +655,15 @@ MALLOC_OUT: return (true); } +#if (defined(JEMALLOC_STATS) && defined(NO_TLS)) + /* Initialize allocation counters before any allocations can occur. */ + if (pthread_key_create(&thread_allocated_tsd, thread_allocated_cleanup) + != 0) { + malloc_mutex_unlock(&init_lock); + return (true); + } +#endif + /* * Create enough scaffolding to allow recursive allocation in * malloc_ncpus(). 
@@ -766,6 +797,13 @@ void * JEMALLOC_P(malloc)(size_t size) { void *ret; +#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS)) + size_t usize +# ifdef JEMALLOC_CC_SILENCE + = 0 +# endif + ; +#endif #ifdef JEMALLOC_PROF prof_thr_cnt_t *cnt # ifdef JEMALLOC_CC_SILENCE @@ -801,20 +839,26 @@ JEMALLOC_P(malloc)(size_t size) #ifdef JEMALLOC_PROF if (opt_prof) { - if ((cnt = prof_alloc_prep(size)) == NULL) { + usize = s2u(size); + if ((cnt = prof_alloc_prep(usize)) == NULL) { ret = NULL; goto OOM; } - if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && size <= + if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize <= small_maxclass) { ret = imalloc(small_maxclass+1); if (ret != NULL) - arena_prof_promoted(ret, size); + arena_prof_promoted(ret, usize); } else ret = imalloc(size); } else +#endif + { +#ifdef JEMALLOC_STATS + usize = s2u(size); #endif ret = imalloc(size); + } OOM: if (ret == NULL) { @@ -833,7 +877,13 @@ RETURN: #endif #ifdef JEMALLOC_PROF if (opt_prof && ret != NULL) - prof_malloc(ret, cnt); + prof_malloc(ret, usize, cnt); +#endif +#ifdef JEMALLOC_STATS + if (ret != NULL) { + assert(usize == isalloc(ret)); + ALLOCATED_ADD(usize, 0); + } #endif return (ret); } @@ -845,6 +895,13 @@ JEMALLOC_P(posix_memalign)(void **memptr, size_t alignment, size_t size) { int ret; void *result; +#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS)) + size_t usize +# ifdef JEMALLOC_CC_SILENCE + = 0 +# endif + ; +#endif #ifdef JEMALLOC_PROF prof_thr_cnt_t *cnt # ifdef JEMALLOC_CC_SILENCE @@ -896,17 +953,18 @@ JEMALLOC_P(posix_memalign)(void **memptr, size_t alignment, size_t size) #ifdef JEMALLOC_PROF if (opt_prof) { - if ((cnt = prof_alloc_prep(size)) == NULL) { + usize = sa2u(size, alignment, NULL); + if ((cnt = prof_alloc_prep(usize)) == NULL) { result = NULL; ret = EINVAL; } else { if (prof_promote && (uintptr_t)cnt != - (uintptr_t)1U && size <= small_maxclass) { + (uintptr_t)1U && usize <= small_maxclass) { result = ipalloc(small_maxclass+1, alignment, false); if (result != NULL) { arena_prof_promoted(result, - size); + usize); } } else { result = ipalloc(size, alignment, @@ -914,8 +972,13 @@ JEMALLOC_P(posix_memalign)(void **memptr, size_t alignment, size_t size) } } } else +#endif + { +#ifdef JEMALLOC_STATS + usize = sa2u(size, alignment, NULL); #endif result = ipalloc(size, alignment, false); + } } if (result == NULL) { @@ -934,9 +997,15 @@ JEMALLOC_P(posix_memalign)(void **memptr, size_t alignment, size_t size) ret = 0; RETURN: +#ifdef JEMALLOC_STATS + if (result != NULL) { + assert(usize == isalloc(result)); + ALLOCATED_ADD(usize, 0); + } +#endif #ifdef JEMALLOC_PROF if (opt_prof && result != NULL) - prof_malloc(result, cnt); + prof_malloc(result, usize, cnt); #endif return (ret); } @@ -948,6 +1017,13 @@ JEMALLOC_P(calloc)(size_t num, size_t size) { void *ret; size_t num_size; +#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS)) + size_t usize +# ifdef JEMALLOC_CC_SILENCE + = 0 +# endif + ; +#endif #ifdef JEMALLOC_PROF prof_thr_cnt_t *cnt # ifdef JEMALLOC_CC_SILENCE @@ -988,20 +1064,26 @@ JEMALLOC_P(calloc)(size_t num, size_t size) #ifdef JEMALLOC_PROF if (opt_prof) { - if ((cnt = prof_alloc_prep(num_size)) == NULL) { + usize = s2u(num_size); + if ((cnt = prof_alloc_prep(usize)) == NULL) { ret = NULL; goto RETURN; } - if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && num_size + if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize <= small_maxclass) { ret = icalloc(small_maxclass+1); if (ret != NULL) - arena_prof_promoted(ret, num_size); + 
arena_prof_promoted(ret, usize); } else ret = icalloc(num_size); } else +#endif + { +#ifdef JEMALLOC_STATS + usize = s2u(num_size); #endif ret = icalloc(num_size); + } RETURN: if (ret == NULL) { @@ -1017,7 +1099,13 @@ RETURN: #ifdef JEMALLOC_PROF if (opt_prof && ret != NULL) - prof_malloc(ret, cnt); + prof_malloc(ret, usize, cnt); +#endif +#ifdef JEMALLOC_STATS + if (ret != NULL) { + assert(usize == isalloc(ret)); + ALLOCATED_ADD(usize, 0); + } #endif return (ret); } @@ -1027,12 +1115,15 @@ void * JEMALLOC_P(realloc)(void *ptr, size_t size) { void *ret; -#ifdef JEMALLOC_PROF - size_t old_size +#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS)) + size_t usize # ifdef JEMALLOC_CC_SILENCE = 0 # endif ; + size_t old_size = 0; +#endif +#ifdef JEMALLOC_PROF prof_thr_cnt_t *cnt # ifdef JEMALLOC_CC_SILENCE = NULL @@ -1053,9 +1144,11 @@ JEMALLOC_P(realloc)(void *ptr, size_t size) #ifdef JEMALLOC_SYSV else { if (ptr != NULL) { +#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS)) + old_size = isalloc(ptr); +#endif #ifdef JEMALLOC_PROF if (opt_prof) { - old_size = isalloc(ptr); old_ctx = prof_ctx_get(ptr); cnt = NULL; } @@ -1064,7 +1157,6 @@ JEMALLOC_P(realloc)(void *ptr, size_t size) } #ifdef JEMALLOC_PROF else if (opt_prof) { - old_size = 0; old_ctx = NULL; cnt = NULL; } @@ -1079,25 +1171,33 @@ JEMALLOC_P(realloc)(void *ptr, size_t size) assert(malloc_initialized || malloc_initializer == pthread_self()); +#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS)) + old_size = isalloc(ptr); +#endif #ifdef JEMALLOC_PROF if (opt_prof) { - old_size = isalloc(ptr); + usize = s2u(size); old_ctx = prof_ctx_get(ptr); - if ((cnt = prof_alloc_prep(size)) == NULL) { + if ((cnt = prof_alloc_prep(usize)) == NULL) { ret = NULL; goto OOM; } if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && - size <= small_maxclass) { + usize <= small_maxclass) { ret = iralloc(ptr, small_maxclass+1, 0, 0, false, false); if (ret != NULL) - arena_prof_promoted(ret, size); + arena_prof_promoted(ret, usize); } else ret = iralloc(ptr, size, 0, 0, false, false); } else +#endif + { +#ifdef JEMALLOC_STATS + usize = s2u(size); #endif ret = iralloc(ptr, size, 0, 0, false, false); + } #ifdef JEMALLOC_PROF OOM: @@ -1114,10 +1214,8 @@ OOM: } } else { #ifdef JEMALLOC_PROF - if (opt_prof) { - old_size = 0; + if (opt_prof) old_ctx = NULL; - } #endif if (malloc_init()) { #ifdef JEMALLOC_PROF @@ -1128,23 +1226,29 @@ OOM: } else { #ifdef JEMALLOC_PROF if (opt_prof) { - if ((cnt = prof_alloc_prep(size)) == NULL) + usize = s2u(size); + if ((cnt = prof_alloc_prep(usize)) == NULL) ret = NULL; else { if (prof_promote && (uintptr_t)cnt != - (uintptr_t)1U && size <= + (uintptr_t)1U && usize <= small_maxclass) { ret = imalloc(small_maxclass+1); if (ret != NULL) { arena_prof_promoted(ret, - size); + usize); } } else ret = imalloc(size); } } else +#endif + { +#ifdef JEMALLOC_STATS + usize = s2u(size); #endif ret = imalloc(size); + } } if (ret == NULL) { @@ -1164,7 +1268,13 @@ RETURN: #endif #ifdef JEMALLOC_PROF if (opt_prof) - prof_realloc(ret, cnt, ptr, old_size, old_ctx); + prof_realloc(ret, usize, cnt, ptr, old_size, old_ctx); +#endif +#ifdef JEMALLOC_STATS + if (ret != NULL) { + assert(usize == isalloc(ret)); + ALLOCATED_ADD(usize, old_size); + } #endif return (ret); } @@ -1181,6 +1291,9 @@ JEMALLOC_P(free)(void *ptr) #ifdef JEMALLOC_PROF if (opt_prof) prof_free(ptr); +#endif +#ifdef JEMALLOC_STATS + ALLOCATED_ADD(0, isalloc(ptr)); #endif idalloc(ptr); } @@ -1325,6 +1438,7 @@ int JEMALLOC_P(allocm)(void **ptr, size_t *rsize, size_t size, int 
flags) { void *p; + size_t usize; size_t alignment = (ZU(1) << (flags & ALLOCM_LG_ALIGN_MASK) & (SIZE_T_MAX-1)); bool zero = flags & ALLOCM_ZERO; @@ -1340,30 +1454,48 @@ JEMALLOC_P(allocm)(void **ptr, size_t *rsize, size_t size, int flags) #ifdef JEMALLOC_PROF if (opt_prof) { - if ((cnt = prof_alloc_prep(size)) == NULL) + usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment, + NULL); + if ((cnt = prof_alloc_prep(usize)) == NULL) goto OOM; - if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && size <= + if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize <= small_maxclass) { p = iallocm(small_maxclass+1, alignment, zero); if (p == NULL) goto OOM; - arena_prof_promoted(p, size); + arena_prof_promoted(p, usize); } else { p = iallocm(size, alignment, zero); if (p == NULL) goto OOM; } + + if (rsize != NULL) + *rsize = usize; } else #endif { p = iallocm(size, alignment, zero); if (p == NULL) goto OOM; +#ifndef JEMALLOC_STATS + if (rsize != NULL) +#endif + { + usize = (alignment == 0) ? s2u(size) : sa2u(size, + alignment, NULL); +#ifdef JEMALLOC_STATS + if (rsize != NULL) +#endif + *rsize = usize; + } } *ptr = p; - if (rsize != NULL) - *rsize = isalloc(p); +#ifdef JEMALLOC_STATS + assert(usize == isalloc(p)); + ALLOCATED_ADD(usize, 0); +#endif return (ALLOCM_SUCCESS); OOM: #ifdef JEMALLOC_XMALLOC @@ -1384,12 +1516,15 @@ JEMALLOC_P(rallocm)(void **ptr, size_t *rsize, size_t size, size_t extra, int flags) { void *p, *q; + size_t usize; +#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS)) + size_t old_size; +#endif size_t alignment = (ZU(1) << (flags & ALLOCM_LG_ALIGN_MASK) & (SIZE_T_MAX-1)); bool zero = flags & ALLOCM_ZERO; bool no_move = flags & ALLOCM_NO_MOVE; #ifdef JEMALLOC_PROF - size_t old_size; prof_thr_cnt_t *cnt; prof_ctx_t *old_ctx; #endif @@ -1403,36 +1538,60 @@ JEMALLOC_P(rallocm)(void **ptr, size_t *rsize, size_t size, size_t extra, p = *ptr; #ifdef JEMALLOC_PROF if (opt_prof) { + /* + * usize isn't knowable before iralloc() returns when extra is + * non-zero. Therefore, compute its maximum possible value and + * use that in prof_alloc_prep() to decide whether to capture a + * backtrace. prof_realloc() will use the actual usize to + * decide whether to sample. + */ + size_t max_usize = (alignment == 0) ? s2u(size+extra) : + sa2u(size+extra, alignment, NULL); old_size = isalloc(p); old_ctx = prof_ctx_get(p); - if ((cnt = prof_alloc_prep(size)) == NULL) + if ((cnt = prof_alloc_prep(max_usize)) == NULL) goto OOM; - if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && size <= - small_maxclass) { + if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && max_usize + <= small_maxclass) { q = iralloc(p, small_maxclass+1, (small_maxclass+1 >= size+extra) ? 
0 : size+extra - (small_maxclass+1), alignment, zero, no_move); if (q == NULL) goto ERR; - arena_prof_promoted(q, size); + usize = isalloc(q); + arena_prof_promoted(q, usize); } else { q = iralloc(p, size, extra, alignment, zero, no_move); if (q == NULL) goto ERR; + usize = isalloc(q); } - prof_realloc(q, cnt, p, old_size, old_ctx); + prof_realloc(q, usize, cnt, p, old_size, old_ctx); } else #endif { +#ifdef JEMALLOC_STATS + old_size = isalloc(p); +#endif q = iralloc(p, size, extra, alignment, zero, no_move); if (q == NULL) goto ERR; +#ifndef JEMALLOC_STATS + if (rsize != NULL) +#endif + { + usize = isalloc(q); +#ifdef JEMALLOC_STATS + if (rsize != NULL) +#endif + *rsize = usize; + } } *ptr = q; - if (rsize != NULL) - *rsize = isalloc(q); - +#ifdef JEMALLOC_STATS + ALLOCATED_ADD(usize, old_size); +#endif return (ALLOCM_SUCCESS); ERR: if (no_move) @@ -1483,6 +1642,9 @@ JEMALLOC_P(dallocm)(void *ptr, int flags) #ifdef JEMALLOC_PROF if (opt_prof) prof_free(ptr); +#endif +#ifdef JEMALLOC_STATS + ALLOCATED_ADD(0, isalloc(ptr)); #endif idalloc(ptr); diff --git a/jemalloc/src/prof.c b/jemalloc/src/prof.c index e715da9a..583a6e91 100644 --- a/jemalloc/src/prof.c +++ b/jemalloc/src/prof.c @@ -47,7 +47,8 @@ static __thread prof_tcache_t *prof_tcache_tls pthread_setspecific(prof_tcache_tsd, (void *)(v)); \ } while (0) #else -# define PROF_TCACHE_GET() ((ckh_t *)pthread_getspecific(prof_tcache_tsd)) +# define PROF_TCACHE_GET() \ + ((prof_tcache_t *)pthread_getspecific(prof_tcache_tsd)) # define PROF_TCACHE_SET(v) do { \ pthread_setspecific(prof_tcache_tsd, (void *)(v)); \ } while (0) @@ -69,7 +70,7 @@ static __thread void **vec_tls pthread_setspecific(vec_tsd, (void *)(v)); \ } while (0) #else -# define VEC_GET() ((ckh_t *)pthread_getspecific(vec_tsd)) +# define VEC_GET() ((void **)pthread_getspecific(vec_tsd)) # define VEC_SET(v) do { \ pthread_setspecific(vec_tsd, (void *)(v)); \ } while (0) @@ -106,7 +107,8 @@ prof_sample_state_t prof_sample_state_oom; r = (prof_sample_state_t *)pthread_getspecific( \ prof_sample_state_tsd); \ if (r == NULL) { \ - r = ipalloc(sizeof(prof_sample_state_t), CACHELINE); \ + r = ipalloc(sizeof(prof_sample_state_t), CACHELINE, \ + false); \ if (r == NULL) { \ malloc_write(": Error in heap " \ "profiler: out of memory; subsequent heap " \ @@ -658,6 +660,8 @@ prof_alloc_prep(size_t size) void **vec; prof_bt_t bt; + assert(size == s2u(size)); + vec = VEC_GET(); if (vec == NULL) { vec = imalloc(sizeof(void *) * prof_bt_max); @@ -750,7 +754,7 @@ prof_ctx_set(const void *ptr, prof_ctx_t *ctx) huge_prof_ctx_set(ptr, ctx); } -static inline void +static inline bool prof_sample_accum_update(size_t size) { prof_sample_state_t *prof_sample_state; @@ -771,22 +775,33 @@ prof_sample_accum_update(size_t size) prof_sample_state->threshold; prof_sample_threshold_update(); } - } else + return (false); + } else { prof_sample_state->accum += size; + return (true); + } } void -prof_malloc(const void *ptr, prof_thr_cnt_t *cnt) +prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt) { - size_t size; assert(ptr != NULL); + assert(size == s2u(size)); if (opt_lg_prof_sample != 0) { - size = isalloc(ptr); - prof_sample_accum_update(size); - } else if ((uintptr_t)cnt > (uintptr_t)1U) - size = isalloc(ptr); + if (prof_sample_accum_update(size)) { + /* + * Don't sample. 
For malloc()-like allocation, it is
+			 * always possible to tell in advance how large an
+			 * object's usable size will be, so there should never
+			 * be a difference between the size passed to
+			 * prof_alloc_prep() and prof_malloc().
+			 */
+			assert(false);
+			return;
+		}
+	}
 
 	if ((uintptr_t)cnt > (uintptr_t)1U) {
 		prof_ctx_set(ptr, cnt->ctx);
@@ -813,24 +828,27 @@ prof_malloc(const void *ptr, prof_thr_cnt_t *cnt)
 }
 
 void
-prof_realloc(const void *ptr, prof_thr_cnt_t *cnt, const void *old_ptr,
-    size_t old_size, prof_ctx_t *old_ctx)
+prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt,
+    const void *old_ptr, size_t old_size, prof_ctx_t *old_ctx)
 {
-	size_t size
-#ifdef JEMALLOC_CC_SILENCE
-	    = 0
-#endif
-	    ;
 	prof_thr_cnt_t *told_cnt;
 
 	assert(ptr != NULL || (uintptr_t)cnt <= (uintptr_t)1U);
 
 	if (ptr != NULL) {
 		if (opt_lg_prof_sample != 0) {
-			size = isalloc(ptr);
-			prof_sample_accum_update(size);
-		} else if ((uintptr_t)cnt > (uintptr_t)1U)
-			size = isalloc(ptr);
+			if (prof_sample_accum_update(size)) {
+				/*
+				 * Don't sample.  The size passed to
+				 * prof_alloc_prep() was larger than what
+				 * actually got allocated, so a backtrace was
+				 * captured for this allocation, even though
+				 * its actual size was insufficient to cross
+				 * the sample threshold.
+				 */
+				return;
+			}
+		}
 	}
 
 	if ((uintptr_t)old_ctx > (uintptr_t)1U) {
diff --git a/jemalloc/test/allocated.c b/jemalloc/test/allocated.c
new file mode 100644
index 00000000..64a17351
--- /dev/null
+++ b/jemalloc/test/allocated.c
@@ -0,0 +1,105 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <assert.h>
+#include <errno.h>
+
+#define JEMALLOC_MANGLE
+#include "jemalloc_test.h"
+
+void *
+thread_start(void *arg)
+{
+	int err;
+	void *p;
+	uint64_t a0, a1, d0, d1;
+	size_t sz, usize;
+
+	sz = sizeof(a0);
+	if ((err = JEMALLOC_P(mallctl)("thread.allocated", &a0, &sz, NULL,
+	    0))) {
+		if (err == ENOENT) {
+#ifdef JEMALLOC_STATS
+			assert(false);
+#endif
+			goto RETURN;
+		}
+		fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__,
+		    strerror(err));
+		exit(1);
+	}
+
+	sz = sizeof(d0);
+	if ((err = JEMALLOC_P(mallctl)("thread.deallocated", &d0, &sz, NULL,
+	    0))) {
+		if (err == ENOENT) {
+#ifdef JEMALLOC_STATS
+			assert(false);
+#endif
+			goto RETURN;
+		}
+		fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__,
+		    strerror(err));
+		exit(1);
+	}
+
+	p = JEMALLOC_P(malloc)(1);
+	if (p == NULL) {
+		fprintf(stderr, "%s(): Error in malloc()\n", __func__);
+		exit(1);
+	}
+
+	sz = sizeof(a1);
+	JEMALLOC_P(mallctl)("thread.allocated", &a1, &sz, NULL, 0);
+
+	usize = JEMALLOC_P(malloc_usable_size)(p);
+	assert(a0 + usize <= a1);
+
+	JEMALLOC_P(free)(p);
+
+	sz = sizeof(d1);
+	JEMALLOC_P(mallctl)("thread.deallocated", &d1, &sz, NULL, 0);
+
+	assert(d0 + usize <= d1);
+
+RETURN:
+	return (NULL);
+}
+
+int
+main(void)
+{
+	int ret = 0;
+	pthread_t thread;
+
+	fprintf(stderr, "Test begin\n");
+
+	thread_start(NULL);
+
+	if (pthread_create(&thread, NULL, thread_start, NULL)
+	    != 0) {
+		fprintf(stderr, "%s(): Error in pthread_create()\n", __func__);
+		ret = 1;
+		goto RETURN;
+	}
+	pthread_join(thread, (void *)&ret);
+
+	thread_start(NULL);
+
+	if (pthread_create(&thread, NULL, thread_start, NULL)
+	    != 0) {
+		fprintf(stderr, "%s(): Error in pthread_create()\n", __func__);
+		ret = 1;
+		goto RETURN;
+	}
+	pthread_join(thread, (void *)&ret);
+
+	thread_start(NULL);
+
+RETURN:
+	fprintf(stderr, "Test end\n");
+	return (ret);
+}
diff --git a/jemalloc/test/allocated.exp b/jemalloc/test/allocated.exp
new file mode 100644
index 00000000..369a88dd
--- /dev/null
+++ b/jemalloc/test/allocated.exp
@@ -0,0 +1,2 @@
+Test begin
+Test end
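
The new test above exercises the counters through the test harness's JEMALLOC_P() name mangling. For application code, a minimal sketch of consuming the new "thread.allocated" and "thread.deallocated" mallctls is shown below. It is illustrative only, not part of the patch: it assumes a build configured with --enable-stats, unprefixed public symbols (plain mallctl/malloc/free), and the conventional <jemalloc/jemalloc.h> header path.

/*
 * Hypothetical usage sketch (not from the patch): read this thread's
 * allocation counters before and after some work and report the deltas.
 * Assumes a stats-enabled build and unprefixed jemalloc public symbols.
 */
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <jemalloc/jemalloc.h>

int
main(void)
{
	uint64_t a0, a1, d0, d1;
	size_t sz = sizeof(uint64_t);
	void *p;

	/* Snapshot the calling thread's counters; ENOENT means stats are compiled out. */
	if (mallctl("thread.allocated", &a0, &sz, NULL, 0) != 0 ||
	    mallctl("thread.deallocated", &d0, &sz, NULL, 0) != 0) {
		fprintf(stderr, "thread.* statistics unavailable\n");
		return (1);
	}

	p = malloc(4096);
	free(p);

	/* Re-read; the deltas cover all allocation activity by this thread in between. */
	mallctl("thread.allocated", &a1, &sz, NULL, 0);
	mallctl("thread.deallocated", &d1, &sz, NULL, 0);
	printf("allocated %llu bytes, deallocated %llu bytes\n",
	    (unsigned long long)(a1 - a0), (unsigned long long)(d1 - d0));
	return (0);
}

Because the counters are cumulative uint64_t totals that may wrap, callers should compare deltas rather than absolute values, matching the caveat added to the man page entries above.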