Inline the fast path for heap sampling.
Inline the heap sampling code that is executed for every allocation event (regardless of whether a sample is taken). Combine all prof TLS data into a single data structure (prof_tdata_t), in order to reduce the number of TLS lookups.
Parent: 93443689a4
Commit: 4d6a134e13
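The consolidation described above can be pictured with a minimal standalone sketch (illustrative names only, not the actual jemalloc declarations): several independent thread-local variables become a single thread-local structure, so the allocation fast path pays for one TLS lookup instead of several.

/* Before: each piece of per-thread profiler state is a separate TLS object. */
static __thread uint64_t sample_accum_tls;
static __thread uint64_t sample_threshold_tls;
static __thread void **bt_vec_tls;

/* After: one TLS pointer reaches all per-thread profiler state. */
typedef struct {
	uint64_t accum;
	uint64_t threshold;
	void **bt_vec;
} tdata_t;
static __thread tdata_t *tdata_tls;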
jemalloc_internal.h.in

@@ -27,6 +27,7 @@
 #include <unistd.h>
 #include <fcntl.h>
 #include <pthread.h>
+#include <math.h>

 #define JEMALLOC_MANGLE
 #include "../jemalloc@install_suffix@.h"

@@ -210,10 +211,10 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s);
 #include "jemalloc/internal/rtree.h"
 #include "jemalloc/internal/tcache.h"
 #include "jemalloc/internal/hash.h"
-#include "jemalloc/internal/prof.h"
 #ifdef JEMALLOC_ZONE
 #include "jemalloc/internal/zone.h"
 #endif
+#include "jemalloc/internal/prof.h"

 #undef JEMALLOC_H_TYPES
 /******************************************************************************/

@@ -233,10 +234,10 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s);
 #include "jemalloc/internal/rtree.h"
 #include "jemalloc/internal/tcache.h"
 #include "jemalloc/internal/hash.h"
-#include "jemalloc/internal/prof.h"
 #ifdef JEMALLOC_ZONE
 #include "jemalloc/internal/zone.h"
 #endif
+#include "jemalloc/internal/prof.h"

 #undef JEMALLOC_H_STRUCTS
 /******************************************************************************/

@@ -355,10 +356,10 @@ void jemalloc_postfork(void);
 #include "jemalloc/internal/rtree.h"
 #include "jemalloc/internal/tcache.h"
 #include "jemalloc/internal/hash.h"
-#include "jemalloc/internal/prof.h"
 #ifdef JEMALLOC_ZONE
 #include "jemalloc/internal/zone.h"
 #endif
+#include "jemalloc/internal/prof.h"

 #undef JEMALLOC_H_EXTERNS
 /******************************************************************************/

@@ -547,7 +548,6 @@ choose_arena(void)
 #include "jemalloc/internal/tcache.h"
 #include "jemalloc/internal/arena.h"
 #include "jemalloc/internal/hash.h"
-#include "jemalloc/internal/prof.h"
 #ifdef JEMALLOC_ZONE
 #include "jemalloc/internal/zone.h"
 #endif

@@ -729,5 +729,7 @@ iralloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero,
 }
 #endif

+#include "jemalloc/internal/prof.h"
+
 #undef JEMALLOC_H_INLINES
 /******************************************************************************/
prof.h

@@ -6,7 +6,7 @@ typedef struct prof_bt_s prof_bt_t;
 typedef struct prof_cnt_s prof_cnt_t;
 typedef struct prof_thr_cnt_s prof_thr_cnt_t;
 typedef struct prof_ctx_s prof_ctx_t;
-typedef struct prof_tcache_s prof_tcache_t;
+typedef struct prof_tdata_s prof_tdata_t;

 /* Option defaults. */
 #define LG_PROF_BT_MAX_DEFAULT	7
@@ -124,22 +124,29 @@ struct prof_ctx_s {
 	ql_head(prof_thr_cnt_t) cnts_ql;
 };

-/*
- * Thread-specific hash of (prof_bt_t *)-->(prof_thr_cnt_t *).  Each thread
- * keeps a cache of backtraces, with associated thread-specific prof_thr_cnt_t
- * objects.  Other threads may read the prof_thr_cnt_t contents, but no others
- * will ever write them.
- *
- * Upon thread exit, the thread must merge all the prof_thr_cnt_t counter data
- * into the associated prof_ctx_t objects, and unlink/free the prof_thr_cnt_t
- * objects.
- */
-struct prof_tcache_s {
-	/* (prof_bt_t *)-->(prof_thr_cnt_t *). */
+struct prof_tdata_s {
+	/*
+	 * Hash of (prof_bt_t *)-->(prof_thr_cnt_t *).  Each thread keeps a
+	 * cache of backtraces, with associated thread-specific prof_thr_cnt_t
+	 * objects.  Other threads may read the prof_thr_cnt_t contents, but no
+	 * others will ever write them.
+	 *
+	 * Upon thread exit, the thread must merge all the prof_thr_cnt_t
+	 * counter data into the associated prof_ctx_t objects, and unlink/free
+	 * the prof_thr_cnt_t objects.
+	 */
 	ckh_t		bt2cnt;

 	/* LRU for contents of bt2cnt. */
 	ql_head(prof_thr_cnt_t) lru_ql;
+
+	/* Backtrace vector, used for calls to prof_backtrace(). */
+	void		**vec;
+
+	/* Sampling state. */
+	uint64_t	prn_state;
+	uint64_t	threshold;
+	uint64_t	accum;
 };

 #endif /* JEMALLOC_H_STRUCTS */
@@ -177,15 +184,39 @@ extern uint64_t	prof_interval;
  */
 extern bool	prof_promote;

-prof_thr_cnt_t	*prof_alloc_prep(size_t size);
-prof_ctx_t	*prof_ctx_get(const void *ptr);
-void	prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt);
-void	prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt,
-    const void *old_ptr, size_t old_size, prof_ctx_t *old_ctx);
-void	prof_free(const void *ptr);
+/* (1U << opt_lg_prof_bt_max). */
+extern unsigned	prof_bt_max;
+
+/* Thread-specific backtrace cache, used to reduce bt2ctx contention. */
+#ifndef NO_TLS
+extern __thread prof_tdata_t	*prof_tdata_tls
+    JEMALLOC_ATTR(tls_model("initial-exec"));
+#  define PROF_TCACHE_GET()	prof_tdata_tls
+#  define PROF_TCACHE_SET(v)	do {				\
+	prof_tdata_tls = (v);					\
+	pthread_setspecific(prof_tdata_tsd, (void *)(v));	\
+} while (0)
+#else
+#  define PROF_TCACHE_GET()					\
+	((prof_tdata_t *)pthread_getspecific(prof_tdata_tsd))
+#  define PROF_TCACHE_SET(v)	do {				\
+	pthread_setspecific(prof_tdata_tsd, (void *)(v));	\
+} while (0)
+#endif
+/*
+ * Same contents as b2cnt_tls, but initialized such that the TSD destructor is
+ * called when a thread exits, so that prof_tdata_tls contents can be merged,
+ * unlinked, and deallocated.
+ */
+extern pthread_key_t	prof_tdata_tsd;
+
+void	bt_init(prof_bt_t *bt, void **vec);
+void	prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max);
+prof_thr_cnt_t	*prof_lookup(prof_bt_t *bt);
 void	prof_idump(void);
 bool	prof_mdump(const char *filename);
 void	prof_udump(void);
+prof_tdata_t	*prof_tdata_init(void);
 void	prof_boot0(void);
 bool	prof_boot1(void);
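A note on why PROF_TCACHE_SET() above writes both the __thread variable and the pthread TSD slot (a hedged reading of the macro; the names below are illustrative, not jemalloc's): the __thread variable gives cheap reads on the fast path, while pthread_setspecific() stores the pointer under a key whose destructor runs at thread exit, which is what lets the per-thread data be merged and freed.

#include <pthread.h>
#include <stdlib.h>

static pthread_key_t tsd_key;
static __thread void *fast_tls;		/* cheap fast-path reads */

static void
tsd_cleanup(void *arg)			/* runs at thread exit for non-NULL values */
{
	free(arg);
}

static void
tsd_boot(void)
{
	pthread_key_create(&tsd_key, tsd_cleanup);
}

static void
tsd_set(void *v)
{
	fast_tls = v;			/* fast-path copy */
	pthread_setspecific(tsd_key, v);	/* arranges for tsd_cleanup() */
}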
@@ -193,6 +224,321 @@ bool	prof_boot1(void);
 /******************************************************************************/
 #ifdef JEMALLOC_H_INLINES

+#ifndef JEMALLOC_ENABLE_INLINE
+void	prof_sample_threshold_update(prof_tdata_t *prof_tdata);
+prof_thr_cnt_t	*prof_alloc_prep(size_t size);
+prof_ctx_t	*prof_ctx_get(const void *ptr);
+void	prof_ctx_set(const void *ptr, prof_ctx_t *ctx);
+bool	prof_sample_accum_update(size_t size);
+void	prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt);
+void	prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt,
+    const void *old_ptr, size_t old_size, prof_ctx_t *old_ctx);
+void	prof_free(const void *ptr);
+#endif
+
+#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PROF_C_))
+JEMALLOC_INLINE void
+prof_sample_threshold_update(prof_tdata_t *prof_tdata)
+{
+	uint64_t r;
+	double u;
+
+	/*
+	 * Compute prof_sample_threshold as a geometrically distributed random
+	 * variable with mean (2^opt_lg_prof_sample).
+	 */
+	prn64(r, 53, prof_tdata->prn_state,
+	    (uint64_t)6364136223846793005LLU, (uint64_t)1442695040888963407LLU);
+	u = (double)r * (1.0/9007199254740992.0L);
+	prof_tdata->threshold = (uint64_t)(log(u) /
+	    log(1.0 - (1.0 / (double)((uint64_t)1U << opt_lg_prof_sample))))
+	    + (uint64_t)1U;
+}
+
+JEMALLOC_INLINE prof_thr_cnt_t *
+prof_alloc_prep(size_t size)
+{
+#ifdef JEMALLOC_ENABLE_INLINE
+   /* This function does not have its own stack frame, because it is inlined. */
+#  define NIGNORE 1
+#else
+#  define NIGNORE 2
+#endif
+	prof_thr_cnt_t *ret;
+	prof_tdata_t *prof_tdata;
+	prof_bt_t bt;
+
+	assert(size == s2u(size));
+
+	prof_tdata = PROF_TCACHE_GET();
+	if (prof_tdata == NULL) {
+		prof_tdata = prof_tdata_init();
+		if (prof_tdata == NULL)
+			return (NULL);
+	}
+
+	if (opt_prof_active == false) {
+		/* Sampling is currently inactive, so avoid sampling. */
+		ret = (prof_thr_cnt_t *)(uintptr_t)1U;
+	} else if (opt_lg_prof_sample == 0) {
+		/*
+		 * Don't bother with sampling logic, since sampling interval is
+		 * 1.
+		 */
+		bt_init(&bt, prof_tdata->vec);
+		prof_backtrace(&bt, NIGNORE, prof_bt_max);
+		ret = prof_lookup(&bt);
+	} else {
+		if (prof_tdata->threshold == 0) {
+			/*
+			 * Initialize.  Seed the prng differently for each
+			 * thread.
+			 */
+			prof_tdata->prn_state = (uint64_t)(uintptr_t)&size;
+			prof_sample_threshold_update(prof_tdata);
+		}
+
+		/*
+		 * Determine whether to capture a backtrace based on whether
+		 * size is enough for prof_accum to reach
+		 * prof_tdata->threshold.  However, delay updating these
+		 * variables until prof_{m,re}alloc(), because we don't know
+		 * for sure that the allocation will succeed.
+		 *
+		 * Use subtraction rather than addition to avoid potential
+		 * integer overflow.
+		 */
+		if (size >= prof_tdata->threshold - prof_tdata->accum) {
+			bt_init(&bt, prof_tdata->vec);
+			prof_backtrace(&bt, NIGNORE, prof_bt_max);
+			ret = prof_lookup(&bt);
+		} else
+			ret = (prof_thr_cnt_t *)(uintptr_t)1U;
+	}
+
+	return (ret);
+#undef NIGNORE
+}
+
+JEMALLOC_INLINE prof_ctx_t *
+prof_ctx_get(const void *ptr)
+{
+	prof_ctx_t *ret;
+	arena_chunk_t *chunk;
+
+	assert(ptr != NULL);
+
+	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
+	if (chunk != ptr) {
+		/* Region. */
+		assert(chunk->arena->magic == ARENA_MAGIC);
+
+		ret = arena_prof_ctx_get(ptr);
+	} else
+		ret = huge_prof_ctx_get(ptr);
+
+	return (ret);
+}
+
+JEMALLOC_INLINE void
+prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
+{
+	arena_chunk_t *chunk;
+
+	assert(ptr != NULL);
+
+	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
+	if (chunk != ptr) {
+		/* Region. */
+		assert(chunk->arena->magic == ARENA_MAGIC);
+
+		arena_prof_ctx_set(ptr, ctx);
+	} else
+		huge_prof_ctx_set(ptr, ctx);
+}
+
+JEMALLOC_INLINE bool
+prof_sample_accum_update(size_t size)
+{
+	prof_tdata_t *prof_tdata;
+
+	/* Sampling logic is unnecessary if the interval is 1. */
+	assert(opt_lg_prof_sample != 0);
+
+	prof_tdata = PROF_TCACHE_GET();
+	assert(prof_tdata != NULL);
+
+	/* Take care to avoid integer overflow. */
+	if (size >= prof_tdata->threshold - prof_tdata->accum) {
+		prof_tdata->accum -= (prof_tdata->threshold - size);
+		/* Compute new prof_sample_threshold. */
+		prof_sample_threshold_update(prof_tdata);
+		while (prof_tdata->accum >= prof_tdata->threshold) {
+			prof_tdata->accum -= prof_tdata->threshold;
+			prof_sample_threshold_update(prof_tdata);
+		}
+		return (false);
+	} else {
+		prof_tdata->accum += size;
+		return (true);
+	}
+}
+
+JEMALLOC_INLINE void
+prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt)
+{
+
+	assert(ptr != NULL);
+	assert(size == s2u(size));
+
+	if (opt_lg_prof_sample != 0) {
+		if (prof_sample_accum_update(size)) {
+			/*
+			 * Don't sample.  For malloc()-like allocation, it is
+			 * always possible to tell in advance how large an
+			 * object's usable size will be, so there should never
+			 * be a difference between the size passed to
+			 * prof_alloc_prep() and prof_malloc().
+			 */
+			assert(false);
+			return;
+		}
+	}
+
+	if ((uintptr_t)cnt > (uintptr_t)1U) {
+		prof_ctx_set(ptr, cnt->ctx);
+
+		cnt->epoch++;
+		/*********/
+		mb_write();
+		/*********/
+		cnt->cnts.curobjs++;
+		cnt->cnts.curbytes += size;
+		if (opt_prof_accum) {
+			cnt->cnts.accumobjs++;
+			cnt->cnts.accumbytes += size;
+		}
+		/*********/
+		mb_write();
+		/*********/
+		cnt->epoch++;
+		/*********/
+		mb_write();
+		/*********/
+	} else
+		prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U);
+}
+
+JEMALLOC_INLINE void
+prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt,
+    const void *old_ptr, size_t old_size, prof_ctx_t *old_ctx)
+{
+	prof_thr_cnt_t *told_cnt;
+
+	assert(ptr != NULL || (uintptr_t)cnt <= (uintptr_t)1U);
+
+	if (ptr != NULL) {
+		if (opt_lg_prof_sample != 0) {
+			if (prof_sample_accum_update(size)) {
+				/*
+				 * Don't sample.  The size passed to
+				 * prof_alloc_prep() was larger than what
+				 * actually got allocated., so a backtrace was
+				 * captured for this allocation, even though
+				 * its actual size was insufficient to cross
+				 * the sample threshold.
+				 */
+				return;
+			}
+		}
+	}
+
+	if ((uintptr_t)old_ctx > (uintptr_t)1U) {
+		told_cnt = prof_lookup(old_ctx->bt);
+		if (told_cnt == NULL) {
+			/*
+			 * It's too late to propagate OOM for this realloc(),
+			 * so operate directly on old_cnt->ctx->cnt_merged.
+			 */
+			malloc_mutex_lock(&old_ctx->lock);
+			old_ctx->cnt_merged.curobjs--;
+			old_ctx->cnt_merged.curbytes -= old_size;
+			malloc_mutex_unlock(&old_ctx->lock);
+			told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U;
+		}
+	} else
+		told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U;
+
+	if ((uintptr_t)told_cnt > (uintptr_t)1U)
+		told_cnt->epoch++;
+	if ((uintptr_t)cnt > (uintptr_t)1U) {
+		prof_ctx_set(ptr, cnt->ctx);
+		cnt->epoch++;
+	} else
+		prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U);
+	/*********/
+	mb_write();
+	/*********/
+	if ((uintptr_t)told_cnt > (uintptr_t)1U) {
+		told_cnt->cnts.curobjs--;
+		told_cnt->cnts.curbytes -= old_size;
+	}
+	if ((uintptr_t)cnt > (uintptr_t)1U) {
+		cnt->cnts.curobjs++;
+		cnt->cnts.curbytes += size;
+		if (opt_prof_accum) {
+			cnt->cnts.accumobjs++;
+			cnt->cnts.accumbytes += size;
+		}
+	}
+	/*********/
+	mb_write();
+	/*********/
+	if ((uintptr_t)told_cnt > (uintptr_t)1U)
+		told_cnt->epoch++;
+	if ((uintptr_t)cnt > (uintptr_t)1U)
+		cnt->epoch++;
+	/*********/
+	mb_write(); /* Not strictly necessary. */
+}
+
+JEMALLOC_INLINE void
+prof_free(const void *ptr)
+{
+	prof_ctx_t *ctx = prof_ctx_get(ptr);
+
+	if ((uintptr_t)ctx > (uintptr_t)1) {
+		size_t size = isalloc(ptr);
+		prof_thr_cnt_t *tcnt = prof_lookup(ctx->bt);
+
+		if (tcnt != NULL) {
+			tcnt->epoch++;
+			/*********/
+			mb_write();
+			/*********/
+			tcnt->cnts.curobjs--;
+			tcnt->cnts.curbytes -= size;
+			/*********/
+			mb_write();
+			/*********/
+			tcnt->epoch++;
+			/*********/
+			mb_write();
+			/*********/
+		} else {
+			/*
+			 * OOM during free() cannot be propagated, so operate
+			 * directly on cnt->ctx->cnt_merged.
+			 */
+			malloc_mutex_lock(&ctx->lock);
+			ctx->cnt_merged.curobjs--;
+			ctx->cnt_merged.curbytes -= size;
+			malloc_mutex_unlock(&ctx->lock);
+		}
+	}
+}
+#endif
+
 #endif /* JEMALLOC_H_INLINES */
 /******************************************************************************/
 #endif /* JEMALLOC_PROF */
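The threshold computed by prof_sample_threshold_update() above is inverse-CDF sampling of a geometric distribution: with per-byte sampling probability p = 1/2^opt_lg_prof_sample and u drawn uniformly from (0, 1), (uint64_t)(log(u) / log(1 - p)) + 1 is geometrically distributed with mean 1/p, i.e. on average 2^opt_lg_prof_sample bytes are allocated between samples. A small self-contained check of that claim (illustrative code, not part of jemalloc):

#include <math.h>
#include <stdio.h>
#include <stdlib.h>

int
main(void)
{
	unsigned lg_sample = 10;	/* stand-in for opt_lg_prof_sample */
	double p = 1.0 / (double)(1U << lg_sample);
	double sum = 0.0;
	int i, n = 1000000;

	for (i = 0; i < n; i++) {
		/* u in (0, 1), as with the prn64()-based draw above. */
		double u = ((double)rand() + 1.0) / ((double)RAND_MAX + 2.0);
		sum += floor(log(u) / log(1.0 - p)) + 1.0;
	}
	/* The empirical mean should be close to 2^lg_sample = 1024. */
	printf("mean bytes between samples: %f\n", sum / n);
	return (0);
}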
prof.c

@@ -12,8 +12,6 @@
 #include <libunwind.h>
 #endif

-#include <math.h>
-
 /******************************************************************************/
 /* Data. */

@@ -30,6 +28,14 @@ ssize_t	opt_lg_prof_tcmax = LG_PROF_TCMAX_DEFAULT;
 uint64_t	prof_interval;
 bool		prof_promote;

+unsigned	prof_bt_max;
+
+#ifndef NO_TLS
+__thread prof_tdata_t	*prof_tdata_tls
+    JEMALLOC_ATTR(tls_model("initial-exec"));
+#endif
+pthread_key_t	prof_tdata_tsd;
+
 /*
  * Global hash of (prof_bt_t *)-->(prof_ctx_t *).  This is the master data
  * structure that knows about all backtraces currently captured.

@@ -37,96 +43,6 @@ bool	prof_promote;
 static ckh_t		bt2ctx;
 static malloc_mutex_t	bt2ctx_mtx;

-/* Thread-specific backtrace cache, used to reduce bt2ctx contention. */
-#ifndef NO_TLS
-static __thread prof_tcache_t	*prof_tcache_tls
-    JEMALLOC_ATTR(tls_model("initial-exec"));
-#  define PROF_TCACHE_GET()	prof_tcache_tls
-#  define PROF_TCACHE_SET(v)	do {				\
-	prof_tcache_tls = (v);					\
-	pthread_setspecific(prof_tcache_tsd, (void *)(v));	\
-} while (0)
-#else
-#  define PROF_TCACHE_GET()					\
-	((prof_tcache_t *)pthread_getspecific(prof_tcache_tsd))
-#  define PROF_TCACHE_SET(v)	do {				\
-	pthread_setspecific(prof_tcache_tsd, (void *)(v));	\
-} while (0)
-#endif
-/*
- * Same contents as b2cnt_tls, but initialized such that the TSD destructor is
- * called when a thread exits, so that prof_tcache_tls contents can be merged,
- * unlinked, and deallocated.
- */
-static pthread_key_t	prof_tcache_tsd;
-
-/* Thread-specific backtrace vector, used for calls to prof_backtrace(). */
-#ifndef NO_TLS
-static __thread void	**vec_tls
-    JEMALLOC_ATTR(tls_model("initial-exec"));
-#  define VEC_GET()	vec_tls
-#  define VEC_SET(v)	do {					\
-	vec_tls = (v);						\
-	pthread_setspecific(vec_tsd, (void *)(v));		\
-} while (0)
-#else
-#  define VEC_GET()	((void **)pthread_getspecific(vec_tsd))
-#  define VEC_SET(v)	do {					\
-	pthread_setspecific(vec_tsd, (void *)(v));		\
-} while (0)
-#endif
-/*
- * Same contents as vec_tls, but initialized such that the TSD destructor is
- * called when a thread exits, so that vec_tls contents can be merged,
- * unlinked, and deallocated.
- */
-static pthread_key_t	vec_tsd;
-
-/* (1U << opt_lg_prof_bt_max). */
-static unsigned		prof_bt_max;
-
-typedef struct prof_sample_state_s prof_sample_state_t;
-struct prof_sample_state_s {
-	uint64_t	prn_state;
-	uint64_t	threshold;
-	uint64_t	accum;
-};
-
-#ifndef NO_TLS
-static __thread prof_sample_state_t prof_sample_state_tls
-    JEMALLOC_ATTR(tls_model("initial-exec"));
-#  define PROF_SAMPLE_STATE_GET(r)	do {			\
-	r = &prof_sample_state_tls;				\
-} while (0)
-#else
-static pthread_key_t	prof_sample_state_tsd;
-/* Used only if an OOM error occurs in PROF_SAMPLE_STATE_GET(). */
-prof_sample_state_t	prof_sample_state_oom;
-#  define PROF_SAMPLE_STATE_GET(r)	do {			\
-	r = (prof_sample_state_t *)pthread_getspecific(		\
-	    prof_sample_state_tsd);				\
-	if (r == NULL) {					\
-		r = ipalloc(sizeof(prof_sample_state_t), CACHELINE, \
-		    false);					\
-		if (r == NULL) {				\
-			malloc_write("<jemalloc>: Error in heap " \
-			    "profiler: out of memory; subsequent heap " \
-			    "profiles may be inaccurate\n");	\
-			if (opt_abort)				\
-				abort();			\
-			/* Failure is not an option... */	\
-			r = &prof_sample_state_oom;		\
-		}						\
-		pthread_setspecific(prof_sample_state_tsd, (void *)r); \
-	}							\
-} while (0)
-# define ARENA_GET()	((arena_t *)pthread_getspecific(arenas_tsd))
-# define ARENA_SET(v)	do {					\
-	pthread_setspecific(arenas_tsd, (void *)(v));		\
-} while (0)
-#endif
-
 static malloc_mutex_t	prof_dump_seq_mtx;
 static uint64_t		prof_dump_seq;
 static uint64_t		prof_dump_iseq;
@@ -154,7 +70,6 @@ static bool	enq_udump;
 /* Function prototypes for non-inline static functions. */

 static prof_bt_t	*bt_dup(prof_bt_t *bt);
-static void	bt_init(prof_bt_t *bt, void **vec);
 static void	bt_destroy(prof_bt_t *bt);
 #ifdef JEMALLOC_PROF_LIBGCC
 static _Unwind_Reason_Code	prof_unwind_init_callback(

@@ -162,9 +77,6 @@ static _Unwind_Reason_Code	prof_unwind_init_callback(
 static _Unwind_Reason_Code	prof_unwind_callback(
     struct _Unwind_Context *context, void *arg);
 #endif
-static void	prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max);
-static prof_thr_cnt_t	*prof_lookup(prof_bt_t *bt);
-static void	prof_ctx_set(const void *ptr, prof_ctx_t *ctx);
 static bool	prof_flush(bool propagate_err);
 static bool	prof_write(const char *s, bool propagate_err);
 static void	prof_ctx_sum(prof_ctx_t *ctx, prof_cnt_t *cnt_all,

@@ -181,15 +93,11 @@ static void	prof_fdump(void);
 static void	prof_bt_hash(const void *key, unsigned minbits, size_t *hash1,
     size_t *hash2);
 static bool	prof_bt_keycomp(const void *k1, const void *k2);
-static void	prof_tcache_cleanup(void *arg);
-static void	vec_cleanup(void *arg);
-#ifdef NO_TLS
-static void	prof_sample_state_thread_cleanup(void *arg);
-#endif
+static void	prof_tdata_cleanup(void *arg);

 /******************************************************************************/

-static void
+void
 bt_init(prof_bt_t *bt, void **vec)
 {

@@ -285,7 +193,7 @@ prof_unwind_callback(struct _Unwind_Context *context, void *arg)
 	return (_URC_NO_REASON);
 }

-static void
+void
 prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max)
 {
 	prof_unwind_data_t data = {bt, nignore, max};

@@ -293,7 +201,7 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max)
 	_Unwind_Backtrace(prof_unwind_callback, &data);
 }
 #elif defined(JEMALLOC_PROF_LIBUNWIND)
-static void
+void
 prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max)
 {
 	unw_context_t uc;

@@ -328,7 +236,7 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max)
 	}
 }
 #else
-static void
+void
 prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max)
 {
 #define NIGNORE 3
@@ -509,32 +417,23 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max)
 }
 #endif

-static prof_thr_cnt_t *
+prof_thr_cnt_t *
 prof_lookup(prof_bt_t *bt)
 {
 	union {
 		prof_thr_cnt_t	*p;
 		void		*v;
 	} ret;
-	prof_tcache_t *prof_tcache = PROF_TCACHE_GET();
+	prof_tdata_t *prof_tdata;

-	if (prof_tcache == NULL) {
-		/* Initialize an empty cache for this thread. */
-		prof_tcache = (prof_tcache_t *)imalloc(sizeof(prof_tcache_t));
-		if (prof_tcache == NULL)
-			return (NULL);
-
-		if (ckh_new(&prof_tcache->bt2cnt, PROF_CKH_MINITEMS,
-		    prof_bt_hash, prof_bt_keycomp)) {
-			idalloc(prof_tcache);
+	prof_tdata = PROF_TCACHE_GET();
+	if (prof_tdata == NULL) {
+		prof_tdata = prof_tdata_init();
+		if (prof_tdata == NULL)
 			return (NULL);
-		}
-		ql_new(&prof_tcache->lru_ql);
-
-		PROF_TCACHE_SET(prof_tcache);
 	}

-	if (ckh_search(&prof_tcache->bt2cnt, bt, NULL, &ret.v)) {
+	if (ckh_search(&prof_tdata->bt2cnt, bt, NULL, &ret.v)) {
 		union {
 			prof_bt_t	*p;
 			void		*v;

@@ -588,23 +487,23 @@ prof_lookup(prof_bt_t *bt)
 	prof_leave();

 	/* Link a prof_thd_cnt_t into ctx for this thread. */
-	if (opt_lg_prof_tcmax >= 0 && ckh_count(&prof_tcache->bt2cnt)
+	if (opt_lg_prof_tcmax >= 0 && ckh_count(&prof_tdata->bt2cnt)
 	    == (ZU(1) << opt_lg_prof_tcmax)) {
-		assert(ckh_count(&prof_tcache->bt2cnt) > 0);
+		assert(ckh_count(&prof_tdata->bt2cnt) > 0);
 		/*
 		 * Flush the least least recently used cnt in order to
 		 * keep bt2cnt from becoming too large.
 		 */
-		ret.p = ql_last(&prof_tcache->lru_ql, lru_link);
+		ret.p = ql_last(&prof_tdata->lru_ql, lru_link);
 		assert(ret.v != NULL);
-		ckh_remove(&prof_tcache->bt2cnt, ret.p->ctx->bt, NULL,
+		ckh_remove(&prof_tdata->bt2cnt, ret.p->ctx->bt, NULL,
 		    NULL);
-		ql_remove(&prof_tcache->lru_ql, ret.p, lru_link);
+		ql_remove(&prof_tdata->lru_ql, ret.p, lru_link);
 		prof_ctx_merge(ret.p->ctx, ret.p);
 		/* ret can now be re-used. */
 	} else {
 		assert(opt_lg_prof_tcmax < 0 ||
-		    ckh_count(&prof_tcache->bt2cnt) < (ZU(1) <<
+		    ckh_count(&prof_tdata->bt2cnt) < (ZU(1) <<
 		    opt_lg_prof_tcmax));
 		/* Allocate and partially initialize a new cnt. */
 		ret.v = imalloc(sizeof(prof_thr_cnt_t));

@@ -617,325 +516,22 @@ prof_lookup(prof_bt_t *bt)
 		ret.p->ctx = ctx.p;
 		ret.p->epoch = 0;
 		memset(&ret.p->cnts, 0, sizeof(prof_cnt_t));
-		if (ckh_insert(&prof_tcache->bt2cnt, btkey.v, ret.v)) {
+		if (ckh_insert(&prof_tdata->bt2cnt, btkey.v, ret.v)) {
 			idalloc(ret.v);
 			return (NULL);
 		}
-		ql_head_insert(&prof_tcache->lru_ql, ret.p, lru_link);
+		ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link);
 		ql_tail_insert(&ctx.p->cnts_ql, ret.p, cnts_link);
 		malloc_mutex_unlock(&ctx.p->lock);
 	} else {
 		/* Move ret to the front of the LRU. */
-		ql_remove(&prof_tcache->lru_ql, ret.p, lru_link);
-		ql_head_insert(&prof_tcache->lru_ql, ret.p, lru_link);
+		ql_remove(&prof_tdata->lru_ql, ret.p, lru_link);
+		ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link);
 	}

 	return (ret.p);
 }

||||||
static inline void
|
|
||||||
prof_sample_threshold_update(void)
|
|
||||||
{
|
|
||||||
uint64_t r;
|
|
||||||
double u;
|
|
||||||
prof_sample_state_t *prof_sample_state;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Compute prof_sample_threshold as a geometrically distributed random
|
|
||||||
* variable with mean (2^opt_lg_prof_sample).
|
|
||||||
*/
|
|
||||||
PROF_SAMPLE_STATE_GET(prof_sample_state);
|
|
||||||
prn64(r, 53, prof_sample_state->prn_state,
|
|
||||||
(uint64_t)6364136223846793005LLU, (uint64_t)1442695040888963407LLU);
|
|
||||||
u = (double)r * (1.0/9007199254740992.0L);
|
|
||||||
prof_sample_state->threshold = (uint64_t)(log(u) /
|
|
||||||
log(1.0 - (1.0 / (double)((uint64_t)1U << opt_lg_prof_sample))))
|
|
||||||
+ (uint64_t)1U;
|
|
||||||
}
|
|
||||||
|
|
||||||
prof_thr_cnt_t *
|
|
||||||
prof_alloc_prep(size_t size)
|
|
||||||
{
|
|
||||||
prof_thr_cnt_t *ret;
|
|
||||||
void **vec;
|
|
||||||
prof_bt_t bt;
|
|
||||||
|
|
||||||
assert(size == s2u(size));
|
|
||||||
|
|
||||||
vec = VEC_GET();
|
|
||||||
if (vec == NULL) {
|
|
||||||
vec = imalloc(sizeof(void *) * prof_bt_max);
|
|
||||||
if (vec == NULL)
|
|
||||||
return (NULL);
|
|
||||||
VEC_SET(vec);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (opt_prof_active == false) {
|
|
||||||
/* Sampling is currently inactive, so avoid sampling. */
|
|
||||||
ret = (prof_thr_cnt_t *)(uintptr_t)1U;
|
|
||||||
} else if (opt_lg_prof_sample == 0) {
|
|
||||||
/*
|
|
||||||
* Don't bother with sampling logic, since sampling interval is
|
|
||||||
* 1.
|
|
||||||
*/
|
|
||||||
bt_init(&bt, vec);
|
|
||||||
prof_backtrace(&bt, 2, prof_bt_max);
|
|
||||||
ret = prof_lookup(&bt);
|
|
||||||
} else {
|
|
||||||
prof_sample_state_t *prof_sample_state;
|
|
||||||
|
|
||||||
PROF_SAMPLE_STATE_GET(prof_sample_state);
|
|
||||||
if (prof_sample_state->threshold == 0) {
|
|
||||||
/*
|
|
||||||
* Initialize. Seed the prng differently for each
|
|
||||||
* thread.
|
|
||||||
*/
|
|
||||||
prof_sample_state->prn_state =
|
|
||||||
(uint64_t)(uintptr_t)&size;
|
|
||||||
prof_sample_threshold_update();
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Determine whether to capture a backtrace based on whether
|
|
||||||
* size is enough for prof_accum to reach
|
|
||||||
* prof_sample_state->threshold. However, delay updating these
|
|
||||||
* variables until prof_{m,re}alloc(), because we don't know
|
|
||||||
* for sure that the allocation will succeed.
|
|
||||||
*
|
|
||||||
* Use subtraction rather than addition to avoid potential
|
|
||||||
* integer overflow.
|
|
||||||
*/
|
|
||||||
if (size >= prof_sample_state->threshold -
|
|
||||||
prof_sample_state->accum) {
|
|
||||||
bt_init(&bt, vec);
|
|
||||||
prof_backtrace(&bt, 2, prof_bt_max);
|
|
||||||
ret = prof_lookup(&bt);
|
|
||||||
} else
|
|
||||||
ret = (prof_thr_cnt_t *)(uintptr_t)1U;
|
|
||||||
}
|
|
||||||
|
|
||||||
return (ret);
|
|
||||||
}
|
|
||||||
|
|
||||||
prof_ctx_t *
|
|
||||||
prof_ctx_get(const void *ptr)
|
|
||||||
{
|
|
||||||
prof_ctx_t *ret;
|
|
||||||
arena_chunk_t *chunk;
|
|
||||||
|
|
||||||
assert(ptr != NULL);
|
|
||||||
|
|
||||||
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
|
|
||||||
if (chunk != ptr) {
|
|
||||||
/* Region. */
|
|
||||||
assert(chunk->arena->magic == ARENA_MAGIC);
|
|
||||||
|
|
||||||
ret = arena_prof_ctx_get(ptr);
|
|
||||||
} else
|
|
||||||
ret = huge_prof_ctx_get(ptr);
|
|
||||||
|
|
||||||
return (ret);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void
|
|
||||||
prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
|
|
||||||
{
|
|
||||||
arena_chunk_t *chunk;
|
|
||||||
|
|
||||||
assert(ptr != NULL);
|
|
||||||
|
|
||||||
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
|
|
||||||
if (chunk != ptr) {
|
|
||||||
/* Region. */
|
|
||||||
assert(chunk->arena->magic == ARENA_MAGIC);
|
|
||||||
|
|
||||||
arena_prof_ctx_set(ptr, ctx);
|
|
||||||
} else
|
|
||||||
huge_prof_ctx_set(ptr, ctx);
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline bool
|
|
||||||
prof_sample_accum_update(size_t size)
|
|
||||||
{
|
|
||||||
prof_sample_state_t *prof_sample_state;
|
|
||||||
|
|
||||||
/* Sampling logic is unnecessary if the interval is 1. */
|
|
||||||
assert(opt_lg_prof_sample != 0);
|
|
||||||
|
|
||||||
/* Take care to avoid integer overflow. */
|
|
||||||
PROF_SAMPLE_STATE_GET(prof_sample_state);
|
|
||||||
if (size >= prof_sample_state->threshold - prof_sample_state->accum) {
|
|
||||||
prof_sample_state->accum -= (prof_sample_state->threshold -
|
|
||||||
size);
|
|
||||||
/* Compute new prof_sample_threshold. */
|
|
||||||
prof_sample_threshold_update();
|
|
||||||
while (prof_sample_state->accum >=
|
|
||||||
prof_sample_state->threshold) {
|
|
||||||
prof_sample_state->accum -=
|
|
||||||
prof_sample_state->threshold;
|
|
||||||
prof_sample_threshold_update();
|
|
||||||
}
|
|
||||||
return (false);
|
|
||||||
} else {
|
|
||||||
prof_sample_state->accum += size;
|
|
||||||
return (true);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void
|
|
||||||
prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt)
|
|
||||||
{
|
|
||||||
|
|
||||||
assert(ptr != NULL);
|
|
||||||
assert(size == s2u(size));
|
|
||||||
|
|
||||||
if (opt_lg_prof_sample != 0) {
|
|
||||||
if (prof_sample_accum_update(size)) {
|
|
||||||
/*
|
|
||||||
* Don't sample. For malloc()-like allocation, it is
|
|
||||||
* always possible to tell in advance how large an
|
|
||||||
* object's usable size will be, so there should never
|
|
||||||
* be a difference between the size passed to
|
|
||||||
* prof_alloc_prep() and prof_malloc().
|
|
||||||
*/
|
|
||||||
assert(false);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if ((uintptr_t)cnt > (uintptr_t)1U) {
|
|
||||||
prof_ctx_set(ptr, cnt->ctx);
|
|
||||||
|
|
||||||
cnt->epoch++;
|
|
||||||
/*********/
|
|
||||||
mb_write();
|
|
||||||
/*********/
|
|
||||||
cnt->cnts.curobjs++;
|
|
||||||
cnt->cnts.curbytes += size;
|
|
||||||
if (opt_prof_accum) {
|
|
||||||
cnt->cnts.accumobjs++;
|
|
||||||
cnt->cnts.accumbytes += size;
|
|
||||||
}
|
|
||||||
/*********/
|
|
||||||
mb_write();
|
|
||||||
/*********/
|
|
||||||
cnt->epoch++;
|
|
||||||
/*********/
|
|
||||||
mb_write();
|
|
||||||
/*********/
|
|
||||||
} else
|
|
||||||
prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U);
|
|
||||||
}
|
|
||||||
|
|
||||||
void
|
|
||||||
prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt,
|
|
||||||
const void *old_ptr, size_t old_size, prof_ctx_t *old_ctx)
|
|
||||||
{
|
|
||||||
prof_thr_cnt_t *told_cnt;
|
|
||||||
|
|
||||||
assert(ptr != NULL || (uintptr_t)cnt <= (uintptr_t)1U);
|
|
||||||
|
|
||||||
if (ptr != NULL) {
|
|
||||||
if (opt_lg_prof_sample != 0) {
|
|
||||||
if (prof_sample_accum_update(size)) {
|
|
||||||
/*
|
|
||||||
* Don't sample. The size passed to
|
|
||||||
* prof_alloc_prep() was larger than what
|
|
||||||
* actually got allocated., so a backtrace was
|
|
||||||
* captured for this allocation, even though
|
|
||||||
* its actual size was insufficient to cross
|
|
||||||
* the sample threshold.
|
|
||||||
*/
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if ((uintptr_t)old_ctx > (uintptr_t)1U) {
|
|
||||||
told_cnt = prof_lookup(old_ctx->bt);
|
|
||||||
if (told_cnt == NULL) {
|
|
||||||
/*
|
|
||||||
* It's too late to propagate OOM for this realloc(),
|
|
||||||
* so operate directly on old_cnt->ctx->cnt_merged.
|
|
||||||
*/
|
|
||||||
malloc_mutex_lock(&old_ctx->lock);
|
|
||||||
old_ctx->cnt_merged.curobjs--;
|
|
||||||
old_ctx->cnt_merged.curbytes -= old_size;
|
|
||||||
malloc_mutex_unlock(&old_ctx->lock);
|
|
||||||
told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U;
|
|
||||||
}
|
|
||||||
} else
|
|
||||||
told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U;
|
|
||||||
|
|
||||||
if ((uintptr_t)told_cnt > (uintptr_t)1U)
|
|
||||||
told_cnt->epoch++;
|
|
||||||
if ((uintptr_t)cnt > (uintptr_t)1U) {
|
|
||||||
prof_ctx_set(ptr, cnt->ctx);
|
|
||||||
cnt->epoch++;
|
|
||||||
} else
|
|
||||||
prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U);
|
|
||||||
/*********/
|
|
||||||
mb_write();
|
|
||||||
/*********/
|
|
||||||
if ((uintptr_t)told_cnt > (uintptr_t)1U) {
|
|
||||||
told_cnt->cnts.curobjs--;
|
|
||||||
told_cnt->cnts.curbytes -= old_size;
|
|
||||||
}
|
|
||||||
if ((uintptr_t)cnt > (uintptr_t)1U) {
|
|
||||||
cnt->cnts.curobjs++;
|
|
||||||
cnt->cnts.curbytes += size;
|
|
||||||
if (opt_prof_accum) {
|
|
||||||
cnt->cnts.accumobjs++;
|
|
||||||
cnt->cnts.accumbytes += size;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
/*********/
|
|
||||||
mb_write();
|
|
||||||
/*********/
|
|
||||||
if ((uintptr_t)told_cnt > (uintptr_t)1U)
|
|
||||||
told_cnt->epoch++;
|
|
||||||
if ((uintptr_t)cnt > (uintptr_t)1U)
|
|
||||||
cnt->epoch++;
|
|
||||||
/*********/
|
|
||||||
mb_write(); /* Not strictly necessary. */
|
|
||||||
}
|
|
||||||
|
|
||||||
void
|
|
||||||
prof_free(const void *ptr)
|
|
||||||
{
|
|
||||||
prof_ctx_t *ctx = prof_ctx_get(ptr);
|
|
||||||
|
|
||||||
if ((uintptr_t)ctx > (uintptr_t)1) {
|
|
||||||
size_t size = isalloc(ptr);
|
|
||||||
prof_thr_cnt_t *tcnt = prof_lookup(ctx->bt);
|
|
||||||
|
|
||||||
if (tcnt != NULL) {
|
|
||||||
tcnt->epoch++;
|
|
||||||
/*********/
|
|
||||||
mb_write();
|
|
||||||
/*********/
|
|
||||||
tcnt->cnts.curobjs--;
|
|
||||||
tcnt->cnts.curbytes -= size;
|
|
||||||
/*********/
|
|
||||||
mb_write();
|
|
||||||
/*********/
|
|
||||||
tcnt->epoch++;
|
|
||||||
/*********/
|
|
||||||
mb_write();
|
|
||||||
/*********/
|
|
||||||
} else {
|
|
||||||
/*
|
|
||||||
* OOM during free() cannot be propagated, so operate
|
|
||||||
* directly on cnt->ctx->cnt_merged.
|
|
||||||
*/
|
|
||||||
malloc_mutex_lock(&ctx->lock);
|
|
||||||
ctx->cnt_merged.curobjs--;
|
|
||||||
ctx->cnt_merged.curbytes -= size;
|
|
||||||
malloc_mutex_unlock(&ctx->lock);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
prof_flush(bool propagate_err)
|
prof_flush(bool propagate_err)
|
||||||
{
|
{
|
||||||
@@ -1468,60 +1064,72 @@ prof_bt_keycomp(const void *k1, const void *k2)
 	return (memcmp(bt1->vec, bt2->vec, bt1->len * sizeof(void *)) == 0);
 }

-static void
-prof_tcache_cleanup(void *arg)
+prof_tdata_t *
+prof_tdata_init(void)
 {
-	prof_tcache_t *prof_tcache;
+	prof_tdata_t *prof_tdata;

-	prof_tcache = PROF_TCACHE_GET();
-	if (prof_tcache != NULL) {
+	/* Initialize an empty cache for this thread. */
+	prof_tdata = (prof_tdata_t *)imalloc(sizeof(prof_tdata_t));
+	if (prof_tdata == NULL)
+		return (NULL);
+
+	if (ckh_new(&prof_tdata->bt2cnt, PROF_CKH_MINITEMS,
+	    prof_bt_hash, prof_bt_keycomp)) {
+		idalloc(prof_tdata);
+		return (NULL);
+	}
+	ql_new(&prof_tdata->lru_ql);
+
+	prof_tdata->vec = imalloc(sizeof(void *) * prof_bt_max);
+	if (prof_tdata->vec == NULL) {
+		ckh_delete(&prof_tdata->bt2cnt);
+		idalloc(prof_tdata);
+		return (NULL);
+	}
+
+	prof_tdata->prn_state = 0;
+	prof_tdata->threshold = 0;
+	prof_tdata->accum = 0;
+
+	PROF_TCACHE_SET(prof_tdata);
+
+	return (prof_tdata);
+}
+
+static void
+prof_tdata_cleanup(void *arg)
+{
+	prof_tdata_t *prof_tdata;
+
+	prof_tdata = PROF_TCACHE_GET();
+	if (prof_tdata != NULL) {
 		prof_thr_cnt_t *cnt;

 		/*
 		 * Delete the hash table.  All of its contents can still be
 		 * iterated over via the LRU.
 		 */
-		ckh_delete(&prof_tcache->bt2cnt);
+		ckh_delete(&prof_tdata->bt2cnt);

 		/*
 		 * Iteratively merge cnt's into the global stats and delete
 		 * them.
 		 */
-		while ((cnt = ql_last(&prof_tcache->lru_ql, lru_link)) !=
-		    NULL) {
+		while ((cnt = ql_last(&prof_tdata->lru_ql, lru_link)) != NULL) {
 			prof_ctx_merge(cnt->ctx, cnt);
-			ql_remove(&prof_tcache->lru_ql, cnt, lru_link);
+			ql_remove(&prof_tdata->lru_ql, cnt, lru_link);
 			idalloc(cnt);
 		}

-		idalloc(prof_tcache);
+		idalloc(prof_tdata->vec);
+		idalloc(prof_tdata);
 		PROF_TCACHE_SET(NULL);
 	}
 }

-static void
-vec_cleanup(void *arg)
-{
-	void **vec;
-
-	vec = VEC_GET();
-	if (vec != NULL) {
-		idalloc(vec);
-		VEC_SET(NULL);
-	}
-}
-
-#ifdef NO_TLS
-static void
-prof_sample_state_thread_cleanup(void *arg)
-{
-	prof_sample_state_t *prof_sample_state = (prof_sample_state_t *)arg;
-
-	if (prof_sample_state != &prof_sample_state_oom)
-		idalloc(prof_sample_state);
-}
-#endif
-
 void
 prof_boot0(void)
 {
@@ -1560,25 +1168,12 @@ prof_boot1(void)
 			return (true);
 		if (malloc_mutex_init(&bt2ctx_mtx))
 			return (true);
-		if (pthread_key_create(&prof_tcache_tsd, prof_tcache_cleanup)
+		if (pthread_key_create(&prof_tdata_tsd, prof_tdata_cleanup)
 		    != 0) {
 			malloc_write(
 			    "<jemalloc>: Error in pthread_key_create()\n");
 			abort();
 		}
-		if (pthread_key_create(&vec_tsd, vec_cleanup) != 0) {
-			malloc_write(
-			    "<jemalloc>: Error in pthread_key_create()\n");
-			abort();
-		}
-#ifdef NO_TLS
-		if (pthread_key_create(&prof_sample_state_tsd,
-		    prof_sample_state_thread_cleanup) != 0) {
-			malloc_write(
-			    "<jemalloc>: Error in pthread_key_create()\n");
-			abort();
-		}
-#endif

 		prof_bt_max = (1U << opt_lg_prof_bt_max);
 		if (malloc_mutex_init(&prof_dump_seq_mtx))