/*
 * 809b0ac391
 * This cleans up the fast path a bit more by moving away more code.
 * 499 lines, 14 KiB, C
 */
/******************************************************************************/
#ifdef JEMALLOC_H_TYPES

/*
 * Forward declarations of the profiling types.  The structure bodies are
 * defined in the JEMALLOC_H_STRUCTS section of this header.
 */
typedef struct prof_bt_s prof_bt_t;
typedef struct prof_cnt_s prof_cnt_t;
typedef struct prof_tctx_s prof_tctx_t;
typedef struct prof_gctx_s prof_gctx_t;
typedef struct prof_tdata_s prof_tdata_t;

/* Option defaults. */
#ifdef JEMALLOC_PROF
#  define PROF_PREFIX_DEFAULT		"jeprof"
#else
/* Empty prefix when profiling support is not compiled in. */
#  define PROF_PREFIX_DEFAULT		""
#endif
#define	LG_PROF_SAMPLE_DEFAULT		19
/*
 * Parenthesized so that the negative literal always expands as a single
 * operand, whatever expression the macro is used in.
 */
#define	LG_PROF_INTERVAL_DEFAULT	(-1)

/*
 * Hard limit on stack backtrace depth.  The version of prof_backtrace() that
 * is based on __builtin_return_address() necessarily has a hard-coded number
 * of backtrace frame handlers, and should be kept in sync with this setting.
 */
#define	PROF_BT_MAX			128

/* Initial hash table size. */
#define	PROF_CKH_MINITEMS		64

/* Size of memory buffer to use when writing dump files. */
#define	PROF_DUMP_BUFSIZE		65536

/* Size of stack-allocated buffer used by prof_printf(). */
#define	PROF_PRINTF_BUFSIZE		128

/*
 * Number of mutexes shared among all gctx's.  No space is allocated for these
 * unless profiling is enabled, so it's okay to over-provision.
 */
#define	PROF_NCTX_LOCKS			1024

/*
 * Number of mutexes shared among all tdata's.  No space is allocated for these
 * unless profiling is enabled, so it's okay to over-provision.
 */
#define	PROF_NTDATA_LOCKS		256

/*
 * prof_tdata pointers close to NULL are used to encode state information that
 * is used for cleaning up during thread shutdown.  Any pointer value that is
 * <= PROF_TDATA_STATE_MAX is therefore a state marker rather than a real
 * prof_tdata_t.
 */
#define	PROF_TDATA_STATE_REINCARNATED	((prof_tdata_t *)(uintptr_t)1)
#define	PROF_TDATA_STATE_PURGATORY	((prof_tdata_t *)(uintptr_t)2)
#define	PROF_TDATA_STATE_MAX		PROF_TDATA_STATE_PURGATORY

#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS

/* A captured stack backtrace: a vector of program counters plus its length. */
struct prof_bt_s {
	/* Backtrace, stored as len program counters. */
	void		**vec;
	unsigned	len;
};

#ifdef JEMALLOC_PROF_LIBGCC
/* Data structure passed to libgcc _Unwind_Backtrace() callback functions. */
typedef struct {
	/* Backtrace handed to the callback. */
	prof_bt_t	*bt;
	/* NOTE(review): presumably the frame limit for bt -- confirm in prof.c. */
	unsigned	max;
} prof_unwind_data_t;
#endif

/* Set of object/byte counters: current ("cur") and accumulated ("accum"). */
struct prof_cnt_s {
	/* Profiling counters. */
	uint64_t	curobjs;
	uint64_t	curbytes;
	uint64_t	accumobjs;
	uint64_t	accumbytes;
};

/* Dump-related state of a prof_tctx_t (stored in prof_tctx_s.state). */
typedef enum {
	prof_tctx_state_initializing,
	prof_tctx_state_nominal,
	prof_tctx_state_dumping,
	prof_tctx_state_purgatory /* Dumper must finish destroying. */
} prof_tctx_state_t;

struct prof_tctx_s {
|
|
/* Thread data for thread that performed the allocation. */
|
|
prof_tdata_t *tdata;
|
|
|
|
/*
|
|
* Copy of tdata->thr_uid, necessary because tdata may be defunct during
|
|
* teardown.
|
|
*/
|
|
uint64_t thr_uid;
|
|
|
|
/* Profiling counters, protected by tdata->lock. */
|
|
prof_cnt_t cnts;
|
|
|
|
/* Associated global context. */
|
|
prof_gctx_t *gctx;
|
|
|
|
/* Linkage into gctx's tctxs. */
|
|
rb_node(prof_tctx_t) tctx_link;
|
|
|
|
/*
|
|
* True during prof_alloc_prep()..prof_malloc_sample_object(), prevents
|
|
* sample vs destroy race.
|
|
*/
|
|
bool prepared;
|
|
|
|
/* Current dump-related state, protected by gctx->lock. */
|
|
prof_tctx_state_t state;
|
|
|
|
/*
|
|
* Copy of cnts snapshotted during early dump phase, protected by
|
|
* dump_mtx.
|
|
*/
|
|
prof_cnt_t dump_cnts;
|
|
};
|
|
typedef rb_tree(prof_tctx_t) prof_tctx_tree_t;
|
|
|
|
struct prof_gctx_s {
|
|
/* Protects nlimbo, cnt_summed, and tctxs. */
|
|
malloc_mutex_t *lock;
|
|
|
|
/*
|
|
* Number of threads that currently cause this gctx to be in a state of
|
|
* limbo due to one of:
|
|
* - Initializing this gctx.
|
|
* - Initializing per thread counters associated with this gctx.
|
|
* - Preparing to destroy this gctx.
|
|
* - Dumping a heap profile that includes this gctx.
|
|
* nlimbo must be 1 (single destroyer) in order to safely destroy the
|
|
* gctx.
|
|
*/
|
|
unsigned nlimbo;
|
|
|
|
/*
|
|
* Tree of profile counters, one for each thread that has allocated in
|
|
* this context.
|
|
*/
|
|
prof_tctx_tree_t tctxs;
|
|
|
|
/* Linkage for tree of contexts to be dumped. */
|
|
rb_node(prof_gctx_t) dump_link;
|
|
|
|
/* Temporary storage for summation during dump. */
|
|
prof_cnt_t cnt_summed;
|
|
|
|
/* Associated backtrace. */
|
|
prof_bt_t bt;
|
|
|
|
/* Backtrace vector, variable size, referred to by bt. */
|
|
void *vec[1];
|
|
};
|
|
typedef rb_tree(prof_gctx_t) prof_gctx_tree_t;
|
|
|
|
struct prof_tdata_s {
|
|
malloc_mutex_t *lock;
|
|
|
|
/* Monotonically increasing unique thread identifier. */
|
|
uint64_t thr_uid;
|
|
|
|
/*
|
|
* Monotonically increasing discriminator among tdata structures
|
|
* associated with the same thr_uid.
|
|
*/
|
|
uint64_t thr_discrim;
|
|
|
|
/* Included in heap profile dumps if non-NULL. */
|
|
char *thread_name;
|
|
|
|
bool attached;
|
|
bool expired;
|
|
|
|
rb_node(prof_tdata_t) tdata_link;
|
|
|
|
/*
|
|
* Hash of (prof_bt_t *)-->(prof_tctx_t *). Each thread tracks
|
|
* backtraces for which it has non-zero allocation/deallocation counters
|
|
* associated with thread-specific prof_tctx_t objects. Other threads
|
|
* may write to prof_tctx_t contents when freeing associated objects.
|
|
*/
|
|
ckh_t bt2tctx;
|
|
|
|
/* Sampling state. */
|
|
uint64_t prng_state;
|
|
uint64_t bytes_until_sample;
|
|
|
|
/* State used to avoid dumping while operating on prof internals. */
|
|
bool enq;
|
|
bool enq_idump;
|
|
bool enq_gdump;
|
|
|
|
/*
|
|
* Set to true during an early dump phase for tdata's which are
|
|
* currently being dumped. New threads' tdata's have this initialized
|
|
* to false so that they aren't accidentally included in later dump
|
|
* phases.
|
|
*/
|
|
bool dumping;
|
|
|
|
/*
|
|
* True if profiling is active for this tdata's thread
|
|
* (thread.prof.active mallctl).
|
|
*/
|
|
bool active;
|
|
|
|
/* Temporary storage for summation during dump. */
|
|
prof_cnt_t cnt_summed;
|
|
|
|
/* Backtrace vector, used for calls to prof_backtrace(). */
|
|
void *vec[PROF_BT_MAX];
|
|
};
|
|
typedef rb_tree(prof_tdata_t) prof_tdata_tree_t;
|
|
|
|
#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS

/* Profiling option variables (declarations only; defined elsewhere). */
extern bool	opt_prof;
extern bool	opt_prof_active;
extern bool	opt_prof_thread_active_init;
extern size_t	opt_lg_prof_sample;   /* Mean bytes between samples. */
extern ssize_t	opt_lg_prof_interval; /* lg(prof_interval). */
extern bool	opt_prof_gdump;       /* High-water memory dumping. */
extern bool	opt_prof_final;       /* Final profile dumping. */
extern bool	opt_prof_leak;        /* Dump leak summary at exit. */
extern bool	opt_prof_accum;       /* Report cumulative bytes. */
extern char	opt_prof_prefix[
    /* Minimize memory bloat for non-prof builds. */
#ifdef JEMALLOC_PROF
    PATH_MAX +
#endif
    1];

/* Accessed via prof_active_[gs]et{_unlocked,}(). */
extern bool	prof_active;

/*
 * Profile dump interval, measured in bytes allocated.  Each arena triggers a
 * profile dump when it reaches this threshold.  The effect is that the
 * interval between profile dumps averages prof_interval, though the actual
 * interval between dumps will tend to be sporadic, and the interval will be a
 * maximum of approximately (prof_interval * narenas).
 */
extern uint64_t	prof_interval;

/*
 * Initialized as opt_lg_prof_sample, and potentially modified during profiling
 * resets.
 */
extern size_t	lg_prof_sample;

void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated);
|
|
void prof_malloc_sample_object(const void *ptr, size_t usize,
|
|
prof_tctx_t *tctx);
|
|
void prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_tctx_t *tctx);
|
|
void bt_init(prof_bt_t *bt, void **vec);
|
|
void prof_backtrace(prof_bt_t *bt);
|
|
prof_tctx_t *prof_lookup(tsd_t *tsd, prof_bt_t *bt);
|
|
#ifdef JEMALLOC_JET
|
|
size_t prof_tdata_count(void);
|
|
size_t prof_bt_count(void);
|
|
const prof_cnt_t *prof_cnt_all(void);
|
|
typedef int (prof_dump_open_t)(bool, const char *);
|
|
extern prof_dump_open_t *prof_dump_open;
|
|
typedef bool (prof_dump_header_t)(bool, const prof_cnt_t *);
|
|
extern prof_dump_header_t *prof_dump_header;
|
|
#endif
|
|
void prof_idump(void);
|
|
bool prof_mdump(const char *filename);
|
|
void prof_gdump(void);
|
|
prof_tdata_t *prof_tdata_init(tsd_t *tsd);
|
|
prof_tdata_t *prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata);
|
|
void prof_reset(tsd_t *tsd, size_t lg_sample);
|
|
void prof_tdata_cleanup(tsd_t *tsd);
|
|
const char *prof_thread_name_get(void);
|
|
bool prof_active_get(void);
|
|
bool prof_active_set(bool active);
|
|
int prof_thread_name_set(tsd_t *tsd, const char *thread_name);
|
|
bool prof_thread_active_get(void);
|
|
bool prof_thread_active_set(bool active);
|
|
bool prof_thread_active_init_get(void);
|
|
bool prof_thread_active_init_set(bool active_init);
|
|
void prof_boot0(void);
|
|
void prof_boot1(void);
|
|
bool prof_boot2(void);
|
|
void prof_prefork(void);
|
|
void prof_postfork_parent(void);
|
|
void prof_postfork_child(void);
|
|
void prof_sample_threshold_update(prof_tdata_t *tdata);
|
|
|
|
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES

#ifndef JEMALLOC_ENABLE_INLINE
|
|
bool prof_active_get_unlocked(void);
|
|
prof_tdata_t *prof_tdata_get(tsd_t *tsd, bool create);
|
|
bool prof_sample_accum_update(tsd_t *tsd, size_t usize, bool commit,
|
|
prof_tdata_t **tdata_out);
|
|
prof_tctx_t *prof_alloc_prep(tsd_t *tsd, size_t usize, bool update);
|
|
prof_tctx_t *prof_tctx_get(const void *ptr);
|
|
void prof_tctx_set(const void *ptr, prof_tctx_t *tctx);
|
|
void prof_malloc_sample_object(const void *ptr, size_t usize,
|
|
prof_tctx_t *tctx);
|
|
void prof_malloc(const void *ptr, size_t usize, prof_tctx_t *tctx);
|
|
void prof_realloc(tsd_t *tsd, const void *ptr, size_t usize,
|
|
prof_tctx_t *tctx, bool updated, size_t old_usize, prof_tctx_t *old_tctx);
|
|
void prof_free(tsd_t *tsd, const void *ptr, size_t usize);
|
|
#endif
|
|
|
|
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PROF_C_))
JEMALLOC_ALWAYS_INLINE bool
|
|
prof_active_get_unlocked(void)
|
|
{
|
|
|
|
/*
|
|
* Even if opt_prof is true, sampling can be temporarily disabled by
|
|
* setting prof_active to false. No locking is used when reading
|
|
* prof_active in the fast path, so there are no guarantees regarding
|
|
* how long it will take for all threads to notice state changes.
|
|
*/
|
|
return (prof_active);
|
|
}
|
|
|
|
/*
 * Return the prof_tdata_t stored in tsd.
 *
 * When create is false the stored value is returned untouched.  When create
 * is true:
 *   - a NULL tdata is initialized via prof_tdata_init(), but only if
 *     tsd_nominal(tsd) holds; otherwise NULL is returned;
 *   - a tdata whose expired flag is set is replaced via prof_tdata_reinit();
 *   - in both cases the new value is stored back into tsd.
 * The result may be NULL.
 */
JEMALLOC_ALWAYS_INLINE prof_tdata_t *
prof_tdata_get(tsd_t *tsd, bool create)
{
	prof_tdata_t *tdata;

	cassert(config_prof);

	tdata = tsd_prof_tdata_get(tsd);
	if (create) {
		if (unlikely(tdata == NULL)) {
			if (tsd_nominal(tsd)) {
				tdata = prof_tdata_init(tsd);
				tsd_prof_tdata_set(tsd, tdata);
			}
		} else if (unlikely(tdata->expired)) {
			tdata = prof_tdata_reinit(tsd, tdata);
			tsd_prof_tdata_set(tsd, tdata);
		}
		/* Anything handed back on the create path must be attached. */
		assert(tdata == NULL || tdata->attached);
	}

	return (tdata);
}

/*
 * Return the prof_tctx_t recorded for the allocation at ptr (must be
 * non-NULL).  A pointer that differs from its chunk base address is looked up
 * with arena_prof_tctx_get(); a pointer equal to its chunk base is looked up
 * with huge_prof_tctx_get().
 */
JEMALLOC_ALWAYS_INLINE prof_tctx_t *
prof_tctx_get(const void *ptr)
{
	prof_tctx_t *ret;
	arena_chunk_t *chunk;

	cassert(config_prof);
	assert(ptr != NULL);

	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
	if (likely(chunk != ptr)) {
		/* Region. */
		ret = arena_prof_tctx_get(ptr);
	} else
		ret = huge_prof_tctx_get(ptr);

	return (ret);
}

JEMALLOC_ALWAYS_INLINE void
|
|
prof_tctx_set(const void *ptr, prof_tctx_t *tctx)
|
|
{
|
|
arena_chunk_t *chunk;
|
|
|
|
cassert(config_prof);
|
|
assert(ptr != NULL);
|
|
|
|
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
|
|
if (likely(chunk != ptr)) {
|
|
/* Region. */
|
|
arena_prof_tctx_set(ptr, tctx);
|
|
} else
|
|
huge_prof_tctx_set(ptr, tctx);
|
|
}
|
|
|
|
JEMALLOC_ALWAYS_INLINE bool
|
|
prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update,
|
|
prof_tdata_t **tdata_out)
|
|
{
|
|
prof_tdata_t *tdata;
|
|
|
|
cassert(config_prof);
|
|
|
|
tdata = prof_tdata_get(tsd, true);
|
|
if ((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
|
|
tdata = NULL;
|
|
|
|
if (tdata_out != NULL)
|
|
*tdata_out = tdata;
|
|
|
|
if (tdata == NULL)
|
|
return (true);
|
|
|
|
if (tdata->bytes_until_sample >= usize) {
|
|
if (update)
|
|
tdata->bytes_until_sample -= usize;
|
|
return (true);
|
|
} else {
|
|
/* Compute new sample threshold. */
|
|
if (update)
|
|
prof_sample_threshold_update(tdata);
|
|
return (!tdata->active);
|
|
}
|
|
}
|
|
|
|
/*
 * Prepare profiling for an upcoming allocation of usize bytes (usize must
 * already equal s2u(usize)).
 *
 * Returns the marker value (prof_tctx_t *)1U when the allocation is not to be
 * sampled, i.e. when prof_active is false or prof_sample_accum_update()
 * reports true.  Otherwise a backtrace is captured into the thread's
 * tdata->vec and the result of prof_lookup() for it is returned.
 *
 * update is forwarded to prof_sample_accum_update().
 */
JEMALLOC_ALWAYS_INLINE prof_tctx_t *
prof_alloc_prep(tsd_t *tsd, size_t usize, bool update)
{
	prof_tctx_t *ret;
	prof_tdata_t *tdata;
	prof_bt_t bt;

	assert(usize == s2u(usize));

	if (!prof_active_get_unlocked() || likely(prof_sample_accum_update(tsd,
	    usize, update, &tdata)))
		ret = (prof_tctx_t *)(uintptr_t)1U;
	else {
		/*
		 * prof_sample_accum_update() returned false, which it only
		 * does after storing a non-NULL tdata through its out
		 * parameter.
		 */
		bt_init(&bt, tdata->vec);
		prof_backtrace(&bt);
		ret = prof_lookup(tsd, &bt);
	}

	return (ret);
}

JEMALLOC_ALWAYS_INLINE void
|
|
prof_malloc(const void *ptr, size_t usize, prof_tctx_t *tctx)
|
|
{
|
|
|
|
cassert(config_prof);
|
|
assert(ptr != NULL);
|
|
assert(usize == isalloc(ptr, true));
|
|
|
|
if (unlikely((uintptr_t)tctx > (uintptr_t)1U))
|
|
prof_malloc_sample_object(ptr, usize, tctx);
|
|
else
|
|
prof_tctx_set(ptr, (prof_tctx_t *)(uintptr_t)1U);
|
|
}
|
|
|
|
JEMALLOC_ALWAYS_INLINE void
|
|
prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx,
|
|
bool updated, size_t old_usize, prof_tctx_t *old_tctx)
|
|
{
|
|
|
|
cassert(config_prof);
|
|
assert(ptr != NULL || (uintptr_t)tctx <= (uintptr_t)1U);
|
|
|
|
if (!updated && ptr != NULL) {
|
|
assert(usize == isalloc(ptr, true));
|
|
if (prof_sample_accum_update(tsd, usize, true, NULL)) {
|
|
/*
|
|
* Don't sample. The usize passed to PROF_ALLOC_PREP()
|
|
* was larger than what actually got allocated, so a
|
|
* backtrace was captured for this allocation, even
|
|
* though its actual usize was insufficient to cross the
|
|
* sample threshold.
|
|
*/
|
|
tctx = (prof_tctx_t *)(uintptr_t)1U;
|
|
}
|
|
}
|
|
|
|
if (unlikely((uintptr_t)old_tctx > (uintptr_t)1U))
|
|
prof_free_sampled_object(tsd, old_usize, old_tctx);
|
|
if (unlikely((uintptr_t)tctx > (uintptr_t)1U))
|
|
prof_malloc_sample_object(ptr, usize, tctx);
|
|
else
|
|
prof_tctx_set(ptr, (prof_tctx_t *)(uintptr_t)1U);
|
|
}
|
|
|
|
JEMALLOC_ALWAYS_INLINE void
|
|
prof_free(tsd_t *tsd, const void *ptr, size_t usize)
|
|
{
|
|
prof_tctx_t *tctx = prof_tctx_get(ptr);
|
|
|
|
cassert(config_prof);
|
|
assert(usize == isalloc(ptr, true));
|
|
|
|
if (unlikely((uintptr_t)tctx > (uintptr_t)1U))
|
|
prof_free_sampled_object(tsd, usize, tctx);
|
|
}
|
|
#endif

#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/