/******************************************************************************/
#ifdef JEMALLOC_H_TYPES

typedef struct prof_bt_s prof_bt_t;
typedef struct prof_cnt_s prof_cnt_t;
typedef struct prof_thr_cnt_s prof_thr_cnt_t;
typedef struct prof_ctx_s prof_ctx_t;
typedef struct prof_tdata_s prof_tdata_t;

/* Option defaults. */
#ifdef JEMALLOC_PROF
#  define PROF_PREFIX_DEFAULT       "jeprof"
#else
#  define PROF_PREFIX_DEFAULT       ""
#endif
#define LG_PROF_SAMPLE_DEFAULT      19
#define LG_PROF_INTERVAL_DEFAULT    -1

/*
 * Hard limit on stack backtrace depth.  The version of prof_backtrace() that
 * is based on __builtin_return_address() necessarily has a hard-coded number
 * of backtrace frame handlers, and should be kept in sync with this setting.
 */
#define PROF_BT_MAX                 128

/* Maximum number of backtraces to store in each per thread LRU cache. */
#define PROF_TCMAX                  1024

/* Initial hash table size. */
#define PROF_CKH_MINITEMS           64

/* Size of memory buffer to use when writing dump files. */
#define PROF_DUMP_BUFSIZE           65536

/* Size of stack-allocated buffer used by prof_printf(). */
#define PROF_PRINTF_BUFSIZE         128

/*
 * Number of mutexes shared among all ctx's.  No space is allocated for these
 * unless profiling is enabled, so it's okay to over-provision.
 */
#define PROF_NCTX_LOCKS             1024

/*
 * prof_tdata pointers close to NULL are used to encode state information that
 * is used for cleaning up during thread shutdown.
 */
#define PROF_TDATA_STATE_REINCARNATED   ((prof_tdata_t *)(uintptr_t)1)
#define PROF_TDATA_STATE_PURGATORY      ((prof_tdata_t *)(uintptr_t)2)
#define PROF_TDATA_STATE_MAX            PROF_TDATA_STATE_PURGATORY

#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS

struct prof_bt_s {
    /* Backtrace, stored as len program counters. */
    void        **vec;
    unsigned    len;
};

#ifdef JEMALLOC_PROF_LIBGCC
/* Data structure passed to libgcc _Unwind_Backtrace() callback functions. */
typedef struct {
    prof_bt_t   *bt;
    unsigned    nignore;
    unsigned    max;
} prof_unwind_data_t;
#endif

struct prof_cnt_s {
    /*
     * Profiling counters.  An allocation/deallocation pair can operate on
     * different prof_thr_cnt_t objects that are linked into the same
     * prof_ctx_t cnts_ql, so it is possible for the cur* counters to go
     * negative.  In principle it is possible for the *bytes counters to
     * overflow/underflow, but a general solution would require something
     * like 128-bit counters; this implementation doesn't bother to solve
     * that problem.
     */
    int64_t     curobjs;
    int64_t     curbytes;
    uint64_t    accumobjs;
    uint64_t    accumbytes;
};

struct prof_thr_cnt_s {
    /* Linkage into prof_ctx_t's cnts_ql. */
    ql_elm(prof_thr_cnt_t) cnts_link;

    /* Linkage into thread's LRU. */
    ql_elm(prof_thr_cnt_t) lru_link;

    /*
     * Associated context.  If a thread frees an object that it did not
     * allocate, it is possible that the context is not cached in the
     * thread's hash table, in which case it must be able to look up the
     * context, insert a new prof_thr_cnt_t into the thread's hash table,
     * and link it into the prof_ctx_t's cnts_ql.
     */
    prof_ctx_t  *ctx;

    /*
     * Threads use memory barriers to update the counters.  Since there is
     * only ever one writer, the only challenge is for the reader to get a
     * consistent read of the counters.
     *
     * The writer uses this series of operations:
     *
     * 1) Increment epoch to an odd number.
     * 2) Update counters.
     * 3) Increment epoch to an even number.
     *
     * The reader must assure 1) that the epoch is even while it reads the
     * counters, and 2) that the epoch doesn't change between the time it
     * starts and finishes reading the counters.
     */
    unsigned    epoch;

    /* Profiling counters. */
    prof_cnt_t  cnts;
};
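
/*
 * Illustrative reader-side sketch (not compiled; tcnt and snapshot are
 * hypothetical names).  The dump path in prof.c uses a loop of this shape,
 * pairing it with the mb_write() calls performed by the writers below:
 *
 *    prof_cnt_t snapshot;
 *    unsigned start;
 *
 *    do {
 *        start = tcnt->epoch;
 *        // read barrier
 *        snapshot = tcnt->cnts;
 *        // read barrier
 *    } while ((start & 1U) != 0 || start != tcnt->epoch);
 *
 * A snapshot is discarded if it was taken while the epoch was odd (a write
 * was in progress) or if the epoch changed while the counters were being
 * copied.
 */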

struct prof_ctx_s {
    /* Associated backtrace. */
    prof_bt_t       *bt;

    /* Protects nlimbo, cnt_merged, and cnts_ql. */
    malloc_mutex_t  *lock;

    /*
     * Number of threads that currently cause this ctx to be in a state of
     * limbo due to one of:
     *   - Initializing per thread counters associated with this ctx.
     *   - Preparing to destroy this ctx.
     *   - Dumping a heap profile that includes this ctx.
     * nlimbo must be 1 (single destroyer) in order to safely destroy the
     * ctx.
     */
    unsigned        nlimbo;

    /* Temporary storage for summation during dump. */
    prof_cnt_t      cnt_summed;

    /* When threads exit, they merge their stats into cnt_merged. */
    prof_cnt_t      cnt_merged;

    /*
     * List of profile counters, one for each thread that has allocated in
     * this context.
     */
    ql_head(prof_thr_cnt_t) cnts_ql;

    /* Linkage for list of contexts to be dumped. */
    ql_elm(prof_ctx_t) dump_link;
};
typedef ql_head(prof_ctx_t) prof_ctx_list_t;

struct prof_tdata_s {
    /*
     * Hash of (prof_bt_t *)-->(prof_thr_cnt_t *).  Each thread keeps a
     * cache of backtraces, with associated thread-specific prof_thr_cnt_t
     * objects.  Other threads may read the prof_thr_cnt_t contents, but no
     * others will ever write them.
     *
     * Upon thread exit, the thread must merge all the prof_thr_cnt_t
     * counter data into the associated prof_ctx_t objects, and unlink/free
     * the prof_thr_cnt_t objects.
     */
    ckh_t           bt2cnt;

    /* LRU for contents of bt2cnt. */
    ql_head(prof_thr_cnt_t) lru_ql;

    /* Backtrace vector, used for calls to prof_backtrace(). */
    void            **vec;

    /* Sampling state. */
    uint64_t        prng_state;
    uint64_t        threshold;
    uint64_t        accum;

    /* State used to avoid dumping while operating on prof internals. */
    bool            enq;
    bool            enq_idump;
    bool            enq_gdump;
};

#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS

extern bool opt_prof;
/*
 * Even if opt_prof is true, sampling can be temporarily disabled by setting
 * opt_prof_active to false.  No locking is used when updating opt_prof_active,
 * so there are no guarantees regarding how long it will take for all threads
 * to notice state changes.
 */
extern bool     opt_prof_active;
extern size_t   opt_lg_prof_sample;     /* lg(mean bytes between samples). */
extern ssize_t  opt_lg_prof_interval;   /* lg(prof_interval). */
extern bool     opt_prof_gdump;         /* High-water memory dumping. */
extern bool     opt_prof_final;         /* Final profile dumping. */
extern bool     opt_prof_leak;          /* Dump leak summary at exit. */
extern bool     opt_prof_accum;         /* Report cumulative bytes. */
extern char     opt_prof_prefix[
    /* Minimize memory bloat for non-prof builds. */
#ifdef JEMALLOC_PROF
    PATH_MAX +
#endif
    1];

/*
 * Profile dump interval, measured in bytes allocated.  Each arena triggers a
 * profile dump when it reaches this threshold.  The effect is that the
 * interval between profile dumps averages prof_interval, though the actual
 * interval between dumps will tend to be sporadic, and the interval will be a
 * maximum of approximately (prof_interval * narenas).
 */
extern uint64_t prof_interval;
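
/*
 * Illustrative example (not part of the interface): with
 * opt_lg_prof_interval == 30, prof_interval is 2^30 bytes (1 GiB).  Because
 * each arena accumulates allocated bytes independently, a process running
 * with, say, four arenas could in the worst case allocate nearly 4 GiB
 * between two consecutive interval-triggered dumps, while the average spacing
 * remains roughly 1 GiB of allocation.
 */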

void    bt_init(prof_bt_t *bt, void **vec);
void    prof_backtrace(prof_bt_t *bt, unsigned nignore);
prof_thr_cnt_t  *prof_lookup(prof_bt_t *bt);
#ifdef JEMALLOC_JET
size_t  prof_bt_count(void);
typedef int (prof_dump_open_t)(bool, const char *);
extern prof_dump_open_t *prof_dump_open;
#endif
void    prof_idump(void);
bool    prof_mdump(const char *filename);
void    prof_gdump(void);
prof_tdata_t    *prof_tdata_init(void);
void    prof_tdata_cleanup(void *arg);
void    prof_boot0(void);
void    prof_boot1(void);
bool    prof_boot2(void);
void    prof_prefork(void);
void    prof_postfork_parent(void);
void    prof_postfork_child(void);

#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES

#define PROF_ALLOC_PREP(nignore, size, ret) do {                        \
    prof_tdata_t *prof_tdata;                                           \
    prof_bt_t bt;                                                       \
                                                                        \
    assert(size == s2u(size));                                          \
                                                                        \
    prof_tdata = prof_tdata_get(true);                                  \
    if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) {     \
        if (prof_tdata != NULL)                                         \
            ret = (prof_thr_cnt_t *)(uintptr_t)1U;                      \
        else                                                            \
            ret = NULL;                                                 \
        break;                                                          \
    }                                                                   \
                                                                        \
    if (opt_prof_active == false) {                                     \
        /* Sampling is currently inactive, so avoid sampling. */        \
        ret = (prof_thr_cnt_t *)(uintptr_t)1U;                          \
    } else if (opt_lg_prof_sample == 0) {                               \
        /* Don't bother with sampling logic, since sampling */          \
        /* interval is 1. */                                            \
        bt_init(&bt, prof_tdata->vec);                                  \
        prof_backtrace(&bt, nignore);                                   \
        ret = prof_lookup(&bt);                                         \
    } else {                                                            \
        if (prof_tdata->threshold == 0) {                               \
            /* Initialize.  Seed the prng differently for */            \
            /* each thread. */                                          \
            prof_tdata->prng_state =                                    \
                (uint64_t)(uintptr_t)&size;                             \
            prof_sample_threshold_update(prof_tdata);                   \
        }                                                               \
                                                                        \
        /* Determine whether to capture a backtrace based on */         \
        /* whether size is enough for prof_accum to reach */            \
        /* prof_tdata->threshold.  However, delay updating */           \
        /* these variables until prof_{m,re}alloc(), because */         \
        /* we don't know for sure that the allocation will */           \
        /* succeed. */                                                  \
        /* */                                                           \
        /* Use subtraction rather than addition to avoid */             \
        /* potential integer overflow. */                               \
        if (size >= prof_tdata->threshold -                             \
            prof_tdata->accum) {                                        \
            bt_init(&bt, prof_tdata->vec);                              \
            prof_backtrace(&bt, nignore);                               \
            ret = prof_lookup(&bt);                                     \
        } else                                                          \
            ret = (prof_thr_cnt_t *)(uintptr_t)1U;                      \
    }                                                                   \
} while (0)
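
/*
 * Illustrative usage sketch (hypothetical caller; the real call sites live in
 * jemalloc's allocation paths).  PROF_ALLOC_PREP() communicates three
 * outcomes through ret: NULL means the thread's prof_tdata could not be
 * initialized (treat as an error), (prof_thr_cnt_t *)1U means "allocate but
 * do not sample", and any larger pointer is a counter set to pass to
 * prof_malloc() once the allocation has succeeded:
 *
 *    prof_thr_cnt_t *cnt;
 *    size_t usize = s2u(size);
 *
 *    PROF_ALLOC_PREP(1, usize, cnt);
 *    if (cnt == NULL)
 *        return (NULL);
 *    void *p = imalloc(usize);    // hypothetical allocation call
 *    if (p == NULL)
 *        return (NULL);
 *    prof_malloc(p, usize, cnt);
 */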

#ifndef JEMALLOC_ENABLE_INLINE
malloc_tsd_protos(JEMALLOC_ATTR(unused), prof_tdata, prof_tdata_t *)

prof_tdata_t    *prof_tdata_get(bool create);
void    prof_sample_threshold_update(prof_tdata_t *prof_tdata);
prof_ctx_t  *prof_ctx_get(const void *ptr);
void    prof_ctx_set(const void *ptr, prof_ctx_t *ctx);
bool    prof_sample_accum_update(size_t size);
void    prof_malloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt);
void    prof_realloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt,
    size_t old_usize, prof_ctx_t *old_ctx);
void    prof_free(const void *ptr, size_t size);
#endif

#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PROF_C_))
/* Thread-specific backtrace cache, used to reduce bt2ctx contention. */
malloc_tsd_externs(prof_tdata, prof_tdata_t *)
malloc_tsd_funcs(JEMALLOC_INLINE, prof_tdata, prof_tdata_t *, NULL,
    prof_tdata_cleanup)

JEMALLOC_INLINE prof_tdata_t *
prof_tdata_get(bool create)
{
    prof_tdata_t *prof_tdata;

    cassert(config_prof);

    prof_tdata = *prof_tdata_tsd_get();
    if (create && prof_tdata == NULL)
        prof_tdata = prof_tdata_init();

    return (prof_tdata);
}

JEMALLOC_INLINE void
prof_sample_threshold_update(prof_tdata_t *prof_tdata)
{
    /*
     * The body of this function is compiled out unless heap profiling is
     * enabled, so that it is possible to compile jemalloc with floating
     * point support completely disabled.  Avoiding floating point code is
     * important on memory-constrained systems, but it also enables a
     * workaround for versions of glibc that don't properly save/restore
     * floating point registers during dynamic lazy symbol loading (which
     * internally calls into whatever malloc implementation happens to be
     * integrated into the application).  Note that some compilers (e.g.
     * gcc 4.8) may use floating point registers for fast memory moves, so
     * jemalloc must be compiled with such optimizations disabled (e.g.
     * -mno-sse) in order for the workaround to be complete.
     */
#ifdef JEMALLOC_PROF
    uint64_t r;
    double u;

    cassert(config_prof);

    /*
     * Compute sample threshold as a geometrically distributed random
     * variable with mean (2^opt_lg_prof_sample).
     *
     *                          __        __
     *                          |  log(u)  |                     1
     * prof_tdata->threshold =  | -------- |, where p = -------------------
     *                          | log(1-p) |             opt_lg_prof_sample
     *                                                  2
     *
     * For more information on the math, see:
     *
     *   Non-Uniform Random Variate Generation
     *   Luc Devroye
     *   Springer-Verlag, New York, 1986
     *   pp 500
     *   (http://luc.devroye.org/rnbookindex.html)
     */
    prng64(r, 53, prof_tdata->prng_state, UINT64_C(6364136223846793005),
        UINT64_C(1442695040888963407));
    u = (double)r * (1.0/9007199254740992.0L);
    prof_tdata->threshold = (uint64_t)(log(u) /
        log(1.0 - (1.0 / (double)((uint64_t)1U << opt_lg_prof_sample))))
        + (uint64_t)1U;
#endif
}
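
/*
 * Worked example (illustrative, not normative): with the default
 * opt_lg_prof_sample of 19, p = 1/2^19, so the threshold drawn above is a
 * geometric variate with mean 2^19 = 524288 bytes; on average one allocation
 * is sampled per 512 KiB allocated.  The truncating cast plus the trailing
 * "+ 1" implement the ceiling in the formula and guarantee a threshold of at
 * least 1 byte, so progress is always made.
 */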

JEMALLOC_INLINE prof_ctx_t *
prof_ctx_get(const void *ptr)
{
    prof_ctx_t *ret;
    arena_chunk_t *chunk;

    cassert(config_prof);
    assert(ptr != NULL);

    chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
    if (chunk != ptr) {
        /* Region. */
        ret = arena_prof_ctx_get(ptr);
    } else
        ret = huge_prof_ctx_get(ptr);

    return (ret);
}

JEMALLOC_INLINE void
prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
{
    arena_chunk_t *chunk;

    cassert(config_prof);
    assert(ptr != NULL);

    chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
    if (chunk != ptr) {
        /* Region. */
        arena_prof_ctx_set(ptr, ctx);
    } else
        huge_prof_ctx_set(ptr, ctx);
}

JEMALLOC_INLINE bool
prof_sample_accum_update(size_t size)
{
    prof_tdata_t *prof_tdata;

    cassert(config_prof);
    /* Sampling logic is unnecessary if the interval is 1. */
    assert(opt_lg_prof_sample != 0);

    prof_tdata = prof_tdata_get(false);
    if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
        return (true);

    /* Take care to avoid integer overflow. */
    if (size >= prof_tdata->threshold - prof_tdata->accum) {
        prof_tdata->accum -= (prof_tdata->threshold - size);
        /* Compute new sample threshold. */
        prof_sample_threshold_update(prof_tdata);
        while (prof_tdata->accum >= prof_tdata->threshold) {
            prof_tdata->accum -= prof_tdata->threshold;
            prof_sample_threshold_update(prof_tdata);
        }
        return (false);
    } else {
        prof_tdata->accum += size;
        return (true);
    }
}
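
/*
 * Illustrative trace (hypothetical numbers): suppose threshold == 600 and
 * accum == 550.  An allocation of size 100 satisfies 100 >= 600 - 550, so it
 * is sampled; accum becomes 550 - (600 - 100) == 50, i.e. the 50 bytes that
 * overshot the old threshold are carried toward the freshly drawn threshold.
 * The while loop then handles the rare case where a single huge allocation
 * spans several thresholds at once.
 */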

JEMALLOC_INLINE void
prof_malloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt)
{

    cassert(config_prof);
    assert(ptr != NULL);
    assert(usize == isalloc(ptr, true));

    if (opt_lg_prof_sample != 0) {
        if (prof_sample_accum_update(usize)) {
            /*
             * Don't sample.  For malloc()-like allocation, it is
             * always possible to tell in advance how large an
             * object's usable size will be, so there should never
             * be a difference between the usize passed to
             * PROF_ALLOC_PREP() and prof_malloc().
             */
            assert((uintptr_t)cnt == (uintptr_t)1U);
        }
    }

    if ((uintptr_t)cnt > (uintptr_t)1U) {
        prof_ctx_set(ptr, cnt->ctx);

        cnt->epoch++;
        /*********/
        mb_write();
        /*********/
        cnt->cnts.curobjs++;
        cnt->cnts.curbytes += usize;
        if (opt_prof_accum) {
            cnt->cnts.accumobjs++;
            cnt->cnts.accumbytes += usize;
        }
        /*********/
        mb_write();
        /*********/
        cnt->epoch++;
        /*********/
        mb_write();
        /*********/
    } else
        prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U);
}

JEMALLOC_INLINE void
prof_realloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt,
    size_t old_usize, prof_ctx_t *old_ctx)
{
    prof_thr_cnt_t *told_cnt;

    cassert(config_prof);
    assert(ptr != NULL || (uintptr_t)cnt <= (uintptr_t)1U);

    if (ptr != NULL) {
        assert(usize == isalloc(ptr, true));
        if (opt_lg_prof_sample != 0) {
            if (prof_sample_accum_update(usize)) {
                /*
                 * Don't sample.  The usize passed to
                 * PROF_ALLOC_PREP() was larger than what
                 * actually got allocated, so a backtrace was
                 * captured for this allocation, even though
                 * its actual usize was insufficient to cross
                 * the sample threshold.
                 */
                cnt = (prof_thr_cnt_t *)(uintptr_t)1U;
            }
        }
    }

    if ((uintptr_t)old_ctx > (uintptr_t)1U) {
        told_cnt = prof_lookup(old_ctx->bt);
        if (told_cnt == NULL) {
            /*
             * It's too late to propagate OOM for this realloc(),
             * so operate directly on old_ctx->cnt_merged.
             */
            malloc_mutex_lock(old_ctx->lock);
            old_ctx->cnt_merged.curobjs--;
            old_ctx->cnt_merged.curbytes -= old_usize;
            malloc_mutex_unlock(old_ctx->lock);
            told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U;
        }
    } else
        told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U;

    if ((uintptr_t)told_cnt > (uintptr_t)1U)
        told_cnt->epoch++;
    if ((uintptr_t)cnt > (uintptr_t)1U) {
        prof_ctx_set(ptr, cnt->ctx);
        cnt->epoch++;
    } else if (ptr != NULL)
        prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U);
    /*********/
    mb_write();
    /*********/
    if ((uintptr_t)told_cnt > (uintptr_t)1U) {
        told_cnt->cnts.curobjs--;
        told_cnt->cnts.curbytes -= old_usize;
    }
    if ((uintptr_t)cnt > (uintptr_t)1U) {
        cnt->cnts.curobjs++;
        cnt->cnts.curbytes += usize;
        if (opt_prof_accum) {
            cnt->cnts.accumobjs++;
            cnt->cnts.accumbytes += usize;
        }
    }
    /*********/
    mb_write();
    /*********/
    if ((uintptr_t)told_cnt > (uintptr_t)1U)
        told_cnt->epoch++;
    if ((uintptr_t)cnt > (uintptr_t)1U)
        cnt->epoch++;
    /*********/
    mb_write(); /* Not strictly necessary. */
}

JEMALLOC_INLINE void
prof_free(const void *ptr, size_t size)
{
    prof_ctx_t *ctx = prof_ctx_get(ptr);

    cassert(config_prof);

    if ((uintptr_t)ctx > (uintptr_t)1) {
        prof_thr_cnt_t *tcnt;
        assert(size == isalloc(ptr, true));
        tcnt = prof_lookup(ctx->bt);

        if (tcnt != NULL) {
            tcnt->epoch++;
            /*********/
            mb_write();
            /*********/
            tcnt->cnts.curobjs--;
            tcnt->cnts.curbytes -= size;
            /*********/
            mb_write();
            /*********/
            tcnt->epoch++;
            /*********/
            mb_write();
            /*********/
        } else {
            /*
             * OOM during free() cannot be propagated, so operate
             * directly on ctx->cnt_merged.
             */
            malloc_mutex_lock(ctx->lock);
            ctx->cnt_merged.curobjs--;
            ctx->cnt_merged.curbytes -= size;
            malloc_mutex_unlock(ctx->lock);
        }
    }
}
#endif

#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/