#define JEMALLOC_PROF_C_ #include "jemalloc/internal/jemalloc_internal.h" #ifdef JEMALLOC_PROF /******************************************************************************/ #ifdef JEMALLOC_PROF_LIBGCC #include #endif #ifdef JEMALLOC_PROF_LIBUNWIND #define UNW_LOCAL_ONLY #include #endif #include /******************************************************************************/ /* Data. */ bool opt_prof = false; bool opt_prof_active = true; size_t opt_lg_prof_bt_max = LG_PROF_BT_MAX_DEFAULT; size_t opt_lg_prof_sample = LG_PROF_SAMPLE_DEFAULT; ssize_t opt_lg_prof_interval = LG_PROF_INTERVAL_DEFAULT; bool opt_prof_udump = false; bool opt_prof_leak = false; uint64_t prof_interval; bool prof_promote; /* * Global hash of (prof_bt_t *)-->(prof_ctx_t *). This is the master data * structure that knows about all backtraces ever captured. */ static ckh_t bt2ctx; static malloc_mutex_t bt2ctx_mtx; /* * Thread-specific hash of (prof_bt_t *)-->(prof_thr_cnt_t *). Each thread * keeps a cache of backtraces, with associated thread-specific prof_thr_cnt_t * objects. Other threads may read the prof_thr_cnt_t contents, but no others * will ever write them. * * Upon thread exit, the thread must merge all the prof_thr_cnt_t counter data * into the associated prof_ctx_t objects, and unlink/free the prof_thr_cnt_t * objects. */ static __thread ckh_t *bt2cnt_tls JEMALLOC_ATTR(tls_model("initial-exec")); /* * Same contents as b2cnt_tls, but initialized such that the TSD destructor is * called when a thread exits, so that bt2cnt_tls contents can be merged, * unlinked, and deallocated. */ static pthread_key_t bt2cnt_tsd; /* (1U << opt_lg_prof_bt_max). */ static unsigned prof_bt_max; static __thread uint64_t prof_sample_prn_state JEMALLOC_ATTR(tls_model("initial-exec")); static __thread uint64_t prof_sample_threshold JEMALLOC_ATTR(tls_model("initial-exec")); static __thread uint64_t prof_sample_accum JEMALLOC_ATTR(tls_model("initial-exec")); static malloc_mutex_t prof_dump_seq_mtx; static uint64_t prof_dump_seq; static uint64_t prof_dump_iseq; static uint64_t prof_dump_mseq; static uint64_t prof_dump_useq; /* * This buffer is rather large for stack allocation, so use a single buffer for * all profile dumps. The buffer is implicitly protected by bt2ctx_mtx, since * it must be locked anyway during dumping. */ static char prof_dump_buf[PROF_DUMP_BUF_SIZE]; static unsigned prof_dump_buf_end; static int prof_dump_fd; /* Do not dump any profiles until bootstrapping is complete. */ static bool prof_booted = false; static malloc_mutex_t enq_mtx; static bool enq; static bool enq_idump; static bool enq_udump; /******************************************************************************/ /* Function prototypes for non-inline static functions. */ static prof_bt_t *bt_dup(prof_bt_t *bt); static void bt_init(prof_bt_t *bt, void **vec); #ifdef JEMALLOC_PROF_LIBGCC static _Unwind_Reason_Code prof_unwind_init_callback( struct _Unwind_Context *context, void *arg); static _Unwind_Reason_Code prof_unwind_callback( struct _Unwind_Context *context, void *arg); #endif static void prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max); static prof_thr_cnt_t *prof_lookup(prof_bt_t *bt); static void prof_ctx_set(const void *ptr, prof_ctx_t *ctx); static bool prof_flush(bool propagate_err); static bool prof_write(const char *s, bool propagate_err); static void prof_ctx_merge(prof_ctx_t *ctx, prof_cnt_t *cnt_all, size_t *leak_nctx); static bool prof_dump_ctx(prof_ctx_t *ctx, prof_bt_t *bt, bool propagate_err); static bool prof_dump_maps(bool propagate_err); static bool prof_dump(const char *filename, bool leakcheck, bool propagate_err); static void prof_dump_filename(char *filename, char v, int64_t vseq); static void prof_fdump(void); static void prof_bt_hash(const void *key, unsigned minbits, size_t *hash1, size_t *hash2); static bool prof_bt_keycomp(const void *k1, const void *k2); static void bt2cnt_thread_cleanup(void *arg); /******************************************************************************/ static void bt_init(prof_bt_t *bt, void **vec) { bt->vec = vec; bt->len = 0; } static prof_bt_t * bt_dup(prof_bt_t *bt) { prof_bt_t *ret; /* * Create a single allocation that has space for vec immediately * following the prof_bt_t structure. The backtraces that get * stored in the backtrace caches are copied from stack-allocated * temporary variables, so size is known at creation time. Making this * a contiguous object improves cache locality. */ ret = (prof_bt_t *)imalloc(QUANTUM_CEILING(sizeof(prof_bt_t)) + (bt->len * sizeof(void *))); if (ret == NULL) return (NULL); ret->vec = (void **)((uintptr_t)ret + QUANTUM_CEILING(sizeof(prof_bt_t))); memcpy(ret->vec, bt->vec, bt->len * sizeof(void *)); ret->len = bt->len; return (ret); } static inline void prof_enter(void) { malloc_mutex_lock(&enq_mtx); enq = true; malloc_mutex_unlock(&enq_mtx); malloc_mutex_lock(&bt2ctx_mtx); } static inline void prof_leave(void) { bool idump, udump; malloc_mutex_unlock(&bt2ctx_mtx); malloc_mutex_lock(&enq_mtx); enq = false; idump = enq_idump; enq_idump = false; udump = enq_udump; enq_udump = false; malloc_mutex_unlock(&enq_mtx); if (idump) prof_idump(); if (udump) prof_udump(); } #ifdef JEMALLOC_PROF_LIBGCC static _Unwind_Reason_Code prof_unwind_init_callback(struct _Unwind_Context *context, void *arg) { return (_URC_NO_REASON); } static _Unwind_Reason_Code prof_unwind_callback(struct _Unwind_Context *context, void *arg) { prof_unwind_data_t *data = (prof_unwind_data_t *)arg; if (data->nignore > 0) data->nignore--; else { data->bt->vec[data->bt->len] = (void *)_Unwind_GetIP(context); data->bt->len++; if (data->bt->len == data->max) return (_URC_END_OF_STACK); } return (_URC_NO_REASON); } static void prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) { prof_unwind_data_t data = {bt, nignore, max}; _Unwind_Backtrace(prof_unwind_callback, &data); } #elif defined(JEMALLOC_PROF_LIBUNWIND) static void prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) { unw_context_t uc; unw_cursor_t cursor; unsigned i; int err; assert(bt->len == 0); assert(bt->vec != NULL); assert(max <= (1U << opt_lg_prof_bt_max)); unw_getcontext(&uc); unw_init_local(&cursor, &uc); /* Throw away (nignore+1) stack frames, if that many exist. */ for (i = 0; i < nignore + 1; i++) { err = unw_step(&cursor); if (err <= 0) return; } /* * Iterate over stack frames until there are no more. Heap-allocate * and iteratively grow a larger bt if necessary. */ for (i = 0; i < max; i++) { unw_get_reg(&cursor, UNW_REG_IP, (unw_word_t *)&bt->vec[i]); err = unw_step(&cursor); if (err <= 0) { bt->len = i; break; } } } #else static void prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) { #define NIGNORE 3 #define BT_FRAME(i) \ if ((i) < NIGNORE + max) { \ void *p; \ if (__builtin_frame_address(i) == 0) \ return; \ p = __builtin_return_address(i); \ if (p == NULL) \ return; \ if (i >= NIGNORE) { \ bt->vec[(i) - NIGNORE] = p; \ bt->len = (i) - NIGNORE + 1; \ } \ } else \ return; assert(max <= (1U << opt_lg_prof_bt_max)); /* * Ignore the first three frames, since they are: * * 0: prof_backtrace() * 1: prof_alloc_prep() * 2: malloc(), calloc(), etc. */ #if 1 assert(nignore + 1 == NIGNORE); #else BT_FRAME(0) BT_FRAME(1) BT_FRAME(2) #endif BT_FRAME(3) BT_FRAME(4) BT_FRAME(5) BT_FRAME(6) BT_FRAME(7) BT_FRAME(8) BT_FRAME(9) BT_FRAME(10) BT_FRAME(11) BT_FRAME(12) BT_FRAME(13) BT_FRAME(14) BT_FRAME(15) BT_FRAME(16) BT_FRAME(17) BT_FRAME(18) BT_FRAME(19) BT_FRAME(20) BT_FRAME(21) BT_FRAME(22) BT_FRAME(23) BT_FRAME(24) BT_FRAME(25) BT_FRAME(26) BT_FRAME(27) BT_FRAME(28) BT_FRAME(29) BT_FRAME(30) BT_FRAME(31) BT_FRAME(32) BT_FRAME(33) BT_FRAME(34) BT_FRAME(35) BT_FRAME(36) BT_FRAME(37) BT_FRAME(38) BT_FRAME(39) BT_FRAME(40) BT_FRAME(41) BT_FRAME(42) BT_FRAME(43) BT_FRAME(44) BT_FRAME(45) BT_FRAME(46) BT_FRAME(47) BT_FRAME(48) BT_FRAME(49) BT_FRAME(50) BT_FRAME(51) BT_FRAME(52) BT_FRAME(53) BT_FRAME(54) BT_FRAME(55) BT_FRAME(56) BT_FRAME(57) BT_FRAME(58) BT_FRAME(59) BT_FRAME(60) BT_FRAME(61) BT_FRAME(62) BT_FRAME(63) BT_FRAME(64) BT_FRAME(65) BT_FRAME(66) BT_FRAME(67) BT_FRAME(68) BT_FRAME(69) BT_FRAME(70) BT_FRAME(71) BT_FRAME(72) BT_FRAME(73) BT_FRAME(74) BT_FRAME(75) BT_FRAME(76) BT_FRAME(77) BT_FRAME(78) BT_FRAME(79) BT_FRAME(80) BT_FRAME(81) BT_FRAME(82) BT_FRAME(83) BT_FRAME(84) BT_FRAME(85) BT_FRAME(86) BT_FRAME(87) BT_FRAME(88) BT_FRAME(89) BT_FRAME(90) BT_FRAME(91) BT_FRAME(92) BT_FRAME(93) BT_FRAME(94) BT_FRAME(95) BT_FRAME(96) BT_FRAME(97) BT_FRAME(98) BT_FRAME(99) BT_FRAME(100) BT_FRAME(101) BT_FRAME(102) BT_FRAME(103) BT_FRAME(104) BT_FRAME(105) BT_FRAME(106) BT_FRAME(107) BT_FRAME(108) BT_FRAME(109) BT_FRAME(110) BT_FRAME(111) BT_FRAME(112) BT_FRAME(113) BT_FRAME(114) BT_FRAME(115) BT_FRAME(116) BT_FRAME(117) BT_FRAME(118) BT_FRAME(119) BT_FRAME(120) BT_FRAME(121) BT_FRAME(122) BT_FRAME(123) BT_FRAME(124) BT_FRAME(125) BT_FRAME(126) BT_FRAME(127) /* Extras to compensate for NIGNORE. */ BT_FRAME(128) BT_FRAME(129) BT_FRAME(130) #undef BT_FRAME } #endif static prof_thr_cnt_t * prof_lookup(prof_bt_t *bt) { prof_thr_cnt_t *ret; ckh_t *bt2cnt = bt2cnt_tls; if (bt2cnt == NULL) { /* Initialize an empty cache for this thread. */ bt2cnt = (ckh_t *)imalloc(sizeof(ckh_t)); if (bt2cnt == NULL) return (NULL); if (ckh_new(bt2cnt, PROF_CKH_MINITEMS, prof_bt_hash, prof_bt_keycomp)) { idalloc(bt2cnt); return (NULL); } bt2cnt_tls = bt2cnt; pthread_setspecific(bt2cnt_tsd, bt2cnt); } if (ckh_search(bt2cnt, bt, NULL, (void **)&ret)) { prof_bt_t *btkey; prof_ctx_t *ctx; /* * This thread's cache lacks bt. Look for it in the global * cache. */ prof_enter(); if (ckh_search(&bt2ctx, bt, (void **)&btkey, (void **)&ctx)) { /* bt has never been seen before. Insert it. */ ctx = (prof_ctx_t *)imalloc(sizeof(prof_ctx_t)); if (ctx == NULL) { prof_leave(); return (NULL); } btkey = bt_dup(bt); if (btkey == NULL) { prof_leave(); idalloc(ctx); return (NULL); } ctx->bt = btkey; if (malloc_mutex_init(&ctx->lock)) { prof_leave(); idalloc(btkey); idalloc(ctx); return (NULL); } memset(&ctx->cnt_merged, 0, sizeof(prof_cnt_t)); ql_new(&ctx->cnts_ql); if (ckh_insert(&bt2ctx, btkey, ctx)) { /* OOM. */ prof_leave(); idalloc(btkey); idalloc(ctx); return (NULL); } } prof_leave(); /* Link a prof_thd_cnt_t into ctx for this thread. */ ret = (prof_thr_cnt_t *)imalloc(sizeof(prof_thr_cnt_t)); if (ret == NULL) return (NULL); ql_elm_new(ret, link); ret->ctx = ctx; ret->epoch = 0; memset(&ret->cnts, 0, sizeof(prof_cnt_t)); if (ckh_insert(bt2cnt, btkey, ret)) { idalloc(ret); return (NULL); } malloc_mutex_lock(&ctx->lock); ql_tail_insert(&ctx->cnts_ql, ret, link); malloc_mutex_unlock(&ctx->lock); } return (ret); } static inline void prof_sample_threshold_update(void) { uint64_t r; double u; /* * Compute prof_sample_threshold as a geometrically distributed random * variable with mean (2^opt_lg_prof_sample). */ prn64(r, 53, prof_sample_prn_state, (uint64_t)1125899906842625LLU, 1058392653243283975); u = (double)r * (1.0/9007199254740992.0L); prof_sample_threshold = (uint64_t)(log(u) / log(1.0 - (1.0 / (double)((uint64_t)1U << opt_lg_prof_sample)))) + (uint64_t)1U; } prof_thr_cnt_t * prof_alloc_prep(size_t size) { prof_thr_cnt_t *ret; void *vec[prof_bt_max]; prof_bt_t bt; if (opt_prof_active == false) { /* Sampling is currently inactive, so avoid sampling. */ ret = (prof_thr_cnt_t *)(uintptr_t)1U; } else if (opt_lg_prof_sample == 0) { /* * Don't bother with sampling logic, since sampling interval is * 1. */ bt_init(&bt, vec); prof_backtrace(&bt, 2, prof_bt_max); ret = prof_lookup(&bt); } else { if (prof_sample_threshold == 0) { /* * Initialize. Seed the prng differently for each * thread. */ prof_sample_prn_state = (uint64_t)(uintptr_t)&size; prof_sample_threshold_update(); } /* * Determine whether to capture a backtrace based on whether * size is enough for prof_accum to reach * prof_sample_threshold. However, delay updating these * variables until prof_{m,re}alloc(), because we don't know * for sure that the allocation will succeed. * * Use subtraction rather than addition to avoid potential * integer overflow. */ if (size >= prof_sample_threshold - prof_sample_accum) { bt_init(&bt, vec); prof_backtrace(&bt, 2, prof_bt_max); ret = prof_lookup(&bt); } else ret = (prof_thr_cnt_t *)(uintptr_t)1U; } return (ret); } prof_ctx_t * prof_ctx_get(const void *ptr) { prof_ctx_t *ret; arena_chunk_t *chunk; assert(ptr != NULL); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (chunk != ptr) { /* Region. */ assert(chunk->arena->magic == ARENA_MAGIC); ret = arena_prof_ctx_get(ptr); } else ret = huge_prof_ctx_get(ptr); return (ret); } static void prof_ctx_set(const void *ptr, prof_ctx_t *ctx) { arena_chunk_t *chunk; assert(ptr != NULL); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (chunk != ptr) { /* Region. */ assert(chunk->arena->magic == ARENA_MAGIC); arena_prof_ctx_set(ptr, ctx); } else huge_prof_ctx_set(ptr, ctx); } static inline void prof_sample_accum_update(size_t size) { if (opt_lg_prof_sample == 0) { /* * Don't bother with sampling logic, since sampling interval is * 1. */ return; } /* Take care to avoid integer overflow. */ if (size >= prof_sample_threshold - prof_sample_accum) { prof_sample_accum -= (prof_sample_threshold - size); /* Compute new prof_sample_threshold. */ prof_sample_threshold_update(); while (prof_sample_accum >= prof_sample_threshold) { prof_sample_accum -= prof_sample_threshold; prof_sample_threshold_update(); } } else prof_sample_accum += size; } void prof_malloc(const void *ptr, prof_thr_cnt_t *cnt) { size_t size = isalloc(ptr); assert(ptr != NULL); prof_ctx_set(ptr, cnt->ctx); prof_sample_accum_update(size); if ((uintptr_t)cnt > (uintptr_t)1U) { cnt->epoch++; /*********/ mb_write(); /*********/ cnt->cnts.curobjs++; cnt->cnts.curbytes += size; cnt->cnts.accumobjs++; cnt->cnts.accumbytes += size; /*********/ mb_write(); /*********/ cnt->epoch++; /*********/ mb_write(); /*********/ } } void prof_realloc(const void *ptr, prof_thr_cnt_t *cnt, const void *old_ptr, size_t old_size, prof_ctx_t *old_ctx) { size_t size = isalloc(ptr); prof_thr_cnt_t *told_cnt; if (ptr != NULL) { prof_ctx_set(ptr, cnt->ctx); prof_sample_accum_update(size); } if ((uintptr_t)old_ctx > (uintptr_t)1U) { told_cnt = prof_lookup(old_ctx->bt); if (told_cnt == NULL) { /* * It's too late to propagate OOM for this realloc(), * so operate directly on old_cnt->ctx->cnt_merged. */ malloc_printf("XXX BANG A\n"); malloc_mutex_lock(&old_ctx->lock); old_ctx->cnt_merged.curobjs--; old_ctx->cnt_merged.curbytes -= old_size; malloc_mutex_unlock(&old_ctx->lock); told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U; } } else told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U; if ((uintptr_t)told_cnt > (uintptr_t)1U) told_cnt->epoch++; if ((uintptr_t)cnt > (uintptr_t)1U) cnt->epoch++; /*********/ mb_write(); /*********/ if ((uintptr_t)told_cnt > (uintptr_t)1U) { told_cnt->cnts.curobjs--; told_cnt->cnts.curbytes -= old_size; } if ((uintptr_t)cnt > (uintptr_t)1U) { cnt->cnts.curobjs++; cnt->cnts.curbytes += size; cnt->cnts.accumobjs++; cnt->cnts.accumbytes += size; } /*********/ mb_write(); /*********/ if ((uintptr_t)told_cnt > (uintptr_t)1U) told_cnt->epoch++; if ((uintptr_t)cnt > (uintptr_t)1U) cnt->epoch++; /*********/ mb_write(); /* Not strictly necessary. */ } void prof_free(const void *ptr) { prof_ctx_t *ctx = prof_ctx_get(ptr); if ((uintptr_t)ctx > (uintptr_t)1) { size_t size = isalloc(ptr); prof_thr_cnt_t *tcnt = prof_lookup(ctx->bt); if (tcnt != NULL) { tcnt->epoch++; /*********/ mb_write(); /*********/ tcnt->cnts.curobjs--; tcnt->cnts.curbytes -= size; /*********/ mb_write(); /*********/ tcnt->epoch++; /*********/ mb_write(); /*********/ } else { /* * OOM during free() cannot be propagated, so operate * directly on cnt->ctx->cnt_merged. */ malloc_printf("XXX BANG B\n"); malloc_mutex_lock(&ctx->lock); ctx->cnt_merged.curobjs--; ctx->cnt_merged.curbytes -= size; malloc_mutex_unlock(&ctx->lock); } } } static bool prof_flush(bool propagate_err) { bool ret = false; ssize_t err; err = write(prof_dump_fd, prof_dump_buf, prof_dump_buf_end); if (err == -1) { if (propagate_err == false) { malloc_write(": write() failed during heap " "profile flush\n"); if (opt_abort) abort(); } ret = true; } prof_dump_buf_end = 0; return (ret); } static bool prof_write(const char *s, bool propagate_err) { unsigned i, slen, n; i = 0; slen = strlen(s); while (i < slen) { /* Flush the buffer if it is full. */ if (prof_dump_buf_end == PROF_DUMP_BUF_SIZE) if (prof_flush(propagate_err) && propagate_err) return (true); if (prof_dump_buf_end + slen <= PROF_DUMP_BUF_SIZE) { /* Finish writing. */ n = slen - i; } else { /* Write as much of s as will fit. */ n = PROF_DUMP_BUF_SIZE - prof_dump_buf_end; } memcpy(&prof_dump_buf[prof_dump_buf_end], &s[i], n); prof_dump_buf_end += n; i += n; } return (false); } static void prof_ctx_merge(prof_ctx_t *ctx, prof_cnt_t *cnt_all, size_t *leak_nctx) { prof_thr_cnt_t *thr_cnt; prof_cnt_t tcnt; malloc_mutex_lock(&ctx->lock); memcpy(&ctx->cnt_dump, &ctx->cnt_merged, sizeof(prof_cnt_t)); ql_foreach(thr_cnt, &ctx->cnts_ql, link) { volatile unsigned *epoch = &thr_cnt->epoch; while (true) { unsigned epoch0 = *epoch; /* Make sure epoch is even. */ if (epoch0 & 1U) continue; memcpy(&tcnt, &thr_cnt->cnts, sizeof(prof_cnt_t)); /* Terminate if epoch didn't change while reading. */ if (*epoch == epoch0) break; } ctx->cnt_dump.curobjs += tcnt.curobjs; ctx->cnt_dump.curbytes += tcnt.curbytes; ctx->cnt_dump.accumobjs += tcnt.accumobjs; ctx->cnt_dump.accumbytes += tcnt.accumbytes; if (tcnt.curobjs != 0) (*leak_nctx)++; } /* Merge into cnt_all. */ cnt_all->curobjs += ctx->cnt_dump.curobjs; cnt_all->curbytes += ctx->cnt_dump.curbytes; cnt_all->accumobjs += ctx->cnt_dump.accumobjs; cnt_all->accumbytes += ctx->cnt_dump.accumbytes; malloc_mutex_unlock(&ctx->lock); } static bool prof_dump_ctx(prof_ctx_t *ctx, prof_bt_t *bt, bool propagate_err) { char buf[UMAX2S_BUFSIZE]; unsigned i; if (prof_write(umax2s(ctx->cnt_dump.curobjs, 10, buf), propagate_err) || prof_write(": ", propagate_err) || prof_write(umax2s(ctx->cnt_dump.curbytes, 10, buf), propagate_err) || prof_write(" [", propagate_err) || prof_write(umax2s(ctx->cnt_dump.accumobjs, 10, buf), propagate_err) || prof_write(": ", propagate_err) || prof_write(umax2s(ctx->cnt_dump.accumbytes, 10, buf), propagate_err) || prof_write("] @", propagate_err)) return (true); for (i = 0; i < bt->len; i++) { if (prof_write(" 0x", propagate_err) || prof_write(umax2s((uintptr_t)bt->vec[i], 16, buf), propagate_err)) return (true); } if (prof_write("\n", propagate_err)) return (true); return (false); } static bool prof_dump_maps(bool propagate_err) { int mfd; char buf[UMAX2S_BUFSIZE]; char *s; unsigned i, slen; /* /proc//maps\0 */ char mpath[6 + UMAX2S_BUFSIZE + 5 + 1]; i = 0; s = "/proc/"; slen = strlen(s); memcpy(&mpath[i], s, slen); i += slen; s = umax2s(getpid(), 10, buf); slen = strlen(s); memcpy(&mpath[i], s, slen); i += slen; s = "/maps"; slen = strlen(s); memcpy(&mpath[i], s, slen); i += slen; mpath[i] = '\0'; mfd = open(mpath, O_RDONLY); if (mfd != -1) { ssize_t nread; if (prof_write("\nMAPPED_LIBRARIES:\n", propagate_err) && propagate_err) return (true); nread = 0; do { prof_dump_buf_end += nread; if (prof_dump_buf_end == PROF_DUMP_BUF_SIZE) { /* Make space in prof_dump_buf before read(). */ if (prof_flush(propagate_err) && propagate_err) return (true); } nread = read(mfd, &prof_dump_buf[prof_dump_buf_end], PROF_DUMP_BUF_SIZE - prof_dump_buf_end); } while (nread > 0); close(mfd); } else return (true); return (false); } static bool prof_dump(const char *filename, bool leakcheck, bool propagate_err) { prof_cnt_t cnt_all; size_t tabind; prof_bt_t *bt; prof_ctx_t *ctx; char buf[UMAX2S_BUFSIZE]; size_t leak_nctx; prof_enter(); prof_dump_fd = creat(filename, 0644); if (prof_dump_fd == -1) { if (propagate_err == false) { malloc_write(": creat(\""); malloc_write(filename); malloc_write("\", 0644) failed\n"); if (opt_abort) abort(); } goto ERROR; } /* Merge per thread profile stats, and sum them in cnt_all. */ memset(&cnt_all, 0, sizeof(prof_cnt_t)); leak_nctx = 0; for (tabind = 0; ckh_iter(&bt2ctx, &tabind, NULL, (void **)&ctx) == false;) { prof_ctx_merge(ctx, &cnt_all, &leak_nctx); } /* Dump profile header. */ if (prof_write("heap profile: ", propagate_err) || prof_write(umax2s(cnt_all.curobjs, 10, buf), propagate_err) || prof_write(": ", propagate_err) || prof_write(umax2s(cnt_all.curbytes, 10, buf), propagate_err) || prof_write(" [", propagate_err) || prof_write(umax2s(cnt_all.accumobjs, 10, buf), propagate_err) || prof_write(": ", propagate_err) || prof_write(umax2s(cnt_all.accumbytes, 10, buf), propagate_err)) goto ERROR; if (opt_lg_prof_sample == 0) { if (prof_write("] @ heapprofile\n", propagate_err)) goto ERROR; } else { if (prof_write("] @ heap_v2/", propagate_err) || prof_write(umax2s((uint64_t)1U << opt_lg_prof_sample, 10, buf), propagate_err) || prof_write("\n", propagate_err)) goto ERROR; } /* Dump per ctx profile stats. */ for (tabind = 0; ckh_iter(&bt2ctx, &tabind, (void **)&bt, (void **)&ctx) == false;) { if (prof_dump_ctx(ctx, bt, propagate_err)) goto ERROR; } /* Dump /proc//maps if possible. */ if (prof_dump_maps(propagate_err)) goto ERROR; if (prof_flush(propagate_err)) goto ERROR; close(prof_dump_fd); prof_leave(); if (leakcheck && cnt_all.curbytes != 0) { malloc_write(": Leak summary: "); malloc_write(umax2s(cnt_all.curbytes, 10, buf)); malloc_write((cnt_all.curbytes != 1) ? " bytes, " : " byte, "); malloc_write(umax2s(cnt_all.curobjs, 10, buf)); malloc_write((cnt_all.curobjs != 1) ? " objects, " : " object, "); malloc_write(umax2s(leak_nctx, 10, buf)); malloc_write((leak_nctx != 1) ? " contexts\n" : " context\n"); malloc_write(": Run pprof on \""); malloc_write(filename); malloc_write("\" for leak detail\n"); } return (false); ERROR: prof_leave(); return (true); } #define DUMP_FILENAME_BUFSIZE (PATH_MAX+ UMAX2S_BUFSIZE \ + 1 \ + UMAX2S_BUFSIZE \ + 2 \ + UMAX2S_BUFSIZE \ + 5 + 1) static void prof_dump_filename(char *filename, char v, int64_t vseq) { char buf[UMAX2S_BUFSIZE]; char *s; unsigned i, slen; /* * Construct a filename of the form: * * ...v.heap\0 * or * jeprof...v.heap\0 */ i = 0; /* * Use JEMALLOC_PROF_PREFIX if it's set, and if it is short enough to * avoid overflowing DUMP_FILENAME_BUFSIZE. The result may exceed * PATH_MAX, but creat(2) will catch that problem. */ if ((s = getenv("JEMALLOC_PROF_PREFIX")) != NULL && strlen(s) + (DUMP_FILENAME_BUFSIZE - PATH_MAX) <= PATH_MAX) { slen = strlen(s); memcpy(&filename[i], s, slen); i += slen; s = "."; } else s = "jeprof."; slen = strlen(s); memcpy(&filename[i], s, slen); i += slen; s = umax2s(getpid(), 10, buf); slen = strlen(s); memcpy(&filename[i], s, slen); i += slen; s = "."; slen = strlen(s); memcpy(&filename[i], s, slen); i += slen; s = umax2s(prof_dump_seq, 10, buf); prof_dump_seq++; slen = strlen(s); memcpy(&filename[i], s, slen); i += slen; s = "."; slen = strlen(s); memcpy(&filename[i], s, slen); i += slen; filename[i] = v; i++; if (vseq != 0xffffffffffffffffLLU) { s = umax2s(vseq, 10, buf); slen = strlen(s); memcpy(&filename[i], s, slen); i += slen; } s = ".heap"; slen = strlen(s); memcpy(&filename[i], s, slen); i += slen; filename[i] = '\0'; } static void prof_fdump(void) { char filename[DUMP_FILENAME_BUFSIZE]; if (prof_booted == false) return; malloc_mutex_lock(&prof_dump_seq_mtx); prof_dump_filename(filename, 'f', 0xffffffffffffffffLLU); malloc_mutex_unlock(&prof_dump_seq_mtx); prof_dump(filename, opt_prof_leak, false); } void prof_idump(void) { char filename[DUMP_FILENAME_BUFSIZE]; if (prof_booted == false) return; malloc_mutex_lock(&enq_mtx); if (enq) { enq_idump = true; malloc_mutex_unlock(&enq_mtx); return; } malloc_mutex_unlock(&enq_mtx); malloc_mutex_lock(&prof_dump_seq_mtx); prof_dump_filename(filename, 'i', prof_dump_iseq); prof_dump_iseq++; malloc_mutex_unlock(&prof_dump_seq_mtx); prof_dump(filename, false, false); } bool prof_mdump(const char *filename) { char filename_buf[DUMP_FILENAME_BUFSIZE]; if (opt_prof == false || prof_booted == false) return (true); if (filename == NULL) { /* No filename specified, so automatically generate one. */ malloc_mutex_lock(&prof_dump_seq_mtx); prof_dump_filename(filename_buf, 'm', prof_dump_mseq); prof_dump_mseq++; malloc_mutex_unlock(&prof_dump_seq_mtx); filename = filename_buf; } return (prof_dump(filename, false, true)); } void prof_udump(void) { char filename[DUMP_FILENAME_BUFSIZE]; if (prof_booted == false) return; malloc_mutex_lock(&enq_mtx); if (enq) { enq_udump = true; malloc_mutex_unlock(&enq_mtx); return; } malloc_mutex_unlock(&enq_mtx); malloc_mutex_lock(&prof_dump_seq_mtx); prof_dump_filename(filename, 'u', prof_dump_useq); prof_dump_useq++; malloc_mutex_unlock(&prof_dump_seq_mtx); prof_dump(filename, false, false); } static void prof_bt_hash(const void *key, unsigned minbits, size_t *hash1, size_t *hash2) { size_t ret1, ret2; uint64_t h; prof_bt_t *bt = (prof_bt_t *)key; assert(minbits <= 32 || (SIZEOF_PTR == 8 && minbits <= 64)); assert(hash1 != NULL); assert(hash2 != NULL); h = hash(bt->vec, bt->len * sizeof(void *), 0x94122f335b332aeaLLU); if (minbits <= 32) { /* * Avoid doing multiple hashes, since a single hash provides * enough bits. */ ret1 = h & ZU(0xffffffffU); ret2 = h >> 32; } else { ret1 = h; ret2 = hash(bt->vec, bt->len * sizeof(void *), 0x8432a476666bbc13U); } *hash1 = ret1; *hash2 = ret2; } static bool prof_bt_keycomp(const void *k1, const void *k2) { const prof_bt_t *bt1 = (prof_bt_t *)k1; const prof_bt_t *bt2 = (prof_bt_t *)k2; if (bt1->len != bt2->len) return (false); return (memcmp(bt1->vec, bt2->vec, bt1->len * sizeof(void *)) == 0); } static void bt2cnt_thread_cleanup(void *arg) { ckh_t *bt2cnt; bt2cnt = bt2cnt_tls; if (bt2cnt != NULL) { ql_head(prof_thr_cnt_t) cnts_ql; size_t tabind; prof_thr_cnt_t *cnt; /* Iteratively merge cnt's into the global stats. */ ql_new(&cnts_ql); tabind = 0; while (ckh_iter(bt2cnt, &tabind, NULL, (void **)&cnt) == false) { prof_ctx_t *ctx = cnt->ctx; /* Merge stats and detach from ctx. */ malloc_mutex_lock(&ctx->lock); ctx->cnt_merged.curobjs += cnt->cnts.curobjs; ctx->cnt_merged.curbytes += cnt->cnts.curbytes; ctx->cnt_merged.accumobjs += cnt->cnts.accumobjs; ctx->cnt_merged.accumbytes += cnt->cnts.accumbytes; ql_remove(&ctx->cnts_ql, cnt, link); malloc_mutex_unlock(&ctx->lock); /* * Stash cnt for deletion after finishing with * ckh_iter(). */ ql_tail_insert(&cnts_ql, cnt, link); } /* * Delete the hash table now that cnts_ql has a list of all * cnt's. */ ckh_delete(bt2cnt); idalloc(bt2cnt); bt2cnt_tls = NULL; /* Delete cnt's. */ while ((cnt = ql_last(&cnts_ql, link)) != NULL) { ql_remove(&cnts_ql, cnt, link); idalloc(cnt); } } } void prof_boot0(void) { /* * opt_prof and prof_promote must be in their final state before any * arenas are initialized, so this function must be executed early. */ if (opt_prof_leak && opt_prof == false) { /* * Enable opt_prof, but in such a way that profiles are never * automatically dumped. */ opt_prof = true; opt_prof_udump = false; prof_interval = 0; } else if (opt_prof) { if (opt_lg_prof_interval >= 0) { prof_interval = (((uint64_t)1U) << opt_lg_prof_interval); } else prof_interval = 0; } prof_promote = (opt_prof && opt_lg_prof_sample > PAGE_SHIFT); } bool prof_boot1(void) { if (opt_prof) { if (ckh_new(&bt2ctx, PROF_CKH_MINITEMS, prof_bt_hash, prof_bt_keycomp)) return (true); if (malloc_mutex_init(&bt2ctx_mtx)) return (true); if (pthread_key_create(&bt2cnt_tsd, bt2cnt_thread_cleanup) != 0) { malloc_write( ": Error in pthread_key_create()\n"); abort(); } prof_bt_max = (1U << opt_lg_prof_bt_max); if (malloc_mutex_init(&prof_dump_seq_mtx)) return (true); if (malloc_mutex_init(&enq_mtx)) return (true); enq = false; enq_idump = false; enq_udump = false; if (atexit(prof_fdump) != 0) { malloc_write(": Error in atexit()\n"); if (opt_abort) abort(); } } #ifdef JEMALLOC_PROF_LIBGCC /* * Cause the backtracing machinery to allocate its internal state * before enabling profiling. */ _Unwind_Backtrace(prof_unwind_init_callback, NULL); #endif prof_booted = true; return (false); } /******************************************************************************/ #endif /* JEMALLOC_PROF */