diff --git a/jemalloc/doc/jemalloc.3.in b/jemalloc/doc/jemalloc.3.in
index 3865ac9b..4266382f 100644
--- a/jemalloc/doc/jemalloc.3.in
+++ b/jemalloc/doc/jemalloc.3.in
@@ -388,12 +388,11 @@ will disable dirty page purging.
 @roff_tcache@Note that one cache slot per size class is not a valid
 @roff_tcache@configuration due to implementation details.
 @roff_prof@.It I
-@roff_prof@Double/halve the maximum interval between memory profile dumps, as
+@roff_prof@Double/halve the average interval between memory profile dumps, as
 @roff_prof@measured in bytes of allocation activity.
-@roff_prof@On average, profiles are written four times as often as the maximum
-@roff_prof@interval requires.
-@roff_prof@This is an artifact of the concurrent algorithm that is used to
-@roff_prof@track allocation activity.
+@roff_prof@The actual interval between dumps may be sporadic because
+@roff_prof@decentralized allocation counters are used to avoid synchronization
+@roff_prof@bottlenecks.
 @roff_prof@Profiles are dumped to files named according to the pattern
 @roff_prof@.Pa ...i.heap ,
 @roff_prof@where
@@ -401,7 +400,7 @@ will disable dirty page purging.
 @roff_prof@is controlled by the
 @roff_prof@JEMALLOC_PROF_PREFIX
 @roff_prof@environment variable.
-@roff_prof@The default maximum interval is 4 GiB.
+@roff_prof@The default average interval is 1 GiB.
 @roff_fill@.It J
 @roff_fill@Each byte of new memory allocated by
 @roff_fill@.Fn @jemalloc_prefix@malloc
@@ -693,6 +692,21 @@ This is useful for detecting whether another thread caused a refresh.
 --enable-lazy-lock was specified during build configuration.
 .Ed
 .\"-----------------------------------------------------------------------------
+.It Sy "config.prof (bool) r-"
+.Bd -ragged -offset indent -compact
+--enable-prof was specified during build configuration.
+.Ed
+.\"-----------------------------------------------------------------------------
+.It Sy "config.prof_libgcc (bool) r-"
+.Bd -ragged -offset indent -compact
+--disable-prof-libgcc was not specified during build configuration.
+.Ed
+.\"-----------------------------------------------------------------------------
+.It Sy "config.prof_libunwind (bool) r-"
+.Bd -ragged -offset indent -compact
+--enable-prof-libunwind was specified during build configuration.
+.Ed
+.\"-----------------------------------------------------------------------------
 .It Sy "config.stats (bool) r-"
 .Bd -ragged -offset indent -compact
 --enable-stats was specified during build configuration.
@@ -782,6 +796,41 @@ See the
 option.
 .Ed
 .\"-----------------------------------------------------------------------------
+@roff_prof@.It Sy "opt.prof (bool) r-"
+@roff_prof@.Bd -ragged -offset indent -compact
+@roff_prof@See the
+@roff_prof@.Dq F
+@roff_prof@option.
+@roff_prof@.Ed
+.\"-----------------------------------------------------------------------------
+@roff_prof@.It Sy "opt.lg_prof_bt_max (size_t) r-"
+@roff_prof@.Bd -ragged -offset indent -compact
+@roff_prof@See the
+@roff_prof@.Dq B
+@roff_prof@option.
+@roff_prof@.Ed
+.\"-----------------------------------------------------------------------------
+@roff_prof@.It Sy "opt.lg_prof_interval (size_t) r-"
+@roff_prof@.Bd -ragged -offset indent -compact
+@roff_prof@See the
+@roff_prof@.Dq I
+@roff_prof@option.
+@roff_prof@.Ed
+.\"-----------------------------------------------------------------------------
+@roff_prof@.It Sy "opt.prof_udump (bool) r-"
+@roff_prof@.Bd -ragged -offset indent -compact
+@roff_prof@See the
+@roff_prof@.Dq U
+@roff_prof@option.
+@roff_prof@.Ed
+.\"-----------------------------------------------------------------------------
+@roff_prof@.It Sy "opt.prof_leak (bool) r-"
+@roff_prof@.Bd -ragged -offset indent -compact
+@roff_prof@See the
+@roff_prof@.Dq L
+@roff_prof@option.
+@roff_prof@.Ed
+.\"-----------------------------------------------------------------------------
 @roff_trace@.It Sy "opt.trace (bool) r-"
 @roff_trace@.Bd -ragged -offset indent -compact
 @roff_trace@See the
@@ -993,6 +1042,15 @@ Maximum size supported by this large size class.
 @roff_prof@environment variable.
 @roff_prof@.Ed
 .\"-----------------------------------------------------------------------------
+@roff_prof@.It Sy "prof.interval (uint64_t) r-"
+@roff_prof@.Bd -ragged -offset indent -compact
+@roff_prof@Average number of bytes allocated between interval-based profile
+@roff_prof@dumps.
+@roff_prof@See the
+@roff_prof@.Dq I
+@roff_prof@option for additional information.
+@roff_prof@.Ed
+.\"-----------------------------------------------------------------------------
 @roff_stats@.It Sy "stats.allocated (size_t) r-"
 @roff_stats@.Bd -ragged -offset indent -compact
 @roff_stats@Total number of bytes allocated by the application.
diff --git a/jemalloc/src/internal/jemalloc_arena.h b/jemalloc/src/internal/jemalloc_arena.h
index 65054f87..cba954f2 100644
--- a/jemalloc/src/internal/jemalloc_arena.h
+++ b/jemalloc/src/internal/jemalloc_arena.h
@@ -290,6 +290,10 @@ struct arena_s {
 	int trace_fd;
 #endif
 
+#ifdef JEMALLOC_PROF
+	uint64_t prof_accumbytes;
+#endif
+
 	/* Tree of dirty-page-containing chunks this arena manages. */
 	arena_chunk_tree_t chunks_dirty;
 
@@ -411,7 +415,14 @@ extern size_t mspace_mask;
 #define nlclasses ((chunksize - PAGE_SIZE) >> PAGE_SHIFT)
 
 #ifdef JEMALLOC_TCACHE
-void	arena_tcache_fill(arena_t *arena, tcache_bin_t *tbin, size_t binind);
+void	arena_tcache_fill(arena_t *arena, tcache_bin_t *tbin, size_t binind
+# ifdef JEMALLOC_PROF
+    , uint64_t prof_accumbytes
+# endif
+    );
+#endif
+#ifdef JEMALLOC_PROF
+void	arena_prof_accum(arena_t *arena, uint64_t accumbytes);
 #endif
 void	*arena_malloc_small(arena_t *arena, size_t size, bool zero);
 void	*arena_malloc_medium(arena_t *arena, size_t size, bool zero);
diff --git a/jemalloc/src/internal/jemalloc_tcache.h b/jemalloc/src/internal/jemalloc_tcache.h
index 601320b1..b499f525 100644
--- a/jemalloc/src/internal/jemalloc_tcache.h
+++ b/jemalloc/src/internal/jemalloc_tcache.h
@@ -35,6 +35,9 @@ struct tcache_bin_s {
 struct tcache_s {
 # ifdef JEMALLOC_STATS
 	ql_elm(tcache_t) link;		/* Used for aggregating stats. */
+# endif
+# ifdef JEMALLOC_PROF
+	uint64_t	prof_accumbytes;/* Cleared after arena_prof_accum() */
 # endif
 	arena_t		*arena;		/* This thread's arena. */
 	unsigned	ev_cnt;		/* Event count since incremental GC. */
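
The prof_accumbytes field added to tcache_t above is the thread-local half of the counting scheme: bytes are tallied with no synchronization as the cache satisfies requests, and folded into the owning arena's counter only when the cache already has to take that arena's lock. A minimal standalone sketch of that pattern follows; the fake_* names are invented for illustration and are not jemalloc types.

    /* Accumulate locally without locking; fold under the shared lock later. */
    #include <pthread.h>
    #include <stdint.h>

    typedef struct {
    	pthread_mutex_t	lock;
    	uint64_t	accumbytes;	/* Shared; protected by lock. */
    } fake_arena_t;

    typedef struct {
    	fake_arena_t	*arena;
    	uint64_t	accumbytes;	/* Thread-local; no locking needed. */
    } fake_tcache_t;

    /* Called on every cached allocation: cheap, unsynchronized bookkeeping. */
    void
    fake_tcache_note_alloc(fake_tcache_t *tcache, uint64_t size)
    {
    	tcache->accumbytes += size;
    }

    /* Called when the cache must take the arena lock anyway (fill/flush). */
    void
    fake_tcache_fold(fake_tcache_t *tcache)
    {
    	pthread_mutex_lock(&tcache->arena->lock);
    	tcache->arena->accumbytes += tcache->accumbytes;
    	pthread_mutex_unlock(&tcache->arena->lock);
    	tcache->accumbytes = 0;
    }

    int
    main(void)
    {
    	fake_arena_t arena;
    	fake_tcache_t tcache;

    	pthread_mutex_init(&arena.lock, NULL);
    	arena.accumbytes = 0;
    	tcache.arena = &arena;
    	tcache.accumbytes = 0;
    	fake_tcache_note_alloc(&tcache, 4096);
    	fake_tcache_fold(&tcache);
    	return (0);
    }
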
@@ -62,7 +65,11 @@ extern size_t tcache_nslots;
 
 /* Number of tcache allocation/deallocation events between incremental GCs. */
 extern unsigned	tcache_gc_incr;
-void	tcache_bin_flush(tcache_bin_t *tbin, size_t binind, unsigned rem);
+void	tcache_bin_flush(tcache_bin_t *tbin, size_t binind, unsigned rem
+#ifdef JEMALLOC_PROF
+    , tcache_t *tcache
+#endif
+    );
 tcache_t *tcache_create(arena_t *arena);
 void	tcache_bin_destroy(tcache_t *tcache, tcache_bin_t *tbin, unsigned
     binind);
@@ -138,7 +145,11 @@ tcache_event(tcache_t *tcache)
 		 */
 		tcache_bin_flush(tbin, binind, tbin->ncached - (tbin->low_water >>
-		    1) - (tbin->low_water & 1));
+		    1) - (tbin->low_water & 1)
+#ifdef JEMALLOC_PROF
+		    , tcache
+#endif
+		    );
 	}
 	tbin->low_water = tbin->ncached;
 	tbin->high_water = tbin->ncached;
 
@@ -205,6 +216,9 @@ tcache_alloc(tcache_t *tcache, size_t size, bool zero)
 
 #ifdef JEMALLOC_STATS
 	tbin->tstats.nrequests++;
+#endif
+#ifdef JEMALLOC_PROF
+	tcache->prof_accumbytes += tcache->arena->bins[binind].reg_size;
 #endif
 	tcache_event(tcache);
 	return (ret);
@@ -252,7 +266,11 @@ tcache_dalloc(tcache_t *tcache, void *ptr)
 	}
 
 	if (tbin->ncached == tcache_nslots)
-		tcache_bin_flush(tbin, binind, (tcache_nslots >> 1));
+		tcache_bin_flush(tbin, binind, (tcache_nslots >> 1)
+#ifdef JEMALLOC_PROF
+		    , tcache
+#endif
+		    );
 	assert(tbin->ncached < tcache_nslots);
 	tbin->slots[tbin->ncached] = ptr;
 	tbin->ncached++;
diff --git a/jemalloc/src/internal/prof.h b/jemalloc/src/internal/prof.h
index 1d562071..44e11cbf 100644
--- a/jemalloc/src/internal/prof.h
+++ b/jemalloc/src/internal/prof.h
@@ -8,7 +8,7 @@ typedef struct prof_thr_cnt_s prof_thr_cnt_t;
 typedef struct prof_ctx_s prof_ctx_t;
 typedef struct prof_s prof_t;
 
-#define LG_PROF_INTERVAL_DEFAULT	32
+#define LG_PROF_INTERVAL_DEFAULT	30
 
 /*
  * Hard limit on stack backtrace depth.  Note that the version of
@@ -121,6 +121,15 @@ extern size_t opt_lg_prof_interval;
 extern bool	opt_prof_udump;		/* High-water memory dumping. */
 extern bool	opt_prof_leak;		/* Dump leak summary at exit. */
 
+/*
+ * Profile dump interval, measured in bytes allocated.  Each arena triggers a
+ * profile dump when it reaches this threshold.  The effect is that the
+ * interval between profile dumps averages prof_interval, though the actual
+ * interval between dumps will tend to be sporadic, and the interval will be a
+ * maximum of approximately (prof_interval * narenas).
+ */
+extern uint64_t prof_interval;
+
 bool	prof_init(prof_t *prof, bool master);
 void	prof_destroy(prof_t *prof);
 
@@ -135,7 +144,6 @@ void	prof_mdump(void);
 void	prof_udump(void);
 void	prof_boot0(void);
 bool	prof_boot1(void);
-void	prof_boot2(void);
 
 #endif /* JEMALLOC_H_EXTERNS */
 /******************************************************************************/
diff --git a/jemalloc/src/jemalloc.c b/jemalloc/src/jemalloc.c
index 6a3f53e5..813f50b5 100644
--- a/jemalloc/src/jemalloc.c
+++ b/jemalloc/src/jemalloc.c
@@ -18,9 +18,9 @@
  *
  * Allocation requests are rounded up to the nearest size class, and no record
  * of the original request size is maintained.  Allocations are broken into
- * categories according to size class.  Assuming runtime defaults, 4 KiB pages
- * and a 16 byte quantum on a 32-bit system, the size classes in each category
- * are as follows:
+ * categories according to size class.  Assuming 1 MiB chunks, 4 KiB pages and
+ * a 16 byte quantum on a 32-bit system, the size classes in each category are
+ * as follows:
  *
 * |========================================|
 * | Category | Subcategory | Size |
@@ -822,10 +822,6 @@ MALLOC_OUT:
 		next_arena = 0;
#endif
 
-#ifdef JEMALLOC_PROF
-	prof_boot2();
-#endif
-
 	/* Allocate and initialize arenas. */
 	arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas);
 	if (arenas == NULL) {
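
Taken together with the removal of prof_boot2() above, prof_interval appears to be used directly rather than being divided by narenas at boot. A small standalone sketch of how the lg_prof_interval option, the byte threshold, and the worst-case spacing between dumps relate follows; it assumes the 2^lg_prof_interval relationship implied by the default of 30 (1 GiB), and the arena count is an arbitrary example value.

    /* Illustration of the dump-interval arithmetic described in prof.h. */
    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    int
    main(void)
    {
    	unsigned lg_prof_interval = 30;		/* Default: 2^30 = 1 GiB. */
    	unsigned narenas = 16;			/* Example arena count. */
    	uint64_t prof_interval = UINT64_C(1) << lg_prof_interval;

    	/*
    	 * Each arena dumps after prof_interval bytes of its own activity, so
    	 * dumps average one per prof_interval bytes overall, but a dump can
    	 * lag by roughly prof_interval * narenas bytes when allocation
    	 * activity is spread evenly across arenas.
    	 */
    	printf("average interval: %" PRIu64 " bytes\n", prof_interval);
    	printf("approximate worst case: %" PRIu64 " bytes\n",
    	    prof_interval * narenas);
    	return (0);
    }
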
diff --git a/jemalloc/src/jemalloc_arena.c b/jemalloc/src/jemalloc_arena.c
index 109b3f63..585be3d6 100644
--- a/jemalloc/src/jemalloc_arena.c
+++ b/jemalloc/src/jemalloc_arena.c
@@ -1031,7 +1031,11 @@ arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin)
 
 #ifdef JEMALLOC_TCACHE
 void
-arena_tcache_fill(arena_t *arena, tcache_bin_t *tbin, size_t binind)
+arena_tcache_fill(arena_t *arena, tcache_bin_t *tbin, size_t binind
+# ifdef JEMALLOC_PROF
+    , uint64_t prof_accumbytes
+# endif
+    )
 {
 	unsigned i, nfill;
 	arena_bin_t *bin;
@@ -1042,6 +1046,9 @@ arena_tcache_fill(arena_t *arena, tcache_bin_t *tbin, size_t binind)
 
 	bin = &arena->bins[binind];
 	malloc_mutex_lock(&arena->lock);
+#ifdef JEMALLOC_PROF
+	arena_prof_accum(arena, prof_accumbytes);
+#endif
 	for (i = 0, nfill = (tcache_nslots >> 1); i < nfill; i++) {
 		if ((run = bin->runcur) != NULL && run->nfree > 0)
 			ptr = arena_bin_malloc_easy(arena, bin, run);
@@ -1088,6 +1095,19 @@ arena_tcache_fill(arena_t *arena, tcache_bin_t *tbin, size_t binind)
 }
 #endif
 
+#ifdef JEMALLOC_PROF
+void
+arena_prof_accum(arena_t *arena, uint64_t accumbytes)
+{
+
+	arena->prof_accumbytes += accumbytes;
+	if (arena->prof_accumbytes >= prof_interval) {
+		prof_idump();
+		arena->prof_accumbytes -= prof_interval;
+	}
+}
+#endif
+
 /*
  * Calculate bin->run_size such that it meets the following constraints:
 *
@@ -1243,6 +1263,10 @@ arena_malloc_small(arena_t *arena, size_t size, bool zero)
 		}
# endif
 	arena->stats.allocated_small += size;
+#endif
+#ifdef JEMALLOC_PROF
+	if (isthreaded == false)
+		arena_prof_accum(arena, size);
#endif
 	malloc_mutex_unlock(&arena->lock);
 
@@ -1294,6 +1318,10 @@ arena_malloc_medium(arena_t *arena, size_t size, bool zero)
 		}
# endif
 	arena->stats.allocated_medium += size;
+#endif
+#ifdef JEMALLOC_PROF
+	if (isthreaded == false)
+		arena_prof_accum(arena, size);
#endif
 	malloc_mutex_unlock(&arena->lock);
 
@@ -1333,6 +1361,9 @@ arena_malloc_large(arena_t *arena, size_t size, bool zero)
 		arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns =
 		    arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns;
 	}
+#endif
+#ifdef JEMALLOC_PROF
+	arena_prof_accum(arena, size);
#endif
 	malloc_mutex_unlock(&arena->lock);
 
@@ -2035,6 +2066,10 @@ arena_new(arena_t *arena, unsigned ind)
 	}
#endif
 
+#ifdef JEMALLOC_PROF
+	arena->prof_accumbytes = 0;
+#endif
+
 	/* Initialize chunks. */
 	arena_chunk_tree_dirty_new(&arena->chunks_dirty);
 	arena->spare = NULL;
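
arena_prof_accum() above subtracts prof_interval rather than resetting the counter to zero, so any overshoot carries into the next interval and the long-run average spacing between dumps stays at prof_interval even though individual dumps are sporadic. The following standalone sketch demonstrates the same accumulate-and-carry idea with made-up sizes and a made-up threshold; it is an illustration, not jemalloc code.

    /* Accumulate-and-carry threshold counter, as used by arena_prof_accum(). */
    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    static uint64_t accum;
    static unsigned ndumps;

    static void
    accum_bytes(uint64_t threshold, uint64_t nbytes)
    {
    	accum += nbytes;
    	if (accum >= threshold) {
    		ndumps++;		/* Stands in for prof_idump(). */
    		accum -= threshold;	/* Carry the overshoot forward. */
    	}
    }

    int
    main(void)
    {
    	uint64_t threshold = 1000;	/* Example threshold. */
    	uint64_t total = 0;
    	unsigned i;

    	for (i = 0; i < 10000; i++) {
    		accum_bytes(threshold, 64);
    		total += 64;
    	}
    	/* Expect roughly total / threshold dumps: 640000 / 1000 = 640. */
    	printf("%u dumps after %" PRIu64 " bytes\n", ndumps, total);
    	return (0);
    }
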
diff --git a/jemalloc/src/jemalloc_ctl.c b/jemalloc/src/jemalloc_ctl.c
index 75c5c6b2..a68e78a6 100644
--- a/jemalloc/src/jemalloc_ctl.c
+++ b/jemalloc/src/jemalloc_ctl.c
@@ -34,14 +34,14 @@ CTL_PROTO(epoch)
 #ifdef JEMALLOC_TCACHE
 CTL_PROTO(tcache_flush)
 #endif
-#ifdef JEMALLOC_PROF
-CTL_PROTO(prof_dump)
-#endif
 CTL_PROTO(config_debug)
 CTL_PROTO(config_dss)
 CTL_PROTO(config_dynamic_page_shift)
 CTL_PROTO(config_fill)
 CTL_PROTO(config_lazy_lock)
+CTL_PROTO(config_prof)
+CTL_PROTO(config_prof_libgcc)
+CTL_PROTO(config_prof_libunwind)
 CTL_PROTO(config_stats)
 CTL_PROTO(config_swap)
 CTL_PROTO(config_sysv)
@@ -67,6 +67,13 @@ CTL_PROTO(opt_zero)
 CTL_PROTO(opt_lg_tcache_nslots)
 CTL_PROTO(opt_lg_tcache_gc_sweep)
 #endif
+#ifdef JEMALLOC_PROF
+CTL_PROTO(opt_prof)
+CTL_PROTO(opt_lg_prof_bt_max)
+CTL_PROTO(opt_lg_prof_interval)
+CTL_PROTO(opt_prof_udump)
+CTL_PROTO(opt_prof_leak)
+#endif
 CTL_PROTO(opt_stats_print)
 #ifdef JEMALLOC_TRACE
 CTL_PROTO(opt_trace)
@@ -112,6 +119,10 @@ CTL_PROTO(arenas_nsbins)
 CTL_PROTO(arenas_nmbins)
 CTL_PROTO(arenas_nbins)
 CTL_PROTO(arenas_nlruns)
+#ifdef JEMALLOC_PROF
+CTL_PROTO(prof_dump)
+CTL_PROTO(prof_interval)
+#endif
 #ifdef JEMALLOC_STATS
 CTL_PROTO(stats_chunks_current)
 CTL_PROTO(stats_chunks_total)
@@ -188,18 +199,15 @@ static const ctl_node_t tcache_node[] = {
 };
 #endif
 
-#ifdef JEMALLOC_PROF
-static const ctl_node_t prof_node[] = {
-	{NAME("dump"),		CTL(prof_dump)}
-};
-#endif
-
 static const ctl_node_t config_node[] = {
 	{NAME("debug"),			CTL(config_debug)},
 	{NAME("dss"),			CTL(config_dss)},
 	{NAME("dynamic_page_shift"),	CTL(config_dynamic_page_shift)},
 	{NAME("fill"),			CTL(config_fill)},
 	{NAME("lazy_lock"),		CTL(config_lazy_lock)},
+	{NAME("prof"),			CTL(config_prof)},
+	{NAME("prof_libgcc"),		CTL(config_prof_libgcc)},
+	{NAME("prof_libunwind"),	CTL(config_prof_libunwind)},
 	{NAME("stats"),			CTL(config_stats)},
 	{NAME("swap"),			CTL(config_swap)},
 	{NAME("sysv"),			CTL(config_sysv)},
@@ -227,6 +235,13 @@ static const ctl_node_t opt_node[] = {
 #ifdef JEMALLOC_TCACHE
 	{NAME("lg_tcache_nslots"),	CTL(opt_lg_tcache_nslots)},
 	{NAME("lg_tcache_gc_sweep"),	CTL(opt_lg_tcache_gc_sweep)},
+#endif
+#ifdef JEMALLOC_PROF
+	{NAME("prof"),			CTL(opt_prof)},
+	{NAME("lg_prof_bt_max"),	CTL(opt_lg_prof_bt_max)},
+	{NAME("lg_prof_interval"),	CTL(opt_lg_prof_interval)},
+	{NAME("prof_udump"),		CTL(opt_prof_udump)},
+	{NAME("prof_leak"),		CTL(opt_prof_leak)},
 #endif
 	{NAME("stats_print"),	CTL(opt_stats_print)},
 #ifdef JEMALLOC_TRACE
@@ -299,6 +314,13 @@ static const ctl_node_t arenas_node[] = {
 	{NAME("lrun"),			CHILD(arenas_lrun)}
 };
 
+#ifdef JEMALLOC_PROF
+static const ctl_node_t prof_node[] = {
+	{NAME("dump"),			CTL(prof_dump)},
+	{NAME("interval"),		CTL(prof_interval)}
+};
+#endif
+
 #ifdef JEMALLOC_STATS
 static const ctl_node_t stats_chunks_node[] = {
 	{NAME("current"),		CTL(stats_chunks_current)},
@@ -413,13 +435,13 @@ static const ctl_node_t root_node[] = {
 	{NAME("epoch"),		CTL(epoch)},
 #ifdef JEMALLOC_TCACHE
 	{NAME("tcache"),	CHILD(tcache)},
-#endif
-#ifdef JEMALLOC_PROF
-	{NAME("prof"),		CHILD(prof)},
 #endif
 	{NAME("config"),	CHILD(config)},
 	{NAME("opt"),		CHILD(opt)},
 	{NAME("arenas"),	CHILD(arenas)},
+#ifdef JEMALLOC_PROF
+	{NAME("prof"),		CHILD(prof)},
+#endif
 	{NAME("stats"),		CHILD(stats)}
 #ifdef JEMALLOC_SWAP
 	,
@@ -938,23 +960,6 @@ RETURN:
 }
 #endif
 
-#ifdef JEMALLOC_PROF
-static int
-prof_dump_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
-    void *newp, size_t newlen)
-{
-	int ret;
-
-	VOID();
-
-	prof_mdump();
-
-	ret = 0;
-RETURN:
-	return (ret);
-}
-#endif
-
 /******************************************************************************/
 
 #ifdef JEMALLOC_DEBUG
@@ -987,6 +992,24 @@ CTL_RO_TRUE_GEN(config_lazy_lock)
 CTL_RO_FALSE_GEN(config_lazy_lock)
 #endif
 
+#ifdef JEMALLOC_PROF
+CTL_RO_TRUE_GEN(config_prof)
+#else
+CTL_RO_FALSE_GEN(config_prof)
+#endif
+
+#ifdef JEMALLOC_PROF_LIBGCC
+CTL_RO_TRUE_GEN(config_prof_libgcc)
+#else
+CTL_RO_FALSE_GEN(config_prof_libgcc)
+#endif
+
+#ifdef JEMALLOC_PROF_LIBUNWIND
+CTL_RO_TRUE_GEN(config_prof_libunwind)
+#else
+CTL_RO_FALSE_GEN(config_prof_libunwind)
+#endif
+
 #ifdef JEMALLOC_STATS
 CTL_RO_TRUE_GEN(config_stats)
 #else
@@ -1054,6 +1077,13 @@ CTL_RO_GEN(opt_zero, opt_zero, bool)
 CTL_RO_GEN(opt_lg_tcache_nslots, opt_lg_tcache_nslots, size_t)
 CTL_RO_GEN(opt_lg_tcache_gc_sweep, opt_lg_tcache_gc_sweep, ssize_t)
 #endif
+#ifdef JEMALLOC_PROF
+CTL_RO_GEN(opt_prof, opt_prof, bool)
+CTL_RO_GEN(opt_lg_prof_bt_max, opt_lg_prof_bt_max, size_t)
+CTL_RO_GEN(opt_lg_prof_interval, opt_lg_prof_interval, size_t)
+CTL_RO_GEN(opt_prof_udump, opt_prof_udump, bool)
+CTL_RO_GEN(opt_prof_leak, opt_prof_leak, bool)
+#endif
 CTL_RO_GEN(opt_stats_print, opt_stats_print, bool)
 #ifdef JEMALLOC_TRACE
 CTL_RO_GEN(opt_trace, opt_trace, bool)
@@ -1145,6 +1175,27 @@ CTL_RO_GEN(arenas_nlruns, nlclasses, size_t)
 
 /******************************************************************************/
 
+#ifdef JEMALLOC_PROF
+static int
+prof_dump_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
+    void *newp, size_t newlen)
+{
+	int ret;
+
+	VOID();
+
+	prof_mdump();
+
+	ret = 0;
+RETURN:
+	return (ret);
+}
+
+CTL_RO_GEN(prof_interval, prof_interval, uint64_t)
+#endif
+
+/******************************************************************************/
+
 #ifdef JEMALLOC_STATS
 CTL_RO_GEN(stats_chunks_current, ctl_stats.chunks.current, size_t)
 CTL_RO_GEN(stats_chunks_total, ctl_stats.chunks.total, uint64_t)
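
The new mallctl nodes are read like any existing ones. A sketch of querying them from an application follows; it assumes a build with --enable-prof and an unprefixed public mallctl() entry point (a build configured with a function prefix would expose the same node names under the prefixed name), and simply declares the prototype rather than relying on a particular header.

    /* Query the prof-related mallctl nodes and trigger a manual dump. */
    #include <inttypes.h>
    #include <stdbool.h>
    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    int	mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp,
        size_t newlen);

    int
    main(void)
    {
    	bool prof;
    	uint64_t interval;
    	size_t sz;

    	sz = sizeof(prof);
    	if (mallctl("config.prof", &prof, &sz, NULL, 0) == 0 && prof) {
    		sz = sizeof(interval);
    		if (mallctl("prof.interval", &interval, &sz, NULL, 0) == 0) {
    			printf("average dump interval: %" PRIu64 " bytes\n",
    			    interval);
    		}
    		/* Force a dump immediately; the node takes no arguments. */
    		mallctl("prof.dump", NULL, NULL, NULL, 0);
    	}
    	return (0);
    }
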
"V" : "v", "", "", ""); if ((err = mallctl("opt.xmalloc", &bv, &bsz, NULL, 0)) == 0) @@ -512,7 +518,6 @@ stats_print(void (*write4)(void *, const char *, const char *, const char *, "Min active:dirty page ratio per arena: N/A\n", "", "", ""); } -#ifdef JEMALLOC_TCACHE if ((err = mallctl("opt.lg_tcache_nslots", &sv, &ssz, NULL, 0)) == 0) { size_t tcache_nslots, tcache_gc_sweep; @@ -528,7 +533,17 @@ stats_print(void (*write4)(void *, const char *, const char *, const char *, tcache_nslots && ssv >= 0 ? umax2s(tcache_gc_sweep, 10, s) : "N/A", "\n", ""); } -#endif + if ((err = mallctl("opt.lg_prof_bt_max", &sv, &ssz, NULL, 0)) + == 0) { + write4(w4opaque, "Maximum profile backtrace depth: ", + umax2s((1U << sv), 10, s), "\n", ""); + } + if ((err = mallctl("opt.lg_prof_interval", &sv, &ssz, NULL, 0)) + == 0) { + write4(w4opaque, "Average profile dump interval: ", + umax2s((1U << sv), 10, s), "", ""); + write4(w4opaque, " (2^", umax2s(sv, 10, s), ")\n", ""); + } CTL_GET("arenas.chunksize", &sv, size_t); write4(w4opaque, "Chunk size: ", umax2s(sv, 10, s), "", ""); CTL_GET("opt.lg_chunk", &sv, size_t); diff --git a/jemalloc/src/jemalloc_tcache.c b/jemalloc/src/jemalloc_tcache.c index 4518a148..c54d54e2 100644 --- a/jemalloc/src/jemalloc_tcache.c +++ b/jemalloc/src/jemalloc_tcache.c @@ -31,14 +31,25 @@ tcache_alloc_hard(tcache_t *tcache, tcache_bin_t *tbin, size_t binind) { void *ret; - arena_tcache_fill(tcache->arena, tbin, binind); + arena_tcache_fill(tcache->arena, tbin, binind +#ifdef JEMALLOC_PROF + , tcache->prof_accumbytes +#endif + ); +#ifdef JEMALLOC_PROF + tcache->prof_accumbytes = 0; +#endif ret = tcache_bin_alloc(tbin); return (ret); } void -tcache_bin_flush(tcache_bin_t *tbin, size_t binind, unsigned rem) +tcache_bin_flush(tcache_bin_t *tbin, size_t binind, unsigned rem +#ifdef JEMALLOC_PROF + , tcache_t *tcache +#endif + ) { arena_chunk_t *chunk; arena_t *arena; @@ -51,6 +62,12 @@ tcache_bin_flush(tcache_bin_t *tbin, size_t binind, unsigned rem) chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(tbin->slots[0]); arena = chunk->arena; malloc_mutex_lock(&arena->lock); +#ifdef JEMALLOC_PROF + if (arena == tcache->arena) { + arena_prof_accum(arena, tcache->prof_accumbytes); + tcache->prof_accumbytes = 0; + } +#endif /* Deallocate every object that belongs to the locked arena. */ for (i = ndeferred = 0; i < ncached; i++) { ptr = tbin->slots[i]; @@ -216,11 +233,23 @@ tcache_destroy(tcache_t *tcache) for (i = 0; i < nbins; i++) { tcache_bin_t *tbin = tcache->tbins[i]; if (tbin != NULL) { - tcache_bin_flush(tbin, i, 0); + tcache_bin_flush(tbin, i, 0 +#ifdef JEMALLOC_PROF + , tcache +#endif + ); tcache_bin_destroy(tcache, tbin, i); } } +#ifdef JEMALLOC_PROF + if (tcache->prof_accumbytes > 0) { + malloc_mutex_lock(&tcache->arena->lock); + arena_prof_accum(tcache->arena, tcache->prof_accumbytes); + malloc_mutex_unlock(&tcache->arena->lock); + } +#endif + if (arena_salloc(tcache) <= bin_maxclass) { arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache); arena_t *arena = chunk->arena; diff --git a/jemalloc/src/prof.c b/jemalloc/src/prof.c index 8f69c01f..7e1d9676 100644 --- a/jemalloc/src/prof.c +++ b/jemalloc/src/prof.c @@ -21,6 +21,8 @@ size_t opt_lg_prof_interval = LG_PROF_INTERVAL_DEFAULT; bool opt_prof_udump = false; bool opt_prof_leak = false; +uint64_t prof_interval; + /* * Global hash of (prof_bt_t *)-->(prof_ctx_t *). This is the master data * structure that knows about all backtraces ever captured. @@ -50,15 +52,6 @@ static pthread_key_t bt2cnt_tsd; /* (1U << opt_lg_prof_bt_max). 
@@ -50,15 +52,6 @@ static pthread_key_t bt2cnt_tsd;
 
 /* (1U << opt_lg_prof_bt_max). */
 static unsigned prof_bt_max;
 
-/*
- * Per arena profile dump interval, measured in bytes allocated.  Each arena
- * triggers a profile dump when it reaches this threshold.  The effect is that
- * the interval between profile dumps is never longer than (prof_interval *
- * narenas), though the actual interval between dumps will tend to be sporadic,
- * and the interval will on average be prof_interval.
- */
-static uint64_t prof_interval;
-
 static malloc_mutex_t prof_dump_seq_mtx;
 static uint64_t prof_dump_seq;
 static uint64_t prof_dump_iseq;
@@ -79,6 +72,7 @@ static bool prof_booted = false;
 
 static malloc_mutex_t enq_mtx;
 static bool enq;
+static bool enq_idump;
 static bool enq_udump;
 
 /******************************************************************************/
@@ -157,16 +151,20 @@ prof_enter(void)
 static inline void
 prof_leave(void)
 {
-	bool udump;
+	bool idump, udump;
 
 	malloc_mutex_unlock(&bt2ctx_mtx);
 
 	malloc_mutex_lock(&enq_mtx);
 	enq = false;
+	idump = enq_idump;
+	enq_idump = false;
 	udump = enq_udump;
 	enq_udump = false;
 	malloc_mutex_unlock(&enq_mtx);
 
+	if (idump)
+		prof_idump();
 	if (udump)
 		prof_udump();
 }
@@ -785,6 +783,7 @@ prof_dump_maps(void)
 			nread = read(mfd, &prof_dump_buf[prof_dump_buf_end],
 			    PROF_DUMP_BUF_SIZE - prof_dump_buf_end);
 		} while (nread > 0);
+		close(mfd);
 	}
 }
 
@@ -955,6 +954,13 @@ prof_idump(void)
 
 	if (prof_booted == false)
 		return;
+	malloc_mutex_lock(&enq_mtx);
+	if (enq) {
+		enq_idump = true;
+		malloc_mutex_unlock(&enq_mtx);
+		return;
+	}
+	malloc_mutex_unlock(&enq_mtx);
 
 	malloc_mutex_lock(&prof_dump_seq_mtx);
 	prof_dump_filename(filename, 'i', prof_dump_iseq);
@@ -1134,6 +1140,7 @@ prof_boot1(void)
 		if (malloc_mutex_init(&enq_mtx))
 			return (true);
 		enq = false;
+		enq_idump = false;
 		enq_udump = false;
 
 		if (atexit(prof_fdump) != 0) {
@@ -1157,17 +1164,5 @@ prof_boot1(void)
 	return (false);
 }
 
-void
-prof_boot2(void)
-{
-
-	if (opt_prof) {
-		/*
-		 * Finish initializing prof_interval, now that narenas is set.
-		 */
-		prof_interval /= narenas;
-	}
-}
-
 /******************************************************************************/
 #endif /* JEMALLOC_PROF */
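
The enq/enq_idump/enq_udump flags above implement a defer-and-replay scheme: a dump requested while prof_enter() holds bt2ctx_mtx is only recorded, and prof_leave() replays it once the lock has been dropped, which avoids re-entering the profiler with the lock held. A generic standalone sketch of that pattern, with invented names, is:

    /* Defer-and-replay: remember requests made inside a critical section. */
    #include <pthread.h>
    #include <stdbool.h>

    static pthread_mutex_t flag_lock = PTHREAD_MUTEX_INITIALIZER;
    static bool in_critical;	/* Set while the "big" lock is held. */
    static bool deferred_dump;	/* A dump was requested meanwhile. */

    static void do_dump(void);	/* The expensive operation being deferred. */

    void
    critical_enter(void)
    {
    	pthread_mutex_lock(&flag_lock);
    	in_critical = true;
    	pthread_mutex_unlock(&flag_lock);
    }

    void
    request_dump(void)
    {
    	pthread_mutex_lock(&flag_lock);
    	if (in_critical) {
    		/* Can't dump now without lock-order trouble; remember it. */
    		deferred_dump = true;
    		pthread_mutex_unlock(&flag_lock);
    		return;
    	}
    	pthread_mutex_unlock(&flag_lock);
    	do_dump();
    }

    void
    critical_leave(void)
    {
    	bool replay;

    	pthread_mutex_lock(&flag_lock);
    	in_critical = false;
    	replay = deferred_dump;
    	deferred_dump = false;
    	pthread_mutex_unlock(&flag_lock);
    	if (replay)
    		do_dump();
    }

    static void
    do_dump(void)
    {
    	/* Placeholder for the real work (writing a heap profile). */
    }

    int
    main(void)
    {
    	critical_enter();
    	request_dump();		/* Deferred: inside the critical section. */
    	critical_leave();	/* Replays the deferred dump. */
    	return (0);
    }
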