Add thread.peak.[read|reset] mallctls.

These can be used to track net allocator activity on a per-thread basis.
David Goldblatt, 2020-05-27 14:31:00 -07:00 (committed by David Goldblatt)
parent fe7108305a
commit d82a164d0d
13 changed files with 269 additions and 5 deletions

Makefile.in

@@ -129,6 +129,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \
 	$(srcroot)src/pa.c \
 	$(srcroot)src/pa_extra.c \
 	$(srcroot)src/pages.c \
+	$(srcroot)src/peak_event.c \
 	$(srcroot)src/prng.c \
 	$(srcroot)src/prof.c \
 	$(srcroot)src/prof_data.c \

doc/jemalloc.xml.in

@@ -1621,6 +1621,42 @@ malloc_conf = "xmalloc:true";]]></programlisting>
         should not be modified by the application.</para></listitem>
       </varlistentry>
+
+      <varlistentry id="thread.peak.read">
+        <term>
+          <mallctl>thread.peak.read</mallctl>
+          (<type>uint64_t</type>)
+          <literal>r-</literal>
+          [<option>--enable-stats</option>]
+        </term>
+        <listitem><para>Get an approximation of the maximum value of the
+        difference between the number of bytes allocated and the number of bytes
+        deallocated by the calling thread since the last call to <link
+        linkend="thread.peak.reset"><mallctl>thread.peak.reset</mallctl></link>,
+        or since the thread's creation if it has not called <link
+        linkend="thread.peak.reset"><mallctl>thread.peak.reset</mallctl></link>.
+        No guarantees are made about the quality of the approximation, but
+        jemalloc currently endeavors to maintain accuracy to within one hundred
+        kilobytes.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry id="thread.peak.reset">
+        <term>
+          <mallctl>thread.peak.reset</mallctl>
+          (<type>void</type>)
+          <literal>--</literal>
+          [<option>--enable-stats</option>]
+        </term>
+        <listitem><para>Resets the counter for net bytes allocated in the calling
+        thread to zero. This affects subsequent calls to <link
+        linkend="thread.peak.read"><mallctl>thread.peak.read</mallctl></link>,
+        but not the values returned by <link
+        linkend="thread.allocated"><mallctl>thread.allocated</mallctl></link>
+        or <link
+        linkend="thread.deallocated"><mallctl>thread.deallocated</mallctl></link>.
+        </para></listitem>
+      </varlistentry>
       <varlistentry id="thread.tcache.enabled">
         <term>
           <mallctl>thread.tcache.enabled</mallctl>

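For reference, a minimal sketch of driving the two new mallctls from application code. The measure_peak helper and the workload are illustrative names, not part of this commit, and error checking is elided:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <jemalloc/jemalloc.h>

/* Hypothetical helper: report the approximate peak net allocation of a
 * callback, scoped to the calling thread. */
static uint64_t
measure_peak(void (*workload)(void)) {
	mallctl("thread.peak.reset", NULL, NULL, NULL, 0);
	workload();
	uint64_t peak;
	size_t sz = sizeof(peak);
	mallctl("thread.peak.read", &peak, &sz, NULL, 0);
	return peak;
}

static void
workload_example(void) {
	void *p = malloc(1024 * 1024);
	free(p);
}

int
main(void) {
	uint64_t peak = measure_peak(workload_example);
	/* The reported value is approximate; see the accuracy caveat above. */
	printf("peak: %llu bytes\n", (unsigned long long)peak);
	return 0;
}
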
include/jemalloc/internal/peak_event.h (new file)

@@ -0,0 +1,24 @@
+#ifndef JEMALLOC_INTERNAL_PEAK_EVENT_H
+#define JEMALLOC_INTERNAL_PEAK_EVENT_H
+
+/*
+ * While peak.h contains the simple helper struct that tracks state, this
+ * contains the allocator tie-ins (and knows about tsd, the event module, etc.).
+ */
+
+/* Update the peak with current tsd state. */
+void peak_event_update(tsd_t *tsd);
+/* Set current state to zero. */
+void peak_event_zero(tsd_t *tsd);
+uint64_t peak_event_max(tsd_t *tsd);
+
+/* Manual hooks. */
+/* The activity-triggered hooks. */
+uint64_t peak_alloc_new_event_wait(tsd_t *tsd);
+uint64_t peak_alloc_postponed_event_wait(tsd_t *tsd);
+void peak_alloc_event_handler(tsd_t *tsd, uint64_t elapsed);
+uint64_t peak_dalloc_new_event_wait(tsd_t *tsd);
+uint64_t peak_dalloc_postponed_event_wait(tsd_t *tsd);
+void peak_dalloc_event_handler(tsd_t *tsd, uint64_t elapsed);
+
+#endif /* JEMALLOC_INTERNAL_PEAK_EVENT_H */

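The peak_t helper that these functions lean on lives in peak.h, added in the parent commit and not shown in this diff. A minimal sketch of that helper, reconstructed from the calls in this commit; the field names and the adjustment trick are assumptions, not quoted from peak.h:

#include <stdint.h>

typedef struct peak_s peak_t;
struct peak_s {
	/* Highest net-allocated value seen since the last reset. */
	uint64_t cur_max;
	/*
	 * Net allocated bytes (alloc - dalloc) at the last reset; subtracting
	 * it cancels out activity that predates the reset.
	 */
	uint64_t adjustment;
};

#define PEAK_INITIALIZER {0, 0}

static inline uint64_t
peak_max(peak_t *peak) {
	return peak->cur_max;
}

static inline void
peak_update(peak_t *peak, uint64_t alloc, uint64_t dalloc) {
	int64_t candidate_max = (int64_t)(alloc - dalloc - peak->adjustment);
	if (candidate_max > (int64_t)peak->cur_max) {
		peak->cur_max = (uint64_t)candidate_max;
	}
}

static inline void
peak_set_zero(peak_t *peak, uint64_t alloc, uint64_t dalloc) {
	peak->cur_max = 0;
	peak->adjustment = alloc - dalloc;
}
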
include/jemalloc/internal/thread_event.h

@@ -56,7 +56,9 @@ void tsd_te_init(tsd_t *tsd);
 	E(tcache_gc, (opt_tcache_gc_incr_bytes > 0), true) \
 	E(prof_sample, (config_prof && opt_prof), true) \
 	E(stats_interval, (opt_stats_interval >= 0), true) \
-	E(tcache_gc_dalloc, (opt_tcache_gc_incr_bytes > 0), false)
+	E(tcache_gc_dalloc, (opt_tcache_gc_incr_bytes > 0), false) \
+	E(peak_alloc, config_stats, true) \
+	E(peak_dalloc, config_stats, false)

 #define E(event, condition_unused, is_alloc_event_unused) \
 	C(event##_event_wait)

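The E() list above is an X-macro: each consumer defines E() (here via the C() helper) to pull out the fields it needs, then expands the list. A self-contained sketch of the pattern, with stand-in names rather than jemalloc's internals:

#include <stdint.h>
#include <stdio.h>

/* Stand-in for the event list; the real one lives in thread_event.h. */
#define DEMO_ITERATE_EVENTS(E) \
	E(peak_alloc, config_stats, 1) \
	E(peak_dalloc, config_stats, 0)

/* One consumer: declare a per-event wait counter, much as tsd.h does. */
#define DECLARE_WAIT(event, condition_unused, is_alloc_unused) \
	uint64_t event##_event_wait;

struct demo_tsd {
	DEMO_ITERATE_EVENTS(DECLARE_WAIT)
};

int
main(void) {
	struct demo_tsd t = {0};
	t.peak_alloc_event_wait = 100 * 1024; /* PEAK_EVENT_WAIT, see below */
	printf("%llu\n", (unsigned long long)t.peak_alloc_event_wait);
	return 0;
}
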
include/jemalloc/internal/tsd.h

@@ -5,6 +5,7 @@
 #include "jemalloc/internal/assert.h"
 #include "jemalloc/internal/bin_types.h"
 #include "jemalloc/internal/jemalloc_internal_externs.h"
+#include "jemalloc/internal/peak.h"
 #include "jemalloc/internal/prof_types.h"
 #include "jemalloc/internal/ql.h"
 #include "jemalloc/internal/rtree_tsd.h"
@@ -69,6 +70,8 @@ typedef ql_elm(tsd_t) tsd_link_t;
 	O(prof_sample_last_event, uint64_t, uint64_t) \
 	O(stats_interval_event_wait, uint64_t, uint64_t) \
 	O(stats_interval_last_event, uint64_t, uint64_t) \
+	O(peak_alloc_event_wait, uint64_t, uint64_t) \
+	O(peak_dalloc_event_wait, uint64_t, uint64_t) \
 	O(prof_tdata, prof_tdata_t *, prof_tdata_t *) \
 	O(prng_state, uint64_t, uint64_t) \
 	O(iarena, arena_t *, arena_t *) \
@@ -77,6 +80,7 @@ typedef ql_elm(tsd_t) tsd_link_t;
 	O(binshards, tsd_binshards_t, tsd_binshards_t) \
 	O(tsd_link, tsd_link_t, tsd_link_t) \
 	O(in_hook, bool, bool) \
+	O(peak, peak_t, peak_t) \
 	O(tcache_slow, tcache_slow_t, tcache_slow_t) \
 	O(rtree_ctx, rtree_ctx_t, rtree_ctx_t)
@@ -95,6 +99,8 @@ typedef ql_elm(tsd_t) tsd_link_t;
 	/* prof_sample_last_event */ 0, \
 	/* stats_interval_event_wait */ 0, \
 	/* stats_interval_last_event */ 0, \
+	/* peak_alloc_event_wait */ 0, \
+	/* peak_dalloc_event_wait */ 0, \
 	/* prof_tdata */ NULL, \
 	/* prng_state */ 0, \
 	/* iarena */ NULL, \
@@ -103,6 +109,7 @@ typedef ql_elm(tsd_t) tsd_link_t;
 	/* binshards */ TSD_BINSHARDS_ZERO_INITIALIZER, \
 	/* tsd_link */ {NULL}, \
 	/* in_hook */ false, \
+	/* peak */ PEAK_INITIALIZER, \
 	/* tcache_slow */ TCACHE_SLOW_ZERO_INITIALIZER, \
 	/* rtree_ctx */ RTREE_CTX_ZERO_INITIALIZER,

msvc/projects/vc2015/jemalloc/jemalloc.vcxproj

@@ -70,6 +70,7 @@
     <ClCompile Include="..\..\..\..\src\pa.c" />
     <ClCompile Include="..\..\..\..\src\pa_extra.c" />
     <ClCompile Include="..\..\..\..\src\pages.c" />
+    <ClCompile Include="..\..\..\..\src\peak_event.c" />
     <ClCompile Include="..\..\..\..\src\prng.c" />
     <ClCompile Include="..\..\..\..\src\prof.c" />
     <ClCompile Include="..\..\..\..\src\prof_data.c" />

msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters

@@ -94,6 +94,9 @@
     <ClCompile Include="..\..\..\..\src\pages.c">
       <Filter>Source Files</Filter>
     </ClCompile>
+    <ClCompile Include="..\..\..\..\src\peak_event.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
     <ClCompile Include="..\..\..\..\src\prng.c">
       <Filter>Source Files</Filter>
     </ClCompile>

msvc/projects/vc2017/jemalloc/jemalloc.vcxproj

@@ -70,6 +70,7 @@
     <ClCompile Include="..\..\..\..\src\pa.c" />
    <ClCompile Include="..\..\..\..\src\pa_extra.c" />
     <ClCompile Include="..\..\..\..\src\pages.c" />
+    <ClCompile Include="..\..\..\..\src\peak_event.c" />
     <ClCompile Include="..\..\..\..\src\prng.c" />
     <ClCompile Include="..\..\..\..\src\prof.c" />
     <ClCompile Include="..\..\..\..\src\prof_data.c" />

msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters

@@ -94,6 +94,9 @@
     <ClCompile Include="..\..\..\..\src\pages.c">
       <Filter>Source Files</Filter>
     </ClCompile>
+    <ClCompile Include="..\..\..\..\src\peak_event.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
     <ClCompile Include="..\..\..\..\src\prng.c">
       <Filter>Source Files</Filter>
     </ClCompile>

src/ctl.c

@@ -9,6 +9,7 @@
 #include "jemalloc/internal/inspect.h"
 #include "jemalloc/internal/mutex.h"
 #include "jemalloc/internal/nstime.h"
+#include "jemalloc/internal/peak_event.h"
 #include "jemalloc/internal/sc.h"
 #include "jemalloc/internal/util.h"
@@ -61,6 +62,8 @@ CTL_PROTO(background_thread)
 CTL_PROTO(max_background_threads)
 CTL_PROTO(thread_tcache_enabled)
 CTL_PROTO(thread_tcache_flush)
+CTL_PROTO(thread_peak_read)
+CTL_PROTO(thread_peak_reset)
 CTL_PROTO(thread_prof_name)
 CTL_PROTO(thread_prof_active)
 CTL_PROTO(thread_arena)
@@ -294,6 +297,11 @@ static const ctl_named_node_t thread_tcache_node[] = {
 	{NAME("flush"),		CTL(thread_tcache_flush)}
 };

+static const ctl_named_node_t thread_peak_node[] = {
+	{NAME("read"),		CTL(thread_peak_read)},
+	{NAME("reset"),		CTL(thread_peak_reset)},
+};
+
 static const ctl_named_node_t thread_prof_node[] = {
 	{NAME("name"),		CTL(thread_prof_name)},
 	{NAME("active"),	CTL(thread_prof_active)}
@@ -306,6 +314,7 @@ static const ctl_named_node_t thread_node[] = {
 	{NAME("deallocated"),	CTL(thread_deallocated)},
 	{NAME("deallocatedp"),	CTL(thread_deallocatedp)},
 	{NAME("tcache"),	CHILD(named, thread_tcache)},
+	{NAME("peak"),		CHILD(named, thread_peak)},
 	{NAME("prof"),		CHILD(named, thread_prof)},
 	{NAME("idle"),		CTL(thread_idle)}
 };
@@ -1953,6 +1962,38 @@ label_return:
 	return ret;
 }

+static int
+thread_peak_read_ctl(tsd_t *tsd, const size_t *mib,
+    size_t miblen, void *oldp, size_t *oldlenp, void *newp,
+    size_t newlen) {
+	int ret;
+	if (!config_stats) {
+		return ENOENT;
+	}
+	READONLY();
+	peak_event_update(tsd);
+	uint64_t result = peak_event_max(tsd);
+	READ(result, uint64_t);
+	ret = 0;
+label_return:
+	return ret;
+}
+
+static int
+thread_peak_reset_ctl(tsd_t *tsd, const size_t *mib,
+    size_t miblen, void *oldp, size_t *oldlenp, void *newp,
+    size_t newlen) {
+	int ret;
+	if (!config_stats) {
+		return ENOENT;
+	}
+	NEITHER_READ_NOR_WRITE();
+	peak_event_zero(tsd);
+	ret = 0;
+label_return:
+	return ret;
+}
+
 static int
 thread_prof_name_ctl(tsd_t *tsd, const size_t *mib,
     size_t miblen, void *oldp, size_t *oldlenp, void *newp,

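Because the new entries are ordinary nodes in the ctl tree, they also work through the MIB interface, which skips the string lookup on every call; this is handy when polling the peak in a loop. A sketch using the standard mallctlnametomib()/mallctlbymib() API, with error handling elided:

#include <stdint.h>
#include <jemalloc/jemalloc.h>

/* Resolve the MIBs once; reuse them for cheap repeated reads/resets. */
static size_t peak_read_mib[3], peak_reset_mib[3];
static size_t peak_read_miblen = 3, peak_reset_miblen = 3;

static void
peak_mibs_init(void) {
	mallctlnametomib("thread.peak.read", peak_read_mib, &peak_read_miblen);
	mallctlnametomib("thread.peak.reset", peak_reset_mib,
	    &peak_reset_miblen);
}

static uint64_t
peak_read(void) {
	uint64_t peak;
	size_t sz = sizeof(peak);
	mallctlbymib(peak_read_mib, peak_read_miblen, &peak, &sz, NULL, 0);
	return peak;
}

static void
peak_reset(void) {
	mallctlbymib(peak_reset_mib, peak_reset_miblen, NULL, NULL, NULL, 0);
}
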
src/peak_event.c (new file, 67 lines)

@@ -0,0 +1,67 @@
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/jemalloc_internal_includes.h"
+
+#include "jemalloc/internal/peak.h"
+#include "jemalloc/internal/peak_event.h"
+
+/*
+ * Update every 100k by default. We're not exposing this as a configuration
+ * option for now; we don't want to bind ourselves too tightly to any particular
+ * performance requirements for small values, or guarantee that we'll even be
+ * able to provide fine-grained accuracy.
+ */
+#define PEAK_EVENT_WAIT (100 * 1024)
+
+/* Update the peak with current tsd state. */
+void
+peak_event_update(tsd_t *tsd) {
+	uint64_t alloc = tsd_thread_allocated_get(tsd);
+	uint64_t dalloc = tsd_thread_deallocated_get(tsd);
+	peak_t *peak = tsd_peakp_get(tsd);
+	peak_update(peak, alloc, dalloc);
+}
+
+/* Set current state to zero. */
+void
+peak_event_zero(tsd_t *tsd) {
+	uint64_t alloc = tsd_thread_allocated_get(tsd);
+	uint64_t dalloc = tsd_thread_deallocated_get(tsd);
+	peak_t *peak = tsd_peakp_get(tsd);
+	peak_set_zero(peak, alloc, dalloc);
+}
+
+uint64_t
+peak_event_max(tsd_t *tsd) {
+	peak_t *peak = tsd_peakp_get(tsd);
+	return peak_max(peak);
+}
+
+uint64_t
+peak_alloc_new_event_wait(tsd_t *tsd) {
+	return PEAK_EVENT_WAIT;
+}
+
+uint64_t
+peak_alloc_postponed_event_wait(tsd_t *tsd) {
+	return TE_MIN_START_WAIT;
+}
+
+void
+peak_alloc_event_handler(tsd_t *tsd, uint64_t elapsed) {
+	peak_event_update(tsd);
+}
+
+uint64_t
+peak_dalloc_new_event_wait(tsd_t *tsd) {
+	return PEAK_EVENT_WAIT;
+}
+
+uint64_t
+peak_dalloc_postponed_event_wait(tsd_t *tsd) {
+	return TE_MIN_START_WAIT;
+}
+
+void
+peak_dalloc_event_handler(tsd_t *tsd, uint64_t elapsed) {
+	peak_event_update(tsd);
+}

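One consequence of the 100 KiB granularity above is worth spelling out: the counters are only folded into the peak when an alloc/dalloc event fires or when thread.peak.read or thread.peak.reset runs, so a spike that rises and falls entirely between events can go unrecorded. A sketch of the worst case; the sizes are illustrative, not guaranteed behavior:

#include <stdlib.h>

int
main(void) {
	/*
	 * Assuming the thread's counters were just synced: a +64 KiB spike
	 * stays below the 100 KiB event wait, so no alloc event need fire
	 * before the net count drops back to zero.
	 */
	void *p = malloc(64 * 1024);
	free(p);
	/*
	 * A later thread.peak.read may report ~0 for this interval. A rise
	 * of 100 KiB or more must trigger at least one alloc event on the
	 * way up, which is where the manual's "accuracy to within one
	 * hundred kilobytes" wording comes from.
	 */
	return 0;
}
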
src/thread_event.c

@@ -60,6 +60,16 @@ stats_interval_fetch_elapsed(tsd_t *tsd) {
 	return last_event - last_stats_event;
 }

+static uint64_t
+peak_alloc_fetch_elapsed(tsd_t *tsd) {
+	return TE_INVALID_ELAPSED;
+}
+
+static uint64_t
+peak_dalloc_fetch_elapsed(tsd_t *tsd) {
+	return TE_INVALID_ELAPSED;
+}
+
 /* Per event facilities done. */

 static bool

test/unit/mallctl.c

@@ -955,6 +955,73 @@ TEST_BEGIN(test_thread_idle) {
 }
 TEST_END

+TEST_BEGIN(test_thread_peak) {
+	test_skip_if(!config_stats);
+
+	/*
+	 * We don't commit to any stable amount of accuracy for peak tracking
+	 * (in practice, when this test was written, we made sure to be within
+	 * 100k). But 10MB is big for more or less any definition of big.
+	 */
+	size_t big_size = 10 * 1024 * 1024;
+	size_t small_size = 256;
+
+	void *ptr;
+	int err;
+	size_t sz;
+	uint64_t peak;
+	sz = sizeof(uint64_t);
+
+	err = mallctl("thread.peak.reset", NULL, NULL, NULL, 0);
+	expect_d_eq(err, 0, "");
+	ptr = mallocx(SC_SMALL_MAXCLASS, 0);
+	err = mallctl("thread.peak.read", &peak, &sz, NULL, 0);
+	expect_d_eq(err, 0, "");
+	expect_u64_eq(peak, SC_SMALL_MAXCLASS, "Missed an update");
+	free(ptr);
+	err = mallctl("thread.peak.read", &peak, &sz, NULL, 0);
+	expect_d_eq(err, 0, "");
+	expect_u64_eq(peak, SC_SMALL_MAXCLASS, "Freeing changed peak");
+	ptr = mallocx(big_size, 0);
+	free(ptr);
+	/*
+	 * The peak should have hit big_size in the last two lines, even though
+	 * the net allocated bytes has since dropped back down to zero. We
+	 * should have noticed the peak change without having done any mallctl
+	 * calls while net allocated bytes was high.
+	 */
+	err = mallctl("thread.peak.read", &peak, &sz, NULL, 0);
+	expect_d_eq(err, 0, "");
+	expect_u64_ge(peak, big_size, "Missed a peak change.");
+
+	/* Allocate big_size, but using small allocations. */
+	size_t nallocs = big_size / small_size;
+	void **ptrs = calloc(nallocs, sizeof(void *));
+	err = mallctl("thread.peak.reset", NULL, NULL, NULL, 0);
+	expect_d_eq(err, 0, "");
+	err = mallctl("thread.peak.read", &peak, &sz, NULL, 0);
+	expect_d_eq(err, 0, "");
+	expect_u64_eq(0, peak, "Missed a reset.");
+	for (size_t i = 0; i < nallocs; i++) {
+		ptrs[i] = mallocx(small_size, 0);
+	}
+	for (size_t i = 0; i < nallocs; i++) {
+		free(ptrs[i]);
+	}
+	err = mallctl("thread.peak.read", &peak, &sz, NULL, 0);
+	expect_d_eq(err, 0, "");
+	/*
+	 * We don't guarantee exactness; make sure we're within 10% of the peak,
+	 * though.
+	 */
+	expect_u64_ge(peak, nallocx(small_size, 0) * nallocs * 9 / 10,
+	    "Missed some peak changes.");
+	expect_u64_le(peak, nallocx(small_size, 0) * nallocs * 11 / 10,
+	    "Overcounted peak changes.");
+	free(ptrs);
+}
+TEST_END
+
 int
 main(void) {
 	return test(
@@ -987,5 +1054,6 @@ main(void) {
 	    test_stats_arenas,
 	    test_hooks,
 	    test_hooks_exhaustion,
-	    test_thread_idle);
+	    test_thread_idle,
+	    test_thread_peak);
 }