Add thread.peak.[read|reset] mallctls.
These can be used to track net allocator activity on a per-thread basis.
commit d82a164d0d (parent fe7108305a)

Makefile.in
@@ -129,6 +129,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \
     $(srcroot)src/pa.c \
     $(srcroot)src/pa_extra.c \
     $(srcroot)src/pages.c \
+    $(srcroot)src/peak_event.c \
     $(srcroot)src/prng.c \
     $(srcroot)src/prof.c \
     $(srcroot)src/prof_data.c \

doc/jemalloc.xml.in
@@ -1621,6 +1621,42 @@ malloc_conf = "xmalloc:true";]]></programlisting>
         should not be modified by the application.</para></listitem>
         </varlistentry>
+
+        <varlistentry id="thread.peak.read">
+          <term>
+            <mallctl>thread.peak.read</mallctl>
+            (<type>uint64_t</type>)
+            <literal>r-</literal>
+            [<option>--enable-stats</option>]
+          </term>
+          <listitem><para>Get an approximation of the maximum value of the
+          difference between the number of bytes allocated and the number of bytes
+          deallocated by the calling thread since the last call to <link
+          linkend="thread.peak.reset"><mallctl>thread.peak.reset</mallctl></link>,
+          or since the thread's creation if it has not called <link
+          linkend="thread.peak.reset"><mallctl>thread.peak.reset</mallctl></link>.
+          No guarantees are made about the quality of the approximation, but
+          jemalloc currently endeavors to maintain accuracy to within one hundred
+          kilobytes.
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry id="thread.peak.reset">
+          <term>
+            <mallctl>thread.peak.reset</mallctl>
+            (<type>void</type>)
+            <literal>--</literal>
+            [<option>--enable-stats</option>]
+          </term>
+          <listitem><para>Resets the counter for net bytes allocated in the calling
+          thread to zero. This affects subsequent calls to <link
+          linkend="thread.peak.read"><mallctl>thread.peak.read</mallctl></link>,
+          but not the values returned by <link
+          linkend="thread.allocated"><mallctl>thread.allocated</mallctl></link>
+          or <link
+          linkend="thread.deallocated"><mallctl>thread.deallocated</mallctl></link>.
+          </para></listitem>
+        </varlistentry>
+
         <varlistentry id="thread.tcache.enabled">
           <term>
             <mallctl>thread.tcache.enabled</mallctl>
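
As a quick orientation before the implementation below: the two new mallctls compose with the standard mallctl() interface in the obvious way. The following standalone program is an illustrative sketch, not part of this commit; it assumes a jemalloc built with --enable-stats, and the value read back is only approximate (to within roughly 100KB, per the manual text above).

/* Hypothetical usage example; not from this commit. */
#include <stdio.h>
#include <stdlib.h>
#include <inttypes.h>
#include <jemalloc/jemalloc.h>

int
main(void) {
    uint64_t peak;
    size_t sz = sizeof(peak);

    /* Start a fresh measurement interval for the calling thread. */
    if (mallctl("thread.peak.reset", NULL, NULL, NULL, 0) != 0) {
        return 1;
    }

    void *p = malloc(1024 * 1024); /* Net allocation rises by ~1MB... */
    free(p);                       /* ...then drops back to zero. */

    /* The high-water mark survives the free. */
    if (mallctl("thread.peak.read", &peak, &sz, NULL, 0) != 0) {
        return 1;
    }
    printf("peak net allocation: %" PRIu64 " bytes\n", peak);
    return 0;
}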

include/jemalloc/internal/peak_event.h (new file, +24)
@@ -0,0 +1,24 @@
+#ifndef JEMALLOC_INTERNAL_PEAK_EVENT_H
+#define JEMALLOC_INTERNAL_PEAK_EVENT_H
+
+/*
+ * While peak.h contains the simple helper struct that tracks state, this
+ * contains the allocator tie-ins (and knows about tsd, the event module, etc.).
+ */
+
+/* Update the peak with current tsd state. */
+void peak_event_update(tsd_t *tsd);
+/* Set current state to zero. */
+void peak_event_zero(tsd_t *tsd);
+uint64_t peak_event_max(tsd_t *tsd);
+
+/* Manual hooks. */
+/* The activity-triggered hooks. */
+uint64_t peak_alloc_new_event_wait(tsd_t *tsd);
+uint64_t peak_alloc_postponed_event_wait(tsd_t *tsd);
+void peak_alloc_event_handler(tsd_t *tsd, uint64_t elapsed);
+uint64_t peak_dalloc_new_event_wait(tsd_t *tsd);
+uint64_t peak_dalloc_postponed_event_wait(tsd_t *tsd);
+void peak_dalloc_event_handler(tsd_t *tsd, uint64_t elapsed);
+
+#endif /* JEMALLOC_INTERNAL_PEAK_EVENT_H */
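
peak.h itself is not part of this diff. Purely for orientation, here is a reconstruction of what such a helper could look like, inferred from the peak_update / peak_set_zero / peak_max calls and the PEAK_INITIALIZER used elsewhere in this commit; the field names and exact details are assumptions, not the real header.

/* Hypothetical sketch of the peak_t helper; names and layout assumed. */
typedef struct peak_s peak_t;
struct peak_s {
    /* Highest adjusted net-allocation value observed so far. */
    uint64_t cur_max;
    /* (alloc - dalloc) at the last zeroing; subtracted from candidates. */
    uint64_t adjustment;
};

#define PEAK_INITIALIZER {0, 0}

static inline void
peak_update(peak_t *peak, uint64_t alloc, uint64_t dalloc) {
    /* Signed: the thread may have freed more than it allocated since zeroing. */
    int64_t candidate = (int64_t)(alloc - dalloc - peak->adjustment);
    if (candidate > (int64_t)peak->cur_max) {
        peak->cur_max = (uint64_t)candidate;
    }
}

static inline void
peak_set_zero(peak_t *peak, uint64_t alloc, uint64_t dalloc) {
    peak->cur_max = 0;
    peak->adjustment = alloc - dalloc;
}

static inline uint64_t
peak_max(peak_t *peak) {
    return peak->cur_max;
}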

include/jemalloc/internal/thread_event.h
@@ -56,7 +56,9 @@ void tsd_te_init(tsd_t *tsd);
     E(tcache_gc, (opt_tcache_gc_incr_bytes > 0), true) \
     E(prof_sample, (config_prof && opt_prof), true) \
     E(stats_interval, (opt_stats_interval >= 0), true) \
-    E(tcache_gc_dalloc, (opt_tcache_gc_incr_bytes > 0), false)
+    E(tcache_gc_dalloc, (opt_tcache_gc_incr_bytes > 0), false) \
+    E(peak_alloc, config_stats, true) \
+    E(peak_dalloc, config_stats, false)

 #define E(event, condition_unused, is_alloc_event_unused) \
     C(event##_event_wait)
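
To make the X-macro mechanics concrete: each row of the event table is stamped out through E, so under the definition shown above, the two new rows reduce to per-event plumbing for the matching tsd fields (illustrative expansion only; C(...) is whatever generator macro the surrounding header defines):

E(peak_alloc, config_stats, true)   /* -> C(peak_alloc_event_wait) */
E(peak_dalloc, config_stats, false) /* -> C(peak_dalloc_event_wait) */

which is why tsd.h below grows peak_alloc_event_wait and peak_dalloc_event_wait fields.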

include/jemalloc/internal/tsd.h
@@ -5,6 +5,7 @@
 #include "jemalloc/internal/assert.h"
 #include "jemalloc/internal/bin_types.h"
 #include "jemalloc/internal/jemalloc_internal_externs.h"
+#include "jemalloc/internal/peak.h"
 #include "jemalloc/internal/prof_types.h"
 #include "jemalloc/internal/ql.h"
 #include "jemalloc/internal/rtree_tsd.h"
@@ -69,6 +70,8 @@ typedef ql_elm(tsd_t) tsd_link_t;
     O(prof_sample_last_event, uint64_t, uint64_t) \
     O(stats_interval_event_wait, uint64_t, uint64_t) \
     O(stats_interval_last_event, uint64_t, uint64_t) \
+    O(peak_alloc_event_wait, uint64_t, uint64_t) \
+    O(peak_dalloc_event_wait, uint64_t, uint64_t) \
     O(prof_tdata, prof_tdata_t *, prof_tdata_t *) \
     O(prng_state, uint64_t, uint64_t) \
     O(iarena, arena_t *, arena_t *) \
@@ -77,6 +80,7 @@ typedef ql_elm(tsd_t) tsd_link_t;
     O(binshards, tsd_binshards_t, tsd_binshards_t) \
     O(tsd_link, tsd_link_t, tsd_link_t) \
     O(in_hook, bool, bool) \
+    O(peak, peak_t, peak_t) \
     O(tcache_slow, tcache_slow_t, tcache_slow_t) \
     O(rtree_ctx, rtree_ctx_t, rtree_ctx_t)
@@ -95,6 +99,8 @@ typedef ql_elm(tsd_t) tsd_link_t;
     /* prof_sample_last_event */ 0, \
     /* stats_interval_event_wait */ 0, \
     /* stats_interval_last_event */ 0, \
+    /* peak_alloc_event_wait */ 0, \
+    /* peak_dalloc_event_wait */ 0, \
     /* prof_tdata */ NULL, \
     /* prng_state */ 0, \
     /* iarena */ NULL, \
@@ -103,6 +109,7 @@ typedef ql_elm(tsd_t) tsd_link_t;
     /* binshards */ TSD_BINSHARDS_ZERO_INITIALIZER, \
     /* tsd_link */ {NULL}, \
     /* in_hook */ false, \
+    /* peak */ PEAK_INITIALIZER, \
     /* tcache_slow */ TCACHE_SLOW_ZERO_INITIALIZER, \
     /* rtree_ctx */ RTREE_CTX_ZERO_INITIALIZER,

msvc/projects/vc2015/jemalloc/jemalloc.vcxproj
@@ -70,6 +70,7 @@
     <ClCompile Include="..\..\..\..\src\pa.c" />
     <ClCompile Include="..\..\..\..\src\pa_extra.c" />
     <ClCompile Include="..\..\..\..\src\pages.c" />
+    <ClCompile Include="..\..\..\..\src\peak_event.c" />
     <ClCompile Include="..\..\..\..\src\prng.c" />
     <ClCompile Include="..\..\..\..\src\prof.c" />
     <ClCompile Include="..\..\..\..\src\prof_data.c" />

msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters
@@ -94,6 +94,9 @@
     <ClCompile Include="..\..\..\..\src\pages.c">
       <Filter>Source Files</Filter>
     </ClCompile>
+    <ClCompile Include="..\..\..\..\src\peak_event.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
     <ClCompile Include="..\..\..\..\src\prng.c">
       <Filter>Source Files</Filter>
     </ClCompile>

msvc/projects/vc2017/jemalloc/jemalloc.vcxproj
@@ -70,6 +70,7 @@
     <ClCompile Include="..\..\..\..\src\pa.c" />
     <ClCompile Include="..\..\..\..\src\pa_extra.c" />
     <ClCompile Include="..\..\..\..\src\pages.c" />
+    <ClCompile Include="..\..\..\..\src\peak_event.c" />
     <ClCompile Include="..\..\..\..\src\prng.c" />
     <ClCompile Include="..\..\..\..\src\prof.c" />
     <ClCompile Include="..\..\..\..\src\prof_data.c" />

msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters
@@ -94,6 +94,9 @@
     <ClCompile Include="..\..\..\..\src\pages.c">
       <Filter>Source Files</Filter>
     </ClCompile>
+    <ClCompile Include="..\..\..\..\src\peak_event.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
     <ClCompile Include="..\..\..\..\src\prng.c">
       <Filter>Source Files</Filter>
     </ClCompile>

src/ctl.c (+41)
@@ -9,6 +9,7 @@
 #include "jemalloc/internal/inspect.h"
 #include "jemalloc/internal/mutex.h"
 #include "jemalloc/internal/nstime.h"
+#include "jemalloc/internal/peak_event.h"
 #include "jemalloc/internal/sc.h"
 #include "jemalloc/internal/util.h"

@@ -61,6 +62,8 @@ CTL_PROTO(background_thread)
 CTL_PROTO(max_background_threads)
 CTL_PROTO(thread_tcache_enabled)
 CTL_PROTO(thread_tcache_flush)
+CTL_PROTO(thread_peak_read)
+CTL_PROTO(thread_peak_reset)
 CTL_PROTO(thread_prof_name)
 CTL_PROTO(thread_prof_active)
 CTL_PROTO(thread_arena)
@@ -294,6 +297,11 @@ static const ctl_named_node_t thread_tcache_node[] = {
     {NAME("flush"), CTL(thread_tcache_flush)}
 };

+static const ctl_named_node_t thread_peak_node[] = {
+    {NAME("read"), CTL(thread_peak_read)},
+    {NAME("reset"), CTL(thread_peak_reset)},
+};
+
 static const ctl_named_node_t thread_prof_node[] = {
     {NAME("name"), CTL(thread_prof_name)},
     {NAME("active"), CTL(thread_prof_active)}
@@ -306,6 +314,7 @@ static const ctl_named_node_t thread_node[] = {
     {NAME("deallocated"), CTL(thread_deallocated)},
     {NAME("deallocatedp"), CTL(thread_deallocatedp)},
     {NAME("tcache"), CHILD(named, thread_tcache)},
+    {NAME("peak"), CHILD(named, thread_peak)},
     {NAME("prof"), CHILD(named, thread_prof)},
     {NAME("idle"), CTL(thread_idle)}
 };
@@ -1953,6 +1962,38 @@ label_return:
     return ret;
 }

+static int
+thread_peak_read_ctl(tsd_t *tsd, const size_t *mib,
+    size_t miblen, void *oldp, size_t *oldlenp, void *newp,
+    size_t newlen) {
+    int ret;
+    if (!config_stats) {
+        return ENOENT;
+    }
+    READONLY();
+    peak_event_update(tsd);
+    uint64_t result = peak_event_max(tsd);
+    READ(result, uint64_t);
+    ret = 0;
+label_return:
+    return ret;
+}
+
+static int
+thread_peak_reset_ctl(tsd_t *tsd, const size_t *mib,
+    size_t miblen, void *oldp, size_t *oldlenp, void *newp,
+    size_t newlen) {
+    int ret;
+    if (!config_stats) {
+        return ENOENT;
+    }
+    NEITHER_READ_NOR_WRITE();
+    peak_event_zero(tsd);
+    ret = 0;
+label_return:
+    return ret;
+}
+
 static int
 thread_prof_name_ctl(tsd_t *tsd, const size_t *mib,
     size_t miblen, void *oldp, size_t *oldlenp, void *newp,
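
A note on the ctl macro conventions visible in these handlers (summarized from their use here; the definitive definitions live elsewhere in ctl.c): READONLY() rejects calls that supply a new value, NEITHER_READ_NOR_WRITE() rejects both directions, and READ(result, uint64_t) copies result out through oldp/oldlenp. On failure, each sets ret and jumps to label_return, which is why both handlers end with that label.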

src/peak_event.c (new file, +67)
@@ -0,0 +1,67 @@
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/jemalloc_internal_includes.h"
+
+#include "jemalloc/internal/peak.h"
+#include "jemalloc/internal/peak_event.h"
+
+/*
+ * Update every 100k by default. We're not exposing this as a configuration
+ * option for now; we don't want to bind ourselves too tightly to any particular
+ * performance requirements for small values, or guarantee that we'll even be
+ * able to provide fine-grained accuracy.
+ */
+#define PEAK_EVENT_WAIT (100 * 1024)
+
+/* Update the peak with current tsd state. */
+void
+peak_event_update(tsd_t *tsd) {
+    uint64_t alloc = tsd_thread_allocated_get(tsd);
+    uint64_t dalloc = tsd_thread_deallocated_get(tsd);
+    peak_t *peak = tsd_peakp_get(tsd);
+    peak_update(peak, alloc, dalloc);
+}
+
+/* Set current state to zero. */
+void
+peak_event_zero(tsd_t *tsd) {
+    uint64_t alloc = tsd_thread_allocated_get(tsd);
+    uint64_t dalloc = tsd_thread_deallocated_get(tsd);
+    peak_t *peak = tsd_peakp_get(tsd);
+    peak_set_zero(peak, alloc, dalloc);
+}
+
+uint64_t
+peak_event_max(tsd_t *tsd) {
+    peak_t *peak = tsd_peakp_get(tsd);
+    return peak_max(peak);
+}
+
+uint64_t
+peak_alloc_new_event_wait(tsd_t *tsd) {
+    return PEAK_EVENT_WAIT;
+}
+
+uint64_t
+peak_alloc_postponed_event_wait(tsd_t *tsd) {
+    return TE_MIN_START_WAIT;
+}
+
+void
+peak_alloc_event_handler(tsd_t *tsd, uint64_t elapsed) {
+    peak_event_update(tsd);
+}
+
+uint64_t
+peak_dalloc_new_event_wait(tsd_t *tsd) {
+    return PEAK_EVENT_WAIT;
+}
+
+uint64_t
+peak_dalloc_postponed_event_wait(tsd_t *tsd) {
+    return TE_MIN_START_WAIT;
+}
+
+void
+peak_dalloc_event_handler(tsd_t *tsd, uint64_t elapsed) {
+    peak_event_update(tsd);
+}

src/thread_event.c
@@ -60,6 +60,16 @@ stats_interval_fetch_elapsed(tsd_t *tsd) {
     return last_event - last_stats_event;
 }

+static uint64_t
+peak_alloc_fetch_elapsed(tsd_t *tsd) {
+    return TE_INVALID_ELAPSED;
+}
+
+static uint64_t
+peak_dalloc_fetch_elapsed(tsd_t *tsd) {
+    return TE_INVALID_ELAPSED;
+}
+
 /* Per event facilities done. */

 static bool
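
The two fetch_elapsed stubs returning TE_INVALID_ELAPSED line up with the handlers in peak_event.c ignoring their elapsed argument: as sketched here, peak tracking only needs to know that enough allocator activity accumulated to cross PEAK_EVENT_WAIT, not how much time passed between events.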

test/unit/mallctl.c
@@ -955,6 +955,73 @@ TEST_BEGIN(test_thread_idle) {
 }
 TEST_END

+TEST_BEGIN(test_thread_peak) {
+    test_skip_if(!config_stats);
+
+    /*
+     * We don't commit to any stable amount of accuracy for peak tracking
+     * (in practice, when this test was written, we made sure to be within
+     * 100k). But 10MB is big for more or less any definition of big.
+     */
+    size_t big_size = 10 * 1024 * 1024;
+    size_t small_size = 256;
+
+    void *ptr;
+    int err;
+    size_t sz;
+    uint64_t peak;
+    sz = sizeof(uint64_t);
+
+    err = mallctl("thread.peak.reset", NULL, NULL, NULL, 0);
+    expect_d_eq(err, 0, "");
+    ptr = mallocx(SC_SMALL_MAXCLASS, 0);
+    err = mallctl("thread.peak.read", &peak, &sz, NULL, 0);
+    expect_d_eq(err, 0, "");
+    expect_u64_eq(peak, SC_SMALL_MAXCLASS, "Missed an update");
+    free(ptr);
+    err = mallctl("thread.peak.read", &peak, &sz, NULL, 0);
+    expect_d_eq(err, 0, "");
+    expect_u64_eq(peak, SC_SMALL_MAXCLASS, "Freeing changed peak");
+    ptr = mallocx(big_size, 0);
+    free(ptr);
+    /*
+     * The peak should have hit big_size in the last two lines, even though
+     * the net allocated bytes have since dropped back down to zero. We
+     * should have noticed the peak change without having done any mallctl
+     * calls while net allocated bytes was high.
+     */
+    err = mallctl("thread.peak.read", &peak, &sz, NULL, 0);
+    expect_d_eq(err, 0, "");
+    expect_u64_ge(peak, big_size, "Missed a peak change.");
+
+    /* Allocate big_size, but using small allocations. */
+    size_t nallocs = big_size / small_size;
+    void **ptrs = calloc(nallocs, sizeof(void *));
+    err = mallctl("thread.peak.reset", NULL, NULL, NULL, 0);
+    expect_d_eq(err, 0, "");
+    err = mallctl("thread.peak.read", &peak, &sz, NULL, 0);
+    expect_d_eq(err, 0, "");
+    expect_u64_eq(0, peak, "Missed a reset.");
+    for (size_t i = 0; i < nallocs; i++) {
+        ptrs[i] = mallocx(small_size, 0);
+    }
+    for (size_t i = 0; i < nallocs; i++) {
+        free(ptrs[i]);
+    }
+    err = mallctl("thread.peak.read", &peak, &sz, NULL, 0);
+    expect_d_eq(err, 0, "");
+    /*
+     * We don't guarantee exactness; make sure we're within 10% of the peak,
+     * though.
+     */
+    expect_u64_ge(peak, nallocx(small_size, 0) * nallocs * 9 / 10,
+        "Missed some peak changes.");
+    expect_u64_le(peak, nallocx(small_size, 0) * nallocs * 11 / 10,
+        "Overcounted peak changes.");
+    free(ptrs);
+}
+TEST_END
+
 int
 main(void) {
     return test(
@@ -987,5 +1054,6 @@ main(void) {
         test_stats_arenas,
         test_hooks,
         test_hooks_exhaustion,
-        test_thread_idle);
+        test_thread_idle,
+        test_thread_peak);
 }