Last-N profiling mode

This commit is contained in:
Yinan Zhang 2019-12-18 13:38:14 -08:00
parent 7a27a05940
commit 9a60cf54ec
26 changed files with 1218 additions and 44 deletions

View File

@ -126,6 +126,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \
$(srcroot)src/prof.c \
$(srcroot)src/prof_data.c \
$(srcroot)src/prof_log.c \
$(srcroot)src/prof_recent.c \
$(srcroot)src/rtree.c \
$(srcroot)src/safety_check.c \
$(srcroot)src/sc.c \
@ -216,6 +217,7 @@ TESTS_UNIT := \
$(srcroot)test/unit/prof_gdump.c \
$(srcroot)test/unit/prof_idump.c \
$(srcroot)test/unit/prof_log.c \
$(srcroot)test/unit/prof_recent.c \
$(srcroot)test/unit/prof_reset.c \
$(srcroot)test/unit/prof_tctx.c \
$(srcroot)test/unit/prof_thread_name.c \

View File

@ -37,12 +37,12 @@ arena_choose_maybe_huge(tsd_t *tsd, arena_t *arena, size_t size) {
JEMALLOC_ALWAYS_INLINE void
arena_prof_info_get(tsd_t *tsd, const void *ptr, alloc_ctx_t *alloc_ctx,
prof_info_t *prof_info) {
prof_info_t *prof_info, bool reset_recent) {
cassert(config_prof);
assert(ptr != NULL);
assert(prof_info != NULL);
const edata_t *edata;
edata_t *edata = NULL;
bool is_slab;
/* Static check. */
@ -55,10 +55,14 @@ arena_prof_info_get(tsd_t *tsd, const void *ptr, alloc_ctx_t *alloc_ctx,
if (unlikely(!is_slab)) {
/* edata must have been initialized at this point. */
large_prof_info_get(edata, prof_info);
assert(edata != NULL);
large_prof_info_get(tsd, edata, prof_info, reset_recent);
} else {
memset(prof_info, 0, sizeof(prof_info_t));
prof_info->alloc_tctx = (prof_tctx_t *)(uintptr_t)1U;
/*
* No need to set other fields in prof_info; they will never be
* accessed if (uintptr_t)alloc_tctx == (uintptr_t)1U.
*/
}
}
@ -92,11 +96,9 @@ arena_prof_tctx_reset_sampled(tsd_t *tsd, const void *ptr) {
}
JEMALLOC_ALWAYS_INLINE void
arena_prof_info_set(tsd_t *tsd, const void *ptr, prof_tctx_t *tctx) {
arena_prof_info_set(tsd_t *tsd, edata_t *edata, prof_tctx_t *tctx) {
cassert(config_prof);
assert(ptr != NULL);
edata_t *edata = iealloc(tsd_tsdn(tsd), ptr);
assert(!edata_slab_get(edata));
large_prof_info_set(edata, tctx);
}

View File

@ -25,6 +25,20 @@ enum extent_head_state_e {
};
typedef enum extent_head_state_e extent_head_state_t;
/*
 * Per-extent profiling metadata.  Embedded in edata_t as the e_prof_info
 * member, next to e_slab_data; extent_boot() asserts that
 * sizeof(slab_data_t) >= sizeof(e_prof_info_t).
 * NOTE(review): the two members appear to share storage (union) -- confirm
 * against the full edata_s definition.
 */
struct e_prof_info_s {
/* Time when this was allocated. */
nstime_t e_prof_alloc_time;
/* Points to a prof_tctx_t. */
atomic_p_t e_prof_tctx;
/*
 * Points to a prof_recent_t for the allocation; NULL
 * means the recent allocation record no longer exists.
 * Protected by prof_recent_alloc_mtx.
 */
atomic_p_t e_prof_recent_alloc;
};
typedef struct e_prof_info_s e_prof_info_t;
/* Extent (span of pages). Use accessor functions for e_* fields. */
typedef struct edata_s edata_t;
typedef ql_head(edata_t) edata_list_t;
@ -186,12 +200,7 @@ struct edata_s {
slab_data_t e_slab_data;
/* Profiling data, used for large objects. */
struct {
/* Time when this was allocated. */
nstime_t e_alloc_time;
/* Points to a prof_tctx_t. */
atomic_p_t e_prof_tctx;
};
e_prof_info_t e_prof_info;
};
};
@ -333,12 +342,21 @@ edata_slab_data_get_const(const edata_t *edata) {
return &edata->e_slab_data;
}
static inline void
edata_prof_info_get(const edata_t *edata, prof_info_t *prof_info) {
assert(prof_info != NULL);
prof_info->alloc_tctx = (prof_tctx_t *)atomic_load_p(
&edata->e_prof_tctx, ATOMIC_ACQUIRE);
prof_info->alloc_time = edata->e_alloc_time;
static inline prof_tctx_t *
edata_prof_tctx_get(const edata_t *edata) {
return (prof_tctx_t *)atomic_load_p(&edata->e_prof_info.e_prof_tctx,
ATOMIC_ACQUIRE);
}
static inline const nstime_t *
edata_prof_alloc_time_get(const edata_t *edata) {
return &edata->e_prof_info.e_prof_alloc_time;
}
static inline prof_recent_t *
edata_prof_recent_alloc_get_dont_call_directly(const edata_t *edata) {
return (prof_recent_t *)atomic_load_p(
&edata->e_prof_info.e_prof_recent_alloc, ATOMIC_RELAXED);
}
static inline void
@ -457,12 +475,19 @@ edata_slab_set(edata_t *edata, bool slab) {
static inline void
edata_prof_tctx_set(edata_t *edata, prof_tctx_t *tctx) {
atomic_store_p(&edata->e_prof_tctx, tctx, ATOMIC_RELEASE);
atomic_store_p(&edata->e_prof_info.e_prof_tctx, tctx, ATOMIC_RELEASE);
}
static inline void
edata_prof_alloc_time_set(edata_t *edata, nstime_t *t) {
nstime_copy(&edata->e_alloc_time, t);
nstime_copy(&edata->e_prof_info.e_prof_alloc_time, t);
}
/*
 * Raw (relaxed) store of the extent's recent-allocation record pointer.
 * Not meant to be called directly; prof_recent.c wraps it with helpers that
 * check prof_recent_alloc_mtx ownership.
 */
static inline void
edata_prof_recent_alloc_set_dont_call_directly(edata_t *edata,
    prof_recent_t *recent_alloc) {
	atomic_p_t *slot = &edata->e_prof_info.e_prof_recent_alloc;

	atomic_store_p(slot, recent_alloc, ATOMIC_RELAXED);
}
static inline bool

View File

@ -22,7 +22,8 @@ void large_dalloc_prep_junked_locked(tsdn_t *tsdn, edata_t *edata);
void large_dalloc_finish(tsdn_t *tsdn, edata_t *edata);
void large_dalloc(tsdn_t *tsdn, edata_t *edata);
size_t large_salloc(tsdn_t *tsdn, const edata_t *edata);
void large_prof_info_get(const edata_t *edata, prof_info_t *prof_info);
void large_prof_info_get(tsd_t *tsd, edata_t *edata, prof_info_t *prof_info,
bool reset_recent);
void large_prof_tctx_reset(edata_t *edata);
void large_prof_info_set(edata_t *edata, prof_tctx_t *tctx);

View File

@ -9,6 +9,8 @@ typedef struct {
uint64_t ns;
} nstime_t;
static const nstime_t zero = NSTIME_ZERO_INITIALIZER;
void nstime_init(nstime_t *time, uint64_t ns);
void nstime_init2(nstime_t *time, uint64_t sec, uint64_t nsec);
uint64_t nstime_ns(const nstime_t *time);
@ -35,8 +37,14 @@ bool nstime_init_update(nstime_t *time);
JEMALLOC_ALWAYS_INLINE void
nstime_init_zero(nstime_t *time) {
static const nstime_t zero = NSTIME_ZERO_INITIALIZER;
nstime_copy(time, &zero);
}
/*
 * Return true iff *time compares equal to the zero timestamp.
 *
 * The argument is only read, so it is taken as a pointer to const; callers
 * holding a non-const nstime_t * are unaffected.  A time that compares below
 * zero is considered a logic error and is asserted against.
 */
JEMALLOC_ALWAYS_INLINE bool
nstime_equals_zero(const nstime_t *time) {
	int diff = nstime_compare(time, &zero);
	assert(diff >= 0);
	return diff == 0;
}
#endif /* JEMALLOC_INTERNAL_NSTIME_H */

View File

@ -24,6 +24,10 @@ extern char opt_prof_prefix[
#endif
1];
/* For recording recent allocations */
extern ssize_t opt_prof_recent_alloc_max;
extern malloc_mutex_t prof_recent_alloc_mtx;
/* Accessed via prof_active_[gs]et{_unlocked,}(). */
extern bool prof_active;
@ -99,4 +103,9 @@ void prof_sample_threshold_update(tsd_t *tsd);
bool prof_log_start(tsdn_t *tsdn, const char *filename);
bool prof_log_stop(tsdn_t *tsdn);
ssize_t prof_recent_alloc_max_ctl_read();
ssize_t prof_recent_alloc_max_ctl_write(tsd_t *tsd, ssize_t max);
void prof_recent_alloc_dump(tsd_t *tsd, void (*write_cb)(void *, const char *),
void *cbopaque);
#endif /* JEMALLOC_INTERNAL_PROF_EXTERNS_H */

View File

@ -46,7 +46,17 @@ prof_info_get(tsd_t *tsd, const void *ptr, alloc_ctx_t *alloc_ctx,
assert(ptr != NULL);
assert(prof_info != NULL);
arena_prof_info_get(tsd, ptr, alloc_ctx, prof_info);
arena_prof_info_get(tsd, ptr, alloc_ctx, prof_info, false);
}
/*
 * Same lookup as prof_info_get(), but passes reset_recent == true to
 * arena_prof_info_get(), so that for a sampled large allocation the
 * associated recent-allocation record is marked as released.
 */
JEMALLOC_ALWAYS_INLINE void
prof_info_get_and_reset_recent(tsd_t *tsd, const void *ptr,
    alloc_ctx_t *alloc_ctx, prof_info_t *prof_info) {
	const bool reset_recent = true;

	cassert(config_prof);
	assert(ptr != NULL);
	assert(prof_info != NULL);

	arena_prof_info_get(tsd, ptr, alloc_ctx, prof_info, reset_recent);
}
JEMALLOC_ALWAYS_INLINE void
@ -66,12 +76,12 @@ prof_tctx_reset_sampled(tsd_t *tsd, const void *ptr) {
}
JEMALLOC_ALWAYS_INLINE void
prof_info_set(tsd_t *tsd, const void *ptr, prof_tctx_t *tctx) {
prof_info_set(tsd_t *tsd, edata_t *edata, prof_tctx_t *tctx) {
cassert(config_prof);
assert(ptr != NULL);
assert(edata != NULL);
assert((uintptr_t)tctx > (uintptr_t)1U);
arena_prof_info_set(tsd, ptr, tctx);
arena_prof_info_set(tsd, edata, tctx);
}
JEMALLOC_ALWAYS_INLINE bool
@ -190,7 +200,7 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx,
JEMALLOC_ALWAYS_INLINE void
prof_free(tsd_t *tsd, const void *ptr, size_t usize, alloc_ctx_t *alloc_ctx) {
prof_info_t prof_info;
prof_info_get(tsd, ptr, alloc_ctx, &prof_info);
prof_info_get_and_reset_recent(tsd, ptr, alloc_ctx, &prof_info);
cassert(config_prof);
assert(usize == isalloc(tsd_tsdn(tsd), ptr));

View File

@ -0,0 +1,16 @@
#ifndef JEMALLOC_INTERNAL_PROF_RECENT_EXTERNS_H
#define JEMALLOC_INTERNAL_PROF_RECENT_EXTERNS_H
bool prof_recent_alloc_prepare(tsd_t *tsd, prof_tctx_t *tctx);
void prof_recent_alloc(tsd_t *tsd, edata_t *edata, size_t usize);
void prof_recent_alloc_reset(tsd_t *tsd, edata_t *edata);
bool prof_recent_init();
void edata_prof_recent_alloc_init(edata_t *edata);
#ifdef JEMALLOC_JET
prof_recent_t *prof_recent_alloc_begin(tsd_t *tsd);
prof_recent_t *prof_recent_alloc_end(tsd_t *tsd);
prof_recent_t *prof_recent_alloc_next(tsd_t *tsd, prof_recent_t *node);
prof_recent_t *edata_prof_recent_alloc_get(tsd_t *tsd, const edata_t *edata);
#endif
#endif /* JEMALLOC_INTERNAL_PROF_RECENT_EXTERNS_H */

View File

@ -2,6 +2,7 @@
#define JEMALLOC_INTERNAL_PROF_STRUCTS_H
#include "jemalloc/internal/ckh.h"
#include "jemalloc/internal/edata.h"
#include "jemalloc/internal/mutex.h"
#include "jemalloc/internal/prng.h"
#include "jemalloc/internal/rb.h"
@ -55,6 +56,12 @@ struct prof_tctx_s {
uint64_t thr_uid;
uint64_t thr_discrim;
/*
* Reference count of how many times this tctx object is referenced in
* recent allocation / deallocation records, protected by tdata->lock.
*/
uint64_t recent_count;
/* Profiling counters, protected by tdata->lock. */
prof_cnt_t cnts;
@ -97,10 +104,10 @@ struct prof_tctx_s {
typedef rb_tree(prof_tctx_t) prof_tctx_tree_t;
struct prof_info_s {
/* Points to the prof_tctx_t corresponding to the allocation. */
prof_tctx_t *alloc_tctx;
/* Time when the allocation was made. */
nstime_t alloc_time;
/* Points to the prof_tctx_t corresponding to the allocation. */
prof_tctx_t *alloc_tctx;
};
struct prof_gctx_s {
@ -201,4 +208,15 @@ struct prof_tdata_s {
};
typedef rb_tree(prof_tdata_t) prof_tdata_tree_t;
/*
 * One record in the list of recent sampled allocations ("last-N" mode).
 * Records are linked through `next`; the list is walked via
 * prof_recent_alloc_begin() / _next() / _end() while holding
 * prof_recent_alloc_mtx.
 */
struct prof_recent_s {
/* Allocation time.  NOTE(review): the writer is outside this view. */
nstime_t alloc_time;
/*
 * Deallocation time; asserted to be zero until prof_recent_alloc_reset()
 * records the release.
 */
nstime_t dalloc_time;
/* Next record in the list. */
prof_recent_t *next;
/* Presumably the usable size of the allocation -- confirm at the writer. */
size_t usize;
/* tctx of the allocating call site. */
prof_tctx_t *alloc_tctx;
edata_t *alloc_edata; /* NULL means allocation has been freed. */
/*
 * tctx captured at deallocation; stays NULL if none could be created when
 * the release was recorded.
 */
prof_tctx_t *dalloc_tctx;
};
#endif /* JEMALLOC_INTERNAL_PROF_STRUCTS_H */

View File

@ -8,6 +8,7 @@ typedef struct prof_tctx_s prof_tctx_t;
typedef struct prof_info_s prof_info_t;
typedef struct prof_gctx_s prof_gctx_t;
typedef struct prof_tdata_s prof_tdata_t;
typedef struct prof_recent_s prof_recent_t;
/* Option defaults. */
#ifdef JEMALLOC_PROF
@ -53,4 +54,7 @@ typedef struct prof_tdata_s prof_tdata_t;
#define PROF_DUMP_FILENAME_LEN 1
#endif
/* Default number of recent allocations to record. */
#define PROF_RECENT_ALLOC_MAX_DEFAULT 0
#endif /* JEMALLOC_INTERNAL_PROF_TYPES_H */

View File

@ -61,6 +61,7 @@
#define WITNESS_RANK_PROF_GDUMP WITNESS_RANK_LEAF
#define WITNESS_RANK_PROF_NEXT_THR_UID WITNESS_RANK_LEAF
#define WITNESS_RANK_PROF_THREAD_ACTIVE_INIT WITNESS_RANK_LEAF
#define WITNESS_RANK_PROF_RECENT_ALLOC WITNESS_RANK_LEAF
/******************************************************************************/
/* PER-WITNESS DATA */

View File

@ -67,6 +67,7 @@
<ClCompile Include="..\..\..\..\src\prof.c" />
<ClCompile Include="..\..\..\..\src\prof_data.c" />
<ClCompile Include="..\..\..\..\src\prof_log.c" />
<ClCompile Include="..\..\..\..\src\prof_recent.c" />
<ClCompile Include="..\..\..\..\src\rtree.c" />
<ClCompile Include="..\..\..\..\src\safety_check.c" />
<ClCompile Include="..\..\..\..\src\sc.c" />

View File

@ -82,6 +82,9 @@
<ClCompile Include="..\..\..\..\src\prof_log.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\..\..\src\prof_recent.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\..\..\src\rtree.c">
<Filter>Source Files</Filter>
</ClCompile>

View File

@ -67,6 +67,7 @@
<ClCompile Include="..\..\..\..\src\prof.c" />
<ClCompile Include="..\..\..\..\src\prof_data.c" />
<ClCompile Include="..\..\..\..\src\prof_log.c" />
<ClCompile Include="..\..\..\..\src\prof_recent.c" />
<ClCompile Include="..\..\..\..\src\rtree.c" />
<ClCompile Include="..\..\..\..\src\safety_check.c" />
<ClCompile Include="..\..\..\..\src\sc.c" />

View File

@ -82,6 +82,9 @@
<ClCompile Include="..\..\..\..\src\prof_log.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\..\..\src\prof_recent.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\..\..\src\rtree.c">
<Filter>Source Files</Filter>
</ClCompile>

View File

@ -113,6 +113,7 @@ CTL_PROTO(opt_prof_gdump)
CTL_PROTO(opt_prof_final)
CTL_PROTO(opt_prof_leak)
CTL_PROTO(opt_prof_accum)
CTL_PROTO(opt_prof_recent_alloc_max)
CTL_PROTO(opt_zero_realloc)
CTL_PROTO(tcache_create)
CTL_PROTO(tcache_flush)
@ -232,6 +233,7 @@ CTL_PROTO(experimental_utilization_query)
CTL_PROTO(experimental_utilization_batch_query)
CTL_PROTO(experimental_arenas_i_pactivep)
INDEX_PROTO(experimental_arenas_i)
CTL_PROTO(experimental_prof_recent_alloc_max)
#define MUTEX_STATS_CTL_PROTO_GEN(n) \
CTL_PROTO(stats_##n##_num_ops) \
@ -343,6 +345,7 @@ static const ctl_named_node_t opt_node[] = {
{NAME("prof_final"), CTL(opt_prof_final)},
{NAME("prof_leak"), CTL(opt_prof_leak)},
{NAME("prof_accum"), CTL(opt_prof_accum)},
{NAME("prof_recent_alloc_max"), CTL(opt_prof_recent_alloc_max)},
{NAME("zero_realloc"), CTL(opt_zero_realloc)}
};
@ -620,10 +623,15 @@ static const ctl_indexed_node_t experimental_arenas_node[] = {
{INDEX(experimental_arenas_i)}
};
static const ctl_named_node_t experimental_prof_recent_node[] = {
{NAME("alloc_max"), CTL(experimental_prof_recent_alloc_max)},
};
static const ctl_named_node_t experimental_node[] = {
{NAME("hooks"), CHILD(named, experimental_hooks)},
{NAME("utilization"), CHILD(named, experimental_utilization)},
{NAME("arenas"), CHILD(indexed, experimental_arenas)}
{NAME("arenas"), CHILD(indexed, experimental_arenas)},
{NAME("prof_recent"), CHILD(named, experimental_prof_recent)}
};
static const ctl_named_node_t root_node[] = {
@ -1791,6 +1799,8 @@ CTL_RO_NL_CGEN(config_prof, opt_lg_prof_interval, opt_lg_prof_interval, ssize_t)
CTL_RO_NL_CGEN(config_prof, opt_prof_gdump, opt_prof_gdump, bool)
CTL_RO_NL_CGEN(config_prof, opt_prof_final, opt_prof_final, bool)
CTL_RO_NL_CGEN(config_prof, opt_prof_leak, opt_prof_leak, bool)
CTL_RO_NL_CGEN(config_prof, opt_prof_recent_alloc_max,
opt_prof_recent_alloc_max, ssize_t)
CTL_RO_NL_GEN(opt_zero_realloc,
zero_realloc_mode_names[opt_zero_realloc_action], const char *)
@ -3461,3 +3471,36 @@ label_return:
malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx);
return ret;
}
/*
 * mallctl handler for "experimental.prof_recent.alloc_max".
 *
 * Without newp this reads the current limit; with newp it installs a new
 * limit and reports the previous one.  Returns ENOENT when profiling is not
 * compiled in or not enabled, and EINVAL when the new value has the wrong
 * size or is below -1.
 */
static int
experimental_prof_recent_alloc_max_ctl(tsd_t *tsd, const size_t *mib,
    size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) {
	int ret;

	if (!config_prof || !opt_prof) {
		ret = ENOENT;
		goto label_return;
	}

	ssize_t old_max;
	if (newp == NULL) {
		/* Pure read. */
		old_max = prof_recent_alloc_max_ctl_read();
	} else {
		/* Validate the size before dereferencing newp. */
		if (newlen != sizeof(ssize_t)) {
			ret = EINVAL;
			goto label_return;
		}
		ssize_t requested = *(ssize_t *)newp;
		if (requested < -1) {
			ret = EINVAL;
			goto label_return;
		}
		old_max = prof_recent_alloc_max_ctl_write(tsd, requested);
	}
	/*
	 * READ() is a ctl helper macro defined elsewhere in this file;
	 * presumably it copies old_max out through oldp/oldlenp -- confirm.
	 */
	READ(old_max, ssize_t);

	ret = 0;
label_return:
	return ret;
}

View File

@ -1562,6 +1562,8 @@ extent_merge_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_cache_t *edata_cache,
bool
extent_boot(void) {
assert(sizeof(slab_data_t) >= sizeof(e_prof_info_t));
if (rtree_new(&extents_rtree, true)) {
return true;
}

View File

@ -1402,6 +1402,8 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS],
CONF_HANDLE_BOOL(opt_prof_final, "prof_final")
CONF_HANDLE_BOOL(opt_prof_leak, "prof_leak")
CONF_HANDLE_BOOL(opt_prof_log, "prof_log")
CONF_HANDLE_SSIZE_T(opt_prof_recent_alloc_max,
"prof_recent_alloc_max", -1, SSIZE_MAX)
}
if (config_log) {
if (CONF_MATCH("log")) {
@ -3015,7 +3017,7 @@ irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size,
size_t alignment, size_t *usize, bool zero, tcache_t *tcache,
arena_t *arena, alloc_ctx_t *alloc_ctx, hook_ralloc_args_t *hook_args) {
prof_info_t old_prof_info;
prof_info_get(tsd, old_ptr, alloc_ctx, &old_prof_info);
prof_info_get_and_reset_recent(tsd, old_ptr, alloc_ctx, &old_prof_info);
bool prof_active = prof_active_get_unlocked();
prof_tctx_t *tctx = prof_alloc_prep(tsd, *usize, prof_active, false);
void *p;
@ -3265,8 +3267,13 @@ ixallocx_prof_sample(tsdn_t *tsdn, void *ptr, size_t old_usize, size_t size,
JEMALLOC_ALWAYS_INLINE size_t
ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size,
size_t extra, size_t alignment, bool zero, alloc_ctx_t *alloc_ctx) {
/*
* old_prof_info is only used for asserting that the profiling info
* isn't changed by the ixalloc() call.
*/
prof_info_t old_prof_info;
prof_info_get(tsd, ptr, alloc_ctx, &old_prof_info);
/*
* usize isn't knowable before ixalloc() returns when extra is non-zero.
* Therefore, compute its maximum possible value and use that in
@ -3315,13 +3322,26 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size,
*/
thread_event(tsd, usize - usize_max);
}
if (usize == old_usize) {
prof_alloc_rollback(tsd, tctx, false);
return usize;
}
prof_realloc(tsd, ptr, usize, tctx, prof_active, ptr, old_usize,
&old_prof_info);
/*
* At this point we can still safely get the original profiling
* information associated with the ptr, because (a) the edata_t object
* associated with the ptr still lives and (b) the profiling info
* fields are not touched. "(a)" is asserted in the outer je_xallocx()
* function, and "(b)" is indirectly verified below by checking that
* the alloc_tctx field is unchanged.
*/
prof_info_t prof_info;
if (usize == old_usize) {
prof_info_get(tsd, ptr, alloc_ctx, &prof_info);
prof_alloc_rollback(tsd, tctx, false);
} else {
prof_info_get_and_reset_recent(tsd, ptr, alloc_ctx, &prof_info);
prof_realloc(tsd, ptr, usize, tctx, prof_active, ptr,
old_usize, &prof_info);
}
assert(old_prof_info.alloc_tctx == prof_info.alloc_tctx);
return usize;
}
@ -3342,6 +3362,13 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) {
tsd = tsd_fetch();
check_entry_exit_locking(tsd_tsdn(tsd));
/*
* old_edata is only for verifying that xallocx() keeps the edata_t
* object associated with the ptr (though the content of the edata_t
* object can be changed).
*/
edata_t *old_edata = iealloc(tsd_tsdn(tsd), ptr);
alloc_ctx_t alloc_ctx;
rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd);
rtree_szind_slab_read(tsd_tsdn(tsd), &extents_rtree, rtree_ctx,
@ -3374,6 +3401,13 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) {
extra, alignment, zero);
thread_event(tsd, usize);
}
/*
* xallocx() should keep using the same edata_t object (though its
* content can be changed).
*/
assert(iealloc(tsd_tsdn(tsd), ptr) == old_edata);
if (unlikely(usize == old_usize)) {
thread_event_rollback(tsd, usize);
goto label_not_resized;

View File

@ -5,6 +5,7 @@
#include "jemalloc/internal/assert.h"
#include "jemalloc/internal/extent_mmap.h"
#include "jemalloc/internal/mutex.h"
#include "jemalloc/internal/prof_recent.h"
#include "jemalloc/internal/rtree.h"
#include "jemalloc/internal/util.h"
@ -368,8 +369,22 @@ large_salloc(tsdn_t *tsdn, const edata_t *edata) {
}
void
large_prof_info_get(const edata_t *edata, prof_info_t *prof_info) {
edata_prof_info_get(edata, prof_info);
large_prof_info_get(tsd_t *tsd, edata_t *edata, prof_info_t *prof_info,
bool reset_recent) {
assert(prof_info != NULL);
nstime_copy(&prof_info->alloc_time, edata_prof_alloc_time_get(edata));
prof_tctx_t *alloc_tctx = edata_prof_tctx_get(edata);
prof_info->alloc_tctx = alloc_tctx;
if (reset_recent && (uintptr_t)alloc_tctx > (uintptr_t)1U) {
/*
* This allocation was a prof sample. Reset the pointer on the
* recent allocation record, so that this allocation is
* recorded as released.
*/
prof_recent_alloc_reset(tsd, edata);
}
}
static void
@ -388,4 +403,5 @@ large_prof_info_set(edata_t *edata, prof_tctx_t *tctx) {
nstime_t t;
nstime_init_update(&t);
edata_prof_alloc_time_set(edata, &t);
edata_prof_recent_alloc_init(edata);
}

View File

@ -7,6 +7,7 @@
#include "jemalloc/internal/mutex.h"
#include "jemalloc/internal/prof_data.h"
#include "jemalloc/internal/prof_log.h"
#include "jemalloc/internal/prof_recent.h"
#include "jemalloc/internal/thread_event.h"
/*
@ -146,7 +147,8 @@ prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated) {
void
prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t usize,
prof_tctx_t *tctx) {
prof_info_set(tsd, ptr, tctx);
edata_t *edata = iealloc(tsd_tsdn(tsd), ptr);
prof_info_set(tsd, edata, tctx);
malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock);
tctx->cnts.curobjs++;
@ -155,8 +157,13 @@ prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t usize,
tctx->cnts.accumobjs++;
tctx->cnts.accumbytes += usize;
}
bool record_recent = prof_recent_alloc_prepare(tsd, tctx);
tctx->prepared = false;
malloc_mutex_unlock(tsd_tsdn(tsd), tctx->tdata->lock);
if (record_recent) {
assert(tctx == edata_prof_tctx_get(edata));
prof_recent_alloc(tsd, edata, usize);
}
}
void
@ -1068,6 +1075,10 @@ prof_boot2(tsd_t *tsd) {
return true;
}
if (prof_recent_init()) {
return true;
}
gctx_locks = (malloc_mutex_t *)base_alloc(tsd_tsdn(tsd),
b0get(), PROF_NCTX_LOCKS * sizeof(malloc_mutex_t),
CACHELINE);
@ -1134,6 +1145,7 @@ prof_prefork1(tsdn_t *tsdn) {
malloc_mutex_prefork(tsdn, &prof_gdump_mtx);
malloc_mutex_prefork(tsdn, &next_thr_uid_mtx);
malloc_mutex_prefork(tsdn, &prof_thread_active_init_mtx);
malloc_mutex_prefork(tsdn, &prof_recent_alloc_mtx);
}
}
@ -1142,6 +1154,7 @@ prof_postfork_parent(tsdn_t *tsdn) {
if (config_prof && opt_prof) {
unsigned i;
malloc_mutex_postfork_parent(tsdn, &prof_recent_alloc_mtx);
malloc_mutex_postfork_parent(tsdn,
&prof_thread_active_init_mtx);
malloc_mutex_postfork_parent(tsdn, &next_thr_uid_mtx);
@ -1166,6 +1179,7 @@ prof_postfork_child(tsdn_t *tsdn) {
if (config_prof && opt_prof) {
unsigned i;
malloc_mutex_postfork_child(tsdn, &prof_recent_alloc_mtx);
malloc_mutex_postfork_child(tsdn, &prof_thread_active_init_mtx);
malloc_mutex_postfork_child(tsdn, &next_thr_uid_mtx);
malloc_mutex_postfork_child(tsdn, &prof_gdump_mtx);

View File

@ -378,6 +378,7 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) {
ret.p->tdata = tdata;
ret.p->thr_uid = tdata->thr_uid;
ret.p->thr_discrim = tdata->thr_discrim;
ret.p->recent_count = 0;
memset(&ret.p->cnts, 0, sizeof(prof_cnt_t));
ret.p->gctx = gctx;
ret.p->tctx_uid = tdata->tctx_uid_next++;
@ -405,8 +406,15 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) {
prof_tctx_t *
prof_tctx_create(tsd_t *tsd) {
prof_tdata_t *tdata = prof_tdata_get(tsd, false);
assert(tdata != NULL);
if (tsd_reentrancy_level_get(tsd) > 0) {
return NULL;
}
prof_tdata_t *tdata = prof_tdata_get(tsd, true);
if (tdata == NULL) {
return NULL;
}
prof_bt_t bt;
bt_init(&bt, tdata->vec);
prof_backtrace(tsd, &bt);
@ -1417,6 +1425,9 @@ prof_tctx_should_destroy(tsd_t *tsd, prof_tctx_t *tctx) {
if (tctx->prepared) {
return false;
}
if (tctx->recent_count != 0) {
return false;
}
return true;
}

553
src/prof_recent.c Normal file
View File

@ -0,0 +1,553 @@
#define JEMALLOC_PROF_RECENT_C_
#include "jemalloc/internal/jemalloc_preamble.h"
#include "jemalloc/internal/jemalloc_internal_includes.h"
#include "jemalloc/internal/assert.h"
#include "jemalloc/internal/emitter.h"
#include "jemalloc/internal/prof_data.h"
#include "jemalloc/internal/prof_recent.h"
#ifndef JEMALLOC_JET
# define STATIC_INLINE_IF_NOT_TEST static inline
#else
# define STATIC_INLINE_IF_NOT_TEST
#endif
/*
 * Startup option ("prof_recent_alloc_max" in malloc_conf); it seeds the
 * runtime limit below via prof_recent_alloc_max_init().
 */
ssize_t opt_prof_recent_alloc_max = PROF_RECENT_ALLOC_MAX_DEFAULT;
malloc_mutex_t prof_recent_alloc_mtx; /* Protects the fields below */
/*
 * Runtime limit on the number of records.  Stored atomically so that it can
 * also be read without the mutex (see prof_recent_alloc_max_get_no_lock()).
 * 0 turns recording off; -1 is treated as "no upper bound" by
 * prof_recent_alloc_assert_count().
 */
static atomic_zd_t prof_recent_alloc_max;
/* Number of records currently in the list. */
static ssize_t prof_recent_alloc_count = 0;
/*
 * Sentinel node of the record list: prof_recent_alloc_begin() returns
 * dummy->next and prof_recent_alloc_end() returns dummy itself.
 */
static prof_recent_t *prof_recent_alloc_dummy = NULL;
static void
prof_recent_alloc_max_init() {
atomic_store_zd(&prof_recent_alloc_max, opt_prof_recent_alloc_max,
ATOMIC_RELAXED);
}
/*
 * Relaxed read of the current last-N limit; does not require
 * prof_recent_alloc_mtx.
 */
static inline ssize_t
prof_recent_alloc_max_get_no_lock() {
	ssize_t current_max = atomic_load_zd(&prof_recent_alloc_max,
	    ATOMIC_RELAXED);

	return current_max;
}
/*
 * Read the current last-N limit; the caller must hold prof_recent_alloc_mtx
 * (asserted).
 */
static inline ssize_t
prof_recent_alloc_max_get(tsd_t *tsd) {
	malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);

	ssize_t limit = prof_recent_alloc_max_get_no_lock();
	return limit;
}
/*
 * Install a new last-N limit and return the one it replaces.  The caller
 * must hold prof_recent_alloc_mtx (asserted).
 */
static inline ssize_t
prof_recent_alloc_max_update(tsd_t *tsd, ssize_t max) {
	malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);

	const ssize_t previous = prof_recent_alloc_max_get(tsd);
	atomic_store_zd(&prof_recent_alloc_max, max, ATOMIC_RELAXED);

	return previous;
}
/*
 * Take one more recent-record reference on tctx.  The caller must hold
 * tctx->tdata->lock (asserted).
 */
static inline void
increment_recent_count(tsd_t *tsd, prof_tctx_t *tctx) {
	malloc_mutex_assert_owner(tsd_tsdn(tsd), tctx->tdata->lock);

	tctx->recent_count += 1;
	/* The counter is unsigned; reaching zero here would mean it wrapped. */
	assert(tctx->recent_count > 0);
}
/*
 * First half of recording a sampled allocation.  Called with
 * tctx->tdata->lock held and prof_recent_alloc_mtx not held (both asserted).
 *
 * Returns true if the allocation should be recorded, in which case a
 * reference on tctx has been taken on behalf of the future record; returns
 * false if last-N mode is currently off.
 */
bool
prof_recent_alloc_prepare(tsd_t *tsd, prof_tctx_t *tctx) {
	assert(opt_prof && prof_booted);
	malloc_mutex_assert_owner(tsd_tsdn(tsd), tctx->tdata->lock);
	malloc_mutex_assert_not_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);

	/*
	 * The limit is read without taking prof_recent_alloc_mtx, which keeps
	 * two situations cheap:
	 * (1) last-N mode is switched off; and
	 * (2) a dump is in progress, during which last-N mode is temporarily
	 *     turned off so that sampled allocations are not blocked.
	 */
	const bool enabled = (prof_recent_alloc_max_get_no_lock() != 0);

	if (enabled) {
		/*
		 * Pin the tctx so that it outlives the release of
		 * tctx->tdata->lock.  This is only a placeholder: the actual
		 * record is created by prof_recent_alloc(), which needs
		 * prof_recent_alloc_mtx and runs once tctx->tdata->lock has
		 * been dropped.
		 */
		increment_recent_count(tsd, tctx);
	}

	return enabled;
}
/*
 * Drop one recent-record reference on tctx and let the tctx be destroyed if
 * nothing else keeps it alive.  Must be called without prof_recent_alloc_mtx
 * held (asserted); takes tctx->tdata->lock itself.
 */
static void
decrement_recent_count(tsd_t *tsd, prof_tctx_t *tctx) {
	malloc_mutex_assert_not_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	assert(tctx != NULL);

	malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock);
	assert(tctx->recent_count > 0);
	tctx->recent_count -= 1;
	/*
	 * NOTE(review): tctx->tdata->lock is not released in this function;
	 * presumably prof_tctx_try_destroy() releases it -- confirm.
	 */
	prof_tctx_try_destroy(tsd, tctx);
}
/* Mark the extent as having no recent-allocation record attached. */
void
edata_prof_recent_alloc_init(edata_t *edata) {
	prof_recent_t *no_record = NULL;

	edata_prof_recent_alloc_set_dont_call_directly(edata, no_record);
}
/*
 * Fetch the extent's recent-allocation record pointer without requiring
 * prof_recent_alloc_mtx.
 */
static inline prof_recent_t *
edata_prof_recent_alloc_get_no_lock(const edata_t *edata) {
	prof_recent_t *record =
	    edata_prof_recent_alloc_get_dont_call_directly(edata);

	return record;
}
/*
 * Fetch the extent's recent-allocation record with prof_recent_alloc_mtx
 * held (asserted).  A non-NULL record must point back at this extent.
 */
STATIC_INLINE_IF_NOT_TEST prof_recent_t *
edata_prof_recent_alloc_get(tsd_t *tsd, const edata_t *edata) {
	malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);

	prof_recent_t *record = edata_prof_recent_alloc_get_no_lock(edata);
	if (record != NULL) {
		assert(record->alloc_edata == edata);
	}
	return record;
}
/*
 * Replace the extent's recent-allocation record pointer with recent_alloc
 * and return the pointer that was stored before.  Requires
 * prof_recent_alloc_mtx (asserted).
 */
static prof_recent_t *
edata_prof_recent_alloc_update_internal(tsd_t *tsd, edata_t *edata,
    prof_recent_t *recent_alloc) {
	malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);

	prof_recent_t *previous = edata_prof_recent_alloc_get(tsd, edata);
	edata_prof_recent_alloc_set_dont_call_directly(edata, recent_alloc);

	return previous;
}
/*
 * Link an extent and a recent-allocation record to each other.  The extent
 * must not have a record attached yet.
 */
static void
edata_prof_recent_alloc_set(tsd_t *tsd, edata_t *edata,
    prof_recent_t *recent_alloc) {
	assert(recent_alloc != NULL);

	prof_recent_t *previous =
	    edata_prof_recent_alloc_update_internal(tsd, edata, recent_alloc);
	assert(previous == NULL);

	/* Complete the back-link from the record to the extent. */
	recent_alloc->alloc_edata = edata;
}
/*
 * Unlink an extent and its recent-allocation record from each other.
 * recent_alloc must be the record currently attached to edata.
 */
static void
edata_prof_recent_alloc_reset(tsd_t *tsd, edata_t *edata,
    prof_recent_t *recent_alloc) {
	assert(recent_alloc != NULL);

	prof_recent_t *previous =
	    edata_prof_recent_alloc_update_internal(tsd, edata, NULL);
	assert(previous == recent_alloc);
	assert(edata == recent_alloc->alloc_edata);

	/* A NULL alloc_edata marks the allocation as freed. */
	recent_alloc->alloc_edata = NULL;
}
/*
 * This function should be called right before an allocation is released, so
 * that the associated recent allocation record can contain the following
 * information:
 * (1) The allocation is released;
 * (2) The time of the deallocation; and
 * (3) The prof_tctx associated with the deallocation.
 *
 * Locking: prof_recent_alloc_mtx is taken internally and must not be held by
 * the caller.  The tctx reference taken for the deallocation is either
 * handed over to the record or, if the record has vanished in the meantime,
 * rolled back *after* prof_recent_alloc_mtx has been released, because
 * decrement_recent_count() requires that mutex not to be held and acquires
 * tctx->tdata->lock itself.
 */
void
prof_recent_alloc_reset(tsd_t *tsd, edata_t *edata) {
	/*
	 * Check whether the recent allocation record still exists without
	 * trying to acquire the lock.
	 */
	if (edata_prof_recent_alloc_get_no_lock(edata) == NULL) {
		return;
	}

	prof_tctx_t *dalloc_tctx = prof_tctx_create(tsd);
	/*
	 * In case dalloc_tctx is NULL, e.g. due to OOM, we will not record the
	 * deallocation time / tctx, which is handled later, after we check
	 * again when holding the lock.
	 */
	if (dalloc_tctx != NULL) {
		malloc_mutex_lock(tsd_tsdn(tsd), dalloc_tctx->tdata->lock);
		increment_recent_count(tsd, dalloc_tctx);
		dalloc_tctx->prepared = false;
		malloc_mutex_unlock(tsd_tsdn(tsd), dalloc_tctx->tdata->lock);
	}

	malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	/* Check again after acquiring the lock. */
	prof_recent_t *recent = edata_prof_recent_alloc_get(tsd, edata);
	if (recent != NULL) {
		edata_prof_recent_alloc_reset(tsd, edata, recent);
		assert(nstime_equals_zero(&recent->dalloc_time));
		assert(recent->dalloc_tctx == NULL);
		if (dalloc_tctx != NULL) {
			nstime_update(&recent->dalloc_time);
			recent->dalloc_tctx = dalloc_tctx;
			/* The record now owns the reference taken above. */
			dalloc_tctx = NULL;
		}
	}
	malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);

	if (dalloc_tctx != NULL) {
		/*
		 * We lost the race - the allocation record was just gone.
		 * Give the reference back only now that
		 * prof_recent_alloc_mtx has been dropped.
		 */
		decrement_recent_count(tsd, dalloc_tctx);
	}
}
/*
 * If the record is still attached to a live extent, detach the two from each
 * other.  Requires prof_recent_alloc_mtx (asserted).
 */
static void
prof_recent_alloc_evict_edata(tsd_t *tsd, prof_recent_t *recent) {
	malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);

	edata_t *attached = recent->alloc_edata;
	if (attached == NULL) {
		/* The allocation has already been freed. */
		return;
	}
	edata_prof_recent_alloc_reset(tsd, attached, recent);
}
/*
 * First record of the list, i.e. the node following the sentinel.  Equals
 * prof_recent_alloc_end() when the list is empty.  Requires
 * prof_recent_alloc_mtx (asserted).
 */
STATIC_INLINE_IF_NOT_TEST prof_recent_t *
prof_recent_alloc_begin(tsd_t *tsd) {
	malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	assert(prof_recent_alloc_dummy != NULL);

	prof_recent_t *first = prof_recent_alloc_dummy->next;
	return first;
}
/*
 * End marker for list iteration: the sentinel node itself.  Requires
 * prof_recent_alloc_mtx (asserted).
 */
STATIC_INLINE_IF_NOT_TEST prof_recent_t *
prof_recent_alloc_end(tsd_t *tsd) {
	malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	assert(prof_recent_alloc_dummy != NULL);

	prof_recent_t *sentinel = prof_recent_alloc_dummy;
	return sentinel;
}
/*
 * Successor of a real (non-sentinel) record.  Requires prof_recent_alloc_mtx
 * (asserted).
 */
STATIC_INLINE_IF_NOT_TEST prof_recent_t *
prof_recent_alloc_next(tsd_t *tsd, prof_recent_t *node) {
	malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	assert(prof_recent_alloc_dummy != NULL);
	assert(node != NULL && node != prof_recent_alloc_dummy);

	prof_recent_t *successor = node->next;
	return successor;
}
/*
 * Report whether the record list is empty, cross-checking the answer against
 * prof_recent_alloc_count.  Requires prof_recent_alloc_mtx (asserted).
 */
static bool
prof_recent_alloc_is_empty(tsd_t *tsd) {
	malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);

	const bool empty =
	    (prof_recent_alloc_begin(tsd) == prof_recent_alloc_end(tsd));
	if (empty) {
		assert(prof_recent_alloc_count == 0);
	} else {
		assert(prof_recent_alloc_count > 0);
	}
	return empty;
}
/*
 * Debug-only consistency check: walk the recent-allocation list and verify
 * that its length equals prof_recent_alloc_count and does not exceed the
 * configured maximum (-1 meaning no limit).  The caller must hold
 * prof_recent_alloc_mtx.
 */
static void
prof_recent_alloc_assert_count(tsd_t *tsd) {
	malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	if (!config_debug) {
		return;
	}
	ssize_t n_records = 0;
	for (prof_recent_t *cur = prof_recent_alloc_begin(tsd);
	    cur != prof_recent_alloc_end(tsd);
	    cur = prof_recent_alloc_next(tsd, cur)) {
		++n_records;
	}
	assert(n_records == prof_recent_alloc_count);
	assert(prof_recent_alloc_max_get(tsd) == -1 ||
	    n_records <= prof_recent_alloc_max_get(tsd));
}
/*
 * Record the allocation backed by "edata" (usable size "usize") as the most
 * recent entry of the recent-allocation list.
 *
 * The record is written into the current dummy node; afterwards either the
 * dummy is rotated onto the oldest record (when the list is already at its
 * limit) or a freshly allocated node becomes the new dummy.  If recording is
 * disabled (limit of 0) or the required node could not be allocated, nothing
 * is recorded and the recent count held on the allocation's tctx is dropped
 * again.
 *
 * Must be called without tctx->tdata->lock held; prof_recent_alloc_mtx is
 * acquired and released internally.
 */
void
prof_recent_alloc(tsd_t *tsd, edata_t *edata, size_t usize) {
	assert(edata != NULL);
	prof_tctx_t *tctx = edata_prof_tctx_get(edata);

	malloc_mutex_assert_not_owner(tsd_tsdn(tsd), tctx->tdata->lock);
	malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	prof_recent_alloc_assert_count(tsd);

	/*
	 * Reserve a new prof_recent_t node if needed.  If needed, we release
	 * the prof_recent_alloc_mtx lock and allocate.  Then, rather than
	 * immediately checking for OOM, we regain the lock and try to make use
	 * of the reserve node if needed.  There are six scenarios:
	 *
	 *          \ now | no need | need but OOMed | need and allocated
	 *     later \    |         |                |
	 *    ------------------------------------------------------------
	 *     no need    |   (1)   |      (2)       |         (3)
	 *    ------------------------------------------------------------
	 *     need       |   (4)   |      (5)       |         (6)
	 *
	 * First, "(4)" never happens, because we don't release the lock in the
	 * middle if there's no need for a new node; in such cases "(1)" always
	 * takes place, which is trivial.
	 *
	 * Out of the remaining four scenarios, "(6)" is the common case and is
	 * trivial.  "(5)" is also trivial, in which case we'll rollback the
	 * effect of prof_recent_alloc_prepare() as expected.
	 *
	 * "(2)" / "(3)" occurs when the need for a new node is gone after we
	 * regain the lock.  If the new node is successfully allocated, i.e. in
	 * the case of "(3)", we'll release it in the end; otherwise, i.e. in
	 * the case of "(2)", we do nothing - we're lucky that the OOM ends up
	 * doing no harm at all.
	 *
	 * Therefore, the only performance cost of the "release lock" ->
	 * "allocate" -> "regain lock" design is the "(3)" case, but it happens
	 * very rarely, so the cost is relatively small compared to the gain of
	 * not having to have the lock order of prof_recent_alloc_mtx above all
	 * the allocation locks.
	 */
	prof_recent_t *reserve = NULL;
	if (prof_recent_alloc_max_get(tsd) == -1 ||
	    prof_recent_alloc_count < prof_recent_alloc_max_get(tsd)) {
		assert(prof_recent_alloc_max_get(tsd) != 0);
		malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
		reserve = (prof_recent_t *)iallocztm(tsd_tsdn(tsd),
		    sizeof(prof_recent_t), sz_size2index(sizeof(prof_recent_t)),
		    false, NULL, true, arena_get(tsd_tsdn(tsd), 0, false),
		    true);
		malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
		prof_recent_alloc_assert_count(tsd);
	}

	if (prof_recent_alloc_max_get(tsd) == 0) {
		assert(prof_recent_alloc_is_empty(tsd));
		goto label_rollback;
	}

	assert(prof_recent_alloc_dummy != NULL);

	/*
	 * Scenario "(5)": a new node is needed but its allocation failed.
	 * Bail out *before* touching the dummy node or the edata, so that the
	 * rollback path really finds the edata without a record attached and
	 * no half-initialized record is left behind in the dummy.
	 */
	if (prof_recent_alloc_count != prof_recent_alloc_max_get(tsd) &&
	    reserve == NULL) {
		goto label_rollback;
	}

	{
		/* Fill content into the dummy node. */
		prof_recent_t *node = prof_recent_alloc_dummy;
		node->usize = usize;
		nstime_copy(&node->alloc_time,
		    edata_prof_alloc_time_get(edata));
		node->alloc_tctx = tctx;
		edata_prof_recent_alloc_set(tsd, edata, node);
		nstime_init_zero(&node->dalloc_time);
		node->dalloc_tctx = NULL;
	}

	prof_tctx_t *old_alloc_tctx, *old_dalloc_tctx;
	if (prof_recent_alloc_count == prof_recent_alloc_max_get(tsd)) {
		/* If upper limit is reached, simply shift the dummy. */
		assert(prof_recent_alloc_max_get(tsd) != -1);
		assert(!prof_recent_alloc_is_empty(tsd));
		prof_recent_alloc_dummy = prof_recent_alloc_dummy->next;
		old_alloc_tctx = prof_recent_alloc_dummy->alloc_tctx;
		assert(old_alloc_tctx != NULL);
		old_dalloc_tctx = prof_recent_alloc_dummy->dalloc_tctx;
		prof_recent_alloc_evict_edata(tsd, prof_recent_alloc_dummy);
	} else {
		/* Otherwise use the new node as the dummy. */
		assert(prof_recent_alloc_max_get(tsd) == -1 ||
		    prof_recent_alloc_count < prof_recent_alloc_max_get(tsd));
		/* The OOM case was already diverted to rollback above. */
		assert(reserve != NULL);
		reserve->next = prof_recent_alloc_dummy->next;
		prof_recent_alloc_dummy->next = reserve;
		prof_recent_alloc_dummy = reserve;
		reserve = NULL;
		old_alloc_tctx = NULL;
		old_dalloc_tctx = NULL;
		++prof_recent_alloc_count;
	}

	assert(!prof_recent_alloc_is_empty(tsd));
	prof_recent_alloc_assert_count(tsd);
	malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);

	/* Scenario "(3)": the reserve node turned out to be unnecessary. */
	if (reserve != NULL) {
		idalloctm(tsd_tsdn(tsd), reserve, NULL, NULL, true, true);
	}

	/*
	 * Asynchronously handle the tctx of the old node, so that there's no
	 * simultaneous holdings of prof_recent_alloc_mtx and tdata->lock.
	 * In the worst case this may delay the tctx release but it's better
	 * than holding prof_recent_alloc_mtx for longer.
	 */
	if (old_alloc_tctx != NULL) {
		decrement_recent_count(tsd, old_alloc_tctx);
	}
	if (old_dalloc_tctx != NULL) {
		decrement_recent_count(tsd, old_dalloc_tctx);
	}
	return;

label_rollback:
	/* Nothing was recorded: the edata must not reference any record. */
	assert(edata_prof_recent_alloc_get(tsd, edata) == NULL);
	prof_recent_alloc_assert_count(tsd);
	malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	if (reserve != NULL) {
		idalloctm(tsd_tsdn(tsd), reserve, NULL, NULL, true, true);
	}
	decrement_recent_count(tsd, tctx);
}
/*
 * Return the current limit on the number of recorded recent allocations.
 * The value is read through prof_recent_alloc_max_get_no_lock(), i.e. without
 * taking prof_recent_alloc_mtx.
 *
 * Defined with an explicit (void) parameter list so that the definition
 * carries a prototype; an empty "()" list would leave argument checking off.
 */
ssize_t
prof_recent_alloc_max_ctl_read(void) {
	/* Don't bother to acquire the lock. */
	return prof_recent_alloc_max_get_no_lock();
}
/*
 * Install "max" (-1 for unlimited, otherwise >= 0) as the new limit on the
 * number of recorded recent allocations and return the previous limit.
 *
 * If the list currently holds more than "max" records, the oldest ones (those
 * right after the dummy node) are unlinked while prof_recent_alloc_mtx is
 * held; their tctx counts are dropped and their memory is freed only after the
 * mutex has been released.
 */
ssize_t
prof_recent_alloc_max_ctl_write(tsd_t *tsd, ssize_t max) {
	assert(max >= -1);
	malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	prof_recent_alloc_assert_count(tsd);
	const ssize_t old_max = prof_recent_alloc_max_update(tsd, max);

	if (max == -1 || prof_recent_alloc_count <= max) {
		/* Easy case - no need to alter the list. */
		malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
		return old_max;
	}

	/* Head of the chain of records that are about to be unlinked. */
	prof_recent_t *doomed = prof_recent_alloc_dummy->next;
	/* For verification purpose only. */
	ssize_t n_doomed = prof_recent_alloc_count - max;

	/* The count exceeds max here, so at least one record is unlinked. */
	while (prof_recent_alloc_count > max) {
		assert(!prof_recent_alloc_is_empty(tsd));
		prof_recent_t *victim = prof_recent_alloc_dummy->next;
		assert(victim != prof_recent_alloc_dummy);
		prof_recent_alloc_evict_edata(tsd, victim);
		/* victim->next is left intact so the chain stays walkable. */
		prof_recent_alloc_dummy->next = victim->next;
		--prof_recent_alloc_count;
	}

	/* First node that survives; it terminates the unlinked chain. */
	prof_recent_t *survivor = prof_recent_alloc_dummy->next;
	assert(doomed != survivor);
	prof_recent_alloc_assert_count(tsd);
	malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);

	/*
	 * Asynchronously handle the tctx of the to-be-deleted nodes, so that
	 * there's no simultaneous holdings of prof_recent_alloc_mtx and
	 * tdata->lock.  In the worst case there can be slightly extra space
	 * overhead taken by these nodes, but the total number of nodes at any
	 * time is bounded by (max + sum(decreases)), where "max" means the
	 * most recent prof_recent_alloc_max and "sum(decreases)" means the
	 * sum of the deltas of all decreases in prof_recent_alloc_max in the
	 * past.  This (max + sum(decreases)) value is completely transparent
	 * to and controlled by application.
	 */
	while (doomed != survivor) {
		prof_recent_t *following = doomed->next;
		decrement_recent_count(tsd, doomed->alloc_tctx);
		if (doomed->dalloc_tctx != NULL) {
			decrement_recent_count(tsd, doomed->dalloc_tctx);
		}
		idalloctm(tsd_tsdn(tsd), doomed, NULL, NULL, true, true);
		doomed = following;
		--n_doomed;
	}
	assert(n_doomed == 0);
	return old_max;
}
/*
 * Emit every frame of the backtrace attached to "tctx" (tctx->gctx->bt) as a
 * JSON string value, each frame formatted with "%p".
 */
static void
dump_bt(emitter_t *emitter, prof_tctx_t *tctx) {
	assert(tctx != NULL);

	/*
	 * Scratch space for one formatted frame; presumably sized for "0x",
	 * two hex digits per pointer byte and the terminating NUL.
	 */
	char frame_buf[2 * sizeof(intptr_t) + 3];
	/* The emitter takes string values by pointer-to-pointer. */
	char *frame_str = frame_buf;

	prof_bt_t *bt = &tctx->gctx->bt;
	size_t i = 0;
	while (i < bt->len) {
		malloc_snprintf(frame_buf, sizeof(frame_buf), "%p", bt->vec[i]);
		emitter_json_value(emitter, emitter_type_string, &frame_str);
		++i;
	}
}
#define PROF_RECENT_PRINT_BUFSIZE 4096
/*
 * Dump the recent-allocation list as compact JSON through "write_cb" (called
 * with "cbopaque" as its first argument).
 *
 * Output is normally staged in a heap buffer of PROF_RECENT_PRINT_BUFSIZE
 * bytes.  If that buffer cannot be allocated, the dump falls back to handing
 * the emitter's output to "write_cb" directly instead of dereferencing a NULL
 * buffer.
 *
 * prof_recent_alloc_mtx is held while the list is walked; the limit is
 * temporarily set to 0 for the duration of the walk and restored afterwards.
 */
void
prof_recent_alloc_dump(tsd_t *tsd, void (*write_cb)(void *, const char *),
    void *cbopaque) {
	char *buf = (char *)iallocztm(tsd_tsdn(tsd), PROF_RECENT_PRINT_BUFSIZE,
	    sz_size2index(PROF_RECENT_PRINT_BUFSIZE), false, NULL, true,
	    arena_get(tsd_tsdn(tsd), 0, false), true);
	buf_writer_arg_t buf_arg = {write_cb, cbopaque, buf,
	    PROF_RECENT_PRINT_BUFSIZE - 1, 0};
	emitter_t emitter;
	if (buf != NULL) {
		emitter_init(&emitter, emitter_output_json_compact,
		    buffered_write_cb, &buf_arg);
	} else {
		/* OOM: skip the staging buffer and write unbuffered. */
		emitter_init(&emitter, emitter_output_json_compact, write_cb,
		    cbopaque);
	}
	emitter_begin(&emitter);

	malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	prof_recent_alloc_assert_count(tsd);

	/*
	 * Set prof_recent_alloc_max to 0 so that dumping won't block sampled
	 * allocations: the allocations can complete but will not be recorded.
	 */
	ssize_t max = prof_recent_alloc_max_update(tsd, 0);

	emitter_json_kv(&emitter, "recent_alloc_max", emitter_type_ssize, &max);

	emitter_json_array_kv_begin(&emitter, "recent_alloc");
	for (prof_recent_t *n = prof_recent_alloc_begin(tsd);
	    n != prof_recent_alloc_end(tsd);
	    n = prof_recent_alloc_next(tsd, n)) {
		emitter_json_object_begin(&emitter);

		emitter_json_kv(&emitter, "usize", emitter_type_size,
		    &n->usize);
		/* A record with no extent attached is reported as released. */
		bool released = n->alloc_edata == NULL;
		emitter_json_kv(&emitter, "released", emitter_type_bool,
		    &released);

		emitter_json_kv(&emitter, "alloc_thread_uid",
		    emitter_type_uint64, &n->alloc_tctx->thr_uid);
		uint64_t alloc_time_ns = nstime_ns(&n->alloc_time);
		emitter_json_kv(&emitter, "alloc_time", emitter_type_uint64,
		    &alloc_time_ns);
		emitter_json_array_kv_begin(&emitter, "alloc_trace");
		dump_bt(&emitter, n->alloc_tctx);
		emitter_json_array_end(&emitter);

		/* Deallocation details exist only if a dalloc_tctx is set. */
		if (n->dalloc_tctx != NULL) {
			assert(released);
			emitter_json_kv(&emitter, "dalloc_thread_uid",
			    emitter_type_uint64, &n->dalloc_tctx->thr_uid);
			assert(!nstime_equals_zero(&n->dalloc_time));
			uint64_t dalloc_time_ns = nstime_ns(&n->dalloc_time);
			emitter_json_kv(&emitter, "dalloc_time",
			    emitter_type_uint64, &dalloc_time_ns);
			emitter_json_array_kv_begin(&emitter, "dalloc_trace");
			dump_bt(&emitter, n->dalloc_tctx);
			emitter_json_array_end(&emitter);
		} else {
			assert(nstime_equals_zero(&n->dalloc_time));
		}

		emitter_json_object_end(&emitter);
	}
	emitter_json_array_end(&emitter);

	/* Restore the limit that was in effect before the dump. */
	max = prof_recent_alloc_max_update(tsd, max);
	assert(max == 0);

	malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);

	emitter_end(&emitter);
	if (buf != NULL) {
		/* Push out whatever is still staged, then drop the buffer. */
		buf_writer_flush(&buf_arg);
		idalloctm(tsd_tsdn(tsd), buf, NULL, NULL, true, true);
	}
}
#undef PROF_RECENT_PRINT_BUFSIZE
/*
 * One-time setup of the recent-allocation machinery: initialize the limit,
 * create prof_recent_alloc_mtx and allocate the dummy node, linking it to
 * itself so that the list starts out empty.
 *
 * Returns true on failure (mutex initialization or dummy allocation failed)
 * and false on success.
 *
 * Defined with an explicit (void) parameter list so that the definition
 * carries a prototype.
 */
bool
prof_recent_init(void) {
	prof_recent_alloc_max_init();

	if (malloc_mutex_init(&prof_recent_alloc_mtx,
	    "prof_recent_alloc", WITNESS_RANK_PROF_RECENT_ALLOC,
	    malloc_mutex_rank_exclusive)) {
		return true;
	}

	assert(prof_recent_alloc_dummy == NULL);
	prof_recent_alloc_dummy = (prof_recent_t *)iallocztm(
	    TSDN_NULL, sizeof(prof_recent_t),
	    sz_size2index(sizeof(prof_recent_t)), false, NULL, true,
	    arena_get(TSDN_NULL, 0, true), true);
	if (prof_recent_alloc_dummy == NULL) {
		return true;
	}
	/* A dummy that points at itself represents the empty list. */
	prof_recent_alloc_dummy->next = prof_recent_alloc_dummy;

	return false;
}

View File

@ -188,6 +188,7 @@ TEST_BEGIN(test_mallctl_opt) {
TEST_MALLCTL_OPT(bool, prof_gdump, prof);
TEST_MALLCTL_OPT(bool, prof_final, prof);
TEST_MALLCTL_OPT(bool, prof_leak, prof);
TEST_MALLCTL_OPT(ssize_t, prof_recent_alloc_max, prof);
#undef TEST_MALLCTL_OPT
}

391
test/unit/prof_recent.c Normal file
View File

@ -0,0 +1,391 @@
#include "test/jemalloc_test.h"
#include "jemalloc/internal/prof_recent.h"
/* As specified in the shell script */
#define OPT_ALLOC_MAX 3
/* Invariant before and after every test (when config_prof is on) */
/*
 * Invariant before and after every test (when config_prof is on): profiling
 * is enabled and active, and both the option and the live value of the recent
 * allocation limit equal OPT_ALLOC_MAX.
 *
 * "tsd" is accepted for the callers' convenience but is not needed:
 * prof_recent_alloc_max_ctl_read() is defined without parameters, so it must
 * be called without arguments.
 */
static void confirm_prof_setup(tsd_t *tsd) {
	(void)tsd;

	/* Options */
	assert_true(opt_prof, "opt_prof not on");
	assert_true(opt_prof_active, "opt_prof_active not on");
	assert_zd_eq(opt_prof_recent_alloc_max, OPT_ALLOC_MAX,
	    "opt_prof_recent_alloc_max not set correctly");

	/* Dynamics */
	assert_true(prof_active, "prof_active not on");
	assert_zd_eq(prof_recent_alloc_max_ctl_read(), OPT_ALLOC_MAX,
	    "prof_recent_alloc_max not set correctly");
}
TEST_BEGIN(test_confirm_setup) {
test_skip_if(!config_prof);
confirm_prof_setup(tsd_fetch());
}
TEST_END
/*
 * Without config_prof, every access to experimental.prof_recent.alloc_max
 * must fail with ENOENT and leave the caller's buffers untouched.
 */
TEST_BEGIN(test_prof_recent_off) {
	test_skip_if(config_prof);

	/* Reference values used to detect unwanted writes. */
	const ssize_t past_ref = 0;
	const ssize_t future_ref = 0;
	const size_t len_ref = sizeof(ssize_t);

	ssize_t past = past_ref;
	ssize_t future = future_ref;
	size_t len = len_ref;

#define EXPECT_ENOENT(ctl, oldp, oldlenp, newp, newlen) do {		\
	assert_d_eq(mallctl("experimental.prof_recent." ctl, oldp,	\
	    oldlenp, newp, newlen), ENOENT,				\
	    "Should return ENOENT when config_prof is off");		\
	assert_zd_eq(past, past_ref, "output was touched");		\
	assert_zu_eq(len, len_ref, "output length was touched");	\
	assert_zd_eq(future, future_ref, "input was touched");		\
} while (0)

	/* No-op, read-only, write-only and read/write accesses. */
	EXPECT_ENOENT("alloc_max", NULL, NULL, NULL, 0);
	EXPECT_ENOENT("alloc_max", &past, &len, NULL, 0);
	EXPECT_ENOENT("alloc_max", NULL, NULL, &future, len);
	EXPECT_ENOENT("alloc_max", &past, &len, &future, len);

#undef EXPECT_ENOENT
}
TEST_END
/*
 * Exercise reads, writes and invalid writes of
 * experimental.prof_recent.alloc_max, finishing with the limit restored to
 * OPT_ALLOC_MAX.
 */
TEST_BEGIN(test_prof_recent_on) {
	test_skip_if(!config_prof);

	static const char ctl_name[] = "experimental.prof_recent.alloc_max";
	tsd_t *tsd = tsd_fetch();
	size_t len = sizeof(ssize_t);
	ssize_t past;
	ssize_t future;

	confirm_prof_setup(tsd);

	/* Neither reading nor writing. */
	assert_d_eq(mallctl(ctl_name, NULL, NULL, NULL, 0), 0,
	    "no-op mallctl should be allowed");
	confirm_prof_setup(tsd);

	/* Read only. */
	assert_d_eq(mallctl(ctl_name, &past, &len, NULL, 0), 0, "Read error");
	assert_zd_eq(past, OPT_ALLOC_MAX, "Wrong read result");

	/* Write only. */
	future = OPT_ALLOC_MAX + 1;
	assert_d_eq(mallctl(ctl_name, NULL, NULL, &future, len), 0,
	    "Write error");

	/* Read the previous value while switching to unlimited. */
	future = -1;
	assert_d_eq(mallctl(ctl_name, &past, &len, &future, len), 0,
	    "Read/write error");
	assert_zd_eq(past, OPT_ALLOC_MAX + 1, "Wrong read result");

	/* Values below -1 are rejected and the output is left alone. */
	future = -2;
	assert_d_eq(mallctl(ctl_name, &past, &len, &future, len), EINVAL,
	    "Invalid write should return EINVAL");
	assert_zd_eq(past, OPT_ALLOC_MAX + 1,
	    "Output should not be touched given invalid write");

	/* Back to the configured limit; the old value was unlimited. */
	future = OPT_ALLOC_MAX;
	assert_d_eq(mallctl(ctl_name, &past, &len, &future, len), 0,
	    "Read/write error");
	assert_zd_eq(past, -1, "Wrong read result");

	/* A write with the wrong input length is rejected as well. */
	future = OPT_ALLOC_MAX + 2;
	assert_d_eq(mallctl(ctl_name, &past, &len, &future, 2 * len), EINVAL,
	    "Invalid write should return EINVAL");
	assert_zd_eq(past, -1,
	    "Output should not be touched given invalid write");

	confirm_prof_setup(tsd);
}
TEST_END
/* Reproducible sequence of request sizes */
#define NTH_REQ_SIZE(n) ((n) * 97 + 101)
/*
 * Check that "p" is a live allocation whose extent carries a recent-allocation
 * record: the record has an allocation tctx, points back at the extent and has
 * no deallocation tctx.
 */
static void
confirm_malloc(tsd_t *tsd, void *p) {
	assert_ptr_not_null(p, "malloc failed unexpectedly");
	edata_t *edata = iealloc(TSDN_NULL, p);
	assert_ptr_not_null(edata, "NULL edata for living pointer");

	malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	prof_recent_t *record = edata_prof_recent_alloc_get(tsd, edata);
	assert_ptr_not_null(record, "Record in edata should not be NULL");
	assert_ptr_not_null(record->alloc_tctx,
	    "alloc_tctx in record should not be NULL");
	assert_ptr_eq(edata, record->alloc_edata,
	    "edata pointer in record is not correct");
	assert_ptr_null(record->dalloc_tctx,
	    "dalloc_tctx in record should be NULL");
	malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
}
/*
 * Check that record "n" stores the usable size corresponding to the kth
 * request of the NTH_REQ_SIZE sequence.  The caller must hold
 * prof_recent_alloc_mtx.
 */
static void
confirm_record_size(tsd_t *tsd, prof_recent_t *n, unsigned kth) {
	malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	const size_t expected_usize = sz_s2u(NTH_REQ_SIZE(kth));
	assert_zu_eq(n->usize, expected_usize,
	    "Recorded allocation usize is wrong");
}
/*
 * Check that record "n" describes an allocation that has not been freed: it
 * has an allocation tctx, an extent that points back at the record, and no
 * deallocation tctx.  The caller must hold prof_recent_alloc_mtx.
 */
static void
confirm_record_living(tsd_t *tsd, prof_recent_t *n) {
	malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);

	assert_ptr_not_null(n->alloc_tctx,
	    "alloc_tctx in record should not be NULL");
	assert_ptr_not_null(n->alloc_edata,
	    "Recorded edata should not be NULL for living pointer");
	assert_ptr_eq(n, edata_prof_recent_alloc_get(tsd, n->alloc_edata),
	    "Record in edata is not correct");
	assert_ptr_null(n->dalloc_tctx, "dalloc_tctx in record should be NULL");
}
/*
 * Check that record "n" describes an allocation that has been freed: it still
 * has its allocation tctx, no extent is attached any more, and a deallocation
 * tctx has been recorded.  The caller must hold prof_recent_alloc_mtx.
 */
static void
confirm_record_released(tsd_t *tsd, prof_recent_t *n) {
	malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);

	assert_ptr_not_null(n->alloc_tctx,
	    "alloc_tctx in record should not be NULL");
	assert_ptr_null(n->alloc_edata,
	    "Recorded edata should be NULL for released pointer");
	assert_ptr_not_null(n->dalloc_tctx,
	    "dalloc_tctx in record should not be NULL for released pointer");
}
/*
 * End-to-end check of the recent-allocation list: records follow the last
 * OPT_ALLOC_MAX sampled allocations, nothing is recorded while prof.active is
 * off, and changing experimental.prof_recent.alloc_max trims (or leaves) the
 * list as expected.
 *
 * All list checks go through the test harness assertions (assert_ptr_*)
 * rather than plain assert(), so that they are reported like every other
 * check in this test.
 */
TEST_BEGIN(test_prof_recent_alloc) {
	test_skip_if(!config_prof);

	bool b;
	unsigned i, c;
	size_t req_size;
	void *p;
	prof_recent_t *n;
	ssize_t future;

	tsd_t *tsd = tsd_fetch();

	confirm_prof_setup(tsd);

	/*
	 * First batch of 2 * OPT_ALLOC_MAX allocations.  After the
	 * (OPT_ALLOC_MAX - 1)'th allocation the recorded allocations should
	 * always be the last OPT_ALLOC_MAX allocations coming from here.
	 */
	for (i = 0; i < 2 * OPT_ALLOC_MAX; ++i) {
		req_size = NTH_REQ_SIZE(i);
		p = malloc(req_size);
		confirm_malloc(tsd, p);
		if (i < OPT_ALLOC_MAX - 1) {
			malloc_mutex_lock(tsd_tsdn(tsd),
			    &prof_recent_alloc_mtx);
			assert_ptr_ne(prof_recent_alloc_begin(tsd),
			    prof_recent_alloc_end(tsd),
			    "Empty recent allocation");
			malloc_mutex_unlock(tsd_tsdn(tsd),
			    &prof_recent_alloc_mtx);
			free(p);
			/*
			 * The recorded allocations may still include some
			 * other allocations before the test run started,
			 * so keep allocating without checking anything.
			 */
			continue;
		}
		c = 0;
		malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
		for (n = prof_recent_alloc_begin(tsd);
		    n != prof_recent_alloc_end(tsd);
		    n = prof_recent_alloc_next(tsd, n)) {
			++c;
			confirm_record_size(tsd, n, i + c - OPT_ALLOC_MAX);
			/* Only the newest record is still allocated. */
			if (c == OPT_ALLOC_MAX) {
				confirm_record_living(tsd, n);
			} else {
				confirm_record_released(tsd, n);
			}
		}
		malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
		assert_u_eq(c, OPT_ALLOC_MAX,
		    "Incorrect total number of allocations");
		free(p);
	}

	confirm_prof_setup(tsd);

	b = false;
	assert_d_eq(mallctl("prof.active", NULL, NULL, &b, sizeof(bool)), 0,
	    "mallctl for turning off prof_active failed");

	/*
	 * Second batch of OPT_ALLOC_MAX allocations.  Since prof_active is
	 * turned off, this batch shouldn't be recorded.
	 */
	for (; i < 3 * OPT_ALLOC_MAX; ++i) {
		req_size = NTH_REQ_SIZE(i);
		p = malloc(req_size);
		assert_ptr_not_null(p, "malloc failed unexpectedly");
		c = 0;
		malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
		for (n = prof_recent_alloc_begin(tsd);
		    n != prof_recent_alloc_end(tsd);
		    n = prof_recent_alloc_next(tsd, n)) {
			confirm_record_size(tsd, n, c + OPT_ALLOC_MAX);
			confirm_record_released(tsd, n);
			++c;
		}
		malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
		assert_u_eq(c, OPT_ALLOC_MAX,
		    "Incorrect total number of allocations");
		free(p);
	}

	b = true;
	assert_d_eq(mallctl("prof.active", NULL, NULL, &b, sizeof(bool)), 0,
	    "mallctl for turning on prof_active failed");

	confirm_prof_setup(tsd);

	/*
	 * Third batch of OPT_ALLOC_MAX allocations.  Since prof_active is
	 * turned back on, they should be recorded, and in the list of recorded
	 * allocations they should follow the first batch rather than the
	 * second batch.
	 */
	for (; i < 4 * OPT_ALLOC_MAX; ++i) {
		req_size = NTH_REQ_SIZE(i);
		p = malloc(req_size);
		confirm_malloc(tsd, p);
		c = 0;
		malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
		for (n = prof_recent_alloc_begin(tsd);
		    n != prof_recent_alloc_end(tsd);
		    n = prof_recent_alloc_next(tsd, n)) {
			++c;
			confirm_record_size(tsd, n,
			    /* Is the allocation from the third batch? */
			    i + c - OPT_ALLOC_MAX >= 3 * OPT_ALLOC_MAX ?
			    /* If yes, then it's just recorded. */
			    i + c - OPT_ALLOC_MAX :
			    /*
			     * Otherwise, it should come from the first batch
			     * instead of the second batch.
			     */
			    i + c - 2 * OPT_ALLOC_MAX);
			if (c == OPT_ALLOC_MAX) {
				confirm_record_living(tsd, n);
			} else {
				confirm_record_released(tsd, n);
			}
		}
		malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
		assert_u_eq(c, OPT_ALLOC_MAX,
		    "Incorrect total number of allocations");
		free(p);
	}

	/* Increasing the limit shouldn't alter the list of records. */
	future = OPT_ALLOC_MAX + 1;
	assert_d_eq(mallctl("experimental.prof_recent.alloc_max",
	    NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error");
	c = 0;
	malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	for (n = prof_recent_alloc_begin(tsd);
	    n != prof_recent_alloc_end(tsd);
	    n = prof_recent_alloc_next(tsd, n)) {
		confirm_record_size(tsd, n, c + 3 * OPT_ALLOC_MAX);
		confirm_record_released(tsd, n);
		++c;
	}
	malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	assert_u_eq(c, OPT_ALLOC_MAX,
	    "Incorrect total number of allocations");

	/*
	 * Decreasing the limit shouldn't alter the list of records as long as
	 * the new limit is still no less than the length of the list.
	 */
	future = OPT_ALLOC_MAX;
	assert_d_eq(mallctl("experimental.prof_recent.alloc_max",
	    NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error");
	c = 0;
	malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	for (n = prof_recent_alloc_begin(tsd);
	    n != prof_recent_alloc_end(tsd);
	    n = prof_recent_alloc_next(tsd, n)) {
		confirm_record_size(tsd, n, c + 3 * OPT_ALLOC_MAX);
		confirm_record_released(tsd, n);
		++c;
	}
	malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	assert_u_eq(c, OPT_ALLOC_MAX,
	    "Incorrect total number of allocations");

	/*
	 * Decreasing the limit should shorten the list of records if the new
	 * limit is less than the length of the list.
	 */
	future = OPT_ALLOC_MAX - 1;
	assert_d_eq(mallctl("experimental.prof_recent.alloc_max",
	    NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error");
	c = 0;
	malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	for (n = prof_recent_alloc_begin(tsd);
	    n != prof_recent_alloc_end(tsd);
	    n = prof_recent_alloc_next(tsd, n)) {
		/* The oldest record is gone, hence the pre-increment. */
		++c;
		confirm_record_size(tsd, n, c + 3 * OPT_ALLOC_MAX);
		confirm_record_released(tsd, n);
	}
	malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	assert_u_eq(c, OPT_ALLOC_MAX - 1,
	    "Incorrect total number of allocations");

	/* Setting to unlimited shouldn't alter the list of records. */
	future = -1;
	assert_d_eq(mallctl("experimental.prof_recent.alloc_max",
	    NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error");
	c = 0;
	malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	for (n = prof_recent_alloc_begin(tsd);
	    n != prof_recent_alloc_end(tsd);
	    n = prof_recent_alloc_next(tsd, n)) {
		++c;
		confirm_record_size(tsd, n, c + 3 * OPT_ALLOC_MAX);
		confirm_record_released(tsd, n);
	}
	malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	assert_u_eq(c, OPT_ALLOC_MAX - 1,
	    "Incorrect total number of allocations");

	/* Downshift to only one record. */
	future = 1;
	assert_d_eq(mallctl("experimental.prof_recent.alloc_max",
	    NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error");
	malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	n = prof_recent_alloc_begin(tsd);
	assert_ptr_ne(n, prof_recent_alloc_end(tsd),
	    "Recent list should not be empty");
	confirm_record_size(tsd, n, 4 * OPT_ALLOC_MAX - 1);
	confirm_record_released(tsd, n);
	n = prof_recent_alloc_next(tsd, n);
	assert_ptr_eq(n, prof_recent_alloc_end(tsd),
	    "Recent list should only contain one record");
	malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);

	/* Completely turn off. */
	future = 0;
	assert_d_eq(mallctl("experimental.prof_recent.alloc_max",
	    NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error");
	malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	assert_ptr_eq(prof_recent_alloc_begin(tsd),
	    prof_recent_alloc_end(tsd), "Recent list should be empty");
	malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);

	/* Restore the settings. */
	future = OPT_ALLOC_MAX;
	assert_d_eq(mallctl("experimental.prof_recent.alloc_max",
	    NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error");
	malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	assert_ptr_eq(prof_recent_alloc_begin(tsd),
	    prof_recent_alloc_end(tsd), "Recent list should be empty");
	malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);

	confirm_prof_setup(tsd);
}
TEST_END
#undef NTH_REQ_SIZE
/* Run the prof_recent unit tests in order and return the harness result. */
int
main(void) {
	int ret = test(
	    test_confirm_setup,
	    test_prof_recent_off,
	    test_prof_recent_on,
	    test_prof_recent_alloc);
	return ret;
}

5
test/unit/prof_recent.sh Normal file
View File

@ -0,0 +1,5 @@
#!/bin/sh

# Only configure profiling when enable_prof is exactly "1".
# prof_recent_alloc_max:3 must stay in sync with OPT_ALLOC_MAX in the
# accompanying unit test.
case "x${enable_prof}" in
x1)
	export MALLOC_CONF="prof:true,lg_prof_sample:0,prof_recent_alloc_max:3"
	;;
esac

View File

@ -1,5 +1,5 @@
#!/bin/sh
if [ "x${enable_prof}" = "x1" ] ; then
export MALLOC_CONF="prof:true,prof_active:false,lg_prof_sample:0"
export MALLOC_CONF="prof:true,prof_active:false,lg_prof_sample:0,prof_recent_alloc_max:0"
fi