From 40fa4d29d3e938765d0b608f92701410ce90b887 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Fri, 18 Dec 2020 17:14:59 -0800 Subject: [PATCH] Track per size class internal fragmentation --- Makefile.in | 2 + include/jemalloc/internal/prof_externs.h | 3 + include/jemalloc/internal/prof_stats.h | 17 ++++ include/jemalloc/internal/witness.h | 1 + .../projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + .../vc2015/jemalloc/jemalloc.vcxproj.filters | 3 + .../projects/vc2017/jemalloc/jemalloc.vcxproj | 1 + .../vc2017/jemalloc/jemalloc.vcxproj.filters | 3 + src/ctl.c | 3 + src/jemalloc.c | 1 + src/prof.c | 31 +++++-- src/prof_stats.c | 57 +++++++++++++ test/unit/mallctl.c | 1 + test/unit/prof_stats.c | 80 +++++++++++++++++++ test/unit/prof_stats.sh | 5 ++ 15 files changed, 203 insertions(+), 6 deletions(-) create mode 100644 include/jemalloc/internal/prof_stats.h create mode 100644 src/prof_stats.c create mode 100644 test/unit/prof_stats.c create mode 100644 test/unit/prof_stats.sh diff --git a/Makefile.in b/Makefile.in index ba6dd763..3cb3161e 100644 --- a/Makefile.in +++ b/Makefile.in @@ -139,6 +139,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/prof_data.c \ $(srcroot)src/prof_log.c \ $(srcroot)src/prof_recent.c \ + $(srcroot)src/prof_stats.c \ $(srcroot)src/prof_sys.c \ $(srcroot)src/psset.c \ $(srcroot)src/rtree.c \ @@ -248,6 +249,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/prof_mdump.c \ $(srcroot)test/unit/prof_recent.c \ $(srcroot)test/unit/prof_reset.c \ + $(srcroot)test/unit/prof_stats.c \ $(srcroot)test/unit/prof_tctx.c \ $(srcroot)test/unit/prof_thread_name.c \ $(srcroot)test/unit/prof_sys_thread_name.c \ diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index b94fbed3..671ac9b8 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -27,6 +27,9 @@ extern ssize_t opt_prof_recent_alloc_max; /* Whether to use thread name provided by the system or by mallctl. */ extern bool opt_prof_sys_thread_name; +/* Whether to record per size class counts and request size totals. */ +extern bool opt_prof_stats; + /* Accessed via prof_active_[gs]et{_unlocked,}(). */ extern bool prof_active; diff --git a/include/jemalloc/internal/prof_stats.h b/include/jemalloc/internal/prof_stats.h new file mode 100644 index 00000000..7954e82d --- /dev/null +++ b/include/jemalloc/internal/prof_stats.h @@ -0,0 +1,17 @@ +#ifndef JEMALLOC_INTERNAL_PROF_STATS_H +#define JEMALLOC_INTERNAL_PROF_STATS_H + +typedef struct prof_stats_s prof_stats_t; +struct prof_stats_s { + uint64_t req_sum; + uint64_t count; +}; + +extern malloc_mutex_t prof_stats_mtx; + +void prof_stats_inc(tsd_t *tsd, szind_t ind, size_t size); +void prof_stats_dec(tsd_t *tsd, szind_t ind, size_t size); +void prof_stats_get_live(tsd_t *tsd, szind_t ind, prof_stats_t *stats); +void prof_stats_get_accum(tsd_t *tsd, szind_t ind, prof_stats_t *stats); + +#endif /* JEMALLOC_INTERNAL_PROF_STATS_H */ diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h index 662907c8..66dcf664 100644 --- a/include/jemalloc/internal/witness.h +++ b/include/jemalloc/internal/witness.h @@ -73,6 +73,7 @@ enum witness_rank_e { WITNESS_RANK_PROF_GDUMP = WITNESS_RANK_LEAF, WITNESS_RANK_PROF_NEXT_THR_UID = WITNESS_RANK_LEAF, WITNESS_RANK_PROF_RECENT_ALLOC = WITNESS_RANK_LEAF, + WITNESS_RANK_PROF_STATS = WITNESS_RANK_LEAF, WITNESS_RANK_PROF_THREAD_ACTIVE_INIT = WITNESS_RANK_LEAF, }; typedef enum witness_rank_e witness_rank_t; diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 531dd9a6..9443ac55 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -80,6 +80,7 @@ + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index f031fb10..3c4bff62 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -124,6 +124,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index bc64de5c..fafb4914 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -80,6 +80,7 @@ + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index f031fb10..3c4bff62 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -124,6 +124,9 @@ Source Files + + Source Files + Source Files diff --git a/src/ctl.c b/src/ctl.c index 8f6aff3e..598759cd 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -136,6 +136,7 @@ CTL_PROTO(opt_prof_final) CTL_PROTO(opt_prof_leak) CTL_PROTO(opt_prof_accum) CTL_PROTO(opt_prof_recent_alloc_max) +CTL_PROTO(opt_prof_stats) CTL_PROTO(opt_prof_sys_thread_name) CTL_PROTO(opt_prof_time_res) CTL_PROTO(opt_zero_realloc) @@ -415,6 +416,7 @@ static const ctl_named_node_t opt_node[] = { {NAME("prof_leak"), CTL(opt_prof_leak)}, {NAME("prof_accum"), CTL(opt_prof_accum)}, {NAME("prof_recent_alloc_max"), CTL(opt_prof_recent_alloc_max)}, + {NAME("prof_stats"), CTL(opt_prof_stats)}, {NAME("prof_sys_thread_name"), CTL(opt_prof_sys_thread_name)}, {NAME("prof_time_resolution"), CTL(opt_prof_time_res)}, {NAME("zero_realloc"), CTL(opt_zero_realloc)} @@ -2057,6 +2059,7 @@ CTL_RO_NL_CGEN(config_prof, opt_prof_final, opt_prof_final, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_leak, opt_prof_leak, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_recent_alloc_max, opt_prof_recent_alloc_max, ssize_t) +CTL_RO_NL_CGEN(config_prof, opt_prof_stats, opt_prof_stats, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_sys_thread_name, opt_prof_sys_thread_name, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_time_res, diff --git a/src/jemalloc.c b/src/jemalloc.c index b0a3b76b..02714158 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1552,6 +1552,7 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_HANDLE_BOOL(opt_prof_log, "prof_log") CONF_HANDLE_SSIZE_T(opt_prof_recent_alloc_max, "prof_recent_alloc_max", -1, SSIZE_MAX) + CONF_HANDLE_BOOL(opt_prof_stats, "prof_stats") CONF_HANDLE_BOOL(opt_prof_sys_thread_name, "prof_sys_thread_name") if (CONF_MATCH("prof_time_resolution")) { diff --git a/src/prof.c b/src/prof.c index 258b5f2d..0f1f7a71 100644 --- a/src/prof.c +++ b/src/prof.c @@ -8,6 +8,7 @@ #include "jemalloc/internal/prof_data.h" #include "jemalloc/internal/prof_log.h" #include "jemalloc/internal/prof_recent.h" +#include "jemalloc/internal/prof_stats.h" #include "jemalloc/internal/prof_sys.h" #include "jemalloc/internal/thread_event.h" @@ -131,6 +132,10 @@ prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t size, assert(tctx == edata_prof_tctx_get(edata)); prof_recent_alloc(tsd, edata, size, usize); } + + if (opt_prof_stats) { + prof_stats_inc(tsd, szind, size); + } } void @@ -160,6 +165,10 @@ prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_info_t *prof_info) { prof_try_log(tsd, usize, prof_info); prof_tctx_try_destroy(tsd, tctx); + + if (opt_prof_stats) { + prof_stats_dec(tsd, szind, prof_info->alloc_size); + } } prof_tctx_t * @@ -587,7 +596,13 @@ prof_boot2(tsd_t *tsd, base_t *base) { next_thr_uid = 0; if (malloc_mutex_init(&next_thr_uid_mtx, "prof_next_thr_uid", - WITNESS_RANK_PROF_NEXT_THR_UID, malloc_mutex_rank_exclusive)) { + WITNESS_RANK_PROF_NEXT_THR_UID, + malloc_mutex_rank_exclusive)) { + return true; + } + + if (malloc_mutex_init(&prof_stats_mtx, "prof_stats", + WITNESS_RANK_PROF_STATS, malloc_mutex_rank_exclusive)) { return true; } @@ -595,8 +610,9 @@ prof_boot2(tsd_t *tsd, base_t *base) { return true; } - if (malloc_mutex_init(&prof_dump_filename_mtx, "prof_dump_filename", - WITNESS_RANK_PROF_DUMP_FILENAME, malloc_mutex_rank_exclusive)) { + if (malloc_mutex_init(&prof_dump_filename_mtx, + "prof_dump_filename", WITNESS_RANK_PROF_DUMP_FILENAME, + malloc_mutex_rank_exclusive)) { return true; } if (malloc_mutex_init(&prof_dump_mtx, "prof_dump", @@ -681,9 +697,10 @@ prof_prefork1(tsdn_t *tsdn) { malloc_mutex_prefork(tsdn, &prof_active_mtx); malloc_mutex_prefork(tsdn, &prof_dump_filename_mtx); malloc_mutex_prefork(tsdn, &prof_gdump_mtx); + malloc_mutex_prefork(tsdn, &prof_recent_alloc_mtx); + malloc_mutex_prefork(tsdn, &prof_stats_mtx); malloc_mutex_prefork(tsdn, &next_thr_uid_mtx); malloc_mutex_prefork(tsdn, &prof_thread_active_init_mtx); - malloc_mutex_prefork(tsdn, &prof_recent_alloc_mtx); } } @@ -692,10 +709,11 @@ prof_postfork_parent(tsdn_t *tsdn) { if (config_prof && opt_prof) { unsigned i; - malloc_mutex_postfork_parent(tsdn, &prof_recent_alloc_mtx); malloc_mutex_postfork_parent(tsdn, &prof_thread_active_init_mtx); malloc_mutex_postfork_parent(tsdn, &next_thr_uid_mtx); + malloc_mutex_postfork_parent(tsdn, &prof_stats_mtx); + malloc_mutex_postfork_parent(tsdn, &prof_recent_alloc_mtx); malloc_mutex_postfork_parent(tsdn, &prof_gdump_mtx); malloc_mutex_postfork_parent(tsdn, &prof_dump_filename_mtx); malloc_mutex_postfork_parent(tsdn, &prof_active_mtx); @@ -719,9 +737,10 @@ prof_postfork_child(tsdn_t *tsdn) { if (config_prof && opt_prof) { unsigned i; - malloc_mutex_postfork_child(tsdn, &prof_recent_alloc_mtx); malloc_mutex_postfork_child(tsdn, &prof_thread_active_init_mtx); malloc_mutex_postfork_child(tsdn, &next_thr_uid_mtx); + malloc_mutex_postfork_child(tsdn, &prof_stats_mtx); + malloc_mutex_postfork_child(tsdn, &prof_recent_alloc_mtx); malloc_mutex_postfork_child(tsdn, &prof_gdump_mtx); malloc_mutex_postfork_child(tsdn, &prof_dump_filename_mtx); malloc_mutex_postfork_child(tsdn, &prof_active_mtx); diff --git a/src/prof_stats.c b/src/prof_stats.c new file mode 100644 index 00000000..5d1a506b --- /dev/null +++ b/src/prof_stats.c @@ -0,0 +1,57 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/prof_stats.h" + +bool opt_prof_stats = false; +malloc_mutex_t prof_stats_mtx; +static prof_stats_t prof_stats_live[PROF_SC_NSIZES]; +static prof_stats_t prof_stats_accum[PROF_SC_NSIZES]; + +static void +prof_stats_enter(tsd_t *tsd, szind_t ind) { + assert(opt_prof && opt_prof_stats); + assert(ind < SC_NSIZES); + malloc_mutex_lock(tsd_tsdn(tsd), &prof_stats_mtx); +} + +static void +prof_stats_leave(tsd_t *tsd) { + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_stats_mtx); +} + +void +prof_stats_inc(tsd_t *tsd, szind_t ind, size_t size) { + cassert(config_prof); + prof_stats_enter(tsd, ind); + prof_stats_live[ind].req_sum += size; + prof_stats_live[ind].count++; + prof_stats_accum[ind].req_sum += size; + prof_stats_accum[ind].count++; + prof_stats_leave(tsd); +} + +void +prof_stats_dec(tsd_t *tsd, szind_t ind, size_t size) { + cassert(config_prof); + prof_stats_enter(tsd, ind); + prof_stats_live[ind].req_sum -= size; + prof_stats_live[ind].count--; + prof_stats_leave(tsd); +} + +void +prof_stats_get_live(tsd_t *tsd, szind_t ind, prof_stats_t *stats) { + cassert(config_prof); + prof_stats_enter(tsd, ind); + memcpy(stats, &prof_stats_live[ind], sizeof(prof_stats_t)); + prof_stats_leave(tsd); +} + +void +prof_stats_get_accum(tsd_t *tsd, szind_t ind, prof_stats_t *stats) { + cassert(config_prof); + prof_stats_enter(tsd, ind); + memcpy(stats, &prof_stats_accum[ind], sizeof(prof_stats_t)); + prof_stats_leave(tsd); +} diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 3d5b2788..85dcb4e2 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -317,6 +317,7 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(bool, prof_final, prof); TEST_MALLCTL_OPT(bool, prof_leak, prof); TEST_MALLCTL_OPT(ssize_t, prof_recent_alloc_max, prof); + TEST_MALLCTL_OPT(bool, prof_stats, prof); TEST_MALLCTL_OPT(bool, prof_sys_thread_name, prof); #undef TEST_MALLCTL_OPT diff --git a/test/unit/prof_stats.c b/test/unit/prof_stats.c new file mode 100644 index 00000000..555b69e3 --- /dev/null +++ b/test/unit/prof_stats.c @@ -0,0 +1,80 @@ +#include "test/jemalloc_test.h" + +#include "jemalloc/internal/prof_stats.h" + +static void +test_wrapper(szind_t ind) { +#define N_PTRS 3 + assert(opt_prof && opt_prof_stats); + + tsd_t *tsd = tsd_fetch(); + + prof_stats_t live_stats_orig; + prof_stats_get_live(tsd, ind, &live_stats_orig); + prof_stats_t accum_stats_orig; + prof_stats_get_accum(tsd, ind, &accum_stats_orig); + + void *ptrs[N_PTRS]; + + uint64_t live_req_sum = 0; + uint64_t live_count = 0; + uint64_t accum_req_sum = 0; + uint64_t accum_count = 0; + + for (size_t i = 0, sz = sz_index2size(ind) - N_PTRS; i < N_PTRS; + ++i, ++sz) { + void *p = malloc(sz); + assert_ptr_not_null(p, "malloc() failed"); + ptrs[i] = p; + live_req_sum += sz; + live_count++; + accum_req_sum += sz; + accum_count++; + prof_stats_t live_stats; + prof_stats_get_live(tsd, ind, &live_stats); + expect_u64_eq(live_stats.req_sum - live_stats_orig.req_sum, + live_req_sum, ""); + expect_u64_eq(live_stats.count - live_stats_orig.count, + live_count, ""); + prof_stats_t accum_stats; + prof_stats_get_accum(tsd, ind, &accum_stats); + expect_u64_eq(accum_stats.req_sum - accum_stats_orig.req_sum, + accum_req_sum, ""); + expect_u64_eq(accum_stats.count - accum_stats_orig.count, + accum_count, ""); + } + + for (size_t i = 0, sz = sz_index2size(ind) - N_PTRS; i < N_PTRS; + ++i, ++sz) { + free(ptrs[i]); + live_req_sum -= sz; + live_count--; + prof_stats_t live_stats; + prof_stats_get_live(tsd, ind, &live_stats); + expect_u64_eq(live_stats.req_sum - live_stats_orig.req_sum, + live_req_sum, ""); + expect_u64_eq(live_stats.count - live_stats_orig.count, + live_count, ""); + prof_stats_t accum_stats; + prof_stats_get_accum(tsd, ind, &accum_stats); + expect_u64_eq(accum_stats.req_sum - accum_stats_orig.req_sum, + accum_req_sum, ""); + expect_u64_eq(accum_stats.count - accum_stats_orig.count, + accum_count, ""); + } +#undef N_PTRS +} + +TEST_BEGIN(test_prof_stats) { + test_skip_if(!config_prof); + test_wrapper(0); + test_wrapper(1); + test_wrapper(2); +} +TEST_END + +int +main(void) { + return test( + test_prof_stats); +} diff --git a/test/unit/prof_stats.sh b/test/unit/prof_stats.sh new file mode 100644 index 00000000..b01dfd45 --- /dev/null +++ b/test/unit/prof_stats.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +if [ "x${enable_prof}" = "x1" ] ; then + export MALLOC_CONF="prof:true,lg_prof_sample:0,prof_stats:true" +fi