Add a tool to examine random number distributions

This commit is contained in:
Yinan Zhang 2020-05-04 14:58:25 -07:00
parent d460333efb
commit 537a4bedb4
4 changed files with 311 additions and 7 deletions

5
.gitignore vendored
View File

@ -71,6 +71,11 @@ test/include/test/jemalloc_test_defs.h
/test/unit/*.[od]
/test/unit/*.out
/test/analyze/[A-Za-z]*
!/test/analyze/[A-Za-z]*.*
/test/analyze/*.[od]
/test/analyze/*.out
/VERSION
*.pdb

View File

@ -287,6 +287,7 @@ else
CPP_SRCS :=
TESTS_INTEGRATION_CPP :=
endif
TESTS_ANALYZE := $(srcroot)test/analyze/rand.c
TESTS_STRESS := $(srcroot)test/stress/microbench.c \
$(srcroot)test/stress/fill_flush.c \
$(srcroot)test/stress/large_microbench.c \
@ -294,7 +295,8 @@ TESTS_STRESS := $(srcroot)test/stress/microbench.c \
$(srcroot)test/stress/sizes.c
TESTS := $(TESTS_UNIT) $(TESTS_INTEGRATION) $(TESTS_INTEGRATION_CPP) $(TESTS_STRESS)
TESTS := $(TESTS_UNIT) $(TESTS_INTEGRATION) $(TESTS_INTEGRATION_CPP) \
$(TESTS_ANALYZE) $(TESTS_STRESS)
PRIVATE_NAMESPACE_HDRS := $(objroot)include/jemalloc/internal/private_namespace.h $(objroot)include/jemalloc/internal/private_namespace_jet.h
PRIVATE_NAMESPACE_GEN_HDRS := $(PRIVATE_NAMESPACE_HDRS:%.h=%.gen.h)
@ -310,14 +312,19 @@ C_JET_OBJS := $(C_SRCS:$(srcroot)%.c=$(objroot)%.jet.$(O))
C_TESTLIB_UNIT_OBJS := $(C_TESTLIB_SRCS:$(srcroot)%.c=$(objroot)%.unit.$(O))
C_TESTLIB_INTEGRATION_OBJS := $(C_TESTLIB_SRCS:$(srcroot)%.c=$(objroot)%.integration.$(O))
C_UTIL_INTEGRATION_OBJS := $(C_UTIL_INTEGRATION_SRCS:$(srcroot)%.c=$(objroot)%.integration.$(O))
C_TESTLIB_ANALYZE_OBJS := $(C_TESTLIB_SRCS:$(srcroot)%.c=$(objroot)%.analyze.$(O))
C_TESTLIB_STRESS_OBJS := $(C_TESTLIB_SRCS:$(srcroot)%.c=$(objroot)%.stress.$(O))
C_TESTLIB_OBJS := $(C_TESTLIB_UNIT_OBJS) $(C_TESTLIB_INTEGRATION_OBJS) $(C_UTIL_INTEGRATION_OBJS) $(C_TESTLIB_STRESS_OBJS)
C_TESTLIB_OBJS := $(C_TESTLIB_UNIT_OBJS) $(C_TESTLIB_INTEGRATION_OBJS) \
$(C_UTIL_INTEGRATION_OBJS) $(C_TESTLIB_ANALYZE_OBJS) \
$(C_TESTLIB_STRESS_OBJS)
TESTS_UNIT_OBJS := $(TESTS_UNIT:$(srcroot)%.c=$(objroot)%.$(O))
TESTS_INTEGRATION_OBJS := $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%.$(O))
TESTS_INTEGRATION_CPP_OBJS := $(TESTS_INTEGRATION_CPP:$(srcroot)%.cpp=$(objroot)%.$(O))
TESTS_ANALYZE_OBJS := $(TESTS_ANALYZE:$(srcroot)%.c=$(objroot)%.$(O))
TESTS_STRESS_OBJS := $(TESTS_STRESS:$(srcroot)%.c=$(objroot)%.$(O))
TESTS_OBJS := $(TESTS_UNIT_OBJS) $(TESTS_INTEGRATION_OBJS) $(TESTS_STRESS_OBJS)
TESTS_OBJS := $(TESTS_UNIT_OBJS) $(TESTS_INTEGRATION_OBJS) $(TESTS_ANALYZE_OBJS) \
$(TESTS_STRESS_OBJS)
TESTS_CPP_OBJS := $(TESTS_INTEGRATION_CPP_OBJS)
.PHONY: all dist build_doc_html build_doc_man build_doc
@ -391,12 +398,15 @@ $(C_TESTLIB_UNIT_OBJS): CPPFLAGS += -DJEMALLOC_UNIT_TEST
$(C_TESTLIB_INTEGRATION_OBJS): $(objroot)test/src/%.integration.$(O): $(srcroot)test/src/%.c
$(C_TESTLIB_INTEGRATION_OBJS): CPPFLAGS += -DJEMALLOC_INTEGRATION_TEST
$(C_UTIL_INTEGRATION_OBJS): $(objroot)src/%.integration.$(O): $(srcroot)src/%.c
$(C_TESTLIB_ANALYZE_OBJS): $(objroot)test/src/%.analyze.$(O): $(srcroot)test/src/%.c
$(C_TESTLIB_ANALYZE_OBJS): CPPFLAGS += -DJEMALLOC_ANALYZE_TEST
$(C_TESTLIB_STRESS_OBJS): $(objroot)test/src/%.stress.$(O): $(srcroot)test/src/%.c
$(C_TESTLIB_STRESS_OBJS): CPPFLAGS += -DJEMALLOC_STRESS_TEST -DJEMALLOC_STRESS_TESTLIB
$(C_TESTLIB_OBJS): CPPFLAGS += -I$(srcroot)test/include -I$(objroot)test/include
$(TESTS_UNIT_OBJS): CPPFLAGS += -DJEMALLOC_UNIT_TEST
$(TESTS_INTEGRATION_OBJS): CPPFLAGS += -DJEMALLOC_INTEGRATION_TEST
$(TESTS_INTEGRATION_CPP_OBJS): CPPFLAGS += -DJEMALLOC_INTEGRATION_CPP_TEST
$(TESTS_ANALYZE_OBJS): CPPFLAGS += -DJEMALLOC_ANALYZE_TEST
$(TESTS_STRESS_OBJS): CPPFLAGS += -DJEMALLOC_STRESS_TEST
$(TESTS_OBJS): $(objroot)test/%.$(O): $(srcroot)test/%.c
$(TESTS_CPP_OBJS): $(objroot)test/%.$(O): $(srcroot)test/%.cpp
@ -416,7 +426,7 @@ $(TESTS_OBJS) $(TESTS_CPP_OBJS): $(objroot)test/include/test/jemalloc_test.h
endif
$(C_OBJS) $(CPP_OBJS) $(C_PIC_OBJS) $(CPP_PIC_OBJS) $(C_TESTLIB_INTEGRATION_OBJS) $(C_UTIL_INTEGRATION_OBJS) $(TESTS_INTEGRATION_OBJS) $(TESTS_INTEGRATION_CPP_OBJS): $(objroot)include/jemalloc/internal/private_namespace.h
$(C_JET_OBJS) $(C_TESTLIB_UNIT_OBJS) $(C_TESTLIB_STRESS_OBJS) $(TESTS_UNIT_OBJS) $(TESTS_STRESS_OBJS): $(objroot)include/jemalloc/internal/private_namespace_jet.h
$(C_JET_OBJS) $(C_TESTLIB_UNIT_OBJS) $(C_TESTLIB_ANALYZE_OBJS) $(C_TESTLIB_STRESS_OBJS) $(TESTS_UNIT_OBJS) $(TESTS_ANALYZE_OBJS) $(TESTS_STRESS_OBJS): $(objroot)include/jemalloc/internal/private_namespace_jet.h
$(C_SYM_OBJS) $(C_OBJS) $(C_PIC_OBJS) $(C_JET_SYM_OBJS) $(C_JET_OBJS) $(C_TESTLIB_OBJS) $(TESTS_OBJS): %.$(O):
@mkdir -p $(@D)
@ -479,6 +489,10 @@ $(objroot)test/integration/cpp/%$(EXE): $(objroot)test/integration/cpp/%.$(O) $(
@mkdir -p $(@D)
$(CXX) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(LIBS)) -lm $(EXTRA_LDFLAGS)
# Link one analyze tool per test/analyze object. The prerequisites are the
# tool's own object, the JET-compiled library objects ($(C_JET_OBJS)) and the
# analyze flavor of the test library ($(C_TESTLIB_ANALYZE_OBJS)); only the
# object files among them ($(filter %.$(O),$^)) are passed to the linker.
# Unlike the stress link rule, this one neither depends on nor links against
# $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB), and it does not use
# $(TEST_LD_MODE).
$(objroot)test/analyze/%$(EXE): $(objroot)test/analyze/%.$(O) $(C_JET_OBJS) $(C_TESTLIB_ANALYZE_OBJS)
@mkdir -p $(@D)
$(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(LDFLAGS) $(filter-out -lm,$(LIBS)) $(LM) $(EXTRA_LDFLAGS)
$(objroot)test/stress/%$(EXE): $(objroot)test/stress/%.$(O) $(C_JET_OBJS) $(C_TESTLIB_STRESS_OBJS) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB)
@mkdir -p $(@D)
$(CC) $(TEST_LD_MODE) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(LIBS)) $(LM) $(EXTRA_LDFLAGS)
@ -559,13 +573,16 @@ endif
tests_unit: $(TESTS_UNIT:$(srcroot)%.c=$(objroot)%$(EXE))
tests_integration: $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%$(EXE)) $(TESTS_INTEGRATION_CPP:$(srcroot)%.cpp=$(objroot)%$(EXE))
tests_analyze: $(TESTS_ANALYZE:$(srcroot)%.c=$(objroot)%$(EXE))
tests_stress: $(TESTS_STRESS:$(srcroot)%.c=$(objroot)%$(EXE))
tests: tests_unit tests_integration tests_stress
tests: tests_unit tests_integration tests_analyze tests_stress
check_unit_dir:
@mkdir -p $(objroot)test/unit
check_integration_dir:
@mkdir -p $(objroot)test/integration
analyze_dir:
@mkdir -p $(objroot)test/analyze
stress_dir:
@mkdir -p $(objroot)test/stress
check_dir: check_unit_dir check_integration_dir
@ -582,6 +599,12 @@ check_integration_decay: tests_integration check_integration_dir
$(MALLOC_CONF)="dirty_decay_ms:0,muzzy_decay_ms:0" $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) $(TESTS_INTEGRATION_CPP:$(srcroot)%.cpp=$(objroot)%)
check_integration: tests_integration check_integration_dir
$(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) $(TESTS_INTEGRATION_CPP:$(srcroot)%.cpp=$(objroot)%)
# Build the analyze tools (tests_analyze), make sure their output directory
# exists (analyze_dir), then run them through test.sh. When enable_prof is 1
# the run is prefixed with $(MALLOC_CONF)="prof:true"; otherwise test.sh is
# invoked without any extra setting.
analyze: tests_analyze analyze_dir
ifeq ($(enable_prof), 1)
$(MALLOC_CONF)="prof:true" $(SHELL) $(objroot)test/test.sh $(TESTS_ANALYZE:$(srcroot)%.c=$(objroot)%)
else
$(SHELL) $(objroot)test/test.sh $(TESTS_ANALYZE:$(srcroot)%.c=$(objroot)%)
endif
stress: tests_stress stress_dir
$(SHELL) $(objroot)test/test.sh $(TESTS_STRESS:$(srcroot)%.c=$(objroot)%)
check: check_unit check_integration check_integration_decay check_integration_prof

276
test/analyze/rand.c Normal file
View File

@ -0,0 +1,276 @@
#include "test/jemalloc_test.h"
/******************************************************************************/
/*
* General purpose tool for examining random number distributions.
*
* Input -
* (a) a random number generator, and
* (b) the buckets:
* (1) number of buckets,
* (2) width of each bucket, in log scale,
* (3) expected mean and stddev of the count of random numbers in each
* bucket, and
* (c) number of iterations to invoke the generator.
*
* The program generates the specified number of random numbers and assesses
* how well they conform to the expectations: for each bucket, it outputs -
* (a) the (given) expected mean and stddev,
* (b) the actual count and any interesting level of deviation:
* (1) ~68% buckets should show no interesting deviation, meaning a
* deviation less than stddev from the expectation;
* (2) ~27% buckets should show '+' / '-', meaning a deviation in the range
* of [stddev, 2 * stddev) from the expectation;
* (3) ~4% buckets should show '++' / '--', meaning a deviation in the
* range of [2 * stddev, 3 * stddev) from the expectation; and
* (4) less than 0.3% buckets should show more than two '+'s / '-'s.
*
* Technical remarks:
* (a) The generator is expected to output uint64_t numbers, so you might need
* to define a wrapper.
* (b) The buckets must be of equal width and the lowest bucket starts at
* [0, 2^lg_bucket_width).
* (c) Any generated number >= n_bucket * 2^lg_bucket_width will be counted
* towards the last bucket; the expected mean and stddev provided should
* also reflect that.
* (d) The number of iterations should be chosen so that the bucket with the
* minimal expected proportion still gets a sufficient count.
*/
/* Store the value k into each of the first n slots of a. */
static void
fill(size_t a[], const size_t n, const size_t k) {
	size_t *const end = a + n;
	for (size_t *slot = a; slot != end; ++slot) {
		*slot = k;
	}
}
/*
 * Draw n_iter numbers from gen(opaque) and count each one in the bucket
 * selected by its value shifted right by lg_bucket_width.  Values whose
 * bucket index is n_bucket or larger are counted in the last bucket.  The
 * counters in buckets[] are incremented, not reset.
 */
static void
collect_buckets(uint64_t (*gen)(void *), void *opaque, size_t buckets[],
    const size_t n_bucket, const size_t lg_bucket_width, const size_t n_iter) {
	const size_t last = n_bucket - 1;
	size_t remaining = n_iter;

	while (remaining-- > 0) {
		const uint64_t idx = gen(opaque) >> lg_bucket_width;
		if (idx < n_bucket) {
			++buckets[idx];
		} else {
			/* Out-of-range values are folded into the last bucket. */
			++buckets[last];
		}
	}
}
/*
 * Print one line per bucket: its index, the expected mean, the expected
 * stddev and the actual count, followed by a space and one '-' (count below
 * the mean) or '+' (count above the mean) for every whole stddev by which the
 * count deviates from the mean.  Nothing is appended when the deviation is
 * smaller than one stddev.
 *
 * buckets, means and stddevs must each hold at least n_bucket entries.
 */
static void
print_buckets(const size_t buckets[], const size_t means[],
    const size_t stddevs[], const size_t n_bucket) {
	for (size_t i = 0; i < n_bucket; ++i) {
		malloc_printf("%zu:\tmean = %zu,\tstddev = %zu,\tbucket = %zu",
		    i, means[i], stddevs[i], buckets[i]);

		/*
		 * Work with the absolute deviation rather than with
		 * mean + stddev / bucket + stddev, so that no intermediate
		 * sum can overflow.
		 */
		size_t deviation;
		const char *mark;
		if (buckets[i] < means[i]) {
			deviation = means[i] - buckets[i];
			mark = "-";
		} else {
			deviation = buckets[i] - means[i];
			mark = "+";
		}

		/*
		 * A zero stddev cannot serve as a unit of deviation; stepping
		 * by it would never terminate, so no marks are printed for
		 * such a bucket.
		 */
		size_t n_mark = (stddevs[i] == 0) ? 0 : deviation / stddevs[i];
		if (n_mark > 0) {
			malloc_write(" ");
			while (n_mark-- > 0) {
				malloc_write(mark);
			}
		}
		malloc_write("\n");
	}
}
/*
 * Run three independent rounds.  Each round prints a "round N" header, zeroes
 * the bucket counters, draws n_iter numbers from the generator into the
 * buckets and prints the per-bucket report against means / stddevs.
 */
static void
bucket_analysis(uint64_t (*gen)(void *), void *opaque, size_t buckets[],
    const size_t means[], const size_t stddevs[], const size_t n_bucket,
    const size_t lg_bucket_width, const size_t n_iter) {
	size_t round = 0;

	while (round < 3) {
		++round;
		malloc_printf("round %zu\n", round);

		/* Start every round from empty counters. */
		for (size_t b = 0; b < n_bucket; ++b) {
			buckets[b] = 0;
		}

		collect_buckets(gen, opaque, buckets, n_bucket,
		    lg_bucket_width, n_iter);
		print_buckets(buckets, means, stddevs, n_bucket);
	}
}
/* (Recommended) minimal bucket mean. */
#define MIN_BUCKET_MEAN 10000
/******************************************************************************/
/* Uniform random number generator. */
typedef struct uniform_gen_arg_s uniform_gen_arg_t;
struct uniform_gen_arg_s {
	/* Generator state; handed to prng_lg_range_u64() by address. */
	uint64_t state;
	/* Forwarded to prng_lg_range_u64() as its lg_range argument. */
	const unsigned lg_range;
};

/*
 * Adapter with the uint64_t (*)(void *) shape expected by the bucket helpers;
 * opaque must point to a uniform_gen_arg_t.
 */
static uint64_t
uniform_gen(void *opaque) {
	uniform_gen_arg_t *const ctx = (uniform_gen_arg_t *)opaque;
	uint64_t *const state = &ctx->state;

	return prng_lg_range_u64(state, ctx->lg_range);
}
TEST_BEGIN(test_uniform) {
#define LG_N_BUCKET 5
#define N_BUCKET (1 << LG_N_BUCKET)
#define QUOTIENT_CEIL(n, d) (((n) - 1) / (d) + 1)

	const unsigned lg_range_test = 25;

	/*
	 * Pick the sample size so that the per-bucket mean and stddev are both
	 * integers.  With N = N_BUCKET and a scale factor q:
	 *
	 *   stddev = (N - 1) * q
	 *   mean   = N * (N - 1) * q^2
	 *   n_iter = N * mean
	 *
	 * For n_iter uniform draws over N buckets each count is binomial with
	 * p = 1/N, so its mean is n_iter / N = mean and its variance is
	 * n_iter * (1/N) * (1 - 1/N) = ((N - 1) * q)^2 = stddev^2.
	 *
	 * q is a power of two chosen such that q^2 >= ceil(MIN_BUCKET_MEAN /
	 * (N * (N - 1))), which makes mean >= MIN_BUCKET_MEAN (assuming
	 * LG_CEIL(x) yields the ceiling of log2(x) -- TODO confirm).
	 */
	const size_t q = 1 << QUOTIENT_CEIL(LG_CEIL(QUOTIENT_CEIL(
	    MIN_BUCKET_MEAN, N_BUCKET * (N_BUCKET - 1))), 2);
	const size_t stddev = (N_BUCKET - 1) * q;
	const size_t mean = N_BUCKET * stddev * q;
	const size_t n_iter = N_BUCKET * mean;

	/* Every bucket shares the same expectation. */
	size_t means[N_BUCKET];
	size_t stddevs[N_BUCKET];
	for (size_t b = 0; b < N_BUCKET; ++b) {
		means[b] = mean;
		stddevs[b] = stddev;
	}

	/* The initial generator state is derived from a stack address. */
	uniform_gen_arg_t gen_arg = {
		(uint64_t)(uintptr_t)&lg_range_test,
		lg_range_test
	};

	size_t counts[N_BUCKET];

	/* The bucket width below must not underflow. */
	assert_zu_ge(lg_range_test, LG_N_BUCKET, "");
	const size_t lg_bucket_width = lg_range_test - LG_N_BUCKET;

	bucket_analysis(uniform_gen, &gen_arg, counts, means, stddevs,
	    N_BUCKET, lg_bucket_width, n_iter);

#undef LG_N_BUCKET
#undef N_BUCKET
#undef QUOTIENT_CEIL
}
TEST_END
/******************************************************************************/
/* Geometric random number generator; compiled only when prof is on. */
#ifdef JEMALLOC_PROF
/*
* Fills geometric proportions and returns the minimal proportion. See
* comments in test_prof_sample for explanations for n_divide.
*/
static double
fill_geometric_proportions(double proportions[], const size_t n_bucket,
const size_t n_divide) {
assert(n_bucket > 0);
assert(n_divide > 0);
double x = 1.;
for (size_t i = 0; i < n_bucket; ++i) {
if (i == n_bucket - 1) {
proportions[i] = x;
} else {
double y = x * exp(-1. / n_divide);
proportions[i] = x - y;
x = y;
}
}
/*
* The minimal proportion is the smaller one of the last two
* proportions for geometric distribution.
*/
double min_proportion = proportions[n_bucket - 1];
if (n_bucket >= 2 && proportions[n_bucket - 2] < min_proportion) {
min_proportion = proportions[n_bucket - 2];
}
return min_proportion;
}
/*
 * Convert x to size_t after adding one half, i.e. round half up.  Intended
 * for non-negative values that fit in size_t.
 */
static size_t
round_to_nearest(const double x) {
	const double shifted = x + .5;
	return (size_t)shifted;
}
/*
 * For each bucket, derive the expected count and its standard deviation from
 * the bucket's proportion p and the number of draws n_iter:
 * mean = n_iter * p and stddev = sqrt(n_iter * p * (1 - p)), both rounded to
 * the nearest integer.
 */
static void
fill_references(size_t means[], size_t stddevs[], const double proportions[],
    const size_t n_bucket, const size_t n_iter) {
	size_t b = 0;

	while (b < n_bucket) {
		const double expected = n_iter * proportions[b];
		means[b] = round_to_nearest(expected);
		stddevs[b] = round_to_nearest(sqrt(expected *
		    (1. - proportions[b])));
		++b;
	}
}
static uint64_t
prof_sample_gen(void *opaque) {
return prof_sample_new_event_wait((tsd_t *)opaque) - 1;
}
#endif /* JEMALLOC_PROF */
TEST_BEGIN(test_prof_sample) {
	test_skip_if(!config_prof);
#ifdef JEMALLOC_PROF

/* Number of divisions within [0, mean). */
#define LG_N_DIVIDE 3
#define N_DIVIDE (1 << LG_N_DIVIDE)

/* Coverage of buckets in terms of multiples of mean. */
#define LG_N_MULTIPLY 2
#define N_GEO_BUCKET (N_DIVIDE << LG_N_MULTIPLY)

/* The regular buckets plus one trailing bucket that collects the tail. */
#define N_GEO_BUCKET_TOTAL (N_GEO_BUCKET + 1)

	test_skip_if(!opt_prof);

	/*
	 * Switch to the sample rate under test, remembering the current one so
	 * that it can be restored at the end.
	 */
	size_t lg_sample_new = 25;
	size_t lg_sample_saved = lg_prof_sample;
	assert_d_eq(mallctl("prof.reset", NULL, NULL, &lg_sample_new,
	    sizeof(size_t)), 0, "");
	malloc_printf("lg_prof_sample = %zu\n", lg_sample_new);

	/*
	 * Size the run so that the least likely bucket is still expected to
	 * receive about MIN_BUCKET_MEAN samples.
	 */
	double proportions[N_GEO_BUCKET_TOTAL];
	const double min_proportion = fill_geometric_proportions(proportions,
	    N_GEO_BUCKET_TOTAL, N_DIVIDE);
	const size_t n_iter = round_to_nearest(MIN_BUCKET_MEAN /
	    min_proportion);

	size_t means[N_GEO_BUCKET_TOTAL];
	size_t stddevs[N_GEO_BUCKET_TOTAL];
	fill_references(means, stddevs, proportions, N_GEO_BUCKET_TOTAL,
	    n_iter);

	tsd_t *tsd = tsd_fetch();
	assert_ptr_not_null(tsd, "");

	size_t counts[N_GEO_BUCKET_TOTAL];

	/*
	 * Each bucket spans 2^(lg_prof_sample - LG_N_DIVIDE) values; the
	 * subtraction must not underflow.  NOTE(review): this reads the
	 * lg_prof_sample global, presumably already updated by the
	 * "prof.reset" call above -- confirm.
	 */
	assert_zu_ge(lg_prof_sample, LG_N_DIVIDE, "");
	const size_t lg_bucket_width = lg_prof_sample - LG_N_DIVIDE;

	bucket_analysis(prof_sample_gen, tsd, counts, means, stddevs,
	    N_GEO_BUCKET_TOTAL, lg_bucket_width, n_iter);

	/* Put the original sample rate back. */
	assert_d_eq(mallctl("prof.reset", NULL, NULL, &lg_sample_saved,
	    sizeof(size_t)), 0, "");

#undef LG_N_DIVIDE
#undef N_DIVIDE
#undef LG_N_MULTIPLY
#undef N_GEO_BUCKET
#undef N_GEO_BUCKET_TOTAL
#endif /* JEMALLOC_PROF */
}
TEST_END
/******************************************************************************/
/* Run both distribution checks through the no-reentrancy test runner. */
int
main(void) {
	const int exit_status = test_no_reentrancy(test_uniform,
	    test_prof_sample);

	return exit_status;
}

View File

@ -38,9 +38,9 @@ extern "C" {
/******************************************************************************/
/*
* For unit tests, expose all public and private interfaces.
* For unit tests and analytics tests, expose all public and private interfaces.
*/
#ifdef JEMALLOC_UNIT_TEST
#if defined(JEMALLOC_UNIT_TEST) || defined (JEMALLOC_ANALYZE_TEST)
# define JEMALLOC_JET
# define JEMALLOC_MANGLE
# include "jemalloc/internal/jemalloc_preamble.h"