Add batch allocation API
This commit is contained in:
parent
c6f59e9bb4
commit
978f830ee3
@ -191,6 +191,7 @@ TESTS_UNIT := \
|
||||
$(srcroot)test/unit/background_thread.c \
|
||||
$(srcroot)test/unit/background_thread_enable.c \
|
||||
$(srcroot)test/unit/base.c \
|
||||
$(srcroot)test/unit/batch_alloc.c \
|
||||
$(srcroot)test/unit/binshard.c \
|
||||
$(srcroot)test/unit/bitmap.c \
|
||||
$(srcroot)test/unit/bit_util.c \
|
||||
@ -264,7 +265,8 @@ TESTS_UNIT := \
|
||||
$(srcroot)test/unit/zero_reallocs.c
|
||||
# Unit tests that are only added when enable_prof is 1.
ifeq (@enable_prof@, 1)
TESTS_UNIT += \
	$(srcroot)test/unit/arena_reset_prof.c \
	$(srcroot)test/unit/batch_alloc_prof.c
endif
|
||||
TESTS_INTEGRATION := $(srcroot)test/integration/aligned_alloc.c \
|
||||
$(srcroot)test/integration/allocated.c \
|
||||
|
@ -54,6 +54,7 @@ void arena_migrate(tsd_t *tsd, unsigned oldind, unsigned newind);
|
||||
void iarena_cleanup(tsd_t *tsd);
|
||||
void arena_cleanup(tsd_t *tsd);
|
||||
void arenas_tdata_cleanup(tsd_t *tsd);
|
||||
/*
 * Store up to num allocations of the given size and MALLOCX_* flags into
 * ptrs[]; returns how many were actually stored, which can be less than num.
 */
size_t batch_alloc(void **ptrs, size_t num, size_t size, int flags);
|
||||
void jemalloc_prefork(void);
|
||||
void jemalloc_postfork_parent(void);
|
||||
void jemalloc_postfork_child(void);
|
||||
|
@ -229,6 +229,17 @@ prof_sample_aligned(const void *ptr) {
|
||||
return ((uintptr_t)ptr & PAGE_MASK) == 0;
|
||||
}
|
||||
|
||||
JEMALLOC_ALWAYS_INLINE bool
|
||||
prof_sampled(tsd_t *tsd, const void *ptr) {
|
||||
prof_info_t prof_info;
|
||||
prof_info_get(tsd, ptr, NULL, &prof_info);
|
||||
bool sampled = (uintptr_t)prof_info.alloc_tctx > (uintptr_t)1U;
|
||||
if (sampled) {
|
||||
assert(prof_sample_aligned(ptr));
|
||||
}
|
||||
return sampled;
|
||||
}
|
||||
|
||||
JEMALLOC_ALWAYS_INLINE void
|
||||
prof_free(tsd_t *tsd, const void *ptr, size_t usize,
|
||||
emap_alloc_ctx_t *alloc_ctx) {
|
||||
|
@ -274,7 +274,6 @@ te_prof_sample_event_lookahead_surplus(tsd_t *tsd, size_t usize,
|
||||
|
||||
JEMALLOC_ALWAYS_INLINE bool
|
||||
te_prof_sample_event_lookahead(tsd_t *tsd, size_t usize) {
|
||||
assert(usize == sz_s2u(usize));
|
||||
return te_prof_sample_event_lookahead_surplus(tsd, usize, NULL);
|
||||
}
|
||||
|
||||
|
117
src/jemalloc.c
117
src/jemalloc.c
@ -3916,6 +3916,123 @@ je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void
|
||||
batch_alloc_prof_sample_assert(tsd_t *tsd, size_t batch, size_t usize) {
|
||||
assert(config_prof && opt_prof);
|
||||
bool prof_sample_event = te_prof_sample_event_lookahead(tsd,
|
||||
batch * usize);
|
||||
assert(!prof_sample_event);
|
||||
size_t surplus;
|
||||
prof_sample_event = te_prof_sample_event_lookahead_surplus(tsd,
|
||||
(batch + 1) * usize, &surplus);
|
||||
assert(prof_sample_event);
|
||||
assert(surplus < usize);
|
||||
}
|
||||
|
||||
size_t
|
||||
batch_alloc(void **ptrs, size_t num, size_t size, int flags) {
|
||||
LOG("core.batch_alloc.entry",
|
||||
"ptrs: %p, num: %zu, size: %zu, flags: %d", ptrs, num, size, flags);
|
||||
|
||||
tsd_t *tsd = tsd_fetch();
|
||||
check_entry_exit_locking(tsd_tsdn(tsd));
|
||||
|
||||
size_t filled = 0;
|
||||
|
||||
if (unlikely(tsd == NULL || tsd_reentrancy_level_get(tsd) > 0)) {
|
||||
goto label_done;
|
||||
}
|
||||
|
||||
size_t alignment = MALLOCX_ALIGN_GET(flags);
|
||||
size_t usize;
|
||||
if (aligned_usize_get(size, alignment, &usize, NULL, false)) {
|
||||
goto label_done;
|
||||
}
|
||||
|
||||
szind_t ind = sz_size2index(usize);
|
||||
if (unlikely(ind >= SC_NBINS)) {
|
||||
/* No optimization for large sizes. */
|
||||
void *p;
|
||||
while (filled < num && (p = je_mallocx(size, flags)) != NULL) {
|
||||
ptrs[filled++] = p;
|
||||
}
|
||||
goto label_done;
|
||||
}
|
||||
|
||||
bool zero = zero_get(MALLOCX_ZERO_GET(flags), /* slow */ true);
|
||||
|
||||
unsigned arena_ind = mallocx_arena_get(flags);
|
||||
arena_t *arena;
|
||||
if (arena_get_from_ind(tsd, arena_ind, &arena)) {
|
||||
goto label_done;
|
||||
}
|
||||
if (arena == NULL) {
|
||||
arena = arena_choose(tsd, NULL);
|
||||
} else {
|
||||
/* When a manual arena is specified, bypass the tcache. */
|
||||
flags |= MALLOCX_TCACHE_NONE;
|
||||
}
|
||||
if (unlikely(arena == NULL)) {
|
||||
goto label_done;
|
||||
}
|
||||
|
||||
while (filled < num) {
|
||||
size_t batch = num - filled;
|
||||
size_t surplus = SIZE_MAX; /* Dead store. */
|
||||
bool prof_sample_event = config_prof && opt_prof
|
||||
&& te_prof_sample_event_lookahead_surplus(tsd,
|
||||
batch * usize, &surplus);
|
||||
|
||||
if (prof_sample_event) {
|
||||
/*
|
||||
* Adjust so that the batch does not trigger prof
|
||||
* sampling.
|
||||
*/
|
||||
batch -= surplus / usize + 1;
|
||||
batch_alloc_prof_sample_assert(tsd, batch, usize);
|
||||
}
|
||||
|
||||
size_t n = arena_fill_small_fresh(tsd_tsdn(tsd), arena,
|
||||
ind, ptrs + filled, batch, zero);
|
||||
filled += n;
|
||||
|
||||
/*
|
||||
* For thread events other than prof sampling, trigger them as
|
||||
* if there's a single allocation of size (n * usize). This is
|
||||
* fine because:
|
||||
* (a) these events do not alter the allocation itself, and
|
||||
* (b) it's possible that some event would have been triggered
|
||||
* multiple times, instead of only once, if the allocations
|
||||
* were handled individually, but it would do no harm (or
|
||||
* even be beneficial) to coalesce the triggerings.
|
||||
*/
|
||||
thread_alloc_event(tsd, n * usize);
|
||||
|
||||
if (n < batch) { /* OOM */
|
||||
break;
|
||||
}
|
||||
|
||||
if (prof_sample_event) {
|
||||
/*
|
||||
* The next allocation will be prof sampled. The
|
||||
* thread event logic is handled within the mallocx()
|
||||
* call.
|
||||
*/
|
||||
void *p = je_mallocx(size, flags);
|
||||
if (p == NULL) { /* OOM */
|
||||
break;
|
||||
}
|
||||
assert(prof_sampled(tsd, p));
|
||||
ptrs[filled++] = p;
|
||||
}
|
||||
}
|
||||
|
||||
label_done:
|
||||
check_entry_exit_locking(tsd_tsdn(tsd));
|
||||
LOG("core.batch_alloc.exit", "result: %zu", filled);
|
||||
return filled;
|
||||
}
|
||||
|
||||
/*
|
||||
* End non-standard functions.
|
||||
*/
|
||||
|
190
test/unit/batch_alloc.c
Normal file
190
test/unit/batch_alloc.c
Normal file
@ -0,0 +1,190 @@
|
||||
#include "test/jemalloc_test.h"
|
||||
|
||||
/* Capacity of ptrs[]; every batch size used by the tests must stay below it. */
#define BATCH_MAX ((1U << 16) + 1024)
/* Output array handed to batch_alloc() by the tests in this file. */
static void *ptrs[BATCH_MAX];

/*
 * True iff ptr has none of the PAGE_MASK bits set.  The argument is
 * parenthesized so the cast applies to the whole expression passed in.
 */
#define PAGE_ALIGNED(ptr) (((uintptr_t)(ptr) & PAGE_MASK) == 0)
|
||||
|
||||
static void
|
||||
verify_stats(bin_stats_t *before, bin_stats_t *after, size_t batch,
|
||||
unsigned nregs) {
|
||||
if (!config_stats) {
|
||||
return;
|
||||
}
|
||||
if (config_prof && opt_prof) {
|
||||
/*
|
||||
* Checking the stats when prof is on is feasible but
|
||||
* complicated, while checking the non-prof case suffices for
|
||||
* unit-test purpose.
|
||||
*/
|
||||
return;
|
||||
}
|
||||
expect_u64_eq(before->nmalloc + batch, after->nmalloc, "");
|
||||
expect_u64_eq(before->nrequests + batch, after->nrequests, "");
|
||||
expect_zu_eq(before->curregs + batch, after->curregs, "");
|
||||
size_t nslab = batch / nregs;
|
||||
size_t n_nonfull = 0;
|
||||
if (batch % nregs != 0) {
|
||||
++nslab;
|
||||
++n_nonfull;
|
||||
}
|
||||
expect_u64_eq(before->nslabs + nslab, after->nslabs, "");
|
||||
expect_zu_eq(before->curslabs + nslab, after->curslabs, "");
|
||||
expect_zu_eq(before->nonfull_slabs + n_nonfull, after->nonfull_slabs,
|
||||
"");
|
||||
}
|
||||
|
||||
static void
|
||||
verify_batch(tsd_t *tsd, void **ptrs, size_t batch, size_t usize, bool zero,
|
||||
arena_t *arena, unsigned nregs) {
|
||||
for (size_t i = 0, j = 0; i < batch; ++i, ++j) {
|
||||
if (j == nregs) {
|
||||
j = 0;
|
||||
}
|
||||
void *p = ptrs[i];
|
||||
expect_zu_eq(isalloc(tsd_tsdn(tsd), p), usize, "");
|
||||
expect_ptr_eq(iaalloc(tsd_tsdn(tsd), p), arena, "");
|
||||
if (zero) {
|
||||
for (size_t k = 0; k < usize; ++k) {
|
||||
expect_true(*((unsigned char *)p + k) == 0, "");
|
||||
}
|
||||
}
|
||||
if (j == 0) {
|
||||
expect_true(PAGE_ALIGNED(p), "");
|
||||
continue;
|
||||
}
|
||||
assert(i > 0);
|
||||
void *q = ptrs[i - 1];
|
||||
bool adjacent = (uintptr_t)p > (uintptr_t)q
|
||||
&& (size_t)((uintptr_t)p - (uintptr_t)q) == usize;
|
||||
if (config_prof && opt_prof) {
|
||||
if (adjacent) {
|
||||
expect_false(prof_sampled(tsd, p)
|
||||
|| prof_sampled(tsd, q), "");
|
||||
} else {
|
||||
expect_true(prof_sampled(tsd, p)
|
||||
|| prof_sampled(tsd, q), "");
|
||||
expect_true(PAGE_ALIGNED(p), "");
|
||||
j = 0;
|
||||
}
|
||||
} else {
|
||||
expect_true(adjacent, "");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Hand the first batch pointers of ptrs[] back via sdallocx(p, size, 0). */
static void
release_batch(void **ptrs, size_t batch, size_t size) {
	void **const end = ptrs + batch;
	for (void **cur = ptrs; cur != end; ++cur) {
		sdallocx(*cur, size, 0);
	}
}
|
||||
|
||||
static void
|
||||
test_wrapper(size_t size, size_t alignment, bool zero, unsigned arena_flag) {
|
||||
tsd_t *tsd = tsd_fetch();
|
||||
assert(tsd != NULL);
|
||||
const size_t usize =
|
||||
(alignment != 0 ? sz_sa2u(size, alignment) : sz_s2u(size));
|
||||
const szind_t ind = sz_size2index(usize);
|
||||
const bin_info_t *bin_info = &bin_infos[ind];
|
||||
const unsigned nregs = bin_info->nregs;
|
||||
assert(nregs > 0);
|
||||
arena_t *arena;
|
||||
if (arena_flag != 0) {
|
||||
arena = arena_get(tsd_tsdn(tsd), MALLOCX_ARENA_GET(arena_flag),
|
||||
false);
|
||||
} else {
|
||||
arena = arena_choose(tsd, NULL);
|
||||
}
|
||||
assert(arena != NULL);
|
||||
bin_t *bin = arena_bin_choose(tsd_tsdn(tsd), arena, ind, NULL);
|
||||
assert(bin != NULL);
|
||||
int flags = arena_flag;
|
||||
if (alignment != 0) {
|
||||
flags |= MALLOCX_ALIGN(alignment);
|
||||
}
|
||||
if (zero) {
|
||||
flags |= MALLOCX_ZERO;
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocate for the purpose of bootstrapping arena_tdata, so that the
|
||||
* change in bin stats won't contaminate the stats to be verified below.
|
||||
*/
|
||||
void *p = mallocx(size, flags | MALLOCX_TCACHE_NONE);
|
||||
|
||||
for (size_t i = 0; i < 4; ++i) {
|
||||
size_t base = 0;
|
||||
if (i == 1) {
|
||||
base = nregs;
|
||||
} else if (i == 2) {
|
||||
base = nregs * 2;
|
||||
} else if (i == 3) {
|
||||
base = (1 << 16);
|
||||
}
|
||||
for (int j = -1; j <= 1; ++j) {
|
||||
if (base == 0 && j == -1) {
|
||||
continue;
|
||||
}
|
||||
size_t batch = base + (size_t)j;
|
||||
assert(batch < BATCH_MAX);
|
||||
bin_stats_t stats_before, stats_after;
|
||||
memcpy(&stats_before, &bin->stats, sizeof(bin_stats_t));
|
||||
size_t filled = batch_alloc(ptrs, batch, size, flags);
|
||||
assert_zu_eq(filled, batch, "");
|
||||
memcpy(&stats_after, &bin->stats, sizeof(bin_stats_t));
|
||||
verify_stats(&stats_before, &stats_after, batch, nregs);
|
||||
verify_batch(tsd, ptrs, batch, usize, zero, arena,
|
||||
nregs);
|
||||
release_batch(ptrs, batch, usize);
|
||||
}
|
||||
}
|
||||
|
||||
free(p);
|
||||
}
|
||||
|
||||
TEST_BEGIN(test_batch_alloc) {
|
||||
test_wrapper(11, 0, false, 0);
|
||||
}
|
||||
TEST_END
|
||||
|
||||
TEST_BEGIN(test_batch_alloc_zero) {
|
||||
test_wrapper(11, 0, true, 0);
|
||||
}
|
||||
TEST_END
|
||||
|
||||
TEST_BEGIN(test_batch_alloc_aligned) {
|
||||
test_wrapper(7, 16, false, 0);
|
||||
}
|
||||
TEST_END
|
||||
|
||||
TEST_BEGIN(test_batch_alloc_manual_arena) {
|
||||
unsigned arena_ind;
|
||||
size_t len_unsigned = sizeof(unsigned);
|
||||
assert_d_eq(mallctl("arenas.create", &arena_ind, &len_unsigned, NULL,
|
||||
0), 0, "");
|
||||
test_wrapper(11, 0, false, MALLOCX_ARENA(arena_ind));
|
||||
}
|
||||
TEST_END
|
||||
|
||||
TEST_BEGIN(test_batch_alloc_fallback) {
|
||||
const size_t size = SC_LARGE_MINCLASS;
|
||||
for (size_t batch = 0; batch < 4; ++batch) {
|
||||
assert(batch < BATCH_MAX);
|
||||
size_t filled = batch_alloc(ptrs, batch, size, 0);
|
||||
assert_zu_eq(filled, batch, "");
|
||||
release_batch(ptrs, batch, size);
|
||||
}
|
||||
}
|
||||
TEST_END
|
||||
|
||||
int
|
||||
main(void) {
|
||||
return test(
|
||||
test_batch_alloc,
|
||||
test_batch_alloc_zero,
|
||||
test_batch_alloc_aligned,
|
||||
test_batch_alloc_manual_arena,
|
||||
test_batch_alloc_fallback);
|
||||
}
|
3
test/unit/batch_alloc.sh
Normal file
3
test/unit/batch_alloc.sh
Normal file
@ -0,0 +1,3 @@
|
||||
#!/bin/sh
|
||||
|
||||
# Set jemalloc's tcache_gc_incr_bytes option to 2^31 bytes for this test.
# NOTE(review): presumably large enough that tcache GC never fires during the
# test and disturbs the bin stats being verified -- confirm.
export MALLOC_CONF="tcache_gc_incr_bytes:2147483648"
|
1
test/unit/batch_alloc_prof.c
Normal file
1
test/unit/batch_alloc_prof.c
Normal file
@ -0,0 +1 @@
|
||||
/*
 * Reuse the batch_alloc unit test unchanged.  Presumably it is run under the
 * MALLOC_CONF from batch_alloc_prof.sh (prof:true) so that the prof-enabled
 * code paths get exercised -- confirm against the test runner.
 */
#include "batch_alloc.c"
|
3
test/unit/batch_alloc_prof.sh
Normal file
3
test/unit/batch_alloc_prof.sh
Normal file
@ -0,0 +1,3 @@
|
||||
#!/bin/sh
|
||||
|
||||
# Run with heap profiling enabled and lg_prof_sample set to 14.
# NOTE(review): per the jemalloc manual, lg_prof_sample is the log2 of the
# average sample interval in bytes, i.e. roughly one sample per 16 KiB here --
# confirm.
export MALLOC_CONF="prof:true,lg_prof_sample:14"
|
Loading…
Reference in New Issue
Block a user