diff --git a/src/jemalloc.c b/src/jemalloc.c
index 2a791e17..575a63cf 100644
--- a/src/jemalloc.c
+++ b/src/jemalloc.c
@@ -4088,32 +4088,15 @@ batch_alloc(void **ptrs, size_t num, size_t size, int flags) {
 	if (aligned_usize_get(size, alignment, &usize, NULL, false)) {
 		goto label_done;
 	}
 	szind_t ind = sz_size2index(usize);
-	if (unlikely(ind >= SC_NBINS)) {
-		/* No optimization for large sizes. */
-		void *p;
-		while (filled < num && (p = je_mallocx(size, flags)) != NULL) {
-			ptrs[filled++] = p;
-		}
-		goto label_done;
-	}
 	bool zero = zero_get(MALLOCX_ZERO_GET(flags), /* slow */ true);
-	unsigned arena_ind = mallocx_arena_get(flags);
-	arena_t *arena;
-	if (arena_get_from_ind(tsd, arena_ind, &arena)) {
-		goto label_done;
-	}
-	if (arena == NULL) {
-		arena = arena_choose(tsd, NULL);
-	} else {
-		/* When a manual arena is specified, bypass the tcache. */
-		flags |= MALLOCX_TCACHE_NONE;
-	}
-	if (unlikely(arena == NULL)) {
-		goto label_done;
+	cache_bin_t *bin = NULL;
+	arena_t *arena = NULL;
+	size_t nregs = 0;
+	if (likely(ind < SC_NBINS)) {
+		nregs = bin_infos[ind].nregs;
+		assert(nregs > 0);
 	}
 
 	while (filled < num) {
@@ -4132,9 +4115,63 @@ batch_alloc(void **ptrs, size_t num, size_t size, int flags) {
 			batch_alloc_prof_sample_assert(tsd, batch, usize);
 		}
 
-		size_t n = arena_fill_small_fresh(tsd_tsdn(tsd), arena,
-		    ind, ptrs + filled, batch, zero);
-		filled += n;
+		size_t progress = 0;
+
+		if (likely(ind < SC_NBINS) && batch >= nregs) {
+			if (arena == NULL) {
+				unsigned arena_ind = mallocx_arena_get(flags);
+				if (arena_get_from_ind(tsd, arena_ind,
+				    &arena)) {
+					goto label_done;
+				}
+				if (arena == NULL) {
+					arena = arena_choose(tsd, NULL);
+				}
+				if (unlikely(arena == NULL)) {
+					goto label_done;
+				}
+			}
+			size_t arena_batch = batch - batch % nregs;
+			size_t n = arena_fill_small_fresh(tsd_tsdn(tsd), arena,
+			    ind, ptrs + filled, arena_batch, zero);
+			progress += n;
+			filled += n;
+		}
+
+		if (likely(ind < nhbins) && progress < batch) {
+			if (bin == NULL) {
+				unsigned tcache_ind = mallocx_tcache_get(flags);
+				tcache_t *tcache = tcache_get_from_ind(tsd,
+				    tcache_ind, /* slow */ true,
+				    /* is_alloc */ true);
+				if (tcache != NULL) {
+					bin = &tcache->bins[ind];
+				}
+			}
+			if (bin != NULL) {
+				size_t bin_batch = batch - progress;
+				size_t n = cache_bin_alloc_batch(bin, bin_batch,
+				    ptrs + filled);
+				if (config_stats) {
+					bin->tstats.nrequests += n;
+				}
+				if (zero) {
+					for (size_t i = 0; i < n; ++i) {
+						memset(ptrs[filled + i], 0,
+						    usize);
+					}
+				}
+				if (config_prof && opt_prof
+				    && unlikely(ind >= SC_NBINS)) {
+					for (size_t i = 0; i < n; ++i) {
+						prof_tctx_reset_sampled(tsd,
+						    ptrs[filled + i]);
+					}
+				}
+				progress += n;
+				filled += n;
+			}
+		}
 
 		/*
 		 * For thread events other than prof sampling, trigger them as
@@ -4146,23 +4183,16 @@ batch_alloc(void **ptrs, size_t num, size_t size, int flags) {
 		 * were handled individually, but it would do no harm (or
 		 * even be beneficial) to coalesce the triggerings.
 		 */
-		thread_alloc_event(tsd, n * usize);
+		thread_alloc_event(tsd, progress * usize);
 
-		if (n < batch) { /* OOM */
-			break;
-		}
-
-		if (prof_sample_event) {
-			/*
-			 * The next allocation will be prof sampled.  The
-			 * thread event logic is handled within the mallocx()
-			 * call.
-			 */
+		if (progress < batch || prof_sample_event) {
 			void *p = je_mallocx(size, flags);
 			if (p == NULL) { /* OOM */
 				break;
 			}
-			assert(prof_sampled(tsd, p));
+			if (progress == batch) {
+				assert(prof_sampled(tsd, p));
+			}
 			ptrs[filled++] = p;
 		}
 	}
diff --git a/test/unit/batch_alloc.c b/test/unit/batch_alloc.c
index cb46513d..992990f3 100644
--- a/test/unit/batch_alloc.c
+++ b/test/unit/batch_alloc.c
@@ -5,35 +5,6 @@ static void *ptrs[BATCH_MAX];
 
 #define PAGE_ALIGNED(ptr) (((uintptr_t)ptr & PAGE_MASK) == 0)
 
-static void
-verify_stats(bin_stats_t *before, bin_stats_t *after, size_t batch,
-    unsigned nregs) {
-	if (!config_stats) {
-		return;
-	}
-	if (config_prof && opt_prof) {
-		/*
-		 * Checking the stats when prof is on is feasible but
-		 * complicated, while checking the non-prof case suffices for
-		 * unit-test purpose.
-		 */
-		return;
-	}
-	expect_u64_eq(before->nmalloc + batch, after->nmalloc, "");
-	expect_u64_eq(before->nrequests + batch, after->nrequests, "");
-	expect_zu_eq(before->curregs + batch, after->curregs, "");
-	size_t nslab = batch / nregs;
-	size_t n_nonfull = 0;
-	if (batch % nregs != 0) {
-		++nslab;
-		++n_nonfull;
-	}
-	expect_u64_eq(before->nslabs + nslab, after->nslabs, "");
-	expect_zu_eq(before->curslabs + nslab, after->curslabs, "");
-	expect_zu_eq(before->nonfull_slabs + n_nonfull, after->nonfull_slabs,
-	    "");
-}
-
 static void
 verify_batch_basic(tsd_t *tsd, void **ptrs, size_t batch, size_t usize,
     bool zero) {
@@ -51,10 +22,21 @@ verify_batch_basic(tsd_t *tsd, void **ptrs, size_t batch, size_t usize,
 static void
 verify_batch_locality(tsd_t *tsd, void **ptrs, size_t batch, size_t usize,
     arena_t *arena, unsigned nregs) {
+	if (config_prof && opt_prof) {
+		/*
+		 * Checking batch locality when prof is on is feasible but
+		 * complicated, while checking the non-prof case suffices for
+		 * unit-test purpose.
+ */ + return; + } for (size_t i = 0, j = 0; i < batch; ++i, ++j) { if (j == nregs) { j = 0; } + if (j == 0 && batch - i < nregs) { + break; + } void *p = ptrs[i]; expect_ptr_eq(iaalloc(tsd_tsdn(tsd), p), arena, ""); if (j == 0) { @@ -63,21 +45,8 @@ verify_batch_locality(tsd_t *tsd, void **ptrs, size_t batch, size_t usize, } assert(i > 0); void *q = ptrs[i - 1]; - bool adjacent = (uintptr_t)p > (uintptr_t)q - && (size_t)((uintptr_t)p - (uintptr_t)q) == usize; - if (config_prof && opt_prof) { - if (adjacent) { - expect_false(prof_sampled(tsd, p) - || prof_sampled(tsd, q), ""); - } else { - expect_true(prof_sampled(tsd, p) - || prof_sampled(tsd, q), ""); - expect_true(PAGE_ALIGNED(p), ""); - j = 0; - } - } else { - expect_true(adjacent, ""); - } + expect_true((uintptr_t)p > (uintptr_t)q + && (size_t)((uintptr_t)p - (uintptr_t)q) == usize, ""); } } @@ -124,8 +93,6 @@ test_wrapper(size_t size, size_t alignment, bool zero, unsigned arena_flag) { arena = arena_choose(tsd, NULL); } assert(arena != NULL); - bin_t *bin = arena_bin_choose(tsd_tsdn(tsd), arena, ind, NULL); - assert(bin != NULL); int flags = arena_flag; if (alignment != 0) { flags |= MALLOCX_ALIGN(alignment); @@ -155,13 +122,9 @@ test_wrapper(size_t size, size_t alignment, bool zero, unsigned arena_flag) { } size_t batch = base + (size_t)j; assert(batch < BATCH_MAX); - bin_stats_t stats_before, stats_after; - memcpy(&stats_before, &bin->stats, sizeof(bin_stats_t)); size_t filled = batch_alloc_wrapper(ptrs, batch, size, flags); assert_zu_eq(filled, batch, ""); - memcpy(&stats_after, &bin->stats, sizeof(bin_stats_t)); - verify_stats(&stats_before, &stats_after, batch, nregs); verify_batch_basic(tsd, ptrs, batch, usize, zero); verify_batch_locality(tsd, ptrs, batch, usize, arena, nregs); @@ -196,8 +159,15 @@ TEST_BEGIN(test_batch_alloc_manual_arena) { } TEST_END -TEST_BEGIN(test_batch_alloc_fallback) { - const size_t size = SC_LARGE_MINCLASS; +TEST_BEGIN(test_batch_alloc_large) { + size_t size = SC_LARGE_MINCLASS; + for (size_t batch = 0; batch < 4; ++batch) { + assert(batch < BATCH_MAX); + size_t filled = batch_alloc(ptrs, batch, size, 0); + assert_zu_eq(filled, batch, ""); + release_batch(ptrs, batch, size); + } + size = tcache_maxclass + 1; for (size_t batch = 0; batch < 4; ++batch) { assert(batch < BATCH_MAX); size_t filled = batch_alloc(ptrs, batch, size, 0); @@ -214,5 +184,5 @@ main(void) { test_batch_alloc_zero, test_batch_alloc_aligned, test_batch_alloc_manual_arena, - test_batch_alloc_fallback); + test_batch_alloc_large); }