Tcache flush: keep common path state in registers.
By force-inlining the common paths and keeping the division constants and the running operation count in locals, we can eliminate redundant operations in the arena-level dalloc function. Do so.
commit 229994a204 (parent 31a629c3de)
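
The new helpers split the locked small-object dalloc path into begin/step/finish phases so that the per-bin constants and the running stats count can stay in registers across a whole flush. Below is a condensed sketch of the intended call pattern, paraphrased from the tcache.c changes later in this diff; nflush, ptrs, edatas, dalloc_slabs, and dalloc_count are illustrative locals, not the exact names in the flush loop:

	arena_dalloc_bin_locked_info_t info;
	malloc_mutex_lock(tsdn, &bin->lock);
	arena_dalloc_bin_locked_begin(&info, binind);
	for (unsigned i = 0; i < nflush; i++) {
		/* One "step" per freed pointer; stats accumulate in info. */
		if (arena_dalloc_bin_locked_step(tsdn, arena, bin, &info,
		    binind, edatas[i], ptrs[i])) {
			/* Slab became empty; dalloc it after dropping the lock. */
			dalloc_slabs[dalloc_count++] = edatas[i];
		}
	}
	/* Apply the batched ndalloc/curregs updates once. */
	arena_dalloc_bin_locked_finish(tsdn, arena, bin, &info);
	malloc_mutex_unlock(tsdn, &bin->lock);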
--- a/include/jemalloc/internal/arena_externs.h
+++ b/include/jemalloc/internal/arena_externs.h
@@ -2,6 +2,7 @@
 #define JEMALLOC_INTERNAL_ARENA_EXTERNS_H
 
 #include "jemalloc/internal/bin.h"
+#include "jemalloc/internal/div.h"
 #include "jemalloc/internal/extent_dss.h"
 #include "jemalloc/internal/hook.h"
 #include "jemalloc/internal/pages.h"
@@ -13,6 +14,8 @@ extern ssize_t opt_muzzy_decay_ms;
 extern percpu_arena_mode_t opt_percpu_arena;
 extern const char *percpu_arena_mode_names[];
 
+extern div_info_t arena_binind_div_info[SC_NBINS];
+
 extern const uint64_t h_steps[SMOOTHSTEP_NSTEPS];
 extern malloc_mutex_t arenas_lock;
 extern emap_t arena_emap_global;
@@ -29,9 +32,6 @@ void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads,
     bin_stats_data_t *bstats, arena_stats_large_t *lstats,
     pac_estats_t *estats, hpa_shard_stats_t *hpastats, sec_stats_t *secstats);
 void arena_handle_new_dirty_pages(tsdn_t *tsdn, arena_t *arena);
-#ifdef JEMALLOC_JET
-size_t arena_slab_regind(edata_t *slab, szind_t binind, const void *ptr);
-#endif
 edata_t *arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena,
     size_t usize, size_t alignment, bool zero);
 void arena_extent_dalloc_large_prep(tsdn_t *tsdn, arena_t *arena,
@@ -59,8 +59,11 @@ void arena_prof_promote(tsdn_t *tsdn, void *ptr, size_t usize);
 void arena_dalloc_promoted(tsdn_t *tsdn, void *ptr, tcache_t *tcache,
     bool slow_path);
 void arena_slab_dalloc(tsdn_t *tsdn, arena_t *arena, edata_t *slab);
-bool arena_dalloc_bin_locked(tsdn_t *tsdn, arena_t *arena, bin_t *bin,
-    szind_t binind, edata_t *edata, void *ptr);
+
+void arena_dalloc_bin_locked_handle_newly_empty(tsdn_t *tsdn, arena_t *arena,
+    edata_t *slab, bin_t *bin);
+void arena_dalloc_bin_locked_handle_newly_nonempty(tsdn_t *tsdn, arena_t *arena,
+    edata_t *slab, bin_t *bin);
 void arena_dalloc_small(tsdn_t *tsdn, void *ptr);
 bool arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size,
     size_t extra, bool zero, size_t *newsize);
--- a/include/jemalloc/internal/arena_inlines_b.h
+++ b/include/jemalloc/internal/arena_inlines_b.h
@@ -1,6 +1,7 @@
 #ifndef JEMALLOC_INTERNAL_ARENA_INLINES_B_H
 #define JEMALLOC_INTERNAL_ARENA_INLINES_B_H
 
+#include "jemalloc/internal/div.h"
 #include "jemalloc/internal/emap.h"
 #include "jemalloc/internal/jemalloc_internal_types.h"
 #include "jemalloc/internal/mutex.h"
@@ -441,4 +442,96 @@ arena_cache_oblivious_randomize(tsdn_t *tsdn, arena_t *arena, edata_t *edata,
 	}
 }
 
+/*
+ * The dalloc bin info contains just the information that the common paths need
+ * during tcache flushes. By force-inlining these paths, and using local copies
+ * of data (so that the compiler knows it's constant), we avoid a whole bunch of
+ * redundant loads and stores by leaving this information in registers.
+ */
+typedef struct arena_dalloc_bin_locked_info_s arena_dalloc_bin_locked_info_t;
+struct arena_dalloc_bin_locked_info_s {
+	div_info_t div_info;
+	uint32_t nregs;
+	uint64_t ndalloc;
+};
+
+JEMALLOC_ALWAYS_INLINE size_t
+arena_slab_regind(arena_dalloc_bin_locked_info_t *info, szind_t binind,
+    edata_t *slab, const void *ptr) {
+	size_t diff, regind;
+
+	/* Freeing a pointer outside the slab can cause assertion failure. */
+	assert((uintptr_t)ptr >= (uintptr_t)edata_addr_get(slab));
+	assert((uintptr_t)ptr < (uintptr_t)edata_past_get(slab));
+	/* Freeing an interior pointer can cause assertion failure. */
+	assert(((uintptr_t)ptr - (uintptr_t)edata_addr_get(slab)) %
+	    (uintptr_t)bin_infos[binind].reg_size == 0);
+
+	diff = (size_t)((uintptr_t)ptr - (uintptr_t)edata_addr_get(slab));
+
+	/* Avoid doing division with a variable divisor. */
+	regind = div_compute(&info->div_info, diff);
+
+	assert(regind < bin_infos[binind].nregs);
+
+	return regind;
+}
+
+JEMALLOC_ALWAYS_INLINE void
+arena_dalloc_bin_locked_begin(arena_dalloc_bin_locked_info_t *info,
+    szind_t binind) {
+	info->div_info = arena_binind_div_info[binind];
+	info->nregs = bin_infos[binind].nregs;
+	info->ndalloc = 0;
+}
+
+/*
+ * Does the deallocation work associated with freeing a single pointer (a
+ * "step") in between an arena_dalloc_bin_locked begin and end call.
+ *
+ * Returns true if arena_slab_dalloc must be called on slab. Doesn't do
+ * stats updates, which happen during finish (this lets running counts get left
+ * in a register).
+ */
+JEMALLOC_ALWAYS_INLINE bool
+arena_dalloc_bin_locked_step(tsdn_t *tsdn, arena_t *arena, bin_t *bin,
+    arena_dalloc_bin_locked_info_t *info, szind_t binind, edata_t *slab,
+    void *ptr) {
+	const bin_info_t *bin_info = &bin_infos[binind];
+	size_t regind = arena_slab_regind(info, binind, slab, ptr);
+	slab_data_t *slab_data = edata_slab_data_get(slab);
+
+	assert(edata_nfree_get(slab) < bin_info->nregs);
+	/* Freeing an unallocated pointer can cause assertion failure. */
+	assert(bitmap_get(slab_data->bitmap, &bin_info->bitmap_info, regind));
+
+	bitmap_unset(slab_data->bitmap, &bin_info->bitmap_info, regind);
+	edata_nfree_inc(slab);
+
+	if (config_stats) {
+		info->ndalloc++;
+	}
+
+	unsigned nfree = edata_nfree_get(slab);
+	if (nfree == bin_info->nregs) {
+		arena_dalloc_bin_locked_handle_newly_empty(tsdn, arena, slab,
+		    bin);
+		return true;
+	} else if (nfree == 1 && slab != bin->slabcur) {
+		arena_dalloc_bin_locked_handle_newly_nonempty(tsdn, arena, slab,
+		    bin);
+	}
+	return false;
+}
+
+JEMALLOC_ALWAYS_INLINE void
+arena_dalloc_bin_locked_finish(tsdn_t *tsdn, arena_t *arena, bin_t *bin,
+    arena_dalloc_bin_locked_info_t *info) {
+	if (config_stats) {
+		bin->stats.ndalloc += info->ndalloc;
+		assert(bin->stats.curregs >= (size_t)info->ndalloc);
+		bin->stats.curregs -= (size_t)info->ndalloc;
+	}
+}
+
 #endif /* JEMALLOC_INTERNAL_ARENA_INLINES_B_H */
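
The regind computation above leans on div_compute, which divides by the bin's region size using a constant precomputed at boot (arena_binind_div_info). The standalone program below reconstructs the underlying magic-number trick under stated assumptions: it mirrors the spirit of jemalloc's div module (div_init/div_compute) but is not the library's exact code, and the my_div_info_t layout is invented for this sketch. For a divisor d > 1 and any n < 2^32 that is an exact multiple of d (the asserts above guarantee diff is a multiple of reg_size), (n * ceil(2^32 / d)) >> 32 equals n / d, turning the division into a multiply and a shift.

	#include <assert.h>
	#include <stdint.h>
	#include <stdio.h>

	/* Hypothetical precomputed-divisor state; layout assumed for this sketch. */
	typedef struct {
		uint32_t magic;	/* ceil(2^32 / d) */
	} my_div_info_t;

	static void
	my_div_init(my_div_info_t *info, uint32_t d) {
		assert(d > 1);
		uint64_t two_to_32 = (uint64_t)1 << 32;
		uint32_t magic = (uint32_t)(two_to_32 / d);
		if (two_to_32 % d != 0) {
			magic++;	/* Round floor up to ceil. */
		}
		info->magic = magic;
	}

	/* Exact whenever n is a multiple of d and n < 2^32. */
	static uint32_t
	my_div_compute(const my_div_info_t *info, uint32_t n) {
		return (uint32_t)(((uint64_t)n * info->magic) >> 32);
	}

	int
	main(void) {
		my_div_info_t info;
		my_div_init(&info, 48);	/* e.g. a 48-byte region size */
		for (uint32_t q = 0; q < 100000; q++) {
			assert(my_div_compute(&info, q * 48) == q);
		}
		printf("ok\n");
		return 0;
	}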
--- a/src/arena.c
+++ b/src/arena.c
@@ -3,7 +3,6 @@
 
 #include "jemalloc/internal/assert.h"
 #include "jemalloc/internal/decay.h"
-#include "jemalloc/internal/div.h"
 #include "jemalloc/internal/ehooks.h"
 #include "jemalloc/internal/extent_dss.h"
 #include "jemalloc/internal/extent_mmap.h"
@@ -45,7 +44,7 @@ const uint64_t h_steps[SMOOTHSTEP_NSTEPS] = {
 #undef STEP
 };
 
-static div_info_t arena_binind_div_info[SC_NBINS];
+div_info_t arena_binind_div_info[SC_NBINS];
 
 size_t opt_oversize_threshold = OVERSIZE_THRESHOLD_DEFAULT;
 size_t oversize_threshold = OVERSIZE_THRESHOLD_DEFAULT;
@@ -260,44 +259,6 @@ arena_slab_reg_alloc_batch(edata_t *slab, const bin_info_t *bin_info,
 	edata_nfree_sub(slab, cnt);
 }
 
-#ifndef JEMALLOC_JET
-static
-#endif
-size_t
-arena_slab_regind(edata_t *slab, szind_t binind, const void *ptr) {
-	size_t diff, regind;
-
-	/* Freeing a pointer outside the slab can cause assertion failure. */
-	assert((uintptr_t)ptr >= (uintptr_t)edata_addr_get(slab));
-	assert((uintptr_t)ptr < (uintptr_t)edata_past_get(slab));
-	/* Freeing an interior pointer can cause assertion failure. */
-	assert(((uintptr_t)ptr - (uintptr_t)edata_addr_get(slab)) %
-	    (uintptr_t)bin_infos[binind].reg_size == 0);
-
-	diff = (size_t)((uintptr_t)ptr - (uintptr_t)edata_addr_get(slab));
-
-	/* Avoid doing division with a variable divisor. */
-	regind = div_compute(&arena_binind_div_info[binind], diff);
-
-	assert(regind < bin_infos[binind].nregs);
-
-	return regind;
-}
-
-static void
-arena_slab_reg_dalloc(edata_t *slab, slab_data_t *slab_data, void *ptr) {
-	szind_t binind = edata_szind_get(slab);
-	const bin_info_t *bin_info = &bin_infos[binind];
-	size_t regind = arena_slab_regind(slab, binind, ptr);
-
-	assert(edata_nfree_get(slab) < bin_info->nregs);
-	/* Freeing an unallocated pointer can cause assertion failure. */
-	assert(bitmap_get(slab_data->bitmap, &bin_info->bitmap_info, regind));
-
-	bitmap_unset(slab_data->bitmap, &bin_info->bitmap_info, regind);
-	edata_nfree_inc(slab);
-}
-
 static void
 arena_large_malloc_stats_update(tsdn_t *tsdn, arena_t *arena, size_t usize) {
 	szind_t index, hindex;
@@ -1189,39 +1150,20 @@ arena_dalloc_bin_slab_prepare(tsdn_t *tsdn, edata_t *slab, bin_t *bin) {
 	}
 }
 
-/* Returns true if arena_slab_dalloc must be called on slab */
-static bool
-arena_dalloc_bin_locked_impl(tsdn_t *tsdn, arena_t *arena, bin_t *bin,
-    szind_t binind, edata_t *slab, void *ptr) {
-	const bin_info_t *bin_info = &bin_infos[binind];
-	arena_slab_reg_dalloc(slab, edata_slab_data_get(slab), ptr);
-
-	bool ret = false;
-	unsigned nfree = edata_nfree_get(slab);
-	if (nfree == bin_info->nregs) {
-		arena_dissociate_bin_slab(arena, slab, bin);
-		arena_dalloc_bin_slab_prepare(tsdn, slab, bin);
-		ret = true;
-	} else if (nfree == 1 && slab != bin->slabcur) {
-		arena_bin_slabs_full_remove(arena, bin, slab);
-		arena_bin_lower_slab(tsdn, arena, slab, bin);
-	}
-
-	if (config_stats) {
-		bin->stats.ndalloc++;
-		bin->stats.curregs--;
-	}
-
-	return ret;
-}
-
-bool
-arena_dalloc_bin_locked(tsdn_t *tsdn, arena_t *arena, bin_t *bin,
-    szind_t binind, edata_t *edata, void *ptr) {
-	return arena_dalloc_bin_locked_impl(tsdn, arena, bin, binind, edata,
-	    ptr);
+void
+arena_dalloc_bin_locked_handle_newly_empty(tsdn_t *tsdn, arena_t *arena,
+    edata_t *slab, bin_t *bin) {
+	arena_dissociate_bin_slab(arena, slab, bin);
+	arena_dalloc_bin_slab_prepare(tsdn, slab, bin);
+}
+
+void
+arena_dalloc_bin_locked_handle_newly_nonempty(tsdn_t *tsdn, arena_t *arena,
+    edata_t *slab, bin_t *bin) {
+	arena_bin_slabs_full_remove(arena, bin, slab);
+	arena_bin_lower_slab(tsdn, arena, slab, bin);
 }
 
 static void
 arena_dalloc_bin(tsdn_t *tsdn, arena_t *arena, edata_t *edata, void *ptr) {
 	szind_t binind = edata_szind_get(edata);
@@ -1229,8 +1171,11 @@ arena_dalloc_bin(tsdn_t *tsdn, arena_t *arena, edata_t *edata, void *ptr) {
 	bin_t *bin = &arena->bins[binind].bin_shards[binshard];
 
 	malloc_mutex_lock(tsdn, &bin->lock);
-	bool ret = arena_dalloc_bin_locked_impl(tsdn, arena, bin, binind, edata,
-	    ptr);
+	arena_dalloc_bin_locked_info_t info;
+	arena_dalloc_bin_locked_begin(&info, binind);
+	bool ret = arena_dalloc_bin_locked_step(tsdn, arena, bin,
+	    &info, binind, edata, ptr);
+	arena_dalloc_bin_locked_finish(tsdn, arena, bin, &info);
 	malloc_mutex_unlock(tsdn, &bin->lock);
 
 	if (ret) {
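
Note that for a single free, arena_dalloc_bin above just runs begin, one step, and finish back to back under the bin lock; since all three helpers are JEMALLOC_ALWAYS_INLINE, the compiler can fold the on-stack info struct away, so the one-pointer path should cost about the same as the old arena_dalloc_bin_locked_impl. The payoff is in the flush loop below, where begin and finish are hoisted out of the per-pointer iteration.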
--- a/src/tcache.c
+++ b/src/tcache.c
@@ -399,6 +399,10 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin,
 
 		/* Deallocate whatever we can. */
 		unsigned ndeferred = 0;
+		arena_dalloc_bin_locked_info_t dalloc_bin_info;
+		if (small) {
+			arena_dalloc_bin_locked_begin(&dalloc_bin_info, binind);
+		}
 		for (unsigned i = 0; i < nflush; i++) {
 			void *ptr = cache_bin_ptr_array_get(&ptrs, i);
 			edata = item_edata[i].edata;
@@ -417,8 +421,9 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin,
 				continue;
 			}
 			if (small) {
-				if (arena_dalloc_bin_locked(tsdn, cur_arena,
-				    cur_bin, binind, edata, ptr)) {
+				if (arena_dalloc_bin_locked_step(tsdn,
+				    cur_arena, cur_bin, &dalloc_bin_info,
+				    binind, edata, ptr)) {
 					dalloc_slabs[dalloc_count] = edata;
 					dalloc_count++;
 				}
@@ -432,6 +437,8 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin,
 		}
 
 		if (small) {
+			arena_dalloc_bin_locked_finish(tsdn, cur_arena, cur_bin,
+			    &dalloc_bin_info);
 			malloc_mutex_unlock(tsdn, &cur_bin->lock);
 		}
 		arena_decay_ticks(tsdn, cur_arena, nflush - ndeferred);
--- a/test/unit/slab.c
+++ b/test/unit/slab.c
@@ -16,10 +16,13 @@ TEST_BEGIN(test_arena_slab_regind) {
 	    EXTENT_NOT_HEAD);
 	expect_ptr_not_null(edata_addr_get(&slab),
 	    "Unexpected malloc() failure");
+	arena_dalloc_bin_locked_info_t dalloc_info;
+	arena_dalloc_bin_locked_begin(&dalloc_info, binind);
 	for (regind = 0; regind < bin_info->nregs; regind++) {
 		void *reg = (void *)((uintptr_t)edata_addr_get(&slab) +
 		    (bin_info->reg_size * regind));
-		expect_zu_eq(arena_slab_regind(&slab, binind, reg),
+		expect_zu_eq(arena_slab_regind(&dalloc_info, binind,
+		    &slab, reg),
 		    regind,
 		    "Incorrect region index computed for size %zu",
 		    bin_info->reg_size);
|
Loading…
Reference in New Issue
Block a user