Tcache flush: keep common path state in registers.
By carefully force-inlining the division constants and the operation sum count, we can eliminate redundant operations in the arena-level dalloc function. Do so.
This commit is contained in:
committed by
David Goldblatt
parent
31a629c3de
commit
229994a204
@@ -2,6 +2,7 @@
|
||||
#define JEMALLOC_INTERNAL_ARENA_EXTERNS_H
|
||||
|
||||
#include "jemalloc/internal/bin.h"
|
||||
#include "jemalloc/internal/div.h"
|
||||
#include "jemalloc/internal/extent_dss.h"
|
||||
#include "jemalloc/internal/hook.h"
|
||||
#include "jemalloc/internal/pages.h"
|
||||
@@ -13,6 +14,8 @@ extern ssize_t opt_muzzy_decay_ms;
|
||||
extern percpu_arena_mode_t opt_percpu_arena;
|
||||
extern const char *percpu_arena_mode_names[];
|
||||
|
||||
extern div_info_t arena_binind_div_info[SC_NBINS];
|
||||
|
||||
extern const uint64_t h_steps[SMOOTHSTEP_NSTEPS];
|
||||
extern malloc_mutex_t arenas_lock;
|
||||
extern emap_t arena_emap_global;
|
||||
@@ -29,9 +32,6 @@ void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads,
|
||||
bin_stats_data_t *bstats, arena_stats_large_t *lstats,
|
||||
pac_estats_t *estats, hpa_shard_stats_t *hpastats, sec_stats_t *secstats);
|
||||
void arena_handle_new_dirty_pages(tsdn_t *tsdn, arena_t *arena);
|
||||
#ifdef JEMALLOC_JET
|
||||
size_t arena_slab_regind(edata_t *slab, szind_t binind, const void *ptr);
|
||||
#endif
|
||||
edata_t *arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena,
|
||||
size_t usize, size_t alignment, bool zero);
|
||||
void arena_extent_dalloc_large_prep(tsdn_t *tsdn, arena_t *arena,
|
||||
@@ -59,8 +59,11 @@ void arena_prof_promote(tsdn_t *tsdn, void *ptr, size_t usize);
|
||||
void arena_dalloc_promoted(tsdn_t *tsdn, void *ptr, tcache_t *tcache,
|
||||
bool slow_path);
|
||||
void arena_slab_dalloc(tsdn_t *tsdn, arena_t *arena, edata_t *slab);
|
||||
bool arena_dalloc_bin_locked(tsdn_t *tsdn, arena_t *arena, bin_t *bin,
|
||||
szind_t binind, edata_t *edata, void *ptr);
|
||||
|
||||
void arena_dalloc_bin_locked_handle_newly_empty(tsdn_t *tsdn, arena_t *arena,
|
||||
edata_t *slab, bin_t *bin);
|
||||
void arena_dalloc_bin_locked_handle_newly_nonempty(tsdn_t *tsdn, arena_t *arena,
|
||||
edata_t *slab, bin_t *bin);
|
||||
void arena_dalloc_small(tsdn_t *tsdn, void *ptr);
|
||||
bool arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size,
|
||||
size_t extra, bool zero, size_t *newsize);
|
||||
|
@@ -1,6 +1,7 @@
|
||||
#ifndef JEMALLOC_INTERNAL_ARENA_INLINES_B_H
|
||||
#define JEMALLOC_INTERNAL_ARENA_INLINES_B_H
|
||||
|
||||
#include "jemalloc/internal/div.h"
|
||||
#include "jemalloc/internal/emap.h"
|
||||
#include "jemalloc/internal/jemalloc_internal_types.h"
|
||||
#include "jemalloc/internal/mutex.h"
|
||||
@@ -441,4 +442,96 @@ arena_cache_oblivious_randomize(tsdn_t *tsdn, arena_t *arena, edata_t *edata,
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* The dalloc bin info contains just the information that the common paths need
|
||||
* during tcache flushes. By force-inlining these paths, and using local copies
|
||||
* of data (so that the compiler knows it's constant), we avoid a whole bunch of
|
||||
* redundant loads and stores by leaving this information in registers.
|
||||
*/
|
||||
typedef struct arena_dalloc_bin_locked_info_s arena_dalloc_bin_locked_info_t;
|
||||
struct arena_dalloc_bin_locked_info_s {
|
||||
div_info_t div_info;
|
||||
uint32_t nregs;
|
||||
uint64_t ndalloc;
|
||||
};
|
||||
|
||||
JEMALLOC_ALWAYS_INLINE size_t
|
||||
arena_slab_regind(arena_dalloc_bin_locked_info_t *info, szind_t binind,
|
||||
edata_t *slab, const void *ptr) {
|
||||
size_t diff, regind;
|
||||
|
||||
/* Freeing a pointer outside the slab can cause assertion failure. */
|
||||
assert((uintptr_t)ptr >= (uintptr_t)edata_addr_get(slab));
|
||||
assert((uintptr_t)ptr < (uintptr_t)edata_past_get(slab));
|
||||
/* Freeing an interior pointer can cause assertion failure. */
|
||||
assert(((uintptr_t)ptr - (uintptr_t)edata_addr_get(slab)) %
|
||||
(uintptr_t)bin_infos[binind].reg_size == 0);
|
||||
|
||||
diff = (size_t)((uintptr_t)ptr - (uintptr_t)edata_addr_get(slab));
|
||||
|
||||
/* Avoid doing division with a variable divisor. */
|
||||
regind = div_compute(&info->div_info, diff);
|
||||
|
||||
assert(regind < bin_infos[binind].nregs);
|
||||
|
||||
return regind;
|
||||
}
|
||||
|
||||
JEMALLOC_ALWAYS_INLINE void
|
||||
arena_dalloc_bin_locked_begin(arena_dalloc_bin_locked_info_t *info,
|
||||
szind_t binind) {
|
||||
info->div_info = arena_binind_div_info[binind];
|
||||
info->nregs = bin_infos[binind].nregs;
|
||||
info->ndalloc = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Does the deallocation work associated with freeing a single pointer (a
|
||||
* "step") in between a arena_dalloc_bin_locked begin and end call.
|
||||
*
|
||||
* Returns true if arena_slab_dalloc must be called on slab. Doesn't do
|
||||
* stats updates, which happen during finish (this lets running counts get left
|
||||
* in a register).
|
||||
*/
|
||||
JEMALLOC_ALWAYS_INLINE bool
|
||||
arena_dalloc_bin_locked_step(tsdn_t *tsdn, arena_t *arena, bin_t *bin,
|
||||
arena_dalloc_bin_locked_info_t *info, szind_t binind, edata_t *slab,
|
||||
void *ptr) {
|
||||
const bin_info_t *bin_info = &bin_infos[binind];
|
||||
size_t regind = arena_slab_regind(info, binind, slab, ptr);
|
||||
slab_data_t *slab_data = edata_slab_data_get(slab);
|
||||
|
||||
assert(edata_nfree_get(slab) < bin_info->nregs);
|
||||
/* Freeing an unallocated pointer can cause assertion failure. */
|
||||
assert(bitmap_get(slab_data->bitmap, &bin_info->bitmap_info, regind));
|
||||
|
||||
bitmap_unset(slab_data->bitmap, &bin_info->bitmap_info, regind);
|
||||
edata_nfree_inc(slab);
|
||||
|
||||
if (config_stats) {
|
||||
info->ndalloc++;
|
||||
}
|
||||
|
||||
unsigned nfree = edata_nfree_get(slab);
|
||||
if (nfree == bin_info->nregs) {
|
||||
arena_dalloc_bin_locked_handle_newly_empty(tsdn, arena, slab,
|
||||
bin);
|
||||
return true;
|
||||
} else if (nfree == 1 && slab != bin->slabcur) {
|
||||
arena_dalloc_bin_locked_handle_newly_nonempty(tsdn, arena, slab,
|
||||
bin);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
JEMALLOC_ALWAYS_INLINE void
|
||||
arena_dalloc_bin_locked_finish(tsdn_t *tsdn, arena_t *arena, bin_t *bin,
|
||||
arena_dalloc_bin_locked_info_t *info) {
|
||||
if (config_stats) {
|
||||
bin->stats.ndalloc += info->ndalloc;
|
||||
assert(bin->stats.curregs >= (size_t)info->ndalloc);
|
||||
bin->stats.curregs -= (size_t)info->ndalloc;
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* JEMALLOC_INTERNAL_ARENA_INLINES_B_H */
|
||||
|
Reference in New Issue
Block a user