diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h
index 866b12fe..bb65c7a9 100644
--- a/include/jemalloc/internal/arena.h
+++ b/include/jemalloc/internal/arena.h
@@ -441,10 +441,12 @@ struct arena_s {
 	arena_bin_t bins[NBINS];
 
 	/*
-	 * Quantized address-ordered heaps of this arena's available runs.  The
-	 * heaps are used for first-best-fit run allocation.
+	 * Size-segregated address-ordered heaps of this arena's available runs,
+	 * used for first-best-fit run allocation.  Runs are quantized, i.e.
+	 * they reside in the last heap which corresponds to a size class less
+	 * than or equal to the run size.
 	 */
-	arena_run_heap_t runs_avail[1]; /* Dynamically sized. */
+	arena_run_heap_t runs_avail[NPSIZES];
 };
 
 /* Used in conjunction with tsd for fast arena-related context lookup. */
@@ -476,7 +478,6 @@ extern size_t map_bias; /* Number of arena chunk header pages. */
 extern size_t map_misc_offset;
 extern size_t arena_maxrun; /* Max run size for arenas. */
 extern size_t large_maxclass; /* Max large size class. */
-extern size_t run_quantize_max; /* Max run_quantize_*() input. */
 extern unsigned nlclasses; /* Number of large size classes. */
 extern unsigned nhclasses; /* Number of huge size classes. */
 
diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in
index 224ceddc..eabb9ce3 100644
--- a/include/jemalloc/internal/jemalloc_internal.h.in
+++ b/include/jemalloc/internal/jemalloc_internal.h.in
@@ -443,11 +443,16 @@ extern unsigned narenas_auto;
  */
 extern arena_t **arenas;
 
+/*
+ * pind2sz_tab encodes the same information as could be computed by
+ * pind2sz_compute().
+ */
+extern size_t const pind2sz_tab[NPSIZES];
 /*
  * index2size_tab encodes the same information as could be computed (at
  * unacceptable cost in some code paths) by index2size_compute().
  */
-extern size_t const index2size_tab[NSIZES+1];
+extern size_t const index2size_tab[NSIZES];
 /*
  * size2index_tab is a compact lookup table that rounds request sizes up to
  * size classes.  In order to reduce cache footprint, the table is compressed,
@@ -529,6 +534,8 @@ void jemalloc_postfork_child(void);
 
 #ifndef JEMALLOC_ENABLE_INLINE
 pszind_t psz2ind(size_t psz);
+size_t pind2sz_compute(pszind_t pind);
+size_t pind2sz_lookup(pszind_t pind);
 size_t pind2sz(pszind_t pind);
 size_t psz2u(size_t psz);
 szind_t size2index_compute(size_t size);
@@ -576,7 +583,7 @@ psz2ind(size_t psz)
 }
 
 JEMALLOC_INLINE size_t
-pind2sz(pszind_t pind)
+pind2sz_compute(pszind_t pind)
 {
 
 	{
@@ -596,6 +603,22 @@ pind2sz(pszind_t pind)
 	}
 }
 
+JEMALLOC_INLINE size_t
+pind2sz_lookup(pszind_t pind)
+{
+	size_t ret = (size_t)pind2sz_tab[pind];
+	assert(ret == pind2sz_compute(pind));
+	return (ret);
+}
+
+JEMALLOC_INLINE size_t
+pind2sz(pszind_t pind)
+{
+
+	assert(pind < NPSIZES);
+	return (pind2sz_lookup(pind));
+}
+
 JEMALLOC_INLINE size_t
 psz2u(size_t psz)
 {
diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt
index cbafc2b1..e046c3b1 100644
--- a/include/jemalloc/internal/private_symbols.txt
+++ b/include/jemalloc/internal/private_symbols.txt
@@ -394,6 +394,8 @@ pages_purge
 pages_trim
 pages_unmap
 pind2sz
+pind2sz_compute
+pind2sz_lookup
 pow2_ceil_u32
 pow2_ceil_u64
 pow2_ceil_zu
@@ -468,7 +470,6 @@ rtree_val_read
 rtree_val_write
 run_quantize_ceil
 run_quantize_floor
-run_quantize_max
 s2u
 s2u_compute
 s2u_lookup
diff --git a/include/jemalloc/internal/size_classes.sh b/include/jemalloc/internal/size_classes.sh
index ecee1a0a..d1b1db1e 100755
--- a/include/jemalloc/internal/size_classes.sh
+++ b/include/jemalloc/internal/size_classes.sh
@@ -40,6 +40,16 @@ lg() {
   done
 }
 
+reg_size_compute() {
+  lg_grp=$1
+  lg_delta=$2
+  ndelta=$3
+
+  pow2 ${lg_grp}; grp=${pow2_result}
+  pow2 ${lg_delta}; delta=${pow2_result}
+  reg_size=$((${grp} + ${delta}*${ndelta}))
+}
+
 run_size() {
   lg_p=$1
   lg_grp=$2
@@ -47,10 +57,7 @@ run_size() {
   ndelta=$4
 
   pow2 ${lg_p}; p=${pow2_result}
-
-  pow2 ${lg_grp}; grp=${pow2_result}
-  pow2 ${lg_delta}; delta=${pow2_result}
-  reg_size=$((${grp} + ${delta}*${ndelta}))
+  reg_size_compute ${lg_grp} ${lg_delta} ${ndelta}
 
   # Compute smallest run size that is an integer multiple of reg_size.
   try_run_size=${p}
diff --git a/src/arena.c b/src/arena.c
index ff119ba6..a0fd2ce6 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -34,14 +34,11 @@ size_t map_bias;
 size_t map_misc_offset;
 size_t arena_maxrun; /* Max run size for arenas. */
 size_t large_maxclass; /* Max large size class. */
-size_t run_quantize_max; /* Max run_quantize_*() input. */
 static bool *small_run_tab; /* Valid small run page multiples. */
 static size_t *run_quantize_floor_tab; /* run_quantize_floor() memoization. */
 static size_t *run_quantize_ceil_tab; /* run_quantize_ceil() memoization. */
 unsigned nlclasses; /* Number of large size classes. */
 unsigned nhclasses; /* Number of huge size classes. */
-static szind_t runs_avail_bias; /* Size index for first runs_avail tree. */
-static szind_t runs_avail_nclasses; /* Number of runs_avail trees. */
 
 /******************************************************************************/
 /*
@@ -177,7 +174,7 @@ run_quantize_floor(size_t size)
 	size_t ret;
 
 	assert(size > 0);
-	assert(size <= run_quantize_max);
+	assert(size <= HUGE_MAXCLASS);
 	assert((size & PAGE_MASK) == 0);
 
 	ret = run_quantize_floor_tab[(size >> LG_PAGE) - 1];
@@ -200,7 +197,7 @@ run_quantize_ceil(size_t size)
 	size_t ret;
 
 	assert(size > 0);
-	assert(size <= run_quantize_max);
+	assert(size <= HUGE_MAXCLASS);
 	assert((size & PAGE_MASK) == 0);
 
 	ret = run_quantize_ceil_tab[(size >> LG_PAGE) - 1];
@@ -213,25 +210,15 @@
 run_quantize_t *run_quantize_ceil = JEMALLOC_N(n_run_quantize_ceil);
 #endif
 
-static arena_run_heap_t *
-arena_runs_avail_get(arena_t *arena, szind_t ind)
-{
-
-	assert(ind >= runs_avail_bias);
-	assert(ind - runs_avail_bias < runs_avail_nclasses);
-
-	return (&arena->runs_avail[ind - runs_avail_bias]);
-}
-
 static void
 arena_avail_insert(arena_t *arena, arena_chunk_t *chunk, size_t pageind,
     size_t npages)
 {
-	szind_t ind = size2index(run_quantize_floor(arena_miscelm_size_get(
+	pszind_t pind = psz2ind(run_quantize_floor(arena_miscelm_size_get(
 	    arena_miscelm_get_const(chunk, pageind))));
 	assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >>
 	    LG_PAGE));
-	arena_run_heap_insert(arena_runs_avail_get(arena, ind),
+	arena_run_heap_insert(&arena->runs_avail[pind],
 	    arena_miscelm_get_mutable(chunk, pageind));
 }
 
@@ -239,11 +226,11 @@ static void
 arena_avail_remove(arena_t *arena, arena_chunk_t *chunk, size_t pageind,
     size_t npages)
 {
-	szind_t ind = size2index(run_quantize_floor(arena_miscelm_size_get(
+	pszind_t pind = psz2ind(run_quantize_floor(arena_miscelm_size_get(
 	    arena_miscelm_get_const(chunk, pageind))));
 	assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >>
 	    LG_PAGE));
-	arena_run_heap_remove(arena_runs_avail_get(arena, ind),
+	arena_run_heap_remove(&arena->runs_avail[pind],
 	    arena_miscelm_get_mutable(chunk, pageind));
 }
 
@@ -1088,12 +1075,13 @@ arena_chunk_ralloc_huge_expand(tsdn_t *tsdn, arena_t *arena, void *chunk,
 static arena_run_t *
 arena_run_first_best_fit(arena_t *arena, size_t size)
 {
-	szind_t ind, i;
+	pszind_t pind, i;
 
-	ind = size2index(run_quantize_ceil(size));
-	for (i = ind; i < runs_avail_nclasses + runs_avail_bias; i++) {
+	pind = psz2ind(run_quantize_ceil(size));
+
+	for (i = pind; pind2sz(i) <= large_maxclass; i++) {
 		arena_chunk_map_misc_t *miscelm = arena_run_heap_first(
-		    arena_runs_avail_get(arena, i));
+		    &arena->runs_avail[i]);
 		if (miscelm != NULL)
 			return (&miscelm->run);
 	}
@@ -1946,7 +1934,8 @@ arena_reset(tsd_t *tsd, arena_t *arena)
 	assert(!arena->purging);
 	arena->nactive = 0;
 
-	for(i = 0; i < runs_avail_nclasses; i++)
+	for (i = 0; i < sizeof(arena->runs_avail) / sizeof(arena_run_heap_t);
+	    i++)
 		arena_run_heap_new(&arena->runs_avail[i]);
 
 	malloc_mutex_unlock(tsd_tsdn(tsd), &arena->lock);
@@ -3388,23 +3377,19 @@ arena_t *
 arena_new(tsdn_t *tsdn, unsigned ind)
 {
 	arena_t *arena;
-	size_t arena_size;
 	unsigned i;
 
-	/* Compute arena size to incorporate sufficient runs_avail elements. */
-	arena_size = offsetof(arena_t, runs_avail) + (sizeof(arena_run_heap_t) *
-	    runs_avail_nclasses);
 	/*
 	 * Allocate arena, arena->lstats, and arena->hstats contiguously, mainly
 	 * because there is no way to clean up if base_alloc() OOMs.
 	 */
 	if (config_stats) {
 		arena = (arena_t *)base_alloc(tsdn,
-		    CACHELINE_CEILING(arena_size) + QUANTUM_CEILING(nlclasses *
-		    sizeof(malloc_large_stats_t) + nhclasses) *
-		    sizeof(malloc_huge_stats_t));
+		    CACHELINE_CEILING(sizeof(arena_t)) +
+		    QUANTUM_CEILING((nlclasses * sizeof(malloc_large_stats_t)) +
+		    (nhclasses * sizeof(malloc_huge_stats_t))));
 	} else
-		arena = (arena_t *)base_alloc(tsdn, arena_size);
+		arena = (arena_t *)base_alloc(tsdn, sizeof(arena_t));
 	if (arena == NULL)
 		return (NULL);
 
@@ -3416,11 +3401,11 @@ arena_new(tsdn_t *tsdn, unsigned ind)
 	if (config_stats) {
 		memset(&arena->stats, 0, sizeof(arena_stats_t));
 		arena->stats.lstats = (malloc_large_stats_t *)((uintptr_t)arena
-		    + CACHELINE_CEILING(arena_size));
+		    + CACHELINE_CEILING(sizeof(arena_t)));
 		memset(arena->stats.lstats, 0, nlclasses *
 		    sizeof(malloc_large_stats_t));
 		arena->stats.hstats = (malloc_huge_stats_t *)((uintptr_t)arena
-		    + CACHELINE_CEILING(arena_size) +
+		    + CACHELINE_CEILING(sizeof(arena_t)) +
 		    QUANTUM_CEILING(nlclasses * sizeof(malloc_large_stats_t)));
 		memset(arena->stats.hstats, 0, nhclasses *
 		    sizeof(malloc_huge_stats_t));
@@ -3454,8 +3439,10 @@ arena_new(tsdn_t *tsdn, unsigned ind)
 	arena->nactive = 0;
 	arena->ndirty = 0;
 
-	for(i = 0; i < runs_avail_nclasses; i++)
+	for (i = 0; i < sizeof(arena->runs_avail) / sizeof(arena_run_heap_t);
+	    i++)
 		arena_run_heap_new(&arena->runs_avail[i]);
+
 	qr_new(&arena->runs_dirty, rd_link);
 	qr_new(&arena->chunks_cache, cc_link);
 
@@ -3526,6 +3513,7 @@ small_run_size_init(void)
 static bool
 run_quantize_init(void)
 {
+	size_t run_quantize_max;
 	unsigned i;
 
 	run_quantize_max = chunksize + large_pad;
@@ -3604,9 +3592,6 @@ arena_boot(void)
 	if (run_quantize_init())
 		return (true);
 
-	runs_avail_bias = size2index(PAGE);
-	runs_avail_nclasses = size2index(run_quantize_max)+1 - runs_avail_bias;
-
 	return (false);
 }
 
diff --git a/src/jemalloc.c b/src/jemalloc.c
index b907d9e5..849e9418 100644
--- a/src/jemalloc.c
+++ b/src/jemalloc.c
@@ -78,14 +78,25 @@ enum {
 };
 static uint8_t malloc_slow_flags;
 
-/* Last entry for overflow detection only. */
 JEMALLOC_ALIGNED(CACHELINE)
-const size_t index2size_tab[NSIZES+1] = {
+const size_t pind2sz_tab[NPSIZES] = {
+#define PSZ_yes(lg_grp, ndelta, lg_delta) \
+	(((ZU(1)<> LG_PAGE; i++) {
+	for (i = 1; i <= large_maxclass >> LG_PAGE; i++) {
 		size_t run_size, floor, ceil;
 
 		run_size = i << LG_PAGE;
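Illustration (not part of the patch): with this change a run's heap is selected directly by page size class, i.e. runs_avail[psz2ind(run_quantize_floor(run_size))], and arena_run_first_best_fit() scans upward from psz2ind(run_quantize_ceil(size)) while pind2sz(i) <= large_maxclass, instead of offsetting a szind_t by runs_avail_bias. The standalone sketch below shows the page-size-class mapping those lookups rely on; the 4 KiB page, the four-classes-per-doubling grouping, and the toy_* helper names are assumptions made for the example only. jemalloc itself generates the classes via size_classes.sh and computes psz2ind() with bit arithmetic rather than a scan.

/*
 * Standalone sketch (assumed constants; not jemalloc source) of the page
 * size class mapping that runs_avail[NPSIZES] is indexed by.
 */
#include <assert.h>
#include <stddef.h>
#include <stdio.h>

#define LG_PAGE			12	/* assume 4 KiB pages */
#define LG_SIZE_CLASS_GROUP	2	/* assume 4 size classes per doubling */

/* Size of page size class pind: 4K, 8K, 12K, 16K, 20K, 24K, 28K, 32K, 40K... */
static size_t
toy_pind2sz(size_t pind)
{
	size_t grp = pind >> LG_SIZE_CLASS_GROUP;
	size_t mod = pind & (((size_t)1 << LG_SIZE_CLASS_GROUP) - 1);
	size_t lg_delta = (grp == 0) ? LG_PAGE : LG_PAGE + grp - 1;
	size_t grp_base = (grp == 0) ? 0 :
	    (size_t)1 << (LG_PAGE + LG_SIZE_CLASS_GROUP + grp - 1);

	return (grp_base + ((mod + 1) << lg_delta));
}

/* Smallest class index whose size is >= psz (psz must be > 0). */
static size_t
toy_psz2ind(size_t psz)
{
	size_t pind = 0;

	/* Linear scan for clarity; the real psz2ind() uses lg_floor() math. */
	while (toy_pind2sz(pind) < psz)
		pind++;
	return (pind);
}

int
main(void)
{
	/*
	 * A 28 KiB run is itself a page size class, so floor and ceil
	 * quantization agree and it maps to a single runs_avail slot.
	 */
	size_t run_size = (size_t)7 << LG_PAGE;
	size_t pind = toy_psz2ind(run_size);

	assert(toy_pind2sz(pind) >= run_size);
	assert(pind == 0 || toy_pind2sz(pind - 1) < run_size);
	printf("%zu-byte run -> runs_avail[%zu] (class size %zu)\n",
	    run_size, pind, toy_pind2sz(pind));
	return (0);
}

With these assumed constants the classes run 4K, 8K, 12K, 16K, 20K, ..., so a quantized run lands in exactly one heap and the first-best-fit search only probes heaps whose class size is at least the requested size.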