From 49f7e8f35ac63d0dd526cf68791dc0ca29538ac9 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 15 Mar 2011 13:59:15 -0700 Subject: [PATCH] Create arena_bin_info_t. Move read-only fields from arena_bin_t into arena_bin_info_t, primarily in order to avoid false cacheline sharing. --- jemalloc/include/jemalloc/internal/arena.h | 95 ++-- .../jemalloc/internal/jemalloc_internal.h.in | 8 +- jemalloc/include/jemalloc/internal/tcache.h | 6 +- jemalloc/src/arena.c | 426 ++++++++++-------- jemalloc/src/ctl.c | 6 +- jemalloc/src/tcache.c | 6 +- 6 files changed, 324 insertions(+), 223 deletions(-) diff --git a/jemalloc/include/jemalloc/internal/arena.h b/jemalloc/include/jemalloc/internal/arena.h index 78828ef5..467ec65f 100644 --- a/jemalloc/include/jemalloc/internal/arena.h +++ b/jemalloc/include/jemalloc/internal/arena.h @@ -71,6 +71,7 @@ typedef struct arena_chunk_map_s arena_chunk_map_t; typedef struct arena_chunk_s arena_chunk_t; typedef struct arena_run_s arena_run_t; +typedef struct arena_bin_info_s arena_bin_info_t; typedef struct arena_bin_s arena_bin_t; typedef struct arena_s arena_t; @@ -218,6 +219,33 @@ struct arena_run_s { unsigned nfree; }; +/* + * Read-only information associated with each element for arena_t's bins array + * is stored separately, partly to reduce memory usage (only one copy, rather + * than one per arena), but mainly to avoid false cacheline sharing. + */ +struct arena_bin_info_s { + /* Size of regions in a run for this bin's size class. */ + size_t reg_size; + + /* Total size of a run for this bin's size class. */ + size_t run_size; + + /* Total number of regions in a run for this bin's size class. */ + uint32_t nregs; + +#ifdef JEMALLOC_PROF + /* + * Offset of first (prof_ctx_t *) in a run header for this bin's size + * class, or 0 if (opt_prof == false). + */ + uint32_t ctx0_offset; +#endif + + /* Offset of first region in a run for this bin's size class. */ + uint32_t reg0_offset; +}; + struct arena_bin_s { /* * All operations on runcur, runs, and stats require that lock be @@ -242,26 +270,6 @@ struct arena_bin_s { */ arena_run_tree_t runs; - /* Size of regions in a run for this bin's size class. */ - size_t reg_size; - - /* Total size of a run for this bin's size class. */ - size_t run_size; - - /* Total number of regions in a run for this bin's size class. */ - uint32_t nregs; - -#ifdef JEMALLOC_PROF - /* - * Offset of first (prof_ctx_t *) in a run header for this bin's size - * class, or 0 if (opt_prof == false). - */ - uint32_t ctx0_offset; -#endif - - /* Offset of first region in a run for this bin's size class. */ - uint32_t reg0_offset; - #ifdef JEMALLOC_STATS /* Bin statistics. */ malloc_bin_stats_t stats; @@ -398,6 +406,8 @@ extern ssize_t opt_lg_dirty_mult; extern uint8_t const *small_size2bin; #define SMALL_SIZE2BIN(s) (small_size2bin[(s-1) >> LG_TINY_MIN]) +extern arena_bin_info_t *arena_bin_info; + /* Various bin-related settings. */ #ifdef JEMALLOC_TINY /* Number of (2^n)-spaced tiny bins. 
*/ # define ntbins ((unsigned)(LG_QUANTUM - LG_TINY_MIN)) @@ -463,7 +473,8 @@ bool arena_boot(void); #ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE -unsigned arena_run_regind(arena_run_t *run, arena_bin_t *bin, +size_t arena_bin_index(arena_t *arena, arena_bin_t *bin); +unsigned arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr, size_t size); # ifdef JEMALLOC_PROF prof_ctx_t *arena_prof_ctx_get(const void *ptr); @@ -473,8 +484,16 @@ void arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_)) +JEMALLOC_INLINE size_t +arena_bin_index(arena_t *arena, arena_bin_t *bin) +{ + size_t binind = bin - arena->bins; + assert(binind < nbins); + return (binind); +} + JEMALLOC_INLINE unsigned -arena_run_regind(arena_run_t *run, arena_bin_t *bin, const void *ptr, +arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr, size_t size) { unsigned shift, diff, regind; @@ -485,7 +504,8 @@ arena_run_regind(arena_run_t *run, arena_bin_t *bin, const void *ptr, * Avoid doing division with a variable divisor if possible. Using * actual division here can reduce allocator throughput by over 20%! */ - diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run - bin->reg0_offset); + diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run - + bin_info->reg0_offset); /* Rescale (factor powers of 2 out of the numerator and denominator). */ shift = ffs(size) - 1; @@ -531,7 +551,7 @@ arena_run_regind(arena_run_t *run, arena_bin_t *bin, const void *ptr, #undef SIZE_INV_SHIFT } assert(diff == regind * size); - assert(regind < bin->nregs); + assert(regind < bin_info->nregs); return (regind); } @@ -558,13 +578,15 @@ arena_prof_ctx_get(const void *ptr) arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) << PAGE_SHIFT)); - arena_bin_t *bin = run->bin; + size_t binind = arena_bin_index(chunk->arena, run->bin); + arena_bin_info_t *bin_info = &arena_bin_info[binind]; unsigned regind; assert(run->magic == ARENA_RUN_MAGIC); - regind = arena_run_regind(run, bin, ptr, bin->reg_size); + regind = arena_run_regind(run, bin_info, ptr, + bin_info->reg_size); ret = *(prof_ctx_t **)((uintptr_t)run + - bin->ctx0_offset + (regind * + bin_info->ctx0_offset + (regind * sizeof(prof_ctx_t *))); } } else @@ -593,11 +615,15 @@ arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx) PAGE_SHIFT)); arena_bin_t *bin = run->bin; unsigned regind; + size_t binind; + arena_bin_info_t *bin_info; assert(run->magic == ARENA_RUN_MAGIC); regind = arena_run_regind(run, bin, ptr, bin->reg_size); + binind = arena_bin_index(chunk->arena, bin); + bin_info = &arena_bin_info[binind]; - *((prof_ctx_t **)((uintptr_t)run + bin->ctx0_offset + *((prof_ctx_t **)((uintptr_t)run + bin_info->ctx0_offset + (regind * sizeof(prof_ctx_t *)))) = ctx; } else assert((uintptr_t)ctx == (uintptr_t)1U); @@ -637,10 +663,17 @@ arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr) (uintptr_t)((pageind - (mapelm->bits >> PAGE_SHIFT)) << PAGE_SHIFT)); assert(run->magic == ARENA_RUN_MAGIC); - assert(((uintptr_t)ptr - ((uintptr_t)run + - (uintptr_t)run->bin->reg0_offset)) % - run->bin->reg_size == 0); bin = run->bin; +#ifndef NDEBUG + { + size_t binind = arena_bin_index(arena, bin); + arena_bin_info_t *bin_info = + &arena_bin_info[binind]; + assert(((uintptr_t)ptr - ((uintptr_t)run + + (uintptr_t)bin_info->reg0_offset)) % + bin_info->reg_size == 0); + } +#endif malloc_mutex_lock(&bin->lock); arena_dalloc_bin(arena, 
chunk, ptr, mapelm); malloc_mutex_unlock(&bin->lock); diff --git a/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in b/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in index 0f58a7a6..34b2a232 100644 --- a/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in +++ b/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in @@ -402,7 +402,7 @@ s2u(size_t size) { if (size <= small_maxclass) - return (arenas[0]->bins[SMALL_SIZE2BIN(size)].reg_size); + return (arena_bin_info[SMALL_SIZE2BIN(size)].reg_size); if (size <= arena_maxclass) return (PAGE_CEILING(size)); return (CHUNK_CEILING(size)); @@ -446,10 +446,8 @@ sa2u(size_t size, size_t alignment, size_t *run_size_p) } if (usize <= arena_maxclass && alignment <= PAGE_SIZE) { - if (usize <= small_maxclass) { - return - (arenas[0]->bins[SMALL_SIZE2BIN(usize)].reg_size); - } + if (usize <= small_maxclass) + return (arena_bin_info[SMALL_SIZE2BIN(usize)].reg_size); return (PAGE_CEILING(usize)); } else { size_t run_size; diff --git a/jemalloc/include/jemalloc/internal/tcache.h b/jemalloc/include/jemalloc/internal/tcache.h index 7b711720..ab02545f 100644 --- a/jemalloc/include/jemalloc/internal/tcache.h +++ b/jemalloc/include/jemalloc/internal/tcache.h @@ -232,7 +232,7 @@ tcache_alloc_small(tcache_t *tcache, size_t size, bool zero) if (ret == NULL) return (NULL); } - assert(arena_salloc(ret) == tcache->arena->bins[binind].reg_size); + assert(arena_salloc(ret) == arena_bin_info[binind].reg_size); if (zero == false) { #ifdef JEMALLOC_FILL @@ -248,7 +248,7 @@ tcache_alloc_small(tcache_t *tcache, size_t size, bool zero) tbin->tstats.nrequests++; #endif #ifdef JEMALLOC_PROF - tcache->prof_accumbytes += tcache->arena->bins[binind].reg_size; + tcache->prof_accumbytes += arena_bin_info[binind].reg_size; #endif tcache_event(tcache); return (ret); @@ -331,7 +331,7 @@ tcache_dalloc_small(tcache_t *tcache, void *ptr) #ifdef JEMALLOC_FILL if (opt_junk) - memset(ptr, 0x5a, bin->reg_size); + memset(ptr, 0x5a, arena_bin_info[binind].reg_size); #endif tbin = &tcache->tbins[binind]; diff --git a/jemalloc/src/arena.c b/jemalloc/src/arena.c index 2811fd10..e49b8edf 100644 --- a/jemalloc/src/arena.c +++ b/jemalloc/src/arena.c @@ -8,6 +8,7 @@ size_t opt_lg_qspace_max = LG_QSPACE_MAX_DEFAULT; size_t opt_lg_cspace_max = LG_CSPACE_MAX_DEFAULT; ssize_t opt_lg_dirty_mult = LG_DIRTY_MULT_DEFAULT; uint8_t const *small_size2bin; +arena_bin_info_t *arena_bin_info; /* Various bin-related settings. 
*/ unsigned nqbins; @@ -174,7 +175,6 @@ static void arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, size_t oldsize, size_t newsize, bool dirty); static arena_run_t *arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin); static void *arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin); -static size_t arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size); static void arena_dissociate_bin_run(arena_chunk_t *chunk, arena_run_t *run, arena_bin_t *bin); static void arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, @@ -192,6 +192,9 @@ static bool small_size2bin_init(void); static void small_size2bin_validate(void); #endif static bool small_size2bin_init_hard(void); +static size_t bin_info_run_size_calc(arena_bin_info_t *bin_info, + size_t min_run_size); +static bool bin_info_init(void); /******************************************************************************/ @@ -247,7 +250,7 @@ rb_gen(static JEMALLOC_ATTR(unused), arena_avail_tree_, arena_avail_tree_t, arena_chunk_map_t, u.rb_link, arena_avail_comp) static inline void * -arena_run_reg_alloc(arena_run_t *run, arena_bin_t *bin) +arena_run_reg_alloc(arena_run_t *run, arena_bin_info_t *bin_info) { void *ret; @@ -261,16 +264,16 @@ arena_run_reg_alloc(arena_run_t *run, arena_bin_t *bin) assert(ret != NULL); /* Write-after free can cause assertion failure. */ assert((uintptr_t)ret >= (uintptr_t)run + - (uintptr_t)bin->reg0_offset); + (uintptr_t)bin_info->reg0_offset); assert((uintptr_t)ret < (uintptr_t)run->next); assert(((uintptr_t)ret - ((uintptr_t)run + - (uintptr_t)bin->reg0_offset)) % (uintptr_t)bin->reg_size == - 0); + (uintptr_t)bin_info->reg0_offset)) % + (uintptr_t)bin_info->reg_size == 0); run->avail = *(void **)ret; return (ret); } ret = run->next; - run->next = (void *)((uintptr_t)ret + (uintptr_t)bin->reg_size); + run->next = (void *)((uintptr_t)ret + (uintptr_t)bin_info->reg_size); assert(ret != NULL); return (ret); } @@ -279,22 +282,27 @@ static inline void arena_run_reg_dalloc(arena_run_t *run, void *ptr) { - assert(run->nfree < run->bin->nregs); +#ifndef NDEBUG + arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); + size_t binind = arena_bin_index(chunk->arena, run->bin); + arena_bin_info_t *bin_info = &arena_bin_info[binind]; + assert(run->nfree < bin_info->nregs); /* Freeing an interior pointer can cause assertion failure. */ assert(((uintptr_t)ptr - ((uintptr_t)run + - (uintptr_t)run->bin->reg0_offset)) % (uintptr_t)run->bin->reg_size + (uintptr_t)bin_info->reg0_offset)) % (uintptr_t)bin_info->reg_size == 0); /* * Freeing a pointer lower than region zero can cause assertion * failure. */ assert((uintptr_t)ptr >= (uintptr_t)run + - (uintptr_t)run->bin->reg0_offset); + (uintptr_t)bin_info->reg0_offset); /* * Freeing a pointer past in the run's frontier can cause assertion * failure. 
*/ assert((uintptr_t)ptr < (uintptr_t)run->next); +#endif *(void **)ptr = run->avail; run->avail = ptr; @@ -765,7 +773,11 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) assert((mapelm->bits >> PAGE_SHIFT) == 0); assert(run->magic == ARENA_RUN_MAGIC); - pageind += run->bin->run_size >> PAGE_SHIFT; + size_t binind = arena_bin_index(arena, + run->bin); + arena_bin_info_t *bin_info = + &arena_bin_info[binind]; + pageind += bin_info->run_size >> PAGE_SHIFT; } } } @@ -947,8 +959,11 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty) CHUNK_MAP_LARGE) != 0); assert((chunk->map[run_ind+(size>>PAGE_SHIFT)-1-map_bias].bits & CHUNK_MAP_ALLOCATED) != 0); - } else - size = run->bin->run_size; + } else { + size_t binind = arena_bin_index(arena, run->bin); + arena_bin_info_t *bin_info = &arena_bin_info[binind]; + size = bin_info->run_size; + } run_pages = (size >> PAGE_SHIFT); arena->nactive -= run_pages; @@ -1175,6 +1190,8 @@ arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin) { arena_chunk_map_t *mapelm; arena_run_t *run; + size_t binind; + arena_bin_info_t *bin_info; /* Look for a usable run. */ mapelm = arena_run_tree_first(&bin->runs); @@ -1198,18 +1215,21 @@ arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin) } /* No existing runs have any space available. */ + binind = arena_bin_index(arena, bin); + bin_info = &arena_bin_info[binind]; + /* Allocate a new run. */ malloc_mutex_unlock(&bin->lock); /******************************/ malloc_mutex_lock(&arena->lock); - run = arena_run_alloc(arena, bin->run_size, false, false); + run = arena_run_alloc(arena, bin_info->run_size, false, false); if (run != NULL) { /* Initialize run internals. */ run->bin = bin; run->avail = NULL; run->next = (void *)((uintptr_t)run + - (uintptr_t)bin->reg0_offset); - run->nfree = bin->nregs; + (uintptr_t)bin_info->reg0_offset); + run->nfree = bin_info->nregs; #ifdef JEMALLOC_DEBUG run->magic = ARENA_RUN_MAGIC; #endif @@ -1260,18 +1280,23 @@ static void * arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin) { void *ret; + size_t binind; + arena_bin_info_t *bin_info; arena_run_t *run; + binind = arena_bin_index(arena, bin); + bin_info = &arena_bin_info[binind]; bin->runcur = NULL; run = arena_bin_nonfull_run_get(arena, bin); if (bin->runcur != NULL && bin->runcur->nfree > 0) { + /* * Another thread updated runcur while this one ran without the * bin lock in arena_bin_nonfull_run_get(). */ assert(bin->runcur->magic == ARENA_RUN_MAGIC); assert(bin->runcur->nfree > 0); - ret = arena_run_reg_alloc(bin->runcur, bin); + ret = arena_run_reg_alloc(bin->runcur, bin_info); if (run != NULL) { arena_chunk_t *chunk; @@ -1284,7 +1309,7 @@ arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin) * from the run. 
*/ chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); - if (run->nfree == bin->nregs) + if (run->nfree == bin_info->nregs) arena_dalloc_bin_run(arena, chunk, run, bin); else arena_bin_lower_run(arena, chunk, run, bin); @@ -1300,7 +1325,7 @@ arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin) assert(bin->runcur->magic == ARENA_RUN_MAGIC); assert(bin->runcur->nfree > 0); - return (arena_run_reg_alloc(bin->runcur, bin)); + return (arena_run_reg_alloc(bin->runcur, bin_info)); } #ifdef JEMALLOC_PROF @@ -1342,7 +1367,7 @@ arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind malloc_mutex_lock(&bin->lock); for (i = 0, nfill = (tbin->ncached_max >> 1); i < nfill; i++) { if ((run = bin->runcur) != NULL && run->nfree > 0) - ptr = arena_run_reg_alloc(run, bin); + ptr = arena_run_reg_alloc(run, &arena_bin_info[binind]); else ptr = arena_bin_malloc_hard(arena, bin); if (ptr == NULL) @@ -1351,7 +1376,8 @@ arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind tbin->avail = ptr; } #ifdef JEMALLOC_STATS - bin->stats.allocated += (i - tbin->ncached) * bin->reg_size; + bin->stats.allocated += (i - tbin->ncached) * + arena_bin_info[binind].reg_size; bin->stats.nmalloc += i; bin->stats.nrequests += tbin->tstats.nrequests; bin->stats.nfills++; @@ -1362,112 +1388,6 @@ arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind } #endif -/* - * Calculate bin->run_size such that it meets the following constraints: - * - * *) bin->run_size >= min_run_size - * *) bin->run_size <= arena_maxclass - * *) run header overhead <= RUN_MAX_OVRHD (or header overhead relaxed). - * - * bin->nregs and bin->reg0_offset are also calculated here, since these - * settings are all interdependent. - */ -static size_t -arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size) -{ - size_t try_run_size, good_run_size; - uint32_t try_nregs, good_nregs; - uint32_t try_hdr_size, good_hdr_size; -#ifdef JEMALLOC_PROF - uint32_t try_ctx0_offset, good_ctx0_offset; -#endif - uint32_t try_reg0_offset, good_reg0_offset; - - assert(min_run_size >= PAGE_SIZE); - assert(min_run_size <= arena_maxclass); - - /* - * Calculate known-valid settings before entering the run_size - * expansion loop, so that the first part of the loop always copies - * valid settings. - * - * The do..while loop iteratively reduces the number of regions until - * the run header and the regions no longer overlap. A closed formula - * would be quite messy, since there is an interdependency between the - * header's mask length and the number of regions. - */ - try_run_size = min_run_size; - try_nregs = ((try_run_size - sizeof(arena_run_t)) / bin->reg_size) - + 1; /* Counter-act try_nregs-- in loop. */ - do { - try_nregs--; - try_hdr_size = sizeof(arena_run_t); -#ifdef JEMALLOC_PROF - if (opt_prof && prof_promote == false) { - /* Pad to a quantum boundary. */ - try_hdr_size = QUANTUM_CEILING(try_hdr_size); - try_ctx0_offset = try_hdr_size; - /* Add space for one (prof_ctx_t *) per region. */ - try_hdr_size += try_nregs * sizeof(prof_ctx_t *); - } else - try_ctx0_offset = 0; -#endif - try_reg0_offset = try_run_size - (try_nregs * bin->reg_size); - } while (try_hdr_size > try_reg0_offset); - - /* run_size expansion loop. */ - do { - /* - * Copy valid settings before trying more aggressive settings. 
- */ - good_run_size = try_run_size; - good_nregs = try_nregs; - good_hdr_size = try_hdr_size; -#ifdef JEMALLOC_PROF - good_ctx0_offset = try_ctx0_offset; -#endif - good_reg0_offset = try_reg0_offset; - - /* Try more aggressive settings. */ - try_run_size += PAGE_SIZE; - try_nregs = ((try_run_size - sizeof(arena_run_t)) / - bin->reg_size) + 1; /* Counter-act try_nregs-- in loop. */ - do { - try_nregs--; - try_hdr_size = sizeof(arena_run_t); -#ifdef JEMALLOC_PROF - if (opt_prof && prof_promote == false) { - /* Pad to a quantum boundary. */ - try_hdr_size = QUANTUM_CEILING(try_hdr_size); - try_ctx0_offset = try_hdr_size; - /* - * Add space for one (prof_ctx_t *) per region. - */ - try_hdr_size += try_nregs * - sizeof(prof_ctx_t *); - } -#endif - try_reg0_offset = try_run_size - (try_nregs * - bin->reg_size); - } while (try_hdr_size > try_reg0_offset); - } while (try_run_size <= arena_maxclass - && try_run_size <= arena_maxclass - && RUN_MAX_OVRHD * (bin->reg_size << 3) > RUN_MAX_OVRHD_RELAX - && (try_reg0_offset << RUN_BFP) > RUN_MAX_OVRHD * try_run_size); - - assert(good_hdr_size <= good_reg0_offset); - - /* Copy final settings. */ - bin->run_size = good_run_size; - bin->nregs = good_nregs; -#ifdef JEMALLOC_PROF - bin->ctx0_offset = good_ctx0_offset; -#endif - bin->reg0_offset = good_reg0_offset; - - return (good_run_size); -} - void * arena_malloc_small(arena_t *arena, size_t size, bool zero) { @@ -1479,11 +1399,11 @@ arena_malloc_small(arena_t *arena, size_t size, bool zero) binind = SMALL_SIZE2BIN(size); assert(binind < nbins); bin = &arena->bins[binind]; - size = bin->reg_size; + size = arena_bin_info[binind].reg_size; malloc_mutex_lock(&bin->lock); if ((run = bin->runcur) != NULL && run->nfree > 0) - ret = arena_run_reg_alloc(run, bin); + ret = arena_run_reg_alloc(run, &arena_bin_info[binind]); else ret = arena_bin_malloc_hard(arena, bin); @@ -1688,10 +1608,12 @@ arena_salloc(const void *ptr) (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) << PAGE_SHIFT)); assert(run->magic == ARENA_RUN_MAGIC); + size_t binind = arena_bin_index(chunk->arena, run->bin); + arena_bin_info_t *bin_info = &arena_bin_info[binind]; assert(((uintptr_t)ptr - ((uintptr_t)run + - (uintptr_t)run->bin->reg0_offset)) % run->bin->reg_size == + (uintptr_t)bin_info->reg0_offset)) % bin_info->reg_size == 0); - ret = run->bin->reg_size; + ret = bin_info->reg_size; } else { assert(((uintptr_t)ptr & PAGE_MASK) == 0); ret = mapbits & ~PAGE_MASK; @@ -1739,10 +1661,12 @@ arena_salloc_demote(const void *ptr) (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) << PAGE_SHIFT)); assert(run->magic == ARENA_RUN_MAGIC); + size_t binind = arena_bin_index(chunk->arena, run->bin); + arena_bin_info_t *bin_info = &arena_bin_info[binind]; assert(((uintptr_t)ptr - ((uintptr_t)run + - (uintptr_t)run->bin->reg0_offset)) % run->bin->reg_size == + (uintptr_t)bin_info->reg0_offset)) % bin_info->reg_size == 0); - ret = run->bin->reg_size; + ret = bin_info->reg_size; } else { assert(((uintptr_t)ptr & PAGE_MASK) == 0); ret = mapbits & ~PAGE_MASK; @@ -1751,7 +1675,7 @@ arena_salloc_demote(const void *ptr) size_t binind = ((mapbits & CHUNK_MAP_CLASS_MASK) >> CHUNK_MAP_CLASS_SHIFT) - 1; assert(binind < nbins); - ret = chunk->arena->bins[binind].reg_size; + ret = arena_bin_info[binind].reg_size; } assert(ret != 0); } @@ -1768,17 +1692,22 @@ arena_dissociate_bin_run(arena_chunk_t *chunk, arena_run_t *run, /* Dissociate run from bin. 
*/ if (run == bin->runcur) bin->runcur = NULL; - else if (bin->nregs != 1) { - size_t run_pageind = (((uintptr_t)run - (uintptr_t)chunk)) >> - PAGE_SHIFT; - arena_chunk_map_t *run_mapelm = - &chunk->map[run_pageind-map_bias]; - /* - * This block's conditional is necessary because if the run - * only contains one region, then it never gets inserted into - * the non-full runs tree. - */ - arena_run_tree_remove(&bin->runs, run_mapelm); + else { + size_t binind = arena_bin_index(chunk->arena, bin); + arena_bin_info_t *bin_info = &arena_bin_info[binind]; + + if (bin_info->nregs != 1) { + size_t run_pageind = (((uintptr_t)run - + (uintptr_t)chunk)) >> PAGE_SHIFT; + arena_chunk_map_t *run_mapelm = + &chunk->map[run_pageind-map_bias]; + /* + * This block's conditional is necessary because if the + * run only contains one region, then it never gets + * inserted into the non-full runs tree. + */ + arena_run_tree_remove(&bin->runs, run_mapelm); + } } } @@ -1786,15 +1715,20 @@ static void arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, arena_bin_t *bin) { + size_t binind; + arena_bin_info_t *bin_info; size_t npages, run_ind, past; assert(run != bin->runcur); assert(arena_run_tree_search(&bin->runs, &chunk->map[ (((uintptr_t)run-(uintptr_t)chunk)>>PAGE_SHIFT)-map_bias]) == NULL); + binind = arena_bin_index(chunk->arena, run->bin); + bin_info = &arena_bin_info[binind]; + malloc_mutex_unlock(&bin->lock); /******************************/ - npages = bin->run_size >> PAGE_SHIFT; + npages = bin_info->run_size >> PAGE_SHIFT; run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk) >> PAGE_SHIFT); past = (size_t)((PAGE_CEILING((uintptr_t)run->next) - (uintptr_t)chunk) >> PAGE_SHIFT); @@ -1814,7 +1748,7 @@ arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, chunk->map[run_ind+npages-1-map_bias].bits = CHUNK_MAP_LARGE | (chunk->map[run_ind+npages-1-map_bias].bits & CHUNK_MAP_FLAGS_MASK); - chunk->map[run_ind-map_bias].bits = bin->run_size | + chunk->map[run_ind-map_bias].bits = bin_info->run_size | CHUNK_MAP_LARGE | (chunk->map[run_ind-map_bias].bits & CHUNK_MAP_FLAGS_MASK); arena_run_trim_tail(arena, chunk, run, (npages << PAGE_SHIFT), @@ -1885,8 +1819,10 @@ arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, (mapelm->bits >> PAGE_SHIFT)) << PAGE_SHIFT)); assert(run->magic == ARENA_RUN_MAGIC); bin = run->bin; + size_t binind = arena_bin_index(arena, bin); + arena_bin_info_t *bin_info = &arena_bin_info[binind]; #if (defined(JEMALLOC_FILL) || defined(JEMALLOC_STATS)) - size = bin->reg_size; + size = bin_info->reg_size; #endif #ifdef JEMALLOC_FILL @@ -1895,7 +1831,7 @@ arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, #endif arena_run_reg_dalloc(run, ptr); - if (run->nfree == bin->nregs) { + if (run->nfree == bin_info->nregs) { arena_dissociate_bin_run(chunk, run, bin); arena_dalloc_bin_run(arena, chunk, run, bin); } else if (run->nfree == 1 && run != bin->runcur) @@ -2167,8 +2103,8 @@ arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, */ if (oldsize <= arena_maxclass) { if (oldsize <= small_maxclass) { - assert(choose_arena()->bins[SMALL_SIZE2BIN( - oldsize)].reg_size == oldsize); + assert(arena_bin_info[SMALL_SIZE2BIN(oldsize)].reg_size + == oldsize); if ((size + extra <= small_maxclass && SMALL_SIZE2BIN(size + extra) == SMALL_SIZE2BIN(oldsize)) || (size <= oldsize && @@ -2248,7 +2184,6 @@ arena_new(arena_t *arena, unsigned ind) { unsigned i; arena_bin_t *bin; - size_t prev_run_size; arena->ind = ind; @@ 
-2284,8 +2219,6 @@ arena_new(arena_t *arena, unsigned ind) arena_avail_tree_new(&arena->runs_avail_dirty); /* Initialize bins. */ - prev_run_size = PAGE_SIZE; - i = 0; #ifdef JEMALLOC_TINY /* (2^n)-spaced tiny bins. */ @@ -2295,11 +2228,6 @@ arena_new(arena_t *arena, unsigned ind) return (true); bin->runcur = NULL; arena_run_tree_new(&bin->runs); - - bin->reg_size = (1U << (LG_TINY_MIN + i)); - - prev_run_size = arena_bin_run_size_calc(bin, prev_run_size); - #ifdef JEMALLOC_STATS memset(&bin->stats, 0, sizeof(malloc_bin_stats_t)); #endif @@ -2313,11 +2241,6 @@ arena_new(arena_t *arena, unsigned ind) return (true); bin->runcur = NULL; arena_run_tree_new(&bin->runs); - - bin->reg_size = (i - ntbins + 1) << LG_QUANTUM; - - prev_run_size = arena_bin_run_size_calc(bin, prev_run_size); - #ifdef JEMALLOC_STATS memset(&bin->stats, 0, sizeof(malloc_bin_stats_t)); #endif @@ -2330,12 +2253,6 @@ arena_new(arena_t *arena, unsigned ind) return (true); bin->runcur = NULL; arena_run_tree_new(&bin->runs); - - bin->reg_size = cspace_min + ((i - (ntbins + nqbins)) << - LG_CACHELINE); - - prev_run_size = arena_bin_run_size_calc(bin, prev_run_size); - #ifdef JEMALLOC_STATS memset(&bin->stats, 0, sizeof(malloc_bin_stats_t)); #endif @@ -2348,12 +2265,6 @@ arena_new(arena_t *arena, unsigned ind) return (true); bin->runcur = NULL; arena_run_tree_new(&bin->runs); - - bin->reg_size = sspace_min + ((i - (ntbins + nqbins + ncbins)) - << LG_SUBPAGE); - - prev_run_size = arena_bin_run_size_calc(bin, prev_run_size); - #ifdef JEMALLOC_STATS memset(&bin->stats, 0, sizeof(malloc_bin_stats_t)); #endif @@ -2487,6 +2398,162 @@ small_size2bin_init_hard(void) #undef CUSTOM_SMALL_SIZE2BIN } +/* + * Calculate bin_info->run_size such that it meets the following constraints: + * + * *) bin_info->run_size >= min_run_size + * *) bin_info->run_size <= arena_maxclass + * *) run header overhead <= RUN_MAX_OVRHD (or header overhead relaxed). + * + * bin_info->nregs and bin_info->reg0_offset are also calculated here, since + * these settings are all interdependent. + */ +static size_t +bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size) +{ + size_t try_run_size, good_run_size; + uint32_t try_nregs, good_nregs; + uint32_t try_hdr_size, good_hdr_size; +#ifdef JEMALLOC_PROF + uint32_t try_ctx0_offset, good_ctx0_offset; +#endif + uint32_t try_reg0_offset, good_reg0_offset; + + assert(min_run_size >= PAGE_SIZE); + assert(min_run_size <= arena_maxclass); + + /* + * Calculate known-valid settings before entering the run_size + * expansion loop, so that the first part of the loop always copies + * valid settings. + * + * The do..while loop iteratively reduces the number of regions until + * the run header and the regions no longer overlap. A closed formula + * would be quite messy, since there is an interdependency between the + * header's mask length and the number of regions. + */ + try_run_size = min_run_size; + try_nregs = ((try_run_size - sizeof(arena_run_t)) / bin_info->reg_size) + + 1; /* Counter-act try_nregs-- in loop. */ + do { + try_nregs--; + try_hdr_size = sizeof(arena_run_t); +#ifdef JEMALLOC_PROF + if (opt_prof && prof_promote == false) { + /* Pad to a quantum boundary. */ + try_hdr_size = QUANTUM_CEILING(try_hdr_size); + try_ctx0_offset = try_hdr_size; + /* Add space for one (prof_ctx_t *) per region. 
*/ + try_hdr_size += try_nregs * sizeof(prof_ctx_t *); + } else + try_ctx0_offset = 0; +#endif + try_reg0_offset = try_run_size - (try_nregs * + bin_info->reg_size); + } while (try_hdr_size > try_reg0_offset); + + /* run_size expansion loop. */ + do { + /* + * Copy valid settings before trying more aggressive settings. + */ + good_run_size = try_run_size; + good_nregs = try_nregs; + good_hdr_size = try_hdr_size; +#ifdef JEMALLOC_PROF + good_ctx0_offset = try_ctx0_offset; +#endif + good_reg0_offset = try_reg0_offset; + + /* Try more aggressive settings. */ + try_run_size += PAGE_SIZE; + try_nregs = ((try_run_size - sizeof(arena_run_t)) / + bin_info->reg_size) + + 1; /* Counter-act try_nregs-- in loop. */ + do { + try_nregs--; + try_hdr_size = sizeof(arena_run_t); +#ifdef JEMALLOC_PROF + if (opt_prof && prof_promote == false) { + /* Pad to a quantum boundary. */ + try_hdr_size = QUANTUM_CEILING(try_hdr_size); + try_ctx0_offset = try_hdr_size; + /* + * Add space for one (prof_ctx_t *) per region. + */ + try_hdr_size += try_nregs * + sizeof(prof_ctx_t *); + } +#endif + try_reg0_offset = try_run_size - (try_nregs * + bin_info->reg_size); + } while (try_hdr_size > try_reg0_offset); + } while (try_run_size <= arena_maxclass + && try_run_size <= arena_maxclass + && RUN_MAX_OVRHD * (bin_info->reg_size << 3) > RUN_MAX_OVRHD_RELAX + && (try_reg0_offset << RUN_BFP) > RUN_MAX_OVRHD * try_run_size); + + assert(good_hdr_size <= good_reg0_offset); + + /* Copy final settings. */ + bin_info->run_size = good_run_size; + bin_info->nregs = good_nregs; +#ifdef JEMALLOC_PROF + bin_info->ctx0_offset = good_ctx0_offset; +#endif + bin_info->reg0_offset = good_reg0_offset; + + return (good_run_size); +} + +static bool +bin_info_init(void) +{ + arena_bin_info_t *bin_info; + unsigned i; + size_t prev_run_size; + + arena_bin_info = base_alloc(sizeof(arena_bin_info_t) * nbins); + if (arena_bin_info == NULL) + return (true); + + prev_run_size = PAGE_SIZE; + i = 0; +#ifdef JEMALLOC_TINY + /* (2^n)-spaced tiny bins. */ + for (; i < ntbins; i++) { + bin_info = &arena_bin_info[i]; + bin_info->reg_size = (1U << (LG_TINY_MIN + i)); + prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size); + } +#endif + + /* Quantum-spaced bins. */ + for (; i < ntbins + nqbins; i++) { + bin_info = &arena_bin_info[i]; + bin_info->reg_size = (i - ntbins + 1) << LG_QUANTUM; + prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size); + } + + /* Cacheline-spaced bins. */ + for (; i < ntbins + nqbins + ncbins; i++) { + bin_info = &arena_bin_info[i]; + bin_info->reg_size = cspace_min + ((i - (ntbins + nqbins)) << + LG_CACHELINE); + prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size); + } + + /* Subpage-spaced bins. */ + for (; i < nbins; i++) { + bin_info = &arena_bin_info[i]; + bin_info->reg_size = sspace_min + ((i - (ntbins + nqbins + + ncbins)) << LG_SUBPAGE); + prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size); + } + + return (false); +} + bool arena_boot(void) { @@ -2545,9 +2612,6 @@ arena_boot(void) abort(); } - if (small_size2bin_init()) - return (true); - /* * Compute the header size such that it is large enough to contain the * page map. 
The page map is biased to omit entries for the header @@ -2571,5 +2635,11 @@ arena_boot(void) arena_maxclass = chunksize - (map_bias << PAGE_SHIFT); + if (small_size2bin_init()) + return (true); + + if (bin_info_init()) + return (true); + return (false); } diff --git a/jemalloc/src/ctl.c b/jemalloc/src/ctl.c index 1b28da47..c32e955e 100644 --- a/jemalloc/src/ctl.c +++ b/jemalloc/src/ctl.c @@ -1289,9 +1289,9 @@ CTL_RO_NL_GEN(opt_overcommit, opt_overcommit, bool) /******************************************************************************/ -CTL_RO_NL_GEN(arenas_bin_i_size, arenas[0]->bins[mib[2]].reg_size, size_t) -CTL_RO_NL_GEN(arenas_bin_i_nregs, arenas[0]->bins[mib[2]].nregs, uint32_t) -CTL_RO_NL_GEN(arenas_bin_i_run_size, arenas[0]->bins[mib[2]].run_size, size_t) +CTL_RO_NL_GEN(arenas_bin_i_size, arena_bin_info[mib[2]].reg_size, size_t) +CTL_RO_NL_GEN(arenas_bin_i_nregs, arena_bin_info[mib[2]].nregs, uint32_t) +CTL_RO_NL_GEN(arenas_bin_i_run_size, arena_bin_info[mib[2]].run_size, size_t) const ctl_node_t * arenas_bin_i_index(const size_t *mib, size_t miblen, size_t i) { diff --git a/jemalloc/src/tcache.c b/jemalloc/src/tcache.c index e9b067d4..88e1cc7c 100644 --- a/jemalloc/src/tcache.c +++ b/jemalloc/src/tcache.c @@ -253,9 +253,9 @@ tcache_create(arena_t *arena) tcache->arena = arena; assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0); for (i = 0; i < nbins; i++) { - if ((arena->bins[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MAX) { - tcache->tbins[i].ncached_max = (arena->bins[i].nregs << - 1); + if ((arena_bin_info[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MAX) { + tcache->tbins[i].ncached_max = (arena_bin_info[i].nregs + << 1); } else tcache->tbins[i].ncached_max = TCACHE_NSLOTS_SMALL_MAX; }
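The core idea of the change — one shared, read-only table of per-size-class metadata, plus per-arena mutable bin state, with the table index recovered by pointer arithmetic — can be shown with a minimal standalone sketch. The my_-prefixed names below are hypothetical stand-ins, not jemalloc's real types; only the shape of arena_bin_index() and the arena_bin_info[] lookup mirrors the patch.

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

#define MY_NBINS 4

/* Read-only after startup; one copy total, so no writer ever dirties it. */
typedef struct {
	size_t   reg_size;
	size_t   run_size;
	uint32_t nregs;
	uint32_t reg0_offset;
} my_bin_info_t;

/* Mutable state; one copy per arena (stand-in for runcur/runs/lock/stats). */
typedef struct {
	unsigned long nmalloc;
} my_bin_t;

typedef struct {
	my_bin_t bins[MY_NBINS];
} my_arena_t;

static my_bin_info_t my_bin_info[MY_NBINS];	/* filled once at boot */

/* Same shape as arena_bin_index(): the index falls out of pointer math. */
static size_t
my_bin_index(const my_arena_t *arena, const my_bin_t *bin)
{
	size_t binind = (size_t)(bin - arena->bins);

	assert(binind < MY_NBINS);
	return (binind);
}

/* Read-only fields are now reached by index instead of through the bin. */
static size_t
my_bin_reg_size(const my_arena_t *arena, const my_bin_t *bin)
{
	return (my_bin_info[my_bin_index(arena, bin)].reg_size);
}

int
main(void)
{
	static my_arena_t arena;	/* zero-initialized */

	my_bin_info[2].reg_size = 48;
	assert(my_bin_reg_size(&arena, &arena.bins[2]) == 48);
	return (0);
}

The pointer subtraction is cheap, and paths that already know the bin index (e.g. the tcache fill and flush paths in the diff) index arena_bin_info[] directly without it.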
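arena_run_regind() keeps its trick of avoiding a hardware divide: powers of two are factored out of both operands, and the remaining odd divisor is handled by multiplying with a fixed-point reciprocal. The sketch below shows only the arithmetic, under simplifying assumptions — the reciprocal is computed with a divide here for clarity (jemalloc uses a precomputed SIZE_INV table), the fixed-point width is assumed to be 21 bits, diff is assumed to be an exact multiple of size, and both are assumed small enough that the rounding error never reaches the next integer (jemalloc falls back to real division outside those bounds).

#include <assert.h>
#include <stdint.h>
#include <strings.h>	/* ffs() */

#define INV_SHIFT 21	/* fixed-point width; stands in for SIZE_INV_SHIFT */

static unsigned
regind_no_div(uint32_t diff, uint32_t size)
{
	/* Factor powers of two out of the numerator and denominator. */
	unsigned shift = (unsigned)ffs((int)size) - 1;
	unsigned regind;

	diff >>= shift;
	size >>= shift;

	if (size == 1) {
		/* The divisor was a power of two; the shift already did it. */
		regind = diff;
	} else {
		/* Ceil-style reciprocal: floor(2^21 / size) + 1. */
		uint32_t inv = ((1U << INV_SHIFT) / size) + 1;

		assert(diff < (1U << INV_SHIFT));	/* keeps the error < 1 */
		regind = (unsigned)(((uint64_t)diff * inv) >> INV_SHIFT);
	}
	assert(regind * size == diff);
	return (regind);
}

int
main(void)
{
	/* Region size 48: region 7 starts 336 bytes past region 0. */
	assert(regind_no_div(336, 48) == 7);
	return (0);
}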
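bin_info_run_size_calc() (moved essentially verbatim from arena_bin_run_size_calc()) has to iterate because, with profiling enabled, the run header grows by one prof_ctx_t * per region, so the header size and the region count depend on each other. A simplified sketch of that inner fixed point, assuming a flat per-region header cost and ignoring the quantum padding and the outer run_size-expansion loop:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Regions are packed at the end of the run, so
 * reg0_offset == run_size - nregs * reg_size; shrink nregs until the
 * header (whose size itself depends on nregs) fits in front of region 0.
 */
static uint32_t
fit_regions(size_t run_size, size_t reg_size, size_t base_hdr_size,
    size_t per_region_hdr_size, size_t *reg0_offset)
{
	/* The +1 counteracts the nregs-- on the first pass, as in the patch. */
	uint32_t nregs = (uint32_t)((run_size - base_hdr_size) / reg_size) + 1;
	size_t hdr_size, off;

	do {
		nregs--;
		hdr_size = base_hdr_size + nregs * per_region_hdr_size;
		off = run_size - nregs * reg_size;
	} while (hdr_size > off);

	*reg0_offset = off;
	return (nregs);
}

int
main(void)
{
	size_t off;

	/* 4 KiB run, 48-byte regions, 32-byte header, 8 bytes/region (prof). */
	uint32_t nregs = fit_regions(4096, 48, 32, 8, &off);

	printf("nregs=%u reg0_offset=%zu\n", nregs, off);	/* 72, 640 */
	return (0);
}

The real function additionally walks run_size upward a page at a time while the header overhead remains above RUN_MAX_OVRHD, keeping the best (run_size, nregs, reg0_offset) combination seen so far.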