From 155bfa7da18cab0d21d87aa2dce4554166836f5d Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 5 Oct 2014 17:54:10 -0700 Subject: [PATCH] Normalize size classes. Normalize size classes to use the same number of size classes per size doubling (currently hard coded to 4), across the entire range of size classes. Small size classes already used this spacing, but in order to support this change, additional small size classes now fill [4 KiB .. 16 KiB). Large size classes range from [16 KiB .. 4 MiB). Huge size classes now support non-multiples of the chunk size in order to fill (4 MiB .. 16 MiB). --- include/jemalloc/internal/arena.h | 231 +++----------- include/jemalloc/internal/chunk.h | 3 - include/jemalloc/internal/huge.h | 2 +- .../jemalloc/internal/jemalloc_internal.h.in | 299 +++++++++++++----- include/jemalloc/internal/private_symbols.txt | 22 +- include/jemalloc/internal/size_classes.sh | 15 +- include/jemalloc/internal/stats.h | 7 +- include/jemalloc/internal/tcache.h | 52 +-- src/arena.c | 223 ++++++------- src/chunk.c | 3 - src/ctl.c | 2 +- src/huge.c | 113 +++++-- src/jemalloc.c | 34 +- src/tcache.c | 8 +- test/unit/junk.c | 17 +- test/unit/mallctl.c | 2 +- 16 files changed, 558 insertions(+), 475 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 1f985723..681b5802 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -1,6 +1,8 @@ /******************************************************************************/ #ifdef JEMALLOC_H_TYPES +#define LARGE_MINCLASS (ZU(1) << LG_LARGE_MINCLASS) + /* Maximum number of regions in one run. */ #define LG_RUN_MAXREGS (LG_PAGE - LG_TINY_MIN) #define RUN_MAXREGS (1U << LG_RUN_MAXREGS) @@ -96,11 +98,15 @@ struct arena_chunk_map_bits_s { * xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx * -------- -------- ----++++ ++++D-LA * - * Large (sampled, size <= PAGE): + * Large (sampled, size <= LARGE_MINCLASS): * ssssssss ssssssss ssssnnnn nnnnD-LA + * xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx + * -------- -------- ----++++ ++++D-LA * - * Large (not sampled, size == PAGE): + * Large (not sampled, size == LARGE_MINCLASS): * ssssssss ssssssss ssss++++ ++++D-LA + * xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx + * -------- -------- ----++++ ++++D-LA */ size_t bits; #define CHUNK_MAP_BININD_SHIFT 4 @@ -325,30 +331,21 @@ struct arena_s { #ifdef JEMALLOC_H_EXTERNS extern ssize_t opt_lg_dirty_mult; -/* - * small_size2bin_tab is a compact lookup table that rounds request sizes up to - * size classes. In order to reduce cache footprint, the table is compressed, - * and all accesses are via small_size2bin(). - */ -extern uint8_t const small_size2bin_tab[]; -/* - * small_bin2size_tab duplicates information in arena_bin_info, but in a const - * array, for which it is easier for the compiler to optimize repeated - * dereferences. - */ -extern uint32_t const small_bin2size_tab[NBINS]; extern arena_bin_info_t arena_bin_info[NBINS]; -/* Number of large size classes. */ -#define nlclasses (chunk_npages - map_bias) +extern size_t map_bias; /* Number of arena chunk header pages. */ +extern size_t map_misc_offset; +extern size_t arena_maxrun; /* Max run size for arenas. */ +extern size_t arena_maxclass; /* Max size class for arenas. */ +extern size_t nlclasses; /* Number of large size classes. 
*/ void *arena_chunk_alloc_huge(arena_t *arena, void *new_addr, size_t size, size_t alignment, bool *zero); void arena_chunk_dalloc_huge(arena_t *arena, void *chunk, size_t size); void arena_purge_all(arena_t *arena); void arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, - size_t binind, uint64_t prof_accumbytes); + index_t binind, uint64_t prof_accumbytes); void arena_alloc_junk_small(void *ptr, arena_bin_info_t *bin_info, bool zero); #ifdef JEMALLOC_JET @@ -403,15 +400,6 @@ void arena_postfork_child(arena_t *arena); #ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE -size_t small_size2bin_compute(size_t size); -size_t small_size2bin_lookup(size_t size); -size_t small_size2bin(size_t size); -size_t small_bin2size_compute(size_t binind); -size_t small_bin2size_lookup(size_t binind); -size_t small_bin2size(size_t binind); -size_t small_s2u_compute(size_t size); -size_t small_s2u_lookup(size_t size); -size_t small_s2u(size_t size); arena_chunk_map_bits_t *arena_bitselm_get(arena_chunk_t *chunk, size_t pageind); arena_chunk_map_misc_t *arena_miscelm_get(arena_chunk_t *chunk, @@ -426,7 +414,7 @@ size_t arena_mapbits_unallocated_size_get(arena_chunk_t *chunk, size_t pageind); size_t arena_mapbits_large_size_get(arena_chunk_t *chunk, size_t pageind); size_t arena_mapbits_small_runind_get(arena_chunk_t *chunk, size_t pageind); -size_t arena_mapbits_binind_get(arena_chunk_t *chunk, size_t pageind); +index_t arena_mapbits_binind_get(arena_chunk_t *chunk, size_t pageind); size_t arena_mapbits_dirty_get(arena_chunk_t *chunk, size_t pageind); size_t arena_mapbits_unzeroed_get(arena_chunk_t *chunk, size_t pageind); size_t arena_mapbits_large_get(arena_chunk_t *chunk, size_t pageind); @@ -439,16 +427,16 @@ void arena_mapbits_unallocated_size_set(arena_chunk_t *chunk, size_t pageind, void arena_mapbits_large_set(arena_chunk_t *chunk, size_t pageind, size_t size, size_t flags); void arena_mapbits_large_binind_set(arena_chunk_t *chunk, size_t pageind, - size_t binind); + index_t binind); void arena_mapbits_small_set(arena_chunk_t *chunk, size_t pageind, - size_t runind, size_t binind, size_t flags); + size_t runind, index_t binind, size_t flags); void arena_mapbits_unzeroed_set(arena_chunk_t *chunk, size_t pageind, size_t unzeroed); bool arena_prof_accum_impl(arena_t *arena, uint64_t accumbytes); bool arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes); bool arena_prof_accum(arena_t *arena, uint64_t accumbytes); -size_t arena_ptr_small_binind_get(const void *ptr, size_t mapbits); -size_t arena_bin_index(arena_t *arena, arena_bin_t *bin); +index_t arena_ptr_small_binind_get(const void *ptr, size_t mapbits); +index_t arena_bin_index(arena_t *arena, arena_bin_t *bin); unsigned arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr); prof_tctx_t *arena_prof_tctx_get(const void *ptr); @@ -464,148 +452,6 @@ void arena_sdalloc(tsd_t *tsd, arena_chunk_t *chunk, void *ptr, size_t size, #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_)) # ifdef JEMALLOC_ARENA_INLINE_A -JEMALLOC_INLINE size_t -small_size2bin_compute(size_t size) -{ -#if (NTBINS != 0) - if (size <= (ZU(1) << LG_TINY_MAXCLASS)) { - size_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1; - size_t lg_ceil = lg_floor(pow2_ceil(size)); - return (lg_ceil < lg_tmin ? 0 : lg_ceil - lg_tmin); - } else -#endif - { - size_t x = lg_floor((size<<1)-1); - size_t shift = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM) ? 
0 : - x - (LG_SIZE_CLASS_GROUP + LG_QUANTUM); - size_t grp = shift << LG_SIZE_CLASS_GROUP; - - size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1) - ? LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1; - - size_t delta_inverse_mask = ZI(-1) << lg_delta; - size_t mod = ((((size-1) & delta_inverse_mask) >> lg_delta)) & - ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1); - - size_t bin = NTBINS + grp + mod; - return (bin); - } -} - -JEMALLOC_ALWAYS_INLINE size_t -small_size2bin_lookup(size_t size) -{ - - assert(size <= LOOKUP_MAXCLASS); - { - size_t ret = ((size_t)(small_size2bin_tab[(size-1) >> - LG_TINY_MIN])); - assert(ret == small_size2bin_compute(size)); - return (ret); - } -} - -JEMALLOC_ALWAYS_INLINE size_t -small_size2bin(size_t size) -{ - - assert(size > 0); - if (likely(size <= LOOKUP_MAXCLASS)) - return (small_size2bin_lookup(size)); - else - return (small_size2bin_compute(size)); -} - -JEMALLOC_INLINE size_t -small_bin2size_compute(size_t binind) -{ -#if (NTBINS > 0) - if (binind < NTBINS) - return (ZU(1) << (LG_TINY_MAXCLASS - NTBINS + 1 + binind)); - else -#endif - { - size_t reduced_binind = binind - NTBINS; - size_t grp = reduced_binind >> LG_SIZE_CLASS_GROUP; - size_t mod = reduced_binind & ((ZU(1) << LG_SIZE_CLASS_GROUP) - - 1); - - size_t grp_size_mask = ~((!!grp)-1); - size_t grp_size = ((ZU(1) << (LG_QUANTUM + - (LG_SIZE_CLASS_GROUP-1))) << grp) & grp_size_mask; - - size_t shift = (grp == 0) ? 1 : grp; - size_t lg_delta = shift + (LG_QUANTUM-1); - size_t mod_size = (mod+1) << lg_delta; - - size_t usize = grp_size + mod_size; - return (usize); - } -} - -JEMALLOC_ALWAYS_INLINE size_t -small_bin2size_lookup(size_t binind) -{ - - assert(binind < NBINS); - { - size_t ret = (size_t)small_bin2size_tab[binind]; - assert(ret == small_bin2size_compute(binind)); - return (ret); - } -} - -JEMALLOC_ALWAYS_INLINE size_t -small_bin2size(size_t binind) -{ - - return (small_bin2size_lookup(binind)); -} - -JEMALLOC_ALWAYS_INLINE size_t -small_s2u_compute(size_t size) -{ -#if (NTBINS > 0) - if (size <= (ZU(1) << LG_TINY_MAXCLASS)) { - size_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1; - size_t lg_ceil = lg_floor(pow2_ceil(size)); - return (lg_ceil < lg_tmin ? (ZU(1) << lg_tmin) : - (ZU(1) << lg_ceil)); - } else -#endif - { - size_t x = lg_floor((size<<1)-1); - size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1) - ? 
LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1; - size_t delta = ZU(1) << lg_delta; - size_t delta_mask = delta - 1; - size_t usize = (size + delta_mask) & ~delta_mask; - return (usize); - } -} - -JEMALLOC_ALWAYS_INLINE size_t -small_s2u_lookup(size_t size) -{ - size_t ret = small_bin2size(small_size2bin(size)); - - assert(ret == small_s2u_compute(size)); - return (ret); -} - -JEMALLOC_ALWAYS_INLINE size_t -small_s2u(size_t size) -{ - - assert(size > 0); - if (likely(size <= LOOKUP_MAXCLASS)) - return (small_s2u_lookup(size)); - else - return (small_s2u_compute(size)); -} -# endif /* JEMALLOC_ARENA_INLINE_A */ - -# ifdef JEMALLOC_ARENA_INLINE_B JEMALLOC_ALWAYS_INLINE arena_chunk_map_bits_t * arena_bitselm_get(arena_chunk_t *chunk, size_t pageind) { @@ -714,11 +560,11 @@ arena_mapbits_small_runind_get(arena_chunk_t *chunk, size_t pageind) return (mapbits >> LG_PAGE); } -JEMALLOC_ALWAYS_INLINE size_t +JEMALLOC_ALWAYS_INLINE index_t arena_mapbits_binind_get(arena_chunk_t *chunk, size_t pageind) { size_t mapbits; - size_t binind; + index_t binind; mapbits = arena_mapbits_get(chunk, pageind); binind = (mapbits & CHUNK_MAP_BININD_MASK) >> CHUNK_MAP_BININD_SHIFT; @@ -810,20 +656,20 @@ arena_mapbits_large_set(arena_chunk_t *chunk, size_t pageind, size_t size, JEMALLOC_ALWAYS_INLINE void arena_mapbits_large_binind_set(arena_chunk_t *chunk, size_t pageind, - size_t binind) + index_t binind) { size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind); size_t mapbits = arena_mapbitsp_read(mapbitsp); assert(binind <= BININD_INVALID); - assert(arena_mapbits_large_size_get(chunk, pageind) == PAGE); + assert(arena_mapbits_large_size_get(chunk, pageind) == LARGE_MINCLASS); arena_mapbitsp_write(mapbitsp, (mapbits & ~CHUNK_MAP_BININD_MASK) | (binind << CHUNK_MAP_BININD_SHIFT)); } JEMALLOC_ALWAYS_INLINE void arena_mapbits_small_set(arena_chunk_t *chunk, size_t pageind, size_t runind, - size_t binind, size_t flags) + index_t binind, size_t flags) { size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind); size_t mapbits = arena_mapbitsp_read(mapbitsp); @@ -893,10 +739,10 @@ arena_prof_accum(arena_t *arena, uint64_t accumbytes) } } -JEMALLOC_ALWAYS_INLINE size_t +JEMALLOC_ALWAYS_INLINE index_t arena_ptr_small_binind_get(const void *ptr, size_t mapbits) { - size_t binind; + index_t binind; binind = (mapbits & CHUNK_MAP_BININD_MASK) >> CHUNK_MAP_BININD_SHIFT; @@ -908,7 +754,7 @@ arena_ptr_small_binind_get(const void *ptr, size_t mapbits) size_t rpages_ind; arena_run_t *run; arena_bin_t *bin; - size_t actual_binind; + index_t actual_binind; arena_bin_info_t *bin_info; arena_chunk_map_misc_t *miscelm; void *rpages; @@ -938,13 +784,13 @@ arena_ptr_small_binind_get(const void *ptr, size_t mapbits) return (binind); } -# endif /* JEMALLOC_ARENA_INLINE_B */ +# endif /* JEMALLOC_ARENA_INLINE_A */ -# ifdef JEMALLOC_ARENA_INLINE_C -JEMALLOC_INLINE size_t +# ifdef JEMALLOC_ARENA_INLINE_B +JEMALLOC_INLINE index_t arena_bin_index(arena_t *arena, arena_bin_t *bin) { - size_t binind = bin - arena->bins; + index_t binind = bin - arena->bins; assert(binind < NBINS); return (binind); } @@ -1102,7 +948,8 @@ arena_salloc(const void *ptr, bool demote) { size_t ret; arena_chunk_t *chunk; - size_t pageind, binind; + size_t pageind; + index_t binind; assert(ptr != NULL); assert(CHUNK_ADDR2BASE(ptr) != ptr); @@ -1122,10 +969,6 @@ arena_salloc(const void *ptr, bool demote) ret = arena_mapbits_large_size_get(chunk, pageind); assert(ret != 0); assert(pageind + (ret>>LG_PAGE) <= chunk_npages); - assert(ret == PAGE || arena_mapbits_large_size_get(chunk, - 
pageind+(ret>>LG_PAGE)-1) == 0); - assert(binind == arena_mapbits_binind_get(chunk, - pageind+(ret>>LG_PAGE)-1)); assert(arena_mapbits_dirty_get(chunk, pageind) == arena_mapbits_dirty_get(chunk, pageind+(ret>>LG_PAGE)-1)); } else { @@ -1133,7 +976,7 @@ arena_salloc(const void *ptr, bool demote) assert(arena_mapbits_large_get(chunk, pageind) != 0 || arena_ptr_small_binind_get(ptr, arena_mapbits_get(chunk, pageind)) == binind); - ret = small_bin2size(binind); + ret = index2size(binind); } return (ret); @@ -1155,7 +998,7 @@ arena_dalloc(tsd_t *tsd, arena_chunk_t *chunk, void *ptr, bool try_tcache) /* Small allocation. */ if (likely(try_tcache) && likely((tcache = tcache_get(tsd, false)) != NULL)) { - size_t binind = arena_ptr_small_binind_get(ptr, + index_t binind = arena_ptr_small_binind_get(ptr, mapbits); tcache_dalloc_small(tcache, ptr, binind); } else @@ -1186,7 +1029,7 @@ arena_sdalloc(tsd_t *tsd, arena_chunk_t *chunk, void *ptr, size_t size, /* Small allocation. */ if (likely(try_tcache) && likely((tcache = tcache_get(tsd, false)) != NULL)) { - size_t binind = small_size2bin(size); + index_t binind = size2index(size); tcache_dalloc_small(tcache, ptr, binind); } else { size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> @@ -1203,7 +1046,7 @@ arena_sdalloc(tsd_t *tsd, arena_chunk_t *chunk, void *ptr, size_t size, arena_dalloc_large(chunk->arena, chunk, ptr); } } -# endif /* JEMALLOC_ARENA_INLINE_C */ +# endif /* JEMALLOC_ARENA_INLINE_B */ #endif #endif /* JEMALLOC_H_INLINES */ diff --git a/include/jemalloc/internal/chunk.h b/include/jemalloc/internal/chunk.h index 2e68a020..764b7aca 100644 --- a/include/jemalloc/internal/chunk.h +++ b/include/jemalloc/internal/chunk.h @@ -40,9 +40,6 @@ extern rtree_t *chunks_rtree; extern size_t chunksize; extern size_t chunksize_mask; /* (chunksize - 1). */ extern size_t chunk_npages; -extern size_t map_bias; /* Number of arena chunk header pages. */ -extern size_t map_misc_offset; -extern size_t arena_maxclass; /* Max size class for arenas. */ void *chunk_alloc_base(size_t size); void *chunk_alloc_arena(chunk_alloc_t *chunk_alloc, diff --git a/include/jemalloc/internal/huge.h b/include/jemalloc/internal/huge.h index 00d8c09d..939993f2 100644 --- a/include/jemalloc/internal/huge.h +++ b/include/jemalloc/internal/huge.h @@ -10,7 +10,7 @@ #ifdef JEMALLOC_H_EXTERNS void *huge_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero); -void *huge_palloc(tsd_t *tsd, arena_t *arena, size_t size, size_t alignment, +void *huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, bool zero); bool huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, bool zero); diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index a169221b..8f0beb9e 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -165,6 +165,9 @@ static const bool config_ivsalloc = #include "jemalloc/internal/jemalloc_internal_macros.h" +/* Size class index type. */ +typedef unsigned index_t; + #define MALLOCX_ARENA_MASK ((int)~0xff) #define MALLOCX_LG_ALIGN_MASK ((int)0x3f) /* Use MALLOCX_ALIGN_GET() if alignment may not be specified in flags. */ @@ -397,6 +400,18 @@ extern arena_t **arenas; extern unsigned narenas_total; extern unsigned narenas_auto; /* Read-only after initialization. */ +/* + * index2size_tab encodes the same information as could be computed (at + * unacceptable cost in some code paths) by index2size_compute(). 
+ */ +extern size_t const index2size_tab[NSIZES]; +/* + * size2index_tab is a compact lookup table that rounds request sizes up to + * size classes. In order to reduce cache footprint, the table is compressed, + * and all accesses are via size2index(). + */ +extern uint8_t const size2index_tab[]; + arena_t *arenas_extend(unsigned ind); arena_t *choose_arena_hard(tsd_t *tsd); void thread_allocated_cleanup(tsd_t *tsd); @@ -449,15 +464,15 @@ void jemalloc_postfork_child(void); #include "jemalloc/internal/chunk.h" #include "jemalloc/internal/huge.h" -/* - * Include arena.h the first time in order to provide inline functions for this - * header's inlines. - */ -#define JEMALLOC_ARENA_INLINE_A -#include "jemalloc/internal/arena.h" -#undef JEMALLOC_ARENA_INLINE_A - #ifndef JEMALLOC_ENABLE_INLINE +index_t size2index_compute(size_t size); +index_t size2index_lookup(size_t size); +index_t size2index(size_t size); +size_t index2size_compute(index_t index); +size_t index2size_lookup(index_t index); +size_t index2size(index_t index); +size_t s2u_compute(size_t size); +size_t s2u_lookup(size_t size); size_t s2u(size_t size); size_t sa2u(size_t size, size_t alignment); unsigned narenas_total_get(void); @@ -465,6 +480,135 @@ arena_t *choose_arena(tsd_t *tsd, arena_t *arena); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) +JEMALLOC_INLINE index_t +size2index_compute(size_t size) +{ + +#if (NTBINS != 0) + if (size <= (ZU(1) << LG_TINY_MAXCLASS)) { + size_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1; + size_t lg_ceil = lg_floor(pow2_ceil(size)); + return (lg_ceil < lg_tmin ? 0 : lg_ceil - lg_tmin); + } else +#endif + { + size_t x = lg_floor((size<<1)-1); + size_t shift = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM) ? 0 : + x - (LG_SIZE_CLASS_GROUP + LG_QUANTUM); + size_t grp = shift << LG_SIZE_CLASS_GROUP; + + size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1) + ? LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1; + + size_t delta_inverse_mask = ZI(-1) << lg_delta; + size_t mod = ((((size-1) & delta_inverse_mask) >> lg_delta)) & + ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1); + + size_t index = NTBINS + grp + mod; + return (index); + } +} + +JEMALLOC_ALWAYS_INLINE index_t +size2index_lookup(size_t size) +{ + + assert(size <= LOOKUP_MAXCLASS); + { + size_t ret = ((size_t)(size2index_tab[(size-1) >> + LG_TINY_MIN])); + assert(ret == size2index_compute(size)); + return (ret); + } +} + +JEMALLOC_ALWAYS_INLINE index_t +size2index(size_t size) +{ + + assert(size > 0); + if (likely(size <= LOOKUP_MAXCLASS)) + return (size2index_lookup(size)); + else + return (size2index_compute(size)); +} + +JEMALLOC_INLINE size_t +index2size_compute(index_t index) +{ + +#if (NTBINS > 0) + if (index < NTBINS) + return (ZU(1) << (LG_TINY_MAXCLASS - NTBINS + 1 + index)); + else +#endif + { + size_t reduced_index = index - NTBINS; + size_t grp = reduced_index >> LG_SIZE_CLASS_GROUP; + size_t mod = reduced_index & ((ZU(1) << LG_SIZE_CLASS_GROUP) - + 1); + + size_t grp_size_mask = ~((!!grp)-1); + size_t grp_size = ((ZU(1) << (LG_QUANTUM + + (LG_SIZE_CLASS_GROUP-1))) << grp) & grp_size_mask; + + size_t shift = (grp == 0) ? 
1 : grp; + size_t lg_delta = shift + (LG_QUANTUM-1); + size_t mod_size = (mod+1) << lg_delta; + + size_t usize = grp_size + mod_size; + return (usize); + } +} + +JEMALLOC_ALWAYS_INLINE size_t +index2size_lookup(index_t index) +{ + size_t ret = (size_t)index2size_tab[index]; + assert(ret == index2size_compute(index)); + return (ret); +} + +JEMALLOC_ALWAYS_INLINE size_t +index2size(index_t index) +{ + + assert(index < NSIZES); + return (index2size_lookup(index)); +} + +JEMALLOC_ALWAYS_INLINE size_t +s2u_compute(size_t size) +{ + +#if (NTBINS > 0) + if (size <= (ZU(1) << LG_TINY_MAXCLASS)) { + size_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1; + size_t lg_ceil = lg_floor(pow2_ceil(size)); + return (lg_ceil < lg_tmin ? (ZU(1) << lg_tmin) : + (ZU(1) << lg_ceil)); + } else +#endif + { + size_t x = lg_floor((size<<1)-1); + size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1) + ? LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1; + size_t delta = ZU(1) << lg_delta; + size_t delta_mask = delta - 1; + size_t usize = (size + delta_mask) & ~delta_mask; + return (usize); + } +} + +JEMALLOC_ALWAYS_INLINE size_t +s2u_lookup(size_t size) +{ + size_t ret = index2size_lookup(size2index_lookup(size)); + + assert(ret == s2u_compute(size)); + return (ret); +} + /* * Compute usable size that would result from allocating an object with the * specified size. @@ -473,11 +617,11 @@ JEMALLOC_ALWAYS_INLINE size_t s2u(size_t size) { - if (size <= SMALL_MAXCLASS) - return (small_s2u(size)); - if (size <= arena_maxclass) - return (PAGE_CEILING(size)); - return (CHUNK_CEILING(size)); + assert(size > 0); + if (likely(size <= LOOKUP_MAXCLASS)) + return (s2u_lookup(size)); + else + return (s2u_compute(size)); } /* @@ -491,71 +635,78 @@ sa2u(size_t size, size_t alignment) assert(alignment != 0 && ((alignment - 1) & alignment) == 0); - /* - * Round size up to the nearest multiple of alignment. - * - * This done, we can take advantage of the fact that for each small - * size class, every object is aligned at the smallest power of two - * that is non-zero in the base two representation of the size. For - * example: - * - * Size | Base 2 | Minimum alignment - * -----+----------+------------------ - * 96 | 1100000 | 32 - * 144 | 10100000 | 32 - * 192 | 11000000 | 64 - */ - usize = ALIGNMENT_CEILING(size, alignment); - /* - * (usize < size) protects against the combination of maximal - * alignment and size greater than maximal alignment. - */ - if (usize < size) { - /* size_t overflow. */ - return (0); + /* Try for a small size class. */ + if (size <= SMALL_MAXCLASS && alignment < PAGE) { + /* + * Round size up to the nearest multiple of alignment. + * + * This done, we can take advantage of the fact that for each + * small size class, every object is aligned at the smallest + * power of two that is non-zero in the base two representation + * of the size. For example: + * + * Size | Base 2 | Minimum alignment + * -----+----------+------------------ + * 96 | 1100000 | 32 + * 144 | 10100000 | 32 + * 192 | 11000000 | 64 + */ + usize = s2u(ALIGNMENT_CEILING(size, alignment)); + if (usize < LARGE_MINCLASS) + return (usize); } - if (usize <= arena_maxclass && alignment <= PAGE) { - if (usize <= SMALL_MAXCLASS) - return (small_s2u(usize)); - return (PAGE_CEILING(usize)); - } else { - size_t run_size; - + /* Try for a large size class. */ + if (size <= arena_maxclass && alignment < chunksize) { /* * We can't achieve subpage alignment, so round up alignment - * permanently; it makes later calculations simpler. 
+ * to the minimum that can actually be supported. */ alignment = PAGE_CEILING(alignment); - usize = PAGE_CEILING(size); - /* - * (usize < size) protects against very large sizes within - * PAGE of SIZE_T_MAX. - * - * (usize + alignment < usize) protects against the - * combination of maximal alignment and usize large enough - * to cause overflow. This is similar to the first overflow - * check above, but it needs to be repeated due to the new - * usize value, which may now be *equal* to maximal - * alignment, whereas before we only detected overflow if the - * original size was *greater* than maximal alignment. - */ - if (usize < size || usize + alignment < usize) { - /* size_t overflow. */ - return (0); - } + + /* Make sure result is a large size class. */ + usize = (size <= LARGE_MINCLASS) ? LARGE_MINCLASS : s2u(size); /* * Calculate the size of the over-size run that arena_palloc() * would need to allocate in order to guarantee the alignment. - * If the run wouldn't fit within a chunk, round up to a huge - * allocation size. */ - run_size = usize + alignment - PAGE; - if (run_size <= arena_maxclass) - return (PAGE_CEILING(usize)); - return (CHUNK_CEILING(usize)); + if (usize + alignment - PAGE <= arena_maxrun) + return (usize); } + + /* Huge size class. Beware of size_t overflow. */ + + /* + * We can't achieve subchunk alignment, so round up alignment to the + * minimum that can actually be supported. + */ + alignment = CHUNK_CEILING(alignment); + if (alignment == 0) { + /* size_t overflow. */ + return (0); + } + + /* Make sure result is a huge size class. */ + if (size <= chunksize) + usize = chunksize; + else { + usize = s2u(size); + if (usize < size) { + /* size_t overflow. */ + return (0); + } + } + + /* + * Calculate the multi-chunk mapping that huge_palloc() would need in + * order to guarantee the alignment. + */ + if (usize + alignment - PAGE < usize) { + /* size_t overflow. */ + return (0); + } + return (usize); } JEMALLOC_INLINE unsigned @@ -591,16 +742,16 @@ choose_arena(tsd_t *tsd, arena_t *arena) #include "jemalloc/internal/bitmap.h" #include "jemalloc/internal/rtree.h" /* - * Include arena.h the second and third times in order to resolve circular - * dependencies with tcache.h. + * Include portions of arena.h interleaved with tcache.h in order to resolve + * circular dependencies. 
*/ +#define JEMALLOC_ARENA_INLINE_A +#include "jemalloc/internal/arena.h" +#undef JEMALLOC_ARENA_INLINE_A +#include "jemalloc/internal/tcache.h" #define JEMALLOC_ARENA_INLINE_B #include "jemalloc/internal/arena.h" #undef JEMALLOC_ARENA_INLINE_B -#include "jemalloc/internal/tcache.h" -#define JEMALLOC_ARENA_INLINE_C -#include "jemalloc/internal/arena.h" -#undef JEMALLOC_ARENA_INLINE_C #include "jemalloc/internal/hash.h" #include "jemalloc/internal/quarantine.h" @@ -678,7 +829,7 @@ ipalloct(tsd_t *tsd, size_t usize, size_t alignment, bool zero, bool try_tcache, assert(usize != 0); assert(usize == sa2u(usize, alignment)); - if (usize <= arena_maxclass && alignment <= PAGE) + if (usize <= SMALL_MAXCLASS && alignment < PAGE) ret = arena_malloc(tsd, arena, usize, zero, try_tcache); else { if (usize <= arena_maxclass) { @@ -742,7 +893,7 @@ u2rz(size_t usize) size_t ret; if (usize <= SMALL_MAXCLASS) { - size_t binind = small_size2bin(usize); + index_t binind = size2index(usize); ret = arena_bin_info[binind].redzone_size; } else ret = 0; diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 4ea9a953..1a7fde4b 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -41,6 +41,7 @@ arena_mapbitsp_get arena_mapbitsp_read arena_mapbitsp_write arena_maxclass +arena_maxrun arena_miscelm_get arena_miscelm_to_pageind arena_miscelm_to_rpages @@ -216,6 +217,10 @@ idalloct imalloc imalloct in_valgrind +index2size +index2size_compute +index2size_lookup +index2size_tab ipalloc ipalloct iqalloc @@ -338,19 +343,14 @@ rtree_postfork_parent rtree_prefork rtree_set s2u +s2u_compute +s2u_lookup sa2u set_errno -small_bin2size -small_bin2size_compute -small_bin2size_lookup -small_bin2size_tab -small_s2u -small_s2u_compute -small_s2u_lookup -small_size2bin -small_size2bin_compute -small_size2bin_lookup -small_size2bin_tab +size2index +size2index_compute +size2index_lookup +size2index_tab stats_cactive stats_cactive_add stats_cactive_get diff --git a/include/jemalloc/internal/size_classes.sh b/include/jemalloc/internal/size_classes.sh index 0cfac72d..897570cc 100755 --- a/include/jemalloc/internal/size_classes.sh +++ b/include/jemalloc/internal/size_classes.sh @@ -61,7 +61,7 @@ size_class() { rem="yes" fi - if [ ${lg_size} -lt ${lg_p} ] ; then + if [ ${lg_size} -lt $((${lg_p} + ${lg_g})) ] ; then bin="yes" else bin="no" @@ -159,6 +159,7 @@ size_classes() { nbins=$((${index} + 1)) # Final written value is correct: small_maxclass="((((size_t)1) << ${lg_grp}) + (((size_t)${ndelta}) << ${lg_delta}))" + lg_large_minclass=$((${lg_grp} + 1)) fi index=$((${index} + 1)) ndelta=$((${ndelta} + 1)) @@ -167,14 +168,17 @@ size_classes() { lg_delta=$((${lg_delta} + 1)) done echo + nsizes=${index} # Defined upon completion: # - ntbins # - nlbins # - nbins + # - nsizes # - lg_tiny_maxclass # - lookup_maxclass # - small_maxclass + # - lg_large_minclass } cat <tbins[binind]; - size = small_bin2size(binind); + usize = index2size(binind); ret = tcache_alloc_easy(tbin); if (unlikely(ret == NULL)) { ret = tcache_alloc_small_hard(tcache, tbin, binind); if (ret == NULL) return (NULL); } - assert(tcache_salloc(ret) == size); + assert(tcache_salloc(ret) == usize); if (likely(!zero)) { if (config_fill) { @@ -254,20 +255,20 @@ tcache_alloc_small(tcache_t *tcache, size_t size, bool zero) arena_alloc_junk_small(ret, &arena_bin_info[binind], false); } else if (unlikely(opt_zero)) - memset(ret, 0, size); + memset(ret, 0, usize); } } else { 
if (config_fill && unlikely(opt_junk)) { arena_alloc_junk_small(ret, &arena_bin_info[binind], true); } - memset(ret, 0, size); + memset(ret, 0, usize); } if (config_stats) tbin->tstats.nrequests++; if (config_prof) - tcache->prof_accumbytes += size; + tcache->prof_accumbytes += usize; tcache_event(tcache); return (ret); } @@ -276,12 +277,13 @@ JEMALLOC_ALWAYS_INLINE void * tcache_alloc_large(tcache_t *tcache, size_t size, bool zero) { void *ret; - size_t binind; + index_t binind; + size_t usize; tcache_bin_t *tbin; - size = PAGE_CEILING(size); - assert(size <= tcache_maxclass); - binind = NBINS + (size >> LG_PAGE) - 1; + binind = size2index(size); + usize = index2size(binind); + assert(usize <= tcache_maxclass); assert(binind < nhbins); tbin = &tcache->tbins[binind]; ret = tcache_alloc_easy(tbin); @@ -290,11 +292,11 @@ tcache_alloc_large(tcache_t *tcache, size_t size, bool zero) * Only allocate one large object at a time, because it's quite * expensive to create one and not use it. */ - ret = arena_malloc_large(tcache->arena, size, zero); + ret = arena_malloc_large(tcache->arena, usize, zero); if (ret == NULL) return (NULL); } else { - if (config_prof && size == PAGE) { + if (config_prof && usize == LARGE_MINCLASS) { arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ret); size_t pageind = (((uintptr_t)ret - (uintptr_t)chunk) >> @@ -305,17 +307,17 @@ tcache_alloc_large(tcache_t *tcache, size_t size, bool zero) if (likely(!zero)) { if (config_fill) { if (unlikely(opt_junk)) - memset(ret, 0xa5, size); + memset(ret, 0xa5, usize); else if (unlikely(opt_zero)) - memset(ret, 0, size); + memset(ret, 0, usize); } } else - memset(ret, 0, size); + memset(ret, 0, usize); if (config_stats) tbin->tstats.nrequests++; if (config_prof) - tcache->prof_accumbytes += size; + tcache->prof_accumbytes += usize; } tcache_event(tcache); @@ -323,7 +325,7 @@ tcache_alloc_large(tcache_t *tcache, size_t size, bool zero) } JEMALLOC_ALWAYS_INLINE void -tcache_dalloc_small(tcache_t *tcache, void *ptr, size_t binind) +tcache_dalloc_small(tcache_t *tcache, void *ptr, index_t binind) { tcache_bin_t *tbin; tcache_bin_info_t *tbin_info; @@ -349,7 +351,7 @@ tcache_dalloc_small(tcache_t *tcache, void *ptr, size_t binind) JEMALLOC_ALWAYS_INLINE void tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size) { - size_t binind; + index_t binind; tcache_bin_t *tbin; tcache_bin_info_t *tbin_info; @@ -357,7 +359,7 @@ tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size) assert(tcache_salloc(ptr) > SMALL_MAXCLASS); assert(tcache_salloc(ptr) <= tcache_maxclass); - binind = NBINS + (size >> LG_PAGE) - 1; + binind = size2index(size); if (config_fill && unlikely(opt_junk)) memset(ptr, 0x5a, size); diff --git a/src/arena.c b/src/arena.c index b7300a92..49a30572 100644 --- a/src/arena.c +++ b/src/arena.c @@ -7,42 +7,11 @@ ssize_t opt_lg_dirty_mult = LG_DIRTY_MULT_DEFAULT; arena_bin_info_t arena_bin_info[NBINS]; -JEMALLOC_ALIGNED(CACHELINE) -const uint32_t small_bin2size_tab[NBINS] = { -#define B2S_bin_yes(size) \ - size, -#define B2S_bin_no(size) -#define SC(index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup) \ - B2S_bin_##bin((ZU(1)<> LG_PAGE; size_t mapbits = arena_mapbits_get(chunk, pageind); - size_t binind = arena_ptr_small_binind_get(ptr, mapbits); + index_t binind = arena_ptr_small_binind_get(ptr, mapbits); arena_bin_info_t *bin_info = &arena_bin_info[binind]; unsigned regind = arena_run_regind(run, bin_info, ptr); @@ -375,7 +344,7 @@ arena_run_init_large(arena_t *arena, arena_run_t *run, size_t size, bool zero) 
static void arena_run_split_small(arena_t *arena, arena_run_t *run, size_t size, - size_t binind) + index_t binind) { arena_chunk_t *chunk; arena_chunk_map_misc_t *miscelm; @@ -429,9 +398,9 @@ arena_chunk_init_spare(arena_t *arena) assert(arena_mapbits_allocated_get(chunk, map_bias) == 0); assert(arena_mapbits_allocated_get(chunk, chunk_npages-1) == 0); assert(arena_mapbits_unallocated_size_get(chunk, map_bias) == - arena_maxclass); + arena_maxrun); assert(arena_mapbits_unallocated_size_get(chunk, chunk_npages-1) == - arena_maxclass); + arena_maxrun); assert(arena_mapbits_dirty_get(chunk, map_bias) == arena_mapbits_dirty_get(chunk, chunk_npages-1)); @@ -518,8 +487,7 @@ arena_chunk_init_hard(arena_t *arena) * the pages as zeroed iff chunk_alloc() returned a zeroed chunk. */ unzeroed = zero ? 0 : CHUNK_MAP_UNZEROED; - arena_mapbits_unallocated_set(chunk, map_bias, arena_maxclass, - unzeroed); + arena_mapbits_unallocated_set(chunk, map_bias, arena_maxrun, unzeroed); /* * There is no need to initialize the internal page map entries unless * the chunk is not zeroed. @@ -544,7 +512,7 @@ arena_chunk_init_hard(arena_t *arena) } } } - arena_mapbits_unallocated_set(chunk, chunk_npages-1, arena_maxclass, + arena_mapbits_unallocated_set(chunk, chunk_npages-1, arena_maxrun, unzeroed); return (chunk); @@ -607,9 +575,9 @@ arena_chunk_dalloc(arena_t *arena, arena_chunk_t *chunk) assert(arena_mapbits_allocated_get(chunk, map_bias) == 0); assert(arena_mapbits_allocated_get(chunk, chunk_npages-1) == 0); assert(arena_mapbits_unallocated_size_get(chunk, map_bias) == - arena_maxclass); + arena_maxrun); assert(arena_mapbits_unallocated_size_get(chunk, chunk_npages-1) == - arena_maxclass); + arena_maxrun); assert(arena_mapbits_dirty_get(chunk, map_bias) == arena_mapbits_dirty_get(chunk, chunk_npages-1)); @@ -682,7 +650,7 @@ arena_run_alloc_large(arena_t *arena, size_t size, bool zero) } static arena_run_t * -arena_run_alloc_small_helper(arena_t *arena, size_t size, size_t binind) +arena_run_alloc_small_helper(arena_t *arena, size_t size, index_t binind) { arena_run_t *run; arena_chunk_map_misc_t *miscelm; @@ -700,7 +668,7 @@ arena_run_alloc_small_helper(arena_t *arena, size_t size, size_t binind) } static arena_run_t * -arena_run_alloc_small(arena_t *arena, size_t size, size_t binind) +arena_run_alloc_small(arena_t *arena, size_t size, index_t binind) { arena_chunk_t *chunk; arena_run_t *run; @@ -1034,7 +1002,7 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, bool cleaned) arena_mapbits_large_size_get(chunk, run_ind+(size>>LG_PAGE)-1) == 0); } else { - size_t binind = arena_bin_index(arena, run->bin); + index_t binind = arena_bin_index(arena, run->bin); arena_bin_info_t *bin_info = &arena_bin_info[binind]; size = bin_info->run_size; } @@ -1079,9 +1047,9 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, bool cleaned) arena_dirty_insert(arena, chunk, run_ind, run_pages); /* Deallocate chunk if it is now completely unused. */ - if (size == arena_maxclass) { + if (size == arena_maxrun) { assert(run_ind == map_bias); - assert(run_pages == (arena_maxclass >> LG_PAGE)); + assert(run_pages == (arena_maxrun >> LG_PAGE)); arena_chunk_dalloc(arena, chunk); } @@ -1212,7 +1180,7 @@ static arena_run_t * arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin) { arena_run_t *run; - size_t binind; + index_t binind; arena_bin_info_t *bin_info; /* Look for a usable run. 
*/ @@ -1264,7 +1232,7 @@ static void * arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin) { void *ret; - size_t binind; + index_t binind; arena_bin_info_t *bin_info; arena_run_t *run; @@ -1310,7 +1278,7 @@ arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin) } void -arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind, +arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, index_t binind, uint64_t prof_accumbytes) { unsigned i, nfill; @@ -1450,14 +1418,14 @@ arena_dalloc_junk_small_t *arena_dalloc_junk_small = void arena_quarantine_junk_small(void *ptr, size_t usize) { - size_t binind; + index_t binind; arena_bin_info_t *bin_info; cassert(config_fill); assert(opt_junk); assert(opt_quarantine); assert(usize <= SMALL_MAXCLASS); - binind = small_size2bin(usize); + binind = size2index(usize); bin_info = &arena_bin_info[binind]; arena_redzones_validate(ptr, bin_info, true); } @@ -1468,12 +1436,12 @@ arena_malloc_small(arena_t *arena, size_t size, bool zero) void *ret; arena_bin_t *bin; arena_run_t *run; - size_t binind; + index_t binind; - binind = small_size2bin(size); + binind = size2index(size); assert(binind < NBINS); bin = &arena->bins[binind]; - size = small_bin2size(binind); + size = index2size(binind); malloc_mutex_lock(&bin->lock); if ((run = bin->runcur) != NULL && run->nfree > 0) @@ -1520,14 +1488,15 @@ void * arena_malloc_large(arena_t *arena, size_t size, bool zero) { void *ret; + size_t usize; arena_run_t *run; arena_chunk_map_misc_t *miscelm; UNUSED bool idump; /* Large allocation. */ - size = PAGE_CEILING(size); + usize = s2u(size); malloc_mutex_lock(&arena->lock); - run = arena_run_alloc_large(arena, size, zero); + run = arena_run_alloc_large(arena, usize, zero); if (run == NULL) { malloc_mutex_unlock(&arena->lock); return (NULL); @@ -1535,15 +1504,17 @@ arena_malloc_large(arena_t *arena, size_t size, bool zero) miscelm = arena_run_to_miscelm(run); ret = arena_miscelm_to_rpages(miscelm); if (config_stats) { + index_t index = size2index(usize) - NBINS; + arena->stats.nmalloc_large++; arena->stats.nrequests_large++; - arena->stats.allocated_large += size; - arena->stats.lstats[(size >> LG_PAGE) - 1].nmalloc++; - arena->stats.lstats[(size >> LG_PAGE) - 1].nrequests++; - arena->stats.lstats[(size >> LG_PAGE) - 1].curruns++; + arena->stats.allocated_large += usize; + arena->stats.lstats[index].nmalloc++; + arena->stats.lstats[index].nrequests++; + arena->stats.lstats[index].curruns++; } if (config_prof) - idump = arena_prof_accum_locked(arena, size); + idump = arena_prof_accum_locked(arena, usize); malloc_mutex_unlock(&arena->lock); if (config_prof && idump) prof_idump(); @@ -1551,9 +1522,9 @@ arena_malloc_large(arena_t *arena, size_t size, bool zero) if (!zero) { if (config_fill) { if (unlikely(opt_junk)) - memset(ret, 0xa5, size); + memset(ret, 0xa5, usize); else if (unlikely(opt_zero)) - memset(ret, 0, size); + memset(ret, 0, usize); } } @@ -1610,12 +1581,14 @@ arena_palloc(arena_t *arena, size_t size, size_t alignment, bool zero) ret = arena_miscelm_to_rpages(miscelm); if (config_stats) { + index_t index = size2index(size) - NBINS; + arena->stats.nmalloc_large++; arena->stats.nrequests_large++; arena->stats.allocated_large += size; - arena->stats.lstats[(size >> LG_PAGE) - 1].nmalloc++; - arena->stats.lstats[(size >> LG_PAGE) - 1].nrequests++; - arena->stats.lstats[(size >> LG_PAGE) - 1].curruns++; + arena->stats.lstats[index].nmalloc++; + arena->stats.lstats[index].nrequests++; + arena->stats.lstats[index].curruns++; } 
malloc_mutex_unlock(&arena->lock); @@ -1632,22 +1605,23 @@ void arena_prof_promoted(const void *ptr, size_t size) { arena_chunk_t *chunk; - size_t pageind, binind; + size_t pageind; + index_t binind; cassert(config_prof); assert(ptr != NULL); assert(CHUNK_ADDR2BASE(ptr) != ptr); - assert(isalloc(ptr, false) == PAGE); - assert(isalloc(ptr, true) == PAGE); + assert(isalloc(ptr, false) == LARGE_MINCLASS); + assert(isalloc(ptr, true) == LARGE_MINCLASS); assert(size <= SMALL_MAXCLASS); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - binind = small_size2bin(size); + binind = size2index(size); assert(binind < NBINS); arena_mapbits_large_binind_set(chunk, pageind, binind); - assert(isalloc(ptr, false) == PAGE); + assert(isalloc(ptr, false) == LARGE_MINCLASS); assert(isalloc(ptr, true) == size); } @@ -1660,7 +1634,7 @@ arena_dissociate_bin_run(arena_chunk_t *chunk, arena_run_t *run, if (run == bin->runcur) bin->runcur = NULL; else { - size_t binind = arena_bin_index(chunk->arena, bin); + index_t binind = arena_bin_index(chunk->arena, bin); arena_bin_info_t *bin_info = &arena_bin_info[binind]; if (bin_info->nregs != 1) { @@ -1678,7 +1652,7 @@ static void arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, arena_bin_t *bin) { - size_t binind; + index_t binind; arena_bin_info_t *bin_info; size_t npages, run_ind, past; arena_chunk_map_misc_t *miscelm; @@ -1762,7 +1736,8 @@ arena_dalloc_bin_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr, arena_run_t *run; arena_bin_t *bin; arena_bin_info_t *bin_info; - size_t size, binind; + size_t size; + index_t binind; pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; rpages_ind = pageind - arena_mapbits_small_runind_get(chunk, pageind); @@ -1851,10 +1826,12 @@ arena_dalloc_large_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr) arena_dalloc_junk_large(ptr, usize); if (config_stats) { + index_t index = size2index(usize) - NBINS; + arena->stats.ndalloc_large++; arena->stats.allocated_large -= usize; - arena->stats.lstats[(usize >> LG_PAGE) - 1].ndalloc++; - arena->stats.lstats[(usize >> LG_PAGE) - 1].curruns--; + arena->stats.lstats[index].ndalloc++; + arena->stats.lstats[index].curruns--; } } @@ -1887,17 +1864,20 @@ arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk, void *ptr, malloc_mutex_lock(&arena->lock); arena_run_trim_tail(arena, chunk, run, oldsize, size, true); if (config_stats) { + index_t oldindex = size2index(oldsize) - NBINS; + index_t index = size2index(size) - NBINS; + arena->stats.ndalloc_large++; arena->stats.allocated_large -= oldsize; - arena->stats.lstats[(oldsize >> LG_PAGE) - 1].ndalloc++; - arena->stats.lstats[(oldsize >> LG_PAGE) - 1].curruns--; + arena->stats.lstats[oldindex].ndalloc++; + arena->stats.lstats[oldindex].curruns--; arena->stats.nmalloc_large++; arena->stats.nrequests_large++; arena->stats.allocated_large += size; - arena->stats.lstats[(size >> LG_PAGE) - 1].nmalloc++; - arena->stats.lstats[(size >> LG_PAGE) - 1].nrequests++; - arena->stats.lstats[(size >> LG_PAGE) - 1].curruns++; + arena->stats.lstats[index].nmalloc++; + arena->stats.lstats[index].nrequests++; + arena->stats.lstats[index].curruns++; } malloc_mutex_unlock(&arena->lock); } @@ -1909,24 +1889,30 @@ arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr, size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; size_t npages = oldsize >> LG_PAGE; size_t followsize; + size_t usize_min = s2u(size); assert(oldsize == 
arena_mapbits_large_size_get(chunk, pageind)); /* Try to extend the run. */ - assert(size + extra > oldsize); + assert(usize_min > oldsize); malloc_mutex_lock(&arena->lock); if (pageind + npages < chunk_npages && arena_mapbits_allocated_get(chunk, pageind+npages) == 0 && (followsize = arena_mapbits_unallocated_size_get(chunk, - pageind+npages)) >= size - oldsize) { + pageind+npages)) >= usize_min - oldsize) { /* * The next run is available and sufficiently large. Split the * following run, then merge the first part with the existing * allocation. */ - size_t flag_dirty; - size_t splitsize = (oldsize + followsize <= size + extra) - ? followsize : size + extra - oldsize; + size_t flag_dirty, splitsize, usize; + + usize = s2u(size + extra); + while (oldsize + followsize < usize) + usize = index2size(size2index(usize)-1); + assert(usize >= usize_min); + splitsize = usize - oldsize; + arena_run_t *run = &arena_miscelm_get(chunk, pageind+npages)->run; arena_run_split_large(arena, run, splitsize, zero); @@ -1948,17 +1934,20 @@ arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr, arena_mapbits_large_set(chunk, pageind+npages-1, 0, flag_dirty); if (config_stats) { + index_t oldindex = size2index(oldsize) - NBINS; + index_t index = size2index(size) - NBINS; + arena->stats.ndalloc_large++; arena->stats.allocated_large -= oldsize; - arena->stats.lstats[(oldsize >> LG_PAGE) - 1].ndalloc++; - arena->stats.lstats[(oldsize >> LG_PAGE) - 1].curruns--; + arena->stats.lstats[oldindex].ndalloc++; + arena->stats.lstats[oldindex].curruns--; arena->stats.nmalloc_large++; arena->stats.nrequests_large++; arena->stats.allocated_large += size; - arena->stats.lstats[(size >> LG_PAGE) - 1].nmalloc++; - arena->stats.lstats[(size >> LG_PAGE) - 1].nrequests++; - arena->stats.lstats[(size >> LG_PAGE) - 1].curruns++; + arena->stats.lstats[index].nmalloc++; + arena->stats.lstats[index].nrequests++; + arena->stats.lstats[index].curruns++; } malloc_mutex_unlock(&arena->lock); return (false); @@ -1996,10 +1985,14 @@ static bool arena_ralloc_large(void *ptr, size_t oldsize, size_t size, size_t extra, bool zero) { - size_t psize; + size_t usize; - psize = PAGE_CEILING(size + extra); - if (psize == oldsize) { + /* Make sure extra can't cause size_t overflow. */ + if (extra >= arena_maxclass) + return (true); + + usize = s2u(size + extra); + if (usize == oldsize) { /* Same size class. */ return (false); } else { @@ -2009,16 +2002,15 @@ arena_ralloc_large(void *ptr, size_t oldsize, size_t size, size_t extra, chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); arena = chunk->arena; - if (psize < oldsize) { + if (usize < oldsize) { /* Fill before shrinking in order avoid a race. 
*/ - arena_ralloc_junk_large(ptr, oldsize, psize); + arena_ralloc_junk_large(ptr, oldsize, usize); arena_ralloc_large_shrink(arena, chunk, ptr, oldsize, - psize); + usize); return (false); } else { bool ret = arena_ralloc_large_grow(arena, chunk, ptr, - oldsize, PAGE_CEILING(size), - psize - PAGE_CEILING(size), zero); + oldsize, size, extra, zero); if (config_fill && !ret && !zero) { if (unlikely(opt_junk)) { memset((void *)((uintptr_t)ptr + @@ -2045,12 +2037,11 @@ arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, */ if (oldsize <= arena_maxclass) { if (oldsize <= SMALL_MAXCLASS) { - assert(arena_bin_info[small_size2bin(oldsize)].reg_size + assert(arena_bin_info[size2index(oldsize)].reg_size == oldsize); - if ((size + extra <= SMALL_MAXCLASS && - small_size2bin(size + extra) == - small_size2bin(oldsize)) || (size <= oldsize && - size + extra >= oldsize)) + if ((size + extra <= SMALL_MAXCLASS && size2index(size + + extra) == size2index(oldsize)) || (size <= oldsize + && size + extra >= oldsize)) return (false); } else { assert(size <= arena_maxclass); @@ -2258,7 +2249,7 @@ arena_new(arena_t *arena, unsigned ind) /* * Calculate bin_info->run_size such that it meets the following constraints: * - * *) bin_info->run_size <= arena_maxclass + * *) bin_info->run_size <= arena_maxrun * *) bin_info->nregs <= RUN_MAXREGS * * bin_info->nregs and bin_info->reg0_offset are also calculated here, since @@ -2330,7 +2321,7 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info) /* * Make sure that the run will fit within an arena chunk. */ - while (actual_run_size > arena_maxclass) { + while (actual_run_size > arena_maxrun) { actual_run_size -= PAGE; actual_nregs = (actual_run_size - pad_size) / bin_info->reg_interval; @@ -2396,7 +2387,17 @@ arena_boot(void) map_misc_offset = offsetof(arena_chunk_t, map_bits) + sizeof(arena_chunk_map_bits_t) * (chunk_npages-map_bias); - arena_maxclass = chunksize - (map_bias << LG_PAGE); + arena_maxrun = chunksize - (map_bias << LG_PAGE); + arena_maxclass = index2size(size2index(chunksize)-1); + if (arena_maxclass > arena_maxrun) { + /* + * For small chunk sizes it's possible for there to be fewer + * non-header pages available than are necessary to serve the + * size classes just below chunksize. + */ + arena_maxclass = arena_maxrun; + } + nlclasses = size2index(arena_maxclass) - size2index(SMALL_MAXCLASS); bin_info_init(); } diff --git a/src/chunk.c b/src/chunk.c index 32b8b3a6..618aaca0 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -27,9 +27,6 @@ rtree_t *chunks_rtree; size_t chunksize; size_t chunksize_mask; /* (chunksize - 1). */ size_t chunk_npages; -size_t map_bias; -size_t map_misc_offset; -size_t arena_maxclass; /* Max size class for arenas. 
*/ /******************************************************************************/ /* diff --git a/src/ctl.c b/src/ctl.c index 309f1f65..f1f3234b 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1628,7 +1628,7 @@ arenas_bin_i_index(const size_t *mib, size_t miblen, size_t i) } CTL_RO_NL_GEN(arenas_nlruns, nlclasses, size_t) -CTL_RO_NL_GEN(arenas_lrun_i_size, ((mib[2]+1) << LG_PAGE), size_t) +CTL_RO_NL_GEN(arenas_lrun_i_size, index2size(NBINS+mib[2]), size_t) static const ctl_named_node_t * arenas_lrun_i_index(const size_t *mib, size_t miblen, size_t i) { diff --git a/src/huge.c b/src/huge.c index 6bdc0767..ae416253 100644 --- a/src/huge.c +++ b/src/huge.c @@ -15,12 +15,19 @@ static extent_tree_t huge; void * huge_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero) { + size_t usize; - return (huge_palloc(tsd, arena, size, chunksize, zero)); + usize = s2u(size); + if (usize == 0) { + /* size_t overflow. */ + return (NULL); + } + + return (huge_palloc(tsd, arena, usize, chunksize, zero)); } void * -huge_palloc(tsd_t *tsd, arena_t *arena, size_t size, size_t alignment, +huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, bool zero) { void *ret; @@ -30,11 +37,8 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t size, size_t alignment, /* Allocate one or more contiguous chunks for this request. */ - csize = CHUNK_CEILING(size); - if (csize == 0) { - /* size is large enough to cause size_t wrap-around. */ - return (NULL); - } + csize = CHUNK_CEILING(usize); + assert(csize >= usize); /* Allocate an extent node with which to track the chunk. */ node = base_node_alloc(); @@ -55,7 +59,7 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t size, size_t alignment, /* Insert node into huge. */ node->addr = ret; - node->size = csize; + node->size = usize; node->arena = arena; malloc_mutex_lock(&huge_mtx); @@ -64,9 +68,9 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t size, size_t alignment, if (config_fill && !zero) { if (unlikely(opt_junk)) - memset(ret, 0xa5, csize); + memset(ret, 0xa5, usize); else if (unlikely(opt_zero) && !is_zeroed) - memset(ret, 0, csize); + memset(ret, 0, usize); } return (ret); @@ -97,7 +101,7 @@ huge_dalloc_junk_t *huge_dalloc_junk = JEMALLOC_N(huge_dalloc_junk_impl); static bool huge_ralloc_no_move_expand(void *ptr, size_t oldsize, size_t size, bool zero) { - size_t csize; + size_t usize; void *expand_addr; size_t expand_size; extent_node_t *node, key; @@ -105,14 +109,14 @@ huge_ralloc_no_move_expand(void *ptr, size_t oldsize, size_t size, bool zero) { bool is_zeroed; void *ret; - csize = CHUNK_CEILING(size); - if (csize == 0) { - /* size is large enough to cause size_t wrap-around. */ + usize = s2u(size); + if (usize == 0) { + /* size_t overflow. */ return (true); } - expand_addr = ptr + oldsize; - expand_size = csize - oldsize; + expand_addr = ptr + CHUNK_CEILING(oldsize); + expand_size = CHUNK_CEILING(usize) - CHUNK_CEILING(oldsize); malloc_mutex_lock(&huge_mtx); @@ -140,14 +144,14 @@ huge_ralloc_no_move_expand(void *ptr, size_t oldsize, size_t size, bool zero) { malloc_mutex_lock(&huge_mtx); /* Update the size of the huge allocation. 
*/ - node->size = csize; + node->size = usize; malloc_mutex_unlock(&huge_mtx); if (config_fill && !zero) { if (unlikely(opt_junk)) - memset(expand_addr, 0xa5, expand_size); + memset(ptr + oldsize, 0xa5, usize - oldsize); else if (unlikely(opt_zero) && !is_zeroed) - memset(expand_addr, 0, expand_size); + memset(ptr + oldsize, 0, usize - oldsize); } return (false); } @@ -156,27 +160,71 @@ bool huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, bool zero) { + size_t usize; /* Both allocations must be huge to avoid a move. */ - if (oldsize <= arena_maxclass) + if (oldsize < chunksize) return (true); - assert(CHUNK_CEILING(oldsize) == oldsize); + assert(s2u(oldsize) == oldsize); + usize = s2u(size); + if (usize == 0) { + /* size_t overflow. */ + return (true); + } /* - * Avoid moving the allocation if the size class can be left the same. + * Avoid moving the allocation if the existing chunk size accommodates + * the new size. */ + if (CHUNK_CEILING(oldsize) >= CHUNK_CEILING(usize) + && CHUNK_CEILING(oldsize) <= CHUNK_CEILING(size+extra)) { + size_t usize_next; + + /* Increase usize to incorporate extra. */ + while (usize < s2u(size+extra) && (usize_next = s2u(usize+1)) < + oldsize) + usize = usize_next; + + /* Update the size of the huge allocation if it changed. */ + if (oldsize != usize) { + extent_node_t *node, key; + + malloc_mutex_lock(&huge_mtx); + + key.addr = ptr; + node = extent_tree_ad_search(&huge, &key); + assert(node != NULL); + assert(node->addr == ptr); + + assert(node->size != usize); + node->size = usize; + + malloc_mutex_unlock(&huge_mtx); + + if (oldsize < usize) { + if (zero || (config_fill && + unlikely(opt_zero))) { + memset(ptr + oldsize, 0, usize - + oldsize); + } else if (config_fill && unlikely(opt_junk)) { + memset(ptr + oldsize, 0xa5, usize - + oldsize); + } + } else if (config_fill && unlikely(opt_junk) && oldsize + > usize) + memset(ptr + usize, 0x5a, oldsize - usize); + } + return (false); + } + if (CHUNK_CEILING(oldsize) >= CHUNK_CEILING(size) && CHUNK_CEILING(oldsize) <= CHUNK_CEILING(size+extra)) { return (false); } - /* Overflow. */ - if (CHUNK_CEILING(size) == 0) - return (true); - /* Shrink the allocation in-place. */ - if (CHUNK_CEILING(oldsize) > CHUNK_CEILING(size)) { + if (CHUNK_CEILING(oldsize) > CHUNK_CEILING(usize)) { extent_node_t *node, key; void *excess_addr; size_t excess_size; @@ -189,15 +237,15 @@ huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, assert(node->addr == ptr); /* Update the size of the huge allocation. */ - node->size = CHUNK_CEILING(size); + node->size = usize; malloc_mutex_unlock(&huge_mtx); - excess_addr = node->addr + CHUNK_CEILING(size); - excess_size = CHUNK_CEILING(oldsize) - CHUNK_CEILING(size); + excess_addr = node->addr + CHUNK_CEILING(usize); + excess_size = CHUNK_CEILING(oldsize) - CHUNK_CEILING(usize); /* Zap the excess chunks. 
*/ - huge_dalloc_junk(excess_addr, excess_size); + huge_dalloc_junk(ptr + usize, oldsize - usize); arena_chunk_dalloc_huge(node->arena, excess_addr, excess_size); return (false); @@ -275,7 +323,8 @@ huge_dalloc(void *ptr) malloc_mutex_unlock(&huge_mtx); huge_dalloc_junk(node->addr, node->size); - arena_chunk_dalloc_huge(node->arena, node->addr, node->size); + arena_chunk_dalloc_huge(node->arena, node->addr, + CHUNK_CEILING(node->size)); base_node_dalloc(node); } diff --git a/src/jemalloc.c b/src/jemalloc.c index 3490ecdf..f3750b40 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -42,6 +42,38 @@ unsigned narenas_auto; /* Set to true once the allocator has been initialized. */ static bool malloc_initialized = false; +JEMALLOC_ALIGNED(CACHELINE) +const size_t index2size_tab[NSIZES] = { +#define SC(index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup) \ + ((ZU(1)<next_gc_bin; + index_t binind = tcache->next_gc_bin; tcache_bin_t *tbin = &tcache->tbins[binind]; tcache_bin_info_t *tbin_info = &tcache_bin_info[binind]; @@ -62,7 +62,7 @@ tcache_event_hard(tcache_t *tcache) } void * -tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin, size_t binind) +tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin, index_t binind) { void *ret; @@ -76,7 +76,7 @@ tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin, size_t binind) } void -tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem, +tcache_bin_flush_small(tcache_bin_t *tbin, index_t binind, unsigned rem, tcache_t *tcache) { void *ptr; @@ -153,7 +153,7 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem, } void -tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem, +tcache_bin_flush_large(tcache_bin_t *tbin, index_t binind, unsigned rem, tcache_t *tcache) { void *ptr; diff --git a/test/unit/junk.c b/test/unit/junk.c index 301428f2..5b35a879 100644 --- a/test/unit/junk.c +++ b/test/unit/junk.c @@ -88,7 +88,6 @@ test_junk(size_t sz_min, size_t sz_max) if (xallocx(s, sz+1, 0, 0) == sz) { void *junked = (void *)s; - s = (char *)rallocx(s, sz+1, 0); assert_ptr_not_null((void *)s, "Unexpected rallocx() failure"); @@ -134,13 +133,25 @@ TEST_END arena_ralloc_junk_large_t *arena_ralloc_junk_large_orig; static void *most_recently_trimmed; +static size_t +shrink_size(size_t size) +{ + size_t shrink_size; + + for (shrink_size = size - 1; nallocx(shrink_size, 0) == size; + shrink_size--) + ; /* Do nothing. 
*/ + + return (shrink_size); +} + static void arena_ralloc_junk_large_intercept(void *ptr, size_t old_usize, size_t usize) { arena_ralloc_junk_large_orig(ptr, old_usize, usize); assert_zu_eq(old_usize, arena_maxclass, "Unexpected old_usize"); - assert_zu_eq(usize, arena_maxclass-PAGE, "Unexpected usize"); + assert_zu_eq(usize, shrink_size(arena_maxclass), "Unexpected usize"); most_recently_trimmed = ptr; } @@ -154,7 +165,7 @@ TEST_BEGIN(test_junk_large_ralloc_shrink) arena_ralloc_junk_large_orig = arena_ralloc_junk_large; arena_ralloc_junk_large = arena_ralloc_junk_large_intercept; - p2 = rallocx(p1, arena_maxclass-PAGE, 0); + p2 = rallocx(p1, shrink_size(arena_maxclass), 0); assert_ptr_eq(p1, p2, "Unexpected move during shrink"); arena_ralloc_junk_large = arena_ralloc_junk_large_orig; diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index c70473cc..e62e54f2 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -357,7 +357,7 @@ TEST_BEGIN(test_arenas_lrun_constants) assert_zu_eq(name, expected, "Incorrect "#name" size"); \ } while (0) - TEST_ARENAS_LRUN_CONSTANT(size_t, size, (1 << LG_PAGE)); + TEST_ARENAS_LRUN_CONSTANT(size_t, size, (1 << (LG_PAGE+2))); #undef TEST_ARENAS_LRUN_CONSTANT }
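
The spacing the commit message describes -- 2^LG_SIZE_CLASS_GROUP (here 4) size classes per size doubling -- comes down to the arithmetic in index2size_compute() and s2u_compute() above. The standalone program below is only an illustrative sketch of that arithmetic, not part of the patch or of jemalloc's API. It assumes a common x86-64 configuration (LG_QUANTUM == 4, LG_SIZE_CLASS_GROUP == 2, no tiny classes, i.e. NTBINS == 0); the helpers class_size(), round_up_to_class(), and the simplified lg_floor() are hypothetical stand-ins for the real inline functions generated from size_classes.sh.

#include <stdio.h>
#include <stddef.h>

#define LG_QUANTUM		4	/* Assumed: 16-byte quantum (x86-64 default). */
#define LG_SIZE_CLASS_GROUP	2	/* 2^2 == 4 size classes per size doubling. */

static size_t
lg_floor(size_t x)
{
	size_t lg = 0;

	while (x >>= 1)
		lg++;
	return (lg);
}

/* Usable size for a class index (mirrors index2size_compute(), NTBINS == 0). */
static size_t
class_size(size_t index)
{
	size_t grp = index >> LG_SIZE_CLASS_GROUP;
	size_t mod = index & (((size_t)1 << LG_SIZE_CLASS_GROUP) - 1);
	size_t grp_size = (grp == 0) ? 0 :
	    ((size_t)1 << (LG_QUANTUM + LG_SIZE_CLASS_GROUP - 1)) << grp;
	size_t lg_delta = ((grp == 0) ? 1 : grp) + (LG_QUANTUM - 1);

	return (grp_size + ((mod + 1) << lg_delta));
}

/* Smallest size class that can hold a request (mirrors s2u_compute()). */
static size_t
round_up_to_class(size_t size)
{
	size_t x = lg_floor((size << 1) - 1);
	size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1) ?
	    LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1;
	size_t delta = (size_t)1 << lg_delta;

	return ((size + delta - 1) & ~(delta - 1));
}

int
main(void)
{
	size_t i;

	/* First four groups: every size doubling holds exactly four classes. */
	for (i = 0; i < 16; i++)
		printf("class %2zu -> %4zu bytes\n", i, class_size(i));

	/* E.g. a 4500 byte request rounds up to the 5 KiB class, one of the
	 * new small classes in [4 KiB .. 16 KiB). */
	printf("round_up_to_class(4500) = %zu\n", round_up_to_class(4500));
	return (0);
}

Under these assumed parameters, each doubling beyond the first group is split into four classes, so the gap between adjacent classes is a quarter of the group's base size and worst-case internal fragmentation stays just under 20%; the patch extends that uniform spacing across the small, large, and huge ranges instead of reserving it for small sizes only.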