diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h
index 1f985723..681b5802 100644
--- a/include/jemalloc/internal/arena.h
+++ b/include/jemalloc/internal/arena.h
@@ -1,6 +1,8 @@
 /******************************************************************************/
 #ifdef JEMALLOC_H_TYPES
 
+#define LARGE_MINCLASS (ZU(1) << LG_LARGE_MINCLASS)
+
 /* Maximum number of regions in one run. */
 #define LG_RUN_MAXREGS (LG_PAGE - LG_TINY_MIN)
 #define RUN_MAXREGS (1U << LG_RUN_MAXREGS)
@@ -96,11 +98,15 @@ struct arena_chunk_map_bits_s {
 	 *   xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx
 	 *   -------- -------- ----++++ ++++D-LA
 	 *
-	 * Large (sampled, size <= PAGE):
+	 * Large (sampled, size <= LARGE_MINCLASS):
 	 *   ssssssss ssssssss ssssnnnn nnnnD-LA
+	 *   xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx
+	 *   -------- -------- ----++++ ++++D-LA
 	 *
-	 * Large (not sampled, size == PAGE):
+	 * Large (not sampled, size == LARGE_MINCLASS):
 	 *   ssssssss ssssssss ssss++++ ++++D-LA
+	 *   xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx
+	 *   -------- -------- ----++++ ++++D-LA
 	 */
 	size_t bits;
 #define CHUNK_MAP_BININD_SHIFT 4
@@ -325,30 +331,21 @@ struct arena_s {
 #ifdef JEMALLOC_H_EXTERNS
 
 extern ssize_t opt_lg_dirty_mult;
 
-/*
- * small_size2bin_tab is a compact lookup table that rounds request sizes up to
- * size classes. In order to reduce cache footprint, the table is compressed,
- * and all accesses are via small_size2bin().
- */
-extern uint8_t const small_size2bin_tab[];
-/*
- * small_bin2size_tab duplicates information in arena_bin_info, but in a const
- * array, for which it is easier for the compiler to optimize repeated
- * dereferences.
- */
-extern uint32_t const small_bin2size_tab[NBINS];
 extern arena_bin_info_t arena_bin_info[NBINS];
 
-/* Number of large size classes. */
-#define nlclasses (chunk_npages - map_bias)
+extern size_t map_bias; /* Number of arena chunk header pages. */
+extern size_t map_misc_offset;
+extern size_t arena_maxrun; /* Max run size for arenas. */
+extern size_t arena_maxclass; /* Max size class for arenas. */
+extern size_t nlclasses; /* Number of large size classes.
*/ void *arena_chunk_alloc_huge(arena_t *arena, void *new_addr, size_t size, size_t alignment, bool *zero); void arena_chunk_dalloc_huge(arena_t *arena, void *chunk, size_t size); void arena_purge_all(arena_t *arena); void arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, - size_t binind, uint64_t prof_accumbytes); + index_t binind, uint64_t prof_accumbytes); void arena_alloc_junk_small(void *ptr, arena_bin_info_t *bin_info, bool zero); #ifdef JEMALLOC_JET @@ -403,15 +400,6 @@ void arena_postfork_child(arena_t *arena); #ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE -size_t small_size2bin_compute(size_t size); -size_t small_size2bin_lookup(size_t size); -size_t small_size2bin(size_t size); -size_t small_bin2size_compute(size_t binind); -size_t small_bin2size_lookup(size_t binind); -size_t small_bin2size(size_t binind); -size_t small_s2u_compute(size_t size); -size_t small_s2u_lookup(size_t size); -size_t small_s2u(size_t size); arena_chunk_map_bits_t *arena_bitselm_get(arena_chunk_t *chunk, size_t pageind); arena_chunk_map_misc_t *arena_miscelm_get(arena_chunk_t *chunk, @@ -426,7 +414,7 @@ size_t arena_mapbits_unallocated_size_get(arena_chunk_t *chunk, size_t pageind); size_t arena_mapbits_large_size_get(arena_chunk_t *chunk, size_t pageind); size_t arena_mapbits_small_runind_get(arena_chunk_t *chunk, size_t pageind); -size_t arena_mapbits_binind_get(arena_chunk_t *chunk, size_t pageind); +index_t arena_mapbits_binind_get(arena_chunk_t *chunk, size_t pageind); size_t arena_mapbits_dirty_get(arena_chunk_t *chunk, size_t pageind); size_t arena_mapbits_unzeroed_get(arena_chunk_t *chunk, size_t pageind); size_t arena_mapbits_large_get(arena_chunk_t *chunk, size_t pageind); @@ -439,16 +427,16 @@ void arena_mapbits_unallocated_size_set(arena_chunk_t *chunk, size_t pageind, void arena_mapbits_large_set(arena_chunk_t *chunk, size_t pageind, size_t size, size_t flags); void arena_mapbits_large_binind_set(arena_chunk_t *chunk, size_t pageind, - size_t binind); + index_t binind); void arena_mapbits_small_set(arena_chunk_t *chunk, size_t pageind, - size_t runind, size_t binind, size_t flags); + size_t runind, index_t binind, size_t flags); void arena_mapbits_unzeroed_set(arena_chunk_t *chunk, size_t pageind, size_t unzeroed); bool arena_prof_accum_impl(arena_t *arena, uint64_t accumbytes); bool arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes); bool arena_prof_accum(arena_t *arena, uint64_t accumbytes); -size_t arena_ptr_small_binind_get(const void *ptr, size_t mapbits); -size_t arena_bin_index(arena_t *arena, arena_bin_t *bin); +index_t arena_ptr_small_binind_get(const void *ptr, size_t mapbits); +index_t arena_bin_index(arena_t *arena, arena_bin_t *bin); unsigned arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr); prof_tctx_t *arena_prof_tctx_get(const void *ptr); @@ -464,148 +452,6 @@ void arena_sdalloc(tsd_t *tsd, arena_chunk_t *chunk, void *ptr, size_t size, #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_)) # ifdef JEMALLOC_ARENA_INLINE_A -JEMALLOC_INLINE size_t -small_size2bin_compute(size_t size) -{ -#if (NTBINS != 0) - if (size <= (ZU(1) << LG_TINY_MAXCLASS)) { - size_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1; - size_t lg_ceil = lg_floor(pow2_ceil(size)); - return (lg_ceil < lg_tmin ? 0 : lg_ceil - lg_tmin); - } else -#endif - { - size_t x = lg_floor((size<<1)-1); - size_t shift = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM) ? 
0 : - x - (LG_SIZE_CLASS_GROUP + LG_QUANTUM); - size_t grp = shift << LG_SIZE_CLASS_GROUP; - - size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1) - ? LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1; - - size_t delta_inverse_mask = ZI(-1) << lg_delta; - size_t mod = ((((size-1) & delta_inverse_mask) >> lg_delta)) & - ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1); - - size_t bin = NTBINS + grp + mod; - return (bin); - } -} - -JEMALLOC_ALWAYS_INLINE size_t -small_size2bin_lookup(size_t size) -{ - - assert(size <= LOOKUP_MAXCLASS); - { - size_t ret = ((size_t)(small_size2bin_tab[(size-1) >> - LG_TINY_MIN])); - assert(ret == small_size2bin_compute(size)); - return (ret); - } -} - -JEMALLOC_ALWAYS_INLINE size_t -small_size2bin(size_t size) -{ - - assert(size > 0); - if (likely(size <= LOOKUP_MAXCLASS)) - return (small_size2bin_lookup(size)); - else - return (small_size2bin_compute(size)); -} - -JEMALLOC_INLINE size_t -small_bin2size_compute(size_t binind) -{ -#if (NTBINS > 0) - if (binind < NTBINS) - return (ZU(1) << (LG_TINY_MAXCLASS - NTBINS + 1 + binind)); - else -#endif - { - size_t reduced_binind = binind - NTBINS; - size_t grp = reduced_binind >> LG_SIZE_CLASS_GROUP; - size_t mod = reduced_binind & ((ZU(1) << LG_SIZE_CLASS_GROUP) - - 1); - - size_t grp_size_mask = ~((!!grp)-1); - size_t grp_size = ((ZU(1) << (LG_QUANTUM + - (LG_SIZE_CLASS_GROUP-1))) << grp) & grp_size_mask; - - size_t shift = (grp == 0) ? 1 : grp; - size_t lg_delta = shift + (LG_QUANTUM-1); - size_t mod_size = (mod+1) << lg_delta; - - size_t usize = grp_size + mod_size; - return (usize); - } -} - -JEMALLOC_ALWAYS_INLINE size_t -small_bin2size_lookup(size_t binind) -{ - - assert(binind < NBINS); - { - size_t ret = (size_t)small_bin2size_tab[binind]; - assert(ret == small_bin2size_compute(binind)); - return (ret); - } -} - -JEMALLOC_ALWAYS_INLINE size_t -small_bin2size(size_t binind) -{ - - return (small_bin2size_lookup(binind)); -} - -JEMALLOC_ALWAYS_INLINE size_t -small_s2u_compute(size_t size) -{ -#if (NTBINS > 0) - if (size <= (ZU(1) << LG_TINY_MAXCLASS)) { - size_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1; - size_t lg_ceil = lg_floor(pow2_ceil(size)); - return (lg_ceil < lg_tmin ? (ZU(1) << lg_tmin) : - (ZU(1) << lg_ceil)); - } else -#endif - { - size_t x = lg_floor((size<<1)-1); - size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1) - ? 
LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1; - size_t delta = ZU(1) << lg_delta; - size_t delta_mask = delta - 1; - size_t usize = (size + delta_mask) & ~delta_mask; - return (usize); - } -} - -JEMALLOC_ALWAYS_INLINE size_t -small_s2u_lookup(size_t size) -{ - size_t ret = small_bin2size(small_size2bin(size)); - - assert(ret == small_s2u_compute(size)); - return (ret); -} - -JEMALLOC_ALWAYS_INLINE size_t -small_s2u(size_t size) -{ - - assert(size > 0); - if (likely(size <= LOOKUP_MAXCLASS)) - return (small_s2u_lookup(size)); - else - return (small_s2u_compute(size)); -} -# endif /* JEMALLOC_ARENA_INLINE_A */ - -# ifdef JEMALLOC_ARENA_INLINE_B JEMALLOC_ALWAYS_INLINE arena_chunk_map_bits_t * arena_bitselm_get(arena_chunk_t *chunk, size_t pageind) { @@ -714,11 +560,11 @@ arena_mapbits_small_runind_get(arena_chunk_t *chunk, size_t pageind) return (mapbits >> LG_PAGE); } -JEMALLOC_ALWAYS_INLINE size_t +JEMALLOC_ALWAYS_INLINE index_t arena_mapbits_binind_get(arena_chunk_t *chunk, size_t pageind) { size_t mapbits; - size_t binind; + index_t binind; mapbits = arena_mapbits_get(chunk, pageind); binind = (mapbits & CHUNK_MAP_BININD_MASK) >> CHUNK_MAP_BININD_SHIFT; @@ -810,20 +656,20 @@ arena_mapbits_large_set(arena_chunk_t *chunk, size_t pageind, size_t size, JEMALLOC_ALWAYS_INLINE void arena_mapbits_large_binind_set(arena_chunk_t *chunk, size_t pageind, - size_t binind) + index_t binind) { size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind); size_t mapbits = arena_mapbitsp_read(mapbitsp); assert(binind <= BININD_INVALID); - assert(arena_mapbits_large_size_get(chunk, pageind) == PAGE); + assert(arena_mapbits_large_size_get(chunk, pageind) == LARGE_MINCLASS); arena_mapbitsp_write(mapbitsp, (mapbits & ~CHUNK_MAP_BININD_MASK) | (binind << CHUNK_MAP_BININD_SHIFT)); } JEMALLOC_ALWAYS_INLINE void arena_mapbits_small_set(arena_chunk_t *chunk, size_t pageind, size_t runind, - size_t binind, size_t flags) + index_t binind, size_t flags) { size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind); size_t mapbits = arena_mapbitsp_read(mapbitsp); @@ -893,10 +739,10 @@ arena_prof_accum(arena_t *arena, uint64_t accumbytes) } } -JEMALLOC_ALWAYS_INLINE size_t +JEMALLOC_ALWAYS_INLINE index_t arena_ptr_small_binind_get(const void *ptr, size_t mapbits) { - size_t binind; + index_t binind; binind = (mapbits & CHUNK_MAP_BININD_MASK) >> CHUNK_MAP_BININD_SHIFT; @@ -908,7 +754,7 @@ arena_ptr_small_binind_get(const void *ptr, size_t mapbits) size_t rpages_ind; arena_run_t *run; arena_bin_t *bin; - size_t actual_binind; + index_t actual_binind; arena_bin_info_t *bin_info; arena_chunk_map_misc_t *miscelm; void *rpages; @@ -938,13 +784,13 @@ arena_ptr_small_binind_get(const void *ptr, size_t mapbits) return (binind); } -# endif /* JEMALLOC_ARENA_INLINE_B */ +# endif /* JEMALLOC_ARENA_INLINE_A */ -# ifdef JEMALLOC_ARENA_INLINE_C -JEMALLOC_INLINE size_t +# ifdef JEMALLOC_ARENA_INLINE_B +JEMALLOC_INLINE index_t arena_bin_index(arena_t *arena, arena_bin_t *bin) { - size_t binind = bin - arena->bins; + index_t binind = bin - arena->bins; assert(binind < NBINS); return (binind); } @@ -1102,7 +948,8 @@ arena_salloc(const void *ptr, bool demote) { size_t ret; arena_chunk_t *chunk; - size_t pageind, binind; + size_t pageind; + index_t binind; assert(ptr != NULL); assert(CHUNK_ADDR2BASE(ptr) != ptr); @@ -1122,10 +969,6 @@ arena_salloc(const void *ptr, bool demote) ret = arena_mapbits_large_size_get(chunk, pageind); assert(ret != 0); assert(pageind + (ret>>LG_PAGE) <= chunk_npages); - assert(ret == PAGE || arena_mapbits_large_size_get(chunk, - 
pageind+(ret>>LG_PAGE)-1) == 0); - assert(binind == arena_mapbits_binind_get(chunk, - pageind+(ret>>LG_PAGE)-1)); assert(arena_mapbits_dirty_get(chunk, pageind) == arena_mapbits_dirty_get(chunk, pageind+(ret>>LG_PAGE)-1)); } else { @@ -1133,7 +976,7 @@ arena_salloc(const void *ptr, bool demote) assert(arena_mapbits_large_get(chunk, pageind) != 0 || arena_ptr_small_binind_get(ptr, arena_mapbits_get(chunk, pageind)) == binind); - ret = small_bin2size(binind); + ret = index2size(binind); } return (ret); @@ -1155,7 +998,7 @@ arena_dalloc(tsd_t *tsd, arena_chunk_t *chunk, void *ptr, bool try_tcache) /* Small allocation. */ if (likely(try_tcache) && likely((tcache = tcache_get(tsd, false)) != NULL)) { - size_t binind = arena_ptr_small_binind_get(ptr, + index_t binind = arena_ptr_small_binind_get(ptr, mapbits); tcache_dalloc_small(tcache, ptr, binind); } else @@ -1186,7 +1029,7 @@ arena_sdalloc(tsd_t *tsd, arena_chunk_t *chunk, void *ptr, size_t size, /* Small allocation. */ if (likely(try_tcache) && likely((tcache = tcache_get(tsd, false)) != NULL)) { - size_t binind = small_size2bin(size); + index_t binind = size2index(size); tcache_dalloc_small(tcache, ptr, binind); } else { size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> @@ -1203,7 +1046,7 @@ arena_sdalloc(tsd_t *tsd, arena_chunk_t *chunk, void *ptr, size_t size, arena_dalloc_large(chunk->arena, chunk, ptr); } } -# endif /* JEMALLOC_ARENA_INLINE_C */ +# endif /* JEMALLOC_ARENA_INLINE_B */ #endif #endif /* JEMALLOC_H_INLINES */ diff --git a/include/jemalloc/internal/chunk.h b/include/jemalloc/internal/chunk.h index 2e68a020..764b7aca 100644 --- a/include/jemalloc/internal/chunk.h +++ b/include/jemalloc/internal/chunk.h @@ -40,9 +40,6 @@ extern rtree_t *chunks_rtree; extern size_t chunksize; extern size_t chunksize_mask; /* (chunksize - 1). */ extern size_t chunk_npages; -extern size_t map_bias; /* Number of arena chunk header pages. */ -extern size_t map_misc_offset; -extern size_t arena_maxclass; /* Max size class for arenas. */ void *chunk_alloc_base(size_t size); void *chunk_alloc_arena(chunk_alloc_t *chunk_alloc, diff --git a/include/jemalloc/internal/huge.h b/include/jemalloc/internal/huge.h index 00d8c09d..939993f2 100644 --- a/include/jemalloc/internal/huge.h +++ b/include/jemalloc/internal/huge.h @@ -10,7 +10,7 @@ #ifdef JEMALLOC_H_EXTERNS void *huge_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero); -void *huge_palloc(tsd_t *tsd, arena_t *arena, size_t size, size_t alignment, +void *huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, bool zero); bool huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, bool zero); diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index a169221b..8f0beb9e 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -165,6 +165,9 @@ static const bool config_ivsalloc = #include "jemalloc/internal/jemalloc_internal_macros.h" +/* Size class index type. */ +typedef unsigned index_t; + #define MALLOCX_ARENA_MASK ((int)~0xff) #define MALLOCX_LG_ALIGN_MASK ((int)0x3f) /* Use MALLOCX_ALIGN_GET() if alignment may not be specified in flags. */ @@ -397,6 +400,18 @@ extern arena_t **arenas; extern unsigned narenas_total; extern unsigned narenas_auto; /* Read-only after initialization. */ +/* + * index2size_tab encodes the same information as could be computed (at + * unacceptable cost in some code paths) by index2size_compute(). 
+ */ +extern size_t const index2size_tab[NSIZES]; +/* + * size2index_tab is a compact lookup table that rounds request sizes up to + * size classes. In order to reduce cache footprint, the table is compressed, + * and all accesses are via size2index(). + */ +extern uint8_t const size2index_tab[]; + arena_t *arenas_extend(unsigned ind); arena_t *choose_arena_hard(tsd_t *tsd); void thread_allocated_cleanup(tsd_t *tsd); @@ -449,15 +464,15 @@ void jemalloc_postfork_child(void); #include "jemalloc/internal/chunk.h" #include "jemalloc/internal/huge.h" -/* - * Include arena.h the first time in order to provide inline functions for this - * header's inlines. - */ -#define JEMALLOC_ARENA_INLINE_A -#include "jemalloc/internal/arena.h" -#undef JEMALLOC_ARENA_INLINE_A - #ifndef JEMALLOC_ENABLE_INLINE +index_t size2index_compute(size_t size); +index_t size2index_lookup(size_t size); +index_t size2index(size_t size); +size_t index2size_compute(index_t index); +size_t index2size_lookup(index_t index); +size_t index2size(index_t index); +size_t s2u_compute(size_t size); +size_t s2u_lookup(size_t size); size_t s2u(size_t size); size_t sa2u(size_t size, size_t alignment); unsigned narenas_total_get(void); @@ -465,6 +480,135 @@ arena_t *choose_arena(tsd_t *tsd, arena_t *arena); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) +JEMALLOC_INLINE index_t +size2index_compute(size_t size) +{ + +#if (NTBINS != 0) + if (size <= (ZU(1) << LG_TINY_MAXCLASS)) { + size_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1; + size_t lg_ceil = lg_floor(pow2_ceil(size)); + return (lg_ceil < lg_tmin ? 0 : lg_ceil - lg_tmin); + } else +#endif + { + size_t x = lg_floor((size<<1)-1); + size_t shift = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM) ? 0 : + x - (LG_SIZE_CLASS_GROUP + LG_QUANTUM); + size_t grp = shift << LG_SIZE_CLASS_GROUP; + + size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1) + ? LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1; + + size_t delta_inverse_mask = ZI(-1) << lg_delta; + size_t mod = ((((size-1) & delta_inverse_mask) >> lg_delta)) & + ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1); + + size_t index = NTBINS + grp + mod; + return (index); + } +} + +JEMALLOC_ALWAYS_INLINE index_t +size2index_lookup(size_t size) +{ + + assert(size <= LOOKUP_MAXCLASS); + { + size_t ret = ((size_t)(size2index_tab[(size-1) >> + LG_TINY_MIN])); + assert(ret == size2index_compute(size)); + return (ret); + } +} + +JEMALLOC_ALWAYS_INLINE index_t +size2index(size_t size) +{ + + assert(size > 0); + if (likely(size <= LOOKUP_MAXCLASS)) + return (size2index_lookup(size)); + else + return (size2index_compute(size)); +} + +JEMALLOC_INLINE size_t +index2size_compute(index_t index) +{ + +#if (NTBINS > 0) + if (index < NTBINS) + return (ZU(1) << (LG_TINY_MAXCLASS - NTBINS + 1 + index)); + else +#endif + { + size_t reduced_index = index - NTBINS; + size_t grp = reduced_index >> LG_SIZE_CLASS_GROUP; + size_t mod = reduced_index & ((ZU(1) << LG_SIZE_CLASS_GROUP) - + 1); + + size_t grp_size_mask = ~((!!grp)-1); + size_t grp_size = ((ZU(1) << (LG_QUANTUM + + (LG_SIZE_CLASS_GROUP-1))) << grp) & grp_size_mask; + + size_t shift = (grp == 0) ? 
1 : grp; + size_t lg_delta = shift + (LG_QUANTUM-1); + size_t mod_size = (mod+1) << lg_delta; + + size_t usize = grp_size + mod_size; + return (usize); + } +} + +JEMALLOC_ALWAYS_INLINE size_t +index2size_lookup(index_t index) +{ + size_t ret = (size_t)index2size_tab[index]; + assert(ret == index2size_compute(index)); + return (ret); +} + +JEMALLOC_ALWAYS_INLINE size_t +index2size(index_t index) +{ + + assert(index < NSIZES); + return (index2size_lookup(index)); +} + +JEMALLOC_ALWAYS_INLINE size_t +s2u_compute(size_t size) +{ + +#if (NTBINS > 0) + if (size <= (ZU(1) << LG_TINY_MAXCLASS)) { + size_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1; + size_t lg_ceil = lg_floor(pow2_ceil(size)); + return (lg_ceil < lg_tmin ? (ZU(1) << lg_tmin) : + (ZU(1) << lg_ceil)); + } else +#endif + { + size_t x = lg_floor((size<<1)-1); + size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1) + ? LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1; + size_t delta = ZU(1) << lg_delta; + size_t delta_mask = delta - 1; + size_t usize = (size + delta_mask) & ~delta_mask; + return (usize); + } +} + +JEMALLOC_ALWAYS_INLINE size_t +s2u_lookup(size_t size) +{ + size_t ret = index2size_lookup(size2index_lookup(size)); + + assert(ret == s2u_compute(size)); + return (ret); +} + /* * Compute usable size that would result from allocating an object with the * specified size. @@ -473,11 +617,11 @@ JEMALLOC_ALWAYS_INLINE size_t s2u(size_t size) { - if (size <= SMALL_MAXCLASS) - return (small_s2u(size)); - if (size <= arena_maxclass) - return (PAGE_CEILING(size)); - return (CHUNK_CEILING(size)); + assert(size > 0); + if (likely(size <= LOOKUP_MAXCLASS)) + return (s2u_lookup(size)); + else + return (s2u_compute(size)); } /* @@ -491,71 +635,78 @@ sa2u(size_t size, size_t alignment) assert(alignment != 0 && ((alignment - 1) & alignment) == 0); - /* - * Round size up to the nearest multiple of alignment. - * - * This done, we can take advantage of the fact that for each small - * size class, every object is aligned at the smallest power of two - * that is non-zero in the base two representation of the size. For - * example: - * - * Size | Base 2 | Minimum alignment - * -----+----------+------------------ - * 96 | 1100000 | 32 - * 144 | 10100000 | 32 - * 192 | 11000000 | 64 - */ - usize = ALIGNMENT_CEILING(size, alignment); - /* - * (usize < size) protects against the combination of maximal - * alignment and size greater than maximal alignment. - */ - if (usize < size) { - /* size_t overflow. */ - return (0); + /* Try for a small size class. */ + if (size <= SMALL_MAXCLASS && alignment < PAGE) { + /* + * Round size up to the nearest multiple of alignment. + * + * This done, we can take advantage of the fact that for each + * small size class, every object is aligned at the smallest + * power of two that is non-zero in the base two representation + * of the size. For example: + * + * Size | Base 2 | Minimum alignment + * -----+----------+------------------ + * 96 | 1100000 | 32 + * 144 | 10100000 | 32 + * 192 | 11000000 | 64 + */ + usize = s2u(ALIGNMENT_CEILING(size, alignment)); + if (usize < LARGE_MINCLASS) + return (usize); } - if (usize <= arena_maxclass && alignment <= PAGE) { - if (usize <= SMALL_MAXCLASS) - return (small_s2u(usize)); - return (PAGE_CEILING(usize)); - } else { - size_t run_size; - + /* Try for a large size class. */ + if (size <= arena_maxclass && alignment < chunksize) { /* * We can't achieve subpage alignment, so round up alignment - * permanently; it makes later calculations simpler. 
+ * to the minimum that can actually be supported. */ alignment = PAGE_CEILING(alignment); - usize = PAGE_CEILING(size); - /* - * (usize < size) protects against very large sizes within - * PAGE of SIZE_T_MAX. - * - * (usize + alignment < usize) protects against the - * combination of maximal alignment and usize large enough - * to cause overflow. This is similar to the first overflow - * check above, but it needs to be repeated due to the new - * usize value, which may now be *equal* to maximal - * alignment, whereas before we only detected overflow if the - * original size was *greater* than maximal alignment. - */ - if (usize < size || usize + alignment < usize) { - /* size_t overflow. */ - return (0); - } + + /* Make sure result is a large size class. */ + usize = (size <= LARGE_MINCLASS) ? LARGE_MINCLASS : s2u(size); /* * Calculate the size of the over-size run that arena_palloc() * would need to allocate in order to guarantee the alignment. - * If the run wouldn't fit within a chunk, round up to a huge - * allocation size. */ - run_size = usize + alignment - PAGE; - if (run_size <= arena_maxclass) - return (PAGE_CEILING(usize)); - return (CHUNK_CEILING(usize)); + if (usize + alignment - PAGE <= arena_maxrun) + return (usize); } + + /* Huge size class. Beware of size_t overflow. */ + + /* + * We can't achieve subchunk alignment, so round up alignment to the + * minimum that can actually be supported. + */ + alignment = CHUNK_CEILING(alignment); + if (alignment == 0) { + /* size_t overflow. */ + return (0); + } + + /* Make sure result is a huge size class. */ + if (size <= chunksize) + usize = chunksize; + else { + usize = s2u(size); + if (usize < size) { + /* size_t overflow. */ + return (0); + } + } + + /* + * Calculate the multi-chunk mapping that huge_palloc() would need in + * order to guarantee the alignment. + */ + if (usize + alignment - PAGE < usize) { + /* size_t overflow. */ + return (0); + } + return (usize); } JEMALLOC_INLINE unsigned @@ -591,16 +742,16 @@ choose_arena(tsd_t *tsd, arena_t *arena) #include "jemalloc/internal/bitmap.h" #include "jemalloc/internal/rtree.h" /* - * Include arena.h the second and third times in order to resolve circular - * dependencies with tcache.h. + * Include portions of arena.h interleaved with tcache.h in order to resolve + * circular dependencies. 
*/ +#define JEMALLOC_ARENA_INLINE_A +#include "jemalloc/internal/arena.h" +#undef JEMALLOC_ARENA_INLINE_A +#include "jemalloc/internal/tcache.h" #define JEMALLOC_ARENA_INLINE_B #include "jemalloc/internal/arena.h" #undef JEMALLOC_ARENA_INLINE_B -#include "jemalloc/internal/tcache.h" -#define JEMALLOC_ARENA_INLINE_C -#include "jemalloc/internal/arena.h" -#undef JEMALLOC_ARENA_INLINE_C #include "jemalloc/internal/hash.h" #include "jemalloc/internal/quarantine.h" @@ -678,7 +829,7 @@ ipalloct(tsd_t *tsd, size_t usize, size_t alignment, bool zero, bool try_tcache, assert(usize != 0); assert(usize == sa2u(usize, alignment)); - if (usize <= arena_maxclass && alignment <= PAGE) + if (usize <= SMALL_MAXCLASS && alignment < PAGE) ret = arena_malloc(tsd, arena, usize, zero, try_tcache); else { if (usize <= arena_maxclass) { @@ -742,7 +893,7 @@ u2rz(size_t usize) size_t ret; if (usize <= SMALL_MAXCLASS) { - size_t binind = small_size2bin(usize); + index_t binind = size2index(usize); ret = arena_bin_info[binind].redzone_size; } else ret = 0; diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 4ea9a953..1a7fde4b 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -41,6 +41,7 @@ arena_mapbitsp_get arena_mapbitsp_read arena_mapbitsp_write arena_maxclass +arena_maxrun arena_miscelm_get arena_miscelm_to_pageind arena_miscelm_to_rpages @@ -216,6 +217,10 @@ idalloct imalloc imalloct in_valgrind +index2size +index2size_compute +index2size_lookup +index2size_tab ipalloc ipalloct iqalloc @@ -338,19 +343,14 @@ rtree_postfork_parent rtree_prefork rtree_set s2u +s2u_compute +s2u_lookup sa2u set_errno -small_bin2size -small_bin2size_compute -small_bin2size_lookup -small_bin2size_tab -small_s2u -small_s2u_compute -small_s2u_lookup -small_size2bin -small_size2bin_compute -small_size2bin_lookup -small_size2bin_tab +size2index +size2index_compute +size2index_lookup +size2index_tab stats_cactive stats_cactive_add stats_cactive_get diff --git a/include/jemalloc/internal/size_classes.sh b/include/jemalloc/internal/size_classes.sh index 0cfac72d..897570cc 100755 --- a/include/jemalloc/internal/size_classes.sh +++ b/include/jemalloc/internal/size_classes.sh @@ -61,7 +61,7 @@ size_class() { rem="yes" fi - if [ ${lg_size} -lt ${lg_p} ] ; then + if [ ${lg_size} -lt $((${lg_p} + ${lg_g})) ] ; then bin="yes" else bin="no" @@ -159,6 +159,7 @@ size_classes() { nbins=$((${index} + 1)) # Final written value is correct: small_maxclass="((((size_t)1) << ${lg_grp}) + (((size_t)${ndelta}) << ${lg_delta}))" + lg_large_minclass=$((${lg_grp} + 1)) fi index=$((${index} + 1)) ndelta=$((${ndelta} + 1)) @@ -167,14 +168,17 @@ size_classes() { lg_delta=$((${lg_delta} + 1)) done echo + nsizes=${index} # Defined upon completion: # - ntbins # - nlbins # - nbins + # - nsizes # - lg_tiny_maxclass # - lookup_maxclass # - small_maxclass + # - lg_large_minclass } cat <tbins[binind]; - size = small_bin2size(binind); + usize = index2size(binind); ret = tcache_alloc_easy(tbin); if (unlikely(ret == NULL)) { ret = tcache_alloc_small_hard(tcache, tbin, binind); if (ret == NULL) return (NULL); } - assert(tcache_salloc(ret) == size); + assert(tcache_salloc(ret) == usize); if (likely(!zero)) { if (config_fill) { @@ -254,20 +255,20 @@ tcache_alloc_small(tcache_t *tcache, size_t size, bool zero) arena_alloc_junk_small(ret, &arena_bin_info[binind], false); } else if (unlikely(opt_zero)) - memset(ret, 0, size); + memset(ret, 0, usize); } } else { 
if (config_fill && unlikely(opt_junk)) { arena_alloc_junk_small(ret, &arena_bin_info[binind], true); } - memset(ret, 0, size); + memset(ret, 0, usize); } if (config_stats) tbin->tstats.nrequests++; if (config_prof) - tcache->prof_accumbytes += size; + tcache->prof_accumbytes += usize; tcache_event(tcache); return (ret); } @@ -276,12 +277,13 @@ JEMALLOC_ALWAYS_INLINE void * tcache_alloc_large(tcache_t *tcache, size_t size, bool zero) { void *ret; - size_t binind; + index_t binind; + size_t usize; tcache_bin_t *tbin; - size = PAGE_CEILING(size); - assert(size <= tcache_maxclass); - binind = NBINS + (size >> LG_PAGE) - 1; + binind = size2index(size); + usize = index2size(binind); + assert(usize <= tcache_maxclass); assert(binind < nhbins); tbin = &tcache->tbins[binind]; ret = tcache_alloc_easy(tbin); @@ -290,11 +292,11 @@ tcache_alloc_large(tcache_t *tcache, size_t size, bool zero) * Only allocate one large object at a time, because it's quite * expensive to create one and not use it. */ - ret = arena_malloc_large(tcache->arena, size, zero); + ret = arena_malloc_large(tcache->arena, usize, zero); if (ret == NULL) return (NULL); } else { - if (config_prof && size == PAGE) { + if (config_prof && usize == LARGE_MINCLASS) { arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ret); size_t pageind = (((uintptr_t)ret - (uintptr_t)chunk) >> @@ -305,17 +307,17 @@ tcache_alloc_large(tcache_t *tcache, size_t size, bool zero) if (likely(!zero)) { if (config_fill) { if (unlikely(opt_junk)) - memset(ret, 0xa5, size); + memset(ret, 0xa5, usize); else if (unlikely(opt_zero)) - memset(ret, 0, size); + memset(ret, 0, usize); } } else - memset(ret, 0, size); + memset(ret, 0, usize); if (config_stats) tbin->tstats.nrequests++; if (config_prof) - tcache->prof_accumbytes += size; + tcache->prof_accumbytes += usize; } tcache_event(tcache); @@ -323,7 +325,7 @@ tcache_alloc_large(tcache_t *tcache, size_t size, bool zero) } JEMALLOC_ALWAYS_INLINE void -tcache_dalloc_small(tcache_t *tcache, void *ptr, size_t binind) +tcache_dalloc_small(tcache_t *tcache, void *ptr, index_t binind) { tcache_bin_t *tbin; tcache_bin_info_t *tbin_info; @@ -349,7 +351,7 @@ tcache_dalloc_small(tcache_t *tcache, void *ptr, size_t binind) JEMALLOC_ALWAYS_INLINE void tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size) { - size_t binind; + index_t binind; tcache_bin_t *tbin; tcache_bin_info_t *tbin_info; @@ -357,7 +359,7 @@ tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size) assert(tcache_salloc(ptr) > SMALL_MAXCLASS); assert(tcache_salloc(ptr) <= tcache_maxclass); - binind = NBINS + (size >> LG_PAGE) - 1; + binind = size2index(size); if (config_fill && unlikely(opt_junk)) memset(ptr, 0x5a, size); diff --git a/src/arena.c b/src/arena.c index b7300a92..49a30572 100644 --- a/src/arena.c +++ b/src/arena.c @@ -7,42 +7,11 @@ ssize_t opt_lg_dirty_mult = LG_DIRTY_MULT_DEFAULT; arena_bin_info_t arena_bin_info[NBINS]; -JEMALLOC_ALIGNED(CACHELINE) -const uint32_t small_bin2size_tab[NBINS] = { -#define B2S_bin_yes(size) \ - size, -#define B2S_bin_no(size) -#define SC(index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup) \ - B2S_bin_##bin((ZU(1)<> LG_PAGE; size_t mapbits = arena_mapbits_get(chunk, pageind); - size_t binind = arena_ptr_small_binind_get(ptr, mapbits); + index_t binind = arena_ptr_small_binind_get(ptr, mapbits); arena_bin_info_t *bin_info = &arena_bin_info[binind]; unsigned regind = arena_run_regind(run, bin_info, ptr); @@ -375,7 +344,7 @@ arena_run_init_large(arena_t *arena, arena_run_t *run, size_t size, bool zero) 
static void arena_run_split_small(arena_t *arena, arena_run_t *run, size_t size, - size_t binind) + index_t binind) { arena_chunk_t *chunk; arena_chunk_map_misc_t *miscelm; @@ -429,9 +398,9 @@ arena_chunk_init_spare(arena_t *arena) assert(arena_mapbits_allocated_get(chunk, map_bias) == 0); assert(arena_mapbits_allocated_get(chunk, chunk_npages-1) == 0); assert(arena_mapbits_unallocated_size_get(chunk, map_bias) == - arena_maxclass); + arena_maxrun); assert(arena_mapbits_unallocated_size_get(chunk, chunk_npages-1) == - arena_maxclass); + arena_maxrun); assert(arena_mapbits_dirty_get(chunk, map_bias) == arena_mapbits_dirty_get(chunk, chunk_npages-1)); @@ -518,8 +487,7 @@ arena_chunk_init_hard(arena_t *arena) * the pages as zeroed iff chunk_alloc() returned a zeroed chunk. */ unzeroed = zero ? 0 : CHUNK_MAP_UNZEROED; - arena_mapbits_unallocated_set(chunk, map_bias, arena_maxclass, - unzeroed); + arena_mapbits_unallocated_set(chunk, map_bias, arena_maxrun, unzeroed); /* * There is no need to initialize the internal page map entries unless * the chunk is not zeroed. @@ -544,7 +512,7 @@ arena_chunk_init_hard(arena_t *arena) } } } - arena_mapbits_unallocated_set(chunk, chunk_npages-1, arena_maxclass, + arena_mapbits_unallocated_set(chunk, chunk_npages-1, arena_maxrun, unzeroed); return (chunk); @@ -607,9 +575,9 @@ arena_chunk_dalloc(arena_t *arena, arena_chunk_t *chunk) assert(arena_mapbits_allocated_get(chunk, map_bias) == 0); assert(arena_mapbits_allocated_get(chunk, chunk_npages-1) == 0); assert(arena_mapbits_unallocated_size_get(chunk, map_bias) == - arena_maxclass); + arena_maxrun); assert(arena_mapbits_unallocated_size_get(chunk, chunk_npages-1) == - arena_maxclass); + arena_maxrun); assert(arena_mapbits_dirty_get(chunk, map_bias) == arena_mapbits_dirty_get(chunk, chunk_npages-1)); @@ -682,7 +650,7 @@ arena_run_alloc_large(arena_t *arena, size_t size, bool zero) } static arena_run_t * -arena_run_alloc_small_helper(arena_t *arena, size_t size, size_t binind) +arena_run_alloc_small_helper(arena_t *arena, size_t size, index_t binind) { arena_run_t *run; arena_chunk_map_misc_t *miscelm; @@ -700,7 +668,7 @@ arena_run_alloc_small_helper(arena_t *arena, size_t size, size_t binind) } static arena_run_t * -arena_run_alloc_small(arena_t *arena, size_t size, size_t binind) +arena_run_alloc_small(arena_t *arena, size_t size, index_t binind) { arena_chunk_t *chunk; arena_run_t *run; @@ -1034,7 +1002,7 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, bool cleaned) arena_mapbits_large_size_get(chunk, run_ind+(size>>LG_PAGE)-1) == 0); } else { - size_t binind = arena_bin_index(arena, run->bin); + index_t binind = arena_bin_index(arena, run->bin); arena_bin_info_t *bin_info = &arena_bin_info[binind]; size = bin_info->run_size; } @@ -1079,9 +1047,9 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, bool cleaned) arena_dirty_insert(arena, chunk, run_ind, run_pages); /* Deallocate chunk if it is now completely unused. */ - if (size == arena_maxclass) { + if (size == arena_maxrun) { assert(run_ind == map_bias); - assert(run_pages == (arena_maxclass >> LG_PAGE)); + assert(run_pages == (arena_maxrun >> LG_PAGE)); arena_chunk_dalloc(arena, chunk); } @@ -1212,7 +1180,7 @@ static arena_run_t * arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin) { arena_run_t *run; - size_t binind; + index_t binind; arena_bin_info_t *bin_info; /* Look for a usable run. 
*/ @@ -1264,7 +1232,7 @@ static void * arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin) { void *ret; - size_t binind; + index_t binind; arena_bin_info_t *bin_info; arena_run_t *run; @@ -1310,7 +1278,7 @@ arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin) } void -arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind, +arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, index_t binind, uint64_t prof_accumbytes) { unsigned i, nfill; @@ -1450,14 +1418,14 @@ arena_dalloc_junk_small_t *arena_dalloc_junk_small = void arena_quarantine_junk_small(void *ptr, size_t usize) { - size_t binind; + index_t binind; arena_bin_info_t *bin_info; cassert(config_fill); assert(opt_junk); assert(opt_quarantine); assert(usize <= SMALL_MAXCLASS); - binind = small_size2bin(usize); + binind = size2index(usize); bin_info = &arena_bin_info[binind]; arena_redzones_validate(ptr, bin_info, true); } @@ -1468,12 +1436,12 @@ arena_malloc_small(arena_t *arena, size_t size, bool zero) void *ret; arena_bin_t *bin; arena_run_t *run; - size_t binind; + index_t binind; - binind = small_size2bin(size); + binind = size2index(size); assert(binind < NBINS); bin = &arena->bins[binind]; - size = small_bin2size(binind); + size = index2size(binind); malloc_mutex_lock(&bin->lock); if ((run = bin->runcur) != NULL && run->nfree > 0) @@ -1520,14 +1488,15 @@ void * arena_malloc_large(arena_t *arena, size_t size, bool zero) { void *ret; + size_t usize; arena_run_t *run; arena_chunk_map_misc_t *miscelm; UNUSED bool idump; /* Large allocation. */ - size = PAGE_CEILING(size); + usize = s2u(size); malloc_mutex_lock(&arena->lock); - run = arena_run_alloc_large(arena, size, zero); + run = arena_run_alloc_large(arena, usize, zero); if (run == NULL) { malloc_mutex_unlock(&arena->lock); return (NULL); @@ -1535,15 +1504,17 @@ arena_malloc_large(arena_t *arena, size_t size, bool zero) miscelm = arena_run_to_miscelm(run); ret = arena_miscelm_to_rpages(miscelm); if (config_stats) { + index_t index = size2index(usize) - NBINS; + arena->stats.nmalloc_large++; arena->stats.nrequests_large++; - arena->stats.allocated_large += size; - arena->stats.lstats[(size >> LG_PAGE) - 1].nmalloc++; - arena->stats.lstats[(size >> LG_PAGE) - 1].nrequests++; - arena->stats.lstats[(size >> LG_PAGE) - 1].curruns++; + arena->stats.allocated_large += usize; + arena->stats.lstats[index].nmalloc++; + arena->stats.lstats[index].nrequests++; + arena->stats.lstats[index].curruns++; } if (config_prof) - idump = arena_prof_accum_locked(arena, size); + idump = arena_prof_accum_locked(arena, usize); malloc_mutex_unlock(&arena->lock); if (config_prof && idump) prof_idump(); @@ -1551,9 +1522,9 @@ arena_malloc_large(arena_t *arena, size_t size, bool zero) if (!zero) { if (config_fill) { if (unlikely(opt_junk)) - memset(ret, 0xa5, size); + memset(ret, 0xa5, usize); else if (unlikely(opt_zero)) - memset(ret, 0, size); + memset(ret, 0, usize); } } @@ -1610,12 +1581,14 @@ arena_palloc(arena_t *arena, size_t size, size_t alignment, bool zero) ret = arena_miscelm_to_rpages(miscelm); if (config_stats) { + index_t index = size2index(size) - NBINS; + arena->stats.nmalloc_large++; arena->stats.nrequests_large++; arena->stats.allocated_large += size; - arena->stats.lstats[(size >> LG_PAGE) - 1].nmalloc++; - arena->stats.lstats[(size >> LG_PAGE) - 1].nrequests++; - arena->stats.lstats[(size >> LG_PAGE) - 1].curruns++; + arena->stats.lstats[index].nmalloc++; + arena->stats.lstats[index].nrequests++; + arena->stats.lstats[index].curruns++; } 
malloc_mutex_unlock(&arena->lock); @@ -1632,22 +1605,23 @@ void arena_prof_promoted(const void *ptr, size_t size) { arena_chunk_t *chunk; - size_t pageind, binind; + size_t pageind; + index_t binind; cassert(config_prof); assert(ptr != NULL); assert(CHUNK_ADDR2BASE(ptr) != ptr); - assert(isalloc(ptr, false) == PAGE); - assert(isalloc(ptr, true) == PAGE); + assert(isalloc(ptr, false) == LARGE_MINCLASS); + assert(isalloc(ptr, true) == LARGE_MINCLASS); assert(size <= SMALL_MAXCLASS); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - binind = small_size2bin(size); + binind = size2index(size); assert(binind < NBINS); arena_mapbits_large_binind_set(chunk, pageind, binind); - assert(isalloc(ptr, false) == PAGE); + assert(isalloc(ptr, false) == LARGE_MINCLASS); assert(isalloc(ptr, true) == size); } @@ -1660,7 +1634,7 @@ arena_dissociate_bin_run(arena_chunk_t *chunk, arena_run_t *run, if (run == bin->runcur) bin->runcur = NULL; else { - size_t binind = arena_bin_index(chunk->arena, bin); + index_t binind = arena_bin_index(chunk->arena, bin); arena_bin_info_t *bin_info = &arena_bin_info[binind]; if (bin_info->nregs != 1) { @@ -1678,7 +1652,7 @@ static void arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, arena_bin_t *bin) { - size_t binind; + index_t binind; arena_bin_info_t *bin_info; size_t npages, run_ind, past; arena_chunk_map_misc_t *miscelm; @@ -1762,7 +1736,8 @@ arena_dalloc_bin_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr, arena_run_t *run; arena_bin_t *bin; arena_bin_info_t *bin_info; - size_t size, binind; + size_t size; + index_t binind; pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; rpages_ind = pageind - arena_mapbits_small_runind_get(chunk, pageind); @@ -1851,10 +1826,12 @@ arena_dalloc_large_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr) arena_dalloc_junk_large(ptr, usize); if (config_stats) { + index_t index = size2index(usize) - NBINS; + arena->stats.ndalloc_large++; arena->stats.allocated_large -= usize; - arena->stats.lstats[(usize >> LG_PAGE) - 1].ndalloc++; - arena->stats.lstats[(usize >> LG_PAGE) - 1].curruns--; + arena->stats.lstats[index].ndalloc++; + arena->stats.lstats[index].curruns--; } } @@ -1887,17 +1864,20 @@ arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk, void *ptr, malloc_mutex_lock(&arena->lock); arena_run_trim_tail(arena, chunk, run, oldsize, size, true); if (config_stats) { + index_t oldindex = size2index(oldsize) - NBINS; + index_t index = size2index(size) - NBINS; + arena->stats.ndalloc_large++; arena->stats.allocated_large -= oldsize; - arena->stats.lstats[(oldsize >> LG_PAGE) - 1].ndalloc++; - arena->stats.lstats[(oldsize >> LG_PAGE) - 1].curruns--; + arena->stats.lstats[oldindex].ndalloc++; + arena->stats.lstats[oldindex].curruns--; arena->stats.nmalloc_large++; arena->stats.nrequests_large++; arena->stats.allocated_large += size; - arena->stats.lstats[(size >> LG_PAGE) - 1].nmalloc++; - arena->stats.lstats[(size >> LG_PAGE) - 1].nrequests++; - arena->stats.lstats[(size >> LG_PAGE) - 1].curruns++; + arena->stats.lstats[index].nmalloc++; + arena->stats.lstats[index].nrequests++; + arena->stats.lstats[index].curruns++; } malloc_mutex_unlock(&arena->lock); } @@ -1909,24 +1889,30 @@ arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr, size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; size_t npages = oldsize >> LG_PAGE; size_t followsize; + size_t usize_min = s2u(size); assert(oldsize == 
arena_mapbits_large_size_get(chunk, pageind)); /* Try to extend the run. */ - assert(size + extra > oldsize); + assert(usize_min > oldsize); malloc_mutex_lock(&arena->lock); if (pageind + npages < chunk_npages && arena_mapbits_allocated_get(chunk, pageind+npages) == 0 && (followsize = arena_mapbits_unallocated_size_get(chunk, - pageind+npages)) >= size - oldsize) { + pageind+npages)) >= usize_min - oldsize) { /* * The next run is available and sufficiently large. Split the * following run, then merge the first part with the existing * allocation. */ - size_t flag_dirty; - size_t splitsize = (oldsize + followsize <= size + extra) - ? followsize : size + extra - oldsize; + size_t flag_dirty, splitsize, usize; + + usize = s2u(size + extra); + while (oldsize + followsize < usize) + usize = index2size(size2index(usize)-1); + assert(usize >= usize_min); + splitsize = usize - oldsize; + arena_run_t *run = &arena_miscelm_get(chunk, pageind+npages)->run; arena_run_split_large(arena, run, splitsize, zero); @@ -1948,17 +1934,20 @@ arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr, arena_mapbits_large_set(chunk, pageind+npages-1, 0, flag_dirty); if (config_stats) { + index_t oldindex = size2index(oldsize) - NBINS; + index_t index = size2index(size) - NBINS; + arena->stats.ndalloc_large++; arena->stats.allocated_large -= oldsize; - arena->stats.lstats[(oldsize >> LG_PAGE) - 1].ndalloc++; - arena->stats.lstats[(oldsize >> LG_PAGE) - 1].curruns--; + arena->stats.lstats[oldindex].ndalloc++; + arena->stats.lstats[oldindex].curruns--; arena->stats.nmalloc_large++; arena->stats.nrequests_large++; arena->stats.allocated_large += size; - arena->stats.lstats[(size >> LG_PAGE) - 1].nmalloc++; - arena->stats.lstats[(size >> LG_PAGE) - 1].nrequests++; - arena->stats.lstats[(size >> LG_PAGE) - 1].curruns++; + arena->stats.lstats[index].nmalloc++; + arena->stats.lstats[index].nrequests++; + arena->stats.lstats[index].curruns++; } malloc_mutex_unlock(&arena->lock); return (false); @@ -1996,10 +1985,14 @@ static bool arena_ralloc_large(void *ptr, size_t oldsize, size_t size, size_t extra, bool zero) { - size_t psize; + size_t usize; - psize = PAGE_CEILING(size + extra); - if (psize == oldsize) { + /* Make sure extra can't cause size_t overflow. */ + if (extra >= arena_maxclass) + return (true); + + usize = s2u(size + extra); + if (usize == oldsize) { /* Same size class. */ return (false); } else { @@ -2009,16 +2002,15 @@ arena_ralloc_large(void *ptr, size_t oldsize, size_t size, size_t extra, chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); arena = chunk->arena; - if (psize < oldsize) { + if (usize < oldsize) { /* Fill before shrinking in order avoid a race. 
*/ - arena_ralloc_junk_large(ptr, oldsize, psize); + arena_ralloc_junk_large(ptr, oldsize, usize); arena_ralloc_large_shrink(arena, chunk, ptr, oldsize, - psize); + usize); return (false); } else { bool ret = arena_ralloc_large_grow(arena, chunk, ptr, - oldsize, PAGE_CEILING(size), - psize - PAGE_CEILING(size), zero); + oldsize, size, extra, zero); if (config_fill && !ret && !zero) { if (unlikely(opt_junk)) { memset((void *)((uintptr_t)ptr + @@ -2045,12 +2037,11 @@ arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, */ if (oldsize <= arena_maxclass) { if (oldsize <= SMALL_MAXCLASS) { - assert(arena_bin_info[small_size2bin(oldsize)].reg_size + assert(arena_bin_info[size2index(oldsize)].reg_size == oldsize); - if ((size + extra <= SMALL_MAXCLASS && - small_size2bin(size + extra) == - small_size2bin(oldsize)) || (size <= oldsize && - size + extra >= oldsize)) + if ((size + extra <= SMALL_MAXCLASS && size2index(size + + extra) == size2index(oldsize)) || (size <= oldsize + && size + extra >= oldsize)) return (false); } else { assert(size <= arena_maxclass); @@ -2258,7 +2249,7 @@ arena_new(arena_t *arena, unsigned ind) /* * Calculate bin_info->run_size such that it meets the following constraints: * - * *) bin_info->run_size <= arena_maxclass + * *) bin_info->run_size <= arena_maxrun * *) bin_info->nregs <= RUN_MAXREGS * * bin_info->nregs and bin_info->reg0_offset are also calculated here, since @@ -2330,7 +2321,7 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info) /* * Make sure that the run will fit within an arena chunk. */ - while (actual_run_size > arena_maxclass) { + while (actual_run_size > arena_maxrun) { actual_run_size -= PAGE; actual_nregs = (actual_run_size - pad_size) / bin_info->reg_interval; @@ -2396,7 +2387,17 @@ arena_boot(void) map_misc_offset = offsetof(arena_chunk_t, map_bits) + sizeof(arena_chunk_map_bits_t) * (chunk_npages-map_bias); - arena_maxclass = chunksize - (map_bias << LG_PAGE); + arena_maxrun = chunksize - (map_bias << LG_PAGE); + arena_maxclass = index2size(size2index(chunksize)-1); + if (arena_maxclass > arena_maxrun) { + /* + * For small chunk sizes it's possible for there to be fewer + * non-header pages available than are necessary to serve the + * size classes just below chunksize. + */ + arena_maxclass = arena_maxrun; + } + nlclasses = size2index(arena_maxclass) - size2index(SMALL_MAXCLASS); bin_info_init(); } diff --git a/src/chunk.c b/src/chunk.c index 32b8b3a6..618aaca0 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -27,9 +27,6 @@ rtree_t *chunks_rtree; size_t chunksize; size_t chunksize_mask; /* (chunksize - 1). */ size_t chunk_npages; -size_t map_bias; -size_t map_misc_offset; -size_t arena_maxclass; /* Max size class for arenas. 
*/ /******************************************************************************/ /* diff --git a/src/ctl.c b/src/ctl.c index 309f1f65..f1f3234b 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1628,7 +1628,7 @@ arenas_bin_i_index(const size_t *mib, size_t miblen, size_t i) } CTL_RO_NL_GEN(arenas_nlruns, nlclasses, size_t) -CTL_RO_NL_GEN(arenas_lrun_i_size, ((mib[2]+1) << LG_PAGE), size_t) +CTL_RO_NL_GEN(arenas_lrun_i_size, index2size(NBINS+mib[2]), size_t) static const ctl_named_node_t * arenas_lrun_i_index(const size_t *mib, size_t miblen, size_t i) { diff --git a/src/huge.c b/src/huge.c index 6bdc0767..ae416253 100644 --- a/src/huge.c +++ b/src/huge.c @@ -15,12 +15,19 @@ static extent_tree_t huge; void * huge_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero) { + size_t usize; - return (huge_palloc(tsd, arena, size, chunksize, zero)); + usize = s2u(size); + if (usize == 0) { + /* size_t overflow. */ + return (NULL); + } + + return (huge_palloc(tsd, arena, usize, chunksize, zero)); } void * -huge_palloc(tsd_t *tsd, arena_t *arena, size_t size, size_t alignment, +huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, bool zero) { void *ret; @@ -30,11 +37,8 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t size, size_t alignment, /* Allocate one or more contiguous chunks for this request. */ - csize = CHUNK_CEILING(size); - if (csize == 0) { - /* size is large enough to cause size_t wrap-around. */ - return (NULL); - } + csize = CHUNK_CEILING(usize); + assert(csize >= usize); /* Allocate an extent node with which to track the chunk. */ node = base_node_alloc(); @@ -55,7 +59,7 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t size, size_t alignment, /* Insert node into huge. */ node->addr = ret; - node->size = csize; + node->size = usize; node->arena = arena; malloc_mutex_lock(&huge_mtx); @@ -64,9 +68,9 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t size, size_t alignment, if (config_fill && !zero) { if (unlikely(opt_junk)) - memset(ret, 0xa5, csize); + memset(ret, 0xa5, usize); else if (unlikely(opt_zero) && !is_zeroed) - memset(ret, 0, csize); + memset(ret, 0, usize); } return (ret); @@ -97,7 +101,7 @@ huge_dalloc_junk_t *huge_dalloc_junk = JEMALLOC_N(huge_dalloc_junk_impl); static bool huge_ralloc_no_move_expand(void *ptr, size_t oldsize, size_t size, bool zero) { - size_t csize; + size_t usize; void *expand_addr; size_t expand_size; extent_node_t *node, key; @@ -105,14 +109,14 @@ huge_ralloc_no_move_expand(void *ptr, size_t oldsize, size_t size, bool zero) { bool is_zeroed; void *ret; - csize = CHUNK_CEILING(size); - if (csize == 0) { - /* size is large enough to cause size_t wrap-around. */ + usize = s2u(size); + if (usize == 0) { + /* size_t overflow. */ return (true); } - expand_addr = ptr + oldsize; - expand_size = csize - oldsize; + expand_addr = ptr + CHUNK_CEILING(oldsize); + expand_size = CHUNK_CEILING(usize) - CHUNK_CEILING(oldsize); malloc_mutex_lock(&huge_mtx); @@ -140,14 +144,14 @@ huge_ralloc_no_move_expand(void *ptr, size_t oldsize, size_t size, bool zero) { malloc_mutex_lock(&huge_mtx); /* Update the size of the huge allocation. 
*/ - node->size = csize; + node->size = usize; malloc_mutex_unlock(&huge_mtx); if (config_fill && !zero) { if (unlikely(opt_junk)) - memset(expand_addr, 0xa5, expand_size); + memset(ptr + oldsize, 0xa5, usize - oldsize); else if (unlikely(opt_zero) && !is_zeroed) - memset(expand_addr, 0, expand_size); + memset(ptr + oldsize, 0, usize - oldsize); } return (false); } @@ -156,27 +160,71 @@ bool huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, bool zero) { + size_t usize; /* Both allocations must be huge to avoid a move. */ - if (oldsize <= arena_maxclass) + if (oldsize < chunksize) return (true); - assert(CHUNK_CEILING(oldsize) == oldsize); + assert(s2u(oldsize) == oldsize); + usize = s2u(size); + if (usize == 0) { + /* size_t overflow. */ + return (true); + } /* - * Avoid moving the allocation if the size class can be left the same. + * Avoid moving the allocation if the existing chunk size accommodates + * the new size. */ + if (CHUNK_CEILING(oldsize) >= CHUNK_CEILING(usize) + && CHUNK_CEILING(oldsize) <= CHUNK_CEILING(size+extra)) { + size_t usize_next; + + /* Increase usize to incorporate extra. */ + while (usize < s2u(size+extra) && (usize_next = s2u(usize+1)) < + oldsize) + usize = usize_next; + + /* Update the size of the huge allocation if it changed. */ + if (oldsize != usize) { + extent_node_t *node, key; + + malloc_mutex_lock(&huge_mtx); + + key.addr = ptr; + node = extent_tree_ad_search(&huge, &key); + assert(node != NULL); + assert(node->addr == ptr); + + assert(node->size != usize); + node->size = usize; + + malloc_mutex_unlock(&huge_mtx); + + if (oldsize < usize) { + if (zero || (config_fill && + unlikely(opt_zero))) { + memset(ptr + oldsize, 0, usize - + oldsize); + } else if (config_fill && unlikely(opt_junk)) { + memset(ptr + oldsize, 0xa5, usize - + oldsize); + } + } else if (config_fill && unlikely(opt_junk) && oldsize + > usize) + memset(ptr + usize, 0x5a, oldsize - usize); + } + return (false); + } + if (CHUNK_CEILING(oldsize) >= CHUNK_CEILING(size) && CHUNK_CEILING(oldsize) <= CHUNK_CEILING(size+extra)) { return (false); } - /* Overflow. */ - if (CHUNK_CEILING(size) == 0) - return (true); - /* Shrink the allocation in-place. */ - if (CHUNK_CEILING(oldsize) > CHUNK_CEILING(size)) { + if (CHUNK_CEILING(oldsize) > CHUNK_CEILING(usize)) { extent_node_t *node, key; void *excess_addr; size_t excess_size; @@ -189,15 +237,15 @@ huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, assert(node->addr == ptr); /* Update the size of the huge allocation. */ - node->size = CHUNK_CEILING(size); + node->size = usize; malloc_mutex_unlock(&huge_mtx); - excess_addr = node->addr + CHUNK_CEILING(size); - excess_size = CHUNK_CEILING(oldsize) - CHUNK_CEILING(size); + excess_addr = node->addr + CHUNK_CEILING(usize); + excess_size = CHUNK_CEILING(oldsize) - CHUNK_CEILING(usize); /* Zap the excess chunks. 
*/ - huge_dalloc_junk(excess_addr, excess_size); + huge_dalloc_junk(ptr + usize, oldsize - usize); arena_chunk_dalloc_huge(node->arena, excess_addr, excess_size); return (false); @@ -275,7 +323,8 @@ huge_dalloc(void *ptr) malloc_mutex_unlock(&huge_mtx); huge_dalloc_junk(node->addr, node->size); - arena_chunk_dalloc_huge(node->arena, node->addr, node->size); + arena_chunk_dalloc_huge(node->arena, node->addr, + CHUNK_CEILING(node->size)); base_node_dalloc(node); } diff --git a/src/jemalloc.c b/src/jemalloc.c index 3490ecdf..f3750b40 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -42,6 +42,38 @@ unsigned narenas_auto; /* Set to true once the allocator has been initialized. */ static bool malloc_initialized = false; +JEMALLOC_ALIGNED(CACHELINE) +const size_t index2size_tab[NSIZES] = { +#define SC(index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup) \ + ((ZU(1)<next_gc_bin; + index_t binind = tcache->next_gc_bin; tcache_bin_t *tbin = &tcache->tbins[binind]; tcache_bin_info_t *tbin_info = &tcache_bin_info[binind]; @@ -62,7 +62,7 @@ tcache_event_hard(tcache_t *tcache) } void * -tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin, size_t binind) +tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin, index_t binind) { void *ret; @@ -76,7 +76,7 @@ tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin, size_t binind) } void -tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem, +tcache_bin_flush_small(tcache_bin_t *tbin, index_t binind, unsigned rem, tcache_t *tcache) { void *ptr; @@ -153,7 +153,7 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem, } void -tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem, +tcache_bin_flush_large(tcache_bin_t *tbin, index_t binind, unsigned rem, tcache_t *tcache) { void *ptr; diff --git a/test/unit/junk.c b/test/unit/junk.c index 301428f2..5b35a879 100644 --- a/test/unit/junk.c +++ b/test/unit/junk.c @@ -88,7 +88,6 @@ test_junk(size_t sz_min, size_t sz_max) if (xallocx(s, sz+1, 0, 0) == sz) { void *junked = (void *)s; - s = (char *)rallocx(s, sz+1, 0); assert_ptr_not_null((void *)s, "Unexpected rallocx() failure"); @@ -134,13 +133,25 @@ TEST_END arena_ralloc_junk_large_t *arena_ralloc_junk_large_orig; static void *most_recently_trimmed; +static size_t +shrink_size(size_t size) +{ + size_t shrink_size; + + for (shrink_size = size - 1; nallocx(shrink_size, 0) == size; + shrink_size--) + ; /* Do nothing. 
*/ + + return (shrink_size); +} + static void arena_ralloc_junk_large_intercept(void *ptr, size_t old_usize, size_t usize) { arena_ralloc_junk_large_orig(ptr, old_usize, usize); assert_zu_eq(old_usize, arena_maxclass, "Unexpected old_usize"); - assert_zu_eq(usize, arena_maxclass-PAGE, "Unexpected usize"); + assert_zu_eq(usize, shrink_size(arena_maxclass), "Unexpected usize"); most_recently_trimmed = ptr; } @@ -154,7 +165,7 @@ TEST_BEGIN(test_junk_large_ralloc_shrink) arena_ralloc_junk_large_orig = arena_ralloc_junk_large; arena_ralloc_junk_large = arena_ralloc_junk_large_intercept; - p2 = rallocx(p1, arena_maxclass-PAGE, 0); + p2 = rallocx(p1, shrink_size(arena_maxclass), 0); assert_ptr_eq(p1, p2, "Unexpected move during shrink"); arena_ralloc_junk_large = arena_ralloc_junk_large_orig; diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index c70473cc..e62e54f2 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -357,7 +357,7 @@ TEST_BEGIN(test_arenas_lrun_constants) assert_zu_eq(name, expected, "Incorrect "#name" size"); \ } while (0) - TEST_ARENAS_LRUN_CONSTANT(size_t, size, (1 << LG_PAGE)); + TEST_ARENAS_LRUN_CONSTANT(size_t, size, (1 << (LG_PAGE+2))); #undef TEST_ARENAS_LRUN_CONSTANT }
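For reference, the size-class arithmetic that the new size2index_compute()/index2size_compute() inlines implement can be exercised in isolation. The sketch below is not part of the patch: it hard-codes hypothetical parameters (LG_QUANTUM = 4, LG_SIZE_CLASS_GROUP = 2, no tiny bins, no lookup table) and uses explicit conditionals in place of the branchless masks, so the helper names sz2index()/index2sz() are illustrative only.

/*
 * sketch.c: standalone illustration of jemalloc-style size class math
 * (hypothetical parameters; not part of the patch).
 */
#include <assert.h>
#include <stdio.h>

#define LG_QUANTUM		4	/* 16-byte quantum (assumed). */
#define LG_SIZE_CLASS_GROUP	2	/* 2^2 = 4 classes per lg group. */

/* Floor of lg(x), for x > 0. */
static size_t
lg_floor(size_t x)
{
	size_t lg = 0;

	while (x >>= 1)
		lg++;
	return (lg);
}

/* Round a request up to its size class index (cf. size2index_compute()). */
static size_t
sz2index(size_t size)
{
	size_t x = lg_floor((size << 1) - 1);
	size_t shift = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM) ? 0 :
	    x - (LG_SIZE_CLASS_GROUP + LG_QUANTUM);
	size_t grp = shift << LG_SIZE_CLASS_GROUP;
	size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1) ?
	    LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1;
	size_t mod = ((size - 1) >> lg_delta) &
	    (((size_t)1 << LG_SIZE_CLASS_GROUP) - 1);

	return (grp + mod);
}

/* Map a size class index back to its usable size (cf. index2size_compute()). */
static size_t
index2sz(size_t index)
{
	size_t grp = index >> LG_SIZE_CLASS_GROUP;
	size_t mod = index & (((size_t)1 << LG_SIZE_CLASS_GROUP) - 1);
	/* Group 0 spans [0, 2^(LG_QUANTUM+LG_SIZE_CLASS_GROUP)); later groups double. */
	size_t grp_size = (grp == 0) ? 0 :
	    ((size_t)1 << (LG_QUANTUM + LG_SIZE_CLASS_GROUP - 1)) << grp;
	size_t lg_delta = ((grp == 0) ? 1 : grp) + (LG_QUANTUM - 1);

	return (grp_size + ((mod + 1) << lg_delta));
}

int
main(void)
{
	const size_t sizes[] = {1, 16, 17, 64, 65, 100, 129, 4096, 4097};
	size_t i;

	for (i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++) {
		size_t sz = sizes[i];
		size_t ind = sz2index(sz);
		size_t usize = index2sz(ind);

		/* Rounding never shrinks, and the mapping round-trips. */
		assert(usize >= sz);
		assert(sz2index(usize) == ind);
		printf("size %4zu -> index %2zu -> usize %4zu\n", sz, ind,
		    usize);
	}
	return (0);
}

With these parameters the classes come out as 16, 32, 48, 64, 80, 96, 112, 128, 160, 192, ... — four evenly spaced classes per lg group — which is why the patch can replace the page-granular large-run stats indexing ((usize >> LG_PAGE) - 1) with size2index(usize) - NBINS.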