Optimize [nmd]alloc() fast paths.

Optimize [nmd]alloc() fast paths such that the (flags == 0) case is
streamlined, flags decoding only happens to the minimum degree
necessary, and no conditionals are repeated.
Jason Evans
2014-09-07 14:40:19 -07:00
parent c21b05ea09
commit b718cf77e9
7 changed files with 172 additions and 131 deletions
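For readers unfamiliar with the fast-path layout this commit streamlines, the dispatch has roughly the shape below. This is a minimal sketch with made-up names (allocx_sketch(), alloc_flags_slow_sketch()), not the actual jemalloc internals: the common (flags == 0) case takes a single branch and never touches the flag-decoding macros, while the slow path decodes the flags exactly once.

```c
#include <stdlib.h>

/*
 * Sketch only: alloc_flags_slow_sketch() stands in for the full
 * flags-decoding path (alignment, zero, tcache, arena).
 */
static void *
alloc_flags_slow_sketch(size_t size, int flags)
{
	/* Decode alignment/zero/arena from flags exactly once, here. */
	(void)flags;
	return (malloc(size));
}

static void *
allocx_sketch(size_t size, int flags)
{
	if (flags == 0)
		return (malloc(size));	/* Fast path: no flags decoding. */
	return (alloc_flags_slow_sketch(size, flags));
}

int
main(void)
{
	void *p = allocx_sketch(64, 0);

	free(p);
	return (0);
}
```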

View File

@@ -577,7 +577,7 @@ small_bin2size_lookup(size_t binind)
assert(binind < NBINS);
{
size_t ret = ((size_t)(small_bin2size_tab[binind]));
size_t ret = (size_t)small_bin2size_tab[binind];
assert(ret == small_bin2size_compute(binind));
return (ret);
}
@@ -615,7 +615,7 @@ small_s2u_compute(size_t size)
JEMALLOC_ALWAYS_INLINE size_t
small_s2u_lookup(size_t size)
{
size_t ret = (small_bin2size(small_size2bin(size)));
size_t ret = small_bin2size(small_size2bin(size));
assert(ret == small_s2u_compute(size));
return (ret);

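The two hunks above only drop redundant parentheses, but they show the lookup-vs-compute pattern these fast paths rely on: a table lookup produces the result, and a debug-only assert cross-checks it against the slower arithmetic version. Below is a hedged sketch of that pattern with a made-up four-entry table; the real small_bin2size_tab is generated from the size-class definitions.

```c
#include <assert.h>
#include <stddef.h>

#define	NBINS_SKETCH	4
static const size_t bin2size_tab_sketch[NBINS_SKETCH] = {8, 16, 32, 64};

/* Slow arithmetic version; matches the toy table above by construction. */
static size_t
bin2size_compute_sketch(size_t binind)
{
	return ((size_t)8 << binind);
}

/* Fast table version, validated against the computed value in debug builds. */
static size_t
bin2size_lookup_sketch(size_t binind)
{
	size_t ret;

	assert(binind < NBINS_SKETCH);
	ret = bin2size_tab_sketch[binind];
	assert(ret == bin2size_compute_sketch(binind));
	return (ret);
}

int
main(void)
{
	return (bin2size_lookup_sketch(2) == 32 ? 0 : 1);
}
```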
View File

@@ -165,7 +165,17 @@ static const bool config_ivsalloc =
#include "jemalloc/internal/jemalloc_internal_macros.h"
#define MALLOCX_ARENA_MASK ((int)~0xff)
#define MALLOCX_LG_ALIGN_MASK ((int)0x3f)
/* Use MALLOCX_ALIGN_GET() if alignment may not be specified in flags. */
#define MALLOCX_ALIGN_GET_SPECIFIED(flags) \
(ZU(1) << (flags & MALLOCX_LG_ALIGN_MASK))
#define MALLOCX_ALIGN_GET(flags) \
(MALLOCX_ALIGN_GET_SPECIFIED(flags) & (SIZE_T_MAX-1))
#define MALLOCX_ZERO_GET(flags) \
((bool)(flags & MALLOCX_ZERO))
#define MALLOCX_ARENA_GET(flags) \
(((unsigned)(flags >> 8)) - 1)
/* Smallest size class to support. */
#define LG_TINY_MIN 3
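The new MALLOCX_*_GET() macros are what lets the slow path decode flags "to the minimum degree necessary." The standalone illustration below shows the decoding arithmetic; the MALLOCX_LG_ALIGN()/MALLOCX_ZERO/MALLOCX_ARENA() encoder values are assumptions mirroring jemalloc's public jemalloc_macros.h, and the rest are local copies of the definitions in this hunk.

```c
#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#define	ZU(z)				((size_t)(z))
#define	SIZE_T_MAX			SIZE_MAX
#define	MALLOCX_LG_ALIGN(la)		((int)(la))
#define	MALLOCX_ZERO			((int)0x40)
#define	MALLOCX_ARENA(a)		((int)(((a)+1) << 8))
#define	MALLOCX_LG_ALIGN_MASK		((int)0x3f)
#define	MALLOCX_ALIGN_GET_SPECIFIED(flags)				\
    (ZU(1) << (flags & MALLOCX_LG_ALIGN_MASK))
/* Masking with SIZE_T_MAX-1 maps "no alignment bits" (1 << 0) to 0, branch-free. */
#define	MALLOCX_ALIGN_GET(flags)					\
    (MALLOCX_ALIGN_GET_SPECIFIED(flags) & (SIZE_T_MAX-1))
#define	MALLOCX_ZERO_GET(flags)		((bool)(flags & MALLOCX_ZERO))
#define	MALLOCX_ARENA_GET(flags)	(((unsigned)(flags >> 8)) - 1)

int
main(void)
{
	int flags = MALLOCX_LG_ALIGN(4) | MALLOCX_ZERO | MALLOCX_ARENA(3);

	assert(MALLOCX_ALIGN_GET(flags) == 16);	/* lg_align 4 -> 16 bytes. */
	assert(MALLOCX_ZERO_GET(flags));
	assert(MALLOCX_ARENA_GET(flags) == 3);
	assert(MALLOCX_ALIGN_GET(0) == 0);	/* Unspecified alignment decodes to 0. */
	return (0);
}
```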
@@ -625,15 +635,13 @@ size_t u2rz(size_t usize);
size_t p2rz(const void *ptr);
void idalloct(void *ptr, bool try_tcache);
void idalloc(void *ptr);
void iqalloct(void *ptr, bool try_tcache);
void iqalloc(void *ptr);
void iqalloc(void *ptr, bool try_tcache);
void *iralloct_realign(void *ptr, size_t oldsize, size_t size, size_t extra,
size_t alignment, bool zero, bool try_tcache_alloc, bool try_tcache_dalloc,
arena_t *arena);
void *iralloct(void *ptr, size_t size, size_t extra, size_t alignment,
bool zero, bool try_tcache_alloc, bool try_tcache_dalloc, arena_t *arena);
void *iralloc(void *ptr, size_t size, size_t extra, size_t alignment,
bool zero);
void *iralloct(void *ptr, size_t size, size_t alignment, bool zero,
bool try_tcache_alloc, bool try_tcache_dalloc, arena_t *arena);
void *iralloc(void *ptr, size_t size, size_t alignment, bool zero);
bool ixalloc(void *ptr, size_t size, size_t extra, size_t alignment,
bool zero);
malloc_tsd_protos(JEMALLOC_ATTR(unused), thread_allocated, thread_allocated_t)
@@ -787,7 +795,7 @@ idalloc(void *ptr)
}
JEMALLOC_ALWAYS_INLINE void
iqalloct(void *ptr, bool try_tcache)
iqalloc(void *ptr, bool try_tcache)
{
if (config_fill && opt_quarantine)
@@ -796,13 +804,6 @@ iqalloct(void *ptr, bool try_tcache)
idalloct(ptr, try_tcache);
}
JEMALLOC_ALWAYS_INLINE void
iqalloc(void *ptr)
{
iqalloct(ptr, true);
}
JEMALLOC_ALWAYS_INLINE void *
iralloct_realign(void *ptr, size_t oldsize, size_t size, size_t extra,
size_t alignment, bool zero, bool try_tcache_alloc, bool try_tcache_dalloc,
@@ -832,12 +833,12 @@ iralloct_realign(void *ptr, size_t oldsize, size_t size, size_t extra,
*/
copysize = (size < oldsize) ? size : oldsize;
memcpy(p, ptr, copysize);
iqalloct(ptr, try_tcache_dalloc);
iqalloc(ptr, try_tcache_dalloc);
return (p);
}
JEMALLOC_ALWAYS_INLINE void *
iralloct(void *ptr, size_t size, size_t extra, size_t alignment, bool zero,
iralloct(void *ptr, size_t size, size_t alignment, bool zero,
bool try_tcache_alloc, bool try_tcache_dalloc, arena_t *arena)
{
size_t oldsize;
@@ -853,25 +854,24 @@ iralloct(void *ptr, size_t size, size_t extra, size_t alignment, bool zero,
* Existing object alignment is inadequate; allocate new space
* and copy.
*/
return (iralloct_realign(ptr, oldsize, size, extra, alignment,
zero, try_tcache_alloc, try_tcache_dalloc, arena));
return (iralloct_realign(ptr, oldsize, size, 0, alignment, zero,
try_tcache_alloc, try_tcache_dalloc, arena));
}
if (size + extra <= arena_maxclass) {
return (arena_ralloc(arena, ptr, oldsize, size, extra,
alignment, zero, try_tcache_alloc,
try_tcache_dalloc));
if (size <= arena_maxclass) {
return (arena_ralloc(arena, ptr, oldsize, size, 0, alignment,
zero, try_tcache_alloc, try_tcache_dalloc));
} else {
return (huge_ralloc(arena, ptr, oldsize, size, extra,
alignment, zero, try_tcache_dalloc));
return (huge_ralloc(arena, ptr, oldsize, size, 0, alignment,
zero, try_tcache_dalloc));
}
}
JEMALLOC_ALWAYS_INLINE void *
iralloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero)
iralloc(void *ptr, size_t size, size_t alignment, bool zero)
{
return (iralloct(ptr, size, extra, alignment, zero, true, true, NULL));
return (iralloct(ptr, size, alignment, zero, true, true, NULL));
}
JEMALLOC_ALWAYS_INLINE bool

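To summarize the interface change in this file: iqalloct() is folded into iqalloc(), which now takes try_tcache directly, and iralloct()/iralloc() lose the always-zero extra argument (the realign/ralloc helpers are simply passed 0). The sketch below only mirrors the new call shapes with stand-in stubs; it is not the jemalloc implementation.

```c
#include <stdbool.h>
#include <stddef.h>
#include <stdlib.h>

/* Stand-ins that mirror the new signatures only. */
static void
iqalloc_sketch(void *ptr, bool try_tcache)
{
	(void)try_tcache;	/* Would choose whether to use the thread cache. */
	free(ptr);
}

static void *
iralloc_sketch(void *ptr, size_t size, size_t alignment, bool zero)
{
	(void)alignment;
	(void)zero;
	return (realloc(ptr, size));
}

int
main(void)
{
	void *p = iralloc_sketch(NULL, 64, 0, false);

	iqalloc_sketch(p, true);	/* try_tcache is passed directly now. */
	return (0);
}
```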
View File

@@ -224,7 +224,6 @@ in_valgrind
ipalloc
ipalloct
iqalloc
iqalloct
iralloc
iralloct
iralloct_realign

View File

@@ -202,6 +202,7 @@ cat <<EOF
* LG_TINY_MAXCLASS: Lg of maximum tiny size class.
* LOOKUP_MAXCLASS: Maximum size class included in lookup table.
* SMALL_MAXCLASS: Maximum small size class.
* LARGE_MINCLASS: Minimum large size class.
*/
#define LG_SIZE_CLASS_GROUP ${lg_g}
@@ -246,6 +247,8 @@ cat <<EOF
# error "Too many small size classes"
#endif
#define LARGE_MINCLASS (PAGE_CEILING(SMALL_MAXCLASS+1))
#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS
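The size_classes.sh change adds a derived LARGE_MINCLASS constant: the first size past SMALL_MAXCLASS, rounded up to a page boundary. Below is a worked sketch of the arithmetic, assuming 4 KiB pages and a hypothetical SMALL_MAXCLASS of 3584; both values depend on the build configuration.

```c
#include <assert.h>
#include <stddef.h>

#define	LG_PAGE		12			/* Assumed 4 KiB pages. */
#define	PAGE		((size_t)(1U << LG_PAGE))
#define	PAGE_MASK	((size_t)(PAGE - 1))
#define	PAGE_CEILING(s)	(((s) + PAGE_MASK) & ~PAGE_MASK)
#define	SMALL_MAXCLASS	((size_t)3584)		/* Hypothetical; configuration-dependent. */
#define	LARGE_MINCLASS	(PAGE_CEILING(SMALL_MAXCLASS+1))

int
main(void)
{
	/* 3585 rounded up to the next page boundary: LARGE_MINCLASS == 4096. */
	assert(LARGE_MINCLASS == PAGE);
	return (0);
}
```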