diff --git a/COPYING b/COPYING
index e27fc4d6..019e8132 100644
--- a/COPYING
+++ b/COPYING
@@ -1,10 +1,10 @@
 Unless otherwise specified, files in the jemalloc source distribution are
 subject to the following license:
 --------------------------------------------------------------------------------
-Copyright (C) 2002-2012 Jason Evans <jasone@canonware.com>.
+Copyright (C) 2002-2013 Jason Evans <jasone@canonware.com>.
 All rights reserved.
 Copyright (C) 2007-2012 Mozilla Foundation.  All rights reserved.
-Copyright (C) 2009-2012 Facebook, Inc.  All rights reserved.
+Copyright (C) 2009-2013 Facebook, Inc.  All rights reserved.
 
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
diff --git a/ChangeLog b/ChangeLog
index ab3476c6..65782253 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -6,6 +6,23 @@ found in the git revision history:
     http://www.canonware.com/cgi-bin/gitweb.cgi?p=jemalloc.git
     git://canonware.com/jemalloc.git
 
+* 3.3.0 (January 23, 2013)
+
+  This version includes a few minor performance improvements in addition to the
+  listed new features and bug fixes.
+
+  New features:
+  - Add clipping support to lg_chunk option processing.
+  - Add the --enable-ivsalloc option.
+  - Add the --without-export option.
+  - Add the --disable-zone-allocator option.
+
+  Bug fixes:
+  - Fix "arenas.extend" mallctl to output the number of arenas.
+  - Fix chunk_recycle() to unconditionally inform Valgrind that returned memory
+    is undefined.
+  - Fix build break on FreeBSD related to alloca.h.
+
 * 3.2.0 (November 9, 2012)
 
   In addition to a couple of bug fixes, this version modifies page run
diff --git a/INSTALL b/INSTALL
index e40a7edd..6e371ce5 100644
--- a/INSTALL
+++ b/INSTALL
@@ -55,6 +55,11 @@ any of the following arguments (not a definitive list) to 'configure':
     jemalloc overlays the default malloc zone, but makes no attempt to actually
     replace the "malloc", "calloc", etc. symbols.
 
+--without-export
+    Don't export public APIs.  This can be useful when building jemalloc as a
+    static library, or to avoid exporting public APIs when using the zone
+    allocator on OSX.
+
 --with-private-namespace=<prefix>
     Prefix all library-private APIs with <prefix>.  For shared libraries,
     symbol visibility mechanisms prevent these symbols from being exported, but
@@ -74,6 +79,12 @@ any of the following arguments (not a definitive list) to 'configure':
 --enable-debug
     Enable assertions and validation code.  This incurs a substantial
     performance hit, but is very useful during application development.
+    Implies --enable-ivsalloc.
+
+--enable-ivsalloc
+    Enable validation code, which verifies that pointers reside within
+    jemalloc-owned chunks before dereferencing them.  This incurs a substantial
+    performance hit.
 
 --disable-stats
     Disable statistics gathering functionality.  See the "opt.stats_print"
@@ -136,6 +147,10 @@ any of the following arguments (not a definitive list) to 'configure':
 --disable-experimental
     Disable support for the experimental API (*allocm()).
 
+--disable-zone-allocator
+    Disable zone allocator for Darwin.  This means jemalloc won't be hooked as
+    the default allocator on OSX/iOS.
+
 --enable-utrace
     Enable utrace(2)-based allocation tracing.  This feature is not broadly
     portable (FreeBSD has it, but Linux and OS X do not).
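[Editor's note: a minimal sketch of the idea behind --enable-ivsalloc, described in the INSTALL text above. This is not jemalloc's actual code; chunk_owned() and size_of() are hypothetical stand-ins for the internal chunk-membership lookup and size query.]

    #include <stdbool.h>
    #include <stddef.h>

    /* Hypothetical stand-ins for jemalloc internals. */
    static bool chunk_owned(const void *ptr) { return (ptr != NULL); }
    static size_t size_of(const void *ptr) { (void)ptr; return (8); }

    static size_t
    validated_size(const void *ptr)
    {
        /* ivsalloc-style validation: return 0 for pointers that do not
         * reside in an owned chunk, instead of dereferencing metadata. */
        if (chunk_owned(ptr) == false)
            return (0);
        return (size_of(ptr));
    }

    int
    main(void)
    {
        int x;
        return (validated_size(&x) == 0);
    }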
diff --git a/Makefile.in b/Makefile.in
index 36448189..74810472 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -48,6 +48,7 @@ cfgoutputs_in := @cfgoutputs_in@
 cfgoutputs_out := @cfgoutputs_out@
 enable_autogen := @enable_autogen@
 enable_experimental := @enable_experimental@
+enable_zone_allocator := @enable_zone_allocator@
 DSO_LDFLAGS = @DSO_LDFLAGS@
 SOREV = @SOREV@
 PIC_CFLAGS = @PIC_CFLAGS@
@@ -80,7 +81,7 @@ CSRCS := $(srcroot)src/jemalloc.c $(srcroot)src/arena.c $(srcroot)src/atomic.c \
 	$(srcroot)src/mutex.c $(srcroot)src/prof.c $(srcroot)src/quarantine.c \
 	$(srcroot)src/rtree.c $(srcroot)src/stats.c $(srcroot)src/tcache.c \
 	$(srcroot)src/util.c $(srcroot)src/tsd.c
-ifeq (macho, $(ABI))
+ifeq ($(enable_zone_allocator), 1)
 CSRCS += $(srcroot)src/zone.c
 endif
 ifeq ($(IMPORTLIB),$(SO))
@@ -112,9 +113,9 @@ COBJS := $(CSRCS:$(srcroot)%.c=$(objroot)%.$(O))
 CPICOBJS := $(CSRCS:$(srcroot)%.c=$(objroot)%.pic.$(O))
 CTESTOBJS := $(CTESTS:$(srcroot)%.c=$(objroot)%.$(O))
 
-.PHONY: all dist doc_html doc_man doc
+.PHONY: all dist build_doc_html build_doc_man build_doc
 .PHONY: install_bin install_include install_lib
-.PHONY: install_html install_man install_doc install
+.PHONY: install_doc_html install_doc_man install_doc install
 .PHONY: tests check clean distclean relclean
 .SECONDARY : $(CTESTOBJS)
diff --git a/configure.ac b/configure.ac
index 1c52439e..c270662b 100644
--- a/configure.ac
+++ b/configure.ac
@@ -86,7 +86,7 @@ MANDIR=`eval echo $MANDIR`
 AC_SUBST([MANDIR])
 
 dnl Support for building documentation.
-AC_PATH_PROG([XSLTPROC], [xsltproc], , [$PATH])
+AC_PATH_PROG([XSLTPROC], [xsltproc], [false], [$PATH])
 if test -d "/usr/share/xml/docbook/stylesheet/docbook-xsl" ; then
   DEFAULT_XSLROOT="/usr/share/xml/docbook/stylesheet/docbook-xsl"
 elif test -d "/usr/share/sgml/docbook/xsl-stylesheets" ; then
@@ -261,6 +261,7 @@ case "${host}" in
 	CFLAGS="$CFLAGS"
 	CPPFLAGS="$CPPFLAGS -D_GNU_SOURCE"
 	abi="elf"
+	AC_DEFINE([JEMALLOC_HAS_ALLOCA_H])
 	AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED], [ ])
 	AC_DEFINE([JEMALLOC_THREADED_INIT], [ ])
 	JEMALLOC_USABLE_SIZE_CONST=""
@@ -402,9 +403,9 @@ AC_SUBST([enable_autogen])
 AC_PROG_INSTALL
 AC_PROG_RANLIB
-AC_PATH_PROG([AR], [ar], , [$PATH])
-AC_PATH_PROG([LD], [ld], , [$PATH])
-AC_PATH_PROG([AUTOCONF], [autoconf], , [$PATH])
+AC_PATH_PROG([AR], [ar], [false], [$PATH])
+AC_PATH_PROG([LD], [ld], [false], [$PATH])
+AC_PATH_PROG([AUTOCONF], [autoconf], [false], [$PATH])
 
 public_syms="malloc_conf malloc_message malloc calloc posix_memalign aligned_alloc realloc free malloc_usable_size malloc_stats_print mallctl mallctlnametomib mallctlbymib"
 
@@ -471,6 +472,13 @@ for stem in ${public_syms}; do
   AC_DEFINE_UNQUOTED([${n}], [${m}])
 done
 
+AC_ARG_WITH([export],
+  [AS_HELP_STRING([--without-export], [disable exporting jemalloc public APIs])],
+  [if test "x$with_export" = "xno"; then
+  AC_DEFINE([JEMALLOC_EXPORT],[])
+fi]
+)
+
 dnl Do not mangle library-private APIs by default.
 AC_ARG_WITH([private_namespace],
   [AS_HELP_STRING([--with-private-namespace=<prefix>], [Prefix to prepend to all library-private APIs])],
@@ -544,7 +552,7 @@ fi
 
 dnl Do not compile with debugging by default.
 AC_ARG_ENABLE([debug],
-  [AS_HELP_STRING([--enable-debug], [Build debugging code])],
+  [AS_HELP_STRING([--enable-debug], [Build debugging code (implies --enable-ivsalloc)])],
 [if test "x$enable_debug" = "xno" ; then
   enable_debug="0"
 else
@@ -555,10 +563,25 @@ fi
 )
 if test "x$enable_debug" = "x1" ; then
   AC_DEFINE([JEMALLOC_DEBUG], [ ])
-  AC_DEFINE([JEMALLOC_IVSALLOC], [ ])
+  enable_ivsalloc="1"
 fi
 AC_SUBST([enable_debug])
 
+dnl Do not validate pointers by default.
+AC_ARG_ENABLE([ivsalloc],
+  [AS_HELP_STRING([--enable-ivsalloc], [Validate pointers passed through the public API])],
+[if test "x$enable_ivsalloc" = "xno" ; then
+  enable_ivsalloc="0"
+else
+  enable_ivsalloc="1"
+fi
+],
+[enable_ivsalloc="0"]
+)
+if test "x$enable_ivsalloc" = "x1" ; then
+  AC_DEFINE([JEMALLOC_IVSALLOC], [ ])
+fi
+
 dnl Only optimize if not debugging.
 if test "x$enable_debug" = "x0" -a "x$no_CFLAGS" = "xyes" ; then
   dnl Make sure that an optimization flag was not specified in EXTRA_CFLAGS.
@@ -906,7 +929,7 @@ AC_CACHE_CHECK([STATIC_PAGE_SHIFT],
 #include <stdio.h>
 ]],
 [[
-    long result;
+    int result;
     FILE *f;
 
 #ifdef _WIN32
@@ -925,7 +948,7 @@ AC_CACHE_CHECK([STATIC_PAGE_SHIFT],
     if (f == NULL) {
 	return 1;
     }
-    fprintf(f, "%u\n", result);
+    fprintf(f, "%d\n", result);
     fclose(f);
 
     return 0;
@@ -1070,11 +1093,13 @@ dnl Check for ffsl(3), and fail if not found.  This function exists on all
 dnl platforms that jemalloc currently has a chance of functioning on without
 dnl modification.
 JE_COMPILABLE([a program using ffsl], [
+#include <stdio.h>
 #include <strings.h>
 #include <string.h>
 ], [
 	{
 		int rv = ffsl(0x08);
+		printf("%d\n", rv);
 	}
 ], [je_cv_function_ffsl])
 if test "x${je_cv_function_ffsl}" != "xyes" ; then
@@ -1178,7 +1203,26 @@ fi
 dnl ============================================================================
 dnl Darwin-related configuration.
 
-if test "x${abi}" = "xmacho" ; then
+AC_ARG_ENABLE([zone-allocator],
+  [AS_HELP_STRING([--disable-zone-allocator],
+                  [Disable zone allocator for Darwin])],
+[if test "x$enable_zone_allocator" = "xno" ; then
+  enable_zone_allocator="0"
+else
+  enable_zone_allocator="1"
+fi
+],
+[if test "x${abi}" = "xmacho"; then
+  enable_zone_allocator="1"
+fi
+]
+)
+AC_SUBST([enable_zone_allocator])
+
+if test "x${enable_zone_allocator}" = "x1" ; then
+  if test "x${abi}" != "xmacho"; then
+    AC_MSG_ERROR([--enable-zone-allocator is only supported on Darwin])
+  fi
   AC_DEFINE([JEMALLOC_IVSALLOC], [ ])
   AC_DEFINE([JEMALLOC_ZONE], [ ])
diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in
index 54b87474..09305801 100644
--- a/doc/jemalloc.xml.in
+++ b/doc/jemalloc.xml.in
@@ -790,8 +790,11 @@ for (i = 0; i < nbins; i++) {
           (<type>size_t</type>)
           <literal>r-</literal>
         </term>
-        <listitem><para>Virtual memory chunk size (log base 2).  The default
-        chunk size is 4 MiB (2^22).
+        <listitem><para>Virtual memory chunk size (log base 2).  If a chunk
+        size outside the supported size range is specified, the size is
+        silently clipped to the minimum/maximum supported size.  The default
+        chunk size is 4 MiB (2^22).
+
         </para></listitem>
       </varlistentry>
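[Editor's note: a small usage example of the clipping behavior documented above. malloc_conf is jemalloc's documented application-provided configuration symbol; the value 10 below is deliberately under the minimum (LG_PAGE plus 1 or 2), so with this change it is clamped rather than rejected.]

    /* Before this change, an out-of-range lg_chunk aborted option processing
     * with "Out-of-range conf value"; now it is silently clipped. */
    const char *malloc_conf = "lg_chunk:10";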
diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h
index 561c9b6f..8fdee931 100644
--- a/include/jemalloc/internal/arena.h
+++ b/include/jemalloc/internal/arena.h
@@ -400,7 +400,6 @@ extern arena_bin_info_t arena_bin_info[NBINS];
 #define nlclasses (chunk_npages - map_bias)
 
 void	arena_purge_all(arena_t *arena);
-void	arena_prof_accum(arena_t *arena, uint64_t accumbytes);
 void	arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin,
     size_t binind, uint64_t prof_accumbytes);
 void	arena_alloc_junk_small(void *ptr, arena_bin_info_t *bin_info,
@@ -464,6 +463,9 @@ void	arena_mapbits_small_set(arena_chunk_t *chunk, size_t pageind,
     size_t runind, size_t binind, size_t flags);
 void	arena_mapbits_unzeroed_set(arena_chunk_t *chunk, size_t pageind,
     size_t unzeroed);
+void	arena_prof_accum_impl(arena_t *arena, uint64_t accumbytes);
+void	arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes);
+void	arena_prof_accum(arena_t *arena, uint64_t accumbytes);
 size_t	arena_ptr_small_binind_get(const void *ptr, size_t mapbits);
 size_t	arena_bin_index(arena_t *arena, arena_bin_t *bin);
 unsigned	arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info,
@@ -478,7 +480,7 @@ void	arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr,
 
 #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_))
 # ifdef JEMALLOC_ARENA_INLINE_A
-JEMALLOC_INLINE arena_chunk_map_t *
+JEMALLOC_ALWAYS_INLINE arena_chunk_map_t *
 arena_mapp_get(arena_chunk_t *chunk, size_t pageind)
 {
 
@@ -488,21 +490,21 @@ arena_mapp_get(arena_chunk_t *chunk, size_t pageind)
 	return (&chunk->map[pageind-map_bias]);
 }
 
-JEMALLOC_INLINE size_t *
+JEMALLOC_ALWAYS_INLINE size_t *
 arena_mapbitsp_get(arena_chunk_t *chunk, size_t pageind)
 {
 
 	return (&arena_mapp_get(chunk, pageind)->bits);
 }
 
-JEMALLOC_INLINE size_t
+JEMALLOC_ALWAYS_INLINE size_t
 arena_mapbits_get(arena_chunk_t *chunk, size_t pageind)
 {
 
 	return (*arena_mapbitsp_get(chunk, pageind));
 }
 
-JEMALLOC_INLINE size_t
+JEMALLOC_ALWAYS_INLINE size_t
 arena_mapbits_unallocated_size_get(arena_chunk_t *chunk, size_t pageind)
 {
 	size_t mapbits;
@@ -512,7 +514,7 @@ arena_mapbits_unallocated_size_get(arena_chunk_t *chunk, size_t pageind)
 	return (mapbits & ~PAGE_MASK);
 }
 
-JEMALLOC_INLINE size_t
+JEMALLOC_ALWAYS_INLINE size_t
 arena_mapbits_large_size_get(arena_chunk_t *chunk, size_t pageind)
 {
 	size_t mapbits;
@@ -523,7 +525,7 @@ arena_mapbits_large_size_get(arena_chunk_t *chunk, size_t pageind)
 	return (mapbits & ~PAGE_MASK);
 }
 
-JEMALLOC_INLINE size_t
+JEMALLOC_ALWAYS_INLINE size_t
 arena_mapbits_small_runind_get(arena_chunk_t *chunk, size_t pageind)
 {
 	size_t mapbits;
@@ -534,7 +536,7 @@ arena_mapbits_small_runind_get(arena_chunk_t *chunk, size_t pageind)
 	return (mapbits >> LG_PAGE);
 }
 
-JEMALLOC_INLINE size_t
+JEMALLOC_ALWAYS_INLINE size_t
 arena_mapbits_binind_get(arena_chunk_t *chunk, size_t pageind)
 {
 	size_t mapbits;
@@ -546,7 +548,7 @@ arena_mapbits_binind_get(arena_chunk_t *chunk, size_t pageind)
 	return (binind);
 }
 
-JEMALLOC_INLINE size_t
+JEMALLOC_ALWAYS_INLINE size_t
 arena_mapbits_dirty_get(arena_chunk_t *chunk, size_t pageind)
 {
 	size_t mapbits;
@@ -555,7 +557,7 @@ arena_mapbits_dirty_get(arena_chunk_t *chunk, size_t pageind)
 	return (mapbits & CHUNK_MAP_DIRTY);
 }
 
-JEMALLOC_INLINE size_t
+JEMALLOC_ALWAYS_INLINE size_t
 arena_mapbits_unzeroed_get(arena_chunk_t *chunk, size_t pageind)
 {
 	size_t mapbits;
@@ -564,7 +566,7 @@ arena_mapbits_unzeroed_get(arena_chunk_t *chunk, size_t pageind)
 	return (mapbits & CHUNK_MAP_UNZEROED);
 }
 
-JEMALLOC_INLINE size_t
+JEMALLOC_ALWAYS_INLINE size_t
 arena_mapbits_large_get(arena_chunk_t *chunk, size_t pageind)
 {
 	size_t mapbits;
@@ -573,7 +575,7 @@ arena_mapbits_large_get(arena_chunk_t *chunk, size_t pageind)
 	return (mapbits & CHUNK_MAP_LARGE);
 }
 
-JEMALLOC_INLINE size_t
+JEMALLOC_ALWAYS_INLINE size_t
 arena_mapbits_allocated_get(arena_chunk_t *chunk, size_t pageind)
 {
 	size_t mapbits;
@@ -582,7 +584,7 @@ arena_mapbits_allocated_get(arena_chunk_t *chunk, size_t pageind)
 	return (mapbits & CHUNK_MAP_ALLOCATED);
 }
 
-JEMALLOC_INLINE void
+JEMALLOC_ALWAYS_INLINE void
 arena_mapbits_unallocated_set(arena_chunk_t *chunk, size_t pageind, size_t size,
     size_t flags)
 {
@@ -595,7 +597,7 @@ arena_mapbits_unallocated_set(arena_chunk_t *chunk, size_t pageind, size_t size,
 	*mapbitsp = size | CHUNK_MAP_BININD_INVALID | flags;
 }
 
-JEMALLOC_INLINE void
+JEMALLOC_ALWAYS_INLINE void
 arena_mapbits_unallocated_size_set(arena_chunk_t *chunk, size_t pageind,
     size_t size)
 {
@@ -607,7 +609,7 @@ arena_mapbits_unallocated_size_set(arena_chunk_t *chunk, size_t pageind,
 	*mapbitsp = size | (*mapbitsp & PAGE_MASK);
 }
 
-JEMALLOC_INLINE void
+JEMALLOC_ALWAYS_INLINE void
 arena_mapbits_large_set(arena_chunk_t *chunk, size_t pageind, size_t size,
     size_t flags)
 {
@@ -622,7 +624,7 @@ arena_mapbits_large_set(arena_chunk_t *chunk, size_t pageind, size_t size,
 	    CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED;
 }
 
-JEMALLOC_INLINE void
+JEMALLOC_ALWAYS_INLINE void
 arena_mapbits_large_binind_set(arena_chunk_t *chunk, size_t pageind,
     size_t binind)
 {
@@ -635,7 +637,7 @@ arena_mapbits_large_binind_set(arena_chunk_t *chunk, size_t pageind,
 	    CHUNK_MAP_BININD_SHIFT);
 }
 
-JEMALLOC_INLINE void
+JEMALLOC_ALWAYS_INLINE void
 arena_mapbits_small_set(arena_chunk_t *chunk, size_t pageind, size_t runind,
     size_t binind, size_t flags)
 {
@@ -651,7 +653,7 @@ arena_mapbits_small_set(arena_chunk_t *chunk, size_t pageind, size_t runind,
 	    flags | unzeroed | CHUNK_MAP_ALLOCATED;
 }
 
-JEMALLOC_INLINE void
+JEMALLOC_ALWAYS_INLINE void
 arena_mapbits_unzeroed_set(arena_chunk_t *chunk, size_t pageind,
     size_t unzeroed)
 {
@@ -661,7 +663,45 @@ arena_mapbits_unzeroed_set(arena_chunk_t *chunk, size_t pageind,
 	*mapbitsp = (*mapbitsp & ~CHUNK_MAP_UNZEROED) | unzeroed;
 }
 
-JEMALLOC_INLINE size_t
+JEMALLOC_INLINE void
+arena_prof_accum_impl(arena_t *arena, uint64_t accumbytes)
+{
+
+	cassert(config_prof);
+	assert(prof_interval != 0);
+
+	arena->prof_accumbytes += accumbytes;
+	if (arena->prof_accumbytes >= prof_interval) {
+		prof_idump();
+		arena->prof_accumbytes -= prof_interval;
+	}
+}
+
+JEMALLOC_INLINE void
+arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes)
+{
+
+	cassert(config_prof);
+
+	if (prof_interval == 0)
+		return;
+	arena_prof_accum_impl(arena, accumbytes);
+}
+
+JEMALLOC_INLINE void
+arena_prof_accum(arena_t *arena, uint64_t accumbytes)
+{
+
+	cassert(config_prof);
+
+	if (prof_interval == 0)
+		return;
+	malloc_mutex_lock(&arena->lock);
+	arena_prof_accum_impl(arena, accumbytes);
+	malloc_mutex_unlock(&arena->lock);
+}
+
+JEMALLOC_ALWAYS_INLINE size_t
 arena_ptr_small_binind_get(const void *ptr, size_t mapbits)
 {
 	size_t binind;
@@ -856,7 +896,7 @@ arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
 	arena_mapp_get(chunk, pageind)->prof_ctx = ctx;
 }
 
-JEMALLOC_INLINE void *
+JEMALLOC_ALWAYS_INLINE void *
 arena_malloc(arena_t *arena, size_t size, bool zero, bool try_tcache)
 {
 	tcache_t *tcache;
@@ -887,7 +927,7 @@ arena_malloc(arena_t *arena, size_t size, bool zero, bool try_tcache)
 }
 
 /* Return the size of the allocation pointed to by ptr. */
-JEMALLOC_INLINE size_t
+JEMALLOC_ALWAYS_INLINE size_t
 arena_salloc(const void *ptr, bool demote)
 {
 	size_t ret;
@@ -933,7 +973,7 @@ arena_salloc(const void *ptr, bool demote)
 	return (ret);
 }
 
-JEMALLOC_INLINE void
+JEMALLOC_ALWAYS_INLINE void
 arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr, bool try_tcache)
 {
 	size_t pageind, mapbits;
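[Editor's note: the impl/locked/plain split above keeps the prof_interval == 0 common case lock-free. A minimal standalone sketch of the same pattern, with a pthread mutex standing in for arena->lock and file-scope globals standing in for the arena fields; names here are illustrative, not jemalloc's.]

    #include <pthread.h>
    #include <stdint.h>

    static uint64_t prof_interval_;  /* 0 means interval dumps are disabled */
    static uint64_t accumbytes_;
    static pthread_mutex_t lock_ = PTHREAD_MUTEX_INITIALIZER;

    static void dump(void) { /* stand-in for prof_idump() */ }

    static void
    accum_impl(uint64_t n)           /* caller holds lock_; interval != 0 */
    {
        accumbytes_ += n;
        if (accumbytes_ >= prof_interval_) {
            dump();
            accumbytes_ -= prof_interval_;
        }
    }

    static void
    accum_locked(uint64_t n)         /* cf. arena_prof_accum_locked() */
    {
        if (prof_interval_ == 0)
            return;
        accum_impl(n);
    }

    static void
    accum(uint64_t n)                /* cf. arena_prof_accum() */
    {
        if (prof_interval_ == 0)     /* fast path: no mutex traffic at all */
            return;
        pthread_mutex_lock(&lock_);
        accum_impl(n);
        pthread_mutex_unlock(&lock_);
    }

    int
    main(void)
    {
        accum(4096);
        pthread_mutex_lock(&lock_);
        accum_locked(4096);          /* variant for callers already holding the lock */
        pthread_mutex_unlock(&lock_);
        return (0);
    }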
diff --git a/include/jemalloc/internal/ckh.h b/include/jemalloc/internal/ckh.h
index 05d1fc03..50c39ed9 100644
--- a/include/jemalloc/internal/ckh.h
+++ b/include/jemalloc/internal/ckh.h
@@ -5,7 +5,7 @@ typedef struct ckh_s ckh_t;
 typedef struct ckhc_s ckhc_t;
 
 /* Typedefs to allow easy function pointer passing. */
-typedef void ckh_hash_t (const void *, unsigned, size_t *, size_t *);
+typedef void ckh_hash_t (const void *, size_t[2]);
 typedef bool ckh_keycomp_t (const void *, const void *);
 
 /* Maintain counters used to get an idea of performance. */
@@ -75,11 +75,9 @@ bool	ckh_insert(ckh_t *ckh, const void *key, const void *data);
 bool	ckh_remove(ckh_t *ckh, const void *searchkey, void **key,
     void **data);
 bool	ckh_search(ckh_t *ckh, const void *seachkey, void **key, void **data);
-void	ckh_string_hash(const void *key, unsigned minbits, size_t *hash1,
-    size_t *hash2);
+void	ckh_string_hash(const void *key, size_t r_hash[2]);
 bool	ckh_string_keycomp(const void *k1, const void *k2);
-void	ckh_pointer_hash(const void *key, unsigned minbits, size_t *hash1,
-    size_t *hash2);
+void	ckh_pointer_hash(const void *key, size_t r_hash[2]);
 bool	ckh_pointer_keycomp(const void *k1, const void *k2);
 
 #endif /* JEMALLOC_H_EXTERNS */
diff --git a/include/jemalloc/internal/hash.h b/include/jemalloc/internal/hash.h
index 2f501f5d..56ecc793 100644
--- a/include/jemalloc/internal/hash.h
+++ b/include/jemalloc/internal/hash.h
@@ -1,3 +1,8 @@
+/*
+ * The following hash function is based on MurmurHash3, placed into the public
+ * domain by Austin Appleby.  See http://code.google.com/p/smhasher/ for
+ * details.
+ */
 /******************************************************************************/
 #ifdef JEMALLOC_H_TYPES
 
@@ -14,55 +19,311 @@
 #ifdef JEMALLOC_H_INLINES
 
 #ifndef JEMALLOC_ENABLE_INLINE
-uint64_t	hash(const void *key, size_t len, uint64_t seed);
+void	hash(const void *key, size_t len, const uint32_t seed,
+    size_t r_hash[2]);
 #endif
 
 #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_HASH_C_))
-/*
- * The following hash function is based on MurmurHash64A(), placed into the
- * public domain by Austin Appleby.  See http://murmurhash.googlepages.com/ for
- * details.
- */
-JEMALLOC_INLINE uint64_t
-hash(const void *key, size_t len, uint64_t seed)
+/******************************************************************************/
+/* Internal implementation. */
+JEMALLOC_INLINE uint32_t
+hash_rotl_32(uint32_t x, int8_t r)
 {
-	const uint64_t m = UINT64_C(0xc6a4a7935bd1e995);
-	const int r = 47;
-	uint64_t h = seed ^ (len * m);
-	const uint64_t *data = (const uint64_t *)key;
-	const uint64_t *end = data + (len/8);
-	const unsigned char *data2;
 
-	assert(((uintptr_t)key & 0x7) == 0);
+	return (x << r) | (x >> (32 - r));
+}
 
-	while(data != end) {
-		uint64_t k = *data++;
+JEMALLOC_INLINE uint64_t
+hash_rotl_64(uint64_t x, int8_t r)
+{
+	return (x << r) | (x >> (64 - r));
+}
 
-		k *= m;
-		k ^= k >> r;
-		k *= m;
+JEMALLOC_INLINE uint32_t
+hash_get_block_32(const uint32_t *p, int i)
+{
 
-		h ^= k;
-		h *= m;
+	return p[i];
+}
+
+JEMALLOC_INLINE uint64_t
+hash_get_block_64(const uint64_t *p, int i)
+{
+
+	return p[i];
+}
+
+JEMALLOC_INLINE uint32_t
+hash_fmix_32(uint32_t h)
+{
+
+	h ^= h >> 16;
+	h *= 0x85ebca6b;
+	h ^= h >> 13;
+	h *= 0xc2b2ae35;
+	h ^= h >> 16;
+
+	return h;
+}
+
+JEMALLOC_INLINE uint64_t
+hash_fmix_64(uint64_t k)
+{
+
+	k ^= k >> 33;
+	k *= QU(0xff51afd7ed558ccdLLU);
+	k ^= k >> 33;
+	k *= QU(0xc4ceb9fe1a85ec53LLU);
+	k ^= k >> 33;
+
+	return k;
+}
+
+JEMALLOC_INLINE uint32_t
+hash_x86_32(const void *key, int len, uint32_t seed)
+{
+	const uint8_t *data = (const uint8_t *) key;
+	const int nblocks = len / 4;
+
+	uint32_t h1 = seed;
+
+	const uint32_t c1 = 0xcc9e2d51;
+	const uint32_t c2 = 0x1b873593;
+
+	/* body */
+	{
+		const uint32_t *blocks = (const uint32_t *) (data + nblocks*4);
+		int i;
+
+		for (i = -nblocks; i; i++) {
+			uint32_t k1 = hash_get_block_32(blocks, i);
+
+			k1 *= c1;
+			k1 = hash_rotl_32(k1, 15);
+			k1 *= c2;
+
+			h1 ^= k1;
+			h1 = hash_rotl_32(h1, 13);
+			h1 = h1*5 + 0xe6546b64;
+		}
 	}
 
-	data2 = (const unsigned char *)data;
-	switch(len & 7) {
-	case 7: h ^= ((uint64_t)(data2[6])) << 48;
-	case 6: h ^= ((uint64_t)(data2[5])) << 40;
-	case 5: h ^= ((uint64_t)(data2[4])) << 32;
-	case 4: h ^= ((uint64_t)(data2[3])) << 24;
-	case 3: h ^= ((uint64_t)(data2[2])) << 16;
-	case 2: h ^= ((uint64_t)(data2[1])) << 8;
-	case 1: h ^= ((uint64_t)(data2[0]));
-		h *= m;
+	/* tail */
+	{
+		const uint8_t *tail = (const uint8_t *) (data + nblocks*4);
+
+		uint32_t k1 = 0;
+
+		switch (len & 3) {
+		case 3: k1 ^= tail[2] << 16;
+		case 2: k1 ^= tail[1] << 8;
+		case 1: k1 ^= tail[0]; k1 *= c1; k1 = hash_rotl_32(k1, 15);
+			k1 *= c2; h1 ^= k1;
+		}
 	}
 
-	h ^= h >> r;
-	h *= m;
-	h ^= h >> r;
+	/* finalization */
+	h1 ^= len;
 
-	return (h);
+	h1 = hash_fmix_32(h1);
+
+	return h1;
+}
+
+UNUSED JEMALLOC_INLINE void
+hash_x86_128(const void *key, const int len, uint32_t seed,
+    uint64_t r_out[2])
+{
+	const uint8_t * data = (const uint8_t *) key;
+	const int nblocks = len / 16;
+
+	uint32_t h1 = seed;
+	uint32_t h2 = seed;
+	uint32_t h3 = seed;
+	uint32_t h4 = seed;
+
+	const uint32_t c1 = 0x239b961b;
+	const uint32_t c2 = 0xab0e9789;
+	const uint32_t c3 = 0x38b34ae5;
+	const uint32_t c4 = 0xa1e38b93;
+
+	/* body */
+	{
+		const uint32_t *blocks = (const uint32_t *) (data + nblocks*16);
+		int i;
+
+		for (i = -nblocks; i; i++) {
+			uint32_t k1 = hash_get_block_32(blocks, i*4 + 0);
+			uint32_t k2 = hash_get_block_32(blocks, i*4 + 1);
+			uint32_t k3 = hash_get_block_32(blocks, i*4 + 2);
+			uint32_t k4 = hash_get_block_32(blocks, i*4 + 3);
+
+			k1 *= c1; k1 = hash_rotl_32(k1, 15); k1 *= c2; h1 ^= k1;
+
+			h1 = hash_rotl_32(h1, 19); h1 += h2;
+			h1 = h1*5 + 0x561ccd1b;
+
+			k2 *= c2; k2 = hash_rotl_32(k2, 16); k2 *= c3; h2 ^= k2;
+
+			h2 = hash_rotl_32(h2, 17); h2 += h3;
+			h2 = h2*5 + 0x0bcaa747;
+
+			k3 *= c3; k3 = hash_rotl_32(k3, 17); k3 *= c4; h3 ^= k3;
+
+			h3 = hash_rotl_32(h3, 15); h3 += h4;
+			h3 = h3*5 + 0x96cd1c35;
+
+			k4 *= c4; k4 = hash_rotl_32(k4, 18); k4 *= c1; h4 ^= k4;
+
+			h4 = hash_rotl_32(h4, 13); h4 += h1;
+			h4 = h4*5 + 0x32ac3b17;
+		}
+	}
+
+	/* tail */
+	{
+		const uint8_t *tail = (const uint8_t *) (data + nblocks*16);
+		uint32_t k1 = 0;
+		uint32_t k2 = 0;
+		uint32_t k3 = 0;
+		uint32_t k4 = 0;
+
+		switch (len & 15) {
+		case 15: k4 ^= tail[14] << 16;
+		case 14: k4 ^= tail[13] << 8;
+		case 13: k4 ^= tail[12] << 0;
+			k4 *= c4; k4 = hash_rotl_32(k4, 18); k4 *= c1; h4 ^= k4;
+
+		case 12: k3 ^= tail[11] << 24;
+		case 11: k3 ^= tail[10] << 16;
+		case 10: k3 ^= tail[ 9] << 8;
+		case  9: k3 ^= tail[ 8] << 0;
+			k3 *= c3; k3 = hash_rotl_32(k3, 17); k3 *= c4; h3 ^= k3;
+
+		case  8: k2 ^= tail[ 7] << 24;
+		case  7: k2 ^= tail[ 6] << 16;
+		case  6: k2 ^= tail[ 5] << 8;
+		case  5: k2 ^= tail[ 4] << 0;
+			k2 *= c2; k2 = hash_rotl_32(k2, 16); k2 *= c3; h2 ^= k2;
+
+		case  4: k1 ^= tail[ 3] << 24;
+		case  3: k1 ^= tail[ 2] << 16;
+		case  2: k1 ^= tail[ 1] << 8;
+		case  1: k1 ^= tail[ 0] << 0;
+			k1 *= c1; k1 = hash_rotl_32(k1, 15); k1 *= c2; h1 ^= k1;
+		}
+	}
+
+	/* finalization */
+	h1 ^= len; h2 ^= len; h3 ^= len; h4 ^= len;
+
+	h1 += h2; h1 += h3; h1 += h4;
+	h2 += h1; h3 += h1; h4 += h1;
+
+	h1 = hash_fmix_32(h1);
+	h2 = hash_fmix_32(h2);
+	h3 = hash_fmix_32(h3);
+	h4 = hash_fmix_32(h4);
+
+	h1 += h2; h1 += h3; h1 += h4;
+	h2 += h1; h3 += h1; h4 += h1;
+
+	r_out[0] = (((uint64_t) h2) << 32) | h1;
+	r_out[1] = (((uint64_t) h4) << 32) | h3;
+}
+
+UNUSED JEMALLOC_INLINE void
+hash_x64_128(const void *key, const int len, const uint32_t seed,
+    uint64_t r_out[2])
+{
+	const uint8_t *data = (const uint8_t *) key;
+	const int nblocks = len / 16;
+
+	uint64_t h1 = seed;
+	uint64_t h2 = seed;
+
+	const uint64_t c1 = QU(0x87c37b91114253d5LLU);
+	const uint64_t c2 = QU(0x4cf5ad432745937fLLU);
+
+	/* body */
+	{
+		const uint64_t *blocks = (const uint64_t *) (data);
+		int i;
+
+		for (i = 0; i < nblocks; i++) {
+			uint64_t k1 = hash_get_block_64(blocks, i*2 + 0);
+			uint64_t k2 = hash_get_block_64(blocks, i*2 + 1);
+
+			k1 *= c1; k1 = hash_rotl_64(k1, 31); k1 *= c2; h1 ^= k1;
+
+			h1 = hash_rotl_64(h1, 27); h1 += h2;
+			h1 = h1*5 + 0x52dce729;
+
+			k2 *= c2; k2 = hash_rotl_64(k2, 33); k2 *= c1; h2 ^= k2;
+
+			h2 = hash_rotl_64(h2, 31); h2 += h1;
+			h2 = h2*5 + 0x38495ab5;
+		}
+	}
+
+	/* tail */
+	{
+		const uint8_t *tail = (const uint8_t*)(data + nblocks*16);
+		uint64_t k1 = 0;
+		uint64_t k2 = 0;
+
+		switch (len & 15) {
+		case 15: k2 ^= ((uint64_t)(tail[14])) << 48;
+		case 14: k2 ^= ((uint64_t)(tail[13])) << 40;
+		case 13: k2 ^= ((uint64_t)(tail[12])) << 32;
+		case 12: k2 ^= ((uint64_t)(tail[11])) << 24;
+		case 11: k2 ^= ((uint64_t)(tail[10])) << 16;
+		case 10: k2 ^= ((uint64_t)(tail[ 9])) << 8;
+		case  9: k2 ^= ((uint64_t)(tail[ 8])) << 0;
+			k2 *= c2; k2 = hash_rotl_64(k2, 33); k2 *= c1; h2 ^= k2;
+
+		case  8: k1 ^= ((uint64_t)(tail[ 7])) << 56;
+		case  7: k1 ^= ((uint64_t)(tail[ 6])) << 48;
+		case  6: k1 ^= ((uint64_t)(tail[ 5])) << 40;
+		case  5: k1 ^= ((uint64_t)(tail[ 4])) << 32;
+		case  4: k1 ^= ((uint64_t)(tail[ 3])) << 24;
+		case  3: k1 ^= ((uint64_t)(tail[ 2])) << 16;
+		case  2: k1 ^= ((uint64_t)(tail[ 1])) << 8;
+		case  1: k1 ^= ((uint64_t)(tail[ 0])) << 0;
+			k1 *= c1; k1 = hash_rotl_64(k1, 31); k1 *= c2; h1 ^= k1;
+		}
+	}
+
+	/* finalization */
+	h1 ^= len; h2 ^= len;
+
+	h1 += h2;
+	h2 += h1;
+
+	h1 = hash_fmix_64(h1);
+	h2 = hash_fmix_64(h2);
+
+	h1 += h2;
+	h2 += h1;
+
+	r_out[0] = h1;
+	r_out[1] = h2;
+}
+
+
+/******************************************************************************/
+/* API. */
+JEMALLOC_INLINE void
+hash(const void *key, size_t len, const uint32_t seed, size_t r_hash[2])
+{
+#if (LG_SIZEOF_PTR == 3)
+	hash_x64_128(key, len, seed, (uint64_t *)r_hash);
+#else
+	uint64_t hashes[2];
+	hash_x86_128(key, len, seed, hashes);
+	r_hash[0] = (size_t)hashes[0];
+	r_hash[1] = (size_t)hashes[1];
+#endif
 }
 
 #endif
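[Editor's note: a small standalone caller illustrating the new API shape — one call fills both size_t results. The mixing function below is a trivial placeholder, not MurmurHash3; only the signature and calling convention match the header above.]

    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* Placeholder with the same signature as the internal hash(). */
    static void
    hash(const void *key, size_t len, const uint32_t seed, size_t r_hash[2])
    {
        const unsigned char *p = (const unsigned char *)key;
        uint64_t h = seed;
        size_t i;

        for (i = 0; i < len; i++) {
            h ^= p[i];
            h *= 0x100000001b3ULL;  /* FNV-style step, placeholder only */
        }
        r_hash[0] = (size_t)h;
        r_hash[1] = (size_t)(h >> 32);
    }

    int
    main(void)
    {
        const char *key = "example";
        size_t hashes[2];

        hash(key, strlen(key), 0x94122f33U, hashes);  /* seed value as in ckh.c */
        printf("%zx %zx\n", hashes[0], hashes[1]);
        return (0);
    }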
diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in
index 475821ac..c606c122 100644
--- a/include/jemalloc/internal/jemalloc_internal.h.in
+++ b/include/jemalloc/internal/jemalloc_internal.h.in
@@ -226,6 +226,7 @@ static const bool config_ivsalloc =
 #define	ALLOCM_LG_ALIGN_MASK ((int)0x3f)
 
 #define	ZU(z)	((size_t)z)
+#define	QU(q)	((uint64_t)q)
 
 #ifndef __DECONST
 #  define	__DECONST(type, var)	((type)(uintptr_t)(const void *)(var))
@@ -233,10 +234,17 @@ static const bool config_ivsalloc =
 #ifdef JEMALLOC_DEBUG
    /* Disable inlining to make debugging easier. */
+#  define JEMALLOC_ALWAYS_INLINE
 #  define JEMALLOC_INLINE
 #  define inline
 #else
 #  define JEMALLOC_ENABLE_INLINE
+#  ifdef JEMALLOC_HAVE_ATTR
+#    define JEMALLOC_ALWAYS_INLINE \
+	 static inline JEMALLOC_ATTR(unused) JEMALLOC_ATTR(always_inline)
+#  else
+#    define JEMALLOC_ALWAYS_INLINE static inline
+#  endif
 #  define JEMALLOC_INLINE static inline
 #  ifdef _MSC_VER
 #    define inline _inline
@@ -359,7 +367,11 @@ static const bool config_ivsalloc =
 #  include <malloc.h>
 #  define alloca _alloca
 # else
-#  include <alloca.h>
+#  ifdef JEMALLOC_HAS_ALLOCA_H
+#    include <alloca.h>
+#  else
+#    include <stdlib.h>
+#  endif
 # endif
 #  define VARIABLE_ARRAY(type, name, count) \
 	type *name = alloca(sizeof(type) * count)
@@ -591,13 +603,14 @@ arena_t	*choose_arena(arena_t *arena);
  * for allocations.
  */
 malloc_tsd_externs(arenas, arena_t *)
-malloc_tsd_funcs(JEMALLOC_INLINE, arenas, arena_t *, NULL, arenas_cleanup)
+malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, arenas, arena_t *, NULL,
+    arenas_cleanup)
 
 /*
  * Compute usable size that would result from allocating an object with the
  * specified size.
  */
-JEMALLOC_INLINE size_t
+JEMALLOC_ALWAYS_INLINE size_t
 s2u(size_t size)
 {
 
@@ -612,7 +625,7 @@ s2u(size_t size)
 /*
  * Compute usable size that would result from allocating an object with the
  * specified size and alignment.
  */
-JEMALLOC_INLINE size_t
+JEMALLOC_ALWAYS_INLINE size_t
 sa2u(size_t size, size_t alignment)
 {
 	size_t usize;
@@ -757,7 +770,7 @@ malloc_tsd_protos(JEMALLOC_ATTR(unused), thread_allocated, thread_allocated_t)
 #endif
 
 #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_))
-JEMALLOC_INLINE void *
+JEMALLOC_ALWAYS_INLINE void *
 imallocx(size_t size, bool try_tcache, arena_t *arena)
 {
 
@@ -769,14 +782,14 @@ imallocx(size_t size, bool try_tcache, arena_t *arena)
 	return (huge_malloc(size, false));
 }
 
-JEMALLOC_INLINE void *
+JEMALLOC_ALWAYS_INLINE void *
 imalloc(size_t size)
 {
 
 	return (imallocx(size, true, NULL));
 }
 
-JEMALLOC_INLINE void *
+JEMALLOC_ALWAYS_INLINE void *
 icallocx(size_t size, bool try_tcache, arena_t *arena)
 {
 
@@ -786,14 +799,14 @@ icallocx(size_t size, bool try_tcache, arena_t *arena)
 	return (huge_malloc(size, true));
 }
 
-JEMALLOC_INLINE void *
+JEMALLOC_ALWAYS_INLINE void *
 icalloc(size_t size)
 {
 
 	return (icallocx(size, true, NULL));
 }
 
-JEMALLOC_INLINE void *
+JEMALLOC_ALWAYS_INLINE void *
 ipallocx(size_t usize, size_t alignment, bool zero, bool try_tcache,
     arena_t *arena)
 {
@@ -818,7 +831,7 @@ ipallocx(size_t usize, size_t alignment, bool zero, bool try_tcache,
 	return (ret);
 }
 
-JEMALLOC_INLINE void *
+JEMALLOC_ALWAYS_INLINE void *
 ipalloc(size_t usize, size_t alignment, bool zero)
 {
 
@@ -830,7 +843,7 @@ ipalloc(size_t usize, size_t alignment, bool zero)
  *   void *ptr = [...]
  *   size_t sz = isalloc(ptr, config_prof);
  */
-JEMALLOC_INLINE size_t
+JEMALLOC_ALWAYS_INLINE size_t
 isalloc(const void *ptr, bool demote)
 {
 	size_t ret;
@@ -849,7 +862,7 @@ isalloc(const void *ptr, bool demote)
 	return (ret);
 }
 
-JEMALLOC_INLINE size_t
+JEMALLOC_ALWAYS_INLINE size_t
 ivsalloc(const void *ptr, bool demote)
 {
 
@@ -882,7 +895,7 @@ p2rz(const void *ptr)
 	return (u2rz(usize));
 }
 
-JEMALLOC_INLINE void
+JEMALLOC_ALWAYS_INLINE void
 idallocx(void *ptr, bool try_tcache)
 {
 	arena_chunk_t *chunk;
@@ -896,14 +909,14 @@ idallocx(void *ptr, bool try_tcache)
 		huge_dalloc(ptr, true);
 }
 
-JEMALLOC_INLINE void
+JEMALLOC_ALWAYS_INLINE void
 idalloc(void *ptr)
 {
 
 	idallocx(ptr, true);
 }
 
-JEMALLOC_INLINE void
+JEMALLOC_ALWAYS_INLINE void
 iqallocx(void *ptr, bool try_tcache)
 {
 
@@ -913,14 +926,14 @@ iqallocx(void *ptr, bool try_tcache)
 		idallocx(ptr, try_tcache);
 }
 
-JEMALLOC_INLINE void
+JEMALLOC_ALWAYS_INLINE void
 iqalloc(void *ptr)
 {
 
 	iqallocx(ptr, true);
 }
 
-JEMALLOC_INLINE void *
+JEMALLOC_ALWAYS_INLINE void *
 irallocx(void *ptr, size_t size, size_t extra, size_t alignment, bool zero,
     bool no_move, bool try_tcache_alloc, bool try_tcache_dalloc, arena_t *arena)
 {
@@ -989,7 +1002,7 @@ irallocx(void *ptr, size_t size, size_t extra, size_t alignment, bool zero,
 	}
 }
 
-JEMALLOC_INLINE void *
+JEMALLOC_ALWAYS_INLINE void *
 iralloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero,
     bool no_move)
 {
@@ -999,7 +1012,7 @@ iralloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero,
 }
 
 malloc_tsd_externs(thread_allocated, thread_allocated_t)
-malloc_tsd_funcs(JEMALLOC_INLINE, thread_allocated, thread_allocated_t,
+malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, thread_allocated, thread_allocated_t,
     THREAD_ALLOCATED_INITIALIZER, malloc_tsd_no_cleanup)
 #endif
diff --git a/include/jemalloc/internal/private_namespace.h b/include/jemalloc/internal/private_namespace.h
index 06241cd2..903fb4df 100644
--- a/include/jemalloc/internal/private_namespace.h
+++ b/include/jemalloc/internal/private_namespace.h
@@ -41,6 +41,8 @@
 #define	arena_postfork_parent JEMALLOC_N(arena_postfork_parent)
 #define	arena_prefork JEMALLOC_N(arena_prefork)
 #define	arena_prof_accum JEMALLOC_N(arena_prof_accum)
+#define	arena_prof_accum_impl JEMALLOC_N(arena_prof_accum_impl)
+#define	arena_prof_accum_locked JEMALLOC_N(arena_prof_accum_locked)
 #define	arena_prof_ctx_get JEMALLOC_N(arena_prof_ctx_get)
 #define	arena_prof_ctx_set JEMALLOC_N(arena_prof_ctx_set)
 #define	arena_prof_promoted JEMALLOC_N(arena_prof_promoted)
@@ -63,6 +65,7 @@
 #define	arenas_tsd_boot JEMALLOC_N(arenas_tsd_boot)
 #define	arenas_tsd_cleanup_wrapper JEMALLOC_N(arenas_tsd_cleanup_wrapper)
 #define	arenas_tsd_get JEMALLOC_N(arenas_tsd_get)
+#define	arenas_tsd_get_wrapper JEMALLOC_N(arenas_tsd_get_wrapper)
 #define	arenas_tsd_set JEMALLOC_N(arenas_tsd_set)
 #define	atomic_add_u JEMALLOC_N(atomic_add_u)
 #define	atomic_add_uint32 JEMALLOC_N(atomic_add_uint32)
@@ -174,6 +177,15 @@
 #define	extent_tree_szad_search JEMALLOC_N(extent_tree_szad_search)
 #define	get_errno JEMALLOC_N(get_errno)
 #define	hash JEMALLOC_N(hash)
+#define	hash_fmix_32 JEMALLOC_N(hash_fmix_32)
+#define	hash_fmix_64 JEMALLOC_N(hash_fmix_64)
+#define	hash_get_block_32 JEMALLOC_N(hash_get_block_32)
+#define	hash_get_block_64 JEMALLOC_N(hash_get_block_64)
+#define	hash_rotl_32 JEMALLOC_N(hash_rotl_32)
+#define	hash_rotl_64 JEMALLOC_N(hash_rotl_64)
+#define	hash_x64_128 JEMALLOC_N(hash_x64_128)
+#define	hash_x86_128 JEMALLOC_N(hash_x86_128)
+#define	hash_x86_32 JEMALLOC_N(hash_x86_32)
 #define	huge_allocated JEMALLOC_N(huge_allocated)
 #define	huge_boot JEMALLOC_N(huge_boot)
 #define	huge_dalloc JEMALLOC_N(huge_dalloc)
@@ -291,12 +303,14 @@
 #define	prof_tdata_tsd_boot JEMALLOC_N(prof_tdata_tsd_boot)
 #define	prof_tdata_tsd_cleanup_wrapper JEMALLOC_N(prof_tdata_tsd_cleanup_wrapper)
 #define	prof_tdata_tsd_get JEMALLOC_N(prof_tdata_tsd_get)
+#define	prof_tdata_tsd_get_wrapper JEMALLOC_N(prof_tdata_tsd_get_wrapper)
 #define	prof_tdata_tsd_set JEMALLOC_N(prof_tdata_tsd_set)
 #define	quarantine JEMALLOC_N(quarantine)
 #define	quarantine_boot JEMALLOC_N(quarantine_boot)
 #define	quarantine_tsd_boot JEMALLOC_N(quarantine_tsd_boot)
 #define	quarantine_tsd_cleanup_wrapper JEMALLOC_N(quarantine_tsd_cleanup_wrapper)
 #define	quarantine_tsd_get JEMALLOC_N(quarantine_tsd_get)
+#define	quarantine_tsd_get_wrapper JEMALLOC_N(quarantine_tsd_get_wrapper)
 #define	quarantine_tsd_set JEMALLOC_N(quarantine_tsd_set)
 #define	register_zone JEMALLOC_N(register_zone)
 #define	rtree_get JEMALLOC_N(rtree_get)
@@ -340,6 +354,7 @@
 #define	tcache_enabled_tsd_boot JEMALLOC_N(tcache_enabled_tsd_boot)
 #define	tcache_enabled_tsd_cleanup_wrapper JEMALLOC_N(tcache_enabled_tsd_cleanup_wrapper)
 #define	tcache_enabled_tsd_get JEMALLOC_N(tcache_enabled_tsd_get)
+#define	tcache_enabled_tsd_get_wrapper JEMALLOC_N(tcache_enabled_tsd_get_wrapper)
 #define	tcache_enabled_tsd_set JEMALLOC_N(tcache_enabled_tsd_set)
 #define	tcache_event JEMALLOC_N(tcache_event)
 #define	tcache_event_hard JEMALLOC_N(tcache_event_hard)
@@ -355,6 +370,7 @@
 #define	tcache_tsd_boot JEMALLOC_N(tcache_tsd_boot)
 #define	tcache_tsd_cleanup_wrapper JEMALLOC_N(tcache_tsd_cleanup_wrapper)
 #define	tcache_tsd_get JEMALLOC_N(tcache_tsd_get)
+#define	tcache_tsd_get_wrapper JEMALLOC_N(tcache_tsd_get_wrapper)
 #define	tcache_tsd_set JEMALLOC_N(tcache_tsd_set)
 #define	thread_allocated_booted JEMALLOC_N(thread_allocated_booted)
 #define	thread_allocated_initialized JEMALLOC_N(thread_allocated_initialized)
@@ -363,5 +379,6 @@
 #define	thread_allocated_tsd_boot JEMALLOC_N(thread_allocated_tsd_boot)
 #define	thread_allocated_tsd_cleanup_wrapper JEMALLOC_N(thread_allocated_tsd_cleanup_wrapper)
 #define	thread_allocated_tsd_get JEMALLOC_N(thread_allocated_tsd_get)
+#define	thread_allocated_tsd_get_wrapper JEMALLOC_N(thread_allocated_tsd_get_wrapper)
 #define	thread_allocated_tsd_set JEMALLOC_N(thread_allocated_tsd_set)
 #define	u2rz JEMALLOC_N(u2rz)
diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h
index 38d735c8..71900c2f 100644
--- a/include/jemalloc/internal/tcache.h
+++ b/include/jemalloc/internal/tcache.h
@@ -140,11 +140,11 @@ void	tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size);
 #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_TCACHE_C_))
 /* Map of thread-specific caches. */
 malloc_tsd_externs(tcache, tcache_t *)
-malloc_tsd_funcs(JEMALLOC_INLINE, tcache, tcache_t *, NULL,
+malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, tcache, tcache_t *, NULL,
     tcache_thread_cleanup)
 /* Per thread flag that allows thread caches to be disabled. */
 malloc_tsd_externs(tcache_enabled, tcache_enabled_t)
-malloc_tsd_funcs(JEMALLOC_INLINE, tcache_enabled, tcache_enabled_t,
+malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, tcache_enabled, tcache_enabled_t,
     tcache_enabled_default, malloc_tsd_no_cleanup)
 
 JEMALLOC_INLINE void
@@ -206,7 +206,7 @@ tcache_enabled_set(bool enabled)
 	}
 }
 
-JEMALLOC_INLINE tcache_t *
+JEMALLOC_ALWAYS_INLINE tcache_t *
 tcache_get(bool create)
 {
 	tcache_t *tcache;
@@ -258,7 +258,7 @@ tcache_get(bool create)
 	return (tcache);
 }
 
-JEMALLOC_INLINE void
+JEMALLOC_ALWAYS_INLINE void
 tcache_event(tcache_t *tcache)
 {
 
@@ -271,7 +271,7 @@ tcache_event(tcache_t *tcache)
 		tcache_event_hard(tcache);
 }
 
-JEMALLOC_INLINE void *
+JEMALLOC_ALWAYS_INLINE void *
 tcache_alloc_easy(tcache_bin_t *tbin)
 {
 	void *ret;
@@ -287,7 +287,7 @@ tcache_alloc_easy(tcache_bin_t *tbin)
 	return (ret);
 }
 
-JEMALLOC_INLINE void *
+JEMALLOC_ALWAYS_INLINE void *
 tcache_alloc_small(tcache_t *tcache, size_t size, bool zero)
 {
 	void *ret;
@@ -320,6 +320,7 @@ tcache_alloc_small(tcache_t *tcache, size_t size, bool zero)
 		}
 		VALGRIND_MAKE_MEM_UNDEFINED(ret, size);
 		memset(ret, 0, size);
+		VALGRIND_MAKE_MEM_UNDEFINED(ret, size);
 	}
 
 	if (config_stats)
@@ -330,7 +331,7 @@ tcache_alloc_small(tcache_t *tcache, size_t size, bool zero)
 	return (ret);
 }
 
-JEMALLOC_INLINE void *
+JEMALLOC_ALWAYS_INLINE void *
 tcache_alloc_large(tcache_t *tcache, size_t size, bool zero)
 {
 	void *ret;
@@ -370,6 +371,7 @@ tcache_alloc_large(tcache_t *tcache, size_t size, bool zero)
 		} else {
 			VALGRIND_MAKE_MEM_UNDEFINED(ret, size);
 			memset(ret, 0, size);
+			VALGRIND_MAKE_MEM_UNDEFINED(ret, size);
 		}
 
 		if (config_stats)
@@ -382,7 +384,7 @@ tcache_alloc_large(tcache_t *tcache, size_t size, bool zero)
 	return (ret);
 }
 
-JEMALLOC_INLINE void
+JEMALLOC_ALWAYS_INLINE void
 tcache_dalloc_small(tcache_t *tcache, void *ptr, size_t binind)
 {
 	tcache_bin_t *tbin;
@@ -406,7 +408,7 @@ tcache_dalloc_small(tcache_t *tcache, void *ptr, size_t binind)
 	tcache_event(tcache);
 }
 
-JEMALLOC_INLINE void
+JEMALLOC_ALWAYS_INLINE void
 tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size)
 {
 	size_t binind;
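[Editor's note: the memset followed by a second VALGRIND_MAKE_MEM_UNDEFINED above looks redundant but appears deliberate: the allocator's own zeroing should not count as the application initializing the bytes, so memcheck can still flag client reads of memory the client never wrote; the public entry points later mark the block defined when zeroed memory is actually promised. A hedged sketch of the idiom, assuming the Valgrind client-request header is available (its macros are no-ops outside Valgrind):]

    #include <string.h>
    #include <valgrind/memcheck.h>

    static void
    allocator_zero(void *p, size_t len)
    {
        VALGRIND_MAKE_MEM_UNDEFINED(p, len);  /* allow our own write */
        memset(p, 0, len);
        /* Re-mark as undefined so internal zeroing is not mistaken for
         * client initialization. */
        VALGRIND_MAKE_MEM_UNDEFINED(p, len);
    }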
diff --git a/include/jemalloc/jemalloc_defs.h.in b/include/jemalloc/jemalloc_defs.h.in
index 1cd60254..3fcf93ce 100644
--- a/include/jemalloc/jemalloc_defs.h.in
+++ b/include/jemalloc/jemalloc_defs.h.in
@@ -249,6 +249,11 @@
 #undef JEMALLOC_PURGE_MADVISE_DONTNEED
 #undef JEMALLOC_PURGE_MADVISE_FREE
 
+/*
+ * Define if operating system has alloca.h header.
+ */
+#undef JEMALLOC_HAS_ALLOCA_H
+
 /* sizeof(void *) == 2^LG_SIZEOF_PTR. */
 #undef LG_SIZEOF_PTR
diff --git a/src/arena.c b/src/arena.c
index 0c53b071..8d50f4d4 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -359,13 +359,29 @@ arena_run_reg_dalloc(arena_run_t *run, void *ptr)
 }
 
 static inline void
-arena_chunk_validate_zeroed(arena_chunk_t *chunk, size_t run_ind)
+arena_run_zero(arena_chunk_t *chunk, size_t run_ind, size_t npages)
+{
+
+	VALGRIND_MAKE_MEM_UNDEFINED((void *)((uintptr_t)chunk + (run_ind <<
+	    LG_PAGE)), (npages << LG_PAGE));
+	memset((void *)((uintptr_t)chunk + (run_ind << LG_PAGE)), 0,
+	    (npages << LG_PAGE));
+	VALGRIND_MAKE_MEM_UNDEFINED((void *)((uintptr_t)chunk + (run_ind <<
+	    LG_PAGE)), (npages << LG_PAGE));
+}
+
+static inline void
+arena_run_page_validate_zeroed(arena_chunk_t *chunk, size_t run_ind)
 {
 	size_t i;
 	UNUSED size_t *p = (size_t *)((uintptr_t)chunk + (run_ind << LG_PAGE));
 
+	VALGRIND_MAKE_MEM_DEFINED((void *)((uintptr_t)chunk + (run_ind <<
+	    LG_PAGE)), PAGE);
 	for (i = 0; i < PAGE / sizeof(size_t); i++)
 		assert(p[i] == 0);
+	VALGRIND_MAKE_MEM_UNDEFINED((void *)((uintptr_t)chunk + (run_ind <<
+	    LG_PAGE)), PAGE);
 }
 
 static void
@@ -441,19 +457,10 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large,
 				for (i = 0; i < need_pages; i++) {
 					if (arena_mapbits_unzeroed_get(chunk,
 					    run_ind+i) != 0) {
-						VALGRIND_MAKE_MEM_UNDEFINED(
-						    (void *)((uintptr_t)
-						    chunk + ((run_ind+i) <<
-						    LG_PAGE)), PAGE);
-						memset((void *)((uintptr_t)
-						    chunk + ((run_ind+i) <<
-						    LG_PAGE)), 0, PAGE);
+						arena_run_zero(chunk, run_ind+i,
+						    1);
 					} else if (config_debug) {
-						VALGRIND_MAKE_MEM_DEFINED(
-						    (void *)((uintptr_t)
-						    chunk + ((run_ind+i) <<
-						    LG_PAGE)), PAGE);
-						arena_chunk_validate_zeroed(
+						arena_run_page_validate_zeroed(
 						    chunk, run_ind+i);
 					}
 				}
@@ -462,11 +469,7 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large,
 				 * The run is dirty, so all pages must be
 				 * zeroed.
 				 */
-				VALGRIND_MAKE_MEM_UNDEFINED((void
-				    *)((uintptr_t)chunk + (run_ind <<
-				    LG_PAGE)), (need_pages << LG_PAGE));
-				memset((void *)((uintptr_t)chunk + (run_ind <<
-				    LG_PAGE)), 0, (need_pages << LG_PAGE));
+				arena_run_zero(chunk, run_ind, need_pages);
 			}
 		}
 
@@ -492,19 +495,21 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large,
 		 */
 		if (config_debug && flag_dirty == 0 &&
 		    arena_mapbits_unzeroed_get(chunk, run_ind) == 0)
-			arena_chunk_validate_zeroed(chunk, run_ind);
+			arena_run_page_validate_zeroed(chunk, run_ind);
 		for (i = 1; i < need_pages - 1; i++) {
 			arena_mapbits_small_set(chunk, run_ind+i, i, binind, 0);
 			if (config_debug && flag_dirty == 0 &&
-			    arena_mapbits_unzeroed_get(chunk, run_ind+i) == 0)
-				arena_chunk_validate_zeroed(chunk, run_ind+i);
+			    arena_mapbits_unzeroed_get(chunk, run_ind+i) == 0) {
+				arena_run_page_validate_zeroed(chunk,
+				    run_ind+i);
+			}
 		}
 		arena_mapbits_small_set(chunk, run_ind+need_pages-1,
 		    need_pages-1, binind, flag_dirty);
 		if (config_debug && flag_dirty == 0 &&
 		    arena_mapbits_unzeroed_get(chunk, run_ind+need_pages-1) ==
 		    0) {
-			arena_chunk_validate_zeroed(chunk,
+			arena_run_page_validate_zeroed(chunk,
 			    run_ind+need_pages-1);
 		}
 	}
@@ -1321,21 +1326,6 @@ arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin)
 		return (arena_run_reg_alloc(bin->runcur, bin_info));
 }
 
-void
-arena_prof_accum(arena_t *arena, uint64_t accumbytes)
-{
-
-	cassert(config_prof);
-
-	if (config_prof && prof_interval != 0) {
-		arena->prof_accumbytes += accumbytes;
-		if (arena->prof_accumbytes >= prof_interval) {
-			prof_idump();
-			arena->prof_accumbytes -= prof_interval;
-		}
-	}
-}
-
 void
 arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind,
     uint64_t prof_accumbytes)
@@ -1347,11 +1337,8 @@ arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind,
 
 	assert(tbin->ncached == 0);
 
-	if (config_prof) {
-		malloc_mutex_lock(&arena->lock);
+	if (config_prof)
 		arena_prof_accum(arena, prof_accumbytes);
-		malloc_mutex_unlock(&arena->lock);
-	}
 	bin = &arena->bins[binind];
 	malloc_mutex_lock(&bin->lock);
 	for (i = 0, nfill = (tcache_bin_info[binind].ncached_max >>
@@ -1459,11 +1446,8 @@ arena_malloc_small(arena_t *arena, size_t size, bool zero)
 		bin->stats.nrequests++;
 	}
 	malloc_mutex_unlock(&bin->lock);
-	if (config_prof && isthreaded == false) {
-		malloc_mutex_lock(&arena->lock);
+	if (config_prof && isthreaded == false)
 		arena_prof_accum(arena, size);
-		malloc_mutex_unlock(&arena->lock);
-	}
 
 	if (zero == false) {
 		if (config_fill) {
@@ -1480,6 +1464,7 @@ arena_malloc_small(arena_t *arena, size_t size, bool zero)
 		}
 		VALGRIND_MAKE_MEM_UNDEFINED(ret, size);
 		memset(ret, 0, size);
+		VALGRIND_MAKE_MEM_UNDEFINED(ret, size);
 	}
 
 	return (ret);
@@ -1507,7 +1492,7 @@ arena_malloc_large(arena_t *arena, size_t size, bool zero)
 		arena->stats.lstats[(size >> LG_PAGE) - 1].curruns++;
 	}
 	if (config_prof)
-		arena_prof_accum(arena, size);
+		arena_prof_accum_locked(arena, size);
 	malloc_mutex_unlock(&arena->lock);
 
 	if (zero == false) {
diff --git a/src/chunk.c b/src/chunk.c
index 1a3bb4f6..46e387e1 100644
--- a/src/chunk.c
+++ b/src/chunk.c
@@ -78,6 +78,9 @@ chunk_recycle(extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, size_t size,
 	assert(node->size >= leadsize + size);
 	trailsize = node->size - leadsize - size;
 	ret = (void *)((uintptr_t)node->addr + leadsize);
+	zeroed = node->zeroed;
+	if (zeroed)
+		*zero = true;
 	/* Remove node from the tree. */
 	extent_tree_szad_remove(chunks_szad, node);
 	extent_tree_ad_remove(chunks_ad, node);
@@ -114,17 +117,21 @@ chunk_recycle(extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, size_t size,
 	}
 	malloc_mutex_unlock(&chunks_mtx);
 
-	zeroed = false;
-	if (node != NULL) {
-		if (node->zeroed) {
-			zeroed = true;
-			*zero = true;
-		}
+	if (node != NULL)
 		base_node_dealloc(node);
-	}
-	if (zeroed == false && *zero) {
-		VALGRIND_MAKE_MEM_UNDEFINED(ret, size);
-		memset(ret, 0, size);
+	VALGRIND_MAKE_MEM_UNDEFINED(ret, size);
+	if (*zero) {
+		if (zeroed == false)
+			memset(ret, 0, size);
+		else if (config_debug) {
+			size_t i;
+			size_t *p = (size_t *)(uintptr_t)ret;
+
+			VALGRIND_MAKE_MEM_DEFINED(ret, size);
+			for (i = 0; i < size / sizeof(size_t); i++)
+				assert(p[i] == 0);
+			VALGRIND_MAKE_MEM_UNDEFINED(ret, size);
+		}
 	}
 	return (ret);
 }
@@ -194,14 +201,6 @@ label_return:
 		if (config_prof && opt_prof && opt_prof_gdump && gdump)
 			prof_gdump();
 	}
-	if (config_debug && *zero && ret != NULL) {
-		size_t i;
-		size_t *p = (size_t *)(uintptr_t)ret;
-
-		VALGRIND_MAKE_MEM_DEFINED(ret, size);
-		for (i = 0; i < size / sizeof(size_t); i++)
-			assert(p[i] == 0);
-	}
 	assert(CHUNK_ADDR2BASE(ret) == ret);
 	return (ret);
 }
diff --git a/src/chunk_dss.c b/src/chunk_dss.c
index 24781cc5..d1aea930 100644
--- a/src/chunk_dss.c
+++ b/src/chunk_dss.c
@@ -127,6 +127,7 @@ chunk_alloc_dss(size_t size, size_t alignment, bool *zero)
 			if (*zero) {
 				VALGRIND_MAKE_MEM_UNDEFINED(ret, size);
 				memset(ret, 0, size);
+				VALGRIND_MAKE_MEM_UNDEFINED(ret, size);
 			}
 			return (ret);
 		}
diff --git a/src/ckh.c b/src/ckh.c
index 742a950b..2f38348b 100644
--- a/src/ckh.c
+++ b/src/ckh.c
@@ -70,20 +70,20 @@ ckh_bucket_search(ckh_t *ckh, size_t bucket, const void *key)
 JEMALLOC_INLINE size_t
 ckh_isearch(ckh_t *ckh, const void *key)
 {
-	size_t hash1, hash2, bucket, cell;
+	size_t hashes[2], bucket, cell;
 
 	assert(ckh != NULL);
 
-	ckh->hash(key, ckh->lg_curbuckets, &hash1, &hash2);
+	ckh->hash(key, hashes);
 
 	/* Search primary bucket. */
-	bucket = hash1 & ((ZU(1) << ckh->lg_curbuckets) - 1);
+	bucket = hashes[0] & ((ZU(1) << ckh->lg_curbuckets) - 1);
 	cell = ckh_bucket_search(ckh, bucket, key);
 	if (cell != SIZE_T_MAX)
 		return (cell);
 
 	/* Search secondary bucket. */
-	bucket = hash2 & ((ZU(1) << ckh->lg_curbuckets) - 1);
+	bucket = hashes[1] & ((ZU(1) << ckh->lg_curbuckets) - 1);
 	cell = ckh_bucket_search(ckh, bucket, key);
 	return (cell);
 }
@@ -126,7 +126,7 @@ ckh_evict_reloc_insert(ckh_t *ckh, size_t argbucket, void const **argkey,
 {
 	const void *key, *data, *tkey, *tdata;
 	ckhc_t *cell;
-	size_t hash1, hash2, bucket, tbucket;
+	size_t hashes[2], bucket, tbucket;
 	unsigned i;
 
 	bucket = argbucket;
@@ -155,10 +155,11 @@ ckh_evict_reloc_insert(ckh_t *ckh, size_t argbucket, void const **argkey,
 #endif
 
 		/* Find the alternate bucket for the evicted item. */
-		ckh->hash(key, ckh->lg_curbuckets, &hash1, &hash2);
-		tbucket = hash2 & ((ZU(1) << ckh->lg_curbuckets) - 1);
+		ckh->hash(key, hashes);
+		tbucket = hashes[1] & ((ZU(1) << ckh->lg_curbuckets) - 1);
 		if (tbucket == bucket) {
-			tbucket = hash1 & ((ZU(1) << ckh->lg_curbuckets) - 1);
+			tbucket = hashes[0] & ((ZU(1) << ckh->lg_curbuckets)
+			    - 1);
 			/*
 			 * It may be that (tbucket == bucket) still, if the
 			 * item's hashes both indicate this bucket.  However,
@@ -192,19 +193,19 @@ ckh_evict_reloc_insert(ckh_t *ckh, size_t argbucket, void const **argkey,
 JEMALLOC_INLINE bool
 ckh_try_insert(ckh_t *ckh, void const**argkey, void const**argdata)
 {
-	size_t hash1, hash2, bucket;
+	size_t hashes[2], bucket;
 	const void *key = *argkey;
 	const void *data = *argdata;
 
-	ckh->hash(key, ckh->lg_curbuckets, &hash1, &hash2);
+	ckh->hash(key, hashes);
 
 	/* Try to insert in primary bucket. */
-	bucket = hash1 & ((ZU(1) << ckh->lg_curbuckets) - 1);
+	bucket = hashes[0] & ((ZU(1) << ckh->lg_curbuckets) - 1);
 	if (ckh_try_bucket_insert(ckh, bucket, key, data) == false)
 		return (false);
 
 	/* Try to insert in secondary bucket. */
-	bucket = hash2 & ((ZU(1) << ckh->lg_curbuckets) - 1);
+	bucket = hashes[1] & ((ZU(1) << ckh->lg_curbuckets) - 1);
 	if (ckh_try_bucket_insert(ckh, bucket, key, data) == false)
 		return (false);
 
@@ -417,9 +418,8 @@ ckh_delete(ckh_t *ckh)
 #endif
 
 	idalloc(ckh->tab);
-#ifdef JEMALLOC_DEBUG
-	memset(ckh, 0x5a, sizeof(ckh_t));
-#endif
+	if (config_debug)
+		memset(ckh, 0x5a, sizeof(ckh_t));
 }
 
 size_t
@@ -526,31 +526,10 @@ ckh_search(ckh_t *ckh, const void *searchkey, void **key, void **data)
 }
 
 void
-ckh_string_hash(const void *key, unsigned minbits, size_t *hash1, size_t *hash2)
+ckh_string_hash(const void *key, size_t r_hash[2])
 {
-	size_t ret1, ret2;
-	uint64_t h;
 
-	assert(minbits <= 32 || (SIZEOF_PTR == 8 && minbits <= 64));
-	assert(hash1 != NULL);
-	assert(hash2 != NULL);
-
-	h = hash(key, strlen((const char *)key), UINT64_C(0x94122f335b332aea));
-	if (minbits <= 32) {
-		/*
-		 * Avoid doing multiple hashes, since a single hash provides
-		 * enough bits.
-		 */
-		ret1 = h & ZU(0xffffffffU);
-		ret2 = h >> 32;
-	} else {
-		ret1 = h;
-		ret2 = hash(key, strlen((const char *)key),
-		    UINT64_C(0x8432a476666bbc13));
-	}
-
-	*hash1 = ret1;
-	*hash2 = ret2;
+	hash(key, strlen((const char *)key), 0x94122f33U, r_hash);
 }
 
 bool
@@ -564,41 +543,16 @@ ckh_string_keycomp(const void *k1, const void *k2)
 }
 
 void
-ckh_pointer_hash(const void *key, unsigned minbits, size_t *hash1,
-    size_t *hash2)
+ckh_pointer_hash(const void *key, size_t r_hash[2])
 {
-	size_t ret1, ret2;
-	uint64_t h;
 	union {
 		const void	*v;
-		uint64_t	i;
+		size_t		i;
 	} u;
 
-	assert(minbits <= 32 || (SIZEOF_PTR == 8 && minbits <= 64));
-	assert(hash1 != NULL);
-	assert(hash2 != NULL);
-
 	assert(sizeof(u.v) == sizeof(u.i));
-#if (LG_SIZEOF_PTR != LG_SIZEOF_INT)
-	u.i = 0;
-#endif
 	u.v = key;
-	h = hash(&u.i, sizeof(u.i), UINT64_C(0xd983396e68886082));
-	if (minbits <= 32) {
-		/*
-		 * Avoid doing multiple hashes, since a single hash provides
-		 * enough bits.
-		 */
-		ret1 = h & ZU(0xffffffffU);
-		ret2 = h >> 32;
-	} else {
-		assert(SIZEOF_PTR == 8);
-		ret1 = h;
-		ret2 = hash(&u.i, sizeof(u.i), UINT64_C(0x5e2be9aff8709a5d));
-	}
-
-	*hash1 = ret1;
-	*hash2 = ret2;
+	hash(&u.i, sizeof(u.i), 0xd983396eU, r_hash);
 }
 
 bool
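[Editor's note: with the ckh_hash_t typedef now taking (const void *, size_t[2]), a custom hash callback fills both cuckoo-hash values in one call. A hedged sketch of a conforming callback for 4-byte keys; the mixing step is a placeholder, not the MurmurHash3-based hash() the built-in callbacks use.]

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    typedef void ckh_hash_t(const void *, size_t[2]);  /* as in ckh.h above */

    static void
    u32_key_hash(const void *key, size_t r_hash[2])
    {
        uint32_t k;
        uint64_t h;

        memcpy(&k, key, sizeof(k));
        h = (k + 1) * 0x9e3779b97f4a7c15ULL;  /* placeholder mix */
        r_hash[0] = (size_t)h;       /* primary bucket hash */
        r_hash[1] = (size_t)(h >> 32);  /* secondary bucket hash */
    }

    int
    main(void)
    {
        ckh_hash_t *cb = u32_key_hash;  /* e.g. the callback handed to ckh_new() */
        uint32_t key = 42;
        size_t hashes[2];

        cb(&key, hashes);
        return (hashes[0] == hashes[1]);
    }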
diff --git a/src/ctl.c b/src/ctl.c
index 6e01b1e2..f2ef4e60 100644
--- a/src/ctl.c
+++ b/src/ctl.c
@@ -960,11 +960,11 @@ ctl_postfork_child(void)
 		if (*oldlenp != sizeof(t)) {				\
 			size_t	copylen = (sizeof(t) <= *oldlenp)	\
 			    ? sizeof(t) : *oldlenp;			\
-			memcpy(oldp, (void *)&v, copylen);		\
+			memcpy(oldp, (void *)&(v), copylen);		\
 			ret = EINVAL;					\
 			goto label_return;				\
 		} else							\
-			*(t *)oldp = v;					\
+			*(t *)oldp = (v);				\
 	}								\
 } while (0)
 
@@ -974,7 +974,7 @@ ctl_postfork_child(void)
 		ret = EINVAL;						\
 		goto label_return;					\
 	}								\
-	v = *(t *)newp;							\
+	(v) = *(t *)newp;						\
 } while (0)
 
@@ -995,7 +995,7 @@ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,	\
 	if (l)								\
 		malloc_mutex_lock(&ctl_mtx);				\
 	READONLY();							\
-	oldval = v;							\
+	oldval = (v);							\
 	READ(oldval, t);						\
 									\
 	ret = 0;							\
@@ -1017,7 +1017,7 @@ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,	\
 		return (ENOENT);					\
 	malloc_mutex_lock(&ctl_mtx);					\
 	READONLY();							\
-	oldval = v;							\
+	oldval = (v);							\
 	READ(oldval, t);						\
 									\
 	ret = 0;							\
@@ -1036,7 +1036,7 @@ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,	\
 									\
 	malloc_mutex_lock(&ctl_mtx);					\
 	READONLY();							\
-	oldval = v;							\
+	oldval = (v);							\
 	READ(oldval, t);						\
 									\
 	ret = 0;							\
@@ -1060,7 +1060,7 @@ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,	\
 	if ((c) == false)						\
 		return (ENOENT);					\
 	READONLY();							\
-	oldval = v;							\
+	oldval = (v);							\
 	READ(oldval, t);						\
 									\
 	ret = 0;							\
@@ -1077,7 +1077,7 @@ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,	\
 	t oldval;							\
 									\
 	READONLY();							\
-	oldval = v;							\
+	oldval = (v);							\
 	READ(oldval, t);						\
 									\
 	ret = 0;							\
@@ -1492,6 +1492,7 @@ arenas_extend_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
     void *newp, size_t newlen)
 {
 	int ret;
+	unsigned narenas;
 
 	malloc_mutex_lock(&ctl_mtx);
 	READONLY();
@@ -1499,7 +1500,8 @@ arenas_extend_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
 		ret = EAGAIN;
 		goto label_return;
 	}
-	READ(ctl_stats.narenas - 1, unsigned);
+	narenas = ctl_stats.narenas - 1;
+	READ(narenas, unsigned);
 
 	ret = 0;
 label_return:
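[Editor's note: the arenas_extend_ctl fix routes the result through a local so the READ() macro, which takes the value's address, gets a real lvalue. A hedged caller-side sketch using the public mallctl API; the jemalloc/jemalloc.h path assumes a default install.]

    #include <stdio.h>
    #include <jemalloc/jemalloc.h>

    int
    main(void)
    {
        unsigned arena_ind;
        size_t sz = sizeof(arena_ind);

        /* With the fix, the new arena's index (narenas - 1) is actually
         * copied out through oldp. */
        if (mallctl("arenas.extend", &arena_ind, &sz, NULL, 0) != 0)
            return (1);
        printf("created arena %u\n", arena_ind);
        return (0);
    }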
diff --git a/src/jemalloc.c b/src/jemalloc.c
index 8a667b62..c117685a 100644
--- a/src/jemalloc.c
+++ b/src/jemalloc.c
@@ -10,17 +10,20 @@ malloc_tsd_data(, thread_allocated, thread_allocated_t,
 /* Runtime configuration options. */
 const char	*je_malloc_conf;
+bool	opt_abort =
 #ifdef JEMALLOC_DEBUG
-bool	opt_abort = true;
-#  ifdef JEMALLOC_FILL
-bool	opt_junk = true;
-#  else
-bool	opt_junk = false;
-#  endif
+    true
 #else
-bool	opt_abort = false;
-bool	opt_junk = false;
+    false
 #endif
+    ;
+bool	opt_junk =
+#if (defined(JEMALLOC_DEBUG) && defined(JEMALLOC_FILL))
+    true
+#else
+    false
+#endif
+    ;
 size_t	opt_quarantine = ZU(0);
 bool	opt_redzone = false;
 bool	opt_utrace = false;
@@ -83,11 +86,13 @@ typedef struct {
 #ifdef JEMALLOC_UTRACE
 #  define UTRACE(a, b, c) do {						\
 	if (opt_utrace) {						\
+		int utrace_serrno = errno;				\
 		malloc_utrace_t ut;					\
 		ut.p = (a);						\
 		ut.s = (b);						\
 		ut.r = (c);						\
 		utrace(&ut, sizeof(ut));				\
+		errno = utrace_serrno;					\
 	}								\
 } while (0)
 #else
@@ -277,7 +282,7 @@ arenas_cleanup(void *arg)
 	malloc_mutex_unlock(&arenas_lock);
 }
 
-static inline bool
+static JEMALLOC_ATTR(always_inline) bool
 malloc_init(void)
 {
 
@@ -469,7 +474,7 @@ malloc_conf_init(void)
 
 	while (*opts != '\0' && malloc_conf_next(&opts, &k, &klen, &v,
 	    &vlen) == false) {
-#define	CONF_HANDLE_BOOL_HIT(o, n, hit)					\
+#define	CONF_HANDLE_BOOL(o, n)						\
 		if (sizeof(n)-1 == klen && strncmp(n, k,		\
 		    klen) == 0) {					\
 			if (strncmp("true", v, vlen) == 0 &&		\
@@ -483,16 +488,9 @@ malloc_conf_init(void)
 				    "Invalid conf value",		\
 				    k, klen, v, vlen);			\
 			}						\
-			hit = true;					\
-		} else							\
-			hit = false;
-#define	CONF_HANDLE_BOOL(o, n) {					\
-	bool hit;							\
-	CONF_HANDLE_BOOL_HIT(o, n, hit);				\
-	if (hit)							\
 			continue;					\
-}
-#define	CONF_HANDLE_SIZE_T(o, n, min, max)				\
+		}
+#define	CONF_HANDLE_SIZE_T(o, n, min, max, clip)			\
 		if (sizeof(n)-1 == klen && strncmp(n, k,		\
 		    klen) == 0) {					\
 			uintmax_t um;					\
@@ -505,12 +503,22 @@ malloc_conf_init(void)
 				malloc_conf_error(			\
 				    "Invalid conf value",		\
 				    k, klen, v, vlen);			\
-			} else if (um < min || um > max) {		\
-				malloc_conf_error(			\
-				    "Out-of-range conf value",		\
-				    k, klen, v, vlen);			\
-			} else						\
-				o = um;					\
+			} else if (clip) {				\
+				if (um < min)				\
+					o = min;			\
+				else if (um > max)			\
+					o = max;			\
+				else					\
+					o = um;				\
+			} else {					\
+				if (um < min || um > max) {		\
+					malloc_conf_error(		\
+					    "Out-of-range "		\
+					    "conf value",		\
+					    k, klen, v, vlen);		\
+				} else					\
+					o = um;				\
+			}						\
 			continue;					\
 		}
#define	CONF_HANDLE_SSIZE_T(o, n, min, max)				\
@@ -555,7 +563,8 @@ malloc_conf_init(void)
 			 * config_fill.
 			 */
 			CONF_HANDLE_SIZE_T(opt_lg_chunk, "lg_chunk", LG_PAGE +
-			    (config_fill ? 2 : 1), (sizeof(size_t) << 3) - 1)
+			    (config_fill ? 2 : 1), (sizeof(size_t) << 3) - 1,
+			    true)
 			if (strncmp("dss", k, klen) == 0) {
 				int i;
 				bool match = false;
@@ -581,14 +590,14 @@ malloc_conf_init(void)
 				continue;
 			}
 			CONF_HANDLE_SIZE_T(opt_narenas, "narenas", 1,
-			    SIZE_T_MAX)
+			    SIZE_T_MAX, false)
 			CONF_HANDLE_SSIZE_T(opt_lg_dirty_mult, "lg_dirty_mult",
 			    -1, (sizeof(size_t) << 3) - 1)
 			CONF_HANDLE_BOOL(opt_stats_print, "stats_print")
 			if (config_fill) {
 				CONF_HANDLE_BOOL(opt_junk, "junk")
 				CONF_HANDLE_SIZE_T(opt_quarantine, "quarantine",
-				    0, SIZE_T_MAX)
+				    0, SIZE_T_MAX, false)
 				CONF_HANDLE_BOOL(opt_redzone, "redzone")
 				CONF_HANDLE_BOOL(opt_zero, "zero")
 			}
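[Editor's note: the UTRACE() change above saves and restores errno around the trace call so diagnostics cannot clobber the errno value an allocation path owes its caller. A minimal standalone sketch of the same idiom; trace_record() is a hypothetical tracer that may fail and set errno.]

    #include <errno.h>

    static void trace_record(void) { errno = 0; /* stand-in tracer */ }

    static void
    traced_event(void)
    {
        int utrace_serrno = errno;  /* save, as in the UTRACE() macro */
        trace_record();
        errno = utrace_serrno;      /* restore for the real caller */
    }

    int
    main(void)
    {
        errno = 34;  /* pretend an allocation just failed */
        traced_event();
        return (errno == 34 ? 0 : 1);
    }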
@@ -886,7 +895,7 @@ JEMALLOC_ATTR(nonnull(1))
 /*
  * Avoid any uncertainty as to how many backtrace frames to ignore in
  * PROF_ALLOC_PREP().
  */
-JEMALLOC_ATTR(noinline)
+JEMALLOC_NOINLINE
 #endif
 static int
 imemalign(void **memptr, size_t alignment, size_t size,
@@ -1372,7 +1381,7 @@ je_mallctlbymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
  */
 
 #ifdef JEMALLOC_EXPERIMENTAL
-JEMALLOC_INLINE void *
+static JEMALLOC_ATTR(always_inline) void *
 iallocm(size_t usize, size_t alignment, bool zero, bool try_tcache,
     arena_t *arena)
 {
diff --git a/src/prof.c b/src/prof.c
index 04964ef7..b9f03a0d 100644
--- a/src/prof.c
+++ b/src/prof.c
@@ -26,7 +26,7 @@ bool		opt_prof_leak = false;
 bool		opt_prof_accum = false;
 char		opt_prof_prefix[PATH_MAX + 1];
 
-uint64_t	prof_interval;
+uint64_t	prof_interval = 0;
 bool		prof_promote;
 
 /*
@@ -90,8 +90,7 @@ static bool	prof_dump(bool propagate_err, const char *filename,
     bool leakcheck);
 static void	prof_dump_filename(char *filename, char v, int64_t vseq);
 static void	prof_fdump(void);
-static void	prof_bt_hash(const void *key, unsigned minbits, size_t *hash1,
-    size_t *hash2);
+static void	prof_bt_hash(const void *key, size_t r_hash[2]);
 static bool	prof_bt_keycomp(const void *k1, const void *k2);
 static malloc_mutex_t	*prof_ctx_mutex_choose(void);
 
@@ -1043,34 +1042,13 @@ prof_gdump(void)
 }
 
 static void
-prof_bt_hash(const void *key, unsigned minbits, size_t *hash1, size_t *hash2)
+prof_bt_hash(const void *key, size_t r_hash[2])
 {
-	size_t ret1, ret2;
-	uint64_t h;
 	prof_bt_t *bt = (prof_bt_t *)key;
 
 	cassert(config_prof);
-	assert(minbits <= 32 || (SIZEOF_PTR == 8 && minbits <= 64));
-	assert(hash1 != NULL);
-	assert(hash2 != NULL);
 
-	h = hash(bt->vec, bt->len * sizeof(void *),
-	    UINT64_C(0x94122f335b332aea));
-	if (minbits <= 32) {
-		/*
-		 * Avoid doing multiple hashes, since a single hash provides
-		 * enough bits.
-		 */
-		ret1 = h & ZU(0xffffffffU);
-		ret2 = h >> 32;
-	} else {
-		ret1 = h;
-		ret2 = hash(bt->vec, bt->len * sizeof(void *),
-		    UINT64_C(0x8432a476666bbc13));
-	}
-
-	*hash1 = ret1;
-	*hash2 = ret2;
+	hash(bt->vec, bt->len * sizeof(void *), 0x94122f33U, r_hash);
 }
 
 static bool
@@ -1206,13 +1184,11 @@ prof_boot1(void)
 		 */
 		opt_prof = true;
 		opt_prof_gdump = false;
-		prof_interval = 0;
 	} else if (opt_prof) {
 		if (opt_lg_prof_interval >= 0) {
 			prof_interval = (((uint64_t)1U) <<
 			    opt_lg_prof_interval);
-		} else
-			prof_interval = 0;
+		}
 	}
 
 	prof_promote = (opt_prof && opt_lg_prof_sample > LG_PAGE);
diff --git a/src/tcache.c b/src/tcache.c
index 47e14f30..7befdc86 100644
--- a/src/tcache.c
+++ b/src/tcache.c
@@ -97,9 +97,7 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem,
 		arena_bin_t *bin = &arena->bins[binind];
 
 		if (config_prof && arena == tcache->arena) {
-			malloc_mutex_lock(&arena->lock);
 			arena_prof_accum(arena, tcache->prof_accumbytes);
-			malloc_mutex_unlock(&arena->lock);
 			tcache->prof_accumbytes = 0;
 		}
 
@@ -180,7 +178,7 @@ tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem,
 		malloc_mutex_lock(&arena->lock);
 		if ((config_prof || config_stats) && arena == tcache->arena) {
 			if (config_prof) {
-				arena_prof_accum(arena,
+				arena_prof_accum_locked(arena,
 				    tcache->prof_accumbytes);
 				tcache->prof_accumbytes = 0;
 			}
@@ -343,11 +341,8 @@ tcache_destroy(tcache_t *tcache)
 		}
 	}
 
-	if (config_prof && tcache->prof_accumbytes > 0) {
-		malloc_mutex_lock(&tcache->arena->lock);
+	if (config_prof && tcache->prof_accumbytes > 0)
 		arena_prof_accum(tcache->arena, tcache->prof_accumbytes);
-		malloc_mutex_unlock(&tcache->arena->lock);
-	}
 
 	tcache_size = arena_salloc(tcache, false);
 	if (tcache_size <= SMALL_MAXCLASS) {
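[Editor's note: closing the loop on the lg_chunk clipping change — a hedged sketch that sets an out-of-range value and reads the effective, clipped value back through the public mallctl interface. "opt.lg_chunk" and malloc_conf are documented jemalloc APIs; the header path assumes a default install, and 4 is deliberately below the minimum so clipping raises it.]

    #include <stdio.h>
    #include <jemalloc/jemalloc.h>

    /* Out of range on purpose; clipped up to the minimum, not rejected. */
    const char *malloc_conf = "lg_chunk:4";

    int
    main(void)
    {
        size_t lg_chunk, sz = sizeof(lg_chunk);

        if (mallctl("opt.lg_chunk", &lg_chunk, &sz, NULL, 0) == 0)
            printf("effective lg_chunk: %zu\n", lg_chunk);
        return (0);
    }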