From a3b3386ddde8048b9d6b54c397bb93da5e806cef Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 13 Nov 2012 12:56:27 -0800 Subject: [PATCH 01/21] Avoid arena_prof_accum()-related locking when possible. Refactor arena_prof_accum() and its callers to avoid arena locking when prof_interval is 0 (as when profiling is disabled). Reported by Ben Maurer. --- include/jemalloc/internal/arena.h | 42 ++++++++++++++++++- include/jemalloc/internal/private_namespace.h | 2 + src/arena.c | 27 ++---------- src/prof.c | 6 +-- src/tcache.c | 9 +--- 5 files changed, 50 insertions(+), 36 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 561c9b6f..5ba16406 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -400,7 +400,6 @@ extern arena_bin_info_t arena_bin_info[NBINS]; #define nlclasses (chunk_npages - map_bias) void arena_purge_all(arena_t *arena); -void arena_prof_accum(arena_t *arena, uint64_t accumbytes); void arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind, uint64_t prof_accumbytes); void arena_alloc_junk_small(void *ptr, arena_bin_info_t *bin_info, @@ -464,6 +463,9 @@ void arena_mapbits_small_set(arena_chunk_t *chunk, size_t pageind, size_t runind, size_t binind, size_t flags); void arena_mapbits_unzeroed_set(arena_chunk_t *chunk, size_t pageind, size_t unzeroed); +void arena_prof_accum_impl(arena_t *arena, uint64_t accumbytes); +void arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes); +void arena_prof_accum(arena_t *arena, uint64_t accumbytes); size_t arena_ptr_small_binind_get(const void *ptr, size_t mapbits); size_t arena_bin_index(arena_t *arena, arena_bin_t *bin); unsigned arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, @@ -661,6 +663,44 @@ arena_mapbits_unzeroed_set(arena_chunk_t *chunk, size_t pageind, *mapbitsp = (*mapbitsp & ~CHUNK_MAP_UNZEROED) | unzeroed; } +JEMALLOC_INLINE void +arena_prof_accum_impl(arena_t *arena, uint64_t accumbytes) +{ + + cassert(config_prof); + assert(prof_interval != 0); + + arena->prof_accumbytes += accumbytes; + if (arena->prof_accumbytes >= prof_interval) { + prof_idump(); + arena->prof_accumbytes -= prof_interval; + } +} + +JEMALLOC_INLINE void +arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes) +{ + + cassert(config_prof); + + if (prof_interval == 0) + return; + arena_prof_accum_impl(arena, accumbytes); +} + +JEMALLOC_INLINE void +arena_prof_accum(arena_t *arena, uint64_t accumbytes) +{ + + cassert(config_prof); + + if (prof_interval == 0) + return; + malloc_mutex_lock(&arena->lock); + arena_prof_accum_impl(arena, accumbytes); + malloc_mutex_unlock(&arena->lock); +} + JEMALLOC_INLINE size_t arena_ptr_small_binind_get(const void *ptr, size_t mapbits) { diff --git a/include/jemalloc/internal/private_namespace.h b/include/jemalloc/internal/private_namespace.h index 06241cd2..951df24b 100644 --- a/include/jemalloc/internal/private_namespace.h +++ b/include/jemalloc/internal/private_namespace.h @@ -41,6 +41,8 @@ #define arena_postfork_parent JEMALLOC_N(arena_postfork_parent) #define arena_prefork JEMALLOC_N(arena_prefork) #define arena_prof_accum JEMALLOC_N(arena_prof_accum) +#define arena_prof_accum_impl JEMALLOC_N(arena_prof_accum_impl) +#define arena_prof_accum_locked JEMALLOC_N(arena_prof_accum_locked) #define arena_prof_ctx_get JEMALLOC_N(arena_prof_ctx_get) #define arena_prof_ctx_set JEMALLOC_N(arena_prof_ctx_set) #define arena_prof_promoted JEMALLOC_N(arena_prof_promoted) diff --git a/src/arena.c b/src/arena.c 
index 0c53b071..f9406c79 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1321,21 +1321,6 @@ arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin) return (arena_run_reg_alloc(bin->runcur, bin_info)); } -void -arena_prof_accum(arena_t *arena, uint64_t accumbytes) -{ - - cassert(config_prof); - - if (config_prof && prof_interval != 0) { - arena->prof_accumbytes += accumbytes; - if (arena->prof_accumbytes >= prof_interval) { - prof_idump(); - arena->prof_accumbytes -= prof_interval; - } - } -} - void arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind, uint64_t prof_accumbytes) @@ -1347,11 +1332,8 @@ arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind, assert(tbin->ncached == 0); - if (config_prof) { - malloc_mutex_lock(&arena->lock); + if (config_prof) arena_prof_accum(arena, prof_accumbytes); - malloc_mutex_unlock(&arena->lock); - } bin = &arena->bins[binind]; malloc_mutex_lock(&bin->lock); for (i = 0, nfill = (tcache_bin_info[binind].ncached_max >> @@ -1459,11 +1441,8 @@ arena_malloc_small(arena_t *arena, size_t size, bool zero) bin->stats.nrequests++; } malloc_mutex_unlock(&bin->lock); - if (config_prof && isthreaded == false) { - malloc_mutex_lock(&arena->lock); + if (config_prof && isthreaded == false) arena_prof_accum(arena, size); - malloc_mutex_unlock(&arena->lock); - } if (zero == false) { if (config_fill) { @@ -1507,7 +1486,7 @@ arena_malloc_large(arena_t *arena, size_t size, bool zero) arena->stats.lstats[(size >> LG_PAGE) - 1].curruns++; } if (config_prof) - arena_prof_accum(arena, size); + arena_prof_accum_locked(arena, size); malloc_mutex_unlock(&arena->lock); if (zero == false) { diff --git a/src/prof.c b/src/prof.c index 04964ef7..e21d1667 100644 --- a/src/prof.c +++ b/src/prof.c @@ -26,7 +26,7 @@ bool opt_prof_leak = false; bool opt_prof_accum = false; char opt_prof_prefix[PATH_MAX + 1]; -uint64_t prof_interval; +uint64_t prof_interval = 0; bool prof_promote; /* @@ -1206,13 +1206,11 @@ prof_boot1(void) */ opt_prof = true; opt_prof_gdump = false; - prof_interval = 0; } else if (opt_prof) { if (opt_lg_prof_interval >= 0) { prof_interval = (((uint64_t)1U) << opt_lg_prof_interval); - } else - prof_interval = 0; + } } prof_promote = (opt_prof && opt_lg_prof_sample > LG_PAGE); diff --git a/src/tcache.c b/src/tcache.c index 47e14f30..7befdc86 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -97,9 +97,7 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem, arena_bin_t *bin = &arena->bins[binind]; if (config_prof && arena == tcache->arena) { - malloc_mutex_lock(&arena->lock); arena_prof_accum(arena, tcache->prof_accumbytes); - malloc_mutex_unlock(&arena->lock); tcache->prof_accumbytes = 0; } @@ -180,7 +178,7 @@ tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem, malloc_mutex_lock(&arena->lock); if ((config_prof || config_stats) && arena == tcache->arena) { if (config_prof) { - arena_prof_accum(arena, + arena_prof_accum_locked(arena, tcache->prof_accumbytes); tcache->prof_accumbytes = 0; } @@ -343,11 +341,8 @@ tcache_destroy(tcache_t *tcache) } } - if (config_prof && tcache->prof_accumbytes > 0) { - malloc_mutex_lock(&tcache->arena->lock); + if (config_prof && tcache->prof_accumbytes > 0) arena_prof_accum(tcache->arena, tcache->prof_accumbytes); - malloc_mutex_unlock(&tcache->arena->lock); - } tcache_size = arena_salloc(tcache, false); if (tcache_size <= SMALL_MAXCLASS) { From 9906660eb7365abb54e4495407ffddb1069ef654 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Mon, 19 Nov 2012 10:55:26 +0100 
Subject: [PATCH 02/21] Allow to build without exporting symbols When statically linking jemalloc, it may be beneficial not to export its symbols if it makes sense, which allows the compiler and the linker to do some further optimizations. --- INSTALL | 5 +++++ configure.ac | 7 +++++++ 2 files changed, 12 insertions(+) diff --git a/INSTALL b/INSTALL index e40a7edd..7c2ed686 100644 --- a/INSTALL +++ b/INSTALL @@ -55,6 +55,11 @@ any of the following arguments (not a definitive list) to 'configure': jemalloc overlays the default malloc zone, but makes no attempt to actually replace the "malloc", "calloc", etc. symbols. +--without-export + Don't export public APIs. This can be useful when building jemalloc as a + static library, or to avoid exporting public APIs when using the zone + allocator on OSX. + --with-private-namespace= Prefix all library-private APIs with . For shared libraries, symbol visibility mechanisms prevent these symbols from being exported, but diff --git a/configure.ac b/configure.ac index 1c52439e..8558961c 100644 --- a/configure.ac +++ b/configure.ac @@ -471,6 +471,13 @@ for stem in ${public_syms}; do AC_DEFINE_UNQUOTED([${n}], [${m}]) done +AC_ARG_WITH([export], + [AS_HELP_STRING([--without-export], [disable exporting jemalloc public APIs])], + [if test "x$with_export" = "xno"; then + AC_DEFINE([JEMALLOC_EXPORT],[])] +fi] +) + dnl Do not mangle library-private APIs by default. AC_ARG_WITH([private_namespace], [AS_HELP_STRING([--with-private-namespace=], [Prefix to prepend to all library-private APIs])], From 6eb84fbe315add1e1d4f8deedc25d260fff3ae97 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 29 Nov 2012 22:13:04 -0800 Subject: [PATCH 03/21] Fix "arenas.extend" mallctl to return the number of arenas. Reported by Mike Hommey. --- ChangeLog | 5 +++++ src/ctl.c | 20 +++++++++++--------- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/ChangeLog b/ChangeLog index ab3476c6..3dff01e1 100644 --- a/ChangeLog +++ b/ChangeLog @@ -6,6 +6,11 @@ found in the git revision history: http://www.canonware.com/cgi-bin/gitweb.cgi?p=jemalloc.git git://canonware.com/jemalloc.git +* 3.x.x (Not yet released) + + Bug fixes: + - Fix "arenas.extend" mallctl to output the number of arenas. + * 3.2.0 (November 9, 2012) In addition to a couple of bug fixes, this version modifies page run diff --git a/src/ctl.c b/src/ctl.c index 6e01b1e2..f2ef4e60 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -960,11 +960,11 @@ ctl_postfork_child(void) if (*oldlenp != sizeof(t)) { \ size_t copylen = (sizeof(t) <= *oldlenp) \ ? 
sizeof(t) : *oldlenp; \ - memcpy(oldp, (void *)&v, copylen); \ + memcpy(oldp, (void *)&(v), copylen); \ ret = EINVAL; \ goto label_return; \ } else \ - *(t *)oldp = v; \ + *(t *)oldp = (v); \ } \ } while (0) @@ -974,7 +974,7 @@ ctl_postfork_child(void) ret = EINVAL; \ goto label_return; \ } \ - v = *(t *)newp; \ + (v) = *(t *)newp; \ } \ } while (0) @@ -995,7 +995,7 @@ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ if (l) \ malloc_mutex_lock(&ctl_mtx); \ READONLY(); \ - oldval = v; \ + oldval = (v); \ READ(oldval, t); \ \ ret = 0; \ @@ -1017,7 +1017,7 @@ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ return (ENOENT); \ malloc_mutex_lock(&ctl_mtx); \ READONLY(); \ - oldval = v; \ + oldval = (v); \ READ(oldval, t); \ \ ret = 0; \ @@ -1036,7 +1036,7 @@ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ \ malloc_mutex_lock(&ctl_mtx); \ READONLY(); \ - oldval = v; \ + oldval = (v); \ READ(oldval, t); \ \ ret = 0; \ @@ -1060,7 +1060,7 @@ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ if ((c) == false) \ return (ENOENT); \ READONLY(); \ - oldval = v; \ + oldval = (v); \ READ(oldval, t); \ \ ret = 0; \ @@ -1077,7 +1077,7 @@ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ t oldval; \ \ READONLY(); \ - oldval = v; \ + oldval = (v); \ READ(oldval, t); \ \ ret = 0; \ @@ -1492,6 +1492,7 @@ arenas_extend_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; + unsigned narenas; malloc_mutex_lock(&ctl_mtx); READONLY(); @@ -1499,7 +1500,8 @@ arenas_extend_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, ret = EAGAIN; goto label_return; } - READ(ctl_stats.narenas - 1, unsigned); + narenas = ctl_stats.narenas - 1; + READ(narenas, unsigned); ret = 0; label_return: From 1271185b87fcf54afb37dc05e7e0c58e5fb8f06a Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 12 Dec 2012 10:12:18 -0800 Subject: [PATCH 04/21] Fix chunk_recycle() Valgrind integration. Fix chunk_recycyle() to unconditionally inform Valgrind that returned memory is undefined. This fixes Valgrind warnings that would result from a huge allocation being freed, then recycled for use as an arena chunk. The arena code would write metadata to the chunk header, and Valgrind would consider these invalid writes. --- ChangeLog | 2 ++ src/chunk.c | 5 ++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/ChangeLog b/ChangeLog index 3dff01e1..374459de 100644 --- a/ChangeLog +++ b/ChangeLog @@ -10,6 +10,8 @@ found in the git revision history: Bug fixes: - Fix "arenas.extend" mallctl to output the number of arenas. + - Fix chunk_recycyle() to unconditionally inform Valgrind that returned memory + is undefined. * 3.2.0 (November 9, 2012) diff --git a/src/chunk.c b/src/chunk.c index 1a3bb4f6..40c108a4 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -122,10 +122,9 @@ chunk_recycle(extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, size_t size, } base_node_dealloc(node); } - if (zeroed == false && *zero) { - VALGRIND_MAKE_MEM_UNDEFINED(ret, size); + VALGRIND_MAKE_MEM_UNDEFINED(ret, size); + if (zeroed == false && *zero) memset(ret, 0, size); - } return (ret); } From 1bf2743e08ba66cc141e296812839947223e4370 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 23 Dec 2012 08:51:48 -0800 Subject: [PATCH 05/21] Add clipping support to lg_chunk option processing. 
Modify processing of the lg_chunk option so that it clips an out-of-range input to the edge of the valid range. This makes it possible to request the minimum possible chunk size without intimate knowledge of allocator internals. Submitted by Ian Lepore (see FreeBSD PR bin/174641). --- doc/jemalloc.xml.in | 7 +++++-- src/jemalloc.c | 42 +++++++++++++++++++++++------------------- 2 files changed, 28 insertions(+), 21 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 54b87474..09305801 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -790,8 +790,11 @@ for (i = 0; i < nbins; i++) { (size_t) r- - Virtual memory chunk size (log base 2). The default - chunk size is 4 MiB (2^22). + Virtual memory chunk size (log base 2). If a chunk + size outside the supported size range is specified, the size is + silently clipped to the minimum/maximum supported size. The default + chunk size is 4 MiB (2^22). + diff --git a/src/jemalloc.c b/src/jemalloc.c index 8a667b62..cecd012c 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -469,7 +469,7 @@ malloc_conf_init(void) while (*opts != '\0' && malloc_conf_next(&opts, &k, &klen, &v, &vlen) == false) { -#define CONF_HANDLE_BOOL_HIT(o, n, hit) \ +#define CONF_HANDLE_BOOL(o, n) \ if (sizeof(n)-1 == klen && strncmp(n, k, \ klen) == 0) { \ if (strncmp("true", v, vlen) == 0 && \ @@ -483,16 +483,9 @@ malloc_conf_init(void) "Invalid conf value", \ k, klen, v, vlen); \ } \ - hit = true; \ - } else \ - hit = false; -#define CONF_HANDLE_BOOL(o, n) { \ - bool hit; \ - CONF_HANDLE_BOOL_HIT(o, n, hit); \ - if (hit) \ continue; \ -} -#define CONF_HANDLE_SIZE_T(o, n, min, max) \ + } +#define CONF_HANDLE_SIZE_T(o, n, min, max, clip) \ if (sizeof(n)-1 == klen && strncmp(n, k, \ klen) == 0) { \ uintmax_t um; \ @@ -505,12 +498,22 @@ malloc_conf_init(void) malloc_conf_error( \ "Invalid conf value", \ k, klen, v, vlen); \ - } else if (um < min || um > max) { \ - malloc_conf_error( \ - "Out-of-range conf value", \ - k, klen, v, vlen); \ - } else \ - o = um; \ + } else if (clip) { \ + if (um < min) \ + o = min; \ + else if (um > max) \ + o = max; \ + else \ + o = um; \ + } else { \ + if (um < min || um > max) { \ + malloc_conf_error( \ + "Out-of-range " \ + "conf value", \ + k, klen, v, vlen); \ + } else \ + o = um; \ + } \ continue; \ } #define CONF_HANDLE_SSIZE_T(o, n, min, max) \ @@ -555,7 +558,8 @@ malloc_conf_init(void) * config_fill. */ CONF_HANDLE_SIZE_T(opt_lg_chunk, "lg_chunk", LG_PAGE + - (config_fill ? 2 : 1), (sizeof(size_t) << 3) - 1) + (config_fill ? 
2 : 1), (sizeof(size_t) << 3) - 1, + true) if (strncmp("dss", k, klen) == 0) { int i; bool match = false; @@ -581,14 +585,14 @@ malloc_conf_init(void) continue; } CONF_HANDLE_SIZE_T(opt_narenas, "narenas", 1, - SIZE_T_MAX) + SIZE_T_MAX, false) CONF_HANDLE_SSIZE_T(opt_lg_dirty_mult, "lg_dirty_mult", -1, (sizeof(size_t) << 3) - 1) CONF_HANDLE_BOOL(opt_stats_print, "stats_print") if (config_fill) { CONF_HANDLE_BOOL(opt_junk, "junk") CONF_HANDLE_SIZE_T(opt_quarantine, "quarantine", - 0, SIZE_T_MAX) + 0, SIZE_T_MAX, false) CONF_HANDLE_BOOL(opt_redzone, "redzone") CONF_HANDLE_BOOL(opt_zero, "zero") } From d0357f7a09a6fcbf1df461b07851f61a7f0bdc2d Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Mon, 26 Nov 2012 18:52:41 +0100 Subject: [PATCH 06/21] Allow to disable the zone allocator on Darwin --- INSTALL | 4 ++++ Makefile.in | 3 ++- configure.ac | 21 ++++++++++++++++++++- 3 files changed, 26 insertions(+), 2 deletions(-) diff --git a/INSTALL b/INSTALL index 7c2ed686..9bd1dac0 100644 --- a/INSTALL +++ b/INSTALL @@ -141,6 +141,10 @@ any of the following arguments (not a definitive list) to 'configure': --disable-experimental Disable support for the experimental API (*allocm()). +--disable-zone-allocator + Disable zone allocator for Darwin. This means jemalloc won't be hooked as + the default allocator on OSX/iOS. + --enable-utrace Enable utrace(2)-based allocation tracing. This feature is not broadly portable (FreeBSD has it, but Linux and OS X do not). diff --git a/Makefile.in b/Makefile.in index 36448189..0062747c 100644 --- a/Makefile.in +++ b/Makefile.in @@ -48,6 +48,7 @@ cfgoutputs_in := @cfgoutputs_in@ cfgoutputs_out := @cfgoutputs_out@ enable_autogen := @enable_autogen@ enable_experimental := @enable_experimental@ +enable_zone_allocator := @enable_zone_allocator@ DSO_LDFLAGS = @DSO_LDFLAGS@ SOREV = @SOREV@ PIC_CFLAGS = @PIC_CFLAGS@ @@ -80,7 +81,7 @@ CSRCS := $(srcroot)src/jemalloc.c $(srcroot)src/arena.c $(srcroot)src/atomic.c \ $(srcroot)src/mutex.c $(srcroot)src/prof.c $(srcroot)src/quarantine.c \ $(srcroot)src/rtree.c $(srcroot)src/stats.c $(srcroot)src/tcache.c \ $(srcroot)src/util.c $(srcroot)src/tsd.c -ifeq (macho, $(ABI)) +ifeq ($(enable_zone_allocator), 1) CSRCS += $(srcroot)src/zone.c endif ifeq ($(IMPORTLIB),$(SO)) diff --git a/configure.ac b/configure.ac index 8558961c..249a66c4 100644 --- a/configure.ac +++ b/configure.ac @@ -1185,7 +1185,26 @@ fi dnl ============================================================================ dnl Darwin-related configuration. 
-if test "x${abi}" = "xmacho" ; then +AC_ARG_ENABLE([zone-allocator], + [AS_HELP_STRING([--disable-zone-allocator], + [Disable zone allocator for Darwin])], +[if test "x$enable_zone_allocator" = "xno" ; then + enable_zone_allocator="0" +else + enable_zone_allocator="1" +fi +], +[if test "x${abi}" = "xmacho"; then + enable_zone_allocator="1" +fi +] +) +AC_SUBST([enable_zone_allocator]) + +if test "x${enable_zone_allocator}" = "x1" ; then + if test "x${abi}" != "xmacho"; then + AC_MSG_ERROR([--enable-zone-allocator is only supported on Darwin]) + fi AC_DEFINE([JEMALLOC_IVSALLOC], [ ]) AC_DEFINE([JEMALLOC_ZONE], [ ]) From 5135e34062584f67ae2d12b1f3940a9fad32ca9f Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Thu, 6 Dec 2012 22:16:26 +0100 Subject: [PATCH 07/21] Allow to enable ivsalloc independently --- INSTALL | 6 ++++++ configure.ac | 19 +++++++++++++++++-- 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/INSTALL b/INSTALL index 9bd1dac0..6e371ce5 100644 --- a/INSTALL +++ b/INSTALL @@ -79,6 +79,12 @@ any of the following arguments (not a definitive list) to 'configure': --enable-debug Enable assertions and validation code. This incurs a substantial performance hit, but is very useful during application development. + Implies --enable-ivsalloc. + +--enable-ivsalloc + Enable validation code, which verifies that pointers reside within + jemalloc-owned chunks before dereferencing them. This incurs a substantial + performance hit. --disable-stats Disable statistics gathering functionality. See the "opt.stats_print" diff --git a/configure.ac b/configure.ac index 249a66c4..9d062a98 100644 --- a/configure.ac +++ b/configure.ac @@ -551,7 +551,7 @@ fi dnl Do not compile with debugging by default. AC_ARG_ENABLE([debug], - [AS_HELP_STRING([--enable-debug], [Build debugging code])], + [AS_HELP_STRING([--enable-debug], [Build debugging code (implies --enable-ivsalloc)])], [if test "x$enable_debug" = "xno" ; then enable_debug="0" else @@ -562,10 +562,25 @@ fi ) if test "x$enable_debug" = "x1" ; then AC_DEFINE([JEMALLOC_DEBUG], [ ]) - AC_DEFINE([JEMALLOC_IVSALLOC], [ ]) + enable_ivsalloc="1" fi AC_SUBST([enable_debug]) +dnl Do not validate pointers by default. +AC_ARG_ENABLE([ivsalloc], + [AS_HELP_STRING([--enable-ivsalloc], [Validate pointers passed through the public API])], +[if test "x$enable_ivsalloc" = "xno" ; then + enable_ivsalloc="0" +else + enable_ivsalloc="1" +fi +], +[enable_ivsalloc="0"] +) +if test "x$enable_ivsalloc" = "x1" ; then + AC_DEFINE([JEMALLOC_IVSALLOC], [ ]) +fi + dnl Only optimize if not debugging. if test "x$enable_debug" = "x0" -a "x$no_CFLAGS" = "xyes" ; then dnl Make sure that an optimization flag was not specified in EXTRA_CFLAGS. From 6e6164ae159d9c3bd4f44bd2cba6fc3237687c80 Mon Sep 17 00:00:00 2001 From: Garrett Cooper Date: Sun, 2 Dec 2012 17:56:25 -0800 Subject: [PATCH 08/21] Don't mangle errno with free(3) if utrace(2) fails This ensures POLA on FreeBSD (at least) as free(3) is generally assumed to not fiddle around with errno. 
Signed-off-by: Garrett Cooper --- src/jemalloc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/jemalloc.c b/src/jemalloc.c index cecd012c..ec88700a 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -83,11 +83,13 @@ typedef struct { #ifdef JEMALLOC_UTRACE # define UTRACE(a, b, c) do { \ if (opt_utrace) { \ + int utrace_serrno = errno; \ malloc_utrace_t ut; \ ut.p = (a); \ ut.s = (b); \ ut.r = (c); \ utrace(&ut, sizeof(ut)); \ + errno = utrace_serrno; \ } \ } while (0) #else From 72c1e59fd249f99dcf5d3992cbdd570a381a67ce Mon Sep 17 00:00:00 2001 From: Garrett Cooper Date: Sun, 2 Dec 2012 17:57:28 -0800 Subject: [PATCH 09/21] Improve configure tests for ffsl In particular: - ffsl always returns int, not long, on FreeBSD, Linux, and OSX. - Mute compiler warnings about rv being unused (and the potential for compilers optimizing out the call completely) by dumping the value with printf(3). Signed-off-by: Garrett Cooper --- configure.ac | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/configure.ac b/configure.ac index 9d062a98..88297c5f 100644 --- a/configure.ac +++ b/configure.ac @@ -928,7 +928,7 @@ AC_CACHE_CHECK([STATIC_PAGE_SHIFT], #include ]], [[ - long result; + int result; FILE *f; #ifdef _WIN32 @@ -947,7 +947,7 @@ AC_CACHE_CHECK([STATIC_PAGE_SHIFT], if (f == NULL) { return 1; } - fprintf(f, "%u\n", result); + fprintf(f, "%d\n", result); fclose(f); return 0; @@ -1092,11 +1092,13 @@ dnl Check for ffsl(3), and fail if not found. This function exists on all dnl platforms that jemalloc currently has a chance of functioning on without dnl modification. JE_COMPILABLE([a program using ffsl], [ +#include #include #include ], [ { int rv = ffsl(0x08); + printf("%d\n", rv); } ], [je_cv_function_ffsl]) if test "x${je_cv_function_ffsl}" != "xyes" ; then From 13e4e24c42d17492f85cdd550c1e13d6f929307e Mon Sep 17 00:00:00 2001 From: Garrett Cooper Date: Sun, 2 Dec 2012 17:58:40 -0800 Subject: [PATCH 10/21] Fix build break on *BSD Linux uses alloca.h; many other operating systems define alloca(3) in stdlib.h. Signed-off-by: Garrett Cooper --- configure.ac | 1 + include/jemalloc/internal/jemalloc_internal.h.in | 6 +++++- include/jemalloc/jemalloc_defs.h.in | 5 +++++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 88297c5f..aee74828 100644 --- a/configure.ac +++ b/configure.ac @@ -261,6 +261,7 @@ case "${host}" in CFLAGS="$CFLAGS" CPPFLAGS="$CPPFLAGS -D_GNU_SOURCE" abi="elf" + AC_DEFINE([JEMALLOC_HAS_ALLOCA_H]) AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED], [ ]) AC_DEFINE([JEMALLOC_THREADED_INIT], [ ]) JEMALLOC_USABLE_SIZE_CONST="" diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 475821ac..484f351b 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -359,7 +359,11 @@ static const bool config_ivsalloc = # include # define alloca _alloca # else -# include +# ifdef JEMALLOC_HAS_ALLOCA_H +# include +# else +# include +# endif # endif # define VARIABLE_ARRAY(type, name, count) \ type *name = alloca(sizeof(type) * count) diff --git a/include/jemalloc/jemalloc_defs.h.in b/include/jemalloc/jemalloc_defs.h.in index 1cd60254..3fcf93ce 100644 --- a/include/jemalloc/jemalloc_defs.h.in +++ b/include/jemalloc/jemalloc_defs.h.in @@ -249,6 +249,11 @@ #undef JEMALLOC_PURGE_MADVISE_DONTNEED #undef JEMALLOC_PURGE_MADVISE_FREE +/* + * Define if operating system has alloca.h header. 
+ */ +#undef JEMALLOC_HAS_ALLOCA_H + /* sizeof(void *) == 2^LG_SIZEOF_PTR. */ #undef LG_SIZEOF_PTR From 14a2c6a698a207ac3f3825443cf3441c8842e990 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 21 Jan 2013 19:56:34 -0800 Subject: [PATCH 11/21] Avoid validating freshly mapped memory. Move validation of supposedly zeroed pages from chunk_alloc() to chunk_recycle(). There is little point to validating newly mapped memory returned by chunk_alloc_mmap(), and memory that comes from sbrk() is explicitly zeroed, so there is little risk to assuming that chunk_alloc_dss() actually does the zeroing properly. This relaxation of validation can make a big difference to application startup time and overall system usage on platforms that use jemalloc as the system allocator (namely FreeBSD). Submitted by Ian Lepore . --- src/chunk.c | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/src/chunk.c b/src/chunk.c index 40c108a4..46e387e1 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -78,6 +78,9 @@ chunk_recycle(extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, size_t size, assert(node->size >= leadsize + size); trailsize = node->size - leadsize - size; ret = (void *)((uintptr_t)node->addr + leadsize); + zeroed = node->zeroed; + if (zeroed) + *zero = true; /* Remove node from the tree. */ extent_tree_szad_remove(chunks_szad, node); extent_tree_ad_remove(chunks_ad, node); @@ -114,17 +117,22 @@ chunk_recycle(extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, size_t size, } malloc_mutex_unlock(&chunks_mtx); - zeroed = false; - if (node != NULL) { - if (node->zeroed) { - zeroed = true; - *zero = true; - } + if (node != NULL) base_node_dealloc(node); - } VALGRIND_MAKE_MEM_UNDEFINED(ret, size); - if (zeroed == false && *zero) - memset(ret, 0, size); + if (*zero) { + if (zeroed == false) + memset(ret, 0, size); + else if (config_debug) { + size_t i; + size_t *p = (size_t *)(uintptr_t)ret; + + VALGRIND_MAKE_MEM_DEFINED(ret, size); + for (i = 0; i < size / sizeof(size_t); i++) + assert(p[i] == 0); + VALGRIND_MAKE_MEM_UNDEFINED(ret, size); + } + } return (ret); } @@ -193,14 +201,6 @@ label_return: if (config_prof && opt_prof && opt_prof_gdump && gdump) prof_gdump(); } - if (config_debug && *zero && ret != NULL) { - size_t i; - size_t *p = (size_t *)(uintptr_t)ret; - - VALGRIND_MAKE_MEM_DEFINED(ret, size); - for (i = 0; i < size / sizeof(size_t); i++) - assert(p[i] == 0); - } assert(CHUNK_ADDR2BASE(ret) == ret); return (ret); } From 38067483c542adfe092644d1ecc103c6bc74add0 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 21 Jan 2013 20:04:42 -0800 Subject: [PATCH 12/21] Tighten valgrind integration. Tighten valgrind integration such that immediately after memory is validated or zeroed, valgrind is told to forget the memory's 'defined' state. The only place newly allocated memory should be left marked as 'defined' is in the public functions (e.g. calloc() and realloc()). 
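In standalone form, the idiom this change applies looks roughly like the following (the helper name is illustrative, and memcheck.h is assumed to be available; when Valgrind support is not configured, jemalloc compiles these macros away to no-ops):

    #include <stddef.h>
    #include <string.h>
    #include <valgrind/memcheck.h>

    /*
     * Internal zeroing: mark the range undefined, zero it, then mark it
     * undefined again, so that only a public entry point (e.g. calloc())
     * ends up granting 'defined' status to the caller.
     */
    static void
    internal_zero(void *ptr, size_t size)
    {
        VALGRIND_MAKE_MEM_UNDEFINED(ptr, size);
        memset(ptr, 0, size);
        VALGRIND_MAKE_MEM_UNDEFINED(ptr, size);
    }

    int
    main(void)
    {
        char buf[64];

        internal_zero(buf, sizeof(buf));
        return (0);
    }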
--- include/jemalloc/internal/tcache.h | 2 ++ src/arena.c | 50 +++++++++++++++++------------- src/chunk_dss.c | 1 + 3 files changed, 31 insertions(+), 22 deletions(-) diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index 38d735c8..6957da3f 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -320,6 +320,7 @@ tcache_alloc_small(tcache_t *tcache, size_t size, bool zero) } VALGRIND_MAKE_MEM_UNDEFINED(ret, size); memset(ret, 0, size); + VALGRIND_MAKE_MEM_UNDEFINED(ret, size); } if (config_stats) @@ -370,6 +371,7 @@ tcache_alloc_large(tcache_t *tcache, size_t size, bool zero) } else { VALGRIND_MAKE_MEM_UNDEFINED(ret, size); memset(ret, 0, size); + VALGRIND_MAKE_MEM_UNDEFINED(ret, size); } if (config_stats) diff --git a/src/arena.c b/src/arena.c index f9406c79..8d50f4d4 100644 --- a/src/arena.c +++ b/src/arena.c @@ -359,13 +359,29 @@ arena_run_reg_dalloc(arena_run_t *run, void *ptr) } static inline void -arena_chunk_validate_zeroed(arena_chunk_t *chunk, size_t run_ind) +arena_run_zero(arena_chunk_t *chunk, size_t run_ind, size_t npages) +{ + + VALGRIND_MAKE_MEM_UNDEFINED((void *)((uintptr_t)chunk + (run_ind << + LG_PAGE)), (npages << LG_PAGE)); + memset((void *)((uintptr_t)chunk + (run_ind << LG_PAGE)), 0, + (npages << LG_PAGE)); + VALGRIND_MAKE_MEM_UNDEFINED((void *)((uintptr_t)chunk + (run_ind << + LG_PAGE)), (npages << LG_PAGE)); +} + +static inline void +arena_run_page_validate_zeroed(arena_chunk_t *chunk, size_t run_ind) { size_t i; UNUSED size_t *p = (size_t *)((uintptr_t)chunk + (run_ind << LG_PAGE)); + VALGRIND_MAKE_MEM_DEFINED((void *)((uintptr_t)chunk + (run_ind << + LG_PAGE)), PAGE); for (i = 0; i < PAGE / sizeof(size_t); i++) assert(p[i] == 0); + VALGRIND_MAKE_MEM_UNDEFINED((void *)((uintptr_t)chunk + (run_ind << + LG_PAGE)), PAGE); } static void @@ -441,19 +457,10 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, for (i = 0; i < need_pages; i++) { if (arena_mapbits_unzeroed_get(chunk, run_ind+i) != 0) { - VALGRIND_MAKE_MEM_UNDEFINED( - (void *)((uintptr_t) - chunk + ((run_ind+i) << - LG_PAGE)), PAGE); - memset((void *)((uintptr_t) - chunk + ((run_ind+i) << - LG_PAGE)), 0, PAGE); + arena_run_zero(chunk, run_ind+i, + 1); } else if (config_debug) { - VALGRIND_MAKE_MEM_DEFINED( - (void *)((uintptr_t) - chunk + ((run_ind+i) << - LG_PAGE)), PAGE); - arena_chunk_validate_zeroed( + arena_run_page_validate_zeroed( chunk, run_ind+i); } } @@ -462,11 +469,7 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, * The run is dirty, so all pages must be * zeroed. 
*/ - VALGRIND_MAKE_MEM_UNDEFINED((void - *)((uintptr_t)chunk + (run_ind << - LG_PAGE)), (need_pages << LG_PAGE)); - memset((void *)((uintptr_t)chunk + (run_ind << - LG_PAGE)), 0, (need_pages << LG_PAGE)); + arena_run_zero(chunk, run_ind, need_pages); } } @@ -492,19 +495,21 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, */ if (config_debug && flag_dirty == 0 && arena_mapbits_unzeroed_get(chunk, run_ind) == 0) - arena_chunk_validate_zeroed(chunk, run_ind); + arena_run_page_validate_zeroed(chunk, run_ind); for (i = 1; i < need_pages - 1; i++) { arena_mapbits_small_set(chunk, run_ind+i, i, binind, 0); if (config_debug && flag_dirty == 0 && - arena_mapbits_unzeroed_get(chunk, run_ind+i) == 0) - arena_chunk_validate_zeroed(chunk, run_ind+i); + arena_mapbits_unzeroed_get(chunk, run_ind+i) == 0) { + arena_run_page_validate_zeroed(chunk, + run_ind+i); + } } arena_mapbits_small_set(chunk, run_ind+need_pages-1, need_pages-1, binind, flag_dirty); if (config_debug && flag_dirty == 0 && arena_mapbits_unzeroed_get(chunk, run_ind+need_pages-1) == 0) { - arena_chunk_validate_zeroed(chunk, + arena_run_page_validate_zeroed(chunk, run_ind+need_pages-1); } } @@ -1459,6 +1464,7 @@ arena_malloc_small(arena_t *arena, size_t size, bool zero) } VALGRIND_MAKE_MEM_UNDEFINED(ret, size); memset(ret, 0, size); + VALGRIND_MAKE_MEM_UNDEFINED(ret, size); } return (ret); diff --git a/src/chunk_dss.c b/src/chunk_dss.c index 24781cc5..d1aea930 100644 --- a/src/chunk_dss.c +++ b/src/chunk_dss.c @@ -127,6 +127,7 @@ chunk_alloc_dss(size_t size, size_t alignment, bool *zero) if (*zero) { VALGRIND_MAKE_MEM_UNDEFINED(ret, size); memset(ret, 0, size); + VALGRIND_MAKE_MEM_UNDEFINED(ret, size); } return (ret); } From 88393cb0eb9a046000d20809809d4adac11957ab Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 22 Jan 2013 08:45:43 -0800 Subject: [PATCH 13/21] Add and use JEMALLOC_ALWAYS_INLINE. Add JEMALLOC_ALWAYS_INLINE and use it to guarantee that the entire fast paths of the primary allocation/deallocation functions are inlined. 
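For reference, a self-contained sketch of what such a forced-inline macro typically expands to on GCC/Clang (the macro and function names below are illustrative, not the JEMALLOC_ALWAYS_INLINE definition verbatim):

    #include <stdio.h>

    /* Force inlining where the compiler supports it; else plain static inline. */
    #if defined(__GNUC__)
    #  define ALWAYS_INLINE static inline __attribute__((always_inline, unused))
    #else
    #  define ALWAYS_INLINE static inline
    #endif

    ALWAYS_INLINE size_t
    fast_path_round8(size_t size)
    {
        /* Trivial stand-in for a hot-path size computation. */
        return (size + 7) & ~(size_t)7;
    }

    int
    main(void)
    {
        printf("%zu\n", fast_path_round8(13));
        return (0);
    }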
--- include/jemalloc/internal/arena.h | 42 +++++++++--------- .../jemalloc/internal/jemalloc_internal.h.in | 44 +++++++++++-------- include/jemalloc/internal/tcache.h | 18 ++++---- src/jemalloc.c | 6 +-- 4 files changed, 59 insertions(+), 51 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 5ba16406..8fdee931 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -480,7 +480,7 @@ void arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr, #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_)) # ifdef JEMALLOC_ARENA_INLINE_A -JEMALLOC_INLINE arena_chunk_map_t * +JEMALLOC_ALWAYS_INLINE arena_chunk_map_t * arena_mapp_get(arena_chunk_t *chunk, size_t pageind) { @@ -490,21 +490,21 @@ arena_mapp_get(arena_chunk_t *chunk, size_t pageind) return (&chunk->map[pageind-map_bias]); } -JEMALLOC_INLINE size_t * +JEMALLOC_ALWAYS_INLINE size_t * arena_mapbitsp_get(arena_chunk_t *chunk, size_t pageind) { return (&arena_mapp_get(chunk, pageind)->bits); } -JEMALLOC_INLINE size_t +JEMALLOC_ALWAYS_INLINE size_t arena_mapbits_get(arena_chunk_t *chunk, size_t pageind) { return (*arena_mapbitsp_get(chunk, pageind)); } -JEMALLOC_INLINE size_t +JEMALLOC_ALWAYS_INLINE size_t arena_mapbits_unallocated_size_get(arena_chunk_t *chunk, size_t pageind) { size_t mapbits; @@ -514,7 +514,7 @@ arena_mapbits_unallocated_size_get(arena_chunk_t *chunk, size_t pageind) return (mapbits & ~PAGE_MASK); } -JEMALLOC_INLINE size_t +JEMALLOC_ALWAYS_INLINE size_t arena_mapbits_large_size_get(arena_chunk_t *chunk, size_t pageind) { size_t mapbits; @@ -525,7 +525,7 @@ arena_mapbits_large_size_get(arena_chunk_t *chunk, size_t pageind) return (mapbits & ~PAGE_MASK); } -JEMALLOC_INLINE size_t +JEMALLOC_ALWAYS_INLINE size_t arena_mapbits_small_runind_get(arena_chunk_t *chunk, size_t pageind) { size_t mapbits; @@ -536,7 +536,7 @@ arena_mapbits_small_runind_get(arena_chunk_t *chunk, size_t pageind) return (mapbits >> LG_PAGE); } -JEMALLOC_INLINE size_t +JEMALLOC_ALWAYS_INLINE size_t arena_mapbits_binind_get(arena_chunk_t *chunk, size_t pageind) { size_t mapbits; @@ -548,7 +548,7 @@ arena_mapbits_binind_get(arena_chunk_t *chunk, size_t pageind) return (binind); } -JEMALLOC_INLINE size_t +JEMALLOC_ALWAYS_INLINE size_t arena_mapbits_dirty_get(arena_chunk_t *chunk, size_t pageind) { size_t mapbits; @@ -557,7 +557,7 @@ arena_mapbits_dirty_get(arena_chunk_t *chunk, size_t pageind) return (mapbits & CHUNK_MAP_DIRTY); } -JEMALLOC_INLINE size_t +JEMALLOC_ALWAYS_INLINE size_t arena_mapbits_unzeroed_get(arena_chunk_t *chunk, size_t pageind) { size_t mapbits; @@ -566,7 +566,7 @@ arena_mapbits_unzeroed_get(arena_chunk_t *chunk, size_t pageind) return (mapbits & CHUNK_MAP_UNZEROED); } -JEMALLOC_INLINE size_t +JEMALLOC_ALWAYS_INLINE size_t arena_mapbits_large_get(arena_chunk_t *chunk, size_t pageind) { size_t mapbits; @@ -575,7 +575,7 @@ arena_mapbits_large_get(arena_chunk_t *chunk, size_t pageind) return (mapbits & CHUNK_MAP_LARGE); } -JEMALLOC_INLINE size_t +JEMALLOC_ALWAYS_INLINE size_t arena_mapbits_allocated_get(arena_chunk_t *chunk, size_t pageind) { size_t mapbits; @@ -584,7 +584,7 @@ arena_mapbits_allocated_get(arena_chunk_t *chunk, size_t pageind) return (mapbits & CHUNK_MAP_ALLOCATED); } -JEMALLOC_INLINE void +JEMALLOC_ALWAYS_INLINE void arena_mapbits_unallocated_set(arena_chunk_t *chunk, size_t pageind, size_t size, size_t flags) { @@ -597,7 +597,7 @@ arena_mapbits_unallocated_set(arena_chunk_t *chunk, size_t pageind, size_t size, *mapbitsp = 
size | CHUNK_MAP_BININD_INVALID | flags; } -JEMALLOC_INLINE void +JEMALLOC_ALWAYS_INLINE void arena_mapbits_unallocated_size_set(arena_chunk_t *chunk, size_t pageind, size_t size) { @@ -609,7 +609,7 @@ arena_mapbits_unallocated_size_set(arena_chunk_t *chunk, size_t pageind, *mapbitsp = size | (*mapbitsp & PAGE_MASK); } -JEMALLOC_INLINE void +JEMALLOC_ALWAYS_INLINE void arena_mapbits_large_set(arena_chunk_t *chunk, size_t pageind, size_t size, size_t flags) { @@ -624,7 +624,7 @@ arena_mapbits_large_set(arena_chunk_t *chunk, size_t pageind, size_t size, CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; } -JEMALLOC_INLINE void +JEMALLOC_ALWAYS_INLINE void arena_mapbits_large_binind_set(arena_chunk_t *chunk, size_t pageind, size_t binind) { @@ -637,7 +637,7 @@ arena_mapbits_large_binind_set(arena_chunk_t *chunk, size_t pageind, CHUNK_MAP_BININD_SHIFT); } -JEMALLOC_INLINE void +JEMALLOC_ALWAYS_INLINE void arena_mapbits_small_set(arena_chunk_t *chunk, size_t pageind, size_t runind, size_t binind, size_t flags) { @@ -653,7 +653,7 @@ arena_mapbits_small_set(arena_chunk_t *chunk, size_t pageind, size_t runind, flags | unzeroed | CHUNK_MAP_ALLOCATED; } -JEMALLOC_INLINE void +JEMALLOC_ALWAYS_INLINE void arena_mapbits_unzeroed_set(arena_chunk_t *chunk, size_t pageind, size_t unzeroed) { @@ -701,7 +701,7 @@ arena_prof_accum(arena_t *arena, uint64_t accumbytes) malloc_mutex_unlock(&arena->lock); } -JEMALLOC_INLINE size_t +JEMALLOC_ALWAYS_INLINE size_t arena_ptr_small_binind_get(const void *ptr, size_t mapbits) { size_t binind; @@ -896,7 +896,7 @@ arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx) arena_mapp_get(chunk, pageind)->prof_ctx = ctx; } -JEMALLOC_INLINE void * +JEMALLOC_ALWAYS_INLINE void * arena_malloc(arena_t *arena, size_t size, bool zero, bool try_tcache) { tcache_t *tcache; @@ -927,7 +927,7 @@ arena_malloc(arena_t *arena, size_t size, bool zero, bool try_tcache) } /* Return the size of the allocation pointed to by ptr. */ -JEMALLOC_INLINE size_t +JEMALLOC_ALWAYS_INLINE size_t arena_salloc(const void *ptr, bool demote) { size_t ret; @@ -973,7 +973,7 @@ arena_salloc(const void *ptr, bool demote) return (ret); } -JEMALLOC_INLINE void +JEMALLOC_ALWAYS_INLINE void arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr, bool try_tcache) { size_t pageind, mapbits; diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 484f351b..fb53e131 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -233,10 +233,17 @@ static const bool config_ivsalloc = #ifdef JEMALLOC_DEBUG /* Disable inlining to make debugging easier. */ +# define JEMALLOC_ALWAYS_INLINE # define JEMALLOC_INLINE # define inline #else # define JEMALLOC_ENABLE_INLINE +# ifdef JEMALLOC_HAVE_ATTR +# define JEMALLOC_ALWAYS_INLINE \ + static JEMALLOC_ATTR(unused) JEMALLOC_ATTR(always_inline) +# else +# define JEMALLOC_ALWAYS_INLINE static inline +# endif # define JEMALLOC_INLINE static inline # ifdef _MSC_VER # define inline _inline @@ -595,13 +602,14 @@ arena_t *choose_arena(arena_t *arena); * for allocations. */ malloc_tsd_externs(arenas, arena_t *) -malloc_tsd_funcs(JEMALLOC_INLINE, arenas, arena_t *, NULL, arenas_cleanup) +malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, arenas, arena_t *, NULL, + arenas_cleanup) /* * Compute usable size that would result from allocating an object with the * specified size. 
*/ -JEMALLOC_INLINE size_t +JEMALLOC_ALWAYS_INLINE size_t s2u(size_t size) { @@ -616,7 +624,7 @@ s2u(size_t size) * Compute usable size that would result from allocating an object with the * specified size and alignment. */ -JEMALLOC_INLINE size_t +JEMALLOC_ALWAYS_INLINE size_t sa2u(size_t size, size_t alignment) { size_t usize; @@ -761,7 +769,7 @@ malloc_tsd_protos(JEMALLOC_ATTR(unused), thread_allocated, thread_allocated_t) #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) -JEMALLOC_INLINE void * +JEMALLOC_ALWAYS_INLINE void * imallocx(size_t size, bool try_tcache, arena_t *arena) { @@ -773,14 +781,14 @@ imallocx(size_t size, bool try_tcache, arena_t *arena) return (huge_malloc(size, false)); } -JEMALLOC_INLINE void * +JEMALLOC_ALWAYS_INLINE void * imalloc(size_t size) { return (imallocx(size, true, NULL)); } -JEMALLOC_INLINE void * +JEMALLOC_ALWAYS_INLINE void * icallocx(size_t size, bool try_tcache, arena_t *arena) { @@ -790,14 +798,14 @@ icallocx(size_t size, bool try_tcache, arena_t *arena) return (huge_malloc(size, true)); } -JEMALLOC_INLINE void * +JEMALLOC_ALWAYS_INLINE void * icalloc(size_t size) { return (icallocx(size, true, NULL)); } -JEMALLOC_INLINE void * +JEMALLOC_ALWAYS_INLINE void * ipallocx(size_t usize, size_t alignment, bool zero, bool try_tcache, arena_t *arena) { @@ -822,7 +830,7 @@ ipallocx(size_t usize, size_t alignment, bool zero, bool try_tcache, return (ret); } -JEMALLOC_INLINE void * +JEMALLOC_ALWAYS_INLINE void * ipalloc(size_t usize, size_t alignment, bool zero) { @@ -834,7 +842,7 @@ ipalloc(size_t usize, size_t alignment, bool zero) * void *ptr = [...] * size_t sz = isalloc(ptr, config_prof); */ -JEMALLOC_INLINE size_t +JEMALLOC_ALWAYS_INLINE size_t isalloc(const void *ptr, bool demote) { size_t ret; @@ -853,7 +861,7 @@ isalloc(const void *ptr, bool demote) return (ret); } -JEMALLOC_INLINE size_t +JEMALLOC_ALWAYS_INLINE size_t ivsalloc(const void *ptr, bool demote) { @@ -886,7 +894,7 @@ p2rz(const void *ptr) return (u2rz(usize)); } -JEMALLOC_INLINE void +JEMALLOC_ALWAYS_INLINE void idallocx(void *ptr, bool try_tcache) { arena_chunk_t *chunk; @@ -900,14 +908,14 @@ idallocx(void *ptr, bool try_tcache) huge_dalloc(ptr, true); } -JEMALLOC_INLINE void +JEMALLOC_ALWAYS_INLINE void idalloc(void *ptr) { idallocx(ptr, true); } -JEMALLOC_INLINE void +JEMALLOC_ALWAYS_INLINE void iqallocx(void *ptr, bool try_tcache) { @@ -917,14 +925,14 @@ iqallocx(void *ptr, bool try_tcache) idallocx(ptr, try_tcache); } -JEMALLOC_INLINE void +JEMALLOC_ALWAYS_INLINE void iqalloc(void *ptr) { iqallocx(ptr, true); } -JEMALLOC_INLINE void * +JEMALLOC_ALWAYS_INLINE void * irallocx(void *ptr, size_t size, size_t extra, size_t alignment, bool zero, bool no_move, bool try_tcache_alloc, bool try_tcache_dalloc, arena_t *arena) { @@ -993,7 +1001,7 @@ irallocx(void *ptr, size_t size, size_t extra, size_t alignment, bool zero, } } -JEMALLOC_INLINE void * +JEMALLOC_ALWAYS_INLINE void * iralloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero, bool no_move) { @@ -1003,7 +1011,7 @@ iralloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero, } malloc_tsd_externs(thread_allocated, thread_allocated_t) -malloc_tsd_funcs(JEMALLOC_INLINE, thread_allocated, thread_allocated_t, +malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, thread_allocated, thread_allocated_t, THREAD_ALLOCATED_INITIALIZER, malloc_tsd_no_cleanup) #endif diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index 6957da3f..71900c2f 100644 --- 
a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -140,11 +140,11 @@ void tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size); #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_TCACHE_C_)) /* Map of thread-specific caches. */ malloc_tsd_externs(tcache, tcache_t *) -malloc_tsd_funcs(JEMALLOC_INLINE, tcache, tcache_t *, NULL, +malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, tcache, tcache_t *, NULL, tcache_thread_cleanup) /* Per thread flag that allows thread caches to be disabled. */ malloc_tsd_externs(tcache_enabled, tcache_enabled_t) -malloc_tsd_funcs(JEMALLOC_INLINE, tcache_enabled, tcache_enabled_t, +malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, tcache_enabled, tcache_enabled_t, tcache_enabled_default, malloc_tsd_no_cleanup) JEMALLOC_INLINE void @@ -206,7 +206,7 @@ tcache_enabled_set(bool enabled) } } -JEMALLOC_INLINE tcache_t * +JEMALLOC_ALWAYS_INLINE tcache_t * tcache_get(bool create) { tcache_t *tcache; @@ -258,7 +258,7 @@ tcache_get(bool create) return (tcache); } -JEMALLOC_INLINE void +JEMALLOC_ALWAYS_INLINE void tcache_event(tcache_t *tcache) { @@ -271,7 +271,7 @@ tcache_event(tcache_t *tcache) tcache_event_hard(tcache); } -JEMALLOC_INLINE void * +JEMALLOC_ALWAYS_INLINE void * tcache_alloc_easy(tcache_bin_t *tbin) { void *ret; @@ -287,7 +287,7 @@ tcache_alloc_easy(tcache_bin_t *tbin) return (ret); } -JEMALLOC_INLINE void * +JEMALLOC_ALWAYS_INLINE void * tcache_alloc_small(tcache_t *tcache, size_t size, bool zero) { void *ret; @@ -331,7 +331,7 @@ tcache_alloc_small(tcache_t *tcache, size_t size, bool zero) return (ret); } -JEMALLOC_INLINE void * +JEMALLOC_ALWAYS_INLINE void * tcache_alloc_large(tcache_t *tcache, size_t size, bool zero) { void *ret; @@ -384,7 +384,7 @@ tcache_alloc_large(tcache_t *tcache, size_t size, bool zero) return (ret); } -JEMALLOC_INLINE void +JEMALLOC_ALWAYS_INLINE void tcache_dalloc_small(tcache_t *tcache, void *ptr, size_t binind) { tcache_bin_t *tbin; @@ -408,7 +408,7 @@ tcache_dalloc_small(tcache_t *tcache, void *ptr, size_t binind) tcache_event(tcache); } -JEMALLOC_INLINE void +JEMALLOC_ALWAYS_INLINE void tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size) { size_t binind; diff --git a/src/jemalloc.c b/src/jemalloc.c index ec88700a..58e18df4 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -279,7 +279,7 @@ arenas_cleanup(void *arg) malloc_mutex_unlock(&arenas_lock); } -static inline bool +static JEMALLOC_ATTR(always_inline) bool malloc_init(void) { @@ -892,7 +892,7 @@ JEMALLOC_ATTR(nonnull(1)) * Avoid any uncertainty as to how many backtrace frames to ignore in * PROF_ALLOC_PREP(). */ -JEMALLOC_ATTR(noinline) +JEMALLOC_NOINLINE #endif static int imemalign(void **memptr, size_t alignment, size_t size, @@ -1378,7 +1378,7 @@ je_mallctlbymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, */ #ifdef JEMALLOC_EXPERIMENTAL -JEMALLOC_INLINE void * +static JEMALLOC_ATTR(always_inline) void * iallocm(size_t usize, size_t alignment, bool zero, bool try_tcache, arena_t *arena) { From 7329a4f038ed096f3cfa11cb60433f44009fbe16 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 22 Jan 2013 10:53:29 -0800 Subject: [PATCH 14/21] Fix AC_PATH_PROG() calls to specify default. Fix AC_PATH_PROG() calls to specify 'false' as the default, so that if the configure script fails to find a program, the false program is instead called, and an error occurs. Prior to this fix, if xsltproc could not be found, make would not report an error due to the leading -o in the xsltproc invocation. Reported by David Reiss. 
--- configure.ac | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/configure.ac b/configure.ac index aee74828..8c130827 100644 --- a/configure.ac +++ b/configure.ac @@ -86,7 +86,7 @@ MANDIR=`eval echo $MANDIR` AC_SUBST([MANDIR]) dnl Support for building documentation. -AC_PATH_PROG([XSLTPROC], [xsltproc], , [$PATH]) +AC_PATH_PROG([XSLTPROC], [xsltproc], [false], [$PATH]) if test -d "/usr/share/xml/docbook/stylesheet/docbook-xsl" ; then DEFAULT_XSLROOT="/usr/share/xml/docbook/stylesheet/docbook-xsl" elif test -d "/usr/share/sgml/docbook/xsl-stylesheets" ; then @@ -403,9 +403,9 @@ AC_SUBST([enable_autogen]) AC_PROG_INSTALL AC_PROG_RANLIB -AC_PATH_PROG([AR], [ar], , [$PATH]) -AC_PATH_PROG([LD], [ld], , [$PATH]) -AC_PATH_PROG([AUTOCONF], [autoconf], , [$PATH]) +AC_PATH_PROG([AR], [ar], [false], [$PATH]) +AC_PATH_PROG([LD], [ld], [false], [$PATH]) +AC_PATH_PROG([AUTOCONF], [autoconf], [false], [$PATH]) public_syms="malloc_conf malloc_message malloc calloc posix_memalign aligned_alloc realloc free malloc_usable_size malloc_stats_print mallctl mallctlnametomib mallctlbymib" From ae03bf6a57f0dd6a009288fa6477a300cabf6d5e Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 22 Jan 2013 12:02:08 -0800 Subject: [PATCH 15/21] Update hash from MurmurHash2 to MurmurHash3. Update hash from MurmurHash2 to MurmurHash3, primarily because the latter generates 128 bits in a single call for no extra cost, which simplifies integration with cuckoo hashing. --- include/jemalloc/internal/ckh.h | 8 +- include/jemalloc/internal/hash.h | 333 ++++++++++++++++-- .../jemalloc/internal/jemalloc_internal.h.in | 1 + include/jemalloc/internal/private_namespace.h | 15 + src/ckh.c | 81 +---- src/prof.c | 28 +- 6 files changed, 337 insertions(+), 129 deletions(-) diff --git a/include/jemalloc/internal/ckh.h b/include/jemalloc/internal/ckh.h index 05d1fc03..50c39ed9 100644 --- a/include/jemalloc/internal/ckh.h +++ b/include/jemalloc/internal/ckh.h @@ -5,7 +5,7 @@ typedef struct ckh_s ckh_t; typedef struct ckhc_s ckhc_t; /* Typedefs to allow easy function pointer passing. */ -typedef void ckh_hash_t (const void *, unsigned, size_t *, size_t *); +typedef void ckh_hash_t (const void *, size_t[2]); typedef bool ckh_keycomp_t (const void *, const void *); /* Maintain counters used to get an idea of performance. */ @@ -75,11 +75,9 @@ bool ckh_insert(ckh_t *ckh, const void *key, const void *data); bool ckh_remove(ckh_t *ckh, const void *searchkey, void **key, void **data); bool ckh_search(ckh_t *ckh, const void *seachkey, void **key, void **data); -void ckh_string_hash(const void *key, unsigned minbits, size_t *hash1, - size_t *hash2); +void ckh_string_hash(const void *key, size_t r_hash[2]); bool ckh_string_keycomp(const void *k1, const void *k2); -void ckh_pointer_hash(const void *key, unsigned minbits, size_t *hash1, - size_t *hash2); +void ckh_pointer_hash(const void *key, size_t r_hash[2]); bool ckh_pointer_keycomp(const void *k1, const void *k2); #endif /* JEMALLOC_H_EXTERNS */ diff --git a/include/jemalloc/internal/hash.h b/include/jemalloc/internal/hash.h index 2f501f5d..56ecc793 100644 --- a/include/jemalloc/internal/hash.h +++ b/include/jemalloc/internal/hash.h @@ -1,3 +1,8 @@ +/* + * The following hash function is based on MurmurHash3, placed into the public + * domain by Austin Appleby. See http://code.google.com/p/smhasher/ for + * details. 
+ */ /******************************************************************************/ #ifdef JEMALLOC_H_TYPES @@ -14,55 +19,311 @@ #ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE -uint64_t hash(const void *key, size_t len, uint64_t seed); +void hash(const void *key, size_t len, const uint32_t seed, + size_t r_hash[2]); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_HASH_C_)) -/* - * The following hash function is based on MurmurHash64A(), placed into the - * public domain by Austin Appleby. See http://murmurhash.googlepages.com/ for - * details. - */ -JEMALLOC_INLINE uint64_t -hash(const void *key, size_t len, uint64_t seed) +/******************************************************************************/ +/* Internal implementation. */ +JEMALLOC_INLINE uint32_t +hash_rotl_32(uint32_t x, int8_t r) { - const uint64_t m = UINT64_C(0xc6a4a7935bd1e995); - const int r = 47; - uint64_t h = seed ^ (len * m); - const uint64_t *data = (const uint64_t *)key; - const uint64_t *end = data + (len/8); - const unsigned char *data2; - assert(((uintptr_t)key & 0x7) == 0); + return (x << r) | (x >> (32 - r)); +} - while(data != end) { - uint64_t k = *data++; +JEMALLOC_INLINE uint64_t +hash_rotl_64(uint64_t x, int8_t r) +{ + return (x << r) | (x >> (64 - r)); +} - k *= m; - k ^= k >> r; - k *= m; +JEMALLOC_INLINE uint32_t +hash_get_block_32(const uint32_t *p, int i) +{ - h ^= k; - h *= m; + return p[i]; +} + +JEMALLOC_INLINE uint64_t +hash_get_block_64(const uint64_t *p, int i) +{ + + return p[i]; +} + +JEMALLOC_INLINE uint32_t +hash_fmix_32(uint32_t h) +{ + + h ^= h >> 16; + h *= 0x85ebca6b; + h ^= h >> 13; + h *= 0xc2b2ae35; + h ^= h >> 16; + + return h; +} + +JEMALLOC_INLINE uint64_t +hash_fmix_64(uint64_t k) +{ + + k ^= k >> 33; + k *= QU(0xff51afd7ed558ccdLLU); + k ^= k >> 33; + k *= QU(0xc4ceb9fe1a85ec53LLU); + k ^= k >> 33; + + return k; +} + +JEMALLOC_INLINE uint32_t +hash_x86_32(const void *key, int len, uint32_t seed) +{ + const uint8_t *data = (const uint8_t *) key; + const int nblocks = len / 4; + + uint32_t h1 = seed; + + const uint32_t c1 = 0xcc9e2d51; + const uint32_t c2 = 0x1b873593; + + /* body */ + { + const uint32_t *blocks = (const uint32_t *) (data + nblocks*4); + int i; + + for (i = -nblocks; i; i++) { + uint32_t k1 = hash_get_block_32(blocks, i); + + k1 *= c1; + k1 = hash_rotl_32(k1, 15); + k1 *= c2; + + h1 ^= k1; + h1 = hash_rotl_32(h1, 13); + h1 = h1*5 + 0xe6546b64; + } } - data2 = (const unsigned char *)data; - switch(len & 7) { - case 7: h ^= ((uint64_t)(data2[6])) << 48; - case 6: h ^= ((uint64_t)(data2[5])) << 40; - case 5: h ^= ((uint64_t)(data2[4])) << 32; - case 4: h ^= ((uint64_t)(data2[3])) << 24; - case 3: h ^= ((uint64_t)(data2[2])) << 16; - case 2: h ^= ((uint64_t)(data2[1])) << 8; - case 1: h ^= ((uint64_t)(data2[0])); - h *= m; + /* tail */ + { + const uint8_t *tail = (const uint8_t *) (data + nblocks*4); + + uint32_t k1 = 0; + + switch (len & 3) { + case 3: k1 ^= tail[2] << 16; + case 2: k1 ^= tail[1] << 8; + case 1: k1 ^= tail[0]; k1 *= c1; k1 = hash_rotl_32(k1, 15); + k1 *= c2; h1 ^= k1; + } } - h ^= h >> r; - h *= m; - h ^= h >> r; + /* finalization */ + h1 ^= len; - return (h); + h1 = hash_fmix_32(h1); + + return h1; +} + +UNUSED JEMALLOC_INLINE void +hash_x86_128(const void *key, const int len, uint32_t seed, + uint64_t r_out[2]) +{ + const uint8_t * data = (const uint8_t *) key; + const int nblocks = len / 16; + + uint32_t h1 = seed; + uint32_t h2 = seed; + uint32_t h3 = seed; + uint32_t h4 = seed; + + const uint32_t c1 = 0x239b961b; + 
const uint32_t c2 = 0xab0e9789;
+	const uint32_t c3 = 0x38b34ae5;
+	const uint32_t c4 = 0xa1e38b93;
+
+	/* body */
+	{
+		const uint32_t *blocks = (const uint32_t *) (data + nblocks*16);
+		int i;
+
+		for (i = -nblocks; i; i++) {
+			uint32_t k1 = hash_get_block_32(blocks, i*4 + 0);
+			uint32_t k2 = hash_get_block_32(blocks, i*4 + 1);
+			uint32_t k3 = hash_get_block_32(blocks, i*4 + 2);
+			uint32_t k4 = hash_get_block_32(blocks, i*4 + 3);
+
+			k1 *= c1; k1 = hash_rotl_32(k1, 15); k1 *= c2; h1 ^= k1;
+
+			h1 = hash_rotl_32(h1, 19); h1 += h2;
+			h1 = h1*5 + 0x561ccd1b;
+
+			k2 *= c2; k2 = hash_rotl_32(k2, 16); k2 *= c3; h2 ^= k2;
+
+			h2 = hash_rotl_32(h2, 17); h2 += h3;
+			h2 = h2*5 + 0x0bcaa747;
+
+			k3 *= c3; k3 = hash_rotl_32(k3, 17); k3 *= c4; h3 ^= k3;
+
+			h3 = hash_rotl_32(h3, 15); h3 += h4;
+			h3 = h3*5 + 0x96cd1c35;
+
+			k4 *= c4; k4 = hash_rotl_32(k4, 18); k4 *= c1; h4 ^= k4;
+
+			h4 = hash_rotl_32(h4, 13); h4 += h1;
+			h4 = h4*5 + 0x32ac3b17;
+		}
+	}
+
+	/* tail */
+	{
+		const uint8_t *tail = (const uint8_t *) (data + nblocks*16);
+		uint32_t k1 = 0;
+		uint32_t k2 = 0;
+		uint32_t k3 = 0;
+		uint32_t k4 = 0;
+
+		switch (len & 15) {
+		case 15: k4 ^= tail[14] << 16;
+		case 14: k4 ^= tail[13] << 8;
+		case 13: k4 ^= tail[12] << 0;
+			k4 *= c4; k4 = hash_rotl_32(k4, 18); k4 *= c1; h4 ^= k4;
+
+		case 12: k3 ^= tail[11] << 24;
+		case 11: k3 ^= tail[10] << 16;
+		case 10: k3 ^= tail[ 9] << 8;
+		case 9: k3 ^= tail[ 8] << 0;
+			k3 *= c3; k3 = hash_rotl_32(k3, 17); k3 *= c4; h3 ^= k3;
+
+		case 8: k2 ^= tail[ 7] << 24;
+		case 7: k2 ^= tail[ 6] << 16;
+		case 6: k2 ^= tail[ 5] << 8;
+		case 5: k2 ^= tail[ 4] << 0;
+			k2 *= c2; k2 = hash_rotl_32(k2, 16); k2 *= c3; h2 ^= k2;
+
+		case 4: k1 ^= tail[ 3] << 24;
+		case 3: k1 ^= tail[ 2] << 16;
+		case 2: k1 ^= tail[ 1] << 8;
+		case 1: k1 ^= tail[ 0] << 0;
+			k1 *= c1; k1 = hash_rotl_32(k1, 15); k1 *= c2; h1 ^= k1;
+		}
+	}
+
+	/* finalization */
+	h1 ^= len; h2 ^= len; h3 ^= len; h4 ^= len;
+
+	h1 += h2; h1 += h3; h1 += h4;
+	h2 += h1; h3 += h1; h4 += h1;
+
+	h1 = hash_fmix_32(h1);
+	h2 = hash_fmix_32(h2);
+	h3 = hash_fmix_32(h3);
+	h4 = hash_fmix_32(h4);
+
+	h1 += h2; h1 += h3; h1 += h4;
+	h2 += h1; h3 += h1; h4 += h1;
+
+	r_out[0] = (((uint64_t) h2) << 32) | h1;
+	r_out[1] = (((uint64_t) h4) << 32) | h3;
+}
+
+UNUSED JEMALLOC_INLINE void
+hash_x64_128(const void *key, const int len, const uint32_t seed,
+    uint64_t r_out[2])
+{
+	const uint8_t *data = (const uint8_t *) key;
+	const int nblocks = len / 16;
+
+	uint64_t h1 = seed;
+	uint64_t h2 = seed;
+
+	const uint64_t c1 = QU(0x87c37b91114253d5LLU);
+	const uint64_t c2 = QU(0x4cf5ad432745937fLLU);
+
+	/* body */
+	{
+		const uint64_t *blocks = (const uint64_t *) (data);
+		int i;
+
+		for (i = 0; i < nblocks; i++) {
+			uint64_t k1 = hash_get_block_64(blocks, i*2 + 0);
+			uint64_t k2 = hash_get_block_64(blocks, i*2 + 1);
+
+			k1 *= c1; k1 = hash_rotl_64(k1, 31); k1 *= c2; h1 ^= k1;
+
+			h1 = hash_rotl_64(h1, 27); h1 += h2;
+			h1 = h1*5 + 0x52dce729;
+
+			k2 *= c2; k2 = hash_rotl_64(k2, 33); k2 *= c1; h2 ^= k2;
+
+			h2 = hash_rotl_64(h2, 31); h2 += h1;
+			h2 = h2*5 + 0x38495ab5;
+		}
+	}
+
+	/* tail */
+	{
+		const uint8_t *tail = (const uint8_t*)(data + nblocks*16);
+		uint64_t k1 = 0;
+		uint64_t k2 = 0;
+
+		switch (len & 15) {
+		case 15: k2 ^= ((uint64_t)(tail[14])) << 48;
+		case 14: k2 ^= ((uint64_t)(tail[13])) << 40;
+		case 13: k2 ^= ((uint64_t)(tail[12])) << 32;
+		case 12: k2 ^= ((uint64_t)(tail[11])) << 24;
+		case 11: k2 ^= ((uint64_t)(tail[10])) << 16;
+		case 10: k2 ^= ((uint64_t)(tail[ 9])) << 8;
+		case 9: k2 ^= ((uint64_t)(tail[ 8])) << 0;
+			k2 *= c2; k2 = hash_rotl_64(k2, 33); k2 *= c1; h2 ^= k2;
+
+		case 8: k1 ^= ((uint64_t)(tail[ 7])) << 56;
+		case 7: k1 ^= ((uint64_t)(tail[ 6])) << 48;
+		case 6: k1 ^= ((uint64_t)(tail[ 5])) << 40;
+		case 5: k1 ^= ((uint64_t)(tail[ 4])) << 32;
+		case 4: k1 ^= ((uint64_t)(tail[ 3])) << 24;
+		case 3: k1 ^= ((uint64_t)(tail[ 2])) << 16;
+		case 2: k1 ^= ((uint64_t)(tail[ 1])) << 8;
+		case 1: k1 ^= ((uint64_t)(tail[ 0])) << 0;
+			k1 *= c1; k1 = hash_rotl_64(k1, 31); k1 *= c2; h1 ^= k1;
+		}
+	}
+
+	/* finalization */
+	h1 ^= len; h2 ^= len;
+
+	h1 += h2;
+	h2 += h1;
+
+	h1 = hash_fmix_64(h1);
+	h2 = hash_fmix_64(h2);
+
+	h1 += h2;
+	h2 += h1;
+
+	r_out[0] = h1;
+	r_out[1] = h2;
+}
+
+
+/******************************************************************************/
+/* API. */
+JEMALLOC_INLINE void
+hash(const void *key, size_t len, const uint32_t seed, size_t r_hash[2])
+{
+#if (LG_SIZEOF_PTR == 3)
+	hash_x64_128(key, len, seed, (uint64_t *)r_hash);
+#else
+	uint64_t hashes[2];
+	hash_x86_128(key, len, seed, hashes);
+	r_hash[0] = (size_t)hashes[0];
+	r_hash[1] = (size_t)hashes[1];
+#endif
 }
 #endif
diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in
index fb53e131..13a2ffb7 100644
--- a/include/jemalloc/internal/jemalloc_internal.h.in
+++ b/include/jemalloc/internal/jemalloc_internal.h.in
@@ -226,6 +226,7 @@ static const bool config_ivsalloc =
 #define ALLOCM_LG_ALIGN_MASK ((int)0x3f)
 
 #define ZU(z) ((size_t)z)
+#define QU(q) ((uint64_t)q)
 
 #ifndef __DECONST
 # define __DECONST(type, var) ((type)(uintptr_t)(const void *)(var))
diff --git a/include/jemalloc/internal/private_namespace.h b/include/jemalloc/internal/private_namespace.h
index 951df24b..903fb4df 100644
--- a/include/jemalloc/internal/private_namespace.h
+++ b/include/jemalloc/internal/private_namespace.h
@@ -65,6 +65,7 @@
 #define arenas_tsd_boot JEMALLOC_N(arenas_tsd_boot)
 #define arenas_tsd_cleanup_wrapper JEMALLOC_N(arenas_tsd_cleanup_wrapper)
 #define arenas_tsd_get JEMALLOC_N(arenas_tsd_get)
+#define arenas_tsd_get_wrapper JEMALLOC_N(arenas_tsd_get_wrapper)
 #define arenas_tsd_set JEMALLOC_N(arenas_tsd_set)
 #define atomic_add_u JEMALLOC_N(atomic_add_u)
 #define atomic_add_uint32 JEMALLOC_N(atomic_add_uint32)
@@ -176,6 +177,15 @@
 #define extent_tree_szad_search JEMALLOC_N(extent_tree_szad_search)
 #define get_errno JEMALLOC_N(get_errno)
 #define hash JEMALLOC_N(hash)
+#define hash_fmix_32 JEMALLOC_N(hash_fmix_32)
+#define hash_fmix_64 JEMALLOC_N(hash_fmix_64)
+#define hash_get_block_32 JEMALLOC_N(hash_get_block_32)
+#define hash_get_block_64 JEMALLOC_N(hash_get_block_64)
+#define hash_rotl_32 JEMALLOC_N(hash_rotl_32)
+#define hash_rotl_64 JEMALLOC_N(hash_rotl_64)
+#define hash_x64_128 JEMALLOC_N(hash_x64_128)
+#define hash_x86_128 JEMALLOC_N(hash_x86_128)
+#define hash_x86_32 JEMALLOC_N(hash_x86_32)
 #define huge_allocated JEMALLOC_N(huge_allocated)
 #define huge_boot JEMALLOC_N(huge_boot)
 #define huge_dalloc JEMALLOC_N(huge_dalloc)
@@ -293,12 +303,14 @@
 #define prof_tdata_tsd_boot JEMALLOC_N(prof_tdata_tsd_boot)
 #define prof_tdata_tsd_cleanup_wrapper JEMALLOC_N(prof_tdata_tsd_cleanup_wrapper)
 #define prof_tdata_tsd_get JEMALLOC_N(prof_tdata_tsd_get)
+#define prof_tdata_tsd_get_wrapper JEMALLOC_N(prof_tdata_tsd_get_wrapper)
 #define prof_tdata_tsd_set JEMALLOC_N(prof_tdata_tsd_set)
 #define quarantine JEMALLOC_N(quarantine)
 #define quarantine_boot JEMALLOC_N(quarantine_boot)
 #define quarantine_tsd_boot JEMALLOC_N(quarantine_tsd_boot)
 #define quarantine_tsd_cleanup_wrapper JEMALLOC_N(quarantine_tsd_cleanup_wrapper)
 #define quarantine_tsd_get JEMALLOC_N(quarantine_tsd_get)
+#define quarantine_tsd_get_wrapper JEMALLOC_N(quarantine_tsd_get_wrapper)
 #define quarantine_tsd_set JEMALLOC_N(quarantine_tsd_set)
 #define register_zone JEMALLOC_N(register_zone)
 #define rtree_get JEMALLOC_N(rtree_get)
@@ -342,6 +354,7 @@
 #define tcache_enabled_tsd_boot JEMALLOC_N(tcache_enabled_tsd_boot)
 #define tcache_enabled_tsd_cleanup_wrapper JEMALLOC_N(tcache_enabled_tsd_cleanup_wrapper)
 #define tcache_enabled_tsd_get JEMALLOC_N(tcache_enabled_tsd_get)
+#define tcache_enabled_tsd_get_wrapper JEMALLOC_N(tcache_enabled_tsd_get_wrapper)
 #define tcache_enabled_tsd_set JEMALLOC_N(tcache_enabled_tsd_set)
 #define tcache_event JEMALLOC_N(tcache_event)
 #define tcache_event_hard JEMALLOC_N(tcache_event_hard)
@@ -357,6 +370,7 @@
 #define tcache_tsd_boot JEMALLOC_N(tcache_tsd_boot)
 #define tcache_tsd_cleanup_wrapper JEMALLOC_N(tcache_tsd_cleanup_wrapper)
 #define tcache_tsd_get JEMALLOC_N(tcache_tsd_get)
+#define tcache_tsd_get_wrapper JEMALLOC_N(tcache_tsd_get_wrapper)
 #define tcache_tsd_set JEMALLOC_N(tcache_tsd_set)
 #define thread_allocated_booted JEMALLOC_N(thread_allocated_booted)
 #define thread_allocated_initialized JEMALLOC_N(thread_allocated_initialized)
@@ -365,5 +379,6 @@
 #define thread_allocated_tsd_boot JEMALLOC_N(thread_allocated_tsd_boot)
 #define thread_allocated_tsd_cleanup_wrapper JEMALLOC_N(thread_allocated_tsd_cleanup_wrapper)
 #define thread_allocated_tsd_get JEMALLOC_N(thread_allocated_tsd_get)
+#define thread_allocated_tsd_get_wrapper JEMALLOC_N(thread_allocated_tsd_get_wrapper)
 #define thread_allocated_tsd_set JEMALLOC_N(thread_allocated_tsd_set)
 #define u2rz JEMALLOC_N(u2rz)
diff --git a/src/ckh.c b/src/ckh.c
index 742a950b..e58980de 100644
--- a/src/ckh.c
+++ b/src/ckh.c
@@ -70,20 +70,20 @@ ckh_bucket_search(ckh_t *ckh, size_t bucket, const void *key)
 JEMALLOC_INLINE size_t
 ckh_isearch(ckh_t *ckh, const void *key)
 {
-	size_t hash1, hash2, bucket, cell;
+	size_t hashes[2], bucket, cell;
 
 	assert(ckh != NULL);
 
-	ckh->hash(key, ckh->lg_curbuckets, &hash1, &hash2);
+	ckh->hash(key, hashes);
 
 	/* Search primary bucket. */
-	bucket = hash1 & ((ZU(1) << ckh->lg_curbuckets) - 1);
+	bucket = hashes[0] & ((ZU(1) << ckh->lg_curbuckets) - 1);
 	cell = ckh_bucket_search(ckh, bucket, key);
 	if (cell != SIZE_T_MAX)
 		return (cell);
 
 	/* Search secondary bucket. */
-	bucket = hash2 & ((ZU(1) << ckh->lg_curbuckets) - 1);
+	bucket = hashes[1] & ((ZU(1) << ckh->lg_curbuckets) - 1);
 	cell = ckh_bucket_search(ckh, bucket, key);
 	return (cell);
 }
@@ -126,7 +126,7 @@ ckh_evict_reloc_insert(ckh_t *ckh, size_t argbucket, void const **argkey,
 {
 	const void *key, *data, *tkey, *tdata;
 	ckhc_t *cell;
-	size_t hash1, hash2, bucket, tbucket;
+	size_t hashes[2], bucket, tbucket;
 	unsigned i;
 
 	bucket = argbucket;
@@ -155,10 +155,11 @@ ckh_evict_reloc_insert(ckh_t *ckh, size_t argbucket, void const **argkey,
 #endif
 
 		/* Find the alternate bucket for the evicted item. */
-		ckh->hash(key, ckh->lg_curbuckets, &hash1, &hash2);
-		tbucket = hash2 & ((ZU(1) << ckh->lg_curbuckets) - 1);
+		ckh->hash(key, hashes);
+		tbucket = hashes[1] & ((ZU(1) << ckh->lg_curbuckets) - 1);
 		if (tbucket == bucket) {
-			tbucket = hash1 & ((ZU(1) << ckh->lg_curbuckets) - 1);
+			tbucket = hashes[0] & ((ZU(1) << ckh->lg_curbuckets)
+			    - 1);
 			/*
 			 * It may be that (tbucket == bucket) still, if the
 			 * item's hashes both indicate this bucket.  However,
@@ -192,19 +193,19 @@ ckh_evict_reloc_insert(ckh_t *ckh, size_t argbucket, void const **argkey,
 JEMALLOC_INLINE bool
 ckh_try_insert(ckh_t *ckh, void const**argkey, void const**argdata)
 {
-	size_t hash1, hash2, bucket;
+	size_t hashes[2], bucket;
 	const void *key = *argkey;
 	const void *data = *argdata;
 
-	ckh->hash(key, ckh->lg_curbuckets, &hash1, &hash2);
+	ckh->hash(key, hashes);
 
 	/* Try to insert in primary bucket. */
-	bucket = hash1 & ((ZU(1) << ckh->lg_curbuckets) - 1);
+	bucket = hashes[0] & ((ZU(1) << ckh->lg_curbuckets) - 1);
 	if (ckh_try_bucket_insert(ckh, bucket, key, data) == false)
 		return (false);
 
 	/* Try to insert in secondary bucket. */
-	bucket = hash2 & ((ZU(1) << ckh->lg_curbuckets) - 1);
+	bucket = hashes[1] & ((ZU(1) << ckh->lg_curbuckets) - 1);
 	if (ckh_try_bucket_insert(ckh, bucket, key, data) == false)
 		return (false);
 
@@ -526,31 +527,10 @@ ckh_search(ckh_t *ckh, const void *searchkey, void **key, void **data)
 }
 
 void
-ckh_string_hash(const void *key, unsigned minbits, size_t *hash1, size_t *hash2)
+ckh_string_hash(const void *key, size_t r_hash[2])
 {
-	size_t ret1, ret2;
-	uint64_t h;
 
-	assert(minbits <= 32 || (SIZEOF_PTR == 8 && minbits <= 64));
-	assert(hash1 != NULL);
-	assert(hash2 != NULL);
-
-	h = hash(key, strlen((const char *)key), UINT64_C(0x94122f335b332aea));
-	if (minbits <= 32) {
-		/*
-		 * Avoid doing multiple hashes, since a single hash provides
-		 * enough bits.
-		 */
-		ret1 = h & ZU(0xffffffffU);
-		ret2 = h >> 32;
-	} else {
-		ret1 = h;
-		ret2 = hash(key, strlen((const char *)key),
-		    UINT64_C(0x8432a476666bbc13));
-	}
-
-	*hash1 = ret1;
-	*hash2 = ret2;
+	hash(key, strlen((const char *)key), 0x94122f33U, r_hash);
 }
 
 bool
@@ -564,41 +544,16 @@ ckh_string_keycomp(const void *k1, const void *k2)
 }
 
 void
-ckh_pointer_hash(const void *key, unsigned minbits, size_t *hash1,
-    size_t *hash2)
+ckh_pointer_hash(const void *key, size_t r_hash[2])
 {
-	size_t ret1, ret2;
-	uint64_t h;
 	union {
 		const void *v;
-		uint64_t i;
+		size_t i;
 	} u;
 
-	assert(minbits <= 32 || (SIZEOF_PTR == 8 && minbits <= 64));
-	assert(hash1 != NULL);
-	assert(hash2 != NULL);
-
 	assert(sizeof(u.v) == sizeof(u.i));
-#if (LG_SIZEOF_PTR != LG_SIZEOF_INT)
-	u.i = 0;
-#endif
 	u.v = key;
-	h = hash(&u.i, sizeof(u.i), UINT64_C(0xd983396e68886082));
-	if (minbits <= 32) {
-		/*
-		 * Avoid doing multiple hashes, since a single hash provides
-		 * enough bits.
-		 */
-		ret1 = h & ZU(0xffffffffU);
-		ret2 = h >> 32;
-	} else {
-		assert(SIZEOF_PTR == 8);
-		ret1 = h;
-		ret2 = hash(&u.i, sizeof(u.i), UINT64_C(0x5e2be9aff8709a5d));
-	}
-
-	*hash1 = ret1;
-	*hash2 = ret2;
+	hash(&u.i, sizeof(u.i), 0xd983396eU, r_hash);
 }
 
 bool
diff --git a/src/prof.c b/src/prof.c
index e21d1667..b9f03a0d 100644
--- a/src/prof.c
+++ b/src/prof.c
@@ -90,8 +90,7 @@ static bool prof_dump(bool propagate_err, const char *filename,
     bool leakcheck);
 static void prof_dump_filename(char *filename, char v, int64_t vseq);
 static void prof_fdump(void);
-static void prof_bt_hash(const void *key, unsigned minbits, size_t *hash1,
-    size_t *hash2);
+static void prof_bt_hash(const void *key, size_t r_hash[2]);
 static bool prof_bt_keycomp(const void *k1, const void *k2);
 static malloc_mutex_t *prof_ctx_mutex_choose(void);
 
@@ -1043,34 +1042,13 @@ prof_gdump(void)
 }
 
 static void
-prof_bt_hash(const void *key, unsigned minbits, size_t *hash1, size_t *hash2)
+prof_bt_hash(const void *key, size_t r_hash[2])
 {
-	size_t ret1, ret2;
-	uint64_t h;
 	prof_bt_t *bt = (prof_bt_t *)key;
 
 	cassert(config_prof);
-	assert(minbits <= 32 || (SIZEOF_PTR == 8 && minbits <= 64));
-	assert(hash1 != NULL);
-	assert(hash2 != NULL);
 
-	h = hash(bt->vec, bt->len * sizeof(void *),
-	    UINT64_C(0x94122f335b332aea));
-	if (minbits <= 32) {
-		/*
-		 * Avoid doing multiple hashes, since a single hash provides
-		 * enough bits.
-		 */
-		ret1 = h & ZU(0xffffffffU);
-		ret2 = h >> 32;
-	} else {
-		ret1 = h;
-		ret2 = hash(bt->vec, bt->len * sizeof(void *),
-		    UINT64_C(0x8432a476666bbc13));
-	}
-
-	*hash1 = ret1;
-	*hash2 = ret2;
+	hash(bt->vec, bt->len * sizeof(void *), 0x94122f33U, r_hash);
 }
 
 static bool
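
For illustration only (not part of the patch series): the refactored hash() fills a two-element array instead of returning a single 64-bit value, which is why the ckh and prof callers above now take a size_t r_hash[2] and mask each word independently. A minimal caller sketch, assuming the jemalloc internal headers (for hash() and ZU()) and <string.h> are in scope; the helper name example_buckets and the reuse of the 0x94122f33U seed are illustrative, not from the tree:

	/* Derive two bucket indices from one hash() call, as ckh_isearch() does. */
	static void
	example_buckets(const char *key, unsigned lg_buckets, size_t buckets[2])
	{
		size_t h[2];

		hash(key, strlen(key), 0x94122f33U, h);	/* fills both words */
		buckets[0] = h[0] & ((ZU(1) << lg_buckets) - 1);
		buckets[1] = h[1] & ((ZU(1) << lg_buckets) - 1);
	}
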
From ba175a2bfb236d79404012d9b5bb6e9b3c8be8dd Mon Sep 17 00:00:00 2001
From: Jason Evans
Date: Tue, 22 Jan 2013 12:14:45 -0800
Subject: [PATCH 16/21] Use config_* instead of JEMALLOC_*.

Convert a couple of stragglers from JEMALLOC_* to use config_*.
---
 src/ckh.c      |  5 ++---
 src/jemalloc.c | 13 ++-----------
 2 files changed, 4 insertions(+), 14 deletions(-)

diff --git a/src/ckh.c b/src/ckh.c
index e58980de..2f38348b 100644
--- a/src/ckh.c
+++ b/src/ckh.c
@@ -418,9 +418,8 @@ ckh_delete(ckh_t *ckh)
 #endif
 
 	idalloc(ckh->tab);
-#ifdef JEMALLOC_DEBUG
-	memset(ckh, 0x5a, sizeof(ckh_t));
-#endif
+	if (config_debug)
+		memset(ckh, 0x5a, sizeof(ckh_t));
 }
 
 size_t
diff --git a/src/jemalloc.c b/src/jemalloc.c
index 58e18df4..2d56e4aa 100644
--- a/src/jemalloc.c
+++ b/src/jemalloc.c
@@ -10,17 +10,8 @@ malloc_tsd_data(, thread_allocated, thread_allocated_t,
 
 /* Runtime configuration options. */
 const char *je_malloc_conf;
-#ifdef JEMALLOC_DEBUG
-bool opt_abort = true;
-# ifdef JEMALLOC_FILL
-bool opt_junk = true;
-# else
-bool opt_junk = false;
-# endif
-#else
-bool opt_abort = false;
-bool opt_junk = false;
-#endif
+bool opt_abort = config_debug;
+bool opt_junk = (config_debug && config_fill);
 size_t opt_quarantine = ZU(0);
 bool opt_redzone = false;
 bool opt_utrace = false;
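
For illustration only (not part of the patch series): the config_* conversion above relies on flags such as config_debug being ordinary compile-time constants (the same pattern as the config_ivsalloc definition visible in the jemalloc_internal.h.in hunk context), so code guarded by them is parsed and type-checked in every configuration and optimized away when the option is off. A standalone sketch of the pattern, with the hypothetical helper example_clear():

	#include <stdbool.h>
	#include <string.h>

	static const bool config_debug =
	#ifdef JEMALLOC_DEBUG
	    true
	#else
	    false
	#endif
	    ;

	static void
	example_clear(void *p, size_t sz)
	{

		/* Always compiled; the branch is dead when config_debug is false. */
		if (config_debug)
			memset(p, 0x5a, sz);
	}
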
From 42ba90eb7f9e12d9cf6d7f9be82e239f0ffb04f5 Mon Sep 17 00:00:00 2001
From: Jason Evans
Date: Tue, 22 Jan 2013 12:55:42 -0800
Subject: [PATCH 17/21] Update phony targets.

Submitted by Frederik Deweerdt.
---
 Makefile.in | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Makefile.in b/Makefile.in
index 0062747c..74810472 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -113,9 +113,9 @@ COBJS := $(CSRCS:$(srcroot)%.c=$(objroot)%.$(O))
 CPICOBJS := $(CSRCS:$(srcroot)%.c=$(objroot)%.pic.$(O))
 CTESTOBJS := $(CTESTS:$(srcroot)%.c=$(objroot)%.$(O))
 
-.PHONY: all dist doc_html doc_man doc
+.PHONY: all dist build_doc_html build_doc_man build_doc
 .PHONY: install_bin install_include install_lib
-.PHONY: install_html install_man install_doc install
+.PHONY: install_doc_html install_doc_man install_doc install
 .PHONY: tests check clean distclean relclean
 
 .SECONDARY : $(CTESTOBJS)

From 2625c8968e88de435d6452e6f202c8dbdeb1775b Mon Sep 17 00:00:00 2001
From: Jason Evans
Date: Tue, 22 Jan 2013 16:46:27 -0800
Subject: [PATCH 18/21] Fix quoting bug in --without-export implementation.

---
 configure.ac | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/configure.ac b/configure.ac
index 8c130827..c270662b 100644
--- a/configure.ac
+++ b/configure.ac
@@ -475,7 +475,7 @@ done
 AC_ARG_WITH([export],
   [AS_HELP_STRING([--without-export], [disable exporting jemalloc public APIs])],
 [if test "x$with_export" = "xno"; then
-  AC_DEFINE([JEMALLOC_EXPORT],[])]
+  AC_DEFINE([JEMALLOC_EXPORT],[])
 fi]
 )
 

From d1b6e18a99caf7e0c38707f4aed7ec8c492e0424 Mon Sep 17 00:00:00 2001
From: Jason Evans
Date: Tue, 22 Jan 2013 16:54:26 -0800
Subject: [PATCH 19/21] Revert opt_abort and opt_junk refactoring.

Revert refactoring of opt_abort and opt_junk declarations.  clang
accepts the config_*-based declarations (and generates correct code),
but gcc complains with:

error: initializer element is not constant
---
 src/jemalloc.c | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/src/jemalloc.c b/src/jemalloc.c
index 2d56e4aa..c117685a 100644
--- a/src/jemalloc.c
+++ b/src/jemalloc.c
@@ -10,8 +10,20 @@ malloc_tsd_data(, thread_allocated, thread_allocated_t,
 
 /* Runtime configuration options. */
 const char *je_malloc_conf;
-bool opt_abort = config_debug;
-bool opt_junk = (config_debug && config_fill);
+bool opt_abort =
+#ifdef JEMALLOC_DEBUG
+    true
+#else
+    false
+#endif
+    ;
+bool opt_junk =
+#if (defined(JEMALLOC_DEBUG) && defined(JEMALLOC_FILL))
+    true
+#else
+    false
+#endif
+    ;
 size_t opt_quarantine = ZU(0);
 bool opt_redzone = false;
 bool opt_utrace = false;
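
For illustration only (not part of the patch series): the gcc error quoted in the commit message above follows from a C rule that file-scope initializers must be constant expressions, and in C (unlike C++) a const-qualified object such as config_debug does not qualify, even though clang accepts it. A standalone sketch, assumed and not taken from the tree:

	#include <stdbool.h>

	static const bool config_debug = true;

	/*
	 * Rejected by gcc ("initializer element is not constant"): a const
	 * object is not a constant expression in C, so it cannot initialize
	 * an object with static storage duration.
	 *
	 *     bool opt_abort = config_debug;
	 */

	/* Accepted everywhere: the initializer is a literal. */
	bool opt_abort = true;
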
From dd0438ee6b7b3640516d5a48feec1490ca2f1cc3 Mon Sep 17 00:00:00 2001
From: Jason Evans
Date: Tue, 22 Jan 2013 20:43:04 -0800
Subject: [PATCH 20/21] Specify 'inline' in addition to always_inline attribute.

Specify both inline and __attribute__((always_inline)), in order to
avoid warnings when using newer versions of gcc.
---
 include/jemalloc/internal/jemalloc_internal.h.in | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in
index 13a2ffb7..c606c122 100644
--- a/include/jemalloc/internal/jemalloc_internal.h.in
+++ b/include/jemalloc/internal/jemalloc_internal.h.in
@@ -241,7 +241,7 @@ static const bool config_ivsalloc =
 # define JEMALLOC_ENABLE_INLINE
 # ifdef JEMALLOC_HAVE_ATTR
 #  define JEMALLOC_ALWAYS_INLINE \
-	 static JEMALLOC_ATTR(unused) JEMALLOC_ATTR(always_inline)
+	 static inline JEMALLOC_ATTR(unused) JEMALLOC_ATTR(always_inline)
 # else
 #  define JEMALLOC_ALWAYS_INLINE static inline
 # endif

From b5681fb20c17478f2193fead19b7788807e39996 Mon Sep 17 00:00:00 2001
From: Jason Evans
Date: Tue, 22 Jan 2013 22:45:09 -0800
Subject: [PATCH 21/21] Updated ChangeLog for 3.3.0.

---
 COPYING   |  4 ++--
 ChangeLog | 12 +++++++++++-
 2 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/COPYING b/COPYING
index e27fc4d6..019e8132 100644
--- a/COPYING
+++ b/COPYING
@@ -1,10 +1,10 @@
 Unless otherwise specified, files in the jemalloc source distribution are
 subject to the following license:
 --------------------------------------------------------------------------------
-Copyright (C) 2002-2012 Jason Evans .
+Copyright (C) 2002-2013 Jason Evans .
 All rights reserved.
 Copyright (C) 2007-2012 Mozilla Foundation.  All rights reserved.
-Copyright (C) 2009-2012 Facebook, Inc.  All rights reserved.
+Copyright (C) 2009-2013 Facebook, Inc.  All rights reserved.
 
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
diff --git a/ChangeLog b/ChangeLog
index 374459de..65782253 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -6,12 +6,22 @@ found in the git revision history:
     http://www.canonware.com/cgi-bin/gitweb.cgi?p=jemalloc.git
     git://canonware.com/jemalloc.git
 
-* 3.x.x (Not yet released)
+* 3.3.0 (January 23, 2013)
+
+  This version includes a few minor performance improvements in addition to the
+  listed new features and bug fixes.
+
+  New features:
+  - Add clipping support to lg_chunk option processing.
+  - Add the --enable-ivsalloc option.
+  - Add the --without-export option.
+  - Add the --disable-zone-allocator option.
 
   Bug fixes:
   - Fix "arenas.extend" mallctl to output the number of arenas.
  - Fix chunk_recycle() to unconditionally inform Valgrind that returned
    memory is undefined.
+  - Fix build break on FreeBSD related to alloca.h.
 
 * 3.2.0 (November 9, 2012)