From da9dde0854b2240882867f192a59ad391f4bf92b Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 1 Nov 2011 20:48:31 -0700 Subject: [PATCH 001/205] Clean up rb documentation. --- include/jemalloc/internal/rb.h | 92 +++++++++++++++++----------------- 1 file changed, 46 insertions(+), 46 deletions(-) diff --git a/include/jemalloc/internal/rb.h b/include/jemalloc/internal/rb.h index ee9b009d..7b675f09 100644 --- a/include/jemalloc/internal/rb.h +++ b/include/jemalloc/internal/rb.h @@ -223,88 +223,88 @@ a_prefix##reverse_iter(a_rbt_type *rbtree, a_type *start, \ * The following API is generated: * * static void - * ex_new(ex_t *extree); + * ex_new(ex_t *tree); * Description: Initialize a red-black tree structure. * Args: - * extree: Pointer to an uninitialized red-black tree object. + * tree: Pointer to an uninitialized red-black tree object. * * static ex_node_t * - * ex_first(ex_t *extree); + * ex_first(ex_t *tree); * static ex_node_t * - * ex_last(ex_t *extree); - * Description: Get the first/last node in extree. + * ex_last(ex_t *tree); + * Description: Get the first/last node in tree. * Args: - * extree: Pointer to an initialized red-black tree object. - * Ret: First/last node in extree, or NULL if extree is empty. + * tree: Pointer to an initialized red-black tree object. + * Ret: First/last node in tree, or NULL if tree is empty. * * static ex_node_t * - * ex_next(ex_t *extree, ex_node_t *node); + * ex_next(ex_t *tree, ex_node_t *node); * static ex_node_t * - * ex_prev(ex_t *extree, ex_node_t *node); + * ex_prev(ex_t *tree, ex_node_t *node); * Description: Get node's successor/predecessor. * Args: - * extree: Pointer to an initialized red-black tree object. - * node : A node in extree. - * Ret: node's successor/predecessor in extree, or NULL if node is + * tree: Pointer to an initialized red-black tree object. + * node: A node in tree. + * Ret: node's successor/predecessor in tree, or NULL if node is * last/first. * * static ex_node_t * - * ex_search(ex_t *extree, ex_node_t *key); + * ex_search(ex_t *tree, ex_node_t *key); * Description: Search for node that matches key. * Args: - * extree: Pointer to an initialized red-black tree object. - * key : Search key. - * Ret: Node in extree that matches key, or NULL if no match. + * tree: Pointer to an initialized red-black tree object. + * key : Search key. + * Ret: Node in tree that matches key, or NULL if no match. * * static ex_node_t * - * ex_nsearch(ex_t *extree, ex_node_t *key); + * ex_nsearch(ex_t *tree, ex_node_t *key); * static ex_node_t * - * ex_psearch(ex_t *extree, ex_node_t *key); + * ex_psearch(ex_t *tree, ex_node_t *key); * Description: Search for node that matches key. If no match is found, * return what would be key's successor/predecessor, were - * key in extree. + * key in tree. * Args: - * extree: Pointer to an initialized red-black tree object. - * key : Search key. - * Ret: Node in extree that matches key, or if no match, hypothetical - * node's successor/predecessor (NULL if no successor/predecessor). + * tree: Pointer to an initialized red-black tree object. + * key : Search key. + * Ret: Node in tree that matches key, or if no match, hypothetical node's + * successor/predecessor (NULL if no successor/predecessor). * * static void - * ex_insert(ex_t *extree, ex_node_t *node); - * Description: Insert node into extree. + * ex_insert(ex_t *tree, ex_node_t *node); + * Description: Insert node into tree. * Args: - * extree: Pointer to an initialized red-black tree object. - * node : Node to be inserted into extree. 
+ * tree: Pointer to an initialized red-black tree object. + * node: Node to be inserted into tree. * * static void - * ex_remove(ex_t *extree, ex_node_t *node); - * Description: Remove node from extree. + * ex_remove(ex_t *tree, ex_node_t *node); + * Description: Remove node from tree. * Args: - * extree: Pointer to an initialized red-black tree object. - * node : Node in extree to be removed. + * tree: Pointer to an initialized red-black tree object. + * node: Node in tree to be removed. * * static ex_node_t * - * ex_iter(ex_t *extree, ex_node_t *start, ex_node_t *(*cb)(ex_t *, + * ex_iter(ex_t *tree, ex_node_t *start, ex_node_t *(*cb)(ex_t *, * ex_node_t *, void *), void *arg); * static ex_node_t * - * ex_reverse_iter(ex_t *extree, ex_node_t *start, ex_node *(*cb)(ex_t *, + * ex_reverse_iter(ex_t *tree, ex_node_t *start, ex_node *(*cb)(ex_t *, * ex_node_t *, void *), void *arg); - * Description: Iterate forward/backward over extree, starting at node. - * If extree is modified, iteration must be immediately + * Description: Iterate forward/backward over tree, starting at node. If + * tree is modified, iteration must be immediately * terminated by the callback function that causes the * modification. * Args: - * extree: Pointer to an initialized red-black tree object. - * start : Node at which to start iteration, or NULL to start at - * first/last node. - * cb : Callback function, which is called for each node during - * iteration. Under normal circumstances the callback function - * should return NULL, which causes iteration to continue. If a - * callback function returns non-NULL, iteration is immediately - * terminated and the non-NULL return value is returned by the - * iterator. This is useful for re-starting iteration after - * modifying extree. - * arg : Opaque pointer passed to cb(). + * tree : Pointer to an initialized red-black tree object. + * start: Node at which to start iteration, or NULL to start at + * first/last node. + * cb : Callback function, which is called for each node during + * iteration. Under normal circumstances the callback function + * should return NULL, which causes iteration to continue. If a + * callback function returns non-NULL, iteration is immediately + * terminated and the non-NULL return value is returned by the + * iterator. This is useful for re-starting iteration after + * modifying tree. + * arg : Opaque pointer passed to cb(). * Ret: NULL if iteration completed, or the non-NULL callback return value * that caused termination of the iteration. */ From 2bd3cbc5c68bb9b097c382108ae1aed793e08062 Mon Sep 17 00:00:00 2001 From: Antony Dovgal Date: Thu, 13 Oct 2011 09:33:33 +0400 Subject: [PATCH 002/205] add autogenerated jemalloc.sh wrapper script --- .gitignore | 1 + Makefile.in | 2 +- bin/jemalloc.sh.in | 9 +++++++++ configure.ac | 20 +++++++++++++++++++- 4 files changed, 30 insertions(+), 2 deletions(-) create mode 100644 bin/jemalloc.sh.in diff --git a/.gitignore b/.gitignore index 32b4c424..1a9bb068 100644 --- a/.gitignore +++ b/.gitignore @@ -21,3 +21,4 @@ !test/*.c !test/*.exp /VERSION +/bin/jemalloc.sh diff --git a/Makefile.in b/Makefile.in index de7492f9..6f66e4d2 100644 --- a/Makefile.in +++ b/Makefile.in @@ -42,7 +42,7 @@ TEST_LIBRARY_PATH := endif # Lists of files. 
-BINS := @srcroot@bin/pprof +BINS := @srcroot@bin/pprof @objroot@bin/jemalloc.sh CHDRS := @objroot@include/jemalloc/jemalloc@install_suffix@.h \ @objroot@include/jemalloc/jemalloc_defs@install_suffix@.h CSRCS := @srcroot@src/jemalloc.c @srcroot@src/arena.c @srcroot@src/atomic.c \ diff --git a/bin/jemalloc.sh.in b/bin/jemalloc.sh.in new file mode 100644 index 00000000..4d13cc6c --- /dev/null +++ b/bin/jemalloc.sh.in @@ -0,0 +1,9 @@ +#!/bin/sh + +prefix=@prefix@ +exec_prefix=@exec_prefix@ +libdir=@libdir@ + +@LD_PRELOAD_VAR@=${libdir}/libjemalloc.@SHLIB_SUFFIX_NAME@.1 +export @LD_PRELOAD_VAR@ +exec "$@" diff --git a/configure.ac b/configure.ac index b58aa520..688e0c83 100644 --- a/configure.ac +++ b/configure.ac @@ -167,6 +167,9 @@ case "${host_cpu}" in esac AC_DEFINE_UNQUOTED([CPU_SPINWAIT], [$CPU_SPINWAIT]) +LD_PRELOAD_VAR="LD_PRELOAD" +SHLIB_SUFFIX_NAME="so" + dnl Platform-specific settings. abi and RPATH can probably be determined dnl programmatically, but doing so is error-prone, which makes it generally dnl not worth the trouble. @@ -180,6 +183,8 @@ case "${host}" in abi="macho" AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE]) RPATH="" + LD_PRELOAD_VAR="DYLD_INSERT_LIBRARIES" + SHLIB_SUFFIX_NAME="dylib" ;; *-*-freebsd*) CFLAGS="$CFLAGS" @@ -217,6 +222,17 @@ case "${host}" in CPPFLAGS="$CPPFLAGS -D_POSIX_PTHREAD_SEMANTICS" LIBS="$LIBS -lposix4 -lsocket -lnsl" ;; + *-ibm-aix*) + if "$LG_SIZEOF_PTR" = "8"; then + dnl 64bit AIX + LD_PRELOAD_VAR="LDR_PRELOAD64" + else + dnl 32bit AIX + LD_PRELOAD_VAR="LDR_PRELOAD" + fi + abi="xcoff" + RPATH="-Wl,-rpath," + ;; *) AC_MSG_RESULT([Unsupported operating system: ${host}]) abi="elf" @@ -225,6 +241,8 @@ case "${host}" in esac AC_SUBST([abi]) AC_SUBST([RPATH]) +AC_SUBST([LD_PRELOAD_VAR]) +AC_SUBST([SHLIB_SUFFIX_NAME]) JE_COMPILABLE([__attribute__ syntax], [static __attribute__((unused)) void foo(void){}], @@ -881,7 +899,7 @@ AC_CONFIG_HEADERS([$cfghdrs_tup]) dnl ============================================================================ dnl Generate outputs. -AC_CONFIG_FILES([$cfgoutputs_tup config.stamp]) +AC_CONFIG_FILES([$cfgoutputs_tup config.stamp bin/jemalloc.sh]) AC_SUBST([cfgoutputs_in]) AC_SUBST([cfgoutputs_out]) AC_OUTPUT From f576c63f1eb29ce32e930501f65c541ff344e912 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 1 Nov 2011 22:27:41 -0700 Subject: [PATCH 003/205] Refactor SO and REV make variables. Refactor the SO and REV such that they are set via autoconf variables, @so@ and @rev@. These variables are both needed by the jemalloc.sh script, so this unifies their definitions. 
--- Makefile.in | 5 ++--- bin/jemalloc.sh.in | 2 +- configure.ac | 13 +++++++++---- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/Makefile.in b/Makefile.in index 6f66e4d2..82983892 100644 --- a/Makefile.in +++ b/Makefile.in @@ -27,14 +27,13 @@ endif LDFLAGS := @LDFLAGS@ LIBS := @LIBS@ RPATH_EXTRA := @RPATH_EXTRA@ +SO := @so@ ifeq (macho, @abi@) -SO := dylib WL_SONAME := dylib_install_name else -SO := so WL_SONAME := soname endif -REV := 1 +REV := @rev@ ifeq (macho, @abi@) TEST_LIBRARY_PATH := DYLD_FALLBACK_LIBRARY_PATH=@objroot@lib else diff --git a/bin/jemalloc.sh.in b/bin/jemalloc.sh.in index 4d13cc6c..56cdfaf4 100644 --- a/bin/jemalloc.sh.in +++ b/bin/jemalloc.sh.in @@ -4,6 +4,6 @@ prefix=@prefix@ exec_prefix=@exec_prefix@ libdir=@libdir@ -@LD_PRELOAD_VAR@=${libdir}/libjemalloc.@SHLIB_SUFFIX_NAME@.1 +@LD_PRELOAD_VAR@=${libdir}/libjemalloc.@so@.@rev@ export @LD_PRELOAD_VAR@ exec "$@" diff --git a/configure.ac b/configure.ac index 688e0c83..699f931c 100644 --- a/configure.ac +++ b/configure.ac @@ -40,6 +40,10 @@ AC_RUN_IFELSE([AC_LANG_PROGRAM( dnl ============================================================================ +dnl Library revision. +rev=1 +AC_SUBST([rev]) + srcroot=$srcdir if test "x${srcroot}" = "x." ; then srcroot="" @@ -168,7 +172,7 @@ esac AC_DEFINE_UNQUOTED([CPU_SPINWAIT], [$CPU_SPINWAIT]) LD_PRELOAD_VAR="LD_PRELOAD" -SHLIB_SUFFIX_NAME="so" +so="so" dnl Platform-specific settings. abi and RPATH can probably be determined dnl programmatically, but doing so is error-prone, which makes it generally @@ -184,7 +188,7 @@ case "${host}" in AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE]) RPATH="" LD_PRELOAD_VAR="DYLD_INSERT_LIBRARIES" - SHLIB_SUFFIX_NAME="dylib" + so="dylib" ;; *-*-freebsd*) CFLAGS="$CFLAGS" @@ -242,7 +246,7 @@ esac AC_SUBST([abi]) AC_SUBST([RPATH]) AC_SUBST([LD_PRELOAD_VAR]) -AC_SUBST([SHLIB_SUFFIX_NAME]) +AC_SUBST([so]) JE_COMPILABLE([__attribute__ syntax], [static __attribute__((unused)) void foo(void){}], @@ -907,7 +911,8 @@ AC_OUTPUT dnl ============================================================================ dnl Print out the results of configuration. AC_MSG_RESULT([===============================================================================]) -AC_MSG_RESULT([jemalloc version : $jemalloc_version]) +AC_MSG_RESULT([jemalloc version : ${jemalloc_version}]) +AC_MSG_RESULT([library revision : ${rev}]) AC_MSG_RESULT([]) AC_MSG_RESULT([CC : ${CC}]) AC_MSG_RESULT([CPPFLAGS : ${CPPFLAGS}]) From 8e6f8b490dbd4b9ae715267fd401f09a056f92c4 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 3 Nov 2011 18:40:03 -0700 Subject: [PATCH 004/205] Initialize arenas_tsd before setting it. Reported by: Ethan Burns, Rich Prohaska, Tudor Bosman --- src/jemalloc.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index fd8bf52f..fd6b890a 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -769,6 +769,14 @@ malloc_init_hard(void) } #endif + if (malloc_mutex_init(&arenas_lock)) + return (true); + + if (pthread_key_create(&arenas_tsd, arenas_cleanup) != 0) { + malloc_mutex_unlock(&init_lock); + return (true); + } + /* * Create enough scaffolding to allow recursive allocation in * malloc_ncpus(). 
@@ -795,14 +803,6 @@ malloc_init_hard(void) ARENA_SET(arenas[0]); arenas[0]->nthreads++; - if (malloc_mutex_init(&arenas_lock)) - return (true); - - if (pthread_key_create(&arenas_tsd, arenas_cleanup) != 0) { - malloc_mutex_unlock(&init_lock); - return (true); - } - #ifdef JEMALLOC_PROF if (prof_boot2()) { malloc_mutex_unlock(&init_lock); From 30fbef8aeaf65fcd6b265fb9f551e7c2ec8cb22f Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 5 Nov 2011 21:06:55 -0700 Subject: [PATCH 005/205] Fix rallocm() test to support >4KiB pages. --- src/jemalloc.c | 2 +- test/rallocm.c | 18 ++++++++++++++---- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index fd6b890a..a161c2e2 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -689,7 +689,7 @@ malloc_init_hard(void) result = sysconf(_SC_PAGESIZE); assert(result != -1); - pagesize = (unsigned)result; + pagesize = (size_t)result; /* * We assume that pagesize is a power of 2 when calculating diff --git a/test/rallocm.c b/test/rallocm.c index a8cadebc..ccf326bb 100644 --- a/test/rallocm.c +++ b/test/rallocm.c @@ -1,6 +1,8 @@ #include #include +#include #include +#include #define JEMALLOC_MANGLE #include "jemalloc_test.h" @@ -8,12 +10,20 @@ int main(void) { + size_t pagesize; void *p, *q; size_t sz, tsz; int r; fprintf(stderr, "Test begin\n"); + /* Get page size. */ + { + long result = sysconf(_SC_PAGESIZE); + assert(result != -1); + pagesize = (size_t)result; + } + r = JEMALLOC_P(allocm)(&p, &sz, 42, 0); if (r != ALLOCM_SUCCESS) { fprintf(stderr, "Unexpected allocm() error\n"); @@ -66,7 +76,7 @@ main(void) p = q; sz = tsz; - r = JEMALLOC_P(rallocm)(&q, &tsz, 8192, 0, 0); + r = JEMALLOC_P(rallocm)(&q, &tsz, pagesize*2, 0, 0); if (r != ALLOCM_SUCCESS) fprintf(stderr, "Unexpected rallocm() error\n"); if (q == p) @@ -78,7 +88,7 @@ main(void) p = q; sz = tsz; - r = JEMALLOC_P(rallocm)(&q, &tsz, 16384, 0, 0); + r = JEMALLOC_P(rallocm)(&q, &tsz, pagesize*4, 0, 0); if (r != ALLOCM_SUCCESS) fprintf(stderr, "Unexpected rallocm() error\n"); if (tsz == sz) { @@ -88,7 +98,7 @@ main(void) p = q; sz = tsz; - r = JEMALLOC_P(rallocm)(&q, &tsz, 8192, 0, ALLOCM_NO_MOVE); + r = JEMALLOC_P(rallocm)(&q, &tsz, pagesize*2, 0, ALLOCM_NO_MOVE); if (r != ALLOCM_SUCCESS) fprintf(stderr, "Unexpected rallocm() error\n"); if (q != p) @@ -99,7 +109,7 @@ main(void) } sz = tsz; - r = JEMALLOC_P(rallocm)(&q, &tsz, 16384, 0, ALLOCM_NO_MOVE); + r = JEMALLOC_P(rallocm)(&q, &tsz, pagesize*4, 0, ALLOCM_NO_MOVE); if (r != ALLOCM_SUCCESS) fprintf(stderr, "Unexpected rallocm() error\n"); if (q != p) From ca9ee1a409c32e052ab04ca727bbc257a43795fc Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 5 Nov 2011 21:46:23 -0700 Subject: [PATCH 006/205] Update ChangeLog for 2.2.4. --- ChangeLog | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/ChangeLog b/ChangeLog index 66032b26..61979683 100644 --- a/ChangeLog +++ b/ChangeLog @@ -6,6 +6,13 @@ found in the git revision history: http://www.canonware.com/cgi-bin/gitweb.cgi?p=jemalloc.git git://canonware.com/jemalloc.git +* 2.2.4 (November 5, 2011) + + Bug fixes: + - Initialize arenas_tsd before using it. This bug existed for 2.2.[0-3], as + well as for --disable-tls builds in earlier releases. + - Do not assume a 4 KiB page size in test/rallocm.c. + * 2.2.3 (August 31, 2011) This version fixes numerous bugs related to heap profiling. 
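The 2.2.4 entry above cites the rallocm page-size fix (patch 005), which replaces every hard-coded 8192/16384 byte count in test/rallocm.c with multiples of the runtime page size, so the test also passes on 8 KiB and 64 KiB page systems. A minimal standalone sketch of that pattern, using only plain POSIX (nothing jemalloc-specific assumed):

#include <assert.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	size_t pagesize;
	long result;

	/* Query the actual page size instead of assuming 4 KiB. */
	result = sysconf(_SC_PAGESIZE);
	assert(result != -1);
	pagesize = (size_t)result;

	/* Express test sizes as page multiples, as patch 005 does. */
	printf("one page: %zu, two pages: %zu, four pages: %zu\n",
	    pagesize, pagesize * 2, pagesize * 4);
	return (0);
}
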
From fa351d9fdcbbbfe7455279311fdf3d65751a4e75 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 9 Nov 2011 11:55:19 -0800 Subject: [PATCH 007/205] Fix huge_ralloc() race when using mremap(2). Fix huge_ralloc() to remove the old memory region from tree of huge allocations *before* calling mremap(2), in order to make sure that no other thread acquires the old memory region via mmap() and encounters stale metadata in the tree. Reported by: Rich Prohaska --- src/huge.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/huge.c b/src/huge.c index ac3f3a0d..5ee9f549 100644 --- a/src/huge.c +++ b/src/huge.c @@ -234,6 +234,13 @@ huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, ) { size_t newsize = huge_salloc(ret); + /* + * Remove ptr from the tree of huge allocations before + * performing the remap operation, in order to avoid the + * possibility of another thread acquiring that mapping before + * this one removes it from the tree. + */ + huge_dalloc(ptr, false); if (mremap(ptr, oldsize, newsize, MREMAP_MAYMOVE|MREMAP_FIXED, ret) == MAP_FAILED) { /* @@ -253,9 +260,8 @@ huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, if (opt_abort) abort(); memcpy(ret, ptr, copysize); - idalloc(ptr); - } else - huge_dalloc(ptr, false); + chunk_dealloc(ptr, oldsize); + } } else #endif { From 12a488782681cbd740a5f54e0b7e74ea84858e21 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 11 Nov 2011 14:41:59 -0800 Subject: [PATCH 008/205] Fix huge_ralloc to maintain chunk statistics. Fix huge_ralloc() to properly maintain chunk statistics when using mremap(2). --- include/jemalloc/internal/chunk.h | 2 +- src/arena.c | 2 +- src/chunk.c | 16 +++++++++------- src/huge.c | 11 ++++++----- 4 files changed, 17 insertions(+), 14 deletions(-) diff --git a/include/jemalloc/internal/chunk.h b/include/jemalloc/internal/chunk.h index a60f0ad7..54b6a3ec 100644 --- a/include/jemalloc/internal/chunk.h +++ b/include/jemalloc/internal/chunk.h @@ -50,7 +50,7 @@ extern size_t map_bias; /* Number of arena chunk header pages. */ extern size_t arena_maxclass; /* Max size class for arenas. 
*/ void *chunk_alloc(size_t size, bool base, bool *zero); -void chunk_dealloc(void *chunk, size_t size); +void chunk_dealloc(void *chunk, size_t size, bool unmap); bool chunk_boot(void); #endif /* JEMALLOC_H_EXTERNS */ diff --git a/src/arena.c b/src/arena.c index e749c1d5..d166ca1e 100644 --- a/src/arena.c +++ b/src/arena.c @@ -569,7 +569,7 @@ arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk) arena->ndirty -= spare->ndirty; } malloc_mutex_unlock(&arena->lock); - chunk_dealloc((void *)spare, chunksize); + chunk_dealloc((void *)spare, chunksize, true); malloc_mutex_lock(&arena->lock); #ifdef JEMALLOC_STATS arena->stats.mapped -= chunksize; diff --git a/src/chunk.c b/src/chunk.c index 301519e8..d190c6f4 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -70,7 +70,7 @@ RETURN: #ifdef JEMALLOC_IVSALLOC if (base == false && ret != NULL) { if (rtree_set(chunks_rtree, (uintptr_t)ret, ret)) { - chunk_dealloc(ret, size); + chunk_dealloc(ret, size, true); return (NULL); } } @@ -108,7 +108,7 @@ RETURN: } void -chunk_dealloc(void *chunk, size_t size) +chunk_dealloc(void *chunk, size_t size, bool unmap) { assert(chunk != NULL); @@ -125,15 +125,17 @@ chunk_dealloc(void *chunk, size_t size) malloc_mutex_unlock(&chunks_mtx); #endif + if (unmap) { #ifdef JEMALLOC_SWAP - if (swap_enabled && chunk_dealloc_swap(chunk, size) == false) - return; + if (swap_enabled && chunk_dealloc_swap(chunk, size) == false) + return; #endif #ifdef JEMALLOC_DSS - if (chunk_dealloc_dss(chunk, size) == false) - return; + if (chunk_dealloc_dss(chunk, size) == false) + return; #endif - chunk_dealloc_mmap(chunk, size); + chunk_dealloc_mmap(chunk, size); + } } bool diff --git a/src/huge.c b/src/huge.c index 5ee9f549..a4f9b054 100644 --- a/src/huge.c +++ b/src/huge.c @@ -110,12 +110,12 @@ huge_palloc(size_t size, size_t alignment, bool zero) if (offset == 0) { /* Trim trailing space. */ chunk_dealloc((void *)((uintptr_t)ret + chunk_size), alloc_size - - chunk_size); + - chunk_size, true); } else { size_t trailsize; /* Trim leading space. */ - chunk_dealloc(ret, alignment - offset); + chunk_dealloc(ret, alignment - offset, true); ret = (void *)((uintptr_t)ret + (alignment - offset)); @@ -124,7 +124,7 @@ huge_palloc(size_t size, size_t alignment, bool zero) /* Trim trailing space. */ assert(trailsize < alloc_size); chunk_dealloc((void *)((uintptr_t)ret + chunk_size), - trailsize); + trailsize, true); } } @@ -260,7 +260,7 @@ huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, if (opt_abort) abort(); memcpy(ret, ptr, copysize); - chunk_dealloc(ptr, oldsize); + chunk_dealloc_mmap(ptr, oldsize); } } else #endif @@ -301,9 +301,10 @@ huge_dalloc(void *ptr, bool unmap) memset(node->addr, 0x5a, node->size); #endif #endif - chunk_dealloc(node->addr, node->size); } + chunk_dealloc(node->addr, node->size, unmap); + base_node_dealloc(node); } From 334cc021422869329f08349e088e7f491318e087 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 11 Nov 2011 14:46:04 -0800 Subject: [PATCH 009/205] Fix malloc_stats_print(..., "a") output. Fix the logic in stats_print() such that if the "a" flag is passed in without the "m" flag, merged statistics will be printed even if only one arena is initialized. 
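For context on the flag semantics: each character in the opts string passed to malloc_stats_print() suppresses one section of output ('g' general, 'm' merged arena stats, 'a' per-arena stats, 'b' bins, 'l' large size classes), so passing "a" alone sets unmerged to false, and the corrected condition (ninitialized > 1 || unmerged == false) guarantees the merged totals are still printed. A hedged usage sketch in the style of the tests in this tree (JEMALLOC_P() mangling and jemalloc_test.h as in test/rallocm.c; the flag meanings above are as documented for this era of the API, not stated in this patch):

#define JEMALLOC_MANGLE
#include "jemalloc_test.h"

int
main(void)
{
	/*
	 * Suppress per-arena output; with this fix applied, merged
	 * stats are printed even when only one arena is initialized.
	 */
	JEMALLOC_P(malloc_stats_print)(NULL, NULL, "a");
	return (0);
}
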
--- src/stats.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/stats.c b/src/stats.c index cbbbb5ba..dc172e42 100644 --- a/src/stats.c +++ b/src/stats.c @@ -748,7 +748,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, ninitialized++; } - if (ninitialized > 1) { + if (ninitialized > 1 || unmerged == false) { /* Print merged arena stats. */ malloc_cprintf(write_cb, cbopaque, "\nMerged arenas stats:\n"); From b3bd885090230cc28add77c399b4ed440b760ca3 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 14 Nov 2011 17:12:45 -0800 Subject: [PATCH 010/205] Update ChangeLog for 2.2.5. --- ChangeLog | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/ChangeLog b/ChangeLog index 61979683..326ee7a9 100644 --- a/ChangeLog +++ b/ChangeLog @@ -6,6 +6,14 @@ found in the git revision history: http://www.canonware.com/cgi-bin/gitweb.cgi?p=jemalloc.git git://canonware.com/jemalloc.git +* 2.2.5 (November 14, 2011) + + Bug fixes: + - Fix huge_ralloc() race when using mremap(2). This is a serious bug that + could cause memory corruption and/or crashes. + - Fix huge_ralloc() to maintain chunk statistics. + - Fix malloc_stats_print(..., "a") output. + * 2.2.4 (November 5, 2011) Bug fixes: From 7372b15a31c63ac5cb9ed8aeabc2a0a3c005e8bf Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 10 Feb 2012 20:22:09 -0800 Subject: [PATCH 011/205] Reduce cpp conditional logic complexity. Convert configuration-related cpp conditional logic to use static constant variables, e.g.: #ifdef JEMALLOC_DEBUG [...] #endif becomes: if (config_debug) { [...] } The advantage is clearer, more concise code. The main disadvantage is that data structures no longer have conditionally defined fields, so they pay the cost of all fields regardless of whether they are used. In practice, this is only a minor concern; config_stats will go away in an upcoming change, and config_prof is the only other major feature that depends on more than a few special-purpose fields. --- configure.ac | 4 +- include/jemalloc/internal/arena.h | 108 +-- include/jemalloc/internal/chunk.h | 6 - include/jemalloc/internal/chunk_dss.h | 2 - include/jemalloc/internal/chunk_swap.h | 4 - include/jemalloc/internal/ckh.h | 2 - include/jemalloc/internal/ctl.h | 6 - include/jemalloc/internal/extent.h | 6 - include/jemalloc/internal/huge.h | 4 - .../jemalloc/internal/jemalloc_internal.h.in | 169 +++- include/jemalloc/internal/mutex.h | 12 +- include/jemalloc/internal/prof.h | 15 +- include/jemalloc/internal/stats.h | 21 - include/jemalloc/internal/tcache.h | 114 +-- include/jemalloc/jemalloc_defs.h.in | 6 +- src/arena.c | 734 +++++++----------- src/chunk.c | 104 +-- src/chunk_dss.c | 14 +- src/chunk_swap.c | 43 +- src/ckh.c | 17 +- src/ctl.c | 620 ++++++--------- src/extent.c | 2 - src/huge.c | 80 +- src/jemalloc.c | 608 +++++---------- src/prof.c | 75 +- src/stats.c | 13 +- src/tcache.c | 130 ++-- 27 files changed, 1194 insertions(+), 1725 deletions(-) diff --git a/configure.ac b/configure.ac index 699f931c..9617a5e3 100644 --- a/configure.ac +++ b/configure.ac @@ -174,6 +174,9 @@ AC_DEFINE_UNQUOTED([CPU_SPINWAIT], [$CPU_SPINWAIT]) LD_PRELOAD_VAR="LD_PRELOAD" so="so" +dnl Heap profiling uses the log(3) function. +LIBS="$LIBS -lm" + dnl Platform-specific settings. abi and RPATH can probably be determined dnl programmatically, but doing so is error-prone, which makes it generally dnl not worth the trouble. 
@@ -553,7 +556,6 @@ fi AC_MSG_CHECKING([configured backtracing method]) AC_MSG_RESULT([$backtrace_method]) if test "x$enable_prof" = "x1" ; then - LIBS="$LIBS -lm" AC_DEFINE([JEMALLOC_PROF], [ ]) fi AC_SUBST([enable_prof]) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index b80c118d..b6a5c23d 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -16,11 +16,9 @@ #define SUBPAGE_CEILING(s) \ (((s) + SUBPAGE_MASK) & ~SUBPAGE_MASK) -#ifdef JEMALLOC_TINY - /* Smallest size class to support. */ -# define LG_TINY_MIN LG_SIZEOF_PTR -# define TINY_MIN (1U << LG_TINY_MIN) -#endif +/* Smallest size class to support. */ +#define LG_TINY_MIN LG_SIZEOF_PTR +#define TINY_MIN (1U << LG_TINY_MIN) /* * Maximum size class that is a multiple of the quantum, but not (necessarily) @@ -85,6 +83,15 @@ typedef struct arena_s arena_t; /* Each element of the chunk map corresponds to one page within the chunk. */ struct arena_chunk_map_s { +#ifndef JEMALLOC_PROF + /* + * Overlay prof_ctx in order to allow it to be referenced by dead code. + * Such antics aren't warranted for per arena data structures, but + * chunk map overhead accounts for a percentage of memory, rather than + * being just a fixed cost. + */ + union { +#endif union { /* * Linkage for run trees. There are two disjoint uses: @@ -103,9 +110,10 @@ struct arena_chunk_map_s { ql_elm(arena_chunk_map_t) ql_link; } u; -#ifdef JEMALLOC_PROF /* Profile counters, used for large object runs. */ prof_ctx_t *prof_ctx; +#ifndef JEMALLOC_PROF + }; /* union { ... }; */ #endif /* @@ -162,10 +170,8 @@ struct arena_chunk_map_s { * ssssssss ssssssss ssss---- ----D-LA */ size_t bits; -#ifdef JEMALLOC_PROF #define CHUNK_MAP_CLASS_SHIFT 4 #define CHUNK_MAP_CLASS_MASK ((size_t)0xff0U) -#endif #define CHUNK_MAP_FLAGS_MASK ((size_t)0xfU) #define CHUNK_MAP_DIRTY ((size_t)0x8U) #define CHUNK_MAP_UNZEROED ((size_t)0x4U) @@ -205,10 +211,8 @@ struct arena_chunk_s { typedef rb_tree(arena_chunk_t) arena_chunk_tree_t; struct arena_run_s { -#ifdef JEMALLOC_DEBUG uint32_t magic; # define ARENA_RUN_MAGIC 0x384adf93 -#endif /* Bin this run is associated with. */ arena_bin_t *bin; @@ -247,13 +251,11 @@ struct arena_bin_info_s { */ bitmap_info_t bitmap_info; -#ifdef JEMALLOC_PROF /* * Offset of first (prof_ctx_t *) in a run header for this bin's size - * class, or 0 if (opt_prof == false). + * class, or 0 if (config_prof == false || opt_prof == false). */ uint32_t ctx0_offset; -#endif /* Offset of first region in a run for this bin's size class. */ uint32_t reg0_offset; @@ -283,17 +285,13 @@ struct arena_bin_s { */ arena_run_tree_t runs; -#ifdef JEMALLOC_STATS /* Bin statistics. */ malloc_bin_stats_t stats; -#endif }; struct arena_s { -#ifdef JEMALLOC_DEBUG uint32_t magic; # define ARENA_MAGIC 0x947d3d24 -#endif /* This arena's index within the arenas array. */ unsigned ind; @@ -314,20 +312,14 @@ struct arena_s { */ malloc_mutex_t lock; -#ifdef JEMALLOC_STATS arena_stats_t stats; -# ifdef JEMALLOC_TCACHE /* * List of tcaches for extant threads associated with this arena. * Stats from these are merged incrementally, and at exit. */ ql_head(tcache_t) tcache_ql; -# endif -#endif -#ifdef JEMALLOC_PROF uint64_t prof_accumbytes; -#endif /* List of dirty-page-containing chunks this arena manages. 
*/ ql_head(arena_chunk_t) chunks_dirty; @@ -455,35 +447,23 @@ extern size_t sspace_max; #define nlclasses (chunk_npages - map_bias) void arena_purge_all(arena_t *arena); -#ifdef JEMALLOC_PROF void arena_prof_accum(arena_t *arena, uint64_t accumbytes); -#endif -#ifdef JEMALLOC_TCACHE void arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, - size_t binind -# ifdef JEMALLOC_PROF - , uint64_t prof_accumbytes -# endif - ); -#endif + size_t binind, uint64_t prof_accumbytes); void *arena_malloc_small(arena_t *arena, size_t size, bool zero); void *arena_malloc_large(arena_t *arena, size_t size, bool zero); void *arena_malloc(size_t size, bool zero); void *arena_palloc(arena_t *arena, size_t size, size_t alloc_size, size_t alignment, bool zero); size_t arena_salloc(const void *ptr); -#ifdef JEMALLOC_PROF void arena_prof_promoted(const void *ptr, size_t size); size_t arena_salloc_demote(const void *ptr); -#endif void arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, arena_chunk_map_t *mapelm); void arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr); -#ifdef JEMALLOC_STATS void arena_stats_merge(arena_t *arena, size_t *nactive, size_t *ndirty, arena_stats_t *astats, malloc_bin_stats_t *bstats, malloc_large_stats_t *lstats); -#endif void *arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, bool zero); void *arena_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, @@ -499,10 +479,8 @@ bool arena_boot(void); size_t arena_bin_index(arena_t *arena, arena_bin_t *bin); unsigned arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr); -# ifdef JEMALLOC_PROF prof_ctx_t *arena_prof_ctx_get(const void *ptr); void arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx); -# endif void arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr); #endif @@ -521,7 +499,7 @@ arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr) unsigned shift, diff, regind; size_t size; - dassert(run->magic == ARENA_RUN_MAGIC); + assert(run->magic == ARENA_RUN_MAGIC); /* * Freeing a pointer lower than region zero can cause assertion * failure. 
@@ -586,7 +564,6 @@ arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr) return (regind); } -#ifdef JEMALLOC_PROF JEMALLOC_INLINE prof_ctx_t * arena_prof_ctx_get(const void *ptr) { @@ -594,6 +571,7 @@ arena_prof_ctx_get(const void *ptr) arena_chunk_t *chunk; size_t pageind, mapbits; + cassert(config_prof); assert(ptr != NULL); assert(CHUNK_ADDR2BASE(ptr) != ptr); @@ -612,7 +590,7 @@ arena_prof_ctx_get(const void *ptr) arena_bin_info_t *bin_info = &arena_bin_info[binind]; unsigned regind; - dassert(run->magic == ARENA_RUN_MAGIC); + assert(run->magic == ARENA_RUN_MAGIC); regind = arena_run_regind(run, bin_info, ptr); ret = *(prof_ctx_t **)((uintptr_t)run + bin_info->ctx0_offset + (regind * @@ -630,6 +608,7 @@ arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx) arena_chunk_t *chunk; size_t pageind, mapbits; + cassert(config_prof); assert(ptr != NULL); assert(CHUNK_ADDR2BASE(ptr) != ptr); @@ -647,7 +626,7 @@ arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx) arena_bin_info_t *bin_info; unsigned regind; - dassert(run->magic == ARENA_RUN_MAGIC); + assert(run->magic == ARENA_RUN_MAGIC); binind = arena_bin_index(chunk->arena, bin); bin_info = &arena_bin_info[binind]; regind = arena_run_regind(run, bin_info, ptr); @@ -659,7 +638,6 @@ arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx) } else chunk->map[pageind-map_bias].prof_ctx = ctx; } -#endif JEMALLOC_INLINE void arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr) @@ -668,7 +646,7 @@ arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr) arena_chunk_map_t *mapelm; assert(arena != NULL); - dassert(arena->magic == ARENA_MAGIC); + assert(arena->magic == ARENA_MAGIC); assert(chunk->arena == arena); assert(ptr != NULL); assert(CHUNK_ADDR2BASE(ptr) != ptr); @@ -678,63 +656,57 @@ arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr) assert((mapelm->bits & CHUNK_MAP_ALLOCATED) != 0); if ((mapelm->bits & CHUNK_MAP_LARGE) == 0) { /* Small allocation. 
*/ -#ifdef JEMALLOC_TCACHE tcache_t *tcache; - if ((tcache = tcache_get()) != NULL) + if (config_tcache && (tcache = tcache_get()) != NULL) tcache_dalloc_small(tcache, ptr); else { -#endif arena_run_t *run; arena_bin_t *bin; run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - (mapelm->bits >> PAGE_SHIFT)) << PAGE_SHIFT)); - dassert(run->magic == ARENA_RUN_MAGIC); + assert(run->magic == ARENA_RUN_MAGIC); bin = run->bin; -#ifdef JEMALLOC_DEBUG - { + if (config_debug) { size_t binind = arena_bin_index(arena, bin); - arena_bin_info_t *bin_info = + UNUSED arena_bin_info_t *bin_info = &arena_bin_info[binind]; assert(((uintptr_t)ptr - ((uintptr_t)run + (uintptr_t)bin_info->reg0_offset)) % bin_info->reg_size == 0); } -#endif malloc_mutex_lock(&bin->lock); arena_dalloc_bin(arena, chunk, ptr, mapelm); malloc_mutex_unlock(&bin->lock); -#ifdef JEMALLOC_TCACHE } -#endif } else { -#ifdef JEMALLOC_TCACHE - size_t size = mapelm->bits & ~PAGE_MASK; + if (config_tcache) { + size_t size = mapelm->bits & ~PAGE_MASK; - assert(((uintptr_t)ptr & PAGE_MASK) == 0); - if (size <= tcache_maxclass) { - tcache_t *tcache; + assert(((uintptr_t)ptr & PAGE_MASK) == 0); + if (size <= tcache_maxclass) { + tcache_t *tcache; - if ((tcache = tcache_get()) != NULL) - tcache_dalloc_large(tcache, ptr, size); - else { + if ((tcache = tcache_get()) != NULL) + tcache_dalloc_large(tcache, ptr, size); + else { + malloc_mutex_lock(&arena->lock); + arena_dalloc_large(arena, chunk, ptr); + malloc_mutex_unlock(&arena->lock); + } + } else { malloc_mutex_lock(&arena->lock); arena_dalloc_large(arena, chunk, ptr); malloc_mutex_unlock(&arena->lock); } } else { + assert(((uintptr_t)ptr & PAGE_MASK) == 0); malloc_mutex_lock(&arena->lock); arena_dalloc_large(arena, chunk, ptr); malloc_mutex_unlock(&arena->lock); } -#else - assert(((uintptr_t)ptr & PAGE_MASK) == 0); - malloc_mutex_lock(&arena->lock); - arena_dalloc_large(arena, chunk, ptr); - malloc_mutex_unlock(&arena->lock); -#endif } } #endif diff --git a/include/jemalloc/internal/chunk.h b/include/jemalloc/internal/chunk.h index 54b6a3ec..4cc1e80e 100644 --- a/include/jemalloc/internal/chunk.h +++ b/include/jemalloc/internal/chunk.h @@ -28,20 +28,14 @@ #ifdef JEMALLOC_H_EXTERNS extern size_t opt_lg_chunk; -#ifdef JEMALLOC_SWAP extern bool opt_overcommit; -#endif -#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) /* Protects stats_chunks; currently not used for any other purpose. */ extern malloc_mutex_t chunks_mtx; /* Chunk statistics. */ extern chunk_stats_t stats_chunks; -#endif -#ifdef JEMALLOC_IVSALLOC extern rtree_t *chunks_rtree; -#endif extern size_t chunksize; extern size_t chunksize_mask; /* (chunksize - 1). 
*/ diff --git a/include/jemalloc/internal/chunk_dss.h b/include/jemalloc/internal/chunk_dss.h index 6f005222..35cd461a 100644 --- a/include/jemalloc/internal/chunk_dss.h +++ b/include/jemalloc/internal/chunk_dss.h @@ -1,4 +1,3 @@ -#ifdef JEMALLOC_DSS /******************************************************************************/ #ifdef JEMALLOC_H_TYPES @@ -27,4 +26,3 @@ bool chunk_dss_boot(void); #endif /* JEMALLOC_H_INLINES */ /******************************************************************************/ -#endif /* JEMALLOC_DSS */ diff --git a/include/jemalloc/internal/chunk_swap.h b/include/jemalloc/internal/chunk_swap.h index 9faa739f..99a079eb 100644 --- a/include/jemalloc/internal/chunk_swap.h +++ b/include/jemalloc/internal/chunk_swap.h @@ -1,4 +1,3 @@ -#ifdef JEMALLOC_SWAP /******************************************************************************/ #ifdef JEMALLOC_H_TYPES @@ -15,9 +14,7 @@ extern bool swap_enabled; extern bool swap_prezeroed; extern size_t swap_nfds; extern int *swap_fds; -#ifdef JEMALLOC_STATS extern size_t swap_avail; -#endif void *chunk_alloc_swap(size_t size, bool *zero); bool chunk_in_swap(void *chunk); @@ -31,4 +28,3 @@ bool chunk_swap_boot(void); #endif /* JEMALLOC_H_INLINES */ /******************************************************************************/ -#endif /* JEMALLOC_SWAP */ diff --git a/include/jemalloc/internal/ckh.h b/include/jemalloc/internal/ckh.h index 3e4ad4c8..28f171c8 100644 --- a/include/jemalloc/internal/ckh.h +++ b/include/jemalloc/internal/ckh.h @@ -30,10 +30,8 @@ struct ckhc_s { }; struct ckh_s { -#ifdef JEMALLOC_DEBUG #define CKH_MAGIC 0x3af2489d uint32_t magic; -#endif #ifdef CKH_COUNT /* Counters used to get an idea of performance. */ diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index f1f5eb70..31f9d99b 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -32,7 +32,6 @@ struct ctl_arena_stats_s { unsigned nthreads; size_t pactive; size_t pdirty; -#ifdef JEMALLOC_STATS arena_stats_t astats; /* Aggregate stats for small size classes, based on bin stats. */ @@ -43,11 +42,9 @@ struct ctl_arena_stats_s { malloc_bin_stats_t *bstats; /* nbins elements. */ malloc_large_stats_t *lstats; /* nlclasses elements. */ -#endif }; struct ctl_stats_s { -#ifdef JEMALLOC_STATS size_t allocated; size_t active; size_t mapped; @@ -61,11 +58,8 @@ struct ctl_stats_s { uint64_t nmalloc; /* huge_nmalloc */ uint64_t ndalloc; /* huge_ndalloc */ } huge; -#endif ctl_arena_stats_t *arenas; /* (narenas + 1) elements. */ -#ifdef JEMALLOC_SWAP size_t swap_avail; -#endif }; #endif /* JEMALLOC_H_STRUCTS */ diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index 6fe9702b..36af8be8 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -9,18 +9,14 @@ typedef struct extent_node_s extent_node_t; /* Tree of extents. */ struct extent_node_s { -#if (defined(JEMALLOC_SWAP) || defined(JEMALLOC_DSS)) /* Linkage for the size/address-ordered tree. */ rb_node(extent_node_t) link_szad; -#endif /* Linkage for the address-ordered tree. */ rb_node(extent_node_t) link_ad; -#ifdef JEMALLOC_PROF /* Profile counters, used for huge objects. */ prof_ctx_t *prof_ctx; -#endif /* Pointer to the extent that this tree node is responsible for. 
*/ void *addr; @@ -34,9 +30,7 @@ typedef rb_tree(extent_node_t) extent_tree_t; /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -#if (defined(JEMALLOC_SWAP) || defined(JEMALLOC_DSS)) rb_proto(, extent_tree_szad_, extent_tree_t, extent_node_t) -#endif rb_proto(, extent_tree_ad_, extent_tree_t, extent_node_t) diff --git a/include/jemalloc/internal/huge.h b/include/jemalloc/internal/huge.h index 66544cf8..3a6b0b87 100644 --- a/include/jemalloc/internal/huge.h +++ b/include/jemalloc/internal/huge.h @@ -9,12 +9,10 @@ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -#ifdef JEMALLOC_STATS /* Huge allocation statistics. */ extern uint64_t huge_nmalloc; extern uint64_t huge_ndalloc; extern size_t huge_allocated; -#endif /* Protects chunk-related data structures. */ extern malloc_mutex_t huge_mtx; @@ -27,10 +25,8 @@ void *huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, size_t alignment, bool zero); void huge_dalloc(void *ptr, bool unmap); size_t huge_salloc(const void *ptr); -#ifdef JEMALLOC_PROF prof_ctx_t *huge_prof_ctx_get(const void *ptr); void huge_prof_ctx_set(const void *ptr, prof_ctx_t *ctx); -#endif bool huge_boot(void); #endif /* JEMALLOC_H_EXTERNS */ diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index a44f0978..8842e4bf 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -35,6 +35,125 @@ #include "jemalloc/internal/private_namespace.h" +#ifdef JEMALLOC_CC_SILENCE +#define UNUSED JEMALLOC_ATTR(unused) +#else +#define UNUSED +#endif + +static const bool config_debug = +#ifdef JEMALLOC_DEBUG + true +#else + false +#endif + ; +static const bool config_dss = +#ifdef JEMALLOC_DSS + true +#else + false +#endif + ; +static const bool config_dynamic_page_shift = +#ifdef JEMALLOC_DYNAMIC_PAGE_SHIFT + true +#else + false +#endif + ; +static const bool config_fill = +#ifdef JEMALLOC_FILL + true +#else + false +#endif + ; +static const bool config_lazy_lock = +#ifdef JEMALLOC_LAZY_LOCK + true +#else + false +#endif + ; +static const bool config_prof = +#ifdef JEMALLOC_PROF + true +#else + false +#endif + ; +static const bool config_prof_libgcc = +#ifdef JEMALLOC_PROF_LIBGCC + true +#else + false +#endif + ; +static const bool config_prof_libunwind = +#ifdef JEMALLOC_PROF_LIBUNWIND + true +#else + false +#endif + ; +static const bool config_stats = +#ifdef JEMALLOC_STATS + true +#else + false +#endif + ; +static const bool config_swap = +#ifdef JEMALLOC_SWAP + true +#else + false +#endif + ; +static const bool config_sysv = +#ifdef JEMALLOC_SYSV + true +#else + false +#endif + ; +static const bool config_tcache = +#ifdef JEMALLOC_TCACHE + true +#else + false +#endif + ; +static const bool config_tiny = +#ifdef JEMALLOC_TINY + true +#else + false +#endif + ; +static const bool config_tls = +#ifdef JEMALLOC_TLS + true +#else + false +#endif + ; +static const bool config_xmalloc = +#ifdef JEMALLOC_XMALLOC + true +#else + false +#endif + ; +static const bool config_ivsalloc = +#ifdef JEMALLOC_IVSALLOC + true +#else + false +#endif + ; + #if (defined(JEMALLOC_OSATOMIC) || defined(JEMALLOC_OSSPIN)) #include #endif @@ -82,11 +201,11 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s); # endif #endif -#ifdef JEMALLOC_DEBUG -# define dassert(e) assert(e) -#else -# define dassert(e) -#endif +/* Use to assert a particular 
configuration, e.g., cassert(config_debug). */ +#define cassert(c) do { \ + if ((c) == false) \ + assert(false); \ +} while (0) /* * jemalloc can conceptually be broken into components (arena, tcache, etc.), @@ -265,30 +384,20 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s); #endif #include "jemalloc/internal/prof.h" -#ifdef JEMALLOC_STATS typedef struct { uint64_t allocated; uint64_t deallocated; } thread_allocated_t; -#endif #undef JEMALLOC_H_STRUCTS /******************************************************************************/ #define JEMALLOC_H_EXTERNS extern bool opt_abort; -#ifdef JEMALLOC_FILL extern bool opt_junk; -#endif -#ifdef JEMALLOC_SYSV extern bool opt_sysv; -#endif -#ifdef JEMALLOC_XMALLOC extern bool opt_xmalloc; -#endif -#ifdef JEMALLOC_FILL extern bool opt_zero; -#endif extern size_t opt_narenas; #ifdef DYNAMIC_PAGE_SHIFT @@ -327,8 +436,7 @@ extern __thread arena_t *arenas_tls JEMALLOC_ATTR(tls_model("initial-exec")); extern arena_t **arenas; extern unsigned narenas; -#ifdef JEMALLOC_STATS -# ifndef NO_TLS +#ifndef NO_TLS extern __thread thread_allocated_t thread_allocated_tls; # define ALLOCATED_GET() (thread_allocated_tls.allocated) # define ALLOCATEDP_GET() (&thread_allocated_tls.allocated) @@ -338,10 +446,7 @@ extern __thread thread_allocated_t thread_allocated_tls; thread_allocated_tls.allocated += a; \ thread_allocated_tls.deallocated += d; \ } while (0) -# else -extern pthread_key_t thread_allocated_tsd; -thread_allocated_t *thread_allocated_get_hard(void); - +#else # define ALLOCATED_GET() (thread_allocated_get()->allocated) # define ALLOCATEDP_GET() (&thread_allocated_get()->allocated) # define DEALLOCATED_GET() (thread_allocated_get()->deallocated) @@ -351,8 +456,9 @@ thread_allocated_t *thread_allocated_get_hard(void); thread_allocated->allocated += (a); \ thread_allocated->deallocated += (d); \ } while (0) -# endif #endif +extern pthread_key_t thread_allocated_tsd; +thread_allocated_t *thread_allocated_get_hard(void); arena_t *arenas_extend(unsigned ind); arena_t *choose_arena_hard(void); @@ -403,9 +509,7 @@ size_t s2u(size_t size); size_t sa2u(size_t size, size_t alignment, size_t *run_size_p); void malloc_write(const char *s); arena_t *choose_arena(void); -# if (defined(JEMALLOC_STATS) && defined(NO_TLS)) thread_allocated_t *thread_allocated_get(void); -# endif #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) @@ -565,7 +669,6 @@ choose_arena(void) return (ret); } -#if (defined(JEMALLOC_STATS) && defined(NO_TLS)) JEMALLOC_INLINE thread_allocated_t * thread_allocated_get(void) { @@ -577,7 +680,6 @@ thread_allocated_get(void) return (thread_allocated); } #endif -#endif #include "jemalloc/internal/bitmap.h" #include "jemalloc/internal/rtree.h" @@ -593,9 +695,7 @@ void *imalloc(size_t size); void *icalloc(size_t size); void *ipalloc(size_t usize, size_t alignment, bool zero); size_t isalloc(const void *ptr); -# ifdef JEMALLOC_IVSALLOC size_t ivsalloc(const void *ptr); -# endif void idalloc(void *ptr); void *iralloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero, bool no_move); @@ -674,20 +774,18 @@ isalloc(const void *ptr) chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (chunk != ptr) { /* Region. 
*/ - dassert(chunk->arena->magic == ARENA_MAGIC); + assert(chunk->arena->magic == ARENA_MAGIC); -#ifdef JEMALLOC_PROF - ret = arena_salloc_demote(ptr); -#else - ret = arena_salloc(ptr); -#endif + if (config_prof) + ret = arena_salloc_demote(ptr); + else + ret = arena_salloc(ptr); } else ret = huge_salloc(ptr); return (ret); } -#ifdef JEMALLOC_IVSALLOC JEMALLOC_INLINE size_t ivsalloc(const void *ptr) { @@ -698,7 +796,6 @@ ivsalloc(const void *ptr) return (isalloc(ptr)); } -#endif JEMALLOC_INLINE void idalloc(void *ptr) diff --git a/include/jemalloc/internal/mutex.h b/include/jemalloc/internal/mutex.h index 62947ced..6a7b4fce 100644 --- a/include/jemalloc/internal/mutex.h +++ b/include/jemalloc/internal/mutex.h @@ -3,14 +3,14 @@ #ifdef JEMALLOC_OSSPIN typedef OSSpinLock malloc_mutex_t; +#define MALLOC_MUTEX_INITIALIZER 0 #else typedef pthread_mutex_t malloc_mutex_t; -#endif - -#ifdef PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP -# define MALLOC_MUTEX_INITIALIZER PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP -#else -# define MALLOC_MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER +# ifdef PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP +# define MALLOC_MUTEX_INITIALIZER PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP +# else +# define MALLOC_MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER +# endif #endif #endif /* JEMALLOC_H_TYPES */ diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index e9064ba6..d4700808 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -1,4 +1,3 @@ -#ifdef JEMALLOC_PROF /******************************************************************************/ #ifdef JEMALLOC_H_TYPES @@ -297,6 +296,8 @@ prof_sample_threshold_update(prof_tdata_t *prof_tdata) uint64_t r; double u; + cassert(config_prof); + /* * Compute sample threshold as a geometrically distributed random * variable with mean (2^opt_lg_prof_sample). @@ -329,12 +330,13 @@ prof_ctx_get(const void *ptr) prof_ctx_t *ret; arena_chunk_t *chunk; + cassert(config_prof); assert(ptr != NULL); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (chunk != ptr) { /* Region. */ - dassert(chunk->arena->magic == ARENA_MAGIC); + assert(chunk->arena->magic == ARENA_MAGIC); ret = arena_prof_ctx_get(ptr); } else @@ -348,12 +350,13 @@ prof_ctx_set(const void *ptr, prof_ctx_t *ctx) { arena_chunk_t *chunk; + cassert(config_prof); assert(ptr != NULL); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (chunk != ptr) { /* Region. */ - dassert(chunk->arena->magic == ARENA_MAGIC); + assert(chunk->arena->magic == ARENA_MAGIC); arena_prof_ctx_set(ptr, ctx); } else @@ -365,6 +368,7 @@ prof_sample_accum_update(size_t size) { prof_tdata_t *prof_tdata; + cassert(config_prof); /* Sampling logic is unnecessary if the interval is 1. 
*/ assert(opt_lg_prof_sample != 0); @@ -391,6 +395,7 @@ JEMALLOC_INLINE void prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt) { + cassert(config_prof); assert(ptr != NULL); assert(size == isalloc(ptr)); @@ -437,6 +442,7 @@ prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt, { prof_thr_cnt_t *told_cnt; + cassert(config_prof); assert(ptr != NULL || (uintptr_t)cnt <= (uintptr_t)1U); if (ptr != NULL) { @@ -510,6 +516,8 @@ prof_free(const void *ptr, size_t size) { prof_ctx_t *ctx = prof_ctx_get(ptr); + cassert(config_prof); + if ((uintptr_t)ctx > (uintptr_t)1) { assert(size == isalloc(ptr)); prof_thr_cnt_t *tcnt = prof_lookup(ctx->bt); @@ -544,4 +552,3 @@ prof_free(const void *ptr, size_t size) #endif /* JEMALLOC_H_INLINES */ /******************************************************************************/ -#endif /* JEMALLOC_PROF */ diff --git a/include/jemalloc/internal/stats.h b/include/jemalloc/internal/stats.h index 2a9b31d9..64ba4bd7 100644 --- a/include/jemalloc/internal/stats.h +++ b/include/jemalloc/internal/stats.h @@ -3,23 +3,16 @@ #define UMAX2S_BUFSIZE 65 -#ifdef JEMALLOC_STATS typedef struct tcache_bin_stats_s tcache_bin_stats_t; typedef struct malloc_bin_stats_s malloc_bin_stats_t; typedef struct malloc_large_stats_s malloc_large_stats_t; typedef struct arena_stats_s arena_stats_t; -#endif -#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) typedef struct chunk_stats_s chunk_stats_t; -#endif #endif /* JEMALLOC_H_TYPES */ /******************************************************************************/ #ifdef JEMALLOC_H_STRUCTS -#ifdef JEMALLOC_STATS - -#ifdef JEMALLOC_TCACHE struct tcache_bin_stats_s { /* * Number of allocation requests that corresponded to the size of this @@ -27,7 +20,6 @@ struct tcache_bin_stats_s { */ uint64_t nrequests; }; -#endif struct malloc_bin_stats_s { /* @@ -52,13 +44,11 @@ struct malloc_bin_stats_s { */ uint64_t nrequests; -#ifdef JEMALLOC_TCACHE /* Number of tcache fills from this bin. */ uint64_t nfills; /* Number of tcache flushes to this bin. */ uint64_t nflushes; -#endif /* Total number of runs created for this bin's size class. */ uint64_t nruns; @@ -127,14 +117,10 @@ struct arena_stats_s { */ malloc_large_stats_t *lstats; }; -#endif /* JEMALLOC_STATS */ -#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) struct chunk_stats_s { -# ifdef JEMALLOC_STATS /* Number of chunks that were allocated. */ uint64_t nchunks; -# endif /* High-water mark for number of chunks allocated. */ size_t highchunks; @@ -146,7 +132,6 @@ struct chunk_stats_s { */ size_t curchunks; }; -#endif /* JEMALLOC_STATS */ #endif /* JEMALLOC_H_STRUCTS */ /******************************************************************************/ @@ -154,24 +139,19 @@ struct chunk_stats_s { extern bool opt_stats_print; -#ifdef JEMALLOC_STATS extern size_t stats_cactive; -#endif char *u2s(uint64_t x, unsigned base, char *s); -#ifdef JEMALLOC_STATS void malloc_cprintf(void (*write)(void *, const char *), void *cbopaque, const char *format, ...) JEMALLOC_ATTR(format(printf, 3, 4)); void malloc_printf(const char *format, ...) 
JEMALLOC_ATTR(format(printf, 1, 2)); -#endif void stats_print(void (*write)(void *, const char *), void *cbopaque, const char *opts); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ #ifdef JEMALLOC_H_INLINES -#ifdef JEMALLOC_STATS #ifndef JEMALLOC_ENABLE_INLINE size_t stats_cactive_get(void); @@ -202,6 +182,5 @@ stats_cactive_sub(size_t size) } #endif -#endif /* JEMALLOC_STATS */ #endif /* JEMALLOC_H_INLINES */ /******************************************************************************/ diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index da3c68c5..0855d32e 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -42,9 +42,7 @@ struct tcache_bin_info_s { }; struct tcache_bin_s { -# ifdef JEMALLOC_STATS tcache_bin_stats_t tstats; -# endif int low_water; /* Min # cached since last GC. */ unsigned lg_fill_div; /* Fill (ncached_max >> lg_fill_div). */ unsigned ncached; /* # of cached objects. */ @@ -52,12 +50,8 @@ struct tcache_bin_s { }; struct tcache_s { -# ifdef JEMALLOC_STATS ql_elm(tcache_t) link; /* Used for aggregating stats. */ -# endif -# ifdef JEMALLOC_PROF uint64_t prof_accumbytes;/* Cleared after arena_prof_accum() */ -# endif arena_t *arena; /* This thread's arena. */ unsigned ev_cnt; /* Event count since incremental GC. */ unsigned next_gc_bin; /* Next bin to GC. */ @@ -109,23 +103,15 @@ extern size_t tcache_maxclass; /* Number of tcache allocation/deallocation events between incremental GCs. */ extern unsigned tcache_gc_incr; -void tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem -#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) - , tcache_t *tcache -#endif - ); -void tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem -#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) - , tcache_t *tcache -#endif - ); +void tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem, + tcache_t *tcache); +void tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem, + tcache_t *tcache); tcache_t *tcache_create(arena_t *arena); void *tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin, size_t binind); void tcache_destroy(tcache_t *tcache); -#ifdef JEMALLOC_STATS void tcache_stats_merge(tcache_t *tcache, arena_t *arena); -#endif bool tcache_boot(void); #endif /* JEMALLOC_H_EXTERNS */ @@ -195,19 +181,11 @@ tcache_event(tcache_t *tcache) if (binind < nbins) { tcache_bin_flush_small(tbin, binind, tbin->ncached - tbin->low_water + - (tbin->low_water >> 2) -#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) - , tcache -#endif - ); + (tbin->low_water >> 2), tcache); } else { tcache_bin_flush_large(tbin, binind, tbin->ncached - tbin->low_water + - (tbin->low_water >> 2) -#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) - , tcache -#endif - ); + (tbin->low_water >> 2), tcache); } /* * Reduce fill count by 2X. 
Limit lg_fill_div such that @@ -268,21 +246,19 @@ tcache_alloc_small(tcache_t *tcache, size_t size, bool zero) assert(arena_salloc(ret) == arena_bin_info[binind].reg_size); if (zero == false) { -#ifdef JEMALLOC_FILL - if (opt_junk) - memset(ret, 0xa5, size); - else if (opt_zero) - memset(ret, 0, size); -#endif + if (config_fill) { + if (opt_junk) + memset(ret, 0xa5, size); + else if (opt_zero) + memset(ret, 0, size); + } } else memset(ret, 0, size); -#ifdef JEMALLOC_STATS - tbin->tstats.nrequests++; -#endif -#ifdef JEMALLOC_PROF - tcache->prof_accumbytes += arena_bin_info[binind].reg_size; -#endif + if (config_stats) + tbin->tstats.nrequests++; + if (config_prof) + tcache->prof_accumbytes += arena_bin_info[binind].reg_size; tcache_event(tcache); return (ret); } @@ -309,28 +285,28 @@ tcache_alloc_large(tcache_t *tcache, size_t size, bool zero) if (ret == NULL) return (NULL); } else { -#ifdef JEMALLOC_PROF - arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ret); - size_t pageind = (((uintptr_t)ret - (uintptr_t)chunk) >> - PAGE_SHIFT); - chunk->map[pageind-map_bias].bits &= ~CHUNK_MAP_CLASS_MASK; -#endif + if (config_prof) { + arena_chunk_t *chunk = + (arena_chunk_t *)CHUNK_ADDR2BASE(ret); + size_t pageind = (((uintptr_t)ret - (uintptr_t)chunk) >> + PAGE_SHIFT); + chunk->map[pageind-map_bias].bits &= + ~CHUNK_MAP_CLASS_MASK; + } if (zero == false) { -#ifdef JEMALLOC_FILL - if (opt_junk) - memset(ret, 0xa5, size); - else if (opt_zero) - memset(ret, 0, size); -#endif + if (config_fill) { + if (opt_junk) + memset(ret, 0xa5, size); + else if (opt_zero) + memset(ret, 0, size); + } } else memset(ret, 0, size); -#ifdef JEMALLOC_STATS - tbin->tstats.nrequests++; -#endif -#ifdef JEMALLOC_PROF - tcache->prof_accumbytes += size; -#endif + if (config_stats) + tbin->tstats.nrequests++; + if (config_prof) + tcache->prof_accumbytes += size; } tcache_event(tcache); @@ -357,26 +333,20 @@ tcache_dalloc_small(tcache_t *tcache, void *ptr) mapelm = &chunk->map[pageind-map_bias]; run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - (mapelm->bits >> PAGE_SHIFT)) << PAGE_SHIFT)); - dassert(run->magic == ARENA_RUN_MAGIC); + assert(run->magic == ARENA_RUN_MAGIC); bin = run->bin; binind = ((uintptr_t)bin - (uintptr_t)&arena->bins) / sizeof(arena_bin_t); assert(binind < nbins); -#ifdef JEMALLOC_FILL - if (opt_junk) + if (config_fill && opt_junk) memset(ptr, 0x5a, arena_bin_info[binind].reg_size); -#endif tbin = &tcache->tbins[binind]; tbin_info = &tcache_bin_info[binind]; if (tbin->ncached == tbin_info->ncached_max) { tcache_bin_flush_small(tbin, binind, (tbin_info->ncached_max >> - 1) -#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) - , tcache -#endif - ); + 1), tcache); } assert(tbin->ncached < tbin_info->ncached_max); tbin->avail[tbin->ncached] = ptr; @@ -403,20 +373,14 @@ tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size) pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; binind = nbins + (size >> PAGE_SHIFT) - 1; -#ifdef JEMALLOC_FILL - if (opt_junk) + if (config_fill && opt_junk) memset(ptr, 0x5a, size); -#endif tbin = &tcache->tbins[binind]; tbin_info = &tcache_bin_info[binind]; if (tbin->ncached == tbin_info->ncached_max) { tcache_bin_flush_large(tbin, binind, (tbin_info->ncached_max >> - 1) -#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) - , tcache -#endif - ); + 1), tcache); } assert(tbin->ncached < tbin_info->ncached_max); tbin->avail[tbin->ncached] = ptr; diff --git a/include/jemalloc/jemalloc_defs.h.in b/include/jemalloc/jemalloc_defs.h.in index 
9ac7e1c2..d8052e2b 100644 --- a/include/jemalloc/jemalloc_defs.h.in +++ b/include/jemalloc/jemalloc_defs.h.in @@ -48,9 +48,11 @@ /* Defined if __attribute__((...)) syntax is supported. */ #undef JEMALLOC_HAVE_ATTR #ifdef JEMALLOC_HAVE_ATTR -# define JEMALLOC_ATTR(s) __attribute__((s)) +# define JEMALLOC_CATTR(s, a) __attribute__((s)) +# define JEMALLOC_ATTR(s) JEMALLOC_CATTR(s,) #else -# define JEMALLOC_ATTR(s) +# define JEMALLOC_CATTR(s, a) a +# define JEMALLOC_ATTR(s) JEMALLOC_CATTR(s,) #endif /* JEMALLOC_CC_SILENCE enables code that silences unuseful compiler warnings. */ diff --git a/src/arena.c b/src/arena.c index d166ca1e..356b628d 100644 --- a/src/arena.c +++ b/src/arena.c @@ -188,9 +188,7 @@ static bool arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, static bool arena_ralloc_large(void *ptr, size_t oldsize, size_t size, size_t extra, bool zero); static bool small_size2bin_init(void); -#ifdef JEMALLOC_DEBUG static void small_size2bin_validate(void); -#endif static bool small_size2bin_init_hard(void); static size_t bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size); @@ -211,8 +209,8 @@ arena_run_comp(arena_chunk_map_t *a, arena_chunk_map_t *b) } /* Generate red-black tree functions. */ -rb_gen(static JEMALLOC_ATTR(unused), arena_run_tree_, arena_run_tree_t, - arena_chunk_map_t, u.rb_link, arena_run_comp) +rb_gen(static UNUSED, arena_run_tree_, arena_run_tree_t, arena_chunk_map_t, + u.rb_link, arena_run_comp) static inline int arena_avail_comp(arena_chunk_map_t *a, arena_chunk_map_t *b) @@ -246,8 +244,8 @@ arena_avail_comp(arena_chunk_map_t *a, arena_chunk_map_t *b) } /* Generate red-black tree functions. */ -rb_gen(static JEMALLOC_ATTR(unused), arena_avail_tree_, arena_avail_tree_t, - arena_chunk_map_t, u.rb_link, arena_avail_comp) +rb_gen(static UNUSED, arena_avail_tree_, arena_avail_tree_t, arena_chunk_map_t, + u.rb_link, arena_avail_comp) static inline void * arena_run_reg_alloc(arena_run_t *run, arena_bin_info_t *bin_info) @@ -257,7 +255,7 @@ arena_run_reg_alloc(arena_run_t *run, arena_bin_info_t *bin_info) bitmap_t *bitmap = (bitmap_t *)((uintptr_t)run + (uintptr_t)bin_info->bitmap_offset); - dassert(run->magic == ARENA_RUN_MAGIC); + assert(run->magic == ARENA_RUN_MAGIC); assert(run->nfree > 0); assert(bitmap_full(bitmap, &bin_info->bitmap_info) == false); @@ -295,17 +293,16 @@ arena_run_reg_dalloc(arena_run_t *run, void *ptr) run->nfree++; } -#ifdef JEMALLOC_DEBUG static inline void arena_chunk_validate_zeroed(arena_chunk_t *chunk, size_t run_ind) { size_t i; - size_t *p = (size_t *)((uintptr_t)chunk + (run_ind << PAGE_SHIFT)); + UNUSED size_t *p = (size_t *)((uintptr_t)chunk + (run_ind << + PAGE_SHIFT)); for (i = 0; i < PAGE_SIZE / sizeof(size_t); i++) assert(p[i] == 0); } -#endif static void arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, @@ -315,9 +312,6 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, size_t old_ndirty, run_ind, total_pages, need_pages, rem_pages, i; size_t flag_dirty; arena_avail_tree_t *runs_avail; -#ifdef JEMALLOC_STATS - size_t cactive_diff; -#endif chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); old_ndirty = chunk->ndirty; @@ -336,13 +330,17 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, rem_pages = total_pages - need_pages; arena_avail_tree_remove(runs_avail, &chunk->map[run_ind-map_bias]); -#ifdef JEMALLOC_STATS - /* Update stats_cactive if nactive is crossing a chunk multiple. 
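
For context on the comment above: stats_cactive is maintained at chunk granularity, so arena_run_split() records a change only when the active-page total crosses into another chunk. Restated on its own, with CHUNK_CEILING() rounding a byte count up to a multiple of the chunk size:

    size_t before = CHUNK_CEILING(arena->nactive << PAGE_SHIFT);
    size_t after = CHUNK_CEILING((arena->nactive + need_pages) <<
        PAGE_SHIFT);

    if (after != before)
        stats_cactive_add(after - before);

The hunk continues below with exactly this computation folded under if (config_stats), and cactive_diff narrowed to block scope.
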
*/ - cactive_diff = CHUNK_CEILING((arena->nactive + need_pages) << - PAGE_SHIFT) - CHUNK_CEILING(arena->nactive << PAGE_SHIFT); - if (cactive_diff != 0) - stats_cactive_add(cactive_diff); -#endif + if (config_stats) { + /* + * Update stats_cactive if nactive is crossing a chunk + * multiple. + */ + size_t cactive_diff = CHUNK_CEILING((arena->nactive + + need_pages) << PAGE_SHIFT) - CHUNK_CEILING(arena->nactive << + PAGE_SHIFT); + if (cactive_diff != 0) + stats_cactive_add(cactive_diff); + } arena->nactive += need_pages; /* Keep track of trailing unused pages for later use. */ @@ -390,13 +388,10 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, chunk + ((run_ind+i) << PAGE_SHIFT)), 0, PAGE_SIZE); - } -#ifdef JEMALLOC_DEBUG - else { + } else if (config_debug) { arena_chunk_validate_zeroed( chunk, run_ind+i); } -#endif } } else { /* @@ -427,40 +422,34 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, chunk->map[run_ind-map_bias].bits = (chunk->map[run_ind-map_bias].bits & CHUNK_MAP_UNZEROED) | CHUNK_MAP_ALLOCATED | flag_dirty; -#ifdef JEMALLOC_DEBUG /* * The first page will always be dirtied during small run * initialization, so a validation failure here would not * actually cause an observable failure. */ - if (flag_dirty == 0 && + if (config_debug && flag_dirty == 0 && (chunk->map[run_ind-map_bias].bits & CHUNK_MAP_UNZEROED) == 0) arena_chunk_validate_zeroed(chunk, run_ind); -#endif for (i = 1; i < need_pages - 1; i++) { chunk->map[run_ind+i-map_bias].bits = (i << PAGE_SHIFT) | (chunk->map[run_ind+i-map_bias].bits & CHUNK_MAP_UNZEROED) | CHUNK_MAP_ALLOCATED; -#ifdef JEMALLOC_DEBUG - if (flag_dirty == 0 && + if (config_debug && flag_dirty == 0 && (chunk->map[run_ind+i-map_bias].bits & CHUNK_MAP_UNZEROED) == 0) arena_chunk_validate_zeroed(chunk, run_ind+i); -#endif } chunk->map[run_ind+need_pages-1-map_bias].bits = ((need_pages - 1) << PAGE_SHIFT) | (chunk->map[run_ind+need_pages-1-map_bias].bits & CHUNK_MAP_UNZEROED) | CHUNK_MAP_ALLOCATED | flag_dirty; -#ifdef JEMALLOC_DEBUG - if (flag_dirty == 0 && + if (config_debug && flag_dirty == 0 && (chunk->map[run_ind+need_pages-1-map_bias].bits & CHUNK_MAP_UNZEROED) == 0) { arena_chunk_validate_zeroed(chunk, run_ind+need_pages-1); } -#endif } } @@ -498,9 +487,8 @@ arena_chunk_alloc(arena_t *arena) malloc_mutex_lock(&arena->lock); if (chunk == NULL) return (NULL); -#ifdef JEMALLOC_STATS - arena->stats.mapped += chunksize; -#endif + if (config_stats) + arena->stats.mapped += chunksize; chunk->arena = arena; ql_elm_new(chunk, link_dirty); @@ -526,13 +514,10 @@ arena_chunk_alloc(arena_t *arena) if (zero == false) { for (i = map_bias+1; i < chunk_npages-1; i++) chunk->map[i-map_bias].bits = unzeroed; - } -#ifdef JEMALLOC_DEBUG - else { + } else if (config_debug) { for (i = map_bias+1; i < chunk_npages-1; i++) assert(chunk->map[i-map_bias].bits == unzeroed); } -#endif chunk->map[chunk_npages-1-map_bias].bits = arena_maxclass | unzeroed; @@ -571,9 +556,8 @@ arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk) malloc_mutex_unlock(&arena->lock); chunk_dealloc((void *)spare, chunksize, true); malloc_mutex_lock(&arena->lock); -#ifdef JEMALLOC_STATS - arena->stats.mapped -= chunksize; -#endif + if (config_stats) + arena->stats.mapped -= chunksize; } else arena->spare = chunk; } @@ -677,12 +661,8 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) ql_head(arena_chunk_map_t) mapelms; arena_chunk_map_t *mapelm; size_t pageind, flag_unzeroed; -#ifdef JEMALLOC_DEBUG size_t ndirty; -#endif -#ifdef 
JEMALLOC_STATS size_t nmadvise; -#endif ql_new(&mapelms); @@ -692,10 +672,7 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) * madvise(..., MADV_DONTNEED) results in zero-filled pages for anonymous * mappings, but not for file-backed mappings. */ -# ifdef JEMALLOC_SWAP - swap_enabled ? CHUNK_MAP_UNZEROED : -# endif - 0; + (config_swap && swap_enabled) ? CHUNK_MAP_UNZEROED : 0; #else CHUNK_MAP_UNZEROED; #endif @@ -730,9 +707,6 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) assert(pageind + npages <= chunk_npages); if (mapelm->bits & CHUNK_MAP_DIRTY) { size_t i; -#ifdef JEMALLOC_STATS - size_t cactive_diff; -#endif arena_avail_tree_remove( &arena->runs_avail_dirty, mapelm); @@ -755,17 +729,19 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) CHUNK_MAP_ALLOCATED; } -#ifdef JEMALLOC_STATS - /* - * Update stats_cactive if nactive is crossing a - * chunk multiple. - */ - cactive_diff = CHUNK_CEILING((arena->nactive + - npages) << PAGE_SHIFT) - - CHUNK_CEILING(arena->nactive << PAGE_SHIFT); - if (cactive_diff != 0) - stats_cactive_add(cactive_diff); -#endif + if (config_stats) { + /* + * Update stats_cactive if nactive is + * crossing a chunk multiple. + */ + size_t cactive_diff = + CHUNK_CEILING((arena->nactive + + npages) << PAGE_SHIFT) - + CHUNK_CEILING(arena->nactive << + PAGE_SHIFT); + if (cactive_diff != 0) + stats_cactive_add(cactive_diff); + } arena->nactive += npages; /* Append to list for later processing. */ ql_elm_new(mapelm, u.ql_link); @@ -782,7 +758,7 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) chunk + (uintptr_t)(pageind << PAGE_SHIFT)); assert((mapelm->bits >> PAGE_SHIFT) == 0); - dassert(run->magic == ARENA_RUN_MAGIC); + assert(run->magic == ARENA_RUN_MAGIC); size_t binind = arena_bin_index(arena, run->bin); arena_bin_info_t *bin_info = @@ -793,53 +769,45 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) } assert(pageind == chunk_npages); -#ifdef JEMALLOC_DEBUG - ndirty = chunk->ndirty; -#endif -#ifdef JEMALLOC_STATS - arena->stats.purged += chunk->ndirty; -#endif + if (config_debug) + ndirty = chunk->ndirty; + if (config_stats) + arena->stats.purged += chunk->ndirty; arena->ndirty -= chunk->ndirty; chunk->ndirty = 0; ql_remove(&arena->chunks_dirty, chunk, link_dirty); chunk->dirtied = false; malloc_mutex_unlock(&arena->lock); -#ifdef JEMALLOC_STATS - nmadvise = 0; -#endif + if (config_stats) + nmadvise = 0; ql_foreach(mapelm, &mapelms, u.ql_link) { size_t pageind = (((uintptr_t)mapelm - (uintptr_t)chunk->map) / sizeof(arena_chunk_map_t)) + map_bias; size_t npages = mapelm->bits >> PAGE_SHIFT; assert(pageind + npages <= chunk_npages); -#ifdef JEMALLOC_DEBUG assert(ndirty >= npages); - ndirty -= npages; -#endif + if (config_debug) + ndirty -= npages; #ifdef JEMALLOC_PURGE_MADVISE_DONTNEED - madvise((void *)((uintptr_t)chunk + (pageind << PAGE_SHIFT)), - (npages << PAGE_SHIFT), MADV_DONTNEED); +# define MADV_PURGE MADV_DONTNEED #elif defined(JEMALLOC_PURGE_MADVISE_FREE) - madvise((void *)((uintptr_t)chunk + (pageind << PAGE_SHIFT)), - (npages << PAGE_SHIFT), MADV_FREE); +# define MADV_PURGE MADV_FREE #else # error "No method defined for purging unused dirty pages." 
#endif - -#ifdef JEMALLOC_STATS - nmadvise++; -#endif + madvise((void *)((uintptr_t)chunk + (pageind << PAGE_SHIFT)), + (npages << PAGE_SHIFT), MADV_PURGE); +#undef MADV_PURGE + if (config_stats) + nmadvise++; } -#ifdef JEMALLOC_DEBUG assert(ndirty == 0); -#endif malloc_mutex_lock(&arena->lock); -#ifdef JEMALLOC_STATS - arena->stats.nmadvise += nmadvise; -#endif + if (config_stats) + arena->stats.nmadvise += nmadvise; /* Deallocate runs. */ for (mapelm = ql_first(&mapelms); mapelm != NULL; @@ -859,23 +827,22 @@ arena_purge(arena_t *arena, bool all) { arena_chunk_t *chunk; size_t npurgatory; -#ifdef JEMALLOC_DEBUG - size_t ndirty = 0; + if (config_debug) { + size_t ndirty = 0; - ql_foreach(chunk, &arena->chunks_dirty, link_dirty) { - assert(chunk->dirtied); - ndirty += chunk->ndirty; + ql_foreach(chunk, &arena->chunks_dirty, link_dirty) { + assert(chunk->dirtied); + ndirty += chunk->ndirty; + } + assert(ndirty == arena->ndirty); } - assert(ndirty == arena->ndirty); -#endif assert(arena->ndirty > arena->npurgatory || all); assert(arena->ndirty - arena->npurgatory > chunk_npages || all); assert((arena->nactive >> opt_lg_dirty_mult) < (arena->ndirty - arena->npurgatory) || all); -#ifdef JEMALLOC_STATS - arena->stats.npurge++; -#endif + if (config_stats) + arena->stats.npurge++; /* * Compute the minimum number of pages that this thread should try to @@ -957,9 +924,6 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty) arena_chunk_t *chunk; size_t size, run_ind, run_pages, flag_dirty; arena_avail_tree_t *runs_avail; -#ifdef JEMALLOC_STATS - size_t cactive_diff; -#endif chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk) @@ -981,13 +945,17 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty) size = bin_info->run_size; } run_pages = (size >> PAGE_SHIFT); -#ifdef JEMALLOC_STATS - /* Update stats_cactive if nactive is crossing a chunk multiple. */ - cactive_diff = CHUNK_CEILING(arena->nactive << PAGE_SHIFT) - - CHUNK_CEILING((arena->nactive - run_pages) << PAGE_SHIFT); - if (cactive_diff != 0) - stats_cactive_sub(cactive_diff); -#endif + if (config_stats) { + /* + * Update stats_cactive if nactive is crossing a chunk + * multiple. 
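
(The chunk-multiple accounting recurs just below in arena_run_dalloc(), this time subtracting via stats_cactive_sub().) Separately, the purge loop rewritten above stops duplicating the whole madvise() call per purge flavor and instead selects the advice value once through a short-lived macro. In isolation, with addr and len standing in for the chunk/page arithmetic:

    #ifdef JEMALLOC_PURGE_MADVISE_DONTNEED
    #  define MADV_PURGE MADV_DONTNEED
    #elif defined(JEMALLOC_PURGE_MADVISE_FREE)
    #  define MADV_PURGE MADV_FREE
    #else
    #  error "No method defined for purging unused dirty pages."
    #endif
        madvise(addr, len, MADV_PURGE);
    #undef MADV_PURGE

With a single call site, the two purge flavors can no longer drift apart.
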
+ */ + size_t cactive_diff = CHUNK_CEILING(arena->nactive << + PAGE_SHIFT) - CHUNK_CEILING((arena->nactive - run_pages) << + PAGE_SHIFT); + if (cactive_diff != 0) + stats_cactive_sub(cactive_diff); + } arena->nactive -= run_pages; /* @@ -1144,9 +1112,8 @@ arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, | flag_dirty | (chunk->map[pageind-map_bias].bits & CHUNK_MAP_UNZEROED) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; -#ifdef JEMALLOC_DEBUG - { - size_t tail_npages = newsize >> PAGE_SHIFT; + if (config_debug) { + UNUSED size_t tail_npages = newsize >> PAGE_SHIFT; assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias] .bits & ~PAGE_MASK) == 0); assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias] @@ -1156,7 +1123,6 @@ arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias] .bits & CHUNK_MAP_ALLOCATED) != 0); } -#endif chunk->map[pageind+head_npages-map_bias].bits = newsize | flag_dirty | (chunk->map[pageind+head_npages-map_bias].bits & CHUNK_MAP_FLAGS_MASK) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; @@ -1231,9 +1197,8 @@ arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin) run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - (mapelm->bits >> PAGE_SHIFT)) << PAGE_SHIFT)); -#ifdef JEMALLOC_STATS - bin->stats.reruns++; -#endif + if (config_stats) + bin->stats.reruns++; return (run); } /* No existing runs have any space available. */ @@ -1255,20 +1220,19 @@ arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin) run->nextind = 0; run->nfree = bin_info->nregs; bitmap_init(bitmap, &bin_info->bitmap_info); -#ifdef JEMALLOC_DEBUG - run->magic = ARENA_RUN_MAGIC; -#endif + if (config_debug) + run->magic = ARENA_RUN_MAGIC; } malloc_mutex_unlock(&arena->lock); /********************************/ malloc_mutex_lock(&bin->lock); if (run != NULL) { -#ifdef JEMALLOC_STATS - bin->stats.nruns++; - bin->stats.curruns++; - if (bin->stats.curruns > bin->stats.highruns) - bin->stats.highruns = bin->stats.curruns; -#endif + if (config_stats) { + bin->stats.nruns++; + bin->stats.curruns++; + if (bin->stats.curruns > bin->stats.highruns) + bin->stats.highruns = bin->stats.curruns; + } return (run); } @@ -1291,9 +1255,8 @@ arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin) run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - (mapelm->bits >> PAGE_SHIFT)) << PAGE_SHIFT)); -#ifdef JEMALLOC_STATS - bin->stats.reruns++; -#endif + if (config_stats) + bin->stats.reruns++; return (run); } @@ -1318,7 +1281,7 @@ arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin) * Another thread updated runcur while this one ran without the * bin lock in arena_bin_nonfull_run_get(). 
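
The dassert()-to-assert() conversion visible just below (and repeated throughout the patch) follows from the same config_* scheme: the magic fields appear to be unconditionally present in their structures now, but are initialized only under config_debug, which is also the only configuration in which assertions compile to actual checks. The invariant, sketched:

    if (config_debug)
        run->magic = ARENA_RUN_MAGIC;

    /* Later, on every access path; a no-op in non-debug builds. */
    assert(run->magic == ARENA_RUN_MAGIC);

A separate debug-only assertion macro is therefore redundant, and dassert() can be retired.
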
*/ - dassert(bin->runcur->magic == ARENA_RUN_MAGIC); + assert(bin->runcur->magic == ARENA_RUN_MAGIC); assert(bin->runcur->nfree > 0); ret = arena_run_reg_alloc(bin->runcur, bin_info); if (run != NULL) { @@ -1346,13 +1309,12 @@ arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin) bin->runcur = run; - dassert(bin->runcur->magic == ARENA_RUN_MAGIC); + assert(bin->runcur->magic == ARENA_RUN_MAGIC); assert(bin->runcur->nfree > 0); return (arena_run_reg_alloc(bin->runcur, bin_info)); } -#ifdef JEMALLOC_PROF void arena_prof_accum(arena_t *arena, uint64_t accumbytes) { @@ -1365,15 +1327,10 @@ arena_prof_accum(arena_t *arena, uint64_t accumbytes) } } } -#endif -#ifdef JEMALLOC_TCACHE void -arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind -# ifdef JEMALLOC_PROF - , uint64_t prof_accumbytes -# endif - ) +arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind, + uint64_t prof_accumbytes) { unsigned i, nfill; arena_bin_t *bin; @@ -1382,11 +1339,11 @@ arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind assert(tbin->ncached == 0); -#ifdef JEMALLOC_PROF - malloc_mutex_lock(&arena->lock); - arena_prof_accum(arena, prof_accumbytes); - malloc_mutex_unlock(&arena->lock); -#endif + if (config_prof) { + malloc_mutex_lock(&arena->lock); + arena_prof_accum(arena, prof_accumbytes); + malloc_mutex_unlock(&arena->lock); + } bin = &arena->bins[binind]; malloc_mutex_lock(&bin->lock); for (i = 0, nfill = (tcache_bin_info[binind].ncached_max >> @@ -1400,17 +1357,16 @@ arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind /* Insert such that low regions get used first. */ tbin->avail[nfill - 1 - i] = ptr; } -#ifdef JEMALLOC_STATS - bin->stats.allocated += i * arena_bin_info[binind].reg_size; - bin->stats.nmalloc += i; - bin->stats.nrequests += tbin->tstats.nrequests; - bin->stats.nfills++; - tbin->tstats.nrequests = 0; -#endif + if (config_stats) { + bin->stats.allocated += i * arena_bin_info[binind].reg_size; + bin->stats.nmalloc += i; + bin->stats.nrequests += tbin->tstats.nrequests; + bin->stats.nfills++; + tbin->tstats.nrequests = 0; + } malloc_mutex_unlock(&bin->lock); tbin->ncached = i; } -#endif void * arena_malloc_small(arena_t *arena, size_t size, bool zero) @@ -1436,27 +1392,25 @@ arena_malloc_small(arena_t *arena, size_t size, bool zero) return (NULL); } -#ifdef JEMALLOC_STATS - bin->stats.allocated += size; - bin->stats.nmalloc++; - bin->stats.nrequests++; -#endif + if (config_stats) { + bin->stats.allocated += size; + bin->stats.nmalloc++; + bin->stats.nrequests++; + } malloc_mutex_unlock(&bin->lock); -#ifdef JEMALLOC_PROF - if (isthreaded == false) { + if (config_prof && isthreaded == false) { malloc_mutex_lock(&arena->lock); arena_prof_accum(arena, size); malloc_mutex_unlock(&arena->lock); } -#endif if (zero == false) { -#ifdef JEMALLOC_FILL - if (opt_junk) - memset(ret, 0xa5, size); - else if (opt_zero) - memset(ret, 0, size); -#endif + if (config_fill) { + if (opt_junk) + memset(ret, 0xa5, size); + else if (opt_zero) + memset(ret, 0, size); + } } else memset(ret, 0, size); @@ -1476,31 +1430,31 @@ arena_malloc_large(arena_t *arena, size_t size, bool zero) malloc_mutex_unlock(&arena->lock); return (NULL); } -#ifdef JEMALLOC_STATS - arena->stats.nmalloc_large++; - arena->stats.nrequests_large++; - arena->stats.allocated_large += size; - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nmalloc++; - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nrequests++; - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns++; - if 
(arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns > - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns) { - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns = - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns; + if (config_stats) { + arena->stats.nmalloc_large++; + arena->stats.nrequests_large++; + arena->stats.allocated_large += size; + arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nmalloc++; + arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nrequests++; + arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns++; + if (arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns > + arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns) { + arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns = + arena->stats.lstats[(size >> PAGE_SHIFT) + - 1].curruns; + } } -#endif -#ifdef JEMALLOC_PROF - arena_prof_accum(arena, size); -#endif + if (config_prof) + arena_prof_accum(arena, size); malloc_mutex_unlock(&arena->lock); if (zero == false) { -#ifdef JEMALLOC_FILL - if (opt_junk) - memset(ret, 0xa5, size); - else if (opt_zero) - memset(ret, 0, size); -#endif + if (config_fill) { + if (opt_junk) + memset(ret, 0xa5, size); + else if (opt_zero) + memset(ret, 0, size); + } } return (ret); @@ -1514,18 +1468,14 @@ arena_malloc(size_t size, bool zero) assert(QUANTUM_CEILING(size) <= arena_maxclass); if (size <= small_maxclass) { -#ifdef JEMALLOC_TCACHE tcache_t *tcache; - if ((tcache = tcache_get()) != NULL) + if (config_tcache && (tcache = tcache_get()) != NULL) return (tcache_alloc_small(tcache, size, zero)); else - -#endif return (arena_malloc_small(choose_arena(), size, zero)); } else { -#ifdef JEMALLOC_TCACHE - if (size <= tcache_maxclass) { + if (config_tcache && size <= tcache_maxclass) { tcache_t *tcache; if ((tcache = tcache_get()) != NULL) @@ -1535,7 +1485,6 @@ arena_malloc(size_t size, bool zero) size, zero)); } } else -#endif return (arena_malloc_large(choose_arena(), size, zero)); } } @@ -1586,29 +1535,28 @@ arena_palloc(arena_t *arena, size_t size, size_t alloc_size, size_t alignment, } } -#ifdef JEMALLOC_STATS - arena->stats.nmalloc_large++; - arena->stats.nrequests_large++; - arena->stats.allocated_large += size; - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nmalloc++; - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nrequests++; - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns++; - if (arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns > - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns) { - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns = - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns; + if (config_stats) { + arena->stats.nmalloc_large++; + arena->stats.nrequests_large++; + arena->stats.allocated_large += size; + arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nmalloc++; + arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nrequests++; + arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns++; + if (arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns > + arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns) { + arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns = + arena->stats.lstats[(size >> PAGE_SHIFT) + - 1].curruns; + } } -#endif malloc_mutex_unlock(&arena->lock); -#ifdef JEMALLOC_FILL - if (zero == false) { + if (config_fill && zero == false) { if (opt_junk) memset(ret, 0xa5, size); else if (opt_zero) memset(ret, 0, size); } -#endif return (ret); } @@ -1631,7 +1579,7 @@ arena_salloc(const void *ptr) arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) << PAGE_SHIFT)); - 
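
The arena_malloc() hunk above is easier to follow flattened, since the old nested #ifdefs obscured the dispatch. Condensed, not verbatim:

    if (size <= small_maxclass) {
        tcache_t *tcache;

        if (config_tcache && (tcache = tcache_get()) != NULL)
            return (tcache_alloc_small(tcache, size, zero));
        return (arena_malloc_small(choose_arena(), size, zero));
    }
    if (config_tcache && size <= tcache_maxclass) {
        tcache_t *tcache;

        if ((tcache = tcache_get()) != NULL)
            return (tcache_alloc_large(tcache, size, zero));
    }
    return (arena_malloc_large(choose_arena(), size, zero));

Small requests prefer the thread cache, large requests use it only up to tcache_maxclass, and everything else falls through to the chosen arena.
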
dassert(run->magic == ARENA_RUN_MAGIC); + assert(run->magic == ARENA_RUN_MAGIC); size_t binind = arena_bin_index(chunk->arena, run->bin); arena_bin_info_t *bin_info = &arena_bin_info[binind]; assert(((uintptr_t)ptr - ((uintptr_t)run + @@ -1647,7 +1595,6 @@ arena_salloc(const void *ptr) return (ret); } -#ifdef JEMALLOC_PROF void arena_prof_promoted(const void *ptr, size_t size) { @@ -1685,7 +1632,7 @@ arena_salloc_demote(const void *ptr) arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) << PAGE_SHIFT)); - dassert(run->magic == ARENA_RUN_MAGIC); + assert(run->magic == ARENA_RUN_MAGIC); size_t binind = arena_bin_index(chunk->arena, run->bin); arena_bin_info_t *bin_info = &arena_bin_info[binind]; assert(((uintptr_t)ptr - ((uintptr_t)run + @@ -1707,7 +1654,6 @@ arena_salloc_demote(const void *ptr) return (ret); } -#endif static void arena_dissociate_bin_run(arena_chunk_t *chunk, arena_run_t *run, @@ -1781,16 +1727,14 @@ arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, ((past - run_ind) << PAGE_SHIFT), false); /* npages = past - run_ind; */ } -#ifdef JEMALLOC_DEBUG - run->magic = 0; -#endif + if (config_debug) + run->magic = 0; arena_run_dalloc(arena, run, true); malloc_mutex_unlock(&arena->lock); /****************************/ malloc_mutex_lock(&bin->lock); -#ifdef JEMALLOC_STATS - bin->stats.curruns--; -#endif + if (config_stats) + bin->stats.curruns--; } static void @@ -1836,25 +1780,20 @@ arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, size_t pageind; arena_run_t *run; arena_bin_t *bin; -#if (defined(JEMALLOC_FILL) || defined(JEMALLOC_STATS)) size_t size; -#endif pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - (mapelm->bits >> PAGE_SHIFT)) << PAGE_SHIFT)); - dassert(run->magic == ARENA_RUN_MAGIC); + assert(run->magic == ARENA_RUN_MAGIC); bin = run->bin; size_t binind = arena_bin_index(arena, bin); arena_bin_info_t *bin_info = &arena_bin_info[binind]; -#if (defined(JEMALLOC_FILL) || defined(JEMALLOC_STATS)) - size = bin_info->reg_size; -#endif + if (config_fill || config_stats) + size = bin_info->reg_size; -#ifdef JEMALLOC_FILL - if (opt_junk) + if (config_fill && opt_junk) memset(ptr, 0x5a, size); -#endif arena_run_reg_dalloc(run, ptr); if (run->nfree == bin_info->nregs) { @@ -1863,13 +1802,12 @@ arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, } else if (run->nfree == 1 && run != bin->runcur) arena_bin_lower_run(arena, chunk, run, bin); -#ifdef JEMALLOC_STATS - bin->stats.allocated -= size; - bin->stats.ndalloc++; -#endif + if (config_stats) { + bin->stats.allocated -= size; + bin->stats.ndalloc++; + } } -#ifdef JEMALLOC_STATS void arena_stats_merge(arena_t *arena, size_t *nactive, size_t *ndirty, arena_stats_t *astats, malloc_bin_stats_t *bstats, @@ -1907,10 +1845,10 @@ arena_stats_merge(arena_t *arena, size_t *nactive, size_t *ndirty, bstats[i].nmalloc += bin->stats.nmalloc; bstats[i].ndalloc += bin->stats.ndalloc; bstats[i].nrequests += bin->stats.nrequests; -#ifdef JEMALLOC_TCACHE - bstats[i].nfills += bin->stats.nfills; - bstats[i].nflushes += bin->stats.nflushes; -#endif + if (config_tcache) { + bstats[i].nfills += bin->stats.nfills; + bstats[i].nflushes += bin->stats.nflushes; + } bstats[i].nruns += bin->stats.nruns; bstats[i].reruns += bin->stats.reruns; bstats[i].highruns += bin->stats.highruns; @@ -1918,37 +1856,24 @@ arena_stats_merge(arena_t *arena, size_t *nactive, size_t *ndirty, 
malloc_mutex_unlock(&bin->lock); } } -#endif void arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr) { - /* Large allocation. */ -#ifdef JEMALLOC_FILL -# ifndef JEMALLOC_STATS - if (opt_junk) -# endif -#endif - { -#if (defined(JEMALLOC_FILL) || defined(JEMALLOC_STATS)) + if (config_fill || config_stats) { size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; size_t size = chunk->map[pageind-map_bias].bits & ~PAGE_MASK; -#endif -#ifdef JEMALLOC_FILL -# ifdef JEMALLOC_STATS - if (opt_junk) -# endif + if (config_fill && config_stats && opt_junk) memset(ptr, 0x5a, size); -#endif -#ifdef JEMALLOC_STATS - arena->stats.ndalloc_large++; - arena->stats.allocated_large -= size; - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].ndalloc++; - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns--; -#endif + if (config_stats) { + arena->stats.ndalloc_large++; + arena->stats.allocated_large -= size; + arena->stats.lstats[(size >> PAGE_SHIFT) - 1].ndalloc++; + arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns--; + } } arena_run_dalloc(arena, (arena_run_t *)ptr, true); @@ -1968,24 +1893,25 @@ arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk, void *ptr, malloc_mutex_lock(&arena->lock); arena_run_trim_tail(arena, chunk, (arena_run_t *)ptr, oldsize, size, true); -#ifdef JEMALLOC_STATS - arena->stats.ndalloc_large++; - arena->stats.allocated_large -= oldsize; - arena->stats.lstats[(oldsize >> PAGE_SHIFT) - 1].ndalloc++; - arena->stats.lstats[(oldsize >> PAGE_SHIFT) - 1].curruns--; + if (config_stats) { + arena->stats.ndalloc_large++; + arena->stats.allocated_large -= oldsize; + arena->stats.lstats[(oldsize >> PAGE_SHIFT) - 1].ndalloc++; + arena->stats.lstats[(oldsize >> PAGE_SHIFT) - 1].curruns--; - arena->stats.nmalloc_large++; - arena->stats.nrequests_large++; - arena->stats.allocated_large += size; - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nmalloc++; - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nrequests++; - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns++; - if (arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns > - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns) { - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns = - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns; + arena->stats.nmalloc_large++; + arena->stats.nrequests_large++; + arena->stats.allocated_large += size; + arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nmalloc++; + arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nrequests++; + arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns++; + if (arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns > + arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns) { + arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns = + arena->stats.lstats[(size >> PAGE_SHIFT) + - 1].curruns; + } } -#endif malloc_mutex_unlock(&arena->lock); } @@ -2038,25 +1964,29 @@ arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr, chunk->map[pageind+npages-1-map_bias].bits = flag_dirty | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; -#ifdef JEMALLOC_STATS - arena->stats.ndalloc_large++; - arena->stats.allocated_large -= oldsize; - arena->stats.lstats[(oldsize >> PAGE_SHIFT) - 1].ndalloc++; - arena->stats.lstats[(oldsize >> PAGE_SHIFT) - 1].curruns--; + if (config_stats) { + arena->stats.ndalloc_large++; + arena->stats.allocated_large -= oldsize; + arena->stats.lstats[(oldsize >> PAGE_SHIFT) + - 1].ndalloc++; + arena->stats.lstats[(oldsize >> PAGE_SHIFT) + - 1].curruns--; - arena->stats.nmalloc_large++; - 
arena->stats.nrequests_large++; - arena->stats.allocated_large += size; - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nmalloc++; - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nrequests++; - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns++; - if (arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns > - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns) { - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns = - arena->stats.lstats[(size >> PAGE_SHIFT) - - 1].curruns; + arena->stats.nmalloc_large++; + arena->stats.nrequests_large++; + arena->stats.allocated_large += size; + arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nmalloc++; + arena->stats.lstats[(size >> PAGE_SHIFT) + - 1].nrequests++; + arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns++; + if (arena->stats.lstats[(size >> PAGE_SHIFT) + - 1].curruns > arena->stats.lstats[(size >> + PAGE_SHIFT) - 1].highruns) { + arena->stats.lstats[(size >> PAGE_SHIFT) + - 1].highruns = arena->stats.lstats[(size >> + PAGE_SHIFT) - 1].curruns; + } } -#endif malloc_mutex_unlock(&arena->lock); return (false); } @@ -2078,12 +2008,10 @@ arena_ralloc_large(void *ptr, size_t oldsize, size_t size, size_t extra, psize = PAGE_CEILING(size + extra); if (psize == oldsize) { /* Same size class. */ -#ifdef JEMALLOC_FILL - if (opt_junk && size < oldsize) { + if (config_fill && opt_junk && size < oldsize) { memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize - size); } -#endif return (false); } else { arena_chunk_t *chunk; @@ -2091,16 +2019,14 @@ arena_ralloc_large(void *ptr, size_t oldsize, size_t size, size_t extra, chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); arena = chunk->arena; - dassert(arena->magic == ARENA_MAGIC); + assert(arena->magic == ARENA_MAGIC); if (psize < oldsize) { -#ifdef JEMALLOC_FILL /* Fill before shrinking in order avoid a race. 
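
The ordering in the comment above matters: the soon-to-be-unused tail is junk-filled before arena_ralloc_large_shrink() releases the pages, because filling afterward could scribble on memory that another thread has already been handed. The fill contract in both directions, condensed from the surrounding hunks (the local here is named err; the source uses ret):

    if (psize < oldsize) {
        if (config_fill && opt_junk) {
            memset((void *)((uintptr_t)ptr + size), 0x5a,
                oldsize - size);
        }
        arena_ralloc_large_shrink(arena, chunk, ptr, oldsize, psize);
        return (false);
    } else {
        bool err = arena_ralloc_large_grow(arena, chunk, ptr, oldsize,
            PAGE_CEILING(size), psize - PAGE_CEILING(size), zero);

        if (config_fill && err == false && zero == false && opt_zero) {
            memset((void *)((uintptr_t)ptr + oldsize), 0,
                size - oldsize);
        }
        return (err);
    }
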
*/ - if (opt_junk) { + if (config_fill && opt_junk) { memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize - size); } -#endif arena_ralloc_large_shrink(arena, chunk, ptr, oldsize, psize); return (false); @@ -2108,12 +2034,11 @@ arena_ralloc_large(void *ptr, size_t oldsize, size_t size, size_t extra, bool ret = arena_ralloc_large_grow(arena, chunk, ptr, oldsize, PAGE_CEILING(size), psize - PAGE_CEILING(size), zero); -#ifdef JEMALLOC_FILL - if (ret == false && zero == false && opt_zero) { + if (config_fill && ret == false && zero == false && + opt_zero) { memset((void *)((uintptr_t)ptr + oldsize), 0, size - oldsize); } -#endif return (ret); } } @@ -2135,12 +2060,10 @@ arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, SMALL_SIZE2BIN(size + extra) == SMALL_SIZE2BIN(oldsize)) || (size <= oldsize && size + extra >= oldsize)) { -#ifdef JEMALLOC_FILL - if (opt_junk && size < oldsize) { + if (config_fill && opt_junk && size < oldsize) { memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize - size); } -#endif return (ptr); } } else { @@ -2222,22 +2145,21 @@ arena_new(arena_t *arena, unsigned ind) if (malloc_mutex_init(&arena->lock)) return (true); -#ifdef JEMALLOC_STATS - memset(&arena->stats, 0, sizeof(arena_stats_t)); - arena->stats.lstats = (malloc_large_stats_t *)base_alloc(nlclasses * - sizeof(malloc_large_stats_t)); - if (arena->stats.lstats == NULL) - return (true); - memset(arena->stats.lstats, 0, nlclasses * - sizeof(malloc_large_stats_t)); -# ifdef JEMALLOC_TCACHE - ql_new(&arena->tcache_ql); -# endif -#endif + if (config_stats) { + memset(&arena->stats, 0, sizeof(arena_stats_t)); + arena->stats.lstats = + (malloc_large_stats_t *)base_alloc(nlclasses * + sizeof(malloc_large_stats_t)); + if (arena->stats.lstats == NULL) + return (true); + memset(arena->stats.lstats, 0, nlclasses * + sizeof(malloc_large_stats_t)); + if (config_tcache) + ql_new(&arena->tcache_ql); + } -#ifdef JEMALLOC_PROF - arena->prof_accumbytes = 0; -#endif + if (config_prof) + arena->prof_accumbytes = 0; /* Initialize chunks. */ ql_new(&arena->chunks_dirty); @@ -2251,84 +2173,41 @@ arena_new(arena_t *arena, unsigned ind) arena_avail_tree_new(&arena->runs_avail_dirty); /* Initialize bins. */ - i = 0; -#ifdef JEMALLOC_TINY - /* (2^n)-spaced tiny bins. */ - for (; i < ntbins; i++) { + for (i = 0; i < nbins; i++) { bin = &arena->bins[i]; if (malloc_mutex_init(&bin->lock)) return (true); bin->runcur = NULL; arena_run_tree_new(&bin->runs); -#ifdef JEMALLOC_STATS - memset(&bin->stats, 0, sizeof(malloc_bin_stats_t)); -#endif - } -#endif - - /* Quantum-spaced bins. */ - for (; i < ntbins + nqbins; i++) { - bin = &arena->bins[i]; - if (malloc_mutex_init(&bin->lock)) - return (true); - bin->runcur = NULL; - arena_run_tree_new(&bin->runs); -#ifdef JEMALLOC_STATS - memset(&bin->stats, 0, sizeof(malloc_bin_stats_t)); -#endif + if (config_stats) + memset(&bin->stats, 0, sizeof(malloc_bin_stats_t)); } - /* Cacheline-spaced bins. */ - for (; i < ntbins + nqbins + ncbins; i++) { - bin = &arena->bins[i]; - if (malloc_mutex_init(&bin->lock)) - return (true); - bin->runcur = NULL; - arena_run_tree_new(&bin->runs); -#ifdef JEMALLOC_STATS - memset(&bin->stats, 0, sizeof(malloc_bin_stats_t)); -#endif - } - - /* Subpage-spaced bins. 
*/ - for (; i < nbins; i++) { - bin = &arena->bins[i]; - if (malloc_mutex_init(&bin->lock)) - return (true); - bin->runcur = NULL; - arena_run_tree_new(&bin->runs); -#ifdef JEMALLOC_STATS - memset(&bin->stats, 0, sizeof(malloc_bin_stats_t)); -#endif - } - -#ifdef JEMALLOC_DEBUG - arena->magic = ARENA_MAGIC; -#endif + if (config_debug) + arena->magic = ARENA_MAGIC; return (false); } -#ifdef JEMALLOC_DEBUG static void small_size2bin_validate(void) { size_t i, size, binind; i = 1; -# ifdef JEMALLOC_TINY /* Tiny. */ - for (; i < (1U << LG_TINY_MIN); i++) { - size = pow2_ceil(1U << LG_TINY_MIN); - binind = ffs((int)(size >> (LG_TINY_MIN + 1))); - assert(SMALL_SIZE2BIN(i) == binind); + if (config_tiny) { + for (; i < (1U << LG_TINY_MIN); i++) { + size = pow2_ceil(1U << LG_TINY_MIN); + binind = ffs((int)(size >> (LG_TINY_MIN + 1))); + assert(SMALL_SIZE2BIN(i) == binind); + } + for (; i < qspace_min; i++) { + size = pow2_ceil(i); + binind = ffs((int)(size >> (LG_TINY_MIN + 1))); + assert(SMALL_SIZE2BIN(i) == binind); + } } - for (; i < qspace_min; i++) { - size = pow2_ceil(i); - binind = ffs((int)(size >> (LG_TINY_MIN + 1))); - assert(SMALL_SIZE2BIN(i) == binind); - } -# endif /* Quantum-spaced. */ for (; i <= qspace_max; i++) { size = QUANTUM_CEILING(i); @@ -2350,7 +2229,6 @@ small_size2bin_validate(void) assert(SMALL_SIZE2BIN(i) == binind); } } -#endif static bool small_size2bin_init(void) @@ -2363,9 +2241,8 @@ small_size2bin_init(void) return (small_size2bin_init_hard()); small_size2bin = const_small_size2bin; -#ifdef JEMALLOC_DEBUG - small_size2bin_validate(); -#endif + if (config_debug) + small_size2bin_validate(); return (false); } @@ -2388,19 +2265,19 @@ small_size2bin_init_hard(void) return (true); i = 1; -#ifdef JEMALLOC_TINY /* Tiny. */ - for (; i < (1U << LG_TINY_MIN); i += TINY_MIN) { - size = pow2_ceil(1U << LG_TINY_MIN); - binind = ffs((int)(size >> (LG_TINY_MIN + 1))); - CUSTOM_SMALL_SIZE2BIN(i) = binind; + if (config_tiny) { + for (; i < (1U << LG_TINY_MIN); i += TINY_MIN) { + size = pow2_ceil(1U << LG_TINY_MIN); + binind = ffs((int)(size >> (LG_TINY_MIN + 1))); + CUSTOM_SMALL_SIZE2BIN(i) = binind; + } + for (; i < qspace_min; i += TINY_MIN) { + size = pow2_ceil(i); + binind = ffs((int)(size >> (LG_TINY_MIN + 1))); + CUSTOM_SMALL_SIZE2BIN(i) = binind; + } } - for (; i < qspace_min; i += TINY_MIN) { - size = pow2_ceil(i); - binind = ffs((int)(size >> (LG_TINY_MIN + 1))); - CUSTOM_SMALL_SIZE2BIN(i) = binind; - } -#endif /* Quantum-spaced. */ for (; i <= qspace_max; i += TINY_MIN) { size = QUANTUM_CEILING(i); @@ -2423,9 +2300,8 @@ small_size2bin_init_hard(void) } small_size2bin = custom_small_size2bin; -#ifdef JEMALLOC_DEBUG - small_size2bin_validate(); -#endif + if (config_debug) + small_size2bin_validate(); return (false); #undef CUSTOM_SMALL_SIZE2BIN } @@ -2448,9 +2324,7 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size) uint32_t try_nregs, good_nregs; uint32_t try_hdr_size, good_hdr_size; uint32_t try_bitmap_offset, good_bitmap_offset; -#ifdef JEMALLOC_PROF uint32_t try_ctx0_offset, good_ctx0_offset; -#endif uint32_t try_reg0_offset, good_reg0_offset; assert(min_run_size >= PAGE_SIZE); @@ -2481,8 +2355,7 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size) try_bitmap_offset = try_hdr_size; /* Add space for bitmap. */ try_hdr_size += bitmap_size(try_nregs); -#ifdef JEMALLOC_PROF - if (opt_prof && prof_promote == false) { + if (config_prof && opt_prof && prof_promote == false) { /* Pad to a quantum boundary. 
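
For orientation inside bin_info_run_size_calc(): a run is laid out as a header followed by packed regions, and the loop here grows a trial header until it no longer overlaps region 0. The per-iteration layout computation, condensed:

    try_hdr_size = sizeof(arena_run_t);
    /* The allocation bitmap follows the header. */
    try_bitmap_offset = try_hdr_size;
    try_hdr_size += bitmap_size(try_nregs);
    if (config_prof && opt_prof && prof_promote == false) {
        /* Quantum-align, then one prof_ctx_t * per region. */
        try_hdr_size = QUANTUM_CEILING(try_hdr_size);
        try_ctx0_offset = try_hdr_size;
        try_hdr_size += try_nregs * sizeof(prof_ctx_t *);
    } else
        try_ctx0_offset = 0;
    /* Regions are packed at the end of the run. */
    try_reg0_offset = try_run_size - (try_nregs * bin_info->reg_size);

The prof ctx array costs header space only when profiling is enabled and small allocations are not promoted, which is why ctx0_offset can now be computed unconditionally.
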
*/ try_hdr_size = QUANTUM_CEILING(try_hdr_size); try_ctx0_offset = try_hdr_size; @@ -2490,7 +2363,6 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size) try_hdr_size += try_nregs * sizeof(prof_ctx_t *); } else try_ctx0_offset = 0; -#endif try_reg0_offset = try_run_size - (try_nregs * bin_info->reg_size); } while (try_hdr_size > try_reg0_offset); @@ -2504,9 +2376,7 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size) good_nregs = try_nregs; good_hdr_size = try_hdr_size; good_bitmap_offset = try_bitmap_offset; -#ifdef JEMALLOC_PROF good_ctx0_offset = try_ctx0_offset; -#endif good_reg0_offset = try_reg0_offset; /* Try more aggressive settings. */ @@ -2526,8 +2396,7 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size) try_bitmap_offset = try_hdr_size; /* Add space for bitmap. */ try_hdr_size += bitmap_size(try_nregs); -#ifdef JEMALLOC_PROF - if (opt_prof && prof_promote == false) { + if (config_prof && opt_prof && prof_promote == false) { /* Pad to a quantum boundary. */ try_hdr_size = QUANTUM_CEILING(try_hdr_size); try_ctx0_offset = try_hdr_size; @@ -2537,7 +2406,6 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size) try_hdr_size += try_nregs * sizeof(prof_ctx_t *); } -#endif try_reg0_offset = try_run_size - (try_nregs * bin_info->reg_size); } while (try_hdr_size > try_reg0_offset); @@ -2553,9 +2421,7 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size) bin_info->run_size = good_run_size; bin_info->nregs = good_nregs; bin_info->bitmap_offset = good_bitmap_offset; -#ifdef JEMALLOC_PROF bin_info->ctx0_offset = good_ctx0_offset; -#endif bin_info->reg0_offset = good_reg0_offset; return (good_run_size); @@ -2574,15 +2440,17 @@ bin_info_init(void) prev_run_size = PAGE_SIZE; i = 0; -#ifdef JEMALLOC_TINY /* (2^n)-spaced tiny bins. */ - for (; i < ntbins; i++) { - bin_info = &arena_bin_info[i]; - bin_info->reg_size = (1U << (LG_TINY_MIN + i)); - prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size); - bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs); + if (config_tiny) { + for (; i < ntbins; i++) { + bin_info = &arena_bin_info[i]; + bin_info->reg_size = (1U << (LG_TINY_MIN + i)); + prev_run_size = bin_info_run_size_calc(bin_info, + prev_run_size); + bitmap_info_init(&bin_info->bitmap_info, + bin_info->nregs); + } } -#endif /* Quantum-spaced bins. */ for (; i < ntbins + nqbins; i++) { @@ -2631,9 +2499,8 @@ arena_boot(void) assert(sspace_min < PAGE_SIZE); sspace_max = PAGE_SIZE - SUBPAGE; -#ifdef JEMALLOC_TINY - assert(LG_QUANTUM >= LG_TINY_MIN); -#endif + if (config_tiny) + assert(LG_QUANTUM >= LG_TINY_MIN); assert(ntbins <= LG_QUANTUM); nqbins = qspace_max >> LG_QUANTUM; ncbins = ((cspace_max - cspace_min) >> LG_CACHELINE) + 1; @@ -2652,23 +2519,18 @@ arena_boot(void) * small size classes, plus a "not small" size class must be stored in * 8 bits of arena_chunk_map_t's bits field. 
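
Concretely, the check that follows collapses the old two-branch #ifdef into a single chain. When profiling is active and prof_promote is set, one of the 256 encodable values is apparently reserved as the "not small" marker, capping nbins at 255; otherwise all 256 indices are usable. As one expression (illustrative restatement only):

    unsigned max_nbins = (config_prof && opt_prof && prof_promote) ?
        255 : 256;

    if (nbins > max_nbins) {
        /* Report the count and abort, as in the hunk below. */
    }
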
*/ -#ifdef JEMALLOC_PROF - if (opt_prof && prof_promote) { - if (nbins > 255) { - char line_buf[UMAX2S_BUFSIZE]; - malloc_write(": Too many small size classes ("); - malloc_write(u2s(nbins, 10, line_buf)); - malloc_write(" > max 255)\n"); - abort(); - } - } else -#endif - if (nbins > 256) { - char line_buf[UMAX2S_BUFSIZE]; - malloc_write(": Too many small size classes ("); - malloc_write(u2s(nbins, 10, line_buf)); - malloc_write(" > max 256)\n"); - abort(); + if (config_prof && opt_prof && prof_promote && nbins > 255) { + char line_buf[UMAX2S_BUFSIZE]; + malloc_write(": Too many small size classes ("); + malloc_write(u2s(nbins, 10, line_buf)); + malloc_write(" > max 255)\n"); + abort(); + } else if (nbins > 256) { + char line_buf[UMAX2S_BUFSIZE]; + malloc_write(": Too many small size classes ("); + malloc_write(u2s(nbins, 10, line_buf)); + malloc_write(" > max 256)\n"); + abort(); } /* diff --git a/src/chunk.c b/src/chunk.c index d190c6f4..57ab20d8 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -5,18 +5,12 @@ /* Data. */ size_t opt_lg_chunk = LG_CHUNK_DEFAULT; -#ifdef JEMALLOC_SWAP bool opt_overcommit = true; -#endif -#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) malloc_mutex_t chunks_mtx; chunk_stats_t stats_chunks; -#endif -#ifdef JEMALLOC_IVSALLOC rtree_t *chunks_rtree; -#endif /* Various chunk-related settings. */ size_t chunksize; @@ -41,67 +35,50 @@ chunk_alloc(size_t size, bool base, bool *zero) assert(size != 0); assert((size & chunksize_mask) == 0); -#ifdef JEMALLOC_SWAP - if (swap_enabled) { + if (config_swap && swap_enabled) { ret = chunk_alloc_swap(size, zero); if (ret != NULL) goto RETURN; } if (swap_enabled == false || opt_overcommit) { -#endif -#ifdef JEMALLOC_DSS - ret = chunk_alloc_dss(size, zero); - if (ret != NULL) - goto RETURN; -#endif + if (config_dss) { + ret = chunk_alloc_dss(size, zero); + if (ret != NULL) + goto RETURN; + } ret = chunk_alloc_mmap(size); if (ret != NULL) { *zero = true; goto RETURN; } -#ifdef JEMALLOC_SWAP } -#endif /* All strategies for allocation failed. 
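
The fallback order in chunk_alloc(), now expressed with run-time guards, reads straightforwardly once flattened: swap first when enabled, then dss (unless swap is exclusive and overcommit is off), then plain mmap. Condensed from the hunk above:

    if (config_swap && swap_enabled) {
        ret = chunk_alloc_swap(size, zero);
        if (ret != NULL)
            goto RETURN;
    }
    if (swap_enabled == false || opt_overcommit) {
        if (config_dss) {
            ret = chunk_alloc_dss(size, zero);
            if (ret != NULL)
                goto RETURN;
        }
        ret = chunk_alloc_mmap(size);
        if (ret != NULL) {
            *zero = true;
            goto RETURN;
        }
    }
    ret = NULL; /* All strategies failed. */
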
*/ ret = NULL; RETURN: -#ifdef JEMALLOC_IVSALLOC - if (base == false && ret != NULL) { + if (config_ivsalloc && base == false && ret != NULL) { if (rtree_set(chunks_rtree, (uintptr_t)ret, ret)) { chunk_dealloc(ret, size, true); return (NULL); } } -#endif -#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) - if (ret != NULL) { -# ifdef JEMALLOC_PROF + if ((config_stats || config_prof) && ret != NULL) { bool gdump; -# endif malloc_mutex_lock(&chunks_mtx); -# ifdef JEMALLOC_STATS - stats_chunks.nchunks += (size / chunksize); -# endif + if (config_stats) + stats_chunks.nchunks += (size / chunksize); stats_chunks.curchunks += (size / chunksize); if (stats_chunks.curchunks > stats_chunks.highchunks) { stats_chunks.highchunks = stats_chunks.curchunks; -# ifdef JEMALLOC_PROF - gdump = true; -# endif - } -# ifdef JEMALLOC_PROF - else + if (config_prof) + gdump = true; + } else if (config_prof) gdump = false; -# endif malloc_mutex_unlock(&chunks_mtx); -# ifdef JEMALLOC_PROF - if (opt_prof && opt_prof_gdump && gdump) + if (config_prof && opt_prof && opt_prof_gdump && gdump) prof_gdump(); -# endif } -#endif assert(CHUNK_ADDR2BASE(ret) == ret); return (ret); @@ -116,24 +93,20 @@ chunk_dealloc(void *chunk, size_t size, bool unmap) assert(size != 0); assert((size & chunksize_mask) == 0); -#ifdef JEMALLOC_IVSALLOC - rtree_set(chunks_rtree, (uintptr_t)chunk, NULL); -#endif -#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) - malloc_mutex_lock(&chunks_mtx); - stats_chunks.curchunks -= (size / chunksize); - malloc_mutex_unlock(&chunks_mtx); -#endif + if (config_ivsalloc) + rtree_set(chunks_rtree, (uintptr_t)chunk, NULL); + if (config_stats || config_prof) { + malloc_mutex_lock(&chunks_mtx); + stats_chunks.curchunks -= (size / chunksize); + malloc_mutex_unlock(&chunks_mtx); + } if (unmap) { -#ifdef JEMALLOC_SWAP - if (swap_enabled && chunk_dealloc_swap(chunk, size) == false) + if (config_swap && swap_enabled && chunk_dealloc_swap(chunk, + size) == false) return; -#endif -#ifdef JEMALLOC_DSS - if (chunk_dealloc_dss(chunk, size) == false) + if (config_dss && chunk_dealloc_dss(chunk, size) == false) return; -#endif chunk_dealloc_mmap(chunk, size); } } @@ -148,26 +121,23 @@ chunk_boot(void) chunksize_mask = chunksize - 1; chunk_npages = (chunksize >> PAGE_SHIFT); -#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) - if (malloc_mutex_init(&chunks_mtx)) + if (config_stats || config_prof) { + if (malloc_mutex_init(&chunks_mtx)) + return (true); + memset(&stats_chunks, 0, sizeof(chunk_stats_t)); + } + if (config_swap && chunk_swap_boot()) return (true); - memset(&stats_chunks, 0, sizeof(chunk_stats_t)); -#endif -#ifdef JEMALLOC_SWAP - if (chunk_swap_boot()) - return (true); -#endif if (chunk_mmap_boot()) return (true); -#ifdef JEMALLOC_DSS - if (chunk_dss_boot()) + if (config_dss && chunk_dss_boot()) return (true); -#endif -#ifdef JEMALLOC_IVSALLOC - chunks_rtree = rtree_new((ZU(1) << (LG_SIZEOF_PTR+3)) - opt_lg_chunk); - if (chunks_rtree == NULL) - return (true); -#endif + if (config_ivsalloc) { + chunks_rtree = rtree_new((ZU(1) << (LG_SIZEOF_PTR+3)) - + opt_lg_chunk); + if (chunks_rtree == NULL) + return (true); + } return (false); } diff --git a/src/chunk_dss.c b/src/chunk_dss.c index 5c0e290e..c25baea3 100644 --- a/src/chunk_dss.c +++ b/src/chunk_dss.c @@ -1,6 +1,5 @@ #define JEMALLOC_CHUNK_DSS_C_ #include "jemalloc/internal/jemalloc_internal.h" -#ifdef JEMALLOC_DSS /******************************************************************************/ /* Data. 
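
With the file-wide #ifdef JEMALLOC_DSS wrapper gone from chunk_dss.c, each entry point instead opens with cassert(config_dss): the functions now compile in every configuration, and the assertion records that they must never be reached unless dss support was configured in (their callers are themselves guarded by config_dss tests). Every function in the file takes the same shape:

    void *
    chunk_alloc_dss(size_t size, bool *zero)
    {
        cassert(config_dss);
        /* ... body unchanged ... */
    }

chunk_swap.c receives the identical treatment below, with cassert(config_swap).
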
*/ @@ -35,6 +34,8 @@ chunk_recycle_dss(size_t size, bool *zero) { extent_node_t *node, key; + cassert(config_dss); + key.addr = NULL; key.size = size; malloc_mutex_lock(&dss_mtx); @@ -74,6 +75,8 @@ chunk_alloc_dss(size_t size, bool *zero) { void *ret; + cassert(config_dss); + ret = chunk_recycle_dss(size, zero); if (ret != NULL) return (ret); @@ -131,6 +134,8 @@ chunk_dealloc_dss_record(void *chunk, size_t size) { extent_node_t *xnode, *node, *prev, key; + cassert(config_dss); + xnode = NULL; while (true) { key.addr = (void *)((uintptr_t)chunk + size); @@ -204,6 +209,8 @@ chunk_in_dss(void *chunk) { bool ret; + cassert(config_dss); + malloc_mutex_lock(&dss_mtx); if ((uintptr_t)chunk >= (uintptr_t)dss_base && (uintptr_t)chunk < (uintptr_t)dss_max) @@ -220,6 +227,8 @@ chunk_dealloc_dss(void *chunk, size_t size) { bool ret; + cassert(config_dss); + malloc_mutex_lock(&dss_mtx); if ((uintptr_t)chunk >= (uintptr_t)dss_base && (uintptr_t)chunk < (uintptr_t)dss_max) { @@ -269,6 +278,8 @@ bool chunk_dss_boot(void) { + cassert(config_dss); + if (malloc_mutex_init(&dss_mtx)) return (true); dss_base = sbrk(0); @@ -281,4 +292,3 @@ chunk_dss_boot(void) } /******************************************************************************/ -#endif /* JEMALLOC_DSS */ diff --git a/src/chunk_swap.c b/src/chunk_swap.c index cb25ae0d..fe9ca303 100644 --- a/src/chunk_swap.c +++ b/src/chunk_swap.c @@ -1,6 +1,6 @@ #define JEMALLOC_CHUNK_SWAP_C_ #include "jemalloc/internal/jemalloc_internal.h" -#ifdef JEMALLOC_SWAP + /******************************************************************************/ /* Data. */ @@ -9,9 +9,7 @@ bool swap_enabled; bool swap_prezeroed; size_t swap_nfds; int *swap_fds; -#ifdef JEMALLOC_STATS size_t swap_avail; -#endif /* Base address of the mmap()ed file(s). 
*/ static void *swap_base; @@ -42,6 +40,8 @@ chunk_recycle_swap(size_t size, bool *zero) { extent_node_t *node, key; + cassert(config_swap); + key.addr = NULL; key.size = size; malloc_mutex_lock(&swap_mtx); @@ -65,9 +65,8 @@ chunk_recycle_swap(size_t size, bool *zero) node->size -= size; extent_tree_szad_insert(&swap_chunks_szad, node); } -#ifdef JEMALLOC_STATS - swap_avail -= size; -#endif + if (config_stats) + swap_avail -= size; malloc_mutex_unlock(&swap_mtx); if (*zero) @@ -84,6 +83,7 @@ chunk_alloc_swap(size_t size, bool *zero) { void *ret; + cassert(config_swap); assert(swap_enabled); ret = chunk_recycle_swap(size, zero); @@ -94,9 +94,8 @@ chunk_alloc_swap(size_t size, bool *zero) if ((uintptr_t)swap_end + size <= (uintptr_t)swap_max) { ret = swap_end; swap_end = (void *)((uintptr_t)swap_end + size); -#ifdef JEMALLOC_STATS - swap_avail -= size; -#endif + if (config_stats) + swap_avail -= size; malloc_mutex_unlock(&swap_mtx); if (swap_prezeroed) @@ -116,6 +115,8 @@ chunk_dealloc_swap_record(void *chunk, size_t size) { extent_node_t *xnode, *node, *prev, key; + cassert(config_swap); + xnode = NULL; while (true) { key.addr = (void *)((uintptr_t)chunk + size); @@ -189,6 +190,7 @@ chunk_in_swap(void *chunk) { bool ret; + cassert(config_swap); assert(swap_enabled); malloc_mutex_lock(&swap_mtx); @@ -207,6 +209,7 @@ chunk_dealloc_swap(void *chunk, size_t size) { bool ret; + cassert(config_swap); assert(swap_enabled); malloc_mutex_lock(&swap_mtx); @@ -237,9 +240,8 @@ chunk_dealloc_swap(void *chunk, size_t size) } else madvise(chunk, size, MADV_DONTNEED); -#ifdef JEMALLOC_STATS - swap_avail += size; -#endif + if (config_stats) + swap_avail += size; ret = false; goto RETURN; } @@ -260,6 +262,8 @@ chunk_swap_enable(const int *fds, unsigned nfds, bool prezeroed) size_t cumsize, voff; size_t sizes[nfds]; + cassert(config_swap); + malloc_mutex_lock(&swap_mtx); /* Get file sizes. */ @@ -362,9 +366,8 @@ chunk_swap_enable(const int *fds, unsigned nfds, bool prezeroed) memcpy(swap_fds, fds, nfds * sizeof(int)); swap_nfds = nfds; -#ifdef JEMALLOC_STATS - swap_avail = cumsize; -#endif + if (config_stats) + swap_avail = cumsize; swap_enabled = true; @@ -378,6 +381,8 @@ bool chunk_swap_boot(void) { + cassert(config_swap); + if (malloc_mutex_init(&swap_mtx)) return (true); @@ -385,9 +390,8 @@ chunk_swap_boot(void) swap_prezeroed = false; /* swap.* mallctl's depend on this. 
*/ swap_nfds = 0; swap_fds = NULL; -#ifdef JEMALLOC_STATS - swap_avail = 0; -#endif + if (config_stats) + swap_avail = 0; swap_base = NULL; swap_end = NULL; swap_max = NULL; @@ -397,6 +401,3 @@ chunk_swap_boot(void) return (false); } - -/******************************************************************************/ -#endif /* JEMALLOC_SWAP */ diff --git a/src/ckh.c b/src/ckh.c index 43fcc252..f7eaa78b 100644 --- a/src/ckh.c +++ b/src/ckh.c @@ -73,7 +73,7 @@ ckh_isearch(ckh_t *ckh, const void *key) size_t hash1, hash2, bucket, cell; assert(ckh != NULL); - dassert(ckh->magic == CKH_MAGIC); + assert(ckh->magic == CKH_MAGIC); ckh->hash(key, ckh->lg_curbuckets, &hash1, &hash2); @@ -394,9 +394,8 @@ ckh_new(ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh_keycomp_t *keycomp) goto RETURN; } -#ifdef JEMALLOC_DEBUG - ckh->magic = CKH_MAGIC; -#endif + if (config_debug) + ckh->magic = CKH_MAGIC; ret = false; RETURN: @@ -408,7 +407,7 @@ ckh_delete(ckh_t *ckh) { assert(ckh != NULL); - dassert(ckh->magic == CKH_MAGIC); + assert(ckh->magic == CKH_MAGIC); #ifdef CKH_VERBOSE malloc_printf( @@ -433,7 +432,7 @@ ckh_count(ckh_t *ckh) { assert(ckh != NULL); - dassert(ckh->magic == CKH_MAGIC); + assert(ckh->magic == CKH_MAGIC); return (ckh->count); } @@ -464,7 +463,7 @@ ckh_insert(ckh_t *ckh, const void *key, const void *data) bool ret; assert(ckh != NULL); - dassert(ckh->magic == CKH_MAGIC); + assert(ckh->magic == CKH_MAGIC); assert(ckh_search(ckh, key, NULL, NULL)); #ifdef CKH_COUNT @@ -489,7 +488,7 @@ ckh_remove(ckh_t *ckh, const void *searchkey, void **key, void **data) size_t cell; assert(ckh != NULL); - dassert(ckh->magic == CKH_MAGIC); + assert(ckh->magic == CKH_MAGIC); cell = ckh_isearch(ckh, searchkey); if (cell != SIZE_T_MAX) { @@ -521,7 +520,7 @@ ckh_search(ckh_t *ckh, const void *searchkey, void **key, void **data) size_t cell; assert(ckh != NULL); - dassert(ckh->magic == CKH_MAGIC); + assert(ckh->magic == CKH_MAGIC); cell = ckh_isearch(ckh, searchkey); if (cell != SIZE_T_MAX) { diff --git a/src/ctl.c b/src/ctl.c index e5336d36..05be4317 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -27,16 +27,12 @@ static int n##_ctl(const size_t *mib, size_t miblen, void *oldp, \ const ctl_node_t *n##_index(const size_t *mib, size_t miblen, \ size_t i); -#ifdef JEMALLOC_STATS static bool ctl_arena_init(ctl_arena_stats_t *astats); -#endif static void ctl_arena_clear(ctl_arena_stats_t *astats); -#ifdef JEMALLOC_STATS static void ctl_arena_stats_amerge(ctl_arena_stats_t *cstats, arena_t *arena); static void ctl_arena_stats_smerge(ctl_arena_stats_t *sstats, ctl_arena_stats_t *astats); -#endif static void ctl_arena_refresh(arena_t *arena, unsigned i); static void ctl_refresh(void); static bool ctl_init(void); @@ -45,16 +41,12 @@ static int ctl_lookup(const char *name, ctl_node_t const **nodesp, CTL_PROTO(version) CTL_PROTO(epoch) -#ifdef JEMALLOC_TCACHE CTL_PROTO(tcache_flush) -#endif CTL_PROTO(thread_arena) -#ifdef JEMALLOC_STATS CTL_PROTO(thread_allocated) CTL_PROTO(thread_allocatedp) CTL_PROTO(thread_deallocated) CTL_PROTO(thread_deallocatedp) -#endif CTL_PROTO(config_debug) CTL_PROTO(config_dss) CTL_PROTO(config_dynamic_page_shift) @@ -77,21 +69,12 @@ CTL_PROTO(opt_lg_chunk) CTL_PROTO(opt_narenas) CTL_PROTO(opt_lg_dirty_mult) CTL_PROTO(opt_stats_print) -#ifdef JEMALLOC_FILL CTL_PROTO(opt_junk) CTL_PROTO(opt_zero) -#endif -#ifdef JEMALLOC_SYSV CTL_PROTO(opt_sysv) -#endif -#ifdef JEMALLOC_XMALLOC CTL_PROTO(opt_xmalloc) -#endif -#ifdef JEMALLOC_TCACHE CTL_PROTO(opt_tcache) CTL_PROTO(opt_lg_tcache_gc_sweep) -#endif 
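
For orientation in the ctl.c hunks that follow: the mallctl namespace is a static tree of ctl_node_t arrays. NAME() supplies a path component, CTL() binds a leaf to the handler declared via CTL_PROTO(), CHILD() descends into a named subtree, and INDEX(), defined a little further down, routes numeric components such as stats.arenas.<i> through an _index function. Un-ifdef'ing these tables means every name exists in every build. A representative pairing, condensed from the tables below:

    static const ctl_node_t prof_node[] = {
        {NAME("active"),    CTL(prof_active)},
        {NAME("dump"),      CTL(prof_dump)},
        {NAME("interval"),  CTL(prof_interval)}
    };

    static const ctl_node_t root_node[] = {
        /* ... */
        {NAME("prof"),      CHILD(prof)},
        {NAME("stats"),     CHILD(stats)}
    };
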
-#ifdef JEMALLOC_PROF CTL_PROTO(opt_prof) CTL_PROTO(opt_prof_prefix) CTL_PROTO(opt_prof_active) @@ -102,10 +85,7 @@ CTL_PROTO(opt_prof_gdump) CTL_PROTO(opt_prof_leak) CTL_PROTO(opt_prof_accum) CTL_PROTO(opt_lg_prof_tcmax) -#endif -#ifdef JEMALLOC_SWAP CTL_PROTO(opt_overcommit) -#endif CTL_PROTO(arenas_bin_i_size) CTL_PROTO(arenas_bin_i_nregs) CTL_PROTO(arenas_bin_i_run_size) @@ -119,35 +99,26 @@ CTL_PROTO(arenas_cacheline) CTL_PROTO(arenas_subpage) CTL_PROTO(arenas_pagesize) CTL_PROTO(arenas_chunksize) -#ifdef JEMALLOC_TINY CTL_PROTO(arenas_tspace_min) CTL_PROTO(arenas_tspace_max) -#endif CTL_PROTO(arenas_qspace_min) CTL_PROTO(arenas_qspace_max) CTL_PROTO(arenas_cspace_min) CTL_PROTO(arenas_cspace_max) CTL_PROTO(arenas_sspace_min) CTL_PROTO(arenas_sspace_max) -#ifdef JEMALLOC_TCACHE CTL_PROTO(arenas_tcache_max) -#endif CTL_PROTO(arenas_ntbins) CTL_PROTO(arenas_nqbins) CTL_PROTO(arenas_ncbins) CTL_PROTO(arenas_nsbins) CTL_PROTO(arenas_nbins) -#ifdef JEMALLOC_TCACHE CTL_PROTO(arenas_nhbins) -#endif CTL_PROTO(arenas_nlruns) CTL_PROTO(arenas_purge) -#ifdef JEMALLOC_PROF CTL_PROTO(prof_active) CTL_PROTO(prof_dump) CTL_PROTO(prof_interval) -#endif -#ifdef JEMALLOC_STATS CTL_PROTO(stats_chunks_current) CTL_PROTO(stats_chunks_total) CTL_PROTO(stats_chunks_high) @@ -166,10 +137,8 @@ CTL_PROTO(stats_arenas_i_bins_j_allocated) CTL_PROTO(stats_arenas_i_bins_j_nmalloc) CTL_PROTO(stats_arenas_i_bins_j_ndalloc) CTL_PROTO(stats_arenas_i_bins_j_nrequests) -#ifdef JEMALLOC_TCACHE CTL_PROTO(stats_arenas_i_bins_j_nfills) CTL_PROTO(stats_arenas_i_bins_j_nflushes) -#endif CTL_PROTO(stats_arenas_i_bins_j_nruns) CTL_PROTO(stats_arenas_i_bins_j_nreruns) CTL_PROTO(stats_arenas_i_bins_j_highruns) @@ -181,31 +150,22 @@ CTL_PROTO(stats_arenas_i_lruns_j_nrequests) CTL_PROTO(stats_arenas_i_lruns_j_highruns) CTL_PROTO(stats_arenas_i_lruns_j_curruns) INDEX_PROTO(stats_arenas_i_lruns_j) -#endif CTL_PROTO(stats_arenas_i_nthreads) CTL_PROTO(stats_arenas_i_pactive) CTL_PROTO(stats_arenas_i_pdirty) -#ifdef JEMALLOC_STATS CTL_PROTO(stats_arenas_i_mapped) CTL_PROTO(stats_arenas_i_npurge) CTL_PROTO(stats_arenas_i_nmadvise) CTL_PROTO(stats_arenas_i_purged) -#endif INDEX_PROTO(stats_arenas_i) -#ifdef JEMALLOC_STATS CTL_PROTO(stats_cactive) CTL_PROTO(stats_allocated) CTL_PROTO(stats_active) CTL_PROTO(stats_mapped) -#endif -#ifdef JEMALLOC_SWAP -# ifdef JEMALLOC_STATS CTL_PROTO(swap_avail) -# endif CTL_PROTO(swap_prezeroed) CTL_PROTO(swap_nfds) CTL_PROTO(swap_fds) -#endif /******************************************************************************/ /* mallctl tree. 
*/ @@ -223,21 +183,16 @@ CTL_PROTO(swap_fds) */ #define INDEX(i) false, {.indexed = {i##_index}}, NULL -#ifdef JEMALLOC_TCACHE static const ctl_node_t tcache_node[] = { {NAME("flush"), CTL(tcache_flush)} }; -#endif static const ctl_node_t thread_node[] = { - {NAME("arena"), CTL(thread_arena)} -#ifdef JEMALLOC_STATS - , + {NAME("arena"), CTL(thread_arena)}, {NAME("allocated"), CTL(thread_allocated)}, {NAME("allocatedp"), CTL(thread_allocatedp)}, {NAME("deallocated"), CTL(thread_deallocated)}, {NAME("deallocatedp"), CTL(thread_deallocatedp)} -#endif }; static const ctl_node_t config_node[] = { @@ -265,27 +220,13 @@ static const ctl_node_t opt_node[] = { {NAME("lg_chunk"), CTL(opt_lg_chunk)}, {NAME("narenas"), CTL(opt_narenas)}, {NAME("lg_dirty_mult"), CTL(opt_lg_dirty_mult)}, - {NAME("stats_print"), CTL(opt_stats_print)} -#ifdef JEMALLOC_FILL - , + {NAME("stats_print"), CTL(opt_stats_print)}, {NAME("junk"), CTL(opt_junk)}, - {NAME("zero"), CTL(opt_zero)} -#endif -#ifdef JEMALLOC_SYSV - , - {NAME("sysv"), CTL(opt_sysv)} -#endif -#ifdef JEMALLOC_XMALLOC - , - {NAME("xmalloc"), CTL(opt_xmalloc)} -#endif -#ifdef JEMALLOC_TCACHE - , + {NAME("zero"), CTL(opt_zero)}, + {NAME("sysv"), CTL(opt_sysv)}, + {NAME("xmalloc"), CTL(opt_xmalloc)}, {NAME("tcache"), CTL(opt_tcache)}, - {NAME("lg_tcache_gc_sweep"), CTL(opt_lg_tcache_gc_sweep)} -#endif -#ifdef JEMALLOC_PROF - , + {NAME("lg_tcache_gc_sweep"), CTL(opt_lg_tcache_gc_sweep)}, {NAME("prof"), CTL(opt_prof)}, {NAME("prof_prefix"), CTL(opt_prof_prefix)}, {NAME("prof_active"), CTL(opt_prof_active)}, @@ -295,12 +236,8 @@ static const ctl_node_t opt_node[] = { {NAME("prof_gdump"), CTL(opt_prof_gdump)}, {NAME("prof_leak"), CTL(opt_prof_leak)}, {NAME("prof_accum"), CTL(opt_prof_accum)}, - {NAME("lg_prof_tcmax"), CTL(opt_lg_prof_tcmax)} -#endif -#ifdef JEMALLOC_SWAP - , + {NAME("lg_prof_tcmax"), CTL(opt_lg_prof_tcmax)}, {NAME("overcommit"), CTL(opt_overcommit)} -#endif }; static const ctl_node_t arenas_bin_i_node[] = { @@ -335,42 +272,33 @@ static const ctl_node_t arenas_node[] = { {NAME("subpage"), CTL(arenas_subpage)}, {NAME("pagesize"), CTL(arenas_pagesize)}, {NAME("chunksize"), CTL(arenas_chunksize)}, -#ifdef JEMALLOC_TINY {NAME("tspace_min"), CTL(arenas_tspace_min)}, {NAME("tspace_max"), CTL(arenas_tspace_max)}, -#endif {NAME("qspace_min"), CTL(arenas_qspace_min)}, {NAME("qspace_max"), CTL(arenas_qspace_max)}, {NAME("cspace_min"), CTL(arenas_cspace_min)}, {NAME("cspace_max"), CTL(arenas_cspace_max)}, {NAME("sspace_min"), CTL(arenas_sspace_min)}, {NAME("sspace_max"), CTL(arenas_sspace_max)}, -#ifdef JEMALLOC_TCACHE {NAME("tcache_max"), CTL(arenas_tcache_max)}, -#endif {NAME("ntbins"), CTL(arenas_ntbins)}, {NAME("nqbins"), CTL(arenas_nqbins)}, {NAME("ncbins"), CTL(arenas_ncbins)}, {NAME("nsbins"), CTL(arenas_nsbins)}, {NAME("nbins"), CTL(arenas_nbins)}, -#ifdef JEMALLOC_TCACHE {NAME("nhbins"), CTL(arenas_nhbins)}, -#endif {NAME("bin"), CHILD(arenas_bin)}, {NAME("nlruns"), CTL(arenas_nlruns)}, {NAME("lrun"), CHILD(arenas_lrun)}, {NAME("purge"), CTL(arenas_purge)} }; -#ifdef JEMALLOC_PROF static const ctl_node_t prof_node[] = { {NAME("active"), CTL(prof_active)}, {NAME("dump"), CTL(prof_dump)}, {NAME("interval"), CTL(prof_interval)} }; -#endif -#ifdef JEMALLOC_STATS static const ctl_node_t stats_chunks_node[] = { {NAME("current"), CTL(stats_chunks_current)}, {NAME("total"), CTL(stats_chunks_total)}, @@ -402,10 +330,8 @@ static const ctl_node_t stats_arenas_i_bins_j_node[] = { {NAME("nmalloc"), CTL(stats_arenas_i_bins_j_nmalloc)}, {NAME("ndalloc"), 
CTL(stats_arenas_i_bins_j_ndalloc)}, {NAME("nrequests"), CTL(stats_arenas_i_bins_j_nrequests)}, -#ifdef JEMALLOC_TCACHE {NAME("nfills"), CTL(stats_arenas_i_bins_j_nfills)}, {NAME("nflushes"), CTL(stats_arenas_i_bins_j_nflushes)}, -#endif {NAME("nruns"), CTL(stats_arenas_i_bins_j_nruns)}, {NAME("nreruns"), CTL(stats_arenas_i_bins_j_nreruns)}, {NAME("highruns"), CTL(stats_arenas_i_bins_j_highruns)}, @@ -433,14 +359,11 @@ static const ctl_node_t super_stats_arenas_i_lruns_j_node[] = { static const ctl_node_t stats_arenas_i_lruns_node[] = { {INDEX(stats_arenas_i_lruns_j)} }; -#endif static const ctl_node_t stats_arenas_i_node[] = { {NAME("nthreads"), CTL(stats_arenas_i_nthreads)}, {NAME("pactive"), CTL(stats_arenas_i_pactive)}, - {NAME("pdirty"), CTL(stats_arenas_i_pdirty)} -#ifdef JEMALLOC_STATS - , + {NAME("pdirty"), CTL(stats_arenas_i_pdirty)}, {NAME("mapped"), CTL(stats_arenas_i_mapped)}, {NAME("npurge"), CTL(stats_arenas_i_npurge)}, {NAME("nmadvise"), CTL(stats_arenas_i_nmadvise)}, @@ -449,7 +372,6 @@ static const ctl_node_t stats_arenas_i_node[] = { {NAME("large"), CHILD(stats_arenas_i_large)}, {NAME("bins"), CHILD(stats_arenas_i_bins)}, {NAME("lruns"), CHILD(stats_arenas_i_lruns)} -#endif }; static const ctl_node_t super_stats_arenas_i_node[] = { {NAME(""), CHILD(stats_arenas_i)} @@ -460,46 +382,34 @@ static const ctl_node_t stats_arenas_node[] = { }; static const ctl_node_t stats_node[] = { -#ifdef JEMALLOC_STATS {NAME("cactive"), CTL(stats_cactive)}, {NAME("allocated"), CTL(stats_allocated)}, {NAME("active"), CTL(stats_active)}, {NAME("mapped"), CTL(stats_mapped)}, {NAME("chunks"), CHILD(stats_chunks)}, {NAME("huge"), CHILD(stats_huge)}, -#endif {NAME("arenas"), CHILD(stats_arenas)} }; -#ifdef JEMALLOC_SWAP static const ctl_node_t swap_node[] = { -# ifdef JEMALLOC_STATS {NAME("avail"), CTL(swap_avail)}, -# endif {NAME("prezeroed"), CTL(swap_prezeroed)}, {NAME("nfds"), CTL(swap_nfds)}, {NAME("fds"), CTL(swap_fds)} }; -#endif static const ctl_node_t root_node[] = { {NAME("version"), CTL(version)}, {NAME("epoch"), CTL(epoch)}, -#ifdef JEMALLOC_TCACHE {NAME("tcache"), CHILD(tcache)}, -#endif {NAME("thread"), CHILD(thread)}, {NAME("config"), CHILD(config)}, {NAME("opt"), CHILD(opt)}, {NAME("arenas"), CHILD(arenas)}, -#ifdef JEMALLOC_PROF {NAME("prof"), CHILD(prof)}, -#endif {NAME("stats"), CHILD(stats)} -#ifdef JEMALLOC_SWAP , {NAME("swap"), CHILD(swap)} -#endif }; static const ctl_node_t super_root_node[] = { {NAME(""), CHILD(root)} @@ -512,7 +422,6 @@ static const ctl_node_t super_root_node[] = { /******************************************************************************/ -#ifdef JEMALLOC_STATS static bool ctl_arena_init(ctl_arena_stats_t *astats) { @@ -532,7 +441,6 @@ ctl_arena_init(ctl_arena_stats_t *astats) return (false); } -#endif static void ctl_arena_clear(ctl_arena_stats_t *astats) @@ -540,18 +448,18 @@ ctl_arena_clear(ctl_arena_stats_t *astats) astats->pactive = 0; astats->pdirty = 0; -#ifdef JEMALLOC_STATS - memset(&astats->astats, 0, sizeof(arena_stats_t)); - astats->allocated_small = 0; - astats->nmalloc_small = 0; - astats->ndalloc_small = 0; - astats->nrequests_small = 0; - memset(astats->bstats, 0, nbins * sizeof(malloc_bin_stats_t)); - memset(astats->lstats, 0, nlclasses * sizeof(malloc_large_stats_t)); -#endif + if (config_stats) { + memset(&astats->astats, 0, sizeof(arena_stats_t)); + astats->allocated_small = 0; + astats->nmalloc_small = 0; + astats->ndalloc_small = 0; + astats->nrequests_small = 0; + memset(astats->bstats, 0, nbins * sizeof(malloc_bin_stats_t)); + 
memset(astats->lstats, 0, nlclasses * + sizeof(malloc_large_stats_t)); + } } -#ifdef JEMALLOC_STATS static void ctl_arena_stats_amerge(ctl_arena_stats_t *cstats, arena_t *arena) { @@ -604,17 +512,17 @@ ctl_arena_stats_smerge(ctl_arena_stats_t *sstats, ctl_arena_stats_t *astats) sstats->bstats[i].nmalloc += astats->bstats[i].nmalloc; sstats->bstats[i].ndalloc += astats->bstats[i].ndalloc; sstats->bstats[i].nrequests += astats->bstats[i].nrequests; -#ifdef JEMALLOC_TCACHE - sstats->bstats[i].nfills += astats->bstats[i].nfills; - sstats->bstats[i].nflushes += astats->bstats[i].nflushes; -#endif + if (config_tcache) { + sstats->bstats[i].nfills += astats->bstats[i].nfills; + sstats->bstats[i].nflushes += + astats->bstats[i].nflushes; + } sstats->bstats[i].nruns += astats->bstats[i].nruns; sstats->bstats[i].reruns += astats->bstats[i].reruns; sstats->bstats[i].highruns += astats->bstats[i].highruns; sstats->bstats[i].curruns += astats->bstats[i].curruns; } } -#endif static void ctl_arena_refresh(arena_t *arena, unsigned i) @@ -625,17 +533,17 @@ ctl_arena_refresh(arena_t *arena, unsigned i) ctl_arena_clear(astats); sstats->nthreads += astats->nthreads; -#ifdef JEMALLOC_STATS - ctl_arena_stats_amerge(astats, arena); - /* Merge into sum stats as well. */ - ctl_arena_stats_smerge(sstats, astats); -#else - astats->pactive += arena->nactive; - astats->pdirty += arena->ndirty; - /* Merge into sum stats as well. */ - sstats->pactive += arena->nactive; - sstats->pdirty += arena->ndirty; -#endif + if (config_stats) { + ctl_arena_stats_amerge(astats, arena); + /* Merge into sum stats as well. */ + ctl_arena_stats_smerge(sstats, astats); + } else { + astats->pactive += arena->nactive; + astats->pdirty += arena->ndirty; + /* Merge into sum stats as well. */ + sstats->pactive += arena->nactive; + sstats->pdirty += arena->ndirty; + } } static void @@ -644,19 +552,19 @@ ctl_refresh(void) unsigned i; arena_t *tarenas[narenas]; -#ifdef JEMALLOC_STATS - malloc_mutex_lock(&chunks_mtx); - ctl_stats.chunks.current = stats_chunks.curchunks; - ctl_stats.chunks.total = stats_chunks.nchunks; - ctl_stats.chunks.high = stats_chunks.highchunks; - malloc_mutex_unlock(&chunks_mtx); + if (config_stats) { + malloc_mutex_lock(&chunks_mtx); + ctl_stats.chunks.current = stats_chunks.curchunks; + ctl_stats.chunks.total = stats_chunks.nchunks; + ctl_stats.chunks.high = stats_chunks.highchunks; + malloc_mutex_unlock(&chunks_mtx); - malloc_mutex_lock(&huge_mtx); - ctl_stats.huge.allocated = huge_allocated; - ctl_stats.huge.nmalloc = huge_nmalloc; - ctl_stats.huge.ndalloc = huge_ndalloc; - malloc_mutex_unlock(&huge_mtx); -#endif + malloc_mutex_lock(&huge_mtx); + ctl_stats.huge.allocated = huge_allocated; + ctl_stats.huge.nmalloc = huge_nmalloc; + ctl_stats.huge.ndalloc = huge_ndalloc; + malloc_mutex_unlock(&huge_mtx); + } /* * Clear sum stats, since they will be merged into by @@ -682,20 +590,20 @@ ctl_refresh(void) ctl_arena_refresh(tarenas[i], i); } -#ifdef JEMALLOC_STATS - ctl_stats.allocated = ctl_stats.arenas[narenas].allocated_small - + ctl_stats.arenas[narenas].astats.allocated_large - + ctl_stats.huge.allocated; - ctl_stats.active = (ctl_stats.arenas[narenas].pactive << PAGE_SHIFT) - + ctl_stats.huge.allocated; - ctl_stats.mapped = (ctl_stats.chunks.current << opt_lg_chunk); + if (config_stats) { + ctl_stats.allocated = ctl_stats.arenas[narenas].allocated_small + + ctl_stats.arenas[narenas].astats.allocated_large + + ctl_stats.huge.allocated; + ctl_stats.active = (ctl_stats.arenas[narenas].pactive << + PAGE_SHIFT) + 
ctl_stats.huge.allocated; + ctl_stats.mapped = (ctl_stats.chunks.current << opt_lg_chunk); -# ifdef JEMALLOC_SWAP - malloc_mutex_lock(&swap_mtx); - ctl_stats.swap_avail = swap_avail; - malloc_mutex_unlock(&swap_mtx); -# endif -#endif + if (config_swap) { + malloc_mutex_lock(&swap_mtx); + ctl_stats.swap_avail = swap_avail; + malloc_mutex_unlock(&swap_mtx); + } + } ctl_epoch++; } @@ -707,10 +615,6 @@ ctl_init(void) malloc_mutex_lock(&ctl_mtx); if (ctl_initialized == false) { -#ifdef JEMALLOC_STATS - unsigned i; -#endif - /* * Allocate space for one extra arena stats element, which * contains summed stats across all arenas. @@ -729,14 +633,15 @@ ctl_init(void) * ever get used. Lazy initialization would allow errors to * cause inconsistent state to be viewable by the application. */ -#ifdef JEMALLOC_STATS - for (i = 0; i <= narenas; i++) { - if (ctl_arena_init(&ctl_stats.arenas[i])) { - ret = true; - goto RETURN; + if (config_stats) { + unsigned i; + for (i = 0; i <= narenas; i++) { + if (ctl_arena_init(&ctl_stats.arenas[i])) { + ret = true; + goto RETURN; + } } } -#endif ctl_stats.arenas[narenas].initialized = true; ctl_epoch = 0; @@ -998,6 +903,54 @@ ctl_boot(void) } \ } while (0) +/* + * There's a lot of code duplication in the following macros due to limitations + * in how nested cpp macros are expanded. + */ +#define CTL_RO_CLGEN(c, l, n, v, t) \ +static int \ +n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ + void *newp, size_t newlen) \ +{ \ + int ret; \ + t oldval; \ + \ + if ((c) == false) \ + return (ENOENT); \ + if (l) \ + malloc_mutex_lock(&ctl_mtx); \ + READONLY(); \ + oldval = v; \ + READ(oldval, t); \ + \ + ret = 0; \ +RETURN: \ + if (l) \ + malloc_mutex_unlock(&ctl_mtx); \ + return (ret); \ +} + +#define CTL_RO_CGEN(c, n, v, t) \ +static int \ +n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ + void *newp, size_t newlen) \ +{ \ + int ret; \ + t oldval; \ + \ + if ((c) == false) \ + return (ENOENT); \ + malloc_mutex_lock(&ctl_mtx); \ + READONLY(); \ + oldval = v; \ + READ(oldval, t); \ + \ + ret = 0; \ +RETURN: \ + malloc_mutex_unlock(&ctl_mtx); \ + return (ret); \ +} + #define CTL_RO_GEN(n, v, t) \ static int \ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ @@ -1021,7 +974,26 @@ RETURN: \ * ctl_mtx is not acquired, under the assumption that no pertinent data will * mutate during the call. 
*/ -#define CTL_RO_NL_GEN(n, v, t) \ +#define CTL_RO_NL_CGEN(c, n, v, t) \ +static int \ +n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ + void *newp, size_t newlen) \ +{ \ + int ret; \ + t oldval; \ + \ + if ((c) == false) \ + return (ENOENT); \ + READONLY(); \ + oldval = v; \ + READ(oldval, t); \ + \ + ret = 0; \ +RETURN: \ + return (ret); \ +} + +#define CTL_RO_NL_GEN(n, v, t) \ static int \ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ void *newp, size_t newlen) \ @@ -1038,7 +1010,7 @@ RETURN: \ return (ret); \ } -#define CTL_RO_TRUE_GEN(n) \ +#define CTL_RO_BOOL_CONFIG_GEN(n) \ static int \ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ void *newp, size_t newlen) \ @@ -1046,25 +1018,10 @@ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ int ret; \ bool oldval; \ \ + if (n == false) \ + return (ENOENT); \ READONLY(); \ - oldval = true; \ - READ(oldval, bool); \ - \ - ret = 0; \ -RETURN: \ - return (ret); \ -} - -#define CTL_RO_FALSE_GEN(n) \ -static int \ -n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ - void *newp, size_t newlen) \ -{ \ - int ret; \ - bool oldval; \ - \ - READONLY(); \ - oldval = false; \ + oldval = n; \ READ(oldval, bool); \ \ ret = 0; \ @@ -1094,7 +1051,6 @@ RETURN: return (ret); } -#ifdef JEMALLOC_TCACHE static int tcache_flush_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) @@ -1102,6 +1058,9 @@ tcache_flush_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, int ret; tcache_t *tcache; + if (config_tcache == false) + return (ENOENT); + VOID(); tcache = TCACHE_GET(); @@ -1116,7 +1075,6 @@ tcache_flush_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, RETURN: return (ret); } -#endif static int thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, @@ -1151,13 +1109,11 @@ thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, /* Set new arena association. 
*/ ARENA_SET(arena); -#ifdef JEMALLOC_TCACHE - { + if (config_tcache) { tcache_t *tcache = TCACHE_GET(); if (tcache != NULL) tcache->arena = arena; } -#endif } ret = 0; @@ -1165,104 +1121,29 @@ RETURN: return (ret); } -#ifdef JEMALLOC_STATS -CTL_RO_NL_GEN(thread_allocated, ALLOCATED_GET(), uint64_t); -CTL_RO_NL_GEN(thread_allocatedp, ALLOCATEDP_GET(), uint64_t *); -CTL_RO_NL_GEN(thread_deallocated, DEALLOCATED_GET(), uint64_t); -CTL_RO_NL_GEN(thread_deallocatedp, DEALLOCATEDP_GET(), uint64_t *); -#endif +CTL_RO_NL_CGEN(config_stats, thread_allocated, ALLOCATED_GET(), uint64_t) +CTL_RO_NL_CGEN(config_stats, thread_allocatedp, ALLOCATEDP_GET(), uint64_t *) +CTL_RO_NL_CGEN(config_stats, thread_deallocated, DEALLOCATED_GET(), uint64_t) +CTL_RO_NL_CGEN(config_stats, thread_deallocatedp, DEALLOCATEDP_GET(), + uint64_t *) /******************************************************************************/ -#ifdef JEMALLOC_DEBUG -CTL_RO_TRUE_GEN(config_debug) -#else -CTL_RO_FALSE_GEN(config_debug) -#endif - -#ifdef JEMALLOC_DSS -CTL_RO_TRUE_GEN(config_dss) -#else -CTL_RO_FALSE_GEN(config_dss) -#endif - -#ifdef JEMALLOC_DYNAMIC_PAGE_SHIFT -CTL_RO_TRUE_GEN(config_dynamic_page_shift) -#else -CTL_RO_FALSE_GEN(config_dynamic_page_shift) -#endif - -#ifdef JEMALLOC_FILL -CTL_RO_TRUE_GEN(config_fill) -#else -CTL_RO_FALSE_GEN(config_fill) -#endif - -#ifdef JEMALLOC_LAZY_LOCK -CTL_RO_TRUE_GEN(config_lazy_lock) -#else -CTL_RO_FALSE_GEN(config_lazy_lock) -#endif - -#ifdef JEMALLOC_PROF -CTL_RO_TRUE_GEN(config_prof) -#else -CTL_RO_FALSE_GEN(config_prof) -#endif - -#ifdef JEMALLOC_PROF_LIBGCC -CTL_RO_TRUE_GEN(config_prof_libgcc) -#else -CTL_RO_FALSE_GEN(config_prof_libgcc) -#endif - -#ifdef JEMALLOC_PROF_LIBUNWIND -CTL_RO_TRUE_GEN(config_prof_libunwind) -#else -CTL_RO_FALSE_GEN(config_prof_libunwind) -#endif - -#ifdef JEMALLOC_STATS -CTL_RO_TRUE_GEN(config_stats) -#else -CTL_RO_FALSE_GEN(config_stats) -#endif - -#ifdef JEMALLOC_SWAP -CTL_RO_TRUE_GEN(config_swap) -#else -CTL_RO_FALSE_GEN(config_swap) -#endif - -#ifdef JEMALLOC_SYSV -CTL_RO_TRUE_GEN(config_sysv) -#else -CTL_RO_FALSE_GEN(config_sysv) -#endif - -#ifdef JEMALLOC_TCACHE -CTL_RO_TRUE_GEN(config_tcache) -#else -CTL_RO_FALSE_GEN(config_tcache) -#endif - -#ifdef JEMALLOC_TINY -CTL_RO_TRUE_GEN(config_tiny) -#else -CTL_RO_FALSE_GEN(config_tiny) -#endif - -#ifdef JEMALLOC_TLS -CTL_RO_TRUE_GEN(config_tls) -#else -CTL_RO_FALSE_GEN(config_tls) -#endif - -#ifdef JEMALLOC_XMALLOC -CTL_RO_TRUE_GEN(config_xmalloc) -#else -CTL_RO_FALSE_GEN(config_xmalloc) -#endif +CTL_RO_BOOL_CONFIG_GEN(config_debug) +CTL_RO_BOOL_CONFIG_GEN(config_dss) +CTL_RO_BOOL_CONFIG_GEN(config_dynamic_page_shift) +CTL_RO_BOOL_CONFIG_GEN(config_fill) +CTL_RO_BOOL_CONFIG_GEN(config_lazy_lock) +CTL_RO_BOOL_CONFIG_GEN(config_prof) +CTL_RO_BOOL_CONFIG_GEN(config_prof_libgcc) +CTL_RO_BOOL_CONFIG_GEN(config_prof_libunwind) +CTL_RO_BOOL_CONFIG_GEN(config_stats) +CTL_RO_BOOL_CONFIG_GEN(config_swap) +CTL_RO_BOOL_CONFIG_GEN(config_sysv) +CTL_RO_BOOL_CONFIG_GEN(config_tcache) +CTL_RO_BOOL_CONFIG_GEN(config_tiny) +CTL_RO_BOOL_CONFIG_GEN(config_tls) +CTL_RO_BOOL_CONFIG_GEN(config_xmalloc) /******************************************************************************/ @@ -1273,35 +1154,24 @@ CTL_RO_NL_GEN(opt_lg_chunk, opt_lg_chunk, size_t) CTL_RO_NL_GEN(opt_narenas, opt_narenas, size_t) CTL_RO_NL_GEN(opt_lg_dirty_mult, opt_lg_dirty_mult, ssize_t) CTL_RO_NL_GEN(opt_stats_print, opt_stats_print, bool) -#ifdef JEMALLOC_FILL -CTL_RO_NL_GEN(opt_junk, opt_junk, bool) -CTL_RO_NL_GEN(opt_zero, opt_zero, bool) -#endif 
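The CTL_RO_*GEN macro family defined above is what allows the mallctl handlers that follow to be emitted unconditionally: instead of disappearing behind an #ifdef, the generated handler refuses with ENOENT at run time whenever its backing feature was compiled out. As a reading aid, here is a hand-simplified sketch of roughly what one invocation, CTL_RO_NL_CGEN(config_fill, opt_junk, opt_junk, bool), expands to; the simplification (inlining the READONLY()/READ() helpers and dropping the RETURN label) is editorial, not the literal preprocessor output.

    #include <errno.h>      /* ENOENT, EPERM, EINVAL */
    #include <stdbool.h>
    #include <stddef.h>     /* size_t, NULL */

    extern const bool config_fill;      /* defined in jemalloc's headers */
    extern bool opt_junk;               /* the "v" argument of the macro */

    static int
    opt_junk_ctl(const size_t *mib, size_t miblen, void *oldp,
        size_t *oldlenp, void *newp, size_t newlen)
    {
        bool oldval;

        if (config_fill == false)
            return (ENOENT);            /* Feature not configured in. */
        if (newp != NULL || newlen != 0)
            return (EPERM);             /* Read-only node (READONLY()). */
        oldval = opt_junk;
        if (oldp != NULL && oldlenp != NULL) {
            if (*oldlenp != sizeof(bool))
                return (EINVAL);        /* Size mismatch (READ()). */
            *(bool *)oldp = oldval;     /* Report the value. */
        }
        return (0);
    }

CTL_RO_CGEN generates the same shape with ctl_mtx held across the read, and CTL_RO_CLGEN makes the locking itself conditional; as the comment above notes, the three variants cannot share a body given how nested cpp macros expand.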
-#ifdef JEMALLOC_SYSV -CTL_RO_NL_GEN(opt_sysv, opt_sysv, bool) -#endif -#ifdef JEMALLOC_XMALLOC -CTL_RO_NL_GEN(opt_xmalloc, opt_xmalloc, bool) -#endif -#ifdef JEMALLOC_TCACHE -CTL_RO_NL_GEN(opt_tcache, opt_tcache, bool) -CTL_RO_NL_GEN(opt_lg_tcache_gc_sweep, opt_lg_tcache_gc_sweep, ssize_t) -#endif -#ifdef JEMALLOC_PROF -CTL_RO_NL_GEN(opt_prof, opt_prof, bool) -CTL_RO_NL_GEN(opt_prof_prefix, opt_prof_prefix, const char *) -CTL_RO_GEN(opt_prof_active, opt_prof_active, bool) /* Mutable. */ -CTL_RO_NL_GEN(opt_lg_prof_bt_max, opt_lg_prof_bt_max, size_t) -CTL_RO_NL_GEN(opt_lg_prof_sample, opt_lg_prof_sample, size_t) -CTL_RO_NL_GEN(opt_lg_prof_interval, opt_lg_prof_interval, ssize_t) -CTL_RO_NL_GEN(opt_prof_gdump, opt_prof_gdump, bool) -CTL_RO_NL_GEN(opt_prof_leak, opt_prof_leak, bool) -CTL_RO_NL_GEN(opt_prof_accum, opt_prof_accum, bool) -CTL_RO_NL_GEN(opt_lg_prof_tcmax, opt_lg_prof_tcmax, ssize_t) -#endif -#ifdef JEMALLOC_SWAP -CTL_RO_NL_GEN(opt_overcommit, opt_overcommit, bool) -#endif +CTL_RO_NL_CGEN(config_fill, opt_junk, opt_junk, bool) +CTL_RO_NL_CGEN(config_fill, opt_zero, opt_zero, bool) +CTL_RO_NL_CGEN(config_sysv, opt_sysv, opt_sysv, bool) +CTL_RO_NL_CGEN(config_xmalloc, opt_xmalloc, opt_xmalloc, bool) +CTL_RO_NL_CGEN(config_tcache, opt_tcache, opt_tcache, bool) +CTL_RO_NL_CGEN(config_tcache, opt_lg_tcache_gc_sweep, opt_lg_tcache_gc_sweep, + ssize_t) +CTL_RO_NL_CGEN(config_prof, opt_prof, opt_prof, bool) +CTL_RO_NL_CGEN(config_prof, opt_prof_prefix, opt_prof_prefix, const char *) +CTL_RO_CGEN(config_prof, opt_prof_active, opt_prof_active, bool) /* Mutable. */ +CTL_RO_NL_CGEN(config_prof, opt_lg_prof_bt_max, opt_lg_prof_bt_max, size_t) +CTL_RO_NL_CGEN(config_prof, opt_lg_prof_sample, opt_lg_prof_sample, size_t) +CTL_RO_NL_CGEN(config_prof, opt_lg_prof_interval, opt_lg_prof_interval, ssize_t) +CTL_RO_NL_CGEN(config_prof, opt_prof_gdump, opt_prof_gdump, bool) +CTL_RO_NL_CGEN(config_prof, opt_prof_leak, opt_prof_leak, bool) +CTL_RO_NL_CGEN(config_prof, opt_prof_accum, opt_prof_accum, bool) +CTL_RO_NL_CGEN(config_prof, opt_lg_prof_tcmax, opt_lg_prof_tcmax, ssize_t) +CTL_RO_NL_CGEN(config_swap, opt_overcommit, opt_overcommit, bool) /******************************************************************************/ @@ -1360,27 +1230,21 @@ CTL_RO_NL_GEN(arenas_cacheline, CACHELINE, size_t) CTL_RO_NL_GEN(arenas_subpage, SUBPAGE, size_t) CTL_RO_NL_GEN(arenas_pagesize, PAGE_SIZE, size_t) CTL_RO_NL_GEN(arenas_chunksize, chunksize, size_t) -#ifdef JEMALLOC_TINY -CTL_RO_NL_GEN(arenas_tspace_min, (1U << LG_TINY_MIN), size_t) -CTL_RO_NL_GEN(arenas_tspace_max, (qspace_min >> 1), size_t) -#endif +CTL_RO_NL_CGEN(config_tiny, arenas_tspace_min, (1U << LG_TINY_MIN), size_t) +CTL_RO_NL_CGEN(config_tiny, arenas_tspace_max, (qspace_min >> 1), size_t) CTL_RO_NL_GEN(arenas_qspace_min, qspace_min, size_t) CTL_RO_NL_GEN(arenas_qspace_max, qspace_max, size_t) CTL_RO_NL_GEN(arenas_cspace_min, cspace_min, size_t) CTL_RO_NL_GEN(arenas_cspace_max, cspace_max, size_t) CTL_RO_NL_GEN(arenas_sspace_min, sspace_min, size_t) CTL_RO_NL_GEN(arenas_sspace_max, sspace_max, size_t) -#ifdef JEMALLOC_TCACHE -CTL_RO_NL_GEN(arenas_tcache_max, tcache_maxclass, size_t) -#endif +CTL_RO_NL_CGEN(config_tcache, arenas_tcache_max, tcache_maxclass, size_t) CTL_RO_NL_GEN(arenas_ntbins, ntbins, unsigned) CTL_RO_NL_GEN(arenas_nqbins, nqbins, unsigned) CTL_RO_NL_GEN(arenas_ncbins, ncbins, unsigned) CTL_RO_NL_GEN(arenas_nsbins, nsbins, unsigned) CTL_RO_NL_GEN(arenas_nbins, nbins, unsigned) -#ifdef JEMALLOC_TCACHE -CTL_RO_NL_GEN(arenas_nhbins, 
nhbins, unsigned) -#endif +CTL_RO_NL_CGEN(config_tcache, arenas_nhbins, nhbins, unsigned) CTL_RO_NL_GEN(arenas_nlruns, nlclasses, size_t) static int @@ -1423,7 +1287,6 @@ RETURN: /******************************************************************************/ -#ifdef JEMALLOC_PROF static int prof_active_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) @@ -1431,6 +1294,9 @@ prof_active_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, int ret; bool oldval; + if (config_prof == false) + return (ENOENT); + malloc_mutex_lock(&ctl_mtx); /* Protect opt_prof_active. */ oldval = opt_prof_active; if (newp != NULL) { @@ -1457,6 +1323,9 @@ prof_dump_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, int ret; const char *filename = NULL; + if (config_prof == false) + return (ENOENT); + WRITEONLY(); WRITE(filename, const char *); @@ -1470,56 +1339,53 @@ RETURN: return (ret); } -CTL_RO_NL_GEN(prof_interval, prof_interval, uint64_t) -#endif +CTL_RO_NL_CGEN(config_prof, prof_interval, prof_interval, uint64_t) /******************************************************************************/ -#ifdef JEMALLOC_STATS -CTL_RO_GEN(stats_chunks_current, ctl_stats.chunks.current, size_t) -CTL_RO_GEN(stats_chunks_total, ctl_stats.chunks.total, uint64_t) -CTL_RO_GEN(stats_chunks_high, ctl_stats.chunks.high, size_t) -CTL_RO_GEN(stats_huge_allocated, huge_allocated, size_t) -CTL_RO_GEN(stats_huge_nmalloc, huge_nmalloc, uint64_t) -CTL_RO_GEN(stats_huge_ndalloc, huge_ndalloc, uint64_t) -CTL_RO_GEN(stats_arenas_i_small_allocated, +CTL_RO_CGEN(config_stats, stats_chunks_current, ctl_stats.chunks.current, + size_t) +CTL_RO_CGEN(config_stats, stats_chunks_total, ctl_stats.chunks.total, uint64_t) +CTL_RO_CGEN(config_stats, stats_chunks_high, ctl_stats.chunks.high, size_t) +CTL_RO_CGEN(config_stats, stats_huge_allocated, huge_allocated, size_t) +CTL_RO_CGEN(config_stats, stats_huge_nmalloc, huge_nmalloc, uint64_t) +CTL_RO_CGEN(config_stats, stats_huge_ndalloc, huge_ndalloc, uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_small_allocated, ctl_stats.arenas[mib[2]].allocated_small, size_t) -CTL_RO_GEN(stats_arenas_i_small_nmalloc, +CTL_RO_CGEN(config_stats, stats_arenas_i_small_nmalloc, ctl_stats.arenas[mib[2]].nmalloc_small, uint64_t) -CTL_RO_GEN(stats_arenas_i_small_ndalloc, +CTL_RO_CGEN(config_stats, stats_arenas_i_small_ndalloc, ctl_stats.arenas[mib[2]].ndalloc_small, uint64_t) -CTL_RO_GEN(stats_arenas_i_small_nrequests, +CTL_RO_CGEN(config_stats, stats_arenas_i_small_nrequests, ctl_stats.arenas[mib[2]].nrequests_small, uint64_t) -CTL_RO_GEN(stats_arenas_i_large_allocated, +CTL_RO_CGEN(config_stats, stats_arenas_i_large_allocated, ctl_stats.arenas[mib[2]].astats.allocated_large, size_t) -CTL_RO_GEN(stats_arenas_i_large_nmalloc, +CTL_RO_CGEN(config_stats, stats_arenas_i_large_nmalloc, ctl_stats.arenas[mib[2]].astats.nmalloc_large, uint64_t) -CTL_RO_GEN(stats_arenas_i_large_ndalloc, +CTL_RO_CGEN(config_stats, stats_arenas_i_large_ndalloc, ctl_stats.arenas[mib[2]].astats.ndalloc_large, uint64_t) -CTL_RO_GEN(stats_arenas_i_large_nrequests, +CTL_RO_CGEN(config_stats, stats_arenas_i_large_nrequests, ctl_stats.arenas[mib[2]].astats.nrequests_large, uint64_t) -CTL_RO_GEN(stats_arenas_i_bins_j_allocated, +CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_allocated, ctl_stats.arenas[mib[2]].bstats[mib[4]].allocated, size_t) -CTL_RO_GEN(stats_arenas_i_bins_j_nmalloc, +CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nmalloc, 
ctl_stats.arenas[mib[2]].bstats[mib[4]].nmalloc, uint64_t) -CTL_RO_GEN(stats_arenas_i_bins_j_ndalloc, +CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_ndalloc, ctl_stats.arenas[mib[2]].bstats[mib[4]].ndalloc, uint64_t) -CTL_RO_GEN(stats_arenas_i_bins_j_nrequests, +CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nrequests, ctl_stats.arenas[mib[2]].bstats[mib[4]].nrequests, uint64_t) -#ifdef JEMALLOC_TCACHE -CTL_RO_GEN(stats_arenas_i_bins_j_nfills, +CTL_RO_CGEN(config_stats && config_tcache, stats_arenas_i_bins_j_nfills, ctl_stats.arenas[mib[2]].bstats[mib[4]].nfills, uint64_t) -CTL_RO_GEN(stats_arenas_i_bins_j_nflushes, +CTL_RO_CGEN(config_stats && config_tcache, stats_arenas_i_bins_j_nflushes, ctl_stats.arenas[mib[2]].bstats[mib[4]].nflushes, uint64_t) -#endif -CTL_RO_GEN(stats_arenas_i_bins_j_nruns, +CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nruns, ctl_stats.arenas[mib[2]].bstats[mib[4]].nruns, uint64_t) -CTL_RO_GEN(stats_arenas_i_bins_j_nreruns, +CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nreruns, ctl_stats.arenas[mib[2]].bstats[mib[4]].reruns, uint64_t) -CTL_RO_GEN(stats_arenas_i_bins_j_highruns, +CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_highruns, ctl_stats.arenas[mib[2]].bstats[mib[4]].highruns, size_t) -CTL_RO_GEN(stats_arenas_i_bins_j_curruns, +CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_curruns, ctl_stats.arenas[mib[2]].bstats[mib[4]].curruns, size_t) const ctl_node_t * @@ -1531,15 +1397,15 @@ stats_arenas_i_bins_j_index(const size_t *mib, size_t miblen, size_t j) return (super_stats_arenas_i_bins_j_node); } -CTL_RO_GEN(stats_arenas_i_lruns_j_nmalloc, +CTL_RO_CGEN(config_stats, stats_arenas_i_lruns_j_nmalloc, ctl_stats.arenas[mib[2]].lstats[mib[4]].nmalloc, uint64_t) -CTL_RO_GEN(stats_arenas_i_lruns_j_ndalloc, +CTL_RO_CGEN(config_stats, stats_arenas_i_lruns_j_ndalloc, ctl_stats.arenas[mib[2]].lstats[mib[4]].ndalloc, uint64_t) -CTL_RO_GEN(stats_arenas_i_lruns_j_nrequests, +CTL_RO_CGEN(config_stats, stats_arenas_i_lruns_j_nrequests, ctl_stats.arenas[mib[2]].lstats[mib[4]].nrequests, uint64_t) -CTL_RO_GEN(stats_arenas_i_lruns_j_curruns, +CTL_RO_CGEN(config_stats, stats_arenas_i_lruns_j_curruns, ctl_stats.arenas[mib[2]].lstats[mib[4]].curruns, size_t) -CTL_RO_GEN(stats_arenas_i_lruns_j_highruns, +CTL_RO_CGEN(config_stats, stats_arenas_i_lruns_j_highruns, ctl_stats.arenas[mib[2]].lstats[mib[4]].highruns, size_t) const ctl_node_t * @@ -1551,20 +1417,17 @@ stats_arenas_i_lruns_j_index(const size_t *mib, size_t miblen, size_t j) return (super_stats_arenas_i_lruns_j_node); } -#endif CTL_RO_GEN(stats_arenas_i_nthreads, ctl_stats.arenas[mib[2]].nthreads, unsigned) CTL_RO_GEN(stats_arenas_i_pactive, ctl_stats.arenas[mib[2]].pactive, size_t) CTL_RO_GEN(stats_arenas_i_pdirty, ctl_stats.arenas[mib[2]].pdirty, size_t) -#ifdef JEMALLOC_STATS -CTL_RO_GEN(stats_arenas_i_mapped, ctl_stats.arenas[mib[2]].astats.mapped, - size_t) -CTL_RO_GEN(stats_arenas_i_npurge, ctl_stats.arenas[mib[2]].astats.npurge, - uint64_t) -CTL_RO_GEN(stats_arenas_i_nmadvise, ctl_stats.arenas[mib[2]].astats.nmadvise, - uint64_t) -CTL_RO_GEN(stats_arenas_i_purged, ctl_stats.arenas[mib[2]].astats.purged, - uint64_t) -#endif +CTL_RO_CGEN(config_stats, stats_arenas_i_mapped, + ctl_stats.arenas[mib[2]].astats.mapped, size_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_npurge, + ctl_stats.arenas[mib[2]].astats.npurge, uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_nmadvise, + ctl_stats.arenas[mib[2]].astats.nmadvise, uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_purged, + 
ctl_stats.arenas[mib[2]].astats.purged, uint64_t) const ctl_node_t * stats_arenas_i_index(const size_t *mib, size_t miblen, size_t i) @@ -1583,19 +1446,15 @@ RETURN: return (ret); } -#ifdef JEMALLOC_STATS -CTL_RO_GEN(stats_cactive, &stats_cactive, size_t *) -CTL_RO_GEN(stats_allocated, ctl_stats.allocated, size_t) -CTL_RO_GEN(stats_active, ctl_stats.active, size_t) -CTL_RO_GEN(stats_mapped, ctl_stats.mapped, size_t) -#endif +CTL_RO_CGEN(config_stats, stats_cactive, &stats_cactive, size_t *) +CTL_RO_CGEN(config_stats, stats_allocated, ctl_stats.allocated, size_t) +CTL_RO_CGEN(config_stats, stats_active, ctl_stats.active, size_t) +CTL_RO_CGEN(config_stats, stats_mapped, ctl_stats.mapped, size_t) /******************************************************************************/ -#ifdef JEMALLOC_SWAP -# ifdef JEMALLOC_STATS -CTL_RO_GEN(swap_avail, ctl_stats.swap_avail, size_t) -# endif +CTL_RO_CGEN(config_swap && config_stats, swap_avail, ctl_stats.swap_avail, + size_t) static int swap_prezeroed_ctl(const size_t *mib, size_t miblen, void *oldp, @@ -1603,6 +1462,9 @@ swap_prezeroed_ctl(const size_t *mib, size_t miblen, void *oldp, { int ret; + if (config_swap == false) + return (ENOENT); + malloc_mutex_lock(&ctl_mtx); if (swap_enabled) { READONLY(); @@ -1625,7 +1487,7 @@ RETURN: return (ret); } -CTL_RO_GEN(swap_nfds, swap_nfds, size_t) +CTL_RO_CGEN(config_swap, swap_nfds, swap_nfds, size_t) static int swap_fds_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, @@ -1633,6 +1495,9 @@ swap_fds_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, { int ret; + if (config_swap == false) + return (ENOENT); + malloc_mutex_lock(&ctl_mtx); if (swap_enabled) { READONLY(); @@ -1667,4 +1532,3 @@ RETURN: malloc_mutex_unlock(&ctl_mtx); return (ret); } -#endif diff --git a/src/extent.c b/src/extent.c index 3c04d3aa..8c09b486 100644 --- a/src/extent.c +++ b/src/extent.c @@ -3,7 +3,6 @@ /******************************************************************************/ -#if (defined(JEMALLOC_SWAP) || defined(JEMALLOC_DSS)) static inline int extent_szad_comp(extent_node_t *a, extent_node_t *b) { @@ -25,7 +24,6 @@ extent_szad_comp(extent_node_t *a, extent_node_t *b) /* Generate red-black tree functions. */ rb_gen(, extent_tree_szad_, extent_tree_t, extent_node_t, link_szad, extent_szad_comp) -#endif static inline int extent_ad_comp(extent_node_t *a, extent_node_t *b) diff --git a/src/huge.c b/src/huge.c index a4f9b054..1eee436e 100644 --- a/src/huge.c +++ b/src/huge.c @@ -4,11 +4,9 @@ /******************************************************************************/ /* Data. 
*/ -#ifdef JEMALLOC_STATS uint64_t huge_nmalloc; uint64_t huge_ndalloc; size_t huge_allocated; -#endif malloc_mutex_t huge_mtx; @@ -49,21 +47,19 @@ huge_malloc(size_t size, bool zero) malloc_mutex_lock(&huge_mtx); extent_tree_ad_insert(&huge, node); -#ifdef JEMALLOC_STATS - stats_cactive_add(csize); - huge_nmalloc++; - huge_allocated += csize; -#endif + if (config_stats) { + stats_cactive_add(csize); + huge_nmalloc++; + huge_allocated += csize; + } malloc_mutex_unlock(&huge_mtx); -#ifdef JEMALLOC_FILL - if (zero == false) { + if (config_fill && zero == false) { if (opt_junk) memset(ret, 0xa5, csize); else if (opt_zero) memset(ret, 0, csize); } -#endif return (ret); } @@ -134,21 +130,19 @@ huge_palloc(size_t size, size_t alignment, bool zero) malloc_mutex_lock(&huge_mtx); extent_tree_ad_insert(&huge, node); -#ifdef JEMALLOC_STATS - stats_cactive_add(chunk_size); - huge_nmalloc++; - huge_allocated += chunk_size; -#endif + if (config_stats) { + stats_cactive_add(chunk_size); + huge_nmalloc++; + huge_allocated += chunk_size; + } malloc_mutex_unlock(&huge_mtx); -#ifdef JEMALLOC_FILL - if (zero == false) { + if (config_fill && zero == false) { if (opt_junk) memset(ret, 0xa5, chunk_size); else if (opt_zero) memset(ret, 0, chunk_size); } -#endif return (ret); } @@ -164,12 +158,10 @@ huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra) && CHUNK_CEILING(oldsize) >= CHUNK_CEILING(size) && CHUNK_CEILING(oldsize) <= CHUNK_CEILING(size+extra)) { assert(CHUNK_CEILING(oldsize) == oldsize); -#ifdef JEMALLOC_FILL - if (opt_junk && size < oldsize) { + if (config_fill && opt_junk && size < oldsize) { memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize - size); } -#endif return (ptr); } @@ -223,15 +215,10 @@ huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, * source nor the destination are in swap or dss. */ #ifdef JEMALLOC_MREMAP_FIXED - if (oldsize >= chunksize -# ifdef JEMALLOC_SWAP - && (swap_enabled == false || (chunk_in_swap(ptr) == false && - chunk_in_swap(ret) == false)) -# endif -# ifdef JEMALLOC_DSS - && chunk_in_dss(ptr) == false && chunk_in_dss(ret) == false -# endif - ) { + if (oldsize >= chunksize && (config_swap == false || swap_enabled == + false || (chunk_in_swap(ptr) == false && chunk_in_swap(ret) == + false)) && (config_dss == false || (chunk_in_dss(ptr) == false && + chunk_in_dss(ret) == false))) { size_t newsize = huge_salloc(ret); /* @@ -285,23 +272,16 @@ huge_dalloc(void *ptr, bool unmap) assert(node->addr == ptr); extent_tree_ad_remove(&huge, node); -#ifdef JEMALLOC_STATS - stats_cactive_sub(node->size); - huge_ndalloc++; - huge_allocated -= node->size; -#endif + if (config_stats) { + stats_cactive_sub(node->size); + huge_ndalloc++; + huge_allocated -= node->size; + } malloc_mutex_unlock(&huge_mtx); - if (unmap) { - /* Unmap chunk. 
*/ -#ifdef JEMALLOC_FILL -#if (defined(JEMALLOC_SWAP) || defined(JEMALLOC_DSS)) - if (opt_junk) - memset(node->addr, 0x5a, node->size); -#endif -#endif - } + if (unmap && config_fill && (config_swap || config_dss) && opt_junk) + memset(node->addr, 0x5a, node->size); chunk_dealloc(node->addr, node->size, unmap); @@ -328,7 +308,6 @@ huge_salloc(const void *ptr) return (ret); } -#ifdef JEMALLOC_PROF prof_ctx_t * huge_prof_ctx_get(const void *ptr) { @@ -365,7 +344,6 @@ huge_prof_ctx_set(const void *ptr, prof_ctx_t *ctx) malloc_mutex_unlock(&huge_mtx); } -#endif bool huge_boot(void) @@ -376,11 +354,11 @@ huge_boot(void) return (true); extent_tree_ad_new(&huge); -#ifdef JEMALLOC_STATS - huge_nmalloc = 0; - huge_ndalloc = 0; - huge_allocated = 0; -#endif + if (config_stats) { + huge_nmalloc = 0; + huge_ndalloc = 0; + huge_allocated = 0; + } return (false); } diff --git a/src/jemalloc.c b/src/jemalloc.c index a161c2e2..9e1814de 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -13,13 +13,10 @@ pthread_key_t arenas_tsd; __thread arena_t *arenas_tls JEMALLOC_ATTR(tls_model("initial-exec")); #endif -#ifdef JEMALLOC_STATS -# ifndef NO_TLS +#ifndef NO_TLS __thread thread_allocated_t thread_allocated_tls; -# else -pthread_key_t thread_allocated_tsd; -# endif #endif +pthread_key_t thread_allocated_tsd; /* Set to true once the allocator has been initialized. */ static bool malloc_initialized = false; @@ -28,13 +25,7 @@ static bool malloc_initialized = false; static pthread_t malloc_initializer = (unsigned long)0; /* Used to avoid initialization races. */ -static malloc_mutex_t init_lock = -#ifdef JEMALLOC_OSSPIN - 0 -#else - MALLOC_MUTEX_INITIALIZER -#endif - ; +static malloc_mutex_t init_lock = MALLOC_MUTEX_INITIALIZER; #ifdef DYNAMIC_PAGE_SHIFT size_t pagesize; @@ -50,22 +41,16 @@ const char *JEMALLOC_P(malloc_conf) JEMALLOC_ATTR(visibility("default")); bool opt_abort = true; # ifdef JEMALLOC_FILL bool opt_junk = true; +# else +bool opt_junk = false; # endif #else bool opt_abort = false; -# ifdef JEMALLOC_FILL bool opt_junk = false; -# endif #endif -#ifdef JEMALLOC_SYSV bool opt_sysv = false; -#endif -#ifdef JEMALLOC_XMALLOC bool opt_xmalloc = false; -#endif -#ifdef JEMALLOC_FILL bool opt_zero = false; -#endif size_t opt_narenas = 0; /******************************************************************************/ @@ -75,7 +60,7 @@ static void wrtmessage(void *cbopaque, const char *s); static void stats_print_atexit(void); static unsigned malloc_ncpus(void); static void arenas_cleanup(void *arg); -#if (defined(JEMALLOC_STATS) && defined(NO_TLS)) +#ifdef NO_TLS static void thread_allocated_cleanup(void *arg); #endif static bool malloc_conf_next(char const **opts_p, char const **k_p, @@ -89,22 +74,11 @@ static int imemalign(void **memptr, size_t alignment, size_t size); /******************************************************************************/ /* malloc_message() setup. 
*/ -#ifdef JEMALLOC_HAVE_ATTR -JEMALLOC_ATTR(visibility("hidden")) -#else -static -#endif +JEMALLOC_CATTR(visibility("hidden"), static) void wrtmessage(void *cbopaque, const char *s) { -#ifdef JEMALLOC_CC_SILENCE - int result = -#endif - write(STDERR_FILENO, s, strlen(s)); -#ifdef JEMALLOC_CC_SILENCE - if (result < 0) - result = errno; -#endif + UNUSED int result = write(STDERR_FILENO, s, strlen(s)); } void (*JEMALLOC_P(malloc_message))(void *, const char *s) @@ -229,37 +203,38 @@ static void stats_print_atexit(void) { -#if (defined(JEMALLOC_TCACHE) && defined(JEMALLOC_STATS)) - unsigned i; + if (config_tcache && config_stats) { + unsigned i; - /* - * Merge stats from extant threads. This is racy, since individual - * threads do not lock when recording tcache stats events. As a - * consequence, the final stats may be slightly out of date by the time - * they are reported, if other threads continue to allocate. - */ - for (i = 0; i < narenas; i++) { - arena_t *arena = arenas[i]; - if (arena != NULL) { - tcache_t *tcache; + /* + * Merge stats from extant threads. This is racy, since + * individual threads do not lock when recording tcache stats + * events. As a consequence, the final stats may be slightly + * out of date by the time they are reported, if other threads + * continue to allocate. + */ + for (i = 0; i < narenas; i++) { + arena_t *arena = arenas[i]; + if (arena != NULL) { + tcache_t *tcache; - /* - * tcache_stats_merge() locks bins, so if any code is - * introduced that acquires both arena and bin locks in - * the opposite order, deadlocks may result. - */ - malloc_mutex_lock(&arena->lock); - ql_foreach(tcache, &arena->tcache_ql, link) { - tcache_stats_merge(tcache, arena); + /* + * tcache_stats_merge() locks bins, so if any + * code is introduced that acquires both arena + * and bin locks in the opposite order, + * deadlocks may result. + */ + malloc_mutex_lock(&arena->lock); + ql_foreach(tcache, &arena->tcache_ql, link) { + tcache_stats_merge(tcache, arena); + } + malloc_mutex_unlock(&arena->lock); } - malloc_mutex_unlock(&arena->lock); } } -#endif JEMALLOC_P(malloc_stats_print)(NULL, NULL, NULL); } -#if (defined(JEMALLOC_STATS) && defined(NO_TLS)) thread_allocated_t * thread_allocated_get_hard(void) { @@ -279,7 +254,6 @@ thread_allocated_get_hard(void) thread_allocated->deallocated = 0; return (thread_allocated); } -#endif /* * End miscellaneous support functions. 
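The rewrite running through this file is the commit's central pattern: each `#ifdef JEMALLOC_FOO ... #endif` region becomes an ordinary `if (config_foo) { ... }` branch on a compile-time-constant flag, so every configuration is parsed and type-checked on every build, while an optimizing compiler still constant-folds the test and drops the disabled code. A minimal sketch of the idea, assuming the flags are declared as constant booleans in jemalloc's internal headers (the defining header is outside this excerpt; example_update and stats_total are hypothetical, for illustration only):

    #include <stdbool.h>
    #include <stddef.h>

    /*
     * Each cpp feature test is evaluated exactly once, here; everywhere
     * else the feature is a plain branch on a compile-time constant.
     */
    static const bool config_stats =
    #ifdef JEMALLOC_STATS
        true
    #else
        false
    #endif
        ;

    static size_t stats_total;          /* Hypothetical counter. */

    static void
    example_update(size_t size)         /* Hypothetical caller. */
    {

        if (config_stats)
            stats_total += size;        /* Folded away when stats are off. */
    }

The visible cost is that guarded code must now compile in every configuration, which is why symbols such as opt_junk in this file and huge_nmalloc in src/huge.c lose their #ifdef wrappers elsewhere in this patch.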
@@ -315,7 +289,7 @@ arenas_cleanup(void *arg) malloc_mutex_unlock(&arenas_lock); } -#if (defined(JEMALLOC_STATS) && defined(NO_TLS)) +#ifdef NO_TLS static void thread_allocated_cleanup(void *arg) { @@ -603,41 +577,42 @@ malloc_conf_init(void) CONF_HANDLE_SSIZE_T(lg_dirty_mult, -1, (sizeof(size_t) << 3) - 1) CONF_HANDLE_BOOL(stats_print) -#ifdef JEMALLOC_FILL - CONF_HANDLE_BOOL(junk) - CONF_HANDLE_BOOL(zero) -#endif -#ifdef JEMALLOC_SYSV - CONF_HANDLE_BOOL(sysv) -#endif -#ifdef JEMALLOC_XMALLOC - CONF_HANDLE_BOOL(xmalloc) -#endif -#ifdef JEMALLOC_TCACHE - CONF_HANDLE_BOOL(tcache) - CONF_HANDLE_SSIZE_T(lg_tcache_gc_sweep, -1, - (sizeof(size_t) << 3) - 1) - CONF_HANDLE_SSIZE_T(lg_tcache_max, -1, - (sizeof(size_t) << 3) - 1) -#endif -#ifdef JEMALLOC_PROF - CONF_HANDLE_BOOL(prof) - CONF_HANDLE_CHAR_P(prof_prefix, "jeprof") - CONF_HANDLE_SIZE_T(lg_prof_bt_max, 0, LG_PROF_BT_MAX) - CONF_HANDLE_BOOL(prof_active) - CONF_HANDLE_SSIZE_T(lg_prof_sample, 0, - (sizeof(uint64_t) << 3) - 1) - CONF_HANDLE_BOOL(prof_accum) - CONF_HANDLE_SSIZE_T(lg_prof_tcmax, -1, - (sizeof(size_t) << 3) - 1) - CONF_HANDLE_SSIZE_T(lg_prof_interval, -1, - (sizeof(uint64_t) << 3) - 1) - CONF_HANDLE_BOOL(prof_gdump) - CONF_HANDLE_BOOL(prof_leak) -#endif -#ifdef JEMALLOC_SWAP - CONF_HANDLE_BOOL(overcommit) -#endif + if (config_fill) { + CONF_HANDLE_BOOL(junk) + CONF_HANDLE_BOOL(zero) + } + if (config_sysv) { + CONF_HANDLE_BOOL(sysv) + } + if (config_xmalloc) { + CONF_HANDLE_BOOL(xmalloc) + } + if (config_tcache) { + CONF_HANDLE_BOOL(tcache) + CONF_HANDLE_SSIZE_T(lg_tcache_gc_sweep, -1, + (sizeof(size_t) << 3) - 1) + CONF_HANDLE_SSIZE_T(lg_tcache_max, -1, + (sizeof(size_t) << 3) - 1) + } + if (config_prof) { + CONF_HANDLE_BOOL(prof) + CONF_HANDLE_CHAR_P(prof_prefix, "jeprof") + CONF_HANDLE_SIZE_T(lg_prof_bt_max, 0, + LG_PROF_BT_MAX) + CONF_HANDLE_BOOL(prof_active) + CONF_HANDLE_SSIZE_T(lg_prof_sample, 0, + (sizeof(uint64_t) << 3) - 1) + CONF_HANDLE_BOOL(prof_accum) + CONF_HANDLE_SSIZE_T(lg_prof_tcmax, -1, + (sizeof(size_t) << 3) - 1) + CONF_HANDLE_SSIZE_T(lg_prof_interval, -1, + (sizeof(uint64_t) << 3) - 1) + CONF_HANDLE_BOOL(prof_gdump) + CONF_HANDLE_BOOL(prof_leak) + } + if (config_swap) { + CONF_HANDLE_BOOL(overcommit) + } malloc_conf_error("Invalid conf pair", k, klen, v, vlen); #undef CONF_HANDLE_BOOL @@ -701,9 +676,8 @@ malloc_init_hard(void) } #endif -#ifdef JEMALLOC_PROF - prof_boot0(); -#endif + if (config_prof) + prof_boot0(); malloc_conf_init(); @@ -739,31 +713,28 @@ malloc_init_hard(void) return (true); } -#ifdef JEMALLOC_PROF - prof_boot1(); -#endif + if (config_prof) + prof_boot1(); if (arena_boot()) { malloc_mutex_unlock(&init_lock); return (true); } -#ifdef JEMALLOC_TCACHE - if (tcache_boot()) { + if (config_tcache && tcache_boot()) { malloc_mutex_unlock(&init_lock); return (true); } -#endif if (huge_boot()) { malloc_mutex_unlock(&init_lock); return (true); } -#if (defined(JEMALLOC_STATS) && defined(NO_TLS)) +#ifdef NO_TLS /* Initialize allocation counters before any allocations can occur. */ - if (pthread_key_create(&thread_allocated_tsd, thread_allocated_cleanup) - != 0) { + if (config_stats && pthread_key_create(&thread_allocated_tsd, + thread_allocated_cleanup) != 0) { malloc_mutex_unlock(&init_lock); return (true); } @@ -803,12 +774,10 @@ malloc_init_hard(void) ARENA_SET(arenas[0]); arenas[0]->nthreads++; -#ifdef JEMALLOC_PROF - if (prof_boot2()) { + if (config_prof && prof_boot2()) { malloc_mutex_unlock(&init_lock); return (true); } -#endif /* Get number of CPUs. 
*/ malloc_initializer = pthread_self(); @@ -897,20 +866,8 @@ void * JEMALLOC_P(malloc)(size_t size) { void *ret; -#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS)) - size_t usize -# ifdef JEMALLOC_CC_SILENCE - = 0 -# endif - ; -#endif -#ifdef JEMALLOC_PROF - prof_thr_cnt_t *cnt -# ifdef JEMALLOC_CC_SILENCE - = NULL -# endif - ; -#endif + size_t usize; + prof_thr_cnt_t *cnt; if (malloc_init()) { ret = NULL; @@ -918,27 +875,20 @@ JEMALLOC_P(malloc)(size_t size) } if (size == 0) { -#ifdef JEMALLOC_SYSV - if (opt_sysv == false) -#endif + if (config_sysv == false || opt_sysv == false) size = 1; -#ifdef JEMALLOC_SYSV else { -# ifdef JEMALLOC_XMALLOC - if (opt_xmalloc) { + if (config_xmalloc && opt_xmalloc) { malloc_write(": Error in malloc(): " "invalid size 0\n"); abort(); } -# endif ret = NULL; goto RETURN; } -#endif } -#ifdef JEMALLOC_PROF - if (opt_prof) { + if (config_prof && opt_prof) { usize = s2u(size); PROF_ALLOC_PREP(1, usize, cnt); if (cnt == NULL) { @@ -952,47 +902,36 @@ JEMALLOC_P(malloc)(size_t size) arena_prof_promoted(ret, usize); } else ret = imalloc(size); - } else -#endif - { -#ifdef JEMALLOC_STATS - usize = s2u(size); -#endif + } else { + if (config_stats) + usize = s2u(size); ret = imalloc(size); } OOM: if (ret == NULL) { -#ifdef JEMALLOC_XMALLOC - if (opt_xmalloc) { + if (config_xmalloc && opt_xmalloc) { malloc_write(": Error in malloc(): " "out of memory\n"); abort(); } -#endif errno = ENOMEM; } -#ifdef JEMALLOC_SYSV RETURN: -#endif -#ifdef JEMALLOC_PROF - if (opt_prof && ret != NULL) + if (config_prof && opt_prof && ret != NULL) prof_malloc(ret, usize, cnt); -#endif -#ifdef JEMALLOC_STATS - if (ret != NULL) { + if (config_stats && ret != NULL) { assert(usize == isalloc(ret)); ALLOCATED_ADD(usize, 0); } -#endif return (ret); } JEMALLOC_ATTR(nonnull(1)) #ifdef JEMALLOC_PROF /* - * Avoid any uncertainty as to how many backtrace frames to ignore in + * Avoid any uncertainty as to how many backtrace frames to ignore in * PROF_ALLOC_PREP(). */ JEMALLOC_ATTR(noinline) @@ -1001,56 +940,38 @@ static int imemalign(void **memptr, size_t alignment, size_t size) { int ret; - size_t usize -#ifdef JEMALLOC_CC_SILENCE - = 0 -#endif - ; + size_t usize; void *result; -#ifdef JEMALLOC_PROF - prof_thr_cnt_t *cnt -# ifdef JEMALLOC_CC_SILENCE - = NULL -# endif - ; -#endif + prof_thr_cnt_t *cnt; if (malloc_init()) result = NULL; else { if (size == 0) { -#ifdef JEMALLOC_SYSV - if (opt_sysv == false) -#endif + if (config_sysv == false || opt_sysv == false) size = 1; -#ifdef JEMALLOC_SYSV else { -# ifdef JEMALLOC_XMALLOC - if (opt_xmalloc) { + if (config_xmalloc && opt_xmalloc) { malloc_write(": Error in " "posix_memalign(): invalid size " "0\n"); abort(); } -# endif result = NULL; *memptr = NULL; ret = 0; goto RETURN; } -#endif } /* Make sure that alignment is a large enough power of 2. 
*/ if (((alignment - 1) & alignment) != 0 || alignment < sizeof(void *)) { -#ifdef JEMALLOC_XMALLOC - if (opt_xmalloc) { + if (config_xmalloc && opt_xmalloc) { malloc_write(": Error in " "posix_memalign(): invalid alignment\n"); abort(); } -#endif result = NULL; ret = EINVAL; goto RETURN; @@ -1063,8 +984,7 @@ imemalign(void **memptr, size_t alignment, size_t size) goto RETURN; } -#ifdef JEMALLOC_PROF - if (opt_prof) { + if (config_prof && opt_prof) { PROF_ALLOC_PREP(2, usize, cnt); if (cnt == NULL) { result = NULL; @@ -1086,18 +1006,15 @@ imemalign(void **memptr, size_t alignment, size_t size) } } } else -#endif result = ipalloc(usize, alignment, false); } if (result == NULL) { -#ifdef JEMALLOC_XMALLOC - if (opt_xmalloc) { + if (config_xmalloc && opt_xmalloc) { malloc_write(": Error in posix_memalign(): " "out of memory\n"); abort(); } -#endif ret = ENOMEM; goto RETURN; } @@ -1106,16 +1023,12 @@ imemalign(void **memptr, size_t alignment, size_t size) ret = 0; RETURN: -#ifdef JEMALLOC_STATS - if (result != NULL) { + if (config_stats && result != NULL) { assert(usize == isalloc(result)); ALLOCATED_ADD(usize, 0); } -#endif -#ifdef JEMALLOC_PROF - if (opt_prof && result != NULL) + if (config_prof && opt_prof && result != NULL) prof_malloc(result, usize, cnt); -#endif return (ret); } @@ -1135,20 +1048,8 @@ JEMALLOC_P(calloc)(size_t num, size_t size) { void *ret; size_t num_size; -#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS)) - size_t usize -# ifdef JEMALLOC_CC_SILENCE - = 0 -# endif - ; -#endif -#ifdef JEMALLOC_PROF - prof_thr_cnt_t *cnt -# ifdef JEMALLOC_CC_SILENCE - = NULL -# endif - ; -#endif + size_t usize; + prof_thr_cnt_t *cnt; if (malloc_init()) { num_size = 0; @@ -1158,16 +1059,13 @@ JEMALLOC_P(calloc)(size_t num, size_t size) num_size = num * size; if (num_size == 0) { -#ifdef JEMALLOC_SYSV - if ((opt_sysv == false) && ((num == 0) || (size == 0))) -#endif + if ((config_sysv == false || opt_sysv == false) + && ((num == 0) || (size == 0))) num_size = 1; -#ifdef JEMALLOC_SYSV else { ret = NULL; goto RETURN; } -#endif /* * Try to avoid division here. 
We know that it isn't possible to * overflow during multiplication if neither operand uses any of the @@ -1180,8 +1078,7 @@ JEMALLOC_P(calloc)(size_t num, size_t size) goto RETURN; } -#ifdef JEMALLOC_PROF - if (opt_prof) { + if (config_prof && opt_prof) { usize = s2u(num_size); PROF_ALLOC_PREP(1, usize, cnt); if (cnt == NULL) { @@ -1195,37 +1092,28 @@ JEMALLOC_P(calloc)(size_t num, size_t size) arena_prof_promoted(ret, usize); } else ret = icalloc(num_size); - } else -#endif - { -#ifdef JEMALLOC_STATS - usize = s2u(num_size); -#endif + } else { + if (config_stats) + usize = s2u(num_size); ret = icalloc(num_size); } RETURN: if (ret == NULL) { -#ifdef JEMALLOC_XMALLOC - if (opt_xmalloc) { + if (config_xmalloc && opt_xmalloc) { malloc_write(": Error in calloc(): out of " "memory\n"); abort(); } -#endif errno = ENOMEM; } -#ifdef JEMALLOC_PROF - if (opt_prof && ret != NULL) + if (config_prof && opt_prof && ret != NULL) prof_malloc(ret, usize, cnt); -#endif -#ifdef JEMALLOC_STATS - if (ret != NULL) { + if (config_stats && ret != NULL) { assert(usize == isalloc(ret)); ALLOCATED_ADD(usize, 0); } -#endif return (ret); } @@ -1234,67 +1122,39 @@ void * JEMALLOC_P(realloc)(void *ptr, size_t size) { void *ret; -#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS)) - size_t usize -# ifdef JEMALLOC_CC_SILENCE - = 0 -# endif - ; + size_t usize; size_t old_size = 0; -#endif -#ifdef JEMALLOC_PROF - prof_thr_cnt_t *cnt -# ifdef JEMALLOC_CC_SILENCE - = NULL -# endif - ; - prof_ctx_t *old_ctx -# ifdef JEMALLOC_CC_SILENCE - = NULL -# endif - ; -#endif + prof_thr_cnt_t *cnt; + prof_ctx_t *old_ctx; if (size == 0) { -#ifdef JEMALLOC_SYSV - if (opt_sysv == false) -#endif + if (config_sysv == false || opt_sysv == false) size = 1; -#ifdef JEMALLOC_SYSV else { if (ptr != NULL) { -#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS)) - old_size = isalloc(ptr); -#endif -#ifdef JEMALLOC_PROF - if (opt_prof) { + if (config_prof || config_stats) + old_size = isalloc(ptr); + if (config_prof && opt_prof) { old_ctx = prof_ctx_get(ptr); cnt = NULL; } -#endif idalloc(ptr); - } -#ifdef JEMALLOC_PROF - else if (opt_prof) { + } else if (config_prof && opt_prof) { old_ctx = NULL; cnt = NULL; } -#endif ret = NULL; goto RETURN; } -#endif } if (ptr != NULL) { assert(malloc_initialized || malloc_initializer == pthread_self()); -#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS)) - old_size = isalloc(ptr); -#endif -#ifdef JEMALLOC_PROF - if (opt_prof) { + if (config_prof || config_stats) + old_size = isalloc(ptr); + if (config_prof && opt_prof) { usize = s2u(size); old_ctx = prof_ctx_get(ptr); PROF_ALLOC_PREP(1, usize, cnt); @@ -1316,42 +1176,30 @@ JEMALLOC_P(realloc)(void *ptr, size_t size) if (ret == NULL) old_ctx = NULL; } - } else -#endif - { -#ifdef JEMALLOC_STATS - usize = s2u(size); -#endif + } else { + if (config_stats) + usize = s2u(size); ret = iralloc(ptr, size, 0, 0, false, false); } -#ifdef JEMALLOC_PROF OOM: -#endif if (ret == NULL) { -#ifdef JEMALLOC_XMALLOC - if (opt_xmalloc) { + if (config_xmalloc && opt_xmalloc) { malloc_write(": Error in realloc(): " "out of memory\n"); abort(); } -#endif errno = ENOMEM; } } else { -#ifdef JEMALLOC_PROF - if (opt_prof) + if (config_prof && opt_prof) old_ctx = NULL; -#endif if (malloc_init()) { -#ifdef JEMALLOC_PROF - if (opt_prof) + if (config_prof && opt_prof) cnt = NULL; -#endif ret = NULL; } else { -#ifdef JEMALLOC_PROF - if (opt_prof) { + if (config_prof && opt_prof) { usize = s2u(size); PROF_ALLOC_PREP(1, usize, cnt); if (cnt == NULL) @@ -1368,41 +1216,30 @@ OOM: } else 
ret = imalloc(size); } - } else -#endif - { -#ifdef JEMALLOC_STATS - usize = s2u(size); -#endif + } else { + if (config_stats) + usize = s2u(size); ret = imalloc(size); } } if (ret == NULL) { -#ifdef JEMALLOC_XMALLOC - if (opt_xmalloc) { + if (config_xmalloc && opt_xmalloc) { malloc_write(": Error in realloc(): " "out of memory\n"); abort(); } -#endif errno = ENOMEM; } } -#ifdef JEMALLOC_SYSV RETURN: -#endif -#ifdef JEMALLOC_PROF - if (opt_prof) + if (config_prof && opt_prof) prof_realloc(ret, usize, cnt, old_size, old_ctx); -#endif -#ifdef JEMALLOC_STATS - if (ret != NULL) { + if (config_stats && ret != NULL) { assert(usize == isalloc(ret)); ALLOCATED_ADD(usize, old_size); } -#endif return (ret); } @@ -1412,27 +1249,19 @@ JEMALLOC_P(free)(void *ptr) { if (ptr != NULL) { -#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS)) size_t usize; -#endif assert(malloc_initialized || malloc_initializer == pthread_self()); -#ifdef JEMALLOC_STATS - usize = isalloc(ptr); -#endif -#ifdef JEMALLOC_PROF - if (opt_prof) { -# ifndef JEMALLOC_STATS + if (config_prof && opt_prof) { usize = isalloc(ptr); -# endif prof_free(ptr, usize); + } else if (config_stats) { + usize = isalloc(ptr); } -#endif -#ifdef JEMALLOC_STATS - ALLOCATED_ADD(0, usize); -#endif + if (config_stats) + ALLOCATED_ADD(0, usize); idalloc(ptr); } } @@ -1455,15 +1284,12 @@ JEMALLOC_ATTR(visibility("default")) void * JEMALLOC_P(memalign)(size_t alignment, size_t size) { - void *ret; + void *ret #ifdef JEMALLOC_CC_SILENCE - int result = -#endif - imemalign(&ret, alignment, size); -#ifdef JEMALLOC_CC_SILENCE - if (result != 0) - return (NULL); + = NULL #endif + ; + imemalign(&ret, alignment, size); return (ret); } #endif @@ -1474,15 +1300,12 @@ JEMALLOC_ATTR(visibility("default")) void * JEMALLOC_P(valloc)(size_t size) { - void *ret; + void *ret #ifdef JEMALLOC_CC_SILENCE - int result = -#endif - imemalign(&ret, PAGE_SIZE, size); -#ifdef JEMALLOC_CC_SILENCE - if (result != 0) - return (NULL); + = NULL #endif + ; + imemalign(&ret, PAGE_SIZE, size); return (ret); } #endif @@ -1504,12 +1327,12 @@ JEMALLOC_P(malloc_usable_size)(const void *ptr) assert(malloc_initialized || malloc_initializer == pthread_self()); -#ifdef JEMALLOC_IVSALLOC - ret = ivsalloc(ptr); -#else - assert(ptr != NULL); - ret = isalloc(ptr); -#endif + if (config_ivsalloc) + ret = ivsalloc(ptr); + else { + assert(ptr != NULL); + ret = isalloc(ptr); + } return (ret); } @@ -1583,9 +1406,7 @@ JEMALLOC_P(allocm)(void **ptr, size_t *rsize, size_t size, int flags) size_t alignment = (ZU(1) << (flags & ALLOCM_LG_ALIGN_MASK) & (SIZE_T_MAX-1)); bool zero = flags & ALLOCM_ZERO; -#ifdef JEMALLOC_PROF prof_thr_cnt_t *cnt; -#endif assert(ptr != NULL); assert(size != 0); @@ -1597,8 +1418,7 @@ JEMALLOC_P(allocm)(void **ptr, size_t *rsize, size_t size, int flags) if (usize == 0) goto OOM; -#ifdef JEMALLOC_PROF - if (opt_prof) { + if (config_prof && opt_prof) { PROF_ALLOC_PREP(1, usize, cnt); if (cnt == NULL) goto OOM; @@ -1618,39 +1438,26 @@ JEMALLOC_P(allocm)(void **ptr, size_t *rsize, size_t size, int flags) goto OOM; } prof_malloc(p, usize, cnt); - if (rsize != NULL) - *rsize = usize; - } else -#endif - { + } else { p = iallocm(usize, alignment, zero); if (p == NULL) goto OOM; -#ifndef JEMALLOC_STATS - if (rsize != NULL) -#endif - { -#ifdef JEMALLOC_STATS - if (rsize != NULL) -#endif - *rsize = usize; - } } + if (rsize != NULL) + *rsize = usize; *ptr = p; -#ifdef JEMALLOC_STATS - assert(usize == isalloc(p)); - ALLOCATED_ADD(usize, 0); -#endif + if (config_stats) { + assert(usize == 
isalloc(p)); + ALLOCATED_ADD(usize, 0); + } return (ALLOCM_SUCCESS); OOM: -#ifdef JEMALLOC_XMALLOC - if (opt_xmalloc) { + if (config_xmalloc && opt_xmalloc) { malloc_write(": Error in allocm(): " "out of memory\n"); abort(); } -#endif *ptr = NULL; return (ALLOCM_ERR_OOM); } @@ -1663,16 +1470,12 @@ JEMALLOC_P(rallocm)(void **ptr, size_t *rsize, size_t size, size_t extra, { void *p, *q; size_t usize; -#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS)) size_t old_size; -#endif size_t alignment = (ZU(1) << (flags & ALLOCM_LG_ALIGN_MASK) & (SIZE_T_MAX-1)); bool zero = flags & ALLOCM_ZERO; bool no_move = flags & ALLOCM_NO_MOVE; -#ifdef JEMALLOC_PROF prof_thr_cnt_t *cnt; -#endif assert(ptr != NULL); assert(*ptr != NULL); @@ -1681,8 +1484,7 @@ JEMALLOC_P(rallocm)(void **ptr, size_t *rsize, size_t size, size_t extra, assert(malloc_initialized || malloc_initializer == pthread_self()); p = *ptr; -#ifdef JEMALLOC_PROF - if (opt_prof) { + if (config_prof && opt_prof) { /* * usize isn't knowable before iralloc() returns when extra is * non-zero. Therefore, compute its maximum possible value and @@ -1722,45 +1524,34 @@ JEMALLOC_P(rallocm)(void **ptr, size_t *rsize, size_t size, size_t extra, prof_realloc(q, usize, cnt, old_size, old_ctx); if (rsize != NULL) *rsize = usize; - } else -#endif - { -#ifdef JEMALLOC_STATS - old_size = isalloc(p); -#endif + } else { + if (config_stats) + old_size = isalloc(p); q = iralloc(p, size, extra, alignment, zero, no_move); if (q == NULL) goto ERR; -#ifndef JEMALLOC_STATS - if (rsize != NULL) -#endif - { + if (config_stats) usize = isalloc(q); -#ifdef JEMALLOC_STATS - if (rsize != NULL) -#endif - *rsize = usize; + if (rsize != NULL) { + if (config_stats == false) + usize = isalloc(q); + *rsize = usize; } } *ptr = q; -#ifdef JEMALLOC_STATS - ALLOCATED_ADD(usize, old_size); -#endif + if (config_stats) + ALLOCATED_ADD(usize, old_size); return (ALLOCM_SUCCESS); ERR: if (no_move) return (ALLOCM_ERR_NOT_MOVED); -#ifdef JEMALLOC_PROF OOM: -#endif -#ifdef JEMALLOC_XMALLOC - if (opt_xmalloc) { + if (config_xmalloc && opt_xmalloc) { malloc_write(": Error in rallocm(): " "out of memory\n"); abort(); } -#endif return (ALLOCM_ERR_OOM); } @@ -1773,12 +1564,12 @@ JEMALLOC_P(sallocm)(const void *ptr, size_t *rsize, int flags) assert(malloc_initialized || malloc_initializer == pthread_self()); -#ifdef JEMALLOC_IVSALLOC - sz = ivsalloc(ptr); -#else - assert(ptr != NULL); - sz = isalloc(ptr); -#endif + if (config_ivsalloc) + sz = ivsalloc(ptr); + else { + assert(ptr != NULL); + sz = isalloc(ptr); + } assert(rsize != NULL); *rsize = sz; @@ -1790,27 +1581,20 @@ JEMALLOC_ATTR(visibility("default")) int JEMALLOC_P(dallocm)(void *ptr, int flags) { -#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS)) size_t usize; -#endif assert(ptr != NULL); assert(malloc_initialized || malloc_initializer == pthread_self()); -#ifdef JEMALLOC_STATS - usize = isalloc(ptr); -#endif -#ifdef JEMALLOC_PROF - if (opt_prof) { -# ifndef JEMALLOC_STATS + if (config_stats) usize = isalloc(ptr); -# endif + if (config_prof && opt_prof) { + if (config_stats == false) + usize = isalloc(ptr); prof_free(ptr, usize); } -#endif -#ifdef JEMALLOC_STATS - ALLOCATED_ADD(0, usize); -#endif + if (config_stats) + ALLOCATED_ADD(0, usize); idalloc(ptr); return (ALLOCM_SUCCESS); @@ -1843,13 +1627,11 @@ jemalloc_prefork(void) malloc_mutex_lock(&huge_mtx); -#ifdef JEMALLOC_DSS - malloc_mutex_lock(&dss_mtx); -#endif + if (config_dss) + malloc_mutex_lock(&dss_mtx); -#ifdef JEMALLOC_SWAP - malloc_mutex_lock(&swap_mtx); -#endif + if 
(config_swap) + malloc_mutex_lock(&swap_mtx); } void @@ -1859,13 +1641,11 @@ jemalloc_postfork(void) /* Release all mutexes, now that fork() has completed. */ -#ifdef JEMALLOC_SWAP - malloc_mutex_unlock(&swap_mtx); -#endif + if (config_swap) + malloc_mutex_unlock(&swap_mtx); -#ifdef JEMALLOC_DSS - malloc_mutex_unlock(&dss_mtx); -#endif + if (config_dss) + malloc_mutex_unlock(&dss_mtx); malloc_mutex_unlock(&huge_mtx); diff --git a/src/prof.c b/src/prof.c index 8a144b4e..113cf15a 100644 --- a/src/prof.c +++ b/src/prof.c @@ -1,6 +1,5 @@ #define JEMALLOC_PROF_C_ #include "jemalloc/internal/jemalloc_internal.h" -#ifdef JEMALLOC_PROF /******************************************************************************/ #ifdef JEMALLOC_PROF_LIBUNWIND @@ -102,6 +101,8 @@ void bt_init(prof_bt_t *bt, void **vec) { + cassert(config_prof); + bt->vec = vec; bt->len = 0; } @@ -110,6 +111,8 @@ static void bt_destroy(prof_bt_t *bt) { + cassert(config_prof); + idalloc(bt); } @@ -118,6 +121,8 @@ bt_dup(prof_bt_t *bt) { prof_bt_t *ret; + cassert(config_prof); + /* * Create a single allocation that has space for vec immediately * following the prof_bt_t structure. The backtraces that get @@ -141,6 +146,8 @@ static inline void prof_enter(void) { + cassert(config_prof); + malloc_mutex_lock(&enq_mtx); enq = true; malloc_mutex_unlock(&enq_mtx); @@ -153,6 +160,8 @@ prof_leave(void) { bool idump, gdump; + cassert(config_prof); + malloc_mutex_unlock(&bt2ctx_mtx); malloc_mutex_lock(&enq_mtx); @@ -178,6 +187,7 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) unsigned i; int err; + cassert(config_prof); assert(bt->len == 0); assert(bt->vec != NULL); assert(max <= (1U << opt_lg_prof_bt_max)); @@ -204,12 +214,13 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) break; } } -#endif -#ifdef JEMALLOC_PROF_LIBGCC +#elif (defined(JEMALLOC_PROF_LIBGCC)) static _Unwind_Reason_Code prof_unwind_init_callback(struct _Unwind_Context *context, void *arg) { + cassert(config_prof); + return (_URC_NO_REASON); } @@ -218,6 +229,8 @@ prof_unwind_callback(struct _Unwind_Context *context, void *arg) { prof_unwind_data_t *data = (prof_unwind_data_t *)arg; + cassert(config_prof); + if (data->nignore > 0) data->nignore--; else { @@ -235,10 +248,11 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) { prof_unwind_data_t data = {bt, nignore, max}; + cassert(config_prof); + _Unwind_Backtrace(prof_unwind_callback, &data); } -#endif -#ifdef JEMALLOC_PROF_GCC +#elif (defined(JEMALLOC_PROF_GCC)) void prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) { @@ -257,6 +271,7 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) } else \ return; + cassert(config_prof); assert(nignore <= 3); assert(max <= (1U << opt_lg_prof_bt_max)); @@ -407,6 +422,14 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) BT_FRAME(130) #undef BT_FRAME } +#else +void +prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) +{ + + cassert(config_prof); + assert(false); +} #endif prof_thr_cnt_t * @@ -418,6 +441,8 @@ prof_lookup(prof_bt_t *bt) } ret; prof_tdata_t *prof_tdata; + cassert(config_prof); + prof_tdata = PROF_TCACHE_GET(); if (prof_tdata == NULL) { prof_tdata = prof_tdata_init(); @@ -553,6 +578,8 @@ prof_flush(bool propagate_err) bool ret = false; ssize_t err; + cassert(config_prof); + err = write(prof_dump_fd, prof_dump_buf, prof_dump_buf_end); if (err == -1) { if (propagate_err == false) { @@ -573,6 +600,8 @@ prof_write(const char *s, bool propagate_err) { unsigned i, slen, n; + 
cassert(config_prof); + i = 0; slen = strlen(s); while (i < slen) { @@ -602,6 +631,8 @@ prof_ctx_sum(prof_ctx_t *ctx, prof_cnt_t *cnt_all, size_t *leak_nctx) prof_thr_cnt_t *thr_cnt; prof_cnt_t tcnt; + cassert(config_prof); + malloc_mutex_lock(&ctx->lock); memcpy(&ctx->cnt_summed, &ctx->cnt_merged, sizeof(prof_cnt_t)); @@ -648,6 +679,8 @@ static void prof_ctx_destroy(prof_ctx_t *ctx) { + cassert(config_prof); + /* * Check that ctx is still unused by any thread cache before destroying * it. prof_lookup() artificially raises ctx->cnt_merge.curobjs in @@ -686,6 +719,8 @@ prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt) { bool destroy; + cassert(config_prof); + /* Merge cnt stats and detach from ctx. */ malloc_mutex_lock(&ctx->lock); ctx->cnt_merged.curobjs += cnt->cnts.curobjs; @@ -723,6 +758,8 @@ prof_dump_ctx(prof_ctx_t *ctx, prof_bt_t *bt, bool propagate_err) char buf[UMAX2S_BUFSIZE]; unsigned i; + cassert(config_prof); + if (opt_prof_accum == false && ctx->cnt_summed.curobjs == 0) { assert(ctx->cnt_summed.curbytes == 0); assert(ctx->cnt_summed.accumobjs == 0); @@ -767,6 +804,8 @@ prof_dump_maps(bool propagate_err) char mpath[6 + UMAX2S_BUFSIZE + 5 + 1]; + cassert(config_prof); + i = 0; s = "/proc/"; @@ -827,6 +866,8 @@ prof_dump(const char *filename, bool leakcheck, bool propagate_err) char buf[UMAX2S_BUFSIZE]; size_t leak_nctx; + cassert(config_prof); + prof_enter(); prof_dump_fd = creat(filename, 0644); if (prof_dump_fd == -1) { @@ -917,6 +958,8 @@ prof_dump_filename(char *filename, char v, int64_t vseq) char *s; unsigned i, slen; + cassert(config_prof); + /* * Construct a filename of the form: * @@ -979,6 +1022,8 @@ prof_fdump(void) { char filename[DUMP_FILENAME_BUFSIZE]; + cassert(config_prof); + if (prof_booted == false) return; @@ -995,6 +1040,8 @@ prof_idump(void) { char filename[DUMP_FILENAME_BUFSIZE]; + cassert(config_prof); + if (prof_booted == false) return; malloc_mutex_lock(&enq_mtx); @@ -1019,6 +1066,8 @@ prof_mdump(const char *filename) { char filename_buf[DUMP_FILENAME_BUFSIZE]; + cassert(config_prof); + if (opt_prof == false || prof_booted == false) return (true); @@ -1040,6 +1089,8 @@ prof_gdump(void) { char filename[DUMP_FILENAME_BUFSIZE]; + cassert(config_prof); + if (prof_booted == false) return; malloc_mutex_lock(&enq_mtx); @@ -1066,6 +1117,7 @@ prof_bt_hash(const void *key, unsigned minbits, size_t *hash1, size_t *hash2) uint64_t h; prof_bt_t *bt = (prof_bt_t *)key; + cassert(config_prof); assert(minbits <= 32 || (SIZEOF_PTR == 8 && minbits <= 64)); assert(hash1 != NULL); assert(hash2 != NULL); @@ -1094,6 +1146,8 @@ prof_bt_keycomp(const void *k1, const void *k2) const prof_bt_t *bt1 = (prof_bt_t *)k1; const prof_bt_t *bt2 = (prof_bt_t *)k2; + cassert(config_prof); + if (bt1->len != bt2->len) return (false); return (memcmp(bt1->vec, bt2->vec, bt1->len * sizeof(void *)) == 0); @@ -1104,6 +1158,8 @@ prof_tdata_init(void) { prof_tdata_t *prof_tdata; + cassert(config_prof); + /* Initialize an empty cache for this thread. */ prof_tdata = (prof_tdata_t *)imalloc(sizeof(prof_tdata_t)); if (prof_tdata == NULL) @@ -1138,6 +1194,8 @@ prof_tdata_cleanup(void *arg) prof_thr_cnt_t *cnt; prof_tdata_t *prof_tdata = (prof_tdata_t *)arg; + cassert(config_prof); + /* * Delete the hash table. All of its contents can still be iterated * over via the LRU. 
@@ -1161,6 +1219,8 @@ void prof_boot0(void) { + cassert(config_prof); + memcpy(opt_prof_prefix, PROF_PREFIX_DEFAULT, sizeof(PROF_PREFIX_DEFAULT)); } @@ -1169,6 +1229,8 @@ void prof_boot1(void) { + cassert(config_prof); + /* * opt_prof and prof_promote must be in their final state before any * arenas are initialized, so this function must be executed early. @@ -1197,6 +1259,8 @@ bool prof_boot2(void) { + cassert(config_prof); + if (opt_prof) { if (ckh_new(&bt2ctx, PROF_CKH_MINITEMS, prof_bt_hash, prof_bt_keycomp)) @@ -1241,4 +1305,3 @@ prof_boot2(void) } /******************************************************************************/ -#endif /* JEMALLOC_PROF */ diff --git a/src/stats.c b/src/stats.c index dc172e42..e6446530 100644 --- a/src/stats.c +++ b/src/stats.c @@ -39,14 +39,11 @@ bool opt_stats_print = false; -#ifdef JEMALLOC_STATS size_t stats_cactive = 0; -#endif /******************************************************************************/ /* Function prototypes for non-inline static functions. */ -#ifdef JEMALLOC_STATS static void malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque, const char *format, va_list ap); static void stats_arena_bins_print(void (*write_cb)(void *, const char *), @@ -55,10 +52,10 @@ static void stats_arena_lruns_print(void (*write_cb)(void *, const char *), void *cbopaque, unsigned i); static void stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, unsigned i); -#endif /******************************************************************************/ +/* XXX Refactor by adding malloc_vsnprintf(). */ /* * We don't want to depend on vsnprintf() for production builds, since that can * cause unnecessary bloat for static binaries. u2s() provides minimal integer @@ -99,7 +96,6 @@ u2s(uint64_t x, unsigned base, char *s) return (&s[i]); } -#ifdef JEMALLOC_STATS static void malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque, const char *format, va_list ap) @@ -149,9 +145,7 @@ malloc_printf(const char *format, ...) malloc_vcprintf(NULL, NULL, format, ap); va_end(ap); } -#endif -#ifdef JEMALLOC_STATS static void stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, unsigned i) @@ -377,7 +371,6 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, stats_arena_bins_print(write_cb, cbopaque, i); stats_arena_lruns_print(write_cb, cbopaque, i); } -#endif void stats_print(void (*write_cb)(void *, const char *), void *cbopaque, @@ -674,8 +667,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, write_cb(cbopaque, ")\n"); } -#ifdef JEMALLOC_STATS - { + if (config_stats) { int err; size_t sszp, ssz; size_t *cactive; @@ -785,6 +777,5 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, } } } -#endif /* #ifdef JEMALLOC_STATS */ write_cb(cbopaque, "--- End jemalloc statistics ---\n"); } diff --git a/src/tcache.c b/src/tcache.c index 31c329e1..398fc0aa 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -38,31 +38,22 @@ tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin, size_t binind) { void *ret; - arena_tcache_fill_small(tcache->arena, tbin, binind -#ifdef JEMALLOC_PROF - , tcache->prof_accumbytes -#endif - ); -#ifdef JEMALLOC_PROF - tcache->prof_accumbytes = 0; -#endif + arena_tcache_fill_small(tcache->arena, tbin, binind, + config_prof ? 
tcache->prof_accumbytes : 0); + if (config_prof) + tcache->prof_accumbytes = 0; ret = tcache_alloc_easy(tbin); return (ret); } void -tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem -#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) - , tcache_t *tcache -#endif - ) +tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem, + tcache_t *tcache) { void *ptr; unsigned i, nflush, ndeferred; -#ifdef JEMALLOC_STATS bool merged_stats = false; -#endif assert(binind < nbins); assert(rem <= tbin->ncached); @@ -74,25 +65,21 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem arena_t *arena = chunk->arena; arena_bin_t *bin = &arena->bins[binind]; -#ifdef JEMALLOC_PROF - if (arena == tcache->arena) { + if (config_prof && arena == tcache->arena) { malloc_mutex_lock(&arena->lock); arena_prof_accum(arena, tcache->prof_accumbytes); malloc_mutex_unlock(&arena->lock); tcache->prof_accumbytes = 0; } -#endif malloc_mutex_lock(&bin->lock); -#ifdef JEMALLOC_STATS - if (arena == tcache->arena) { + if (config_stats && arena == tcache->arena) { assert(merged_stats == false); merged_stats = true; bin->stats.nflushes++; bin->stats.nrequests += tbin->tstats.nrequests; tbin->tstats.nrequests = 0; } -#endif ndeferred = 0; for (i = 0; i < nflush; i++) { ptr = tbin->avail[i]; @@ -117,8 +104,7 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem } malloc_mutex_unlock(&bin->lock); } -#ifdef JEMALLOC_STATS - if (merged_stats == false) { + if (config_stats && merged_stats == false) { /* * The flush loop didn't happen to flush to this thread's * arena, so the stats didn't get merged. Manually do so now. @@ -130,7 +116,6 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem tbin->tstats.nrequests = 0; malloc_mutex_unlock(&bin->lock); } -#endif memmove(tbin->avail, &tbin->avail[tbin->ncached - rem], rem * sizeof(void *)); @@ -140,17 +125,12 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem } void -tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem -#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) - , tcache_t *tcache -#endif - ) +tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem, + tcache_t *tcache) { void *ptr; unsigned i, nflush, ndeferred; -#ifdef JEMALLOC_STATS bool merged_stats = false; -#endif assert(binind < nhbins); assert(rem <= tbin->ncached); @@ -162,23 +142,21 @@ tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem arena_t *arena = chunk->arena; malloc_mutex_lock(&arena->lock); -#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS)) - if (arena == tcache->arena) { -#endif -#ifdef JEMALLOC_PROF - arena_prof_accum(arena, tcache->prof_accumbytes); - tcache->prof_accumbytes = 0; -#endif -#ifdef JEMALLOC_STATS - merged_stats = true; - arena->stats.nrequests_large += tbin->tstats.nrequests; - arena->stats.lstats[binind - nbins].nrequests += - tbin->tstats.nrequests; - tbin->tstats.nrequests = 0; -#endif -#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS)) + if ((config_prof || config_stats) && arena == tcache->arena) { + if (config_prof) { + arena_prof_accum(arena, + tcache->prof_accumbytes); + tcache->prof_accumbytes = 0; + } + if (config_stats) { + merged_stats = true; + arena->stats.nrequests_large += + tbin->tstats.nrequests; + arena->stats.lstats[binind - nbins].nrequests += + tbin->tstats.nrequests; + tbin->tstats.nrequests = 0; + } } -#endif ndeferred = 0; for (i = 0; i < nflush; i++) { ptr = tbin->avail[i]; @@ -199,8 
+177,7 @@ tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem } malloc_mutex_unlock(&arena->lock); } -#ifdef JEMALLOC_STATS - if (merged_stats == false) { + if (config_stats && merged_stats == false) { /* * The flush loop didn't happen to flush to this thread's * arena, so the stats didn't get merged. Manually do so now. @@ -213,7 +190,6 @@ tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem tbin->tstats.nrequests = 0; malloc_mutex_unlock(&arena->lock); } -#endif memmove(tbin->avail, &tbin->avail[tbin->ncached - rem], rem * sizeof(void *)); @@ -254,13 +230,13 @@ tcache_create(arena_t *arena) if (tcache == NULL) return (NULL); -#ifdef JEMALLOC_STATS - /* Link into list of extant tcaches. */ - malloc_mutex_lock(&arena->lock); - ql_elm_new(tcache, link); - ql_tail_insert(&arena->tcache_ql, tcache, link); - malloc_mutex_unlock(&arena->lock); -#endif + if (config_stats) { + /* Link into list of extant tcaches. */ + malloc_mutex_lock(&arena->lock); + ql_elm_new(tcache, link); + ql_tail_insert(&arena->tcache_ql, tcache, link); + malloc_mutex_unlock(&arena->lock); + } tcache->arena = arena; assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0); @@ -282,43 +258,32 @@ tcache_destroy(tcache_t *tcache) unsigned i; size_t tcache_size; -#ifdef JEMALLOC_STATS - /* Unlink from list of extant tcaches. */ - malloc_mutex_lock(&tcache->arena->lock); - ql_remove(&tcache->arena->tcache_ql, tcache, link); - malloc_mutex_unlock(&tcache->arena->lock); - tcache_stats_merge(tcache, tcache->arena); -#endif + if (config_stats) { + /* Unlink from list of extant tcaches. */ + malloc_mutex_lock(&tcache->arena->lock); + ql_remove(&tcache->arena->tcache_ql, tcache, link); + malloc_mutex_unlock(&tcache->arena->lock); + tcache_stats_merge(tcache, tcache->arena); + } for (i = 0; i < nbins; i++) { tcache_bin_t *tbin = &tcache->tbins[i]; - tcache_bin_flush_small(tbin, i, 0 -#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) - , tcache -#endif - ); + tcache_bin_flush_small(tbin, i, 0, tcache); -#ifdef JEMALLOC_STATS - if (tbin->tstats.nrequests != 0) { + if (config_stats && tbin->tstats.nrequests != 0) { arena_t *arena = tcache->arena; arena_bin_t *bin = &arena->bins[i]; malloc_mutex_lock(&bin->lock); bin->stats.nrequests += tbin->tstats.nrequests; malloc_mutex_unlock(&bin->lock); } -#endif } for (; i < nhbins; i++) { tcache_bin_t *tbin = &tcache->tbins[i]; - tcache_bin_flush_large(tbin, i, 0 -#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) - , tcache -#endif - ); + tcache_bin_flush_large(tbin, i, 0, tcache); -#ifdef JEMALLOC_STATS - if (tbin->tstats.nrequests != 0) { + if (config_stats && tbin->tstats.nrequests != 0) { arena_t *arena = tcache->arena; malloc_mutex_lock(&arena->lock); arena->stats.nrequests_large += tbin->tstats.nrequests; @@ -326,16 +291,13 @@ tcache_destroy(tcache_t *tcache) tbin->tstats.nrequests; malloc_mutex_unlock(&arena->lock); } -#endif } -#ifdef JEMALLOC_PROF - if (tcache->prof_accumbytes > 0) { + if (config_prof && tcache->prof_accumbytes > 0) { malloc_mutex_lock(&tcache->arena->lock); arena_prof_accum(tcache->arena, tcache->prof_accumbytes); malloc_mutex_unlock(&tcache->arena->lock); } -#endif tcache_size = arena_salloc(tcache); if (tcache_size <= small_maxclass) { @@ -389,7 +351,6 @@ tcache_thread_cleanup(void *arg) } } -#ifdef JEMALLOC_STATS void tcache_stats_merge(tcache_t *tcache, arena_t *arena) { @@ -413,7 +374,6 @@ tcache_stats_merge(tcache_t *tcache, arena_t *arena) tbin->tstats.nrequests = 0; } } -#endif bool tcache_boot(void) From 
fd56043c53f1cd1335ae6d1c0ee86cc0fbb9f12e Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 13 Feb 2012 10:24:43 -0800 Subject: [PATCH 012/205] Remove magic. Remove structure magic, because 1) it is no longer conditional, and 2) it stopped being very effective at detecting memory corruption several years ago. --- include/jemalloc/internal/arena.h | 11 ----------- include/jemalloc/internal/ckh.h | 3 --- include/jemalloc/internal/jemalloc_internal.h.in | 2 -- include/jemalloc/internal/prof.h | 4 ---- include/jemalloc/internal/tcache.h | 1 - src/arena.c | 15 --------------- src/ckh.c | 9 --------- 7 files changed, 45 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index b6a5c23d..78ea2696 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -211,9 +211,6 @@ struct arena_chunk_s { typedef rb_tree(arena_chunk_t) arena_chunk_tree_t; struct arena_run_s { - uint32_t magic; -# define ARENA_RUN_MAGIC 0x384adf93 - /* Bin this run is associated with. */ arena_bin_t *bin; @@ -290,9 +287,6 @@ struct arena_bin_s { }; struct arena_s { - uint32_t magic; -# define ARENA_MAGIC 0x947d3d24 - /* This arena's index within the arenas array. */ unsigned ind; @@ -499,7 +493,6 @@ arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr) unsigned shift, diff, regind; size_t size; - assert(run->magic == ARENA_RUN_MAGIC); /* * Freeing a pointer lower than region zero can cause assertion * failure. @@ -590,7 +583,6 @@ arena_prof_ctx_get(const void *ptr) arena_bin_info_t *bin_info = &arena_bin_info[binind]; unsigned regind; - assert(run->magic == ARENA_RUN_MAGIC); regind = arena_run_regind(run, bin_info, ptr); ret = *(prof_ctx_t **)((uintptr_t)run + bin_info->ctx0_offset + (regind * @@ -626,7 +618,6 @@ arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx) arena_bin_info_t *bin_info; unsigned regind; - assert(run->magic == ARENA_RUN_MAGIC); binind = arena_bin_index(chunk->arena, bin); bin_info = &arena_bin_info[binind]; regind = arena_run_regind(run, bin_info, ptr); @@ -646,7 +637,6 @@ arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr) arena_chunk_map_t *mapelm; assert(arena != NULL); - assert(arena->magic == ARENA_MAGIC); assert(chunk->arena == arena); assert(ptr != NULL); assert(CHUNK_ADDR2BASE(ptr) != ptr); @@ -667,7 +657,6 @@ arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr) run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - (mapelm->bits >> PAGE_SHIFT)) << PAGE_SHIFT)); - assert(run->magic == ARENA_RUN_MAGIC); bin = run->bin; if (config_debug) { size_t binind = arena_bin_index(arena, bin); diff --git a/include/jemalloc/internal/ckh.h b/include/jemalloc/internal/ckh.h index 28f171c8..65f30f56 100644 --- a/include/jemalloc/internal/ckh.h +++ b/include/jemalloc/internal/ckh.h @@ -30,9 +30,6 @@ struct ckhc_s { }; struct ckh_s { -#define CKH_MAGIC 0x3af2489d - uint32_t magic; - #ifdef CKH_COUNT /* Counters used to get an idea of performance. */ uint64_t ngrows; diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 8842e4bf..3193d35e 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -774,8 +774,6 @@ isalloc(const void *ptr) chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (chunk != ptr) { /* Region. 
*/ - assert(chunk->arena->magic == ARENA_MAGIC); - if (config_prof) ret = arena_salloc_demote(ptr); else diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index d4700808..98f96546 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -336,8 +336,6 @@ prof_ctx_get(const void *ptr) chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (chunk != ptr) { /* Region. */ - assert(chunk->arena->magic == ARENA_MAGIC); - ret = arena_prof_ctx_get(ptr); } else ret = huge_prof_ctx_get(ptr); @@ -356,8 +354,6 @@ prof_ctx_set(const void *ptr, prof_ctx_t *ctx) chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (chunk != ptr) { /* Region. */ - assert(chunk->arena->magic == ARENA_MAGIC); - arena_prof_ctx_set(ptr, ctx); } else huge_prof_ctx_set(ptr, ctx); diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index 0855d32e..83e03d9d 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -333,7 +333,6 @@ tcache_dalloc_small(tcache_t *tcache, void *ptr) mapelm = &chunk->map[pageind-map_bias]; run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - (mapelm->bits >> PAGE_SHIFT)) << PAGE_SHIFT)); - assert(run->magic == ARENA_RUN_MAGIC); bin = run->bin; binind = ((uintptr_t)bin - (uintptr_t)&arena->bins) / sizeof(arena_bin_t); diff --git a/src/arena.c b/src/arena.c index 356b628d..4ada6a37 100644 --- a/src/arena.c +++ b/src/arena.c @@ -255,7 +255,6 @@ arena_run_reg_alloc(arena_run_t *run, arena_bin_info_t *bin_info) bitmap_t *bitmap = (bitmap_t *)((uintptr_t)run + (uintptr_t)bin_info->bitmap_offset); - assert(run->magic == ARENA_RUN_MAGIC); assert(run->nfree > 0); assert(bitmap_full(bitmap, &bin_info->bitmap_info) == false); @@ -758,7 +757,6 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) chunk + (uintptr_t)(pageind << PAGE_SHIFT)); assert((mapelm->bits >> PAGE_SHIFT) == 0); - assert(run->magic == ARENA_RUN_MAGIC); size_t binind = arena_bin_index(arena, run->bin); arena_bin_info_t *bin_info = @@ -1220,8 +1218,6 @@ arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin) run->nextind = 0; run->nfree = bin_info->nregs; bitmap_init(bitmap, &bin_info->bitmap_info); - if (config_debug) - run->magic = ARENA_RUN_MAGIC; } malloc_mutex_unlock(&arena->lock); /********************************/ @@ -1281,7 +1277,6 @@ arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin) * Another thread updated runcur while this one ran without the * bin lock in arena_bin_nonfull_run_get(). 
*/ - assert(bin->runcur->magic == ARENA_RUN_MAGIC); assert(bin->runcur->nfree > 0); ret = arena_run_reg_alloc(bin->runcur, bin_info); if (run != NULL) { @@ -1309,7 +1304,6 @@ arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin) bin->runcur = run; - assert(bin->runcur->magic == ARENA_RUN_MAGIC); assert(bin->runcur->nfree > 0); return (arena_run_reg_alloc(bin->runcur, bin_info)); @@ -1579,7 +1573,6 @@ arena_salloc(const void *ptr) arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) << PAGE_SHIFT)); - assert(run->magic == ARENA_RUN_MAGIC); size_t binind = arena_bin_index(chunk->arena, run->bin); arena_bin_info_t *bin_info = &arena_bin_info[binind]; assert(((uintptr_t)ptr - ((uintptr_t)run + @@ -1632,7 +1625,6 @@ arena_salloc_demote(const void *ptr) arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) << PAGE_SHIFT)); - assert(run->magic == ARENA_RUN_MAGIC); size_t binind = arena_bin_index(chunk->arena, run->bin); arena_bin_info_t *bin_info = &arena_bin_info[binind]; assert(((uintptr_t)ptr - ((uintptr_t)run + @@ -1727,8 +1719,6 @@ arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, ((past - run_ind) << PAGE_SHIFT), false); /* npages = past - run_ind; */ } - if (config_debug) - run->magic = 0; arena_run_dalloc(arena, run, true); malloc_mutex_unlock(&arena->lock); /****************************/ @@ -1785,7 +1775,6 @@ arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - (mapelm->bits >> PAGE_SHIFT)) << PAGE_SHIFT)); - assert(run->magic == ARENA_RUN_MAGIC); bin = run->bin; size_t binind = arena_bin_index(arena, bin); arena_bin_info_t *bin_info = &arena_bin_info[binind]; @@ -2019,7 +2008,6 @@ arena_ralloc_large(void *ptr, size_t oldsize, size_t size, size_t extra, chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); arena = chunk->arena; - assert(arena->magic == ARENA_MAGIC); if (psize < oldsize) { /* Fill before shrinking in order avoid a race. 
*/ @@ -2183,9 +2171,6 @@ arena_new(arena_t *arena, unsigned ind) memset(&bin->stats, 0, sizeof(malloc_bin_stats_t)); } - if (config_debug) - arena->magic = ARENA_MAGIC; - return (false); } diff --git a/src/ckh.c b/src/ckh.c index f7eaa78b..4a6d1e37 100644 --- a/src/ckh.c +++ b/src/ckh.c @@ -73,7 +73,6 @@ ckh_isearch(ckh_t *ckh, const void *key) size_t hash1, hash2, bucket, cell; assert(ckh != NULL); - assert(ckh->magic == CKH_MAGIC); ckh->hash(key, ckh->lg_curbuckets, &hash1, &hash2); @@ -394,9 +393,6 @@ ckh_new(ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh_keycomp_t *keycomp) goto RETURN; } - if (config_debug) - ckh->magic = CKH_MAGIC; - ret = false; RETURN: return (ret); @@ -407,7 +403,6 @@ ckh_delete(ckh_t *ckh) { assert(ckh != NULL); - assert(ckh->magic == CKH_MAGIC); #ifdef CKH_VERBOSE malloc_printf( @@ -432,7 +427,6 @@ ckh_count(ckh_t *ckh) { assert(ckh != NULL); - assert(ckh->magic == CKH_MAGIC); return (ckh->count); } @@ -463,7 +457,6 @@ ckh_insert(ckh_t *ckh, const void *key, const void *data) bool ret; assert(ckh != NULL); - assert(ckh->magic == CKH_MAGIC); assert(ckh_search(ckh, key, NULL, NULL)); #ifdef CKH_COUNT @@ -488,7 +481,6 @@ ckh_remove(ckh_t *ckh, const void *searchkey, void **key, void **data) size_t cell; assert(ckh != NULL); - assert(ckh->magic == CKH_MAGIC); cell = ckh_isearch(ckh, searchkey); if (cell != SIZE_T_MAX) { @@ -520,7 +512,6 @@ ckh_search(ckh_t *ckh, const void *searchkey, void **key, void **data) size_t cell; assert(ckh != NULL); - assert(ckh->magic == CKH_MAGIC); cell = ckh_isearch(ckh, searchkey); if (cell != SIZE_T_MAX) { From 4162627757889ea999264c2ddbc3c354768774e2 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 13 Feb 2012 10:56:17 -0800 Subject: [PATCH 013/205] Remove the swap feature. Remove the swap feature, which enabled per application swap files. In practice this feature has not proven itself useful to users. --- INSTALL | 5 - Makefile.in | 10 +- configure.ac | 17 - doc/jemalloc.xml.in | 94 +--- include/jemalloc/internal/chunk.h | 2 - include/jemalloc/internal/chunk_swap.h | 30 -- include/jemalloc/internal/ctl.h | 1 - .../jemalloc/internal/jemalloc_internal.h.in | 7 - include/jemalloc/internal/private_namespace.h | 5 - include/jemalloc/jemalloc_defs.h.in | 3 - src/arena.c | 5 +- src/chunk.c | 26 +- src/chunk_swap.c | 403 ------------------ src/ctl.c | 111 +---- src/huge.c | 10 +- src/jemalloc.c | 9 - src/stats.c | 26 +- 17 files changed, 26 insertions(+), 738 deletions(-) delete mode 100644 include/jemalloc/internal/chunk_swap.h delete mode 100644 src/chunk_swap.c diff --git a/INSTALL b/INSTALL index 2a1e469c..fa32d057 100644 --- a/INSTALL +++ b/INSTALL @@ -102,11 +102,6 @@ any of the following arguments (not a definitive list) to 'configure': released in bulk, thus reducing the total number of mutex operations. See the "opt.tcache" option for usage details. ---enable-swap - Enable mmap()ed swap file support. When this feature is built in, it is - possible to specify one or more files that act as backing store. This - effectively allows for per application swap files. - --enable-dss Enable support for page allocation/deallocation via sbrk(2), in addition to mmap(2). 
diff --git a/Makefile.in b/Makefile.in index 82983892..ca4365e8 100644 --- a/Makefile.in +++ b/Makefile.in @@ -47,14 +47,14 @@ CHDRS := @objroot@include/jemalloc/jemalloc@install_suffix@.h \ CSRCS := @srcroot@src/jemalloc.c @srcroot@src/arena.c @srcroot@src/atomic.c \ @srcroot@src/base.c @srcroot@src/bitmap.c @srcroot@src/chunk.c \ @srcroot@src/chunk_dss.c @srcroot@src/chunk_mmap.c \ - @srcroot@src/chunk_swap.c @srcroot@src/ckh.c @srcroot@src/ctl.c \ - @srcroot@src/extent.c @srcroot@src/hash.c @srcroot@src/huge.c \ - @srcroot@src/mb.c @srcroot@src/mutex.c @srcroot@src/prof.c \ - @srcroot@src/rtree.c @srcroot@src/stats.c @srcroot@src/tcache.c + @srcroot@src/ckh.c @srcroot@src/ctl.c @srcroot@src/extent.c \ + @srcroot@src/hash.c @srcroot@src/huge.c @srcroot@src/mb.c \ + @srcroot@src/mutex.c @srcroot@src/prof.c @srcroot@src/rtree.c \ + @srcroot@src/stats.c @srcroot@src/tcache.c ifeq (macho, @abi@) CSRCS += @srcroot@src/zone.c endif -STATIC_LIBS := @objroot@lib/libjemalloc@install_suffix@.a +STATIC_LIBS := @objroot@lib/libjemalloc@install_suffix@.a DSOS := @objroot@lib/libjemalloc@install_suffix@.$(SO).$(REV) \ @objroot@lib/libjemalloc@install_suffix@.$(SO) \ @objroot@lib/libjemalloc@install_suffix@_pic.a diff --git a/configure.ac b/configure.ac index 9617a5e3..cbcefdf3 100644 --- a/configure.ac +++ b/configure.ac @@ -592,22 +592,6 @@ if test "x$enable_tcache" = "x1" ; then fi AC_SUBST([enable_tcache]) -dnl Do not enable mmap()ped swap files by default. -AC_ARG_ENABLE([swap], - [AS_HELP_STRING([--enable-swap], [Enable mmap()ped swap files])], -[if test "x$enable_swap" = "xno" ; then - enable_swap="0" -else - enable_swap="1" -fi -], -[enable_swap="0"] -) -if test "x$enable_swap" = "x1" ; then - AC_DEFINE([JEMALLOC_SWAP], [ ]) -fi -AC_SUBST([enable_swap]) - dnl Do not enable allocation from DSS by default. AC_ARG_ENABLE([dss], [AS_HELP_STRING([--enable-dss], [Enable allocation from DSS])], @@ -955,7 +939,6 @@ AC_MSG_RESULT([tcache : ${enable_tcache}]) AC_MSG_RESULT([fill : ${enable_fill}]) AC_MSG_RESULT([xmalloc : ${enable_xmalloc}]) AC_MSG_RESULT([sysv : ${enable_sysv}]) -AC_MSG_RESULT([swap : ${enable_swap}]) AC_MSG_RESULT([dss : ${enable_dss}]) AC_MSG_RESULT([dynamic_page_shift : ${enable_dynamic_page_shift}]) AC_MSG_RESULT([lazy_lock : ${enable_lazy_lock}]) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 7a32879a..dc11642f 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -660,16 +660,6 @@ for (i = 0; i < nbins; i++) { build configuration. - - - config.swap - (bool) - r- - - was specified during - build configuration. - - config.sysv @@ -1118,25 +1108,6 @@ malloc_conf = "xmalloc:true";]]> by default. - - - opt.overcommit - (bool) - r- - [] - - Over-commit enabled/disabled. If enabled, over-commit - memory as a side effect of using anonymous - mmap - 2 or - sbrk - 2 for virtual memory allocation. - In order for overcommit to be disabled, the swap.fds mallctl must have - been successfully written to. This option is enabled by - default. - - tcache.flush @@ -1590,8 +1561,7 @@ malloc_conf = "xmalloc:true";]]> application. This is a multiple of the chunk size, and is at least as large as stats.active. This - does not include inactive chunks backed by swap files. his does not - include inactive chunks embedded in the DSS. + does not include inactive chunks embedded in the DSS. @@ -1602,8 +1572,7 @@ malloc_conf = "xmalloc:true";]]> [] Total number of chunks actively mapped on behalf of the - application. This does not include inactive chunks backed by swap - files. 
This does not include inactive chunks embedded in the DSS. + application. This does not include inactive chunks embedded in the DSS. @@ -1983,65 +1952,6 @@ malloc_conf = "xmalloc:true";]]> Current number of runs for this size class. - - - - swap.avail - (size_t) - r- - [] - - Number of swap file bytes that are currently not - associated with any chunk (i.e. mapped, but otherwise completely - unmanaged). - - - - - swap.prezeroed - (bool) - rw - [] - - If true, the allocator assumes that the swap file(s) - contain nothing but nil bytes. If this assumption is violated, - allocator behavior is undefined. This value becomes read-only after - swap.fds is - successfully written to. - - - - - swap.nfds - (size_t) - r- - [] - - Number of file descriptors in use for swap. - - - - - - swap.fds - (int *) - rw - [] - - When written to, the files associated with the - specified file descriptors are contiguously mapped via - mmap - 2. The resulting virtual memory - region is preferred over anonymous - mmap - 2 and - sbrk - 2 memory. Note that if a file's - size is not a multiple of the page size, it is automatically truncated - to the nearest page size multiple. See the - swap.prezeroed - mallctl for specifying that the files are pre-zeroed. - diff --git a/include/jemalloc/internal/chunk.h b/include/jemalloc/internal/chunk.h index 4cc1e80e..9a62ba18 100644 --- a/include/jemalloc/internal/chunk.h +++ b/include/jemalloc/internal/chunk.h @@ -28,7 +28,6 @@ #ifdef JEMALLOC_H_EXTERNS extern size_t opt_lg_chunk; -extern bool opt_overcommit; /* Protects stats_chunks; currently not used for any other purpose. */ extern malloc_mutex_t chunks_mtx; @@ -54,6 +53,5 @@ bool chunk_boot(void); #endif /* JEMALLOC_H_INLINES */ /******************************************************************************/ -#include "jemalloc/internal/chunk_swap.h" #include "jemalloc/internal/chunk_dss.h" #include "jemalloc/internal/chunk_mmap.h" diff --git a/include/jemalloc/internal/chunk_swap.h b/include/jemalloc/internal/chunk_swap.h deleted file mode 100644 index 99a079eb..00000000 --- a/include/jemalloc/internal/chunk_swap.h +++ /dev/null @@ -1,30 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -extern malloc_mutex_t swap_mtx; -extern bool swap_enabled; -extern bool swap_prezeroed; -extern size_t swap_nfds; -extern int *swap_fds; -extern size_t swap_avail; - -void *chunk_alloc_swap(size_t size, bool *zero); -bool chunk_in_swap(void *chunk); -bool chunk_dealloc_swap(void *chunk, size_t size); -bool chunk_swap_enable(const int *fds, unsigned nfds, bool prezeroed); -bool chunk_swap_boot(void); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index 31f9d99b..de4b9412 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -59,7 +59,6 @@ struct ctl_stats_s { uint64_t ndalloc; /* huge_ndalloc */ } huge; ctl_arena_stats_t *arenas; /* (narenas + 1) elements. 
*/ - size_t swap_avail; }; #endif /* JEMALLOC_H_STRUCTS */ diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 3193d35e..44415370 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -104,13 +104,6 @@ static const bool config_stats = false #endif ; -static const bool config_swap = -#ifdef JEMALLOC_SWAP - true -#else - false -#endif - ; static const bool config_sysv = #ifdef JEMALLOC_SYSV true diff --git a/include/jemalloc/internal/private_namespace.h b/include/jemalloc/internal/private_namespace.h index d4f5f96d..db2192e6 100644 --- a/include/jemalloc/internal/private_namespace.h +++ b/include/jemalloc/internal/private_namespace.h @@ -48,18 +48,13 @@ #define chunk_alloc_dss JEMALLOC_N(chunk_alloc_dss) #define chunk_alloc_mmap JEMALLOC_N(chunk_alloc_mmap) #define chunk_alloc_mmap_noreserve JEMALLOC_N(chunk_alloc_mmap_noreserve) -#define chunk_alloc_swap JEMALLOC_N(chunk_alloc_swap) #define chunk_boot JEMALLOC_N(chunk_boot) #define chunk_dealloc JEMALLOC_N(chunk_dealloc) #define chunk_dealloc_dss JEMALLOC_N(chunk_dealloc_dss) #define chunk_dealloc_mmap JEMALLOC_N(chunk_dealloc_mmap) -#define chunk_dealloc_swap JEMALLOC_N(chunk_dealloc_swap) #define chunk_dss_boot JEMALLOC_N(chunk_dss_boot) #define chunk_in_dss JEMALLOC_N(chunk_in_dss) -#define chunk_in_swap JEMALLOC_N(chunk_in_swap) #define chunk_mmap_boot JEMALLOC_N(chunk_mmap_boot) -#define chunk_swap_boot JEMALLOC_N(chunk_swap_boot) -#define chunk_swap_enable JEMALLOC_N(chunk_swap_enable) #define ckh_bucket_search JEMALLOC_N(ckh_bucket_search) #define ckh_count JEMALLOC_N(ckh_count) #define ckh_delete JEMALLOC_N(ckh_delete) diff --git a/include/jemalloc/jemalloc_defs.h.in b/include/jemalloc/jemalloc_defs.h.in index d8052e2b..f78028b0 100644 --- a/include/jemalloc/jemalloc_defs.h.in +++ b/include/jemalloc/jemalloc_defs.h.in @@ -98,9 +98,6 @@ */ #undef JEMALLOC_DSS -/* JEMALLOC_SWAP enables mmap()ed swap file support. */ -#undef JEMALLOC_SWAP - /* Support memory filling (junk/zero). */ #undef JEMALLOC_FILL diff --git a/src/arena.c b/src/arena.c index 4ada6a37..c2632d97 100644 --- a/src/arena.c +++ b/src/arena.c @@ -671,10 +671,11 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) * madvise(..., MADV_DONTNEED) results in zero-filled pages for anonymous * mappings, but not for file-backed mappings. */ - (config_swap && swap_enabled) ? CHUNK_MAP_UNZEROED : 0; + 0 #else - CHUNK_MAP_UNZEROED; + CHUNK_MAP_UNZEROED #endif + ; /* * If chunk is the spare, temporarily re-allocate it, 1) so that its diff --git a/src/chunk.c b/src/chunk.c index 57ab20d8..b9086509 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -5,7 +5,6 @@ /* Data. */ size_t opt_lg_chunk = LG_CHUNK_DEFAULT; -bool opt_overcommit = true; malloc_mutex_t chunks_mtx; chunk_stats_t stats_chunks; @@ -35,23 +34,15 @@ chunk_alloc(size_t size, bool base, bool *zero) assert(size != 0); assert((size & chunksize_mask) == 0); - if (config_swap && swap_enabled) { - ret = chunk_alloc_swap(size, zero); + if (config_dss) { + ret = chunk_alloc_dss(size, zero); if (ret != NULL) goto RETURN; } - - if (swap_enabled == false || opt_overcommit) { - if (config_dss) { - ret = chunk_alloc_dss(size, zero); - if (ret != NULL) - goto RETURN; - } - ret = chunk_alloc_mmap(size); - if (ret != NULL) { - *zero = true; - goto RETURN; - } + ret = chunk_alloc_mmap(size); + if (ret != NULL) { + *zero = true; + goto RETURN; } /* All strategies for allocation failed. 
*/ @@ -102,9 +93,6 @@ chunk_dealloc(void *chunk, size_t size, bool unmap) } if (unmap) { - if (config_swap && swap_enabled && chunk_dealloc_swap(chunk, - size) == false) - return; if (config_dss && chunk_dealloc_dss(chunk, size) == false) return; chunk_dealloc_mmap(chunk, size); @@ -126,8 +114,6 @@ chunk_boot(void) return (true); memset(&stats_chunks, 0, sizeof(chunk_stats_t)); } - if (config_swap && chunk_swap_boot()) - return (true); if (chunk_mmap_boot()) return (true); if (config_dss && chunk_dss_boot()) diff --git a/src/chunk_swap.c b/src/chunk_swap.c deleted file mode 100644 index fe9ca303..00000000 --- a/src/chunk_swap.c +++ /dev/null @@ -1,403 +0,0 @@ -#define JEMALLOC_CHUNK_SWAP_C_ -#include "jemalloc/internal/jemalloc_internal.h" - -/******************************************************************************/ -/* Data. */ - -malloc_mutex_t swap_mtx; -bool swap_enabled; -bool swap_prezeroed; -size_t swap_nfds; -int *swap_fds; -size_t swap_avail; - -/* Base address of the mmap()ed file(s). */ -static void *swap_base; -/* Current end of the space in use (<= swap_max). */ -static void *swap_end; -/* Absolute upper limit on file-backed addresses. */ -static void *swap_max; - -/* - * Trees of chunks that were previously allocated (trees differ only in node - * ordering). These are used when allocating chunks, in an attempt to re-use - * address space. Depending on function, different tree orderings are needed, - * which is why there are two trees with the same contents. - */ -static extent_tree_t swap_chunks_szad; -static extent_tree_t swap_chunks_ad; - -/******************************************************************************/ -/* Function prototypes for non-inline static functions. */ - -static void *chunk_recycle_swap(size_t size, bool *zero); -static extent_node_t *chunk_dealloc_swap_record(void *chunk, size_t size); - -/******************************************************************************/ - -static void * -chunk_recycle_swap(size_t size, bool *zero) -{ - extent_node_t *node, key; - - cassert(config_swap); - - key.addr = NULL; - key.size = size; - malloc_mutex_lock(&swap_mtx); - node = extent_tree_szad_nsearch(&swap_chunks_szad, &key); - if (node != NULL) { - void *ret = node->addr; - - /* Remove node from the tree. */ - extent_tree_szad_remove(&swap_chunks_szad, node); - if (node->size == size) { - extent_tree_ad_remove(&swap_chunks_ad, node); - base_node_dealloc(node); - } else { - /* - * Insert the remainder of node's address range as a - * smaller chunk. Its position within swap_chunks_ad - * does not change. 
- */ - assert(node->size > size); - node->addr = (void *)((uintptr_t)node->addr + size); - node->size -= size; - extent_tree_szad_insert(&swap_chunks_szad, node); - } - if (config_stats) - swap_avail -= size; - malloc_mutex_unlock(&swap_mtx); - - if (*zero) - memset(ret, 0, size); - return (ret); - } - malloc_mutex_unlock(&swap_mtx); - - return (NULL); -} - -void * -chunk_alloc_swap(size_t size, bool *zero) -{ - void *ret; - - cassert(config_swap); - assert(swap_enabled); - - ret = chunk_recycle_swap(size, zero); - if (ret != NULL) - return (ret); - - malloc_mutex_lock(&swap_mtx); - if ((uintptr_t)swap_end + size <= (uintptr_t)swap_max) { - ret = swap_end; - swap_end = (void *)((uintptr_t)swap_end + size); - if (config_stats) - swap_avail -= size; - malloc_mutex_unlock(&swap_mtx); - - if (swap_prezeroed) - *zero = true; - else if (*zero) - memset(ret, 0, size); - } else { - malloc_mutex_unlock(&swap_mtx); - return (NULL); - } - - return (ret); -} - -static extent_node_t * -chunk_dealloc_swap_record(void *chunk, size_t size) -{ - extent_node_t *xnode, *node, *prev, key; - - cassert(config_swap); - - xnode = NULL; - while (true) { - key.addr = (void *)((uintptr_t)chunk + size); - node = extent_tree_ad_nsearch(&swap_chunks_ad, &key); - /* Try to coalesce forward. */ - if (node != NULL && node->addr == key.addr) { - /* - * Coalesce chunk with the following address range. - * This does not change the position within - * swap_chunks_ad, so only remove/insert from/into - * swap_chunks_szad. - */ - extent_tree_szad_remove(&swap_chunks_szad, node); - node->addr = chunk; - node->size += size; - extent_tree_szad_insert(&swap_chunks_szad, node); - break; - } else if (xnode == NULL) { - /* - * It is possible that base_node_alloc() will cause a - * new base chunk to be allocated, so take care not to - * deadlock on swap_mtx, and recover if another thread - * deallocates an adjacent chunk while this one is busy - * allocating xnode. - */ - malloc_mutex_unlock(&swap_mtx); - xnode = base_node_alloc(); - malloc_mutex_lock(&swap_mtx); - if (xnode == NULL) - return (NULL); - } else { - /* Coalescing forward failed, so insert a new node. */ - node = xnode; - xnode = NULL; - node->addr = chunk; - node->size = size; - extent_tree_ad_insert(&swap_chunks_ad, node); - extent_tree_szad_insert(&swap_chunks_szad, node); - break; - } - } - /* Discard xnode if it ended up unused do to a race. */ - if (xnode != NULL) - base_node_dealloc(xnode); - - /* Try to coalesce backward. */ - prev = extent_tree_ad_prev(&swap_chunks_ad, node); - if (prev != NULL && (void *)((uintptr_t)prev->addr + prev->size) == - chunk) { - /* - * Coalesce chunk with the previous address range. This does - * not change the position within swap_chunks_ad, so only - * remove/insert node from/into swap_chunks_szad. 
- */ - extent_tree_szad_remove(&swap_chunks_szad, prev); - extent_tree_ad_remove(&swap_chunks_ad, prev); - - extent_tree_szad_remove(&swap_chunks_szad, node); - node->addr = prev->addr; - node->size += prev->size; - extent_tree_szad_insert(&swap_chunks_szad, node); - - base_node_dealloc(prev); - } - - return (node); -} - -bool -chunk_in_swap(void *chunk) -{ - bool ret; - - cassert(config_swap); - assert(swap_enabled); - - malloc_mutex_lock(&swap_mtx); - if ((uintptr_t)chunk >= (uintptr_t)swap_base - && (uintptr_t)chunk < (uintptr_t)swap_max) - ret = true; - else - ret = false; - malloc_mutex_unlock(&swap_mtx); - - return (ret); -} - -bool -chunk_dealloc_swap(void *chunk, size_t size) -{ - bool ret; - - cassert(config_swap); - assert(swap_enabled); - - malloc_mutex_lock(&swap_mtx); - if ((uintptr_t)chunk >= (uintptr_t)swap_base - && (uintptr_t)chunk < (uintptr_t)swap_max) { - extent_node_t *node; - - /* Try to coalesce with other unused chunks. */ - node = chunk_dealloc_swap_record(chunk, size); - if (node != NULL) { - chunk = node->addr; - size = node->size; - } - - /* - * Try to shrink the in-use memory if this chunk is at the end - * of the in-use memory. - */ - if ((void *)((uintptr_t)chunk + size) == swap_end) { - swap_end = (void *)((uintptr_t)swap_end - size); - - if (node != NULL) { - extent_tree_szad_remove(&swap_chunks_szad, - node); - extent_tree_ad_remove(&swap_chunks_ad, node); - base_node_dealloc(node); - } - } else - madvise(chunk, size, MADV_DONTNEED); - - if (config_stats) - swap_avail += size; - ret = false; - goto RETURN; - } - - ret = true; -RETURN: - malloc_mutex_unlock(&swap_mtx); - return (ret); -} - -bool -chunk_swap_enable(const int *fds, unsigned nfds, bool prezeroed) -{ - bool ret; - unsigned i; - off_t off; - void *vaddr; - size_t cumsize, voff; - size_t sizes[nfds]; - - cassert(config_swap); - - malloc_mutex_lock(&swap_mtx); - - /* Get file sizes. */ - for (i = 0, cumsize = 0; i < nfds; i++) { - off = lseek(fds[i], 0, SEEK_END); - if (off == ((off_t)-1)) { - ret = true; - goto RETURN; - } - if (PAGE_CEILING(off) != off) { - /* Truncate to a multiple of the page size. */ - off &= ~PAGE_MASK; - if (ftruncate(fds[i], off) != 0) { - ret = true; - goto RETURN; - } - } - sizes[i] = off; - if (cumsize + off < cumsize) { - /* - * Cumulative file size is greater than the total - * address space. Bail out while it's still obvious - * what the problem is. - */ - ret = true; - goto RETURN; - } - cumsize += off; - } - - /* Round down to a multiple of the chunk size. */ - cumsize &= ~chunksize_mask; - if (cumsize == 0) { - ret = true; - goto RETURN; - } - - /* - * Allocate a chunk-aligned region of anonymous memory, which will - * be the final location for the memory-mapped files. - */ - vaddr = chunk_alloc_mmap_noreserve(cumsize); - if (vaddr == NULL) { - ret = true; - goto RETURN; - } - - /* Overlay the files onto the anonymous mapping. 
*/ - for (i = 0, voff = 0; i < nfds; i++) { - void *addr = mmap((void *)((uintptr_t)vaddr + voff), sizes[i], - PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, fds[i], 0); - if (addr == MAP_FAILED) { - char buf[BUFERROR_BUF]; - - - buferror(errno, buf, sizeof(buf)); - malloc_write( - ": Error in mmap(..., MAP_FIXED, ...): "); - malloc_write(buf); - malloc_write("\n"); - if (opt_abort) - abort(); - if (munmap(vaddr, voff) == -1) { - buferror(errno, buf, sizeof(buf)); - malloc_write(": Error in munmap(): "); - malloc_write(buf); - malloc_write("\n"); - } - ret = true; - goto RETURN; - } - assert(addr == (void *)((uintptr_t)vaddr + voff)); - - /* - * Tell the kernel that the mapping will be accessed randomly, - * and that it should not gratuitously sync pages to the - * filesystem. - */ -#ifdef MADV_RANDOM - madvise(addr, sizes[i], MADV_RANDOM); -#endif -#ifdef MADV_NOSYNC - madvise(addr, sizes[i], MADV_NOSYNC); -#endif - - voff += sizes[i]; - } - - swap_prezeroed = prezeroed; - swap_base = vaddr; - swap_end = swap_base; - swap_max = (void *)((uintptr_t)vaddr + cumsize); - - /* Copy the fds array for mallctl purposes. */ - swap_fds = (int *)base_alloc(nfds * sizeof(int)); - if (swap_fds == NULL) { - ret = true; - goto RETURN; - } - memcpy(swap_fds, fds, nfds * sizeof(int)); - swap_nfds = nfds; - - if (config_stats) - swap_avail = cumsize; - - swap_enabled = true; - - ret = false; -RETURN: - malloc_mutex_unlock(&swap_mtx); - return (ret); -} - -bool -chunk_swap_boot(void) -{ - - cassert(config_swap); - - if (malloc_mutex_init(&swap_mtx)) - return (true); - - swap_enabled = false; - swap_prezeroed = false; /* swap.* mallctl's depend on this. */ - swap_nfds = 0; - swap_fds = NULL; - if (config_stats) - swap_avail = 0; - swap_base = NULL; - swap_end = NULL; - swap_max = NULL; - - extent_tree_szad_new(&swap_chunks_szad); - extent_tree_ad_new(&swap_chunks_ad); - - return (false); -} diff --git a/src/ctl.c b/src/ctl.c index 05be4317..2ac2f66e 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -8,8 +8,6 @@ * ctl_mtx protects the following: * - ctl_stats.* * - opt_prof_active - * - swap_enabled - * - swap_prezeroed */ static malloc_mutex_t ctl_mtx; static bool ctl_initialized; @@ -56,7 +54,6 @@ CTL_PROTO(config_prof) CTL_PROTO(config_prof_libgcc) CTL_PROTO(config_prof_libunwind) CTL_PROTO(config_stats) -CTL_PROTO(config_swap) CTL_PROTO(config_sysv) CTL_PROTO(config_tcache) CTL_PROTO(config_tiny) @@ -85,7 +82,6 @@ CTL_PROTO(opt_prof_gdump) CTL_PROTO(opt_prof_leak) CTL_PROTO(opt_prof_accum) CTL_PROTO(opt_lg_prof_tcmax) -CTL_PROTO(opt_overcommit) CTL_PROTO(arenas_bin_i_size) CTL_PROTO(arenas_bin_i_nregs) CTL_PROTO(arenas_bin_i_run_size) @@ -162,10 +158,6 @@ CTL_PROTO(stats_cactive) CTL_PROTO(stats_allocated) CTL_PROTO(stats_active) CTL_PROTO(stats_mapped) -CTL_PROTO(swap_avail) -CTL_PROTO(swap_prezeroed) -CTL_PROTO(swap_nfds) -CTL_PROTO(swap_fds) /******************************************************************************/ /* mallctl tree. 
*/ @@ -205,7 +197,6 @@ static const ctl_node_t config_node[] = { {NAME("prof_libgcc"), CTL(config_prof_libgcc)}, {NAME("prof_libunwind"), CTL(config_prof_libunwind)}, {NAME("stats"), CTL(config_stats)}, - {NAME("swap"), CTL(config_swap)}, {NAME("sysv"), CTL(config_sysv)}, {NAME("tcache"), CTL(config_tcache)}, {NAME("tiny"), CTL(config_tiny)}, @@ -236,8 +227,7 @@ static const ctl_node_t opt_node[] = { {NAME("prof_gdump"), CTL(opt_prof_gdump)}, {NAME("prof_leak"), CTL(opt_prof_leak)}, {NAME("prof_accum"), CTL(opt_prof_accum)}, - {NAME("lg_prof_tcmax"), CTL(opt_lg_prof_tcmax)}, - {NAME("overcommit"), CTL(opt_overcommit)} + {NAME("lg_prof_tcmax"), CTL(opt_lg_prof_tcmax)} }; static const ctl_node_t arenas_bin_i_node[] = { @@ -391,13 +381,6 @@ static const ctl_node_t stats_node[] = { {NAME("arenas"), CHILD(stats_arenas)} }; -static const ctl_node_t swap_node[] = { - {NAME("avail"), CTL(swap_avail)}, - {NAME("prezeroed"), CTL(swap_prezeroed)}, - {NAME("nfds"), CTL(swap_nfds)}, - {NAME("fds"), CTL(swap_fds)} -}; - static const ctl_node_t root_node[] = { {NAME("version"), CTL(version)}, {NAME("epoch"), CTL(epoch)}, @@ -408,8 +391,6 @@ static const ctl_node_t root_node[] = { {NAME("arenas"), CHILD(arenas)}, {NAME("prof"), CHILD(prof)}, {NAME("stats"), CHILD(stats)} - , - {NAME("swap"), CHILD(swap)} }; static const ctl_node_t super_root_node[] = { {NAME(""), CHILD(root)} @@ -597,12 +578,6 @@ ctl_refresh(void) ctl_stats.active = (ctl_stats.arenas[narenas].pactive << PAGE_SHIFT) + ctl_stats.huge.allocated; ctl_stats.mapped = (ctl_stats.chunks.current << opt_lg_chunk); - - if (config_swap) { - malloc_mutex_lock(&swap_mtx); - ctl_stats.swap_avail = swap_avail; - malloc_mutex_unlock(&swap_mtx); - } } ctl_epoch++; @@ -1138,7 +1113,6 @@ CTL_RO_BOOL_CONFIG_GEN(config_prof) CTL_RO_BOOL_CONFIG_GEN(config_prof_libgcc) CTL_RO_BOOL_CONFIG_GEN(config_prof_libunwind) CTL_RO_BOOL_CONFIG_GEN(config_stats) -CTL_RO_BOOL_CONFIG_GEN(config_swap) CTL_RO_BOOL_CONFIG_GEN(config_sysv) CTL_RO_BOOL_CONFIG_GEN(config_tcache) CTL_RO_BOOL_CONFIG_GEN(config_tiny) @@ -1171,7 +1145,6 @@ CTL_RO_NL_CGEN(config_prof, opt_prof_gdump, opt_prof_gdump, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_leak, opt_prof_leak, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_accum, opt_prof_accum, bool) CTL_RO_NL_CGEN(config_prof, opt_lg_prof_tcmax, opt_lg_prof_tcmax, ssize_t) -CTL_RO_NL_CGEN(config_swap, opt_overcommit, opt_overcommit, bool) /******************************************************************************/ @@ -1450,85 +1423,3 @@ CTL_RO_CGEN(config_stats, stats_cactive, &stats_cactive, size_t *) CTL_RO_CGEN(config_stats, stats_allocated, ctl_stats.allocated, size_t) CTL_RO_CGEN(config_stats, stats_active, ctl_stats.active, size_t) CTL_RO_CGEN(config_stats, stats_mapped, ctl_stats.mapped, size_t) - -/******************************************************************************/ - -CTL_RO_CGEN(config_swap && config_stats, swap_avail, ctl_stats.swap_avail, - size_t) - -static int -swap_prezeroed_ctl(const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) -{ - int ret; - - if (config_swap == false) - return (ENOENT); - - malloc_mutex_lock(&ctl_mtx); - if (swap_enabled) { - READONLY(); - } else { - /* - * swap_prezeroed isn't actually used by the swap code until it - * is set during a successful chunk_swap_enabled() call. We - * use it here to store the value that we'll pass to - * chunk_swap_enable() in a swap.fds mallctl(). This is not - * very clean, but the obvious alternatives are even worse. 
- */ - WRITE(swap_prezeroed, bool); - } - - READ(swap_prezeroed, bool); - - ret = 0; -RETURN: - malloc_mutex_unlock(&ctl_mtx); - return (ret); -} - -CTL_RO_CGEN(config_swap, swap_nfds, swap_nfds, size_t) - -static int -swap_fds_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, - void *newp, size_t newlen) -{ - int ret; - - if (config_swap == false) - return (ENOENT); - - malloc_mutex_lock(&ctl_mtx); - if (swap_enabled) { - READONLY(); - } else if (newp != NULL) { - size_t nfds = newlen / sizeof(int); - - { - int fds[nfds]; - - memcpy(fds, newp, nfds * sizeof(int)); - if (chunk_swap_enable(fds, nfds, swap_prezeroed)) { - ret = EFAULT; - goto RETURN; - } - } - } - - if (oldp != NULL && oldlenp != NULL) { - if (*oldlenp != swap_nfds * sizeof(int)) { - size_t copylen = (swap_nfds * sizeof(int) <= *oldlenp) - ? swap_nfds * sizeof(int) : *oldlenp; - - memcpy(oldp, swap_fds, copylen); - ret = EINVAL; - goto RETURN; - } else - memcpy(oldp, swap_fds, *oldlenp); - } - - ret = 0; -RETURN: - malloc_mutex_unlock(&ctl_mtx); - return (ret); -} diff --git a/src/huge.c b/src/huge.c index 1eee436e..f2fba869 100644 --- a/src/huge.c +++ b/src/huge.c @@ -212,13 +212,11 @@ huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, /* * Use mremap(2) if this is a huge-->huge reallocation, and neither the - * source nor the destination are in swap or dss. + * source nor the destination are in dss. */ #ifdef JEMALLOC_MREMAP_FIXED - if (oldsize >= chunksize && (config_swap == false || swap_enabled == - false || (chunk_in_swap(ptr) == false && chunk_in_swap(ret) == - false)) && (config_dss == false || (chunk_in_dss(ptr) == false && - chunk_in_dss(ret) == false))) { + if (oldsize >= chunksize && (config_dss == false || (chunk_in_dss(ptr) + == false && chunk_in_dss(ret) == false))) { size_t newsize = huge_salloc(ret); /* @@ -280,7 +278,7 @@ huge_dalloc(void *ptr, bool unmap) malloc_mutex_unlock(&huge_mtx); - if (unmap && config_fill && (config_swap || config_dss) && opt_junk) + if (unmap && config_fill && config_dss && opt_junk) memset(node->addr, 0x5a, node->size); chunk_dealloc(node->addr, node->size, unmap); diff --git a/src/jemalloc.c b/src/jemalloc.c index 9e1814de..a32ce1a7 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -610,9 +610,6 @@ malloc_conf_init(void) CONF_HANDLE_BOOL(prof_gdump) CONF_HANDLE_BOOL(prof_leak) } - if (config_swap) { - CONF_HANDLE_BOOL(overcommit) - } malloc_conf_error("Invalid conf pair", k, klen, v, vlen); #undef CONF_HANDLE_BOOL @@ -1629,9 +1626,6 @@ jemalloc_prefork(void) if (config_dss) malloc_mutex_lock(&dss_mtx); - - if (config_swap) - malloc_mutex_lock(&swap_mtx); } void @@ -1641,9 +1635,6 @@ jemalloc_postfork(void) /* Release all mutexes, now that fork() has completed. 
*/ - if (config_swap) - malloc_mutex_unlock(&swap_mtx); - if (config_dss) malloc_mutex_unlock(&dss_mtx); diff --git a/src/stats.c b/src/stats.c index e6446530..ad8cd13d 100644 --- a/src/stats.c +++ b/src/stats.c @@ -525,7 +525,6 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, OPT_WRITE_SSIZE_T(lg_prof_interval) OPT_WRITE_BOOL(prof_gdump) OPT_WRITE_BOOL(prof_leak) - OPT_WRITE_BOOL(overcommit) #undef OPT_WRITE_BOOL #undef OPT_WRITE_SIZE_T @@ -668,11 +667,10 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, } if (config_stats) { - int err; size_t sszp, ssz; size_t *cactive; size_t allocated, active, mapped; - size_t chunks_current, chunks_high, swap_avail; + size_t chunks_current, chunks_high; uint64_t chunks_total; size_t huge_allocated; uint64_t huge_nmalloc, huge_ndalloc; @@ -694,24 +692,10 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, CTL_GET("stats.chunks.total", &chunks_total, uint64_t); CTL_GET("stats.chunks.high", &chunks_high, size_t); CTL_GET("stats.chunks.current", &chunks_current, size_t); - if ((err = JEMALLOC_P(mallctl)("swap.avail", &swap_avail, &ssz, - NULL, 0)) == 0) { - size_t lg_chunk; - - malloc_cprintf(write_cb, cbopaque, "chunks: nchunks " - "highchunks curchunks swap_avail\n"); - CTL_GET("opt.lg_chunk", &lg_chunk, size_t); - malloc_cprintf(write_cb, cbopaque, - " %13"PRIu64"%13zu%13zu%13zu\n", - chunks_total, chunks_high, chunks_current, - swap_avail << lg_chunk); - } else { - malloc_cprintf(write_cb, cbopaque, "chunks: nchunks " - "highchunks curchunks\n"); - malloc_cprintf(write_cb, cbopaque, - " %13"PRIu64"%13zu%13zu\n", - chunks_total, chunks_high, chunks_current); - } + malloc_cprintf(write_cb, cbopaque, "chunks: nchunks " + "highchunks curchunks\n"); + malloc_cprintf(write_cb, cbopaque, " %13"PRIu64"%13zu%13zu\n", + chunks_total, chunks_high, chunks_current); /* Print huge stats. */ CTL_GET("stats.huge.nmalloc", &huge_nmalloc, uint64_t); From 962463d9b57bcc65de2fa108a691b4183b9b2faf Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 13 Feb 2012 12:29:49 -0800 Subject: [PATCH 014/205] Streamline tcache-related malloc/free fast paths. tcache_get() is inlined, so do the config_tcache check inside tcache_get() and simplify its callers. Make arena_malloc() an inline function, since it is part of the malloc() fast path. Remove conditional logic that caused build issues if --disable-tcache was specified.
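As a minimal, self-contained sketch of the pattern (not jemalloc's actual code; the my_cache_* names and the MY_CACHE macro are hypothetical), the idea is to test a compile-time config_* constant inside the inlined accessor, so callers carry no conditional logic of their own and the compiler discards the dead branches when the feature is compiled out:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

typedef struct my_cache_s my_cache_t;
struct my_cache_s {
	unsigned	nallocs;
};

/* Compile-time feature flag, in the style of the config_* constants. */
static const bool config_cache =
#ifdef MY_CACHE
    true
#else
    false
#endif
    ;

static my_cache_t my_cache;

/*
 * The feature check lives inside the inlined accessor; when config_cache
 * is false this folds to "return (NULL)" and the cache branches in every
 * caller become dead code.
 */
static inline my_cache_t *
my_cache_get(void)
{

	if (config_cache == false)
		return (NULL);
	return (&my_cache);
}

/* Fast-path caller: no conditional compilation, just one NULL test. */
static inline void *
my_malloc(size_t size)
{
	my_cache_t *cache = my_cache_get();

	if (cache != NULL)
		cache->nallocs++;	/* Stand-in for a real cache hit path. */
	return (malloc(size));
}

int
main(void)
{
	void *p = my_malloc(32);

	printf("cache compiled in: %d, nallocs: %u\n", (int)config_cache,
	    my_cache.nallocs);
	free(p);
	return (0);
}

Because the flag is an ordinary const bool rather than a preprocessor guard, all code paths are type-checked in every configuration, which is the same motivation behind replacing #ifdef blocks with config_* tests throughout this series.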
--- include/jemalloc/internal/arena.h | 50 +++++++++++++++++------------- include/jemalloc/internal/tcache.h | 8 +++-- src/arena.c | 29 ----------------- src/tcache.c | 4 +-- 4 files changed, 34 insertions(+), 57 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 78ea2696..b8de12be 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -446,7 +446,6 @@ void arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind, uint64_t prof_accumbytes); void *arena_malloc_small(arena_t *arena, size_t size, bool zero); void *arena_malloc_large(arena_t *arena, size_t size, bool zero); -void *arena_malloc(size_t size, bool zero); void *arena_palloc(arena_t *arena, size_t size, size_t alloc_size, size_t alignment, bool zero); size_t arena_salloc(const void *ptr); @@ -475,6 +474,7 @@ unsigned arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr); prof_ctx_t *arena_prof_ctx_get(const void *ptr); void arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx); +void *arena_malloc(size_t size, bool zero); void arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr); #endif @@ -630,11 +630,33 @@ arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx) chunk->map[pageind-map_bias].prof_ctx = ctx; } +JEMALLOC_INLINE void * +arena_malloc(size_t size, bool zero) +{ + tcache_t *tcache = tcache_get(); + + assert(size != 0); + assert(QUANTUM_CEILING(size) <= arena_maxclass); + + if (size <= small_maxclass) { + if (tcache != NULL) + return (tcache_alloc_small(tcache, size, zero)); + else + return (arena_malloc_small(choose_arena(), size, zero)); + } else { + if (tcache != NULL && size <= tcache_maxclass) + return (tcache_alloc_large(tcache, size, zero)); + else + return (arena_malloc_large(choose_arena(), size, zero)); + } +} + JEMALLOC_INLINE void arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr) { size_t pageind; arena_chunk_map_t *mapelm; + tcache_t *tcache = tcache_get(); assert(arena != NULL); assert(chunk->arena == arena); @@ -646,9 +668,7 @@ arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr) assert((mapelm->bits & CHUNK_MAP_ALLOCATED) != 0); if ((mapelm->bits & CHUNK_MAP_LARGE) == 0) { /* Small allocation. 
*/ - tcache_t *tcache; - - if (config_tcache && (tcache = tcache_get()) != NULL) + if (tcache != NULL) tcache_dalloc_small(tcache, ptr); else { arena_run_t *run; @@ -671,27 +691,13 @@ arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr) malloc_mutex_unlock(&bin->lock); } } else { - if (config_tcache) { - size_t size = mapelm->bits & ~PAGE_MASK; + size_t size = mapelm->bits & ~PAGE_MASK; - assert(((uintptr_t)ptr & PAGE_MASK) == 0); - if (size <= tcache_maxclass) { - tcache_t *tcache; + assert(((uintptr_t)ptr & PAGE_MASK) == 0); - if ((tcache = tcache_get()) != NULL) - tcache_dalloc_large(tcache, ptr, size); - else { - malloc_mutex_lock(&arena->lock); - arena_dalloc_large(arena, chunk, ptr); - malloc_mutex_unlock(&arena->lock); - } - } else { - malloc_mutex_lock(&arena->lock); - arena_dalloc_large(arena, chunk, ptr); - malloc_mutex_unlock(&arena->lock); - } + if (size <= tcache_maxclass && tcache != NULL) { + tcache_dalloc_large(tcache, ptr, size); } else { - assert(((uintptr_t)ptr & PAGE_MASK) == 0); malloc_mutex_lock(&arena->lock); arena_dalloc_large(arena, chunk, ptr); malloc_mutex_unlock(&arena->lock); diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index 83e03d9d..717682d7 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -1,4 +1,3 @@ -#ifdef JEMALLOC_TCACHE /******************************************************************************/ #ifdef JEMALLOC_H_TYPES @@ -134,7 +133,11 @@ tcache_get(void) { tcache_t *tcache; - if ((isthreaded & opt_tcache) == false) + if (config_tcache == false) + return (NULL); + if (config_lazy_lock && (isthreaded & opt_tcache) == false) + return (NULL); + else if (opt_tcache == false) return (NULL); tcache = TCACHE_GET(); @@ -391,4 +394,3 @@ tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size) #endif /* JEMALLOC_H_INLINES */ /******************************************************************************/ -#endif /* JEMALLOC_TCACHE */ diff --git a/src/arena.c b/src/arena.c index c2632d97..8a158df2 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1455,35 +1455,6 @@ arena_malloc_large(arena_t *arena, size_t size, bool zero) return (ret); } -void * -arena_malloc(size_t size, bool zero) -{ - - assert(size != 0); - assert(QUANTUM_CEILING(size) <= arena_maxclass); - - if (size <= small_maxclass) { - tcache_t *tcache; - - if (config_tcache && (tcache = tcache_get()) != NULL) - return (tcache_alloc_small(tcache, size, zero)); - else - return (arena_malloc_small(choose_arena(), size, zero)); - } else { - if (config_tcache && size <= tcache_maxclass) { - tcache_t *tcache; - - if ((tcache = tcache_get()) != NULL) - return (tcache_alloc_large(tcache, size, zero)); - else { - return (arena_malloc_large(choose_arena(), - size, zero)); - } - } else - return (arena_malloc_large(choose_arena(), size, zero)); - } -} - /* Only handles large allocations that require more than page alignment. */ void * arena_palloc(arena_t *arena, size_t size, size_t alloc_size, size_t alignment, diff --git a/src/tcache.c b/src/tcache.c index 398fc0aa..4f4ed6c6 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -1,6 +1,6 @@ #define JEMALLOC_TCACHE_C_ #include "jemalloc/internal/jemalloc_internal.h" -#ifdef JEMALLOC_TCACHE + /******************************************************************************/ /* Data. 
*/ @@ -436,5 +436,3 @@ tcache_boot(void) return (false); } -/******************************************************************************/ -#endif /* JEMALLOC_TCACHE */ From 6ffbbeb5d60bdf16e15927cb1f173376fe355449 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 13 Feb 2012 12:31:30 -0800 Subject: [PATCH 015/205] Silence compiler warnings. --- src/jemalloc.c | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index a32ce1a7..796c8158 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -864,7 +864,11 @@ JEMALLOC_P(malloc)(size_t size) { void *ret; size_t usize; - prof_thr_cnt_t *cnt; + prof_thr_cnt_t *cnt +#ifdef JEMALLOC_CC_SILENCE + = NULL +#endif + ; if (malloc_init()) { ret = NULL; @@ -939,7 +943,11 @@ imemalign(void **memptr, size_t alignment, size_t size) int ret; size_t usize; void *result; - prof_thr_cnt_t *cnt; + prof_thr_cnt_t *cnt +#ifdef JEMALLOC_CC_SILENCE + = NULL +#endif + ; if (malloc_init()) result = NULL; @@ -1046,7 +1054,11 @@ JEMALLOC_P(calloc)(size_t num, size_t size) void *ret; size_t num_size; size_t usize; - prof_thr_cnt_t *cnt; + prof_thr_cnt_t *cnt +#ifdef JEMALLOC_CC_SILENCE + = NULL +#endif + ; if (malloc_init()) { num_size = 0; @@ -1121,8 +1133,16 @@ JEMALLOC_P(realloc)(void *ptr, size_t size) void *ret; size_t usize; size_t old_size = 0; - prof_thr_cnt_t *cnt; - prof_ctx_t *old_ctx; + prof_thr_cnt_t *cnt +#ifdef JEMALLOC_CC_SILENCE + = NULL +#endif + ; + prof_ctx_t *old_ctx +#ifdef JEMALLOC_CC_SILENCE + = NULL +#endif + ; if (size == 0) { if (config_sysv == false || opt_sysv == false) From 0fee70d718b9846cfab04225dc86a4b4216b963f Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 13 Feb 2012 12:36:11 -0800 Subject: [PATCH 016/205] Do not enable lazy locking by default. Do not enable lazy locking by default, because: - It's fragile (applications can subvert detection of multi-threaded mode). - Thread caching amortizes locking overhead in the default configuration. --- INSTALL | 4 ++-- configure.ac | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/INSTALL b/INSTALL index fa32d057..a210ec5a 100644 --- a/INSTALL +++ b/INSTALL @@ -128,8 +128,8 @@ any of the following arguments (not a definitive list) to 'configure': a measurable impact on performance, since the compiler is forced to load the page size from memory rather than embedding immediate values. ---disable-lazy-lock - Disable code that wraps pthread_create() to detect when an application +--enable-lazy-lock + Enable code that wraps pthread_create() to detect when an application switches from single-threaded to multi-threaded mode, so that it can avoid mutex locking/unlocking operations while in single-threaded mode. In practice, this feature usually has little impact on performance unless diff --git a/configure.ac b/configure.ac index cbcefdf3..e818f65a 100644 --- a/configure.ac +++ b/configure.ac @@ -730,17 +730,17 @@ AC_CHECK_LIB([pthread], [pthread_create], [LIBS="$LIBS -lpthread"], CPPFLAGS="$CPPFLAGS -D_REENTRANT" -dnl Enable lazy locking by default. +dnl Disable lazy locking by default. 
AC_ARG_ENABLE([lazy_lock], - [AS_HELP_STRING([--disable-lazy-lock], - [Disable lazy locking (always lock, even when single-threaded)])], + [AS_HELP_STRING([--enable-lazy-lock], + [Enable lazy locking (only lock when multi-threaded)])], [if test "x$enable_lazy_lock" = "xno" ; then enable_lazy_lock="0" else enable_lazy_lock="1" fi ], -[enable_lazy_lock="1"] +[enable_lazy_lock="0"] ) if test "x$enable_lazy_lock" = "x1" ; then AC_CHECK_HEADERS([dlfcn.h], , [AC_MSG_ERROR([dlfcn.h is missing])]) From ef8897b4b938111fcc9b54725067f1dbb33a4c20 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 13 Feb 2012 14:30:52 -0800 Subject: [PATCH 017/205] Make 8-byte tiny size class non-optional. When tiny size class support was first added, it was intended to support truly tiny size classes (even 2 bytes). However, this wasn't very useful in practice, so the minimum tiny size class has been limited to sizeof(void *) for a long time now. This is too small to be standards compliant, but other commonly used malloc implementations do not even bother using a 16-byte quantum on systems with vector units (SSE2+, AltiVEC, etc.). As such, it is safe in practice to support an 8-byte tiny size class on 64-bit systems that support 16-byte types. --- INSTALL | 7 -- configure.ac | 17 --- doc/jemalloc.xml.in | 23 ++-- include/jemalloc/internal/arena.h | 19 ++-- .../jemalloc/internal/jemalloc_internal.h.in | 7 -- include/jemalloc/jemalloc_defs.h.in | 6 -- src/arena.c | 101 +++++------------- src/ctl.c | 9 +- 8 files changed, 44 insertions(+), 145 deletions(-) diff --git a/INSTALL b/INSTALL index a210ec5a..9124ac34 100644 --- a/INSTALL +++ b/INSTALL @@ -90,13 +90,6 @@ any of the following arguments (not a definitive list) to 'configure': Statically link against the specified libunwind.a rather than dynamically linking with -lunwind. ---disable-tiny - Disable tiny (sub-quantum-sized) object support. Technically it is not - legal for a malloc implementation to allocate objects with less than - quantum alignment (8 or 16 bytes, depending on architecture), but in - practice it never causes any problems if, for example, 4-byte allocations - are 4-byte-aligned. - --disable-tcache Disable thread-specific caches for small objects. Objects are cached and released in bulk, thus reducing the total number of mutex operations. See diff --git a/configure.ac b/configure.ac index e818f65a..fdbf1bad 100644 --- a/configure.ac +++ b/configure.ac @@ -560,22 +560,6 @@ if test "x$enable_prof" = "x1" ; then fi AC_SUBST([enable_prof]) -dnl Enable tiny allocations by default. -AC_ARG_ENABLE([tiny], - [AS_HELP_STRING([--disable-tiny], [Disable tiny (sub-quantum) allocations])], -[if test "x$enable_tiny" = "xno" ; then - enable_tiny="0" -else - enable_tiny="1" -fi -], -[enable_tiny="1"] -) -if test "x$enable_tiny" = "x1" ; then - AC_DEFINE([JEMALLOC_TINY], [ ]) -fi -AC_SUBST([enable_tiny]) - dnl Enable thread-specific caching by default. 
AC_ARG_ENABLE([tcache], [AS_HELP_STRING([--disable-tcache], [Disable per thread caches])], @@ -934,7 +918,6 @@ AC_MSG_RESULT([prof : ${enable_prof}]) AC_MSG_RESULT([prof-libunwind : ${enable_prof_libunwind}]) AC_MSG_RESULT([prof-libgcc : ${enable_prof_libgcc}]) AC_MSG_RESULT([prof-gcc : ${enable_prof_gcc}]) -AC_MSG_RESULT([tiny : ${enable_tiny}]) AC_MSG_RESULT([tcache : ${enable_tcache}]) AC_MSG_RESULT([fill : ${enable_fill}]) AC_MSG_RESULT([xmalloc : ${enable_xmalloc}]) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index dc11642f..f9f14750 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -455,13 +455,12 @@ for (i = 0; i < nbins; i++) { allocations in constant time. Small objects are managed in groups by page runs. Each run maintains - a frontier and free list to track which regions are in use. Unless - is specified during configuration, - allocation requests that are no more than half the quantum (8 or 16, - depending on architecture) are rounded up to the nearest power of two that - is at least sizeof(void *). - Allocation requests that are more than half the quantum, but no more than - the minimum cacheline-multiple size class (see the sizeof(void *). Allocation requests + that are more than half the quantum, but no more than the minimum + cacheline-multiple size class (see the opt.lg_qspace_max option) are rounded up to the nearest multiple of the quantum. Allocation requests that are more than the minimum cacheline-multiple size class, but @@ -680,16 +679,6 @@ for (i = 0; i < nbins; i++) { during build configuration. - - - config.tiny - (bool) - r- - - was not specified - during build configuration. - - config.tls diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index b8de12be..cacb03f8 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -17,7 +17,7 @@ (((s) + SUBPAGE_MASK) & ~SUBPAGE_MASK) /* Smallest size class to support. */ -#define LG_TINY_MIN LG_SIZEOF_PTR +#define LG_TINY_MIN 3 #define TINY_MIN (1U << LG_TINY_MIN) /* @@ -418,18 +418,13 @@ extern uint8_t const *small_size2bin; extern arena_bin_info_t *arena_bin_info; /* Various bin-related settings. */ -#ifdef JEMALLOC_TINY /* Number of (2^n)-spaced tiny bins. */ -# define ntbins ((unsigned)(LG_QUANTUM - LG_TINY_MIN)) -#else -# define ntbins 0 -#endif + /* Number of (2^n)-spaced tiny bins. */ +#define ntbins ((unsigned)(LG_QUANTUM - LG_TINY_MIN)) extern unsigned nqbins; /* Number of quantum-spaced bins. */ extern unsigned ncbins; /* Number of cacheline-spaced bins. */ extern unsigned nsbins; /* Number of subpage-spaced bins. 
*/ extern unsigned nbins; -#ifdef JEMALLOC_TINY -# define tspace_max ((size_t)(QUANTUM >> 1)) -#endif +#define tspace_max ((size_t)(QUANTUM >> 1)) #define qspace_min QUANTUM extern size_t qspace_max; extern size_t cspace_min; @@ -633,18 +628,18 @@ arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx) JEMALLOC_INLINE void * arena_malloc(size_t size, bool zero) { - tcache_t *tcache = tcache_get(); + tcache_t *tcache; assert(size != 0); assert(QUANTUM_CEILING(size) <= arena_maxclass); if (size <= small_maxclass) { - if (tcache != NULL) + if ((tcache = tcache_get()) != NULL) return (tcache_alloc_small(tcache, size, zero)); else return (arena_malloc_small(choose_arena(), size, zero)); } else { - if (tcache != NULL && size <= tcache_maxclass) + if (size <= tcache_maxclass && (tcache = tcache_get()) != NULL) return (tcache_alloc_large(tcache, size, zero)); else return (arena_malloc_large(choose_arena(), size, zero)); diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 44415370..971336ec 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -118,13 +118,6 @@ static const bool config_tcache = false #endif ; -static const bool config_tiny = -#ifdef JEMALLOC_TINY - true -#else - false -#endif - ; static const bool config_tls = #ifdef JEMALLOC_TLS true diff --git a/include/jemalloc/jemalloc_defs.h.in b/include/jemalloc/jemalloc_defs.h.in index f78028b0..66da6f3d 100644 --- a/include/jemalloc/jemalloc_defs.h.in +++ b/include/jemalloc/jemalloc_defs.h.in @@ -79,12 +79,6 @@ /* Use gcc intrinsics for profile backtracing if defined. */ #undef JEMALLOC_PROF_GCC -/* - * JEMALLOC_TINY enables support for tiny objects, which are smaller than one - * quantum. - */ -#undef JEMALLOC_TINY - /* * JEMALLOC_TCACHE enables a thread-specific caching layer for small objects. * This makes it possible to allocate/deallocate objects without any locking diff --git a/src/arena.c b/src/arena.c index 8a158df2..32afd0cf 100644 --- a/src/arena.c +++ b/src/arena.c @@ -28,14 +28,7 @@ size_t mspace_mask; * const_small_size2bin is a static constant lookup table that in the common * case can be used as-is for small_size2bin. 
*/ -#if (LG_TINY_MIN == 2) -#define S2B_4(i) i, -#define S2B_8(i) S2B_4(i) S2B_4(i) -#elif (LG_TINY_MIN == 3) #define S2B_8(i) i, -#else -# error "Unsupported LG_TINY_MIN" -#endif #define S2B_16(i) S2B_8(i) S2B_8(i) #define S2B_32(i) S2B_16(i) S2B_16(i) #define S2B_64(i) S2B_32(i) S2B_32(i) @@ -49,23 +42,9 @@ static JEMALLOC_ATTR(aligned(CACHELINE)) const uint8_t const_small_size2bin[] = { #if (LG_QUANTUM == 4) /* 16-byte quantum **********************/ -# ifdef JEMALLOC_TINY -# if (LG_TINY_MIN == 2) - S2B_4(0) /* 4 */ - S2B_4(1) /* 8 */ - S2B_8(2) /* 16 */ -# define S2B_QMIN 2 -# elif (LG_TINY_MIN == 3) - S2B_8(0) /* 8 */ - S2B_8(1) /* 16 */ -# define S2B_QMIN 1 -# else -# error "Unsupported LG_TINY_MIN" -# endif -# else - S2B_16(0) /* 16 */ -# define S2B_QMIN 0 -# endif + S2B_8(0) /* 8 */ + S2B_8(1) /* 16 */ +# define S2B_QMIN 1 S2B_16(S2B_QMIN + 1) /* 32 */ S2B_16(S2B_QMIN + 2) /* 48 */ S2B_16(S2B_QMIN + 3) /* 64 */ @@ -76,18 +55,8 @@ static JEMALLOC_ATTR(aligned(CACHELINE)) # define S2B_CMIN (S2B_QMIN + 8) #else /* 8-byte quantum ***********************/ -# ifdef JEMALLOC_TINY -# if (LG_TINY_MIN == 2) - S2B_4(0) /* 4 */ - S2B_4(1) /* 8 */ -# define S2B_QMIN 1 -# else -# error "Unsupported LG_TINY_MIN" -# endif -# else - S2B_8(0) /* 8 */ -# define S2B_QMIN 0 -# endif +# define S2B_QMIN 0 + S2B_8(S2B_QMIN + 0) /* 8 */ S2B_8(S2B_QMIN + 1) /* 16 */ S2B_8(S2B_QMIN + 2) /* 24 */ S2B_8(S2B_QMIN + 3) /* 32 */ @@ -2153,17 +2122,15 @@ small_size2bin_validate(void) i = 1; /* Tiny. */ - if (config_tiny) { - for (; i < (1U << LG_TINY_MIN); i++) { - size = pow2_ceil(1U << LG_TINY_MIN); - binind = ffs((int)(size >> (LG_TINY_MIN + 1))); - assert(SMALL_SIZE2BIN(i) == binind); - } - for (; i < qspace_min; i++) { - size = pow2_ceil(i); - binind = ffs((int)(size >> (LG_TINY_MIN + 1))); - assert(SMALL_SIZE2BIN(i) == binind); - } + for (; i < TINY_MIN; i++) { + size = TINY_MIN; + binind = ffs((int)(size >> (LG_TINY_MIN + 1))); + assert(SMALL_SIZE2BIN(i) == binind); + } + for (; i < qspace_min; i++) { + size = pow2_ceil(i); + binind = ffs((int)(size >> (LG_TINY_MIN + 1))); + assert(SMALL_SIZE2BIN(i) == binind); } /* Quantum-spaced. */ for (; i <= qspace_max; i++) { @@ -2223,17 +2190,15 @@ small_size2bin_init_hard(void) i = 1; /* Tiny. */ - if (config_tiny) { - for (; i < (1U << LG_TINY_MIN); i += TINY_MIN) { - size = pow2_ceil(1U << LG_TINY_MIN); - binind = ffs((int)(size >> (LG_TINY_MIN + 1))); - CUSTOM_SMALL_SIZE2BIN(i) = binind; - } - for (; i < qspace_min; i += TINY_MIN) { - size = pow2_ceil(i); - binind = ffs((int)(size >> (LG_TINY_MIN + 1))); - CUSTOM_SMALL_SIZE2BIN(i) = binind; - } + for (; i < TINY_MIN; i += TINY_MIN) { + size = TINY_MIN; + binind = ffs((int)(size >> (LG_TINY_MIN + 1))); + CUSTOM_SMALL_SIZE2BIN(i) = binind; + } + for (; i < qspace_min; i += TINY_MIN) { + size = pow2_ceil(i); + binind = ffs((int)(size >> (LG_TINY_MIN + 1))); + CUSTOM_SMALL_SIZE2BIN(i) = binind; } /* Quantum-spaced. */ for (; i <= qspace_max; i += TINY_MIN) { @@ -2398,17 +2363,12 @@ bin_info_init(void) prev_run_size = PAGE_SIZE; i = 0; /* (2^n)-spaced tiny bins. 
*/ - if (config_tiny) { - for (; i < ntbins; i++) { - bin_info = &arena_bin_info[i]; - bin_info->reg_size = (1U << (LG_TINY_MIN + i)); - prev_run_size = bin_info_run_size_calc(bin_info, - prev_run_size); - bitmap_info_init(&bin_info->bitmap_info, - bin_info->nregs); - } + for (; i < ntbins; i++) { + bin_info = &arena_bin_info[i]; + bin_info->reg_size = (1U << (LG_TINY_MIN + i)); + prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size); + bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs); } - /* Quantum-spaced bins. */ for (; i < ntbins + nqbins; i++) { bin_info = &arena_bin_info[i]; @@ -2416,7 +2376,6 @@ bin_info_init(void) prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size); bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs); } - /* Cacheline-spaced bins. */ for (; i < ntbins + nqbins + ncbins; i++) { bin_info = &arena_bin_info[i]; @@ -2425,7 +2384,6 @@ bin_info_init(void) prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size); bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs); } - /* Subpage-spaced bins. */ for (; i < nbins; i++) { bin_info = &arena_bin_info[i]; @@ -2456,8 +2414,7 @@ arena_boot(void) assert(sspace_min < PAGE_SIZE); sspace_max = PAGE_SIZE - SUBPAGE; - if (config_tiny) - assert(LG_QUANTUM >= LG_TINY_MIN); + assert(LG_QUANTUM >= LG_TINY_MIN); assert(ntbins <= LG_QUANTUM); nqbins = qspace_max >> LG_QUANTUM; ncbins = ((cspace_max - cspace_min) >> LG_CACHELINE) + 1; diff --git a/src/ctl.c b/src/ctl.c index 2ac2f66e..6d0423fa 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -56,7 +56,6 @@ CTL_PROTO(config_prof_libunwind) CTL_PROTO(config_stats) CTL_PROTO(config_sysv) CTL_PROTO(config_tcache) -CTL_PROTO(config_tiny) CTL_PROTO(config_tls) CTL_PROTO(config_xmalloc) CTL_PROTO(opt_abort) @@ -199,7 +198,6 @@ static const ctl_node_t config_node[] = { {NAME("stats"), CTL(config_stats)}, {NAME("sysv"), CTL(config_sysv)}, {NAME("tcache"), CTL(config_tcache)}, - {NAME("tiny"), CTL(config_tiny)}, {NAME("tls"), CTL(config_tls)}, {NAME("xmalloc"), CTL(config_xmalloc)} }; @@ -993,8 +991,6 @@ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ int ret; \ bool oldval; \ \ - if (n == false) \ - return (ENOENT); \ READONLY(); \ oldval = n; \ READ(oldval, bool); \ @@ -1115,7 +1111,6 @@ CTL_RO_BOOL_CONFIG_GEN(config_prof_libunwind) CTL_RO_BOOL_CONFIG_GEN(config_stats) CTL_RO_BOOL_CONFIG_GEN(config_sysv) CTL_RO_BOOL_CONFIG_GEN(config_tcache) -CTL_RO_BOOL_CONFIG_GEN(config_tiny) CTL_RO_BOOL_CONFIG_GEN(config_tls) CTL_RO_BOOL_CONFIG_GEN(config_xmalloc) @@ -1203,8 +1198,8 @@ CTL_RO_NL_GEN(arenas_cacheline, CACHELINE, size_t) CTL_RO_NL_GEN(arenas_subpage, SUBPAGE, size_t) CTL_RO_NL_GEN(arenas_pagesize, PAGE_SIZE, size_t) CTL_RO_NL_GEN(arenas_chunksize, chunksize, size_t) -CTL_RO_NL_CGEN(config_tiny, arenas_tspace_min, (1U << LG_TINY_MIN), size_t) -CTL_RO_NL_CGEN(config_tiny, arenas_tspace_max, (qspace_min >> 1), size_t) +CTL_RO_NL_GEN(arenas_tspace_min, TINY_MIN, size_t) +CTL_RO_NL_GEN(arenas_tspace_max, (qspace_min >> 1), size_t) CTL_RO_NL_GEN(arenas_qspace_min, qspace_min, size_t) CTL_RO_NL_GEN(arenas_qspace_max, qspace_max, size_t) CTL_RO_NL_GEN(arenas_cspace_min, cspace_min, size_t) From 746868929afae3e346b47d0fa8a78d7fb131d5a4 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 13 Feb 2012 15:18:19 -0800 Subject: [PATCH 018/205] Remove highruns statistics. 
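Readers of the high-water counters must migrate to the current-value counters, which survive this change. A minimal application-side sketch (assuming <stdio.h> and the jemalloc header are included, a --enable-stats build, and placeholder arena/bin indices 0.0):

/* Sketch: read the current run count for arena 0, bin 0. */
size_t curruns;
size_t sz = sizeof(curruns);
if (JEMALLOC_P(mallctl)("stats.arenas.0.bins.0.curruns", &curruns,
    &sz, NULL, 0) == 0)
	printf("bin 0 curruns: %zu\n", curruns);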
--- doc/jemalloc.xml.in | 22 ---------------------- include/jemalloc/internal/arena.h | 4 ++++ include/jemalloc/internal/stats.h | 6 ------ src/arena.c | 29 ----------------------------- src/ctl.c | 10 ---------- src/stats.c | 28 +++++++++++----------------- 6 files changed, 15 insertions(+), 84 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index f9f14750..4c7023b5 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1866,17 +1866,6 @@ malloc_conf = "xmalloc:true";]]> to allocate changed. - - - stats.arenas.<i>.bins.<j>.highruns - (size_t) - r- - [] - - Maximum number of runs at any time thus far. - - - stats.arenas.<i>.bins.<j>.curruns @@ -1920,17 +1909,6 @@ malloc_conf = "xmalloc:true";]]> class. - - - stats.arenas.<i>.lruns.<j>.highruns - (size_t) - r- - [] - - Maximum number of runs at any time thus far for this - size class. - - stats.arenas.<i>.lruns.<j>.curruns diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index cacb03f8..4a87ef54 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -639,6 +639,10 @@ arena_malloc(size_t size, bool zero) else return (arena_malloc_small(choose_arena(), size, zero)); } else { + /* + * Initialize tcache after checking size in order to avoid + * infinite recursion during tcache initialization. + */ if (size <= tcache_maxclass && (tcache = tcache_get()) != NULL) return (tcache_alloc_large(tcache, size, zero)); else diff --git a/include/jemalloc/internal/stats.h b/include/jemalloc/internal/stats.h index 64ba4bd7..4af23c33 100644 --- a/include/jemalloc/internal/stats.h +++ b/include/jemalloc/internal/stats.h @@ -59,9 +59,6 @@ struct malloc_bin_stats_s { */ uint64_t reruns; - /* High-water mark for this bin. */ - size_t highruns; - /* Current number of runs in this bin. */ size_t curruns; }; @@ -83,9 +80,6 @@ struct malloc_large_stats_s { */ uint64_t nrequests; - /* High-water mark for this size class. */ - size_t highruns; - /* Current number of runs of this size class. 
*/ size_t curruns; }; diff --git a/src/arena.c b/src/arena.c index 32afd0cf..bd10de3b 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1196,8 +1196,6 @@ arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin) if (config_stats) { bin->stats.nruns++; bin->stats.curruns++; - if (bin->stats.curruns > bin->stats.highruns) - bin->stats.highruns = bin->stats.curruns; } return (run); } @@ -1401,12 +1399,6 @@ arena_malloc_large(arena_t *arena, size_t size, bool zero) arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nmalloc++; arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nrequests++; arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns++; - if (arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns > - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns) { - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns = - arena->stats.lstats[(size >> PAGE_SHIFT) - - 1].curruns; - } } if (config_prof) arena_prof_accum(arena, size); @@ -1477,12 +1469,6 @@ arena_palloc(arena_t *arena, size_t size, size_t alloc_size, size_t alignment, arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nmalloc++; arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nrequests++; arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns++; - if (arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns > - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns) { - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns = - arena->stats.lstats[(size >> PAGE_SHIFT) - - 1].curruns; - } } malloc_mutex_unlock(&arena->lock); @@ -1762,7 +1748,6 @@ arena_stats_merge(arena_t *arena, size_t *nactive, size_t *ndirty, lstats[i].nmalloc += arena->stats.lstats[i].nmalloc; lstats[i].ndalloc += arena->stats.lstats[i].ndalloc; lstats[i].nrequests += arena->stats.lstats[i].nrequests; - lstats[i].highruns += arena->stats.lstats[i].highruns; lstats[i].curruns += arena->stats.lstats[i].curruns; } malloc_mutex_unlock(&arena->lock); @@ -1781,7 +1766,6 @@ arena_stats_merge(arena_t *arena, size_t *nactive, size_t *ndirty, } bstats[i].nruns += bin->stats.nruns; bstats[i].reruns += bin->stats.reruns; - bstats[i].highruns += bin->stats.highruns; bstats[i].curruns += bin->stats.curruns; malloc_mutex_unlock(&bin->lock); } @@ -1835,12 +1819,6 @@ arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk, void *ptr, arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nmalloc++; arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nrequests++; arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns++; - if (arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns > - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns) { - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns = - arena->stats.lstats[(size >> PAGE_SHIFT) - - 1].curruns; - } } malloc_mutex_unlock(&arena->lock); } @@ -1909,13 +1887,6 @@ arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr, arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nrequests++; arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns++; - if (arena->stats.lstats[(size >> PAGE_SHIFT) - - 1].curruns > arena->stats.lstats[(size >> - PAGE_SHIFT) - 1].highruns) { - arena->stats.lstats[(size >> PAGE_SHIFT) - - 1].highruns = arena->stats.lstats[(size >> - PAGE_SHIFT) - 1].curruns; - } } malloc_mutex_unlock(&arena->lock); return (false); diff --git a/src/ctl.c b/src/ctl.c index 6d0423fa..e33ce67d 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -136,13 +136,11 @@ CTL_PROTO(stats_arenas_i_bins_j_nfills) CTL_PROTO(stats_arenas_i_bins_j_nflushes) CTL_PROTO(stats_arenas_i_bins_j_nruns) CTL_PROTO(stats_arenas_i_bins_j_nreruns) 
-CTL_PROTO(stats_arenas_i_bins_j_highruns) CTL_PROTO(stats_arenas_i_bins_j_curruns) INDEX_PROTO(stats_arenas_i_bins_j) CTL_PROTO(stats_arenas_i_lruns_j_nmalloc) CTL_PROTO(stats_arenas_i_lruns_j_ndalloc) CTL_PROTO(stats_arenas_i_lruns_j_nrequests) -CTL_PROTO(stats_arenas_i_lruns_j_highruns) CTL_PROTO(stats_arenas_i_lruns_j_curruns) INDEX_PROTO(stats_arenas_i_lruns_j) CTL_PROTO(stats_arenas_i_nthreads) @@ -322,7 +320,6 @@ static const ctl_node_t stats_arenas_i_bins_j_node[] = { {NAME("nflushes"), CTL(stats_arenas_i_bins_j_nflushes)}, {NAME("nruns"), CTL(stats_arenas_i_bins_j_nruns)}, {NAME("nreruns"), CTL(stats_arenas_i_bins_j_nreruns)}, - {NAME("highruns"), CTL(stats_arenas_i_bins_j_highruns)}, {NAME("curruns"), CTL(stats_arenas_i_bins_j_curruns)} }; static const ctl_node_t super_stats_arenas_i_bins_j_node[] = { @@ -337,7 +334,6 @@ static const ctl_node_t stats_arenas_i_lruns_j_node[] = { {NAME("nmalloc"), CTL(stats_arenas_i_lruns_j_nmalloc)}, {NAME("ndalloc"), CTL(stats_arenas_i_lruns_j_ndalloc)}, {NAME("nrequests"), CTL(stats_arenas_i_lruns_j_nrequests)}, - {NAME("highruns"), CTL(stats_arenas_i_lruns_j_highruns)}, {NAME("curruns"), CTL(stats_arenas_i_lruns_j_curruns)} }; static const ctl_node_t super_stats_arenas_i_lruns_j_node[] = { @@ -482,7 +478,6 @@ ctl_arena_stats_smerge(ctl_arena_stats_t *sstats, ctl_arena_stats_t *astats) sstats->lstats[i].nmalloc += astats->lstats[i].nmalloc; sstats->lstats[i].ndalloc += astats->lstats[i].ndalloc; sstats->lstats[i].nrequests += astats->lstats[i].nrequests; - sstats->lstats[i].highruns += astats->lstats[i].highruns; sstats->lstats[i].curruns += astats->lstats[i].curruns; } @@ -498,7 +493,6 @@ ctl_arena_stats_smerge(ctl_arena_stats_t *sstats, ctl_arena_stats_t *astats) } sstats->bstats[i].nruns += astats->bstats[i].nruns; sstats->bstats[i].reruns += astats->bstats[i].reruns; - sstats->bstats[i].highruns += astats->bstats[i].highruns; sstats->bstats[i].curruns += astats->bstats[i].curruns; } } @@ -1351,8 +1345,6 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nruns, ctl_stats.arenas[mib[2]].bstats[mib[4]].nruns, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nreruns, ctl_stats.arenas[mib[2]].bstats[mib[4]].reruns, uint64_t) -CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_highruns, - ctl_stats.arenas[mib[2]].bstats[mib[4]].highruns, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_curruns, ctl_stats.arenas[mib[2]].bstats[mib[4]].curruns, size_t) @@ -1373,8 +1365,6 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_lruns_j_nrequests, ctl_stats.arenas[mib[2]].lstats[mib[4]].nrequests, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_lruns_j_curruns, ctl_stats.arenas[mib[2]].lstats[mib[4]].curruns, size_t) -CTL_RO_CGEN(config_stats, stats_arenas_i_lruns_j_highruns, - ctl_stats.arenas[mib[2]].lstats[mib[4]].highruns, size_t) const ctl_node_t * stats_arenas_i_lruns_j_index(const size_t *mib, size_t miblen, size_t j) diff --git a/src/stats.c b/src/stats.c index ad8cd13d..1e907823 100644 --- a/src/stats.c +++ b/src/stats.c @@ -161,12 +161,11 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, malloc_cprintf(write_cb, cbopaque, "bins: bin size regs pgs allocated nmalloc" " ndalloc nrequests nfills nflushes" - " newruns reruns maxruns curruns\n"); + " newruns reruns curruns\n"); } else { malloc_cprintf(write_cb, cbopaque, "bins: bin size regs pgs allocated nmalloc" - " ndalloc newruns reruns maxruns" - " curruns\n"); + " ndalloc newruns reruns curruns\n"); } CTL_GET("arenas.nbins", &nbins, unsigned); for (j = 0, 
gap_start = UINT_MAX; j < nbins; j++) { @@ -182,7 +181,7 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, uint32_t nregs; uint64_t nmalloc, ndalloc, nrequests, nfills, nflushes; uint64_t reruns; - size_t highruns, curruns; + size_t curruns; if (gap_start != UINT_MAX) { if (j > gap_start + 1) { @@ -220,8 +219,6 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, } CTL_IJ_GET("stats.arenas.0.bins.0.nreruns", &reruns, uint64_t); - CTL_IJ_GET("stats.arenas.0.bins.0.highruns", &highruns, - size_t); CTL_IJ_GET("stats.arenas.0.bins.0.curruns", &curruns, size_t); if (config_tcache) { @@ -229,27 +226,26 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, "%13u %1s %5zu %4u %3zu %12zu %12"PRIu64 " %12"PRIu64" %12"PRIu64" %12"PRIu64 " %12"PRIu64" %12"PRIu64" %12"PRIu64 - " %12zu %12zu\n", + " %12zu\n", j, j < ntbins_ ? "T" : j < ntbins_ + nqbins ? "Q" : j < ntbins_ + nqbins + ncbins ? "C" : "S", reg_size, nregs, run_size / pagesize, allocated, nmalloc, ndalloc, nrequests, - nfills, nflushes, nruns, reruns, highruns, - curruns); + nfills, nflushes, nruns, reruns, curruns); } else { malloc_cprintf(write_cb, cbopaque, "%13u %1s %5zu %4u %3zu %12zu %12"PRIu64 " %12"PRIu64" %12"PRIu64" %12"PRIu64 - " %12zu %12zu\n", + " %12zu\n", j, j < ntbins_ ? "T" : j < ntbins_ + nqbins ? "Q" : j < ntbins_ + nqbins + ncbins ? "C" : "S", reg_size, nregs, run_size / pagesize, allocated, nmalloc, ndalloc, nruns, reruns, - highruns, curruns); + curruns); } } } @@ -276,11 +272,11 @@ stats_arena_lruns_print(void (*write_cb)(void *, const char *), void *cbopaque, malloc_cprintf(write_cb, cbopaque, "large: size pages nmalloc ndalloc nrequests" - " maxruns curruns\n"); + " curruns\n"); CTL_GET("arenas.nlruns", &nlruns, size_t); for (j = 0, gap_start = -1; j < nlruns; j++) { uint64_t nmalloc, ndalloc, nrequests; - size_t run_size, highruns, curruns; + size_t run_size, curruns; CTL_IJ_GET("stats.arenas.0.lruns.0.nmalloc", &nmalloc, uint64_t); @@ -293,8 +289,6 @@ stats_arena_lruns_print(void (*write_cb)(void *, const char *), void *cbopaque, gap_start = j; } else { CTL_J_GET("arenas.lrun.0.size", &run_size, size_t); - CTL_IJ_GET("stats.arenas.0.lruns.0.highruns", &highruns, - size_t); CTL_IJ_GET("stats.arenas.0.lruns.0.curruns", &curruns, size_t); if (gap_start != -1) { @@ -304,9 +298,9 @@ stats_arena_lruns_print(void (*write_cb)(void *, const char *), void *cbopaque, } malloc_cprintf(write_cb, cbopaque, "%13zu %5zu %12"PRIu64" %12"PRIu64" %12"PRIu64 - " %12zu %12zu\n", + " %12zu\n", run_size, run_size / pagesize, nmalloc, ndalloc, - nrequests, highruns, curruns); + nrequests, curruns); } } if (gap_start != -1) From e7a1058aaa6b2cbdd19da297bf2250f86dcdac89 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 13 Feb 2012 17:36:52 -0800 Subject: [PATCH 019/205] Fix bin->runcur management. Fix an interaction between arena_dissociate_bin_run() and arena_bin_lower_run() that made it possible for bin->runcur to point to a run other than the lowest non-full run. This bug violated jemalloc's layout policy, but did not affect correctness. 
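Stated as an invariant: after this change, a non-NULL bin->runcur lies at or below every run in bin->runs. A hypothetical debug-only check, not part of the patch, written against the arena_bin_runs_first() helper the patch introduces:

/* Hypothetical invariant check: if runcur is set, no run in
 * bin->runs precedes it in address order. */
static void
arena_bin_runcur_assert(arena_bin_t *bin)
{
	arena_run_t *first = arena_bin_runs_first(bin);

	assert(bin->runcur == NULL || first == NULL ||
	    (uintptr_t)bin->runcur <= (uintptr_t)first);
}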
--- src/arena.c | 140 ++++++++++++++++++++++++++++------------------------ 1 file changed, 75 insertions(+), 65 deletions(-) diff --git a/src/arena.c b/src/arena.c index bd10de3b..33f3f85e 100644 --- a/src/arena.c +++ b/src/arena.c @@ -142,6 +142,10 @@ static void arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, size_t oldsize, size_t newsize); static void arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, size_t oldsize, size_t newsize, bool dirty); +static arena_run_t *arena_bin_runs_first(arena_bin_t *bin); +static void arena_bin_runs_insert(arena_bin_t *bin, arena_run_t *run); +static void arena_bin_runs_remove(arena_bin_t *bin, arena_run_t *run); +static arena_run_t *arena_bin_nonfull_run_tryget(arena_bin_t *bin); static arena_run_t *arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin); static void *arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin); static void arena_dissociate_bin_run(arena_chunk_t *chunk, arena_run_t *run, @@ -1142,33 +1146,73 @@ arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, dirty); } +static arena_run_t * +arena_bin_runs_first(arena_bin_t *bin) +{ + arena_chunk_map_t *mapelm = arena_run_tree_first(&bin->runs); + if (mapelm != NULL) { + arena_chunk_t *chunk; + size_t pageind; + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(mapelm); + pageind = ((((uintptr_t)mapelm - (uintptr_t)chunk->map) / + sizeof(arena_chunk_map_t))) + map_bias; + arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + + (uintptr_t)((pageind - (mapelm->bits >> PAGE_SHIFT)) << + PAGE_SHIFT)); + return (run); + } + + return (NULL); +} + +static void +arena_bin_runs_insert(arena_bin_t *bin, arena_run_t *run) +{ + arena_chunk_t *chunk = CHUNK_ADDR2BASE(run); + size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> PAGE_SHIFT; + arena_chunk_map_t *mapelm = &chunk->map[pageind-map_bias]; + + assert(arena_run_tree_search(&bin->runs, mapelm) == NULL); + + arena_run_tree_insert(&bin->runs, mapelm); +} + +static void +arena_bin_runs_remove(arena_bin_t *bin, arena_run_t *run) +{ + arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); + size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> PAGE_SHIFT; + arena_chunk_map_t *mapelm = &chunk->map[pageind-map_bias]; + + assert(arena_run_tree_search(&bin->runs, mapelm) != NULL); + + arena_run_tree_remove(&bin->runs, mapelm); +} + +static arena_run_t * +arena_bin_nonfull_run_tryget(arena_bin_t *bin) +{ + arena_run_t *run = arena_bin_runs_first(bin); + if (run != NULL) { + arena_bin_runs_remove(bin, run); + if (config_stats) + bin->stats.reruns++; + } + return (run); +} + static arena_run_t * arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin) { - arena_chunk_map_t *mapelm; arena_run_t *run; size_t binind; arena_bin_info_t *bin_info; /* Look for a usable run. */ - mapelm = arena_run_tree_first(&bin->runs); - if (mapelm != NULL) { - arena_chunk_t *chunk; - size_t pageind; - - /* run is guaranteed to have available space. */ - arena_run_tree_remove(&bin->runs, mapelm); - - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(mapelm); - pageind = ((((uintptr_t)mapelm - (uintptr_t)chunk->map) / - sizeof(arena_chunk_map_t))) + map_bias; - run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - - (mapelm->bits >> PAGE_SHIFT)) - << PAGE_SHIFT)); - if (config_stats) - bin->stats.reruns++; + run = arena_bin_nonfull_run_tryget(bin); + if (run != NULL) return (run); - } /* No existing runs have any space available. 
*/ binind = arena_bin_index(arena, bin); @@ -1205,24 +1249,9 @@ arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin) * sufficient memory available while this one dropped bin->lock above, * so search one more time. */ - mapelm = arena_run_tree_first(&bin->runs); - if (mapelm != NULL) { - arena_chunk_t *chunk; - size_t pageind; - - /* run is guaranteed to have available space. */ - arena_run_tree_remove(&bin->runs, mapelm); - - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(mapelm); - pageind = ((((uintptr_t)mapelm - (uintptr_t)chunk->map) / - sizeof(arena_chunk_map_t))) + map_bias; - run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - - (mapelm->bits >> PAGE_SHIFT)) - << PAGE_SHIFT)); - if (config_stats) - bin->stats.reruns++; + run = arena_bin_nonfull_run_tryget(bin); + if (run != NULL) return (run); - } return (NULL); } @@ -1587,16 +1616,12 @@ arena_dissociate_bin_run(arena_chunk_t *chunk, arena_run_t *run, arena_bin_info_t *bin_info = &arena_bin_info[binind]; if (bin_info->nregs != 1) { - size_t run_pageind = (((uintptr_t)run - - (uintptr_t)chunk)) >> PAGE_SHIFT; - arena_chunk_map_t *run_mapelm = - &chunk->map[run_pageind-map_bias]; /* * This block's conditional is necessary because if the * run only contains one region, then it never gets * inserted into the non-full runs tree. */ - arena_run_tree_remove(&bin->runs, run_mapelm); + arena_bin_runs_remove(bin, run); } } } @@ -1660,34 +1685,19 @@ arena_bin_lower_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, { /* - * Make sure that bin->runcur always refers to the lowest non-full run, - * if one exists. + * Make sure that if bin->runcur is non-NULL, it refers to the lowest + * non-full run. It is okay to NULL runcur out rather than proactively + * keeping it pointing at the lowest non-full run. */ - if (bin->runcur == NULL) - bin->runcur = run; - else if ((uintptr_t)run < (uintptr_t)bin->runcur) { + if ((uintptr_t)run < (uintptr_t)bin->runcur) { /* Switch runcur. */ - if (bin->runcur->nfree > 0) { - arena_chunk_t *runcur_chunk = - CHUNK_ADDR2BASE(bin->runcur); - size_t runcur_pageind = (((uintptr_t)bin->runcur - - (uintptr_t)runcur_chunk)) >> PAGE_SHIFT; - arena_chunk_map_t *runcur_mapelm = - &runcur_chunk->map[runcur_pageind-map_bias]; - - /* Insert runcur. */ - arena_run_tree_insert(&bin->runs, runcur_mapelm); - } + if (bin->runcur->nfree > 0) + arena_bin_runs_insert(bin, bin->runcur); bin->runcur = run; - } else { - size_t run_pageind = (((uintptr_t)run - - (uintptr_t)chunk)) >> PAGE_SHIFT; - arena_chunk_map_t *run_mapelm = - &chunk->map[run_pageind-map_bias]; - - assert(arena_run_tree_search(&bin->runs, run_mapelm) == NULL); - arena_run_tree_insert(&bin->runs, run_mapelm); - } + if (config_stats) + bin->stats.reruns++; + } else + arena_bin_runs_insert(bin, run); } void From 0b526ff94da7e59aa947a4d3529b2376794f8b01 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 13 Feb 2012 18:04:26 -0800 Subject: [PATCH 020/205] Remove the opt.lg_prof_tcmax option. Remove the opt.lg_prof_tcmax option and hard-code a cache size of 1024. This setting is something that users just shouldn't have to worry about. If lock contention actually ends up being a problem, the simple solution available to the user is to reduce sampling frequency. 
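The escape hatch left to users is sampling frequency, set via the malloc_conf mechanism shown in the manual. A minimal sketch; the interval value is an arbitrary example, not a recommendation from the patch:

/* Sketch: lower profiling overhead by sampling less often (here a
 * 2^19 byte, i.e. 512 KiB, mean interval) rather than by capping the
 * per thread backtrace cache. */
malloc_conf = "prof:true,lg_prof_sample:19";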
--- doc/jemalloc.xml.in | 26 ++------------------------ include/jemalloc/internal/prof.h | 5 +++-- src/ctl.c | 5 +---- src/jemalloc.c | 2 -- src/prof.c | 8 ++------ src/stats.c | 12 ------------ 6 files changed, 8 insertions(+), 50 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 4c7023b5..2e5f10e3 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -938,8 +938,6 @@ malloc_conf = "xmalloc:true";]]> option for probabilistic sampling control. See the opt.prof_accum option for control of cumulative sample reporting. See the opt.lg_prof_tcmax - option for control of per thread backtrace caching. See the opt.lg_prof_interval option for information on interval-triggered profile dumping, and the opt.prof_gdump @@ -1017,28 +1015,8 @@ malloc_conf = "xmalloc:true";]]> dumps enabled/disabled. If this option is enabled, every unique backtrace must be stored for the duration of execution. Depending on the application, this can impose a large memory overhead, and the - cumulative counts are not always of interest. See the - opt.lg_prof_tcmax - option for control of per thread backtrace caching, which has important - interactions. This option is enabled by default. - - - - - opt.lg_prof_tcmax - (ssize_t) - r- - [] - - Maximum per thread backtrace cache (log base 2) used - for heap profiling. A backtrace can only be discarded if the - opt.prof_accum - option is disabled, and no thread caches currently refer to the - backtrace. Therefore, a backtrace cache limit should be imposed if the - intention is to limit how much memory is used by backtraces. By - default, no limit is imposed (encoded as -1). - + cumulative counts are not always of interest. This option is enabled + by default. diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index 98f96546..ad8bcd2c 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -12,7 +12,9 @@ typedef struct prof_tdata_s prof_tdata_t; #define LG_PROF_BT_MAX_DEFAULT 7 #define LG_PROF_SAMPLE_DEFAULT 0 #define LG_PROF_INTERVAL_DEFAULT -1 -#define LG_PROF_TCMAX_DEFAULT -1 + +/* Maximum number of backtraces to store in each per thread LRU cache. */ +#define PROF_TCMAX 1024 /* * Hard limit on stack backtrace depth. Note that the version of @@ -167,7 +169,6 @@ extern ssize_t opt_lg_prof_interval; /* lg(prof_interval). */ extern bool opt_prof_gdump; /* High-water memory dumping. */ extern bool opt_prof_leak; /* Dump leak summary at exit. */ extern bool opt_prof_accum; /* Report cumulative bytes. 
*/ -extern ssize_t opt_lg_prof_tcmax; /* lg(max per thread bactrace cache) */ extern char opt_prof_prefix[PATH_MAX + 1]; /* diff --git a/src/ctl.c b/src/ctl.c index e33ce67d..12b41857 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -80,7 +80,6 @@ CTL_PROTO(opt_lg_prof_interval) CTL_PROTO(opt_prof_gdump) CTL_PROTO(opt_prof_leak) CTL_PROTO(opt_prof_accum) -CTL_PROTO(opt_lg_prof_tcmax) CTL_PROTO(arenas_bin_i_size) CTL_PROTO(arenas_bin_i_nregs) CTL_PROTO(arenas_bin_i_run_size) @@ -222,8 +221,7 @@ static const ctl_node_t opt_node[] = { {NAME("lg_prof_interval"), CTL(opt_lg_prof_interval)}, {NAME("prof_gdump"), CTL(opt_prof_gdump)}, {NAME("prof_leak"), CTL(opt_prof_leak)}, - {NAME("prof_accum"), CTL(opt_prof_accum)}, - {NAME("lg_prof_tcmax"), CTL(opt_lg_prof_tcmax)} + {NAME("prof_accum"), CTL(opt_prof_accum)} }; static const ctl_node_t arenas_bin_i_node[] = { @@ -1133,7 +1131,6 @@ CTL_RO_NL_CGEN(config_prof, opt_lg_prof_interval, opt_lg_prof_interval, ssize_t) CTL_RO_NL_CGEN(config_prof, opt_prof_gdump, opt_prof_gdump, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_leak, opt_prof_leak, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_accum, opt_prof_accum, bool) -CTL_RO_NL_CGEN(config_prof, opt_lg_prof_tcmax, opt_lg_prof_tcmax, ssize_t) /******************************************************************************/ diff --git a/src/jemalloc.c b/src/jemalloc.c index 796c8158..d2a6009f 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -603,8 +603,6 @@ malloc_conf_init(void) CONF_HANDLE_SSIZE_T(lg_prof_sample, 0, (sizeof(uint64_t) << 3) - 1) CONF_HANDLE_BOOL(prof_accum) - CONF_HANDLE_SSIZE_T(lg_prof_tcmax, -1, - (sizeof(size_t) << 3) - 1) CONF_HANDLE_SSIZE_T(lg_prof_interval, -1, (sizeof(uint64_t) << 3) - 1) CONF_HANDLE_BOOL(prof_gdump) diff --git a/src/prof.c b/src/prof.c index 113cf15a..a4012f04 100644 --- a/src/prof.c +++ b/src/prof.c @@ -22,7 +22,6 @@ ssize_t opt_lg_prof_interval = LG_PROF_INTERVAL_DEFAULT; bool opt_prof_gdump = false; bool opt_prof_leak = false; bool opt_prof_accum = true; -ssize_t opt_lg_prof_tcmax = LG_PROF_TCMAX_DEFAULT; char opt_prof_prefix[PATH_MAX + 1]; uint64_t prof_interval; @@ -519,8 +518,7 @@ prof_lookup(prof_bt_t *bt) prof_leave(); /* Link a prof_thd_cnt_t into ctx for this thread. */ - if (opt_lg_prof_tcmax >= 0 && ckh_count(&prof_tdata->bt2cnt) - == (ZU(1) << opt_lg_prof_tcmax)) { + if (ckh_count(&prof_tdata->bt2cnt) == PROF_TCMAX) { assert(ckh_count(&prof_tdata->bt2cnt) > 0); /* * Flush the least recently used cnt in order to keep @@ -535,9 +533,7 @@ prof_lookup(prof_bt_t *bt) prof_ctx_merge(ret.p->ctx, ret.p); /* ret can now be re-used. */ } else { - assert(opt_lg_prof_tcmax < 0 || - ckh_count(&prof_tdata->bt2cnt) < (ZU(1) << - opt_lg_prof_tcmax)); + assert(ckh_count(&prof_tdata->bt2cnt) < PROF_TCMAX); /* Allocate and partially initialize a new cnt. 
*/ ret.v = imalloc(sizeof(prof_thr_cnt_t)); if (ret.p == NULL) { diff --git a/src/stats.c b/src/stats.c index 1e907823..86a48c60 100644 --- a/src/stats.c +++ b/src/stats.c @@ -515,7 +515,6 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, OPT_WRITE_BOOL(prof_active) OPT_WRITE_SSIZE_T(lg_prof_sample) OPT_WRITE_BOOL(prof_accum) - OPT_WRITE_SSIZE_T(lg_prof_tcmax) OPT_WRITE_SSIZE_T(lg_prof_interval) OPT_WRITE_BOOL(prof_gdump) OPT_WRITE_BOOL(prof_leak) @@ -622,17 +621,6 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, write_cb(cbopaque, u2s((1U << sv), 10, s)); write_cb(cbopaque, "\n"); - CTL_GET("opt.lg_prof_tcmax", &ssv, ssize_t); - write_cb(cbopaque, - "Maximum per thread backtrace cache: "); - if (ssv >= 0) { - write_cb(cbopaque, u2s((1U << ssv), 10, s)); - write_cb(cbopaque, " (2^"); - write_cb(cbopaque, u2s(ssv, 10, s)); - write_cb(cbopaque, ")\n"); - } else - write_cb(cbopaque, "N/A\n"); - CTL_GET("opt.lg_prof_sample", &sv, size_t); write_cb(cbopaque, "Average profile sample interval: "); write_cb(cbopaque, u2s((((uint64_t)1U) << sv), 10, s)); From 5389146191b279ca3b90028357dd6ad66b283def Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 13 Feb 2012 18:23:41 -0800 Subject: [PATCH 021/205] Remove the opt.lg_prof_bt_max option. Remove opt.lg_prof_bt_max, and hard code it to 7. The original intention of this option was to enable faster backtracing by limiting backtrace depth. However, this makes graphical pprof output very difficult to interpret. In practice, decreasing sampling frequency is a better mechanism for limiting profiling overhead. --- doc/jemalloc.xml.in | 16 ---------------- include/jemalloc/internal/prof.h | 30 ++++++++++-------------------- src/ctl.c | 3 --- src/jemalloc.c | 2 -- src/prof.c | 22 ++++++++-------------- src/stats.c | 6 ------ 6 files changed, 18 insertions(+), 61 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 2e5f10e3..1e8c8005 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -930,8 +930,6 @@ malloc_conf = "xmalloc:true";]]> where <prefix> is controlled by the opt.prof_prefix option. See the opt.lg_prof_bt_max - option for backtrace depth control. See the opt.prof_active option for on-the-fly activation/deactivation. See the opt.lg_prof_sample @@ -962,17 +960,6 @@ malloc_conf = "xmalloc:true";]]> jeprof. - - - opt.lg_prof_bt_max - (size_t) - r- - [] - - Maximum backtrace depth (log base 2) when profiling - memory allocation activity. The default is 128 (2^7). - - opt.prof_active @@ -1067,9 +1054,6 @@ malloc_conf = "xmalloc:true";]]> atexit 3 function to report memory leaks detected by allocation sampling. See the - opt.lg_prof_bt_max - option for backtrace depth control. See the opt.prof option for information on analyzing heap profile output. This option is disabled by default. diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index ad8bcd2c..744d361e 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -9,25 +9,19 @@ typedef struct prof_tdata_s prof_tdata_t; /* Option defaults. */ #define PROF_PREFIX_DEFAULT "jeprof" -#define LG_PROF_BT_MAX_DEFAULT 7 #define LG_PROF_SAMPLE_DEFAULT 0 #define LG_PROF_INTERVAL_DEFAULT -1 +/* + * Hard limit on stack backtrace depth. The version of prof_backtrace() that + * is based on __builtin_return_address() necessarily has a hard-coded number + * of backtrace frame handlers, and should be kept in sync with this setting. 
+ */ +#define PROF_BT_MAX 128 + /* Maximum number of backtraces to store in each per thread LRU cache. */ #define PROF_TCMAX 1024 -/* - * Hard limit on stack backtrace depth. Note that the version of - * prof_backtrace() that is based on __builtin_return_address() necessarily has - * a hard-coded number of backtrace frame handlers. - */ -#if (defined(JEMALLOC_PROF_LIBGCC) || defined(JEMALLOC_PROF_LIBUNWIND)) -# define LG_PROF_BT_MAX ((ZU(1) << (LG_SIZEOF_PTR+3)) - 1) -#else -# define LG_PROF_BT_MAX 7 /* >= LG_PROF_BT_MAX_DEFAULT */ -#endif -#define PROF_BT_MAX (1U << LG_PROF_BT_MAX) - /* Initial hash table size. */ #define PROF_CKH_MINITEMS 64 @@ -163,7 +157,6 @@ extern bool opt_prof; * to notice state changes. */ extern bool opt_prof_active; -extern size_t opt_lg_prof_bt_max; /* Maximum backtrace depth. */ extern size_t opt_lg_prof_sample; /* Mean bytes between samples. */ extern ssize_t opt_lg_prof_interval; /* lg(prof_interval). */ extern bool opt_prof_gdump; /* High-water memory dumping. */ @@ -186,9 +179,6 @@ extern uint64_t prof_interval; */ extern bool prof_promote; -/* (1U << opt_lg_prof_bt_max). */ -extern unsigned prof_bt_max; - /* Thread-specific backtrace cache, used to reduce bt2ctx contention. */ #ifndef NO_TLS extern __thread prof_tdata_t *prof_tdata_tls @@ -213,7 +203,7 @@ extern __thread prof_tdata_t *prof_tdata_tls extern pthread_key_t prof_tdata_tsd; void bt_init(prof_bt_t *bt, void **vec); -void prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max); +void prof_backtrace(prof_bt_t *bt, unsigned nignore); prof_thr_cnt_t *prof_lookup(prof_bt_t *bt); void prof_idump(void); bool prof_mdump(const char *filename); @@ -249,7 +239,7 @@ bool prof_boot2(void); /* Don't bother with sampling logic, since sampling */\ /* interval is 1. */\ bt_init(&bt, prof_tdata->vec); \ - prof_backtrace(&bt, nignore, prof_bt_max); \ + prof_backtrace(&bt, nignore); \ ret = prof_lookup(&bt); \ } else { \ if (prof_tdata->threshold == 0) { \ @@ -272,7 +262,7 @@ bool prof_boot2(void); if (size >= prof_tdata->threshold - \ prof_tdata->accum) { \ bt_init(&bt, prof_tdata->vec); \ - prof_backtrace(&bt, nignore, prof_bt_max); \ + prof_backtrace(&bt, nignore); \ ret = prof_lookup(&bt); \ } else \ ret = (prof_thr_cnt_t *)(uintptr_t)1U; \ diff --git a/src/ctl.c b/src/ctl.c index 12b41857..4938e10a 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -74,7 +74,6 @@ CTL_PROTO(opt_lg_tcache_gc_sweep) CTL_PROTO(opt_prof) CTL_PROTO(opt_prof_prefix) CTL_PROTO(opt_prof_active) -CTL_PROTO(opt_lg_prof_bt_max) CTL_PROTO(opt_lg_prof_sample) CTL_PROTO(opt_lg_prof_interval) CTL_PROTO(opt_prof_gdump) @@ -216,7 +215,6 @@ static const ctl_node_t opt_node[] = { {NAME("prof"), CTL(opt_prof)}, {NAME("prof_prefix"), CTL(opt_prof_prefix)}, {NAME("prof_active"), CTL(opt_prof_active)}, - {NAME("lg_prof_bt_max"), CTL(opt_lg_prof_bt_max)}, {NAME("lg_prof_sample"), CTL(opt_lg_prof_sample)}, {NAME("lg_prof_interval"), CTL(opt_lg_prof_interval)}, {NAME("prof_gdump"), CTL(opt_prof_gdump)}, @@ -1125,7 +1123,6 @@ CTL_RO_NL_CGEN(config_tcache, opt_lg_tcache_gc_sweep, opt_lg_tcache_gc_sweep, CTL_RO_NL_CGEN(config_prof, opt_prof, opt_prof, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_prefix, opt_prof_prefix, const char *) CTL_RO_CGEN(config_prof, opt_prof_active, opt_prof_active, bool) /* Mutable. 
*/ -CTL_RO_NL_CGEN(config_prof, opt_lg_prof_bt_max, opt_lg_prof_bt_max, size_t) CTL_RO_NL_CGEN(config_prof, opt_lg_prof_sample, opt_lg_prof_sample, size_t) CTL_RO_NL_CGEN(config_prof, opt_lg_prof_interval, opt_lg_prof_interval, ssize_t) CTL_RO_NL_CGEN(config_prof, opt_prof_gdump, opt_prof_gdump, bool) diff --git a/src/jemalloc.c b/src/jemalloc.c index d2a6009f..81829fe7 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -597,8 +597,6 @@ malloc_conf_init(void) if (config_prof) { CONF_HANDLE_BOOL(prof) CONF_HANDLE_CHAR_P(prof_prefix, "jeprof") - CONF_HANDLE_SIZE_T(lg_prof_bt_max, 0, - LG_PROF_BT_MAX) CONF_HANDLE_BOOL(prof_active) CONF_HANDLE_SSIZE_T(lg_prof_sample, 0, (sizeof(uint64_t) << 3) - 1) diff --git a/src/prof.c b/src/prof.c index a4012f04..21b60e3e 100644 --- a/src/prof.c +++ b/src/prof.c @@ -16,7 +16,6 @@ bool opt_prof = false; bool opt_prof_active = true; -size_t opt_lg_prof_bt_max = LG_PROF_BT_MAX_DEFAULT; size_t opt_lg_prof_sample = LG_PROF_SAMPLE_DEFAULT; ssize_t opt_lg_prof_interval = LG_PROF_INTERVAL_DEFAULT; bool opt_prof_gdump = false; @@ -27,8 +26,6 @@ char opt_prof_prefix[PATH_MAX + 1]; uint64_t prof_interval; bool prof_promote; -unsigned prof_bt_max; - #ifndef NO_TLS __thread prof_tdata_t *prof_tdata_tls JEMALLOC_ATTR(tls_model("initial-exec")); @@ -179,7 +176,7 @@ prof_leave(void) #ifdef JEMALLOC_PROF_LIBUNWIND void -prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) +prof_backtrace(prof_bt_t *bt, unsigned nignore) { unw_context_t uc; unw_cursor_t cursor; @@ -189,7 +186,6 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) cassert(config_prof); assert(bt->len == 0); assert(bt->vec != NULL); - assert(max <= (1U << opt_lg_prof_bt_max)); unw_getcontext(&uc); unw_init_local(&cursor, &uc); @@ -205,7 +201,7 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) * Iterate over stack frames until there are no more, or until no space * remains in bt. 
*/ - for (i = 0; i < max; i++) { + for (i = 0; i < PROF_BT_MAX; i++) { unw_get_reg(&cursor, UNW_REG_IP, (unw_word_t *)&bt->vec[i]); bt->len++; err = unw_step(&cursor); @@ -243,9 +239,9 @@ prof_unwind_callback(struct _Unwind_Context *context, void *arg) } void -prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) +prof_backtrace(prof_bt_t *bt, unsigned nignore) { - prof_unwind_data_t data = {bt, nignore, max}; + prof_unwind_data_t data = {bt, nignore, PROF_BT_MAX}; cassert(config_prof); @@ -253,10 +249,10 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) } #elif (defined(JEMALLOC_PROF_GCC)) void -prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) +prof_backtrace(prof_bt_t *bt, unsigned nignore) { #define BT_FRAME(i) \ - if ((i) < nignore + max) { \ + if ((i) < nignore + PROF_BT_MAX) { \ void *p; \ if (__builtin_frame_address(i) == 0) \ return; \ @@ -272,7 +268,6 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) cassert(config_prof); assert(nignore <= 3); - assert(max <= (1U << opt_lg_prof_bt_max)); BT_FRAME(0) BT_FRAME(1) @@ -423,7 +418,7 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) } #else void -prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) +prof_backtrace(prof_bt_t *bt, unsigned nignore) { cassert(config_prof); @@ -1168,7 +1163,7 @@ prof_tdata_init(void) } ql_new(&prof_tdata->lru_ql); - prof_tdata->vec = imalloc(sizeof(void *) * prof_bt_max); + prof_tdata->vec = imalloc(sizeof(void *) * PROF_BT_MAX); if (prof_tdata->vec == NULL) { ckh_delete(&prof_tdata->bt2cnt); idalloc(prof_tdata); @@ -1270,7 +1265,6 @@ prof_boot2(void) abort(); } - prof_bt_max = (1U << opt_lg_prof_bt_max); if (malloc_mutex_init(&prof_dump_seq_mtx)) return (true); diff --git a/src/stats.c b/src/stats.c index 86a48c60..6d9ba9d0 100644 --- a/src/stats.c +++ b/src/stats.c @@ -511,7 +511,6 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, OPT_WRITE_SSIZE_T(lg_tcache_max) OPT_WRITE_BOOL(prof) OPT_WRITE_CHAR_P(prof_prefix) - OPT_WRITE_SIZE_T(lg_prof_bt_max) OPT_WRITE_BOOL(prof_active) OPT_WRITE_SSIZE_T(lg_prof_sample) OPT_WRITE_BOOL(prof_accum) @@ -616,11 +615,6 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, } if ((err = JEMALLOC_P(mallctl)("opt.prof", &bv, &bsz, NULL, 0)) == 0 && bv) { - CTL_GET("opt.lg_prof_bt_max", &sv, size_t); - write_cb(cbopaque, "Maximum profile backtrace depth: "); - write_cb(cbopaque, u2s((1U << sv), 10, s)); - write_cb(cbopaque, "\n"); - CTL_GET("opt.lg_prof_sample", &sv, size_t); write_cb(cbopaque, "Average profile sample interval: "); write_cb(cbopaque, u2s((((uint64_t)1U) << sv), 10, s)); From b172610317babc7f365584ddd7fdaf4eb8d9d04c Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 28 Feb 2012 16:50:47 -0800 Subject: [PATCH 022/205] Simplify small size class infrastructure. Program-generate small size class tables for all valid combinations of LG_TINY_MIN, LG_QUANTUM, and PAGE_SHIFT. Use the appropriate table to generate all relevant data structures, and remove the distinction between tiny/quantum/cacheline/subpage bins. Remove --enable-dynamic-page-shift. This option didn't prove useful in practice, and it prevented optimizations. Add Tilera architecture support. 
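The spacing rule the generated tables encode is visible in the doc/jemalloc.xml.in hunk below, and is simple enough to reproduce standalone. This sketch assumes the common LG_TINY_MIN=3, LG_QUANTUM=4, 4 KiB page configuration, and mirrors the documented table rather than size_classes.sh itself:

#include <stdio.h>

/* Sketch: print the small size classes. The lone 8-byte tiny class
 * precedes the loop; the stride doubles each time a size reaches
 * eight strides, which keeps internal fragmentation near the
 * documented ~25% bound. */
int
main(void)
{
	size_t size = 16, stride = 16;

	puts("8");
	while (size < 4096) {	/* Stop at the page size. */
		printf("%zu\n", size);
		if (size == (stride << 3))
			stride <<= 1;
		size += stride;
	}
	return (0);
}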
--- .gitignore | 1 + INSTALL | 8 - configure.ac | 24 +- doc/jemalloc.xml.in | 198 ++-------- include/jemalloc/internal/arena.h | 98 +---- include/jemalloc/internal/atomic.h | 4 +- include/jemalloc/internal/ctl.h | 2 +- .../jemalloc/internal/jemalloc_internal.h.in | 98 ++--- include/jemalloc/internal/mb.h | 9 +- include/jemalloc/internal/size_classes.sh | 132 +++++++ include/jemalloc/internal/tcache.h | 16 +- include/jemalloc/jemalloc_defs.h.in | 3 - src/arena.c | 367 ++---------------- src/ctl.c | 69 +--- src/jemalloc.c | 53 +-- src/stats.c | 72 +--- src/tcache.c | 31 +- 17 files changed, 327 insertions(+), 858 deletions(-) create mode 100755 include/jemalloc/internal/size_classes.sh diff --git a/.gitignore b/.gitignore index 1a9bb068..e6e8bb00 100644 --- a/.gitignore +++ b/.gitignore @@ -11,6 +11,7 @@ /lib/ /Makefile /include/jemalloc/internal/jemalloc_internal\.h +/include/jemalloc/internal/size_classes\.h /include/jemalloc/jemalloc\.h /include/jemalloc/jemalloc_defs\.h /test/jemalloc_test\.h diff --git a/INSTALL b/INSTALL index 9124ac34..0ddcacb7 100644 --- a/INSTALL +++ b/INSTALL @@ -113,14 +113,6 @@ any of the following arguments (not a definitive list) to 'configure': rather than a minimal allocation. See the "opt.sysv" option documentation for usage details. ---enable-dynamic-page-shift - Under most conditions, the system page size never changes (usually 4KiB or - 8KiB, depending on architecture and configuration), and unless this option - is enabled, jemalloc assumes that page size can safely be determined during - configuration and hard-coded. Enabling dynamic page size determination has - a measurable impact on performance, since the compiler is forced to load - the page size from memory rather than embedding immediate values. - --enable-lazy-lock Enable code that wraps pthread_create() to detect when an application switches from single-threaded to multi-threaded mode, so that it can avoid diff --git a/configure.ac b/configure.ac index fdbf1bad..91caef46 100644 --- a/configure.ac +++ b/configure.ac @@ -367,8 +367,10 @@ cfgoutputs_tup="${cfgoutputs_tup} include/jemalloc/internal/jemalloc_internal.h" cfgoutputs_tup="${cfgoutputs_tup} test/jemalloc_test.h:test/jemalloc_test.h.in" cfghdrs_in="${srcroot}include/jemalloc/jemalloc_defs.h.in" +cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/internal/size_classes.sh" cfghdrs_out="include/jemalloc/jemalloc_defs${install_suffix}.h" +cfghdrs_out="${cfghdrs_out} include/jemalloc/internal/size_classes.h" cfghdrs_tup="include/jemalloc/jemalloc_defs${install_suffix}.h:include/jemalloc/jemalloc_defs.h.in" @@ -640,23 +642,6 @@ if test "x$enable_sysv" = "x1" ; then fi AC_SUBST([enable_sysv]) -dnl Do not determine page shift at run time by default. -AC_ARG_ENABLE([dynamic_page_shift], - [AS_HELP_STRING([--enable-dynamic-page-shift], - [Determine page size at run time (don't trust configure result)])], -[if test "x$enable_dynamic_page_shift" = "xno" ; then - enable_dynamic_page_shift="0" -else - enable_dynamic_page_shift="1" -fi -], -[enable_dynamic_page_shift="0"] -) -if test "x$enable_dynamic_page_shift" = "x1" ; then - AC_DEFINE([DYNAMIC_PAGE_SHIFT], [ ]) -fi -AC_SUBST([enable_dynamic_page_shift]) - AC_MSG_CHECKING([STATIC_PAGE_SHIFT]) AC_RUN_IFELSE([AC_LANG_PROGRAM( [[#include @@ -866,6 +851,11 @@ dnl ============================================================================ dnl Check for typedefs, structures, and compiler characteristics. 
AC_HEADER_STDBOOL +AC_CONFIG_COMMANDS([include/jemalloc/internal/size_classes.h], [ + mkdir -p "include/jemalloc/internal" + "${srcdir}/include/jemalloc/internal/size_classes.sh" > "${objroot}include/jemalloc/internal/size_classes.h" +]) + dnl Process .in files. AC_SUBST([cfghdrs_in]) AC_SUBST([cfghdrs_out]) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 1e8c8005..cfe120fb 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -458,20 +458,11 @@ for (i = 0; i < nbins; i++) { a frontier and free list to track which regions are in use. Allocation requests that are no more than half the quantum (8 or 16, depending on architecture) are rounded up to the nearest power of two that is at least - sizeof(void *). Allocation requests - that are more than half the quantum, but no more than the minimum - cacheline-multiple size class (see the opt.lg_qspace_max - option) are rounded up to the nearest multiple of the quantum. Allocation - requests that are more than the minimum cacheline-multiple size class, but - no more than the minimum subpage-multiple size class (see the opt.lg_cspace_max - option) are rounded up to the nearest multiple of the cacheline size (64). - Allocation requests that are more than the minimum subpage-multiple size - class, but no more than the maximum subpage-multiple size class are rounded - up to the nearest multiple of the subpage size (256). Allocation requests - that are more than the maximum subpage-multiple size class, but small - enough to fit in an arena-managed chunk (see the sizeof(double). All other small + object size classes are multiples of the quantum, spaced such that internal + fragmentation is limited to approximately 25% for all but the smallest size + classes. Allocation requests that are larger than the maximum small size + class, but small enough to fit in an arena-managed chunk (see the opt.lg_chunk option), are rounded up to the nearest run size. Allocation requests that are too large to fit in an arena-managed chunk are rounded up to the nearest multiple of @@ -507,16 +498,28 @@ for (i = 0; i < nbins; i++) { [8] - Quantum-spaced + 16-spaced [16, 32, 48, ..., 128] - Cacheline-spaced - [192, 256, 320, ..., 512] + 32-spaced + [160, 192, 224, 256] - Subpage-spaced - [768, 1024, 1280, ..., 3840] + 64-spaced + [320, 384, 448, 512] + + + 128-spaced + [640, 768, 896, 1024] + + + 256-spaced + [1280, 1536, 1792, 2048] + + + 512-spaced + [2560, 3072, 3584] Large @@ -714,30 +717,6 @@ for (i = 0; i < nbins; i++) { - - - opt.lg_qspace_max - (size_t) - r- - - Size (log base 2) of the maximum size class that is a - multiple of the quantum (8 or 16 bytes, depending on architecture). - Above this size, cacheline spacing is used for size classes. The - default value is 128 bytes (2^7). - - - - - opt.lg_cspace_max - (size_t) - r- - - Size (log base 2) of the maximum size class that is a - multiple of the cacheline size (64). Above this size, subpage spacing - (256 bytes) is used for size classes. The default value is 512 bytes - (2^9). - - opt.lg_chunk @@ -1178,24 +1157,6 @@ malloc_conf = "xmalloc:true";]]> Quantum size. - - - arenas.cacheline - (size_t) - r- - - Assumed cacheline size. - - - - - arenas.subpage - (size_t) - r- - - Subpage size class interval. - - arenas.pagesize @@ -1214,80 +1175,6 @@ malloc_conf = "xmalloc:true";]]> Chunk size. - - - arenas.tspace_min - (size_t) - r- - - Minimum tiny size class. Tiny size classes are powers - of two. - - - - - arenas.tspace_max - (size_t) - r- - - Maximum tiny size class. 
Tiny size classes are powers - of two. - - - - - arenas.qspace_min - (size_t) - r- - - Minimum quantum-spaced size class. - - - - - arenas.qspace_max - (size_t) - r- - - Maximum quantum-spaced size class. - - - - - arenas.cspace_min - (size_t) - r- - - Minimum cacheline-spaced size class. - - - - - arenas.cspace_max - (size_t) - r- - - Maximum cacheline-spaced size class. - - - - - arenas.sspace_min - (size_t) - r- - - Minimum subpage-spaced size class. - - - - - arenas.sspace_max - (size_t) - r- - - Maximum subpage-spaced size class. - - arenas.tcache_max @@ -1298,52 +1185,13 @@ malloc_conf = "xmalloc:true";]]> Maximum thread-cached size class. - - - arenas.ntbins - (unsigned) - r- - - Number of tiny bin size classes. - - - - - arenas.nqbins - (unsigned) - r- - - Number of quantum-spaced bin size - classes. - - - - - arenas.ncbins - (unsigned) - r- - - Number of cacheline-spaced bin size - classes. - - - - - arenas.nsbins - (unsigned) - r- - - Number of subpage-spaced bin size - classes. - - arenas.nbins (unsigned) r- - Total number of bin size classes. + Number of bin size classes. diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 4a87ef54..16c2b1e6 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -1,39 +1,6 @@ /******************************************************************************/ #ifdef JEMALLOC_H_TYPES -/* - * Subpages are an artificially designated partitioning of pages. Their only - * purpose is to support subpage-spaced size classes. - * - * There must be at least 4 subpages per page, due to the way size classes are - * handled. - */ -#define LG_SUBPAGE 8 -#define SUBPAGE ((size_t)(1U << LG_SUBPAGE)) -#define SUBPAGE_MASK (SUBPAGE - 1) - -/* Return the smallest subpage multiple that is >= s. */ -#define SUBPAGE_CEILING(s) \ - (((s) + SUBPAGE_MASK) & ~SUBPAGE_MASK) - -/* Smallest size class to support. */ -#define LG_TINY_MIN 3 -#define TINY_MIN (1U << LG_TINY_MIN) - -/* - * Maximum size class that is a multiple of the quantum, but not (necessarily) - * a power of 2. Above this size, allocations are rounded up to the nearest - * power of 2. - */ -#define LG_QSPACE_MAX_DEFAULT 7 - -/* - * Maximum size class that is a multiple of the cacheline, but not (necessarily) - * a power of 2. Above this size, allocations are rounded up to the nearest - * power of 2. - */ -#define LG_CSPACE_MAX_DEFAULT 9 - /* * RUN_MAX_OVRHD indicates maximum desired run header overhead. Runs are sized * as small as possible such that this setting is still honored, without @@ -364,75 +331,26 @@ struct arena_s { arena_avail_tree_t runs_avail_clean; arena_avail_tree_t runs_avail_dirty; - /* - * bins is used to store trees of free regions of the following sizes, - * assuming a 64-bit system with 16-byte quantum, 4 KiB page size, and - * default MALLOC_CONF. - * - * bins[i] | size | - * --------+--------+ - * 0 | 8 | - * --------+--------+ - * 1 | 16 | - * 2 | 32 | - * 3 | 48 | - * : : - * 6 | 96 | - * 7 | 112 | - * 8 | 128 | - * --------+--------+ - * 9 | 192 | - * 10 | 256 | - * 11 | 320 | - * 12 | 384 | - * 13 | 448 | - * 14 | 512 | - * --------+--------+ - * 15 | 768 | - * 16 | 1024 | - * 17 | 1280 | - * : : - * 25 | 3328 | - * 26 | 3584 | - * 27 | 3840 | - * --------+--------+ - */ - arena_bin_t bins[1]; /* Dynamically sized. */ + /* bins is used to store trees of free regions. 
*/ + arena_bin_t bins[NBINS]; }; #endif /* JEMALLOC_H_STRUCTS */ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -extern size_t opt_lg_qspace_max; -extern size_t opt_lg_cspace_max; extern ssize_t opt_lg_dirty_mult; /* * small_size2bin is a compact lookup table that rounds request sizes up to * size classes. In order to reduce cache footprint, the table is compressed, * and all accesses are via the SMALL_SIZE2BIN macro. */ -extern uint8_t const *small_size2bin; +extern uint8_t const small_size2bin[]; #define SMALL_SIZE2BIN(s) (small_size2bin[(s-1) >> LG_TINY_MIN]) -extern arena_bin_info_t *arena_bin_info; - -/* Various bin-related settings. */ - /* Number of (2^n)-spaced tiny bins. */ -#define ntbins ((unsigned)(LG_QUANTUM - LG_TINY_MIN)) -extern unsigned nqbins; /* Number of quantum-spaced bins. */ -extern unsigned ncbins; /* Number of cacheline-spaced bins. */ -extern unsigned nsbins; /* Number of subpage-spaced bins. */ -extern unsigned nbins; -#define tspace_max ((size_t)(QUANTUM >> 1)) -#define qspace_min QUANTUM -extern size_t qspace_max; -extern size_t cspace_min; -extern size_t cspace_max; -extern size_t sspace_min; -extern size_t sspace_max; -#define small_maxclass sspace_max +extern arena_bin_info_t arena_bin_info[NBINS]; +/* Number of large size classes. */ #define nlclasses (chunk_npages - map_bias) void arena_purge_all(arena_t *arena); @@ -457,7 +375,7 @@ void *arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, void *arena_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, size_t alignment, bool zero); bool arena_new(arena_t *arena, unsigned ind); -bool arena_boot(void); +void arena_boot(void); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ @@ -478,7 +396,7 @@ JEMALLOC_INLINE size_t arena_bin_index(arena_t *arena, arena_bin_t *bin) { size_t binind = bin - arena->bins; - assert(binind < nbins); + assert(binind < NBINS); return (binind); } @@ -633,7 +551,7 @@ arena_malloc(size_t size, bool zero) assert(size != 0); assert(QUANTUM_CEILING(size) <= arena_maxclass); - if (size <= small_maxclass) { + if (size <= SMALL_MAXCLASS) { if ((tcache = tcache_get()) != NULL) return (tcache_alloc_small(tcache, size, zero)); else diff --git a/include/jemalloc/internal/atomic.h b/include/jemalloc/internal/atomic.h index 9a298623..8c685939 100644 --- a/include/jemalloc/internal/atomic.h +++ b/include/jemalloc/internal/atomic.h @@ -70,7 +70,7 @@ atomic_sub_uint64(uint64_t *p, uint64_t x) return (OSAtomicAdd64(-((int64_t)x), (int64_t *)p)); } -#elif (defined(__amd64_) || defined(__x86_64__)) +#elif (defined(__amd64__) || defined(__x86_64__)) JEMALLOC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) { @@ -133,7 +133,7 @@ atomic_sub_uint32(uint32_t *p, uint32_t x) return (OSAtomicAdd32(-((int32_t)x), (int32_t *)p)); } -#elif (defined(__i386__) || defined(__amd64_) || defined(__x86_64__)) +#elif (defined(__i386__) || defined(__amd64__) || defined(__x86_64__)) JEMALLOC_INLINE uint32_t atomic_add_uint32(uint32_t *p, uint32_t x) { diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index de4b9412..28be2aef 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -40,7 +40,7 @@ struct ctl_arena_stats_s { uint64_t ndalloc_small; uint64_t nrequests_small; - malloc_bin_stats_t *bstats; /* nbins elements. */ + malloc_bin_stats_t bstats[NBINS]; malloc_large_stats_t *lstats; /* nlclasses elements. 
*/ }; diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 971336ec..f43fcd20 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -229,33 +229,48 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s); /* Size of stack-allocated buffer passed to buferror(). */ #define BUFERROR_BUF 64 -/* Minimum alignment of allocations is 2^LG_QUANTUM bytes. */ -#ifdef __i386__ -# define LG_QUANTUM 4 -#endif -#ifdef __ia64__ -# define LG_QUANTUM 4 -#endif -#ifdef __alpha__ -# define LG_QUANTUM 4 -#endif -#ifdef __sparc64__ -# define LG_QUANTUM 4 -#endif -#if (defined(__amd64__) || defined(__x86_64__)) -# define LG_QUANTUM 4 -#endif -#ifdef __arm__ -# define LG_QUANTUM 3 -#endif -#ifdef __mips__ -# define LG_QUANTUM 3 -#endif -#ifdef __powerpc__ -# define LG_QUANTUM 4 -#endif -#ifdef __s390x__ -# define LG_QUANTUM 4 +/* Smallest size class to support. */ +#define LG_TINY_MIN 3 +#define TINY_MIN (1U << LG_TINY_MIN) + +/* + * Minimum alignment of allocations is 2^LG_QUANTUM bytes (ignoring tiny size + * classes). + */ +#ifndef LG_QUANTUM +# ifdef __i386__ +# define LG_QUANTUM 4 +# endif +# ifdef __ia64__ +# define LG_QUANTUM 4 +# endif +# ifdef __alpha__ +# define LG_QUANTUM 4 +# endif +# ifdef __sparc64__ +# define LG_QUANTUM 4 +# endif +# if (defined(__amd64__) || defined(__x86_64__)) +# define LG_QUANTUM 4 +# endif +# ifdef __arm__ +# define LG_QUANTUM 3 +# endif +# ifdef __mips__ +# define LG_QUANTUM 3 +# endif +# ifdef __powerpc__ +# define LG_QUANTUM 4 +# endif +# ifdef __s390x__ +# define LG_QUANTUM 4 +# endif +# ifdef __tile__ +# define LG_QUANTUM 4 +# endif +# ifndef LG_QUANTUM +# error "No LG_QUANTUM definition for architecture; specify via CPPFLAGS" +# endif #endif #define QUANTUM ((size_t)(1U << LG_QUANTUM)) @@ -291,15 +306,9 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s); #define CACHELINE_CEILING(s) \ (((s) + CACHELINE_MASK) & ~CACHELINE_MASK) -/* - * Page size. STATIC_PAGE_SHIFT is determined by the configure script. If - * DYNAMIC_PAGE_SHIFT is enabled, only use the STATIC_PAGE_* macros where - * compile-time values are required for the purposes of defining data - * structures. - */ +/* Page size. STATIC_PAGE_SHIFT is determined by the configure script. */ #define STATIC_PAGE_SIZE ((size_t)(1U << STATIC_PAGE_SHIFT)) #define STATIC_PAGE_MASK ((size_t)(STATIC_PAGE_SIZE - 1)) - #ifdef PAGE_SHIFT # undef PAGE_SHIFT #endif @@ -309,16 +318,9 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s); #ifdef PAGE_MASK # undef PAGE_MASK #endif - -#ifdef DYNAMIC_PAGE_SHIFT -# define PAGE_SHIFT lg_pagesize -# define PAGE_SIZE pagesize -# define PAGE_MASK pagesize_mask -#else -# define PAGE_SHIFT STATIC_PAGE_SHIFT -# define PAGE_SIZE STATIC_PAGE_SIZE -# define PAGE_MASK STATIC_PAGE_MASK -#endif +#define PAGE_SHIFT STATIC_PAGE_SHIFT +#define PAGE_SIZE STATIC_PAGE_SIZE +#define PAGE_MASK STATIC_PAGE_MASK /* Return the smallest pagesize multiple that is >= s. 
*/ #define PAGE_CEILING(s) \ @@ -327,6 +329,7 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s); #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/prn.h" #include "jemalloc/internal/ckh.h" +#include "jemalloc/internal/size_classes.h" #include "jemalloc/internal/stats.h" #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/mutex.h" @@ -352,6 +355,7 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s); #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/prn.h" #include "jemalloc/internal/ckh.h" +#include "jemalloc/internal/size_classes.h" #include "jemalloc/internal/stats.h" #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/mutex.h" @@ -455,6 +459,7 @@ void jemalloc_postfork(void); #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/prn.h" #include "jemalloc/internal/ckh.h" +#include "jemalloc/internal/size_classes.h" #include "jemalloc/internal/stats.h" #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/mutex.h" @@ -480,6 +485,7 @@ void jemalloc_postfork(void); #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/prn.h" #include "jemalloc/internal/ckh.h" +#include "jemalloc/internal/size_classes.h" #include "jemalloc/internal/stats.h" #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/mutex.h" @@ -525,7 +531,7 @@ JEMALLOC_INLINE size_t s2u(size_t size) { - if (size <= small_maxclass) + if (size <= SMALL_MAXCLASS) return (arena_bin_info[SMALL_SIZE2BIN(size)].reg_size); if (size <= arena_maxclass) return (PAGE_CEILING(size)); @@ -570,7 +576,7 @@ sa2u(size_t size, size_t alignment, size_t *run_size_p) } if (usize <= arena_maxclass && alignment <= PAGE_SIZE) { - if (usize <= small_maxclass) + if (usize <= SMALL_MAXCLASS) return (arena_bin_info[SMALL_SIZE2BIN(usize)].reg_size); return (PAGE_CEILING(usize)); } else { diff --git a/include/jemalloc/internal/mb.h b/include/jemalloc/internal/mb.h index dc9f2a54..3cfa7872 100644 --- a/include/jemalloc/internal/mb.h +++ b/include/jemalloc/internal/mb.h @@ -54,7 +54,7 @@ mb_write(void) ); #endif } -#elif (defined(__amd64_) || defined(__x86_64__)) +#elif (defined(__amd64__) || defined(__x86_64__)) JEMALLOC_INLINE void mb_write(void) { @@ -87,6 +87,13 @@ mb_write(void) : "memory" /* Clobbers. */ ); } +#elif defined(__tile__) +JEMALLOC_INLINE void +mb_write(void) +{ + + __sync_synchronize(); +} #else /* * This is much slower than a simple memory barrier, but the semantics of mutex diff --git a/include/jemalloc/internal/size_classes.sh b/include/jemalloc/internal/size_classes.sh new file mode 100755 index 00000000..d8306a58 --- /dev/null +++ b/include/jemalloc/internal/size_classes.sh @@ -0,0 +1,132 @@ +#!/bin/sh + +# The following limits are chosen such that they cover all supported platforms. + +# Range of quanta. +lg_qmin=3 +lg_qmax=4 + +# The range of tiny size classes is [2^lg_tmin..2^(lg_q-1)]. +lg_tmin=3 + +# Range of page sizes. 
+lg_pmin=12 +lg_pmax=16 + +function pow2() { + e=$1 + pow2_result=1 + while [ ${e} -gt 0 ] ; do + pow2_result=`expr ${pow2_result} + ${pow2_result}` + e=`expr ${e} - 1` + done +} + +cat <<EOF [... ~100 generated lines elided in this excerpt: header boilerplate plus nested loops over the lg_t/lg_q/lg_p ranges above, one SIZE_CLASSES table per supported configuration ...] +#if (NBINS > 255) +# error "Too many small size classes" +#endif + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ +EOF diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index 717682d7..b964a12e 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -91,7 +91,7 @@ extern __thread tcache_t *tcache_tls extern pthread_key_t tcache_tsd; /* - * Number of tcache bins. There are nbins small-object bins, plus 0 or more + * Number of tcache bins. There are NBINS small-object bins, plus 0 or more * large-object bins. */ extern size_t nhbins; @@ -181,7 +181,7 @@ tcache_event(tcache_t *tcache) * Flush (ceiling) 3/4 of the objects below the low * water mark. */ - if (binind < nbins) { + if (binind < NBINS) { tcache_bin_flush_small(tbin, binind, tbin->ncached - tbin->low_water + (tbin->low_water >> 2), tcache); @@ -238,7 +238,7 @@ tcache_alloc_small(tcache_t *tcache, size_t size, bool zero) tcache_bin_t *tbin; binind = SMALL_SIZE2BIN(size); - assert(binind < nbins); + assert(binind < NBINS); tbin = &tcache->tbins[binind]; ret = tcache_alloc_easy(tbin); if (ret == NULL) { @@ -275,7 +275,7 @@ tcache_alloc_large(tcache_t *tcache, size_t size, bool zero) size = PAGE_CEILING(size); assert(size <= tcache_maxclass); - binind = nbins + (size >> PAGE_SHIFT) - 1; + binind = NBINS + (size >> PAGE_SHIFT) - 1; assert(binind < nhbins); tbin = &tcache->tbins[binind]; ret = tcache_alloc_easy(tbin); @@ -328,7 +328,7 @@ tcache_dalloc_small(tcache_t *tcache, void *ptr) size_t pageind, binind; arena_chunk_map_t *mapelm; - assert(arena_salloc(ptr) <= small_maxclass); + assert(arena_salloc(ptr) <= SMALL_MAXCLASS); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); arena = chunk->arena; @@ -339,7 +339,7 @@ tcache_dalloc_small(tcache_t *tcache, void *ptr) bin = run->bin; binind = ((uintptr_t)bin - (uintptr_t)&arena->bins) / sizeof(arena_bin_t); - assert(binind < nbins); + assert(binind < NBINS); if (config_fill && opt_junk) memset(ptr, 0x5a, arena_bin_info[binind].reg_size); @@ -367,13 +367,13 @@ tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size) tcache_bin_info_t *tbin_info; assert((size & PAGE_MASK) == 0); - assert(arena_salloc(ptr) > small_maxclass); + assert(arena_salloc(ptr) > SMALL_MAXCLASS); assert(arena_salloc(ptr) <= tcache_maxclass); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); arena = chunk->arena; pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; - binind = nbins + (size >> PAGE_SHIFT) - 1; + binind = NBINS + (size >> PAGE_SHIFT) - 1; if (config_fill && opt_junk) memset(ptr, 0x5a, size); diff --git a/include/jemalloc/jemalloc_defs.h.in b/include/jemalloc/jemalloc_defs.h.in index 66da6f3d..53e85208 100644 --- a/include/jemalloc/jemalloc_defs.h.in +++ b/include/jemalloc/jemalloc_defs.h.in @@ -104,9 +104,6 @@ /* Support lazy locking (avoid locking unless a second
thread is launched). */ #undef JEMALLOC_LAZY_LOCK -/* Determine page size at run time if defined. */ -#undef DYNAMIC_PAGE_SHIFT - /* One page is 2^STATIC_PAGE_SHIFT bytes. */ #undef STATIC_PAGE_SHIFT diff --git a/src/arena.c b/src/arena.c index 33f3f85e..72b7f449 100644 --- a/src/arena.c +++ b/src/arena.c @@ -4,128 +4,38 @@ /******************************************************************************/ /* Data. */ -size_t opt_lg_qspace_max = LG_QSPACE_MAX_DEFAULT; -size_t opt_lg_cspace_max = LG_CSPACE_MAX_DEFAULT; ssize_t opt_lg_dirty_mult = LG_DIRTY_MULT_DEFAULT; -uint8_t const *small_size2bin; -arena_bin_info_t *arena_bin_info; +arena_bin_info_t arena_bin_info[NBINS]; -/* Various bin-related settings. */ -unsigned nqbins; -unsigned ncbins; -unsigned nsbins; -unsigned nbins; -size_t qspace_max; -size_t cspace_min; -size_t cspace_max; -size_t sspace_min; -size_t sspace_max; - -size_t lg_mspace; -size_t mspace_mask; - -/* - * const_small_size2bin is a static constant lookup table that in the common - * case can be used as-is for small_size2bin. - */ +JEMALLOC_ATTR(aligned(CACHELINE)) +const uint8_t small_size2bin[] = { #define S2B_8(i) i, #define S2B_16(i) S2B_8(i) S2B_8(i) #define S2B_32(i) S2B_16(i) S2B_16(i) #define S2B_64(i) S2B_32(i) S2B_32(i) #define S2B_128(i) S2B_64(i) S2B_64(i) #define S2B_256(i) S2B_128(i) S2B_128(i) -/* - * The number of elements in const_small_size2bin is dependent on the - * definition for SUBPAGE. - */ -static JEMALLOC_ATTR(aligned(CACHELINE)) - const uint8_t const_small_size2bin[] = { -#if (LG_QUANTUM == 4) -/* 16-byte quantum **********************/ - S2B_8(0) /* 8 */ - S2B_8(1) /* 16 */ -# define S2B_QMIN 1 - S2B_16(S2B_QMIN + 1) /* 32 */ - S2B_16(S2B_QMIN + 2) /* 48 */ - S2B_16(S2B_QMIN + 3) /* 64 */ - S2B_16(S2B_QMIN + 4) /* 80 */ - S2B_16(S2B_QMIN + 5) /* 96 */ - S2B_16(S2B_QMIN + 6) /* 112 */ - S2B_16(S2B_QMIN + 7) /* 128 */ -# define S2B_CMIN (S2B_QMIN + 8) -#else -/* 8-byte quantum ***********************/ -# define S2B_QMIN 0 - S2B_8(S2B_QMIN + 0) /* 8 */ - S2B_8(S2B_QMIN + 1) /* 16 */ - S2B_8(S2B_QMIN + 2) /* 24 */ - S2B_8(S2B_QMIN + 3) /* 32 */ - S2B_8(S2B_QMIN + 4) /* 40 */ - S2B_8(S2B_QMIN + 5) /* 48 */ - S2B_8(S2B_QMIN + 6) /* 56 */ - S2B_8(S2B_QMIN + 7) /* 64 */ - S2B_8(S2B_QMIN + 8) /* 72 */ - S2B_8(S2B_QMIN + 9) /* 80 */ - S2B_8(S2B_QMIN + 10) /* 88 */ - S2B_8(S2B_QMIN + 11) /* 96 */ - S2B_8(S2B_QMIN + 12) /* 104 */ - S2B_8(S2B_QMIN + 13) /* 112 */ - S2B_8(S2B_QMIN + 14) /* 120 */ - S2B_8(S2B_QMIN + 15) /* 128 */ -# define S2B_CMIN (S2B_QMIN + 16) -#endif -/****************************************/ - S2B_64(S2B_CMIN + 0) /* 192 */ - S2B_64(S2B_CMIN + 1) /* 256 */ - S2B_64(S2B_CMIN + 2) /* 320 */ - S2B_64(S2B_CMIN + 3) /* 384 */ - S2B_64(S2B_CMIN + 4) /* 448 */ - S2B_64(S2B_CMIN + 5) /* 512 */ -# define S2B_SMIN (S2B_CMIN + 6) - S2B_256(S2B_SMIN + 0) /* 768 */ - S2B_256(S2B_SMIN + 1) /* 1024 */ - S2B_256(S2B_SMIN + 2) /* 1280 */ - S2B_256(S2B_SMIN + 3) /* 1536 */ - S2B_256(S2B_SMIN + 4) /* 1792 */ - S2B_256(S2B_SMIN + 5) /* 2048 */ - S2B_256(S2B_SMIN + 6) /* 2304 */ - S2B_256(S2B_SMIN + 7) /* 2560 */ - S2B_256(S2B_SMIN + 8) /* 2816 */ - S2B_256(S2B_SMIN + 9) /* 3072 */ - S2B_256(S2B_SMIN + 10) /* 3328 */ - S2B_256(S2B_SMIN + 11) /* 3584 */ - S2B_256(S2B_SMIN + 12) /* 3840 */ -#if (STATIC_PAGE_SHIFT == 13) - S2B_256(S2B_SMIN + 13) /* 4096 */ - S2B_256(S2B_SMIN + 14) /* 4352 */ - S2B_256(S2B_SMIN + 15) /* 4608 */ - S2B_256(S2B_SMIN + 16) /* 4864 */ - S2B_256(S2B_SMIN + 17) /* 5120 */ - S2B_256(S2B_SMIN + 18) /* 5376 */ - S2B_256(S2B_SMIN + 19) 
/* 5632 */ - S2B_256(S2B_SMIN + 20) /* 5888 */ - S2B_256(S2B_SMIN + 21) /* 6144 */ - S2B_256(S2B_SMIN + 22) /* 6400 */ - S2B_256(S2B_SMIN + 23) /* 6656 */ - S2B_256(S2B_SMIN + 24) /* 6912 */ - S2B_256(S2B_SMIN + 25) /* 7168 */ - S2B_256(S2B_SMIN + 26) /* 7424 */ - S2B_256(S2B_SMIN + 27) /* 7680 */ - S2B_256(S2B_SMIN + 28) /* 7936 */ -#endif -}; -#undef S2B_1 -#undef S2B_2 -#undef S2B_4 +#define S2B_512(i) S2B_256(i) S2B_256(i) +#define S2B_1024(i) S2B_512(i) S2B_512(i) +#define S2B_2048(i) S2B_1024(i) S2B_1024(i) +#define S2B_4096(i) S2B_2048(i) S2B_2048(i) +#define S2B_8192(i) S2B_4096(i) S2B_4096(i) +#define SIZE_CLASS(bin, delta, size) \ + S2B_##delta(bin) + SIZE_CLASSES #undef S2B_8 #undef S2B_16 #undef S2B_32 #undef S2B_64 #undef S2B_128 #undef S2B_256 -#undef S2B_QMIN -#undef S2B_CMIN -#undef S2B_SMIN +#undef S2B_512 +#undef S2B_1024 +#undef S2B_2048 +#undef S2B_4096 +#undef S2B_8192 +#undef SIZE_CLASS +}; /******************************************************************************/ /* Function prototypes for non-inline static functions. */ @@ -160,12 +70,9 @@ static bool arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr, size_t oldsize, size_t size, size_t extra, bool zero); static bool arena_ralloc_large(void *ptr, size_t oldsize, size_t size, size_t extra, bool zero); -static bool small_size2bin_init(void); -static void small_size2bin_validate(void); -static bool small_size2bin_init_hard(void); static size_t bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size); -static bool bin_info_init(void); +static void bin_info_init(void); /******************************************************************************/ @@ -1368,7 +1275,7 @@ arena_malloc_small(arena_t *arena, size_t size, bool zero) size_t binind; binind = SMALL_SIZE2BIN(size); - assert(binind < nbins); + assert(binind < NBINS); bin = &arena->bins[binind]; size = arena_bin_info[binind].reg_size; @@ -1553,12 +1460,12 @@ arena_prof_promoted(const void *ptr, size_t size) assert(ptr != NULL); assert(CHUNK_ADDR2BASE(ptr) != ptr); assert(isalloc(ptr) == PAGE_SIZE); - assert(size <= small_maxclass); + assert(size <= SMALL_MAXCLASS); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; binind = SMALL_SIZE2BIN(size); - assert(binind < nbins); + assert(binind < NBINS); chunk->map[pageind-map_bias].bits = (chunk->map[pageind-map_bias].bits & ~CHUNK_MAP_CLASS_MASK) | ((binind+1) << CHUNK_MAP_CLASS_SHIFT); } @@ -1594,7 +1501,7 @@ arena_salloc_demote(const void *ptr) CHUNK_MAP_CLASS_MASK) != 0) { size_t binind = ((mapbits & CHUNK_MAP_CLASS_MASK) >> CHUNK_MAP_CLASS_SHIFT) - 1; - assert(binind < nbins); + assert(binind < NBINS); ret = arena_bin_info[binind].reg_size; } assert(ret != 0); @@ -1762,7 +1669,7 @@ arena_stats_merge(arena_t *arena, size_t *nactive, size_t *ndirty, } malloc_mutex_unlock(&arena->lock); - for (i = 0; i < nbins; i++) { + for (i = 0; i < NBINS; i++) { arena_bin_t *bin = &arena->bins[i]; malloc_mutex_lock(&bin->lock); @@ -1963,10 +1870,10 @@ arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, * Avoid moving the allocation if the size class can be left the same. 
*/ if (oldsize <= arena_maxclass) { - if (oldsize <= small_maxclass) { + if (oldsize <= SMALL_MAXCLASS) { assert(arena_bin_info[SMALL_SIZE2BIN(oldsize)].reg_size == oldsize); - if ((size + extra <= small_maxclass && + if ((size + extra <= SMALL_MAXCLASS && SMALL_SIZE2BIN(size + extra) == SMALL_SIZE2BIN(oldsize)) || (size <= oldsize && size + extra >= oldsize)) { @@ -1978,7 +1885,7 @@ arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, } } else { assert(size <= arena_maxclass); - if (size + extra > small_maxclass) { + if (size + extra > SMALL_MAXCLASS) { if (arena_ralloc_large(ptr, oldsize, size, extra, zero) == false) return (ptr); @@ -2083,7 +1990,7 @@ arena_new(arena_t *arena, unsigned ind) arena_avail_tree_new(&arena->runs_avail_dirty); /* Initialize bins. */ - for (i = 0; i < nbins; i++) { + for (i = 0; i < NBINS; i++) { bin = &arena->bins[i]; if (malloc_mutex_init(&bin->lock)) return (true); @@ -2096,119 +2003,6 @@ arena_new(arena_t *arena, unsigned ind) return (false); } -static void -small_size2bin_validate(void) -{ - size_t i, size, binind; - - i = 1; - /* Tiny. */ - for (; i < TINY_MIN; i++) { - size = TINY_MIN; - binind = ffs((int)(size >> (LG_TINY_MIN + 1))); - assert(SMALL_SIZE2BIN(i) == binind); - } - for (; i < qspace_min; i++) { - size = pow2_ceil(i); - binind = ffs((int)(size >> (LG_TINY_MIN + 1))); - assert(SMALL_SIZE2BIN(i) == binind); - } - /* Quantum-spaced. */ - for (; i <= qspace_max; i++) { - size = QUANTUM_CEILING(i); - binind = ntbins + (size >> LG_QUANTUM) - 1; - assert(SMALL_SIZE2BIN(i) == binind); - } - /* Cacheline-spaced. */ - for (; i <= cspace_max; i++) { - size = CACHELINE_CEILING(i); - binind = ntbins + nqbins + ((size - cspace_min) >> - LG_CACHELINE); - assert(SMALL_SIZE2BIN(i) == binind); - } - /* Sub-page. */ - for (; i <= sspace_max; i++) { - size = SUBPAGE_CEILING(i); - binind = ntbins + nqbins + ncbins + ((size - sspace_min) - >> LG_SUBPAGE); - assert(SMALL_SIZE2BIN(i) == binind); - } -} - -static bool -small_size2bin_init(void) -{ - - if (opt_lg_qspace_max != LG_QSPACE_MAX_DEFAULT - || opt_lg_cspace_max != LG_CSPACE_MAX_DEFAULT - || (sizeof(const_small_size2bin) != ((small_maxclass-1) >> - LG_TINY_MIN) + 1)) - return (small_size2bin_init_hard()); - - small_size2bin = const_small_size2bin; - if (config_debug) - small_size2bin_validate(); - return (false); -} - -static bool -small_size2bin_init_hard(void) -{ - size_t i, size, binind; - uint8_t *custom_small_size2bin; -#define CUSTOM_SMALL_SIZE2BIN(s) \ - custom_small_size2bin[(s-1) >> LG_TINY_MIN] - - assert(opt_lg_qspace_max != LG_QSPACE_MAX_DEFAULT - || opt_lg_cspace_max != LG_CSPACE_MAX_DEFAULT - || (sizeof(const_small_size2bin) != ((small_maxclass-1) >> - LG_TINY_MIN) + 1)); - - custom_small_size2bin = (uint8_t *) - base_alloc(small_maxclass >> LG_TINY_MIN); - if (custom_small_size2bin == NULL) - return (true); - - i = 1; - /* Tiny. */ - for (; i < TINY_MIN; i += TINY_MIN) { - size = TINY_MIN; - binind = ffs((int)(size >> (LG_TINY_MIN + 1))); - CUSTOM_SMALL_SIZE2BIN(i) = binind; - } - for (; i < qspace_min; i += TINY_MIN) { - size = pow2_ceil(i); - binind = ffs((int)(size >> (LG_TINY_MIN + 1))); - CUSTOM_SMALL_SIZE2BIN(i) = binind; - } - /* Quantum-spaced. */ - for (; i <= qspace_max; i += TINY_MIN) { - size = QUANTUM_CEILING(i); - binind = ntbins + (size >> LG_QUANTUM) - 1; - CUSTOM_SMALL_SIZE2BIN(i) = binind; - } - /* Cacheline-spaced. 
*/ - for (; i <= cspace_max; i += TINY_MIN) { - size = CACHELINE_CEILING(i); - binind = ntbins + nqbins + ((size - cspace_min) >> - LG_CACHELINE); - CUSTOM_SMALL_SIZE2BIN(i) = binind; - } - /* Sub-page. */ - for (; i <= sspace_max; i += TINY_MIN) { - size = SUBPAGE_CEILING(i); - binind = ntbins + nqbins + ncbins + ((size - sspace_min) >> - LG_SUBPAGE); - CUSTOM_SMALL_SIZE2BIN(i) = binind; - } - - small_size2bin = custom_small_size2bin; - if (config_debug) - small_size2bin_validate(); - return (false); -#undef CUSTOM_SMALL_SIZE2BIN -} - /* * Calculate bin_info->run_size such that it meets the following constraints: * @@ -2330,104 +2124,27 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size) return (good_run_size); } -static bool +static void bin_info_init(void) { arena_bin_info_t *bin_info; - unsigned i; - size_t prev_run_size; + size_t prev_run_size = PAGE_SIZE; - arena_bin_info = base_alloc(sizeof(arena_bin_info_t) * nbins); - if (arena_bin_info == NULL) - return (true); - - prev_run_size = PAGE_SIZE; - i = 0; - /* (2^n)-spaced tiny bins. */ - for (; i < ntbins; i++) { - bin_info = &arena_bin_info[i]; - bin_info->reg_size = (1U << (LG_TINY_MIN + i)); - prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size); - bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs); - } - /* Quantum-spaced bins. */ - for (; i < ntbins + nqbins; i++) { - bin_info = &arena_bin_info[i]; - bin_info->reg_size = (i - ntbins + 1) << LG_QUANTUM; - prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size); - bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs); - } - /* Cacheline-spaced bins. */ - for (; i < ntbins + nqbins + ncbins; i++) { - bin_info = &arena_bin_info[i]; - bin_info->reg_size = cspace_min + ((i - (ntbins + nqbins)) << - LG_CACHELINE); - prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size); - bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs); - } - /* Subpage-spaced bins. */ - for (; i < nbins; i++) { - bin_info = &arena_bin_info[i]; - bin_info->reg_size = sspace_min + ((i - (ntbins + nqbins + - ncbins)) << LG_SUBPAGE); - prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size); - bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs); - } - - return (false); +#define SIZE_CLASS(bin, delta, size) \ + bin_info = &arena_bin_info[bin]; \ + bin_info->reg_size = size; \ + prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size);\ + bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs); + SIZE_CLASSES +#undef SIZE_CLASS } -bool +void arena_boot(void) { size_t header_size; unsigned i; - /* Set variables according to the value of opt_lg_[qc]space_max. */ - qspace_max = (1U << opt_lg_qspace_max); - cspace_min = CACHELINE_CEILING(qspace_max); - if (cspace_min == qspace_max) - cspace_min += CACHELINE; - cspace_max = (1U << opt_lg_cspace_max); - sspace_min = SUBPAGE_CEILING(cspace_max); - if (sspace_min == cspace_max) - sspace_min += SUBPAGE; - assert(sspace_min < PAGE_SIZE); - sspace_max = PAGE_SIZE - SUBPAGE; - - assert(LG_QUANTUM >= LG_TINY_MIN); - assert(ntbins <= LG_QUANTUM); - nqbins = qspace_max >> LG_QUANTUM; - ncbins = ((cspace_max - cspace_min) >> LG_CACHELINE) + 1; - nsbins = ((sspace_max - sspace_min) >> LG_SUBPAGE) + 1; - nbins = ntbins + nqbins + ncbins + nsbins; - - /* - * The small_size2bin lookup table uses uint8_t to encode each bin - * index, so we cannot support more than 256 small size classes. 
This - * limit is difficult to exceed (not even possible with 16B quantum and - * 4KiB pages), and such configurations are impractical, but - * nonetheless we need to protect against this case in order to avoid - * undefined behavior. - * - * Further constrain nbins to 255 if prof_promote is true, since all - * small size classes, plus a "not small" size class must be stored in - * 8 bits of arena_chunk_map_t's bits field. - */ - if (config_prof && opt_prof && prof_promote && nbins > 255) { - char line_buf[UMAX2S_BUFSIZE]; - malloc_write(": Too many small size classes ("); - malloc_write(u2s(nbins, 10, line_buf)); - malloc_write(" > max 255)\n"); - abort(); - } else if (nbins > 256) { - char line_buf[UMAX2S_BUFSIZE]; - malloc_write(": Too many small size classes ("); - malloc_write(u2s(nbins, 10, line_buf)); - malloc_write(" > max 256)\n"); - abort(); - } - /* * Compute the header size such that it is large enough to contain the * page map. The page map is biased to omit entries for the header @@ -2451,11 +2168,5 @@ arena_boot(void) arena_maxclass = chunksize - (map_bias << PAGE_SHIFT); - if (small_size2bin_init()) - return (true); - - if (bin_info_init()) - return (true); - - return (false); + bin_info_init(); } diff --git a/src/ctl.c b/src/ctl.c index 4938e10a..0beeb3d0 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -47,7 +47,6 @@ CTL_PROTO(thread_deallocated) CTL_PROTO(thread_deallocatedp) CTL_PROTO(config_debug) CTL_PROTO(config_dss) -CTL_PROTO(config_dynamic_page_shift) CTL_PROTO(config_fill) CTL_PROTO(config_lazy_lock) CTL_PROTO(config_prof) @@ -59,8 +58,6 @@ CTL_PROTO(config_tcache) CTL_PROTO(config_tls) CTL_PROTO(config_xmalloc) CTL_PROTO(opt_abort) -CTL_PROTO(opt_lg_qspace_max) -CTL_PROTO(opt_lg_cspace_max) CTL_PROTO(opt_lg_chunk) CTL_PROTO(opt_narenas) CTL_PROTO(opt_lg_dirty_mult) @@ -88,23 +85,9 @@ INDEX_PROTO(arenas_lrun_i) CTL_PROTO(arenas_narenas) CTL_PROTO(arenas_initialized) CTL_PROTO(arenas_quantum) -CTL_PROTO(arenas_cacheline) -CTL_PROTO(arenas_subpage) CTL_PROTO(arenas_pagesize) CTL_PROTO(arenas_chunksize) -CTL_PROTO(arenas_tspace_min) -CTL_PROTO(arenas_tspace_max) -CTL_PROTO(arenas_qspace_min) -CTL_PROTO(arenas_qspace_max) -CTL_PROTO(arenas_cspace_min) -CTL_PROTO(arenas_cspace_max) -CTL_PROTO(arenas_sspace_min) -CTL_PROTO(arenas_sspace_max) CTL_PROTO(arenas_tcache_max) -CTL_PROTO(arenas_ntbins) -CTL_PROTO(arenas_nqbins) -CTL_PROTO(arenas_ncbins) -CTL_PROTO(arenas_nsbins) CTL_PROTO(arenas_nbins) CTL_PROTO(arenas_nhbins) CTL_PROTO(arenas_nlruns) @@ -185,7 +168,6 @@ static const ctl_node_t thread_node[] = { static const ctl_node_t config_node[] = { {NAME("debug"), CTL(config_debug)}, {NAME("dss"), CTL(config_dss)}, - {NAME("dynamic_page_shift"), CTL(config_dynamic_page_shift)}, {NAME("fill"), CTL(config_fill)}, {NAME("lazy_lock"), CTL(config_lazy_lock)}, {NAME("prof"), CTL(config_prof)}, @@ -200,8 +182,6 @@ static const ctl_node_t config_node[] = { static const ctl_node_t opt_node[] = { {NAME("abort"), CTL(opt_abort)}, - {NAME("lg_qspace_max"), CTL(opt_lg_qspace_max)}, - {NAME("lg_cspace_max"), CTL(opt_lg_cspace_max)}, {NAME("lg_chunk"), CTL(opt_lg_chunk)}, {NAME("narenas"), CTL(opt_narenas)}, {NAME("lg_dirty_mult"), CTL(opt_lg_dirty_mult)}, @@ -250,23 +230,9 @@ static const ctl_node_t arenas_node[] = { {NAME("narenas"), CTL(arenas_narenas)}, {NAME("initialized"), CTL(arenas_initialized)}, {NAME("quantum"), CTL(arenas_quantum)}, - {NAME("cacheline"), CTL(arenas_cacheline)}, - {NAME("subpage"), CTL(arenas_subpage)}, {NAME("pagesize"), CTL(arenas_pagesize)}, {NAME("chunksize"), 
CTL(arenas_chunksize)}, - {NAME("tspace_min"), CTL(arenas_tspace_min)}, - {NAME("tspace_max"), CTL(arenas_tspace_max)}, - {NAME("qspace_min"), CTL(arenas_qspace_min)}, - {NAME("qspace_max"), CTL(arenas_qspace_max)}, - {NAME("cspace_min"), CTL(arenas_cspace_min)}, - {NAME("cspace_max"), CTL(arenas_cspace_max)}, - {NAME("sspace_min"), CTL(arenas_sspace_min)}, - {NAME("sspace_max"), CTL(arenas_sspace_max)}, {NAME("tcache_max"), CTL(arenas_tcache_max)}, - {NAME("ntbins"), CTL(arenas_ntbins)}, - {NAME("nqbins"), CTL(arenas_nqbins)}, - {NAME("ncbins"), CTL(arenas_ncbins)}, - {NAME("nsbins"), CTL(arenas_nsbins)}, {NAME("nbins"), CTL(arenas_nbins)}, {NAME("nhbins"), CTL(arenas_nhbins)}, {NAME("bin"), CHILD(arenas_bin)}, @@ -397,12 +363,6 @@ static bool ctl_arena_init(ctl_arena_stats_t *astats) { - if (astats->bstats == NULL) { - astats->bstats = (malloc_bin_stats_t *)base_alloc(nbins * - sizeof(malloc_bin_stats_t)); - if (astats->bstats == NULL) - return (true); - } if (astats->lstats == NULL) { astats->lstats = (malloc_large_stats_t *)base_alloc(nlclasses * sizeof(malloc_large_stats_t)); @@ -425,7 +385,7 @@ ctl_arena_clear(ctl_arena_stats_t *astats) astats->nmalloc_small = 0; astats->ndalloc_small = 0; astats->nrequests_small = 0; - memset(astats->bstats, 0, nbins * sizeof(malloc_bin_stats_t)); + memset(astats->bstats, 0, NBINS * sizeof(malloc_bin_stats_t)); memset(astats->lstats, 0, nlclasses * sizeof(malloc_large_stats_t)); } @@ -439,7 +399,7 @@ ctl_arena_stats_amerge(ctl_arena_stats_t *cstats, arena_t *arena) arena_stats_merge(arena, &cstats->pactive, &cstats->pdirty, &cstats->astats, cstats->bstats, cstats->lstats); - for (i = 0; i < nbins; i++) { + for (i = 0; i < NBINS; i++) { cstats->allocated_small += cstats->bstats[i].allocated; cstats->nmalloc_small += cstats->bstats[i].nmalloc; cstats->ndalloc_small += cstats->bstats[i].ndalloc; @@ -477,7 +437,7 @@ ctl_arena_stats_smerge(ctl_arena_stats_t *sstats, ctl_arena_stats_t *astats) sstats->lstats[i].curruns += astats->lstats[i].curruns; } - for (i = 0; i < nbins; i++) { + for (i = 0; i < NBINS; i++) { sstats->bstats[i].allocated += astats->bstats[i].allocated; sstats->bstats[i].nmalloc += astats->bstats[i].nmalloc; sstats->bstats[i].ndalloc += astats->bstats[i].ndalloc; @@ -1092,7 +1052,6 @@ CTL_RO_NL_CGEN(config_stats, thread_deallocatedp, DEALLOCATEDP_GET(), CTL_RO_BOOL_CONFIG_GEN(config_debug) CTL_RO_BOOL_CONFIG_GEN(config_dss) -CTL_RO_BOOL_CONFIG_GEN(config_dynamic_page_shift) CTL_RO_BOOL_CONFIG_GEN(config_fill) CTL_RO_BOOL_CONFIG_GEN(config_lazy_lock) CTL_RO_BOOL_CONFIG_GEN(config_prof) @@ -1107,8 +1066,6 @@ CTL_RO_BOOL_CONFIG_GEN(config_xmalloc) /******************************************************************************/ CTL_RO_NL_GEN(opt_abort, opt_abort, bool) -CTL_RO_NL_GEN(opt_lg_qspace_max, opt_lg_qspace_max, size_t) -CTL_RO_NL_GEN(opt_lg_cspace_max, opt_lg_cspace_max, size_t) CTL_RO_NL_GEN(opt_lg_chunk, opt_lg_chunk, size_t) CTL_RO_NL_GEN(opt_narenas, opt_narenas, size_t) CTL_RO_NL_GEN(opt_lg_dirty_mult, opt_lg_dirty_mult, ssize_t) @@ -1138,7 +1095,7 @@ const ctl_node_t * arenas_bin_i_index(const size_t *mib, size_t miblen, size_t i) { - if (i > nbins) + if (i > NBINS) return (NULL); return (super_arenas_bin_i_node); } @@ -1182,24 +1139,10 @@ RETURN: } CTL_RO_NL_GEN(arenas_quantum, QUANTUM, size_t) -CTL_RO_NL_GEN(arenas_cacheline, CACHELINE, size_t) -CTL_RO_NL_GEN(arenas_subpage, SUBPAGE, size_t) CTL_RO_NL_GEN(arenas_pagesize, PAGE_SIZE, size_t) CTL_RO_NL_GEN(arenas_chunksize, chunksize, size_t) 
-CTL_RO_NL_GEN(arenas_tspace_min, TINY_MIN, size_t) -CTL_RO_NL_GEN(arenas_tspace_max, (qspace_min >> 1), size_t) -CTL_RO_NL_GEN(arenas_qspace_min, qspace_min, size_t) -CTL_RO_NL_GEN(arenas_qspace_max, qspace_max, size_t) -CTL_RO_NL_GEN(arenas_cspace_min, cspace_min, size_t) -CTL_RO_NL_GEN(arenas_cspace_max, cspace_max, size_t) -CTL_RO_NL_GEN(arenas_sspace_min, sspace_min, size_t) -CTL_RO_NL_GEN(arenas_sspace_max, sspace_max, size_t) CTL_RO_NL_CGEN(config_tcache, arenas_tcache_max, tcache_maxclass, size_t) -CTL_RO_NL_GEN(arenas_ntbins, ntbins, unsigned) -CTL_RO_NL_GEN(arenas_nqbins, nqbins, unsigned) -CTL_RO_NL_GEN(arenas_ncbins, ncbins, unsigned) -CTL_RO_NL_GEN(arenas_nsbins, nsbins, unsigned) -CTL_RO_NL_GEN(arenas_nbins, nbins, unsigned) +CTL_RO_NL_GEN(arenas_nbins, NBINS, unsigned) CTL_RO_NL_CGEN(config_tcache, arenas_nhbins, nhbins, unsigned) CTL_RO_NL_GEN(arenas_nlruns, nlclasses, size_t) @@ -1346,7 +1289,7 @@ const ctl_node_t * stats_arenas_i_bins_j_index(const size_t *mib, size_t miblen, size_t j) { - if (j > nbins) + if (j > NBINS) return (NULL); return (super_stats_arenas_i_bins_j_node); } diff --git a/src/jemalloc.c b/src/jemalloc.c index 81829fe7..08e5f31c 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -95,9 +95,7 @@ arenas_extend(unsigned ind) { arena_t *ret; - /* Allocate enough space for trailing bins. */ - ret = (arena_t *)base_alloc(offsetof(arena_t, bins) - + (sizeof(arena_bin_t) * nbins)); + ret = (arena_t *)base_alloc(sizeof(arena_t)); if (ret != NULL && arena_new(ret, ind) == false) { arenas[ind] = ret; return (ret); @@ -563,10 +561,6 @@ malloc_conf_init(void) } CONF_HANDLE_BOOL(abort) - CONF_HANDLE_SIZE_T(lg_qspace_max, LG_QUANTUM, - PAGE_SHIFT-1) - CONF_HANDLE_SIZE_T(lg_cspace_max, LG_QUANTUM, - PAGE_SHIFT-1) /* * Chunks always require at least one * header page, * plus one data page. @@ -613,14 +607,6 @@ malloc_conf_init(void) #undef CONF_HANDLE_SSIZE_T #undef CONF_HANDLE_CHAR_P } - - /* Validate configuration of options that are inter-related. 
*/ - if (opt_lg_qspace_max+1 >= opt_lg_cspace_max) { - malloc_write(": Invalid lg_[qc]space_max " - "relationship; restoring defaults\n"); - opt_lg_qspace_max = LG_QSPACE_MAX_DEFAULT; - opt_lg_cspace_max = LG_CSPACE_MAX_DEFAULT; - } } } @@ -709,10 +695,7 @@ malloc_init_hard(void) if (config_prof) prof_boot1(); - if (arena_boot()) { - malloc_mutex_unlock(&init_lock); - return (true); - } + arena_boot(); if (config_tcache && tcache_boot()) { malloc_mutex_unlock(&init_lock); @@ -893,8 +876,8 @@ JEMALLOC_P(malloc)(size_t size) goto OOM; } if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize <= - small_maxclass) { - ret = imalloc(small_maxclass+1); + SMALL_MAXCLASS) { + ret = imalloc(SMALL_MAXCLASS+1); if (ret != NULL) arena_prof_promoted(ret, usize); } else @@ -992,10 +975,10 @@ imemalign(void **memptr, size_t alignment, size_t size) ret = EINVAL; } else { if (prof_promote && (uintptr_t)cnt != - (uintptr_t)1U && usize <= small_maxclass) { - assert(sa2u(small_maxclass+1, + (uintptr_t)1U && usize <= SMALL_MAXCLASS) { + assert(sa2u(SMALL_MAXCLASS+1, alignment, NULL) != 0); - result = ipalloc(sa2u(small_maxclass+1, + result = ipalloc(sa2u(SMALL_MAXCLASS+1, alignment, NULL), alignment, false); if (result != NULL) { arena_prof_promoted(result, @@ -1091,8 +1074,8 @@ JEMALLOC_P(calloc)(size_t num, size_t size) goto RETURN; } if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize - <= small_maxclass) { - ret = icalloc(small_maxclass+1); + <= SMALL_MAXCLASS) { + ret = icalloc(SMALL_MAXCLASS+1); if (ret != NULL) arena_prof_promoted(ret, usize); } else @@ -1177,8 +1160,8 @@ JEMALLOC_P(realloc)(void *ptr, size_t size) goto OOM; } if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && - usize <= small_maxclass) { - ret = iralloc(ptr, small_maxclass+1, 0, 0, + usize <= SMALL_MAXCLASS) { + ret = iralloc(ptr, SMALL_MAXCLASS+1, 0, 0, false, false); if (ret != NULL) arena_prof_promoted(ret, usize); @@ -1220,8 +1203,8 @@ OOM: else { if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize <= - small_maxclass) { - ret = imalloc(small_maxclass+1); + SMALL_MAXCLASS) { + ret = imalloc(SMALL_MAXCLASS+1); if (ret != NULL) { arena_prof_promoted(ret, usize); @@ -1436,9 +1419,9 @@ JEMALLOC_P(allocm)(void **ptr, size_t *rsize, size_t size, int flags) if (cnt == NULL) goto OOM; if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize <= - small_maxclass) { + SMALL_MAXCLASS) { size_t usize_promoted = (alignment == 0) ? - s2u(small_maxclass+1) : sa2u(small_maxclass+1, + s2u(SMALL_MAXCLASS+1) : sa2u(SMALL_MAXCLASS+1, alignment, NULL); assert(usize_promoted != 0); p = iallocm(usize_promoted, alignment, zero); @@ -1517,9 +1500,9 @@ JEMALLOC_P(rallocm)(void **ptr, size_t *rsize, size_t size, size_t extra, */ if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && ((alignment == 0) ? s2u(size) : sa2u(size, - alignment, NULL)) <= small_maxclass) { - q = iralloc(p, small_maxclass+1, (small_maxclass+1 >= - size+extra) ? 0 : size+extra - (small_maxclass+1), + alignment, NULL)) <= SMALL_MAXCLASS) { + q = iralloc(p, SMALL_MAXCLASS+1, (SMALL_MAXCLASS+1 >= + size+extra) ? 
0 : size+extra - (SMALL_MAXCLASS+1), alignment, zero, no_move); if (q == NULL) goto ERR; diff --git a/src/stats.c b/src/stats.c index 6d9ba9d0..e4500dfb 100644 --- a/src/stats.c +++ b/src/stats.c @@ -159,12 +159,12 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, CTL_GET("config.tcache", &config_tcache, bool); if (config_tcache) { malloc_cprintf(write_cb, cbopaque, - "bins: bin size regs pgs allocated nmalloc" + "bins: bin size regs pgs allocated nmalloc" " ndalloc nrequests nfills nflushes" " newruns reruns curruns\n"); } else { malloc_cprintf(write_cb, cbopaque, - "bins: bin size regs pgs allocated nmalloc" + "bins: bin size regs pgs allocated nmalloc" " ndalloc newruns reruns curruns\n"); } CTL_GET("arenas.nbins", &nbins, unsigned); @@ -176,7 +176,6 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, if (gap_start == UINT_MAX) gap_start = j; } else { - unsigned ntbins_, nqbins, ncbins, nsbins; size_t reg_size, run_size, allocated; uint32_t nregs; uint64_t nmalloc, ndalloc, nrequests, nfills, nflushes; @@ -196,10 +195,6 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, } gap_start = UINT_MAX; } - CTL_GET("arenas.ntbins", &ntbins_, unsigned); - CTL_GET("arenas.nqbins", &nqbins, unsigned); - CTL_GET("arenas.ncbins", &ncbins, unsigned); - CTL_GET("arenas.nsbins", &nsbins, unsigned); CTL_J_GET("arenas.bin.0.size", ®_size, size_t); CTL_J_GET("arenas.bin.0.nregs", &nregs, uint32_t); CTL_J_GET("arenas.bin.0.run_size", &run_size, size_t); @@ -223,27 +218,19 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, size_t); if (config_tcache) { malloc_cprintf(write_cb, cbopaque, - "%13u %1s %5zu %4u %3zu %12zu %12"PRIu64 + "%13u %5zu %4u %3zu %12zu %12"PRIu64 " %12"PRIu64" %12"PRIu64" %12"PRIu64 " %12"PRIu64" %12"PRIu64" %12"PRIu64 " %12zu\n", - j, - j < ntbins_ ? "T" : j < ntbins_ + nqbins ? - "Q" : j < ntbins_ + nqbins + ncbins ? "C" : - "S", - reg_size, nregs, run_size / pagesize, + j, reg_size, nregs, run_size / pagesize, allocated, nmalloc, ndalloc, nrequests, nfills, nflushes, nruns, reruns, curruns); } else { malloc_cprintf(write_cb, cbopaque, - "%13u %1s %5zu %4u %3zu %12zu %12"PRIu64 + "%13u %5zu %4u %3zu %12zu %12"PRIu64 " %12"PRIu64" %12"PRIu64" %12"PRIu64 " %12zu\n", - j, - j < ntbins_ ? "T" : j < ntbins_ + nqbins ? - "Q" : j < ntbins_ + nqbins + ncbins ? 
"C" : - "S", - reg_size, nregs, run_size / pagesize, + j, reg_size, nregs, run_size / pagesize, allocated, nmalloc, ndalloc, nruns, reruns, curruns); } @@ -496,8 +483,6 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, write_cb(cbopaque, "Run-time option settings:\n"); OPT_WRITE_BOOL(abort) - OPT_WRITE_SIZE_T(lg_qspace_max) - OPT_WRITE_SIZE_T(lg_cspace_max) OPT_WRITE_SIZE_T(lg_chunk) OPT_WRITE_SIZE_T(narenas) OPT_WRITE_SSIZE_T(lg_dirty_mult) @@ -541,51 +526,6 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, write_cb(cbopaque, u2s(sv, 10, s)); write_cb(cbopaque, "\n"); - CTL_GET("arenas.cacheline", &sv, size_t); - write_cb(cbopaque, "Cacheline size (assumed): "); - write_cb(cbopaque, u2s(sv, 10, s)); - write_cb(cbopaque, "\n"); - - CTL_GET("arenas.subpage", &sv, size_t); - write_cb(cbopaque, "Subpage spacing: "); - write_cb(cbopaque, u2s(sv, 10, s)); - write_cb(cbopaque, "\n"); - - if ((err = JEMALLOC_P(mallctl)("arenas.tspace_min", &sv, &ssz, - NULL, 0)) == 0) { - write_cb(cbopaque, "Tiny 2^n-spaced sizes: ["); - write_cb(cbopaque, u2s(sv, 10, s)); - write_cb(cbopaque, ".."); - - CTL_GET("arenas.tspace_max", &sv, size_t); - write_cb(cbopaque, u2s(sv, 10, s)); - write_cb(cbopaque, "]\n"); - } - - CTL_GET("arenas.qspace_min", &sv, size_t); - write_cb(cbopaque, "Quantum-spaced sizes: ["); - write_cb(cbopaque, u2s(sv, 10, s)); - write_cb(cbopaque, ".."); - CTL_GET("arenas.qspace_max", &sv, size_t); - write_cb(cbopaque, u2s(sv, 10, s)); - write_cb(cbopaque, "]\n"); - - CTL_GET("arenas.cspace_min", &sv, size_t); - write_cb(cbopaque, "Cacheline-spaced sizes: ["); - write_cb(cbopaque, u2s(sv, 10, s)); - write_cb(cbopaque, ".."); - CTL_GET("arenas.cspace_max", &sv, size_t); - write_cb(cbopaque, u2s(sv, 10, s)); - write_cb(cbopaque, "]\n"); - - CTL_GET("arenas.sspace_min", &sv, size_t); - write_cb(cbopaque, "Subpage-spaced sizes: ["); - write_cb(cbopaque, u2s(sv, 10, s)); - write_cb(cbopaque, ".."); - CTL_GET("arenas.sspace_max", &sv, size_t); - write_cb(cbopaque, u2s(sv, 10, s)); - write_cb(cbopaque, "]\n"); - CTL_GET("opt.lg_dirty_mult", &ssv, ssize_t); if (ssv >= 0) { write_cb(cbopaque, diff --git a/src/tcache.c b/src/tcache.c index 4f4ed6c6..fa05728e 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -55,7 +55,7 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem, unsigned i, nflush, ndeferred; bool merged_stats = false; - assert(binind < nbins); + assert(binind < NBINS); assert(rem <= tbin->ncached); for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) { @@ -152,7 +152,7 @@ tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem, merged_stats = true; arena->stats.nrequests_large += tbin->tstats.nrequests; - arena->stats.lstats[binind - nbins].nrequests += + arena->stats.lstats[binind - NBINS].nrequests += tbin->tstats.nrequests; tbin->tstats.nrequests = 0; } @@ -185,7 +185,7 @@ tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem, arena_t *arena = tcache->arena; malloc_mutex_lock(&arena->lock); arena->stats.nrequests_large += tbin->tstats.nrequests; - arena->stats.lstats[binind - nbins].nrequests += + arena->stats.lstats[binind - NBINS].nrequests += tbin->tstats.nrequests; tbin->tstats.nrequests = 0; malloc_mutex_unlock(&arena->lock); @@ -220,7 +220,7 @@ tcache_create(arena_t *arena) */ size = (size + CACHELINE_MASK) & (-CACHELINE); - if (size <= small_maxclass) + if (size <= SMALL_MAXCLASS) tcache = (tcache_t *)arena_malloc_small(arena, size, true); else if (size <= tcache_maxclass) 
tcache = (tcache_t *)arena_malloc_large(arena, size, true); @@ -266,7 +266,7 @@ tcache_destroy(tcache_t *tcache) tcache_stats_merge(tcache, tcache->arena); } - for (i = 0; i < nbins; i++) { + for (i = 0; i < NBINS; i++) { tcache_bin_t *tbin = &tcache->tbins[i]; tcache_bin_flush_small(tbin, i, 0, tcache); @@ -287,7 +287,7 @@ tcache_destroy(tcache_t *tcache) arena_t *arena = tcache->arena; malloc_mutex_lock(&arena->lock); arena->stats.nrequests_large += tbin->tstats.nrequests; - arena->stats.lstats[i - nbins].nrequests += + arena->stats.lstats[i - NBINS].nrequests += tbin->tstats.nrequests; malloc_mutex_unlock(&arena->lock); } @@ -300,7 +300,7 @@ tcache_destroy(tcache_t *tcache) } tcache_size = arena_salloc(tcache); - if (tcache_size <= small_maxclass) { + if (tcache_size <= SMALL_MAXCLASS) { arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache); arena_t *arena = chunk->arena; size_t pageind = ((uintptr_t)tcache - (uintptr_t)chunk) >> @@ -357,7 +357,7 @@ tcache_stats_merge(tcache_t *tcache, arena_t *arena) unsigned i; /* Merge and reset tcache stats. */ - for (i = 0; i < nbins; i++) { + for (i = 0; i < NBINS; i++) { arena_bin_t *bin = &arena->bins[i]; tcache_bin_t *tbin = &tcache->tbins[i]; malloc_mutex_lock(&bin->lock); @@ -367,7 +367,7 @@ tcache_stats_merge(tcache_t *tcache, arena_t *arena) } for (; i < nhbins; i++) { - malloc_large_stats_t *lstats = &arena->stats.lstats[i - nbins]; + malloc_large_stats_t *lstats = &arena->stats.lstats[i - NBINS]; tcache_bin_t *tbin = &tcache->tbins[i]; arena->stats.nrequests_large += tbin->tstats.nrequests; lstats->nrequests += tbin->tstats.nrequests; @@ -384,17 +384,18 @@ tcache_boot(void) /* * If necessary, clamp opt_lg_tcache_max, now that - * small_maxclass and arena_maxclass are known. + * SMALL_MAXCLASS and arena_maxclass are known. + * XXX Can this be done earlier? */ if (opt_lg_tcache_max < 0 || (1U << - opt_lg_tcache_max) < small_maxclass) - tcache_maxclass = small_maxclass; + opt_lg_tcache_max) < SMALL_MAXCLASS) + tcache_maxclass = SMALL_MAXCLASS; else if ((1U << opt_lg_tcache_max) > arena_maxclass) tcache_maxclass = arena_maxclass; else tcache_maxclass = (1U << opt_lg_tcache_max); - nhbins = nbins + (tcache_maxclass >> PAGE_SHIFT); + nhbins = NBINS + (tcache_maxclass >> PAGE_SHIFT); /* Initialize tcache_bin_info. */ tcache_bin_info = (tcache_bin_info_t *)base_alloc(nhbins * @@ -402,7 +403,7 @@ tcache_boot(void) if (tcache_bin_info == NULL) return (true); stack_nelms = 0; - for (i = 0; i < nbins; i++) { + for (i = 0; i < NBINS; i++) { if ((arena_bin_info[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MAX) { tcache_bin_info[i].ncached_max = @@ -421,7 +422,7 @@ tcache_boot(void) /* Compute incremental GC event threshold. */ if (opt_lg_tcache_gc_sweep >= 0) { tcache_gc_incr = ((1U << opt_lg_tcache_gc_sweep) / - nbins) + (((1U << opt_lg_tcache_gc_sweep) % nbins == + NBINS) + (((1U << opt_lg_tcache_gc_sweep) % NBINS == 0) ? 0 : 1); } else tcache_gc_incr = 0; From f081b88dfbce94c3c7c8faf0b0f91b117fbdfcc6 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 28 Feb 2012 20:24:05 -0800 Subject: [PATCH 023/205] Fix realloc(p, 0) to act like free(p). Reported by Yoni Londer. 
--- src/jemalloc.c | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 08e5f31c..865c6236 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1124,23 +1124,28 @@ JEMALLOC_P(realloc)(void *ptr, size_t size) ; if (size == 0) { - if (config_sysv == false || opt_sysv == false) - size = 1; - else { - if (ptr != NULL) { - if (config_prof || config_stats) - old_size = isalloc(ptr); - if (config_prof && opt_prof) { - old_ctx = prof_ctx_get(ptr); - cnt = NULL; - } - idalloc(ptr); - } else if (config_prof && opt_prof) { - old_ctx = NULL; + if (ptr != NULL) { + /* realloc(ptr, 0) is equivalent to free(p). */ + if (config_prof || config_stats) + old_size = isalloc(ptr); + if (config_prof && opt_prof) { + old_ctx = prof_ctx_get(ptr); cnt = NULL; } + idalloc(ptr); ret = NULL; goto RETURN; + } else { + if (config_sysv == false || opt_sysv == false) + size = 1; + else { + if (config_prof && opt_prof) { + old_ctx = NULL; + cnt = NULL; + } + ret = NULL; + goto RETURN; + } } } @@ -1188,6 +1193,7 @@ OOM: errno = ENOMEM; } } else { + /* realloc(NULL, size) is equivalent to malloc(size). */ if (config_prof && opt_prof) old_ctx = NULL; if (malloc_init()) { From c90ad71237c05473bcb968beddebb0a487c36e75 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 28 Feb 2012 20:31:37 -0800 Subject: [PATCH 024/205] Remove the sysv option. --- INSTALL | 5 -- configure.ac | 17 ------ doc/jemalloc.xml.in | 26 --------- .../jemalloc/internal/jemalloc_internal.h.in | 8 --- include/jemalloc/jemalloc_defs.h.in | 3 -- src/ctl.c | 6 --- src/jemalloc.c | 54 +++---------------- src/stats.c | 1 - 8 files changed, 7 insertions(+), 113 deletions(-) diff --git a/INSTALL b/INSTALL index 0ddcacb7..f9c8a369 100644 --- a/INSTALL +++ b/INSTALL @@ -108,11 +108,6 @@ any of the following arguments (not a definitive list) to 'configure': errors, as is commonly implemented by "xmalloc" wrapper function for malloc. See the "opt.xmalloc" option documentation for usage details. ---enable-sysv - Enable support for System V semantics, wherein malloc(0) returns NULL - rather than a minimal allocation. See the "opt.sysv" option documentation - for usage details. - --enable-lazy-lock Enable code that wraps pthread_create() to detect when an application switches from single-threaded to multi-threaded mode, so that it can avoid diff --git a/configure.ac b/configure.ac index 91caef46..3fa24919 100644 --- a/configure.ac +++ b/configure.ac @@ -626,22 +626,6 @@ if test "x$enable_xmalloc" = "x1" ; then fi AC_SUBST([enable_xmalloc]) -dnl Do not support the SYSV option by default. 
-AC_ARG_ENABLE([sysv], - [AS_HELP_STRING([--enable-sysv], [Support SYSV semantics option])], -[if test "x$enable_sysv" = "xno" ; then - enable_sysv="0" -else - enable_sysv="1" -fi -], -[enable_sysv="0"] -) -if test "x$enable_sysv" = "x1" ; then - AC_DEFINE([JEMALLOC_SYSV], [ ]) -fi -AC_SUBST([enable_sysv]) - AC_MSG_CHECKING([STATIC_PAGE_SHIFT]) AC_RUN_IFELSE([AC_LANG_PROGRAM( [[#include @@ -911,7 +895,6 @@ AC_MSG_RESULT([prof-gcc : ${enable_prof_gcc}]) AC_MSG_RESULT([tcache : ${enable_tcache}]) AC_MSG_RESULT([fill : ${enable_fill}]) AC_MSG_RESULT([xmalloc : ${enable_xmalloc}]) -AC_MSG_RESULT([sysv : ${enable_sysv}]) AC_MSG_RESULT([dss : ${enable_dss}]) AC_MSG_RESULT([dynamic_page_shift : ${enable_dynamic_page_shift}]) AC_MSG_RESULT([lazy_lock : ${enable_lazy_lock}]) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index cfe120fb..e7cc6284 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -662,16 +662,6 @@ for (i = 0; i < nbins; i++) { build configuration. - - - config.sysv - (bool) - r- - - was specified during - build configuration. - - config.tcache @@ -808,22 +798,6 @@ for (i = 0; i < nbins; i++) { - - - opt.sysv - (bool) - r- - [] - - If enabled, attempting to allocate zero bytes will - return a NULL pointer instead of a valid pointer. - (The default behavior is to make a minimal allocation and return a - pointer to it.) This option is provided for System V compatibility. - This option is incompatible with the opt.xmalloc option. - This option is disabled by default. - - opt.xmalloc diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index f43fcd20..35c05f4e 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -104,13 +104,6 @@ static const bool config_stats = false #endif ; -static const bool config_sysv = -#ifdef JEMALLOC_SYSV - true -#else - false -#endif - ; static const bool config_tcache = #ifdef JEMALLOC_TCACHE true @@ -385,7 +378,6 @@ typedef struct { extern bool opt_abort; extern bool opt_junk; -extern bool opt_sysv; extern bool opt_xmalloc; extern bool opt_zero; extern size_t opt_narenas; diff --git a/include/jemalloc/jemalloc_defs.h.in b/include/jemalloc/jemalloc_defs.h.in index 53e85208..1360364a 100644 --- a/include/jemalloc/jemalloc_defs.h.in +++ b/include/jemalloc/jemalloc_defs.h.in @@ -98,9 +98,6 @@ /* Support optional abort() on OOM. */ #undef JEMALLOC_XMALLOC -/* Support SYSV semantics. */ -#undef JEMALLOC_SYSV - /* Support lazy locking (avoid locking unless a second thread is launched). 
*/ #undef JEMALLOC_LAZY_LOCK diff --git a/src/ctl.c b/src/ctl.c index 0beeb3d0..0fabd852 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -53,7 +53,6 @@ CTL_PROTO(config_prof) CTL_PROTO(config_prof_libgcc) CTL_PROTO(config_prof_libunwind) CTL_PROTO(config_stats) -CTL_PROTO(config_sysv) CTL_PROTO(config_tcache) CTL_PROTO(config_tls) CTL_PROTO(config_xmalloc) @@ -64,7 +63,6 @@ CTL_PROTO(opt_lg_dirty_mult) CTL_PROTO(opt_stats_print) CTL_PROTO(opt_junk) CTL_PROTO(opt_zero) -CTL_PROTO(opt_sysv) CTL_PROTO(opt_xmalloc) CTL_PROTO(opt_tcache) CTL_PROTO(opt_lg_tcache_gc_sweep) @@ -174,7 +172,6 @@ static const ctl_node_t config_node[] = { {NAME("prof_libgcc"), CTL(config_prof_libgcc)}, {NAME("prof_libunwind"), CTL(config_prof_libunwind)}, {NAME("stats"), CTL(config_stats)}, - {NAME("sysv"), CTL(config_sysv)}, {NAME("tcache"), CTL(config_tcache)}, {NAME("tls"), CTL(config_tls)}, {NAME("xmalloc"), CTL(config_xmalloc)} @@ -188,7 +185,6 @@ static const ctl_node_t opt_node[] = { {NAME("stats_print"), CTL(opt_stats_print)}, {NAME("junk"), CTL(opt_junk)}, {NAME("zero"), CTL(opt_zero)}, - {NAME("sysv"), CTL(opt_sysv)}, {NAME("xmalloc"), CTL(opt_xmalloc)}, {NAME("tcache"), CTL(opt_tcache)}, {NAME("lg_tcache_gc_sweep"), CTL(opt_lg_tcache_gc_sweep)}, @@ -1058,7 +1054,6 @@ CTL_RO_BOOL_CONFIG_GEN(config_prof) CTL_RO_BOOL_CONFIG_GEN(config_prof_libgcc) CTL_RO_BOOL_CONFIG_GEN(config_prof_libunwind) CTL_RO_BOOL_CONFIG_GEN(config_stats) -CTL_RO_BOOL_CONFIG_GEN(config_sysv) CTL_RO_BOOL_CONFIG_GEN(config_tcache) CTL_RO_BOOL_CONFIG_GEN(config_tls) CTL_RO_BOOL_CONFIG_GEN(config_xmalloc) @@ -1072,7 +1067,6 @@ CTL_RO_NL_GEN(opt_lg_dirty_mult, opt_lg_dirty_mult, ssize_t) CTL_RO_NL_GEN(opt_stats_print, opt_stats_print, bool) CTL_RO_NL_CGEN(config_fill, opt_junk, opt_junk, bool) CTL_RO_NL_CGEN(config_fill, opt_zero, opt_zero, bool) -CTL_RO_NL_CGEN(config_sysv, opt_sysv, opt_sysv, bool) CTL_RO_NL_CGEN(config_xmalloc, opt_xmalloc, opt_xmalloc, bool) CTL_RO_NL_CGEN(config_tcache, opt_tcache, opt_tcache, bool) CTL_RO_NL_CGEN(config_tcache, opt_lg_tcache_gc_sweep, opt_lg_tcache_gc_sweep, diff --git a/src/jemalloc.c b/src/jemalloc.c index 865c6236..cc0188c1 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -48,7 +48,6 @@ bool opt_junk = false; bool opt_abort = false; bool opt_junk = false; #endif -bool opt_sysv = false; bool opt_xmalloc = false; bool opt_zero = false; size_t opt_narenas = 0; @@ -575,9 +574,6 @@ malloc_conf_init(void) CONF_HANDLE_BOOL(junk) CONF_HANDLE_BOOL(zero) } - if (config_sysv) { - CONF_HANDLE_BOOL(sysv) - } if (config_xmalloc) { CONF_HANDLE_BOOL(xmalloc) } @@ -854,19 +850,8 @@ JEMALLOC_P(malloc)(size_t size) goto OOM; } - if (size == 0) { - if (config_sysv == false || opt_sysv == false) - size = 1; - else { - if (config_xmalloc && opt_xmalloc) { - malloc_write(": Error in malloc(): " - "invalid size 0\n"); - abort(); - } - ret = NULL; - goto RETURN; - } - } + if (size == 0) + size = 1; if (config_prof && opt_prof) { usize = s2u(size); @@ -931,22 +916,8 @@ imemalign(void **memptr, size_t alignment, size_t size) if (malloc_init()) result = NULL; else { - if (size == 0) { - if (config_sysv == false || opt_sysv == false) - size = 1; - else { - if (config_xmalloc && opt_xmalloc) { - malloc_write(": Error in " - "posix_memalign(): invalid size " - "0\n"); - abort(); - } - result = NULL; - *memptr = NULL; - ret = 0; - goto RETURN; - } - } + if (size == 0) + size = 1; /* Make sure that alignment is a large enough power of 2. 
*/ if (((alignment - 1) & alignment) != 0 @@ -1047,8 +1018,7 @@ JEMALLOC_P(calloc)(size_t num, size_t size) num_size = num * size; if (num_size == 0) { - if ((config_sysv == false || opt_sysv == false) - && ((num == 0) || (size == 0))) + if (num == 0 || size == 0) num_size = 1; else { ret = NULL; @@ -1135,18 +1105,8 @@ JEMALLOC_P(realloc)(void *ptr, size_t size) idalloc(ptr); ret = NULL; goto RETURN; - } else { - if (config_sysv == false || opt_sysv == false) - size = 1; - else { - if (config_prof && opt_prof) { - old_ctx = NULL; - cnt = NULL; - } - ret = NULL; - goto RETURN; - } - } + } else + size = 1; } if (ptr != NULL) { diff --git a/src/stats.c b/src/stats.c index e4500dfb..f6851a01 100644 --- a/src/stats.c +++ b/src/stats.c @@ -489,7 +489,6 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, OPT_WRITE_BOOL(stats_print) OPT_WRITE_BOOL(junk) OPT_WRITE_BOOL(zero) - OPT_WRITE_BOOL(sysv) OPT_WRITE_BOOL(xmalloc) OPT_WRITE_BOOL(tcache) OPT_WRITE_SSIZE_T(lg_tcache_gc_sweep) From d073a321091800e71ea56f98701253dc0969d879 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 28 Feb 2012 20:41:16 -0800 Subject: [PATCH 025/205] Enable the stats configuration option by default. --- INSTALL | 4 ++-- configure.ac | 5 ++--- src/jemalloc.c | 2 -- 3 files changed, 4 insertions(+), 7 deletions(-) diff --git a/INSTALL b/INSTALL index f9c8a369..b84fb1bf 100644 --- a/INSTALL +++ b/INSTALL @@ -62,8 +62,8 @@ any of the following arguments (not a definitive list) to 'configure': Enable assertions and validation code. This incurs a substantial performance hit, but is very useful during application development. ---enable-stats - Enable statistics gathering functionality. See the "opt.stats_print" +--disable-stats + Disable statistics gathering functionality. See the "opt.stats_print" option documentation for usage details. --enable-prof diff --git a/configure.ac b/configure.ac index 3fa24919..78f7c8e5 100644 --- a/configure.ac +++ b/configure.ac @@ -423,7 +423,7 @@ if test "x$enable_debug" = "x0" -a "x$no_CFLAGS" = "xyes" ; then fi fi -dnl Do not enable statistics calculation by default. +dnl Enable statistics calculation by default. AC_ARG_ENABLE([stats], [AS_HELP_STRING([--enable-stats], [Enable statistics calculation/reporting])], [if test "x$enable_stats" = "xno" ; then @@ -432,7 +432,7 @@ else enable_stats="1" fi ], -[enable_stats="0"] +[enable_stats="1"] ) if test "x$enable_stats" = "x1" ; then AC_DEFINE([JEMALLOC_STATS], [ ]) @@ -896,7 +896,6 @@ AC_MSG_RESULT([tcache : ${enable_tcache}]) AC_MSG_RESULT([fill : ${enable_fill}]) AC_MSG_RESULT([xmalloc : ${enable_xmalloc}]) AC_MSG_RESULT([dss : ${enable_dss}]) -AC_MSG_RESULT([dynamic_page_shift : ${enable_dynamic_page_shift}]) AC_MSG_RESULT([lazy_lock : ${enable_lazy_lock}]) AC_MSG_RESULT([tls : ${enable_tls}]) AC_MSG_RESULT([===============================================================================]) diff --git a/src/jemalloc.c b/src/jemalloc.c index cc0188c1..a3a9a70a 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -882,8 +882,6 @@ OOM: } errno = ENOMEM; } - -RETURN: if (config_prof && opt_prof && ret != NULL) prof_malloc(ret, usize, cnt); if (config_stats && ret != NULL) { From 777c191485452251fbecfe6638a4a54c651e25b3 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 28 Feb 2012 20:49:22 -0800 Subject: [PATCH 026/205] Enable support for junk/zero filling by default. 
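
Since this patch and the preceding one only flip compile-time defaults
(stats and junk/zero fill support are now built in unless explicitly
disabled), the effect is easiest to see through the mallctl interface. The
following sketch is illustrative and not part of the patch; it assumes an
unprefixed build and the generated <jemalloc/jemalloc.h> header. It
distinguishes the config.* namespace, which reflects how jemalloc was
built, from the opt.* namespace, which reflects runtime option settings:

    #include <stdbool.h>
    #include <stdio.h>
    #include <jemalloc/jemalloc.h>

    int
    main(void)
    {
            bool bv;
            size_t bsz = sizeof(bv);

            /* Illustrative only; assumes an unprefixed jemalloc build. */

            /*
             * config.* is fixed at build time; with these two patches,
             * stats and fill are compiled in unless --disable-stats or
             * --disable-fill is given.
             */
            if (mallctl("config.stats", &bv, &bsz, NULL, 0) == 0)
                    printf("config.stats: %s\n", bv ? "true" : "false");
            if (mallctl("config.fill", &bv, &bsz, NULL, 0) == 0)
                    printf("config.fill: %s\n", bv ? "true" : "false");

            /*
             * opt.junk and opt.zero remain runtime options; compiling in
             * fill support does not by itself enable junk filling.
             */
            if (mallctl("opt.junk", &bv, &bsz, NULL, 0) == 0)
                    printf("opt.junk: %s\n", bv ? "true" : "false");
            return (0);
    }
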
--- INSTALL | 4 ++-- configure.ac | 9 +++++---- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/INSTALL b/INSTALL index b84fb1bf..4d3e1af9 100644 --- a/INSTALL +++ b/INSTALL @@ -99,8 +99,8 @@ any of the following arguments (not a definitive list) to 'configure': Enable support for page allocation/deallocation via sbrk(2), in addition to mmap(2). ---enable-fill - Enable support for junk/zero filling of memory. See the "opt.junk"/ +--disable-fill + Disable support for junk/zero filling of memory. See the "opt.junk"/ "opt.zero" option documentation for usage details. --enable-xmalloc diff --git a/configure.ac b/configure.ac index 78f7c8e5..b503f646 100644 --- a/configure.ac +++ b/configure.ac @@ -425,7 +425,8 @@ fi dnl Enable statistics calculation by default. AC_ARG_ENABLE([stats], - [AS_HELP_STRING([--enable-stats], [Enable statistics calculation/reporting])], + [AS_HELP_STRING([--disable-stats], + [Disable statistics calculation/reporting])], [if test "x$enable_stats" = "xno" ; then enable_stats="0" else @@ -594,16 +595,16 @@ if test "x$enable_dss" = "x1" ; then fi AC_SUBST([enable_dss]) -dnl Do not support the junk/zero filling option by default. +dnl Support the junk/zero filling option by default. AC_ARG_ENABLE([fill], - [AS_HELP_STRING([--enable-fill], [Support junk/zero filling option])], + [AS_HELP_STRING([--disable-fill], [Disable support for junk/zero filling])], [if test "x$enable_fill" = "xno" ; then enable_fill="0" else enable_fill="1" fi ], -[enable_fill="0"] +[enable_fill="1"] ) if test "x$enable_fill" = "x1" ; then AC_DEFINE([JEMALLOC_FILL], [ ]) From 3add8d8cda2993f58fd2eba6efbf4fa12d5c72f3 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 28 Feb 2012 21:08:19 -0800 Subject: [PATCH 027/205] Remove unused variables in tcache_dalloc_large(). Submitted by Mike Hommey. --- include/jemalloc/internal/tcache.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index b964a12e..77bca8d9 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -360,9 +360,7 @@ tcache_dalloc_small(tcache_t *tcache, void *ptr) JEMALLOC_INLINE void tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size) { - arena_t *arena; - arena_chunk_t *chunk; - size_t pageind, binind; + size_t binind; tcache_bin_t *tbin; tcache_bin_info_t *tbin_info; @@ -370,9 +368,6 @@ tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size) assert(arena_salloc(ptr) > SMALL_MAXCLASS); assert(arena_salloc(ptr) <= tcache_maxclass); - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - arena = chunk->arena; - pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; binind = NBINS + (size >> PAGE_SHIFT) - 1; if (config_fill && opt_junk) From bdcadf41e961a3c6bbb37d8d24e4b68a27f2b952 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 28 Feb 2012 21:11:03 -0800 Subject: [PATCH 028/205] Remove unused variable in arena_run_split(). Submitted by Mike Hommey. 
--- src/arena.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/arena.c b/src/arena.c index 72b7f449..c14cb2c2 100644 --- a/src/arena.c +++ b/src/arena.c @@ -188,12 +188,11 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, bool zero) { arena_chunk_t *chunk; - size_t old_ndirty, run_ind, total_pages, need_pages, rem_pages, i; + size_t run_ind, total_pages, need_pages, rem_pages, i; size_t flag_dirty; arena_avail_tree_t *runs_avail; chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); - old_ndirty = chunk->ndirty; run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk) >> PAGE_SHIFT); flag_dirty = chunk->map[run_ind-map_bias].bits & CHUNK_MAP_DIRTY; From 93c023d181269fd47ca3f8e773509bb5bd779684 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 28 Feb 2012 21:13:12 -0800 Subject: [PATCH 029/205] Remove unused variables in stats_print(). Submitted by Mike Hommey. --- src/stats.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/stats.c b/src/stats.c index f6851a01..2f61e7bc 100644 --- a/src/stats.c +++ b/src/stats.c @@ -582,7 +582,6 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, } if (config_stats) { - size_t sszp, ssz; size_t *cactive; size_t allocated, active, mapped; size_t chunks_current, chunks_high; @@ -590,9 +589,6 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, size_t huge_allocated; uint64_t huge_nmalloc, huge_ndalloc; - sszp = sizeof(size_t *); - ssz = sizeof(size_t); - CTL_GET("stats.cactive", &cactive, size_t *); CTL_GET("stats.allocated", &allocated, size_t); CTL_GET("stats.active", &active, size_t); From 5965631636c620fba2eb33698accee75fd207aab Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 28 Feb 2012 21:37:38 -0800 Subject: [PATCH 030/205] Do not enforce minimum alignment in memalign(). Do not enforce minimum alignment in memalign(). This is a non-standard function, and there is disagreement over whether to enforce minimum alignment. Solaris documentation (whence memalign() originated) says that minimum alignment is required: The value of alignment must be a power of two and must be greater than or equal to the size of a word. However, Linux's manual page says in its NOTES section: memalign() may not check that the boundary parameter is correct. This is descriptive rather than prescriptive, but applications with bad assumptions about memalign() exist, so be as forgiving as possible. Reported by Mike Hommey. --- src/jemalloc.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index a3a9a70a..535efaa1 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -68,7 +68,8 @@ static void malloc_conf_error(const char *msg, const char *k, size_t klen, const char *v, size_t vlen); static void malloc_conf_init(void); static bool malloc_init_hard(void); -static int imemalign(void **memptr, size_t alignment, size_t size); +static int imemalign(void **memptr, size_t alignment, size_t size, + bool enforce_min_alignment); /******************************************************************************/ /* malloc_message() setup. */ @@ -900,7 +901,8 @@ JEMALLOC_ATTR(nonnull(1)) JEMALLOC_ATTR(noinline) #endif static int -imemalign(void **memptr, size_t alignment, size_t size) +imemalign(void **memptr, size_t alignment, size_t size, + bool enforce_min_alignment) { int ret; size_t usize; @@ -919,7 +921,7 @@ imemalign(void **memptr, size_t alignment, size_t size) /* Make sure that alignment is a large enough power of 2. 
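
The practical difference is visible in how the two entry points treat a
sub-pointer-size alignment. A minimal sketch follows (illustrative only,
not part of the patch; memalign() is nonstandard and, on glibc, declared
in <malloc.h>):

    #include <errno.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <malloc.h>	/* memalign() */

    int
    main(void)
    {
            void *p = NULL;
            int err;

            /* Illustrative only; assumes an unprefixed jemalloc build. */

            /*
             * posix_memalign() keeps the standard's minimum: alignment
             * must be a power of two at least sizeof(void *).
             */
            err = posix_memalign(&p, 2, 64);
            printf("posix_memalign(&p, 2, 64): %s\n",
                err == EINVAL ? "EINVAL" : "accepted");

            /*
             * memalign() now passes enforce_min_alignment=false, so a
             * power-of-two alignment smaller than a pointer is forgiven
             * rather than rejected.
             */
            p = memalign(2, 64);
            printf("memalign(2, 64): %s\n",
                p != NULL ? "accepted" : "failed");
            free(p);
            return (0);
    }

Alignments that are not powers of two are still rejected on both paths,
since that check in imemalign() remains unconditional.
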
*/ if (((alignment - 1) & alignment) != 0 - || alignment < sizeof(void *)) { + || (enforce_min_alignment && alignment < sizeof(void *))) { if (config_xmalloc && opt_xmalloc) { malloc_write(": Error in " "posix_memalign(): invalid alignment\n"); @@ -991,7 +993,7 @@ int JEMALLOC_P(posix_memalign)(void **memptr, size_t alignment, size_t size) { - return imemalign(memptr, alignment, size); + return imemalign(memptr, alignment, size, true); } JEMALLOC_ATTR(malloc) @@ -1249,7 +1251,7 @@ JEMALLOC_P(memalign)(size_t alignment, size_t size) = NULL #endif ; - imemalign(&ret, alignment, size); + imemalign(&ret, alignment, size, false); return (ret); } #endif @@ -1265,7 +1267,7 @@ JEMALLOC_P(valloc)(size_t size) = NULL #endif ; - imemalign(&ret, PAGE_SIZE, size); + imemalign(&ret, PAGE_SIZE, size, false); return (ret); } #endif From 4bb09830133ffa8b27a95bc3727558007722c152 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 29 Feb 2012 10:37:27 -0800 Subject: [PATCH 031/205] Use glibc allocator hooks. When jemalloc is used as a libc malloc replacement (i.e. not prefixed), some particular setups may end up inconsistently calling malloc from libc and free from jemalloc, or the other way around. glibc provides hooks to make its functions use alternative implementations. Use them. Submitted by Karl Tomlinson and Mike Hommey. --- .../jemalloc/internal/jemalloc_internal.h.in | 4 ---- src/jemalloc.c | 24 +++++++++++++++++++ src/mutex.c | 4 ++++ 3 files changed, 28 insertions(+), 4 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 35c05f4e..c21c218b 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -144,10 +144,6 @@ static const bool config_ivsalloc = #include #endif -#ifdef JEMALLOC_LAZY_LOCK -#include -#endif - #define RB_COMPACT #include "jemalloc/internal/rb.h" #include "jemalloc/internal/qr.h" diff --git a/src/jemalloc.c b/src/jemalloc.c index 535efaa1..ccc3a209 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1272,6 +1272,30 @@ JEMALLOC_P(valloc)(size_t size) } #endif +#if defined(__GLIBC__) && !defined(__UCLIBC__) +/* + * glibc provides the RTLD_DEEPBIND flag for dlopen which can make it possible + * to inconsistently reference libc's malloc(3)-compatible functions + * (https://bugzilla.mozilla.org/show_bug.cgi?id=493541). + * + * These definitions interpose hooks in glibc.  The functions are actually + * passed an extra argument for the caller return address, which will be + * ignored. + */ +JEMALLOC_ATTR(visibility("default")) +void (* const __free_hook)(void *ptr) = JEMALLOC_P(free); + +JEMALLOC_ATTR(visibility("default")) +void *(* const __malloc_hook)(size_t size) = JEMALLOC_P(malloc); + +JEMALLOC_ATTR(visibility("default")) +void *(* const __realloc_hook)(void *ptr, size_t size) = JEMALLOC_P(realloc); + +JEMALLOC_ATTR(visibility("default")) +void *(* const __memalign_hook)(size_t alignment, size_t size) = + JEMALLOC_P(memalign); +#endif + #endif /* JEMALLOC_PREFIX */ /* * End non-standard override functions. diff --git a/src/mutex.c b/src/mutex.c index ca89ef1c..0e09060e 100644 --- a/src/mutex.c +++ b/src/mutex.c @@ -1,6 +1,10 @@ #define JEMALLOC_MUTEX_C_ #include "jemalloc/internal/jemalloc_internal.h" +#ifdef JEMALLOC_LAZY_LOCK +#include +#endif + /******************************************************************************/ /* Data. 
*/ From 7e15dab94d3f008b0a6c296ad7afec9ed47ff1ac Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 29 Feb 2012 12:56:37 -0800 Subject: [PATCH 032/205] Add nallocm(). Add nallocm(), which computes the real allocation size that would result from the corresponding allocm() call. nallocm() is a functional superset of OS X's malloc_good_size(), in that it takes alignment constraints into account. --- doc/jemalloc.xml.in | 38 ++++++-- .../jemalloc/internal/jemalloc_internal.h.in | 4 - include/jemalloc/jemalloc.h.in | 1 + src/jemalloc.c | 22 +++++ test/allocm.c | 91 ++++++++++++++++--- 5 files changed, 131 insertions(+), 25 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index e7cc6284..6aa412a1 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -41,6 +41,7 @@ rallocm sallocm dallocm + nallocm --> general purpose memory allocation functions @@ -154,6 +155,12 @@ void *ptr int flags + + int nallocm + size_t *rsize + size_t size + int flags + @@ -301,8 +308,9 @@ for (i = 0; i < nbins; i++) { The allocm, rallocm, - sallocm, and - dallocm functions all have a + sallocm, + dallocm, and + nallocm functions all have a flags argument that can be used to specify options. The functions only check the options that are contextually relevant. Use bitwise or (|) operations to @@ -351,7 +359,9 @@ for (i = 0; i < nbins; i++) { least size bytes of memory, sets *ptr to the base address of the allocation, and sets *rsize to the real size of the allocation if - rsize is not NULL. + rsize is not NULL. Behavior + is undefined if size is + 0. The rallocm function resizes the allocation at *ptr to be at least @@ -364,7 +374,8 @@ for (i = 0; i < nbins; i++) { language="C">size + extra) bytes, though inability to allocate the extra byte(s) will not by itself result in failure. Behavior is - undefined if (size + + undefined if size is 0, or if + (size + extra > SIZE_T_MAX). @@ -374,6 +385,15 @@ for (i = 0; i < nbins; i++) { The dallocm function causes the memory referenced by ptr to be made available for future allocations. + + The nallocm function allocates no + memory, but it performs the same size computation as the + allocm function, and if + rsize is not NULL it sets + *rsize to the real size of the allocation that + would result from the equivalent allocm + function call. Behavior is undefined if + size is 0. @@ -1857,11 +1877,13 @@ malloc_conf = "xmalloc:true";]]> Experimental API The allocm, rallocm, - sallocm, and - dallocm functions return + sallocm, + dallocm, and + nallocm functions return ALLOCM_SUCCESS on success; otherwise they return an - error value. The allocm and - rallocm functions will fail if: + error value. The allocm, + rallocm, and + nallocm functions will fail if: ALLOCM_ERR_OOM diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index c21c218b..aa073467 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -548,10 +548,6 @@ sa2u(size_t size, size_t alignment, size_t *run_size_p) * 96 | 1100000 | 32 * 144 | 10100000 | 32 * 192 | 11000000 | 64 - * - * Depending on runtime settings, it is possible that arena_malloc() - * will further round up to a power of two, but that never causes - * correctness issues. 
*/ usize = (size + (alignment - 1)) & (-alignment); /* diff --git a/include/jemalloc/jemalloc.h.in b/include/jemalloc/jemalloc.h.in index 580a5ec5..428c0d39 100644 --- a/include/jemalloc/jemalloc.h.in +++ b/include/jemalloc/jemalloc.h.in @@ -59,6 +59,7 @@ int JEMALLOC_P(rallocm)(void **ptr, size_t *rsize, size_t size, int JEMALLOC_P(sallocm)(const void *ptr, size_t *rsize, int flags) JEMALLOC_ATTR(nonnull(1)); int JEMALLOC_P(dallocm)(void *ptr, int flags) JEMALLOC_ATTR(nonnull(1)); +int JEMALLOC_P(nallocm)(size_t *rsize, size_t size, int flags); #ifdef __cplusplus }; diff --git a/src/jemalloc.c b/src/jemalloc.c index ccc3a209..34fd1aa0 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1586,6 +1586,28 @@ JEMALLOC_P(dallocm)(void *ptr, int flags) return (ALLOCM_SUCCESS); } +JEMALLOC_ATTR(visibility("default")) +int +JEMALLOC_P(nallocm)(size_t *rsize, size_t size, int flags) +{ + size_t usize; + size_t alignment = (ZU(1) << (flags & ALLOCM_LG_ALIGN_MASK) + & (SIZE_T_MAX-1)); + + assert(size != 0); + + if (malloc_init()) + return (ALLOCM_ERR_OOM); + + usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment, NULL); + if (usize == 0) + return (ALLOCM_ERR_OOM); + + if (rsize != NULL) + *rsize = usize; + return (ALLOCM_SUCCESS); +} + /* * End non-standard functions. */ diff --git a/test/allocm.c b/test/allocm.c index 59d0002e..762e350c 100644 --- a/test/allocm.c +++ b/test/allocm.c @@ -15,24 +15,33 @@ main(void) { int r; void *p; - size_t sz, alignment, total, tsz; + size_t nsz, rsz, sz, alignment, total; unsigned i; void *ps[NITER]; fprintf(stderr, "Test begin\n"); - sz = 0; - r = JEMALLOC_P(allocm)(&p, &sz, 42, 0); + sz = 42; + nsz = 0; + r = JEMALLOC_P(nallocm)(&nsz, sz, 0); + if (r != ALLOCM_SUCCESS) { + fprintf(stderr, "Unexpected nallocm() error\n"); + abort(); + } + rsz = 0; + r = JEMALLOC_P(allocm)(&p, &rsz, sz, 0); if (r != ALLOCM_SUCCESS) { fprintf(stderr, "Unexpected allocm() error\n"); abort(); } - if (sz < 42) + if (rsz < sz) fprintf(stderr, "Real size smaller than expected\n"); + if (nsz != rsz) + fprintf(stderr, "nallocm()/allocm() rsize mismatch\n"); if (JEMALLOC_P(dallocm)(p, 0) != ALLOCM_SUCCESS) fprintf(stderr, "Unexpected dallocm() error\n"); - r = JEMALLOC_P(allocm)(&p, NULL, 42, 0); + r = JEMALLOC_P(allocm)(&p, NULL, sz, 0); if (r != ALLOCM_SUCCESS) { fprintf(stderr, "Unexpected allocm() error\n"); abort(); @@ -40,11 +49,20 @@ main(void) if (JEMALLOC_P(dallocm)(p, 0) != ALLOCM_SUCCESS) fprintf(stderr, "Unexpected dallocm() error\n"); - r = JEMALLOC_P(allocm)(&p, NULL, 42, ALLOCM_ZERO); + nsz = 0; + r = JEMALLOC_P(nallocm)(&nsz, sz, ALLOCM_ZERO); + if (r != ALLOCM_SUCCESS) { + fprintf(stderr, "Unexpected nallocm() error\n"); + abort(); + } + rsz = 0; + r = JEMALLOC_P(allocm)(&p, &rsz, sz, ALLOCM_ZERO); if (r != ALLOCM_SUCCESS) { fprintf(stderr, "Unexpected allocm() error\n"); abort(); } + if (nsz != rsz) + fprintf(stderr, "nallocm()/allocm() rsize mismatch\n"); if (JEMALLOC_P(dallocm)(p, 0) != ALLOCM_SUCCESS) fprintf(stderr, "Unexpected dallocm() error\n"); @@ -55,12 +73,22 @@ main(void) alignment = 0x80000000LU; sz = 0x80000000LU; #endif - r = JEMALLOC_P(allocm)(&p, NULL, sz, ALLOCM_ALIGN(alignment)); + nsz = 0; + r = JEMALLOC_P(nallocm)(&nsz, sz, ALLOCM_ALIGN(alignment)); + if (r == ALLOCM_SUCCESS) { + fprintf(stderr, + "Expected error for nallocm(&nsz, %zu, 0x%x)\n", + sz, ALLOCM_ALIGN(alignment)); + } + rsz = 0; + r = JEMALLOC_P(allocm)(&p, &rsz, sz, ALLOCM_ALIGN(alignment)); if (r == ALLOCM_SUCCESS) { fprintf(stderr, "Expected error for allocm(&p, %zu, 0x%x)\n", 
sz, ALLOCM_ALIGN(alignment)); } + if (nsz != rsz) + fprintf(stderr, "nallocm()/allocm() rsize mismatch\n"); #if LG_SIZEOF_PTR == 3 alignment = 0x4000000000000000LLU; @@ -69,7 +97,12 @@ main(void) alignment = 0x40000000LU; sz = 0x84000001LU; #endif - r = JEMALLOC_P(allocm)(&p, NULL, sz, ALLOCM_ALIGN(alignment)); + nsz = 0; + r = JEMALLOC_P(nallocm)(&nsz, sz, ALLOCM_ALIGN(alignment)); + if (r != ALLOCM_SUCCESS) + fprintf(stderr, "Unexpected nallocm() error\n"); + rsz = 0; + r = JEMALLOC_P(allocm)(&p, &rsz, sz, ALLOCM_ALIGN(alignment)); if (r == ALLOCM_SUCCESS) { fprintf(stderr, "Expected error for allocm(&p, %zu, 0x%x)\n", @@ -82,12 +115,22 @@ main(void) #else sz = 0xfffffff0LU; #endif - r = JEMALLOC_P(allocm)(&p, NULL, sz, ALLOCM_ALIGN(alignment)); + nsz = 0; + r = JEMALLOC_P(nallocm)(&nsz, sz, ALLOCM_ALIGN(alignment)); + if (r == ALLOCM_SUCCESS) { + fprintf(stderr, + "Expected error for nallocm(&nsz, %zu, 0x%x)\n", + sz, ALLOCM_ALIGN(alignment)); + } + rsz = 0; + r = JEMALLOC_P(allocm)(&p, &rsz, sz, ALLOCM_ALIGN(alignment)); if (r == ALLOCM_SUCCESS) { fprintf(stderr, "Expected error for allocm(&p, %zu, 0x%x)\n", sz, ALLOCM_ALIGN(alignment)); } + if (nsz != rsz) + fprintf(stderr, "nallocm()/allocm() rsize mismatch\n"); for (i = 0; i < NITER; i++) ps[i] = NULL; @@ -101,21 +144,43 @@ main(void) sz < 3 * alignment && sz < (1U << 31); sz += (alignment >> (LG_SIZEOF_PTR-1)) - 1) { for (i = 0; i < NITER; i++) { - r = JEMALLOC_P(allocm)(&ps[i], NULL, sz, + nsz = 0; + r = JEMALLOC_P(nallocm)(&nsz, sz, ALLOCM_ALIGN(alignment) | ALLOCM_ZERO); if (r != ALLOCM_SUCCESS) { fprintf(stderr, - "Error for size %zu (0x%zx): %d\n", + "nallocm() error for size %zu" + " (0x%zx): %d\n", sz, sz, r); exit(1); } + rsz = 0; + r = JEMALLOC_P(allocm)(&ps[i], &rsz, sz, + ALLOCM_ALIGN(alignment) | ALLOCM_ZERO); + if (r != ALLOCM_SUCCESS) { + fprintf(stderr, + "allocm() error for size %zu" + " (0x%zx): %d\n", + sz, sz, r); + exit(1); + } + if (rsz < sz) { + fprintf(stderr, + "Real size smaller than" + " expected\n"); + } + if (nsz != rsz) { + fprintf(stderr, + "nallocm()/allocm() rsize" + " mismatch\n"); + } if ((uintptr_t)p & (alignment-1)) { fprintf(stderr, "%p inadequately aligned for" " alignment: %zu\n", p, alignment); } - JEMALLOC_P(sallocm)(ps[i], &tsz, 0); - total += tsz; + JEMALLOC_P(sallocm)(ps[i], &rsz, 0); + total += rsz; if (total >= (MAXALIGN << 1)) break; } From 166a745b395198c2b0d661caa717e6a9400291c6 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 29 Feb 2012 12:58:39 -0800 Subject: [PATCH 033/205] Simplify zone_good_size(). Simplify zone_good_size() to avoid memory allocation. Submitted by Mike Hommey. --- src/zone.c | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/src/zone.c b/src/zone.c index 2c1b2318..07f88617 100644 --- a/src/zone.c +++ b/src/zone.c @@ -133,22 +133,10 @@ zone_destroy(malloc_zone_t *zone) static size_t zone_good_size(malloc_zone_t *zone, size_t size) { - size_t ret; - void *p; - /* - * Actually create an object of the appropriate size, then find out - * how large it could have been without moving up to the next size - * class. - */ - p = JEMALLOC_P(malloc)(size); - if (p != NULL) { - ret = isalloc(p); - JEMALLOC_P(free)(p); - } else - ret = size; - - return (ret); + if (size == 0) + size = 1; + return (s2u(size)); } static void From 0a5489e37da88a1a50fbf8552e0d3a7f8fd93ffc Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 1 Mar 2012 17:19:20 -0800 Subject: [PATCH 034/205] Add --with-mangling. 
Add the --with-mangling configure option, which can be used to specify name mangling on a per public symbol basis that takes precedence over --with-jemalloc-prefix. Expose the memalign() and valloc() overrides even if --with-jemalloc-prefix is specified. This change does no real harm, and simplifies the code. --- INSTALL | 13 +++ configure.ac | 19 ++- include/jemalloc/internal/ctl.h | 6 +- .../jemalloc/internal/jemalloc_internal.h.in | 8 +- include/jemalloc/jemalloc.h.in | 110 ++++++++++++++---- include/jemalloc/jemalloc_defs.h.in | 45 ++++--- src/jemalloc.c | 65 +++++------ src/stats.c | 35 +++--- src/zone.c | 26 ++--- test/allocated.c | 26 ++--- test/allocm.c | 36 +++--- test/mremap.c | 3 +- test/posix_memalign.c | 16 +-- test/rallocm.c | 20 ++-- test/thread_arena.c | 13 +-- 15 files changed, 265 insertions(+), 176 deletions(-) diff --git a/INSTALL b/INSTALL index 4d3e1af9..6e32d715 100644 --- a/INSTALL +++ b/INSTALL @@ -26,6 +26,19 @@ any of the following arguments (not a definitive list) to 'configure': Embed one or more library paths, so that libjemalloc can find the libraries it is linked to. This works only on ELF-based systems. +--with-mangling= + Mangle public symbols specified in which is a comma-separated list of + name:mangled pairs. + + For example, to use ld's --wrap option as an alternative method for + overriding libc's malloc implementation, specify something like: + + --with-mangling=malloc:__wrap_malloc,free:__wrap_free[...] + + Note that mangling happens prior to application of the prefix specified by + --with-jemalloc-prefix, and mangled symbols are then ignored when applying + the prefix. + --with-jemalloc-prefix= Prefix all public APIs with . For example, if is "prefix_", API changes like the following occur: diff --git a/configure.ac b/configure.ac index b503f646..81ab233a 100644 --- a/configure.ac +++ b/configure.ac @@ -303,6 +303,16 @@ AC_PATH_PROG([AR], [ar], , [$PATH]) AC_PATH_PROG([LD], [ld], , [$PATH]) AC_PATH_PROG([AUTOCONF], [autoconf], , [$PATH]) +dnl Perform no name mangling by default. +AC_ARG_WITH([mangling], + [AS_HELP_STRING([--with-mangling=], [Mangle symbols in ])], + [mangling_map="$with_mangling"], [mangling_map=""]) +for nm in `echo ${mangling_map} |tr ',' ' '` ; do + n="je_`echo ${nm} |tr ':' ' ' |awk '{print $1}'`" + m=`echo ${nm} |tr ':' ' ' |awk '{print $2}'` + AC_DEFINE_UNQUOTED([${n}], [${m}]) +done + dnl Do not prefix public APIs by default. AC_ARG_WITH([jemalloc_prefix], [AS_HELP_STRING([--with-jemalloc-prefix=], [Prefix to prepend to all public APIs])], @@ -317,8 +327,15 @@ if test "x$JEMALLOC_PREFIX" != "x" ; then JEMALLOC_CPREFIX=`echo ${JEMALLOC_PREFIX} | tr "a-z" "A-Z"` AC_DEFINE_UNQUOTED([JEMALLOC_PREFIX], ["$JEMALLOC_PREFIX"]) AC_DEFINE_UNQUOTED([JEMALLOC_CPREFIX], ["$JEMALLOC_CPREFIX"]) - AC_DEFINE_UNQUOTED([JEMALLOC_P(string_that_no_one_should_want_to_use_as_a_jemalloc_API_prefix)], [${JEMALLOC_PREFIX}##string_that_no_one_should_want_to_use_as_a_jemalloc_API_prefix]) fi +dnl Generate macros to rename public symbols. All public symbols are prefixed +dnl with je_ in the source code, so these macro definitions are needed even if +dnl --with-jemalloc-prefix wasn't specified. +for stem in malloc_conf malloc_message malloc calloc posix_memalign realloc free malloc_usable_size malloc_stats_print mallctl mallctlnametomib mallctlbymib memalign valloc allocm dallocm nallocm rallocm sallocm; do + n="je_${stem}" + m="${JEMALLOC_PREFIX}${stem}" + AC_DEFINE_UNQUOTED([${n}], [${m}]) +done dnl Do not mangle library-private APIs by default. 
AC_ARG_WITH([private_namespace], diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index 28be2aef..8f72f7fa 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -74,7 +74,7 @@ int ctl_bymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, bool ctl_boot(void); #define xmallctl(name, oldp, oldlenp, newp, newlen) do { \ - if (JEMALLOC_P(mallctl)(name, oldp, oldlenp, newp, newlen) \ + if (je_mallctl(name, oldp, oldlenp, newp, newlen) \ != 0) { \ malloc_write(": Failure in xmallctl(\""); \ malloc_write(name); \ @@ -84,7 +84,7 @@ bool ctl_boot(void); } while (0) #define xmallctlnametomib(name, mibp, miblenp) do { \ - if (JEMALLOC_P(mallctlnametomib)(name, mibp, miblenp) != 0) { \ + if (je_mallctlnametomib(name, mibp, miblenp) != 0) { \ malloc_write( \ ": Failure in xmallctlnametomib(\""); \ malloc_write(name); \ @@ -94,7 +94,7 @@ bool ctl_boot(void); } while (0) #define xmallctlbymib(mib, miblen, oldp, oldlenp, newp, newlen) do { \ - if (JEMALLOC_P(mallctlbymib)(mib, miblen, oldp, oldlenp, newp, \ + if (je_mallctlbymib(mib, miblen, oldp, oldlenp, newp, \ newlen) != 0) { \ malloc_write( \ ": Failure in xmallctlbymib()\n"); \ diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index aa073467..f13b406e 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -30,7 +30,7 @@ #include #include -#define JEMALLOC_MANGLE +#define JEMALLOC_NO_DEMANGLE #include "../jemalloc@install_suffix@.h" #include "jemalloc/internal/private_namespace.h" @@ -149,7 +149,7 @@ static const bool config_ivsalloc = #include "jemalloc/internal/qr.h" #include "jemalloc/internal/ql.h" -extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s); +extern void (*je_malloc_message)(void *wcbopaque, const char *s); /* * Define a custom assert() in order to reduce the chances of deadlock during @@ -618,13 +618,13 @@ sa2u(size_t size, size_t alignment, size_t *run_size_p) /* * Wrapper around malloc_message() that avoids the need for - * JEMALLOC_P(malloc_message)(...) throughout the code. + * je_malloc_message(...) throughout the code. */ JEMALLOC_INLINE void malloc_write(const char *s) { - JEMALLOC_P(malloc_message)(NULL, s); + je_malloc_message(NULL, s); } /* diff --git a/include/jemalloc/jemalloc.h.in b/include/jemalloc/jemalloc.h.in index 428c0d39..b9301175 100644 --- a/include/jemalloc/jemalloc.h.in +++ b/include/jemalloc/jemalloc.h.in @@ -15,9 +15,6 @@ extern "C" { #define JEMALLOC_VERSION_GID "@jemalloc_version_gid@" #include "jemalloc_defs@install_suffix@.h" -#ifndef JEMALLOC_P -# define JEMALLOC_P(s) s -#endif #define ALLOCM_LG_ALIGN(la) (la) #if LG_SIZEOF_PTR == 2 @@ -32,34 +29,99 @@ extern "C" { #define ALLOCM_ERR_OOM 1 #define ALLOCM_ERR_NOT_MOVED 2 -extern const char *JEMALLOC_P(malloc_conf); -extern void (*JEMALLOC_P(malloc_message))(void *, const char *); +/* + * The je_ prefix on the following public symbol declarations is an artifact of + * namespace management, and should be omitted in application code unless + * JEMALLOC_NO_DEMANGLE is defined (see below). 
+ */ +extern const char *je_malloc_conf; +extern void (*je_malloc_message)(void *, const char *); -void *JEMALLOC_P(malloc)(size_t size) JEMALLOC_ATTR(malloc); -void *JEMALLOC_P(calloc)(size_t num, size_t size) JEMALLOC_ATTR(malloc); -int JEMALLOC_P(posix_memalign)(void **memptr, size_t alignment, size_t size) +void *je_malloc(size_t size) JEMALLOC_ATTR(malloc); +void *je_calloc(size_t num, size_t size) JEMALLOC_ATTR(malloc); +int je_posix_memalign(void **memptr, size_t alignment, size_t size) JEMALLOC_ATTR(nonnull(1)); -void *JEMALLOC_P(realloc)(void *ptr, size_t size); -void JEMALLOC_P(free)(void *ptr); +void *je_realloc(void *ptr, size_t size); +void je_free(void *ptr); -size_t JEMALLOC_P(malloc_usable_size)(const void *ptr); -void JEMALLOC_P(malloc_stats_print)(void (*write_cb)(void *, const char *), - void *cbopaque, const char *opts); -int JEMALLOC_P(mallctl)(const char *name, void *oldp, size_t *oldlenp, - void *newp, size_t newlen); -int JEMALLOC_P(mallctlnametomib)(const char *name, size_t *mibp, - size_t *miblenp); -int JEMALLOC_P(mallctlbymib)(const size_t *mib, size_t miblen, void *oldp, +size_t je_malloc_usable_size(const void *ptr); +void je_malloc_stats_print(void (*write_cb)(void *, const char *), + void *je_cbopaque, const char *opts); +int je_mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp, + size_t newlen); +int je_mallctlnametomib(const char *name, size_t *mibp, size_t *miblenp); +int je_mallctlbymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen); -int JEMALLOC_P(allocm)(void **ptr, size_t *rsize, size_t size, int flags) +int je_allocm(void **ptr, size_t *rsize, size_t size, int flags) JEMALLOC_ATTR(nonnull(1)); -int JEMALLOC_P(rallocm)(void **ptr, size_t *rsize, size_t size, - size_t extra, int flags) JEMALLOC_ATTR(nonnull(1)); -int JEMALLOC_P(sallocm)(const void *ptr, size_t *rsize, int flags) +int je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, + int flags) JEMALLOC_ATTR(nonnull(1)); +int je_sallocm(const void *ptr, size_t *rsize, int flags) JEMALLOC_ATTR(nonnull(1)); -int JEMALLOC_P(dallocm)(void *ptr, int flags) JEMALLOC_ATTR(nonnull(1)); -int JEMALLOC_P(nallocm)(size_t *rsize, size_t size, int flags); +int je_dallocm(void *ptr, int flags) JEMALLOC_ATTR(nonnull(1)); +int je_nallocm(size_t *rsize, size_t size, int flags); + +/* + * By default application code must explicitly refer to mangled symbol names, + * so that it is possible to use jemalloc in conjunction with another allocator + * in the same application. Define JEMALLOC_MANGLE in order to cause automatic + * name mangling that matches the API prefixing that happened as a result of + * --with-mangling and/or --with-jemalloc-prefix configuration settings. 
+ */ +#ifdef JEMALLOC_MANGLE +#ifndef JEMALLOC_NO_DEMANGLE +#define JEMALLOC_NO_DEMANGLE +#endif +#define malloc_conf je_malloc_conf +#define malloc_message je_malloc_message +#define malloc je_malloc +#define calloc je_calloc +#define posix_memalign je_posix_memalign +#define realloc je_realloc +#define free je_free +#define malloc_usable_size je_malloc_usable_size +#define malloc_stats_print je_malloc_stats_print +#define mallctl je_mallctl +#define mallctlnametomib je_mallctlnametomib +#define mallctlbymib je_mallctlbymib +#define memalign je_memalign +#define valloc je_valloc +#define allocm je_allocm +#define dallocm je_dallocm +#define nallocm je_nallocm +#define rallocm je_rallocm +#define sallocm je_sallocm +#endif + +/* + * The je_* macros can be used as stable alternative names for the public + * jemalloc API if JEMALLOC_NO_DEMANGLE is defined. This is primarily meant + * for use in jemalloc itself, but it can be used by application code to + * provide isolation from the name mangling specified via --with-mangling + * and/or --with-jemalloc-prefix. + */ +#ifndef JEMALLOC_NO_DEMANGLE +#undef je_malloc_conf +#undef je_malloc_message +#undef je_malloc +#undef je_calloc +#undef je_posix_memalign +#undef je_realloc +#undef je_free +#undef je_malloc_usable_size +#undef je_malloc_stats_print +#undef je_mallctl +#undef je_mallctlnametomib +#undef je_mallctlbymib +#undef je_memalign +#undef je_valloc +#undef je_allocm +#undef je_dallocm +#undef je_nallocm +#undef je_rallocm +#undef je_sallocm +#endif #ifdef __cplusplus }; diff --git a/include/jemalloc/jemalloc_defs.h.in b/include/jemalloc/jemalloc_defs.h.in index 1360364a..3d1a8d30 100644 --- a/include/jemalloc/jemalloc_defs.h.in +++ b/include/jemalloc/jemalloc_defs.h.in @@ -1,22 +1,35 @@ -#ifndef JEMALLOC_DEFS_H_ -#define JEMALLOC_DEFS_H_ - /* - * If JEMALLOC_PREFIX is defined, it will cause all public APIs to be prefixed. - * This makes it possible, with some care, to use multiple allocators - * simultaneously. - * - * In many cases it is more convenient to manually prefix allocator function - * calls than to let macros do it automatically, particularly when using - * multiple allocators simultaneously. Define JEMALLOC_MANGLE before - * #include'ing jemalloc.h in order to cause name mangling that corresponds to - * the API prefixing. + * If JEMALLOC_PREFIX is defined via --with-jemalloc-prefix, it will cause all + * public APIs to be prefixed. This makes it possible, with some care, to use + * multiple allocators simultaneously. */ #undef JEMALLOC_PREFIX #undef JEMALLOC_CPREFIX -#if (defined(JEMALLOC_PREFIX) && defined(JEMALLOC_MANGLE)) -#undef JEMALLOC_P -#endif + +/* + * Name mangling for public symbols is controlled by --with-mangling and + * --with-jemalloc-prefix. With default settings the je_ prefix is stripped by + * these macro definitions. + */ +#undef je_malloc_conf +#undef je_malloc_message +#undef je_malloc +#undef je_calloc +#undef je_posix_memalign +#undef je_realloc +#undef je_free +#undef je_malloc_usable_size +#undef je_malloc_stats_print +#undef je_mallctl +#undef je_mallctlnametomib +#undef je_mallctlbymib +#undef je_memalign +#undef je_valloc +#undef je_allocm +#undef je_dallocm +#undef je_nallocm +#undef je_rallocm +#undef je_sallocm /* * JEMALLOC_PRIVATE_NAMESPACE is used as a prefix for all library-private APIs. @@ -150,5 +163,3 @@ /* sizeof(long) == 2^LG_SIZEOF_LONG. 
*/ #undef LG_SIZEOF_LONG - -#endif /* JEMALLOC_DEFS_H_ */ diff --git a/src/jemalloc.c b/src/jemalloc.c index 34fd1aa0..6e34706d 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -36,7 +36,7 @@ size_t lg_pagesize; unsigned ncpus; /* Runtime configuration options. */ -const char *JEMALLOC_P(malloc_conf) JEMALLOC_ATTR(visibility("default")); +const char *je_malloc_conf JEMALLOC_ATTR(visibility("default")); #ifdef JEMALLOC_DEBUG bool opt_abort = true; # ifdef JEMALLOC_FILL @@ -81,7 +81,7 @@ wrtmessage(void *cbopaque, const char *s) UNUSED int result = write(STDERR_FILENO, s, strlen(s)); } -void (*JEMALLOC_P(malloc_message))(void *, const char *s) +void (*je_malloc_message)(void *, const char *s) JEMALLOC_ATTR(visibility("default")) = wrtmessage; /******************************************************************************/ @@ -230,7 +230,7 @@ stats_print_atexit(void) } } } - JEMALLOC_P(malloc_stats_print)(NULL, NULL, NULL); + je_malloc_stats_print(NULL, NULL, NULL); } thread_allocated_t * @@ -422,12 +422,12 @@ malloc_conf_init(void) /* Get runtime configuration. */ switch (i) { case 0: - if (JEMALLOC_P(malloc_conf) != NULL) { + if (je_malloc_conf != NULL) { /* * Use options that were compiled into the * program. */ - opts = JEMALLOC_P(malloc_conf); + opts = je_malloc_conf; } else { /* No configuration specified. */ buf[0] = '\0'; @@ -836,7 +836,7 @@ jemalloc_darwin_init(void) JEMALLOC_ATTR(malloc) JEMALLOC_ATTR(visibility("default")) void * -JEMALLOC_P(malloc)(size_t size) +je_malloc(size_t size) { void *ret; size_t usize; @@ -990,7 +990,7 @@ RETURN: JEMALLOC_ATTR(nonnull(1)) JEMALLOC_ATTR(visibility("default")) int -JEMALLOC_P(posix_memalign)(void **memptr, size_t alignment, size_t size) +je_posix_memalign(void **memptr, size_t alignment, size_t size) { return imemalign(memptr, alignment, size, true); @@ -999,7 +999,7 @@ JEMALLOC_P(posix_memalign)(void **memptr, size_t alignment, size_t size) JEMALLOC_ATTR(malloc) JEMALLOC_ATTR(visibility("default")) void * -JEMALLOC_P(calloc)(size_t num, size_t size) +je_calloc(size_t num, size_t size) { void *ret; size_t num_size; @@ -1077,7 +1077,7 @@ RETURN: JEMALLOC_ATTR(visibility("default")) void * -JEMALLOC_P(realloc)(void *ptr, size_t size) +je_realloc(void *ptr, size_t size) { void *ret; size_t usize; @@ -1207,7 +1207,7 @@ RETURN: JEMALLOC_ATTR(visibility("default")) void -JEMALLOC_P(free)(void *ptr) +je_free(void *ptr) { if (ptr != NULL) { @@ -1234,17 +1234,13 @@ JEMALLOC_P(free)(void *ptr) /******************************************************************************/ /* * Begin non-standard override functions. - * - * These overrides are omitted if the JEMALLOC_PREFIX is defined, since the - * entire point is to avoid accidental mixed allocator usage. 
*/ -#ifndef JEMALLOC_PREFIX #ifdef JEMALLOC_OVERRIDE_MEMALIGN JEMALLOC_ATTR(malloc) JEMALLOC_ATTR(visibility("default")) void * -JEMALLOC_P(memalign)(size_t alignment, size_t size) +je_memalign(size_t alignment, size_t size) { void *ret #ifdef JEMALLOC_CC_SILENCE @@ -1260,7 +1256,7 @@ JEMALLOC_P(memalign)(size_t alignment, size_t size) JEMALLOC_ATTR(malloc) JEMALLOC_ATTR(visibility("default")) void * -JEMALLOC_P(valloc)(size_t size) +je_valloc(size_t size) { void *ret #ifdef JEMALLOC_CC_SILENCE @@ -1272,7 +1268,7 @@ JEMALLOC_P(valloc)(size_t size) } #endif -#if defined(__GLIBC__) && !defined(__UCLIBC__) +#if (!defined(JEMALLOC_PREFIX) && defined(__GLIBC__) && !defined(__UCLIBC__)) /* * glibc provides the RTLD_DEEPBIND flag for dlopen which can make it possible * to inconsistently reference libc's malloc(3)-compatible functions @@ -1283,20 +1279,18 @@ JEMALLOC_P(valloc)(size_t size) * ignored. */ JEMALLOC_ATTR(visibility("default")) -void (* const __free_hook)(void *ptr) = JEMALLOC_P(free); +void (* const __free_hook)(void *ptr) = je_free; JEMALLOC_ATTR(visibility("default")) -void *(* const __malloc_hook)(size_t size) = JEMALLOC_P(malloc); +void *(* const __malloc_hook)(size_t size) = je_malloc; JEMALLOC_ATTR(visibility("default")) -void *(* const __realloc_hook)(void *ptr, size_t size) = JEMALLOC_P(realloc); +void *(* const __realloc_hook)(void *ptr, size_t size) = je_realloc; JEMALLOC_ATTR(visibility("default")) -void *(* const __memalign_hook)(size_t alignment, size_t size) = - JEMALLOC_P(memalign); +void *(* const __memalign_hook)(size_t alignment, size_t size) = je_memalign; #endif -#endif /* JEMALLOC_PREFIX */ /* * End non-standard override functions. */ @@ -1307,7 +1301,7 @@ void *(* const __memalign_hook)(size_t alignment, size_t size) = JEMALLOC_ATTR(visibility("default")) size_t -JEMALLOC_P(malloc_usable_size)(const void *ptr) +je_malloc_usable_size(const void *ptr) { size_t ret; @@ -1325,8 +1319,8 @@ JEMALLOC_P(malloc_usable_size)(const void *ptr) JEMALLOC_ATTR(visibility("default")) void -JEMALLOC_P(malloc_stats_print)(void (*write_cb)(void *, const char *), - void *cbopaque, const char *opts) +je_malloc_stats_print(void (*write_cb)(void *, const char *), void *cbopaque, + const char *opts) { stats_print(write_cb, cbopaque, opts); @@ -1334,7 +1328,7 @@ JEMALLOC_P(malloc_stats_print)(void (*write_cb)(void *, const char *), JEMALLOC_ATTR(visibility("default")) int -JEMALLOC_P(mallctl)(const char *name, void *oldp, size_t *oldlenp, void *newp, +je_mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { @@ -1346,7 +1340,7 @@ JEMALLOC_P(mallctl)(const char *name, void *oldp, size_t *oldlenp, void *newp, JEMALLOC_ATTR(visibility("default")) int -JEMALLOC_P(mallctlnametomib)(const char *name, size_t *mibp, size_t *miblenp) +je_mallctlnametomib(const char *name, size_t *mibp, size_t *miblenp) { if (malloc_init()) @@ -1357,8 +1351,8 @@ JEMALLOC_P(mallctlnametomib)(const char *name, size_t *mibp, size_t *miblenp) JEMALLOC_ATTR(visibility("default")) int -JEMALLOC_P(mallctlbymib)(const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) +je_mallctlbymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, + void *newp, size_t newlen) { if (malloc_init()) @@ -1385,7 +1379,7 @@ iallocm(size_t usize, size_t alignment, bool zero) JEMALLOC_ATTR(nonnull(1)) JEMALLOC_ATTR(visibility("default")) int -JEMALLOC_P(allocm)(void **ptr, size_t *rsize, size_t size, int flags) +je_allocm(void **ptr, size_t *rsize, size_t size, int 
flags) { void *p; size_t usize; @@ -1451,8 +1445,7 @@ OOM: JEMALLOC_ATTR(nonnull(1)) JEMALLOC_ATTR(visibility("default")) int -JEMALLOC_P(rallocm)(void **ptr, size_t *rsize, size_t size, size_t extra, - int flags) +je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags) { void *p, *q; size_t usize; @@ -1544,7 +1537,7 @@ OOM: JEMALLOC_ATTR(nonnull(1)) JEMALLOC_ATTR(visibility("default")) int -JEMALLOC_P(sallocm)(const void *ptr, size_t *rsize, int flags) +je_sallocm(const void *ptr, size_t *rsize, int flags) { size_t sz; @@ -1565,7 +1558,7 @@ JEMALLOC_P(sallocm)(const void *ptr, size_t *rsize, int flags) JEMALLOC_ATTR(nonnull(1)) JEMALLOC_ATTR(visibility("default")) int -JEMALLOC_P(dallocm)(void *ptr, int flags) +je_dallocm(void *ptr, int flags) { size_t usize; @@ -1588,7 +1581,7 @@ JEMALLOC_P(dallocm)(void *ptr, int flags) JEMALLOC_ATTR(visibility("default")) int -JEMALLOC_P(nallocm)(size_t *rsize, size_t size, int flags) +je_nallocm(size_t *rsize, size_t size, int flags) { size_t usize; size_t alignment = (ZU(1) << (flags & ALLOCM_LG_ALIGN_MASK) diff --git a/src/stats.c b/src/stats.c index 2f61e7bc..f905d048 100644 --- a/src/stats.c +++ b/src/stats.c @@ -108,7 +108,7 @@ malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque, * function, so use the default one. malloc_write() is an * inline function, so use malloc_message() directly here. */ - write_cb = JEMALLOC_P(malloc_message); + write_cb = je_malloc_message; cbopaque = NULL; } @@ -376,8 +376,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, * */ epoch = 1; u64sz = sizeof(uint64_t); - err = JEMALLOC_P(mallctl)("epoch", &epoch, &u64sz, &epoch, - sizeof(uint64_t)); + err = je_mallctl("epoch", &epoch, &u64sz, &epoch, sizeof(uint64_t)); if (err != 0) { if (err == EAGAIN) { malloc_write(": Memory allocation failure in " @@ -395,7 +394,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, * function, so use the default one. malloc_write() is an * inline function, so use malloc_message() directly here. */ - write_cb = JEMALLOC_P(malloc_message); + write_cb = je_malloc_message; cbopaque = NULL; } @@ -448,22 +447,22 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, write_cb(cbopaque, "\n"); #define OPT_WRITE_BOOL(n) \ - if ((err = JEMALLOC_P(mallctl)("opt."#n, &bv, &bsz, \ - NULL, 0)) == 0) { \ + if ((err = je_mallctl("opt."#n, &bv, &bsz, NULL, 0)) \ + == 0) { \ write_cb(cbopaque, " opt."#n": "); \ write_cb(cbopaque, bv ? 
"true" : "false"); \ write_cb(cbopaque, "\n"); \ } #define OPT_WRITE_SIZE_T(n) \ - if ((err = JEMALLOC_P(mallctl)("opt."#n, &sv, &ssz, \ - NULL, 0)) == 0) { \ + if ((err = je_mallctl("opt."#n, &sv, &ssz, NULL, 0)) \ + == 0) { \ write_cb(cbopaque, " opt."#n": "); \ write_cb(cbopaque, u2s(sv, 10, s)); \ write_cb(cbopaque, "\n"); \ } #define OPT_WRITE_SSIZE_T(n) \ - if ((err = JEMALLOC_P(mallctl)("opt."#n, &ssv, &sssz, \ - NULL, 0)) == 0) { \ + if ((err = je_mallctl("opt."#n, &ssv, &sssz, NULL, 0)) \ + == 0) { \ if (ssv >= 0) { \ write_cb(cbopaque, " opt."#n": "); \ write_cb(cbopaque, u2s(ssv, 10, s)); \ @@ -474,8 +473,8 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, write_cb(cbopaque, "\n"); \ } #define OPT_WRITE_CHAR_P(n) \ - if ((err = JEMALLOC_P(mallctl)("opt."#n, &cpv, &cpsz, \ - NULL, 0)) == 0) { \ + if ((err = je_mallctl("opt."#n, &cpv, &cpsz, NULL, 0)) \ + == 0) { \ write_cb(cbopaque, " opt."#n": \""); \ write_cb(cbopaque, cpv); \ write_cb(cbopaque, "\"\n"); \ @@ -535,15 +534,15 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, write_cb(cbopaque, "Min active:dirty page ratio per arena: N/A\n"); } - if ((err = JEMALLOC_P(mallctl)("arenas.tcache_max", &sv, - &ssz, NULL, 0)) == 0) { + if ((err = je_mallctl("arenas.tcache_max", &sv, &ssz, NULL, 0)) + == 0) { write_cb(cbopaque, "Maximum thread-cached size class: "); write_cb(cbopaque, u2s(sv, 10, s)); write_cb(cbopaque, "\n"); } - if ((err = JEMALLOC_P(mallctl)("opt.lg_tcache_gc_sweep", &ssv, - &ssz, NULL, 0)) == 0) { + if ((err = je_mallctl("opt.lg_tcache_gc_sweep", &ssv, &ssz, + NULL, 0)) == 0) { size_t tcache_gc_sweep = (1U << ssv); bool tcache_enabled; CTL_GET("opt.tcache", &tcache_enabled, bool); @@ -552,8 +551,8 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, u2s(tcache_gc_sweep, 10, s) : "N/A"); write_cb(cbopaque, "\n"); } - if ((err = JEMALLOC_P(mallctl)("opt.prof", &bv, &bsz, NULL, 0)) - == 0 && bv) { + if ((err = je_mallctl("opt.prof", &bv, &bsz, NULL, 0)) == 0 && + bv) { CTL_GET("opt.lg_prof_sample", &sv, size_t); write_cb(cbopaque, "Average profile sample interval: "); write_cb(cbopaque, u2s((((uint64_t)1U) << sv), 10, s)); diff --git a/src/zone.c b/src/zone.c index 07f88617..5beed5f3 100644 --- a/src/zone.c +++ b/src/zone.c @@ -67,14 +67,14 @@ static void * zone_malloc(malloc_zone_t *zone, size_t size) { - return (JEMALLOC_P(malloc)(size)); + return (je_malloc(size)); } static void * zone_calloc(malloc_zone_t *zone, size_t num, size_t size) { - return (JEMALLOC_P(calloc)(num, size)); + return (je_calloc(num, size)); } static void * @@ -82,7 +82,7 @@ zone_valloc(malloc_zone_t *zone, size_t size) { void *ret = NULL; /* Assignment avoids useless compiler warning. */ - JEMALLOC_P(posix_memalign)(&ret, PAGE_SIZE, size); + je_posix_memalign(&ret, PAGE_SIZE, size); return (ret); } @@ -91,14 +91,14 @@ static void zone_free(malloc_zone_t *zone, void *ptr) { - JEMALLOC_P(free)(ptr); + je_free(ptr); } static void * zone_realloc(malloc_zone_t *zone, void *ptr, size_t size) { - return (JEMALLOC_P(realloc)(ptr, size)); + return (je_realloc(ptr, size)); } #if (JEMALLOC_ZONE_VERSION >= 6) @@ -107,7 +107,7 @@ zone_memalign(malloc_zone_t *zone, size_t alignment, size_t size) { void *ret = NULL; /* Assignment avoids useless compiler warning. 
*/ - JEMALLOC_P(posix_memalign)(&ret, alignment, size); + je_posix_memalign(&ret, alignment, size); return (ret); } @@ -117,7 +117,7 @@ zone_free_definite_size(malloc_zone_t *zone, void *ptr, size_t size) { assert(ivsalloc(ptr) == size); - JEMALLOC_P(free)(ptr); + je_free(ptr); } #endif @@ -208,7 +208,7 @@ ozone_free(malloc_zone_t *zone, void *ptr) { if (ivsalloc(ptr) != 0) - JEMALLOC_P(free)(ptr); + je_free(ptr); else { size_t size = szone.size(zone, ptr); if (size != 0) @@ -222,17 +222,17 @@ ozone_realloc(malloc_zone_t *zone, void *ptr, size_t size) size_t oldsize; if (ptr == NULL) - return (JEMALLOC_P(malloc)(size)); + return (je_malloc(size)); oldsize = ivsalloc(ptr); if (oldsize != 0) - return (JEMALLOC_P(realloc)(ptr, size)); + return (je_realloc(ptr, size)); else { oldsize = szone.size(zone, ptr); if (oldsize == 0) - return (JEMALLOC_P(malloc)(size)); + return (je_malloc(size)); else { - void *ret = JEMALLOC_P(malloc)(size); + void *ret = je_malloc(size); if (ret != NULL) { memcpy(ret, ptr, (oldsize < size) ? oldsize : size); @@ -268,7 +268,7 @@ ozone_free_definite_size(malloc_zone_t *zone, void *ptr, size_t size) if (ivsalloc(ptr) != 0) { assert(ivsalloc(ptr) == size); - JEMALLOC_P(free)(ptr); + je_free(ptr); } else { assert(size == szone.size(zone, ptr)); szone.free_definite_size(zone, ptr, size); diff --git a/test/allocated.c b/test/allocated.c index b1e40e47..701c1754 100644 --- a/test/allocated.c +++ b/test/allocated.c @@ -20,8 +20,7 @@ thread_start(void *arg) size_t sz, usize; sz = sizeof(a0); - if ((err = JEMALLOC_P(mallctl)("thread.allocated", &a0, &sz, NULL, - 0))) { + if ((err = mallctl("thread.allocated", &a0, &sz, NULL, 0))) { if (err == ENOENT) { #ifdef JEMALLOC_STATS assert(false); @@ -33,8 +32,7 @@ thread_start(void *arg) exit(1); } sz = sizeof(ap0); - if ((err = JEMALLOC_P(mallctl)("thread.allocatedp", &ap0, &sz, NULL, - 0))) { + if ((err = mallctl("thread.allocatedp", &ap0, &sz, NULL, 0))) { if (err == ENOENT) { #ifdef JEMALLOC_STATS assert(false); @@ -48,8 +46,7 @@ thread_start(void *arg) assert(*ap0 == a0); sz = sizeof(d0); - if ((err = JEMALLOC_P(mallctl)("thread.deallocated", &d0, &sz, NULL, - 0))) { + if ((err = mallctl("thread.deallocated", &d0, &sz, NULL, 0))) { if (err == ENOENT) { #ifdef JEMALLOC_STATS assert(false); @@ -61,8 +58,7 @@ thread_start(void *arg) exit(1); } sz = sizeof(dp0); - if ((err = JEMALLOC_P(mallctl)("thread.deallocatedp", &dp0, &sz, NULL, - 0))) { + if ((err = mallctl("thread.deallocatedp", &dp0, &sz, NULL, 0))) { if (err == ENOENT) { #ifdef JEMALLOC_STATS assert(false); @@ -75,28 +71,28 @@ thread_start(void *arg) } assert(*dp0 == d0); - p = JEMALLOC_P(malloc)(1); + p = malloc(1); if (p == NULL) { fprintf(stderr, "%s(): Error in malloc()\n", __func__); exit(1); } sz = sizeof(a1); - JEMALLOC_P(mallctl)("thread.allocated", &a1, &sz, NULL, 0); + mallctl("thread.allocated", &a1, &sz, NULL, 0); sz = sizeof(ap1); - JEMALLOC_P(mallctl)("thread.allocatedp", &ap1, &sz, NULL, 0); + mallctl("thread.allocatedp", &ap1, &sz, NULL, 0); assert(*ap1 == a1); assert(ap0 == ap1); - usize = JEMALLOC_P(malloc_usable_size)(p); + usize = malloc_usable_size(p); assert(a0 + usize <= a1); - JEMALLOC_P(free)(p); + free(p); sz = sizeof(d1); - JEMALLOC_P(mallctl)("thread.deallocated", &d1, &sz, NULL, 0); + mallctl("thread.deallocated", &d1, &sz, NULL, 0); sz = sizeof(dp1); - JEMALLOC_P(mallctl)("thread.deallocatedp", &dp1, &sz, NULL, 0); + mallctl("thread.deallocatedp", &dp1, &sz, NULL, 0); assert(*dp1 == d1); assert(dp0 == dp1); diff --git a/test/allocm.c 
b/test/allocm.c index 762e350c..151f5749 100644 --- a/test/allocm.c +++ b/test/allocm.c @@ -23,13 +23,13 @@ main(void) sz = 42; nsz = 0; - r = JEMALLOC_P(nallocm)(&nsz, sz, 0); + r = nallocm(&nsz, sz, 0); if (r != ALLOCM_SUCCESS) { fprintf(stderr, "Unexpected nallocm() error\n"); abort(); } rsz = 0; - r = JEMALLOC_P(allocm)(&p, &rsz, sz, 0); + r = allocm(&p, &rsz, sz, 0); if (r != ALLOCM_SUCCESS) { fprintf(stderr, "Unexpected allocm() error\n"); abort(); @@ -38,32 +38,32 @@ main(void) fprintf(stderr, "Real size smaller than expected\n"); if (nsz != rsz) fprintf(stderr, "nallocm()/allocm() rsize mismatch\n"); - if (JEMALLOC_P(dallocm)(p, 0) != ALLOCM_SUCCESS) + if (dallocm(p, 0) != ALLOCM_SUCCESS) fprintf(stderr, "Unexpected dallocm() error\n"); - r = JEMALLOC_P(allocm)(&p, NULL, sz, 0); + r = allocm(&p, NULL, sz, 0); if (r != ALLOCM_SUCCESS) { fprintf(stderr, "Unexpected allocm() error\n"); abort(); } - if (JEMALLOC_P(dallocm)(p, 0) != ALLOCM_SUCCESS) + if (dallocm(p, 0) != ALLOCM_SUCCESS) fprintf(stderr, "Unexpected dallocm() error\n"); nsz = 0; - r = JEMALLOC_P(nallocm)(&nsz, sz, ALLOCM_ZERO); + r = nallocm(&nsz, sz, ALLOCM_ZERO); if (r != ALLOCM_SUCCESS) { fprintf(stderr, "Unexpected nallocm() error\n"); abort(); } rsz = 0; - r = JEMALLOC_P(allocm)(&p, &rsz, sz, ALLOCM_ZERO); + r = allocm(&p, &rsz, sz, ALLOCM_ZERO); if (r != ALLOCM_SUCCESS) { fprintf(stderr, "Unexpected allocm() error\n"); abort(); } if (nsz != rsz) fprintf(stderr, "nallocm()/allocm() rsize mismatch\n"); - if (JEMALLOC_P(dallocm)(p, 0) != ALLOCM_SUCCESS) + if (dallocm(p, 0) != ALLOCM_SUCCESS) fprintf(stderr, "Unexpected dallocm() error\n"); #if LG_SIZEOF_PTR == 3 @@ -74,14 +74,14 @@ main(void) sz = 0x80000000LU; #endif nsz = 0; - r = JEMALLOC_P(nallocm)(&nsz, sz, ALLOCM_ALIGN(alignment)); + r = nallocm(&nsz, sz, ALLOCM_ALIGN(alignment)); if (r == ALLOCM_SUCCESS) { fprintf(stderr, "Expected error for nallocm(&nsz, %zu, 0x%x)\n", sz, ALLOCM_ALIGN(alignment)); } rsz = 0; - r = JEMALLOC_P(allocm)(&p, &rsz, sz, ALLOCM_ALIGN(alignment)); + r = allocm(&p, &rsz, sz, ALLOCM_ALIGN(alignment)); if (r == ALLOCM_SUCCESS) { fprintf(stderr, "Expected error for allocm(&p, %zu, 0x%x)\n", @@ -98,11 +98,11 @@ main(void) sz = 0x84000001LU; #endif nsz = 0; - r = JEMALLOC_P(nallocm)(&nsz, sz, ALLOCM_ALIGN(alignment)); + r = nallocm(&nsz, sz, ALLOCM_ALIGN(alignment)); if (r != ALLOCM_SUCCESS) fprintf(stderr, "Unexpected nallocm() error\n"); rsz = 0; - r = JEMALLOC_P(allocm)(&p, &rsz, sz, ALLOCM_ALIGN(alignment)); + r = allocm(&p, &rsz, sz, ALLOCM_ALIGN(alignment)); if (r == ALLOCM_SUCCESS) { fprintf(stderr, "Expected error for allocm(&p, %zu, 0x%x)\n", @@ -116,14 +116,14 @@ main(void) sz = 0xfffffff0LU; #endif nsz = 0; - r = JEMALLOC_P(nallocm)(&nsz, sz, ALLOCM_ALIGN(alignment)); + r = nallocm(&nsz, sz, ALLOCM_ALIGN(alignment)); if (r == ALLOCM_SUCCESS) { fprintf(stderr, "Expected error for nallocm(&nsz, %zu, 0x%x)\n", sz, ALLOCM_ALIGN(alignment)); } rsz = 0; - r = JEMALLOC_P(allocm)(&p, &rsz, sz, ALLOCM_ALIGN(alignment)); + r = allocm(&p, &rsz, sz, ALLOCM_ALIGN(alignment)); if (r == ALLOCM_SUCCESS) { fprintf(stderr, "Expected error for allocm(&p, %zu, 0x%x)\n", @@ -145,7 +145,7 @@ main(void) sz += (alignment >> (LG_SIZEOF_PTR-1)) - 1) { for (i = 0; i < NITER; i++) { nsz = 0; - r = JEMALLOC_P(nallocm)(&nsz, sz, + r = nallocm(&nsz, sz, ALLOCM_ALIGN(alignment) | ALLOCM_ZERO); if (r != ALLOCM_SUCCESS) { fprintf(stderr, @@ -155,7 +155,7 @@ main(void) exit(1); } rsz = 0; - r = JEMALLOC_P(allocm)(&ps[i], &rsz, sz, + r = allocm(&ps[i], &rsz, sz, 
ALLOCM_ALIGN(alignment) | ALLOCM_ZERO); if (r != ALLOCM_SUCCESS) { fprintf(stderr, @@ -179,14 +179,14 @@ main(void) "%p inadequately aligned for" " alignment: %zu\n", p, alignment); } - JEMALLOC_P(sallocm)(ps[i], &rsz, 0); + sallocm(ps[i], &rsz, 0); total += rsz; if (total >= (MAXALIGN << 1)) break; } for (i = 0; i < NITER; i++) { if (ps[i] != NULL) { - JEMALLOC_P(dallocm)(ps[i], 0); + dallocm(ps[i], 0); ps[i] = NULL; } } diff --git a/test/mremap.c b/test/mremap.c index 146c66f4..8d35a64e 100644 --- a/test/mremap.c +++ b/test/mremap.c @@ -17,8 +17,7 @@ main(void) fprintf(stderr, "Test begin\n"); sz = sizeof(lg_chunk); - if ((err = JEMALLOC_P(mallctl)("opt.lg_chunk", &lg_chunk, &sz, NULL, - 0))) { + if ((err = mallctl("opt.lg_chunk", &lg_chunk, &sz, NULL, 0))) { assert(err != ENOENT); fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__, strerror(err)); diff --git a/test/posix_memalign.c b/test/posix_memalign.c index 3e306c01..789131ce 100644 --- a/test/posix_memalign.c +++ b/test/posix_memalign.c @@ -24,7 +24,7 @@ main(void) /* Test error conditions. */ for (alignment = 0; alignment < sizeof(void *); alignment++) { - err = JEMALLOC_P(posix_memalign)(&p, alignment, 1); + err = posix_memalign(&p, alignment, 1); if (err != EINVAL) { fprintf(stderr, "Expected error for invalid alignment %zu\n", @@ -34,7 +34,7 @@ main(void) for (alignment = sizeof(size_t); alignment < MAXALIGN; alignment <<= 1) { - err = JEMALLOC_P(posix_memalign)(&p, alignment + 1, 1); + err = posix_memalign(&p, alignment + 1, 1); if (err == 0) { fprintf(stderr, "Expected error for invalid alignment %zu\n", @@ -49,7 +49,7 @@ main(void) alignment = 0x80000000LU; size = 0x80000000LU; #endif - err = JEMALLOC_P(posix_memalign)(&p, alignment, size); + err = posix_memalign(&p, alignment, size); if (err == 0) { fprintf(stderr, "Expected error for posix_memalign(&p, %zu, %zu)\n", @@ -63,7 +63,7 @@ main(void) alignment = 0x40000000LU; size = 0x84000001LU; #endif - err = JEMALLOC_P(posix_memalign)(&p, alignment, size); + err = posix_memalign(&p, alignment, size); if (err == 0) { fprintf(stderr, "Expected error for posix_memalign(&p, %zu, %zu)\n", @@ -76,7 +76,7 @@ main(void) #else size = 0xfffffff0LU; #endif - err = JEMALLOC_P(posix_memalign)(&p, alignment, size); + err = posix_memalign(&p, alignment, size); if (err == 0) { fprintf(stderr, "Expected error for posix_memalign(&p, %zu, %zu)\n", @@ -95,7 +95,7 @@ main(void) size < 3 * alignment && size < (1U << 31); size += (alignment >> (LG_SIZEOF_PTR-1)) - 1) { for (i = 0; i < NITER; i++) { - err = JEMALLOC_P(posix_memalign)(&ps[i], + err = posix_memalign(&ps[i], alignment, size); if (err) { fprintf(stderr, @@ -103,13 +103,13 @@ main(void) size, size, strerror(err)); exit(1); } - total += JEMALLOC_P(malloc_usable_size)(ps[i]); + total += malloc_usable_size(ps[i]); if (total >= (MAXALIGN << 1)) break; } for (i = 0; i < NITER; i++) { if (ps[i] != NULL) { - JEMALLOC_P(free)(ps[i]); + free(ps[i]); ps[i] = NULL; } } diff --git a/test/rallocm.c b/test/rallocm.c index ccf326bb..9c0df403 100644 --- a/test/rallocm.c +++ b/test/rallocm.c @@ -24,14 +24,14 @@ main(void) pagesize = (size_t)result; } - r = JEMALLOC_P(allocm)(&p, &sz, 42, 0); + r = allocm(&p, &sz, 42, 0); if (r != ALLOCM_SUCCESS) { fprintf(stderr, "Unexpected allocm() error\n"); abort(); } q = p; - r = JEMALLOC_P(rallocm)(&q, &tsz, sz, 0, ALLOCM_NO_MOVE); + r = rallocm(&q, &tsz, sz, 0, ALLOCM_NO_MOVE); if (r != ALLOCM_SUCCESS) fprintf(stderr, "Unexpected rallocm() error\n"); if (q != p) @@ -42,7 +42,7 @@ main(void) } q = p; - r = 
JEMALLOC_P(rallocm)(&q, &tsz, sz, 5, ALLOCM_NO_MOVE); + r = rallocm(&q, &tsz, sz, 5, ALLOCM_NO_MOVE); if (r != ALLOCM_SUCCESS) fprintf(stderr, "Unexpected rallocm() error\n"); if (q != p) @@ -53,7 +53,7 @@ main(void) } q = p; - r = JEMALLOC_P(rallocm)(&q, &tsz, sz + 5, 0, ALLOCM_NO_MOVE); + r = rallocm(&q, &tsz, sz + 5, 0, ALLOCM_NO_MOVE); if (r != ALLOCM_ERR_NOT_MOVED) fprintf(stderr, "Unexpected rallocm() result\n"); if (q != p) @@ -64,7 +64,7 @@ main(void) } q = p; - r = JEMALLOC_P(rallocm)(&q, &tsz, sz + 5, 0, 0); + r = rallocm(&q, &tsz, sz + 5, 0, 0); if (r != ALLOCM_SUCCESS) fprintf(stderr, "Unexpected rallocm() error\n"); if (q == p) @@ -76,7 +76,7 @@ main(void) p = q; sz = tsz; - r = JEMALLOC_P(rallocm)(&q, &tsz, pagesize*2, 0, 0); + r = rallocm(&q, &tsz, pagesize*2, 0, 0); if (r != ALLOCM_SUCCESS) fprintf(stderr, "Unexpected rallocm() error\n"); if (q == p) @@ -88,7 +88,7 @@ main(void) p = q; sz = tsz; - r = JEMALLOC_P(rallocm)(&q, &tsz, pagesize*4, 0, 0); + r = rallocm(&q, &tsz, pagesize*4, 0, 0); if (r != ALLOCM_SUCCESS) fprintf(stderr, "Unexpected rallocm() error\n"); if (tsz == sz) { @@ -98,7 +98,7 @@ main(void) p = q; sz = tsz; - r = JEMALLOC_P(rallocm)(&q, &tsz, pagesize*2, 0, ALLOCM_NO_MOVE); + r = rallocm(&q, &tsz, pagesize*2, 0, ALLOCM_NO_MOVE); if (r != ALLOCM_SUCCESS) fprintf(stderr, "Unexpected rallocm() error\n"); if (q != p) @@ -109,7 +109,7 @@ main(void) } sz = tsz; - r = JEMALLOC_P(rallocm)(&q, &tsz, pagesize*4, 0, ALLOCM_NO_MOVE); + r = rallocm(&q, &tsz, pagesize*4, 0, ALLOCM_NO_MOVE); if (r != ALLOCM_SUCCESS) fprintf(stderr, "Unexpected rallocm() error\n"); if (q != p) @@ -120,7 +120,7 @@ main(void) } sz = tsz; - JEMALLOC_P(dallocm)(p, 0); + dallocm(p, 0); fprintf(stderr, "Test end\n"); return (0); diff --git a/test/thread_arena.c b/test/thread_arena.c index ef8d6817..2922d1b4 100644 --- a/test/thread_arena.c +++ b/test/thread_arena.c @@ -18,22 +18,22 @@ thread_start(void *arg) size_t size; int err; - p = JEMALLOC_P(malloc)(1); + p = malloc(1); if (p == NULL) { fprintf(stderr, "%s(): Error in malloc()\n", __func__); return (void *)1; } size = sizeof(arena_ind); - if ((err = JEMALLOC_P(mallctl)("thread.arena", &arena_ind, &size, - &main_arena_ind, sizeof(main_arena_ind)))) { + if ((err = mallctl("thread.arena", &arena_ind, &size, &main_arena_ind, + sizeof(main_arena_ind)))) { fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__, strerror(err)); return (void *)1; } size = sizeof(arena_ind); - if ((err = JEMALLOC_P(mallctl)("thread.arena", &arena_ind, &size, NULL, + if ((err = mallctl("thread.arena", &arena_ind, &size, NULL, 0))) { fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__, strerror(err)); @@ -57,7 +57,7 @@ main(void) fprintf(stderr, "Test begin\n"); - p = JEMALLOC_P(malloc)(1); + p = malloc(1); if (p == NULL) { fprintf(stderr, "%s(): Error in malloc()\n", __func__); ret = 1; @@ -65,8 +65,7 @@ main(void) } size = sizeof(arena_ind); - if ((err = JEMALLOC_P(mallctl)("thread.arena", &arena_ind, &size, NULL, - 0))) { + if ((err = mallctl("thread.arena", &arena_ind, &size, NULL, 0))) { fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__, strerror(err)); ret = 1; From 62320b85517c472beb12bf0ba69660393712f51a Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 1 Mar 2012 17:53:16 -0800 Subject: [PATCH 035/205] Reorder macros. 
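This makes the #define and #undef lists match the order of the je_* prototypes earlier in the header: allocm, rallocm, sallocm, dallocm, nallocm.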
--- include/jemalloc/jemalloc.h.in | 8 ++++---- include/jemalloc/jemalloc_defs.h.in | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/include/jemalloc/jemalloc.h.in b/include/jemalloc/jemalloc.h.in index b9301175..f567ee57 100644 --- a/include/jemalloc/jemalloc.h.in +++ b/include/jemalloc/jemalloc.h.in @@ -88,10 +88,10 @@ int je_nallocm(size_t *rsize, size_t size, int flags); #define memalign je_memalign #define valloc je_valloc #define allocm je_allocm -#define dallocm je_dallocm -#define nallocm je_nallocm #define rallocm je_rallocm #define sallocm je_sallocm +#define dallocm je_dallocm +#define nallocm je_nallocm #endif /* @@ -117,10 +117,10 @@ int je_nallocm(size_t *rsize, size_t size, int flags); #undef je_memalign #undef je_valloc #undef je_allocm -#undef je_dallocm -#undef je_nallocm #undef je_rallocm #undef je_sallocm +#undef je_dallocm +#undef je_nallocm #endif #ifdef __cplusplus diff --git a/include/jemalloc/jemalloc_defs.h.in b/include/jemalloc/jemalloc_defs.h.in index 3d1a8d30..444950c8 100644 --- a/include/jemalloc/jemalloc_defs.h.in +++ b/include/jemalloc/jemalloc_defs.h.in @@ -26,10 +26,10 @@ #undef je_memalign #undef je_valloc #undef je_allocm -#undef je_dallocm -#undef je_nallocm #undef je_rallocm #undef je_sallocm +#undef je_dallocm +#undef je_nallocm /* * JEMALLOC_PRIVATE_NAMESPACE is used as a prefix for all library-private APIs. From 84f7cdb0c588322dfd50a26497fc1cb54b792018 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 2 Mar 2012 15:59:45 -0800 Subject: [PATCH 036/205] Rename prn to prng. Rename prn to prng so that Windows doesn't choke when trying to create a file named prn.h. --- include/jemalloc/internal/ckh.h | 2 +- include/jemalloc/internal/jemalloc_internal.h.in | 8 ++++---- include/jemalloc/internal/{prn.h => prng.h} | 8 ++++---- include/jemalloc/internal/prof.h | 6 +++--- src/ckh.c | 6 +++--- src/prof.c | 2 +- 6 files changed, 16 insertions(+), 16 deletions(-) rename include/jemalloc/internal/{prn.h => prng.h} (89%) diff --git a/include/jemalloc/internal/ckh.h b/include/jemalloc/internal/ckh.h index 65f30f56..05d1fc03 100644 --- a/include/jemalloc/internal/ckh.h +++ b/include/jemalloc/internal/ckh.h @@ -42,7 +42,7 @@ struct ckh_s { /* Used for pseudo-random number generation. */ #define CKH_A 1103515241 #define CKH_C 12347 - uint32_t prn_state; + uint32_t prng_state; /* Total number of items. 
*/ size_t count; diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index f13b406e..403e4804 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -316,7 +316,7 @@ extern void (*je_malloc_message)(void *wcbopaque, const char *s); (((s) + PAGE_MASK) & ~PAGE_MASK) #include "jemalloc/internal/atomic.h" -#include "jemalloc/internal/prn.h" +#include "jemalloc/internal/prng.h" #include "jemalloc/internal/ckh.h" #include "jemalloc/internal/size_classes.h" #include "jemalloc/internal/stats.h" @@ -342,7 +342,7 @@ extern void (*je_malloc_message)(void *wcbopaque, const char *s); #define JEMALLOC_H_STRUCTS #include "jemalloc/internal/atomic.h" -#include "jemalloc/internal/prn.h" +#include "jemalloc/internal/prng.h" #include "jemalloc/internal/ckh.h" #include "jemalloc/internal/size_classes.h" #include "jemalloc/internal/stats.h" @@ -445,7 +445,7 @@ void jemalloc_prefork(void); void jemalloc_postfork(void); #include "jemalloc/internal/atomic.h" -#include "jemalloc/internal/prn.h" +#include "jemalloc/internal/prng.h" #include "jemalloc/internal/ckh.h" #include "jemalloc/internal/size_classes.h" #include "jemalloc/internal/stats.h" @@ -471,7 +471,7 @@ void jemalloc_postfork(void); #define JEMALLOC_H_INLINES #include "jemalloc/internal/atomic.h" -#include "jemalloc/internal/prn.h" +#include "jemalloc/internal/prng.h" #include "jemalloc/internal/ckh.h" #include "jemalloc/internal/size_classes.h" #include "jemalloc/internal/stats.h" diff --git a/include/jemalloc/internal/prn.h b/include/jemalloc/internal/prng.h similarity index 89% rename from include/jemalloc/internal/prn.h rename to include/jemalloc/internal/prng.h index 0709d708..83a5462b 100644 --- a/include/jemalloc/internal/prn.h +++ b/include/jemalloc/internal/prng.h @@ -4,7 +4,7 @@ /* * Simple linear congruential pseudo-random number generator: * - * prn(y) = (a*x + c) % m + * prng(y) = (a*x + c) % m * * where the following constants ensure maximal period: * @@ -25,7 +25,7 @@ * uint32_t state : Seed value. * const uint32_t a, c : See above discussion. */ -#define prn32(r, lg_range, state, a, c) do { \ +#define prng32(r, lg_range, state, a, c) do { \ assert(lg_range > 0); \ assert(lg_range <= 32); \ \ @@ -34,8 +34,8 @@ r >>= (32 - lg_range); \ } while (false) -/* Same as prn32(), but 64 bits of pseudo-randomness, using uint64_t. */ -#define prn64(r, lg_range, state, a, c) do { \ +/* Same as prng32(), but 64 bits of pseudo-randomness, using uint64_t. */ +#define prng64(r, lg_range, state, a, c) do { \ assert(lg_range > 0); \ assert(lg_range <= 64); \ \ diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index 744d361e..e08a50ab 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -140,7 +140,7 @@ struct prof_tdata_s { void **vec; /* Sampling state. */ - uint64_t prn_state; + uint64_t prng_state; uint64_t threshold; uint64_t accum; }; @@ -245,7 +245,7 @@ bool prof_boot2(void); if (prof_tdata->threshold == 0) { \ /* Initialize. Seed the prng differently for */\ /* each thread. 
*/\ - prof_tdata->prn_state = \ + prof_tdata->prng_state = \ (uint64_t)(uintptr_t)&size; \ prof_sample_threshold_update(prof_tdata); \ } \ @@ -307,7 +307,7 @@ prof_sample_threshold_update(prof_tdata_t *prof_tdata) * pp 500 * (http://cg.scs.carleton.ca/~luc/rnbookindex.html) */ - prn64(r, 53, prof_tdata->prn_state, + prng64(r, 53, prof_tdata->prng_state, (uint64_t)6364136223846793005LLU, (uint64_t)1442695040888963407LLU); u = (double)r * (1.0/9007199254740992.0L); prof_tdata->threshold = (uint64_t)(log(u) / diff --git a/src/ckh.c b/src/ckh.c index 4a6d1e37..bea6ef8a 100644 --- a/src/ckh.c +++ b/src/ckh.c @@ -99,7 +99,7 @@ ckh_try_bucket_insert(ckh_t *ckh, size_t bucket, const void *key, * Cycle through the cells in the bucket, starting at a random position. * The randomness avoids worst-case search overhead as buckets fill up. */ - prn32(offset, LG_CKH_BUCKET_CELLS, ckh->prn_state, CKH_A, CKH_C); + prng32(offset, LG_CKH_BUCKET_CELLS, ckh->prng_state, CKH_A, CKH_C); for (i = 0; i < (ZU(1) << LG_CKH_BUCKET_CELLS); i++) { cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + ((i + offset) & ((ZU(1) << LG_CKH_BUCKET_CELLS) - 1))]; @@ -141,7 +141,7 @@ ckh_evict_reloc_insert(ckh_t *ckh, size_t argbucket, void const **argkey, * were an item for which both hashes indicated the same * bucket. */ - prn32(i, LG_CKH_BUCKET_CELLS, ckh->prn_state, CKH_A, CKH_C); + prng32(i, LG_CKH_BUCKET_CELLS, ckh->prng_state, CKH_A, CKH_C); cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + i]; assert(cell->key != NULL); @@ -361,7 +361,7 @@ ckh_new(ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh_keycomp_t *keycomp) ckh->ninserts = 0; ckh->nrelocs = 0; #endif - ckh->prn_state = 42; /* Value doesn't really matter. */ + ckh->prng_state = 42; /* Value doesn't really matter. */ ckh->count = 0; /* diff --git a/src/prof.c b/src/prof.c index 21b60e3e..c4a2d64d 100644 --- a/src/prof.c +++ b/src/prof.c @@ -1170,7 +1170,7 @@ prof_tdata_init(void) return (NULL); } - prof_tdata->prn_state = 0; + prof_tdata->prng_state = 0; prof_tdata->threshold = 0; prof_tdata->accum = 0; From 7b398aca3bfa558774548ffed6c1a8baba46cc79 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 2 Mar 2012 16:38:37 -0800 Subject: [PATCH 037/205] Add fine-grained build/install targets. --- INSTALL | 13 +++++++++++++ Makefile.in | 27 ++++++++++++++++++--------- 2 files changed, 31 insertions(+), 9 deletions(-) diff --git a/INSTALL b/INSTALL index 6e32d715..92cd0851 100644 --- a/INSTALL +++ b/INSTALL @@ -169,11 +169,24 @@ PATH="?" === Advanced compilation ======================================================= +To build only parts of jemalloc, use the following targets: + + build_lib_shared + build_lib_static + build_lib + build_doc_html + build_doc_man + build_doc + To install only parts of jemalloc, use the following targets: install_bin install_include + install_lib_shared + install_lib_static install_lib + install_doc_html + install_doc_man install_doc To clean up build results to varying degrees, use the following make targets: diff --git a/Makefile.in b/Makefile.in index ca4365e8..0accf102 100644 --- a/Makefile.in +++ b/Makefile.in @@ -76,9 +76,9 @@ CTESTS := @srcroot@test/allocated.c @srcroot@test/allocm.c \ .SECONDARY : $(CTESTS:@srcroot@%.c=@objroot@%.o) # Default target. 
-all: $(DSOS) $(STATIC_LIBS) +all: build -dist: doc +dist: build_doc @srcroot@doc/%.html : @objroot@doc/%.xml @srcroot@doc/stylesheet.xsl @objroot@doc/html.xsl @XSLTPROC@ -o $@ @objroot@doc/html.xsl $< @@ -86,9 +86,9 @@ dist: doc @srcroot@doc/%.3 : @objroot@doc/%.xml @srcroot@doc/stylesheet.xsl @objroot@doc/manpages.xsl @XSLTPROC@ -o $@ @objroot@doc/manpages.xsl $< -doc_html: $(DOCS_HTML) -doc_man: $(DOCS_MAN3) -doc: $(DOCS) +build_doc_html: $(DOCS_HTML) +build_doc_man: $(DOCS_MAN3) +build_doc: $(DOCS) # # Include generated dependency files. @@ -140,6 +140,10 @@ else $(CC) -o $@ $< -L@objroot@lib -ljemalloc@install_suffix@ -lpthread endif +build_lib_shared: $(DSOS) +build_lib_static: $(STATIC_LIBS) +build: build_lib_shared build_lib_static + install_bin: install -d $(BINDIR) @for b in $(BINS); do \ @@ -154,28 +158,33 @@ install_include: install -m 644 $$h $(INCLUDEDIR)/jemalloc; \ done -install_lib: $(DSOS) $(STATIC_LIBS) +install_lib_shared: $(DSOS) install -d $(LIBDIR) install -m 755 @objroot@lib/libjemalloc@install_suffix@.$(SO).$(REV) $(LIBDIR) ln -sf libjemalloc@install_suffix@.$(SO).$(REV) $(LIBDIR)/libjemalloc@install_suffix@.$(SO) + +install_lib_static: $(DSOS) $(STATIC_LIBS) + install -d $(LIBDIR) install -m 755 @objroot@lib/libjemalloc@install_suffix@_pic.a $(LIBDIR) install -m 755 @objroot@lib/libjemalloc@install_suffix@.a $(LIBDIR) -install_html: +install_lib: install_lib_shared install_lib_static + +install_doc_html: install -d $(DATADIR)/doc/jemalloc@install_suffix@ @for d in $(DOCS_HTML); do \ echo "install -m 644 $$d $(DATADIR)/doc/jemalloc@install_suffix@"; \ install -m 644 $$d $(DATADIR)/doc/jemalloc@install_suffix@; \ done -install_man: +install_doc_man: install -d $(MANDIR)/man3 @for d in $(DOCS_MAN3); do \ echo "install -m 644 $$d $(MANDIR)/man3"; \ install -m 644 $$d $(MANDIR)/man3; \ done -install_doc: install_html install_man +install_doc: install_doc_html install_doc_man install: install_bin install_include install_lib install_doc From 7e77eaffffe5c73d44ee64b14ba4b3d7693179d6 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 2 Mar 2012 17:47:37 -0800 Subject: [PATCH 038/205] Add the --disable-experimental option. --- INSTALL | 3 +++ Makefile.in | 8 +++--- configure.ac | 39 ++++++++++++++++++++++------- doc/jemalloc.xml.in | 4 ++- include/jemalloc/jemalloc.h.in | 8 ++++++ include/jemalloc/jemalloc_defs.h.in | 3 +++ src/jemalloc.c | 12 ++++++++- 7 files changed, 63 insertions(+), 14 deletions(-) diff --git a/INSTALL b/INSTALL index 92cd0851..c0ae106a 100644 --- a/INSTALL +++ b/INSTALL @@ -116,6 +116,9 @@ any of the following arguments (not a definitive list) to 'configure': Disable support for junk/zero filling of memory. See the "opt.junk"/ "opt.zero" option documentation for usage details. +--disable-experimental + Disable support for the experimental API (*allocm()). + --enable-xmalloc Enable support for optional immediate termination due to out-of-memory errors, as is commonly implemented by "xmalloc" wrapper function for malloc. 
diff --git a/Makefile.in b/Makefile.in index 0accf102..1f1ffd33 100644 --- a/Makefile.in +++ b/Makefile.in @@ -63,10 +63,12 @@ DOCS_XML := @objroot@doc/jemalloc@install_suffix@.xml DOCS_HTML := $(DOCS_XML:@objroot@%.xml=@srcroot@%.html) DOCS_MAN3 := $(DOCS_XML:@objroot@%.xml=@srcroot@%.3) DOCS := $(DOCS_HTML) $(DOCS_MAN3) -CTESTS := @srcroot@test/allocated.c @srcroot@test/allocm.c \ - @srcroot@test/bitmap.c @srcroot@test/mremap.c \ - @srcroot@test/posix_memalign.c @srcroot@test/rallocm.c \ +CTESTS := @srcroot@test/allocated.c @srcroot@test/bitmap.c \ + @srcroot@test/mremap.c @srcroot@test/posix_memalign.c \ @srcroot@test/thread_arena.c +ifeq (@enable_experimental@, 1) +CTESTS += @srcroot@test/allocm.c @srcroot@test/rallocm.c +endif .PHONY: all dist doc_html doc_man doc .PHONY: install_bin install_include install_lib diff --git a/configure.ac b/configure.ac index 81ab233a..c61a665b 100644 --- a/configure.ac +++ b/configure.ac @@ -303,6 +303,34 @@ AC_PATH_PROG([AR], [ar], , [$PATH]) AC_PATH_PROG([LD], [ld], , [$PATH]) AC_PATH_PROG([AUTOCONF], [autoconf], , [$PATH]) +public_syms="malloc_conf malloc_message malloc calloc posix_memalign realloc free malloc_usable_size malloc_stats_print mallctl mallctlnametomib mallctlbymib" + +dnl Check for allocator-related functions that should be wrapped. +AC_CHECK_FUNC([memalign], + [AC_DEFINE([JEMALLOC_OVERRIDE_MEMALIGN]) + public_syms="${public_syms} memalign"]) +AC_CHECK_FUNC([valloc], + [AC_DEFINE([JEMALLOC_OVERRIDE_VALLOC]) + public_syms="${public_syms} valloc"]) + +dnl Support the experimental API by default. +AC_ARG_ENABLE([experimental], + [AS_HELP_STRING([--disable-experimental], + [Disable support for the experimental API])], +[if test "x$enable_experimental" = "xno" ; then + enable_experimental="0" +else + enable_experimental="1" +fi +], +[enable_experimental="1"] +) +if test "x$enable_experimental" = "x1" ; then + AC_DEFINE([JEMALLOC_EXPERIMENTAL], [ ]) + public_syms="${public_syms} allocm dallocm nallocm rallocm sallocm" +fi +AC_SUBST([enable_experimental]) + dnl Perform no name mangling by default. AC_ARG_WITH([mangling], [AS_HELP_STRING([--with-mangling=], [Mangle symbols in ])], @@ -331,7 +359,7 @@ fi dnl Generate macros to rename public symbols. All public symbols are prefixed dnl with je_ in the source code, so these macro definitions are needed even if dnl --with-jemalloc-prefix wasn't specified. -for stem in malloc_conf malloc_message malloc calloc posix_memalign realloc free malloc_usable_size malloc_stats_print mallctl mallctlnametomib mallctlbymib memalign valloc allocm dallocm nallocm rallocm sallocm; do +for stem in ${public_syms}; do n="je_${stem}" m="${JEMALLOC_PREFIX}${stem}" AC_DEFINE_UNQUOTED([${n}], [${m}]) @@ -795,14 +823,6 @@ if test "x${osspin}" = "xyes" ; then AC_DEFINE([JEMALLOC_OSSPIN]) fi -dnl ============================================================================ -dnl Check for allocator-related functions that should be wrapped. - -AC_CHECK_FUNC([memalign], - [AC_DEFINE([JEMALLOC_OVERRIDE_MEMALIGN])]) -AC_CHECK_FUNC([valloc], - [AC_DEFINE([JEMALLOC_OVERRIDE_VALLOC])]) - dnl ============================================================================ dnl Darwin-related configuration. 
@@ -903,6 +923,7 @@ AC_MSG_RESULT([JEMALLOC_PRIVATE_NAMESPACE]) AC_MSG_RESULT([ : ${JEMALLOC_PRIVATE_NAMESPACE}]) AC_MSG_RESULT([install_suffix : ${install_suffix}]) AC_MSG_RESULT([autogen : ${enable_autogen}]) +AC_MSG_RESULT([experimental : ${enable_experimental}]) AC_MSG_RESULT([cc-silence : ${enable_cc_silence}]) AC_MSG_RESULT([debug : ${enable_debug}]) AC_MSG_RESULT([stats : ${enable_stats}]) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 6aa412a1..ca13493a 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -304,7 +304,9 @@ for (i = 0; i < nbins; i++) { Experimental API The experimental API is subject to change or removal without regard - for backward compatibility. + for backward compatibility. If + is specified during configuration, the experimental API is + omitted. The allocm, rallocm, diff --git a/include/jemalloc/jemalloc.h.in b/include/jemalloc/jemalloc.h.in index f567ee57..742daddd 100644 --- a/include/jemalloc/jemalloc.h.in +++ b/include/jemalloc/jemalloc.h.in @@ -16,6 +16,7 @@ extern "C" { #include "jemalloc_defs@install_suffix@.h" +#ifdef JEMALLOC_EXPERIMENTAL #define ALLOCM_LG_ALIGN(la) (la) #if LG_SIZEOF_PTR == 2 #define ALLOCM_ALIGN(a) (ffs(a)-1) @@ -28,6 +29,7 @@ extern "C" { #define ALLOCM_SUCCESS 0 #define ALLOCM_ERR_OOM 1 #define ALLOCM_ERR_NOT_MOVED 2 +#endif /* * The je_ prefix on the following public symbol declarations is an artifact of @@ -53,6 +55,7 @@ int je_mallctlnametomib(const char *name, size_t *mibp, size_t *miblenp); int je_mallctlbymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen); +#ifdef JEMALLOC_EXPERIMENTAL int je_allocm(void **ptr, size_t *rsize, size_t size, int flags) JEMALLOC_ATTR(nonnull(1)); int je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, @@ -61,6 +64,7 @@ int je_sallocm(const void *ptr, size_t *rsize, int flags) JEMALLOC_ATTR(nonnull(1)); int je_dallocm(void *ptr, int flags) JEMALLOC_ATTR(nonnull(1)); int je_nallocm(size_t *rsize, size_t size, int flags); +#endif /* * By default application code must explicitly refer to mangled symbol names, @@ -87,12 +91,14 @@ int je_nallocm(size_t *rsize, size_t size, int flags); #define mallctlbymib je_mallctlbymib #define memalign je_memalign #define valloc je_valloc +#ifdef JEMALLOC_EXPERIMENTAL #define allocm je_allocm #define rallocm je_rallocm #define sallocm je_sallocm #define dallocm je_dallocm #define nallocm je_nallocm #endif +#endif /* * The je_* macros can be used as stable alternative names for the public @@ -116,12 +122,14 @@ int je_nallocm(size_t *rsize, size_t size, int flags); #undef je_mallctlbymib #undef je_memalign #undef je_valloc +#ifdef JEMALLOC_EXPERIMENTAL #undef je_allocm #undef je_rallocm #undef je_sallocm #undef je_dallocm #undef je_nallocm #endif +#endif #ifdef __cplusplus }; diff --git a/include/jemalloc/jemalloc_defs.h.in b/include/jemalloc/jemalloc_defs.h.in index 444950c8..049cf01a 100644 --- a/include/jemalloc/jemalloc_defs.h.in +++ b/include/jemalloc/jemalloc_defs.h.in @@ -108,6 +108,9 @@ /* Support memory filling (junk/zero). */ #undef JEMALLOC_FILL +/* Support the experimental API. */ +#undef JEMALLOC_EXPERIMENTAL + /* Support optional abort() on OOM. 
*/ #undef JEMALLOC_XMALLOC diff --git a/src/jemalloc.c b/src/jemalloc.c index 6e34706d..3c39c857 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1361,6 +1361,15 @@ je_mallctlbymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, return (ctl_bymib(mib, miblen, oldp, oldlenp, newp, newlen)); } +/* + * End non-standard functions. + */ +/******************************************************************************/ +/* + * Begin experimental functions. + */ +#ifdef JEMALLOC_EXPERIMENTAL + JEMALLOC_INLINE void * iallocm(size_t usize, size_t alignment, bool zero) { @@ -1601,8 +1610,9 @@ je_nallocm(size_t *rsize, size_t size, int flags) return (ALLOCM_SUCCESS); } +#endif /* - * End non-standard functions. + * End experimental functions. */ /******************************************************************************/ From 1af52cfe64fb6dc11b845a4fa8c6332b319286cc Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 2 Mar 2012 17:51:26 -0800 Subject: [PATCH 039/205] Update copyright dates. --- COPYING | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/COPYING b/COPYING index 10ade120..b8a48ff0 100644 --- a/COPYING +++ b/COPYING @@ -1,9 +1,9 @@ Unless otherwise specified, files in the jemalloc source distribution are subject to the following licenses: -------------------------------------------------------------------------------- -Copyright (C) 2002-2010 Jason Evans . +Copyright (C) 2002-2012 Jason Evans . All rights reserved. -Copyright (C) 2007-2010 Mozilla Foundation. All rights reserved. +Copyright (C) 2007-2012 Mozilla Foundation. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: @@ -24,7 +24,7 @@ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- -Copyright (C) 2009-2010 Facebook, Inc. +Copyright (C) 2009-2012 Facebook, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, From c71b9946cedec094b542016a15f752ff2fef997b Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 4 Mar 2012 10:46:12 -0800 Subject: [PATCH 040/205] Unify license. Update Facebook's software license (with permission from Facebook) to merge with that of the other copyright holders. --- COPYING | 28 ++-------------------------- 1 file changed, 2 insertions(+), 26 deletions(-) diff --git a/COPYING b/COPYING index b8a48ff0..e27fc4d6 100644 --- a/COPYING +++ b/COPYING @@ -1,9 +1,10 @@ Unless otherwise specified, files in the jemalloc source distribution are -subject to the following licenses: +subject to the following license: -------------------------------------------------------------------------------- Copyright (C) 2002-2012 Jason Evans . All rights reserved. Copyright (C) 2007-2012 Mozilla Foundation. All rights reserved. +Copyright (C) 2009-2012 Facebook, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: @@ -24,28 +25,3 @@ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-------------------------------------------------------------------------------- -Copyright (C) 2009-2012 Facebook, Inc. -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. -* Redistributions in binary form must reproduce the above copyright notice, this - list of conditions and the following disclaimer in the documentation and/or - other materials provided with the distribution. -* Neither the name of Facebook, Inc. nor the names of its contributors may be - used to endorse or promote products derived from this software without - specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR -ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON -ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. --------------------------------------------------------------------------------- From 6684cacfa8fa9d82762983bce11bb953ce62ba8e Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 5 Mar 2012 12:15:36 -0800 Subject: [PATCH 041/205] Tweak configure.ac to support cross-compiling. Submitted by Andreas Vinsander. 
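AC_RUN_IFELSE() links and executes a test program, which cannot work when the build and host systems differ; AC_COMPILE_IFELSE() needs only the cross-compiler. Converting the checks to AC_CACHE_CHECK() with je_cv_* variables also means that the one check which still has to run code (STATIC_PAGE_SHIFT) can be seeded by hand when cross-compiling, e.g. by passing something like je_cv_static_page_shift=12 to configure (12 is merely an illustrative value, for 4 KiB pages).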
--- configure.ac | 78 +++++++++++++++++++++++++++++++--------------------- 1 file changed, 46 insertions(+), 32 deletions(-) diff --git a/configure.ac b/configure.ac index c61a665b..5b6c6b37 100644 --- a/configure.ac +++ b/configure.ac @@ -14,7 +14,7 @@ if test "x${CFLAGS}" = "x" ; then else CFLAGS="${CFLAGS} $1" fi -AC_RUN_IFELSE([AC_LANG_PROGRAM( +AC_COMPILE_IFELSE([AC_LANG_PROGRAM( [[ ]], [[ return 0; @@ -28,14 +28,12 @@ AC_RUN_IFELSE([AC_LANG_PROGRAM( dnl JE_COMPILABLE(label, hcode, mcode, rvar) AC_DEFUN([JE_COMPILABLE], [ -AC_MSG_CHECKING([whether $1 is compilable]) -AC_RUN_IFELSE([AC_LANG_PROGRAM( -[$2], [$3])], - AC_MSG_RESULT([yes]) - [$4="yes"], - AC_MSG_RESULT([no]) - [$4="no"] -) +AC_CACHE_CHECK([whether $1 is compilable], + [$4], + [AC_COMPILE_IFELSE([AC_LANG_PROGRAM([$2], + [$3])], + [$4=yes], + [$4=no])]) ]) dnl ============================================================================ @@ -154,15 +152,15 @@ case "${host_cpu}" in ;; i686) JE_COMPILABLE([__asm__], [], [[__asm__ volatile("pause"); return 0;]], - [asm]) - if test "x${asm}" = "xyes" ; then + [je_cv_asm]) + if test "x${je_cv_asm}" = "xyes" ; then CPU_SPINWAIT='__asm__ volatile("pause")' fi ;; x86_64) JE_COMPILABLE([__asm__ syntax], [], - [[__asm__ volatile("pause"); return 0;]], [asm]) - if test "x${asm}" = "xyes" ; then + [[__asm__ volatile("pause"); return 0;]], [je_cv_asm]) + if test "x${je_cv_asm}" = "xyes" ; then CPU_SPINWAIT='__asm__ volatile("pause")' fi ;; @@ -254,8 +252,8 @@ AC_SUBST([so]) JE_COMPILABLE([__attribute__ syntax], [static __attribute__((unused)) void foo(void){}], [], - [attribute]) -if test "x${attribute}" = "xyes" ; then + [je_cv_attribute]) +if test "x${je_cv_attribute}" = "xyes" ; then AC_DEFINE([JEMALLOC_HAVE_ATTR], [ ]) if test "x${GCC}" = "xyes" -a "x${abi}" = "xelf"; then JE_CFLAGS_APPEND([-fvisibility=hidden]) @@ -267,8 +265,8 @@ JE_COMPILABLE([mremap(...MREMAP_FIXED...)], [ #include ], [ void *p = mremap((void *)0, 0, 0, MREMAP_MAYMOVE|MREMAP_FIXED, (void *)0); -], [mremap_fixed]) -if test "x${mremap_fixed}" = "xyes" ; then +], [je_cv_mremap_fixed]) +if test "x${je_cv_mremap_fixed}" = "xyes" ; then AC_DEFINE([JEMALLOC_MREMAP_FIXED]) fi @@ -672,12 +670,14 @@ if test "x$enable_xmalloc" = "x1" ; then fi AC_SUBST([enable_xmalloc]) -AC_MSG_CHECKING([STATIC_PAGE_SHIFT]) -AC_RUN_IFELSE([AC_LANG_PROGRAM( +AC_CACHE_CHECK([STATIC_PAGE_SHIFT], + [je_cv_static_page_shift], + AC_RUN_IFELSE([AC_LANG_PROGRAM( [[#include #include #include -]], [[ +]], +[[ long result; FILE *f; @@ -694,10 +694,14 @@ AC_RUN_IFELSE([AC_LANG_PROGRAM( return 0; ]])], - [STATIC_PAGE_SHIFT=`cat conftest.out`] - AC_MSG_RESULT([$STATIC_PAGE_SHIFT]) - AC_DEFINE_UNQUOTED([STATIC_PAGE_SHIFT], [$STATIC_PAGE_SHIFT]), - AC_MSG_RESULT([error])) + [je_cv_static_page_shift=`cat conftest.out`], + [je_cv_static_page_shift=undefined])) + +if test "x$je_cv_static_page_shift" != "xundefined"; then + AC_DEFINE_UNQUOTED([STATIC_PAGE_SHIFT], [$je_cv_static_page_shift]) +else + AC_MSG_ERROR([cannot determine value for STATIC_PAGE_SHIFT]) +fi dnl ============================================================================ dnl jemalloc configuration. @@ -761,7 +765,7 @@ enable_tls="1" ) if test "x${enable_tls}" = "x1" ; then AC_MSG_CHECKING([for TLS]) -AC_RUN_IFELSE([AC_LANG_PROGRAM( +AC_COMPILE_IFELSE([AC_LANG_PROGRAM( [[ __thread int x; ]], [[ @@ -782,9 +786,19 @@ dnl ============================================================================ dnl Check for ffsl(3), and fail if not found. 
This function exists on all dnl platforms that jemalloc currently has a chance of functioning on without dnl modification. - -AC_CHECK_FUNC([ffsl], [], - [AC_MSG_ERROR([Cannot build without ffsl(3)])]) +JE_COMPILABLE([a program using ffsl], + [ + #include + ], + [ + { + int rv = ffsl(0x08); + } + ], + [je_cv_function_ffsl]) +if test "x${je_cv_function_ffsl}" != "xyes" ; then + AC_MSG_ERROR([Cannot build without ffsl(3)]) +fi dnl ============================================================================ dnl Check for atomic(3) operations as provided on Darwin. @@ -803,8 +817,8 @@ JE_COMPILABLE([Darwin OSAtomic*()], [ volatile int64_t *x64p = &x64; OSAtomicAdd64(1, x64p); } -], [osatomic]) -if test "x${osatomic}" = "xyes" ; then +], [je_cv_osatomic]) +if test "x${je_cv_osatomic}" = "xyes" ; then AC_DEFINE([JEMALLOC_OSATOMIC]) fi @@ -818,8 +832,8 @@ JE_COMPILABLE([Darwin OSSpin*()], [ OSSpinLock lock = 0; OSSpinLockLock(&lock); OSSpinLockUnlock(&lock); -], [osspin]) -if test "x${osspin}" = "xyes" ; then +], [je_cv_osspin]) +if test "x${je_cv_osspin}" = "xyes" ; then AC_DEFINE([JEMALLOC_OSSPIN]) fi From 3492daf1ce6fb85040d28dfd4dcb51cbf6b0da51 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 5 Mar 2012 12:16:57 -0800 Subject: [PATCH 042/205] Add SH4 and mips architecture support. Submitted by Andreas Vinsander. --- include/jemalloc/internal/atomic.h | 16 ++++++++++++++++ include/jemalloc/internal/jemalloc_internal.h.in | 3 +++ 2 files changed, 19 insertions(+) diff --git a/include/jemalloc/internal/atomic.h b/include/jemalloc/internal/atomic.h index 8c685939..1dbb7d6a 100644 --- a/include/jemalloc/internal/atomic.h +++ b/include/jemalloc/internal/atomic.h @@ -160,6 +160,22 @@ atomic_sub_uint32(uint32_t *p, uint32_t x) return (x); } +#elif (defined __SH4__ || defined __mips__) && (__GNUC__ > 4 || \ + (__GNUC__ == 4 && (__GNUC_MINOR__ > 1 || (__GNUC_MINOR__ == 1 && \ + __GNUC_PATCHLEVEL__ > 1)))) +JEMALLOC_INLINE uint32_t +atomic_add_uint32(uint32_t *p, uint32_t x) +{ + + return (__sync_add_and_fetch(p, x)); +} + +JEMALLOC_INLINE uint32_t +atomic_sub_uint32(uint32_t *p, uint32_t x) +{ + + return (__sync_sub_and_fetch(p, x)); +} #else # error "Missing implementation for 32-bit atomic operations" #endif diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 403e4804..3e445f76 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -254,6 +254,9 @@ extern void (*je_malloc_message)(void *wcbopaque, const char *s); # ifdef __s390x__ # define LG_QUANTUM 4 # endif +# ifdef __SH4__ +# define LG_QUANTUM 4 +# endif # ifdef __tile__ # define LG_QUANTUM 4 # endif From b8c8be7f8abe72f4cb4f315f3078ad864fd6a2d8 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 5 Mar 2012 12:26:26 -0800 Subject: [PATCH 043/205] Use UINT64_C() rather than LLU for 64-bit constants. 
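The LLU suffix hard-codes the assumption that unsigned long long is exactly 64 bits wide; UINT64_C() from <stdint.h> attaches whatever suffix the platform's uint64_t actually requires. A minimal standalone sketch (not taken from the jemalloc sources, though the constant is the ckh.c hash seed):

	#include <stdint.h>
	#include <inttypes.h>
	#include <stdio.h>

	int
	main(void)
	{
		/* Expands with the UL suffix on typical LP64 targets and
		 * with ULL on ILP32 targets, so the same source is correct
		 * everywhere. */
		uint64_t seed = UINT64_C(0x94122f335b332aea);

		printf("seed: 0x%" PRIx64 "\n", seed);
		return (0);
	}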
--- include/jemalloc/internal/hash.h | 2 +- include/jemalloc/internal/prof.h | 2 +- src/ckh.c | 8 ++++---- src/prof.c | 9 +++++---- test/allocm.c | 18 +++++++++--------- test/posix_memalign.c | 17 +++++++++-------- 6 files changed, 29 insertions(+), 27 deletions(-) diff --git a/include/jemalloc/internal/hash.h b/include/jemalloc/internal/hash.h index 8a46ce30..d695e77f 100644 --- a/include/jemalloc/internal/hash.h +++ b/include/jemalloc/internal/hash.h @@ -26,7 +26,7 @@ uint64_t hash(const void *key, size_t len, uint64_t seed); JEMALLOC_INLINE uint64_t hash(const void *key, size_t len, uint64_t seed) { - const uint64_t m = 0xc6a4a7935bd1e995LLU; + const uint64_t m = UINT64_C(0xc6a4a7935bd1e995); const int r = 47; uint64_t h = seed ^ (len * m); const uint64_t *data = (const uint64_t *)key; diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index e08a50ab..f647f637 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -308,7 +308,7 @@ prof_sample_threshold_update(prof_tdata_t *prof_tdata) * (http://cg.scs.carleton.ca/~luc/rnbookindex.html) */ prng64(r, 53, prof_tdata->prng_state, - (uint64_t)6364136223846793005LLU, (uint64_t)1442695040888963407LLU); + UINT64_C(6364136223846793005), UINT64_C(1442695040888963407)); u = (double)r * (1.0/9007199254740992.0L); prof_tdata->threshold = (uint64_t)(log(u) / log(1.0 - (1.0 / (double)((uint64_t)1U << opt_lg_prof_sample)))) diff --git a/src/ckh.c b/src/ckh.c index bea6ef8a..39925ced 100644 --- a/src/ckh.c +++ b/src/ckh.c @@ -535,7 +535,7 @@ ckh_string_hash(const void *key, unsigned minbits, size_t *hash1, size_t *hash2) assert(hash1 != NULL); assert(hash2 != NULL); - h = hash(key, strlen((const char *)key), 0x94122f335b332aeaLLU); + h = hash(key, strlen((const char *)key), UINT64_C(0x94122f335b332aea)); if (minbits <= 32) { /* * Avoid doing multiple hashes, since a single hash provides @@ -546,7 +546,7 @@ ckh_string_hash(const void *key, unsigned minbits, size_t *hash1, size_t *hash2) } else { ret1 = h; ret2 = hash(key, strlen((const char *)key), - 0x8432a476666bbc13LLU); + UINT64_C(0x8432a476666bbc13)); } *hash1 = ret1; @@ -583,7 +583,7 @@ ckh_pointer_hash(const void *key, unsigned minbits, size_t *hash1, u.i = 0; #endif u.v = key; - h = hash(&u.i, sizeof(u.i), 0xd983396e68886082LLU); + h = hash(&u.i, sizeof(u.i), UINT64_C(0xd983396e68886082)); if (minbits <= 32) { /* * Avoid doing multiple hashes, since a single hash provides @@ -594,7 +594,7 @@ ckh_pointer_hash(const void *key, unsigned minbits, size_t *hash1, } else { assert(SIZEOF_PTR == 8); ret1 = h; - ret2 = hash(&u.i, sizeof(u.i), 0x5e2be9aff8709a5dLLU); + ret2 = hash(&u.i, sizeof(u.i), UINT64_C(0x5e2be9aff8709a5d)); } *hash1 = ret1; diff --git a/src/prof.c b/src/prof.c index c4a2d64d..d78658d6 100644 --- a/src/prof.c +++ b/src/prof.c @@ -993,7 +993,7 @@ prof_dump_filename(char *filename, char v, int64_t vseq) filename[i] = v; i++; - if (vseq != 0xffffffffffffffffLLU) { + if (vseq != UINT64_C(0xffffffffffffffff)) { s = u2s(vseq, 10, buf); slen = strlen(s); memcpy(&filename[i], s, slen); @@ -1020,7 +1020,7 @@ prof_fdump(void) if (opt_prof_prefix[0] != '\0') { malloc_mutex_lock(&prof_dump_seq_mtx); - prof_dump_filename(filename, 'f', 0xffffffffffffffffLLU); + prof_dump_filename(filename, 'f', UINT64_C(0xffffffffffffffff)); malloc_mutex_unlock(&prof_dump_seq_mtx); prof_dump(filename, opt_prof_leak, false); } @@ -1113,7 +1113,8 @@ prof_bt_hash(const void *key, unsigned minbits, size_t *hash1, size_t *hash2) assert(hash1 != NULL); 
assert(hash2 != NULL); - h = hash(bt->vec, bt->len * sizeof(void *), 0x94122f335b332aeaLLU); + h = hash(bt->vec, bt->len * sizeof(void *), + UINT64_C(0x94122f335b332aea)); if (minbits <= 32) { /* * Avoid doing multiple hashes, since a single hash provides @@ -1124,7 +1125,7 @@ prof_bt_hash(const void *key, unsigned minbits, size_t *hash1, size_t *hash2) } else { ret1 = h; ret2 = hash(bt->vec, bt->len * sizeof(void *), - 0x8432a476666bbc13LLU); + UINT64_C(0x8432a476666bbc13)); } *hash1 = ret1; diff --git a/test/allocm.c b/test/allocm.c index 151f5749..137e74c3 100644 --- a/test/allocm.c +++ b/test/allocm.c @@ -6,8 +6,8 @@ #include "jemalloc_test.h" #define CHUNK 0x400000 -/* #define MAXALIGN ((size_t)0x80000000000LLU) */ -#define MAXALIGN ((size_t)0x2000000LLU) +/* #define MAXALIGN ((size_t)UINT64_C(0x80000000000)) */ +#define MAXALIGN ((size_t)0x2000000LU) #define NITER 4 int @@ -67,8 +67,8 @@ main(void) fprintf(stderr, "Unexpected dallocm() error\n"); #if LG_SIZEOF_PTR == 3 - alignment = 0x8000000000000000LLU; - sz = 0x8000000000000000LLU; + alignment = UINT64_C(0x8000000000000000); + sz = UINT64_C(0x8000000000000000); #else alignment = 0x80000000LU; sz = 0x80000000LU; @@ -91,8 +91,8 @@ main(void) fprintf(stderr, "nallocm()/allocm() rsize mismatch\n"); #if LG_SIZEOF_PTR == 3 - alignment = 0x4000000000000000LLU; - sz = 0x8400000000000001LLU; + alignment = UINT64_C(0x4000000000000000); + sz = UINT64_C(0x8400000000000001); #else alignment = 0x40000000LU; sz = 0x84000001LU; @@ -109,11 +109,11 @@ main(void) sz, ALLOCM_ALIGN(alignment)); } - alignment = 0x10LLU; + alignment = 0x10LU; #if LG_SIZEOF_PTR == 3 - sz = 0xfffffffffffffff0LLU; + sz = UINT64_C(0xfffffffffffffff0); #else - sz = 0xfffffff0LU; + sz = 0xfffffff0LU; #endif nsz = 0; r = nallocm(&nsz, sz, ALLOCM_ALIGN(alignment)); diff --git a/test/posix_memalign.c b/test/posix_memalign.c index 789131ce..5abb4201 100644 --- a/test/posix_memalign.c +++ b/test/posix_memalign.c @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -8,8 +9,8 @@ #include "jemalloc_test.h" #define CHUNK 0x400000 -/* #define MAXALIGN ((size_t)0x80000000000LLU) */ -#define MAXALIGN ((size_t)0x2000000LLU) +/* #define MAXALIGN ((size_t)UINT64_C(0x80000000000)) */ +#define MAXALIGN ((size_t)0x2000000LU) #define NITER 4 int @@ -43,8 +44,8 @@ main(void) } #if LG_SIZEOF_PTR == 3 - alignment = 0x8000000000000000LLU; - size = 0x8000000000000000LLU; + alignment = UINT64_C(0x8000000000000000); + size = UINT64_C(0x8000000000000000); #else alignment = 0x80000000LU; size = 0x80000000LU; @@ -57,8 +58,8 @@ main(void) } #if LG_SIZEOF_PTR == 3 - alignment = 0x4000000000000000LLU; - size = 0x8400000000000001LLU; + alignment = UINT64_C(0x4000000000000000); + size = UINT64_C(0x8400000000000001); #else alignment = 0x40000000LU; size = 0x84000001LU; @@ -70,9 +71,9 @@ main(void) alignment, size); } - alignment = 0x10LLU; + alignment = 0x10LU; #if LG_SIZEOF_PTR == 3 - size = 0xfffffffffffffff0LLU; + size = UINT64_C(0xfffffffffffffff0); #else size = 0xfffffff0LU; #endif From 4507f34628dfae26e6b0a6faa13e5f9a49600616 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 5 Mar 2012 14:34:37 -0800 Subject: [PATCH 044/205] Remove the lg_tcache_gc_sweep option. Remove the lg_tcache_gc_sweep option, because it is no longer very useful. Prior to the addition of dynamic adjustment of tcache fill count, it was possible for fill/flush overhead to be a problem, but this problem no longer occurs. 
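For reference, the option is replaced by the fixed TCACHE_GC_SWEEP interval (8192 events), amortized across the bins. A small sketch of the resulting arithmetic, assuming an illustrative NBINS of 36 (the real value is a build-time constant from size_classes.h):

	#include <stdio.h>

	#define TCACHE_GC_SWEEP	8192	/* Events per full sweep. */
	#define NBINS		36	/* Assumed purely for illustration. */
	/* As defined in the new tcache.h below. */
	#define TCACHE_GC_INCR \
	    ((TCACHE_GC_SWEEP / NBINS) + ((TCACHE_GC_SWEEP / NBINS == 0) ? 0 : 1))

	int
	main(void)
	{
		/* 8192/36 == 227, plus 1 ==> 228 events per incremental
		 * pass; one bin is swept per pass, so a full sweep takes
		 * 228 * 36 == 8208 events, slightly more than 8192 due to
		 * the integer rounding noted in the header comment. */
		printf("incr: %d, sweep: %d events\n", TCACHE_GC_INCR,
		    TCACHE_GC_INCR * NBINS);
		return (0);
	}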
--- doc/jemalloc.xml.in | 20 +------------------- include/jemalloc/internal/tcache.h | 23 +++++++++++------------ src/ctl.c | 4 ---- src/jemalloc.c | 2 -- src/stats.c | 11 ----------- src/tcache.c | 10 ---------- 6 files changed, 12 insertions(+), 58 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index ca13493a..74da409f 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -853,29 +853,11 @@ malloc_conf = "xmalloc:true";]]> allocations to be satisfied without performing any thread synchronization, at the cost of increased memory use. See the opt.lg_tcache_gc_sweep - and opt.lg_tcache_max - options for related tuning information. This option is enabled by + option for related tuning information. This option is enabled by default. - - - opt.lg_tcache_gc_sweep - (ssize_t) - r- - [] - - Approximate interval (log base 2) between full - thread-specific cache garbage collection sweeps, counted in terms of - thread-specific cache allocation/deallocation events. Garbage - collection is actually performed incrementally, one size class at a - time, in order to avoid large collection pauses. The default sweep - interval is 8192 (2^13); setting this option to -1 will disable garbage - collection. - - opt.lg_tcache_max diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index 77bca8d9..e5f9518e 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -21,12 +21,15 @@ typedef struct tcache_s tcache_t; #define LG_TCACHE_MAXCLASS_DEFAULT 15 /* - * (1U << opt_lg_tcache_gc_sweep) is the approximate number of allocation - * events between full GC sweeps (-1: disabled). Integer rounding may cause - * the actual number to be slightly higher, since GC is performed - * incrementally. + * TCACHE_GC_SWEEP is the approximate number of allocation events between + * full GC sweeps. Integer rounding may cause the actual number to be + * slightly higher, since GC is performed incrementally. */ -#define LG_TCACHE_GC_SWEEP_DEFAULT 13 +#define TCACHE_GC_SWEEP 8192 + +/* Number of tcache allocation/deallocation events between incremental GCs. */ +#define TCACHE_GC_INCR \ + ((TCACHE_GC_SWEEP / NBINS) + ((TCACHE_GC_SWEEP / NBINS == 0) ? 0 : 1)) #endif /* JEMALLOC_H_TYPES */ /******************************************************************************/ @@ -69,7 +72,6 @@ struct tcache_s { extern bool opt_tcache; extern ssize_t opt_lg_tcache_max; -extern ssize_t opt_lg_tcache_gc_sweep; extern tcache_bin_info_t *tcache_bin_info; @@ -99,9 +101,6 @@ extern size_t nhbins; /* Maximum cached size class. */ extern size_t tcache_maxclass; -/* Number of tcache allocation/deallocation events between incremental GCs. 
*/ -extern unsigned tcache_gc_incr; - void tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem, tcache_t *tcache); void tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem, @@ -166,12 +165,12 @@ JEMALLOC_INLINE void tcache_event(tcache_t *tcache) { - if (tcache_gc_incr == 0) + if (TCACHE_GC_INCR == 0) return; tcache->ev_cnt++; - assert(tcache->ev_cnt <= tcache_gc_incr); - if (tcache->ev_cnt == tcache_gc_incr) { + assert(tcache->ev_cnt <= TCACHE_GC_INCR); + if (tcache->ev_cnt == TCACHE_GC_INCR) { size_t binind = tcache->next_gc_bin; tcache_bin_t *tbin = &tcache->tbins[binind]; tcache_bin_info_t *tbin_info = &tcache_bin_info[binind]; diff --git a/src/ctl.c b/src/ctl.c index 0fabd852..e7639d7f 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -65,7 +65,6 @@ CTL_PROTO(opt_junk) CTL_PROTO(opt_zero) CTL_PROTO(opt_xmalloc) CTL_PROTO(opt_tcache) -CTL_PROTO(opt_lg_tcache_gc_sweep) CTL_PROTO(opt_prof) CTL_PROTO(opt_prof_prefix) CTL_PROTO(opt_prof_active) @@ -187,7 +186,6 @@ static const ctl_node_t opt_node[] = { {NAME("zero"), CTL(opt_zero)}, {NAME("xmalloc"), CTL(opt_xmalloc)}, {NAME("tcache"), CTL(opt_tcache)}, - {NAME("lg_tcache_gc_sweep"), CTL(opt_lg_tcache_gc_sweep)}, {NAME("prof"), CTL(opt_prof)}, {NAME("prof_prefix"), CTL(opt_prof_prefix)}, {NAME("prof_active"), CTL(opt_prof_active)}, @@ -1069,8 +1067,6 @@ CTL_RO_NL_CGEN(config_fill, opt_junk, opt_junk, bool) CTL_RO_NL_CGEN(config_fill, opt_zero, opt_zero, bool) CTL_RO_NL_CGEN(config_xmalloc, opt_xmalloc, opt_xmalloc, bool) CTL_RO_NL_CGEN(config_tcache, opt_tcache, opt_tcache, bool) -CTL_RO_NL_CGEN(config_tcache, opt_lg_tcache_gc_sweep, opt_lg_tcache_gc_sweep, - ssize_t) CTL_RO_NL_CGEN(config_prof, opt_prof, opt_prof, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_prefix, opt_prof_prefix, const char *) CTL_RO_CGEN(config_prof, opt_prof_active, opt_prof_active, bool) /* Mutable. */ diff --git a/src/jemalloc.c b/src/jemalloc.c index 3c39c857..ad1ee8ef 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -580,8 +580,6 @@ malloc_conf_init(void) } if (config_tcache) { CONF_HANDLE_BOOL(tcache) - CONF_HANDLE_SSIZE_T(lg_tcache_gc_sweep, -1, - (sizeof(size_t) << 3) - 1) CONF_HANDLE_SSIZE_T(lg_tcache_max, -1, (sizeof(size_t) << 3) - 1) } diff --git a/src/stats.c b/src/stats.c index f905d048..f9763785 100644 --- a/src/stats.c +++ b/src/stats.c @@ -490,7 +490,6 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, OPT_WRITE_BOOL(zero) OPT_WRITE_BOOL(xmalloc) OPT_WRITE_BOOL(tcache) - OPT_WRITE_SSIZE_T(lg_tcache_gc_sweep) OPT_WRITE_SSIZE_T(lg_tcache_max) OPT_WRITE_BOOL(prof) OPT_WRITE_CHAR_P(prof_prefix) @@ -541,16 +540,6 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, write_cb(cbopaque, u2s(sv, 10, s)); write_cb(cbopaque, "\n"); } - if ((err = je_mallctl("opt.lg_tcache_gc_sweep", &ssv, &ssz, - NULL, 0)) == 0) { - size_t tcache_gc_sweep = (1U << ssv); - bool tcache_enabled; - CTL_GET("opt.tcache", &tcache_enabled, bool); - write_cb(cbopaque, "Thread cache GC sweep interval: "); - write_cb(cbopaque, tcache_enabled && ssv >= 0 ? 
- u2s(tcache_gc_sweep, 10, s) : "N/A"); - write_cb(cbopaque, "\n"); - } if ((err = je_mallctl("opt.prof", &bv, &bsz, NULL, 0)) == 0 && bv) { CTL_GET("opt.lg_prof_sample", &sv, size_t); diff --git a/src/tcache.c b/src/tcache.c index fa05728e..478b7f5f 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -6,7 +6,6 @@ bool opt_tcache = true; ssize_t opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT; -ssize_t opt_lg_tcache_gc_sweep = LG_TCACHE_GC_SWEEP_DEFAULT; tcache_bin_info_t *tcache_bin_info; static unsigned stack_nelms; /* Total stack elms per tcache. */ @@ -24,7 +23,6 @@ pthread_key_t tcache_tsd; size_t nhbins; size_t tcache_maxclass; -unsigned tcache_gc_incr; /******************************************************************************/ /* Function prototypes for non-inline static functions. */ @@ -419,14 +417,6 @@ tcache_boot(void) stack_nelms += tcache_bin_info[i].ncached_max; } - /* Compute incremental GC event threshold. */ - if (opt_lg_tcache_gc_sweep >= 0) { - tcache_gc_incr = ((1U << opt_lg_tcache_gc_sweep) / - NBINS) + (((1U << opt_lg_tcache_gc_sweep) % NBINS == - 0) ? 0 : 1); - } else - tcache_gc_incr = 0; - if (pthread_key_create(&tcache_tsd, tcache_thread_cleanup) != 0) { malloc_write( From d81e4bdd5c991bd5642c8b859ef1f752b51cd9be Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 6 Mar 2012 14:57:45 -0800 Subject: [PATCH 045/205] Implement malloc_vsnprintf(). Implement malloc_vsnprintf() (a subset of vsnprintf(3)) as well as several other printing functions based on it, so that formatted printing can be relied upon without concern for inducing a dependency on floating point runtime support. Replace malloc_write() calls with malloc_*printf() where doing so simplifies the code. Add name mangling for library-private symbols in the data and BSS sections. Adjust CONF_HANDLE_*() macros in malloc_conf_init() to expose all opt_* variable use to cpp so that proper mangling occurs. 
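As a rough illustration only (a hypothetical caller, not part of this change): the subset implemented here covers the %d/%i, %o, %u, %x/%X, %c, %s, and %p conversions and the l, ll, j, t, and z length modifiers, so internal code can rely on, e.g.:

	/* Hypothetical sketch; buffer size and message are arbitrary. */
	#include "jemalloc/internal/jemalloc_internal.h"

	static void
	size_report(size_t sz)
	{
		char buf[64];

		/* %zu and %#zx are supported; %f is not (no FP runtime). */
		malloc_snprintf(buf, sizeof(buf), "size: %zu (%#zx)", sz, sz);
		malloc_printf("%s\n", buf);
	}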
--- Makefile.in | 2 +- configure.ac | 12 + include/jemalloc/internal/ctl.h | 12 +- include/jemalloc/internal/hash.h | 16 +- .../jemalloc/internal/jemalloc_internal.h.in | 72 +-- include/jemalloc/internal/private_namespace.h | 28 +- include/jemalloc/internal/stats.h | 7 - include/jemalloc/internal/util.h | 130 +++++ include/jemalloc/jemalloc_defs.h.in | 3 + src/chunk_mmap.c | 9 +- src/huge.c | 5 +- src/jemalloc.c | 234 +++----- src/prof.c | 221 +++---- src/stats.c | 222 ++------ src/util.c | 539 ++++++++++++++++++ test/allocm.c | 14 +- test/posix_memalign.c | 2 +- 17 files changed, 958 insertions(+), 570 deletions(-) create mode 100644 include/jemalloc/internal/util.h create mode 100644 src/util.c diff --git a/Makefile.in b/Makefile.in index 1f1ffd33..62864556 100644 --- a/Makefile.in +++ b/Makefile.in @@ -50,7 +50,7 @@ CSRCS := @srcroot@src/jemalloc.c @srcroot@src/arena.c @srcroot@src/atomic.c \ @srcroot@src/ckh.c @srcroot@src/ctl.c @srcroot@src/extent.c \ @srcroot@src/hash.c @srcroot@src/huge.c @srcroot@src/mb.c \ @srcroot@src/mutex.c @srcroot@src/prof.c @srcroot@src/rtree.c \ - @srcroot@src/stats.c @srcroot@src/tcache.c + @srcroot@src/stats.c @srcroot@src/tcache.c @srcroot@src/util.c ifeq (macho, @abi@) CSRCS += @srcroot@src/zone.c endif diff --git a/configure.ac b/configure.ac index 5b6c6b37..5a11588f 100644 --- a/configure.ac +++ b/configure.ac @@ -144,6 +144,18 @@ else fi AC_DEFINE_UNQUOTED([LG_SIZEOF_LONG], [$LG_SIZEOF_LONG]) +AC_CHECK_SIZEOF([intmax_t]) +if test "x${ac_cv_sizeof_intmax_t}" = "x16" ; then + LG_SIZEOF_INTMAX_T=4 +elif test "x${ac_cv_sizeof_intmax_t}" = "x8" ; then + LG_SIZEOF_INTMAX_T=3 +elif test "x${ac_cv_sizeof_intmax_t}" = "x4" ; then + LG_SIZEOF_INTMAX_T=2 +else + AC_MSG_ERROR([Unsupported intmax_t size: ${ac_cv_sizeof_intmax_t}]) +fi +AC_DEFINE_UNQUOTED([LG_SIZEOF_INTMAX_T], [$LG_SIZEOF_INTMAX_T]) + AC_CANONICAL_HOST dnl CPU-specific settings.
CPU_SPINWAIT="" diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index 8f72f7fa..a48d09fe 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -76,19 +76,17 @@ bool ctl_boot(void); #define xmallctl(name, oldp, oldlenp, newp, newlen) do { \ if (je_mallctl(name, oldp, oldlenp, newp, newlen) \ != 0) { \ - malloc_write(": Failure in xmallctl(\""); \ - malloc_write(name); \ - malloc_write("\", ...)\n"); \ + malloc_printf( \ + ": Failure in xmallctl(\"%s\", ...)\n", \ + name); \ abort(); \ } \ } while (0) #define xmallctlnametomib(name, mibp, miblenp) do { \ if (je_mallctlnametomib(name, mibp, miblenp) != 0) { \ - malloc_write( \ - ": Failure in xmallctlnametomib(\""); \ - malloc_write(name); \ - malloc_write("\", ...)\n"); \ + malloc_printf(": Failure in " \ + "xmallctlnametomib(\"%s\", ...)\n", name); \ abort(); \ } \ } while (0) diff --git a/include/jemalloc/internal/hash.h b/include/jemalloc/internal/hash.h index d695e77f..2f501f5d 100644 --- a/include/jemalloc/internal/hash.h +++ b/include/jemalloc/internal/hash.h @@ -48,14 +48,14 @@ hash(const void *key, size_t len, uint64_t seed) data2 = (const unsigned char *)data; switch(len & 7) { - case 7: h ^= ((uint64_t)(data2[6])) << 48; - case 6: h ^= ((uint64_t)(data2[5])) << 40; - case 5: h ^= ((uint64_t)(data2[4])) << 32; - case 4: h ^= ((uint64_t)(data2[3])) << 24; - case 3: h ^= ((uint64_t)(data2[2])) << 16; - case 2: h ^= ((uint64_t)(data2[1])) << 8; - case 1: h ^= ((uint64_t)(data2[0])); - h *= m; + case 7: h ^= ((uint64_t)(data2[6])) << 48; + case 6: h ^= ((uint64_t)(data2[5])) << 40; + case 5: h ^= ((uint64_t)(data2[4])) << 32; + case 4: h ^= ((uint64_t)(data2[3])) << 24; + case 3: h ^= ((uint64_t)(data2[2])) << 16; + case 2: h ^= ((uint64_t)(data2[1])) << 8; + case 1: h ^= ((uint64_t)(data2[0])); + h *= m; } h ^= h >> r; diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 3e445f76..3774bb5d 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -149,39 +149,6 @@ static const bool config_ivsalloc = #include "jemalloc/internal/qr.h" #include "jemalloc/internal/ql.h" -extern void (*je_malloc_message)(void *wcbopaque, const char *s); - -/* - * Define a custom assert() in order to reduce the chances of deadlock during - * assertion failure. - */ -#ifndef assert -# ifdef JEMALLOC_DEBUG -# define assert(e) do { \ - if (!(e)) { \ - char line_buf[UMAX2S_BUFSIZE]; \ - malloc_write(": "); \ - malloc_write(__FILE__); \ - malloc_write(":"); \ - malloc_write(u2s(__LINE__, 10, line_buf)); \ - malloc_write(": Failed assertion: "); \ - malloc_write("\""); \ - malloc_write(#e); \ - malloc_write("\"\n"); \ - abort(); \ - } \ -} while (0) -# else -# define assert(e) -# endif -#endif - -/* Use to assert a particular configuration, e.g., cassert(config_debug). */ -#define cassert(c) do { \ - if ((c) == false) \ - assert(false); \ -} while (0) - /* * jemalloc can conceptually be broken into components (arena, tcache, etc.), * but there are circular dependencies that cannot be broken without @@ -215,9 +182,6 @@ extern void (*je_malloc_message)(void *wcbopaque, const char *s); # define JEMALLOC_INLINE static inline #endif -/* Size of stack-allocated buffer passed to buferror(). */ -#define BUFERROR_BUF 64 - /* Smallest size class to support. 
*/ #define LG_TINY_MIN 3 #define TINY_MIN (1U << LG_TINY_MIN) @@ -318,6 +282,7 @@ extern void (*je_malloc_message)(void *wcbopaque, const char *s); #define PAGE_CEILING(s) \ (((s) + PAGE_MASK) & ~PAGE_MASK) +#include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/prng.h" #include "jemalloc/internal/ckh.h" @@ -344,6 +309,7 @@ extern void (*je_malloc_message)(void *wcbopaque, const char *s); /******************************************************************************/ #define JEMALLOC_H_STRUCTS +#include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/prng.h" #include "jemalloc/internal/ckh.h" @@ -443,10 +409,10 @@ thread_allocated_t *thread_allocated_get_hard(void); arena_t *arenas_extend(unsigned ind); arena_t *choose_arena_hard(void); -int buferror(int errnum, char *buf, size_t buflen); void jemalloc_prefork(void); void jemalloc_postfork(void); +#include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/prng.h" #include "jemalloc/internal/ckh.h" @@ -473,6 +439,7 @@ void jemalloc_postfork(void); /******************************************************************************/ #define JEMALLOC_H_INLINES +#include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/prng.h" #include "jemalloc/internal/ckh.h" @@ -487,33 +454,13 @@ void jemalloc_postfork(void); #include "jemalloc/internal/huge.h" #ifndef JEMALLOC_ENABLE_INLINE -size_t pow2_ceil(size_t x); size_t s2u(size_t size); size_t sa2u(size_t size, size_t alignment, size_t *run_size_p); -void malloc_write(const char *s); arena_t *choose_arena(void); thread_allocated_t *thread_allocated_get(void); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) -/* Compute the smallest power of 2 that is >= x. */ -JEMALLOC_INLINE size_t -pow2_ceil(size_t x) -{ - - x--; - x |= x >> 1; - x |= x >> 2; - x |= x >> 4; - x |= x >> 8; - x |= x >> 16; -#if (LG_SIZEOF_PTR == 3) - x |= x >> 32; -#endif - x++; - return (x); -} - /* * Compute usable size that would result from allocating an object with the * specified size. @@ -619,17 +566,6 @@ sa2u(size_t size, size_t alignment, size_t *run_size_p) } } -/* - * Wrapper around malloc_message() that avoids the need for - * je_malloc_message(...) throughout the code. - */ -JEMALLOC_INLINE void -malloc_write(const char *s) -{ - - je_malloc_message(NULL, s); -} - /* * Choose an arena based on a per-thread value (fast-path code, calls slow-path * code if necessary). 
diff --git a/include/jemalloc/internal/private_namespace.h b/include/jemalloc/internal/private_namespace.h index db2192e6..89d3b5ca 100644 --- a/include/jemalloc/internal/private_namespace.h +++ b/include/jemalloc/internal/private_namespace.h @@ -23,6 +23,7 @@ #define arenas_bin_i_index JEMALLOC_N(arenas_bin_i_index) #define arenas_extend JEMALLOC_N(arenas_extend) #define arenas_lrun_i_index JEMALLOC_N(arenas_lrun_i_index) +#define arenas_tls JEMALLOC_N(arenas_tls) #define atomic_add_uint32 JEMALLOC_N(atomic_add_uint32) #define atomic_add_uint64 JEMALLOC_N(atomic_add_uint64) #define atomic_sub_uint32 JEMALLOC_N(atomic_sub_uint32) @@ -137,8 +138,30 @@ #define malloc_mutex_trylock JEMALLOC_N(malloc_mutex_trylock) #define malloc_mutex_unlock JEMALLOC_N(malloc_mutex_unlock) #define malloc_printf JEMALLOC_N(malloc_printf) +#define malloc_snprintf JEMALLOC_N(malloc_snprintf) +#define malloc_tprintf JEMALLOC_N(malloc_tprintf) +#define malloc_vcprintf JEMALLOC_N(malloc_vcprintf) +#define malloc_vsnprintf JEMALLOC_N(malloc_vsnprintf) +#define malloc_vtprintf JEMALLOC_N(malloc_vtprintf) #define malloc_write JEMALLOC_N(malloc_write) #define mb_write JEMALLOC_N(mb_write) +#define opt_abort JEMALLOC_N(opt_abort) +#define opt_junk JEMALLOC_N(opt_junk) +#define opt_lg_chunk JEMALLOC_N(opt_lg_chunk) +#define opt_lg_dirty_mult JEMALLOC_N(opt_lg_dirty_mult) +#define opt_lg_prof_interval JEMALLOC_N(opt_lg_prof_interval) +#define opt_lg_prof_sample JEMALLOC_N(opt_lg_prof_sample) +#define opt_lg_tcache_max JEMALLOC_N(opt_lg_tcache_max) +#define opt_narenas JEMALLOC_N(opt_narenas) +#define opt_prof JEMALLOC_N(opt_prof) +#define opt_prof_accum JEMALLOC_N(opt_prof_accum) +#define opt_prof_active JEMALLOC_N(opt_prof_active) +#define opt_prof_gdump JEMALLOC_N(opt_prof_gdump) +#define opt_prof_leak JEMALLOC_N(opt_prof_leak) +#define opt_stats_print JEMALLOC_N(opt_stats_print) +#define opt_tcache JEMALLOC_N(opt_tcache) +#define opt_xmalloc JEMALLOC_N(opt_xmalloc) +#define opt_zero JEMALLOC_N(opt_zero) #define pow2_ceil JEMALLOC_N(pow2_ceil) #define prof_backtrace JEMALLOC_N(prof_backtrace) #define prof_boot0 JEMALLOC_N(prof_boot0) @@ -156,6 +179,7 @@ #define prof_sample_accum_update JEMALLOC_N(prof_sample_accum_update) #define prof_sample_threshold_update JEMALLOC_N(prof_sample_threshold_update) #define prof_tdata_init JEMALLOC_N(prof_tdata_init) +#define prof_tdata_tls JEMALLOC_N(prof_tdata_tls) #define pthread_create JEMALLOC_N(pthread_create) #define rtree_get JEMALLOC_N(rtree_get) #define rtree_get_locked JEMALLOC_N(rtree_get_locked) @@ -166,6 +190,7 @@ #define stats_arenas_i_bins_j_index JEMALLOC_N(stats_arenas_i_bins_j_index) #define stats_arenas_i_index JEMALLOC_N(stats_arenas_i_index) #define stats_arenas_i_lruns_j_index JEMALLOC_N(stats_arenas_i_lruns_j_index) +#define stats_cactive JEMALLOC_N(stats_cactive) #define stats_cactive_add JEMALLOC_N(stats_cactive_add) #define stats_cactive_get JEMALLOC_N(stats_cactive_get) #define stats_cactive_sub JEMALLOC_N(stats_cactive_sub) @@ -185,6 +210,7 @@ #define tcache_event JEMALLOC_N(tcache_event) #define tcache_get JEMALLOC_N(tcache_get) #define tcache_stats_merge JEMALLOC_N(tcache_stats_merge) +#define tcache_tls JEMALLOC_N(tcache_tls) #define thread_allocated_get JEMALLOC_N(thread_allocated_get) #define thread_allocated_get_hard JEMALLOC_N(thread_allocated_get_hard) -#define u2s JEMALLOC_N(u2s) +#define thread_allocated_tls JEMALLOC_N(thread_allocated_tls) diff --git a/include/jemalloc/internal/stats.h b/include/jemalloc/internal/stats.h index 4af23c33..27f68e36 
100644 --- a/include/jemalloc/internal/stats.h +++ b/include/jemalloc/internal/stats.h @@ -1,8 +1,6 @@ /******************************************************************************/ #ifdef JEMALLOC_H_TYPES -#define UMAX2S_BUFSIZE 65 - typedef struct tcache_bin_stats_s tcache_bin_stats_t; typedef struct malloc_bin_stats_s malloc_bin_stats_t; typedef struct malloc_large_stats_s malloc_large_stats_t; @@ -135,11 +133,6 @@ extern bool opt_stats_print; extern size_t stats_cactive; -char *u2s(uint64_t x, unsigned base, char *s); -void malloc_cprintf(void (*write)(void *, const char *), void *cbopaque, - const char *format, ...) JEMALLOC_ATTR(format(printf, 3, 4)); -void malloc_printf(const char *format, ...) - JEMALLOC_ATTR(format(printf, 1, 2)); void stats_print(void (*write)(void *, const char *), void *cbopaque, const char *opts); diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h new file mode 100644 index 00000000..a268109c --- /dev/null +++ b/include/jemalloc/internal/util.h @@ -0,0 +1,130 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +/* Size of stack-allocated buffer passed to buferror(). */ +#define BUFERROR_BUF 64 + +/* + * Define a custom assert() in order to reduce the chances of deadlock during + * assertion failure. + */ +#ifndef assert +#define assert(e) do { \ + if (config_debug && !(e)) { \ + malloc_printf( \ + ": %s:%d: Failed assertion: \"%s\"\n", \ + __FILE__, __LINE__, #e); \ + abort(); \ + } \ +} while (0) +#endif + +/* Use to assert a particular configuration, e.g., cassert(config_debug). */ +#define cassert(c) do { \ + if ((c) == false) \ + assert(false); \ +} while (0) + +#ifndef not_reached +#define not_reached() do { \ + if (config_debug) { \ + malloc_printf( \ + ": %s:%d: Unreachable code reached\n", \ + __FILE__, __LINE__); \ + abort(); \ + } \ +} while (0) +#endif + +#ifndef not_implemented +#define not_implemented() do { \ + if (config_debug) { \ + malloc_printf(": %s:%d: Not implemented\n", \ + __FILE__, __LINE__); \ + abort(); \ + } \ +} while (0) +#endif + +#define assert_not_implemented(e) do { \ + if (config_debug && !(e)) \ + not_implemented(); \ +} while (0) + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +extern void (*je_malloc_message)(void *wcbopaque, const char *s); + +int buferror(int errnum, char *buf, size_t buflen); + +/* + * malloc_vsnprintf() supports a subset of snprintf(3) that avoids floating + * point math. + */ +int malloc_vsnprintf(char *str, size_t size, const char *format, + va_list ap); +int malloc_snprintf(char *str, size_t size, const char *format, ...) + JEMALLOC_ATTR(format(printf, 3, 4)); +/* + * malloc_[v]tprintf() prints to a thread-local string buffer, so the result is + * overwritten by the next call to malloc_[v]{,c,t}printf(). + */ +const char * malloc_vtprintf(const char *format, va_list ap); +const char * malloc_tprintf(const char *format, ...) + JEMALLOC_ATTR(format(printf, 1, 2)); +void malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque, + const char *format, va_list ap); +void malloc_cprintf(void (*write)(void *, const char *), void *cbopaque, + const char *format, ...) JEMALLOC_ATTR(format(printf, 3, 4)); +void malloc_printf(const char *format, ...) 
+ JEMALLOC_ATTR(format(printf, 1, 2)); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#ifndef JEMALLOC_ENABLE_INLINE +size_t pow2_ceil(size_t x); +void malloc_write(const char *s); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_UTIL_C_)) +/* Compute the smallest power of 2 that is >= x. */ +JEMALLOC_INLINE size_t +pow2_ceil(size_t x) +{ + + x--; + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + x |= x >> 8; + x |= x >> 16; +#if (LG_SIZEOF_PTR == 3) + x |= x >> 32; +#endif + x++; + return (x); +} + +/* + * Wrapper around malloc_message() that avoids the need for + * je_malloc_message(...) throughout the code. + */ +JEMALLOC_INLINE void +malloc_write(const char *s) +{ + + je_malloc_message(NULL, s); +} + +#endif + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/include/jemalloc/jemalloc_defs.h.in b/include/jemalloc/jemalloc_defs.h.in index 049cf01a..6b2b0d01 100644 --- a/include/jemalloc/jemalloc_defs.h.in +++ b/include/jemalloc/jemalloc_defs.h.in @@ -166,3 +166,6 @@ /* sizeof(long) == 2^LG_SIZEOF_LONG. */ #undef LG_SIZEOF_LONG + +/* sizeof(intmax_t) == 2^LG_SIZEOF_INTMAX_T. */ +#undef LG_SIZEOF_INTMAX_T diff --git a/src/chunk_mmap.c b/src/chunk_mmap.c index 164e86e7..c7409284 100644 --- a/src/chunk_mmap.c +++ b/src/chunk_mmap.c @@ -61,9 +61,8 @@ pages_map(void *addr, size_t size, bool noreserve) char buf[BUFERROR_BUF]; buferror(errno, buf, sizeof(buf)); - malloc_write(": Error in munmap(): "); - malloc_write(buf); - malloc_write("\n"); + malloc_printf(": Error in munmap(): %s\n", buf); if (opt_abort) abort(); } diff --git a/src/huge.c b/src/huge.c index f2fba869..2d51c529 100644 --- a/src/huge.c +++ b/src/huge.c @@ -239,9 +239,8 @@ huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, char buf[BUFERROR_BUF]; buferror(errno, buf, sizeof(buf)); - malloc_write(": Error in mremap(): "); - malloc_write(buf); - malloc_write("\n"); + malloc_printf(": Error in mremap(): %s\n", + buf); if (opt_abort) abort(); memcpy(ret, ptr, copysize); diff --git a/src/jemalloc.c b/src/jemalloc.c index ad1ee8ef..e148ae0e 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -55,7 +55,6 @@ size_t opt_narenas = 0; /******************************************************************************/ /* Function prototypes for non-inline static functions. */ -static void wrtmessage(void *cbopaque, const char *s); static void stats_print_atexit(void); static unsigned malloc_ncpus(void); static void arenas_cleanup(void *arg); @@ -71,19 +70,6 @@ static bool malloc_init_hard(void); static int imemalign(void **memptr, size_t alignment, size_t size, bool enforce_min_alignment); -/******************************************************************************/ -/* malloc_message() setup. */ - -JEMALLOC_CATTR(visibility("hidden"), static) -void -wrtmessage(void *cbopaque, const char *s) -{ - UNUSED int result = write(STDERR_FILENO, s, strlen(s)); -} - -void (*je_malloc_message)(void *, const char *s) - JEMALLOC_ATTR(visibility("default")) = wrtmessage; - /******************************************************************************/ /* * Begin miscellaneous support functions. @@ -178,25 +164,6 @@ choose_arena_hard(void) return (ret); } -/* - * glibc provides a non-standard strerror_r() when _GNU_SOURCE is defined, so - * provide a wrapper.
- */ -int -buferror(int errnum, char *buf, size_t buflen) -{ -#ifdef _GNU_SOURCE - char *b = strerror_r(errno, buf, buflen); - if (b != buf) { - strncpy(buf, b, buflen); - buf[buflen-1] = '\0'; - } - return (0); -#else - return (strerror_r(errno, buf, buflen)); -#endif -} - static void stats_print_atexit(void) { @@ -324,68 +291,64 @@ malloc_conf_next(char const **opts_p, char const **k_p, size_t *klen_p, for (accept = false; accept == false;) { switch (*opts) { - case 'A': case 'B': case 'C': case 'D': case 'E': - case 'F': case 'G': case 'H': case 'I': case 'J': - case 'K': case 'L': case 'M': case 'N': case 'O': - case 'P': case 'Q': case 'R': case 'S': case 'T': - case 'U': case 'V': case 'W': case 'X': case 'Y': - case 'Z': - case 'a': case 'b': case 'c': case 'd': case 'e': - case 'f': case 'g': case 'h': case 'i': case 'j': - case 'k': case 'l': case 'm': case 'n': case 'o': - case 'p': case 'q': case 'r': case 's': case 't': - case 'u': case 'v': case 'w': case 'x': case 'y': - case 'z': - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - case '_': - opts++; - break; - case ':': - opts++; - *klen_p = (uintptr_t)opts - 1 - (uintptr_t)*k_p; - *v_p = opts; - accept = true; - break; - case '\0': - if (opts != *opts_p) { - malloc_write(": Conf string " - "ends with key\n"); - } - return (true); - default: - malloc_write(": Malformed conf " - "string\n"); - return (true); + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': + case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': + case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': + case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': + case 'Y': case 'Z': + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': + case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': + case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': + case 's': case 't': case 'u': case 'v': case 'w': case 'x': + case 'y': case 'z': + case '0': case '1': case '2': case '3': case '4': case '5': + case '6': case '7': case '8': case '9': + case '_': + opts++; + break; + case ':': + opts++; + *klen_p = (uintptr_t)opts - 1 - (uintptr_t)*k_p; + *v_p = opts; + accept = true; + break; + case '\0': + if (opts != *opts_p) { + malloc_write(": Conf string ends " + "with key\n"); + } + return (true); + default: + malloc_write(": Malformed conf string\n"); + return (true); } } for (accept = false; accept == false;) { switch (*opts) { - case ',': - opts++; - /* - * Look ahead one character here, because the - * next time this function is called, it will - * assume that end of input has been cleanly - * reached if no input remains, but we have - * optimistically already consumed the comma if - * one exists. - */ - if (*opts == '\0') { - malloc_write(": Conf string " - "ends with comma\n"); - } - *vlen_p = (uintptr_t)opts - 1 - (uintptr_t)*v_p; - accept = true; - break; - case '\0': - *vlen_p = (uintptr_t)opts - (uintptr_t)*v_p; - accept = true; - break; - default: - opts++; - break; + case ',': + opts++; + /* + * Look ahead one character here, because the next time + * this function is called, it will assume that end of + * input has been cleanly reached if no input remains, + * but we have optimistically already consumed the + * comma if one exists. 
+ */ + if (*opts == '\0') { + malloc_write(": Conf string ends " + "with comma\n"); + } + *vlen_p = (uintptr_t)opts - 1 - (uintptr_t)*v_p; + accept = true; + break; + case '\0': + *vlen_p = (uintptr_t)opts - (uintptr_t)*v_p; + accept = true; + break; + default: + opts++; + break; } } @@ -397,17 +360,9 @@ static void malloc_conf_error(const char *msg, const char *k, size_t klen, const char *v, size_t vlen) { - char buf[PATH_MAX + 1]; - malloc_write(": "); - malloc_write(msg); - malloc_write(": "); - memcpy(buf, k, klen); - memcpy(&buf[klen], ":", 1); - memcpy(&buf[klen+1], v, vlen); - buf[klen+1+vlen] = '\0'; - malloc_write(buf); - malloc_write("\n"); + malloc_printf(": %s: %.*s:%.*s\n", msg, (int)klen, k, + (int)vlen, v); } static void @@ -458,8 +413,7 @@ malloc_conf_init(void) opts = buf; } break; - } - case 2: { + } case 2: { const char *envname = #ifdef JEMALLOC_PREFIX JEMALLOC_CPREFIX"MALLOC_CONF" @@ -480,8 +434,7 @@ malloc_conf_init(void) opts = buf; } break; - } - default: + } default: /* NOTREACHED */ assert(false); buf[0] = '\0'; @@ -490,15 +443,15 @@ malloc_conf_init(void) while (*opts != '\0' && malloc_conf_next(&opts, &k, &klen, &v, &vlen) == false) { -#define CONF_HANDLE_BOOL(n) \ +#define CONF_HANDLE_BOOL(o, n) \ if (sizeof(#n)-1 == klen && strncmp(#n, k, \ klen) == 0) { \ if (strncmp("true", v, vlen) == 0 && \ vlen == sizeof("true")-1) \ - opt_##n = true; \ + o = true; \ else if (strncmp("false", v, vlen) == \ 0 && vlen == sizeof("false")-1) \ - opt_##n = false; \ + o = false; \ else { \ malloc_conf_error( \ "Invalid conf value", \ @@ -506,7 +459,7 @@ malloc_conf_init(void) } \ continue; \ } -#define CONF_HANDLE_SIZE_T(n, min, max) \ +#define CONF_HANDLE_SIZE_T(o, n, min, max) \ if (sizeof(#n)-1 == klen && strncmp(#n, k, \ klen) == 0) { \ unsigned long ul; \ @@ -524,10 +477,10 @@ malloc_conf_init(void) "Out-of-range conf value", \ k, klen, v, vlen); \ } else \ - opt_##n = ul; \ + o = ul; \ continue; \ } -#define CONF_HANDLE_SSIZE_T(n, min, max) \ +#define CONF_HANDLE_SSIZE_T(o, n, min, max) \ if (sizeof(#n)-1 == klen && strncmp(#n, k, \ klen) == 0) { \ long l; \ @@ -546,54 +499,58 @@ malloc_conf_init(void) "Out-of-range conf value", \ k, klen, v, vlen); \ } else \ - opt_##n = l; \ + o = l; \ continue; \ } -#define CONF_HANDLE_CHAR_P(n, d) \ +#define CONF_HANDLE_CHAR_P(o, n, d) \ if (sizeof(#n)-1 == klen && strncmp(#n, k, \ klen) == 0) { \ size_t cpylen = (vlen <= \ - sizeof(opt_##n)-1) ? vlen : \ - sizeof(opt_##n)-1; \ - strncpy(opt_##n, v, cpylen); \ - opt_##n[cpylen] = '\0'; \ + sizeof(o)-1) ? vlen : \ + sizeof(o)-1; \ + strncpy(o, v, cpylen); \ + o[cpylen] = '\0'; \ continue; \ } - CONF_HANDLE_BOOL(abort) + CONF_HANDLE_BOOL(opt_abort, abort) /* * Chunks always require at least one * header page, * plus one data page. 
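 * (Hence the PAGE_SHIFT+1 lower bound on lg_chunk just below: the minimum chunk is 2^(PAGE_SHIFT+1) bytes, i.e. two pages.)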
*/ - CONF_HANDLE_SIZE_T(lg_chunk, PAGE_SHIFT+1, + CONF_HANDLE_SIZE_T(opt_lg_chunk, lg_chunk, PAGE_SHIFT+1, (sizeof(size_t) << 3) - 1) - CONF_HANDLE_SIZE_T(narenas, 1, SIZE_T_MAX) - CONF_HANDLE_SSIZE_T(lg_dirty_mult, -1, - (sizeof(size_t) << 3) - 1) - CONF_HANDLE_BOOL(stats_print) + CONF_HANDLE_SIZE_T(opt_narenas, narenas, 1, SIZE_T_MAX) + CONF_HANDLE_SSIZE_T(opt_lg_dirty_mult, lg_dirty_mult, + -1, (sizeof(size_t) << 3) - 1) + CONF_HANDLE_BOOL(opt_stats_print, stats_print) if (config_fill) { - CONF_HANDLE_BOOL(junk) - CONF_HANDLE_BOOL(zero) + CONF_HANDLE_BOOL(opt_junk, junk) + CONF_HANDLE_BOOL(opt_zero, zero) } if (config_xmalloc) { - CONF_HANDLE_BOOL(xmalloc) + CONF_HANDLE_BOOL(opt_xmalloc, xmalloc) } if (config_tcache) { - CONF_HANDLE_BOOL(tcache) - CONF_HANDLE_SSIZE_T(lg_tcache_max, -1, + CONF_HANDLE_BOOL(opt_tcache, tcache) + CONF_HANDLE_SSIZE_T(opt_lg_tcache_max, + lg_tcache_max, -1, (sizeof(size_t) << 3) - 1) } if (config_prof) { - CONF_HANDLE_BOOL(prof) - CONF_HANDLE_CHAR_P(prof_prefix, "jeprof") - CONF_HANDLE_BOOL(prof_active) - CONF_HANDLE_SSIZE_T(lg_prof_sample, 0, + CONF_HANDLE_BOOL(opt_prof, prof) + CONF_HANDLE_CHAR_P(opt_prof_prefix, prof_prefix, + "jeprof") + CONF_HANDLE_BOOL(opt_prof_active, prof_active) + CONF_HANDLE_SSIZE_T(opt_lg_prof_sample, + lg_prof_sample, 0, (sizeof(uint64_t) << 3) - 1) - CONF_HANDLE_BOOL(prof_accum) - CONF_HANDLE_SSIZE_T(lg_prof_interval, -1, + CONF_HANDLE_BOOL(opt_prof_accum, prof_accum) + CONF_HANDLE_SSIZE_T(opt_lg_prof_interval, + lg_prof_interval, -1, (sizeof(uint64_t) << 3) - 1) - CONF_HANDLE_BOOL(prof_gdump) - CONF_HANDLE_BOOL(prof_leak) + CONF_HANDLE_BOOL(opt_prof_gdump, prof_gdump) + CONF_HANDLE_BOOL(opt_prof_leak, prof_leak) } malloc_conf_error("Invalid conf pair", k, klen, v, vlen); @@ -773,12 +730,9 @@ malloc_init_hard(void) * machinery will fail to allocate memory at far lower limits. */ if (narenas > chunksize / sizeof(arena_t *)) { - char buf[UMAX2S_BUFSIZE]; - narenas = chunksize / sizeof(arena_t *); - malloc_write(": Reducing narenas to limit ("); - malloc_write(u2s(narenas, 10, buf)); - malloc_write(")\n"); + malloc_printf(": Reducing narenas to limit (%d)\n", + narenas); } /* Allocate and initialize arenas. */ diff --git a/src/prof.c b/src/prof.c index d78658d6..b57c5b8a 100644 --- a/src/prof.c +++ b/src/prof.c @@ -74,16 +74,18 @@ static _Unwind_Reason_Code prof_unwind_callback( struct _Unwind_Context *context, void *arg); #endif static bool prof_flush(bool propagate_err); -static bool prof_write(const char *s, bool propagate_err); +static bool prof_write(bool propagate_err, const char *s); +static bool prof_printf(bool propagate_err, const char *format, ...) 
+ JEMALLOC_ATTR(format(printf, 2, 3)); static void prof_ctx_sum(prof_ctx_t *ctx, prof_cnt_t *cnt_all, size_t *leak_nctx); static void prof_ctx_destroy(prof_ctx_t *ctx); static void prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt); -static bool prof_dump_ctx(prof_ctx_t *ctx, prof_bt_t *bt, - bool propagate_err); +static bool prof_dump_ctx(bool propagate_err, prof_ctx_t *ctx, + prof_bt_t *bt); static bool prof_dump_maps(bool propagate_err); -static bool prof_dump(const char *filename, bool leakcheck, - bool propagate_err); +static bool prof_dump(bool propagate_err, const char *filename, + bool leakcheck); static void prof_dump_filename(char *filename, char v, int64_t vseq); static void prof_fdump(void); static void prof_bt_hash(const void *key, unsigned minbits, size_t *hash1, @@ -587,7 +589,7 @@ prof_flush(bool propagate_err) } static bool -prof_write(const char *s, bool propagate_err) +prof_write(bool propagate_err, const char *s) { unsigned i, slen, n; @@ -616,6 +618,20 @@ prof_write(const char *s, bool propagate_err) return (false); } +JEMALLOC_ATTR(format(printf, 2, 3)) +static bool +prof_printf(bool propagate_err, const char *format, ...) +{ + bool ret; + va_list ap; + + va_start(ap, format); + ret = prof_write(propagate_err, malloc_vtprintf(format, ap)); + va_end(ap); + + return (ret); +} + static void prof_ctx_sum(prof_ctx_t *ctx, prof_cnt_t *cnt_all, size_t *leak_nctx) { @@ -744,9 +760,8 @@ prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt) } static bool -prof_dump_ctx(prof_ctx_t *ctx, prof_bt_t *bt, bool propagate_err) +prof_dump_ctx(bool propagate_err, prof_ctx_t *ctx, prof_bt_t *bt) { - char buf[UMAX2S_BUFSIZE]; unsigned i; cassert(config_prof); @@ -758,27 +773,19 @@ prof_dump_ctx(prof_ctx_t *ctx, prof_bt_t *bt, bool propagate_err) return (false); } - if (prof_write(u2s(ctx->cnt_summed.curobjs, 10, buf), propagate_err) - || prof_write(": ", propagate_err) - || prof_write(u2s(ctx->cnt_summed.curbytes, 10, buf), - propagate_err) - || prof_write(" [", propagate_err) - || prof_write(u2s(ctx->cnt_summed.accumobjs, 10, buf), - propagate_err) - || prof_write(": ", propagate_err) - || prof_write(u2s(ctx->cnt_summed.accumbytes, 10, buf), - propagate_err) - || prof_write("] @", propagate_err)) + if (prof_printf(propagate_err, "%"PRId64": %"PRId64 + " [%"PRIu64": %"PRIu64"] @", + ctx->cnt_summed.curobjs, ctx->cnt_summed.curbytes, + ctx->cnt_summed.accumobjs, ctx->cnt_summed.accumbytes)) return (true); for (i = 0; i < bt->len; i++) { - if (prof_write(" 0x", propagate_err) - || prof_write(u2s((uintptr_t)bt->vec[i], 16, buf), - propagate_err)) + if (prof_printf(propagate_err, " %#"PRIx64, + (uintptr_t)bt->vec[i])) return (true); } - if (prof_write("\n", propagate_err)) + if (prof_write(propagate_err, "\n")) return (true); return (false); @@ -788,39 +795,15 @@ static bool prof_dump_maps(bool propagate_err) { int mfd; - char buf[UMAX2S_BUFSIZE]; - char *s; - unsigned i, slen; - /* /proc//maps\0 */ - char mpath[6 + UMAX2S_BUFSIZE - + 5 + 1]; cassert(config_prof); - i = 0; - - s = "/proc/"; - slen = strlen(s); - memcpy(&mpath[i], s, slen); - i += slen; - - s = u2s(getpid(), 10, buf); - slen = strlen(s); - memcpy(&mpath[i], s, slen); - i += slen; - - s = "/maps"; - slen = strlen(s); - memcpy(&mpath[i], s, slen); - i += slen; - - mpath[i] = '\0'; - - mfd = open(mpath, O_RDONLY); + mfd = open(malloc_tprintf("/proc/%d/maps", (int)getpid()), + O_RDONLY); if (mfd != -1) { ssize_t nread; - if (prof_write("\nMAPPED_LIBRARIES:\n", propagate_err) && + if (prof_write(propagate_err, 
"\nMAPPED_LIBRARIES:\n") && propagate_err) return (true); nread = 0; @@ -842,7 +825,7 @@ prof_dump_maps(bool propagate_err) } static bool -prof_dump(const char *filename, bool leakcheck, bool propagate_err) +prof_dump(bool propagate_err, const char *filename, bool leakcheck) { prof_cnt_t cnt_all; size_t tabind; @@ -854,7 +837,6 @@ prof_dump(const char *filename, bool leakcheck, bool propagate_err) prof_ctx_t *p; void *v; } ctx; - char buf[UMAX2S_BUFSIZE]; size_t leak_nctx; cassert(config_prof); @@ -863,9 +845,9 @@ prof_dump(const char *filename, bool leakcheck, bool propagate_err) prof_dump_fd = creat(filename, 0644); if (prof_dump_fd == -1) { if (propagate_err == false) { - malloc_write(": creat(\""); - malloc_write(filename); - malloc_write("\", 0644) failed\n"); + malloc_printf( + ": creat(\"%s\"), 0644) failed\n", + filename); if (opt_abort) abort(); } @@ -879,31 +861,27 @@ prof_dump(const char *filename, bool leakcheck, bool propagate_err) prof_ctx_sum(ctx.p, &cnt_all, &leak_nctx); /* Dump profile header. */ - if (prof_write("heap profile: ", propagate_err) - || prof_write(u2s(cnt_all.curobjs, 10, buf), propagate_err) - || prof_write(": ", propagate_err) - || prof_write(u2s(cnt_all.curbytes, 10, buf), propagate_err) - || prof_write(" [", propagate_err) - || prof_write(u2s(cnt_all.accumobjs, 10, buf), propagate_err) - || prof_write(": ", propagate_err) - || prof_write(u2s(cnt_all.accumbytes, 10, buf), propagate_err)) - goto ERROR; - if (opt_lg_prof_sample == 0) { - if (prof_write("] @ heapprofile\n", propagate_err)) + if (prof_printf(propagate_err, + "heap profile: %"PRId64": %"PRId64 + " [%"PRIu64": %"PRIu64"] @ heapprofile\n", + cnt_all.curobjs, cnt_all.curbytes, + cnt_all.accumobjs, cnt_all.accumbytes)) goto ERROR; } else { - if (prof_write("] @ heap_v2/", propagate_err) - || prof_write(u2s((uint64_t)1U << opt_lg_prof_sample, 10, - buf), propagate_err) - || prof_write("\n", propagate_err)) + if (prof_printf(propagate_err, + "heap profile: %"PRId64": %"PRId64 + " [%"PRIu64": %"PRIu64"] @ heap_v2/%"PRIu64"\n", + cnt_all.curobjs, cnt_all.curbytes, + cnt_all.accumobjs, cnt_all.accumbytes, + ((uint64_t)1U << opt_lg_prof_sample))) goto ERROR; } /* Dump per ctx profile stats. */ for (tabind = 0; ckh_iter(&bt2ctx, &tabind, &bt.v, &ctx.v) == false;) { - if (prof_dump_ctx(ctx.p, bt.p, propagate_err)) + if (prof_dump_ctx(propagate_err, ctx.p, bt.p)) goto ERROR; } @@ -917,17 +895,14 @@ prof_dump(const char *filename, bool leakcheck, bool propagate_err) prof_leave(); if (leakcheck && cnt_all.curbytes != 0) { - malloc_write(": Leak summary: "); - malloc_write(u2s(cnt_all.curbytes, 10, buf)); - malloc_write((cnt_all.curbytes != 1) ? " bytes, " : " byte, "); - malloc_write(u2s(cnt_all.curobjs, 10, buf)); - malloc_write((cnt_all.curobjs != 1) ? " objects, " : - " object, "); - malloc_write(u2s(leak_nctx, 10, buf)); - malloc_write((leak_nctx != 1) ? " contexts\n" : " context\n"); - malloc_write(": Run pprof on \""); - malloc_write(filename); - malloc_write("\" for leak detail\n"); + malloc_printf(": Leak summary: %"PRId64" byte%s, %" + PRId64" object%s, %zu context%s\n", + cnt_all.curbytes, (cnt_all.curbytes != 1) ? "s" : "", + cnt_all.curobjs, (cnt_all.curobjs != 1) ? "s" : "", + leak_nctx, (leak_nctx != 1) ? 
"s" : ""); + malloc_printf( + ": Run pprof on \"%s\" for leak detail\n", + filename); } return (false); @@ -936,76 +911,24 @@ ERROR: return (true); } -#define DUMP_FILENAME_BUFSIZE (PATH_MAX+ UMAX2S_BUFSIZE \ - + 1 \ - + UMAX2S_BUFSIZE \ - + 2 \ - + UMAX2S_BUFSIZE \ - + 5 + 1) +#define DUMP_FILENAME_BUFSIZE (PATH_MAX + 1) static void prof_dump_filename(char *filename, char v, int64_t vseq) { - char buf[UMAX2S_BUFSIZE]; - char *s; - unsigned i, slen; cassert(config_prof); - /* - * Construct a filename of the form: - * - * ...v.heap\0 - */ - - i = 0; - - s = opt_prof_prefix; - slen = strlen(s); - memcpy(&filename[i], s, slen); - i += slen; - - s = "."; - slen = strlen(s); - memcpy(&filename[i], s, slen); - i += slen; - - s = u2s(getpid(), 10, buf); - slen = strlen(s); - memcpy(&filename[i], s, slen); - i += slen; - - s = "."; - slen = strlen(s); - memcpy(&filename[i], s, slen); - i += slen; - - s = u2s(prof_dump_seq, 10, buf); - prof_dump_seq++; - slen = strlen(s); - memcpy(&filename[i], s, slen); - i += slen; - - s = "."; - slen = strlen(s); - memcpy(&filename[i], s, slen); - i += slen; - - filename[i] = v; - i++; - if (vseq != UINT64_C(0xffffffffffffffff)) { - s = u2s(vseq, 10, buf); - slen = strlen(s); - memcpy(&filename[i], s, slen); - i += slen; + /* "...v.heap" */ + malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE, + "%s.%d.%"PRIu64".%c%"PRId64".heap", + opt_prof_prefix, (int)getpid(), prof_dump_seq, v, vseq); + } else { + /* "....heap" */ + malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE, + "%s.%d.%"PRIu64".%c.heap", + opt_prof_prefix, (int)getpid(), prof_dump_seq, v); } - - s = ".heap"; - slen = strlen(s); - memcpy(&filename[i], s, slen); - i += slen; - - filename[i] = '\0'; } static void @@ -1022,14 +945,14 @@ prof_fdump(void) malloc_mutex_lock(&prof_dump_seq_mtx); prof_dump_filename(filename, 'f', UINT64_C(0xffffffffffffffff)); malloc_mutex_unlock(&prof_dump_seq_mtx); - prof_dump(filename, opt_prof_leak, false); + prof_dump(false, filename, opt_prof_leak); } } void prof_idump(void) { - char filename[DUMP_FILENAME_BUFSIZE]; + char filename[PATH_MAX + 1]; cassert(config_prof); @@ -1048,7 +971,7 @@ prof_idump(void) prof_dump_filename(filename, 'i', prof_dump_iseq); prof_dump_iseq++; malloc_mutex_unlock(&prof_dump_seq_mtx); - prof_dump(filename, false, false); + prof_dump(false, filename, false); } } @@ -1072,7 +995,7 @@ prof_mdump(const char *filename) malloc_mutex_unlock(&prof_dump_seq_mtx); filename = filename_buf; } - return (prof_dump(filename, false, true)); + return (prof_dump(true, filename, false)); } void @@ -1097,7 +1020,7 @@ prof_gdump(void) prof_dump_filename(filename, 'u', prof_dump_useq); prof_dump_useq++; malloc_mutex_unlock(&prof_dump_seq_mtx); - prof_dump(filename, false, false); + prof_dump(false, filename, false); } } diff --git a/src/stats.c b/src/stats.c index f9763785..38c8bb3c 100644 --- a/src/stats.c +++ b/src/stats.c @@ -44,8 +44,6 @@ size_t stats_cactive = 0; /******************************************************************************/ /* Function prototypes for non-inline static functions. 
*/ -static void malloc_vcprintf(void (*write_cb)(void *, const char *), - void *cbopaque, const char *format, va_list ap); static void stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, unsigned i); static void stats_arena_lruns_print(void (*write_cb)(void *, const char *), @@ -55,97 +53,6 @@ static void stats_arena_print(void (*write_cb)(void *, const char *), /******************************************************************************/ -/* XXX Refactor by adding malloc_vsnprintf(). */ -/* - * We don't want to depend on vsnprintf() for production builds, since that can - * cause unnecessary bloat for static binaries. u2s() provides minimal integer - * printing functionality, so that malloc_printf() use can be limited to - * JEMALLOC_STATS code. - */ -char * -u2s(uint64_t x, unsigned base, char *s) -{ - unsigned i; - - i = UMAX2S_BUFSIZE - 1; - s[i] = '\0'; - switch (base) { - case 10: - do { - i--; - s[i] = "0123456789"[x % (uint64_t)10]; - x /= (uint64_t)10; - } while (x > 0); - break; - case 16: - do { - i--; - s[i] = "0123456789abcdef"[x & 0xf]; - x >>= 4; - } while (x > 0); - break; - default: - do { - i--; - s[i] = "0123456789abcdefghijklmnopqrstuvwxyz"[x % - (uint64_t)base]; - x /= (uint64_t)base; - } while (x > 0); - } - - return (&s[i]); -} - -static void -malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque, - const char *format, va_list ap) -{ - char buf[4096]; - - if (write_cb == NULL) { - /* - * The caller did not provide an alternate write_cb callback - * function, so use the default one. malloc_write() is an - * inline function, so use malloc_message() directly here. - */ - write_cb = je_malloc_message; - cbopaque = NULL; - } - - vsnprintf(buf, sizeof(buf), format, ap); - write_cb(cbopaque, buf); -} - -/* - * Print to a callback function in such a way as to (hopefully) avoid memory - * allocation. - */ -JEMALLOC_ATTR(format(printf, 3, 4)) -void -malloc_cprintf(void (*write_cb)(void *, const char *), void *cbopaque, - const char *format, ...) -{ - va_list ap; - - va_start(ap, format); - malloc_vcprintf(write_cb, cbopaque, format, ap); - va_end(ap); -} - -/* - * Print to stderr in such a way as to (hopefully) avoid memory allocation. - */ -JEMALLOC_ATTR(format(printf, 1, 2)) -void -malloc_printf(const char *format, ...) 
-{ - va_list ap; - - va_start(ap, format); - malloc_vcprintf(NULL, NULL, format, ap); - va_end(ap); -} - static void stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, unsigned i) @@ -360,7 +267,6 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, int err; uint64_t epoch; size_t u64sz; - char s[UMAX2S_BUFSIZE]; bool general = true; bool merged = true; bool unmerged = true; @@ -403,22 +309,22 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, for (i = 0; opts[i] != '\0'; i++) { switch (opts[i]) { - case 'g': - general = false; - break; - case 'm': - merged = false; - break; - case 'a': - unmerged = false; - break; - case 'b': - bins = false; - break; - case 'l': - large = false; - break; - default:; + case 'g': + general = false; + break; + case 'm': + merged = false; + break; + case 'a': + unmerged = false; + break; + case 'b': + bins = false; + break; + case 'l': + large = false; + break; + default:; } } } @@ -438,46 +344,34 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, cpsz = sizeof(const char *); CTL_GET("version", &cpv, const char *); - write_cb(cbopaque, "Version: "); - write_cb(cbopaque, cpv); - write_cb(cbopaque, "\n"); + malloc_cprintf(write_cb, cbopaque, "Version: %s\n", cpv); CTL_GET("config.debug", &bv, bool); - write_cb(cbopaque, "Assertions "); - write_cb(cbopaque, bv ? "enabled" : "disabled"); - write_cb(cbopaque, "\n"); + malloc_cprintf(write_cb, cbopaque, "Assertions %s\n", + bv ? "enabled" : "disabled"); #define OPT_WRITE_BOOL(n) \ if ((err = je_mallctl("opt."#n, &bv, &bsz, NULL, 0)) \ == 0) { \ - write_cb(cbopaque, " opt."#n": "); \ - write_cb(cbopaque, bv ? "true" : "false"); \ - write_cb(cbopaque, "\n"); \ + malloc_cprintf(write_cb, cbopaque, \ + " opt."#n": %s\n", bv ? 
"true" : "false"); \ } #define OPT_WRITE_SIZE_T(n) \ if ((err = je_mallctl("opt."#n, &sv, &ssz, NULL, 0)) \ == 0) { \ - write_cb(cbopaque, " opt."#n": "); \ - write_cb(cbopaque, u2s(sv, 10, s)); \ - write_cb(cbopaque, "\n"); \ + malloc_cprintf(write_cb, cbopaque, \ + " opt."#n": %zu\n", sv); \ } #define OPT_WRITE_SSIZE_T(n) \ if ((err = je_mallctl("opt."#n, &ssv, &sssz, NULL, 0)) \ == 0) { \ - if (ssv >= 0) { \ - write_cb(cbopaque, " opt."#n": "); \ - write_cb(cbopaque, u2s(ssv, 10, s)); \ - } else { \ - write_cb(cbopaque, " opt."#n": -"); \ - write_cb(cbopaque, u2s(-ssv, 10, s)); \ - } \ - write_cb(cbopaque, "\n"); \ + malloc_cprintf(write_cb, cbopaque, \ + " opt."#n": %zd\n", ssv); \ } #define OPT_WRITE_CHAR_P(n) \ if ((err = je_mallctl("opt."#n, &cpv, &cpsz, NULL, 0)) \ == 0) { \ - write_cb(cbopaque, " opt."#n": \""); \ - write_cb(cbopaque, cpv); \ - write_cb(cbopaque, "\"\n"); \ + malloc_cprintf(write_cb, cbopaque, \ + " opt."#n": \"%s\"\n", cpv); \ } write_cb(cbopaque, "Run-time option settings:\n"); @@ -505,68 +399,52 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, #undef OPT_WRITE_SSIZE_T #undef OPT_WRITE_CHAR_P - write_cb(cbopaque, "CPUs: "); - write_cb(cbopaque, u2s(ncpus, 10, s)); - write_cb(cbopaque, "\n"); + malloc_cprintf(write_cb, cbopaque, "CPUs: %u\n", ncpus); CTL_GET("arenas.narenas", &uv, unsigned); - write_cb(cbopaque, "Max arenas: "); - write_cb(cbopaque, u2s(uv, 10, s)); - write_cb(cbopaque, "\n"); + malloc_cprintf(write_cb, cbopaque, "Max arenas: %u\n", uv); - write_cb(cbopaque, "Pointer size: "); - write_cb(cbopaque, u2s(sizeof(void *), 10, s)); - write_cb(cbopaque, "\n"); + malloc_cprintf(write_cb, cbopaque, "Pointer size: %zu\n", + sizeof(void *)); CTL_GET("arenas.quantum", &sv, size_t); - write_cb(cbopaque, "Quantum size: "); - write_cb(cbopaque, u2s(sv, 10, s)); - write_cb(cbopaque, "\n"); + malloc_cprintf(write_cb, cbopaque, "Quantum size: %zu\n", sv); CTL_GET("opt.lg_dirty_mult", &ssv, ssize_t); if (ssv >= 0) { - write_cb(cbopaque, - "Min active:dirty page ratio per arena: "); - write_cb(cbopaque, u2s((1U << ssv), 10, s)); - write_cb(cbopaque, ":1\n"); + malloc_cprintf(write_cb, cbopaque, + "Min active:dirty page ratio per arena: %u:1\n", + (1U << ssv)); } else { write_cb(cbopaque, "Min active:dirty page ratio per arena: N/A\n"); } if ((err = je_mallctl("arenas.tcache_max", &sv, &ssz, NULL, 0)) == 0) { - write_cb(cbopaque, - "Maximum thread-cached size class: "); - write_cb(cbopaque, u2s(sv, 10, s)); - write_cb(cbopaque, "\n"); + malloc_cprintf(write_cb, cbopaque, + "Maximum thread-cached size class: %zu\n", sv); } if ((err = je_mallctl("opt.prof", &bv, &bsz, NULL, 0)) == 0 && bv) { CTL_GET("opt.lg_prof_sample", &sv, size_t); - write_cb(cbopaque, "Average profile sample interval: "); - write_cb(cbopaque, u2s((((uint64_t)1U) << sv), 10, s)); - write_cb(cbopaque, " (2^"); - write_cb(cbopaque, u2s(sv, 10, s)); - write_cb(cbopaque, ")\n"); + malloc_cprintf(write_cb, cbopaque, + "Average profile sample interval: %"PRIu64 + " (2^%zu)\n", (((uint64_t)1U) << sv), sv); CTL_GET("opt.lg_prof_interval", &ssv, ssize_t); - write_cb(cbopaque, "Average profile dump interval: "); if (ssv >= 0) { - write_cb(cbopaque, u2s((((uint64_t)1U) << ssv), - 10, s)); - write_cb(cbopaque, " (2^"); - write_cb(cbopaque, u2s(ssv, 10, s)); - write_cb(cbopaque, ")\n"); - } else - write_cb(cbopaque, "N/A\n"); + malloc_cprintf(write_cb, cbopaque, + "Average profile dump interval: %"PRIu64 + " (2^%zd)\n", + (((uint64_t)1U) << ssv), ssv); + } else { + write_cb(cbopaque, + 
"Average profile dump interval: N/A\n"); + } } - CTL_GET("arenas.chunksize", &sv, size_t); - write_cb(cbopaque, "Chunk size: "); - write_cb(cbopaque, u2s(sv, 10, s)); CTL_GET("opt.lg_chunk", &sv, size_t); - write_cb(cbopaque, " (2^"); - write_cb(cbopaque, u2s(sv, 10, s)); - write_cb(cbopaque, ")\n"); + malloc_cprintf(write_cb, cbopaque, "Chunk size: %zu (2^%zu)\n", + (ZU(1) << sv), sv); } if (config_stats) { diff --git a/src/util.c b/src/util.c new file mode 100644 index 00000000..7c4c0d44 --- /dev/null +++ b/src/util.c @@ -0,0 +1,539 @@ +#define assert(e) do { \ + if (config_debug && !(e)) { \ + malloc_write(": Failed assertion\n"); \ + abort(); \ + } \ +} while (0) + +#define not_reached() do { \ + if (config_debug) { \ + malloc_write(": Unreachable code reached\n"); \ + abort(); \ + } \ +} while (0) + +#define not_implemented() do { \ + if (config_debug) { \ + malloc_write(": Not implemented\n"); \ + abort(); \ + } \ +} while (0) + +#define JEMALLOC_UTIL_C_ +#include "jemalloc/internal/jemalloc_internal.h" + +/******************************************************************************/ +/* Function prototypes for non-inline static functions. */ + +static void wrtmessage(void *cbopaque, const char *s); +#define U2S_BUFSIZE ((1U << (LG_SIZEOF_INTMAX_T + 3)) + 1) +static char *u2s(uintmax_t x, unsigned base, bool uppercase, char *s, + size_t *slen_p); +#define D2S_BUFSIZE (1 + U2S_BUFSIZE) +static char *d2s(intmax_t x, char sign, char *s, size_t *slen_p); +#define O2S_BUFSIZE (1 + U2S_BUFSIZE) +static char *o2s(uintmax_t x, bool alt_form, char *s, size_t *slen_p); +#define X2S_BUFSIZE (2 + U2S_BUFSIZE) +static char *x2s(uintmax_t x, bool alt_form, bool uppercase, char *s, + size_t *slen_p); + +/******************************************************************************/ + +/* malloc_message() setup. */ +JEMALLOC_CATTR(visibility("hidden"), static) +void +wrtmessage(void *cbopaque, const char *s) +{ + UNUSED int result = write(STDERR_FILENO, s, strlen(s)); +} + +void (*je_malloc_message)(void *, const char *s) + JEMALLOC_ATTR(visibility("default")) = wrtmessage; + +/* + * glibc provides a non-standard strerror_r() when _GNU_SOURCE is defined, so + * provide a wrapper. + */ +int +buferror(int errnum, char *buf, size_t buflen) +{ +#ifdef _GNU_SOURCE + char *b = strerror_r(errno, buf, buflen); + if (b != buf) { + strncpy(buf, b, buflen); + buf[buflen-1] = '\0'; + } + return (0); +#else + return (strerror_r(errno, buf, buflen)); +#endif +} + +static char * +u2s(uintmax_t x, unsigned base, bool uppercase, char *s, size_t *slen_p) +{ + unsigned i; + + i = U2S_BUFSIZE - 1; + s[i] = '\0'; + switch (base) { + case 10: + do { + i--; + s[i] = "0123456789"[x % (uint64_t)10]; + x /= (uint64_t)10; + } while (x > 0); + break; + case 16: { + const char *digits = (uppercase) + ? "0123456789ABCDEF" + : "0123456789abcdef"; + + do { + i--; + s[i] = digits[x & 0xf]; + x >>= 4; + } while (x > 0); + break; + } default: { + const char *digits = (uppercase) + ? "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" + : "0123456789abcdefghijklmnopqrstuvwxyz"; + + assert(base >= 2 && base <= 36); + do { + i--; + s[i] = digits[x % (uint64_t)base]; + x /= (uint64_t)base; + } while (x > 0); + }} + + *slen_p = U2S_BUFSIZE - 1 - i; + return (&s[i]); +} + +static char * +d2s(intmax_t x, char sign, char *s, size_t *slen_p) +{ + bool neg; + + if ((neg = (x < 0))) + x = -x; + s = u2s(x, 10, false, s, slen_p); + if (neg) + sign = '-'; + switch (sign) { + case '-': + if (neg == false) + break; + /* Fall through. 
*/ + case ' ': + case '+': + s--; + (*slen_p)++; + *s = sign; + break; + default: not_reached(); + } + return (s); +} + +static char * +o2s(uintmax_t x, bool alt_form, char *s, size_t *slen_p) +{ + + s = u2s(x, 8, false, s, slen_p); + if (alt_form && *s != '0') { + s--; + (*slen_p)++; + *s = '0'; + } + return (s); +} + +static char * +x2s(uintmax_t x, bool alt_form, bool uppercase, char *s, size_t *slen_p) +{ + + s = u2s(x, 16, uppercase, s, slen_p); + if (alt_form) { + s -= 2; + (*slen_p) += 2; + memcpy(s, uppercase ? "0X" : "0x", 2); + } + return (s); +} + +int +malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) +{ + int ret; + size_t i; + const char *f; + va_list tap; + +#define APPEND_C(c) do { \ + if (i < size) \ + str[i] = (c); \ + i++; \ +} while (0) +#define APPEND_S(s, slen) do { \ + if (i < size) { \ + size_t cpylen = (slen <= size - i) ? slen : size - i; \ + memcpy(&str[i], s, cpylen); \ + } \ + i += slen; \ +} while (0) +#define APPEND_PADDED_S(s, slen, width, left_justify) do { \ + /* Left padding. */ \ + size_t pad_len = (width == -1) ? 0 : ((slen < (size_t)width) ? \ + (size_t)width - slen : 0); \ + if (left_justify == false && pad_len != 0) { \ + size_t j; \ + for (j = 0; j < pad_len; j++) \ + APPEND_C(' '); \ + } \ + /* Value. */ \ + APPEND_S(s, slen); \ + /* Right padding. */ \ + if (left_justify && pad_len != 0) { \ + size_t j; \ + for (j = 0; j < pad_len; j++) \ + APPEND_C(' '); \ + } \ +} while (0) +#define GET_ARG_NUMERIC(val, len) do { \ + switch (len) { \ + case '?': \ + val = va_arg(ap, int); \ + break; \ + case 'l': \ + val = va_arg(ap, long); \ + break; \ + case 'q': \ + val = va_arg(ap, long long); \ + break; \ + case 'j': \ + val = va_arg(ap, intmax_t); \ + break; \ + case 't': \ + val = va_arg(ap, ptrdiff_t); \ + break; \ + case 'z': \ + val = va_arg(ap, size_t); \ + break; \ + default: not_reached(); \ + } \ +} while (0) + + if (config_debug) + va_copy(tap, ap); + + i = 0; + f = format; + while (true) { + switch (*f) { + case '\0': goto OUT; + case '%': { + bool alt_form = false; + bool zero_pad = false; + bool left_justify = false; + bool plus_space = false; + bool plus_plus = false; + int prec = -1; + int width = -1; + char len = '?'; + + f++; + if (*f == '%') { + /* %% */ + APPEND_C(*f); + break; + } + /* Flags. */ + while (true) { + switch (*f) { + case '#': + assert(alt_form == false); + alt_form = true; + break; + case '0': + assert(zero_pad == false); + zero_pad = true; + break; + case '-': + assert(left_justify == false); + left_justify = true; + break; + case ' ': + assert(plus_space == false); + plus_space = true; + break; + case '+': + assert(plus_plus == false); + plus_plus = true; + break; + default: goto WIDTH; + } + f++; + } + /* Width. */ + WIDTH: + switch (*f) { + case '*': + width = va_arg(ap, int); + f++; + break; + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': { + unsigned long uwidth; + errno = 0; + uwidth = strtoul(f, (char **)&f, 10); + assert(uwidth != ULONG_MAX || errno != ERANGE); + width = (int)uwidth; + if (*f == '.') { + f++; + goto PRECISION; + } else + goto LENGTH; + break; + } case '.': + f++; + goto PRECISION; + default: goto LENGTH; + } + /* Precision. 
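+ * (prec is parsed for all conversions, but in this implementation only the %s conversion honors it; the numeric paths ignore prec.)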
*/ + PRECISION: + switch (*f) { + case '*': + prec = va_arg(ap, int); + f++; + break; + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': { + unsigned long uprec; + errno = 0; + uprec = strtoul(f, (char **)&f, 10); + assert(uprec != ULONG_MAX || errno != ERANGE); + prec = (int)uprec; + break; + } + default: break; + } + /* Length. */ + LENGTH: + switch (*f) { + case 'l': + f++; + if (*f == 'l') { + len = 'q'; + f++; + } else + len = 'l'; + break; + case 'j': + len = 'j'; + f++; + break; + case 't': + len = 't'; + f++; + break; + case 'z': + len = 'z'; + f++; + break; + default: break; + } + /* Conversion specifier. */ + switch (*f) { + char *s; + size_t slen; + case 'd': case 'i': { + intmax_t val; + char buf[D2S_BUFSIZE]; + + GET_ARG_NUMERIC(val, len); + s = d2s(val, (plus_plus ? '+' : (plus_space ? + ' ' : '-')), buf, &slen); + APPEND_PADDED_S(s, slen, width, left_justify); + f++; + break; + } case 'o': { + uintmax_t val; + char buf[O2S_BUFSIZE]; + + GET_ARG_NUMERIC(val, len); + s = o2s(val, alt_form, buf, &slen); + APPEND_PADDED_S(s, slen, width, left_justify); + f++; + break; + } case 'u': { + uintmax_t val; + char buf[U2S_BUFSIZE]; + + GET_ARG_NUMERIC(val, len); + s = u2s(val, 10, false, buf, &slen); + APPEND_PADDED_S(s, slen, width, left_justify); + f++; + break; + } case 'x': case 'X': { + uintmax_t val; + char buf[X2S_BUFSIZE]; + + GET_ARG_NUMERIC(val, len); + s = x2s(val, alt_form, *f == 'X', buf, &slen); + APPEND_PADDED_S(s, slen, width, left_justify); + f++; + break; + } case 'c': { + unsigned char val; + char buf[2]; + + assert(len == '?' || len == 'l'); + assert_not_implemented(len != 'l'); + val = va_arg(ap, int); + buf[0] = val; + buf[1] = '\0'; + APPEND_PADDED_S(buf, 1, width, left_justify); + f++; + break; + } case 's': + assert(len == '?' || len == 'l'); + assert_not_implemented(len != 'l'); + s = va_arg(ap, char *); + slen = (prec == -1) ? strlen(s) : prec; + APPEND_PADDED_S(s, slen, width, left_justify); + f++; + break; + case 'p': { + uintmax_t val; + char buf[X2S_BUFSIZE]; + + GET_ARG_NUMERIC(val, len); + s = x2s(val, true, false, buf, &slen); + APPEND_PADDED_S(s, slen, width, left_justify); + f++; + break; + } + default: not_implemented(); + } + break; + } default: { + APPEND_C(*f); + f++; + break; + }} + } + OUT: + if (i < size) + str[i] = '\0'; + else + str[size - 1] = '\0'; + ret = i; + + if (config_debug) { + char buf[ret + 2]; + int tret; + + /* + * Verify that the resulting string matches what vsnprintf() + * would have created. + */ + tret = vsnprintf(buf, sizeof(buf), format, tap); + assert(tret == ret); + assert(memcmp(str, buf, ret + 1) == 0); + } + } + +#undef APPEND_C +#undef APPEND_S +#undef APPEND_PADDED_S +#undef GET_ARG_NUMERIC + return (ret); +} + +JEMALLOC_ATTR(format(printf, 3, 4)) +int +malloc_snprintf(char *str, size_t size, const char *format, ...) +{ + int ret; + va_list ap; + + va_start(ap, format); + ret = malloc_vsnprintf(str, size, format, ap); + va_end(ap); + + return (ret); +} + +const char * +malloc_vtprintf(const char *format, va_list ap) +{ + /* buf must be large enough for all possible uses within jemalloc. */ + static __thread char buf[4096]; + + malloc_vsnprintf(buf, sizeof(buf), format, ap); + + return (buf); +} + +JEMALLOC_ATTR(format(printf, 1, 2)) +const char * +malloc_tprintf(const char *format, ...) 
+{
+	const char *ret;
+	va_list ap;
+
+	va_start(ap, format);
+	ret = malloc_vtprintf(format, ap);
+	va_end(ap);
+
+	return (ret);
+}
+
+void
+malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque,
+    const char *format, va_list ap)
+{
+
+	if (write_cb == NULL) {
+		/*
+		 * The caller did not provide an alternate write_cb callback
+		 * function, so use the default one.  malloc_write() is an
+		 * inline function, so use malloc_message() directly here.
+		 */
+		write_cb = je_malloc_message;
+		cbopaque = NULL;
+	}
+
+	write_cb(cbopaque, malloc_vtprintf(format, ap));
+}
+
+/*
+ * Print to a callback function in such a way as to (hopefully) avoid memory
+ * allocation.
+ */
+JEMALLOC_ATTR(format(printf, 3, 4))
+void
+malloc_cprintf(void (*write_cb)(void *, const char *), void *cbopaque,
+    const char *format, ...)
+{
+	va_list ap;
+
+	va_start(ap, format);
+	malloc_vcprintf(write_cb, cbopaque, format, ap);
+	va_end(ap);
+}
+
+/* Print to stderr in such a way as to avoid memory allocation. */
+JEMALLOC_ATTR(format(printf, 1, 2))
+void
+malloc_printf(const char *format, ...)
+{
+	va_list ap;
+
+	va_start(ap, format);
+	malloc_vcprintf(NULL, NULL, format, ap);
+	va_end(ap);
+}
diff --git a/test/allocm.c b/test/allocm.c
index 137e74c3..3aa0fd23 100644
--- a/test/allocm.c
+++ b/test/allocm.c
@@ -77,14 +77,14 @@ main(void)
 	r = nallocm(&nsz, sz, ALLOCM_ALIGN(alignment));
 	if (r == ALLOCM_SUCCESS) {
 		fprintf(stderr,
-		    "Expected error for nallocm(&nsz, %zu, 0x%x)\n",
+		    "Expected error for nallocm(&nsz, %zu, %#x)\n",
 		    sz, ALLOCM_ALIGN(alignment));
 	}
 	rsz = 0;
 	r = allocm(&p, &rsz, sz, ALLOCM_ALIGN(alignment));
 	if (r == ALLOCM_SUCCESS) {
 		fprintf(stderr,
-		    "Expected error for allocm(&p, %zu, 0x%x)\n",
+		    "Expected error for allocm(&p, %zu, %#x)\n",
 		    sz, ALLOCM_ALIGN(alignment));
 	}
 	if (nsz != rsz)
@@ -105,7 +105,7 @@ main(void)
 	r = allocm(&p, &rsz, sz, ALLOCM_ALIGN(alignment));
 	if (r == ALLOCM_SUCCESS) {
 		fprintf(stderr,
-		    "Expected error for allocm(&p, %zu, 0x%x)\n",
+		    "Expected error for allocm(&p, %zu, %#x)\n",
 		    sz, ALLOCM_ALIGN(alignment));
 	}
 
@@ -119,14 +119,14 @@ main(void)
 	r = nallocm(&nsz, sz, ALLOCM_ALIGN(alignment));
 	if (r == ALLOCM_SUCCESS) {
 		fprintf(stderr,
-		    "Expected error for nallocm(&nsz, %zu, 0x%x)\n",
+		    "Expected error for nallocm(&nsz, %zu, %#x)\n",
 		    sz, ALLOCM_ALIGN(alignment));
 	}
 	rsz = 0;
 	r = allocm(&p, &rsz, sz, ALLOCM_ALIGN(alignment));
 	if (r == ALLOCM_SUCCESS) {
 		fprintf(stderr,
-		    "Expected error for allocm(&p, %zu, 0x%x)\n",
+		    "Expected error for allocm(&p, %zu, %#x)\n",
 		    sz, ALLOCM_ALIGN(alignment));
 	}
 	if (nsz != rsz)
@@ -150,7 +150,7 @@ main(void)
 		if (r != ALLOCM_SUCCESS) {
 			fprintf(stderr,
 			    "nallocm() error for size %zu"
-			    " (0x%zx): %d\n",
+			    " (%#zx): %d\n",
 			    sz, sz, r);
 			exit(1);
 		}
@@ -160,7 +160,7 @@ main(void)
 		if (r != ALLOCM_SUCCESS) {
 			fprintf(stderr,
 			    "allocm() error for size %zu"
-			    " (0x%zx): %d\n",
+			    " (%#zx): %d\n",
 			    sz, sz, r);
 			exit(1);
 		}
diff --git a/test/posix_memalign.c b/test/posix_memalign.c
index 5abb4201..0ea35c89 100644
--- a/test/posix_memalign.c
+++ b/test/posix_memalign.c
@@ -100,7 +100,7 @@ main(void)
 		    alignment, size);
 		if (err) {
 			fprintf(stderr,
-			    "Error for size %zu (0x%zx): %s\n",
+			    "Error for size %zu (%#zx): %s\n",
 			    size, size, strerror(err));
 			exit(1);
 		}

From 7cca6085754736c7e4e51d27e2f0234e84806628 Mon Sep 17 00:00:00 2001
From: Jason Evans
Date: Mon, 12 Mar 2012 11:31:54 -0700
Subject: [PATCH 046/205] Remove extra '}'.
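
The extra closing brace ended malloc_vsnprintf() one block too early,
so the trailing return statement fell outside of any function.  A rough
sketch of the mis-nesting this removes (assuming the brace pairing shown
in the hunk below):

	if (config_debug) {
		/* ... vsnprintf() cross-check ... */
	}
	}		/* extra brace: malloc_vsnprintf() ends here */
	...
	return (ret);	/* now outside any function */
}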
---
 src/util.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/util.c b/src/util.c
index 7c4c0d44..7d658aaa 100644
--- a/src/util.c
+++ b/src/util.c
@@ -443,7 +443,6 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap)
 		tret = vsnprintf(buf, sizeof(buf), format, tap);
 		assert(tret == ret);
 		assert(memcmp(str, buf, ret + 1) == 0);
-	}
 }
 
 #undef APPEND_C

From 125b93e43fe764e46c8a89fc2f3957a4e02c92e1 Mon Sep 17 00:00:00 2001
From: Jason Evans
Date: Mon, 12 Mar 2012 11:33:59 -0700
Subject: [PATCH 047/205] Remove bashism.

Submitted by Mike Hommey.
---
 include/jemalloc/internal/size_classes.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/jemalloc/internal/size_classes.sh b/include/jemalloc/internal/size_classes.sh
index d8306a58..79b4ba23 100755
--- a/include/jemalloc/internal/size_classes.sh
+++ b/include/jemalloc/internal/size_classes.sh
@@ -13,7 +13,7 @@ lg_tmin=3
 lg_pmin=12
 lg_pmax=16
 
-function pow2() {
+pow2() {
   e=$1
   pow2_result=1
   while [ ${e} -gt 0 ] ; do

From 2bb6c7a632fc4c0afe2532ea4044d337d9b288ae Mon Sep 17 00:00:00 2001
From: Jason Evans
Date: Mon, 12 Mar 2012 13:38:00 -0700
Subject: [PATCH 048/205] s/PRIx64/PRIxPTR/ for uintptr_t printf() argument.

---
 src/prof.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/prof.c b/src/prof.c
index b57c5b8a..2ca66c73 100644
--- a/src/prof.c
+++ b/src/prof.c
@@ -780,7 +780,7 @@ prof_dump_ctx(bool propagate_err, prof_ctx_t *ctx, prof_bt_t *bt)
 			return (true);
 
 	for (i = 0; i < bt->len; i++) {
-		if (prof_printf(propagate_err, " %#"PRIx64,
+		if (prof_printf(propagate_err, " %#"PRIxPTR,
 		    (uintptr_t)bt->vec[i]))
 			return (true);
 	}

From 08fc3b2d5173512a2c1fdbe11cf00c8c70bad503 Mon Sep 17 00:00:00 2001
From: Jason Evans
Date: Mon, 12 Mar 2012 15:07:53 -0700
Subject: [PATCH 049/205] Fix --with-mangling/--with-jemalloc-prefix interaction.

Fix --with-mangling to remove mangled symbols from the set of functions
to apply a prefix to.  Prior to this change, the interaction was correct
with autoconf 2.59, but incorrect with autoconf 2.65.
---
 configure.ac | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/configure.ac b/configure.ac
index 5a11588f..5cf2855f 100644
--- a/configure.ac
+++ b/configure.ac
@@ -346,9 +346,12 @@
 AC_ARG_WITH([mangling], [AS_HELP_STRING([--with-mangling=<map>], [Mangle symbols in <map>])],
 [mangling_map="$with_mangling"], [mangling_map=""])
 for nm in `echo ${mangling_map} |tr ',' ' '` ; do
-  n="je_`echo ${nm} |tr ':' ' ' |awk '{print $1}'`"
+  k="`echo ${nm} |tr ':' ' ' |awk '{print $1}'`"
+  n="je_${k}"
   m=`echo ${nm} |tr ':' ' ' |awk '{print $2}'`
   AC_DEFINE_UNQUOTED([${n}], [${m}])
+  dnl Remove key from public_syms so that it isn't redefined later.
+  public_syms=`for sym in ${public_syms}; do echo "${sym}"; done |grep -v "^${k}\$" |tr '\n' ' '`
 done
 
 dnl Do not prefix public APIs by default.

From 025d86118673f153b6ccd68e49054e58493b57f4 Mon Sep 17 00:00:00 2001
From: Jason Evans
Date: Mon, 12 Mar 2012 15:57:24 -0700
Subject: [PATCH 050/205] Update config.guess and config.sub.

---
 config.guess | 1034 +++++++++++++++++++++++++++-----------------------
 config.sub   |  432 ++++++++++++++++-----
 2 files changed, 882 insertions(+), 584 deletions(-)

diff --git a/config.guess b/config.guess
index 0773d0f6..d622a44e 100755
--- a/config.guess
+++ b/config.guess
@@ -1,9 +1,10 @@
 #! /bin/sh
 # Attempt to guess a canonical system name.
 # Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
-# 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
+# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, +# 2011, 2012 Free Software Foundation, Inc. -timestamp='2004-03-03' +timestamp='2012-02-10' # This file is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by @@ -16,24 +17,24 @@ timestamp='2004-03-03' # General Public License for more details. # # You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# along with this program; if not, see . # # As a special exception to the GNU General Public License, if you # distribute this file as part of a program that contains a # configuration script generated by Autoconf, you may include it under # the same distribution terms that you use for the rest of that program. -# Originally written by Per Bothner . -# Please send patches to . Submit a context -# diff and a properly formatted ChangeLog entry. + +# Originally written by Per Bothner. Please send patches (context +# diff format) to and include a ChangeLog +# entry. # # This script attempts to guess a canonical system name similar to # config.sub. If it succeeds, it prints the system name on stdout, and # exits with 0. Otherwise, it exits with 1. # -# The plan is that this can be called by configure scripts if you -# don't specify an explicit build system type. +# You can get the latest version of this script from: +# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD me=`echo "$0" | sed -e 's,.*/,,'` @@ -53,7 +54,8 @@ version="\ GNU config.guess ($timestamp) Originally written by Per Bothner. -Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001 +Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, +2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO @@ -66,11 +68,11 @@ Try \`$me --help' for more information." while test $# -gt 0 ; do case $1 in --time-stamp | --time* | -t ) - echo "$timestamp" ; exit 0 ;; + echo "$timestamp" ; exit ;; --version | -v ) - echo "$version" ; exit 0 ;; + echo "$version" ; exit ;; --help | --h* | -h ) - echo "$usage"; exit 0 ;; + echo "$usage"; exit ;; -- ) # Stop option processing shift; break ;; - ) # Use stdin as input. @@ -104,7 +106,7 @@ set_cc_for_build=' trap "exitcode=\$?; (rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null) && exit \$exitcode" 0 ; trap "rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null; exit 1" 1 2 13 15 ; : ${TMPDIR=/tmp} ; - { tmp=`(umask 077 && mktemp -d -q "$TMPDIR/cgXXXXXX") 2>/dev/null` && test -n "$tmp" && test -d "$tmp" ; } || + { tmp=`(umask 077 && mktemp -d "$TMPDIR/cgXXXXXX") 2>/dev/null` && test -n "$tmp" && test -d "$tmp" ; } || { test -n "$RANDOM" && tmp=$TMPDIR/cg$$-$RANDOM && (umask 077 && mkdir $tmp) ; } || { tmp=$TMPDIR/cg-$$ && (umask 077 && mkdir $tmp) && echo "Warning: creating insecure temp directory" >&2 ; } || { echo "$me: cannot create a temporary directory in $TMPDIR" >&2 ; exit 1 ; } ; @@ -123,7 +125,7 @@ case $CC_FOR_BUILD,$HOST_CC,$CC in ;; ,,*) CC_FOR_BUILD=$CC ;; ,*,*) CC_FOR_BUILD=$HOST_CC ;; -esac ;' +esac ; set_cc_for_build= ;' # This is needed to find uname on a Pyramid OSx when run in the BSD universe. 
# (ghazi@noc.rutgers.edu 1994-08-24) @@ -141,7 +143,7 @@ UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in *:NetBSD:*:*) # NetBSD (nbsd) targets should (where applicable) match one or - # more of the tupples: *-*-netbsdelf*, *-*-netbsdaout*, + # more of the tuples: *-*-netbsdelf*, *-*-netbsdaout*, # *-*-netbsdecoff* and *-*-netbsd*. For targets that recently # switched to ELF, *-*-netbsd* would select the old # object file format. This provides both forward @@ -158,6 +160,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in arm*) machine=arm-unknown ;; sh3el) machine=shl-unknown ;; sh3eb) machine=sh-unknown ;; + sh5el) machine=sh5le-unknown ;; *) machine=${UNAME_MACHINE_ARCH}-unknown ;; esac # The Operating System including object format, if it has switched @@ -166,7 +169,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in arm*|i386|m68k|ns32k|sh3*|sparc|vax) eval $set_cc_for_build if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \ - | grep __ELF__ >/dev/null + | grep -q __ELF__ then # Once all utilities can be ECOFF (netbsdecoff) or a.out (netbsdaout). # Return netbsd for either. FIX? @@ -176,7 +179,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in fi ;; *) - os=netbsd + os=netbsd ;; esac # The OS release @@ -196,71 +199,30 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in # contains redundant information, the shorter form: # CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used. echo "${machine}-${os}${release}" - exit 0 ;; - amd64:OpenBSD:*:*) - echo x86_64-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - amiga:OpenBSD:*:*) - echo m68k-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - arc:OpenBSD:*:*) - echo mipsel-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - cats:OpenBSD:*:*) - echo arm-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - hp300:OpenBSD:*:*) - echo m68k-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - mac68k:OpenBSD:*:*) - echo m68k-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - macppc:OpenBSD:*:*) - echo powerpc-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - mvme68k:OpenBSD:*:*) - echo m68k-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - mvme88k:OpenBSD:*:*) - echo m88k-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - mvmeppc:OpenBSD:*:*) - echo powerpc-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - pegasos:OpenBSD:*:*) - echo powerpc-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - pmax:OpenBSD:*:*) - echo mipsel-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - sgi:OpenBSD:*:*) - echo mipseb-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - sun3:OpenBSD:*:*) - echo m68k-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - wgrisc:OpenBSD:*:*) - echo mipsel-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; + exit ;; *:OpenBSD:*:*) - echo ${UNAME_MACHINE}-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; + UNAME_MACHINE_ARCH=`arch | sed 's/OpenBSD.//'` + echo ${UNAME_MACHINE_ARCH}-unknown-openbsd${UNAME_RELEASE} + exit ;; *:ekkoBSD:*:*) echo ${UNAME_MACHINE}-unknown-ekkobsd${UNAME_RELEASE} - exit 0 ;; + exit ;; + *:SolidBSD:*:*) + echo ${UNAME_MACHINE}-unknown-solidbsd${UNAME_RELEASE} + exit ;; macppc:MirBSD:*:*) - echo powerppc-unknown-mirbsd${UNAME_RELEASE} - exit 0 ;; + echo powerpc-unknown-mirbsd${UNAME_RELEASE} + exit ;; *:MirBSD:*:*) echo ${UNAME_MACHINE}-unknown-mirbsd${UNAME_RELEASE} - exit 0 ;; + exit ;; alpha:OSF1:*:*) case $UNAME_RELEASE in *4.0) UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $3}'` ;; *5.*) - 
UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'` + UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'` ;; esac # According to Compaq, /usr/sbin/psrinfo has been available on @@ -306,40 +268,46 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in # A Xn.n version is an unreleased experimental baselevel. # 1.2 uses "1.2" for uname -r. echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` - exit 0 ;; - Alpha*:OpenVMS:*:*) - echo alpha-hp-vms - exit 0 ;; + # Reset EXIT trap before exiting to avoid spurious non-zero exit code. + exitcode=$? + trap '' 0 + exit $exitcode ;; Alpha\ *:Windows_NT*:*) # How do we know it's Interix rather than the generic POSIX subsystem? # Should we change UNAME_MACHINE based on the output of uname instead # of the specific Alpha model? echo alpha-pc-interix - exit 0 ;; + exit ;; 21064:Windows_NT:50:3) echo alpha-dec-winnt3.5 - exit 0 ;; + exit ;; Amiga*:UNIX_System_V:4.0:*) echo m68k-unknown-sysv4 - exit 0;; + exit ;; *:[Aa]miga[Oo][Ss]:*:*) echo ${UNAME_MACHINE}-unknown-amigaos - exit 0 ;; + exit ;; *:[Mm]orph[Oo][Ss]:*:*) echo ${UNAME_MACHINE}-unknown-morphos - exit 0 ;; + exit ;; *:OS/390:*:*) echo i370-ibm-openedition - exit 0 ;; + exit ;; + *:z/VM:*:*) + echo s390-ibm-zvmoe + exit ;; *:OS400:*:*) - echo powerpc-ibm-os400 - exit 0 ;; + echo powerpc-ibm-os400 + exit ;; arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*) echo arm-acorn-riscix${UNAME_RELEASE} - exit 0;; + exit ;; + arm:riscos:*:*|arm:RISCOS:*:*) + echo arm-unknown-riscos + exit ;; SR2?01:HI-UX/MPP:*:* | SR8000:HI-UX/MPP:*:*) echo hppa1.1-hitachi-hiuxmpp - exit 0;; + exit ;; Pyramid*:OSx*:*:* | MIS*:OSx*:*:* | MIS*:SMP_DC-OSx*:*:*) # akee@wpdis03.wpafb.af.mil (Earle F. Ake) contributed MIS and NILE. if test "`(/bin/universe) 2>/dev/null`" = att ; then @@ -347,32 +315,51 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in else echo pyramid-pyramid-bsd fi - exit 0 ;; + exit ;; NILE*:*:*:dcosx) echo pyramid-pyramid-svr4 - exit 0 ;; + exit ;; DRS?6000:unix:4.0:6*) echo sparc-icl-nx6 - exit 0 ;; - DRS?6000:UNIX_SV:4.2*:7*) + exit ;; + DRS?6000:UNIX_SV:4.2*:7* | DRS?6000:isis:4.2*:7*) case `/usr/bin/uname -p` in - sparc) echo sparc-icl-nx7 && exit 0 ;; + sparc) echo sparc-icl-nx7; exit ;; esac ;; + s390x:SunOS:*:*) + echo ${UNAME_MACHINE}-ibm-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit ;; sun4H:SunOS:5.*:*) echo sparc-hal-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` - exit 0 ;; + exit ;; sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*) echo sparc-sun-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` - exit 0 ;; - i86pc:SunOS:5.*:*) - echo i386-pc-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` - exit 0 ;; + exit ;; + i86pc:AuroraUX:5.*:* | i86xen:AuroraUX:5.*:*) + echo i386-pc-auroraux${UNAME_RELEASE} + exit ;; + i86pc:SunOS:5.*:* | i86xen:SunOS:5.*:*) + eval $set_cc_for_build + SUN_ARCH="i386" + # If there is a compiler, see if it is configured for 64-bit objects. + # Note that the Sun cc does not turn __LP64__ into 1 like gcc does. + # This test works for both compilers. 
+ if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then + if (echo '#ifdef __amd64'; echo IS_64BIT_ARCH; echo '#endif') | \ + (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \ + grep IS_64BIT_ARCH >/dev/null + then + SUN_ARCH="x86_64" + fi + fi + echo ${SUN_ARCH}-pc-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit ;; sun4*:SunOS:6*:*) # According to config.sub, this is the proper way to canonicalize # SunOS6. Hard to guess exactly what SunOS6 will be like, but # it's likely to be more like Solaris than SunOS4. echo sparc-sun-solaris3`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` - exit 0 ;; + exit ;; sun4*:SunOS:*:*) case "`/usr/bin/arch -k`" in Series*|S4*) @@ -381,10 +368,10 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in esac # Japanese Language versions have a version number like `4.1.3-JL'. echo sparc-sun-sunos`echo ${UNAME_RELEASE}|sed -e 's/-/_/'` - exit 0 ;; + exit ;; sun3*:SunOS:*:*) echo m68k-sun-sunos${UNAME_RELEASE} - exit 0 ;; + exit ;; sun*:*:4.2BSD:*) UNAME_RELEASE=`(sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null` test "x${UNAME_RELEASE}" = "x" && UNAME_RELEASE=3 @@ -396,10 +383,10 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in echo sparc-sun-sunos${UNAME_RELEASE} ;; esac - exit 0 ;; + exit ;; aushp:SunOS:*:*) echo sparc-auspex-sunos${UNAME_RELEASE} - exit 0 ;; + exit ;; # The situation for MiNT is a little confusing. The machine name # can be virtually everything (everything which is not # "atarist" or "atariste" at least should have a processor @@ -409,41 +396,41 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in # MiNT. But MiNT is downward compatible to TOS, so this should # be no problem. atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*) - echo m68k-atari-mint${UNAME_RELEASE} - exit 0 ;; + echo m68k-atari-mint${UNAME_RELEASE} + exit ;; atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*) echo m68k-atari-mint${UNAME_RELEASE} - exit 0 ;; + exit ;; *falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*) - echo m68k-atari-mint${UNAME_RELEASE} - exit 0 ;; + echo m68k-atari-mint${UNAME_RELEASE} + exit ;; milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*) - echo m68k-milan-mint${UNAME_RELEASE} - exit 0 ;; + echo m68k-milan-mint${UNAME_RELEASE} + exit ;; hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*) - echo m68k-hades-mint${UNAME_RELEASE} - exit 0 ;; + echo m68k-hades-mint${UNAME_RELEASE} + exit ;; *:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*) - echo m68k-unknown-mint${UNAME_RELEASE} - exit 0 ;; + echo m68k-unknown-mint${UNAME_RELEASE} + exit ;; m68k:machten:*:*) echo m68k-apple-machten${UNAME_RELEASE} - exit 0 ;; + exit ;; powerpc:machten:*:*) echo powerpc-apple-machten${UNAME_RELEASE} - exit 0 ;; + exit ;; RISC*:Mach:*:*) echo mips-dec-mach_bsd4.3 - exit 0 ;; + exit ;; RISC*:ULTRIX:*:*) echo mips-dec-ultrix${UNAME_RELEASE} - exit 0 ;; + exit ;; VAX*:ULTRIX*:*:*) echo vax-dec-ultrix${UNAME_RELEASE} - exit 0 ;; + exit ;; 2020:CLIX:*:* | 2430:CLIX:*:*) echo clipper-intergraph-clix${UNAME_RELEASE} - exit 0 ;; + exit ;; mips:*:*:UMIPS | mips:*:*:RISCos) eval $set_cc_for_build sed 's/^ //' << EOF >$dummy.c @@ -467,35 +454,36 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in exit (-1); } EOF - $CC_FOR_BUILD -o $dummy $dummy.c \ - && $dummy `echo "${UNAME_RELEASE}" | sed -n 's/\([0-9]*\).*/\1/p'` \ - && exit 0 + $CC_FOR_BUILD -o $dummy $dummy.c && + dummyarg=`echo "${UNAME_RELEASE}" | sed -n 's/\([0-9]*\).*/\1/p'` 
&& + SYSTEM_NAME=`$dummy $dummyarg` && + { echo "$SYSTEM_NAME"; exit; } echo mips-mips-riscos${UNAME_RELEASE} - exit 0 ;; + exit ;; Motorola:PowerMAX_OS:*:*) echo powerpc-motorola-powermax - exit 0 ;; + exit ;; Motorola:*:4.3:PL8-*) echo powerpc-harris-powermax - exit 0 ;; + exit ;; Night_Hawk:*:*:PowerMAX_OS | Synergy:PowerMAX_OS:*:*) echo powerpc-harris-powermax - exit 0 ;; + exit ;; Night_Hawk:Power_UNIX:*:*) echo powerpc-harris-powerunix - exit 0 ;; + exit ;; m88k:CX/UX:7*:*) echo m88k-harris-cxux7 - exit 0 ;; + exit ;; m88k:*:4*:R4*) echo m88k-motorola-sysv4 - exit 0 ;; + exit ;; m88k:*:3*:R3*) echo m88k-motorola-sysv3 - exit 0 ;; + exit ;; AViiON:dgux:*:*) - # DG/UX returns AViiON for all architectures - UNAME_PROCESSOR=`/usr/bin/uname -p` + # DG/UX returns AViiON for all architectures + UNAME_PROCESSOR=`/usr/bin/uname -p` if [ $UNAME_PROCESSOR = mc88100 ] || [ $UNAME_PROCESSOR = mc88110 ] then if [ ${TARGET_BINARY_INTERFACE}x = m88kdguxelfx ] || \ @@ -508,29 +496,29 @@ EOF else echo i586-dg-dgux${UNAME_RELEASE} fi - exit 0 ;; + exit ;; M88*:DolphinOS:*:*) # DolphinOS (SVR3) echo m88k-dolphin-sysv3 - exit 0 ;; + exit ;; M88*:*:R3*:*) # Delta 88k system running SVR3 echo m88k-motorola-sysv3 - exit 0 ;; + exit ;; XD88*:*:*:*) # Tektronix XD88 system running UTekV (SVR3) echo m88k-tektronix-sysv3 - exit 0 ;; + exit ;; Tek43[0-9][0-9]:UTek:*:*) # Tektronix 4300 system running UTek (BSD) echo m68k-tektronix-bsd - exit 0 ;; + exit ;; *:IRIX*:*:*) echo mips-sgi-irix`echo ${UNAME_RELEASE}|sed -e 's/-/_/g'` - exit 0 ;; + exit ;; ????????:AIX?:[12].1:2) # AIX 2.2.1 or AIX 2.1.1 is RT/PC AIX. - echo romp-ibm-aix # uname -m gives an 8 hex-code CPU id - exit 0 ;; # Note that: echo "'`uname -s`'" gives 'AIX ' + echo romp-ibm-aix # uname -m gives an 8 hex-code CPU id + exit ;; # Note that: echo "'`uname -s`'" gives 'AIX ' i*86:AIX:*:*) echo i386-ibm-aix - exit 0 ;; + exit ;; ia64:AIX:*:*) if [ -x /usr/bin/oslevel ] ; then IBM_REV=`/usr/bin/oslevel` @@ -538,7 +526,7 @@ EOF IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE} fi echo ${UNAME_MACHINE}-ibm-aix${IBM_REV} - exit 0 ;; + exit ;; *:AIX:2:3) if grep bos325 /usr/include/stdio.h >/dev/null 2>&1; then eval $set_cc_for_build @@ -553,15 +541,19 @@ EOF exit(0); } EOF - $CC_FOR_BUILD -o $dummy $dummy.c && $dummy && exit 0 - echo rs6000-ibm-aix3.2.5 + if $CC_FOR_BUILD -o $dummy $dummy.c && SYSTEM_NAME=`$dummy` + then + echo "$SYSTEM_NAME" + else + echo rs6000-ibm-aix3.2.5 + fi elif grep bos324 /usr/include/stdio.h >/dev/null 2>&1; then echo rs6000-ibm-aix3.2.4 else echo rs6000-ibm-aix3.2 fi - exit 0 ;; - *:AIX:*:[45]) + exit ;; + *:AIX:*:[4567]) IBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }'` if /usr/sbin/lsattr -El ${IBM_CPU_ID} | grep ' POWER' >/dev/null 2>&1; then IBM_ARCH=rs6000 @@ -574,28 +566,28 @@ EOF IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE} fi echo ${IBM_ARCH}-ibm-aix${IBM_REV} - exit 0 ;; + exit ;; *:AIX:*:*) echo rs6000-ibm-aix - exit 0 ;; + exit ;; ibmrt:4.4BSD:*|romp-ibm:BSD:*) echo romp-ibm-bsd4.4 - exit 0 ;; + exit ;; ibmrt:*BSD:*|romp-ibm:BSD:*) # covers RT/PC BSD and echo romp-ibm-bsd${UNAME_RELEASE} # 4.3 with uname added to - exit 0 ;; # report: romp-ibm BSD 4.3 + exit ;; # report: romp-ibm BSD 4.3 *:BOSX:*:*) echo rs6000-bull-bosx - exit 0 ;; + exit ;; DPX/2?00:B.O.S.:*:*) echo m68k-bull-sysv3 - exit 0 ;; + exit ;; 9000/[34]??:4.3bsd:1.*:*) echo m68k-hp-bsd - exit 0 ;; + exit ;; hp300:4.4BSD:*:* | 9000/[34]??:4.3bsd:2.*:*) echo m68k-hp-bsd4.4 - exit 0 ;; + exit ;; 9000/[34678]??:HP-UX:*:*) 
HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'` case "${UNAME_MACHINE}" in @@ -604,52 +596,52 @@ EOF 9000/[678][0-9][0-9]) if [ -x /usr/bin/getconf ]; then sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null` - sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null` - case "${sc_cpu_version}" in - 523) HP_ARCH="hppa1.0" ;; # CPU_PA_RISC1_0 - 528) HP_ARCH="hppa1.1" ;; # CPU_PA_RISC1_1 - 532) # CPU_PA_RISC2_0 - case "${sc_kernel_bits}" in - 32) HP_ARCH="hppa2.0n" ;; - 64) HP_ARCH="hppa2.0w" ;; + sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null` + case "${sc_cpu_version}" in + 523) HP_ARCH="hppa1.0" ;; # CPU_PA_RISC1_0 + 528) HP_ARCH="hppa1.1" ;; # CPU_PA_RISC1_1 + 532) # CPU_PA_RISC2_0 + case "${sc_kernel_bits}" in + 32) HP_ARCH="hppa2.0n" ;; + 64) HP_ARCH="hppa2.0w" ;; '') HP_ARCH="hppa2.0" ;; # HP-UX 10.20 - esac ;; - esac + esac ;; + esac fi if [ "${HP_ARCH}" = "" ]; then eval $set_cc_for_build - sed 's/^ //' << EOF >$dummy.c + sed 's/^ //' << EOF >$dummy.c - #define _HPUX_SOURCE - #include - #include + #define _HPUX_SOURCE + #include + #include - int main () - { - #if defined(_SC_KERNEL_BITS) - long bits = sysconf(_SC_KERNEL_BITS); - #endif - long cpu = sysconf (_SC_CPU_VERSION); + int main () + { + #if defined(_SC_KERNEL_BITS) + long bits = sysconf(_SC_KERNEL_BITS); + #endif + long cpu = sysconf (_SC_CPU_VERSION); - switch (cpu) - { - case CPU_PA_RISC1_0: puts ("hppa1.0"); break; - case CPU_PA_RISC1_1: puts ("hppa1.1"); break; - case CPU_PA_RISC2_0: - #if defined(_SC_KERNEL_BITS) - switch (bits) - { - case 64: puts ("hppa2.0w"); break; - case 32: puts ("hppa2.0n"); break; - default: puts ("hppa2.0"); break; - } break; - #else /* !defined(_SC_KERNEL_BITS) */ - puts ("hppa2.0"); break; - #endif - default: puts ("hppa1.0"); break; - } - exit (0); - } + switch (cpu) + { + case CPU_PA_RISC1_0: puts ("hppa1.0"); break; + case CPU_PA_RISC1_1: puts ("hppa1.1"); break; + case CPU_PA_RISC2_0: + #if defined(_SC_KERNEL_BITS) + switch (bits) + { + case 64: puts ("hppa2.0w"); break; + case 32: puts ("hppa2.0n"); break; + default: puts ("hppa2.0"); break; + } break; + #else /* !defined(_SC_KERNEL_BITS) */ + puts ("hppa2.0"); break; + #endif + default: puts ("hppa1.0"); break; + } + exit (0); + } EOF (CCOPTS= $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null) && HP_ARCH=`$dummy` test -z "$HP_ARCH" && HP_ARCH=hppa @@ -657,9 +649,19 @@ EOF esac if [ ${HP_ARCH} = "hppa2.0w" ] then - # avoid double evaluation of $set_cc_for_build - test -n "$CC_FOR_BUILD" || eval $set_cc_for_build - if echo __LP64__ | (CCOPTS= $CC_FOR_BUILD -E -) | grep __LP64__ >/dev/null + eval $set_cc_for_build + + # hppa2.0w-hp-hpux* has a 64-bit kernel and a compiler generating + # 32-bit code. hppa64-hp-hpux* has the same kernel and a compiler + # generating 64-bit code. 
GNU and HP use different nomenclature: + # + # $ CC_FOR_BUILD=cc ./config.guess + # => hppa2.0w-hp-hpux11.23 + # $ CC_FOR_BUILD="cc +DA2.0w" ./config.guess + # => hppa64-hp-hpux11.23 + + if echo __LP64__ | (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | + grep -q __LP64__ then HP_ARCH="hppa2.0w" else @@ -667,11 +669,11 @@ EOF fi fi echo ${HP_ARCH}-hp-hpux${HPUX_REV} - exit 0 ;; + exit ;; ia64:HP-UX:*:*) HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'` echo ia64-hp-hpux${HPUX_REV} - exit 0 ;; + exit ;; 3050*:HI-UX:*:*) eval $set_cc_for_build sed 's/^ //' << EOF >$dummy.c @@ -699,207 +701,173 @@ EOF exit (0); } EOF - $CC_FOR_BUILD -o $dummy $dummy.c && $dummy && exit 0 + $CC_FOR_BUILD -o $dummy $dummy.c && SYSTEM_NAME=`$dummy` && + { echo "$SYSTEM_NAME"; exit; } echo unknown-hitachi-hiuxwe2 - exit 0 ;; + exit ;; 9000/7??:4.3bsd:*:* | 9000/8?[79]:4.3bsd:*:* ) echo hppa1.1-hp-bsd - exit 0 ;; + exit ;; 9000/8??:4.3bsd:*:*) echo hppa1.0-hp-bsd - exit 0 ;; + exit ;; *9??*:MPE/iX:*:* | *3000*:MPE/iX:*:*) echo hppa1.0-hp-mpeix - exit 0 ;; + exit ;; hp7??:OSF1:*:* | hp8?[79]:OSF1:*:* ) echo hppa1.1-hp-osf - exit 0 ;; + exit ;; hp8??:OSF1:*:*) echo hppa1.0-hp-osf - exit 0 ;; + exit ;; i*86:OSF1:*:*) if [ -x /usr/sbin/sysversion ] ; then echo ${UNAME_MACHINE}-unknown-osf1mk else echo ${UNAME_MACHINE}-unknown-osf1 fi - exit 0 ;; + exit ;; parisc*:Lites*:*:*) echo hppa1.1-hp-lites - exit 0 ;; + exit ;; C1*:ConvexOS:*:* | convex:ConvexOS:C1*:*) echo c1-convex-bsd - exit 0 ;; + exit ;; C2*:ConvexOS:*:* | convex:ConvexOS:C2*:*) if getsysinfo -f scalar_acc then echo c32-convex-bsd else echo c2-convex-bsd fi - exit 0 ;; + exit ;; C34*:ConvexOS:*:* | convex:ConvexOS:C34*:*) echo c34-convex-bsd - exit 0 ;; + exit ;; C38*:ConvexOS:*:* | convex:ConvexOS:C38*:*) echo c38-convex-bsd - exit 0 ;; + exit ;; C4*:ConvexOS:*:* | convex:ConvexOS:C4*:*) echo c4-convex-bsd - exit 0 ;; + exit ;; CRAY*Y-MP:*:*:*) echo ymp-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' - exit 0 ;; + exit ;; CRAY*[A-Z]90:*:*:*) echo ${UNAME_MACHINE}-cray-unicos${UNAME_RELEASE} \ | sed -e 's/CRAY.*\([A-Z]90\)/\1/' \ -e y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/ \ -e 's/\.[^.]*$/.X/' - exit 0 ;; + exit ;; CRAY*TS:*:*:*) echo t90-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' - exit 0 ;; + exit ;; CRAY*T3E:*:*:*) echo alphaev5-cray-unicosmk${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' - exit 0 ;; + exit ;; CRAY*SV1:*:*:*) echo sv1-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' - exit 0 ;; + exit ;; *:UNICOS/mp:*:*) - echo nv1-cray-unicosmp${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' - exit 0 ;; + echo craynv-cray-unicosmp${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + exit ;; F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*) FUJITSU_PROC=`uname -m | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` - FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` - FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'` - echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" - exit 0 ;; + FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` + FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'` + echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" + exit ;; 5000:UNIX_System_V:4.*:*) - FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` - FUJITSU_REL=`echo ${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ 
/_/'` - echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" - exit 0 ;; + FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` + FUJITSU_REL=`echo ${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ /_/'` + echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" + exit ;; i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*) echo ${UNAME_MACHINE}-pc-bsdi${UNAME_RELEASE} - exit 0 ;; + exit ;; sparc*:BSD/OS:*:*) echo sparc-unknown-bsdi${UNAME_RELEASE} - exit 0 ;; + exit ;; *:BSD/OS:*:*) echo ${UNAME_MACHINE}-unknown-bsdi${UNAME_RELEASE} - exit 0 ;; + exit ;; *:FreeBSD:*:*) - # Determine whether the default compiler uses glibc. - eval $set_cc_for_build - sed 's/^ //' << EOF >$dummy.c - #include - #if __GLIBC__ >= 2 - LIBC=gnu - #else - LIBC= - #endif -EOF - eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep ^LIBC=` - # GNU/KFreeBSD systems have a "k" prefix to indicate we are using - # FreeBSD's kernel, but not the complete OS. - case ${LIBC} in gnu) kernel_only='k' ;; esac - echo ${UNAME_MACHINE}-unknown-${kernel_only}freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`${LIBC:+-$LIBC} - exit 0 ;; + UNAME_PROCESSOR=`/usr/bin/uname -p` + case ${UNAME_PROCESSOR} in + amd64) + echo x86_64-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;; + *) + echo ${UNAME_PROCESSOR}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;; + esac + exit ;; i*:CYGWIN*:*) echo ${UNAME_MACHINE}-pc-cygwin - exit 0 ;; - i*:MINGW*:*) + exit ;; + *:MINGW*:*) echo ${UNAME_MACHINE}-pc-mingw32 - exit 0 ;; + exit ;; + i*:MSYS*:*) + echo ${UNAME_MACHINE}-pc-msys + exit ;; + i*:windows32*:*) + # uname -m includes "-pc" on this system. + echo ${UNAME_MACHINE}-mingw32 + exit ;; i*:PW*:*) echo ${UNAME_MACHINE}-pc-pw32 - exit 0 ;; - x86:Interix*:[34]*) - echo i586-pc-interix${UNAME_RELEASE}|sed -e 's/\..*//' - exit 0 ;; + exit ;; + *:Interix*:*) + case ${UNAME_MACHINE} in + x86) + echo i586-pc-interix${UNAME_RELEASE} + exit ;; + authenticamd | genuineintel | EM64T) + echo x86_64-unknown-interix${UNAME_RELEASE} + exit ;; + IA64) + echo ia64-unknown-interix${UNAME_RELEASE} + exit ;; + esac ;; [345]86:Windows_95:* | [345]86:Windows_98:* | [345]86:Windows_NT:*) echo i${UNAME_MACHINE}-pc-mks - exit 0 ;; + exit ;; + 8664:Windows_NT:*) + echo x86_64-pc-mks + exit ;; i*:Windows_NT*:* | Pentium*:Windows_NT*:*) # How do we know it's Interix rather than the generic POSIX subsystem? # It also conflicts with pre-2.0 versions of AT&T UWIN. Should we # UNAME_MACHINE based on the output of uname instead of i386? 
echo i586-pc-interix - exit 0 ;; + exit ;; i*:UWIN*:*) echo ${UNAME_MACHINE}-pc-uwin - exit 0 ;; + exit ;; + amd64:CYGWIN*:*:* | x86_64:CYGWIN*:*:*) + echo x86_64-unknown-cygwin + exit ;; p*:CYGWIN*:*) echo powerpcle-unknown-cygwin - exit 0 ;; + exit ;; prep*:SunOS:5.*:*) echo powerpcle-unknown-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` - exit 0 ;; + exit ;; *:GNU:*:*) # the GNU system echo `echo ${UNAME_MACHINE}|sed -e 's,[-/].*$,,'`-unknown-gnu`echo ${UNAME_RELEASE}|sed -e 's,/.*$,,'` - exit 0 ;; + exit ;; *:GNU/*:*:*) # other systems with GNU libc and userland echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr '[A-Z]' '[a-z]'``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-gnu - exit 0 ;; + exit ;; i*86:Minix:*:*) echo ${UNAME_MACHINE}-pc-minix - exit 0 ;; - arm*:Linux:*:*) + exit ;; + aarch64:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-gnu - exit 0 ;; - cris:Linux:*:*) - echo cris-axis-linux-gnu - exit 0 ;; - ia64:Linux:*:*) + exit ;; + aarch64_be:Linux:*:*) + UNAME_MACHINE=aarch64_be echo ${UNAME_MACHINE}-unknown-linux-gnu - exit 0 ;; - m68*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu - exit 0 ;; - mips:Linux:*:*) - eval $set_cc_for_build - sed 's/^ //' << EOF >$dummy.c - #undef CPU - #undef mips - #undef mipsel - #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL) - CPU=mipsel - #else - #if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB) - CPU=mips - #else - CPU= - #endif - #endif -EOF - eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep ^CPU=` - test x"${CPU}" != x && echo "${CPU}-unknown-linux-gnu" && exit 0 - ;; - mips64:Linux:*:*) - eval $set_cc_for_build - sed 's/^ //' << EOF >$dummy.c - #undef CPU - #undef mips64 - #undef mips64el - #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL) - CPU=mips64el - #else - #if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB) - CPU=mips64 - #else - CPU= - #endif - #endif -EOF - eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep ^CPU=` - test x"${CPU}" != x && echo "${CPU}-unknown-linux-gnu" && exit 0 - ;; - ppc:Linux:*:*) - echo powerpc-unknown-linux-gnu - exit 0 ;; - ppc64:Linux:*:*) - echo powerpc64-unknown-linux-gnu - exit 0 ;; + exit ;; alpha:Linux:*:*) case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in EV5) UNAME_MACHINE=alphaev5 ;; @@ -909,11 +877,90 @@ EOF EV6) UNAME_MACHINE=alphaev6 ;; EV67) UNAME_MACHINE=alphaev67 ;; EV68*) UNAME_MACHINE=alphaev68 ;; - esac - objdump --private-headers /bin/sh | grep ld.so.1 >/dev/null + esac + objdump --private-headers /bin/sh | grep -q ld.so.1 if test "$?" 
= 0 ; then LIBC="libc1" ; else LIBC="" ; fi echo ${UNAME_MACHINE}-unknown-linux-gnu${LIBC} - exit 0 ;; + exit ;; + arm*:Linux:*:*) + eval $set_cc_for_build + if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \ + | grep -q __ARM_EABI__ + then + echo ${UNAME_MACHINE}-unknown-linux-gnu + else + if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \ + | grep -q __ARM_PCS_VFP + then + echo ${UNAME_MACHINE}-unknown-linux-gnueabi + else + echo ${UNAME_MACHINE}-unknown-linux-gnueabihf + fi + fi + exit ;; + avr32*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + cris:Linux:*:*) + echo ${UNAME_MACHINE}-axis-linux-gnu + exit ;; + crisv32:Linux:*:*) + echo ${UNAME_MACHINE}-axis-linux-gnu + exit ;; + frv:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + hexagon:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + i*86:Linux:*:*) + LIBC=gnu + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c + #ifdef __dietlibc__ + LIBC=dietlibc + #endif +EOF + eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^LIBC'` + echo "${UNAME_MACHINE}-pc-linux-${LIBC}" + exit ;; + ia64:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + m32r*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + m68*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + mips:Linux:*:* | mips64:Linux:*:*) + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c + #undef CPU + #undef ${UNAME_MACHINE} + #undef ${UNAME_MACHINE}el + #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL) + CPU=${UNAME_MACHINE}el + #else + #if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB) + CPU=${UNAME_MACHINE} + #else + CPU= + #endif + #endif +EOF + eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^CPU'` + test x"${CPU}" != x && { echo "${CPU}-unknown-linux-gnu"; exit; } + ;; + or32:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + padre:Linux:*:*) + echo sparc-unknown-linux-gnu + exit ;; + parisc64:Linux:*:* | hppa64:Linux:*:*) + echo hppa64-unknown-linux-gnu + exit ;; parisc:Linux:*:* | hppa:Linux:*:*) # Look for CPU level case `grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2` in @@ -921,115 +968,71 @@ EOF PA8*) echo hppa2.0-unknown-linux-gnu ;; *) echo hppa-unknown-linux-gnu ;; esac - exit 0 ;; - parisc64:Linux:*:* | hppa64:Linux:*:*) - echo hppa64-unknown-linux-gnu - exit 0 ;; + exit ;; + ppc64:Linux:*:*) + echo powerpc64-unknown-linux-gnu + exit ;; + ppc:Linux:*:*) + echo powerpc-unknown-linux-gnu + exit ;; s390:Linux:*:* | s390x:Linux:*:*) echo ${UNAME_MACHINE}-ibm-linux - exit 0 ;; + exit ;; sh64*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu - exit 0 ;; + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; sh*:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-gnu - exit 0 ;; + exit ;; sparc:Linux:*:* | sparc64:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-gnu - exit 0 ;; + exit ;; + tile*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + vax:Linux:*:*) + echo ${UNAME_MACHINE}-dec-linux-gnu + exit ;; x86_64:Linux:*:*) - echo x86_64-unknown-linux-gnu - exit 0 ;; - i*86:Linux:*:*) - # The BFD linker knows what the default object file format is, so - # first see if it will tell us. cd to the root directory to prevent - # problems with other programs or directories called `ld' in the path. - # Set LC_ALL=C to ensure ld outputs messages in English. 
- ld_supported_targets=`cd /; LC_ALL=C ld --help 2>&1 \ - | sed -ne '/supported targets:/!d - s/[ ][ ]*/ /g - s/.*supported targets: *// - s/ .*// - p'` - case "$ld_supported_targets" in - elf32-i386) - TENTATIVE="${UNAME_MACHINE}-pc-linux-gnu" - ;; - a.out-i386-linux) - echo "${UNAME_MACHINE}-pc-linux-gnuaout" - exit 0 ;; - coff-i386) - echo "${UNAME_MACHINE}-pc-linux-gnucoff" - exit 0 ;; - "") - # Either a pre-BFD a.out linker (linux-gnuoldld) or - # one that does not give us useful --help. - echo "${UNAME_MACHINE}-pc-linux-gnuoldld" - exit 0 ;; - esac - # Determine whether the default compiler is a.out or elf - eval $set_cc_for_build - sed 's/^ //' << EOF >$dummy.c - #include - #ifdef __ELF__ - # ifdef __GLIBC__ - # if __GLIBC__ >= 2 - LIBC=gnu - # else - LIBC=gnulibc1 - # endif - # else - LIBC=gnulibc1 - # endif - #else - #ifdef __INTEL_COMPILER - LIBC=gnu - #else - LIBC=gnuaout - #endif - #endif - #ifdef __dietlibc__ - LIBC=dietlibc - #endif -EOF - eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep ^LIBC=` - test x"${LIBC}" != x && echo "${UNAME_MACHINE}-pc-linux-${LIBC}" && exit 0 - test x"${TENTATIVE}" != x && echo "${TENTATIVE}" && exit 0 - ;; + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + xtensa*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; i*86:DYNIX/ptx:4*:*) # ptx 4.0 does uname -s correctly, with DYNIX/ptx in there. # earlier versions are messed up and put the nodename in both # sysname and nodename. echo i386-sequent-sysv4 - exit 0 ;; + exit ;; i*86:UNIX_SV:4.2MP:2.*) - # Unixware is an offshoot of SVR4, but it has its own version - # number series starting with 2... - # I am not positive that other SVR4 systems won't match this, + # Unixware is an offshoot of SVR4, but it has its own version + # number series starting with 2... + # I am not positive that other SVR4 systems won't match this, # I just have to hope. -- rms. - # Use sysv4.2uw... so that sysv4* matches it. + # Use sysv4.2uw... so that sysv4* matches it. echo ${UNAME_MACHINE}-pc-sysv4.2uw${UNAME_VERSION} - exit 0 ;; + exit ;; i*86:OS/2:*:*) # If we were able to find `uname', then EMX Unix compatibility # is probably installed. echo ${UNAME_MACHINE}-pc-os2-emx - exit 0 ;; + exit ;; i*86:XTS-300:*:STOP) echo ${UNAME_MACHINE}-unknown-stop - exit 0 ;; + exit ;; i*86:atheos:*:*) echo ${UNAME_MACHINE}-unknown-atheos - exit 0 ;; - i*86:syllable:*:*) + exit ;; + i*86:syllable:*:*) echo ${UNAME_MACHINE}-pc-syllable - exit 0 ;; - i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.0*:*) + exit ;; + i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.[02]*:*) echo i386-unknown-lynxos${UNAME_RELEASE} - exit 0 ;; + exit ;; i*86:*DOS:*:*) echo ${UNAME_MACHINE}-pc-msdosdjgpp - exit 0 ;; + exit ;; i*86:*:4.*:* | i*86:SYSTEM_V:4.*:*) UNAME_REL=`echo ${UNAME_RELEASE} | sed 's/\/MP$//'` if grep Novell /usr/include/link.h >/dev/null 2>/dev/null; then @@ -1037,15 +1040,16 @@ EOF else echo ${UNAME_MACHINE}-pc-sysv${UNAME_REL} fi - exit 0 ;; - i*86:*:5:[78]*) + exit ;; + i*86:*:5:[678]*) + # UnixWare 7.x, OpenUNIX and OpenServer 6. 
case `/bin/uname -X | grep "^Machine"` in *486*) UNAME_MACHINE=i486 ;; *Pentium) UNAME_MACHINE=i586 ;; *Pent*|*Celeron) UNAME_MACHINE=i686 ;; esac echo ${UNAME_MACHINE}-unknown-sysv${UNAME_RELEASE}${UNAME_SYSTEM}${UNAME_VERSION} - exit 0 ;; + exit ;; i*86:*:3.2:*) if test -f /usr/options/cb.name; then UNAME_REL=`sed -n 's/.*Version //p' /dev/null 2>&1 ; then echo i860-stardent-sysv${UNAME_RELEASE} # Stardent Vistra i860-SVR4 else # Add other i860-SVR4 vendors below as they are discovered. echo i860-unknown-sysv${UNAME_RELEASE} # Unknown i860-SVR4 fi - exit 0 ;; + exit ;; mini*:CTIX:SYS*5:*) # "miniframe" echo m68010-convergent-sysv - exit 0 ;; + exit ;; mc68k:UNIX:SYSTEM5:3.51m) echo m68k-convergent-sysv - exit 0 ;; + exit ;; M680?0:D-NIX:5.3:*) echo m68k-diab-dnix - exit 0 ;; - M68*:*:R3V[567]*:*) - test -r /sysV68 && echo 'm68k-motorola-sysv' && exit 0 ;; - 3[345]??:*:4.0:3.0 | 3[34]??A:*:4.0:3.0 | 3[34]??,*:*:4.0:3.0 | 3[34]??/*:*:4.0:3.0 | 4400:*:4.0:3.0 | 4850:*:4.0:3.0 | SKA40:*:4.0:3.0 | SDS2:*:4.0:3.0 | SHG2:*:4.0:3.0) + exit ;; + M68*:*:R3V[5678]*:*) + test -r /sysV68 && { echo 'm68k-motorola-sysv'; exit; } ;; + 3[345]??:*:4.0:3.0 | 3[34]??A:*:4.0:3.0 | 3[34]??,*:*:4.0:3.0 | 3[34]??/*:*:4.0:3.0 | 4400:*:4.0:3.0 | 4850:*:4.0:3.0 | SKA40:*:4.0:3.0 | SDS2:*:4.0:3.0 | SHG2:*:4.0:3.0 | S7501*:*:4.0:3.0) OS_REL='' test -r /etc/.relid \ && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid` /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ - && echo i486-ncr-sysv4.3${OS_REL} && exit 0 + && { echo i486-ncr-sysv4.3${OS_REL}; exit; } /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \ - && echo i586-ncr-sysv4.3${OS_REL} && exit 0 ;; + && { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;; 3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*) - /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ - && echo i486-ncr-sysv4 && exit 0 ;; + /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ + && { echo i486-ncr-sysv4; exit; } ;; + NCR*:*:4.2:* | MPRAS*:*:4.2:*) + OS_REL='.3' + test -r /etc/.relid \ + && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid` + /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ + && { echo i486-ncr-sysv4.3${OS_REL}; exit; } + /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \ + && { echo i586-ncr-sysv4.3${OS_REL}; exit; } + /bin/uname -p 2>/dev/null | /bin/grep pteron >/dev/null \ + && { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;; m68*:LynxOS:2.*:* | m68*:LynxOS:3.0*:*) echo m68k-unknown-lynxos${UNAME_RELEASE} - exit 0 ;; + exit ;; mc68030:UNIX_System_V:4.*:*) echo m68k-atari-sysv4 - exit 0 ;; + exit ;; TSUNAMI:LynxOS:2.*:*) echo sparc-unknown-lynxos${UNAME_RELEASE} - exit 0 ;; + exit ;; rs6000:LynxOS:2.*:*) echo rs6000-unknown-lynxos${UNAME_RELEASE} - exit 0 ;; - PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.0*:*) + exit ;; + PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.[02]*:*) echo powerpc-unknown-lynxos${UNAME_RELEASE} - exit 0 ;; + exit ;; SM[BE]S:UNIX_SV:*:*) echo mips-dde-sysv${UNAME_RELEASE} - exit 0 ;; + exit ;; RM*:ReliantUNIX-*:*:*) echo mips-sni-sysv4 - exit 0 ;; + exit ;; RM*:SINIX-*:*:*) echo mips-sni-sysv4 - exit 0 ;; + exit ;; *:SINIX-*:*:*) if uname -p 2>/dev/null >/dev/null ; then UNAME_MACHINE=`(uname -p) 2>/dev/null` @@ -1137,68 +1154,94 @@ EOF else echo ns32k-sni-sysv fi - exit 0 ;; - PENTIUM:*:4.0*:*) # Unisys `ClearPath HMP IX 4000' SVR4/MP effort - # says - echo i586-unisys-sysv4 - exit 0 ;; + exit ;; + PENTIUM:*:4.0*:*) # Unisys `ClearPath HMP IX 4000' SVR4/MP effort + # says + echo 
i586-unisys-sysv4 + exit ;; *:UNIX_System_V:4*:FTX*) # From Gerald Hewes . # How about differentiating between stratus architectures? -djm echo hppa1.1-stratus-sysv4 - exit 0 ;; + exit ;; *:*:*:FTX*) # From seanf@swdc.stratus.com. echo i860-stratus-sysv4 - exit 0 ;; + exit ;; + i*86:VOS:*:*) + # From Paul.Green@stratus.com. + echo ${UNAME_MACHINE}-stratus-vos + exit ;; *:VOS:*:*) # From Paul.Green@stratus.com. echo hppa1.1-stratus-vos - exit 0 ;; + exit ;; mc68*:A/UX:*:*) echo m68k-apple-aux${UNAME_RELEASE} - exit 0 ;; + exit ;; news*:NEWS-OS:6*:*) echo mips-sony-newsos6 - exit 0 ;; + exit ;; R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*) if [ -d /usr/nec ]; then - echo mips-nec-sysv${UNAME_RELEASE} + echo mips-nec-sysv${UNAME_RELEASE} else - echo mips-unknown-sysv${UNAME_RELEASE} + echo mips-unknown-sysv${UNAME_RELEASE} fi - exit 0 ;; + exit ;; BeBox:BeOS:*:*) # BeOS running on hardware made by Be, PPC only. echo powerpc-be-beos - exit 0 ;; + exit ;; BeMac:BeOS:*:*) # BeOS running on Mac or Mac clone, PPC only. echo powerpc-apple-beos - exit 0 ;; + exit ;; BePC:BeOS:*:*) # BeOS running on Intel PC compatible. echo i586-pc-beos - exit 0 ;; + exit ;; + BePC:Haiku:*:*) # Haiku running on Intel PC compatible. + echo i586-pc-haiku + exit ;; SX-4:SUPER-UX:*:*) echo sx4-nec-superux${UNAME_RELEASE} - exit 0 ;; + exit ;; SX-5:SUPER-UX:*:*) echo sx5-nec-superux${UNAME_RELEASE} - exit 0 ;; + exit ;; SX-6:SUPER-UX:*:*) echo sx6-nec-superux${UNAME_RELEASE} - exit 0 ;; + exit ;; + SX-7:SUPER-UX:*:*) + echo sx7-nec-superux${UNAME_RELEASE} + exit ;; + SX-8:SUPER-UX:*:*) + echo sx8-nec-superux${UNAME_RELEASE} + exit ;; + SX-8R:SUPER-UX:*:*) + echo sx8r-nec-superux${UNAME_RELEASE} + exit ;; Power*:Rhapsody:*:*) echo powerpc-apple-rhapsody${UNAME_RELEASE} - exit 0 ;; + exit ;; *:Rhapsody:*:*) echo ${UNAME_MACHINE}-apple-rhapsody${UNAME_RELEASE} - exit 0 ;; + exit ;; *:Darwin:*:*) - case `uname -p` in - *86) UNAME_PROCESSOR=i686 ;; - powerpc) UNAME_PROCESSOR=powerpc ;; + UNAME_PROCESSOR=`uname -p` || UNAME_PROCESSOR=unknown + case $UNAME_PROCESSOR in + i386) + eval $set_cc_for_build + if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then + if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \ + (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \ + grep IS_64BIT_ARCH >/dev/null + then + UNAME_PROCESSOR="x86_64" + fi + fi ;; + unknown) UNAME_PROCESSOR=powerpc ;; esac echo ${UNAME_PROCESSOR}-apple-darwin${UNAME_RELEASE} - exit 0 ;; + exit ;; *:procnto*:*:* | *:QNX:[0123456789]*:*) UNAME_PROCESSOR=`uname -p` if test "$UNAME_PROCESSOR" = "x86"; then @@ -1206,22 +1249,28 @@ EOF UNAME_MACHINE=pc fi echo ${UNAME_PROCESSOR}-${UNAME_MACHINE}-nto-qnx${UNAME_RELEASE} - exit 0 ;; + exit ;; *:QNX:*:4*) echo i386-pc-qnx - exit 0 ;; + exit ;; + NEO-?:NONSTOP_KERNEL:*:*) + echo neo-tandem-nsk${UNAME_RELEASE} + exit ;; + NSE-?:NONSTOP_KERNEL:*:*) + echo nse-tandem-nsk${UNAME_RELEASE} + exit ;; NSR-?:NONSTOP_KERNEL:*:*) echo nsr-tandem-nsk${UNAME_RELEASE} - exit 0 ;; + exit ;; *:NonStop-UX:*:*) echo mips-compaq-nonstopux - exit 0 ;; + exit ;; BS2000:POSIX*:*:*) echo bs2000-siemens-sysv - exit 0 ;; + exit ;; DS/*:UNIX_System_V:*:*) echo ${UNAME_MACHINE}-${UNAME_SYSTEM}-${UNAME_RELEASE} - exit 0 ;; + exit ;; *:Plan9:*:*) # "uname -m" is not consistent, so use $cputype instead. 
386 # is converted to i386 for consistency with other x86 @@ -1232,31 +1281,53 @@ EOF UNAME_MACHINE="$cputype" fi echo ${UNAME_MACHINE}-unknown-plan9 - exit 0 ;; + exit ;; *:TOPS-10:*:*) echo pdp10-unknown-tops10 - exit 0 ;; + exit ;; *:TENEX:*:*) echo pdp10-unknown-tenex - exit 0 ;; + exit ;; KS10:TOPS-20:*:* | KL10:TOPS-20:*:* | TYPE4:TOPS-20:*:*) echo pdp10-dec-tops20 - exit 0 ;; + exit ;; XKL-1:TOPS-20:*:* | TYPE5:TOPS-20:*:*) echo pdp10-xkl-tops20 - exit 0 ;; + exit ;; *:TOPS-20:*:*) echo pdp10-unknown-tops20 - exit 0 ;; + exit ;; *:ITS:*:*) echo pdp10-unknown-its - exit 0 ;; + exit ;; SEI:*:*:SEIUX) - echo mips-sei-seiux${UNAME_RELEASE} - exit 0 ;; + echo mips-sei-seiux${UNAME_RELEASE} + exit ;; *:DragonFly:*:*) echo ${UNAME_MACHINE}-unknown-dragonfly`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` - exit 0 ;; + exit ;; + *:*VMS:*:*) + UNAME_MACHINE=`(uname -p) 2>/dev/null` + case "${UNAME_MACHINE}" in + A*) echo alpha-dec-vms ; exit ;; + I*) echo ia64-dec-vms ; exit ;; + V*) echo vax-dec-vms ; exit ;; + esac ;; + *:XENIX:*:SysV) + echo i386-pc-xenix + exit ;; + i*86:skyos:*:*) + echo ${UNAME_MACHINE}-pc-skyos`echo ${UNAME_RELEASE}` | sed -e 's/ .*$//' + exit ;; + i*86:rdos:*:*) + echo ${UNAME_MACHINE}-pc-rdos + exit ;; + i*86:AROS:*:*) + echo ${UNAME_MACHINE}-pc-aros + exit ;; + x86_64:VMkernel:*:*) + echo ${UNAME_MACHINE}-unknown-esx + exit ;; esac #echo '(No uname command or uname output not recognized.)' 1>&2 @@ -1279,16 +1350,16 @@ main () #include printf ("m68k-sony-newsos%s\n", #ifdef NEWSOS4 - "4" + "4" #else - "" + "" #endif - ); exit (0); + ); exit (0); #endif #endif #if defined (__arm) && defined (__acorn) && defined (__unix) - printf ("arm-acorn-riscix"); exit (0); + printf ("arm-acorn-riscix\n"); exit (0); #endif #if defined (hp300) && !defined (hpux) @@ -1377,11 +1448,12 @@ main () } EOF -$CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null && $dummy && exit 0 +$CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null && SYSTEM_NAME=`$dummy` && + { echo "$SYSTEM_NAME"; exit; } # Apollos put the system type in the environment. -test -d /usr/apollo && { echo ${ISP}-apollo-${SYSTYPE}; exit 0; } +test -d /usr/apollo && { echo ${ISP}-apollo-${SYSTYPE}; exit; } # Convex versions that predate uname can use getsysinfo(1) @@ -1390,22 +1462,22 @@ then case `getsysinfo -f cpu_type` in c1*) echo c1-convex-bsd - exit 0 ;; + exit ;; c2*) if getsysinfo -f scalar_acc then echo c32-convex-bsd else echo c2-convex-bsd fi - exit 0 ;; + exit ;; c34*) echo c34-convex-bsd - exit 0 ;; + exit ;; c38*) echo c38-convex-bsd - exit 0 ;; + exit ;; c4*) echo c4-convex-bsd - exit 0 ;; + exit ;; esac fi @@ -1416,7 +1488,9 @@ This script, last modified $timestamp, has failed to recognize the operating system you are using. It is advised that you download the most up to date version of the config scripts from - ftp://ftp.gnu.org/pub/gnu/config/ + http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD +and + http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub;hb=HEAD If the version you run ($0) is already up to date, please send the following data and any information you think might be diff --git a/config.sub b/config.sub index 264f820a..c894da45 100755 --- a/config.sub +++ b/config.sub @@ -1,9 +1,10 @@ #! /bin/sh # Configuration validation subroutine script. # Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, -# 2000, 2001, 2002, 2003 Free Software Foundation, Inc. 
+# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, +# 2011, 2012 Free Software Foundation, Inc. -timestamp='2004-02-23' +timestamp='2012-02-10' # This file is (in principle) common to ALL GNU software. # The presence of a machine in this file suggests that SOME GNU software @@ -20,23 +21,25 @@ timestamp='2004-02-23' # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, -# Boston, MA 02111-1307, USA. - +# along with this program; if not, see . +# # As a special exception to the GNU General Public License, if you # distribute this file as part of a program that contains a # configuration script generated by Autoconf, you may include it under # the same distribution terms that you use for the rest of that program. + # Please send patches to . Submit a context -# diff and a properly formatted ChangeLog entry. +# diff and a properly formatted GNU ChangeLog entry. # # Configuration subroutine to validate and canonicalize a configuration type. # Supply the specified configuration type as an argument. # If it is invalid, we print an error message on stderr and exit with code 1. # Otherwise, we print the canonical config type on stdout and succeed. +# You can get the latest version of this script from: +# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub;hb=HEAD + # This file is supposed to be the same for all GNU packages # and recognize all the CPU types, system types and aliases # that are meaningful with *any* GNU software. @@ -70,7 +73,8 @@ Report bugs and patches to ." version="\ GNU config.sub ($timestamp) -Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001 +Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, +2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO @@ -83,11 +87,11 @@ Try \`$me --help' for more information." while test $# -gt 0 ; do case $1 in --time-stamp | --time* | -t ) - echo "$timestamp" ; exit 0 ;; + echo "$timestamp" ; exit ;; --version | -v ) - echo "$version" ; exit 0 ;; + echo "$version" ; exit ;; --help | --h* | -h ) - echo "$usage"; exit 0 ;; + echo "$usage"; exit ;; -- ) # Stop option processing shift; break ;; - ) # Use stdin as input. @@ -99,7 +103,7 @@ while test $# -gt 0 ; do *local*) # First pass through any local machine types. echo $1 - exit 0;; + exit ;; * ) break ;; @@ -118,11 +122,18 @@ esac # Here we must recognize all the valid KERNEL-OS combinations. 
maybe_os=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'` case $maybe_os in - nto-qnx* | linux-gnu* | linux-dietlibc | linux-uclibc* | uclinux-uclibc* | uclinux-gnu* | \ - kfreebsd*-gnu* | knetbsd*-gnu* | netbsd*-gnu* | storm-chaos* | os2-emx* | rtmk-nova*) + nto-qnx* | linux-gnu* | linux-android* | linux-dietlibc | linux-newlib* | \ + linux-uclibc* | uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | \ + knetbsd*-gnu* | netbsd*-gnu* | \ + kopensolaris*-gnu* | \ + storm-chaos* | os2-emx* | rtmk-nova*) os=-$maybe_os basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'` ;; + android-linux) + os=-linux-android + basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'`-unknown + ;; *) basic_machine=`echo $1 | sed 's/-[^-]*$//'` if [ $basic_machine != $1 ] @@ -145,10 +156,13 @@ case $os in -convergent* | -ncr* | -news | -32* | -3600* | -3100* | -hitachi* |\ -c[123]* | -convex* | -sun | -crds | -omron* | -dg | -ultra | -tti* | \ -harris | -dolphin | -highlevel | -gould | -cbm | -ns | -masscomp | \ - -apple | -axis) + -apple | -axis | -knuth | -cray | -microblaze) os= basic_machine=$1 ;; + -bluegene*) + os=-cnk + ;; -sim | -cisco | -oki | -wec | -winbond) os= basic_machine=$1 @@ -163,13 +177,17 @@ case $os in os=-chorusos basic_machine=$1 ;; - -chorusrdb) - os=-chorusrdb + -chorusrdb) + os=-chorusrdb basic_machine=$1 - ;; + ;; -hiux*) os=-hiuxwe2 ;; + -sco6) + os=-sco5v6 + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; -sco5) os=-sco3.2v5 basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` @@ -186,6 +204,10 @@ case $os in # Don't forget version if it is 3.2v4 or newer. basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` ;; + -sco5v6*) + # Don't forget version if it is 3.2v4 or newer. + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; -sco*) os=-sco3.2v2 basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` @@ -227,25 +249,36 @@ case $basic_machine in # Some are omitted here because they have special meanings below. 
1750a | 580 \ | a29k \ + | aarch64 | aarch64_be \ | alpha | alphaev[4-8] | alphaev56 | alphaev6[78] | alphapca5[67] \ | alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \ | am33_2.0 \ - | arc | arm | arm[bl]e | arme[lb] | armv[2345] | armv[345][lb] | avr \ + | arc | arm | arm[bl]e | arme[lb] | armv[2345] | armv[345][lb] | avr | avr32 \ + | be32 | be64 \ + | bfin \ | c4x | clipper \ | d10v | d30v | dlx | dsp16xx \ - | fr30 | frv \ + | epiphany \ + | fido | fr30 | frv \ | h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \ + | hexagon \ | i370 | i860 | i960 | ia64 \ | ip2k | iq2000 \ - | m32r | m68000 | m68k | m88k | mcore \ + | le32 | le64 \ + | lm32 \ + | m32c | m32r | m32rle | m68000 | m68k | m88k \ + | maxq | mb | microblaze | mcore | mep | metag \ | mips | mipsbe | mipseb | mipsel | mipsle \ | mips16 \ | mips64 | mips64el \ - | mips64vr | mips64vrel \ + | mips64octeon | mips64octeonel \ | mips64orion | mips64orionel \ + | mips64r5900 | mips64r5900el \ + | mips64vr | mips64vrel \ | mips64vr4100 | mips64vr4100el \ | mips64vr4300 | mips64vr4300el \ | mips64vr5000 | mips64vr5000el \ + | mips64vr5900 | mips64vr5900el \ | mipsisa32 | mipsisa32el \ | mipsisa32r2 | mipsisa32r2el \ | mipsisa64 | mipsisa64el \ @@ -254,30 +287,65 @@ case $basic_machine in | mipsisa64sr71k | mipsisa64sr71kel \ | mipstx39 | mipstx39el \ | mn10200 | mn10300 \ + | moxie \ + | mt \ | msp430 \ + | nds32 | nds32le | nds32be \ + | nios | nios2 \ | ns16k | ns32k \ - | openrisc | or32 \ + | open8 \ + | or32 \ | pdp10 | pdp11 | pj | pjl \ - | powerpc | powerpc64 | powerpc64le | powerpcle | ppcbe \ + | powerpc | powerpc64 | powerpc64le | powerpcle \ | pyramid \ - | sh | sh[1234] | sh[23]e | sh[34]eb | shbe | shle | sh[1234]le | sh3ele \ + | rl78 | rx \ + | score \ + | sh | sh[1234] | sh[24]a | sh[24]aeb | sh[23]e | sh[34]eb | sheb | shbe | shle | sh[1234]le | sh3ele \ | sh64 | sh64le \ - | sparc | sparc64 | sparc86x | sparclet | sparclite | sparcv9 | sparcv9b \ - | strongarm \ - | tahoe | thumb | tic4x | tic80 | tron \ - | v850 | v850e \ + | sparc | sparc64 | sparc64b | sparc64v | sparc86x | sparclet | sparclite \ + | sparcv8 | sparcv9 | sparcv9b | sparcv9v \ + | spu \ + | tahoe | tic4x | tic54x | tic55x | tic6x | tic80 | tron \ + | ubicom32 \ + | v850 | v850e | v850e1 | v850e2 | v850es | v850e2v3 \ | we32k \ - | x86 | xscale | xstormy16 | xtensa \ - | z8k) + | x86 | xc16x | xstormy16 | xtensa \ + | z8k | z80) basic_machine=$basic_machine-unknown ;; - m6811 | m68hc11 | m6812 | m68hc12) - # Motorola 68HC11/12. + c54x) + basic_machine=tic54x-unknown + ;; + c55x) + basic_machine=tic55x-unknown + ;; + c6x) + basic_machine=tic6x-unknown + ;; + m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x | picochip) basic_machine=$basic_machine-unknown os=-none ;; m88110 | m680[12346]0 | m683?2 | m68360 | m5200 | v70 | w65 | z8k) ;; + ms1) + basic_machine=mt-unknown + ;; + + strongarm | thumb | xscale) + basic_machine=arm-unknown + ;; + xgate) + basic_machine=$basic_machine-unknown + os=-none + ;; + xscaleeb) + basic_machine=armeb-unknown + ;; + + xscaleel) + basic_machine=armel-unknown + ;; # We use `pc' rather than `unknown' # because (1) that's what they normally are, and @@ -293,32 +361,40 @@ case $basic_machine in # Recognize the basic CPU types with company name. 
580-* \ | a29k-* \ + | aarch64-* | aarch64_be-* \ | alpha-* | alphaev[4-8]-* | alphaev56-* | alphaev6[78]-* \ | alpha64-* | alpha64ev[4-8]-* | alpha64ev56-* | alpha64ev6[78]-* \ | alphapca5[67]-* | alpha64pca5[67]-* | arc-* \ | arm-* | armbe-* | armle-* | armeb-* | armv*-* \ - | avr-* \ - | bs2000-* \ - | c[123]* | c30-* | [cjt]90-* | c4x-* | c54x-* | c55x-* | c6x-* \ - | clipper-* | cydra-* \ + | avr-* | avr32-* \ + | be32-* | be64-* \ + | bfin-* | bs2000-* \ + | c[123]* | c30-* | [cjt]90-* | c4x-* \ + | clipper-* | craynv-* | cydra-* \ | d10v-* | d30v-* | dlx-* \ | elxsi-* \ - | f30[01]-* | f700-* | fr30-* | frv-* | fx80-* \ + | f30[01]-* | f700-* | fido-* | fr30-* | frv-* | fx80-* \ | h8300-* | h8500-* \ | hppa-* | hppa1.[01]-* | hppa2.0-* | hppa2.0[nw]-* | hppa64-* \ + | hexagon-* \ | i*86-* | i860-* | i960-* | ia64-* \ | ip2k-* | iq2000-* \ - | m32r-* \ + | le32-* | le64-* \ + | lm32-* \ + | m32c-* | m32r-* | m32rle-* \ | m68000-* | m680[012346]0-* | m68360-* | m683?2-* | m68k-* \ - | m88110-* | m88k-* | mcore-* \ + | m88110-* | m88k-* | maxq-* | mcore-* | metag-* | microblaze-* \ | mips-* | mipsbe-* | mipseb-* | mipsel-* | mipsle-* \ | mips16-* \ | mips64-* | mips64el-* \ - | mips64vr-* | mips64vrel-* \ + | mips64octeon-* | mips64octeonel-* \ | mips64orion-* | mips64orionel-* \ + | mips64r5900-* | mips64r5900el-* \ + | mips64vr-* | mips64vrel-* \ | mips64vr4100-* | mips64vr4100el-* \ | mips64vr4300-* | mips64vr4300el-* \ | mips64vr5000-* | mips64vr5000el-* \ + | mips64vr5900-* | mips64vr5900el-* \ | mipsisa32-* | mipsisa32el-* \ | mipsisa32r2-* | mipsisa32r2el-* \ | mipsisa64-* | mipsisa64el-* \ @@ -326,26 +402,39 @@ case $basic_machine in | mipsisa64sb1-* | mipsisa64sb1el-* \ | mipsisa64sr71k-* | mipsisa64sr71kel-* \ | mipstx39-* | mipstx39el-* \ + | mmix-* \ + | mt-* \ | msp430-* \ - | none-* | np1-* | nv1-* | ns16k-* | ns32k-* \ + | nds32-* | nds32le-* | nds32be-* \ + | nios-* | nios2-* \ + | none-* | np1-* | ns16k-* | ns32k-* \ + | open8-* \ | orion-* \ | pdp10-* | pdp11-* | pj-* | pjl-* | pn-* | power-* \ - | powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* | ppcbe-* \ + | powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* \ | pyramid-* \ - | romp-* | rs6000-* \ - | sh-* | sh[1234]-* | sh[23]e-* | sh[34]eb-* | shbe-* \ + | rl78-* | romp-* | rs6000-* | rx-* \ + | sh-* | sh[1234]-* | sh[24]a-* | sh[24]aeb-* | sh[23]e-* | sh[34]eb-* | sheb-* | shbe-* \ | shle-* | sh[1234]le-* | sh3ele-* | sh64-* | sh64le-* \ - | sparc-* | sparc64-* | sparc86x-* | sparclet-* | sparclite-* \ - | sparcv9-* | sparcv9b-* | strongarm-* | sv1-* | sx?-* \ - | tahoe-* | thumb-* \ + | sparc-* | sparc64-* | sparc64b-* | sparc64v-* | sparc86x-* | sparclet-* \ + | sparclite-* \ + | sparcv8-* | sparcv9-* | sparcv9b-* | sparcv9v-* | sv1-* | sx?-* \ + | tahoe-* \ | tic30-* | tic4x-* | tic54x-* | tic55x-* | tic6x-* | tic80-* \ + | tile*-* \ | tron-* \ - | v850-* | v850e-* | vax-* \ + | ubicom32-* \ + | v850-* | v850e-* | v850e1-* | v850es-* | v850e2-* | v850e2v3-* \ + | vax-* \ | we32k-* \ - | x86-* | x86_64-* | xps100-* | xscale-* | xstormy16-* \ - | xtensa-* \ + | x86-* | x86_64-* | xc16x-* | xps100-* \ + | xstormy16-* | xtensa*-* \ | ymp-* \ - | z8k-*) + | z8k-* | z80-*) + ;; + # Recognize the basic CPU types without company name, with glob match. + xtensa*) + basic_machine=$basic_machine-unknown ;; # Recognize the various machine names and aliases which stand # for a CPU type and a company and sometimes even an OS. 
@@ -363,7 +452,7 @@ case $basic_machine in basic_machine=a29k-amd os=-udi ;; - abacus) + abacus) basic_machine=abacus-unknown ;; adobe68k) @@ -409,6 +498,10 @@ case $basic_machine in basic_machine=m68k-apollo os=-bsd ;; + aros) + basic_machine=i386-pc + os=-aros + ;; aux) basic_machine=m68k-apple os=-aux @@ -417,10 +510,35 @@ case $basic_machine in basic_machine=ns32k-sequent os=-dynix ;; + blackfin) + basic_machine=bfin-unknown + os=-linux + ;; + blackfin-*) + basic_machine=bfin-`echo $basic_machine | sed 's/^[^-]*-//'` + os=-linux + ;; + bluegene*) + basic_machine=powerpc-ibm + os=-cnk + ;; + c54x-*) + basic_machine=tic54x-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + c55x-*) + basic_machine=tic55x-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + c6x-*) + basic_machine=tic6x-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; c90) basic_machine=c90-cray os=-unicos ;; + cegcc) + basic_machine=arm-unknown + os=-cegcc + ;; convex-c1) basic_machine=c1-convex os=-bsd @@ -445,13 +563,20 @@ case $basic_machine in basic_machine=j90-cray os=-unicos ;; - cr16c) - basic_machine=cr16c-unknown + craynv) + basic_machine=craynv-cray + os=-unicosmp + ;; + cr16 | cr16-*) + basic_machine=cr16-unknown os=-elf ;; crds | unos) basic_machine=m68k-crds ;; + crisv32 | crisv32-* | etraxfs*) + basic_machine=crisv32-axis + ;; cris | cris-* | etrax*) basic_machine=cris-axis ;; @@ -481,6 +606,14 @@ case $basic_machine in basic_machine=m88k-motorola os=-sysv3 ;; + dicos) + basic_machine=i686-pc + os=-dicos + ;; + djgpp) + basic_machine=i586-pc + os=-msdosdjgpp + ;; dpx20 | dpx20-*) basic_machine=rs6000-bull os=-bosx @@ -592,7 +725,6 @@ case $basic_machine in i370-ibm* | ibm*) basic_machine=i370-ibm ;; -# I'm not sure what "Sysv32" means. Should this be sysv3.2? i*86v32) basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` os=-sysv32 @@ -631,6 +763,14 @@ case $basic_machine in basic_machine=m68k-isi os=-sysv ;; + m68knommu) + basic_machine=m68k-unknown + os=-linux + ;; + m68knommu-*) + basic_machine=m68k-`echo $basic_machine | sed 's/^[^-]*-//'` + os=-linux + ;; m88k-omron*) basic_machine=m88k-omron ;; @@ -642,10 +782,17 @@ case $basic_machine in basic_machine=ns32k-utek os=-sysv ;; + microblaze) + basic_machine=microblaze-xilinx + ;; mingw32) basic_machine=i386-pc os=-mingw32 ;; + mingw32ce) + basic_machine=arm-unknown + os=-mingw32ce + ;; miniframe) basic_machine=m68000-convergent ;; @@ -659,10 +806,6 @@ case $basic_machine in mips3*) basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'`-unknown ;; - mmix*) - basic_machine=mmix-knuth - os=-mmixware - ;; monitor) basic_machine=m68k-rom68k os=-coff @@ -675,10 +818,21 @@ case $basic_machine in basic_machine=i386-pc os=-msdos ;; + ms1-*) + basic_machine=`echo $basic_machine | sed -e 's/ms1-/mt-/'` + ;; + msys) + basic_machine=i386-pc + os=-msys + ;; mvs) basic_machine=i370-ibm os=-mvs ;; + nacl) + basic_machine=le32-unknown + os=-nacl + ;; ncr3000) basic_machine=i486-ncr os=-sysv4 @@ -743,9 +897,11 @@ case $basic_machine in np1) basic_machine=np1-gould ;; - nv1) - basic_machine=nv1-cray - os=-unicosmp + neo-tandem) + basic_machine=neo-tandem + ;; + nse-tandem) + basic_machine=nse-tandem ;; nsr-tandem) basic_machine=nsr-tandem @@ -754,9 +910,8 @@ case $basic_machine in basic_machine=hppa1.1-oki os=-proelf ;; - or32 | or32-*) + openrisc | openrisc-*) basic_machine=or32-unknown - os=-coff ;; os400) basic_machine=powerpc-ibm @@ -778,6 +933,14 @@ case $basic_machine in basic_machine=i860-intel os=-osf ;; + parisc) + basic_machine=hppa-unknown + os=-linux + ;; + parisc-*) 
+ basic_machine=hppa-`echo $basic_machine | sed 's/^[^-]*-//'` + os=-linux + ;; pbd) basic_machine=sparc-tti ;; @@ -787,6 +950,12 @@ case $basic_machine in pc532 | pc532-*) basic_machine=ns32k-pc532 ;; + pc98) + basic_machine=i386-pc + ;; + pc98-*) + basic_machine=i386-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; pentium | p5 | k5 | k6 | nexgen | viac3) basic_machine=i586-pc ;; @@ -816,9 +985,10 @@ case $basic_machine in ;; power) basic_machine=power-ibm ;; - ppc) basic_machine=powerpc-unknown + ppc | ppcbe) basic_machine=powerpc-unknown ;; - ppc-*) basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'` + ppc-* | ppcbe-*) + basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'` ;; ppcle | powerpclittle | ppc-le | powerpc-little) basic_machine=powerpcle-unknown @@ -843,6 +1013,10 @@ case $basic_machine in basic_machine=i586-unknown os=-pw32 ;; + rdos) + basic_machine=i386-pc + os=-rdos + ;; rom68k) basic_machine=m68k-rom68k os=-coff @@ -869,6 +1043,10 @@ case $basic_machine in sb1el) basic_machine=mipsisa64sb1el-unknown ;; + sde) + basic_machine=mipsisa32-sde + os=-elf + ;; sei) basic_machine=mips-sei os=-seiux @@ -880,6 +1058,9 @@ case $basic_machine in basic_machine=sh-hitachi os=-hms ;; + sh5el) + basic_machine=sh5le-unknown + ;; sh64) basic_machine=sh64-unknown ;; @@ -901,6 +1082,9 @@ case $basic_machine in basic_machine=i860-stratus os=-sysv4 ;; + strongarm-* | thumb-*) + basic_machine=arm-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; sun2) basic_machine=m68000-sun ;; @@ -957,17 +1141,9 @@ case $basic_machine in basic_machine=t90-cray os=-unicos ;; - tic54x | c54x*) - basic_machine=tic54x-unknown - os=-coff - ;; - tic55x | c55x*) - basic_machine=tic55x-unknown - os=-coff - ;; - tic6x | c6x*) - basic_machine=tic6x-unknown - os=-coff + tile*) + basic_machine=$basic_machine-unknown + os=-linux-gnu ;; tx39) basic_machine=mipstx39-unknown @@ -1029,9 +1205,16 @@ case $basic_machine in basic_machine=hppa1.1-winbond os=-proelf ;; + xbox) + basic_machine=i686-pc + os=-mingw32 + ;; xps | xps100) basic_machine=xps100-honeywell ;; + xscale-* | xscalee[bl]-*) + basic_machine=`echo $basic_machine | sed 's/^xscale/arm/'` + ;; ymp) basic_machine=ymp-cray os=-unicos @@ -1040,6 +1223,10 @@ case $basic_machine in basic_machine=z8k-unknown os=-sim ;; + z80-*-coff) + basic_machine=z80-unknown + os=-sim + ;; none) basic_machine=none-none os=-none @@ -1059,6 +1246,9 @@ case $basic_machine in romp) basic_machine=romp-ibm ;; + mmix) + basic_machine=mmix-knuth + ;; rs6000) basic_machine=rs6000-ibm ;; @@ -1075,13 +1265,10 @@ case $basic_machine in we32k) basic_machine=we32k-att ;; - sh3 | sh4 | sh[34]eb | sh[1234]le | sh[23]ele) + sh[1234] | sh[24]a | sh[24]aeb | sh[34]eb | sh[1234]le | sh[23]ele) basic_machine=sh-unknown ;; - sh64) - basic_machine=sh64-unknown - ;; - sparc | sparcv9 | sparcv9b) + sparc | sparcv8 | sparcv9 | sparcv9b | sparcv9v) basic_machine=sparc-sun ;; cydra) @@ -1125,9 +1312,12 @@ esac if [ x"$os" != x"" ] then case $os in - # First match some system type aliases - # that might get confused with valid system types. + # First match some system type aliases + # that might get confused with valid system types. # -solaris* is a basic system type, with this one exception. + -auroraux) + os=-auroraux + ;; -solaris1 | -solaris1.*) os=`echo $os | sed -e 's|solaris1|sunos4|'` ;; @@ -1148,26 +1338,31 @@ case $os in # Each alternative MUST END IN A *, to match a version number. # -sysv* is not here because it comes later, after sysvr4. 
-gnu* | -bsd* | -mach* | -minix* | -genix* | -ultrix* | -irix* \ - | -*vms* | -sco* | -esix* | -isc* | -aix* | -sunos | -sunos[34]*\ - | -hpux* | -unos* | -osf* | -luna* | -dgux* | -solaris* | -sym* \ + | -*vms* | -sco* | -esix* | -isc* | -aix* | -cnk* | -sunos | -sunos[34]*\ + | -hpux* | -unos* | -osf* | -luna* | -dgux* | -auroraux* | -solaris* \ + | -sym* | -kopensolaris* \ | -amigaos* | -amigados* | -msdos* | -newsos* | -unicos* | -aof* \ - | -aos* \ + | -aos* | -aros* \ | -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \ | -clix* | -riscos* | -uniplus* | -iris* | -rtu* | -xenix* \ - | -hiux* | -386bsd* | -knetbsd* | -mirbsd* | -netbsd* | -openbsd* \ + | -hiux* | -386bsd* | -knetbsd* | -mirbsd* | -netbsd* \ + | -openbsd* | -solidbsd* \ | -ekkobsd* | -kfreebsd* | -freebsd* | -riscix* | -lynxos* \ | -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \ | -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \ | -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \ - | -chorusos* | -chorusrdb* \ - | -cygwin* | -pe* | -psos* | -moss* | -proelf* | -rtems* \ - | -mingw32* | -linux-gnu* | -linux-uclibc* | -uxpv* | -beos* | -mpeix* | -udk* \ + | -chorusos* | -chorusrdb* | -cegcc* \ + | -cygwin* | -msys* | -pe* | -psos* | -moss* | -proelf* | -rtems* \ + | -mingw32* | -linux-gnu* | -linux-android* \ + | -linux-newlib* | -linux-uclibc* \ + | -uxpv* | -beos* | -mpeix* | -udk* \ | -interix* | -uwin* | -mks* | -rhapsody* | -darwin* | -opened* \ | -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \ | -storm-chaos* | -tops10* | -tenex* | -tops20* | -its* \ | -os2* | -vos* | -palmos* | -uclinux* | -nucleus* \ | -morphos* | -superux* | -rtmk* | -rtmk-nova* | -windiss* \ - | -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly*) + | -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly* \ + | -skyos* | -haiku* | -rdos* | -toppers* | -drops* | -es*) # Remember, each alternative MUST END IN *, to match a version number. ;; -qnx*) @@ -1185,7 +1380,7 @@ case $os in os=`echo $os | sed -e 's|nto|nto-qnx|'` ;; -sim | -es1800* | -hms* | -xray | -os68k* | -none* | -v88r* \ - | -windows* | -osx | -abug | -netware* | -os9* | -beos* \ + | -windows* | -osx | -abug | -netware* | -os9* | -beos* | -haiku* \ | -macos* | -mpw* | -magic* | -mmixware* | -mon960* | -lnews*) ;; -mac*) @@ -1206,7 +1401,7 @@ case $os in -opened*) os=-openedition ;; - -os400*) + -os400*) os=-os400 ;; -wince*) @@ -1255,7 +1450,7 @@ case $os in -sinix*) os=-sysv4 ;; - -tpf*) + -tpf*) os=-tpf ;; -triton*) @@ -1294,6 +1489,14 @@ case $os in -kaos*) os=-kaos ;; + -zvmoe) + os=-zvmoe + ;; + -dicos*) + os=-dicos + ;; + -nacl*) + ;; -none) ;; *) @@ -1316,6 +1519,12 @@ else # system, and we'll never get to this point. case $basic_machine in + score-*) + os=-elf + ;; + spu-*) + os=-elf + ;; *-acorn) os=-riscix1.2 ;; @@ -1325,9 +1534,18 @@ case $basic_machine in arm*-semi) os=-aout ;; - c4x-* | tic4x-*) - os=-coff - ;; + c4x-* | tic4x-*) + os=-coff + ;; + tic54x-*) + os=-coff + ;; + tic55x-*) + os=-coff + ;; + tic6x-*) + os=-coff + ;; # This must come before the *-dec entry. pdp10-*) os=-tops20 @@ -1346,13 +1564,13 @@ case $basic_machine in ;; m68000-sun) os=-sunos3 - # This also exists in the configure program, but was not the - # default. 
- # os=-sunos4 ;; m68*-cisco) os=-aout ;; + mep-*) + os=-elf + ;; mips*-cisco) os=-elf ;; @@ -1371,9 +1589,15 @@ case $basic_machine in *-be) os=-beos ;; + *-haiku) + os=-haiku + ;; *-ibm) os=-aix ;; + *-knuth) + os=-mmixware + ;; *-wec) os=-proelf ;; @@ -1476,7 +1700,7 @@ case $basic_machine in -sunos*) vendor=sun ;; - -aix*) + -cnk*|-aix*) vendor=ibm ;; -beos*) @@ -1539,7 +1763,7 @@ case $basic_machine in esac echo $basic_machine$os -exit 0 +exit # Local variables: # eval: (add-hook 'write-file-hooks 'time-stamp) From eb2398106fd1b43989c12796eb706aea79b11859 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 13 Mar 2012 08:46:12 -0700 Subject: [PATCH 051/205] Fix malloc_stats_print() option support. Fix malloc_stats_print() to honor 'b' and 'l' in the opts parameter. --- src/stats.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/stats.c b/src/stats.c index 38c8bb3c..f494974b 100644 --- a/src/stats.c +++ b/src/stats.c @@ -49,7 +49,7 @@ static void stats_arena_bins_print(void (*write_cb)(void *, const char *), static void stats_arena_lruns_print(void (*write_cb)(void *, const char *), void *cbopaque, unsigned i); static void stats_arena_print(void (*write_cb)(void *, const char *), - void *cbopaque, unsigned i); + void *cbopaque, unsigned i, bool bins, bool large); /******************************************************************************/ @@ -203,7 +203,7 @@ stats_arena_lruns_print(void (*write_cb)(void *, const char *), void *cbopaque, static void stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, - unsigned i) + unsigned i, bool bins, bool large) { unsigned nthreads; size_t pagesize, pactive, pdirty, mapped; @@ -256,8 +256,10 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, CTL_I_GET("stats.arenas.0.mapped", &mapped, size_t); malloc_cprintf(write_cb, cbopaque, "mapped: %12zu\n", mapped); - stats_arena_bins_print(write_cb, cbopaque, i); - stats_arena_lruns_print(write_cb, cbopaque, i); + if (bins) + stats_arena_bins_print(write_cb, cbopaque, i); + if (large) + stats_arena_lruns_print(write_cb, cbopaque, i); } void @@ -506,7 +508,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, malloc_cprintf(write_cb, cbopaque, "\nMerged arenas stats:\n"); stats_arena_print(write_cb, cbopaque, - narenas); + narenas, bins, large); } } } @@ -532,7 +534,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, cbopaque, "\narenas[%u]:\n", i); stats_arena_print(write_cb, - cbopaque, i); + cbopaque, i, bins, large); } } } From 4c2faa8a7c42a47a6bea509f5a23234bc5a66d40 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 13 Mar 2012 11:09:23 -0700 Subject: [PATCH 052/205] Fix a regression in JE_COMPILABLE(). Revert JE_COMPILABLE() so that it detects link errors. Cross-compiling should still work as long as a valid configure cache is provided. Clean up some comments/whitespace. --- configure.ac | 28 +++++++++---------- .../jemalloc/internal/jemalloc_internal.h.in | 9 ++---- src/jemalloc.c | 5 +--- 3 files changed, 18 insertions(+), 24 deletions(-) diff --git a/configure.ac b/configure.ac index 5cf2855f..a5ca859b 100644 --- a/configure.ac +++ b/configure.ac @@ -26,14 +26,17 @@ AC_COMPILE_IFELSE([AC_LANG_PROGRAM( ]) dnl JE_COMPILABLE(label, hcode, mcode, rvar) +dnl +dnl Use AC_RUN_IFELSE() rather than AC_COMPILE_IFELSE() so that linker errors +dnl cause failure. 
AC_DEFUN([JE_COMPILABLE], [ AC_CACHE_CHECK([whether $1 is compilable], [$4], - [AC_COMPILE_IFELSE([AC_LANG_PROGRAM([$2], - [$3])], - [$4=yes], - [$4=no])]) + [AC_RUN_IFELSE([AC_LANG_PROGRAM([$2], + [$3])], + [$4=yes], + [$4=no])]) ]) dnl ============================================================================ @@ -801,16 +804,13 @@ dnl ============================================================================ dnl Check for ffsl(3), and fail if not found. This function exists on all dnl platforms that jemalloc currently has a chance of functioning on without dnl modification. -JE_COMPILABLE([a program using ffsl], - [ - #include <strings.h> - ], - [ - { - int rv = ffsl(0x08); - } - ], - [je_cv_function_ffsl]) +JE_COMPILABLE([a program using ffsl], [ +#include <strings.h> +], [ + { + int rv = ffsl(0x08); + } +], [je_cv_function_ffsl]) if test "x${je_cv_function_ffsl}" != "xyes" ; then AC_MSG_ERROR([Cannot build without ffsl(3)]) fi diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 3774bb5d..5759ed58 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -240,14 +240,14 @@ static const bool config_ivsalloc = #define LONG_MASK (LONG - 1) /* Return the smallest long multiple that is >= a. */ -#define LONG_CEILING(a) \ +#define LONG_CEILING(a) \ (((a) + LONG_MASK) & ~LONG_MASK) #define SIZEOF_PTR (1U << LG_SIZEOF_PTR) #define PTR_MASK (SIZEOF_PTR - 1) /* Return the smallest (void *) multiple that is >= a. */ -#define PTR_CEILING(a) \ +#define PTR_CEILING(a) \ (((a) + PTR_MASK) & ~PTR_MASK) /* @@ -566,10 +566,7 @@ sa2u(size_t size, size_t alignment, size_t *run_size_p) } } -/* - * Choose an arena based on a per-thread value (fast-path code, calls slow-path - * code if necessary). - */ +/* Choose an arena based on a per-thread value. */ JEMALLOC_INLINE arena_t * choose_arena(void) { diff --git a/src/jemalloc.c b/src/jemalloc.c index e148ae0e..f564b65e 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -101,10 +101,7 @@ arenas_extend(unsigned ind) return (arenas[0]); } -/* - * Choose an arena based on a per-thread value (slow-path code only, called - * only by choose_arena()). - */ +/* Slow path, called only by choose_arena(). */ arena_t * choose_arena_hard(void) { From 0a0bbf63e5d9bc60d6854c6d46b437fbeebd1470 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 13 Mar 2012 12:55:21 -0700 Subject: [PATCH 053/205] Implement aligned_alloc(). Implement aligned_alloc(), which was added in the C11 standard. The function is weakly specified to the point that a minimally compliant implementation would be painful to use (size must be an integral multiple of alignment!), which in practice makes posix_memalign() a safer choice.
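To make the contrast concrete, here is a minimal caller-side sketch (illustrative only, not part of this patch; it assumes a C11-conforming stdlib.h that declares aligned_alloc()):

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int
main(void)
{
	void *p;

	/* C11 interface: errors are reported via errno, and a strictly
	 * portable caller keeps size an integral multiple of alignment. */
	p = aligned_alloc(64, 128);
	if (p == NULL)
		fprintf(stderr, "aligned_alloc: %s\n", strerror(errno));
	free(p);

	/* POSIX interface: size is unconstrained and the error is the
	 * return value, which is why it is the safer choice in practice. */
	if (posix_memalign(&p, 64, 100) == 0)
		free(p);
	return (0);
}

Note that the imemalign()-based implementation below does not actually reject sizes that are not multiples of the alignment; per the documentation added in this patch, behavior is simply undefined in that case.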
--- Makefile.in | 6 +- configure.ac | 2 +- doc/jemalloc.xml.in | 35 ++++++++ include/jemalloc/jemalloc.h.in | 3 + include/jemalloc/jemalloc_defs.h.in | 1 + src/jemalloc.c | 37 ++++++--- test/aligned_alloc.c | 123 ++++++++++++++++++++++++++++ test/aligned_alloc.exp | 25 ++++++ 8 files changed, 218 insertions(+), 14 deletions(-) create mode 100644 test/aligned_alloc.c create mode 100644 test/aligned_alloc.exp diff --git a/Makefile.in b/Makefile.in index 62864556..01ed083c 100644 --- a/Makefile.in +++ b/Makefile.in @@ -63,9 +63,9 @@ DOCS_XML := @objroot@doc/jemalloc@install_suffix@.xml DOCS_HTML := $(DOCS_XML:@objroot@%.xml=@srcroot@%.html) DOCS_MAN3 := $(DOCS_XML:@objroot@%.xml=@srcroot@%.3) DOCS := $(DOCS_HTML) $(DOCS_MAN3) -CTESTS := @srcroot@test/allocated.c @srcroot@test/bitmap.c \ - @srcroot@test/mremap.c @srcroot@test/posix_memalign.c \ - @srcroot@test/thread_arena.c +CTESTS := @srcroot@test/aligned_alloc.c @srcroot@test/allocated.c \ + @srcroot@test/bitmap.c @srcroot@test/mremap.c \ + @srcroot@test/posix_memalign.c @srcroot@test/thread_arena.c ifeq (@enable_experimental@, 1) CTESTS += @srcroot@test/allocm.c @srcroot@test/rallocm.c endif diff --git a/configure.ac b/configure.ac index a5ca859b..295e646d 100644 --- a/configure.ac +++ b/configure.ac @@ -316,7 +316,7 @@ AC_PATH_PROG([AR], [ar], , [$PATH]) AC_PATH_PROG([LD], [ld], , [$PATH]) AC_PATH_PROG([AUTOCONF], [autoconf], , [$PATH]) -public_syms="malloc_conf malloc_message malloc calloc posix_memalign realloc free malloc_usable_size malloc_stats_print mallctl mallctlnametomib mallctlbymib" +public_syms="malloc_conf malloc_message malloc calloc posix_memalign aligned_alloc realloc free malloc_usable_size malloc_stats_print mallctl mallctlnametomib mallctlbymib" dnl Check for allocator-related functions that should be wrapped. AC_CHECK_FUNC([memalign], diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 74da409f..926deafe 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -30,6 +30,7 @@ malloc calloc posix_memalign + aligned_alloc realloc free malloc_usable_size @@ -73,6 +74,11 @@ size_t alignment size_t size + + void *aligned_alloc + size_t alignment + size_t size + void *realloc void *ptr @@ -190,6 +196,14 @@ alignment must be a power of 2 at least as large as sizeof(void *). + The aligned_alloc function + allocates size bytes of memory such that the + allocation's base address is an even multiple of + alignment. The requested + alignment must be a power of 2. Behavior is + undefined if size is not an integral multiple of + alignment. + The realloc function changes the size of the previously allocated memory referenced by ptr to size bytes. The @@ -1789,6 +1803,27 @@ malloc_conf = "xmalloc:true";]]> + The aligned_alloc function returns + a pointer to the allocated memory if successful; otherwise a + NULL pointer is returned and + errno is set. The + aligned_alloc function will fail if: + + + EINVAL + + The alignment parameter is + not a power of 2. + + + + ENOMEM + + Memory allocation error. 
+ + + + The realloc function returns a pointer, possibly identical to ptr, to the allocated memory if successful; otherwise a NULL diff --git a/include/jemalloc/jemalloc.h.in b/include/jemalloc/jemalloc.h.in index 742daddd..f0581dbd 100644 --- a/include/jemalloc/jemalloc.h.in +++ b/include/jemalloc/jemalloc.h.in @@ -43,6 +43,7 @@ void *je_malloc(size_t size) JEMALLOC_ATTR(malloc); void *je_calloc(size_t num, size_t size) JEMALLOC_ATTR(malloc); int je_posix_memalign(void **memptr, size_t alignment, size_t size) JEMALLOC_ATTR(nonnull(1)); +void *je_aligned_alloc(size_t alignment, size_t size) JEMALLOC_ATTR(malloc); void *je_realloc(void *ptr, size_t size); void je_free(void *ptr); @@ -82,6 +83,7 @@ int je_nallocm(size_t *rsize, size_t size, int flags); #define malloc je_malloc #define calloc je_calloc #define posix_memalign je_posix_memalign +#define aligned_alloc je_aligned_alloc #define realloc je_realloc #define free je_free #define malloc_usable_size je_malloc_usable_size @@ -113,6 +115,7 @@ int je_nallocm(size_t *rsize, size_t size, int flags); #undef je_malloc #undef je_calloc #undef je_posix_memalign +#undef je_aligned_alloc #undef je_realloc #undef je_free #undef je_malloc_usable_size diff --git a/include/jemalloc/jemalloc_defs.h.in b/include/jemalloc/jemalloc_defs.h.in index 6b2b0d01..f8f80e62 100644 --- a/include/jemalloc/jemalloc_defs.h.in +++ b/include/jemalloc/jemalloc_defs.h.in @@ -16,6 +16,7 @@ #undef je_malloc #undef je_calloc #undef je_posix_memalign +#undef je_aligned_alloc #undef je_realloc #undef je_free #undef je_malloc_usable_size diff --git a/src/jemalloc.c b/src/jemalloc.c index f564b65e..2f3f3722 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -68,7 +68,7 @@ static void malloc_conf_error(const char *msg, const char *k, size_t klen, static void malloc_conf_init(void); static bool malloc_init_hard(void); static int imemalign(void **memptr, size_t alignment, size_t size, - bool enforce_min_alignment); + size_t min_alignment); /******************************************************************************/ /* @@ -851,7 +851,7 @@ JEMALLOC_ATTR(noinline) #endif static int imemalign(void **memptr, size_t alignment, size_t size, - bool enforce_min_alignment) + size_t min_alignment) { int ret; size_t usize; @@ -862,6 +862,8 @@ imemalign(void **memptr, size_t alignment, size_t size, #endif ; + assert(min_alignment != 0); + if (malloc_init()) result = NULL; else { @@ -870,10 +872,10 @@ imemalign(void **memptr, size_t alignment, size_t size, /* Make sure that alignment is a large enough power of 2. 
*/ if (((alignment - 1) & alignment) != 0 - || (enforce_min_alignment && alignment < sizeof(void *))) { + || (alignment < min_alignment)) { if (config_xmalloc && opt_xmalloc) { - malloc_write("<jemalloc>: Error in " - "posix_memalign(): invalid alignment\n"); + malloc_write("<jemalloc>: Error allocating " + "aligned memory: invalid alignment\n"); abort(); } result = NULL; @@ -915,8 +917,8 @@ imemalign(void **memptr, size_t alignment, size_t size, if (result == NULL) { if (config_xmalloc && opt_xmalloc) { - malloc_write("<jemalloc>: Error in posix_memalign(): " - "out of memory\n"); + malloc_write("<jemalloc>: Error allocating aligned " + "memory: out of memory\n"); abort(); } ret = ENOMEM; @@ -942,7 +944,22 @@ int je_posix_memalign(void **memptr, size_t alignment, size_t size) { - return imemalign(memptr, alignment, size, true); + return imemalign(memptr, alignment, size, sizeof(void *)); +} + +JEMALLOC_ATTR(malloc) +JEMALLOC_ATTR(visibility("default")) +void * +je_aligned_alloc(size_t alignment, size_t size) +{ + void *ret; + int err; + + if ((err = imemalign(&ret, alignment, size, 1)) != 0) { + ret = NULL; + errno = err; + } + return (ret); } JEMALLOC_ATTR(malloc) @@ -1196,7 +1213,7 @@ je_memalign(size_t alignment, size_t size) = NULL #endif ; - imemalign(&ret, alignment, size, false); + imemalign(&ret, alignment, size, 1); return (ret); } #endif @@ -1212,7 +1229,7 @@ je_valloc(size_t size) = NULL #endif ; - imemalign(&ret, PAGE_SIZE, size, false); + imemalign(&ret, PAGE_SIZE, size, 1); return (ret); } #endif diff --git a/test/aligned_alloc.c b/test/aligned_alloc.c new file mode 100644 index 00000000..2a95604f --- /dev/null +++ b/test/aligned_alloc.c @@ -0,0 +1,123 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <stdint.h> +#include <stdbool.h> + +#define JEMALLOC_MANGLE +#include "jemalloc_test.h" + +#define CHUNK 0x400000 +/* #define MAXALIGN ((size_t)UINT64_C(0x80000000000)) */ +#define MAXALIGN ((size_t)0x2000000LU) +#define NITER 4 + +int +main(void) +{ + size_t alignment, size, total; + unsigned i; + void *p, *ps[NITER]; + + fprintf(stderr, "Test begin\n"); + + /* Test error conditions.
*/ + alignment = 0; + errno = 0; + p = aligned_alloc(alignment, 1); + if (p != NULL || errno != EINVAL) { + fprintf(stderr, + "Expected error for invalid alignment %zu\n", alignment); + } + + for (alignment = sizeof(size_t); alignment < MAXALIGN; + alignment <<= 1) { + errno = 0; + p = aligned_alloc(alignment + 1, 1); + if (p != NULL || errno != EINVAL) { + fprintf(stderr, + "Expected error for invalid alignment %zu\n", + alignment + 1); + } + } + +#if LG_SIZEOF_PTR == 3 + alignment = UINT64_C(0x8000000000000000); + size = UINT64_C(0x8000000000000000); +#else + alignment = 0x80000000LU; + size = 0x80000000LU; +#endif + errno = 0; + p = aligned_alloc(alignment, size); + if (p != NULL || errno != ENOMEM) { + fprintf(stderr, + "Expected error for aligned_alloc(%zu, %zu)\n", + alignment, size); + } + +#if LG_SIZEOF_PTR == 3 + alignment = UINT64_C(0x4000000000000000); + size = UINT64_C(0x8400000000000001); +#else + alignment = 0x40000000LU; + size = 0x84000001LU; +#endif + errno = 0; + p = aligned_alloc(alignment, size); + if (p != NULL || errno != ENOMEM) { + fprintf(stderr, + "Expected error for aligned_alloc(%zu, %zu)\n", + alignment, size); + } + + alignment = 0x10LU; +#if LG_SIZEOF_PTR == 3 + size = UINT64_C(0xfffffffffffffff0); +#else + size = 0xfffffff0LU; +#endif + errno = 0; + p = aligned_alloc(alignment, size); + if (p != NULL || errno != ENOMEM) { + fprintf(stderr, + "Expected error for aligned_alloc(&p, %zu, %zu)\n", + alignment, size); + } + + for (i = 0; i < NITER; i++) + ps[i] = NULL; + + for (alignment = 8; + alignment <= MAXALIGN; + alignment <<= 1) { + total = 0; + fprintf(stderr, "Alignment: %zu\n", alignment); + for (size = 1; + size < 3 * alignment && size < (1U << 31); + size += (alignment >> (LG_SIZEOF_PTR-1)) - 1) { + for (i = 0; i < NITER; i++) { + ps[i] = aligned_alloc(alignment, size); + if (ps[i] == NULL) { + fprintf(stderr, + "Error for size %zu (%#zx): %s\n", + size, size, strerror(errno)); + exit(1); + } + total += malloc_usable_size(ps[i]); + if (total >= (MAXALIGN << 1)) + break; + } + for (i = 0; i < NITER; i++) { + if (ps[i] != NULL) { + free(ps[i]); + ps[i] = NULL; + } + } + } + } + + fprintf(stderr, "Test end\n"); + return (0); +} diff --git a/test/aligned_alloc.exp b/test/aligned_alloc.exp new file mode 100644 index 00000000..b5061c72 --- /dev/null +++ b/test/aligned_alloc.exp @@ -0,0 +1,25 @@ +Test begin +Alignment: 8 +Alignment: 16 +Alignment: 32 +Alignment: 64 +Alignment: 128 +Alignment: 256 +Alignment: 512 +Alignment: 1024 +Alignment: 2048 +Alignment: 4096 +Alignment: 8192 +Alignment: 16384 +Alignment: 32768 +Alignment: 65536 +Alignment: 131072 +Alignment: 262144 +Alignment: 524288 +Alignment: 1048576 +Alignment: 2097152 +Alignment: 4194304 +Alignment: 8388608 +Alignment: 16777216 +Alignment: 33554432 +Test end From 824d34e5b7f5cf00bf472ec79f7ec1c6e3474114 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 13 Mar 2012 13:19:04 -0700 Subject: [PATCH 054/205] Modify malloc_vsnprintf() validation code. Modify malloc_vsnprintf() validation code to verify that output is identical to vsnprintf() output, even if both outputs are truncated due to buffer exhaustion. --- include/jemalloc/internal/util.h | 6 ++++++ src/util.c | 7 +++---- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index a268109c..c5f7520c 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -4,6 +4,12 @@ /* Size of stack-allocated buffer passed to buferror(). 
*/ #define BUFERROR_BUF 64 +/* + * Size of static buffer used by malloc_[v]{,c,t}printf(). This must be large + * enough for all possible uses within jemalloc. + */ +#define MALLOC_PRINTF_BUFSIZE 4096 + /* * Define a custom assert() in order to reduce the chances of deadlock during * assertion failure. diff --git a/src/util.c b/src/util.c index 7d658aaa..47e7b66e 100644 --- a/src/util.c +++ b/src/util.c @@ -433,7 +433,7 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) ret = i; if (config_debug) { - char buf[ret + 2]; + char buf[MALLOC_PRINTF_BUFSIZE]; int tret; /* @@ -442,7 +442,7 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) */ tret = vsnprintf(buf, sizeof(buf), format, tap); assert(tret == ret); - assert(memcmp(str, buf, ret + 1) == 0); + assert(strcmp(buf, str) == 0); } #undef APPEND_C @@ -469,8 +469,7 @@ malloc_snprintf(char *str, size_t size, const char *format, ...) const char * malloc_vtprintf(const char *format, va_list ap) { - /* buf must be large enough for all possible uses within jemalloc. */ - static __thread char buf[4096]; + static __thread char buf[MALLOC_PRINTF_BUFSIZE]; malloc_vsnprintf(buf, sizeof(buf), format, ap); From 4e2e3dd9cf19ed5991938a708a8b50611aa5bbf8 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 13 Mar 2012 16:31:41 -0700 Subject: [PATCH 055/205] Fix fork-related bugs. Acquire/release arena bin locks as part of the prefork/postfork. This bug made deadlock in the child between fork and exec a possibility. Split jemalloc_postfork() into jemalloc_postfork_{parent,child}() so that the child can reinitialize mutexes rather than unlocking them. In practice, this bug tended not to cause problems. --- include/jemalloc/internal/arena.h | 3 ++ include/jemalloc/internal/base.h | 5 +- include/jemalloc/internal/chunk_dss.h | 9 ++-- include/jemalloc/internal/huge.h | 3 ++ .../jemalloc/internal/jemalloc_internal.h.in | 3 +- include/jemalloc/internal/mutex.h | 3 ++ include/jemalloc/internal/private_namespace.h | 18 ++++++- src/arena.c | 30 +++++++++++ src/base.c | 23 +++++++- src/chunk_dss.c | 36 +++++++++++-- src/huge.c | 21 ++++++++ src/jemalloc.c | 52 +++++++++++-------- src/mutex.c | 26 ++++++++++ 13 files changed, 194 insertions(+), 38 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 16c2b1e6..1609adcf 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -376,6 +376,9 @@ void *arena_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, size_t alignment, bool zero); bool arena_new(arena_t *arena, unsigned ind); void arena_boot(void); +void arena_prefork(arena_t *arena); +void arena_postfork_parent(arena_t *arena); +void arena_postfork_child(arena_t *arena); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/include/jemalloc/internal/base.h b/include/jemalloc/internal/base.h index e353f309..796a2835 100644 --- a/include/jemalloc/internal/base.h +++ b/include/jemalloc/internal/base.h @@ -9,12 +9,13 @@ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -extern malloc_mutex_t base_mtx; - void *base_alloc(size_t size); extent_node_t *base_node_alloc(void); void base_node_dealloc(extent_node_t *node); bool base_boot(void); +void base_prefork(void); +void base_postfork_parent(void); +void base_postfork_child(void); #endif /* JEMALLOC_H_EXTERNS */ 
/******************************************************************************/ diff --git a/include/jemalloc/internal/chunk_dss.h b/include/jemalloc/internal/chunk_dss.h index 35cd461a..a39a2031 100644 --- a/include/jemalloc/internal/chunk_dss.h +++ b/include/jemalloc/internal/chunk_dss.h @@ -9,16 +9,13 @@ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -/* - * Protects sbrk() calls. This avoids malloc races among threads, though it - * does not protect against races with threads that call sbrk() directly. - */ -extern malloc_mutex_t dss_mtx; - void *chunk_alloc_dss(size_t size, bool *zero); bool chunk_in_dss(void *chunk); bool chunk_dealloc_dss(void *chunk, size_t size); bool chunk_dss_boot(void); +void chunk_dss_prefork(void); +void chunk_dss_postfork_parent(void); +void chunk_dss_postfork_child(void); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/include/jemalloc/internal/huge.h b/include/jemalloc/internal/huge.h index 3a6b0b87..e8513c93 100644 --- a/include/jemalloc/internal/huge.h +++ b/include/jemalloc/internal/huge.h @@ -28,6 +28,9 @@ size_t huge_salloc(const void *ptr); prof_ctx_t *huge_prof_ctx_get(const void *ptr); void huge_prof_ctx_set(const void *ptr, prof_ctx_t *ctx); bool huge_boot(void); +void huge_prefork(void); +void huge_postfork_parent(void); +void huge_postfork_child(void); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 5759ed58..800d72c2 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -410,7 +410,8 @@ thread_allocated_t *thread_allocated_get_hard(void); arena_t *arenas_extend(unsigned ind); arena_t *choose_arena_hard(void); void jemalloc_prefork(void); -void jemalloc_postfork(void); +void jemalloc_postfork_parent(void); +void jemalloc_postfork_child(void); #include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" diff --git a/include/jemalloc/internal/mutex.h b/include/jemalloc/internal/mutex.h index 6a7b4fce..9d136585 100644 --- a/include/jemalloc/internal/mutex.h +++ b/include/jemalloc/internal/mutex.h @@ -29,6 +29,9 @@ extern bool isthreaded; bool malloc_mutex_init(malloc_mutex_t *mutex); void malloc_mutex_destroy(malloc_mutex_t *mutex); +void malloc_mutex_prefork(malloc_mutex_t *mutex); +void malloc_mutex_postfork_parent(malloc_mutex_t *mutex); +void malloc_mutex_postfork_child(malloc_mutex_t *mutex); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/include/jemalloc/internal/private_namespace.h b/include/jemalloc/internal/private_namespace.h index 89d3b5ca..e7370fec 100644 --- a/include/jemalloc/internal/private_namespace.h +++ b/include/jemalloc/internal/private_namespace.h @@ -8,6 +8,9 @@ #define arena_malloc_small JEMALLOC_N(arena_malloc_small) #define arena_new JEMALLOC_N(arena_new) #define arena_palloc JEMALLOC_N(arena_palloc) +#define arena_postfork_child JEMALLOC_N(arena_postfork_child) +#define arena_postfork_parent JEMALLOC_N(arena_postfork_parent) +#define arena_prefork JEMALLOC_N(arena_prefork) #define arena_prof_accum JEMALLOC_N(arena_prof_accum) #define arena_prof_ctx_get JEMALLOC_N(arena_prof_ctx_get) #define arena_prof_ctx_set JEMALLOC_N(arena_prof_ctx_set) @@ -32,6 
+35,9 @@ #define base_boot JEMALLOC_N(base_boot) #define base_node_alloc JEMALLOC_N(base_node_alloc) #define base_node_dealloc JEMALLOC_N(base_node_dealloc) +#define base_postfork_child JEMALLOC_N(base_postfork_child) +#define base_postfork_parent JEMALLOC_N(base_postfork_parent) +#define base_prefork JEMALLOC_N(base_prefork) #define bitmap_full JEMALLOC_N(bitmap_full) #define bitmap_get JEMALLOC_N(bitmap_get) #define bitmap_info_init JEMALLOC_N(bitmap_info_init) @@ -54,6 +60,9 @@ #define chunk_dealloc_dss JEMALLOC_N(chunk_dealloc_dss) #define chunk_dealloc_mmap JEMALLOC_N(chunk_dealloc_mmap) #define chunk_dss_boot JEMALLOC_N(chunk_dss_boot) +#define chunk_dss_postfork_child JEMALLOC_N(chunk_dss_postfork_child) +#define chunk_dss_postfork_parent JEMALLOC_N(chunk_dss_postfork_parent) +#define chunk_dss_prefork JEMALLOC_N(chunk_dss_prefork) #define chunk_in_dss JEMALLOC_N(chunk_in_dss) #define chunk_mmap_boot JEMALLOC_N(chunk_mmap_boot) #define ckh_bucket_search JEMALLOC_N(ckh_bucket_search) @@ -115,6 +124,9 @@ #define huge_dalloc JEMALLOC_N(huge_dalloc) #define huge_malloc JEMALLOC_N(huge_malloc) #define huge_palloc JEMALLOC_N(huge_palloc) +#define huge_postfork_child JEMALLOC_N(huge_postfork_child) +#define huge_postfork_parent JEMALLOC_N(huge_postfork_parent) +#define huge_prefork JEMALLOC_N(huge_prefork) #define huge_prof_ctx_get JEMALLOC_N(huge_prof_ctx_get) #define huge_prof_ctx_set JEMALLOC_N(huge_prof_ctx_set) #define huge_ralloc JEMALLOC_N(huge_ralloc) @@ -129,12 +141,16 @@ #define isalloc JEMALLOC_N(isalloc) #define ivsalloc JEMALLOC_N(ivsalloc) #define jemalloc_darwin_init JEMALLOC_N(jemalloc_darwin_init) -#define jemalloc_postfork JEMALLOC_N(jemalloc_postfork) +#define jemalloc_postfork_child JEMALLOC_N(jemalloc_postfork_child) +#define jemalloc_postfork_parent JEMALLOC_N(jemalloc_postfork_parent) #define jemalloc_prefork JEMALLOC_N(jemalloc_prefork) #define malloc_cprintf JEMALLOC_N(malloc_cprintf) #define malloc_mutex_destroy JEMALLOC_N(malloc_mutex_destroy) #define malloc_mutex_init JEMALLOC_N(malloc_mutex_init) #define malloc_mutex_lock JEMALLOC_N(malloc_mutex_lock) +#define malloc_mutex_postfork_child JEMALLOC_N(malloc_mutex_postfork_child) +#define malloc_mutex_postfork_parent JEMALLOC_N(malloc_mutex_postfork_parent) +#define malloc_mutex_prefork JEMALLOC_N(malloc_mutex_prefork) #define malloc_mutex_trylock JEMALLOC_N(malloc_mutex_trylock) #define malloc_mutex_unlock JEMALLOC_N(malloc_mutex_unlock) #define malloc_printf JEMALLOC_N(malloc_printf) diff --git a/src/arena.c b/src/arena.c index c14cb2c2..898f8c7d 100644 --- a/src/arena.c +++ b/src/arena.c @@ -2169,3 +2169,33 @@ arena_boot(void) bin_info_init(); } + +void +arena_prefork(arena_t *arena) +{ + unsigned i; + + malloc_mutex_prefork(&arena->lock); + for (i = 0; i < NBINS; i++) + malloc_mutex_prefork(&arena->bins[i].lock); +} + +void +arena_postfork_parent(arena_t *arena) +{ + unsigned i; + + for (i = 0; i < NBINS; i++) + malloc_mutex_postfork_parent(&arena->bins[i].lock); + malloc_mutex_postfork_parent(&arena->lock); +} + +void +arena_postfork_child(arena_t *arena) +{ + unsigned i; + + for (i = 0; i < NBINS; i++) + malloc_mutex_postfork_child(&arena->bins[i].lock); + malloc_mutex_postfork_child(&arena->lock); +} diff --git a/src/base.c b/src/base.c index cc85e849..eb68334b 100644 --- a/src/base.c +++ b/src/base.c @@ -4,7 +4,7 @@ /******************************************************************************/ /* Data. 
*/ -malloc_mutex_t base_mtx; +static malloc_mutex_t base_mtx; /* * Current pages that are being used for internal memory allocations. These @@ -104,3 +104,24 @@ base_boot(void) return (false); } + +void +base_prefork(void) +{ + + malloc_mutex_prefork(&base_mtx); +} + +void +base_postfork_parent(void) +{ + + malloc_mutex_postfork_parent(&base_mtx); +} + +void +base_postfork_child(void) +{ + + malloc_mutex_postfork_child(&base_mtx); +} diff --git a/src/chunk_dss.c b/src/chunk_dss.c index c25baea3..405dc29b 100644 --- a/src/chunk_dss.c +++ b/src/chunk_dss.c @@ -3,14 +3,18 @@ /******************************************************************************/ /* Data. */ -malloc_mutex_t dss_mtx; +/* + * Protects sbrk() calls. This avoids malloc races among threads, though it + * does not protect against races with threads that call sbrk() directly. + */ +static malloc_mutex_t dss_mtx; /* Base address of the DSS. */ -static void *dss_base; +static void *dss_base; /* Current end of the DSS, or ((void *)-1) if the DSS is exhausted. */ -static void *dss_prev; +static void *dss_prev; /* Current upper limit on DSS addresses. */ -static void *dss_max; +static void *dss_max; /* * Trees of chunks that were previously allocated (trees differ only in node @@ -291,4 +295,28 @@ chunk_dss_boot(void) return (false); } +void +chunk_dss_prefork(void) +{ + + if (config_dss) + malloc_mutex_prefork(&dss_mtx); +} + +void +chunk_dss_postfork_parent(void) +{ + + if (config_dss) + malloc_mutex_postfork_parent(&dss_mtx); +} + +void +chunk_dss_postfork_child(void) +{ + + if (config_dss) + malloc_mutex_postfork_child(&dss_mtx); +} + /******************************************************************************/ diff --git a/src/huge.c b/src/huge.c index 2d51c529..a4e6cc8f 100644 --- a/src/huge.c +++ b/src/huge.c @@ -359,3 +359,24 @@ huge_boot(void) return (false); } + +void +huge_prefork(void) +{ + + malloc_mutex_prefork(&huge_mtx); +} + +void +huge_postfork_parent(void) +{ + + malloc_mutex_postfork_parent(&huge_mtx); +} + +void +huge_postfork_child(void) +{ + + malloc_mutex_postfork_child(&huge_mtx); +} diff --git a/src/jemalloc.c b/src/jemalloc.c index 2f3f3722..385eb03a 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -610,8 +610,8 @@ malloc_init_hard(void) malloc_conf_init(); /* Register fork handlers. */ - if (pthread_atfork(jemalloc_prefork, jemalloc_postfork, - jemalloc_postfork) != 0) { + if (pthread_atfork(jemalloc_prefork, jemalloc_postfork_parent, + jemalloc_postfork_child) != 0) { malloc_write("<jemalloc>: Error in pthread_atfork()\n"); if (opt_abort) abort(); @@ -1593,40 +1593,46 @@ jemalloc_prefork(void) unsigned i; /* Acquire all mutexes in a safe order. */ - - malloc_mutex_lock(&arenas_lock); + malloc_mutex_prefork(&arenas_lock); for (i = 0; i < narenas; i++) { if (arenas[i] != NULL) - malloc_mutex_lock(&arenas[i]->lock); + arena_prefork(arenas[i]); } - - malloc_mutex_lock(&base_mtx); - - malloc_mutex_lock(&huge_mtx); - - if (config_dss) - malloc_mutex_lock(&dss_mtx); + base_prefork(); + huge_prefork(); + chunk_dss_prefork(); } void -jemalloc_postfork(void) +jemalloc_postfork_parent(void) { unsigned i; /* Release all mutexes, now that fork() has completed.
*/ - - if (config_dss) - malloc_mutex_unlock(&dss_mtx); - - malloc_mutex_unlock(&huge_mtx); - - malloc_mutex_unlock(&base_mtx); - + chunk_dss_postfork_parent(); + huge_postfork_parent(); + base_postfork_parent(); for (i = 0; i < narenas; i++) { if (arenas[i] != NULL) - malloc_mutex_unlock(&arenas[i]->lock); + arena_postfork_parent(arenas[i]); } - malloc_mutex_unlock(&arenas_lock); + malloc_mutex_postfork_parent(&arenas_lock); +} + +void +jemalloc_postfork_child(void) +{ + unsigned i; + + /* Release all mutexes, now that fork() has completed. */ + chunk_dss_postfork_child(); + huge_postfork_child(); + base_postfork_child(); + for (i = 0; i < narenas; i++) { + if (arenas[i] != NULL) + arena_postfork_child(arenas[i]); + } + malloc_mutex_postfork_child(&arenas_lock); } /******************************************************************************/ diff --git a/src/mutex.c b/src/mutex.c index 0e09060e..243b7129 100644 --- a/src/mutex.c +++ b/src/mutex.c @@ -92,3 +92,29 @@ malloc_mutex_destroy(malloc_mutex_t *mutex) } #endif } + +void +malloc_mutex_prefork(malloc_mutex_t *mutex) +{ + + malloc_mutex_lock(mutex); +} + +void +malloc_mutex_postfork_parent(malloc_mutex_t *mutex) +{ + + malloc_mutex_unlock(mutex); +} + +void +malloc_mutex_postfork_child(malloc_mutex_t *mutex) +{ + + if (malloc_mutex_init(mutex)) { + malloc_printf("<jemalloc>: Error re-initializing mutex in " + "child\n"); + if (opt_abort) + abort(); + } +} From 6508bc6931b54b50aaa0976ee7f0681482b2a80a Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 15 Mar 2012 17:07:42 -0700 Subject: [PATCH 056/205] Remove #include <sys/sysctl.h>. Remove #include <sys/sysctl.h>, which is no longer needed (now using sysconf(3) to get number of CPUs). --- include/jemalloc/internal/jemalloc_internal.h.in | 1 - 1 file changed, 1 deletion(-) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 800d72c2..2c213120 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -2,7 +2,6 @@ #include #include #include -#include <sys/sysctl.h> #include #include From 39006f990771518b1b7d4b8dfdfac72409ef26ca Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 16 Mar 2012 16:57:02 -0700 Subject: [PATCH 057/205] Look for pthreads functionality in libc. If there is no libpthread, look for pthreads functionality in libc before failing to configure pthreads. This is necessary on at least the Android platform. Reported by Mike Hommey. --- configure.ac | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 295e646d..47d5784b 100644 --- a/configure.ac +++ b/configure.ac @@ -746,8 +746,11 @@ dnl ============================================================================ dnl Configure pthreads. AC_CHECK_HEADERS([pthread.h], , [AC_MSG_ERROR([pthread.h is missing])]) +dnl Some systems may embed pthreads functionality in libc; check for libpthread +dnl first, but try libc too before failing. AC_CHECK_LIB([pthread], [pthread_create], [LIBS="$LIBS -lpthread"], - [AC_MSG_ERROR([libpthread is missing])]) + [AC_SEARCH_LIBS([pthread_create], , , + AC_MSG_ERROR([libpthread is missing]))]) CPPFLAGS="$CPPFLAGS -D_REENTRANT" From e7b8fa18d256e0bc18b61ee03b69af87fa3d7969 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 16 Mar 2012 17:09:32 -0700 Subject: [PATCH 058/205] Rename the "tcache.flush" mallctl to "thread.tcache.flush".
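For mallctl consumers this is a pure rename: the node moves under the "thread." namespace, but the calling convention is unchanged. A minimal sketch of an updated caller (the helper function below is hypothetical, not part of this patch):

#include <stdio.h>
#include <jemalloc/jemalloc.h>

static void
flush_my_tcache(void)
{

	/* Formerly mallctl("tcache.flush", ...); no old/new data is
	 * exchanged, so all four data arguments are NULL/0. */
	if (mallctl("thread.tcache.flush", NULL, NULL, NULL, 0) != 0)
		fprintf(stderr, "thread.tcache.flush failed "
		    "(e.g. tcache support compiled out)\n");
}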
--- doc/jemalloc.xml.in | 36 ++++++++++++++++++------------------ src/ctl.c | 12 ++++++------ 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 926deafe..ffc6c94e 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1030,24 +1030,6 @@ malloc_conf = "xmalloc:true";]]> by default. - - - tcache.flush - (void) - -- - [] - - Flush calling thread's tcache. This interface releases - all cached objects and internal data structures associated with the - calling thread's thread-specific cache. Ordinarily, this interface - need not be called, since automatic periodic incremental garbage - collection occurs, and the thread cache is automatically discarded when - a thread exits. However, garbage collection is triggered by allocation - activity, so it is possible for a thread that stops - allocating/deallocating to retain its cache indefinitely, in which case - the developer may find manual flushing useful. - - thread.arena @@ -1119,6 +1101,24 @@ malloc_conf = "xmalloc:true";]]> mallctl* calls. + + + thread.tcache.flush + (void) + -- + [] + + Flush calling thread's tcache. This interface releases + all cached objects and internal data structures associated with the + calling thread's thread-specific cache. Ordinarily, this interface + need not be called, since automatic periodic incremental garbage + collection occurs, and the thread cache is automatically discarded when + a thread exits. However, garbage collection is triggered by allocation + activity, so it is possible for a thread that stops + allocating/deallocating to retain its cache indefinitely, in which case + the developer may find manual flushing useful. + + arenas.narenas diff --git a/src/ctl.c b/src/ctl.c index e7639d7f..1ef84e80 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -39,7 +39,7 @@ static int ctl_lookup(const char *name, ctl_node_t const **nodesp, CTL_PROTO(version) CTL_PROTO(epoch) -CTL_PROTO(tcache_flush) +CTL_PROTO(thread_tcache_flush) CTL_PROTO(thread_arena) CTL_PROTO(thread_allocated) CTL_PROTO(thread_allocatedp) @@ -151,7 +151,7 @@ CTL_PROTO(stats_mapped) #define INDEX(i) false, {.indexed = {i##_index}}, NULL static const ctl_node_t tcache_node[] = { - {NAME("flush"), CTL(tcache_flush)} + {NAME("flush"), CTL(thread_tcache_flush)} }; static const ctl_node_t thread_node[] = { @@ -159,7 +159,8 @@ static const ctl_node_t thread_node[] = { {NAME("allocated"), CTL(thread_allocated)}, {NAME("allocatedp"), CTL(thread_allocatedp)}, {NAME("deallocated"), CTL(thread_deallocated)}, - {NAME("deallocatedp"), CTL(thread_deallocatedp)} + {NAME("deallocatedp"), CTL(thread_deallocatedp)}, + {NAME("tcache"), CHILD(tcache)} }; static const ctl_node_t config_node[] = { @@ -334,7 +335,6 @@ static const ctl_node_t stats_node[] = { static const ctl_node_t root_node[] = { {NAME("version"), CTL(version)}, {NAME("epoch"), CTL(epoch)}, - {NAME("tcache"), CHILD(tcache)}, {NAME("thread"), CHILD(thread)}, {NAME("config"), CHILD(config)}, {NAME("opt"), CHILD(opt)}, @@ -967,8 +967,8 @@ RETURN: } static int -tcache_flush_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, - void *newp, size_t newlen) +thread_tcache_flush_ctl(const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { int ret; tcache_t *tcache; From 7091b415bb41b9d7098a24cfe0a577299622f5db Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 19 Mar 2012 09:36:44 -0700 Subject: [PATCH 059/205] Fix various documentation formatting regressions. 
--- configure.ac | 17 +++++++++++++---- doc/jemalloc.xml.in | 38 ++++++++++++++++++++------------------ 2 files changed, 33 insertions(+), 22 deletions(-) diff --git a/configure.ac b/configure.ac index 47d5784b..4a068d09 100644 --- a/configure.ac +++ b/configure.ac @@ -87,14 +87,23 @@ AC_SUBST([MANDIR]) dnl Support for building documentation. AC_PATH_PROG([XSLTPROC], [xsltproc], , [$PATH]) +if test -d "/usr/share/xml/docbook/stylesheet/docbook-xsl" ; then + DEFAULT_XSLROOT="/usr/share/xml/docbook/stylesheet/docbook-xsl" +elif test -d "/usr/share/sgml/docbook/xsl-stylesheets" ; then + DEFAULT_XSLROOT="/usr/share/sgml/docbook/xsl-stylesheets" +else + dnl Documentation building will fail if this default gets used. + DEFAULT_XSLROOT="" +fi AC_ARG_WITH([xslroot], - [AS_HELP_STRING([--with-xslroot=<path>], [XSL stylesheet root path])], + [AS_HELP_STRING([--with-xslroot=<path>], [XSL stylesheet root path])], [ if test "x$with_xslroot" = "xno" ; then - XSLROOT="/usr/share/xml/docbook/stylesheet/docbook-xsl" + XSLROOT="${DEFAULT_XSLROOT}" else XSLROOT="${with_xslroot}" -fi, - XSLROOT="/usr/share/xml/docbook/stylesheet/docbook-xsl" +fi +], + XSLROOT="${DEFAULT_XSLROOT}" ) AC_SUBST([XSLROOT]) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index ffc6c94e..3cbc851f 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -377,7 +377,7 @@ for (i = 0; i < nbins; i++) { sets *rsize to the real size of the allocation if rsize is not NULL. Behavior is undefined if size is - 0. + 0. The rallocm function resizes the allocation at *ptr to be at least @@ -390,7 +390,7 @@ for (i = 0; i < nbins; i++) { language="C">size + extra) bytes, though inability to allocate the extra byte(s) will not by itself result in failure. Behavior is - undefined if size is 0, or if + undefined if size is 0, or if (size + extra > SIZE_T_MAX). @@ -409,7 +409,7 @@ for (i = 0; i < nbins; i++) { *rsize to the real size of the allocation that would result from the equivalent allocm function call. Behavior is undefined if - size is 0. + size is 0. @@ -516,53 +516,55 @@ for (i = 0; i < nbins; i++) { Size classes - - - - + + + + Category - Subcategory + Spacing Size - Small - Tiny + Small + lg [8] - 16-spaced + 16 [16, 32, 48, ..., 128] - 32-spaced + 32 [160, 192, 224, 256] - 64-spaced + 64 [320, 384, 448, 512] - 128-spaced + 128 [640, 768, 896, 1024] - 256-spaced + 256 [1280, 1536, 1792, 2048] - 512-spaced + 512 [2560, 3072, 3584] - Large + Large + 4 KiB [4 KiB, 8 KiB, 12 KiB, ..., 4072 KiB] - Huge + Huge + 4 MiB [4 MiB, 8 MiB, 12 MiB, ...] From f3e139a1ef17f300ebed0577a0ee7b3714404707 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 19 Mar 2012 09:54:20 -0700 Subject: [PATCH 060/205] Use AC_LINK_IFELSE() rather than AC_RUN_IFELSE() in JE_COMPILABLE(). Reported by Mike Hommey. --- configure.ac | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/configure.ac b/configure.ac index 4a068d09..c7110240 100644 --- a/configure.ac +++ b/configure.ac @@ -27,16 +27,16 @@ AC_COMPILE_IFELSE([AC_LANG_PROGRAM( dnl JE_COMPILABLE(label, hcode, mcode, rvar) dnl -dnl Use AC_RUN_IFELSE() rather than AC_COMPILE_IFELSE() so that linker errors +dnl Use AC_LINK_IFELSE() rather than AC_COMPILE_IFELSE() so that linker errors dnl cause failure.
AC_DEFUN([JE_COMPILABLE], [ AC_CACHE_CHECK([whether $1 is compilable], [$4], - [AC_RUN_IFELSE([AC_LANG_PROGRAM([$2], - [$3])], - [$4=yes], - [$4=no])]) + [AC_LINK_IFELSE([AC_LANG_PROGRAM([$2], + [$3])], + [$4=yes], + [$4=no])]) ]) dnl ============================================================================ From e24c7af35d1e9d24d02166ac98cfca7cf807ff13 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 19 Mar 2012 10:21:17 -0700 Subject: [PATCH 061/205] Invert NO_TLS to JEMALLOC_TLS. --- configure.ac | 28 +++++++++---------- .../jemalloc/internal/jemalloc_internal.h.in | 4 +-- include/jemalloc/internal/prof.h | 2 +- include/jemalloc/internal/tcache.h | 2 +- include/jemalloc/jemalloc_defs.h.in | 2 +- src/chunk_mmap.c | 4 +-- src/jemalloc.c | 10 +++---- src/prof.c | 2 +- src/tcache.c | 2 +- 9 files changed, 28 insertions(+), 28 deletions(-) diff --git a/configure.ac b/configure.ac index c7110240..b92a7491 100644 --- a/configure.ac +++ b/configure.ac @@ -210,7 +210,7 @@ case "${host}" in *-*-darwin*) CFLAGS="$CFLAGS -fno-common -no-cpp-precomp" abi="macho" - AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE]) + AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) RPATH="" LD_PRELOAD_VAR="DYLD_INSERT_LIBRARIES" so="dylib" @@ -218,14 +218,14 @@ case "${host}" in *-*-freebsd*) CFLAGS="$CFLAGS" abi="elf" - AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE]) + AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) RPATH="-Wl,-rpath," ;; *-*-linux*) CFLAGS="$CFLAGS" CPPFLAGS="$CPPFLAGS -D_GNU_SOURCE" abi="elf" - AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED]) + AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED], [ ]) RPATH="-Wl,-rpath," ;; *-*-netbsd*) @@ -240,7 +240,7 @@ case "${host}" in [CFLAGS="$CFLAGS"; abi="elf"], [abi="aout"]) AC_MSG_RESULT([$abi]) - AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE]) + AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) RPATH="-Wl,-rpath," ;; *-*-solaris2*) @@ -291,7 +291,7 @@ JE_COMPILABLE([mremap(...MREMAP_FIXED...)], [ void *p = mremap((void *)0, 0, 0, MREMAP_MAYMOVE|MREMAP_FIXED, (void *)0); ], [je_cv_mremap_fixed]) if test "x${je_cv_mremap_fixed}" = "xyes" ; then - AC_DEFINE([JEMALLOC_MREMAP_FIXED]) + AC_DEFINE([JEMALLOC_MREMAP_FIXED], [ ]) fi dnl Support optional additions to rpath. @@ -329,10 +329,10 @@ public_syms="malloc_conf malloc_message malloc calloc posix_memalign aligned_all dnl Check for allocator-related functions that should be wrapped. AC_CHECK_FUNC([memalign], - [AC_DEFINE([JEMALLOC_OVERRIDE_MEMALIGN]) + [AC_DEFINE([JEMALLOC_OVERRIDE_MEMALIGN], [ ]) public_syms="${public_syms} memalign"]) AC_CHECK_FUNC([valloc], - [AC_DEFINE([JEMALLOC_OVERRIDE_VALLOC]) + [AC_DEFINE([JEMALLOC_OVERRIDE_VALLOC], [ ]) public_syms="${public_syms} valloc"]) dnl Support the experimental API by default. @@ -458,7 +458,7 @@ fi [enable_cc_silence="0"] ) if test "x$enable_cc_silence" = "x1" ; then - AC_DEFINE([JEMALLOC_CC_SILENCE]) + AC_DEFINE([JEMALLOC_CC_SILENCE], [ ]) fi dnl Do not compile with debugging by default. 
@@ -808,8 +808,8 @@ AC_COMPILE_IFELSE([AC_LANG_PROGRAM( enable_tls="0") fi AC_SUBST([enable_tls]) -if test "x${enable_tls}" = "x0" ; then - AC_DEFINE_UNQUOTED([NO_TLS], [ ]) +if test "x${enable_tls}" = "x1" ; then + AC_DEFINE_UNQUOTED([JEMALLOC_TLS], [ ]) fi dnl ============================================================================ @@ -846,7 +846,7 @@ JE_COMPILABLE([Darwin OSAtomic*()], [ } ], [je_cv_osatomic]) if test "x${je_cv_osatomic}" = "xyes" ; then - AC_DEFINE([JEMALLOC_OSATOMIC]) + AC_DEFINE([JEMALLOC_OSATOMIC], [ ]) fi dnl ============================================================================ @@ -861,15 +861,15 @@ JE_COMPILABLE([Darwin OSSpin*()], [ OSSpinLockUnlock(&lock); ], [je_cv_osspin]) if test "x${je_cv_osspin}" = "xyes" ; then - AC_DEFINE([JEMALLOC_OSSPIN]) + AC_DEFINE([JEMALLOC_OSSPIN], [ ]) fi dnl ============================================================================ dnl Darwin-related configuration. if test "x${abi}" = "xmacho" ; then - AC_DEFINE([JEMALLOC_IVSALLOC]) - AC_DEFINE([JEMALLOC_ZONE]) + AC_DEFINE([JEMALLOC_IVSALLOC], [ ]) + AC_DEFINE([JEMALLOC_ZONE], [ ]) dnl The szone version jumped from 3 to 6 between the OS X 10.5.x and 10.6 dnl releases. malloc_zone_t and malloc_introspection_t have new fields in diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 2c213120..dbfd3fc8 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -357,7 +357,7 @@ extern unsigned ncpus; extern malloc_mutex_t arenas_lock; /* Protects arenas initialization. */ extern pthread_key_t arenas_tsd; -#ifndef NO_TLS +#ifdef JEMALLOC_TLS /* * Map of pthread_self() --> arenas[???], used for selecting an arena to use * for allocations. @@ -382,7 +382,7 @@ extern __thread arena_t *arenas_tls JEMALLOC_ATTR(tls_model("initial-exec")); extern arena_t **arenas; extern unsigned narenas; -#ifndef NO_TLS +#ifdef JEMALLOC_TLS extern __thread thread_allocated_t thread_allocated_tls; # define ALLOCATED_GET() (thread_allocated_tls.allocated) # define ALLOCATEDP_GET() (&thread_allocated_tls.allocated) diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index f647f637..48231928 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -180,7 +180,7 @@ extern uint64_t prof_interval; extern bool prof_promote; /* Thread-specific backtrace cache, used to reduce bt2ctx contention. */ -#ifndef NO_TLS +#ifdef JEMALLOC_TLS extern __thread prof_tdata_t *prof_tdata_tls JEMALLOC_ATTR(tls_model("initial-exec")); # define PROF_TCACHE_GET() prof_tdata_tls diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index e5f9518e..ed037cf9 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -76,7 +76,7 @@ extern ssize_t opt_lg_tcache_max; extern tcache_bin_info_t *tcache_bin_info; /* Map of thread-specific caches. */ -#ifndef NO_TLS +#ifdef JEMALLOC_TLS extern __thread tcache_t *tcache_tls JEMALLOC_ATTR(tls_model("initial-exec")); # define TCACHE_GET() tcache_tls diff --git a/include/jemalloc/jemalloc_defs.h.in b/include/jemalloc/jemalloc_defs.h.in index f8f80e62..434dd368 100644 --- a/include/jemalloc/jemalloc_defs.h.in +++ b/include/jemalloc/jemalloc_defs.h.in @@ -122,7 +122,7 @@ #undef STATIC_PAGE_SHIFT /* TLS is used to map arenas and magazine caches to threads. 
*/ -#undef NO_TLS +#undef JEMALLOC_TLS /* * JEMALLOC_IVSALLOC enables ivsalloc(), which verifies that pointers reside diff --git a/src/chunk_mmap.c b/src/chunk_mmap.c index c7409284..6ea21180 100644 --- a/src/chunk_mmap.c +++ b/src/chunk_mmap.c @@ -8,7 +8,7 @@ * Used by chunk_alloc_mmap() to decide whether to attempt the fast path and * potentially avoid some system calls. */ -#ifndef NO_TLS +#ifdef JEMALLOC_TLS static __thread bool mmap_unaligned_tls JEMALLOC_ATTR(tls_model("initial-exec")); #define MMAP_UNALIGNED_GET() mmap_unaligned_tls @@ -225,7 +225,7 @@ bool chunk_mmap_boot(void) { -#ifdef NO_TLS +#ifndef JEMALLOC_TLS if (pthread_key_create(&mmap_unaligned_tsd, NULL) != 0) { malloc_write(": Error in pthread_key_create()\n"); return (true); diff --git a/src/jemalloc.c b/src/jemalloc.c index 385eb03a..e2b61343 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -9,11 +9,11 @@ arena_t **arenas; unsigned narenas; pthread_key_t arenas_tsd; -#ifndef NO_TLS +#ifdef JEMALLOC_TLS __thread arena_t *arenas_tls JEMALLOC_ATTR(tls_model("initial-exec")); #endif -#ifndef NO_TLS +#ifdef JEMALLOC_TLS __thread thread_allocated_t thread_allocated_tls; #endif pthread_key_t thread_allocated_tsd; @@ -58,7 +58,7 @@ size_t opt_narenas = 0; static void stats_print_atexit(void); static unsigned malloc_ncpus(void); static void arenas_cleanup(void *arg); -#ifdef NO_TLS +#ifndef JEMALLOC_TLS static void thread_allocated_cleanup(void *arg); #endif static bool malloc_conf_next(char const **opts_p, char const **k_p, @@ -251,7 +251,7 @@ arenas_cleanup(void *arg) malloc_mutex_unlock(&arenas_lock); } -#ifdef NO_TLS +#ifndef JEMALLOC_TLS static void thread_allocated_cleanup(void *arg) { @@ -656,7 +656,7 @@ malloc_init_hard(void) return (true); } -#ifdef NO_TLS +#ifndef JEMALLOC_TLS /* Initialize allocation counters before any allocations can occur. */ if (config_stats && pthread_key_create(&thread_allocated_tsd, thread_allocated_cleanup) != 0) { diff --git a/src/prof.c b/src/prof.c index 2ca66c73..9c327379 100644 --- a/src/prof.c +++ b/src/prof.c @@ -26,7 +26,7 @@ char opt_prof_prefix[PATH_MAX + 1]; uint64_t prof_interval; bool prof_promote; -#ifndef NO_TLS +#ifdef JEMALLOC_TLS __thread prof_tdata_t *prof_tdata_tls JEMALLOC_ATTR(tls_model("initial-exec")); #endif diff --git a/src/tcache.c b/src/tcache.c index 478b7f5f..f90308cd 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -11,7 +11,7 @@ tcache_bin_info_t *tcache_bin_info; static unsigned stack_nelms; /* Total stack elms per tcache. */ /* Map of thread-specific caches. */ -#ifndef NO_TLS +#ifdef JEMALLOC_TLS __thread tcache_t *tcache_tls JEMALLOC_ATTR(tls_model("initial-exec")); #endif From 650285d5be2aacd9c0e60260563dd0235b729af7 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 19 Mar 2012 10:25:27 -0700 Subject: [PATCH 062/205] Generalize dlsym() configuration. Generalize dlsym() configuration to succeed if dlsym() is in libc rather than libdl. 
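The reason lazy locking needs dlsym() at all: jemalloc resolves the real pthread_create() at run time and defers locking until a second thread actually exists. A simplified sketch of that interception (hedged: this is roughly what src/mutex.c does under JEMALLOC_LAZY_LOCK, with error handling reduced to abort()):

#define _GNU_SOURCE	/* RTLD_NEXT is glibc-guarded; *BSD exposes it by default. */
#include <dlfcn.h>
#include <pthread.h>
#include <stdlib.h>

static int (*pthread_create_fptr)(pthread_t *, const pthread_attr_t *,
    void *(*)(void *), void *);

static void
pthread_create_once(void)
{
	/* Resolve the next pthread_create() in link order. */
	pthread_create_fptr = dlsym(RTLD_NEXT, "pthread_create");
	if (pthread_create_fptr == NULL)
		abort();
	/* ... switch the allocator out of single-threaded mode here ... */
}

On glibc systems dlsym() lives in libdl, hence the -ldl fallback; on FreeBSD and OS X it is in libc, which is why the diff below demotes the hard AC_CHECK_LIB([dl], ...) requirement to a fallback.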
--- configure.ac | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/configure.ac b/configure.ac index b92a7491..76cb6700 100644 --- a/configure.ac +++ b/configure.ac @@ -777,8 +777,10 @@ fi ) if test "x$enable_lazy_lock" = "x1" ; then AC_CHECK_HEADERS([dlfcn.h], , [AC_MSG_ERROR([dlfcn.h is missing])]) - AC_CHECK_LIB([dl], [dlopen], [LIBS="$LIBS -ldl"], - [AC_MSG_ERROR([libdl is missing])]) + AC_CHECK_FUNC([dlsym], [], + [AC_CHECK_LIB([dl], [dlsym], [LIBS="$LIBS -ldl"], + [AC_MSG_ERROR([libdl is missing])]) + ]) AC_DEFINE([JEMALLOC_LAZY_LOCK], [ ]) fi AC_SUBST([enable_lazy_lock])
From f4d0fc310effc6d1200223851897a8036123738c Mon Sep 17 00:00:00 2001 From: Mike Hommey <mh@glandium.org> Date: Tue, 20 Mar 2012 18:03:09 +0100 Subject: [PATCH 063/205] Unbreak mac after commit 4e2e3dd --- src/zone.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/zone.c b/src/zone.c index 5beed5f3..a0372e1a 100644 --- a/src/zone.c +++ b/src/zone.c @@ -152,7 +152,7 @@ zone_force_unlock(malloc_zone_t *zone) { if (isthreaded) - jemalloc_postfork(); + jemalloc_postfork_parent(); } malloc_zone_t *
From 154829d2560a202ef6378b089655747585e44fb5 Mon Sep 17 00:00:00 2001 From: Mike Hommey <mh@glandium.org> Date: Tue, 20 Mar 2012 18:01:38 +0100 Subject: [PATCH 064/205] Improve zone support for OSX I tested a build from 10.7 run on 10.7 and 10.6, and a build from 10.6 run on 10.6. The AC_COMPILE_IFELSE limbo is to avoid running a program during configure, which presumably makes it work when cross compiling for iOS. --- configure.ac | 57 +++++++-------- src/jemalloc.c | 20 +++-- src/zone.c | 193 ++++++------------------------------------------- 3 files changed, 63 insertions(+), 207 deletions(-)
diff --git a/configure.ac b/configure.ac index 76cb6700..02d4f536 100644 --- a/configure.ac +++ b/configure.ac @@ -877,39 +877,32 @@ if test "x${abi}" = "xmacho" ; then dnl releases. malloc_zone_t and malloc_introspection_t have new fields in dnl 10.6, which is the only source-level indication of the change. AC_MSG_CHECKING([malloc zone version]) - AC_TRY_COMPILE([#include <stdlib.h> -#include <malloc/malloc.h>], [ - static malloc_zone_t zone; - static struct malloc_introspection_t zone_introspect; + AC_DEFUN([JE_ZONE_PROGRAM], + [AC_LANG_PROGRAM( + [#include <malloc/malloc.h>], + [static foo[[sizeof($1) $2 sizeof(void *) * $3 ?
1 : -1]]] + )]) - zone.size = NULL; - zone.malloc = NULL; - zone.calloc = NULL; - zone.valloc = NULL; - zone.free = NULL; - zone.realloc = NULL; - zone.destroy = NULL; - zone.zone_name = "jemalloc_zone"; - zone.batch_malloc = NULL; - zone.batch_free = NULL; - zone.introspect = &zone_introspect; - zone.version = 6; - zone.memalign = NULL; - zone.free_definite_size = NULL; - - zone_introspect.enumerator = NULL; - zone_introspect.good_size = NULL; - zone_introspect.check = NULL; - zone_introspect.print = NULL; - zone_introspect.log = NULL; - zone_introspect.force_lock = NULL; - zone_introspect.force_unlock = NULL; - zone_introspect.statistics = NULL; - zone_introspect.zone_locked = NULL; -], [AC_DEFINE_UNQUOTED([JEMALLOC_ZONE_VERSION], [6]) - AC_MSG_RESULT([6])], - [AC_DEFINE_UNQUOTED([JEMALLOC_ZONE_VERSION], [3]) - AC_MSG_RESULT([3])]) + AC_COMPILE_IFELSE([JE_ZONE_PROGRAM(malloc_zone_t,==,14)],[JEMALLOC_ZONE_VERSION=3],[ + AC_COMPILE_IFELSE([JE_ZONE_PROGRAM(malloc_zone_t,==,15)],[JEMALLOC_ZONE_VERSION=5],[ + AC_COMPILE_IFELSE([JE_ZONE_PROGRAM(malloc_zone_t,==,16)],[ + AC_COMPILE_IFELSE([JE_ZONE_PROGRAM(malloc_introspection_t,==,9)],[JEMALLOC_ZONE_VERSION=6],[ + AC_COMPILE_IFELSE([JE_ZONE_PROGRAM(malloc_introspection_t,==,13)],[JEMALLOC_ZONE_VERSION=7],[JEMALLOC_ZONE_VERSION=] + )])],[ + AC_COMPILE_IFELSE([JE_ZONE_PROGRAM(malloc_zone_t,==,17)],[JEMALLOC_ZONE_VERSION=8],[ + AC_COMPILE_IFELSE([JE_ZONE_PROGRAM(malloc_zone_t,>,17)],[JEMALLOC_ZONE_VERSION=9],[JEMALLOC_ZONE_VERSION=] + )])])])]) + if test "x${JEMALLOC_ZONE_VERSION}" = "x"; then + AC_MSG_RESULT([unsupported]) + AC_MSG_ERROR([Unsupported malloc zone version]) + fi + if test "${JEMALLOC_ZONE_VERSION}" = 9; then + JEMALLOC_ZONE_VERSION=8 + AC_MSG_RESULT([> 8]) + else + AC_MSG_RESULT([$JEMALLOC_ZONE_VERSION]) + fi + AC_DEFINE_UNQUOTED(JEMALLOC_ZONE_VERSION, [$JEMALLOC_ZONE_VERSION]) fi dnl ============================================================================ diff --git a/src/jemalloc.c b/src/jemalloc.c index e2b61343..2610452e 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -747,15 +747,23 @@ malloc_init_hard(void) arenas[0] = init_arenas[0]; #ifdef JEMALLOC_ZONE - /* Register the custom zone. */ - malloc_zone_register(create_zone()); + /* Register the custom zone. At this point it won't be the default. */ + malloc_zone_t *jemalloc_zone = create_zone(); + malloc_zone_register(jemalloc_zone); /* - * Convert the default szone to an "overlay zone" that is capable of - * deallocating szone-allocated objects, but allocating new objects - * from jemalloc. + * Unregister and reregister the default zone. On OSX >= 10.6, + * unregistering takes the last registered zone and places it at the + * location of the specified zone. Unregistering the default zone thus + * makes the last registered one the default. On OSX < 10.6, + * unregistering shifts all registered zones. The first registered zone + * then becomes the default. */ - szone2ozone(malloc_default_zone()); + do { + malloc_zone_t *default_zone = malloc_default_zone(); + malloc_zone_unregister(default_zone); + malloc_zone_register(default_zone); + } while (malloc_default_zone() != jemalloc_zone); #endif malloc_initialized = true; diff --git a/src/zone.c b/src/zone.c index a0372e1a..a8f09c98 100644 --- a/src/zone.c +++ b/src/zone.c @@ -6,8 +6,8 @@ /******************************************************************************/ /* Data. 
*/ -static malloc_zone_t zone, szone; -static struct malloc_introspection_t zone_introspect, ozone_introspect; +static malloc_zone_t zone; +static struct malloc_introspection_t zone_introspect; /******************************************************************************/ /* Function prototypes for non-inline static functions. */ @@ -18,8 +18,10 @@ static void *zone_calloc(malloc_zone_t *zone, size_t num, size_t size); static void *zone_valloc(malloc_zone_t *zone, size_t size); static void zone_free(malloc_zone_t *zone, void *ptr); static void *zone_realloc(malloc_zone_t *zone, void *ptr, size_t size); -#if (JEMALLOC_ZONE_VERSION >= 6) +#if (JEMALLOC_ZONE_VERSION >= 5) static void *zone_memalign(malloc_zone_t *zone, size_t alignment, +#endif +#if (JEMALLOC_ZONE_VERSION >= 6) size_t size); static void zone_free_definite_size(malloc_zone_t *zone, void *ptr, size_t size); @@ -28,19 +30,6 @@ static void *zone_destroy(malloc_zone_t *zone); static size_t zone_good_size(malloc_zone_t *zone, size_t size); static void zone_force_lock(malloc_zone_t *zone); static void zone_force_unlock(malloc_zone_t *zone); -static size_t ozone_size(malloc_zone_t *zone, void *ptr); -static void ozone_free(malloc_zone_t *zone, void *ptr); -static void *ozone_realloc(malloc_zone_t *zone, void *ptr, size_t size); -static unsigned ozone_batch_malloc(malloc_zone_t *zone, size_t size, - void **results, unsigned num_requested); -static void ozone_batch_free(malloc_zone_t *zone, void **to_be_freed, - unsigned num); -#if (JEMALLOC_ZONE_VERSION >= 6) -static void ozone_free_definite_size(malloc_zone_t *zone, void *ptr, - size_t size); -#endif -static void ozone_force_lock(malloc_zone_t *zone); -static void ozone_force_unlock(malloc_zone_t *zone); /******************************************************************************/ /* @@ -101,7 +90,7 @@ zone_realloc(malloc_zone_t *zone, void *ptr, size_t size) return (je_realloc(ptr, size)); } -#if (JEMALLOC_ZONE_VERSION >= 6) +#if (JEMALLOC_ZONE_VERSION >= 5) static void * zone_memalign(malloc_zone_t *zone, size_t alignment, size_t size) { @@ -111,7 +100,9 @@ zone_memalign(malloc_zone_t *zone, size_t alignment, size_t size) return (ret); } +#endif +#if (JEMALLOC_ZONE_VERSION >= 6) static void zone_free_definite_size(malloc_zone_t *zone, void *ptr, size_t size) { @@ -171,10 +162,15 @@ create_zone(void) zone.batch_free = NULL; zone.introspect = &zone_introspect; zone.version = JEMALLOC_ZONE_VERSION; -#if (JEMALLOC_ZONE_VERSION >= 6) +#if (JEMALLOC_ZONE_VERSION >= 5) zone.memalign = zone_memalign; +#endif +#if (JEMALLOC_ZONE_VERSION >= 6) zone.free_definite_size = zone_free_definite_size; #endif +#if (JEMALLOC_ZONE_VERSION >= 8) + zone.pressure_relief = NULL; +#endif zone_introspect.enumerator = NULL; zone_introspect.good_size = (void *)zone_good_size; @@ -187,156 +183,15 @@ create_zone(void) #if (JEMALLOC_ZONE_VERSION >= 6) zone_introspect.zone_locked = NULL; #endif - +#if (JEMALLOC_ZONE_VERSION >= 7) + zone_introspect.enable_discharge_checking = NULL; + zone_introspect.disable_discharge_checking = NULL; + zone_introspect.discharge = NULL; +#ifdef __BLOCKS__ + zone_introspect.enumerate_discharged_pointers = NULL; +#else + zone_introspect.enumerate_unavailable_without_blocks = NULL; +#endif +#endif return (&zone); } - -static size_t -ozone_size(malloc_zone_t *zone, void *ptr) -{ - size_t ret; - - ret = ivsalloc(ptr); - if (ret == 0) - ret = szone.size(zone, ptr); - - return (ret); -} - -static void -ozone_free(malloc_zone_t *zone, void *ptr) -{ - - if (ivsalloc(ptr) != 0) - 
je_free(ptr); - else { - size_t size = szone.size(zone, ptr); - if (size != 0) - (szone.free)(zone, ptr); - } -} - -static void * -ozone_realloc(malloc_zone_t *zone, void *ptr, size_t size) -{ - size_t oldsize; - - if (ptr == NULL) - return (je_malloc(size)); - - oldsize = ivsalloc(ptr); - if (oldsize != 0) - return (je_realloc(ptr, size)); - else { - oldsize = szone.size(zone, ptr); - if (oldsize == 0) - return (je_malloc(size)); - else { - void *ret = je_malloc(size); - if (ret != NULL) { - memcpy(ret, ptr, (oldsize < size) ? oldsize : - size); - (szone.free)(zone, ptr); - } - return (ret); - } - } -} - -static unsigned -ozone_batch_malloc(malloc_zone_t *zone, size_t size, void **results, - unsigned num_requested) -{ - - /* Don't bother implementing this interface, since it isn't required. */ - return (0); -} - -static void -ozone_batch_free(malloc_zone_t *zone, void **to_be_freed, unsigned num) -{ - unsigned i; - - for (i = 0; i < num; i++) - ozone_free(zone, to_be_freed[i]); -} - -#if (JEMALLOC_ZONE_VERSION >= 6) -static void -ozone_free_definite_size(malloc_zone_t *zone, void *ptr, size_t size) -{ - - if (ivsalloc(ptr) != 0) { - assert(ivsalloc(ptr) == size); - je_free(ptr); - } else { - assert(size == szone.size(zone, ptr)); - szone.free_definite_size(zone, ptr, size); - } -} -#endif - -static void -ozone_force_lock(malloc_zone_t *zone) -{ - - /* jemalloc locking is taken care of by the normal jemalloc zone. */ - szone.introspect->force_lock(zone); -} - -static void -ozone_force_unlock(malloc_zone_t *zone) -{ - - /* jemalloc locking is taken care of by the normal jemalloc zone. */ - szone.introspect->force_unlock(zone); -} - -/* - * Overlay the default scalable zone (szone) such that existing allocations are - * drained, and further allocations come from jemalloc. This is necessary - * because Core Foundation directly accesses and uses the szone before the - * jemalloc library is even loaded. - */ -void -szone2ozone(malloc_zone_t *zone) -{ - - /* - * Stash a copy of the original szone so that we can call its - * functions as needed. Note that the internally, the szone stores its - * bookkeeping data structures immediately following the malloc_zone_t - * header, so when calling szone functions, we need to pass a pointer - * to the original zone structure. - */ - memcpy(&szone, zone, sizeof(malloc_zone_t)); - - zone->size = (void *)ozone_size; - zone->malloc = (void *)zone_malloc; - zone->calloc = (void *)zone_calloc; - zone->valloc = (void *)zone_valloc; - zone->free = (void *)ozone_free; - zone->realloc = (void *)ozone_realloc; - zone->destroy = (void *)zone_destroy; - zone->zone_name = "jemalloc_ozone"; - zone->batch_malloc = ozone_batch_malloc; - zone->batch_free = ozone_batch_free; - zone->introspect = &ozone_introspect; - zone->version = JEMALLOC_ZONE_VERSION; -#if (JEMALLOC_ZONE_VERSION >= 6) - zone->memalign = zone_memalign; - zone->free_definite_size = ozone_free_definite_size; -#endif - - ozone_introspect.enumerator = NULL; - ozone_introspect.good_size = (void *)zone_good_size; - ozone_introspect.check = NULL; - ozone_introspect.print = NULL; - ozone_introspect.log = NULL; - ozone_introspect.force_lock = (void *)ozone_force_lock; - ozone_introspect.force_unlock = (void *)ozone_force_unlock; - ozone_introspect.statistics = NULL; -#if (JEMALLOC_ZONE_VERSION >= 6) - ozone_introspect.zone_locked = NULL; -#endif -} From cd9a1346e96f71bdecdc654ea50fc62d76371e74 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 21 Mar 2012 18:33:03 -0700 Subject: [PATCH 065/205] Implement tsd. 
Implement tsd, which is a TLS/TSD abstraction that uses one or both internally. Modify bootstrapping such that no tsd's are utilized until allocation is safe. Remove malloc_[v]tprintf(), and use malloc_snprintf() instead. Fix %p argument size handling in malloc_vsnprintf(). Fix a long-standing statistics-related bug in the "thread.arena" mallctl that could cause crashes due to linked list corruption. --- Makefile.in | 3 +- configure.ac | 20 ++ include/jemalloc/internal/arena.h | 16 +- include/jemalloc/internal/chunk.h | 3 +- .../jemalloc/internal/jemalloc_internal.h.in | 82 ++--- include/jemalloc/internal/private_namespace.h | 2 - include/jemalloc/internal/prof.h | 42 +-- include/jemalloc/internal/tcache.h | 35 +- include/jemalloc/internal/tsd.h | 319 ++++++++++++++++++ include/jemalloc/internal/util.h | 17 +- include/jemalloc/jemalloc_defs.h.in | 9 + src/chunk.c | 14 +- src/chunk_mmap.c | 40 +-- src/ctl.c | 42 ++- src/jemalloc.c | 171 ++++------ src/prof.c | 45 ++- src/tcache.c | 103 +++--- src/tsd.c | 72 ++++ src/util.c | 33 +- 19 files changed, 705 insertions(+), 363 deletions(-) create mode 100644 include/jemalloc/internal/tsd.h create mode 100644 src/tsd.c diff --git a/Makefile.in b/Makefile.in index 01ed083c..494ac9a6 100644 --- a/Makefile.in +++ b/Makefile.in @@ -50,7 +50,8 @@ CSRCS := @srcroot@src/jemalloc.c @srcroot@src/arena.c @srcroot@src/atomic.c \ @srcroot@src/ckh.c @srcroot@src/ctl.c @srcroot@src/extent.c \ @srcroot@src/hash.c @srcroot@src/huge.c @srcroot@src/mb.c \ @srcroot@src/mutex.c @srcroot@src/prof.c @srcroot@src/rtree.c \ - @srcroot@src/stats.c @srcroot@src/tcache.c @srcroot@src/util.c + @srcroot@src/stats.c @srcroot@src/tcache.c @srcroot@src/util.c \ + @srcroot@src/tsd.c ifeq (macho, @abi@) CSRCS += @srcroot@src/zone.c endif diff --git a/configure.ac b/configure.ac index 02d4f536..44ff6eec 100644 --- a/configure.ac +++ b/configure.ac @@ -763,6 +763,20 @@ AC_CHECK_LIB([pthread], [pthread_create], [LIBS="$LIBS -lpthread"], CPPFLAGS="$CPPFLAGS -D_REENTRANT" +dnl Check whether the BSD-specific _malloc_thread_cleanup() exists. If so, use +dnl it rather than pthreads TSD cleanup functions to support cleanup during +dnl thread exit, in order to avoid pthreads library recursion during +dnl bootstrapping. +force_tls="0" +AC_CHECK_FUNC([_malloc_thread_cleanup], + [have__malloc_thread_cleanup="1"], + [have__malloc_thread_cleanup="0"] + ) +if test "x$have__malloc_thread_cleanup" = "x1" ; then + AC_DEFINE([JEMALLOC_MALLOC_THREAD_CLEANUP], [ ]) + force_tls="1" +fi + dnl Disable lazy locking by default. 
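The _malloc_thread_cleanup() hook detected above lets the allocator run its own per-thread teardown without going through pthreads TSD destructors. A sketch of the dispatch this enables (hypothetical shape, using the malloc_tsd_cleanup_t type and MALLOC_TSD_CLEANUPS_MAX limit this patch defines in tsd.h; the real implementation is the new src/tsd.c). Each registered cleanup returns true to request another pass, mirroring pthreads destructor iteration:

#include <stdbool.h>

#define MALLOC_TSD_CLEANUPS_MAX	8

typedef struct {
	bool (*f)(void *);	/* Returns true to be called again. */
	void *arg;
} malloc_tsd_cleanup_t;

static malloc_tsd_cleanup_t cleanups[MALLOC_TSD_CLEANUPS_MAX];
static unsigned ncleanups = 0;

void
_malloc_thread_cleanup(void)
{
	bool pending[MALLOC_TSD_CLEANUPS_MAX], again;
	unsigned i;

	for (i = 0; i < ncleanups; i++)
		pending[i] = true;
	do {
		/* Re-run cleanups until none asks for another pass. */
		again = false;
		for (i = 0; i < ncleanups; i++) {
			if (pending[i]) {
				pending[i] = cleanups[i].f(cleanups[i].arg);
				if (pending[i])
					again = true;
			}
		}
	} while (again);
}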
AC_ARG_ENABLE([lazy_lock], [AS_HELP_STRING([--enable-lazy-lock], @@ -795,6 +809,10 @@ fi , enable_tls="1" ) +if test "x${enable_tls}" = "x0" -a "x${force_tls}" = "x1" ; then + AC_MSG_RESULT([Forcing TLS to avoid allocator/threading bootstrap issues]) + enable_tls="1" +fi if test "x${enable_tls}" = "x1" ; then AC_MSG_CHECKING([for TLS]) AC_COMPILE_IFELSE([AC_LANG_PROGRAM( @@ -812,6 +830,8 @@ fi AC_SUBST([enable_tls]) if test "x${enable_tls}" = "x1" ; then AC_DEFINE_UNQUOTED([JEMALLOC_TLS], [ ]) +elif test "x${force_tls}" = "x1" ; then + AC_MSG_ERROR([Failed to configure TLS, which is mandatory for correct function]) fi dnl ============================================================================ diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 1609adcf..c5214893 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -391,6 +391,7 @@ unsigned arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, prof_ctx_t *arena_prof_ctx_get(const void *ptr); void arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx); void *arena_malloc(size_t size, bool zero); +void *arena_malloc_prechosen(arena_t *arena, size_t size, bool zero); void arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr); #endif @@ -552,7 +553,7 @@ arena_malloc(size_t size, bool zero) tcache_t *tcache; assert(size != 0); - assert(QUANTUM_CEILING(size) <= arena_maxclass); + assert(size <= arena_maxclass); if (size <= SMALL_MAXCLASS) { if ((tcache = tcache_get()) != NULL) @@ -571,6 +572,19 @@ arena_malloc(size_t size, bool zero) } } +JEMALLOC_INLINE void * +arena_malloc_prechosen(arena_t *arena, size_t size, bool zero) +{ + + assert(size != 0); + assert(size <= arena_maxclass); + + if (size <= SMALL_MAXCLASS) + return (arena_malloc_small(arena, size, zero)); + else + return (arena_malloc_large(arena, size, zero)); +} + JEMALLOC_INLINE void arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr) { diff --git a/include/jemalloc/internal/chunk.h b/include/jemalloc/internal/chunk.h index 9a62ba18..8e24e8f3 100644 --- a/include/jemalloc/internal/chunk.h +++ b/include/jemalloc/internal/chunk.h @@ -44,7 +44,8 @@ extern size_t arena_maxclass; /* Max size class for arenas. 
*/ void *chunk_alloc(size_t size, bool base, bool *zero); void chunk_dealloc(void *chunk, size_t size, bool unmap); -bool chunk_boot(void); +bool chunk_boot0(void); +bool chunk_boot1(void); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index dbfd3fc8..387aabbe 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -289,6 +289,7 @@ static const bool config_ivsalloc = #include "jemalloc/internal/stats.h" #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/tsd.h" #include "jemalloc/internal/mb.h" #include "jemalloc/internal/extent.h" #include "jemalloc/internal/arena.h" @@ -316,6 +317,7 @@ static const bool config_ivsalloc = #include "jemalloc/internal/stats.h" #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/tsd.h" #include "jemalloc/internal/mb.h" #include "jemalloc/internal/bitmap.h" #include "jemalloc/internal/extent.h" @@ -335,6 +337,11 @@ typedef struct { uint64_t allocated; uint64_t deallocated; } thread_allocated_t; +/* + * The JEMALLOC_CONCAT() wrapper is necessary to pass {0, 0} via a cpp macro + * argument. + */ +#define THREAD_ALLOCATED_INITIALIZER JEMALLOC_CONCAT({0, 0}) #undef JEMALLOC_H_STRUCTS /******************************************************************************/ @@ -356,25 +363,6 @@ extern size_t lg_pagesize; extern unsigned ncpus; extern malloc_mutex_t arenas_lock; /* Protects arenas initialization. */ -extern pthread_key_t arenas_tsd; -#ifdef JEMALLOC_TLS -/* - * Map of pthread_self() --> arenas[???], used for selecting an arena to use - * for allocations. - */ -extern __thread arena_t *arenas_tls JEMALLOC_ATTR(tls_model("initial-exec")); -# define ARENA_GET() arenas_tls -# define ARENA_SET(v) do { \ - arenas_tls = (v); \ - pthread_setspecific(arenas_tsd, (void *)(v)); \ -} while (0) -#else -# define ARENA_GET() ((arena_t *)pthread_getspecific(arenas_tsd)) -# define ARENA_SET(v) do { \ - pthread_setspecific(arenas_tsd, (void *)(v)); \ -} while (0) -#endif - /* * Arenas that are used to service external requests. Not all elements of the * arenas array are necessarily used; arenas are created lazily as needed. 
@@ -382,31 +370,8 @@ extern __thread arena_t *arenas_tls JEMALLOC_ATTR(tls_model("initial-exec")); extern arena_t **arenas; extern unsigned narenas; -#ifdef JEMALLOC_TLS -extern __thread thread_allocated_t thread_allocated_tls; -# define ALLOCATED_GET() (thread_allocated_tls.allocated) -# define ALLOCATEDP_GET() (&thread_allocated_tls.allocated) -# define DEALLOCATED_GET() (thread_allocated_tls.deallocated) -# define DEALLOCATEDP_GET() (&thread_allocated_tls.deallocated) -# define ALLOCATED_ADD(a, d) do { \ - thread_allocated_tls.allocated += a; \ - thread_allocated_tls.deallocated += d; \ -} while (0) -#else -# define ALLOCATED_GET() (thread_allocated_get()->allocated) -# define ALLOCATEDP_GET() (&thread_allocated_get()->allocated) -# define DEALLOCATED_GET() (thread_allocated_get()->deallocated) -# define DEALLOCATEDP_GET() (&thread_allocated_get()->deallocated) -# define ALLOCATED_ADD(a, d) do { \ - thread_allocated_t *thread_allocated = thread_allocated_get(); \ - thread_allocated->allocated += (a); \ - thread_allocated->deallocated += (d); \ -} while (0) -#endif -extern pthread_key_t thread_allocated_tsd; -thread_allocated_t *thread_allocated_get_hard(void); - arena_t *arenas_extend(unsigned ind); +void arenas_cleanup(void *arg); arena_t *choose_arena_hard(void); void jemalloc_prefork(void); void jemalloc_postfork_parent(void); @@ -420,6 +385,7 @@ void jemalloc_postfork_child(void); #include "jemalloc/internal/stats.h" #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/tsd.h" #include "jemalloc/internal/mb.h" #include "jemalloc/internal/bitmap.h" #include "jemalloc/internal/extent.h" @@ -447,6 +413,7 @@ void jemalloc_postfork_child(void); #include "jemalloc/internal/stats.h" #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/tsd.h" #include "jemalloc/internal/mb.h" #include "jemalloc/internal/extent.h" #include "jemalloc/internal/base.h" @@ -454,13 +421,21 @@ void jemalloc_postfork_child(void); #include "jemalloc/internal/huge.h" #ifndef JEMALLOC_ENABLE_INLINE +malloc_tsd_protos(JEMALLOC_ATTR(unused), arenas, arena_t *) + size_t s2u(size_t size); size_t sa2u(size_t size, size_t alignment, size_t *run_size_p); arena_t *choose_arena(void); -thread_allocated_t *thread_allocated_get(void); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) +/* + * Map of pthread_self() --> arenas[???], used for selecting an arena to use + * for allocations. + */ +malloc_tsd_externs(arenas, arena_t *) +malloc_tsd_funcs(JEMALLOC_INLINE, arenas, arena_t *, NULL, arenas_cleanup) + /* * Compute usable size that would result from allocating an object with the * specified size. 
@@ -572,25 +547,13 @@ choose_arena(void) { arena_t *ret; - ret = ARENA_GET(); - if (ret == NULL) { + if ((ret = *arenas_tsd_get()) == NULL) { ret = choose_arena_hard(); assert(ret != NULL); } return (ret); } - -JEMALLOC_INLINE thread_allocated_t * -thread_allocated_get(void) -{ - thread_allocated_t *thread_allocated = (thread_allocated_t *) - pthread_getspecific(thread_allocated_tsd); - - if (thread_allocated == NULL) - return (thread_allocated_get_hard()); - return (thread_allocated); -} #endif #include "jemalloc/internal/bitmap.h" @@ -611,6 +574,7 @@ size_t ivsalloc(const void *ptr); void idalloc(void *ptr); void *iralloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero, bool no_move); +malloc_tsd_protos(JEMALLOC_ATTR(unused), thread_allocated, thread_allocated_t) #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) @@ -787,6 +751,10 @@ iralloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero, } } } + +malloc_tsd_externs(thread_allocated, thread_allocated_t) +malloc_tsd_funcs(JEMALLOC_INLINE, thread_allocated, thread_allocated_t, + THREAD_ALLOCATED_INITIALIZER, malloc_tsd_no_cleanup) #endif #include "jemalloc/internal/prof.h" diff --git a/include/jemalloc/internal/private_namespace.h b/include/jemalloc/internal/private_namespace.h index e7370fec..7103e680 100644 --- a/include/jemalloc/internal/private_namespace.h +++ b/include/jemalloc/internal/private_namespace.h @@ -155,10 +155,8 @@ #define malloc_mutex_unlock JEMALLOC_N(malloc_mutex_unlock) #define malloc_printf JEMALLOC_N(malloc_printf) #define malloc_snprintf JEMALLOC_N(malloc_snprintf) -#define malloc_tprintf JEMALLOC_N(malloc_tprintf) #define malloc_vcprintf JEMALLOC_N(malloc_vcprintf) #define malloc_vsnprintf JEMALLOC_N(malloc_vsnprintf) -#define malloc_vtprintf JEMALLOC_N(malloc_vtprintf) #define malloc_write JEMALLOC_N(malloc_write) #define mb_write JEMALLOC_N(mb_write) #define opt_abort JEMALLOC_N(opt_abort) diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index 48231928..231a3876 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -23,10 +23,13 @@ typedef struct prof_tdata_s prof_tdata_t; #define PROF_TCMAX 1024 /* Initial hash table size. */ -#define PROF_CKH_MINITEMS 64 +#define PROF_CKH_MINITEMS 64 /* Size of memory buffer to use when writing dump files. */ -#define PROF_DUMP_BUF_SIZE 65536 +#define PROF_DUMP_BUFSIZE 65536 + +/* Size of stack-allocated buffer used by prof_printf(). */ +#define PROF_PRINTF_BUFSIZE 128 #endif /* JEMALLOC_H_TYPES */ /******************************************************************************/ @@ -179,29 +182,6 @@ extern uint64_t prof_interval; */ extern bool prof_promote; -/* Thread-specific backtrace cache, used to reduce bt2ctx contention. */ -#ifdef JEMALLOC_TLS -extern __thread prof_tdata_t *prof_tdata_tls - JEMALLOC_ATTR(tls_model("initial-exec")); -# define PROF_TCACHE_GET() prof_tdata_tls -# define PROF_TCACHE_SET(v) do { \ - prof_tdata_tls = (v); \ - pthread_setspecific(prof_tdata_tsd, (void *)(v)); \ -} while (0) -#else -# define PROF_TCACHE_GET() \ - ((prof_tdata_t *)pthread_getspecific(prof_tdata_tsd)) -# define PROF_TCACHE_SET(v) do { \ - pthread_setspecific(prof_tdata_tsd, (void *)(v)); \ -} while (0) -#endif -/* - * Same contents as b2cnt_tls, but initialized such that the TSD destructor is - * called when a thread exits, so that prof_tdata_tls contents can be merged, - * unlinked, and deallocated. 
- */ -extern pthread_key_t prof_tdata_tsd; - void bt_init(prof_bt_t *bt, void **vec); void prof_backtrace(prof_bt_t *bt, unsigned nignore); prof_thr_cnt_t *prof_lookup(prof_bt_t *bt); @@ -209,6 +189,7 @@ void prof_idump(void); bool prof_mdump(const char *filename); void prof_gdump(void); prof_tdata_t *prof_tdata_init(void); +void prof_tdata_cleanup(void *arg); void prof_boot0(void); void prof_boot1(void); bool prof_boot2(void); @@ -223,7 +204,7 @@ bool prof_boot2(void); \ assert(size == s2u(size)); \ \ - prof_tdata = PROF_TCACHE_GET(); \ + prof_tdata = *prof_tdata_tsd_get(); \ if (prof_tdata == NULL) { \ prof_tdata = prof_tdata_init(); \ if (prof_tdata == NULL) { \ @@ -270,6 +251,8 @@ bool prof_boot2(void); } while (0) #ifndef JEMALLOC_ENABLE_INLINE +malloc_tsd_protos(JEMALLOC_ATTR(unused), prof_tdata, prof_tdata_t *) + void prof_sample_threshold_update(prof_tdata_t *prof_tdata); prof_ctx_t *prof_ctx_get(const void *ptr); void prof_ctx_set(const void *ptr, prof_ctx_t *ctx); @@ -281,6 +264,11 @@ void prof_free(const void *ptr, size_t size); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PROF_C_)) +/* Thread-specific backtrace cache, used to reduce bt2ctx contention. */ +malloc_tsd_externs(prof_tdata, prof_tdata_t *) +malloc_tsd_funcs(JEMALLOC_INLINE, prof_tdata, prof_tdata_t *, NULL, + prof_tdata_cleanup) + JEMALLOC_INLINE void prof_sample_threshold_update(prof_tdata_t *prof_tdata) { @@ -359,7 +347,7 @@ prof_sample_accum_update(size_t size) /* Sampling logic is unnecessary if the interval is 1. */ assert(opt_lg_prof_sample != 0); - prof_tdata = PROF_TCACHE_GET(); + prof_tdata = *prof_tdata_tsd_get(); assert(prof_tdata != NULL); /* Take care to avoid integer overflow. */ diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index ed037cf9..30e63a50 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -75,23 +75,6 @@ extern ssize_t opt_lg_tcache_max; extern tcache_bin_info_t *tcache_bin_info; -/* Map of thread-specific caches. */ -#ifdef JEMALLOC_TLS -extern __thread tcache_t *tcache_tls - JEMALLOC_ATTR(tls_model("initial-exec")); -# define TCACHE_GET() tcache_tls -# define TCACHE_SET(v) do { \ - tcache_tls = (tcache_t *)(v); \ - pthread_setspecific(tcache_tsd, (void *)(v)); \ -} while (0) -#else -# define TCACHE_GET() ((tcache_t *)pthread_getspecific(tcache_tsd)) -# define TCACHE_SET(v) do { \ - pthread_setspecific(tcache_tsd, (void *)(v)); \ -} while (0) -#endif -extern pthread_key_t tcache_tsd; - /* * Number of tcache bins. There are NBINS small-object bins, plus 0 or more * large-object bins. 
@@ -105,18 +88,24 @@ void tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem, tcache_t *tcache); void tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem, tcache_t *tcache); +void tcache_arena_associate(tcache_t *tcache, arena_t *arena); +void tcache_arena_dissociate(tcache_t *tcache); tcache_t *tcache_create(arena_t *arena); void *tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin, size_t binind); void tcache_destroy(tcache_t *tcache); +void tcache_thread_cleanup(void *arg); void tcache_stats_merge(tcache_t *tcache, arena_t *arena); -bool tcache_boot(void); +bool tcache_boot0(void); +bool tcache_boot1(void); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ #ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE +malloc_tsd_protos(JEMALLOC_ATTR(unused), tcache, tcache_t *) + void tcache_event(tcache_t *tcache); tcache_t *tcache_get(void); void *tcache_alloc_easy(tcache_bin_t *tbin); @@ -127,6 +116,11 @@ void tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_TCACHE_C_)) +/* Map of thread-specific caches. */ +malloc_tsd_externs(tcache, tcache_t *) +malloc_tsd_funcs(JEMALLOC_INLINE, tcache, tcache_t *, NULL, + tcache_thread_cleanup) + JEMALLOC_INLINE tcache_t * tcache_get(void) { @@ -139,7 +133,7 @@ tcache_get(void) else if (opt_tcache == false) return (NULL); - tcache = TCACHE_GET(); + tcache = *tcache_tsd_get(); if ((uintptr_t)tcache <= (uintptr_t)2) { if (tcache == NULL) { tcache = tcache_create(choose_arena()); @@ -152,7 +146,8 @@ tcache_get(void) * called after the tcache_thread_cleanup() was * called. */ - TCACHE_SET((uintptr_t)2); + tcache = (tcache_t *)(uintptr_t)2; + tcache_tsd_set(&tcache); } return (NULL); } diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h new file mode 100644 index 00000000..5a174acd --- /dev/null +++ b/include/jemalloc/internal/tsd.h @@ -0,0 +1,319 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +/* Maximum number of malloc_tsd users with cleanup functions. */ +#define MALLOC_TSD_CLEANUPS_MAX 8 + +typedef struct malloc_tsd_cleanup_s malloc_tsd_cleanup_t; +struct malloc_tsd_cleanup_s { + bool (*f)(void *); + void *arg; +}; + +/* + * TLS/TSD-agnostic macro-based implementation of thread-specific data. There + * are four macros that support (at least) three use cases: file-private, + * library-private, and library-private inlined. Following is an example + * library-private tsd variable: + * + * In example.h: + * typedef struct { + * int x; + * int y; + * } example_t; + * #define EX_INITIALIZER JEMALLOC_CONCAT({0, 0}) + * malloc_tsd_protos(, example, example_t *) + * malloc_tsd_externs(example, example_t *) + * In example.c: + * malloc_tsd_data(, example, example_t *, EX_INITIALIZER) + * malloc_tsd_funcs(, example, example_t *, EX_INITIALIZER, + * example_tsd_cleanup) + * + * The result is a set of generated functions, e.g.: + * + * bool example_tsd_boot(void) {...} + * example_t **example_tsd_get() {...} + * void example_tsd_set(example_t **val) {...} + * + * Note that all of the functions deal in terms of (a_type *) rather than + * (a_type) so that it is possible to support non-pointer types (unlike + * pthreads TSD). example_tsd_cleanup() is passed an (a_type *) pointer that is + * cast to (void *). 
This means that the cleanup function needs to cast *and* + * dereference the function argument, e.g.: + * + * void + * example_tsd_cleanup(void *arg) + * { + * example_t *example = *(example_t **)arg; + * + * [...] + * if ([want the cleanup function to be called again]) { + * example_tsd_set(&example); + * } + * } + * + * If example_tsd_set() is called within example_tsd_cleanup(), it will be + * called again. This is similar to how pthreads TSD destruction works, except + * that pthreads only calls the cleanup function again if the value was set to + * non-NULL. + */ + +/* malloc_tsd_protos(). */ +#define malloc_tsd_protos(a_attr, a_name, a_type) \ +a_attr bool \ +a_name##_tsd_boot(void); \ +a_attr a_type * \ +a_name##_tsd_get(void); \ +a_attr void \ +a_name##_tsd_set(a_type *val); + +/* malloc_tsd_externs(). */ +#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP +#define malloc_tsd_externs(a_name, a_type) \ +extern __thread a_type a_name##_tls; \ +extern __thread bool *a_name##_initialized; \ +extern bool a_name##_booted; +#elif (defined(JEMALLOC_TLS)) +#define malloc_tsd_externs(a_name, a_type) \ +extern __thread a_type a_name##_tls; \ +extern pthread_key_t a_name##_tsd; \ +extern bool a_name##_booted; +#else +#define malloc_tsd_externs(a_name, a_type) \ +extern pthread_key_t a_name##_tsd; \ +extern bool a_name##_booted; +#endif + +/* malloc_tsd_data(). */ +#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP +#define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ +a_attr __thread a_type JEMALLOC_ATTR(tls_model("initial-exec")) \ + a_name##_tls = a_initializer; \ +a_attr __thread bool JEMALLOC_ATTR(tls_model("initial-exec")) \ + a_name##_initialized = false; \ +a_attr bool a_name##_booted = false; +#elif (defined(JEMALLOC_TLS)) +#define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ +a_attr __thread a_type JEMALLOC_ATTR(tls_model("initial-exec")) \ + a_name##_tls = a_initializer; \ +a_attr pthread_key_t a_name##_tsd; \ +a_attr bool a_name##_booted = false; +#else +#define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ +a_attr pthread_key_t a_name##_tsd; \ +a_attr bool a_name##_booted = false; +#endif + +/* malloc_tsd_funcs(). */ +#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP +#define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \ + a_cleanup) \ +/* Initialization/cleanup. */ \ +a_attr void \ +a_name##_tsd_cleanup_wrapper(void *arg) \ +{ \ + \ +} \ +bool \ +a_name##_tsd_cleanup_pending(void *arg) \ +{ \ + bool (*cleanup)(void *) = arg; \ + \ + if (a_name##_initialized) { \ + a_name##_initialized = false; \ + cleanup(&a_name##_tls); \ + } \ + return (a_name##_initialized); \ +} \ +a_attr bool \ +a_name##_tsd_boot(void) \ +{ \ + \ + if (a_cleanup != malloc_tsd_no_cleanup) { \ + malloc_tsd_cleanup_register( \ + &a_name##_tsd_cleanup_pending, a_cleanup); \ + } \ + a_name##_booted = true; \ + return (false); \ +} \ +/* Get/set. */ \ +a_attr a_type * \ +a_name##_tsd_get(void) \ +{ \ + \ + assert(a_name##_booted); \ + return (&a_name##_tls); \ +} \ +a_attr void \ +a_name##_tsd_set(a_type *val) \ +{ \ + \ + assert(a_name##_booted); \ + a_name##_tls = (*val); \ + if (a_cleanup != malloc_tsd_no_cleanup) \ + a_name##_initialized = true; \ +} +#elif (defined(JEMALLOC_TLS)) +#define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \ + a_cleanup) \ +/* Initialization/cleanup. 
*/ \ +a_attr void \ +a_name##_tsd_cleanup_wrapper(void *arg) \ +{ \ + \ +} \ +a_attr bool \ +a_name##_tsd_boot(void) \ +{ \ + \ + if (a_cleanup != malloc_tsd_no_cleanup) { \ + if (pthread_key_create(&a_name##_tsd, a_cleanup) != 0) \ + return (true); \ + } \ + a_name##_booted = true; \ + return (false); \ +} \ +/* Get/set. */ \ +a_attr a_type * \ +a_name##_tsd_get(void) \ +{ \ + \ + assert(a_name##_booted); \ + return (&a_name##_tls); \ +} \ +a_attr void \ +a_name##_tsd_set(a_type *val) \ +{ \ + \ + assert(a_name##_booted); \ + a_name##_tls = (*val); \ + if (a_cleanup != malloc_tsd_no_cleanup) { \ + if (pthread_setspecific(a_name##_tsd, \ + (void *)(&a_name##_tls))) { \ + malloc_write(": Error" \ + " setting TSD for "#a_name"\n"); \ + if (opt_abort) \ + abort(); \ + } \ + } \ +} +#else +#define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \ + a_cleanup) \ +/* Data structure. */ \ +typedef struct { \ + bool isstatic; \ + bool initialized; \ + a_type val; \ +} a_name##_tsd_wrapper_t; \ +/* Initialization/cleanup. */ \ +a_attr void \ +a_name##_tsd_cleanup_wrapper(void *arg) \ +{ \ + a_name##_tsd_wrapper_t *wrapper = (a_name##_tsd_wrapper_t *)arg;\ + \ + if (a_cleanup != malloc_tsd_no_cleanup && \ + wrapper->initialized) { \ + wrapper->initialized = false; \ + a_cleanup(&wrapper->val); \ + if (wrapper->initialized) { \ + /* Trigger another cleanup round. */ \ + if (pthread_setspecific(a_name##_tsd, \ + (void *)wrapper)) { \ + malloc_write(": Error" \ + " setting TSD for "#a_name"\n"); \ + if (opt_abort) \ + abort(); \ + } \ + return; \ + } \ + } \ + if (wrapper->isstatic == false) \ + malloc_tsd_dalloc(wrapper); \ +} \ +a_attr bool \ +a_name##_tsd_boot(void) \ +{ \ + \ + if (pthread_key_create(&a_name##_tsd, \ + a_name##_tsd_cleanup_wrapper) != 0) \ + return (true); \ + a_name##_booted = true; \ + return (false); \ +} \ +/* Get/set. 
*/ \ +a_attr a_name##_tsd_wrapper_t * \ +a_name##_tsd_get_wrapper(void) \ +{ \ + a_name##_tsd_wrapper_t *wrapper = (a_name##_tsd_wrapper_t *) \ + pthread_getspecific(a_name##_tsd); \ + \ + if (wrapper == NULL) { \ + wrapper = (a_name##_tsd_wrapper_t *) \ + malloc_tsd_malloc(sizeof(a_name##_tsd_wrapper_t)); \ + if (wrapper == NULL) { \ + static a_name##_tsd_wrapper_t \ + a_name##_tsd_static_data = \ + {true, false, a_initializer}; \ + malloc_write(": Error allocating" \ + " TSD for "#a_name"\n"); \ + if (opt_abort) \ + abort(); \ + wrapper = &a_name##_tsd_static_data; \ + } else { \ + static a_type tsd_static_data = a_initializer; \ + wrapper->isstatic = false; \ + wrapper->val = tsd_static_data; \ + } \ + if (pthread_setspecific(a_name##_tsd, \ + (void *)wrapper)) { \ + malloc_write(": Error setting" \ + " TSD for "#a_name"\n"); \ + if (opt_abort) \ + abort(); \ + } \ + } \ + return (wrapper); \ +} \ +a_attr a_type * \ +a_name##_tsd_get(void) \ +{ \ + a_name##_tsd_wrapper_t *wrapper; \ + \ + assert(a_name##_booted); \ + wrapper = a_name##_tsd_get_wrapper(); \ + return (&wrapper->val); \ +} \ +a_attr void \ +a_name##_tsd_set(a_type *val) \ +{ \ + a_name##_tsd_wrapper_t *wrapper; \ + \ + assert(a_name##_booted); \ + wrapper = a_name##_tsd_get_wrapper(); \ + wrapper->val = *(val); \ + if (a_cleanup != malloc_tsd_no_cleanup) \ + wrapper->initialized = true; \ +} +#endif + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +void *malloc_tsd_malloc(size_t size); +void malloc_tsd_dalloc(void *wrapper); +void malloc_tsd_no_cleanup(void *); +void malloc_tsd_cleanup_register(bool (*f)(void *), void *arg); +void malloc_tsd_boot(void); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index c5f7520c..fb354da5 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -5,11 +5,17 @@ #define BUFERROR_BUF 64 /* - * Size of static buffer used by malloc_[v]{,c,t}printf(). This must be large - * enough for all possible uses within jemalloc. + * Size of stack-allocated buffer used by malloc_{,v,vc}printf(). This must be + * large enough for all possible uses within jemalloc. */ #define MALLOC_PRINTF_BUFSIZE 4096 +/* + * Wrap a cpp argument that contains commas such that it isn't broken up into + * multiple arguments. + */ +#define JEMALLOC_CONCAT(...) __VA_ARGS__ + /* * Define a custom assert() in order to reduce the chances of deadlock during * assertion failure. @@ -77,13 +83,6 @@ int malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap); int malloc_snprintf(char *str, size_t size, const char *format, ...) JEMALLOC_ATTR(format(printf, 3, 4)); -/* - * malloc_[v]tprintf() prints to a thread-local string buffer, so the result is - * overwritten by the next call to malloc_[v]{,c,t}printf(). - */ -const char * malloc_vtprintf(const char *format, va_list ap); -const char * malloc_tprintf(const char *format, ...) 
- JEMALLOC_ATTR(format(printf, 1, 2)); void malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque, const char *format, va_list ap); void malloc_cprintf(void (*write)(void *, const char *), void *cbopaque, diff --git a/include/jemalloc/jemalloc_defs.h.in b/include/jemalloc/jemalloc_defs.h.in index 434dd368..838f5615 100644 --- a/include/jemalloc/jemalloc_defs.h.in +++ b/include/jemalloc/jemalloc_defs.h.in @@ -59,6 +59,15 @@ */ #undef JEMALLOC_OSSPIN +/* + * Defined if _malloc_thread_cleanup() exists. At least in the case of + * FreeBSD, pthread_key_create() allocates, which if used during malloc + * bootstrapping will cause recursion into the pthreads library. Therefore, if + * _malloc_thread_cleanup() exists, use it as the basis for thread cleanup in + * malloc_tsd. + */ +#undef JEMALLOC_MALLOC_THREAD_CLEANUP + /* Defined if __attribute__((...)) syntax is supported. */ #undef JEMALLOC_HAVE_ATTR #ifdef JEMALLOC_HAVE_ATTR diff --git a/src/chunk.c b/src/chunk.c index b9086509..f50e8409 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -100,7 +100,7 @@ chunk_dealloc(void *chunk, size_t size, bool unmap) } bool -chunk_boot(void) +chunk_boot0(void) { /* Set variables according to the value of opt_lg_chunk. */ @@ -114,8 +114,6 @@ chunk_boot(void) return (true); memset(&stats_chunks, 0, sizeof(chunk_stats_t)); } - if (chunk_mmap_boot()) - return (true); if (config_dss && chunk_dss_boot()) return (true); if (config_ivsalloc) { @@ -127,3 +125,13 @@ chunk_boot(void) return (false); } + +bool +chunk_boot1(void) +{ + + if (chunk_mmap_boot()) + return (true); + + return (false); +} diff --git a/src/chunk_mmap.c b/src/chunk_mmap.c index 6ea21180..749a2dac 100644 --- a/src/chunk_mmap.c +++ b/src/chunk_mmap.c @@ -8,20 +8,9 @@ * Used by chunk_alloc_mmap() to decide whether to attempt the fast path and * potentially avoid some system calls. */ -#ifdef JEMALLOC_TLS -static __thread bool mmap_unaligned_tls - JEMALLOC_ATTR(tls_model("initial-exec")); -#define MMAP_UNALIGNED_GET() mmap_unaligned_tls -#define MMAP_UNALIGNED_SET(v) do { \ - mmap_unaligned_tls = (v); \ -} while (0) -#else -static pthread_key_t mmap_unaligned_tsd; -#define MMAP_UNALIGNED_GET() ((bool)pthread_getspecific(mmap_unaligned_tsd)) -#define MMAP_UNALIGNED_SET(v) do { \ - pthread_setspecific(mmap_unaligned_tsd, (void *)(v)); \ -} while (0) -#endif +malloc_tsd_data(static, mmap_unaligned, bool, false) +malloc_tsd_funcs(JEMALLOC_INLINE, mmap_unaligned, bool, false, + malloc_tsd_no_cleanup) /******************************************************************************/ /* Function prototypes for non-inline static functions. */ @@ -128,8 +117,10 @@ chunk_alloc_mmap_slow(size_t size, bool unaligned, bool noreserve) * the next chunk_alloc_mmap() execution tries the fast allocation * method. */ - if (unaligned == false) - MMAP_UNALIGNED_SET(false); + if (unaligned == false && mmap_unaligned_booted) { + bool mu = false; + mmap_unaligned_tsd_set(&mu); + } return (ret); } @@ -167,7 +158,7 @@ chunk_alloc_mmap_internal(size_t size, bool noreserve) * fast method next time. */ - if (MMAP_UNALIGNED_GET() == false) { + if (mmap_unaligned_booted && *mmap_unaligned_tsd_get() == false) { size_t offset; ret = pages_map(NULL, size, noreserve); @@ -176,7 +167,8 @@ chunk_alloc_mmap_internal(size_t size, bool noreserve) offset = CHUNK_ADDR2OFFSET(ret); if (offset != 0) { - MMAP_UNALIGNED_SET(true); + bool mu = true; + mmap_unaligned_tsd_set(&mu); /* Try to extend chunk boundary. 
*/ if (pages_map((void *)((uintptr_t)ret + size), chunksize - offset, noreserve) == NULL) { @@ -225,11 +217,15 @@ bool chunk_mmap_boot(void) { -#ifndef JEMALLOC_TLS - if (pthread_key_create(&mmap_unaligned_tsd, NULL) != 0) { - malloc_write(": Error in pthread_key_create()\n"); + /* + * XXX For the non-TLS implementation of tsd, the first access from + * each thread causes memory allocation. The result is a bootstrapping + * problem for this particular use case, so for now just disable it by + * leaving it in an unbooted state. + */ +#ifdef JEMALLOC_TLS + if (mmap_unaligned_tsd_boot()) return (true); - } #endif return (false); diff --git a/src/ctl.c b/src/ctl.c index 1ef84e80..e17e5034 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -978,13 +978,13 @@ thread_tcache_flush_ctl(const size_t *mib, size_t miblen, void *oldp, VOID(); - tcache = TCACHE_GET(); - if (tcache == NULL) { + if ((tcache = *tcache_tsd_get()) == NULL) { ret = 0; goto RETURN; } tcache_destroy(tcache); - TCACHE_SET(NULL); + tcache = NULL; + tcache_tsd_set(&tcache); ret = 0; RETURN: @@ -1012,23 +1012,26 @@ thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, /* Initialize arena if necessary. */ malloc_mutex_lock(&arenas_lock); - if ((arena = arenas[newind]) == NULL) - arena = arenas_extend(newind); - arenas[oldind]->nthreads--; - arenas[newind]->nthreads++; - malloc_mutex_unlock(&arenas_lock); - if (arena == NULL) { + if ((arena = arenas[newind]) == NULL && (arena = + arenas_extend(newind)) == NULL) { + malloc_mutex_unlock(&arenas_lock); ret = EAGAIN; goto RETURN; } + assert(arena == arenas[newind]); + arenas[oldind]->nthreads--; + arenas[newind]->nthreads++; + malloc_mutex_unlock(&arenas_lock); /* Set new arena association. */ - ARENA_SET(arena); if (config_tcache) { - tcache_t *tcache = TCACHE_GET(); - if (tcache != NULL) - tcache->arena = arena; + tcache_t *tcache; + if ((tcache = *tcache_tsd_get()) != NULL) { + tcache_arena_dissociate(tcache); + tcache_arena_associate(tcache, arena); + } } + arenas_tsd_set(&arena); } ret = 0; @@ -1036,11 +1039,14 @@ RETURN: return (ret); } -CTL_RO_NL_CGEN(config_stats, thread_allocated, ALLOCATED_GET(), uint64_t) -CTL_RO_NL_CGEN(config_stats, thread_allocatedp, ALLOCATEDP_GET(), uint64_t *) -CTL_RO_NL_CGEN(config_stats, thread_deallocated, DEALLOCATED_GET(), uint64_t) -CTL_RO_NL_CGEN(config_stats, thread_deallocatedp, DEALLOCATEDP_GET(), - uint64_t *) +CTL_RO_NL_CGEN(config_stats, thread_allocated, + thread_allocated_tsd_get()->allocated, uint64_t) +CTL_RO_NL_CGEN(config_stats, thread_allocatedp, + &thread_allocated_tsd_get()->allocated, uint64_t *) +CTL_RO_NL_CGEN(config_stats, thread_deallocated, + thread_allocated_tsd_get()->deallocated, uint64_t) +CTL_RO_NL_CGEN(config_stats, thread_deallocatedp, + &thread_allocated_tsd_get()->deallocated, uint64_t *) /******************************************************************************/ diff --git a/src/jemalloc.c b/src/jemalloc.c index 2610452e..331e4737 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -4,36 +4,9 @@ /******************************************************************************/ /* Data. */ -malloc_mutex_t arenas_lock; -arena_t **arenas; -unsigned narenas; - -pthread_key_t arenas_tsd; -#ifdef JEMALLOC_TLS -__thread arena_t *arenas_tls JEMALLOC_ATTR(tls_model("initial-exec")); -#endif - -#ifdef JEMALLOC_TLS -__thread thread_allocated_t thread_allocated_tls; -#endif -pthread_key_t thread_allocated_tsd; - -/* Set to true once the allocator has been initialized. 
*/ -static bool malloc_initialized = false; - -/* Used to let the initializing thread recursively allocate. */ -static pthread_t malloc_initializer = (unsigned long)0; - -/* Used to avoid initialization races. */ -static malloc_mutex_t init_lock = MALLOC_MUTEX_INITIALIZER; - -#ifdef DYNAMIC_PAGE_SHIFT -size_t pagesize; -size_t pagesize_mask; -size_t lg_pagesize; -#endif - -unsigned ncpus; +malloc_tsd_data(, arenas, arena_t *, NULL) +malloc_tsd_data(, thread_allocated, thread_allocated_t, + THREAD_ALLOCATED_INITIALIZER) /* Runtime configuration options. */ const char *je_malloc_conf JEMALLOC_ATTR(visibility("default")); @@ -52,15 +25,32 @@ bool opt_xmalloc = false; bool opt_zero = false; size_t opt_narenas = 0; +#ifdef DYNAMIC_PAGE_SHIFT +size_t pagesize; +size_t pagesize_mask; +size_t lg_pagesize; +#endif + +unsigned ncpus; + +malloc_mutex_t arenas_lock; +arena_t **arenas; +unsigned narenas; + +/* Set to true once the allocator has been initialized. */ +static bool malloc_initialized = false; + +/* Used to let the initializing thread recursively allocate. */ +static pthread_t malloc_initializer = (unsigned long)0; + +/* Used to avoid initialization races. */ +static malloc_mutex_t init_lock = MALLOC_MUTEX_INITIALIZER; + /******************************************************************************/ /* Function prototypes for non-inline static functions. */ static void stats_print_atexit(void); static unsigned malloc_ncpus(void); -static void arenas_cleanup(void *arg); -#ifndef JEMALLOC_TLS -static void thread_allocated_cleanup(void *arg); -#endif static bool malloc_conf_next(char const **opts_p, char const **k_p, size_t *klen_p, char const **v_p, size_t *vlen_p); static void malloc_conf_error(const char *msg, const char *k, size_t klen, @@ -156,7 +146,7 @@ choose_arena_hard(void) malloc_mutex_unlock(&arenas_lock); } - ARENA_SET(ret); + arenas_tsd_set(&ret); return (ret); } @@ -197,26 +187,6 @@ stats_print_atexit(void) je_malloc_stats_print(NULL, NULL, NULL); } -thread_allocated_t * -thread_allocated_get_hard(void) -{ - thread_allocated_t *thread_allocated = (thread_allocated_t *) - imalloc(sizeof(thread_allocated_t)); - if (thread_allocated == NULL) { - static thread_allocated_t static_thread_allocated = {0, 0}; - malloc_write(": Error allocating TSD;" - " mallctl(\"thread.{de,}allocated[p]\", ...)" - " will be inaccurate\n"); - if (opt_abort) - abort(); - return (&static_thread_allocated); - } - pthread_setspecific(thread_allocated_tsd, thread_allocated); - thread_allocated->allocated = 0; - thread_allocated->deallocated = 0; - return (thread_allocated); -} - /* * End miscellaneous support functions. */ @@ -241,32 +211,16 @@ malloc_ncpus(void) return (ret); } -static void +void arenas_cleanup(void *arg) { - arena_t *arena = (arena_t *)arg; + arena_t *arena = *(arena_t **)arg; malloc_mutex_lock(&arenas_lock); arena->nthreads--; malloc_mutex_unlock(&arenas_lock); } -#ifndef JEMALLOC_TLS -static void -thread_allocated_cleanup(void *arg) -{ - uint64_t *allocated = (uint64_t *)arg; - - if (allocated != NULL) - idalloc(allocated); -} -#endif - -/* - * FreeBSD's pthreads implementation calls malloc(3), so the malloc - * implementation has to take pains to avoid infinite recursion during - * initialization. 
- */ static inline bool malloc_init(void) { @@ -604,6 +558,7 @@ malloc_init_hard(void) } #endif + malloc_tsd_boot(); if (config_prof) prof_boot0(); @@ -631,7 +586,7 @@ malloc_init_hard(void) } } - if (chunk_boot()) { + if (chunk_boot0()) { malloc_mutex_unlock(&init_lock); return (true); } @@ -646,7 +601,7 @@ malloc_init_hard(void) arena_boot(); - if (config_tcache && tcache_boot()) { + if (config_tcache && tcache_boot0()) { malloc_mutex_unlock(&init_lock); return (true); } @@ -656,23 +611,9 @@ malloc_init_hard(void) return (true); } -#ifndef JEMALLOC_TLS - /* Initialize allocation counters before any allocations can occur. */ - if (config_stats && pthread_key_create(&thread_allocated_tsd, - thread_allocated_cleanup) != 0) { - malloc_mutex_unlock(&init_lock); - return (true); - } -#endif - if (malloc_mutex_init(&arenas_lock)) return (true); - if (pthread_key_create(&arenas_tsd, arenas_cleanup) != 0) { - malloc_mutex_unlock(&init_lock); - return (true); - } - /* * Create enough scaffolding to allow recursive allocation in * malloc_ncpus(). @@ -691,25 +632,38 @@ malloc_init_hard(void) return (true); } - /* - * Assign the initial arena to the initial thread, in order to avoid - * spurious creation of an extra arena if the application switches to - * threaded mode. - */ - ARENA_SET(arenas[0]); - arenas[0]->nthreads++; + /* Initialize allocation counters before any allocations can occur. */ + if (config_stats && thread_allocated_tsd_boot()) { + malloc_mutex_unlock(&init_lock); + return (true); + } if (config_prof && prof_boot2()) { malloc_mutex_unlock(&init_lock); return (true); } + if (arenas_tsd_boot()) { + malloc_mutex_unlock(&init_lock); + return (true); + } + + if (config_tcache && tcache_boot1()) { + malloc_mutex_unlock(&init_lock); + return (true); + } + /* Get number of CPUs. 
*/ malloc_initializer = pthread_self(); malloc_mutex_unlock(&init_lock); ncpus = malloc_ncpus(); malloc_mutex_lock(&init_lock); + if (chunk_boot1()) { + malloc_mutex_unlock(&init_lock); + return (true); + } + if (opt_narenas == 0) { /* * For SMP systems, create more than one arena per CPU by @@ -844,7 +798,7 @@ OOM: prof_malloc(ret, usize, cnt); if (config_stats && ret != NULL) { assert(usize == isalloc(ret)); - ALLOCATED_ADD(usize, 0); + thread_allocated_tsd_get()->allocated += usize; } return (ret); } @@ -939,7 +893,7 @@ imemalign(void **memptr, size_t alignment, size_t size, RETURN: if (config_stats && result != NULL) { assert(usize == isalloc(result)); - ALLOCATED_ADD(usize, 0); + thread_allocated_tsd_get()->allocated += usize; } if (config_prof && opt_prof && result != NULL) prof_malloc(result, usize, cnt); @@ -1044,7 +998,7 @@ RETURN: prof_malloc(ret, usize, cnt); if (config_stats && ret != NULL) { assert(usize == isalloc(ret)); - ALLOCATED_ADD(usize, 0); + thread_allocated_tsd_get()->allocated += usize; } return (ret); } @@ -1173,8 +1127,11 @@ RETURN: if (config_prof && opt_prof) prof_realloc(ret, usize, cnt, old_size, old_ctx); if (config_stats && ret != NULL) { + thread_allocated_t *ta; assert(usize == isalloc(ret)); - ALLOCATED_ADD(usize, old_size); + ta = thread_allocated_tsd_get(); + ta->allocated += usize; + ta->deallocated += old_size; } return (ret); } @@ -1197,7 +1154,7 @@ je_free(void *ptr) usize = isalloc(ptr); } if (config_stats) - ALLOCATED_ADD(0, usize); + thread_allocated_tsd_get()->deallocated += usize; idalloc(ptr); } } @@ -1412,7 +1369,7 @@ je_allocm(void **ptr, size_t *rsize, size_t size, int flags) *ptr = p; if (config_stats) { assert(usize == isalloc(p)); - ALLOCATED_ADD(usize, 0); + thread_allocated_tsd_get()->allocated += usize; } return (ALLOCM_SUCCESS); OOM: @@ -1502,8 +1459,12 @@ je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags) } *ptr = q; - if (config_stats) - ALLOCATED_ADD(usize, old_size); + if (config_stats) { + thread_allocated_t *ta; + ta = thread_allocated_tsd_get(); + ta->allocated += usize; + ta->deallocated += old_size; + } return (ALLOCM_SUCCESS); ERR: if (no_move) @@ -1556,7 +1517,7 @@ je_dallocm(void *ptr, int flags) prof_free(ptr, usize); } if (config_stats) - ALLOCATED_ADD(0, usize); + thread_allocated_tsd_get()->deallocated += usize; idalloc(ptr); return (ALLOCM_SUCCESS); diff --git a/src/prof.c b/src/prof.c index 9c327379..ba0b64e1 100644 --- a/src/prof.c +++ b/src/prof.c @@ -14,6 +14,8 @@ /******************************************************************************/ /* Data. */ +malloc_tsd_data(, prof_tdata, prof_tdata_t *, NULL) + bool opt_prof = false; bool opt_prof_active = true; size_t opt_lg_prof_sample = LG_PROF_SAMPLE_DEFAULT; @@ -26,12 +28,6 @@ char opt_prof_prefix[PATH_MAX + 1]; uint64_t prof_interval; bool prof_promote; -#ifdef JEMALLOC_TLS -__thread prof_tdata_t *prof_tdata_tls - JEMALLOC_ATTR(tls_model("initial-exec")); -#endif -pthread_key_t prof_tdata_tsd; - /* * Global hash of (prof_bt_t *)-->(prof_ctx_t *). This is the master data * structure that knows about all backtraces currently captured. @@ -50,7 +46,7 @@ static uint64_t prof_dump_useq; * all profile dumps. The buffer is implicitly protected by bt2ctx_mtx, since * it must be locked anyway during dumping. 
*/ -static char prof_dump_buf[PROF_DUMP_BUF_SIZE]; +static char prof_dump_buf[PROF_DUMP_BUFSIZE]; static unsigned prof_dump_buf_end; static int prof_dump_fd; @@ -91,7 +87,6 @@ static void prof_fdump(void); static void prof_bt_hash(const void *key, unsigned minbits, size_t *hash1, size_t *hash2); static bool prof_bt_keycomp(const void *k1, const void *k2); -static void prof_tdata_cleanup(void *arg); /******************************************************************************/ @@ -439,7 +434,7 @@ prof_lookup(prof_bt_t *bt) cassert(config_prof); - prof_tdata = PROF_TCACHE_GET(); + prof_tdata = *prof_tdata_tsd_get(); if (prof_tdata == NULL) { prof_tdata = prof_tdata_init(); if (prof_tdata == NULL) @@ -599,16 +594,16 @@ prof_write(bool propagate_err, const char *s) slen = strlen(s); while (i < slen) { /* Flush the buffer if it is full. */ - if (prof_dump_buf_end == PROF_DUMP_BUF_SIZE) + if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) if (prof_flush(propagate_err) && propagate_err) return (true); - if (prof_dump_buf_end + slen <= PROF_DUMP_BUF_SIZE) { + if (prof_dump_buf_end + slen <= PROF_DUMP_BUFSIZE) { /* Finish writing. */ n = slen - i; } else { /* Write as much of s as will fit. */ - n = PROF_DUMP_BUF_SIZE - prof_dump_buf_end; + n = PROF_DUMP_BUFSIZE - prof_dump_buf_end; } memcpy(&prof_dump_buf[prof_dump_buf_end], &s[i], n); prof_dump_buf_end += n; @@ -624,10 +619,12 @@ prof_printf(bool propagate_err, const char *format, ...) { bool ret; va_list ap; + char buf[PROF_PRINTF_BUFSIZE]; va_start(ap, format); - ret = prof_write(propagate_err, malloc_vtprintf(format, ap)); + malloc_snprintf(buf, sizeof(buf), format, ap); va_end(ap); + ret = prof_write(propagate_err, buf); return (ret); } @@ -795,11 +792,13 @@ static bool prof_dump_maps(bool propagate_err) { int mfd; + char filename[PATH_MAX + 1]; cassert(config_prof); - mfd = open(malloc_tprintf("/proc/%d/maps", (int)getpid()), - O_RDONLY); + malloc_snprintf(filename, sizeof(filename), "/proc/%d/maps", + (int)getpid()); + mfd = open(filename, O_RDONLY); if (mfd != -1) { ssize_t nread; @@ -809,13 +808,13 @@ prof_dump_maps(bool propagate_err) nread = 0; do { prof_dump_buf_end += nread; - if (prof_dump_buf_end == PROF_DUMP_BUF_SIZE) { + if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) { /* Make space in prof_dump_buf before read(). 
*/ if (prof_flush(propagate_err) && propagate_err) return (true); } nread = read(mfd, &prof_dump_buf[prof_dump_buf_end], - PROF_DUMP_BUF_SIZE - prof_dump_buf_end); + PROF_DUMP_BUFSIZE - prof_dump_buf_end); } while (nread > 0); close(mfd); } else @@ -1098,16 +1097,16 @@ prof_tdata_init(void) prof_tdata->threshold = 0; prof_tdata->accum = 0; - PROF_TCACHE_SET(prof_tdata); + prof_tdata_tsd_set(&prof_tdata); return (prof_tdata); } -static void +void prof_tdata_cleanup(void *arg) { prof_thr_cnt_t *cnt; - prof_tdata_t *prof_tdata = (prof_tdata_t *)arg; + prof_tdata_t *prof_tdata = *(prof_tdata_t **)arg; cassert(config_prof); @@ -1127,7 +1126,8 @@ prof_tdata_cleanup(void *arg) idalloc(prof_tdata->vec); idalloc(prof_tdata); - PROF_TCACHE_SET(NULL); + prof_tdata = NULL; + prof_tdata_tsd_set(&prof_tdata); } void @@ -1182,8 +1182,7 @@ prof_boot2(void) return (true); if (malloc_mutex_init(&bt2ctx_mtx)) return (true); - if (pthread_key_create(&prof_tdata_tsd, prof_tdata_cleanup) - != 0) { + if (prof_tdata_tsd_boot()) { malloc_write( "<jemalloc>: Error in pthread_key_create()\n"); abort(); diff --git a/src/tcache.c b/src/tcache.c index f90308cd..3442406d 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -4,30 +4,16 @@ /******************************************************************************/ /* Data. */ +malloc_tsd_data(, tcache, tcache_t *, NULL) + bool opt_tcache = true; ssize_t opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT; tcache_bin_info_t *tcache_bin_info; static unsigned stack_nelms; /* Total stack elms per tcache. */ -/* Map of thread-specific caches. */ -#ifdef JEMALLOC_TLS -__thread tcache_t *tcache_tls JEMALLOC_ATTR(tls_model("initial-exec")); -#endif - -/* - * Same contents as tcache, but initialized such that the TSD destructor is - * called when a thread exits, so that the cache can be cleaned up. - */ -pthread_key_t tcache_tsd; - -size_t nhbins; -size_t tcache_maxclass; - -/******************************************************************************/ -/* Function prototypes for non-inline static functions. */ - -static void tcache_thread_cleanup(void *arg); +size_t nhbins; +size_t tcache_maxclass; /******************************************************************************/ @@ -196,6 +182,33 @@ tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem, tbin->low_water = tbin->ncached; } +void +tcache_arena_associate(tcache_t *tcache, arena_t *arena) +{ + + if (config_stats) { + /* Link into list of extant tcaches.
*/ - malloc_mutex_lock(&arena->lock); - ql_elm_new(tcache, link); - ql_tail_insert(&arena->tcache_ql, tcache, link); - malloc_mutex_unlock(&arena->lock); - } + tcache_arena_associate(tcache, arena); - tcache->arena = arena; assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0); for (i = 0; i < nhbins; i++) { tcache->tbins[i].lg_fill_div = 1; @@ -245,7 +251,7 @@ tcache_create(arena_t *arena) stack_offset += tcache_bin_info[i].ncached_max * sizeof(void *); } - TCACHE_SET(tcache); + tcache_tsd_set(&tcache); return (tcache); } @@ -256,13 +262,7 @@ tcache_destroy(tcache_t *tcache) unsigned i; size_t tcache_size; - if (config_stats) { - /* Unlink from list of extant tcaches. */ - malloc_mutex_lock(&tcache->arena->lock); - ql_remove(&tcache->arena->tcache_ql, tcache, link); - malloc_mutex_unlock(&tcache->arena->lock); - tcache_stats_merge(tcache, tcache->arena); - } + tcache_arena_dissociate(tcache); for (i = 0; i < NBINS; i++) { tcache_bin_t *tbin = &tcache->tbins[i]; @@ -323,10 +323,10 @@ tcache_destroy(tcache_t *tcache) idalloc(tcache); } -static void +void tcache_thread_cleanup(void *arg) { - tcache_t *tcache = (tcache_t *)arg; + tcache_t *tcache = *(tcache_t **)arg; if (tcache == (void *)(uintptr_t)1) { /* @@ -341,11 +341,13 @@ tcache_thread_cleanup(void *arg) * destructor was called. Reset tcache to 1 in order to * receive another callback. */ - TCACHE_SET((uintptr_t)1); + tcache = (tcache_t *)(uintptr_t)1; + tcache_tsd_set(&tcache); } else if (tcache != NULL) { assert(tcache != (void *)(uintptr_t)1); tcache_destroy(tcache); - TCACHE_SET((uintptr_t)1); + tcache = (tcache_t *)(uintptr_t)1; + tcache_tsd_set(&tcache); } } @@ -374,7 +376,7 @@ tcache_stats_merge(tcache_t *tcache, arena_t *arena) } bool -tcache_boot(void) +tcache_boot0(void) { if (opt_tcache) { @@ -385,8 +387,8 @@ tcache_boot(void) * SMALL_MAXCLASS and arena_maxclass are known. * XXX Can this be done earlier? */ - if (opt_lg_tcache_max < 0 || (1U << - opt_lg_tcache_max) < SMALL_MAXCLASS) + if (opt_lg_tcache_max < 0 || (1U << opt_lg_tcache_max) < + SMALL_MAXCLASS) tcache_maxclass = SMALL_MAXCLASS; else if ((1U << opt_lg_tcache_max) > arena_maxclass) tcache_maxclass = arena_maxclass; @@ -416,13 +418,18 @@ tcache_boot(void) tcache_bin_info[i].ncached_max = TCACHE_NSLOTS_LARGE; stack_nelms += tcache_bin_info[i].ncached_max; } - - if (pthread_key_create(&tcache_tsd, tcache_thread_cleanup) != - 0) { - malloc_write( - ": Error in pthread_key_create()\n"); - abort(); - } + } + + return (false); +} + +bool +tcache_boot1(void) +{ + + if (opt_tcache) { + if (tcache_tsd_boot()) + return (true); } return (false); diff --git a/src/tsd.c b/src/tsd.c new file mode 100644 index 00000000..669ea8fc --- /dev/null +++ b/src/tsd.c @@ -0,0 +1,72 @@ +#define JEMALLOC_TSD_C_ +#include "jemalloc/internal/jemalloc_internal.h" + +/******************************************************************************/ +/* Data. */ + +static unsigned ncleanups; +static malloc_tsd_cleanup_t cleanups[MALLOC_TSD_CLEANUPS_MAX]; + +/******************************************************************************/ + +void * +malloc_tsd_malloc(size_t size) +{ + + /* Avoid choose_arena() in order to dodge bootstrapping issues. 
*/ + return arena_malloc_prechosen(arenas[0], size, false); +} + +void +malloc_tsd_dalloc(void *wrapper) +{ + + idalloc(wrapper); +} + +void +malloc_tsd_no_cleanup(void *arg) +{ + + not_reached(); +} + +#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP +void +_malloc_thread_cleanup(void) +{ + bool pending[ncleanups], again; + unsigned i; + + for (i = 0; i < ncleanups; i++) + pending[i] = true; + + do { + again = false; + for (i = 0; i < ncleanups; i++) { + if (pending[i]) { + pending[i] = cleanups[i].f(cleanups[i].arg); + if (pending[i]) + again = true; + } + } + } while (again); +} +#endif + +void +malloc_tsd_cleanup_register(bool (*f)(void *), void *arg) +{ + + assert(ncleanups < MALLOC_TSD_CLEANUPS_MAX); + cleanups[ncleanups].f = f; + cleanups[ncleanups].arg = arg; + ncleanups++; +} + +void +malloc_tsd_boot(void) +{ + + ncleanups = 0; +} diff --git a/src/util.c b/src/util.c index 47e7b66e..96c87f78 100644 --- a/src/util.c +++ b/src/util.c @@ -222,6 +222,9 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) case 'z': \ val = va_arg(ap, size_t); \ break; \ + case 'p': /* Synthetic; used for %p. */ \ + val = va_arg(ap, uintptr_t); \ + break; \ default: not_reached(); \ } \ } while (0) @@ -410,7 +413,7 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) uintmax_t val; char buf[X2S_BUFSIZE]; - GET_ARG_NUMERIC(val, len); + GET_ARG_NUMERIC(val, 'p'); s = x2s(val, true, false, buf, &slen); APPEND_PADDED_S(s, slen, width, left_justify); f++; @@ -466,34 +469,11 @@ malloc_snprintf(char *str, size_t size, const char *format, ...) return (ret); } -const char * -malloc_vtprintf(const char *format, va_list ap) -{ - static __thread char buf[MALLOC_PRINTF_BUFSIZE]; - - malloc_vsnprintf(buf, sizeof(buf), format, ap); - - return (buf); -} - -JEMALLOC_ATTR(format(printf, 1, 2)) -const char * -malloc_tprintf(const char *format, ...) -{ - const char *ret; - va_list ap; - - va_start(ap, format); - ret = malloc_vtprintf(format, ap); - va_end(ap); - - return (ret); -} - void malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque, const char *format, va_list ap) { + char buf[MALLOC_PRINTF_BUFSIZE]; if (write_cb == NULL) { /* @@ -505,7 +485,8 @@ malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque, cbopaque = NULL; } - write_cb(cbopaque, malloc_vtprintf(format, ap)); + malloc_vsnprintf(buf, sizeof(buf), format, ap); + write_cb(cbopaque, buf); } /* From 9225a1991a58190207cca2ff3cdba966bb322dd5 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 23 Mar 2012 15:39:07 -0700 Subject: [PATCH 066/205] Add JEMALLOC_CC_SILENCE_INIT(). Add JEMALLOC_CC_SILENCE_INIT(), which provides succinct syntax for initializing a variable to avoid a spurious compiler warning. 
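JEMALLOC_CC_SILENCE_INIT(v) expands to "= v" when JEMALLOC_CC_SILENCE is
defined and to nothing otherwise, so the dummy initializer is only emitted
in builds that opt into warning silencing.  A minimal sketch of the
pattern, reusing the macro definition from this patch; first_nonzero() is
an illustrative function, not part of jemalloc:

    #ifdef JEMALLOC_CC_SILENCE
    # define JEMALLOC_CC_SILENCE_INIT(v) = v
    #else
    # define JEMALLOC_CC_SILENCE_INIT(v)
    #endif

    int
    first_nonzero(const int *a, int len)
    {
            int i;
            /*
             * Assigned before use whenever the caller guarantees a
             * nonzero element, but the compiler cannot prove that and
             * may warn without the dummy initializer.
             */
            int ret JEMALLOC_CC_SILENCE_INIT(0);

            for (i = 0; i < len; i++) {
                    if (a[i] != 0) {
                            ret = a[i];
                            break;
                    }
            }
            return (ret);
    }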
--- .../jemalloc/internal/jemalloc_internal.h.in | 6 +-- include/jemalloc/internal/util.h | 11 +++++ src/jemalloc.c | 42 ++++--------------- src/util.c | 8 ++-- 4 files changed, 23 insertions(+), 44 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 387aabbe..e0558140 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -611,11 +611,7 @@ ipalloc(size_t usize, size_t alignment, bool zero) if (usize <= arena_maxclass && alignment <= PAGE_SIZE) ret = arena_malloc(usize, zero); else { - size_t run_size -#ifdef JEMALLOC_CC_SILENCE - = 0 -#endif - ; + size_t run_size JEMALLOC_CC_SILENCE_INIT(0); /* * Ideally we would only ever call sa2u() once per aligned diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index fb354da5..5156399f 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -16,6 +16,17 @@ */ #define JEMALLOC_CONCAT(...) __VA_ARGS__ +/* + * Silence compiler warnings due to uninitialized values. This is used + * wherever the compiler fails to recognize that the variable is never used + * uninitialized. + */ +#ifdef JEMALLOC_CC_SILENCE +# define JEMALLOC_CC_SILENCE_INIT(v) = v +#else +# define JEMALLOC_CC_SILENCE_INIT(v) +#endif + /* * Define a custom assert() in order to reduce the chances of deadlock during * assertion failure. diff --git a/src/jemalloc.c b/src/jemalloc.c index 331e4737..f9451796 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -751,11 +751,7 @@ je_malloc(size_t size) { void *ret; size_t usize; - prof_thr_cnt_t *cnt -#ifdef JEMALLOC_CC_SILENCE - = NULL -#endif - ; + prof_thr_cnt_t *cnt JEMALLOC_CC_SILENCE_INIT(NULL); if (malloc_init()) { ret = NULL; @@ -818,11 +814,7 @@ imemalign(void **memptr, size_t alignment, size_t size, int ret; size_t usize; void *result; - prof_thr_cnt_t *cnt -#ifdef JEMALLOC_CC_SILENCE - = NULL -#endif - ; + prof_thr_cnt_t *cnt JEMALLOC_CC_SILENCE_INIT(NULL); assert(min_alignment != 0); @@ -932,11 +924,7 @@ je_calloc(size_t num, size_t size) void *ret; size_t num_size; size_t usize; - prof_thr_cnt_t *cnt -#ifdef JEMALLOC_CC_SILENCE - = NULL -#endif - ; + prof_thr_cnt_t *cnt JEMALLOC_CC_SILENCE_INIT(NULL); if (malloc_init()) { num_size = 0; @@ -1010,16 +998,8 @@ je_realloc(void *ptr, size_t size) void *ret; size_t usize; size_t old_size = 0; - prof_thr_cnt_t *cnt -#ifdef JEMALLOC_CC_SILENCE - = NULL -#endif - ; - prof_ctx_t *old_ctx -#ifdef JEMALLOC_CC_SILENCE - = NULL -#endif - ; + prof_thr_cnt_t *cnt JEMALLOC_CC_SILENCE_INIT(NULL); + prof_ctx_t *old_ctx JEMALLOC_CC_SILENCE_INIT(NULL); if (size == 0) { if (ptr != NULL) { @@ -1173,11 +1153,7 @@ JEMALLOC_ATTR(visibility("default")) void * je_memalign(size_t alignment, size_t size) { - void *ret -#ifdef JEMALLOC_CC_SILENCE - = NULL -#endif - ; + void *ret JEMALLOC_CC_SILENCE_INIT(NULL); imemalign(&ret, alignment, size, 1); return (ret); } @@ -1189,11 +1165,7 @@ JEMALLOC_ATTR(visibility("default")) void * je_valloc(size_t size) { - void *ret -#ifdef JEMALLOC_CC_SILENCE - = NULL -#endif - ; + void *ret JEMALLOC_CC_SILENCE_INIT(NULL); imemalign(&ret, PAGE_SIZE, size, 1); return (ret); } diff --git a/src/util.c b/src/util.c index 96c87f78..698b53a9 100644 --- a/src/util.c +++ b/src/util.c @@ -353,7 +353,7 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) char *s; size_t slen; case 'd': case 'i': { - intmax_t val; + intmax_t val JEMALLOC_CC_SILENCE_INIT(0); char 
buf[D2S_BUFSIZE]; GET_ARG_NUMERIC(val, len); @@ -363,7 +363,7 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) f++; break; } case 'o': { - uintmax_t val; + uintmax_t val JEMALLOC_CC_SILENCE_INIT(0); char buf[O2S_BUFSIZE]; GET_ARG_NUMERIC(val, len); @@ -372,7 +372,7 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) f++; break; } case 'u': { - uintmax_t val; + uintmax_t val JEMALLOC_CC_SILENCE_INIT(0); char buf[U2S_BUFSIZE]; GET_ARG_NUMERIC(val, len); @@ -381,7 +381,7 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) f++; break; } case 'x': case 'X': { - uintmax_t val; + uintmax_t val JEMALLOC_CC_SILENCE_INIT(0); char buf[X2S_BUFSIZE]; GET_ARG_NUMERIC(val, len); From 06304a97854dbbf09075278fe2c90365254480da Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 23 Mar 2012 16:09:56 -0700 Subject: [PATCH 067/205] Restructure atomic_*_z(). Restructure atomic_*_z() so that no casting within macros is necessary. This avoids warnings when compiling with clang. --- include/jemalloc/internal/atomic.h | 45 +++++++++++++++++++----------- 1 file changed, 29 insertions(+), 16 deletions(-) diff --git a/include/jemalloc/internal/atomic.h b/include/jemalloc/internal/atomic.h index 1dbb7d6a..afeb9cb7 100644 --- a/include/jemalloc/internal/atomic.h +++ b/include/jemalloc/internal/atomic.h @@ -11,22 +11,7 @@ #define atomic_read_uint64(p) atomic_add_uint64(p, 0) #define atomic_read_uint32(p) atomic_add_uint32(p, 0) - -#if (LG_SIZEOF_PTR == 3) -# define atomic_read_z(p) \ - (size_t)atomic_add_uint64((uint64_t *)p, (uint64_t)0) -# define atomic_add_z(p, x) \ - (size_t)atomic_add_uint64((uint64_t *)p, (uint64_t)x) -# define atomic_sub_z(p, x) \ - (size_t)atomic_sub_uint64((uint64_t *)p, (uint64_t)x) -#elif (LG_SIZEOF_PTR == 2) -# define atomic_read_z(p) \ - (size_t)atomic_add_uint32((uint32_t *)p, (uint32_t)0) -# define atomic_add_z(p, x) \ - (size_t)atomic_add_uint32((uint32_t *)p, (uint32_t)x) -# define atomic_sub_z(p, x) \ - (size_t)atomic_sub_uint32((uint32_t *)p, (uint32_t)x) -#endif +#define atomic_read_z(p) atomic_add_z(p, 0) #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ @@ -37,6 +22,8 @@ uint64_t atomic_add_uint64(uint64_t *p, uint64_t x); uint64_t atomic_sub_uint64(uint64_t *p, uint64_t x); uint32_t atomic_add_uint32(uint32_t *p, uint32_t x); uint32_t atomic_sub_uint32(uint32_t *p, uint32_t x); +size_t atomic_add_z(size_t *p, size_t x); +size_t atomic_sub_z(size_t *p, size_t x); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ATOMIC_C_)) @@ -179,6 +166,32 @@ atomic_sub_uint32(uint32_t *p, uint32_t x) #else # error "Missing implementation for 32-bit atomic operations" #endif + +/******************************************************************************/ +/* size_t operations. 
*/ +JEMALLOC_INLINE size_t +atomic_add_z(size_t *p, size_t x) +{ + +#if (LG_SIZEOF_PTR == 3) + return ((size_t)atomic_add_uint64((uint64_t *)p, (uint64_t)x)); +#elif (LG_SIZEOF_PTR == 2) + return ((size_t)atomic_add_uint32((uint32_t *)p, (uint32_t)x)); +#endif +} + +JEMALLOC_INLINE size_t +atomic_sub_z(size_t *p, size_t x) +{ + +#if (LG_SIZEOF_PTR == 3) + return ((size_t)atomic_add_uint64((uint64_t *)p, + (uint64_t)-((int64_t)x))); +#elif (LG_SIZEOF_PTR == 2) + return ((size_t)atomic_add_uint32((uint32_t *)p, + (uint32_t)-((int32_t)x))); +#endif +} #endif #endif /* JEMALLOC_H_INLINES */ From 9022bf9bfd6ab907e5b019fed09fdc3acdf1c280 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 23 Mar 2012 16:14:08 -0700 Subject: [PATCH 068/205] Remove -no-cpp-precomp compiler flag for OS X. Remove the -no-cpp-precomp compiler flag when compiling on OS X. clang does not support the flag, and gcc works fine without it. --- configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 44ff6eec..e1a399b0 100644 --- a/configure.ac +++ b/configure.ac @@ -208,7 +208,7 @@ dnl definitions need to be seen before any headers are included, which is a pain dnl to make happen otherwise. case "${host}" in *-*-darwin*) - CFLAGS="$CFLAGS -fno-common -no-cpp-precomp" + CFLAGS="$CFLAGS -fno-common" abi="macho" AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) RPATH="" From b80581d30928e04b3d12b1fec2b989da44a07e2c Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 23 Mar 2012 16:17:43 -0700 Subject: [PATCH 069/205] Forcibly disable TLS on OS X. Forcibly disable TLS on OS X. gcc and llvm-gcc on OS X do not support TLS, but clang does. Unfortunately, the implementation calls malloc() internally during TLS initialization, which causes an unresolvable bootstrapping issue. --- configure.ac | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index e1a399b0..7e4f2211 100644 --- a/configure.ac +++ b/configure.ac @@ -214,6 +214,7 @@ case "${host}" in RPATH="" LD_PRELOAD_VAR="DYLD_INSERT_LIBRARIES" so="dylib" + force_tls="0" ;; *-*-freebsd*) CFLAGS="$CFLAGS" @@ -767,7 +768,6 @@ dnl Check whether the BSD-specific _malloc_thread_cleanup() exists. If so, use dnl it rather than pthreads TSD cleanup functions to support cleanup during dnl thread exit, in order to avoid pthreads library recursion during dnl bootstrapping. -force_tls="0" AC_CHECK_FUNC([_malloc_thread_cleanup], [have__malloc_thread_cleanup="1"], [have__malloc_thread_cleanup="0"] ) @@ -813,6 +813,10 @@ if test "x${enable_tls}" = "x0" -a "x${force_tls}" = "x1" ; then AC_MSG_RESULT([Forcing TLS to avoid allocator/threading bootstrap issues]) enable_tls="1" fi +if test "x${enable_tls}" = "x1" -a "x${force_tls}" = "x0" ; then + AC_MSG_RESULT([Forcing no TLS to avoid allocator/threading bootstrap issues]) + enable_tls="0" +fi if test "x${enable_tls}" = "x1" ; then AC_MSG_CHECKING([for TLS]) AC_COMPILE_IFELSE([AC_LANG_PROGRAM( From 6da5418ded9170b087c35960e0010006430117c1 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 23 Mar 2012 18:05:51 -0700 Subject: [PATCH 070/205] Remove ephemeral mutexes. Remove ephemeral mutexes from the prof machinery, and remove malloc_mutex_destroy(). This simplifies mutex management on systems that call malloc()/free() inside pthread_mutex_{init,destroy}(). Add atomic_*_u() for operation on unsigned values. Fix prof_printf() to call malloc_vsnprintf() rather than malloc_snprintf().
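The replacement scheme initializes PROF_NCTX_LOCKS mutexes once at boot
and assigns one to each new ctx round-robin via an atomic counter, so no
mutex is ever created or destroyed after startup.  A rough standalone
sketch of that pattern, substituting C11 atomics and plain pthread mutexes
for jemalloc's atomic_add_u() and malloc_mutex_t (all names below are
illustrative):

    #include <pthread.h>
    #include <stdatomic.h>

    #define NCTX_LOCKS 1024         /* Over-provisioned; ctx's share. */

    static pthread_mutex_t ctx_locks[NCTX_LOCKS];
    static atomic_uint cum_ctxs;    /* Total ctx's ever created. */

    static void
    ctx_locks_boot(void)
    {
            unsigned i;

            for (i = 0; i < NCTX_LOCKS; i++)
                    pthread_mutex_init(&ctx_locks[i], NULL);
    }

    /*
     * These are leaf locks, so sharing one lock among many ephemeral
     * ctx's cannot deadlock, and no mutex ever needs destroying.
     */
    static pthread_mutex_t *
    ctx_mutex_choose(void)
    {
            /*
             * atomic_fetch_add() returns the old value; adding 1 mimics
             * atomic_add_u(), which returns the new value.
             */
            unsigned nctxs = atomic_fetch_add(&cum_ctxs, 1) + 1;

            return (&ctx_locks[(nctxs - 1) % NCTX_LOCKS]);
    }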
--- include/jemalloc/internal/atomic.h | 30 ++++++++++++++ include/jemalloc/internal/mutex.h | 1 - include/jemalloc/internal/prof.h | 16 +++++--- src/jemalloc.c | 10 ++--- src/mutex.c | 12 ------ src/prof.c | 63 ++++++++++++++++++++---------- 6 files changed, 89 insertions(+), 43 deletions(-) diff --git a/include/jemalloc/internal/atomic.h b/include/jemalloc/internal/atomic.h index afeb9cb7..7a9cb61e 100644 --- a/include/jemalloc/internal/atomic.h +++ b/include/jemalloc/internal/atomic.h @@ -12,6 +12,7 @@ #define atomic_read_uint64(p) atomic_add_uint64(p, 0) #define atomic_read_uint32(p) atomic_add_uint32(p, 0) #define atomic_read_z(p) atomic_add_z(p, 0) +#define atomic_read_u(p) atomic_add_u(p, 0) #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ @@ -24,6 +25,8 @@ uint32_t atomic_add_uint32(uint32_t *p, uint32_t x); uint32_t atomic_sub_uint32(uint32_t *p, uint32_t x); size_t atomic_add_z(size_t *p, size_t x); size_t atomic_sub_z(size_t *p, size_t x); +unsigned atomic_add_u(unsigned *p, unsigned x); +unsigned atomic_sub_u(unsigned *p, unsigned x); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ATOMIC_C_)) @@ -192,6 +195,33 @@ atomic_sub_z(size_t *p, size_t x) (uint32_t)-((int32_t)x))); #endif } + +/******************************************************************************/ +/* unsigned operations. */ +JEMALLOC_INLINE unsigned +atomic_add_u(unsigned *p, unsigned x) +{ + +#if (LG_SIZEOF_INT == 3) + return ((unsigned)atomic_add_uint64((uint64_t *)p, (uint64_t)x)); +#elif (LG_SIZEOF_INT == 2) + return ((unsigned)atomic_add_uint32((uint32_t *)p, (uint32_t)x)); +#endif +} + +JEMALLOC_INLINE unsigned +atomic_sub_u(unsigned *p, unsigned x) +{ + +#if (LG_SIZEOF_INT == 3) + return ((unsigned)atomic_add_uint64((uint64_t *)p, + (uint64_t)-((int64_t)x))); +#elif (LG_SIZEOF_INT == 2) + return ((unsigned)atomic_add_uint32((uint32_t *)p, + (uint32_t)-((int32_t)x))); +#endif +} +/******************************************************************************/ #endif #endif /* JEMALLOC_H_INLINES */ diff --git a/include/jemalloc/internal/mutex.h b/include/jemalloc/internal/mutex.h index 9d136585..10637e92 100644 --- a/include/jemalloc/internal/mutex.h +++ b/include/jemalloc/internal/mutex.h @@ -28,7 +28,6 @@ extern bool isthreaded; #endif bool malloc_mutex_init(malloc_mutex_t *mutex); -void malloc_mutex_destroy(malloc_mutex_t *mutex); void malloc_mutex_prefork(malloc_mutex_t *mutex); void malloc_mutex_postfork_parent(malloc_mutex_t *mutex); void malloc_mutex_postfork_child(malloc_mutex_t *mutex); diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index 231a3876..34929e7e 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -31,6 +31,12 @@ typedef struct prof_tdata_s prof_tdata_t; /* Size of stack-allocated buffer used by prof_printf(). */ #define PROF_PRINTF_BUFSIZE 128 +/* + * Number of mutexes shared among all ctx's. No space is allocated for these + * unless profiling is enabled, so it's okay to over-provision. + */ +#define PROF_NCTX_LOCKS 1024 + #endif /* JEMALLOC_H_TYPES */ /******************************************************************************/ #ifdef JEMALLOC_H_STRUCTS @@ -108,7 +114,7 @@ struct prof_ctx_s { prof_bt_t *bt; /* Protects cnt_merged and cnts_ql. */ - malloc_mutex_t lock; + malloc_mutex_t *lock; /* Temporary storage for summation during dump. 
*/ prof_cnt_t cnt_summed; @@ -444,10 +450,10 @@ prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt, * It's too late to propagate OOM for this realloc(), * so operate directly on old_cnt->ctx->cnt_merged. */ - malloc_mutex_lock(&old_ctx->lock); + malloc_mutex_lock(old_ctx->lock); old_ctx->cnt_merged.curobjs--; old_ctx->cnt_merged.curbytes -= old_size; - malloc_mutex_unlock(&old_ctx->lock); + malloc_mutex_unlock(old_ctx->lock); told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U; } } else @@ -516,10 +522,10 @@ prof_free(const void *ptr, size_t size) * OOM during free() cannot be propagated, so operate * directly on cnt->ctx->cnt_merged. */ - malloc_mutex_lock(&ctx->lock); + malloc_mutex_lock(ctx->lock); ctx->cnt_merged.curobjs--; ctx->cnt_merged.curbytes -= size; - malloc_mutex_unlock(&ctx->lock); + malloc_mutex_unlock(ctx->lock); } } } diff --git a/src/jemalloc.c b/src/jemalloc.c index f9451796..b3e898c1 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -638,11 +638,6 @@ malloc_init_hard(void) return (true); } - if (config_prof && prof_boot2()) { - malloc_mutex_unlock(&init_lock); - return (true); - } - if (arenas_tsd_boot()) { malloc_mutex_unlock(&init_lock); return (true); @@ -653,6 +648,11 @@ malloc_init_hard(void) return (true); } + if (config_prof && prof_boot2()) { + malloc_mutex_unlock(&init_lock); + return (true); + } + /* Get number of CPUs. */ malloc_initializer = pthread_self(); malloc_mutex_unlock(&init_lock); diff --git a/src/mutex.c b/src/mutex.c index 243b7129..07d2a033 100644 --- a/src/mutex.c +++ b/src/mutex.c @@ -81,18 +81,6 @@ malloc_mutex_init(malloc_mutex_t *mutex) return (false); } -void -malloc_mutex_destroy(malloc_mutex_t *mutex) -{ - -#ifndef JEMALLOC_OSSPIN - if (pthread_mutex_destroy(mutex) != 0) { - malloc_write(": Error in pthread_mutex_destroy()\n"); - abort(); - } -#endif -} - void malloc_mutex_prefork(malloc_mutex_t *mutex) { diff --git a/src/prof.c b/src/prof.c index ba0b64e1..bc21d894 100644 --- a/src/prof.c +++ b/src/prof.c @@ -28,6 +28,16 @@ char opt_prof_prefix[PATH_MAX + 1]; uint64_t prof_interval; bool prof_promote; +/* + * Table of mutexes that are shared among ctx's. These are leaf locks, so + * there is no problem with using them for more than one ctx at the same time. + * The primary motivation for this sharing though is that ctx's are ephemeral, + * and destroying mutexes causes complications for systems that allocate when + * creating/destroying mutexes. + */ +static malloc_mutex_t *ctx_locks; +static unsigned cum_ctxs; /* Atomic counter. */ + /* * Global hash of (prof_bt_t *)-->(prof_ctx_t *). This is the master data * structure that knows about all backtraces currently captured. @@ -87,6 +97,7 @@ static void prof_fdump(void); static void prof_bt_hash(const void *key, unsigned minbits, size_t *hash1, size_t *hash2); static bool prof_bt_keycomp(const void *k1, const void *k2); +static malloc_mutex_t *prof_ctx_mutex_choose(void); /******************************************************************************/ @@ -471,18 +482,12 @@ prof_lookup(prof_bt_t *bt) return (NULL); } ctx.p->bt = btkey.p; - if (malloc_mutex_init(&ctx.p->lock)) { - prof_leave(); - idalloc(btkey.v); - idalloc(ctx.v); - return (NULL); - } + ctx.p->lock = prof_ctx_mutex_choose(); memset(&ctx.p->cnt_merged, 0, sizeof(prof_cnt_t)); ql_new(&ctx.p->cnts_ql); if (ckh_insert(&bt2ctx, btkey.v, ctx.v)) { /* OOM. 
*/ prof_leave(); - malloc_mutex_destroy(&ctx.p->lock); idalloc(btkey.v); idalloc(ctx.v); return (NULL); @@ -502,9 +507,9 @@ prof_lookup(prof_bt_t *bt) * Artificially raise curobjs, in order to avoid a race * condition with prof_ctx_merge()/prof_ctx_destroy(). */ - malloc_mutex_lock(&ctx.p->lock); + malloc_mutex_lock(ctx.p->lock); ctx.p->cnt_merged.curobjs++; - malloc_mutex_unlock(&ctx.p->lock); + malloc_mutex_unlock(ctx.p->lock); new_ctx = false; } prof_leave(); @@ -547,10 +552,10 @@ prof_lookup(prof_bt_t *bt) return (NULL); } ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link); - malloc_mutex_lock(&ctx.p->lock); + malloc_mutex_lock(ctx.p->lock); ql_tail_insert(&ctx.p->cnts_ql, ret.p, cnts_link); ctx.p->cnt_merged.curobjs--; - malloc_mutex_unlock(&ctx.p->lock); + malloc_mutex_unlock(ctx.p->lock); } else { /* Move ret to the front of the LRU. */ ql_remove(&prof_tdata->lru_ql, ret.p, lru_link); @@ -622,7 +627,7 @@ prof_printf(bool propagate_err, const char *format, ...) char buf[PROF_PRINTF_BUFSIZE]; va_start(ap, format); - malloc_snprintf(buf, sizeof(buf), format, ap); + malloc_vsnprintf(buf, sizeof(buf), format, ap); va_end(ap); ret = prof_write(propagate_err, buf); @@ -637,7 +642,7 @@ prof_ctx_sum(prof_ctx_t *ctx, prof_cnt_t *cnt_all, size_t *leak_nctx) cassert(config_prof); - malloc_mutex_lock(&ctx->lock); + malloc_mutex_lock(ctx->lock); memcpy(&ctx->cnt_summed, &ctx->cnt_merged, sizeof(prof_cnt_t)); ql_foreach(thr_cnt, &ctx->cnts_ql, cnts_link) { @@ -676,7 +681,7 @@ prof_ctx_sum(prof_ctx_t *ctx, prof_cnt_t *cnt_all, size_t *leak_nctx) cnt_all->accumbytes += ctx->cnt_summed.accumbytes; } - malloc_mutex_unlock(&ctx->lock); + malloc_mutex_unlock(ctx->lock); } static void @@ -693,7 +698,7 @@ prof_ctx_destroy(prof_ctx_t *ctx) * prof_ctx_merge() and entry into this function. */ prof_enter(); - malloc_mutex_lock(&ctx->lock); + malloc_mutex_lock(ctx->lock); if (ql_first(&ctx->cnts_ql) == NULL && ctx->cnt_merged.curobjs == 1) { assert(ctx->cnt_merged.curbytes == 0); assert(ctx->cnt_merged.accumobjs == 0); @@ -703,9 +708,8 @@ prof_ctx_destroy(prof_ctx_t *ctx) assert(false); prof_leave(); /* Destroy ctx. */ - malloc_mutex_unlock(&ctx->lock); + malloc_mutex_unlock(ctx->lock); bt_destroy(ctx->bt); - malloc_mutex_destroy(&ctx->lock); idalloc(ctx); } else { /* @@ -713,7 +717,7 @@ prof_ctx_destroy(prof_ctx_t *ctx) * prof_lookup(). */ ctx->cnt_merged.curobjs--; - malloc_mutex_unlock(&ctx->lock); + malloc_mutex_unlock(ctx->lock); prof_leave(); } } @@ -726,7 +730,7 @@ prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt) cassert(config_prof); /* Merge cnt stats and detach from ctx. 
*/ - malloc_mutex_lock(&ctx->lock); + malloc_mutex_lock(ctx->lock); ctx->cnt_merged.curobjs += cnt->cnts.curobjs; ctx->cnt_merged.curbytes += cnt->cnts.curbytes; ctx->cnt_merged.accumobjs += cnt->cnts.accumobjs; @@ -751,7 +755,7 @@ prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt) destroy = true; } else destroy = false; - malloc_mutex_unlock(&ctx->lock); + malloc_mutex_unlock(ctx->lock); if (destroy) prof_ctx_destroy(ctx); } @@ -1067,6 +1071,14 @@ prof_bt_keycomp(const void *k1, const void *k2) return (memcmp(bt1->vec, bt2->vec, bt1->len * sizeof(void *)) == 0); } +static malloc_mutex_t * +prof_ctx_mutex_choose(void) +{ + unsigned nctxs = atomic_add_u(&cum_ctxs, 1); + + return (&ctx_locks[(nctxs - 1) % PROF_NCTX_LOCKS]); +} + prof_tdata_t * prof_tdata_init(void) { @@ -1177,6 +1189,8 @@ prof_boot2(void) cassert(config_prof); if (opt_prof) { + unsigned i; + if (ckh_new(&bt2ctx, PROF_CKH_MINITEMS, prof_bt_hash, prof_bt_keycomp)) return (true); @@ -1202,6 +1216,15 @@ prof_boot2(void) if (opt_abort) abort(); } + + ctx_locks = (malloc_mutex_t *)base_alloc(PROF_NCTX_LOCKS * + sizeof(malloc_mutex_t)); + if (ctx_locks == NULL) + return (true); + for (i = 0; i < PROF_NCTX_LOCKS; i++) { + if (malloc_mutex_init(&ctx_locks[i])) + return (true); + } } #ifdef JEMALLOC_PROF_LIBGCC From 41b6afb834b1f5250223678c52bd4f013d4234f6 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 2 Feb 2012 22:04:57 -0800 Subject: [PATCH 071/205] Port to FreeBSD. Use FreeBSD-specific functions (_pthread_mutex_init_calloc_cb(), _malloc_{pre,post}fork()) to avoid bootstrapping issues due to allocation in libc and libthr. Add malloc_strtoumax() and use it instead of strtoul(). Disable validation code in malloc_vsnprintf() and malloc_strtoumax() until jemalloc is initialized. This is necessary because locale initialization causes allocation for both vsnprintf() and strtoumax(). Force the lazy-lock feature on in order to avoid pthread_self(), because it causes allocation. Use syscall(SYS_write, ...) rather than write(...), because libthr wraps write() and causes allocation. Without this workaround, it would not be possible to print error messages in malloc_conf_init() without substantially reworking bootstrapping. Fix choose_arena_hard() to look at how many threads are assigned to the candidate choice, rather than checking whether the arena is uninitialized. This bug potentially caused more arenas to be initialized than necessary. --- README | 10 +- configure.ac | 11 ++ include/jemalloc/internal/base.h | 1 + .../jemalloc/internal/jemalloc_internal.h.in | 3 + include/jemalloc/internal/mutex.h | 5 +- include/jemalloc/internal/tsd.h | 2 +- include/jemalloc/internal/util.h | 1 + include/jemalloc/jemalloc_defs.h.in | 14 ++ src/base.c | 11 ++ src/ctl.c | 8 +- src/jemalloc.c | 64 ++++++--- src/mutex.c | 18 ++- src/util.c | 136 ++++++++++++++++-- 13 files changed, 237 insertions(+), 47 deletions(-) diff --git a/README b/README index 4d7b552b..a7864f33 100644 --- a/README +++ b/README @@ -1,10 +1,10 @@ jemalloc is a general-purpose scalable concurrent malloc(3) implementation. This distribution is a stand-alone "portable" implementation that currently -targets Linux and Apple OS X. jemalloc is included as the default allocator in -the FreeBSD and NetBSD operating systems, and it is used by the Mozilla Firefox -web browser on Microsoft Windows-related platforms. Depending on your needs, -one of the other divergent versions may suit your needs better than this -distribution. +targets FreeBSD, Linux and Apple OS X. 
jemalloc is included as the default +allocator in the FreeBSD and NetBSD operating systems, and it is used by the +Mozilla Firefox web browser on Microsoft Windows-related platforms. Depending +on your needs, one of the other divergent versions may suit your needs better +than this distribution. The COPYING file contains copyright and licensing information. diff --git a/configure.ac b/configure.ac index 7e4f2211..478ae9d4 100644 --- a/configure.ac +++ b/configure.ac @@ -777,6 +777,17 @@ if test "x$have__malloc_thread_cleanup" = "x1" ; then force_tls="1" fi +dnl Check whether the BSD-specific _pthread_mutex_init_calloc_cb() exists. If +dnl so, mutex initialization causes allocation, and we need to implement this +dnl callback function in order to prevent recursive allocation. +AC_CHECK_FUNC([_pthread_mutex_init_calloc_cb], + [have__pthread_mutex_init_calloc_cb="1"], + [have__pthread_mutex_init_calloc_cb="0"] + ) +if test "x$have__pthread_mutex_init_calloc_cb" = "x1" ; then + AC_DEFINE([JEMALLOC_MUTEX_INIT_CB]) +fi + dnl Disable lazy locking by default. AC_ARG_ENABLE([lazy_lock], [AS_HELP_STRING([--enable-lazy-lock], diff --git a/include/jemalloc/internal/base.h b/include/jemalloc/internal/base.h index 796a2835..9cf75ffb 100644 --- a/include/jemalloc/internal/base.h +++ b/include/jemalloc/internal/base.h @@ -10,6 +10,7 @@ #ifdef JEMALLOC_H_EXTERNS void *base_alloc(size_t size); +void *base_calloc(size_t number, size_t size); extent_node_t *base_node_alloc(void); void base_node_dealloc(extent_node_t *node); bool base_boot(void); diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index e0558140..4f557794 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -370,6 +371,8 @@ extern malloc_mutex_t arenas_lock; /* Protects arenas initialization. 
*/ extern arena_t **arenas; extern unsigned narenas; +extern bool malloc_initialized; + arena_t *arenas_extend(unsigned ind); void arenas_cleanup(void *arg); arena_t *choose_arena_hard(void); diff --git a/include/jemalloc/internal/mutex.h b/include/jemalloc/internal/mutex.h index 10637e92..98f2cba5 100644 --- a/include/jemalloc/internal/mutex.h +++ b/include/jemalloc/internal/mutex.h @@ -6,9 +6,12 @@ typedef OSSpinLock malloc_mutex_t; #define MALLOC_MUTEX_INITIALIZER 0 #else typedef pthread_mutex_t malloc_mutex_t; -# ifdef PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP +# if (defined(PTHREAD_MUTEX_ADAPTIVE_NP) && \ + defined(PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP)) +# define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_ADAPTIVE_NP # define MALLOC_MUTEX_INITIALIZER PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP # else +# define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_DEFAULT # define MALLOC_MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER # endif #endif diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 5a174acd..0e32c612 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -71,7 +71,7 @@ a_name##_tsd_set(a_type *val); #ifdef JEMALLOC_MALLOC_THREAD_CLEANUP #define malloc_tsd_externs(a_name, a_type) \ extern __thread a_type a_name##_tls; \ -extern __thread bool *a_name##_initialized; \ +extern __thread bool a_name##_initialized; \ extern bool a_name##_booted; #elif (defined(JEMALLOC_TLS)) #define malloc_tsd_externs(a_name, a_type) \ diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index 5156399f..3d3ea3ab 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -85,6 +85,7 @@ extern void (*je_malloc_message)(void *wcbopaque, const char *s); int buferror(int errnum, char *buf, size_t buflen); +uintmax_t malloc_strtoumax(const char *nptr, char **endptr, int base); /* * malloc_vsnprintf() supports a subset of snprintf(3) that avoids floating diff --git a/include/jemalloc/jemalloc_defs.h.in b/include/jemalloc/jemalloc_defs.h.in index 838f5615..f150413e 100644 --- a/include/jemalloc/jemalloc_defs.h.in +++ b/include/jemalloc/jemalloc_defs.h.in @@ -68,6 +68,20 @@ */ #undef JEMALLOC_MALLOC_THREAD_CLEANUP +/* + * Defined if threaded initialization is known to be safe on this platform. + * Among other things, it must be possible to initialize a mutex without + * triggering allocation in order for threaded allocation to be safe. + */ +#undef JEMALLOC_THREADED_INIT + +/* + * Defined if the pthreads implementation defines + * _pthread_mutex_init_calloc_cb(), in which case the function is used in order + * to avoid recursive allocation during mutex initialization. + */ +#undef JEMALLOC_MUTEX_INIT_CB + /* Defined if __attribute__((...)) syntax is supported. */ #undef JEMALLOC_HAVE_ATTR #ifdef JEMALLOC_HAVE_ATTR diff --git a/src/base.c b/src/base.c index eb68334b..696c362a 100644 --- a/src/base.c +++ b/src/base.c @@ -66,6 +66,17 @@ base_alloc(size_t size) return (ret); } +void * +base_calloc(size_t number, size_t size) +{ + void *ret = base_alloc(number * size); + + if (ret != NULL) + memset(ret, 0, number * size); + + return (ret); +} + extent_node_t * base_node_alloc(void) { diff --git a/src/ctl.c b/src/ctl.c index e17e5034..943c2925 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -615,19 +615,19 @@ ctl_lookup(const char *name, ctl_node_t const **nodesp, size_t *mibp, goto RETURN; } } else { - unsigned long index; + uintmax_t index; const ctl_node_t *inode; /* Children are indexed. 
*/ - index = strtoul(elm, NULL, 10); - if (index == ULONG_MAX) { + index = malloc_strtoumax(elm, NULL, 10); + if (index == UINTMAX_MAX || index > SIZE_T_MAX) { ret = ENOENT; goto RETURN; } inode = &node->u.named.children[0]; node = inode->u.indexed.index(mibp, *depthp, - index); + (size_t)index); if (node == NULL) { ret = ENOENT; goto RETURN; diff --git a/src/jemalloc.c b/src/jemalloc.c index b3e898c1..3e168fd0 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -38,10 +38,18 @@ arena_t **arenas; unsigned narenas; /* Set to true once the allocator has been initialized. */ -static bool malloc_initialized = false; +bool malloc_initialized = false; +#ifdef JEMALLOC_THREADED_INIT /* Used to let the initializing thread recursively allocate. */ static pthread_t malloc_initializer = (unsigned long)0; +# define INITIALIZER pthread_self() +# define IS_INITIALIZER (malloc_initializer == pthread_self()) +#else +static bool malloc_initializer = false; +# define INITIALIZER true +# define IS_INITIALIZER malloc_initializer +#endif /* Used to avoid initialization races. */ static malloc_mutex_t init_lock = MALLOC_MUTEX_INITIALIZER; @@ -127,7 +135,7 @@ choose_arena_hard(void) } } - if (arenas[choose] == 0 || first_null == narenas) { + if (arenas[choose]->nthreads == 0 || first_null == narenas) { /* * Use an unloaded arena, or the least loaded arena if * all arenas are already initialized. @@ -413,22 +421,22 @@ malloc_conf_init(void) #define CONF_HANDLE_SIZE_T(o, n, min, max) \ if (sizeof(#n)-1 == klen && strncmp(#n, k, \ klen) == 0) { \ - unsigned long ul; \ + uintmax_t um; \ char *end; \ \ errno = 0; \ - ul = strtoul(v, &end, 0); \ + um = malloc_strtoumax(v, &end, 0); \ if (errno != 0 || (uintptr_t)end - \ (uintptr_t)v != vlen) { \ malloc_conf_error( \ "Invalid conf value", \ k, klen, v, vlen); \ - } else if (ul < min || ul > max) { \ + } else if (um < min || um > max) { \ malloc_conf_error( \ "Out-of-range conf value", \ k, klen, v, vlen); \ } else \ - o = ul; \ + o = um; \ continue; \ } #define CONF_HANDLE_SSIZE_T(o, n, min, max) \ @@ -519,7 +527,7 @@ malloc_init_hard(void) arena_t *init_arenas[1]; malloc_mutex_lock(&init_lock); - if (malloc_initialized || malloc_initializer == pthread_self()) { + if (malloc_initialized || IS_INITIALIZER) { /* * Another thread initialized the allocator before this one * acquired init_lock, or this thread is the initializing @@ -528,7 +536,8 @@ malloc_init_hard(void) malloc_mutex_unlock(&init_lock); return (false); } - if (malloc_initializer != (unsigned long)0) { +#ifdef JEMALLOC_THREADED_INIT + if (IS_INITIALIZER == false) { /* Busy-wait until the initializing thread completes. */ do { malloc_mutex_unlock(&init_lock); @@ -538,6 +547,8 @@ malloc_init_hard(void) malloc_mutex_unlock(&init_lock); return (false); } +#endif + malloc_initializer = INITIALIZER; #ifdef DYNAMIC_PAGE_SHIFT /* Get page size. */ @@ -564,6 +575,7 @@ malloc_init_hard(void) malloc_conf_init(); +#ifndef JEMALLOC_MUTEX_INIT_CB /* Register fork handlers. */ if (pthread_atfork(jemalloc_prefork, jemalloc_postfork_parent, jemalloc_postfork_child) != 0) { @@ -571,11 +583,7 @@ malloc_init_hard(void) if (opt_abort) abort(); } - - if (ctl_boot()) { - malloc_mutex_unlock(&init_lock); - return (true); - } +#endif if (opt_stats_print) { /* Print statistics at exit. */ @@ -596,6 +604,11 @@ malloc_init_hard(void) return (true); } + if (ctl_boot()) { + malloc_mutex_unlock(&init_lock); + return (true); + } + if (config_prof) prof_boot1(); @@ -654,7 +667,6 @@ malloc_init_hard(void) } /* Get number of CPUs. 
*/ - malloc_initializer = pthread_self(); malloc_mutex_unlock(&init_lock); ncpus = malloc_ncpus(); malloc_mutex_lock(&init_lock); @@ -1018,8 +1030,7 @@ je_realloc(void *ptr, size_t size) } if (ptr != NULL) { - assert(malloc_initialized || malloc_initializer == - pthread_self()); + assert(malloc_initialized || IS_INITIALIZER); if (config_prof || config_stats) old_size = isalloc(ptr); @@ -1124,8 +1135,7 @@ je_free(void *ptr) if (ptr != NULL) { size_t usize; - assert(malloc_initialized || malloc_initializer == - pthread_self()); + assert(malloc_initialized || IS_INITIALIZER); if (config_prof && opt_prof) { usize = isalloc(ptr); @@ -1208,7 +1218,7 @@ je_malloc_usable_size(const void *ptr) { size_t ret; - assert(malloc_initialized || malloc_initializer == pthread_self()); + assert(malloc_initialized || IS_INITIALIZER); if (config_ivsalloc) ret = ivsalloc(ptr); @@ -1372,7 +1382,7 @@ je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags) assert(*ptr != NULL); assert(size != 0); assert(SIZE_T_MAX - size >= extra); - assert(malloc_initialized || malloc_initializer == pthread_self()); + assert(malloc_initialized || IS_INITIALIZER); p = *ptr; if (config_prof && opt_prof) { @@ -1457,7 +1467,7 @@ je_sallocm(const void *ptr, size_t *rsize, int flags) { size_t sz; - assert(malloc_initialized || malloc_initializer == pthread_self()); + assert(malloc_initialized || IS_INITIALIZER); if (config_ivsalloc) sz = ivsalloc(ptr); @@ -1479,7 +1489,7 @@ je_dallocm(void *ptr, int flags) size_t usize; assert(ptr != NULL); - assert(malloc_initialized || malloc_initializer == pthread_self()); + assert(malloc_initialized || IS_INITIALIZER); if (config_stats) usize = isalloc(ptr); @@ -1528,8 +1538,13 @@ je_nallocm(size_t *rsize, size_t size, int flags) * malloc during fork(). 
*/ +#ifndef JEMALLOC_MUTEX_INIT_CB void jemalloc_prefork(void) +#else +void +_malloc_prefork(void) +#endif { unsigned i; @@ -1544,8 +1559,13 @@ jemalloc_prefork(void) chunk_dss_prefork(); } +#ifndef JEMALLOC_MUTEX_INIT_CB void jemalloc_postfork_parent(void) +#else +void +_malloc_postfork(void) +#endif { unsigned i; diff --git a/src/mutex.c b/src/mutex.c index 07d2a033..0b20bbf3 100644 --- a/src/mutex.c +++ b/src/mutex.c @@ -56,21 +56,25 @@ pthread_create(pthread_t *__restrict thread, /******************************************************************************/ +#ifdef JEMALLOC_MUTEX_INIT_CB +int _pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex, + void *(calloc_cb)(size_t, size_t)); +#endif + bool malloc_mutex_init(malloc_mutex_t *mutex) { #ifdef JEMALLOC_OSSPIN *mutex = 0; +#elif (defined(JEMALLOC_MUTEX_INIT_CB)) + if (_pthread_mutex_init_calloc_cb(mutex, base_calloc) != 0) + return (true); #else pthread_mutexattr_t attr; if (pthread_mutexattr_init(&attr) != 0) return (true); -#ifdef PTHREAD_MUTEX_ADAPTIVE_NP - pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ADAPTIVE_NP); -#else - pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_DEFAULT); -#endif + pthread_mutexattr_settype(&attr, MALLOC_MUTEX_TYPE); if (pthread_mutex_init(mutex, &attr) != 0) { pthread_mutexattr_destroy(&attr); return (true); @@ -99,10 +103,14 @@ void malloc_mutex_postfork_child(malloc_mutex_t *mutex) { +#ifdef JEMALLOC_MUTEX_INIT_CB + malloc_mutex_unlock(mutex); +#else if (malloc_mutex_init(mutex)) { malloc_printf(": Error re-initializing mutex in " "child\n"); if (opt_abort) abort(); } +#endif } diff --git a/src/util.c b/src/util.c index 698b53a9..090e1f06 100644 --- a/src/util.c +++ b/src/util.c @@ -44,7 +44,7 @@ JEMALLOC_CATTR(visibility("hidden"), static) void wrtmessage(void *cbopaque, const char *s) { - UNUSED int result = write(STDERR_FILENO, s, strlen(s)); + UNUSED int result = syscall(SYS_write, STDERR_FILENO, s, strlen(s)); } void (*je_malloc_message)(void *, const char *s) @@ -69,6 +69,123 @@ buferror(int errnum, char *buf, size_t buflen) #endif } +uintmax_t +malloc_strtoumax(const char *nptr, char **endptr, int base) +{ + uintmax_t ret, digit; + int b; + bool neg; + const char *p, *ns; + + if (base < 0 || base == 1 || base > 36) { + errno = EINVAL; + return (UINTMAX_MAX); + } + b = base; + + /* Swallow leading whitespace and get sign, if any. */ + neg = false; + p = nptr; + while (true) { + switch (*p) { + case '\t': case '\n': case '\v': case '\f': case '\r': case ' ': + p++; + break; + case '-': + neg = true; + /* Fall through. */ + case '+': + p++; + /* Fall through. */ + default: + goto PREFIX; + } + } + + /* Get prefix, if any. */ + PREFIX: + /* + * Note where the first non-whitespace/sign character is so that it is + * possible to tell whether any digits are consumed (e.g., " 0" vs. + * " -x"). + */ + ns = p; + if (*p == '0') { + switch (p[1]) { + case '0': case '1': case '2': case '3': case '4': case '5': + case '6': case '7': + if (b == 0) + b = 8; + if (b == 8) + p++; + break; + case 'x': + switch (p[2]) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + case 'A': case 'B': case 'C': case 'D': case 'E': + case 'F': + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': + if (b == 0) + b = 16; + if (b == 16) + p += 2; + break; + default: + break; + } + break; + default: + break; + } + } + if (b == 0) + b = 10; + + /* Convert. 
*/ + ret = 0; + while ((*p >= '0' && *p <= '9' && (digit = *p - '0') < b) + || (*p >= 'A' && *p <= 'Z' && (digit = 10 + *p - 'A') < b) + || (*p >= 'a' && *p <= 'z' && (digit = 10 + *p - 'a') < b)) { + uintmax_t pret = ret; + ret *= b; + ret += digit; + if (ret < pret) { + /* Overflow. */ + errno = ERANGE; + return (UINTMAX_MAX); + } + p++; + } + if (neg) + ret = -ret; + + if (endptr != NULL) { + if (p == ns) { + /* No characters were converted. */ + *endptr = (char *)nptr; + } else + *endptr = (char *)p; + } + + if (config_debug && malloc_initialized) { + uintmax_t tret; + int perrno; + char *pend; + + perrno = errno; + if (endptr != NULL) + pend = *endptr; + tret = strtoumax(nptr, endptr, base); + assert(tret == ret); + assert(errno == perrno); + assert(endptr == NULL || *endptr == pend); + } + + return (ret); +} + static char * u2s(uintmax_t x, unsigned base, bool uppercase, char *s, size_t *slen_p) { @@ -220,7 +337,7 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) val = va_arg(ap, ptrdiff_t); \ break; \ case 'z': \ - val = va_arg(ap, size_t); \ + val = va_arg(ap, ssize_t); \ break; \ case 'p': /* Synthetic; used for %p. */ \ val = va_arg(ap, uintptr_t); \ @@ -289,10 +406,11 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': { - unsigned long uwidth; + uintmax_t uwidth; errno = 0; - uwidth = strtoul(f, (char **)&f, 10); - assert(uwidth != ULONG_MAX || errno != ERANGE); + uwidth = malloc_strtoumax(f, (char **)&f, 10); + assert(uwidth != UINTMAX_MAX || errno != + ERANGE); width = (int)uwidth; if (*f == '.') { f++; @@ -314,10 +432,10 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': { - unsigned long uprec; + uintmax_t uprec; errno = 0; - uprec = strtoul(f, (char **)&f, 10); - assert(uprec != ULONG_MAX || errno != ERANGE); + uprec = malloc_strtoumax(f, (char **)&f, 10); + assert(uprec != UINTMAX_MAX || errno != ERANGE); prec = (int)uprec; break; } @@ -435,7 +553,7 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) str[size - 1] = '\0'; ret = i; - if (config_debug) { + if (config_debug && malloc_initialized) { char buf[MALLOC_PRINTF_BUFSIZE]; int tret; From 1e6138c88c7f3bb1f0e8fb785080ac5abc24210c Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 24 Mar 2012 19:36:27 -0700 Subject: [PATCH 072/205] Remove malloc_mutex_trylock(). Remove malloc_mutex_trylock(); it has not been used for quite some time. 
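Lock acquisition in the affected paths now always goes through the
blocking pair. A minimal sketch of the surviving usage pattern (mirroring
e.g. the init_lock handling in src/jemalloc.c):

	malloc_mutex_lock(&init_lock);
	/* ... critical section ... */
	malloc_mutex_unlock(&init_lock);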
--- include/jemalloc/internal/mutex.h | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/include/jemalloc/internal/mutex.h b/include/jemalloc/internal/mutex.h index 98f2cba5..ad4f9c24 100644 --- a/include/jemalloc/internal/mutex.h +++ b/include/jemalloc/internal/mutex.h @@ -41,7 +41,6 @@ void malloc_mutex_postfork_child(malloc_mutex_t *mutex); #ifndef JEMALLOC_ENABLE_INLINE void malloc_mutex_lock(malloc_mutex_t *mutex); -bool malloc_mutex_trylock(malloc_mutex_t *mutex); void malloc_mutex_unlock(malloc_mutex_t *mutex); #endif @@ -59,20 +58,6 @@ malloc_mutex_lock(malloc_mutex_t *mutex) } } -JEMALLOC_INLINE bool -malloc_mutex_trylock(malloc_mutex_t *mutex) -{ - - if (isthreaded) { -#ifdef JEMALLOC_OSSPIN - return (OSSpinLockTry(mutex) == false); -#else - return (pthread_mutex_trylock(mutex) != 0); -#endif - } else - return (false); -} - JEMALLOC_INLINE void malloc_mutex_unlock(malloc_mutex_t *mutex) { From c1e567bda042d94159026b96e7a77683606037fa Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Mon, 26 Mar 2012 17:03:41 +0200 Subject: [PATCH 073/205] Use __sync_add_and_fetch and __sync_sub_and_fetch when they are available These functions may be available as inlines or as libgcc functions. In the former case, a __GCC_HAVE_SYNC_COMPARE_AND_SWAP_n macro is defined. But we still want to use these functions in the latter case, when we don't have our own implementation. --- configure.ac | 34 +++++++++++++++++++++++++++++ include/jemalloc/internal/atomic.h | 18 ++++++++++++--- include/jemalloc/jemalloc_defs.h.in | 16 ++++++++++++++ 3 files changed, 65 insertions(+), 3 deletions(-) diff --git a/configure.ac b/configure.ac index 478ae9d4..5999a33a 100644 --- a/configure.ac +++ b/configure.ac @@ -886,6 +886,40 @@ if test "x${je_cv_osatomic}" = "xyes" ; then AC_DEFINE([JEMALLOC_OSATOMIC], [ ]) fi +dnl ============================================================================ +dnl Check whether __sync_{add,sub}_and_fetch() are available despite +dnl __GCC_HAVE_SYNC_COMPARE_AND_SWAP_n macros being undefined. + +AC_DEFUN([JE_SYNC_COMPARE_AND_SWAP_CHECK],[ + AC_CACHE_CHECK([whether to force $1-bit __sync_{add,sub}_and_fetch()], + [je_cv_sync_compare_and_swap_$2], + [AC_COMPILE_IFELSE([AC_LANG_PROGRAM([ + #include + ], + [ + #ifndef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_$2 + { + uint$1_t x$1 = 0; + __sync_add_and_fetch(&x$1, 42); + __sync_sub_and_fetch(&x$1, 1); + } + #else + #error __GCC_HAVE_SYNC_COMPARE_AND_SWAP_$2 is defined, no need to force + #endif + ])], + [je_cv_sync_compare_and_swap_$2=yes], + [je_cv_sync_compare_and_swap_$2=no])]) + + if test "x${je_cv_sync_compare_and_swap_$2}" = "xyes" ; then + AC_DEFINE([JE_FORCE_SYNC_COMPARE_AND_SWAP_$2], [ ]) + fi +]) + +if test "x${je_cv_osatomic}" != "xyes" ; then + JE_SYNC_COMPARE_AND_SWAP_CHECK(32, 4) + JE_SYNC_COMPARE_AND_SWAP_CHECK(64, 8) +fi + dnl ============================================================================ dnl Check for spinlock(3) operations as provided on Darwin. 
diff --git a/include/jemalloc/internal/atomic.h b/include/jemalloc/internal/atomic.h index 7a9cb61e..d8f6ca57 100644 --- a/include/jemalloc/internal/atomic.h +++ b/include/jemalloc/internal/atomic.h @@ -87,6 +87,20 @@ atomic_sub_uint64(uint64_t *p, uint64_t x) return (x); } +#elif (defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_8)) +JEMALLOC_INLINE uint64_t +atomic_add_uint64(uint64_t *p, uint64_t x) +{ + + return (__sync_add_and_fetch(p, x)); +} + +JEMALLOC_INLINE uint64_t +atomic_sub_uint64(uint64_t *p, uint64_t x) +{ + + return (__sync_sub_and_fetch(p, x)); +} #else # if (LG_SIZEOF_PTR == 3) # error "Missing implementation for 64-bit atomic operations" @@ -150,9 +164,7 @@ atomic_sub_uint32(uint32_t *p, uint32_t x) return (x); } -#elif (defined __SH4__ || defined __mips__) && (__GNUC__ > 4 || \ - (__GNUC__ == 4 && (__GNUC_MINOR__ > 1 || (__GNUC_MINOR__ == 1 && \ - __GNUC_PATCHLEVEL__ > 1)))) +#elif (defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_4)) JEMALLOC_INLINE uint32_t atomic_add_uint32(uint32_t *p, uint32_t x) { diff --git a/include/jemalloc/jemalloc_defs.h.in b/include/jemalloc/jemalloc_defs.h.in index f150413e..e4bfa04a 100644 --- a/include/jemalloc/jemalloc_defs.h.in +++ b/include/jemalloc/jemalloc_defs.h.in @@ -53,6 +53,22 @@ */ #undef JEMALLOC_OSATOMIC +/* + * Defined if __sync_add_and_fetch(uint32_t *, uint32_t) and + * __sync_sub_and_fetch(uint32_t *, uint32_t) are available, despite + * __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 not being defined (which means the + * functions are defined in libgcc instead of being inlines) + */ +#undef JE_FORCE_SYNC_COMPARE_AND_SWAP_4 + +/* + * Defined if __sync_add_and_fetch(uint64_t *, uint64_t) and + * __sync_sub_and_fetch(uint64_t *, uint64_t) are available, despite + * __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 not being defined (which means the + * functions are defined in libgcc instead of being inlines) + */ +#undef JE_FORCE_SYNC_COMPARE_AND_SWAP_8 + /* * Defined if OSSpin*() functions are available, as provided by Darwin, and * documented in the spinlock(3) manual page. From 5c89c50d1803dc0fb6544c1abd40552e76c8614d Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Mon, 26 Mar 2012 17:46:57 +0200 Subject: [PATCH 074/205] Fix glibc hooks when using both --with-jemalloc-prefix and --with-mangling --- src/jemalloc.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 3e168fd0..d08e103b 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1181,7 +1181,15 @@ je_valloc(size_t size) } #endif -#if (!defined(JEMALLOC_PREFIX) && defined(__GLIBC__) && !defined(__UCLIBC__)) +/* + * is_malloc(je_malloc) is some macro magic to detect if jemalloc_defs.h has + * #define je_malloc malloc + */ +#define malloc_is_malloc 1 +#define is_malloc_(a) malloc_is_ ## a +#define is_malloc(a) is_malloc_(a) + +#if ((is_malloc(je_malloc) == 1) && defined(__GLIBC__) && !defined(__UCLIBC__)) /* * glibc provides the RTLD_DEEPBIND flag for dlopen which can make it possible * to inconsistently reference libc's malloc(3)-compatible functions From 5b3db098f73f467a03f87a2242c692268f796a56 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Mon, 26 Mar 2012 18:39:35 +0200 Subject: [PATCH 075/205] Make zone_{free, realloc, free_definite_size} fallback to the system allocator if they are called with a pointer that jemalloc didn't allocate It turns out some OSX system libraries (like CoreGraphics on 10.6) like to call malloc_zone_* functions, but giving them pointers that weren't allocated with the zone they are using. 
Possibly, they do malloc_zone_malloc(malloc_default_zone()) before we register the jemalloc zone, and malloc_zone_realloc(malloc_default_zone()) after. malloc_default_zone() returning a different value in both cases. --- src/zone.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/src/zone.c b/src/zone.c index a8f09c98..d3107f85 100644 --- a/src/zone.c +++ b/src/zone.c @@ -80,14 +80,22 @@ static void zone_free(malloc_zone_t *zone, void *ptr) { - je_free(ptr); + if (ivsalloc(ptr) != 0) { + je_free(ptr); + return; + } + + free(ptr); } static void * zone_realloc(malloc_zone_t *zone, void *ptr, size_t size) { - return (je_realloc(ptr, size)); + if (ivsalloc(ptr) != 0) + return (je_realloc(ptr, size)); + + return (realloc(ptr, size)); } #if (JEMALLOC_ZONE_VERSION >= 5) @@ -107,8 +115,13 @@ static void zone_free_definite_size(malloc_zone_t *zone, void *ptr, size_t size) { - assert(ivsalloc(ptr) == size); - je_free(ptr); + if (ivsalloc(ptr) != 0) { + assert(ivsalloc(ptr) == size); + je_free(ptr); + return; + } + + free(ptr); } #endif From 2465bdf4937ffba309e7289014443c6b51566f22 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 26 Mar 2012 13:13:55 -0700 Subject: [PATCH 076/205] Check for NULL ptr in malloc_usable_size(). Check for NULL ptr in malloc_usable_size(), rather than just asserting that ptr is non-NULL. This matches behavior of other implementations (e.g., glibc and tcmalloc). --- src/jemalloc.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index d08e103b..ee771c78 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1230,10 +1230,8 @@ je_malloc_usable_size(const void *ptr) if (config_ivsalloc) ret = ivsalloc(ptr); - else { - assert(ptr != NULL); - ret = isalloc(ptr); - } + else + ret = (ptr != NULL) ? isalloc(ptr) : 0; return (ret); } From fd4fcefa004e04ea8672b11e280a6ced16c38dd2 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 23 Mar 2012 17:40:58 -0700 Subject: [PATCH 077/205] Force the lazy-lock feature on FreeBSD. Force the lazy-lock feature on FreeBSD in order to avoid pthread_self(), because it causes allocation. (This change was mistakenly omitted from 41b6afb834b1f5250223678c52bd4f013d4234f6.) --- configure.ac | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/configure.ac b/configure.ac index 5999a33a..2616b0a6 100644 --- a/configure.ac +++ b/configure.ac @@ -221,6 +221,7 @@ case "${host}" in abi="elf" AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) RPATH="-Wl,-rpath," + force_lazy_lock="1" ;; *-*-linux*) CFLAGS="$CFLAGS" @@ -800,6 +801,10 @@ fi ], [enable_lazy_lock="0"] ) +if test "x$enable_lazy_lock" = "x0" -a "x${force_lazy_lock}" = "x1" ; then + AC_MSG_RESULT([Forcing lazy-lock to avoid allocator/threading bootstrap issues]) + enable_lazy_lock="1" +fi if test "x$enable_lazy_lock" = "x1" ; then AC_CHECK_HEADERS([dlfcn.h], , [AC_MSG_ERROR([dlfcn.h is missing])]) AC_CHECK_FUNC([dlsym], [], From d4be8b7b6ee2e21d079180455d4ccbf45cc1cee7 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 26 Mar 2012 18:54:44 -0700 Subject: [PATCH 078/205] Add the "thread.tcache.enabled" mallctl. 
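A usage sketch of the new control (variable names follow the test added
below; depending on --with-jemalloc-prefix the entry point may be
mangled):

	bool e0, e1 = false;
	size_t sz = sizeof(bool);
	/* Write false to disable the calling thread's tcache (implicitly
	 * flushing it); e0 receives the previous setting. */
	mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz);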
--- Makefile.in | 3 +- doc/jemalloc.xml.in | 14 ++++ include/jemalloc/internal/tcache.h | 116 +++++++++++++++++++++++++---- src/ctl.c | 36 +++++++-- src/tcache.c | 27 ++++--- test/thread_tcache_enabled.c | 109 +++++++++++++++++++++++++++ test/thread_tcache_enabled.exp | 2 + 7 files changed, 271 insertions(+), 36 deletions(-) create mode 100644 test/thread_tcache_enabled.c create mode 100644 test/thread_tcache_enabled.exp diff --git a/Makefile.in b/Makefile.in index 494ac9a6..821c0634 100644 --- a/Makefile.in +++ b/Makefile.in @@ -66,7 +66,8 @@ DOCS_MAN3 := $(DOCS_XML:@objroot@%.xml=@srcroot@%.3) DOCS := $(DOCS_HTML) $(DOCS_MAN3) CTESTS := @srcroot@test/aligned_alloc.c @srcroot@test/allocated.c \ @srcroot@test/bitmap.c @srcroot@test/mremap.c \ - @srcroot@test/posix_memalign.c @srcroot@test/thread_arena.c + @srcroot@test/posix_memalign.c @srcroot@test/thread_arena.c \ + @srcroot@test/thread_tcache_enabled.c ifeq (@enable_experimental@, 1) CTESTS += @srcroot@test/allocm.c @srcroot@test/rallocm.c endif diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 3cbc851f..0b468b04 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1103,6 +1103,20 @@ malloc_conf = "xmalloc:true";]]> mallctl* calls. + + + thread.tcache.enabled + (bool) + rw + [] + + Enable/disable calling thread's tcache. The tcache is + implicitly flushed as a side effect of becoming + disabled (see thread.tcache.flush). + + + thread.tcache.flush diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index 30e63a50..0d999f24 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -5,6 +5,16 @@ typedef struct tcache_bin_info_s tcache_bin_info_t; typedef struct tcache_bin_s tcache_bin_t; typedef struct tcache_s tcache_t; +/* + * tcache pointers close to NULL are used to encode state information that is + * used for two purposes: preventing thread caching on a per thread basis and + * cleaning up during thread shutdown. + */ +#define TCACHE_STATE_DISABLED ((tcache_t *)(uintptr_t)1) +#define TCACHE_STATE_REINCARNATED ((tcache_t *)(uintptr_t)2) +#define TCACHE_STATE_PURGATORY ((tcache_t *)(uintptr_t)3) +#define TCACHE_STATE_MAX TCACHE_STATE_PURGATORY + /* * Absolute maximum number of cache slots for each small bin in the thread * cache. This is an additional constraint beyond that imposed as: twice the @@ -35,6 +45,12 @@ typedef struct tcache_s tcache_t; /******************************************************************************/ #ifdef JEMALLOC_H_STRUCTS +typedef enum { + tcache_enabled_false = 0, /* Enable cast to/from bool. */ + tcache_enabled_true = 1, + tcache_enabled_default = 2 +} tcache_enabled_t; + /* * Read-only information associated with each element of tcache_t's tbins array * is stored separately, mainly to reduce memory usage. 
@@ -105,9 +121,13 @@ bool tcache_boot1(void); #ifndef JEMALLOC_ENABLE_INLINE malloc_tsd_protos(JEMALLOC_ATTR(unused), tcache, tcache_t *) +malloc_tsd_protos(JEMALLOC_ATTR(unused), tcache_enabled, tcache_enabled_t) void tcache_event(tcache_t *tcache); +void tcache_flush(void); +bool tcache_enabled_get(void); tcache_t *tcache_get(void); +void tcache_enabled_set(bool enabled); void *tcache_alloc_easy(tcache_bin_t *tbin); void *tcache_alloc_small(tcache_t *tcache, size_t size, bool zero); void *tcache_alloc_large(tcache_t *tcache, size_t size, bool zero); @@ -120,6 +140,69 @@ void tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size); malloc_tsd_externs(tcache, tcache_t *) malloc_tsd_funcs(JEMALLOC_INLINE, tcache, tcache_t *, NULL, tcache_thread_cleanup) +/* Per thread flag that allows thread caches to be disabled. */ +malloc_tsd_externs(tcache_enabled, tcache_enabled_t) +malloc_tsd_funcs(JEMALLOC_INLINE, tcache_enabled, tcache_enabled_t, + tcache_enabled_default, malloc_tsd_no_cleanup) + +JEMALLOC_INLINE void +tcache_flush(void) +{ + tcache_t *tcache; + + cassert(config_tcache); + + tcache = *tcache_tsd_get(); + if ((uintptr_t)tcache <= (uintptr_t)TCACHE_STATE_MAX) + return; + tcache_destroy(tcache); + tcache = NULL; + tcache_tsd_set(&tcache); +} + +JEMALLOC_INLINE bool +tcache_enabled_get(void) +{ + tcache_enabled_t tcache_enabled; + + cassert(config_tcache); + + tcache_enabled = *tcache_enabled_tsd_get(); + if (tcache_enabled == tcache_enabled_default) { + tcache_enabled = (tcache_enabled_t)opt_tcache; + tcache_enabled_tsd_set(&tcache_enabled); + } + + return ((bool)tcache_enabled); +} + +JEMALLOC_INLINE void +tcache_enabled_set(bool enabled) +{ + tcache_enabled_t tcache_enabled; + tcache_t *tcache; + + cassert(config_tcache); + + tcache_enabled = (tcache_enabled_t)enabled; + tcache_enabled_tsd_set(&tcache_enabled); + tcache = *tcache_tsd_get(); + if (enabled) { + if (tcache == TCACHE_STATE_DISABLED) { + tcache = NULL; + tcache_tsd_set(&tcache); + } + } else /* disabled */ { + if (tcache > TCACHE_STATE_MAX) { + tcache_destroy(tcache); + tcache = NULL; + } + if (tcache == NULL) { + tcache = TCACHE_STATE_DISABLED; + tcache_tsd_set(&tcache); + } + } +} JEMALLOC_INLINE tcache_t * tcache_get(void) @@ -128,29 +211,32 @@ tcache_get(void) if (config_tcache == false) return (NULL); - if (config_lazy_lock && (isthreaded & opt_tcache) == false) - return (NULL); - else if (opt_tcache == false) + if (config_lazy_lock && isthreaded == false) return (NULL); tcache = *tcache_tsd_get(); - if ((uintptr_t)tcache <= (uintptr_t)2) { + if ((uintptr_t)tcache <= (uintptr_t)TCACHE_STATE_MAX) { + if (tcache == TCACHE_STATE_DISABLED) + return (NULL); if (tcache == NULL) { - tcache = tcache_create(choose_arena()); - if (tcache == NULL) + if (tcache_enabled_get() == false) { + tcache_enabled_set(false); /* Memoize. */ return (NULL); - } else { - if (tcache == (void *)(uintptr_t)1) { - /* - * Make a note that an allocator function was - * called after the tcache_thread_cleanup() was - * called. - */ - tcache = (tcache_t *)(uintptr_t)2; - tcache_tsd_set(&tcache); } + return (tcache_create(choose_arena())); + } + if (tcache == TCACHE_STATE_PURGATORY) { + /* + * Make a note that an allocator function was called + * after tcache_thread_cleanup() was called. 
+ */ + tcache = TCACHE_STATE_REINCARNATED; + tcache_tsd_set(&tcache); return (NULL); } + if (tcache == TCACHE_STATE_REINCARNATED) + return (NULL); + not_reached(); } return (tcache); diff --git a/src/ctl.c b/src/ctl.c index 943c2925..08011616 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -39,6 +39,7 @@ static int ctl_lookup(const char *name, ctl_node_t const **nodesp, CTL_PROTO(version) CTL_PROTO(epoch) +CTL_PROTO(thread_tcache_enabled) CTL_PROTO(thread_tcache_flush) CTL_PROTO(thread_arena) CTL_PROTO(thread_allocated) @@ -151,6 +152,7 @@ CTL_PROTO(stats_mapped) #define INDEX(i) false, {.indexed = {i##_index}}, NULL static const ctl_node_t tcache_node[] = { + {NAME("enabled"), CTL(thread_tcache_enabled)}, {NAME("flush"), CTL(thread_tcache_flush)} }; @@ -966,25 +968,43 @@ RETURN: return (ret); } +static int +thread_tcache_enabled_ctl(const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) +{ + int ret; + bool oldval; + + if (config_tcache == false) + return (ENOENT); + + oldval = tcache_enabled_get(); + if (newp != NULL) { + if (newlen != sizeof(bool)) { + ret = EINVAL; + goto RETURN; + } + tcache_enabled_set(*(bool *)newp); + } + READ(oldval, bool); + +RETURN: + ret = 0; + return (ret); +} + static int thread_tcache_flush_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; - tcache_t *tcache; if (config_tcache == false) return (ENOENT); VOID(); - if ((tcache = *tcache_tsd_get()) == NULL) { - ret = 0; - goto RETURN; - } - tcache_destroy(tcache); - tcache = NULL; - tcache_tsd_set(&tcache); + tcache_flush(); ret = 0; RETURN: diff --git a/src/tcache.c b/src/tcache.c index 3442406d..bc911a6e 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -5,6 +5,7 @@ /* Data. */ malloc_tsd_data(, tcache, tcache_t *, NULL) +malloc_tsd_data(, tcache_enabled, tcache_enabled_t, tcache_enabled_default) bool opt_tcache = true; ssize_t opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT; @@ -328,25 +329,27 @@ tcache_thread_cleanup(void *arg) { tcache_t *tcache = *(tcache_t **)arg; - if (tcache == (void *)(uintptr_t)1) { + if (tcache == TCACHE_STATE_DISABLED) { + /* Do nothing. */ + } else if (tcache == TCACHE_STATE_REINCARNATED) { + /* + * Another destructor called an allocator function after this + * destructor was called. Reset tcache to 1 in order to + * receive another callback. + */ + tcache = TCACHE_STATE_PURGATORY; + tcache_tsd_set(&tcache); + } else if (tcache == TCACHE_STATE_PURGATORY) { /* * The previous time this destructor was called, we set the key * to 1 so that other destructors wouldn't cause re-creation of * the tcache. This time, do nothing, so that the destructor * will not be called again. */ - } else if (tcache == (void *)(uintptr_t)2) { - /* - * Another destructor called an allocator function after this - * destructor was called. Reset tcache to 1 in order to - * receive another callback. 
- */ - tcache = (tcache_t *)(uintptr_t)1; - tcache_tsd_set(&tcache); } else if (tcache != NULL) { - assert(tcache != (void *)(uintptr_t)1); + assert(tcache != TCACHE_STATE_PURGATORY); tcache_destroy(tcache); - tcache = (tcache_t *)(uintptr_t)1; + tcache = TCACHE_STATE_PURGATORY; tcache_tsd_set(&tcache); } } @@ -428,7 +431,7 @@ tcache_boot1(void) { if (opt_tcache) { - if (tcache_tsd_boot()) + if (tcache_tsd_boot() || tcache_enabled_tsd_boot()) return (true); } diff --git a/test/thread_tcache_enabled.c b/test/thread_tcache_enabled.c new file mode 100644 index 00000000..46540385 --- /dev/null +++ b/test/thread_tcache_enabled.c @@ -0,0 +1,109 @@ +#include +#include +#include +#include +#include +#include + +#define JEMALLOC_MANGLE +#include "jemalloc_test.h" + +void * +thread_start(void *arg) +{ + int err; + size_t sz; + bool e0, e1; + + sz = sizeof(bool); + if ((err = mallctl("thread.tcache.enabled", &e0, &sz, NULL, 0))) { + if (err == ENOENT) { +#ifdef JEMALLOC_TCACHE + assert(false); +#endif + } + goto RETURN; + } + + if (e0) { + e1 = false; + assert(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz) + == 0); + assert(e0); + } + + e1 = true; + assert(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz) == 0); + assert(e0 == false); + + e1 = true; + assert(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz) == 0); + assert(e0); + + e1 = false; + assert(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz) == 0); + assert(e0); + + e1 = false; + assert(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz) == 0); + assert(e0 == false); + + free(malloc(1)); + e1 = true; + assert(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz) == 0); + assert(e0 == false); + + free(malloc(1)); + e1 = true; + assert(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz) == 0); + assert(e0); + + free(malloc(1)); + e1 = false; + assert(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz) == 0); + assert(e0); + + free(malloc(1)); + e1 = false; + assert(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz) == 0); + assert(e0 == false); + + free(malloc(1)); +RETURN: + return (NULL); +} + +int +main(void) +{ + int ret = 0; + pthread_t thread; + + fprintf(stderr, "Test begin\n"); + + thread_start(NULL); + + if (pthread_create(&thread, NULL, thread_start, NULL) + != 0) { + fprintf(stderr, "%s(): Error in pthread_create()\n", __func__); + ret = 1; + goto RETURN; + } + pthread_join(thread, (void *)&ret); + + thread_start(NULL); + + if (pthread_create(&thread, NULL, thread_start, NULL) + != 0) { + fprintf(stderr, "%s(): Error in pthread_create()\n", __func__); + ret = 1; + goto RETURN; + } + pthread_join(thread, (void *)&ret); + + thread_start(NULL); + +RETURN: + fprintf(stderr, "Test end\n"); + return (ret); +} diff --git a/test/thread_tcache_enabled.exp b/test/thread_tcache_enabled.exp new file mode 100644 index 00000000..369a88dd --- /dev/null +++ b/test/thread_tcache_enabled.exp @@ -0,0 +1,2 @@ +Test begin +Test end From e77fa59ece7e23de586f08980f627b8102511755 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Wed, 28 Mar 2012 09:53:16 +0200 Subject: [PATCH 079/205] Don't use pthread_atfork to register prefork/postfork handlers on OSX OSX libc calls zone allocators' force_lock/force_unlock already. 
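For context, the Darwin zone hooks already funnel into the same handlers
(zone_force_unlock() is visible in the src/zone.c hunks later in this
series; the force_lock side is assumed symmetric):

	static void
	zone_force_lock(malloc_zone_t *zone)
	{

		jemalloc_prefork();	/* Assumed mirror of the unlock hook. */
	}

	static void
	zone_force_unlock(malloc_zone_t *zone)
	{

		jemalloc_postfork_parent();
	}

Registering jemalloc_prefork() a second time via pthread_atfork() would
therefore run the prefork path twice around fork() on OSX.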
--- src/jemalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index ee771c78..9eae1372 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -575,7 +575,7 @@ malloc_init_hard(void) malloc_conf_init(); -#ifndef JEMALLOC_MUTEX_INIT_CB +#if (!defined(JEMALLOC_MUTEX_INIT_CB) && !defined(JEMALLOC_ZONE)) /* Register fork handlers. */ if (pthread_atfork(jemalloc_prefork, jemalloc_postfork_parent, jemalloc_postfork_child) != 0) { From 1a0e7770243e0539fa8fef7bb1512f784f93389f Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Tue, 27 Mar 2012 14:48:58 +0200 Subject: [PATCH 080/205] Add a SYS_write definition on systems where it is not defined in headers Namely, in the Android NDK headers, SYS_write is not defined; but __NR_write is. --- include/jemalloc/internal/jemalloc_internal.h.in | 3 +++ src/util.c | 10 ++++++++++ 2 files changed, 13 insertions(+) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 4f557794..b7b8df8b 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -1,6 +1,9 @@ #include #include #include +#if !defined(SYS_write) && defined(__NR_write) +#define SYS_write __NR_write +#endif #include #include #include diff --git a/src/util.c b/src/util.c index 090e1f06..895aa198 100644 --- a/src/util.c +++ b/src/util.c @@ -44,7 +44,17 @@ JEMALLOC_CATTR(visibility("hidden"), static) void wrtmessage(void *cbopaque, const char *s) { + +#ifdef SYS_write + /* + * Use syscall(2) rather than write(2) when possible in order to avoid + * the possibility of memory allocation within libc. This is necessary + * on FreeBSD; most operating systems do not have this problem though. + */ UNUSED int result = syscall(SYS_write, STDERR_FILENO, s, strlen(s)); +#else + UNUSED int result = write(STDERR_FILENO, s, strlen(s)); +#endif } void (*je_malloc_message)(void *, const char *s) From 2cfe6d67ef6a622eeb47ba48b431bdafc0c45b35 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Tue, 27 Mar 2012 15:03:07 +0200 Subject: [PATCH 081/205] Change AC_COMPILE_IFELSE into AC_LINK_IFELSE for the __sync_{add, sub}_and_fetch() test With the Android NDK, __sync_{add,sub}_and_fetch() compile fine for uint64_t, but the corresponding libgcc function aren't there. --- configure.ac | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/configure.ac b/configure.ac index 2616b0a6..c1b46dc3 100644 --- a/configure.ac +++ b/configure.ac @@ -898,20 +898,20 @@ dnl __GCC_HAVE_SYNC_COMPARE_AND_SWAP_n macros being undefined. 
AC_DEFUN([JE_SYNC_COMPARE_AND_SWAP_CHECK],[ AC_CACHE_CHECK([whether to force $1-bit __sync_{add,sub}_and_fetch()], [je_cv_sync_compare_and_swap_$2], - [AC_COMPILE_IFELSE([AC_LANG_PROGRAM([ - #include - ], - [ - #ifndef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_$2 - { - uint$1_t x$1 = 0; - __sync_add_and_fetch(&x$1, 42); - __sync_sub_and_fetch(&x$1, 1); - } - #else - #error __GCC_HAVE_SYNC_COMPARE_AND_SWAP_$2 is defined, no need to force - #endif - ])], + [AC_LINK_IFELSE([AC_LANG_PROGRAM([ + #include + ], + [ + #ifndef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_$2 + { + uint$1_t x$1 = 0; + __sync_add_and_fetch(&x$1, 42); + __sync_sub_and_fetch(&x$1, 1); + } + #else + #error __GCC_HAVE_SYNC_COMPARE_AND_SWAP_$2 is defined, no need to force + #endif + ])], [je_cv_sync_compare_and_swap_$2=yes], [je_cv_sync_compare_and_swap_$2=no])]) From 71a93b8725fb52ae393ab88e2fccd5afa84c66a0 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Tue, 27 Mar 2012 14:20:12 +0200 Subject: [PATCH 082/205] Move zone registration to zone.c --- include/jemalloc/internal/private_namespace.h | 2 +- include/jemalloc/internal/zone.h | 3 +-- src/jemalloc.c | 24 ++----------------- src/zone.c | 22 ++++++++++++++--- 4 files changed, 23 insertions(+), 28 deletions(-) diff --git a/include/jemalloc/internal/private_namespace.h b/include/jemalloc/internal/private_namespace.h index 7103e680..ed34e328 100644 --- a/include/jemalloc/internal/private_namespace.h +++ b/include/jemalloc/internal/private_namespace.h @@ -82,7 +82,6 @@ #define ckh_string_keycomp JEMALLOC_N(ckh_string_keycomp) #define ckh_try_bucket_insert JEMALLOC_N(ckh_try_bucket_insert) #define ckh_try_insert JEMALLOC_N(ckh_try_insert) -#define create_zone JEMALLOC_N(create_zone) #define ctl_boot JEMALLOC_N(ctl_boot) #define ctl_bymib JEMALLOC_N(ctl_bymib) #define ctl_byname JEMALLOC_N(ctl_byname) @@ -195,6 +194,7 @@ #define prof_tdata_init JEMALLOC_N(prof_tdata_init) #define prof_tdata_tls JEMALLOC_N(prof_tdata_tls) #define pthread_create JEMALLOC_N(pthread_create) +#define register_zone JEMALLOC_N(register_zone) #define rtree_get JEMALLOC_N(rtree_get) #define rtree_get_locked JEMALLOC_N(rtree_get_locked) #define rtree_new JEMALLOC_N(rtree_new) diff --git a/include/jemalloc/internal/zone.h b/include/jemalloc/internal/zone.h index 859b529d..9eb4252f 100644 --- a/include/jemalloc/internal/zone.h +++ b/include/jemalloc/internal/zone.h @@ -12,8 +12,7 @@ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -malloc_zone_t *create_zone(void); -void szone2ozone(malloc_zone_t *zone); +void register_zone(void); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/src/jemalloc.c b/src/jemalloc.c index 9eae1372..908485a7 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -712,26 +712,6 @@ malloc_init_hard(void) /* Copy the pointer to the one arena that was already initialized. */ arenas[0] = init_arenas[0]; -#ifdef JEMALLOC_ZONE - /* Register the custom zone. At this point it won't be the default. */ - malloc_zone_t *jemalloc_zone = create_zone(); - malloc_zone_register(jemalloc_zone); - - /* - * Unregister and reregister the default zone. On OSX >= 10.6, - * unregistering takes the last registered zone and places it at the - * location of the specified zone. Unregistering the default zone thus - * makes the last registered one the default. On OSX < 10.6, - * unregistering shifts all registered zones. The first registered zone - * then becomes the default. 
- */ - do { - malloc_zone_t *default_zone = malloc_default_zone(); - malloc_zone_unregister(default_zone); - malloc_zone_register(default_zone); - } while (malloc_default_zone() != jemalloc_zone); -#endif - malloc_initialized = true; malloc_mutex_unlock(&init_lock); return (false); @@ -743,8 +723,8 @@ void jemalloc_darwin_init(void) { - if (malloc_init_hard()) - abort(); + if (malloc_init_hard() == false) + register_zone(); } #endif diff --git a/src/zone.c b/src/zone.c index d3107f85..4b6c75e4 100644 --- a/src/zone.c +++ b/src/zone.c @@ -159,8 +159,8 @@ zone_force_unlock(malloc_zone_t *zone) jemalloc_postfork_parent(); } -malloc_zone_t * -create_zone(void) +void +register_zone(void) { zone.size = (void *)zone_size; @@ -206,5 +206,21 @@ create_zone(void) zone_introspect.enumerate_unavailable_without_blocks = NULL; #endif #endif - return (&zone); + + /* Register the custom zone. At this point it won't be the default. */ + malloc_zone_register(&zone); + + /* + * Unregister and reregister the default zone. On OSX >= 10.6, + * unregistering takes the last registered zone and places it at the + * location of the specified zone. Unregistering the default zone thus + * makes the last registered one the default. On OSX < 10.6, + * unregistering shifts all registered zones. The first registered zone + * then becomes the default. + */ + do { + malloc_zone_t *default_zone = malloc_default_zone(); + malloc_zone_unregister(default_zone); + malloc_zone_register(default_zone); + } while (malloc_default_zone() != &zone); } From 3c2ba0dcbc2f4896a892fad84d5dcf5bd4c30a81 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Tue, 27 Mar 2012 14:20:13 +0200 Subject: [PATCH 083/205] Avoid crashes when system libraries use the purgeable zone allocator --- src/jemalloc.c | 2 +- src/zone.c | 31 ++++++++++++++++++++++++++----- 2 files changed, 27 insertions(+), 6 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 908485a7..c0fe6c91 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1175,7 +1175,7 @@ je_valloc(size_t size) * to inconsistently reference libc's malloc(3)-compatible functions * (https://bugzilla.mozilla.org/show_bug.cgi?id=493541). * - * These definitions interpose hooks in glibc.  The functions are actually + * These definitions interpose hooks in glibc. The functions are actually * passed an extra argument for the caller return address, which will be * ignored. */ diff --git a/src/zone.c b/src/zone.c index 4b6c75e4..9fc87bb7 100644 --- a/src/zone.c +++ b/src/zone.c @@ -3,6 +3,13 @@ # error "This source file is for zones on Darwin (OS X)." #endif +/* + * The malloc_default_purgeable_zone function is only available on >= 10.6. + * We need to check whether it is present at runtime, thus the weak_import. + */ +extern malloc_zone_t *malloc_default_purgeable_zone(void) +JEMALLOC_ATTR(weak_import); + /******************************************************************************/ /* Data. */ @@ -207,15 +214,29 @@ register_zone(void) #endif #endif - /* Register the custom zone. At this point it won't be the default. */ + /* + * The default purgeable zone is created lazily by OSX's libc. It uses + * the default zone when it is created for "small" allocations + * (< 15 KiB), but assumes the default zone is a scalable_zone. This + * obviously fails when the default zone is the jemalloc zone, so + * malloc_default_purgeable_zone is called beforehand so that the + * default purgeable zone is created when the default zone is still + * a scalable_zone. 
As purgeable zones only exist on >= 10.6, we need + * to check for the existence of malloc_default_purgeable_zone() at + * run time. + */ + if (malloc_default_purgeable_zone != NULL) + malloc_default_purgeable_zone(); + + /* Register the custom zone. At this point it won't be the default. */ malloc_zone_register(&zone); /* - * Unregister and reregister the default zone. On OSX >= 10.6, + * Unregister and reregister the default zone. On OSX >= 10.6, * unregistering takes the last registered zone and places it at the - * location of the specified zone. Unregistering the default zone thus - * makes the last registered one the default. On OSX < 10.6, - * unregistering shifts all registered zones. The first registered zone + * location of the specified zone. Unregistering the default zone thus + * makes the last registered one the default. On OSX < 10.6, + * unregistering shifts all registered zones. The first registered zone * then becomes the default. */ do { From 09a0769ba7a3d139168e606e4295f8002861355f Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 30 Mar 2012 12:11:03 -0700 Subject: [PATCH 084/205] Work around TLS deallocation via free(). glibc uses memalign()/free() to allocate/deallocate TLS, which means that it is unsafe to set TLS variables as a side effect of free() -- they may already be deallocated. Work around this by avoiding tcache_create() within free(). Reported by Mike Hommey. --- include/jemalloc/internal/arena.h | 7 ++++--- include/jemalloc/internal/tcache.h | 16 ++++++++++++++-- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index c5214893..2592e89d 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -556,7 +556,7 @@ arena_malloc(size_t size, bool zero) assert(size <= arena_maxclass); if (size <= SMALL_MAXCLASS) { - if ((tcache = tcache_get()) != NULL) + if ((tcache = tcache_get(true)) != NULL) return (tcache_alloc_small(tcache, size, zero)); else return (arena_malloc_small(choose_arena(), size, zero)); @@ -565,7 +565,8 @@ arena_malloc(size_t size, bool zero) * Initialize tcache after checking size in order to avoid * infinite recursion during tcache initialization. 
*/ - if (size <= tcache_maxclass && (tcache = tcache_get()) != NULL) + if (size <= tcache_maxclass && (tcache = tcache_get(true)) != + NULL) return (tcache_alloc_large(tcache, size, zero)); else return (arena_malloc_large(choose_arena(), size, zero)); @@ -590,7 +591,7 @@ arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr) { size_t pageind; arena_chunk_map_t *mapelm; - tcache_t *tcache = tcache_get(); + tcache_t *tcache = tcache_get(false); assert(arena != NULL); assert(chunk->arena == arena); diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index 0d999f24..12552d81 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -126,7 +126,7 @@ malloc_tsd_protos(JEMALLOC_ATTR(unused), tcache_enabled, tcache_enabled_t) void tcache_event(tcache_t *tcache); void tcache_flush(void); bool tcache_enabled_get(void); -tcache_t *tcache_get(void); +tcache_t *tcache_get(bool create); void tcache_enabled_set(bool enabled); void *tcache_alloc_easy(tcache_bin_t *tbin); void *tcache_alloc_small(tcache_t *tcache, size_t size, bool zero); @@ -205,7 +205,7 @@ tcache_enabled_set(bool enabled) } JEMALLOC_INLINE tcache_t * -tcache_get(void) +tcache_get(bool create) { tcache_t *tcache; @@ -219,6 +219,18 @@ tcache_get(void) if (tcache == TCACHE_STATE_DISABLED) return (NULL); if (tcache == NULL) { + if (create == false) { + /* + * Creating a tcache here would cause + * allocation as a side effect of free(). + * Ordinarily that would be okay since + * tcache_create() failure is a soft failure + * that doesn't propagate. However, if TLS + * data are freed via free() as in glibc, + * subtle TLS corruption could result. + */ + return (NULL); + } if (tcache_enabled_get() == false) { tcache_enabled_set(false); /* Memoize. */ return (NULL); From f2296deb57cdda01685f0d0ccf3c6e200378c673 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 30 Mar 2012 12:36:52 -0700 Subject: [PATCH 085/205] Clean up tsd (no functional changes). --- include/jemalloc/internal/tcache.h | 4 +++- include/jemalloc/internal/tsd.h | 14 ++------------ 2 files changed, 5 insertions(+), 13 deletions(-) diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index 12552d81..efae7003 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -227,7 +227,9 @@ tcache_get(bool create) * tcache_create() failure is a soft failure * that doesn't propagate. However, if TLS * data are freed via free() as in glibc, - * subtle TLS corruption could result. + * subtle corruption could result from setting + * a TLS variable after its backing memory is + * freed. */ return (NULL); } diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 0e32c612..60aaa427 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -109,13 +109,8 @@ a_attr bool a_name##_booted = false; #define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \ a_cleanup) \ /* Initialization/cleanup. 
*/ \ -a_attr void \ +a_attr bool \ a_name##_tsd_cleanup_wrapper(void *arg) \ -{ \ - \ -} \ -bool \ -a_name##_tsd_cleanup_pending(void *arg) \ { \ bool (*cleanup)(void *) = arg; \ \ @@ -131,7 +126,7 @@ a_name##_tsd_boot(void) \ \ if (a_cleanup != malloc_tsd_no_cleanup) { \ malloc_tsd_cleanup_register( \ - &a_name##_tsd_cleanup_pending, a_cleanup); \ + &a_name##_tsd_cleanup_wrapper, a_cleanup); \ } \ a_name##_booted = true; \ return (false); \ @@ -157,11 +152,6 @@ a_name##_tsd_set(a_type *val) \ #define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \ a_cleanup) \ /* Initialization/cleanup. */ \ -a_attr void \ -a_name##_tsd_cleanup_wrapper(void *arg) \ -{ \ - \ -} \ a_attr bool \ a_name##_tsd_boot(void) \ { \ From 4eeb52f080edb1f4b518249388f6c617386c00e5 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 2 Apr 2012 01:46:25 -0700 Subject: [PATCH 086/205] Remove vsnprintf() and strtoumax() validation. Remove code that validates malloc_vsnprintf() and malloc_strtoumax() against their namesakes. The validation code has adequately served its usefulness at this point, and it isn't worth dealing with the different formatting for %p with glibc versus other implementations for NULL pointers ("(nil)" vs. "0x0"). Reported by Mike Hommey. --- .../jemalloc/internal/jemalloc_internal.h.in | 2 -- src/jemalloc.c | 2 +- src/util.c | 27 ------------------- 3 files changed, 1 insertion(+), 30 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index b7b8df8b..9e57b62b 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -374,8 +374,6 @@ extern malloc_mutex_t arenas_lock; /* Protects arenas initialization. */ extern arena_t **arenas; extern unsigned narenas; -extern bool malloc_initialized; - arena_t *arenas_extend(unsigned ind); void arenas_cleanup(void *arg); arena_t *choose_arena_hard(void); diff --git a/src/jemalloc.c b/src/jemalloc.c index c0fe6c91..5b1e0fdd 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -38,7 +38,7 @@ arena_t **arenas; unsigned narenas; /* Set to true once the allocator has been initialized. */ -bool malloc_initialized = false; +static bool malloc_initialized = false; #ifdef JEMALLOC_THREADED_INIT /* Used to let the initializing thread recursively allocate. */ diff --git a/src/util.c b/src/util.c index 895aa198..107bdcff 100644 --- a/src/util.c +++ b/src/util.c @@ -179,20 +179,6 @@ malloc_strtoumax(const char *nptr, char **endptr, int base) *endptr = (char *)p; } - if (config_debug && malloc_initialized) { - uintmax_t tret; - int perrno; - char *pend; - - perrno = errno; - if (endptr != NULL) - pend = *endptr; - tret = strtoumax(nptr, endptr, base); - assert(tret == ret); - assert(errno == perrno); - assert(endptr == NULL || *endptr == pend); - } - return (ret); } @@ -563,19 +549,6 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) str[size - 1] = '\0'; ret = i; - if (config_debug && malloc_initialized) { - char buf[MALLOC_PRINTF_BUFSIZE]; - int tret; - - /* - * Verify that the resulting string matches what vsnprintf() - * would have created. - */ - tret = vsnprintf(buf, sizeof(buf), format, tap); - assert(tret == ret); - assert(strcmp(buf, str) == 0); - } - #undef APPEND_C #undef APPEND_S #undef APPEND_PADDED_S From 722b370399fd6734de6781285ce9a0cffd547bdd Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 2 Apr 2012 14:09:07 -0700 Subject: [PATCH 087/205] Use ffsl() in ALLOCM_ALIGN(). 
Use ffsl() rather than ffs() plus bitshifting in ALLOCM_ALIGN(). The original rational for using ffs() was portability, but the bitmap code has since induced a hard dependency on ffsl(). --- include/jemalloc/jemalloc.h.in | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/include/jemalloc/jemalloc.h.in b/include/jemalloc/jemalloc.h.in index f0581dbd..8825a943 100644 --- a/include/jemalloc/jemalloc.h.in +++ b/include/jemalloc/jemalloc.h.in @@ -4,7 +4,6 @@ extern "C" { #endif -#include #include #define JEMALLOC_VERSION "@jemalloc_version@" @@ -18,11 +17,7 @@ extern "C" { #ifdef JEMALLOC_EXPERIMENTAL #define ALLOCM_LG_ALIGN(la) (la) -#if LG_SIZEOF_PTR == 2 -#define ALLOCM_ALIGN(a) (ffs(a)-1) -#else -#define ALLOCM_ALIGN(a) ((a < (size_t)INT_MAX) ? ffs(a)-1 : ffs(a>>32)+31) -#endif +#define ALLOCM_ALIGN(a) (ffsl(a)-1) #define ALLOCM_ZERO ((int)0x40) #define ALLOCM_NO_MOVE ((int)0x80) From 80b25932ca52e9506d4e2b8ee0fa58aa5ae3306d Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Mon, 2 Apr 2012 09:04:54 +0200 Subject: [PATCH 088/205] Move last bit of zone initialization in zone.c, and lazy-initialize --- .../jemalloc/internal/jemalloc_internal.h.in | 12 ---------- include/jemalloc/internal/zone.h | 22 ------------------- src/jemalloc.c | 11 ---------- src/zone.c | 1 + 4 files changed, 1 insertion(+), 45 deletions(-) delete mode 100644 include/jemalloc/internal/zone.h diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 9e57b62b..db2deb03 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -304,9 +304,6 @@ static const bool config_ivsalloc = #include "jemalloc/internal/rtree.h" #include "jemalloc/internal/tcache.h" #include "jemalloc/internal/hash.h" -#ifdef JEMALLOC_ZONE -#include "jemalloc/internal/zone.h" -#endif #include "jemalloc/internal/prof.h" #undef JEMALLOC_H_TYPES @@ -332,9 +329,6 @@ static const bool config_ivsalloc = #include "jemalloc/internal/rtree.h" #include "jemalloc/internal/tcache.h" #include "jemalloc/internal/hash.h" -#ifdef JEMALLOC_ZONE -#include "jemalloc/internal/zone.h" -#endif #include "jemalloc/internal/prof.h" typedef struct { @@ -400,9 +394,6 @@ void jemalloc_postfork_child(void); #include "jemalloc/internal/rtree.h" #include "jemalloc/internal/tcache.h" #include "jemalloc/internal/hash.h" -#ifdef JEMALLOC_ZONE -#include "jemalloc/internal/zone.h" -#endif #include "jemalloc/internal/prof.h" #undef JEMALLOC_H_EXTERNS @@ -565,9 +556,6 @@ choose_arena(void) #include "jemalloc/internal/tcache.h" #include "jemalloc/internal/arena.h" #include "jemalloc/internal/hash.h" -#ifdef JEMALLOC_ZONE -#include "jemalloc/internal/zone.h" -#endif #ifndef JEMALLOC_ENABLE_INLINE void *imalloc(size_t size); diff --git a/include/jemalloc/internal/zone.h b/include/jemalloc/internal/zone.h deleted file mode 100644 index 9eb4252f..00000000 --- a/include/jemalloc/internal/zone.h +++ /dev/null @@ -1,22 +0,0 @@ -#ifndef JEMALLOC_ZONE -# error "This source file is for zones on Darwin (OS X)." 
-#endif -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -void register_zone(void); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff --git a/src/jemalloc.c b/src/jemalloc.c index 5b1e0fdd..1deabcd9 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -717,17 +717,6 @@ malloc_init_hard(void) return (false); } -#ifdef JEMALLOC_ZONE -JEMALLOC_ATTR(constructor) -void -jemalloc_darwin_init(void) -{ - - if (malloc_init_hard() == false) - register_zone(); -} -#endif - /* * End initialization functions. */ diff --git a/src/zone.c b/src/zone.c index 9fc87bb7..6c1e415b 100644 --- a/src/zone.c +++ b/src/zone.c @@ -166,6 +166,7 @@ zone_force_unlock(malloc_zone_t *zone) jemalloc_postfork_parent(); } +JEMALLOC_ATTR(constructor) void register_zone(void) { From 96d4120ac08db3f2d566e8e5c3bc134a24aa0afc Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 2 Apr 2012 14:50:03 -0700 Subject: [PATCH 089/205] Avoid NULL check in free() and malloc_usable_size(). Generalize isalloc() to handle NULL pointers in such a way that the NULL checking overhead is only paid when introspecting huge allocations (or NULL). This allows free() and malloc_usable_size() to no longer check for NULL. Submitted by Igor Bukanov and Mike Hommey. --- Makefile.in | 2 +- .../jemalloc/internal/jemalloc_internal.h.in | 10 +++---- src/jemalloc.c | 26 ++++++++----------- test/null.c | 19 ++++++++++++++ test/null.exp | 2 ++ 5 files changed, 37 insertions(+), 22 deletions(-) create mode 100644 test/null.c create mode 100644 test/null.exp diff --git a/Makefile.in b/Makefile.in index 821c0634..d8b671ad 100644 --- a/Makefile.in +++ b/Makefile.in @@ -65,7 +65,7 @@ DOCS_HTML := $(DOCS_XML:@objroot@%.xml=@srcroot@%.html) DOCS_MAN3 := $(DOCS_XML:@objroot@%.xml=@srcroot@%.3) DOCS := $(DOCS_HTML) $(DOCS_MAN3) CTESTS := @srcroot@test/aligned_alloc.c @srcroot@test/allocated.c \ - @srcroot@test/bitmap.c @srcroot@test/mremap.c \ + @srcroot@test/bitmap.c @srcroot@test/mremap.c @srcroot@test/null.c \ @srcroot@test/posix_memalign.c @srcroot@test/thread_arena.c \ @srcroot@test/thread_tcache_enabled.c ifeq (@enable_experimental@, 1) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index db2deb03..ed21bbe7 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -633,8 +633,6 @@ isalloc(const void *ptr) size_t ret; arena_chunk_t *chunk; - assert(ptr != NULL); - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (chunk != ptr) { /* Region. 
*/ @@ -642,8 +640,10 @@ isalloc(const void *ptr) ret = arena_salloc_demote(ptr); else ret = arena_salloc(ptr); - } else + } else if (ptr != NULL) ret = huge_salloc(ptr); + else + ret = 0; return (ret); } @@ -664,12 +664,10 @@ idalloc(void *ptr) { arena_chunk_t *chunk; - assert(ptr != NULL); - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (chunk != ptr) arena_dalloc(chunk->arena, chunk, ptr); - else + else if (ptr != NULL) huge_dalloc(ptr, true); } diff --git a/src/jemalloc.c b/src/jemalloc.c index 1deabcd9..86ce695b 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1100,22 +1100,18 @@ JEMALLOC_ATTR(visibility("default")) void je_free(void *ptr) { + size_t usize; - if (ptr != NULL) { - size_t usize; + assert(malloc_initialized || IS_INITIALIZER); - assert(malloc_initialized || IS_INITIALIZER); - - if (config_prof && opt_prof) { - usize = isalloc(ptr); - prof_free(ptr, usize); - } else if (config_stats) { - usize = isalloc(ptr); - } - if (config_stats) - thread_allocated_tsd_get()->deallocated += usize; - idalloc(ptr); - } + if (config_prof && opt_prof) { + usize = isalloc(ptr); + prof_free(ptr, usize); + } else if (config_stats) + usize = isalloc(ptr); + if (config_stats) + thread_allocated_tsd_get()->deallocated += usize; + idalloc(ptr); } /* @@ -1200,7 +1196,7 @@ je_malloc_usable_size(const void *ptr) if (config_ivsalloc) ret = ivsalloc(ptr); else - ret = (ptr != NULL) ? isalloc(ptr) : 0; + ret = isalloc(ptr); return (ret); } diff --git a/test/null.c b/test/null.c new file mode 100644 index 00000000..ccd7ced3 --- /dev/null +++ b/test/null.c @@ -0,0 +1,19 @@ +#include +#include + +#define JEMALLOC_MANGLE +#include "jemalloc_test.h" + +int +main(void) +{ + + fprintf(stderr, "Test begin\n"); + + free(malloc(1)); + free(NULL); + assert(malloc_usable_size(NULL) == 0); + + fprintf(stderr, "Test end\n"); + return (0); +} diff --git a/test/null.exp b/test/null.exp new file mode 100644 index 00000000..369a88dd --- /dev/null +++ b/test/null.exp @@ -0,0 +1,2 @@ +Test begin +Test end From f0047372673da7f213f733465dab0d8825eb1c9f Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 2 Apr 2012 15:18:24 -0700 Subject: [PATCH 090/205] Revert "Avoid NULL check in free() and malloc_usable_size()." This reverts commit 96d4120ac08db3f2d566e8e5c3bc134a24aa0afc. ivsalloc() depends on chunks_rtree being initialized. This can be worked around via a NULL pointer check. However, thread_allocated_tsd_get() also depends on initialization having occurred, and there is no way to guard its call in free() that is cheaper than checking whether ptr is NULL. 
--- Makefile.in | 2 +- .../jemalloc/internal/jemalloc_internal.h.in | 10 ++++--- src/jemalloc.c | 26 +++++++++++-------- test/null.c | 19 -------------- test/null.exp | 2 -- 5 files changed, 22 insertions(+), 37 deletions(-) delete mode 100644 test/null.c delete mode 100644 test/null.exp diff --git a/Makefile.in b/Makefile.in index d8b671ad..821c0634 100644 --- a/Makefile.in +++ b/Makefile.in @@ -65,7 +65,7 @@ DOCS_HTML := $(DOCS_XML:@objroot@%.xml=@srcroot@%.html) DOCS_MAN3 := $(DOCS_XML:@objroot@%.xml=@srcroot@%.3) DOCS := $(DOCS_HTML) $(DOCS_MAN3) CTESTS := @srcroot@test/aligned_alloc.c @srcroot@test/allocated.c \ - @srcroot@test/bitmap.c @srcroot@test/mremap.c @srcroot@test/null.c \ + @srcroot@test/bitmap.c @srcroot@test/mremap.c \ @srcroot@test/posix_memalign.c @srcroot@test/thread_arena.c \ @srcroot@test/thread_tcache_enabled.c ifeq (@enable_experimental@, 1) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index ed21bbe7..db2deb03 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -633,6 +633,8 @@ isalloc(const void *ptr) size_t ret; arena_chunk_t *chunk; + assert(ptr != NULL); + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (chunk != ptr) { /* Region. */ @@ -640,10 +642,8 @@ isalloc(const void *ptr) ret = arena_salloc_demote(ptr); else ret = arena_salloc(ptr); - } else if (ptr != NULL) + } else ret = huge_salloc(ptr); - else - ret = 0; return (ret); } @@ -664,10 +664,12 @@ idalloc(void *ptr) { arena_chunk_t *chunk; + assert(ptr != NULL); + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (chunk != ptr) arena_dalloc(chunk->arena, chunk, ptr); - else if (ptr != NULL) + else huge_dalloc(ptr, true); } diff --git a/src/jemalloc.c b/src/jemalloc.c index 86ce695b..1deabcd9 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1100,18 +1100,22 @@ JEMALLOC_ATTR(visibility("default")) void je_free(void *ptr) { - size_t usize; - assert(malloc_initialized || IS_INITIALIZER); + if (ptr != NULL) { + size_t usize; - if (config_prof && opt_prof) { - usize = isalloc(ptr); - prof_free(ptr, usize); - } else if (config_stats) - usize = isalloc(ptr); - if (config_stats) - thread_allocated_tsd_get()->deallocated += usize; - idalloc(ptr); + assert(malloc_initialized || IS_INITIALIZER); + + if (config_prof && opt_prof) { + usize = isalloc(ptr); + prof_free(ptr, usize); + } else if (config_stats) { + usize = isalloc(ptr); + } + if (config_stats) + thread_allocated_tsd_get()->deallocated += usize; + idalloc(ptr); + } } /* @@ -1196,7 +1200,7 @@ je_malloc_usable_size(const void *ptr) if (config_ivsalloc) ret = ivsalloc(ptr); else - ret = isalloc(ptr); + ret = (ptr != NULL) ? isalloc(ptr) : 0; return (ret); } diff --git a/test/null.c b/test/null.c deleted file mode 100644 index ccd7ced3..00000000 --- a/test/null.c +++ /dev/null @@ -1,19 +0,0 @@ -#include -#include - -#define JEMALLOC_MANGLE -#include "jemalloc_test.h" - -int -main(void) -{ - - fprintf(stderr, "Test begin\n"); - - free(malloc(1)); - free(NULL); - assert(malloc_usable_size(NULL) == 0); - - fprintf(stderr, "Test end\n"); - return (0); -} diff --git a/test/null.exp b/test/null.exp deleted file mode 100644 index 369a88dd..00000000 --- a/test/null.exp +++ /dev/null @@ -1,2 +0,0 @@ -Test begin -Test end From ae4c7b4b4092906c641d69b4bf9fcb4a7d50790d Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 2 Apr 2012 07:04:34 -0700 Subject: [PATCH 091/205] Clean up *PAGE* macros. 
s/PAGE_SHIFT/LG_PAGE/g and s/PAGE_SIZE/PAGE/g. Remove remnants of the dynamic-page-shift code. Rename the "arenas.pagesize" mallctl to "arenas.page". Remove the "arenas.chunksize" mallctl, which is redundant with "opt.lg_chunk". --- doc/jemalloc.xml.in | 11 +- include/jemalloc/internal/arena.h | 22 +- .../jemalloc/internal/jemalloc_internal.h.in | 45 +--- include/jemalloc/internal/size_classes.sh | 2 +- include/jemalloc/internal/tcache.h | 10 +- src/arena.c | 206 +++++++++--------- src/chunk.c | 4 +- src/ctl.c | 13 +- src/jemalloc.c | 31 +-- src/prof.c | 2 +- src/stats.c | 3 + src/tcache.c | 10 +- src/zone.c | 2 +- 13 files changed, 148 insertions(+), 213 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 0b468b04..28760b21 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1167,22 +1167,13 @@ malloc_conf = "xmalloc:true";]]> - arenas.pagesize + arenas.page (size_t) r- Page size. - - - arenas.chunksize - (size_t) - r- - - Chunk size. - - arenas.tcache_max diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 2592e89d..41df11fd 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -130,10 +130,10 @@ struct arena_chunk_map_s { * xxxxxxxx xxxxxxxx xxxx---- ----xxxx * -------- -------- -------- ----D-LA * - * Large (sampled, size <= PAGE_SIZE): + * Large (sampled, size <= PAGE): * ssssssss ssssssss sssscccc ccccD-LA * - * Large (not sampled, size == PAGE_SIZE): + * Large (not sampled, size == PAGE): * ssssssss ssssssss ssss---- ----D-LA */ size_t bits; @@ -486,7 +486,7 @@ arena_prof_ctx_get(const void *ptr) assert(CHUNK_ADDR2BASE(ptr) != ptr); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; + pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; mapbits = chunk->map[pageind-map_bias].bits; assert((mapbits & CHUNK_MAP_ALLOCATED) != 0); if ((mapbits & CHUNK_MAP_LARGE) == 0) { @@ -494,8 +494,8 @@ arena_prof_ctx_get(const void *ptr) ret = (prof_ctx_t *)(uintptr_t)1U; else { arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + - (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) << - PAGE_SHIFT)); + (uintptr_t)((pageind - (mapbits >> LG_PAGE)) << + LG_PAGE)); size_t binind = arena_bin_index(chunk->arena, run->bin); arena_bin_info_t *bin_info = &arena_bin_info[binind]; unsigned regind; @@ -522,14 +522,14 @@ arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx) assert(CHUNK_ADDR2BASE(ptr) != ptr); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; + pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; mapbits = chunk->map[pageind-map_bias].bits; assert((mapbits & CHUNK_MAP_ALLOCATED) != 0); if ((mapbits & CHUNK_MAP_LARGE) == 0) { if (prof_promote == false) { arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + - (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) << - PAGE_SHIFT)); + (uintptr_t)((pageind - (mapbits >> LG_PAGE)) << + LG_PAGE)); arena_bin_t *bin = run->bin; size_t binind; arena_bin_info_t *bin_info; @@ -598,7 +598,7 @@ arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr) assert(ptr != NULL); assert(CHUNK_ADDR2BASE(ptr) != ptr); - pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; + pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; mapelm = &chunk->map[pageind-map_bias]; assert((mapelm->bits & CHUNK_MAP_ALLOCATED) != 0); if ((mapelm->bits & CHUNK_MAP_LARGE) == 0) { @@ -610,8 +610,8 @@ arena_dalloc(arena_t *arena, arena_chunk_t *chunk, 
void *ptr) arena_bin_t *bin; run = (arena_run_t *)((uintptr_t)chunk + - (uintptr_t)((pageind - (mapelm->bits >> - PAGE_SHIFT)) << PAGE_SHIFT)); + (uintptr_t)((pageind - (mapelm->bits >> LG_PAGE)) << + LG_PAGE)); bin = run->bin; if (config_debug) { size_t binind = arena_bin_index(arena, bin); diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index db2deb03..0c22bfb7 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -58,13 +58,6 @@ static const bool config_dss = false #endif ; -static const bool config_dynamic_page_shift = -#ifdef JEMALLOC_DYNAMIC_PAGE_SHIFT - true -#else - false -#endif - ; static const bool config_fill = #ifdef JEMALLOC_FILL true @@ -266,20 +259,12 @@ static const bool config_ivsalloc = (((s) + CACHELINE_MASK) & ~CACHELINE_MASK) /* Page size. STATIC_PAGE_SHIFT is determined by the configure script. */ -#define STATIC_PAGE_SIZE ((size_t)(1U << STATIC_PAGE_SHIFT)) -#define STATIC_PAGE_MASK ((size_t)(STATIC_PAGE_SIZE - 1)) -#ifdef PAGE_SHIFT -# undef PAGE_SHIFT -#endif -#ifdef PAGE_SIZE -# undef PAGE_SIZE -#endif #ifdef PAGE_MASK # undef PAGE_MASK #endif -#define PAGE_SHIFT STATIC_PAGE_SHIFT -#define PAGE_SIZE STATIC_PAGE_SIZE -#define PAGE_MASK STATIC_PAGE_MASK +#define LG_PAGE STATIC_PAGE_SHIFT +#define PAGE ((size_t)(1U << STATIC_PAGE_SHIFT)) +#define PAGE_MASK ((size_t)(PAGE - 1)) /* Return the smallest pagesize multiple that is >= s. */ #define PAGE_CEILING(s) \ @@ -351,12 +336,6 @@ extern bool opt_xmalloc; extern bool opt_zero; extern size_t opt_narenas; -#ifdef DYNAMIC_PAGE_SHIFT -extern size_t pagesize; -extern size_t pagesize_mask; -extern size_t lg_pagesize; -#endif - /* Number of CPUs. */ extern unsigned ncpus; @@ -479,7 +458,7 @@ sa2u(size_t size, size_t alignment, size_t *run_size_p) return (0); } - if (usize <= arena_maxclass && alignment <= PAGE_SIZE) { + if (usize <= arena_maxclass && alignment <= PAGE) { if (usize <= SMALL_MAXCLASS) return (arena_bin_info[SMALL_SIZE2BIN(usize)].reg_size); return (PAGE_CEILING(usize)); @@ -494,7 +473,7 @@ sa2u(size_t size, size_t alignment, size_t *run_size_p) usize = PAGE_CEILING(size); /* * (usize < size) protects against very large sizes within - * PAGE_SIZE of SIZE_T_MAX. + * PAGE of SIZE_T_MAX. * * (usize + alignment < usize) protects against the * combination of maximal alignment and usize large enough @@ -514,18 +493,18 @@ sa2u(size_t size, size_t alignment, size_t *run_size_p) * would need to allocate in order to guarantee the alignment. */ if (usize >= alignment) - run_size = usize + alignment - PAGE_SIZE; + run_size = usize + alignment - PAGE; else { /* * It is possible that (alignment << 1) will cause * overflow, but it doesn't matter because we also - * subtract PAGE_SIZE, which in the case of overflow - * leaves us with a very large run_size. That causes - * the first conditional below to fail, which means - * that the bogus run_size value never gets used for + * subtract PAGE, which in the case of overflow leaves + * us with a very large run_size. That causes the + * first conditional below to fail, which means that + * the bogus run_size value never gets used for * anything important. 
*/ - run_size = (alignment << 1) - PAGE_SIZE; + run_size = (alignment << 1) - PAGE; } if (run_size_p != NULL) *run_size_p = run_size; @@ -600,7 +579,7 @@ ipalloc(size_t usize, size_t alignment, bool zero) assert(usize != 0); assert(usize == sa2u(usize, alignment, NULL)); - if (usize <= arena_maxclass && alignment <= PAGE_SIZE) + if (usize <= arena_maxclass && alignment <= PAGE) ret = arena_malloc(usize, zero); else { size_t run_size JEMALLOC_CC_SILENCE_INIT(0); diff --git a/include/jemalloc/internal/size_classes.sh b/include/jemalloc/internal/size_classes.sh index 79b4ba23..9829a2b3 100755 --- a/include/jemalloc/internal/size_classes.sh +++ b/include/jemalloc/internal/size_classes.sh @@ -36,7 +36,7 @@ while [ ${lg_q} -le ${lg_qmax} ] ; do lg_p=${lg_pmin} while [ ${lg_p} -le ${lg_pmax} ] ; do cat <> PAGE_SHIFT) - 1; + binind = NBINS + (size >> LG_PAGE) - 1; assert(binind < nhbins); tbin = &tcache->tbins[binind]; ret = tcache_alloc_easy(tbin); @@ -386,7 +386,7 @@ tcache_alloc_large(tcache_t *tcache, size_t size, bool zero) arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ret); size_t pageind = (((uintptr_t)ret - (uintptr_t)chunk) >> - PAGE_SHIFT); + LG_PAGE); chunk->map[pageind-map_bias].bits &= ~CHUNK_MAP_CLASS_MASK; } @@ -426,10 +426,10 @@ tcache_dalloc_small(tcache_t *tcache, void *ptr) chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); arena = chunk->arena; - pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; + pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; mapelm = &chunk->map[pageind-map_bias]; run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - - (mapelm->bits >> PAGE_SHIFT)) << PAGE_SHIFT)); + (mapelm->bits >> LG_PAGE)) << LG_PAGE)); bin = run->bin; binind = ((uintptr_t)bin - (uintptr_t)&arena->bins) / sizeof(arena_bin_t); @@ -462,7 +462,7 @@ tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size) assert(arena_salloc(ptr) > SMALL_MAXCLASS); assert(arena_salloc(ptr) <= tcache_maxclass); - binind = NBINS + (size >> PAGE_SHIFT) - 1; + binind = NBINS + (size >> LG_PAGE) - 1; if (config_fill && opt_junk) memset(ptr, 0x5a, size); diff --git a/src/arena.c b/src/arena.c index 898f8c7d..b7e14228 100644 --- a/src/arena.c +++ b/src/arena.c @@ -176,10 +176,9 @@ static inline void arena_chunk_validate_zeroed(arena_chunk_t *chunk, size_t run_ind) { size_t i; - UNUSED size_t *p = (size_t *)((uintptr_t)chunk + (run_ind << - PAGE_SHIFT)); + UNUSED size_t *p = (size_t *)((uintptr_t)chunk + (run_ind << LG_PAGE)); - for (i = 0; i < PAGE_SIZE / sizeof(size_t); i++) + for (i = 0; i < PAGE / sizeof(size_t); i++) assert(p[i] == 0); } @@ -193,16 +192,15 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, arena_avail_tree_t *runs_avail; chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); - run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk) - >> PAGE_SHIFT); + run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE); flag_dirty = chunk->map[run_ind-map_bias].bits & CHUNK_MAP_DIRTY; runs_avail = (flag_dirty != 0) ? &arena->runs_avail_dirty : &arena->runs_avail_clean; total_pages = (chunk->map[run_ind-map_bias].bits & ~PAGE_MASK) >> - PAGE_SHIFT; + LG_PAGE; assert((chunk->map[run_ind+total_pages-1-map_bias].bits & CHUNK_MAP_DIRTY) == flag_dirty); - need_pages = (size >> PAGE_SHIFT); + need_pages = (size >> LG_PAGE); assert(need_pages > 0); assert(need_pages <= total_pages); rem_pages = total_pages - need_pages; @@ -214,8 +212,8 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, * multiple. 
*/ size_t cactive_diff = CHUNK_CEILING((arena->nactive + - need_pages) << PAGE_SHIFT) - CHUNK_CEILING(arena->nactive << - PAGE_SHIFT); + need_pages) << LG_PAGE) - CHUNK_CEILING(arena->nactive << + LG_PAGE); if (cactive_diff != 0) stats_cactive_add(cactive_diff); } @@ -225,16 +223,16 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, if (rem_pages > 0) { if (flag_dirty != 0) { chunk->map[run_ind+need_pages-map_bias].bits = - (rem_pages << PAGE_SHIFT) | CHUNK_MAP_DIRTY; + (rem_pages << LG_PAGE) | CHUNK_MAP_DIRTY; chunk->map[run_ind+total_pages-1-map_bias].bits = - (rem_pages << PAGE_SHIFT) | CHUNK_MAP_DIRTY; + (rem_pages << LG_PAGE) | CHUNK_MAP_DIRTY; } else { chunk->map[run_ind+need_pages-map_bias].bits = - (rem_pages << PAGE_SHIFT) | + (rem_pages << LG_PAGE) | (chunk->map[run_ind+need_pages-map_bias].bits & CHUNK_MAP_UNZEROED); chunk->map[run_ind+total_pages-1-map_bias].bits = - (rem_pages << PAGE_SHIFT) | + (rem_pages << LG_PAGE) | (chunk->map[run_ind+total_pages-1-map_bias].bits & CHUNK_MAP_UNZEROED); } @@ -264,8 +262,7 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, & CHUNK_MAP_UNZEROED) != 0) { memset((void *)((uintptr_t) chunk + ((run_ind+i) << - PAGE_SHIFT)), 0, - PAGE_SIZE); + LG_PAGE)), 0, PAGE); } else if (config_debug) { arena_chunk_validate_zeroed( chunk, run_ind+i); @@ -277,8 +274,7 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, * zeroed. */ memset((void *)((uintptr_t)chunk + (run_ind << - PAGE_SHIFT)), 0, (need_pages << - PAGE_SHIFT)); + LG_PAGE)), 0, (need_pages << LG_PAGE)); } } @@ -310,7 +306,7 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, == 0) arena_chunk_validate_zeroed(chunk, run_ind); for (i = 1; i < need_pages - 1; i++) { - chunk->map[run_ind+i-map_bias].bits = (i << PAGE_SHIFT) + chunk->map[run_ind+i-map_bias].bits = (i << LG_PAGE) | (chunk->map[run_ind+i-map_bias].bits & CHUNK_MAP_UNZEROED) | CHUNK_MAP_ALLOCATED; if (config_debug && flag_dirty == 0 && @@ -319,7 +315,7 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, arena_chunk_validate_zeroed(chunk, run_ind+i); } chunk->map[run_ind+need_pages-1-map_bias].bits = ((need_pages - - 1) << PAGE_SHIFT) | + - 1) << LG_PAGE) | (chunk->map[run_ind+need_pages-1-map_bias].bits & CHUNK_MAP_UNZEROED) | CHUNK_MAP_ALLOCATED | flag_dirty; if (config_debug && flag_dirty == 0 && @@ -460,7 +456,7 @@ arena_run_alloc(arena_t *arena, size_t size, bool large, bool zero) + map_bias; run = (arena_run_t *)((uintptr_t)run_chunk + (pageind << - PAGE_SHIFT)); + LG_PAGE)); arena_run_split(arena, run, size, large, zero); return (run); } @@ -472,7 +468,7 @@ arena_run_alloc(arena_t *arena, size_t size, bool large, bool zero) + map_bias; run = (arena_run_t *)((uintptr_t)run_chunk + (pageind << - PAGE_SHIFT)); + LG_PAGE)); arena_run_split(arena, run, size, large, zero); return (run); } @@ -482,8 +478,7 @@ arena_run_alloc(arena_t *arena, size_t size, bool large, bool zero) */ chunk = arena_chunk_alloc(arena); if (chunk != NULL) { - run = (arena_run_t *)((uintptr_t)chunk + (map_bias << - PAGE_SHIFT)); + run = (arena_run_t *)((uintptr_t)chunk + (map_bias << LG_PAGE)); arena_run_split(arena, run, size, large, zero); return (run); } @@ -501,7 +496,7 @@ arena_run_alloc(arena_t *arena, size_t size, bool large, bool zero) + map_bias; run = (arena_run_t *)((uintptr_t)run_chunk + (pageind << - PAGE_SHIFT)); + LG_PAGE)); arena_run_split(arena, run, size, large, zero); return (run); } @@ -513,7 +508,7 @@ 
arena_run_alloc(arena_t *arena, size_t size, bool large, bool zero) + map_bias; run = (arena_run_t *)((uintptr_t)run_chunk + (pageind << - PAGE_SHIFT)); + LG_PAGE)); arena_run_split(arena, run, size, large, zero); return (run); } @@ -582,7 +577,7 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) if ((mapelm->bits & CHUNK_MAP_ALLOCATED) == 0) { size_t npages; - npages = mapelm->bits >> PAGE_SHIFT; + npages = mapelm->bits >> LG_PAGE; assert(pageind + npages <= chunk_npages); if (mapelm->bits & CHUNK_MAP_DIRTY) { size_t i; @@ -590,7 +585,7 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) arena_avail_tree_remove( &arena->runs_avail_dirty, mapelm); - mapelm->bits = (npages << PAGE_SHIFT) | + mapelm->bits = (npages << LG_PAGE) | flag_unzeroed | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; /* @@ -615,9 +610,9 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) */ size_t cactive_diff = CHUNK_CEILING((arena->nactive + - npages) << PAGE_SHIFT) - + npages) << LG_PAGE) - CHUNK_CEILING(arena->nactive << - PAGE_SHIFT); + LG_PAGE); if (cactive_diff != 0) stats_cactive_add(cactive_diff); } @@ -631,17 +626,17 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) } else { /* Skip allocated run. */ if (mapelm->bits & CHUNK_MAP_LARGE) - pageind += mapelm->bits >> PAGE_SHIFT; + pageind += mapelm->bits >> LG_PAGE; else { arena_run_t *run = (arena_run_t *)((uintptr_t) - chunk + (uintptr_t)(pageind << PAGE_SHIFT)); + chunk + (uintptr_t)(pageind << LG_PAGE)); - assert((mapelm->bits >> PAGE_SHIFT) == 0); + assert((mapelm->bits >> LG_PAGE) == 0); size_t binind = arena_bin_index(arena, run->bin); arena_bin_info_t *bin_info = &arena_bin_info[binind]; - pageind += bin_info->run_size >> PAGE_SHIFT; + pageind += bin_info->run_size >> LG_PAGE; } } } @@ -662,7 +657,7 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) ql_foreach(mapelm, &mapelms, u.ql_link) { size_t pageind = (((uintptr_t)mapelm - (uintptr_t)chunk->map) / sizeof(arena_chunk_map_t)) + map_bias; - size_t npages = mapelm->bits >> PAGE_SHIFT; + size_t npages = mapelm->bits >> LG_PAGE; assert(pageind + npages <= chunk_npages); assert(ndirty >= npages); @@ -676,8 +671,8 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) #else # error "No method defined for purging unused dirty pages." 
#endif - madvise((void *)((uintptr_t)chunk + (pageind << PAGE_SHIFT)), - (npages << PAGE_SHIFT), MADV_PURGE); + madvise((void *)((uintptr_t)chunk + (pageind << LG_PAGE)), + (npages << LG_PAGE), MADV_PURGE); #undef MADV_PURGE if (config_stats) nmadvise++; @@ -693,7 +688,7 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) size_t pageind = (((uintptr_t)mapelm - (uintptr_t)chunk->map) / sizeof(arena_chunk_map_t)) + map_bias; arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + - (uintptr_t)(pageind << PAGE_SHIFT)); + (uintptr_t)(pageind << LG_PAGE)); ql_remove(&mapelms, mapelm, u.ql_link); arena_run_dalloc(arena, run, false); @@ -804,33 +799,31 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty) arena_avail_tree_t *runs_avail; chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); - run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk) - >> PAGE_SHIFT); + run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE); assert(run_ind >= map_bias); assert(run_ind < chunk_npages); if ((chunk->map[run_ind-map_bias].bits & CHUNK_MAP_LARGE) != 0) { size = chunk->map[run_ind-map_bias].bits & ~PAGE_MASK; - assert(size == PAGE_SIZE || - (chunk->map[run_ind+(size>>PAGE_SHIFT)-1-map_bias].bits & + assert(size == PAGE || + (chunk->map[run_ind+(size>>LG_PAGE)-1-map_bias].bits & ~PAGE_MASK) == 0); - assert((chunk->map[run_ind+(size>>PAGE_SHIFT)-1-map_bias].bits & + assert((chunk->map[run_ind+(size>>LG_PAGE)-1-map_bias].bits & CHUNK_MAP_LARGE) != 0); - assert((chunk->map[run_ind+(size>>PAGE_SHIFT)-1-map_bias].bits & + assert((chunk->map[run_ind+(size>>LG_PAGE)-1-map_bias].bits & CHUNK_MAP_ALLOCATED) != 0); } else { size_t binind = arena_bin_index(arena, run->bin); arena_bin_info_t *bin_info = &arena_bin_info[binind]; size = bin_info->run_size; } - run_pages = (size >> PAGE_SHIFT); + run_pages = (size >> LG_PAGE); if (config_stats) { /* * Update stats_cactive if nactive is crossing a chunk * multiple. 
*/ - size_t cactive_diff = CHUNK_CEILING(arena->nactive << - PAGE_SHIFT) - CHUNK_CEILING((arena->nactive - run_pages) << - PAGE_SHIFT); + size_t cactive_diff = CHUNK_CEILING(arena->nactive << LG_PAGE) - + CHUNK_CEILING((arena->nactive - run_pages) << LG_PAGE); if (cactive_diff != 0) stats_cactive_sub(cactive_diff); } @@ -869,7 +862,7 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty) CHUNK_MAP_DIRTY) == flag_dirty) { size_t nrun_size = chunk->map[run_ind+run_pages-map_bias].bits & ~PAGE_MASK; - size_t nrun_pages = nrun_size >> PAGE_SHIFT; + size_t nrun_pages = nrun_size >> LG_PAGE; /* * Remove successor from runs_avail; the coalesced run is @@ -900,7 +893,7 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty) CHUNK_MAP_DIRTY) == flag_dirty) { size_t prun_size = chunk->map[run_ind-1-map_bias].bits & ~PAGE_MASK; - size_t prun_pages = prun_size >> PAGE_SHIFT; + size_t prun_pages = prun_size >> LG_PAGE; run_ind -= prun_pages; @@ -970,8 +963,8 @@ static void arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, size_t oldsize, size_t newsize) { - size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> PAGE_SHIFT; - size_t head_npages = (oldsize - newsize) >> PAGE_SHIFT; + size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE; + size_t head_npages = (oldsize - newsize) >> LG_PAGE; size_t flag_dirty = chunk->map[pageind-map_bias].bits & CHUNK_MAP_DIRTY; assert(oldsize > newsize); @@ -991,7 +984,7 @@ arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, CHUNK_MAP_UNZEROED) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; if (config_debug) { - UNUSED size_t tail_npages = newsize >> PAGE_SHIFT; + UNUSED size_t tail_npages = newsize >> LG_PAGE; assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias] .bits & ~PAGE_MASK) == 0); assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias] @@ -1012,9 +1005,9 @@ static void arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, size_t oldsize, size_t newsize, bool dirty) { - size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> PAGE_SHIFT; - size_t head_npages = newsize >> PAGE_SHIFT; - size_t tail_npages = (oldsize - newsize) >> PAGE_SHIFT; + size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE; + size_t head_npages = newsize >> LG_PAGE; + size_t tail_npages = (oldsize - newsize) >> LG_PAGE; size_t flag_dirty = chunk->map[pageind-map_bias].bits & CHUNK_MAP_DIRTY; @@ -1064,8 +1057,8 @@ arena_bin_runs_first(arena_bin_t *bin) pageind = ((((uintptr_t)mapelm - (uintptr_t)chunk->map) / sizeof(arena_chunk_map_t))) + map_bias; arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + - (uintptr_t)((pageind - (mapelm->bits >> PAGE_SHIFT)) << - PAGE_SHIFT)); + (uintptr_t)((pageind - (mapelm->bits >> LG_PAGE)) << + LG_PAGE)); return (run); } @@ -1076,7 +1069,7 @@ static void arena_bin_runs_insert(arena_bin_t *bin, arena_run_t *run) { arena_chunk_t *chunk = CHUNK_ADDR2BASE(run); - size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> PAGE_SHIFT; + size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE; arena_chunk_map_t *mapelm = &chunk->map[pageind-map_bias]; assert(arena_run_tree_search(&bin->runs, mapelm) == NULL); @@ -1088,7 +1081,7 @@ static void arena_bin_runs_remove(arena_bin_t *bin, arena_run_t *run) { arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); - size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> PAGE_SHIFT; + size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE; arena_chunk_map_t *mapelm = 
&chunk->map[pageind-map_bias]; assert(arena_run_tree_search(&bin->runs, mapelm) != NULL); @@ -1331,9 +1324,9 @@ arena_malloc_large(arena_t *arena, size_t size, bool zero) arena->stats.nmalloc_large++; arena->stats.nrequests_large++; arena->stats.allocated_large += size; - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nmalloc++; - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nrequests++; - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns++; + arena->stats.lstats[(size >> LG_PAGE) - 1].nmalloc++; + arena->stats.lstats[(size >> LG_PAGE) - 1].nrequests++; + arena->stats.lstats[(size >> LG_PAGE) - 1].curruns++; } if (config_prof) arena_prof_accum(arena, size); @@ -1401,9 +1394,9 @@ arena_palloc(arena_t *arena, size_t size, size_t alloc_size, size_t alignment, arena->stats.nmalloc_large++; arena->stats.nrequests_large++; arena->stats.allocated_large += size; - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nmalloc++; - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nrequests++; - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns++; + arena->stats.lstats[(size >> LG_PAGE) - 1].nmalloc++; + arena->stats.lstats[(size >> LG_PAGE) - 1].nrequests++; + arena->stats.lstats[(size >> LG_PAGE) - 1].curruns++; } malloc_mutex_unlock(&arena->lock); @@ -1428,13 +1421,12 @@ arena_salloc(const void *ptr) assert(CHUNK_ADDR2BASE(ptr) != ptr); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; + pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; mapbits = chunk->map[pageind-map_bias].bits; assert((mapbits & CHUNK_MAP_ALLOCATED) != 0); if ((mapbits & CHUNK_MAP_LARGE) == 0) { arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + - (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) << - PAGE_SHIFT)); + (uintptr_t)((pageind - (mapbits >> LG_PAGE)) << LG_PAGE)); size_t binind = arena_bin_index(chunk->arena, run->bin); arena_bin_info_t *bin_info = &arena_bin_info[binind]; assert(((uintptr_t)ptr - ((uintptr_t)run + @@ -1458,11 +1450,11 @@ arena_prof_promoted(const void *ptr, size_t size) assert(ptr != NULL); assert(CHUNK_ADDR2BASE(ptr) != ptr); - assert(isalloc(ptr) == PAGE_SIZE); + assert(isalloc(ptr) == PAGE); assert(size <= SMALL_MAXCLASS); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; + pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; binind = SMALL_SIZE2BIN(size); assert(binind < NBINS); chunk->map[pageind-map_bias].bits = (chunk->map[pageind-map_bias].bits & @@ -1480,13 +1472,12 @@ arena_salloc_demote(const void *ptr) assert(CHUNK_ADDR2BASE(ptr) != ptr); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; + pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; mapbits = chunk->map[pageind-map_bias].bits; assert((mapbits & CHUNK_MAP_ALLOCATED) != 0); if ((mapbits & CHUNK_MAP_LARGE) == 0) { arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + - (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) << - PAGE_SHIFT)); + (uintptr_t)((pageind - (mapbits >> LG_PAGE)) << LG_PAGE)); size_t binind = arena_bin_index(chunk->arena, run->bin); arena_bin_info_t *bin_info = &arena_bin_info[binind]; assert(((uintptr_t)ptr - ((uintptr_t)run + @@ -1496,7 +1487,7 @@ arena_salloc_demote(const void *ptr) } else { assert(((uintptr_t)ptr & PAGE_MASK) == 0); ret = mapbits & ~PAGE_MASK; - if (prof_promote && ret == PAGE_SIZE && (mapbits & + if (prof_promote && ret == PAGE && (mapbits & CHUNK_MAP_CLASS_MASK) != 0) { size_t binind = ((mapbits & 
CHUNK_MAP_CLASS_MASK) >> CHUNK_MAP_CLASS_SHIFT) - 1; @@ -1542,18 +1533,18 @@ arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, assert(run != bin->runcur); assert(arena_run_tree_search(&bin->runs, &chunk->map[ - (((uintptr_t)run-(uintptr_t)chunk)>>PAGE_SHIFT)-map_bias]) == NULL); + (((uintptr_t)run-(uintptr_t)chunk)>>LG_PAGE)-map_bias]) == NULL); binind = arena_bin_index(chunk->arena, run->bin); bin_info = &arena_bin_info[binind]; malloc_mutex_unlock(&bin->lock); /******************************/ - npages = bin_info->run_size >> PAGE_SHIFT; - run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk) >> PAGE_SHIFT); + npages = bin_info->run_size >> LG_PAGE; + run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE); past = (size_t)(PAGE_CEILING((uintptr_t)run + (uintptr_t)bin_info->reg0_offset + (uintptr_t)(run->nextind * - bin_info->reg_size) - (uintptr_t)chunk) >> PAGE_SHIFT); + bin_info->reg_size) - (uintptr_t)chunk) >> LG_PAGE); malloc_mutex_lock(&arena->lock); /* @@ -1573,8 +1564,8 @@ arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, chunk->map[run_ind-map_bias].bits = bin_info->run_size | CHUNK_MAP_LARGE | (chunk->map[run_ind-map_bias].bits & CHUNK_MAP_FLAGS_MASK); - arena_run_trim_tail(arena, chunk, run, (npages << PAGE_SHIFT), - ((past - run_ind) << PAGE_SHIFT), false); + arena_run_trim_tail(arena, chunk, run, (npages << LG_PAGE), + ((past - run_ind) << LG_PAGE), false); /* npages = past - run_ind; */ } arena_run_dalloc(arena, run, true); @@ -1615,9 +1606,9 @@ arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, arena_bin_t *bin; size_t size; - pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; + pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - - (mapelm->bits >> PAGE_SHIFT)) << PAGE_SHIFT)); + (mapelm->bits >> LG_PAGE)) << LG_PAGE)); bin = run->bin; size_t binind = arena_bin_index(arena, bin); arena_bin_info_t *bin_info = &arena_bin_info[binind]; @@ -1692,8 +1683,7 @@ arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr) { if (config_fill || config_stats) { - size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> - PAGE_SHIFT; + size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; size_t size = chunk->map[pageind-map_bias].bits & ~PAGE_MASK; if (config_fill && config_stats && opt_junk) @@ -1701,8 +1691,8 @@ arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr) if (config_stats) { arena->stats.ndalloc_large++; arena->stats.allocated_large -= size; - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].ndalloc++; - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns--; + arena->stats.lstats[(size >> LG_PAGE) - 1].ndalloc++; + arena->stats.lstats[(size >> LG_PAGE) - 1].curruns--; } } @@ -1726,15 +1716,15 @@ arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk, void *ptr, if (config_stats) { arena->stats.ndalloc_large++; arena->stats.allocated_large -= oldsize; - arena->stats.lstats[(oldsize >> PAGE_SHIFT) - 1].ndalloc++; - arena->stats.lstats[(oldsize >> PAGE_SHIFT) - 1].curruns--; + arena->stats.lstats[(oldsize >> LG_PAGE) - 1].ndalloc++; + arena->stats.lstats[(oldsize >> LG_PAGE) - 1].curruns--; arena->stats.nmalloc_large++; arena->stats.nrequests_large++; arena->stats.allocated_large += size; - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nmalloc++; - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nrequests++; - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns++; + 
arena->stats.lstats[(size >> LG_PAGE) - 1].nmalloc++; + arena->stats.lstats[(size >> LG_PAGE) - 1].nrequests++; + arena->stats.lstats[(size >> LG_PAGE) - 1].curruns++; } malloc_mutex_unlock(&arena->lock); } @@ -1743,8 +1733,8 @@ static bool arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr, size_t oldsize, size_t size, size_t extra, bool zero) { - size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; - size_t npages = oldsize >> PAGE_SHIFT; + size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; + size_t npages = oldsize >> LG_PAGE; size_t followsize; assert(oldsize == (chunk->map[pageind-map_bias].bits & ~PAGE_MASK)); @@ -1766,10 +1756,10 @@ arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr, size_t splitsize = (oldsize + followsize <= size + extra) ? followsize : size + extra - oldsize; arena_run_split(arena, (arena_run_t *)((uintptr_t)chunk + - ((pageind+npages) << PAGE_SHIFT)), splitsize, true, zero); + ((pageind+npages) << LG_PAGE)), splitsize, true, zero); size = oldsize + splitsize; - npages = size >> PAGE_SHIFT; + npages = size >> LG_PAGE; /* * Mark the extended run as dirty if either portion of the run @@ -1791,18 +1781,18 @@ arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr, if (config_stats) { arena->stats.ndalloc_large++; arena->stats.allocated_large -= oldsize; - arena->stats.lstats[(oldsize >> PAGE_SHIFT) + arena->stats.lstats[(oldsize >> LG_PAGE) - 1].ndalloc++; - arena->stats.lstats[(oldsize >> PAGE_SHIFT) + arena->stats.lstats[(oldsize >> LG_PAGE) - 1].curruns--; arena->stats.nmalloc_large++; arena->stats.nrequests_large++; arena->stats.allocated_large += size; - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nmalloc++; - arena->stats.lstats[(size >> PAGE_SHIFT) + arena->stats.lstats[(size >> LG_PAGE) - 1].nmalloc++; + arena->stats.lstats[(size >> LG_PAGE) - 1].nrequests++; - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns++; + arena->stats.lstats[(size >> LG_PAGE) - 1].curruns++; } malloc_mutex_unlock(&arena->lock); return (false); @@ -2023,7 +2013,7 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size) uint32_t try_ctx0_offset, good_ctx0_offset; uint32_t try_reg0_offset, good_reg0_offset; - assert(min_run_size >= PAGE_SIZE); + assert(min_run_size >= PAGE); assert(min_run_size <= arena_maxclass); /* @@ -2076,7 +2066,7 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size) good_reg0_offset = try_reg0_offset; /* Try more aggressive settings. */ - try_run_size += PAGE_SIZE; + try_run_size += PAGE; try_nregs = ((try_run_size - sizeof(arena_run_t)) / bin_info->reg_size) + 1; /* Counter-act try_nregs-- in loop. 
*/ @@ -2127,7 +2117,7 @@ static void bin_info_init(void) { arena_bin_info_t *bin_info; - size_t prev_run_size = PAGE_SIZE; + size_t prev_run_size = PAGE; #define SIZE_CLASS(bin, delta, size) \ bin_info = &arena_bin_info[bin]; \ @@ -2158,14 +2148,14 @@ arena_boot(void) */ map_bias = 0; for (i = 0; i < 3; i++) { - header_size = offsetof(arena_chunk_t, map) - + (sizeof(arena_chunk_map_t) * (chunk_npages-map_bias)); - map_bias = (header_size >> PAGE_SHIFT) + ((header_size & - PAGE_MASK) != 0); + header_size = offsetof(arena_chunk_t, map) + + (sizeof(arena_chunk_map_t) * (chunk_npages-map_bias)); + map_bias = (header_size >> LG_PAGE) + ((header_size & PAGE_MASK) + != 0); } assert(map_bias > 0); - arena_maxclass = chunksize - (map_bias << PAGE_SHIFT); + arena_maxclass = chunksize - (map_bias << LG_PAGE); bin_info_init(); } diff --git a/src/chunk.c b/src/chunk.c index f50e8409..8fcd61e4 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -105,9 +105,9 @@ chunk_boot0(void) /* Set variables according to the value of opt_lg_chunk. */ chunksize = (ZU(1) << opt_lg_chunk); - assert(chunksize >= PAGE_SIZE); + assert(chunksize >= PAGE); chunksize_mask = chunksize - 1; - chunk_npages = (chunksize >> PAGE_SHIFT); + chunk_npages = (chunksize >> LG_PAGE); if (config_stats || config_prof) { if (malloc_mutex_init(&chunks_mtx)) diff --git a/src/ctl.c b/src/ctl.c index 08011616..2afca51a 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -83,8 +83,7 @@ INDEX_PROTO(arenas_lrun_i) CTL_PROTO(arenas_narenas) CTL_PROTO(arenas_initialized) CTL_PROTO(arenas_quantum) -CTL_PROTO(arenas_pagesize) -CTL_PROTO(arenas_chunksize) +CTL_PROTO(arenas_page) CTL_PROTO(arenas_tcache_max) CTL_PROTO(arenas_nbins) CTL_PROTO(arenas_nhbins) @@ -227,8 +226,7 @@ static const ctl_node_t arenas_node[] = { {NAME("narenas"), CTL(arenas_narenas)}, {NAME("initialized"), CTL(arenas_initialized)}, {NAME("quantum"), CTL(arenas_quantum)}, - {NAME("pagesize"), CTL(arenas_pagesize)}, - {NAME("chunksize"), CTL(arenas_chunksize)}, + {NAME("page"), CTL(arenas_page)}, {NAME("tcache_max"), CTL(arenas_tcache_max)}, {NAME("nbins"), CTL(arenas_nbins)}, {NAME("nhbins"), CTL(arenas_nhbins)}, @@ -520,7 +518,7 @@ ctl_refresh(void) + ctl_stats.arenas[narenas].astats.allocated_large + ctl_stats.huge.allocated; ctl_stats.active = (ctl_stats.arenas[narenas].pactive << - PAGE_SHIFT) + ctl_stats.huge.allocated; + LG_PAGE) + ctl_stats.huge.allocated; ctl_stats.mapped = (ctl_stats.chunks.current << opt_lg_chunk); } @@ -1116,7 +1114,7 @@ arenas_bin_i_index(const size_t *mib, size_t miblen, size_t i) return (super_arenas_bin_i_node); } -CTL_RO_NL_GEN(arenas_lrun_i_size, ((mib[2]+1) << PAGE_SHIFT), size_t) +CTL_RO_NL_GEN(arenas_lrun_i_size, ((mib[2]+1) << LG_PAGE), size_t) const ctl_node_t * arenas_lrun_i_index(const size_t *mib, size_t miblen, size_t i) { @@ -1155,8 +1153,7 @@ RETURN: } CTL_RO_NL_GEN(arenas_quantum, QUANTUM, size_t) -CTL_RO_NL_GEN(arenas_pagesize, PAGE_SIZE, size_t) -CTL_RO_NL_GEN(arenas_chunksize, chunksize, size_t) +CTL_RO_NL_GEN(arenas_page, PAGE, size_t) CTL_RO_NL_CGEN(config_tcache, arenas_tcache_max, tcache_maxclass, size_t) CTL_RO_NL_GEN(arenas_nbins, NBINS, unsigned) CTL_RO_NL_CGEN(config_tcache, arenas_nhbins, nhbins, unsigned) diff --git a/src/jemalloc.c b/src/jemalloc.c index 1deabcd9..c7019220 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -25,12 +25,6 @@ bool opt_xmalloc = false; bool opt_zero = false; size_t opt_narenas = 0; -#ifdef DYNAMIC_PAGE_SHIFT -size_t pagesize; -size_t pagesize_mask; -size_t lg_pagesize; -#endif - unsigned ncpus; malloc_mutex_t 
arenas_lock; @@ -477,7 +471,7 @@ malloc_conf_init(void) * Chunks always require at least one * header page, * plus one data page. */ - CONF_HANDLE_SIZE_T(opt_lg_chunk, lg_chunk, PAGE_SHIFT+1, + CONF_HANDLE_SIZE_T(opt_lg_chunk, lg_chunk, LG_PAGE+1, (sizeof(size_t) << 3) - 1) CONF_HANDLE_SIZE_T(opt_narenas, narenas, 1, SIZE_T_MAX) CONF_HANDLE_SSIZE_T(opt_lg_dirty_mult, lg_dirty_mult, @@ -550,25 +544,6 @@ malloc_init_hard(void) #endif malloc_initializer = INITIALIZER; -#ifdef DYNAMIC_PAGE_SHIFT - /* Get page size. */ - { - long result; - - result = sysconf(_SC_PAGESIZE); - assert(result != -1); - pagesize = (size_t)result; - - /* - * We assume that pagesize is a power of 2 when calculating - * pagesize_mask and lg_pagesize. - */ - assert(((result - 1) & result) == 0); - pagesize_mask = result - 1; - lg_pagesize = ffs((int)result) - 1; - } -#endif - malloc_tsd_boot(); if (config_prof) prof_boot0(); @@ -1145,7 +1120,7 @@ void * je_valloc(size_t size) { void *ret JEMALLOC_CC_SILENCE_INIT(NULL); - imemalign(&ret, PAGE_SIZE, size, 1); + imemalign(&ret, PAGE, size, 1); return (ret); } #endif @@ -1386,7 +1361,7 @@ je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags) alignment, zero, no_move); if (q == NULL) goto ERR; - if (max_usize < PAGE_SIZE) { + if (max_usize < PAGE) { usize = max_usize; arena_prof_promoted(q, usize); } else diff --git a/src/prof.c b/src/prof.c index bc21d894..d2532ec1 100644 --- a/src/prof.c +++ b/src/prof.c @@ -1179,7 +1179,7 @@ prof_boot1(void) prof_interval = 0; } - prof_promote = (opt_prof && opt_lg_prof_sample > PAGE_SHIFT); + prof_promote = (opt_prof && opt_lg_prof_sample > LG_PAGE); } bool diff --git a/src/stats.c b/src/stats.c index f494974b..ca6d408c 100644 --- a/src/stats.c +++ b/src/stats.c @@ -412,6 +412,9 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, CTL_GET("arenas.quantum", &sv, size_t); malloc_cprintf(write_cb, cbopaque, "Quantum size: %zu\n", sv); + CTL_GET("arenas.page", &sv, size_t); + malloc_cprintf(write_cb, cbopaque, "Page size: %zu\n", sv); + CTL_GET("opt.lg_dirty_mult", &ssv, ssize_t); if (ssv >= 0) { malloc_cprintf(write_cb, cbopaque, diff --git a/src/tcache.c b/src/tcache.c index bc911a6e..6a7f17bf 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -72,7 +72,7 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem, chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (chunk->arena == arena) { size_t pageind = ((uintptr_t)ptr - - (uintptr_t)chunk) >> PAGE_SHIFT; + (uintptr_t)chunk) >> LG_PAGE; arena_chunk_map_t *mapelm = &chunk->map[pageind-map_bias]; arena_dalloc_bin(arena, chunk, ptr, mapelm); @@ -303,11 +303,11 @@ tcache_destroy(tcache_t *tcache) arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache); arena_t *arena = chunk->arena; size_t pageind = ((uintptr_t)tcache - (uintptr_t)chunk) >> - PAGE_SHIFT; + LG_PAGE; arena_chunk_map_t *mapelm = &chunk->map[pageind-map_bias]; arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + - (uintptr_t)((pageind - (mapelm->bits >> PAGE_SHIFT)) << - PAGE_SHIFT)); + (uintptr_t)((pageind - (mapelm->bits >> LG_PAGE)) << + LG_PAGE)); arena_bin_t *bin = run->bin; malloc_mutex_lock(&bin->lock); @@ -398,7 +398,7 @@ tcache_boot0(void) else tcache_maxclass = (1U << opt_lg_tcache_max); - nhbins = NBINS + (tcache_maxclass >> PAGE_SHIFT); + nhbins = NBINS + (tcache_maxclass >> LG_PAGE); /* Initialize tcache_bin_info. 
*/ tcache_bin_info = (tcache_bin_info_t *)base_alloc(nhbins * diff --git a/src/zone.c b/src/zone.c index 6c1e415b..a50c129c 100644 --- a/src/zone.c +++ b/src/zone.c @@ -78,7 +78,7 @@ zone_valloc(malloc_zone_t *zone, size_t size) { void *ret = NULL; /* Assignment avoids useless compiler warning. */ - je_posix_memalign(&ret, PAGE_SIZE, size); + je_posix_memalign(&ret, PAGE, size); return (ret); } From 9d4d76874d888986e611bd9c9a0c905841956c4d Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 2 Apr 2012 07:15:42 -0700 Subject: [PATCH 092/205] Finish renaming "arenas.pagesize" to "arenas.page". --- src/stats.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/src/stats.c b/src/stats.c index ca6d408c..83baf568 100644 --- a/src/stats.c +++ b/src/stats.c @@ -57,11 +57,11 @@ static void stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, unsigned i) { - size_t pagesize; + size_t page; bool config_tcache; unsigned nbins, j, gap_start; - CTL_GET("arenas.pagesize", &pagesize, size_t); + CTL_GET("arenas.page", &page, size_t); CTL_GET("config.tcache", &config_tcache, bool); if (config_tcache) { @@ -129,7 +129,7 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, " %12"PRIu64" %12"PRIu64" %12"PRIu64 " %12"PRIu64" %12"PRIu64" %12"PRIu64 " %12zu\n", - j, reg_size, nregs, run_size / pagesize, + j, reg_size, nregs, run_size / page, allocated, nmalloc, ndalloc, nrequests, nfills, nflushes, nruns, reruns, curruns); } else { @@ -137,7 +137,7 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, "%13u %5zu %4u %3zu %12zu %12"PRIu64 " %12"PRIu64" %12"PRIu64" %12"PRIu64 " %12zu\n", - j, reg_size, nregs, run_size / pagesize, + j, reg_size, nregs, run_size / page, allocated, nmalloc, ndalloc, nruns, reruns, curruns); } @@ -159,10 +159,10 @@ static void stats_arena_lruns_print(void (*write_cb)(void *, const char *), void *cbopaque, unsigned i) { - size_t pagesize, nlruns, j; + size_t page, nlruns, j; ssize_t gap_start; - CTL_GET("arenas.pagesize", &pagesize, size_t); + CTL_GET("arenas.page", &page, size_t); malloc_cprintf(write_cb, cbopaque, "large: size pages nmalloc ndalloc nrequests" @@ -193,7 +193,7 @@ stats_arena_lruns_print(void (*write_cb)(void *, const char *), void *cbopaque, malloc_cprintf(write_cb, cbopaque, "%13zu %5zu %12"PRIu64" %12"PRIu64" %12"PRIu64 " %12zu\n", - run_size, run_size / pagesize, nmalloc, ndalloc, + run_size, run_size / page, nmalloc, ndalloc, nrequests, curruns); } } @@ -206,14 +206,14 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, unsigned i, bool bins, bool large) { unsigned nthreads; - size_t pagesize, pactive, pdirty, mapped; + size_t page, pactive, pdirty, mapped; uint64_t npurge, nmadvise, purged; size_t small_allocated; uint64_t small_nmalloc, small_ndalloc, small_nrequests; size_t large_allocated; uint64_t large_nmalloc, large_ndalloc, large_nrequests; - CTL_GET("arenas.pagesize", &pagesize, size_t); + CTL_GET("arenas.page", &page, size_t); CTL_I_GET("stats.arenas.0.nthreads", &nthreads, unsigned); malloc_cprintf(write_cb, cbopaque, @@ -251,8 +251,7 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, small_nmalloc + large_nmalloc, small_ndalloc + large_ndalloc, small_nrequests + large_nrequests); - malloc_cprintf(write_cb, cbopaque, "active: %12zu\n", - pactive * pagesize ); + malloc_cprintf(write_cb, cbopaque, "active: %12zu\n", pactive * page); CTL_I_GET("stats.arenas.0.mapped", &mapped, size_t); 
malloc_cprintf(write_cb, cbopaque, "mapped: %12zu\n", mapped); From 12a6845b6c91cf76caf3199495d76b16bba1f2fe Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 3 Apr 2012 13:20:21 -0700 Subject: [PATCH 093/205] Use $((...)) instead of expr. Use $((...)) for math in size_classes.h rather than expr, because it is much faster. This is not supported syntax in the classic Bourne shell, but all modern sh implementations support it, including bash, zsh, and ash. --- include/jemalloc/internal/size_classes.sh | 30 +++++++++++------------ 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/include/jemalloc/internal/size_classes.sh b/include/jemalloc/internal/size_classes.sh index 9829a2b3..3d236136 100755 --- a/include/jemalloc/internal/size_classes.sh +++ b/include/jemalloc/internal/size_classes.sh @@ -17,8 +17,8 @@ pow2() { e=$1 pow2_result=1 while [ ${e} -gt 0 ] ; do - pow2_result=`expr ${pow2_result} + ${pow2_result}` - e=`expr ${e} - 1` + pow2_result=$((${pow2_result} + ${pow2_result})) + e=$((${e} - 1)) done } @@ -45,7 +45,7 @@ EOF bin=0 psz=0 sz=${t} - delta=`expr ${sz} - ${psz}` + delta=$((${sz} - ${psz})) cat < Date: Tue, 3 Apr 2012 01:33:55 -0700 Subject: [PATCH 094/205] Remove obsolete "config.dynamic_page_shift" mallctl documentation. --- doc/jemalloc.xml.in | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 28760b21..7a2d033d 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -630,16 +630,6 @@ for (i = 0; i < nbins; i++) { build configuration. - - - config.dynamic_page_shift - (bool) - r- - - was - specified during build configuration. - - config.fill From 633aaff96787db82c06d35baf012de197a1a1902 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 3 Apr 2012 08:47:07 -0700 Subject: [PATCH 095/205] Postpone mutex initialization on FreeBSD. Postpone mutex initialization on FreeBSD until after base allocation is safe. 
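The constraint being worked around: on FreeBSD, initializing a mutex via
_pthread_mutex_init_calloc_cb() requires a working base allocator, yet
jemalloc creates several mutexes while still bootstrapping that allocator.
The patch breaks the cycle by queuing early mutexes on an intrusive list and
draining the list from mutex_boot() once base_calloc() is usable. A distilled
sketch of the mechanism (names mirror the diff below; error paths elided):

    /* Too early to allocate: remember the mutex for mutex_boot(). */
    if (postpone_init) {
            mutex->postponed_next = postponed_mutexes;
            postponed_mutexes = mutex;
    } else if (_pthread_mutex_init_calloc_cb(&mutex->lock,
        base_calloc) != 0)
            return (true);

    /* Later, mutex_boot() walks postponed_mutexes and initializes each. */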
--- include/jemalloc/internal/mutex.h | 32 +++++++++++++++++++++-------- src/jemalloc.c | 5 +++++ src/mutex.c | 34 +++++++++++++++++++++++++++---- 3 files changed, 58 insertions(+), 13 deletions(-) diff --git a/include/jemalloc/internal/mutex.h b/include/jemalloc/internal/mutex.h index ad4f9c24..c46feee3 100644 --- a/include/jemalloc/internal/mutex.h +++ b/include/jemalloc/internal/mutex.h @@ -1,18 +1,20 @@ /******************************************************************************/ #ifdef JEMALLOC_H_TYPES +typedef struct malloc_mutex_s malloc_mutex_t; + #ifdef JEMALLOC_OSSPIN -typedef OSSpinLock malloc_mutex_t; -#define MALLOC_MUTEX_INITIALIZER 0 +#define MALLOC_MUTEX_INITIALIZER {0} +#elif (defined(JEMALLOC_MUTEX_INIT_CB)) +#define MALLOC_MUTEX_INITIALIZER {PTHREAD_MUTEX_INITIALIZER, NULL} #else -typedef pthread_mutex_t malloc_mutex_t; # if (defined(PTHREAD_MUTEX_ADAPTIVE_NP) && \ defined(PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP)) # define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_ADAPTIVE_NP -# define MALLOC_MUTEX_INITIALIZER PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP +# define MALLOC_MUTEX_INITIALIZER {PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP} # else # define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_DEFAULT -# define MALLOC_MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER +# define MALLOC_MUTEX_INITIALIZER {PTHREAD_MUTEX_INITIALIZER} # endif #endif @@ -20,6 +22,17 @@ typedef pthread_mutex_t malloc_mutex_t; /******************************************************************************/ #ifdef JEMALLOC_H_STRUCTS +struct malloc_mutex_s { +#ifdef JEMALLOC_OSSPIN + OSSpinLock lock; +#elif (defined(JEMALLOC_MUTEX_INIT_CB)) + pthread_mutex_t lock; + malloc_mutex_t *postponed_next; +#else + pthread_mutex_t lock; +#endif +}; + #endif /* JEMALLOC_H_STRUCTS */ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS @@ -34,6 +47,7 @@ bool malloc_mutex_init(malloc_mutex_t *mutex); void malloc_mutex_prefork(malloc_mutex_t *mutex); void malloc_mutex_postfork_parent(malloc_mutex_t *mutex); void malloc_mutex_postfork_child(malloc_mutex_t *mutex); +bool mutex_boot(void); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ @@ -51,9 +65,9 @@ malloc_mutex_lock(malloc_mutex_t *mutex) if (isthreaded) { #ifdef JEMALLOC_OSSPIN - OSSpinLockLock(mutex); + OSSpinLockLock(&mutex->lock); #else - pthread_mutex_lock(mutex); + pthread_mutex_lock(&mutex->lock); #endif } } @@ -64,9 +78,9 @@ malloc_mutex_unlock(malloc_mutex_t *mutex) if (isthreaded) { #ifdef JEMALLOC_OSSPIN - OSSpinLockUnlock(mutex); + OSSpinLockUnlock(&mutex->lock); #else - pthread_mutex_unlock(mutex); + pthread_mutex_unlock(&mutex->lock); #endif } } diff --git a/src/jemalloc.c b/src/jemalloc.c index c7019220..a6d2df57 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -651,6 +651,11 @@ malloc_init_hard(void) return (true); } + if (mutex_boot()) { + malloc_mutex_unlock(&init_lock); + return (true); + } + if (opt_narenas == 0) { /* * For SMP systems, create more than one arena per CPU by diff --git a/src/mutex.c b/src/mutex.c index 0b20bbf3..4b8ce570 100644 --- a/src/mutex.c +++ b/src/mutex.c @@ -11,6 +11,10 @@ #ifdef JEMALLOC_LAZY_LOCK bool isthreaded = false; #endif +#ifdef JEMALLOC_MUTEX_INIT_CB +static bool postpone_init = true; +static malloc_mutex_t *postponed_mutexes = NULL; +#endif #ifdef JEMALLOC_LAZY_LOCK static void pthread_create_once(void); @@ -65,17 +69,23 @@ bool malloc_mutex_init(malloc_mutex_t *mutex) { #ifdef JEMALLOC_OSSPIN - *mutex = 0; + mutex->lock = 
0; #elif (defined(JEMALLOC_MUTEX_INIT_CB)) - if (_pthread_mutex_init_calloc_cb(mutex, base_calloc) != 0) - return (true); + if (postpone_init) { + mutex->postponed_next = postponed_mutexes; + postponed_mutexes = mutex; + } else { + if (_pthread_mutex_init_calloc_cb(&mutex->lock, base_calloc) != + 0) + return (true); + } #else pthread_mutexattr_t attr; if (pthread_mutexattr_init(&attr) != 0) return (true); pthread_mutexattr_settype(&attr, MALLOC_MUTEX_TYPE); - if (pthread_mutex_init(mutex, &attr) != 0) { + if (pthread_mutex_init(&mutex->lock, &attr) != 0) { pthread_mutexattr_destroy(&attr); return (true); } @@ -114,3 +124,19 @@ malloc_mutex_postfork_child(malloc_mutex_t *mutex) } #endif } + +bool +mutex_boot(void) +{ + +#ifdef JEMALLOC_MUTEX_INIT_CB + postpone_init = false; + while (postponed_mutexes != NULL) { + if (_pthread_mutex_init_calloc_cb(&postponed_mutexes->lock, + base_calloc) != 0) + return (true); + postponed_mutexes = postponed_mutexes->postponed_next; + } +#endif + return (false); +} From 01b3fe55ff3ac8e4aa689f09fcb0729da8037638 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 3 Apr 2012 09:28:00 -0700 Subject: [PATCH 096/205] Add a0malloc(), a0calloc(), and a0free(). Add a0malloc(), a0calloc(), and a0free(), which are used by FreeBSD's libc to allocate/deallocate TLS in static binaries. --- include/jemalloc/internal/arena.h | 36 +++++++------ .../jemalloc/internal/jemalloc_internal.h.in | 19 ++++--- include/jemalloc/internal/tcache.h | 2 +- src/arena.c | 6 +-- src/ctl.c | 2 +- src/jemalloc.c | 53 ++++++++++++++++++- 6 files changed, 89 insertions(+), 29 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 41df11fd..03e3f3ce 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -373,7 +373,7 @@ void arena_stats_merge(arena_t *arena, size_t *nactive, size_t *ndirty, void *arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, bool zero); void *arena_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, - size_t alignment, bool zero); + size_t alignment, bool zero, bool try_tcache); bool arena_new(arena_t *arena, unsigned ind); void arena_boot(void); void arena_prefork(arena_t *arena); @@ -390,9 +390,10 @@ unsigned arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr); prof_ctx_t *arena_prof_ctx_get(const void *ptr); void arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx); -void *arena_malloc(size_t size, bool zero); +void *arena_malloc(arena_t *arena, size_t size, bool zero, bool try_tcache); void *arena_malloc_prechosen(arena_t *arena, size_t size, bool zero); -void arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr); +void arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr, + bool try_tcache); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_)) @@ -548,7 +549,7 @@ arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx) } JEMALLOC_INLINE void * -arena_malloc(size_t size, bool zero) +arena_malloc(arena_t *arena, size_t size, bool zero, bool try_tcache) { tcache_t *tcache; @@ -556,20 +557,24 @@ arena_malloc(size_t size, bool zero) assert(size <= arena_maxclass); if (size <= SMALL_MAXCLASS) { - if ((tcache = tcache_get(true)) != NULL) + if (try_tcache && (tcache = tcache_get(true)) != NULL) return (tcache_alloc_small(tcache, size, zero)); - else - return (arena_malloc_small(choose_arena(), size, zero)); + else { + return (arena_malloc_small(choose_arena(arena), size, + zero)); + } } else { /* * 
Initialize tcache after checking size in order to avoid * infinite recursion during tcache initialization. */ - if (size <= tcache_maxclass && (tcache = tcache_get(true)) != - NULL) + if (try_tcache && size <= tcache_maxclass && (tcache = + tcache_get(true)) != NULL) return (tcache_alloc_large(tcache, size, zero)); - else - return (arena_malloc_large(choose_arena(), size, zero)); + else { + return (arena_malloc_large(choose_arena(arena), size, + zero)); + } } } @@ -587,11 +592,11 @@ arena_malloc_prechosen(arena_t *arena, size_t size, bool zero) } JEMALLOC_INLINE void -arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr) +arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr, bool try_tcache) { size_t pageind; arena_chunk_map_t *mapelm; - tcache_t *tcache = tcache_get(false); + tcache_t *tcache; assert(arena != NULL); assert(chunk->arena == arena); @@ -603,7 +608,7 @@ arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr) assert((mapelm->bits & CHUNK_MAP_ALLOCATED) != 0); if ((mapelm->bits & CHUNK_MAP_LARGE) == 0) { /* Small allocation. */ - if (tcache != NULL) + if (try_tcache && (tcache = tcache_get(false)) != NULL) tcache_dalloc_small(tcache, ptr); else { arena_run_t *run; @@ -630,7 +635,8 @@ arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr) assert(((uintptr_t)ptr & PAGE_MASK) == 0); - if (size <= tcache_maxclass && tcache != NULL) { + if (try_tcache && size <= tcache_maxclass && (tcache = + tcache_get(false)) != NULL) { tcache_dalloc_large(tcache, ptr, size); } else { malloc_mutex_lock(&arena->lock); diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 0c22bfb7..c8e40198 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -399,7 +399,7 @@ malloc_tsd_protos(JEMALLOC_ATTR(unused), arenas, arena_t *) size_t s2u(size_t size); size_t sa2u(size_t size, size_t alignment, size_t *run_size_p); -arena_t *choose_arena(void); +arena_t *choose_arena(arena_t *arena); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) @@ -517,10 +517,13 @@ sa2u(size_t size, size_t alignment, size_t *run_size_p) /* Choose an arena based on a per-thread value. 
*/ JEMALLOC_INLINE arena_t * -choose_arena(void) +choose_arena(arena_t *arena) { arena_t *ret; + if (arena != NULL) + return (arena); + if ((ret = *arenas_tsd_get()) == NULL) { ret = choose_arena_hard(); assert(ret != NULL); @@ -556,7 +559,7 @@ imalloc(size_t size) assert(size != 0); if (size <= arena_maxclass) - return (arena_malloc(size, false)); + return (arena_malloc(NULL, size, false, true)); else return (huge_malloc(size, false)); } @@ -566,7 +569,7 @@ icalloc(size_t size) { if (size <= arena_maxclass) - return (arena_malloc(size, true)); + return (arena_malloc(NULL, size, true, true)); else return (huge_malloc(size, true)); } @@ -580,7 +583,7 @@ ipalloc(size_t usize, size_t alignment, bool zero) assert(usize == sa2u(usize, alignment, NULL)); if (usize <= arena_maxclass && alignment <= PAGE) - ret = arena_malloc(usize, zero); + ret = arena_malloc(NULL, usize, zero, true); else { size_t run_size JEMALLOC_CC_SILENCE_INIT(0); @@ -594,7 +597,7 @@ ipalloc(size_t usize, size_t alignment, bool zero) */ sa2u(usize, alignment, &run_size); if (run_size <= arena_maxclass) { - ret = arena_palloc(choose_arena(), usize, run_size, + ret = arena_palloc(choose_arena(NULL), usize, run_size, alignment, zero); } else if (alignment <= chunksize) ret = huge_malloc(usize, zero); @@ -647,7 +650,7 @@ idalloc(void *ptr) chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (chunk != ptr) - arena_dalloc(chunk->arena, chunk, ptr); + arena_dalloc(chunk->arena, chunk, ptr, true); else huge_dalloc(ptr, true); } @@ -711,7 +714,7 @@ iralloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero, } else { if (size + extra <= arena_maxclass) { return (arena_ralloc(ptr, oldsize, size, extra, - alignment, zero)); + alignment, zero, true)); } else { return (huge_ralloc(ptr, oldsize, size, extra, alignment, zero)); diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index a1d9aae3..93e721d5 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -237,7 +237,7 @@ tcache_get(bool create) tcache_enabled_set(false); /* Memoize. 
*/ return (NULL); } - return (tcache_create(choose_arena())); + return (tcache_create(choose_arena(NULL))); } if (tcache == TCACHE_STATE_PURGATORY) { /* diff --git a/src/arena.c b/src/arena.c index b7e14228..64440996 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1888,7 +1888,7 @@ arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, void * arena_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, - size_t alignment, bool zero) + size_t alignment, bool zero, bool try_tcache) { void *ret; size_t copysize; @@ -1909,7 +1909,7 @@ arena_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, return (NULL); ret = ipalloc(usize, alignment, zero); } else - ret = arena_malloc(size + extra, zero); + ret = arena_malloc(NULL, size + extra, zero, try_tcache); if (ret == NULL) { if (extra == 0) @@ -1921,7 +1921,7 @@ arena_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, return (NULL); ret = ipalloc(usize, alignment, zero); } else - ret = arena_malloc(size, zero); + ret = arena_malloc(NULL, size, zero, try_tcache); if (ret == NULL) return (NULL); diff --git a/src/ctl.c b/src/ctl.c index 2afca51a..6777688a 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1016,7 +1016,7 @@ thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, int ret; unsigned newind, oldind; - newind = oldind = choose_arena()->ind; + newind = oldind = choose_arena(NULL)->ind; WRITE(newind, unsigned); READ(oldind, unsigned); if (newind != oldind) { diff --git a/src/jemalloc.c b/src/jemalloc.c index a6d2df57..690cf082 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1487,7 +1487,6 @@ je_nallocm(size_t *rsize, size_t size, int flags) * End experimental functions. */ /******************************************************************************/ - /* * The following functions are used by threading libraries for protection of * malloc during fork(). @@ -1552,3 +1551,55 @@ jemalloc_postfork_child(void) } /******************************************************************************/ +/* + * The following functions are used for TLS allocation/deallocation in static + * binaries on FreeBSD. The primary difference between these and i[mcd]alloc() + * is that these avoid accessing TLS variables. + */ + +static void * +a0alloc(size_t size, bool zero) +{ + + if (malloc_init()) + return (NULL); + + if (size == 0) + size = 1; + + if (size <= arena_maxclass) + return (arena_malloc(arenas[0], size, zero, false)); + else + return (huge_malloc(size, zero)); +} + +void * +a0malloc(size_t size) +{ + + return (a0alloc(size, false)); +} + +void * +a0calloc(size_t num, size_t size) +{ + + return (a0alloc(num * size, true)); +} + +void +a0free(void *ptr) +{ + arena_chunk_t *chunk; + + if (ptr == NULL) + return; + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + if (chunk != ptr) + arena_dalloc(chunk->arena, chunk, ptr, false); + else + huge_dalloc(ptr, true); +} + +/******************************************************************************/ From 3cc1f1aa6981d6647aa01cec725fb2c134c1b0e9 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 3 Apr 2012 22:30:05 -0700 Subject: [PATCH 097/205] Add tls_model configuration. The tls_model attribute isn't supporte by clang (yet?), so add a configure test that defines JEMALLOC_TLS_MODEL appropriately. 
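Concretely, the new configure test selects between two expansions of
JEMALLOC_TLS_MODEL, so the TLS declarations in tsd.h degrade gracefully when
the attribute is unavailable. A minimal sketch of both outcomes (the variable
name here is illustrative):

    /* Compiler supports the attribute (e.g. gcc): */
    #define JEMALLOC_TLS_MODEL __attribute__((tls_model("initial-exec")))
    /* Compiler does not (e.g. clang 3.0): empty expansion. */
    #define JEMALLOC_TLS_MODEL

    /* Either way the declaration remains well-formed: */
    static __thread int foo JEMALLOC_TLS_MODEL;

The initial-exec model permits cheaper TLS accesses than the default
general-dynamic model, which matters on jemalloc's hot paths; with the empty
fallback, clang builds merely lose that optimization rather than failing to
compile.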
--- configure.ac | 17 ++++++++++++++++- include/jemalloc/internal/tsd.h | 6 +++--- include/jemalloc/jemalloc_defs.h.in | 3 +++ 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/configure.ac b/configure.ac index c1b46dc3..e704d2de 100644 --- a/configure.ac +++ b/configure.ac @@ -285,6 +285,21 @@ if test "x${je_cv_attribute}" = "xyes" ; then JE_CFLAGS_APPEND([-fvisibility=hidden]) fi fi +dnl Check for tls_model attribute support (clang 3.0 still lacks support). +SAVED_CFLAGS="${CFLAGS}" +JE_CFLAGS_APPEND([-Werror]) +JE_COMPILABLE([tls_model attribute], [], + [static __thread int + __attribute__((tls_model("initial-exec"))) foo; + foo = 0;], + [je_cv_tls_model]) +CFLAGS="${SAVED_CFLAGS}" +if test "x${je_cv_tls_model}" = "xyes" ; then + AC_DEFINE([JEMALLOC_TLS_MODEL], + [__attribute__((tls_model("initial-exec")))]) +else + AC_DEFINE([JEMALLOC_TLS_MODEL], [ ]) +fi JE_COMPILABLE([mremap(...MREMAP_FIXED...)], [ #define _GNU_SOURCE @@ -719,7 +734,7 @@ AC_CACHE_CHECK([STATIC_PAGE_SHIFT], return 1; } fprintf(f, "%u\n", ffs((int)result) - 1); - close(f); + fclose(f); return 0; ]])], diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 60aaa427..35ae5e3c 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -87,14 +87,14 @@ extern bool a_name##_booted; /* malloc_tsd_data(). */ #ifdef JEMALLOC_MALLOC_THREAD_CLEANUP #define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ -a_attr __thread a_type JEMALLOC_ATTR(tls_model("initial-exec")) \ +a_attr __thread a_type JEMALLOC_TLS_MODEL \ a_name##_tls = a_initializer; \ -a_attr __thread bool JEMALLOC_ATTR(tls_model("initial-exec")) \ +a_attr __thread bool JEMALLOC_TLS_MODEL \ a_name##_initialized = false; \ a_attr bool a_name##_booted = false; #elif (defined(JEMALLOC_TLS)) #define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ -a_attr __thread a_type JEMALLOC_ATTR(tls_model("initial-exec")) \ +a_attr __thread a_type JEMALLOC_TLS_MODEL \ a_name##_tls = a_initializer; \ a_attr pthread_key_t a_name##_tsd; \ a_attr bool a_name##_booted = false; diff --git a/include/jemalloc/jemalloc_defs.h.in b/include/jemalloc/jemalloc_defs.h.in index e4bfa04a..040753a6 100644 --- a/include/jemalloc/jemalloc_defs.h.in +++ b/include/jemalloc/jemalloc_defs.h.in @@ -108,6 +108,9 @@ # define JEMALLOC_ATTR(s) JEMALLOC_CATTR(s,) #endif +/* Non-empty if the tls_model attribute is supported. */ +#undef JEMALLOC_TLS_MODEL + /* JEMALLOC_CC_SILENCE enables code that silences unuseful compiler warnings. */ #undef JEMALLOC_CC_SILENCE From bbe53b1c16d523d3c70cf8e942249f7d76f90e73 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 4 Apr 2012 15:24:01 -0700 Subject: [PATCH 098/205] Revert "Use ffsl() in ALLOCM_ALIGN()." This reverts commit 722b370399fd6734de6781285ce9a0cffd547bdd. Unfortunately, glibc requires _GNU_SOURCE to be defined before including string.h, but there is no reliable way to get the prototype within jemalloc.h unless _GNU_SOURCE was already defined. 
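For context, ALLOCM_ALIGN() packs the base-2 logarithm of a power-of-two
alignment into the low flag bits, and ffs() computes that logarithm: it
returns the 1-based index of the lowest set bit, so ffs(64) - 1 == 6 ==
lg(64). The 64-bit branch copes with alignments too large for ffs()'s int
argument by shifting the high word down and compensating in the result
(ffs(a>>32)+31). A hedged usage sketch against the experimental allocm()
API (ALLOCM_SUCCESS is assumed from that API, not shown in this patch):

    #include <jemalloc/jemalloc.h>

    void *p;
    /* 4096 zeroed bytes, 64-byte aligned; ALLOCM_ALIGN(64) == 6. */
    if (allocm(&p, NULL, 4096, ALLOCM_ALIGN(64) | ALLOCM_ZERO) !=
        ALLOCM_SUCCESS) {
            /* Handle allocation failure. */
    }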
--- include/jemalloc/jemalloc.h.in | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/jemalloc.h.in b/include/jemalloc/jemalloc.h.in index 8825a943..f0581dbd 100644 --- a/include/jemalloc/jemalloc.h.in +++ b/include/jemalloc/jemalloc.h.in @@ -4,6 +4,7 @@ extern "C" { #endif +#include <limits.h> #include <strings.h> #define JEMALLOC_VERSION "@jemalloc_version@" @@ -17,7 +18,11 @@ extern "C" { #ifdef JEMALLOC_EXPERIMENTAL #define ALLOCM_LG_ALIGN(la) (la) -#define ALLOCM_ALIGN(a) (ffsl(a)-1) +#if LG_SIZEOF_PTR == 2 +#define ALLOCM_ALIGN(a) (ffs(a)-1) +#else +#define ALLOCM_ALIGN(a) ((a < (size_t)INT_MAX) ? ffs(a)-1 : ffs(a>>32)+31) +#endif #define ALLOCM_ZERO ((int)0x40) #define ALLOCM_NO_MOVE ((int)0x80) From 382132eeacd9311a7a25d5b8f126d82ef453d60d Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 4 Apr 2012 15:25:43 -0700 Subject: [PATCH 099/205] Add missing include for ffsl() test. --- configure.ac | 1 + 1 file changed, 1 insertion(+) diff --git a/configure.ac b/configure.ac index e704d2de..8e6a3099 100644 --- a/configure.ac +++ b/configure.ac @@ -874,6 +874,7 @@ dnl Check for ffsl(3), and fail if not found. This function exists on all dnl platforms that jemalloc currently has a chance of functioning on without dnl modification. JE_COMPILABLE([a program using ffsl], [ +#include <strings.h> #include <string.h> ], [ { From f3ca7c8386f6a21347aed68053117c2c59939551 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 4 Apr 2012 16:16:09 -0700 Subject: [PATCH 100/205] Add missing "opt.lg_tcache_max" mallctl implementation. --- src/ctl.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/ctl.c b/src/ctl.c index 6777688a..df7affdc 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -66,6 +66,7 @@ CTL_PROTO(opt_junk) CTL_PROTO(opt_zero) CTL_PROTO(opt_xmalloc) CTL_PROTO(opt_tcache) +CTL_PROTO(opt_lg_tcache_max) CTL_PROTO(opt_prof) CTL_PROTO(opt_prof_prefix) CTL_PROTO(opt_prof_active) @@ -188,6 +189,7 @@ static const ctl_node_t opt_node[] = { {NAME("zero"), CTL(opt_zero)}, {NAME("xmalloc"), CTL(opt_xmalloc)}, {NAME("tcache"), CTL(opt_tcache)}, + {NAME("lg_tcache_max"), CTL(opt_lg_tcache_max)}, {NAME("prof"), CTL(opt_prof)}, {NAME("prof_prefix"), CTL(opt_prof_prefix)}, {NAME("prof_active"), CTL(opt_prof_active)}, @@ -1091,6 +1093,7 @@ CTL_RO_NL_CGEN(config_fill, opt_junk, opt_junk, bool) CTL_RO_NL_CGEN(config_fill, opt_zero, opt_zero, bool) CTL_RO_NL_CGEN(config_xmalloc, opt_xmalloc, opt_xmalloc, bool) CTL_RO_NL_CGEN(config_tcache, opt_tcache, opt_tcache, bool) +CTL_RO_NL_CGEN(config_tcache, opt_lg_tcache_max, opt_lg_tcache_max, ssize_t) CTL_RO_NL_CGEN(config_prof, opt_prof, opt_prof, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_prefix, opt_prof_prefix, const char *) CTL_RO_CGEN(config_prof, opt_prof_active, opt_prof_active, bool) /* Mutable. */ From 02b231205e802a7c4f33899a569adcb1312a85d5 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 5 Apr 2012 11:06:23 -0700 Subject: [PATCH 101/205] Fix threaded initialization and enable it on Linux. Reported by Mike Hommey.
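The structure of the fixed guard, condensed into a standalone model (simplified and hypothetical names, not the actual jemalloc code; it also assumes an integral pthread_t, as on Linux, where this now gets enabled):

    #include <pthread.h>
    #include <sched.h>
    #include <stdbool.h>

    #define NO_INITIALIZER ((unsigned long)0)
    static pthread_t malloc_initializer = NO_INITIALIZER;
    static bool malloc_initialized = false;
    static pthread_mutex_t init_lock = PTHREAD_MUTEX_INITIALIZER;

    static void
    init_hard(void)
    {

        pthread_mutex_lock(&init_lock);
        if (malloc_initialized || malloc_initializer == pthread_self()) {
            /* Done, or a recursive call by the initializing thread. */
            pthread_mutex_unlock(&init_lock);
            return;
        }
        if (malloc_initializer != NO_INITIALIZER) {
            /* Some other thread claimed initialization; busy-wait. */
            do {
                pthread_mutex_unlock(&init_lock);
                sched_yield();
                pthread_mutex_lock(&init_lock);
            } while (malloc_initialized == false);
            pthread_mutex_unlock(&init_lock);
            return;
        }
        malloc_initializer = pthread_self();    /* Claim the role. */
        /* ... initialize; allocation here re-enters the first branch ... */
        malloc_initialized = true;
        pthread_mutex_unlock(&init_lock);
    }

The actual fix is the added NO_INITIALIZER test in the busy-wait condition below: previously the wait branch was taken whenever IS_INITIALIZER was false, which is also true for the very first caller (malloc_initializer still holds NO_INITIALIZER at that point), so that caller could spin on itself instead of claiming the initializer role.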
--- configure.ac | 1 + src/jemalloc.c | 8 +++++--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/configure.ac b/configure.ac index 8e6a3099..16f03784 100644 --- a/configure.ac +++ b/configure.ac @@ -228,6 +228,7 @@ case "${host}" in CPPFLAGS="$CPPFLAGS -D_GNU_SOURCE" abi="elf" AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED], [ ]) + AC_DEFINE([JEMALLOC_THREADED_INIT], [ ]) RPATH="-Wl,-rpath," ;; *-*-netbsd*) diff --git a/src/jemalloc.c b/src/jemalloc.c index 690cf082..a531a216 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -36,13 +36,15 @@ static bool malloc_initialized = false; #ifdef JEMALLOC_THREADED_INIT /* Used to let the initializing thread recursively allocate. */ -static pthread_t malloc_initializer = (unsigned long)0; +# define NO_INITIALIZER ((unsigned long)0) # define INITIALIZER pthread_self() # define IS_INITIALIZER (malloc_initializer == pthread_self()) +static pthread_t malloc_initializer = NO_INITIALIZER; #else -static bool malloc_initializer = false; +# define NO_INITIALIZER false # define INITIALIZER true # define IS_INITIALIZER malloc_initializer +static bool malloc_initializer = NO_INITIALIZER; #endif /* Used to avoid initialization races. */ @@ -531,7 +533,7 @@ malloc_init_hard(void) return (false); } #ifdef JEMALLOC_THREADED_INIT - if (IS_INITIALIZER == false) { + if (malloc_initializer != NO_INITIALIZER && IS_INITIALIZER == false) { /* Busy-wait until the initializing thread completes. */ do { malloc_mutex_unlock(&init_lock); From b147611b5253921a873191bb0589d3b18f613946 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 5 Apr 2012 13:36:17 -0700 Subject: [PATCH 102/205] Add utrace(2)-based tracing (--enable-utrace). --- INSTALL | 4 ++ configure.ac | 29 ++++++++++++++ doc/jemalloc.xml.in | 25 ++++++++++++ .../jemalloc/internal/jemalloc_internal.h.in | 12 ++++++ include/jemalloc/internal/private_namespace.h | 1 + include/jemalloc/jemalloc_defs.h.in | 3 ++ src/ctl.c | 6 +++ src/jemalloc.c | 38 ++++++++++++++++++- src/stats.c | 1 + 9 files changed, 118 insertions(+), 1 deletion(-) diff --git a/INSTALL b/INSTALL index c0ae106a..8a825df9 100644 --- a/INSTALL +++ b/INSTALL @@ -119,6 +119,10 @@ any of the following arguments (not a definitive list) to 'configure': --disable-experimental Disable support for the experimental API (*allocm()). +--enable-utrace + Enable utrace(2)-based allocation tracing. This feature is not broadly + portable (FreeBSD has it, but Linux and OS X do not). + --enable-xmalloc Enable support for optional immediate termination due to out-of-memory errors, as is commonly implemented by "xmalloc" wrapper function for malloc. diff --git a/configure.ac b/configure.ac index 16f03784..8e94b5c9 100644 --- a/configure.ac +++ b/configure.ac @@ -699,6 +699,34 @@ if test "x$enable_fill" = "x1" ; then fi AC_SUBST([enable_fill]) +dnl Disable utrace(2)-based tracing by default. +AC_ARG_ENABLE([utrace], + [AS_HELP_STRING([--enable-utrace], [Enable utrace(2)-based tracing])], +[if test "x$enable_utrace" = "xno" ; then + enable_utrace="0" +else + enable_utrace="1" +fi +], +[enable_utrace="0"] +) +JE_COMPILABLE([utrace(2)], [ +#include <sys/types.h> +#include <sys/param.h> +#include <sys/time.h> +#include <sys/uio.h> +#include <ktrace.h> +], [ + utrace((void *)0, 0); +], [je_cv_utrace]) +if test "x${je_cv_utrace}" = "xno" ; then + enable_utrace="0" +fi +if test "x$enable_utrace" = "x1" ; then + AC_DEFINE([JEMALLOC_UTRACE], [ ]) +fi +AC_SUBST([enable_utrace]) + dnl Do not support the xmalloc option by default.
AC_ARG_ENABLE([xmalloc], [AS_HELP_STRING([--enable-xmalloc], [Support xmalloc option])], @@ -1061,6 +1089,7 @@ AC_MSG_RESULT([prof-gcc : ${enable_prof_gcc}]) AC_MSG_RESULT([tcache : ${enable_tcache}]) AC_MSG_RESULT([fill : ${enable_fill}]) AC_MSG_RESULT([xmalloc : ${enable_xmalloc}]) +AC_MSG_RESULT([utrace : ${enable_utrace}]) AC_MSG_RESULT([dss : ${enable_dss}]) AC_MSG_RESULT([lazy_lock : ${enable_lazy_lock}]) AC_MSG_RESULT([tls : ${enable_tls}]) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 7a2d033d..8ae82621 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -710,6 +710,16 @@ for (i = 0; i < nbins; i++) { build configuration. + + + config.utrace + (bool) + r- + + was specified during + build configuration. + + config.xmalloc @@ -826,6 +836,19 @@ for (i = 0; i < nbins; i++) { + + + opt.utrace + (bool) + r- + [] + + Allocation tracing based on + utrace + 2 enabled/disabled. This option + is disabled by default. + + opt.xmalloc @@ -1958,6 +1981,8 @@ malloc_conf = "lg_chunk:24";]]> 2, sbrk 2, + utrace + 2, alloca 3, atexit diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index c8e40198..66dd357f 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -36,6 +36,10 @@ #define JEMALLOC_NO_DEMANGLE #include "../jemalloc@install_suffix@.h" +#ifdef JEMALLOC_UTRACE +#include +#endif + #include "jemalloc/internal/private_namespace.h" #ifdef JEMALLOC_CC_SILENCE @@ -114,6 +118,13 @@ static const bool config_tls = false #endif ; +static const bool config_utrace = +#ifdef JEMALLOC_UTRACE + true +#else + false +#endif + ; static const bool config_xmalloc = #ifdef JEMALLOC_XMALLOC true @@ -332,6 +343,7 @@ typedef struct { extern bool opt_abort; extern bool opt_junk; +extern bool opt_utrace; extern bool opt_xmalloc; extern bool opt_zero; extern size_t opt_narenas; diff --git a/include/jemalloc/internal/private_namespace.h b/include/jemalloc/internal/private_namespace.h index ed34e328..de3042eb 100644 --- a/include/jemalloc/internal/private_namespace.h +++ b/include/jemalloc/internal/private_namespace.h @@ -173,6 +173,7 @@ #define opt_prof_leak JEMALLOC_N(opt_prof_leak) #define opt_stats_print JEMALLOC_N(opt_stats_print) #define opt_tcache JEMALLOC_N(opt_tcache) +#define opt_utrace JEMALLOC_N(opt_utrace) #define opt_xmalloc JEMALLOC_N(opt_xmalloc) #define opt_zero JEMALLOC_N(opt_zero) #define pow2_ceil JEMALLOC_N(pow2_ceil) diff --git a/include/jemalloc/jemalloc_defs.h.in b/include/jemalloc/jemalloc_defs.h.in index 040753a6..8e7442d6 100644 --- a/include/jemalloc/jemalloc_defs.h.in +++ b/include/jemalloc/jemalloc_defs.h.in @@ -154,6 +154,9 @@ /* Support the experimental API. */ #undef JEMALLOC_EXPERIMENTAL +/* Support utrace(2)-based tracing. */ +#undef JEMALLOC_UTRACE + /* Support optional abort() on OOM. 
*/ #undef JEMALLOC_XMALLOC diff --git a/src/ctl.c b/src/ctl.c index df7affdc..1aaf1971 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -56,6 +56,7 @@ CTL_PROTO(config_prof_libunwind) CTL_PROTO(config_stats) CTL_PROTO(config_tcache) CTL_PROTO(config_tls) +CTL_PROTO(config_utrace) CTL_PROTO(config_xmalloc) CTL_PROTO(opt_abort) CTL_PROTO(opt_lg_chunk) @@ -64,6 +65,7 @@ CTL_PROTO(opt_lg_dirty_mult) CTL_PROTO(opt_stats_print) CTL_PROTO(opt_junk) CTL_PROTO(opt_zero) +CTL_PROTO(opt_utrace) CTL_PROTO(opt_xmalloc) CTL_PROTO(opt_tcache) CTL_PROTO(opt_lg_tcache_max) @@ -176,6 +178,7 @@ static const ctl_node_t config_node[] = { {NAME("stats"), CTL(config_stats)}, {NAME("tcache"), CTL(config_tcache)}, {NAME("tls"), CTL(config_tls)}, + {NAME("utrace"), CTL(config_utrace)}, {NAME("xmalloc"), CTL(config_xmalloc)} }; @@ -187,6 +190,7 @@ static const ctl_node_t opt_node[] = { {NAME("stats_print"), CTL(opt_stats_print)}, {NAME("junk"), CTL(opt_junk)}, {NAME("zero"), CTL(opt_zero)}, + {NAME("utrace"), CTL(opt_utrace)}, {NAME("xmalloc"), CTL(opt_xmalloc)}, {NAME("tcache"), CTL(opt_tcache)}, {NAME("lg_tcache_max"), CTL(opt_lg_tcache_max)}, @@ -1080,6 +1084,7 @@ CTL_RO_BOOL_CONFIG_GEN(config_prof_libunwind) CTL_RO_BOOL_CONFIG_GEN(config_stats) CTL_RO_BOOL_CONFIG_GEN(config_tcache) CTL_RO_BOOL_CONFIG_GEN(config_tls) +CTL_RO_BOOL_CONFIG_GEN(config_utrace) CTL_RO_BOOL_CONFIG_GEN(config_xmalloc) /******************************************************************************/ @@ -1091,6 +1096,7 @@ CTL_RO_NL_GEN(opt_lg_dirty_mult, opt_lg_dirty_mult, ssize_t) CTL_RO_NL_GEN(opt_stats_print, opt_stats_print, bool) CTL_RO_NL_CGEN(config_fill, opt_junk, opt_junk, bool) CTL_RO_NL_CGEN(config_fill, opt_zero, opt_zero, bool) +CTL_RO_NL_CGEN(config_utrace, opt_utrace, opt_utrace, bool) CTL_RO_NL_CGEN(config_xmalloc, opt_xmalloc, opt_xmalloc, bool) CTL_RO_NL_CGEN(config_tcache, opt_tcache, opt_tcache, bool) CTL_RO_NL_CGEN(config_tcache, opt_lg_tcache_max, opt_lg_tcache_max, ssize_t) diff --git a/src/jemalloc.c b/src/jemalloc.c index a531a216..e0510209 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -21,6 +21,7 @@ bool opt_junk = false; bool opt_abort = false; bool opt_junk = false; #endif +bool opt_utrace = false; bool opt_xmalloc = false; bool opt_zero = false; size_t opt_narenas = 0; @@ -50,6 +51,26 @@ static bool malloc_initializer = NO_INITIALIZER; /* Used to avoid initialization races. */ static malloc_mutex_t init_lock = MALLOC_MUTEX_INITIALIZER; +typedef struct { + void *p; /* Input pointer (as in realloc(p, s)). */ + size_t s; /* Request size. */ + void *r; /* Result pointer. */ +} malloc_utrace_t; + +#ifdef JEMALLOC_UTRACE +# define UTRACE(a, b, c) do { \ + if (opt_utrace) { \ + malloc_utrace_t ut; \ + ut.p = (a); \ + ut.s = (b); \ + ut.r = (c); \ + utrace(&ut, sizeof(ut)); \ + } \ +} while (0) +#else +# define UTRACE(a, b, c) +#endif + /******************************************************************************/ /* Function prototypes for non-inline static functions. 
*/ @@ -483,6 +504,9 @@ malloc_conf_init(void) CONF_HANDLE_BOOL(opt_junk, junk) CONF_HANDLE_BOOL(opt_zero, zero) } + if (config_utrace) { + CONF_HANDLE_BOOL(opt_utrace, utrace) + } if (config_xmalloc) { CONF_HANDLE_BOOL(opt_xmalloc, xmalloc) } @@ -759,6 +783,7 @@ OOM: assert(usize == isalloc(ret)); thread_allocated_tsd_get()->allocated += usize; } + UTRACE(0, size, ret); return (ret); } @@ -852,6 +877,7 @@ RETURN: } if (config_prof && opt_prof && result != NULL) prof_malloc(result, usize, cnt); + UTRACE(0, size, result); return (ret); } @@ -951,6 +977,7 @@ RETURN: assert(usize == isalloc(ret)); thread_allocated_tsd_get()->allocated += usize; } + UTRACE(0, num_size, ret); return (ret); } @@ -1075,6 +1102,7 @@ RETURN: ta->allocated += usize; ta->deallocated += old_size; } + UTRACE(ptr, size, ret); return (ret); } @@ -1083,6 +1111,7 @@ void je_free(void *ptr) { + UTRACE(ptr, 0, 0); if (ptr != NULL) { size_t usize; @@ -1310,6 +1339,7 @@ je_allocm(void **ptr, size_t *rsize, size_t size, int flags) assert(usize == isalloc(p)); thread_allocated_tsd_get()->allocated += usize; } + UTRACE(0, size, p); return (ALLOCM_SUCCESS); OOM: if (config_xmalloc && opt_xmalloc) { @@ -1318,6 +1348,7 @@ OOM: abort(); } *ptr = NULL; + UTRACE(0, size, 0); return (ALLOCM_ERR_OOM); } @@ -1404,16 +1435,20 @@ je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags) ta->allocated += usize; ta->deallocated += old_size; } + UTRACE(p, size, q); return (ALLOCM_SUCCESS); ERR: - if (no_move) + if (no_move) { + UTRACE(p, size, q); return (ALLOCM_ERR_NOT_MOVED); + } OOM: if (config_xmalloc && opt_xmalloc) { malloc_write(": Error in rallocm(): " "out of memory\n"); abort(); } + UTRACE(p, size, 0); return (ALLOCM_ERR_OOM); } @@ -1448,6 +1483,7 @@ je_dallocm(void *ptr, int flags) assert(ptr != NULL); assert(malloc_initialized || IS_INITIALIZER); + UTRACE(ptr, 0, 0); if (config_stats) usize = isalloc(ptr); if (config_prof && opt_prof) { diff --git a/src/stats.c b/src/stats.c index 83baf568..0cd70b0d 100644 --- a/src/stats.c +++ b/src/stats.c @@ -383,6 +383,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, OPT_WRITE_BOOL(stats_print) OPT_WRITE_BOOL(junk) OPT_WRITE_BOOL(zero) + OPT_WRITE_BOOL(utrace) OPT_WRITE_BOOL(xmalloc) OPT_WRITE_BOOL(tcache) OPT_WRITE_SSIZE_T(lg_tcache_max) From fad100bc35efba262f2c98cefc134899d393c734 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 6 Apr 2012 12:24:46 -0700 Subject: [PATCH 103/205] Remove arena_malloc_prechosen(). Remove arena_malloc_prechosen(), now that arena_malloc() can be invoked in a way that is semantically equivalent. 
--- include/jemalloc/internal/arena.h | 14 -------------- src/tsd.c | 2 +- 2 files changed, 1 insertion(+), 15 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 03e3f3ce..d25a2b1d 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -391,7 +391,6 @@ unsigned arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, prof_ctx_t *arena_prof_ctx_get(const void *ptr); void arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx); void *arena_malloc(arena_t *arena, size_t size, bool zero, bool try_tcache); -void *arena_malloc_prechosen(arena_t *arena, size_t size, bool zero); void arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr, bool try_tcache); #endif @@ -578,19 +577,6 @@ arena_malloc(arena_t *arena, size_t size, bool zero, bool try_tcache) } } -JEMALLOC_INLINE void * -arena_malloc_prechosen(arena_t *arena, size_t size, bool zero) -{ - - assert(size != 0); - assert(size <= arena_maxclass); - - if (size <= SMALL_MAXCLASS) - return (arena_malloc_small(arena, size, zero)); - else - return (arena_malloc_large(arena, size, zero)); -} - JEMALLOC_INLINE void arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr, bool try_tcache) { diff --git a/src/tsd.c b/src/tsd.c index 669ea8fc..0838dc86 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -14,7 +14,7 @@ malloc_tsd_malloc(size_t size) { /* Avoid choose_arena() in order to dodge bootstrapping issues. */ - return arena_malloc_prechosen(arenas[0], size, false); + return arena_malloc(arenas[0], size, false, false); } void From 3701367e4ca6b77109e1cce0a5b98a8ac69cf505 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 6 Apr 2012 12:41:55 -0700 Subject: [PATCH 104/205] Always initialize tcache data structures. Always initialize tcache data structures if the tcache configuration option is enabled, regardless of opt_tcache. This fixes "thread.tcache.enabled" mallctl manipulation in the case when opt_tcache is false. --- src/tcache.c | 82 ++++++++++++++++++++++++---------------------------- 1 file changed, 37 insertions(+), 45 deletions(-) diff --git a/src/tcache.c b/src/tcache.c index 6a7f17bf..99a657b6 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -334,17 +334,17 @@ tcache_thread_cleanup(void *arg) } else if (tcache == TCACHE_STATE_REINCARNATED) { /* * Another destructor called an allocator function after this - * destructor was called. Reset tcache to 1 in order to - * receive another callback. + * destructor was called. Reset tcache to + * TCACHE_STATE_PURGATORY in order to receive another callback. */ tcache = TCACHE_STATE_PURGATORY; tcache_tsd_set(&tcache); } else if (tcache == TCACHE_STATE_PURGATORY) { /* * The previous time this destructor was called, we set the key - * to 1 so that other destructors wouldn't cause re-creation of - * the tcache. This time, do nothing, so that the destructor - * will not be called again. + * to TCACHE_STATE_PURGATORY so that other destructors wouldn't + * cause re-creation of the tcache. This time, do nothing, so + * that the destructor will not be called again. */ } else if (tcache != NULL) { assert(tcache != TCACHE_STATE_PURGATORY); @@ -381,46 +381,40 @@ tcache_stats_merge(tcache_t *tcache, arena_t *arena) bool tcache_boot0(void) { + unsigned i; - if (opt_tcache) { - unsigned i; + /* + * If necessary, clamp opt_lg_tcache_max, now that arena_maxclass is + * known. 
+ */ + if (opt_lg_tcache_max < 0 || (1U << opt_lg_tcache_max) < SMALL_MAXCLASS) + tcache_maxclass = SMALL_MAXCLASS; + else if ((1U << opt_lg_tcache_max) > arena_maxclass) + tcache_maxclass = arena_maxclass; + else + tcache_maxclass = (1U << opt_lg_tcache_max); - /* - * If necessary, clamp opt_lg_tcache_max, now that - * SMALL_MAXCLASS and arena_maxclass are known. - * XXX Can this be done earlier? - */ - if (opt_lg_tcache_max < 0 || (1U << opt_lg_tcache_max) < - SMALL_MAXCLASS) - tcache_maxclass = SMALL_MAXCLASS; - else if ((1U << opt_lg_tcache_max) > arena_maxclass) - tcache_maxclass = arena_maxclass; - else - tcache_maxclass = (1U << opt_lg_tcache_max); + nhbins = NBINS + (tcache_maxclass >> LG_PAGE); - nhbins = NBINS + (tcache_maxclass >> LG_PAGE); - - /* Initialize tcache_bin_info. */ - tcache_bin_info = (tcache_bin_info_t *)base_alloc(nhbins * - sizeof(tcache_bin_info_t)); - if (tcache_bin_info == NULL) - return (true); - stack_nelms = 0; - for (i = 0; i < NBINS; i++) { - if ((arena_bin_info[i].nregs << 1) <= - TCACHE_NSLOTS_SMALL_MAX) { - tcache_bin_info[i].ncached_max = - (arena_bin_info[i].nregs << 1); - } else { - tcache_bin_info[i].ncached_max = - TCACHE_NSLOTS_SMALL_MAX; - } - stack_nelms += tcache_bin_info[i].ncached_max; - } - for (; i < nhbins; i++) { - tcache_bin_info[i].ncached_max = TCACHE_NSLOTS_LARGE; - stack_nelms += tcache_bin_info[i].ncached_max; + /* Initialize tcache_bin_info. */ + tcache_bin_info = (tcache_bin_info_t *)base_alloc(nhbins * + sizeof(tcache_bin_info_t)); + if (tcache_bin_info == NULL) + return (true); + stack_nelms = 0; + for (i = 0; i < NBINS; i++) { + if ((arena_bin_info[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MAX) { + tcache_bin_info[i].ncached_max = + (arena_bin_info[i].nregs << 1); + } else { + tcache_bin_info[i].ncached_max = + TCACHE_NSLOTS_SMALL_MAX; } + stack_nelms += tcache_bin_info[i].ncached_max; + } + for (; i < nhbins; i++) { + tcache_bin_info[i].ncached_max = TCACHE_NSLOTS_LARGE; + stack_nelms += tcache_bin_info[i].ncached_max; } return (false); @@ -430,10 +424,8 @@ bool tcache_boot1(void) { - if (opt_tcache) { - if (tcache_tsd_boot() || tcache_enabled_tsd_boot()) - return (true); - } + if (tcache_tsd_boot() || tcache_enabled_tsd_boot()) + return (true); return (false); } From a8683fbaf9056c16a1c5bb2606316ebc35f592a4 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Tue, 10 Apr 2012 15:29:18 +0200 Subject: [PATCH 105/205] Ignore whitespaces when comparing test results with expected output In mingw, the test result may contain CRLF while the .exp files don't, or the other way around. --- Makefile.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile.in b/Makefile.in index 821c0634..7df4fc60 100644 --- a/Makefile.in +++ b/Makefile.in @@ -205,7 +205,7 @@ check: tests $(TEST_LIBRARY_PATH) $${t} @abs_srcroot@ @abs_objroot@ \ > @objroot@$${t}.out 2>&1; \ if test -e "@srcroot@$${t}.exp"; then \ - diff -u @srcroot@$${t}.exp \ + diff -w -u @srcroot@$${t}.exp \ @objroot@$${t}.out >/dev/null 2>&1; \ fail=$$?; \ if test "$${fail}" -eq "1" ; then \ From c5851eaf6e0edb35a499d62d30199e336da5ccb6 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Tue, 10 Apr 2012 18:19:45 +0200 Subject: [PATCH 106/205] Remove MAP_NORESERVE support It was only used by the swap feature, and that is gone. 
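For reference, the deleted conditional in pages_map() was the last MAP_NORESERVE consumer; as the hunk below shows, it amounted to:

    int flags = MAP_PRIVATE | MAP_ANON;
    #ifdef MAP_NORESERVE
    if (noreserve)
        flags |= MAP_NORESERVE;
    #endif
    ret = mmap(addr, size, PROT_READ | PROT_WRITE, flags, -1, 0);

With the swap feature gone, chunk_alloc_mmap_noreserve() -- the only path that ever set noreserve -- goes away too, and the flags collapse to an unconditional MAP_PRIVATE | MAP_ANON.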
--- include/jemalloc/internal/chunk_mmap.h | 1 - src/chunk_mmap.c | 41 +++++++++----------------- 2 files changed, 14 insertions(+), 28 deletions(-) diff --git a/include/jemalloc/internal/chunk_mmap.h b/include/jemalloc/internal/chunk_mmap.h index 07b50a4d..3f603158 100644 --- a/include/jemalloc/internal/chunk_mmap.h +++ b/include/jemalloc/internal/chunk_mmap.h @@ -10,7 +10,6 @@ #ifdef JEMALLOC_H_EXTERNS void *chunk_alloc_mmap(size_t size); -void *chunk_alloc_mmap_noreserve(size_t size); void chunk_dealloc_mmap(void *chunk, size_t size); bool chunk_mmap_boot(void); diff --git a/src/chunk_mmap.c b/src/chunk_mmap.c index 749a2dac..3a8105df 100644 --- a/src/chunk_mmap.c +++ b/src/chunk_mmap.c @@ -15,16 +15,15 @@ malloc_tsd_funcs(JEMALLOC_INLINE, mmap_unaligned, bool, false, /******************************************************************************/ /* Function prototypes for non-inline static functions. */ -static void *pages_map(void *addr, size_t size, bool noreserve); +static void *pages_map(void *addr, size_t size); static void pages_unmap(void *addr, size_t size); -static void *chunk_alloc_mmap_slow(size_t size, bool unaligned, - bool noreserve); -static void *chunk_alloc_mmap_internal(size_t size, bool noreserve); +static void *chunk_alloc_mmap_slow(size_t size, bool unaligned); +static void *chunk_alloc_mmap_internal(size_t size); /******************************************************************************/ static void * -pages_map(void *addr, size_t size, bool noreserve) +pages_map(void *addr, size_t size) { void *ret; @@ -32,12 +31,8 @@ pages_map(void *addr, size_t size, bool noreserve) * We don't use MAP_FIXED here, because it can cause the *replacement* * of existing mappings, and we only want to create new mappings. */ - int flags = MAP_PRIVATE | MAP_ANON; -#ifdef MAP_NORESERVE - if (noreserve) - flags |= MAP_NORESERVE; -#endif - ret = mmap(addr, size, PROT_READ | PROT_WRITE, flags, -1, 0); + ret = mmap(addr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, + -1, 0); assert(ret != NULL); if (ret == MAP_FAILED) @@ -78,7 +73,7 @@ pages_unmap(void *addr, size_t size) } static void * -chunk_alloc_mmap_slow(size_t size, bool unaligned, bool noreserve) +chunk_alloc_mmap_slow(size_t size, bool unaligned) { void *ret; size_t offset; @@ -87,7 +82,7 @@ chunk_alloc_mmap_slow(size_t size, bool unaligned, bool noreserve) if (size + chunksize <= size) return (NULL); - ret = pages_map(NULL, size + chunksize, noreserve); + ret = pages_map(NULL, size + chunksize); if (ret == NULL) return (NULL); @@ -126,7 +121,7 @@ chunk_alloc_mmap_slow(size_t size, bool unaligned, bool noreserve) } static void * -chunk_alloc_mmap_internal(size_t size, bool noreserve) +chunk_alloc_mmap_internal(size_t size) { void *ret; @@ -161,7 +156,7 @@ chunk_alloc_mmap_internal(size_t size, bool noreserve) if (mmap_unaligned_booted && *mmap_unaligned_tsd_get() == false) { size_t offset; - ret = pages_map(NULL, size, noreserve); + ret = pages_map(NULL, size); if (ret == NULL) return (NULL); @@ -171,14 +166,13 @@ chunk_alloc_mmap_internal(size_t size, bool noreserve) mmap_unaligned_tsd_set(&mu); /* Try to extend chunk boundary. */ if (pages_map((void *)((uintptr_t)ret + size), - chunksize - offset, noreserve) == NULL) { + chunksize - offset) == NULL) { /* * Extension failed. Clean up, then revert to * the reliable-but-expensive method. */ pages_unmap(ret, size); - ret = chunk_alloc_mmap_slow(size, true, - noreserve); + ret = chunk_alloc_mmap_slow(size, true); } else { /* Clean up unneeded leading space. 
*/ pages_unmap(ret, chunksize - offset); @@ -187,7 +181,7 @@ chunk_alloc_mmap_internal(size_t size, bool noreserve) } } } else - ret = chunk_alloc_mmap_slow(size, false, noreserve); + ret = chunk_alloc_mmap_slow(size, false); return (ret); } @@ -196,14 +190,7 @@ void * chunk_alloc_mmap(size_t size) { - return (chunk_alloc_mmap_internal(size, false)); -} - -void * -chunk_alloc_mmap_noreserve(size_t size) -{ - - return (chunk_alloc_mmap_internal(size, true)); + return (chunk_alloc_mmap_internal(size)); } void From eae269036c9f702d9fa9be497a1a2aa1be13a29e Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Tue, 10 Apr 2012 19:50:33 +0200 Subject: [PATCH 107/205] Add alignment support to chunk_alloc(). --- include/jemalloc/internal/chunk.h | 2 +- include/jemalloc/internal/chunk_dss.h | 2 +- include/jemalloc/internal/chunk_mmap.h | 2 +- src/arena.c | 3 +- src/base.c | 2 +- src/chunk.c | 7 +- src/chunk_dss.c | 80 ++++++++++++++-------- src/chunk_mmap.c | 35 +++++----- src/huge.c | 92 +++----------------------- 9 files changed, 87 insertions(+), 138 deletions(-) diff --git a/include/jemalloc/internal/chunk.h b/include/jemalloc/internal/chunk.h index 8e24e8f3..e047c2b1 100644 --- a/include/jemalloc/internal/chunk.h +++ b/include/jemalloc/internal/chunk.h @@ -42,7 +42,7 @@ extern size_t chunk_npages; extern size_t map_bias; /* Number of arena chunk header pages. */ extern size_t arena_maxclass; /* Max size class for arenas. */ -void *chunk_alloc(size_t size, bool base, bool *zero); +void *chunk_alloc(size_t size, size_t alignment, bool base, bool *zero); void chunk_dealloc(void *chunk, size_t size, bool unmap); bool chunk_boot0(void); bool chunk_boot1(void); diff --git a/include/jemalloc/internal/chunk_dss.h b/include/jemalloc/internal/chunk_dss.h index a39a2031..16ea9542 100644 --- a/include/jemalloc/internal/chunk_dss.h +++ b/include/jemalloc/internal/chunk_dss.h @@ -9,7 +9,7 @@ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -void *chunk_alloc_dss(size_t size, bool *zero); +void *chunk_alloc_dss(size_t size, size_t alignment, bool *zero); bool chunk_in_dss(void *chunk); bool chunk_dealloc_dss(void *chunk, size_t size); bool chunk_dss_boot(void); diff --git a/include/jemalloc/internal/chunk_mmap.h b/include/jemalloc/internal/chunk_mmap.h index 3f603158..148fefef 100644 --- a/include/jemalloc/internal/chunk_mmap.h +++ b/include/jemalloc/internal/chunk_mmap.h @@ -9,7 +9,7 @@ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -void *chunk_alloc_mmap(size_t size); +void *chunk_alloc_mmap(size_t size, size_t alignment); void chunk_dealloc_mmap(void *chunk, size_t size); bool chunk_mmap_boot(void); diff --git a/src/arena.c b/src/arena.c index 64440996..c84aaf47 100644 --- a/src/arena.c +++ b/src/arena.c @@ -357,7 +357,8 @@ arena_chunk_alloc(arena_t *arena) zero = false; malloc_mutex_unlock(&arena->lock); - chunk = (arena_chunk_t *)chunk_alloc(chunksize, false, &zero); + chunk = (arena_chunk_t *)chunk_alloc(chunksize, chunksize, + false, &zero); malloc_mutex_lock(&arena->lock); if (chunk == NULL) return (NULL); diff --git a/src/base.c b/src/base.c index 696c362a..bafaa743 100644 --- a/src/base.c +++ b/src/base.c @@ -32,7 +32,7 @@ base_pages_alloc(size_t minsize) assert(minsize != 0); csize = CHUNK_CEILING(minsize); zero = false; - base_pages = chunk_alloc(csize, true, &zero); + base_pages = chunk_alloc(csize, chunksize, true, &zero); if (base_pages == NULL) return (true); base_next_addr = 
base_pages; diff --git a/src/chunk.c b/src/chunk.c index 8fcd61e4..797bd341 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -27,19 +27,20 @@ size_t arena_maxclass; /* Max size class for arenas. */ * advantage of them if they are returned. */ void * -chunk_alloc(size_t size, bool base, bool *zero) +chunk_alloc(size_t size, size_t alignment, bool base, bool *zero) { void *ret; assert(size != 0); assert((size & chunksize_mask) == 0); + assert((alignment & chunksize_mask) == 0); if (config_dss) { - ret = chunk_alloc_dss(size, zero); + ret = chunk_alloc_dss(size, alignment, zero); if (ret != NULL) goto RETURN; } - ret = chunk_alloc_mmap(size); + ret = chunk_alloc_mmap(size, alignment); if (ret != NULL) { *zero = true; goto RETURN; diff --git a/src/chunk_dss.c b/src/chunk_dss.c index 405dc29b..a81a271c 100644 --- a/src/chunk_dss.c +++ b/src/chunk_dss.c @@ -28,41 +28,50 @@ static extent_tree_t dss_chunks_ad; /******************************************************************************/ /* Function prototypes for non-inline static functions. */ -static void *chunk_recycle_dss(size_t size, bool *zero); +static void *chunk_recycle_dss(size_t size, size_t alignment, bool *zero); static extent_node_t *chunk_dealloc_dss_record(void *chunk, size_t size); /******************************************************************************/ static void * -chunk_recycle_dss(size_t size, bool *zero) +chunk_recycle_dss(size_t size, size_t alignment, bool *zero) { extent_node_t *node, key; cassert(config_dss); key.addr = NULL; - key.size = size; + key.size = size + alignment - chunksize; malloc_mutex_lock(&dss_mtx); node = extent_tree_szad_nsearch(&dss_chunks_szad, &key); if (node != NULL) { - void *ret = node->addr; + size_t offset = (size_t)((uintptr_t)(node->addr) & (alignment - + 1)); + void *ret; + if (offset > 0) + offset = alignment - offset; + ret = (void *)((uintptr_t)(node->addr) + offset); /* Remove node from the tree. */ extent_tree_szad_remove(&dss_chunks_szad, node); - if (node->size == size) { - extent_tree_ad_remove(&dss_chunks_ad, node); - base_node_dealloc(node); - } else { - /* - * Insert the remainder of node's address range as a - * smaller chunk. Its position within dss_chunks_ad - * does not change. - */ - assert(node->size > size); - node->addr = (void *)((uintptr_t)node->addr + size); - node->size -= size; + extent_tree_ad_remove(&dss_chunks_ad, node); + if (offset > 0) { + /* Insert the leading space as a smaller chunk. */ + node->size = offset; extent_tree_szad_insert(&dss_chunks_szad, node); + extent_tree_ad_insert(&dss_chunks_ad, node); } + if (alignment - chunksize > offset) { + if (offset > 0) + node = base_node_alloc(); + /* Insert the trailing space as a smaller chunk. 
*/ + node->addr = (void *)((uintptr_t)(ret) + size); + node->size = alignment - chunksize - offset; + extent_tree_szad_insert(&dss_chunks_szad, node); + extent_tree_ad_insert(&dss_chunks_ad, node); + } else if (offset == 0) + base_node_dealloc(node); + malloc_mutex_unlock(&dss_mtx); if (*zero) @@ -75,13 +84,15 @@ chunk_recycle_dss(size_t size, bool *zero) } void * -chunk_alloc_dss(size_t size, bool *zero) +chunk_alloc_dss(size_t size, size_t alignment, bool *zero) { void *ret; cassert(config_dss); + assert(size > 0 && (size & chunksize_mask) == 0); + assert(alignment > 0 && (alignment & chunksize_mask) == 0); - ret = chunk_recycle_dss(size, zero); + ret = chunk_recycle_dss(size, alignment, zero); if (ret != NULL) return (ret); @@ -94,6 +105,8 @@ chunk_alloc_dss(size_t size, bool *zero) malloc_mutex_lock(&dss_mtx); if (dss_prev != (void *)-1) { + size_t gap_size, cpad_size; + void *cpad, *dss_next; intptr_t incr; /* @@ -104,25 +117,36 @@ chunk_alloc_dss(size_t size, bool *zero) do { /* Get the current end of the DSS. */ dss_max = sbrk(0); - /* * Calculate how much padding is necessary to * chunk-align the end of the DSS. */ - incr = (intptr_t)size - - (intptr_t)CHUNK_ADDR2OFFSET(dss_max); - if (incr == (intptr_t)size) - ret = dss_max; - else { - ret = (void *)((intptr_t)dss_max + incr); - incr += size; + gap_size = (chunksize - CHUNK_ADDR2OFFSET(dss_max)) & + chunksize_mask; + /* + * Compute how much chunk-aligned pad space (if any) is + * necessary to satisfy alignment. This space can be + * recycled for later use. + */ + cpad = (void *)((uintptr_t)dss_max + gap_size); + ret = (void *)(((uintptr_t)dss_max + (alignment - 1)) & + ~(alignment - 1)); + cpad_size = (uintptr_t)ret - (uintptr_t)cpad; + dss_next = (void *)((uintptr_t)ret + size); + if ((uintptr_t)ret < (uintptr_t)dss_max || + (uintptr_t)dss_next < (uintptr_t)dss_max) { + /* Wrap-around. */ + malloc_mutex_unlock(&dss_mtx); + return (NULL); } - + incr = gap_size + cpad_size + size; dss_prev = sbrk(incr); if (dss_prev == dss_max) { /* Success. */ - dss_max = (void *)((intptr_t)dss_prev + incr); + dss_max = dss_next; malloc_mutex_unlock(&dss_mtx); + if (cpad_size != 0) + chunk_dealloc_dss(cpad, cpad_size); *zero = true; return (ret); } diff --git a/src/chunk_mmap.c b/src/chunk_mmap.c index 3a8105df..37dad204 100644 --- a/src/chunk_mmap.c +++ b/src/chunk_mmap.c @@ -17,8 +17,9 @@ malloc_tsd_funcs(JEMALLOC_INLINE, mmap_unaligned, bool, false, static void *pages_map(void *addr, size_t size); static void pages_unmap(void *addr, size_t size); -static void *chunk_alloc_mmap_slow(size_t size, bool unaligned); -static void *chunk_alloc_mmap_internal(size_t size); +static void *chunk_alloc_mmap_slow(size_t size, size_t alignment, + bool unaligned); +static void *chunk_alloc_mmap_internal(size_t size, size_t alignment); /******************************************************************************/ @@ -73,7 +74,7 @@ pages_unmap(void *addr, size_t size) } static void * -chunk_alloc_mmap_slow(size_t size, bool unaligned) +chunk_alloc_mmap_slow(size_t size, size_t alignment, bool unaligned) { void *ret; size_t offset; @@ -82,29 +83,26 @@ chunk_alloc_mmap_slow(size_t size, bool unaligned) if (size + chunksize <= size) return (NULL); - ret = pages_map(NULL, size + chunksize); + ret = pages_map(NULL, size + alignment); if (ret == NULL) return (NULL); /* Clean up unneeded leading/trailing space. 
*/ - offset = CHUNK_ADDR2OFFSET(ret); + offset = (size_t)((uintptr_t)(ret) & (alignment - 1)); if (offset != 0) { /* Note that mmap() returned an unaligned mapping. */ unaligned = true; /* Leading space. */ - pages_unmap(ret, chunksize - offset); + pages_unmap(ret, alignment - offset); - ret = (void *)((uintptr_t)ret + - (chunksize - offset)); + ret = (void *)((uintptr_t)ret + (alignment - offset)); /* Trailing space. */ - pages_unmap((void *)((uintptr_t)ret + size), - offset); + pages_unmap((void *)((uintptr_t)ret + size), offset); } else { /* Trailing space only. */ - pages_unmap((void *)((uintptr_t)ret + size), - chunksize); + pages_unmap((void *)((uintptr_t)ret + size), alignment); } /* @@ -121,7 +119,7 @@ chunk_alloc_mmap_slow(size_t size, bool unaligned) } static void * -chunk_alloc_mmap_internal(size_t size) +chunk_alloc_mmap_internal(size_t size, size_t alignment) { void *ret; @@ -160,7 +158,7 @@ chunk_alloc_mmap_internal(size_t size) if (ret == NULL) return (NULL); - offset = CHUNK_ADDR2OFFSET(ret); + offset = (size_t)((uintptr_t)(ret) & (alignment - 1)); if (offset != 0) { bool mu = true; mmap_unaligned_tsd_set(&mu); @@ -172,7 +170,8 @@ chunk_alloc_mmap_internal(size_t size) * the reliable-but-expensive method. */ pages_unmap(ret, size); - ret = chunk_alloc_mmap_slow(size, true); + ret = chunk_alloc_mmap_slow(size, alignment, + true); } else { /* Clean up unneeded leading space. */ pages_unmap(ret, chunksize - offset); @@ -181,16 +180,16 @@ chunk_alloc_mmap_internal(size_t size) } } } else - ret = chunk_alloc_mmap_slow(size, false); + ret = chunk_alloc_mmap_slow(size, alignment, false); return (ret); } void * -chunk_alloc_mmap(size_t size) +chunk_alloc_mmap(size_t size, size_t alignment) { - return (chunk_alloc_mmap_internal(size)); + return (chunk_alloc_mmap_internal(size, alignment)); } void diff --git a/src/huge.c b/src/huge.c index a4e6cc8f..43c8f3b0 100644 --- a/src/huge.c +++ b/src/huge.c @@ -17,6 +17,13 @@ static extent_tree_t huge; void * huge_malloc(size_t size, bool zero) +{ + + return (huge_palloc(size, chunksize, zero)); +} + +void * +huge_palloc(size_t size, size_t alignment, bool zero) { void *ret; size_t csize; @@ -35,7 +42,7 @@ huge_malloc(size_t size, bool zero) if (node == NULL) return (NULL); - ret = chunk_alloc(csize, false, &zero); + ret = chunk_alloc(csize, alignment, false, &zero); if (ret == NULL) { base_node_dealloc(node); return (NULL); @@ -64,89 +71,6 @@ huge_malloc(size_t size, bool zero) return (ret); } -/* Only handles large allocations that require more than chunk alignment. */ -void * -huge_palloc(size_t size, size_t alignment, bool zero) -{ - void *ret; - size_t alloc_size, chunk_size, offset; - extent_node_t *node; - - /* - * This allocation requires alignment that is even larger than chunk - * alignment. This means that huge_malloc() isn't good enough. - * - * Allocate almost twice as many chunks as are demanded by the size or - * alignment, in order to assure the alignment can be achieved, then - * unmap leading and trailing chunks. - */ - assert(alignment > chunksize); - - chunk_size = CHUNK_CEILING(size); - - if (size >= alignment) - alloc_size = chunk_size + alignment - chunksize; - else - alloc_size = (alignment << 1) - chunksize; - - /* Allocate an extent node with which to track the chunk. 
*/ - node = base_node_alloc(); - if (node == NULL) - return (NULL); - - ret = chunk_alloc(alloc_size, false, &zero); - if (ret == NULL) { - base_node_dealloc(node); - return (NULL); - } - - offset = (uintptr_t)ret & (alignment - 1); - assert((offset & chunksize_mask) == 0); - assert(offset < alloc_size); - if (offset == 0) { - /* Trim trailing space. */ - chunk_dealloc((void *)((uintptr_t)ret + chunk_size), alloc_size - - chunk_size, true); - } else { - size_t trailsize; - - /* Trim leading space. */ - chunk_dealloc(ret, alignment - offset, true); - - ret = (void *)((uintptr_t)ret + (alignment - offset)); - - trailsize = alloc_size - (alignment - offset) - chunk_size; - if (trailsize != 0) { - /* Trim trailing space. */ - assert(trailsize < alloc_size); - chunk_dealloc((void *)((uintptr_t)ret + chunk_size), - trailsize, true); - } - } - - /* Insert node into huge. */ - node->addr = ret; - node->size = chunk_size; - - malloc_mutex_lock(&huge_mtx); - extent_tree_ad_insert(&huge, node); - if (config_stats) { - stats_cactive_add(chunk_size); - huge_nmalloc++; - huge_allocated += chunk_size; - } - malloc_mutex_unlock(&huge_mtx); - - if (config_fill && zero == false) { - if (opt_junk) - memset(ret, 0xa5, chunk_size); - else if (opt_zero) - memset(ret, 0, chunk_size); - } - - return (ret); -} - void * huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra) { From a1ee7838e14b321a97bfacb1f1cf5004198f2203 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 10 Apr 2012 15:07:44 -0700 Subject: [PATCH 108/205] Rename labels. Rename labels from FOO to label_foo in order to avoid system macro definitions, in particular OUT and ERROR on mingw. Reported by Mike Hommey. --- src/chunk.c | 6 +-- src/chunk_dss.c | 4 +- src/ckh.c | 16 +++---- src/ctl.c | 92 ++++++++++++++++++------------------ src/jemalloc.c | 58 +++++++++++------------ src/prof.c | 14 +++--- src/util.c | 24 +++++----- test/allocated.c | 16 +++---- test/mremap.c | 10 ++-- test/thread_arena.c | 8 ++-- test/thread_tcache_enabled.c | 10 ++-- 11 files changed, 129 insertions(+), 129 deletions(-) diff --git a/src/chunk.c b/src/chunk.c index 797bd341..b0641294 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -38,17 +38,17 @@ chunk_alloc(size_t size, size_t alignment, bool base, bool *zero) if (config_dss) { ret = chunk_alloc_dss(size, alignment, zero); if (ret != NULL) - goto RETURN; + goto label_return; } ret = chunk_alloc_mmap(size, alignment); if (ret != NULL) { *zero = true; - goto RETURN; + goto label_return; } /* All strategies for allocation failed. */ ret = NULL; -RETURN: +label_return: if (config_ivsalloc && base == false && ret != NULL) { if (rtree_set(chunks_rtree, (uintptr_t)ret, ret)) { chunk_dealloc(ret, size, true); diff --git a/src/chunk_dss.c b/src/chunk_dss.c index a81a271c..ccd86b91 100644 --- a/src/chunk_dss.c +++ b/src/chunk_dss.c @@ -293,11 +293,11 @@ chunk_dealloc_dss(void *chunk, size_t size) madvise(chunk, size, MADV_DONTNEED); ret = false; - goto RETURN; + goto label_return; } ret = true; -RETURN: +label_return: malloc_mutex_unlock(&dss_mtx); return (ret); } diff --git a/src/ckh.c b/src/ckh.c index 39925ced..169fc0d4 100644 --- a/src/ckh.c +++ b/src/ckh.c @@ -267,12 +267,12 @@ ckh_grow(ckh_t *ckh) usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE, NULL); if (usize == 0) { ret = true; - goto RETURN; + goto label_return; } tab = (ckhc_t *)ipalloc(usize, CACHELINE, true); if (tab == NULL) { ret = true; - goto RETURN; + goto label_return; } /* Swap in new table. 
*/ ttab = ckh->tab; @@ -292,7 +292,7 @@ ckh_grow(ckh_t *ckh) } ret = false; -RETURN: +label_return: return (ret); } @@ -385,16 +385,16 @@ ckh_new(ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh_keycomp_t *keycomp) usize = sa2u(sizeof(ckhc_t) << lg_mincells, CACHELINE, NULL); if (usize == 0) { ret = true; - goto RETURN; + goto label_return; } ckh->tab = (ckhc_t *)ipalloc(usize, CACHELINE, true); if (ckh->tab == NULL) { ret = true; - goto RETURN; + goto label_return; } ret = false; -RETURN: +label_return: return (ret); } @@ -466,12 +466,12 @@ ckh_insert(ckh_t *ckh, const void *key, const void *data) while (ckh_try_insert(ckh, &key, &data)) { if (ckh_grow(ckh)) { ret = true; - goto RETURN; + goto label_return; } } ret = false; -RETURN: +label_return: return (ret); } diff --git a/src/ctl.c b/src/ctl.c index 1aaf1971..a75ffef3 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -546,7 +546,7 @@ ctl_init(void) (narenas + 1) * sizeof(ctl_arena_stats_t)); if (ctl_stats.arenas == NULL) { ret = true; - goto RETURN; + goto label_return; } memset(ctl_stats.arenas, 0, (narenas + 1) * sizeof(ctl_arena_stats_t)); @@ -561,7 +561,7 @@ ctl_init(void) for (i = 0; i <= narenas; i++) { if (ctl_arena_init(&ctl_stats.arenas[i])) { ret = true; - goto RETURN; + goto label_return; } } } @@ -573,7 +573,7 @@ ctl_init(void) } ret = false; -RETURN: +label_return: malloc_mutex_unlock(&ctl_mtx); return (ret); } @@ -593,7 +593,7 @@ ctl_lookup(const char *name, ctl_node_t const **nodesp, size_t *mibp, elen = (size_t)((uintptr_t)dot - (uintptr_t)elm); if (elen == 0) { ret = ENOENT; - goto RETURN; + goto label_return; } node = super_root_node; for (i = 0; i < *depthp; i++) { @@ -618,7 +618,7 @@ ctl_lookup(const char *name, ctl_node_t const **nodesp, size_t *mibp, } if (node == pnode) { ret = ENOENT; - goto RETURN; + goto label_return; } } else { uintmax_t index; @@ -628,7 +628,7 @@ ctl_lookup(const char *name, ctl_node_t const **nodesp, size_t *mibp, index = malloc_strtoumax(elm, NULL, 10); if (index == UINTMAX_MAX || index > SIZE_T_MAX) { ret = ENOENT; - goto RETURN; + goto label_return; } inode = &node->u.named.children[0]; @@ -636,7 +636,7 @@ ctl_lookup(const char *name, ctl_node_t const **nodesp, size_t *mibp, (size_t)index); if (node == NULL) { ret = ENOENT; - goto RETURN; + goto label_return; } if (nodesp != NULL) @@ -652,7 +652,7 @@ ctl_lookup(const char *name, ctl_node_t const **nodesp, size_t *mibp, * in this path through the tree. */ ret = ENOENT; - goto RETURN; + goto label_return; } /* Complete lookup successful. */ *depthp = i + 1; @@ -663,7 +663,7 @@ ctl_lookup(const char *name, ctl_node_t const **nodesp, size_t *mibp, if (*dot == '\0') { /* No more elements. */ ret = ENOENT; - goto RETURN; + goto label_return; } elm = &dot[1]; dot = ((tdot = strchr(elm, '.')) != NULL) ? tdot : @@ -672,7 +672,7 @@ ctl_lookup(const char *name, ctl_node_t const **nodesp, size_t *mibp, } ret = 0; -RETURN: +label_return: return (ret); } @@ -687,22 +687,22 @@ ctl_byname(const char *name, void *oldp, size_t *oldlenp, void *newp, if (ctl_initialized == false && ctl_init()) { ret = EAGAIN; - goto RETURN; + goto label_return; } depth = CTL_MAX_DEPTH; ret = ctl_lookup(name, nodes, mib, &depth); if (ret != 0) - goto RETURN; + goto label_return; if (nodes[depth-1]->ctl == NULL) { /* The name refers to a partial path through the ctl tree. 
*/ ret = ENOENT; - goto RETURN; + goto label_return; } ret = nodes[depth-1]->ctl(mib, depth, oldp, oldlenp, newp, newlen); -RETURN: +label_return: return(ret); } @@ -713,11 +713,11 @@ ctl_nametomib(const char *name, size_t *mibp, size_t *miblenp) if (ctl_initialized == false && ctl_init()) { ret = EAGAIN; - goto RETURN; + goto label_return; } ret = ctl_lookup(name, NULL, mibp, miblenp); -RETURN: +label_return: return(ret); } @@ -731,7 +731,7 @@ ctl_bymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, if (ctl_initialized == false && ctl_init()) { ret = EAGAIN; - goto RETURN; + goto label_return; } /* Iterate down the tree. */ @@ -741,7 +741,7 @@ ctl_bymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, /* Children are named. */ if (node->u.named.nchildren <= mib[i]) { ret = ENOENT; - goto RETURN; + goto label_return; } node = &node->u.named.children[mib[i]]; } else { @@ -752,7 +752,7 @@ ctl_bymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, node = inode->u.indexed.index(mib, miblen, mib[i]); if (node == NULL) { ret = ENOENT; - goto RETURN; + goto label_return; } } } @@ -761,11 +761,11 @@ ctl_bymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, if (node->ctl == NULL) { /* Partial MIB. */ ret = ENOENT; - goto RETURN; + goto label_return; } ret = node->ctl(mib, miblen, oldp, oldlenp, newp, newlen); -RETURN: +label_return: return(ret); } @@ -787,14 +787,14 @@ ctl_boot(void) #define READONLY() do { \ if (newp != NULL || newlen != 0) { \ ret = EPERM; \ - goto RETURN; \ + goto label_return; \ } \ } while (0) #define WRITEONLY() do { \ if (oldp != NULL || oldlenp != NULL) { \ ret = EPERM; \ - goto RETURN; \ + goto label_return; \ } \ } while (0) @@ -810,7 +810,7 @@ ctl_boot(void) ? sizeof(t) : *oldlenp; \ memcpy(oldp, (void *)&v, copylen); \ ret = EINVAL; \ - goto RETURN; \ + goto label_return; \ } else \ *(t *)oldp = v; \ } \ @@ -820,7 +820,7 @@ ctl_boot(void) if (newp != NULL) { \ if (newlen != sizeof(t)) { \ ret = EINVAL; \ - goto RETURN; \ + goto label_return; \ } \ v = *(t *)newp; \ } \ @@ -847,7 +847,7 @@ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ READ(oldval, t); \ \ ret = 0; \ -RETURN: \ +label_return: \ if (l) \ malloc_mutex_unlock(&ctl_mtx); \ return (ret); \ @@ -869,7 +869,7 @@ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ READ(oldval, t); \ \ ret = 0; \ -RETURN: \ +label_return: \ malloc_mutex_unlock(&ctl_mtx); \ return (ret); \ } @@ -888,7 +888,7 @@ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ READ(oldval, t); \ \ ret = 0; \ -RETURN: \ +label_return: \ malloc_mutex_unlock(&ctl_mtx); \ return (ret); \ } @@ -912,7 +912,7 @@ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ READ(oldval, t); \ \ ret = 0; \ -RETURN: \ +label_return: \ return (ret); \ } @@ -929,7 +929,7 @@ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ READ(oldval, t); \ \ ret = 0; \ -RETURN: \ +label_return: \ return (ret); \ } @@ -946,7 +946,7 @@ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ READ(oldval, bool); \ \ ret = 0; \ -RETURN: \ +label_return: \ return (ret); \ } @@ -967,7 +967,7 @@ epoch_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, READ(ctl_epoch, uint64_t); ret = 0; -RETURN: +label_return: malloc_mutex_unlock(&ctl_mtx); return (ret); } @@ -986,13 +986,13 @@ thread_tcache_enabled_ctl(const size_t *mib, size_t miblen, void *oldp, if (newp != NULL) { if (newlen != 
sizeof(bool)) { ret = EINVAL; - goto RETURN; + goto label_return; } tcache_enabled_set(*(bool *)newp); } READ(oldval, bool); -RETURN: +label_return: ret = 0; return (ret); } @@ -1011,7 +1011,7 @@ thread_tcache_flush_ctl(const size_t *mib, size_t miblen, void *oldp, tcache_flush(); ret = 0; -RETURN: +label_return: return (ret); } @@ -1031,7 +1031,7 @@ thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, if (newind >= narenas) { /* New arena index is out of range. */ ret = EFAULT; - goto RETURN; + goto label_return; } /* Initialize arena if necessary. */ @@ -1040,7 +1040,7 @@ thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, arenas_extend(newind)) == NULL) { malloc_mutex_unlock(&arenas_lock); ret = EAGAIN; - goto RETURN; + goto label_return; } assert(arena == arenas[newind]); arenas[oldind]->nthreads--; @@ -1059,7 +1059,7 @@ thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, } ret = 0; -RETURN: +label_return: return (ret); } @@ -1156,7 +1156,7 @@ arenas_initialized_ctl(const size_t *mib, size_t miblen, void *oldp, for (i = 0; i < nread; i++) ((bool *)oldp)[i] = ctl_stats.arenas[i].initialized; -RETURN: +label_return: malloc_mutex_unlock(&ctl_mtx); return (ret); } @@ -1180,7 +1180,7 @@ arenas_purge_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, WRITE(arena, unsigned); if (newp != NULL && arena >= narenas) { ret = EFAULT; - goto RETURN; + goto label_return; } else { arena_t *tarenas[narenas]; @@ -1202,7 +1202,7 @@ arenas_purge_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, } ret = 0; -RETURN: +label_return: return (ret); } @@ -1232,7 +1232,7 @@ prof_active_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, READ(oldval, bool); ret = 0; -RETURN: +label_return: malloc_mutex_unlock(&ctl_mtx); return (ret); } @@ -1252,11 +1252,11 @@ prof_dump_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, if (prof_mdump(filename)) { ret = EFAULT; - goto RETURN; + goto label_return; } ret = 0; -RETURN: +label_return: return (ret); } @@ -1354,11 +1354,11 @@ stats_arenas_i_index(const size_t *mib, size_t miblen, size_t i) malloc_mutex_lock(&ctl_mtx); if (ctl_stats.arenas[i].initialized == false) { ret = NULL; - goto RETURN; + goto label_return; } ret = super_stats_arenas_i_node; -RETURN: +label_return: malloc_mutex_unlock(&ctl_mtx); return (ret); } diff --git a/src/jemalloc.c b/src/jemalloc.c index e0510209..cde998c4 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -742,7 +742,7 @@ je_malloc(size_t size) if (malloc_init()) { ret = NULL; - goto OOM; + goto label_oom; } if (size == 0) @@ -753,7 +753,7 @@ je_malloc(size_t size) PROF_ALLOC_PREP(1, usize, cnt); if (cnt == NULL) { ret = NULL; - goto OOM; + goto label_oom; } if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize <= SMALL_MAXCLASS) { @@ -768,7 +768,7 @@ je_malloc(size_t size) ret = imalloc(size); } -OOM: +label_oom: if (ret == NULL) { if (config_xmalloc && opt_xmalloc) { malloc_write(": Error in malloc(): " @@ -822,14 +822,14 @@ imemalign(void **memptr, size_t alignment, size_t size, } result = NULL; ret = EINVAL; - goto RETURN; + goto label_return; } usize = sa2u(size, alignment, NULL); if (usize == 0) { result = NULL; ret = ENOMEM; - goto RETURN; + goto label_return; } if (config_prof && opt_prof) { @@ -864,13 +864,13 @@ imemalign(void **memptr, size_t alignment, size_t size, abort(); } ret = ENOMEM; - goto RETURN; + goto label_return; } *memptr = result; ret = 0; -RETURN: +label_return: 
if (config_stats && result != NULL) { assert(usize == isalloc(result)); thread_allocated_tsd_get()->allocated += usize; @@ -918,7 +918,7 @@ je_calloc(size_t num, size_t size) if (malloc_init()) { num_size = 0; ret = NULL; - goto RETURN; + goto label_return; } num_size = num * size; @@ -927,7 +927,7 @@ je_calloc(size_t num, size_t size) num_size = 1; else { ret = NULL; - goto RETURN; + goto label_return; } /* * Try to avoid division here. We know that it isn't possible to @@ -938,7 +938,7 @@ je_calloc(size_t num, size_t size) && (num_size / size != num)) { /* size_t overflow. */ ret = NULL; - goto RETURN; + goto label_return; } if (config_prof && opt_prof) { @@ -946,7 +946,7 @@ je_calloc(size_t num, size_t size) PROF_ALLOC_PREP(1, usize, cnt); if (cnt == NULL) { ret = NULL; - goto RETURN; + goto label_return; } if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize <= SMALL_MAXCLASS) { @@ -961,7 +961,7 @@ je_calloc(size_t num, size_t size) ret = icalloc(num_size); } -RETURN: +label_return: if (ret == NULL) { if (config_xmalloc && opt_xmalloc) { malloc_write(": Error in calloc(): out of " @@ -1002,7 +1002,7 @@ je_realloc(void *ptr, size_t size) } idalloc(ptr); ret = NULL; - goto RETURN; + goto label_return; } else size = 1; } @@ -1019,7 +1019,7 @@ je_realloc(void *ptr, size_t size) if (cnt == NULL) { old_ctx = NULL; ret = NULL; - goto OOM; + goto label_oom; } if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize <= SMALL_MAXCLASS) { @@ -1040,7 +1040,7 @@ je_realloc(void *ptr, size_t size) ret = iralloc(ptr, size, 0, 0, false, false); } -OOM: +label_oom: if (ret == NULL) { if (config_xmalloc && opt_xmalloc) { malloc_write(": Error in realloc(): " @@ -1092,7 +1092,7 @@ OOM: } } -RETURN: +label_return: if (config_prof && opt_prof) prof_realloc(ret, usize, cnt, old_size, old_ctx); if (config_stats && ret != NULL) { @@ -1300,16 +1300,16 @@ je_allocm(void **ptr, size_t *rsize, size_t size, int flags) assert(size != 0); if (malloc_init()) - goto OOM; + goto label_oom; usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment, NULL); if (usize == 0) - goto OOM; + goto label_oom; if (config_prof && opt_prof) { PROF_ALLOC_PREP(1, usize, cnt); if (cnt == NULL) - goto OOM; + goto label_oom; if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize <= SMALL_MAXCLASS) { size_t usize_promoted = (alignment == 0) ? @@ -1318,18 +1318,18 @@ je_allocm(void **ptr, size_t *rsize, size_t size, int flags) assert(usize_promoted != 0); p = iallocm(usize_promoted, alignment, zero); if (p == NULL) - goto OOM; + goto label_oom; arena_prof_promoted(p, usize); } else { p = iallocm(usize, alignment, zero); if (p == NULL) - goto OOM; + goto label_oom; } prof_malloc(p, usize, cnt); } else { p = iallocm(usize, alignment, zero); if (p == NULL) - goto OOM; + goto label_oom; } if (rsize != NULL) *rsize = usize; @@ -1341,7 +1341,7 @@ je_allocm(void **ptr, size_t *rsize, size_t size, int flags) } UTRACE(0, size, p); return (ALLOCM_SUCCESS); -OOM: +label_oom: if (config_xmalloc && opt_xmalloc) { malloc_write(": Error in allocm(): " "out of memory\n"); @@ -1387,7 +1387,7 @@ je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags) old_size = isalloc(p); PROF_ALLOC_PREP(1, max_usize, cnt); if (cnt == NULL) - goto OOM; + goto label_oom; /* * Use minimum usize to determine whether promotion may happen. */ @@ -1398,7 +1398,7 @@ je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags) size+extra) ? 
0 : size+extra - (SMALL_MAXCLASS+1), alignment, zero, no_move); if (q == NULL) - goto ERR; + goto label_err; if (max_usize < PAGE) { usize = max_usize; arena_prof_promoted(q, usize); @@ -1407,7 +1407,7 @@ je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags) } else { q = iralloc(p, size, extra, alignment, zero, no_move); if (q == NULL) - goto ERR; + goto label_err; usize = isalloc(q); } prof_realloc(q, usize, cnt, old_size, old_ctx); @@ -1418,7 +1418,7 @@ je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags) old_size = isalloc(p); q = iralloc(p, size, extra, alignment, zero, no_move); if (q == NULL) - goto ERR; + goto label_err; if (config_stats) usize = isalloc(q); if (rsize != NULL) { @@ -1437,12 +1437,12 @@ je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags) } UTRACE(p, size, q); return (ALLOCM_SUCCESS); -ERR: +label_err: if (no_move) { UTRACE(p, size, q); return (ALLOCM_ERR_NOT_MOVED); } -OOM: +label_oom: if (config_xmalloc && opt_xmalloc) { malloc_write(": Error in rallocm(): " "out of memory\n"); diff --git a/src/prof.c b/src/prof.c index d2532ec1..b509aaef 100644 --- a/src/prof.c +++ b/src/prof.c @@ -854,7 +854,7 @@ prof_dump(bool propagate_err, const char *filename, bool leakcheck) if (opt_abort) abort(); } - goto ERROR; + goto label_error; } /* Merge per thread profile stats, and sum them in cnt_all. */ @@ -870,7 +870,7 @@ prof_dump(bool propagate_err, const char *filename, bool leakcheck) " [%"PRIu64": %"PRIu64"] @ heapprofile\n", cnt_all.curobjs, cnt_all.curbytes, cnt_all.accumobjs, cnt_all.accumbytes)) - goto ERROR; + goto label_error; } else { if (prof_printf(propagate_err, "heap profile: %"PRId64": %"PRId64 @@ -878,22 +878,22 @@ prof_dump(bool propagate_err, const char *filename, bool leakcheck) cnt_all.curobjs, cnt_all.curbytes, cnt_all.accumobjs, cnt_all.accumbytes, ((uint64_t)1U << opt_lg_prof_sample))) - goto ERROR; + goto label_error; } /* Dump per ctx profile stats. */ for (tabind = 0; ckh_iter(&bt2ctx, &tabind, &bt.v, &ctx.v) == false;) { if (prof_dump_ctx(propagate_err, ctx.p, bt.p)) - goto ERROR; + goto label_error; } /* Dump /proc//maps if possible. */ if (prof_dump_maps(propagate_err)) - goto ERROR; + goto label_error; if (prof_flush(propagate_err)) - goto ERROR; + goto label_error; close(prof_dump_fd); prof_leave(); @@ -909,7 +909,7 @@ prof_dump(bool propagate_err, const char *filename, bool leakcheck) } return (false); -ERROR: +label_error: prof_leave(); return (true); } diff --git a/src/util.c b/src/util.c index 107bdcff..2aab61fe 100644 --- a/src/util.c +++ b/src/util.c @@ -108,12 +108,12 @@ malloc_strtoumax(const char *nptr, char **endptr, int base) p++; /* Fall through. */ default: - goto PREFIX; + goto label_prefix; } } /* Get prefix, if any. */ - PREFIX: + label_prefix: /* * Note where the first non-whitespace/sign character is so that it is * possible to tell whether any digits are consumed (e.g., " 0" vs. @@ -349,7 +349,7 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) f = format; while (true) { switch (*f) { - case '\0': goto OUT; + case '\0': goto label_out; case '%': { bool alt_form = false; bool zero_pad = false; @@ -389,12 +389,12 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) assert(plus_plus == false); plus_plus = true; break; - default: goto WIDTH; + default: goto label_width; } f++; } /* Width. 
*/ - WIDTH: + label_width: switch (*f) { case '*': width = va_arg(ap, int); @@ -410,17 +410,17 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) width = (int)uwidth; if (*f == '.') { f++; - goto PRECISION; + goto label_precision; } else - goto LENGTH; + goto label_length; break; } case '.': f++; - goto PRECISION; - default: goto LENGTH; + goto label_precision; + default: goto label_length; } /* Precision. */ - PRECISION: + label_precision: switch (*f) { case '*': prec = va_arg(ap, int); @@ -438,7 +438,7 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) default: break; } /* Length. */ - LENGTH: + label_length: switch (*f) { case 'l': f++; @@ -542,7 +542,7 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) break; }} } - OUT: + label_out: if (i < size) str[i] = '\0'; else diff --git a/test/allocated.c b/test/allocated.c index 701c1754..921ab3ae 100644 --- a/test/allocated.c +++ b/test/allocated.c @@ -25,7 +25,7 @@ thread_start(void *arg) #ifdef JEMALLOC_STATS assert(false); #endif - goto RETURN; + goto label_return; } fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__, strerror(err)); @@ -37,7 +37,7 @@ thread_start(void *arg) #ifdef JEMALLOC_STATS assert(false); #endif - goto RETURN; + goto label_return; } fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__, strerror(err)); @@ -51,7 +51,7 @@ thread_start(void *arg) #ifdef JEMALLOC_STATS assert(false); #endif - goto RETURN; + goto label_return; } fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__, strerror(err)); @@ -63,7 +63,7 @@ thread_start(void *arg) #ifdef JEMALLOC_STATS assert(false); #endif - goto RETURN; + goto label_return; } fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__, strerror(err)); @@ -98,7 +98,7 @@ thread_start(void *arg) assert(d0 + usize <= d1); -RETURN: +label_return: return (NULL); } @@ -116,7 +116,7 @@ main(void) != 0) { fprintf(stderr, "%s(): Error in pthread_create()\n", __func__); ret = 1; - goto RETURN; + goto label_return; } pthread_join(thread, (void *)&ret); @@ -126,13 +126,13 @@ main(void) != 0) { fprintf(stderr, "%s(): Error in pthread_create()\n", __func__); ret = 1; - goto RETURN; + goto label_return; } pthread_join(thread, (void *)&ret); thread_start(NULL); -RETURN: +label_return: fprintf(stderr, "Test end\n"); return (ret); } diff --git a/test/mremap.c b/test/mremap.c index 8d35a64e..cac3bd82 100644 --- a/test/mremap.c +++ b/test/mremap.c @@ -22,7 +22,7 @@ main(void) fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__, strerror(err)); ret = 1; - goto RETURN; + goto label_return; } chunksize = ((size_t)1U) << lg_chunk; @@ -30,7 +30,7 @@ main(void) if (p == NULL) { fprintf(stderr, "malloc(%zu) --> %p\n", chunksize, p); ret = 1; - goto RETURN; + goto label_return; } memset(p, 'a', chunksize); @@ -39,7 +39,7 @@ main(void) fprintf(stderr, "realloc(%p, %zu) --> %p\n", p, chunksize * 2, q); ret = 1; - goto RETURN; + goto label_return; } for (i = 0; i < chunksize; i++) { assert(q[i] == 'a'); @@ -51,7 +51,7 @@ main(void) if (q == NULL) { fprintf(stderr, "realloc(%p, %zu) --> %p\n", p, chunksize, q); ret = 1; - goto RETURN; + goto label_return; } for (i = 0; i < chunksize; i++) { assert(q[i] == 'a'); @@ -60,7 +60,7 @@ main(void) free(q); ret = 0; -RETURN: +label_return: fprintf(stderr, "Test end\n"); return (ret); } diff --git a/test/thread_arena.c b/test/thread_arena.c index 2922d1b4..9b7b2ddc 100644 --- a/test/thread_arena.c +++ b/test/thread_arena.c @@ -61,7 +61,7 @@ main(void) if (p == NULL) { 
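		/*
		 * As in the other tests, every failure path sets ret and
		 * jumps to a single label_return exit so the trailing
		 * "Test end" marker is always printed.  The lowercase
		 * label_ prefix (replacing the former all-caps labels
		 * throughout this patch) also avoids clashes with system
		 * macro definitions such as ERROR on some platforms.
		 */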
fprintf(stderr, "%s(): Error in malloc()\n", __func__); ret = 1; - goto RETURN; + goto label_return; } size = sizeof(arena_ind); @@ -69,7 +69,7 @@ main(void) fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__, strerror(err)); ret = 1; - goto RETURN; + goto label_return; } for (i = 0; i < NTHREADS; i++) { @@ -78,14 +78,14 @@ main(void) fprintf(stderr, "%s(): Error in pthread_create()\n", __func__); ret = 1; - goto RETURN; + goto label_return; } } for (i = 0; i < NTHREADS; i++) pthread_join(threads[i], (void *)&ret); -RETURN: +label_return: fprintf(stderr, "Test end\n"); return (ret); } diff --git a/test/thread_tcache_enabled.c b/test/thread_tcache_enabled.c index 46540385..0a3e45a9 100644 --- a/test/thread_tcache_enabled.c +++ b/test/thread_tcache_enabled.c @@ -22,7 +22,7 @@ thread_start(void *arg) assert(false); #endif } - goto RETURN; + goto label_return; } if (e0) { @@ -69,7 +69,7 @@ thread_start(void *arg) assert(e0 == false); free(malloc(1)); -RETURN: +label_return: return (NULL); } @@ -87,7 +87,7 @@ main(void) != 0) { fprintf(stderr, "%s(): Error in pthread_create()\n", __func__); ret = 1; - goto RETURN; + goto label_return; } pthread_join(thread, (void *)&ret); @@ -97,13 +97,13 @@ main(void) != 0) { fprintf(stderr, "%s(): Error in pthread_create()\n", __func__); ret = 1; - goto RETURN; + goto label_return; } pthread_join(thread, (void *)&ret); thread_start(NULL); -RETURN: +label_return: fprintf(stderr, "Test end\n"); return (ret); } From 122449b073bcbaa504c4f592ea2d733503c272d2 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 6 Apr 2012 00:35:09 -0700 Subject: [PATCH 109/205] Implement Valgrind support, redzones, and quarantine. Implement Valgrind support, as well as the redzone and quarantine features, which help Valgrind detect memory errors. Redzones are only implemented for small objects because the changes necessary to support redzones around large and huge objects are complicated by in-place reallocation, to the point that it isn't clear that the maintenance burden is worth the incremental improvement to Valgrind support. Merge arena_salloc() and arena_salloc_demote(). Refactor i[v]salloc() to expose the 'demote' option. --- INSTALL | 8 +- Makefile.in | 6 +- configure.ac | 38 +++- doc/jemalloc.xml.in | 79 ++++++- include/jemalloc/internal/arena.h | 81 +++++-- .../jemalloc/internal/jemalloc_internal.h.in | 153 ++++++++++++- include/jemalloc/internal/private_namespace.h | 8 +- include/jemalloc/internal/prof.h | 6 +- include/jemalloc/internal/quarantine.h | 24 ++ include/jemalloc/internal/tcache.h | 29 ++- include/jemalloc/internal/util.h | 1 - include/jemalloc/jemalloc_defs.h.in | 5 +- src/arena.c | 208 ++++++++++++------ src/ctl.c | 15 +- src/huge.c | 2 +- src/jemalloc.c | 165 ++++++++++---- src/quarantine.c | 163 ++++++++++++++ src/stats.c | 3 + src/tcache.c | 6 +- src/zone.c | 10 +- 20 files changed, 840 insertions(+), 170 deletions(-) create mode 100644 include/jemalloc/internal/quarantine.h create mode 100644 src/quarantine.c diff --git a/INSTALL b/INSTALL index 8a825df9..a5942ec8 100644 --- a/INSTALL +++ b/INSTALL @@ -113,8 +113,12 @@ any of the following arguments (not a definitive list) to 'configure': mmap(2). --disable-fill - Disable support for junk/zero filling of memory. See the "opt.junk"/ - "opt.zero" option documentation for usage details. + Disable support for junk/zero filling of memory, quarantine, and redzones. + See the "opt.junk", "opt.zero", "opt.quarantine", and "opt.redzone" option + documentation for usage details. 
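+  For example, when fill support is compiled in, these features can still be
+  tuned at run time via the malloc_conf mechanism described in the manual
+  page (the option names are from this patch; the quarantine size shown is
+  an arbitrary illustration):
+
+    /* In the application, at file scope. */
+    const char *malloc_conf = "redzone:true,quarantine:4194304";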
+ +--disable-valgrind + Disable support for Valgrind. --disable-experimental Disable support for the experimental API (*allocm()). diff --git a/Makefile.in b/Makefile.in index 7df4fc60..8aa94253 100644 --- a/Makefile.in +++ b/Makefile.in @@ -49,9 +49,9 @@ CSRCS := @srcroot@src/jemalloc.c @srcroot@src/arena.c @srcroot@src/atomic.c \ @srcroot@src/chunk_dss.c @srcroot@src/chunk_mmap.c \ @srcroot@src/ckh.c @srcroot@src/ctl.c @srcroot@src/extent.c \ @srcroot@src/hash.c @srcroot@src/huge.c @srcroot@src/mb.c \ - @srcroot@src/mutex.c @srcroot@src/prof.c @srcroot@src/rtree.c \ - @srcroot@src/stats.c @srcroot@src/tcache.c @srcroot@src/util.c \ - @srcroot@src/tsd.c + @srcroot@src/mutex.c @srcroot@src/prof.c @srcroot@src/quarantine.c \ + @srcroot@src/rtree.c @srcroot@src/stats.c @srcroot@src/tcache.c \ + @srcroot@src/util.c @srcroot@src/tsd.c ifeq (macho, @abi@) CSRCS += @srcroot@src/zone.c endif diff --git a/configure.ac b/configure.ac index 8e94b5c9..a272ecd0 100644 --- a/configure.ac +++ b/configure.ac @@ -685,7 +685,8 @@ AC_SUBST([enable_dss]) dnl Support the junk/zero filling option by default. AC_ARG_ENABLE([fill], - [AS_HELP_STRING([--disable-fill], [Disable support for junk/zero filling])], + [AS_HELP_STRING([--disable-fill], + [Disable support for junk/zero filling, quarantine, and redzones])], [if test "x$enable_fill" = "xno" ; then enable_fill="0" else @@ -727,6 +728,38 @@ if test "x$enable_utrace" = "x1" ; then fi AC_SUBST([enable_utrace]) +dnl Support Valgrind by default. +AC_ARG_ENABLE([valgrind], + [AS_HELP_STRING([--disable-valgrind], [Disable support for Valgrind])], +[if test "x$enable_valgrind" = "xno" ; then + enable_valgrind="0" +else + enable_valgrind="1" +fi +], +[enable_valgrind="1"] +) +if test "x$enable_valgrind" = "x1" ; then + JE_COMPILABLE([valgrind], [ +#include +#include + +#if defined(__VALGRIND_MAJOR__) && defined(__VALGRIND_MINOR__) \ + && (__VALGRIND_MAJOR__ > 3 || (__VALGRIND_MAJOR__ == 3 && \ + __VALGRIND_MINOR__ >= 6)) +#else +# error "Incompatible Valgrind version" +#endif +], [], [je_cv_valgrind]) + if test "x${je_cv_valgrind}" = "xno" ; then + enable_valgrind="0" + fi + if test "x$enable_valgrind" = "x1" ; then + AC_DEFINE([JEMALLOC_VALGRIND], [ ]) + fi +fi +AC_SUBST([enable_valgrind]) + dnl Do not support the xmalloc option by default. AC_ARG_ENABLE([xmalloc], [AS_HELP_STRING([--enable-xmalloc], [Support xmalloc option])], @@ -1088,8 +1121,9 @@ AC_MSG_RESULT([prof-libgcc : ${enable_prof_libgcc}]) AC_MSG_RESULT([prof-gcc : ${enable_prof_gcc}]) AC_MSG_RESULT([tcache : ${enable_tcache}]) AC_MSG_RESULT([fill : ${enable_fill}]) -AC_MSG_RESULT([xmalloc : ${enable_xmalloc}]) AC_MSG_RESULT([utrace : ${enable_utrace}]) +AC_MSG_RESULT([valgrind : ${enable_valgrind}]) +AC_MSG_RESULT([xmalloc : ${enable_xmalloc}]) AC_MSG_RESULT([dss : ${enable_dss}]) AC_MSG_RESULT([lazy_lock : ${enable_lazy_lock}]) AC_MSG_RESULT([tls : ${enable_tls}]) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 8ae82621..a47c7635 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -720,6 +720,16 @@ for (i = 0; i < nbins; i++) { build configuration. + + + config.valgrind + (bool) + r- + + was specified during + build configuration. + + config.xmalloc @@ -819,6 +829,47 @@ for (i = 0; i < nbins; i++) { configuration, in which case it is enabled by default. + + + opt.quarantine + (size_t) + r- + [] + + Per thread quarantine size in bytes. If non-zero, each + thread maintains a FIFO object quarantine that stores up to the + specified number of bytes of memory. 
The quarantined memory is not + freed until it is released from quarantine, though it is immediately + junk-filled if the opt.junk option is + enabled. This feature is of particular use in combination with Valgrind, which can detect + attempts to access quarantined objects. This is intended for debugging + and will impact performance negatively. The default quarantine size is + 0. + + + + + opt.redzone + (bool) + r- + [] + + Redzones enabled/disabled. If enabled, small + allocations have redzones before and after them. Furthermore, if the + opt.junk option is + enabled, the redzones are checked for corruption during deallocation. + However, the primary intended purpose of this feature is to be used in + combination with Valgrind, which needs + redzones in order to do effective buffer overflow/underflow detection. + This option is intended for debugging and will impact performance + negatively. This option is disabled by default unless + is specified during configuration, in + which case it is enabled by default. + + opt.zero @@ -849,6 +900,25 @@ for (i = 0; i < nbins; i++) { is disabled by default. + + + opt.valgrind + (bool) + r- + [] + + Valgrind support + enabled/disabled. If enabled, several other options are automatically + modified during options processing to work well with Valgrind: opt.junk and opt.zero are set to false, + opt.quarantine + is set to 16 MiB, and opt.redzone is set to + true. This option is disabled by default. + + opt.xmalloc @@ -1764,10 +1834,11 @@ malloc_conf = "xmalloc:true";]]> This implementation does not provide much detail about the problems it detects, because the performance impact for storing such information - would be prohibitive. There are a number of allocator implementations - available on the Internet which focus on detecting and pinpointing problems - by trading performance for extra sanity checks and detailed - diagnostics. + would be prohibitive. However, jemalloc does integrate with the most + excellent Valgrind tool if + the configuration option is enabled and + the opt.valgrind + option is enabled. DIAGNOSTIC MESSAGES diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index d25a2b1d..f52fac42 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -16,7 +16,7 @@ * constraint is relaxed (ignored) for runs that are so small that the * per-region overhead is greater than: * - * (RUN_MAX_OVRHD / (reg_size << (3+RUN_BFP)) + * (RUN_MAX_OVRHD / (reg_interval << (3+RUN_BFP)) */ #define RUN_BFP 12 /* \/ Implicit binary fixed point. */ @@ -27,6 +27,12 @@ #define LG_RUN_MAXREGS 11 #define RUN_MAXREGS (1U << LG_RUN_MAXREGS) +/* + * Minimum redzone size. Redzones may be larger than this if necessary to + * preserve region alignment. + */ +#define REDZONE_MINSIZE 16 + /* * The minimum ratio of active:dirty pages per arena is computed as: * @@ -192,11 +198,50 @@ struct arena_run_s { * Read-only information associated with each element of arena_t's bins array * is stored separately, partly to reduce memory usage (only one copy, rather * than one per arena), but mainly to avoid false cacheline sharing. + * + * Each run has the following layout: + * + * /--------------------\ + * | arena_run_t header | + * | ... | + * bitmap_offset | bitmap | + * | ... | + * ctx0_offset | ctx map | + * | ... 
| + * |--------------------| + * | redzone | + * reg0_offset | region 0 | + * | redzone | + * |--------------------| \ + * | redzone | | + * | region 1 | > reg_interval + * | redzone | / + * |--------------------| + * | ... | + * | ... | + * | ... | + * |--------------------| + * | redzone | + * | region nregs-1 | + * | redzone | + * |--------------------| + * | alignment pad? | + * \--------------------/ + * + * reg_interval has at least the same minimum alignment as reg_size; this + * preserves the alignment constraint that sa2u() depends on. Alignment pad is + * either 0 or redzone_size; it is present only if needed to align reg0_offset. */ struct arena_bin_info_s { /* Size of regions in a run for this bin's size class. */ size_t reg_size; + /* Redzone size. */ + size_t redzone_size; + + /* Interval between regions (reg_size + (redzone_size << 1)). */ + size_t reg_interval; + /* Total size of a run for this bin's size class. */ size_t run_size; @@ -357,13 +402,15 @@ void arena_purge_all(arena_t *arena); void arena_prof_accum(arena_t *arena, uint64_t accumbytes); void arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind, uint64_t prof_accumbytes); +void arena_alloc_junk_small(void *ptr, arena_bin_info_t *bin_info, + bool zero); +void arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info); void *arena_malloc_small(arena_t *arena, size_t size, bool zero); void *arena_malloc_large(arena_t *arena, size_t size, bool zero); void *arena_palloc(arena_t *arena, size_t size, size_t alloc_size, size_t alignment, bool zero); -size_t arena_salloc(const void *ptr); +size_t arena_salloc(const void *ptr, bool demote); void arena_prof_promoted(const void *ptr, size_t size); -size_t arena_salloc_demote(const void *ptr); void arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, arena_chunk_map_t *mapelm); void arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr); @@ -408,7 +455,7 @@ JEMALLOC_INLINE unsigned arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr) { unsigned shift, diff, regind; - size_t size; + size_t interval; /* * Freeing a pointer lower than region zero can cause assertion @@ -425,12 +472,12 @@ arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr) bin_info->reg0_offset); /* Rescale (factor powers of 2 out of the numerator and denominator). */ - size = bin_info->reg_size; - shift = ffs(size) - 1; + interval = bin_info->reg_interval; + shift = ffs(interval) - 1; diff >>= shift; - size >>= shift; + interval >>= shift; - if (size == 1) { + if (interval == 1) { /* The divisor was a power of 2. 
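 * In that case diff itself is the region index once the common
 * power-of-2 factors have been shifted out.  For a non-power-of-2
 * interval the precomputed-inverse path below applies; worked example:
 * reg_interval == 48 has lowest set bit 16, so shift == 4 and the
 * scaled interval is 3; for the region at byte offset 336,
 * diff == 336 >> 4 == 21 and
 * regind == (21 * SIZE_INV(3)) >> SIZE_INV_SHIFT == 7 == 336 / 48.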
*/ regind = diff; } else { @@ -442,7 +489,7 @@ arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr) * * becomes * - * (X * size_invs[D - 3]) >> SIZE_INV_SHIFT + * (X * interval_invs[D - 3]) >> SIZE_INV_SHIFT * * We can omit the first three elements, because we never * divide by 0, and 1 and 2 are both powers of two, which are @@ -450,7 +497,7 @@ arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr) */ #define SIZE_INV_SHIFT ((sizeof(unsigned) << 3) - LG_RUN_MAXREGS) #define SIZE_INV(s) (((1U << SIZE_INV_SHIFT) / (s)) + 1) - static const unsigned size_invs[] = { + static const unsigned interval_invs[] = { SIZE_INV(3), SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7), SIZE_INV(8), SIZE_INV(9), SIZE_INV(10), SIZE_INV(11), @@ -461,14 +508,16 @@ arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr) SIZE_INV(28), SIZE_INV(29), SIZE_INV(30), SIZE_INV(31) }; - if (size <= ((sizeof(size_invs) / sizeof(unsigned)) + 2)) - regind = (diff * size_invs[size - 3]) >> SIZE_INV_SHIFT; - else - regind = diff / size; + if (interval <= ((sizeof(interval_invs) / sizeof(unsigned)) + + 2)) { + regind = (diff * interval_invs[interval - 3]) >> + SIZE_INV_SHIFT; + } else + regind = diff / interval; #undef SIZE_INV #undef SIZE_INV_SHIFT } - assert(diff == regind * size); + assert(diff == regind * interval); assert(regind < bin_info->nregs); return (regind); @@ -610,7 +659,7 @@ arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr, bool try_tcache) &arena_bin_info[binind]; assert(((uintptr_t)ptr - ((uintptr_t)run + (uintptr_t)bin_info->reg0_offset)) % - bin_info->reg_size == 0); + bin_info->reg_interval == 0); } malloc_mutex_lock(&bin->lock); arena_dalloc_bin(arena, chunk, ptr, mapelm); diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 66dd357f..a16e5e27 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -40,6 +40,11 @@ #include #endif +#ifdef JEMALLOC_VALGRIND +#include +#include +#endif + #include "jemalloc/internal/private_namespace.h" #ifdef JEMALLOC_CC_SILENCE @@ -125,6 +130,13 @@ static const bool config_utrace = false #endif ; +static const bool config_valgrind = +#ifdef JEMALLOC_VALGRIND + true +#else + false +#endif + ; static const bool config_xmalloc = #ifdef JEMALLOC_XMALLOC true @@ -281,6 +293,77 @@ static const bool config_ivsalloc = #define PAGE_CEILING(s) \ (((s) + PAGE_MASK) & ~PAGE_MASK) +#ifdef JEMALLOC_VALGRIND +/* + * The JEMALLOC_VALGRIND_*() macros must be macros rather than functions + * so that when Valgrind reports errors, there are no extra stack frames + * in the backtraces. + * + * The size that is reported to valgrind must be consistent through a chain of + * malloc..realloc..realloc calls. Request size isn't recorded anywhere in + * jemalloc, so it is critical that all callers of these macros provide usize + * rather than request size. As a result, buffer overflow detection is + * technically weakened for the standard API, though it is generally accepted + * practice to consider any extra bytes reported by malloc_usable_size() as + * usable space. 
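+ *
+ * For illustration, the intended call pattern (as used by je_malloc()
+ * later in this patch) is:
+ *
+ *   void *ret = imalloc(size);
+ *   size_t usize = s2u(size);
+ *   JEMALLOC_VALGRIND_MALLOC(ret != NULL, ret, usize, false);
+ *
+ * i.e., the block is registered with its usable size, never with the
+ * request size.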
+ */ +#define JEMALLOC_VALGRIND_MALLOC(cond, ptr, usize, zero) do { \ + if (config_valgrind && opt_valgrind && cond) \ + VALGRIND_MALLOCLIKE_BLOCK(ptr, usize, p2rz(ptr), zero); \ +} while (0) +#define JEMALLOC_VALGRIND_REALLOC(ptr, usize, old_ptr, old_usize, \ + old_rzsize, zero) do { \ + if (config_valgrind && opt_valgrind) { \ + size_t rzsize = p2rz(ptr); \ + \ + if (ptr == old_ptr) { \ + VALGRIND_RESIZEINPLACE_BLOCK(ptr, old_usize, \ + usize, rzsize); \ + if (zero && old_usize < usize) { \ + VALGRIND_MAKE_MEM_DEFINED( \ + (void *)((uintptr_t)ptr + \ + old_usize), usize - old_usize); \ + } \ + } else { \ + if (old_ptr != NULL) { \ + VALGRIND_FREELIKE_BLOCK(old_ptr, \ + old_rzsize); \ + } \ + if (ptr != NULL) { \ + size_t copy_size = (old_usize < usize) \ + ? old_usize : usize; \ + size_t tail_size = usize - copy_size; \ + VALGRIND_MALLOCLIKE_BLOCK(ptr, usize, \ + rzsize, false); \ + if (copy_size > 0) { \ + VALGRIND_MAKE_MEM_DEFINED(ptr, \ + copy_size); \ + } \ + if (zero && tail_size > 0) { \ + VALGRIND_MAKE_MEM_DEFINED( \ + (void *)((uintptr_t)ptr + \ + copy_size), tail_size); \ + } \ + } \ + } \ + } \ +} while (0) +#define JEMALLOC_VALGRIND_FREE(ptr, rzsize) do { \ + if (config_valgrind && opt_valgrind) \ + VALGRIND_FREELIKE_BLOCK(ptr, rzsize); \ +} while (0) +#else +#define VALGRIND_MALLOCLIKE_BLOCK(addr, sizeB, rzB, is_zeroed) +#define VALGRIND_RESIZEINPLACE_BLOCK(addr, oldSizeB, newSizeB, rzB) +#define VALGRIND_FREELIKE_BLOCK(addr, rzB) +#define VALGRIND_MAKE_MEM_UNDEFINED(_qzz_addr, _qzz_len) +#define VALGRIND_MAKE_MEM_DEFINED(_qzz_addr, _qzz_len) +#define JEMALLOC_VALGRIND_MALLOC(cond, ptr, usize, zero) +#define JEMALLOC_VALGRIND_REALLOC(ptr, usize, old_ptr, old_usize, \ + old_rzsize, zero) +#define JEMALLOC_VALGRIND_FREE(ptr, rzsize) +#endif + #include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/prng.h" @@ -300,6 +383,7 @@ static const bool config_ivsalloc = #include "jemalloc/internal/rtree.h" #include "jemalloc/internal/tcache.h" #include "jemalloc/internal/hash.h" +#include "jemalloc/internal/quarantine.h" #include "jemalloc/internal/prof.h" #undef JEMALLOC_H_TYPES @@ -325,6 +409,7 @@ static const bool config_ivsalloc = #include "jemalloc/internal/rtree.h" #include "jemalloc/internal/tcache.h" #include "jemalloc/internal/hash.h" +#include "jemalloc/internal/quarantine.h" #include "jemalloc/internal/prof.h" typedef struct { @@ -343,7 +428,10 @@ typedef struct { extern bool opt_abort; extern bool opt_junk; +extern size_t opt_quarantine; +extern bool opt_redzone; extern bool opt_utrace; +extern bool opt_valgrind; extern bool opt_xmalloc; extern bool opt_zero; extern size_t opt_narenas; @@ -385,6 +473,7 @@ void jemalloc_postfork_child(void); #include "jemalloc/internal/rtree.h" #include "jemalloc/internal/tcache.h" #include "jemalloc/internal/hash.h" +#include "jemalloc/internal/quarantine.h" #include "jemalloc/internal/prof.h" #undef JEMALLOC_H_EXTERNS @@ -550,14 +639,18 @@ choose_arena(arena_t *arena) #include "jemalloc/internal/tcache.h" #include "jemalloc/internal/arena.h" #include "jemalloc/internal/hash.h" +#include "jemalloc/internal/quarantine.h" #ifndef JEMALLOC_ENABLE_INLINE void *imalloc(size_t size); void *icalloc(size_t size); void *ipalloc(size_t usize, size_t alignment, bool zero); -size_t isalloc(const void *ptr); -size_t ivsalloc(const void *ptr); +size_t isalloc(const void *ptr, bool demote); +size_t ivsalloc(const void *ptr, bool demote); +size_t u2rz(size_t usize); +size_t p2rz(const void *ptr); void 
idalloc(void *ptr); +void iqalloc(void *ptr); void *iralloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero, bool no_move); malloc_tsd_protos(JEMALLOC_ATTR(unused), thread_allocated, thread_allocated_t) @@ -621,21 +714,25 @@ ipalloc(size_t usize, size_t alignment, bool zero) return (ret); } +/* + * Typical usage: + * void *ptr = [...] + * size_t sz = isalloc(ptr, config_prof); + */ JEMALLOC_INLINE size_t -isalloc(const void *ptr) +isalloc(const void *ptr, bool demote) { size_t ret; arena_chunk_t *chunk; assert(ptr != NULL); + /* Demotion only makes sense if config_prof is true. */ + assert(config_prof || demote == false); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (chunk != ptr) { /* Region. */ - if (config_prof) - ret = arena_salloc_demote(ptr); - else - ret = arena_salloc(ptr); + ret = arena_salloc(ptr, demote); } else ret = huge_salloc(ptr); @@ -643,14 +740,36 @@ isalloc(const void *ptr) } JEMALLOC_INLINE size_t -ivsalloc(const void *ptr) +ivsalloc(const void *ptr, bool demote) { /* Return 0 if ptr is not within a chunk managed by jemalloc. */ if (rtree_get(chunks_rtree, (uintptr_t)CHUNK_ADDR2BASE(ptr)) == NULL) return (0); - return (isalloc(ptr)); + return (isalloc(ptr, demote)); +} + +JEMALLOC_INLINE size_t +u2rz(size_t usize) +{ + size_t ret; + + if (usize <= SMALL_MAXCLASS) { + size_t binind = SMALL_SIZE2BIN(usize); + ret = arena_bin_info[binind].redzone_size; + } else + ret = 0; + + return (ret); +} + +JEMALLOC_INLINE size_t +p2rz(const void *ptr) +{ + size_t usize = isalloc(ptr, false); + + return (u2rz(usize)); } JEMALLOC_INLINE void @@ -667,6 +786,16 @@ idalloc(void *ptr) huge_dalloc(ptr, true); } +JEMALLOC_INLINE void +iqalloc(void *ptr) +{ + + if (config_fill && opt_quarantine) + quarantine(ptr); + else + idalloc(ptr); +} + JEMALLOC_INLINE void * iralloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero, bool no_move) @@ -677,14 +806,14 @@ iralloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero, assert(ptr != NULL); assert(size != 0); - oldsize = isalloc(ptr); + oldsize = isalloc(ptr, config_prof); if (alignment != 0 && ((uintptr_t)ptr & ((uintptr_t)alignment-1)) != 0) { size_t usize, copysize; /* - * Existing object alignment is inadquate; allocate new space + * Existing object alignment is inadequate; allocate new space * and copy. */ if (no_move) @@ -711,7 +840,7 @@ iralloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero, */ copysize = (size < oldsize) ? 
size : oldsize; memcpy(ret, ptr, copysize); - idalloc(ptr); + iqalloc(ptr); return (ret); } diff --git a/include/jemalloc/internal/private_namespace.h b/include/jemalloc/internal/private_namespace.h index de3042eb..a962192c 100644 --- a/include/jemalloc/internal/private_namespace.h +++ b/include/jemalloc/internal/private_namespace.h @@ -1,7 +1,9 @@ +#define arena_alloc_junk_small JEMALLOC_N(arena_alloc_junk_small) #define arena_bin_index JEMALLOC_N(arena_bin_index) #define arena_boot JEMALLOC_N(arena_boot) #define arena_dalloc JEMALLOC_N(arena_dalloc) #define arena_dalloc_bin JEMALLOC_N(arena_dalloc_bin) +#define arena_dalloc_junk_small JEMALLOC_N(arena_dalloc_junk_small) #define arena_dalloc_large JEMALLOC_N(arena_dalloc_large) #define arena_malloc JEMALLOC_N(arena_malloc) #define arena_malloc_large JEMALLOC_N(arena_malloc_large) @@ -20,7 +22,6 @@ #define arena_ralloc_no_move JEMALLOC_N(arena_ralloc_no_move) #define arena_run_regind JEMALLOC_N(arena_run_regind) #define arena_salloc JEMALLOC_N(arena_salloc) -#define arena_salloc_demote JEMALLOC_N(arena_salloc_demote) #define arena_stats_merge JEMALLOC_N(arena_stats_merge) #define arena_tcache_fill_small JEMALLOC_N(arena_tcache_fill_small) #define arenas_bin_i_index JEMALLOC_N(arenas_bin_i_index) @@ -136,6 +137,7 @@ #define idalloc JEMALLOC_N(idalloc) #define imalloc JEMALLOC_N(imalloc) #define ipalloc JEMALLOC_N(ipalloc) +#define iqalloc JEMALLOC_N(iqalloc) #define iralloc JEMALLOC_N(iralloc) #define isalloc JEMALLOC_N(isalloc) #define ivsalloc JEMALLOC_N(ivsalloc) @@ -176,6 +178,7 @@ #define opt_utrace JEMALLOC_N(opt_utrace) #define opt_xmalloc JEMALLOC_N(opt_xmalloc) #define opt_zero JEMALLOC_N(opt_zero) +#define p2rz JEMALLOC_N(p2rz) #define pow2_ceil JEMALLOC_N(pow2_ceil) #define prof_backtrace JEMALLOC_N(prof_backtrace) #define prof_boot0 JEMALLOC_N(prof_boot0) @@ -195,6 +198,8 @@ #define prof_tdata_init JEMALLOC_N(prof_tdata_init) #define prof_tdata_tls JEMALLOC_N(prof_tdata_tls) #define pthread_create JEMALLOC_N(pthread_create) +#define quarantine JEMALLOC_N(quarantine) +#define quarantine_boot JEMALLOC_N(quarantine_boot) #define register_zone JEMALLOC_N(register_zone) #define rtree_get JEMALLOC_N(rtree_get) #define rtree_get_locked JEMALLOC_N(rtree_get_locked) @@ -229,3 +234,4 @@ #define thread_allocated_get JEMALLOC_N(thread_allocated_get) #define thread_allocated_get_hard JEMALLOC_N(thread_allocated_get_hard) #define thread_allocated_tls JEMALLOC_N(thread_allocated_tls) +#define u2rz JEMALLOC_N(u2rz) diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index 34929e7e..a37bb448 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -378,7 +378,7 @@ prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt) cassert(config_prof); assert(ptr != NULL); - assert(size == isalloc(ptr)); + assert(size == isalloc(ptr, true)); if (opt_lg_prof_sample != 0) { if (prof_sample_accum_update(size)) { @@ -427,7 +427,7 @@ prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt, assert(ptr != NULL || (uintptr_t)cnt <= (uintptr_t)1U); if (ptr != NULL) { - assert(size == isalloc(ptr)); + assert(size == isalloc(ptr, true)); if (opt_lg_prof_sample != 0) { if (prof_sample_accum_update(size)) { /* @@ -500,7 +500,7 @@ prof_free(const void *ptr, size_t size) cassert(config_prof); if ((uintptr_t)ctx > (uintptr_t)1) { - assert(size == isalloc(ptr)); + assert(size == isalloc(ptr, true)); prof_thr_cnt_t *tcnt = prof_lookup(ctx->bt); if (tcnt != NULL) { diff --git 
a/include/jemalloc/internal/quarantine.h b/include/jemalloc/internal/quarantine.h new file mode 100644 index 00000000..38f3d696 --- /dev/null +++ b/include/jemalloc/internal/quarantine.h @@ -0,0 +1,24 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +/* Default per thread quarantine size if valgrind is enabled. */ +#define JEMALLOC_VALGRIND_QUARANTINE_DEFAULT (ZU(1) << 24) + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +void quarantine(void *ptr); +bool quarantine_boot(void); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ + diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index 93e721d5..9d8c992d 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -340,17 +340,24 @@ tcache_alloc_small(tcache_t *tcache, size_t size, bool zero) if (ret == NULL) return (NULL); } - assert(arena_salloc(ret) == arena_bin_info[binind].reg_size); + assert(arena_salloc(ret, false) == arena_bin_info[binind].reg_size); if (zero == false) { if (config_fill) { - if (opt_junk) - memset(ret, 0xa5, size); - else if (opt_zero) + if (opt_junk) { + arena_alloc_junk_small(ret, + &arena_bin_info[binind], false); + } else if (opt_zero) memset(ret, 0, size); } - } else + } else { + if (config_fill && opt_junk) { + arena_alloc_junk_small(ret, &arena_bin_info[binind], + true); + } + VALGRIND_MAKE_MEM_UNDEFINED(ret, size); memset(ret, 0, size); + } if (config_stats) tbin->tstats.nrequests++; @@ -397,8 +404,10 @@ tcache_alloc_large(tcache_t *tcache, size_t size, bool zero) else if (opt_zero) memset(ret, 0, size); } - } else + } else { + VALGRIND_MAKE_MEM_UNDEFINED(ret, size); memset(ret, 0, size); + } if (config_stats) tbin->tstats.nrequests++; @@ -422,7 +431,7 @@ tcache_dalloc_small(tcache_t *tcache, void *ptr) size_t pageind, binind; arena_chunk_map_t *mapelm; - assert(arena_salloc(ptr) <= SMALL_MAXCLASS); + assert(arena_salloc(ptr, false) <= SMALL_MAXCLASS); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); arena = chunk->arena; @@ -436,7 +445,7 @@ tcache_dalloc_small(tcache_t *tcache, void *ptr) assert(binind < NBINS); if (config_fill && opt_junk) - memset(ptr, 0x5a, arena_bin_info[binind].reg_size); + arena_dalloc_junk_small(ptr, &arena_bin_info[binind]); tbin = &tcache->tbins[binind]; tbin_info = &tcache_bin_info[binind]; @@ -459,8 +468,8 @@ tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size) tcache_bin_info_t *tbin_info; assert((size & PAGE_MASK) == 0); - assert(arena_salloc(ptr) > SMALL_MAXCLASS); - assert(arena_salloc(ptr) <= tcache_maxclass); + assert(arena_salloc(ptr, false) > SMALL_MAXCLASS); + assert(arena_salloc(ptr, false) <= tcache_maxclass); binind = NBINS + (size >> LG_PAGE) - 1; diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index 3d3ea3ab..d360ae3f 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -140,7 +140,6 @@ malloc_write(const char *s) je_malloc_message(NULL, s); } - #endif #endif /* JEMALLOC_H_INLINES */ diff --git 
a/include/jemalloc/jemalloc_defs.h.in b/include/jemalloc/jemalloc_defs.h.in index 8e7442d6..7770a7af 100644 --- a/include/jemalloc/jemalloc_defs.h.in +++ b/include/jemalloc/jemalloc_defs.h.in @@ -148,7 +148,7 @@ */ #undef JEMALLOC_DSS -/* Support memory filling (junk/zero). */ +/* Support memory filling (junk/zero/quarantine/redzone). */ #undef JEMALLOC_FILL /* Support the experimental API. */ @@ -157,6 +157,9 @@ /* Support utrace(2)-based tracing. */ #undef JEMALLOC_UTRACE +/* Support Valgrind. */ +#undef JEMALLOC_VALGRIND + /* Support optional abort() on OOM. */ #undef JEMALLOC_XMALLOC diff --git a/src/arena.c b/src/arena.c index c84aaf47..1d4f61ea 100644 --- a/src/arena.c +++ b/src/arena.c @@ -140,7 +140,7 @@ arena_run_reg_alloc(arena_run_t *run, arena_bin_info_t *bin_info) regind = bitmap_sfu(bitmap, &bin_info->bitmap_info); ret = (void *)((uintptr_t)run + (uintptr_t)bin_info->reg0_offset + - (uintptr_t)(bin_info->reg_size * regind)); + (uintptr_t)(bin_info->reg_interval * regind)); run->nfree--; if (regind == run->nextind) run->nextind++; @@ -161,8 +161,8 @@ arena_run_reg_dalloc(arena_run_t *run, void *ptr) assert(run->nfree < bin_info->nregs); /* Freeing an interior pointer can cause assertion failure. */ assert(((uintptr_t)ptr - ((uintptr_t)run + - (uintptr_t)bin_info->reg0_offset)) % (uintptr_t)bin_info->reg_size - == 0); + (uintptr_t)bin_info->reg0_offset)) % + (uintptr_t)bin_info->reg_interval == 0); assert((uintptr_t)ptr >= (uintptr_t)run + (uintptr_t)bin_info->reg0_offset); /* Freeing an unallocated pointer can cause assertion failure. */ @@ -260,10 +260,18 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, for (i = 0; i < need_pages; i++) { if ((chunk->map[run_ind+i-map_bias].bits & CHUNK_MAP_UNZEROED) != 0) { + VALGRIND_MAKE_MEM_UNDEFINED( + (void *)((uintptr_t) + chunk + ((run_ind+i) << + LG_PAGE)), PAGE); memset((void *)((uintptr_t) chunk + ((run_ind+i) << LG_PAGE)), 0, PAGE); } else if (config_debug) { + VALGRIND_MAKE_MEM_DEFINED( + (void *)((uintptr_t) + chunk + ((run_ind+i) << + LG_PAGE)), PAGE); arena_chunk_validate_zeroed( chunk, run_ind+i); } @@ -273,6 +281,9 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, * The run is dirty, so all pages must be * zeroed. */ + VALGRIND_MAKE_MEM_UNDEFINED((void + *)((uintptr_t)chunk + (run_ind << + LG_PAGE)), (need_pages << LG_PAGE)); memset((void *)((uintptr_t)chunk + (run_ind << LG_PAGE)), 0, (need_pages << LG_PAGE)); } @@ -1245,6 +1256,10 @@ arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind, ptr = arena_bin_malloc_hard(arena, bin); if (ptr == NULL) break; + if (config_fill && opt_junk) { + arena_alloc_junk_small(ptr, &arena_bin_info[binind], + true); + } /* Insert such that low regions get used first. 
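 * (tbin->avail is consumed from the top, so storing regions from index
 * nfill-1 downward hands out the lowest addresses first.)  Note that
 * during cache fill only the redzones are junk-filled, via the
 * zero == true variant of arena_alloc_junk_small(); the region bytes
 * themselves are junked or zeroed at allocation time in
 * tcache_alloc_small().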
*/ tbin->avail[nfill - 1 - i] = ptr; } @@ -1259,6 +1274,55 @@ arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind, tbin->ncached = i; } +void +arena_alloc_junk_small(void *ptr, arena_bin_info_t *bin_info, bool zero) +{ + + if (zero) { + size_t redzone_size = bin_info->redzone_size; + memset((void *)((uintptr_t)ptr - redzone_size), 0xa5, + redzone_size); + memset((void *)((uintptr_t)ptr + bin_info->reg_size), 0xa5, + redzone_size); + } else { + memset((void *)((uintptr_t)ptr - bin_info->redzone_size), 0xa5, + bin_info->reg_interval); + } +} + +void +arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info) +{ + size_t size = bin_info->reg_size; + size_t redzone_size = bin_info->redzone_size; + size_t i; + bool error = false; + + for (i = 1; i <= redzone_size; i++) { + unsigned byte; + if ((byte = *(uint8_t *)((uintptr_t)ptr - i)) != 0xa5) { + error = true; + malloc_printf(": Corrupt redzone " + "%zu byte%s before %p (size %zu), byte=%#x\n", i, + (i == 1) ? "" : "s", ptr, size, byte); + } + } + for (i = 0; i < redzone_size; i++) { + unsigned byte; + if ((byte = *(uint8_t *)((uintptr_t)ptr + size + i)) != 0xa5) { + error = true; + malloc_printf(": Corrupt redzone " + "%zu byte%s after end of %p (size %zu), byte=%#x\n", + i, (i == 1) ? "" : "s", ptr, size, byte); + } + } + if (opt_abort && error) + abort(); + + memset((void *)((uintptr_t)ptr - redzone_size), 0x5a, + bin_info->reg_interval); +} + void * arena_malloc_small(arena_t *arena, size_t size, bool zero) { @@ -1297,13 +1361,20 @@ arena_malloc_small(arena_t *arena, size_t size, bool zero) if (zero == false) { if (config_fill) { - if (opt_junk) - memset(ret, 0xa5, size); - else if (opt_zero) + if (opt_junk) { + arena_alloc_junk_small(ret, + &arena_bin_info[binind], false); + } else if (opt_zero) memset(ret, 0, size); } - } else + } else { + if (config_fill && opt_junk) { + arena_alloc_junk_small(ret, &arena_bin_info[binind], + true); + } + VALGRIND_MAKE_MEM_UNDEFINED(ret, size); memset(ret, 0, size); + } return (ret); } @@ -1412,7 +1483,7 @@ arena_palloc(arena_t *arena, size_t size, size_t alloc_size, size_t alignment, /* Return the size of the allocation pointed to by ptr. 
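 * With demote false, the size class physically backing ptr is returned.
 * With demote true (meaningful only under heap profiling), a small
 * allocation that was promoted to a page-sized run for sampling is
 * reported at its original size class, matching the assertions in
 * arena_prof_promoted():
 *
 *   assert(isalloc(ptr, false) == PAGE);
 *   assert(isalloc(ptr, true) == size);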
*/ size_t -arena_salloc(const void *ptr) +arena_salloc(const void *ptr, bool demote) { size_t ret; arena_chunk_t *chunk; @@ -1431,12 +1502,19 @@ arena_salloc(const void *ptr) size_t binind = arena_bin_index(chunk->arena, run->bin); arena_bin_info_t *bin_info = &arena_bin_info[binind]; assert(((uintptr_t)ptr - ((uintptr_t)run + - (uintptr_t)bin_info->reg0_offset)) % bin_info->reg_size == - 0); + (uintptr_t)bin_info->reg0_offset)) % bin_info->reg_interval + == 0); ret = bin_info->reg_size; } else { assert(((uintptr_t)ptr & PAGE_MASK) == 0); ret = mapbits & ~PAGE_MASK; + if (demote && prof_promote && ret == PAGE && (mapbits & + CHUNK_MAP_CLASS_MASK) != 0) { + size_t binind = ((mapbits & CHUNK_MAP_CLASS_MASK) >> + CHUNK_MAP_CLASS_SHIFT) - 1; + assert(binind < NBINS); + ret = arena_bin_info[binind].reg_size; + } assert(ret != 0); } @@ -1449,9 +1527,11 @@ arena_prof_promoted(const void *ptr, size_t size) arena_chunk_t *chunk; size_t pageind, binind; + assert(config_prof); assert(ptr != NULL); assert(CHUNK_ADDR2BASE(ptr) != ptr); - assert(isalloc(ptr) == PAGE); + assert(isalloc(ptr, false) == PAGE); + assert(isalloc(ptr, true) == PAGE); assert(size <= SMALL_MAXCLASS); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); @@ -1460,45 +1540,9 @@ arena_prof_promoted(const void *ptr, size_t size) assert(binind < NBINS); chunk->map[pageind-map_bias].bits = (chunk->map[pageind-map_bias].bits & ~CHUNK_MAP_CLASS_MASK) | ((binind+1) << CHUNK_MAP_CLASS_SHIFT); -} -size_t -arena_salloc_demote(const void *ptr) -{ - size_t ret; - arena_chunk_t *chunk; - size_t pageind, mapbits; - - assert(ptr != NULL); - assert(CHUNK_ADDR2BASE(ptr) != ptr); - - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - mapbits = chunk->map[pageind-map_bias].bits; - assert((mapbits & CHUNK_MAP_ALLOCATED) != 0); - if ((mapbits & CHUNK_MAP_LARGE) == 0) { - arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + - (uintptr_t)((pageind - (mapbits >> LG_PAGE)) << LG_PAGE)); - size_t binind = arena_bin_index(chunk->arena, run->bin); - arena_bin_info_t *bin_info = &arena_bin_info[binind]; - assert(((uintptr_t)ptr - ((uintptr_t)run + - (uintptr_t)bin_info->reg0_offset)) % bin_info->reg_size == - 0); - ret = bin_info->reg_size; - } else { - assert(((uintptr_t)ptr & PAGE_MASK) == 0); - ret = mapbits & ~PAGE_MASK; - if (prof_promote && ret == PAGE && (mapbits & - CHUNK_MAP_CLASS_MASK) != 0) { - size_t binind = ((mapbits & CHUNK_MAP_CLASS_MASK) >> - CHUNK_MAP_CLASS_SHIFT) - 1; - assert(binind < NBINS); - ret = arena_bin_info[binind].reg_size; - } - assert(ret != 0); - } - - return (ret); + assert(isalloc(ptr, false) == PAGE); + assert(isalloc(ptr, true) == size); } static void @@ -1545,7 +1589,8 @@ arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE); past = (size_t)(PAGE_CEILING((uintptr_t)run + (uintptr_t)bin_info->reg0_offset + (uintptr_t)(run->nextind * - bin_info->reg_size) - (uintptr_t)chunk) >> LG_PAGE); + bin_info->reg_interval - bin_info->redzone_size) - + (uintptr_t)chunk) >> LG_PAGE); malloc_mutex_lock(&arena->lock); /* @@ -1617,7 +1662,7 @@ arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, size = bin_info->reg_size; if (config_fill && opt_junk) - memset(ptr, 0x5a, size); + arena_dalloc_junk_small(ptr, bin_info); arena_run_reg_dalloc(run, ptr); if (run->nfree == bin_info->nregs) { @@ -1936,7 +1981,7 @@ arena_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, */ copysize = 
(size < oldsize) ? size : oldsize; memcpy(ret, ptr, copysize); - idalloc(ptr); + iqalloc(ptr); return (ret); } @@ -2007,16 +2052,40 @@ arena_new(arena_t *arena, unsigned ind) static size_t bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size) { + size_t pad_size; size_t try_run_size, good_run_size; uint32_t try_nregs, good_nregs; uint32_t try_hdr_size, good_hdr_size; uint32_t try_bitmap_offset, good_bitmap_offset; uint32_t try_ctx0_offset, good_ctx0_offset; - uint32_t try_reg0_offset, good_reg0_offset; + uint32_t try_redzone0_offset, good_redzone0_offset; assert(min_run_size >= PAGE); assert(min_run_size <= arena_maxclass); + /* + * Determine redzone size based on minimum alignment and minimum + * redzone size. Add padding to the end of the run if it is needed to + * align the regions. The padding allows each redzone to be half the + * minimum alignment; without the padding, each redzone would have to + * be twice as large in order to maintain alignment. + */ + if (config_fill && opt_redzone) { + size_t align_min = ZU(1) << (ffs(bin_info->reg_size) - 1); + if (align_min <= REDZONE_MINSIZE) { + bin_info->redzone_size = REDZONE_MINSIZE; + pad_size = 0; + } else { + bin_info->redzone_size = align_min >> 1; + pad_size = bin_info->redzone_size; + } + } else { + bin_info->redzone_size = 0; + pad_size = 0; + } + bin_info->reg_interval = bin_info->reg_size + + (bin_info->redzone_size << 1); + /* * Calculate known-valid settings before entering the run_size * expansion loop, so that the first part of the loop always copies @@ -2028,7 +2097,8 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size) * header's mask length and the number of regions. */ try_run_size = min_run_size; - try_nregs = ((try_run_size - sizeof(arena_run_t)) / bin_info->reg_size) + try_nregs = ((try_run_size - sizeof(arena_run_t)) / + bin_info->reg_interval) + 1; /* Counter-act try_nregs-- in loop. */ if (try_nregs > RUN_MAXREGS) { try_nregs = RUN_MAXREGS @@ -2050,9 +2120,9 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size) try_hdr_size += try_nregs * sizeof(prof_ctx_t *); } else try_ctx0_offset = 0; - try_reg0_offset = try_run_size - (try_nregs * - bin_info->reg_size); - } while (try_hdr_size > try_reg0_offset); + try_redzone0_offset = try_run_size - (try_nregs * + bin_info->reg_interval) - pad_size; + } while (try_hdr_size > try_redzone0_offset); /* run_size expansion loop. */ do { @@ -2064,12 +2134,12 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size) good_hdr_size = try_hdr_size; good_bitmap_offset = try_bitmap_offset; good_ctx0_offset = try_ctx0_offset; - good_reg0_offset = try_reg0_offset; + good_redzone0_offset = try_redzone0_offset; /* Try more aggressive settings. */ try_run_size += PAGE; - try_nregs = ((try_run_size - sizeof(arena_run_t)) / - bin_info->reg_size) + try_nregs = ((try_run_size - sizeof(arena_run_t) - pad_size) / + bin_info->reg_interval) + 1; /* Counter-act try_nregs-- in loop. 
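 * For reference, the redzone sizing above works out as follows when
 * opt_redzone is enabled: a size class whose minimum alignment is at
 * most REDZONE_MINSIZE gets 16-byte redzones and no pad (e.g. reg_size
 * 24: align_min 8, reg_interval 24 + 2*16 == 56), whereas a larger
 * minimum alignment gets redzones of half that alignment plus a
 * trailing pad (e.g. reg_size 32: align_min 32, redzone_size 16,
 * pad_size 16, reg_interval 64).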
*/ if (try_nregs > RUN_MAXREGS) { try_nregs = RUN_MAXREGS @@ -2093,23 +2163,27 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size) try_hdr_size += try_nregs * sizeof(prof_ctx_t *); } - try_reg0_offset = try_run_size - (try_nregs * - bin_info->reg_size); - } while (try_hdr_size > try_reg0_offset); + try_redzone0_offset = try_run_size - (try_nregs * + bin_info->reg_interval) - pad_size; + } while (try_hdr_size > try_redzone0_offset); } while (try_run_size <= arena_maxclass && try_run_size <= arena_maxclass - && RUN_MAX_OVRHD * (bin_info->reg_size << 3) > RUN_MAX_OVRHD_RELAX - && (try_reg0_offset << RUN_BFP) > RUN_MAX_OVRHD * try_run_size + && RUN_MAX_OVRHD * (bin_info->reg_interval << 3) > + RUN_MAX_OVRHD_RELAX + && (try_redzone0_offset << RUN_BFP) > RUN_MAX_OVRHD * try_run_size && try_nregs < RUN_MAXREGS); - assert(good_hdr_size <= good_reg0_offset); + assert(good_hdr_size <= good_redzone0_offset); /* Copy final settings. */ bin_info->run_size = good_run_size; bin_info->nregs = good_nregs; bin_info->bitmap_offset = good_bitmap_offset; bin_info->ctx0_offset = good_ctx0_offset; - bin_info->reg0_offset = good_reg0_offset; + bin_info->reg0_offset = good_redzone0_offset + bin_info->redzone_size; + + assert(bin_info->reg0_offset - bin_info->redzone_size + (bin_info->nregs + * bin_info->reg_interval) + pad_size == bin_info->run_size); return (good_run_size); } diff --git a/src/ctl.c b/src/ctl.c index a75ffef3..6be40561 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -57,6 +57,7 @@ CTL_PROTO(config_stats) CTL_PROTO(config_tcache) CTL_PROTO(config_tls) CTL_PROTO(config_utrace) +CTL_PROTO(config_valgrind) CTL_PROTO(config_xmalloc) CTL_PROTO(opt_abort) CTL_PROTO(opt_lg_chunk) @@ -65,7 +66,10 @@ CTL_PROTO(opt_lg_dirty_mult) CTL_PROTO(opt_stats_print) CTL_PROTO(opt_junk) CTL_PROTO(opt_zero) +CTL_PROTO(opt_quarantine) +CTL_PROTO(opt_redzone) CTL_PROTO(opt_utrace) +CTL_PROTO(opt_valgrind) CTL_PROTO(opt_xmalloc) CTL_PROTO(opt_tcache) CTL_PROTO(opt_lg_tcache_max) @@ -179,6 +183,7 @@ static const ctl_node_t config_node[] = { {NAME("tcache"), CTL(config_tcache)}, {NAME("tls"), CTL(config_tls)}, {NAME("utrace"), CTL(config_utrace)}, + {NAME("valgrind"), CTL(config_valgrind)}, {NAME("xmalloc"), CTL(config_xmalloc)} }; @@ -190,7 +195,10 @@ static const ctl_node_t opt_node[] = { {NAME("stats_print"), CTL(opt_stats_print)}, {NAME("junk"), CTL(opt_junk)}, {NAME("zero"), CTL(opt_zero)}, + {NAME("quarantine"), CTL(opt_quarantine)}, + {NAME("redzone"), CTL(opt_redzone)}, {NAME("utrace"), CTL(opt_utrace)}, + {NAME("valgrind"), CTL(opt_valgrind)}, {NAME("xmalloc"), CTL(opt_xmalloc)}, {NAME("tcache"), CTL(opt_tcache)}, {NAME("lg_tcache_max"), CTL(opt_lg_tcache_max)}, @@ -1050,7 +1058,8 @@ thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, /* Set new arena association. 
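 * A tcache "pointer" no greater than TCACHE_STATE_MAX is a sentinel
 * encoding a disabled or uninitialized cache rather than a live one, so
 * only genuine caches are dissociated and re-associated below.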
*/ if (config_tcache) { tcache_t *tcache; - if ((tcache = *tcache_tsd_get()) != NULL) { + if ((uintptr_t)(tcache = *tcache_tsd_get()) > + (uintptr_t)TCACHE_STATE_MAX) { tcache_arena_dissociate(tcache); tcache_arena_associate(tcache, arena); } @@ -1085,6 +1094,7 @@ CTL_RO_BOOL_CONFIG_GEN(config_stats) CTL_RO_BOOL_CONFIG_GEN(config_tcache) CTL_RO_BOOL_CONFIG_GEN(config_tls) CTL_RO_BOOL_CONFIG_GEN(config_utrace) +CTL_RO_BOOL_CONFIG_GEN(config_valgrind) CTL_RO_BOOL_CONFIG_GEN(config_xmalloc) /******************************************************************************/ @@ -1096,7 +1106,10 @@ CTL_RO_NL_GEN(opt_lg_dirty_mult, opt_lg_dirty_mult, ssize_t) CTL_RO_NL_GEN(opt_stats_print, opt_stats_print, bool) CTL_RO_NL_CGEN(config_fill, opt_junk, opt_junk, bool) CTL_RO_NL_CGEN(config_fill, opt_zero, opt_zero, bool) +CTL_RO_NL_CGEN(config_fill, opt_quarantine, opt_quarantine, size_t) +CTL_RO_NL_CGEN(config_fill, opt_redzone, opt_redzone, bool) CTL_RO_NL_CGEN(config_utrace, opt_utrace, opt_utrace, bool) +CTL_RO_NL_CGEN(config_valgrind, opt_valgrind, opt_valgrind, bool) CTL_RO_NL_CGEN(config_xmalloc, opt_xmalloc, opt_xmalloc, bool) CTL_RO_NL_CGEN(config_tcache, opt_tcache, opt_tcache, bool) CTL_RO_NL_CGEN(config_tcache, opt_lg_tcache_max, opt_lg_tcache_max, ssize_t) diff --git a/src/huge.c b/src/huge.c index 43c8f3b0..daf0c622 100644 --- a/src/huge.c +++ b/src/huge.c @@ -174,7 +174,7 @@ huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, #endif { memcpy(ret, ptr, copysize); - idalloc(ptr); + iqalloc(ptr); } return (ret); } diff --git a/src/jemalloc.c b/src/jemalloc.c index cde998c4..237dd589 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -14,14 +14,19 @@ const char *je_malloc_conf JEMALLOC_ATTR(visibility("default")); bool opt_abort = true; # ifdef JEMALLOC_FILL bool opt_junk = true; +bool opt_redzone = true; # else bool opt_junk = false; +bool opt_redzone = false; # endif #else bool opt_abort = false; bool opt_junk = false; +bool opt_redzone = false; #endif +size_t opt_quarantine = ZU(0); bool opt_utrace = false; +bool opt_valgrind = false; bool opt_xmalloc = false; bool opt_zero = false; size_t opt_narenas = 0; @@ -419,7 +424,7 @@ malloc_conf_init(void) while (*opts != '\0' && malloc_conf_next(&opts, &k, &klen, &v, &vlen) == false) { -#define CONF_HANDLE_BOOL(o, n) \ +#define CONF_HANDLE_BOOL_HIT(o, n, hit) \ if (sizeof(#n)-1 == klen && strncmp(#n, k, \ klen) == 0) { \ if (strncmp("true", v, vlen) == 0 && \ @@ -433,12 +438,19 @@ malloc_conf_init(void) "Invalid conf value", \ k, klen, v, vlen); \ } \ + hit = true; \ + } else \ + hit = false; +#define CONF_HANDLE_BOOL(o, n) { \ + bool hit; \ + CONF_HANDLE_BOOL_HIT(o, n, hit); \ + if (hit) \ continue; \ - } +} #define CONF_HANDLE_SIZE_T(o, n, min, max) \ if (sizeof(#n)-1 == klen && strncmp(#n, k, \ klen) == 0) { \ - uintmax_t um; \ + uintmax_t um; \ char *end; \ \ errno = 0; \ @@ -502,11 +514,30 @@ malloc_conf_init(void) CONF_HANDLE_BOOL(opt_stats_print, stats_print) if (config_fill) { CONF_HANDLE_BOOL(opt_junk, junk) + CONF_HANDLE_SIZE_T(opt_quarantine, quarantine, + 0, SIZE_T_MAX) + CONF_HANDLE_BOOL(opt_redzone, redzone) CONF_HANDLE_BOOL(opt_zero, zero) } if (config_utrace) { CONF_HANDLE_BOOL(opt_utrace, utrace) } + if (config_valgrind) { + bool hit; + CONF_HANDLE_BOOL_HIT(opt_valgrind, + valgrind, hit) + if (config_fill && opt_valgrind && hit) { + opt_junk = false; + opt_zero = false; + if (opt_quarantine == 0) { + opt_quarantine = + JEMALLOC_VALGRIND_QUARANTINE_DEFAULT; + } + opt_redzone = true; + } + if (hit) + continue; + } 
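+			/*
+			 * Summarizing the interaction above: "valgrind:true"
+			 * in the conf string turns junk and zero filling off,
+			 * forces redzones on, and, when no explicit quarantine
+			 * size was given, selects the 16 MiB default.  For
+			 * example, a program built with this definition:
+			 *
+			 *   const char *malloc_conf = "valgrind:true";
+			 *
+			 * ends up with opt_junk == false, opt_zero == false,
+			 * opt_redzone == true, and opt_quarantine == 1 << 24.
+			 */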
if (config_xmalloc) { CONF_HANDLE_BOOL(opt_xmalloc, xmalloc) } @@ -662,6 +693,11 @@ malloc_init_hard(void) return (true); } + if (config_fill && quarantine_boot()) { + malloc_mutex_unlock(&init_lock); + return (true); + } + if (config_prof && prof_boot2()) { malloc_mutex_unlock(&init_lock); return (true); @@ -763,7 +799,7 @@ je_malloc(size_t size) } else ret = imalloc(size); } else { - if (config_stats) + if (config_stats || (config_valgrind && opt_valgrind)) usize = s2u(size); ret = imalloc(size); } @@ -780,10 +816,11 @@ label_oom: if (config_prof && opt_prof && ret != NULL) prof_malloc(ret, usize, cnt); if (config_stats && ret != NULL) { - assert(usize == isalloc(ret)); + assert(usize == isalloc(ret, config_prof)); thread_allocated_tsd_get()->allocated += usize; } UTRACE(0, size, ret); + JEMALLOC_VALGRIND_MALLOC(ret != NULL, ret, usize, false); return (ret); } @@ -872,7 +909,7 @@ imemalign(void **memptr, size_t alignment, size_t size, label_return: if (config_stats && result != NULL) { - assert(usize == isalloc(result)); + assert(usize == isalloc(result, config_prof)); thread_allocated_tsd_get()->allocated += usize; } if (config_prof && opt_prof && result != NULL) @@ -886,8 +923,10 @@ JEMALLOC_ATTR(visibility("default")) int je_posix_memalign(void **memptr, size_t alignment, size_t size) { - - return imemalign(memptr, alignment, size, sizeof(void *)); + int ret = imemalign(memptr, alignment, size, sizeof(void *)); + JEMALLOC_VALGRIND_MALLOC(ret == 0, *memptr, isalloc(*memptr, + config_prof), false); + return (ret); } JEMALLOC_ATTR(malloc) @@ -902,6 +941,8 @@ je_aligned_alloc(size_t alignment, size_t size) ret = NULL; errno = err; } + JEMALLOC_VALGRIND_MALLOC(err == 0, ret, isalloc(ret, config_prof), + false); return (ret); } @@ -956,7 +997,7 @@ je_calloc(size_t num, size_t size) } else ret = icalloc(num_size); } else { - if (config_stats) + if (config_stats || (config_valgrind && opt_valgrind)) usize = s2u(num_size); ret = icalloc(num_size); } @@ -974,10 +1015,11 @@ label_return: if (config_prof && opt_prof && ret != NULL) prof_malloc(ret, usize, cnt); if (config_stats && ret != NULL) { - assert(usize == isalloc(ret)); + assert(usize == isalloc(ret, config_prof)); thread_allocated_tsd_get()->allocated += usize; } UTRACE(0, num_size, ret); + JEMALLOC_VALGRIND_MALLOC(ret != NULL, ret, usize, true); return (ret); } @@ -988,19 +1030,30 @@ je_realloc(void *ptr, size_t size) void *ret; size_t usize; size_t old_size = 0; + size_t old_rzsize JEMALLOC_CC_SILENCE_INIT(0); prof_thr_cnt_t *cnt JEMALLOC_CC_SILENCE_INIT(NULL); prof_ctx_t *old_ctx JEMALLOC_CC_SILENCE_INIT(NULL); if (size == 0) { if (ptr != NULL) { /* realloc(ptr, 0) is equivalent to free(p). 
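 * Before the deallocation, the old usable size (with demote matching
 * the profiling configuration, so stats and prof bookkeeping balance)
 * and, under Valgrind, the old redzone size are captured; both reads
 * must happen while ptr is still live, since isalloc()/p2rz() consult
 * allocator metadata for ptr.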
*/ - if (config_prof || config_stats) - old_size = isalloc(ptr); + if (config_prof) { + old_size = isalloc(ptr, true); + if (config_valgrind && opt_valgrind) + old_rzsize = p2rz(ptr); + } else if (config_stats) { + old_size = isalloc(ptr, false); + if (config_valgrind && opt_valgrind) + old_rzsize = u2rz(old_size); + } else if (config_valgrind && opt_valgrind) { + old_size = isalloc(ptr, false); + old_rzsize = u2rz(old_size); + } if (config_prof && opt_prof) { old_ctx = prof_ctx_get(ptr); cnt = NULL; } - idalloc(ptr); + iqalloc(ptr); ret = NULL; goto label_return; } else @@ -1010,8 +1063,18 @@ je_realloc(void *ptr, size_t size) if (ptr != NULL) { assert(malloc_initialized || IS_INITIALIZER); - if (config_prof || config_stats) - old_size = isalloc(ptr); + if (config_prof) { + old_size = isalloc(ptr, true); + if (config_valgrind && opt_valgrind) + old_rzsize = p2rz(ptr); + } else if (config_stats) { + old_size = isalloc(ptr, false); + if (config_valgrind && opt_valgrind) + old_rzsize = u2rz(old_size); + } else if (config_valgrind && opt_valgrind) { + old_size = isalloc(ptr, false); + old_rzsize = u2rz(old_size); + } if (config_prof && opt_prof) { usize = s2u(size); old_ctx = prof_ctx_get(ptr); @@ -1035,7 +1098,7 @@ je_realloc(void *ptr, size_t size) old_ctx = NULL; } } else { - if (config_stats) + if (config_stats || (config_valgrind && opt_valgrind)) usize = s2u(size); ret = iralloc(ptr, size, 0, 0, false, false); } @@ -1076,7 +1139,8 @@ label_oom: ret = imalloc(size); } } else { - if (config_stats) + if (config_stats || (config_valgrind && + opt_valgrind)) usize = s2u(size); ret = imalloc(size); } @@ -1097,12 +1161,13 @@ label_return: prof_realloc(ret, usize, cnt, old_size, old_ctx); if (config_stats && ret != NULL) { thread_allocated_t *ta; - assert(usize == isalloc(ret)); + assert(usize == isalloc(ret, config_prof)); ta = thread_allocated_tsd_get(); ta->allocated += usize; ta->deallocated += old_size; } UTRACE(ptr, size, ret); + JEMALLOC_VALGRIND_REALLOC(ret, usize, ptr, old_size, old_rzsize, false); return (ret); } @@ -1114,18 +1179,21 @@ je_free(void *ptr) UTRACE(ptr, 0, 0); if (ptr != NULL) { size_t usize; + size_t rzsize JEMALLOC_CC_SILENCE_INIT(0); assert(malloc_initialized || IS_INITIALIZER); if (config_prof && opt_prof) { - usize = isalloc(ptr); + usize = isalloc(ptr, config_prof); prof_free(ptr, usize); - } else if (config_stats) { - usize = isalloc(ptr); - } + } else if (config_stats || config_valgrind) + usize = isalloc(ptr, config_prof); if (config_stats) thread_allocated_tsd_get()->deallocated += usize; - idalloc(ptr); + if (config_valgrind && opt_valgrind) + rzsize = p2rz(ptr); + iqalloc(ptr); + JEMALLOC_VALGRIND_FREE(ptr, rzsize); } } @@ -1145,6 +1213,7 @@ je_memalign(size_t alignment, size_t size) { void *ret JEMALLOC_CC_SILENCE_INIT(NULL); imemalign(&ret, alignment, size, 1); + JEMALLOC_VALGRIND_MALLOC(ret != NULL, ret, size, false); return (ret); } #endif @@ -1157,6 +1226,7 @@ je_valloc(size_t size) { void *ret JEMALLOC_CC_SILENCE_INIT(NULL); imemalign(&ret, PAGE, size, 1); + JEMALLOC_VALGRIND_MALLOC(ret != NULL, ret, size, false); return (ret); } #endif @@ -1209,9 +1279,9 @@ je_malloc_usable_size(const void *ptr) assert(malloc_initialized || IS_INITIALIZER); if (config_ivsalloc) - ret = ivsalloc(ptr); + ret = ivsalloc(ptr, config_prof); else - ret = (ptr != NULL) ? isalloc(ptr) : 0; + ret = (ptr != NULL) ? 
isalloc(ptr, config_prof) : 0; return (ret); } @@ -1336,10 +1406,11 @@ je_allocm(void **ptr, size_t *rsize, size_t size, int flags) *ptr = p; if (config_stats) { - assert(usize == isalloc(p)); + assert(usize == isalloc(p, config_prof)); thread_allocated_tsd_get()->allocated += usize; } UTRACE(0, size, p); + JEMALLOC_VALGRIND_MALLOC(true, p, usize, zero); return (ALLOCM_SUCCESS); label_oom: if (config_xmalloc && opt_xmalloc) { @@ -1360,6 +1431,7 @@ je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags) void *p, *q; size_t usize; size_t old_size; + size_t old_rzsize JEMALLOC_CC_SILENCE_INIT(0); size_t alignment = (ZU(1) << (flags & ALLOCM_LG_ALIGN_MASK) & (SIZE_T_MAX-1)); bool zero = flags & ALLOCM_ZERO; @@ -1384,7 +1456,9 @@ je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags) size_t max_usize = (alignment == 0) ? s2u(size+extra) : sa2u(size+extra, alignment, NULL); prof_ctx_t *old_ctx = prof_ctx_get(p); - old_size = isalloc(p); + old_size = isalloc(p, true); + if (config_valgrind && opt_valgrind) + old_rzsize = p2rz(p); PROF_ALLOC_PREP(1, max_usize, cnt); if (cnt == NULL) goto label_oom; @@ -1403,27 +1477,33 @@ je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags) usize = max_usize; arena_prof_promoted(q, usize); } else - usize = isalloc(q); + usize = isalloc(q, config_prof); } else { q = iralloc(p, size, extra, alignment, zero, no_move); if (q == NULL) goto label_err; - usize = isalloc(q); + usize = isalloc(q, config_prof); } prof_realloc(q, usize, cnt, old_size, old_ctx); if (rsize != NULL) *rsize = usize; } else { - if (config_stats) - old_size = isalloc(p); + if (config_stats) { + old_size = isalloc(p, false); + if (config_valgrind && opt_valgrind) + old_rzsize = u2rz(old_size); + } else if (config_valgrind && opt_valgrind) { + old_size = isalloc(p, false); + old_rzsize = u2rz(old_size); + } q = iralloc(p, size, extra, alignment, zero, no_move); if (q == NULL) goto label_err; if (config_stats) - usize = isalloc(q); + usize = isalloc(q, config_prof); if (rsize != NULL) { if (config_stats == false) - usize = isalloc(q); + usize = isalloc(q, config_prof); *rsize = usize; } } @@ -1436,6 +1516,7 @@ je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags) ta->deallocated += old_size; } UTRACE(p, size, q); + JEMALLOC_VALGRIND_REALLOC(q, usize, p, old_size, old_rzsize, zero); return (ALLOCM_SUCCESS); label_err: if (no_move) { @@ -1462,10 +1543,10 @@ je_sallocm(const void *ptr, size_t *rsize, int flags) assert(malloc_initialized || IS_INITIALIZER); if (config_ivsalloc) - sz = ivsalloc(ptr); + sz = ivsalloc(ptr, config_prof); else { assert(ptr != NULL); - sz = isalloc(ptr); + sz = isalloc(ptr, config_prof); } assert(rsize != NULL); *rsize = sz; @@ -1479,21 +1560,25 @@ int je_dallocm(void *ptr, int flags) { size_t usize; + size_t rzsize JEMALLOC_CC_SILENCE_INIT(0); assert(ptr != NULL); assert(malloc_initialized || IS_INITIALIZER); UTRACE(ptr, 0, 0); - if (config_stats) - usize = isalloc(ptr); + if (config_stats || config_valgrind) + usize = isalloc(ptr, config_prof); if (config_prof && opt_prof) { - if (config_stats == false) - usize = isalloc(ptr); + if (config_stats == false && config_valgrind == false) + usize = isalloc(ptr, config_prof); prof_free(ptr, usize); } if (config_stats) thread_allocated_tsd_get()->deallocated += usize; - idalloc(ptr); + if (config_valgrind && opt_valgrind) + rzsize = p2rz(ptr); + iqalloc(ptr); + JEMALLOC_VALGRIND_FREE(ptr, rzsize); return (ALLOCM_SUCCESS); } diff --git 
a/src/quarantine.c b/src/quarantine.c
new file mode 100644
index 00000000..89a25c6a
--- /dev/null
+++ b/src/quarantine.c
@@ -0,0 +1,163 @@
+#include "jemalloc/internal/jemalloc_internal.h"
+
+/******************************************************************************/
+/* Data. */
+
+typedef struct quarantine_s quarantine_t;
+
+struct quarantine_s {
+	size_t	curbytes;
+	size_t	curobjs;
+	size_t	first;
+#define	LG_MAXOBJS_INIT 10
+	size_t	lg_maxobjs;
+	void	*objs[1]; /* Dynamically sized ring buffer. */
+};
+
+static void	quarantine_cleanup(void *arg);
+
+malloc_tsd_data(static, quarantine, quarantine_t *, NULL)
+malloc_tsd_funcs(JEMALLOC_INLINE, quarantine, quarantine_t *, NULL,
+    quarantine_cleanup)
+
+/******************************************************************************/
+/* Function prototypes for non-inline static functions. */
+
+static quarantine_t	*quarantine_init(size_t lg_maxobjs);
+static quarantine_t	*quarantine_grow(quarantine_t *quarantine);
+static void	quarantine_drain(quarantine_t *quarantine, size_t upper_bound);
+
+/******************************************************************************/
+
+static quarantine_t *
+quarantine_init(size_t lg_maxobjs)
+{
+	quarantine_t *quarantine;
+
+	quarantine = (quarantine_t *)imalloc(offsetof(quarantine_t, objs) +
+	    ((ZU(1) << lg_maxobjs) * sizeof(void *)));
+	if (quarantine == NULL)
+		return (NULL);
+	quarantine->curbytes = 0;
+	quarantine->curobjs = 0;
+	quarantine->first = 0;
+	quarantine->lg_maxobjs = lg_maxobjs;
+
+	quarantine_tsd_set(&quarantine);
+
+	return (quarantine);
+}
+
+static quarantine_t *
+quarantine_grow(quarantine_t *quarantine)
+{
+	quarantine_t *ret;
+
+	ret = quarantine_init(quarantine->lg_maxobjs + 1);
+	if (ret == NULL)
+		return (quarantine);
+
+	ret->curbytes = quarantine->curbytes;
+	if (quarantine->first + quarantine->curobjs < (ZU(1) <<
+	    quarantine->lg_maxobjs)) {
+		/* objs ring buffer data are contiguous. */
+		memcpy(ret->objs, &quarantine->objs[quarantine->first],
+		    quarantine->curobjs * sizeof(void *));
+		ret->curobjs = quarantine->curobjs;
+	} else {
+		/* objs ring buffer data wrap around. */
+		size_t ncopy = (ZU(1) << quarantine->lg_maxobjs) -
+		    quarantine->first;
+		memcpy(ret->objs, &quarantine->objs[quarantine->first], ncopy *
+		    sizeof(void *));
+		ret->curobjs = quarantine->curobjs;
+		if (quarantine->curobjs != ncopy) {
+			memcpy(&ret->objs[ncopy], quarantine->objs,
+			    (quarantine->curobjs - ncopy) * sizeof(void *));
+		}
+	}
+
+	return (ret);
+}
+
+static void
+quarantine_drain(quarantine_t *quarantine, size_t upper_bound)
+{
+
+	while (quarantine->curbytes > upper_bound && quarantine->curobjs > 0) {
+		void *ptr = quarantine->objs[quarantine->first];
+		size_t usize = isalloc(ptr, config_prof);
+		idalloc(ptr);
+		quarantine->curbytes -= usize;
+		quarantine->curobjs--;
+		quarantine->first = (quarantine->first + 1) & ((ZU(1) <<
+		    quarantine->lg_maxobjs) - 1);
+	}
+}
+
+void
+quarantine(void *ptr)
+{
+	quarantine_t *quarantine;
+	size_t usize = isalloc(ptr, config_prof);
+
+	assert(config_fill);
+	assert(opt_quarantine);
+
+	quarantine = *quarantine_tsd_get();
+	if (quarantine == NULL && (quarantine =
+	    quarantine_init(LG_MAXOBJS_INIT)) == NULL) {
+		idalloc(ptr);
+		return;
+	}
+	/*
+	 * Drain one or more objects if the quarantine size limit would be
+	 * exceeded by appending ptr.
+	 */
+	if (quarantine->curbytes + usize > opt_quarantine) {
+		size_t upper_bound = (opt_quarantine >= usize) ? opt_quarantine
+		    - usize : 0;
+		quarantine_drain(quarantine, upper_bound);
+	}
+	/* Grow the quarantine ring buffer if it's full.
*/ + if (quarantine->curobjs == (ZU(1) << quarantine->lg_maxobjs)) + quarantine = quarantine_grow(quarantine); + /* quarantine_grow() must free a slot if it fails to grow. */ + assert(quarantine->curobjs < (ZU(1) << quarantine->lg_maxobjs)); + /* Append ptr if its size doesn't exceed the quarantine size. */ + if (quarantine->curbytes + usize <= opt_quarantine) { + size_t offset = (quarantine->first + quarantine->curobjs) & + ((ZU(1) << quarantine->lg_maxobjs) - 1); + quarantine->objs[offset] = ptr; + quarantine->curbytes += usize; + quarantine->curobjs++; + if (opt_junk) + memset(ptr, 0x5a, usize); + } else { + assert(quarantine->curbytes == 0); + idalloc(ptr); + } +} + +static void +quarantine_cleanup(void *arg) +{ + quarantine_t *quarantine = *(quarantine_t **)arg; + + if (quarantine != NULL) { + quarantine_drain(quarantine, 0); + idalloc(quarantine); + } +} + +bool +quarantine_boot(void) +{ + + assert(config_fill); + + if (quarantine_tsd_boot()) + return (true); + + return (false); +} diff --git a/src/stats.c b/src/stats.c index 0cd70b0d..4cad214f 100644 --- a/src/stats.c +++ b/src/stats.c @@ -382,8 +382,11 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, OPT_WRITE_SSIZE_T(lg_dirty_mult) OPT_WRITE_BOOL(stats_print) OPT_WRITE_BOOL(junk) + OPT_WRITE_SIZE_T(quarantine) + OPT_WRITE_BOOL(redzone) OPT_WRITE_BOOL(zero) OPT_WRITE_BOOL(utrace) + OPT_WRITE_BOOL(valgrind) OPT_WRITE_BOOL(xmalloc) OPT_WRITE_BOOL(tcache) OPT_WRITE_SSIZE_T(lg_tcache_max) diff --git a/src/tcache.c b/src/tcache.c index 99a657b6..be26b59c 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -75,6 +75,10 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem, (uintptr_t)chunk) >> LG_PAGE; arena_chunk_map_t *mapelm = &chunk->map[pageind-map_bias]; + if (config_fill && opt_junk) { + arena_alloc_junk_small(ptr, + &arena_bin_info[binind], true); + } arena_dalloc_bin(arena, chunk, ptr, mapelm); } else { /* @@ -298,7 +302,7 @@ tcache_destroy(tcache_t *tcache) malloc_mutex_unlock(&tcache->arena->lock); } - tcache_size = arena_salloc(tcache); + tcache_size = arena_salloc(tcache, false); if (tcache_size <= SMALL_MAXCLASS) { arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache); arena_t *arena = chunk->arena; diff --git a/src/zone.c b/src/zone.c index a50c129c..cde5d49a 100644 --- a/src/zone.c +++ b/src/zone.c @@ -56,7 +56,7 @@ zone_size(malloc_zone_t *zone, void *ptr) * not work in practice, we must check all pointers to assure that they * reside within a mapped chunk before determining size. */ - return (ivsalloc(ptr)); + return (ivsalloc(ptr, config_prof)); } static void * @@ -87,7 +87,7 @@ static void zone_free(malloc_zone_t *zone, void *ptr) { - if (ivsalloc(ptr) != 0) { + if (ivsalloc(ptr, config_prof) != 0) { je_free(ptr); return; } @@ -99,7 +99,7 @@ static void * zone_realloc(malloc_zone_t *zone, void *ptr, size_t size) { - if (ivsalloc(ptr) != 0) + if (ivsalloc(ptr, config_prof) != 0) return (je_realloc(ptr, size)); return (realloc(ptr, size)); @@ -122,8 +122,8 @@ static void zone_free_definite_size(malloc_zone_t *zone, void *ptr, size_t size) { - if (ivsalloc(ptr) != 0) { - assert(ivsalloc(ptr) == size); + if (ivsalloc(ptr, config_prof) != 0) { + assert(ivsalloc(ptr, config_prof) == size); je_free(ptr); return; } From 5ff709c264e52651de25b788692c62ff1f6f389c Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 11 Apr 2012 18:13:45 -0700 Subject: [PATCH 110/205] Normalize aligned allocation algorithms. 
Normalize arena_palloc(), chunk_alloc_mmap_slow(), and chunk_recycle_dss() to use the same algorithm for trimming over-allocation. Add the ALIGNMENT_ADDR2BASE(), ALIGNMENT_ADDR2OFFSET(), and ALIGNMENT_CEILING() macros, and use them where appropriate. Remove the run_size_p parameter from sa2u(). Fix a potential deadlock in chunk_recycle_dss() that was introduced by eae269036c9f702d9fa9be497a1a2aa1be13a29e (Add alignment support to chunk_alloc()). --- include/jemalloc/internal/arena.h | 3 +- .../jemalloc/internal/jemalloc_internal.h.in | 65 ++++++------- include/jemalloc/internal/private_namespace.h | 1 - src/arena.c | 52 +++++----- src/chunk_dss.c | 94 +++++++++++-------- src/chunk_mmap.c | 50 ++++------ src/ckh.c | 6 +- src/jemalloc.c | 22 ++--- 8 files changed, 138 insertions(+), 155 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index f52fac42..3790818c 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -407,8 +407,7 @@ void arena_alloc_junk_small(void *ptr, arena_bin_info_t *bin_info, void arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info); void *arena_malloc_small(arena_t *arena, size_t size, bool zero); void *arena_malloc_large(arena_t *arena, size_t size, bool zero); -void *arena_palloc(arena_t *arena, size_t size, size_t alloc_size, - size_t alignment, bool zero); +void *arena_palloc(arena_t *arena, size_t size, size_t alignment, bool zero); size_t arena_salloc(const void *ptr, bool demote); void arena_prof_promoted(const void *ptr, size_t size); void arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index a16e5e27..57895fb3 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -293,6 +293,18 @@ static const bool config_ivsalloc = #define PAGE_CEILING(s) \ (((s) + PAGE_MASK) & ~PAGE_MASK) +/* Return the nearest aligned address at or below a. */ +#define ALIGNMENT_ADDR2BASE(a, alignment) \ + ((void *)((uintptr_t)(a) & (-(alignment)))) + +/* Return the offset between a and the nearest aligned address at or below a. */ +#define ALIGNMENT_ADDR2OFFSET(a, alignment) \ + ((size_t)((uintptr_t)(a) & (alignment - 1))) + +/* Return the smallest alignment multiple that is >= s. */ +#define ALIGNMENT_CEILING(s, alignment) \ + (((s) + (alignment - 1)) & (-(alignment))) + #ifdef JEMALLOC_VALGRIND /* * The JEMALLOC_VALGRIND_*() macros must be macros rather than functions @@ -499,7 +511,7 @@ void jemalloc_postfork_child(void); malloc_tsd_protos(JEMALLOC_ATTR(unused), arenas, arena_t *) size_t s2u(size_t size); -size_t sa2u(size_t size, size_t alignment, size_t *run_size_p); +size_t sa2u(size_t size, size_t alignment); arena_t *choose_arena(arena_t *arena); #endif @@ -531,10 +543,12 @@ s2u(size_t size) * specified size and alignment. */ JEMALLOC_INLINE size_t -sa2u(size_t size, size_t alignment, size_t *run_size_p) +sa2u(size_t size, size_t alignment) { size_t usize; + assert(alignment != 0 && ((alignment - 1) & alignment) == 0); + /* * Round size up to the nearest multiple of alignment. 
* @@ -549,7 +563,7 @@ sa2u(size_t size, size_t alignment, size_t *run_size_p) * 144 | 10100000 | 32 * 192 | 11000000 | 64 */ - usize = (size + (alignment - 1)) & (-alignment); + usize = ALIGNMENT_CEILING(size, alignment); /* * (usize < size) protects against the combination of maximal * alignment and size greater than maximal alignment. @@ -592,24 +606,10 @@ sa2u(size_t size, size_t alignment, size_t *run_size_p) /* * Calculate the size of the over-size run that arena_palloc() * would need to allocate in order to guarantee the alignment. + * If the run wouldn't fit within a chunk, round up to a huge + * allocation size. */ - if (usize >= alignment) - run_size = usize + alignment - PAGE; - else { - /* - * It is possible that (alignment << 1) will cause - * overflow, but it doesn't matter because we also - * subtract PAGE, which in the case of overflow leaves - * us with a very large run_size. That causes the - * first conditional below to fail, which means that - * the bogus run_size value never gets used for - * anything important. - */ - run_size = (alignment << 1) - PAGE; - } - if (run_size_p != NULL) - *run_size_p = run_size; - + run_size = usize + alignment - PAGE; if (run_size <= arena_maxclass) return (PAGE_CEILING(usize)); return (CHUNK_CEILING(usize)); @@ -685,32 +685,21 @@ ipalloc(size_t usize, size_t alignment, bool zero) void *ret; assert(usize != 0); - assert(usize == sa2u(usize, alignment, NULL)); + assert(usize == sa2u(usize, alignment)); if (usize <= arena_maxclass && alignment <= PAGE) ret = arena_malloc(NULL, usize, zero, true); else { - size_t run_size JEMALLOC_CC_SILENCE_INIT(0); - - /* - * Ideally we would only ever call sa2u() once per aligned - * allocation request, and the caller of this function has - * already done so once. However, it's rather burdensome to - * require every caller to pass in run_size, especially given - * that it's only relevant to large allocations. Therefore, - * just call it again here in order to get run_size. - */ - sa2u(usize, alignment, &run_size); - if (run_size <= arena_maxclass) { - ret = arena_palloc(choose_arena(NULL), usize, run_size, - alignment, zero); + if (usize <= arena_maxclass) { + ret = arena_palloc(choose_arena(NULL), usize, alignment, + zero); } else if (alignment <= chunksize) ret = huge_malloc(usize, zero); else ret = huge_palloc(usize, alignment, zero); } - assert(((uintptr_t)ret & (alignment - 1)) == 0); + assert(ALIGNMENT_ADDR2BASE(ret, alignment) == ret); return (ret); } @@ -818,7 +807,7 @@ iralloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero, */ if (no_move) return (NULL); - usize = sa2u(size + extra, alignment, NULL); + usize = sa2u(size + extra, alignment); if (usize == 0) return (NULL); ret = ipalloc(usize, alignment, zero); @@ -826,7 +815,7 @@ iralloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero, if (extra == 0) return (NULL); /* Try again, without extra this time. 
*/ - usize = sa2u(size, alignment, NULL); + usize = sa2u(size, alignment); if (usize == 0) return (NULL); ret = ipalloc(usize, alignment, zero); diff --git a/include/jemalloc/internal/private_namespace.h b/include/jemalloc/internal/private_namespace.h index a962192c..fca65950 100644 --- a/include/jemalloc/internal/private_namespace.h +++ b/include/jemalloc/internal/private_namespace.h @@ -55,7 +55,6 @@ #define chunk_alloc JEMALLOC_N(chunk_alloc) #define chunk_alloc_dss JEMALLOC_N(chunk_alloc_dss) #define chunk_alloc_mmap JEMALLOC_N(chunk_alloc_mmap) -#define chunk_alloc_mmap_noreserve JEMALLOC_N(chunk_alloc_mmap_noreserve) #define chunk_boot JEMALLOC_N(chunk_boot) #define chunk_dealloc JEMALLOC_N(chunk_dealloc) #define chunk_dealloc_dss JEMALLOC_N(chunk_dealloc_dss) diff --git a/src/arena.c b/src/arena.c index 1d4f61ea..1a108db5 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1418,48 +1418,38 @@ arena_malloc_large(arena_t *arena, size_t size, bool zero) /* Only handles large allocations that require more than page alignment. */ void * -arena_palloc(arena_t *arena, size_t size, size_t alloc_size, size_t alignment, - bool zero) +arena_palloc(arena_t *arena, size_t size, size_t alignment, bool zero) { void *ret; - size_t offset; + size_t alloc_size, leadsize, trailsize; + arena_run_t *run; arena_chunk_t *chunk; assert((size & PAGE_MASK) == 0); alignment = PAGE_CEILING(alignment); + alloc_size = size + alignment - PAGE; malloc_mutex_lock(&arena->lock); - ret = (void *)arena_run_alloc(arena, alloc_size, true, zero); - if (ret == NULL) { + run = arena_run_alloc(arena, alloc_size, true, zero); + if (run == NULL) { malloc_mutex_unlock(&arena->lock); return (NULL); } + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ret); - - offset = (uintptr_t)ret & (alignment - 1); - assert((offset & PAGE_MASK) == 0); - assert(offset < alloc_size); - if (offset == 0) - arena_run_trim_tail(arena, chunk, ret, alloc_size, size, false); - else { - size_t leadsize, trailsize; - - leadsize = alignment - offset; - if (leadsize > 0) { - arena_run_trim_head(arena, chunk, ret, alloc_size, - alloc_size - leadsize); - ret = (void *)((uintptr_t)ret + leadsize); - } - - trailsize = alloc_size - leadsize - size; - if (trailsize != 0) { - /* Trim trailing space. */ - assert(trailsize < alloc_size); - arena_run_trim_tail(arena, chunk, ret, size + trailsize, - size, false); - } + leadsize = ALIGNMENT_CEILING((uintptr_t)run, alignment) - + (uintptr_t)run; + assert(alloc_size >= leadsize + size); + trailsize = alloc_size - leadsize - size; + ret = (void *)((uintptr_t)run + leadsize); + if (leadsize != 0) { + arena_run_trim_head(arena, chunk, run, alloc_size, alloc_size - + leadsize); + } + if (trailsize != 0) { + arena_run_trim_tail(arena, chunk, ret, size + trailsize, size, + false); } if (config_stats) { @@ -1950,7 +1940,7 @@ arena_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, * copying. */ if (alignment != 0) { - size_t usize = sa2u(size + extra, alignment, NULL); + size_t usize = sa2u(size + extra, alignment); if (usize == 0) return (NULL); ret = ipalloc(usize, alignment, zero); @@ -1962,7 +1952,7 @@ arena_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, return (NULL); /* Try again, this time without extra. 
*/ if (alignment != 0) { - size_t usize = sa2u(size, alignment, NULL); + size_t usize = sa2u(size, alignment); if (usize == 0) return (NULL); ret = ipalloc(usize, alignment, zero); diff --git a/src/chunk_dss.c b/src/chunk_dss.c index ccd86b91..7c034092 100644 --- a/src/chunk_dss.c +++ b/src/chunk_dss.c @@ -36,51 +36,71 @@ static extent_node_t *chunk_dealloc_dss_record(void *chunk, size_t size); static void * chunk_recycle_dss(size_t size, size_t alignment, bool *zero) { - extent_node_t *node, key; + void *ret; + extent_node_t *node; + extent_node_t key; + size_t alloc_size, leadsize, trailsize; cassert(config_dss); + alloc_size = size + alignment - chunksize; + /* Beware size_t wrap-around. */ + if (alloc_size < size) + return (NULL); key.addr = NULL; - key.size = size + alignment - chunksize; + key.size = alloc_size; malloc_mutex_lock(&dss_mtx); node = extent_tree_szad_nsearch(&dss_chunks_szad, &key); - if (node != NULL) { - size_t offset = (size_t)((uintptr_t)(node->addr) & (alignment - - 1)); - void *ret; - if (offset > 0) - offset = alignment - offset; - ret = (void *)((uintptr_t)(node->addr) + offset); - - /* Remove node from the tree. */ - extent_tree_szad_remove(&dss_chunks_szad, node); - extent_tree_ad_remove(&dss_chunks_ad, node); - if (offset > 0) { - /* Insert the leading space as a smaller chunk. */ - node->size = offset; - extent_tree_szad_insert(&dss_chunks_szad, node); - extent_tree_ad_insert(&dss_chunks_ad, node); - } - if (alignment - chunksize > offset) { - if (offset > 0) - node = base_node_alloc(); - /* Insert the trailing space as a smaller chunk. */ - node->addr = (void *)((uintptr_t)(ret) + size); - node->size = alignment - chunksize - offset; - extent_tree_szad_insert(&dss_chunks_szad, node); - extent_tree_ad_insert(&dss_chunks_ad, node); - } else if (offset == 0) - base_node_dealloc(node); - + if (node == NULL) { malloc_mutex_unlock(&dss_mtx); - - if (*zero) - memset(ret, 0, size); - return (ret); + return (NULL); + } + leadsize = ALIGNMENT_CEILING((uintptr_t)node->addr, alignment) - + (uintptr_t)node->addr; + assert(alloc_size >= leadsize + size); + trailsize = alloc_size - leadsize - size; + ret = (void *)((uintptr_t)node->addr + leadsize); + /* Remove node from the tree. */ + extent_tree_szad_remove(&dss_chunks_szad, node); + extent_tree_ad_remove(&dss_chunks_ad, node); + if (leadsize != 0) { + /* Insert the leading space as a smaller chunk. */ + node->size = leadsize; + extent_tree_szad_insert(&dss_chunks_szad, node); + extent_tree_ad_insert(&dss_chunks_ad, node); + node = NULL; + } + if (trailsize != 0) { + /* Insert the trailing space as a smaller chunk. */ + if (node == NULL) { + /* + * An additional node is required, but + * base_node_alloc() can cause a new base chunk to be + * allocated. Drop dss_mtx in order to avoid deadlock, + * and if node allocation fails, deallocate the result + * before returning an error. + */ + malloc_mutex_unlock(&dss_mtx); + node = base_node_alloc(); + if (node == NULL) { + chunk_dealloc_dss(ret, size); + return (NULL); + } + malloc_mutex_lock(&dss_mtx); + } + node->addr = (void *)((uintptr_t)(ret) + size); + node->size = trailsize; + extent_tree_szad_insert(&dss_chunks_szad, node); + extent_tree_ad_insert(&dss_chunks_ad, node); + node = NULL; } malloc_mutex_unlock(&dss_mtx); - return (NULL); + if (node != NULL) + base_node_dealloc(node); + if (*zero) + memset(ret, 0, size); + return (ret); } void * @@ -129,8 +149,8 @@ chunk_alloc_dss(size_t size, size_t alignment, bool *zero) * recycled for later use. 
*/ cpad = (void *)((uintptr_t)dss_max + gap_size); - ret = (void *)(((uintptr_t)dss_max + (alignment - 1)) & - ~(alignment - 1)); + ret = (void *)ALIGNMENT_CEILING((uintptr_t)dss_max, + alignment); cpad_size = (uintptr_t)ret - (uintptr_t)cpad; dss_next = (void *)((uintptr_t)ret + size); if ((uintptr_t)ret < (uintptr_t)dss_max || diff --git a/src/chunk_mmap.c b/src/chunk_mmap.c index 37dad204..6cbf094a 100644 --- a/src/chunk_mmap.c +++ b/src/chunk_mmap.c @@ -19,7 +19,6 @@ static void *pages_map(void *addr, size_t size); static void pages_unmap(void *addr, size_t size); static void *chunk_alloc_mmap_slow(size_t size, size_t alignment, bool unaligned); -static void *chunk_alloc_mmap_internal(size_t size, size_t alignment); /******************************************************************************/ @@ -76,34 +75,28 @@ pages_unmap(void *addr, size_t size) static void * chunk_alloc_mmap_slow(size_t size, size_t alignment, bool unaligned) { - void *ret; - size_t offset; + void *ret, *pages; + size_t alloc_size, leadsize, trailsize; + alloc_size = size + alignment - PAGE; /* Beware size_t wrap-around. */ - if (size + chunksize <= size) + if (alloc_size < size) return (NULL); - - ret = pages_map(NULL, size + alignment); - if (ret == NULL) + pages = pages_map(NULL, alloc_size); + if (pages == NULL) return (NULL); - - /* Clean up unneeded leading/trailing space. */ - offset = (size_t)((uintptr_t)(ret) & (alignment - 1)); - if (offset != 0) { + leadsize = ALIGNMENT_CEILING((uintptr_t)pages, alignment) - + (uintptr_t)pages; + assert(alloc_size >= leadsize + size); + trailsize = alloc_size - leadsize - size; + ret = (void *)((uintptr_t)pages + leadsize); + if (leadsize != 0) { /* Note that mmap() returned an unaligned mapping. */ unaligned = true; - - /* Leading space. */ - pages_unmap(ret, alignment - offset); - - ret = (void *)((uintptr_t)ret + (alignment - offset)); - - /* Trailing space. */ - pages_unmap((void *)((uintptr_t)ret + size), offset); - } else { - /* Trailing space only. 
*/ - pages_unmap((void *)((uintptr_t)ret + size), alignment); + pages_unmap(pages, leadsize); } + if (trailsize != 0) + pages_unmap((void *)((uintptr_t)ret + size), trailsize); /* * If mmap() returned an aligned mapping, reset mmap_unaligned so that @@ -118,8 +111,8 @@ chunk_alloc_mmap_slow(size_t size, size_t alignment, bool unaligned) return (ret); } -static void * -chunk_alloc_mmap_internal(size_t size, size_t alignment) +void * +chunk_alloc_mmap(size_t size, size_t alignment) { void *ret; @@ -158,7 +151,7 @@ chunk_alloc_mmap_internal(size_t size, size_t alignment) if (ret == NULL) return (NULL); - offset = (size_t)((uintptr_t)(ret) & (alignment - 1)); + offset = ALIGNMENT_ADDR2OFFSET(ret, alignment); if (offset != 0) { bool mu = true; mmap_unaligned_tsd_set(&mu); @@ -185,13 +178,6 @@ chunk_alloc_mmap_internal(size_t size, size_t alignment) return (ret); } -void * -chunk_alloc_mmap(size_t size, size_t alignment) -{ - - return (chunk_alloc_mmap_internal(size, alignment)); -} - void chunk_dealloc_mmap(void *chunk, size_t size) { diff --git a/src/ckh.c b/src/ckh.c index 169fc0d4..742a950b 100644 --- a/src/ckh.c +++ b/src/ckh.c @@ -264,7 +264,7 @@ ckh_grow(ckh_t *ckh) size_t usize; lg_curcells++; - usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE, NULL); + usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE); if (usize == 0) { ret = true; goto label_return; @@ -309,7 +309,7 @@ ckh_shrink(ckh_t *ckh) */ lg_prevbuckets = ckh->lg_curbuckets; lg_curcells = ckh->lg_curbuckets + LG_CKH_BUCKET_CELLS - 1; - usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE, NULL); + usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE); if (usize == 0) return; tab = (ckhc_t *)ipalloc(usize, CACHELINE, true); @@ -382,7 +382,7 @@ ckh_new(ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh_keycomp_t *keycomp) ckh->hash = hash; ckh->keycomp = keycomp; - usize = sa2u(sizeof(ckhc_t) << lg_mincells, CACHELINE, NULL); + usize = sa2u(sizeof(ckhc_t) << lg_mincells, CACHELINE); if (usize == 0) { ret = true; goto label_return; diff --git a/src/jemalloc.c b/src/jemalloc.c index 237dd589..8e10c556 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -862,7 +862,7 @@ imemalign(void **memptr, size_t alignment, size_t size, goto label_return; } - usize = sa2u(size, alignment, NULL); + usize = sa2u(size, alignment); if (usize == 0) { result = NULL; ret = ENOMEM; @@ -878,9 +878,9 @@ imemalign(void **memptr, size_t alignment, size_t size, if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize <= SMALL_MAXCLASS) { assert(sa2u(SMALL_MAXCLASS+1, - alignment, NULL) != 0); + alignment) != 0); result = ipalloc(sa2u(SMALL_MAXCLASS+1, - alignment, NULL), alignment, false); + alignment), alignment, false); if (result != NULL) { arena_prof_promoted(result, usize); @@ -1343,8 +1343,8 @@ JEMALLOC_INLINE void * iallocm(size_t usize, size_t alignment, bool zero) { - assert(usize == ((alignment == 0) ? s2u(usize) : sa2u(usize, alignment, - NULL))); + assert(usize == ((alignment == 0) ? s2u(usize) : sa2u(usize, + alignment))); if (alignment != 0) return (ipalloc(usize, alignment, zero)); @@ -1372,7 +1372,7 @@ je_allocm(void **ptr, size_t *rsize, size_t size, int flags) if (malloc_init()) goto label_oom; - usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment, NULL); + usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment); if (usize == 0) goto label_oom; @@ -1384,7 +1384,7 @@ je_allocm(void **ptr, size_t *rsize, size_t size, int flags) SMALL_MAXCLASS) { size_t usize_promoted = (alignment == 0) ? 
s2u(SMALL_MAXCLASS+1) : sa2u(SMALL_MAXCLASS+1, - alignment, NULL); + alignment); assert(usize_promoted != 0); p = iallocm(usize_promoted, alignment, zero); if (p == NULL) @@ -1454,7 +1454,7 @@ je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags) * decide whether to sample. */ size_t max_usize = (alignment == 0) ? s2u(size+extra) : - sa2u(size+extra, alignment, NULL); + sa2u(size+extra, alignment); prof_ctx_t *old_ctx = prof_ctx_get(p); old_size = isalloc(p, true); if (config_valgrind && opt_valgrind) @@ -1466,8 +1466,8 @@ je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags) * Use minimum usize to determine whether promotion may happen. */ if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U - && ((alignment == 0) ? s2u(size) : sa2u(size, - alignment, NULL)) <= SMALL_MAXCLASS) { + && ((alignment == 0) ? s2u(size) : sa2u(size, alignment)) + <= SMALL_MAXCLASS) { q = iralloc(p, SMALL_MAXCLASS+1, (SMALL_MAXCLASS+1 >= size+extra) ? 0 : size+extra - (SMALL_MAXCLASS+1), alignment, zero, no_move); @@ -1596,7 +1596,7 @@ je_nallocm(size_t *rsize, size_t size, int flags) if (malloc_init()) return (ALLOCM_ERR_OOM); - usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment, NULL); + usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment); if (usize == 0) return (ALLOCM_ERR_OOM); From 83c324acd8bd5f32e0ce9b4d3df2f1a0ae46f487 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Thu, 12 Apr 2012 10:13:03 +0200 Subject: [PATCH 111/205] Use a stub replacement and disable dss when sbrk is not supported --- configure.ac | 8 ++++++++ include/jemalloc/jemalloc_defs.h.in | 3 +++ src/chunk_dss.c | 11 +++++++++++ 3 files changed, 22 insertions(+) diff --git a/configure.ac b/configure.ac index a272ecd0..739cfa9a 100644 --- a/configure.ac +++ b/configure.ac @@ -678,6 +678,14 @@ fi ], [enable_dss="0"] ) +dnl Check whether the BSD/SUSv1 sbrk() exists. If not, disable DSS support. +AC_CHECK_FUNC([sbrk], [have_sbrk="1"], [have_sbrk="0"]) +if test "x$have_sbrk" = "x1" ; then + AC_DEFINE([JEMALLOC_HAVE_SBRK], [ ]) +else + enable_dss="0" +fi + if test "x$enable_dss" = "x1" ; then AC_DEFINE([JEMALLOC_DSS], [ ]) fi diff --git a/include/jemalloc/jemalloc_defs.h.in b/include/jemalloc/jemalloc_defs.h.in index 7770a7af..28fe5e9a 100644 --- a/include/jemalloc/jemalloc_defs.h.in +++ b/include/jemalloc/jemalloc_defs.h.in @@ -108,6 +108,9 @@ # define JEMALLOC_ATTR(s) JEMALLOC_CATTR(s,) #endif +/* Defined if sbrk() is supported. */ +#undef JEMALLOC_HAVE_SBRK + /* Non-empty if the tls_model attribute is supported. */ #undef JEMALLOC_TLS_MODEL diff --git a/src/chunk_dss.c b/src/chunk_dss.c index 7c034092..5fb6a73d 100644 --- a/src/chunk_dss.c +++ b/src/chunk_dss.c @@ -3,6 +3,17 @@ /******************************************************************************/ /* Data. */ +#ifndef JEMALLOC_HAVE_SBRK +void * +sbrk(intptr_t increment) +{ + + not_implemented(); + + return (NULL); +} +#endif + /* * Protects sbrk() calls. This avoids malloc races among threads, though it * does not protect against races with threads that call sbrk() directly. From b8325f9cb031285585567cdeb1338aeca4185f6c Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Thu, 12 Apr 2012 15:15:35 +0200 Subject: [PATCH 112/205] Call base_boot before chunk_boot0 Chunk_boot0 calls rtree_new, which calls base_alloc, which locks the base_mtx mutex. That mutex is initialized in base_boot. 
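
As a minimal sketch of the ordering hazard (illustrative only: the names
below are simplified stand-ins, and malloc_mutex_t is assumed here to wrap
a plain pthread mutex):

    #include <pthread.h>
    #include <stddef.h>

    static pthread_mutex_t base_mtx; /* zero-filled, but not yet initialized */

    /* Stand-in for base_boot(): must run before any base_alloc() caller. */
    static int base_boot_sketch(void) {
        return (pthread_mutex_init(&base_mtx, NULL) != 0);
    }

    /* Stand-in for base_alloc(), reached via chunk_boot0() -> rtree_new(). */
    static void *base_alloc_sketch(size_t size) {
        (void)size;
        pthread_mutex_lock(&base_mtx); /* undefined if base_boot never ran */
        /* ... carve size bytes out of the current base chunk ... */
        pthread_mutex_unlock(&base_mtx);
        return (NULL);
    }

    int main(void) {
        if (base_boot_sketch())
            return (1);
        (void)base_alloc_sketch(64); /* safe only because boot ran first */
        return (0);
    }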
--- src/jemalloc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 8e10c556..1622937d 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -626,12 +626,12 @@ malloc_init_hard(void) } } - if (chunk_boot0()) { + if (base_boot()) { malloc_mutex_unlock(&init_lock); return (true); } - if (base_boot()) { + if (chunk_boot0()) { malloc_mutex_unlock(&init_lock); return (true); } From 927893b4784f732a2b8006a0490293ab18d0c2cf Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Thu, 12 Apr 2012 17:21:58 +0200 Subject: [PATCH 113/205] Remove bogus dependency test/bitmap.c #includes src/bitmap.c, which is correctly detected by gcc -MM, but building test/bitmap.o doesn't require src/bitmap.o. --- Makefile.in | 3 --- 1 file changed, 3 deletions(-) diff --git a/Makefile.in b/Makefile.in index 8aa94253..3bdfeaf3 100644 --- a/Makefile.in +++ b/Makefile.in @@ -132,9 +132,6 @@ build_doc: $(DOCS) $(CC) $(CFLAGS) -c $(CPPFLAGS) -I@objroot@test -o $@ $< @$(SHELL) -ec "$(CC) -MM $(CPPFLAGS) -I@objroot@test $< | sed \"s/\($(subst /,\/,$(notdir $(basename $@)))\)\.o\([ :]*\)/$(subst /,\/,$(strip $(dir $@)))\1.o \2/g\" > $(@:%.o=%.d)" -# Automatic dependency generation misses #include "*.c". -@objroot@test/bitmap.o : @objroot@src/bitmap.o - @objroot@test/%: @objroot@test/%.o \ @objroot@lib/libjemalloc@install_suffix@.$(SO) @mkdir -p $(@D) From fd5c36466d0665b26bd96badc1416b61bd55802e Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Thu, 12 Apr 2012 17:19:20 +0200 Subject: [PATCH 114/205] Use -MT options to build dependency files --- Makefile.in | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Makefile.in b/Makefile.in index 3bdfeaf3..59ac103f 100644 --- a/Makefile.in +++ b/Makefile.in @@ -104,12 +104,12 @@ build_doc: $(DOCS) @objroot@src/%.o: @srcroot@src/%.c @mkdir -p $(@D) $(CC) $(CFLAGS) -c $(CPPFLAGS) -o $@ $< - @$(SHELL) -ec "$(CC) -MM $(CPPFLAGS) $< | sed \"s/\($(subst /,\/,$(notdir $(basename $@)))\)\.o\([ :]*\)/$(subst /,\/,$(strip $(dir $@)))\1.o \2/g\" > $(@:%.o=%.d)" + $(CC) -MM $(CPPFLAGS) -MT $@ -o $(@:%.o=%.d) $< @objroot@src/%.pic.o: @srcroot@src/%.c @mkdir -p $(@D) $(CC) $(CFLAGS) -fPIC -DPIC -c $(CPPFLAGS) -o $@ $< - @$(SHELL) -ec "$(CC) -MM $(CPPFLAGS) $< | sed \"s/\($(subst /,\/,$(notdir $(basename $(basename $@))))\)\.o\([ :]*\)/$(subst /,\/,$(strip $(dir $@)))\1.pic.o \2/g\" > $(@:%.o=%.d)" + $(CC) -MM $(CPPFLAGS) -MT $@ -o $(@:%.o=%.d) $< %.$(SO) : %.$(SO).$(REV) @mkdir -p $(@D) @@ -130,7 +130,7 @@ build_doc: $(DOCS) @objroot@test/%.o: @srcroot@test/%.c @mkdir -p $(@D) $(CC) $(CFLAGS) -c $(CPPFLAGS) -I@objroot@test -o $@ $< - @$(SHELL) -ec "$(CC) -MM $(CPPFLAGS) -I@objroot@test $< | sed \"s/\($(subst /,\/,$(notdir $(basename $@)))\)\.o\([ :]*\)/$(subst /,\/,$(strip $(dir $@)))\1.o \2/g\" > $(@:%.o=%.d)" + $(CC) -MM $(CPPFLAGS) -I@objroot@test -MT $@ -o $(@:%.o=%.d) $< @objroot@test/%: @objroot@test/%.o \ @objroot@lib/libjemalloc@install_suffix@.$(SO) From c751b1c2b03db82a3041ffdd110cd90cb50aa34f Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 12 Apr 2012 17:08:45 -0700 Subject: [PATCH 115/205] Re-silence -MM compiler invocations. 
--- Makefile.in | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Makefile.in b/Makefile.in index 59ac103f..8828d7f7 100644 --- a/Makefile.in +++ b/Makefile.in @@ -104,12 +104,12 @@ build_doc: $(DOCS) @objroot@src/%.o: @srcroot@src/%.c @mkdir -p $(@D) $(CC) $(CFLAGS) -c $(CPPFLAGS) -o $@ $< - $(CC) -MM $(CPPFLAGS) -MT $@ -o $(@:%.o=%.d) $< + @$(CC) -MM $(CPPFLAGS) -MT $@ -o $(@:%.o=%.d) $< @objroot@src/%.pic.o: @srcroot@src/%.c @mkdir -p $(@D) $(CC) $(CFLAGS) -fPIC -DPIC -c $(CPPFLAGS) -o $@ $< - $(CC) -MM $(CPPFLAGS) -MT $@ -o $(@:%.o=%.d) $< + @$(CC) -MM $(CPPFLAGS) -MT $@ -o $(@:%.o=%.d) $< %.$(SO) : %.$(SO).$(REV) @mkdir -p $(@D) @@ -130,7 +130,7 @@ build_doc: $(DOCS) @objroot@test/%.o: @srcroot@test/%.c @mkdir -p $(@D) $(CC) $(CFLAGS) -c $(CPPFLAGS) -I@objroot@test -o $@ $< - $(CC) -MM $(CPPFLAGS) -I@objroot@test -MT $@ -o $(@:%.o=%.d) $< + @$(CC) -MM $(CPPFLAGS) -I@objroot@test -MT $@ -o $(@:%.o=%.d) $< @objroot@test/%: @objroot@test/%.o \ @objroot@lib/libjemalloc@install_suffix@.$(SO) From d6abcbb14b8d1c8beb1c61bfc5a24cb54578b85c Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 12 Apr 2012 17:09:54 -0700 Subject: [PATCH 116/205] Always disable redzone by default. Always disable redzone by default, even when --enable-debug is specified. The memory overhead for redzones can be substantial, which makes this feature something that should only be opted into. --- doc/jemalloc.xml.in | 4 +--- src/jemalloc.c | 4 +--- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index a47c7635..ee60c98a 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -865,9 +865,7 @@ for (i = 0; i < nbins; i++) { url="http://http://valgrind.org/">Valgrind, which needs redzones in order to do effective buffer overflow/underflow detection. This option is intended for debugging and will impact performance - negatively. This option is disabled by default unless - is specified during configuration, in - which case it is enabled by default. + negatively. This option is disabled by default. diff --git a/src/jemalloc.c b/src/jemalloc.c index 1622937d..9b8b52d2 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -14,17 +14,15 @@ const char *je_malloc_conf JEMALLOC_ATTR(visibility("default")); bool opt_abort = true; # ifdef JEMALLOC_FILL bool opt_junk = true; -bool opt_redzone = true; # else bool opt_junk = false; -bool opt_redzone = false; # endif #else bool opt_abort = false; bool opt_junk = false; -bool opt_redzone = false; #endif size_t opt_quarantine = ZU(0); +bool opt_redzone = false; bool opt_utrace = false; bool opt_valgrind = false; bool opt_xmalloc = false; From 7ca0fdfb85b2a9fc7a112e158892c098e004385b Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 12 Apr 2012 20:20:58 -0700 Subject: [PATCH 117/205] Disable munmap() if it causes VM map holes. Add a configure test to determine whether common mmap()/munmap() patterns cause VM map holes, and only use munmap() to discard unused chunks if the problem does not exist. Unify the chunk caching for mmap and dss. Fix options processing to limit lg_chunk to be large enough that redzones will always fit. 
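
A back-of-the-envelope check of the new lower bound (a sketch assuming
4 KiB pages; LG_PAGE is really determined at configure time):

    #include <assert.h>
    #include <stdio.h>

    #define LG_PAGE 12 /* assumed: 2^12 = 4096-byte pages */

    int main(void) {
        int config_fill;

        for (config_fill = 0; config_fill <= 1; config_fill++) {
            /* One header page, plus one data page, or three with redzones. */
            int min_pages = 1 + (config_fill ? 3 : 1);
            int lg_chunk_min = LG_PAGE + (config_fill ? 2 : 1);
            assert((1 << lg_chunk_min) >= min_pages << LG_PAGE);
            printf("config_fill=%d: lg_chunk >= %d (%d bytes)\n",
                config_fill, lg_chunk_min, 1 << lg_chunk_min);
        }
        return (0);
    }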
---
 configure.ac                                  |  67 +++++
 include/jemalloc/internal/chunk_dss.h         |   1 -
 include/jemalloc/internal/chunk_mmap.h        |   2 +-
 .../jemalloc/internal/jemalloc_internal.h.in  |   7 +
 include/jemalloc/internal/private_namespace.h |   1 -
 include/jemalloc/jemalloc_defs.h.in           |  15 ++
 src/arena.c                                   |  10 +-
 src/chunk.c                                   | 170 ++++++++++++-
 src/chunk_dss.c                               | 230 +-----------------
 src/chunk_mmap.c                              |   7 +-
 src/jemalloc.c                                |  11 +-
 11 files changed, 277 insertions(+), 244 deletions(-)

diff --git a/configure.ac b/configure.ac
index 739cfa9a..8d20659f 100644
--- a/configure.ac
+++ b/configure.ac
@@ -817,6 +817,73 @@ else
   AC_MSG_ERROR([cannot determine value for STATIC_PAGE_SHIFT])
 fi
 
+dnl Determine whether common sequences of mmap()/munmap() calls will leave
+dnl semi-permanent VM map holes.  If so, disable munmap.
+AC_CACHE_CHECK([whether munmap() leaves semi-permanent VM map holes],
+  [je_cv_vmmap_hole],
+  AC_RUN_IFELSE([AC_LANG_PROGRAM(
+[[#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+
+#define NPTRS 11
+#define MMAP_SIZE ((size_t)(1U << 22))
+
+static void *
+do_mmap(size_t size)
+{
+	void *ret;
+
+	ret = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1,
+	    0);
+	if (ret == MAP_FAILED) {
+		fprintf(stderr, "mmap() error\n");
+		exit(1);
+	}
+
+	return (ret);
+}
+
+static void
+do_munmap(void *ptr, size_t size)
+{
+	if (munmap(ptr, size) == -1) {
+		fprintf(stderr, "munmap() error\n");
+		exit(1);
+	}
+}
+]],
+[[
+	void *p0, *p1, *p2, *p3, *p4;
+	FILE *f;
+
+	f = fopen("conftest.out", "w");
+	if (f == NULL)
+		exit(1);
+
+	p0 = do_mmap(MMAP_SIZE);
+	p1 = do_mmap(MMAP_SIZE);
+	p2 = do_mmap(MMAP_SIZE);
+	do_munmap(p1, MMAP_SIZE);
+	p3 = do_mmap(MMAP_SIZE * 2);
+	do_munmap(p3, MMAP_SIZE * 2);
+	p4 = do_mmap(MMAP_SIZE);
+	if (p4 != p1) {
+		fprintf(stderr, "Hoped for %p, got %p\n", p1, p4);
+		fprintf(stderr, "%p..%p..%p..%p..%p\n", p0, p1, p2, p3, p4);
+		fprintf(f, "yes\n");
+	} else
+		fprintf(f, "no\n");
+
+	fclose(f);
+	return (0);
+]])],
+  [je_cv_vmmap_hole=`cat conftest.out`],
+  [je_cv_vmmap_hole=unknown]))
+if test "x$je_cv_vmmap_hole" = "xno" ; then
+  AC_DEFINE([JEMALLOC_MUNMAP], [ ])
+fi
+
 dnl ============================================================================
 dnl jemalloc configuration.
dnl diff --git a/include/jemalloc/internal/chunk_dss.h b/include/jemalloc/internal/chunk_dss.h index 16ea9542..6e2643b2 100644 --- a/include/jemalloc/internal/chunk_dss.h +++ b/include/jemalloc/internal/chunk_dss.h @@ -11,7 +11,6 @@ void *chunk_alloc_dss(size_t size, size_t alignment, bool *zero); bool chunk_in_dss(void *chunk); -bool chunk_dealloc_dss(void *chunk, size_t size); bool chunk_dss_boot(void); void chunk_dss_prefork(void); void chunk_dss_postfork_parent(void); diff --git a/include/jemalloc/internal/chunk_mmap.h b/include/jemalloc/internal/chunk_mmap.h index 148fefef..04e86af9 100644 --- a/include/jemalloc/internal/chunk_mmap.h +++ b/include/jemalloc/internal/chunk_mmap.h @@ -10,7 +10,7 @@ #ifdef JEMALLOC_H_EXTERNS void *chunk_alloc_mmap(size_t size, size_t alignment); -void chunk_dealloc_mmap(void *chunk, size_t size); +bool chunk_dealloc_mmap(void *chunk, size_t size); bool chunk_mmap_boot(void); diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 57895fb3..aa21aa5d 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -102,6 +102,13 @@ static const bool config_prof_libunwind = false #endif ; +static const bool config_munmap = +#ifdef JEMALLOC_MUNMAP + true +#else + false +#endif + ; static const bool config_stats = #ifdef JEMALLOC_STATS true diff --git a/include/jemalloc/internal/private_namespace.h b/include/jemalloc/internal/private_namespace.h index fca65950..742d1605 100644 --- a/include/jemalloc/internal/private_namespace.h +++ b/include/jemalloc/internal/private_namespace.h @@ -57,7 +57,6 @@ #define chunk_alloc_mmap JEMALLOC_N(chunk_alloc_mmap) #define chunk_boot JEMALLOC_N(chunk_boot) #define chunk_dealloc JEMALLOC_N(chunk_dealloc) -#define chunk_dealloc_dss JEMALLOC_N(chunk_dealloc_dss) #define chunk_dealloc_mmap JEMALLOC_N(chunk_dealloc_mmap) #define chunk_dss_boot JEMALLOC_N(chunk_dss_boot) #define chunk_dss_postfork_child JEMALLOC_N(chunk_dss_postfork_child) diff --git a/include/jemalloc/jemalloc_defs.h.in b/include/jemalloc/jemalloc_defs.h.in index 28fe5e9a..b6e5593b 100644 --- a/include/jemalloc/jemalloc_defs.h.in +++ b/include/jemalloc/jemalloc_defs.h.in @@ -172,6 +172,14 @@ /* One page is 2^STATIC_PAGE_SHIFT bytes. */ #undef STATIC_PAGE_SHIFT +/* + * If defined, use munmap() to unmap freed chunks, rather than storing them for + * later reuse. This is automatically disabled if configuration determines + * that common sequences of mmap()/munmap() calls will cause virtual memory map + * holes. + */ +#undef JEMALLOC_MUNMAP + /* TLS is used to map arenas and magazine caches to threads. */ #undef JEMALLOC_TLS @@ -209,6 +217,13 @@ */ #undef JEMALLOC_PURGE_MADVISE_DONTNEED #undef JEMALLOC_PURGE_MADVISE_FREE +#ifdef JEMALLOC_PURGE_MADVISE_DONTNEED +# define JEMALLOC_MADV_PURGE MADV_DONTNEED +#elif defined(JEMALLOC_PURGE_MADVISE_FREE) +# define JEMALLOC_MADV_PURGE MADV_FREE +#else +# error "No method defined for purging unused dirty pages." +#endif /* sizeof(void *) == 2^LG_SIZEOF_PTR. */ #undef LG_SIZEOF_PTR diff --git a/src/arena.c b/src/arena.c index 1a108db5..989034d4 100644 --- a/src/arena.c +++ b/src/arena.c @@ -676,16 +676,8 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) if (config_debug) ndirty -= npages; -#ifdef JEMALLOC_PURGE_MADVISE_DONTNEED -# define MADV_PURGE MADV_DONTNEED -#elif defined(JEMALLOC_PURGE_MADVISE_FREE) -# define MADV_PURGE MADV_FREE -#else -# error "No method defined for purging unused dirty pages." 
-#endif madvise((void *)((uintptr_t)chunk + (pageind << LG_PAGE)), - (npages << LG_PAGE), MADV_PURGE); -#undef MADV_PURGE + (npages << LG_PAGE), JEMALLOC_MADV_PURGE); if (config_stats) nmadvise++; } diff --git a/src/chunk.c b/src/chunk.c index b0641294..67e0d503 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -9,6 +9,15 @@ size_t opt_lg_chunk = LG_CHUNK_DEFAULT; malloc_mutex_t chunks_mtx; chunk_stats_t stats_chunks; +/* + * Trees of chunks that were previously allocated (trees differ only in node + * ordering). These are used when allocating chunks, in an attempt to re-use + * address space. Depending on function, different tree orderings are needed, + * which is why there are two trees with the same contents. + */ +static extent_tree_t chunks_szad; +static extent_tree_t chunks_ad; + rtree_t *chunks_rtree; /* Various chunk-related settings. */ @@ -19,6 +28,84 @@ size_t map_bias; size_t arena_maxclass; /* Max size class for arenas. */ /******************************************************************************/ +/* Function prototypes for non-inline static functions. */ + +static void *chunk_recycle(size_t size, size_t alignment, bool *zero); +static void chunk_record(void *chunk, size_t size); + +/******************************************************************************/ + +static void * +chunk_recycle(size_t size, size_t alignment, bool *zero) +{ + void *ret; + extent_node_t *node; + extent_node_t key; + size_t alloc_size, leadsize, trailsize; + + alloc_size = size + alignment - chunksize; + /* Beware size_t wrap-around. */ + if (alloc_size < size) + return (NULL); + key.addr = NULL; + key.size = alloc_size; + malloc_mutex_lock(&chunks_mtx); + node = extent_tree_szad_nsearch(&chunks_szad, &key); + if (node == NULL) { + malloc_mutex_unlock(&chunks_mtx); + return (NULL); + } + leadsize = ALIGNMENT_CEILING((uintptr_t)node->addr, alignment) - + (uintptr_t)node->addr; + assert(alloc_size >= leadsize + size); + trailsize = alloc_size - leadsize - size; + ret = (void *)((uintptr_t)node->addr + leadsize); + /* Remove node from the tree. */ + extent_tree_szad_remove(&chunks_szad, node); + extent_tree_ad_remove(&chunks_ad, node); + if (leadsize != 0) { + /* Insert the leading space as a smaller chunk. */ + node->size = leadsize; + extent_tree_szad_insert(&chunks_szad, node); + extent_tree_ad_insert(&chunks_ad, node); + node = NULL; + } + if (trailsize != 0) { + /* Insert the trailing space as a smaller chunk. */ + if (node == NULL) { + /* + * An additional node is required, but + * base_node_alloc() can cause a new base chunk to be + * allocated. Drop chunks_mtx in order to avoid + * deadlock, and if node allocation fails, deallocate + * the result before returning an error. 
+ */ + malloc_mutex_unlock(&chunks_mtx); + node = base_node_alloc(); + if (node == NULL) { + chunk_dealloc(ret, size, true); + return (NULL); + } + malloc_mutex_lock(&chunks_mtx); + } + node->addr = (void *)((uintptr_t)(ret) + size); + node->size = trailsize; + extent_tree_szad_insert(&chunks_szad, node); + extent_tree_ad_insert(&chunks_ad, node); + node = NULL; + } + malloc_mutex_unlock(&chunks_mtx); + + if (node != NULL) + base_node_dealloc(node); +#ifdef JEMALLOC_PURGE_MADVISE_FREE + if (*zero) { + VALGRIND_MAKE_MEM_UNDEFINED(ret, size); + memset(ret, 0, size); + } +#endif + return (ret); +} /* * If the caller specifies (*zero == false), it is still possible to receive @@ -35,6 +122,9 @@ chunk_alloc(size_t size, size_t alignment, bool base, bool *zero) assert((size & chunksize_mask) == 0); assert((alignment & chunksize_mask) == 0); + ret = chunk_recycle(size, alignment, zero); + if (ret != NULL) + goto label_return; if (config_dss) { ret = chunk_alloc_dss(size, alignment, zero); if (ret != NULL) @@ -76,6 +166,80 @@ label_return: return (ret); } +static void +chunk_record(void *chunk, size_t size) +{ + extent_node_t *xnode, *node, *prev, key; + + madvise(chunk, size, JEMALLOC_MADV_PURGE); + + xnode = NULL; + malloc_mutex_lock(&chunks_mtx); + while (true) { + key.addr = (void *)((uintptr_t)chunk + size); + node = extent_tree_ad_nsearch(&chunks_ad, &key); + /* Try to coalesce forward. */ + if (node != NULL && node->addr == key.addr) { + /* + * Coalesce chunk with the following address range. + * This does not change the position within chunks_ad, + * so only remove/insert from/into chunks_szad. + */ + extent_tree_szad_remove(&chunks_szad, node); + node->addr = chunk; + node->size += size; + extent_tree_szad_insert(&chunks_szad, node); + break; + } else if (xnode == NULL) { + /* + * It is possible that base_node_alloc() will cause a + * new base chunk to be allocated, so take care not to + * deadlock on chunks_mtx, and recover if another thread + * deallocates an adjacent chunk while this one is busy + * allocating xnode. + */ + malloc_mutex_unlock(&chunks_mtx); + xnode = base_node_alloc(); + if (xnode == NULL) + return; + malloc_mutex_lock(&chunks_mtx); + } else { + /* Coalescing forward failed, so insert a new node. */ + node = xnode; + xnode = NULL; + node->addr = chunk; + node->size = size; + extent_tree_ad_insert(&chunks_ad, node); + extent_tree_szad_insert(&chunks_szad, node); + break; + } + } + /* Discard xnode if it ended up unused due to a race. */ + if (xnode != NULL) + base_node_dealloc(xnode); + + /* Try to coalesce backward. */ + prev = extent_tree_ad_prev(&chunks_ad, node); + if (prev != NULL && (void *)((uintptr_t)prev->addr + prev->size) == + chunk) { + /* + * Coalesce chunk with the previous address range. This does + * not change the position within chunks_ad, so only + * remove/insert node from/into chunks_szad. 
+ */ + extent_tree_szad_remove(&chunks_szad, prev); + extent_tree_ad_remove(&chunks_ad, prev); + + extent_tree_szad_remove(&chunks_szad, node); + node->addr = prev->addr; + node->size += prev->size; + extent_tree_szad_insert(&chunks_szad, node); + + base_node_dealloc(prev); + } + malloc_mutex_unlock(&chunks_mtx); +} + void chunk_dealloc(void *chunk, size_t size, bool unmap) { @@ -94,9 +258,9 @@ chunk_dealloc(void *chunk, size_t size, bool unmap) } if (unmap) { - if (config_dss && chunk_dealloc_dss(chunk, size) == false) + if (chunk_dealloc_mmap(chunk, size) == false) return; - chunk_dealloc_mmap(chunk, size); + chunk_record(chunk, size); } } @@ -117,6 +281,8 @@ chunk_boot0(void) } if (config_dss && chunk_dss_boot()) return (true); + extent_tree_szad_new(&chunks_szad); + extent_tree_ad_new(&chunks_ad); if (config_ivsalloc) { chunks_rtree = rtree_new((ZU(1) << (LG_SIZEOF_PTR+3)) - opt_lg_chunk); diff --git a/src/chunk_dss.c b/src/chunk_dss.c index 5fb6a73d..b05509a5 100644 --- a/src/chunk_dss.c +++ b/src/chunk_dss.c @@ -3,17 +3,6 @@ /******************************************************************************/ /* Data. */ -#ifndef JEMALLOC_HAVE_SBRK -void * -sbrk(intptr_t increment) -{ - - not_implemented(); - - return (NULL); -} -#endif - /* * Protects sbrk() calls. This avoids malloc races among threads, though it * does not protect against races with threads that call sbrk() directly. @@ -27,92 +16,18 @@ static void *dss_prev; /* Current upper limit on DSS addresses. */ static void *dss_max; -/* - * Trees of chunks that were previously allocated (trees differ only in node - * ordering). These are used when allocating chunks, in an attempt to re-use - * address space. Depending on function, different tree orderings are needed, - * which is why there are two trees with the same contents. - */ -static extent_tree_t dss_chunks_szad; -static extent_tree_t dss_chunks_ad; - -/******************************************************************************/ -/* Function prototypes for non-inline static functions. */ - -static void *chunk_recycle_dss(size_t size, size_t alignment, bool *zero); -static extent_node_t *chunk_dealloc_dss_record(void *chunk, size_t size); - /******************************************************************************/ +#ifndef JEMALLOC_HAVE_SBRK static void * -chunk_recycle_dss(size_t size, size_t alignment, bool *zero) +sbrk(intptr_t increment) { - void *ret; - extent_node_t *node; - extent_node_t key; - size_t alloc_size, leadsize, trailsize; - cassert(config_dss); + not_implemented(); - alloc_size = size + alignment - chunksize; - /* Beware size_t wrap-around. */ - if (alloc_size < size) - return (NULL); - key.addr = NULL; - key.size = alloc_size; - malloc_mutex_lock(&dss_mtx); - node = extent_tree_szad_nsearch(&dss_chunks_szad, &key); - if (node == NULL) { - malloc_mutex_unlock(&dss_mtx); - return (NULL); - } - leadsize = ALIGNMENT_CEILING((uintptr_t)node->addr, alignment) - - (uintptr_t)node->addr; - assert(alloc_size >= leadsize + size); - trailsize = alloc_size - leadsize - size; - ret = (void *)((uintptr_t)node->addr + leadsize); - /* Remove node from the tree. */ - extent_tree_szad_remove(&dss_chunks_szad, node); - extent_tree_ad_remove(&dss_chunks_ad, node); - if (leadsize != 0) { - /* Insert the leading space as a smaller chunk. */ - node->size = leadsize; - extent_tree_szad_insert(&dss_chunks_szad, node); - extent_tree_ad_insert(&dss_chunks_ad, node); - node = NULL; - } - if (trailsize != 0) { - /* Insert the trailing space as a smaller chunk. 
*/ - if (node == NULL) { - /* - * An additional node is required, but - * base_node_alloc() can cause a new base chunk to be - * allocated. Drop dss_mtx in order to avoid deadlock, - * and if node allocation fails, deallocate the result - * before returning an error. - */ - malloc_mutex_unlock(&dss_mtx); - node = base_node_alloc(); - if (node == NULL) { - chunk_dealloc_dss(ret, size); - return (NULL); - } - malloc_mutex_lock(&dss_mtx); - } - node->addr = (void *)((uintptr_t)(ret) + size); - node->size = trailsize; - extent_tree_szad_insert(&dss_chunks_szad, node); - extent_tree_ad_insert(&dss_chunks_ad, node); - node = NULL; - } - malloc_mutex_unlock(&dss_mtx); - - if (node != NULL) - base_node_dealloc(node); - if (*zero) - memset(ret, 0, size); - return (ret); + return (NULL); } +#endif void * chunk_alloc_dss(size_t size, size_t alignment, bool *zero) @@ -123,10 +38,6 @@ chunk_alloc_dss(size_t size, size_t alignment, bool *zero) assert(size > 0 && (size & chunksize_mask) == 0); assert(alignment > 0 && (alignment & chunksize_mask) == 0); - ret = chunk_recycle_dss(size, alignment, zero); - if (ret != NULL) - return (ret); - /* * sbrk() uses a signed increment argument, so take care not to * interpret a huge allocation request as a negative increment. @@ -177,7 +88,7 @@ chunk_alloc_dss(size_t size, size_t alignment, bool *zero) dss_max = dss_next; malloc_mutex_unlock(&dss_mtx); if (cpad_size != 0) - chunk_dealloc_dss(cpad, cpad_size); + chunk_dealloc(cpad, cpad_size, true); *zero = true; return (ret); } @@ -188,81 +99,6 @@ chunk_alloc_dss(size_t size, size_t alignment, bool *zero) return (NULL); } -static extent_node_t * -chunk_dealloc_dss_record(void *chunk, size_t size) -{ - extent_node_t *xnode, *node, *prev, key; - - cassert(config_dss); - - xnode = NULL; - while (true) { - key.addr = (void *)((uintptr_t)chunk + size); - node = extent_tree_ad_nsearch(&dss_chunks_ad, &key); - /* Try to coalesce forward. */ - if (node != NULL && node->addr == key.addr) { - /* - * Coalesce chunk with the following address range. - * This does not change the position within - * dss_chunks_ad, so only remove/insert from/into - * dss_chunks_szad. - */ - extent_tree_szad_remove(&dss_chunks_szad, node); - node->addr = chunk; - node->size += size; - extent_tree_szad_insert(&dss_chunks_szad, node); - break; - } else if (xnode == NULL) { - /* - * It is possible that base_node_alloc() will cause a - * new base chunk to be allocated, so take care not to - * deadlock on dss_mtx, and recover if another thread - * deallocates an adjacent chunk while this one is busy - * allocating xnode. - */ - malloc_mutex_unlock(&dss_mtx); - xnode = base_node_alloc(); - malloc_mutex_lock(&dss_mtx); - if (xnode == NULL) - return (NULL); - } else { - /* Coalescing forward failed, so insert a new node. */ - node = xnode; - xnode = NULL; - node->addr = chunk; - node->size = size; - extent_tree_ad_insert(&dss_chunks_ad, node); - extent_tree_szad_insert(&dss_chunks_szad, node); - break; - } - } - /* Discard xnode if it ended up unused do to a race. */ - if (xnode != NULL) - base_node_dealloc(xnode); - - /* Try to coalesce backward. */ - prev = extent_tree_ad_prev(&dss_chunks_ad, node); - if (prev != NULL && (void *)((uintptr_t)prev->addr + prev->size) == - chunk) { - /* - * Coalesce chunk with the previous address range. This does - * not change the position within dss_chunks_ad, so only - * remove/insert node from/into dss_chunks_szad. 
- */ - extent_tree_szad_remove(&dss_chunks_szad, prev); - extent_tree_ad_remove(&dss_chunks_ad, prev); - - extent_tree_szad_remove(&dss_chunks_szad, node); - node->addr = prev->addr; - node->size += prev->size; - extent_tree_szad_insert(&dss_chunks_szad, node); - - base_node_dealloc(prev); - } - - return (node); -} - bool chunk_in_dss(void *chunk) { @@ -281,58 +117,6 @@ chunk_in_dss(void *chunk) return (ret); } -bool -chunk_dealloc_dss(void *chunk, size_t size) -{ - bool ret; - - cassert(config_dss); - - malloc_mutex_lock(&dss_mtx); - if ((uintptr_t)chunk >= (uintptr_t)dss_base - && (uintptr_t)chunk < (uintptr_t)dss_max) { - extent_node_t *node; - - /* Try to coalesce with other unused chunks. */ - node = chunk_dealloc_dss_record(chunk, size); - if (node != NULL) { - chunk = node->addr; - size = node->size; - } - - /* Get the current end of the DSS. */ - dss_max = sbrk(0); - - /* - * Try to shrink the DSS if this chunk is at the end of the - * DSS. The sbrk() call here is subject to a race condition - * with threads that use brk(2) or sbrk(2) directly, but the - * alternative would be to leak memory for the sake of poorly - * designed multi-threaded programs. - */ - if ((void *)((uintptr_t)chunk + size) == dss_max - && (dss_prev = sbrk(-(intptr_t)size)) == dss_max) { - /* Success. */ - dss_max = (void *)((intptr_t)dss_prev - (intptr_t)size); - - if (node != NULL) { - extent_tree_szad_remove(&dss_chunks_szad, node); - extent_tree_ad_remove(&dss_chunks_ad, node); - base_node_dealloc(node); - } - } else - madvise(chunk, size, MADV_DONTNEED); - - ret = false; - goto label_return; - } - - ret = true; -label_return: - malloc_mutex_unlock(&dss_mtx); - return (ret); -} - bool chunk_dss_boot(void) { @@ -344,8 +128,6 @@ chunk_dss_boot(void) dss_base = sbrk(0); dss_prev = dss_base; dss_max = dss_base; - extent_tree_szad_new(&dss_chunks_szad); - extent_tree_ad_new(&dss_chunks_ad); return (false); } diff --git a/src/chunk_mmap.c b/src/chunk_mmap.c index 6cbf094a..e11cc0e6 100644 --- a/src/chunk_mmap.c +++ b/src/chunk_mmap.c @@ -178,11 +178,14 @@ chunk_alloc_mmap(size_t size, size_t alignment) return (ret); } -void +bool chunk_dealloc_mmap(void *chunk, size_t size) { - pages_unmap(chunk, size); + if (config_munmap) + pages_unmap(chunk, size); + + return (config_munmap == false); } bool diff --git a/src/jemalloc.c b/src/jemalloc.c index 9b8b52d2..0decd8a8 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -501,11 +501,14 @@ malloc_conf_init(void) CONF_HANDLE_BOOL(opt_abort, abort) /* - * Chunks always require at least one * header page, - * plus one data page. + * Chunks always require at least one header page, plus + * one data page in the absence of redzones, or three + * pages in the presence of redzones. In order to + * simplify options processing, fix the limit based on + * config_fill. */ - CONF_HANDLE_SIZE_T(opt_lg_chunk, lg_chunk, LG_PAGE+1, - (sizeof(size_t) << 3) - 1) + CONF_HANDLE_SIZE_T(opt_lg_chunk, lg_chunk, LG_PAGE + + (config_fill ? 2 : 1), (sizeof(size_t) << 3) - 1) CONF_HANDLE_SIZE_T(opt_narenas, narenas, 1, SIZE_T_MAX) CONF_HANDLE_SSIZE_T(opt_lg_dirty_mult, lg_dirty_mult, -1, (sizeof(size_t) << 3) - 1) From 1dbfd5a209bed1a3d4bacaa31726a4179a4f1215 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 14 Apr 2012 00:16:02 -0700 Subject: [PATCH 118/205] Add/remove missing/cruft entries to/from private_namespace.h. 
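
For reference, JEMALLOC_N() is the mangling hook that makes this header
work: configure emits a private-namespace prefix, and every internal
symbol listed here is filtered through the macro so that a static link
cannot collide with another allocator's internals.  A minimal sketch of
the mechanism follows; the JEMALLOC_PRIVATE_NAMESPACE spelling and the
"je_" prefix are assumptions for illustration, not the literal
definitions generated by the build system.

/*
 * Illustrative only: assume the configured prefix is "je_".  The extra
 * JEMALLOC_CONCAT() indirection forces the prefix macro to expand
 * before token pasting takes place.
 */
#define JEMALLOC_PRIVATE_NAMESPACE je_
#define JEMALLOC_CONCAT_(a, b) a##b
#define JEMALLOC_CONCAT(a, b) JEMALLOC_CONCAT_(a, b)
#define JEMALLOC_N(n) JEMALLOC_CONCAT(JEMALLOC_PRIVATE_NAMESPACE, n)

#define arenas_cleanup JEMALLOC_N(arenas_cleanup)
/* arenas_cleanup() now compiles and links as je_arenas_cleanup(). */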
--- include/jemalloc/internal/private_namespace.h | 65 +++++++++++++++---- 1 file changed, 52 insertions(+), 13 deletions(-) diff --git a/include/jemalloc/internal/private_namespace.h b/include/jemalloc/internal/private_namespace.h index 742d1605..a69482b6 100644 --- a/include/jemalloc/internal/private_namespace.h +++ b/include/jemalloc/internal/private_namespace.h @@ -25,15 +25,25 @@ #define arena_stats_merge JEMALLOC_N(arena_stats_merge) #define arena_tcache_fill_small JEMALLOC_N(arena_tcache_fill_small) #define arenas_bin_i_index JEMALLOC_N(arenas_bin_i_index) +#define arenas_cleanup JEMALLOC_N(arenas_cleanup) #define arenas_extend JEMALLOC_N(arenas_extend) #define arenas_lrun_i_index JEMALLOC_N(arenas_lrun_i_index) #define arenas_tls JEMALLOC_N(arenas_tls) +#define arenas_tsd_boot JEMALLOC_N(arenas_tsd_boot) +#define arenas_tsd_cleanup_wrapper JEMALLOC_N(arenas_tsd_cleanup_wrapper) +#define arenas_tsd_get JEMALLOC_N(arenas_tsd_get) +#define arenas_tsd_set JEMALLOC_N(arenas_tsd_set) +#define atomic_add_u JEMALLOC_N(atomic_add_u) #define atomic_add_uint32 JEMALLOC_N(atomic_add_uint32) #define atomic_add_uint64 JEMALLOC_N(atomic_add_uint64) +#define atomic_add_z JEMALLOC_N(atomic_add_z) +#define atomic_sub_u JEMALLOC_N(atomic_sub_u) #define atomic_sub_uint32 JEMALLOC_N(atomic_sub_uint32) #define atomic_sub_uint64 JEMALLOC_N(atomic_sub_uint64) +#define atomic_sub_z JEMALLOC_N(atomic_sub_z) #define base_alloc JEMALLOC_N(base_alloc) #define base_boot JEMALLOC_N(base_boot) +#define base_calloc JEMALLOC_N(base_calloc) #define base_node_alloc JEMALLOC_N(base_node_alloc) #define base_node_dealloc JEMALLOC_N(base_node_dealloc) #define base_postfork_child JEMALLOC_N(base_postfork_child) @@ -55,7 +65,8 @@ #define chunk_alloc JEMALLOC_N(chunk_alloc) #define chunk_alloc_dss JEMALLOC_N(chunk_alloc_dss) #define chunk_alloc_mmap JEMALLOC_N(chunk_alloc_mmap) -#define chunk_boot JEMALLOC_N(chunk_boot) +#define chunk_boot0 JEMALLOC_N(chunk_boot0) +#define chunk_boot1 JEMALLOC_N(chunk_boot1) #define chunk_dealloc JEMALLOC_N(chunk_dealloc) #define chunk_dealloc_mmap JEMALLOC_N(chunk_dealloc_mmap) #define chunk_dss_boot JEMALLOC_N(chunk_dss_boot) @@ -139,25 +150,33 @@ #define iralloc JEMALLOC_N(iralloc) #define isalloc JEMALLOC_N(isalloc) #define ivsalloc JEMALLOC_N(ivsalloc) -#define jemalloc_darwin_init JEMALLOC_N(jemalloc_darwin_init) #define jemalloc_postfork_child JEMALLOC_N(jemalloc_postfork_child) #define jemalloc_postfork_parent JEMALLOC_N(jemalloc_postfork_parent) #define jemalloc_prefork JEMALLOC_N(jemalloc_prefork) #define malloc_cprintf JEMALLOC_N(malloc_cprintf) -#define malloc_mutex_destroy JEMALLOC_N(malloc_mutex_destroy) #define malloc_mutex_init JEMALLOC_N(malloc_mutex_init) #define malloc_mutex_lock JEMALLOC_N(malloc_mutex_lock) #define malloc_mutex_postfork_child JEMALLOC_N(malloc_mutex_postfork_child) #define malloc_mutex_postfork_parent JEMALLOC_N(malloc_mutex_postfork_parent) #define malloc_mutex_prefork JEMALLOC_N(malloc_mutex_prefork) -#define malloc_mutex_trylock JEMALLOC_N(malloc_mutex_trylock) #define malloc_mutex_unlock JEMALLOC_N(malloc_mutex_unlock) #define malloc_printf JEMALLOC_N(malloc_printf) #define malloc_snprintf JEMALLOC_N(malloc_snprintf) +#define malloc_strtoumax JEMALLOC_N(malloc_strtoumax) +#define malloc_tsd_boot JEMALLOC_N(malloc_tsd_boot) +#define malloc_tsd_cleanup_register JEMALLOC_N(malloc_tsd_cleanup_register) +#define malloc_tsd_dalloc JEMALLOC_N(malloc_tsd_dalloc) +#define malloc_tsd_malloc JEMALLOC_N(malloc_tsd_malloc) +#define malloc_tsd_no_cleanup 
JEMALLOC_N(malloc_tsd_no_cleanup) #define malloc_vcprintf JEMALLOC_N(malloc_vcprintf) #define malloc_vsnprintf JEMALLOC_N(malloc_vsnprintf) #define malloc_write JEMALLOC_N(malloc_write) #define mb_write JEMALLOC_N(mb_write) +#define mmap_unaligned_tsd_boot JEMALLOC_N(mmap_unaligned_tsd_boot) +#define mmap_unaligned_tsd_cleanup_wrapper JEMALLOC_N(mmap_unaligned_tsd_cleanup_wrapper) +#define mmap_unaligned_tsd_get JEMALLOC_N(mmap_unaligned_tsd_get) +#define mmap_unaligned_tsd_set JEMALLOC_N(mmap_unaligned_tsd_set) +#define mutex_boot JEMALLOC_N(mutex_boot) #define opt_abort JEMALLOC_N(opt_abort) #define opt_junk JEMALLOC_N(opt_junk) #define opt_lg_chunk JEMALLOC_N(opt_lg_chunk) @@ -193,11 +212,18 @@ #define prof_realloc JEMALLOC_N(prof_realloc) #define prof_sample_accum_update JEMALLOC_N(prof_sample_accum_update) #define prof_sample_threshold_update JEMALLOC_N(prof_sample_threshold_update) -#define prof_tdata_init JEMALLOC_N(prof_tdata_init) -#define prof_tdata_tls JEMALLOC_N(prof_tdata_tls) +#define prof_tdata_cleanup JEMALLOC_N(prof_tdata_cleanup) +#define prof_tdata_tsd_boot JEMALLOC_N(prof_tdata_tsd_boot) +#define prof_tdata_tsd_cleanup_wrapper JEMALLOC_N(prof_tdata_tsd_cleanup_wrapper) +#define prof_tdata_tsd_get JEMALLOC_N(prof_tdata_tsd_get) +#define prof_tdata_tsd_set JEMALLOC_N(prof_tdata_tsd_set) #define pthread_create JEMALLOC_N(pthread_create) #define quarantine JEMALLOC_N(quarantine) #define quarantine_boot JEMALLOC_N(quarantine_boot) +#define quarantine_tsd_boot JEMALLOC_N(quarantine_tsd_boot) +#define quarantine_tsd_cleanup_wrapper JEMALLOC_N(quarantine_tsd_cleanup_wrapper) +#define quarantine_tsd_get JEMALLOC_N(quarantine_tsd_get) +#define quarantine_tsd_set JEMALLOC_N(quarantine_tsd_set) #define register_zone JEMALLOC_N(register_zone) #define rtree_get JEMALLOC_N(rtree_get) #define rtree_get_locked JEMALLOC_N(rtree_get_locked) @@ -213,23 +239,36 @@ #define stats_cactive_get JEMALLOC_N(stats_cactive_get) #define stats_cactive_sub JEMALLOC_N(stats_cactive_sub) #define stats_print JEMALLOC_N(stats_print) -#define szone2ozone JEMALLOC_N(szone2ozone) #define tcache_alloc_easy JEMALLOC_N(tcache_alloc_easy) #define tcache_alloc_large JEMALLOC_N(tcache_alloc_large) #define tcache_alloc_small JEMALLOC_N(tcache_alloc_small) #define tcache_alloc_small_hard JEMALLOC_N(tcache_alloc_small_hard) +#define tcache_arena_associate JEMALLOC_N(tcache_arena_associate) +#define tcache_arena_dissociate JEMALLOC_N(tcache_arena_dissociate) #define tcache_bin_flush_large JEMALLOC_N(tcache_bin_flush_large) #define tcache_bin_flush_small JEMALLOC_N(tcache_bin_flush_small) -#define tcache_boot JEMALLOC_N(tcache_boot) +#define tcache_boot0 JEMALLOC_N(tcache_boot0) +#define tcache_boot1 JEMALLOC_N(tcache_boot1) #define tcache_create JEMALLOC_N(tcache_create) #define tcache_dalloc_large JEMALLOC_N(tcache_dalloc_large) #define tcache_dalloc_small JEMALLOC_N(tcache_dalloc_small) #define tcache_destroy JEMALLOC_N(tcache_destroy) +#define tcache_enabled_get JEMALLOC_N(tcache_enabled_get) +#define tcache_enabled_set JEMALLOC_N(tcache_enabled_set) +#define tcache_enabled_tsd_boot JEMALLOC_N(tcache_enabled_tsd_boot) +#define tcache_enabled_tsd_cleanup_wrapper JEMALLOC_N(tcache_enabled_tsd_cleanup_wrapper) +#define tcache_enabled_tsd_get JEMALLOC_N(tcache_enabled_tsd_get) +#define tcache_enabled_tsd_set JEMALLOC_N(tcache_enabled_tsd_set) #define tcache_event JEMALLOC_N(tcache_event) -#define tcache_get JEMALLOC_N(tcache_get) +#define tcache_flush JEMALLOC_N(tcache_flush) #define tcache_stats_merge 
JEMALLOC_N(tcache_stats_merge) -#define tcache_tls JEMALLOC_N(tcache_tls) -#define thread_allocated_get JEMALLOC_N(thread_allocated_get) -#define thread_allocated_get_hard JEMALLOC_N(thread_allocated_get_hard) -#define thread_allocated_tls JEMALLOC_N(thread_allocated_tls) +#define tcache_thread_cleanup JEMALLOC_N(tcache_thread_cleanup) +#define tcache_tsd_boot JEMALLOC_N(tcache_tsd_boot) +#define tcache_tsd_cleanup_wrapper JEMALLOC_N(tcache_tsd_cleanup_wrapper) +#define tcache_tsd_get JEMALLOC_N(tcache_tsd_get) +#define tcache_tsd_set JEMALLOC_N(tcache_tsd_set) +#define thread_allocated_tsd_boot JEMALLOC_N(thread_allocated_tsd_boot) +#define thread_allocated_tsd_cleanup_wrapper JEMALLOC_N(thread_allocated_tsd_cleanup_wrapper) +#define thread_allocated_tsd_get JEMALLOC_N(thread_allocated_tsd_get) +#define thread_allocated_tsd_set JEMALLOC_N(thread_allocated_tsd_set) #define u2rz JEMALLOC_N(u2rz) From a398a6b46e53035a4ef660b4c7a1c406f3abe645 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 16 Apr 2012 12:39:45 -0700 Subject: [PATCH 119/205] Remove configure test cruft. --- configure.ac | 1 - 1 file changed, 1 deletion(-) diff --git a/configure.ac b/configure.ac index 8d20659f..5d0c7262 100644 --- a/configure.ac +++ b/configure.ac @@ -826,7 +826,6 @@ AC_CACHE_CHECK([whether munmap() leaves semi-permanent VM map holes], #include #include -#define NPTRS 11 #define MMAP_SIZE ((size_t)(1U << 22)) static void * From 59ae2766af88bad07ac721c4ee427b171e897bcb Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 16 Apr 2012 17:52:27 -0700 Subject: [PATCH 120/205] Add the --disable-munmap option. Add the --disable-munmap option, remove the configure test that attempted to detect the VM allocation quirk known to exist on Linux x86[_64], and make --disable-munmap implicit on Linux. --- INSTALL | 7 ++++ configure.ac | 85 ++++++++++----------------------------------- doc/jemalloc.xml.in | 10 ++++++ src/ctl.c | 3 ++ 4 files changed, 39 insertions(+), 66 deletions(-) diff --git a/INSTALL b/INSTALL index a5942ec8..04671a1d 100644 --- a/INSTALL +++ b/INSTALL @@ -108,6 +108,13 @@ any of the following arguments (not a definitive list) to 'configure': released in bulk, thus reducing the total number of mutex operations. See the "opt.tcache" option for usage details. +--disable-munmap + Disable virtual memory deallocation via munmap(2); instead keep track of + the virtual memory for later use. munmap() is disabled by default (i.e. + --disable-munmap is implied) on Linux, which has a quirk in its virtual + memory allocation algorithm that causes semi-permanent VM map holes under + normal jemalloc operation. + --enable-dss Enable support for page allocation/deallocation via sbrk(2), in addition to mmap(2). diff --git a/configure.ac b/configure.ac index 5d0c7262..90235f7b 100644 --- a/configure.ac +++ b/configure.ac @@ -206,6 +206,7 @@ dnl dnl Define cpp macros in CPPFLAGS, rather than doing AC_DEFINE(macro), since the dnl definitions need to be seen before any headers are included, which is a pain dnl to make happen otherwise. +default_munmap="1" case "${host}" in *-*-darwin*) CFLAGS="$CFLAGS -fno-common" @@ -230,6 +231,7 @@ case "${host}" in AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED], [ ]) AC_DEFINE([JEMALLOC_THREADED_INIT], [ ]) RPATH="-Wl,-rpath," + default_munmap="0" ;; *-*-netbsd*) AC_MSG_CHECKING([ABI]) @@ -667,6 +669,22 @@ if test "x$enable_tcache" = "x1" ; then fi AC_SUBST([enable_tcache]) +dnl Enable VM deallocation via munmap() by default. 
+AC_ARG_ENABLE([munmap],
+  [AS_HELP_STRING([--disable-munmap], [Disable VM deallocation via munmap(2)])],
+[if test "x$enable_munmap" = "xno" ; then
+  enable_munmap="0"
+else
+  enable_munmap="1"
+fi
+],
+[enable_munmap="${default_munmap}"]
+)
+if test "x$enable_munmap" = "x1" ; then
+  AC_DEFINE([JEMALLOC_MUNMAP], [ ])
+fi
+AC_SUBST([enable_munmap])
+
 dnl Do not enable allocation from DSS by default.
 AC_ARG_ENABLE([dss],
   [AS_HELP_STRING([--enable-dss], [Enable allocation from DSS])],
@@ -817,72 +835,6 @@ else
   AC_MSG_ERROR([cannot determine value for STATIC_PAGE_SHIFT])
 fi
 
-dnl Determine whether common sequences of mmap()/munmap() calls will leave
-dnl semi-permanent VM map holes.  If so, disable munmap.
-AC_CACHE_CHECK([whether munmap() leaves semi-permanent VM map holes],
-               [je_cv_vmmap_hole],
-  AC_RUN_IFELSE([AC_LANG_PROGRAM(
-[[#include <stdio.h>
-#include <stdlib.h>
-#include <sys/mman.h>
-
-#define MMAP_SIZE ((size_t)(1U << 22))
-
-static void *
-do_mmap(size_t size)
-{
-	void *ret;
-
-	ret = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1,
-	    0);
-	if (ret == MAP_FAILED) {
-		fprintf(stderr, "mmap() error\n");
-		exit(1);
-	}
-
-	return (ret);
-}
-
-static void
-do_munmap(void *ptr, size_t size)
-{
-	if (munmap(ptr, size) == -1) {
-		fprintf(stderr, "munmap() error\n");
-		exit(1);
-	}
-}
-]],
-[[
-	void *p0, *p1, *p2, *p3, *p4;
-	FILE *f;
-
-	f = fopen("conftest.out", "w");
-	if (f == NULL)
-		exit(1);
-
-	p0 = do_mmap(MMAP_SIZE);
-	p1 = do_mmap(MMAP_SIZE);
-	p2 = do_mmap(MMAP_SIZE);
-	do_munmap(p1, MMAP_SIZE);
-	p3 = do_mmap(MMAP_SIZE * 2);
-	do_munmap(p3, MMAP_SIZE * 2);
-	p4 = do_mmap(MMAP_SIZE);
-	if (p4 != p1) {
-		fprintf(stderr, "Hoped for %p, got %p\n", p1, p4);
-		fprintf(stderr, "%p..%p..%p..%p..%p\n", p0, p1, p2, p3, p4);
-		fprintf(f, "yes\n");
-	} else
-		fprintf(f, "no\n");
-
-	fclose(f);
-	return (0);
-]])],
-	[je_cv_vmmap_hole=`cat conftest.out`],
-	[je_cv_vmmap_hole=unknown]))
-if test "x$je_cv_vmmap_hole" = "xno" ; then
-  AC_DEFINE([JEMALLOC_MUNMAP])
-fi
-
 dnl ============================================================================
 dnl jemalloc configuration.
 dnl
@@ -1198,6 +1150,7 @@ AC_MSG_RESULT([fill          : ${enable_fill}])
 AC_MSG_RESULT([utrace        : ${enable_utrace}])
 AC_MSG_RESULT([valgrind      : ${enable_valgrind}])
 AC_MSG_RESULT([xmalloc       : ${enable_xmalloc}])
+AC_MSG_RESULT([munmap        : ${enable_munmap}])
 AC_MSG_RESULT([dss           : ${enable_dss}])
 AC_MSG_RESULT([lazy_lock     : ${enable_lazy_lock}])
 AC_MSG_RESULT([tls           : ${enable_tls}])
diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in
index ee60c98a..98d0ba41 100644
--- a/doc/jemalloc.xml.in
+++ b/doc/jemalloc.xml.in
@@ -650,6 +650,16 @@ for (i = 0; i < nbins; i++) {
         during build configuration.</para></listitem>
       </varlistentry>
 
+      <varlistentry id="config.munmap">
+        <term>
+          <mallctl>config.munmap</mallctl>
+          (<type>bool</type>)
+          <literal>r-</literal>
+        </term>
+        <listitem><para><option>--enable-munmap</option> was specified during
+        build configuration.</para></listitem>
+      </varlistentry>
+
       <varlistentry id="config.prof">
diff --git a/src/ctl.c b/src/ctl.c
index 6be40561..a6a02cc5 100644
--- a/src/ctl.c
+++ b/src/ctl.c
@@ -50,6 +50,7 @@ CTL_PROTO(config_debug)
 CTL_PROTO(config_dss)
 CTL_PROTO(config_fill)
 CTL_PROTO(config_lazy_lock)
+CTL_PROTO(config_munmap)
 CTL_PROTO(config_prof)
 CTL_PROTO(config_prof_libgcc)
 CTL_PROTO(config_prof_libunwind)
@@ -176,6 +177,7 @@ static const ctl_node_t config_node[] = {
	{NAME("dss"),			CTL(config_dss)},
	{NAME("fill"),			CTL(config_fill)},
	{NAME("lazy_lock"),		CTL(config_lazy_lock)},
+	{NAME("munmap"),		CTL(config_munmap)},
	{NAME("prof"),			CTL(config_prof)},
	{NAME("prof_libgcc"),		CTL(config_prof_libgcc)},
	{NAME("prof_libunwind"),	CTL(config_prof_libunwind)},
@@ -1087,6 +1089,7 @@ CTL_RO_BOOL_CONFIG_GEN(config_debug)
 CTL_RO_BOOL_CONFIG_GEN(config_dss)
 CTL_RO_BOOL_CONFIG_GEN(config_fill)
 CTL_RO_BOOL_CONFIG_GEN(config_lazy_lock)
+CTL_RO_BOOL_CONFIG_GEN(config_munmap)
 CTL_RO_BOOL_CONFIG_GEN(config_prof)
 CTL_RO_BOOL_CONFIG_GEN(config_prof_libgcc)
 CTL_RO_BOOL_CONFIG_GEN(config_prof_libunwind)

From 9ef7f5dc34ff02f50d401e41c8d9a4a928e7c2aa Mon Sep 17 00:00:00 2001
From: Jason Evans
Date: Mon, 16 Apr 2012 18:16:48 -0700
Subject: [PATCH 121/205] Start preparing ChangeLog for 3.0.0 release.

Start preparing ChangeLog for 3.0.0 release.  Additional fixes and
changes are yet to come, so this is not a complete ChangeLog.
---
 ChangeLog | 72 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 72 insertions(+)

diff --git a/ChangeLog b/ChangeLog
index 326ee7a9..8f6edd5a 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -6,6 +6,78 @@ found in the git revision history:
     http://www.canonware.com/cgi-bin/gitweb.cgi?p=jemalloc.git
     git://canonware.com/jemalloc.git
 
+* 3.0.0 (XXX not yet released)
+
+  Although this version adds some major new features, the primary focus is on
+  internal code cleanup that facilitates maintainability and portability, most
+  of which is not reflected in the ChangeLog.  This is the first release to
+  incorporate substantial contributions from numerous other developers, and the
+  result is a more broadly useful allocator (see the git revision history for
+  contribution details).  Note that the license has been unified, thanks to
+  Facebook granting a license under the same terms as the other copyright
+  holders (see COPYING).
+
+  New features:
+  - Implement Valgrind support, redzones, and quarantine.
+  - Add support for additional operating systems:
+    + FreeBSD
+    + Mac OS X Lion
+  - Add support for additional architectures:
+    + MIPS
+    + SH4
+    + Tilera
+  - Add support for cross compiling.
+  - Add nallocm(), which rounds a request size up to the nearest size class
+    without actually allocating.
+  - Implement aligned_alloc() (blame C11).
+  - Add the --disable-munmap option, and make it the default on Linux.
+  - Add the --with-mangling option.
+  - Add the --disable-experimental option.
+  - Add the "thread.tcache.enabled" mallctl.
+
+  Incompatible changes:
+  - Enable stats by default.
+  - Enable fill by default.
+  - Disable lazy locking by default.
+  - Rename the "tcache.flush" mallctl to "thread.tcache.flush".
+  - Rename the "arenas.pagesize" mallctl to "arenas.page".
+
+  Removed features:
+  - Remove the swap feature, including the "config.swap", "swap.avail",
+    "swap.prezeroed", "swap.nfds", and "swap.fds" mallctls.
+  - Remove highruns statistics, including the
+    "stats.arenas.<i>.bins.<j>.highruns" and
+    "stats.arenas.<i>.lruns.<j>.highruns" mallctls.
+  - As part of small size class refactoring, remove the "opt.lg_[qc]space_max",
+    "arenas.cacheline", "arenas.subpage", "arenas.[tqcs]space_{min,max}", and
+    "arenas.[tqcs]bins" mallctls.
+  - Remove the "arenas.chunksize" mallctl.
+  - Remove the "opt.lg_prof_tcmax" option.
+  - Remove the "opt.lg_prof_bt_max" option.
+  - Remove the "opt.lg_tcache_gc_sweep" option.
+  - Remove the --disable-tiny option, including the "config.tiny" mallctl.
+  - Remove the --enable-dynamic-page-shift configure option.
+  - Remove the --enable-sysv configure option.
+
+  Bug fixes:
+  - Fix fork-related bugs that could cause deadlock in children between fork
+    and exec.
+  - Fix a statistics-related bug in the "thread.arena" mallctl that could cause
+    invalid statistics and crashes.
+  - Work around TLS deallocation via free() on Linux.  This bug could cause
+    write-after-free memory corruption.
+  - Fix malloc_stats_print() to honor 'b' and 'l' in the opts parameter.
+  - Fix realloc(p, 0) to act like free(p).
+  - Do not enforce minimum alignment in memalign().
+  - Check for NULL pointer in malloc_usable_size().
+  - Fix bin->runcur management to fix a layout policy bug.  This bug did not
+    affect correctness.
+  - Fix a bug in choose_arena_hard() that potentially caused more arenas to be
+    initialized than necessary.
+  - Add missing "opt.lg_tcache_max" mallctl implementation.
+  - Use glibc allocator hooks to make mixed allocator usage less likely.
+  - Fix build issues for --disable-tcache.
+
 * 2.2.5 (November 14, 2011)
 
   Bug fixes:

From f5e0f526ec2079bf9b734f500df1cbc090f33b39 Mon Sep 17 00:00:00 2001
From: Mike Hommey
Date: Mon, 16 Apr 2012 16:30:20 +0200
Subject: [PATCH 122/205] Remove -dynamic CFLAG on OSX

It is a linker flag, so it doesn't make sense in CFLAGS, and it's the
default when invoking the linker for shared libraries.
---
 Makefile.in | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/Makefile.in b/Makefile.in
index 8828d7f7..64798571 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -21,9 +21,6 @@ MANDIR := $(DESTDIR)@MANDIR@
 
 # Build parameters.
CPPFLAGS := @CPPFLAGS@ -I@srcroot@include -I@objroot@include CFLAGS := @CFLAGS@ -ifeq (macho, @abi@) -CFLAGS += -dynamic -endif LDFLAGS := @LDFLAGS@ LIBS := @LIBS@ RPATH_EXTRA := @RPATH_EXTRA@ From 6f2ed70f5a33c4d6bae93b7f18d6b636843a6495 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Mon, 16 Apr 2012 16:30:21 +0200 Subject: [PATCH 123/205] Use $(LIBS) instead of -lpthread when linking tests This will allow linking for win32 without pthreads more easily --- Makefile.in | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile.in b/Makefile.in index 64798571..8cd0418c 100644 --- a/Makefile.in +++ b/Makefile.in @@ -133,9 +133,9 @@ build_doc: $(DOCS) @objroot@lib/libjemalloc@install_suffix@.$(SO) @mkdir -p $(@D) ifneq (@RPATH@, ) - $(CC) -o $@ $< @RPATH@@objroot@lib -L@objroot@lib -ljemalloc@install_suffix@ -lpthread + $(CC) -o $@ $< @RPATH@@objroot@lib -L@objroot@lib -ljemalloc@install_suffix@ $(LIBS) else - $(CC) -o $@ $< -L@objroot@lib -ljemalloc@install_suffix@ -lpthread + $(CC) -o $@ $< -L@objroot@lib -ljemalloc@install_suffix@ $(LIBS) endif build_lib_shared: $(DSOS) From 2d04f5e5ffb607e270356e6fa258e708dafb5b86 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Mon, 16 Apr 2012 16:30:22 +0200 Subject: [PATCH 124/205] Use make variables instead of preprocessing --- Makefile.in | 202 ++++++++++++++++++++++++++++------------------------ 1 file changed, 109 insertions(+), 93 deletions(-) diff --git a/Makefile.in b/Makefile.in index 8cd0418c..1c30f328 100644 --- a/Makefile.in +++ b/Makefile.in @@ -17,9 +17,13 @@ INCLUDEDIR := $(DESTDIR)@INCLUDEDIR@ LIBDIR := $(DESTDIR)@LIBDIR@ DATADIR := $(DESTDIR)@DATADIR@ MANDIR := $(DESTDIR)@MANDIR@ +srcroot := @srcroot@ +objroot := @objroot@ +abs_srcroot := @abs_srcroot@ +abs_objroot := @abs_objroot@ # Build parameters. -CPPFLAGS := @CPPFLAGS@ -I@srcroot@include -I@objroot@include +CPPFLAGS := @CPPFLAGS@ -I$(srcroot)include -I$(objroot)include CFLAGS := @CFLAGS@ LDFLAGS := @LDFLAGS@ LIBS := @LIBS@ @@ -31,42 +35,54 @@ else WL_SONAME := soname endif REV := @rev@ -ifeq (macho, @abi@) -TEST_LIBRARY_PATH := DYLD_FALLBACK_LIBRARY_PATH=@objroot@lib +install_suffix := @install_suffix@ +ABI := @abi@ +XSLTPROC := @XSLTPROC@ +AUTOCONF := @AUTOCONF@ +RPATH := @RPATH@ +cfghdrs_in := @cfghdrs_in@ +cfghdrs_out := @cfghdrs_out@ +cfgoutputs_in := @cfgoutputs_in@ +cfgoutputs_out := @cfgoutputs_out@ +enable_autogen := @enable_autogen@ +enable_experimental := @enable_experimental@ + +ifeq (macho, $(ABI)) +TEST_LIBRARY_PATH := DYLD_FALLBACK_LIBRARY_PATH=$(objroot)lib else TEST_LIBRARY_PATH := endif # Lists of files. 
-BINS := @srcroot@bin/pprof @objroot@bin/jemalloc.sh -CHDRS := @objroot@include/jemalloc/jemalloc@install_suffix@.h \ - @objroot@include/jemalloc/jemalloc_defs@install_suffix@.h -CSRCS := @srcroot@src/jemalloc.c @srcroot@src/arena.c @srcroot@src/atomic.c \ - @srcroot@src/base.c @srcroot@src/bitmap.c @srcroot@src/chunk.c \ - @srcroot@src/chunk_dss.c @srcroot@src/chunk_mmap.c \ - @srcroot@src/ckh.c @srcroot@src/ctl.c @srcroot@src/extent.c \ - @srcroot@src/hash.c @srcroot@src/huge.c @srcroot@src/mb.c \ - @srcroot@src/mutex.c @srcroot@src/prof.c @srcroot@src/quarantine.c \ - @srcroot@src/rtree.c @srcroot@src/stats.c @srcroot@src/tcache.c \ - @srcroot@src/util.c @srcroot@src/tsd.c -ifeq (macho, @abi@) -CSRCS += @srcroot@src/zone.c +BINS := $(srcroot)bin/pprof $(objroot)bin/jemalloc.sh +CHDRS := $(objroot)include/jemalloc/jemalloc$(install_suffix).h \ + $(objroot)include/jemalloc/jemalloc_defs$(install_suffix).h +CSRCS := $(srcroot)src/jemalloc.c $(srcroot)src/arena.c $(srcroot)src/atomic.c \ + $(srcroot)src/base.c $(srcroot)src/bitmap.c $(srcroot)src/chunk.c \ + $(srcroot)src/chunk_dss.c $(srcroot)src/chunk_mmap.c \ + $(srcroot)src/ckh.c $(srcroot)src/ctl.c $(srcroot)src/extent.c \ + $(srcroot)src/hash.c $(srcroot)src/huge.c $(srcroot)src/mb.c \ + $(srcroot)src/mutex.c $(srcroot)src/prof.c $(srcroot)src/quarantine.c \ + $(srcroot)src/rtree.c $(srcroot)src/stats.c $(srcroot)src/tcache.c \ + $(srcroot)src/util.c $(srcroot)src/tsd.c +ifeq (macho, $(ABI)) +CSRCS += $(srcroot)src/zone.c endif -STATIC_LIBS := @objroot@lib/libjemalloc@install_suffix@.a -DSOS := @objroot@lib/libjemalloc@install_suffix@.$(SO).$(REV) \ - @objroot@lib/libjemalloc@install_suffix@.$(SO) \ - @objroot@lib/libjemalloc@install_suffix@_pic.a -MAN3 := @objroot@doc/jemalloc@install_suffix@.3 -DOCS_XML := @objroot@doc/jemalloc@install_suffix@.xml -DOCS_HTML := $(DOCS_XML:@objroot@%.xml=@srcroot@%.html) -DOCS_MAN3 := $(DOCS_XML:@objroot@%.xml=@srcroot@%.3) +STATIC_LIBS := $(objroot)lib/libjemalloc$(install_suffix).a +DSOS := $(objroot)lib/libjemalloc$(install_suffix).$(SO).$(REV) \ + $(objroot)lib/libjemalloc$(install_suffix).$(SO) \ + $(objroot)lib/libjemalloc$(install_suffix)_pic.a +MAN3 := $(objroot)doc/jemalloc$(install_suffix).3 +DOCS_XML := $(objroot)doc/jemalloc$(install_suffix).xml +DOCS_HTML := $(DOCS_XML:$(objroot)%.xml=$(srcroot)%.html) +DOCS_MAN3 := $(DOCS_XML:$(objroot)%.xml=$(srcroot)%.3) DOCS := $(DOCS_HTML) $(DOCS_MAN3) -CTESTS := @srcroot@test/aligned_alloc.c @srcroot@test/allocated.c \ - @srcroot@test/bitmap.c @srcroot@test/mremap.c \ - @srcroot@test/posix_memalign.c @srcroot@test/thread_arena.c \ - @srcroot@test/thread_tcache_enabled.c -ifeq (@enable_experimental@, 1) -CTESTS += @srcroot@test/allocm.c @srcroot@test/rallocm.c +CTESTS := $(srcroot)test/aligned_alloc.c $(srcroot)test/allocated.c \ + $(srcroot)test/bitmap.c $(srcroot)test/mremap.c \ + $(srcroot)test/posix_memalign.c $(srcroot)test/thread_arena.c \ + $(srcroot)test/thread_tcache_enabled.c +ifeq ($(enable_experimental), 1) +CTESTS += $(srcroot)test/allocm.c $(srcroot)test/rallocm.c endif .PHONY: all dist doc_html doc_man doc @@ -74,18 +90,18 @@ endif .PHONY: install_html install_man install_doc install .PHONY: tests check clean distclean relclean -.SECONDARY : $(CTESTS:@srcroot@%.c=@objroot@%.o) +.SECONDARY : $(CTESTS:$(srcroot)%.c=$(objroot)%.o) # Default target. 
all: build dist: build_doc -@srcroot@doc/%.html : @objroot@doc/%.xml @srcroot@doc/stylesheet.xsl @objroot@doc/html.xsl - @XSLTPROC@ -o $@ @objroot@doc/html.xsl $< +$(srcroot)doc/%.html : $(objroot)doc/%.xml $(srcroot)doc/stylesheet.xsl $(objroot)doc/html.xsl + $(XSLTPROC) -o $@ $(objroot)doc/html.xsl $< -@srcroot@doc/%.3 : @objroot@doc/%.xml @srcroot@doc/stylesheet.xsl @objroot@doc/manpages.xsl - @XSLTPROC@ -o $@ @objroot@doc/manpages.xsl $< +$(srcroot)doc/%.3 : $(objroot)doc/%.xml $(srcroot)doc/stylesheet.xsl $(objroot)doc/manpages.xsl + $(XSLTPROC) -o $@ $(objroot)doc/manpages.xsl $< build_doc_html: $(DOCS_HTML) build_doc_man: $(DOCS_MAN3) @@ -94,16 +110,16 @@ build_doc: $(DOCS) # # Include generated dependency files. # --include $(CSRCS:@srcroot@%.c=@objroot@%.d) --include $(CSRCS:@srcroot@%.c=@objroot@%.pic.d) --include $(CTESTS:@srcroot@%.c=@objroot@%.d) +-include $(CSRCS:$(srcroot)%.c=$(objroot)%.d) +-include $(CSRCS:$(srcroot)%.c=$(objroot)%.pic.d) +-include $(CTESTS:$(srcroot)%.c=$(objroot)%.d) -@objroot@src/%.o: @srcroot@src/%.c +$(objroot)src/%.o: $(srcroot)src/%.c @mkdir -p $(@D) $(CC) $(CFLAGS) -c $(CPPFLAGS) -o $@ $< @$(CC) -MM $(CPPFLAGS) -MT $@ -o $(@:%.o=%.d) $< -@objroot@src/%.pic.o: @srcroot@src/%.c +$(objroot)src/%.pic.o: $(srcroot)src/%.c @mkdir -p $(@D) $(CC) $(CFLAGS) -fPIC -DPIC -c $(CPPFLAGS) -o $@ $< @$(CC) -MM $(CPPFLAGS) -MT $@ -o $(@:%.o=%.d) $< @@ -112,30 +128,30 @@ build_doc: $(DOCS) @mkdir -p $(@D) ln -sf $( @objroot@$${t}.out 2>&1; \ - if test -e "@srcroot@$${t}.exp"; then \ - diff -w -u @srcroot@$${t}.exp \ - @objroot@$${t}.out >/dev/null 2>&1; \ + $(TEST_LIBRARY_PATH) $${t} $(abs_srcroot) $(abs_objroot) \ + > $(objroot)$${t}.out 2>&1; \ + if test -e "$(srcroot)$${t}.exp"; then \ + diff -w -u $(srcroot)$${t}.exp \ + $(objroot)$${t}.out >/dev/null 2>&1; \ fail=$$?; \ if test "$${fail}" -eq "1" ; then \ failures=`expr $${failures} + 1`; \ @@ -217,49 +233,49 @@ check: tests echo "Failures: $${failures}/$${total}"' clean: - rm -f $(CSRCS:@srcroot@%.c=@objroot@%.o) - rm -f $(CSRCS:@srcroot@%.c=@objroot@%.pic.o) - rm -f $(CSRCS:@srcroot@%.c=@objroot@%.d) - rm -f $(CSRCS:@srcroot@%.c=@objroot@%.pic.d) - rm -f $(CTESTS:@srcroot@%.c=@objroot@%) - rm -f $(CTESTS:@srcroot@%.c=@objroot@%.o) - rm -f $(CTESTS:@srcroot@%.c=@objroot@%.d) - rm -f $(CTESTS:@srcroot@%.c=@objroot@%.out) + rm -f $(CSRCS:$(srcroot)%.c=$(objroot)%.o) + rm -f $(CSRCS:$(srcroot)%.c=$(objroot)%.pic.o) + rm -f $(CSRCS:$(srcroot)%.c=$(objroot)%.d) + rm -f $(CSRCS:$(srcroot)%.c=$(objroot)%.pic.d) + rm -f $(CTESTS:$(srcroot)%.c=$(objroot)%) + rm -f $(CTESTS:$(srcroot)%.c=$(objroot)%.o) + rm -f $(CTESTS:$(srcroot)%.c=$(objroot)%.d) + rm -f $(CTESTS:$(srcroot)%.c=$(objroot)%.out) rm -f $(DSOS) $(STATIC_LIBS) distclean: clean - rm -rf @objroot@autom4te.cache - rm -f @objroot@config.log - rm -f @objroot@config.status - rm -f @objroot@config.stamp - rm -f @cfghdrs_out@ - rm -f @cfgoutputs_out@ + rm -rf $(objroot)autom4te.cache + rm -f $(objroot)config.log + rm -f $(objroot)config.status + rm -f $(objroot)config.stamp + rm -f $(cfghdrs_out) + rm -f $(cfgoutputs_out) relclean: distclean - rm -f @objroot@configure - rm -f @srcroot@VERSION + rm -f $(objroot)configure + rm -f $(srcroot)VERSION rm -f $(DOCS_HTML) rm -f $(DOCS_MAN3) #=============================================================================== # Re-configuration rules. 
-ifeq (@enable_autogen@, 1) -@srcroot@configure : @srcroot@configure.ac - cd ./@srcroot@ && @AUTOCONF@ +ifeq ($(enable_autogen), 1) +$(srcroot)configure : $(srcroot)configure.ac + cd ./$(srcroot) && $(AUTOCONF) -@objroot@config.status : @srcroot@configure - ./@objroot@config.status --recheck +$(objroot)config.status : $(srcroot)configure + ./$(objroot)config.status --recheck -@srcroot@config.stamp.in : @srcroot@configure.ac - echo stamp > @srcroot@config.stamp.in +$(srcroot)config.stamp.in : $(srcroot)configure.ac + echo stamp > $(srcroot)config.stamp.in -@objroot@config.stamp : @cfgoutputs_in@ @cfghdrs_in@ @srcroot@configure - ./@objroot@config.status +$(objroot)config.stamp : $(cfgoutputs_in) $(cfghdrs_in) $(srcroot)configure + ./$(objroot)config.status @touch $@ # There must be some action in order for make to re-read Makefile when it is # out of date. -@cfgoutputs_out@ @cfghdrs_out@ : @objroot@config.stamp +$(cfgoutputs_out) $(cfghdrs_out) : $(objroot)config.stamp @true endif From 72ca7220f21ef32f17d12cfde1bd9732d56fb872 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Mon, 16 Apr 2012 16:30:23 +0200 Subject: [PATCH 125/205] Use echo instead of cat in loops in size_classes.sh This avoids fork/exec()ing in loops, as echo is a builtin, and makes size_classes.sh much faster (from > 10s to < 0.2s on mingw on my machine). --- include/jemalloc/internal/size_classes.sh | 32 ++++++++--------------- 1 file changed, 11 insertions(+), 21 deletions(-) diff --git a/include/jemalloc/internal/size_classes.sh b/include/jemalloc/internal/size_classes.sh index 3d236136..29c80c1f 100755 --- a/include/jemalloc/internal/size_classes.sh +++ b/include/jemalloc/internal/size_classes.sh @@ -35,10 +35,8 @@ while [ ${lg_q} -le ${lg_qmax} ] ; do while [ ${lg_t} -le ${lg_q} ] ; do lg_p=${lg_pmin} while [ ${lg_p} -le ${lg_pmax} ] ; do - cat < Date: Mon, 16 Apr 2012 16:30:24 +0200 Subject: [PATCH 126/205] Add variables for library prefix, and static library, object and executable suffixes This makes hacking on Makefile easier. --- Makefile.in | 58 +++++++++++++++++++++++++++++----------------------- configure.ac | 8 ++++++++ 2 files changed, 40 insertions(+), 26 deletions(-) diff --git a/Makefile.in b/Makefile.in index 1c30f328..146f57fd 100644 --- a/Makefile.in +++ b/Makefile.in @@ -29,6 +29,10 @@ LDFLAGS := @LDFLAGS@ LIBS := @LIBS@ RPATH_EXTRA := @RPATH_EXTRA@ SO := @so@ +O := @o@ +A := @a@ +EXE := @exe@ +LIB := @lib@ ifeq (macho, @abi@) WL_SONAME := dylib_install_name else @@ -53,6 +57,8 @@ else TEST_LIBRARY_PATH := endif +LIBJEMALLOC := $(LIB)jemalloc$(install_suffix) + # Lists of files. 
BINS := $(srcroot)bin/pprof $(objroot)bin/jemalloc.sh CHDRS := $(objroot)include/jemalloc/jemalloc$(install_suffix).h \ @@ -68,10 +74,10 @@ CSRCS := $(srcroot)src/jemalloc.c $(srcroot)src/arena.c $(srcroot)src/atomic.c \ ifeq (macho, $(ABI)) CSRCS += $(srcroot)src/zone.c endif -STATIC_LIBS := $(objroot)lib/libjemalloc$(install_suffix).a -DSOS := $(objroot)lib/libjemalloc$(install_suffix).$(SO).$(REV) \ - $(objroot)lib/libjemalloc$(install_suffix).$(SO) \ - $(objroot)lib/libjemalloc$(install_suffix)_pic.a +STATIC_LIBS := $(objroot)lib/$(LIBJEMALLOC).$(A) +DSOS := $(objroot)lib/$(LIBJEMALLOC).$(SO).$(REV) \ + $(objroot)lib/$(LIBJEMALLOC).$(SO) \ + $(objroot)lib/$(LIBJEMALLOC)_pic.$(A) MAN3 := $(objroot)doc/jemalloc$(install_suffix).3 DOCS_XML := $(objroot)doc/jemalloc$(install_suffix).xml DOCS_HTML := $(DOCS_XML:$(objroot)%.xml=$(srcroot)%.html) @@ -90,7 +96,7 @@ endif .PHONY: install_html install_man install_doc install .PHONY: tests check clean distclean relclean -.SECONDARY : $(CTESTS:$(srcroot)%.c=$(objroot)%.o) +.SECONDARY : $(CTESTS:$(srcroot)%.c=$(objroot)%.$(O)) # Default target. all: build @@ -114,39 +120,39 @@ build_doc: $(DOCS) -include $(CSRCS:$(srcroot)%.c=$(objroot)%.pic.d) -include $(CTESTS:$(srcroot)%.c=$(objroot)%.d) -$(objroot)src/%.o: $(srcroot)src/%.c +$(objroot)src/%.$(O): $(srcroot)src/%.c @mkdir -p $(@D) $(CC) $(CFLAGS) -c $(CPPFLAGS) -o $@ $< - @$(CC) -MM $(CPPFLAGS) -MT $@ -o $(@:%.o=%.d) $< + @$(CC) -MM $(CPPFLAGS) -MT $@ -o $(@:%.$(O)=%.d) $< -$(objroot)src/%.pic.o: $(srcroot)src/%.c +$(objroot)src/%.pic.$(O): $(srcroot)src/%.c @mkdir -p $(@D) $(CC) $(CFLAGS) -fPIC -DPIC -c $(CPPFLAGS) -o $@ $< - @$(CC) -MM $(CPPFLAGS) -MT $@ -o $(@:%.o=%.d) $< + @$(CC) -MM $(CPPFLAGS) -MT $@ -o $(@:%.$(O)=%.d) $< %.$(SO) : %.$(SO).$(REV) @mkdir -p $(@D) ln -sf $( $(objroot)$${t}.out 2>&1; \ if test -e "$(srcroot)$${t}.exp"; then \ diff -w -u $(srcroot)$${t}.exp \ @@ -233,12 +239,12 @@ check: tests echo "Failures: $${failures}/$${total}"' clean: - rm -f $(CSRCS:$(srcroot)%.c=$(objroot)%.o) - rm -f $(CSRCS:$(srcroot)%.c=$(objroot)%.pic.o) + rm -f $(CSRCS:$(srcroot)%.c=$(objroot)%.$(O)) + rm -f $(CSRCS:$(srcroot)%.c=$(objroot)%.pic.$(O)) rm -f $(CSRCS:$(srcroot)%.c=$(objroot)%.d) rm -f $(CSRCS:$(srcroot)%.c=$(objroot)%.pic.d) - rm -f $(CTESTS:$(srcroot)%.c=$(objroot)%) - rm -f $(CTESTS:$(srcroot)%.c=$(objroot)%.o) + rm -f $(CTESTS:$(srcroot)%.c=$(objroot)%$(EXE)) + rm -f $(CTESTS:$(srcroot)%.c=$(objroot)%.$(O)) rm -f $(CTESTS:$(srcroot)%.c=$(objroot)%.d) rm -f $(CTESTS:$(srcroot)%.c=$(objroot)%.out) rm -f $(DSOS) $(STATIC_LIBS) diff --git a/configure.ac b/configure.ac index 90235f7b..55efcea0 100644 --- a/configure.ac +++ b/configure.ac @@ -195,6 +195,10 @@ AC_DEFINE_UNQUOTED([CPU_SPINWAIT], [$CPU_SPINWAIT]) LD_PRELOAD_VAR="LD_PRELOAD" so="so" +o="o" +a="a" +exe= +lib="lib" dnl Heap profiling uses the log(3) function. LIBS="$LIBS -lm" @@ -277,6 +281,10 @@ AC_SUBST([abi]) AC_SUBST([RPATH]) AC_SUBST([LD_PRELOAD_VAR]) AC_SUBST([so]) +AC_SUBST([o]) +AC_SUBST([a]) +AC_SUBST([exe]) +AC_SUBST([lib]) JE_COMPILABLE([__attribute__ syntax], [static __attribute__((unused)) void foo(void){}], From fa08da752bf91c146f77fff59b4ed09b42633260 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Mon, 16 Apr 2012 16:30:25 +0200 Subject: [PATCH 127/205] Limit the number of flags directly given to the linker, and refactor rpath This will make things easier for MSVC support. 
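
As a concrete illustration of the new pattern: RPATH stops being a
plain substituted flag string and becomes a Make function, so call
sites can pass a directory (or nothing) without wrapping every link
line in ifneq conditionals.  The fragment below is a self-contained
sketch; the hard-coded '-Wl,-rpath,$(1)' stands in for configure's
@RPATH@ substitution ('-Wl,-R,$(1)' on Solaris) and is an assumption
for the example, not a fixed value.

# _RPATH carries the platform flag template; RPATH expands to nothing
# when called with an empty argument, which replaces the old
# "ifneq (@RPATH@, )" special casing at every link site.
_RPATH = -Wl,-rpath,$(1)
RPATH = $(if $(1),$(call _RPATH,$(1)))

$(info with dir: [$(call RPATH,/usr/local/lib)])
$(info no dir:   [$(call RPATH,)])
all: ;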
--- Makefile.in | 17 +++++------------ configure.ac | 11 +++++------ 2 files changed, 10 insertions(+), 18 deletions(-) diff --git a/Makefile.in b/Makefile.in index 146f57fd..ef8f7175 100644 --- a/Makefile.in +++ b/Makefile.in @@ -33,23 +33,20 @@ O := @o@ A := @a@ EXE := @exe@ LIB := @lib@ -ifeq (macho, @abi@) -WL_SONAME := dylib_install_name -else -WL_SONAME := soname -endif REV := @rev@ install_suffix := @install_suffix@ ABI := @abi@ XSLTPROC := @XSLTPROC@ AUTOCONF := @AUTOCONF@ -RPATH := @RPATH@ +_RPATH = @RPATH@ +RPATH = $(if $(1),$(call _RPATH,$(1))) cfghdrs_in := @cfghdrs_in@ cfghdrs_out := @cfghdrs_out@ cfgoutputs_in := @cfgoutputs_in@ cfgoutputs_out := @cfgoutputs_out@ enable_autogen := @enable_autogen@ enable_experimental := @enable_experimental@ +DSO_LDFLAGS = @DSO_LDFLAGS@ ifeq (macho, $(ABI)) TEST_LIBRARY_PATH := DYLD_FALLBACK_LIBRARY_PATH=$(objroot)lib @@ -136,7 +133,7 @@ $(objroot)src/%.pic.$(O): $(srcroot)src/%.c $(objroot)lib/$(LIBJEMALLOC).$(SO).$(REV) : $(CSRCS:$(srcroot)%.c=$(objroot)%.pic.$(O)) @mkdir -p $(@D) - $(CC) -shared -Wl,-$(WL_SONAME),$(@F) $(RPATH_EXTRA:%=$(RPATH)%) -o $@ $+ $(LDFLAGS) $(LIBS) + $(CC) $(DSO_LDFLAGS) $(call RPATH,$(RPATH_EXTRA)) -o $@ $+ $(LDFLAGS) $(LIBS) $(objroot)lib/$(LIBJEMALLOC)_pic.$(A) : $(CSRCS:$(srcroot)%.c=$(objroot)%.pic.$(O)) @mkdir -p $(@D) @@ -154,11 +151,7 @@ $(objroot)test/%.$(O): $(srcroot)test/%.c $(objroot)test/%$(EXE): $(objroot)test/%.$(O) \ $(objroot)lib/$(LIBJEMALLOC).$(SO) @mkdir -p $(@D) -ifneq ($(RPATH), ) - $(CC) -o $@ $< $(RPATH)$(objroot)lib -L$(objroot)lib -ljemalloc$(install_suffix) $(LIBS) -else - $(CC) -o $@ $< -L$(objroot)lib -ljemalloc$(install_suffix) $(LIBS) -endif + $(CC) -o $@ $< $(call RPATH,$(objroot)lib) -L$(objroot)lib -ljemalloc$(install_suffix) $(LIBS) build_lib_shared: $(DSOS) build_lib_static: $(STATIC_LIBS) diff --git a/configure.ac b/configure.ac index 55efcea0..3a7a2457 100644 --- a/configure.ac +++ b/configure.ac @@ -199,6 +199,8 @@ o="o" a="a" exe= lib="lib" +DSO_LDFLAGS='-shared -Wl,-soname,$(@F)' +RPATH='-Wl,-rpath,$(1)' dnl Heap profiling uses the log(3) function. LIBS="$LIBS -lm" @@ -220,12 +222,12 @@ case "${host}" in LD_PRELOAD_VAR="DYLD_INSERT_LIBRARIES" so="dylib" force_tls="0" + DSO_LDFLAGS='-shared -Wl,-dylib_install_name,$(@F)' ;; *-*-freebsd*) CFLAGS="$CFLAGS" abi="elf" AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) - RPATH="-Wl,-rpath," force_lazy_lock="1" ;; *-*-linux*) @@ -234,7 +236,6 @@ case "${host}" in abi="elf" AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED], [ ]) AC_DEFINE([JEMALLOC_THREADED_INIT], [ ]) - RPATH="-Wl,-rpath," default_munmap="0" ;; *-*-netbsd*) @@ -250,12 +251,11 @@ case "${host}" in [abi="aout"]) AC_MSG_RESULT([$abi]) AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) - RPATH="-Wl,-rpath," ;; *-*-solaris2*) CFLAGS="$CFLAGS" abi="elf" - RPATH="-Wl,-R," + RPATH='-Wl,-R,$(1)' dnl Solaris needs this for sigwait(). 
CPPFLAGS="$CPPFLAGS -D_POSIX_PTHREAD_SEMANTICS" LIBS="$LIBS -lposix4 -lsocket -lnsl" @@ -269,12 +269,10 @@ case "${host}" in LD_PRELOAD_VAR="LDR_PRELOAD" fi abi="xcoff" - RPATH="-Wl,-rpath," ;; *) AC_MSG_RESULT([Unsupported operating system: ${host}]) abi="elf" - RPATH="-Wl,-rpath," ;; esac AC_SUBST([abi]) @@ -285,6 +283,7 @@ AC_SUBST([o]) AC_SUBST([a]) AC_SUBST([exe]) AC_SUBST([lib]) +AC_SUBST([DSO_LDFLAGS]) JE_COMPILABLE([__attribute__ syntax], [static __attribute__((unused)) void foo(void){}], From 45f208e112fcb82e0c98d572fc34259d65d6b6c1 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Mon, 16 Apr 2012 16:30:26 +0200 Subject: [PATCH 128/205] Replace fprintf with malloc_printf in tests. --- Makefile.in | 6 ++- .../jemalloc/internal/jemalloc_internal.h.in | 3 ++ test/aligned_alloc.c | 18 +++---- test/allocated.c | 18 +++---- test/allocm.c | 54 +++++++++---------- test/bitmap.c | 10 +--- test/jemalloc_test.h.in | 1 + test/mremap.c | 12 ++--- test/posix_memalign.c | 18 +++---- test/rallocm.c | 52 +++++++++--------- test/thread_arena.c | 16 +++--- test/thread_tcache_enabled.c | 8 +-- 12 files changed, 108 insertions(+), 108 deletions(-) diff --git a/Makefile.in b/Makefile.in index ef8f7175..0dfddea0 100644 --- a/Makefile.in +++ b/Makefile.in @@ -148,10 +148,12 @@ $(objroot)test/%.$(O): $(srcroot)test/%.c $(CC) $(CFLAGS) -c $(CPPFLAGS) -I$(objroot)test -o $@ $< @$(CC) -MM $(CPPFLAGS) -I$(objroot)test -MT $@ -o $(@:%.$(O)=%.d) $< -$(objroot)test/%$(EXE): $(objroot)test/%.$(O) \ +$(objroot)test/bitmap$(EXE): $(objroot)src/bitmap.$(O) + +$(objroot)test/%$(EXE): $(objroot)test/%.$(O) $(objroot)src/util.$(O) \ $(objroot)lib/$(LIBJEMALLOC).$(SO) @mkdir -p $(@D) - $(CC) -o $@ $< $(call RPATH,$(objroot)lib) -L$(objroot)lib -ljemalloc$(install_suffix) $(LIBS) + $(CC) -o $@ $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) -L$(objroot)lib -ljemalloc$(install_suffix) $(LIBS) build_lib_shared: $(DSOS) build_lib_static: $(STATIC_LIBS) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index aa21aa5d..51d40fb8 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -1,3 +1,5 @@ +#ifndef JEMALLOC_INTERNAL_H +#define JEMALLOC_INTERNAL_H #include #include #include @@ -868,3 +870,4 @@ malloc_tsd_funcs(JEMALLOC_INLINE, thread_allocated, thread_allocated_t, #undef JEMALLOC_H_INLINES /******************************************************************************/ +#endif /* JEMALLOC_INTERNAL_H */ diff --git a/test/aligned_alloc.c b/test/aligned_alloc.c index 2a95604f..81b8f933 100644 --- a/test/aligned_alloc.c +++ b/test/aligned_alloc.c @@ -20,14 +20,14 @@ main(void) unsigned i; void *p, *ps[NITER]; - fprintf(stderr, "Test begin\n"); + malloc_printf("Test begin\n"); /* Test error conditions. 
*/ alignment = 0; errno = 0; p = aligned_alloc(alignment, 1); if (p != NULL || errno != EINVAL) { - fprintf(stderr, + malloc_printf( "Expected error for invalid alignment %zu\n", alignment); } @@ -36,7 +36,7 @@ main(void) errno = 0; p = aligned_alloc(alignment + 1, 1); if (p != NULL || errno != EINVAL) { - fprintf(stderr, + malloc_printf( "Expected error for invalid alignment %zu\n", alignment + 1); } @@ -52,7 +52,7 @@ main(void) errno = 0; p = aligned_alloc(alignment, size); if (p != NULL || errno != ENOMEM) { - fprintf(stderr, + malloc_printf( "Expected error for aligned_alloc(%zu, %zu)\n", alignment, size); } @@ -67,7 +67,7 @@ main(void) errno = 0; p = aligned_alloc(alignment, size); if (p != NULL || errno != ENOMEM) { - fprintf(stderr, + malloc_printf( "Expected error for aligned_alloc(%zu, %zu)\n", alignment, size); } @@ -81,7 +81,7 @@ main(void) errno = 0; p = aligned_alloc(alignment, size); if (p != NULL || errno != ENOMEM) { - fprintf(stderr, + malloc_printf( "Expected error for aligned_alloc(&p, %zu, %zu)\n", alignment, size); } @@ -93,14 +93,14 @@ main(void) alignment <= MAXALIGN; alignment <<= 1) { total = 0; - fprintf(stderr, "Alignment: %zu\n", alignment); + malloc_printf("Alignment: %zu\n", alignment); for (size = 1; size < 3 * alignment && size < (1U << 31); size += (alignment >> (LG_SIZEOF_PTR-1)) - 1) { for (i = 0; i < NITER; i++) { ps[i] = aligned_alloc(alignment, size); if (ps[i] == NULL) { - fprintf(stderr, + malloc_printf( "Error for size %zu (%#zx): %s\n", size, size, strerror(errno)); exit(1); @@ -118,6 +118,6 @@ main(void) } } - fprintf(stderr, "Test end\n"); + malloc_printf("Test end\n"); return (0); } diff --git a/test/allocated.c b/test/allocated.c index 921ab3ae..81cd4ca9 100644 --- a/test/allocated.c +++ b/test/allocated.c @@ -27,7 +27,7 @@ thread_start(void *arg) #endif goto label_return; } - fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__, + malloc_printf("%s(): Error in mallctl(): %s\n", __func__, strerror(err)); exit(1); } @@ -39,7 +39,7 @@ thread_start(void *arg) #endif goto label_return; } - fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__, + malloc_printf("%s(): Error in mallctl(): %s\n", __func__, strerror(err)); exit(1); } @@ -53,7 +53,7 @@ thread_start(void *arg) #endif goto label_return; } - fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__, + malloc_printf("%s(): Error in mallctl(): %s\n", __func__, strerror(err)); exit(1); } @@ -65,7 +65,7 @@ thread_start(void *arg) #endif goto label_return; } - fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__, + malloc_printf("%s(): Error in mallctl(): %s\n", __func__, strerror(err)); exit(1); } @@ -73,7 +73,7 @@ thread_start(void *arg) p = malloc(1); if (p == NULL) { - fprintf(stderr, "%s(): Error in malloc()\n", __func__); + malloc_printf("%s(): Error in malloc()\n", __func__); exit(1); } @@ -108,13 +108,13 @@ main(void) int ret = 0; pthread_t thread; - fprintf(stderr, "Test begin\n"); + malloc_printf("Test begin\n"); thread_start(NULL); if (pthread_create(&thread, NULL, thread_start, NULL) != 0) { - fprintf(stderr, "%s(): Error in pthread_create()\n", __func__); + malloc_printf("%s(): Error in pthread_create()\n", __func__); ret = 1; goto label_return; } @@ -124,7 +124,7 @@ main(void) if (pthread_create(&thread, NULL, thread_start, NULL) != 0) { - fprintf(stderr, "%s(): Error in pthread_create()\n", __func__); + malloc_printf("%s(): Error in pthread_create()\n", __func__); ret = 1; goto label_return; } @@ -133,6 +133,6 @@ main(void) thread_start(NULL); label_return: - 
fprintf(stderr, "Test end\n"); + malloc_printf("Test end\n"); return (ret); } diff --git a/test/allocm.c b/test/allocm.c index 3aa0fd23..c6bc6f83 100644 --- a/test/allocm.c +++ b/test/allocm.c @@ -19,52 +19,52 @@ main(void) unsigned i; void *ps[NITER]; - fprintf(stderr, "Test begin\n"); + malloc_printf("Test begin\n"); sz = 42; nsz = 0; r = nallocm(&nsz, sz, 0); if (r != ALLOCM_SUCCESS) { - fprintf(stderr, "Unexpected nallocm() error\n"); + malloc_printf("Unexpected nallocm() error\n"); abort(); } rsz = 0; r = allocm(&p, &rsz, sz, 0); if (r != ALLOCM_SUCCESS) { - fprintf(stderr, "Unexpected allocm() error\n"); + malloc_printf("Unexpected allocm() error\n"); abort(); } if (rsz < sz) - fprintf(stderr, "Real size smaller than expected\n"); + malloc_printf("Real size smaller than expected\n"); if (nsz != rsz) - fprintf(stderr, "nallocm()/allocm() rsize mismatch\n"); + malloc_printf("nallocm()/allocm() rsize mismatch\n"); if (dallocm(p, 0) != ALLOCM_SUCCESS) - fprintf(stderr, "Unexpected dallocm() error\n"); + malloc_printf("Unexpected dallocm() error\n"); r = allocm(&p, NULL, sz, 0); if (r != ALLOCM_SUCCESS) { - fprintf(stderr, "Unexpected allocm() error\n"); + malloc_printf("Unexpected allocm() error\n"); abort(); } if (dallocm(p, 0) != ALLOCM_SUCCESS) - fprintf(stderr, "Unexpected dallocm() error\n"); + malloc_printf("Unexpected dallocm() error\n"); nsz = 0; r = nallocm(&nsz, sz, ALLOCM_ZERO); if (r != ALLOCM_SUCCESS) { - fprintf(stderr, "Unexpected nallocm() error\n"); + malloc_printf("Unexpected nallocm() error\n"); abort(); } rsz = 0; r = allocm(&p, &rsz, sz, ALLOCM_ZERO); if (r != ALLOCM_SUCCESS) { - fprintf(stderr, "Unexpected allocm() error\n"); + malloc_printf("Unexpected allocm() error\n"); abort(); } if (nsz != rsz) - fprintf(stderr, "nallocm()/allocm() rsize mismatch\n"); + malloc_printf("nallocm()/allocm() rsize mismatch\n"); if (dallocm(p, 0) != ALLOCM_SUCCESS) - fprintf(stderr, "Unexpected dallocm() error\n"); + malloc_printf("Unexpected dallocm() error\n"); #if LG_SIZEOF_PTR == 3 alignment = UINT64_C(0x8000000000000000); @@ -76,19 +76,19 @@ main(void) nsz = 0; r = nallocm(&nsz, sz, ALLOCM_ALIGN(alignment)); if (r == ALLOCM_SUCCESS) { - fprintf(stderr, + malloc_printf( "Expected error for nallocm(&nsz, %zu, %#x)\n", sz, ALLOCM_ALIGN(alignment)); } rsz = 0; r = allocm(&p, &rsz, sz, ALLOCM_ALIGN(alignment)); if (r == ALLOCM_SUCCESS) { - fprintf(stderr, + malloc_printf( "Expected error for allocm(&p, %zu, %#x)\n", sz, ALLOCM_ALIGN(alignment)); } if (nsz != rsz) - fprintf(stderr, "nallocm()/allocm() rsize mismatch\n"); + malloc_printf("nallocm()/allocm() rsize mismatch\n"); #if LG_SIZEOF_PTR == 3 alignment = UINT64_C(0x4000000000000000); @@ -100,11 +100,11 @@ main(void) nsz = 0; r = nallocm(&nsz, sz, ALLOCM_ALIGN(alignment)); if (r != ALLOCM_SUCCESS) - fprintf(stderr, "Unexpected nallocm() error\n"); + malloc_printf("Unexpected nallocm() error\n"); rsz = 0; r = allocm(&p, &rsz, sz, ALLOCM_ALIGN(alignment)); if (r == ALLOCM_SUCCESS) { - fprintf(stderr, + malloc_printf( "Expected error for allocm(&p, %zu, %#x)\n", sz, ALLOCM_ALIGN(alignment)); } @@ -118,19 +118,19 @@ main(void) nsz = 0; r = nallocm(&nsz, sz, ALLOCM_ALIGN(alignment)); if (r == ALLOCM_SUCCESS) { - fprintf(stderr, + malloc_printf( "Expected error for nallocm(&nsz, %zu, %#x)\n", sz, ALLOCM_ALIGN(alignment)); } rsz = 0; r = allocm(&p, &rsz, sz, ALLOCM_ALIGN(alignment)); if (r == ALLOCM_SUCCESS) { - fprintf(stderr, + malloc_printf( "Expected error for allocm(&p, %zu, %#x)\n", sz, ALLOCM_ALIGN(alignment)); } if (nsz != rsz) 
- fprintf(stderr, "nallocm()/allocm() rsize mismatch\n"); + malloc_printf("nallocm()/allocm() rsize mismatch\n"); for (i = 0; i < NITER; i++) ps[i] = NULL; @@ -139,7 +139,7 @@ main(void) alignment <= MAXALIGN; alignment <<= 1) { total = 0; - fprintf(stderr, "Alignment: %zu\n", alignment); + malloc_printf("Alignment: %zu\n", alignment); for (sz = 1; sz < 3 * alignment && sz < (1U << 31); sz += (alignment >> (LG_SIZEOF_PTR-1)) - 1) { @@ -148,7 +148,7 @@ main(void) r = nallocm(&nsz, sz, ALLOCM_ALIGN(alignment) | ALLOCM_ZERO); if (r != ALLOCM_SUCCESS) { - fprintf(stderr, + malloc_printf( "nallocm() error for size %zu" " (%#zx): %d\n", sz, sz, r); @@ -158,24 +158,24 @@ main(void) r = allocm(&ps[i], &rsz, sz, ALLOCM_ALIGN(alignment) | ALLOCM_ZERO); if (r != ALLOCM_SUCCESS) { - fprintf(stderr, + malloc_printf( "allocm() error for size %zu" " (%#zx): %d\n", sz, sz, r); exit(1); } if (rsz < sz) { - fprintf(stderr, + malloc_printf( "Real size smaller than" " expected\n"); } if (nsz != rsz) { - fprintf(stderr, + malloc_printf( "nallocm()/allocm() rsize" " mismatch\n"); } if ((uintptr_t)p & (alignment-1)) { - fprintf(stderr, + malloc_printf( "%p inadequately aligned for" " alignment: %zu\n", p, alignment); } @@ -193,6 +193,6 @@ main(void) } } - fprintf(stderr, "Test end\n"); + malloc_printf("Test end\n"); return (0); } diff --git a/test/bitmap.c b/test/bitmap.c index adfaacfe..ee9b1ecc 100644 --- a/test/bitmap.c +++ b/test/bitmap.c @@ -7,12 +7,6 @@ * */ #include -/* - * Directly include the bitmap code, since it isn't exposed outside - * libjemalloc. - */ -#include "../src/bitmap.c" - #if (LG_BITMAP_MAXBITS > 12) # define MAXBITS 4500 #else @@ -144,7 +138,7 @@ test_bitmap_sfu(void) int main(void) { - fprintf(stderr, "Test begin\n"); + malloc_printf("Test begin\n"); test_bitmap_size(); test_bitmap_init(); @@ -152,6 +146,6 @@ main(void) test_bitmap_unset(); test_bitmap_sfu(); - fprintf(stderr, "Test end\n"); + malloc_printf("Test end\n"); return (0); } diff --git a/test/jemalloc_test.h.in b/test/jemalloc_test.h.in index 0c48895e..58fa08e4 100644 --- a/test/jemalloc_test.h.in +++ b/test/jemalloc_test.h.in @@ -4,3 +4,4 @@ * have a different name. 
*/ #include "jemalloc/jemalloc@install_suffix@.h" +#include "jemalloc/internal/jemalloc_internal.h" diff --git a/test/mremap.c b/test/mremap.c index cac3bd82..84c03491 100644 --- a/test/mremap.c +++ b/test/mremap.c @@ -14,12 +14,12 @@ main(void) size_t sz, lg_chunk, chunksize, i; char *p, *q; - fprintf(stderr, "Test begin\n"); + malloc_printf("Test begin\n"); sz = sizeof(lg_chunk); if ((err = mallctl("opt.lg_chunk", &lg_chunk, &sz, NULL, 0))) { assert(err != ENOENT); - fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__, + malloc_printf("%s(): Error in mallctl(): %s\n", __func__, strerror(err)); ret = 1; goto label_return; @@ -28,7 +28,7 @@ main(void) p = (char *)malloc(chunksize); if (p == NULL) { - fprintf(stderr, "malloc(%zu) --> %p\n", chunksize, p); + malloc_printf("malloc(%zu) --> %p\n", chunksize, p); ret = 1; goto label_return; } @@ -36,7 +36,7 @@ main(void) q = (char *)realloc(p, chunksize * 2); if (q == NULL) { - fprintf(stderr, "realloc(%p, %zu) --> %p\n", p, chunksize * 2, + malloc_printf("realloc(%p, %zu) --> %p\n", p, chunksize * 2, q); ret = 1; goto label_return; @@ -49,7 +49,7 @@ main(void) q = (char *)realloc(p, chunksize); if (q == NULL) { - fprintf(stderr, "realloc(%p, %zu) --> %p\n", p, chunksize, q); + malloc_printf("realloc(%p, %zu) --> %p\n", p, chunksize, q); ret = 1; goto label_return; } @@ -61,6 +61,6 @@ main(void) ret = 0; label_return: - fprintf(stderr, "Test end\n"); + malloc_printf("Test end\n"); return (ret); } diff --git a/test/posix_memalign.c b/test/posix_memalign.c index 0ea35c89..e1302df2 100644 --- a/test/posix_memalign.c +++ b/test/posix_memalign.c @@ -21,13 +21,13 @@ main(void) int err; void *p, *ps[NITER]; - fprintf(stderr, "Test begin\n"); + malloc_printf("Test begin\n"); /* Test error conditions. */ for (alignment = 0; alignment < sizeof(void *); alignment++) { err = posix_memalign(&p, alignment, 1); if (err != EINVAL) { - fprintf(stderr, + malloc_printf( "Expected error for invalid alignment %zu\n", alignment); } @@ -37,7 +37,7 @@ main(void) alignment <<= 1) { err = posix_memalign(&p, alignment + 1, 1); if (err == 0) { - fprintf(stderr, + malloc_printf( "Expected error for invalid alignment %zu\n", alignment + 1); } @@ -52,7 +52,7 @@ main(void) #endif err = posix_memalign(&p, alignment, size); if (err == 0) { - fprintf(stderr, + malloc_printf( "Expected error for posix_memalign(&p, %zu, %zu)\n", alignment, size); } @@ -66,7 +66,7 @@ main(void) #endif err = posix_memalign(&p, alignment, size); if (err == 0) { - fprintf(stderr, + malloc_printf( "Expected error for posix_memalign(&p, %zu, %zu)\n", alignment, size); } @@ -79,7 +79,7 @@ main(void) #endif err = posix_memalign(&p, alignment, size); if (err == 0) { - fprintf(stderr, + malloc_printf( "Expected error for posix_memalign(&p, %zu, %zu)\n", alignment, size); } @@ -91,7 +91,7 @@ main(void) alignment <= MAXALIGN; alignment <<= 1) { total = 0; - fprintf(stderr, "Alignment: %zu\n", alignment); + malloc_printf("Alignment: %zu\n", alignment); for (size = 1; size < 3 * alignment && size < (1U << 31); size += (alignment >> (LG_SIZEOF_PTR-1)) - 1) { @@ -99,7 +99,7 @@ main(void) err = posix_memalign(&ps[i], alignment, size); if (err) { - fprintf(stderr, + malloc_printf( "Error for size %zu (%#zx): %s\n", size, size, strerror(err)); exit(1); @@ -117,6 +117,6 @@ main(void) } } - fprintf(stderr, "Test end\n"); + malloc_printf("Test end\n"); return (0); } diff --git a/test/rallocm.c b/test/rallocm.c index 9c0df403..18db5eec 100644 --- a/test/rallocm.c +++ b/test/rallocm.c @@ -15,7 +15,7 @@ main(void) 
size_t sz, tsz; int r; - fprintf(stderr, "Test begin\n"); + malloc_printf("Test begin\n"); /* Get page size. */ { @@ -26,51 +26,51 @@ main(void) r = allocm(&p, &sz, 42, 0); if (r != ALLOCM_SUCCESS) { - fprintf(stderr, "Unexpected allocm() error\n"); + malloc_printf("Unexpected allocm() error\n"); abort(); } q = p; r = rallocm(&q, &tsz, sz, 0, ALLOCM_NO_MOVE); if (r != ALLOCM_SUCCESS) - fprintf(stderr, "Unexpected rallocm() error\n"); + malloc_printf("Unexpected rallocm() error\n"); if (q != p) - fprintf(stderr, "Unexpected object move\n"); + malloc_printf("Unexpected object move\n"); if (tsz != sz) { - fprintf(stderr, "Unexpected size change: %zu --> %zu\n", + malloc_printf("Unexpected size change: %zu --> %zu\n", sz, tsz); } q = p; r = rallocm(&q, &tsz, sz, 5, ALLOCM_NO_MOVE); if (r != ALLOCM_SUCCESS) - fprintf(stderr, "Unexpected rallocm() error\n"); + malloc_printf("Unexpected rallocm() error\n"); if (q != p) - fprintf(stderr, "Unexpected object move\n"); + malloc_printf("Unexpected object move\n"); if (tsz != sz) { - fprintf(stderr, "Unexpected size change: %zu --> %zu\n", + malloc_printf("Unexpected size change: %zu --> %zu\n", sz, tsz); } q = p; r = rallocm(&q, &tsz, sz + 5, 0, ALLOCM_NO_MOVE); if (r != ALLOCM_ERR_NOT_MOVED) - fprintf(stderr, "Unexpected rallocm() result\n"); + malloc_printf("Unexpected rallocm() result\n"); if (q != p) - fprintf(stderr, "Unexpected object move\n"); + malloc_printf("Unexpected object move\n"); if (tsz != sz) { - fprintf(stderr, "Unexpected size change: %zu --> %zu\n", + malloc_printf("Unexpected size change: %zu --> %zu\n", sz, tsz); } q = p; r = rallocm(&q, &tsz, sz + 5, 0, 0); if (r != ALLOCM_SUCCESS) - fprintf(stderr, "Unexpected rallocm() error\n"); + malloc_printf("Unexpected rallocm() error\n"); if (q == p) - fprintf(stderr, "Expected object move\n"); + malloc_printf("Expected object move\n"); if (tsz == sz) { - fprintf(stderr, "Expected size change: %zu --> %zu\n", + malloc_printf("Expected size change: %zu --> %zu\n", sz, tsz); } p = q; @@ -78,11 +78,11 @@ main(void) r = rallocm(&q, &tsz, pagesize*2, 0, 0); if (r != ALLOCM_SUCCESS) - fprintf(stderr, "Unexpected rallocm() error\n"); + malloc_printf("Unexpected rallocm() error\n"); if (q == p) - fprintf(stderr, "Expected object move\n"); + malloc_printf("Expected object move\n"); if (tsz == sz) { - fprintf(stderr, "Expected size change: %zu --> %zu\n", + malloc_printf("Expected size change: %zu --> %zu\n", sz, tsz); } p = q; @@ -90,9 +90,9 @@ main(void) r = rallocm(&q, &tsz, pagesize*4, 0, 0); if (r != ALLOCM_SUCCESS) - fprintf(stderr, "Unexpected rallocm() error\n"); + malloc_printf("Unexpected rallocm() error\n"); if (tsz == sz) { - fprintf(stderr, "Expected size change: %zu --> %zu\n", + malloc_printf("Expected size change: %zu --> %zu\n", sz, tsz); } p = q; @@ -100,28 +100,28 @@ main(void) r = rallocm(&q, &tsz, pagesize*2, 0, ALLOCM_NO_MOVE); if (r != ALLOCM_SUCCESS) - fprintf(stderr, "Unexpected rallocm() error\n"); + malloc_printf("Unexpected rallocm() error\n"); if (q != p) - fprintf(stderr, "Unexpected object move\n"); + malloc_printf("Unexpected object move\n"); if (tsz == sz) { - fprintf(stderr, "Expected size change: %zu --> %zu\n", + malloc_printf("Expected size change: %zu --> %zu\n", sz, tsz); } sz = tsz; r = rallocm(&q, &tsz, pagesize*4, 0, ALLOCM_NO_MOVE); if (r != ALLOCM_SUCCESS) - fprintf(stderr, "Unexpected rallocm() error\n"); + malloc_printf("Unexpected rallocm() error\n"); if (q != p) - fprintf(stderr, "Unexpected object move\n"); + malloc_printf("Unexpected object 
move\n"); if (tsz == sz) { - fprintf(stderr, "Expected size change: %zu --> %zu\n", + malloc_printf("Expected size change: %zu --> %zu\n", sz, tsz); } sz = tsz; dallocm(p, 0); - fprintf(stderr, "Test end\n"); + malloc_printf("Test end\n"); return (0); } diff --git a/test/thread_arena.c b/test/thread_arena.c index 9b7b2ddc..e443b712 100644 --- a/test/thread_arena.c +++ b/test/thread_arena.c @@ -20,14 +20,14 @@ thread_start(void *arg) p = malloc(1); if (p == NULL) { - fprintf(stderr, "%s(): Error in malloc()\n", __func__); + malloc_printf("%s(): Error in malloc()\n", __func__); return (void *)1; } size = sizeof(arena_ind); if ((err = mallctl("thread.arena", &arena_ind, &size, &main_arena_ind, sizeof(main_arena_ind)))) { - fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__, + malloc_printf("%s(): Error in mallctl(): %s\n", __func__, strerror(err)); return (void *)1; } @@ -35,7 +35,7 @@ thread_start(void *arg) size = sizeof(arena_ind); if ((err = mallctl("thread.arena", &arena_ind, &size, NULL, 0))) { - fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__, + malloc_printf("%s(): Error in mallctl(): %s\n", __func__, strerror(err)); return (void *)1; } @@ -55,18 +55,18 @@ main(void) pthread_t threads[NTHREADS]; unsigned i; - fprintf(stderr, "Test begin\n"); + malloc_printf("Test begin\n"); p = malloc(1); if (p == NULL) { - fprintf(stderr, "%s(): Error in malloc()\n", __func__); + malloc_printf("%s(): Error in malloc()\n", __func__); ret = 1; goto label_return; } size = sizeof(arena_ind); if ((err = mallctl("thread.arena", &arena_ind, &size, NULL, 0))) { - fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__, + malloc_printf("%s(): Error in mallctl(): %s\n", __func__, strerror(err)); ret = 1; goto label_return; @@ -75,7 +75,7 @@ main(void) for (i = 0; i < NTHREADS; i++) { if (pthread_create(&threads[i], NULL, thread_start, (void *)&arena_ind) != 0) { - fprintf(stderr, "%s(): Error in pthread_create()\n", + malloc_printf("%s(): Error in pthread_create()\n", __func__); ret = 1; goto label_return; @@ -86,6 +86,6 @@ main(void) pthread_join(threads[i], (void *)&ret); label_return: - fprintf(stderr, "Test end\n"); + malloc_printf("Test end\n"); return (ret); } diff --git a/test/thread_tcache_enabled.c b/test/thread_tcache_enabled.c index 0a3e45a9..59b76a27 100644 --- a/test/thread_tcache_enabled.c +++ b/test/thread_tcache_enabled.c @@ -79,13 +79,13 @@ main(void) int ret = 0; pthread_t thread; - fprintf(stderr, "Test begin\n"); + malloc_printf("Test begin\n"); thread_start(NULL); if (pthread_create(&thread, NULL, thread_start, NULL) != 0) { - fprintf(stderr, "%s(): Error in pthread_create()\n", __func__); + malloc_printf("%s(): Error in pthread_create()\n", __func__); ret = 1; goto label_return; } @@ -95,7 +95,7 @@ main(void) if (pthread_create(&thread, NULL, thread_start, NULL) != 0) { - fprintf(stderr, "%s(): Error in pthread_create()\n", __func__); + malloc_printf("%s(): Error in pthread_create()\n", __func__); ret = 1; goto label_return; } @@ -104,6 +104,6 @@ main(void) thread_start(NULL); label_return: - fprintf(stderr, "Test end\n"); + malloc_printf("Test end\n"); return (ret); } From b57d3ec571c6551231be62b7bf92c084a8c8291c Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 17 Apr 2012 13:17:54 -0700 Subject: [PATCH 129/205] Add atomic(9) implementations of atomic operations. Add atomic(9) implementations of atomic operations. These are used on FreeBSD for non-x86 architectures. 
--- configure.ac | 25 +++++++++- include/jemalloc/internal/atomic.h | 46 +++++++++++++++++-- .../jemalloc/internal/jemalloc_internal.h.in | 4 ++ include/jemalloc/jemalloc_defs.h.in | 3 ++ 4 files changed, 72 insertions(+), 6 deletions(-) diff --git a/configure.ac b/configure.ac index 3a7a2457..f4c25060 100644 --- a/configure.ac +++ b/configure.ac @@ -980,6 +980,29 @@ if test "x${je_cv_function_ffsl}" != "xyes" ; then AC_MSG_ERROR([Cannot build without ffsl(3)]) fi +dnl ============================================================================ +dnl Check for atomic(9) operations as provided on FreeBSD. + +JE_COMPILABLE([atomic(9)], [ +#include +#include +#include +], [ + { + uint32_t x32 = 0; + volatile uint32_t *x32p = &x32; + atomic_fetchadd_32(x32p, 1); + } + { + unsigned long xlong = 0; + volatile unsigned long *xlongp = &xlong; + atomic_fetchadd_long(xlongp, 1); + } +], [je_cv_atomic9]) +if test "x${je_cv_atomic9}" = "xyes" ; then + AC_DEFINE([JEMALLOC_ATOMIC9]) +fi + dnl ============================================================================ dnl Check for atomic(3) operations as provided on Darwin. @@ -1031,7 +1054,7 @@ AC_DEFUN([JE_SYNC_COMPARE_AND_SWAP_CHECK],[ fi ]) -if test "x${je_cv_osatomic}" != "xyes" ; then +if test "x${je_cv_atomic9}" != "xyes" -a "x${je_cv_osatomic}" != "xyes" ; then JE_SYNC_COMPARE_AND_SWAP_CHECK(32, 4) JE_SYNC_COMPARE_AND_SWAP_CHECK(64, 8) fi diff --git a/include/jemalloc/internal/atomic.h b/include/jemalloc/internal/atomic.h index d8f6ca57..016c472a 100644 --- a/include/jemalloc/internal/atomic.h +++ b/include/jemalloc/internal/atomic.h @@ -32,7 +32,8 @@ unsigned atomic_sub_u(unsigned *p, unsigned x); #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ATOMIC_C_)) /******************************************************************************/ /* 64-bit operations. */ -#ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 +#if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3) +# ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 JEMALLOC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) { @@ -60,7 +61,7 @@ atomic_sub_uint64(uint64_t *p, uint64_t x) return (OSAtomicAdd64(-((int64_t)x), (int64_t *)p)); } -#elif (defined(__amd64__) || defined(__x86_64__)) +# elif (defined(__amd64__) || defined(__x86_64__)) JEMALLOC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) { @@ -87,7 +88,29 @@ atomic_sub_uint64(uint64_t *p, uint64_t x) return (x); } -#elif (defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_8)) +# elif (defined(JEMALLOC_ATOMIC9)) +JEMALLOC_INLINE uint64_t +atomic_add_uint64(uint64_t *p, uint64_t x) +{ + + /* + * atomic_fetchadd_64() doesn't exist, but we only ever use this + * function on LP64 systems, so atomic_fetchadd_long() will do. 
+ */ + assert(sizeof(uint64_t) == sizeof(unsigned long)); + + return (atomic_fetchadd_long(p, (unsigned long)x) + x); +} + +JEMALLOC_INLINE uint64_t +atomic_sub_uint64(uint64_t *p, uint64_t x) +{ + + assert(sizeof(uint64_t) == sizeof(unsigned long)); + + return (atomic_fetchadd_long(p, (unsigned long)(-(long)x)) - x); +} +# elif (defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_8)) JEMALLOC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) { @@ -101,8 +124,7 @@ atomic_sub_uint64(uint64_t *p, uint64_t x) return (__sync_sub_and_fetch(p, x)); } -#else -# if (LG_SIZEOF_PTR == 3) +# else # error "Missing implementation for 64-bit atomic operations" # endif #endif @@ -164,6 +186,20 @@ atomic_sub_uint32(uint32_t *p, uint32_t x) return (x); } +#elif (defined(JEMALLOC_ATOMIC9)) +JEMALLOC_INLINE uint32_t +atomic_add_uint32(uint32_t *p, uint32_t x) +{ + + return (atomic_fetchadd_32(p, x) + x); +} + +JEMALLOC_INLINE uint32_t +atomic_sub_uint32(uint32_t *p, uint32_t x) +{ + + return (atomic_fetchadd_32(p, (uint32_t)(-(int32_t)x)) - x); +} #elif (defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_4)) JEMALLOC_INLINE uint32_t atomic_add_uint32(uint32_t *p, uint32_t x) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 51d40fb8..905653a2 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -161,6 +161,10 @@ static const bool config_ivsalloc = #endif ; +#ifdef JEMALLOC_ATOMIC9 +#include +#endif + #if (defined(JEMALLOC_OSATOMIC) || defined(JEMALLOC_OSSPIN)) #include #endif diff --git a/include/jemalloc/jemalloc_defs.h.in b/include/jemalloc/jemalloc_defs.h.in index b6e5593b..90baa355 100644 --- a/include/jemalloc/jemalloc_defs.h.in +++ b/include/jemalloc/jemalloc_defs.h.in @@ -47,6 +47,9 @@ */ #undef CPU_SPINWAIT +/* Defined if the equivalent of FreeBSD's atomic(9) functions are available. */ +#undef JEMALLOC_ATOMIC9 + /* * Defined if OSAtomic*() functions are available, as provided by Darwin, and * documented in the atomic(3) manual page. From 25a000e89649d9ce5aacc1089408b8b3bafeb5e4 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 17 Apr 2012 15:49:30 -0700 Subject: [PATCH 130/205] Update pprof (from gperftools 2.0). --- ChangeLog | 1 + bin/pprof | 839 ++++++++++++++++++++++++++++++++++---------- doc/jemalloc.xml.in | 2 +- 3 files changed, 649 insertions(+), 193 deletions(-) diff --git a/ChangeLog b/ChangeLog index 8f6edd5a..1db47d8c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -34,6 +34,7 @@ found in the git revision history: - Add the --with-mangling option. - Add the --disable-experimental option. - Add the "thread.tcache.enabled" mallctl. + - Update pprof (from gperftools 2.0). Incompatible changes: - Enable stats by default. diff --git a/bin/pprof b/bin/pprof index 280ddcc8..727eb437 100755 --- a/bin/pprof +++ b/bin/pprof @@ -72,7 +72,7 @@ use strict; use warnings; use Getopt::Long; -my $PPROF_VERSION = "1.7"; +my $PPROF_VERSION = "2.0"; # These are the object tools we use which can come from a # user-specified location using --tools, from the PPROF_TOOLS @@ -87,13 +87,14 @@ my %obj_tool_map = ( #"addr2line_pdb" => "addr2line-pdb", # ditto #"otool" => "otool", # equivalent of objdump on OS X ); -my $DOT = "dot"; # leave non-absolute, since it may be in /usr/local -my $GV = "gv"; -my $EVINCE = "evince"; # could also be xpdf or perhaps acroread -my $KCACHEGRIND = "kcachegrind"; -my $PS2PDF = "ps2pdf"; +# NOTE: these are lists, so you can put in commandline flags if you want. 
+my @DOT = ("dot"); # leave non-absolute, since it may be in /usr/local +my @GV = ("gv"); +my @EVINCE = ("evince"); # could also be xpdf or perhaps acroread +my @KCACHEGRIND = ("kcachegrind"); +my @PS2PDF = ("ps2pdf"); # These are used for dynamic profiles -my $URL_FETCHER = "curl -s"; +my @URL_FETCHER = ("curl", "-s"); # These are the web pages that servers need to support for dynamic profiles my $HEAP_PAGE = "/pprof/heap"; @@ -104,7 +105,10 @@ my $GROWTH_PAGE = "/pprof/growth"; my $CONTENTION_PAGE = "/pprof/contention"; my $WALL_PAGE = "/pprof/wall(?:\\?.*)?"; # accepts options like namefilter my $FILTEREDPROFILE_PAGE = "/pprof/filteredprofile(?:\\?.*)?"; -my $CENSUSPROFILE_PAGE = "/pprof/censusprofile"; # must support "?seconds=#" +my $CENSUSPROFILE_PAGE = "/pprof/censusprofile(?:\\?.*)?"; # must support cgi-param + # "?seconds=#", + # "?tags_regexp=#" and + # "?type=#". my $SYMBOL_PAGE = "/pprof/symbol"; # must support symbol lookup via POST my $PROGRAM_NAME_PAGE = "/pprof/cmdline"; @@ -122,6 +126,11 @@ my $UNKNOWN_BINARY = "(unknown)"; # 64-bit profiles. To err on the safe size, default to 64-bit here: my $address_length = 16; +my $dev_null = "/dev/null"; +if (! -e $dev_null && $^O =~ /MSWin/) { # $^O is the OS perl was built for + $dev_null = "nul"; +} + # A list of paths to search for shared object files my @prefix_list = (); @@ -151,7 +160,8 @@ pprof [options] The / can be $HEAP_PAGE, $PROFILE_PAGE, /pprof/pmuprofile, $GROWTH_PAGE, $CONTENTION_PAGE, /pprof/wall, $CENSUSPROFILE_PAGE, or /pprof/filteredprofile. - For instance: "pprof http://myserver.com:80$HEAP_PAGE". + For instance: + pprof http://myserver.com:80$HEAP_PAGE If / is omitted, the service defaults to $PROFILE_PAGE (cpu profiling). pprof --symbols Maps addresses to symbol names. In this mode, stdin should be a @@ -162,7 +172,7 @@ pprof --symbols For more help with querying remote servers, including how to add the necessary server-side support code, see this filename (or one like it): - /usr/doc/google-perftools-$PPROF_VERSION/pprof_remote_servers.html + /usr/doc/gperftools-$PPROF_VERSION/pprof_remote_servers.html Options: --cum Sort by cumulative data @@ -260,7 +270,7 @@ EOF sub version_string { return < 0) { + if (IsProfileURL($ARGV[0])) { + $main::use_symbol_page = 1; + } elsif (IsSymbolizedProfileFile($ARGV[0])) { + $main::use_symbolized_profile = 1; + $main::prog = $UNKNOWN_BINARY; # will be set later from the profile file + } } if ($main::use_symbol_page || $main::use_symbolized_profile) { @@ -540,7 +552,7 @@ sub Init() { ConfigureObjTools($main::prog) } - # Break the opt_list_prefix into the prefix_list array + # Break the opt_lib_prefix into the prefix_list array @prefix_list = split (',', $main::opt_lib_prefix); # Remove trailing / from the prefixes, in the list to prevent @@ -636,9 +648,9 @@ sub Main() { # Print if (!$main::opt_interactive) { if ($main::opt_disasm) { - PrintDisassembly($libs, $flat, $cumulative, $main::opt_disasm, $total); + PrintDisassembly($libs, $flat, $cumulative, $main::opt_disasm); } elsif ($main::opt_list) { - PrintListing($libs, $flat, $cumulative, $main::opt_list); + PrintListing($total, $libs, $flat, $cumulative, $main::opt_list, 0); } elsif ($main::opt_text) { # Make sure the output is empty when have nothing to report # (only matters when --heapcheck is given but we must be @@ -646,7 +658,7 @@ sub Main() { if ($total != 0) { printf("Total: %s %s\n", Unparse($total), Units()); } - PrintText($symbols, $flat, $cumulative, $total, -1); + PrintText($symbols, $flat, $cumulative, -1); } 
elsif ($main::opt_raw) { PrintSymbolizedProfile($symbols, $profile, $main::prog); } elsif ($main::opt_callgrind) { @@ -656,7 +668,7 @@ sub Main() { if ($main::opt_gv) { RunGV(TempName($main::next_tmpfile, "ps"), ""); } elsif ($main::opt_evince) { - RunEvince(TempName($main::next_tmpfile, "pdf"), ""); + RunEvince(TempName($main::next_tmpfile, "pdf"), ""); } elsif ($main::opt_web) { my $tmp = TempName($main::next_tmpfile, "svg"); RunWeb($tmp); @@ -705,24 +717,25 @@ sub ReadlineMightFail { sub RunGV { my $fname = shift; my $bg = shift; # "" or " &" if we should run in background - if (!system("$GV --version >/dev/null 2>&1")) { + if (!system(ShellEscape(@GV, "--version") . " >$dev_null 2>&1")) { # Options using double dash are supported by this gv version. # Also, turn on noantialias to better handle bug in gv for # postscript files with large dimensions. # TODO: Maybe we should not pass the --noantialias flag # if the gv version is known to work properly without the flag. - system("$GV --scale=$main::opt_scale --noantialias " . $fname . $bg); + system(ShellEscape(@GV, "--scale=$main::opt_scale", "--noantialias", $fname) + . $bg); } else { # Old gv version - only supports options that use single dash. - print STDERR "$GV -scale $main::opt_scale\n"; - system("$GV -scale $main::opt_scale " . $fname . $bg); + print STDERR ShellEscape(@GV, "-scale", $main::opt_scale) . "\n"; + system(ShellEscape(@GV, "-scale", "$main::opt_scale", $fname) . $bg); } } sub RunEvince { my $fname = shift; my $bg = shift; # "" or " &" if we should run in background - system("$EVINCE " . $fname . $bg); + system(ShellEscape(@EVINCE, $fname) . $bg); } sub RunWeb { @@ -756,8 +769,8 @@ sub RunWeb { sub RunKcachegrind { my $fname = shift; my $bg = shift; # "" or " &" if we should run in background - print STDERR "Starting '$KCACHEGRIND " . $fname . $bg . "'\n"; - system("$KCACHEGRIND " . $fname . $bg); + print STDERR "Starting '@KCACHEGRIND " . $fname . $bg . "'\n"; + system(ShellEscape(@KCACHEGRIND, $fname) . 
$bg); } @@ -834,14 +847,14 @@ sub InteractiveCommand { my $ignore; ($routine, $ignore) = ParseInteractiveArgs($3); - my $profile = ProcessProfile($orig_profile, $symbols, "", $ignore); + my $profile = ProcessProfile($total, $orig_profile, $symbols, "", $ignore); my $reduced = ReduceProfile($symbols, $profile); # Get derived profiles my $flat = FlatProfile($reduced); my $cumulative = CumulativeProfile($reduced); - PrintText($symbols, $flat, $cumulative, $total, $line_limit); + PrintText($symbols, $flat, $cumulative, $line_limit); return 1; } if (m/^\s*callgrind\s*([^ \n]*)/) { @@ -861,21 +874,22 @@ sub InteractiveCommand { return 1; } - if (m/^\s*list\s*(.+)/) { + if (m/^\s*(web)?list\s*(.+)/) { + my $html = (defined($1) && ($1 eq "web")); $main::opt_list = 1; my $routine; my $ignore; - ($routine, $ignore) = ParseInteractiveArgs($1); + ($routine, $ignore) = ParseInteractiveArgs($2); - my $profile = ProcessProfile($orig_profile, $symbols, "", $ignore); + my $profile = ProcessProfile($total, $orig_profile, $symbols, "", $ignore); my $reduced = ReduceProfile($symbols, $profile); # Get derived profiles my $flat = FlatProfile($reduced); my $cumulative = CumulativeProfile($reduced); - PrintListing($libs, $flat, $cumulative, $routine); + PrintListing($total, $libs, $flat, $cumulative, $routine, $html); return 1; } if (m/^\s*disasm\s*(.+)/) { @@ -886,14 +900,14 @@ sub InteractiveCommand { ($routine, $ignore) = ParseInteractiveArgs($1); # Process current profile to account for various settings - my $profile = ProcessProfile($orig_profile, $symbols, "", $ignore); + my $profile = ProcessProfile($total, $orig_profile, $symbols, "", $ignore); my $reduced = ReduceProfile($symbols, $profile); # Get derived profiles my $flat = FlatProfile($reduced); my $cumulative = CumulativeProfile($reduced); - PrintDisassembly($libs, $flat, $cumulative, $routine, $total); + PrintDisassembly($libs, $flat, $cumulative, $routine); return 1; } if (m/^\s*(gv|web|evince)\s*(.*)/) { @@ -913,7 +927,8 @@ sub InteractiveCommand { ($focus, $ignore) = ParseInteractiveArgs($2); # Process current profile to account for various settings - my $profile = ProcessProfile($orig_profile, $symbols, $focus, $ignore); + my $profile = ProcessProfile($total, $orig_profile, $symbols, + $focus, $ignore); my $reduced = ReduceProfile($symbols, $profile); # Get derived profiles @@ -941,6 +956,7 @@ sub InteractiveCommand { sub ProcessProfile { + my $total_count = shift; my $orig_profile = shift; my $symbols = shift; my $focus = shift; @@ -948,7 +964,6 @@ sub ProcessProfile { # Process current profile to account for various settings my $profile = $orig_profile; - my $total_count = TotalProfile($profile); printf("Total: %s %s\n", Unparse($total_count), Units()); if ($focus ne '') { $profile = FocusProfile($symbols, $profile, $focus); @@ -995,6 +1010,11 @@ Commands: list [routine_regexp] [-ignore1] [-ignore2] Show source listing of routines whose names match "routine_regexp" + weblist [routine_regexp] [-ignore1] [-ignore2] + Displays a source listing of routines whose names match "routine_regexp" + in a web browser. You can click on source lines to view the + corresponding disassembly. + top [--cum] [-ignore1] [-ignore2] top20 [--cum] [-ignore1] [-ignore2] top37 [--cum] [-ignore1] [-ignore2] @@ -1019,8 +1039,8 @@ parameters will be ignored. 
Further pprof details are available at this location (or one similar): - /usr/doc/google-perftools-$PPROF_VERSION/cpu_profiler.html - /usr/doc/google-perftools-$PPROF_VERSION/heap_profiler.html + /usr/doc/gperftools-$PPROF_VERSION/cpu_profiler.html + /usr/doc/gperftools-$PPROF_VERSION/heap_profiler.html ENDOFHELP } @@ -1137,9 +1157,10 @@ sub PrintText { my $symbols = shift; my $flat = shift; my $cumulative = shift; - my $total = shift; my $line_limit = shift; + my $total = TotalProfile($flat); + # Which profile to sort by? my $s = $main::opt_cum ? $cumulative : $flat; @@ -1169,7 +1190,29 @@ sub PrintText { $sym); } $lines++; - last if ($line_limit >= 0 && $lines > $line_limit); + last if ($line_limit >= 0 && $lines >= $line_limit); + } +} + +# Callgrind format has a compression for repeated function and file +# names. You show the name the first time, and just use its number +# subsequently. This can cut down the file to about a third or a +# quarter of its uncompressed size. $key and $val are the key/value +# pair that would normally be printed by callgrind; $map is a map from +# value to number. +sub CompressedCGName { + my($key, $val, $map) = @_; + my $idx = $map->{$val}; + # For very short keys, providing an index hurts rather than helps. + if (length($val) <= 3) { + return "$key=$val\n"; + } elsif (defined($idx)) { + return "$key=($idx)\n"; + } else { + # scalar(keys $map) gives the number of items in the map. + $idx = scalar(keys(%{$map})) + 1; + $map->{$val} = $idx; + return "$key=($idx) $val\n"; } } @@ -1177,13 +1220,16 @@ sub PrintText { sub PrintCallgrind { my $calls = shift; my $filename; + my %filename_to_index_map; + my %fnname_to_index_map; + if ($main::opt_interactive) { $filename = shift; print STDERR "Writing callgrind file to '$filename'.\n" } else { $filename = "&STDOUT"; } - open(CG, ">".$filename ); + open(CG, ">$filename"); printf CG ("events: Hits\n\n"); foreach my $call ( map { $_->[0] } sort { $a->[1] cmp $b ->[1] || @@ -1197,11 +1243,14 @@ sub PrintCallgrind { $callee_file, $callee_line, $callee_function ) = ( $1, $2, $3, $5, $6, $7 ); - - printf CG ("fl=$caller_file\nfn=$caller_function\n"); + # TODO(csilvers): for better compression, collect all the + # caller/callee_files and functions first, before printing + # anything, and only compress those referenced more than once. + printf CG CompressedCGName("fl", $caller_file, \%filename_to_index_map); + printf CG CompressedCGName("fn", $caller_function, \%fnname_to_index_map); if (defined $6) { - printf CG ("cfl=$callee_file\n"); - printf CG ("cfn=$callee_function\n"); + printf CG CompressedCGName("cfl", $callee_file, \%filename_to_index_map); + printf CG CompressedCGName("cfn", $callee_function, \%fnname_to_index_map); printf CG ("calls=$count $callee_line\n"); } printf CG ("$caller_line $count\n\n"); @@ -1214,7 +1263,8 @@ sub PrintDisassembly { my $flat = shift; my $cumulative = shift; my $disasm_opts = shift; - my $total = shift; + + my $total = TotalProfile($flat); foreach my $lib (@{$libs}) { my $symbol_table = GetProcedureBoundaries($lib->[0], $disasm_opts); @@ -1249,10 +1299,10 @@ sub Disassemble { my $end_addr = shift; my $objdump = $obj_tool_map{"objdump"}; - my $cmd = sprintf("$objdump -C -d -l --no-show-raw-insn " . - "--start-address=0x$start_addr " . 
- "--stop-address=0x$end_addr $prog"); - open(OBJDUMP, "$cmd |") || error("$objdump: $!\n"); + my $cmd = ShellEscape($objdump, "-C", "-d", "-l", "--no-show-raw-insn", + "--start-address=0x$start_addr", + "--stop-address=0x$end_addr", $prog); + open(OBJDUMP, "$cmd |") || error("$cmd: $!\n"); my @result = (); my $filename = ""; my $linenumber = -1; @@ -1315,13 +1365,33 @@ sub ByName { return ShortFunctionName($a) cmp ShortFunctionName($b); } -# Print source-listing for all all routines that match $main::opt_list +# Print source-listing for all all routines that match $list_opts sub PrintListing { + my $total = shift; my $libs = shift; my $flat = shift; my $cumulative = shift; my $list_opts = shift; + my $html = shift; + my $output = \*STDOUT; + my $fname = ""; + + if ($html) { + # Arrange to write the output to a temporary file + $fname = TempName($main::next_tmpfile, "html"); + $main::next_tmpfile++; + if (!open(TEMP, ">$fname")) { + print STDERR "$fname: $!\n"; + return; + } + $output = \*TEMP; + print $output HtmlListingHeader(); + printf $output ("
<div class=\"legend\">%s<br>Total: %s %s</div><hr>
\n", + $main::prog, Unparse($total), Units()); + } + + my $listed = 0; foreach my $lib (@{$libs}) { my $symbol_table = GetProcedureBoundaries($lib->[0], $list_opts); my $offset = AddressSub($lib->[1], $lib->[3]); @@ -1333,15 +1403,113 @@ sub PrintListing { my $addr = AddressAdd($start_addr, $offset); for (my $i = 0; $i < $length; $i++) { if (defined($cumulative->{$addr})) { - PrintSource($lib->[0], $offset, - $routine, $flat, $cumulative, - $start_addr, $end_addr); + $listed += PrintSource( + $lib->[0], $offset, + $routine, $flat, $cumulative, + $start_addr, $end_addr, + $html, + $output); last; } $addr = AddressInc($addr); } } } + + if ($html) { + if ($listed > 0) { + print $output HtmlListingFooter(); + close($output); + RunWeb($fname); + } else { + close($output); + unlink($fname); + } + } +} + +sub HtmlListingHeader { + return <<'EOF'; + + + +Pprof listing + + + + +EOF +} + +sub HtmlListingFooter { + return <<'EOF'; + + +EOF +} + +sub HtmlEscape { + my $text = shift; + $text =~ s/&/&/g; + $text =~ s//>/g; + return $text; } # Returns the indentation of the line, if it has any non-whitespace @@ -1355,6 +1523,45 @@ sub Indentation { } } +# If the symbol table contains inlining info, Disassemble() may tag an +# instruction with a location inside an inlined function. But for +# source listings, we prefer to use the location in the function we +# are listing. So use MapToSymbols() to fetch full location +# information for each instruction and then pick out the first +# location from a location list (location list contains callers before +# callees in case of inlining). +# +# After this routine has run, each entry in $instructions contains: +# [0] start address +# [1] filename for function we are listing +# [2] line number for function we are listing +# [3] disassembly +# [4] limit address +# [5] most specific filename (may be different from [1] due to inlining) +# [6] most specific line number (may be different from [2] due to inlining) +sub GetTopLevelLineNumbers { + my ($lib, $offset, $instructions) = @_; + my $pcs = []; + for (my $i = 0; $i <= $#{$instructions}; $i++) { + push(@{$pcs}, $instructions->[$i]->[0]); + } + my $symbols = {}; + MapToSymbols($lib, $offset, $pcs, $symbols); + for (my $i = 0; $i <= $#{$instructions}; $i++) { + my $e = $instructions->[$i]; + push(@{$e}, $e->[1]); + push(@{$e}, $e->[2]); + my $addr = $e->[0]; + my $sym = $symbols->{$addr}; + if (defined($sym)) { + if ($#{$sym} >= 2 && $sym->[1] =~ m/^(.*):(\d+)$/) { + $e->[1] = $1; # File name + $e->[2] = $2; # Line number + } + } + } +} + # Print source-listing for one routine sub PrintSource { my $prog = shift; @@ -1364,9 +1571,12 @@ sub PrintSource { my $cumulative = shift; my $start_addr = shift; my $end_addr = shift; + my $html = shift; + my $output = shift; # Disassemble all instructions (just to get line numbers) my @instructions = Disassemble($prog, $offset, $start_addr, $end_addr); + GetTopLevelLineNumbers($prog, $offset, \@instructions); # Hack 1: assume that the first source file encountered in the # disassembly contains the routine @@ -1379,7 +1589,7 @@ sub PrintSource { } if (!defined($filename)) { print STDERR "no filename found in $routine\n"; - return; + return 0; } # Hack 2: assume that the largest line number from $filename is the @@ -1412,7 +1622,7 @@ sub PrintSource { { if (!open(FILE, "<$filename")) { print STDERR "$filename: $!\n"; - return; + return 0; } my $l = 0; my $first_indentation = -1; @@ -1440,12 +1650,24 @@ sub PrintSource { # Assign all samples to the range $firstline,$lastline, # 
Hack 4: If an instruction does not occur in the range, its samples # are moved to the next instruction that occurs in the range. - my $samples1 = {}; - my $samples2 = {}; - my $running1 = 0; # Unassigned flat counts - my $running2 = 0; # Unassigned cumulative counts - my $total1 = 0; # Total flat counts - my $total2 = 0; # Total cumulative counts + my $samples1 = {}; # Map from line number to flat count + my $samples2 = {}; # Map from line number to cumulative count + my $running1 = 0; # Unassigned flat counts + my $running2 = 0; # Unassigned cumulative counts + my $total1 = 0; # Total flat counts + my $total2 = 0; # Total cumulative counts + my %disasm = (); # Map from line number to disassembly + my $running_disasm = ""; # Unassigned disassembly + my $skip_marker = "---\n"; + if ($html) { + $skip_marker = ""; + for (my $l = $firstline; $l <= $lastline; $l++) { + $disasm{$l} = ""; + } + } + my $last_dis_filename = ''; + my $last_dis_linenum = -1; + my $last_touched_line = -1; # To detect gaps in disassembly for a line foreach my $e (@instructions) { # Add up counts for all address that fall inside this instruction my $c1 = 0; @@ -1454,6 +1676,38 @@ sub PrintSource { $c1 += GetEntry($flat, $a); $c2 += GetEntry($cumulative, $a); } + + if ($html) { + my $dis = sprintf(" %6s %6s \t\t%8s: %s ", + HtmlPrintNumber($c1), + HtmlPrintNumber($c2), + UnparseAddress($offset, $e->[0]), + CleanDisassembly($e->[3])); + + # Append the most specific source line associated with this instruction + if (length($dis) < 80) { $dis .= (' ' x (80 - length($dis))) }; + $dis = HtmlEscape($dis); + my $f = $e->[5]; + my $l = $e->[6]; + if ($f ne $last_dis_filename) { + $dis .= sprintf("%s:%d", + HtmlEscape(CleanFileName($f)), $l); + } elsif ($l ne $last_dis_linenum) { + # De-emphasize the unchanged file name portion + $dis .= sprintf("%s" . + ":%d", + HtmlEscape(CleanFileName($f)), $l); + } else { + # De-emphasize the entire location + $dis .= sprintf("%s:%d", + HtmlEscape(CleanFileName($f)), $l); + } + $last_dis_filename = $f; + $last_dis_linenum = $l; + $running_disasm .= $dis; + $running_disasm .= "\n"; + } + $running1 += $c1; $running2 += $c2; $total1 += $c1; @@ -1468,23 +1722,49 @@ sub PrintSource { AddEntry($samples2, $line, $running2); $running1 = 0; $running2 = 0; + if ($html) { + if ($line != $last_touched_line && $disasm{$line} ne '') { + $disasm{$line} .= "\n"; + } + $disasm{$line} .= $running_disasm; + $running_disasm = ''; + $last_touched_line = $line; + } } } # Assign any leftover samples to $lastline AddEntry($samples1, $lastline, $running1); AddEntry($samples2, $lastline, $running2); + if ($html) { + if ($lastline != $last_touched_line && $disasm{$lastline} ne '') { + $disasm{$lastline} .= "\n"; + } + $disasm{$lastline} .= $running_disasm; + } - printf("ROUTINE ====================== %s in %s\n" . - "%6s %6s Total %s (flat / cumulative)\n", - ShortFunctionName($routine), - $filename, - Units(), - Unparse($total1), - Unparse($total2)); + if ($html) { + printf $output ( + "
<h1>%s</h1>%s\n<pre onClick=\"pprof_toggle_asm()\">\n" .
+      "Total:%6s %6s (flat / cumulative %s)\n",
+      HtmlEscape(ShortFunctionName($routine)),
+      HtmlEscape(CleanFileName($filename)),
+      Unparse($total1),
+      Unparse($total2),
+      Units());
+  } else {
+    printf $output (
+      "ROUTINE ====================== %s in %s\n" .
+      "%6s %6s Total %s (flat / cumulative)\n",
+      ShortFunctionName($routine),
+      CleanFileName($filename),
+      Unparse($total1),
+      Unparse($total2),
+      Units());
+  }
   if (!open(FILE, "<$filename")) {
     print STDERR "$filename: $!\n";
-    return;
+    return 0;
   }
   my $l = 0;
  while (<FILE>) {
@@ -1494,16 +1774,47 @@ sub PrintSource {
         (($l <= $oldlastline + 5) || ($l <= $lastline))) {
       chop;
       my $text = $_;
-      if ($l == $firstline) { printf("---\n"); }
-      printf("%6s %6s %4d: %s\n",
-             UnparseAlt(GetEntry($samples1, $l)),
-             UnparseAlt(GetEntry($samples2, $l)),
-             $l,
-             $text);
-      if ($l == $lastline)  { printf("---\n"); }
+      if ($l == $firstline) { print $output $skip_marker; }
+      my $n1 = GetEntry($samples1, $l);
+      my $n2 = GetEntry($samples2, $l);
+      if ($html) {
+        # Emit a span that has one of the following classes:
+        #    livesrc -- has samples
+        #    deadsrc -- has disassembly, but with no samples
+        #    nop     -- has no matching disassembly
+        # Also emit an optional span containing disassembly.
+        my $dis = $disasm{$l};
+        my $asm = "";
+        if (defined($dis) && $dis ne '') {
+          $asm = "<span class=\"asm\">" . $dis . "</span>";
+        }
+        my $source_class = (($n1 + $n2 > 0) 
+                            ? "livesrc" 
+                            : (($asm ne "") ? "deadsrc" : "nop"));
+        printf $output (
+          "%5d " .
+          "%6s %6s %s%s\n",
+          $l, $source_class,
+          HtmlPrintNumber($n1),
+          HtmlPrintNumber($n2),
+          HtmlEscape($text),
+          $asm);
+      } else {
+        printf $output(
+          "%6s %6s %4d: %s\n",
+          UnparseAlt($n1),
+          UnparseAlt($n2),
+          $l,
+          $text);
+      }
+      if ($l == $lastline)  { print $output $skip_marker; }
     };
   }
   close(FILE);
+  if ($html) {
+    print $output "</pre>
\n"; + } + return 1; } # Return the source line for the specified file/linenumber. @@ -1646,21 +1957,11 @@ sub PrintDisassembledFunction { # Print disassembly for (my $x = $first_inst; $x <= $last_inst; $x++) { my $e = $instructions[$x]; - my $address = $e->[0]; - $address = AddressSub($address, $offset); # Make relative to section - $address =~ s/^0x//; - $address =~ s/^0*//; - - # Trim symbols - my $d = $e->[3]; - while ($d =~ s/\([^()%]*\)(\s*const)?//g) { } # Argument types, not (%rax) - while ($d =~ s/(\w+)<[^<>]*>/$1/g) { } # Remove template arguments - printf("%6s %6s %8s: %6s\n", UnparseAlt($flat_count[$x]), UnparseAlt($cum_count[$x]), - $address, - $d); + UnparseAddress($offset, $e->[0]), + CleanDisassembly($e->[3])); } } } @@ -1706,19 +2007,24 @@ sub PrintDot { # Open DOT output file my $output; + my $escaped_dot = ShellEscape(@DOT); + my $escaped_ps2pdf = ShellEscape(@PS2PDF); if ($main::opt_gv) { - $output = "| $DOT -Tps2 >" . TempName($main::next_tmpfile, "ps"); + my $escaped_outfile = ShellEscape(TempName($main::next_tmpfile, "ps")); + $output = "| $escaped_dot -Tps2 >$escaped_outfile"; } elsif ($main::opt_evince) { - $output = "| $DOT -Tps2 | $PS2PDF - " . TempName($main::next_tmpfile, "pdf"); + my $escaped_outfile = ShellEscape(TempName($main::next_tmpfile, "pdf")); + $output = "| $escaped_dot -Tps2 | $escaped_ps2pdf - $escaped_outfile"; } elsif ($main::opt_ps) { - $output = "| $DOT -Tps2"; + $output = "| $escaped_dot -Tps2"; } elsif ($main::opt_pdf) { - $output = "| $DOT -Tps2 | $PS2PDF - -"; + $output = "| $escaped_dot -Tps2 | $escaped_ps2pdf - -"; } elsif ($main::opt_web || $main::opt_svg) { # We need to post-process the SVG, so write to a temporary file always. - $output = "| $DOT -Tsvg >" . TempName($main::next_tmpfile, "svg"); + my $escaped_outfile = ShellEscape(TempName($main::next_tmpfile, "svg")); + $output = "| $escaped_dot -Tsvg >$escaped_outfile"; } elsif ($main::opt_gif) { - $output = "| $DOT -Tgif"; + $output = "| $escaped_dot -Tgif"; } else { $output = ">&STDOUT"; } @@ -1770,7 +2076,7 @@ sub PrintDot { if ($f != $c) { $extra = sprintf("\\rof %s (%s)", Unparse($c), - Percent($c, $overall_total)); + Percent($c, $local_total)); } my $style = ""; if ($main::opt_heapcheck) { @@ -1789,7 +2095,7 @@ sub PrintDot { $node{$a}, $sym, Unparse($f), - Percent($f, $overall_total), + Percent($f, $local_total), $extra, $fs, $style, @@ -1799,10 +2105,12 @@ sub PrintDot { # Get edges and counts per edge my %edge = (); my $n; + my $fullname_to_shortname_map = {}; + FillFullnameToShortnameMap($symbols, $fullname_to_shortname_map); foreach my $k (keys(%{$raw})) { # TODO: omit low %age edges $n = $raw->{$k}; - my @translated = TranslateStack($symbols, $k); + my @translated = TranslateStack($symbols, $fullname_to_shortname_map, $k); for (my $i = 1; $i <= $#translated; $i++) { my $src = $translated[$i]; my $dst = $translated[$i-1]; @@ -2186,6 +2494,50 @@ function handleMouseUp(evt) { EOF } +# Provides a map from fullname to shortname for cases where the +# shortname is ambiguous. The symlist has both the fullname and +# shortname for all symbols, which is usually fine, but sometimes -- +# such as overloaded functions -- two different fullnames can map to +# the same shortname. In that case, we use the address of the +# function to disambiguate the two. This function fills in a map that +# maps fullnames to modified shortnames in such cases. If a fullname +# is not present in the map, the 'normal' shortname provided by the +# symlist is the appropriate one to use. 
+sub FillFullnameToShortnameMap { + my $symbols = shift; + my $fullname_to_shortname_map = shift; + my $shortnames_seen_once = {}; + my $shortnames_seen_more_than_once = {}; + + foreach my $symlist (values(%{$symbols})) { + # TODO(csilvers): deal with inlined symbols too. + my $shortname = $symlist->[0]; + my $fullname = $symlist->[2]; + if ($fullname !~ /<[0-9a-fA-F]+>$/) { # fullname doesn't end in an address + next; # the only collisions we care about are when addresses differ + } + if (defined($shortnames_seen_once->{$shortname}) && + $shortnames_seen_once->{$shortname} ne $fullname) { + $shortnames_seen_more_than_once->{$shortname} = 1; + } else { + $shortnames_seen_once->{$shortname} = $fullname; + } + } + + foreach my $symlist (values(%{$symbols})) { + my $shortname = $symlist->[0]; + my $fullname = $symlist->[2]; + # TODO(csilvers): take in a list of addresses we care about, and only + # store in the map if $symlist->[1] is in that list. Saves space. + next if defined($fullname_to_shortname_map->{$fullname}); + if (defined($shortnames_seen_more_than_once->{$shortname})) { + if ($fullname =~ /<0*([^>]*)>$/) { # fullname has address at end of it + $fullname_to_shortname_map->{$fullname} = "$shortname\@$1"; + } + } + } +} + # Return a small number that identifies the argument. # Multiple calls with the same argument will return the same number. # Calls with different arguments will return different numbers. @@ -2202,6 +2554,7 @@ sub ShortIdFor { # Translate a stack of addresses into a stack of symbols sub TranslateStack { my $symbols = shift; + my $fullname_to_shortname_map = shift; my $k = shift; my @addrs = split(/\n/, $k); @@ -2233,6 +2586,9 @@ sub TranslateStack { my $func = $symlist->[$j-2]; my $fileline = $symlist->[$j-1]; my $fullfunc = $symlist->[$j]; + if (defined($fullname_to_shortname_map->{$fullfunc})) { + $func = $fullname_to_shortname_map->{$fullfunc}; + } if ($j > 2) { $func = "$func (inline)"; } @@ -2319,6 +2675,16 @@ sub UnparseAlt { } } +# Alternate pretty-printed form: 0 maps to "" +sub HtmlPrintNumber { + my $num = shift; + if ($num == 0) { + return ""; + } else { + return Unparse($num); + } +} + # Return output units sub Units { if ($main::profile_type eq 'heap' || $main::profile_type eq 'growth') { @@ -2475,6 +2841,13 @@ sub RemoveUninterestingFrames { '__builtin_vec_new', 'operator new', 'operator new[]', + # The entry to our memory-allocation routines on OS X + 'malloc_zone_malloc', + 'malloc_zone_calloc', + 'malloc_zone_valloc', + 'malloc_zone_realloc', + 'malloc_zone_memalign', + 'malloc_zone_free', # These mark the beginning/end of our custom sections '__start_google_malloc', '__stop_google_malloc', @@ -2566,9 +2939,11 @@ sub ReduceProfile { my $symbols = shift; my $profile = shift; my $result = {}; + my $fullname_to_shortname_map = {}; + FillFullnameToShortnameMap($symbols, $fullname_to_shortname_map); foreach my $k (keys(%{$profile})) { my $count = $profile->{$k}; - my @translated = TranslateStack($symbols, $k); + my @translated = TranslateStack($symbols, $fullname_to_shortname_map, $k); my @path = (); my %seen = (); $seen{''} = 1; # So that empty keys are skipped @@ -2775,7 +3150,8 @@ sub AddEntries { sub CheckSymbolPage { my $url = SymbolPageURL(); - open(SYMBOL, "$URL_FETCHER '$url' |"); + my $command = ShellEscape(@URL_FETCHER, $url); + open(SYMBOL, "$command |") or error($command); my $line = ; $line =~ s/\r//g; # turn windows-looking lines into unix-looking lines close(SYMBOL); @@ -2832,7 +3208,7 @@ sub SymbolPageURL { sub FetchProgramName() { my 
($host, $baseURL, $path) = ParseProfileURL($main::pfile_args[0]); my $url = "$baseURL$PROGRAM_NAME_PAGE"; - my $command_line = "$URL_FETCHER '$url'"; + my $command_line = ShellEscape(@URL_FETCHER, $url); open(CMDLINE, "$command_line |") or error($command_line); my $cmdline = ; $cmdline =~ s/\r//g; # turn windows-looking lines into unix-looking lines @@ -2849,7 +3225,7 @@ sub FetchProgramName() { # curl. Redirection happens on borg hosts. sub ResolveRedirectionForCurl { my $url = shift; - my $command_line = "$URL_FETCHER --head '$url'"; + my $command_line = ShellEscape(@URL_FETCHER, "--head", $url); open(CMDLINE, "$command_line |") or error($command_line); while () { s/\r//g; # turn windows-looking lines into unix-looking lines @@ -2861,18 +3237,18 @@ sub ResolveRedirectionForCurl { return $url; } -# Add a timeout flat to URL_FETCHER +# Add a timeout flat to URL_FETCHER. Returns a new list. sub AddFetchTimeout { - my $fetcher = shift; my $timeout = shift; + my @fetcher = shift; if (defined($timeout)) { - if ($fetcher =~ m/\bcurl -s/) { - $fetcher .= sprintf(" --max-time %d", $timeout); - } elsif ($fetcher =~ m/\brpcget\b/) { - $fetcher .= sprintf(" --deadline=%d", $timeout); + if (join(" ", @fetcher) =~ m/\bcurl -s/) { + push(@fetcher, "--max-time", sprintf("%d", $timeout)); + } elsif (join(" ", @fetcher) =~ m/\brpcget\b/) { + push(@fetcher, sprintf("--deadline=%d", $timeout)); } } - return $fetcher; + return @fetcher; } # Reads a symbol map from the file handle name given as $1, returning @@ -2932,15 +3308,17 @@ sub FetchSymbols { my $url = SymbolPageURL(); my $command_line; - if ($URL_FETCHER =~ m/\bcurl -s/) { + if (join(" ", @URL_FETCHER) =~ m/\bcurl -s/) { $url = ResolveRedirectionForCurl($url); - $command_line = "$URL_FETCHER -d '\@$main::tmpfile_sym' '$url'"; + $command_line = ShellEscape(@URL_FETCHER, "-d", "\@$main::tmpfile_sym", + $url); } else { - $command_line = "$URL_FETCHER --post '$url' < '$main::tmpfile_sym'"; + $command_line = (ShellEscape(@URL_FETCHER, "--post", $url) + . " < " . ShellEscape($main::tmpfile_sym)); } # We use c++filt in case $SYMBOL_PAGE gives us mangled symbols. - my $cppfilt = $obj_tool_map{"c++filt"}; - open(SYMBOL, "$command_line | $cppfilt |") or error($command_line); + my $escaped_cppfilt = ShellEscape($obj_tool_map{"c++filt"}); + open(SYMBOL, "$command_line | $escaped_cppfilt |") or error($command_line); $symbol_map = ReadSymbols(*SYMBOL{IO}); close(SYMBOL); } @@ -2956,8 +3334,8 @@ sub FetchSymbols { my $shortpc = $pc; $shortpc =~ s/^0*//; # Each line may have a list of names, which includes the function - # and also other functions it has inlined. They are separated - # (in PrintSymbolizedFile), by --, which is illegal in function names. + # and also other functions it has inlined. They are separated (in + # PrintSymbolizedProfile), by --, which is illegal in function names. my $fullnames; if (defined($symbol_map->{$shortpc})) { $fullnames = $symbol_map->{$shortpc}; @@ -3035,8 +3413,8 @@ sub FetchDynamicProfile { return $real_profile; } - my $fetcher = AddFetchTimeout($URL_FETCHER, $fetch_timeout); - my $cmd = "$fetcher '$url' > '$tmp_profile'"; + my @fetcher = AddFetchTimeout($fetch_timeout, @URL_FETCHER); + my $cmd = ShellEscape(@fetcher, $url) . " > " . 
ShellEscape($tmp_profile); if ($path =~ m/$PROFILE_PAGE|$PMUPROFILE_PAGE|$CENSUSPROFILE_PAGE/){ print STDERR "Gathering CPU profile from $url for $main::opt_seconds seconds to\n ${real_profile}\n"; if ($encourage_patience) { @@ -3047,7 +3425,7 @@ sub FetchDynamicProfile { } (system($cmd) == 0) || error("Failed to get profile: $cmd: $!\n"); - (system("mv $tmp_profile $real_profile") == 0) || error("Unable to rename profile\n"); + (system("mv", $tmp_profile, $real_profile) == 0) || error("Unable to rename profile\n"); print STDERR "Wrote profile to $real_profile\n"; $main::collected_profile = $real_profile; return $main::collected_profile; @@ -3161,7 +3539,7 @@ BEGIN { my $has_q = 0; eval { $has_q = pack("Q", "1") ? 1 : 1; }; if (!$has_q) { - $self->{perl_is_64bit} = 0; + $self->{perl_is_64bit} = 0; } read($self->{file}, $str, 8); if (substr($str, 4, 4) eq chr(0)x4) { @@ -3197,17 +3575,17 @@ BEGIN { # TODO(csilvers): if this is a 32-bit perl, the math below # could end up in a too-large int, which perl will promote # to a double, losing necessary precision. Deal with that. - # Right now, we just die. - my ($lo, $hi) = ($b32_values[$i], $b32_values[$i+1]); + # Right now, we just die. + my ($lo, $hi) = ($b32_values[$i], $b32_values[$i+1]); if ($self->{unpack_code} eq 'N') { # big-endian - ($lo, $hi) = ($hi, $lo); - } - my $value = $lo + $hi * (2**32); - if (!$self->{perl_is_64bit} && # check value is exactly represented - (($value % (2**32)) != $lo || int($value / (2**32)) != $hi)) { - ::error("Need a 64-bit perl to process this 64-bit profile.\n"); - } - push(@b64_values, $value); + ($lo, $hi) = ($hi, $lo); + } + my $value = $lo + $hi * (2**32); + if (!$self->{perl_is_64bit} && # check value is exactly represented + (($value % (2**32)) != $lo || int($value / (2**32)) != $hi)) { + ::error("Need a 64-bit perl to process this 64-bit profile.\n"); + } + push(@b64_values, $value); } @$slots = @b64_values; } @@ -3335,7 +3713,7 @@ sub ReadProfile { if (!$main::use_symbolized_profile) { # we have both a binary and symbolized profiles, abort error("FATAL ERROR: Symbolized profile\n $fname\ncannot be used with " . - "a binary arg. Try again without passing\n $prog\n"); + "a binary arg. Try again without passing\n $prog\n"); } # Read the symbol section of the symbolized profile file. $symbols = ReadSymbols(*PROFILE{IO}); @@ -3636,18 +4014,18 @@ sub ReadHeapProfile { # The sampling frequency is the rate of a Poisson process. # This means that the probability of sampling an allocation of # size X with sampling rate Y is 1 - exp(-X/Y) - if ($n1 != 0) { - my $ratio = (($s1*1.0)/$n1)/($sample_adjustment); - my $scale_factor = 1/(1 - exp(-$ratio)); - $n1 *= $scale_factor; - $s1 *= $scale_factor; - } - if ($n2 != 0) { - my $ratio = (($s2*1.0)/$n2)/($sample_adjustment); - my $scale_factor = 1/(1 - exp(-$ratio)); - $n2 *= $scale_factor; - $s2 *= $scale_factor; - } + if ($n1 != 0) { + my $ratio = (($s1*1.0)/$n1)/($sample_adjustment); + my $scale_factor = 1/(1 - exp(-$ratio)); + $n1 *= $scale_factor; + $s1 *= $scale_factor; + } + if ($n2 != 0) { + my $ratio = (($s2*1.0)/$n2)/($sample_adjustment); + my $scale_factor = 1/(1 - exp(-$ratio)); + $n2 *= $scale_factor; + $s2 *= $scale_factor; + } } else { # Remote-heap version 1 my $ratio; @@ -3771,19 +4149,19 @@ sub ReadSynchProfile { return $r; } -# Given a hex value in the form "0x1abcd" return "0001abcd" or -# "000000000001abcd", depending on the current address length. -# There's probably a more idiomatic (or faster) way to do this... 
+# Given a hex value in the form "0x1abcd" or "1abcd", return either +# "0001abcd" or "000000000001abcd", depending on the current (global) +# address length. sub HexExtend { my $addr = shift; - $addr =~ s/^0x//; - - if (length $addr > $address_length) { - printf STDERR "Warning: address $addr is longer than address length $address_length\n"; + $addr =~ s/^(0x)?0*//; + my $zeros_needed = $address_length - length($addr); + if ($zeros_needed < 0) { + printf STDERR "Warning: address $addr is longer than address length $address_length\n"; + return $addr; } - - return substr("000000000000000".$addr, -$address_length); + return ("0" x $zeros_needed) . $addr; } ##### Symbol extraction ##### @@ -3834,9 +4212,8 @@ sub ParseTextSectionHeaderFromObjdump { my $file_offset; # Get objdump output from the library file to figure out how to # map between mapped addresses and addresses in the library. - my $objdump = $obj_tool_map{"objdump"}; - open(OBJDUMP, "$objdump -h $lib |") - || error("$objdump $lib: $!\n"); + my $cmd = ShellEscape($obj_tool_map{"objdump"}, "-h", $lib); + open(OBJDUMP, "$cmd |") || error("$cmd: $!\n"); while () { s/\r//g; # turn windows-looking lines into unix-looking lines # Idx Name Size VMA LMA File off Algn @@ -3874,9 +4251,8 @@ sub ParseTextSectionHeaderFromOtool { my $file_offset = undef; # Get otool output from the library file to figure out how to # map between mapped addresses and addresses in the library. - my $otool = $obj_tool_map{"otool"}; - open(OTOOL, "$otool -l $lib |") - || error("$otool $lib: $!\n"); + my $command = ShellEscape($obj_tool_map{"otool"}, "-l", $lib); + open(OTOOL, "$command |") || error("$command: $!\n"); my $cmd = ""; my $sectname = ""; my $segname = ""; @@ -4218,18 +4594,18 @@ sub ExtractSymbols { my ($start_pc_index, $finish_pc_index); # Find smallest finish_pc_index such that $finish < $pc[$finish_pc_index]. for ($finish_pc_index = $#pcs + 1; $finish_pc_index > 0; - $finish_pc_index--) { + $finish_pc_index--) { last if $pcs[$finish_pc_index - 1] le $finish; } # Find smallest start_pc_index such that $start <= $pc[$start_pc_index]. for ($start_pc_index = $finish_pc_index; $start_pc_index > 0; - $start_pc_index--) { + $start_pc_index--) { last if $pcs[$start_pc_index - 1] lt $start; } # This keeps PC values higher than $pc[$finish_pc_index] in @pcs, # in case there are overlaps in libraries and the main binary. @{$contained} = splice(@pcs, $start_pc_index, - $finish_pc_index - $start_pc_index); + $finish_pc_index - $start_pc_index); # Map to symbols MapToSymbols($libname, AddressSub($start, $offset), $contained, $symbols); } @@ -4251,15 +4627,15 @@ sub MapToSymbols { # Figure out the addr2line command to use my $addr2line = $obj_tool_map{"addr2line"}; - my $cmd = "$addr2line -f -C -e $image"; + my $cmd = ShellEscape($addr2line, "-f", "-C", "-e", $image); if (exists $obj_tool_map{"addr2line_pdb"}) { $addr2line = $obj_tool_map{"addr2line_pdb"}; - $cmd = "$addr2line --demangle -f -C -e $image"; + $cmd = ShellEscape($addr2line, "--demangle", "-f", "-C", "-e", $image); } # If "addr2line" isn't installed on the system at all, just use # nm to get what info we can (function names, but not line numbers). - if (system("$addr2line --help >/dev/null 2>&1") != 0) { + if (system(ShellEscape($addr2line, "--help") . 
" >$dev_null 2>&1") != 0) { MapSymbolsWithNM($image, $offset, $pclist, $symbols); return; } @@ -4273,11 +4649,10 @@ sub MapToSymbols { $sep_address = undef; # May be filled in by MapSymbolsWithNM() my $nm_symbols = {}; MapSymbolsWithNM($image, $offset, $pclist, $nm_symbols); - # TODO(csilvers): only add '-i' if addr2line supports it. if (defined($sep_address)) { # Only add " -i" to addr2line if the binary supports it. # addr2line --help returns 0, but not if it sees an unknown flag first. - if (system("$cmd -i --help >/dev/null 2>&1") == 0) { + if (system("$cmd -i --help >$dev_null 2>&1") == 0) { $cmd .= " -i"; } else { $sep_address = undef; # no need for sep_address if we don't support -i @@ -4299,13 +4674,14 @@ sub MapToSymbols { close(ADDRESSES); if ($debug) { print("----\n"); - system("cat $main::tmpfile_sym"); + system("cat", $main::tmpfile_sym); print("----\n"); - system("$cmd <$main::tmpfile_sym"); + system("$cmd < " . ShellEscape($main::tmpfile_sym)); print("----\n"); } - open(SYMBOLS, "$cmd <$main::tmpfile_sym |") || error("$cmd: $!\n"); + open(SYMBOLS, "$cmd <" . ShellEscape($main::tmpfile_sym) . " |") + || error("$cmd: $!\n"); my $count = 0; # Index in pclist while () { # Read fullfunction and filelineinfo from next pair of lines @@ -4325,15 +4701,29 @@ sub MapToSymbols { my $pcstr = $pclist->[$count]; my $function = ShortFunctionName($fullfunction); - if ($fullfunction eq '??') { - # See if nm found a symbol - my $nms = $nm_symbols->{$pcstr}; - if (defined($nms)) { + my $nms = $nm_symbols->{$pcstr}; + if (defined($nms)) { + if ($fullfunction eq '??') { + # nm found a symbol for us. $function = $nms->[0]; $fullfunction = $nms->[2]; + } else { + # MapSymbolsWithNM tags each routine with its starting address, + # useful in case the image has multiple occurrences of this + # routine. (It uses a syntax that resembles template paramters, + # that are automatically stripped out by ShortFunctionName().) + # addr2line does not provide the same information. So we check + # if nm disambiguated our symbol, and if so take the annotated + # (nm) version of the routine-name. TODO(csilvers): this won't + # catch overloaded, inlined symbols, which nm doesn't see. + # Better would be to do a check similar to nm's, in this fn. 
+ if ($nms->[2] =~ m/^\Q$function\E/) { # sanity check it's the right fn + $function = $nms->[0]; + $fullfunction = $nms->[2]; + } } } - + # Prepend to accumulated symbols for pcstr # (so that caller comes before callee) my $sym = $symbols->{$pcstr}; @@ -4344,7 +4734,7 @@ sub MapToSymbols { unshift(@{$sym}, $function, $filelinenum, $fullfunction); if ($debug) { printf STDERR ("%s => [%s]\n", $pcstr, join(" ", @{$sym})); } if (!defined($sep_address)) { - # Inlining is off, se this entry ends immediately + # Inlining is off, so this entry ends immediately $count++; } } @@ -4407,6 +4797,31 @@ sub ShortFunctionName { return $function; } +# Trim overly long symbols found in disassembler output +sub CleanDisassembly { + my $d = shift; + while ($d =~ s/\([^()%]*\)(\s*const)?//g) { } # Argument types, not (%rax) + while ($d =~ s/(\w+)<[^<>]*>/$1/g) { } # Remove template arguments + return $d; +} + +# Clean file name for display +sub CleanFileName { + my ($f) = @_; + $f =~ s|^/proc/self/cwd/||; + $f =~ s|^\./||; + return $f; +} + +# Make address relative to section and clean up for display +sub UnparseAddress { + my ($offset, $address) = @_; + $address = AddressSub($address, $offset); + $address =~ s/^0x//; + $address =~ s/^0*//; + return $address; +} + ##### Miscellaneous ##### # Find the right versions of the above object tools to use. The @@ -4423,8 +4838,18 @@ sub ConfigureObjTools { # predictably return error status in prod. (-e $prog_file) || error("$prog_file does not exist.\n"); - # Follow symlinks (at least for systems where "file" supports that) - my $file_type = `/usr/bin/file -L $prog_file 2>/dev/null || /usr/bin/file $prog_file`; + my $file_type = undef; + if (-e "/usr/bin/file") { + # Follow symlinks (at least for systems where "file" supports that). + my $escaped_prog_file = ShellEscape($prog_file); + $file_type = `/usr/bin/file -L $escaped_prog_file 2>$dev_null || + /usr/bin/file $escaped_prog_file`; + } elsif ($^O == "MSWin32") { + $file_type = "MS Windows"; + } else { + print STDERR "WARNING: Can't determine the file type of $prog_file"; + } + if ($file_type =~ /64-bit/) { # Change $address_length to 16 if the program file is ELF 64-bit. # We can't detect this from many (most?) heap or lock contention @@ -4500,6 +4925,19 @@ sub ConfigureTool { return $path; } +sub ShellEscape { + my @escaped_words = (); + foreach my $word (@_) { + my $escaped_word = $word; + if ($word =~ m![^a-zA-Z0-9/.,_=-]!) { # check for anything not in whitelist + $escaped_word =~ s/'/'\\''/; + $escaped_word = "'$escaped_word'"; + } + push(@escaped_words, $escaped_word); + } + return join(" ", @escaped_words); +} + sub cleanup { unlink($main::tmpfile_sym); unlink(keys %main::tempnames); @@ -4537,11 +4975,11 @@ sub error { # names match "$regexp" and returns them in a hashtable mapping from # procedure name to a two-element vector of [start address, end address] sub GetProcedureBoundariesViaNm { - my $nm_command = shift; + my $escaped_nm_command = shift; # shell-escaped my $regexp = shift; my $symbol_table = {}; - open(NM, "$nm_command |") || error("$nm_command: $!\n"); + open(NM, "$escaped_nm_command |") || error("$escaped_nm_command: $!\n"); my $last_start = "0"; my $routine = ""; while () { @@ -4619,6 +5057,21 @@ sub GetProcedureBoundaries { my $image = shift; my $regexp = shift; + # If $image doesn't start with /, then put ./ in front of it. This works + # around an obnoxious bug in our probing of nm -f behavior. + # "nm -f $image" is supposed to fail on GNU nm, but if: + # + # a. 
$image starts with [BbSsPp] (for example, bin/foo/bar), AND + # b. you have a.out in your current directory (a not uncommon occurence) + # + # then "nm -f $image" succeeds because -f only looks at the first letter of + # the argument, which looks valid because it's [BbSsPp], and then since + # there's no image provided, it looks for a.out and finds it. + # + # This regex makes sure that $image starts with . or /, forcing the -f + # parsing to fail since . and / are not valid formats. + $image =~ s#^[^/]#./$&#; + # For libc libraries, the copy in /usr/lib/debug contains debugging symbols my $debugging = DebuggingLibrary($image); if ($debugging) { @@ -4636,28 +5089,29 @@ sub GetProcedureBoundaries { # --demangle and -f. my $demangle_flag = ""; my $cppfilt_flag = ""; - if (system("$nm --demangle $image >/dev/null 2>&1") == 0) { + my $to_devnull = ">$dev_null 2>&1"; + if (system(ShellEscape($nm, "--demangle", "image") . $to_devnull) == 0) { # In this mode, we do "nm --demangle " $demangle_flag = "--demangle"; $cppfilt_flag = ""; - } elsif (system("$cppfilt $image >/dev/null 2>&1") == 0) { + } elsif (system(ShellEscape($cppfilt, $image) . $to_devnull) == 0) { # In this mode, we do "nm | c++filt" - $cppfilt_flag = " | $cppfilt"; + $cppfilt_flag = " | " . ShellEscape($cppfilt); }; my $flatten_flag = ""; - if (system("$nm -f $image >/dev/null 2>&1") == 0) { + if (system(ShellEscape($nm, "-f", $image) . $to_devnull) == 0) { $flatten_flag = "-f"; } # Finally, in the case $imagie isn't a debug library, we try again with # -D to at least get *exported* symbols. If we can't use --demangle, # we use c++filt instead, if it exists on this system. - my @nm_commands = ("$nm -n $flatten_flag $demangle_flag" . - " $image 2>/dev/null $cppfilt_flag", - "$nm -D -n $flatten_flag $demangle_flag" . - " $image 2>/dev/null $cppfilt_flag", + my @nm_commands = (ShellEscape($nm, "-n", $flatten_flag, $demangle_flag, + $image) . " 2>$dev_null $cppfilt_flag", + ShellEscape($nm, "-D", "-n", $flatten_flag, $demangle_flag, + $image) . " 2>$dev_null $cppfilt_flag", # 6nm is for Go binaries - "6nm $image 2>/dev/null | sort", + ShellEscape("6nm", "$image") . " 2>$dev_null | sort", ); # If the executable is an MS Windows PDB-format executable, we'll @@ -4665,8 +5119,9 @@ sub GetProcedureBoundaries { # want to use both unix nm and windows-specific nm_pdb, since # PDB-format executables can apparently include dwarf .o files. if (exists $obj_tool_map{"nm_pdb"}) { - my $nm_pdb = $obj_tool_map{"nm_pdb"}; - push(@nm_commands, "$nm_pdb --demangle $image 2>/dev/null"); + push(@nm_commands, + ShellEscape($obj_tool_map{"nm_pdb"}, "--demangle", $image) + . " 2>$dev_null"); } foreach my $nm_command (@nm_commands) { diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 98d0ba41..3a6781b3 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1006,7 +1006,7 @@ malloc_conf = "xmalloc:true";]]> option for information on high-water-triggered profile dumping. Profile output is compatible with the included pprof Perl script, which originates from the google-perftools + url="http://code.google.com/p/gperftools/">gperftools package.
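As a reference for the quoting rule that the ShellEscape() helper introduced above applies, a standalone sketch (shell_escape_sketch is an illustrative name; note that the helper in this patch substitutes only the first embedded quote, while this sketch substitutes all of them):

# Words built only from the whitelist [a-zA-Z0-9/.,_=-] pass through
# unchanged; anything else is wrapped in single quotes, with embedded
# single quotes rewritten as '\'' so the shell reassembles the word.
sub shell_escape_sketch {
  my @escaped = ();
  foreach my $word (@_) {
    if ($word =~ m![^a-zA-Z0-9/.,_=-]!) {
      (my $quoted = $word) =~ s/'/'\\''/g;
      push(@escaped, "'$quoted'");
    } else {
      push(@escaped, $word);
    }
  }
  return join(" ", @escaped);
}

# For example, shell_escape_sketch("gv", "--scale=2", "/tmp/my profile.ps")
# returns: gv --scale=2 '/tmp/my profile.ps'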
From 0b25fe79aaf8840a5acda7e3160a053d42349872 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 17 Apr 2012 16:39:33 -0700 Subject: [PATCH 131/205] Update prof defaults to match common usage. Change the "opt.lg_prof_sample" default from 0 to 19 (1 B to 512 KiB). Change the "opt.prof_accum" default from true to false. Add the "opt.prof_final" mallctl, so that "opt.prof_prefix" need not be abused to disable final profile dumping. --- ChangeLog | 3 +++ doc/jemalloc.xml.in | 45 ++++++++++++++++++++------------ include/jemalloc/internal/prof.h | 3 ++- src/ctl.c | 3 +++ src/jemalloc.c | 1 + src/prof.c | 5 ++-- src/stats.c | 1 + 7 files changed, 41 insertions(+), 20 deletions(-) diff --git a/ChangeLog b/ChangeLog index 1db47d8c..1fed914f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -34,6 +34,7 @@ found in the git revision history: - Add the --with-mangling option. - Add the --disable-experimental option. - Add the "thread.tcache.enabled" mallctl. + - Add the "opt.prof_final" mallctl. - Update pprof (from gperftools 2.0). Incompatible changes: @@ -42,6 +43,8 @@ found in the git revision history: - Disable lazy locking by default. - Rename the "tcache.flush" mallctl to "thread.tcache.flush". - Rename the "arenas.pagesize" mallctl to "arenas.page". + - Change the "opt.lg_prof_sample" default from 0 to 19 (1 B to 512 KiB). + - Change the "opt.prof_accum" default from true to false. Removed features: - Remove the swap feature, including the "config.swap", "swap.avail", diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 3a6781b3..f78f423c 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -986,14 +986,7 @@ malloc_conf = "xmalloc:true";]]> []
Memory profiling enabled/disabled. If enabled, profile - memory allocation activity, and use an - atexit - 3 function to dump final memory - usage to a file named according to the pattern - <prefix>.<pid>.<seq>.f.heap, - where <prefix> is controlled by the opt.prof_prefix - option. See the opt.prof_active option for on-the-fly activation/deactivation. See the opt.lg_prof_sample @@ -1001,12 +994,13 @@ malloc_conf = "xmalloc:true";]]> linkend="opt.prof_accum">opt.prof_accum option for control of cumulative sample reporting. See the opt.lg_prof_interval - option for information on interval-triggered profile dumping, and the - opt.prof_gdump - option for information on high-water-triggered profile dumping. - Profile output is compatible with the included pprof - Perl script, which originates from the gperftools + option for information on interval-triggered profile dumping, the opt.prof_gdump + option for information on high-water-triggered profile dumping, and the + opt.prof_final + option for final profile dumping. Profile output is compatible with + the included pprof Perl script, which originates + from the gperftools package.
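Since every knob described above is a plain option name, a concrete setting may help. A hedged example of wiring the new defaults explicitly into a program, using the malloc_conf mechanism the manual's other examples rely on (with a non-empty install prefix the symbol name is prefixed accordingly):

/* Read by jemalloc during initialization, before main() runs. */
const char *malloc_conf = "prof:true,prof_final:true,lg_prof_sample:19";

The same names are visible at run time through the mallctl namespace; for example, reading "opt.prof_final" returns the boolean held by the new opt_prof_final variable.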
@@ -1051,8 +1045,8 @@ malloc_conf = "xmalloc:true";]]> Average interval (log base 2) between allocation samples, as measured in bytes of allocation activity. Increasing the sampling interval decreases profile fidelity, but also decreases the - computational overhead. The default sample interval is 1 (2^0) (i.e. - all allocations are sampled). + computational overhead. The default sample interval is 512 KiB (2^19 + B). @@ -1066,7 +1060,7 @@ malloc_conf = "xmalloc:true";]]> dumps enabled/disabled. If this option is enabled, every unique backtrace must be stored for the duration of execution. Depending on the application, this can impose a large memory overhead, and the - cumulative counts are not always of interest. This option is enabled + cumulative counts are not always of interest. This option is disabled by default. @@ -1107,6 +1101,23 @@ malloc_conf = "xmalloc:true";]]> option. This option is disabled by default. + + + opt.prof_final + (bool) + r- + [] + + Use an + atexit + 3 function to dump final memory + usage to a file named according to the pattern + <prefix>.<pid>.<seq>.f.heap, + where <prefix> is controlled by the opt.prof_prefix + option. This option is enabled by default. + + opt.prof_leak diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index a37bb448..a4c563cc 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -9,7 +9,7 @@ typedef struct prof_tdata_s prof_tdata_t; /* Option defaults. */ #define PROF_PREFIX_DEFAULT "jeprof" -#define LG_PROF_SAMPLE_DEFAULT 0 +#define LG_PROF_SAMPLE_DEFAULT 19 #define LG_PROF_INTERVAL_DEFAULT -1 /* @@ -169,6 +169,7 @@ extern bool opt_prof_active; extern size_t opt_lg_prof_sample; /* Mean bytes between samples. */ extern ssize_t opt_lg_prof_interval; /* lg(prof_interval). */ extern bool opt_prof_gdump; /* High-water memory dumping. */ +extern bool opt_prof_final; /* Final profile dumping. */ extern bool opt_prof_leak; /* Dump leak summary at exit. */ extern bool opt_prof_accum; /* Report cumulative bytes. */ extern char opt_prof_prefix[PATH_MAX + 1]; diff --git a/src/ctl.c b/src/ctl.c index a6a02cc5..98ea3d1c 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -80,6 +80,7 @@ CTL_PROTO(opt_prof_active) CTL_PROTO(opt_lg_prof_sample) CTL_PROTO(opt_lg_prof_interval) CTL_PROTO(opt_prof_gdump) +CTL_PROTO(opt_prof_final) CTL_PROTO(opt_prof_leak) CTL_PROTO(opt_prof_accum) CTL_PROTO(arenas_bin_i_size) @@ -210,6 +211,7 @@ static const ctl_node_t opt_node[] = { {NAME("lg_prof_sample"), CTL(opt_lg_prof_sample)}, {NAME("lg_prof_interval"), CTL(opt_lg_prof_interval)}, {NAME("prof_gdump"), CTL(opt_prof_gdump)}, + {NAME("prof_final"), CTL(opt_prof_final)}, {NAME("prof_leak"), CTL(opt_prof_leak)}, {NAME("prof_accum"), CTL(opt_prof_accum)} }; @@ -1122,6 +1124,7 @@ CTL_RO_CGEN(config_prof, opt_prof_active, opt_prof_active, bool) /* Mutable. 
*/ CTL_RO_NL_CGEN(config_prof, opt_lg_prof_sample, opt_lg_prof_sample, size_t) CTL_RO_NL_CGEN(config_prof, opt_lg_prof_interval, opt_lg_prof_interval, ssize_t) CTL_RO_NL_CGEN(config_prof, opt_prof_gdump, opt_prof_gdump, bool) +CTL_RO_NL_CGEN(config_prof, opt_prof_final, opt_prof_final, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_leak, opt_prof_leak, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_accum, opt_prof_accum, bool) diff --git a/src/jemalloc.c b/src/jemalloc.c index 0decd8a8..d4b681b4 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -561,6 +561,7 @@ malloc_conf_init(void) lg_prof_interval, -1, (sizeof(uint64_t) << 3) - 1) CONF_HANDLE_BOOL(opt_prof_gdump, prof_gdump) + CONF_HANDLE_BOOL(opt_prof_final, prof_final) CONF_HANDLE_BOOL(opt_prof_leak, prof_leak) } malloc_conf_error("Invalid conf pair", k, klen, v, diff --git a/src/prof.c b/src/prof.c index b509aaef..227560b8 100644 --- a/src/prof.c +++ b/src/prof.c @@ -21,8 +21,9 @@ bool opt_prof_active = true; size_t opt_lg_prof_sample = LG_PROF_SAMPLE_DEFAULT; ssize_t opt_lg_prof_interval = LG_PROF_INTERVAL_DEFAULT; bool opt_prof_gdump = false; +bool opt_prof_final = true; bool opt_prof_leak = false; -bool opt_prof_accum = true; +bool opt_prof_accum = false; char opt_prof_prefix[PATH_MAX + 1]; uint64_t prof_interval; @@ -944,7 +945,7 @@ prof_fdump(void) if (prof_booted == false) return; - if (opt_prof_prefix[0] != '\0') { + if (opt_prof_final && opt_prof_prefix[0] != '\0') { malloc_mutex_lock(&prof_dump_seq_mtx); prof_dump_filename(filename, 'f', UINT64_C(0xffffffffffffffff)); malloc_mutex_unlock(&prof_dump_seq_mtx); diff --git a/src/stats.c b/src/stats.c index 4cad214f..08f7098c 100644 --- a/src/stats.c +++ b/src/stats.c @@ -397,6 +397,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, OPT_WRITE_BOOL(prof_accum) OPT_WRITE_SSIZE_T(lg_prof_interval) OPT_WRITE_BOOL(prof_gdump) + OPT_WRITE_BOOL(prof_final) OPT_WRITE_BOOL(prof_leak) #undef OPT_WRITE_BOOL From 78f7352259768f670f8e1f9b000388dd32b62493 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 18 Apr 2012 13:38:40 -0700 Subject: [PATCH 132/205] Clean up a few config-related conditionals/asserts. Clean up a few config-related conditionals to avoid unnecessary dependencies on prof symbols. Use cassert() rather than assert() everywhere that it's appropriate. 
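The distinction the message draws is between a check on data (assert) and a check on build configuration (cassert). A rough, self-contained sketch of the pattern — the definitions here are illustrative stand-ins, not jemalloc's exact macros:

#include <stdlib.h>

static const int config_prof = 0;	/* fixed at configure time */

/* Configuration assertion: cheap enough to keep in all builds. */
#define cassert(c) do {							\
	if (!(c))							\
		abort();						\
} while (0)

void
prof_only_helper(void)
{

	cassert(config_prof);	/* calling this with prof compiled out is a
				 * bug, debug build or not */
	if (config_prof) {
		/* Profiling-only work; the compiler constant-folds this
		 * branch away entirely when config_prof is 0. */
	}
}

Gating the body on the constant, as the arena_prof_accum() change below does, lets the compiler discard profiling code in non-prof builds, while the cassert() documents and enforces the caller's contract.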
--- src/arena.c | 10 ++++++---- src/quarantine.c | 4 ++-- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/arena.c b/src/arena.c index 989034d4..0f15562a 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1213,7 +1213,9 @@ void arena_prof_accum(arena_t *arena, uint64_t accumbytes) { - if (prof_interval != 0) { + cassert(config_prof); + + if (config_prof && prof_interval != 0) { arena->prof_accumbytes += accumbytes; if (arena->prof_accumbytes >= prof_interval) { prof_idump(); @@ -1490,8 +1492,8 @@ arena_salloc(const void *ptr, bool demote) } else { assert(((uintptr_t)ptr & PAGE_MASK) == 0); ret = mapbits & ~PAGE_MASK; - if (demote && prof_promote && ret == PAGE && (mapbits & - CHUNK_MAP_CLASS_MASK) != 0) { + if (config_prof && demote && prof_promote && ret == PAGE && + (mapbits & CHUNK_MAP_CLASS_MASK) != 0) { size_t binind = ((mapbits & CHUNK_MAP_CLASS_MASK) >> CHUNK_MAP_CLASS_SHIFT) - 1; assert(binind < NBINS); @@ -1509,7 +1511,7 @@ arena_prof_promoted(const void *ptr, size_t size) arena_chunk_t *chunk; size_t pageind, binind; - assert(config_prof); + cassert(config_prof); assert(ptr != NULL); assert(CHUNK_ADDR2BASE(ptr) != ptr); assert(isalloc(ptr, false) == PAGE); diff --git a/src/quarantine.c b/src/quarantine.c index 89a25c6a..5fb6c390 100644 --- a/src/quarantine.c +++ b/src/quarantine.c @@ -101,7 +101,7 @@ quarantine(void *ptr) quarantine_t *quarantine; size_t usize = isalloc(ptr, config_prof); - assert(config_fill); + cassert(config_fill); assert(opt_quarantine); quarantine = *quarantine_tsd_get(); @@ -154,7 +154,7 @@ bool quarantine_boot(void) { - assert(config_fill); + cassert(config_fill); if (quarantine_tsd_boot()) return (true); From 85221d5d75be26ce8941cc08a798e69ecdd0a57c Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Wed, 18 Apr 2012 18:29:40 +0200 Subject: [PATCH 133/205] Make versioned shared library suffix configurable This allows for different patterns for file names: - lib.so.version for e.g. Linux - lib.version.dylib for OSX (which is much more common than lib.dylib.version) - lib.dll for Windows (no version at all). 
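To make the file-name patterns concrete, here is a hypothetical consumer-side illustration in C (the version number "1" and the helper are made up; on Windows one would use LoadLibrary() rather than dlopen()):

#include <dlfcn.h>

static const char *jemalloc_soname =
#if defined(__APPLE__)
    "libjemalloc.1.dylib";	/* lib<name>.<version>.dylib */
#elif defined(_WIN32)
    "jemalloc.dll";		/* no version in the file name */
#else
    "libjemalloc.so.1";		/* lib<name>.so.<version> */
#endif

void *
open_jemalloc(void)
{

	return (dlopen(jemalloc_soname, RTLD_NOW));
}

The configurable SOREV suffix introduced below is what lets a single Makefile produce all three layouts.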
---
 Makefile.in        | 21 ++++++++++++++-------
 bin/jemalloc.sh.in |  2 +-
 configure.ac       |  3 +++
 3 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/Makefile.in b/Makefile.in
index 0dfddea0..d426cbdb 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -47,6 +47,7 @@ cfgoutputs_out := @cfgoutputs_out@
 enable_autogen := @enable_autogen@
 enable_experimental := @enable_experimental@
 DSO_LDFLAGS = @DSO_LDFLAGS@
+SOREV = @SOREV@
 ifeq (macho, $(ABI))
 TEST_LIBRARY_PATH := DYLD_FALLBACK_LIBRARY_PATH=$(objroot)lib
@@ -72,9 +73,11 @@ ifeq (macho, $(ABI))
 CSRCS += $(srcroot)src/zone.c
 endif
 STATIC_LIBS := $(objroot)lib/$(LIBJEMALLOC).$(A)
-DSOS := $(objroot)lib/$(LIBJEMALLOC).$(SO).$(REV) \
-	$(objroot)lib/$(LIBJEMALLOC).$(SO) \
-	$(objroot)lib/$(LIBJEMALLOC)_pic.$(A)
+DSOS := $(objroot)lib/$(LIBJEMALLOC)_pic.$(A) \
+	$(objroot)lib/$(LIBJEMALLOC).$(SOREV)
+ifneq ($(SOREV),$(SO))
+DSOS += $(objroot)lib/$(LIBJEMALLOC).$(SO)
+endif
 MAN3 := $(objroot)doc/jemalloc$(install_suffix).3
 DOCS_XML := $(objroot)doc/jemalloc$(install_suffix).xml
 DOCS_HTML := $(DOCS_XML:$(objroot)%.xml=$(srcroot)%.html)
@@ -127,11 +130,13 @@ $(objroot)src/%.pic.$(O): $(srcroot)src/%.c
 	$(CC) $(CFLAGS) -fPIC -DPIC -c $(CPPFLAGS) -o $@ $<
 	@$(CC) -MM $(CPPFLAGS) -MT $@ -o $(@:%.$(O)=%.d) $<
 
-%.$(SO) : %.$(SO).$(REV)
+ifneq ($(SOREV),$(SO))
+%.$(SO) : %.$(SOREV)
 	@mkdir -p $(@D)
 	ln -sf $(<F) $@
+endif

From: Mike Hommey
Date: Wed, 18 Apr 2012 18:29:41 +0200
Subject: [PATCH 134/205] Refactor object and library build, and only build
 PIC libraries when PIC_CFLAGS is defined

---
 Makefile.in  | 75 +++++++++++++++++++++++++++-------------------------
 configure.ac |  2 ++
 2 files changed, 41 insertions(+), 36 deletions(-)

diff --git a/Makefile.in b/Makefile.in
index d426cbdb..8a34928b 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -48,6 +48,7 @@ enable_autogen := @enable_autogen@
 enable_experimental := @enable_experimental@
 DSO_LDFLAGS = @DSO_LDFLAGS@
 SOREV = @SOREV@
+PIC_CFLAGS = @PIC_CFLAGS@
 ifeq (macho, $(ABI))
 TEST_LIBRARY_PATH := DYLD_FALLBACK_LIBRARY_PATH=$(objroot)lib
@@ -73,8 +74,10 @@ ifeq (macho, $(ABI))
 CSRCS += $(srcroot)src/zone.c
 endif
 STATIC_LIBS := $(objroot)lib/$(LIBJEMALLOC).$(A)
-DSOS := $(objroot)lib/$(LIBJEMALLOC)_pic.$(A) \
-	$(objroot)lib/$(LIBJEMALLOC).$(SOREV)
+ifdef PIC_CFLAGS
+STATIC_LIBS += $(objroot)lib/$(LIBJEMALLOC)_pic.$(A)
+endif
+DSOS := $(objroot)lib/$(LIBJEMALLOC).$(SOREV)
 ifneq ($(SOREV),$(SO))
 DSOS += $(objroot)lib/$(LIBJEMALLOC).$(SO)
 endif
@@ -91,12 +94,16 @@ ifeq ($(enable_experimental), 1)
 CTESTS += $(srcroot)test/allocm.c $(srcroot)test/rallocm.c
 endif
 
+COBJS := $(CSRCS:$(srcroot)%.c=$(objroot)%.$(O))
+CPICOBJS := $(CSRCS:$(srcroot)%.c=$(objroot)%.pic.$(O))
+CTESTOBJS := $(CTESTS:$(srcroot)%.c=$(objroot)%.$(O))
+
 .PHONY: all dist doc_html doc_man doc
 .PHONY: install_bin install_include install_lib
 .PHONY: install_html install_man install_doc install
 .PHONY: tests check clean distclean relclean
 
-.SECONDARY : $(CTESTS:$(srcroot)%.c=$(objroot)%.$(O))
+.SECONDARY : $(CTESTOBJS)
 
 # Default target.
 all: build
@@ -116,47 +123,41 @@ build_doc: $(DOCS)
 #
 # Include generated dependency files.
 #
--include $(CSRCS:$(srcroot)%.c=$(objroot)%.d)
--include $(CSRCS:$(srcroot)%.c=$(objroot)%.pic.d)
--include $(CTESTS:$(srcroot)%.c=$(objroot)%.d)
+-include $(COBJS:%.$(O)=%.d)
+-include $(CPICOBJS:%.$(O)=%.d)
+-include $(CTESTOBJS:%.$(O)=%.d)
 
-$(objroot)src/%.$(O): $(srcroot)src/%.c
+$(COBJS): $(objroot)src/%.$(O): $(srcroot)src/%.c
+$(CPICOBJS): $(objroot)src/%.pic.$(O): $(srcroot)src/%.c
+$(CPICOBJS): CFLAGS += $(PIC_CFLAGS)
+$(CTESTOBJS): $(objroot)test/%.$(O): $(srcroot)test/%.c
+$(CTESTOBJS): CPPFLAGS += -I$(objroot)test
+
+$(COBJS) $(CPICOBJS) $(CTESTOBJS): %.$(O):
 	@mkdir -p $(@D)
 	$(CC) $(CFLAGS) -c $(CPPFLAGS) -o $@ $<
 	@$(CC) -MM $(CPPFLAGS) -MT $@ -o $(@:%.$(O)=%.d) $<
 
-$(objroot)src/%.pic.$(O): $(srcroot)src/%.c
-	@mkdir -p $(@D)
-	$(CC) $(CFLAGS) -fPIC -DPIC -c $(CPPFLAGS) -o $@ $<
-	@$(CC) -MM $(CPPFLAGS) -MT $@ -o $(@:%.$(O)=%.d) $<
-
 ifneq ($(SOREV),$(SO))
 %.$(SO) : %.$(SOREV)
 	@mkdir -p $(@D)
 	ln -sf $(<F) $@
+endif

From: Mike Hommey
Date: Wed, 18 Apr 2012 18:29:42 +0200
Subject: [PATCH 135/205] Add an abstraction layer for threading in tests

---
 test/allocated.c             | 30 +++++++++---------------------
 test/jemalloc_test.h.in      | 22 ++++++++++++++++++++++
 test/thread_arena.c          | 18 +++++-------------
 test/thread_tcache_enabled.c | 30 +++++++++---------------------
 4 files changed, 45 insertions(+), 55 deletions(-)

diff --git a/test/allocated.c b/test/allocated.c
index 81cd4ca9..00039ed8 100644
--- a/test/allocated.c
+++ b/test/allocated.c
@@ -2,7 +2,6 @@
 #include
 #include
 #include
-#include <pthread.h>
 #include
 #include
 #include
@@ -11,7 +10,7 @@
 #include "jemalloc_test.h"
 
 void *
-thread_start(void *arg)
+je_thread_start(void *arg)
 {
 	int err;
 	void *p;
@@ -106,33 +105,22 @@ int
 main(void)
 {
 	int ret = 0;
-	pthread_t thread;
+	je_thread_t thread;
 
 	malloc_printf("Test begin\n");
 
-	thread_start(NULL);
+	je_thread_start(NULL);
 
-	if (pthread_create(&thread, NULL, thread_start, NULL)
-	    != 0) {
-		malloc_printf("%s(): Error in pthread_create()\n", __func__);
-		ret = 1;
-		goto label_return;
-	}
-	pthread_join(thread, (void *)&ret);
+	je_thread_create(&thread, je_thread_start, NULL);
+	je_thread_join(thread, (void *)&ret);
 
-	thread_start(NULL);
+	je_thread_start(NULL);
 
-	if (pthread_create(&thread, NULL, thread_start, NULL)
-	    != 0) {
-		malloc_printf("%s(): Error in pthread_create()\n", __func__);
-		ret = 1;
-		goto label_return;
-	}
-	pthread_join(thread, (void *)&ret);
+	je_thread_create(&thread, je_thread_start, NULL);
+	je_thread_join(thread, (void *)&ret);
 
-	thread_start(NULL);
+	je_thread_start(NULL);
 
-label_return:
 	malloc_printf("Test end\n");
 	return (ret);
 }
diff --git a/test/jemalloc_test.h.in b/test/jemalloc_test.h.in
index 58fa08e4..8833a03e 100644
--- a/test/jemalloc_test.h.in
+++ b/test/jemalloc_test.h.in
@@ -5,3 +5,25 @@
  */
 #include "jemalloc/jemalloc@install_suffix@.h"
 #include "jemalloc/internal/jemalloc_internal.h"
+
+/* Abstraction layer for threading in tests */
+#include <pthread.h>
+
+typedef pthread_t je_thread_t;
+
+void
+je_thread_create(je_thread_t *thread, void *(*proc)(void *), void *arg)
+{
+
+	if (pthread_create(thread, NULL, proc, arg) != 0) {
+		malloc_printf("Error in pthread_create()\n");
+		exit(1);
+	}
+}
+
+void
+je_thread_join(je_thread_t thread, void **ret)
+{
+
+	pthread_join(thread, ret);
+}
diff --git a/test/thread_arena.c b/test/thread_arena.c
index e443b712..98354282 100644
--- a/test/thread_arena.c
+++ b/test/thread_arena.c
@@ -1,6 +1,5 @@
 #include
 #include
-#include <pthread.h>
 #include
 #include
 
@@ -10,7 +9,7 @@
 #define NTHREADS 10
 
 void *
-thread_start(void *arg)
+je_thread_start(void *arg)
 {
	unsigned main_arena_ind = *(unsigned *)arg;
	void *p;
@@ -52,7 +51,7 @@ main(void)
	unsigned arena_ind;
	size_t size;
	int err;
-	pthread_t threads[NTHREADS];
+	je_thread_t threads[NTHREADS];
	unsigned i;
 
	malloc_printf("Test begin\n");
@@ -72,18 +71,11 @@ main(void)
		goto label_return;
	}
 
-	for (i = 0; i < NTHREADS; i++) {
-		if (pthread_create(&threads[i], NULL, thread_start,
-		    (void *)&arena_ind) != 0) {
-			malloc_printf("%s(): Error in pthread_create()\n",
-			    __func__);
-			ret = 1;
-			goto label_return;
-		}
-	}
+	for (i = 0; i < NTHREADS; i++)
+		je_thread_create(&threads[i], je_thread_start, (void *)&arena_ind);
 
	for (i = 0; i < NTHREADS; i++)
-		pthread_join(threads[i], (void *)&ret);
+		je_thread_join(threads[i], (void *)&ret);
 
 label_return:
	malloc_printf("Test end\n");
diff --git a/test/thread_tcache_enabled.c b/test/thread_tcache_enabled.c
index 59b76a27..9f765841 100644
--- a/test/thread_tcache_enabled.c
+++ b/test/thread_tcache_enabled.c
@@ -1,7 +1,6 @@
 #include
 #include
 #include
-#include <pthread.h>
 #include
 #include
 
@@ -9,7 +8,7 @@
 #include "jemalloc_test.h"
 
 void *
-thread_start(void *arg)
+je_thread_start(void *arg)
 {
	int err;
	size_t sz;
@@ -77,33 +76,22 @@ int
 main(void)
 {
	int ret = 0;
-	pthread_t thread;
+	je_thread_t thread;
 
	malloc_printf("Test begin\n");
 
-	thread_start(NULL);
+	je_thread_start(NULL);
 
-	if (pthread_create(&thread, NULL, thread_start, NULL)
-	    != 0) {
-		malloc_printf("%s(): Error in pthread_create()\n", __func__);
-		ret = 1;
-		goto label_return;
-	}
-	pthread_join(thread, (void *)&ret);
+	je_thread_create(&thread, je_thread_start, NULL);
+	je_thread_join(thread, (void *)&ret);
 
-	thread_start(NULL);
+	je_thread_start(NULL);
 
-	if (pthread_create(&thread, NULL, thread_start, NULL)
-	    != 0) {
-		malloc_printf("%s(): Error in pthread_create()\n", __func__);
-		ret = 1;
-		goto label_return;
-	}
-	pthread_join(thread, (void *)&ret);
+	je_thread_create(&thread, je_thread_start, NULL);
+	je_thread_join(thread, (void *)&ret);
 
-	thread_start(NULL);
+	je_thread_start(NULL);
 
-label_return:
	malloc_printf("Test end\n");
	return (ret);
 }
From 666c5bf7a8baaa842da69cb402948411432a9d00 Mon Sep 17 00:00:00 2001
From: Mike Hommey
Date: Wed, 18 Apr 2012 18:29:43 +0200
Subject: [PATCH 136/205] Add a pages_purge function to wrap
 madvise(JEMALLOC_MADV_PURGE) calls

This will be used to implement the feature on mingw, which doesn't have
madvise.
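The wrapper's contract is visible in the arena call site the diff below rewrites: hand pages_purge() a page-aligned, page-sized run and let the platform pick the madvise flavor. A hedged sketch of such a caller (purge_run and lg_page are illustrative names; the arena bookkeeping around the call is omitted):

#include <stddef.h>
#include <stdint.h>

void	pages_purge(void *addr, size_t length);	/* added below */

/* Purge npages dirty pages starting at page index pageind of chunk. */
static void
purge_run(void *chunk, size_t pageind, size_t npages, unsigned lg_page)
{

	pages_purge((void *)((uintptr_t)chunk + (pageind << lg_page)),
	    npages << lg_page);
}

The two madvise flavors behave differently: MADV_DONTNEED drops the pages immediately (the next touch faults in zeroed pages), whereas MADV_FREE merely marks them reclaimable — which is why only the MADV_DONTNEED configuration can later treat purged memory as zeroed.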
--- include/jemalloc/internal/chunk_mmap.h | 2 ++ include/jemalloc/internal/private_namespace.h | 1 + include/jemalloc/jemalloc_defs.h.in | 7 ------- src/arena.c | 4 ++-- src/chunk.c | 2 +- src/chunk_mmap.c | 14 ++++++++++++++ 6 files changed, 20 insertions(+), 10 deletions(-) diff --git a/include/jemalloc/internal/chunk_mmap.h b/include/jemalloc/internal/chunk_mmap.h index 04e86af9..2d01ac22 100644 --- a/include/jemalloc/internal/chunk_mmap.h +++ b/include/jemalloc/internal/chunk_mmap.h @@ -9,6 +9,8 @@ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS +void pages_purge(void *addr, size_t length); + void *chunk_alloc_mmap(size_t size, size_t alignment); bool chunk_dealloc_mmap(void *chunk, size_t size); diff --git a/include/jemalloc/internal/private_namespace.h b/include/jemalloc/internal/private_namespace.h index a69482b6..a2171ed2 100644 --- a/include/jemalloc/internal/private_namespace.h +++ b/include/jemalloc/internal/private_namespace.h @@ -196,6 +196,7 @@ #define opt_xmalloc JEMALLOC_N(opt_xmalloc) #define opt_zero JEMALLOC_N(opt_zero) #define p2rz JEMALLOC_N(p2rz) +#define pages_purge JEMALLOC_N(pages_purge) #define pow2_ceil JEMALLOC_N(pow2_ceil) #define prof_backtrace JEMALLOC_N(prof_backtrace) #define prof_boot0 JEMALLOC_N(prof_boot0) diff --git a/include/jemalloc/jemalloc_defs.h.in b/include/jemalloc/jemalloc_defs.h.in index 90baa355..6e816557 100644 --- a/include/jemalloc/jemalloc_defs.h.in +++ b/include/jemalloc/jemalloc_defs.h.in @@ -220,13 +220,6 @@ */ #undef JEMALLOC_PURGE_MADVISE_DONTNEED #undef JEMALLOC_PURGE_MADVISE_FREE -#ifdef JEMALLOC_PURGE_MADVISE_DONTNEED -# define JEMALLOC_MADV_PURGE MADV_DONTNEED -#elif defined(JEMALLOC_PURGE_MADVISE_FREE) -# define JEMALLOC_MADV_PURGE MADV_FREE -#else -# error "No method defined for purging unused dirty pages." -#endif /* sizeof(void *) == 2^LG_SIZEOF_PTR. */ #undef LG_SIZEOF_PTR diff --git a/src/arena.c b/src/arena.c index 0f15562a..ed47824a 100644 --- a/src/arena.c +++ b/src/arena.c @@ -676,8 +676,8 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) if (config_debug) ndirty -= npages; - madvise((void *)((uintptr_t)chunk + (pageind << LG_PAGE)), - (npages << LG_PAGE), JEMALLOC_MADV_PURGE); + pages_purge((void *)((uintptr_t)chunk + (pageind << LG_PAGE)), + (npages << LG_PAGE)); if (config_stats) nmadvise++; } diff --git a/src/chunk.c b/src/chunk.c index 67e0d503..bcaedea4 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -171,7 +171,7 @@ chunk_record(void *chunk, size_t size) { extent_node_t *xnode, *node, *prev, key; - madvise(chunk, size, JEMALLOC_MADV_PURGE); + pages_purge(chunk, size); xnode = NULL; malloc_mutex_lock(&chunks_mtx); diff --git a/src/chunk_mmap.c b/src/chunk_mmap.c index e11cc0e6..9dea8318 100644 --- a/src/chunk_mmap.c +++ b/src/chunk_mmap.c @@ -72,6 +72,20 @@ pages_unmap(void *addr, size_t size) } } +void +pages_purge(void *addr, size_t length) +{ + +#ifdef JEMALLOC_PURGE_MADVISE_DONTNEED +# define JEMALLOC_MADV_PURGE MADV_DONTNEED +#elif defined(JEMALLOC_PURGE_MADVISE_FREE) +# define JEMALLOC_MADV_PURGE MADV_FREE +#else +# error "No method defined for purging unused dirty pages." 
+#endif + madvise(addr, length, JEMALLOC_MADV_PURGE); +} + static void * chunk_alloc_mmap_slow(size_t size, size_t alignment, bool unaligned) { From 1ad56385adc40cfbca1b14c240a9c647135ac641 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Wed, 18 Apr 2012 18:29:44 +0200 Subject: [PATCH 137/205] Fix malloc_vsnprintf handling of %o, %u and %x These flags take unsigned values, but they were fed with signed values taken with va_arg, and that led to sign extension in cases where the corresponding value has the most significant bit set. --- src/util.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/src/util.c b/src/util.c index 2aab61fe..99ae26dd 100644 --- a/src/util.c +++ b/src/util.c @@ -320,12 +320,21 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) case '?': \ val = va_arg(ap, int); \ break; \ + case '?' | 0x80: \ + val = va_arg(ap, unsigned int); \ + break; \ case 'l': \ val = va_arg(ap, long); \ break; \ + case 'l' | 0x80: \ + val = va_arg(ap, unsigned long); \ + break; \ case 'q': \ val = va_arg(ap, long long); \ break; \ + case 'q' | 0x80: \ + val = va_arg(ap, unsigned long long); \ + break; \ case 'j': \ val = va_arg(ap, intmax_t); \ break; \ @@ -335,6 +344,9 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) case 'z': \ val = va_arg(ap, ssize_t); \ break; \ + case 'z' | 0x80: \ + val = va_arg(ap, size_t); \ + break; \ case 'p': /* Synthetic; used for %p. */ \ val = va_arg(ap, uintptr_t); \ break; \ @@ -358,7 +370,7 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) bool plus_plus = false; int prec = -1; int width = -1; - char len = '?'; + unsigned char len = '?'; f++; if (*f == '%') { @@ -480,7 +492,7 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) uintmax_t val JEMALLOC_CC_SILENCE_INIT(0); char buf[O2S_BUFSIZE]; - GET_ARG_NUMERIC(val, len); + GET_ARG_NUMERIC(val, len | 0x80); s = o2s(val, alt_form, buf, &slen); APPEND_PADDED_S(s, slen, width, left_justify); f++; @@ -489,7 +501,7 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) uintmax_t val JEMALLOC_CC_SILENCE_INIT(0); char buf[U2S_BUFSIZE]; - GET_ARG_NUMERIC(val, len); + GET_ARG_NUMERIC(val, len | 0x80); s = u2s(val, 10, false, buf, &slen); APPEND_PADDED_S(s, slen, width, left_justify); f++; @@ -498,7 +510,7 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) uintmax_t val JEMALLOC_CC_SILENCE_INIT(0); char buf[X2S_BUFSIZE]; - GET_ARG_NUMERIC(val, len); + GET_ARG_NUMERIC(val, len | 0x80); s = x2s(val, alt_form, *f == 'X', buf, &slen); APPEND_PADDED_S(s, slen, width, left_justify); f++; From 86e58583bb443fcfe885a1a96b466ab5933cb443 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 18 Apr 2012 19:01:00 -0700 Subject: [PATCH 138/205] Make special FreeBSD function overrides visible. Make special FreeBSD libc/libthr function overrides for _malloc_prefork(), _malloc_postfork(), and _malloc_thread_cleanup() visible. 
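The one-line fix is easy to misread: when a library is otherwise built with hidden symbol visibility, an override intended for libc/libthr must be re-exported explicitly or the dynamic linker never binds it. A standalone illustration with the attribute spelled directly rather than through the JEMALLOC_ATTR machinery:

/* Re-export the hook despite an -fvisibility=hidden style build. */
__attribute__((visibility("default")))
void
_malloc_prefork(void)
{

	/* ... acquire allocator mutexes before fork() ... */
}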
--- include/jemalloc/internal/private_namespace.h | 3 +++ src/jemalloc.c | 2 ++ src/tsd.c | 1 + 3 files changed, 6 insertions(+) diff --git a/include/jemalloc/internal/private_namespace.h b/include/jemalloc/internal/private_namespace.h index a2171ed2..b1e8330e 100644 --- a/include/jemalloc/internal/private_namespace.h +++ b/include/jemalloc/internal/private_namespace.h @@ -1,3 +1,6 @@ +#define a0calloc JEMALLOC_N(a0calloc) +#define a0free JEMALLOC_N(a0free) +#define a0malloc JEMALLOC_N(a0malloc) #define arena_alloc_junk_small JEMALLOC_N(arena_alloc_junk_small) #define arena_bin_index JEMALLOC_N(arena_bin_index) #define arena_boot JEMALLOC_N(arena_boot) diff --git a/src/jemalloc.c b/src/jemalloc.c index d4b681b4..6669c110 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1621,6 +1621,7 @@ je_nallocm(size_t *rsize, size_t size, int flags) void jemalloc_prefork(void) #else +JEMALLOC_ATTR(visibility("default")) void _malloc_prefork(void) #endif @@ -1642,6 +1643,7 @@ _malloc_prefork(void) void jemalloc_postfork_parent(void) #else +JEMALLOC_ATTR(visibility("default")) void _malloc_postfork(void) #endif diff --git a/src/tsd.c b/src/tsd.c index 0838dc86..f63493d3 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -32,6 +32,7 @@ malloc_tsd_no_cleanup(void *arg) } #ifdef JEMALLOC_MALLOC_THREAD_CLEANUP +JEMALLOC_ATTR(visibility("default")) void _malloc_thread_cleanup(void) { From 7ff1ce4131651fea1df7b1c010d71667bc574816 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Wed, 18 Apr 2012 18:29:47 +0200 Subject: [PATCH 139/205] Initialize all members of non-TLS tsd wrapper when creating it Not setting the initialized member leads to randomly calling the cleanup function in cases it shouldn't be called (and isn't called in other implementations). --- include/jemalloc/internal/tsd.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 35ae5e3c..3f953f9a 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -253,6 +253,7 @@ a_name##_tsd_get_wrapper(void) \ } else { \ static a_type tsd_static_data = a_initializer; \ wrapper->isstatic = false; \ + wrapper->initialized = false; \ wrapper->val = tsd_static_data; \ } \ if (pthread_setspecific(a_name##_tsd, \ From 8ad483fe60a803acdbd403d88bb30b548ee1b5f9 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Wed, 18 Apr 2012 18:29:48 +0200 Subject: [PATCH 140/205] Remove initialization of the non-TLS tsd wrapper from static memory Using static memory when malloc_tsd_malloc fails means all threads share the same wrapper and thus the same wrapped value. This defeats the purpose of TSD. --- include/jemalloc/internal/tsd.h | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 3f953f9a..5888b377 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -192,7 +192,6 @@ a_name##_tsd_set(a_type *val) \ a_cleanup) \ /* Data structure. 
 */ \
 typedef struct { \
-	bool	isstatic; \
 	bool	initialized; \
 	a_type	val; \
 } a_name##_tsd_wrapper_t; \
@@ -218,8 +217,7 @@ a_name##_tsd_cleanup_wrapper(void *arg) \
 			return; \
 		} \
 	} \
-	if (wrapper->isstatic == false) \
-		malloc_tsd_dalloc(wrapper); \
+	malloc_tsd_dalloc(wrapper); \
 } \
 a_attr bool \
 a_name##_tsd_boot(void) \
@@ -242,17 +240,11 @@ a_name##_tsd_get_wrapper(void) \
 		wrapper = (a_name##_tsd_wrapper_t *) \
 		    malloc_tsd_malloc(sizeof(a_name##_tsd_wrapper_t)); \
 		if (wrapper == NULL) { \
-			static a_name##_tsd_wrapper_t \
-			    a_name##_tsd_static_data = \
-			    {true, false, a_initializer}; \
 			malloc_write("<jemalloc>: Error allocating" \
 			    " TSD for "#a_name"\n"); \
-			if (opt_abort) \
-				abort(); \
-			wrapper = &a_name##_tsd_static_data; \
+			abort(); \
 		} else { \
 			static a_type tsd_static_data = a_initializer; \
-			wrapper->isstatic = false; \
 			wrapper->initialized = false; \
 			wrapper->val = tsd_static_data; \
 		} \
@@ -260,8 +252,7 @@ a_name##_tsd_get_wrapper(void) \
 		    (void *)wrapper)) { \
 			malloc_write("<jemalloc>: Error setting" \
 			    " TSD for "#a_name"\n"); \
-			if (opt_abort) \
-				abort(); \
+			abort(); \
 		} \
 	} \
 	return (wrapper); \
From 13067ec8350f213c3accc2e5fb70ca5a503e0e17 Mon Sep 17 00:00:00 2001
From: Mike Hommey
Date: Wed, 18 Apr 2012 18:29:49 +0200
Subject: [PATCH 141/205] Remove extra argument for malloc_tsd_cleanup_register

Bookkeeping an extra argument that actually only stores a function
pointer for a function we already have is not very useful.
---
 include/jemalloc/internal/tsd.h | 15 +++++----------
 src/tsd.c                       |  7 +++----
 2 files changed, 8 insertions(+), 14 deletions(-)

diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h
index 5888b377..5e904cbb 100644
--- a/include/jemalloc/internal/tsd.h
+++ b/include/jemalloc/internal/tsd.h
@@ -4,11 +4,7 @@
 /* Maximum number of malloc_tsd users with cleanup functions. */
 #define MALLOC_TSD_CLEANUPS_MAX 8
 
-typedef struct malloc_tsd_cleanup_s malloc_tsd_cleanup_t;
-struct malloc_tsd_cleanup_s {
-	bool	(*f)(void *);
-	void	*arg;
-};
+typedef bool (*malloc_tsd_cleanup_t)(void);
 
 /*
  * TLS/TSD-agnostic macro-based implementation of thread-specific data. There
@@ -110,13 +106,12 @@ a_attr bool	a_name##_booted = false;
 	    a_cleanup) \
 /* Initialization/cleanup.
*/ \ a_attr bool \ -a_name##_tsd_cleanup_wrapper(void *arg) \ +a_name##_tsd_cleanup_wrapper(void) \ { \ - bool (*cleanup)(void *) = arg; \ \ if (a_name##_initialized) { \ a_name##_initialized = false; \ - cleanup(&a_name##_tls); \ + a_cleanup(&a_name##_tls); \ } \ return (a_name##_initialized); \ } \ @@ -126,7 +121,7 @@ a_name##_tsd_boot(void) \ \ if (a_cleanup != malloc_tsd_no_cleanup) { \ malloc_tsd_cleanup_register( \ - &a_name##_tsd_cleanup_wrapper, a_cleanup); \ + &a_name##_tsd_cleanup_wrapper); \ } \ a_name##_booted = true; \ return (false); \ @@ -290,7 +285,7 @@ a_name##_tsd_set(a_type *val) \ void *malloc_tsd_malloc(size_t size); void malloc_tsd_dalloc(void *wrapper); void malloc_tsd_no_cleanup(void *); -void malloc_tsd_cleanup_register(bool (*f)(void *), void *arg); +void malloc_tsd_cleanup_register(bool (*f)(void)); void malloc_tsd_boot(void); #endif /* JEMALLOC_H_EXTERNS */ diff --git a/src/tsd.c b/src/tsd.c index f63493d3..281a2e9b 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -46,7 +46,7 @@ _malloc_thread_cleanup(void) again = false; for (i = 0; i < ncleanups; i++) { if (pending[i]) { - pending[i] = cleanups[i].f(cleanups[i].arg); + pending[i] = cleanups[i](); if (pending[i]) again = true; } @@ -56,12 +56,11 @@ _malloc_thread_cleanup(void) #endif void -malloc_tsd_cleanup_register(bool (*f)(void *), void *arg) +malloc_tsd_cleanup_register(bool (*f)(void)) { assert(ncleanups < MALLOC_TSD_CLEANUPS_MAX); - cleanups[ncleanups].f = f; - cleanups[ncleanups].arg = arg; + cleanups[ncleanups] = f; ncleanups++; } From f7088e6c992d079bc3162e0c48ed4dc5def6d263 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 19 Apr 2012 18:28:03 -0700 Subject: [PATCH 142/205] Make arena_salloc() an inline function. --- include/jemalloc/internal/arena.h | 42 ++++++++++++++++++- .../jemalloc/internal/jemalloc_internal.h.in | 5 +-- include/jemalloc/internal/tcache.h | 13 +++--- src/arena.c | 40 ------------------ src/tcache.c | 6 +++ 5 files changed, 56 insertions(+), 50 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 3790818c..2eb41cd9 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -408,7 +408,6 @@ void arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info); void *arena_malloc_small(arena_t *arena, size_t size, bool zero); void *arena_malloc_large(arena_t *arena, size_t size, bool zero); void *arena_palloc(arena_t *arena, size_t size, size_t alignment, bool zero); -size_t arena_salloc(const void *ptr, bool demote); void arena_prof_promoted(const void *ptr, size_t size); void arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, arena_chunk_map_t *mapelm); @@ -437,6 +436,7 @@ unsigned arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, prof_ctx_t *arena_prof_ctx_get(const void *ptr); void arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx); void *arena_malloc(arena_t *arena, size_t size, bool zero, bool try_tcache); +size_t arena_salloc(const void *ptr, bool demote); void arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr, bool try_tcache); #endif @@ -625,6 +625,46 @@ arena_malloc(arena_t *arena, size_t size, bool zero, bool try_tcache) } } +/* Return the size of the allocation pointed to by ptr. 
*/ +JEMALLOC_INLINE size_t +arena_salloc(const void *ptr, bool demote) +{ + size_t ret; + arena_chunk_t *chunk; + size_t pageind, mapbits; + + assert(ptr != NULL); + assert(CHUNK_ADDR2BASE(ptr) != ptr); + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; + mapbits = chunk->map[pageind-map_bias].bits; + assert((mapbits & CHUNK_MAP_ALLOCATED) != 0); + if ((mapbits & CHUNK_MAP_LARGE) == 0) { + arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + + (uintptr_t)((pageind - (mapbits >> LG_PAGE)) << LG_PAGE)); + size_t binind = arena_bin_index(chunk->arena, run->bin); + arena_bin_info_t *bin_info = &arena_bin_info[binind]; + assert(((uintptr_t)ptr - ((uintptr_t)run + + (uintptr_t)bin_info->reg0_offset)) % bin_info->reg_interval + == 0); + ret = bin_info->reg_size; + } else { + assert(((uintptr_t)ptr & PAGE_MASK) == 0); + ret = mapbits & ~PAGE_MASK; + if (config_prof && demote && prof_promote && ret == PAGE && + (mapbits & CHUNK_MAP_CLASS_MASK) != 0) { + size_t binind = ((mapbits & CHUNK_MAP_CLASS_MASK) >> + CHUNK_MAP_CLASS_SHIFT) - 1; + assert(binind < NBINS); + ret = arena_bin_info[binind].reg_size; + } + assert(ret != 0); + } + + return (ret); +} + JEMALLOC_INLINE void arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr, bool try_tcache) { diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 905653a2..b61abe84 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -732,10 +732,9 @@ isalloc(const void *ptr, bool demote) assert(config_prof || demote == false); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (chunk != ptr) { - /* Region. */ + if (chunk != ptr) ret = arena_salloc(ptr, demote); - } else + else ret = huge_salloc(ptr); return (ret); diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index 9d8c992d..cfb17c28 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -100,6 +100,9 @@ extern size_t nhbins; /* Maximum cached size class. 
*/ extern size_t tcache_maxclass; +size_t tcache_salloc(const void *ptr); +void *tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin, + size_t binind); void tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem, tcache_t *tcache); void tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem, @@ -107,8 +110,6 @@ void tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem, void tcache_arena_associate(tcache_t *tcache, arena_t *arena); void tcache_arena_dissociate(tcache_t *tcache); tcache_t *tcache_create(arena_t *arena); -void *tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin, - size_t binind); void tcache_destroy(tcache_t *tcache); void tcache_thread_cleanup(void *arg); void tcache_stats_merge(tcache_t *tcache, arena_t *arena); @@ -340,7 +341,7 @@ tcache_alloc_small(tcache_t *tcache, size_t size, bool zero) if (ret == NULL) return (NULL); } - assert(arena_salloc(ret, false) == arena_bin_info[binind].reg_size); + assert(tcache_salloc(ret) == arena_bin_info[binind].reg_size); if (zero == false) { if (config_fill) { @@ -431,7 +432,7 @@ tcache_dalloc_small(tcache_t *tcache, void *ptr) size_t pageind, binind; arena_chunk_map_t *mapelm; - assert(arena_salloc(ptr, false) <= SMALL_MAXCLASS); + assert(tcache_salloc(ptr) <= SMALL_MAXCLASS); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); arena = chunk->arena; @@ -468,8 +469,8 @@ tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size) tcache_bin_info_t *tbin_info; assert((size & PAGE_MASK) == 0); - assert(arena_salloc(ptr, false) > SMALL_MAXCLASS); - assert(arena_salloc(ptr, false) <= tcache_maxclass); + assert(tcache_salloc(ptr) > SMALL_MAXCLASS); + assert(tcache_salloc(ptr) <= tcache_maxclass); binind = NBINS + (size >> LG_PAGE) - 1; diff --git a/src/arena.c b/src/arena.c index ed47824a..6f28abe9 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1465,46 +1465,6 @@ arena_palloc(arena_t *arena, size_t size, size_t alignment, bool zero) return (ret); } -/* Return the size of the allocation pointed to by ptr. 
*/ -size_t -arena_salloc(const void *ptr, bool demote) -{ - size_t ret; - arena_chunk_t *chunk; - size_t pageind, mapbits; - - assert(ptr != NULL); - assert(CHUNK_ADDR2BASE(ptr) != ptr); - - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - mapbits = chunk->map[pageind-map_bias].bits; - assert((mapbits & CHUNK_MAP_ALLOCATED) != 0); - if ((mapbits & CHUNK_MAP_LARGE) == 0) { - arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + - (uintptr_t)((pageind - (mapbits >> LG_PAGE)) << LG_PAGE)); - size_t binind = arena_bin_index(chunk->arena, run->bin); - arena_bin_info_t *bin_info = &arena_bin_info[binind]; - assert(((uintptr_t)ptr - ((uintptr_t)run + - (uintptr_t)bin_info->reg0_offset)) % bin_info->reg_interval - == 0); - ret = bin_info->reg_size; - } else { - assert(((uintptr_t)ptr & PAGE_MASK) == 0); - ret = mapbits & ~PAGE_MASK; - if (config_prof && demote && prof_promote && ret == PAGE && - (mapbits & CHUNK_MAP_CLASS_MASK) != 0) { - size_t binind = ((mapbits & CHUNK_MAP_CLASS_MASK) >> - CHUNK_MAP_CLASS_SHIFT) - 1; - assert(binind < NBINS); - ret = arena_bin_info[binind].reg_size; - } - assert(ret != 0); - } - - return (ret); -} - void arena_prof_promoted(const void *ptr, size_t size) { diff --git a/src/tcache.c b/src/tcache.c index be26b59c..9c4970c5 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -18,6 +18,12 @@ size_t tcache_maxclass; /******************************************************************************/ +size_t tcache_salloc(const void *ptr) +{ + + return (arena_salloc(ptr, false)); +} + void * tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin, size_t binind) { From 7d20fbc44a28209e8b70e2d8efe12254962c6fd6 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 20 Apr 2012 13:06:39 -0700 Subject: [PATCH 143/205] Don't mangle pthread_create(). Don't mangle pthread_create(); it's an exported symbol when defined. --- include/jemalloc/internal/private_namespace.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/jemalloc/internal/private_namespace.h b/include/jemalloc/internal/private_namespace.h index b1e8330e..547ead99 100644 --- a/include/jemalloc/internal/private_namespace.h +++ b/include/jemalloc/internal/private_namespace.h @@ -221,7 +221,6 @@ #define prof_tdata_tsd_cleanup_wrapper JEMALLOC_N(prof_tdata_tsd_cleanup_wrapper) #define prof_tdata_tsd_get JEMALLOC_N(prof_tdata_tsd_get) #define prof_tdata_tsd_set JEMALLOC_N(prof_tdata_tsd_set) -#define pthread_create JEMALLOC_N(pthread_create) #define quarantine JEMALLOC_N(quarantine) #define quarantine_boot JEMALLOC_N(quarantine_boot) #define quarantine_tsd_boot JEMALLOC_N(quarantine_tsd_boot) From 918d6e20b760da13776ca0faf8bc00b4647a482c Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 20 Apr 2012 13:42:21 -0700 Subject: [PATCH 144/205] Add missing private namespace mangling. --- ChangeLog | 1 + include/jemalloc/internal/private_namespace.h | 46 +++++++++++++++++++ 2 files changed, 47 insertions(+) diff --git a/ChangeLog b/ChangeLog index 1fed914f..6d5670f5 100644 --- a/ChangeLog +++ b/ChangeLog @@ -81,6 +81,7 @@ found in the git revision history: - Add missing "opt.lg_tcache_max" mallctl implementation. - Use glibc allocator hooks to make mixed allocator usage less likely. - Fix build issues for --disable-tcache. + - Don't mangle pthread_create() when --with-private-namespace is specified. 
* 2.2.5 (November 14, 2011) diff --git a/include/jemalloc/internal/private_namespace.h b/include/jemalloc/internal/private_namespace.h index 547ead99..15fe3c51 100644 --- a/include/jemalloc/internal/private_namespace.h +++ b/include/jemalloc/internal/private_namespace.h @@ -3,6 +3,7 @@ #define a0malloc JEMALLOC_N(a0malloc) #define arena_alloc_junk_small JEMALLOC_N(arena_alloc_junk_small) #define arena_bin_index JEMALLOC_N(arena_bin_index) +#define arena_bin_info JEMALLOC_N(arena_bin_info) #define arena_boot JEMALLOC_N(arena_boot) #define arena_dalloc JEMALLOC_N(arena_dalloc) #define arena_dalloc_bin JEMALLOC_N(arena_dalloc_bin) @@ -11,6 +12,7 @@ #define arena_malloc JEMALLOC_N(arena_malloc) #define arena_malloc_large JEMALLOC_N(arena_malloc_large) #define arena_malloc_small JEMALLOC_N(arena_malloc_small) +#define arena_maxclass JEMALLOC_N(arena_maxclass) #define arena_new JEMALLOC_N(arena_new) #define arena_palloc JEMALLOC_N(arena_palloc) #define arena_postfork_child JEMALLOC_N(arena_postfork_child) @@ -27,9 +29,13 @@ #define arena_salloc JEMALLOC_N(arena_salloc) #define arena_stats_merge JEMALLOC_N(arena_stats_merge) #define arena_tcache_fill_small JEMALLOC_N(arena_tcache_fill_small) +#define arenas JEMALLOC_N(arenas) #define arenas_bin_i_index JEMALLOC_N(arenas_bin_i_index) +#define arenas_booted JEMALLOC_N(arenas_booted) #define arenas_cleanup JEMALLOC_N(arenas_cleanup) #define arenas_extend JEMALLOC_N(arenas_extend) +#define arenas_initialized JEMALLOC_N(arenas_initialized) +#define arenas_lock JEMALLOC_N(arenas_lock) #define arenas_lrun_i_index JEMALLOC_N(arenas_lrun_i_index) #define arenas_tls JEMALLOC_N(arenas_tls) #define arenas_tsd_boot JEMALLOC_N(arenas_tsd_boot) @@ -78,6 +84,11 @@ #define chunk_dss_prefork JEMALLOC_N(chunk_dss_prefork) #define chunk_in_dss JEMALLOC_N(chunk_in_dss) #define chunk_mmap_boot JEMALLOC_N(chunk_mmap_boot) +#define chunk_npages JEMALLOC_N(chunk_npages) +#define chunks_mtx JEMALLOC_N(chunks_mtx) +#define chunks_rtree JEMALLOC_N(chunks_rtree) +#define chunksize JEMALLOC_N(chunksize) +#define chunksize_mask JEMALLOC_N(chunksize_mask) #define ckh_bucket_search JEMALLOC_N(ckh_bucket_search) #define ckh_count JEMALLOC_N(ckh_count) #define ckh_delete JEMALLOC_N(ckh_delete) @@ -132,9 +143,13 @@ #define extent_tree_szad_reverse_iter_start JEMALLOC_N(extent_tree_szad_reverse_iter_start) #define extent_tree_szad_search JEMALLOC_N(extent_tree_szad_search) #define hash JEMALLOC_N(hash) +#define huge_allocated JEMALLOC_N(huge_allocated) #define huge_boot JEMALLOC_N(huge_boot) #define huge_dalloc JEMALLOC_N(huge_dalloc) #define huge_malloc JEMALLOC_N(huge_malloc) +#define huge_mtx JEMALLOC_N(huge_mtx) +#define huge_ndalloc JEMALLOC_N(huge_ndalloc) +#define huge_nmalloc JEMALLOC_N(huge_nmalloc) #define huge_palloc JEMALLOC_N(huge_palloc) #define huge_postfork_child JEMALLOC_N(huge_postfork_child) #define huge_postfork_parent JEMALLOC_N(huge_postfork_parent) @@ -152,6 +167,7 @@ #define iqalloc JEMALLOC_N(iqalloc) #define iralloc JEMALLOC_N(iralloc) #define isalloc JEMALLOC_N(isalloc) +#define isthreaded JEMALLOC_N(isthreaded) #define ivsalloc JEMALLOC_N(ivsalloc) #define jemalloc_postfork_child JEMALLOC_N(jemalloc_postfork_child) #define jemalloc_postfork_parent JEMALLOC_N(jemalloc_postfork_parent) @@ -174,12 +190,16 @@ #define malloc_vcprintf JEMALLOC_N(malloc_vcprintf) #define malloc_vsnprintf JEMALLOC_N(malloc_vsnprintf) #define malloc_write JEMALLOC_N(malloc_write) +#define map_bias JEMALLOC_N(map_bias) #define mb_write JEMALLOC_N(mb_write) #define 
mmap_unaligned_tsd_boot JEMALLOC_N(mmap_unaligned_tsd_boot) #define mmap_unaligned_tsd_cleanup_wrapper JEMALLOC_N(mmap_unaligned_tsd_cleanup_wrapper) #define mmap_unaligned_tsd_get JEMALLOC_N(mmap_unaligned_tsd_get) #define mmap_unaligned_tsd_set JEMALLOC_N(mmap_unaligned_tsd_set) #define mutex_boot JEMALLOC_N(mutex_boot) +#define narenas JEMALLOC_N(narenas) +#define ncpus JEMALLOC_N(ncpus) +#define nhbins JEMALLOC_N(nhbins) #define opt_abort JEMALLOC_N(opt_abort) #define opt_junk JEMALLOC_N(opt_junk) #define opt_lg_chunk JEMALLOC_N(opt_lg_chunk) @@ -191,11 +211,16 @@ #define opt_prof JEMALLOC_N(opt_prof) #define opt_prof_accum JEMALLOC_N(opt_prof_accum) #define opt_prof_active JEMALLOC_N(opt_prof_active) +#define opt_prof_final JEMALLOC_N(opt_prof_final) #define opt_prof_gdump JEMALLOC_N(opt_prof_gdump) #define opt_prof_leak JEMALLOC_N(opt_prof_leak) +#define opt_prof_prefix JEMALLOC_N(opt_prof_prefix) +#define opt_quarantine JEMALLOC_N(opt_quarantine) +#define opt_redzone JEMALLOC_N(opt_redzone) #define opt_stats_print JEMALLOC_N(opt_stats_print) #define opt_tcache JEMALLOC_N(opt_tcache) #define opt_utrace JEMALLOC_N(opt_utrace) +#define opt_valgrind JEMALLOC_N(opt_valgrind) #define opt_xmalloc JEMALLOC_N(opt_xmalloc) #define opt_zero JEMALLOC_N(opt_zero) #define p2rz JEMALLOC_N(p2rz) @@ -210,13 +235,20 @@ #define prof_free JEMALLOC_N(prof_free) #define prof_gdump JEMALLOC_N(prof_gdump) #define prof_idump JEMALLOC_N(prof_idump) +#define prof_interval JEMALLOC_N(prof_interval) #define prof_lookup JEMALLOC_N(prof_lookup) #define prof_malloc JEMALLOC_N(prof_malloc) #define prof_mdump JEMALLOC_N(prof_mdump) +#define prof_lookup JEMALLOC_N(prof_lookup) +#define prof_promote JEMALLOC_N(prof_promote) #define prof_realloc JEMALLOC_N(prof_realloc) #define prof_sample_accum_update JEMALLOC_N(prof_sample_accum_update) #define prof_sample_threshold_update JEMALLOC_N(prof_sample_threshold_update) +#define prof_tdata_booted JEMALLOC_N(prof_tdata_booted) #define prof_tdata_cleanup JEMALLOC_N(prof_tdata_cleanup) +#define prof_tdata_init JEMALLOC_N(prof_tdata_init) +#define prof_tdata_initialized JEMALLOC_N(prof_tdata_initialized) +#define prof_tdata_tls JEMALLOC_N(prof_tdata_tls) #define prof_tdata_tsd_boot JEMALLOC_N(prof_tdata_tsd_boot) #define prof_tdata_tsd_cleanup_wrapper JEMALLOC_N(prof_tdata_tsd_cleanup_wrapper) #define prof_tdata_tsd_get JEMALLOC_N(prof_tdata_tsd_get) @@ -241,6 +273,7 @@ #define stats_cactive_add JEMALLOC_N(stats_cactive_add) #define stats_cactive_get JEMALLOC_N(stats_cactive_get) #define stats_cactive_sub JEMALLOC_N(stats_cactive_sub) +#define stats_chunks JEMALLOC_N(stats_chunks) #define stats_print JEMALLOC_N(stats_print) #define tcache_alloc_easy JEMALLOC_N(tcache_alloc_easy) #define tcache_alloc_large JEMALLOC_N(tcache_alloc_large) @@ -250,26 +283,39 @@ #define tcache_arena_dissociate JEMALLOC_N(tcache_arena_dissociate) #define tcache_bin_flush_large JEMALLOC_N(tcache_bin_flush_large) #define tcache_bin_flush_small JEMALLOC_N(tcache_bin_flush_small) +#define tcache_bin_info JEMALLOC_N(tcache_bin_info) #define tcache_boot0 JEMALLOC_N(tcache_boot0) #define tcache_boot1 JEMALLOC_N(tcache_boot1) +#define tcache_booted JEMALLOC_N(tcache_booted) #define tcache_create JEMALLOC_N(tcache_create) #define tcache_dalloc_large JEMALLOC_N(tcache_dalloc_large) #define tcache_dalloc_small JEMALLOC_N(tcache_dalloc_small) #define tcache_destroy JEMALLOC_N(tcache_destroy) +#define tcache_enabled_booted JEMALLOC_N(tcache_enabled_booted) #define tcache_enabled_get JEMALLOC_N(tcache_enabled_get) 
+#define tcache_enabled_initialized JEMALLOC_N(tcache_enabled_initialized) #define tcache_enabled_set JEMALLOC_N(tcache_enabled_set) +#define tcache_enabled_tls JEMALLOC_N(tcache_enabled_tls) #define tcache_enabled_tsd_boot JEMALLOC_N(tcache_enabled_tsd_boot) #define tcache_enabled_tsd_cleanup_wrapper JEMALLOC_N(tcache_enabled_tsd_cleanup_wrapper) #define tcache_enabled_tsd_get JEMALLOC_N(tcache_enabled_tsd_get) #define tcache_enabled_tsd_set JEMALLOC_N(tcache_enabled_tsd_set) #define tcache_event JEMALLOC_N(tcache_event) +#define tcache_initialized JEMALLOC_N(tcache_initialized) #define tcache_flush JEMALLOC_N(tcache_flush) +#define tcache_get JEMALLOC_N(tcache_get) +#define tcache_maxclass JEMALLOC_N(tcache_maxclass) #define tcache_stats_merge JEMALLOC_N(tcache_stats_merge) +#define tcache_salloc JEMALLOC_N(tcache_salloc) #define tcache_thread_cleanup JEMALLOC_N(tcache_thread_cleanup) +#define tcache_tls JEMALLOC_N(tcache_tls) #define tcache_tsd_boot JEMALLOC_N(tcache_tsd_boot) #define tcache_tsd_cleanup_wrapper JEMALLOC_N(tcache_tsd_cleanup_wrapper) #define tcache_tsd_get JEMALLOC_N(tcache_tsd_get) #define tcache_tsd_set JEMALLOC_N(tcache_tsd_set) +#define thread_allocated_booted JEMALLOC_N(thread_allocated_booted) +#define thread_allocated_initialized JEMALLOC_N(thread_allocated_initialized) +#define thread_allocated_tls JEMALLOC_N(thread_allocated_tls) #define thread_allocated_tsd_boot JEMALLOC_N(thread_allocated_tsd_boot) #define thread_allocated_tsd_cleanup_wrapper JEMALLOC_N(thread_allocated_tsd_cleanup_wrapper) #define thread_allocated_tsd_get JEMALLOC_N(thread_allocated_tsd_get) From bedceea2a8aef427d96a77762e9d4bda9f0cc0c3 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 20 Apr 2012 14:12:30 -0700 Subject: [PATCH 145/205] Fix isthreaded-related build breakage. --- include/jemalloc/internal/mutex.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/jemalloc/internal/mutex.h b/include/jemalloc/internal/mutex.h index c46feee3..8837ef57 100644 --- a/include/jemalloc/internal/mutex.h +++ b/include/jemalloc/internal/mutex.h @@ -40,6 +40,7 @@ struct malloc_mutex_s { #ifdef JEMALLOC_LAZY_LOCK extern bool isthreaded; #else +# undef isthreaded /* Undo private_namespace.h definition. */ # define isthreaded true #endif From 606f1fdc3cdbc700717133ca56685313caea24bb Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 20 Apr 2012 21:39:14 -0700 Subject: [PATCH 146/205] Put CONF_HANDLE_*() keys in quotes. Put CONF_HANDLE_*() keys in quotes, so that they aren't mangled when --with-private-namespace is used. 
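The mangling hazard is a preprocessor subtlety: # stringizes its parameter without expansion, but these keys pass through a second macro layer (e.g. CONF_HANDLE_BOOL forwarding to CONF_HANDLE_BOOL_HIT), and macro arguments are fully expanded before such forwarding. A self-contained sketch, assuming a private-namespace prefix of "je_":

#define JEMALLOC_N(n)	je_##n
#define narenas		JEMALLOC_N(narenas)	/* private_namespace.h style */

#define STR(x)	#x	/* no expansion: STR(narenas) -> "narenas" */
#define FWD(x)	STR(x)	/* x expands first: FWD(narenas) -> "je_narenas" */

A key written as the bare token narenas therefore stops matching the string the user writes in the options string, while a key written as the string literal "narenas" never enters the expansion machinery at all — which is exactly what the diff below switches to.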
--- src/jemalloc.c | 57 +++++++++++++++++++++++++------------------------- 1 file changed, 29 insertions(+), 28 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 6669c110..00c2b23c 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -423,7 +423,7 @@ malloc_conf_init(void) while (*opts != '\0' && malloc_conf_next(&opts, &k, &klen, &v, &vlen) == false) { #define CONF_HANDLE_BOOL_HIT(o, n, hit) \ - if (sizeof(#n)-1 == klen && strncmp(#n, k, \ + if (sizeof(n)-1 == klen && strncmp(n, k, \ klen) == 0) { \ if (strncmp("true", v, vlen) == 0 && \ vlen == sizeof("true")-1) \ @@ -446,7 +446,7 @@ malloc_conf_init(void) continue; \ } #define CONF_HANDLE_SIZE_T(o, n, min, max) \ - if (sizeof(#n)-1 == klen && strncmp(#n, k, \ + if (sizeof(n)-1 == klen && strncmp(n, k, \ klen) == 0) { \ uintmax_t um; \ char *end; \ @@ -467,7 +467,7 @@ malloc_conf_init(void) continue; \ } #define CONF_HANDLE_SSIZE_T(o, n, min, max) \ - if (sizeof(#n)-1 == klen && strncmp(#n, k, \ + if (sizeof(n)-1 == klen && strncmp(n, k, \ klen) == 0) { \ long l; \ char *end; \ @@ -489,7 +489,7 @@ malloc_conf_init(void) continue; \ } #define CONF_HANDLE_CHAR_P(o, n, d) \ - if (sizeof(#n)-1 == klen && strncmp(#n, k, \ + if (sizeof(n)-1 == klen && strncmp(n, k, \ klen) == 0) { \ size_t cpylen = (vlen <= \ sizeof(o)-1) ? vlen : \ @@ -499,7 +499,7 @@ malloc_conf_init(void) continue; \ } - CONF_HANDLE_BOOL(opt_abort, abort) + CONF_HANDLE_BOOL(opt_abort, "abort") /* * Chunks always require at least one header page, plus * one data page in the absence of redzones, or three @@ -507,26 +507,27 @@ malloc_conf_init(void) * simplify options processing, fix the limit based on * config_fill. */ - CONF_HANDLE_SIZE_T(opt_lg_chunk, lg_chunk, LG_PAGE + + CONF_HANDLE_SIZE_T(opt_lg_chunk, "lg_chunk", LG_PAGE + (config_fill ? 
2 : 1), (sizeof(size_t) << 3) - 1) - CONF_HANDLE_SIZE_T(opt_narenas, narenas, 1, SIZE_T_MAX) - CONF_HANDLE_SSIZE_T(opt_lg_dirty_mult, lg_dirty_mult, + CONF_HANDLE_SIZE_T(opt_narenas, "narenas", 1, + SIZE_T_MAX) + CONF_HANDLE_SSIZE_T(opt_lg_dirty_mult, "lg_dirty_mult", -1, (sizeof(size_t) << 3) - 1) - CONF_HANDLE_BOOL(opt_stats_print, stats_print) + CONF_HANDLE_BOOL(opt_stats_print, "stats_print") if (config_fill) { - CONF_HANDLE_BOOL(opt_junk, junk) - CONF_HANDLE_SIZE_T(opt_quarantine, quarantine, + CONF_HANDLE_BOOL(opt_junk, "junk") + CONF_HANDLE_SIZE_T(opt_quarantine, "quarantine", 0, SIZE_T_MAX) - CONF_HANDLE_BOOL(opt_redzone, redzone) - CONF_HANDLE_BOOL(opt_zero, zero) + CONF_HANDLE_BOOL(opt_redzone, "redzone") + CONF_HANDLE_BOOL(opt_zero, "zero") } if (config_utrace) { - CONF_HANDLE_BOOL(opt_utrace, utrace) + CONF_HANDLE_BOOL(opt_utrace, "utrace") } if (config_valgrind) { bool hit; CONF_HANDLE_BOOL_HIT(opt_valgrind, - valgrind, hit) + "valgrind", hit) if (config_fill && opt_valgrind && hit) { opt_junk = false; opt_zero = false; @@ -540,29 +541,29 @@ malloc_conf_init(void) continue; } if (config_xmalloc) { - CONF_HANDLE_BOOL(opt_xmalloc, xmalloc) + CONF_HANDLE_BOOL(opt_xmalloc, "xmalloc") } if (config_tcache) { - CONF_HANDLE_BOOL(opt_tcache, tcache) + CONF_HANDLE_BOOL(opt_tcache, "tcache") CONF_HANDLE_SSIZE_T(opt_lg_tcache_max, - lg_tcache_max, -1, + "lg_tcache_max", -1, (sizeof(size_t) << 3) - 1) } if (config_prof) { - CONF_HANDLE_BOOL(opt_prof, prof) - CONF_HANDLE_CHAR_P(opt_prof_prefix, prof_prefix, - "jeprof") - CONF_HANDLE_BOOL(opt_prof_active, prof_active) + CONF_HANDLE_BOOL(opt_prof, "prof") + CONF_HANDLE_CHAR_P(opt_prof_prefix, + "prof_prefix", "jeprof") + CONF_HANDLE_BOOL(opt_prof_active, "prof_active") CONF_HANDLE_SSIZE_T(opt_lg_prof_sample, - lg_prof_sample, 0, + "lg_prof_sample", 0, (sizeof(uint64_t) << 3) - 1) - CONF_HANDLE_BOOL(opt_prof_accum, prof_accum) + CONF_HANDLE_BOOL(opt_prof_accum, "prof_accum") CONF_HANDLE_SSIZE_T(opt_lg_prof_interval, - lg_prof_interval, -1, + "lg_prof_interval", -1, (sizeof(uint64_t) << 3) - 1) - CONF_HANDLE_BOOL(opt_prof_gdump, prof_gdump) - CONF_HANDLE_BOOL(opt_prof_final, prof_final) - CONF_HANDLE_BOOL(opt_prof_leak, prof_leak) + CONF_HANDLE_BOOL(opt_prof_gdump, "prof_gdump") + CONF_HANDLE_BOOL(opt_prof_final, "prof_final") + CONF_HANDLE_BOOL(opt_prof_leak, "prof_leak") } malloc_conf_error("Invalid conf pair", k, klen, v, vlen); From 8f0e0eb1c01d5d934586ea62e519ca8b8637aebc Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 21 Apr 2012 13:33:48 -0700 Subject: [PATCH 147/205] Fix a memory corruption bug in chunk_alloc_dss(). Fix a memory corruption bug in chunk_alloc_dss() that was due to claiming newly allocated memory is zeroed. Reverse order of preference between mmap() and sbrk() to prefer mmap(). Clean up management of 'zero' parameter in chunk_alloc*(). --- ChangeLog | 2 ++ doc/jemalloc.xml.in | 4 ++-- include/jemalloc/internal/chunk_mmap.h | 2 +- src/chunk.c | 10 +++++----- src/chunk_dss.c | 1 - src/chunk_mmap.c | 16 ++++++++++------ 6 files changed, 20 insertions(+), 15 deletions(-) diff --git a/ChangeLog b/ChangeLog index 6d5670f5..b71fa165 100644 --- a/ChangeLog +++ b/ChangeLog @@ -70,6 +70,8 @@ found in the git revision history: invalid statistics and crashes. - Work around TLS dallocation via free() on Linux. This bug could cause write-after-free memory corruption. + - Fix chunk_alloc_dss() to stop claiming memory is zeroed. This bug could + cause memory corruption and crashes with --enable-dss specified. 
- Fix malloc_stats_print() to honor 'b' and 'l' in the opts parameter. - Fix realloc(p, 0) to act like free(p). - Do not enforce minimum alignment in memalign(). diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index f78f423c..e8a57225 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -444,9 +444,9 @@ for (i = 0; i < nbins; i++) { suboptimal for several reasons, including race conditions, increased fragmentation, and artificial limitations on maximum usable memory. If is specified during configuration, this - allocator uses both sbrk + allocator uses both mmap 2 and - mmap + sbrk 2, in that order of preference; otherwise only mmap 2 is used. diff --git a/include/jemalloc/internal/chunk_mmap.h b/include/jemalloc/internal/chunk_mmap.h index 2d01ac22..8224430a 100644 --- a/include/jemalloc/internal/chunk_mmap.h +++ b/include/jemalloc/internal/chunk_mmap.h @@ -11,7 +11,7 @@ void pages_purge(void *addr, size_t length); -void *chunk_alloc_mmap(size_t size, size_t alignment); +void *chunk_alloc_mmap(size_t size, size_t alignment, bool *zero); bool chunk_dealloc_mmap(void *chunk, size_t size); bool chunk_mmap_boot(void); diff --git a/src/chunk.c b/src/chunk.c index bcaedea4..31485058 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -125,16 +125,16 @@ chunk_alloc(size_t size, size_t alignment, bool base, bool *zero) ret = chunk_recycle(size, alignment, zero); if (ret != NULL) goto label_return; + + ret = chunk_alloc_mmap(size, alignment, zero); + if (ret != NULL) + goto label_return; + if (config_dss) { ret = chunk_alloc_dss(size, alignment, zero); if (ret != NULL) goto label_return; } - ret = chunk_alloc_mmap(size, alignment); - if (ret != NULL) { - *zero = true; - goto label_return; - } /* All strategies for allocation failed. */ ret = NULL; diff --git a/src/chunk_dss.c b/src/chunk_dss.c index b05509a5..bd4a724b 100644 --- a/src/chunk_dss.c +++ b/src/chunk_dss.c @@ -89,7 +89,6 @@ chunk_alloc_dss(size_t size, size_t alignment, bool *zero) malloc_mutex_unlock(&dss_mtx); if (cpad_size != 0) chunk_dealloc(cpad, cpad_size, true); - *zero = true; return (ret); } } while (dss_prev != (void *)-1); diff --git a/src/chunk_mmap.c b/src/chunk_mmap.c index 9dea8318..126406ae 100644 --- a/src/chunk_mmap.c +++ b/src/chunk_mmap.c @@ -18,7 +18,7 @@ malloc_tsd_funcs(JEMALLOC_INLINE, mmap_unaligned, bool, false, static void *pages_map(void *addr, size_t size); static void pages_unmap(void *addr, size_t size); static void *chunk_alloc_mmap_slow(size_t size, size_t alignment, - bool unaligned); + bool unaligned, bool *zero); /******************************************************************************/ @@ -87,7 +87,7 @@ pages_purge(void *addr, size_t length) } static void * -chunk_alloc_mmap_slow(size_t size, size_t alignment, bool unaligned) +chunk_alloc_mmap_slow(size_t size, size_t alignment, bool unaligned, bool *zero) { void *ret, *pages; size_t alloc_size, leadsize, trailsize; @@ -122,11 +122,13 @@ chunk_alloc_mmap_slow(size_t size, size_t alignment, bool unaligned) mmap_unaligned_tsd_set(&mu); } + assert(ret != NULL); + *zero = true; return (ret); } void * -chunk_alloc_mmap(size_t size, size_t alignment) +chunk_alloc_mmap(size_t size, size_t alignment, bool *zero) { void *ret; @@ -177,8 +179,8 @@ chunk_alloc_mmap(size_t size, size_t alignment) * the reliable-but-expensive method. */ pages_unmap(ret, size); - ret = chunk_alloc_mmap_slow(size, alignment, - true); + return (chunk_alloc_mmap_slow(size, alignment, + true, zero)); } else { /* Clean up unneeded leading space. 
*/ pages_unmap(ret, chunksize - offset); @@ -187,8 +189,10 @@ chunk_alloc_mmap(size_t size, size_t alignment) } } } else - ret = chunk_alloc_mmap_slow(size, alignment, false); + return (chunk_alloc_mmap_slow(size, alignment, false, zero)); + assert(ret != NULL); + *zero = true; return (ret); } From 7ad54c1c30e0805e0758690115875f982de46cf2 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 21 Apr 2012 16:04:51 -0700 Subject: [PATCH 148/205] Fix chunk allocation/deallocation bugs. Fix chunk_alloc_dss() to zero memory when requested. Fix chunk_dealloc() to avoid chunk_dealloc_mmap() for dss-allocated memory. Fix huge_palloc() to always junk fill when requested. Improve chunk_recycle() to report that memory is zeroed as a side effect of pages_purge(). --- include/jemalloc/internal/tsd.h | 2 +- src/chunk.c | 17 +++++++++++++---- src/chunk_dss.c | 4 ++++ src/huge.c | 10 ++++++++-- 4 files changed, 26 insertions(+), 7 deletions(-) diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 5e904cbb..20491c88 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -111,7 +111,7 @@ a_name##_tsd_cleanup_wrapper(void) \ \ if (a_name##_initialized) { \ a_name##_initialized = false; \ - a_cleanup(&a_name##_tls); \ + a_cleanup(&a_name##_tls); \ } \ return (a_name##_initialized); \ } \ diff --git a/src/chunk.c b/src/chunk.c index 31485058..0fccd0ce 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -98,7 +98,10 @@ chunk_recycle(size_t size, size_t alignment, bool *zero) if (node != NULL) base_node_dealloc(node); -#ifdef JEMALLOC_PURGE_MADVISE_FREE +#ifdef JEMALLOC_PURGE_MADVISE_DONTNEED + /* Pages are zeroed as a side effect of pages_purge(). */ + *zero = true; +#else if (*zero) { VALGRIND_MAKE_MEM_UNDEFINED(ret, size); memset(ret, 0, size); @@ -161,7 +164,13 @@ label_return: if (config_prof && opt_prof && opt_prof_gdump && gdump) prof_gdump(); } + if (config_debug && *zero && ret != NULL) { + size_t i; + size_t *p = (size_t *)(uintptr_t)ret; + for (i = 0; i < size / sizeof(size_t); i++) + assert(p[i] == 0); + } assert(CHUNK_ADDR2BASE(ret) == ret); return (ret); } @@ -258,9 +267,9 @@ chunk_dealloc(void *chunk, size_t size, bool unmap) } if (unmap) { - if (chunk_dealloc_mmap(chunk, size) == false) - return; - chunk_record(chunk, size); + if ((config_dss && chunk_in_dss(chunk)) || + chunk_dealloc_mmap(chunk, size)) + chunk_record(chunk, size); } } diff --git a/src/chunk_dss.c b/src/chunk_dss.c index bd4a724b..2d68e480 100644 --- a/src/chunk_dss.c +++ b/src/chunk_dss.c @@ -89,6 +89,10 @@ chunk_alloc_dss(size_t size, size_t alignment, bool *zero) malloc_mutex_unlock(&dss_mtx); if (cpad_size != 0) chunk_dealloc(cpad, cpad_size, true); + if (*zero) { + VALGRIND_MAKE_MEM_UNDEFINED(ret, size); + memset(ret, 0, size); + } return (ret); } } while (dss_prev != (void *)-1); diff --git a/src/huge.c b/src/huge.c index daf0c622..23eb074a 100644 --- a/src/huge.c +++ b/src/huge.c @@ -28,6 +28,7 @@ huge_palloc(size_t size, size_t alignment, bool zero) void *ret; size_t csize; extent_node_t *node; + bool is_zeroed; /* Allocate one or more contiguous chunks for this request. */ @@ -42,7 +43,12 @@ huge_palloc(size_t size, size_t alignment, bool zero) if (node == NULL) return (NULL); - ret = chunk_alloc(csize, alignment, false, &zero); + /* + * Copy zero into is_zeroed and pass the copy to chunk_alloc(), so that + * it is possible to make correct junk/zero fill decisions below. 
+ */ + is_zeroed = zero; + ret = chunk_alloc(csize, alignment, false, &is_zeroed); if (ret == NULL) { base_node_dealloc(node); return (NULL); @@ -64,7 +70,7 @@ huge_palloc(size_t size, size_t alignment, bool zero) if (config_fill && zero == false) { if (opt_junk) memset(ret, 0xa5, csize); - else if (opt_zero) + else if (opt_zero && is_zeroed == false) memset(ret, 0, csize); } From a8f8d7540d66ddee7337db80c92890916e1063ca Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 21 Apr 2012 19:17:21 -0700 Subject: [PATCH 149/205] Remove mmap_unaligned. Remove mmap_unaligned, which was used to heuristically decide whether to optimistically call mmap() in such a way that could reduce the total number of system calls. If I remember correctly, the intention of mmap_unaligned was to avoid always executing the slow path in the presence of ASLR. However, that reasoning seems to have been based on a flawed understanding of how ASLR actually works. Although ASLR apparently causes mmap() to ignore address requests, it does not cause total placement randomness, so there is a reasonable expectation that iterative mmap() calls will start returning chunk-aligned mappings once the first chunk has been properly aligned. --- include/jemalloc/internal/chunk.h | 3 +- include/jemalloc/internal/chunk_mmap.h | 2 - include/jemalloc/internal/private_namespace.h | 8 +- src/chunk.c | 12 +-- src/chunk_mmap.c | 98 +++++-------------- src/jemalloc.c | 7 +- 6 files changed, 29 insertions(+), 101 deletions(-) diff --git a/include/jemalloc/internal/chunk.h b/include/jemalloc/internal/chunk.h index e047c2b1..8fb1fe6d 100644 --- a/include/jemalloc/internal/chunk.h +++ b/include/jemalloc/internal/chunk.h @@ -44,8 +44,7 @@ extern size_t arena_maxclass; /* Max size class for arenas. */ void *chunk_alloc(size_t size, size_t alignment, bool base, bool *zero); void chunk_dealloc(void *chunk, size_t size, bool unmap); -bool chunk_boot0(void); -bool chunk_boot1(void); +bool chunk_boot(void); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/include/jemalloc/internal/chunk_mmap.h b/include/jemalloc/internal/chunk_mmap.h index 8224430a..b29f39e9 100644 --- a/include/jemalloc/internal/chunk_mmap.h +++ b/include/jemalloc/internal/chunk_mmap.h @@ -14,8 +14,6 @@ void pages_purge(void *addr, size_t length); void *chunk_alloc_mmap(size_t size, size_t alignment, bool *zero); bool chunk_dealloc_mmap(void *chunk, size_t size); -bool chunk_mmap_boot(void); - #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ #ifdef JEMALLOC_H_INLINES diff --git a/include/jemalloc/internal/private_namespace.h b/include/jemalloc/internal/private_namespace.h index 15fe3c51..bb1b63e9 100644 --- a/include/jemalloc/internal/private_namespace.h +++ b/include/jemalloc/internal/private_namespace.h @@ -74,8 +74,7 @@ #define chunk_alloc JEMALLOC_N(chunk_alloc) #define chunk_alloc_dss JEMALLOC_N(chunk_alloc_dss) #define chunk_alloc_mmap JEMALLOC_N(chunk_alloc_mmap) -#define chunk_boot0 JEMALLOC_N(chunk_boot0) -#define chunk_boot1 JEMALLOC_N(chunk_boot1) +#define chunk_boot JEMALLOC_N(chunk_boot) #define chunk_dealloc JEMALLOC_N(chunk_dealloc) #define chunk_dealloc_mmap JEMALLOC_N(chunk_dealloc_mmap) #define chunk_dss_boot JEMALLOC_N(chunk_dss_boot) @@ -83,7 +82,6 @@ #define chunk_dss_postfork_parent JEMALLOC_N(chunk_dss_postfork_parent) #define chunk_dss_prefork JEMALLOC_N(chunk_dss_prefork) #define chunk_in_dss JEMALLOC_N(chunk_in_dss) 
-#define chunk_mmap_boot JEMALLOC_N(chunk_mmap_boot) #define chunk_npages JEMALLOC_N(chunk_npages) #define chunks_mtx JEMALLOC_N(chunks_mtx) #define chunks_rtree JEMALLOC_N(chunks_rtree) @@ -192,10 +190,6 @@ #define malloc_write JEMALLOC_N(malloc_write) #define map_bias JEMALLOC_N(map_bias) #define mb_write JEMALLOC_N(mb_write) -#define mmap_unaligned_tsd_boot JEMALLOC_N(mmap_unaligned_tsd_boot) -#define mmap_unaligned_tsd_cleanup_wrapper JEMALLOC_N(mmap_unaligned_tsd_cleanup_wrapper) -#define mmap_unaligned_tsd_get JEMALLOC_N(mmap_unaligned_tsd_get) -#define mmap_unaligned_tsd_set JEMALLOC_N(mmap_unaligned_tsd_set) #define mutex_boot JEMALLOC_N(mutex_boot) #define narenas JEMALLOC_N(narenas) #define ncpus JEMALLOC_N(ncpus) diff --git a/src/chunk.c b/src/chunk.c index 0fccd0ce..5426b027 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -274,7 +274,7 @@ chunk_dealloc(void *chunk, size_t size, bool unmap) } bool -chunk_boot0(void) +chunk_boot(void) { /* Set variables according to the value of opt_lg_chunk. */ @@ -301,13 +301,3 @@ chunk_boot0(void) return (false); } - -bool -chunk_boot1(void) -{ - - if (chunk_mmap_boot()) - return (true); - - return (false); -} diff --git a/src/chunk_mmap.c b/src/chunk_mmap.c index 126406ae..9ff7480a 100644 --- a/src/chunk_mmap.c +++ b/src/chunk_mmap.c @@ -1,17 +1,6 @@ #define JEMALLOC_CHUNK_MMAP_C_ #include "jemalloc/internal/jemalloc_internal.h" -/******************************************************************************/ -/* Data. */ - -/* - * Used by chunk_alloc_mmap() to decide whether to attempt the fast path and - * potentially avoid some system calls. - */ -malloc_tsd_data(static, mmap_unaligned, bool, false) -malloc_tsd_funcs(JEMALLOC_INLINE, mmap_unaligned, bool, false, - malloc_tsd_no_cleanup) - /******************************************************************************/ /* Function prototypes for non-inline static functions. */ @@ -112,16 +101,6 @@ chunk_alloc_mmap_slow(size_t size, size_t alignment, bool unaligned, bool *zero) if (trailsize != 0) pages_unmap((void *)((uintptr_t)ret + size), trailsize); - /* - * If mmap() returned an aligned mapping, reset mmap_unaligned so that - * the next chunk_alloc_mmap() execution tries the fast allocation - * method. - */ - if (unaligned == false && mmap_unaligned_booted) { - bool mu = false; - mmap_unaligned_tsd_set(&mu); - } - assert(ret != NULL); *zero = true; return (ret); @@ -131,6 +110,7 @@ void * chunk_alloc_mmap(size_t size, size_t alignment, bool *zero) { void *ret; + size_t offset; /* * Ideally, there would be a way to specify alignment to mmap() (like @@ -152,44 +132,34 @@ chunk_alloc_mmap(size_t size, size_t alignment, bool *zero) * * Another possible confounding factor is address space layout * randomization (ASLR), which causes mmap(2) to disregard the - * requested address. mmap_unaligned tracks whether the previous - * chunk_alloc_mmap() execution received any unaligned or relocated - * mappings, and if so, the current execution will immediately fall - * back to the slow method. However, we keep track of whether the fast - * method would have succeeded, and if so, we make a note to try the - * fast method next time. + * requested address. As such, repeatedly trying to extend unaligned + * mappings could result in an infinite loop, so if extension fails, + * immediately fall back to the reliable method of over-allocation + * followed by trimming. 
*/ - if (mmap_unaligned_booted && *mmap_unaligned_tsd_get() == false) { - size_t offset; + ret = pages_map(NULL, size); + if (ret == NULL) + return (NULL); - ret = pages_map(NULL, size); - if (ret == NULL) - return (NULL); - - offset = ALIGNMENT_ADDR2OFFSET(ret, alignment); - if (offset != 0) { - bool mu = true; - mmap_unaligned_tsd_set(&mu); - /* Try to extend chunk boundary. */ - if (pages_map((void *)((uintptr_t)ret + size), - chunksize - offset) == NULL) { - /* - * Extension failed. Clean up, then revert to - * the reliable-but-expensive method. - */ - pages_unmap(ret, size); - return (chunk_alloc_mmap_slow(size, alignment, - true, zero)); - } else { - /* Clean up unneeded leading space. */ - pages_unmap(ret, chunksize - offset); - ret = (void *)((uintptr_t)ret + (chunksize - - offset)); - } + offset = ALIGNMENT_ADDR2OFFSET(ret, alignment); + if (offset != 0) { + /* Try to extend chunk boundary. */ + if (pages_map((void *)((uintptr_t)ret + size), chunksize - + offset) == NULL) { + /* + * Extension failed. Clean up, then fall back to the + * reliable-but-expensive method. + */ + pages_unmap(ret, size); + return (chunk_alloc_mmap_slow(size, alignment, true, + zero)); + } else { + /* Clean up unneeded leading space. */ + pages_unmap(ret, chunksize - offset); + ret = (void *)((uintptr_t)ret + (chunksize - offset)); } - } else - return (chunk_alloc_mmap_slow(size, alignment, false, zero)); + } assert(ret != NULL); *zero = true; @@ -205,21 +175,3 @@ chunk_dealloc_mmap(void *chunk, size_t size) return (config_munmap == false); } - -bool -chunk_mmap_boot(void) -{ - - /* - * XXX For the non-TLS implementation of tsd, the first access from - * each thread causes memory allocation. The result is a bootstrapping - * problem for this particular use case, so for now just disable it by - * leaving it in an unbooted state. 
- */ -#ifdef JEMALLOC_TLS - if (mmap_unaligned_tsd_boot()) - return (true); -#endif - - return (false); -} diff --git a/src/jemalloc.c b/src/jemalloc.c index 00c2b23c..f9c89168 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -634,7 +634,7 @@ malloc_init_hard(void) return (true); } - if (chunk_boot0()) { + if (chunk_boot()) { malloc_mutex_unlock(&init_lock); return (true); } @@ -711,11 +711,6 @@ malloc_init_hard(void) ncpus = malloc_ncpus(); malloc_mutex_lock(&init_lock); - if (chunk_boot1()) { - malloc_mutex_unlock(&init_lock); - return (true); - } - if (mutex_boot()) { malloc_mutex_unlock(&init_lock); return (true); From a19e87fbad020e8dd3d26682032929e8e5ae71c1 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Sat, 21 Apr 2012 21:27:46 -0700 Subject: [PATCH 150/205] Add support for Mingw --- Makefile.in | 10 +- configure.ac | 60 +++++++--- .../jemalloc/internal/jemalloc_internal.h.in | 17 +-- include/jemalloc/internal/mutex.h | 20 +++- include/jemalloc/internal/tsd.h | 101 ++++++++++++++++ src/chunk_mmap.c | 108 +++++++++++++----- src/jemalloc.c | 32 +++++- src/mutex.c | 16 ++- src/tsd.c | 27 ++++- src/util.c | 7 +- test/jemalloc_test.h.in | 24 ++++ test/rallocm.c | 6 + 12 files changed, 357 insertions(+), 71 deletions(-) diff --git a/Makefile.in b/Makefile.in index 8a34928b..b9917da4 100644 --- a/Makefile.in +++ b/Makefile.in @@ -32,7 +32,7 @@ SO := @so@ O := @o@ A := @a@ EXE := @exe@ -LIB := @lib@ +LIBPREFIX := @libprefix@ REV := @rev@ install_suffix := @install_suffix@ ABI := @abi@ @@ -51,12 +51,16 @@ SOREV = @SOREV@ PIC_CFLAGS = @PIC_CFLAGS@ ifeq (macho, $(ABI)) -TEST_LIBRARY_PATH := DYLD_FALLBACK_LIBRARY_PATH=$(objroot)lib +TEST_LIBRARY_PATH := DYLD_FALLBACK_LIBRARY_PATH="$(objroot)lib" +else +ifeq (pecoff, $(ABI)) +TEST_LIBRARY_PATH := PATH="$(PATH):$(objroot)lib" else TEST_LIBRARY_PATH := endif +endif -LIBJEMALLOC := $(LIB)jemalloc$(install_suffix) +LIBJEMALLOC := $(LIBPREFIX)jemalloc$(install_suffix) # Lists of files. BINS := $(srcroot)bin/pprof $(objroot)bin/jemalloc.sh diff --git a/configure.ac b/configure.ac index 0ed0494b..5bdddfab 100644 --- a/configure.ac +++ b/configure.ac @@ -198,7 +198,7 @@ so="so" o="o" a="a" exe= -lib="lib" +libprefix="lib" DSO_LDFLAGS='-shared -Wl,-soname,$(@F)' RPATH='-Wl,-rpath,$(1)' SOREV='$(SO).$(REV)' @@ -273,6 +273,19 @@ case "${host}" in fi abi="xcoff" ;; + *-*-mingw*) + abi="pecoff" + force_tls="0" + RPATH="" + so="dll" + DSO_LDFLAGS="-shared" + o="obj" + a="lib" + libprefix="" + exe=".exe" + SOREV='$(SO)' + PIC_CFLAGS="" + ;; *) AC_MSG_RESULT([Unsupported operating system: ${host}]) abi="elf" @@ -285,7 +298,7 @@ AC_SUBST([so]) AC_SUBST([o]) AC_SUBST([a]) AC_SUBST([exe]) -AC_SUBST([lib]) +AC_SUBST([libprefix]) AC_SUBST([DSO_LDFLAGS]) AC_SUBST([SOREV]) AC_SUBST([PIC_CFLAGS]) @@ -817,23 +830,36 @@ AC_SUBST([enable_xmalloc]) AC_CACHE_CHECK([STATIC_PAGE_SHIFT], [je_cv_static_page_shift], AC_RUN_IFELSE([AC_LANG_PROGRAM( -[[#include +[[ +#ifdef _WIN32 +#include +#else #include #include +#endif +#include ]], [[ long result; FILE *f; +#ifdef _WIN32 + SYSTEM_INFO si; + GetSystemInfo(&si); + result = si.dwPageSize; +#else result = sysconf(_SC_PAGESIZE); +#endif if (result == -1) { return 1; } + result = ffsl(result) - 1; + f = fopen("conftest.out", "w"); if (f == NULL) { return 1; } - fprintf(f, "%u\n", ffs((int)result) - 1); + fprintf(f, "%u\n", result); fclose(f); return 0; @@ -871,12 +897,14 @@ AC_SUBST([jemalloc_version_gid]) dnl ============================================================================ dnl Configure pthreads. 
-AC_CHECK_HEADERS([pthread.h], , [AC_MSG_ERROR([pthread.h is missing])]) -dnl Some systems may embed pthreads functionality in libc; check for libpthread -dnl first, but try libc too before failing. -AC_CHECK_LIB([pthread], [pthread_create], [LIBS="$LIBS -lpthread"], - [AC_SEARCH_LIBS([pthread_create], , , - AC_MSG_ERROR([libpthread is missing]))]) +if test "x$abi" != "xpecoff" ; then + AC_CHECK_HEADERS([pthread.h], , [AC_MSG_ERROR([pthread.h is missing])]) + dnl Some systems may embed pthreads functionality in libc; check for libpthread + dnl first, but try libc too before failing. + AC_CHECK_LIB([pthread], [pthread_create], [LIBS="$LIBS -lpthread"], + [AC_SEARCH_LIBS([pthread_create], , , + AC_MSG_ERROR([libpthread is missing]))]) +fi CPPFLAGS="$CPPFLAGS -D_REENTRANT" @@ -921,11 +949,13 @@ if test "x$enable_lazy_lock" = "x0" -a "x${force_lazy_lock}" = "x1" ; then enable_lazy_lock="1" fi if test "x$enable_lazy_lock" = "x1" ; then - AC_CHECK_HEADERS([dlfcn.h], , [AC_MSG_ERROR([dlfcn.h is missing])]) - AC_CHECK_FUNC([dlsym], [], - [AC_CHECK_LIB([dl], [dlsym], [LIBS="$LIBS -ldl"], - [AC_MSG_ERROR([libdl is missing])]) - ]) + if test "x$abi" != "xpecoff" ; then + AC_CHECK_HEADERS([dlfcn.h], , [AC_MSG_ERROR([dlfcn.h is missing])]) + AC_CHECK_FUNC([dlsym], [], + [AC_CHECK_LIB([dl], [dlsym], [LIBS="$LIBS -ldl"], + [AC_MSG_ERROR([libdl is missing])]) + ]) + fi AC_DEFINE([JEMALLOC_LAZY_LOCK], [ ]) fi AC_SUBST([enable_lazy_lock]) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index b61abe84..fd5de725 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -1,21 +1,25 @@ #ifndef JEMALLOC_INTERNAL_H #define JEMALLOC_INTERNAL_H -#include #include -#include -#if !defined(SYS_write) && defined(__NR_write) -#define SYS_write __NR_write +#ifdef _WIN32 +# include +#else +# include +# include +# if !defined(SYS_write) && defined(__NR_write) +# define SYS_write __NR_write +# endif +# include +# include #endif #include #include -#include #include #include #ifndef SIZE_T_MAX # define SIZE_T_MAX SIZE_MAX #endif -#include #include #include #include @@ -32,7 +36,6 @@ #include #include #include -#include #include #define JEMALLOC_NO_DEMANGLE diff --git a/include/jemalloc/internal/mutex.h b/include/jemalloc/internal/mutex.h index 8837ef57..de44e143 100644 --- a/include/jemalloc/internal/mutex.h +++ b/include/jemalloc/internal/mutex.h @@ -3,10 +3,12 @@ typedef struct malloc_mutex_s malloc_mutex_t; -#ifdef JEMALLOC_OSSPIN -#define MALLOC_MUTEX_INITIALIZER {0} +#ifdef _WIN32 +# define MALLOC_MUTEX_INITIALIZER +#elif (defined(JEMALLOC_OSSPIN)) +# define MALLOC_MUTEX_INITIALIZER {0} #elif (defined(JEMALLOC_MUTEX_INIT_CB)) -#define MALLOC_MUTEX_INITIALIZER {PTHREAD_MUTEX_INITIALIZER, NULL} +# define MALLOC_MUTEX_INITIALIZER {PTHREAD_MUTEX_INITIALIZER, NULL} #else # if (defined(PTHREAD_MUTEX_ADAPTIVE_NP) && \ defined(PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP)) @@ -23,7 +25,9 @@ typedef struct malloc_mutex_s malloc_mutex_t; #ifdef JEMALLOC_H_STRUCTS struct malloc_mutex_s { -#ifdef JEMALLOC_OSSPIN +#ifdef _WIN32 + CRITICAL_SECTION lock; +#elif (defined(JEMALLOC_OSSPIN)) OSSpinLock lock; #elif (defined(JEMALLOC_MUTEX_INIT_CB)) pthread_mutex_t lock; @@ -65,7 +69,9 @@ malloc_mutex_lock(malloc_mutex_t *mutex) { if (isthreaded) { -#ifdef JEMALLOC_OSSPIN +#ifdef _WIN32 + EnterCriticalSection(&mutex->lock); +#elif (defined(JEMALLOC_OSSPIN)) OSSpinLockLock(&mutex->lock); #else 
pthread_mutex_lock(&mutex->lock); @@ -78,7 +84,9 @@ malloc_mutex_unlock(malloc_mutex_t *mutex) { if (isthreaded) { -#ifdef JEMALLOC_OSSPIN +#ifdef _WIN32 + LeaveCriticalSection(&mutex->lock); +#elif (defined(JEMALLOC_OSSPIN)) OSSpinLockUnlock(&mutex->lock); #else pthread_mutex_unlock(&mutex->lock); diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 20491c88..0037cf35 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -74,6 +74,10 @@ extern bool a_name##_booted; extern __thread a_type a_name##_tls; \ extern pthread_key_t a_name##_tsd; \ extern bool a_name##_booted; +#elif (defined(_WIN32)) +#define malloc_tsd_externs(a_name, a_type) \ +extern DWORD a_name##_tsd; \ +extern bool a_name##_booted; #else #define malloc_tsd_externs(a_name, a_type) \ extern pthread_key_t a_name##_tsd; \ @@ -94,6 +98,10 @@ a_attr __thread a_type JEMALLOC_TLS_MODEL \ a_name##_tls = a_initializer; \ a_attr pthread_key_t a_name##_tsd; \ a_attr bool a_name##_booted = false; +#elif (defined(_WIN32)) +#define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ +a_attr DWORD a_name##_tsd; \ +a_attr bool a_name##_booted = false; #else #define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ a_attr pthread_key_t a_name##_tsd; \ @@ -182,6 +190,99 @@ a_name##_tsd_set(a_type *val) \ } \ } \ } +#elif (defined(_WIN32)) +#define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \ + a_cleanup) \ +/* Data structure. */ \ +typedef struct { \ + bool initialized; \ + a_type val; \ +} a_name##_tsd_wrapper_t; \ +/* Initialization/cleanup. */ \ +a_attr bool \ +a_name##_tsd_cleanup_wrapper(void) \ +{ \ + a_name##_tsd_wrapper_t *wrapper; \ + \ + wrapper = (a_name##_tsd_wrapper_t *) TlsGetValue(a_name##_tsd); \ + if (wrapper == NULL) \ + return (false); \ + if (a_cleanup != malloc_tsd_no_cleanup && \ + wrapper->initialized) { \ + a_type val = wrapper->val; \ + a_type tsd_static_data = a_initializer; \ + wrapper->initialized = false; \ + wrapper->val = tsd_static_data; \ + a_cleanup(&val); \ + if (wrapper->initialized) { \ + /* Trigger another cleanup round. */ \ + return (true); \ + } \ + } \ + malloc_tsd_dalloc(wrapper); \ + return (false); \ +} \ +a_attr bool \ +a_name##_tsd_boot(void) \ +{ \ + \ + a_name##_tsd = TlsAlloc(); \ + if (a_name##_tsd == TLS_OUT_OF_INDEXES) \ + return (true); \ + if (a_cleanup != malloc_tsd_no_cleanup) { \ + malloc_tsd_cleanup_register( \ + &a_name##_tsd_cleanup_wrapper); \ + } \ + a_name##_booted = true; \ + return (false); \ +} \ +/* Get/set. 
*/ \ +a_attr a_name##_tsd_wrapper_t * \ +a_name##_tsd_get_wrapper(void) \ +{ \ + a_name##_tsd_wrapper_t *wrapper = (a_name##_tsd_wrapper_t *) \ + TlsGetValue(a_name##_tsd); \ + \ + if (wrapper == NULL) { \ + wrapper = (a_name##_tsd_wrapper_t *) \ + malloc_tsd_malloc(sizeof(a_name##_tsd_wrapper_t)); \ + if (wrapper == NULL) { \ + malloc_write(": Error allocating" \ + " TSD for "#a_name"\n"); \ + abort(); \ + } else { \ + static a_type tsd_static_data = a_initializer; \ + wrapper->initialized = false; \ + wrapper->val = tsd_static_data; \ + } \ + if (!TlsSetValue(a_name##_tsd, (void *)wrapper)) { \ + malloc_write(": Error setting" \ + " TSD for "#a_name"\n"); \ + abort(); \ + } \ + } \ + return (wrapper); \ +} \ +a_attr a_type * \ +a_name##_tsd_get(void) \ +{ \ + a_name##_tsd_wrapper_t *wrapper; \ + \ + assert(a_name##_booted); \ + wrapper = a_name##_tsd_get_wrapper(); \ + return (&wrapper->val); \ +} \ +a_attr void \ +a_name##_tsd_set(a_type *val) \ +{ \ + a_name##_tsd_wrapper_t *wrapper; \ + \ + assert(a_name##_booted); \ + wrapper = a_name##_tsd_get_wrapper(); \ + wrapper->val = *(val); \ + if (a_cleanup != malloc_tsd_no_cleanup) \ + wrapper->initialized = true; \ +} #else #define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \ a_cleanup) \ diff --git a/src/chunk_mmap.c b/src/chunk_mmap.c index 9ff7480a..0ad65a10 100644 --- a/src/chunk_mmap.c +++ b/src/chunk_mmap.c @@ -7,7 +7,7 @@ static void *pages_map(void *addr, size_t size); static void pages_unmap(void *addr, size_t size); static void *chunk_alloc_mmap_slow(size_t size, size_t alignment, - bool unaligned, bool *zero); + bool *zero); /******************************************************************************/ @@ -16,6 +16,14 @@ pages_map(void *addr, size_t size) { void *ret; +#ifdef _WIN32 + /* + * If VirtualAlloc can't allocate at the given address when one is + * given, it fails and returns NULL. + */ + ret = VirtualAlloc(addr, size, MEM_COMMIT | MEM_RESERVE, + PAGE_READWRITE); +#else /* * We don't use MAP_FIXED here, because it can cause the *replacement* * of existing mappings, and we only want to create new mappings. 
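The two pages_map() branches above differ in one important way: VirtualAlloc() fails outright (returning NULL) when the requested address is unavailable, while mmap() without MAP_FIXED treats the address as a hint and may return a mapping somewhere else rather than clobber an existing one. The assert retained at the end of pages_map() pins down the common contract: return exactly the requested address, or NULL. A reduced, self-contained sketch of that pattern (hypothetical os_pages_map(), simplified flags; not the patch's exact code):

#include <stddef.h>
#ifdef _WIN32
#include <windows.h>
#else
#include <sys/mman.h>
#endif

/* Sketch: map pages at a hinted address; return addr exactly, or NULL. */
static void *
os_pages_map(void *addr, size_t size)
{
#ifdef _WIN32
	/* VirtualAlloc() returns NULL if addr is already in use. */
	return (VirtualAlloc(addr, size, MEM_COMMIT | MEM_RESERVE,
	    PAGE_READWRITE));
#else
	/*
	 * No MAP_FIXED: the kernel may honor the hint or pick another
	 * address, but it never silently replaces a live mapping.
	 */
	void *ret = mmap(addr, size, PROT_READ | PROT_WRITE,
	    MAP_PRIVATE | MAP_ANON, -1, 0);
	if (ret == MAP_FAILED)
		return (NULL);
	if (addr != NULL && ret != addr) {
		/* Hint not honored; normalize to the Windows behavior. */
		munmap(ret, size);
		return (NULL);
	}
	return (ret);
#endif
}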
@@ -41,7 +49,7 @@ pages_map(void *addr, size_t size) } ret = NULL; } - +#endif assert(ret == NULL || (addr == NULL && ret != addr) || (addr != NULL && ret == addr)); return (ret); @@ -51,55 +59,94 @@ static void pages_unmap(void *addr, size_t size) { - if (munmap(addr, size) == -1) { +#ifdef _WIN32 + if (VirtualFree(addr, 0, MEM_RELEASE) == 0) +#else + if (munmap(addr, size) == -1) +#endif + { char buf[BUFERROR_BUF]; buferror(errno, buf, sizeof(buf)); - malloc_printf(": Error in munmap(): %s\n", buf); + malloc_printf(": Error in " +#ifdef _WIN32 + "VirtualFree" +#else + "munmap" +#endif + "(): %s\n", buf); if (opt_abort) abort(); } } +static void * +pages_trim(void *addr, size_t alloc_size, size_t leadsize, size_t size) +{ + void *ret = (void *)((uintptr_t)addr + leadsize); + + assert(alloc_size >= leadsize + size); +#ifdef _WIN32 + { + void *new_addr; + + pages_unmap(addr, alloc_size); + new_addr = pages_map(ret, size); + if (new_addr == ret) + return (ret); + if (new_addr) + pages_unmap(new_addr, size); + return (NULL); + } +#else + { + size_t trailsize = alloc_size - leadsize - size; + + if (leadsize != 0) + pages_unmap(addr, leadsize); + if (trailsize != 0) + pages_unmap((void *)((uintptr_t)ret + size), trailsize); + return (ret); + } +#endif +} + void pages_purge(void *addr, size_t length) { -#ifdef JEMALLOC_PURGE_MADVISE_DONTNEED -# define JEMALLOC_MADV_PURGE MADV_DONTNEED -#elif defined(JEMALLOC_PURGE_MADVISE_FREE) -# define JEMALLOC_MADV_PURGE MADV_FREE +#ifdef _WIN32 + VirtualAlloc(addr, length, MEM_RESET, PAGE_READWRITE); #else -# error "No method defined for purging unused dirty pages." -#endif +# ifdef JEMALLOC_PURGE_MADVISE_DONTNEED +# define JEMALLOC_MADV_PURGE MADV_DONTNEED +# elif defined(JEMALLOC_PURGE_MADVISE_FREE) +# define JEMALLOC_MADV_PURGE MADV_FREE +# else +# error "No method defined for purging unused dirty pages." +# endif madvise(addr, length, JEMALLOC_MADV_PURGE); +#endif } static void * -chunk_alloc_mmap_slow(size_t size, size_t alignment, bool unaligned, bool *zero) +chunk_alloc_mmap_slow(size_t size, size_t alignment, bool *zero) { void *ret, *pages; - size_t alloc_size, leadsize, trailsize; + size_t alloc_size, leadsize; alloc_size = size + alignment - PAGE; /* Beware size_t wrap-around. */ if (alloc_size < size) return (NULL); - pages = pages_map(NULL, alloc_size); - if (pages == NULL) - return (NULL); - leadsize = ALIGNMENT_CEILING((uintptr_t)pages, alignment) - - (uintptr_t)pages; - assert(alloc_size >= leadsize + size); - trailsize = alloc_size - leadsize - size; - ret = (void *)((uintptr_t)pages + leadsize); - if (leadsize != 0) { - /* Note that mmap() returned an unaligned mapping. */ - unaligned = true; - pages_unmap(pages, leadsize); - } - if (trailsize != 0) - pages_unmap((void *)((uintptr_t)ret + size), trailsize); + do { + pages = pages_map(NULL, alloc_size); + if (pages == NULL) + return (NULL); + leadsize = ALIGNMENT_CEILING((uintptr_t)pages, alignment) - + (uintptr_t)pages; + ret = pages_trim(pages, alloc_size, leadsize, size); + } while (ret == NULL); assert(ret != NULL); *zero = true; @@ -144,6 +191,9 @@ chunk_alloc_mmap(size_t size, size_t alignment, bool *zero) offset = ALIGNMENT_ADDR2OFFSET(ret, alignment); if (offset != 0) { +#ifdef _WIN32 + return (chunk_alloc_mmap_slow(size, alignment, zero)); +#else /* Try to extend chunk boundary. */ if (pages_map((void *)((uintptr_t)ret + size), chunksize - offset) == NULL) { @@ -152,13 +202,13 @@ chunk_alloc_mmap(size_t size, size_t alignment, bool *zero) * reliable-but-expensive method. 
*/ pages_unmap(ret, size); - return (chunk_alloc_mmap_slow(size, alignment, true, - zero)); + return (chunk_alloc_mmap_slow(size, alignment, zero)); } else { /* Clean up unneeded leading space. */ pages_unmap(ret, chunksize - offset); ret = (void *)((uintptr_t)ret + (chunksize - offset)); } +#endif } assert(ret != NULL); diff --git a/src/jemalloc.c b/src/jemalloc.c index f9c89168..67ac90b2 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -52,7 +52,19 @@ static bool malloc_initializer = NO_INITIALIZER; #endif /* Used to avoid initialization races. */ +#ifdef _WIN32 +static malloc_mutex_t init_lock; + +JEMALLOC_ATTR(constructor) +static void +init_init_lock() +{ + + malloc_mutex_init(&init_lock); +} +#else static malloc_mutex_t init_lock = MALLOC_MUTEX_INITIALIZER; +#endif typedef struct { void *p; /* Input pointer (as in realloc(p, s)). */ @@ -229,11 +241,17 @@ malloc_ncpus(void) unsigned ret; long result; +#ifdef _WIN32 + SYSTEM_INFO si; + GetSystemInfo(&si); + result = si.dwNumberOfProcessors; +#else result = sysconf(_SC_NPROCESSORS_ONLN); if (result == -1) { /* Error. */ ret = 1; } +#endif ret = (unsigned)result; return (ret); @@ -369,13 +387,14 @@ malloc_conf_init(void) } break; case 1: { +#ifndef _WIN32 int linklen; const char *linkname = -#ifdef JEMALLOC_PREFIX +# ifdef JEMALLOC_PREFIX "/etc/"JEMALLOC_PREFIX"malloc.conf" -#else +# else "/etc/malloc.conf" -#endif +# endif ; if ((linklen = readlink(linkname, buf, @@ -386,7 +405,9 @@ malloc_conf_init(void) */ buf[linklen] = '\0'; opts = buf; - } else { + } else +#endif + { /* No configuration specified. */ buf[0] = '\0'; opts = buf; @@ -610,7 +631,8 @@ malloc_init_hard(void) malloc_conf_init(); -#if (!defined(JEMALLOC_MUTEX_INIT_CB) && !defined(JEMALLOC_ZONE)) +#if (!defined(JEMALLOC_MUTEX_INIT_CB) && !defined(JEMALLOC_ZONE) \ + && !defined(_WIN32)) /* Register fork handlers. */ if (pthread_atfork(jemalloc_prefork, jemalloc_postfork_parent, jemalloc_postfork_child) != 0) { diff --git a/src/mutex.c b/src/mutex.c index 4b8ce570..159d82a3 100644 --- a/src/mutex.c +++ b/src/mutex.c @@ -1,10 +1,14 @@ #define JEMALLOC_MUTEX_C_ #include "jemalloc/internal/jemalloc_internal.h" -#ifdef JEMALLOC_LAZY_LOCK +#if defined(JEMALLOC_LAZY_LOCK) && !defined(_WIN32) #include #endif +#ifndef _CRT_SPINCOUNT +#define _CRT_SPINCOUNT 4000 +#endif + /******************************************************************************/ /* Data. */ @@ -16,7 +20,7 @@ static bool postpone_init = true; static malloc_mutex_t *postponed_mutexes = NULL; #endif -#ifdef JEMALLOC_LAZY_LOCK +#if defined(JEMALLOC_LAZY_LOCK) && !defined(_WIN32) static void pthread_create_once(void); #endif @@ -26,7 +30,7 @@ static void pthread_create_once(void); * process goes multi-threaded. 
*/ -#ifdef JEMALLOC_LAZY_LOCK +#if defined(JEMALLOC_LAZY_LOCK) && !defined(_WIN32) static int (*pthread_create_fptr)(pthread_t *__restrict, const pthread_attr_t *, void *(*)(void *), void *__restrict); @@ -68,7 +72,11 @@ int _pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex, bool malloc_mutex_init(malloc_mutex_t *mutex) { -#ifdef JEMALLOC_OSSPIN +#ifdef _WIN32 + if (!InitializeCriticalSectionAndSpinCount(&mutex->lock, + _CRT_SPINCOUNT)) + return (true); +#elif (defined(JEMALLOC_OSSPIN)) mutex->lock = 0; #elif (defined(JEMALLOC_MUTEX_INIT_CB)) if (postpone_init) { diff --git a/src/tsd.c b/src/tsd.c index 281a2e9b..09f06e88 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -31,7 +31,7 @@ malloc_tsd_no_cleanup(void *arg) not_reached(); } -#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP +#if defined(JEMALLOC_MALLOC_THREAD_CLEANUP) || defined(_WIN32) JEMALLOC_ATTR(visibility("default")) void _malloc_thread_cleanup(void) @@ -70,3 +70,28 @@ malloc_tsd_boot(void) ncleanups = 0; } + +#ifdef _WIN32 +static BOOL WINAPI +_tls_callback(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved) +{ + + switch (fdwReason) { +#ifdef JEMALLOC_LAZY_LOCK + case DLL_THREAD_ATTACH: + isthreaded = true; + break; +#endif + case DLL_THREAD_DETACH: + _malloc_thread_cleanup(); + break; + default: + break; + } + return (true); +} + +JEMALLOC_ATTR(section(".CRT$XLY")) JEMALLOC_ATTR(used) +static const BOOL (WINAPI *tls_callback)(HINSTANCE hinstDLL, + DWORD fdwReason, LPVOID lpvReserved) = _tls_callback; +#endif diff --git a/src/util.c b/src/util.c index 99ae26dd..ee9efdfa 100644 --- a/src/util.c +++ b/src/util.c @@ -67,7 +67,12 @@ void (*je_malloc_message)(void *, const char *s) int buferror(int errnum, char *buf, size_t buflen) { -#ifdef _GNU_SOURCE + +#ifdef _WIN32 + FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM, NULL, GetLastError(), 0, + (LPSTR)buf, buflen, NULL); + return (0); +#elif defined(_GNU_SOURCE) char *b = strerror_r(errno, buf, buflen); if (b != buf) { strncpy(buf, b, buflen); diff --git a/test/jemalloc_test.h.in b/test/jemalloc_test.h.in index 8833a03e..e38b48ef 100644 --- a/test/jemalloc_test.h.in +++ b/test/jemalloc_test.h.in @@ -7,6 +7,29 @@ #include "jemalloc/internal/jemalloc_internal.h" /* Abstraction layer for threading in tests */ +#ifdef _WIN32 +#include + +typedef HANDLE je_thread_t; + +void +je_thread_create(je_thread_t *thread, void *(*proc)(void *), void *arg) +{ + LPTHREAD_START_ROUTINE routine = (LPTHREAD_START_ROUTINE)proc; + *thread = CreateThread(NULL, 0, routine, arg, 0, NULL); + if (*thread == NULL) { + malloc_printf("Error in CreateThread()\n"); + exit(1); + } +} + +void +je_thread_join(je_thread_t thread, void **ret) +{ + WaitForSingleObject(thread, INFINITE); +} + +#else #include typedef pthread_t je_thread_t; @@ -27,3 +50,4 @@ je_thread_join(je_thread_t thread, void **ret) pthread_join(thread, ret); } +#endif diff --git a/test/rallocm.c b/test/rallocm.c index 18db5eec..f2a47708 100644 --- a/test/rallocm.c +++ b/test/rallocm.c @@ -19,9 +19,15 @@ main(void) /* Get page size. */ { +#ifdef _WIN32 + SYSTEM_INFO si; + GetSystemInfo(&si); + pagesize = (size_t)si.dwPageSize; +#else long result = sysconf(_SC_PAGESIZE); assert(result != -1); pagesize = (size_t)result; +#endif } r = allocm(&p, &sz, 42, 0); From 08e2221e99ef96277331f43e87d3f3ff770d27a7 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Fri, 20 Apr 2012 08:38:38 +0200 Subject: [PATCH 151/205] Remove leftovers from the vsnprintf check in malloc_vsnprintf Commit 4eeb52f removed vsnprintf validation, but left a now unused va_copy. 
It so happens that MSVC doesn't support va_copy. --- src/util.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/util.c b/src/util.c index ee9efdfa..2042329c 100644 --- a/src/util.c +++ b/src/util.c @@ -288,7 +288,6 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) int ret; size_t i; const char *f; - va_list tap; #define APPEND_C(c) do { \ if (i < size) \ @@ -359,9 +358,6 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) } \ } while (0) - if (config_debug) - va_copy(tap, ap); - i = 0; f = format; while (true) { From 14103d3598e7b828e79a81f2978dc08348677b02 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Fri, 20 Apr 2012 08:38:39 +0200 Subject: [PATCH 152/205] Fix intmax_t configure error message --- configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 5bdddfab..396b5ef3 100644 --- a/configure.ac +++ b/configure.ac @@ -164,7 +164,7 @@ elif test "x${ac_cv_sizeof_intmax_t}" = "x8" ; then elif test "x${ac_cv_sizeof_intmax_t}" = "x4" ; then LG_SIZEOF_INTMAX_T=2 else - AC_MSG_ERROR([Unsupported intmax_t size: ${ac_cv_sizeof_long}]) + AC_MSG_ERROR([Unsupported intmax_t size: ${ac_cv_sizeof_intmax_t}]) fi AC_DEFINE_UNQUOTED([LG_SIZEOF_INTMAX_T], [$LG_SIZEOF_INTMAX_T]) From 834f8770ee780ef439d7ee639caa668a5e23cf76 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Fri, 20 Apr 2012 08:38:40 +0200 Subject: [PATCH 153/205] Remove #includes in tests Since we're now including jemalloc_internal.h, all the required headers are already pulled. This will avoid having to fiddle with headers that can or can't be used with MSVC. Also, now that we use malloc_printf, we can use util.h's definition of assert instead of assert.h's. --- test/aligned_alloc.c | 7 ------- test/allocated.c | 8 -------- test/allocm.c | 4 ---- test/bitmap.c | 6 ------ test/mremap.c | 6 ------ test/posix_memalign.c | 7 ------- test/rallocm.c | 6 ------ test/thread_arena.c | 5 ----- test/thread_tcache_enabled.c | 6 ------ 9 files changed, 55 deletions(-) diff --git a/test/aligned_alloc.c b/test/aligned_alloc.c index 81b8f933..81caa0ad 100644 --- a/test/aligned_alloc.c +++ b/test/aligned_alloc.c @@ -1,10 +1,3 @@ -#include -#include -#include -#include -#include -#include - #define JEMALLOC_MANGLE #include "jemalloc_test.h" diff --git a/test/allocated.c b/test/allocated.c index 00039ed8..9884905d 100644 --- a/test/allocated.c +++ b/test/allocated.c @@ -1,11 +1,3 @@ -#include -#include -#include -#include -#include -#include -#include - #define JEMALLOC_MANGLE #include "jemalloc_test.h" diff --git a/test/allocm.c b/test/allocm.c index c6bc6f83..80be673b 100644 --- a/test/allocm.c +++ b/test/allocm.c @@ -1,7 +1,3 @@ -#include -#include -#include - #define JEMALLOC_MANGLE #include "jemalloc_test.h" diff --git a/test/bitmap.c b/test/bitmap.c index ee9b1ecc..ff50ecb3 100644 --- a/test/bitmap.c +++ b/test/bitmap.c @@ -1,12 +1,6 @@ #define JEMALLOC_MANGLE #include "jemalloc_test.h" -/* - * Avoid using the assert() from jemalloc_internal.h, since it requires - * internal libjemalloc functionality. 
- * */ -#include - #if (LG_BITMAP_MAXBITS > 12) # define MAXBITS 4500 #else diff --git a/test/mremap.c b/test/mremap.c index 84c03491..47efa7c4 100644 --- a/test/mremap.c +++ b/test/mremap.c @@ -1,9 +1,3 @@ -#include -#include -#include -#include -#include - #define JEMALLOC_MANGLE #include "jemalloc_test.h" diff --git a/test/posix_memalign.c b/test/posix_memalign.c index e1302df2..2185bcf7 100644 --- a/test/posix_memalign.c +++ b/test/posix_memalign.c @@ -1,10 +1,3 @@ -#include -#include -#include -#include -#include -#include - #define JEMALLOC_MANGLE #include "jemalloc_test.h" diff --git a/test/rallocm.c b/test/rallocm.c index f2a47708..c5dedf48 100644 --- a/test/rallocm.c +++ b/test/rallocm.c @@ -1,9 +1,3 @@ -#include -#include -#include -#include -#include - #define JEMALLOC_MANGLE #include "jemalloc_test.h" diff --git a/test/thread_arena.c b/test/thread_arena.c index 98354282..2020d994 100644 --- a/test/thread_arena.c +++ b/test/thread_arena.c @@ -1,8 +1,3 @@ -#include -#include -#include -#include - #define JEMALLOC_MANGLE #include "jemalloc_test.h" diff --git a/test/thread_tcache_enabled.c b/test/thread_tcache_enabled.c index 9f765841..2061b7bb 100644 --- a/test/thread_tcache_enabled.c +++ b/test/thread_tcache_enabled.c @@ -1,9 +1,3 @@ -#include -#include -#include -#include -#include - #define JEMALLOC_MANGLE #include "jemalloc_test.h" From a5288ca93434d98f91438de40d99177ffdfd2a17 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Fri, 20 Apr 2012 08:38:41 +0200 Subject: [PATCH 154/205] Remove unused #includes --- include/jemalloc/internal/jemalloc_internal.h.in | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index fd5de725..691f50a9 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -12,7 +12,6 @@ # include # include #endif -#include #include #include @@ -20,7 +19,6 @@ #ifndef SIZE_T_MAX # define SIZE_T_MAX SIZE_MAX #endif -#include #include #include #include From 52386b2dc689db3bf71307424c4e1a2b7044c363 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 22 Apr 2012 16:00:11 -0700 Subject: [PATCH 155/205] Fix heap profiling bugs. Fix a potential deadlock that could occur during interval- and growth-triggered heap profile dumps. Fix an off-by-one heap profile statistics bug that could be observed in interval- and growth-triggered heap profiles. Fix heap profile dump filename sequence numbers (regression during conversion to malloc_snprintf()). --- ChangeLog | 8 +- include/jemalloc/internal/private_namespace.h | 1 + include/jemalloc/internal/prof.h | 43 ++++- src/prof.c | 164 ++++++++++-------- 4 files changed, 129 insertions(+), 87 deletions(-) diff --git a/ChangeLog b/ChangeLog index b71fa165..42fc5197 100644 --- a/ChangeLog +++ b/ChangeLog @@ -64,18 +64,22 @@ found in the git revision history: - Remove the --enable-sysv configure option. Bug fixes: - - Fix fork-related bugs that could cause deadlock in children between fork - and exec. - Fix a statistics-related bug in the "thread.arena" mallctl that could cause invalid statistics and crashes. - Work around TLS dallocation via free() on Linux. This bug could cause write-after-free memory corruption. + - Fix a potential deadlock that could occur during interval- and + growth-triggered heap profile dumps. - Fix chunk_alloc_dss() to stop claiming memory is zeroed. This bug could cause memory corruption and crashes with --enable-dss specified. 
+ - Fix fork-related bugs that could cause deadlock in children between fork + and exec. - Fix malloc_stats_print() to honor 'b' and 'l' in the opts parameter. - Fix realloc(p, 0) to act like free(p). - Do not enforce minimum alignment in memalign(). - Check for NULL pointer in malloc_usable_size(). + - Fix an off-by-one heap profile statistics bug that could be observed in + interval- and growth-triggered heap profiles. - Fix bin->runcur management to fix a layout policy bug. This bug did not affect correctness. - Fix a bug in choose_arena_hard() that potentially caused more arenas to be diff --git a/include/jemalloc/internal/private_namespace.h b/include/jemalloc/internal/private_namespace.h index bb1b63e9..c467153a 100644 --- a/include/jemalloc/internal/private_namespace.h +++ b/include/jemalloc/internal/private_namespace.h @@ -240,6 +240,7 @@ #define prof_sample_threshold_update JEMALLOC_N(prof_sample_threshold_update) #define prof_tdata_booted JEMALLOC_N(prof_tdata_booted) #define prof_tdata_cleanup JEMALLOC_N(prof_tdata_cleanup) +#define prof_tdata_get JEMALLOC_N(prof_tdata_get) #define prof_tdata_init JEMALLOC_N(prof_tdata_init) #define prof_tdata_initialized JEMALLOC_N(prof_tdata_initialized) #define prof_tdata_tls JEMALLOC_N(prof_tdata_tls) diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index a4c563cc..093ac93c 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -113,9 +113,19 @@ struct prof_ctx_s { /* Associated backtrace. */ prof_bt_t *bt; - /* Protects cnt_merged and cnts_ql. */ + /* Protects nlimbo, cnt_merged, and cnts_ql. */ malloc_mutex_t *lock; + /* + * Number of threads that currently cause this ctx to be in a state of + * limbo due to one of: + * - Initializing per thread counters associated with this ctx. + * - Preparing to destroy this ctx. + * nlimbo must be 1 (single destroyer) in order to safely destroy the + * ctx. + */ + unsigned nlimbo; + /* Temporary storage for summation during dump. */ prof_cnt_t cnt_summed; @@ -152,6 +162,11 @@ struct prof_tdata_s { uint64_t prng_state; uint64_t threshold; uint64_t accum; + + /* State used to avoid dumping while operating on prof internals. */ + bool enq; + bool enq_idump; + bool enq_gdump; }; #endif /* JEMALLOC_H_STRUCTS */ @@ -211,14 +226,9 @@ bool prof_boot2(void); \ assert(size == s2u(size)); \ \ - prof_tdata = *prof_tdata_tsd_get(); \ - if (prof_tdata == NULL) { \ - prof_tdata = prof_tdata_init(); \ - if (prof_tdata == NULL) { \ - ret = NULL; \ - break; \ - } \ - } \ + prof_tdata = prof_tdata_get(); \ + if (prof_tdata == NULL) \ + break; \ \ if (opt_prof_active == false) { \ /* Sampling is currently inactive, so avoid sampling. 
*/\ @@ -260,6 +270,7 @@ bool prof_boot2(void); #ifndef JEMALLOC_ENABLE_INLINE malloc_tsd_protos(JEMALLOC_ATTR(unused), prof_tdata, prof_tdata_t *) +prof_tdata_t *prof_tdata_get(void); void prof_sample_threshold_update(prof_tdata_t *prof_tdata); prof_ctx_t *prof_ctx_get(const void *ptr); void prof_ctx_set(const void *ptr, prof_ctx_t *ctx); @@ -276,6 +287,20 @@ malloc_tsd_externs(prof_tdata, prof_tdata_t *) malloc_tsd_funcs(JEMALLOC_INLINE, prof_tdata, prof_tdata_t *, NULL, prof_tdata_cleanup) +JEMALLOC_INLINE prof_tdata_t * +prof_tdata_get(void) +{ + prof_tdata_t *prof_tdata; + + cassert(config_prof); + + prof_tdata = *prof_tdata_tsd_get(); + if (prof_tdata == NULL) + prof_tdata = prof_tdata_init(); + + return (prof_tdata); +} + JEMALLOC_INLINE void prof_sample_threshold_update(prof_tdata_t *prof_tdata) { diff --git a/src/prof.c b/src/prof.c index 227560b8..187bda7d 100644 --- a/src/prof.c +++ b/src/prof.c @@ -64,11 +64,6 @@ static int prof_dump_fd; /* Do not dump any profiles until bootstrapping is complete. */ static bool prof_booted = false; -static malloc_mutex_t enq_mtx; -static bool enq; -static bool enq_idump; -static bool enq_gdump; - /******************************************************************************/ /* Function prototypes for non-inline static functions. */ @@ -148,20 +143,19 @@ bt_dup(prof_bt_t *bt) } static inline void -prof_enter(void) +prof_enter(prof_tdata_t *prof_tdata) { cassert(config_prof); - malloc_mutex_lock(&enq_mtx); - enq = true; - malloc_mutex_unlock(&enq_mtx); + assert(prof_tdata->enq == false); + prof_tdata->enq = true; malloc_mutex_lock(&bt2ctx_mtx); } static inline void -prof_leave(void) +prof_leave(prof_tdata_t *prof_tdata) { bool idump, gdump; @@ -169,13 +163,12 @@ prof_leave(void) malloc_mutex_unlock(&bt2ctx_mtx); - malloc_mutex_lock(&enq_mtx); - enq = false; - idump = enq_idump; - enq_idump = false; - gdump = enq_gdump; - enq_gdump = false; - malloc_mutex_unlock(&enq_mtx); + assert(prof_tdata->enq); + prof_tdata->enq = false; + idump = prof_tdata->enq_idump; + prof_tdata->enq_idump = false; + gdump = prof_tdata->enq_gdump; + prof_tdata->enq_gdump = false; if (idump) prof_idump(); @@ -446,12 +439,9 @@ prof_lookup(prof_bt_t *bt) cassert(config_prof); - prof_tdata = *prof_tdata_tsd_get(); - if (prof_tdata == NULL) { - prof_tdata = prof_tdata_init(); - if (prof_tdata == NULL) - return (NULL); - } + prof_tdata = prof_tdata_get(); + if (prof_tdata == NULL) + return (NULL); if (ckh_search(&prof_tdata->bt2cnt, bt, NULL, &ret.v)) { union { @@ -468,52 +458,48 @@ prof_lookup(prof_bt_t *bt) * This thread's cache lacks bt. Look for it in the global * cache. */ - prof_enter(); + prof_enter(prof_tdata); if (ckh_search(&bt2ctx, bt, &btkey.v, &ctx.v)) { /* bt has never been seen before. Insert it. */ ctx.v = imalloc(sizeof(prof_ctx_t)); if (ctx.v == NULL) { - prof_leave(); + prof_leave(prof_tdata); return (NULL); } btkey.p = bt_dup(bt); if (btkey.v == NULL) { - prof_leave(); + prof_leave(prof_tdata); idalloc(ctx.v); return (NULL); } ctx.p->bt = btkey.p; ctx.p->lock = prof_ctx_mutex_choose(); + /* + * Set nlimbo to 1, in order to avoid a race condition + * with prof_ctx_merge()/prof_ctx_destroy(). + */ + ctx.p->nlimbo = 1; memset(&ctx.p->cnt_merged, 0, sizeof(prof_cnt_t)); ql_new(&ctx.p->cnts_ql); if (ckh_insert(&bt2ctx, btkey.v, ctx.v)) { /* OOM. */ - prof_leave(); + prof_leave(prof_tdata); idalloc(btkey.v); idalloc(ctx.v); return (NULL); } - /* - * Artificially raise curobjs, in order to avoid a race - * condition with prof_ctx_merge()/prof_ctx_destroy(). 
- * - * No locking is necessary for ctx here because no other - * threads have had the opportunity to fetch it from - * bt2ctx yet. - */ - ctx.p->cnt_merged.curobjs++; new_ctx = true; } else { /* - * Artificially raise curobjs, in order to avoid a race - * condition with prof_ctx_merge()/prof_ctx_destroy(). + * Increment nlimbo, in order to avoid a race condition + * with prof_ctx_merge()/prof_ctx_destroy(). */ malloc_mutex_lock(ctx.p->lock); - ctx.p->cnt_merged.curobjs++; + ctx.p->nlimbo++; malloc_mutex_unlock(ctx.p->lock); new_ctx = false; } - prof_leave(); + prof_leave(prof_tdata); /* Link a prof_thd_cnt_t into ctx for this thread. */ if (ckh_count(&prof_tdata->bt2cnt) == PROF_TCMAX) { @@ -555,7 +541,7 @@ prof_lookup(prof_bt_t *bt) ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link); malloc_mutex_lock(ctx.p->lock); ql_tail_insert(&ctx.p->cnts_ql, ret.p, cnts_link); - ctx.p->cnt_merged.curobjs--; + ctx.p->nlimbo--; malloc_mutex_unlock(ctx.p->lock); } else { /* Move ret to the front of the LRU. */ @@ -688,26 +674,30 @@ prof_ctx_sum(prof_ctx_t *ctx, prof_cnt_t *cnt_all, size_t *leak_nctx) static void prof_ctx_destroy(prof_ctx_t *ctx) { + prof_tdata_t *prof_tdata; cassert(config_prof); /* * Check that ctx is still unused by any thread cache before destroying - * it. prof_lookup() artificially raises ctx->cnt_merge.curobjs in - * order to avoid a race condition with this function, as does - * prof_ctx_merge() in order to avoid a race between the main body of - * prof_ctx_merge() and entry into this function. + * it. prof_lookup() increments ctx->nlimbo in order to avoid a race + * condition with this function, as does prof_ctx_merge() in order to + * avoid a race between the main body of prof_ctx_merge() and entry + * into this function. */ - prof_enter(); + prof_tdata = *prof_tdata_tsd_get(); + assert(prof_tdata != NULL); + prof_enter(prof_tdata); malloc_mutex_lock(ctx->lock); - if (ql_first(&ctx->cnts_ql) == NULL && ctx->cnt_merged.curobjs == 1) { + if (ql_first(&ctx->cnts_ql) == NULL && ctx->cnt_merged.curobjs == 0 && + ctx->nlimbo == 1) { assert(ctx->cnt_merged.curbytes == 0); assert(ctx->cnt_merged.accumobjs == 0); assert(ctx->cnt_merged.accumbytes == 0); /* Remove ctx from bt2ctx. */ if (ckh_remove(&bt2ctx, ctx->bt, NULL, NULL)) assert(false); - prof_leave(); + prof_leave(prof_tdata); /* Destroy ctx. */ malloc_mutex_unlock(ctx->lock); bt_destroy(ctx->bt); @@ -717,9 +707,9 @@ prof_ctx_destroy(prof_ctx_t *ctx) * Compensate for increment in prof_ctx_merge() or * prof_lookup(). */ - ctx->cnt_merged.curobjs--; + ctx->nlimbo--; malloc_mutex_unlock(ctx->lock); - prof_leave(); + prof_leave(prof_tdata); } } @@ -738,12 +728,12 @@ prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt) ctx->cnt_merged.accumbytes += cnt->cnts.accumbytes; ql_remove(&ctx->cnts_ql, cnt, cnts_link); if (opt_prof_accum == false && ql_first(&ctx->cnts_ql) == NULL && - ctx->cnt_merged.curobjs == 0) { + ctx->cnt_merged.curobjs == 0 && ctx->nlimbo == 0) { /* - * Artificially raise ctx->cnt_merged.curobjs in order to keep - * another thread from winning the race to destroy ctx while - * this one has ctx->lock dropped. Without this, it would be - * possible for another thread to: + * Increment ctx->nlimbo in order to keep another thread from + * winning the race to destroy ctx while this one has ctx->lock + * dropped. Without this, it would be possible for another + * thread to: * * 1) Sample an allocation associated with ctx. * 2) Deallocate the sampled object. 
@@ -752,7 +742,7 @@ prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt) * The result would be that ctx no longer exists by the time * this thread accesses it in prof_ctx_destroy(). */ - ctx->cnt_merged.curobjs++; + ctx->nlimbo++; destroy = true; } else destroy = false; @@ -768,7 +758,16 @@ prof_dump_ctx(bool propagate_err, prof_ctx_t *ctx, prof_bt_t *bt) cassert(config_prof); - if (opt_prof_accum == false && ctx->cnt_summed.curobjs == 0) { + /* + * Current statistics can sum to 0 as a result of unmerged per thread + * statistics. Additionally, interval- and growth-triggered dumps can + * occur between the time a ctx is created and when its statistics are + * filled in. Avoid dumping any ctx that is an artifact of either + * implementation detail. + */ + if ((opt_prof_accum == false && ctx->cnt_summed.curobjs == 0) || + (opt_prof_accum && ctx->cnt_summed.accumobjs == 0)) { + assert(ctx->cnt_summed.curobjs == 0); assert(ctx->cnt_summed.curbytes == 0); assert(ctx->cnt_summed.accumobjs == 0); assert(ctx->cnt_summed.accumbytes == 0); @@ -831,6 +830,7 @@ prof_dump_maps(bool propagate_err) static bool prof_dump(bool propagate_err, const char *filename, bool leakcheck) { + prof_tdata_t *prof_tdata; prof_cnt_t cnt_all; size_t tabind; union { @@ -845,7 +845,10 @@ prof_dump(bool propagate_err, const char *filename, bool leakcheck) cassert(config_prof); - prof_enter(); + prof_tdata = prof_tdata_get(); + if (prof_tdata == NULL) + return (true); + prof_enter(prof_tdata); prof_dump_fd = creat(filename, 0644); if (prof_dump_fd == -1) { if (propagate_err == false) { @@ -896,7 +899,7 @@ prof_dump(bool propagate_err, const char *filename, bool leakcheck) if (prof_flush(propagate_err)) goto label_error; close(prof_dump_fd); - prof_leave(); + prof_leave(prof_tdata); if (leakcheck && cnt_all.curbytes != 0) { malloc_printf(": Leak summary: %"PRId64" byte%s, %" @@ -911,7 +914,7 @@ prof_dump(bool propagate_err, const char *filename, bool leakcheck) return (false); label_error: - prof_leave(); + prof_leave(prof_tdata); return (true); } @@ -933,6 +936,7 @@ prof_dump_filename(char *filename, char v, int64_t vseq) "%s.%d.%"PRIu64".%c.heap", opt_prof_prefix, (int)getpid(), prof_dump_seq, v); } + prof_dump_seq++; } static void @@ -956,19 +960,24 @@ prof_fdump(void) void prof_idump(void) { + prof_tdata_t *prof_tdata; char filename[PATH_MAX + 1]; cassert(config_prof); if (prof_booted == false) return; - malloc_mutex_lock(&enq_mtx); - if (enq) { - enq_idump = true; - malloc_mutex_unlock(&enq_mtx); + /* + * Don't call prof_tdata_get() here, because it could cause recursive + * allocation. + */ + prof_tdata = *prof_tdata_tsd_get(); + if (prof_tdata == NULL) + return; + if (prof_tdata->enq) { + prof_tdata->enq_idump = true; return; } - malloc_mutex_unlock(&enq_mtx); if (opt_prof_prefix[0] != '\0') { malloc_mutex_lock(&prof_dump_seq_mtx); @@ -1005,19 +1014,24 @@ prof_mdump(const char *filename) void prof_gdump(void) { + prof_tdata_t *prof_tdata; char filename[DUMP_FILENAME_BUFSIZE]; cassert(config_prof); if (prof_booted == false) return; - malloc_mutex_lock(&enq_mtx); - if (enq) { - enq_gdump = true; - malloc_mutex_unlock(&enq_mtx); + /* + * Don't call prof_tdata_get() here, because it could cause recursive + * allocation. 
+ */ + prof_tdata = *prof_tdata_tsd_get(); + if (prof_tdata == NULL) + return; + if (prof_tdata->enq) { + prof_tdata->enq_gdump = true; return; } - malloc_mutex_unlock(&enq_mtx); if (opt_prof_prefix[0] != '\0') { malloc_mutex_lock(&prof_dump_seq_mtx); @@ -1110,6 +1124,10 @@ prof_tdata_init(void) prof_tdata->threshold = 0; prof_tdata->accum = 0; + prof_tdata->enq = false; + prof_tdata->enq_idump = false; + prof_tdata->enq_gdump = false; + prof_tdata_tsd_set(&prof_tdata); return (prof_tdata); @@ -1206,12 +1224,6 @@ prof_boot2(void) if (malloc_mutex_init(&prof_dump_seq_mtx)) return (true); - if (malloc_mutex_init(&enq_mtx)) - return (true); - enq = false; - enq_idump = false; - enq_gdump = false; - if (atexit(prof_fdump) != 0) { malloc_write(": Error in atexit()\n"); if (opt_abort) From 40f514fd92d50320075bf9fd8748edb71092a1d8 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 22 Apr 2012 16:21:06 -0700 Subject: [PATCH 156/205] Document MinGW support. --- ChangeLog | 1 + README | 12 ++++++------ 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/ChangeLog b/ChangeLog index 42fc5197..97a90468 100644 --- a/ChangeLog +++ b/ChangeLog @@ -22,6 +22,7 @@ found in the git revision history: - Add support for additional operating systems: + FreeBSD + Mac OS X Lion + + MinGW - Add support for additional architectures: + MIPS + SH4 diff --git a/README b/README index a7864f33..411d52d4 100644 --- a/README +++ b/README @@ -1,10 +1,10 @@ jemalloc is a general-purpose scalable concurrent malloc(3) implementation. -This distribution is a stand-alone "portable" implementation that currently -targets FreeBSD, Linux and Apple OS X. jemalloc is included as the default -allocator in the FreeBSD and NetBSD operating systems, and it is used by the -Mozilla Firefox web browser on Microsoft Windows-related platforms. Depending -on your needs, one of the other divergent versions may suit your needs better -than this distribution. +This distribution is a "portable" implementation that currently +targets FreeBSD, Linux, Apple OS X, and MinGW. jemalloc is included as the +default allocator in the FreeBSD and NetBSD operating systems, and it is used +by the Mozilla Firefox web browser on Microsoft Windows-related platforms. +Depending on your needs, one of the other divergent versions may suit your +needs better than this distribution. The COPYING file contains copyright and licensing information. From 461ad5c87ae5f89cd086e47b31372e9123dcfcdf Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Fri, 20 Apr 2012 08:38:42 +0200 Subject: [PATCH 157/205] Avoid using a union for ctl_node_s MSVC doesn't support C99, and as such doesn't support designated initialization of structs and unions. As there is never a mix of indexed and named nodes, it is pretty straightforward to use a different type for each. 
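In other words, the refactor trades a tagged union, whose static tables needed C99 designated initializers ({.named = {...}} / {.indexed = {...}}), for two struct types that share a common first member. Those brace-initialize fine under C89/MSVC and can still be dispatched through one pointer type, since C guarantees that a struct pointer converts to and from a pointer to its first member. A reduced sketch of the idiom (simplified fields, placeholder index signature, <stdbool.h> used for brevity; not the tree's real layout):

#include <stdbool.h>
#include <stddef.h>

typedef struct {
	bool named;	/* Shared tag; always the first member. */
} node_hdr_t;

typedef struct {
	node_hdr_t node;
	const char *name;
} named_node_t;

typedef struct {
	node_hdr_t node;
	const void *(*index)(size_t i);	/* Placeholder signature. */
} indexed_node_t;

/* C89-friendly static table: plain braces, no designated initializers. */
static const named_node_t example_named[] = {
	{{true}, "epoch"},
	{{true}, "version"}
};

static const named_node_t *
as_named(const node_hdr_t *hdr)
{

	/* Tag-checked downcast via the common initial member. */
	return (hdr->named ? (const named_node_t *)hdr : NULL);
}

The NAME()/INDEX() macros in the diff below expand to exactly this brace-only shape: {true}, n and {false}, i##_index.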
--- include/jemalloc/internal/ctl.h | 27 ++--- src/ctl.c | 168 +++++++++++++++++++------------- 2 files changed, 114 insertions(+), 81 deletions(-) diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index a48d09fe..c06b9af0 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -2,6 +2,8 @@ #ifdef JEMALLOC_H_TYPES typedef struct ctl_node_s ctl_node_t; +typedef struct ctl_named_node_s ctl_named_node_t; +typedef struct ctl_indexed_node_s ctl_indexed_node_t; typedef struct ctl_arena_stats_s ctl_arena_stats_t; typedef struct ctl_stats_s ctl_stats_t; @@ -11,22 +13,23 @@ typedef struct ctl_stats_s ctl_stats_t; struct ctl_node_s { bool named; - union { - struct { - const char *name; - /* If (nchildren == 0), this is a terminal node. */ - unsigned nchildren; - const ctl_node_t *children; - } named; - struct { - const ctl_node_t *(*index)(const size_t *, size_t, - size_t); - } indexed; - } u; +}; + +struct ctl_named_node_s { + struct ctl_node_s node; + const char *name; + /* If (nchildren == 0), this is a terminal node. */ + unsigned nchildren; + const ctl_node_t *children; int (*ctl)(const size_t *, size_t, void *, size_t *, void *, size_t); }; +struct ctl_indexed_node_s { + struct ctl_node_s node; + const ctl_named_node_t *(*index)(const size_t *, size_t, size_t); +}; + struct ctl_arena_stats_s { bool initialized; unsigned nthreads; diff --git a/src/ctl.c b/src/ctl.c index 98ea3d1c..52c85945 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -14,6 +14,32 @@ static bool ctl_initialized; static uint64_t ctl_epoch; static ctl_stats_t ctl_stats; +/******************************************************************************/ +/* Helpers for named and indexed nodes. */ + +static inline const ctl_named_node_t * +ctl_named_node(const ctl_node_t *node) +{ + + return ((node->named) ? (const ctl_named_node_t *)node : NULL); +} + +static inline const ctl_named_node_t * +ctl_named_children(const ctl_named_node_t *node, int index) +{ + const ctl_named_node_t *children = ctl_named_node(node->children); + + return (children ? &children[index] : NULL); +} + +static inline const ctl_indexed_node_t * +ctl_indexed_node(const ctl_node_t *node) +{ + + return ((node->named == false) ? (const ctl_indexed_node_t *)node : + NULL); +} + /******************************************************************************/ /* Function prototypes for non-inline static functions. */ @@ -22,7 +48,7 @@ static int n##_ctl(const size_t *mib, size_t miblen, void *oldp, \ size_t *oldlenp, void *newp, size_t newlen); #define INDEX_PROTO(n) \ -const ctl_node_t *n##_index(const size_t *mib, size_t miblen, \ +const ctl_named_node_t *n##_index(const size_t *mib, size_t miblen, \ size_t i); static bool ctl_arena_init(ctl_arena_stats_t *astats); @@ -149,22 +175,23 @@ CTL_PROTO(stats_mapped) /* Maximum tree depth. */ #define CTL_MAX_DEPTH 6 -#define NAME(n) true, {.named = {n -#define CHILD(c) sizeof(c##_node) / sizeof(ctl_node_t), c##_node}}, NULL -#define CTL(c) 0, NULL}}, c##_ctl +#define NAME(n) {true}, n +#define CHILD(c) \ + sizeof(c##_node) / sizeof(ctl_node_t), (ctl_node_t *)c##_node, NULL +#define CTL(c) 0, NULL, c##_ctl /* * Only handles internal indexed nodes, since there are currently no external * ones. 
*/ -#define INDEX(i) false, {.indexed = {i##_index}}, NULL +#define INDEX(i) {false}, i##_index -static const ctl_node_t tcache_node[] = { +static const ctl_named_node_t tcache_node[] = { {NAME("enabled"), CTL(thread_tcache_enabled)}, {NAME("flush"), CTL(thread_tcache_flush)} }; -static const ctl_node_t thread_node[] = { +static const ctl_named_node_t thread_node[] = { {NAME("arena"), CTL(thread_arena)}, {NAME("allocated"), CTL(thread_allocated)}, {NAME("allocatedp"), CTL(thread_allocatedp)}, @@ -173,7 +200,7 @@ static const ctl_node_t thread_node[] = { {NAME("tcache"), CHILD(tcache)} }; -static const ctl_node_t config_node[] = { +static const ctl_named_node_t config_node[] = { {NAME("debug"), CTL(config_debug)}, {NAME("dss"), CTL(config_dss)}, {NAME("fill"), CTL(config_fill)}, @@ -190,7 +217,7 @@ static const ctl_node_t config_node[] = { {NAME("xmalloc"), CTL(config_xmalloc)} }; -static const ctl_node_t opt_node[] = { +static const ctl_named_node_t opt_node[] = { {NAME("abort"), CTL(opt_abort)}, {NAME("lg_chunk"), CTL(opt_lg_chunk)}, {NAME("narenas"), CTL(opt_narenas)}, @@ -216,31 +243,31 @@ static const ctl_node_t opt_node[] = { {NAME("prof_accum"), CTL(opt_prof_accum)} }; -static const ctl_node_t arenas_bin_i_node[] = { +static const ctl_named_node_t arenas_bin_i_node[] = { {NAME("size"), CTL(arenas_bin_i_size)}, {NAME("nregs"), CTL(arenas_bin_i_nregs)}, {NAME("run_size"), CTL(arenas_bin_i_run_size)} }; -static const ctl_node_t super_arenas_bin_i_node[] = { +static const ctl_named_node_t super_arenas_bin_i_node[] = { {NAME(""), CHILD(arenas_bin_i)} }; -static const ctl_node_t arenas_bin_node[] = { +static const ctl_indexed_node_t arenas_bin_node[] = { {INDEX(arenas_bin_i)} }; -static const ctl_node_t arenas_lrun_i_node[] = { +static const ctl_named_node_t arenas_lrun_i_node[] = { {NAME("size"), CTL(arenas_lrun_i_size)} }; -static const ctl_node_t super_arenas_lrun_i_node[] = { +static const ctl_named_node_t super_arenas_lrun_i_node[] = { {NAME(""), CHILD(arenas_lrun_i)} }; -static const ctl_node_t arenas_lrun_node[] = { +static const ctl_indexed_node_t arenas_lrun_node[] = { {INDEX(arenas_lrun_i)} }; -static const ctl_node_t arenas_node[] = { +static const ctl_named_node_t arenas_node[] = { {NAME("narenas"), CTL(arenas_narenas)}, {NAME("initialized"), CTL(arenas_initialized)}, {NAME("quantum"), CTL(arenas_quantum)}, @@ -254,39 +281,39 @@ static const ctl_node_t arenas_node[] = { {NAME("purge"), CTL(arenas_purge)} }; -static const ctl_node_t prof_node[] = { +static const ctl_named_node_t prof_node[] = { {NAME("active"), CTL(prof_active)}, {NAME("dump"), CTL(prof_dump)}, {NAME("interval"), CTL(prof_interval)} }; -static const ctl_node_t stats_chunks_node[] = { +static const ctl_named_node_t stats_chunks_node[] = { {NAME("current"), CTL(stats_chunks_current)}, {NAME("total"), CTL(stats_chunks_total)}, {NAME("high"), CTL(stats_chunks_high)} }; -static const ctl_node_t stats_huge_node[] = { +static const ctl_named_node_t stats_huge_node[] = { {NAME("allocated"), CTL(stats_huge_allocated)}, {NAME("nmalloc"), CTL(stats_huge_nmalloc)}, {NAME("ndalloc"), CTL(stats_huge_ndalloc)} }; -static const ctl_node_t stats_arenas_i_small_node[] = { +static const ctl_named_node_t stats_arenas_i_small_node[] = { {NAME("allocated"), CTL(stats_arenas_i_small_allocated)}, {NAME("nmalloc"), CTL(stats_arenas_i_small_nmalloc)}, {NAME("ndalloc"), CTL(stats_arenas_i_small_ndalloc)}, {NAME("nrequests"), CTL(stats_arenas_i_small_nrequests)} }; -static const ctl_node_t stats_arenas_i_large_node[] = { +static const 
ctl_named_node_t stats_arenas_i_large_node[] = { {NAME("allocated"), CTL(stats_arenas_i_large_allocated)}, {NAME("nmalloc"), CTL(stats_arenas_i_large_nmalloc)}, {NAME("ndalloc"), CTL(stats_arenas_i_large_ndalloc)}, {NAME("nrequests"), CTL(stats_arenas_i_large_nrequests)} }; -static const ctl_node_t stats_arenas_i_bins_j_node[] = { +static const ctl_named_node_t stats_arenas_i_bins_j_node[] = { {NAME("allocated"), CTL(stats_arenas_i_bins_j_allocated)}, {NAME("nmalloc"), CTL(stats_arenas_i_bins_j_nmalloc)}, {NAME("ndalloc"), CTL(stats_arenas_i_bins_j_ndalloc)}, @@ -297,29 +324,29 @@ static const ctl_node_t stats_arenas_i_bins_j_node[] = { {NAME("nreruns"), CTL(stats_arenas_i_bins_j_nreruns)}, {NAME("curruns"), CTL(stats_arenas_i_bins_j_curruns)} }; -static const ctl_node_t super_stats_arenas_i_bins_j_node[] = { +static const ctl_named_node_t super_stats_arenas_i_bins_j_node[] = { {NAME(""), CHILD(stats_arenas_i_bins_j)} }; -static const ctl_node_t stats_arenas_i_bins_node[] = { +static const ctl_indexed_node_t stats_arenas_i_bins_node[] = { {INDEX(stats_arenas_i_bins_j)} }; -static const ctl_node_t stats_arenas_i_lruns_j_node[] = { +static const ctl_named_node_t stats_arenas_i_lruns_j_node[] = { {NAME("nmalloc"), CTL(stats_arenas_i_lruns_j_nmalloc)}, {NAME("ndalloc"), CTL(stats_arenas_i_lruns_j_ndalloc)}, {NAME("nrequests"), CTL(stats_arenas_i_lruns_j_nrequests)}, {NAME("curruns"), CTL(stats_arenas_i_lruns_j_curruns)} }; -static const ctl_node_t super_stats_arenas_i_lruns_j_node[] = { +static const ctl_named_node_t super_stats_arenas_i_lruns_j_node[] = { {NAME(""), CHILD(stats_arenas_i_lruns_j)} }; -static const ctl_node_t stats_arenas_i_lruns_node[] = { +static const ctl_indexed_node_t stats_arenas_i_lruns_node[] = { {INDEX(stats_arenas_i_lruns_j)} }; -static const ctl_node_t stats_arenas_i_node[] = { +static const ctl_named_node_t stats_arenas_i_node[] = { {NAME("nthreads"), CTL(stats_arenas_i_nthreads)}, {NAME("pactive"), CTL(stats_arenas_i_pactive)}, {NAME("pdirty"), CTL(stats_arenas_i_pdirty)}, @@ -332,15 +359,15 @@ static const ctl_node_t stats_arenas_i_node[] = { {NAME("bins"), CHILD(stats_arenas_i_bins)}, {NAME("lruns"), CHILD(stats_arenas_i_lruns)} }; -static const ctl_node_t super_stats_arenas_i_node[] = { +static const ctl_named_node_t super_stats_arenas_i_node[] = { {NAME(""), CHILD(stats_arenas_i)} }; -static const ctl_node_t stats_arenas_node[] = { +static const ctl_indexed_node_t stats_arenas_node[] = { {INDEX(stats_arenas_i)} }; -static const ctl_node_t stats_node[] = { +static const ctl_named_node_t stats_node[] = { {NAME("cactive"), CTL(stats_cactive)}, {NAME("allocated"), CTL(stats_allocated)}, {NAME("active"), CTL(stats_active)}, @@ -350,7 +377,7 @@ static const ctl_node_t stats_node[] = { {NAME("arenas"), CHILD(stats_arenas)} }; -static const ctl_node_t root_node[] = { +static const ctl_named_node_t root_node[] = { {NAME("version"), CTL(version)}, {NAME("epoch"), CTL(epoch)}, {NAME("thread"), CHILD(thread)}, @@ -360,7 +387,7 @@ static const ctl_node_t root_node[] = { {NAME("prof"), CHILD(prof)}, {NAME("stats"), CHILD(stats)} }; -static const ctl_node_t super_root_node[] = { +static const ctl_named_node_t super_root_node[] = { {NAME(""), CHILD(root)} }; @@ -597,7 +624,7 @@ ctl_lookup(const char *name, ctl_node_t const **nodesp, size_t *mibp, int ret; const char *elm, *tdot, *dot; size_t elen, i, j; - const ctl_node_t *node; + const ctl_named_node_t *node; elm = name; /* Equivalent to strchrnul(). 
*/ @@ -609,21 +636,21 @@ ctl_lookup(const char *name, ctl_node_t const **nodesp, size_t *mibp, } node = super_root_node; for (i = 0; i < *depthp; i++) { - assert(node->named); - assert(node->u.named.nchildren > 0); - if (node->u.named.children[0].named) { - const ctl_node_t *pnode = node; + assert(node); + assert(node->nchildren > 0); + if (ctl_named_node(node->children) != NULL) { + const ctl_named_node_t *pnode = node; /* Children are named. */ - for (j = 0; j < node->u.named.nchildren; j++) { - const ctl_node_t *child = - &node->u.named.children[j]; - if (strlen(child->u.named.name) == elen - && strncmp(elm, child->u.named.name, - elen) == 0) { + for (j = 0; j < node->nchildren; j++) { + const ctl_named_node_t *child = + ctl_named_children(node, j); + if (strlen(child->name) == elen && + strncmp(elm, child->name, elen) == 0) { node = child; if (nodesp != NULL) - nodesp[i] = node; + nodesp[i] = + (const ctl_node_t *)node; mibp[i] = j; break; } @@ -634,7 +661,7 @@ ctl_lookup(const char *name, ctl_node_t const **nodesp, size_t *mibp, } } else { uintmax_t index; - const ctl_node_t *inode; + const ctl_indexed_node_t *inode; /* Children are indexed. */ index = malloc_strtoumax(elm, NULL, 10); @@ -643,16 +670,15 @@ ctl_lookup(const char *name, ctl_node_t const **nodesp, size_t *mibp, goto label_return; } - inode = &node->u.named.children[0]; - node = inode->u.indexed.index(mibp, *depthp, - (size_t)index); + inode = ctl_indexed_node(node->children); + node = inode->index(mibp, *depthp, (size_t)index); if (node == NULL) { ret = ENOENT; goto label_return; } if (nodesp != NULL) - nodesp[i] = node; + nodesp[i] = (const ctl_node_t *)node; mibp[i] = (size_t)index; } @@ -696,6 +722,7 @@ ctl_byname(const char *name, void *oldp, size_t *oldlenp, void *newp, size_t depth; ctl_node_t const *nodes[CTL_MAX_DEPTH]; size_t mib[CTL_MAX_DEPTH]; + const ctl_named_node_t *node; if (ctl_initialized == false && ctl_init()) { ret = EAGAIN; @@ -707,13 +734,14 @@ ctl_byname(const char *name, void *oldp, size_t *oldlenp, void *newp, if (ret != 0) goto label_return; - if (nodes[depth-1]->ctl == NULL) { + node = ctl_named_node(nodes[depth-1]); + if (node != NULL && node->ctl) + ret = node->ctl(mib, depth, oldp, oldlenp, newp, newlen); + else { /* The name refers to a partial path through the ctl tree. */ ret = ENOENT; - goto label_return; } - ret = nodes[depth-1]->ctl(mib, depth, oldp, oldlenp, newp, newlen); label_return: return(ret); } @@ -738,7 +766,7 @@ ctl_bymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; - const ctl_node_t *node; + const ctl_named_node_t *node; size_t i; if (ctl_initialized == false && ctl_init()) { @@ -749,19 +777,21 @@ ctl_bymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, /* Iterate down the tree. */ node = super_root_node; for (i = 0; i < miblen; i++) { - if (node->u.named.children[0].named) { + assert(node); + assert(node->nchildren > 0); + if (ctl_named_node(node->children) != NULL) { /* Children are named. */ - if (node->u.named.nchildren <= mib[i]) { + if (node->nchildren <= mib[i]) { ret = ENOENT; goto label_return; } - node = &node->u.named.children[mib[i]]; + node = ctl_named_children(node, mib[i]); } else { - const ctl_node_t *inode; + const ctl_indexed_node_t *inode; /* Indexed element. 
*/ - inode = &node->u.named.children[0]; - node = inode->u.indexed.index(mib, miblen, mib[i]); + inode = ctl_indexed_node(node->children); + node = inode->index(mib, miblen, mib[i]); if (node == NULL) { ret = ENOENT; goto label_return; @@ -770,12 +800,12 @@ ctl_bymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, } /* Call the ctl function. */ - if (node->ctl == NULL) { + if (node && node->ctl) + ret = node->ctl(mib, miblen, oldp, oldlenp, newp, newlen); + else { /* Partial MIB. */ ret = ENOENT; - goto label_return; } - ret = node->ctl(mib, miblen, oldp, oldlenp, newp, newlen); label_return: return(ret); @@ -1133,7 +1163,7 @@ CTL_RO_NL_CGEN(config_prof, opt_prof_accum, opt_prof_accum, bool) CTL_RO_NL_GEN(arenas_bin_i_size, arena_bin_info[mib[2]].reg_size, size_t) CTL_RO_NL_GEN(arenas_bin_i_nregs, arena_bin_info[mib[2]].nregs, uint32_t) CTL_RO_NL_GEN(arenas_bin_i_run_size, arena_bin_info[mib[2]].run_size, size_t) -const ctl_node_t * +const ctl_named_node_t * arenas_bin_i_index(const size_t *mib, size_t miblen, size_t i) { @@ -1143,7 +1173,7 @@ arenas_bin_i_index(const size_t *mib, size_t miblen, size_t i) } CTL_RO_NL_GEN(arenas_lrun_i_size, ((mib[2]+1) << LG_PAGE), size_t) -const ctl_node_t * +const ctl_named_node_t * arenas_lrun_i_index(const size_t *mib, size_t miblen, size_t i) { @@ -1326,7 +1356,7 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nreruns, CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_curruns, ctl_stats.arenas[mib[2]].bstats[mib[4]].curruns, size_t) -const ctl_node_t * +const ctl_named_node_t * stats_arenas_i_bins_j_index(const size_t *mib, size_t miblen, size_t j) { @@ -1344,7 +1374,7 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_lruns_j_nrequests, CTL_RO_CGEN(config_stats, stats_arenas_i_lruns_j_curruns, ctl_stats.arenas[mib[2]].lstats[mib[4]].curruns, size_t) -const ctl_node_t * +const ctl_named_node_t * stats_arenas_i_lruns_j_index(const size_t *mib, size_t miblen, size_t j) { @@ -1365,10 +1395,10 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_nmadvise, CTL_RO_CGEN(config_stats, stats_arenas_i_purged, ctl_stats.arenas[mib[2]].astats.purged, uint64_t) -const ctl_node_t * +const ctl_named_node_t * stats_arenas_i_index(const size_t *mib, size_t miblen, size_t i) { - const ctl_node_t * ret; + const ctl_named_node_t * ret; malloc_mutex_lock(&ctl_mtx); if (ctl_stats.arenas[i].initialized == false) { From a4936ce4d635ef129be201f53cdc0786315ac3b6 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 23 Apr 2012 12:46:46 -0700 Subject: [PATCH 158/205] Fix jemalloc.sh code generation. Fix jemalloc.sh code generation by adding @sorev@ and using it instead of @SOREV@ (which contains Makefile-specific variables). --- bin/jemalloc.sh.in | 2 +- configure.ac | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/bin/jemalloc.sh.in b/bin/jemalloc.sh.in index cdf36737..58683f5d 100644 --- a/bin/jemalloc.sh.in +++ b/bin/jemalloc.sh.in @@ -4,6 +4,6 @@ prefix=@prefix@ exec_prefix=@exec_prefix@ libdir=@libdir@ -@LD_PRELOAD_VAR@=${libdir}/libjemalloc.@SOREV@ +@LD_PRELOAD_VAR@=${libdir}/libjemalloc.@sorev@ export @LD_PRELOAD_VAR@ exec "$@" diff --git a/configure.ac b/configure.ac index 396b5ef3..73389905 100644 --- a/configure.ac +++ b/configure.ac @@ -202,6 +202,7 @@ libprefix="lib" DSO_LDFLAGS='-shared -Wl,-soname,$(@F)' RPATH='-Wl,-rpath,$(1)' SOREV='$(SO).$(REV)' +sorev="${so}.${rev}" PIC_CFLAGS='-fPIC -DPIC' dnl Heap profiling uses the log(3) function. 
@@ -226,6 +227,7 @@ case "${host}" in force_tls="0" DSO_LDFLAGS='-shared -Wl,-dylib_install_name,$(@F)' SOREV='$(REV).$(SO)' + sorev="${rev}.${so}" ;; *-*-freebsd*) CFLAGS="$CFLAGS" @@ -284,6 +286,7 @@ case "${host}" in libprefix="" exe=".exe" SOREV='$(SO)' + sorev="${so}" PIC_CFLAGS="" ;; *) @@ -301,6 +304,7 @@ AC_SUBST([exe]) AC_SUBST([libprefix]) AC_SUBST([DSO_LDFLAGS]) AC_SUBST([SOREV]) +AC_SUBST([sorev]) AC_SUBST([PIC_CFLAGS]) JE_COMPILABLE([__attribute__ syntax], From 079687bb87e2ac13274c2c4ff1134d42a78e9c7a Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 23 Apr 2012 12:49:23 -0700 Subject: [PATCH 159/205] Clean up documentation and formatting. --- ChangeLog | 4 ++-- INSTALL | 6 +----- Makefile.in | 4 ++-- README | 12 ++++++------ 4 files changed, 11 insertions(+), 15 deletions(-) diff --git a/ChangeLog b/ChangeLog index 97a90468..c93f5df2 100644 --- a/ChangeLog +++ b/ChangeLog @@ -19,7 +19,7 @@ found in the git revision history: New features: - Implement Valgrind support, redzones, and quarantine. - - Add support for additional operating systems: + - Add support for additional platforms: + FreeBSD + Mac OS X Lion + MinGW @@ -67,7 +67,7 @@ found in the git revision history: Bug fixes: - Fix a statistics-related bug in the "thread.arena" mallctl that could cause invalid statistics and crashes. - - Work around TLS dallocation via free() on Linux. This bug could cause + - Work around TLS deallocation via free() on Linux. This bug could cause write-after-free memory corruption. - Fix a potential deadlock that could occur during interval- and growth-triggered heap profile dumps. diff --git a/INSTALL b/INSTALL index 04671a1d..7e3051ac 100644 --- a/INSTALL +++ b/INSTALL @@ -267,10 +267,6 @@ directory, issue configuration and build commands: The manual page is generated in both html and roff formats. Any web browser can be used to view the html manual. The roff manual page can be formatted -prior to installation via any of the following commands: +prior to installation via the following command: nroff -man -t doc/jemalloc.3 - - groff -man -t -Tps doc/jemalloc.3 | ps2pdf - doc/jemalloc.3.pdf - - (cd doc; groff -man -man-ext -t -Thtml jemalloc.3 > jemalloc.3.html) diff --git a/Makefile.in b/Makefile.in index b9917da4..e39b8ea0 100644 --- a/Makefile.in +++ b/Makefile.in @@ -227,8 +227,8 @@ check: tests for t in $(CTESTS:$(srcroot)%.c=$(objroot)%); do \ total=`expr $$total + 1`; \ /bin/echo -n "$${t} ... "; \ - $(TEST_LIBRARY_PATH) $${t}$(EXE) $(abs_srcroot) $(abs_objroot) \ - > $(objroot)$${t}.out 2>&1; \ + $(TEST_LIBRARY_PATH) $${t}$(EXE) $(abs_srcroot) \ + $(abs_objroot) > $(objroot)$${t}.out 2>&1; \ if test -e "$(srcroot)$${t}.exp"; then \ diff -w -u $(srcroot)$${t}.exp \ $(objroot)$${t}.out >/dev/null 2>&1; \ diff --git a/README b/README index 411d52d4..7661683b 100644 --- a/README +++ b/README @@ -1,10 +1,10 @@ jemalloc is a general-purpose scalable concurrent malloc(3) implementation. -This distribution is a "portable" implementation that currently -targets FreeBSD, Linux, Apple OS X, and MinGW. jemalloc is included as the -default allocator in the FreeBSD and NetBSD operating systems, and it is used -by the Mozilla Firefox web browser on Microsoft Windows-related platforms. -Depending on your needs, one of the other divergent versions may suit your -needs better than this distribution. +This distribution is a "portable" implementation that currently targets +FreeBSD, Linux, Apple OS X, and MinGW. 
jemalloc is included as the default +allocator in the FreeBSD and NetBSD operating systems, and it is used by the +Mozilla Firefox web browser on Microsoft Windows-related platforms. Depending +on your needs, one of the other divergent versions may suit your needs better +than this distribution. The COPYING file contains copyright and licensing information. From 6716aa83526b3f866d73a033970cc920bc61c13f Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 23 Apr 2012 13:04:55 -0700 Subject: [PATCH 160/205] Force use of TLS if heap profiling is enabled. --- configure.ac | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/configure.ac b/configure.ac index 73389905..98211c8c 100644 --- a/configure.ac +++ b/configure.ac @@ -678,6 +678,10 @@ fi AC_MSG_CHECKING([configured backtracing method]) AC_MSG_RESULT([$backtrace_method]) if test "x$enable_prof" = "x1" ; then + if test "x${force_tls}" = "x0" ; then + AC_MSG_ERROR([Heap profiling requires TLS]); + fi + force_tls="1" AC_DEFINE([JEMALLOC_PROF], [ ]) fi AC_SUBST([enable_prof]) From 8694e2e7b901eb3254a7da2461709ba2ce135aba Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 23 Apr 2012 13:05:32 -0700 Subject: [PATCH 161/205] Silence compiler warnings. --- src/jemalloc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 67ac90b2..d9fecef5 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -793,7 +793,7 @@ void * je_malloc(size_t size) { void *ret; - size_t usize; + size_t usize JEMALLOC_CC_SILENCE_INIT(0); prof_thr_cnt_t *cnt JEMALLOC_CC_SILENCE_INIT(NULL); if (malloc_init()) { @@ -973,7 +973,7 @@ je_calloc(size_t num, size_t size) { void *ret; size_t num_size; - size_t usize; + size_t usize JEMALLOC_CC_SILENCE_INIT(0); prof_thr_cnt_t *cnt JEMALLOC_CC_SILENCE_INIT(NULL); if (malloc_init()) { @@ -1048,7 +1048,7 @@ void * je_realloc(void *ptr, size_t size) { void *ret; - size_t usize; + size_t usize JEMALLOC_CC_SILENCE_INIT(0); size_t old_size = 0; size_t old_rzsize JEMALLOC_CC_SILENCE_INIT(0); prof_thr_cnt_t *cnt JEMALLOC_CC_SILENCE_INIT(NULL); From 598779aa554dd4356a8c4464b67b99b29e9a8489 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 23 Apr 2012 18:04:52 -0700 Subject: [PATCH 162/205] Don't link tests with superfluous libraries. Don't link tests with libraries that only libjemalloc needs to be linked to. --- Makefile.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile.in b/Makefile.in index e39b8ea0..7685f156 100644 --- a/Makefile.in +++ b/Makefile.in @@ -163,7 +163,7 @@ $(objroot)test/bitmap$(EXE): $(objroot)src/bitmap.$(O) $(objroot)test/%$(EXE): $(objroot)test/%.$(O) $(objroot)src/util.$(O) $(DSOS) @mkdir -p $(@D) - $(CC) -o $@ $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) -L$(objroot)lib -ljemalloc$(install_suffix) $(LIBS) + $(CC) -o $@ $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) -L$(objroot)lib -ljemalloc$(install_suffix) $(filter -lpthread,$(LIBS)) build_lib_shared: $(DSOS) build_lib_static: $(STATIC_LIBS) From 65f343a632aa1f6bd9b8a65761706391469d2620 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 23 Apr 2012 19:31:45 -0700 Subject: [PATCH 163/205] Fix ctl regression. Fix ctl to correctly compute the number of children at each level of the ctl tree. 
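For illustration, a standalone reduction of the bug class (names
hypothetical): dividing an array's byte size by the size of the base
type rather than the actual element type inflates the child count as
soon as the element type outgrows the base, which is why the CHILD()
macro now takes the element type as an argument:

    #include <assert.h>
    #include <stdbool.h>
    #include <stddef.h>

    typedef struct { bool named; } node_t;
    typedef struct { node_t node; const char *name; } named_node_t;

    static const named_node_t children[] = {
            {{true}, "a"},
            {{true}, "b"}
    };

    /* Broken: base type as divisor. On a typical LP64 ABI,
     * sizeof(named_node_t) == 16 but sizeof(node_t) == 1, so the
     * count comes out ~16x too large. */
    #define NCHILDREN_BROKEN(a)     (sizeof(a) / sizeof(node_t))

    /* Fixed: the caller names the actual element type. */
    #define NCHILDREN(t, a)         (sizeof(a) / sizeof(t))

    int
    main(void)
    {

            assert(NCHILDREN(named_node_t, children) == 2);
            assert(NCHILDREN_BROKEN(children) != 2);
            return (0);
    }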
--- include/jemalloc/internal/ctl.h | 12 ++++---- src/ctl.c | 50 +++++++++++++++++---------------- 2 files changed, 32 insertions(+), 30 deletions(-) diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index c06b9af0..adf3827f 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -17,17 +17,17 @@ struct ctl_node_s { struct ctl_named_node_s { struct ctl_node_s node; - const char *name; + const char *name; /* If (nchildren == 0), this is a terminal node. */ - unsigned nchildren; - const ctl_node_t *children; - int (*ctl)(const size_t *, size_t, void *, size_t *, void *, - size_t); + unsigned nchildren; + const ctl_node_t *children; + int (*ctl)(const size_t *, size_t, void *, size_t *, + void *, size_t); }; struct ctl_indexed_node_s { struct ctl_node_s node; - const ctl_named_node_t *(*index)(const size_t *, size_t, size_t); + const ctl_named_node_t *(*index)(const size_t *, size_t, size_t); }; struct ctl_arena_stats_s { diff --git a/src/ctl.c b/src/ctl.c index 52c85945..59499731 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -176,8 +176,10 @@ CTL_PROTO(stats_mapped) #define CTL_MAX_DEPTH 6 #define NAME(n) {true}, n -#define CHILD(c) \ - sizeof(c##_node) / sizeof(ctl_node_t), (ctl_node_t *)c##_node, NULL +#define CHILD(t, c) \ + sizeof(c##_node) / sizeof(ctl_##t##_node_t), \ + (ctl_node_t *)c##_node, \ + NULL #define CTL(c) 0, NULL, c##_ctl /* @@ -197,7 +199,7 @@ static const ctl_named_node_t thread_node[] = { {NAME("allocatedp"), CTL(thread_allocatedp)}, {NAME("deallocated"), CTL(thread_deallocated)}, {NAME("deallocatedp"), CTL(thread_deallocatedp)}, - {NAME("tcache"), CHILD(tcache)} + {NAME("tcache"), CHILD(named, tcache)} }; static const ctl_named_node_t config_node[] = { @@ -249,7 +251,7 @@ static const ctl_named_node_t arenas_bin_i_node[] = { {NAME("run_size"), CTL(arenas_bin_i_run_size)} }; static const ctl_named_node_t super_arenas_bin_i_node[] = { - {NAME(""), CHILD(arenas_bin_i)} + {NAME(""), CHILD(named, arenas_bin_i)} }; static const ctl_indexed_node_t arenas_bin_node[] = { @@ -260,7 +262,7 @@ static const ctl_named_node_t arenas_lrun_i_node[] = { {NAME("size"), CTL(arenas_lrun_i_size)} }; static const ctl_named_node_t super_arenas_lrun_i_node[] = { - {NAME(""), CHILD(arenas_lrun_i)} + {NAME(""), CHILD(named, arenas_lrun_i)} }; static const ctl_indexed_node_t arenas_lrun_node[] = { @@ -275,9 +277,9 @@ static const ctl_named_node_t arenas_node[] = { {NAME("tcache_max"), CTL(arenas_tcache_max)}, {NAME("nbins"), CTL(arenas_nbins)}, {NAME("nhbins"), CTL(arenas_nhbins)}, - {NAME("bin"), CHILD(arenas_bin)}, + {NAME("bin"), CHILD(indexed, arenas_bin)}, {NAME("nlruns"), CTL(arenas_nlruns)}, - {NAME("lrun"), CHILD(arenas_lrun)}, + {NAME("lrun"), CHILD(indexed, arenas_lrun)}, {NAME("purge"), CTL(arenas_purge)} }; @@ -325,7 +327,7 @@ static const ctl_named_node_t stats_arenas_i_bins_j_node[] = { {NAME("curruns"), CTL(stats_arenas_i_bins_j_curruns)} }; static const ctl_named_node_t super_stats_arenas_i_bins_j_node[] = { - {NAME(""), CHILD(stats_arenas_i_bins_j)} + {NAME(""), CHILD(named, stats_arenas_i_bins_j)} }; static const ctl_indexed_node_t stats_arenas_i_bins_node[] = { @@ -339,7 +341,7 @@ static const ctl_named_node_t stats_arenas_i_lruns_j_node[] = { {NAME("curruns"), CTL(stats_arenas_i_lruns_j_curruns)} }; static const ctl_named_node_t super_stats_arenas_i_lruns_j_node[] = { - {NAME(""), CHILD(stats_arenas_i_lruns_j)} + {NAME(""), CHILD(named, stats_arenas_i_lruns_j)} }; static const ctl_indexed_node_t stats_arenas_i_lruns_node[] 
= { @@ -354,13 +356,13 @@ static const ctl_named_node_t stats_arenas_i_node[] = { {NAME("npurge"), CTL(stats_arenas_i_npurge)}, {NAME("nmadvise"), CTL(stats_arenas_i_nmadvise)}, {NAME("purged"), CTL(stats_arenas_i_purged)}, - {NAME("small"), CHILD(stats_arenas_i_small)}, - {NAME("large"), CHILD(stats_arenas_i_large)}, - {NAME("bins"), CHILD(stats_arenas_i_bins)}, - {NAME("lruns"), CHILD(stats_arenas_i_lruns)} + {NAME("small"), CHILD(named, stats_arenas_i_small)}, + {NAME("large"), CHILD(named, stats_arenas_i_large)}, + {NAME("bins"), CHILD(named, stats_arenas_i_bins)}, + {NAME("lruns"), CHILD(named, stats_arenas_i_lruns)} }; static const ctl_named_node_t super_stats_arenas_i_node[] = { - {NAME(""), CHILD(stats_arenas_i)} + {NAME(""), CHILD(named, stats_arenas_i)} }; static const ctl_indexed_node_t stats_arenas_node[] = { @@ -372,23 +374,23 @@ static const ctl_named_node_t stats_node[] = { {NAME("allocated"), CTL(stats_allocated)}, {NAME("active"), CTL(stats_active)}, {NAME("mapped"), CTL(stats_mapped)}, - {NAME("chunks"), CHILD(stats_chunks)}, - {NAME("huge"), CHILD(stats_huge)}, - {NAME("arenas"), CHILD(stats_arenas)} + {NAME("chunks"), CHILD(named, stats_chunks)}, + {NAME("huge"), CHILD(named, stats_huge)}, + {NAME("arenas"), CHILD(indexed, stats_arenas)} }; static const ctl_named_node_t root_node[] = { {NAME("version"), CTL(version)}, {NAME("epoch"), CTL(epoch)}, - {NAME("thread"), CHILD(thread)}, - {NAME("config"), CHILD(config)}, - {NAME("opt"), CHILD(opt)}, - {NAME("arenas"), CHILD(arenas)}, - {NAME("prof"), CHILD(prof)}, - {NAME("stats"), CHILD(stats)} + {NAME("thread"), CHILD(named, thread)}, + {NAME("config"), CHILD(named, config)}, + {NAME("opt"), CHILD(named, opt)}, + {NAME("arenas"), CHILD(named, arenas)}, + {NAME("prof"), CHILD(named, prof)}, + {NAME("stats"), CHILD(named, stats)} }; static const ctl_named_node_t super_root_node[] = { - {NAME(""), CHILD(root)} + {NAME(""), CHILD(named, root)} }; #undef NAME From 87667a86a072ed5127343cc7698c3144cac37e05 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 23 Apr 2012 19:54:15 -0700 Subject: [PATCH 164/205] Fix two CHILD() macro calls in the ctl tree. --- src/ctl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ctl.c b/src/ctl.c index 59499731..4dbbefc2 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -358,8 +358,8 @@ static const ctl_named_node_t stats_arenas_i_node[] = { {NAME("purged"), CTL(stats_arenas_i_purged)}, {NAME("small"), CHILD(named, stats_arenas_i_small)}, {NAME("large"), CHILD(named, stats_arenas_i_large)}, - {NAME("bins"), CHILD(named, stats_arenas_i_bins)}, - {NAME("lruns"), CHILD(named, stats_arenas_i_lruns)} + {NAME("bins"), CHILD(indexed, stats_arenas_i_bins)}, + {NAME("lruns"), CHILD(indexed, stats_arenas_i_lruns)} }; static const ctl_named_node_t super_stats_arenas_i_node[] = { {NAME(""), CHILD(named, stats_arenas_i)} From 577dd84660351c727a62db99b308e35d9da224a1 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 23 Apr 2012 21:14:26 -0700 Subject: [PATCH 165/205] Handle quarantine resurrection during thread exit. Handle quarantine resurrection during thread exit in much the same way as tcache resurrection is handled. 
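For illustration, a minimal sketch of the sentinel-pointer scheme (the
screening helper is hypothetical; the state macros match the ones this
change adds): small non-NULL integer values encode "cleanup already
ran" states in the TSD slot, so every reader must screen them out
before dereferencing:

    #include <stdint.h>
    #include <stddef.h>

    typedef struct quarantine_s quarantine_t;

    /* Sentinels near NULL; never dereferenced. */
    #define QUARANTINE_STATE_REINCARNATED ((quarantine_t *)(uintptr_t)1)
    #define QUARANTINE_STATE_PURGATORY    ((quarantine_t *)(uintptr_t)2)
    #define QUARANTINE_STATE_MAX          QUARANTINE_STATE_PURGATORY

    static quarantine_t *
    quarantine_screen(quarantine_t *quarantine)
    {

            /* NULL (0) and both sentinels (1, 2) fail this test, so
             * callers never mistake an encoded state for a live
             * object. */
            if ((uintptr_t)quarantine <=
                (uintptr_t)QUARANTINE_STATE_MAX)
                    return (NULL);
            return (quarantine);
    }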
--- src/quarantine.c | 50 +++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 45 insertions(+), 5 deletions(-) diff --git a/src/quarantine.c b/src/quarantine.c index 5fb6c390..26ec5896 100644 --- a/src/quarantine.c +++ b/src/quarantine.c @@ -1,5 +1,13 @@ #include "jemalloc/internal/jemalloc_internal.h" +/* + * quarantine pointers close to NULL are used to encode state information that + * is used for cleaning up during thread shutdown. + */ +#define QUARANTINE_STATE_REINCARNATED ((quarantine_t *)(uintptr_t)1) +#define QUARANTINE_STATE_PURGATORY ((quarantine_t *)(uintptr_t)2) +#define QUARANTINE_STATE_MAX QUARANTINE_STATE_PURGATORY + /******************************************************************************/ /* Data. */ @@ -105,10 +113,25 @@ quarantine(void *ptr) assert(opt_quarantine); quarantine = *quarantine_tsd_get(); - if (quarantine == NULL && (quarantine = - quarantine_init(LG_MAXOBJS_INIT)) == NULL) { - idalloc(ptr); - return; + if ((uintptr_t)quarantine <= (uintptr_t)QUARANTINE_STATE_MAX) { + if (quarantine == NULL) { + if ((quarantine = quarantine_init(LG_MAXOBJS_INIT)) == + NULL) { + idalloc(ptr); + return; + } + } else { + if (quarantine == QUARANTINE_STATE_PURGATORY) { + /* + * Make a note that quarantine() was called + * after quarantine_cleanup() was called. + */ + quarantine = QUARANTINE_STATE_REINCARNATED; + quarantine_tsd_set(&quarantine); + } + idalloc(ptr); + return; + } } /* * Drain one or more objects if the quarantine size limit would be @@ -144,9 +167,26 @@ quarantine_cleanup(void *arg) { quarantine_t *quarantine = *(quarantine_t **)arg; - if (quarantine != NULL) { + if (quarantine == QUARANTINE_STATE_REINCARNATED) { + /* + * Another destructor deallocated memory after this destructor + * was called. Reset quarantine to QUARANTINE_STATE_PURGATORY + * in order to receive another callback. + */ + quarantine = QUARANTINE_STATE_PURGATORY; + quarantine_tsd_set(&quarantine); + } else if (quarantine == QUARANTINE_STATE_PURGATORY) { + /* + * The previous time this destructor was called, we set the key + * to QUARANTINE_STATE_PURGATORY so that other destructors + * wouldn't cause re-creation of the quarantine. This time, do + * nothing, so that the destructor will not be called again. + */ + } else if (quarantine != NULL) { quarantine_drain(quarantine, 0); idalloc(quarantine); + quarantine = QUARANTINE_STATE_PURGATORY; + quarantine_tsd_set(&quarantine); } } From 9cd351d147d1e79bff6b89586f168e81c0be034e Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 23 Apr 2012 21:43:18 -0700 Subject: [PATCH 166/205] Add usize sanity checking to quarantine. --- src/quarantine.c | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/src/quarantine.c b/src/quarantine.c index 26ec5896..88d0c77e 100644 --- a/src/quarantine.c +++ b/src/quarantine.c @@ -11,15 +11,21 @@ /******************************************************************************/ /* Data. */ +typedef struct quarantine_obj_s quarantine_obj_t; typedef struct quarantine_s quarantine_t; +struct quarantine_obj_s { + void *ptr; + size_t usize; +}; + struct quarantine_s { - size_t curbytes; - size_t curobjs; - size_t first; + size_t curbytes; + size_t curobjs; + size_t first; #define LG_MAXOBJS_INIT 10 - size_t lg_maxobjs; - void *objs[1]; /* Dynamically sized ring buffer. */ + size_t lg_maxobjs; + quarantine_obj_t objs[1]; /* Dynamically sized ring buffer. 
*/ }; static void quarantine_cleanup(void *arg); @@ -43,7 +49,7 @@ quarantine_init(size_t lg_maxobjs) quarantine_t *quarantine; quarantine = (quarantine_t *)imalloc(offsetof(quarantine_t, objs) + - ((ZU(1) << lg_maxobjs) * sizeof(void *))); + ((ZU(1) << lg_maxobjs) * sizeof(quarantine_obj_t))); if (quarantine == NULL) return (NULL); quarantine->curbytes = 0; @@ -70,14 +76,14 @@ quarantine_grow(quarantine_t *quarantine) quarantine->lg_maxobjs)) { /* objs ring buffer data are contiguous. */ memcpy(ret->objs, &quarantine->objs[quarantine->first], - quarantine->curobjs * sizeof(void *)); + quarantine->curobjs * sizeof(quarantine_obj_t)); ret->curobjs = quarantine->curobjs; } else { /* objs ring buffer data wrap around. */ size_t ncopy = (ZU(1) << quarantine->lg_maxobjs) - quarantine->first; memcpy(ret->objs, &quarantine->objs[quarantine->first], ncopy * - sizeof(void *)); + sizeof(quarantine_obj_t)); ret->curobjs = ncopy; if (quarantine->curobjs != 0) { memcpy(&ret->objs[ret->curobjs], quarantine->objs, @@ -93,10 +99,10 @@ quarantine_drain(quarantine_t *quarantine, size_t upper_bound) { while (quarantine->curbytes > upper_bound && quarantine->curobjs > 0) { - void *ptr = quarantine->objs[quarantine->first]; - size_t usize = isalloc(ptr, config_prof); - idalloc(ptr); - quarantine->curbytes -= usize; + quarantine_obj_t *obj = &quarantine->objs[quarantine->first]; + assert(obj->usize == isalloc(obj->ptr, config_prof)); + idalloc(obj->ptr); + quarantine->curbytes -= obj->usize; quarantine->curobjs--; quarantine->first = (quarantine->first + 1) & ((ZU(1) << quarantine->lg_maxobjs) - 1); @@ -151,7 +157,9 @@ quarantine(void *ptr) if (quarantine->curbytes + usize <= opt_quarantine) { size_t offset = (quarantine->first + quarantine->curobjs) & ((ZU(1) << quarantine->lg_maxobjs) - 1); - quarantine->objs[offset] = ptr; + quarantine_obj_t *obj = &quarantine->objs[offset]; + obj->ptr = ptr; + obj->usize = usize; quarantine->curbytes += usize; quarantine->curobjs++; if (opt_junk) From 7e060397a38e301d7d3317fbf138f342c2bbd1b9 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 23 Apr 2012 22:07:30 -0700 Subject: [PATCH 167/205] Fix quarantine_grow() bugs. --- src/quarantine.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/src/quarantine.c b/src/quarantine.c index 88d0c77e..9005ab3b 100644 --- a/src/quarantine.c +++ b/src/quarantine.c @@ -72,23 +72,22 @@ quarantine_grow(quarantine_t *quarantine) return (quarantine); ret->curbytes = quarantine->curbytes; - if (quarantine->first + quarantine->curobjs < (ZU(1) << + ret->curobjs = quarantine->curobjs; + if (quarantine->first + quarantine->curobjs <= (ZU(1) << quarantine->lg_maxobjs)) { /* objs ring buffer data are contiguous. */ memcpy(ret->objs, &quarantine->objs[quarantine->first], quarantine->curobjs * sizeof(quarantine_obj_t)); - ret->curobjs = quarantine->curobjs; } else { /* objs ring buffer data wrap around. 
*/ - size_t ncopy = (ZU(1) << quarantine->lg_maxobjs) - + size_t ncopy_a = (ZU(1) << quarantine->lg_maxobjs) - quarantine->first; - memcpy(ret->objs, &quarantine->objs[quarantine->first], ncopy * + size_t ncopy_b = quarantine->curobjs - ncopy_a; + + memcpy(ret->objs, &quarantine->objs[quarantine->first], ncopy_a + * sizeof(quarantine_obj_t)); + memcpy(&ret->objs[ncopy_a], quarantine->objs, ncopy_b * sizeof(quarantine_obj_t)); - ret->curobjs = ncopy; - if (quarantine->curobjs != 0) { - memcpy(&ret->objs[ret->curobjs], quarantine->objs, - quarantine->curobjs - ncopy); - } } return (ret); From f54166e7ef5313c3b5c773cbb0ca2af95f5a15ae Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 23 Apr 2012 22:41:36 -0700 Subject: [PATCH 168/205] Add missing Valgrind annotations. --- src/arena.c | 3 +++ src/chunk.c | 1 + 2 files changed, 4 insertions(+) diff --git a/src/arena.c b/src/arena.c index 6f28abe9..f13b5e1e 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1131,6 +1131,8 @@ arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin) (uintptr_t)bin_info->bitmap_offset); /* Initialize run internals. */ + VALGRIND_MAKE_MEM_UNDEFINED(run, bin_info->reg0_offset - + bin_info->redzone_size); run->bin = bin; run->nextind = 0; run->nfree = bin_info->nregs; @@ -1924,6 +1926,7 @@ arena_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, * expectation that the extra bytes will be reliably preserved. */ copysize = (size < oldsize) ? size : oldsize; + VALGRIND_MAKE_MEM_UNDEFINED(ret, copysize); memcpy(ret, ptr, copysize); iqalloc(ptr); return (ret); diff --git a/src/chunk.c b/src/chunk.c index 5426b027..7ac229cb 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -168,6 +168,7 @@ label_return: size_t i; size_t *p = (size_t *)(uintptr_t)ret; + VALGRIND_MAKE_MEM_DEFINED(ret, size); for (i = 0; i < size / sizeof(size_t); i++) assert(p[i] == 0); } From 6b9ed67b4b9d65731d1eeb7937989ef96288b706 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 25 Apr 2012 13:12:46 -0700 Subject: [PATCH 169/205] Fix the "epoch" mallctl. Fix the "epoch" mallctl to update cached stats even if the passed in epoch is 0. --- ChangeLog | 2 ++ src/ctl.c | 23 +++++++++++------------ 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/ChangeLog b/ChangeLog index c93f5df2..691630bc 100644 --- a/ChangeLog +++ b/ChangeLog @@ -81,6 +81,8 @@ found in the git revision history: - Check for NULL pointer in malloc_usable_size(). - Fix an off-by-one heap profile statistics bug that could be observed in interval- and growth-triggered heap profiles. + - Fix the "epoch" mallctl to update cached stats even if the passed in epoch + is 0. - Fix bin->runcur management to fix a layout policy bug. This bug did not affect correctness. - Fix a bug in choose_arena_hard() that potentially caused more arenas to be diff --git a/src/ctl.c b/src/ctl.c index 4dbbefc2..4b41d1d3 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -831,14 +831,14 @@ ctl_boot(void) #define READONLY() do { \ if (newp != NULL || newlen != 0) { \ ret = EPERM; \ - goto label_return; \ + goto label_return; \ } \ } while (0) #define WRITEONLY() do { \ if (oldp != NULL || oldlenp != NULL) { \ ret = EPERM; \ - goto label_return; \ + goto label_return; \ } \ } while (0) @@ -854,7 +854,7 @@ ctl_boot(void) ? 
sizeof(t) : *oldlenp; \ memcpy(oldp, (void *)&v, copylen); \ ret = EINVAL; \ - goto label_return; \ + goto label_return; \ } else \ *(t *)oldp = v; \ } \ @@ -864,7 +864,7 @@ ctl_boot(void) if (newp != NULL) { \ if (newlen != sizeof(t)) { \ ret = EINVAL; \ - goto label_return; \ + goto label_return; \ } \ v = *(t *)newp; \ } \ @@ -891,7 +891,7 @@ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ READ(oldval, t); \ \ ret = 0; \ -label_return: \ +label_return: \ if (l) \ malloc_mutex_unlock(&ctl_mtx); \ return (ret); \ @@ -913,7 +913,7 @@ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ READ(oldval, t); \ \ ret = 0; \ -label_return: \ +label_return: \ malloc_mutex_unlock(&ctl_mtx); \ return (ret); \ } @@ -932,7 +932,7 @@ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ READ(oldval, t); \ \ ret = 0; \ -label_return: \ +label_return: \ malloc_mutex_unlock(&ctl_mtx); \ return (ret); \ } @@ -956,7 +956,7 @@ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ READ(oldval, t); \ \ ret = 0; \ -label_return: \ +label_return: \ return (ret); \ } @@ -973,7 +973,7 @@ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ READ(oldval, t); \ \ ret = 0; \ -label_return: \ +label_return: \ return (ret); \ } @@ -990,7 +990,7 @@ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ READ(oldval, bool); \ \ ret = 0; \ -label_return: \ +label_return: \ return (ret); \ } @@ -1004,9 +1004,8 @@ epoch_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, uint64_t newval; malloc_mutex_lock(&ctl_mtx); - newval = 0; WRITE(newval, uint64_t); - if (newval != 0) + if (newp != NULL) ctl_refresh(); READ(ctl_epoch, uint64_t); From 3fb50b0407ff7dfe14727995706e2b42836f0f7e Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 25 Apr 2012 13:13:44 -0700 Subject: [PATCH 170/205] Fix a PROF_ALLOC_PREP() error path. Fix a PROF_ALLOC_PREP() error path to initialize the return value to NULL. --- include/jemalloc/internal/prof.h | 4 +++- src/jemalloc.c | 6 ++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index 093ac93c..4c398515 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -227,8 +227,10 @@ bool prof_boot2(void); assert(size == s2u(size)); \ \ prof_tdata = prof_tdata_get(); \ - if (prof_tdata == NULL) \ + if (prof_tdata == NULL) { \ + ret = NULL; \ break; \ + } \ \ if (opt_prof_active == false) { \ /* Sampling is currently inactive, so avoid sampling. 
*/\ diff --git a/src/jemalloc.c b/src/jemalloc.c index d9fecef5..52296e07 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1384,7 +1384,6 @@ je_allocm(void **ptr, size_t *rsize, size_t size, int flags) size_t alignment = (ZU(1) << (flags & ALLOCM_LG_ALIGN_MASK) & (SIZE_T_MAX-1)); bool zero = flags & ALLOCM_ZERO; - prof_thr_cnt_t *cnt; assert(ptr != NULL); assert(size != 0); @@ -1397,6 +1396,8 @@ je_allocm(void **ptr, size_t *rsize, size_t size, int flags) goto label_oom; if (config_prof && opt_prof) { + prof_thr_cnt_t *cnt; + PROF_ALLOC_PREP(1, usize, cnt); if (cnt == NULL) goto label_oom; @@ -1456,7 +1457,6 @@ je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags) & (SIZE_T_MAX-1)); bool zero = flags & ALLOCM_ZERO; bool no_move = flags & ALLOCM_NO_MOVE; - prof_thr_cnt_t *cnt; assert(ptr != NULL); assert(*ptr != NULL); @@ -1466,6 +1466,8 @@ je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags) p = *ptr; if (config_prof && opt_prof) { + prof_thr_cnt_t *cnt; + /* * usize isn't knowable before iralloc() returns when extra is * non-zero. Therefore, compute its maximum possible value and From d926c90500d47c5e18811a944c70924c09b5890e Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 25 Apr 2012 23:17:57 -0700 Subject: [PATCH 171/205] Fix Valgrind URL in documentation. Reported by Daichi GOTO. --- doc/jemalloc.xml.in | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index e8a57225..02961f6b 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -853,9 +853,9 @@ for (i = 0; i < nbins; i++) { junk-filled if the opt.junk option is enabled. This feature is of particular use in combination with Valgrind, which can detect - attempts to access quarantined objects. This is intended for debugging - and will impact performance negatively. The default quarantine size is + url="http://valgrind.org/">Valgrind, which can detect attempts + to access quarantined objects. This is intended for debugging and will + impact performance negatively. The default quarantine size is 0. @@ -871,11 +871,11 @@ for (i = 0; i < nbins; i++) { opt.junk option is enabled, the redzones are checked for corruption during deallocation. However, the primary intended purpose of this feature is to be used in - combination with Valgrind, which needs - redzones in order to do effective buffer overflow/underflow detection. - This option is intended for debugging and will impact performance - negatively. This option is disabled by default. + combination with Valgrind, + which needs redzones in order to do effective buffer overflow/underflow + detection. This option is intended for debugging and will impact + performance negatively. This option is disabled by + default. @@ -915,14 +915,14 @@ for (i = 0; i < nbins; i++) { r- [] - Valgrind support - enabled/disabled. If enabled, several other options are automatically - modified during options processing to work well with Valgrind: opt.junk and opt.zero are set to false, - opt.quarantine - is set to 16 MiB, and Valgrind + support enabled/disabled. If enabled, several other options are + automatically modified during options processing to work well with + Valgrind: opt.junk + and opt.zero are set + to false, opt.quarantine is + set to 16 MiB, and opt.redzone is set to true. This option is disabled by default. 
@@ -1854,10 +1854,10 @@ malloc_conf = "xmalloc:true";]]> This implementation does not provide much detail about the problems it detects, because the performance impact for storing such information would be prohibitive. However, jemalloc does integrate with the most - excellent Valgrind tool if - the configuration option is enabled and - the opt.valgrind - option is enabled. + excellent Valgrind tool if the + configuration option is enabled and the + opt.valgrind option + is enabled. DIAGNOSTIC MESSAGES From 95ff6aadca1e91641e093c61091e70344d048b50 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 28 Apr 2012 14:15:28 -0700 Subject: [PATCH 172/205] Don't set prof_tdata during thread cleanup. Don't set prof_tdata during thread cleanup, because doing so will cause the cleanup function to be called again, the second time with a NULL argument. --- src/prof.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/prof.c b/src/prof.c index 187bda7d..cad56929 100644 --- a/src/prof.c +++ b/src/prof.c @@ -1155,10 +1155,7 @@ prof_tdata_cleanup(void *arg) } idalloc(prof_tdata->vec); - idalloc(prof_tdata); - prof_tdata = NULL; - prof_tdata_tsd_set(&prof_tdata); } void From 0050a0f7e6ea5a33c9aed769e2652afe20714194 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 28 Apr 2012 18:14:24 -0700 Subject: [PATCH 173/205] Handle prof_tdata resurrection. Handle prof_tdata resurrection during thread shutdown, similarly to how tcache and quarantine handle resurrection. --- include/jemalloc/internal/prof.h | 18 +++++++++-- src/prof.c | 54 ++++++++++++++++++++++---------- 2 files changed, 52 insertions(+), 20 deletions(-) diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index 4c398515..b165e196 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -37,6 +37,14 @@ typedef struct prof_tdata_s prof_tdata_t; */ #define PROF_NCTX_LOCKS 1024 +/* + * prof_tdata pointers close to NULL are used to encode state information that + * is used for cleaning up during thread shutdown. + */ +#define PROF_TDATA_STATE_REINCARNATED ((prof_tdata_t *)(uintptr_t)1) +#define PROF_TDATA_STATE_PURGATORY ((prof_tdata_t *)(uintptr_t)2) +#define PROF_TDATA_STATE_MAX PROF_TDATA_STATE_PURGATORY + #endif /* JEMALLOC_H_TYPES */ /******************************************************************************/ #ifdef JEMALLOC_H_STRUCTS @@ -297,8 +305,12 @@ prof_tdata_get(void) cassert(config_prof); prof_tdata = *prof_tdata_tsd_get(); - if (prof_tdata == NULL) - prof_tdata = prof_tdata_init(); + if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) { + if (prof_tdata == NULL) + prof_tdata = prof_tdata_init(); + else + prof_tdata = NULL; + } return (prof_tdata); } @@ -382,7 +394,7 @@ prof_sample_accum_update(size_t size) assert(opt_lg_prof_sample != 0); prof_tdata = *prof_tdata_tsd_get(); - assert(prof_tdata != NULL); + assert((uintptr_t)prof_tdata > (uintptr_t)PROF_TDATA_STATE_MAX); /* Take care to avoid integer overflow. */ if (size >= prof_tdata->threshold - prof_tdata->accum) { diff --git a/src/prof.c b/src/prof.c index cad56929..e3549662 100644 --- a/src/prof.c +++ b/src/prof.c @@ -686,7 +686,7 @@ prof_ctx_destroy(prof_ctx_t *ctx) * into this function. */ prof_tdata = *prof_tdata_tsd_get(); - assert(prof_tdata != NULL); + assert((uintptr_t)prof_tdata > (uintptr_t)PROF_TDATA_STATE_MAX); prof_enter(prof_tdata); malloc_mutex_lock(ctx->lock); if (ql_first(&ctx->cnts_ql) == NULL && ctx->cnt_merged.curobjs == 0 && @@ -972,7 +972,7 @@ prof_idump(void) * allocation. 
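For illustration, a self-contained sketch of the destructor state
machine this shares with tcache and quarantine cleanup (tdata_t, slot,
and tsd_set() are hypothetical stand-ins for the real TSD machinery):

    #include <stdint.h>
    #include <stdlib.h>

    typedef struct tdata_s { int dummy; } tdata_t;

    #define STATE_REINCARNATED ((tdata_t *)(uintptr_t)1)
    #define STATE_PURGATORY    ((tdata_t *)(uintptr_t)2)

    static tdata_t *slot;  /* Stand-in for the thread-specific slot. */

    static void tsd_set(tdata_t **val) { slot = *val; }

    static void
    cleanup(void)
    {
            tdata_t *tdata = slot;

            if (tdata == STATE_REINCARNATED) {
                    /* Memory was allocated after a previous pass;
                     * re-arm so the destructor fires once more. */
                    tdata = STATE_PURGATORY;
                    tsd_set(&tdata);
            } else if (tdata == STATE_PURGATORY) {
                    /* Second pass; do nothing so iteration stops. */
            } else if (tdata != NULL) {
                    free(tdata);  /* Real code merges stats first. */
                    tdata = STATE_PURGATORY;
                    tsd_set(&tdata);
            }
    }

    int
    main(void)
    {

            slot = malloc(sizeof(tdata_t));
            cleanup();      /* Frees; slot becomes STATE_PURGATORY. */
            cleanup();      /* No-op second pass. */
            return (0);
    }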
*/ prof_tdata = *prof_tdata_tsd_get(); - if (prof_tdata == NULL) + if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) return; if (prof_tdata->enq) { prof_tdata->enq_idump = true; @@ -1026,7 +1026,7 @@ prof_gdump(void) * allocation. */ prof_tdata = *prof_tdata_tsd_get(); - if (prof_tdata == NULL) + if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) return; if (prof_tdata->enq) { prof_tdata->enq_gdump = true; @@ -1141,21 +1141,41 @@ prof_tdata_cleanup(void *arg) cassert(config_prof); - /* - * Delete the hash table. All of its contents can still be iterated - * over via the LRU. - */ - ckh_delete(&prof_tdata->bt2cnt); - - /* Iteratively merge cnt's into the global stats and delete them. */ - while ((cnt = ql_last(&prof_tdata->lru_ql, lru_link)) != NULL) { - ql_remove(&prof_tdata->lru_ql, cnt, lru_link); - prof_ctx_merge(cnt->ctx, cnt); - idalloc(cnt); + if (prof_tdata == PROF_TDATA_STATE_REINCARNATED) { + /* + * Another destructor deallocated memory after this destructor + * was called. Reset prof_tdata to PROF_TDATA_STATE_PURGATORY + * in order to receive another callback. + */ + prof_tdata = PROF_TDATA_STATE_PURGATORY; + prof_tdata_tsd_set(&prof_tdata); + } else if (prof_tdata == PROF_TDATA_STATE_PURGATORY) { + /* + * The previous time this destructor was called, we set the key + * to PROF_TDATA_STATE_PURGATORY so that other destructors + * wouldn't cause re-creation of the prof_tdata. This time, do + * nothing, so that the destructor will not be called again. + */ + } else if (prof_tdata != NULL) { + /* + * Delete the hash table. All of its contents can still be + * iterated over via the LRU. + */ + ckh_delete(&prof_tdata->bt2cnt); + /* + * Iteratively merge cnt's into the global stats and delete + * them. + */ + while ((cnt = ql_last(&prof_tdata->lru_ql, lru_link)) != NULL) { + ql_remove(&prof_tdata->lru_ql, cnt, lru_link); + prof_ctx_merge(cnt->ctx, cnt); + idalloc(cnt); + } + idalloc(prof_tdata->vec); + idalloc(prof_tdata); + prof_tdata = PROF_TDATA_STATE_PURGATORY; + prof_tdata_tsd_set(&prof_tdata); } - - idalloc(prof_tdata->vec); - idalloc(prof_tdata); } void From f27899402914065a6c1484ea8d81a2c8b70aa659 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 28 Apr 2012 23:27:13 -0700 Subject: [PATCH 174/205] Fix more prof_tdata resurrection corner cases. --- include/jemalloc/internal/prof.h | 12 +++++++----- src/prof.c | 4 ++-- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index b165e196..41a66923 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -235,8 +235,11 @@ bool prof_boot2(void); assert(size == s2u(size)); \ \ prof_tdata = prof_tdata_get(); \ - if (prof_tdata == NULL) { \ - ret = NULL; \ + if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) { \ + if (prof_tdata != NULL) \ + ret = (prof_thr_cnt_t *)(uintptr_t)1U; \ + else \ + ret = NULL; \ break; \ } \ \ @@ -308,8 +311,6 @@ prof_tdata_get(void) if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) { if (prof_tdata == NULL) prof_tdata = prof_tdata_init(); - else - prof_tdata = NULL; } return (prof_tdata); @@ -394,7 +395,8 @@ prof_sample_accum_update(size_t size) assert(opt_lg_prof_sample != 0); prof_tdata = *prof_tdata_tsd_get(); - assert((uintptr_t)prof_tdata > (uintptr_t)PROF_TDATA_STATE_MAX); + if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) + return (true); /* Take care to avoid integer overflow. 
*/ if (size >= prof_tdata->threshold - prof_tdata->accum) { diff --git a/src/prof.c b/src/prof.c index e3549662..de1d3929 100644 --- a/src/prof.c +++ b/src/prof.c @@ -440,7 +440,7 @@ prof_lookup(prof_bt_t *bt) cassert(config_prof); prof_tdata = prof_tdata_get(); - if (prof_tdata == NULL) + if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) return (NULL); if (ckh_search(&prof_tdata->bt2cnt, bt, NULL, &ret.v)) { @@ -846,7 +846,7 @@ prof_dump(bool propagate_err, const char *filename, bool leakcheck) cassert(config_prof); prof_tdata = prof_tdata_get(); - if (prof_tdata == NULL) + if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) return (true); prof_enter(prof_tdata); prof_dump_fd = creat(filename, 0644); From 8b49971d0ce0819af78aa2a278c26ecb298ee134 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Tue, 24 Apr 2012 23:22:02 +0200 Subject: [PATCH 175/205] Avoid variable length arrays and remove declarations within code MSVC doesn't support C99, and building as C++ to be able to use them is dangerous, as C++ and C99 are incompatible. Introduce a VARIABLE_ARRAY macro that either uses VLA when supported, or alloca() otherwise. Note that using alloca() inside loops doesn't quite work like VLAs, thus the use of VARIABLE_ARRAY there is discouraged. It might be worth investigating ways to check whether VARIABLE_ARRAY is used in such context at runtime in debug builds and bail out if that happens. --- .../jemalloc/internal/jemalloc_internal.h.in | 14 ++++++++++++++ include/jemalloc/internal/prof.h | 3 ++- src/arena.c | 18 ++++++++++-------- src/ctl.c | 4 ++-- src/stats.c | 4 ++-- src/tsd.c | 2 +- test/bitmap.c | 16 ++++++++++++---- 7 files changed, 43 insertions(+), 18 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 691f50a9..a364d7a3 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -319,6 +319,20 @@ static const bool config_ivsalloc = #define ALIGNMENT_CEILING(s, alignment) \ (((s) + (alignment - 1)) & (-(alignment))) +/* Declare a variable length array */ +#if __STDC_VERSION__ < 199901L +# ifdef _MSC_VER +# include +# define alloca _alloca +# else +# include +# endif +# define VARIABLE_ARRAY(type, name, count) \ + type *name = alloca(sizeof(type) * count) +#else +# define VARIABLE_ARRAY(type, name, count) type name[count] +#endif + #ifdef JEMALLOC_VALGRIND /* * The JEMALLOC_VALGRIND_*() macros must be macros rather than functions diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index 41a66923..c3e3f9e4 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -542,8 +542,9 @@ prof_free(const void *ptr, size_t size) cassert(config_prof); if ((uintptr_t)ctx > (uintptr_t)1) { + prof_thr_cnt_t *tcnt; assert(size == isalloc(ptr, true)); - prof_thr_cnt_t *tcnt = prof_lookup(ctx->bt); + tcnt = prof_lookup(ctx->bt); if (tcnt != NULL) { tcnt->epoch++; diff --git a/src/arena.c b/src/arena.c index f13b5e1e..7fac3619 100644 --- a/src/arena.c +++ b/src/arena.c @@ -640,14 +640,14 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) if (mapelm->bits & CHUNK_MAP_LARGE) pageind += mapelm->bits >> LG_PAGE; else { + size_t binind; + arena_bin_info_t *bin_info; arena_run_t *run = (arena_run_t *)((uintptr_t) chunk + (uintptr_t)(pageind << LG_PAGE)); assert((mapelm->bits >> LG_PAGE) == 0); - size_t binind = arena_bin_index(arena, - run->bin); - arena_bin_info_t *bin_info = - 
&arena_bin_info[binind]; + binind = arena_bin_index(arena, run->bin); + bin_info = &arena_bin_info[binind]; pageind += bin_info->run_size >> LG_PAGE; } } @@ -1056,11 +1056,12 @@ arena_bin_runs_first(arena_bin_t *bin) if (mapelm != NULL) { arena_chunk_t *chunk; size_t pageind; + arena_run_t *run; chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(mapelm); pageind = ((((uintptr_t)mapelm - (uintptr_t)chunk->map) / sizeof(arena_chunk_map_t))) + map_bias; - arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + + run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - (mapelm->bits >> LG_PAGE)) << LG_PAGE)); return (run); @@ -1596,14 +1597,15 @@ arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, size_t pageind; arena_run_t *run; arena_bin_t *bin; - size_t size; + arena_bin_info_t *bin_info; + size_t size, binind; pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - (mapelm->bits >> LG_PAGE)) << LG_PAGE)); bin = run->bin; - size_t binind = arena_bin_index(arena, bin); - arena_bin_info_t *bin_info = &arena_bin_info[binind]; + binind = arena_bin_index(arena, bin); + bin_info = &arena_bin_info[binind]; if (config_fill || config_stats) size = bin_info->reg_size; diff --git a/src/ctl.c b/src/ctl.c index 4b41d1d3..27346045 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -520,7 +520,7 @@ static void ctl_refresh(void) { unsigned i; - arena_t *tarenas[narenas]; + VARIABLE_ARRAY(arena_t *, tarenas, narenas); if (config_stats) { malloc_mutex_lock(&chunks_mtx); @@ -1232,7 +1232,7 @@ arenas_purge_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, ret = EFAULT; goto label_return; } else { - arena_t *tarenas[narenas]; + VARIABLE_ARRAY(arena_t *, tarenas, narenas); malloc_mutex_lock(&arenas_lock); memcpy(tarenas, arenas, sizeof(arena_t *) * narenas); diff --git a/src/stats.c b/src/stats.c index 08f7098c..2854b309 100644 --- a/src/stats.c +++ b/src/stats.c @@ -498,7 +498,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, CTL_GET("arenas.narenas", &narenas, unsigned); { - bool initialized[narenas]; + VARIABLE_ARRAY(bool, initialized, narenas); size_t isz; unsigned i, ninitialized; @@ -527,7 +527,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, CTL_GET("arenas.narenas", &narenas, unsigned); { - bool initialized[narenas]; + VARIABLE_ARRAY(bool, initialized, narenas); size_t isz; unsigned i; diff --git a/src/tsd.c b/src/tsd.c index 09f06e88..d7714b02 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -36,7 +36,7 @@ JEMALLOC_ATTR(visibility("default")) void _malloc_thread_cleanup(void) { - bool pending[ncleanups], again; + bool pending[MALLOC_TSD_CLEANUPS_MAX], again; unsigned i; for (i = 0; i < ncleanups; i++) diff --git a/test/bitmap.c b/test/bitmap.c index ff50ecb3..b2cb6300 100644 --- a/test/bitmap.c +++ b/test/bitmap.c @@ -30,11 +30,13 @@ test_bitmap_init(void) bitmap_info_init(&binfo, i); { size_t j; - bitmap_t bitmap[bitmap_info_ngroups(&binfo)]; + bitmap_t *bitmap = malloc(sizeof(bitmap_t) * + bitmap_info_ngroups(&binfo)); bitmap_init(bitmap, &binfo); for (j = 0; j < i; j++) assert(bitmap_get(bitmap, &binfo, j) == false); + free(bitmap); } } @@ -50,12 +52,14 @@ test_bitmap_set(void) bitmap_info_init(&binfo, i); { size_t j; - bitmap_t bitmap[bitmap_info_ngroups(&binfo)]; + bitmap_t *bitmap = malloc(sizeof(bitmap_t) * + bitmap_info_ngroups(&binfo)); bitmap_init(bitmap, &binfo); for (j = 0; j < i; j++) bitmap_set(bitmap, &binfo, j); assert(bitmap_full(bitmap, &binfo)); + free(bitmap); } 
} } @@ -70,7 +74,8 @@ test_bitmap_unset(void) bitmap_info_init(&binfo, i); { size_t j; - bitmap_t bitmap[bitmap_info_ngroups(&binfo)]; + bitmap_t *bitmap = malloc(sizeof(bitmap_t) * + bitmap_info_ngroups(&binfo)); bitmap_init(bitmap, &binfo); for (j = 0; j < i; j++) @@ -81,6 +86,7 @@ test_bitmap_unset(void) for (j = 0; j < i; j++) bitmap_set(bitmap, &binfo, j); assert(bitmap_full(bitmap, &binfo)); + free(bitmap); } } } @@ -95,7 +101,8 @@ test_bitmap_sfu(void) bitmap_info_init(&binfo, i); { ssize_t j; - bitmap_t bitmap[bitmap_info_ngroups(&binfo)]; + bitmap_t *bitmap = malloc(sizeof(bitmap_t) * + bitmap_info_ngroups(&binfo)); bitmap_init(bitmap, &binfo); /* Iteratively set bits starting at the beginning. */ @@ -125,6 +132,7 @@ test_bitmap_sfu(void) } assert(bitmap_sfu(bitmap, &binfo) == i - 1); assert(bitmap_full(bitmap, &binfo)); + free(bitmap); } } } From af04b744bda40842631d80ad04e1510308b13e54 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Mon, 30 Apr 2012 12:38:25 +0200 Subject: [PATCH 176/205] Remove the VOID macro Windows headers define a VOID macro. --- src/ctl.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/ctl.c b/src/ctl.c index 27346045..dddf3bee 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -842,11 +842,6 @@ ctl_boot(void) } \ } while (0) -#define VOID() do { \ - READONLY(); \ - WRITEONLY(); \ -} while (0) - #define READ(v, t) do { \ if (oldp != NULL && oldlenp != NULL) { \ if (*oldlenp != sizeof(t)) { \ @@ -1049,7 +1044,8 @@ thread_tcache_flush_ctl(const size_t *mib, size_t miblen, void *oldp, if (config_tcache == false) return (ENOENT); - VOID(); + READONLY(); + WRITEONLY(); tcache_flush(); From a14bce85e885f83c96116cc5438ae52d740f3727 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Mon, 30 Apr 2012 12:38:26 +0200 Subject: [PATCH 177/205] Use Get/SetLastError on Win32 Using errno on win32 doesn't quite work, because the value set in a shared library can't be read from e.g. an executable calling the function setting errno. At the same time, since buferror always uses errno/GetLastError, don't pass it. 
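For illustration, a minimal sketch of the failure mode described above. The caller below is hypothetical (not part of this patch); the EINVAL result for alignment 0 follows the behavior exercised in test/aligned_alloc.c, and the setup assumes jemalloc built as a DLL with each module linking its own CRT, so the executable and the library each hold a separate errno:

    /* Hypothetical caller: jemalloc loaded as a DLL on win32, each
     * module statically linked against its own CRT. */
    #include <errno.h>
    #include <stdio.h>
    #include <jemalloc/jemalloc.h>

    int
    main(void)
    {
        void *p = je_aligned_alloc(0, 1); /* DLL-side code stores EINVAL. */
        if (p == NULL && errno != EINVAL) {
            /* Reached on win32: the executable reads its own copy of
             * errno, which the DLL's store never touched. */
            printf("errno was lost across the DLL boundary\n");
        }
        return (0);
    }

GetLastError()/SetLastError() operate on per-thread state owned by the OS rather than by a particular CRT instance, which is why the get_errno()/set_errno() wrappers introduced below use them under _WIN32.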
--- .../jemalloc/internal/jemalloc_internal.h.in | 12 ++++++-- include/jemalloc/internal/util.h | 28 ++++++++++++++++++- src/chunk_mmap.c | 4 +-- src/huge.c | 2 +- src/jemalloc.c | 18 ++++++------ src/util.c | 15 +++++----- test/aligned_alloc.c | 25 +++++++++-------- 7 files changed, 71 insertions(+), 33 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index a364d7a3..d4c4b4cb 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -1,8 +1,17 @@ #ifndef JEMALLOC_INTERNAL_H #define JEMALLOC_INTERNAL_H #include +#include #ifdef _WIN32 # include +# define ENOENT ERROR_PATH_NOT_FOUND +# define EINVAL ERROR_BAD_ARGUMENTS +# define EAGAIN ERROR_OUTOFMEMORY +# define EPERM ERROR_WRITE_FAULT +# define EFAULT ERROR_INVALID_ADDRESS +# define ENOMEM ERROR_NOT_ENOUGH_MEMORY +# undef ERANGE +# define ERANGE ERROR_INVALID_DATA #else # include # include @@ -11,10 +20,10 @@ # endif # include # include +# include #endif #include -#include #include #ifndef SIZE_T_MAX # define SIZE_T_MAX SIZE_MAX @@ -34,7 +43,6 @@ #include #include #include -#include #define JEMALLOC_NO_DEMANGLE #include "../jemalloc@install_suffix@.h" diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index d360ae3f..9661c7b1 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -84,7 +84,7 @@ extern void (*je_malloc_message)(void *wcbopaque, const char *s); -int buferror(int errnum, char *buf, size_t buflen); +int buferror(char *buf, size_t buflen); uintmax_t malloc_strtoumax(const char *nptr, char **endptr, int base); /* @@ -109,6 +109,8 @@ void malloc_printf(const char *format, ...) #ifndef JEMALLOC_ENABLE_INLINE size_t pow2_ceil(size_t x); void malloc_write(const char *s); +void set_errno(int errnum); +int get_errno(void); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_UTIL_C_)) @@ -140,6 +142,30 @@ malloc_write(const char *s) je_malloc_message(NULL, s); } + +/* Sets error code */ +JEMALLOC_INLINE void +set_errno(int errnum) +{ + +#ifdef _WIN32 + SetLastError(errnum); +#else + errno = errnum; +#endif +} + +/* Get last error code */ +JEMALLOC_INLINE int +get_errno(void) +{ + +#ifdef _WIN32 + return GetLastError(); +#else + return errno; +#endif +} #endif #endif /* JEMALLOC_H_INLINES */ diff --git a/src/chunk_mmap.c b/src/chunk_mmap.c index 0ad65a10..9f388d28 100644 --- a/src/chunk_mmap.c +++ b/src/chunk_mmap.c @@ -41,7 +41,7 @@ pages_map(void *addr, size_t size) if (munmap(ret, size) == -1) { char buf[BUFERROR_BUF]; - buferror(errno, buf, sizeof(buf)); + buferror(buf, sizeof(buf)); malloc_printf(": Error in " #ifdef _WIN32 "VirtualFree" diff --git a/src/huge.c b/src/huge.c index 23eb074a..67b282d1 100644 --- a/src/huge.c +++ b/src/huge.c @@ -168,7 +168,7 @@ huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, */ char buf[BUFERROR_BUF]; - buferror(errno, buf, sizeof(buf)); + buferror(buf, sizeof(buf)); malloc_printf(": Error in mremap(): %s\n", buf); if (opt_abort) diff --git a/src/jemalloc.c b/src/jemalloc.c index 52296e07..cae0098b 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -472,9 +472,9 @@ malloc_conf_init(void) uintmax_t um; \ char *end; \ \ - errno = 0; \ + set_errno(0); \ um = malloc_strtoumax(v, &end, 0); \ - if (errno != 0 || (uintptr_t)end - \ + if (get_errno() != 0 || (uintptr_t)end -\ (uintptr_t)v != vlen) { \ malloc_conf_error( \ "Invalid conf value", \ @@ -493,9 +493,9 @@ 
malloc_conf_init(void) long l; \ char *end; \ \ - errno = 0; \ + set_errno(0); \ l = strtol(v, &end, 0); \ - if (errno != 0 || (uintptr_t)end - \ + if (get_errno() != 0 || (uintptr_t)end -\ (uintptr_t)v != vlen) { \ malloc_conf_error( \ "Invalid conf value", \ @@ -831,7 +831,7 @@ label_oom: "out of memory\n"); abort(); } - errno = ENOMEM; + set_errno(ENOMEM); } if (config_prof && opt_prof && ret != NULL) prof_malloc(ret, usize, cnt); @@ -959,7 +959,7 @@ je_aligned_alloc(size_t alignment, size_t size) if ((err = imemalign(&ret, alignment, size, 1)) != 0) { ret = NULL; - errno = err; + set_errno(err); } JEMALLOC_VALGRIND_MALLOC(err == 0, ret, isalloc(ret, config_prof), false); @@ -1029,7 +1029,7 @@ label_return: "memory\n"); abort(); } - errno = ENOMEM; + set_errno(ENOMEM); } if (config_prof && opt_prof && ret != NULL) @@ -1130,7 +1130,7 @@ label_oom: "out of memory\n"); abort(); } - errno = ENOMEM; + set_errno(ENOMEM); } } else { /* realloc(NULL, size) is equivalent to malloc(size). */ @@ -1172,7 +1172,7 @@ label_oom: "out of memory\n"); abort(); } - errno = ENOMEM; + set_errno(ENOMEM); } } diff --git a/src/util.c b/src/util.c index 2042329c..64d53dd9 100644 --- a/src/util.c +++ b/src/util.c @@ -65,7 +65,7 @@ void (*je_malloc_message)(void *, const char *s) * provide a wrapper. */ int -buferror(int errnum, char *buf, size_t buflen) +buferror(char *buf, size_t buflen) { #ifdef _WIN32 @@ -93,7 +93,7 @@ malloc_strtoumax(const char *nptr, char **endptr, int base) const char *p, *ns; if (base < 0 || base == 1 || base > 36) { - errno = EINVAL; + set_errno(EINVAL); return (UINTMAX_MAX); } b = base; @@ -168,7 +168,7 @@ malloc_strtoumax(const char *nptr, char **endptr, int base) ret += digit; if (ret < pret) { /* Overflow. */ - errno = ERANGE; + set_errno(ERANGE); return (UINTMAX_MAX); } p++; @@ -416,9 +416,9 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': { uintmax_t uwidth; - errno = 0; + set_errno(0); uwidth = malloc_strtoumax(f, (char **)&f, 10); - assert(uwidth != UINTMAX_MAX || errno != + assert(uwidth != UINTMAX_MAX || get_errno() != ERANGE); width = (int)uwidth; if (*f == '.') { @@ -442,9 +442,10 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': { uintmax_t uprec; - errno = 0; + set_errno(0); uprec = malloc_strtoumax(f, (char **)&f, 10); - assert(uprec != UINTMAX_MAX || errno != ERANGE); + assert(uprec != UINTMAX_MAX || get_errno() != + ERANGE); prec = (int)uprec; break; } diff --git a/test/aligned_alloc.c b/test/aligned_alloc.c index 81caa0ad..5a9b0cae 100644 --- a/test/aligned_alloc.c +++ b/test/aligned_alloc.c @@ -17,18 +17,18 @@ main(void) /* Test error conditions. 
*/ alignment = 0; - errno = 0; + set_errno(0); p = aligned_alloc(alignment, 1); - if (p != NULL || errno != EINVAL) { + if (p != NULL || get_errno() != EINVAL) { malloc_printf( "Expected error for invalid alignment %zu\n", alignment); } for (alignment = sizeof(size_t); alignment < MAXALIGN; alignment <<= 1) { - errno = 0; + set_errno(0); p = aligned_alloc(alignment + 1, 1); - if (p != NULL || errno != EINVAL) { + if (p != NULL || get_errno() != EINVAL) { malloc_printf( "Expected error for invalid alignment %zu\n", alignment + 1); @@ -42,9 +42,9 @@ main(void) alignment = 0x80000000LU; size = 0x80000000LU; #endif - errno = 0; + set_errno(0); p = aligned_alloc(alignment, size); - if (p != NULL || errno != ENOMEM) { + if (p != NULL || get_errno() != ENOMEM) { malloc_printf( "Expected error for aligned_alloc(%zu, %zu)\n", alignment, size); @@ -57,9 +57,9 @@ main(void) alignment = 0x40000000LU; size = 0x84000001LU; #endif - errno = 0; + set_errno(0); p = aligned_alloc(alignment, size); - if (p != NULL || errno != ENOMEM) { + if (p != NULL || get_errno() != ENOMEM) { malloc_printf( "Expected error for aligned_alloc(%zu, %zu)\n", alignment, size); @@ -71,9 +71,9 @@ main(void) #else size = 0xfffffff0LU; #endif - errno = 0; + set_errno(0); p = aligned_alloc(alignment, size); - if (p != NULL || errno != ENOMEM) { + if (p != NULL || get_errno() != ENOMEM) { malloc_printf( "Expected error for aligned_alloc(&p, %zu, %zu)\n", alignment, size); @@ -93,9 +93,12 @@ main(void) for (i = 0; i < NITER; i++) { ps[i] = aligned_alloc(alignment, size); if (ps[i] == NULL) { + char buf[BUFERROR_BUF]; + + buferror(buf, sizeof(buf)); malloc_printf( "Error for size %zu (%#zx): %s\n", - size, size, strerror(errno)); + size, size, buf); exit(1); } total += malloc_usable_size(ps[i]); From 7cdea3973cab8640d1f44c7638ed5e30ed18be24 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Mon, 30 Apr 2012 12:38:27 +0200 Subject: [PATCH 178/205] Few configure.ac adjustments - Use the extensions autoconf finds for object and executable files. - Remove the sorev variable, and replace SOREV definition with sorev's. - Default to je_ prefix on win32. --- bin/jemalloc.sh.in | 2 +- configure.ac | 18 ++++++------------ include/jemalloc/internal/util.h | 4 ++-- 3 files changed, 9 insertions(+), 15 deletions(-) diff --git a/bin/jemalloc.sh.in b/bin/jemalloc.sh.in index 58683f5d..cdf36737 100644 --- a/bin/jemalloc.sh.in +++ b/bin/jemalloc.sh.in @@ -4,6 +4,6 @@ prefix=@prefix@ exec_prefix=@exec_prefix@ libdir=@libdir@ -@LD_PRELOAD_VAR@=${libdir}/libjemalloc.@sorev@ +@LD_PRELOAD_VAR@=${libdir}/libjemalloc.@SOREV@ export @LD_PRELOAD_VAR@ exec "$@" diff --git a/configure.ac b/configure.ac index 98211c8c..6e74238f 100644 --- a/configure.ac +++ b/configure.ac @@ -195,14 +195,13 @@ AC_DEFINE_UNQUOTED([CPU_SPINWAIT], [$CPU_SPINWAIT]) LD_PRELOAD_VAR="LD_PRELOAD" so="so" -o="o" +o="$ac_objext" a="a" -exe= +exe="$ac_exeext" libprefix="lib" DSO_LDFLAGS='-shared -Wl,-soname,$(@F)' RPATH='-Wl,-rpath,$(1)' -SOREV='$(SO).$(REV)' -sorev="${so}.${rev}" +SOREV="${so}.${rev}" PIC_CFLAGS='-fPIC -DPIC' dnl Heap profiling uses the log(3) function. 
@@ -226,8 +225,7 @@ case "${host}" in so="dylib" force_tls="0" DSO_LDFLAGS='-shared -Wl,-dylib_install_name,$(@F)' - SOREV='$(REV).$(SO)' - sorev="${rev}.${so}" + SOREV="${rev}.${so}" ;; *-*-freebsd*) CFLAGS="$CFLAGS" @@ -281,12 +279,9 @@ case "${host}" in RPATH="" so="dll" DSO_LDFLAGS="-shared" - o="obj" a="lib" libprefix="" - exe=".exe" - SOREV='$(SO)' - sorev="${so}" + SOREV="${so}" PIC_CFLAGS="" ;; *) @@ -304,7 +299,6 @@ AC_SUBST([exe]) AC_SUBST([libprefix]) AC_SUBST([DSO_LDFLAGS]) AC_SUBST([SOREV]) -AC_SUBST([sorev]) AC_SUBST([PIC_CFLAGS]) JE_COMPILABLE([__attribute__ syntax], @@ -419,7 +413,7 @@ dnl Do not prefix public APIs by default. AC_ARG_WITH([jemalloc_prefix], [AS_HELP_STRING([--with-jemalloc-prefix=], [Prefix to prepend to all public APIs])], [JEMALLOC_PREFIX="$with_jemalloc_prefix"], - [if test "x$abi" != "xmacho" ; then + [if test "x$abi" != "xmacho" -a "x$abi" != "xpecoff"; then JEMALLOC_PREFIX="" else JEMALLOC_PREFIX="je_" diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index 9661c7b1..d84c7a98 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -161,9 +161,9 @@ get_errno(void) { #ifdef _WIN32 - return GetLastError(); + return (GetLastError()); #else - return errno; + return (errno); #endif } #endif From da99e31105eb709ef4ec8a120b115c32a6b9723a Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Mon, 30 Apr 2012 12:38:29 +0200 Subject: [PATCH 179/205] Replace JEMALLOC_ATTR with various different macros when it makes sense These newly added macros will be used to implement the equivalent under MSVC. Also, move the definitions to headers, where they make more sense, and for some, are even more useful there (e.g. malloc). --- include/jemalloc/internal/util.h | 2 -- include/jemalloc/jemalloc.h.in | 49 ++++++++++++++------------ include/jemalloc/jemalloc_defs.h.in | 14 +++++--- src/arena.c | 2 +- src/jemalloc.c | 53 +++++------------------------ src/mutex.c | 5 ++- src/tsd.c | 6 ++-- src/util.c | 7 ++-- 8 files changed, 56 insertions(+), 82 deletions(-) diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index d84c7a98..fa88bf3f 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -82,8 +82,6 @@ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -extern void (*je_malloc_message)(void *wcbopaque, const char *s); - int buferror(char *buf, size_t buflen); uintmax_t malloc_strtoumax(const char *nptr, char **endptr, int base); diff --git a/include/jemalloc/jemalloc.h.in b/include/jemalloc/jemalloc.h.in index f0581dbd..47a4b9b7 100644 --- a/include/jemalloc/jemalloc.h.in +++ b/include/jemalloc/jemalloc.h.in @@ -36,35 +36,40 @@ extern "C" { * namespace management, and should be omitted in application code unless * JEMALLOC_NO_DEMANGLE is defined (see below).
*/ -extern const char *je_malloc_conf; -extern void (*je_malloc_message)(void *, const char *); +extern JEMALLOC_EXPORT const char *je_malloc_conf; +extern JEMALLOC_EXPORT void (*je_malloc_message)(void *cbopaque, + const char *s); -void *je_malloc(size_t size) JEMALLOC_ATTR(malloc); -void *je_calloc(size_t num, size_t size) JEMALLOC_ATTR(malloc); -int je_posix_memalign(void **memptr, size_t alignment, size_t size) - JEMALLOC_ATTR(nonnull(1)); -void *je_aligned_alloc(size_t alignment, size_t size) JEMALLOC_ATTR(malloc); -void *je_realloc(void *ptr, size_t size); -void je_free(void *ptr); +JEMALLOC_EXPORT void *je_malloc(size_t size) JEMALLOC_ATTR(malloc); +JEMALLOC_EXPORT void *je_calloc(size_t num, size_t size) + JEMALLOC_ATTR(malloc); +JEMALLOC_EXPORT int je_posix_memalign(void **memptr, size_t alignment, + size_t size) JEMALLOC_ATTR(nonnull(1)); +JEMALLOC_EXPORT void *je_aligned_alloc(size_t alignment, size_t size) + JEMALLOC_ATTR(malloc); +JEMALLOC_EXPORT void *je_realloc(void *ptr, size_t size); +JEMALLOC_EXPORT void je_free(void *ptr); -size_t je_malloc_usable_size(const void *ptr); -void je_malloc_stats_print(void (*write_cb)(void *, const char *), - void *je_cbopaque, const char *opts); -int je_mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp, - size_t newlen); -int je_mallctlnametomib(const char *name, size_t *mibp, size_t *miblenp); -int je_mallctlbymib(const size_t *mib, size_t miblen, void *oldp, +JEMALLOC_EXPORT size_t je_malloc_usable_size(const void *ptr); +JEMALLOC_EXPORT void je_malloc_stats_print(void (*write_cb)(void *, + const char *), void *je_cbopaque, const char *opts); +JEMALLOC_EXPORT int je_mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp, size_t newlen); +JEMALLOC_EXPORT int je_mallctlnametomib(const char *name, size_t *mibp, + size_t *miblenp); +JEMALLOC_EXPORT int je_mallctlbymib(const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen); #ifdef JEMALLOC_EXPERIMENTAL -int je_allocm(void **ptr, size_t *rsize, size_t size, int flags) - JEMALLOC_ATTR(nonnull(1)); -int je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, +JEMALLOC_EXPORT int je_allocm(void **ptr, size_t *rsize, size_t size, int flags) JEMALLOC_ATTR(nonnull(1)); -int je_sallocm(const void *ptr, size_t *rsize, int flags) +JEMALLOC_EXPORT int je_rallocm(void **ptr, size_t *rsize, size_t size, + size_t extra, int flags) JEMALLOC_ATTR(nonnull(1)); +JEMALLOC_EXPORT int je_sallocm(const void *ptr, size_t *rsize, int flags) JEMALLOC_ATTR(nonnull(1)); -int je_dallocm(void *ptr, int flags) JEMALLOC_ATTR(nonnull(1)); -int je_nallocm(size_t *rsize, size_t size, int flags); +JEMALLOC_EXPORT int je_dallocm(void *ptr, int flags) + JEMALLOC_ATTR(nonnull(1)); +JEMALLOC_EXPORT int je_nallocm(size_t *rsize, size_t size, int flags); #endif /* diff --git a/include/jemalloc/jemalloc_defs.h.in b/include/jemalloc/jemalloc_defs.h.in index 6e816557..126f6b76 100644 --- a/include/jemalloc/jemalloc_defs.h.in +++ b/include/jemalloc/jemalloc_defs.h.in @@ -104,11 +104,17 @@ /* Defined if __attribute__((...)) syntax is supported. 
*/ #undef JEMALLOC_HAVE_ATTR #ifdef JEMALLOC_HAVE_ATTR -# define JEMALLOC_CATTR(s, a) __attribute__((s)) -# define JEMALLOC_ATTR(s) JEMALLOC_CATTR(s,) +# define JEMALLOC_ATTR(s) __attribute__((s)) +# define JEMALLOC_EXPORT JEMALLOC_ATTR(visibility("default")) +# define JEMALLOC_ALIGNED(s) JEMALLOC_ATTR(aligned(s)) +# define JEMALLOC_SECTION(s) JEMALLOC_ATTR(section(s)) +# define JEMALLOC_NOINLINE JEMALLOC_ATTR(noinline) #else -# define JEMALLOC_CATTR(s, a) a -# define JEMALLOC_ATTR(s) JEMALLOC_CATTR(s,) +# define JEMALLOC_ATTR(s) +# define JEMALLOC_EXPORT +# define JEMALLOC_ALIGNED(s) +# define JEMALLOC_SECTION(s) +# define JEMALLOC_NOINLINE #endif /* Defined if sbrk() is supported. */ diff --git a/src/arena.c b/src/arena.c index 7fac3619..51c268c2 100644 --- a/src/arena.c +++ b/src/arena.c @@ -7,7 +7,7 @@ ssize_t opt_lg_dirty_mult = LG_DIRTY_MULT_DEFAULT; arena_bin_info_t arena_bin_info[NBINS]; -JEMALLOC_ATTR(aligned(CACHELINE)) +JEMALLOC_ALIGNED(CACHELINE) const uint8_t small_size2bin[] = { #define S2B_8(i) i, #define S2B_16(i) S2B_8(i) S2B_8(i) diff --git a/src/jemalloc.c b/src/jemalloc.c index cae0098b..2f858c3c 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -9,7 +9,7 @@ malloc_tsd_data(, thread_allocated, thread_allocated_t, THREAD_ALLOCATED_INITIALIZER) /* Runtime configuration options. */ -const char *je_malloc_conf JEMALLOC_ATTR(visibility("default")); +const char *je_malloc_conf; #ifdef JEMALLOC_DEBUG bool opt_abort = true; # ifdef JEMALLOC_FILL @@ -787,8 +787,6 @@ malloc_init_hard(void) * Begin malloc(3)-compatible functions. */ -JEMALLOC_ATTR(malloc) -JEMALLOC_ATTR(visibility("default")) void * je_malloc(size_t size) { @@ -938,8 +936,6 @@ label_return: return (ret); } -JEMALLOC_ATTR(nonnull(1)) -JEMALLOC_ATTR(visibility("default")) int je_posix_memalign(void **memptr, size_t alignment, size_t size) { @@ -949,8 +945,6 @@ je_posix_memalign(void **memptr, size_t alignment, size_t size) return (ret); } -JEMALLOC_ATTR(malloc) -JEMALLOC_ATTR(visibility("default")) void * je_aligned_alloc(size_t alignment, size_t size) { @@ -966,8 +960,6 @@ je_aligned_alloc(size_t alignment, size_t size) return (ret); } -JEMALLOC_ATTR(malloc) -JEMALLOC_ATTR(visibility("default")) void * je_calloc(size_t num, size_t size) { @@ -1043,7 +1035,6 @@ label_return: return (ret); } -JEMALLOC_ATTR(visibility("default")) void * je_realloc(void *ptr, size_t size) { @@ -1191,7 +1182,6 @@ label_return: return (ret); } -JEMALLOC_ATTR(visibility("default")) void je_free(void *ptr) { @@ -1226,8 +1216,6 @@ je_free(void *ptr) */ #ifdef JEMALLOC_OVERRIDE_MEMALIGN -JEMALLOC_ATTR(malloc) -JEMALLOC_ATTR(visibility("default")) void * je_memalign(size_t alignment, size_t size) { @@ -1239,8 +1227,6 @@ je_memalign(size_t alignment, size_t size) #endif #ifdef JEMALLOC_OVERRIDE_VALLOC -JEMALLOC_ATTR(malloc) -JEMALLOC_ATTR(visibility("default")) void * je_valloc(size_t size) { @@ -1269,17 +1255,12 @@ je_valloc(size_t size) * passed an extra argument for the caller return address, which will be * ignored. 
*/ -JEMALLOC_ATTR(visibility("default")) -void (* const __free_hook)(void *ptr) = je_free; - -JEMALLOC_ATTR(visibility("default")) -void *(* const __malloc_hook)(size_t size) = je_malloc; - -JEMALLOC_ATTR(visibility("default")) -void *(* const __realloc_hook)(void *ptr, size_t size) = je_realloc; - -JEMALLOC_ATTR(visibility("default")) -void *(* const __memalign_hook)(size_t alignment, size_t size) = je_memalign; +JEMALLOC_EXPORT void (* const __free_hook)(void *ptr) = je_free; +JEMALLOC_EXPORT void *(* const __malloc_hook)(size_t size) = je_malloc; +JEMALLOC_EXPORT void *(* const __realloc_hook)(void *ptr, size_t size) = + je_realloc; +JEMALLOC_EXPORT void *(* const __memalign_hook)(size_t alignment, size_t size) = + je_memalign; #endif /* @@ -1290,7 +1271,6 @@ void *(* const __memalign_hook)(size_t alignment, size_t size) = je_memalign; * Begin non-standard functions. */ -JEMALLOC_ATTR(visibility("default")) size_t je_malloc_usable_size(const void *ptr) { @@ -1306,7 +1286,6 @@ je_malloc_usable_size(const void *ptr) return (ret); } -JEMALLOC_ATTR(visibility("default")) void je_malloc_stats_print(void (*write_cb)(void *, const char *), void *cbopaque, const char *opts) @@ -1315,7 +1294,6 @@ je_malloc_stats_print(void (*write_cb)(void *, const char *), void *cbopaque, stats_print(write_cb, cbopaque, opts); } -JEMALLOC_ATTR(visibility("default")) int je_mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp, size_t newlen) @@ -1327,7 +1305,6 @@ je_mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp, return (ctl_byname(name, oldp, oldlenp, newp, newlen)); } -JEMALLOC_ATTR(visibility("default")) int je_mallctlnametomib(const char *name, size_t *mibp, size_t *miblenp) { @@ -1338,7 +1315,6 @@ je_mallctlnametomib(const char *name, size_t *mibp, size_t *miblenp) return (ctl_nametomib(name, mibp, miblenp)); } -JEMALLOC_ATTR(visibility("default")) int je_mallctlbymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) @@ -1374,8 +1350,6 @@ iallocm(size_t usize, size_t alignment, bool zero) return (imalloc(usize)); } -JEMALLOC_ATTR(nonnull(1)) -JEMALLOC_ATTR(visibility("default")) int je_allocm(void **ptr, size_t *rsize, size_t size, int flags) { @@ -1444,8 +1418,6 @@ label_oom: return (ALLOCM_ERR_OOM); } -JEMALLOC_ATTR(nonnull(1)) -JEMALLOC_ATTR(visibility("default")) int je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags) { @@ -1555,8 +1527,6 @@ label_oom: return (ALLOCM_ERR_OOM); } -JEMALLOC_ATTR(nonnull(1)) -JEMALLOC_ATTR(visibility("default")) int je_sallocm(const void *ptr, size_t *rsize, int flags) { @@ -1576,8 +1546,6 @@ je_sallocm(const void *ptr, size_t *rsize, int flags) return (ALLOCM_SUCCESS); } -JEMALLOC_ATTR(nonnull(1)) -JEMALLOC_ATTR(visibility("default")) int je_dallocm(void *ptr, int flags) { @@ -1605,7 +1573,6 @@ je_dallocm(void *ptr, int flags) return (ALLOCM_SUCCESS); } -JEMALLOC_ATTR(visibility("default")) int je_nallocm(size_t *rsize, size_t size, int flags) { @@ -1641,8 +1608,7 @@ je_nallocm(size_t *rsize, size_t size, int flags) void jemalloc_prefork(void) #else -JEMALLOC_ATTR(visibility("default")) -void +JEMALLOC_EXPORT void _malloc_prefork(void) #endif { @@ -1663,8 +1629,7 @@ _malloc_prefork(void) void jemalloc_postfork_parent(void) #else -JEMALLOC_ATTR(visibility("default")) -void +JEMALLOC_EXPORT void _malloc_postfork(void) #endif { diff --git a/src/mutex.c b/src/mutex.c index 159d82a3..37a843e6 100644 --- a/src/mutex.c +++ b/src/mutex.c @@ -48,8 +48,7 @@ pthread_create_once(void) 
isthreaded = true; } -JEMALLOC_ATTR(visibility("default")) -int +JEMALLOC_EXPORT int pthread_create(pthread_t *__restrict thread, const pthread_attr_t *__restrict attr, void *(*start_routine)(void *), void *__restrict arg) @@ -72,6 +71,7 @@ int _pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex, bool malloc_mutex_init(malloc_mutex_t *mutex) { + #ifdef _WIN32 if (!InitializeCriticalSectionAndSpinCount(&mutex->lock, _CRT_SPINCOUNT)) @@ -98,7 +98,6 @@ malloc_mutex_init(malloc_mutex_t *mutex) return (true); } pthread_mutexattr_destroy(&attr); - #endif return (false); } diff --git a/src/tsd.c b/src/tsd.c index d7714b02..0506c8a0 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -32,7 +32,9 @@ malloc_tsd_no_cleanup(void *arg) } #if defined(JEMALLOC_MALLOC_THREAD_CLEANUP) || defined(_WIN32) -JEMALLOC_ATTR(visibility("default")) +#ifndef _WIN32 +JEMALLOC_EXPORT +#endif void _malloc_thread_cleanup(void) { @@ -91,7 +93,7 @@ _tls_callback(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved) return (true); } -JEMALLOC_ATTR(section(".CRT$XLY")) JEMALLOC_ATTR(used) +JEMALLOC_SECTION(".CRT$XLY") JEMALLOC_ATTR(used) static const BOOL (WINAPI *tls_callback)(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved) = _tls_callback; #endif diff --git a/src/util.c b/src/util.c index 64d53dd9..3c92ad2c 100644 --- a/src/util.c +++ b/src/util.c @@ -40,8 +40,7 @@ static char *x2s(uintmax_t x, bool alt_form, bool uppercase, char *s, /******************************************************************************/ /* malloc_message() setup. */ -JEMALLOC_CATTR(visibility("hidden"), static) -void +static void wrtmessage(void *cbopaque, const char *s) { @@ -57,8 +56,8 @@ wrtmessage(void *cbopaque, const char *s) #endif } -void (*je_malloc_message)(void *, const char *s) - JEMALLOC_ATTR(visibility("default")) = wrtmessage; +JEMALLOC_EXPORT void (*je_malloc_message)(void *, const char *s) = + wrtmessage; /* * glibc provides a non-standard strerror_r() when _GNU_SOURCE is defined, so From b45c57ecaf2bf7ff8abe013b856be90fb4e365f3 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Mon, 30 Apr 2012 12:38:30 +0200 Subject: [PATCH 180/205] Import msinttypes http://code.google.com/p/msinttypes/ --- include/msc_compat/inttypes.h | 305 ++++++++++++++++++++++++++++++++++ include/msc_compat/stdint.h | 247 +++++++++++++++++++++++++++ 2 files changed, 552 insertions(+) create mode 100644 include/msc_compat/inttypes.h create mode 100644 include/msc_compat/stdint.h diff --git a/include/msc_compat/inttypes.h b/include/msc_compat/inttypes.h new file mode 100644 index 00000000..4b3828a2 --- /dev/null +++ b/include/msc_compat/inttypes.h @@ -0,0 +1,305 @@ +// ISO C9x compliant inttypes.h for Microsoft Visual Studio +// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 +// +// Copyright (c) 2006 Alexander Chemeris +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. The name of the author may be used to endorse or promote products +// derived from this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED +// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO +// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef _MSC_VER // [ +#error "Use this header only with Microsoft Visual C++ compilers!" +#endif // _MSC_VER ] + +#ifndef _MSC_INTTYPES_H_ // [ +#define _MSC_INTTYPES_H_ + +#if _MSC_VER > 1000 +#pragma once +#endif + +#include "stdint.h" + +// 7.8 Format conversion of integer types + +typedef struct { + intmax_t quot; + intmax_t rem; +} imaxdiv_t; + +// 7.8.1 Macros for format specifiers + +#if !defined(__cplusplus) || defined(__STDC_FORMAT_MACROS) // [ See footnote 185 at page 198 + +// The fprintf macros for signed integers are: +#define PRId8 "d" +#define PRIi8 "i" +#define PRIdLEAST8 "d" +#define PRIiLEAST8 "i" +#define PRIdFAST8 "d" +#define PRIiFAST8 "i" + +#define PRId16 "hd" +#define PRIi16 "hi" +#define PRIdLEAST16 "hd" +#define PRIiLEAST16 "hi" +#define PRIdFAST16 "hd" +#define PRIiFAST16 "hi" + +#define PRId32 "I32d" +#define PRIi32 "I32i" +#define PRIdLEAST32 "I32d" +#define PRIiLEAST32 "I32i" +#define PRIdFAST32 "I32d" +#define PRIiFAST32 "I32i" + +#define PRId64 "I64d" +#define PRIi64 "I64i" +#define PRIdLEAST64 "I64d" +#define PRIiLEAST64 "I64i" +#define PRIdFAST64 "I64d" +#define PRIiFAST64 "I64i" + +#define PRIdMAX "I64d" +#define PRIiMAX "I64i" + +#define PRIdPTR "Id" +#define PRIiPTR "Ii" + +// The fprintf macros for unsigned integers are: +#define PRIo8 "o" +#define PRIu8 "u" +#define PRIx8 "x" +#define PRIX8 "X" +#define PRIoLEAST8 "o" +#define PRIuLEAST8 "u" +#define PRIxLEAST8 "x" +#define PRIXLEAST8 "X" +#define PRIoFAST8 "o" +#define PRIuFAST8 "u" +#define PRIxFAST8 "x" +#define PRIXFAST8 "X" + +#define PRIo16 "ho" +#define PRIu16 "hu" +#define PRIx16 "hx" +#define PRIX16 "hX" +#define PRIoLEAST16 "ho" +#define PRIuLEAST16 "hu" +#define PRIxLEAST16 "hx" +#define PRIXLEAST16 "hX" +#define PRIoFAST16 "ho" +#define PRIuFAST16 "hu" +#define PRIxFAST16 "hx" +#define PRIXFAST16 "hX" + +#define PRIo32 "I32o" +#define PRIu32 "I32u" +#define PRIx32 "I32x" +#define PRIX32 "I32X" +#define PRIoLEAST32 "I32o" +#define PRIuLEAST32 "I32u" +#define PRIxLEAST32 "I32x" +#define PRIXLEAST32 "I32X" +#define PRIoFAST32 "I32o" +#define PRIuFAST32 "I32u" +#define PRIxFAST32 "I32x" +#define PRIXFAST32 "I32X" + +#define PRIo64 "I64o" +#define PRIu64 "I64u" +#define PRIx64 "I64x" +#define PRIX64 "I64X" +#define PRIoLEAST64 "I64o" +#define PRIuLEAST64 "I64u" +#define PRIxLEAST64 "I64x" +#define PRIXLEAST64 "I64X" +#define PRIoFAST64 "I64o" +#define PRIuFAST64 "I64u" +#define PRIxFAST64 "I64x" +#define PRIXFAST64 "I64X" + +#define PRIoMAX "I64o" +#define PRIuMAX "I64u" +#define PRIxMAX "I64x" +#define PRIXMAX "I64X" + +#define PRIoPTR "Io" +#define PRIuPTR "Iu" +#define PRIxPTR "Ix" +#define PRIXPTR "IX" + +// The fscanf macros for signed integers are: +#define SCNd8 "d" 
+#define SCNi8 "i" +#define SCNdLEAST8 "d" +#define SCNiLEAST8 "i" +#define SCNdFAST8 "d" +#define SCNiFAST8 "i" + +#define SCNd16 "hd" +#define SCNi16 "hi" +#define SCNdLEAST16 "hd" +#define SCNiLEAST16 "hi" +#define SCNdFAST16 "hd" +#define SCNiFAST16 "hi" + +#define SCNd32 "ld" +#define SCNi32 "li" +#define SCNdLEAST32 "ld" +#define SCNiLEAST32 "li" +#define SCNdFAST32 "ld" +#define SCNiFAST32 "li" + +#define SCNd64 "I64d" +#define SCNi64 "I64i" +#define SCNdLEAST64 "I64d" +#define SCNiLEAST64 "I64i" +#define SCNdFAST64 "I64d" +#define SCNiFAST64 "I64i" + +#define SCNdMAX "I64d" +#define SCNiMAX "I64i" + +#ifdef _WIN64 // [ +# define SCNdPTR "I64d" +# define SCNiPTR "I64i" +#else // _WIN64 ][ +# define SCNdPTR "ld" +# define SCNiPTR "li" +#endif // _WIN64 ] + +// The fscanf macros for unsigned integers are: +#define SCNo8 "o" +#define SCNu8 "u" +#define SCNx8 "x" +#define SCNX8 "X" +#define SCNoLEAST8 "o" +#define SCNuLEAST8 "u" +#define SCNxLEAST8 "x" +#define SCNXLEAST8 "X" +#define SCNoFAST8 "o" +#define SCNuFAST8 "u" +#define SCNxFAST8 "x" +#define SCNXFAST8 "X" + +#define SCNo16 "ho" +#define SCNu16 "hu" +#define SCNx16 "hx" +#define SCNX16 "hX" +#define SCNoLEAST16 "ho" +#define SCNuLEAST16 "hu" +#define SCNxLEAST16 "hx" +#define SCNXLEAST16 "hX" +#define SCNoFAST16 "ho" +#define SCNuFAST16 "hu" +#define SCNxFAST16 "hx" +#define SCNXFAST16 "hX" + +#define SCNo32 "lo" +#define SCNu32 "lu" +#define SCNx32 "lx" +#define SCNX32 "lX" +#define SCNoLEAST32 "lo" +#define SCNuLEAST32 "lu" +#define SCNxLEAST32 "lx" +#define SCNXLEAST32 "lX" +#define SCNoFAST32 "lo" +#define SCNuFAST32 "lu" +#define SCNxFAST32 "lx" +#define SCNXFAST32 "lX" + +#define SCNo64 "I64o" +#define SCNu64 "I64u" +#define SCNx64 "I64x" +#define SCNX64 "I64X" +#define SCNoLEAST64 "I64o" +#define SCNuLEAST64 "I64u" +#define SCNxLEAST64 "I64x" +#define SCNXLEAST64 "I64X" +#define SCNoFAST64 "I64o" +#define SCNuFAST64 "I64u" +#define SCNxFAST64 "I64x" +#define SCNXFAST64 "I64X" + +#define SCNoMAX "I64o" +#define SCNuMAX "I64u" +#define SCNxMAX "I64x" +#define SCNXMAX "I64X" + +#ifdef _WIN64 // [ +# define SCNoPTR "I64o" +# define SCNuPTR "I64u" +# define SCNxPTR "I64x" +# define SCNXPTR "I64X" +#else // _WIN64 ][ +# define SCNoPTR "lo" +# define SCNuPTR "lu" +# define SCNxPTR "lx" +# define SCNXPTR "lX" +#endif // _WIN64 ] + +#endif // __STDC_FORMAT_MACROS ] + +// 7.8.2 Functions for greatest-width integer types + +// 7.8.2.1 The imaxabs function +#define imaxabs _abs64 + +// 7.8.2.2 The imaxdiv function + +// This is modified version of div() function from Microsoft's div.c found +// in %MSVC.NET%\crt\src\div.c +#ifdef STATIC_IMAXDIV // [ +static +#else // STATIC_IMAXDIV ][ +_inline +#endif // STATIC_IMAXDIV ] +imaxdiv_t __cdecl imaxdiv(intmax_t numer, intmax_t denom) +{ + imaxdiv_t result; + + result.quot = numer / denom; + result.rem = numer % denom; + + if (numer < 0 && result.rem > 0) { + // did division wrong; must fix up + ++result.quot; + result.rem -= denom; + } + + return result; +} + +// 7.8.2.3 The strtoimax and strtoumax functions +#define strtoimax _strtoi64 +#define strtoumax _strtoui64 + +// 7.8.2.4 The wcstoimax and wcstoumax functions +#define wcstoimax _wcstoi64 +#define wcstoumax _wcstoui64 + + +#endif // _MSC_INTTYPES_H_ ] diff --git a/include/msc_compat/stdint.h b/include/msc_compat/stdint.h new file mode 100644 index 00000000..d02608a5 --- /dev/null +++ b/include/msc_compat/stdint.h @@ -0,0 +1,247 @@ +// ISO C9x compliant stdint.h for Microsoft Visual Studio +// Based on ISO/IEC 9899:TC2 Committee 
draft (May 6, 2005) WG14/N1124 +// +// Copyright (c) 2006-2008 Alexander Chemeris +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. The name of the author may be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED +// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO +// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef _MSC_VER // [ +#error "Use this header only with Microsoft Visual C++ compilers!" +#endif // _MSC_VER ] + +#ifndef _MSC_STDINT_H_ // [ +#define _MSC_STDINT_H_ + +#if _MSC_VER > 1000 +#pragma once +#endif + +#include + +// For Visual Studio 6 in C++ mode and for many Visual Studio versions when +// compiling for ARM we should wrap include with 'extern "C++" {}' +// or compiler give many errors like this: +// error C2733: second C linkage of overloaded function 'wmemchr' not allowed +#ifdef __cplusplus +extern "C" { +#endif +# include +#ifdef __cplusplus +} +#endif + +// Define _W64 macros to mark types changing their size, like intptr_t. +#ifndef _W64 +# if !defined(__midl) && (defined(_X86_) || defined(_M_IX86)) && _MSC_VER >= 1300 +# define _W64 __w64 +# else +# define _W64 +# endif +#endif + + +// 7.18.1 Integer types + +// 7.18.1.1 Exact-width integer types + +// Visual Studio 6 and Embedded Visual C++ 4 doesn't +// realize that, e.g. char has the same size as __int8 +// so we give up on __intX for them. 
+#if (_MSC_VER < 1300) + typedef signed char int8_t; + typedef signed short int16_t; + typedef signed int int32_t; + typedef unsigned char uint8_t; + typedef unsigned short uint16_t; + typedef unsigned int uint32_t; +#else + typedef signed __int8 int8_t; + typedef signed __int16 int16_t; + typedef signed __int32 int32_t; + typedef unsigned __int8 uint8_t; + typedef unsigned __int16 uint16_t; + typedef unsigned __int32 uint32_t; +#endif +typedef signed __int64 int64_t; +typedef unsigned __int64 uint64_t; + + +// 7.18.1.2 Minimum-width integer types +typedef int8_t int_least8_t; +typedef int16_t int_least16_t; +typedef int32_t int_least32_t; +typedef int64_t int_least64_t; +typedef uint8_t uint_least8_t; +typedef uint16_t uint_least16_t; +typedef uint32_t uint_least32_t; +typedef uint64_t uint_least64_t; + +// 7.18.1.3 Fastest minimum-width integer types +typedef int8_t int_fast8_t; +typedef int16_t int_fast16_t; +typedef int32_t int_fast32_t; +typedef int64_t int_fast64_t; +typedef uint8_t uint_fast8_t; +typedef uint16_t uint_fast16_t; +typedef uint32_t uint_fast32_t; +typedef uint64_t uint_fast64_t; + +// 7.18.1.4 Integer types capable of holding object pointers +#ifdef _WIN64 // [ + typedef signed __int64 intptr_t; + typedef unsigned __int64 uintptr_t; +#else // _WIN64 ][ + typedef _W64 signed int intptr_t; + typedef _W64 unsigned int uintptr_t; +#endif // _WIN64 ] + +// 7.18.1.5 Greatest-width integer types +typedef int64_t intmax_t; +typedef uint64_t uintmax_t; + + +// 7.18.2 Limits of specified-width integer types + +#if !defined(__cplusplus) || defined(__STDC_LIMIT_MACROS) // [ See footnote 220 at page 257 and footnote 221 at page 259 + +// 7.18.2.1 Limits of exact-width integer types +#define INT8_MIN ((int8_t)_I8_MIN) +#define INT8_MAX _I8_MAX +#define INT16_MIN ((int16_t)_I16_MIN) +#define INT16_MAX _I16_MAX +#define INT32_MIN ((int32_t)_I32_MIN) +#define INT32_MAX _I32_MAX +#define INT64_MIN ((int64_t)_I64_MIN) +#define INT64_MAX _I64_MAX +#define UINT8_MAX _UI8_MAX +#define UINT16_MAX _UI16_MAX +#define UINT32_MAX _UI32_MAX +#define UINT64_MAX _UI64_MAX + +// 7.18.2.2 Limits of minimum-width integer types +#define INT_LEAST8_MIN INT8_MIN +#define INT_LEAST8_MAX INT8_MAX +#define INT_LEAST16_MIN INT16_MIN +#define INT_LEAST16_MAX INT16_MAX +#define INT_LEAST32_MIN INT32_MIN +#define INT_LEAST32_MAX INT32_MAX +#define INT_LEAST64_MIN INT64_MIN +#define INT_LEAST64_MAX INT64_MAX +#define UINT_LEAST8_MAX UINT8_MAX +#define UINT_LEAST16_MAX UINT16_MAX +#define UINT_LEAST32_MAX UINT32_MAX +#define UINT_LEAST64_MAX UINT64_MAX + +// 7.18.2.3 Limits of fastest minimum-width integer types +#define INT_FAST8_MIN INT8_MIN +#define INT_FAST8_MAX INT8_MAX +#define INT_FAST16_MIN INT16_MIN +#define INT_FAST16_MAX INT16_MAX +#define INT_FAST32_MIN INT32_MIN +#define INT_FAST32_MAX INT32_MAX +#define INT_FAST64_MIN INT64_MIN +#define INT_FAST64_MAX INT64_MAX +#define UINT_FAST8_MAX UINT8_MAX +#define UINT_FAST16_MAX UINT16_MAX +#define UINT_FAST32_MAX UINT32_MAX +#define UINT_FAST64_MAX UINT64_MAX + +// 7.18.2.4 Limits of integer types capable of holding object pointers +#ifdef _WIN64 // [ +# define INTPTR_MIN INT64_MIN +# define INTPTR_MAX INT64_MAX +# define UINTPTR_MAX UINT64_MAX +#else // _WIN64 ][ +# define INTPTR_MIN INT32_MIN +# define INTPTR_MAX INT32_MAX +# define UINTPTR_MAX UINT32_MAX +#endif // _WIN64 ] + +// 7.18.2.5 Limits of greatest-width integer types +#define INTMAX_MIN INT64_MIN +#define INTMAX_MAX INT64_MAX +#define UINTMAX_MAX UINT64_MAX + +// 7.18.3 Limits of other integer 
types + +#ifdef _WIN64 // [ +# define PTRDIFF_MIN _I64_MIN +# define PTRDIFF_MAX _I64_MAX +#else // _WIN64 ][ +# define PTRDIFF_MIN _I32_MIN +# define PTRDIFF_MAX _I32_MAX +#endif // _WIN64 ] + +#define SIG_ATOMIC_MIN INT_MIN +#define SIG_ATOMIC_MAX INT_MAX + +#ifndef SIZE_MAX // [ +# ifdef _WIN64 // [ +# define SIZE_MAX _UI64_MAX +# else // _WIN64 ][ +# define SIZE_MAX _UI32_MAX +# endif // _WIN64 ] +#endif // SIZE_MAX ] + +// WCHAR_MIN and WCHAR_MAX are also defined in +#ifndef WCHAR_MIN // [ +# define WCHAR_MIN 0 +#endif // WCHAR_MIN ] +#ifndef WCHAR_MAX // [ +# define WCHAR_MAX _UI16_MAX +#endif // WCHAR_MAX ] + +#define WINT_MIN 0 +#define WINT_MAX _UI16_MAX + +#endif // __STDC_LIMIT_MACROS ] + + +// 7.18.4 Limits of other integer types + +#if !defined(__cplusplus) || defined(__STDC_CONSTANT_MACROS) // [ See footnote 224 at page 260 + +// 7.18.4.1 Macros for minimum-width integer constants + +#define INT8_C(val) val##i8 +#define INT16_C(val) val##i16 +#define INT32_C(val) val##i32 +#define INT64_C(val) val##i64 + +#define UINT8_C(val) val##ui8 +#define UINT16_C(val) val##ui16 +#define UINT32_C(val) val##ui32 +#define UINT64_C(val) val##ui64 + +// 7.18.4.2 Macros for greatest-width integer constants +#define INTMAX_C INT64_C +#define UINTMAX_C UINT64_C + +#endif // __STDC_CONSTANT_MACROS ] + + +#endif // _MSC_STDINT_H_ ] From fd97b1dfc76647c3f90f28dc63cc987041fe20df Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Mon, 30 Apr 2012 12:38:31 +0200 Subject: [PATCH 181/205] Add support for MSVC Tested with MSVC 8 32 and 64 bits. --- Makefile.in | 21 ++++++-- configure.ac | 54 +++++++++++++++++-- include/jemalloc/internal/atomic.h | 28 ++++++++++ .../jemalloc/internal/jemalloc_internal.h.in | 21 ++++++-- include/jemalloc/jemalloc_defs.h.in | 10 ++++ .../{msc_compat => msvc_compat}/inttypes.h | 0 include/msvc_compat/stdbool.h | 16 ++++++ include/{msc_compat => msvc_compat}/stdint.h | 0 include/msvc_compat/strings.h | 23 ++++++++ src/jemalloc.c | 11 +++- src/tsd.c | 8 +++ 11 files changed, 177 insertions(+), 15 deletions(-) rename include/{msc_compat => msvc_compat}/inttypes.h (100%) create mode 100644 include/msvc_compat/stdbool.h rename include/{msc_compat => msvc_compat}/stdint.h (100%) create mode 100644 include/msvc_compat/strings.h diff --git a/Makefile.in b/Makefile.in index 7685f156..94f48699 100644 --- a/Makefile.in +++ b/Makefile.in @@ -26,9 +26,11 @@ abs_objroot := @abs_objroot@ CPPFLAGS := @CPPFLAGS@ -I$(srcroot)include -I$(objroot)include CFLAGS := @CFLAGS@ LDFLAGS := @LDFLAGS@ +EXTRA_LDFLAGS := @EXTRA_LDFLAGS@ LIBS := @LIBS@ RPATH_EXTRA := @RPATH_EXTRA@ SO := @so@ +IMPORTLIB := @importlib@ O := @o@ A := @a@ EXE := @exe@ @@ -49,6 +51,9 @@ enable_experimental := @enable_experimental@ DSO_LDFLAGS = @DSO_LDFLAGS@ SOREV = @SOREV@ PIC_CFLAGS = @PIC_CFLAGS@ +CTARGET = @CTARGET@ +LDTARGET = @LDTARGET@ +MKLIB = @MKLIB@ ifeq (macho, $(ABI)) TEST_LIBRARY_PATH := DYLD_FALLBACK_LIBRARY_PATH="$(objroot)lib" @@ -77,9 +82,13 @@ CSRCS := $(srcroot)src/jemalloc.c $(srcroot)src/arena.c $(srcroot)src/atomic.c \ ifeq (macho, $(ABI)) CSRCS += $(srcroot)src/zone.c endif +ifeq ($(IMPORTLIB),$(SO)) STATIC_LIBS := $(objroot)lib/$(LIBJEMALLOC).$(A) +endif ifdef PIC_CFLAGS STATIC_LIBS += $(objroot)lib/$(LIBJEMALLOC)_pic.$(A) +else +STATIC_LIBS += $(objroot)lib/$(LIBJEMALLOC)_s.$(A) endif DSOS := $(objroot)lib/$(LIBJEMALLOC).$(SOREV) ifneq ($(SOREV),$(SO)) @@ -136,10 +145,13 @@ $(CPICOBJS): $(objroot)src/%.pic.$(O): $(srcroot)src/%.c $(CPICOBJS): CFLAGS += $(PIC_CFLAGS) $(CTESTOBJS): $(objroot)test/%.$(O): 
$(srcroot)test/%.c $(CTESTOBJS): CPPFLAGS += -I$(objroot)test +ifneq ($(IMPORTLIB),$(SO)) +$(COBJS): CPPFLAGS += -DDLLEXPORT +endif $(COBJS) $(CPICOBJS) $(CTESTOBJS): %.$(O): @mkdir -p $(@D) - $(CC) $(CFLAGS) -c $(CPPFLAGS) -o $@ $< + $(CC) $(CFLAGS) -c $(CPPFLAGS) $(CTARGET) $< @$(CC) -MM $(CPPFLAGS) -MT $@ -o $(@:%.$(O)=%.d) $< ifneq ($(SOREV),$(SO)) @@ -150,20 +162,21 @@ endif $(objroot)lib/$(LIBJEMALLOC).$(SOREV) : $(if $(PIC_CFLAGS),$(CPICOBJS),$(COBJS)) @mkdir -p $(@D) - $(CC) $(DSO_LDFLAGS) $(call RPATH,$(RPATH_EXTRA)) -o $@ $+ $(LDFLAGS) $(LIBS) + $(CC) $(DSO_LDFLAGS) $(call RPATH,$(RPATH_EXTRA)) $(LDTARGET) $+ $(LDFLAGS) $(LIBS) $(EXTRA_LDFLAGS) $(objroot)lib/$(LIBJEMALLOC)_pic.$(A) : $(CPICOBJS) $(objroot)lib/$(LIBJEMALLOC).$(A) : $(COBJS) +$(objroot)lib/$(LIBJEMALLOC)_s.$(A) : $(COBJS) $(STATIC_LIBS): @mkdir -p $(@D) - ar crus $@ $+ + $(MKLIB) $+ $(objroot)test/bitmap$(EXE): $(objroot)src/bitmap.$(O) $(objroot)test/%$(EXE): $(objroot)test/%.$(O) $(objroot)src/util.$(O) $(DSOS) @mkdir -p $(@D) - $(CC) -o $@ $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) -L$(objroot)lib -ljemalloc$(install_suffix) $(filter -lpthread,$(LIBS)) + $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(filter -lpthread,$(LIBS)) $(EXTRA_LDFLAGS) build_lib_shared: $(DSOS) build_lib_static: $(STATIC_LIBS) diff --git a/configure.ac b/configure.ac index 6e74238f..50eaae65 100644 --- a/configure.ac +++ b/configure.ac @@ -111,6 +111,19 @@ dnl If CFLAGS isn't defined, set CFLAGS to something reasonable. Otherwise, dnl just prevent autoconf from molesting CFLAGS. CFLAGS=$CFLAGS AC_PROG_CC +if test "x$GCC" != "xyes" ; then + AC_CACHE_CHECK([whether compiler is MSVC], + [je_cv_msvc], + [AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], + [ +#ifndef _MSC_VER + int fail[-1]; +#endif +])], + [je_cv_msvc=yes], + [je_cv_msvc=no])]) +fi + if test "x$CFLAGS" = "x" ; then no_CFLAGS="yes" if test "x$GCC" = "xyes" ; then @@ -118,6 +131,12 @@ if test "x$CFLAGS" = "x" ; then JE_CFLAGS_APPEND([-Wall]) JE_CFLAGS_APPEND([-pipe]) JE_CFLAGS_APPEND([-g3]) + elif test "x$je_cv_msvc" = "xyes" ; then + CC="$CC -nologo" + JE_CFLAGS_APPEND([-Zi]) + JE_CFLAGS_APPEND([-MT]) + JE_CFLAGS_APPEND([-W3]) + CPPFLAGS="$CPPFLAGS -I${srcroot}/include/msvc_compat" fi fi dnl Append EXTRA_CFLAGS to CFLAGS, if defined. @@ -195,6 +214,7 @@ AC_DEFINE_UNQUOTED([CPU_SPINWAIT], [$CPU_SPINWAIT]) LD_PRELOAD_VAR="LD_PRELOAD" so="so" +importlib="${so}" o="$ac_objext" a="a" exe="$ac_exeext" @@ -203,9 +223,10 @@ DSO_LDFLAGS='-shared -Wl,-soname,$(@F)' RPATH='-Wl,-rpath,$(1)' SOREV="${so}.${rev}" PIC_CFLAGS='-fPIC -DPIC' - -dnl Heap profiling uses the log(3) function. -LIBS="$LIBS -lm" +CTARGET='-o $@' +LDTARGET='-o $@' +EXTRA_LDFLAGS= +MKLIB='ar crus $@' dnl Platform-specific settings. 
abi and RPATH can probably be determined dnl programmatically, but doing so is error-prone, which makes it generally @@ -223,6 +244,7 @@ case "${host}" in RPATH="" LD_PRELOAD_VAR="DYLD_INSERT_LIBRARIES" so="dylib" + importlib="${so}" force_tls="0" DSO_LDFLAGS='-shared -Wl,-dylib_install_name,$(@F)' SOREV="${rev}.${so}" @@ -278,7 +300,17 @@ case "${host}" in force_tls="0" RPATH="" so="dll" - DSO_LDFLAGS="-shared" + if test "x$je_cv_msvc" = "xyes" ; then + importlib="lib" + DSO_LDFLAGS="-LD" + EXTRA_LDFLAGS="-link -DEBUG" + CTARGET='-Fo$@' + LDTARGET='-Fe$@' + MKLIB='lib -nologo -out:$@' + else + importlib="${so}" + DSO_LDFLAGS="-shared" + fi a="lib" libprefix="" SOREV="${so}" @@ -293,13 +325,23 @@ AC_SUBST([abi]) AC_SUBST([RPATH]) AC_SUBST([LD_PRELOAD_VAR]) AC_SUBST([so]) +AC_SUBST([importlib]) AC_SUBST([o]) AC_SUBST([a]) AC_SUBST([exe]) AC_SUBST([libprefix]) AC_SUBST([DSO_LDFLAGS]) +AC_SUBST([EXTRA_LDFLAGS]) AC_SUBST([SOREV]) AC_SUBST([PIC_CFLAGS]) +AC_SUBST([CTARGET]) +AC_SUBST([LDTARGET]) +AC_SUBST([MKLIB]) + +if test "x$abi" != "xpecoff"; then + dnl Heap profiling uses the log(3) function. + LIBS="$LIBS -lm" +fi JE_COMPILABLE([__attribute__ syntax], [static __attribute__((unused)) void foo(void){}], @@ -530,6 +572,8 @@ if test "x$enable_debug" = "x0" -a "x$no_CFLAGS" = "xyes" ; then if test "x$GCC" = "xyes" ; then JE_CFLAGS_APPEND([-O3]) JE_CFLAGS_APPEND([-funroll-loops]) + elif test "x$je_cv_msvc" = "xyes" ; then + JE_CFLAGS_APPEND([-O2]) else JE_CFLAGS_APPEND([-O]) fi @@ -833,11 +877,11 @@ AC_CACHE_CHECK([STATIC_PAGE_SHIFT], [je_cv_static_page_shift], AC_RUN_IFELSE([AC_LANG_PROGRAM( [[ +#include #ifdef _WIN32 #include #else #include -#include #endif #include ]], diff --git a/include/jemalloc/internal/atomic.h b/include/jemalloc/internal/atomic.h index 016c472a..11a7b47f 100644 --- a/include/jemalloc/internal/atomic.h +++ b/include/jemalloc/internal/atomic.h @@ -47,6 +47,20 @@ atomic_sub_uint64(uint64_t *p, uint64_t x) return (__sync_sub_and_fetch(p, x)); } +#elif (defined(_MSC_VER)) +JEMALLOC_INLINE uint64_t +atomic_add_uint64(uint64_t *p, uint64_t x) +{ + + return (InterlockedExchangeAdd64(p, x)); +} + +JEMALLOC_INLINE uint64_t +atomic_sub_uint64(uint64_t *p, uint64_t x) +{ + + return (InterlockedExchangeAdd64(p, -((int64_t)x))); +} #elif (defined(JEMALLOC_OSATOMIC)) JEMALLOC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) @@ -145,6 +159,20 @@ atomic_sub_uint32(uint32_t *p, uint32_t x) return (__sync_sub_and_fetch(p, x)); } +#elif (defined(_MSC_VER)) +JEMALLOC_INLINE uint32_t +atomic_add_uint32(uint32_t *p, uint32_t x) +{ + + return (InterlockedExchangeAdd(p, x)); +} + +JEMALLOC_INLINE uint32_t +atomic_sub_uint32(uint32_t *p, uint32_t x) +{ + + return (InterlockedExchangeAdd(p, -((int32_t)x))); +} #elif (defined(JEMALLOC_OSATOMIC)) JEMALLOC_INLINE uint32_t atomic_add_uint32(uint32_t *p, uint32_t x) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index d4c4b4cb..18861476 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -1,6 +1,5 @@ #ifndef JEMALLOC_INTERNAL_H #define JEMALLOC_INTERNAL_H -#include #include #ifdef _WIN32 # include @@ -13,6 +12,7 @@ # undef ERANGE # define ERANGE ERROR_INVALID_DATA #else +# include # include # include # if !defined(SYS_write) && defined(__NR_write) @@ -41,7 +41,17 @@ #include #include #include -#include +#ifdef _MSC_VER +# include +typedef intptr_t ssize_t; +# define PATH_MAX 1024 +# define STDERR_FILENO 2 +# 
define __func__ __FUNCTION__ +/* Disable warnings about deprecated system functions */ +# pragma warning(disable: 4996) +#else +# include +#endif #include #define JEMALLOC_NO_DEMANGLE @@ -221,6 +231,9 @@ static const bool config_ivsalloc = #else # define JEMALLOC_ENABLE_INLINE # define JEMALLOC_INLINE static inline +# ifdef _MSC_VER +# define inline _inline +# endif #endif /* Smallest size class to support. */ @@ -232,7 +245,7 @@ static const bool config_ivsalloc = * classes). */ #ifndef LG_QUANTUM -# ifdef __i386__ +# if (defined(__i386__) || defined(_M_IX86)) # define LG_QUANTUM 4 # endif # ifdef __ia64__ @@ -244,7 +257,7 @@ static const bool config_ivsalloc = # ifdef __sparc64__ # define LG_QUANTUM 4 # endif -# if (defined(__amd64__) || defined(__x86_64__)) +# if (defined(__amd64__) || defined(__x86_64__) || defined(_M_X64)) # define LG_QUANTUM 4 # endif # ifdef __arm__ diff --git a/include/jemalloc/jemalloc_defs.h.in b/include/jemalloc/jemalloc_defs.h.in index 126f6b76..e61597c0 100644 --- a/include/jemalloc/jemalloc_defs.h.in +++ b/include/jemalloc/jemalloc_defs.h.in @@ -109,6 +109,16 @@ # define JEMALLOC_ALIGNED(s) JEMALLOC_ATTR(aligned(s)) # define JEMALLOC_SECTION(s) JEMALLOC_ATTR(section(s)) # define JEMALLOC_NOINLINE JEMALLOC_ATTR(noinline) +#elif _MSC_VER +# define JEMALLOC_ATTR(s) +#ifdef DLLEXPORT +# define EXPORT __declspec(dllexport) +#else +# define EXPORT __declspec(dllimport) +#endif +# define JEMALLOC_ALIGNED(s) __declspec(align(s)) +# define JEMALLOC_SECTION(s) __declspec(allocate(s)) +# define JEMALLOC_NOINLINE __declspec(noinline) #else # define JEMALLOC_ATTR(s) # define JEMALLOC_EXPORT diff --git a/include/msc_compat/inttypes.h b/include/msvc_compat/inttypes.h similarity index 100% rename from include/msc_compat/inttypes.h rename to include/msvc_compat/inttypes.h diff --git a/include/msvc_compat/stdbool.h b/include/msvc_compat/stdbool.h new file mode 100644 index 00000000..da9ee8b8 --- /dev/null +++ b/include/msvc_compat/stdbool.h @@ -0,0 +1,16 @@ +#ifndef stdbool_h +#define stdbool_h + +#include + +/* MSVC doesn't define _Bool or bool in C, but does have BOOL */ +/* Note this doesn't pass autoconf's test because (bool) 0.5 != true */ +typedef BOOL _Bool; + +#define bool _Bool +#define true 1 +#define false 0 + +#define __bool_true_false_are_defined 1 + +#endif /* stdbool_h */ diff --git a/include/msc_compat/stdint.h b/include/msvc_compat/stdint.h similarity index 100% rename from include/msc_compat/stdint.h rename to include/msvc_compat/stdint.h diff --git a/include/msvc_compat/strings.h b/include/msvc_compat/strings.h new file mode 100644 index 00000000..c84975b6 --- /dev/null +++ b/include/msvc_compat/strings.h @@ -0,0 +1,23 @@ +#ifndef strings_h +#define strings_h + +/* MSVC doesn't define ffs/ffsl. 
This dummy strings.h header is provided + * for both */ +#include <intrin.h> +#pragma intrinsic(_BitScanForward) +static __forceinline int ffsl(long x) +{ + unsigned long i; + + if (_BitScanForward(&i, x)) + return (i + 1); + return (0); +} + +static __forceinline int ffs(int x) +{ + + return (ffsl(x)); +} + +#endif diff --git a/src/jemalloc.c b/src/jemalloc.c index 2f858c3c..d42e91db 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -56,12 +56,19 @@ static bool malloc_initializer = NO_INITIALIZER; static malloc_mutex_t init_lock; JEMALLOC_ATTR(constructor) -static void -init_init_lock() +static void WINAPI +_init_init_lock(void) { malloc_mutex_init(&init_lock); } + +#ifdef _MSC_VER +# pragma section(".CRT$XCU", read) +JEMALLOC_SECTION(".CRT$XCU") JEMALLOC_ATTR(used) +static const void (WINAPI *init_init_lock)(void) = _init_init_lock; +#endif + #else static malloc_mutex_t init_lock = MALLOC_MUTEX_INITIALIZER; #endif diff --git a/src/tsd.c b/src/tsd.c index 0506c8a0..00d7b45d 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -93,6 +93,14 @@ _tls_callback(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved) return (true); } +#ifdef _MSC_VER +# ifdef _M_IX86 +# pragma comment(linker, "/INCLUDE:__tls_used") +# else +# pragma comment(linker, "/INCLUDE:_tls_used") +# endif +# pragma section(".CRT$XLY",long,read) +#endif JEMALLOC_SECTION(".CRT$XLY") JEMALLOC_ATTR(used) static const BOOL (WINAPI *tls_callback)(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved) = _tls_callback; From 203484e2ea267e068a68fd2922263f0ff1d5ac6f Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 2 May 2012 00:30:36 -0700 Subject: [PATCH 182/205] Optimize malloc() and free() fast paths. Embed the bin index for small page runs into the chunk page map, in order to omit [...] in the following dependent load sequence: ptr-->mapelm-->[run-->bin-->]bin_info Move various non-critical code out of the inlined function chain into helper functions (tcache_event_hard(), arena_dalloc_small(), and locking).
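As a hedged sketch of the shortened load chain: the names come from this patch (CHUNK_MAP_BININD_MASK, CHUNK_MAP_BININD_SHIFT, arena_bin_info), while chunk, pageind, and mapelm are assumed to be derived from ptr as on the existing arena_dalloc() path, and the *_slow/*_fast variables are illustrative only:

    /* Before: ptr-->mapelm-->run-->bin-->bin_info, three dependent loads
     * before the size class metadata is known. */
    arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
        (uintptr_t)((pageind - (mapelm->bits >> LG_PAGE)) << LG_PAGE));
    arena_bin_info_t *bin_info_slow =
        &arena_bin_info[run->bin - arena->bins];

    /* After: the bin index is encoded in the map bits themselves, so a
     * single load of mapelm->bits replaces the run and bin dereferences. */
    size_t binind = (mapelm->bits & CHUNK_MAP_BININD_MASK) >>
        CHUNK_MAP_BININD_SHIFT;
    arena_bin_info_t *bin_info_fast = &arena_bin_info[binind];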
--- include/jemalloc/internal/arena.h | 353 ++++++++++--- .../jemalloc/internal/jemalloc_internal.h.in | 11 +- include/jemalloc/internal/private_namespace.h | 21 + include/jemalloc/internal/tcache.h | 71 +-- src/arena.c | 483 +++++++++--------- src/tcache.c | 59 ++- src/tsd.c | 2 +- 7 files changed, 613 insertions(+), 387 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 2eb41cd9..067b75a8 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -109,7 +109,8 @@ struct arena_chunk_map_s { * * p : run page offset * s : run size - * c : (binind+1) for size class (used only if prof_promote is true) + * n : binind for size class; large objects set these to BININD_INVALID + * except for promoted allocations (see prof_promote) * x : don't care * - : 0 * + : 1 @@ -117,35 +118,38 @@ struct arena_chunk_map_s { * [dula] : bit unset * * Unallocated (clean): - * ssssssss ssssssss ssss---- ----du-a - * xxxxxxxx xxxxxxxx xxxx---- -----Uxx - * ssssssss ssssssss ssss---- ----dU-a + * ssssssss ssssssss ssss1111 1111du-a + * xxxxxxxx xxxxxxxx xxxxxxxx xxxx-Uxx + * ssssssss ssssssss ssss1111 1111dU-a * * Unallocated (dirty): - * ssssssss ssssssss ssss---- ----D--a - * xxxxxxxx xxxxxxxx xxxx---- ----xxxx - * ssssssss ssssssss ssss---- ----D--a + * ssssssss ssssssss ssss1111 1111D--a + * xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx + * ssssssss ssssssss ssss1111 1111D--a * * Small: - * pppppppp pppppppp pppp---- ----d--A - * pppppppp pppppppp pppp---- -------A - * pppppppp pppppppp pppp---- ----d--A + * pppppppp pppppppp ppppnnnn nnnnd--A + * pppppppp pppppppp ppppnnnn nnnn---A + * pppppppp pppppppp ppppnnnn nnnnd--A * * Large: - * ssssssss ssssssss ssss---- ----D-LA - * xxxxxxxx xxxxxxxx xxxx---- ----xxxx - * -------- -------- -------- ----D-LA + * ssssssss ssssssss ssss1111 1111D-LA + * xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx + * -------- -------- ----1111 1111D-LA * * Large (sampled, size <= PAGE): - * ssssssss ssssssss sssscccc ccccD-LA + * ssssssss ssssssss ssssnnnn nnnnD-LA * * Large (not sampled, size == PAGE): - * ssssssss ssssssss ssss---- ----D-LA + * ssssssss ssssssss ssss1111 1111D-LA */ size_t bits; -#define CHUNK_MAP_CLASS_SHIFT 4 -#define CHUNK_MAP_CLASS_MASK ((size_t)0xff0U) -#define CHUNK_MAP_FLAGS_MASK ((size_t)0xfU) +#define CHUNK_MAP_BININD_SHIFT 4 +#define BININD_INVALID ((size_t)0xffU) +/* CHUNK_MAP_BININD_MASK == (BININD_INVALID << CHUNK_MAP_BININD_SHIFT) */ +#define CHUNK_MAP_BININD_MASK ((size_t)0xff0U) +#define CHUNK_MAP_BININD_INVALID CHUNK_MAP_BININD_MASK +#define CHUNK_MAP_FLAGS_MASK ((size_t)0xcU) #define CHUNK_MAP_DIRTY ((size_t)0x8U) #define CHUNK_MAP_UNZEROED ((size_t)0x4U) #define CHUNK_MAP_LARGE ((size_t)0x2U) @@ -409,8 +413,14 @@ void *arena_malloc_small(arena_t *arena, size_t size, bool zero); void *arena_malloc_large(arena_t *arena, size_t size, bool zero); void *arena_palloc(arena_t *arena, size_t size, size_t alignment, bool zero); void arena_prof_promoted(const void *ptr, size_t size); -void arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, +void arena_dalloc_bin_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr, arena_chunk_map_t *mapelm); +void arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, + size_t pageind, arena_chunk_map_t *mapelm); +void arena_dalloc_small(arena_t *arena, arena_chunk_t *chunk, void *ptr, + size_t pageind); +void arena_dalloc_large_locked(arena_t *arena, arena_chunk_t *chunk, + void *ptr); void arena_dalloc_large(arena_t *arena, 
arena_chunk_t *chunk, void *ptr); void arena_stats_merge(arena_t *arena, size_t *nactive, size_t *ndirty, arena_stats_t *astats, malloc_bin_stats_t *bstats, @@ -430,6 +440,30 @@ void arena_postfork_child(arena_t *arena); #ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE +arena_chunk_map_t *arena_mapp_get(arena_chunk_t *chunk, size_t pageind); +size_t *arena_mapbitsp_get(arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbits_get(arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbits_unallocated_size_get(arena_chunk_t *chunk, + size_t pageind); +size_t arena_mapbits_large_size_get(arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbits_small_runind_get(arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbits_dirty_get(arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbits_unzeroed_get(arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbits_large_get(arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbits_allocated_get(arena_chunk_t *chunk, size_t pageind); +void arena_mapbits_unallocated_set(arena_chunk_t *chunk, size_t pageind, + size_t size, size_t flags); +void arena_mapbits_unallocated_size_set(arena_chunk_t *chunk, size_t pageind, + size_t size); +void arena_mapbits_large_set(arena_chunk_t *chunk, size_t pageind, + size_t size, size_t flags); +void arena_mapbits_large_binind_set(arena_chunk_t *chunk, size_t pageind, + size_t binind); +void arena_mapbits_small_set(arena_chunk_t *chunk, size_t pageind, + size_t runind, size_t binind, size_t flags); +void arena_mapbits_unzeroed_set(arena_chunk_t *chunk, size_t pageind, + size_t unzeroed); +size_t arena_ptr_binind(const void *ptr, size_t mapbits); size_t arena_bin_index(arena_t *arena, arena_bin_t *bin); unsigned arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr); @@ -442,6 +476,203 @@ void arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr, #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_)) +# ifdef JEMALLOC_ARENA_INLINE_A +JEMALLOC_INLINE arena_chunk_map_t * +arena_mapp_get(arena_chunk_t *chunk, size_t pageind) +{ + + assert(pageind >= map_bias); + assert(pageind < chunk_npages); + + return (&chunk->map[pageind-map_bias]); +} + +JEMALLOC_INLINE size_t * +arena_mapbitsp_get(arena_chunk_t *chunk, size_t pageind) +{ + + return (&arena_mapp_get(chunk, pageind)->bits); +} + +JEMALLOC_INLINE size_t +arena_mapbits_get(arena_chunk_t *chunk, size_t pageind) +{ + + return (*arena_mapbitsp_get(chunk, pageind)); +} + +JEMALLOC_INLINE size_t +arena_mapbits_unallocated_size_get(arena_chunk_t *chunk, size_t pageind) +{ + size_t mapbits; + + mapbits = arena_mapbits_get(chunk, pageind); + assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) == 0); + return (mapbits & ~PAGE_MASK); +} + +JEMALLOC_INLINE size_t +arena_mapbits_large_size_get(arena_chunk_t *chunk, size_t pageind) +{ + size_t mapbits; + + mapbits = arena_mapbits_get(chunk, pageind); + assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) == + (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)); + return (mapbits & ~PAGE_MASK); +} + +JEMALLOC_INLINE size_t +arena_mapbits_small_runind_get(arena_chunk_t *chunk, size_t pageind) +{ + size_t mapbits; + + mapbits = arena_mapbits_get(chunk, pageind); + assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) == + CHUNK_MAP_ALLOCATED); + return (mapbits >> LG_PAGE); +} + +JEMALLOC_INLINE size_t +arena_mapbits_dirty_get(arena_chunk_t *chunk, size_t pageind) +{ + size_t mapbits; + + mapbits = arena_mapbits_get(chunk, pageind); + return (mapbits & 
CHUNK_MAP_DIRTY); +} + +JEMALLOC_INLINE size_t +arena_mapbits_unzeroed_get(arena_chunk_t *chunk, size_t pageind) +{ + size_t mapbits; + + mapbits = arena_mapbits_get(chunk, pageind); + return (mapbits & CHUNK_MAP_UNZEROED); +} + +JEMALLOC_INLINE size_t +arena_mapbits_large_get(arena_chunk_t *chunk, size_t pageind) +{ + size_t mapbits; + + mapbits = arena_mapbits_get(chunk, pageind); + return (mapbits & CHUNK_MAP_LARGE); +} + +JEMALLOC_INLINE size_t +arena_mapbits_allocated_get(arena_chunk_t *chunk, size_t pageind) +{ + size_t mapbits; + + mapbits = arena_mapbits_get(chunk, pageind); + return (mapbits & CHUNK_MAP_ALLOCATED); +} + +JEMALLOC_INLINE void +arena_mapbits_unallocated_set(arena_chunk_t *chunk, size_t pageind, size_t size, + size_t flags) +{ + size_t *mapbitsp; + + mapbitsp = arena_mapbitsp_get(chunk, pageind); + assert((size & PAGE_MASK) == 0); + assert((flags & ~CHUNK_MAP_FLAGS_MASK) == 0); + *mapbitsp = size | CHUNK_MAP_BININD_INVALID | flags; +} + +JEMALLOC_INLINE void +arena_mapbits_unallocated_size_set(arena_chunk_t *chunk, size_t pageind, + size_t size) +{ + size_t *mapbitsp; + + mapbitsp = arena_mapbitsp_get(chunk, pageind); + assert((size & PAGE_MASK) == 0); + assert((*mapbitsp & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) == 0); + *mapbitsp = size | (*mapbitsp & PAGE_MASK); +} + +JEMALLOC_INLINE void +arena_mapbits_large_set(arena_chunk_t *chunk, size_t pageind, size_t size, + size_t flags) +{ + size_t *mapbitsp; + + mapbitsp = arena_mapbitsp_get(chunk, pageind); + assert((size & PAGE_MASK) == 0); + assert((flags & ~CHUNK_MAP_FLAGS_MASK) == 0); + *mapbitsp = size | CHUNK_MAP_BININD_INVALID | flags | CHUNK_MAP_LARGE | + CHUNK_MAP_ALLOCATED; +} + +JEMALLOC_INLINE void +arena_mapbits_large_binind_set(arena_chunk_t *chunk, size_t pageind, + size_t binind) +{ + size_t *mapbitsp; + + assert(binind <= BININD_INVALID); + mapbitsp = arena_mapbitsp_get(chunk, pageind); + assert(arena_mapbits_large_size_get(chunk, pageind) == PAGE); + *mapbitsp = (*mapbitsp & ~CHUNK_MAP_BININD_MASK) | (binind << + CHUNK_MAP_BININD_SHIFT); +} + +JEMALLOC_INLINE void +arena_mapbits_small_set(arena_chunk_t *chunk, size_t pageind, size_t runind, + size_t binind, size_t flags) +{ + size_t *mapbitsp; + + assert(binind < BININD_INVALID); + mapbitsp = arena_mapbitsp_get(chunk, pageind); + assert(pageind - runind >= map_bias); + assert((flags & ~CHUNK_MAP_FLAGS_MASK) == 0); + *mapbitsp = (runind << LG_PAGE) | (binind << CHUNK_MAP_BININD_SHIFT) | + flags | CHUNK_MAP_ALLOCATED; +} + +JEMALLOC_INLINE void +arena_mapbits_unzeroed_set(arena_chunk_t *chunk, size_t pageind, + size_t unzeroed) +{ + size_t *mapbitsp; + + mapbitsp = arena_mapbitsp_get(chunk, pageind); + *mapbitsp = (*mapbitsp & ~CHUNK_MAP_UNZEROED) | unzeroed; +} + +JEMALLOC_INLINE size_t +arena_ptr_binind(const void *ptr, size_t mapbits) +{ + size_t binind; + + binind = (mapbits & CHUNK_MAP_BININD_MASK) >> CHUNK_MAP_BININD_SHIFT; + + if (config_debug) { + arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + arena_t *arena = chunk->arena; + size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; + size_t actual_mapbits = arena_mapbits_get(chunk, pageind); + arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + + (uintptr_t)((pageind - (actual_mapbits >> LG_PAGE)) << + LG_PAGE)); arena_bin_t *bin = run->bin; + size_t actual_binind = bin - arena->bins; + arena_bin_info_t *bin_info = &arena_bin_info[actual_binind]; + + assert(mapbits == actual_mapbits); + assert(binind == actual_binind); + assert(((uintptr_t)ptr - ((uintptr_t)run + + 
(uintptr_t)bin_info->reg0_offset)) % bin_info->reg_interval + == 0); + } + + return (binind); +} +# endif /* JEMALLOC_ARENA_INLINE_A */ + +# ifdef JEMALLOC_ARENA_INLINE_B JEMALLOC_INLINE size_t arena_bin_index(arena_t *arena, arena_bin_t *bin) { @@ -535,7 +766,7 @@ arena_prof_ctx_get(const void *ptr) chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - mapbits = chunk->map[pageind-map_bias].bits; + mapbits = arena_mapbits_get(chunk, pageind); assert((mapbits & CHUNK_MAP_ALLOCATED) != 0); if ((mapbits & CHUNK_MAP_LARGE) == 0) { if (prof_promote) @@ -544,7 +775,7 @@ arena_prof_ctx_get(const void *ptr) arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - (mapbits >> LG_PAGE)) << LG_PAGE)); - size_t binind = arena_bin_index(chunk->arena, run->bin); + size_t binind = arena_ptr_binind(ptr, mapbits); arena_bin_info_t *bin_info = &arena_bin_info[binind]; unsigned regind; @@ -554,7 +785,7 @@ arena_prof_ctx_get(const void *ptr) sizeof(prof_ctx_t *))); } } else - ret = chunk->map[pageind-map_bias].prof_ctx; + ret = arena_mapp_get(chunk, pageind)->prof_ctx; return (ret); } @@ -571,19 +802,18 @@ arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx) chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - mapbits = chunk->map[pageind-map_bias].bits; + mapbits = arena_mapbits_get(chunk, pageind); assert((mapbits & CHUNK_MAP_ALLOCATED) != 0); if ((mapbits & CHUNK_MAP_LARGE) == 0) { if (prof_promote == false) { arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - (mapbits >> LG_PAGE)) << LG_PAGE)); - arena_bin_t *bin = run->bin; size_t binind; arena_bin_info_t *bin_info; unsigned regind; - binind = arena_bin_index(chunk->arena, bin); + binind = arena_ptr_binind(ptr, mapbits); bin_info = &arena_bin_info[binind]; regind = arena_run_regind(run, bin_info, ptr); @@ -592,7 +822,7 @@ arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx) } else assert((uintptr_t)ctx == (uintptr_t)1U); } else - chunk->map[pageind-map_bias].prof_ctx = ctx; + arena_mapp_get(chunk, pageind)->prof_ctx = ctx; } JEMALLOC_INLINE void * @@ -638,26 +868,24 @@ arena_salloc(const void *ptr, bool demote) chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - mapbits = chunk->map[pageind-map_bias].bits; - assert((mapbits & CHUNK_MAP_ALLOCATED) != 0); + assert(arena_mapbits_allocated_get(chunk, pageind) != 0); + mapbits = arena_mapbits_get(chunk, pageind); if ((mapbits & CHUNK_MAP_LARGE) == 0) { - arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + - (uintptr_t)((pageind - (mapbits >> LG_PAGE)) << LG_PAGE)); - size_t binind = arena_bin_index(chunk->arena, run->bin); - arena_bin_info_t *bin_info = &arena_bin_info[binind]; - assert(((uintptr_t)ptr - ((uintptr_t)run + - (uintptr_t)bin_info->reg0_offset)) % bin_info->reg_interval - == 0); - ret = bin_info->reg_size; + size_t binind = arena_ptr_binind(ptr, mapbits); + ret = arena_bin_info[binind].reg_size; } else { assert(((uintptr_t)ptr & PAGE_MASK) == 0); - ret = mapbits & ~PAGE_MASK; + ret = arena_mapbits_large_size_get(chunk, pageind); if (config_prof && demote && prof_promote && ret == PAGE && - (mapbits & CHUNK_MAP_CLASS_MASK) != 0) { - size_t binind = ((mapbits & CHUNK_MAP_CLASS_MASK) >> - CHUNK_MAP_CLASS_SHIFT) - 1; + (mapbits & CHUNK_MAP_BININD_MASK) != + CHUNK_MAP_BININD_MASK) { + size_t binind = ((mapbits & CHUNK_MAP_BININD_MASK) >> + CHUNK_MAP_BININD_SHIFT); assert(binind < NBINS); ret = 
arena_bin_info[binind].reg_size; + } else { + assert(demote == false || (mapbits & + CHUNK_MAP_BININD_MASK) == CHUNK_MAP_BININD_MASK); } assert(ret != 0); } @@ -668,8 +896,7 @@ arena_salloc(const void *ptr, bool demote) JEMALLOC_INLINE void arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr, bool try_tcache) { - size_t pageind; - arena_chunk_map_t *mapelm; + size_t pageind, mapbits; tcache_t *tcache; assert(arena != NULL); @@ -678,47 +905,31 @@ arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr, bool try_tcache) assert(CHUNK_ADDR2BASE(ptr) != ptr); pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - mapelm = &chunk->map[pageind-map_bias]; - assert((mapelm->bits & CHUNK_MAP_ALLOCATED) != 0); - if ((mapelm->bits & CHUNK_MAP_LARGE) == 0) { + mapbits = arena_mapbits_get(chunk, pageind); + assert(arena_mapbits_allocated_get(chunk, pageind) != 0); + if ((mapbits & CHUNK_MAP_LARGE) == 0) { /* Small allocation. */ - if (try_tcache && (tcache = tcache_get(false)) != NULL) - tcache_dalloc_small(tcache, ptr); - else { - arena_run_t *run; - arena_bin_t *bin; + if (try_tcache && (tcache = tcache_get(false)) != NULL) { + size_t binind; - run = (arena_run_t *)((uintptr_t)chunk + - (uintptr_t)((pageind - (mapelm->bits >> LG_PAGE)) << - LG_PAGE)); - bin = run->bin; - if (config_debug) { - size_t binind = arena_bin_index(arena, bin); - UNUSED arena_bin_info_t *bin_info = - &arena_bin_info[binind]; - assert(((uintptr_t)ptr - ((uintptr_t)run + - (uintptr_t)bin_info->reg0_offset)) % - bin_info->reg_interval == 0); - } - malloc_mutex_lock(&bin->lock); - arena_dalloc_bin(arena, chunk, ptr, mapelm); - malloc_mutex_unlock(&bin->lock); - } + binind = arena_ptr_binind(ptr, mapbits); + assert(binind < NBINS); + tcache_dalloc_small(tcache, ptr, binind); + } else + arena_dalloc_small(arena, chunk, ptr, pageind); } else { - size_t size = mapelm->bits & ~PAGE_MASK; + size_t size = arena_mapbits_large_size_get(chunk, pageind); assert(((uintptr_t)ptr & PAGE_MASK) == 0); if (try_tcache && size <= tcache_maxclass && (tcache = tcache_get(false)) != NULL) { tcache_dalloc_large(tcache, ptr, size); - } else { - malloc_mutex_lock(&arena->lock); + } else arena_dalloc_large(arena, chunk, ptr); - malloc_mutex_unlock(&arena->lock); - } } } +# endif /* JEMALLOC_ARENA_INLINE_B */ #endif #endif /* JEMALLOC_H_INLINES */ diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 18861476..ccecfaa1 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -685,8 +685,17 @@ choose_arena(arena_t *arena) #include "jemalloc/internal/bitmap.h" #include "jemalloc/internal/rtree.h" -#include "jemalloc/internal/tcache.h" +/* + * Include arena.h twice in order to resolve circular dependencies with + * tcache.h. 
+ */ +#define JEMALLOC_ARENA_INLINE_A #include "jemalloc/internal/arena.h" +#undef JEMALLOC_ARENA_INLINE_A +#include "jemalloc/internal/tcache.h" +#define JEMALLOC_ARENA_INLINE_B +#include "jemalloc/internal/arena.h" +#undef JEMALLOC_ARENA_INLINE_B #include "jemalloc/internal/hash.h" #include "jemalloc/internal/quarantine.h" diff --git a/include/jemalloc/internal/private_namespace.h b/include/jemalloc/internal/private_namespace.h index c467153a..6ac07631 100644 --- a/include/jemalloc/internal/private_namespace.h +++ b/include/jemalloc/internal/private_namespace.h @@ -7,11 +7,30 @@ #define arena_boot JEMALLOC_N(arena_boot) #define arena_dalloc JEMALLOC_N(arena_dalloc) #define arena_dalloc_bin JEMALLOC_N(arena_dalloc_bin) +#define arena_dalloc_bin_locked JEMALLOC_N(arena_dalloc_bin_locked) #define arena_dalloc_junk_small JEMALLOC_N(arena_dalloc_junk_small) #define arena_dalloc_large JEMALLOC_N(arena_dalloc_large) +#define arena_dalloc_large_locked JEMALLOC_N(arena_dalloc_large_locked) +#define arena_dalloc_small JEMALLOC_N(arena_dalloc_small) #define arena_malloc JEMALLOC_N(arena_malloc) #define arena_malloc_large JEMALLOC_N(arena_malloc_large) #define arena_malloc_small JEMALLOC_N(arena_malloc_small) +#define arena_mapbitsp_get JEMALLOC_N(arena_mapbitsp_get) +#define arena_mapbits_get JEMALLOC_N(arena_mapbits_get) +#define arena_mapbits_unallocated_size_get JEMALLOC_N(arena_mapbits_unallocated_size_get) +#define arena_mapbits_large_size_get JEMALLOC_N(arena_mapbits_large_size_get) +#define arena_mapbits_small_runind_get JEMALLOC_N(arena_mapbits_small_runind_get) +#define arena_mapbits_dirty_get JEMALLOC_N(arena_mapbits_dirty_get) +#define arena_mapbits_unzeroed_get JEMALLOC_N(arena_mapbits_unzeroed_get) +#define arena_mapbits_large_get JEMALLOC_N(arena_mapbits_large_get) +#define arena_mapbits_allocated_get JEMALLOC_N(arena_mapbits_allocated_get) +#define arena_mapbits_unallocated_set JEMALLOC_N(arena_mapbits_unallocated_set) +#define arena_mapbits_unallocated_size_set JEMALLOC_N(arena_mapbits_unallocated_size_set) +#define arena_mapbits_large_set JEMALLOC_N(arena_mapbits_large_set) +#define arena_mapbits_large_binind_set JEMALLOC_N(arena_mapbits_large_binind_set) +#define arena_mapbits_small_set JEMALLOC_N(arena_mapbits_small_set) +#define arena_mapbits_unzeroed_set JEMALLOC_N(arena_mapbits_unzeroed_set) +#define arena_mapp_get JEMALLOC_N(arena_mapp_get) #define arena_maxclass JEMALLOC_N(arena_maxclass) #define arena_new JEMALLOC_N(arena_new) #define arena_palloc JEMALLOC_N(arena_palloc) @@ -22,6 +41,7 @@ #define arena_prof_ctx_get JEMALLOC_N(arena_prof_ctx_get) #define arena_prof_ctx_set JEMALLOC_N(arena_prof_ctx_set) #define arena_prof_promoted JEMALLOC_N(arena_prof_promoted) +#define arena_ptr_binind JEMALLOC_N(arena_ptr_binind) #define arena_purge_all JEMALLOC_N(arena_purge_all) #define arena_ralloc JEMALLOC_N(arena_ralloc) #define arena_ralloc_no_move JEMALLOC_N(arena_ralloc_no_move) @@ -296,6 +316,7 @@ #define tcache_enabled_tsd_get JEMALLOC_N(tcache_enabled_tsd_get) #define tcache_enabled_tsd_set JEMALLOC_N(tcache_enabled_tsd_set) #define tcache_event JEMALLOC_N(tcache_event) +#define tcache_event_hard JEMALLOC_N(tcache_event_hard) #define tcache_initialized JEMALLOC_N(tcache_initialized) #define tcache_flush JEMALLOC_N(tcache_flush) #define tcache_get JEMALLOC_N(tcache_get) diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index cfb17c28..38d735c8 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -101,6 
+101,7 @@ extern size_t nhbins; extern size_t tcache_maxclass; size_t tcache_salloc(const void *ptr); +void tcache_event_hard(tcache_t *tcache); void *tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin, size_t binind); void tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem, @@ -132,7 +133,7 @@ void tcache_enabled_set(bool enabled); void *tcache_alloc_easy(tcache_bin_t *tbin); void *tcache_alloc_small(tcache_t *tcache, size_t size, bool zero); void *tcache_alloc_large(tcache_t *tcache, size_t size, bool zero); -void tcache_dalloc_small(tcache_t *tcache, void *ptr); +void tcache_dalloc_small(tcache_t *tcache, void *ptr, size_t binind); void tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size); #endif @@ -266,47 +267,8 @@ tcache_event(tcache_t *tcache) tcache->ev_cnt++; assert(tcache->ev_cnt <= TCACHE_GC_INCR); - if (tcache->ev_cnt == TCACHE_GC_INCR) { - size_t binind = tcache->next_gc_bin; - tcache_bin_t *tbin = &tcache->tbins[binind]; - tcache_bin_info_t *tbin_info = &tcache_bin_info[binind]; - - if (tbin->low_water > 0) { - /* - * Flush (ceiling) 3/4 of the objects below the low - * water mark. - */ - if (binind < NBINS) { - tcache_bin_flush_small(tbin, binind, - tbin->ncached - tbin->low_water + - (tbin->low_water >> 2), tcache); - } else { - tcache_bin_flush_large(tbin, binind, - tbin->ncached - tbin->low_water + - (tbin->low_water >> 2), tcache); - } - /* - * Reduce fill count by 2X. Limit lg_fill_div such that - * the fill count is always at least 1. - */ - if ((tbin_info->ncached_max >> (tbin->lg_fill_div+1)) - >= 1) - tbin->lg_fill_div++; - } else if (tbin->low_water < 0) { - /* - * Increase fill count by 2X. Make sure lg_fill_div - * stays greater than 0. - */ - if (tbin->lg_fill_div > 1) - tbin->lg_fill_div--; - } - tbin->low_water = tbin->ncached; - - tcache->next_gc_bin++; - if (tcache->next_gc_bin == nhbins) - tcache->next_gc_bin = 0; - tcache->ev_cnt = 0; - } + if (tcache->ev_cnt == TCACHE_GC_INCR) + tcache_event_hard(tcache); } JEMALLOC_INLINE void * @@ -390,13 +352,13 @@ tcache_alloc_large(tcache_t *tcache, size_t size, bool zero) if (ret == NULL) return (NULL); } else { - if (config_prof) { + if (config_prof && prof_promote && size == PAGE) { arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ret); size_t pageind = (((uintptr_t)ret - (uintptr_t)chunk) >> LG_PAGE); - chunk->map[pageind-map_bias].bits &= - ~CHUNK_MAP_CLASS_MASK; + arena_mapbits_large_binind_set(chunk, pageind, + BININD_INVALID); } if (zero == false) { if (config_fill) { @@ -421,30 +383,13 @@ tcache_alloc_large(tcache_t *tcache, size_t size, bool zero) } JEMALLOC_INLINE void -tcache_dalloc_small(tcache_t *tcache, void *ptr) +tcache_dalloc_small(tcache_t *tcache, void *ptr, size_t binind) { - arena_t *arena; - arena_chunk_t *chunk; - arena_run_t *run; - arena_bin_t *bin; tcache_bin_t *tbin; tcache_bin_info_t *tbin_info; - size_t pageind, binind; - arena_chunk_map_t *mapelm; assert(tcache_salloc(ptr) <= SMALL_MAXCLASS); - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - arena = chunk->arena; - pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - mapelm = &chunk->map[pageind-map_bias]; - run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - - (mapelm->bits >> LG_PAGE)) << LG_PAGE)); - bin = run->bin; - binind = ((uintptr_t)bin - (uintptr_t)&arena->bins) / - sizeof(arena_bin_t); - assert(binind < NBINS); - if (config_fill && opt_junk) arena_dalloc_junk_small(ptr, &arena_bin_info[binind]); diff --git a/src/arena.c b/src/arena.c index 51c268c2..3ef4b8fb 
100644 --- a/src/arena.c +++ b/src/arena.c @@ -41,11 +41,11 @@ const uint8_t small_size2bin[] = { /* Function prototypes for non-inline static functions. */ static void arena_run_split(arena_t *arena, arena_run_t *run, size_t size, - bool large, bool zero); + bool large, size_t binind, bool zero); static arena_chunk_t *arena_chunk_alloc(arena_t *arena); static void arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk); static arena_run_t *arena_run_alloc(arena_t *arena, size_t size, bool large, - bool zero); + size_t binind, bool zero); static void arena_purge(arena_t *arena, bool all); static void arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty); static void arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, @@ -152,7 +152,9 @@ static inline void arena_run_reg_dalloc(arena_run_t *run, void *ptr) { arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); - size_t binind = arena_bin_index(chunk->arena, run->bin); + size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; + size_t mapbits = arena_mapbits_get(chunk, pageind); + size_t binind = arena_ptr_binind(ptr, mapbits); arena_bin_info_t *bin_info = &arena_bin_info[binind]; unsigned regind = arena_run_regind(run, bin_info, ptr); bitmap_t *bitmap = (bitmap_t *)((uintptr_t)run + @@ -184,28 +186,31 @@ arena_chunk_validate_zeroed(arena_chunk_t *chunk, size_t run_ind) static void arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, - bool zero) + size_t binind, bool zero) { arena_chunk_t *chunk; size_t run_ind, total_pages, need_pages, rem_pages, i; size_t flag_dirty; arena_avail_tree_t *runs_avail; + assert((large && binind == BININD_INVALID) || (large == false && binind + != BININD_INVALID)); + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE); - flag_dirty = chunk->map[run_ind-map_bias].bits & CHUNK_MAP_DIRTY; + flag_dirty = arena_mapbits_dirty_get(chunk, run_ind); runs_avail = (flag_dirty != 0) ? &arena->runs_avail_dirty : &arena->runs_avail_clean; - total_pages = (chunk->map[run_ind-map_bias].bits & ~PAGE_MASK) >> + total_pages = arena_mapbits_unallocated_size_get(chunk, run_ind) >> LG_PAGE; - assert((chunk->map[run_ind+total_pages-1-map_bias].bits & - CHUNK_MAP_DIRTY) == flag_dirty); + assert(arena_mapbits_dirty_get(chunk, run_ind+total_pages-1) == + flag_dirty); need_pages = (size >> LG_PAGE); assert(need_pages > 0); assert(need_pages <= total_pages); rem_pages = total_pages - need_pages; - arena_avail_tree_remove(runs_avail, &chunk->map[run_ind-map_bias]); + arena_avail_tree_remove(runs_avail, arena_mapp_get(chunk, run_ind)); if (config_stats) { /* * Update stats_cactive if nactive is crossing a chunk @@ -222,22 +227,23 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, /* Keep track of trailing unused pages for later use. 
*/ if (rem_pages > 0) { if (flag_dirty != 0) { - chunk->map[run_ind+need_pages-map_bias].bits = - (rem_pages << LG_PAGE) | CHUNK_MAP_DIRTY; - chunk->map[run_ind+total_pages-1-map_bias].bits = - (rem_pages << LG_PAGE) | CHUNK_MAP_DIRTY; + arena_mapbits_unallocated_set(chunk, run_ind+need_pages, + (rem_pages << LG_PAGE), CHUNK_MAP_DIRTY); + arena_mapbits_unallocated_set(chunk, + run_ind+total_pages-1, (rem_pages << LG_PAGE), + CHUNK_MAP_DIRTY); } else { - chunk->map[run_ind+need_pages-map_bias].bits = - (rem_pages << LG_PAGE) | - (chunk->map[run_ind+need_pages-map_bias].bits & - CHUNK_MAP_UNZEROED); - chunk->map[run_ind+total_pages-1-map_bias].bits = - (rem_pages << LG_PAGE) | - (chunk->map[run_ind+total_pages-1-map_bias].bits & - CHUNK_MAP_UNZEROED); + arena_mapbits_unallocated_set(chunk, run_ind+need_pages, + (rem_pages << LG_PAGE), + arena_mapbits_unzeroed_get(chunk, + run_ind+need_pages)); + arena_mapbits_unallocated_set(chunk, + run_ind+total_pages-1, (rem_pages << LG_PAGE), + arena_mapbits_unzeroed_get(chunk, + run_ind+total_pages-1)); } - arena_avail_tree_insert(runs_avail, - &chunk->map[run_ind+need_pages-map_bias]); + arena_avail_tree_insert(runs_avail, arena_mapp_get(chunk, + run_ind+need_pages)); } /* Update dirty page accounting. */ @@ -258,8 +264,8 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, * zeroed (i.e. never before touched). */ for (i = 0; i < need_pages; i++) { - if ((chunk->map[run_ind+i-map_bias].bits - & CHUNK_MAP_UNZEROED) != 0) { + if (arena_mapbits_unzeroed_get(chunk, + run_ind+i) != 0) { VALGRIND_MAKE_MEM_UNDEFINED( (void *)((uintptr_t) chunk + ((run_ind+i) << @@ -293,10 +299,9 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, * Set the last element first, in case the run only contains one * page (i.e. both statements set the same element). */ - chunk->map[run_ind+need_pages-1-map_bias].bits = - CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED | flag_dirty; - chunk->map[run_ind-map_bias].bits = size | flag_dirty | - CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; + arena_mapbits_large_set(chunk, run_ind+need_pages-1, 0, + flag_dirty); + arena_mapbits_large_set(chunk, run_ind, size, flag_dirty); } else { assert(zero == false); /* @@ -304,34 +309,30 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, * small run, so that arena_dalloc_bin_run() has the ability to * conditionally trim clean pages. */ - chunk->map[run_ind-map_bias].bits = - (chunk->map[run_ind-map_bias].bits & CHUNK_MAP_UNZEROED) | - CHUNK_MAP_ALLOCATED | flag_dirty; + arena_mapbits_small_set(chunk, run_ind, 0, binind, + arena_mapbits_unzeroed_get(chunk, run_ind) | flag_dirty); /* * The first page will always be dirtied during small run * initialization, so a validation failure here would not * actually cause an observable failure. 
*/ if (config_debug && flag_dirty == 0 && - (chunk->map[run_ind-map_bias].bits & CHUNK_MAP_UNZEROED) - == 0) + arena_mapbits_unzeroed_get(chunk, run_ind) == 0) arena_chunk_validate_zeroed(chunk, run_ind); for (i = 1; i < need_pages - 1; i++) { - chunk->map[run_ind+i-map_bias].bits = (i << LG_PAGE) - | (chunk->map[run_ind+i-map_bias].bits & - CHUNK_MAP_UNZEROED) | CHUNK_MAP_ALLOCATED; + arena_mapbits_small_set(chunk, run_ind+i, i, + binind, arena_mapbits_unzeroed_get(chunk, + run_ind+i)); if (config_debug && flag_dirty == 0 && - (chunk->map[run_ind+i-map_bias].bits & - CHUNK_MAP_UNZEROED) == 0) + arena_mapbits_unzeroed_get(chunk, run_ind+i) == 0) arena_chunk_validate_zeroed(chunk, run_ind+i); } - chunk->map[run_ind+need_pages-1-map_bias].bits = ((need_pages - - 1) << LG_PAGE) | - (chunk->map[run_ind+need_pages-1-map_bias].bits & - CHUNK_MAP_UNZEROED) | CHUNK_MAP_ALLOCATED | flag_dirty; + arena_mapbits_small_set(chunk, run_ind+need_pages-1, + need_pages-1, binind, arena_mapbits_unzeroed_get(chunk, + run_ind+need_pages-1) | flag_dirty); if (config_debug && flag_dirty == 0 && - (chunk->map[run_ind+need_pages-1-map_bias].bits & - CHUNK_MAP_UNZEROED) == 0) { + arena_mapbits_unzeroed_get(chunk, run_ind+need_pages-1) == + 0) { arena_chunk_validate_zeroed(chunk, run_ind+need_pages-1); } @@ -351,17 +352,18 @@ arena_chunk_alloc(arena_t *arena) arena->spare = NULL; /* Insert the run into the appropriate runs_avail_* tree. */ - if ((chunk->map[0].bits & CHUNK_MAP_DIRTY) == 0) + if (arena_mapbits_dirty_get(chunk, map_bias) == 0) runs_avail = &arena->runs_avail_clean; else runs_avail = &arena->runs_avail_dirty; - assert((chunk->map[0].bits & ~PAGE_MASK) == arena_maxclass); - assert((chunk->map[chunk_npages-1-map_bias].bits & ~PAGE_MASK) - == arena_maxclass); - assert((chunk->map[0].bits & CHUNK_MAP_DIRTY) == - (chunk->map[chunk_npages-1-map_bias].bits & - CHUNK_MAP_DIRTY)); - arena_avail_tree_insert(runs_avail, &chunk->map[0]); + assert(arena_mapbits_unallocated_size_get(chunk, map_bias) == + arena_maxclass); + assert(arena_mapbits_unallocated_size_get(chunk, + chunk_npages-1) == arena_maxclass); + assert(arena_mapbits_dirty_get(chunk, map_bias) == + arena_mapbits_dirty_get(chunk, chunk_npages-1)); + arena_avail_tree_insert(runs_avail, arena_mapp_get(chunk, + map_bias)); } else { bool zero; size_t unzeroed; @@ -392,24 +394,27 @@ arena_chunk_alloc(arena_t *arena) * chunk. */ unzeroed = zero ? 0 : CHUNK_MAP_UNZEROED; - chunk->map[0].bits = arena_maxclass | unzeroed; + arena_mapbits_unallocated_set(chunk, map_bias, arena_maxclass, + unzeroed); /* * There is no need to initialize the internal page map entries * unless the chunk is not zeroed. */ if (zero == false) { for (i = map_bias+1; i < chunk_npages-1; i++) - chunk->map[i-map_bias].bits = unzeroed; + arena_mapbits_unzeroed_set(chunk, i, unzeroed); } else if (config_debug) { - for (i = map_bias+1; i < chunk_npages-1; i++) - assert(chunk->map[i-map_bias].bits == unzeroed); + for (i = map_bias+1; i < chunk_npages-1; i++) { + assert(arena_mapbits_unzeroed_get(chunk, i) == + unzeroed); + } } - chunk->map[chunk_npages-1-map_bias].bits = arena_maxclass | - unzeroed; + arena_mapbits_unallocated_set(chunk, chunk_npages-1, + arena_maxclass, unzeroed); /* Insert the run into the runs_avail_clean tree. 
*/ arena_avail_tree_insert(&arena->runs_avail_clean, - &chunk->map[0]); + arena_mapp_get(chunk, map_bias)); } return (chunk); @@ -424,11 +429,11 @@ arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk) * Remove run from the appropriate runs_avail_* tree, so that the arena * does not use it. */ - if ((chunk->map[0].bits & CHUNK_MAP_DIRTY) == 0) + if (arena_mapbits_dirty_get(chunk, map_bias) == 0) runs_avail = &arena->runs_avail_clean; else runs_avail = &arena->runs_avail_dirty; - arena_avail_tree_remove(runs_avail, &chunk->map[0]); + arena_avail_tree_remove(runs_avail, arena_mapp_get(chunk, map_bias)); if (arena->spare != NULL) { arena_chunk_t *spare = arena->spare; @@ -449,7 +454,8 @@ arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk) } static arena_run_t * -arena_run_alloc(arena_t *arena, size_t size, bool large, bool zero) +arena_run_alloc(arena_t *arena, size_t size, bool large, size_t binind, + bool zero) { arena_chunk_t *chunk; arena_run_t *run; @@ -457,6 +463,8 @@ arena_run_alloc(arena_t *arena, size_t size, bool large, bool zero) assert(size <= arena_maxclass); assert((size & PAGE_MASK) == 0); + assert((large && binind == BININD_INVALID) || (large == false && binind + != BININD_INVALID)); /* Search the arena's chunks for the lowest best fit. */ key.bits = size | CHUNK_MAP_KEY; @@ -469,7 +477,7 @@ arena_run_alloc(arena_t *arena, size_t size, bool large, bool zero) run = (arena_run_t *)((uintptr_t)run_chunk + (pageind << LG_PAGE)); - arena_run_split(arena, run, size, large, zero); + arena_run_split(arena, run, size, large, binind, zero); return (run); } mapelm = arena_avail_tree_nsearch(&arena->runs_avail_clean, &key); @@ -481,7 +489,7 @@ arena_run_alloc(arena_t *arena, size_t size, bool large, bool zero) run = (arena_run_t *)((uintptr_t)run_chunk + (pageind << LG_PAGE)); - arena_run_split(arena, run, size, large, zero); + arena_run_split(arena, run, size, large, binind, zero); return (run); } @@ -491,7 +499,7 @@ arena_run_alloc(arena_t *arena, size_t size, bool large, bool zero) chunk = arena_chunk_alloc(arena); if (chunk != NULL) { run = (arena_run_t *)((uintptr_t)chunk + (map_bias << LG_PAGE)); - arena_run_split(arena, run, size, large, zero); + arena_run_split(arena, run, size, large, binind, zero); return (run); } @@ -509,7 +517,7 @@ arena_run_alloc(arena_t *arena, size_t size, bool large, bool zero) run = (arena_run_t *)((uintptr_t)run_chunk + (pageind << LG_PAGE)); - arena_run_split(arena, run, size, large, zero); + arena_run_split(arena, run, size, large, binind, zero); return (run); } mapelm = arena_avail_tree_nsearch(&arena->runs_avail_clean, &key); @@ -521,7 +529,7 @@ arena_run_alloc(arena_t *arena, size_t size, bool large, bool zero) run = (arena_run_t *)((uintptr_t)run_chunk + (pageind << LG_PAGE)); - arena_run_split(arena, run, size, large, zero); + arena_run_split(arena, run, size, large, binind, zero); return (run); } @@ -579,40 +587,38 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) * run. */ if (chunk == arena->spare) { - assert((chunk->map[0].bits & CHUNK_MAP_DIRTY) != 0); + assert(arena_mapbits_dirty_get(chunk, map_bias) != 0); arena_chunk_alloc(arena); } /* Temporarily allocate all free dirty runs within chunk. 
*/ for (pageind = map_bias; pageind < chunk_npages;) { - mapelm = &chunk->map[pageind-map_bias]; - if ((mapelm->bits & CHUNK_MAP_ALLOCATED) == 0) { + mapelm = arena_mapp_get(chunk, pageind); + if (arena_mapbits_allocated_get(chunk, pageind) == 0) { size_t npages; - npages = mapelm->bits >> LG_PAGE; + npages = arena_mapbits_unallocated_size_get(chunk, + pageind) >> LG_PAGE; assert(pageind + npages <= chunk_npages); - if (mapelm->bits & CHUNK_MAP_DIRTY) { + if (arena_mapbits_dirty_get(chunk, pageind)) { size_t i; arena_avail_tree_remove( &arena->runs_avail_dirty, mapelm); - mapelm->bits = (npages << LG_PAGE) | - flag_unzeroed | CHUNK_MAP_LARGE | - CHUNK_MAP_ALLOCATED; + arena_mapbits_large_set(chunk, pageind, + (npages << LG_PAGE), flag_unzeroed); /* * Update internal elements in the page map, so * that CHUNK_MAP_UNZEROED is properly set. */ for (i = 1; i < npages - 1; i++) { - chunk->map[pageind+i-map_bias].bits = - flag_unzeroed; + arena_mapbits_unzeroed_set(chunk, + pageind+i, flag_unzeroed); } if (npages > 1) { - chunk->map[ - pageind+npages-1-map_bias].bits = - flag_unzeroed | CHUNK_MAP_LARGE | - CHUNK_MAP_ALLOCATED; + arena_mapbits_large_set(chunk, + pageind+npages-1, 0, flag_unzeroed); } if (config_stats) { @@ -637,15 +643,17 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) pageind += npages; } else { /* Skip allocated run. */ - if (mapelm->bits & CHUNK_MAP_LARGE) - pageind += mapelm->bits >> LG_PAGE; + if (arena_mapbits_large_get(chunk, pageind)) + pageind += arena_mapbits_large_size_get(chunk, + pageind) >> LG_PAGE; else { size_t binind; arena_bin_info_t *bin_info; arena_run_t *run = (arena_run_t *)((uintptr_t) chunk + (uintptr_t)(pageind << LG_PAGE)); - assert((mapelm->bits >> LG_PAGE) == 0); + assert(arena_mapbits_small_runind_get(chunk, + pageind) == 0); binind = arena_bin_index(arena, run->bin); bin_info = &arena_bin_info[binind]; pageind += bin_info->run_size >> LG_PAGE; @@ -669,7 +677,8 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) ql_foreach(mapelm, &mapelms, u.ql_link) { size_t pageind = (((uintptr_t)mapelm - (uintptr_t)chunk->map) / sizeof(arena_chunk_map_t)) + map_bias; - size_t npages = mapelm->bits >> LG_PAGE; + size_t npages = arena_mapbits_large_size_get(chunk, pageind) >> + LG_PAGE; assert(pageind + npages <= chunk_npages); assert(ndirty >= npages); @@ -806,15 +815,11 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty) run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE); assert(run_ind >= map_bias); assert(run_ind < chunk_npages); - if ((chunk->map[run_ind-map_bias].bits & CHUNK_MAP_LARGE) != 0) { - size = chunk->map[run_ind-map_bias].bits & ~PAGE_MASK; + if (arena_mapbits_large_get(chunk, run_ind) != 0) { + size = arena_mapbits_large_size_get(chunk, run_ind); assert(size == PAGE || - (chunk->map[run_ind+(size>>LG_PAGE)-1-map_bias].bits & - ~PAGE_MASK) == 0); - assert((chunk->map[run_ind+(size>>LG_PAGE)-1-map_bias].bits & - CHUNK_MAP_LARGE) != 0); - assert((chunk->map[run_ind+(size>>LG_PAGE)-1-map_bias].bits & - CHUNK_MAP_ALLOCATED) != 0); + arena_mapbits_large_size_get(chunk, + run_ind+(size>>LG_PAGE)-1) == 0); } else { size_t binind = arena_bin_index(arena, run->bin); arena_bin_info_t *bin_info = &arena_bin_info[binind]; @@ -837,7 +842,7 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty) * The run is dirty if the caller claims to have dirtied it, as well as * if it was already dirty before being allocated. 
*/ - if ((chunk->map[run_ind-map_bias].bits & CHUNK_MAP_DIRTY) != 0) + if (arena_mapbits_dirty_get(chunk, run_ind) != 0) dirty = true; flag_dirty = dirty ? CHUNK_MAP_DIRTY : 0; runs_avail = dirty ? &arena->runs_avail_dirty : @@ -845,58 +850,52 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty) /* Mark pages as unallocated in the chunk map. */ if (dirty) { - chunk->map[run_ind-map_bias].bits = size | CHUNK_MAP_DIRTY; - chunk->map[run_ind+run_pages-1-map_bias].bits = size | - CHUNK_MAP_DIRTY; + arena_mapbits_unallocated_set(chunk, run_ind, size, + CHUNK_MAP_DIRTY); + arena_mapbits_unallocated_set(chunk, run_ind+run_pages-1, size, + CHUNK_MAP_DIRTY); chunk->ndirty += run_pages; arena->ndirty += run_pages; } else { - chunk->map[run_ind-map_bias].bits = size | - (chunk->map[run_ind-map_bias].bits & CHUNK_MAP_UNZEROED); - chunk->map[run_ind+run_pages-1-map_bias].bits = size | - (chunk->map[run_ind+run_pages-1-map_bias].bits & - CHUNK_MAP_UNZEROED); + arena_mapbits_unallocated_set(chunk, run_ind, size, + arena_mapbits_unzeroed_get(chunk, run_ind)); + arena_mapbits_unallocated_set(chunk, run_ind+run_pages-1, size, + arena_mapbits_unzeroed_get(chunk, run_ind+run_pages-1)); } /* Try to coalesce forward. */ if (run_ind + run_pages < chunk_npages && - (chunk->map[run_ind+run_pages-map_bias].bits & CHUNK_MAP_ALLOCATED) - == 0 && (chunk->map[run_ind+run_pages-map_bias].bits & - CHUNK_MAP_DIRTY) == flag_dirty) { - size_t nrun_size = chunk->map[run_ind+run_pages-map_bias].bits & - ~PAGE_MASK; + arena_mapbits_allocated_get(chunk, run_ind+run_pages) == 0 && + arena_mapbits_dirty_get(chunk, run_ind+run_pages) == flag_dirty) { + size_t nrun_size = arena_mapbits_unallocated_size_get(chunk, + run_ind+run_pages); size_t nrun_pages = nrun_size >> LG_PAGE; /* * Remove successor from runs_avail; the coalesced run is * inserted later. */ - assert((chunk->map[run_ind+run_pages+nrun_pages-1-map_bias].bits - & ~PAGE_MASK) == nrun_size); - assert((chunk->map[run_ind+run_pages+nrun_pages-1-map_bias].bits - & CHUNK_MAP_ALLOCATED) == 0); - assert((chunk->map[run_ind+run_pages+nrun_pages-1-map_bias].bits - & CHUNK_MAP_DIRTY) == flag_dirty); + assert(arena_mapbits_unallocated_size_get(chunk, + run_ind+run_pages+nrun_pages-1) == nrun_size); + assert(arena_mapbits_dirty_get(chunk, + run_ind+run_pages+nrun_pages-1) == flag_dirty); arena_avail_tree_remove(runs_avail, - &chunk->map[run_ind+run_pages-map_bias]); + arena_mapp_get(chunk, run_ind+run_pages)); size += nrun_size; run_pages += nrun_pages; - chunk->map[run_ind-map_bias].bits = size | - (chunk->map[run_ind-map_bias].bits & CHUNK_MAP_FLAGS_MASK); - chunk->map[run_ind+run_pages-1-map_bias].bits = size | - (chunk->map[run_ind+run_pages-1-map_bias].bits & - CHUNK_MAP_FLAGS_MASK); + arena_mapbits_unallocated_size_set(chunk, run_ind, size); + arena_mapbits_unallocated_size_set(chunk, run_ind+run_pages-1, + size); } /* Try to coalesce backward. 
*/ - if (run_ind > map_bias && (chunk->map[run_ind-1-map_bias].bits & - CHUNK_MAP_ALLOCATED) == 0 && (chunk->map[run_ind-1-map_bias].bits & - CHUNK_MAP_DIRTY) == flag_dirty) { - size_t prun_size = chunk->map[run_ind-1-map_bias].bits & - ~PAGE_MASK; + if (run_ind > map_bias && arena_mapbits_allocated_get(chunk, run_ind-1) + == 0 && arena_mapbits_dirty_get(chunk, run_ind-1) == flag_dirty) { + size_t prun_size = arena_mapbits_unallocated_size_get(chunk, + run_ind-1); size_t prun_pages = prun_size >> LG_PAGE; run_ind -= prun_pages; @@ -905,31 +904,26 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty) * Remove predecessor from runs_avail; the coalesced run is * inserted later. */ - assert((chunk->map[run_ind-map_bias].bits & ~PAGE_MASK) - == prun_size); - assert((chunk->map[run_ind-map_bias].bits & CHUNK_MAP_ALLOCATED) - == 0); - assert((chunk->map[run_ind-map_bias].bits & CHUNK_MAP_DIRTY) - == flag_dirty); - arena_avail_tree_remove(runs_avail, - &chunk->map[run_ind-map_bias]); + assert(arena_mapbits_unallocated_size_get(chunk, run_ind) == + prun_size); + assert(arena_mapbits_dirty_get(chunk, run_ind) == flag_dirty); + arena_avail_tree_remove(runs_avail, arena_mapp_get(chunk, + run_ind)); size += prun_size; run_pages += prun_pages; - chunk->map[run_ind-map_bias].bits = size | - (chunk->map[run_ind-map_bias].bits & CHUNK_MAP_FLAGS_MASK); - chunk->map[run_ind+run_pages-1-map_bias].bits = size | - (chunk->map[run_ind+run_pages-1-map_bias].bits & - CHUNK_MAP_FLAGS_MASK); + arena_mapbits_unallocated_size_set(chunk, run_ind, size); + arena_mapbits_unallocated_size_set(chunk, run_ind+run_pages-1, + size); } /* Insert into runs_avail, now that coalescing is complete. */ - assert((chunk->map[run_ind-map_bias].bits & ~PAGE_MASK) == - (chunk->map[run_ind+run_pages-1-map_bias].bits & ~PAGE_MASK)); - assert((chunk->map[run_ind-map_bias].bits & CHUNK_MAP_DIRTY) == - (chunk->map[run_ind+run_pages-1-map_bias].bits & CHUNK_MAP_DIRTY)); - arena_avail_tree_insert(runs_avail, &chunk->map[run_ind-map_bias]); + assert(arena_mapbits_unallocated_size_get(chunk, run_ind) == + arena_mapbits_unallocated_size_get(chunk, run_ind+run_pages-1)); + assert(arena_mapbits_dirty_get(chunk, run_ind) == + arena_mapbits_dirty_get(chunk, run_ind+run_pages-1)); + arena_avail_tree_insert(runs_avail, arena_mapp_get(chunk, run_ind)); if (dirty) { /* @@ -943,14 +937,15 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty) } } - /* - * Deallocate chunk if it is now completely unused. The bit - * manipulation checks whether the first run is unallocated and extends - * to the end of the chunk. - */ - if ((chunk->map[0].bits & (~PAGE_MASK | CHUNK_MAP_ALLOCATED)) == - arena_maxclass) + /* Deallocate chunk if it is now completely unused. 
*/ + if (size == arena_maxclass) { + assert(run_ind == map_bias); + assert(run_pages == (arena_maxclass >> LG_PAGE)); + assert(arena_mapbits_allocated_get(chunk, map_bias) == 0); + assert(arena_mapbits_unallocated_size_get(chunk, map_bias) == + arena_maxclass); arena_chunk_dealloc(arena, chunk); + } /* * It is okay to do dirty page processing here even if the chunk was @@ -969,7 +964,7 @@ arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, { size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE; size_t head_npages = (oldsize - newsize) >> LG_PAGE; - size_t flag_dirty = chunk->map[pageind-map_bias].bits & CHUNK_MAP_DIRTY; + size_t flag_dirty = arena_mapbits_dirty_get(chunk, pageind); assert(oldsize > newsize); @@ -978,29 +973,21 @@ arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, * leading run as separately allocated. Set the last element of each * run first, in case of single-page runs. */ - assert((chunk->map[pageind-map_bias].bits & CHUNK_MAP_LARGE) != 0); - assert((chunk->map[pageind-map_bias].bits & CHUNK_MAP_ALLOCATED) != 0); - chunk->map[pageind+head_npages-1-map_bias].bits = flag_dirty | - (chunk->map[pageind+head_npages-1-map_bias].bits & - CHUNK_MAP_UNZEROED) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; - chunk->map[pageind-map_bias].bits = (oldsize - newsize) - | flag_dirty | (chunk->map[pageind-map_bias].bits & - CHUNK_MAP_UNZEROED) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; + assert(arena_mapbits_large_size_get(chunk, pageind) == oldsize); + arena_mapbits_large_set(chunk, pageind+head_npages-1, 0, flag_dirty | + arena_mapbits_unzeroed_get(chunk, pageind+head_npages-1)); + arena_mapbits_large_set(chunk, pageind, oldsize-newsize, flag_dirty | + arena_mapbits_unzeroed_get(chunk, pageind)); if (config_debug) { UNUSED size_t tail_npages = newsize >> LG_PAGE; - assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias] - .bits & ~PAGE_MASK) == 0); - assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias] - .bits & CHUNK_MAP_DIRTY) == flag_dirty); - assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias] - .bits & CHUNK_MAP_LARGE) != 0); - assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias] - .bits & CHUNK_MAP_ALLOCATED) != 0); + assert(arena_mapbits_large_size_get(chunk, + pageind+head_npages+tail_npages-1) == 0); + assert(arena_mapbits_dirty_get(chunk, + pageind+head_npages+tail_npages-1) == flag_dirty); } - chunk->map[pageind+head_npages-map_bias].bits = newsize | flag_dirty | - (chunk->map[pageind+head_npages-map_bias].bits & - CHUNK_MAP_FLAGS_MASK) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; + arena_mapbits_large_set(chunk, pageind+head_npages, newsize, flag_dirty + | arena_mapbits_unzeroed_get(chunk, pageind+head_npages)); arena_run_dalloc(arena, run, false); } @@ -1011,9 +998,7 @@ arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, { size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE; size_t head_npages = newsize >> LG_PAGE; - size_t tail_npages = (oldsize - newsize) >> LG_PAGE; - size_t flag_dirty = chunk->map[pageind-map_bias].bits & - CHUNK_MAP_DIRTY; + size_t flag_dirty = arena_mapbits_dirty_get(chunk, pageind); assert(oldsize > newsize); @@ -1022,28 +1007,22 @@ arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, * trailing run as separately allocated. Set the last element of each * run first, in case of single-page runs. 
*/ - assert((chunk->map[pageind-map_bias].bits & CHUNK_MAP_LARGE) != 0); - assert((chunk->map[pageind-map_bias].bits & CHUNK_MAP_ALLOCATED) != 0); - chunk->map[pageind+head_npages-1-map_bias].bits = flag_dirty | - (chunk->map[pageind+head_npages-1-map_bias].bits & - CHUNK_MAP_UNZEROED) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; - chunk->map[pageind-map_bias].bits = newsize | flag_dirty | - (chunk->map[pageind-map_bias].bits & CHUNK_MAP_UNZEROED) | - CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; + assert(arena_mapbits_large_size_get(chunk, pageind) == oldsize); + arena_mapbits_large_set(chunk, pageind+head_npages-1, 0, flag_dirty | + arena_mapbits_unzeroed_get(chunk, pageind+head_npages-1)); + arena_mapbits_large_set(chunk, pageind, newsize, flag_dirty | + arena_mapbits_unzeroed_get(chunk, pageind)); - assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias].bits & - ~PAGE_MASK) == 0); - assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias].bits & - CHUNK_MAP_LARGE) != 0); - assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias].bits & - CHUNK_MAP_ALLOCATED) != 0); - chunk->map[pageind+head_npages+tail_npages-1-map_bias].bits = - flag_dirty | - (chunk->map[pageind+head_npages+tail_npages-1-map_bias].bits & - CHUNK_MAP_UNZEROED) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; - chunk->map[pageind+head_npages-map_bias].bits = (oldsize - newsize) | - flag_dirty | (chunk->map[pageind+head_npages-map_bias].bits & - CHUNK_MAP_UNZEROED) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; + if (config_debug) { + UNUSED size_t tail_npages = (oldsize - newsize) >> LG_PAGE; + assert(arena_mapbits_large_size_get(chunk, + pageind+head_npages+tail_npages-1) == 0); + assert(arena_mapbits_dirty_get(chunk, + pageind+head_npages+tail_npages-1) == flag_dirty); + } + arena_mapbits_large_set(chunk, pageind+head_npages, oldsize-newsize, + flag_dirty | arena_mapbits_unzeroed_get(chunk, + pageind+head_npages)); arena_run_dalloc(arena, (arena_run_t *)((uintptr_t)run + newsize), dirty); @@ -1061,8 +1040,8 @@ arena_bin_runs_first(arena_bin_t *bin) chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(mapelm); pageind = ((((uintptr_t)mapelm - (uintptr_t)chunk->map) / sizeof(arena_chunk_map_t))) + map_bias; - run = (arena_run_t *)((uintptr_t)chunk + - (uintptr_t)((pageind - (mapelm->bits >> LG_PAGE)) << + run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - + arena_mapbits_small_runind_get(chunk, pageind)) << LG_PAGE)); return (run); } @@ -1075,7 +1054,7 @@ arena_bin_runs_insert(arena_bin_t *bin, arena_run_t *run) { arena_chunk_t *chunk = CHUNK_ADDR2BASE(run); size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE; - arena_chunk_map_t *mapelm = &chunk->map[pageind-map_bias]; + arena_chunk_map_t *mapelm = arena_mapp_get(chunk, pageind); assert(arena_run_tree_search(&bin->runs, mapelm) == NULL); @@ -1087,7 +1066,7 @@ arena_bin_runs_remove(arena_bin_t *bin, arena_run_t *run) { arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE; - arena_chunk_map_t *mapelm = &chunk->map[pageind-map_bias]; + arena_chunk_map_t *mapelm = arena_mapp_get(chunk, pageind); assert(arena_run_tree_search(&bin->runs, mapelm) != NULL); @@ -1126,7 +1105,7 @@ arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin) malloc_mutex_unlock(&bin->lock); /******************************/ malloc_mutex_lock(&arena->lock); - run = arena_run_alloc(arena, bin_info->run_size, false, false); + run = arena_run_alloc(arena, bin_info->run_size, false, binind, false); if (run != NULL) { 
bitmap_t *bitmap = (bitmap_t *)((uintptr_t)run + (uintptr_t)bin_info->bitmap_offset); @@ -1384,7 +1363,7 @@ arena_malloc_large(arena_t *arena, size_t size, bool zero) /* Large allocation. */ size = PAGE_CEILING(size); malloc_mutex_lock(&arena->lock); - ret = (void *)arena_run_alloc(arena, size, true, zero); + ret = (void *)arena_run_alloc(arena, size, true, BININD_INVALID, zero); if (ret == NULL) { malloc_mutex_unlock(&arena->lock); return (NULL); @@ -1428,7 +1407,7 @@ arena_palloc(arena_t *arena, size_t size, size_t alignment, bool zero) alloc_size = size + alignment - PAGE; malloc_mutex_lock(&arena->lock); - run = arena_run_alloc(arena, alloc_size, true, zero); + run = arena_run_alloc(arena, alloc_size, true, BININD_INVALID, zero); if (run == NULL) { malloc_mutex_unlock(&arena->lock); return (NULL); @@ -1485,8 +1464,7 @@ arena_prof_promoted(const void *ptr, size_t size) pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; binind = SMALL_SIZE2BIN(size); assert(binind < NBINS); - chunk->map[pageind-map_bias].bits = (chunk->map[pageind-map_bias].bits & - ~CHUNK_MAP_CLASS_MASK) | ((binind+1) << CHUNK_MAP_CLASS_SHIFT); + arena_mapbits_large_binind_set(chunk, pageind, binind); assert(isalloc(ptr, false) == PAGE); assert(isalloc(ptr, true) == size); @@ -1524,8 +1502,9 @@ arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, size_t npages, run_ind, past; assert(run != bin->runcur); - assert(arena_run_tree_search(&bin->runs, &chunk->map[ - (((uintptr_t)run-(uintptr_t)chunk)>>LG_PAGE)-map_bias]) == NULL); + assert(arena_run_tree_search(&bin->runs, + arena_mapp_get(chunk, ((uintptr_t)run-(uintptr_t)chunk)>>LG_PAGE)) + == NULL); binind = arena_bin_index(chunk->arena, run->bin); bin_info = &arena_bin_info[binind]; @@ -1545,18 +1524,16 @@ arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, * trim the clean pages before deallocating the dirty portion of the * run. */ - if ((chunk->map[run_ind-map_bias].bits & CHUNK_MAP_DIRTY) == 0 && past - - run_ind < npages) { + if (arena_mapbits_dirty_get(chunk, run_ind) == 0 && past - run_ind < + npages) { /* * Trim clean pages. Convert to large run beforehand. Set the * last map element first, in case this is a one-page run. 
*/ - chunk->map[run_ind+npages-1-map_bias].bits = CHUNK_MAP_LARGE | - (chunk->map[run_ind+npages-1-map_bias].bits & - CHUNK_MAP_FLAGS_MASK); - chunk->map[run_ind-map_bias].bits = bin_info->run_size | - CHUNK_MAP_LARGE | (chunk->map[run_ind-map_bias].bits & - CHUNK_MAP_FLAGS_MASK); + arena_mapbits_large_set(chunk, run_ind+npages-1, 0, + arena_mapbits_unzeroed_get(chunk, run_ind+npages-1)); + arena_mapbits_large_set(chunk, run_ind, bin_info->run_size, + arena_mapbits_unzeroed_get(chunk, run_ind)); arena_run_trim_tail(arena, chunk, run, (npages << LG_PAGE), ((past - run_ind) << LG_PAGE), false); /* npages = past - run_ind; */ @@ -1591,7 +1568,7 @@ arena_bin_lower_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, } void -arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, +arena_dalloc_bin_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr, arena_chunk_map_t *mapelm) { size_t pageind; @@ -1602,9 +1579,9 @@ arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - - (mapelm->bits >> LG_PAGE)) << LG_PAGE)); + arena_mapbits_small_runind_get(chunk, pageind)) << LG_PAGE)); bin = run->bin; - binind = arena_bin_index(arena, bin); + binind = arena_ptr_binind(ptr, mapelm->bits); bin_info = &arena_bin_info[binind]; if (config_fill || config_stats) size = bin_info->reg_size; @@ -1625,6 +1602,34 @@ arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, } } +void +arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, + size_t pageind, arena_chunk_map_t *mapelm) +{ + arena_run_t *run; + arena_bin_t *bin; + + run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - + arena_mapbits_small_runind_get(chunk, pageind)) << LG_PAGE)); + bin = run->bin; + malloc_mutex_lock(&bin->lock); + arena_dalloc_bin_locked(arena, chunk, ptr, mapelm); + malloc_mutex_unlock(&bin->lock); +} + +void +arena_dalloc_small(arena_t *arena, arena_chunk_t *chunk, void *ptr, + size_t pageind) +{ + arena_chunk_map_t *mapelm; + + if (config_debug) { + assert(arena_ptr_binind(ptr, arena_mapbits_get(chunk, pageind)) + != BININD_INVALID); + } + mapelm = arena_mapp_get(chunk, pageind); + arena_dalloc_bin(arena, chunk, ptr, pageind, mapelm); +} void arena_stats_merge(arena_t *arena, size_t *nactive, size_t *ndirty, arena_stats_t *astats, malloc_bin_stats_t *bstats, @@ -1673,12 +1678,12 @@ arena_stats_merge(arena_t *arena, size_t *nactive, size_t *ndirty, } void -arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr) +arena_dalloc_large_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr) { if (config_fill || config_stats) { size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - size_t size = chunk->map[pageind-map_bias].bits & ~PAGE_MASK; + size_t size = arena_mapbits_large_size_get(chunk, pageind); if (config_fill && config_stats && opt_junk) memset(ptr, 0x5a, size); @@ -1693,6 +1698,15 @@ arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr) arena_run_dalloc(arena, (arena_run_t *)ptr, true); } +void +arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr) +{ + + malloc_mutex_lock(&arena->lock); + arena_dalloc_large_locked(arena, chunk, ptr); + malloc_mutex_unlock(&arena->lock); +} + static void arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk, void *ptr, size_t oldsize, size_t size) @@ -1731,16 +1745,15 @@ arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr, size_t npages = 
oldsize >> LG_PAGE; size_t followsize; - assert(oldsize == (chunk->map[pageind-map_bias].bits & ~PAGE_MASK)); + assert(oldsize == arena_mapbits_large_size_get(chunk, pageind)); /* Try to extend the run. */ assert(size + extra > oldsize); malloc_mutex_lock(&arena->lock); if (pageind + npages < chunk_npages && - (chunk->map[pageind+npages-map_bias].bits - & CHUNK_MAP_ALLOCATED) == 0 && (followsize = - chunk->map[pageind+npages-map_bias].bits & ~PAGE_MASK) >= size - - oldsize) { + arena_mapbits_allocated_get(chunk, pageind+npages) == 0 && + (followsize = arena_mapbits_unallocated_size_get(chunk, + pageind+npages)) >= size - oldsize) { /* * The next run is available and sufficiently large. Split the * following run, then merge the first part with the existing @@ -1750,7 +1763,8 @@ arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr, size_t splitsize = (oldsize + followsize <= size + extra) ? followsize : size + extra - oldsize; arena_run_split(arena, (arena_run_t *)((uintptr_t)chunk + - ((pageind+npages) << LG_PAGE)), splitsize, true, zero); + ((pageind+npages) << LG_PAGE)), splitsize, true, + BININD_INVALID, zero); size = oldsize + splitsize; npages = size >> LG_PAGE; @@ -1763,29 +1777,22 @@ arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr, * arena_run_dalloc() with the dirty argument set to false * (which is when dirty flag consistency would really matter). */ - flag_dirty = (chunk->map[pageind-map_bias].bits & - CHUNK_MAP_DIRTY) | - (chunk->map[pageind+npages-1-map_bias].bits & - CHUNK_MAP_DIRTY); - chunk->map[pageind-map_bias].bits = size | flag_dirty - | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; - chunk->map[pageind+npages-1-map_bias].bits = flag_dirty | - CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; + flag_dirty = arena_mapbits_dirty_get(chunk, pageind) | + arena_mapbits_dirty_get(chunk, pageind+npages-1); + arena_mapbits_large_set(chunk, pageind, size, flag_dirty); + arena_mapbits_large_set(chunk, pageind+npages-1, 0, flag_dirty); if (config_stats) { arena->stats.ndalloc_large++; arena->stats.allocated_large -= oldsize; - arena->stats.lstats[(oldsize >> LG_PAGE) - - 1].ndalloc++; - arena->stats.lstats[(oldsize >> LG_PAGE) - - 1].curruns--; + arena->stats.lstats[(oldsize >> LG_PAGE) - 1].ndalloc++; + arena->stats.lstats[(oldsize >> LG_PAGE) - 1].curruns--; arena->stats.nmalloc_large++; arena->stats.nrequests_large++; arena->stats.allocated_large += size; arena->stats.lstats[(size >> LG_PAGE) - 1].nmalloc++; - arena->stats.lstats[(size >> LG_PAGE) - - 1].nrequests++; + arena->stats.lstats[(size >> LG_PAGE) - 1].nrequests++; arena->stats.lstats[(size >> LG_PAGE) - 1].curruns++; } malloc_mutex_unlock(&arena->lock); diff --git a/src/tcache.c b/src/tcache.c index 9c4970c5..60244c45 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -24,6 +24,46 @@ size_t tcache_salloc(const void *ptr) return (arena_salloc(ptr, false)); } +void +tcache_event_hard(tcache_t *tcache) +{ + size_t binind = tcache->next_gc_bin; + tcache_bin_t *tbin = &tcache->tbins[binind]; + tcache_bin_info_t *tbin_info = &tcache_bin_info[binind]; + + if (tbin->low_water > 0) { + /* + * Flush (ceiling) 3/4 of the objects below the low water mark. + */ + if (binind < NBINS) { + tcache_bin_flush_small(tbin, binind, tbin->ncached - + tbin->low_water + (tbin->low_water >> 2), tcache); + } else { + tcache_bin_flush_large(tbin, binind, tbin->ncached - + tbin->low_water + (tbin->low_water >> 2), tcache); + } + /* + * Reduce fill count by 2X. 
Limit lg_fill_div such that the + * fill count is always at least 1. + */ + if ((tbin_info->ncached_max >> (tbin->lg_fill_div+1)) >= 1) + tbin->lg_fill_div++; + } else if (tbin->low_water < 0) { + /* + * Increase fill count by 2X. Make sure lg_fill_div stays + * greater than 0. + */ + if (tbin->lg_fill_div > 1) + tbin->lg_fill_div--; + } + tbin->low_water = tbin->ncached; + + tcache->next_gc_bin++; + if (tcache->next_gc_bin == nhbins) + tcache->next_gc_bin = 0; + tcache->ev_cnt = 0; +} + void * tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin, size_t binind) { @@ -80,12 +120,13 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem, size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; arena_chunk_map_t *mapelm = - &chunk->map[pageind-map_bias]; + arena_mapp_get(chunk, pageind); if (config_fill && opt_junk) { arena_alloc_junk_small(ptr, &arena_bin_info[binind], true); } - arena_dalloc_bin(arena, chunk, ptr, mapelm); + arena_dalloc_bin_locked(arena, chunk, ptr, + mapelm); } else { /* * This object was allocated via a different @@ -158,7 +199,7 @@ tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem, assert(ptr != NULL); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (chunk->arena == arena) - arena_dalloc_large(arena, chunk, ptr); + arena_dalloc_large_locked(arena, chunk, ptr); else { /* * This object was allocated via a different @@ -314,22 +355,14 @@ tcache_destroy(tcache_t *tcache) arena_t *arena = chunk->arena; size_t pageind = ((uintptr_t)tcache - (uintptr_t)chunk) >> LG_PAGE; - arena_chunk_map_t *mapelm = &chunk->map[pageind-map_bias]; - arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + - (uintptr_t)((pageind - (mapelm->bits >> LG_PAGE)) << - LG_PAGE)); - arena_bin_t *bin = run->bin; + arena_chunk_map_t *mapelm = arena_mapp_get(chunk, pageind); - malloc_mutex_lock(&bin->lock); - arena_dalloc_bin(arena, chunk, tcache, mapelm); - malloc_mutex_unlock(&bin->lock); + arena_dalloc_bin(arena, chunk, tcache, pageind, mapelm); } else if (tcache_size <= tcache_maxclass) { arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache); arena_t *arena = chunk->arena; - malloc_mutex_lock(&arena->lock); arena_dalloc_large(arena, chunk, tcache); - malloc_mutex_unlock(&arena->lock); } else idalloc(tcache); } diff --git a/src/tsd.c b/src/tsd.c index 00d7b45d..961a5463 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -14,7 +14,7 @@ malloc_tsd_malloc(size_t size) { /* Avoid choose_arena() in order to dodge bootstrapping issues. */ - return arena_malloc(arenas[0], size, false, false); + return (arena_malloc(arenas[0], size, false, false)); } void From 8d5865eb578e99369382d90bdd1e557e5b233277 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 2 May 2012 01:22:16 -0700 Subject: [PATCH 183/205] Make CACHELINE a raw constant. Make CACHELINE a raw constant in order to work around a __declspec(align()) limitation. Submitted by Mike Hommey. --- include/jemalloc/internal/jemalloc_internal.h.in | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index ccecfaa1..e441285b 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -307,9 +307,12 @@ static const bool config_ivsalloc = /* * Maximum size of L1 cache line. This is used to avoid cache line aliasing. * In addition, this controls the spacing of cacheline-spaced size classes. 
+ * + * CACHELINE cannot be based on LG_CACHELINE because __declspec(align()) can + * only handle raw constants. */ #define LG_CACHELINE 6 -#define CACHELINE ((size_t)(1U << LG_CACHELINE)) +#define CACHELINE 64 #define CACHELINE_MASK (CACHELINE - 1) /* Return the smallest cacheline multiple that is >= s. */ From 889ec59bd3ae3190fb715e64d8d15b6a1b47314a Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 2 May 2012 02:08:03 -0700 Subject: [PATCH 184/205] Make malloc_write() non-inline. Make malloc_write() non-inline, in order to resolve its dependency on je_malloc_message(). --- include/jemalloc/internal/util.h | 12 +----------- src/util.c | 11 +++++++++++ 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index fa88bf3f..84796936 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -84,6 +84,7 @@ int buferror(char *buf, size_t buflen); uintmax_t malloc_strtoumax(const char *nptr, char **endptr, int base); +void malloc_write(const char *s); /* * malloc_vsnprintf() supports a subset of snprintf(3) that avoids floating @@ -130,17 +131,6 @@ pow2_ceil(size_t x) return (x); } -/* - * Wrapper around malloc_message() that avoids the need for - * je_malloc_message(...) throughout the code. - */ -JEMALLOC_INLINE void -malloc_write(const char *s) -{ - - je_malloc_message(NULL, s); -} - /* Sets error code */ JEMALLOC_INLINE void set_errno(int errnum) diff --git a/src/util.c b/src/util.c index 3c92ad2c..4f716957 100644 --- a/src/util.c +++ b/src/util.c @@ -59,6 +59,17 @@ wrtmessage(void *cbopaque, const char *s) JEMALLOC_EXPORT void (*je_malloc_message)(void *, const char *s) = wrtmessage; +/* + * Wrapper around malloc_message() that avoids the need for + * je_malloc_message(...) throughout the code. + */ +void +malloc_write(const char *s) +{ + + je_malloc_message(NULL, s); +} + /* * glibc provides a non-standard strerror_r() when _GNU_SOURCE is defined, so * provide a wrapper. From 9a7944f8aba8806c2806c7bb4d61b1b6df59f044 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 2 May 2012 02:16:51 -0700 Subject: [PATCH 185/205] Update private namespace mangling.
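For context, a brief sketch of what these defines accomplish: JEMALLOC_N() pastes a configure-time prefix onto every internal symbol so that a static link does not pollute the application's namespace. The "je_" prefix below is an assumption for illustration only; the real prefix comes from --with-private-namespace:

    /* Hypothetical expansion, assuming the private prefix is "je_". */
    #define JEMALLOC_N(n) je_##n
    #define arena_mapbits_get JEMALLOC_N(arena_mapbits_get)

    /* A call to arena_mapbits_get(chunk, pageind) now compiles to
     * je_arena_mapbits_get(chunk, pageind). */

The patch itself re-sorts the define list alphabetically and drops a duplicated prof_lookup entry.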
--- include/jemalloc/internal/private_namespace.h | 25 +++++++++---------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/include/jemalloc/internal/private_namespace.h b/include/jemalloc/internal/private_namespace.h index 6ac07631..e599d64e 100644 --- a/include/jemalloc/internal/private_namespace.h +++ b/include/jemalloc/internal/private_namespace.h @@ -15,21 +15,21 @@ #define arena_malloc JEMALLOC_N(arena_malloc) #define arena_malloc_large JEMALLOC_N(arena_malloc_large) #define arena_malloc_small JEMALLOC_N(arena_malloc_small) -#define arena_mapbitsp_get JEMALLOC_N(arena_mapbitsp_get) +#define arena_mapbits_allocated_get JEMALLOC_N(arena_mapbits_allocated_get) +#define arena_mapbits_dirty_get JEMALLOC_N(arena_mapbits_dirty_get) #define arena_mapbits_get JEMALLOC_N(arena_mapbits_get) -#define arena_mapbits_unallocated_size_get JEMALLOC_N(arena_mapbits_unallocated_size_get) +#define arena_mapbits_large_binind_set JEMALLOC_N(arena_mapbits_large_binind_set) +#define arena_mapbits_large_get JEMALLOC_N(arena_mapbits_large_get) +#define arena_mapbits_large_set JEMALLOC_N(arena_mapbits_large_set) #define arena_mapbits_large_size_get JEMALLOC_N(arena_mapbits_large_size_get) #define arena_mapbits_small_runind_get JEMALLOC_N(arena_mapbits_small_runind_get) -#define arena_mapbits_dirty_get JEMALLOC_N(arena_mapbits_dirty_get) -#define arena_mapbits_unzeroed_get JEMALLOC_N(arena_mapbits_unzeroed_get) -#define arena_mapbits_large_get JEMALLOC_N(arena_mapbits_large_get) -#define arena_mapbits_allocated_get JEMALLOC_N(arena_mapbits_allocated_get) -#define arena_mapbits_unallocated_set JEMALLOC_N(arena_mapbits_unallocated_set) -#define arena_mapbits_unallocated_size_set JEMALLOC_N(arena_mapbits_unallocated_size_set) -#define arena_mapbits_large_set JEMALLOC_N(arena_mapbits_large_set) -#define arena_mapbits_large_binind_set JEMALLOC_N(arena_mapbits_large_binind_set) #define arena_mapbits_small_set JEMALLOC_N(arena_mapbits_small_set) +#define arena_mapbits_unallocated_set JEMALLOC_N(arena_mapbits_unallocated_set) +#define arena_mapbits_unallocated_size_get JEMALLOC_N(arena_mapbits_unallocated_size_get) +#define arena_mapbits_unallocated_size_set JEMALLOC_N(arena_mapbits_unallocated_size_set) +#define arena_mapbits_unzeroed_get JEMALLOC_N(arena_mapbits_unzeroed_get) #define arena_mapbits_unzeroed_set JEMALLOC_N(arena_mapbits_unzeroed_set) +#define arena_mapbitsp_get JEMALLOC_N(arena_mapbitsp_get) #define arena_mapp_get JEMALLOC_N(arena_mapp_get) #define arena_maxclass JEMALLOC_N(arena_maxclass) #define arena_new JEMALLOC_N(arena_new) @@ -253,7 +253,6 @@ #define prof_lookup JEMALLOC_N(prof_lookup) #define prof_malloc JEMALLOC_N(prof_malloc) #define prof_mdump JEMALLOC_N(prof_mdump) -#define prof_lookup JEMALLOC_N(prof_lookup) #define prof_promote JEMALLOC_N(prof_promote) #define prof_realloc JEMALLOC_N(prof_realloc) #define prof_sample_accum_update JEMALLOC_N(prof_sample_accum_update) @@ -317,12 +316,12 @@ #define tcache_enabled_tsd_set JEMALLOC_N(tcache_enabled_tsd_set) #define tcache_event JEMALLOC_N(tcache_event) #define tcache_event_hard JEMALLOC_N(tcache_event_hard) -#define tcache_initialized JEMALLOC_N(tcache_initialized) #define tcache_flush JEMALLOC_N(tcache_flush) #define tcache_get JEMALLOC_N(tcache_get) +#define tcache_initialized JEMALLOC_N(tcache_initialized) #define tcache_maxclass JEMALLOC_N(tcache_maxclass) -#define tcache_stats_merge JEMALLOC_N(tcache_stats_merge) #define tcache_salloc JEMALLOC_N(tcache_salloc) +#define tcache_stats_merge JEMALLOC_N(tcache_stats_merge) #define 
tcache_thread_cleanup JEMALLOC_N(tcache_thread_cleanup) #define tcache_tls JEMALLOC_N(tcache_tls) #define tcache_tsd_boot JEMALLOC_N(tcache_tsd_boot) From 1b523da21c8aded36dbe669a9e9ca78c3c96cad7 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 2 May 2012 03:02:53 -0700 Subject: [PATCH 186/205] Fix partial rename of s/EXPORT/JEMALLOC_EXPORT/g. --- include/jemalloc/jemalloc_defs.h.in | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/include/jemalloc/jemalloc_defs.h.in b/include/jemalloc/jemalloc_defs.h.in index e61597c0..c9ab6468 100644 --- a/include/jemalloc/jemalloc_defs.h.in +++ b/include/jemalloc/jemalloc_defs.h.in @@ -111,11 +111,11 @@ # define JEMALLOC_NOINLINE JEMALLOC_ATTR(noinline) #elif _MSC_VER # define JEMALLOC_ATTR(s) -#ifdef DLLEXPORT -# define EXPORT __declspec(dllexport) -#else -# define EXPORT __declspec(dllimport) -#endif +# ifdef DLLEXPORT +# define JEMALLOC_EXPORT __declspec(dllexport) +# else +# define JEMALLOC_EXPORT __declspec(dllimport) +# endif # define JEMALLOC_ALIGNED(s) __declspec(align(s)) # define JEMALLOC_SECTION(s) __declspec(allocate(s)) # define JEMALLOC_NOINLINE __declspec(noinline) From 7bfecf412dab69e771c9bbbaa01160bb69af8ec0 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Mon, 30 Apr 2012 15:15:15 +0200 Subject: [PATCH 187/205] Check for VALGRIND_RESIZEINPLACE_BLOCK support VALGRIND_RESIZEINPLACE_BLOCK was added in valgrind 3.7. Unfortunately, the __VALGRIND_MINOR__ macro still says 6 in 3.7 :( --- configure.ac | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/configure.ac b/configure.ac index 50eaae65..eeb2a290 100644 --- a/configure.ac +++ b/configure.ac @@ -841,10 +841,7 @@ if test "x$enable_valgrind" = "x1" ; then #include #include -#if defined(__VALGRIND_MAJOR__) && defined(__VALGRIND_MINOR__) \ - && (__VALGRIND_MAJOR__ > 3 || (__VALGRIND_MAJOR__ == 3 && \ - __VALGRIND_MINOR__ >= 6)) -#else +#if !defined(VALGRIND_RESIZEINPLACE_BLOCK) # error "Incompatible Valgrind version" #endif ], [], [je_cv_valgrind]) From 80737c3323dabc45232affcaeb99ac2bad6ea647 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 2 May 2012 16:11:03 -0700 Subject: [PATCH 188/205] Further optimize and harden arena_salloc(). Further optimize arena_salloc() to only look at the binind chunk map bits in the common case. Add more sanity checks to arena_salloc() that detect chunk map inconsistencies for large allocations (whether due to allocator bugs or application bugs). 
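In outline, the hardened lookup needs only a single chunk map read in the common case. A simplified sketch of the control flow in the diff below (it omits the config_prof/prof_promote demotion handling):

    size_t binind = arena_mapbits_binind_get(chunk, pageind);
    if (binind == BININD_INVALID) {
        /* Large allocation: the size is encoded directly in the map bits. */
        ret = arena_mapbits_large_size_get(chunk, pageind);
    } else {
        /* Small allocation (common case): binind alone yields the size. */
        ret = arena_bin_info[binind].reg_size;
    }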
--- include/jemalloc/internal/arena.h | 98 +++++++++++++------ include/jemalloc/internal/private_namespace.h | 3 +- src/arena.c | 9 +- 3 files changed, 73 insertions(+), 37 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 067b75a8..264b5d3a 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -447,6 +447,7 @@ size_t arena_mapbits_unallocated_size_get(arena_chunk_t *chunk, size_t pageind); size_t arena_mapbits_large_size_get(arena_chunk_t *chunk, size_t pageind); size_t arena_mapbits_small_runind_get(arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbits_binind_get(arena_chunk_t *chunk, size_t pageind); size_t arena_mapbits_dirty_get(arena_chunk_t *chunk, size_t pageind); size_t arena_mapbits_unzeroed_get(arena_chunk_t *chunk, size_t pageind); size_t arena_mapbits_large_get(arena_chunk_t *chunk, size_t pageind); @@ -463,7 +464,7 @@ void arena_mapbits_small_set(arena_chunk_t *chunk, size_t pageind, size_t runind, size_t binind, size_t flags); void arena_mapbits_unzeroed_set(arena_chunk_t *chunk, size_t pageind, size_t unzeroed); -size_t arena_ptr_binind(const void *ptr, size_t mapbits); +size_t arena_ptr_small_binind_get(const void *ptr, size_t mapbits); size_t arena_bin_index(arena_t *arena, arena_bin_t *bin); unsigned arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr); @@ -533,6 +534,18 @@ arena_mapbits_small_runind_get(arena_chunk_t *chunk, size_t pageind) return (mapbits >> LG_PAGE); } +JEMALLOC_INLINE size_t +arena_mapbits_binind_get(arena_chunk_t *chunk, size_t pageind) +{ + size_t mapbits; + size_t binind; + + mapbits = arena_mapbits_get(chunk, pageind); + binind = (mapbits & CHUNK_MAP_BININD_MASK) >> CHUNK_MAP_BININD_SHIFT; + assert(binind < NBINS || binind == BININD_INVALID); + return (binind); +} + JEMALLOC_INLINE size_t arena_mapbits_dirty_get(arena_chunk_t *chunk, size_t pageind) { @@ -644,25 +657,37 @@ arena_mapbits_unzeroed_set(arena_chunk_t *chunk, size_t pageind, } JEMALLOC_INLINE size_t -arena_ptr_binind(const void *ptr, size_t mapbits) +arena_ptr_small_binind_get(const void *ptr, size_t mapbits) { size_t binind; binind = (mapbits & CHUNK_MAP_BININD_MASK) >> CHUNK_MAP_BININD_SHIFT; if (config_debug) { - arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - arena_t *arena = chunk->arena; - size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - size_t actual_mapbits = arena_mapbits_get(chunk, pageind); - arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + - (uintptr_t)((pageind - (actual_mapbits >> LG_PAGE)) << - LG_PAGE)); arena_bin_t *bin = run->bin; - size_t actual_binind = bin - arena->bins; - arena_bin_info_t *bin_info = &arena_bin_info[actual_binind]; + arena_chunk_t *chunk; + arena_t *arena; + size_t pageind; + size_t actual_mapbits; + arena_run_t *run; + arena_bin_t *bin; + size_t actual_binind; + arena_bin_info_t *bin_info; + assert(binind != BININD_INVALID); + assert(binind < NBINS); + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + arena = chunk->arena; + pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; + actual_mapbits = arena_mapbits_get(chunk, pageind); assert(mapbits == actual_mapbits); + assert(arena_mapbits_large_get(chunk, pageind) == 0); + assert(arena_mapbits_allocated_get(chunk, pageind) != 0); + run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - + (actual_mapbits >> LG_PAGE)) << LG_PAGE)); + bin = run->bin; + actual_binind = bin - arena->bins; assert(binind == actual_binind); + bin_info 
= &arena_bin_info[actual_binind]; assert(((uintptr_t)ptr - ((uintptr_t)run + (uintptr_t)bin_info->reg0_offset)) % bin_info->reg_interval == 0); @@ -775,7 +800,8 @@ arena_prof_ctx_get(const void *ptr) arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - (mapbits >> LG_PAGE)) << LG_PAGE)); - size_t binind = arena_ptr_binind(ptr, mapbits); + size_t binind = arena_ptr_small_binind_get(ptr, + mapbits); arena_bin_info_t *bin_info = &arena_bin_info[binind]; unsigned regind; @@ -813,7 +839,7 @@ arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx) arena_bin_info_t *bin_info; unsigned regind; - binind = arena_ptr_binind(ptr, mapbits); + binind = arena_ptr_small_binind_get(ptr, mapbits); bin_info = &arena_bin_info[binind]; regind = arena_run_regind(run, bin_info, ptr); @@ -861,7 +887,7 @@ arena_salloc(const void *ptr, bool demote) { size_t ret; arena_chunk_t *chunk; - size_t pageind, mapbits; + size_t pageind, binind; assert(ptr != NULL); assert(CHUNK_ADDR2BASE(ptr) != ptr); @@ -869,25 +895,34 @@ arena_salloc(const void *ptr, bool demote) chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; assert(arena_mapbits_allocated_get(chunk, pageind) != 0); - mapbits = arena_mapbits_get(chunk, pageind); - if ((mapbits & CHUNK_MAP_LARGE) == 0) { - size_t binind = arena_ptr_binind(ptr, mapbits); - ret = arena_bin_info[binind].reg_size; - } else { + binind = arena_mapbits_binind_get(chunk, pageind); + if (binind == BININD_INVALID || (config_prof && demote == false && + prof_promote && arena_mapbits_large_get(chunk, pageind) != 0)) { + /* + * Large allocation. In the common case (demote == true), and + * as this is an inline function, most callers will only end up + * looking at binind to determine that ptr is a small + * allocation. + */ assert(((uintptr_t)ptr & PAGE_MASK) == 0); ret = arena_mapbits_large_size_get(chunk, pageind); - if (config_prof && demote && prof_promote && ret == PAGE && - (mapbits & CHUNK_MAP_BININD_MASK) != - CHUNK_MAP_BININD_MASK) { - size_t binind = ((mapbits & CHUNK_MAP_BININD_MASK) >> - CHUNK_MAP_BININD_SHIFT); - assert(binind < NBINS); - ret = arena_bin_info[binind].reg_size; - } else { - assert(demote == false || (mapbits & - CHUNK_MAP_BININD_MASK) == CHUNK_MAP_BININD_MASK); - } assert(ret != 0); + assert(pageind + (ret>>LG_PAGE) <= chunk_npages); + assert(ret == PAGE || arena_mapbits_large_size_get(chunk, + pageind+(ret>>LG_PAGE)-1) == 0); + assert(binind == arena_mapbits_binind_get(chunk, + pageind+(ret>>LG_PAGE)-1)); + assert(arena_mapbits_dirty_get(chunk, pageind) == + arena_mapbits_dirty_get(chunk, pageind+(ret>>LG_PAGE)-1)); + } else { + /* + * Small allocation (possibly promoted to a large object due to + * prof_promote). 
+ */ + assert(arena_mapbits_large_get(chunk, pageind) != 0 || + arena_ptr_small_binind_get(ptr, arena_mapbits_get(chunk, + pageind)) == binind); + ret = arena_bin_info[binind].reg_size; } return (ret); @@ -912,8 +947,7 @@ arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr, bool try_tcache) if (try_tcache && (tcache = tcache_get(false)) != NULL) { size_t binind; - binind = arena_ptr_binind(ptr, mapbits); - assert(binind < NBINS); + binind = arena_ptr_small_binind_get(ptr, mapbits); tcache_dalloc_small(tcache, ptr, binind); } else arena_dalloc_small(arena, chunk, ptr, pageind); diff --git a/include/jemalloc/internal/private_namespace.h b/include/jemalloc/internal/private_namespace.h index e599d64e..b8166470 100644 --- a/include/jemalloc/internal/private_namespace.h +++ b/include/jemalloc/internal/private_namespace.h @@ -16,6 +16,7 @@ #define arena_malloc_large JEMALLOC_N(arena_malloc_large) #define arena_malloc_small JEMALLOC_N(arena_malloc_small) #define arena_mapbits_allocated_get JEMALLOC_N(arena_mapbits_allocated_get) +#define arena_mapbits_binind_get JEMALLOC_N(arena_mapbits_binind_get) #define arena_mapbits_dirty_get JEMALLOC_N(arena_mapbits_dirty_get) #define arena_mapbits_get JEMALLOC_N(arena_mapbits_get) #define arena_mapbits_large_binind_set JEMALLOC_N(arena_mapbits_large_binind_set) @@ -41,7 +42,7 @@ #define arena_prof_ctx_get JEMALLOC_N(arena_prof_ctx_get) #define arena_prof_ctx_set JEMALLOC_N(arena_prof_ctx_set) #define arena_prof_promoted JEMALLOC_N(arena_prof_promoted) -#define arena_ptr_binind JEMALLOC_N(arena_ptr_binind) +#define arena_ptr_small_binind_get JEMALLOC_N(arena_ptr_small_binind_get) #define arena_purge_all JEMALLOC_N(arena_purge_all) #define arena_ralloc JEMALLOC_N(arena_ralloc) #define arena_ralloc_no_move JEMALLOC_N(arena_ralloc_no_move) diff --git a/src/arena.c b/src/arena.c index 3ef4b8fb..9f24e7ca 100644 --- a/src/arena.c +++ b/src/arena.c @@ -154,7 +154,7 @@ arena_run_reg_dalloc(arena_run_t *run, void *ptr) arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; size_t mapbits = arena_mapbits_get(chunk, pageind); - size_t binind = arena_ptr_binind(ptr, mapbits); + size_t binind = arena_ptr_small_binind_get(ptr, mapbits); arena_bin_info_t *bin_info = &arena_bin_info[binind]; unsigned regind = arena_run_regind(run, bin_info, ptr); bitmap_t *bitmap = (bitmap_t *)((uintptr_t)run + @@ -1581,7 +1581,7 @@ arena_dalloc_bin_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr, run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - arena_mapbits_small_runind_get(chunk, pageind)) << LG_PAGE)); bin = run->bin; - binind = arena_ptr_binind(ptr, mapelm->bits); + binind = arena_ptr_small_binind_get(ptr, mapelm->bits); bin_info = &arena_bin_info[binind]; if (config_fill || config_stats) size = bin_info->reg_size; @@ -1624,8 +1624,9 @@ arena_dalloc_small(arena_t *arena, arena_chunk_t *chunk, void *ptr, arena_chunk_map_t *mapelm; if (config_debug) { - assert(arena_ptr_binind(ptr, arena_mapbits_get(chunk, pageind)) - != BININD_INVALID); + /* arena_ptr_small_binind_get() does extra sanity checking. 
*/ + assert(arena_ptr_small_binind_get(ptr, arena_mapbits_get(chunk, + pageind)) != BININD_INVALID); } mapelm = arena_mapp_get(chunk, pageind); arena_dalloc_bin(arena, chunk, ptr, pageind, mapelm); From 3597e91482c804592105ea078a0825fdb7c68dff Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Wed, 2 May 2012 13:15:00 +0200 Subject: [PATCH 189/205] Allow je_malloc_message to be overridden when linking statically If an application wants to override je_malloc_message, it is better to define the symbol locally than to change its value in main(), which might be too late for various reasons. Due to je_malloc_message being initialized in util.c, statically linking jemalloc with an application defining je_malloc_message fails due to "multiple definition of" the symbol. Defining it without a value (like je_malloc_conf) makes it more easily overridable. --- src/stats.c | 22 +++++++--------------- src/util.c | 11 +++++++---- 2 files changed, 14 insertions(+), 19 deletions(-) diff --git a/src/stats.c b/src/stats.c index 2854b309..1234e565 100644 --- a/src/stats.c +++ b/src/stats.c @@ -295,16 +295,6 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, abort(); } - if (write_cb == NULL) { - /* - * The caller did not provide an alternate write_cb callback - * function, so use the default one. malloc_write() is an - * inline function, so use malloc_message() directly here. - */ - write_cb = je_malloc_message; - cbopaque = NULL; - } - if (opts != NULL) { unsigned i; @@ -330,7 +320,8 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, } } - write_cb(cbopaque, "___ Begin jemalloc statistics ___\n"); + malloc_cprintf(write_cb, cbopaque, + "___ Begin jemalloc statistics ___\n"); if (general) { int err; const char *cpv; @@ -375,7 +366,8 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, " opt."#n": \"%s\"\n", cpv); \ } - write_cb(cbopaque, "Run-time option settings:\n"); + malloc_cprintf(write_cb, cbopaque, + "Run-time option settings:\n"); OPT_WRITE_BOOL(abort) OPT_WRITE_SIZE_T(lg_chunk) OPT_WRITE_SIZE_T(narenas) @@ -425,7 +417,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, "Min active:dirty page ratio per arena: %u:1\n", (1U << ssv)); } else { - write_cb(cbopaque, + malloc_cprintf(write_cb, cbopaque, "Min active:dirty page ratio per arena: N/A\n"); } if ((err = je_mallctl("arenas.tcache_max", &sv, &ssz, NULL, 0)) @@ -447,7 +439,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, " (2^%zd)\n", (((uint64_t)1U) << ssv), ssv); } else { - write_cb(cbopaque, + malloc_cprintf(write_cb, cbopaque, "Average profile dump interval: N/A\n"); } } @@ -547,5 +539,5 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, } } } - write_cb(cbopaque, "--- End jemalloc statistics ---\n"); + malloc_cprintf(write_cb, cbopaque, "--- End jemalloc statistics ---\n"); } diff --git a/src/util.c b/src/util.c index 4f716957..9b73c3ec 100644 --- a/src/util.c +++ b/src/util.c @@ -56,8 +56,7 @@ wrtmessage(void *cbopaque, const char *s) #endif } -JEMALLOC_EXPORT void (*je_malloc_message)(void *, const char *s) = - wrtmessage; +JEMALLOC_EXPORT void (*je_malloc_message)(void *, const char *s); /* * Wrapper around malloc_message() that avoids the need for @@ -67,7 +66,10 @@ void malloc_write(const char *s) { - je_malloc_message(NULL, s); + if (je_malloc_message != NULL) + je_malloc_message(NULL, s); + else + wrtmessage(NULL, s); } /* @@ -606,7 +608,8 @@ malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque, * 
function, so use the default one. malloc_write() is an * inline function, so use malloc_message() directly here. */ - write_cb = je_malloc_message; + write_cb = (je_malloc_message != NULL) ? je_malloc_message : + wrtmessage; cbopaque = NULL; } From 79c4bca7d10e967ff524f17a6990e5c630116198 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Wed, 2 May 2012 21:30:51 +0200 Subject: [PATCH 190/205] Use "hardcoded" dependencies when compiler doesn't support -MM --- Makefile.in | 14 ++++++++++++++ configure.ac | 3 +++ 2 files changed, 17 insertions(+) diff --git a/Makefile.in b/Makefile.in index 94f48699..6675b596 100644 --- a/Makefile.in +++ b/Makefile.in @@ -54,6 +54,7 @@ PIC_CFLAGS = @PIC_CFLAGS@ CTARGET = @CTARGET@ LDTARGET = @LDTARGET@ MKLIB = @MKLIB@ +CC_MM = @CC_MM@ ifeq (macho, $(ABI)) TEST_LIBRARY_PATH := DYLD_FALLBACK_LIBRARY_PATH="$(objroot)lib" @@ -136,9 +137,11 @@ build_doc: $(DOCS) # # Include generated dependency files. # +ifdef CC_MM -include $(COBJS:%.$(O)=%.d) -include $(CPICOBJS:%.$(O)=%.d) -include $(CTESTOBJS:%.$(O)=%.d) +endif $(COBJS): $(objroot)src/%.$(O): $(srcroot)src/%.c $(CPICOBJS): $(objroot)src/%.pic.$(O): $(srcroot)src/%.c @@ -149,10 +152,21 @@ ifneq ($(IMPORTLIB),$(SO)) $(COBJS): CPPFLAGS += -DDLLEXPORT endif +ifndef CC_MM +# Dependencies +HEADER_DIRS = $(srcroot)include/jemalloc/internal \ + $(objroot)include/jemalloc $(objroot)include/jemalloc/internal +HEADERS = $(wildcard $(foreach dir,$(HEADER_DIRS),$(dir)/*.h)) +$(COBJS) $(CPICOBJS) $(CTESTOBJS): $(HEADERS) +$(CTESTOBJS): $(objroot)test/jemalloc_test.h +endif + $(COBJS) $(CPICOBJS) $(CTESTOBJS): %.$(O): @mkdir -p $(@D) $(CC) $(CFLAGS) -c $(CPPFLAGS) $(CTARGET) $< +ifdef CC_MM @$(CC) -MM $(CPPFLAGS) -MT $@ -o $(@:%.$(O)=%.d) $< +endif ifneq ($(SOREV),$(SO)) %.$(SO) : %.$(SOREV) diff --git a/configure.ac b/configure.ac index eeb2a290..3b32b884 100644 --- a/configure.ac +++ b/configure.ac @@ -227,6 +227,7 @@ CTARGET='-o $@' LDTARGET='-o $@' EXTRA_LDFLAGS= MKLIB='ar crus $@' +CC_MM=1 dnl Platform-specific settings. abi and RPATH can probably be determined dnl programmatically, but doing so is error-prone, which makes it generally @@ -307,6 +308,7 @@ case "${host}" in CTARGET='-Fo$@' LDTARGET='-Fe$@' MKLIB='lib -nologo -out:$@' + CC_MM= else importlib="${so}" DSO_LDFLAGS="-shared" @@ -337,6 +339,7 @@ AC_SUBST([PIC_CFLAGS]) AC_SUBST([CTARGET]) AC_SUBST([LDTARGET]) AC_SUBST([MKLIB]) +AC_SUBST([CC_MM]) if test "x$abi" != "xpecoff"; then dnl Heap profiling uses the log(3) function. From 1d01206bbc07a87ed8bf62d4d854ff3ff7dcae9a Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Wed, 2 May 2012 21:30:52 +0200 Subject: [PATCH 191/205] Use "standard" printf prefixes instead of MSVC ones in inttypes.h We don't use MSVC's printf, but ours, and it doesn't support the I32 and I64 prefixes. 
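A minimal usage sketch of the affected macros; this caller is hypothetical and not part of the patch:

    #include <inttypes.h>
    #include <stdio.h>

    int
    main(void)
    {
        int64_t n = INT64_C(1) << 40;

        /* With this change, PRId64 expands to "lld" ("ld" under _WIN64)
         * rather than MSVC's "I64d", which jemalloc's malloc_vsnprintf()
         * does not understand. */
        printf("n = %" PRId64 "\n", n);
        return (0);
    }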
--- include/msvc_compat/inttypes.h | 104 ++++++++++++++++++--------------- 1 file changed, 56 insertions(+), 48 deletions(-) diff --git a/include/msvc_compat/inttypes.h b/include/msvc_compat/inttypes.h index 4b3828a2..a4e6b75c 100644 --- a/include/msvc_compat/inttypes.h +++ b/include/msvc_compat/inttypes.h @@ -53,6 +53,14 @@ typedef struct { #if !defined(__cplusplus) || defined(__STDC_FORMAT_MACROS) // [ See footnote 185 at page 198 +#ifdef _WIN64 +# define __PRI64_PREFIX "l" +# define __PRIPTR_PREFIX "l" +#else +# define __PRI64_PREFIX "ll" +# define __PRIPTR_PREFIX +#endif + // The fprintf macros for signed integers are: #define PRId8 "d" #define PRIi8 "i" @@ -68,25 +76,25 @@ typedef struct { #define PRIdFAST16 "hd" #define PRIiFAST16 "hi" -#define PRId32 "I32d" -#define PRIi32 "I32i" -#define PRIdLEAST32 "I32d" -#define PRIiLEAST32 "I32i" -#define PRIdFAST32 "I32d" -#define PRIiFAST32 "I32i" +#define PRId32 "d" +#define PRIi32 "i" +#define PRIdLEAST32 "d" +#define PRIiLEAST32 "i" +#define PRIdFAST32 "d" +#define PRIiFAST32 "i" -#define PRId64 "I64d" -#define PRIi64 "I64i" -#define PRIdLEAST64 "I64d" -#define PRIiLEAST64 "I64i" -#define PRIdFAST64 "I64d" -#define PRIiFAST64 "I64i" +#define PRId64 __PRI64_PREFIX "d" +#define PRIi64 __PRI64_PREFIX "i" +#define PRIdLEAST64 __PRI64_PREFIX "d" +#define PRIiLEAST64 __PRI64_PREFIX "i" +#define PRIdFAST64 __PRI64_PREFIX "d" +#define PRIiFAST64 __PRI64_PREFIX "i" -#define PRIdMAX "I64d" -#define PRIiMAX "I64i" +#define PRIdMAX __PRI64_PREFIX "d" +#define PRIiMAX __PRI64_PREFIX "i" -#define PRIdPTR "Id" -#define PRIiPTR "Ii" +#define PRIdPTR __PRIPTR_PREFIX "d" +#define PRIiPTR __PRIPTR_PREFIX "i" // The fprintf macros for unsigned integers are: #define PRIo8 "o" @@ -115,41 +123,41 @@ typedef struct { #define PRIxFAST16 "hx" #define PRIXFAST16 "hX" -#define PRIo32 "I32o" -#define PRIu32 "I32u" -#define PRIx32 "I32x" -#define PRIX32 "I32X" -#define PRIoLEAST32 "I32o" -#define PRIuLEAST32 "I32u" -#define PRIxLEAST32 "I32x" -#define PRIXLEAST32 "I32X" -#define PRIoFAST32 "I32o" -#define PRIuFAST32 "I32u" -#define PRIxFAST32 "I32x" -#define PRIXFAST32 "I32X" +#define PRIo32 "o" +#define PRIu32 "u" +#define PRIx32 "x" +#define PRIX32 "X" +#define PRIoLEAST32 "o" +#define PRIuLEAST32 "u" +#define PRIxLEAST32 "x" +#define PRIXLEAST32 "X" +#define PRIoFAST32 "o" +#define PRIuFAST32 "u" +#define PRIxFAST32 "x" +#define PRIXFAST32 "X" -#define PRIo64 "I64o" -#define PRIu64 "I64u" -#define PRIx64 "I64x" -#define PRIX64 "I64X" -#define PRIoLEAST64 "I64o" -#define PRIuLEAST64 "I64u" -#define PRIxLEAST64 "I64x" -#define PRIXLEAST64 "I64X" -#define PRIoFAST64 "I64o" -#define PRIuFAST64 "I64u" -#define PRIxFAST64 "I64x" -#define PRIXFAST64 "I64X" +#define PRIo64 __PRI64_PREFIX "o" +#define PRIu64 __PRI64_PREFIX "u" +#define PRIx64 __PRI64_PREFIX "x" +#define PRIX64 __PRI64_PREFIX "X" +#define PRIoLEAST64 __PRI64_PREFIX "o" +#define PRIuLEAST64 __PRI64_PREFIX "u" +#define PRIxLEAST64 __PRI64_PREFIX "x" +#define PRIXLEAST64 __PRI64_PREFIX "X" +#define PRIoFAST64 __PRI64_PREFIX "o" +#define PRIuFAST64 __PRI64_PREFIX "u" +#define PRIxFAST64 __PRI64_PREFIX "x" +#define PRIXFAST64 __PRI64_PREFIX "X" -#define PRIoMAX "I64o" -#define PRIuMAX "I64u" -#define PRIxMAX "I64x" -#define PRIXMAX "I64X" +#define PRIoMAX __PRI64_PREFIX "o" +#define PRIuMAX __PRI64_PREFIX "u" +#define PRIxMAX __PRI64_PREFIX "x" +#define PRIXMAX __PRI64_PREFIX "X" -#define PRIoPTR "Io" -#define PRIuPTR "Iu" -#define PRIxPTR "Ix" -#define PRIXPTR "IX" +#define PRIoPTR __PRIPTR_PREFIX "o" +#define 
PRIuPTR __PRIPTR_PREFIX "u" +#define PRIxPTR __PRIPTR_PREFIX "x" +#define PRIXPTR __PRIPTR_PREFIX "X" // The fscanf macros for signed integers are: #define SCNd8 "d" From c584fc75bb57ba275605c66522cbae45098d56f0 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Wed, 2 May 2012 21:30:53 +0200 Subject: [PATCH 192/205] Don't use sizeof() on a VARIABLE_ARRAY In the alloca() case, this fails to be the right size. --- src/stats.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/stats.c b/src/stats.c index 1234e565..433b80d1 100644 --- a/src/stats.c +++ b/src/stats.c @@ -494,7 +494,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, size_t isz; unsigned i, ninitialized; - isz = sizeof(initialized); + isz = sizeof(bool) * narenas; xmallctl("arenas.initialized", initialized, &isz, NULL, 0); for (i = ninitialized = 0; i < narenas; i++) { @@ -523,7 +523,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, size_t isz; unsigned i; - isz = sizeof(initialized); + isz = sizeof(bool) * narenas; xmallctl("arenas.initialized", initialized, &isz, NULL, 0); From 34a8cf6c4029c09e1db776b7027a9c1b31f9e5b4 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 2 May 2012 20:41:42 -0700 Subject: [PATCH 193/205] Fix a base allocator deadlock. Fix a base allocator deadlock due to chunk_recycle() calling back into the base allocator. --- src/chunk.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/src/chunk.c b/src/chunk.c index 7ac229cb..166d1eab 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -30,19 +30,30 @@ size_t arena_maxclass; /* Max size class for arenas. */ /******************************************************************************/ /* Function prototypes for non-inline static functions. */ -static void *chunk_recycle(size_t size, size_t alignment, bool *zero); +static void *chunk_recycle(size_t size, size_t alignment, bool base, + bool *zero); static void chunk_record(void *chunk, size_t size); /******************************************************************************/ static void * -chunk_recycle(size_t size, size_t alignment, bool *zero) +chunk_recycle(size_t size, size_t alignment, bool base, bool *zero) { void *ret; extent_node_t *node; extent_node_t key; size_t alloc_size, leadsize, trailsize; + if (base) { + /* + * This function may need to call base_node_{,de}alloc(), but + * the current chunk allocation request is on behalf of the + * base allocator. Avoid deadlock (and if that weren't an + * issue, potential for infinite recursion) by returning NULL. + */ + return (NULL); + } + alloc_size = size + alignment - chunksize; /* Beware size_t wrap-around. */ if (alloc_size < size) @@ -125,7 +136,7 @@ chunk_alloc(size_t size, size_t alignment, bool base, bool *zero) assert((size & chunksize_mask) == 0); assert((alignment & chunksize_mask) == 0); - ret = chunk_recycle(size, alignment, zero); + ret = chunk_recycle(size, alignment, base, zero); if (ret != NULL) goto label_return; From de6fbdb72c6e1401b36f8f2073404645bac6cd2b Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 9 May 2012 13:05:04 -0700 Subject: [PATCH 194/205] Fix chunk_alloc_mmap() bugs. Simplify chunk_alloc_mmap() to no longer attempt map extension. The extra complexity isn't warranted, because although in the success case it saves one system call as compared to immediately falling back to chunk_alloc_mmap_slow(), it also makes the failure case even more expensive. 
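In outline, the simplified path reads as follows (paraphrasing the resulting code rather than quoting it):

    ret = pages_map(NULL, size);
    if (ret == NULL)
        return (NULL);
    offset = ALIGNMENT_ADDR2OFFSET(ret, alignment);
    if (offset != 0) {
        /* Unaligned; unmap and fall back, on all platforms. */
        pages_unmap(ret, size);
        return (chunk_alloc_mmap_slow(size, alignment, zero));
    }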
This simplification removes two bugs: - For Windows platforms, pages_unmap() wasn't being called for unaligned mappings prior to falling back to chunk_alloc_mmap_slow(). This caused permanent virtual memory leaks. - For non-Windows platforms, alignment greater than chunksize caused pages_map() to be called with size 0 when attempting map extension. This always resulted in an mmap() error, and subsequent fallback to chunk_alloc_mmap_slow(). --- src/chunk.c | 1 + src/chunk_mmap.c | 45 ++++++++++----------------------------------- 2 files changed, 11 insertions(+), 35 deletions(-) diff --git a/src/chunk.c b/src/chunk.c index 166d1eab..bb6189ee 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -134,6 +134,7 @@ chunk_alloc(size_t size, size_t alignment, bool base, bool *zero) assert(size != 0); assert((size & chunksize_mask) == 0); + assert(alignment != 0); assert((alignment & chunksize_mask) == 0); ret = chunk_recycle(size, alignment, base, zero); diff --git a/src/chunk_mmap.c b/src/chunk_mmap.c index 9f388d28..c8da6556 100644 --- a/src/chunk_mmap.c +++ b/src/chunk_mmap.c @@ -16,6 +16,8 @@ pages_map(void *addr, size_t size) { void *ret; + assert(size != 0); + #ifdef _WIN32 /* * If VirtualAlloc can't allocate at the given address when one is @@ -164,51 +166,24 @@ chunk_alloc_mmap(size_t size, size_t alignment, bool *zero) * NetBSD has), but in the absence of such a feature, we have to work * hard to efficiently create aligned mappings. The reliable, but * slow method is to create a mapping that is over-sized, then trim the - * excess. However, that always results in at least one call to + * excess. However, that always results in one or two calls to * pages_unmap(). * - * A more optimistic approach is to try mapping precisely the right - * amount, then try to append another mapping if alignment is off. In - * practice, this works out well as long as the application is not - * interleaving mappings via direct mmap() calls. If we do run into a - * situation where there is an interleaved mapping and we are unable to - * extend an unaligned mapping, our best option is to switch to the - * slow method until mmap() returns another aligned mapping. This will - * tend to leave a gap in the memory map that is too small to cause - * later problems for the optimistic method. - * - * Another possible confounding factor is address space layout - * randomization (ASLR), which causes mmap(2) to disregard the - * requested address. As such, repeatedly trying to extend unaligned - * mappings could result in an infinite loop, so if extension fails, - * immediately fall back to the reliable method of over-allocation - * followed by trimming. + * Optimistically try mapping precisely the right amount before falling + * back to the slow method, with the expectation that the optimistic + * approach works most of the time. */ + assert(alignment != 0); + assert((alignment & chunksize_mask) == 0); + ret = pages_map(NULL, size); if (ret == NULL) return (NULL); - offset = ALIGNMENT_ADDR2OFFSET(ret, alignment); if (offset != 0) { -#ifdef _WIN32 + pages_unmap(ret, size); return (chunk_alloc_mmap_slow(size, alignment, zero)); -#else - /* Try to extend chunk boundary. */ - if (pages_map((void *)((uintptr_t)ret + size), chunksize - - offset) == NULL) { - /* - * Extension failed. Clean up, then fall back to the - * reliable-but-expensive method. - */ - pages_unmap(ret, size); - return (chunk_alloc_mmap_slow(size, alignment, zero)); - } else { - /* Clean up unneeded leading space. 
*/ - pages_unmap(ret, chunksize - offset); - ret = (void *)((uintptr_t)ret + (chunksize - offset)); - } -#endif } assert(ret != NULL); From 374d26a43bcceb12eb56ed7cc47815d7f933901c Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 9 May 2012 14:48:35 -0700 Subject: [PATCH 195/205] Fix chunk_recycle() to stop leaking trailing chunks. Fix chunk_recycle() to correctly compute trailsize and re-insert trailing chunks. This fixes a major virtual memory leak. Simplify chunk_record() to avoid dropping/re-acquiring chunks_mtx. --- src/chunk.c | 78 ++++++++++++++++++++++++++--------------------------- 1 file changed, 38 insertions(+), 40 deletions(-) diff --git a/src/chunk.c b/src/chunk.c index bb6189ee..6bc24544 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -68,8 +68,8 @@ chunk_recycle(size_t size, size_t alignment, bool base, bool *zero) } leadsize = ALIGNMENT_CEILING((uintptr_t)node->addr, alignment) - (uintptr_t)node->addr; - assert(alloc_size >= leadsize + size); - trailsize = alloc_size - leadsize - size; + assert(node->size >= leadsize + size); + trailsize = node->size - leadsize - size; ret = (void *)((uintptr_t)node->addr + leadsize); /* Remove node from the tree. */ extent_tree_szad_remove(&chunks_szad, node); @@ -195,50 +195,48 @@ chunk_record(void *chunk, size_t size) pages_purge(chunk, size); - xnode = NULL; + /* + * Allocate a node before acquiring chunks_mtx even though it might not + * be needed, because base_node_alloc() may cause a new base chunk to + * be allocated, which could cause deadlock if chunks_mtx were already + * held. + */ + xnode = base_node_alloc(); + malloc_mutex_lock(&chunks_mtx); - while (true) { - key.addr = (void *)((uintptr_t)chunk + size); - node = extent_tree_ad_nsearch(&chunks_ad, &key); - /* Try to coalesce forward. */ - if (node != NULL && node->addr == key.addr) { + key.addr = (void *)((uintptr_t)chunk + size); + node = extent_tree_ad_nsearch(&chunks_ad, &key); + /* Try to coalesce forward. */ + if (node != NULL && node->addr == key.addr) { + /* + * Coalesce chunk with the following address range. This does + * not change the position within chunks_ad, so only + * remove/insert from/into chunks_szad. + */ + extent_tree_szad_remove(&chunks_szad, node); + node->addr = chunk; + node->size += size; + extent_tree_szad_insert(&chunks_szad, node); + if (xnode != NULL) + base_node_dealloc(xnode); + } else { + /* Coalescing forward failed, so insert a new node. */ + if (xnode == NULL) { /* - * Coalesce chunk with the following address range. - * This does not change the position within chunks_ad, - * so only remove/insert from/into chunks_szad. - */ - extent_tree_szad_remove(&chunks_szad, node); - node->addr = chunk; - node->size += size; - extent_tree_szad_insert(&chunks_szad, node); - break; - } else if (xnode == NULL) { - /* - * It is possible that base_node_alloc() will cause a - * new base chunk to be allocated, so take care not to - * deadlock on chunks_mtx, and recover if another thread - * deallocates an adjacent chunk while this one is busy - * allocating xnode. + * base_node_alloc() failed, which is an exceedingly + * unlikely failure. Leak chunk; its pages have + * already been purged, so this is only a virtual + * memory leak. */ malloc_mutex_unlock(&chunks_mtx); - xnode = base_node_alloc(); - if (xnode == NULL) - return; - malloc_mutex_lock(&chunks_mtx); - } else { - /* Coalescing forward failed, so insert a new node. 
*/ - node = xnode; - xnode = NULL; - node->addr = chunk; - node->size = size; - extent_tree_ad_insert(&chunks_ad, node); - extent_tree_szad_insert(&chunks_szad, node); - break; + return; } + node = xnode; + node->addr = chunk; + node->size = size; + extent_tree_ad_insert(&chunks_ad, node); + extent_tree_szad_insert(&chunks_szad, node); } - /* Discard xnode if it ended up unused due to a race. */ - if (xnode != NULL) - base_node_dealloc(xnode); /* Try to coalesce backward. */ prev = extent_tree_ad_prev(&chunks_ad, node); From 2e671ffbadc02fc7de8cbafdd1031e3b0ad73c5b Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 9 May 2012 16:12:00 -0700 Subject: [PATCH 196/205] Add the --enable-mremap option. Add the --enable-mremap option, and disable the use of mremap(2) by default, for the same reason that freeing chunks via munmap(2) is disabled by default on Linux: semi-permanent VM map fragmentation. --- INSTALL | 6 +++ configure.ac | 38 ++++++++++++++----- doc/jemalloc.xml.in | 10 +++++ .../jemalloc/internal/jemalloc_internal.h.in | 7 ++++ include/jemalloc/jemalloc_defs.h.in | 15 +++++--- src/ctl.c | 3 ++ src/huge.c | 2 +- 7 files changed, 64 insertions(+), 17 deletions(-) diff --git a/INSTALL b/INSTALL index 7e3051ac..e40a7edd 100644 --- a/INSTALL +++ b/INSTALL @@ -108,6 +108,12 @@ any of the following arguments (not a definitive list) to 'configure': released in bulk, thus reducing the total number of mutex operations. See the "opt.tcache" option for usage details. +--enable-mremap + Enable huge realloc() via mremap(2). mremap() is disabled by default + because the flavor used is specific to Linux, which has a quirk in its + virtual memory allocation algorithm that causes semi-permanent VM map holes + under normal jemalloc operation. + --disable-munmap Disable virtual memory deallocation via munmap(2); instead keep track of the virtual memory for later use. munmap() is disabled by default (i.e. diff --git a/configure.ac b/configure.ac index 3b32b884..a09db7d0 100644 --- a/configure.ac +++ b/configure.ac @@ -372,16 +372,6 @@ else AC_DEFINE([JEMALLOC_TLS_MODEL], [ ]) fi -JE_COMPILABLE([mremap(...MREMAP_FIXED...)], [ -#define _GNU_SOURCE -#include -], [ -void *p = mremap((void *)0, 0, 0, MREMAP_MAYMOVE|MREMAP_FIXED, (void *)0); -], [je_cv_mremap_fixed]) -if test "x${je_cv_mremap_fixed}" = "xyes" ; then - AC_DEFINE([JEMALLOC_MREMAP_FIXED], [ ]) -fi - dnl Support optional additions to rpath. AC_ARG_WITH([rpath], [AS_HELP_STRING([--with-rpath=], [Colon-separated rpath (ELF systems only)])], @@ -743,6 +733,33 @@ if test "x$enable_tcache" = "x1" ; then fi AC_SUBST([enable_tcache]) +dnl Disable mremap() for huge realloc() by default. +AC_ARG_ENABLE([mremap], + [AS_HELP_STRING([--enable-mremap], [Enable mremap(2) for huge realloc()])], +[if test "x$enable_mremap" = "xno" ; then + enable_mremap="0" +else + enable_mremap="1" +fi +], +[enable_mremap="0"] +) +if test "x$enable_mremap" = "x1" ; then + JE_COMPILABLE([mremap(...MREMAP_FIXED...)], [ +#define _GNU_SOURCE +#include +], [ +void *p = mremap((void *)0, 0, 0, MREMAP_MAYMOVE|MREMAP_FIXED, (void *)0); +], [je_cv_mremap_fixed]) + if test "x${je_cv_mremap_fixed}" = "xno" ; then + enable_mremap="0" + fi +fi +if test "x$enable_mremap" = "x1" ; then + AC_DEFINE([JEMALLOC_MREMAP], [ ]) +fi +AC_SUBST([enable_mremap]) + dnl Enable VM deallocation via munmap() by default. 
AC_ARG_ENABLE([munmap], [AS_HELP_STRING([--disable-munmap], [Disable VM deallocation via munmap(2)])], @@ -1261,6 +1278,7 @@ AC_MSG_RESULT([fill : ${enable_fill}]) AC_MSG_RESULT([utrace : ${enable_utrace}]) AC_MSG_RESULT([valgrind : ${enable_valgrind}]) AC_MSG_RESULT([xmalloc : ${enable_xmalloc}]) +AC_MSG_RESULT([mremap : ${enable_mremap}]) AC_MSG_RESULT([munmap : ${enable_munmap}]) AC_MSG_RESULT([dss : ${enable_dss}]) AC_MSG_RESULT([lazy_lock : ${enable_lazy_lock}]) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 02961f6b..93c16dcf 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -650,6 +650,16 @@ for (i = 0; i < nbins; i++) { during build configuration. + + + config.mremap + (bool) + r- + + was specified during + build configuration. + + config.munmap diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index e441285b..268cd146 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -123,6 +123,13 @@ static const bool config_prof_libunwind = false #endif ; +static const bool config_mremap = +#ifdef JEMALLOC_MREMAP + true +#else + false +#endif + ; static const bool config_munmap = #ifdef JEMALLOC_MUNMAP true diff --git a/include/jemalloc/jemalloc_defs.h.in b/include/jemalloc/jemalloc_defs.h.in index c9ab6468..c469142a 100644 --- a/include/jemalloc/jemalloc_defs.h.in +++ b/include/jemalloc/jemalloc_defs.h.in @@ -193,12 +193,18 @@ /* * If defined, use munmap() to unmap freed chunks, rather than storing them for - * later reuse. This is automatically disabled if configuration determines - * that common sequences of mmap()/munmap() calls will cause virtual memory map - * holes. + * later reuse. This is disabled by default on Linux because common sequences + * of mmap()/munmap() calls will cause virtual memory map holes. */ #undef JEMALLOC_MUNMAP +/* + * If defined, use mremap(...MREMAP_FIXED...) for huge realloc(). This is + * disabled by default because it is Linux-specific and it will cause virtual + * memory map holes, much like munmap(2) does. + */ +#undef JEMALLOC_MREMAP + /* TLS is used to map arenas and magazine caches to threads. */ #undef JEMALLOC_TLS @@ -221,9 +227,6 @@ #undef JEMALLOC_ZONE #undef JEMALLOC_ZONE_VERSION -/* If defined, use mremap(...MREMAP_FIXED...) for huge realloc(). */ -#undef JEMALLOC_MREMAP_FIXED - /* * Methods for purging unused pages differ between operating systems. 
* diff --git a/src/ctl.c b/src/ctl.c index dddf3bee..55e76677 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -76,6 +76,7 @@ CTL_PROTO(config_debug) CTL_PROTO(config_dss) CTL_PROTO(config_fill) CTL_PROTO(config_lazy_lock) +CTL_PROTO(config_mremap) CTL_PROTO(config_munmap) CTL_PROTO(config_prof) CTL_PROTO(config_prof_libgcc) @@ -207,6 +208,7 @@ static const ctl_named_node_t config_node[] = { {NAME("dss"), CTL(config_dss)}, {NAME("fill"), CTL(config_fill)}, {NAME("lazy_lock"), CTL(config_lazy_lock)}, + {NAME("mremap"), CTL(config_mremap)}, {NAME("munmap"), CTL(config_munmap)}, {NAME("prof"), CTL(config_prof)}, {NAME("prof_libgcc"), CTL(config_prof_libgcc)}, @@ -1118,6 +1120,7 @@ CTL_RO_BOOL_CONFIG_GEN(config_debug) CTL_RO_BOOL_CONFIG_GEN(config_dss) CTL_RO_BOOL_CONFIG_GEN(config_fill) CTL_RO_BOOL_CONFIG_GEN(config_lazy_lock) +CTL_RO_BOOL_CONFIG_GEN(config_mremap) CTL_RO_BOOL_CONFIG_GEN(config_munmap) CTL_RO_BOOL_CONFIG_GEN(config_prof) CTL_RO_BOOL_CONFIG_GEN(config_prof_libgcc) diff --git a/src/huge.c b/src/huge.c index 67b282d1..8a4ec942 100644 --- a/src/huge.c +++ b/src/huge.c @@ -140,11 +140,11 @@ huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, */ copysize = (size < oldsize) ? size : oldsize; +#ifdef JEMALLOC_MREMAP /* * Use mremap(2) if this is a huge-->huge reallocation, and neither the * source nor the destination are in dss. */ -#ifdef JEMALLOC_MREMAP_FIXED if (oldsize >= chunksize && (config_dss == false || (chunk_in_dss(ptr) == false && chunk_in_dss(ret) == false))) { size_t newsize = huge_salloc(ret); From a6770a70493bc25495d17bc8b4a0246b3877918b Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Thu, 3 May 2012 14:12:49 +0200 Subject: [PATCH 197/205] Remove -fno-common compiler flag for OS X. It doesn't allow the je_malloc_message and je_malloc_conf symbols to be overridden when linking statically. --- configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index a09db7d0..a72019e5 100644 --- a/configure.ac +++ b/configure.ac @@ -239,7 +239,7 @@ dnl to make happen otherwise. default_munmap="1" case "${host}" in *-*-darwin*) - CFLAGS="$CFLAGS -fno-common" + CFLAGS="$CFLAGS" abi="macho" AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) RPATH="" From 37b6f95dcd866f51c91488531a2efc3ed4c2b754 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Mon, 7 May 2012 16:08:34 +0200 Subject: [PATCH 198/205] Export je_memalign and je_valloc da99e31 removed attributes on je_memalign and je_valloc, while they didn't have a definition in the jemalloc.h header, thus making them non-exported. Export them again, by defining them in the jemalloc.h header. 
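A minimal consumer sketch; it assumes a build in which JEMALLOC_OVERRIDE_MEMALIGN and JEMALLOC_OVERRIDE_VALLOC are defined and the public prefix is "je_", both of which depend on configure options:

    #include <jemalloc/jemalloc.h>

    int
    main(void)
    {
        void *a = je_memalign(64, 1024); /* Declared again, hence exported. */
        void *v = je_valloc(4096);       /* Page-aligned allocation. */

        je_free(a);
        je_free(v);
        return (0);
    }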
--- include/jemalloc/jemalloc.h.in | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/include/jemalloc/jemalloc.h.in b/include/jemalloc/jemalloc.h.in index 47a4b9b7..ad069485 100644 --- a/include/jemalloc/jemalloc.h.in +++ b/include/jemalloc/jemalloc.h.in @@ -50,6 +50,15 @@ JEMALLOC_EXPORT void *je_aligned_alloc(size_t alignment, size_t size) JEMALLOC_EXPORT void *je_realloc(void *ptr, size_t size); JEMALLOC_EXPORT void je_free(void *ptr); +#ifdef JEMALLOC_OVERRIDE_MEMALIGN +JEMALLOC_EXPORT void * je_memalign(size_t alignment, size_t size) + JEMALLOC_ATTR(malloc); +#endif + +#ifdef JEMALLOC_OVERRIDE_VALLOC +JEMALLOC_EXPORT void * je_valloc(size_t size) JEMALLOC_ATTR(malloc); +#endif + JEMALLOC_EXPORT size_t je_malloc_usable_size(const void *ptr); JEMALLOC_EXPORT void je_malloc_stats_print(void (*write_cb)(void *, const char *), void *je_cbopaque, const char *opts); From 80fe0478e6b0b3a0c84ea1dfdeec1fc5685841dc Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 9 May 2012 23:08:48 -0700 Subject: [PATCH 199/205] Generalize "stats.mapped" documentation. Generalize "stats.mapped" documentation to state that all inactive chunks are omitted, now that it is possible for mmap'ed chunks to be omitted in addition to DSS chunks. --- doc/jemalloc.xml.in | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 93c16dcf..877c500f 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1471,7 +1471,7 @@ malloc_conf = "xmalloc:true";]]> application. This is a multiple of the chunk size, and is at least as large as stats.active. This - does not include inactive chunks embedded in the DSS. + does not include inactive chunks. @@ -1482,7 +1482,7 @@ malloc_conf = "xmalloc:true";]]> [] Total number of chunks actively mapped on behalf of the - application. This does not include inactive chunks embedded in the DSS. + application. This does not include inactive chunks. From 53bd42c1fe35c25ea299b96d546a9d0089c6f78d Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 10 May 2012 00:18:46 -0700 Subject: [PATCH 200/205] Update a comment. --- include/jemalloc/internal/arena.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 264b5d3a..9ce08e5c 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -93,13 +93,13 @@ struct arena_chunk_map_s { * Run address (or size) and various flags are stored together. The bit * layout looks like (assuming 32-bit system): * - * ???????? ???????? ????---- ----dula + * ???????? ???????? ????nnnn nnnndula * * ? : Unallocated: Run address for first/last pages, unset for internal * pages. * Small: Run page offset. * Large: Run size for first page, unset for trailing pages. - * - : Unused. + * n : binind for small size class, BININD_INVALID for large size class. * d : dirty? * u : unzeroed? * l : large? 
@@ -118,14 +118,14 @@ struct arena_chunk_map_s { * [dula] : bit unset * * Unallocated (clean): - * ssssssss ssssssss ssss1111 1111du-a + * ssssssss ssssssss ssss++++ ++++du-a * xxxxxxxx xxxxxxxx xxxxxxxx xxxx-Uxx - * ssssssss ssssssss ssss1111 1111dU-a + * ssssssss ssssssss ssss++++ ++++dU-a * * Unallocated (dirty): - * ssssssss ssssssss ssss1111 1111D--a + * ssssssss ssssssss ssss++++ ++++D--a * xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx - * ssssssss ssssssss ssss1111 1111D--a + * ssssssss ssssssss ssss++++ ++++D--a * * Small: * pppppppp pppppppp ppppnnnn nnnnd--A @@ -133,15 +133,15 @@ struct arena_chunk_map_s { * pppppppp pppppppp ppppnnnn nnnnd--A * * Large: - * ssssssss ssssssss ssss1111 1111D-LA + * ssssssss ssssssss ssss++++ ++++D-LA * xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx - * -------- -------- ----1111 1111D-LA + * -------- -------- ----++++ ++++D-LA * * Large (sampled, size <= PAGE): * ssssssss ssssssss ssssnnnn nnnnD-LA * * Large (not sampled, size == PAGE): - * ssssssss ssssssss ssss1111 1111D-LA + * ssssssss ssssssss ssss++++ ++++D-LA */ size_t bits; #define CHUNK_MAP_BININD_SHIFT 4 From 5b0c99649fa71674daadf4dd53b1ab05428483fb Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 10 May 2012 15:47:24 -0700 Subject: [PATCH 201/205] Refactor arena_run_alloc(). Refactor duplicated arena_run_alloc() code into arena_run_alloc_helper(). --- src/arena.c | 58 ++++++++++++++++++++++------------------------------- 1 file changed, 24 insertions(+), 34 deletions(-) diff --git a/src/arena.c b/src/arena.c index 9f24e7ca..94410aab 100644 --- a/src/arena.c +++ b/src/arena.c @@ -44,6 +44,8 @@ static void arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, size_t binind, bool zero); static arena_chunk_t *arena_chunk_alloc(arena_t *arena); static void arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk); +static arena_run_t *arena_run_alloc_helper(arena_t *arena, size_t size, + bool large, size_t binind, bool zero); static arena_run_t *arena_run_alloc(arena_t *arena, size_t size, bool large, size_t binind, bool zero); static void arena_purge(arena_t *arena, bool all); @@ -454,19 +456,12 @@ arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk) } static arena_run_t * -arena_run_alloc(arena_t *arena, size_t size, bool large, size_t binind, +arena_run_alloc_helper(arena_t *arena, size_t size, bool large, size_t binind, bool zero) { - arena_chunk_t *chunk; arena_run_t *run; arena_chunk_map_t *mapelm, key; - assert(size <= arena_maxclass); - assert((size & PAGE_MASK) == 0); - assert((large && binind == BININD_INVALID) || (large == false && binind - != BININD_INVALID)); - - /* Search the arena's chunks for the lowest best fit. */ key.bits = size | CHUNK_MAP_KEY; mapelm = arena_avail_tree_nsearch(&arena->runs_avail_dirty, &key); if (mapelm != NULL) { @@ -493,6 +488,26 @@ arena_run_alloc(arena_t *arena, size_t size, bool large, size_t binind, return (run); } + return (NULL); +} + +static arena_run_t * +arena_run_alloc(arena_t *arena, size_t size, bool large, size_t binind, + bool zero) +{ + arena_chunk_t *chunk; + arena_run_t *run; + + assert(size <= arena_maxclass); + assert((size & PAGE_MASK) == 0); + assert((large && binind == BININD_INVALID) || (large == false && binind + != BININD_INVALID)); + + /* Search the arena's chunks for the lowest best fit. */ + run = arena_run_alloc_helper(arena, size, large, binind, zero); + if (run != NULL) + return (run); + /* * No usable runs. Create a new chunk from which to allocate the run. 
*/ @@ -508,32 +523,7 @@ arena_run_alloc(arena_t *arena, size_t size, bool large, size_t binind, * sufficient memory available while this one dropped arena->lock in * arena_chunk_alloc(), so search one more time. */ - mapelm = arena_avail_tree_nsearch(&arena->runs_avail_dirty, &key); - if (mapelm != NULL) { - arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(mapelm); - size_t pageind = (((uintptr_t)mapelm - - (uintptr_t)run_chunk->map) / sizeof(arena_chunk_map_t)) - + map_bias; - - run = (arena_run_t *)((uintptr_t)run_chunk + (pageind << - LG_PAGE)); - arena_run_split(arena, run, size, large, binind, zero); - return (run); - } - mapelm = arena_avail_tree_nsearch(&arena->runs_avail_clean, &key); - if (mapelm != NULL) { - arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(mapelm); - size_t pageind = (((uintptr_t)mapelm - - (uintptr_t)run_chunk->map) / sizeof(arena_chunk_map_t)) - + map_bias; - - run = (arena_run_t *)((uintptr_t)run_chunk + (pageind << - LG_PAGE)); - arena_run_split(arena, run, size, large, binind, zero); - return (run); - } - - return (NULL); + return (arena_run_alloc_helper(arena, size, large, binind, zero)); } static inline void From 30fe12b866edbc2cf9aaef299063b392ea125aac Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 10 May 2012 17:09:17 -0700 Subject: [PATCH 202/205] Add arena chunk map assertions. --- src/arena.c | 45 ++++++++++++++++++++++++++++++--------------- 1 file changed, 30 insertions(+), 15 deletions(-) diff --git a/src/arena.c b/src/arena.c index 94410aab..021a0e2d 100644 --- a/src/arena.c +++ b/src/arena.c @@ -353,17 +353,20 @@ arena_chunk_alloc(arena_t *arena) chunk = arena->spare; arena->spare = NULL; - /* Insert the run into the appropriate runs_avail_* tree. */ - if (arena_mapbits_dirty_get(chunk, map_bias) == 0) - runs_avail = &arena->runs_avail_clean; - else - runs_avail = &arena->runs_avail_dirty; + assert(arena_mapbits_allocated_get(chunk, map_bias) == 0); + assert(arena_mapbits_allocated_get(chunk, chunk_npages-1) == 0); assert(arena_mapbits_unallocated_size_get(chunk, map_bias) == arena_maxclass); assert(arena_mapbits_unallocated_size_get(chunk, chunk_npages-1) == arena_maxclass); assert(arena_mapbits_dirty_get(chunk, map_bias) == arena_mapbits_dirty_get(chunk, chunk_npages-1)); + + /* Insert the run into the appropriate runs_avail_* tree. */ + if (arena_mapbits_dirty_get(chunk, map_bias) == 0) + runs_avail = &arena->runs_avail_clean; + else + runs_avail = &arena->runs_avail_dirty; arena_avail_tree_insert(runs_avail, arena_mapp_get(chunk, map_bias)); } else { @@ -427,6 +430,15 @@ arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk) { arena_avail_tree_t *runs_avail; + assert(arena_mapbits_allocated_get(chunk, map_bias) == 0); + assert(arena_mapbits_allocated_get(chunk, chunk_npages-1) == 0); + assert(arena_mapbits_unallocated_size_get(chunk, map_bias) == + arena_maxclass); + assert(arena_mapbits_unallocated_size_get(chunk, chunk_npages-1) == + arena_maxclass); + assert(arena_mapbits_dirty_get(chunk, map_bias) == + arena_mapbits_dirty_get(chunk, chunk_npages-1)); + /* * Remove run from the appropriate runs_avail_* tree, so that the arena * does not use it. 
@@ -578,6 +590,8 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) */ if (chunk == arena->spare) { assert(arena_mapbits_dirty_get(chunk, map_bias) != 0); + assert(arena_mapbits_dirty_get(chunk, chunk_npages-1) != 0); + arena_chunk_alloc(arena); } @@ -590,7 +604,9 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) npages = arena_mapbits_unallocated_size_get(chunk, pageind) >> LG_PAGE; assert(pageind + npages <= chunk_npages); - if (arena_mapbits_dirty_get(chunk, pageind)) { + assert(arena_mapbits_dirty_get(chunk, pageind) == + arena_mapbits_dirty_get(chunk, pageind+npages-1)); + if (arena_mapbits_dirty_get(chunk, pageind) != 0) { size_t i; arena_avail_tree_remove( @@ -832,6 +848,8 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty) * The run is dirty if the caller claims to have dirtied it, as well as * if it was already dirty before being allocated. */ + assert(arena_mapbits_dirty_get(chunk, run_ind) == + arena_mapbits_dirty_get(chunk, run_ind+run_pages-1)); if (arena_mapbits_dirty_get(chunk, run_ind) != 0) dirty = true; flag_dirty = dirty ? CHUNK_MAP_DIRTY : 0; @@ -931,9 +949,6 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty) if (size == arena_maxclass) { assert(run_ind == map_bias); assert(run_pages == (arena_maxclass >> LG_PAGE)); - assert(arena_mapbits_allocated_get(chunk, map_bias) == 0); - assert(arena_mapbits_unallocated_size_get(chunk, map_bias) == - arena_maxclass); arena_chunk_dealloc(arena, chunk); } @@ -1514,16 +1529,16 @@ arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, * trim the clean pages before deallocating the dirty portion of the * run. */ + assert(arena_mapbits_dirty_get(chunk, run_ind) == + arena_mapbits_dirty_get(chunk, run_ind+npages-1)); if (arena_mapbits_dirty_get(chunk, run_ind) == 0 && past - run_ind < npages) { - /* - * Trim clean pages. Convert to large run beforehand. Set the - * last map element first, in case this is a one-page run. - */ - arena_mapbits_large_set(chunk, run_ind+npages-1, 0, - arena_mapbits_unzeroed_get(chunk, run_ind+npages-1)); + /* Trim clean pages. Convert to large run beforehand. */ + assert(npages > 0); arena_mapbits_large_set(chunk, run_ind, bin_info->run_size, arena_mapbits_unzeroed_get(chunk, run_ind)); + arena_mapbits_large_set(chunk, run_ind+npages-1, 0, + arena_mapbits_unzeroed_get(chunk, run_ind+npages-1)); arena_run_trim_tail(arena, chunk, run, (npages << LG_PAGE), ((past - run_ind) << LG_PAGE), false); /* npages = past - run_ind; */ From d8ceef6c5558fdab8f9448376ae065a9e5ffcbdd Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 10 May 2012 20:59:39 -0700 Subject: [PATCH 203/205] Fix large calloc() zeroing bugs. Refactor code such that arena_mapbits_{large,small}_set() always preserves the unzeroed flag, and manually manipulate the unzeroed flag in the one case where it actually gets reset (in arena_chunk_purge()). This fixes unzeroed preservation bugs in arena_run_split() and arena_ralloc_large_grow(). These bugs caused large calloc() to return non-zeroed memory under some circumstances. --- ChangeLog | 1 + include/jemalloc/internal/arena.h | 15 +++++++---- src/arena.c | 43 +++++++++++++------------------ 3 files changed, 29 insertions(+), 30 deletions(-) diff --git a/ChangeLog b/ChangeLog index 691630bc..0a2b2ca5 100644 --- a/ChangeLog +++ b/ChangeLog @@ -71,6 +71,7 @@ found in the git revision history: write-after-free memory corruption. - Fix a potential deadlock that could occur during interval- and growth-triggered heap profile dumps. 
+ - Fix large calloc() zeroing bugs due to dropping chunk map unzeroed flags. - Fix chunk_alloc_dss() to stop claiming memory is zeroed. This bug could cause memory corruption and crashes with --enable-dss specified. - Fix fork-related bugs that could cause deadlock in children between fork diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 9ce08e5c..0b0f640a 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -591,6 +591,7 @@ arena_mapbits_unallocated_set(arena_chunk_t *chunk, size_t pageind, size_t size, mapbitsp = arena_mapbitsp_get(chunk, pageind); assert((size & PAGE_MASK) == 0); assert((flags & ~CHUNK_MAP_FLAGS_MASK) == 0); + assert((flags & (CHUNK_MAP_DIRTY|CHUNK_MAP_UNZEROED)) == flags); *mapbitsp = size | CHUNK_MAP_BININD_INVALID | flags; } @@ -611,12 +612,14 @@ arena_mapbits_large_set(arena_chunk_t *chunk, size_t pageind, size_t size, size_t flags) { size_t *mapbitsp; + size_t unzeroed; mapbitsp = arena_mapbitsp_get(chunk, pageind); assert((size & PAGE_MASK) == 0); - assert((flags & ~CHUNK_MAP_FLAGS_MASK) == 0); - *mapbitsp = size | CHUNK_MAP_BININD_INVALID | flags | CHUNK_MAP_LARGE | - CHUNK_MAP_ALLOCATED; + assert((flags & CHUNK_MAP_DIRTY) == flags); + unzeroed = *mapbitsp & CHUNK_MAP_UNZEROED; /* Preserve unzeroed. */ + *mapbitsp = size | CHUNK_MAP_BININD_INVALID | flags | unzeroed | + CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; } JEMALLOC_INLINE void @@ -637,13 +640,15 @@ arena_mapbits_small_set(arena_chunk_t *chunk, size_t pageind, size_t runind, size_t binind, size_t flags) { size_t *mapbitsp; + size_t unzeroed; assert(binind < BININD_INVALID); mapbitsp = arena_mapbitsp_get(chunk, pageind); assert(pageind - runind >= map_bias); - assert((flags & ~CHUNK_MAP_FLAGS_MASK) == 0); + assert((flags & CHUNK_MAP_DIRTY) == flags); + unzeroed = *mapbitsp & CHUNK_MAP_UNZEROED; /* Preserve unzeroed. */ *mapbitsp = (runind << LG_PAGE) | (binind << CHUNK_MAP_BININD_SHIFT) | - flags | CHUNK_MAP_ALLOCATED; + flags | unzeroed | CHUNK_MAP_ALLOCATED; } JEMALLOC_INLINE void diff --git a/src/arena.c b/src/arena.c index 021a0e2d..2a6150f3 100644 --- a/src/arena.c +++ b/src/arena.c @@ -311,8 +311,7 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, * small run, so that arena_dalloc_bin_run() has the ability to * conditionally trim clean pages. 
*/ - arena_mapbits_small_set(chunk, run_ind, 0, binind, - arena_mapbits_unzeroed_get(chunk, run_ind) | flag_dirty); + arena_mapbits_small_set(chunk, run_ind, 0, binind, flag_dirty); /* * The first page will always be dirtied during small run * initialization, so a validation failure here would not @@ -322,16 +321,13 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, arena_mapbits_unzeroed_get(chunk, run_ind) == 0) arena_chunk_validate_zeroed(chunk, run_ind); for (i = 1; i < need_pages - 1; i++) { - arena_mapbits_small_set(chunk, run_ind+i, i, - binind, arena_mapbits_unzeroed_get(chunk, - run_ind+i)); + arena_mapbits_small_set(chunk, run_ind+i, i, binind, 0); if (config_debug && flag_dirty == 0 && arena_mapbits_unzeroed_get(chunk, run_ind+i) == 0) arena_chunk_validate_zeroed(chunk, run_ind+i); } arena_mapbits_small_set(chunk, run_ind+need_pages-1, - need_pages-1, binind, arena_mapbits_unzeroed_get(chunk, - run_ind+need_pages-1) | flag_dirty); + need_pages-1, binind, flag_dirty); if (config_debug && flag_dirty == 0 && arena_mapbits_unzeroed_get(chunk, run_ind+need_pages-1) == 0) { @@ -612,8 +608,10 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) arena_avail_tree_remove( &arena->runs_avail_dirty, mapelm); + arena_mapbits_unzeroed_set(chunk, pageind, + flag_unzeroed); arena_mapbits_large_set(chunk, pageind, - (npages << LG_PAGE), flag_unzeroed); + (npages << LG_PAGE), 0); /* * Update internal elements in the page map, so * that CHUNK_MAP_UNZEROED is properly set. @@ -623,8 +621,10 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) pageind+i, flag_unzeroed); } if (npages > 1) { + arena_mapbits_unzeroed_set(chunk, + pageind+npages-1, flag_unzeroed); arena_mapbits_large_set(chunk, - pageind+npages-1, 0, flag_unzeroed); + pageind+npages-1, 0, 0); } if (config_stats) { @@ -979,10 +979,8 @@ arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, * run first, in case of single-page runs. */ assert(arena_mapbits_large_size_get(chunk, pageind) == oldsize); - arena_mapbits_large_set(chunk, pageind+head_npages-1, 0, flag_dirty | - arena_mapbits_unzeroed_get(chunk, pageind+head_npages-1)); - arena_mapbits_large_set(chunk, pageind, oldsize-newsize, flag_dirty | - arena_mapbits_unzeroed_get(chunk, pageind)); + arena_mapbits_large_set(chunk, pageind+head_npages-1, 0, flag_dirty); + arena_mapbits_large_set(chunk, pageind, oldsize-newsize, flag_dirty); if (config_debug) { UNUSED size_t tail_npages = newsize >> LG_PAGE; @@ -991,8 +989,8 @@ arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, assert(arena_mapbits_dirty_get(chunk, pageind+head_npages+tail_npages-1) == flag_dirty); } - arena_mapbits_large_set(chunk, pageind+head_npages, newsize, flag_dirty - | arena_mapbits_unzeroed_get(chunk, pageind+head_npages)); + arena_mapbits_large_set(chunk, pageind+head_npages, newsize, + flag_dirty); arena_run_dalloc(arena, run, false); } @@ -1013,10 +1011,8 @@ arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, * run first, in case of single-page runs. 
*/ assert(arena_mapbits_large_size_get(chunk, pageind) == oldsize); - arena_mapbits_large_set(chunk, pageind+head_npages-1, 0, flag_dirty | - arena_mapbits_unzeroed_get(chunk, pageind+head_npages-1)); - arena_mapbits_large_set(chunk, pageind, newsize, flag_dirty | - arena_mapbits_unzeroed_get(chunk, pageind)); + arena_mapbits_large_set(chunk, pageind+head_npages-1, 0, flag_dirty); + arena_mapbits_large_set(chunk, pageind, newsize, flag_dirty); if (config_debug) { UNUSED size_t tail_npages = (oldsize - newsize) >> LG_PAGE; @@ -1026,8 +1022,7 @@ arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, pageind+head_npages+tail_npages-1) == flag_dirty); } arena_mapbits_large_set(chunk, pageind+head_npages, oldsize-newsize, - flag_dirty | arena_mapbits_unzeroed_get(chunk, - pageind+head_npages)); + flag_dirty); arena_run_dalloc(arena, (arena_run_t *)((uintptr_t)run + newsize), dirty); @@ -1535,10 +1530,8 @@ arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, npages) { /* Trim clean pages. Convert to large run beforehand. */ assert(npages > 0); - arena_mapbits_large_set(chunk, run_ind, bin_info->run_size, - arena_mapbits_unzeroed_get(chunk, run_ind)); - arena_mapbits_large_set(chunk, run_ind+npages-1, 0, - arena_mapbits_unzeroed_get(chunk, run_ind+npages-1)); + arena_mapbits_large_set(chunk, run_ind, bin_info->run_size, 0); + arena_mapbits_large_set(chunk, run_ind+npages-1, 0, 0); arena_run_trim_tail(arena, chunk, run, (npages << LG_PAGE), ((past - run_ind) << LG_PAGE), false); /* npages = past - run_ind; */ From 58ad1e4956affe0f9949445dce4410ad70b4cdac Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 11 May 2012 17:40:16 -0700 Subject: [PATCH 204/205] Return early in _malloc_{pre,post}fork() if uninitialized. Avoid mutex operations in _malloc_{pre,post}fork() unless jemalloc has been initialized. Reported by David Xu. --- src/jemalloc.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/jemalloc.c b/src/jemalloc.c index d42e91db..bc54cd7c 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1621,6 +1621,12 @@ _malloc_prefork(void) { unsigned i; +#ifdef JEMALLOC_MUTEX_INIT_CB + if (malloc_initialized == false) + return; +#endif + assert(malloc_initialized); + /* Acquire all mutexes in a safe order. */ malloc_mutex_prefork(&arenas_lock); for (i = 0; i < narenas; i++) { @@ -1642,6 +1648,12 @@ _malloc_postfork(void) { unsigned i; +#ifdef JEMALLOC_MUTEX_INIT_CB + if (malloc_initialized == false) + return; +#endif + assert(malloc_initialized); + /* Release all mutexes, now that fork() has completed. */ chunk_dss_postfork_parent(); huge_postfork_parent(); @@ -1658,6 +1670,8 @@ jemalloc_postfork_child(void) { unsigned i; + assert(malloc_initialized); + /* Release all mutexes, now that fork() has completed. */ chunk_dss_postfork_child(); huge_postfork_child(); From cbb71caceb1e53d0fd21284ce298885327c211b4 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 11 May 2012 17:00:20 -0700 Subject: [PATCH 205/205] Update ChangeLog for 3.0.0. 
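PATCH 203 above fixes cases where the CHUNK_MAP_UNZEROED flag was dropped when map bits
were rewritten, so a large calloc() could hand back dirty pages as if they were zeroed.
A standalone check one might use to exercise that bug class; the size and fill pattern
are arbitrary, and the test is probabilistic in that it only trips if the freed run is
actually recycled for the subsequent allocation:

#include <assert.h>
#include <stdlib.h>
#include <string.h>

int
main(void)
{
	size_t i, size = 64 * 1024;	/* assumed to map to a large size class */
	char *p = malloc(size);

	assert(p != NULL);
	memset(p, 0xa5, size);		/* dirty every page of the run */
	free(p);
	p = calloc(1, size);		/* plausibly reuses the just-freed run */
	assert(p != NULL);
	for (i = 0; i < size; i++)
		assert(p[i] == 0);	/* calloc must return zeroed memory */
	free(p);
	return (0);
}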
--- ChangeLog | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/ChangeLog b/ChangeLog index 0a2b2ca5..231dd6da 100644 --- a/ChangeLog +++ b/ChangeLog @@ -6,7 +6,7 @@ found in the git revision history: http://www.canonware.com/cgi-bin/gitweb.cgi?p=jemalloc.git git://canonware.com/jemalloc.git -* 3.0.0 (XXX not yet released) +* 3.0.0 (May 11, 2012) Although this version adds some major new features, the primary focus is on internal code cleanup that facilitates maintainability and portability, most @@ -23,6 +23,7 @@ found in the git revision history: + FreeBSD + Mac OS X Lion + MinGW + + Windows (no support yet for replacing the system malloc) - Add support for additional architectures: + MIPS + SH4 @@ -31,12 +32,13 @@ found in the git revision history: - Add nallocm(), which rounds a request size up to the nearest size class without actually allocating. - Implement aligned_alloc() (blame C11). - - Add the --disable-munmap option, and make it the default on Linux. - - Add the --with-mangling option. - - Add the --disable-experimental option. - Add the "thread.tcache.enabled" mallctl. - Add the "opt.prof_final" mallctl. - Update pprof (from gperftools 2.0). + - Add the --with-mangling option. + - Add the --disable-experimental option. + - Add the --disable-munmap option, and make it the default on Linux. + - Add the --enable-mremap option, which disables use of mremap(2) by default. Incompatible changes: - Enable stats by default.
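The early-return guards PATCH 204 adds exist because, with JEMALLOC_MUTEX_INIT_CB defined,
the system threading library can invoke the fork handlers before jemalloc has initialized,
at which point there are no mutexes to operate on. A minimal sketch of the same pattern in
a standalone module; all names here are hypothetical, and this simplifies jemalloc's
separate parent/child postfork paths into a single unlock:

#include <pthread.h>
#include <stdbool.h>

static bool initialized = false;
static pthread_mutex_t state_lock = PTHREAD_MUTEX_INITIALIZER;

static void
prefork(void)
{

	/* Nothing to protect yet; fork handlers must be no-ops. */
	if (initialized == false)
		return;
	pthread_mutex_lock(&state_lock);
}

static void
postfork(void)
{

	if (initialized == false)
		return;
	pthread_mutex_unlock(&state_lock);
}

static void
init(void)
{

	/* Register handlers once, then mark the module usable. */
	pthread_atfork(prefork, postfork, postfork);
	initialized = true;
}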