From 606ae49fa3b2610f41961d38e82828ddbbd89a69 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 9 Nov 2015 14:59:14 -0800 Subject: [PATCH 01/96] Integrate raw heap profile support into jeprof. --- bin/jeprof.in | 54 ++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 43 insertions(+), 11 deletions(-) diff --git a/bin/jeprof.in b/bin/jeprof.in index e7178078..d00ef5db 100644 --- a/bin/jeprof.in +++ b/bin/jeprof.in @@ -1160,8 +1160,21 @@ sub PrintSymbolizedProfile { } print '---', "\n"; - $PROFILE_PAGE =~ m,[^/]+$,; # matches everything after the last slash - my $profile_marker = $&; + my $profile_marker; + if ($main::profile_type eq 'heap') { + $HEAP_PAGE =~ m,[^/]+$,; # matches everything after the last slash + $profile_marker = $&; + } elsif ($main::profile_type eq 'growth') { + $GROWTH_PAGE =~ m,[^/]+$,; # matches everything after the last slash + $profile_marker = $&; + } elsif ($main::profile_type eq 'contention') { + $CONTENTION_PAGE =~ m,[^/]+$,; # matches everything after the last slash + $profile_marker = $&; + } else { # elsif ($main::profile_type eq 'cpu') + $PROFILE_PAGE =~ m,[^/]+$,; # matches everything after the last slash + $profile_marker = $&; + } + print '--- ', $profile_marker, "\n"; if (defined($main::collected_profile)) { # if used with remote fetch, simply dump the collected profile to output. @@ -1171,6 +1184,12 @@ sub PrintSymbolizedProfile { } close(SRC); } else { + # --raw/http: For everything to work correctly for non-remote profiles, we + # would need to extend PrintProfileData() to handle all possible profile + # types, re-enable the code that is currently disabled in ReadCPUProfile() + # and FixCallerAddresses(), and remove the remote profile dumping code in + # the block above. + die "--raw/http: jeprof can only dump remote profiles for --raw\n"; # dump a cpu-format profile to standard out PrintProfileData($profile); } @@ -3427,12 +3446,22 @@ sub FetchDynamicProfile { } $url .= sprintf("seconds=%d", $main::opt_seconds); $fetch_timeout = $main::opt_seconds * 1.01 + 60; + # Set $profile_type for consumption by PrintSymbolizedProfile. + $main::profile_type = 'cpu'; } else { # For non-CPU profiles, we add a type-extension to # the target profile file name. my $suffix = $path; $suffix =~ s,/,.,g; $profile_file .= $suffix; + # Set $profile_type for consumption by PrintSymbolizedProfile. + if ($path =~ m/$HEAP_PAGE/) { + $main::profile_type = 'heap'; + } elsif ($path =~ m/$GROWTH_PAGE/) { + $main::profile_type = 'growth'; + } elsif ($path =~ m/$CONTENTION_PAGE/) { + $main::profile_type = 'contention'; + } } my $profile_dir = $ENV{"JEPROF_TMPDIR"} || ($ENV{HOME} . "/jeprof"); @@ -3730,6 +3759,8 @@ sub ReadProfile { my $symbol_marker = $&; $PROFILE_PAGE =~ m,[^/]+$,; # matches everything after the last slash my $profile_marker = $&; + $HEAP_PAGE =~ m,[^/]+$,; # matches everything after the last slash + my $heap_marker = $&; # Look at first line to see if it is a heap or a CPU profile. # CPU profile may start with no header at all, and just binary data @@ -3756,7 +3787,13 @@ sub ReadProfile { $header = ReadProfileHeader(*PROFILE) || ""; } + if ($header =~ m/^--- *($heap_marker|$growth_marker)/o) { + # Skip "--- ..." line for profile types that have their own headers. 
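+    # (The marker is printed by PrintSymbolizedProfile() above, which now
+    # emits a type-specific "--- ..." line ahead of heap and growth
+    # profiles; it has to be consumed before the real header is parsed.)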
+    $header = ReadProfileHeader(*PROFILE) || "";
+  }
+
   $main::profile_type = '';
+
   if ($header =~ m/^heap profile:.*$growth_marker/o) {
     $main::profile_type = 'growth';
     $result = ReadHeapProfile($prog, *PROFILE, $header);
@@ -3808,9 +3845,9 @@ sub ReadProfile {
 # independent implementation.
 sub FixCallerAddresses {
   my $stack = shift;
-  if ($main::use_symbolized_profile) {
-    return $stack;
-  } else {
+  # --raw/http: Always subtract one from pc's, because PrintSymbolizedProfile()
+  # dumps unadjusted profiles.
+  {
     $stack =~ /(\s)/;
     my $delimiter = $1;
     my @addrs = split(' ', $stack);
@@ -3878,12 +3915,7 @@ sub ReadCPUProfile {
     for (my $j = 0; $j < $d; $j++) {
       my $pc = $slots->get($i+$j);
       # Subtract one from caller pc so we map back to call instr.
-      # However, don't do this if we're reading a symbolized profile
-      # file, in which case the subtract-one was done when the file
-      # was written.
-      if ($j > 0 && !$main::use_symbolized_profile) {
-        $pc--;
-      }
+      $pc--;
       $pc = sprintf("%0*x", $address_length, $pc);
       $pcs->{$pc} = 1;
       push @k, $pc;

From 566d4c02400700b94a952eddeed34313360211d3 Mon Sep 17 00:00:00 2001
From: Nathan Froyd
Date: Thu, 5 Nov 2015 12:18:43 -0500
Subject: [PATCH 02/96] use correct macro definitions for clang-cl

clang-cl, an MSVC-compatible frontend built on top of clang, defines
_MSC_VER *and* supports __attribute__ syntax.  The ordering of the
checks in jemalloc_macros.h.in, however, does the wrong thing for
clang-cl, as we want the Windows-specific macro definitions for
clang-cl.

To support this use case, we reorder the checks so that _MSC_VER is
checked first (which includes clang-cl), and then JEMALLOC_HAVE_ATTR
is checked.  No functionality change intended.
---
 include/jemalloc/jemalloc_macros.h.in | 52 +++++++++++++--------------
 1 file changed, 26 insertions(+), 26 deletions(-)

diff --git a/include/jemalloc/jemalloc_macros.h.in b/include/jemalloc/jemalloc_macros.h.in
index 7f64d9ff..698caa19 100644
--- a/include/jemalloc/jemalloc_macros.h.in
+++ b/include/jemalloc/jemalloc_macros.h.in
@@ -36,32 +36,7 @@
 # define JEMALLOC_CXX_THROW
 #endif
 
-#ifdef JEMALLOC_HAVE_ATTR
-# define JEMALLOC_ATTR(s) __attribute__((s))
-# define JEMALLOC_ALIGNED(s) JEMALLOC_ATTR(aligned(s))
-# ifdef JEMALLOC_HAVE_ATTR_ALLOC_SIZE
-# define JEMALLOC_ALLOC_SIZE(s) JEMALLOC_ATTR(alloc_size(s))
-# define JEMALLOC_ALLOC_SIZE2(s1, s2) JEMALLOC_ATTR(alloc_size(s1, s2))
-# else
-# define JEMALLOC_ALLOC_SIZE(s)
-# define JEMALLOC_ALLOC_SIZE2(s1, s2)
-# endif
-# ifndef JEMALLOC_EXPORT
-# define JEMALLOC_EXPORT JEMALLOC_ATTR(visibility("default"))
-# endif
-# ifdef JEMALLOC_HAVE_ATTR_FORMAT_GNU_PRINTF
-# define JEMALLOC_FORMAT_PRINTF(s, i) JEMALLOC_ATTR(format(gnu_printf, s, i))
-# elif defined(JEMALLOC_HAVE_ATTR_FORMAT_PRINTF)
-# define JEMALLOC_FORMAT_PRINTF(s, i) JEMALLOC_ATTR(format(printf, s, i))
-# else
-# define JEMALLOC_FORMAT_PRINTF(s, i)
-# endif
-# define JEMALLOC_NOINLINE JEMALLOC_ATTR(noinline)
-# define JEMALLOC_NOTHROW JEMALLOC_ATTR(nothrow)
-# define JEMALLOC_SECTION(s) JEMALLOC_ATTR(section(s))
-# define JEMALLOC_RESTRICT_RETURN
-# define JEMALLOC_ALLOCATOR
-#elif _MSC_VER
+#if _MSC_VER
 # define JEMALLOC_ATTR(s)
 # define JEMALLOC_ALIGNED(s) __declspec(align(s))
 # define JEMALLOC_ALLOC_SIZE(s)
@@ -87,6 +62,31 @@
 # else
 # define JEMALLOC_ALLOCATOR
 # endif
+#elif defined(JEMALLOC_HAVE_ATTR)
+# define JEMALLOC_ATTR(s) __attribute__((s))
+# define JEMALLOC_ALIGNED(s) JEMALLOC_ATTR(aligned(s))
+# ifdef JEMALLOC_HAVE_ATTR_ALLOC_SIZE
+# define JEMALLOC_ALLOC_SIZE(s) JEMALLOC_ATTR(alloc_size(s))
+# define
JEMALLOC_ALLOC_SIZE2(s1, s2) JEMALLOC_ATTR(alloc_size(s1, s2)) +# else +# define JEMALLOC_ALLOC_SIZE(s) +# define JEMALLOC_ALLOC_SIZE2(s1, s2) +# endif +# ifndef JEMALLOC_EXPORT +# define JEMALLOC_EXPORT JEMALLOC_ATTR(visibility("default")) +# endif +# ifdef JEMALLOC_HAVE_ATTR_FORMAT_GNU_PRINTF +# define JEMALLOC_FORMAT_PRINTF(s, i) JEMALLOC_ATTR(format(gnu_printf, s, i)) +# elif defined(JEMALLOC_HAVE_ATTR_FORMAT_PRINTF) +# define JEMALLOC_FORMAT_PRINTF(s, i) JEMALLOC_ATTR(format(printf, s, i)) +# else +# define JEMALLOC_FORMAT_PRINTF(s, i) +# endif +# define JEMALLOC_NOINLINE JEMALLOC_ATTR(noinline) +# define JEMALLOC_NOTHROW JEMALLOC_ATTR(nothrow) +# define JEMALLOC_SECTION(s) JEMALLOC_ATTR(section(s)) +# define JEMALLOC_RESTRICT_RETURN +# define JEMALLOC_ALLOCATOR #else # define JEMALLOC_ATTR(s) # define JEMALLOC_ALIGNED(s) From f97298bfc1c6edbb4fd00820e9e028e8d213af73 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Thu, 3 Sep 2015 20:32:57 +0900 Subject: [PATCH 03/96] Remove arena_run_dalloc_decommit(). This resolves #284. --- src/arena.c | 25 ++----------------------- 1 file changed, 2 insertions(+), 23 deletions(-) diff --git a/src/arena.c b/src/arena.c index 43733cc1..58797ded 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1660,18 +1660,6 @@ arena_run_size_get(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, return (size); } -static bool -arena_run_decommit(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run) -{ - arena_chunk_map_misc_t *miscelm = arena_run_to_miscelm(run); - size_t run_ind = arena_miscelm_to_pageind(miscelm); - size_t offset = run_ind << LG_PAGE; - size_t length = arena_run_size_get(arena, chunk, run, run_ind); - - return (arena->chunk_hooks.decommit(chunk, chunksize, offset, length, - arena->ind)); -} - static void arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, bool cleaned, bool decommitted) @@ -1749,15 +1737,6 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, bool cleaned, arena_maybe_purge(arena); } -static void -arena_run_dalloc_decommit(arena_t *arena, arena_chunk_t *chunk, - arena_run_t *run) -{ - bool committed = arena_run_decommit(arena, chunk, run); - - arena_run_dalloc(arena, run, committed, false, !committed); -} - static void arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, size_t oldsize, size_t newsize) @@ -2441,7 +2420,7 @@ arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, malloc_mutex_unlock(&bin->lock); /******************************/ malloc_mutex_lock(&arena->lock); - arena_run_dalloc_decommit(arena, chunk, run); + arena_run_dalloc(arena, run, true, false, false); malloc_mutex_unlock(&arena->lock); /****************************/ malloc_mutex_lock(&bin->lock); @@ -2584,7 +2563,7 @@ arena_dalloc_large_locked_impl(arena_t *arena, arena_chunk_t *chunk, } } - arena_run_dalloc_decommit(arena, chunk, run); + arena_run_dalloc(arena, run, true, false, false); } void From bd418ce11efe908d0edfbe66d5af17e78582c377 Mon Sep 17 00:00:00 2001 From: Steve Dougherty Date: Fri, 11 Sep 2015 17:56:28 -0400 Subject: [PATCH 04/96] Assert compact color bit is unused Signed-off-by: Joshua Kahn This resolves #280. 
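As a hedged, standalone illustration of what the new assertion protects
(hypothetical names, not code from this patch): rb.h packs the node color
into bit 0 of a child pointer, which is only sound when every node is at
least 2-byte aligned, so that bit 0 of any node address is guaranteed to
be zero:

    #include <assert.h>
    #include <stdint.h>

    typedef struct node_s node_t;
    struct node_s {
        node_t *right_red;  /* Right child pointer; color in bit 0. */
    };

    static void
    node_right_set(node_t *node, node_t *right, int red)
    {
        /* The bookkeeping bit must not be used by the pointer itself. */
        assert(((uintptr_t)right & 0x1) == 0);
        node->right_red = (node_t *)((uintptr_t)right | (red ? 1 : 0));
    }

    static node_t *
    node_right_get(node_t *node)
    {
        /* Mask off the color bit to recover the child pointer. */
        return ((node_t *)((uintptr_t)node->right_red & ~(uintptr_t)1));
    }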
--- include/jemalloc/internal/rb.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/rb.h b/include/jemalloc/internal/rb.h index 2ca8e593..b460d74b 100644 --- a/include/jemalloc/internal/rb.h +++ b/include/jemalloc/internal/rb.h @@ -79,6 +79,15 @@ struct { \ (a_node)->a_field.rbn_right_red = (a_type *) (((intptr_t) \ (a_node)->a_field.rbn_right_red) & ((ssize_t)-2)); \ } while (0) + +/* Node initializer. */ +#define rbt_node_new(a_type, a_field, a_rbt, a_node) do { \ + /* Bookkeeping bit cannot be used by node pointer. */ \ + assert(((uintptr_t)(a_node) & 0x1) == 0); \ + rbtn_left_set(a_type, a_field, (a_node), &(a_rbt)->rbt_nil); \ + rbtn_right_set(a_type, a_field, (a_node), &(a_rbt)->rbt_nil); \ + rbtn_red_set(a_type, a_field, (a_node)); \ +} while (0) #else /* Right accessors. */ #define rbtn_right_get(a_type, a_field, a_node) \ @@ -99,7 +108,6 @@ struct { \ #define rbtn_black_set(a_type, a_field, a_node) do { \ (a_node)->a_field.rbn_red = false; \ } while (0) -#endif /* Node initializer. */ #define rbt_node_new(a_type, a_field, a_rbt, a_node) do { \ @@ -107,6 +115,7 @@ struct { \ rbtn_right_set(a_type, a_field, (a_node), &(a_rbt)->rbt_nil); \ rbtn_red_set(a_type, a_field, (a_node)); \ } while (0) +#endif /* Tree initializer. */ #define rb_new(a_type, a_field, a_rbt) do { \ From 13b401553172942c3cc1d89c70fd965be71c1540 Mon Sep 17 00:00:00 2001 From: Joshua Kahn Date: Fri, 18 Sep 2015 16:58:17 -0400 Subject: [PATCH 05/96] Allow const keys for lookup Signed-off-by: Steve Dougherty This resolves #281. --- include/jemalloc/internal/arena.h | 4 ++-- include/jemalloc/internal/rb.h | 18 +++++++++--------- src/arena.c | 7 ++++--- src/extent.c | 4 ++-- test/unit/rb.c | 2 +- 5 files changed, 18 insertions(+), 17 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 12c61797..9e2375ce 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -512,7 +512,7 @@ arena_chunk_map_bits_t *arena_bitselm_get(arena_chunk_t *chunk, size_t pageind); arena_chunk_map_misc_t *arena_miscelm_get(arena_chunk_t *chunk, size_t pageind); -size_t arena_miscelm_to_pageind(arena_chunk_map_misc_t *miscelm); +size_t arena_miscelm_to_pageind(const arena_chunk_map_misc_t *miscelm); void *arena_miscelm_to_rpages(arena_chunk_map_misc_t *miscelm); arena_chunk_map_misc_t *arena_rd_to_miscelm(arena_runs_dirty_link_t *rd); arena_chunk_map_misc_t *arena_run_to_miscelm(arena_run_t *run); @@ -590,7 +590,7 @@ arena_miscelm_get(arena_chunk_t *chunk, size_t pageind) } JEMALLOC_ALWAYS_INLINE size_t -arena_miscelm_to_pageind(arena_chunk_map_misc_t *miscelm) +arena_miscelm_to_pageind(const arena_chunk_map_misc_t *miscelm) { arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(miscelm); size_t pageind = ((uintptr_t)miscelm - ((uintptr_t)chunk + diff --git a/include/jemalloc/internal/rb.h b/include/jemalloc/internal/rb.h index b460d74b..7ddc383b 100644 --- a/include/jemalloc/internal/rb.h +++ b/include/jemalloc/internal/rb.h @@ -178,11 +178,11 @@ a_prefix##next(a_rbt_type *rbtree, a_type *node); \ a_attr a_type * \ a_prefix##prev(a_rbt_type *rbtree, a_type *node); \ a_attr a_type * \ -a_prefix##search(a_rbt_type *rbtree, a_type *key); \ +a_prefix##search(a_rbt_type *rbtree, const a_type *key); \ a_attr a_type * \ -a_prefix##nsearch(a_rbt_type *rbtree, a_type *key); \ +a_prefix##nsearch(a_rbt_type *rbtree, const a_type *key); \ a_attr a_type * \ -a_prefix##psearch(a_rbt_type *rbtree, a_type *key); \ 
+a_prefix##psearch(a_rbt_type *rbtree, const a_type *key); \ a_attr void \ a_prefix##insert(a_rbt_type *rbtree, a_type *node); \ a_attr void \ @@ -263,7 +263,7 @@ a_prefix##reverse_iter(a_rbt_type *rbtree, a_type *start, \ * last/first. * * static ex_node_t * - * ex_search(ex_t *tree, ex_node_t *key); + * ex_search(ex_t *tree, const ex_node_t *key); * Description: Search for node that matches key. * Args: * tree: Pointer to an initialized red-black tree object. @@ -271,9 +271,9 @@ a_prefix##reverse_iter(a_rbt_type *rbtree, a_type *start, \ * Ret: Node in tree that matches key, or NULL if no match. * * static ex_node_t * - * ex_nsearch(ex_t *tree, ex_node_t *key); + * ex_nsearch(ex_t *tree, const ex_node_t *key); * static ex_node_t * - * ex_psearch(ex_t *tree, ex_node_t *key); + * ex_psearch(ex_t *tree, const ex_node_t *key); * Description: Search for node that matches key. If no match is found, * return what would be key's successor/predecessor, were * key in tree. @@ -406,7 +406,7 @@ a_prefix##prev(a_rbt_type *rbtree, a_type *node) { \ return (ret); \ } \ a_attr a_type * \ -a_prefix##search(a_rbt_type *rbtree, a_type *key) { \ +a_prefix##search(a_rbt_type *rbtree, const a_type *key) { \ a_type *ret; \ int cmp; \ ret = rbtree->rbt_root; \ @@ -424,7 +424,7 @@ a_prefix##search(a_rbt_type *rbtree, a_type *key) { \ return (ret); \ } \ a_attr a_type * \ -a_prefix##nsearch(a_rbt_type *rbtree, a_type *key) { \ +a_prefix##nsearch(a_rbt_type *rbtree, const a_type *key) { \ a_type *ret; \ a_type *tnode = rbtree->rbt_root; \ ret = &rbtree->rbt_nil; \ @@ -446,7 +446,7 @@ a_prefix##nsearch(a_rbt_type *rbtree, a_type *key) { \ return (ret); \ } \ a_attr a_type * \ -a_prefix##psearch(a_rbt_type *rbtree, a_type *key) { \ +a_prefix##psearch(a_rbt_type *rbtree, const a_type *key) { \ a_type *ret; \ a_type *tnode = rbtree->rbt_root; \ ret = &rbtree->rbt_nil; \ diff --git a/src/arena.c b/src/arena.c index 58797ded..844d721c 100644 --- a/src/arena.c +++ b/src/arena.c @@ -62,7 +62,7 @@ arena_miscelm_key_size_get(const arena_chunk_map_misc_t *miscelm) } JEMALLOC_INLINE_C size_t -arena_miscelm_size_get(arena_chunk_map_misc_t *miscelm) +arena_miscelm_size_get(const arena_chunk_map_misc_t *miscelm) { arena_chunk_t *chunk; size_t pageind, mapbits; @@ -76,7 +76,7 @@ arena_miscelm_size_get(arena_chunk_map_misc_t *miscelm) } JEMALLOC_INLINE_C int -arena_run_comp(arena_chunk_map_misc_t *a, arena_chunk_map_misc_t *b) +arena_run_comp(const arena_chunk_map_misc_t *a, const arena_chunk_map_misc_t *b) { uintptr_t a_miscelm = (uintptr_t)a; uintptr_t b_miscelm = (uintptr_t)b; @@ -169,7 +169,8 @@ run_quantize_first(size_t size) } JEMALLOC_INLINE_C int -arena_avail_comp(arena_chunk_map_misc_t *a, arena_chunk_map_misc_t *b) +arena_avail_comp(const arena_chunk_map_misc_t *a, + const arena_chunk_map_misc_t *b) { int ret; uintptr_t a_miscelm = (uintptr_t)a; diff --git a/src/extent.c b/src/extent.c index 13f94411..9f5146e5 100644 --- a/src/extent.c +++ b/src/extent.c @@ -15,7 +15,7 @@ extent_quantize(size_t size) } JEMALLOC_INLINE_C int -extent_szad_comp(extent_node_t *a, extent_node_t *b) +extent_szad_comp(const extent_node_t *a, const extent_node_t *b) { int ret; size_t a_qsize = extent_quantize(extent_node_size_get(a)); @@ -41,7 +41,7 @@ rb_gen(, extent_tree_szad_, extent_tree_t, extent_node_t, szad_link, extent_szad_comp) JEMALLOC_INLINE_C int -extent_ad_comp(extent_node_t *a, extent_node_t *b) +extent_ad_comp(const extent_node_t *a, const extent_node_t *b) { uintptr_t a_addr = (uintptr_t)extent_node_addr_get(a); uintptr_t b_addr 
= (uintptr_t)extent_node_addr_get(b); diff --git a/test/unit/rb.c b/test/unit/rb.c index b38eb0e3..0262037b 100644 --- a/test/unit/rb.c +++ b/test/unit/rb.c @@ -21,7 +21,7 @@ struct node_s { }; static int -node_cmp(node_t *a, node_t *b) { +node_cmp(const node_t *a, const node_t *b) { int ret; assert_u32_eq(a->magic, NODE_MAGIC, "Bad magic"); From e8ab0ab9c0e395d3c09398fa981704a9be968838 Mon Sep 17 00:00:00 2001 From: Joshua Kahn Date: Fri, 18 Sep 2015 15:58:39 -0400 Subject: [PATCH 06/96] Add function to destroy tree ex_destroy iterates over the tree using post-order traversal so nodes can be removed and processed by the callback function without paying the cost to rebalance the tree. The destruction process cannot be stopped once started. --- include/jemalloc/internal/rb.h | 41 +++++++++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/rb.h b/include/jemalloc/internal/rb.h index 7ddc383b..30ccab44 100644 --- a/include/jemalloc/internal/rb.h +++ b/include/jemalloc/internal/rb.h @@ -192,7 +192,10 @@ a_prefix##iter(a_rbt_type *rbtree, a_type *start, a_type *(*cb)( \ a_rbt_type *, a_type *, void *), void *arg); \ a_attr a_type * \ a_prefix##reverse_iter(a_rbt_type *rbtree, a_type *start, \ - a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg); + a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg); \ +a_attr void \ +a_prefix##destroy(a_rbt_type *rbtree, void (*cb)(a_type *, void *), \ + void *arg); /* * The rb_gen() macro generates a type-specific red-black tree implementation, @@ -321,6 +324,20 @@ a_prefix##reverse_iter(a_rbt_type *rbtree, a_type *start, \ * arg : Opaque pointer passed to cb(). * Ret: NULL if iteration completed, or the non-NULL callback return value * that caused termination of the iteration. + * + * static void + * ex_destroy(ex_t *tree, void (*cb)(ex_node_t *, void *), void *arg); + * Description: Iterate over the tree with post-order traversal, remove + * each node, and run the callback if non-null. This is + * used for destroying a tree without paying the cost to + * rebalance it. The tree must not be otherwise altered + * during traversal. + * Args: + * tree: Pointer to an initialized red-black tree object. + * cb : Callback function, which, if non-null, is called for each node + * during iteration. There is no way to stop iteration once it has + * begun. + * arg : Opaque pointer passed to cb(). 
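+ *
+ * A minimal usage sketch (hedged; it assumes the ex_* names from the
+ * example rb_gen() call documented above, and that each node was
+ * individually allocated with malloc()):
+ *
+ *   static void
+ *   destroy_cb(ex_node_t *node, void *arg) {
+ *       unsigned *count = (unsigned *)arg;
+ *       (*count)++;
+ *       free(node);
+ *   }
+ *
+ *   unsigned count = 0;
+ *   ex_destroy(&tree, destroy_cb, &count);
+ *
+ * Freeing the node in the callback is safe because the tree does not
+ * access a node again after cb() has been invoked on it.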
*/ #define rb_gen(a_attr, a_prefix, a_rbt_type, a_type, a_field, a_cmp) \ a_attr void \ @@ -985,6 +1002,28 @@ a_prefix##reverse_iter(a_rbt_type *rbtree, a_type *start, \ ret = NULL; \ } \ return (ret); \ +} \ +a_attr void \ +a_prefix##destroy_recurse(a_rbt_type *rbtree, a_type *node, void (*cb)( \ + a_type *, void *), void *arg) { \ + if (node == &rbtree->rbt_nil) { \ + return; \ + } \ + a_prefix##destroy_recurse(rbtree, rbtn_left_get(a_type, a_field, \ + node), cb, arg); \ + rbtn_left_set(a_type, a_field, (node), &rbtree->rbt_nil); \ + a_prefix##destroy_recurse(rbtree, rbtn_right_get(a_type, a_field, \ + node), cb, arg); \ + rbtn_right_set(a_type, a_field, (node), &rbtree->rbt_nil); \ + if (cb) { \ + cb(node, arg); \ + } \ +} \ +a_attr void \ +a_prefix##destroy(a_rbt_type *rbtree, void (*cb)(a_type *, void *), \ + void *arg) { \ + a_prefix##destroy_recurse(rbtree, rbtree->rbt_root, cb, arg); \ + rbtree->rbt_root = &rbtree->rbt_nil; \ } #endif /* RB_H_ */ From 710ca112e31e8621177d08162f60158c27dd2974 Mon Sep 17 00:00:00 2001 From: Joshua Kahn Date: Mon, 21 Sep 2015 17:14:55 -0400 Subject: [PATCH 07/96] Add test for tree destruction --- test/unit/rb.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/test/unit/rb.c b/test/unit/rb.c index 0262037b..14132c13 100644 --- a/test/unit/rb.c +++ b/test/unit/rb.c @@ -212,6 +212,15 @@ remove_reverse_iterate_cb(tree_t *tree, node_t *node, void *data) return (ret); } +static void +destroy_cb(node_t *node, void *data) +{ + unsigned *nnodes = (unsigned *)data; + + assert_u_gt(*nnodes, 0, "Destruction removed too many nodes"); + (*nnodes)--; +} + TEST_BEGIN(test_rb_random) { #define NNODES 25 @@ -278,7 +287,7 @@ TEST_BEGIN(test_rb_random) } /* Remove nodes. */ - switch (i % 4) { + switch (i % 5) { case 0: for (k = 0; k < j; k++) node_remove(&tree, &nodes[k], j - k); @@ -314,6 +323,12 @@ TEST_BEGIN(test_rb_random) assert_u_eq(nnodes, 0, "Removal terminated early"); break; + } case 4: { + unsigned nnodes = j; + tree_destroy(&tree, destroy_cb, &nnodes); + assert_u_eq(nnodes, 0, + "Destruction terminated early"); + break; } default: not_reached(); } From f4a0f32d340985de477bbe329ecdaecd69ed1055 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 27 Oct 2015 15:12:10 -0700 Subject: [PATCH 08/96] Fast-path improvement: reduce # of branches and unnecessary operations. - Combine multiple runtime branches into a single malloc_slow check. - Avoid calling arena_choose / size2index / index2size on fast path. - A few micro optimizations. 
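A hedged sketch of the malloc_slow idea (illustrative, self-contained
names; not the code in this patch): fold every rarely-enabled option into
one boolean at initialization so the allocation path pays a single,
well-predicted branch, and pass a statically known slow_path so the
compiler can generate separate fast and slow variants:

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdlib.h>
    #include <string.h>

    /* Hypothetical option flags, set while parsing malloc options. */
    static bool opt_junk, opt_zero, opt_quarantine, opt_utrace;
    static bool malloc_slow;

    static void
    malloc_slow_flag_init(void)
    {
        /* Collapse all slow-path conditions into a single flag. */
        malloc_slow = opt_junk | opt_zero | opt_quarantine | opt_utrace;
    }

    static inline void *
    alloc_body(size_t size, bool slow_path)
    {
        void *ret = malloc(size);

        /* With a compile-time-constant slow_path, this folds away. */
        if (slow_path && ret != NULL && opt_zero)
            memset(ret, 0, size);
        return (ret);
    }

    void *
    my_malloc(size_t size)
    {
        if (!malloc_slow)   /* The single fast-path branch. */
            return (alloc_body(size, false));
        return (alloc_body(size, true));
    }

This mirrors how je_malloc() in this patch calls imalloc_body() with a
constant slow_path argument in each arm.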
--- include/jemalloc/internal/arena.h | 63 +++--- .../jemalloc/internal/jemalloc_internal.h.in | 62 +++--- include/jemalloc/internal/prof.h | 6 +- include/jemalloc/internal/tcache.h | 116 +++++++---- src/arena.c | 26 +-- src/ckh.c | 10 +- src/huge.c | 6 +- src/jemalloc.c | 192 +++++++++++++----- src/prof.c | 37 ++-- src/quarantine.c | 20 +- src/tcache.c | 33 +-- 11 files changed, 357 insertions(+), 214 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 9e2375ce..9715ad93 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -461,8 +461,10 @@ extern arena_dalloc_junk_small_t *arena_dalloc_junk_small; void arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info); #endif void arena_quarantine_junk_small(void *ptr, size_t usize); -void *arena_malloc_small(arena_t *arena, size_t size, bool zero); -void *arena_malloc_large(arena_t *arena, size_t size, bool zero); +void *arena_malloc_small(arena_t *arena, size_t size, szind_t ind, + bool zero); +void *arena_malloc_large(arena_t *arena, size_t size, szind_t ind, + bool zero); void *arena_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, bool zero, tcache_t *tcache); void arena_prof_promoted(const void *ptr, size_t size); @@ -558,11 +560,11 @@ prof_tctx_t *arena_prof_tctx_get(const void *ptr); void arena_prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx); void arena_prof_tctx_reset(const void *ptr, size_t usize, const void *old_ptr, prof_tctx_t *old_tctx); -void *arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero, - tcache_t *tcache); +void *arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind, + bool zero, tcache_t *tcache, bool slow_path); arena_t *arena_aalloc(const void *ptr); size_t arena_salloc(const void *ptr, bool demote); -void arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache); +void arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path); void arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache); #endif @@ -1158,34 +1160,34 @@ arena_prof_tctx_reset(const void *ptr, size_t usize, const void *old_ptr, } JEMALLOC_ALWAYS_INLINE void * -arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero, - tcache_t *tcache) +arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind, + bool zero, tcache_t *tcache, bool slow_path) { assert(size != 0); + if (likely(tcache != NULL)) { + if (likely(size <= SMALL_MAXCLASS)) { + return (tcache_alloc_small(tsd, arena, tcache, size, + ind, zero, slow_path)); + } + if (likely(size <= tcache_maxclass)) { + return (tcache_alloc_large(tsd, arena, tcache, size, + ind, zero, slow_path)); + } + /* (size > tcache_maxclass) case falls through. */ + assert(size > tcache_maxclass); + } + arena = arena_choose(tsd, arena); if (unlikely(arena == NULL)) return (NULL); - if (likely(size <= SMALL_MAXCLASS)) { - if (likely(tcache != NULL)) { - return (tcache_alloc_small(tsd, arena, tcache, size, - zero)); - } else - return (arena_malloc_small(arena, size, zero)); - } else if (likely(size <= large_maxclass)) { - /* - * Initialize tcache after checking size in order to avoid - * infinite recursion during tcache initialization. 
- */ - if (likely(tcache != NULL) && size <= tcache_maxclass) { - return (tcache_alloc_large(tsd, arena, tcache, size, - zero)); - } else - return (arena_malloc_large(arena, size, zero)); - } else - return (huge_malloc(tsd, arena, size, zero, tcache)); + if (likely(size <= SMALL_MAXCLASS)) + return (arena_malloc_small(arena, size, ind, zero)); + if (likely(size <= large_maxclass)) + return (arena_malloc_large(arena, size, ind, zero)); + return (huge_malloc(tsd, arena, size, zero, tcache)); } JEMALLOC_ALWAYS_INLINE arena_t * @@ -1251,7 +1253,7 @@ arena_salloc(const void *ptr, bool demote) } JEMALLOC_ALWAYS_INLINE void -arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache) +arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) { arena_chunk_t *chunk; size_t pageind, mapbits; @@ -1268,7 +1270,8 @@ arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache) if (likely(tcache != NULL)) { szind_t binind = arena_ptr_small_binind_get(ptr, mapbits); - tcache_dalloc_small(tsd, tcache, ptr, binind); + tcache_dalloc_small(tsd, tcache, ptr, binind, + slow_path); } else { arena_dalloc_small(extent_node_arena_get( &chunk->node), chunk, ptr, pageind); @@ -1283,7 +1286,7 @@ arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache) if (likely(tcache != NULL) && size - large_pad <= tcache_maxclass) { tcache_dalloc_large(tsd, tcache, ptr, size - - large_pad); + large_pad, slow_path); } else { arena_dalloc_large(extent_node_arena_get( &chunk->node), chunk, ptr); @@ -1319,7 +1322,7 @@ arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache) /* Small allocation. */ if (likely(tcache != NULL)) { szind_t binind = size2index(size); - tcache_dalloc_small(tsd, tcache, ptr, binind); + tcache_dalloc_small(tsd, tcache, ptr, binind, true); } else { size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; @@ -1331,7 +1334,7 @@ arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache) PAGE_MASK) == 0); if (likely(tcache != NULL) && size <= tcache_maxclass) - tcache_dalloc_large(tsd, tcache, ptr, size); + tcache_dalloc_large(tsd, tcache, ptr, size, true); else { arena_dalloc_large(extent_node_arena_get( &chunk->node), chunk, ptr); diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 654cd088..d31da4ca 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -437,7 +437,7 @@ extern unsigned ncpus; * index2size_tab encodes the same information as could be computed (at * unacceptable cost in some code paths) by index2size_compute(). */ -extern size_t const index2size_tab[NSIZES]; +extern size_t const index2size_tab[NSIZES+1]; /* * size2index_tab is a compact lookup table that rounds request sizes up to * size classes. 
In order to reduce cache footprint, the table is compressed, @@ -624,7 +624,7 @@ JEMALLOC_ALWAYS_INLINE size_t index2size(szind_t index) { - assert(index < NSIZES); + assert(index <= NSIZES); return (index2size_lookup(index)); } @@ -823,12 +823,14 @@ arena_get(tsd_t *tsd, unsigned ind, bool init_if_missing, #ifndef JEMALLOC_ENABLE_INLINE arena_t *iaalloc(const void *ptr); size_t isalloc(const void *ptr, bool demote); -void *iallocztm(tsd_t *tsd, size_t size, bool zero, tcache_t *tcache, - bool is_metadata, arena_t *arena); -void *imalloct(tsd_t *tsd, size_t size, tcache_t *tcache, arena_t *arena); -void *imalloc(tsd_t *tsd, size_t size); -void *icalloct(tsd_t *tsd, size_t size, tcache_t *tcache, arena_t *arena); -void *icalloc(tsd_t *tsd, size_t size); +void *iallocztm(tsd_t *tsd, size_t size, szind_t ind, bool zero, + tcache_t *tcache, bool is_metadata, arena_t *arena, bool slow_path); +void *imalloct(tsd_t *tsd, size_t size, szind_t ind, tcache_t *tcache, + arena_t *arena); +void *imalloc(tsd_t *tsd, size_t size, szind_t ind, bool slow_path); +void *icalloct(tsd_t *tsd, size_t size, szind_t ind, tcache_t *tcache, + arena_t *arena); +void *icalloc(tsd_t *tsd, size_t size, szind_t ind); void *ipallocztm(tsd_t *tsd, size_t usize, size_t alignment, bool zero, tcache_t *tcache, bool is_metadata, arena_t *arena); void *ipalloct(tsd_t *tsd, size_t usize, size_t alignment, bool zero, @@ -837,10 +839,11 @@ void *ipalloc(tsd_t *tsd, size_t usize, size_t alignment, bool zero); size_t ivsalloc(const void *ptr, bool demote); size_t u2rz(size_t usize); size_t p2rz(const void *ptr); -void idalloctm(tsd_t *tsd, void *ptr, tcache_t *tcache, bool is_metadata); +void idalloctm(tsd_t *tsd, void *ptr, tcache_t *tcache, bool is_metadata, + bool slow_path); void idalloct(tsd_t *tsd, void *ptr, tcache_t *tcache); void idalloc(tsd_t *tsd, void *ptr); -void iqalloc(tsd_t *tsd, void *ptr, tcache_t *tcache); +void iqalloc(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path); void isdalloct(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache); void isqalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache); void *iralloct_realign(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, @@ -881,14 +884,14 @@ isalloc(const void *ptr, bool demote) } JEMALLOC_ALWAYS_INLINE void * -iallocztm(tsd_t *tsd, size_t size, bool zero, tcache_t *tcache, bool is_metadata, - arena_t *arena) +iallocztm(tsd_t *tsd, size_t size, szind_t ind, bool zero, tcache_t *tcache, + bool is_metadata, arena_t *arena, bool slow_path) { void *ret; assert(size != 0); - ret = arena_malloc(tsd, arena, size, zero, tcache); + ret = arena_malloc(tsd, arena, size, ind, zero, tcache, slow_path); if (config_stats && is_metadata && likely(ret != NULL)) { arena_metadata_allocated_add(iaalloc(ret), isalloc(ret, config_prof)); @@ -897,31 +900,33 @@ iallocztm(tsd_t *tsd, size_t size, bool zero, tcache_t *tcache, bool is_metadata } JEMALLOC_ALWAYS_INLINE void * -imalloct(tsd_t *tsd, size_t size, tcache_t *tcache, arena_t *arena) +imalloct(tsd_t *tsd, size_t size, szind_t ind, tcache_t *tcache, arena_t *arena) { - return (iallocztm(tsd, size, false, tcache, false, arena)); + return (iallocztm(tsd, size, ind, false, tcache, false, arena, true)); } JEMALLOC_ALWAYS_INLINE void * -imalloc(tsd_t *tsd, size_t size) +imalloc(tsd_t *tsd, size_t size, szind_t ind, bool slow_path) { - return (iallocztm(tsd, size, false, tcache_get(tsd, true), false, NULL)); + return (iallocztm(tsd, size, ind, false, tcache_get(tsd, true), false, + NULL, slow_path)); } 
 JEMALLOC_ALWAYS_INLINE void *
-icalloct(tsd_t *tsd, size_t size, tcache_t *tcache, arena_t *arena)
+icalloct(tsd_t *tsd, size_t size, szind_t ind, tcache_t *tcache, arena_t *arena)
 {
 
-	return (iallocztm(tsd, size, true, tcache, false, arena));
+	return (iallocztm(tsd, size, ind, true, tcache, false, arena, true));
 }
 
 JEMALLOC_ALWAYS_INLINE void *
-icalloc(tsd_t *tsd, size_t size)
+icalloc(tsd_t *tsd, size_t size, szind_t ind)
 {
 
-	return (iallocztm(tsd, size, true, tcache_get(tsd, true), false, NULL));
+	return (iallocztm(tsd, size, ind, true, tcache_get(tsd, true), false,
+	    NULL, true));
 }
 
 JEMALLOC_ALWAYS_INLINE void *
@@ -997,7 +1002,8 @@ p2rz(const void *ptr)
 }
 
 JEMALLOC_ALWAYS_INLINE void
-idalloctm(tsd_t *tsd, void *ptr, tcache_t *tcache, bool is_metadata)
+idalloctm(tsd_t *tsd, void *ptr, tcache_t *tcache, bool is_metadata,
+    bool slow_path)
 {
 
 	assert(ptr != NULL);
@@ -1006,31 +1012,31 @@ idalloctm(tsd_t *tsd, void *ptr, tcache_t *tcache, bool is_metadata)
 		    config_prof));
 	}
 
-	arena_dalloc(tsd, ptr, tcache);
+	arena_dalloc(tsd, ptr, tcache, slow_path);
 }
 
 JEMALLOC_ALWAYS_INLINE void
 idalloct(tsd_t *tsd, void *ptr, tcache_t *tcache)
 {
 
-	idalloctm(tsd, ptr, tcache, false);
+	idalloctm(tsd, ptr, tcache, false, true);
 }
 
 JEMALLOC_ALWAYS_INLINE void
 idalloc(tsd_t *tsd, void *ptr)
 {
 
-	idalloctm(tsd, ptr, tcache_get(tsd, false), false);
+	idalloctm(tsd, ptr, tcache_get(tsd, false), false, true);
 }
 
 JEMALLOC_ALWAYS_INLINE void
-iqalloc(tsd_t *tsd, void *ptr, tcache_t *tcache)
+iqalloc(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path)
 {
 
-	if (config_fill && unlikely(opt_quarantine))
+	if (slow_path && config_fill && unlikely(opt_quarantine))
 		quarantine(tsd, ptr);
 	else
-		idalloctm(tsd, ptr, tcache, false);
+		idalloctm(tsd, ptr, tcache, false, slow_path);
 }
 
 JEMALLOC_ALWAYS_INLINE void
diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h
index e5198c3e..a25502a9 100644
--- a/include/jemalloc/internal/prof.h
+++ b/include/jemalloc/internal/prof.h
@@ -436,16 +436,16 @@ prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update,
 	cassert(config_prof);
 
 	tdata = prof_tdata_get(tsd, true);
-	if ((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
+	if (unlikely((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX))
 		tdata = NULL;
 
 	if (tdata_out != NULL)
 		*tdata_out = tdata;
 
-	if (tdata == NULL)
+	if (unlikely(tdata == NULL))
 		return (true);
 
-	if (tdata->bytes_until_sample >= usize) {
+	if (likely(tdata->bytes_until_sample >= usize)) {
 		if (update)
 			tdata->bytes_until_sample -= usize;
 		return (true);
diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h
index 5079cd26..c2921405 100644
--- a/include/jemalloc/internal/tcache.h
+++ b/include/jemalloc/internal/tcache.h
@@ -70,6 +70,13 @@ struct tcache_bin_s {
 	int		low_water;	/* Min # cached since last GC. */
 	unsigned	lg_fill_div;	/* Fill (ncached_max >> lg_fill_div). */
 	unsigned	ncached;	/* # of cached objects. */
+	/*
+	 * To make use of adjacent cacheline prefetch, the items in the avail
+	 * stack go to higher addresses for newer allocations.  avail points
+	 * just above the available space, which means that
+	 * avail[-ncached, ... -1] are available items and the lowest item will
+	 * be allocated first.
+	 */
 	void		**avail;	/* Stack of available objects.
*/ }; @@ -126,7 +133,7 @@ extern tcaches_t *tcaches; size_t tcache_salloc(const void *ptr); void tcache_event_hard(tsd_t *tsd, tcache_t *tcache); void *tcache_alloc_small_hard(tsd_t *tsd, arena_t *arena, tcache_t *tcache, - tcache_bin_t *tbin, szind_t binind); + tcache_bin_t *tbin, szind_t binind, bool *tcache_success); void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, szind_t binind, unsigned rem); void tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, @@ -155,15 +162,15 @@ void tcache_flush(void); bool tcache_enabled_get(void); tcache_t *tcache_get(tsd_t *tsd, bool create); void tcache_enabled_set(bool enabled); -void *tcache_alloc_easy(tcache_bin_t *tbin); +void *tcache_alloc_easy(tcache_bin_t *tbin, bool *tcache_success); void *tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, - size_t size, bool zero); + size_t size, szind_t ind, bool zero, bool slow_path); void *tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, - size_t size, bool zero); + size_t size, szind_t ind, bool zero, bool slow_path); void tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, - szind_t binind); + szind_t binind, bool slow_path); void tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, - size_t size); + size_t size, bool slow_path); tcache_t *tcaches_get(tsd_t *tsd, unsigned ind); #endif @@ -247,44 +254,69 @@ tcache_event(tsd_t *tsd, tcache_t *tcache) } JEMALLOC_ALWAYS_INLINE void * -tcache_alloc_easy(tcache_bin_t *tbin) +tcache_alloc_easy(tcache_bin_t *tbin, bool *tcache_success) { void *ret; if (unlikely(tbin->ncached == 0)) { tbin->low_water = -1; + *tcache_success = false; return (NULL); } + /* + * tcache_success (instead of ret) should be checked upon the return of + * this function. We avoid checking (ret == NULL) because there is + * never a null stored on the avail stack (which is unknown to the + * compiler), and eagerly checking ret would cause pipeline stall + * (waiting for the cacheline). + */ + *tcache_success = true; + ret = *(tbin->avail - tbin->ncached); tbin->ncached--; + if (unlikely((int)tbin->ncached < tbin->low_water)) tbin->low_water = tbin->ncached; - ret = tbin->avail[tbin->ncached]; + return (ret); } JEMALLOC_ALWAYS_INLINE void * tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, - bool zero) + szind_t binind, bool zero, bool slow_path) { void *ret; - szind_t binind; - size_t usize; tcache_bin_t *tbin; + bool tcache_success; + size_t usize JEMALLOC_CC_SILENCE_INIT(0); - binind = size2index(size); assert(binind < NBINS); tbin = &tcache->tbins[binind]; - usize = index2size(binind); - ret = tcache_alloc_easy(tbin); - if (unlikely(ret == NULL)) { - ret = tcache_alloc_small_hard(tsd, arena, tcache, tbin, binind); - if (ret == NULL) + ret = tcache_alloc_easy(tbin, &tcache_success); + assert(tcache_success == (ret != NULL)); + if (unlikely(!tcache_success)) { + bool tcache_hard_success; + arena = arena_choose(tsd, arena); + if (unlikely(arena == NULL)) + return (NULL); + + ret = tcache_alloc_small_hard(tsd, arena, tcache, tbin, binind, + &tcache_hard_success); + if (tcache_hard_success == false) return (NULL); } - assert(tcache_salloc(ret) == usize); + + assert(ret); + /* + * Only compute usize if required. The checks in the following if + * statement are all static. 
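+	 * (config_prof and config_fill are compile-time constants, and
+	 * slow_path and zero are constants at each inlined call site, so
+	 * the compiler can typically fold this branch away entirely.)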
+ */ + if (config_prof || (slow_path && config_fill) || unlikely(zero)) { + usize = index2size(binind); + assert(tcache_salloc(ret) == usize); + } if (likely(!zero)) { - if (config_fill) { + if (slow_path && config_fill) { if (unlikely(opt_junk_alloc)) { arena_alloc_junk_small(ret, &arena_bin_info[binind], false); @@ -292,7 +324,7 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, memset(ret, 0, usize); } } else { - if (config_fill && unlikely(opt_junk_alloc)) { + if (slow_path && config_fill && unlikely(opt_junk_alloc)) { arena_alloc_junk_small(ret, &arena_bin_info[binind], true); } @@ -309,28 +341,38 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, JEMALLOC_ALWAYS_INLINE void * tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, - bool zero) + szind_t binind, bool zero, bool slow_path) { void *ret; - szind_t binind; - size_t usize; tcache_bin_t *tbin; + bool tcache_success; + size_t usize JEMALLOC_CC_SILENCE_INIT(0); - binind = size2index(size); - usize = index2size(binind); - assert(usize <= tcache_maxclass); assert(binind < nhbins); tbin = &tcache->tbins[binind]; - ret = tcache_alloc_easy(tbin); - if (unlikely(ret == NULL)) { + ret = tcache_alloc_easy(tbin, &tcache_success); + assert(tcache_success == (ret != NULL)); + if (unlikely(!tcache_success)) { /* * Only allocate one large object at a time, because it's quite * expensive to create one and not use it. */ - ret = arena_malloc_large(arena, usize, zero); + arena = arena_choose(tsd, arena); + if (unlikely(arena == NULL)) + return (NULL); + + usize = index2size(binind); + assert(usize <= tcache_maxclass); + ret = arena_malloc_large(arena, usize, binind, zero); if (ret == NULL) return (NULL); } else { + /* Only compute usize on demand */ + if (config_prof || (slow_path && config_fill) || unlikely(zero)) { + usize = index2size(binind); + assert(usize <= tcache_maxclass); + } + if (config_prof && usize == LARGE_MINCLASS) { arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ret); @@ -340,7 +382,7 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, BININD_INVALID); } if (likely(!zero)) { - if (config_fill) { + if (slow_path && config_fill) { if (unlikely(opt_junk_alloc)) memset(ret, 0xa5, usize); else if (unlikely(opt_zero)) @@ -360,14 +402,15 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, } JEMALLOC_ALWAYS_INLINE void -tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind) +tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, + bool slow_path) { tcache_bin_t *tbin; tcache_bin_info_t *tbin_info; assert(tcache_salloc(ptr) <= SMALL_MAXCLASS); - if (config_fill && unlikely(opt_junk_free)) + if (slow_path && config_fill && unlikely(opt_junk_free)) arena_dalloc_junk_small(ptr, &arena_bin_info[binind]); tbin = &tcache->tbins[binind]; @@ -377,14 +420,15 @@ tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind) (tbin_info->ncached_max >> 1)); } assert(tbin->ncached < tbin_info->ncached_max); - tbin->avail[tbin->ncached] = ptr; tbin->ncached++; + *(tbin->avail - tbin->ncached) = ptr; tcache_event(tsd, tcache); } JEMALLOC_ALWAYS_INLINE void -tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, size_t size) +tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, size_t size, + bool slow_path) { szind_t binind; tcache_bin_t *tbin; @@ -396,7 +440,7 @@ tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, size_t 
size) binind = size2index(size); - if (config_fill && unlikely(opt_junk_free)) + if (slow_path && config_fill && unlikely(opt_junk_free)) arena_dalloc_junk_large(ptr, size); tbin = &tcache->tbins[binind]; @@ -406,8 +450,8 @@ tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, size_t size) (tbin_info->ncached_max >> 1), tcache); } assert(tbin->ncached < tbin_info->ncached_max); - tbin->avail[tbin->ncached] = ptr; tbin->ncached++; + *(tbin->avail - tbin->ncached) = ptr; tcache_event(tsd, tcache); } diff --git a/src/arena.c b/src/arena.c index 844d721c..143afb9a 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1990,11 +1990,10 @@ arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, szind_t binind, /* * OOM. tbin->avail isn't yet filled down to its first * element, so the successful allocations (if any) must - * be moved to the base of tbin->avail before bailing - * out. + * be moved just before tbin->avail before bailing out. */ if (i > 0) { - memmove(tbin->avail, &tbin->avail[nfill - i], + memmove(tbin->avail - i, tbin->avail - nfill, i * sizeof(void *)); } break; @@ -2004,7 +2003,7 @@ arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, szind_t binind, true); } /* Insert such that low regions get used first. */ - tbin->avail[nfill - 1 - i] = ptr; + *(tbin->avail - nfill + i) = ptr; } if (config_stats) { bin->stats.nmalloc += i; @@ -2125,14 +2124,12 @@ arena_quarantine_junk_small(void *ptr, size_t usize) } void * -arena_malloc_small(arena_t *arena, size_t size, bool zero) +arena_malloc_small(arena_t *arena, size_t size, szind_t binind, bool zero) { void *ret; arena_bin_t *bin; arena_run_t *run; - szind_t binind; - binind = size2index(size); assert(binind < NBINS); bin = &arena->bins[binind]; size = index2size(binind); @@ -2179,7 +2176,7 @@ arena_malloc_small(arena_t *arena, size_t size, bool zero) } void * -arena_malloc_large(arena_t *arena, size_t size, bool zero) +arena_malloc_large(arena_t *arena, size_t size, szind_t binind, bool zero) { void *ret; size_t usize; @@ -2189,7 +2186,7 @@ arena_malloc_large(arena_t *arena, size_t size, bool zero) UNUSED bool idump; /* Large allocation. */ - usize = s2u(size); + usize = index2size(binind); malloc_mutex_lock(&arena->lock); if (config_cache_oblivious) { uint64_t r; @@ -2214,7 +2211,7 @@ arena_malloc_large(arena_t *arena, size_t size, bool zero) ret = (void *)((uintptr_t)arena_miscelm_to_rpages(miscelm) + random_offset); if (config_stats) { - szind_t index = size2index(usize) - NBINS; + szind_t index = binind - NBINS; arena->stats.nmalloc_large++; arena->stats.nrequests_large++; @@ -2336,7 +2333,8 @@ arena_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, if (usize <= SMALL_MAXCLASS && (alignment < PAGE || (alignment == PAGE && (usize & PAGE_MASK) == 0))) { /* Small; alignment doesn't require special run placement. */ - ret = arena_malloc(tsd, arena, usize, zero, tcache); + ret = arena_malloc(tsd, arena, usize, size2index(usize), zero, + tcache, true); } else if (usize <= large_maxclass && alignment <= PAGE) { /* * Large; alignment doesn't require special run placement. @@ -2344,7 +2342,8 @@ arena_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, * the base of the run, so do some bit manipulation to retrieve * the base. 
*/ - ret = arena_malloc(tsd, arena, usize, zero, tcache); + ret = arena_malloc(tsd, arena, usize, size2index(usize), zero, + tcache, true); if (config_cache_oblivious) ret = (void *)((uintptr_t)ret & ~PAGE_MASK); } else { @@ -2823,7 +2822,8 @@ arena_ralloc_move_helper(tsd_t *tsd, arena_t *arena, size_t usize, { if (alignment == 0) - return (arena_malloc(tsd, arena, usize, zero, tcache)); + return (arena_malloc(tsd, arena, usize, size2index(usize), zero, + tcache, true)); usize = sa2u(usize, alignment); if (usize == 0) return (NULL); diff --git a/src/ckh.c b/src/ckh.c index 53a1c1ef..e4328d22 100644 --- a/src/ckh.c +++ b/src/ckh.c @@ -283,12 +283,12 @@ ckh_grow(tsd_t *tsd, ckh_t *ckh) ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS; if (!ckh_rebuild(ckh, tab)) { - idalloctm(tsd, tab, tcache_get(tsd, false), true); + idalloctm(tsd, tab, tcache_get(tsd, false), true, true); break; } /* Rebuilding failed, so back out partially rebuilt table. */ - idalloctm(tsd, ckh->tab, tcache_get(tsd, false), true); + idalloctm(tsd, ckh->tab, tcache_get(tsd, false), true, true); ckh->tab = tab; ckh->lg_curbuckets = lg_prevbuckets; } @@ -330,7 +330,7 @@ ckh_shrink(tsd_t *tsd, ckh_t *ckh) ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS; if (!ckh_rebuild(ckh, tab)) { - idalloctm(tsd, tab, tcache_get(tsd, false), true); + idalloctm(tsd, tab, tcache_get(tsd, false), true, true); #ifdef CKH_COUNT ckh->nshrinks++; #endif @@ -338,7 +338,7 @@ ckh_shrink(tsd_t *tsd, ckh_t *ckh) } /* Rebuilding failed, so back out partially rebuilt table. */ - idalloctm(tsd, ckh->tab, tcache_get(tsd, false), true); + idalloctm(tsd, ckh->tab, tcache_get(tsd, false), true, true); ckh->tab = tab; ckh->lg_curbuckets = lg_prevbuckets; #ifdef CKH_COUNT @@ -421,7 +421,7 @@ ckh_delete(tsd_t *tsd, ckh_t *ckh) (unsigned long long)ckh->nrelocs); #endif - idalloctm(tsd, ckh->tab, tcache_get(tsd, false), true); + idalloctm(tsd, ckh->tab, tcache_get(tsd, false), true, true); if (config_debug) memset(ckh, 0x5a, sizeof(ckh_t)); } diff --git a/src/huge.c b/src/huge.c index 1e9a6651..c1fa3795 100644 --- a/src/huge.c +++ b/src/huge.c @@ -75,7 +75,7 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t size, size_t alignment, arena = arena_choose(tsd, arena); if (unlikely(arena == NULL) || (ret = arena_chunk_alloc_huge(arena, size, alignment, &is_zeroed)) == NULL) { - idalloctm(tsd, node, tcache, true); + idalloctm(tsd, node, tcache, true, true); return (NULL); } @@ -83,7 +83,7 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t size, size_t alignment, if (huge_node_set(ret, node)) { arena_chunk_dalloc_huge(arena, ret, size); - idalloctm(tsd, node, tcache, true); + idalloctm(tsd, node, tcache, true, true); return (NULL); } @@ -372,7 +372,7 @@ huge_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache) extent_node_size_get(node)); arena_chunk_dalloc_huge(extent_node_arena_get(node), extent_node_addr_get(node), extent_node_size_get(node)); - idalloctm(tsd, node, tcache, true); + idalloctm(tsd, node, tcache, true, true); } arena_t * diff --git a/src/jemalloc.c b/src/jemalloc.c index 5a2d3240..eed6331d 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -70,12 +70,29 @@ typedef enum { } malloc_init_t; static malloc_init_t malloc_init_state = malloc_init_uninitialized; +/* 0 should be the common case. Set to true to trigger initialization. */ +static bool malloc_slow = true; + +/* When malloc_slow != 0, set the corresponding bits for sanity check. 
 */
+enum {
+	flag_opt_junk_alloc	= (1U),
+	flag_opt_junk_free	= (1U << 1),
+	flag_opt_quarantine	= (1U << 2),
+	flag_opt_zero		= (1U << 3),
+	flag_opt_utrace		= (1U << 4),
+	flag_in_valgrind	= (1U << 5),
+	flag_opt_xmalloc	= (1U << 6)
+};
+static uint8_t	malloc_slow_flags;
+
+/* Last entry for overflow detection only. */
 JEMALLOC_ALIGNED(CACHELINE)
-const size_t	index2size_tab[NSIZES] = {
+const size_t	index2size_tab[NSIZES+1] = {
 #define	SC(index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup) \
 	((ZU(1)<<lg_grp) + (ZU(ndelta)<<lg_delta)),
 	SIZE_CLASSES
 #undef SC
+	ZU(0),
 };

[...]

+JEMALLOC_ALWAYS_INLINE_C void
+imalloc_post_check(void *ret, tsd_t *tsd, size_t usize, bool slow_path)
+{
+	if (unlikely(ret == NULL)) {
+		if (slow_path && config_xmalloc && unlikely(opt_xmalloc)) {
+			malloc_write("<jemalloc>: Error in malloc(): "
+			    "out of memory\n");
+			abort();
+		}
+		set_errno(ENOMEM);
+	}
+	if (config_stats && likely(ret != NULL)) {
+		assert(usize == isalloc(ret, config_prof));
+		*tsd_thread_allocatedp_get(tsd) += usize;
+	}
+}
 
 JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN
@@ -1424,21 +1488,20 @@ je_malloc(size_t size)
 	if (size == 0)
 		size = 1;
 
-	ret = imalloc_body(size, &tsd, &usize);
-	if (unlikely(ret == NULL)) {
-		if (config_xmalloc && unlikely(opt_xmalloc)) {
-			malloc_write("<jemalloc>: Error in malloc(): "
-			    "out of memory\n");
-			abort();
-		}
-		set_errno(ENOMEM);
+	if (likely(!malloc_slow)) {
+		/*
+		 * imalloc_body() is inlined so that fast and slow paths are
+		 * generated separately with statically known slow_path.
+		 */
+		ret = imalloc_body(size, &tsd, &usize, false);
+		imalloc_post_check(ret, tsd, usize, false);
+	} else {
+		ret = imalloc_body(size, &tsd, &usize, true);
+		imalloc_post_check(ret, tsd, usize, true);
+		UTRACE(0, size, ret);
+		JEMALLOC_VALGRIND_MALLOC(ret != NULL, ret, usize, false);
 	}
-	if (config_stats && likely(ret != NULL)) {
-		assert(usize == isalloc(ret, config_prof));
-		*tsd_thread_allocatedp_get(tsd) += usize;
-	}
-	UTRACE(0, size, ret);
-	JEMALLOC_VALGRIND_MALLOC(ret != NULL, ret, usize, false);
+
 	return (ret);
 }
 
@@ -1576,34 +1639,35 @@ je_aligned_alloc(size_t alignment, size_t size)
 }
 
 static void *
-icalloc_prof_sample(tsd_t *tsd, size_t usize, prof_tctx_t *tctx)
+icalloc_prof_sample(tsd_t *tsd, size_t usize, szind_t ind, prof_tctx_t *tctx)
 {
 	void *p;
 
 	if (tctx == NULL)
 		return (NULL);
 	if (usize <= SMALL_MAXCLASS) {
-		p = icalloc(tsd, LARGE_MINCLASS);
+		szind_t ind_large = size2index(LARGE_MINCLASS);
+		p = icalloc(tsd, LARGE_MINCLASS, ind_large);
 		if (p == NULL)
 			return (NULL);
 		arena_prof_promoted(p, usize);
 	} else
-		p = icalloc(tsd, usize);
+		p = icalloc(tsd, usize, ind);
 
 	return (p);
 }
 
 JEMALLOC_ALWAYS_INLINE_C void *
-icalloc_prof(tsd_t *tsd, size_t usize)
+icalloc_prof(tsd_t *tsd, size_t usize, szind_t ind)
 {
 	void *p;
 	prof_tctx_t *tctx;
 
 	tctx = prof_alloc_prep(tsd, usize, prof_active_get_unlocked(), true);
 	if (unlikely((uintptr_t)tctx != (uintptr_t)1U))
-		p = icalloc_prof_sample(tsd, usize, tctx);
+		p = icalloc_prof_sample(tsd, usize, ind, tctx);
 	else
-		p = icalloc(tsd, usize);
+		p = icalloc(tsd, usize, ind);
 	if (unlikely(p == NULL)) {
 		prof_alloc_rollback(tsd, tctx, true);
 		return (NULL);
@@ -1621,6 +1685,7 @@ je_calloc(size_t num, size_t size)
 	void *ret;
 	tsd_t *tsd;
 	size_t num_size;
+	szind_t ind;
 	size_t usize JEMALLOC_CC_SILENCE_INIT(0);
 
 	if (unlikely(malloc_init())) {
@@ -1650,17 +1715,18 @@
 		goto label_return;
 	}
 
+	ind = size2index(num_size);
 	if (config_prof && opt_prof) {
-		usize = index2size(ind);
+		usize = index2size(ind);
 		if (unlikely(usize == 0)) {
 			ret = NULL;
 			goto label_return;
 		}
-		ret = icalloc_prof(tsd, usize);
+		ret = icalloc_prof(tsd, usize, ind);
 	} else {
 		if (config_stats || (config_valgrind && unlikely(in_valgrind)))
-			usize = s2u(num_size);
+			usize = index2size(ind);
-		ret = icalloc(tsd, num_size);
+		ret =
icalloc(tsd, num_size, ind); } label_return: @@ -1725,7 +1791,7 @@ irealloc_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t usize) } JEMALLOC_INLINE_C void -ifree(tsd_t *tsd, void *ptr, tcache_t *tcache) +ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) { size_t usize; UNUSED size_t rzsize JEMALLOC_CC_SILENCE_INIT(0); @@ -1740,10 +1806,15 @@ ifree(tsd_t *tsd, void *ptr, tcache_t *tcache) usize = isalloc(ptr, config_prof); if (config_stats) *tsd_thread_deallocatedp_get(tsd) += usize; - if (config_valgrind && unlikely(in_valgrind)) - rzsize = p2rz(ptr); - iqalloc(tsd, ptr, tcache); - JEMALLOC_VALGRIND_FREE(ptr, rzsize); + + if (likely(!slow_path)) + iqalloc(tsd, ptr, tcache, false); + else { + if (config_valgrind && unlikely(in_valgrind)) + rzsize = p2rz(ptr); + iqalloc(tsd, ptr, tcache, true); + JEMALLOC_VALGRIND_FREE(ptr, rzsize); + } } JEMALLOC_INLINE_C void @@ -1780,7 +1851,7 @@ je_realloc(void *ptr, size_t size) /* realloc(ptr, 0) is equivalent to free(ptr). */ UTRACE(ptr, 0, 0); tsd = tsd_fetch(); - ifree(tsd, ptr, tcache_get(tsd, false)); + ifree(tsd, ptr, tcache_get(tsd, false), true); return (NULL); } size = 1; @@ -1807,7 +1878,10 @@ je_realloc(void *ptr, size_t size) } } else { /* realloc(NULL, size) is equivalent to malloc(size). */ - ret = imalloc_body(size, &tsd, &usize); + if (likely(!malloc_slow)) + ret = imalloc_body(size, &tsd, &usize, false); + else + ret = imalloc_body(size, &tsd, &usize, true); } if (unlikely(ret == NULL)) { @@ -1836,7 +1910,10 @@ je_free(void *ptr) UTRACE(ptr, 0, 0); if (likely(ptr != NULL)) { tsd_t *tsd = tsd_fetch(); - ifree(tsd, ptr, tcache_get(tsd, false)); + if (likely(!malloc_slow)) + ifree(tsd, ptr, tcache_get(tsd, false), false); + else + ifree(tsd, ptr, tcache_get(tsd, false), true); } } @@ -1965,12 +2042,14 @@ JEMALLOC_ALWAYS_INLINE_C void * imallocx_flags(tsd_t *tsd, size_t usize, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena) { + szind_t ind; + ind = size2index(usize); if (unlikely(alignment != 0)) return (ipalloct(tsd, usize, alignment, zero, tcache, arena)); if (unlikely(zero)) - return (icalloct(tsd, usize, tcache, arena)); - return (imalloct(tsd, usize, tcache, arena)); + return (icalloct(tsd, usize, ind, tcache, arena)); + return (imalloct(tsd, usize, ind, tcache, arena)); } static void * @@ -2034,9 +2113,10 @@ imallocx_no_prof(tsd_t *tsd, size_t size, int flags, size_t *usize) arena_t *arena; if (likely(flags == 0)) { + szind_t ind = size2index(size); if (config_stats || (config_valgrind && unlikely(in_valgrind))) - *usize = s2u(size); - return (imalloc(tsd, size)); + *usize = index2size(ind); + return (imalloc(tsd, size, ind, true)); } if (unlikely(imallocx_flags_decode_hard(tsd, size, flags, usize, @@ -2375,7 +2455,7 @@ je_dallocx(void *ptr, int flags) tcache = tcache_get(tsd, false); UTRACE(ptr, 0, 0); - ifree(tsd_fetch(), ptr, tcache); + ifree(tsd_fetch(), ptr, tcache, true); } JEMALLOC_ALWAYS_INLINE_C size_t diff --git a/src/prof.c b/src/prof.c index 5d2b9598..199e63e4 100644 --- a/src/prof.c +++ b/src/prof.c @@ -551,9 +551,9 @@ prof_gctx_create(tsd_t *tsd, prof_bt_t *bt) /* * Create a single allocation that has space for vec of length bt->len. 
*/ - prof_gctx_t *gctx = (prof_gctx_t *)iallocztm(tsd, offsetof(prof_gctx_t, - vec) + (bt->len * sizeof(void *)), false, tcache_get(tsd, true), - true, NULL); + size_t size = offsetof(prof_gctx_t, vec) + (bt->len * sizeof(void *)); + prof_gctx_t *gctx = (prof_gctx_t *)iallocztm(tsd, size, + size2index(size), false, tcache_get(tsd, true), true, NULL, true); if (gctx == NULL) return (NULL); gctx->lock = prof_gctx_mutex_choose(); @@ -594,7 +594,7 @@ prof_gctx_try_destroy(tsd_t *tsd, prof_tdata_t *tdata_self, prof_gctx_t *gctx, prof_leave(tsd, tdata_self); /* Destroy gctx. */ malloc_mutex_unlock(gctx->lock); - idalloctm(tsd, gctx, tcache_get(tsd, false), true); + idalloctm(tsd, gctx, tcache_get(tsd, false), true, true); } else { /* * Compensate for increment in prof_tctx_destroy() or @@ -701,7 +701,7 @@ prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) prof_tdata_destroy(tsd, tdata, false); if (destroy_tctx) - idalloctm(tsd, tctx, tcache_get(tsd, false), true); + idalloctm(tsd, tctx, tcache_get(tsd, false), true, true); } static bool @@ -730,7 +730,8 @@ prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata, if (ckh_insert(tsd, &bt2gctx, btkey.v, gctx.v)) { /* OOM. */ prof_leave(tsd, tdata); - idalloctm(tsd, gctx.v, tcache_get(tsd, false), true); + idalloctm(tsd, gctx.v, tcache_get(tsd, false), true, + true); return (true); } new_gctx = true; @@ -789,8 +790,9 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) /* Link a prof_tctx_t into gctx for this thread. */ tcache = tcache_get(tsd, true); - ret.v = iallocztm(tsd, sizeof(prof_tctx_t), false, tcache, true, - NULL); + ret.v = iallocztm(tsd, sizeof(prof_tctx_t), + size2index(sizeof(prof_tctx_t)), false, tcache, true, NULL, + true); if (ret.p == NULL) { if (new_gctx) prof_gctx_try_destroy(tsd, tdata, gctx, tdata); @@ -810,7 +812,7 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) if (error) { if (new_gctx) prof_gctx_try_destroy(tsd, tdata, gctx, tdata); - idalloctm(tsd, ret.v, tcache, true); + idalloctm(tsd, ret.v, tcache, true, true); return (NULL); } malloc_mutex_lock(gctx->lock); @@ -1211,7 +1213,7 @@ prof_gctx_finish(tsd_t *tsd, prof_gctx_tree_t *gctxs) tctx_tree_remove(&gctx->tctxs, to_destroy); idalloctm(tsd, to_destroy, - tcache_get(tsd, false), true); + tcache_get(tsd, false), true, true); } else next = NULL; } while (next != NULL); @@ -1714,8 +1716,8 @@ prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, /* Initialize an empty cache for this thread. 
*/ tcache = tcache_get(tsd, true); - tdata = (prof_tdata_t *)iallocztm(tsd, sizeof(prof_tdata_t), false, - tcache, true, NULL); + tdata = (prof_tdata_t *)iallocztm(tsd, sizeof(prof_tdata_t), + size2index(sizeof(prof_tdata_t)), false, tcache, true, NULL, true); if (tdata == NULL) return (NULL); @@ -1729,7 +1731,7 @@ prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, if (ckh_new(tsd, &tdata->bt2tctx, PROF_CKH_MINITEMS, prof_bt_hash, prof_bt_keycomp)) { - idalloctm(tsd, tdata, tcache, true); + idalloctm(tsd, tdata, tcache, true, true); return (NULL); } @@ -1784,9 +1786,9 @@ prof_tdata_destroy_locked(tsd_t *tsd, prof_tdata_t *tdata, tcache = tcache_get(tsd, false); if (tdata->thread_name != NULL) - idalloctm(tsd, tdata->thread_name, tcache, true); + idalloctm(tsd, tdata->thread_name, tcache, true, true); ckh_delete(tsd, &tdata->bt2tctx); - idalloctm(tsd, tdata, tcache, true); + idalloctm(tsd, tdata, tcache, true, true); } static void @@ -1947,7 +1949,8 @@ prof_thread_name_alloc(tsd_t *tsd, const char *thread_name) if (size == 1) return (""); - ret = iallocztm(tsd, size, false, tcache_get(tsd, true), true, NULL); + ret = iallocztm(tsd, size, size2index(size), false, tcache_get(tsd, + true), true, NULL, true); if (ret == NULL) return (NULL); memcpy(ret, thread_name, size); @@ -1980,7 +1983,7 @@ prof_thread_name_set(tsd_t *tsd, const char *thread_name) if (tdata->thread_name != NULL) { idalloctm(tsd, tdata->thread_name, tcache_get(tsd, false), - true); + true, true); tdata->thread_name = NULL; } if (strlen(s) > 0) diff --git a/src/quarantine.c b/src/quarantine.c index 6c43dfca..ff8801cb 100644 --- a/src/quarantine.c +++ b/src/quarantine.c @@ -23,12 +23,14 @@ static quarantine_t * quarantine_init(tsd_t *tsd, size_t lg_maxobjs) { quarantine_t *quarantine; + size_t size; assert(tsd_nominal(tsd)); - quarantine = (quarantine_t *)iallocztm(tsd, offsetof(quarantine_t, objs) - + ((ZU(1) << lg_maxobjs) * sizeof(quarantine_obj_t)), false, - tcache_get(tsd, true), true, NULL); + size = offsetof(quarantine_t, objs) + ((ZU(1) << lg_maxobjs) * + sizeof(quarantine_obj_t)); + quarantine = (quarantine_t *)iallocztm(tsd, size, size2index(size), + false, tcache_get(tsd, true), true, NULL, true); if (quarantine == NULL) return (NULL); quarantine->curbytes = 0; @@ -55,7 +57,7 @@ quarantine_alloc_hook_work(tsd_t *tsd) if (tsd_quarantine_get(tsd) == NULL) tsd_quarantine_set(tsd, quarantine); else - idalloctm(tsd, quarantine, tcache_get(tsd, false), true); + idalloctm(tsd, quarantine, tcache_get(tsd, false), true, true); } static quarantine_t * @@ -87,7 +89,7 @@ quarantine_grow(tsd_t *tsd, quarantine_t *quarantine) memcpy(&ret->objs[ncopy_a], quarantine->objs, ncopy_b * sizeof(quarantine_obj_t)); } - idalloctm(tsd, quarantine, tcache_get(tsd, false), true); + idalloctm(tsd, quarantine, tcache_get(tsd, false), true, true); tsd_quarantine_set(tsd, ret); return (ret); @@ -98,7 +100,7 @@ quarantine_drain_one(tsd_t *tsd, quarantine_t *quarantine) { quarantine_obj_t *obj = &quarantine->objs[quarantine->first]; assert(obj->usize == isalloc(obj->ptr, config_prof)); - idalloctm(tsd, obj->ptr, NULL, false); + idalloctm(tsd, obj->ptr, NULL, false, true); quarantine->curbytes -= obj->usize; quarantine->curobjs--; quarantine->first = (quarantine->first + 1) & ((ZU(1) << @@ -123,7 +125,7 @@ quarantine(tsd_t *tsd, void *ptr) assert(opt_quarantine); if ((quarantine = tsd_quarantine_get(tsd)) == NULL) { - idalloctm(tsd, ptr, NULL, false); + idalloctm(tsd, ptr, NULL, false, true); return; } /* @@ -162,7 +164,7 @@ 
quarantine(tsd_t *tsd, void *ptr) } } else { assert(quarantine->curbytes == 0); - idalloctm(tsd, ptr, NULL, false); + idalloctm(tsd, ptr, NULL, false, true); } } @@ -177,7 +179,7 @@ quarantine_cleanup(tsd_t *tsd) quarantine = tsd_quarantine_get(tsd); if (quarantine != NULL) { quarantine_drain(tsd, quarantine, 0); - idalloctm(tsd, quarantine, tcache_get(tsd, false), true); + idalloctm(tsd, quarantine, tcache_get(tsd, false), true, true); tsd_quarantine_set(tsd, NULL); } } diff --git a/src/tcache.c b/src/tcache.c index fdafd0c6..78c62300 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -72,7 +72,7 @@ tcache_event_hard(tsd_t *tsd, tcache_t *tcache) void * tcache_alloc_small_hard(tsd_t *tsd, arena_t *arena, tcache_t *tcache, - tcache_bin_t *tbin, szind_t binind) + tcache_bin_t *tbin, szind_t binind, bool *tcache_success) { void *ret; @@ -80,7 +80,7 @@ tcache_alloc_small_hard(tsd_t *tsd, arena_t *arena, tcache_t *tcache, tcache->prof_accumbytes : 0); if (config_prof) tcache->prof_accumbytes = 0; - ret = tcache_alloc_easy(tbin); + ret = tcache_alloc_easy(tbin, tcache_success); return (ret); } @@ -102,7 +102,7 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) { /* Lock the arena bin associated with the first object. */ arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE( - tbin->avail[0]); + *(tbin->avail - 1)); arena_t *bin_arena = extent_node_arena_get(&chunk->node); arena_bin_t *bin = &bin_arena->bins[binind]; @@ -122,7 +122,7 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, } ndeferred = 0; for (i = 0; i < nflush; i++) { - ptr = tbin->avail[i]; + ptr = *(tbin->avail - 1 - i); assert(ptr != NULL); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (extent_node_arena_get(&chunk->node) == bin_arena) { @@ -139,7 +139,7 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, * locked. Stash the object, so that it can be * handled in a future pass. */ - tbin->avail[ndeferred] = ptr; + *(tbin->avail - 1 - ndeferred) = ptr; ndeferred++; } } @@ -158,8 +158,8 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, malloc_mutex_unlock(&bin->lock); } - memmove(tbin->avail, &tbin->avail[tbin->ncached - rem], - rem * sizeof(void *)); + memmove(tbin->avail - rem, tbin->avail - tbin->ncached, rem * + sizeof(void *)); tbin->ncached = rem; if ((int)tbin->ncached < tbin->low_water) tbin->low_water = tbin->ncached; @@ -182,7 +182,7 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) { /* Lock the arena associated with the first object. */ arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE( - tbin->avail[0]); + *(tbin->avail - 1)); arena_t *locked_arena = extent_node_arena_get(&chunk->node); UNUSED bool idump; @@ -206,7 +206,7 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, } ndeferred = 0; for (i = 0; i < nflush; i++) { - ptr = tbin->avail[i]; + ptr = *(tbin->avail - 1 - i); assert(ptr != NULL); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (extent_node_arena_get(&chunk->node) == @@ -220,7 +220,7 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, * Stash the object, so that it can be handled * in a future pass. 
*/ - tbin->avail[ndeferred] = ptr; + *(tbin->avail - 1 - ndeferred) = ptr; ndeferred++; } } @@ -241,8 +241,8 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, malloc_mutex_unlock(&arena->lock); } - memmove(tbin->avail, &tbin->avail[tbin->ncached - rem], - rem * sizeof(void *)); + memmove(tbin->avail - rem, tbin->avail - tbin->ncached, rem * + sizeof(void *)); tbin->ncached = rem; if ((int)tbin->ncached < tbin->low_water) tbin->low_water = tbin->ncached; @@ -333,9 +333,14 @@ tcache_create(tsd_t *tsd, arena_t *arena) assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0); for (i = 0; i < nhbins; i++) { tcache->tbins[i].lg_fill_div = 1; + stack_offset += tcache_bin_info[i].ncached_max * sizeof(void *); + /* + * avail points past the available space. Allocations will + * access the slots toward higher addresses (for the benefit of + * prefetch). + */ tcache->tbins[i].avail = (void **)((uintptr_t)tcache + (uintptr_t)stack_offset); - stack_offset += tcache_bin_info[i].ncached_max * sizeof(void *); } return (tcache); @@ -379,7 +384,7 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache) arena_prof_accum(arena, tcache->prof_accumbytes)) prof_idump(); - idalloctm(tsd, tcache, false, true); + idalloctm(tsd, tcache, false, true, true); } void From ea59ebf4d3c2a5749e170cc45c294e04129e5b49 Mon Sep 17 00:00:00 2001 From: Dmitry-Me Date: Thu, 12 Nov 2015 14:59:29 +0300 Subject: [PATCH 09/96] Reuse previously computed value --- src/zone.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/zone.c b/src/zone.c index 12e1734a..6859b3fe 100644 --- a/src/zone.c +++ b/src/zone.c @@ -121,9 +121,11 @@ zone_memalign(malloc_zone_t *zone, size_t alignment, size_t size) static void zone_free_definite_size(malloc_zone_t *zone, void *ptr, size_t size) { + size_t alloc_size; - if (ivsalloc(ptr, config_prof) != 0) { - assert(ivsalloc(ptr, config_prof) == size); + alloc_size = ivsalloc(ptr, config_prof); + if (alloc_size != 0) { + assert(alloc_size == size); je_free(ptr); return; } From a6ec1c869e1abe3eb70616d19d3e553339449636 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 12 Nov 2015 10:51:32 -0800 Subject: [PATCH 10/96] Fix a comment. --- include/jemalloc/internal/tcache.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index c2921405..aa73060a 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -74,7 +74,7 @@ struct tcache_bin_s { * To make use of adjacent cacheline prefetch, the items in the avail * stack goes to higher address for newer allocations. avail points * just above the available space, which means that - * avail[-ncached, ... 1] are available items and the lowest item will + * avail[-ncached, ... -1] are available items and the lowest item will * be allocated first. */ void **avail; /* Stack of available objects. */ From f9e3459f751b08b3c2108fda7462827cf8a4f2af Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 12 Nov 2015 11:06:41 -0800 Subject: [PATCH 11/96] Tweak code to allow compilation of concatenated src/*.c sources. This resolves #294. 
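The change below works by giving src/util.c its own simplified assertion macros (which must not recurse into malloc_*printf()), then restoring the shared definitions before any subsequently concatenated source file is compiled. The following standalone sketch shows that guard/undef pattern with stand-in names (use_simple_assert, use_shared_assert, and the "<demo>" prefix are illustrative, and <assert.h> stands in for jemalloc's internal header):

    #include <stdio.h>
    #include <stdlib.h>

    /* Local, non-recursing assert(), analogous to the one src/util.c keeps. */
    #define assert(e) do {						\
    	if (!(e)) {						\
    		fputs("<demo>: Failed assertion\n", stderr);	\
    		abort();					\
    	}							\
    } while (0)

    static int
    use_simple_assert(int x)
    {
    	assert(x > 0);
    	return (x);
    }

    /*
     * Restore a shared definition so code concatenated after this file sees
     * the normal macro. jemalloc's internal assert.h guards each macro with
     * #ifndef, so re-inclusion is safe; <assert.h> is only a stand-in here.
     */
    #undef assert
    #include <assert.h>

    static int
    use_shared_assert(int x)
    {
    	assert(x > 0);
    	return (x);
    }

    int
    main(void)
    {
    	return (use_simple_assert(1) + use_shared_assert(2) == 3 ? 0 : 1);
    }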
---
 include/jemalloc/internal/assert.h | 45 ++++++++++++++++++++++++++++++
 include/jemalloc/internal/util.h   | 44 +----------------------------
 src/ctl.c                          |  6 ++--
 src/util.c                         | 13 +++++++++
 4 files changed, 62 insertions(+), 46 deletions(-)
 create mode 100644 include/jemalloc/internal/assert.h

diff --git a/include/jemalloc/internal/assert.h b/include/jemalloc/internal/assert.h
new file mode 100644
index 00000000..6f8f7eb9
--- /dev/null
+++ b/include/jemalloc/internal/assert.h
@@ -0,0 +1,45 @@
+/*
+ * Define a custom assert() in order to reduce the chances of deadlock during
+ * assertion failure.
+ */
+#ifndef assert
+#define assert(e) do {							\
+	if (unlikely(config_debug && !(e))) {				\
+		malloc_printf(						\
+		    "<jemalloc>: %s:%d: Failed assertion: \"%s\"\n",	\
+		    __FILE__, __LINE__, #e);				\
+		abort();						\
+	}								\
+} while (0)
+#endif
+
+#ifndef not_reached
+#define not_reached() do {						\
+	if (config_debug) {						\
+		malloc_printf(						\
+		    "<jemalloc>: %s:%d: Unreachable code reached\n",	\
+		    __FILE__, __LINE__);				\
+		abort();						\
+	}								\
+	unreachable();							\
+} while (0)
+#endif
+
+#ifndef not_implemented
+#define not_implemented() do {						\
+	if (config_debug) {						\
+		malloc_printf("<jemalloc>: %s:%d: Not implemented\n",	\
+		    __FILE__, __LINE__);				\
+		abort();						\
+	}								\
+} while (0)
+#endif
+
+#ifndef assert_not_implemented
+#define assert_not_implemented(e) do {					\
+	if (unlikely(config_debug && !(e)))				\
+		not_implemented();					\
+} while (0)
+#endif
diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h
index b2ea740f..0bccea24 100644
--- a/include/jemalloc/internal/util.h
+++ b/include/jemalloc/internal/util.h
@@ -81,49 +81,7 @@
 #  define unreachable()
 #endif
 
-/*
- * Define a custom assert() in order to reduce the chances of deadlock during
- * assertion failure.
- */
-#ifndef assert
-#define assert(e) do {							\
-	if (unlikely(config_debug && !(e))) {				\
-		malloc_printf(						\
-		    "<jemalloc>: %s:%d: Failed assertion: \"%s\"\n",	\
-		    __FILE__, __LINE__, #e);				\
-		abort();						\
-	}								\
-} while (0)
-#endif
-
-#ifndef not_reached
-#define not_reached() do {						\
-	if (config_debug) {						\
-		malloc_printf(						\
-		    "<jemalloc>: %s:%d: Unreachable code reached\n",	\
-		    __FILE__, __LINE__);				\
-		abort();						\
-	}								\
-	unreachable();							\
-} while (0)
-#endif
-
-#ifndef not_implemented
-#define not_implemented() do {						\
-	if (config_debug) {						\
-		malloc_printf("<jemalloc>: %s:%d: Not implemented\n",	\
-		    __FILE__, __LINE__);				\
-		abort();						\
-	}								\
-} while (0)
-#endif
-
-#ifndef assert_not_implemented
-#define assert_not_implemented(e) do {					\
-	if (unlikely(config_debug && !(e)))				\
-		not_implemented();					\
-} while (0)
-#endif
+#include "jemalloc/internal/assert.h"
 
 /* Use to assert a particular configuration, e.g., cassert(config_debug). */
 #define cassert(c) do {							\
diff --git a/src/ctl.c b/src/ctl.c
index 3de8e602..db1ddcb5 100644
--- a/src/ctl.c
+++ b/src/ctl.c
@@ -115,7 +115,7 @@ CTL_PROTO(tcache_create)
 CTL_PROTO(tcache_flush)
 CTL_PROTO(tcache_destroy)
 CTL_PROTO(arena_i_purge)
-static void arena_purge(unsigned arena_ind);
+static void arena_i_purge(unsigned arena_ind);
 CTL_PROTO(arena_i_dss)
 CTL_PROTO(arena_i_lg_dirty_mult)
 CTL_PROTO(arena_i_chunk_hooks)
@@ -1538,7 +1538,7 @@ label_return:
 
 /* ctl_mutex must be held during execution of this function.
 */
 static void
-arena_purge(unsigned arena_ind)
+arena_i_purge(unsigned arena_ind)
 {
 	tsd_t *tsd;
 	unsigned i;
@@ -1576,7 +1576,7 @@ arena_i_purge_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
 	READONLY();
 	WRITEONLY();
 	malloc_mutex_lock(&ctl_mtx);
-	arena_purge(mib[1]);
+	arena_i_purge(mib[1]);
 	malloc_mutex_unlock(&ctl_mtx);
 
 	ret = 0;
diff --git a/src/util.c b/src/util.c
index 4cb0d6c1..1373ee15 100644
--- a/src/util.c
+++ b/src/util.c
@@ -1,3 +1,7 @@
+/*
+ * Define simple versions of assertion macros that won't recurse in case
+ * of assertion failures in malloc_*printf().
+ */
 #define assert(e) do {							\
 	if (config_debug && !(e)) {					\
 		malloc_write("<jemalloc>: Failed assertion\n");		\
@@ -648,3 +652,12 @@ malloc_printf(const char *format, ...)
 	malloc_vcprintf(NULL, NULL, format, ap);
 	va_end(ap);
 }
+
+/*
+ * Restore normal assertion macros, in order to make it possible to compile all
+ * C files as a single concatenation.
+ */
+#undef assert
+#undef not_reached
+#undef not_implemented
+#include "jemalloc/internal/assert.h"

From 3a92319ddc5610b755f755cbbbd12791ca9d0c3d Mon Sep 17 00:00:00 2001
From: Jason Evans
Date: Thu, 12 Nov 2015 11:23:39 -0800
Subject: [PATCH 12/96] Use AC_CONFIG_AUX_DIR([build-aux]).

This resolves #293.
---
 config.guess => build-aux/config.guess | 0
 config.sub => build-aux/config.sub     | 0
 install-sh => build-aux/install-sh     | 0
 configure.ac                           | 2 ++
 4 files changed, 2 insertions(+)
 rename config.guess => build-aux/config.guess (100%)
 rename config.sub => build-aux/config.sub (100%)
 rename install-sh => build-aux/install-sh (100%)

diff --git a/config.guess b/build-aux/config.guess
similarity index 100%
rename from config.guess
rename to build-aux/config.guess
diff --git a/config.sub b/build-aux/config.sub
similarity index 100%
rename from config.sub
rename to build-aux/config.sub
diff --git a/install-sh b/build-aux/install-sh
similarity index 100%
rename from install-sh
rename to build-aux/install-sh
diff --git a/configure.ac b/configure.ac
index 7a1290e0..5a3eba27 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,6 +1,8 @@
 dnl Process this file with autoconf to produce a configure script.
 AC_INIT([Makefile.in])
 
+AC_CONFIG_AUX_DIR([build-aux])
+
 dnl ============================================================================
 dnl Custom macro definitions.

From 43de1b3ebc928fa0884422ccd0a2e9cd233d1059 Mon Sep 17 00:00:00 2001
From: Jason Evans
Date: Mon, 14 Dec 2015 11:42:08 -0800
Subject: [PATCH 13/96] Implement --retain and --exclude in jeprof.

These options make it possible to filter symbolized backtrace frames using
regular expressions.
---
 bin/jeprof.in | 50 ++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 48 insertions(+), 2 deletions(-)

diff --git a/bin/jeprof.in b/bin/jeprof.in
index d00ef5db..444041ec 100644
--- a/bin/jeprof.in
+++ b/bin/jeprof.in
@@ -223,12 +223,14 @@ Call-graph Options:
   --nodefraction=<f>  Hide nodes below <f>*total [default=.005]
   --edgefraction=<f>  Hide edges below <f>*total [default=.001]
   --maxdegree=<n>     Max incoming/outgoing edges per node [default=8]
-  --focus=<regexp>    Focus on nodes matching <regexp>
+  --focus=<regexp>    Focus on backtraces with nodes matching <regexp>
   --thread=<n>        Show profile for thread <n>
-  --ignore=<regexp>   Ignore nodes matching <regexp>
+  --ignore=<regexp>   Ignore backtraces with nodes matching <regexp>
   --scale=<n>         Set GV scaling [default=0]
   --heapcheck         Make nodes with non-0 object counts
                       (i.e. direct leak generators) more visible
+  --retain=<regexp>   Retain only nodes that match <regexp>
+  --exclude=<regexp>  Exclude all nodes that match <regexp>

Miscellaneous:
  --tools=<prefix or binary:fullpath>[,...]
                      \$PATH for object tool pathnames
@@ -339,6 +341,8 @@ sub Init() {
   $main::opt_ignore = '';
   $main::opt_scale = 0;
   $main::opt_heapcheck = 0;
+  $main::opt_retain = '';
+  $main::opt_exclude = '';
   $main::opt_seconds = 30;
   $main::opt_lib = "";
@@ -410,6 +414,8 @@ sub Init() {
              "ignore=s" => \$main::opt_ignore,
              "scale=i" => \$main::opt_scale,
              "heapcheck" => \$main::opt_heapcheck,
+             "retain=s" => \$main::opt_retain,
+             "exclude=s" => \$main::opt_exclude,
              "inuse_space!" => \$main::opt_inuse_space,
              "inuse_objects!" => \$main::opt_inuse_objects,
              "alloc_space!" => \$main::opt_alloc_space,
@@ -2840,6 +2846,43 @@ sub ExtractCalls {
   return $calls;
 }
 
+sub FilterFrames {
+  my $symbols = shift;
+  my $profile = shift;
+
+  if ($main::opt_retain eq '' && $main::opt_exclude eq '') {
+    return $profile;
+  }
+
+  my $result = {};
+  foreach my $k (keys(%{$profile})) {
+    my $count = $profile->{$k};
+    my @addrs = split(/\n/, $k);
+    my @path = ();
+    foreach my $a (@addrs) {
+      my $sym;
+      if (exists($symbols->{$a})) {
+        $sym = $symbols->{$a}->[0];
+      } else {
+        $sym = $a;
+      }
+      if ($main::opt_retain ne '' && $sym !~ m/$main::opt_retain/) {
+        next;
+      }
+      if ($main::opt_exclude ne '' && $sym =~ m/$main::opt_exclude/) {
+        next;
+      }
+      push(@path, $a);
+    }
+    if (scalar(@path) > 0) {
+      my $reduced_path = join("\n", @path);
+      AddEntry($result, $reduced_path, $count);
+    }
+  }
+
+  return $result;
+}
+
 sub RemoveUninterestingFrames {
   my $symbols = shift;
   my $profile = shift;
@@ -2984,6 +3027,9 @@ sub RemoveUninterestingFrames {
     my $reduced_path = join("\n", @path);
     AddEntry($result, $reduced_path, $count);
   }
+
+  $result = FilterFrames($symbols, $result);
+
   return $result;
 }

From 9cb481a73f6d2b518f695a669c1f850e477fdd2c Mon Sep 17 00:00:00 2001
From: Cosmin Paraschiv
Date: Mon, 11 Jan 2016 11:05:00 -0800
Subject: [PATCH 14/96] Call malloc_tsd_boot0() from malloc_init_hard_recursible().

When using LinuxThreads, malloc bootstrapping deadlocks, since
malloc_tsd_boot0() ends up calling pthread_setspecific(), which causes
recursive allocation. Fix it by moving the malloc_tsd_boot0() call to
malloc_init_hard_recursible().

The deadlock was introduced by 8bb3198f72fc7587dc93527f9f19fb5be52fa553
(Refactor/fix arenas manipulation.), when tsd_boot() was split and the
top half, tsd_boot0(), got an extra tsd_wrapper_set() call.
---
 src/jemalloc.c | 27 +++++++++++++++++++--------
 1 file changed, 19 insertions(+), 8 deletions(-)

diff --git a/src/jemalloc.c b/src/jemalloc.c
index eed6331d..fab0eb05 100644
--- a/src/jemalloc.c
+++ b/src/jemalloc.c
@@ -1276,26 +1276,37 @@ malloc_init_hard_a0(void)
  *
  * init_lock must be held.
  */
-static void
+static bool
 malloc_init_hard_recursible(void)
 {
+	bool ret = false;
 
 	malloc_init_state = malloc_init_recursible;
 	malloc_mutex_unlock(&init_lock);
 
+	/* LinuxThreads' pthread_setspecific() allocates. */
+	if (malloc_tsd_boot0()) {
+		ret = true;
+		goto label_return;
+	}
+
 	ncpus = malloc_ncpus();
 
 #if (!defined(JEMALLOC_MUTEX_INIT_CB) && !defined(JEMALLOC_ZONE) \
     && !defined(_WIN32) && !defined(__native_client__))
-	/* LinuxThreads's pthread_atfork() allocates. */
+	/* LinuxThreads' pthread_atfork() allocates. */
 	if (pthread_atfork(jemalloc_prefork, jemalloc_postfork_parent,
 	    jemalloc_postfork_child) != 0) {
+		ret = true;
 		malloc_write("<jemalloc>: Error in pthread_atfork()\n");
 		if (opt_abort)
 			abort();
 	}
 #endif
+
+label_return:
 	malloc_mutex_lock(&init_lock);
+	return (ret);
 }
 
 /* init_lock must be held.
*/ @@ -1365,16 +1376,16 @@ malloc_init_hard(void) malloc_mutex_unlock(&init_lock); return (true); } - if (malloc_tsd_boot0()) { - malloc_mutex_unlock(&init_lock); - return (true); - } - if (config_prof && prof_boot2()) { + + if (malloc_init_hard_recursible()) { malloc_mutex_unlock(&init_lock); return (true); } - malloc_init_hard_recursible(); + if (config_prof && prof_boot2()) { + malloc_mutex_unlock(&init_lock); + return (true); + } if (malloc_init_hard_finish()) { malloc_mutex_unlock(&init_lock); From fdbb950495b1f3e000a816f921ed1d97ca4953cb Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Tue, 12 Jan 2016 14:47:00 -0800 Subject: [PATCH 15/96] Don't discard curl options if timeout is not defined. Merge of https://github.com/gperftools/gperftools/commit/5078abdb331e63d7a216994f186eb736861f8df7 --- bin/jeprof.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/jeprof.in b/bin/jeprof.in index 444041ec..a2402f40 100644 --- a/bin/jeprof.in +++ b/bin/jeprof.in @@ -3339,7 +3339,7 @@ sub ResolveRedirectionForCurl { # Add a timeout flat to URL_FETCHER. Returns a new list. sub AddFetchTimeout { my $timeout = shift; - my @fetcher = shift; + my @fetcher = @_; if (defined($timeout)) { if (join(" ", @fetcher) =~ m/\bcurl -s/) { push(@fetcher, "--max-time", sprintf("%d", $timeout)); From f459d5a2034e733eab74cc9b029dfec2ff13b196 Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Tue, 12 Jan 2016 14:48:09 -0800 Subject: [PATCH 16/96] Detect failed profile fetches Summary: Currently an HTTP error response will still try to be parsed, resulting in these messages: substr outside of string at /home/davejwatson/local/jemalloc-github/bin/jeprof line 3635, line 1. Use of uninitialized value in string eq at /home/davejwatson/local/jemalloc-github/bin/jeprof line 3635, line 1. substr outside of string at /home/davejwatson/local/jemalloc-github/bin/jeprof line 3637, line 1. Use of uninitialized value in string eq at /home/davejwatson/local/jemalloc-github/bin/jeprof line 3637, line 1. /home/davejwatson/jeprof/server.1452638936.localhost.pprof.heap: header size >= 2**16 After this fix, curl will return an error status code that will be correctly checked at line 3536, resulting in this error message: Failed to get profile: curl -s --fail 'http://localhost:4010/pprof/heap' > /home/davejwatson/jeprof/.tmp.server.1452639085.localhost.pprof.heap: No such file or directory Test Plan: Tested with MALLOC_CONF="prof:false". Also tested fetching symbols. Didn't test redirects, but this should only affect http error codes >= 400 --- bin/jeprof.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/jeprof.in b/bin/jeprof.in index a2402f40..dfd9195e 100644 --- a/bin/jeprof.in +++ b/bin/jeprof.in @@ -95,7 +95,7 @@ my @EVINCE = ("evince"); # could also be xpdf or perhaps acroread my @KCACHEGRIND = ("kcachegrind"); my @PS2PDF = ("ps2pdf"); # These are used for dynamic profiles -my @URL_FETCHER = ("curl", "-s"); +my @URL_FETCHER = ("curl", "-s", "--fail"); # These are the web pages that servers need to support for dynamic profiles my $HEAP_PAGE = "/pprof/heap"; From d1acd1bea9bc2735b53ac68fa98891cab8c71d02 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 29 Jan 2016 19:59:06 -0800 Subject: [PATCH 17/96] Pass retain and exclude parameters to /pprof/symbol. Pass the retain and exclude parameters to the /pprof/symbol pprof server endpoint so that the server has the opportunity to optimize which symbols it looks up and/or returns mappings for. 
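The diff below percent-encodes the retain/exclude regexps before appending them as query parameters. For reference, the same RFC 2396-style rule (unreserved characters pass through; everything else becomes a %xx escape) can be sketched in C as follows. The Perl in the patch is authoritative; url_encode() here is only an illustration:

    #include <stdio.h>
    #include <string.h>

    static void
    url_encode(const char *in, char *out, size_t outlen)
    {
    	static const char unreserved[] =
    	    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
    	    "0123456789-_.!~*'()";
    	size_t j = 0;

    	for (const char *p = in; *p != '\0' && j + 4 < outlen; p++) {
    		if (strchr(unreserved, *p) != NULL)
    			out[j++] = *p;
    		else
    			j += snprintf(out + j, outlen - j, "%%%02x",
    			    (unsigned char)*p);
    	}
    	out[j] = '\0';
    }

    int
    main(void)
    {
    	char buf[256];

    	url_encode("^foo( |$)", buf, sizeof(buf));
    	printf("retain=%s\n", buf);	/* retain=%5efoo%28%20%7c%24%29 */
    	return (0);
    }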
--- bin/jeprof.in | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/bin/jeprof.in b/bin/jeprof.in index dfd9195e..42087fce 100644 --- a/bin/jeprof.in +++ b/bin/jeprof.in @@ -3385,6 +3385,27 @@ sub ReadSymbols { return $map; } +sub URLEncode { + my $str = shift; + $str =~ s/([^A-Za-z0-9\-_.!~*'()])/ sprintf "%%%02x", ord $1 /eg; + return $str; +} + +sub AppendSymbolFilterParams { + my $url = shift; + my @params = (); + if ($main::opt_retain ne '') { + push(@params, sprintf("retain=%s", URLEncode($main::opt_retain))); + } + if ($main::opt_exclude ne '') { + push(@params, sprintf("exclude=%s", URLEncode($main::opt_exclude))); + } + if (scalar @params > 0) { + $url = sprintf("%s?%s", $url, join("&", @params)); + } + return $url; +} + # Fetches and processes symbols to prepare them for use in the profile output # code. If the optional 'symbol_map' arg is not given, fetches symbols from # $SYMBOL_PAGE for all PC values found in profile. Otherwise, the raw symbols @@ -3409,9 +3430,11 @@ sub FetchSymbols { my $command_line; if (join(" ", @URL_FETCHER) =~ m/\bcurl -s/) { $url = ResolveRedirectionForCurl($url); + $url = AppendSymbolFilterParams($url); $command_line = ShellEscape(@URL_FETCHER, "-d", "\@$main::tmpfile_sym", $url); } else { + $url = AppendSymbolFilterParams($url); $command_line = (ShellEscape(@URL_FETCHER, "--post", $url) . " < " . ShellEscape($main::tmpfile_sym)); } From 109712b681cd146cb88b17d743fe53314c962144 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 19 Feb 2016 12:08:14 -0800 Subject: [PATCH 18/96] Fix a documentation editing error. --- doc/jemalloc.xml.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 26a5e142..519ba3fa 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1467,7 +1467,7 @@ malloc_conf = "xmalloc:true";]]> Flush the specified thread-specific cache (tcache). The same considerations apply to this interface as to thread.tcache.flush, - except that the tcache will never be automatically be discarded. + except that the tcache will never be automatically discarded. From b24f74b8624fb43f9b562a067ca14563f45cb553 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 17 Feb 2016 06:40:33 -0800 Subject: [PATCH 19/96] Don't rely on unpurged chunks in xallocx() test. --- test/integration/xallocx.c | 40 +++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/test/integration/xallocx.c b/test/integration/xallocx.c index 00451961..5c4998b6 100644 --- a/test/integration/xallocx.c +++ b/test/integration/xallocx.c @@ -305,63 +305,63 @@ TEST_END TEST_BEGIN(test_extra_huge) { int flags = MALLOCX_ARENA(arena_ind()); - size_t largemax, huge0, huge1, huge2, hugemax; + size_t largemax, huge1, huge2, huge3, hugemax; void *p; /* Get size classes. */ largemax = get_large_size(get_nlarge()-1); - huge0 = get_huge_size(0); huge1 = get_huge_size(1); huge2 = get_huge_size(2); + huge3 = get_huge_size(3); hugemax = get_huge_size(get_nhuge()-1); - p = mallocx(huge2, flags); + p = mallocx(huge3, flags); assert_ptr_not_null(p, "Unexpected mallocx() error"); - assert_zu_eq(xallocx(p, huge2, 0, flags), huge2, + assert_zu_eq(xallocx(p, huge3, 0, flags), huge3, "Unexpected xallocx() behavior"); /* Test size decrease with zero extra. 
*/ - assert_zu_ge(xallocx(p, huge0, 0, flags), huge0, + assert_zu_ge(xallocx(p, huge1, 0, flags), huge1, "Unexpected xallocx() behavior"); - assert_zu_ge(xallocx(p, largemax, 0, flags), huge0, + assert_zu_ge(xallocx(p, largemax, 0, flags), huge1, "Unexpected xallocx() behavior"); - assert_zu_eq(xallocx(p, huge2, 0, flags), huge2, + assert_zu_eq(xallocx(p, huge3, 0, flags), huge3, "Unexpected xallocx() behavior"); /* Test size decrease with non-zero extra. */ - assert_zu_eq(xallocx(p, huge0, huge2 - huge0, flags), huge2, + assert_zu_eq(xallocx(p, huge1, huge3 - huge1, flags), huge3, + "Unexpected xallocx() behavior"); + assert_zu_eq(xallocx(p, huge2, huge3 - huge2, flags), huge3, "Unexpected xallocx() behavior"); assert_zu_eq(xallocx(p, huge1, huge2 - huge1, flags), huge2, "Unexpected xallocx() behavior"); - assert_zu_eq(xallocx(p, huge0, huge1 - huge0, flags), huge1, - "Unexpected xallocx() behavior"); - assert_zu_ge(xallocx(p, largemax, huge0 - largemax, flags), huge0, + assert_zu_ge(xallocx(p, largemax, huge1 - largemax, flags), huge1, "Unexpected xallocx() behavior"); - assert_zu_ge(xallocx(p, huge0, 0, flags), huge0, + assert_zu_ge(xallocx(p, huge1, 0, flags), huge1, "Unexpected xallocx() behavior"); /* Test size increase with zero extra. */ - assert_zu_le(xallocx(p, huge2, 0, flags), huge2, + assert_zu_le(xallocx(p, huge3, 0, flags), huge3, "Unexpected xallocx() behavior"); - assert_zu_le(xallocx(p, hugemax+1, 0, flags), huge2, + assert_zu_le(xallocx(p, hugemax+1, 0, flags), huge3, "Unexpected xallocx() behavior"); - assert_zu_ge(xallocx(p, huge0, 0, flags), huge0, + assert_zu_ge(xallocx(p, huge1, 0, flags), huge1, "Unexpected xallocx() behavior"); /* Test size increase with non-zero extra. */ - assert_zu_le(xallocx(p, huge0, SIZE_T_MAX - huge0, flags), hugemax, + assert_zu_le(xallocx(p, huge1, SIZE_T_MAX - huge1, flags), hugemax, "Unexpected xallocx() behavior"); - assert_zu_ge(xallocx(p, huge0, 0, flags), huge0, + assert_zu_ge(xallocx(p, huge1, 0, flags), huge1, "Unexpected xallocx() behavior"); /* Test size increase with non-zero extra. */ - assert_zu_le(xallocx(p, huge0, huge2 - huge0, flags), huge2, + assert_zu_le(xallocx(p, huge1, huge3 - huge1, flags), huge3, "Unexpected xallocx() behavior"); - assert_zu_eq(xallocx(p, huge2, 0, flags), huge2, + assert_zu_eq(xallocx(p, huge3, 0, flags), huge3, "Unexpected xallocx() behavior"); /* Test size+extra overflow. */ - assert_zu_le(xallocx(p, huge2, hugemax - huge2 + 1, flags), hugemax, + assert_zu_le(xallocx(p, huge3, hugemax - huge3 + 1, flags), hugemax, "Unexpected xallocx() behavior"); dallocx(p, flags); From 49931bd8ffeffde72865990d74b7ff65d6e8c466 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 19 Feb 2016 18:24:30 -0800 Subject: [PATCH 20/96] Fix test_stats_arenas_summary fragility. Fix test_stats_arenas_summary to deallocate before asserting that purging must have happened. 
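The reasoning behind the reordering: purging only does work when an arena holds dirty pages, and dirty pages are produced by deallocation, so the frees must precede the arena.0.purge call for the npurge/nmadvise/purged statistics to be observable. A condensed sketch of the corrected sequence (assumes a stats-enabled build; error checking omitted):

    #include <jemalloc/jemalloc.h>

    void
    demo(void)
    {
    	void *p = mallocx(4096, 0);

    	/* Free first so the purge below has dirty pages to operate on. */
    	dallocx(p, 0);
    	mallctl("arena.0.purge", NULL, NULL, NULL, 0);
    	/* Only now can stats.arenas.0.npurge be expected to be nonzero. */
    }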
--- test/unit/stats.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/unit/stats.c b/test/unit/stats.c index 8e4bc631..6e803160 100644 --- a/test/unit/stats.c +++ b/test/unit/stats.c @@ -93,6 +93,10 @@ TEST_BEGIN(test_stats_arenas_summary) huge = mallocx(chunksize, 0); assert_ptr_not_null(huge, "Unexpected mallocx() failure"); + dallocx(little, 0); + dallocx(large, 0); + dallocx(huge, 0); + assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, "Unexpected mallctl() failure"); @@ -116,10 +120,6 @@ TEST_BEGIN(test_stats_arenas_summary) assert_u64_le(nmadvise, purged, "nmadvise should be no greater than purged"); } - - dallocx(little, 0); - dallocx(large, 0); - dallocx(huge, 0); } TEST_END From ef349f3f944b9b40bdeeff6cc322ef753f1ad4be Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 19 Feb 2016 18:29:43 -0800 Subject: [PATCH 21/96] Fix arena_sdalloc() line wrapping. --- include/jemalloc/internal/arena.h | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 9715ad93..b6824896 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -1306,7 +1306,8 @@ arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache) if (config_prof && opt_prof) { size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - assert(arena_mapbits_allocated_get(chunk, pageind) != 0); + assert(arena_mapbits_allocated_get(chunk, pageind) != + 0); if (arena_mapbits_large_get(chunk, pageind) != 0) { /* * Make sure to use promoted size, not request @@ -1322,7 +1323,8 @@ arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache) /* Small allocation. */ if (likely(tcache != NULL)) { szind_t binind = size2index(size); - tcache_dalloc_small(tsd, tcache, ptr, binind, true); + tcache_dalloc_small(tsd, tcache, ptr, binind, + true); } else { size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; @@ -1333,9 +1335,10 @@ arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache) assert(config_cache_oblivious || ((uintptr_t)ptr & PAGE_MASK) == 0); - if (likely(tcache != NULL) && size <= tcache_maxclass) - tcache_dalloc_large(tsd, tcache, ptr, size, true); - else { + if (likely(tcache != NULL) && size <= tcache_maxclass) { + tcache_dalloc_large(tsd, tcache, ptr, size, + true); + } else { arena_dalloc_large(extent_node_arena_get( &chunk->node), chunk, ptr); } From f829009929bdce17bef8a963264a92e39271a166 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 7 Feb 2016 14:23:22 -0800 Subject: [PATCH 22/96] Add --with-malloc-conf. Add --with-malloc-conf, which makes it possible to embed a default options string during configuration. --- INSTALL | 8 ++++ configure.ac | 10 +++++ doc/jemalloc.xml.in | 28 ++++++++++---- .../jemalloc/internal/jemalloc_internal.h.in | 1 + .../internal/jemalloc_internal_defs.h.in | 3 ++ src/ctl.c | 37 ++++++++++--------- src/jemalloc.c | 9 +++-- src/stats.c | 2 + test/unit/mallctl.c | 33 +++++++++-------- 9 files changed, 87 insertions(+), 44 deletions(-) diff --git a/INSTALL b/INSTALL index 8d396874..5c25054a 100644 --- a/INSTALL +++ b/INSTALL @@ -84,6 +84,14 @@ any of the following arguments (not a definitive list) to 'configure': versions of jemalloc can coexist in the same installation directory. For example, libjemalloc.so.0 becomes libjemalloc.so.0. 
+--with-malloc-conf=<malloc_conf>
+  Embed <malloc_conf> as a run-time options string that is processed prior to
+  the malloc_conf global variable, the /etc/malloc.conf symlink, and the
+  MALLOC_CONF environment variable. For example, to change the default chunk
+  size to 256 KiB:
+
+    --with-malloc-conf=lg_chunk:18
+
 --disable-cc-silence
   Disable code that silences non-useful compiler warnings. This is mainly
   useful during development when auditing the set of warnings that are being
diff --git a/configure.ac b/configure.ac
index 5a3eba27..5232c8f3 100644
--- a/configure.ac
+++ b/configure.ac
@@ -577,6 +577,15 @@ AC_ARG_WITH([install_suffix],
 install_suffix="$INSTALL_SUFFIX"
 AC_SUBST([install_suffix])
 
+dnl Specify default malloc_conf.
+AC_ARG_WITH([malloc_conf],
+  [AS_HELP_STRING([--with-malloc-conf=<malloc_conf>], [config.malloc_conf options string])],
+  [JEMALLOC_CONFIG_MALLOC_CONF="$with_malloc_conf"],
+  [JEMALLOC_CONFIG_MALLOC_CONF=""]
+)
+config_malloc_conf="$JEMALLOC_CONFIG_MALLOC_CONF"
+AC_DEFINE_UNQUOTED([JEMALLOC_CONFIG_MALLOC_CONF], ["$config_malloc_conf"])
+
 dnl Substitute @je_@ in jemalloc_protos.h.in, primarily to make generation of
 dnl jemalloc_protos_jet.h easy.
 je_="je_"
@@ -1726,6 +1735,7 @@ AC_MSG_RESULT([JEMALLOC_PREFIX : ${JEMALLOC_PREFIX}])
 AC_MSG_RESULT([JEMALLOC_PRIVATE_NAMESPACE])
 AC_MSG_RESULT([ : ${JEMALLOC_PRIVATE_NAMESPACE}])
 AC_MSG_RESULT([install_suffix : ${install_suffix}])
+AC_MSG_RESULT([malloc_conf : ${config_malloc_conf}])
 AC_MSG_RESULT([autogen : ${enable_autogen}])
 AC_MSG_RESULT([cc-silence : ${enable_cc_silence}])
 AC_MSG_RESULT([debug : ${enable_debug}])
diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in
index 519ba3fa..48765b01 100644
--- a/doc/jemalloc.xml.in
+++ b/doc/jemalloc.xml.in
@@ -455,19 +455,20 @@ for (i = 0; i < nbins; i++) {
 routines, the allocator initializes its internals based in part on various
 options that can be specified at compile- or run-time.
 
-The string pointed to by the global variable
-malloc_conf, the “name” of the file
-referenced by the symbolic link named /etc/malloc.conf, and the value of the
+The string specified via --with-malloc-conf, the
+string pointed to by the global variable malloc_conf, the
+“name” of the file referenced by the symbolic link named
+/etc/malloc.conf, and the value of the
 environment variable MALLOC_CONF, will be interpreted, in that order, from
 left to right as options. Note that malloc_conf may be read before
 main is entered, so the declaration of malloc_conf
 should specify an initializer that contains
-the final value to be read by jemalloc. malloc_conf is
-a compile-time setting, whereas /etc/malloc.conf and MALLOC_CONF
-can be safely set any time prior to program invocation.
+the final value to be read by jemalloc. --with-malloc-conf
+and malloc_conf are compile-time mechanisms, whereas
+/etc/malloc.conf and
+MALLOC_CONF can be safely set any time prior to program
+invocation.
 
 An options string is a comma-separated list of option:value
 pairs. There is one key corresponding to each
+
+
+config.malloc_conf
+(const char *)
+r-
+
+Embedded configure-time-specified run-time options
+string, empty unless --with-malloc-conf was specified
+during build configuration.
+ + config.munmap diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index d31da4ca..8c507f79 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -49,6 +49,7 @@ static const bool config_lazy_lock = false #endif ; +static const char * const config_malloc_conf = JEMALLOC_CONFIG_MALLOC_CONF; static const bool config_prof = #ifdef JEMALLOC_PROF true diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index b0f8caaf..c84e27c9 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -259,4 +259,7 @@ */ #undef JEMALLOC_EXPORT +/* config.malloc_conf options string. */ +#undef JEMALLOC_CONFIG_MALLOC_CONF + #endif /* JEMALLOC_INTERNAL_DEFS_H_ */ diff --git a/src/ctl.c b/src/ctl.c index db1ddcb5..9618d632 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -77,6 +77,7 @@ CTL_PROTO(config_cache_oblivious) CTL_PROTO(config_debug) CTL_PROTO(config_fill) CTL_PROTO(config_lazy_lock) +CTL_PROTO(config_malloc_conf) CTL_PROTO(config_munmap) CTL_PROTO(config_prof) CTL_PROTO(config_prof_libgcc) @@ -241,6 +242,7 @@ static const ctl_named_node_t config_node[] = { {NAME("debug"), CTL(config_debug)}, {NAME("fill"), CTL(config_fill)}, {NAME("lazy_lock"), CTL(config_lazy_lock)}, + {NAME("malloc_conf"), CTL(config_malloc_conf)}, {NAME("munmap"), CTL(config_munmap)}, {NAME("prof"), CTL(config_prof)}, {NAME("prof_libgcc"), CTL(config_prof_libgcc)}, @@ -1199,17 +1201,17 @@ label_return: \ return (ret); \ } -#define CTL_RO_BOOL_CONFIG_GEN(n) \ +#define CTL_RO_CONFIG_GEN(n, t) \ static int \ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ void *newp, size_t newlen) \ { \ int ret; \ - bool oldval; \ + t oldval; \ \ READONLY(); \ oldval = n; \ - READ(oldval, bool); \ + READ(oldval, t); \ \ ret = 0; \ label_return: \ @@ -1241,20 +1243,21 @@ label_return: /******************************************************************************/ -CTL_RO_BOOL_CONFIG_GEN(config_cache_oblivious) -CTL_RO_BOOL_CONFIG_GEN(config_debug) -CTL_RO_BOOL_CONFIG_GEN(config_fill) -CTL_RO_BOOL_CONFIG_GEN(config_lazy_lock) -CTL_RO_BOOL_CONFIG_GEN(config_munmap) -CTL_RO_BOOL_CONFIG_GEN(config_prof) -CTL_RO_BOOL_CONFIG_GEN(config_prof_libgcc) -CTL_RO_BOOL_CONFIG_GEN(config_prof_libunwind) -CTL_RO_BOOL_CONFIG_GEN(config_stats) -CTL_RO_BOOL_CONFIG_GEN(config_tcache) -CTL_RO_BOOL_CONFIG_GEN(config_tls) -CTL_RO_BOOL_CONFIG_GEN(config_utrace) -CTL_RO_BOOL_CONFIG_GEN(config_valgrind) -CTL_RO_BOOL_CONFIG_GEN(config_xmalloc) +CTL_RO_CONFIG_GEN(config_cache_oblivious, bool) +CTL_RO_CONFIG_GEN(config_debug, bool) +CTL_RO_CONFIG_GEN(config_fill, bool) +CTL_RO_CONFIG_GEN(config_lazy_lock, bool) +CTL_RO_CONFIG_GEN(config_malloc_conf, const char *) +CTL_RO_CONFIG_GEN(config_munmap, bool) +CTL_RO_CONFIG_GEN(config_prof, bool) +CTL_RO_CONFIG_GEN(config_prof_libgcc, bool) +CTL_RO_CONFIG_GEN(config_prof_libunwind, bool) +CTL_RO_CONFIG_GEN(config_stats, bool) +CTL_RO_CONFIG_GEN(config_tcache, bool) +CTL_RO_CONFIG_GEN(config_tls, bool) +CTL_RO_CONFIG_GEN(config_utrace, bool) +CTL_RO_CONFIG_GEN(config_valgrind, bool) +CTL_RO_CONFIG_GEN(config_xmalloc, bool) /******************************************************************************/ diff --git a/src/jemalloc.c b/src/jemalloc.c index fab0eb05..8415c0e2 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -902,10 +902,13 @@ 
malloc_conf_init(void) opt_tcache = false; } - for (i = 0; i < 3; i++) { + for (i = 0; i < 4; i++) { /* Get runtime configuration. */ switch (i) { case 0: + opts = config_malloc_conf; + break; + case 1: if (je_malloc_conf != NULL) { /* * Use options that were compiled into the @@ -918,7 +921,7 @@ malloc_conf_init(void) opts = buf; } break; - case 1: { + case 2: { int linklen = 0; #ifndef _WIN32 int saved_errno = errno; @@ -945,7 +948,7 @@ malloc_conf_init(void) buf[linklen] = '\0'; opts = buf; break; - } case 2: { + } case 3: { const char *envname = #ifdef JEMALLOC_PREFIX JEMALLOC_CPREFIX"MALLOC_CONF" diff --git a/src/stats.c b/src/stats.c index 154c3e74..7d09c23c 100644 --- a/src/stats.c +++ b/src/stats.c @@ -438,6 +438,8 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, CTL_GET("config.debug", &bv, bool); malloc_cprintf(write_cb, cbopaque, "Assertions %s\n", bv ? "enabled" : "disabled"); + malloc_cprintf(write_cb, cbopaque, + "config.malloc_conf: \"%s\"\n", config_malloc_conf); #define OPT_WRITE_BOOL(n) \ if (je_mallctl("opt."#n, &bv, &bsz, NULL, 0) == 0) { \ diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 31e354ca..fde223f9 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -117,8 +117,8 @@ TEST_END TEST_BEGIN(test_mallctl_config) { -#define TEST_MALLCTL_CONFIG(config) do { \ - bool oldval; \ +#define TEST_MALLCTL_CONFIG(config, t) do { \ + t oldval; \ size_t sz = sizeof(oldval); \ assert_d_eq(mallctl("config."#config, &oldval, &sz, NULL, 0), \ 0, "Unexpected mallctl() failure"); \ @@ -126,20 +126,21 @@ TEST_BEGIN(test_mallctl_config) assert_zu_eq(sz, sizeof(oldval), "Unexpected output size"); \ } while (0) - TEST_MALLCTL_CONFIG(cache_oblivious); - TEST_MALLCTL_CONFIG(debug); - TEST_MALLCTL_CONFIG(fill); - TEST_MALLCTL_CONFIG(lazy_lock); - TEST_MALLCTL_CONFIG(munmap); - TEST_MALLCTL_CONFIG(prof); - TEST_MALLCTL_CONFIG(prof_libgcc); - TEST_MALLCTL_CONFIG(prof_libunwind); - TEST_MALLCTL_CONFIG(stats); - TEST_MALLCTL_CONFIG(tcache); - TEST_MALLCTL_CONFIG(tls); - TEST_MALLCTL_CONFIG(utrace); - TEST_MALLCTL_CONFIG(valgrind); - TEST_MALLCTL_CONFIG(xmalloc); + TEST_MALLCTL_CONFIG(cache_oblivious, bool); + TEST_MALLCTL_CONFIG(debug, bool); + TEST_MALLCTL_CONFIG(fill, bool); + TEST_MALLCTL_CONFIG(lazy_lock, bool); + TEST_MALLCTL_CONFIG(malloc_conf, const char *); + TEST_MALLCTL_CONFIG(munmap, bool); + TEST_MALLCTL_CONFIG(prof, bool); + TEST_MALLCTL_CONFIG(prof_libgcc, bool); + TEST_MALLCTL_CONFIG(prof_libunwind, bool); + TEST_MALLCTL_CONFIG(stats, bool); + TEST_MALLCTL_CONFIG(tcache, bool); + TEST_MALLCTL_CONFIG(tls, bool); + TEST_MALLCTL_CONFIG(utrace, bool); + TEST_MALLCTL_CONFIG(valgrind, bool); + TEST_MALLCTL_CONFIG(xmalloc, bool); #undef TEST_MALLCTL_CONFIG } From e5d5a4a51792258aed65e6b45191d1f56c1305ea Mon Sep 17 00:00:00 2001 From: Cameron Evans Date: Tue, 2 Feb 2016 21:52:08 -0800 Subject: [PATCH 23/96] Add time_update(). 
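time_update() refreshes a caller-owned struct timespec in place and returns true if the clock appears to have moved backwards. A usage sketch for interval measurement against the API this patch adds (do_work() is a placeholder, not part of the patch):

    static long
    measure_ns(void)
    {
    	struct timespec t0 = {0, 0}, t1;

    	time_update(&t0);		/* Sample the start time. */
    	do_work();
    	t1 = t0;			/* Seed rollback detection. */
    	if (time_update(&t1))
    		return (-1);		/* Clock went backwards; discard. */
    	return ((t1.tv_sec - t0.tv_sec) * 1000000000L +
    	    (t1.tv_nsec - t0.tv_nsec));
    }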
--- Makefile.in | 5 +-- .../jemalloc/internal/jemalloc_internal.h.in | 4 +++ .../internal/jemalloc_internal_decls.h | 8 +++++ include/jemalloc/internal/private_symbols.txt | 1 + include/jemalloc/internal/time.h | 22 ++++++++++++ src/time.c | 36 +++++++++++++++++++ test/unit/time.c | 23 ++++++++++++ 7 files changed, 97 insertions(+), 2 deletions(-) create mode 100644 include/jemalloc/internal/time.h create mode 100644 src/time.c create mode 100644 test/unit/time.c diff --git a/Makefile.in b/Makefile.in index 1ac6f292..c4f8cf90 100644 --- a/Makefile.in +++ b/Makefile.in @@ -85,8 +85,8 @@ C_SRCS := $(srcroot)src/jemalloc.c $(srcroot)src/arena.c \ $(srcroot)src/extent.c $(srcroot)src/hash.c $(srcroot)src/huge.c \ $(srcroot)src/mb.c $(srcroot)src/mutex.c $(srcroot)src/pages.c \ $(srcroot)src/prof.c $(srcroot)src/quarantine.c $(srcroot)src/rtree.c \ - $(srcroot)src/stats.c $(srcroot)src/tcache.c $(srcroot)src/util.c \ - $(srcroot)src/tsd.c + $(srcroot)src/stats.c $(srcroot)src/tcache.c $(srcroot)src/time.c \ + $(srcroot)src/tsd.c $(srcroot)src/util.c ifeq ($(enable_valgrind), 1) C_SRCS += $(srcroot)src/valgrind.c endif @@ -143,6 +143,7 @@ TESTS_UNIT := $(srcroot)test/unit/atomic.c \ $(srcroot)test/unit/SFMT.c \ $(srcroot)test/unit/size_classes.c \ $(srcroot)test/unit/stats.c \ + $(srcroot)test/unit/time.c \ $(srcroot)test/unit/tsd.c \ $(srcroot)test/unit/util.c \ $(srcroot)test/unit/zero.c diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 8c507f79..e7bc4c84 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -356,6 +356,7 @@ typedef unsigned szind_t; # define VARIABLE_ARRAY(type, name, count) type name[(count)] #endif +#include "jemalloc/internal/time.h" #include "jemalloc/internal/valgrind.h" #include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" @@ -384,6 +385,7 @@ typedef unsigned szind_t; /******************************************************************************/ #define JEMALLOC_H_STRUCTS +#include "jemalloc/internal/time.h" #include "jemalloc/internal/valgrind.h" #include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" @@ -469,6 +471,7 @@ void jemalloc_prefork(void); void jemalloc_postfork_parent(void); void jemalloc_postfork_child(void); +#include "jemalloc/internal/time.h" #include "jemalloc/internal/valgrind.h" #include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" @@ -497,6 +500,7 @@ void jemalloc_postfork_child(void); /******************************************************************************/ #define JEMALLOC_H_INLINES +#include "jemalloc/internal/time.h" #include "jemalloc/internal/valgrind.h" #include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" diff --git a/include/jemalloc/internal/jemalloc_internal_decls.h b/include/jemalloc/internal/jemalloc_internal_decls.h index a601d6eb..0f29e676 100644 --- a/include/jemalloc/internal/jemalloc_internal_decls.h +++ b/include/jemalloc/internal/jemalloc_internal_decls.h @@ -61,4 +61,12 @@ isblank(int c) #endif #include +#include +#ifdef _WIN32 +struct timespec { + time_t tv_sec; + long tv_nsec; +}; +#endif + #endif /* JEMALLOC_INTERNAL_H */ diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index a90021aa..8b1fd45c 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -460,6 +460,7 @@ tcaches_get 
 tcache_stats_merge
 thread_allocated_cleanup
 thread_deallocated_cleanup
+time_update
 tsd_arena_get
 tsd_arena_set
 tsd_boot
diff --git a/include/jemalloc/internal/time.h b/include/jemalloc/internal/time.h
new file mode 100644
index 00000000..e3e6c5f4
--- /dev/null
+++ b/include/jemalloc/internal/time.h
@@ -0,0 +1,22 @@
+#define JEMALLOC_CLOCK_GETTIME defined(_POSIX_MONOTONIC_CLOCK) \
+    && _POSIX_MONOTONIC_CLOCK >= 0
+
+/******************************************************************************/
+#ifdef JEMALLOC_H_TYPES
+
+#endif /* JEMALLOC_H_TYPES */
+/******************************************************************************/
+#ifdef JEMALLOC_H_STRUCTS
+
+#endif /* JEMALLOC_H_STRUCTS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_EXTERNS
+
+bool time_update(struct timespec *time);
+
+#endif /* JEMALLOC_H_EXTERNS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_INLINES
+
+#endif /* JEMALLOC_H_INLINES */
+/******************************************************************************/
diff --git a/src/time.c b/src/time.c
new file mode 100644
index 00000000..2147c529
--- /dev/null
+++ b/src/time.c
@@ -0,0 +1,36 @@
+#include "jemalloc/internal/jemalloc_internal.h"
+
+bool
+time_update(struct timespec *time)
+{
+	struct timespec old_time;
+
+	memcpy(&old_time, time, sizeof(struct timespec));
+
+#ifdef _WIN32
+	FILETIME ft;
+	uint64_t ticks;
+	GetSystemTimeAsFileTime(&ft);
+	ticks = (((uint64_t)ft.dwHighDateTime) << 32) | ft.dwLowDateTime;
+	time->tv_sec = ticks / 10000000;
+	time->tv_nsec = ((ticks % 10000000) * 100);
+#elif JEMALLOC_CLOCK_GETTIME
+	if (sysconf(_SC_MONOTONIC_CLOCK) > 0)
+		clock_gettime(CLOCK_MONOTONIC, time);
+	else
+		clock_gettime(CLOCK_REALTIME, time);
+#else
+	struct timeval tv;
+	gettimeofday(&tv, NULL);
+	time->tv_sec = tv.tv_sec;
+	time->tv_nsec = tv.tv_usec * 1000;
+#endif
+
+	/* Handle non-monotonic clocks. */
+	if (unlikely(old_time.tv_sec > time->tv_sec))
+		return (true);
+	if (unlikely(old_time.tv_sec == time->tv_sec))
+		return (old_time.tv_nsec > time->tv_nsec);
+
+	return (false);
+}
diff --git a/test/unit/time.c b/test/unit/time.c
new file mode 100644
index 00000000..80460f98
--- /dev/null
+++ b/test/unit/time.c
@@ -0,0 +1,23 @@
+#include "test/jemalloc_test.h"
+
+TEST_BEGIN(test_time_update)
+{
+	struct timespec ts;
+
+	memset(&ts, 0, sizeof(struct timespec));
+
+	assert_false(time_update(&ts), "Basic time update failed.");
+
+	/* Only Rip Van Winkle sleeps this long. */
+	ts.tv_sec += 631152000;
+	assert_true(time_update(&ts), "Update should detect time roll-back.");
+}
+TEST_END
+
+int
+main(void)
+{
+
+	return (test(
+	    test_time_update));
+}

From 94451d184b09fdf57837d0a69ab05ec06317f5ca Mon Sep 17 00:00:00 2001
From: Jason Evans
Date: Fri, 19 Feb 2016 12:35:37 -0800
Subject: [PATCH 24/96] Flesh out time_*() API.
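The helpers below keep tv_nsec normalized to [0, one billion) across every operation, so callers can do interval and rate arithmetic without handling carries themselves. A small usage sketch against the API this patch adds (demo() is illustrative):

    /* How many 100 ms periods fit in 1.25 s?  time_divide() answers 12. */
    void
    demo(void)
    {
    	struct timespec elapsed, period;

    	time_init(&elapsed, 1, 250000000);	/* 1.25 s */
    	time_init(&period, 0, 100000000);	/* 0.10 s */
    	assert(time_divide(&elapsed, &period) == 12);
    }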
--- Makefile.in | 2 +- include/jemalloc/internal/private_symbols.txt | 12 + include/jemalloc/internal/time.h | 18 +- src/time.c | 158 +++++++++++++- test/include/test/jemalloc_test.h.in | 1 + test/include/test/timer.h | 16 +- test/src/timer.c | 40 +--- test/unit/time.c | 206 +++++++++++++++++- 8 files changed, 397 insertions(+), 56 deletions(-) diff --git a/Makefile.in b/Makefile.in index c4f8cf90..e314a6f3 100644 --- a/Makefile.in +++ b/Makefile.in @@ -116,7 +116,7 @@ C_TESTLIB_SRCS := $(srcroot)test/src/btalloc.c $(srcroot)test/src/btalloc_0.c \ $(srcroot)test/src/mtx.c $(srcroot)test/src/mq.c \ $(srcroot)test/src/SFMT.c $(srcroot)test/src/test.c \ $(srcroot)test/src/thd.c $(srcroot)test/src/timer.c -C_UTIL_INTEGRATION_SRCS := $(srcroot)src/util.c +C_UTIL_INTEGRATION_SRCS := $(srcroot)src/time.c $(srcroot)src/util.c TESTS_UNIT := $(srcroot)test/unit/atomic.c \ $(srcroot)test/unit/bitmap.c \ $(srcroot)test/unit/ckh.c \ diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 8b1fd45c..4c40af61 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -460,6 +460,18 @@ tcaches_get tcache_stats_merge thread_allocated_cleanup thread_deallocated_cleanup +ticker_init +ticker_tick +time_add +time_compare +time_copy +time_divide +time_idivide +time_imultiply +time_init +time_nsec +time_sec +time_subtract time_update tsd_arena_get tsd_arena_set diff --git a/include/jemalloc/internal/time.h b/include/jemalloc/internal/time.h index e3e6c5f4..a290f386 100644 --- a/include/jemalloc/internal/time.h +++ b/include/jemalloc/internal/time.h @@ -1,8 +1,11 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + #define JEMALLOC_CLOCK_GETTIME defined(_POSIX_MONOTONIC_CLOCK) \ && _POSIX_MONOTONIC_CLOCK >= 0 -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES +/* Maximum supported number of seconds (~584 years). 
*/ +#define TIME_SEC_MAX 18446744072 #endif /* JEMALLOC_H_TYPES */ /******************************************************************************/ @@ -12,6 +15,17 @@ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS +void time_init(struct timespec *time, time_t sec, long nsec); +time_t time_sec(const struct timespec *time); +long time_nsec(const struct timespec *time); +void time_copy(struct timespec *time, const struct timespec *source); +int time_compare(const struct timespec *a, const struct timespec *b); +void time_add(struct timespec *time, const struct timespec *addend); +void time_subtract(struct timespec *time, const struct timespec *subtrahend); +void time_imultiply(struct timespec *time, uint64_t multiplier); +void time_idivide(struct timespec *time, uint64_t divisor); +uint64_t time_divide(const struct timespec *time, + const struct timespec *divisor); bool time_update(struct timespec *time); #endif /* JEMALLOC_H_EXTERNS */ diff --git a/src/time.c b/src/time.c index 2147c529..3f930385 100644 --- a/src/time.c +++ b/src/time.c @@ -1,11 +1,160 @@ #include "jemalloc/internal/jemalloc_internal.h" +#define BILLION 1000000000 + +UNUSED static bool +time_valid(const struct timespec *time) +{ + + if (time->tv_sec > TIME_SEC_MAX) + return (false); + if (time->tv_nsec >= BILLION) + return (false); + + return (true); +} + +void +time_init(struct timespec *time, time_t sec, long nsec) +{ + + time->tv_sec = sec; + time->tv_nsec = nsec; + + assert(time_valid(time)); +} + +time_t +time_sec(const struct timespec *time) +{ + + assert(time_valid(time)); + + return (time->tv_sec); +} + +long +time_nsec(const struct timespec *time) +{ + + assert(time_valid(time)); + + return (time->tv_nsec); +} + +void +time_copy(struct timespec *time, const struct timespec *source) +{ + + assert(time_valid(source)); + + *time = *source; +} + +int +time_compare(const struct timespec *a, const struct timespec *b) +{ + int ret; + + assert(time_valid(a)); + assert(time_valid(b)); + + ret = (a->tv_sec > b->tv_sec) - (a->tv_sec < b->tv_sec); + if (ret == 0) + ret = (a->tv_nsec > b->tv_nsec) - (a->tv_nsec < b->tv_nsec); + + return (ret); +} + +void +time_add(struct timespec *time, const struct timespec *addend) +{ + + assert(time_valid(time)); + assert(time_valid(addend)); + + time->tv_sec += addend->tv_sec; + time->tv_nsec += addend->tv_nsec; + if (time->tv_nsec >= BILLION) { + time->tv_sec++; + time->tv_nsec -= BILLION; + } + + assert(time_valid(time)); +} + +void +time_subtract(struct timespec *time, const struct timespec *subtrahend) +{ + + assert(time_valid(time)); + assert(time_valid(subtrahend)); + assert(time_compare(time, subtrahend) >= 0); + + time->tv_sec -= subtrahend->tv_sec; + if (time->tv_nsec < subtrahend->tv_nsec) { + time->tv_sec--; + time->tv_nsec += BILLION; + } + time->tv_nsec -= subtrahend->tv_nsec; +} + +void +time_imultiply(struct timespec *time, uint64_t multiplier) +{ + time_t sec; + uint64_t nsec; + + assert(time_valid(time)); + + sec = time->tv_sec * multiplier; + nsec = time->tv_nsec * multiplier; + sec += nsec / BILLION; + nsec %= BILLION; + time_init(time, sec, (long)nsec); + + assert(time_valid(time)); +} + +void +time_idivide(struct timespec *time, uint64_t divisor) +{ + time_t sec; + uint64_t nsec; + + assert(time_valid(time)); + + sec = time->tv_sec / divisor; + nsec = ((time->tv_sec % divisor) * BILLION + time->tv_nsec) / divisor; + sec += nsec / BILLION; + nsec %= BILLION; + time_init(time, sec, (long)nsec); + + 
assert(time_valid(time)); +} + +uint64_t +time_divide(const struct timespec *time, const struct timespec *divisor) +{ + uint64_t t, d; + + assert(time_valid(time)); + assert(time_valid(divisor)); + + t = time_sec(time) * BILLION + time_nsec(time); + d = time_sec(divisor) * BILLION + time_nsec(divisor); + assert(d != 0); + return (t / d); +} + bool time_update(struct timespec *time) { struct timespec old_time; - memcpy(&old_time, time, sizeof(struct timespec)); + assert(time_valid(time)); + + time_copy(&old_time, time); #ifdef _WIN32 FILETIME ft; @@ -27,10 +176,11 @@ time_update(struct timespec *time) #endif /* Handle non-monotonic clocks. */ - if (unlikely(old_time.tv_sec > time->tv_sec)) + if (unlikely(time_compare(&old_time, time) > 0)) { + time_copy(time, &old_time); return (true); - if (unlikely(old_time.tv_sec == time->tv_sec)) - return old_time.tv_nsec > time->tv_nsec; + } + assert(time_valid(time)); return (false); } diff --git a/test/include/test/jemalloc_test.h.in b/test/include/test/jemalloc_test.h.in index 455569da..223162e1 100644 --- a/test/include/test/jemalloc_test.h.in +++ b/test/include/test/jemalloc_test.h.in @@ -94,6 +94,7 @@ # define JEMALLOC_H_STRUCTS # define JEMALLOC_H_EXTERNS # define JEMALLOC_H_INLINES +# include "jemalloc/internal/time.h" # include "jemalloc/internal/util.h" # include "jemalloc/internal/qr.h" # include "jemalloc/internal/ql.h" diff --git a/test/include/test/timer.h b/test/include/test/timer.h index a7fefdfd..a791f9ce 100644 --- a/test/include/test/timer.h +++ b/test/include/test/timer.h @@ -3,21 +3,9 @@ #include #include -#define JEMALLOC_CLOCK_GETTIME defined(_POSIX_MONOTONIC_CLOCK) \ - && _POSIX_MONOTONIC_CLOCK >= 0 - typedef struct { -#ifdef _WIN32 - FILETIME ft0; - FILETIME ft1; -#elif JEMALLOC_CLOCK_GETTIME - struct timespec ts0; - struct timespec ts1; - int clock_id; -#else - struct timeval tv0; - struct timeval tv1; -#endif + struct timespec t0; + struct timespec t1; } timedelta_t; void timer_start(timedelta_t *timer); diff --git a/test/src/timer.c b/test/src/timer.c index 0c93abaf..15306cfd 100644 --- a/test/src/timer.c +++ b/test/src/timer.c @@ -4,50 +4,26 @@ void timer_start(timedelta_t *timer) { -#ifdef _WIN32 - GetSystemTimeAsFileTime(&timer->ft0); -#elif JEMALLOC_CLOCK_GETTIME - if (sysconf(_SC_MONOTONIC_CLOCK) <= 0) - timer->clock_id = CLOCK_REALTIME; - else - timer->clock_id = CLOCK_MONOTONIC; - clock_gettime(timer->clock_id, &timer->ts0); -#else - gettimeofday(&timer->tv0, NULL); -#endif + time_init(&timer->t0, 0, 0); + time_update(&timer->t0); } void timer_stop(timedelta_t *timer) { -#ifdef _WIN32 - GetSystemTimeAsFileTime(&timer->ft0); -#elif JEMALLOC_CLOCK_GETTIME - clock_gettime(timer->clock_id, &timer->ts1); -#else - gettimeofday(&timer->tv1, NULL); -#endif + time_copy(&timer->t1, &timer->t0); + time_update(&timer->t1); } uint64_t timer_usec(const timedelta_t *timer) { + struct timespec delta; -#ifdef _WIN32 - uint64_t t0, t1; - t0 = (((uint64_t)timer->ft0.dwHighDateTime) << 32) | - timer->ft0.dwLowDateTime; - t1 = (((uint64_t)timer->ft1.dwHighDateTime) << 32) | - timer->ft1.dwLowDateTime; - return ((t1 - t0) / 10); -#elif JEMALLOC_CLOCK_GETTIME - return (((timer->ts1.tv_sec - timer->ts0.tv_sec) * 1000000) + - (timer->ts1.tv_nsec - timer->ts0.tv_nsec) / 1000); -#else - return (((timer->tv1.tv_sec - timer->tv0.tv_sec) * 1000000) + - timer->tv1.tv_usec - timer->tv0.tv_usec); -#endif + time_copy(&delta, &timer->t1); + time_subtract(&delta, &timer->t0); + return (time_sec(&delta) * 1000000 + time_nsec(&delta) / 1000); } void diff 
--git a/test/unit/time.c b/test/unit/time.c index 80460f98..941e6f13 100644 --- a/test/unit/time.c +++ b/test/unit/time.c @@ -1,16 +1,206 @@ #include "test/jemalloc_test.h" +#define BILLION 1000000000 + +TEST_BEGIN(test_time_init) +{ + struct timespec ts; + + time_init(&ts, 42, 43); + assert_ld_eq(ts.tv_sec, 42, "tv_sec incorrectly initialized"); + assert_ld_eq(ts.tv_nsec, 43, "tv_nsec incorrectly initialized"); +} +TEST_END + +TEST_BEGIN(test_time_sec) +{ + struct timespec ts; + + time_init(&ts, 42, 43); + assert_ld_eq(time_sec(&ts), 42, "tv_sec incorrectly read"); +} +TEST_END + +TEST_BEGIN(test_time_nsec) +{ + struct timespec ts; + + time_init(&ts, 42, 43); + assert_ld_eq(time_nsec(&ts), 43, "tv_nsec incorrectly read"); +} +TEST_END + +TEST_BEGIN(test_time_copy) +{ + struct timespec tsa, tsb; + + time_init(&tsa, 42, 43); + time_init(&tsb, 0, 0); + time_copy(&tsb, &tsa); + assert_ld_eq(time_sec(&tsb), 42, "tv_sec incorrectly copied"); + assert_ld_eq(time_nsec(&tsb), 43, "tv_nsec incorrectly copied"); +} +TEST_END + +TEST_BEGIN(test_time_compare) +{ + struct timespec tsa, tsb; + + time_init(&tsa, 42, 43); + time_copy(&tsb, &tsa); + assert_d_eq(time_compare(&tsa, &tsb), 0, "Times should be equal"); + assert_d_eq(time_compare(&tsb, &tsa), 0, "Times should be equal"); + + time_init(&tsb, 42, 42); + assert_d_eq(time_compare(&tsa, &tsb), 1, + "tsa should be greater than tsb"); + assert_d_eq(time_compare(&tsb, &tsa), -1, + "tsb should be less than tsa"); + + time_init(&tsb, 42, 44); + assert_d_eq(time_compare(&tsa, &tsb), -1, + "tsa should be less than tsb"); + assert_d_eq(time_compare(&tsb, &tsa), 1, + "tsb should be greater than tsa"); + + time_init(&tsb, 41, BILLION - 1); + assert_d_eq(time_compare(&tsa, &tsb), 1, + "tsa should be greater than tsb"); + assert_d_eq(time_compare(&tsb, &tsa), -1, + "tsb should be less than tsa"); + + time_init(&tsb, 43, 0); + assert_d_eq(time_compare(&tsa, &tsb), -1, + "tsa should be less than tsb"); + assert_d_eq(time_compare(&tsb, &tsa), 1, + "tsb should be greater than tsa"); +} +TEST_END + +TEST_BEGIN(test_time_add) +{ + struct timespec tsa, tsb; + + time_init(&tsa, 42, 43); + time_copy(&tsb, &tsa); + time_add(&tsa, &tsb); + time_init(&tsb, 84, 86); + assert_d_eq(time_compare(&tsa, &tsb), 0, "Incorrect addition result"); + + time_init(&tsa, 42, BILLION - 1); + time_copy(&tsb, &tsa); + time_add(&tsa, &tsb); + time_init(&tsb, 85, BILLION - 2); + assert_d_eq(time_compare(&tsa, &tsb), 0, "Incorrect addition result"); +} +TEST_END + +TEST_BEGIN(test_time_subtract) +{ + struct timespec tsa, tsb; + + time_init(&tsa, 42, 43); + time_copy(&tsb, &tsa); + time_subtract(&tsa, &tsb); + time_init(&tsb, 0, 0); + assert_d_eq(time_compare(&tsa, &tsb), 0, + "Incorrect subtraction result"); + + time_init(&tsa, 42, 43); + time_init(&tsb, 41, 44); + time_subtract(&tsa, &tsb); + time_init(&tsb, 0, BILLION - 1); + assert_d_eq(time_compare(&tsa, &tsb), 0, + "Incorrect subtraction result"); +} +TEST_END + +TEST_BEGIN(test_time_imultiply) +{ + struct timespec tsa, tsb; + + time_init(&tsa, 42, 43); + time_imultiply(&tsa, 10); + time_init(&tsb, 420, 430); + assert_d_eq(time_compare(&tsa, &tsb), 0, + "Incorrect multiplication result"); + + time_init(&tsa, 42, 666666666); + time_imultiply(&tsa, 3); + time_init(&tsb, 127, 999999998); + assert_d_eq(time_compare(&tsa, &tsb), 0, + "Incorrect multiplication result"); +} +TEST_END + +TEST_BEGIN(test_time_idivide) +{ + struct timespec tsa, tsb; + + time_init(&tsa, 42, 43); + time_copy(&tsb, &tsa); + time_imultiply(&tsa, 10); + 
time_idivide(&tsa, 10); + assert_d_eq(time_compare(&tsa, &tsb), 0, "Incorrect division result"); + + time_init(&tsa, 42, 666666666); + time_copy(&tsb, &tsa); + time_imultiply(&tsa, 3); + time_idivide(&tsa, 3); + assert_d_eq(time_compare(&tsa, &tsb), 0, "Incorrect division result"); +} +TEST_END + +TEST_BEGIN(test_time_divide) +{ + struct timespec tsa, tsb, tsc; + + time_init(&tsa, 42, 43); + time_copy(&tsb, &tsa); + time_imultiply(&tsa, 10); + assert_u64_eq(time_divide(&tsa, &tsb), 10, + "Incorrect division result"); + + time_init(&tsa, 42, 43); + time_copy(&tsb, &tsa); + time_imultiply(&tsa, 10); + time_init(&tsc, 0, 1); + time_add(&tsa, &tsc); + assert_u64_eq(time_divide(&tsa, &tsb), 10, + "Incorrect division result"); + + time_init(&tsa, 42, 43); + time_copy(&tsb, &tsa); + time_imultiply(&tsa, 10); + time_init(&tsc, 0, 1); + time_subtract(&tsa, &tsc); + assert_u64_eq(time_divide(&tsa, &tsb), 9, "Incorrect division result"); +} +TEST_END + TEST_BEGIN(test_time_update) { struct timespec ts; - memset(&ts, 0, sizeof(struct timespec)); + time_init(&ts, 0, 0); assert_false(time_update(&ts), "Basic time update failed."); /* Only Rip Van Winkle sleeps this long. */ - ts.tv_sec += 631152000; - assert_true(time_update(&ts), "Update should detect time roll-back."); + { + struct timespec addend; + time_init(&addend, 631152000, 0); + time_add(&ts, &addend); + } + { + struct timespec ts0; + time_copy(&ts0, &ts); + assert_true(time_update(&ts), + "Update should detect time roll-back."); + assert_d_eq(time_compare(&ts, &ts0), 0, + "Time should not have been modified"); + } + } TEST_END @@ -19,5 +209,15 @@ main(void) { return (test( + test_time_init, + test_time_sec, + test_time_nsec, + test_time_copy, + test_time_compare, + test_time_add, + test_time_subtract, + test_time_imultiply, + test_time_idivide, + test_time_divide, test_time_update)); } From 9998000b2b77205a37ae630d5fd1ec9ee4569476 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 2 Feb 2016 20:27:54 -0800 Subject: [PATCH 25/96] Implement ticker. Implement ticker, which provides a simple API for ticking off some number of events before indicating that the ticker has hit its limit. 
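To make the intended usage concrete, here is a minimal standalone sketch of the
pattern the ticker enables: amortizing a slow path over many cheap events. Only
ticker_init() and ticker_tick() correspond to the API added below; the stand-in
struct definitions and the event loop in main() are illustrative assumptions,
not part of the patch.

/*
 * Sketch: run a slow path once every nticks+1 events.  The ticker_t
 * stand-ins mirror the inline functions in ticker.h; the event-handling
 * details are hypothetical.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef struct {
	int32_t tick;
	int32_t nticks;
} ticker_t;

static void
ticker_init(ticker_t *ticker, int32_t nticks)
{

	ticker->tick = nticks;
	ticker->nticks = nticks;
}

static bool
ticker_tick(ticker_t *ticker)
{

	if (ticker->tick < 1) {
		ticker->tick = ticker->nticks;
		return (true);	/* Limit hit; caller runs its slow path. */
	}
	ticker->tick--;
	return (false);
}

int
main(void)
{
	ticker_t ticker;
	int i;

	ticker_init(&ticker, 3);
	for (i = 0; i < 10; i++) {
		/* ... handle one cheap event here ... */
		if (ticker_tick(&ticker))
			printf("slow path after event %d\n", i);
	}
	return (0);
}

With nticks = 3, the slow path runs after events 3 and 7; the next patch in the
series applies exactly this shape to incremental tcache GC via TCACHE_GC_INCR.
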
--- Makefile.in | 5 +- .../jemalloc/internal/jemalloc_internal.h.in | 4 + include/jemalloc/internal/private_symbols.txt | 3 + include/jemalloc/internal/ticker.h | 75 ++++++++++++++++++ src/ticker.c | 2 + test/unit/ticker.c | 76 +++++++++++++++++++ 6 files changed, 163 insertions(+), 2 deletions(-) create mode 100644 include/jemalloc/internal/ticker.h create mode 100644 src/ticker.c create mode 100644 test/unit/ticker.c diff --git a/Makefile.in b/Makefile.in index e314a6f3..f3c2e4bd 100644 --- a/Makefile.in +++ b/Makefile.in @@ -85,8 +85,8 @@ C_SRCS := $(srcroot)src/jemalloc.c $(srcroot)src/arena.c \ $(srcroot)src/extent.c $(srcroot)src/hash.c $(srcroot)src/huge.c \ $(srcroot)src/mb.c $(srcroot)src/mutex.c $(srcroot)src/pages.c \ $(srcroot)src/prof.c $(srcroot)src/quarantine.c $(srcroot)src/rtree.c \ - $(srcroot)src/stats.c $(srcroot)src/tcache.c $(srcroot)src/time.c \ - $(srcroot)src/tsd.c $(srcroot)src/util.c + $(srcroot)src/stats.c $(srcroot)src/tcache.c $(srcroot)src/ticker.c \ + $(srcroot)src/time.c $(srcroot)src/tsd.c $(srcroot)src/util.c ifeq ($(enable_valgrind), 1) C_SRCS += $(srcroot)src/valgrind.c endif @@ -143,6 +143,7 @@ TESTS_UNIT := $(srcroot)test/unit/atomic.c \ $(srcroot)test/unit/SFMT.c \ $(srcroot)test/unit/size_classes.c \ $(srcroot)test/unit/stats.c \ + $(srcroot)test/unit/ticker.c \ $(srcroot)test/unit/time.c \ $(srcroot)test/unit/tsd.c \ $(srcroot)test/unit/util.c \ diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index e7bc4c84..12d51be2 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -361,6 +361,7 @@ typedef unsigned szind_t; #include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/prng.h" +#include "jemalloc/internal/ticker.h" #include "jemalloc/internal/ckh.h" #include "jemalloc/internal/size_classes.h" #include "jemalloc/internal/stats.h" @@ -390,6 +391,7 @@ typedef unsigned szind_t; #include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/prng.h" +#include "jemalloc/internal/ticker.h" #include "jemalloc/internal/ckh.h" #include "jemalloc/internal/size_classes.h" #include "jemalloc/internal/stats.h" @@ -476,6 +478,7 @@ void jemalloc_postfork_child(void); #include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/prng.h" +#include "jemalloc/internal/ticker.h" #include "jemalloc/internal/ckh.h" #include "jemalloc/internal/size_classes.h" #include "jemalloc/internal/stats.h" @@ -505,6 +508,7 @@ void jemalloc_postfork_child(void); #include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/prng.h" +#include "jemalloc/internal/ticker.h" #include "jemalloc/internal/ckh.h" #include "jemalloc/internal/size_classes.h" #include "jemalloc/internal/stats.h" diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 4c40af61..216367e5 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -460,8 +460,11 @@ tcaches_get tcache_stats_merge thread_allocated_cleanup thread_deallocated_cleanup +ticker_copy ticker_init +ticker_read ticker_tick +ticker_ticks time_add time_compare time_copy diff --git a/include/jemalloc/internal/ticker.h b/include/jemalloc/internal/ticker.h new file mode 100644 index 00000000..4696e56d --- /dev/null +++ 
b/include/jemalloc/internal/ticker.h @@ -0,0 +1,75 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +typedef struct ticker_s ticker_t; + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +struct ticker_s { + int32_t tick; + int32_t nticks; +}; + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#ifndef JEMALLOC_ENABLE_INLINE +void ticker_init(ticker_t *ticker, int32_t nticks); +void ticker_copy(ticker_t *ticker, const ticker_t *other); +int32_t ticker_read(const ticker_t *ticker); +bool ticker_ticks(ticker_t *ticker, int32_t nticks); +bool ticker_tick(ticker_t *ticker); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_TICKER_C_)) +JEMALLOC_INLINE void +ticker_init(ticker_t *ticker, int32_t nticks) +{ + + ticker->tick = nticks; + ticker->nticks = nticks; +} + +JEMALLOC_INLINE void +ticker_copy(ticker_t *ticker, const ticker_t *other) +{ + + *ticker = *other; +} + +JEMALLOC_INLINE int32_t +ticker_read(const ticker_t *ticker) +{ + + return (ticker->tick); +} + +JEMALLOC_INLINE bool +ticker_ticks(ticker_t *ticker, int32_t nticks) +{ + + if (unlikely(ticker->tick < nticks)) { + ticker->tick = ticker->nticks; + return (true); + } + ticker->tick -= nticks; + return(false); +} + +JEMALLOC_INLINE bool +ticker_tick(ticker_t *ticker) +{ + + return (ticker_ticks(ticker, 1)); +} +#endif + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/src/ticker.c b/src/ticker.c new file mode 100644 index 00000000..db090240 --- /dev/null +++ b/src/ticker.c @@ -0,0 +1,2 @@ +#define JEMALLOC_TICKER_C_ +#include "jemalloc/internal/jemalloc_internal.h" diff --git a/test/unit/ticker.c b/test/unit/ticker.c new file mode 100644 index 00000000..e737020a --- /dev/null +++ b/test/unit/ticker.c @@ -0,0 +1,76 @@ +#include "test/jemalloc_test.h" + +TEST_BEGIN(test_ticker_tick) +{ +#define NREPS 2 +#define NTICKS 3 + ticker_t ticker; + int32_t i, j; + + ticker_init(&ticker, NTICKS); + for (i = 0; i < NREPS; i++) { + for (j = 0; j < NTICKS; j++) { + assert_u_eq(ticker_read(&ticker), NTICKS - j, + "Unexpected ticker value (i=%d, j=%d)", i, j); + assert_false(ticker_tick(&ticker), + "Unexpected ticker fire (i=%d, j=%d)", i, j); + } + assert_u32_eq(ticker_read(&ticker), 0, + "Expected ticker depletion"); + assert_true(ticker_tick(&ticker), + "Expected ticker fire (i=%d)", i); + assert_u32_eq(ticker_read(&ticker), NTICKS, + "Expected ticker reset"); + } +#undef NTICKS +} +TEST_END + +TEST_BEGIN(test_ticker_ticks) +{ +#define NTICKS 3 + ticker_t ticker; + + ticker_init(&ticker, NTICKS); + + assert_u_eq(ticker_read(&ticker), NTICKS, "Unexpected ticker value"); + assert_false(ticker_ticks(&ticker, NTICKS), "Unexpected ticker fire"); + assert_u_eq(ticker_read(&ticker), 0, "Unexpected ticker value"); + assert_true(ticker_ticks(&ticker, NTICKS), "Expected ticker fire"); + assert_u_eq(ticker_read(&ticker), NTICKS, "Unexpected ticker value"); + + assert_true(ticker_ticks(&ticker, NTICKS + 1), "Expected ticker fire"); + assert_u_eq(ticker_read(&ticker), NTICKS, "Unexpected ticker value"); +#undef NTICKS +} +TEST_END + +TEST_BEGIN(test_ticker_copy) +{ +#define 
NTICKS 3 + ticker_t ta, tb; + + ticker_init(&ta, NTICKS); + ticker_copy(&tb, &ta); + assert_u_eq(ticker_read(&tb), NTICKS, "Unexpected ticker value"); + assert_true(ticker_ticks(&tb, NTICKS + 1), "Expected ticker fire"); + assert_u_eq(ticker_read(&tb), NTICKS, "Unexpected ticker value"); + + ticker_tick(&ta); + ticker_copy(&tb, &ta); + assert_u_eq(ticker_read(&tb), NTICKS - 1, "Unexpected ticker value"); + assert_true(ticker_ticks(&tb, NTICKS), "Expected ticker fire"); + assert_u_eq(ticker_read(&tb), NTICKS, "Unexpected ticker value"); +#undef NTICKS +} +TEST_END + +int +main(void) +{ + + return (test( + test_ticker_tick, + test_ticker_ticks, + test_ticker_copy)); +} From c87ab25d189e0ae76fd568db4bf273e2788cf1a9 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 2 Feb 2016 20:37:24 -0800 Subject: [PATCH 26/96] Use ticker for incremental tcache GC. --- include/jemalloc/internal/tcache.h | 6 ++---- src/tcache.c | 3 ++- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index aa73060a..c64f5d34 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -83,7 +83,7 @@ struct tcache_bin_s { struct tcache_s { ql_elm(tcache_t) link; /* Used for aggregating stats. */ uint64_t prof_accumbytes;/* Cleared after arena_prof_accum(). */ - unsigned ev_cnt; /* Event count since incremental GC. */ + ticker_t gc_ticker; /* Drives incremental GC. */ szind_t next_gc_bin; /* Next bin to GC. */ tcache_bin_t tbins[1]; /* Dynamically sized. */ /* @@ -247,9 +247,7 @@ tcache_event(tsd_t *tsd, tcache_t *tcache) if (TCACHE_GC_INCR == 0) return; - tcache->ev_cnt++; - assert(tcache->ev_cnt <= TCACHE_GC_INCR); - if (unlikely(tcache->ev_cnt == TCACHE_GC_INCR)) + if (unlikely(ticker_tick(&tcache->gc_ticker))) tcache_event_hard(tsd, tcache); } diff --git a/src/tcache.c b/src/tcache.c index 78c62300..e8c3152d 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -67,7 +67,6 @@ tcache_event_hard(tsd_t *tsd, tcache_t *tcache) tcache->next_gc_bin++; if (tcache->next_gc_bin == nhbins) tcache->next_gc_bin = 0; - tcache->ev_cnt = 0; } void * @@ -330,6 +329,8 @@ tcache_create(tsd_t *tsd, arena_t *arena) tcache_arena_associate(tcache, arena); + ticker_init(&tcache->gc_ticker, TCACHE_GC_INCR); + assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0); for (i = 0; i < nhbins; i++) { tcache->tbins[i].lg_fill_div = 1; From 34676d33690f6cc6885ff769e537ca940aacf886 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 9 Feb 2016 16:28:40 -0800 Subject: [PATCH 27/96] Refactor prng* from cpp macros into inline functions. Remove 32-bit variant, convert prng64() to prng_lg_range(), and add prng_range(). 
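The refactored generator is Knuth's 64-bit linear congruential generator, and
because an LCG's low-order bits have very short cycles, prng_lg_range() keeps
only the high-order bits. A self-contained sketch follows; the constants and
the shift match the patch below, while main() and its seed are illustrative
only.

#include <assert.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define PRNG_A UINT64_C(6364136223846793005)
#define PRNG_C UINT64_C(1442695040888963407)

/* Advance the LCG state; return the lg_range high-order result bits. */
static uint64_t
prng_lg_range(uint64_t *state, unsigned lg_range)
{
	uint64_t ret;

	assert(lg_range > 0 && lg_range <= 64);

	ret = (*state * PRNG_A) + PRNG_C;
	*state = ret;
	return (ret >> (64 - lg_range));
}

int
main(void)
{
	uint64_t state = 42;	/* Any seed works; 42 matches the unit test. */
	unsigned i;

	for (i = 0; i < 4; i++) {
		/* Each call yields a value in [0..2^8). */
		printf("%" PRIu64 "\n", prng_lg_range(&state, 8));
	}
	return (0);
}

prng_range() builds on this by drawing ceil(lg(range)) bits and retrying until
the result falls in [0..range), which avoids the bias a single modulo reduction
would introduce.
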
--- Makefile.in | 8 ++- include/jemalloc/internal/ckh.h | 4 +- .../jemalloc/internal/jemalloc_internal.h.in | 4 +- include/jemalloc/internal/private_symbols.txt | 4 +- include/jemalloc/internal/prng.h | 67 +++++++++++------- include/jemalloc/internal/util.h | 37 ++++++++-- src/arena.c | 4 +- src/ckh.c | 4 +- src/prng.c | 2 + src/prof.c | 3 +- test/unit/prng.c | 68 ++++++++++++++++++ test/unit/util.c | 69 ++++++++++++------- 12 files changed, 205 insertions(+), 69 deletions(-) create mode 100644 src/prng.c create mode 100644 test/unit/prng.c diff --git a/Makefile.in b/Makefile.in index f3c2e4bd..6b210fee 100644 --- a/Makefile.in +++ b/Makefile.in @@ -84,9 +84,10 @@ C_SRCS := $(srcroot)src/jemalloc.c $(srcroot)src/arena.c \ $(srcroot)src/chunk_mmap.c $(srcroot)src/ckh.c $(srcroot)src/ctl.c \ $(srcroot)src/extent.c $(srcroot)src/hash.c $(srcroot)src/huge.c \ $(srcroot)src/mb.c $(srcroot)src/mutex.c $(srcroot)src/pages.c \ - $(srcroot)src/prof.c $(srcroot)src/quarantine.c $(srcroot)src/rtree.c \ - $(srcroot)src/stats.c $(srcroot)src/tcache.c $(srcroot)src/ticker.c \ - $(srcroot)src/time.c $(srcroot)src/tsd.c $(srcroot)src/util.c + $(srcroot)src/prng.c $(srcroot)src/prof.c $(srcroot)src/quarantine.c \ + $(srcroot)src/rtree.c $(srcroot)src/stats.c $(srcroot)src/tcache.c \ + $(srcroot)src/ticker.c $(srcroot)src/time.c $(srcroot)src/tsd.c \ + $(srcroot)src/util.c ifeq ($(enable_valgrind), 1) C_SRCS += $(srcroot)src/valgrind.c endif @@ -129,6 +130,7 @@ TESTS_UNIT := $(srcroot)test/unit/atomic.c \ $(srcroot)test/unit/math.c \ $(srcroot)test/unit/mq.c \ $(srcroot)test/unit/mtx.c \ + $(srcroot)test/unit/prng.c \ $(srcroot)test/unit/prof_accum.c \ $(srcroot)test/unit/prof_active.c \ $(srcroot)test/unit/prof_gdump.c \ diff --git a/include/jemalloc/internal/ckh.h b/include/jemalloc/internal/ckh.h index 75c1c979..45fb3455 100644 --- a/include/jemalloc/internal/ckh.h +++ b/include/jemalloc/internal/ckh.h @@ -40,9 +40,7 @@ struct ckh_s { #endif /* Used for pseudo-random number generation. */ -#define CKH_A 1103515241 -#define CKH_C 12347 - uint32_t prng_state; + uint64_t prng_state; /* Total number of items. */ size_t count; diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 12d51be2..616eb9f3 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -547,7 +547,7 @@ size2index_compute(size_t size) #if (NTBINS != 0) if (size <= (ZU(1) << LG_TINY_MAXCLASS)) { size_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1; - size_t lg_ceil = lg_floor(pow2_ceil(size)); + size_t lg_ceil = lg_floor(pow2_ceil_zu(size)); return (lg_ceil < lg_tmin ? 0 : lg_ceil - lg_tmin); } #endif @@ -644,7 +644,7 @@ s2u_compute(size_t size) #if (NTBINS > 0) if (size <= (ZU(1) << LG_TINY_MAXCLASS)) { size_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1; - size_t lg_ceil = lg_floor(pow2_ceil(size)); + size_t lg_ceil = lg_floor(pow2_ceil_zu(size)); return (lg_ceil < lg_tmin ? 
(ZU(1) << lg_tmin) : (ZU(1) << lg_ceil)); } diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 216367e5..d910202d 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -348,7 +348,9 @@ pages_map pages_purge pages_trim pages_unmap -pow2_ceil +pow2_ceil_u32 +pow2_ceil_u64 +pow2_ceil_zu prof_active_get prof_active_get_unlocked prof_active_set diff --git a/include/jemalloc/internal/prng.h b/include/jemalloc/internal/prng.h index 216d0ef4..83c90906 100644 --- a/include/jemalloc/internal/prng.h +++ b/include/jemalloc/internal/prng.h @@ -18,31 +18,9 @@ * proportional to bit position. For example, the lowest bit has a cycle of 2, * the next has a cycle of 4, etc. For this reason, we prefer to use the upper * bits. - * - * Macro parameters: - * uint32_t r : Result. - * unsigned lg_range : (0..32], number of least significant bits to return. - * uint32_t state : Seed value. - * const uint32_t a, c : See above discussion. */ -#define prng32(r, lg_range, state, a, c) do { \ - assert((lg_range) > 0); \ - assert((lg_range) <= 32); \ - \ - r = (state * (a)) + (c); \ - state = r; \ - r >>= (32 - (lg_range)); \ -} while (false) - -/* Same as prng32(), but 64 bits of pseudo-randomness, using uint64_t. */ -#define prng64(r, lg_range, state, a, c) do { \ - assert((lg_range) > 0); \ - assert((lg_range) <= 64); \ - \ - r = (state * (a)) + (c); \ - state = r; \ - r >>= (64 - (lg_range)); \ -} while (false) +#define PRNG_A UINT64_C(6364136223846793005) +#define PRNG_C UINT64_C(1442695040888963407) #endif /* JEMALLOC_H_TYPES */ /******************************************************************************/ @@ -56,5 +34,46 @@ /******************************************************************************/ #ifdef JEMALLOC_H_INLINES +#ifndef JEMALLOC_ENABLE_INLINE +uint64_t prng_lg_range(uint64_t *state, unsigned lg_range); +uint64_t prng_range(uint64_t *state, uint64_t range); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PRNG_C_)) +JEMALLOC_ALWAYS_INLINE uint64_t +prng_lg_range(uint64_t *state, unsigned lg_range) +{ + uint64_t ret; + + assert(lg_range > 0); + assert(lg_range <= 64); + + ret = (*state * PRNG_A) + PRNG_C; + *state = ret; + ret >>= (64 - lg_range); + + return (ret); +} + +JEMALLOC_ALWAYS_INLINE uint64_t +prng_range(uint64_t *state, uint64_t range) +{ + uint64_t ret; + unsigned lg_range; + + assert(range > 1); + + /* Compute the ceiling of lg(range). */ + lg_range = jemalloc_ffsl(pow2_ceil_u64(range)) - 1; + + /* Generate a result in [0..range) via repeated trial. */ + do { + ret = prng_lg_range(state, lg_range); + } while (ret >= range); + + return (ret); +} +#endif + #endif /* JEMALLOC_H_INLINES */ /******************************************************************************/ diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index 0bccea24..dfe5c93c 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -123,7 +123,9 @@ void malloc_printf(const char *format, ...) 
JEMALLOC_FORMAT_PRINTF(1, 2); #ifndef JEMALLOC_ENABLE_INLINE int jemalloc_ffsl(long bitmap); int jemalloc_ffs(int bitmap); -size_t pow2_ceil(size_t x); +uint64_t pow2_ceil_u64(uint64_t x); +uint32_t pow2_ceil_u32(uint32_t x); +size_t pow2_ceil_zu(size_t x); size_t lg_floor(size_t x); void set_errno(int errnum); int get_errno(void); @@ -150,9 +152,8 @@ jemalloc_ffs(int bitmap) return (JEMALLOC_INTERNAL_FFS(bitmap)); } -/* Compute the smallest power of 2 that is >= x. */ -JEMALLOC_INLINE size_t -pow2_ceil(size_t x) +JEMALLOC_INLINE uint64_t +pow2_ceil_u64(uint64_t x) { x--; @@ -161,13 +162,37 @@ pow2_ceil(size_t x) x |= x >> 4; x |= x >> 8; x |= x >> 16; -#if (LG_SIZEOF_PTR == 3) x |= x >> 32; -#endif x++; return (x); } +JEMALLOC_INLINE uint32_t +pow2_ceil_u32(uint32_t x) +{ + + x--; + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + x |= x >> 8; + x |= x >> 16; + x++; + return (x); +} + +/* Compute the smallest power of 2 that is >= x. */ +JEMALLOC_INLINE size_t +pow2_ceil_zu(size_t x) +{ + +#if (LG_SIZEOF_PTR == 3) + return (pow2_ceil_u64(x)); +#else + return (pow2_ceil_u32(x)); +#endif +} + #if (defined(__i386__) || defined(__amd64__) || defined(__x86_64__)) JEMALLOC_INLINE size_t lg_floor(size_t x) diff --git a/src/arena.c b/src/arena.c index 143afb9a..aa787f99 100644 --- a/src/arena.c +++ b/src/arena.c @@ -2196,9 +2196,7 @@ arena_malloc_large(arena_t *arena, size_t size, szind_t binind, bool zero) * that is a multiple of the cacheline size, e.g. [0 .. 63) * 64 * for 4 KiB pages and 64-byte cachelines. */ - prng64(r, LG_PAGE - LG_CACHELINE, arena->offset_state, - UINT64_C(6364136223846793009), - UINT64_C(1442695040888963409)); + r = prng_lg_range(&arena->offset_state, LG_PAGE - LG_CACHELINE); random_offset = ((uintptr_t)r) << LG_CACHELINE; } else random_offset = 0; diff --git a/src/ckh.c b/src/ckh.c index e4328d22..08fc433d 100644 --- a/src/ckh.c +++ b/src/ckh.c @@ -99,7 +99,7 @@ ckh_try_bucket_insert(ckh_t *ckh, size_t bucket, const void *key, * Cycle through the cells in the bucket, starting at a random position. * The randomness avoids worst-case search overhead as buckets fill up. */ - prng32(offset, LG_CKH_BUCKET_CELLS, ckh->prng_state, CKH_A, CKH_C); + offset = prng_lg_range(&ckh->prng_state, LG_CKH_BUCKET_CELLS); for (i = 0; i < (ZU(1) << LG_CKH_BUCKET_CELLS); i++) { cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + ((i + offset) & ((ZU(1) << LG_CKH_BUCKET_CELLS) - 1))]; @@ -141,7 +141,7 @@ ckh_evict_reloc_insert(ckh_t *ckh, size_t argbucket, void const **argkey, * were an item for which both hashes indicated the same * bucket. 
*/ - prng32(i, LG_CKH_BUCKET_CELLS, ckh->prng_state, CKH_A, CKH_C); + i = prng_lg_range(&ckh->prng_state, LG_CKH_BUCKET_CELLS); cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + i]; assert(cell->key != NULL); diff --git a/src/prng.c b/src/prng.c new file mode 100644 index 00000000..76646a2a --- /dev/null +++ b/src/prng.c @@ -0,0 +1,2 @@ +#define JEMALLOC_PRNG_C_ +#include "jemalloc/internal/jemalloc_internal.h" diff --git a/src/prof.c b/src/prof.c index 199e63e4..31f5e601 100644 --- a/src/prof.c +++ b/src/prof.c @@ -871,8 +871,7 @@ prof_sample_threshold_update(prof_tdata_t *tdata) * pp 500 * (http://luc.devroye.org/rnbookindex.html) */ - prng64(r, 53, tdata->prng_state, UINT64_C(6364136223846793005), - UINT64_C(1442695040888963407)); + r = prng_lg_range(&tdata->prng_state, 53); u = (double)r * (1.0/9007199254740992.0L); tdata->bytes_until_sample = (uint64_t)(log(u) / log(1.0 - (1.0 / (double)((uint64_t)1U << lg_prof_sample)))) diff --git a/test/unit/prng.c b/test/unit/prng.c new file mode 100644 index 00000000..b22bd2f5 --- /dev/null +++ b/test/unit/prng.c @@ -0,0 +1,68 @@ +#include "test/jemalloc_test.h" + +TEST_BEGIN(test_prng_lg_range) +{ + uint64_t sa, sb, ra, rb; + unsigned lg_range; + + sa = 42; + ra = prng_lg_range(&sa, 64); + sa = 42; + rb = prng_lg_range(&sa, 64); + assert_u64_eq(ra, rb, + "Repeated generation should produce repeated results"); + + sb = 42; + rb = prng_lg_range(&sb, 64); + assert_u64_eq(ra, rb, + "Equivalent generation should produce equivalent results"); + + sa = 42; + ra = prng_lg_range(&sa, 64); + rb = prng_lg_range(&sa, 64); + assert_u64_ne(ra, rb, + "Full-width results must not immediately repeat"); + + sa = 42; + ra = prng_lg_range(&sa, 64); + for (lg_range = 63; lg_range > 0; lg_range--) { + sb = 42; + rb = prng_lg_range(&sb, lg_range); + assert_u64_eq((rb & (UINT64_C(0xffffffffffffffff) << lg_range)), + 0, "High order bits should be 0, lg_range=%u", lg_range); + assert_u64_eq(rb, (ra >> (64 - lg_range)), + "Expected high order bits of full-width result, " + "lg_range=%u", lg_range); + } +} +TEST_END + +TEST_BEGIN(test_prng_range) +{ + uint64_t range; +#define MAX_RANGE 10000000 +#define RANGE_STEP 97 +#define NREPS 10 + + for (range = 2; range < MAX_RANGE; range += RANGE_STEP) { + uint64_t s; + unsigned rep; + + s = range; + for (rep = 0; rep < NREPS; rep++) { + uint64_t r = prng_range(&s, range); + + assert_u64_lt(r, range, "Out of range"); + } + } +} +TEST_END + +int +main(void) +{ + + return (test( + test_prng_lg_range, + test_prng_range)); +} diff --git a/test/unit/util.c b/test/unit/util.c index 8ab39a45..2f65aad2 100644 --- a/test/unit/util.c +++ b/test/unit/util.c @@ -1,33 +1,54 @@ #include "test/jemalloc_test.h" -TEST_BEGIN(test_pow2_ceil) +#define TEST_POW2_CEIL(t, suf, pri) do { \ + unsigned i, pow2; \ + t x; \ + \ + assert_zu_eq(pow2_ceil_##suf(0), 0, "Unexpected result"); \ + \ + for (i = 0; i < sizeof(t) * 8; i++) { \ + assert_zu_eq(pow2_ceil_##suf(((t)1) << i), ((t)1) << i, \ + "Unexpected result"); \ + } \ + \ + for (i = 2; i < sizeof(t) * 8; i++) { \ + assert_zu_eq(pow2_ceil_##suf((((t)1) << i) - 1), \ + ((t)1) << i, "Unexpected result"); \ + } \ + \ + for (i = 0; i < sizeof(t) * 8 - 1; i++) { \ + assert_zu_eq(pow2_ceil_##suf((((t)1) << i) + 1), \ + ((t)1) << (i+1), "Unexpected result"); \ + } \ + \ + for (pow2 = 1; pow2 < 25; pow2++) { \ + for (x = (((t)1) << (pow2-1)) + 1; x <= ((t)1) << pow2; \ + x++) { \ + assert_zu_eq(pow2_ceil_##suf(x), \ + ((t)1) << pow2, \ + "Unexpected result, x=%"pri, x); \ + } \ + } \ +} while (0) + 
+TEST_BEGIN(test_pow2_ceil_u64) { - unsigned i, pow2; - size_t x; - assert_zu_eq(pow2_ceil(0), 0, "Unexpected result"); + TEST_POW2_CEIL(uint64_t, u64, FMTu64); +} +TEST_END - for (i = 0; i < sizeof(size_t) * 8; i++) { - assert_zu_eq(pow2_ceil(ZU(1) << i), ZU(1) << i, - "Unexpected result"); - } +TEST_BEGIN(test_pow2_ceil_u32) +{ - for (i = 2; i < sizeof(size_t) * 8; i++) { - assert_zu_eq(pow2_ceil((ZU(1) << i) - 1), ZU(1) << i, - "Unexpected result"); - } + TEST_POW2_CEIL(uint32_t, u32, FMTu32); +} +TEST_END - for (i = 0; i < sizeof(size_t) * 8 - 1; i++) { - assert_zu_eq(pow2_ceil((ZU(1) << i) + 1), ZU(1) << (i+1), - "Unexpected result"); - } +TEST_BEGIN(test_pow2_ceil_zu) +{ - for (pow2 = 1; pow2 < 25; pow2++) { - for (x = (ZU(1) << (pow2-1)) + 1; x <= ZU(1) << pow2; x++) { - assert_zu_eq(pow2_ceil(x), ZU(1) << pow2, - "Unexpected result, x=%zu", x); - } - } + TEST_POW2_CEIL(size_t, zu, "zu"); } TEST_END @@ -286,7 +307,9 @@ main(void) { return (test( - test_pow2_ceil, + test_pow2_ceil_u64, + test_pow2_ceil_u32, + test_pow2_ceil_zu, test_malloc_strtoumax_no_endptr, test_malloc_strtoumax, test_malloc_snprintf_truncated, From 578cd165812a11cd7250bfe5051cddc30ffec6e5 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 19 Feb 2016 18:40:03 -0800 Subject: [PATCH 28/96] Refactor arena_malloc_hard() out of arena_malloc(). --- include/jemalloc/internal/arena.h | 22 ++++++------------- include/jemalloc/internal/private_symbols.txt | 2 +- src/arena.c | 18 ++++++++++++++- 3 files changed, 25 insertions(+), 17 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index b6824896..24c4c1d1 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -461,10 +461,10 @@ extern arena_dalloc_junk_small_t *arena_dalloc_junk_small; void arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info); #endif void arena_quarantine_junk_small(void *ptr, size_t usize); -void *arena_malloc_small(arena_t *arena, size_t size, szind_t ind, - bool zero); -void *arena_malloc_large(arena_t *arena, size_t size, szind_t ind, - bool zero); +void *arena_malloc_large(arena_t *arena, size_t size, + szind_t ind, bool zero); +void *arena_malloc_hard(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind, + bool zero, tcache_t *tcache); void *arena_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, bool zero, tcache_t *tcache); void arena_prof_promoted(const void *ptr, size_t size); @@ -1160,8 +1160,8 @@ arena_prof_tctx_reset(const void *ptr, size_t usize, const void *old_ptr, } JEMALLOC_ALWAYS_INLINE void * -arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind, - bool zero, tcache_t *tcache, bool slow_path) +arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind, bool zero, + tcache_t *tcache, bool slow_path) { assert(size != 0); @@ -1179,15 +1179,7 @@ arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind, assert(size > tcache_maxclass); } - arena = arena_choose(tsd, arena); - if (unlikely(arena == NULL)) - return (NULL); - - if (likely(size <= SMALL_MAXCLASS)) - return (arena_malloc_small(arena, size, ind, zero)); - if (likely(size <= large_maxclass)) - return (arena_malloc_large(arena, size, ind, zero)); - return (huge_malloc(tsd, arena, size, zero, tcache)); + return (arena_malloc_hard(tsd, arena, size, ind, zero, tcache)); } JEMALLOC_ALWAYS_INLINE arena_t * diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index d910202d..87b5a919 100644 --- 
a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -35,8 +35,8 @@ arena_lg_dirty_mult_default_set arena_lg_dirty_mult_get arena_lg_dirty_mult_set arena_malloc +arena_malloc_hard arena_malloc_large -arena_malloc_small arena_mapbits_allocated_get arena_mapbits_binind_get arena_mapbits_decommitted_get diff --git a/src/arena.c b/src/arena.c index aa787f99..b452df62 100644 --- a/src/arena.c +++ b/src/arena.c @@ -2123,7 +2123,7 @@ arena_quarantine_junk_small(void *ptr, size_t usize) arena_redzones_validate(ptr, bin_info, true); } -void * +static void * arena_malloc_small(arena_t *arena, size_t size, szind_t binind, bool zero) { void *ret; @@ -2236,6 +2236,22 @@ arena_malloc_large(arena_t *arena, size_t size, szind_t binind, bool zero) return (ret); } +void * +arena_malloc_hard(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind, + bool zero, tcache_t *tcache) +{ + + arena = arena_choose(tsd, arena); + if (unlikely(arena == NULL)) + return (NULL); + + if (likely(size <= SMALL_MAXCLASS)) + return (arena_malloc_small(arena, size, ind, zero)); + if (likely(size <= large_maxclass)) + return (arena_malloc_large(arena, size, ind, zero)); + return (huge_malloc(tsd, arena, size, zero, tcache)); +} + /* Only handles large allocations that require more than page alignment. */ static void * arena_palloc_large(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, From 4985dc681e2e44f9d43c902647371790acac3ad4 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 19 Feb 2016 19:24:58 -0800 Subject: [PATCH 29/96] Refactor arena_ralloc_no_move(). Refactor early return logic in arena_ralloc_no_move() to return early on failure rather than on success. --- src/arena.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/src/arena.c b/src/arena.c index b452df62..68220d7c 100644 --- a/src/arena.c +++ b/src/arena.c @@ -2810,20 +2810,19 @@ arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, if (oldsize <= SMALL_MAXCLASS) { assert(arena_bin_info[size2index(oldsize)].reg_size == oldsize); - if ((usize_max <= SMALL_MAXCLASS && - size2index(usize_max) == size2index(oldsize)) || - (size <= oldsize && usize_max >= oldsize)) - return (false); + if ((usize_max > SMALL_MAXCLASS || + size2index(usize_max) != size2index(oldsize)) && + (size > oldsize || usize_max < oldsize)) + return (true); } else { - if (usize_max > SMALL_MAXCLASS) { - if (!arena_ralloc_large(ptr, oldsize, usize_min, - usize_max, zero)) - return (false); - } + if (usize_max <= SMALL_MAXCLASS) + return (true); + if (arena_ralloc_large(ptr, oldsize, usize_min, + usize_max, zero)) + return (true); } - /* Reallocation would require a move. */ - return (true); + return (false); } else { return (huge_ralloc_no_move(ptr, oldsize, usize_min, usize_max, zero)); From db927b672748994bef0df6b5f9e94fe6c1d40d02 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 19 Feb 2016 19:37:10 -0800 Subject: [PATCH 30/96] Refactor arenas_cache tsd. Refactor arenas_cache tsd into arenas_tdata, which is a structure of type arena_tdata_t. 
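The point of the indirection is that an array of structs leaves room for
per-arena, per-thread state beyond the bare arena pointer. A minimal sketch of
the resulting fast path, using hypothetical __thread variables and a
slow_path_get() stub in place of the tsd accessors and
arena_tdata_get_hard()/arena_get_hard():

#include <stddef.h>

typedef struct arena_s arena_t;

/* One slot per arena; later fields can sit beside the cached pointer. */
typedef struct {
	arena_t *arena;	/* NULL until the arena is initialized. */
} arena_tdata_t;

/* Hypothetical thread-local stand-ins for the tsd fields. */
static __thread arena_tdata_t *arenas_tdata;
static __thread unsigned narenas_tdata;

static arena_t *
slow_path_get(unsigned ind)
{

	/* Stands in for arena_tdata_get_hard()/arena_get_hard(). */
	(void)ind;
	return (NULL);
}

/* Fast path: a bounds check and one array load per lookup. */
static arena_t *
arena_lookup(unsigned ind)
{

	if (arenas_tdata != NULL && ind < narenas_tdata &&
	    arenas_tdata[ind].arena != NULL)
		return (arenas_tdata[ind].arena);
	return (slow_path_get(ind));
}

This mirrors the arena_get()/arena_tdata_get() split in the patch: the common
case touches only thread-local memory, and all array-resize and initialization
complexity is pushed into the _hard() slow paths.
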
--- include/jemalloc/internal/arena.h | 6 + .../jemalloc/internal/jemalloc_internal.h.in | 61 +++++--- include/jemalloc/internal/private_symbols.txt | 8 +- include/jemalloc/internal/tsd.h | 6 +- src/jemalloc.c | 143 ++++++++++-------- src/tsd.c | 4 +- 6 files changed, 139 insertions(+), 89 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 24c4c1d1..2750c008 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -31,6 +31,7 @@ typedef struct arena_chunk_s arena_chunk_t; typedef struct arena_bin_info_s arena_bin_info_t; typedef struct arena_bin_s arena_bin_t; typedef struct arena_s arena_t; +typedef struct arena_tdata_s arena_tdata_t; #endif /* JEMALLOC_H_TYPES */ /******************************************************************************/ @@ -403,6 +404,11 @@ struct arena_s { /* bins is used to store trees of free regions. */ arena_bin_t bins[NBINS]; }; + +/* Used in conjunction with tsd for fast arena-related context lookup. */ +struct arena_tdata_s { + arena_t *arena; +}; #endif /* JEMALLOC_ARENA_STRUCTS_B */ #endif /* JEMALLOC_H_STRUCTS */ diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 616eb9f3..760dbdda 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -459,16 +459,18 @@ void bootstrap_free(void *ptr); arena_t *arenas_extend(unsigned ind); arena_t *arena_init(unsigned ind); unsigned narenas_total_get(void); -arena_t *arena_get_hard(tsd_t *tsd, unsigned ind, bool init_if_missing); +arena_tdata_t *arena_tdata_get_hard(tsd_t *tsd, unsigned ind); +arena_t *arena_get_hard(tsd_t *tsd, unsigned ind, bool init_if_missing, + arena_tdata_t *tdata); arena_t *arena_choose_hard(tsd_t *tsd); void arena_migrate(tsd_t *tsd, unsigned oldind, unsigned newind); unsigned arena_nbound(unsigned ind); void thread_allocated_cleanup(tsd_t *tsd); void thread_deallocated_cleanup(tsd_t *tsd); void arena_cleanup(tsd_t *tsd); -void arenas_cache_cleanup(tsd_t *tsd); -void narenas_cache_cleanup(tsd_t *tsd); -void arenas_cache_bypass_cleanup(tsd_t *tsd); +void arenas_tdata_cleanup(tsd_t *tsd); +void narenas_tdata_cleanup(tsd_t *tsd); +void arenas_tdata_bypass_cleanup(tsd_t *tsd); void jemalloc_prefork(void); void jemalloc_postfork_parent(void); void jemalloc_postfork_child(void); @@ -535,6 +537,8 @@ size_t s2u_lookup(size_t size); size_t s2u(size_t size); size_t sa2u(size_t size, size_t alignment); arena_t *arena_choose(tsd_t *tsd, arena_t *arena); +arena_tdata_t *arena_tdata_get(tsd_t *tsd, unsigned ind, + bool refresh_if_missing); arena_t *arena_get(tsd_t *tsd, unsigned ind, bool init_if_missing, bool refresh_if_missing); #endif @@ -785,32 +789,45 @@ arena_choose(tsd_t *tsd, arena_t *arena) return (ret); } +JEMALLOC_INLINE arena_tdata_t * +arena_tdata_get(tsd_t *tsd, unsigned ind, bool refresh_if_missing) +{ + arena_tdata_t *tdata; + arena_tdata_t *arenas_tdata = tsd_arenas_tdata_get(tsd); + + if (unlikely(arenas_tdata == NULL)) { + /* arenas_tdata hasn't been initialized yet. */ + return (arena_tdata_get_hard(tsd, ind)); + } + if (unlikely(ind >= tsd_narenas_tdata_get(tsd))) { + /* + * ind is invalid, cache is old (too small), or tdata to be + * initialized. + */ + return (refresh_if_missing ? 
arena_tdata_get_hard(tsd, ind) : + NULL); + } + + tdata = &arenas_tdata[ind]; + if (likely(tdata != NULL) || !refresh_if_missing) + return (tdata); + return (arena_tdata_get_hard(tsd, ind)); +} + JEMALLOC_INLINE arena_t * arena_get(tsd_t *tsd, unsigned ind, bool init_if_missing, bool refresh_if_missing) { - arena_t *arena; - arena_t **arenas_cache = tsd_arenas_cache_get(tsd); + arena_tdata_t *tdata; /* init_if_missing requires refresh_if_missing. */ assert(!init_if_missing || refresh_if_missing); - if (unlikely(arenas_cache == NULL)) { - /* arenas_cache hasn't been initialized yet. */ - return (arena_get_hard(tsd, ind, init_if_missing)); - } - if (unlikely(ind >= tsd_narenas_cache_get(tsd))) { - /* - * ind is invalid, cache is old (too small), or arena to be - * initialized. - */ - return (refresh_if_missing ? arena_get_hard(tsd, ind, - init_if_missing) : NULL); - } - arena = arenas_cache[ind]; - if (likely(arena != NULL) || !refresh_if_missing) - return (arena); - return (arena_get_hard(tsd, ind, init_if_missing)); + tdata = arena_tdata_get(tsd, ind, refresh_if_missing); + if (unlikely(tdata == NULL || tdata->arena == NULL)) + return (arena_get_hard(tsd, ind, init_if_missing, tdata)); + + return (tdata->arena); } #endif diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 87b5a919..a0e6d8ab 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -93,11 +93,13 @@ arena_redzone_corruption arena_run_regind arena_run_to_miscelm arena_salloc -arenas_cache_bypass_cleanup -arenas_cache_cleanup +arenas_tdata_bypass_cleanup +arenas_tdata_cleanup arena_sdalloc arena_stats_merge arena_tcache_fill_small +arena_tdata_get +arena_tdata_get_hard atomic_add_p atomic_add_u atomic_add_uint32 @@ -311,7 +313,7 @@ map_bias map_misc_offset mb_write mutex_boot -narenas_cache_cleanup +narenas_tdata_cleanup narenas_total_get ncpus nhbins diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index eed7aa01..16cc2f17 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -537,9 +537,9 @@ struct tsd_init_head_s { O(thread_deallocated, uint64_t) \ O(prof_tdata, prof_tdata_t *) \ O(arena, arena_t *) \ - O(arenas_cache, arena_t **) \ - O(narenas_cache, unsigned) \ - O(arenas_cache_bypass, bool) \ + O(arenas_tdata, arena_tdata_t *) \ + O(narenas_tdata, unsigned) \ + O(arenas_tdata_bypass, bool) \ O(tcache_enabled, tcache_enabled_t) \ O(quarantine, quarantine_t *) \ diff --git a/src/jemalloc.c b/src/jemalloc.c index 8415c0e2..d2b2afce 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -516,74 +516,99 @@ arena_unbind(tsd_t *tsd, unsigned ind) tsd_arena_set(tsd, NULL); } -arena_t * -arena_get_hard(tsd_t *tsd, unsigned ind, bool init_if_missing) +arena_tdata_t * +arena_tdata_get_hard(tsd_t *tsd, unsigned ind) { - arena_t *arena; - arena_t **arenas_cache = tsd_arenas_cache_get(tsd); - unsigned narenas_cache = tsd_narenas_cache_get(tsd); + arena_tdata_t *tdata, *arenas_tdata_old; + arena_tdata_t *arenas_tdata = tsd_arenas_tdata_get(tsd); + unsigned narenas_tdata_old, i; + unsigned narenas_tdata = tsd_narenas_tdata_get(tsd); unsigned narenas_actual = narenas_total_get(); - /* Deallocate old cache if it's too small. 
*/ - if (arenas_cache != NULL && narenas_cache < narenas_actual) { - a0dalloc(arenas_cache); - arenas_cache = NULL; - narenas_cache = 0; - tsd_arenas_cache_set(tsd, arenas_cache); - tsd_narenas_cache_set(tsd, narenas_cache); + /* + * Dissociate old tdata array (and set up for deallocation upon return) + * if it's too small. + */ + if (arenas_tdata != NULL && narenas_tdata < narenas_actual) { + arenas_tdata_old = arenas_tdata; + narenas_tdata_old = narenas_tdata; + arenas_tdata = NULL; + narenas_tdata = 0; + tsd_arenas_tdata_set(tsd, arenas_tdata); + tsd_narenas_tdata_set(tsd, narenas_tdata); + } else { + arenas_tdata_old = NULL; + narenas_tdata_old = 0; } - /* Allocate cache if it's missing. */ - if (arenas_cache == NULL) { - bool *arenas_cache_bypassp = tsd_arenas_cache_bypassp_get(tsd); - assert(ind < narenas_actual || !init_if_missing); - narenas_cache = (ind < narenas_actual) ? narenas_actual : ind+1; + /* Allocate tdata array if it's missing. */ + if (arenas_tdata == NULL) { + bool *arenas_tdata_bypassp = tsd_arenas_tdata_bypassp_get(tsd); + narenas_tdata = (ind < narenas_actual) ? narenas_actual : ind+1; - if (tsd_nominal(tsd) && !*arenas_cache_bypassp) { - *arenas_cache_bypassp = true; - arenas_cache = (arena_t **)a0malloc(sizeof(arena_t *) * - narenas_cache); - *arenas_cache_bypassp = false; + if (tsd_nominal(tsd) && !*arenas_tdata_bypassp) { + *arenas_tdata_bypassp = true; + arenas_tdata = (arena_tdata_t *)a0malloc( + sizeof(arena_tdata_t) * narenas_tdata); + *arenas_tdata_bypassp = false; } - if (arenas_cache == NULL) { - /* - * This function must always tell the truth, even if - * it's slow, so don't let OOM, thread cleanup (note - * tsd_nominal check), nor recursive allocation - * avoidance (note arenas_cache_bypass check) get in the - * way. - */ - if (ind >= narenas_actual) - return (NULL); - malloc_mutex_lock(&arenas_lock); - arena = arenas[ind]; - malloc_mutex_unlock(&arenas_lock); - return (arena); + if (arenas_tdata == NULL) { + tdata = NULL; + goto label_return; } - assert(tsd_nominal(tsd) && !*arenas_cache_bypassp); - tsd_arenas_cache_set(tsd, arenas_cache); - tsd_narenas_cache_set(tsd, narenas_cache); + assert(tsd_nominal(tsd) && !*arenas_tdata_bypassp); + tsd_arenas_tdata_set(tsd, arenas_tdata); + tsd_narenas_tdata_set(tsd, narenas_tdata); } /* - * Copy to cache. It's possible that the actual number of arenas has - * increased since narenas_total_get() was called above, but that causes - * no correctness issues unless two threads concurrently execute the - * arenas.extend mallctl, which we trust mallctl synchronization to + * Copy to tdata array. It's possible that the actual number of arenas + * has increased since narenas_total_get() was called above, but that + * causes no correctness issues unless two threads concurrently execute + * the arenas.extend mallctl, which we trust mallctl synchronization to * prevent. */ malloc_mutex_lock(&arenas_lock); - memcpy(arenas_cache, arenas, sizeof(arena_t *) * narenas_actual); + for (i = 0; i < narenas_actual; i++) + arenas_tdata[i].arena = arenas[i]; malloc_mutex_unlock(&arenas_lock); - if (narenas_cache > narenas_actual) { - memset(&arenas_cache[narenas_actual], 0, sizeof(arena_t *) * - (narenas_cache - narenas_actual)); + if (narenas_tdata > narenas_actual) { + memset(&arenas_tdata[narenas_actual], 0, sizeof(arena_tdata_t) + * (narenas_tdata - narenas_actual)); } - /* Read the refreshed cache, and init the arena if necessary. 
*/ - arena = arenas_cache[ind]; - if (init_if_missing && arena == NULL) - arena = arenas_cache[ind] = arena_init(ind); + /* Read the refreshed tdata array. */ + tdata = &arenas_tdata[ind]; +label_return: + if (arenas_tdata_old != NULL) + a0dalloc(arenas_tdata_old); + return (tdata); +} + +arena_t * +arena_get_hard(tsd_t *tsd, unsigned ind, bool init_if_missing, + arena_tdata_t *tdata) +{ + arena_t *arena; + unsigned narenas_actual; + + if (init_if_missing && tdata != NULL) { + tdata->arena = arena_init(ind); + if (tdata->arena != NULL) + return (tdata->arena); + } + + /* + * This function must always tell the truth, even if it's slow, so don't + * let OOM, thread cleanup (note tsd_nominal check), nor recursive + * allocation avoidance (note arenas_tdata_bypass check) get in the way. + */ + narenas_actual = narenas_total_get(); + if (ind >= narenas_actual) + return (NULL); + malloc_mutex_lock(&arenas_lock); + arena = arenas[ind]; + malloc_mutex_unlock(&arenas_lock); return (arena); } @@ -674,26 +699,26 @@ arena_cleanup(tsd_t *tsd) } void -arenas_cache_cleanup(tsd_t *tsd) +arenas_tdata_cleanup(tsd_t *tsd) { - arena_t **arenas_cache; + arena_tdata_t *arenas_tdata; - arenas_cache = tsd_arenas_cache_get(tsd); - if (arenas_cache != NULL) { - tsd_arenas_cache_set(tsd, NULL); - a0dalloc(arenas_cache); + arenas_tdata = tsd_arenas_tdata_get(tsd); + if (arenas_tdata != NULL) { + tsd_arenas_tdata_set(tsd, NULL); + a0dalloc(arenas_tdata); } } void -narenas_cache_cleanup(tsd_t *tsd) +narenas_tdata_cleanup(tsd_t *tsd) { /* Do nothing. */ } void -arenas_cache_bypass_cleanup(tsd_t *tsd) +arenas_tdata_bypass_cleanup(tsd_t *tsd) { /* Do nothing. */ diff --git a/src/tsd.c b/src/tsd.c index 9ffe9afe..b85b8b9d 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -113,7 +113,7 @@ malloc_tsd_boot0(void) ncleanups = 0; if (tsd_boot0()) return (true); - *tsd_arenas_cache_bypassp_get(tsd_fetch()) = true; + *tsd_arenas_tdata_bypassp_get(tsd_fetch()) = true; return (false); } @@ -122,7 +122,7 @@ malloc_tsd_boot1(void) { tsd_boot1(); - *tsd_arenas_cache_bypassp_get(tsd_fetch()) = false; + *tsd_arenas_tdata_bypassp_get(tsd_fetch()) = false; } #ifdef _WIN32 From 1a4ad3c0fab470c9a720a40c4433532d98bd9adc Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 19 Feb 2016 19:51:23 -0800 Subject: [PATCH 31/96] Refactor out arena_compute_npurge(). Refactor out arena_compute_npurge() by integrating its logic into arena_stash_dirty() as an incremental computation. --- src/arena.c | 92 +++++++++++++++++++++++++---------------------------- 1 file changed, 43 insertions(+), 49 deletions(-) diff --git a/src/arena.c b/src/arena.c index 68220d7c..47b136b6 100644 --- a/src/arena.c +++ b/src/arena.c @@ -23,7 +23,7 @@ unsigned nhclasses; /* Number of huge size classes. */ * definition. */ -static void arena_purge(arena_t *arena, bool all); +static void arena_purge_to_limit(arena_t *arena, size_t ndirty_limit); static void arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, bool cleaned, bool decommitted); static void arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, @@ -1205,16 +1205,14 @@ arena_lg_dirty_mult_set(arena_t *arena, ssize_t lg_dirty_mult) return (false); } -void -arena_maybe_purge(arena_t *arena) +static void +arena_maybe_purge_ratio(arena_t *arena) { /* Don't purge if the option is disabled. */ if (arena->lg_dirty_mult < 0) return; - /* Don't recursively purge. */ - if (arena->purging) - return; + /* * Iterate, since preventing recursive purging could otherwise leave too * many dirty pages. 
@@ -1229,10 +1227,21 @@ arena_maybe_purge(arena_t *arena) */ if (arena->ndirty <= threshold) return; - arena_purge(arena, false); + arena_purge_to_limit(arena, threshold); } } +void +arena_maybe_purge(arena_t *arena) +{ + + /* Don't recursively purge. */ + if (arena->purging) + return; + + arena_maybe_purge_ratio(arena); +} + static size_t arena_dirty_count(arena_t *arena) { @@ -1268,35 +1277,15 @@ arena_dirty_count(arena_t *arena) } static size_t -arena_compute_npurge(arena_t *arena, bool all) -{ - size_t npurge; - - /* - * Compute the minimum number of pages that this thread should try to - * purge. - */ - if (!all) { - size_t threshold = (arena->nactive >> arena->lg_dirty_mult); - threshold = threshold < chunk_npages ? chunk_npages : threshold; - - npurge = arena->ndirty - threshold; - } else - npurge = arena->ndirty; - - return (npurge); -} - -static size_t -arena_stash_dirty(arena_t *arena, chunk_hooks_t *chunk_hooks, bool all, - size_t npurge, arena_runs_dirty_link_t *purge_runs_sentinel, +arena_stash_dirty(arena_t *arena, chunk_hooks_t *chunk_hooks, + size_t ndirty_limit, arena_runs_dirty_link_t *purge_runs_sentinel, extent_node_t *purge_chunks_sentinel) { arena_runs_dirty_link_t *rdelm, *rdelm_next; extent_node_t *chunkselm; size_t nstashed = 0; - /* Stash at least npurge pages. */ + /* Stash runs/chunks according to ndirty_limit. */ for (rdelm = qr_next(&arena->runs_dirty, rd_link), chunkselm = qr_next(&arena->chunks_cache, cc_link); rdelm != &arena->runs_dirty; rdelm = rdelm_next) { @@ -1308,6 +1297,8 @@ arena_stash_dirty(arena_t *arena, chunk_hooks_t *chunk_hooks, bool all, bool zero; UNUSED void *chunk; + npages = extent_node_size_get(chunkselm) >> LG_PAGE; + chunkselm_next = qr_next(chunkselm, cc_link); /* * Allocate. chunkselm remains valid due to the @@ -1322,7 +1313,8 @@ arena_stash_dirty(arena_t *arena, chunk_hooks_t *chunk_hooks, bool all, assert(zero == extent_node_zeroed_get(chunkselm)); extent_node_dirty_insert(chunkselm, purge_runs_sentinel, purge_chunks_sentinel); - npages = extent_node_size_get(chunkselm) >> LG_PAGE; + assert(npages == (extent_node_size_get(chunkselm) >> + LG_PAGE)); chunkselm = chunkselm_next; } else { arena_chunk_t *chunk = @@ -1360,7 +1352,7 @@ arena_stash_dirty(arena_t *arena, chunk_hooks_t *chunk_hooks, bool all, } nstashed += npages; - if (!all && nstashed >= npurge) + if (arena->ndirty - nstashed <= ndirty_limit) break; } @@ -1501,10 +1493,10 @@ arena_unstash_purged(arena_t *arena, chunk_hooks_t *chunk_hooks, } static void -arena_purge(arena_t *arena, bool all) +arena_purge_to_limit(arena_t *arena, size_t ndirty_limit) { chunk_hooks_t chunk_hooks = chunk_hooks_get(arena); - size_t npurge, npurgeable, npurged; + size_t npurge, npurged; arena_runs_dirty_link_t purge_runs_sentinel; extent_node_t purge_chunks_sentinel; @@ -1518,24 +1510,26 @@ arena_purge(arena_t *arena, bool all) size_t ndirty = arena_dirty_count(arena); assert(ndirty == arena->ndirty); } - assert((arena->nactive >> arena->lg_dirty_mult) < arena->ndirty || all); + assert((arena->nactive >> arena->lg_dirty_mult) < arena->ndirty || + ndirty_limit == 0); + + qr_new(&purge_runs_sentinel, rd_link); + extent_node_dirty_linkage_init(&purge_chunks_sentinel); + + npurge = arena_stash_dirty(arena, &chunk_hooks, ndirty_limit, + &purge_runs_sentinel, &purge_chunks_sentinel); + if (npurge == 0) + goto label_return; + npurged = arena_purge_stashed(arena, &chunk_hooks, &purge_runs_sentinel, + &purge_chunks_sentinel); + assert(npurged == npurge); + arena_unstash_purged(arena, &chunk_hooks, 
&purge_runs_sentinel, + &purge_chunks_sentinel); if (config_stats) arena->stats.npurge++; - npurge = arena_compute_npurge(arena, all); - qr_new(&purge_runs_sentinel, rd_link); - extent_node_dirty_linkage_init(&purge_chunks_sentinel); - - npurgeable = arena_stash_dirty(arena, &chunk_hooks, all, npurge, - &purge_runs_sentinel, &purge_chunks_sentinel); - assert(npurgeable >= npurge); - npurged = arena_purge_stashed(arena, &chunk_hooks, &purge_runs_sentinel, - &purge_chunks_sentinel); - assert(npurged == npurgeable); - arena_unstash_purged(arena, &chunk_hooks, &purge_runs_sentinel, - &purge_chunks_sentinel); - +label_return: arena->purging = false; } @@ -1544,7 +1538,7 @@ arena_purge_all(arena_t *arena) { malloc_mutex_lock(&arena->lock); - arena_purge(arena, true); + arena_purge_to_limit(arena, 0); malloc_mutex_unlock(&arena->lock); } From 8e82af1166242bebd29289d2b16ce447273b427a Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 6 Feb 2016 00:46:19 -0800 Subject: [PATCH 32/96] Implement smoothstep table generation. Check in a generated smootherstep table as smoothstep.h rather than generating it at configure time, since not all systems (e.g. Windows) have dc. --- Makefile.in | 1 + .../jemalloc/internal/jemalloc_internal.h.in | 4 + include/jemalloc/internal/smoothstep.h | 246 ++++++++++++++++++ include/jemalloc/internal/smoothstep.sh | 115 ++++++++ test/unit/smoothstep.c | 106 ++++++++ 5 files changed, 472 insertions(+) create mode 100644 include/jemalloc/internal/smoothstep.h create mode 100755 include/jemalloc/internal/smoothstep.sh create mode 100644 test/unit/smoothstep.c diff --git a/Makefile.in b/Makefile.in index 6b210fee..9530aa8e 100644 --- a/Makefile.in +++ b/Makefile.in @@ -144,6 +144,7 @@ TESTS_UNIT := $(srcroot)test/unit/atomic.c \ $(srcroot)test/unit/rtree.c \ $(srcroot)test/unit/SFMT.c \ $(srcroot)test/unit/size_classes.c \ + $(srcroot)test/unit/smoothstep.c \ $(srcroot)test/unit/stats.c \ $(srcroot)test/unit/ticker.c \ $(srcroot)test/unit/time.c \ diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 760dbdda..e84c4357 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -364,6 +364,7 @@ typedef unsigned szind_t; #include "jemalloc/internal/ticker.h" #include "jemalloc/internal/ckh.h" #include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/smoothstep.h" #include "jemalloc/internal/stats.h" #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/mutex.h" @@ -394,6 +395,7 @@ typedef unsigned szind_t; #include "jemalloc/internal/ticker.h" #include "jemalloc/internal/ckh.h" #include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/smoothstep.h" #include "jemalloc/internal/stats.h" #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/mutex.h" @@ -483,6 +485,7 @@ void jemalloc_postfork_child(void); #include "jemalloc/internal/ticker.h" #include "jemalloc/internal/ckh.h" #include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/smoothstep.h" #include "jemalloc/internal/stats.h" #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/mutex.h" @@ -513,6 +516,7 @@ void jemalloc_postfork_child(void); #include "jemalloc/internal/ticker.h" #include "jemalloc/internal/ckh.h" #include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/smoothstep.h" #include "jemalloc/internal/stats.h" #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/mutex.h" diff --git 
a/include/jemalloc/internal/smoothstep.h b/include/jemalloc/internal/smoothstep.h new file mode 100644 index 00000000..c5333cca --- /dev/null +++ b/include/jemalloc/internal/smoothstep.h @@ -0,0 +1,246 @@ +/* + * This file was generated by the following command: + * sh smoothstep.sh smoother 200 24 3 15 + */ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +/* + * This header defines a precomputed table based on the smoothstep family of + * sigmoidal curves (https://en.wikipedia.org/wiki/Smoothstep) that grow from 0 + * to 1 in 0 <= x <= 1. The table is stored as integer fixed point values so + * that floating point math can be avoided. + * + * 3 2 + * smoothstep(x) = -2x + 3x + * + * 5 4 3 + * smootherstep(x) = 6x - 15x + 10x + * + * 7 6 5 4 + * smootheststep(x) = -20x + 70x - 84x + 35x + */ + +#define SMOOTHSTEP_VARIANT "smoother" +#define SMOOTHSTEP_NSTEPS 200 +#define SMOOTHSTEP_BFP 24 +#define SMOOTHSTEP \ + /* STEP(step, h, x, y) */ \ + STEP( 1, UINT64_C(0x0000000000000014), 0.005, 0.000001240643750) \ + STEP( 2, UINT64_C(0x00000000000000a5), 0.010, 0.000009850600000) \ + STEP( 3, UINT64_C(0x0000000000000229), 0.015, 0.000032995181250) \ + STEP( 4, UINT64_C(0x0000000000000516), 0.020, 0.000077619200000) \ + STEP( 5, UINT64_C(0x00000000000009dc), 0.025, 0.000150449218750) \ + STEP( 6, UINT64_C(0x00000000000010e8), 0.030, 0.000257995800000) \ + STEP( 7, UINT64_C(0x0000000000001aa4), 0.035, 0.000406555756250) \ + STEP( 8, UINT64_C(0x0000000000002777), 0.040, 0.000602214400000) \ + STEP( 9, UINT64_C(0x00000000000037c2), 0.045, 0.000850847793750) \ + STEP( 10, UINT64_C(0x0000000000004be6), 0.050, 0.001158125000000) \ + STEP( 11, UINT64_C(0x000000000000643c), 0.055, 0.001529510331250) \ + STEP( 12, UINT64_C(0x000000000000811f), 0.060, 0.001970265600000) \ + STEP( 13, UINT64_C(0x000000000000a2e2), 0.065, 0.002485452368750) \ + STEP( 14, UINT64_C(0x000000000000c9d8), 0.070, 0.003079934200000) \ + STEP( 15, UINT64_C(0x000000000000f64f), 0.075, 0.003758378906250) \ + STEP( 16, UINT64_C(0x0000000000012891), 0.080, 0.004525260800000) \ + STEP( 17, UINT64_C(0x00000000000160e7), 0.085, 0.005384862943750) \ + STEP( 18, UINT64_C(0x0000000000019f95), 0.090, 0.006341279400000) \ + STEP( 19, UINT64_C(0x000000000001e4dc), 0.095, 0.007398417481250) \ + STEP( 20, UINT64_C(0x00000000000230fc), 0.100, 0.008560000000000) \ + STEP( 21, UINT64_C(0x0000000000028430), 0.105, 0.009829567518750) \ + STEP( 22, UINT64_C(0x000000000002deb0), 0.110, 0.011210480600000) \ + STEP( 23, UINT64_C(0x00000000000340b1), 0.115, 0.012705922056250) \ + STEP( 24, UINT64_C(0x000000000003aa67), 0.120, 0.014318899200000) \ + STEP( 25, UINT64_C(0x0000000000041c00), 0.125, 0.016052246093750) \ + STEP( 26, UINT64_C(0x00000000000495a8), 0.130, 0.017908625800000) \ + STEP( 27, UINT64_C(0x000000000005178b), 0.135, 0.019890532631250) \ + STEP( 28, UINT64_C(0x000000000005a1cf), 0.140, 0.022000294400000) \ + STEP( 29, UINT64_C(0x0000000000063498), 0.145, 0.024240074668750) \ + STEP( 30, UINT64_C(0x000000000006d009), 0.150, 0.026611875000000) \ + STEP( 31, UINT64_C(0x000000000007743f), 0.155, 0.029117537206250) \ + STEP( 32, UINT64_C(0x0000000000082157), 0.160, 0.031758745600000) \ + STEP( 33, UINT64_C(0x000000000008d76b), 0.165, 0.034537029243750) \ + STEP( 34, UINT64_C(0x0000000000099691), 0.170, 0.037453764200000) \ + STEP( 35, UINT64_C(0x00000000000a5edf), 0.175, 0.040510175781250) \ + STEP( 36, UINT64_C(0x00000000000b3067), 0.180, 0.043707340800000) \ + STEP( 37, 
UINT64_C(0x00000000000c0b38), 0.185, 0.047046189818750) \ + STEP( 38, UINT64_C(0x00000000000cef5e), 0.190, 0.050527509400000) \ + STEP( 39, UINT64_C(0x00000000000ddce6), 0.195, 0.054151944356250) \ + STEP( 40, UINT64_C(0x00000000000ed3d8), 0.200, 0.057920000000000) \ + STEP( 41, UINT64_C(0x00000000000fd439), 0.205, 0.061832044393750) \ + STEP( 42, UINT64_C(0x000000000010de0e), 0.210, 0.065888310600000) \ + STEP( 43, UINT64_C(0x000000000011f158), 0.215, 0.070088898931250) \ + STEP( 44, UINT64_C(0x0000000000130e17), 0.220, 0.074433779200000) \ + STEP( 45, UINT64_C(0x0000000000143448), 0.225, 0.078922792968750) \ + STEP( 46, UINT64_C(0x00000000001563e7), 0.230, 0.083555655800000) \ + STEP( 47, UINT64_C(0x0000000000169cec), 0.235, 0.088331959506250) \ + STEP( 48, UINT64_C(0x000000000017df4f), 0.240, 0.093251174400000) \ + STEP( 49, UINT64_C(0x0000000000192b04), 0.245, 0.098312651543750) \ + STEP( 50, UINT64_C(0x00000000001a8000), 0.250, 0.103515625000000) \ + STEP( 51, UINT64_C(0x00000000001bde32), 0.255, 0.108859214081250) \ + STEP( 52, UINT64_C(0x00000000001d458b), 0.260, 0.114342425600000) \ + STEP( 53, UINT64_C(0x00000000001eb5f8), 0.265, 0.119964156118750) \ + STEP( 54, UINT64_C(0x0000000000202f65), 0.270, 0.125723194200000) \ + STEP( 55, UINT64_C(0x000000000021b1bb), 0.275, 0.131618222656250) \ + STEP( 56, UINT64_C(0x0000000000233ce3), 0.280, 0.137647820800000) \ + STEP( 57, UINT64_C(0x000000000024d0c3), 0.285, 0.143810466693750) \ + STEP( 58, UINT64_C(0x0000000000266d40), 0.290, 0.150104539400000) \ + STEP( 59, UINT64_C(0x000000000028123d), 0.295, 0.156528321231250) \ + STEP( 60, UINT64_C(0x000000000029bf9c), 0.300, 0.163080000000000) \ + STEP( 61, UINT64_C(0x00000000002b753d), 0.305, 0.169757671268750) \ + STEP( 62, UINT64_C(0x00000000002d32fe), 0.310, 0.176559340600000) \ + STEP( 63, UINT64_C(0x00000000002ef8bc), 0.315, 0.183482925806250) \ + STEP( 64, UINT64_C(0x000000000030c654), 0.320, 0.190526259200000) \ + STEP( 65, UINT64_C(0x0000000000329b9f), 0.325, 0.197687089843750) \ + STEP( 66, UINT64_C(0x0000000000347875), 0.330, 0.204963085800000) \ + STEP( 67, UINT64_C(0x0000000000365cb0), 0.335, 0.212351836381250) \ + STEP( 68, UINT64_C(0x0000000000384825), 0.340, 0.219850854400000) \ + STEP( 69, UINT64_C(0x00000000003a3aa8), 0.345, 0.227457578418750) \ + STEP( 70, UINT64_C(0x00000000003c340f), 0.350, 0.235169375000000) \ + STEP( 71, UINT64_C(0x00000000003e342b), 0.355, 0.242983540956250) \ + STEP( 72, UINT64_C(0x0000000000403ace), 0.360, 0.250897305600000) \ + STEP( 73, UINT64_C(0x00000000004247c8), 0.365, 0.258907832993750) \ + STEP( 74, UINT64_C(0x0000000000445ae9), 0.370, 0.267012224200000) \ + STEP( 75, UINT64_C(0x0000000000467400), 0.375, 0.275207519531250) \ + STEP( 76, UINT64_C(0x00000000004892d8), 0.380, 0.283490700800000) \ + STEP( 77, UINT64_C(0x00000000004ab740), 0.385, 0.291858693568750) \ + STEP( 78, UINT64_C(0x00000000004ce102), 0.390, 0.300308369400000) \ + STEP( 79, UINT64_C(0x00000000004f0fe9), 0.395, 0.308836548106250) \ + STEP( 80, UINT64_C(0x00000000005143bf), 0.400, 0.317440000000000) \ + STEP( 81, UINT64_C(0x0000000000537c4d), 0.405, 0.326115448143750) \ + STEP( 82, UINT64_C(0x000000000055b95b), 0.410, 0.334859570600000) \ + STEP( 83, UINT64_C(0x000000000057fab1), 0.415, 0.343669002681250) \ + STEP( 84, UINT64_C(0x00000000005a4015), 0.420, 0.352540339200000) \ + STEP( 85, UINT64_C(0x00000000005c894e), 0.425, 0.361470136718750) \ + STEP( 86, UINT64_C(0x00000000005ed622), 0.430, 0.370454915800000) \ + STEP( 87, UINT64_C(0x0000000000612655), 0.435, 
0.379491163256250) \ + STEP( 88, UINT64_C(0x00000000006379ac), 0.440, 0.388575334400000) \ + STEP( 89, UINT64_C(0x000000000065cfeb), 0.445, 0.397703855293750) \ + STEP( 90, UINT64_C(0x00000000006828d6), 0.450, 0.406873125000000) \ + STEP( 91, UINT64_C(0x00000000006a842f), 0.455, 0.416079517831250) \ + STEP( 92, UINT64_C(0x00000000006ce1bb), 0.460, 0.425319385600000) \ + STEP( 93, UINT64_C(0x00000000006f413a), 0.465, 0.434589059868750) \ + STEP( 94, UINT64_C(0x000000000071a270), 0.470, 0.443884854200000) \ + STEP( 95, UINT64_C(0x000000000074051d), 0.475, 0.453203066406250) \ + STEP( 96, UINT64_C(0x0000000000766905), 0.480, 0.462539980800000) \ + STEP( 97, UINT64_C(0x000000000078cde7), 0.485, 0.471891870443750) \ + STEP( 98, UINT64_C(0x00000000007b3387), 0.490, 0.481254999400000) \ + STEP( 99, UINT64_C(0x00000000007d99a4), 0.495, 0.490625624981250) \ + STEP( 100, UINT64_C(0x0000000000800000), 0.500, 0.500000000000000) \ + STEP( 101, UINT64_C(0x000000000082665b), 0.505, 0.509374375018750) \ + STEP( 102, UINT64_C(0x000000000084cc78), 0.510, 0.518745000600000) \ + STEP( 103, UINT64_C(0x0000000000873218), 0.515, 0.528108129556250) \ + STEP( 104, UINT64_C(0x00000000008996fa), 0.520, 0.537460019200000) \ + STEP( 105, UINT64_C(0x00000000008bfae2), 0.525, 0.546796933593750) \ + STEP( 106, UINT64_C(0x00000000008e5d8f), 0.530, 0.556115145800000) \ + STEP( 107, UINT64_C(0x000000000090bec5), 0.535, 0.565410940131250) \ + STEP( 108, UINT64_C(0x0000000000931e44), 0.540, 0.574680614400000) \ + STEP( 109, UINT64_C(0x0000000000957bd0), 0.545, 0.583920482168750) \ + STEP( 110, UINT64_C(0x000000000097d729), 0.550, 0.593126875000000) \ + STEP( 111, UINT64_C(0x00000000009a3014), 0.555, 0.602296144706250) \ + STEP( 112, UINT64_C(0x00000000009c8653), 0.560, 0.611424665600000) \ + STEP( 113, UINT64_C(0x00000000009ed9aa), 0.565, 0.620508836743750) \ + STEP( 114, UINT64_C(0x0000000000a129dd), 0.570, 0.629545084200000) \ + STEP( 115, UINT64_C(0x0000000000a376b1), 0.575, 0.638529863281250) \ + STEP( 116, UINT64_C(0x0000000000a5bfea), 0.580, 0.647459660800000) \ + STEP( 117, UINT64_C(0x0000000000a8054e), 0.585, 0.656330997318750) \ + STEP( 118, UINT64_C(0x0000000000aa46a4), 0.590, 0.665140429400000) \ + STEP( 119, UINT64_C(0x0000000000ac83b2), 0.595, 0.673884551856250) \ + STEP( 120, UINT64_C(0x0000000000aebc40), 0.600, 0.682560000000000) \ + STEP( 121, UINT64_C(0x0000000000b0f016), 0.605, 0.691163451893750) \ + STEP( 122, UINT64_C(0x0000000000b31efd), 0.610, 0.699691630600000) \ + STEP( 123, UINT64_C(0x0000000000b548bf), 0.615, 0.708141306431250) \ + STEP( 124, UINT64_C(0x0000000000b76d27), 0.620, 0.716509299200000) \ + STEP( 125, UINT64_C(0x0000000000b98c00), 0.625, 0.724792480468750) \ + STEP( 126, UINT64_C(0x0000000000bba516), 0.630, 0.732987775800000) \ + STEP( 127, UINT64_C(0x0000000000bdb837), 0.635, 0.741092167006250) \ + STEP( 128, UINT64_C(0x0000000000bfc531), 0.640, 0.749102694400000) \ + STEP( 129, UINT64_C(0x0000000000c1cbd4), 0.645, 0.757016459043750) \ + STEP( 130, UINT64_C(0x0000000000c3cbf0), 0.650, 0.764830625000000) \ + STEP( 131, UINT64_C(0x0000000000c5c557), 0.655, 0.772542421581250) \ + STEP( 132, UINT64_C(0x0000000000c7b7da), 0.660, 0.780149145600000) \ + STEP( 133, UINT64_C(0x0000000000c9a34f), 0.665, 0.787648163618750) \ + STEP( 134, UINT64_C(0x0000000000cb878a), 0.670, 0.795036914200000) \ + STEP( 135, UINT64_C(0x0000000000cd6460), 0.675, 0.802312910156250) \ + STEP( 136, UINT64_C(0x0000000000cf39ab), 0.680, 0.809473740800000) \ + STEP( 137, UINT64_C(0x0000000000d10743), 0.685, 
0.816517074193750) \ + STEP( 138, UINT64_C(0x0000000000d2cd01), 0.690, 0.823440659400000) \ + STEP( 139, UINT64_C(0x0000000000d48ac2), 0.695, 0.830242328731250) \ + STEP( 140, UINT64_C(0x0000000000d64063), 0.700, 0.836920000000000) \ + STEP( 141, UINT64_C(0x0000000000d7edc2), 0.705, 0.843471678768750) \ + STEP( 142, UINT64_C(0x0000000000d992bf), 0.710, 0.849895460600000) \ + STEP( 143, UINT64_C(0x0000000000db2f3c), 0.715, 0.856189533306250) \ + STEP( 144, UINT64_C(0x0000000000dcc31c), 0.720, 0.862352179200000) \ + STEP( 145, UINT64_C(0x0000000000de4e44), 0.725, 0.868381777343750) \ + STEP( 146, UINT64_C(0x0000000000dfd09a), 0.730, 0.874276805800000) \ + STEP( 147, UINT64_C(0x0000000000e14a07), 0.735, 0.880035843881250) \ + STEP( 148, UINT64_C(0x0000000000e2ba74), 0.740, 0.885657574400000) \ + STEP( 149, UINT64_C(0x0000000000e421cd), 0.745, 0.891140785918750) \ + STEP( 150, UINT64_C(0x0000000000e58000), 0.750, 0.896484375000000) \ + STEP( 151, UINT64_C(0x0000000000e6d4fb), 0.755, 0.901687348456250) \ + STEP( 152, UINT64_C(0x0000000000e820b0), 0.760, 0.906748825600000) \ + STEP( 153, UINT64_C(0x0000000000e96313), 0.765, 0.911668040493750) \ + STEP( 154, UINT64_C(0x0000000000ea9c18), 0.770, 0.916444344200000) \ + STEP( 155, UINT64_C(0x0000000000ebcbb7), 0.775, 0.921077207031250) \ + STEP( 156, UINT64_C(0x0000000000ecf1e8), 0.780, 0.925566220800000) \ + STEP( 157, UINT64_C(0x0000000000ee0ea7), 0.785, 0.929911101068750) \ + STEP( 158, UINT64_C(0x0000000000ef21f1), 0.790, 0.934111689400000) \ + STEP( 159, UINT64_C(0x0000000000f02bc6), 0.795, 0.938167955606250) \ + STEP( 160, UINT64_C(0x0000000000f12c27), 0.800, 0.942080000000000) \ + STEP( 161, UINT64_C(0x0000000000f22319), 0.805, 0.945848055643750) \ + STEP( 162, UINT64_C(0x0000000000f310a1), 0.810, 0.949472490600000) \ + STEP( 163, UINT64_C(0x0000000000f3f4c7), 0.815, 0.952953810181250) \ + STEP( 164, UINT64_C(0x0000000000f4cf98), 0.820, 0.956292659200000) \ + STEP( 165, UINT64_C(0x0000000000f5a120), 0.825, 0.959489824218750) \ + STEP( 166, UINT64_C(0x0000000000f6696e), 0.830, 0.962546235800000) \ + STEP( 167, UINT64_C(0x0000000000f72894), 0.835, 0.965462970756250) \ + STEP( 168, UINT64_C(0x0000000000f7dea8), 0.840, 0.968241254400000) \ + STEP( 169, UINT64_C(0x0000000000f88bc0), 0.845, 0.970882462793750) \ + STEP( 170, UINT64_C(0x0000000000f92ff6), 0.850, 0.973388125000000) \ + STEP( 171, UINT64_C(0x0000000000f9cb67), 0.855, 0.975759925331250) \ + STEP( 172, UINT64_C(0x0000000000fa5e30), 0.860, 0.977999705600000) \ + STEP( 173, UINT64_C(0x0000000000fae874), 0.865, 0.980109467368750) \ + STEP( 174, UINT64_C(0x0000000000fb6a57), 0.870, 0.982091374200000) \ + STEP( 175, UINT64_C(0x0000000000fbe400), 0.875, 0.983947753906250) \ + STEP( 176, UINT64_C(0x0000000000fc5598), 0.880, 0.985681100800000) \ + STEP( 177, UINT64_C(0x0000000000fcbf4e), 0.885, 0.987294077943750) \ + STEP( 178, UINT64_C(0x0000000000fd214f), 0.890, 0.988789519400000) \ + STEP( 179, UINT64_C(0x0000000000fd7bcf), 0.895, 0.990170432481250) \ + STEP( 180, UINT64_C(0x0000000000fdcf03), 0.900, 0.991440000000000) \ + STEP( 181, UINT64_C(0x0000000000fe1b23), 0.905, 0.992601582518750) \ + STEP( 182, UINT64_C(0x0000000000fe606a), 0.910, 0.993658720600000) \ + STEP( 183, UINT64_C(0x0000000000fe9f18), 0.915, 0.994615137056250) \ + STEP( 184, UINT64_C(0x0000000000fed76e), 0.920, 0.995474739200000) \ + STEP( 185, UINT64_C(0x0000000000ff09b0), 0.925, 0.996241621093750) \ + STEP( 186, UINT64_C(0x0000000000ff3627), 0.930, 0.996920065800000) \ + STEP( 187, UINT64_C(0x0000000000ff5d1d), 0.935, 
0.997514547631250) \
+    STEP( 188, UINT64_C(0x0000000000ff7ee0), 0.940, 0.998029734400000) \
+    STEP( 189, UINT64_C(0x0000000000ff9bc3), 0.945, 0.998470489668750) \
+    STEP( 190, UINT64_C(0x0000000000ffb419), 0.950, 0.998841875000000) \
+    STEP( 191, UINT64_C(0x0000000000ffc83d), 0.955, 0.999149152206250) \
+    STEP( 192, UINT64_C(0x0000000000ffd888), 0.960, 0.999397785600000) \
+    STEP( 193, UINT64_C(0x0000000000ffe55b), 0.965, 0.999593444243750) \
+    STEP( 194, UINT64_C(0x0000000000ffef17), 0.970, 0.999742004200000) \
+    STEP( 195, UINT64_C(0x0000000000fff623), 0.975, 0.999849550781250) \
+    STEP( 196, UINT64_C(0x0000000000fffae9), 0.980, 0.999922380800000) \
+    STEP( 197, UINT64_C(0x0000000000fffdd6), 0.985, 0.999967004818750) \
+    STEP( 198, UINT64_C(0x0000000000ffff5a), 0.990, 0.999990149400000) \
+    STEP( 199, UINT64_C(0x0000000000ffffeb), 0.995, 0.999998759356250) \
+    STEP( 200, UINT64_C(0x0000000001000000), 1.000, 1.000000000000000) \
+
+#endif /* JEMALLOC_H_TYPES */
+/******************************************************************************/
+#ifdef JEMALLOC_H_STRUCTS
+
+
+#endif /* JEMALLOC_H_STRUCTS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_EXTERNS
+
+
+#endif /* JEMALLOC_H_EXTERNS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_INLINES
+
+
+#endif /* JEMALLOC_H_INLINES */
+/******************************************************************************/
diff --git a/include/jemalloc/internal/smoothstep.sh b/include/jemalloc/internal/smoothstep.sh
new file mode 100755
index 00000000..8124693f
--- /dev/null
+++ b/include/jemalloc/internal/smoothstep.sh
@@ -0,0 +1,115 @@
+#!/bin/sh
+#
+# Generate a discrete lookup table for a sigmoid function in the smoothstep
+# family (https://en.wikipedia.org/wiki/Smoothstep), where the lookup table
+# entries correspond to x in [1/nsteps, 2/nsteps, ..., nsteps/nsteps]. Encode
+# the entries using a binary fixed point representation.
+#
+# Usage: smoothstep.sh <variant> <nsteps> <bfp> <xprec> <yprec>
+#
+#        <variant> is in {smooth, smoother, smoothest}.
+#        <nsteps> must be greater than zero.
+#        <bfp> must be in [0..62]; reasonable values are roughly [10..30].
+#        <xprec> is x decimal precision.
+#        <yprec> is y decimal precision.
+
+#set -x
+
+cmd="sh smoothstep.sh $*"
+variant=$1
+nsteps=$2
+bfp=$3
+xprec=$4
+yprec=$5
+
+case "${variant}" in
+  smooth)
+    ;;
+  smoother)
+    ;;
+  smoothest)
+    ;;
+  *)
+    echo "Unsupported variant"
+    exit 1
+    ;;
+esac
+
+smooth() {
+  step=$1
+  y=`echo ${yprec} k ${step} ${nsteps} / sx _2 lx 3 ^ '*' 3 lx 2 ^ '*' + p | dc | tr -d '\\\\\n' | sed -e 's#^\.#0.#g'`
+  h=`echo ${yprec} k 2 ${bfp} ^ ${y} '*' p | dc | tr -d '\\\\\n' | sed -e 's#^\.#0.#g' | tr '.' ' ' | awk '{print $1}' `
+}
+
+smoother() {
+  step=$1
+  y=`echo ${yprec} k ${step} ${nsteps} / sx 6 lx 5 ^ '*' _15 lx 4 ^ '*' + 10 lx 3 ^ '*' + p | dc | tr -d '\\\\\n' | sed -e 's#^\.#0.#g'`
+  h=`echo ${yprec} k 2 ${bfp} ^ ${y} '*' p | dc | tr -d '\\\\\n' | sed -e 's#^\.#0.#g' | tr '.' ' ' | awk '{print $1}' `
+}
+
+smoothest() {
+  step=$1
+  y=`echo ${yprec} k ${step} ${nsteps} / sx _20 lx 7 ^ '*' 70 lx 6 ^ '*' + _84 lx 5 ^ '*' + 35 lx 4 ^ '*' + p | dc | tr -d '\\\\\n' | sed -e 's#^\.#0.#g'`
+  h=`echo ${yprec} k 2 ${bfp} ^ ${y} '*' p | dc | tr -d '\\\\\n' | sed -e 's#^\.#0.#g' | tr '.' ' ' | awk '{print $1}' `
+}
+
+cat <= SMOOTHSTEP_NSTEPS / 2; i--) {
+		uint64_t h = smoothstep_tab[i];
+		uint64_t delta = prev_h - h;
+		assert_u64_ge(delta, prev_delta,
+		    "Slope must monotonically decrease in 0.5 <= x <= 1.0, "
+		    "i=%u", i);
+		prev_h = h;
+		prev_delta = delta;
+	}
+}
+TEST_END
+
+int
+main(void)
+{
+
+	return (test(
+	    test_smoothstep_integral,
+	    test_smoothstep_monotonic,
+	    test_smoothstep_slope));
+}

From 243f7a0508bb014c2a7bf592c466a923911db234 Mon Sep 17 00:00:00 2001
From: Jason Evans
Date: Fri, 19 Feb 2016 20:09:31 -0800
Subject: [PATCH 33/96] Implement decay-based unused dirty page purging.

This is an alternative to the existing ratio-based unused dirty page
purging, and is intended to eventually become the sole purging mechanism.

Add mallctls:
- opt.purge
- opt.decay_time
- arena.<i>.decay
- arena.<i>.decay_time
- arenas.decay_time
- stats.arenas.<i>.decay_time

This resolves #325.
---
 Makefile.in                                   |  11 +-
 doc/jemalloc.xml.in                           |  95 ++++-
 include/jemalloc/internal/arena.h             | 119 +++++-
 include/jemalloc/internal/ctl.h               |   1 +
 include/jemalloc/internal/huge.h              |   4 +-
 .../jemalloc/internal/jemalloc_internal.h.in  |  22 +-
 include/jemalloc/internal/private_symbols.txt |  12 +-
 include/jemalloc/internal/tcache.h            |   2 +-
 include/jemalloc/internal/time.h              |   5 +
 src/arena.c                                   | 327 +++++++++++++++-
 src/ctl.c                                     | 166 ++++++--
 src/huge.c                                    |  25 +-
 src/jemalloc.c                                |  53 ++-
 src/stats.c                                   |  60 ++-
 src/tcache.c                                  |   4 +-
 src/time.c                                    |   9 +
 test/unit/decay.c                             | 370 ++++++++++++++++++
 test/unit/mallctl.c                           |  95 +++++
 18 files changed, 1268 insertions(+), 112 deletions(-)
 create mode 100644 test/unit/decay.c

diff --git a/Makefile.in b/Makefile.in
index 9530aa8e..e5681926 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -121,6 +121,7 @@ C_UTIL_INTEGRATION_SRCS := $(srcroot)src/time.c $(srcroot)src/util.c
 TESTS_UNIT := $(srcroot)test/unit/atomic.c \
 	$(srcroot)test/unit/bitmap.c \
 	$(srcroot)test/unit/ckh.c \
+	$(srcroot)test/unit/decay.c \
 	$(srcroot)test/unit/hash.c \
 	$(srcroot)test/unit/junk.c \
 	$(srcroot)test/unit/junk_alloc.c \
@@ -354,18 +355,22 @@ stress_dir:
 check_dir: check_unit_dir check_integration_dir
 
 check_unit: tests_unit check_unit_dir
-	$(SHELL) $(objroot)test/test.sh $(TESTS_UNIT:$(srcroot)%.c=$(objroot)%)
+	$(MALLOC_CONF)="purge:ratio" $(SHELL) $(objroot)test/test.sh $(TESTS_UNIT:$(srcroot)%.c=$(objroot)%)
+	$(MALLOC_CONF)="purge:decay" $(SHELL) $(objroot)test/test.sh $(TESTS_UNIT:$(srcroot)%.c=$(objroot)%)
 check_integration_prof: tests_integration check_integration_dir
 ifeq ($(enable_prof), 1)
 	$(MALLOC_CONF)="prof:true" $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%)
 	$(MALLOC_CONF)="prof:true,prof_active:false" $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%)
endif
+check_integration_decay: tests_integration check_integration_dir
+	$(MALLOC_CONF)="purge:decay,decay_time:-1" $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%)
+	$(MALLOC_CONF)="purge:decay,decay_time:0" $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%)
+	$(MALLOC_CONF)="purge:decay" $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%)
 check_integration: tests_integration check_integration_dir
 	$(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%)
 stress: tests_stress stress_dir
 	$(SHELL) $(objroot)test/test.sh $(TESTS_STRESS:$(srcroot)%.c=$(objroot)%)
-check: tests check_dir check_integration_prof
-	$(SHELL) $(objroot)test/test.sh $(TESTS_UNIT:$(srcroot)%.c=$(objroot)%) 
$(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) +check: check_unit check_integration check_integration_decay check_integration_prof ifeq ($(enable_code_coverage), 1) coverage_unit: check_unit diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 48765b01..0ced0aaa 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -949,6 +949,20 @@ for (i = 0; i < nbins; i++) { number of CPUs, or one if there is a single CPU. + + + opt.purge + (const char *) + r- + + Purge mode is “ratio” (default) or + “decay”. See opt.lg_dirty_mult + for details of the ratio mode. See opt.decay_time for + details of the decay mode. + + opt.lg_dirty_mult @@ -971,6 +985,26 @@ for (i = 0; i < nbins; i++) { for related dynamic control options. + + + opt.decay_time + (ssize_t) + r- + + Approximate time in seconds from the creation of a set + of unused dirty pages until an equivalent set of unused dirty pages is + purged and/or reused. The pages are incrementally purged according to a + sigmoidal decay curve that starts and ends with zero purge rate. A + decay time of 0 causes all unused dirty pages to be purged immediately + upon creation. A decay time of -1 disables purging. The default decay + time is 10 seconds. See arenas.decay_time + and arena.<i>.decay_time + for related dynamic control options. + + + opt.stats_print @@ -1501,12 +1535,27 @@ malloc_conf = "xmalloc:true";]]> (void) -- - Purge unused dirty pages for arena <i>, or for + Purge all unused dirty pages for arena <i>, or for all arenas if <i> equals arenas.narenas. + + + arena.<i>.decay + (void) + -- + + Trigger decay-based purging of unused dirty pages for + arena <i>, or for all arenas if <i> equals arenas.narenas. + The proportion of unused dirty pages to be purged depends on the current + time; see opt.decay_time for + details. + + arena.<i>.dss @@ -1535,6 +1584,22 @@ malloc_conf = "xmalloc:true";]]> for additional information. + + + arena.<i>.decay_time + (ssize_t) + rw + + Current per-arena approximate time in seconds from the + creation of a set of unused dirty pages until an equivalent set of + unused dirty pages is purged and/or reused. Each time this interface is + set, all currently unused dirty pages are considered to have fully + decayed, which causes immediate purging of all unused dirty pages unless + the decay time is set to -1 (i.e. purging disabled). See opt.decay_time for + additional information. + + arena.<i>.chunk_hooks @@ -1769,6 +1834,21 @@ typedef struct { for additional information. + + + arenas.decay_time + (ssize_t) + rw + + Current default per-arena approximate time in seconds + from the creation of a set of unused dirty pages until an equivalent set + of unused dirty pages is purged and/or reused, used to initialize arena.<i>.decay_time + during arena creation. See opt.decay_time for + additional information. + + arenas.quantum @@ -2113,6 +2193,19 @@ typedef struct { for details. + + + stats.arenas.<i>.decay_time + (ssize_t) + r- + + Approximate time in seconds from the creation of a set + of unused dirty pages until an equivalent set of unused dirty pages is + purged and/or reused. See opt.decay_time + for details. 
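For readers wiring these controls up in code: the sketch below is illustrative only, and assumes a build without a symbol prefix (so the entry point is mallctl() rather than je_mallctl()). It reads the configured purge mode, retunes the default decay time for arenas created later, and forces an immediate decay pass on all arenas via the arenas.narenas convention documented above.

#include <stdio.h>
#include <jemalloc/jemalloc.h>

int
main(void)
{
	const char *purge;
	ssize_t decay_time = 30;
	unsigned narenas;
	size_t sz;
	char name[64];

	/* "ratio" or "decay", as configured at startup (read-only). */
	sz = sizeof(purge);
	if (mallctl("opt.purge", &purge, &sz, NULL, 0) == 0)
		printf("opt.purge: %s\n", purge);

	/* Default decay time, applied to subsequently created arenas. */
	mallctl("arenas.decay_time", NULL, NULL, &decay_time,
	    sizeof(decay_time));

	/* Writing arena.<narenas>.decay means "all arenas". */
	sz = sizeof(narenas);
	if (mallctl("arenas.narenas", &narenas, &sz, NULL, 0) == 0) {
		snprintf(name, sizeof(name), "arena.%u.decay", narenas);
		mallctl(name, NULL, NULL, NULL, 0);
	}
	return (0);
}

Per the ctl handlers later in this patch, the arenas.decay_time write fails with EFAULT when the allocator is not running in decay mode, so real code should check the return value.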
+ + stats.arenas.<i>.nthreads diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 2750c008..76d3be19 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -23,6 +23,18 @@ */ #define LG_DIRTY_MULT_DEFAULT 3 +typedef enum { + purge_mode_ratio = 0, + purge_mode_decay = 1, + + purge_mode_limit = 2 +} purge_mode_t; +#define PURGE_DEFAULT purge_mode_ratio +/* Default decay time in seconds. */ +#define DECAY_TIME_DEFAULT 10 +/* Number of event ticks between time checks. */ +#define DECAY_NTICKS_PER_UPDATE 1000 + typedef struct arena_runs_dirty_link_s arena_runs_dirty_link_t; typedef struct arena_run_s arena_run_t; typedef struct arena_chunk_map_bits_s arena_chunk_map_bits_t; @@ -325,7 +337,7 @@ struct arena_s { /* Minimum ratio (log base 2) of nactive:ndirty. */ ssize_t lg_dirty_mult; - /* True if a thread is currently executing arena_purge(). */ + /* True if a thread is currently executing arena_purge_to_limit(). */ bool purging; /* Number of pages in active runs and huge regions. */ @@ -376,6 +388,53 @@ struct arena_s { arena_runs_dirty_link_t runs_dirty; extent_node_t chunks_cache; + /* + * Approximate time in seconds from the creation of a set of unused + * dirty pages until an equivalent set of unused dirty pages is purged + * and/or reused. + */ + ssize_t decay_time; + /* decay_time / SMOOTHSTEP_NSTEPS. */ + struct timespec decay_interval; + /* + * Time at which the current decay interval logically started. We do + * not actually advance to a new epoch until sometime after it starts + * because of scheduling and computation delays, and it is even possible + * to completely skip epochs. In all cases, during epoch advancement we + * merge all relevant activity into the most recently recorded epoch. + */ + struct timespec decay_epoch; + /* decay_deadline randomness generator. */ + uint64_t decay_jitter_state; + /* + * Deadline for current epoch. This is the sum of decay_interval and + * per epoch jitter which is a uniform random variable in + * [0..decay_interval). Epochs always advance by precise multiples of + * decay_interval, but we randomize the deadline to reduce the + * likelihood of arenas purging in lockstep. + */ + struct timespec decay_deadline; + /* + * Number of dirty pages at beginning of current epoch. During epoch + * advancement we use the delta between decay_ndirty and ndirty to + * determine how many dirty pages, if any, were generated, and record + * the result in decay_backlog. + */ + size_t decay_ndirty; + /* + * Memoized result of arena_decay_backlog_npages_limit() corresponding + * to the current contents of decay_backlog, i.e. the limit on how many + * pages are allowed to exist for the decay epochs. + */ + size_t decay_backlog_npages_limit; + /* + * Trailing log of how many unused dirty pages were generated during + * each of the past SMOOTHSTEP_NSTEPS decay epochs, where the last + * element is the most recent epoch. Corresponding epoch times are + * relative to decay_epoch. + */ + size_t decay_backlog[SMOOTHSTEP_NSTEPS]; + /* Extant huge allocations. */ ql_head(extent_node_t) huge; /* Synchronizes all huge allocation/update/deallocation. */ @@ -408,6 +467,7 @@ struct arena_s { /* Used in conjunction with tsd for fast arena-related context lookup. 
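The deadline randomization described in the comment above is easy to see in isolation. In the sketch below, a toy 64-bit mixer stands in for jemalloc's prng_range() and plain nanosecond counters stand in for struct timespec; both substitutions are assumptions made for brevity.

#include <stdint.h>

/* Toy splitmix64-style mixer; jemalloc uses its own PRNG (prng.h). */
static uint64_t
prng_next(uint64_t *state)
{
	uint64_t z = (*state += UINT64_C(0x9e3779b97f4a7c15));

	z = (z ^ (z >> 30)) * UINT64_C(0xbf58476d1ce4e5b9);
	z = (z ^ (z >> 27)) * UINT64_C(0x94d049bb133111eb);
	return (z ^ (z >> 31));
}

/*
 * deadline = epoch + interval + jitter, with jitter in [0, interval), so
 * epochs still advance by exact multiples of the interval, while arenas
 * seeded with different jitter_state values purge at staggered moments.
 * (The modulo is slightly biased; the real prng_range() is not.)
 */
static uint64_t
decay_deadline_ns(uint64_t epoch_ns, uint64_t interval_ns,
    uint64_t *jitter_state)
{
	return (epoch_ns + interval_ns + prng_next(jitter_state) %
	    interval_ns);
}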
*/ struct arena_tdata_s { arena_t *arena; + ticker_t decay_ticker; }; #endif /* JEMALLOC_ARENA_STRUCTS_B */ @@ -423,7 +483,10 @@ static const size_t large_pad = #endif ; +extern purge_mode_t opt_purge; +extern const char *purge_mode_names[]; extern ssize_t opt_lg_dirty_mult; +extern ssize_t opt_decay_time; extern arena_bin_info_t arena_bin_info[NBINS]; @@ -451,9 +514,11 @@ bool arena_chunk_ralloc_huge_expand(arena_t *arena, void *chunk, size_t oldsize, size_t usize, bool *zero); ssize_t arena_lg_dirty_mult_get(arena_t *arena); bool arena_lg_dirty_mult_set(arena_t *arena, ssize_t lg_dirty_mult); +ssize_t arena_decay_time_get(arena_t *arena); +bool arena_decay_time_set(arena_t *arena, ssize_t decay_time); void arena_maybe_purge(arena_t *arena); -void arena_purge_all(arena_t *arena); -void arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, +void arena_purge(arena_t *arena, bool all); +void arena_tcache_fill_small(tsd_t *tsd, arena_t *arena, tcache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes); void arena_alloc_junk_small(void *ptr, arena_bin_info_t *bin_info, bool zero); @@ -467,7 +532,7 @@ extern arena_dalloc_junk_small_t *arena_dalloc_junk_small; void arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info); #endif void arena_quarantine_junk_small(void *ptr, size_t usize); -void *arena_malloc_large(arena_t *arena, size_t size, +void *arena_malloc_large(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind, bool zero); void *arena_malloc_hard(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind, bool zero, tcache_t *tcache); @@ -478,8 +543,8 @@ void arena_dalloc_bin_junked_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr, arena_chunk_map_bits_t *bitselm); void arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, size_t pageind, arena_chunk_map_bits_t *bitselm); -void arena_dalloc_small(arena_t *arena, arena_chunk_t *chunk, void *ptr, - size_t pageind); +void arena_dalloc_small(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, + void *ptr, size_t pageind); #ifdef JEMALLOC_JET typedef void (arena_dalloc_junk_large_t)(void *, size_t); extern arena_dalloc_junk_large_t *arena_dalloc_junk_large; @@ -488,12 +553,13 @@ void arena_dalloc_junk_large(void *ptr, size_t usize); #endif void arena_dalloc_large_junked_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr); -void arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr); +void arena_dalloc_large(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, + void *ptr); #ifdef JEMALLOC_JET typedef void (arena_ralloc_junk_large_t)(void *, size_t, size_t); extern arena_ralloc_junk_large_t *arena_ralloc_junk_large; #endif -bool arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, +bool arena_ralloc_no_move(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t extra, bool zero); void *arena_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, size_t alignment, bool zero, tcache_t *tcache); @@ -501,9 +567,11 @@ dss_prec_t arena_dss_prec_get(arena_t *arena); bool arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec); ssize_t arena_lg_dirty_mult_default_get(void); bool arena_lg_dirty_mult_default_set(ssize_t lg_dirty_mult); +ssize_t arena_decay_time_default_get(void); +bool arena_decay_time_default_set(ssize_t decay_time); void arena_stats_merge(arena_t *arena, const char **dss, - ssize_t *lg_dirty_mult, size_t *nactive, size_t *ndirty, - arena_stats_t *astats, malloc_bin_stats_t *bstats, + ssize_t *lg_dirty_mult, ssize_t *decay_time, size_t *nactive, + size_t *ndirty, 
arena_stats_t *astats, malloc_bin_stats_t *bstats, malloc_large_stats_t *lstats, malloc_huge_stats_t *hstats); arena_t *arena_new(unsigned ind); bool arena_boot(void); @@ -566,6 +634,8 @@ prof_tctx_t *arena_prof_tctx_get(const void *ptr); void arena_prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx); void arena_prof_tctx_reset(const void *ptr, size_t usize, const void *old_ptr, prof_tctx_t *old_tctx); +void arena_decay_ticks(tsd_t *tsd, arena_t *arena, unsigned nticks); +void arena_decay_tick(tsd_t *tsd, arena_t *arena); void *arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind, bool zero, tcache_t *tcache, bool slow_path); arena_t *arena_aalloc(const void *ptr); @@ -1165,6 +1235,27 @@ arena_prof_tctx_reset(const void *ptr, size_t usize, const void *old_ptr, } } +JEMALLOC_ALWAYS_INLINE void +arena_decay_ticks(tsd_t *tsd, arena_t *arena, unsigned nticks) +{ + ticker_t *decay_ticker; + + if (unlikely(tsd == NULL)) + return; + decay_ticker = decay_ticker_get(tsd, arena->ind); + if (unlikely(decay_ticker == NULL)) + return; + if (unlikely(ticker_ticks(decay_ticker, nticks))) + arena_purge(arena, false); +} + +JEMALLOC_ALWAYS_INLINE void +arena_decay_tick(tsd_t *tsd, arena_t *arena) +{ + + arena_decay_ticks(tsd, arena, 1); +} + JEMALLOC_ALWAYS_INLINE void * arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind, bool zero, tcache_t *tcache, bool slow_path) @@ -1271,7 +1362,7 @@ arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) tcache_dalloc_small(tsd, tcache, ptr, binind, slow_path); } else { - arena_dalloc_small(extent_node_arena_get( + arena_dalloc_small(tsd, extent_node_arena_get( &chunk->node), chunk, ptr, pageind); } } else { @@ -1286,7 +1377,7 @@ arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) tcache_dalloc_large(tsd, tcache, ptr, size - large_pad, slow_path); } else { - arena_dalloc_large(extent_node_arena_get( + arena_dalloc_large(tsd, extent_node_arena_get( &chunk->node), chunk, ptr); } } @@ -1326,7 +1417,7 @@ arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache) } else { size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - arena_dalloc_small(extent_node_arena_get( + arena_dalloc_small(tsd, extent_node_arena_get( &chunk->node), chunk, ptr, pageind); } } else { @@ -1337,7 +1428,7 @@ arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache) tcache_dalloc_large(tsd, tcache, ptr, size, true); } else { - arena_dalloc_large(extent_node_arena_get( + arena_dalloc_large(tsd, extent_node_arena_get( &chunk->node), chunk, ptr); } } diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index 751c14b5..9add3ed9 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -35,6 +35,7 @@ struct ctl_arena_stats_s { unsigned nthreads; const char *dss; ssize_t lg_dirty_mult; + ssize_t decay_time; size_t pactive; size_t pdirty; arena_stats_t astats; diff --git a/include/jemalloc/internal/huge.h b/include/jemalloc/internal/huge.h index ece7af98..68d3789f 100644 --- a/include/jemalloc/internal/huge.h +++ b/include/jemalloc/internal/huge.h @@ -13,8 +13,8 @@ void *huge_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero, tcache_t *tcache); void *huge_palloc(tsd_t *tsd, arena_t *arena, size_t size, size_t alignment, bool zero, tcache_t *tcache); -bool huge_ralloc_no_move(void *ptr, size_t oldsize, size_t usize_min, - size_t usize_max, bool zero); +bool huge_ralloc_no_move(tsd_t *tsd, void *ptr, size_t oldsize, + size_t usize_min, size_t 
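What keeps arena_decay_ticks() cheap on the fast path is that it normally touches only a per-thread counter; the clock is consulted at most once per DECAY_NTICKS_PER_UPDATE (1000) ticks. The ticker contract it relies on looks roughly like the following paraphrase (written from memory of ticker.h, so treat the field and function shapes as assumptions rather than the literal interface):

#include <stdbool.h>
#include <stdint.h>

typedef struct {
	int32_t tick;	/* Ticks remaining until the next expiry. */
	int32_t nticks;	/* Reload value. */
} ticker_t;

static void
ticker_init(ticker_t *ticker, int32_t nticks)
{
	ticker->tick = nticks;
	ticker->nticks = nticks;
}

/* Consume nticks ticks; return true (and rearm) on expiry. */
static bool
ticker_ticks(ticker_t *ticker, int32_t nticks)
{
	if (ticker->tick < nticks) {
		ticker->tick = ticker->nticks;
		return (true);
	}
	ticker->tick -= nticks;
	return (false);
}

On expiry, arena_decay_ticks() calls arena_purge(arena, false), which takes the arena lock, advances the decay epoch if its deadline has passed, and purges down to the current limit.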
usize_max, bool zero); void *huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t usize, size_t alignment, bool zero, tcache_t *tcache); #ifdef JEMALLOC_JET diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index e84c4357..3b2f75d6 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -545,6 +545,7 @@ arena_tdata_t *arena_tdata_get(tsd_t *tsd, unsigned ind, bool refresh_if_missing); arena_t *arena_get(tsd_t *tsd, unsigned ind, bool init_if_missing, bool refresh_if_missing); +ticker_t *decay_ticker_get(tsd_t *tsd, unsigned ind); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) @@ -833,6 +834,17 @@ arena_get(tsd_t *tsd, unsigned ind, bool init_if_missing, return (tdata->arena); } + +JEMALLOC_INLINE ticker_t * +decay_ticker_get(tsd_t *tsd, unsigned ind) +{ + arena_tdata_t *tdata; + + tdata = arena_tdata_get(tsd, ind, true); + if (unlikely(tdata == NULL)) + return (NULL); + return (&tdata->decay_ticker); +} #endif #include "jemalloc/internal/bitmap.h" @@ -883,8 +895,8 @@ void *iralloct(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena); void *iralloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t alignment, bool zero); -bool ixalloc(void *ptr, size_t oldsize, size_t size, size_t extra, - size_t alignment, bool zero); +bool ixalloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, + size_t extra, size_t alignment, bool zero); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) @@ -1150,8 +1162,8 @@ iralloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t alignment, } JEMALLOC_ALWAYS_INLINE bool -ixalloc(void *ptr, size_t oldsize, size_t size, size_t extra, size_t alignment, - bool zero) +ixalloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t extra, + size_t alignment, bool zero) { assert(ptr != NULL); @@ -1163,7 +1175,7 @@ ixalloc(void *ptr, size_t oldsize, size_t size, size_t extra, size_t alignment, return (true); } - return (arena_ralloc_no_move(ptr, oldsize, size, extra, zero)); + return (arena_ralloc_no_move(tsd, ptr, oldsize, size, extra, zero)); } #endif diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index a0e6d8ab..95ddf0c8 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -25,6 +25,12 @@ arena_dalloc_junk_small arena_dalloc_large arena_dalloc_large_junked_locked arena_dalloc_small +arena_decay_time_default_get +arena_decay_time_default_set +arena_decay_time_get +arena_decay_time_set +arena_decay_tick +arena_decay_ticks arena_dss_prec_get arena_dss_prec_set arena_get @@ -83,7 +89,7 @@ arena_prof_tctx_get arena_prof_tctx_reset arena_prof_tctx_set arena_ptr_small_binind_get -arena_purge_all +arena_purge arena_quarantine_junk_small arena_ralloc arena_ralloc_junk_large @@ -185,6 +191,7 @@ ctl_nametomib ctl_postfork_child ctl_postfork_parent ctl_prefork +decay_ticker_get dss_prec_names extent_node_achunk_get extent_node_achunk_set @@ -318,6 +325,7 @@ narenas_total_get ncpus nhbins opt_abort +opt_decay_time opt_dss opt_junk opt_junk_alloc @@ -336,6 +344,7 @@ opt_prof_gdump opt_prof_leak opt_prof_prefix opt_prof_thread_active_init +opt_purge opt_quarantine opt_redzone opt_stats_print @@ -397,6 +406,7 @@ prof_thread_active_init_set prof_thread_active_set prof_thread_name_get 
prof_thread_name_set +purge_mode_names quarantine quarantine_alloc_hook quarantine_alloc_hook_work diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index c64f5d34..09935c36 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -361,7 +361,7 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, usize = index2size(binind); assert(usize <= tcache_maxclass); - ret = arena_malloc_large(arena, usize, binind, zero); + ret = arena_malloc_large(tsd, arena, usize, binind, zero); if (ret == NULL) return (NULL); } else { diff --git a/include/jemalloc/internal/time.h b/include/jemalloc/internal/time.h index a290f386..dd1dd5bd 100644 --- a/include/jemalloc/internal/time.h +++ b/include/jemalloc/internal/time.h @@ -26,7 +26,12 @@ void time_imultiply(struct timespec *time, uint64_t multiplier); void time_idivide(struct timespec *time, uint64_t divisor); uint64_t time_divide(const struct timespec *time, const struct timespec *divisor); +#ifdef JEMALLOC_JET +typedef bool (time_update_t)(struct timespec *); +extern time_update_t *time_update; +#else bool time_update(struct timespec *time); +#endif #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/src/arena.c b/src/arena.c index 47b136b6..b1078ae9 100644 --- a/src/arena.c +++ b/src/arena.c @@ -4,8 +4,17 @@ /******************************************************************************/ /* Data. */ +purge_mode_t opt_purge = PURGE_DEFAULT; +const char *purge_mode_names[] = { + "ratio", + "decay", + "N/A" +}; ssize_t opt_lg_dirty_mult = LG_DIRTY_MULT_DEFAULT; static ssize_t lg_dirty_mult_default; +ssize_t opt_decay_time = DECAY_TIME_DEFAULT; +static ssize_t decay_time_default; + arena_bin_info_t arena_bin_info[NBINS]; size_t map_bias; @@ -1205,10 +1214,193 @@ arena_lg_dirty_mult_set(arena_t *arena, ssize_t lg_dirty_mult) return (false); } +static void +arena_decay_deadline_init(arena_t *arena) +{ + + assert(opt_purge == purge_mode_decay); + + /* + * Generate a new deadline that is uniformly random within the next + * epoch after the current one. + */ + time_copy(&arena->decay_deadline, &arena->decay_epoch); + time_add(&arena->decay_deadline, &arena->decay_interval); + if (arena->decay_time > 0) { + uint64_t decay_interval_ns, r; + struct timespec jitter; + + decay_interval_ns = time_sec(&arena->decay_interval) * + 1000000000 + time_nsec(&arena->decay_interval); + r = prng_range(&arena->decay_jitter_state, decay_interval_ns); + time_init(&jitter, r / 1000000000, r % 1000000000); + time_add(&arena->decay_deadline, &jitter); + } +} + +static bool +arena_decay_deadline_reached(const arena_t *arena, const struct timespec *time) +{ + + assert(opt_purge == purge_mode_decay); + + return (time_compare(&arena->decay_deadline, time) <= 0); +} + +static size_t +arena_decay_backlog_npages_limit(const arena_t *arena) +{ + static const uint64_t h_steps[] = { +#define STEP(step, h, x, y) \ + h, + SMOOTHSTEP +#undef STEP + }; + uint64_t sum; + size_t npages_limit_backlog; + unsigned i; + + assert(opt_purge == purge_mode_decay); + + /* + * For each element of decay_backlog, multiply by the corresponding + * fixed-point smoothstep decay factor. Sum the products, then divide + * to round down to the nearest whole number of pages. 
+ */ + sum = 0; + for (i = 0; i < SMOOTHSTEP_NSTEPS; i++) + sum += arena->decay_backlog[i] * h_steps[i]; + npages_limit_backlog = (sum >> SMOOTHSTEP_BFP); + + return (npages_limit_backlog); +} + +static void +arena_decay_epoch_advance(arena_t *arena, const struct timespec *time) +{ + uint64_t nadvance; + struct timespec delta; + size_t ndirty_delta; + + assert(opt_purge == purge_mode_decay); + assert(arena_decay_deadline_reached(arena, time)); + + time_copy(&delta, time); + time_subtract(&delta, &arena->decay_epoch); + nadvance = time_divide(&delta, &arena->decay_interval); + assert(nadvance > 0); + + /* Add nadvance decay intervals to epoch. */ + time_copy(&delta, &arena->decay_interval); + time_imultiply(&delta, nadvance); + time_add(&arena->decay_epoch, &delta); + + /* Set a new deadline. */ + arena_decay_deadline_init(arena); + + /* Update the backlog. */ + if (nadvance >= SMOOTHSTEP_NSTEPS) { + memset(arena->decay_backlog, 0, (SMOOTHSTEP_NSTEPS-1) * + sizeof(size_t)); + } else { + memmove(arena->decay_backlog, &arena->decay_backlog[nadvance], + (SMOOTHSTEP_NSTEPS - nadvance) * sizeof(size_t)); + if (nadvance > 1) { + memset(&arena->decay_backlog[SMOOTHSTEP_NSTEPS - + nadvance], 0, (nadvance-1) * sizeof(size_t)); + } + } + ndirty_delta = (arena->ndirty > arena->decay_ndirty) ? arena->ndirty - + arena->decay_ndirty : 0; + arena->decay_ndirty = arena->ndirty; + arena->decay_backlog[SMOOTHSTEP_NSTEPS-1] = ndirty_delta; + arena->decay_backlog_npages_limit = + arena_decay_backlog_npages_limit(arena); +} + +static size_t +arena_decay_npages_limit(arena_t *arena) +{ + size_t npages_limit; + + assert(opt_purge == purge_mode_decay); + + npages_limit = arena->decay_backlog_npages_limit; + + /* Add in any dirty pages created during the current epoch. */ + if (arena->ndirty > arena->decay_ndirty) + npages_limit += arena->ndirty - arena->decay_ndirty; + + return (npages_limit); +} + +static void +arena_decay_init(arena_t *arena, ssize_t decay_time) +{ + + arena->decay_time = decay_time; + if (decay_time > 0) { + time_init(&arena->decay_interval, decay_time, 0); + time_idivide(&arena->decay_interval, SMOOTHSTEP_NSTEPS); + } + + time_init(&arena->decay_epoch, 0, 0); + time_update(&arena->decay_epoch); + arena->decay_jitter_state = (uint64_t)(uintptr_t)arena; + arena_decay_deadline_init(arena); + arena->decay_ndirty = arena->ndirty; + arena->decay_backlog_npages_limit = 0; + memset(arena->decay_backlog, 0, SMOOTHSTEP_NSTEPS * sizeof(size_t)); +} + +static bool +arena_decay_time_valid(ssize_t decay_time) +{ + + return (decay_time >= -1 && decay_time <= TIME_SEC_MAX); +} + +ssize_t +arena_decay_time_get(arena_t *arena) +{ + ssize_t decay_time; + + malloc_mutex_lock(&arena->lock); + decay_time = arena->decay_time; + malloc_mutex_unlock(&arena->lock); + + return (decay_time); +} + +bool +arena_decay_time_set(arena_t *arena, ssize_t decay_time) +{ + + if (!arena_decay_time_valid(decay_time)) + return (true); + + malloc_mutex_lock(&arena->lock); + /* + * Restart decay backlog from scratch, which may cause many dirty pages + * to be immediately purged. It would conceptually be possible to map + * the old backlog onto the new backlog, but there is no justification + * for such complexity since decay_time changes are intended to be + * infrequent, either between the {-1, 0, >0} states, or a one-time + * arbitrary change during initial arena configuration. 
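Isolated from the allocator, epoch advancement is just a left shift of a plain 200-element array: slots older than SMOOTHSTEP_NSTEPS fall off, vacated slots are zeroed, and the newest slot records the dirty pages generated since the last update. The standalone rendering below mirrors the memmove()/memset() logic of arena_decay_epoch_advance() above (a toy wrapper; the real code runs under the arena lock on arena->decay_backlog):

#include <stdint.h>
#include <string.h>

#define NSTEPS 200	/* SMOOTHSTEP_NSTEPS */

static void
backlog_advance(size_t backlog[NSTEPS], uint64_t nadvance,
    size_t ndirty_delta)
{
	if (nadvance >= NSTEPS) {
		/* Everything previously recorded has fully decayed. */
		memset(backlog, 0, (NSTEPS - 1) * sizeof(size_t));
	} else {
		/* Shift out the epochs that just aged past the window. */
		memmove(backlog, &backlog[nadvance],
		    (NSTEPS - (size_t)nadvance) * sizeof(size_t));
		if (nadvance > 1) {
			memset(&backlog[NSTEPS - (size_t)nadvance], 0,
			    ((size_t)nadvance - 1) * sizeof(size_t));
		}
	}
	/* Newest slot: dirty pages generated since the previous epoch. */
	backlog[NSTEPS - 1] = ndirty_delta;
}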
+ */ + arena_decay_init(arena, decay_time); + arena_maybe_purge(arena); + malloc_mutex_unlock(&arena->lock); + + return (false); +} + static void arena_maybe_purge_ratio(arena_t *arena) { + assert(opt_purge == purge_mode_ratio); + /* Don't purge if the option is disabled. */ if (arena->lg_dirty_mult < 0) return; @@ -1231,6 +1423,41 @@ arena_maybe_purge_ratio(arena_t *arena) } } +static void +arena_maybe_purge_decay(arena_t *arena) +{ + struct timespec time; + size_t ndirty_limit; + + assert(opt_purge == purge_mode_decay); + + /* Purge all or nothing if the option is disabled. */ + if (arena->decay_time <= 0) { + if (arena->decay_time == 0) + arena_purge_to_limit(arena, 0); + return; + } + + time_copy(&time, &arena->decay_epoch); + if (unlikely(time_update(&time))) { + /* Time went backwards. Force an epoch advance. */ + time_copy(&time, &arena->decay_deadline); + } + + if (arena_decay_deadline_reached(arena, &time)) + arena_decay_epoch_advance(arena, &time); + + ndirty_limit = arena_decay_npages_limit(arena); + + /* + * Don't try to purge unless the number of purgeable pages exceeds the + * current limit. + */ + if (arena->ndirty <= ndirty_limit) + return; + arena_purge_to_limit(arena, ndirty_limit); +} + void arena_maybe_purge(arena_t *arena) { @@ -1239,7 +1466,10 @@ arena_maybe_purge(arena_t *arena) if (arena->purging) return; - arena_maybe_purge_ratio(arena); + if (opt_purge == purge_mode_ratio) + arena_maybe_purge_ratio(arena); + else + arena_maybe_purge_decay(arena); } static size_t @@ -1298,6 +1528,9 @@ arena_stash_dirty(arena_t *arena, chunk_hooks_t *chunk_hooks, UNUSED void *chunk; npages = extent_node_size_get(chunkselm) >> LG_PAGE; + if (opt_purge == purge_mode_decay && arena->ndirty - + (nstashed + npages) < ndirty_limit) + break; chunkselm_next = qr_next(chunkselm, cc_link); /* @@ -1327,6 +1560,9 @@ arena_stash_dirty(arena_t *arena, chunk_hooks_t *chunk_hooks, arena_mapbits_unallocated_size_get(chunk, pageind); npages = run_size >> LG_PAGE; + if (opt_purge == purge_mode_decay && arena->ndirty - + (nstashed + npages) < ndirty_limit) + break; assert(pageind + npages <= chunk_npages); assert(arena_mapbits_dirty_get(chunk, pageind) == @@ -1352,7 +1588,8 @@ arena_stash_dirty(arena_t *arena, chunk_hooks_t *chunk_hooks, } nstashed += npages; - if (arena->ndirty - nstashed <= ndirty_limit) + if (opt_purge == purge_mode_ratio && arena->ndirty - nstashed <= + ndirty_limit) break; } @@ -1492,6 +1729,15 @@ arena_unstash_purged(arena_t *arena, chunk_hooks_t *chunk_hooks, } } +/* + * NB: ndirty_limit is interpreted differently depending on opt_purge: + * - purge_mode_ratio: Purge as few dirty run/chunks as possible to reach the + * desired state: + * (arena->ndirty <= ndirty_limit) + * - purge_mode_decay: Purge as many dirty runs/chunks as possible without + * violating the invariant: + * (arena->ndirty >= ndirty_limit) + */ static void arena_purge_to_limit(arena_t *arena, size_t ndirty_limit) { @@ -1510,8 +1756,8 @@ arena_purge_to_limit(arena_t *arena, size_t ndirty_limit) size_t ndirty = arena_dirty_count(arena); assert(ndirty == arena->ndirty); } - assert((arena->nactive >> arena->lg_dirty_mult) < arena->ndirty || - ndirty_limit == 0); + assert(opt_purge != purge_mode_ratio || (arena->nactive >> + arena->lg_dirty_mult) < arena->ndirty || ndirty_limit == 0); qr_new(&purge_runs_sentinel, rd_link); extent_node_dirty_linkage_init(&purge_chunks_sentinel); @@ -1534,11 +1780,14 @@ label_return: } void -arena_purge_all(arena_t *arena) +arena_purge(arena_t *arena, bool all) { 
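To put numbers on the limit computation: with the default decay_time of 10 seconds, each of the 200 epochs spans 50ms. Suppose a single burst of 1000 dirty pages landed 150 epochs (7.5s) ago; it now sits at backlog index 50, whose weight is the table value at x = 51/200, i.e. STEP(51) = 0.108859..., so about 108 of those pages may still legally remain dirty. The sketch below reproduces the computation in floating point purely for illustration; the allocator itself uses the fixed-point h values.

#include <stddef.h>
#include <stdio.h>

#define NSTEPS 200	/* SMOOTHSTEP_NSTEPS */

/* smootherstep(x) = 6x^5 - 15x^4 + 10x^3, in Horner form. */
static double
smootherstep(double x)
{
	return (((6.0 * x - 15.0) * x + 10.0) * x * x * x);
}

int
main(void)
{
	size_t backlog[NSTEPS] = {0};
	double sum = 0.0;
	size_t i;

	backlog[NSTEPS - 150] = 1000;	/* Burst from 150 epochs ago. */

	/* Same weighted sum as arena_decay_backlog_npages_limit(). */
	for (i = 0; i < NSTEPS; i++) {
		sum += (double)backlog[i] *
		    smootherstep((double)(i + 1) / NSTEPS);
	}
	/* Prints 108; cf. STEP( 51, ..., 0.255, 0.108859214081250). */
	printf("npages_limit = %zu\n", (size_t)sum);
	return (0);
}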
malloc_mutex_lock(&arena->lock); - arena_purge_to_limit(arena, 0); + if (all) + arena_purge_to_limit(arena, 0); + else + arena_maybe_purge(arena); malloc_mutex_unlock(&arena->lock); } @@ -1960,8 +2209,8 @@ arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin) } void -arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, szind_t binind, - uint64_t prof_accumbytes) +arena_tcache_fill_small(tsd_t *tsd, arena_t *arena, tcache_bin_t *tbin, + szind_t binind, uint64_t prof_accumbytes) { unsigned i, nfill; arena_bin_t *bin; @@ -2008,6 +2257,7 @@ arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, szind_t binind, } malloc_mutex_unlock(&bin->lock); tbin->ncached = i; + arena_decay_tick(tsd, arena); } void @@ -2118,7 +2368,8 @@ arena_quarantine_junk_small(void *ptr, size_t usize) } static void * -arena_malloc_small(arena_t *arena, size_t size, szind_t binind, bool zero) +arena_malloc_small(tsd_t *tsd, arena_t *arena, size_t size, szind_t binind, + bool zero) { void *ret; arena_bin_t *bin; @@ -2166,11 +2417,13 @@ arena_malloc_small(arena_t *arena, size_t size, szind_t binind, bool zero) memset(ret, 0, size); } + arena_decay_tick(tsd, arena); return (ret); } void * -arena_malloc_large(arena_t *arena, size_t size, szind_t binind, bool zero) +arena_malloc_large(tsd_t *tsd, arena_t *arena, size_t size, szind_t binind, + bool zero) { void *ret; size_t usize; @@ -2227,6 +2480,7 @@ arena_malloc_large(arena_t *arena, size_t size, szind_t binind, bool zero) } } + arena_decay_tick(tsd, arena); return (ret); } @@ -2240,9 +2494,9 @@ arena_malloc_hard(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind, return (NULL); if (likely(size <= SMALL_MAXCLASS)) - return (arena_malloc_small(arena, size, ind, zero)); + return (arena_malloc_small(tsd, arena, size, ind, zero)); if (likely(size <= large_maxclass)) - return (arena_malloc_large(arena, size, ind, zero)); + return (arena_malloc_large(tsd, arena, size, ind, zero)); return (huge_malloc(tsd, arena, size, zero, tcache)); } @@ -2329,6 +2583,7 @@ arena_palloc_large(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, else if (unlikely(opt_zero)) memset(ret, 0, usize); } + arena_decay_tick(tsd, arena); return (ret); } @@ -2515,7 +2770,7 @@ arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, } void -arena_dalloc_small(arena_t *arena, arena_chunk_t *chunk, void *ptr, +arena_dalloc_small(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, void *ptr, size_t pageind) { arena_chunk_map_bits_t *bitselm; @@ -2527,6 +2782,7 @@ arena_dalloc_small(arena_t *arena, arena_chunk_t *chunk, void *ptr, } bitselm = arena_bitselm_get(chunk, pageind); arena_dalloc_bin(arena, chunk, ptr, pageind, bitselm); + arena_decay_tick(tsd, arena); } #ifdef JEMALLOC_JET @@ -2583,12 +2839,13 @@ arena_dalloc_large_junked_locked(arena_t *arena, arena_chunk_t *chunk, } void -arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr) +arena_dalloc_large(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, void *ptr) { malloc_mutex_lock(&arena->lock); arena_dalloc_large_locked_impl(arena, chunk, ptr, false); malloc_mutex_unlock(&arena->lock); + arena_decay_tick(tsd, arena); } static void @@ -2789,14 +3046,16 @@ arena_ralloc_large(void *ptr, size_t oldsize, size_t usize_min, } bool -arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, - bool zero) +arena_ralloc_no_move(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, + size_t extra, bool zero) { size_t usize_min, usize_max; usize_min = s2u(size); usize_max = s2u(size + extra); if 
(likely(oldsize <= large_maxclass && usize_min <= large_maxclass)) { + arena_chunk_t *chunk; + /* * Avoid moving the allocation if the size class can be left the * same. @@ -2816,10 +3075,12 @@ arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, return (true); } + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + arena_decay_tick(tsd, extent_node_arena_get(&chunk->node)); return (false); } else { - return (huge_ralloc_no_move(ptr, oldsize, usize_min, usize_max, - zero)); + return (huge_ralloc_no_move(tsd, ptr, oldsize, usize_min, + usize_max, zero)); } } @@ -2852,7 +3113,7 @@ arena_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, size_t copysize; /* Try to avoid moving the allocation. */ - if (!arena_ralloc_no_move(ptr, oldsize, usize, 0, zero)) + if (!arena_ralloc_no_move(tsd, ptr, oldsize, usize, 0, zero)) return (ptr); /* @@ -2915,15 +3176,36 @@ bool arena_lg_dirty_mult_default_set(ssize_t lg_dirty_mult) { + if (opt_purge != purge_mode_ratio) + return (true); if (!arena_lg_dirty_mult_valid(lg_dirty_mult)) return (true); atomic_write_z((size_t *)&lg_dirty_mult_default, (size_t)lg_dirty_mult); return (false); } +ssize_t +arena_decay_time_default_get(void) +{ + + return ((ssize_t)atomic_read_z((size_t *)&decay_time_default)); +} + +bool +arena_decay_time_default_set(ssize_t decay_time) +{ + + if (opt_purge != purge_mode_decay) + return (true); + if (!arena_decay_time_valid(decay_time)) + return (true); + atomic_write_z((size_t *)&decay_time_default, (size_t)decay_time); + return (false); +} + void arena_stats_merge(arena_t *arena, const char **dss, ssize_t *lg_dirty_mult, - size_t *nactive, size_t *ndirty, arena_stats_t *astats, + ssize_t *decay_time, size_t *nactive, size_t *ndirty, arena_stats_t *astats, malloc_bin_stats_t *bstats, malloc_large_stats_t *lstats, malloc_huge_stats_t *hstats) { @@ -2932,6 +3214,7 @@ arena_stats_merge(arena_t *arena, const char **dss, ssize_t *lg_dirty_mult, malloc_mutex_lock(&arena->lock); *dss = dss_prec_names[arena->dss_prec]; *lg_dirty_mult = arena->lg_dirty_mult; + *decay_time = arena->decay_time; *nactive += arena->nactive; *ndirty += arena->ndirty; @@ -3050,6 +3333,9 @@ arena_new(unsigned ind) qr_new(&arena->runs_dirty, rd_link); qr_new(&arena->chunks_cache, cc_link); + if (opt_purge == purge_mode_decay) + arena_decay_init(arena, arena_decay_time_default_get()); + ql_new(&arena->huge); if (malloc_mutex_init(&arena->huge_mtx)) return (NULL); @@ -3227,6 +3513,7 @@ arena_boot(void) unsigned i; arena_lg_dirty_mult_default_set(opt_lg_dirty_mult); + arena_decay_time_default_set(opt_decay_time); /* * Compute the header size such that it is large enough to contain the diff --git a/src/ctl.c b/src/ctl.c index 9618d632..f003b415 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -92,7 +92,9 @@ CTL_PROTO(opt_abort) CTL_PROTO(opt_dss) CTL_PROTO(opt_lg_chunk) CTL_PROTO(opt_narenas) +CTL_PROTO(opt_purge) CTL_PROTO(opt_lg_dirty_mult) +CTL_PROTO(opt_decay_time) CTL_PROTO(opt_stats_print) CTL_PROTO(opt_junk) CTL_PROTO(opt_zero) @@ -115,10 +117,12 @@ CTL_PROTO(opt_prof_accum) CTL_PROTO(tcache_create) CTL_PROTO(tcache_flush) CTL_PROTO(tcache_destroy) +static void arena_i_purge(unsigned arena_ind, bool all); CTL_PROTO(arena_i_purge) -static void arena_i_purge(unsigned arena_ind); +CTL_PROTO(arena_i_decay) CTL_PROTO(arena_i_dss) CTL_PROTO(arena_i_lg_dirty_mult) +CTL_PROTO(arena_i_decay_time) CTL_PROTO(arena_i_chunk_hooks) INDEX_PROTO(arena_i) CTL_PROTO(arenas_bin_i_size) @@ -132,6 +136,7 @@ INDEX_PROTO(arenas_hchunk_i) 
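The option parser above accepts the same strings the Makefile passes through the MALLOC_CONF environment variable. Applications can also compile the setting in via jemalloc's option string global, sketched here for an unprefixed build (prefixed builds expose it as je_malloc_conf):

#include <stdlib.h>

/*
 * Consulted during jemalloc option processing; equivalent to running the
 * process with MALLOC_CONF="purge:decay,decay_time:30".
 */
const char *malloc_conf = "purge:decay,decay_time:30";

int
main(void)
{
	void *p = malloc(1);	/* First allocation triggers option parsing. */

	free(p);
	return (0);
}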
CTL_PROTO(arenas_narenas) CTL_PROTO(arenas_initialized) CTL_PROTO(arenas_lg_dirty_mult) +CTL_PROTO(arenas_decay_time) CTL_PROTO(arenas_quantum) CTL_PROTO(arenas_page) CTL_PROTO(arenas_tcache_max) @@ -182,6 +187,7 @@ INDEX_PROTO(stats_arenas_i_hchunks_j) CTL_PROTO(stats_arenas_i_nthreads) CTL_PROTO(stats_arenas_i_dss) CTL_PROTO(stats_arenas_i_lg_dirty_mult) +CTL_PROTO(stats_arenas_i_decay_time) CTL_PROTO(stats_arenas_i_pactive) CTL_PROTO(stats_arenas_i_pdirty) CTL_PROTO(stats_arenas_i_mapped) @@ -260,7 +266,9 @@ static const ctl_named_node_t opt_node[] = { {NAME("dss"), CTL(opt_dss)}, {NAME("lg_chunk"), CTL(opt_lg_chunk)}, {NAME("narenas"), CTL(opt_narenas)}, + {NAME("purge"), CTL(opt_purge)}, {NAME("lg_dirty_mult"), CTL(opt_lg_dirty_mult)}, + {NAME("decay_time"), CTL(opt_decay_time)}, {NAME("stats_print"), CTL(opt_stats_print)}, {NAME("junk"), CTL(opt_junk)}, {NAME("zero"), CTL(opt_zero)}, @@ -290,8 +298,10 @@ static const ctl_named_node_t tcache_node[] = { static const ctl_named_node_t arena_i_node[] = { {NAME("purge"), CTL(arena_i_purge)}, + {NAME("decay"), CTL(arena_i_decay)}, {NAME("dss"), CTL(arena_i_dss)}, {NAME("lg_dirty_mult"), CTL(arena_i_lg_dirty_mult)}, + {NAME("decay_time"), CTL(arena_i_decay_time)}, {NAME("chunk_hooks"), CTL(arena_i_chunk_hooks)} }; static const ctl_named_node_t super_arena_i_node[] = { @@ -341,6 +351,7 @@ static const ctl_named_node_t arenas_node[] = { {NAME("narenas"), CTL(arenas_narenas)}, {NAME("initialized"), CTL(arenas_initialized)}, {NAME("lg_dirty_mult"), CTL(arenas_lg_dirty_mult)}, + {NAME("decay_time"), CTL(arenas_decay_time)}, {NAME("quantum"), CTL(arenas_quantum)}, {NAME("page"), CTL(arenas_page)}, {NAME("tcache_max"), CTL(arenas_tcache_max)}, @@ -441,6 +452,7 @@ static const ctl_named_node_t stats_arenas_i_node[] = { {NAME("nthreads"), CTL(stats_arenas_i_nthreads)}, {NAME("dss"), CTL(stats_arenas_i_dss)}, {NAME("lg_dirty_mult"), CTL(stats_arenas_i_lg_dirty_mult)}, + {NAME("decay_time"), CTL(stats_arenas_i_decay_time)}, {NAME("pactive"), CTL(stats_arenas_i_pactive)}, {NAME("pdirty"), CTL(stats_arenas_i_pdirty)}, {NAME("mapped"), CTL(stats_arenas_i_mapped)}, @@ -523,6 +535,7 @@ ctl_arena_clear(ctl_arena_stats_t *astats) astats->dss = dss_prec_names[dss_prec_limit]; astats->lg_dirty_mult = -1; + astats->decay_time = -1; astats->pactive = 0; astats->pdirty = 0; if (config_stats) { @@ -545,8 +558,8 @@ ctl_arena_stats_amerge(ctl_arena_stats_t *cstats, arena_t *arena) unsigned i; arena_stats_merge(arena, &cstats->dss, &cstats->lg_dirty_mult, - &cstats->pactive, &cstats->pdirty, &cstats->astats, cstats->bstats, - cstats->lstats, cstats->hstats); + &cstats->decay_time, &cstats->pactive, &cstats->pdirty, + &cstats->astats, cstats->bstats, cstats->lstats, cstats->hstats); for (i = 0; i < NBINS; i++) { cstats->allocated_small += cstats->bstats[i].curregs * @@ -1265,7 +1278,9 @@ CTL_RO_NL_GEN(opt_abort, opt_abort, bool) CTL_RO_NL_GEN(opt_dss, opt_dss, const char *) CTL_RO_NL_GEN(opt_lg_chunk, opt_lg_chunk, size_t) CTL_RO_NL_GEN(opt_narenas, opt_narenas, size_t) +CTL_RO_NL_GEN(opt_purge, purge_mode_names[opt_purge], const char *) CTL_RO_NL_GEN(opt_lg_dirty_mult, opt_lg_dirty_mult, ssize_t) +CTL_RO_NL_GEN(opt_decay_time, opt_decay_time, ssize_t) CTL_RO_NL_GEN(opt_stats_print, opt_stats_print, bool) CTL_RO_NL_CGEN(config_fill, opt_junk, opt_junk, const char *) CTL_RO_NL_CGEN(config_fill, opt_quarantine, opt_quarantine, size_t) @@ -1539,34 +1554,52 @@ label_return: /******************************************************************************/ -/* ctl_mutex must 
be held during execution of this function. */ static void -arena_i_purge(unsigned arena_ind) +arena_i_purge(unsigned arena_ind, bool all) { - tsd_t *tsd; - unsigned i; - bool refreshed; - VARIABLE_ARRAY(arena_t *, tarenas, ctl_stats.narenas); - tsd = tsd_fetch(); - for (i = 0, refreshed = false; i < ctl_stats.narenas; i++) { - tarenas[i] = arena_get(tsd, i, false, false); - if (tarenas[i] == NULL && !refreshed) { - tarenas[i] = arena_get(tsd, i, false, true); - refreshed = true; - } - } + malloc_mutex_lock(&ctl_mtx); + { + tsd_t *tsd = tsd_fetch(); + unsigned narenas = ctl_stats.narenas; - if (arena_ind == ctl_stats.narenas) { - unsigned i; - for (i = 0; i < ctl_stats.narenas; i++) { - if (tarenas[i] != NULL) - arena_purge_all(tarenas[i]); + if (arena_ind == narenas) { + unsigned i; + bool refreshed; + VARIABLE_ARRAY(arena_t *, tarenas, narenas); + + for (i = 0, refreshed = false; i < narenas; i++) { + tarenas[i] = arena_get(tsd, i, false, false); + if (tarenas[i] == NULL && !refreshed) { + tarenas[i] = arena_get(tsd, i, false, + true); + refreshed = true; + } + } + + /* + * No further need to hold ctl_mtx, since narenas and + * tarenas contain everything needed below. + */ + malloc_mutex_unlock(&ctl_mtx); + + for (i = 0; i < narenas; i++) { + if (tarenas[i] != NULL) + arena_purge(tarenas[i], all); + } + } else { + arena_t *tarena; + + assert(arena_ind < narenas); + + tarena = arena_get(tsd, arena_ind, false, true); + + /* No further need to hold ctl_mtx. */ + malloc_mutex_unlock(&ctl_mtx); + + if (tarena != NULL) + arena_purge(tarena, all); } - } else { - assert(arena_ind < ctl_stats.narenas); - if (tarenas[arena_ind] != NULL) - arena_purge_all(tarenas[arena_ind]); } } @@ -1578,9 +1611,22 @@ arena_i_purge_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, READONLY(); WRITEONLY(); - malloc_mutex_lock(&ctl_mtx); - arena_i_purge(mib[1]); - malloc_mutex_unlock(&ctl_mtx); + arena_i_purge(mib[1], true); + + ret = 0; +label_return: + return (ret); +} + +static int +arena_i_decay_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, + void *newp, size_t newlen) +{ + int ret; + + READONLY(); + WRITEONLY(); + arena_i_purge(mib[1], false); ret = 0; label_return: @@ -1677,6 +1723,40 @@ label_return: return (ret); } +static int +arena_i_decay_time_ctl(const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) +{ + int ret; + unsigned arena_ind = mib[1]; + arena_t *arena; + + arena = arena_get(tsd_fetch(), arena_ind, false, true); + if (arena == NULL) { + ret = EFAULT; + goto label_return; + } + + if (oldp != NULL && oldlenp != NULL) { + size_t oldval = arena_decay_time_get(arena); + READ(oldval, ssize_t); + } + if (newp != NULL) { + if (newlen != sizeof(ssize_t)) { + ret = EINVAL; + goto label_return; + } + if (arena_decay_time_set(arena, *(ssize_t *)newp)) { + ret = EFAULT; + goto label_return; + } + } + + ret = 0; +label_return: + return (ret); +} + static int arena_i_chunk_hooks_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) @@ -1801,6 +1881,32 @@ label_return: return (ret); } +static int +arenas_decay_time_ctl(const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) +{ + int ret; + + if (oldp != NULL && oldlenp != NULL) { + size_t oldval = arena_decay_time_default_get(); + READ(oldval, ssize_t); + } + if (newp != NULL) { + if (newlen != sizeof(ssize_t)) { + ret = EINVAL; + goto label_return; + } + if (arena_decay_time_default_set(*(ssize_t *)newp)) { + ret 
= EFAULT; + goto label_return; + } + } + + ret = 0; +label_return: + return (ret); +} + CTL_RO_NL_GEN(arenas_quantum, QUANTUM, size_t) CTL_RO_NL_GEN(arenas_page, PAGE, size_t) CTL_RO_NL_CGEN(config_tcache, arenas_tcache_max, tcache_maxclass, size_t) @@ -2002,6 +2108,8 @@ CTL_RO_CGEN(config_stats, stats_mapped, ctl_stats.mapped, size_t) CTL_RO_GEN(stats_arenas_i_dss, ctl_stats.arenas[mib[2]].dss, const char *) CTL_RO_GEN(stats_arenas_i_lg_dirty_mult, ctl_stats.arenas[mib[2]].lg_dirty_mult, ssize_t) +CTL_RO_GEN(stats_arenas_i_decay_time, ctl_stats.arenas[mib[2]].decay_time, + ssize_t) CTL_RO_GEN(stats_arenas_i_nthreads, ctl_stats.arenas[mib[2]].nthreads, unsigned) CTL_RO_GEN(stats_arenas_i_pactive, ctl_stats.arenas[mib[2]].pactive, size_t) CTL_RO_GEN(stats_arenas_i_pdirty, ctl_stats.arenas[mib[2]].pdirty, size_t) diff --git a/src/huge.c b/src/huge.c index c1fa3795..9f880484 100644 --- a/src/huge.c +++ b/src/huge.c @@ -99,6 +99,7 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t size, size_t alignment, } else if (config_fill && unlikely(opt_junk_alloc)) memset(ret, 0xa5, size); + arena_decay_tick(tsd, arena); return (ret); } @@ -280,7 +281,7 @@ huge_ralloc_no_move_expand(void *ptr, size_t oldsize, size_t usize, bool zero) { } bool -huge_ralloc_no_move(void *ptr, size_t oldsize, size_t usize_min, +huge_ralloc_no_move(tsd_t *tsd, void *ptr, size_t oldsize, size_t usize_min, size_t usize_max, bool zero) { @@ -292,13 +293,18 @@ huge_ralloc_no_move(void *ptr, size_t oldsize, size_t usize_min, if (CHUNK_CEILING(usize_max) > CHUNK_CEILING(oldsize)) { /* Attempt to expand the allocation in-place. */ - if (!huge_ralloc_no_move_expand(ptr, oldsize, usize_max, zero)) + if (!huge_ralloc_no_move_expand(ptr, oldsize, usize_max, + zero)) { + arena_decay_tick(tsd, huge_aalloc(ptr)); return (false); + } /* Try again, this time with usize_min. */ if (usize_min < usize_max && CHUNK_CEILING(usize_min) > CHUNK_CEILING(oldsize) && huge_ralloc_no_move_expand(ptr, - oldsize, usize_min, zero)) + oldsize, usize_min, zero)) { + arena_decay_tick(tsd, huge_aalloc(ptr)); return (false); + } } /* @@ -309,12 +315,17 @@ huge_ralloc_no_move(void *ptr, size_t oldsize, size_t usize_min, && CHUNK_CEILING(oldsize) <= CHUNK_CEILING(usize_max)) { huge_ralloc_no_move_similar(ptr, oldsize, usize_min, usize_max, zero); + arena_decay_tick(tsd, huge_aalloc(ptr)); return (false); } /* Attempt to shrink the allocation in-place. */ - if (CHUNK_CEILING(oldsize) > CHUNK_CEILING(usize_max)) - return (huge_ralloc_no_move_shrink(ptr, oldsize, usize_max)); + if (CHUNK_CEILING(oldsize) > CHUNK_CEILING(usize_max)) { + if (!huge_ralloc_no_move_shrink(ptr, oldsize, usize_max)) { + arena_decay_tick(tsd, huge_aalloc(ptr)); + return (false); + } + } return (true); } @@ -336,7 +347,7 @@ huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t usize, size_t copysize; /* Try to avoid moving the allocation. 
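huge_ralloc_no_move() now takes tsd because, as the hunks above show, each in-place success path (expand, similar-size, and shrink) calls arena_decay_tick(tsd, huge_aalloc(ptr)) before returning false, so huge resize traffic advances the owning arena's decay clock just as allocation and deallocation do.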
*/ - if (!huge_ralloc_no_move(ptr, oldsize, usize, usize, zero)) + if (!huge_ralloc_no_move(tsd, ptr, oldsize, usize, usize, zero)) return (ptr); /* @@ -373,6 +384,8 @@ huge_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache) arena_chunk_dalloc_huge(extent_node_arena_get(node), extent_node_addr_get(node), extent_node_size_get(node)); idalloctm(tsd, node, tcache, true, true); + + arena_decay_tick(tsd, arena); } arena_t * diff --git a/src/jemalloc.c b/src/jemalloc.c index d2b2afce..f69d951b 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -577,6 +577,17 @@ arena_tdata_get_hard(tsd_t *tsd, unsigned ind) * (narenas_tdata - narenas_actual)); } + /* Copy/initialize tickers. */ + for (i = 0; i < narenas_actual; i++) { + if (i < narenas_tdata_old) { + ticker_copy(&arenas_tdata[i].decay_ticker, + &arenas_tdata_old[i].decay_ticker); + } else { + ticker_init(&arenas_tdata[i].decay_ticker, + DECAY_NTICKS_PER_UPDATE); + } + } + /* Read the refreshed tdata array. */ tdata = &arenas_tdata[ind]; label_return: @@ -1120,8 +1131,27 @@ malloc_conf_init(void) } CONF_HANDLE_SIZE_T(opt_narenas, "narenas", 1, SIZE_T_MAX, false) + if (strncmp("purge", k, klen) == 0) { + int i; + bool match = false; + for (i = 0; i < purge_mode_limit; i++) { + if (strncmp(purge_mode_names[i], v, + vlen) == 0) { + opt_purge = (purge_mode_t)i; + match = true; + break; + } + } + if (!match) { + malloc_conf_error("Invalid conf value", + k, klen, v, vlen); + } + continue; + } CONF_HANDLE_SSIZE_T(opt_lg_dirty_mult, "lg_dirty_mult", -1, (sizeof(size_t) << 3) - 1) + CONF_HANDLE_SSIZE_T(opt_decay_time, "decay_time", -1, + TIME_SEC_MAX); CONF_HANDLE_BOOL(opt_stats_print, "stats_print", true) if (config_fill) { if (CONF_MATCH("junk")) { @@ -2344,12 +2374,12 @@ label_oom: } JEMALLOC_ALWAYS_INLINE_C size_t -ixallocx_helper(void *ptr, size_t old_usize, size_t size, size_t extra, - size_t alignment, bool zero) +ixallocx_helper(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, + size_t extra, size_t alignment, bool zero) { size_t usize; - if (ixalloc(ptr, old_usize, size, extra, alignment, zero)) + if (ixalloc(tsd, ptr, old_usize, size, extra, alignment, zero)) return (old_usize); usize = isalloc(ptr, config_prof); @@ -2357,14 +2387,15 @@ ixallocx_helper(void *ptr, size_t old_usize, size_t size, size_t extra, } static size_t -ixallocx_prof_sample(void *ptr, size_t old_usize, size_t size, size_t extra, - size_t alignment, bool zero, prof_tctx_t *tctx) +ixallocx_prof_sample(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, + size_t extra, size_t alignment, bool zero, prof_tctx_t *tctx) { size_t usize; if (tctx == NULL) return (old_usize); - usize = ixallocx_helper(ptr, old_usize, size, extra, alignment, zero); + usize = ixallocx_helper(tsd, ptr, old_usize, size, extra, alignment, + zero); return (usize); } @@ -2390,11 +2421,11 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, assert(usize_max != 0); tctx = prof_alloc_prep(tsd, usize_max, prof_active, false); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { - usize = ixallocx_prof_sample(ptr, old_usize, size, extra, + usize = ixallocx_prof_sample(tsd, ptr, old_usize, size, extra, alignment, zero, tctx); } else { - usize = ixallocx_helper(ptr, old_usize, size, extra, alignment, - zero); + usize = ixallocx_helper(tsd, ptr, old_usize, size, extra, + alignment, zero); } if (usize == old_usize) { prof_alloc_rollback(tsd, tctx, false); @@ -2441,8 +2472,8 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) usize = ixallocx_prof(tsd, ptr, old_usize, size, extra, 
alignment, zero); } else { - usize = ixallocx_helper(ptr, old_usize, size, extra, alignment, - zero); + usize = ixallocx_helper(tsd, ptr, old_usize, size, extra, + alignment, zero); } if (unlikely(usize == old_usize)) goto label_not_resized; diff --git a/src/stats.c b/src/stats.c index 7d09c23c..8d5ed71e 100644 --- a/src/stats.c +++ b/src/stats.c @@ -258,7 +258,7 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, { unsigned nthreads; const char *dss; - ssize_t lg_dirty_mult; + ssize_t lg_dirty_mult, decay_time; size_t page, pactive, pdirty, mapped; size_t metadata_mapped, metadata_allocated; uint64_t npurge, nmadvise, purged; @@ -278,13 +278,23 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, malloc_cprintf(write_cb, cbopaque, "dss allocation precedence: %s\n", dss); CTL_M2_GET("stats.arenas.0.lg_dirty_mult", i, &lg_dirty_mult, ssize_t); - if (lg_dirty_mult >= 0) { - malloc_cprintf(write_cb, cbopaque, - "min active:dirty page ratio: %u:1\n", - (1U << lg_dirty_mult)); - } else { - malloc_cprintf(write_cb, cbopaque, - "min active:dirty page ratio: N/A\n"); + if (opt_purge == purge_mode_ratio) { + if (lg_dirty_mult >= 0) { + malloc_cprintf(write_cb, cbopaque, + "min active:dirty page ratio: %u:1\n", + (1U << lg_dirty_mult)); + } else { + malloc_cprintf(write_cb, cbopaque, + "min active:dirty page ratio: N/A\n"); + } + } + CTL_M2_GET("stats.arenas.0.decay_time", i, &decay_time, ssize_t); + if (opt_purge == purge_mode_decay) { + if (decay_time >= 0) { + malloc_cprintf(write_cb, cbopaque, "decay time: %zd\n", + decay_time); + } else + malloc_cprintf(write_cb, cbopaque, "decay time: N/A\n"); } CTL_M2_GET("stats.arenas.0.pactive", i, &pactive, size_t); CTL_M2_GET("stats.arenas.0.pdirty", i, &pdirty, size_t); @@ -292,9 +302,8 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, CTL_M2_GET("stats.arenas.0.nmadvise", i, &nmadvise, uint64_t); CTL_M2_GET("stats.arenas.0.purged", i, &purged, uint64_t); malloc_cprintf(write_cb, cbopaque, - "dirty pages: %zu:%zu active:dirty, %"FMTu64" sweep%s, %"FMTu64 - " madvise%s, %"FMTu64" purged\n", pactive, pdirty, npurge, npurge == - 1 ? "" : "s", nmadvise, nmadvise == 1 ? 
"" : "s", purged); + "purging: dirty: %zu, sweeps: %"FMTu64", madvises: %"FMTu64", " + "purged: %"FMTu64"\n", pdirty, npurge, nmadvise, purged); malloc_cprintf(write_cb, cbopaque, " allocated nmalloc ndalloc" @@ -486,7 +495,13 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, OPT_WRITE_SIZE_T(lg_chunk) OPT_WRITE_CHAR_P(dss) OPT_WRITE_SIZE_T(narenas) - OPT_WRITE_SSIZE_T_MUTABLE(lg_dirty_mult, arenas.lg_dirty_mult) + OPT_WRITE_CHAR_P(purge) + if (opt_purge == purge_mode_ratio) { + OPT_WRITE_SSIZE_T_MUTABLE(lg_dirty_mult, + arenas.lg_dirty_mult) + } + if (opt_purge == purge_mode_decay) + OPT_WRITE_SSIZE_T_MUTABLE(decay_time, arenas.decay_time) OPT_WRITE_BOOL(stats_print) OPT_WRITE_CHAR_P(junk) OPT_WRITE_SIZE_T(quarantine) @@ -531,13 +546,22 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, malloc_cprintf(write_cb, cbopaque, "Page size: %zu\n", sv); CTL_GET("arenas.lg_dirty_mult", &ssv, ssize_t); - if (ssv >= 0) { + if (opt_purge == purge_mode_ratio) { + if (ssv >= 0) { + malloc_cprintf(write_cb, cbopaque, + "Min active:dirty page ratio per arena: " + "%u:1\n", (1U << ssv)); + } else { + malloc_cprintf(write_cb, cbopaque, + "Min active:dirty page ratio per arena: " + "N/A\n"); + } + } + CTL_GET("arenas.decay_time", &ssv, ssize_t); + if (opt_purge == purge_mode_decay) { malloc_cprintf(write_cb, cbopaque, - "Min active:dirty page ratio per arena: %u:1\n", - (1U << ssv)); - } else { - malloc_cprintf(write_cb, cbopaque, - "Min active:dirty page ratio per arena: N/A\n"); + "Unused dirty page decay time: %zd%s\n", + ssv, (ssv < 0) ? " (no decay)" : ""); } if (je_mallctl("arenas.tcache_max", &sv, &ssz, NULL, 0) == 0) { malloc_cprintf(write_cb, cbopaque, diff --git a/src/tcache.c b/src/tcache.c index e8c3152d..426bb1f7 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -75,7 +75,7 @@ tcache_alloc_small_hard(tsd_t *tsd, arena_t *arena, tcache_t *tcache, { void *ret; - arena_tcache_fill_small(arena, tbin, binind, config_prof ? + arena_tcache_fill_small(tsd, arena, tbin, binind, config_prof ? 
tcache->prof_accumbytes : 0); if (config_prof) tcache->prof_accumbytes = 0; @@ -143,6 +143,7 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, } } malloc_mutex_unlock(&bin->lock); + arena_decay_ticks(tsd, bin_arena, nflush - ndeferred); } if (config_stats && !merged_stats) { /* @@ -226,6 +227,7 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, malloc_mutex_unlock(&locked_arena->lock); if (config_prof && idump) prof_idump(); + arena_decay_ticks(tsd, locked_arena, nflush - ndeferred); } if (config_stats && !merged_stats) { /* diff --git a/src/time.c b/src/time.c index 3f930385..2fe93e1e 100644 --- a/src/time.c +++ b/src/time.c @@ -147,6 +147,10 @@ time_divide(const struct timespec *time, const struct timespec *divisor) return (t / d); } +#ifdef JEMALLOC_JET +#undef time_update +#define time_update JEMALLOC_N(time_update_impl) +#endif bool time_update(struct timespec *time) { @@ -184,3 +188,8 @@ time_update(struct timespec *time) assert(time_valid(time)); return (false); } +#ifdef JEMALLOC_JET +#undef time_update +#define time_update JEMALLOC_N(time_update) +time_update_t *time_update = JEMALLOC_N(time_update_impl); +#endif diff --git a/test/unit/decay.c b/test/unit/decay.c new file mode 100644 index 00000000..324019dc --- /dev/null +++ b/test/unit/decay.c @@ -0,0 +1,370 @@ +#include "test/jemalloc_test.h" + +const char *malloc_conf = "purge:decay,decay_time:1"; + +static time_update_t *time_update_orig; + +static unsigned nupdates_mock; +static struct timespec time_mock; +static bool nonmonotonic_mock; + +static bool +time_update_mock(struct timespec *time) +{ + + nupdates_mock++; + if (!nonmonotonic_mock) + time_copy(time, &time_mock); + return (nonmonotonic_mock); +} + +TEST_BEGIN(test_decay_ticks) +{ + ticker_t *decay_ticker; + unsigned tick0, tick1; + size_t sz, huge0, large0; + void *p; + unsigned tcache_ind; + + test_skip_if(opt_purge != purge_mode_decay); + + decay_ticker = decay_ticker_get(tsd_fetch(), 0); + assert_ptr_not_null(decay_ticker, + "Unexpected failure getting decay ticker"); + + sz = sizeof(size_t); + assert_d_eq(mallctl("arenas.hchunk.0.size", &huge0, &sz, NULL, 0), 0, + "Unexpected mallctl failure"); + assert_d_eq(mallctl("arenas.lrun.0.size", &large0, &sz, NULL, 0), 0, + "Unexpected mallctl failure"); + + /* malloc(). */ + tick0 = ticker_read(decay_ticker); + p = malloc(huge0); + assert_ptr_not_null(p, "Unexpected malloc() failure"); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, "Expected ticker to tick during malloc()"); + /* free(). */ + tick0 = ticker_read(decay_ticker); + free(p); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, "Expected ticker to tick during free()"); + + /* calloc(). */ + tick0 = ticker_read(decay_ticker); + p = calloc(1, huge0); + assert_ptr_not_null(p, "Unexpected calloc() failure"); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, "Expected ticker to tick during calloc()"); + free(p); + + /* posix_memalign(). */ + tick0 = ticker_read(decay_ticker); + assert_d_eq(posix_memalign(&p, sizeof(size_t), huge0), 0, + "Unexpected posix_memalign() failure"); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, + "Expected ticker to tick during posix_memalign()"); + free(p); + + /* aligned_alloc(). 
*/ + tick0 = ticker_read(decay_ticker); + p = aligned_alloc(sizeof(size_t), huge0); + assert_ptr_not_null(p, "Unexpected aligned_alloc() failure"); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, + "Expected ticker to tick during aligned_alloc()"); + free(p); + + /* realloc(). */ + /* Allocate. */ + tick0 = ticker_read(decay_ticker); + p = realloc(NULL, huge0); + assert_ptr_not_null(p, "Unexpected realloc() failure"); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, "Expected ticker to tick during realloc()"); + /* Reallocate. */ + tick0 = ticker_read(decay_ticker); + p = realloc(p, huge0); + assert_ptr_not_null(p, "Unexpected realloc() failure"); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, "Expected ticker to tick during realloc()"); + /* Deallocate. */ + tick0 = ticker_read(decay_ticker); + realloc(p, 0); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, "Expected ticker to tick during realloc()"); + + /* Huge mallocx(). */ + tick0 = ticker_read(decay_ticker); + p = mallocx(huge0, 0); + assert_ptr_not_null(p, "Unexpected mallocx() failure"); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, + "Expected ticker to tick during huge mallocx()"); + /* Huge rallocx(). */ + tick0 = ticker_read(decay_ticker); + p = rallocx(p, huge0, 0); + assert_ptr_not_null(p, "Unexpected rallocx() failure"); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, + "Expected ticker to tick during huge rallocx()"); + /* Huge xallocx(). */ + tick0 = ticker_read(decay_ticker); + xallocx(p, huge0, 0, 0); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, + "Expected ticker to tick during huge xallocx()"); + /* Huge dallocx(). */ + tick0 = ticker_read(decay_ticker); + dallocx(p, 0); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, + "Expected ticker to tick during huge dallocx()"); + /* Huge sdallocx(). */ + p = mallocx(huge0, 0); + assert_ptr_not_null(p, "Unexpected mallocx() failure"); + tick0 = ticker_read(decay_ticker); + sdallocx(p, huge0, 0); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, + "Expected ticker to tick during huge sdallocx()"); + + /* Large mallocx(). */ + tick0 = ticker_read(decay_ticker); + p = mallocx(large0, MALLOCX_TCACHE_NONE); + assert_ptr_not_null(p, "Unexpected mallocx() failure"); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, + "Expected ticker to tick during large mallocx()"); + /* Large rallocx(). */ + tick0 = ticker_read(decay_ticker); + p = rallocx(p, large0, MALLOCX_TCACHE_NONE); + assert_ptr_not_null(p, "Unexpected rallocx() failure"); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, + "Expected ticker to tick during large rallocx()"); + /* Large xallocx(). */ + tick0 = ticker_read(decay_ticker); + xallocx(p, large0, 0, MALLOCX_TCACHE_NONE); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, + "Expected ticker to tick during large xallocx()"); + /* Large dallocx(). */ + tick0 = ticker_read(decay_ticker); + dallocx(p, MALLOCX_TCACHE_NONE); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, + "Expected ticker to tick during large dallocx()"); + /* Large sdallocx(). 
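sdallocx() is the size-hinted variant of dallocx(), so the test passes the original request size (large0) back in and still expects the deallocation path to tick the decay ticker.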
*/ + p = mallocx(large0, MALLOCX_TCACHE_NONE); + assert_ptr_not_null(p, "Unexpected mallocx() failure"); + tick0 = ticker_read(decay_ticker); + sdallocx(p, large0, MALLOCX_TCACHE_NONE); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, + "Expected ticker to tick during large sdallocx()"); + + /* Small mallocx(). */ + tick0 = ticker_read(decay_ticker); + p = mallocx(1, MALLOCX_TCACHE_NONE); + assert_ptr_not_null(p, "Unexpected mallocx() failure"); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, + "Expected ticker to tick during small mallocx()"); + /* Small rallocx(). */ + tick0 = ticker_read(decay_ticker); + p = rallocx(p, 1, MALLOCX_TCACHE_NONE); + assert_ptr_not_null(p, "Unexpected rallocx() failure"); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, + "Expected ticker to tick during small rallocx()"); + /* Small xallocx(). */ + tick0 = ticker_read(decay_ticker); + xallocx(p, 1, 0, MALLOCX_TCACHE_NONE); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, + "Expected ticker to tick during small xallocx()"); + /* Small dallocx(). */ + tick0 = ticker_read(decay_ticker); + dallocx(p, MALLOCX_TCACHE_NONE); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, + "Expected ticker to tick during small dallocx()"); + /* Small sdallocx(). */ + p = mallocx(1, MALLOCX_TCACHE_NONE); + assert_ptr_not_null(p, "Unexpected mallocx() failure"); + tick0 = ticker_read(decay_ticker); + sdallocx(p, 1, MALLOCX_TCACHE_NONE); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, + "Expected ticker to tick during small sdallocx()"); + + /* tcache fill. */ + sz = sizeof(unsigned); + assert_d_eq(mallctl("tcache.create", &tcache_ind, &sz, NULL, 0), 0, + "Unexpected mallctl failure"); + tick0 = ticker_read(decay_ticker); + p = mallocx(1, MALLOCX_TCACHE(tcache_ind)); + assert_ptr_not_null(p, "Unexpected mallocx() failure"); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, + "Expected ticker to tick during tcache fill"); + /* tcache flush. */ + dallocx(p, MALLOCX_TCACHE(tcache_ind)); + tick0 = ticker_read(decay_ticker); + assert_d_eq(mallctl("tcache.flush", NULL, NULL, &tcache_ind, + sizeof(unsigned)), 0, "Unexpected mallctl failure"); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, + "Expected ticker to tick during tcache flush"); +} +TEST_END + +TEST_BEGIN(test_decay_ticker) +{ +#define NPS 1024 + int flags = (MALLOCX_ARENA(0) | MALLOCX_TCACHE_NONE); + void *ps[NPS]; + uint64_t epoch, npurge0, npurge1; + size_t sz, tcache_max, large; + unsigned i, nupdates0; + struct timespec time, decay_time, deadline; + + test_skip_if(opt_purge != purge_mode_decay); + + /* + * Allocate a bunch of large objects, pause the clock, deallocate the + * objects, restore the clock, then [md]allocx() in a tight loop to + * verify the ticker triggers purging. 
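+	 * The clock is paused by swapping the JEMALLOC_JET-exposed time_update
+	 * function pointer for time_update_mock(), which replays a frozen
+	 * timespec; under this file's malloc_conf of purge:decay,decay_time:1,
+	 * freed dirty pages only become purgeable as wall time advances across
+	 * the one-second decay window (a smoothstep schedule), so nothing can
+	 * be purged while the mock clock is installed.  Once the real
+	 * time_update() is restored, roughly every DECAY_NTICKS_PER_UPDATE
+	 * allocation/deallocation events the per-arena decay ticker fires, the
+	 * arena re-reads the clock, and stats.arenas.0.npurge should rise
+	 * before the deadline.  A consumer would opt into this purging mode
+	 * with, e.g., MALLOC_CONF="purge:decay,decay_time:10" in the
+	 * environment, or adjust it at run time with a call such as
+	 * (illustrative sketch, not part of the patch):
+	 *   ssize_t t = 10;
+	 *   mallctl("arenas.decay_time", NULL, NULL, &t, sizeof(t));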
+ */ + + sz = sizeof(size_t); + assert_d_eq(mallctl("arenas.tcache_max", &tcache_max, &sz, NULL, 0), 0, + "Unexpected mallctl failure"); + large = nallocx(tcache_max + 1, flags); + + assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, + "Unexpected mallctl failure"); + assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(uint64_t)), 0, + "Unexpected mallctl failure"); + sz = sizeof(uint64_t); + assert_d_eq(mallctl("stats.arenas.0.npurge", &npurge0, &sz, NULL, 0), 0, + "Unexpected mallctl failure"); + + for (i = 0; i < NPS; i++) { + ps[i] = mallocx(large, flags); + assert_ptr_not_null(ps[i], "Unexpected mallocx() failure"); + } + + nupdates_mock = 0; + time_init(&time_mock, 0, 0); + time_update(&time_mock); + nonmonotonic_mock = false; + + time_update_orig = time_update; + time_update = time_update_mock; + + for (i = 0; i < NPS; i++) { + dallocx(ps[i], flags); + nupdates0 = nupdates_mock; + assert_d_eq(mallctl("arena.0.decay", NULL, NULL, NULL, 0), 0, + "Unexpected arena.0.decay failure"); + assert_u_gt(nupdates_mock, nupdates0, + "Expected time_update() to be called"); + } + + time_update = time_update_orig; + + time_init(&time, 0, 0); + time_update(&time); + time_init(&decay_time, opt_decay_time, 0); + time_copy(&deadline, &time); + time_add(&deadline, &decay_time); + do { + for (i = 0; i < DECAY_NTICKS_PER_UPDATE / 2; i++) { + void *p = mallocx(1, flags); + assert_ptr_not_null(p, "Unexpected mallocx() failure"); + dallocx(p, flags); + } + assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, + sizeof(uint64_t)), 0, "Unexpected mallctl failure"); + sz = sizeof(uint64_t); + assert_d_eq(mallctl("stats.arenas.0.npurge", &npurge1, &sz, + NULL, 0), 0, "Unexpected mallctl failure"); + + time_update(&time); + } while (time_compare(&time, &deadline) <= 0 && npurge1 == npurge0); + + assert_u64_gt(npurge1, npurge0, "Expected purging to occur"); +#undef NPS +} +TEST_END + +TEST_BEGIN(test_decay_nonmonotonic) +{ +#define NPS (SMOOTHSTEP_NSTEPS + 1) + int flags = (MALLOCX_ARENA(0) | MALLOCX_TCACHE_NONE); + void *ps[NPS]; + uint64_t epoch, npurge0, npurge1; + size_t sz, large0; + unsigned i, nupdates0; + + test_skip_if(opt_purge != purge_mode_decay); + + sz = sizeof(size_t); + assert_d_eq(mallctl("arenas.lrun.0.size", &large0, &sz, NULL, 0), 0, + "Unexpected mallctl failure"); + + assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, + "Unexpected mallctl failure"); + assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(uint64_t)), 0, + "Unexpected mallctl failure"); + sz = sizeof(uint64_t); + assert_d_eq(mallctl("stats.arenas.0.npurge", &npurge0, &sz, NULL, 0), 0, + "Unexpected mallctl failure"); + + nupdates_mock = 0; + time_init(&time_mock, 0, 0); + time_update(&time_mock); + nonmonotonic_mock = true; + + time_update_orig = time_update; + time_update = time_update_mock; + + for (i = 0; i < NPS; i++) { + ps[i] = mallocx(large0, flags); + assert_ptr_not_null(ps[i], "Unexpected mallocx() failure"); + } + + for (i = 0; i < NPS; i++) { + dallocx(ps[i], flags); + nupdates0 = nupdates_mock; + assert_d_eq(mallctl("arena.0.decay", NULL, NULL, NULL, 0), 0, + "Unexpected arena.0.decay failure"); + assert_u_gt(nupdates_mock, nupdates0, + "Expected time_update() to be called"); + } + + assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(uint64_t)), 0, + "Unexpected mallctl failure"); + sz = sizeof(uint64_t); + assert_d_eq(mallctl("stats.arenas.0.npurge", &npurge1, &sz, NULL, 0), 0, + "Unexpected mallctl failure"); + + assert_u64_gt(npurge1, npurge0, "Expected purging to occur"); + + 
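/* Restore the real clock before returning, so later tests observe monotonic time again. */
+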
time_update = time_update_orig; +#undef NPS +} +TEST_END + +int +main(void) +{ + + return (test( + test_decay_ticks, + test_decay_ticker, + test_decay_nonmonotonic)); +} diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index fde223f9..b312fc64 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -164,7 +164,9 @@ TEST_BEGIN(test_mallctl_opt) TEST_MALLCTL_OPT(size_t, lg_chunk, always); TEST_MALLCTL_OPT(const char *, dss, always); TEST_MALLCTL_OPT(size_t, narenas, always); + TEST_MALLCTL_OPT(const char *, purge, always); TEST_MALLCTL_OPT(ssize_t, lg_dirty_mult, always); + TEST_MALLCTL_OPT(ssize_t, decay_time, always); TEST_MALLCTL_OPT(bool, stats_print, always); TEST_MALLCTL_OPT(const char *, junk, fill); TEST_MALLCTL_OPT(size_t, quarantine, fill); @@ -355,6 +357,8 @@ TEST_BEGIN(test_arena_i_lg_dirty_mult) ssize_t lg_dirty_mult, orig_lg_dirty_mult, prev_lg_dirty_mult; size_t sz = sizeof(ssize_t); + test_skip_if(opt_purge != purge_mode_ratio); + assert_d_eq(mallctl("arena.0.lg_dirty_mult", &orig_lg_dirty_mult, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); @@ -382,6 +386,39 @@ TEST_BEGIN(test_arena_i_lg_dirty_mult) } TEST_END +TEST_BEGIN(test_arena_i_decay_time) +{ + ssize_t decay_time, orig_decay_time, prev_decay_time; + size_t sz = sizeof(ssize_t); + + test_skip_if(opt_purge != purge_mode_decay); + + assert_d_eq(mallctl("arena.0.decay_time", &orig_decay_time, &sz, + NULL, 0), 0, "Unexpected mallctl() failure"); + + decay_time = -2; + assert_d_eq(mallctl("arena.0.decay_time", NULL, NULL, + &decay_time, sizeof(ssize_t)), EFAULT, + "Unexpected mallctl() success"); + + decay_time = TIME_SEC_MAX; + assert_d_eq(mallctl("arena.0.decay_time", NULL, NULL, + &decay_time, sizeof(ssize_t)), 0, + "Unexpected mallctl() failure"); + + for (prev_decay_time = decay_time, decay_time = -1; + decay_time < 20; prev_decay_time = decay_time, decay_time++) { + ssize_t old_decay_time; + + assert_d_eq(mallctl("arena.0.decay_time", &old_decay_time, + &sz, &decay_time, sizeof(ssize_t)), 0, + "Unexpected mallctl() failure"); + assert_zd_eq(old_decay_time, prev_decay_time, + "Unexpected old arena.0.decay_time"); + } +} +TEST_END + TEST_BEGIN(test_arena_i_purge) { unsigned narenas; @@ -402,6 +439,26 @@ TEST_BEGIN(test_arena_i_purge) } TEST_END +TEST_BEGIN(test_arena_i_decay) +{ + unsigned narenas; + size_t sz = sizeof(unsigned); + size_t mib[3]; + size_t miblen = 3; + + assert_d_eq(mallctl("arena.0.decay", NULL, NULL, NULL, 0), 0, + "Unexpected mallctl() failure"); + + assert_d_eq(mallctl("arenas.narenas", &narenas, &sz, NULL, 0), 0, + "Unexpected mallctl() failure"); + assert_d_eq(mallctlnametomib("arena.0.decay", mib, &miblen), 0, + "Unexpected mallctlnametomib() failure"); + mib[1] = narenas; + assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, + "Unexpected mallctlbymib() failure"); +} +TEST_END + TEST_BEGIN(test_arena_i_dss) { const char *dss_prec_old, *dss_prec_new; @@ -466,6 +523,8 @@ TEST_BEGIN(test_arenas_lg_dirty_mult) ssize_t lg_dirty_mult, orig_lg_dirty_mult, prev_lg_dirty_mult; size_t sz = sizeof(ssize_t); + test_skip_if(opt_purge != purge_mode_ratio); + assert_d_eq(mallctl("arenas.lg_dirty_mult", &orig_lg_dirty_mult, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); @@ -493,6 +552,39 @@ TEST_BEGIN(test_arenas_lg_dirty_mult) } TEST_END +TEST_BEGIN(test_arenas_decay_time) +{ + ssize_t decay_time, orig_decay_time, prev_decay_time; + size_t sz = sizeof(ssize_t); + + test_skip_if(opt_purge != purge_mode_decay); + + assert_d_eq(mallctl("arenas.decay_time", &orig_decay_time, &sz, + 
NULL, 0), 0, "Unexpected mallctl() failure"); + + decay_time = -2; + assert_d_eq(mallctl("arenas.decay_time", NULL, NULL, + &decay_time, sizeof(ssize_t)), EFAULT, + "Unexpected mallctl() success"); + + decay_time = TIME_SEC_MAX; + assert_d_eq(mallctl("arenas.decay_time", NULL, NULL, + &decay_time, sizeof(ssize_t)), 0, + "Expected mallctl() failure"); + + for (prev_decay_time = decay_time, decay_time = -1; + decay_time < 20; prev_decay_time = decay_time, decay_time++) { + ssize_t old_decay_time; + + assert_d_eq(mallctl("arenas.decay_time", &old_decay_time, + &sz, &decay_time, sizeof(ssize_t)), 0, + "Unexpected mallctl() failure"); + assert_zd_eq(old_decay_time, prev_decay_time, + "Unexpected old arenas.decay_time"); + } +} +TEST_END + TEST_BEGIN(test_arenas_constants) { @@ -621,10 +713,13 @@ main(void) test_tcache, test_thread_arena, test_arena_i_lg_dirty_mult, + test_arena_i_decay_time, test_arena_i_purge, + test_arena_i_decay, test_arena_i_dss, test_arenas_initialized, test_arenas_lg_dirty_mult, + test_arenas_decay_time, test_arenas_constants, test_arenas_bin_constants, test_arenas_lrun_constants, From 9f24c944744e91d0cfe1864287ca7a52c16598fa Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 20 Feb 2016 09:02:49 -0800 Subject: [PATCH 34/96] Increase test coverage in test_decay_ticks. --- test/unit/decay.c | 217 ++++++++++++++++++++-------------------------- 1 file changed, 96 insertions(+), 121 deletions(-) diff --git a/test/unit/decay.c b/test/unit/decay.c index 324019dc..20730de4 100644 --- a/test/unit/decay.c +++ b/test/unit/decay.c @@ -24,7 +24,6 @@ TEST_BEGIN(test_decay_ticks) unsigned tick0, tick1; size_t sz, huge0, large0; void *p; - unsigned tcache_ind; test_skip_if(opt_purge != purge_mode_decay); @@ -38,6 +37,12 @@ TEST_BEGIN(test_decay_ticks) assert_d_eq(mallctl("arenas.lrun.0.size", &large0, &sz, NULL, 0), 0, "Unexpected mallctl failure"); + /* + * Test the standard APIs using a huge size class, since we can't + * control tcache interactions (except by completely disabling tcache + * for the entire test program). + */ + /* malloc(). */ tick0 = ticker_read(decay_ticker); p = malloc(huge0); @@ -95,129 +100,99 @@ TEST_BEGIN(test_decay_ticks) tick1 = ticker_read(decay_ticker); assert_u32_ne(tick1, tick0, "Expected ticker to tick during realloc()"); - /* Huge mallocx(). */ - tick0 = ticker_read(decay_ticker); - p = mallocx(huge0, 0); - assert_ptr_not_null(p, "Unexpected mallocx() failure"); - tick1 = ticker_read(decay_ticker); - assert_u32_ne(tick1, tick0, - "Expected ticker to tick during huge mallocx()"); - /* Huge rallocx(). */ - tick0 = ticker_read(decay_ticker); - p = rallocx(p, huge0, 0); - assert_ptr_not_null(p, "Unexpected rallocx() failure"); - tick1 = ticker_read(decay_ticker); - assert_u32_ne(tick1, tick0, - "Expected ticker to tick during huge rallocx()"); - /* Huge xallocx(). */ - tick0 = ticker_read(decay_ticker); - xallocx(p, huge0, 0, 0); - tick1 = ticker_read(decay_ticker); - assert_u32_ne(tick1, tick0, - "Expected ticker to tick during huge xallocx()"); - /* Huge dallocx(). */ - tick0 = ticker_read(decay_ticker); - dallocx(p, 0); - tick1 = ticker_read(decay_ticker); - assert_u32_ne(tick1, tick0, - "Expected ticker to tick during huge dallocx()"); - /* Huge sdallocx(). 
*/ - p = mallocx(huge0, 0); - assert_ptr_not_null(p, "Unexpected mallocx() failure"); - tick0 = ticker_read(decay_ticker); - sdallocx(p, huge0, 0); - tick1 = ticker_read(decay_ticker); - assert_u32_ne(tick1, tick0, - "Expected ticker to tick during huge sdallocx()"); + /* + * Test the *allocx() APIs using huge, large, and small size classes, + * with tcache explicitly disabled. + */ + { + unsigned i; + size_t allocx_sizes[3]; + allocx_sizes[0] = huge0; + allocx_sizes[1] = large0; + allocx_sizes[2] = 1; - /* Large mallocx(). */ - tick0 = ticker_read(decay_ticker); - p = mallocx(large0, MALLOCX_TCACHE_NONE); - assert_ptr_not_null(p, "Unexpected mallocx() failure"); - tick1 = ticker_read(decay_ticker); - assert_u32_ne(tick1, tick0, - "Expected ticker to tick during large mallocx()"); - /* Large rallocx(). */ - tick0 = ticker_read(decay_ticker); - p = rallocx(p, large0, MALLOCX_TCACHE_NONE); - assert_ptr_not_null(p, "Unexpected rallocx() failure"); - tick1 = ticker_read(decay_ticker); - assert_u32_ne(tick1, tick0, - "Expected ticker to tick during large rallocx()"); - /* Large xallocx(). */ - tick0 = ticker_read(decay_ticker); - xallocx(p, large0, 0, MALLOCX_TCACHE_NONE); - tick1 = ticker_read(decay_ticker); - assert_u32_ne(tick1, tick0, - "Expected ticker to tick during large xallocx()"); - /* Large dallocx(). */ - tick0 = ticker_read(decay_ticker); - dallocx(p, MALLOCX_TCACHE_NONE); - tick1 = ticker_read(decay_ticker); - assert_u32_ne(tick1, tick0, - "Expected ticker to tick during large dallocx()"); - /* Large sdallocx(). */ - p = mallocx(large0, MALLOCX_TCACHE_NONE); - assert_ptr_not_null(p, "Unexpected mallocx() failure"); - tick0 = ticker_read(decay_ticker); - sdallocx(p, large0, MALLOCX_TCACHE_NONE); - tick1 = ticker_read(decay_ticker); - assert_u32_ne(tick1, tick0, - "Expected ticker to tick during large sdallocx()"); + for (i = 0; i < sizeof(allocx_sizes) / sizeof(size_t); i++) { + sz = allocx_sizes[i]; - /* Small mallocx(). */ - tick0 = ticker_read(decay_ticker); - p = mallocx(1, MALLOCX_TCACHE_NONE); - assert_ptr_not_null(p, "Unexpected mallocx() failure"); - tick1 = ticker_read(decay_ticker); - assert_u32_ne(tick1, tick0, - "Expected ticker to tick during small mallocx()"); - /* Small rallocx(). */ - tick0 = ticker_read(decay_ticker); - p = rallocx(p, 1, MALLOCX_TCACHE_NONE); - assert_ptr_not_null(p, "Unexpected rallocx() failure"); - tick1 = ticker_read(decay_ticker); - assert_u32_ne(tick1, tick0, - "Expected ticker to tick during small rallocx()"); - /* Small xallocx(). */ - tick0 = ticker_read(decay_ticker); - xallocx(p, 1, 0, MALLOCX_TCACHE_NONE); - tick1 = ticker_read(decay_ticker); - assert_u32_ne(tick1, tick0, - "Expected ticker to tick during small xallocx()"); - /* Small dallocx(). */ - tick0 = ticker_read(decay_ticker); - dallocx(p, MALLOCX_TCACHE_NONE); - tick1 = ticker_read(decay_ticker); - assert_u32_ne(tick1, tick0, - "Expected ticker to tick during small dallocx()"); - /* Small sdallocx(). */ - p = mallocx(1, MALLOCX_TCACHE_NONE); - assert_ptr_not_null(p, "Unexpected mallocx() failure"); - tick0 = ticker_read(decay_ticker); - sdallocx(p, 1, MALLOCX_TCACHE_NONE); - tick1 = ticker_read(decay_ticker); - assert_u32_ne(tick1, tick0, - "Expected ticker to tick during small sdallocx()"); + /* mallocx(). 
*/ + tick0 = ticker_read(decay_ticker); + p = mallocx(sz, MALLOCX_TCACHE_NONE); + assert_ptr_not_null(p, "Unexpected mallocx() failure"); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, + "Expected ticker to tick during mallocx() (sz=%zu)", + sz); + /* rallocx(). */ + tick0 = ticker_read(decay_ticker); + p = rallocx(p, sz, MALLOCX_TCACHE_NONE); + assert_ptr_not_null(p, "Unexpected rallocx() failure"); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, + "Expected ticker to tick during rallocx() (sz=%zu)", + sz); + /* xallocx(). */ + tick0 = ticker_read(decay_ticker); + xallocx(p, sz, 0, MALLOCX_TCACHE_NONE); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, + "Expected ticker to tick during xallocx() (sz=%zu)", + sz); + /* dallocx(). */ + tick0 = ticker_read(decay_ticker); + dallocx(p, MALLOCX_TCACHE_NONE); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, + "Expected ticker to tick during dallocx() (sz=%zu)", + sz); + /* sdallocx(). */ + p = mallocx(sz, MALLOCX_TCACHE_NONE); + assert_ptr_not_null(p, "Unexpected mallocx() failure"); + tick0 = ticker_read(decay_ticker); + sdallocx(p, sz, MALLOCX_TCACHE_NONE); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, + "Expected ticker to tick during sdallocx() " + "(sz=%zu)", sz); + } + } - /* tcache fill. */ - sz = sizeof(unsigned); - assert_d_eq(mallctl("tcache.create", &tcache_ind, &sz, NULL, 0), 0, - "Unexpected mallctl failure"); - tick0 = ticker_read(decay_ticker); - p = mallocx(1, MALLOCX_TCACHE(tcache_ind)); - assert_ptr_not_null(p, "Unexpected mallocx() failure"); - tick1 = ticker_read(decay_ticker); - assert_u32_ne(tick1, tick0, - "Expected ticker to tick during tcache fill"); - /* tcache flush. */ - dallocx(p, MALLOCX_TCACHE(tcache_ind)); - tick0 = ticker_read(decay_ticker); - assert_d_eq(mallctl("tcache.flush", NULL, NULL, &tcache_ind, - sizeof(unsigned)), 0, "Unexpected mallctl failure"); - tick1 = ticker_read(decay_ticker); - assert_u32_ne(tick1, tick0, - "Expected ticker to tick during tcache flush"); + /* + * Test tcache fill/flush interactions for large and small size classes, + * using an explicit tcache. + */ + { + unsigned tcache_ind, i; + size_t tcache_sizes[2]; + tcache_sizes[0] = large0; + tcache_sizes[1] = 1; + + sz = sizeof(unsigned); + assert_d_eq(mallctl("tcache.create", &tcache_ind, &sz, NULL, 0), + 0, "Unexpected mallctl failure"); + + for (i = 0; i < sizeof(tcache_sizes) / sizeof(size_t); i++) { + sz = tcache_sizes[i]; + + /* tcache fill. */ + tick0 = ticker_read(decay_ticker); + p = mallocx(sz, MALLOCX_TCACHE(tcache_ind)); + assert_ptr_not_null(p, "Unexpected mallocx() failure"); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, + "Expected ticker to tick during tcache fill " + "(sz=%zu)", sz); + /* tcache flush. */ + dallocx(p, MALLOCX_TCACHE(tcache_ind)); + tick0 = ticker_read(decay_ticker); + assert_d_eq(mallctl("tcache.flush", NULL, NULL, + &tcache_ind, sizeof(unsigned)), 0, + "Unexpected mallctl failure"); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, + "Expected ticker to tick during tcache flush " + "(sz=%zu)", sz); + } + } } TEST_END From a0aaad1afa8c1c4b30bf15c6b8744084ffc32055 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 20 Feb 2016 10:23:48 -0800 Subject: [PATCH 35/96] Handle unaligned keys in hash(). Reported by Christopher Ferris . 
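For background, the fix uses the standard portable idiom for unaligned reads: memcpy() the bytes into a properly aligned local, which compilers typically lower to a single unaligned load on architectures that permit one. A minimal standalone sketch of the idiom (hypothetical helper name, not part of the patch):

  #include <stdint.h>
  #include <string.h>

  /* Load a uint32_t from p; valid for any alignment of p. */
  static uint32_t
  load_u32(const void *p)
  {
  	uint32_t v;

  	memcpy(&v, p, sizeof(v));
  	return (v);
  }

The reworked unit test below drives each hash variant over keys at every starting offset from 0 through MAX_ALIGN - 1, so the misaligned path is exercised regardless of how the stack buffer happens to be aligned.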
--- include/jemalloc/internal/hash.h | 18 +++++++++++++++++- test/unit/hash.c | 19 ++++++++++++++++--- 2 files changed, 33 insertions(+), 4 deletions(-) diff --git a/include/jemalloc/internal/hash.h b/include/jemalloc/internal/hash.h index bcead337..8b5fb037 100644 --- a/include/jemalloc/internal/hash.h +++ b/include/jemalloc/internal/hash.h @@ -1,6 +1,6 @@ /* * The following hash function is based on MurmurHash3, placed into the public - * domain by Austin Appleby. See http://code.google.com/p/smhasher/ for + * domain by Austin Appleby. See https://github.com/aappleby/smhasher for * details. */ /******************************************************************************/ @@ -49,6 +49,14 @@ JEMALLOC_INLINE uint32_t hash_get_block_32(const uint32_t *p, int i) { + /* Handle unaligned read. */ + if (unlikely((uintptr_t)p & (sizeof(uint32_t)-1)) != 0) { + uint32_t ret; + + memcpy(&ret, &p[i], sizeof(uint32_t)); + return (ret); + } + return (p[i]); } @@ -56,6 +64,14 @@ JEMALLOC_INLINE uint64_t hash_get_block_64(const uint64_t *p, int i) { + /* Handle unaligned read. */ + if (unlikely((uintptr_t)p & (sizeof(uint64_t)-1)) != 0) { + uint64_t ret; + + memcpy(&ret, &p[i], sizeof(uint64_t)); + return (ret); + } + return (p[i]); } diff --git a/test/unit/hash.c b/test/unit/hash.c index 77a8cede..ea73d701 100644 --- a/test/unit/hash.c +++ b/test/unit/hash.c @@ -59,17 +59,17 @@ hash_variant_string(hash_variant_t variant) } } +#define KEY_SIZE 256 static void -hash_variant_verify(hash_variant_t variant) +hash_variant_verify_key(hash_variant_t variant, uint8_t *key) { const size_t hashbytes = hash_variant_bits(variant) / 8; - uint8_t key[256]; VARIABLE_ARRAY(uint8_t, hashes, hashbytes * 256); VARIABLE_ARRAY(uint8_t, final, hashbytes); unsigned i; uint32_t computed, expected; - memset(key, 0, sizeof(key)); + memset(key, 0, KEY_SIZE); memset(hashes, 0, sizeof(hashes)); memset(final, 0, sizeof(final)); @@ -139,6 +139,19 @@ hash_variant_verify(hash_variant_t variant) hash_variant_string(variant), expected, computed); } +static void +hash_variant_verify(hash_variant_t variant) +{ +#define MAX_ALIGN 16 + uint8_t key[KEY_SIZE + (MAX_ALIGN - 1)]; + unsigned i; + + for (i = 0; i < MAX_ALIGN; i++) + hash_variant_verify_key(variant, &key[i]); +#undef MAX_ALIGN +} +#undef KEY_SIZE + TEST_BEGIN(test_hash_x86_32) { From effaf7d40fba191386162e907195b0198c75866a Mon Sep 17 00:00:00 2001 From: Christopher Ferris Date: Sat, 20 Feb 2016 10:26:17 -0800 Subject: [PATCH 36/96] Fix a typo in the ckh_search() prototype. 
--- include/jemalloc/internal/ckh.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/jemalloc/internal/ckh.h b/include/jemalloc/internal/ckh.h index 45fb3455..f75ad90b 100644 --- a/include/jemalloc/internal/ckh.h +++ b/include/jemalloc/internal/ckh.h @@ -72,7 +72,7 @@ bool ckh_iter(ckh_t *ckh, size_t *tabind, void **key, void **data); bool ckh_insert(tsd_t *tsd, ckh_t *ckh, const void *key, const void *data); bool ckh_remove(tsd_t *tsd, ckh_t *ckh, const void *searchkey, void **key, void **data); -bool ckh_search(ckh_t *ckh, const void *seachkey, void **key, void **data); +bool ckh_search(ckh_t *ckh, const void *searchkey, void **key, void **data); void ckh_string_hash(const void *key, size_t r_hash[2]); bool ckh_string_keycomp(const void *k1, const void *k2); void ckh_pointer_hash(const void *key, size_t r_hash[2]); From 46e0b2301c0e0ee71f5714d0cdf320ba2d027271 Mon Sep 17 00:00:00 2001 From: rustyx Date: Sat, 30 Jan 2016 13:37:26 +0100 Subject: [PATCH 37/96] Detect LG_SIZEOF_PTR depending on MSVC platform target --- configure.ac | 17 +++++++++++------ include/jemalloc/jemalloc_defs.h.in | 8 ++++++++ 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/configure.ac b/configure.ac index 5232c8f3..9a489d98 100644 --- a/configure.ac +++ b/configure.ac @@ -166,13 +166,18 @@ if test "x${je_cv_msvc}" = "xyes" -a "x${ac_cv_header_inttypes_h}" = "xno"; then CPPFLAGS="$CPPFLAGS -I${srcdir}/include/msvc_compat/C99" fi -AC_CHECK_SIZEOF([void *]) -if test "x${ac_cv_sizeof_void_p}" = "x8" ; then - LG_SIZEOF_PTR=3 -elif test "x${ac_cv_sizeof_void_p}" = "x4" ; then - LG_SIZEOF_PTR=2 +if test "x${je_cv_msvc}" = "xyes" ; then + LG_SIZEOF_PTR=LG_SIZEOF_PTR_WIN + AC_MSG_RESULT([Using a predefined value for sizeof(void *): 4 for 32-bit, 8 for 64-bit]) else - AC_MSG_ERROR([Unsupported pointer size: ${ac_cv_sizeof_void_p}]) + AC_CHECK_SIZEOF([void *]) + if test "x${ac_cv_sizeof_void_p}" = "x8" ; then + LG_SIZEOF_PTR=3 + elif test "x${ac_cv_sizeof_void_p}" = "x4" ; then + LG_SIZEOF_PTR=2 + else + AC_MSG_ERROR([Unsupported pointer size: ${ac_cv_sizeof_void_p}]) + fi fi AC_DEFINE_UNQUOTED([LG_SIZEOF_PTR], [$LG_SIZEOF_PTR]) diff --git a/include/jemalloc/jemalloc_defs.h.in b/include/jemalloc/jemalloc_defs.h.in index ab13c375..6d89435c 100644 --- a/include/jemalloc/jemalloc_defs.h.in +++ b/include/jemalloc/jemalloc_defs.h.in @@ -33,5 +33,13 @@ */ #undef JEMALLOC_USE_CXX_THROW +#ifdef _MSC_VER +# ifdef _WIN64 +# define LG_SIZEOF_PTR_WIN 3 +# else +# define LG_SIZEOF_PTR_WIN 2 +# endif +#endif + /* sizeof(void *) == 2^LG_SIZEOF_PTR. 
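Under MSVC, configure substitutes the symbolic value LG_SIZEOF_PTR_WIN, which the block above resolves to 2 (4-byte pointers) unless _WIN64 is defined, in which case it resolves to 3 (8-byte pointers); this avoids running a configure-time sizeof probe with cl. A build could sanity-check the relation at compile time with something like (illustrative only, not part of the patch): typedef char lg_sizeof_ptr_check[sizeof(void *) == ((size_t)1 << LG_SIZEOF_PTR) ? 1 : -1];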
*/ #undef LG_SIZEOF_PTR From bc49863fb5c59d5a0ff63845464d901ef00c7845 Mon Sep 17 00:00:00 2001 From: rustyx Date: Sat, 30 Jan 2016 13:38:33 +0100 Subject: [PATCH 38/96] Fix error "+ 2")syntax error: invalid arithmetic operator (error token is " in Cygwin x64 --- configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 9a489d98..3ae468ab 100644 --- a/configure.ac +++ b/configure.ac @@ -1116,7 +1116,7 @@ if test "x$LG_PAGE" = "xdetect"; then if (f == NULL) { return 1; } - fprintf(f, "%d\n", result); + fprintf(f, "%d", result); fclose(f); return 0; From 90c7269c0588bd6d49bf27ba05a261744ad97165 Mon Sep 17 00:00:00 2001 From: rustyx Date: Sat, 30 Jan 2016 13:41:09 +0100 Subject: [PATCH 39/96] Add CPU "pause" intrinsic for MSVC --- configure.ac | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/configure.ac b/configure.ac index 3ae468ab..8adb6f7b 100644 --- a/configure.ac +++ b/configure.ac @@ -218,12 +218,22 @@ dnl CPU-specific settings. CPU_SPINWAIT="" case "${host_cpu}" in i686|x86_64) - AC_CACHE_VAL([je_cv_pause], - [JE_COMPILABLE([pause instruction], [], - [[__asm__ volatile("pause"); return 0;]], - [je_cv_pause])]) - if test "x${je_cv_pause}" = "xyes" ; then - CPU_SPINWAIT='__asm__ volatile("pause")' + if test "x${je_cv_msvc}" = "xyes" ; then + AC_CACHE_VAL([je_cv_pause_msvc], + [JE_COMPILABLE([pause instruction MSVC], [], + [[_mm_pause(); return 0;]], + [je_cv_pause_msvc])]) + if test "x${je_cv_pause_msvc}" = "xyes" ; then + CPU_SPINWAIT='_mm_pause()' + fi + else + AC_CACHE_VAL([je_cv_pause], + [JE_COMPILABLE([pause instruction], [], + [[__asm__ volatile("pause"); return 0;]], + [je_cv_pause])]) + if test "x${je_cv_pause}" = "xyes" ; then + CPU_SPINWAIT='__asm__ volatile("pause")' + fi fi ;; powerpc) From 7f283980f00f0543e97f46567fbe5bdd4d732724 Mon Sep 17 00:00:00 2001 From: rustyx Date: Sat, 30 Jan 2016 14:51:16 +0100 Subject: [PATCH 40/96] getpid() fix for Win32 --- include/msvc_compat/windows_extra.h | 2 ++ src/prof.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/include/msvc_compat/windows_extra.h b/include/msvc_compat/windows_extra.h index 0c5e323f..114f43b1 100644 --- a/include/msvc_compat/windows_extra.h +++ b/include/msvc_compat/windows_extra.h @@ -23,4 +23,6 @@ # define ERANGE ERROR_INVALID_DATA #endif +#define getpid() GetCurrentProcessId() + #endif /* MSVC_COMPAT_WINDOWS_EXTRA_H */ diff --git a/src/prof.c b/src/prof.c index 31f5e601..3abb38e2 100644 --- a/src/prof.c +++ b/src/prof.c @@ -1384,6 +1384,8 @@ prof_dump_maps(bool propagate_err) cassert(config_prof); #ifdef __FreeBSD__ mfd = prof_open_maps("/proc/curproc/map"); +#elif defined(_WIN32) + mfd = -1; // Not implemented #else { int pid = getpid(); From efbee8627873fbcee454319573cdf94816b25824 Mon Sep 17 00:00:00 2001 From: rustyx Date: Tue, 2 Feb 2016 11:27:18 +0100 Subject: [PATCH 41/96] Prevent MSVC from optimizing away tls_callback (resolves #318) --- src/tsd.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/tsd.c b/src/tsd.c index b85b8b9d..34c1573c 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -148,13 +148,15 @@ _tls_callback(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved) #ifdef _MSC_VER # ifdef _M_IX86 # pragma comment(linker, "/INCLUDE:__tls_used") +# pragma comment(linker, "/INCLUDE:_tls_callback") # else # pragma comment(linker, "/INCLUDE:_tls_used") +# pragma comment(linker, "/INCLUDE:tls_callback") # endif # pragma section(".CRT$XLY",long,read) #endif JEMALLOC_SECTION(".CRT$XLY") 
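/* The paired /INCLUDE pragmas above force the linker to retain both the _tls_used anchor and the callback symbol, and dropping `static' below gives tls_callback external linkage so the pragmas can name it; without this, MSVC could dead-strip the .CRT$XLY entry and the TLS callback would never run. */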
JEMALLOC_ATTR(used) -static BOOL (WINAPI *const tls_callback)(HINSTANCE hinstDLL, +BOOL (WINAPI *const tls_callback)(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved) = _tls_callback; #endif From 3c2c5a5071416b9be6a114ccbc7796443b24f1cd Mon Sep 17 00:00:00 2001 From: rustyx Date: Tue, 2 Feb 2016 11:52:07 +0100 Subject: [PATCH 42/96] Fix warning in ipalloc --- include/jemalloc/internal/jemalloc_internal.h.in | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 3b2f75d6..aa97d7c7 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -1001,8 +1001,8 @@ JEMALLOC_ALWAYS_INLINE void * ipalloc(tsd_t *tsd, size_t usize, size_t alignment, bool zero) { - return (ipallocztm(tsd, usize, alignment, zero, tcache_get(tsd, - NULL), false, NULL)); + return (ipallocztm(tsd, usize, alignment, zero, tcache_get(tsd, true), + false, NULL)); } JEMALLOC_ALWAYS_INLINE size_t From 984c64f724bfeb73e32251801e6df6ab6df53d15 Mon Sep 17 00:00:00 2001 From: rustyx Date: Tue, 2 Feb 2016 11:52:41 +0100 Subject: [PATCH 43/96] Add MS Visual Studio 2015 support --- .gitignore | 16 + msvc/ReadMe.txt | 24 ++ msvc/jemalloc_vc2015.sln | 63 +++ .../projects/vc2015/jemalloc/jemalloc.vcxproj | 391 ++++++++++++++++++ .../vc2015/jemalloc/jemalloc.vcxproj.filters | 242 +++++++++++ .../vc2015/test_threads/test_threads.cpp | 100 +++++ .../vc2015/test_threads/test_threads.h | 3 + .../vc2015/test_threads/test_threads.vcxproj | 327 +++++++++++++++ .../test_threads/test_threads.vcxproj.filters | 26 ++ .../vc2015/test_threads/test_threads_main.cpp | 12 + 10 files changed, 1204 insertions(+) create mode 100644 msvc/ReadMe.txt create mode 100644 msvc/jemalloc_vc2015.sln create mode 100644 msvc/projects/vc2015/jemalloc/jemalloc.vcxproj create mode 100644 msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters create mode 100644 msvc/projects/vc2015/test_threads/test_threads.cpp create mode 100644 msvc/projects/vc2015/test_threads/test_threads.h create mode 100644 msvc/projects/vc2015/test_threads/test_threads.vcxproj create mode 100644 msvc/projects/vc2015/test_threads/test_threads.vcxproj.filters create mode 100644 msvc/projects/vc2015/test_threads/test_threads_main.cpp diff --git a/.gitignore b/.gitignore index d0e39361..08278d08 100644 --- a/.gitignore +++ b/.gitignore @@ -73,3 +73,19 @@ test/include/test/jemalloc_test_defs.h /test/unit/*.out /VERSION + +*.pdb +*.sdf +*.opendb +*.opensdf +*.cachefile +*.suo +*.user +*.sln.docstates +*.tmp +/msvc/Win32/ +/msvc/x64/ +/msvc/projects/*/*/Debug*/ +/msvc/projects/*/*/Release*/ +/msvc/projects/*/*/Win32/ +/msvc/projects/*/*/x64/ diff --git a/msvc/ReadMe.txt b/msvc/ReadMe.txt new file mode 100644 index 00000000..02b97f74 --- /dev/null +++ b/msvc/ReadMe.txt @@ -0,0 +1,24 @@ + +How to build jemalloc for Windows +================================= + +1. Install Cygwin with at least the following packages: + * autoconf + * autogen + * gawk + * grep + * sed + +2. Install Visual Studio 2015 with Visual C++ + +3. Add Cygwin\bin to the PATH environment variable + +4. Open "VS2015 x86 Native Tools Command Prompt" + (note: x86/x64 doesn't matter at this point) + +5. Generate header files: + sh -c "./autogen.sh CC=cl --enable-lazy-lock=no" + +6. 
Now the project can be opened and built in Visual Studio: + msvc\jemalloc_vc2015.sln + diff --git a/msvc/jemalloc_vc2015.sln b/msvc/jemalloc_vc2015.sln new file mode 100644 index 00000000..aedd5e5e --- /dev/null +++ b/msvc/jemalloc_vc2015.sln @@ -0,0 +1,63 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 14 +VisualStudioVersion = 14.0.24720.0 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{70A99006-6DE9-472B-8F83-4CEE6C616DF3}" + ProjectSection(SolutionItems) = preProject + ReadMe.txt = ReadMe.txt + EndProjectSection +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "jemalloc", "projects\vc2015\jemalloc\jemalloc.vcxproj", "{8D6BB292-9E1C-413D-9F98-4864BDC1514A}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "test_threads", "projects\vc2015\test_threads\test_threads.vcxproj", "{09028CFD-4EB7-491D-869C-0708DB97ED44}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|x64 = Debug|x64 + Debug|x86 = Debug|x86 + Debug-static|x64 = Debug-static|x64 + Debug-static|x86 = Debug-static|x86 + Release|x64 = Release|x64 + Release|x86 = Release|x86 + Release-static|x64 = Release-static|x64 + Release-static|x86 = Release-static|x86 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug|x64.ActiveCfg = Debug|x64 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug|x64.Build.0 = Debug|x64 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug|x86.ActiveCfg = Debug|Win32 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug|x86.Build.0 = Debug|Win32 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug-static|x64.ActiveCfg = Debug-static|x64 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug-static|x64.Build.0 = Debug-static|x64 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug-static|x86.ActiveCfg = Debug-static|Win32 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug-static|x86.Build.0 = Debug-static|Win32 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release|x64.ActiveCfg = Release|x64 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release|x64.Build.0 = Release|x64 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release|x86.ActiveCfg = Release|Win32 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release|x86.Build.0 = Release|Win32 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release-static|x64.ActiveCfg = Release-static|x64 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release-static|x64.Build.0 = Release-static|x64 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release-static|x86.ActiveCfg = Release-static|Win32 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release-static|x86.Build.0 = Release-static|Win32 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug|x64.ActiveCfg = Debug|x64 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug|x64.Build.0 = Debug|x64 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug|x86.ActiveCfg = Debug|Win32 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug|x86.Build.0 = Debug|Win32 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug-static|x64.ActiveCfg = Debug-static|x64 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug-static|x64.Build.0 = Debug-static|x64 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug-static|x86.ActiveCfg = Debug-static|Win32 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug-static|x86.Build.0 = Debug-static|Win32 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release|x64.ActiveCfg = Release|x64 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release|x64.Build.0 = Release|x64 + 
{09028CFD-4EB7-491D-869C-0708DB97ED44}.Release|x86.ActiveCfg = Release|Win32 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release|x86.Build.0 = Release|Win32 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release-static|x64.ActiveCfg = Release-static|x64 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release-static|x64.Build.0 = Release-static|x64 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release-static|x86.ActiveCfg = Release-static|Win32 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release-static|x86.Build.0 = Release-static|Win32 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj new file mode 100644 index 00000000..395837c3 --- /dev/null +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -0,0 +1,391 @@ + + + + + Debug-static + Win32 + + + Debug-static + x64 + + + Debug + Win32 + + + Release-static + Win32 + + + Release-static + x64 + + + Release + Win32 + + + Debug + x64 + + + Release + x64 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + {8D6BB292-9E1C-413D-9F98-4864BDC1514A} + Win32Proj + jemalloc + 8.1 + + + + DynamicLibrary + true + v140 + MultiByte + + + StaticLibrary + true + v140 + MultiByte + + + DynamicLibrary + false + v140 + true + MultiByte + + + StaticLibrary + false + v140 + true + MultiByte + + + DynamicLibrary + true + v140 + MultiByte + + + StaticLibrary + true + v140 + MultiByte + + + DynamicLibrary + false + v140 + true + MultiByte + + + StaticLibrary + false + v140 + true + MultiByte + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + $(ProjectName)d + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + $(ProjectName)-$(PlatformToolset)-$(Configuration) + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + $(ProjectName)-$(PlatformToolset)-$(Configuration) + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + $(ProjectName)d + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + $(ProjectName)-$(PlatformToolset)-$(Configuration) + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + $(ProjectName)-$(PlatformToolset)-$(Configuration) + + + + + + Level3 + Disabled + _REENTRANT;_WINDLL;DLLEXPORT;JEMALLOC_DEBUG;_DEBUG;%(PreprocessorDefinitions) + ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + 4090;4146;4244;4267;4334 + $(OutputPath)$(TargetName).pdb + + + Windows + true + + + + + + + Level3 + Disabled + JEMALLOC_DEBUG;_REENTRANT;JEMALLOC_EXPORT=;_DEBUG;_LIB;%(PreprocessorDefinitions) + ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + MultiThreadedDebug + 4090;4146;4244;4267;4334 + $(OutputPath)$(TargetName).pdb + + + Windows + true + + + + + + + Level3 + Disabled + _REENTRANT;_WINDLL;DLLEXPORT;JEMALLOC_DEBUG;_DEBUG;%(PreprocessorDefinitions) + ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + 4090;4146;4244;4267;4334 + $(OutputPath)$(TargetName).pdb + + + Windows + true + + + + + + + Level3 
+ Disabled + JEMALLOC_DEBUG;_REENTRANT;JEMALLOC_EXPORT=;_DEBUG;_LIB;%(PreprocessorDefinitions) + ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + MultiThreadedDebug + 4090;4146;4244;4267;4334 + $(OutputPath)$(TargetName).pdb + + + Windows + true + + + + + Level3 + + + MaxSpeed + true + true + _REENTRANT;_WINDLL;DLLEXPORT;NDEBUG;%(PreprocessorDefinitions) + ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + 4090;4146;4244;4267;4334 + $(OutputPath)$(TargetName).pdb + + + Windows + true + true + true + + + + + Level3 + + + MaxSpeed + true + true + _REENTRANT;JEMALLOC_EXPORT=;NDEBUG;_LIB;%(PreprocessorDefinitions) + ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + MultiThreaded + 4090;4146;4244;4267;4334 + $(OutputPath)$(TargetName).pdb + + + Windows + true + true + true + + + + + Level3 + + + MaxSpeed + true + true + ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + _REENTRANT;_WINDLL;DLLEXPORT;NDEBUG;%(PreprocessorDefinitions) + 4090;4146;4244;4267;4334 + $(OutputPath)$(TargetName).pdb + + + Windows + true + true + true + + + + + Level3 + + + MaxSpeed + true + true + _REENTRANT;JEMALLOC_EXPORT=;NDEBUG;_LIB;%(PreprocessorDefinitions) + ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + MultiThreaded + 4090;4146;4244;4267;4334 + $(OutputPath)$(TargetName).pdb + + + Windows + true + true + true + + + + + + \ No newline at end of file diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters new file mode 100644 index 00000000..69f64169 --- /dev/null +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -0,0 +1,242 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;xsd + + + {5697dfa3-16cf-4932-b428-6e0ec6e9f98e} + + + {0cbd2ca6-42a7-4f82-8517-d7e7a14fd986} + + + {0abe6f30-49b5-46dd-8aca-6e33363fa52c} + + + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\msvc_compat + + + Header Files\msvc_compat + + + Header Files\msvc_compat\C99 + + + Header Files\msvc_compat\C99 + + + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source 
Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + \ No newline at end of file diff --git a/msvc/projects/vc2015/test_threads/test_threads.cpp b/msvc/projects/vc2015/test_threads/test_threads.cpp new file mode 100644 index 00000000..046843f3 --- /dev/null +++ b/msvc/projects/vc2015/test_threads/test_threads.cpp @@ -0,0 +1,100 @@ +// jemalloc C++ threaded test +// Author: Rustam Abdullaev +// Public Domain + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using std::vector; +using std::thread; +using std::uniform_int_distribution; +using std::minstd_rand; + +#if NDEBUG && JEMALLOC_ISSUE_318_WORKAROUND +extern "C" JEMALLOC_EXPORT void _malloc_thread_cleanup(void); + +static thread_local struct JeMallocThreadHelper { + ~JeMallocThreadHelper() { + _malloc_thread_cleanup(); + } +} tls_jemallocThreadHelper; +#endif + +int test_threads() +{ + je_malloc_conf = "narenas:3"; + int narenas = 0; + size_t sz = sizeof(narenas); + je_mallctl("opt.narenas", &narenas, &sz, NULL, 0); + if (narenas != 3) { + printf("Error: unexpected number of arenas: %d\n", narenas); + return 1; + } + static const int sizes[] = { 7, 16, 32, 60, 91, 100, 120, 144, 169, 199, 255, 400, 670, 900, 917, 1025, 3333, 5190, 13131, 49192, 99999, 123123, 255265, 2333111 }; + static const int numSizes = (int)(sizeof(sizes) / sizeof(sizes[0])); + vector workers; + static const int numThreads = narenas + 1, numAllocsMax = 25, numIter1 = 50, numIter2 = 50; + je_malloc_stats_print(NULL, NULL, NULL); + size_t allocated1; + size_t sz1 = sizeof(allocated1); + je_mallctl("stats.active", &allocated1, &sz1, NULL, 0); + printf("\nPress Enter to start threads...\n"); + getchar(); + printf("Starting %d threads x %d x %d iterations...\n", numThreads, numIter1, numIter2); + for (int i = 0; i < numThreads; i++) { + workers.emplace_back([tid=i]() { + uniform_int_distribution sizeDist(0, numSizes - 1); + minstd_rand rnd(tid * 17); + uint8_t* ptrs[numAllocsMax]; + int ptrsz[numAllocsMax]; + for (int i = 0; i < numIter1; ++i) { + thread t([&]() { + for (int i = 0; i < numIter2; ++i) { + const int numAllocs = numAllocsMax - sizeDist(rnd); + for (int j = 0; j < numAllocs; j++) { + const int x = sizeDist(rnd); + const int sz = sizes[x]; + ptrsz[j] = sz; + ptrs[j] = (uint8_t*)je_malloc(sz); + if (!ptrs[j]) { + printf("Unable to allocate %d bytes in thread %d, iter %d, alloc %d. %d", sz, tid, i, j, x); + exit(1); + } + for (int k = 0; k < sz; k++) + ptrs[j][k] = tid + k; + } + for (int j = 0; j < numAllocs; j++) { + for (int k = 0, sz = ptrsz[j]; k < sz; k++) + if (ptrs[j][k] != (uint8_t)(tid + k)) { + printf("Memory error in thread %d, iter %d, alloc %d @ %d : %02X!=%02X", tid, i, j, k, ptrs[j][k], (uint8_t)(tid + k)); + exit(1); + } + je_free(ptrs[j]); + } + } + }); + t.join(); + } + }); + } + for (thread& t : workers) { + t.join(); + } + je_malloc_stats_print(NULL, NULL, NULL); + size_t allocated2; + je_mallctl("stats.active", &allocated2, &sz1, NULL, 0); + size_t leaked = allocated2 - allocated1; + printf("\nDone. Leaked: %Id bytes\n", leaked); + bool failed = leaked > 65536; // in case C++ runtime allocated something (e.g. iostream locale or facet) + printf("\nTest %s!\n", (failed ? 
"FAILED" : "successful")); + printf("\nPress Enter to continue...\n"); + getchar(); + return failed ? 1 : 0; +} diff --git a/msvc/projects/vc2015/test_threads/test_threads.h b/msvc/projects/vc2015/test_threads/test_threads.h new file mode 100644 index 00000000..64d0cdb3 --- /dev/null +++ b/msvc/projects/vc2015/test_threads/test_threads.h @@ -0,0 +1,3 @@ +#pragma once + +int test_threads(); diff --git a/msvc/projects/vc2015/test_threads/test_threads.vcxproj b/msvc/projects/vc2015/test_threads/test_threads.vcxproj new file mode 100644 index 00000000..b681e71e --- /dev/null +++ b/msvc/projects/vc2015/test_threads/test_threads.vcxproj @@ -0,0 +1,327 @@ + + + + + Debug-static + Win32 + + + Debug-static + x64 + + + Debug + Win32 + + + Release-static + Win32 + + + Release-static + x64 + + + Release + Win32 + + + Debug + x64 + + + Release + x64 + + + + {09028CFD-4EB7-491D-869C-0708DB97ED44} + Win32Proj + test_threads + 8.1 + + + + Application + true + v140 + MultiByte + + + Application + true + v140 + MultiByte + + + Application + false + v140 + true + MultiByte + + + Application + false + v140 + true + MultiByte + + + Application + true + v140 + MultiByte + + + Application + true + v140 + MultiByte + + + Application + false + v140 + true + MultiByte + + + Application + false + v140 + true + MultiByte + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + true + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + true + + + true + $(SolutionDir)$(Platform)\$(Configuration)\ + + + true + $(SolutionDir)$(Platform)\$(Configuration)\ + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + false + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + false + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + false + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + false + + + + + + Level3 + Disabled + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + + + Console + true + $(SolutionDir)$(Platform)\$(Configuration) + jemallocd.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + + + Level3 + Disabled + JEMALLOC_EXPORT=;JEMALLOC_STATIC;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + MultiThreadedDebug + + + Console + true + $(SolutionDir)$(Platform)\$(Configuration) + jemalloc-$(PlatformToolset)-$(Configuration).lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + + + Level3 + Disabled + _DEBUG;%(PreprocessorDefinitions) + ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + + + Console + true + jemallocd.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + $(SolutionDir)$(Platform)\$(Configuration) + + + + + + + Level3 + Disabled + JEMALLOC_EXPORT=;JEMALLOC_STATIC;_DEBUG;%(PreprocessorDefinitions) + 
..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + MultiThreadedDebug + + + Console + true + jemalloc-$(PlatformToolset)-$(Configuration).lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + $(SolutionDir)$(Platform)\$(Configuration) + + + + + Level3 + + + MaxSpeed + true + true + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + + + Console + true + true + true + $(SolutionDir)$(Platform)\$(Configuration) + jemalloc.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + Level3 + + + MaxSpeed + true + true + JEMALLOC_EXPORT=;JEMALLOC_STATIC;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + MultiThreaded + + + Console + true + true + true + $(SolutionDir)$(Platform)\$(Configuration) + jemalloc-$(PlatformToolset)-$(Configuration).lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + Level3 + + + MaxSpeed + true + true + NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + + + Console + true + true + true + $(SolutionDir)$(Platform)\$(Configuration) + jemalloc.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + Level3 + + + MaxSpeed + true + true + JEMALLOC_EXPORT=;JEMALLOC_STATIC;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + MultiThreaded + + + Console + true + true + true + $(SolutionDir)$(Platform)\$(Configuration) + jemalloc-$(PlatformToolset)-$(Configuration).lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + + + + + {8d6bb292-9e1c-413d-9f98-4864bdc1514a} + + + + + + + + + \ No newline at end of file diff --git a/msvc/projects/vc2015/test_threads/test_threads.vcxproj.filters b/msvc/projects/vc2015/test_threads/test_threads.vcxproj.filters new file mode 100644 index 00000000..4c233407 --- /dev/null +++ b/msvc/projects/vc2015/test_threads/test_threads.vcxproj.filters @@ -0,0 +1,26 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;xsd + + + + + Source Files + + + Source Files + + + + + Header Files + + + \ No newline at end of file diff --git a/msvc/projects/vc2015/test_threads/test_threads_main.cpp b/msvc/projects/vc2015/test_threads/test_threads_main.cpp new file mode 100644 index 00000000..ffd96e6a --- /dev/null +++ b/msvc/projects/vc2015/test_threads/test_threads_main.cpp @@ -0,0 +1,12 @@ +#include "test_threads.h" +#include +#include +#include + +using namespace std::chrono_literals; + +int main(int argc, char** argv) +{ + int rc = test_threads(); + return rc; +} From 
aac93f414eaeea8b84e14bd9b6a2430828ba700d Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 20 Feb 2016 11:25:30 -0800 Subject: [PATCH 44/96] Add symbol mangling for prng_[lg_]range(). --- include/jemalloc/internal/private_symbols.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 95ddf0c8..284410a9 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -362,6 +362,8 @@ pages_unmap pow2_ceil_u32 pow2_ceil_u64 pow2_ceil_zu +prng_lg_range +prng_range prof_active_get prof_active_get_unlocked prof_active_set From ecae12323d44cd739662051a2b9a5965cbe0e965 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 20 Feb 2016 23:41:33 -0800 Subject: [PATCH 45/96] Fix overflow in prng_range(). Add jemalloc_ffs64() and use it instead of jemalloc_ffsl() in prng_range(), since long is not guaranteed to be a 64-bit type. --- configure.ac | 14 +++++++++++++- .../internal/jemalloc_internal_defs.h.in | 8 ++++++-- include/jemalloc/internal/private_symbols.txt | 3 +++ include/jemalloc/internal/prng.h | 2 +- include/jemalloc/internal/util.h | 19 +++++++++++++++++-- 5 files changed, 40 insertions(+), 6 deletions(-) diff --git a/configure.ac b/configure.ac index 8adb6f7b..da4ee3ac 100644 --- a/configure.ac +++ b/configure.ac @@ -201,6 +201,16 @@ else fi AC_DEFINE_UNQUOTED([LG_SIZEOF_LONG], [$LG_SIZEOF_LONG]) +AC_CHECK_SIZEOF([long long]) +if test "x${ac_cv_sizeof_long_long}" = "x8" ; then + LG_SIZEOF_LONG_LONG=3 +elif test "x${ac_cv_sizeof_long_long}" = "x4" ; then + LG_SIZEOF_LONG_LONG=2 +else + AC_MSG_ERROR([Unsupported long long size: ${ac_cv_sizeof_long_long}]) +fi +AC_DEFINE_UNQUOTED([LG_SIZEOF_LONG_LONG], [$LG_SIZEOF_LONG_LONG]) + AC_CHECK_SIZEOF([intmax_t]) if test "x${ac_cv_sizeof_intmax_t}" = "x16" ; then LG_SIZEOF_INTMAX_T=4 @@ -1040,7 +1050,7 @@ dnl ============================================================================ dnl Check for __builtin_ffsl(), then ffsl(3), and fail if neither are found. dnl One of those two functions should (theoretically) exist on all platforms dnl that jemalloc currently has a chance of functioning on without modification. -dnl We additionally assume ffs() or __builtin_ffs() are defined if +dnl We additionally assume ffs[ll]() or __builtin_ffs[ll]() are defined if dnl ffsl() or __builtin_ffsl() are defined, respectively. JE_COMPILABLE([a program using __builtin_ffsl], [ #include @@ -1053,6 +1063,7 @@ JE_COMPILABLE([a program using __builtin_ffsl], [ } ], [je_cv_gcc_builtin_ffsl]) if test "x${je_cv_gcc_builtin_ffsl}" = "xyes" ; then + AC_DEFINE([JEMALLOC_INTERNAL_FFSLL], [__builtin_ffsll]) AC_DEFINE([JEMALLOC_INTERNAL_FFSL], [__builtin_ffsl]) AC_DEFINE([JEMALLOC_INTERNAL_FFS], [__builtin_ffs]) else @@ -1067,6 +1078,7 @@ else } ], [je_cv_function_ffsl]) if test "x${je_cv_function_ffsl}" = "xyes" ; then + AC_DEFINE([JEMALLOC_INTERNAL_FFSLL], [ffsll]) AC_DEFINE([JEMALLOC_INTERNAL_FFSL], [ffsl]) AC_DEFINE([JEMALLOC_INTERNAL_FFS], [ffs]) else diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index c84e27c9..4bcda716 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -189,9 +189,10 @@ #undef JEMALLOC_TLS /* - * ffs()/ffsl() functions to use for bitmapping. Don't use these directly; - * instead, use jemalloc_ffs() or jemalloc_ffsl() from util.h. 
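The portability hazard behind jemalloc_ffs64() is that ffsl() takes a long, which is only 32 bits on ILP32 and LLP64 targets (64-bit Windows among them), so handing it the uint64_t result of pow2_ceil_u64() silently truncates the upper half. Below is a minimal standalone sketch of the width dispatch; the name my_ffs64 is illustrative, it keys off ULONG_MAX rather than jemalloc's configure-detected macros, and it assumes the common ffsl()/ffsll() extensions are available:

#define _GNU_SOURCE	/* ffsl()/ffsll() on glibc */
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <strings.h>

/*
 * One-based index of the least significant set bit of a 64-bit value,
 * 0 if no bit is set, dispatched on the actual width of long.
 */
static int
my_ffs64(uint64_t bitmap)
{
#if ULONG_MAX == UINT64_MAX
	/* LP64: long is 64 bits; the cast preserves the bit pattern. */
	return (ffsl((long)bitmap));
#else
	/* ILP32/LLP64: long is too narrow, use long long. */
	return (ffsll((long long)bitmap));
#endif
}

int
main(void)
{
	/* ffsl() on a 32-bit long would see 0 here; this must print 33. */
	printf("%d\n", my_ffs64(UINT64_C(1) << 32));
	return (0);
}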
+ * ffs*() functions to use for bitmapping. Don't use these directly; instead, + * use jemalloc_ffs*() from util.h. */ +#undef JEMALLOC_INTERNAL_FFSLL #undef JEMALLOC_INTERNAL_FFSL #undef JEMALLOC_INTERNAL_FFS @@ -241,6 +242,9 @@ /* sizeof(long) == 2^LG_SIZEOF_LONG. */ #undef LG_SIZEOF_LONG +/* sizeof(long long) == 2^LG_SIZEOF_LONG_LONG. */ +#undef LG_SIZEOF_LONG_LONG + /* sizeof(intmax_t) == 2^LG_SIZEOF_INTMAX_T. */ #undef LG_SIZEOF_INTMAX_T diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 284410a9..8428cf48 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -292,6 +292,9 @@ isqalloc isthreaded ivsalloc ixalloc +jemalloc_ffs +jemalloc_ffs64 +jemalloc_ffsl jemalloc_postfork_child jemalloc_postfork_parent jemalloc_prefork diff --git a/include/jemalloc/internal/prng.h b/include/jemalloc/internal/prng.h index 83c90906..44d67c9a 100644 --- a/include/jemalloc/internal/prng.h +++ b/include/jemalloc/internal/prng.h @@ -64,7 +64,7 @@ prng_range(uint64_t *state, uint64_t range) assert(range > 1); /* Compute the ceiling of lg(range). */ - lg_range = jemalloc_ffsl(pow2_ceil_u64(range)) - 1; + lg_range = jemalloc_ffs64(pow2_ceil_u64(range)) - 1; /* Generate a result in [0..range) via repeated trial. */ do { diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index dfe5c93c..39f70878 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -121,6 +121,7 @@ void malloc_printf(const char *format, ...) JEMALLOC_FORMAT_PRINTF(1, 2); #ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE +int jemalloc_ffs64(uint64_t bitmap); int jemalloc_ffsl(long bitmap); int jemalloc_ffs(int bitmap); uint64_t pow2_ceil_u64(uint64_t x); @@ -134,10 +135,24 @@ int get_errno(void); #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_UTIL_C_)) /* Sanity check. */ -#if !defined(JEMALLOC_INTERNAL_FFSL) || !defined(JEMALLOC_INTERNAL_FFS) -# error Both JEMALLOC_INTERNAL_FFSL && JEMALLOC_INTERNAL_FFS should have been defined by configure +#if !defined(JEMALLOC_INTERNAL_FFSLL) || !defined(JEMALLOC_INTERNAL_FFSL) \ + || !defined(JEMALLOC_INTERNAL_FFS) +# error JEMALLOC_INTERNAL_FFS{,L,LL} should have been defined by configure #endif +JEMALLOC_ALWAYS_INLINE int +jemalloc_ffs64(uint64_t bitmap) +{ + +#if LG_SIZEOF_LONG == 3 + return (JEMALLOC_INTERNAL_FFSL(bitmap)); +#elif LG_SIZEOF_LONG_LONG == 3 + return (JEMALLOC_INTERNAL_FFSLL(bitmap)); +#else +#error No implementation for 64-bit ffs() +#endif +} + JEMALLOC_ALWAYS_INLINE int jemalloc_ffsl(long bitmap) { From 56139dc4035abc76744ad24844daaba77a721640 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 20 Feb 2016 23:43:17 -0800 Subject: [PATCH 46/96] Remove _WIN32-specific struct timespec declaration. struct timespec is already defined by the system (at least on MinGW). 
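Two MinGW/Windows time fixes follow: this patch drops the redundant struct timespec declaration, and the next one repairs time_update(). The latter's arithmetic deserves a note: GetSystemTimeAsFileTime() reports time in 100 ns ticks (counted from 1601-01-01), so converting to seconds must divide by 10^7, not the 10^4 the removed code used; ft.dwHighDateTime also has to be widened to uint64_t before the 32-bit shift (shifting a 32-bit DWORD left by 32 is undefined), and the removed code misspelled dwLowDateTime, which is why it did not even compile. A standalone sketch of just the corrected arithmetic — ticks_to_sec_nsec is a hypothetical helper, and no Windows headers are needed:

#include <inttypes.h>
#include <stdio.h>

/*
 * Convert a FILETIME-style tick count (100 ns units) to whole seconds
 * plus leftover nanoseconds.  On Windows the ticks would be assembled
 * as (((uint64_t)ft.dwHighDateTime) << 32) | ft.dwLowDateTime.
 */
static void
ticks_to_sec_nsec(uint64_t ticks, uint64_t *sec, uint64_t *nsec)
{
	*sec = ticks / 10000000;		/* 10^7 ticks per second. */
	*nsec = (ticks % 10000000) * 100;	/* leftover ticks, in ns. */
}

int
main(void)
{
	uint64_t sec, nsec;

	ticks_to_sec_nsec(UINT64_C(12345678901), &sec, &nsec);
	printf("%" PRIu64 ".%09" PRIu64 "\n", sec, nsec); /* 1234.567890100 */
	return (0);
}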
--- include/jemalloc/internal/jemalloc_internal_decls.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal_decls.h b/include/jemalloc/internal/jemalloc_internal_decls.h index 0f29e676..0bca63e5 100644 --- a/include/jemalloc/internal/jemalloc_internal_decls.h +++ b/include/jemalloc/internal/jemalloc_internal_decls.h @@ -62,11 +62,5 @@ isblank(int c) #include #include -#ifdef _WIN32 -struct timespec { - time_t tv_sec; - long tv_nsec; -}; -#endif #endif /* JEMALLOC_INTERNAL_H */ From fd9cd7a6cc575cab43e22f989c6709ffe0da451f Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 20 Feb 2016 23:45:22 -0800 Subject: [PATCH 47/96] Fix time_update() to compile and work on MinGW. --- src/time.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/time.c b/src/time.c index 2fe93e1e..8205c61b 100644 --- a/src/time.c +++ b/src/time.c @@ -161,12 +161,15 @@ time_update(struct timespec *time) time_copy(&old_time, time); #ifdef _WIN32 - FILETIME ft; - uint64_t ticks; - GetSystemTimeAsFileTime(&ft); - ticks = (ft.dwHighDateTime << 32) | ft.dWLowDateTime; - time->tv_sec = ticks / 10000; - time->tv_nsec = ((ticks % 10000) * 100); + { + FILETIME ft; + uint64_t ticks; + GetSystemTimeAsFileTime(&ft); + ticks = (((uint64_t)ft.dwHighDateTime) << 32) | + ft.dwLowDateTime; + time->tv_sec = ticks / 10000000; + time->tv_nsec = ((ticks % 10000000) * 100); + } #elif JEMALLOC_CLOCK_GETTIME if (sysconf(_SC_MONOTONIC_CLOCK) > 0) clock_gettime(CLOCK_MONOTONIC, time); From 788d29d397574396c4c93bf1f90da59dd7efc5cc Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 20 Feb 2016 23:46:14 -0800 Subject: [PATCH 48/96] Fix Windows-specific prof-related compilation portability issues. --- include/msvc_compat/windows_extra.h | 2 -- src/prof.c | 19 ++++++++++++++++--- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/include/msvc_compat/windows_extra.h b/include/msvc_compat/windows_extra.h index 114f43b1..0c5e323f 100644 --- a/include/msvc_compat/windows_extra.h +++ b/include/msvc_compat/windows_extra.h @@ -23,6 +23,4 @@ # define ERANGE ERROR_INVALID_DATA #endif -#define getpid() GetCurrentProcessId() - #endif /* MSVC_COMPAT_WINDOWS_EXTRA_H */ diff --git a/src/prof.c b/src/prof.c index 3abb38e2..173da69f 100644 --- a/src/prof.c +++ b/src/prof.c @@ -1359,6 +1359,7 @@ label_return: return (ret); } +#ifndef _WIN32 JEMALLOC_FORMAT_PRINTF(1, 2) static int prof_open_maps(const char *format, ...) @@ -1374,6 +1375,18 @@ prof_open_maps(const char *format, ...) 
return (mfd); } +#endif + +static int +prof_getpid(void) +{ + +#ifdef _WIN32 + return (GetCurrentProcessId()); +#else + return (getpid()); +#endif +} static bool prof_dump_maps(bool propagate_err) @@ -1388,7 +1401,7 @@ prof_dump_maps(bool propagate_err) mfd = -1; // Not implemented #else { - int pid = getpid(); + int pid = prof_getpid(); mfd = prof_open_maps("/proc/%d/task/%d/maps", pid, pid); if (mfd == -1) @@ -1557,12 +1570,12 @@ prof_dump_filename(char *filename, char v, uint64_t vseq) /* "...v.heap" */ malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE, "%s.%d.%"FMTu64".%c%"FMTu64".heap", - opt_prof_prefix, (int)getpid(), prof_dump_seq, v, vseq); + opt_prof_prefix, prof_getpid(), prof_dump_seq, v, vseq); } else { /* "....heap" */ malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE, "%s.%d.%"FMTu64".%c.heap", - opt_prof_prefix, (int)getpid(), prof_dump_seq, v); + opt_prof_prefix, prof_getpid(), prof_dump_seq, v); } prof_dump_seq++; } From 9bad07903962962de9f656d281b9b1e7e9501c87 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 21 Feb 2016 11:25:02 -0800 Subject: [PATCH 49/96] Refactor time_* into nstime_*. Use a single uint64_t in nstime_t to store nanoseconds rather than using struct timespec. This reduces fragility around conversions between long and uint64_t, especially missing casts that only cause problems on 32-bit platforms. --- Makefile.in | 38 ++- include/jemalloc/internal/arena.h | 6 +- .../jemalloc/internal/jemalloc_internal.h.in | 8 +- include/jemalloc/internal/nstime.h | 48 ++++ include/jemalloc/internal/private_symbols.txt | 24 +- include/jemalloc/internal/time.h | 41 ---- src/arena.c | 53 ++--- src/jemalloc.c | 2 +- src/nstime.c | 148 ++++++++++++ src/time.c | 198 ---------------- test/include/test/jemalloc_test.h.in | 2 +- test/include/test/timer.h | 4 +- test/src/timer.c | 16 +- test/unit/decay.c | 48 ++-- test/unit/mallctl.c | 4 +- test/unit/nstime.c | 220 +++++++++++++++++ test/unit/time.c | 223 ------------------ 17 files changed, 526 insertions(+), 557 deletions(-) create mode 100644 include/jemalloc/internal/nstime.h delete mode 100644 include/jemalloc/internal/time.h create mode 100644 src/nstime.c delete mode 100644 src/time.c create mode 100644 test/unit/nstime.c delete mode 100644 test/unit/time.c diff --git a/Makefile.in b/Makefile.in index e5681926..a4555c03 100644 --- a/Makefile.in +++ b/Makefile.in @@ -78,15 +78,31 @@ LIBJEMALLOC := $(LIBPREFIX)jemalloc$(install_suffix) # Lists of files. 
BINS := $(objroot)bin/jemalloc-config $(objroot)bin/jemalloc.sh $(objroot)bin/jeprof C_HDRS := $(objroot)include/jemalloc/jemalloc$(install_suffix).h -C_SRCS := $(srcroot)src/jemalloc.c $(srcroot)src/arena.c \ - $(srcroot)src/atomic.c $(srcroot)src/base.c $(srcroot)src/bitmap.c \ - $(srcroot)src/chunk.c $(srcroot)src/chunk_dss.c \ - $(srcroot)src/chunk_mmap.c $(srcroot)src/ckh.c $(srcroot)src/ctl.c \ - $(srcroot)src/extent.c $(srcroot)src/hash.c $(srcroot)src/huge.c \ - $(srcroot)src/mb.c $(srcroot)src/mutex.c $(srcroot)src/pages.c \ - $(srcroot)src/prng.c $(srcroot)src/prof.c $(srcroot)src/quarantine.c \ - $(srcroot)src/rtree.c $(srcroot)src/stats.c $(srcroot)src/tcache.c \ - $(srcroot)src/ticker.c $(srcroot)src/time.c $(srcroot)src/tsd.c \ +C_SRCS := $(srcroot)src/jemalloc.c \ + $(srcroot)src/arena.c \ + $(srcroot)src/atomic.c \ + $(srcroot)src/base.c \ + $(srcroot)src/bitmap.c \ + $(srcroot)src/chunk.c \ + $(srcroot)src/chunk_dss.c \ + $(srcroot)src/chunk_mmap.c \ + $(srcroot)src/ckh.c \ + $(srcroot)src/ctl.c \ + $(srcroot)src/extent.c \ + $(srcroot)src/hash.c \ + $(srcroot)src/huge.c \ + $(srcroot)src/mb.c \ + $(srcroot)src/mutex.c \ + $(srcroot)src/nstime.c \ + $(srcroot)src/pages.c \ + $(srcroot)src/prng.c \ + $(srcroot)src/prof.c \ + $(srcroot)src/quarantine.c \ + $(srcroot)src/rtree.c \ + $(srcroot)src/stats.c \ + $(srcroot)src/tcache.c \ + $(srcroot)src/ticker.c \ + $(srcroot)src/tsd.c \ $(srcroot)src/util.c ifeq ($(enable_valgrind), 1) C_SRCS += $(srcroot)src/valgrind.c @@ -117,7 +133,7 @@ C_TESTLIB_SRCS := $(srcroot)test/src/btalloc.c $(srcroot)test/src/btalloc_0.c \ $(srcroot)test/src/mtx.c $(srcroot)test/src/mq.c \ $(srcroot)test/src/SFMT.c $(srcroot)test/src/test.c \ $(srcroot)test/src/thd.c $(srcroot)test/src/timer.c -C_UTIL_INTEGRATION_SRCS := $(srcroot)src/time.c $(srcroot)src/util.c +C_UTIL_INTEGRATION_SRCS := $(srcroot)src/nstime.c $(srcroot)src/util.c TESTS_UNIT := $(srcroot)test/unit/atomic.c \ $(srcroot)test/unit/bitmap.c \ $(srcroot)test/unit/ckh.c \ @@ -148,7 +164,7 @@ TESTS_UNIT := $(srcroot)test/unit/atomic.c \ $(srcroot)test/unit/smoothstep.c \ $(srcroot)test/unit/stats.c \ $(srcroot)test/unit/ticker.c \ - $(srcroot)test/unit/time.c \ + $(srcroot)test/unit/nstime.c \ $(srcroot)test/unit/tsd.c \ $(srcroot)test/unit/util.c \ $(srcroot)test/unit/zero.c diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 76d3be19..65d4158b 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -395,7 +395,7 @@ struct arena_s { */ ssize_t decay_time; /* decay_time / SMOOTHSTEP_NSTEPS. */ - struct timespec decay_interval; + nstime_t decay_interval; /* * Time at which the current decay interval logically started. We do * not actually advance to a new epoch until sometime after it starts @@ -403,7 +403,7 @@ struct arena_s { * to completely skip epochs. In all cases, during epoch advancement we * merge all relevant activity into the most recently recorded epoch. */ - struct timespec decay_epoch; + nstime_t decay_epoch; /* decay_deadline randomness generator. */ uint64_t decay_jitter_state; /* @@ -413,7 +413,7 @@ struct arena_s { * decay_interval, but we randomize the deadline to reduce the * likelihood of arenas purging in lockstep. */ - struct timespec decay_deadline; + nstime_t decay_deadline; /* * Number of dirty pages at beginning of current epoch. 
During epoch * advancement we use the delta between decay_ndirty and ndirty to diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index aa97d7c7..0260b9a8 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -356,7 +356,7 @@ typedef unsigned szind_t; # define VARIABLE_ARRAY(type, name, count) type name[(count)] #endif -#include "jemalloc/internal/time.h" +#include "jemalloc/internal/nstime.h" #include "jemalloc/internal/valgrind.h" #include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" @@ -387,7 +387,7 @@ typedef unsigned szind_t; /******************************************************************************/ #define JEMALLOC_H_STRUCTS -#include "jemalloc/internal/time.h" +#include "jemalloc/internal/nstime.h" #include "jemalloc/internal/valgrind.h" #include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" @@ -477,7 +477,7 @@ void jemalloc_prefork(void); void jemalloc_postfork_parent(void); void jemalloc_postfork_child(void); -#include "jemalloc/internal/time.h" +#include "jemalloc/internal/nstime.h" #include "jemalloc/internal/valgrind.h" #include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" @@ -508,7 +508,7 @@ void jemalloc_postfork_child(void); /******************************************************************************/ #define JEMALLOC_H_INLINES -#include "jemalloc/internal/time.h" +#include "jemalloc/internal/nstime.h" #include "jemalloc/internal/valgrind.h" #include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" diff --git a/include/jemalloc/internal/nstime.h b/include/jemalloc/internal/nstime.h new file mode 100644 index 00000000..bd04f04b --- /dev/null +++ b/include/jemalloc/internal/nstime.h @@ -0,0 +1,48 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +#define JEMALLOC_CLOCK_GETTIME defined(_POSIX_MONOTONIC_CLOCK) \ + && _POSIX_MONOTONIC_CLOCK >= 0 + +typedef struct nstime_s nstime_t; + +/* Maximum supported number of seconds (~584 years). 
*/ +#define NSTIME_SEC_MAX 18446744072 + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +struct nstime_s { + uint64_t ns; +}; + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +void nstime_init(nstime_t *time, uint64_t ns); +void nstime_init2(nstime_t *time, uint64_t sec, uint64_t nsec); +uint64_t nstime_ns(const nstime_t *time); +uint64_t nstime_sec(const nstime_t *time); +uint64_t nstime_nsec(const nstime_t *time); +void nstime_copy(nstime_t *time, const nstime_t *source); +int nstime_compare(const nstime_t *a, const nstime_t *b); +void nstime_add(nstime_t *time, const nstime_t *addend); +void nstime_subtract(nstime_t *time, const nstime_t *subtrahend); +void nstime_imultiply(nstime_t *time, uint64_t multiplier); +void nstime_idivide(nstime_t *time, uint64_t divisor); +uint64_t nstime_divide(const nstime_t *time, const nstime_t *divisor); +#ifdef JEMALLOC_JET +typedef bool (nstime_update_t)(nstime_t *); +extern nstime_update_t *nstime_update; +#else +bool nstime_update(nstime_t *time); +#endif + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 8428cf48..c12baadb 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -327,6 +327,19 @@ narenas_tdata_cleanup narenas_total_get ncpus nhbins +nstime_add +nstime_compare +nstime_copy +nstime_divide +nstime_idivide +nstime_imultiply +nstime_init +nstime_init2 +nstime_ns +nstime_nsec +nstime_sec +nstime_subtract +nstime_update opt_abort opt_decay_time opt_dss @@ -484,17 +497,6 @@ ticker_init ticker_read ticker_tick ticker_ticks -time_add -time_compare -time_copy -time_divide -time_idivide -time_imultiply -time_init -time_nsec -time_sec -time_subtract -time_update tsd_arena_get tsd_arena_set tsd_boot diff --git a/include/jemalloc/internal/time.h b/include/jemalloc/internal/time.h deleted file mode 100644 index dd1dd5bd..00000000 --- a/include/jemalloc/internal/time.h +++ /dev/null @@ -1,41 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -#define JEMALLOC_CLOCK_GETTIME defined(_POSIX_MONOTONIC_CLOCK) \ - && _POSIX_MONOTONIC_CLOCK >= 0 - -/* Maximum supported number of seconds (~584 years). 
*/ -#define TIME_SEC_MAX 18446744072 - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -void time_init(struct timespec *time, time_t sec, long nsec); -time_t time_sec(const struct timespec *time); -long time_nsec(const struct timespec *time); -void time_copy(struct timespec *time, const struct timespec *source); -int time_compare(const struct timespec *a, const struct timespec *b); -void time_add(struct timespec *time, const struct timespec *addend); -void time_subtract(struct timespec *time, const struct timespec *subtrahend); -void time_imultiply(struct timespec *time, uint64_t multiplier); -void time_idivide(struct timespec *time, uint64_t divisor); -uint64_t time_divide(const struct timespec *time, - const struct timespec *divisor); -#ifdef JEMALLOC_JET -typedef bool (time_update_t)(struct timespec *); -extern time_update_t *time_update; -#else -bool time_update(struct timespec *time); -#endif - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff --git a/src/arena.c b/src/arena.c index b1078ae9..77c691a1 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1224,27 +1224,24 @@ arena_decay_deadline_init(arena_t *arena) * Generate a new deadline that is uniformly random within the next * epoch after the current one. */ - time_copy(&arena->decay_deadline, &arena->decay_epoch); - time_add(&arena->decay_deadline, &arena->decay_interval); + nstime_copy(&arena->decay_deadline, &arena->decay_epoch); + nstime_add(&arena->decay_deadline, &arena->decay_interval); if (arena->decay_time > 0) { - uint64_t decay_interval_ns, r; - struct timespec jitter; + nstime_t jitter; - decay_interval_ns = time_sec(&arena->decay_interval) * - 1000000000 + time_nsec(&arena->decay_interval); - r = prng_range(&arena->decay_jitter_state, decay_interval_ns); - time_init(&jitter, r / 1000000000, r % 1000000000); - time_add(&arena->decay_deadline, &jitter); + nstime_init(&jitter, prng_range(&arena->decay_jitter_state, + nstime_ns(&arena->decay_interval))); + nstime_add(&arena->decay_deadline, &jitter); } } static bool -arena_decay_deadline_reached(const arena_t *arena, const struct timespec *time) +arena_decay_deadline_reached(const arena_t *arena, const nstime_t *time) { assert(opt_purge == purge_mode_decay); - return (time_compare(&arena->decay_deadline, time) <= 0); + return (nstime_compare(&arena->decay_deadline, time) <= 0); } static size_t @@ -1276,24 +1273,24 @@ arena_decay_backlog_npages_limit(const arena_t *arena) } static void -arena_decay_epoch_advance(arena_t *arena, const struct timespec *time) +arena_decay_epoch_advance(arena_t *arena, const nstime_t *time) { uint64_t nadvance; - struct timespec delta; + nstime_t delta; size_t ndirty_delta; assert(opt_purge == purge_mode_decay); assert(arena_decay_deadline_reached(arena, time)); - time_copy(&delta, time); - time_subtract(&delta, &arena->decay_epoch); - nadvance = time_divide(&delta, &arena->decay_interval); + nstime_copy(&delta, time); + nstime_subtract(&delta, &arena->decay_epoch); + nadvance = nstime_divide(&delta, &arena->decay_interval); assert(nadvance > 0); /* Add nadvance decay intervals to epoch. 
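To make the epoch arithmetic concrete (assuming SMOOTHSTEP_NSTEPS is 200, per smoothstep.h): with decay_time = 1 s, decay_interval is 5 ms; if 23 ms have elapsed since decay_epoch, then nadvance = 23 / 5 = 4, and the code below advances the epoch by exactly 4 * 5 ms = 20 ms, keeping it on the interval grid rather than snapping it to the current time.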
*/ - time_copy(&delta, &arena->decay_interval); - time_imultiply(&delta, nadvance); - time_add(&arena->decay_epoch, &delta); + nstime_copy(&delta, &arena->decay_interval); + nstime_imultiply(&delta, nadvance); + nstime_add(&arena->decay_epoch, &delta); /* Set a new deadline. */ arena_decay_deadline_init(arena); @@ -1340,12 +1337,12 @@ arena_decay_init(arena_t *arena, ssize_t decay_time) arena->decay_time = decay_time; if (decay_time > 0) { - time_init(&arena->decay_interval, decay_time, 0); - time_idivide(&arena->decay_interval, SMOOTHSTEP_NSTEPS); + nstime_init2(&arena->decay_interval, decay_time, 0); + nstime_idivide(&arena->decay_interval, SMOOTHSTEP_NSTEPS); } - time_init(&arena->decay_epoch, 0, 0); - time_update(&arena->decay_epoch); + nstime_init(&arena->decay_epoch, 0); + nstime_update(&arena->decay_epoch); arena->decay_jitter_state = (uint64_t)(uintptr_t)arena; arena_decay_deadline_init(arena); arena->decay_ndirty = arena->ndirty; @@ -1357,7 +1354,7 @@ static bool arena_decay_time_valid(ssize_t decay_time) { - return (decay_time >= -1 && decay_time <= TIME_SEC_MAX); + return (decay_time >= -1 && decay_time <= NSTIME_SEC_MAX); } ssize_t @@ -1426,7 +1423,7 @@ arena_maybe_purge_ratio(arena_t *arena) static void arena_maybe_purge_decay(arena_t *arena) { - struct timespec time; + nstime_t time; size_t ndirty_limit; assert(opt_purge == purge_mode_decay); @@ -1438,10 +1435,10 @@ arena_maybe_purge_decay(arena_t *arena) return; } - time_copy(&time, &arena->decay_epoch); - if (unlikely(time_update(&time))) { + nstime_copy(&time, &arena->decay_epoch); + if (unlikely(nstime_update(&time))) { /* Time went backwards. Force an epoch advance. */ - time_copy(&time, &arena->decay_deadline); + nstime_copy(&time, &arena->decay_deadline); } if (arena_decay_deadline_reached(arena, &time)) diff --git a/src/jemalloc.c b/src/jemalloc.c index f69d951b..76b4f154 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1151,7 +1151,7 @@ malloc_conf_init(void) CONF_HANDLE_SSIZE_T(opt_lg_dirty_mult, "lg_dirty_mult", -1, (sizeof(size_t) << 3) - 1) CONF_HANDLE_SSIZE_T(opt_decay_time, "decay_time", -1, - TIME_SEC_MAX); + NSTIME_SEC_MAX); CONF_HANDLE_BOOL(opt_stats_print, "stats_print", true) if (config_fill) { if (CONF_MATCH("junk")) { diff --git a/src/nstime.c b/src/nstime.c new file mode 100644 index 00000000..4cf90b58 --- /dev/null +++ b/src/nstime.c @@ -0,0 +1,148 @@ +#include "jemalloc/internal/jemalloc_internal.h" + +#define BILLION UINT64_C(1000000000) + +void +nstime_init(nstime_t *time, uint64_t ns) +{ + + time->ns = ns; +} + +void +nstime_init2(nstime_t *time, uint64_t sec, uint64_t nsec) +{ + + time->ns = sec * BILLION + nsec; +} + +uint64_t +nstime_ns(const nstime_t *time) +{ + + return (time->ns); +} + +uint64_t +nstime_sec(const nstime_t *time) +{ + + return (time->ns / BILLION); +} + +uint64_t +nstime_nsec(const nstime_t *time) +{ + + return (time->ns % BILLION); +} + +void +nstime_copy(nstime_t *time, const nstime_t *source) +{ + + *time = *source; +} + +int +nstime_compare(const nstime_t *a, const nstime_t *b) +{ + + return ((a->ns > b->ns) - (a->ns < b->ns)); +} + +void +nstime_add(nstime_t *time, const nstime_t *addend) +{ + + assert(UINT64_MAX - time->ns >= addend->ns); + + time->ns += addend->ns; +} + +void +nstime_subtract(nstime_t *time, const nstime_t *subtrahend) +{ + + assert(nstime_compare(time, subtrahend) >= 0); + + time->ns -= subtrahend->ns; +} + +void +nstime_imultiply(nstime_t *time, uint64_t multiplier) +{ + + assert((((time->ns | multiplier) & (UINT64_MAX << (sizeof(uint64_t) << + 
2))) == 0) || ((time->ns * multiplier) / multiplier == time->ns)); + + time->ns *= multiplier; +} + +void +nstime_idivide(nstime_t *time, uint64_t divisor) +{ + + assert(divisor != 0); + + time->ns /= divisor; +} + +uint64_t +nstime_divide(const nstime_t *time, const nstime_t *divisor) +{ + + assert(divisor->ns != 0); + + return (time->ns / divisor->ns); +} + +#ifdef JEMALLOC_JET +#undef nstime_update +#define nstime_update JEMALLOC_N(nstime_update_impl) +#endif +bool +nstime_update(nstime_t *time) +{ + nstime_t old_time; + + nstime_copy(&old_time, time); + +#ifdef _WIN32 + { + FILETIME ft; + uint64_t ticks; + GetSystemTimeAsFileTime(&ft); + ticks = (((uint64_t)ft.dwHighDateTime) << 32) | + ft.dwLowDateTime; + time->ns = ticks * 100; + } +#elif JEMALLOC_CLOCK_GETTIME + { + struct timespec ts; + + if (sysconf(_SC_MONOTONIC_CLOCK) > 0) + clock_gettime(CLOCK_MONOTONIC, &ts); + else + clock_gettime(CLOCK_REALTIME, &ts); + time->ns = ts.tv_sec * BILLION + ts.tv_nsec; + } +#else + struct timeval tv; + gettimeofday(&tv, NULL); + time->ns = tv.tv_sec * BILLION + tv.tv_usec * 1000; +#endif + + /* Handle non-monotonic clocks. */ + if (unlikely(nstime_compare(&old_time, time) > 0)) { + nstime_copy(time, &old_time); + return (true); + } + + return (false); +} +#ifdef JEMALLOC_JET +#undef nstime_update +#define nstime_update JEMALLOC_N(nstime_update) +nstime_update_t *nstime_update = JEMALLOC_N(nstime_update_impl); +#endif diff --git a/src/time.c b/src/time.c deleted file mode 100644 index 8205c61b..00000000 --- a/src/time.c +++ /dev/null @@ -1,198 +0,0 @@ -#include "jemalloc/internal/jemalloc_internal.h" - -#define BILLION 1000000000 - -UNUSED static bool -time_valid(const struct timespec *time) -{ - - if (time->tv_sec > TIME_SEC_MAX) - return (false); - if (time->tv_nsec >= BILLION) - return (false); - - return (true); -} - -void -time_init(struct timespec *time, time_t sec, long nsec) -{ - - time->tv_sec = sec; - time->tv_nsec = nsec; - - assert(time_valid(time)); -} - -time_t -time_sec(const struct timespec *time) -{ - - assert(time_valid(time)); - - return (time->tv_sec); -} - -long -time_nsec(const struct timespec *time) -{ - - assert(time_valid(time)); - - return (time->tv_nsec); -} - -void -time_copy(struct timespec *time, const struct timespec *source) -{ - - assert(time_valid(source)); - - *time = *source; -} - -int -time_compare(const struct timespec *a, const struct timespec *b) -{ - int ret; - - assert(time_valid(a)); - assert(time_valid(b)); - - ret = (a->tv_sec > b->tv_sec) - (a->tv_sec < b->tv_sec); - if (ret == 0) - ret = (a->tv_nsec > b->tv_nsec) - (a->tv_nsec < b->tv_nsec); - - return (ret); -} - -void -time_add(struct timespec *time, const struct timespec *addend) -{ - - assert(time_valid(time)); - assert(time_valid(addend)); - - time->tv_sec += addend->tv_sec; - time->tv_nsec += addend->tv_nsec; - if (time->tv_nsec >= BILLION) { - time->tv_sec++; - time->tv_nsec -= BILLION; - } - - assert(time_valid(time)); -} - -void -time_subtract(struct timespec *time, const struct timespec *subtrahend) -{ - - assert(time_valid(time)); - assert(time_valid(subtrahend)); - assert(time_compare(time, subtrahend) >= 0); - - time->tv_sec -= subtrahend->tv_sec; - if (time->tv_nsec < subtrahend->tv_nsec) { - time->tv_sec--; - time->tv_nsec += BILLION; - } - time->tv_nsec -= subtrahend->tv_nsec; -} - -void -time_imultiply(struct timespec *time, uint64_t multiplier) -{ - time_t sec; - uint64_t nsec; - - assert(time_valid(time)); - - sec = time->tv_sec * multiplier; - nsec = time->tv_nsec * multiplier; - 
sec += nsec / BILLION; - nsec %= BILLION; - time_init(time, sec, (long)nsec); - - assert(time_valid(time)); -} - -void -time_idivide(struct timespec *time, uint64_t divisor) -{ - time_t sec; - uint64_t nsec; - - assert(time_valid(time)); - - sec = time->tv_sec / divisor; - nsec = ((time->tv_sec % divisor) * BILLION + time->tv_nsec) / divisor; - sec += nsec / BILLION; - nsec %= BILLION; - time_init(time, sec, (long)nsec); - - assert(time_valid(time)); -} - -uint64_t -time_divide(const struct timespec *time, const struct timespec *divisor) -{ - uint64_t t, d; - - assert(time_valid(time)); - assert(time_valid(divisor)); - - t = time_sec(time) * BILLION + time_nsec(time); - d = time_sec(divisor) * BILLION + time_nsec(divisor); - assert(d != 0); - return (t / d); -} - -#ifdef JEMALLOC_JET -#undef time_update -#define time_update JEMALLOC_N(time_update_impl) -#endif -bool -time_update(struct timespec *time) -{ - struct timespec old_time; - - assert(time_valid(time)); - - time_copy(&old_time, time); - -#ifdef _WIN32 - { - FILETIME ft; - uint64_t ticks; - GetSystemTimeAsFileTime(&ft); - ticks = (((uint64_t)ft.dwHighDateTime) << 32) | - ft.dwLowDateTime; - time->tv_sec = ticks / 10000000; - time->tv_nsec = ((ticks % 10000000) * 100); - } -#elif JEMALLOC_CLOCK_GETTIME - if (sysconf(_SC_MONOTONIC_CLOCK) > 0) - clock_gettime(CLOCK_MONOTONIC, time); - else - clock_gettime(CLOCK_REALTIME, time); -#else - struct timeval tv; - gettimeofday(&tv, NULL); - time->tv_sec = tv.tv_sec; - time->tv_nsec = tv.tv_usec * 1000; -#endif - - /* Handle non-monotonic clocks. */ - if (unlikely(time_compare(&old_time, time) > 0)) { - time_copy(time, &old_time); - return (true); - } - - assert(time_valid(time)); - return (false); -} -#ifdef JEMALLOC_JET -#undef time_update -#define time_update JEMALLOC_N(time_update) -time_update_t *time_update = JEMALLOC_N(time_update_impl); -#endif diff --git a/test/include/test/jemalloc_test.h.in b/test/include/test/jemalloc_test.h.in index 223162e1..4aaaf952 100644 --- a/test/include/test/jemalloc_test.h.in +++ b/test/include/test/jemalloc_test.h.in @@ -94,7 +94,7 @@ # define JEMALLOC_H_STRUCTS # define JEMALLOC_H_EXTERNS # define JEMALLOC_H_INLINES -# include "jemalloc/internal/time.h" +# include "jemalloc/internal/nstime.h" # include "jemalloc/internal/util.h" # include "jemalloc/internal/qr.h" # include "jemalloc/internal/ql.h" diff --git a/test/include/test/timer.h b/test/include/test/timer.h index a791f9ce..0b27e019 100644 --- a/test/include/test/timer.h +++ b/test/include/test/timer.h @@ -4,8 +4,8 @@ #include typedef struct { - struct timespec t0; - struct timespec t1; + nstime_t t0; + nstime_t t1; } timedelta_t; void timer_start(timedelta_t *timer); diff --git a/test/src/timer.c b/test/src/timer.c index 15306cfd..e91b3cf2 100644 --- a/test/src/timer.c +++ b/test/src/timer.c @@ -4,26 +4,26 @@ void timer_start(timedelta_t *timer) { - time_init(&timer->t0, 0, 0); - time_update(&timer->t0); + nstime_init(&timer->t0, 0); + nstime_update(&timer->t0); } void timer_stop(timedelta_t *timer) { - time_copy(&timer->t1, &timer->t0); - time_update(&timer->t1); + nstime_copy(&timer->t1, &timer->t0); + nstime_update(&timer->t1); } uint64_t timer_usec(const timedelta_t *timer) { - struct timespec delta; + nstime_t delta; - time_copy(&delta, &timer->t1); - time_subtract(&delta, &timer->t0); - return (time_sec(&delta) * 1000000 + time_nsec(&delta) / 1000); + nstime_copy(&delta, &timer->t1); + nstime_subtract(&delta, &timer->t0); + return (nstime_ns(&delta) / 1000); } void diff --git 
a/test/unit/decay.c b/test/unit/decay.c index 20730de4..66d54dc8 100644 --- a/test/unit/decay.c +++ b/test/unit/decay.c @@ -2,19 +2,19 @@ const char *malloc_conf = "purge:decay,decay_time:1"; -static time_update_t *time_update_orig; +static nstime_update_t *nstime_update_orig; static unsigned nupdates_mock; -static struct timespec time_mock; +static nstime_t time_mock; static bool nonmonotonic_mock; static bool -time_update_mock(struct timespec *time) +nstime_update_mock(nstime_t *time) { nupdates_mock++; if (!nonmonotonic_mock) - time_copy(time, &time_mock); + nstime_copy(time, &time_mock); return (nonmonotonic_mock); } @@ -204,7 +204,7 @@ TEST_BEGIN(test_decay_ticker) uint64_t epoch, npurge0, npurge1; size_t sz, tcache_max, large; unsigned i, nupdates0; - struct timespec time, decay_time, deadline; + nstime_t time, decay_time, deadline; test_skip_if(opt_purge != purge_mode_decay); @@ -233,12 +233,12 @@ TEST_BEGIN(test_decay_ticker) } nupdates_mock = 0; - time_init(&time_mock, 0, 0); - time_update(&time_mock); + nstime_init(&time_mock, 0); + nstime_update(&time_mock); nonmonotonic_mock = false; - time_update_orig = time_update; - time_update = time_update_mock; + nstime_update_orig = nstime_update; + nstime_update = nstime_update_mock; for (i = 0; i < NPS; i++) { dallocx(ps[i], flags); @@ -246,16 +246,16 @@ TEST_BEGIN(test_decay_ticker) assert_d_eq(mallctl("arena.0.decay", NULL, NULL, NULL, 0), 0, "Unexpected arena.0.decay failure"); assert_u_gt(nupdates_mock, nupdates0, - "Expected time_update() to be called"); + "Expected nstime_update() to be called"); } - time_update = time_update_orig; + nstime_update = nstime_update_orig; - time_init(&time, 0, 0); - time_update(&time); - time_init(&decay_time, opt_decay_time, 0); - time_copy(&deadline, &time); - time_add(&deadline, &decay_time); + nstime_init(&time, 0); + nstime_update(&time); + nstime_init2(&decay_time, opt_decay_time, 0); + nstime_copy(&deadline, &time); + nstime_add(&deadline, &decay_time); do { for (i = 0; i < DECAY_NTICKS_PER_UPDATE / 2; i++) { void *p = mallocx(1, flags); @@ -268,8 +268,8 @@ TEST_BEGIN(test_decay_ticker) assert_d_eq(mallctl("stats.arenas.0.npurge", &npurge1, &sz, NULL, 0), 0, "Unexpected mallctl failure"); - time_update(&time); - } while (time_compare(&time, &deadline) <= 0 && npurge1 == npurge0); + nstime_update(&time); + } while (nstime_compare(&time, &deadline) <= 0 && npurge1 == npurge0); assert_u64_gt(npurge1, npurge0, "Expected purging to occur"); #undef NPS @@ -300,12 +300,12 @@ TEST_BEGIN(test_decay_nonmonotonic) "Unexpected mallctl failure"); nupdates_mock = 0; - time_init(&time_mock, 0, 0); - time_update(&time_mock); + nstime_init(&time_mock, 0); + nstime_update(&time_mock); nonmonotonic_mock = true; - time_update_orig = time_update; - time_update = time_update_mock; + nstime_update_orig = nstime_update; + nstime_update = nstime_update_mock; for (i = 0; i < NPS; i++) { ps[i] = mallocx(large0, flags); @@ -318,7 +318,7 @@ TEST_BEGIN(test_decay_nonmonotonic) assert_d_eq(mallctl("arena.0.decay", NULL, NULL, NULL, 0), 0, "Unexpected arena.0.decay failure"); assert_u_gt(nupdates_mock, nupdates0, - "Expected time_update() to be called"); + "Expected nstime_update() to be called"); } assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(uint64_t)), 0, @@ -329,7 +329,7 @@ TEST_BEGIN(test_decay_nonmonotonic) assert_u64_gt(npurge1, npurge0, "Expected purging to occur"); - time_update = time_update_orig; + nstime_update = nstime_update_orig; #undef NPS } TEST_END diff --git a/test/unit/mallctl.c 
b/test/unit/mallctl.c index b312fc64..e8dc4926 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -401,7 +401,7 @@ TEST_BEGIN(test_arena_i_decay_time) &decay_time, sizeof(ssize_t)), EFAULT, "Unexpected mallctl() success"); - decay_time = TIME_SEC_MAX; + decay_time = 0x7fffffff; assert_d_eq(mallctl("arena.0.decay_time", NULL, NULL, &decay_time, sizeof(ssize_t)), 0, "Unexpected mallctl() failure"); @@ -567,7 +567,7 @@ TEST_BEGIN(test_arenas_decay_time) &decay_time, sizeof(ssize_t)), EFAULT, "Unexpected mallctl() success"); - decay_time = TIME_SEC_MAX; + decay_time = 0x7fffffff; assert_d_eq(mallctl("arenas.decay_time", NULL, NULL, &decay_time, sizeof(ssize_t)), 0, "Expected mallctl() failure"); diff --git a/test/unit/nstime.c b/test/unit/nstime.c new file mode 100644 index 00000000..cd7d9a6d --- /dev/null +++ b/test/unit/nstime.c @@ -0,0 +1,220 @@ +#include "test/jemalloc_test.h" + +#define BILLION UINT64_C(1000000000) + +TEST_BEGIN(test_nstime_init) +{ + nstime_t nst; + + nstime_init(&nst, 42000000043); + assert_u64_eq(nstime_ns(&nst), 42000000043, "ns incorrectly read"); + assert_u64_eq(nstime_sec(&nst), 42, "sec incorrectly read"); + assert_u64_eq(nstime_nsec(&nst), 43, "nsec incorrectly read"); +} +TEST_END + +TEST_BEGIN(test_nstime_init2) +{ + nstime_t nst; + + nstime_init2(&nst, 42, 43); + assert_u64_eq(nstime_sec(&nst), 42, "sec incorrectly read"); + assert_u64_eq(nstime_nsec(&nst), 43, "nsec incorrectly read"); +} +TEST_END + +TEST_BEGIN(test_nstime_copy) +{ + nstime_t nsta, nstb; + + nstime_init2(&nsta, 42, 43); + nstime_init(&nstb, 0); + nstime_copy(&nstb, &nsta); + assert_u64_eq(nstime_sec(&nstb), 42, "sec incorrectly copied"); + assert_u64_eq(nstime_nsec(&nstb), 43, "nsec incorrectly copied"); +} +TEST_END + +TEST_BEGIN(test_nstime_compare) +{ + nstime_t nsta, nstb; + + nstime_init2(&nsta, 42, 43); + nstime_copy(&nstb, &nsta); + assert_d_eq(nstime_compare(&nsta, &nstb), 0, "Times should be equal"); + assert_d_eq(nstime_compare(&nstb, &nsta), 0, "Times should be equal"); + + nstime_init2(&nstb, 42, 42); + assert_d_eq(nstime_compare(&nsta, &nstb), 1, + "nsta should be greater than nstb"); + assert_d_eq(nstime_compare(&nstb, &nsta), -1, + "nstb should be less than nsta"); + + nstime_init2(&nstb, 42, 44); + assert_d_eq(nstime_compare(&nsta, &nstb), -1, + "nsta should be less than nstb"); + assert_d_eq(nstime_compare(&nstb, &nsta), 1, + "nstb should be greater than nsta"); + + nstime_init2(&nstb, 41, BILLION - 1); + assert_d_eq(nstime_compare(&nsta, &nstb), 1, + "nsta should be greater than nstb"); + assert_d_eq(nstime_compare(&nstb, &nsta), -1, + "nstb should be less than nsta"); + + nstime_init2(&nstb, 43, 0); + assert_d_eq(nstime_compare(&nsta, &nstb), -1, + "nsta should be less than nstb"); + assert_d_eq(nstime_compare(&nstb, &nsta), 1, + "nstb should be greater than nsta"); +} +TEST_END + +TEST_BEGIN(test_nstime_add) +{ + nstime_t nsta, nstb; + + nstime_init2(&nsta, 42, 43); + nstime_copy(&nstb, &nsta); + nstime_add(&nsta, &nstb); + nstime_init2(&nstb, 84, 86); + assert_d_eq(nstime_compare(&nsta, &nstb), 0, + "Incorrect addition result"); + + nstime_init2(&nsta, 42, BILLION - 1); + nstime_copy(&nstb, &nsta); + nstime_add(&nsta, &nstb); + nstime_init2(&nstb, 85, BILLION - 2); + assert_d_eq(nstime_compare(&nsta, &nstb), 0, + "Incorrect addition result"); +} +TEST_END + +TEST_BEGIN(test_nstime_subtract) +{ + nstime_t nsta, nstb; + + nstime_init2(&nsta, 42, 43); + nstime_copy(&nstb, &nsta); + nstime_subtract(&nsta, &nstb); + nstime_init(&nstb, 0); + 
assert_d_eq(nstime_compare(&nsta, &nstb), 0, + "Incorrect subtraction result"); + + nstime_init2(&nsta, 42, 43); + nstime_init2(&nstb, 41, 44); + nstime_subtract(&nsta, &nstb); + nstime_init2(&nstb, 0, BILLION - 1); + assert_d_eq(nstime_compare(&nsta, &nstb), 0, + "Incorrect subtraction result"); +} +TEST_END + +TEST_BEGIN(test_nstime_imultiply) +{ + nstime_t nsta, nstb; + + nstime_init2(&nsta, 42, 43); + nstime_imultiply(&nsta, 10); + nstime_init2(&nstb, 420, 430); + assert_d_eq(nstime_compare(&nsta, &nstb), 0, + "Incorrect multiplication result"); + + nstime_init2(&nsta, 42, 666666666); + nstime_imultiply(&nsta, 3); + nstime_init2(&nstb, 127, 999999998); + assert_d_eq(nstime_compare(&nsta, &nstb), 0, + "Incorrect multiplication result"); +} +TEST_END + +TEST_BEGIN(test_nstime_idivide) +{ + nstime_t nsta, nstb; + + nstime_init2(&nsta, 42, 43); + nstime_copy(&nstb, &nsta); + nstime_imultiply(&nsta, 10); + nstime_idivide(&nsta, 10); + assert_d_eq(nstime_compare(&nsta, &nstb), 0, + "Incorrect division result"); + + nstime_init2(&nsta, 42, 666666666); + nstime_copy(&nstb, &nsta); + nstime_imultiply(&nsta, 3); + nstime_idivide(&nsta, 3); + assert_d_eq(nstime_compare(&nsta, &nstb), 0, + "Incorrect division result"); +} +TEST_END + +TEST_BEGIN(test_nstime_divide) +{ + nstime_t nsta, nstb, nstc; + + nstime_init2(&nsta, 42, 43); + nstime_copy(&nstb, &nsta); + nstime_imultiply(&nsta, 10); + assert_u64_eq(nstime_divide(&nsta, &nstb), 10, + "Incorrect division result"); + + nstime_init2(&nsta, 42, 43); + nstime_copy(&nstb, &nsta); + nstime_imultiply(&nsta, 10); + nstime_init(&nstc, 1); + nstime_add(&nsta, &nstc); + assert_u64_eq(nstime_divide(&nsta, &nstb), 10, + "Incorrect division result"); + + nstime_init2(&nsta, 42, 43); + nstime_copy(&nstb, &nsta); + nstime_imultiply(&nsta, 10); + nstime_init(&nstc, 1); + nstime_subtract(&nsta, &nstc); + assert_u64_eq(nstime_divide(&nsta, &nstb), 9, + "Incorrect division result"); +} +TEST_END + +TEST_BEGIN(test_nstime_update) +{ + nstime_t nst; + + nstime_init(&nst, 0); + + assert_false(nstime_update(&nst), "Basic time update failed."); + + /* Only Rip Van Winkle sleeps this long. 
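For scale: 631152000 s is exactly 20 Julian years (20 * 31,557,600 s), so after the addition below the stored time is far ahead of any real clock reading, and nstime_update() must take the roll-back path — report failure and leave the value untouched.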
*/ + { + nstime_t addend; + nstime_init2(&addend, 631152000, 0); + nstime_add(&nst, &addend); + } + { + nstime_t nst0; + nstime_copy(&nst0, &nst); + assert_true(nstime_update(&nst), + "Update should detect time roll-back."); + assert_d_eq(nstime_compare(&nst, &nst0), 0, + "Time should not have been modified"); + } + +} +TEST_END + +int +main(void) +{ + + return (test( + test_nstime_init, + test_nstime_init2, + test_nstime_copy, + test_nstime_compare, + test_nstime_add, + test_nstime_subtract, + test_nstime_imultiply, + test_nstime_idivide, + test_nstime_divide, + test_nstime_update)); +} diff --git a/test/unit/time.c b/test/unit/time.c deleted file mode 100644 index 941e6f13..00000000 --- a/test/unit/time.c +++ /dev/null @@ -1,223 +0,0 @@ -#include "test/jemalloc_test.h" - -#define BILLION 1000000000 - -TEST_BEGIN(test_time_init) -{ - struct timespec ts; - - time_init(&ts, 42, 43); - assert_ld_eq(ts.tv_sec, 42, "tv_sec incorrectly initialized"); - assert_ld_eq(ts.tv_nsec, 43, "tv_nsec incorrectly initialized"); -} -TEST_END - -TEST_BEGIN(test_time_sec) -{ - struct timespec ts; - - time_init(&ts, 42, 43); - assert_ld_eq(time_sec(&ts), 42, "tv_sec incorrectly read"); -} -TEST_END - -TEST_BEGIN(test_time_nsec) -{ - struct timespec ts; - - time_init(&ts, 42, 43); - assert_ld_eq(time_nsec(&ts), 43, "tv_nsec incorrectly read"); -} -TEST_END - -TEST_BEGIN(test_time_copy) -{ - struct timespec tsa, tsb; - - time_init(&tsa, 42, 43); - time_init(&tsb, 0, 0); - time_copy(&tsb, &tsa); - assert_ld_eq(time_sec(&tsb), 42, "tv_sec incorrectly copied"); - assert_ld_eq(time_nsec(&tsb), 43, "tv_nsec incorrectly copied"); -} -TEST_END - -TEST_BEGIN(test_time_compare) -{ - struct timespec tsa, tsb; - - time_init(&tsa, 42, 43); - time_copy(&tsb, &tsa); - assert_d_eq(time_compare(&tsa, &tsb), 0, "Times should be equal"); - assert_d_eq(time_compare(&tsb, &tsa), 0, "Times should be equal"); - - time_init(&tsb, 42, 42); - assert_d_eq(time_compare(&tsa, &tsb), 1, - "tsa should be greater than tsb"); - assert_d_eq(time_compare(&tsb, &tsa), -1, - "tsb should be less than tsa"); - - time_init(&tsb, 42, 44); - assert_d_eq(time_compare(&tsa, &tsb), -1, - "tsa should be less than tsb"); - assert_d_eq(time_compare(&tsb, &tsa), 1, - "tsb should be greater than tsa"); - - time_init(&tsb, 41, BILLION - 1); - assert_d_eq(time_compare(&tsa, &tsb), 1, - "tsa should be greater than tsb"); - assert_d_eq(time_compare(&tsb, &tsa), -1, - "tsb should be less than tsa"); - - time_init(&tsb, 43, 0); - assert_d_eq(time_compare(&tsa, &tsb), -1, - "tsa should be less than tsb"); - assert_d_eq(time_compare(&tsb, &tsa), 1, - "tsb should be greater than tsa"); -} -TEST_END - -TEST_BEGIN(test_time_add) -{ - struct timespec tsa, tsb; - - time_init(&tsa, 42, 43); - time_copy(&tsb, &tsa); - time_add(&tsa, &tsb); - time_init(&tsb, 84, 86); - assert_d_eq(time_compare(&tsa, &tsb), 0, "Incorrect addition result"); - - time_init(&tsa, 42, BILLION - 1); - time_copy(&tsb, &tsa); - time_add(&tsa, &tsb); - time_init(&tsb, 85, BILLION - 2); - assert_d_eq(time_compare(&tsa, &tsb), 0, "Incorrect addition result"); -} -TEST_END - -TEST_BEGIN(test_time_subtract) -{ - struct timespec tsa, tsb; - - time_init(&tsa, 42, 43); - time_copy(&tsb, &tsa); - time_subtract(&tsa, &tsb); - time_init(&tsb, 0, 0); - assert_d_eq(time_compare(&tsa, &tsb), 0, - "Incorrect subtraction result"); - - time_init(&tsa, 42, 43); - time_init(&tsb, 41, 44); - time_subtract(&tsa, &tsb); - time_init(&tsb, 0, BILLION - 1); - assert_d_eq(time_compare(&tsa, &tsb), 0, - "Incorrect subtraction 
result"); -} -TEST_END - -TEST_BEGIN(test_time_imultiply) -{ - struct timespec tsa, tsb; - - time_init(&tsa, 42, 43); - time_imultiply(&tsa, 10); - time_init(&tsb, 420, 430); - assert_d_eq(time_compare(&tsa, &tsb), 0, - "Incorrect multiplication result"); - - time_init(&tsa, 42, 666666666); - time_imultiply(&tsa, 3); - time_init(&tsb, 127, 999999998); - assert_d_eq(time_compare(&tsa, &tsb), 0, - "Incorrect multiplication result"); -} -TEST_END - -TEST_BEGIN(test_time_idivide) -{ - struct timespec tsa, tsb; - - time_init(&tsa, 42, 43); - time_copy(&tsb, &tsa); - time_imultiply(&tsa, 10); - time_idivide(&tsa, 10); - assert_d_eq(time_compare(&tsa, &tsb), 0, "Incorrect division result"); - - time_init(&tsa, 42, 666666666); - time_copy(&tsb, &tsa); - time_imultiply(&tsa, 3); - time_idivide(&tsa, 3); - assert_d_eq(time_compare(&tsa, &tsb), 0, "Incorrect division result"); -} -TEST_END - -TEST_BEGIN(test_time_divide) -{ - struct timespec tsa, tsb, tsc; - - time_init(&tsa, 42, 43); - time_copy(&tsb, &tsa); - time_imultiply(&tsa, 10); - assert_u64_eq(time_divide(&tsa, &tsb), 10, - "Incorrect division result"); - - time_init(&tsa, 42, 43); - time_copy(&tsb, &tsa); - time_imultiply(&tsa, 10); - time_init(&tsc, 0, 1); - time_add(&tsa, &tsc); - assert_u64_eq(time_divide(&tsa, &tsb), 10, - "Incorrect division result"); - - time_init(&tsa, 42, 43); - time_copy(&tsb, &tsa); - time_imultiply(&tsa, 10); - time_init(&tsc, 0, 1); - time_subtract(&tsa, &tsc); - assert_u64_eq(time_divide(&tsa, &tsb), 9, "Incorrect division result"); -} -TEST_END - -TEST_BEGIN(test_time_update) -{ - struct timespec ts; - - time_init(&ts, 0, 0); - - assert_false(time_update(&ts), "Basic time update failed."); - - /* Only Rip Van Winkle sleeps this long. */ - { - struct timespec addend; - time_init(&addend, 631152000, 0); - time_add(&ts, &addend); - } - { - struct timespec ts0; - time_copy(&ts0, &ts); - assert_true(time_update(&ts), - "Update should detect time roll-back."); - assert_d_eq(time_compare(&ts, &ts0), 0, - "Time should not have been modified"); - } - -} -TEST_END - -int -main(void) -{ - - return (test( - test_time_init, - test_time_sec, - test_time_nsec, - test_time_copy, - test_time_compare, - test_time_add, - test_time_subtract, - test_time_imultiply, - test_time_idivide, - test_time_divide, - test_time_update)); -} From 817d9030a5811f98c43b10ac53b8f17180dbc44f Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 22 Feb 2016 10:44:58 -0800 Subject: [PATCH 50/96] Indentation style cleanup. --- include/jemalloc/internal/arena.h | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 65d4158b..561b5886 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -167,8 +167,8 @@ struct arena_chunk_map_misc_s { /* Profile counters, used for large object runs. */ union { - void *prof_tctx_pun; - prof_tctx_t *prof_tctx; + void *prof_tctx_pun; + prof_tctx_t *prof_tctx; }; /* Small region run metadata. */ @@ -233,28 +233,28 @@ struct arena_chunk_s { */ struct arena_bin_info_s { /* Size of regions in a run for this bin's size class. */ - size_t reg_size; + size_t reg_size; /* Redzone size. */ - size_t redzone_size; + size_t redzone_size; /* Interval between regions (reg_size + (redzone_size << 1)). */ - size_t reg_interval; + size_t reg_interval; /* Total size of a run for this bin's size class. 
*/ - size_t run_size; + size_t run_size; /* Total number of regions in a run for this bin's size class. */ - uint32_t nregs; + uint32_t nregs; /* * Metadata used to manipulate bitmaps for runs associated with this * bin. */ - bitmap_info_t bitmap_info; + bitmap_info_t bitmap_info; /* Offset of first region in a run for this bin's size class. */ - uint32_t reg0_offset; + uint32_t reg0_offset; }; struct arena_bin_s { @@ -264,13 +264,13 @@ struct arena_bin_s { * which may be acquired while holding one or more bin locks, but not * vise versa. */ - malloc_mutex_t lock; + malloc_mutex_t lock; /* * Current run being used to service allocations of this bin's size * class. */ - arena_run_t *runcur; + arena_run_t *runcur; /* * Tree of non-full runs. This tree is used when looking for an @@ -279,10 +279,10 @@ struct arena_bin_s { * objects packed well, and it can also help reduce the number of * almost-empty chunks. */ - arena_run_tree_t runs; + arena_run_tree_t runs; /* Bin statistics. */ - malloc_bin_stats_t stats; + malloc_bin_stats_t stats; }; struct arena_s { From a9a46847925e38373e6a5da250c0cecb11a8277b Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 22 Feb 2016 14:58:05 -0800 Subject: [PATCH 51/96] Test run quantization. Also rename run_quantize_*() to improve clarity. These tests demonstrate that run_quantize_ceil() is flawed. --- Makefile.in | 1 + include/jemalloc/internal/arena.h | 6 + include/jemalloc/internal/private_symbols.txt | 2 + src/arena.c | 38 +++-- test/unit/run_quantize.c | 157 ++++++++++++++++++ 5 files changed, 194 insertions(+), 10 deletions(-) create mode 100644 test/unit/run_quantize.c diff --git a/Makefile.in b/Makefile.in index a4555c03..f60823f5 100644 --- a/Makefile.in +++ b/Makefile.in @@ -159,6 +159,7 @@ TESTS_UNIT := $(srcroot)test/unit/atomic.c \ $(srcroot)test/unit/quarantine.c \ $(srcroot)test/unit/rb.c \ $(srcroot)test/unit/rtree.c \ + $(srcroot)test/unit/run_quantize.c \ $(srcroot)test/unit/SFMT.c \ $(srcroot)test/unit/size_classes.c \ $(srcroot)test/unit/smoothstep.c \ diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 561b5886..f98aeb80 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -494,9 +494,15 @@ extern size_t map_bias; /* Number of arena chunk header pages. */ extern size_t map_misc_offset; extern size_t arena_maxrun; /* Max run size for arenas. */ extern size_t large_maxclass; /* Max large size class. */ +extern size_t small_maxrun; /* Max run size for small size classes. */ extern unsigned nlclasses; /* Number of large size classes. */ extern unsigned nhclasses; /* Number of huge size classes. 
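(Huge size classes are backed by dedicated chunks rather than by runs within chunks.)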
*/ +#ifdef JEMALLOC_JET +typedef size_t (run_quantize_t)(size_t); +extern run_quantize_t *run_quantize_floor; +extern run_quantize_t *run_quantize_ceil; +#endif void arena_chunk_cache_maybe_insert(arena_t *arena, extent_node_t *node, bool cache); void arena_chunk_cache_maybe_remove(arena_t *arena, extent_node_t *node, diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index c12baadb..3e37a61a 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -445,6 +445,8 @@ rtree_subtree_read_hard rtree_subtree_tryread rtree_val_read rtree_val_write +run_quantize_ceil +run_quantize_floor s2u s2u_compute s2u_lookup diff --git a/src/arena.c b/src/arena.c index 77c691a1..ff5b5fb8 100644 --- a/src/arena.c +++ b/src/arena.c @@ -21,7 +21,7 @@ size_t map_bias; size_t map_misc_offset; size_t arena_maxrun; /* Max run size for arenas. */ size_t large_maxclass; /* Max large size class. */ -static size_t small_maxrun; /* Max run size used for small size classes. */ +size_t small_maxrun; /* Max run size for small size classes. */ static bool *small_run_tab; /* Valid small run page multiples. */ unsigned nlclasses; /* Number of large size classes. */ unsigned nhclasses; /* Number of huge size classes. */ @@ -100,8 +100,12 @@ arena_run_comp(const arena_chunk_map_misc_t *a, const arena_chunk_map_misc_t *b) rb_gen(static UNUSED, arena_run_tree_, arena_run_tree_t, arena_chunk_map_misc_t, rb_link, arena_run_comp) +#ifdef JEMALLOC_JET +#undef run_quantize_floor +#define run_quantize_floor JEMALLOC_N(run_quantize_floor_impl) +#endif static size_t -run_quantize(size_t size) +run_quantize_floor(size_t size) { size_t qsize; @@ -119,13 +123,18 @@ run_quantize(size_t size) */ qsize = index2size(size2index(size - large_pad + 1) - 1) + large_pad; if (qsize <= SMALL_MAXCLASS + large_pad) - return (run_quantize(size - large_pad)); + return (run_quantize_floor(size - large_pad)); assert(qsize <= size); return (qsize); } +#ifdef JEMALLOC_JET +#undef run_quantize_floor +#define run_quantize_floor JEMALLOC_N(run_quantize_floor) +run_quantize_t *run_quantize_floor = JEMALLOC_N(run_quantize_floor_impl); +#endif static size_t -run_quantize_next(size_t size) +run_quantize_ceil_hard(size_t size) { size_t large_run_size_next; @@ -158,10 +167,14 @@ run_quantize_next(size_t size) } } +#ifdef JEMALLOC_JET +#undef run_quantize_ceil +#define run_quantize_ceil JEMALLOC_N(run_quantize_ceil_impl) +#endif static size_t -run_quantize_first(size_t size) +run_quantize_ceil(size_t size) { - size_t qsize = run_quantize(size); + size_t qsize = run_quantize_floor(size); if (qsize < size) { /* @@ -172,10 +185,15 @@ run_quantize_first(size_t size) * search would potentially find sufficiently aligned available * memory somewhere lower. */ - qsize = run_quantize_next(size); + qsize = run_quantize_ceil_hard(size); } return (qsize); } +#ifdef JEMALLOC_JET +#undef run_quantize_ceil +#define run_quantize_ceil JEMALLOC_N(run_quantize_ceil) +run_quantize_t *run_quantize_ceil = JEMALLOC_N(run_quantize_ceil_impl); +#endif JEMALLOC_INLINE_C int arena_avail_comp(const arena_chunk_map_misc_t *a, @@ -183,9 +201,9 @@ arena_avail_comp(const arena_chunk_map_misc_t *a, { int ret; uintptr_t a_miscelm = (uintptr_t)a; - size_t a_qsize = run_quantize(arena_miscelm_is_key(a) ? + size_t a_qsize = run_quantize_floor(arena_miscelm_is_key(a) ? 
arena_miscelm_key_size_get(a) : arena_miscelm_size_get(a)); - size_t b_qsize = run_quantize(arena_miscelm_size_get(b)); + size_t b_qsize = run_quantize_floor(arena_miscelm_size_get(b)); /* * Compare based on quantized size rather than size, in order to sort @@ -1081,7 +1099,7 @@ arena_chunk_ralloc_huge_expand(arena_t *arena, void *chunk, size_t oldsize, static arena_run_t * arena_run_first_best_fit(arena_t *arena, size_t size) { - size_t search_size = run_quantize_first(size); + size_t search_size = run_quantize_ceil(size); arena_chunk_map_misc_t *key = arena_miscelm_key_create(search_size); arena_chunk_map_misc_t *miscelm = arena_avail_tree_nsearch(&arena->runs_avail, key); diff --git a/test/unit/run_quantize.c b/test/unit/run_quantize.c new file mode 100644 index 00000000..aff4056b --- /dev/null +++ b/test/unit/run_quantize.c @@ -0,0 +1,157 @@ +#include "test/jemalloc_test.h" + +TEST_BEGIN(test_small_run_size) +{ + unsigned nbins, i; + size_t sz, run_size; + size_t mib[4]; + size_t miblen = sizeof(mib) / sizeof(size_t); + + /* + * Iterate over all small size classes, get their run sizes, and verify + * that the quantized size is the same as the run size. + */ + + sz = sizeof(unsigned); + assert_d_eq(mallctl("arenas.nbins", &nbins, &sz, NULL, 0), 0, + "Unexpected mallctl failure"); + + assert_d_eq(mallctlnametomib("arenas.bin.0.run_size", mib, &miblen), 0, + "Unexpected mallctlnametomib failure"); + for (i = 0; i < nbins; i++) { + mib[2] = i; + sz = sizeof(size_t); + assert_d_eq(mallctlbymib(mib, miblen, &run_size, &sz, NULL, 0), + 0, "Unexpected mallctlbymib failure"); + assert_zu_eq(run_size, run_quantize_floor(run_size), + "Small run quantization should be a no-op (run_size=%zu)", + run_size); + assert_zu_eq(run_size, run_quantize_ceil(run_size), + "Small run quantization should be a no-op (run_size=%zu)", + run_size); + } +} +TEST_END + +TEST_BEGIN(test_large_run_size) +{ + bool cache_oblivious; + unsigned nlruns, i; + size_t sz, run_size_prev, ceil_prev; + size_t mib[4]; + size_t miblen = sizeof(mib) / sizeof(size_t); + + /* + * Iterate over all large size classes, get their run sizes, and verify + * that the quantized size is the same as the run size. + */ + + sz = sizeof(bool); + assert_d_eq(mallctl("config.cache_oblivious", &cache_oblivious, &sz, + NULL, 0), 0, "Unexpected mallctl failure"); + + sz = sizeof(unsigned); + assert_d_eq(mallctl("arenas.nlruns", &nlruns, &sz, NULL, 0), 0, + "Unexpected mallctl failure"); + + assert_d_eq(mallctlnametomib("arenas.lrun.0.size", mib, &miblen), 0, + "Unexpected mallctlnametomib failure"); + for (i = 0; i < nlruns; i++) { + size_t lrun_size, run_size, floor, ceil; + + mib[2] = i; + sz = sizeof(size_t); + assert_d_eq(mallctlbymib(mib, miblen, &lrun_size, &sz, NULL, 0), + 0, "Unexpected mallctlbymib failure"); + run_size = cache_oblivious ? 
lrun_size + PAGE : lrun_size; + floor = run_quantize_floor(run_size); + ceil = run_quantize_ceil(run_size); + + assert_zu_eq(run_size, floor, + "Large run quantization should be a no-op for precise " + "size (lrun_size=%zu, run_size=%zu)", lrun_size, run_size); + assert_zu_eq(run_size, ceil, + "Large run quantization should be a no-op for precise " + "size (lrun_size=%zu, run_size=%zu)", lrun_size, run_size); + + if (i > 0) { + assert_zu_eq(run_size_prev, run_quantize_floor(run_size + - PAGE), "Floor should be a precise size"); + if (run_size_prev < ceil_prev) { + assert_zu_eq(ceil_prev, run_size, + "Ceiling should be a precise size " + "(run_size_prev=%zu, ceil_prev=%zu, " + "run_size=%zu)", run_size_prev, ceil_prev, + run_size); + } + } + run_size_prev = floor; + ceil_prev = run_quantize_ceil(run_size + PAGE); + } +} +TEST_END + +TEST_BEGIN(test_monotonic) +{ + bool cache_oblivious; + unsigned nbins, nlruns, i; + size_t sz, max_run_size, floor_prev, ceil_prev; + + /* + * Iterate over all run sizes and verify that + * run_quantize_{floor,ceil}() are monotonic. + */ + + sz = sizeof(bool); + assert_d_eq(mallctl("config.cache_oblivious", &cache_oblivious, &sz, + NULL, 0), 0, "Unexpected mallctl failure"); + + sz = sizeof(unsigned); + assert_d_eq(mallctl("arenas.nbins", &nbins, &sz, NULL, 0), 0, + "Unexpected mallctl failure"); + + sz = sizeof(unsigned); + assert_d_eq(mallctl("arenas.nlruns", &nlruns, &sz, NULL, 0), 0, + "Unexpected mallctl failure"); + + max_run_size = (large_maxclass > small_maxrun) ? large_maxclass : + small_maxrun; + + floor_prev = 0; + ceil_prev = 0; + for (i = 1; i < max_run_size >> LG_PAGE; i++) { + size_t run_size, floor, ceil; + + run_size = i << LG_PAGE; + floor = run_quantize_floor(run_size); + ceil = run_quantize_ceil(run_size); + + assert_zu_le(floor, run_size, + "Floor should be <= (floor=%zu, run_size=%zu, ceil=%zu)", + floor, run_size, ceil); + assert_zu_ge(ceil, run_size, + "Ceiling should be >= (floor=%zu, run_size=%zu, ceil=%zu)", + floor, run_size, ceil); + + assert_zu_le(floor_prev, floor, "Floor should be monotonic " + "(floor_prev=%zu, floor=%zu, run_size=%zu, ceil=%zu)", + floor_prev, floor, run_size, ceil); + assert_zu_le(ceil_prev, ceil, "Ceiling should be monotonic " + "(floor=%zu, run_size=%zu, ceil_prev=%zu, ceil=%zu)", + floor, run_size, ceil_prev, ceil); + + floor_prev = floor; + ceil_prev = ceil; + } +} +TEST_END + +int +main(void) +{ + + return (test( + test_small_run_size, + test_large_run_size, + test_monotonic)); +} From 08551eee586eefa8c98f33b97679f259af50afab Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 22 Feb 2016 15:01:37 -0800 Subject: [PATCH 52/96] Fix run_quantize_ceil(). In practice this bug had limited impact (and then only by increasing chunk fragmentation) because run_quantize_ceil() returned correct results except for inputs that could only arise from aligned allocation requests that required more than page alignment. This bug existed in the original run quantization implementation, which was introduced by 8a03cf039cd06f9fa6972711195055d865673966 (Implement cache index randomization for large allocations.). --- src/arena.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/arena.c b/src/arena.c index ff5b5fb8..97dea843 100644 --- a/src/arena.c +++ b/src/arena.c @@ -185,7 +185,7 @@ run_quantize_ceil(size_t size) * search would potentially find sufficiently aligned available * memory somewhere lower. 
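(Note that the hard computation below must therefore start from the quantized floor, qsize, rather than from the original size.)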
*/ - qsize = run_quantize_ceil_hard(size); + qsize = run_quantize_ceil_hard(qsize); } return (qsize); } From 0da8ce1e96bedff697f7133c8cfb328390b6d11d Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 22 Feb 2016 16:20:56 -0800 Subject: [PATCH 53/96] Use table lookup for run_quantize_{floor,ceil}(). Reduce run quantization overhead by generating lookup tables during bootstrapping, and using the tables for all subsequent run quantization. --- include/jemalloc/internal/arena.h | 2 +- include/jemalloc/internal/private_symbols.txt | 1 + src/arena.c | 107 ++++++++++++++---- test/unit/run_quantize.c | 12 +- 4 files changed, 90 insertions(+), 32 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index f98aeb80..8dc6852d 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -494,7 +494,7 @@ extern size_t map_bias; /* Number of arena chunk header pages. */ extern size_t map_misc_offset; extern size_t arena_maxrun; /* Max run size for arenas. */ extern size_t large_maxclass; /* Max large size class. */ -extern size_t small_maxrun; /* Max run size for small size classes. */ +extern size_t run_quantize_max; /* Max run_quantize_*() input. */ extern unsigned nlclasses; /* Number of large size classes. */ extern unsigned nhclasses; /* Number of huge size classes. */ diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 3e37a61a..761aa754 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -447,6 +447,7 @@ rtree_val_read rtree_val_write run_quantize_ceil run_quantize_floor +run_quantize_max s2u s2u_compute s2u_lookup diff --git a/src/arena.c b/src/arena.c index 97dea843..c4149461 100644 --- a/src/arena.c +++ b/src/arena.c @@ -21,8 +21,11 @@ size_t map_bias; size_t map_misc_offset; size_t arena_maxrun; /* Max run size for arenas. */ size_t large_maxclass; /* Max large size class. */ -size_t small_maxrun; /* Max run size for small size classes. */ +size_t run_quantize_max; /* Max run_quantize_*() input. */ +static size_t small_maxrun; /* Max run size for small size classes. */ static bool *small_run_tab; /* Valid small run page multiples. */ +static size_t *run_quantize_floor_tab; /* run_quantize_floor() memoization. */ +static size_t *run_quantize_ceil_tab; /* run_quantize_ceil() memoization. */ unsigned nlclasses; /* Number of large size classes. */ unsigned nhclasses; /* Number of huge size classes. 
*/ @@ -100,12 +103,8 @@ arena_run_comp(const arena_chunk_map_misc_t *a, const arena_chunk_map_misc_t *b) rb_gen(static UNUSED, arena_run_tree_, arena_run_tree_t, arena_chunk_map_misc_t, rb_link, arena_run_comp) -#ifdef JEMALLOC_JET -#undef run_quantize_floor -#define run_quantize_floor JEMALLOC_N(run_quantize_floor_impl) -#endif static size_t -run_quantize_floor(size_t size) +run_quantize_floor_compute(size_t size) { size_t qsize; @@ -123,18 +122,13 @@ run_quantize_floor(size_t size) */ qsize = index2size(size2index(size - large_pad + 1) - 1) + large_pad; if (qsize <= SMALL_MAXCLASS + large_pad) - return (run_quantize_floor(size - large_pad)); + return (run_quantize_floor_compute(size - large_pad)); assert(qsize <= size); return (qsize); } -#ifdef JEMALLOC_JET -#undef run_quantize_floor -#define run_quantize_floor JEMALLOC_N(run_quantize_floor) -run_quantize_t *run_quantize_floor = JEMALLOC_N(run_quantize_floor_impl); -#endif static size_t -run_quantize_ceil_hard(size_t size) +run_quantize_ceil_compute_hard(size_t size) { size_t large_run_size_next; @@ -167,14 +161,10 @@ run_quantize_ceil_hard(size_t size) } } -#ifdef JEMALLOC_JET -#undef run_quantize_ceil -#define run_quantize_ceil JEMALLOC_N(run_quantize_ceil_impl) -#endif static size_t -run_quantize_ceil(size_t size) +run_quantize_ceil_compute(size_t size) { - size_t qsize = run_quantize_floor(size); + size_t qsize = run_quantize_floor_compute(size); if (qsize < size) { /* @@ -185,10 +175,51 @@ run_quantize_ceil(size_t size) * search would potentially find sufficiently aligned available * memory somewhere lower. */ - qsize = run_quantize_ceil_hard(qsize); + qsize = run_quantize_ceil_compute_hard(qsize); } return (qsize); } + +#ifdef JEMALLOC_JET +#undef run_quantize_floor +#define run_quantize_floor JEMALLOC_N(run_quantize_floor_impl) +#endif +static size_t +run_quantize_floor(size_t size) +{ + size_t ret; + + assert(size > 0); + assert(size <= run_quantize_max); + assert((size & PAGE_MASK) == 0); + + ret = run_quantize_floor_tab[(size >> LG_PAGE) - 1]; + assert(ret == run_quantize_floor_compute(size)); + return (ret); +} +#ifdef JEMALLOC_JET +#undef run_quantize_floor +#define run_quantize_floor JEMALLOC_N(run_quantize_floor) +run_quantize_t *run_quantize_floor = JEMALLOC_N(run_quantize_floor_impl); +#endif + +#ifdef JEMALLOC_JET +#undef run_quantize_ceil +#define run_quantize_ceil JEMALLOC_N(run_quantize_ceil_impl) +#endif +static size_t +run_quantize_ceil(size_t size) +{ + size_t ret; + + assert(size > 0); + assert(size <= run_quantize_max); + assert((size & PAGE_MASK) == 0); + + ret = run_quantize_ceil_tab[(size >> LG_PAGE) - 1]; + assert(ret == run_quantize_ceil_compute(size)); + return (ret); +} #ifdef JEMALLOC_JET #undef run_quantize_ceil #define run_quantize_ceil JEMALLOC_N(run_quantize_ceil) @@ -3522,6 +3553,35 @@ small_run_size_init(void) return (false); } +static bool +run_quantize_init(void) +{ + unsigned i; + + run_quantize_max = chunksize + large_pad; + + run_quantize_floor_tab = (size_t *)base_alloc(sizeof(size_t) * + (run_quantize_max >> LG_PAGE)); + if (run_quantize_floor_tab == NULL) + return (true); + + run_quantize_ceil_tab = (size_t *)base_alloc(sizeof(size_t) * + (run_quantize_max >> LG_PAGE)); + if (run_quantize_ceil_tab == NULL) + return (true); + + for (i = 1; i <= run_quantize_max >> LG_PAGE; i++) { + size_t run_size = i << LG_PAGE; + + run_quantize_floor_tab[i-1] = + run_quantize_floor_compute(run_size); + run_quantize_ceil_tab[i-1] = + run_quantize_ceil_compute(run_size); + } + + return (false); +} + bool 
arena_boot(void) { @@ -3570,7 +3630,12 @@ arena_boot(void) nhclasses = NSIZES - nlclasses - NBINS; bin_info_init(); - return (small_run_size_init()); + if (small_run_size_init()) + return (true); + if (run_quantize_init()) + return (true); + + return (false); } void diff --git a/test/unit/run_quantize.c b/test/unit/run_quantize.c index aff4056b..f6a2f74f 100644 --- a/test/unit/run_quantize.c +++ b/test/unit/run_quantize.c @@ -93,19 +93,14 @@ TEST_END TEST_BEGIN(test_monotonic) { - bool cache_oblivious; unsigned nbins, nlruns, i; - size_t sz, max_run_size, floor_prev, ceil_prev; + size_t sz, floor_prev, ceil_prev; /* * Iterate over all run sizes and verify that * run_quantize_{floor,ceil}() are monotonic. */ - sz = sizeof(bool); - assert_d_eq(mallctl("config.cache_oblivious", &cache_oblivious, &sz, - NULL, 0), 0, "Unexpected mallctl failure"); - sz = sizeof(unsigned); assert_d_eq(mallctl("arenas.nbins", &nbins, &sz, NULL, 0), 0, "Unexpected mallctl failure"); @@ -114,12 +109,9 @@ TEST_BEGIN(test_monotonic) assert_d_eq(mallctl("arenas.nlruns", &nlruns, &sz, NULL, 0), 0, "Unexpected mallctl failure"); - max_run_size = (large_maxclass > small_maxrun) ? large_maxclass : - small_maxrun; - floor_prev = 0; ceil_prev = 0; - for (i = 1; i < max_run_size >> LG_PAGE; i++) { + for (i = 1; i < run_quantize_max >> LG_PAGE; i++) { size_t run_size, floor, ceil; run_size = i << LG_PAGE; From 2b1fc90b7b109c5efac7974b8f9abe269ecb6daf Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Wed, 17 Feb 2016 06:56:14 -0800 Subject: [PATCH 54/96] Remove rbt_nil Since this is an intrusive tree, rbt_nil is the whole size of the node and can be quite large. For example, miscelm is ~100 bytes. --- include/jemalloc/internal/rb.h | 154 ++++++++++++++------------------- test/unit/rb.c | 41 +++++---- 2 files changed, 86 insertions(+), 109 deletions(-) diff --git a/include/jemalloc/internal/rb.h b/include/jemalloc/internal/rb.h index 30ccab44..3770342f 100644 --- a/include/jemalloc/internal/rb.h +++ b/include/jemalloc/internal/rb.h @@ -42,7 +42,6 @@ struct { \ #define rb_tree(a_type) \ struct { \ a_type *rbt_root; \ - a_type rbt_nil; \ } /* Left accessors. */ @@ -84,8 +83,8 @@ struct { \ #define rbt_node_new(a_type, a_field, a_rbt, a_node) do { \ /* Bookkeeping bit cannot be used by node pointer. */ \ assert(((uintptr_t)(a_node) & 0x1) == 0); \ - rbtn_left_set(a_type, a_field, (a_node), &(a_rbt)->rbt_nil); \ - rbtn_right_set(a_type, a_field, (a_node), &(a_rbt)->rbt_nil); \ + rbtn_left_set(a_type, a_field, (a_node), NULL); \ + rbtn_right_set(a_type, a_field, (a_node), NULL); \ rbtn_red_set(a_type, a_field, (a_node)); \ } while (0) #else @@ -111,25 +110,23 @@ struct { \ /* Node initializer. */ #define rbt_node_new(a_type, a_field, a_rbt, a_node) do { \ - rbtn_left_set(a_type, a_field, (a_node), &(a_rbt)->rbt_nil); \ - rbtn_right_set(a_type, a_field, (a_node), &(a_rbt)->rbt_nil); \ + rbtn_left_set(a_type, a_field, (a_node), NULL); \ + rbtn_right_set(a_type, a_field, (a_node), NULL); \ rbtn_red_set(a_type, a_field, (a_node)); \ } while (0) #endif /* Tree initializer. */ #define rb_new(a_type, a_field, a_rbt) do { \ - (a_rbt)->rbt_root = &(a_rbt)->rbt_nil; \ - rbt_node_new(a_type, a_field, a_rbt, &(a_rbt)->rbt_nil); \ - rbtn_black_set(a_type, a_field, &(a_rbt)->rbt_nil); \ + (a_rbt)->rbt_root = NULL; \ } while (0) /* Internal utility macros. 
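With rbt_nil removed, an empty subtree is represented by a NULL child pointer, so these macros and the generated functions below must check for NULL before dereferencing.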
*/ #define rbtn_first(a_type, a_field, a_rbt, a_root, r_node) do { \ (r_node) = (a_root); \ - if ((r_node) != &(a_rbt)->rbt_nil) { \ + if ((r_node) != NULL) { \ for (; \ - rbtn_left_get(a_type, a_field, (r_node)) != &(a_rbt)->rbt_nil;\ + rbtn_left_get(a_type, a_field, (r_node)) != NULL; \ (r_node) = rbtn_left_get(a_type, a_field, (r_node))) { \ } \ } \ @@ -137,10 +134,9 @@ struct { \ #define rbtn_last(a_type, a_field, a_rbt, a_root, r_node) do { \ (r_node) = (a_root); \ - if ((r_node) != &(a_rbt)->rbt_nil) { \ - for (; rbtn_right_get(a_type, a_field, (r_node)) != \ - &(a_rbt)->rbt_nil; (r_node) = rbtn_right_get(a_type, a_field, \ - (r_node))) { \ + if ((r_node) != NULL) { \ + for (; rbtn_right_get(a_type, a_field, (r_node)) != NULL; \ + (r_node) = rbtn_right_get(a_type, a_field, (r_node))) { \ } \ } \ } while (0) @@ -335,8 +331,8 @@ a_prefix##destroy(a_rbt_type *rbtree, void (*cb)(a_type *, void *), \ * Args: * tree: Pointer to an initialized red-black tree object. * cb : Callback function, which, if non-null, is called for each node - * during iteration. There is no way to stop iteration once it has - * begun. + * during iteration. There is no way to stop iteration once it + * has begun. * arg : Opaque pointer passed to cb(). */ #define rb_gen(a_attr, a_prefix, a_rbt_type, a_type, a_field, a_cmp) \ @@ -346,36 +342,30 @@ a_prefix##new(a_rbt_type *rbtree) { \ } \ a_attr bool \ a_prefix##empty(a_rbt_type *rbtree) { \ - return (rbtree->rbt_root == &rbtree->rbt_nil); \ + return (rbtree->rbt_root == NULL); \ } \ a_attr a_type * \ a_prefix##first(a_rbt_type *rbtree) { \ a_type *ret; \ rbtn_first(a_type, a_field, rbtree, rbtree->rbt_root, ret); \ - if (ret == &rbtree->rbt_nil) { \ - ret = NULL; \ - } \ return (ret); \ } \ a_attr a_type * \ a_prefix##last(a_rbt_type *rbtree) { \ a_type *ret; \ rbtn_last(a_type, a_field, rbtree, rbtree->rbt_root, ret); \ - if (ret == &rbtree->rbt_nil) { \ - ret = NULL; \ - } \ return (ret); \ } \ a_attr a_type * \ a_prefix##next(a_rbt_type *rbtree, a_type *node) { \ a_type *ret; \ - if (rbtn_right_get(a_type, a_field, node) != &rbtree->rbt_nil) { \ + if (rbtn_right_get(a_type, a_field, node) != NULL) { \ rbtn_first(a_type, a_field, rbtree, rbtn_right_get(a_type, \ a_field, node), ret); \ } else { \ a_type *tnode = rbtree->rbt_root; \ - assert(tnode != &rbtree->rbt_nil); \ - ret = &rbtree->rbt_nil; \ + assert(tnode != NULL); \ + ret = NULL; \ while (true) { \ int cmp = (a_cmp)(node, tnode); \ if (cmp < 0) { \ @@ -386,24 +376,21 @@ a_prefix##next(a_rbt_type *rbtree, a_type *node) { \ } else { \ break; \ } \ - assert(tnode != &rbtree->rbt_nil); \ + assert(tnode != NULL); \ } \ } \ - if (ret == &rbtree->rbt_nil) { \ - ret = (NULL); \ - } \ return (ret); \ } \ a_attr a_type * \ a_prefix##prev(a_rbt_type *rbtree, a_type *node) { \ a_type *ret; \ - if (rbtn_left_get(a_type, a_field, node) != &rbtree->rbt_nil) { \ + if (rbtn_left_get(a_type, a_field, node) != NULL) { \ rbtn_last(a_type, a_field, rbtree, rbtn_left_get(a_type, \ a_field, node), ret); \ } else { \ a_type *tnode = rbtree->rbt_root; \ - assert(tnode != &rbtree->rbt_nil); \ - ret = &rbtree->rbt_nil; \ + assert(tnode != NULL); \ + ret = NULL; \ while (true) { \ int cmp = (a_cmp)(node, tnode); \ if (cmp < 0) { \ @@ -414,12 +401,9 @@ a_prefix##prev(a_rbt_type *rbtree, a_type *node) { \ } else { \ break; \ } \ - assert(tnode != &rbtree->rbt_nil); \ + assert(tnode != NULL); \ } \ } \ - if (ret == &rbtree->rbt_nil) { \ - ret = (NULL); \ - } \ return (ret); \ } \ a_attr a_type * \ @@ -427,7 +411,7 @@ 
a_prefix##search(a_rbt_type *rbtree, const a_type *key) { \ a_type *ret; \ int cmp; \ ret = rbtree->rbt_root; \ - while (ret != &rbtree->rbt_nil \ + while (ret != NULL \ && (cmp = (a_cmp)(key, ret)) != 0) { \ if (cmp < 0) { \ ret = rbtn_left_get(a_type, a_field, ret); \ @@ -435,17 +419,14 @@ a_prefix##search(a_rbt_type *rbtree, const a_type *key) { \ ret = rbtn_right_get(a_type, a_field, ret); \ } \ } \ - if (ret == &rbtree->rbt_nil) { \ - ret = (NULL); \ - } \ return (ret); \ } \ a_attr a_type * \ a_prefix##nsearch(a_rbt_type *rbtree, const a_type *key) { \ a_type *ret; \ a_type *tnode = rbtree->rbt_root; \ - ret = &rbtree->rbt_nil; \ - while (tnode != &rbtree->rbt_nil) { \ + ret = NULL; \ + while (tnode != NULL) { \ int cmp = (a_cmp)(key, tnode); \ if (cmp < 0) { \ ret = tnode; \ @@ -457,17 +438,14 @@ a_prefix##nsearch(a_rbt_type *rbtree, const a_type *key) { \ break; \ } \ } \ - if (ret == &rbtree->rbt_nil) { \ - ret = (NULL); \ - } \ return (ret); \ } \ a_attr a_type * \ a_prefix##psearch(a_rbt_type *rbtree, const a_type *key) { \ a_type *ret; \ a_type *tnode = rbtree->rbt_root; \ - ret = &rbtree->rbt_nil; \ - while (tnode != &rbtree->rbt_nil) { \ + ret = NULL; \ + while (tnode != NULL) { \ int cmp = (a_cmp)(key, tnode); \ if (cmp < 0) { \ tnode = rbtn_left_get(a_type, a_field, tnode); \ @@ -479,9 +457,6 @@ a_prefix##psearch(a_rbt_type *rbtree, const a_type *key) { \ break; \ } \ } \ - if (ret == &rbtree->rbt_nil) { \ - ret = (NULL); \ - } \ return (ret); \ } \ a_attr void \ @@ -493,7 +468,7 @@ a_prefix##insert(a_rbt_type *rbtree, a_type *node) { \ rbt_node_new(a_type, a_field, rbtree, node); \ /* Wind. */ \ path->node = rbtree->rbt_root; \ - for (pathp = path; pathp->node != &rbtree->rbt_nil; pathp++) { \ + for (pathp = path; pathp->node != NULL; pathp++) { \ int cmp = pathp->cmp = a_cmp(node, pathp->node); \ assert(cmp != 0); \ if (cmp < 0) { \ @@ -513,7 +488,8 @@ a_prefix##insert(a_rbt_type *rbtree, a_type *node) { \ rbtn_left_set(a_type, a_field, cnode, left); \ if (rbtn_red_get(a_type, a_field, left)) { \ a_type *leftleft = rbtn_left_get(a_type, a_field, left);\ - if (rbtn_red_get(a_type, a_field, leftleft)) { \ + if (leftleft != NULL && rbtn_red_get(a_type, a_field, \ + leftleft)) { \ /* Fix up 4-node. */ \ a_type *tnode; \ rbtn_black_set(a_type, a_field, leftleft); \ @@ -528,7 +504,8 @@ a_prefix##insert(a_rbt_type *rbtree, a_type *node) { \ rbtn_right_set(a_type, a_field, cnode, right); \ if (rbtn_red_get(a_type, a_field, right)) { \ a_type *left = rbtn_left_get(a_type, a_field, cnode); \ - if (rbtn_red_get(a_type, a_field, left)) { \ + if (left != NULL && rbtn_red_get(a_type, a_field, \ + left)) { \ /* Split 4-node. */ \ rbtn_black_set(a_type, a_field, left); \ rbtn_black_set(a_type, a_field, right); \ @@ -561,7 +538,7 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ /* Wind. */ \ nodep = NULL; /* Silence compiler warning. */ \ path->node = rbtree->rbt_root; \ - for (pathp = path; pathp->node != &rbtree->rbt_nil; pathp++) { \ + for (pathp = path; pathp->node != NULL; pathp++) { \ int cmp = pathp->cmp = a_cmp(node, pathp->node); \ if (cmp < 0) { \ pathp[1].node = rbtn_left_get(a_type, a_field, \ @@ -573,7 +550,7 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ /* Find node's successor, in preparation for swap. 
*/ \ pathp->cmp = 1; \ nodep = pathp; \ - for (pathp++; pathp->node != &rbtree->rbt_nil; \ + for (pathp++; pathp->node != NULL; \ pathp++) { \ pathp->cmp = -1; \ pathp[1].node = rbtn_left_get(a_type, a_field, \ @@ -616,7 +593,7 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ } \ } else { \ a_type *left = rbtn_left_get(a_type, a_field, node); \ - if (left != &rbtree->rbt_nil) { \ + if (left != NULL) { \ /* node has no successor, but it has a left child. */\ /* Splice node out, without losing the left child. */\ assert(!rbtn_red_get(a_type, a_field, node)); \ @@ -636,33 +613,32 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ return; \ } else if (pathp == path) { \ /* The tree only contained one node. */ \ - rbtree->rbt_root = &rbtree->rbt_nil; \ + rbtree->rbt_root = NULL; \ return; \ } \ } \ if (rbtn_red_get(a_type, a_field, pathp->node)) { \ /* Prune red node, which requires no fixup. */ \ assert(pathp[-1].cmp < 0); \ - rbtn_left_set(a_type, a_field, pathp[-1].node, \ - &rbtree->rbt_nil); \ + rbtn_left_set(a_type, a_field, pathp[-1].node, NULL); \ return; \ } \ /* The node to be pruned is black, so unwind until balance is */\ /* restored. */\ - pathp->node = &rbtree->rbt_nil; \ + pathp->node = NULL; \ for (pathp--; (uintptr_t)pathp >= (uintptr_t)path; pathp--) { \ assert(pathp->cmp != 0); \ if (pathp->cmp < 0) { \ rbtn_left_set(a_type, a_field, pathp->node, \ pathp[1].node); \ - assert(!rbtn_red_get(a_type, a_field, pathp[1].node)); \ if (rbtn_red_get(a_type, a_field, pathp->node)) { \ a_type *right = rbtn_right_get(a_type, a_field, \ pathp->node); \ a_type *rightleft = rbtn_left_get(a_type, a_field, \ right); \ a_type *tnode; \ - if (rbtn_red_get(a_type, a_field, rightleft)) { \ + if (rightleft != NULL && rbtn_red_get(a_type, a_field, \ + rightleft)) { \ /* In the following diagrams, ||, //, and \\ */\ /* indicate the path to the removed node. 
*/\ /* */\ @@ -705,7 +681,8 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ pathp->node); \ a_type *rightleft = rbtn_left_get(a_type, a_field, \ right); \ - if (rbtn_red_get(a_type, a_field, rightleft)) { \ + if (rightleft != NULL && rbtn_red_get(a_type, a_field, \ + rightleft)) { \ /* || */\ /* pathp(b) */\ /* // \ */\ @@ -759,7 +736,8 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ left); \ a_type *leftrightleft = rbtn_left_get(a_type, a_field, \ leftright); \ - if (rbtn_red_get(a_type, a_field, leftrightleft)) { \ + if (leftrightleft != NULL && rbtn_red_get(a_type, \ + a_field, leftrightleft)) { \ /* || */\ /* pathp(b) */\ /* / \\ */\ @@ -785,7 +763,7 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ /* (b) */\ /* / */\ /* (b) */\ - assert(leftright != &rbtree->rbt_nil); \ + assert(leftright != NULL); \ rbtn_red_set(a_type, a_field, leftright); \ rbtn_rotate_right(a_type, a_field, pathp->node, \ tnode); \ @@ -808,7 +786,8 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ return; \ } else if (rbtn_red_get(a_type, a_field, pathp->node)) { \ a_type *leftleft = rbtn_left_get(a_type, a_field, left);\ - if (rbtn_red_get(a_type, a_field, leftleft)) { \ + if (leftleft != NULL && rbtn_red_get(a_type, a_field, \ + leftleft)) { \ /* || */\ /* pathp(r) */\ /* / \\ */\ @@ -846,7 +825,8 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ } \ } else { \ a_type *leftleft = rbtn_left_get(a_type, a_field, left);\ - if (rbtn_red_get(a_type, a_field, leftleft)) { \ + if (leftleft != NULL && rbtn_red_get(a_type, a_field, \ + leftleft)) { \ /* || */\ /* pathp(b) */\ /* / \\ */\ @@ -892,13 +872,13 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ a_attr a_type * \ a_prefix##iter_recurse(a_rbt_type *rbtree, a_type *node, \ a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg) { \ - if (node == &rbtree->rbt_nil) { \ - return (&rbtree->rbt_nil); \ + if (node == NULL) { \ + return (NULL); \ } else { \ a_type *ret; \ if ((ret = a_prefix##iter_recurse(rbtree, rbtn_left_get(a_type, \ - a_field, node), cb, arg)) != &rbtree->rbt_nil \ - || (ret = cb(rbtree, node, arg)) != NULL) { \ + a_field, node), cb, arg)) != NULL || (ret = cb(rbtree, node, \ + arg)) != NULL) { \ return (ret); \ } \ return (a_prefix##iter_recurse(rbtree, rbtn_right_get(a_type, \ @@ -912,8 +892,8 @@ a_prefix##iter_start(a_rbt_type *rbtree, a_type *start, a_type *node, \ if (cmp < 0) { \ a_type *ret; \ if ((ret = a_prefix##iter_start(rbtree, start, \ - rbtn_left_get(a_type, a_field, node), cb, arg)) != \ - &rbtree->rbt_nil || (ret = cb(rbtree, node, arg)) != NULL) { \ + rbtn_left_get(a_type, a_field, node), cb, arg)) != NULL || \ + (ret = cb(rbtree, node, arg)) != NULL) { \ return (ret); \ } \ return (a_prefix##iter_recurse(rbtree, rbtn_right_get(a_type, \ @@ -940,21 +920,18 @@ a_prefix##iter(a_rbt_type *rbtree, a_type *start, a_type *(*cb)( \ } else { \ ret = a_prefix##iter_recurse(rbtree, rbtree->rbt_root, cb, arg);\ } \ - if (ret == &rbtree->rbt_nil) { \ - ret = NULL; \ - } \ return (ret); \ } \ a_attr a_type * \ a_prefix##reverse_iter_recurse(a_rbt_type *rbtree, a_type *node, \ a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg) { \ - if (node == &rbtree->rbt_nil) { \ - return (&rbtree->rbt_nil); \ + if (node == NULL) { \ + return (NULL); \ } else { \ a_type *ret; \ if ((ret = a_prefix##reverse_iter_recurse(rbtree, \ - rbtn_right_get(a_type, a_field, node), cb, arg)) != \ - &rbtree->rbt_nil || (ret = cb(rbtree, node, arg)) != NULL) { \ + rbtn_right_get(a_type, a_field, node), cb, arg)) 
!= NULL || \ + (ret = cb(rbtree, node, arg)) != NULL) { \ return (ret); \ } \ return (a_prefix##reverse_iter_recurse(rbtree, \ @@ -969,8 +946,8 @@ a_prefix##reverse_iter_start(a_rbt_type *rbtree, a_type *start, \ if (cmp > 0) { \ a_type *ret; \ if ((ret = a_prefix##reverse_iter_start(rbtree, start, \ - rbtn_right_get(a_type, a_field, node), cb, arg)) != \ - &rbtree->rbt_nil || (ret = cb(rbtree, node, arg)) != NULL) { \ + rbtn_right_get(a_type, a_field, node), cb, arg)) != NULL || \ + (ret = cb(rbtree, node, arg)) != NULL) { \ return (ret); \ } \ return (a_prefix##reverse_iter_recurse(rbtree, \ @@ -998,23 +975,20 @@ a_prefix##reverse_iter(a_rbt_type *rbtree, a_type *start, \ ret = a_prefix##reverse_iter_recurse(rbtree, rbtree->rbt_root, \ cb, arg); \ } \ - if (ret == &rbtree->rbt_nil) { \ - ret = NULL; \ - } \ return (ret); \ } \ a_attr void \ a_prefix##destroy_recurse(a_rbt_type *rbtree, a_type *node, void (*cb)( \ a_type *, void *), void *arg) { \ - if (node == &rbtree->rbt_nil) { \ + if (node == NULL) { \ return; \ } \ a_prefix##destroy_recurse(rbtree, rbtn_left_get(a_type, a_field, \ node), cb, arg); \ - rbtn_left_set(a_type, a_field, (node), &rbtree->rbt_nil); \ + rbtn_left_set(a_type, a_field, (node), NULL); \ a_prefix##destroy_recurse(rbtree, rbtn_right_get(a_type, a_field, \ node), cb, arg); \ - rbtn_right_set(a_type, a_field, (node), &rbtree->rbt_nil); \ + rbtn_right_set(a_type, a_field, (node), NULL); \ if (cb) { \ cb(node, arg); \ } \ @@ -1023,7 +997,7 @@ a_attr void \ a_prefix##destroy(a_rbt_type *rbtree, void (*cb)(a_type *, void *), \ void *arg) { \ a_prefix##destroy_recurse(rbtree, rbtree->rbt_root, cb, arg); \ - rbtree->rbt_root = &rbtree->rbt_nil; \ + rbtree->rbt_root = NULL; \ } #endif /* RB_H_ */ diff --git a/test/unit/rb.c b/test/unit/rb.c index 14132c13..cf3d3a78 100644 --- a/test/unit/rb.c +++ b/test/unit/rb.c @@ -3,7 +3,7 @@ #define rbtn_black_height(a_type, a_field, a_rbt, r_height) do { \ a_type *rbp_bh_t; \ for (rbp_bh_t = (a_rbt)->rbt_root, (r_height) = 0; \ - rbp_bh_t != &(a_rbt)->rbt_nil; \ + rbp_bh_t != NULL; \ rbp_bh_t = rbtn_left_get(a_type, a_field, rbp_bh_t)) { \ if (!rbtn_red_get(a_type, a_field, rbp_bh_t)) { \ (r_height)++; \ @@ -68,38 +68,43 @@ TEST_BEGIN(test_rb_empty) TEST_END static unsigned -tree_recurse(node_t *node, unsigned black_height, unsigned black_depth, - node_t *nil) +tree_recurse(node_t *node, unsigned black_height, unsigned black_depth) { unsigned ret = 0; - node_t *left_node = rbtn_left_get(node_t, link, node); - node_t *right_node = rbtn_right_get(node_t, link, node); + node_t *left_node; + node_t *right_node; + + if (node == NULL) + return (ret); + + left_node = rbtn_left_get(node_t, link, node); + right_node = rbtn_right_get(node_t, link, node); if (!rbtn_red_get(node_t, link, node)) black_depth++; /* Red nodes must be interleaved with black nodes. */ if (rbtn_red_get(node_t, link, node)) { - assert_false(rbtn_red_get(node_t, link, left_node), - "Node should be black"); - assert_false(rbtn_red_get(node_t, link, right_node), - "Node should be black"); + if (left_node != NULL) + assert_false(rbtn_red_get(node_t, link, left_node), + "Node should be black"); + if (right_node != NULL) + assert_false(rbtn_red_get(node_t, link, right_node), + "Node should be black"); } - if (node == nil) - return (ret); /* Self. */ assert_u32_eq(node->magic, NODE_MAGIC, "Bad magic"); /* Left subtree. 
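(A NULL child is a leaf; each leaf checks black_depth against the expected black height.)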
*/ - if (left_node != nil) - ret += tree_recurse(left_node, black_height, black_depth, nil); + if (left_node != NULL) + ret += tree_recurse(left_node, black_height, black_depth); else ret += (black_depth != black_height); /* Right subtree. */ - if (right_node != nil) - ret += tree_recurse(right_node, black_height, black_depth, nil); + if (right_node != NULL) + ret += tree_recurse(right_node, black_height, black_depth); else ret += (black_depth != black_height); @@ -181,8 +186,7 @@ node_remove(tree_t *tree, node_t *node, unsigned nnodes) node->magic = 0; rbtn_black_height(node_t, link, tree, black_height); - imbalances = tree_recurse(tree->rbt_root, black_height, 0, - &(tree->rbt_nil)); + imbalances = tree_recurse(tree->rbt_root, black_height, 0); assert_u_eq(imbalances, 0, "Tree is unbalanced"); assert_u_eq(tree_iterate(tree), nnodes-1, "Unexpected node iteration count"); @@ -253,7 +257,6 @@ TEST_BEGIN(test_rb_random) for (j = 1; j <= NNODES; j++) { /* Initialize tree and nodes. */ tree_new(&tree); - tree.rbt_nil.magic = 0; for (k = 0; k < j; k++) { nodes[k].magic = NODE_MAGIC; nodes[k].key = bag[k]; @@ -266,7 +269,7 @@ TEST_BEGIN(test_rb_random) rbtn_black_height(node_t, link, &tree, black_height); imbalances = tree_recurse(tree.rbt_root, - black_height, 0, &(tree.rbt_nil)); + black_height, 0); assert_u_eq(imbalances, 0, "Tree is unbalanced"); From 3417a304ccde61ac1f68b436ec22c03f1d6824ec Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Tue, 23 Feb 2016 12:06:21 -0800 Subject: [PATCH 55/96] Separate arena_avail trees Separate run trees by index, replacing the previous quantize logic. Quantization by index is now performed only on insertion / removal from the tree, and not on node comparison, saving some CPU. This also means we don't have to dereference the miscelm* pointers, saving half of the memory loads from miscelms/mapbits that have fallen out of cache. A linear scan of the indices appears to be fast enough. The only cost of this is an extra tree array in each arena. --- include/jemalloc/internal/arena.h | 12 +-- src/arena.c | 142 +++++++++++------------------- 2 files changed, 58 insertions(+), 96 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 8dc6852d..2548082b 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -351,12 +351,6 @@ struct arena_s { */ size_t ndirty; - /* - * Size/address-ordered tree of this arena's available runs. The tree - * is used for first-best-fit run allocation. - */ - arena_avail_tree_t runs_avail; - /* * Unused dirty memory this arena manages. Dirty memory is conceptually * tracked as an arbitrarily interleaved LRU of dirty runs and cached @@ -462,6 +456,12 @@ struct arena_s { /* bins is used to store trees of free regions. */ arena_bin_t bins[NBINS]; + + /* + * Quantized address-ordered trees of this arena's available runs. The + * trees are used for first-best-fit run allocation. + */ + arena_avail_tree_t runs_avail[1]; /* Dynamically sized. */ }; /* Used in conjunction with tsd for fast arena-related context lookup. */ diff --git a/src/arena.c b/src/arena.c index c4149461..06422727 100644 --- a/src/arena.c +++ b/src/arena.c @@ -28,6 +28,8 @@ static size_t *run_quantize_floor_tab; /* run_quantize_floor() memoization. */ static size_t *run_quantize_ceil_tab; /* run_quantize_ceil() memoization. */ unsigned nlclasses; /* Number of large size classes. */ unsigned nhclasses; /* Number of huge size classes. 
*/ +static szind_t runs_avail_bias; /* Size index for first runs_avail tree. */ +static szind_t runs_avail_nclasses; /* Number of runs_avail trees. */ /******************************************************************************/ /* @@ -45,42 +47,12 @@ static void arena_bin_lower_run(arena_t *arena, arena_chunk_t *chunk, /******************************************************************************/ -#define CHUNK_MAP_KEY ((uintptr_t)0x1U) - -JEMALLOC_INLINE_C arena_chunk_map_misc_t * -arena_miscelm_key_create(size_t size) -{ - - return ((arena_chunk_map_misc_t *)(arena_mapbits_size_encode(size) | - CHUNK_MAP_KEY)); -} - -JEMALLOC_INLINE_C bool -arena_miscelm_is_key(const arena_chunk_map_misc_t *miscelm) -{ - - return (((uintptr_t)miscelm & CHUNK_MAP_KEY) != 0); -} - -#undef CHUNK_MAP_KEY - -JEMALLOC_INLINE_C size_t -arena_miscelm_key_size_get(const arena_chunk_map_misc_t *miscelm) -{ - - assert(arena_miscelm_is_key(miscelm)); - - return (arena_mapbits_size_decode((uintptr_t)miscelm)); -} - JEMALLOC_INLINE_C size_t arena_miscelm_size_get(const arena_chunk_map_misc_t *miscelm) { arena_chunk_t *chunk; size_t pageind, mapbits; - assert(!arena_miscelm_is_key(miscelm)); - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(miscelm); pageind = arena_miscelm_to_pageind(miscelm); mapbits = arena_mapbits_get(chunk, pageind); @@ -88,7 +60,8 @@ arena_miscelm_size_get(const arena_chunk_map_misc_t *miscelm) } JEMALLOC_INLINE_C int -arena_run_comp(const arena_chunk_map_misc_t *a, const arena_chunk_map_misc_t *b) +arena_run_addr_comp(const arena_chunk_map_misc_t *a, + const arena_chunk_map_misc_t *b) { uintptr_t a_miscelm = (uintptr_t)a; uintptr_t b_miscelm = (uintptr_t)b; @@ -101,7 +74,7 @@ arena_run_comp(const arena_chunk_map_misc_t *a, const arena_chunk_map_misc_t *b) /* Generate red-black tree functions. */ rb_gen(static UNUSED, arena_run_tree_, arena_run_tree_t, arena_chunk_map_misc_t, - rb_link, arena_run_comp) + rb_link, arena_run_addr_comp) static size_t run_quantize_floor_compute(size_t size) @@ -226,61 +199,42 @@ run_quantize_ceil(size_t size) run_quantize_t *run_quantize_ceil = JEMALLOC_N(run_quantize_ceil_impl); #endif -JEMALLOC_INLINE_C int -arena_avail_comp(const arena_chunk_map_misc_t *a, - const arena_chunk_map_misc_t *b) -{ - int ret; - uintptr_t a_miscelm = (uintptr_t)a; - size_t a_qsize = run_quantize_floor(arena_miscelm_is_key(a) ? - arena_miscelm_key_size_get(a) : arena_miscelm_size_get(a)); - size_t b_qsize = run_quantize_floor(arena_miscelm_size_get(b)); - - /* - * Compare based on quantized size rather than size, in order to sort - * equally useful runs only by address. - */ - ret = (a_qsize > b_qsize) - (a_qsize < b_qsize); - if (ret == 0) { - if (!arena_miscelm_is_key(a)) { - uintptr_t b_miscelm = (uintptr_t)b; - - ret = (a_miscelm > b_miscelm) - (a_miscelm < b_miscelm); - } else { - /* - * Treat keys as if they are lower than anything else. - */ - ret = -1; - } - } - - return (ret); -} - /* Generate red-black tree functions. 
*/ rb_gen(static UNUSED, arena_avail_tree_, arena_avail_tree_t, - arena_chunk_map_misc_t, rb_link, arena_avail_comp) + arena_chunk_map_misc_t, rb_link, arena_run_addr_comp) + +static arena_avail_tree_t * +arena_runs_avail_get(arena_t *arena, szind_t ind) +{ + + assert(ind >= runs_avail_bias); + assert(ind - runs_avail_bias < runs_avail_nclasses); + + return (&arena->runs_avail[ind - runs_avail_bias]); +} static void arena_avail_insert(arena_t *arena, arena_chunk_t *chunk, size_t pageind, size_t npages) { - + szind_t ind = size2index(run_quantize_floor(arena_miscelm_size_get( + arena_miscelm_get(chunk, pageind)))); assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> LG_PAGE)); - arena_avail_tree_insert(&arena->runs_avail, arena_miscelm_get(chunk, - pageind)); + arena_avail_tree_insert(arena_runs_avail_get(arena, ind), + arena_miscelm_get(chunk, pageind)); } static void arena_avail_remove(arena_t *arena, arena_chunk_t *chunk, size_t pageind, size_t npages) { - + szind_t ind = size2index(run_quantize_floor(arena_miscelm_size_get( + arena_miscelm_get(chunk, pageind)))); assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> LG_PAGE)); - arena_avail_tree_remove(&arena->runs_avail, arena_miscelm_get(chunk, - pageind)); + arena_avail_tree_remove(arena_runs_avail_get(arena, ind), + arena_miscelm_get(chunk, pageind)); } static void @@ -770,7 +724,6 @@ arena_chunk_alloc(arena_t *arena) return (NULL); } - /* Insert the run into the runs_avail tree. */ arena_avail_insert(arena, chunk, map_bias, chunk_npages-map_bias); return (chunk); @@ -791,10 +744,7 @@ arena_chunk_dalloc(arena_t *arena, arena_chunk_t *chunk) assert(arena_mapbits_decommitted_get(chunk, map_bias) == arena_mapbits_decommitted_get(chunk, chunk_npages-1)); - /* - * Remove run from the runs_avail tree, so that the arena does not use - * it. - */ + /* Remove run from runs_avail, so that the arena does not use it. */ arena_avail_remove(arena, chunk, map_bias, chunk_npages-map_bias); if (arena->spare != NULL) { @@ -1124,19 +1074,23 @@ arena_chunk_ralloc_huge_expand(arena_t *arena, void *chunk, size_t oldsize, /* * Do first-best-fit run selection, i.e. select the lowest run that best fits. - * Run sizes are quantized, so not all candidate runs are necessarily exactly - * the same size. + * Run sizes are indexed, so not all candidate runs are necessarily exactly the + * same size. */ static arena_run_t * arena_run_first_best_fit(arena_t *arena, size_t size) { - size_t search_size = run_quantize_ceil(size); - arena_chunk_map_misc_t *key = arena_miscelm_key_create(search_size); - arena_chunk_map_misc_t *miscelm = - arena_avail_tree_nsearch(&arena->runs_avail, key); - if (miscelm == NULL) - return (NULL); - return (&miscelm->run); + szind_t ind, i; + + ind = size2index(run_quantize_ceil(size)); + for (i = ind; i < runs_avail_nclasses; i++) { + arena_chunk_map_misc_t *miscelm = arena_avail_tree_first( + arena_runs_avail_get(arena, i)); + if (miscelm != NULL) + return (&miscelm->run); + } + + return (NULL); } static arena_run_t * @@ -3315,19 +3269,23 @@ arena_t * arena_new(unsigned ind) { arena_t *arena; + size_t arena_size; unsigned i; arena_bin_t *bin; + /* Compute arena size to incorporate sufficient runs_avail elements. */ + arena_size = offsetof(arena_t, runs_avail) + (sizeof(arena_avail_tree_t) + * (runs_avail_nclasses - 1)); /* * Allocate arena, arena->lstats, and arena->hstats contiguously, mainly * because there is no way to clean up if base_alloc() OOMs. 
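(Memory returned by base_alloc() is never freed, so a partially constructed arena could not be released.)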
*/ if (config_stats) { - arena = (arena_t *)base_alloc(CACHELINE_CEILING(sizeof(arena_t)) - + QUANTUM_CEILING(nlclasses * sizeof(malloc_large_stats_t) + + arena = (arena_t *)base_alloc(CACHELINE_CEILING(arena_size) + + QUANTUM_CEILING(nlclasses * sizeof(malloc_large_stats_t) + nhclasses) * sizeof(malloc_huge_stats_t)); } else - arena = (arena_t *)base_alloc(sizeof(arena_t)); + arena = (arena_t *)base_alloc(arena_size); if (arena == NULL) return (NULL); @@ -3339,11 +3297,11 @@ arena_new(unsigned ind) if (config_stats) { memset(&arena->stats, 0, sizeof(arena_stats_t)); arena->stats.lstats = (malloc_large_stats_t *)((uintptr_t)arena - + CACHELINE_CEILING(sizeof(arena_t))); + + CACHELINE_CEILING(arena_size)); memset(arena->stats.lstats, 0, nlclasses * sizeof(malloc_large_stats_t)); arena->stats.hstats = (malloc_huge_stats_t *)((uintptr_t)arena - + CACHELINE_CEILING(sizeof(arena_t)) + + + CACHELINE_CEILING(arena_size) + QUANTUM_CEILING(nlclasses * sizeof(malloc_large_stats_t))); memset(arena->stats.hstats, 0, nhclasses * sizeof(malloc_huge_stats_t)); @@ -3375,7 +3333,8 @@ arena_new(unsigned ind) arena->nactive = 0; arena->ndirty = 0; - arena_avail_tree_new(&arena->runs_avail); + for(i = 0; i < runs_avail_nclasses; i++) + arena_avail_tree_new(&arena->runs_avail[i]); qr_new(&arena->runs_dirty, rd_link); qr_new(&arena->chunks_cache, cc_link); @@ -3635,6 +3594,9 @@ arena_boot(void) if (run_quantize_init()) return (true); + runs_avail_bias = size2index(PAGE); + runs_avail_nclasses = size2index(run_quantize_max)+1 - runs_avail_bias; + return (false); } From ae45142adc12d39793c45ecac4dafad5674a4591 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 23 Feb 2016 18:27:24 -0800 Subject: [PATCH 56/96] Collapse arena_avail_tree_* into arena_run_tree_*. These tree types converged to become identical, yet they still had independently generated red-black tree implementations. --- include/jemalloc/internal/arena.h | 3 +-- src/arena.c | 18 +++++++----------- 2 files changed, 8 insertions(+), 13 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 2548082b..05800e4f 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -175,7 +175,6 @@ struct arena_chunk_map_misc_s { arena_run_t run; }; }; -typedef rb_tree(arena_chunk_map_misc_t) arena_avail_tree_t; typedef rb_tree(arena_chunk_map_misc_t) arena_run_tree_t; #endif /* JEMALLOC_ARENA_STRUCTS_A */ @@ -461,7 +460,7 @@ struct arena_s { * Quantized address-ordered trees of this arena's available runs. The * trees are used for first-best-fit run allocation. */ - arena_avail_tree_t runs_avail[1]; /* Dynamically sized. */ + arena_run_tree_t runs_avail[1]; /* Dynamically sized. */ }; /* Used in conjunction with tsd for fast arena-related context lookup. */ diff --git a/src/arena.c b/src/arena.c index 06422727..ec81336b 100644 --- a/src/arena.c +++ b/src/arena.c @@ -199,11 +199,7 @@ run_quantize_ceil(size_t size) run_quantize_t *run_quantize_ceil = JEMALLOC_N(run_quantize_ceil_impl); #endif -/* Generate red-black tree functions. 
*/ -rb_gen(static UNUSED, arena_avail_tree_, arena_avail_tree_t, - arena_chunk_map_misc_t, rb_link, arena_run_addr_comp) - -static arena_avail_tree_t * +static arena_run_tree_t * arena_runs_avail_get(arena_t *arena, szind_t ind) { @@ -221,7 +217,7 @@ arena_avail_insert(arena_t *arena, arena_chunk_t *chunk, size_t pageind, arena_miscelm_get(chunk, pageind)))); assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> LG_PAGE)); - arena_avail_tree_insert(arena_runs_avail_get(arena, ind), + arena_run_tree_insert(arena_runs_avail_get(arena, ind), arena_miscelm_get(chunk, pageind)); } @@ -233,7 +229,7 @@ arena_avail_remove(arena_t *arena, arena_chunk_t *chunk, size_t pageind, arena_miscelm_get(chunk, pageind)))); assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> LG_PAGE)); - arena_avail_tree_remove(arena_runs_avail_get(arena, ind), + arena_run_tree_remove(arena_runs_avail_get(arena, ind), arena_miscelm_get(chunk, pageind)); } @@ -1084,7 +1080,7 @@ arena_run_first_best_fit(arena_t *arena, size_t size) ind = size2index(run_quantize_ceil(size)); for (i = ind; i < runs_avail_nclasses; i++) { - arena_chunk_map_misc_t *miscelm = arena_avail_tree_first( + arena_chunk_map_misc_t *miscelm = arena_run_tree_first( arena_runs_avail_get(arena, i)); if (miscelm != NULL) return (&miscelm->run); @@ -3274,8 +3270,8 @@ arena_new(unsigned ind) arena_bin_t *bin; /* Compute arena size to incorporate sufficient runs_avail elements. */ - arena_size = offsetof(arena_t, runs_avail) + (sizeof(arena_avail_tree_t) - * (runs_avail_nclasses - 1)); + arena_size = offsetof(arena_t, runs_avail) + (sizeof(arena_run_tree_t) * + (runs_avail_nclasses - 1)); /* * Allocate arena, arena->lstats, and arena->hstats contiguously, mainly * because there is no way to clean up if base_alloc() OOMs. @@ -3334,7 +3330,7 @@ arena_new(unsigned ind) arena->ndirty = 0; for(i = 0; i < runs_avail_nclasses; i++) - arena_avail_tree_new(&arena->runs_avail[i]); + arena_run_tree_new(&arena->runs_avail[i]); qr_new(&arena->runs_dirty, rd_link); qr_new(&arena->chunks_cache, cc_link); From b41a07c31a53cb91729f69b4a23e3a8801ee9846 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Tue, 23 Feb 2016 11:39:02 -0800 Subject: [PATCH 57/96] Fix Windows build issues This resolves #333. 
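The essential new piece is a 64-bit find-first-set shim: MSVC provides the _BitScanForward (and, on x64, _BitScanForward64) intrinsics but no ffsll(). The contract is that ffsll(x) returns the 1-based index of the least significant set bit, or 0 when no bit is set, so the 32-bit fallback scans the low word first and offsets a hit in the high word by 32. A minimal reference model of those semantics, useful for cross-checking the shim (ffsll_ref() below is an illustrative sketch, not part of the patch):

#include <assert.h>

/* Reference semantics: 1-based index of the least significant set bit,
 * or 0 if no bit is set. */
static int
ffsll_ref(unsigned long long x)
{
	int i;

	for (i = 0; i < 64; i++) {
		if (x & (1ULL << i))
			return (i + 1);
	}
	return (0);
}

int
main(void)
{

	assert(ffsll_ref(0) == 0);
	assert(ffsll_ref(1) == 1);
	/* A set bit in the high word maps to 33..64, matching the
	 * fallback's "i + 33" offset. */
	assert(ffsll_ref(1ULL << 32) == 33);
	return (0);
}
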
--- .../internal/jemalloc_internal_decls.h | 3 +- include/msvc_compat/strings.h | 30 +++++++++++++++++++ test/include/test/jemalloc_test.h.in | 1 - test/include/test/timer.h | 3 -- 4 files changed, 31 insertions(+), 6 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal_decls.h b/include/jemalloc/internal/jemalloc_internal_decls.h index 0bca63e5..2b8ca5d0 100644 --- a/include/jemalloc/internal/jemalloc_internal_decls.h +++ b/include/jemalloc/internal/jemalloc_internal_decls.h @@ -18,6 +18,7 @@ # endif # include # include +# include #endif #include @@ -61,6 +62,4 @@ isblank(int c) #endif #include -#include - #endif /* JEMALLOC_INTERNAL_H */ diff --git a/include/msvc_compat/strings.h b/include/msvc_compat/strings.h index f01ffdd1..a3ee2506 100644 --- a/include/msvc_compat/strings.h +++ b/include/msvc_compat/strings.h @@ -21,7 +21,37 @@ static __forceinline int ffs(int x) return (ffsl(x)); } +# ifdef _M_X64 +# pragma intrinsic(_BitScanForward64) +# endif + +static __forceinline int ffsll(unsigned __int64 x) +{ + unsigned long i; +#ifdef _M_X64 + if (_BitScanForward64(&i, x)) + return (i + 1); + return (0); #else +// Fallback for 32-bit build where 64-bit version not available +// assuming little endian + union { + unsigned __int64 ll; + unsigned long l[2]; + } s; + + s.ll = x; + + if (_BitScanForward(&i, s.l[0])) + return (i + 1); + else if(_BitScanForward(&i, s.l[1])) + return (i + 33); + return (0); +#endif +} + +#else +# define ffsll(x) __builtin_ffsll(x) # define ffsl(x) __builtin_ffsl(x) # define ffs(x) __builtin_ffs(x) #endif diff --git a/test/include/test/jemalloc_test.h.in b/test/include/test/jemalloc_test.h.in index 4aaaf952..0a3dbeac 100644 --- a/test/include/test/jemalloc_test.h.in +++ b/test/include/test/jemalloc_test.h.in @@ -11,7 +11,6 @@ #ifdef _WIN32 # include "msvc_compat/strings.h" #endif -#include #ifdef _WIN32 # include diff --git a/test/include/test/timer.h b/test/include/test/timer.h index 0b27e019..ace6191b 100644 --- a/test/include/test/timer.h +++ b/test/include/test/timer.h @@ -1,8 +1,5 @@ /* Simple timer, for use in benchmark reporting. */ -#include -#include - typedef struct { nstime_t t0; nstime_t t1; From 9f4ee6034c3ac6a8c8b5f9a0d76822fb2fd90c41 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 24 Feb 2016 10:32:45 -0800 Subject: [PATCH 58/96] Refactor jemalloc_ffs*() into ffs_*(). Use appropriate versions to resolve 64-to-32-bit data loss warnings. --- include/jemalloc/internal/arena.h | 2 +- include/jemalloc/internal/bitmap.h | 4 +- .../internal/jemalloc_internal_defs.h.in | 2 +- include/jemalloc/internal/private_symbols.txt | 9 +- include/jemalloc/internal/prng.h | 2 +- include/jemalloc/internal/util.h | 90 ++++++++++++------- src/arena.c | 3 +- src/chunk.c | 2 +- 8 files changed, 72 insertions(+), 42 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 05800e4f..165fb52d 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -1099,7 +1099,7 @@ arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr) /* Rescale (factor powers of 2 out of the numerator and denominator). 
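Here ffs_zu(interval) - 1 is the number of trailing zero bits in interval, so shifting both diff and interval right by that amount strips their common power-of-two factor.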
*/ interval = bin_info->reg_interval; - shift = jemalloc_ffs(interval) - 1; + shift = ffs_zu(interval) - 1; diff >>= shift; interval >>= shift; diff --git a/include/jemalloc/internal/bitmap.h b/include/jemalloc/internal/bitmap.h index fcc6005c..c14e7162 100644 --- a/include/jemalloc/internal/bitmap.h +++ b/include/jemalloc/internal/bitmap.h @@ -176,11 +176,11 @@ bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo) i = binfo->nlevels - 1; g = bitmap[binfo->levels[i].group_offset]; - bit = jemalloc_ffsl(g) - 1; + bit = ffs_lu(g) - 1; while (i > 0) { i--; g = bitmap[binfo->levels[i].group_offset + bit]; - bit = (bit << LG_BITMAP_GROUP_NBITS) + (jemalloc_ffsl(g) - 1); + bit = (bit << LG_BITMAP_GROUP_NBITS) + (ffs_lu(g) - 1); } bitmap_set(bitmap, binfo, bit); diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 4bcda716..2c753719 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -190,7 +190,7 @@ /* * ffs*() functions to use for bitmapping. Don't use these directly; instead, - * use jemalloc_ffs*() from util.h. + * use ffs_*() from util.h. */ #undef JEMALLOC_INTERNAL_FFSLL #undef JEMALLOC_INTERNAL_FFSL diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 761aa754..adab8a5c 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -243,6 +243,12 @@ extent_tree_szad_reverse_iter extent_tree_szad_reverse_iter_recurse extent_tree_szad_reverse_iter_start extent_tree_szad_search +ffs_llu +ffs_lu +ffs_u +ffs_u32 +ffs_u64 +ffs_zu get_errno hash hash_fmix_32 @@ -292,9 +298,6 @@ isqalloc isthreaded ivsalloc ixalloc -jemalloc_ffs -jemalloc_ffs64 -jemalloc_ffsl jemalloc_postfork_child jemalloc_postfork_parent jemalloc_prefork diff --git a/include/jemalloc/internal/prng.h b/include/jemalloc/internal/prng.h index 44d67c9a..5830f8b7 100644 --- a/include/jemalloc/internal/prng.h +++ b/include/jemalloc/internal/prng.h @@ -64,7 +64,7 @@ prng_range(uint64_t *state, uint64_t range) assert(range > 1); /* Compute the ceiling of lg(range). */ - lg_range = jemalloc_ffs64(pow2_ceil_u64(range)) - 1; + lg_range = ffs_u64(pow2_ceil_u64(range)) - 1; /* Generate a result in [0..range) via repeated trial. */ do { diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index 39f70878..46d47df3 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -121,9 +121,12 @@ void malloc_printf(const char *format, ...) 
JEMALLOC_FORMAT_PRINTF(1, 2); #ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE -int jemalloc_ffs64(uint64_t bitmap); -int jemalloc_ffsl(long bitmap); -int jemalloc_ffs(int bitmap); +unsigned ffs_llu(unsigned long long bitmap); +unsigned ffs_lu(unsigned long bitmap); +unsigned ffs_u(unsigned bitmap); +unsigned ffs_zu(size_t bitmap); +unsigned ffs_u64(uint64_t bitmap); +unsigned ffs_u32(uint32_t bitmap); uint64_t pow2_ceil_u64(uint64_t x); uint32_t pow2_ceil_u32(uint32_t x); size_t pow2_ceil_zu(size_t x); @@ -140,31 +143,63 @@ int get_errno(void); # error JEMALLOC_INTERNAL_FFS{,L,LL} should have been defined by configure #endif -JEMALLOC_ALWAYS_INLINE int -jemalloc_ffs64(uint64_t bitmap) +JEMALLOC_ALWAYS_INLINE unsigned +ffs_llu(unsigned long long bitmap) +{ + + return (JEMALLOC_INTERNAL_FFSLL(bitmap)); +} + +JEMALLOC_ALWAYS_INLINE unsigned +ffs_lu(unsigned long bitmap) +{ + + return (JEMALLOC_INTERNAL_FFSL(bitmap)); +} + +JEMALLOC_ALWAYS_INLINE unsigned +ffs_u(unsigned bitmap) +{ + + return (JEMALLOC_INTERNAL_FFS(bitmap)); +} + +JEMALLOC_ALWAYS_INLINE unsigned +ffs_zu(size_t bitmap) +{ + +#if LG_SIZEOF_PTR == LG_SIZEOF_LONG + return (ffs_lu(bitmap)); +#elif LG_SIZEOF_PTR == LG_SIZEOF_INT + return (ffs_u(bitmap)); +#else +#error No implementation for size_t ffs() +#endif +} + +JEMALLOC_ALWAYS_INLINE unsigned +ffs_u64(uint64_t bitmap) { #if LG_SIZEOF_LONG == 3 - return (JEMALLOC_INTERNAL_FFSL(bitmap)); + return (ffs_lu(bitmap)); #elif LG_SIZEOF_LONG_LONG == 3 - return (JEMALLOC_INTERNAL_FFSLL(bitmap)); + return (ffs_llu(bitmap)); #else #error No implementation for 64-bit ffs() #endif } -JEMALLOC_ALWAYS_INLINE int -jemalloc_ffsl(long bitmap) +JEMALLOC_ALWAYS_INLINE unsigned +ffs_u32(uint32_t bitmap) { - return (JEMALLOC_INTERNAL_FFSL(bitmap)); -} - -JEMALLOC_ALWAYS_INLINE int -jemalloc_ffs(int bitmap) -{ - - return (JEMALLOC_INTERNAL_FFS(bitmap)); +#if LG_SIZEOF_INT == 2 + return (ffs_u(bitmap)); +#else +#error No implementation for 32-bit ffs() +#endif + return (ffs_u(bitmap)); } JEMALLOC_INLINE uint64_t @@ -235,7 +270,7 @@ lg_floor(size_t x) #elif (LG_SIZEOF_PTR == 2) _BitScanReverse(&ret, x); #else -# error "Unsupported type sizes for lg_floor()" +# error "Unsupported type size for lg_floor()" #endif return (ret); } @@ -251,7 +286,7 @@ lg_floor(size_t x) #elif (LG_SIZEOF_PTR == LG_SIZEOF_LONG) return (((8 << LG_SIZEOF_PTR) - 1) - __builtin_clzl(x)); #else -# error "Unsupported type sizes for lg_floor()" +# error "Unsupported type size for lg_floor()" #endif } #else @@ -266,20 +301,13 @@ lg_floor(size_t x) x |= (x >> 4); x |= (x >> 8); x |= (x >> 16); -#if (LG_SIZEOF_PTR == 3 && LG_SIZEOF_PTR == LG_SIZEOF_LONG) +#if (LG_SIZEOF_PTR == 3) x |= (x >> 32); - if (x == KZU(0xffffffffffffffff)) - return (63); - x++; - return (jemalloc_ffsl(x) - 2); -#elif (LG_SIZEOF_PTR == 2) - if (x == KZU(0xffffffff)) - return (31); - x++; - return (jemalloc_ffs(x) - 2); -#else -# error "Unsupported type sizes for lg_floor()" #endif + if (x == SIZE_T_MAX) + return ((8 << LG_SIZEOF_PTR) - 1); + x++; + return (ffs_zu(x) - 2); } #endif diff --git a/src/arena.c b/src/arena.c index ec81336b..7b065d60 100644 --- a/src/arena.c +++ b/src/arena.c @@ -3391,8 +3391,7 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info) * be twice as large in order to maintain alignment. 
*/ if (config_fill && unlikely(opt_redzone)) { - size_t align_min = ZU(1) << (jemalloc_ffs(bin_info->reg_size) - - 1); + size_t align_min = ZU(1) << (ffs_zu(bin_info->reg_size) - 1); if (align_min <= REDZONE_MINSIZE) { bin_info->redzone_size = REDZONE_MINSIZE; pad_size = 0; diff --git a/src/chunk.c b/src/chunk.c index 6ba1ca7a..3d32a404 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -716,7 +716,7 @@ chunk_boot(void) * so pages_map will always take fast path. */ if (!opt_lg_chunk) { - opt_lg_chunk = jemalloc_ffs((int)info.dwAllocationGranularity) + opt_lg_chunk = ffs_u((unsigned)info.dwAllocationGranularity) - 1; } #else From 8dd5115edee9e778d3b45d0924530ee49a4e34e6 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 24 Feb 2016 11:00:40 -0800 Subject: [PATCH 59/96] Explicitly cast mib[] elements to unsigned where appropriate. --- src/ctl.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/ctl.c b/src/ctl.c index f003b415..a7f1b675 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -24,7 +24,7 @@ ctl_named_node(const ctl_node_t *node) } JEMALLOC_INLINE_C const ctl_named_node_t * -ctl_named_children(const ctl_named_node_t *node, int index) +ctl_named_children(const ctl_named_node_t *node, size_t index) { const ctl_named_node_t *children = ctl_named_node(node->children); @@ -975,7 +975,7 @@ ctl_bymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, assert(node->nchildren > 0); if (ctl_named_node(node->children) != NULL) { /* Children are named. */ - if (node->nchildren <= mib[i]) { + if (node->nchildren <= (unsigned)mib[i]) { ret = ENOENT; goto label_return; } @@ -1611,7 +1611,7 @@ arena_i_purge_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, READONLY(); WRITEONLY(); - arena_i_purge(mib[1], true); + arena_i_purge((unsigned)mib[1], true); ret = 0; label_return: @@ -1626,7 +1626,7 @@ arena_i_decay_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, READONLY(); WRITEONLY(); - arena_i_purge(mib[1], false); + arena_i_purge((unsigned)mib[1], false); ret = 0; label_return: @@ -1639,7 +1639,7 @@ arena_i_dss_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, { int ret; const char *dss = NULL; - unsigned arena_ind = mib[1]; + unsigned arena_ind = (unsigned)mib[1]; dss_prec_t dss_prec_old = dss_prec_limit; dss_prec_t dss_prec = dss_prec_limit; @@ -1694,7 +1694,7 @@ arena_i_lg_dirty_mult_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; - unsigned arena_ind = mib[1]; + unsigned arena_ind = (unsigned)mib[1]; arena_t *arena; arena = arena_get(tsd_fetch(), arena_ind, false, true); @@ -1728,7 +1728,7 @@ arena_i_decay_time_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; - unsigned arena_ind = mib[1]; + unsigned arena_ind = (unsigned)mib[1]; arena_t *arena; arena = arena_get(tsd_fetch(), arena_ind, false, true); @@ -1762,7 +1762,7 @@ arena_i_chunk_hooks_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; - unsigned arena_ind = mib[1]; + unsigned arena_ind = (unsigned)mib[1]; arena_t *arena; malloc_mutex_lock(&ctl_mtx); @@ -1841,7 +1841,7 @@ arenas_initialized_ctl(const size_t *mib, size_t miblen, void *oldp, if (*oldlenp != ctl_stats.narenas * sizeof(bool)) { ret = EINVAL; nread = (*oldlenp < ctl_stats.narenas * sizeof(bool)) - ? (*oldlenp / sizeof(bool)) : ctl_stats.narenas; + ? 
(unsigned)(*oldlenp / sizeof(bool)) : ctl_stats.narenas; } else { ret = 0; nread = ctl_stats.narenas; From 603b3bd413d670909811ce49d28a8b0a4ae3ba6b Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 24 Feb 2016 11:02:14 -0800 Subject: [PATCH 60/96] Make nhbins unsigned rather than size_t. --- include/jemalloc/internal/tcache.h | 2 +- src/tcache.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index 09935c36..25eaf142 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -115,7 +115,7 @@ extern tcache_bin_info_t *tcache_bin_info; * Number of tcache bins. There are NBINS small-object bins, plus 0 or more * large-object bins. */ -extern size_t nhbins; +extern unsigned nhbins; /* Maximum cached size class. */ extern size_t tcache_maxclass; diff --git a/src/tcache.c b/src/tcache.c index 426bb1f7..fb1f057f 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -10,7 +10,7 @@ ssize_t opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT; tcache_bin_info_t *tcache_bin_info; static unsigned stack_nelms; /* Total stack elms per tcache. */ -size_t nhbins; +unsigned nhbins; size_t tcache_maxclass; tcaches_t *tcaches; From 8f683b94a751c65af8f9fa25970ccf2917b96bb8 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 24 Feb 2016 11:03:40 -0800 Subject: [PATCH 61/96] Make opt_narenas unsigned rather than size_t. --- doc/jemalloc.xml.in | 2 +- .../jemalloc/internal/jemalloc_internal.h.in | 2 +- src/ctl.c | 2 +- src/jemalloc.c | 20 +++++++++++-------- src/stats.c | 10 ++++++++-- test/unit/mallctl.c | 2 +- 6 files changed, 24 insertions(+), 14 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 0ced0aaa..28b5fb78 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -941,7 +941,7 @@ for (i = 0; i < nbins; i++) { opt.narenas - (size_t) + (unsigned) r- Maximum number of arenas to use for automatic diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 0260b9a8..a61a13a9 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -433,7 +433,7 @@ extern bool opt_redzone; extern bool opt_utrace; extern bool opt_xmalloc; extern bool opt_zero; -extern size_t opt_narenas; +extern unsigned opt_narenas; extern bool in_valgrind; diff --git a/src/ctl.c b/src/ctl.c index a7f1b675..e0044336 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1277,7 +1277,7 @@ CTL_RO_CONFIG_GEN(config_xmalloc, bool) CTL_RO_NL_GEN(opt_abort, opt_abort, bool) CTL_RO_NL_GEN(opt_dss, opt_dss, const char *) CTL_RO_NL_GEN(opt_lg_chunk, opt_lg_chunk, size_t) -CTL_RO_NL_GEN(opt_narenas, opt_narenas, size_t) +CTL_RO_NL_GEN(opt_narenas, opt_narenas, unsigned) CTL_RO_NL_GEN(opt_purge, purge_mode_names[opt_purge], const char *) CTL_RO_NL_GEN(opt_lg_dirty_mult, opt_lg_dirty_mult, ssize_t) CTL_RO_NL_GEN(opt_decay_time, opt_decay_time, ssize_t) diff --git a/src/jemalloc.c b/src/jemalloc.c index 76b4f154..3d356c30 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -40,7 +40,7 @@ bool opt_redzone = false; bool opt_utrace = false; bool opt_xmalloc = false; bool opt_zero = false; -size_t opt_narenas = 0; +unsigned opt_narenas = 0; /* Initialized to true if the process is running inside Valgrind. 
*/ bool in_valgrind; @@ -1031,7 +1031,7 @@ malloc_conf_init(void) if (cont) \ continue; \ } -#define CONF_HANDLE_SIZE_T(o, n, min, max, clip) \ +#define CONF_HANDLE_T_U(t, o, n, min, max, clip) \ if (CONF_MATCH(n)) { \ uintmax_t um; \ char *end; \ @@ -1045,11 +1045,11 @@ malloc_conf_init(void) k, klen, v, vlen); \ } else if (clip) { \ if ((min) != 0 && um < (min)) \ - o = (min); \ + o = (t)(min); \ else if (um > (max)) \ - o = (max); \ + o = (t)(max); \ else \ - o = um; \ + o = (t)um; \ } else { \ if (((min) != 0 && um < (min)) \ || um > (max)) { \ @@ -1058,10 +1058,14 @@ malloc_conf_init(void) "conf value", \ k, klen, v, vlen); \ } else \ - o = um; \ + o = (t)um; \ } \ continue; \ } +#define CONF_HANDLE_UNSIGNED(o, n, min, max, clip) \ + CONF_HANDLE_T_U(unsigned, o, n, min, max, clip) +#define CONF_HANDLE_SIZE_T(o, n, min, max, clip) \ + CONF_HANDLE_T_U(size_t, o, n, min, max, clip) #define CONF_HANDLE_SSIZE_T(o, n, min, max) \ if (CONF_MATCH(n)) { \ long l; \ @@ -1129,8 +1133,8 @@ malloc_conf_init(void) } continue; } - CONF_HANDLE_SIZE_T(opt_narenas, "narenas", 1, - SIZE_T_MAX, false) + CONF_HANDLE_UNSIGNED(opt_narenas, "narenas", 1, + UINT_MAX, false) if (strncmp("purge", k, klen) == 0) { int i; bool match = false;
diff --git a/src/stats.c b/src/stats.c index 8d5ed71e..a7249479 100644 --- a/src/stats.c +++ b/src/stats.c @@ -435,9 +435,10 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, bool bv; unsigned uv; ssize_t ssv; - size_t sv, bsz, ssz, sssz, cpsz; + size_t sv, bsz, usz, ssz, sssz, cpsz; bsz = sizeof(bool); + usz = sizeof(unsigned); ssz = sizeof(size_t); sssz = sizeof(ssize_t); cpsz = sizeof(const char *); @@ -464,6 +465,11 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, : "false", bv2 ? "true" : "false"); \ } \ } +#define OPT_WRITE_UNSIGNED(n) \ + if (je_mallctl("opt."#n, &uv, &usz, NULL, 0) == 0) { \ + malloc_cprintf(write_cb, cbopaque, \ + " opt."#n": %u\n", uv); \ + } #define OPT_WRITE_SIZE_T(n) \ if (je_mallctl("opt."#n, &sv, &ssz, NULL, 0) == 0) { \ malloc_cprintf(write_cb, cbopaque, \ @@ -494,7 +500,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, OPT_WRITE_BOOL(abort) OPT_WRITE_SIZE_T(lg_chunk) OPT_WRITE_CHAR_P(dss) - OPT_WRITE_SIZE_T(narenas) + OPT_WRITE_UNSIGNED(narenas) OPT_WRITE_CHAR_P(purge) if (opt_purge == purge_mode_ratio) { OPT_WRITE_SSIZE_T_MUTABLE(lg_dirty_mult,
diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index e8dc4926..01333514 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -163,7 +163,7 @@ TEST_BEGIN(test_mallctl_opt) TEST_MALLCTL_OPT(bool, abort, always); TEST_MALLCTL_OPT(size_t, lg_chunk, always); TEST_MALLCTL_OPT(const char *, dss, always); - TEST_MALLCTL_OPT(size_t, narenas, always); + TEST_MALLCTL_OPT(unsigned, narenas, always); TEST_MALLCTL_OPT(const char *, purge, always); TEST_MALLCTL_OPT(ssize_t, lg_dirty_mult, always); TEST_MALLCTL_OPT(ssize_t, decay_time, always);
From 0931cecbfaeada8b10fed56ff0175c8ffb9e9233 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 24 Feb 2016 11:04:08 -0800 Subject: [PATCH 62/96] Use ssize_t for readlink() rather than int.
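For illustration (not part of the patch), a self-contained sketch of why the width matters: readlink(2) returns ssize_t, which is 64 bits on LP64 systems, so storing the result in an int truncates it and trips the -Wshorten-64-to-32 warning enabled later in this series. The path is the one jemalloc consults for its configuration symlink; the program is otherwise hypothetical:

#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	char buf[4096];
	/* ssize_t, not int: readlink() reports the copied length or -1. */
	ssize_t linklen = readlink("/etc/malloc.conf", buf, sizeof(buf) - 1);

	if (linklen == -1)
		return (1);
	buf[linklen] = '\0';	/* readlink() does not NUL-terminate. */
	printf("%s\n", buf);
	return (0);
}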
--- src/jemalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 3d356c30..1acea404 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -958,7 +958,7 @@ malloc_conf_init(void) } break; case 2: { - int linklen = 0; + ssize_t linklen = 0; #ifndef _WIN32 int saved_errno = errno; const char *linkname = From 1c42a04cc6d3cc5d92bec55432015785584a4b0d Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 24 Feb 2016 11:04:51 -0800 Subject: [PATCH 63/96] Change lg_floor() return type from size_t to unsigned. --- .../jemalloc/internal/jemalloc_internal.h.in | 19 +++++++++---------- include/jemalloc/internal/util.h | 16 +++++++++------- 2 files changed, 18 insertions(+), 17 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index a61a13a9..ffad04ba 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -555,27 +555,27 @@ size2index_compute(size_t size) #if (NTBINS != 0) if (size <= (ZU(1) << LG_TINY_MAXCLASS)) { - size_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1; - size_t lg_ceil = lg_floor(pow2_ceil_zu(size)); + szind_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1; + szind_t lg_ceil = lg_floor(pow2_ceil_zu(size)); return (lg_ceil < lg_tmin ? 0 : lg_ceil - lg_tmin); } #endif { - size_t x = unlikely(ZI(size) < 0) ? ((size<<1) ? + szind_t x = unlikely(ZI(size) < 0) ? ((size<<1) ? (ZU(1)<<(LG_SIZEOF_PTR+3)) : ((ZU(1)<<(LG_SIZEOF_PTR+3))-1)) : lg_floor((size<<1)-1); - size_t shift = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM) ? 0 : + szind_t shift = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM) ? 0 : x - (LG_SIZE_CLASS_GROUP + LG_QUANTUM); - size_t grp = shift << LG_SIZE_CLASS_GROUP; + szind_t grp = shift << LG_SIZE_CLASS_GROUP; - size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1) + szind_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1) ? LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1; size_t delta_inverse_mask = ZI(-1) << lg_delta; - size_t mod = ((((size-1) & delta_inverse_mask) >> lg_delta)) & + szind_t mod = ((((size-1) & delta_inverse_mask) >> lg_delta)) & ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1); - size_t index = NTBINS + grp + mod; + szind_t index = NTBINS + grp + mod; return (index); } } @@ -586,8 +586,7 @@ size2index_lookup(size_t size) assert(size <= LOOKUP_MAXCLASS); { - size_t ret = ((size_t)(size2index_tab[(size-1) >> - LG_TINY_MIN])); + szind_t ret = (size2index_tab[(size-1) >> LG_TINY_MIN]); assert(ret == size2index_compute(size)); return (ret); } diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index 46d47df3..031f8045 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -130,7 +130,7 @@ unsigned ffs_u32(uint32_t bitmap); uint64_t pow2_ceil_u64(uint64_t x); uint32_t pow2_ceil_u32(uint32_t x); size_t pow2_ceil_zu(size_t x); -size_t lg_floor(size_t x); +unsigned lg_floor(size_t x); void set_errno(int errnum); int get_errno(void); #endif @@ -244,7 +244,7 @@ pow2_ceil_zu(size_t x) } #if (defined(__i386__) || defined(__amd64__) || defined(__x86_64__)) -JEMALLOC_INLINE size_t +JEMALLOC_INLINE unsigned lg_floor(size_t x) { size_t ret; @@ -255,10 +255,11 @@ lg_floor(size_t x) : "=r"(ret) // Outputs. : "r"(x) // Inputs. 
); - return (ret); + assert(ret < UINT_MAX); + return ((unsigned)ret); } #elif (defined(_MSC_VER)) -JEMALLOC_INLINE size_t +JEMALLOC_INLINE unsigned lg_floor(size_t x) { unsigned long ret; @@ -272,10 +273,11 @@ lg_floor(size_t x) #else # error "Unsupported type size for lg_floor()" #endif - return (ret); + assert(ret < UINT_MAX); + return ((unsigned)ret); } #elif (defined(JEMALLOC_HAVE_BUILTIN_CLZ)) -JEMALLOC_INLINE size_t +JEMALLOC_INLINE unsigned lg_floor(size_t x) { @@ -290,7 +292,7 @@ lg_floor(size_t x) #endif } #else -JEMALLOC_INLINE size_t +JEMALLOC_INLINE unsigned lg_floor(size_t x) { From 9e1810ca9dc4a5f5f0841b9a6c1abb4337753552 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 24 Feb 2016 12:42:23 -0800 Subject: [PATCH 64/96] Silence miscellaneous 64-to-32-bit data loss warnings. --- include/jemalloc/internal/arena.h | 4 ++-- include/jemalloc/internal/bitmap.h | 2 +- include/jemalloc/internal/hash.h | 15 ++++++++++----- include/jemalloc/jemalloc_macros.h.in | 6 +++--- src/arena.c | 21 +++++++++++---------- src/chunk.c | 4 ++-- src/ckh.c | 12 ++++++------ src/ctl.c | 5 +++-- src/jemalloc.c | 2 +- src/tcache.c | 2 +- src/util.c | 3 ++- test/integration/rallocx.c | 10 +++++----- test/unit/hash.c | 4 ++-- 13 files changed, 49 insertions(+), 41 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 165fb52d..59b480b5 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -1053,7 +1053,7 @@ arena_ptr_small_binind_get(const void *ptr, size_t mapbits) run = &miscelm->run; run_binind = run->binind; bin = &arena->bins[run_binind]; - actual_binind = bin - arena->bins; + actual_binind = (szind_t)(bin - arena->bins); assert(run_binind == actual_binind); bin_info = &arena_bin_info[actual_binind]; rpages = arena_miscelm_to_rpages(miscelm); @@ -1070,7 +1070,7 @@ arena_ptr_small_binind_get(const void *ptr, size_t mapbits) JEMALLOC_INLINE szind_t arena_bin_index(arena_t *arena, arena_bin_t *bin) { - szind_t binind = bin - arena->bins; + szind_t binind = (szind_t)(bin - arena->bins); assert(binind < NBINS); return (binind); } diff --git a/include/jemalloc/internal/bitmap.h b/include/jemalloc/internal/bitmap.h index c14e7162..8452bfed 100644 --- a/include/jemalloc/internal/bitmap.h +++ b/include/jemalloc/internal/bitmap.h @@ -113,7 +113,7 @@ void bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit); JEMALLOC_INLINE bool bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo) { - unsigned rgoff = binfo->levels[binfo->nlevels].group_offset - 1; + size_t rgoff = binfo->levels[binfo->nlevels].group_offset - 1; bitmap_t rg = bitmap[rgoff]; /* The bitmap is full iff the root group is 0. */ return (rg == 0); diff --git a/include/jemalloc/internal/hash.h b/include/jemalloc/internal/hash.h index 8b5fb037..864fda81 100644 --- a/include/jemalloc/internal/hash.h +++ b/include/jemalloc/internal/hash.h @@ -337,13 +337,18 @@ hash_x64_128(const void *key, const int len, const uint32_t seed, JEMALLOC_INLINE void hash(const void *key, size_t len, const uint32_t seed, size_t r_hash[2]) { + + assert(len <= INT_MAX); /* Unfortunate implementation limitation. 
*/ + #if (LG_SIZEOF_PTR == 3 && !defined(JEMALLOC_BIG_ENDIAN)) - hash_x64_128(key, len, seed, (uint64_t *)r_hash); + hash_x64_128(key, (int)len, seed, (uint64_t *)r_hash); #else - uint64_t hashes[2]; - hash_x86_128(key, len, seed, hashes); - r_hash[0] = (size_t)hashes[0]; - r_hash[1] = (size_t)hashes[1]; + { + uint64_t hashes[2]; + hash_x86_128(key, (int)len, seed, hashes); + r_hash[0] = (size_t)hashes[0]; + r_hash[1] = (size_t)hashes[1]; + } #endif } #endif diff --git a/include/jemalloc/jemalloc_macros.h.in b/include/jemalloc/jemalloc_macros.h.in index 698caa19..d164edac 100644 --- a/include/jemalloc/jemalloc_macros.h.in +++ b/include/jemalloc/jemalloc_macros.h.in @@ -11,12 +11,12 @@ #define JEMALLOC_VERSION_NREV @jemalloc_version_nrev@ #define JEMALLOC_VERSION_GID "@jemalloc_version_gid@" -# define MALLOCX_LG_ALIGN(la) (la) +# define MALLOCX_LG_ALIGN(la) ((int)(la)) # if LG_SIZEOF_PTR == 2 -# define MALLOCX_ALIGN(a) (ffs(a)-1) +# define MALLOCX_ALIGN(a) ((int)(ffs(a)-1)) # else # define MALLOCX_ALIGN(a) \ - ((a < (size_t)INT_MAX) ? ffs(a)-1 : ffs(a>>32)+31) + ((int)((a < (size_t)INT_MAX) ? ffs((int)a)-1 : ffs((int)(a>>32))+31)) # endif # define MALLOCX_ZERO ((int)0x40) /* diff --git a/src/arena.c b/src/arena.c index 7b065d60..987e2064 100644 --- a/src/arena.c +++ b/src/arena.c @@ -308,7 +308,7 @@ arena_run_reg_alloc(arena_run_t *run, arena_bin_info_t *bin_info) assert(run->nfree > 0); assert(!bitmap_full(run->bitmap, &bin_info->bitmap_info)); - regind = bitmap_sfu(run->bitmap, &bin_info->bitmap_info); + regind = (unsigned)bitmap_sfu(run->bitmap, &bin_info->bitmap_info); miscelm = arena_run_to_miscelm(run); rpages = arena_miscelm_to_rpages(miscelm); ret = (void *)((uintptr_t)rpages + (uintptr_t)bin_info->reg0_offset + @@ -3411,18 +3411,19 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info) * size). */ try_run_size = PAGE; - try_nregs = try_run_size / bin_info->reg_size; + try_nregs = (uint32_t)(try_run_size / bin_info->reg_size); do { perfect_run_size = try_run_size; perfect_nregs = try_nregs; try_run_size += PAGE; - try_nregs = try_run_size / bin_info->reg_size; + try_nregs = (uint32_t)(try_run_size / bin_info->reg_size); } while (perfect_run_size != perfect_nregs * bin_info->reg_size); assert(perfect_nregs <= RUN_MAXREGS); actual_run_size = perfect_run_size; - actual_nregs = (actual_run_size - pad_size) / bin_info->reg_interval; + actual_nregs = (uint32_t)((actual_run_size - pad_size) / + bin_info->reg_interval); /* * Redzones can require enough padding that not even a single region can @@ -3434,8 +3435,8 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info) assert(config_fill && unlikely(opt_redzone)); actual_run_size += PAGE; - actual_nregs = (actual_run_size - pad_size) / - bin_info->reg_interval; + actual_nregs = (uint32_t)((actual_run_size - pad_size) / + bin_info->reg_interval); } /* @@ -3443,8 +3444,8 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info) */ while (actual_run_size > arena_maxrun) { actual_run_size -= PAGE; - actual_nregs = (actual_run_size - pad_size) / - bin_info->reg_interval; + actual_nregs = (uint32_t)((actual_run_size - pad_size) / + bin_info->reg_interval); } assert(actual_nregs > 0); assert(actual_run_size == s2u(actual_run_size)); @@ -3452,8 +3453,8 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info) /* Copy final settings. 
*/ bin_info->run_size = actual_run_size; bin_info->nregs = actual_nregs; - bin_info->reg0_offset = actual_run_size - (actual_nregs * - bin_info->reg_interval) - pad_size + bin_info->redzone_size; + bin_info->reg0_offset = (uint32_t)(actual_run_size - (actual_nregs * + bin_info->reg_interval) - pad_size + bin_info->redzone_size); if (actual_run_size > small_maxrun) small_maxrun = actual_run_size; diff --git a/src/chunk.c b/src/chunk.c index 3d32a404..9de36eb6 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -732,8 +732,8 @@ chunk_boot(void) if (have_dss && chunk_dss_boot()) return (true); - if (rtree_new(&chunks_rtree, (ZU(1) << (LG_SIZEOF_PTR+3)) - - opt_lg_chunk, chunks_rtree_node_alloc, NULL)) + if (rtree_new(&chunks_rtree, (unsigned)((ZU(1) << (LG_SIZEOF_PTR+3)) - + opt_lg_chunk), chunks_rtree_node_alloc, NULL)) return (true); return (false); diff --git a/src/ckh.c b/src/ckh.c index 08fc433d..d1cfd234 100644 --- a/src/ckh.c +++ b/src/ckh.c @@ -99,7 +99,7 @@ ckh_try_bucket_insert(ckh_t *ckh, size_t bucket, const void *key, * Cycle through the cells in the bucket, starting at a random position. * The randomness avoids worst-case search overhead as buckets fill up. */ - offset = prng_lg_range(&ckh->prng_state, LG_CKH_BUCKET_CELLS); + offset = (unsigned)prng_lg_range(&ckh->prng_state, LG_CKH_BUCKET_CELLS); for (i = 0; i < (ZU(1) << LG_CKH_BUCKET_CELLS); i++) { cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + ((i + offset) & ((ZU(1) << LG_CKH_BUCKET_CELLS) - 1))]; @@ -141,7 +141,8 @@ ckh_evict_reloc_insert(ckh_t *ckh, size_t argbucket, void const **argkey, * were an item for which both hashes indicated the same * bucket. */ - i = prng_lg_range(&ckh->prng_state, LG_CKH_BUCKET_CELLS); + i = (unsigned)prng_lg_range(&ckh->prng_state, + LG_CKH_BUCKET_CELLS); cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + i]; assert(cell->key != NULL); @@ -247,8 +248,7 @@ ckh_grow(tsd_t *tsd, ckh_t *ckh) { bool ret; ckhc_t *tab, *ttab; - size_t lg_curcells; - unsigned lg_prevbuckets; + unsigned lg_prevbuckets, lg_curcells; #ifdef CKH_COUNT ckh->ngrows++; @@ -302,8 +302,8 @@ static void ckh_shrink(tsd_t *tsd, ckh_t *ckh) { ckhc_t *tab, *ttab; - size_t lg_curcells, usize; - unsigned lg_prevbuckets; + size_t usize; + unsigned lg_prevbuckets, lg_curcells; /* * It is possible (though unlikely, given well behaved hashes) that the diff --git a/src/ctl.c b/src/ctl.c index e0044336..107bacd6 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1925,7 +1925,7 @@ arenas_bin_i_index(const size_t *mib, size_t miblen, size_t i) } CTL_RO_NL_GEN(arenas_nlruns, nlclasses, unsigned) -CTL_RO_NL_GEN(arenas_lrun_i_size, index2size(NBINS+mib[2]), size_t) +CTL_RO_NL_GEN(arenas_lrun_i_size, index2size(NBINS+(szind_t)mib[2]), size_t) static const ctl_named_node_t * arenas_lrun_i_index(const size_t *mib, size_t miblen, size_t i) { @@ -1936,7 +1936,8 @@ arenas_lrun_i_index(const size_t *mib, size_t miblen, size_t i) } CTL_RO_NL_GEN(arenas_nhchunks, nhclasses, unsigned) -CTL_RO_NL_GEN(arenas_hchunk_i_size, index2size(NBINS+nlclasses+mib[2]), size_t) +CTL_RO_NL_GEN(arenas_hchunk_i_size, index2size(NBINS+nlclasses+(szind_t)mib[2]), + size_t) static const ctl_named_node_t * arenas_hchunk_i_index(const size_t *mib, size_t miblen, size_t i) { diff --git a/src/jemalloc.c b/src/jemalloc.c index 1acea404..ced27b88 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1396,7 +1396,7 @@ malloc_init_hard_finish(void) * machinery will fail to allocate memory at far lower limits. 
*/ if (narenas_auto > chunksize / sizeof(arena_t *)) { - narenas_auto = chunksize / sizeof(arena_t *); + narenas_auto = (unsigned)(chunksize / sizeof(arena_t *)); malloc_printf(": Reducing narenas to limit (%d)\n", narenas_auto); } diff --git a/src/tcache.c b/src/tcache.c index fb1f057f..9f10a745 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -461,7 +461,7 @@ tcaches_create(tsd_t *tsd, unsigned *r_ind) elm = tcaches_avail; tcaches_avail = tcaches_avail->next; elm->tcache = tcache; - *r_ind = elm - tcaches; + *r_ind = (unsigned)(elm - tcaches); } else { elm = &tcaches[tcaches_past]; elm->tcache = tcache; diff --git a/src/util.c b/src/util.c index 1373ee15..d519818d 100644 --- a/src/util.c +++ b/src/util.c @@ -581,7 +581,8 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) str[i] = '\0'; else str[size - 1] = '\0'; - ret = i; + assert(i < INT_MAX); + ret = (int)i; #undef APPEND_C #undef APPEND_S diff --git a/test/integration/rallocx.c b/test/integration/rallocx.c index be1b27b7..022e0bf0 100644 --- a/test/integration/rallocx.c +++ b/test/integration/rallocx.c @@ -138,22 +138,22 @@ TEST_END TEST_BEGIN(test_lg_align_and_zero) { void *p, *q; - size_t lg_align, sz; + unsigned lg_align; + size_t sz; #define MAX_LG_ALIGN 25 #define MAX_VALIDATE (ZU(1) << 22) - lg_align = ZU(0); + lg_align = 0; p = mallocx(1, MALLOCX_LG_ALIGN(lg_align)|MALLOCX_ZERO); assert_ptr_not_null(p, "Unexpected mallocx() error"); for (lg_align++; lg_align <= MAX_LG_ALIGN; lg_align++) { q = rallocx(p, 1, MALLOCX_LG_ALIGN(lg_align)|MALLOCX_ZERO); assert_ptr_not_null(q, - "Unexpected rallocx() error for lg_align=%zu", lg_align); + "Unexpected rallocx() error for lg_align=%u", lg_align); assert_ptr_null( (void *)((uintptr_t)q & ((ZU(1) << lg_align)-1)), - "%p inadequately aligned for lg_align=%zu", - q, lg_align); + "%p inadequately aligned for lg_align=%u", q, lg_align); sz = sallocx(q, 0); if ((sz << 1) <= MAX_VALIDATE) { assert_false(validate_fill(q, 0, 0, sz), diff --git a/test/unit/hash.c b/test/unit/hash.c index ea73d701..f50ba81b 100644 --- a/test/unit/hash.c +++ b/test/unit/hash.c @@ -35,7 +35,7 @@ typedef enum { hash_variant_x64_128 } hash_variant_t; -static size_t +static int hash_variant_bits(hash_variant_t variant) { @@ -63,7 +63,7 @@ hash_variant_string(hash_variant_t variant) static void hash_variant_verify_key(hash_variant_t variant, uint8_t *key) { - const size_t hashbytes = hash_variant_bits(variant) / 8; + const int hashbytes = hash_variant_bits(variant) / 8; VARIABLE_ARRAY(uint8_t, hashes, hashbytes * 256); VARIABLE_ARRAY(uint8_t, final, hashbytes); unsigned i; From b3d0070b1495ddd36893d481c512b5da1ab8acef Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 24 Feb 2016 13:00:40 -0800 Subject: [PATCH 65/96] Compile with -Wshorten-64-to-32. This will prevent accidental creation of potential integer truncation bugs when developing on LP64 systems. --- configure.ac | 1 + 1 file changed, 1 insertion(+) diff --git a/configure.ac b/configure.ac index da4ee3ac..eb387ed9 100644 --- a/configure.ac +++ b/configure.ac @@ -140,6 +140,7 @@ if test "x$CFLAGS" = "x" ; then fi JE_CFLAGS_APPEND([-Wall]) JE_CFLAGS_APPEND([-Werror=declaration-after-statement]) + JE_CFLAGS_APPEND([-Wshorten-64-to-32]) JE_CFLAGS_APPEND([-pipe]) JE_CFLAGS_APPEND([-g3]) elif test "x$je_cv_msvc" = "xyes" ; then From ca8fffb5c13b6a7c45fd034667a8910c61d09c3b Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 24 Feb 2016 13:16:51 -0800 Subject: [PATCH 66/96] Silence miscellaneous 64-to-32-bit data loss warnings. 
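The prof.c and util.c hunks below apply the same two idioms used throughout this series. A standalone sketch of the pattern (hypothetical function, not taken from the diffs): keep intermediate values at their natural width, and narrow explicitly only after checking the range:

#include <assert.h>
#include <limits.h>
#include <string.h>

static int
len_as_int(const char *s)
{
	size_t slen = strlen(s);	/* Natural width; no implicit narrowing. */

	/* Narrow explicitly, and only after verifying the value fits. */
	assert(slen <= INT_MAX);
	return ((int)slen);
}

int
main(void)
{

	return (len_as_int("jemalloc") == 8 ? 0 : 1);
}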
--- src/prof.c | 2 +- src/util.c | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/prof.c b/src/prof.c index 173da69f..93421abb 100644 --- a/src/prof.c +++ b/src/prof.c @@ -989,7 +989,7 @@ prof_dump_close(bool propagate_err) static bool prof_dump_write(bool propagate_err, const char *s) { - unsigned i, slen, n; + size_t i, slen, n; cassert(config_prof); diff --git a/src/util.c b/src/util.c index d519818d..9aaa8062 100644 --- a/src/util.c +++ b/src/util.c @@ -53,8 +53,12 @@ wrtmessage(void *cbopaque, const char *s) * Use syscall(2) rather than write(2) when possible in order to avoid * the possibility of memory allocation within libc. This is necessary * on FreeBSD; most operating systems do not have this problem though. + * + * syscall() returns long or int, depending on platform, so capture the + * unused result in the widest plausible type to avoid compiler + * warnings. */ - UNUSED int result = syscall(SYS_write, STDERR_FILENO, s, strlen(s)); + UNUSED long result = syscall(SYS_write, STDERR_FILENO, s, strlen(s)); #else UNUSED int result = write(STDERR_FILENO, s, strlen(s)); #endif From aa63d5d377b4508b83502e923690d1d7b67c8c88 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 24 Feb 2016 14:01:47 -0800 Subject: [PATCH 67/96] Fix ffs_zu() compilation error on MinGW. This regression was caused by 9f4ee6034c3ac6a8c8b5f9a0d76822fb2fd90c41 (Refactor jemalloc_ffs*() into ffs_*().). --- include/jemalloc/internal/util.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index 031f8045..b8885bfa 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -168,10 +168,12 @@ JEMALLOC_ALWAYS_INLINE unsigned ffs_zu(size_t bitmap) { -#if LG_SIZEOF_PTR == LG_SIZEOF_LONG - return (ffs_lu(bitmap)); -#elif LG_SIZEOF_PTR == LG_SIZEOF_INT +#if LG_SIZEOF_PTR == LG_SIZEOF_INT return (ffs_u(bitmap)); +#elif LG_SIZEOF_PTR == LG_SIZEOF_LONG + return (ffs_lu(bitmap)); +#elif LG_SIZEOF_PTR == LG_SIZEOF_LONG_LONG + return (ffs_llu(bitmap)); #else #error No implementation for size_t ffs() #endif From f591d2611a311e8d100273fccfeb462c92ae9ce7 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 24 Feb 2016 14:36:58 -0800 Subject: [PATCH 68/96] Update manual to reflect removal of global huge object tree. This resolves #323. --- doc/jemalloc.xml.in | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 28b5fb78..bbccabd7 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -518,23 +518,18 @@ for (i = 0; i < nbins; i++) { common case, but it increases memory usage and fragmentation, since a bounded number of objects can remain allocated in each thread cache. - Memory is conceptually broken into equal-sized chunks, where the - chunk size is a power of two that is greater than the page size. Chunks - are always aligned to multiples of the chunk size. This alignment makes it - possible to find metadata for user objects very quickly. - - User objects are broken into three categories according to size: - small, large, and huge. Small and large objects are managed entirely by - arenas; huge objects are additionally aggregated in a single data structure - that is shared by all threads. Huge objects are typically used by - applications infrequently enough that this single data structure is not a - scalability issue. 
- - Each chunk that is managed by an arena tracks its contents as runs of + Memory is conceptually broken into equal-sized chunks, where the chunk + size is a power of two that is greater than the page size. Chunks are + always aligned to multiples of the chunk size. This alignment makes it + possible to find metadata for user objects very quickly. User objects are + broken into three categories according to size: small, large, and huge. + Multiple small and large objects can reside within a single chunk, whereas + huge objects each have one or more chunks backing them. Each chunk that + contains small and/or large objects tracks its contents as runs of contiguous pages (unused, backing a set of small objects, or backing one - large object). The combination of chunk alignment and chunk page maps - makes it possible to determine all metadata regarding small and large - allocations in constant time. + large object). The combination of chunk alignment and chunk page maps makes + it possible to determine all metadata regarding small and large allocations + in constant time. Small objects are managed in groups by page runs. Each run maintains a bitmap to track which regions are in use. Allocation requests that are no
From 5ec703dd33b60924ec39534d3fbc234dfa01b15a Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 24 Feb 2016 15:35:24 -0800 Subject: [PATCH 69/96] Document the heap profile format. This resolves #258. --- doc/jemalloc.xml.in | 50 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 49 insertions(+), 1 deletion(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index bbccabd7..d7b33582 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1191,7 +1191,8 @@ malloc_conf = "xmalloc:true";]]> the jeprof command, which is based on the pprof that is developed as part of the gperftools - package. + package. See HEAP PROFILE + FORMAT for heap profile format documentation. @@ -2623,6 +2624,53 @@ typedef struct { + + HEAP PROFILE FORMAT Although the heap profiling functionality was originally designed to + be compatible with the + pprof command that is developed as part of the gperftools + package, the addition of per thread heap profiling functionality + required a different heap profile format. The jeprof + command is derived from pprof, with enhancements to + support the heap profile format described here.
+
+ In the following hypothetical heap profile, [...]
+ indicates elision for the sake of compactness.
+heap_v2/524288
+  t*: 28106: 56637512 [0: 0]
+  [...]
+  t3: 352: 16777344 [0: 0]
+  [...]
+  t99: 17754: 29341640 [0: 0]
+  [...]
+@ 0x5f86da8 0x5f5a1dc [...] 0x29e4d4e 0xa200316 0xabb2988 [...]
+  t*: 13: 6688 [0: 0]
+  t3: 12: 6496 [0: 0]
+  t99: 1: 192 [0: 0]
+[...]
+
+MAPPED_LIBRARIES:
+[...]]]> The following matches the above heap profile, but most
+tokens are replaced with <description> to indicate
+descriptions of the corresponding fields.
+<heap_profile_format_version>/<mean_sample_interval>
+  <aggregate>: <avg_count>: <avg_bytes> [<max_count>: <max_bytes>]
+  [...]
+  <thread_3_aggregate>: <count>: <bytes> [<max_count>: <max_bytes>]
+  [...]
+  <thread_99_aggregate>: <count>: <bytes> [<max_count>: <max_bytes>]
+  [...]
+@ <top_frame_return_address> [...] <frame_return_address> [...]
+  <aggregate>: <count>: <bytes> [<max_count>: <max_bytes>]
+  <thread_3_aggregate>: <count>: <bytes> [<max_count>: <max_bytes>]
+  <thread_99_aggregate>: <count>: <bytes> [<max_count>: <max_bytes>]
+[...]
+
+MAPPED_LIBRARIES:
+</proc/<pid>/maps>]]>
+ + DEBUGGING MALLOC PROBLEMS When debugging, it is a good idea to configure/build jemalloc with
From c7a9a6c86b483d4aebb51bd62d902f4022a7367b Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 24 Feb 2016 17:18:44 -0800 Subject: [PATCH 70/96] Attempt mmap-based in-place huge reallocation. Attempt mmap-based in-place huge reallocation by plumbing new_addr into chunk_alloc_mmap(). This can dramatically speed up incremental huge reallocation. This resolves #335.
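A minimal sketch of the mechanism this patch plumbs through (simplified; the real chunk_alloc_mmap() below also handles alignment and commit state, and MAP_ANONYMOUS is spelled MAP_ANON on some systems): pass the desired address as a hint to mmap(2), and treat any other placement as failure so that in-place extension is attempted but never forced:

#include <stddef.h>
#include <sys/mman.h>

static void *
map_at_hint(void *new_addr, size_t size)
{
	void *ret = mmap(new_addr, size, PROT_READ | PROT_WRITE,
	    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (ret == MAP_FAILED)
		return (NULL);
	if (new_addr != NULL && ret != new_addr) {
		/*
		 * The kernel chose a different address; undo the mapping
		 * rather than return memory the caller cannot splice in
		 * place.
		 */
		munmap(ret, size);
		return (NULL);
	}
	return (ret);
}

int
main(void)
{
	void *p = map_at_hint(NULL, 4096);

	return (p == NULL);
}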
--- include/jemalloc/internal/chunk_mmap.h | 4 ++-- src/chunk.c | 11 ++++------- src/chunk_mmap.c | 10 ++++++---- 3 files changed, 12 insertions(+), 13 deletions(-) diff --git a/include/jemalloc/internal/chunk_mmap.h b/include/jemalloc/internal/chunk_mmap.h index 7d8014c5..6f2d0ac2 100644 --- a/include/jemalloc/internal/chunk_mmap.h +++ b/include/jemalloc/internal/chunk_mmap.h @@ -9,8 +9,8 @@ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -void *chunk_alloc_mmap(size_t size, size_t alignment, bool *zero, - bool *commit); +void *chunk_alloc_mmap(void *new_addr, size_t size, size_t alignment, + bool *zero, bool *commit); bool chunk_dalloc_mmap(void *chunk, size_t size); #endif /* JEMALLOC_H_EXTERNS */ diff --git a/src/chunk.c b/src/chunk.c index 9de36eb6..6a107e1d 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -350,12 +350,9 @@ chunk_alloc_core(arena_t *arena, void *new_addr, size_t size, size_t alignment, chunk_alloc_dss(arena, new_addr, size, alignment, zero, commit)) != NULL) return (ret); - /* - * mmap. Requesting an address is not implemented for - * chunk_alloc_mmap(), so only call it if (new_addr == NULL). - */ - if (new_addr == NULL && (ret = chunk_alloc_mmap(size, alignment, zero, - commit)) != NULL) + /* mmap. */ + if ((ret = chunk_alloc_mmap(new_addr, size, alignment, zero, commit)) != + NULL) return (ret); /* "secondary" dss. */ if (have_dss && dss_prec == dss_prec_secondary && (ret = @@ -380,7 +377,7 @@ chunk_alloc_base(size_t size) */ zero = true; commit = true; - ret = chunk_alloc_mmap(size, chunksize, &zero, &commit); + ret = chunk_alloc_mmap(NULL, size, chunksize, &zero, &commit); if (ret == NULL) return (NULL); if (config_valgrind) diff --git a/src/chunk_mmap.c b/src/chunk_mmap.c index b9ba7419..56b2ee42 100644 --- a/src/chunk_mmap.c +++ b/src/chunk_mmap.c @@ -32,7 +32,8 @@ chunk_alloc_mmap_slow(size_t size, size_t alignment, bool *zero, bool *commit) } void * -chunk_alloc_mmap(size_t size, size_t alignment, bool *zero, bool *commit) +chunk_alloc_mmap(void *new_addr, size_t size, size_t alignment, bool *zero, + bool *commit) { void *ret; size_t offset; @@ -53,9 +54,10 @@ chunk_alloc_mmap(size_t size, size_t alignment, bool *zero, bool *commit) assert(alignment != 0); assert((alignment & chunksize_mask) == 0); - ret = pages_map(NULL, size); - if (ret == NULL) - return (NULL); + ret = pages_map(new_addr, size); + if (ret == NULL || ret == new_addr) + return (ret); + assert(new_addr == NULL); offset = ALIGNMENT_ADDR2OFFSET(ret, alignment); if (offset != 0) { pages_unmap(ret, size); From cd86c1481ad7356a7bbcd14549e938769f474fd6 Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Wed, 24 Feb 2016 11:02:49 -0800 Subject: [PATCH 71/96] Fix arena_run_first_best_fit Merge of 3417a304ccde61ac1f68b436ec22c03f1d6824ec looks like a small bug: first_best_fit doesn't scan through all the classes, since ind is offset from runs_avail_nclasses by run_avail_bias. 
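Reduced to its essence (hypothetical names, not jemalloc code), the bug is a mismatched bound for a biased index; as in the fix below, a loop limit must carry the same bias the starting index already has:

#include <stddef.h>
#include <stdio.h>

#define NCLASSES	8
#define BIAS		3	/* First usable slot, analogous to run_avail_bias. */

static int table[BIAS + NCLASSES];

static int *
first_nonempty(size_t ind)	/* ind arrives already biased. */
{
	size_t i;

	/*
	 * The bound must be NCLASSES + BIAS; stopping at NCLASSES would
	 * silently skip the last BIAS slots of the table.
	 */
	for (i = ind; i < NCLASSES + BIAS; i++) {
		if (table[i] != 0)
			return (&table[i]);
	}
	return (NULL);
}

int
main(void)
{

	table[BIAS + NCLASSES - 1] = 1;	/* Only the final slot is populated. */
	printf("%s\n", first_nonempty(BIAS) != NULL ? "found" : "missed");
	return (0);
}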
--- src/arena.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/arena.c b/src/arena.c index 987e2064..3b125b05 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1079,7 +1079,7 @@ arena_run_first_best_fit(arena_t *arena, size_t size) szind_t ind, i; ind = size2index(run_quantize_ceil(size)); - for (i = ind; i < runs_avail_nclasses; i++) { + for (i = ind; i < runs_avail_nclasses + runs_avail_bias; i++) { arena_chunk_map_misc_t *miscelm = arena_run_tree_first( arena_runs_avail_get(arena, i)); if (miscelm != NULL)
From 38127291670af8d12a21eb78ba49201f3a5af7d1 Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Wed, 24 Feb 2016 20:10:02 -0800 Subject: [PATCH 72/96] Fix arena_size computation. Fix arena_size arena_new() computation to incorporate runs_avail_nclasses elements for runs_avail, rather than (runs_avail_nclasses - 1) elements. Since offsetof(arena_t, runs_avail) is used rather than sizeof(arena_t) for the first term of the computation, all of the runs_avail elements must be added into the second term. This bug was introduced (by Jason Evans) while merging pull request #330 as 3417a304ccde61ac1f68b436ec22c03f1d6824ec (Separate arena_avail trees). --- src/arena.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/arena.c b/src/arena.c index 3b125b05..ad675d13 100644 --- a/src/arena.c +++ b/src/arena.c @@ -3271,7 +3271,7 @@ arena_new(unsigned ind) /* Compute arena size to incorporate sufficient runs_avail elements. */ arena_size = offsetof(arena_t, runs_avail) + (sizeof(arena_run_tree_t) * - (runs_avail_nclasses - 1)); + runs_avail_nclasses); /* * Allocate arena, arena->lstats, and arena->hstats contiguously, mainly * because there is no way to clean up if base_alloc() OOMs.
From 767d85061a6fb88ec977bbcd9b429a43aff391e6 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 24 Feb 2016 23:58:10 -0800 Subject: [PATCH 73/96] Refactor arenas array (fixes deadlock). Refactor the arenas array, which contains pointers to all extant arenas, such that it starts out as a sparse array of maximum size, and use double-checked atomics-based reads as the basis for fast and simple arena_get(). Additionally, reduce arenas_lock's role such that it only protects against arena initialization races. These changes remove the possibility for arena lookups to trigger locking, which resolves at least one known (fork-related) deadlock. This resolves #315. --- include/jemalloc/internal/arena.h | 8 +- include/jemalloc/internal/atomic.h | 4 +- .../jemalloc/internal/jemalloc_internal.h.in | 37 +-- include/jemalloc/internal/private_symbols.txt | 7 +- src/arena.c | 21 ++ src/chunk.c | 4 +- src/ctl.c | 43 +-- src/jemalloc.c | 247 +++++++----------- src/tcache.c | 5 +- 9 files changed, 162 insertions(+), 214 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 59b480b5..470eee65 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -290,14 +290,14 @@ struct arena_s { /* * Number of threads currently assigned to this arena. This field is - * protected by arenas_lock. + * synchronized via atomic operations. */ unsigned nthreads; /* * There are three classes of arena operations from a locking * perspective: - * 1) Thread assignment (modifies nthreads) is protected by arenas_lock. + * 1) Thread assignment (modifies nthreads) is synchronized via atomics. * 2) Bin-related operations are protected by bin locks. * 3) Chunk- and run-related operations are protected by this mutex.
*/ @@ -465,7 +465,6 @@ struct arena_s { /* Used in conjunction with tsd for fast arena-related context lookup. */ struct arena_tdata_s { - arena_t *arena; ticker_t decay_ticker; }; #endif /* JEMALLOC_ARENA_STRUCTS_B */ @@ -578,6 +577,9 @@ void arena_stats_merge(arena_t *arena, const char **dss, ssize_t *lg_dirty_mult, ssize_t *decay_time, size_t *nactive, size_t *ndirty, arena_stats_t *astats, malloc_bin_stats_t *bstats, malloc_large_stats_t *lstats, malloc_huge_stats_t *hstats); +unsigned arena_nthreads_get(arena_t *arena); +void arena_nthreads_inc(arena_t *arena); +void arena_nthreads_dec(arena_t *arena); arena_t *arena_new(unsigned ind); bool arena_boot(void); void arena_prefork(arena_t *arena); diff --git a/include/jemalloc/internal/atomic.h b/include/jemalloc/internal/atomic.h index a9aad35d..3f15ea14 100644 --- a/include/jemalloc/internal/atomic.h +++ b/include/jemalloc/internal/atomic.h @@ -28,8 +28,8 @@ * callers. * * atomic_read_( *p) { return (*p); } - * atomic_add_( *p, x) { return (*p + x); } - * atomic_sub_( *p, x) { return (*p - x); } + * atomic_add_( *p, x) { return (*p += x); } + * atomic_sub_( *p, x) { return (*p -= x); } * bool atomic_cas_( *p, c, s) * { * if (*p != c) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index ffad04ba..611ed36a 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -438,7 +438,13 @@ extern unsigned opt_narenas; extern bool in_valgrind; /* Number of CPUs. */ -extern unsigned ncpus; +extern unsigned ncpus; + +/* + * Arenas that are used to service external requests. Not all elements of the + * arenas array are necessarily used; arenas are created lazily as needed. + */ +extern arena_t **arenas; /* * index2size_tab encodes the same information as could be computed (at @@ -452,21 +458,17 @@ extern size_t const index2size_tab[NSIZES+1]; */ extern uint8_t const size2index_tab[]; -arena_t *a0get(void); void *a0malloc(size_t size); void a0dalloc(void *ptr); void *bootstrap_malloc(size_t size); void *bootstrap_calloc(size_t num, size_t size); void bootstrap_free(void *ptr); arena_t *arenas_extend(unsigned ind); -arena_t *arena_init(unsigned ind); unsigned narenas_total_get(void); +arena_t *arena_init(unsigned ind); arena_tdata_t *arena_tdata_get_hard(tsd_t *tsd, unsigned ind); -arena_t *arena_get_hard(tsd_t *tsd, unsigned ind, bool init_if_missing, - arena_tdata_t *tdata); arena_t *arena_choose_hard(tsd_t *tsd); void arena_migrate(tsd_t *tsd, unsigned oldind, unsigned newind); -unsigned arena_nbound(unsigned ind); void thread_allocated_cleanup(tsd_t *tsd); void thread_deallocated_cleanup(tsd_t *tsd); void arena_cleanup(tsd_t *tsd); @@ -543,8 +545,7 @@ size_t sa2u(size_t size, size_t alignment); arena_t *arena_choose(tsd_t *tsd, arena_t *arena); arena_tdata_t *arena_tdata_get(tsd_t *tsd, unsigned ind, bool refresh_if_missing); -arena_t *arena_get(tsd_t *tsd, unsigned ind, bool init_if_missing, - bool refresh_if_missing); +arena_t *arena_get(unsigned ind, bool init_if_missing); ticker_t *decay_ticker_get(tsd_t *tsd, unsigned ind); #endif @@ -819,19 +820,19 @@ arena_tdata_get(tsd_t *tsd, unsigned ind, bool refresh_if_missing) } JEMALLOC_INLINE arena_t * -arena_get(tsd_t *tsd, unsigned ind, bool init_if_missing, - bool refresh_if_missing) +arena_get(unsigned ind, bool init_if_missing) { - arena_tdata_t *tdata; + arena_t *ret; - /* init_if_missing requires refresh_if_missing. 
*/ - assert(!init_if_missing || refresh_if_missing); + assert(ind <= MALLOCX_ARENA_MAX); - tdata = arena_tdata_get(tsd, ind, refresh_if_missing); - if (unlikely(tdata == NULL || tdata->arena == NULL)) - return (arena_get_hard(tsd, ind, init_if_missing, tdata)); - - return (tdata->arena); + ret = arenas[ind]; + if (unlikely(ret == NULL)) { + ret = atomic_read_p((void *)&arenas[ind]); + if (init_if_missing && unlikely(ret == NULL)) + ret = arena_init(ind); + } + return (ret); } JEMALLOC_INLINE ticker_t * diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index adab8a5c..d716b82d 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -1,5 +1,4 @@ a0dalloc -a0get a0malloc arena_aalloc arena_alloc_junk_small @@ -34,7 +33,6 @@ arena_decay_ticks arena_dss_prec_get arena_dss_prec_set arena_get -arena_get_hard arena_init arena_lg_dirty_mult_default_get arena_lg_dirty_mult_default_set @@ -73,10 +71,12 @@ arena_migrate arena_miscelm_get arena_miscelm_to_pageind arena_miscelm_to_rpages -arena_nbound arena_new arena_node_alloc arena_node_dalloc +arena_nthreads_dec +arena_nthreads_get +arena_nthreads_inc arena_palloc arena_postfork_child arena_postfork_parent @@ -106,6 +106,7 @@ arena_stats_merge arena_tcache_fill_small arena_tdata_get arena_tdata_get_hard +arenas atomic_add_p atomic_add_u atomic_add_uint32 diff --git a/src/arena.c b/src/arena.c index ad675d13..3f394681 100644 --- a/src/arena.c +++ b/src/arena.c @@ -3261,6 +3261,27 @@ arena_stats_merge(arena_t *arena, const char **dss, ssize_t *lg_dirty_mult, } } +unsigned +arena_nthreads_get(arena_t *arena) +{ + + return (atomic_read_u(&arena->nthreads)); +} + +void +arena_nthreads_inc(arena_t *arena) +{ + + atomic_add_u(&arena->nthreads, 1); +} + +void +arena_nthreads_dec(arena_t *arena) +{ + + atomic_sub_u(&arena->nthreads, 1); +} + arena_t * arena_new(unsigned ind) { diff --git a/src/chunk.c b/src/chunk.c index 6a107e1d..26622ced 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -415,9 +415,7 @@ chunk_arena_get(unsigned arena_ind) { arena_t *arena; - /* Dodge tsd for a0 in order to avoid bootstrapping issues. */ - arena = (arena_ind == 0) ? a0get() : arena_get(tsd_fetch(), arena_ind, - false, true); + arena = arena_get(arena_ind, false); /* * The arena we're allocating on behalf of must have been initialized * already. 
diff --git a/src/ctl.c b/src/ctl.c index 107bacd6..dbf57c36 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -694,9 +694,7 @@ ctl_grow(void) static void ctl_refresh(void) { - tsd_t *tsd; unsigned i; - bool refreshed; VARIABLE_ARRAY(arena_t *, tarenas, ctl_stats.narenas); /* @@ -706,19 +704,14 @@ ctl_refresh(void) ctl_stats.arenas[ctl_stats.narenas].nthreads = 0; ctl_arena_clear(&ctl_stats.arenas[ctl_stats.narenas]); - tsd = tsd_fetch(); - for (i = 0, refreshed = false; i < ctl_stats.narenas; i++) { - tarenas[i] = arena_get(tsd, i, false, false); - if (tarenas[i] == NULL && !refreshed) { - tarenas[i] = arena_get(tsd, i, false, true); - refreshed = true; - } - } + for (i = 0; i < ctl_stats.narenas; i++) + tarenas[i] = arena_get(i, false); for (i = 0; i < ctl_stats.narenas; i++) { - if (tarenas[i] != NULL) - ctl_stats.arenas[i].nthreads = arena_nbound(i); - else + if (tarenas[i] != NULL) { + ctl_stats.arenas[i].nthreads = + arena_nthreads_get(arena_get(i, false)); + } else ctl_stats.arenas[i].nthreads = 0; } @@ -1332,7 +1325,7 @@ thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, } /* Initialize arena if necessary. */ - newarena = arena_get(tsd, newind, true, true); + newarena = arena_get(newind, true); if (newarena == NULL) { ret = EAGAIN; goto label_return; @@ -1560,22 +1553,14 @@ arena_i_purge(unsigned arena_ind, bool all) malloc_mutex_lock(&ctl_mtx); { - tsd_t *tsd = tsd_fetch(); unsigned narenas = ctl_stats.narenas; if (arena_ind == narenas) { unsigned i; - bool refreshed; VARIABLE_ARRAY(arena_t *, tarenas, narenas); - for (i = 0, refreshed = false; i < narenas; i++) { - tarenas[i] = arena_get(tsd, i, false, false); - if (tarenas[i] == NULL && !refreshed) { - tarenas[i] = arena_get(tsd, i, false, - true); - refreshed = true; - } - } + for (i = 0; i < narenas; i++) + tarenas[i] = arena_get(i, false); /* * No further need to hold ctl_mtx, since narenas and @@ -1592,7 +1577,7 @@ arena_i_purge(unsigned arena_ind, bool all) assert(arena_ind < narenas); - tarena = arena_get(tsd, arena_ind, false, true); + tarena = arena_get(arena_ind, false); /* No further need to hold ctl_mtx. 
*/ malloc_mutex_unlock(&ctl_mtx); @@ -1664,7 +1649,7 @@ arena_i_dss_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, } if (arena_ind < ctl_stats.narenas) { - arena_t *arena = arena_get(tsd_fetch(), arena_ind, false, true); + arena_t *arena = arena_get(arena_ind, false); if (arena == NULL || (dss_prec != dss_prec_limit && arena_dss_prec_set(arena, dss_prec))) { ret = EFAULT; @@ -1697,7 +1682,7 @@ arena_i_lg_dirty_mult_ctl(const size_t *mib, size_t miblen, void *oldp, unsigned arena_ind = (unsigned)mib[1]; arena_t *arena; - arena = arena_get(tsd_fetch(), arena_ind, false, true); + arena = arena_get(arena_ind, false); if (arena == NULL) { ret = EFAULT; goto label_return; @@ -1731,7 +1716,7 @@ arena_i_decay_time_ctl(const size_t *mib, size_t miblen, void *oldp, unsigned arena_ind = (unsigned)mib[1]; arena_t *arena; - arena = arena_get(tsd_fetch(), arena_ind, false, true); + arena = arena_get(arena_ind, false); if (arena == NULL) { ret = EFAULT; goto label_return; @@ -1767,7 +1752,7 @@ arena_i_chunk_hooks_ctl(const size_t *mib, size_t miblen, void *oldp, malloc_mutex_lock(&ctl_mtx); if (arena_ind < narenas_total_get() && (arena = - arena_get(tsd_fetch(), arena_ind, false, true)) != NULL) { + arena_get(arena_ind, false)) != NULL) { if (newp != NULL) { chunk_hooks_t old_chunk_hooks, new_chunk_hooks; WRITE(new_chunk_hooks, chunk_hooks_t); diff --git a/src/jemalloc.c b/src/jemalloc.c index ced27b88..86032a40 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -47,7 +47,7 @@ bool in_valgrind; unsigned ncpus; -/* Protects arenas initialization (arenas, narenas_total). */ +/* Protects arenas initialization. */ static malloc_mutex_t arenas_lock; /* * Arenas that are used to service external requests. Not all elements of the @@ -57,8 +57,8 @@ static malloc_mutex_t arenas_lock; * arenas. arenas[narenas_auto..narenas_total) are only used if the application * takes some action to create them and allocate from them. */ -static arena_t **arenas; -static unsigned narenas_total; +arena_t **arenas; +static unsigned narenas_total; /* Use narenas_total_*(). */ static arena_t *a0; /* arenas[0]; read-only after initialization. */ static unsigned narenas_auto; /* Read-only after initialization. */ @@ -311,14 +311,6 @@ malloc_init(void) * cannot tolerate TLS variable access. */ -arena_t * -a0get(void) -{ - - assert(a0 != NULL); - return (a0); -} - static void * a0ialloc(size_t size, bool zero, bool is_metadata) { @@ -327,7 +319,7 @@ a0ialloc(size_t size, bool zero, bool is_metadata) return (NULL); return (iallocztm(NULL, size, size2index(size), zero, false, - is_metadata, a0get(), true)); + is_metadata, arena_get(0, false), true)); } static void @@ -391,47 +383,59 @@ bootstrap_free(void *ptr) a0idalloc(ptr, false); } +static void +arena_set(unsigned ind, arena_t *arena) +{ + + atomic_write_p((void **)&arenas[ind], arena); +} + +static void +narenas_total_set(unsigned narenas) +{ + + atomic_write_u(&narenas_total, narenas); +} + +static void +narenas_total_inc(void) +{ + + atomic_add_u(&narenas_total, 1); +} + +unsigned +narenas_total_get(void) +{ + + return (atomic_read_u(&narenas_total)); +} + /* Create a new arena and insert it into the arenas array at index ind. */ static arena_t * arena_init_locked(unsigned ind) { arena_t *arena; - /* Expand arenas if necessary. 
*/ - assert(ind <= narenas_total); + assert(ind <= narenas_total_get()); if (ind > MALLOCX_ARENA_MAX) return (NULL); - if (ind == narenas_total) { - unsigned narenas_new = narenas_total + 1; - arena_t **arenas_new = - (arena_t **)a0malloc(CACHELINE_CEILING(narenas_new * - sizeof(arena_t *))); - if (arenas_new == NULL) - return (NULL); - memcpy(arenas_new, arenas, narenas_total * sizeof(arena_t *)); - arenas_new[ind] = NULL; - /* - * Deallocate only if arenas came from a0malloc() (not - * base_alloc()). - */ - if (narenas_total != narenas_auto) - a0dalloc(arenas); - arenas = arenas_new; - narenas_total = narenas_new; - } + if (ind == narenas_total_get()) + narenas_total_inc(); /* * Another thread may have already initialized arenas[ind] if it's an * auto arena. */ - arena = arenas[ind]; + arena = arena_get(ind, false); if (arena != NULL) { assert(ind < narenas_auto); return (arena); } /* Actually initialize the arena. */ - arena = arenas[ind] = arena_new(ind); + arena = arena_new(ind); + arena_set(ind, arena); return (arena); } @@ -446,37 +450,16 @@ arena_init(unsigned ind) return (arena); } -unsigned -narenas_total_get(void) -{ - unsigned narenas; - - malloc_mutex_lock(&arenas_lock); - narenas = narenas_total; - malloc_mutex_unlock(&arenas_lock); - - return (narenas); -} - -static void -arena_bind_locked(tsd_t *tsd, unsigned ind) -{ - arena_t *arena; - - arena = arenas[ind]; - arena->nthreads++; - - if (tsd_nominal(tsd)) - tsd_arena_set(tsd, arena); -} - static void arena_bind(tsd_t *tsd, unsigned ind) { + arena_t *arena; - malloc_mutex_lock(&arenas_lock); - arena_bind_locked(tsd, ind); - malloc_mutex_unlock(&arenas_lock); + arena = arena_get(ind, false); + arena_nthreads_inc(arena); + + if (tsd_nominal(tsd)) + tsd_arena_set(tsd, arena); } void @@ -484,35 +467,20 @@ arena_migrate(tsd_t *tsd, unsigned oldind, unsigned newind) { arena_t *oldarena, *newarena; - malloc_mutex_lock(&arenas_lock); - oldarena = arenas[oldind]; - newarena = arenas[newind]; - oldarena->nthreads--; - newarena->nthreads++; - malloc_mutex_unlock(&arenas_lock); + oldarena = arena_get(oldind, false); + newarena = arena_get(newind, false); + arena_nthreads_dec(oldarena); + arena_nthreads_inc(newarena); tsd_arena_set(tsd, newarena); } -unsigned -arena_nbound(unsigned ind) -{ - unsigned nthreads; - - malloc_mutex_lock(&arenas_lock); - nthreads = arenas[ind]->nthreads; - malloc_mutex_unlock(&arenas_lock); - return (nthreads); -} - static void arena_unbind(tsd_t *tsd, unsigned ind) { arena_t *arena; - malloc_mutex_lock(&arenas_lock); - arena = arenas[ind]; - arena->nthreads--; - malloc_mutex_unlock(&arenas_lock); + arena = arena_get(ind, false); + arena_nthreads_dec(arena); tsd_arena_set(tsd, NULL); } @@ -568,14 +536,6 @@ arena_tdata_get_hard(tsd_t *tsd, unsigned ind) * the arenas.extend mallctl, which we trust mallctl synchronization to * prevent. */ - malloc_mutex_lock(&arenas_lock); - for (i = 0; i < narenas_actual; i++) - arenas_tdata[i].arena = arenas[i]; - malloc_mutex_unlock(&arenas_lock); - if (narenas_tdata > narenas_actual) { - memset(&arenas_tdata[narenas_actual], 0, sizeof(arena_tdata_t) - * (narenas_tdata - narenas_actual)); - } /* Copy/initialize tickers. */ for (i = 0; i < narenas_actual; i++) { @@ -587,6 +547,10 @@ arena_tdata_get_hard(tsd_t *tsd, unsigned ind) DECAY_NTICKS_PER_UPDATE); } } + if (narenas_tdata > narenas_actual) { + memset(&arenas_tdata[narenas_actual], 0, sizeof(arena_tdata_t) + * (narenas_tdata - narenas_actual)); + } /* Read the refreshed tdata array. 
*/ tdata = &arenas_tdata[ind]; @@ -596,33 +560,6 @@ label_return: return (tdata); } -arena_t * -arena_get_hard(tsd_t *tsd, unsigned ind, bool init_if_missing, - arena_tdata_t *tdata) -{ - arena_t *arena; - unsigned narenas_actual; - - if (init_if_missing && tdata != NULL) { - tdata->arena = arena_init(ind); - if (tdata->arena != NULL) - return (tdata->arena); - } - - /* - * This function must always tell the truth, even if it's slow, so don't - * let OOM, thread cleanup (note tsd_nominal check), nor recursive - * allocation avoidance (note arenas_tdata_bypass check) get in the way. - */ - narenas_actual = narenas_total_get(); - if (ind >= narenas_actual) - return (NULL); - malloc_mutex_lock(&arenas_lock); - arena = arenas[ind]; - malloc_mutex_unlock(&arenas_lock); - return (arena); -} - /* Slow path, called only by arena_choose(). */ arena_t * arena_choose_hard(tsd_t *tsd) @@ -635,15 +572,16 @@ arena_choose_hard(tsd_t *tsd) choose = 0; first_null = narenas_auto; malloc_mutex_lock(&arenas_lock); - assert(a0get() != NULL); + assert(arena_get(0, false) != NULL); for (i = 1; i < narenas_auto; i++) { - if (arenas[i] != NULL) { + if (arena_get(i, false) != NULL) { /* * Choose the first arena that has the lowest * number of threads assigned to it. */ - if (arenas[i]->nthreads < - arenas[choose]->nthreads) + if (arena_nthreads_get(arena_get(i, false)) < + arena_nthreads_get(arena_get(choose, + false))) choose = i; } else if (first_null == narenas_auto) { /* @@ -659,13 +597,13 @@ arena_choose_hard(tsd_t *tsd) } } - if (arenas[choose]->nthreads == 0 + if (arena_nthreads_get(arena_get(choose, false)) == 0 || first_null == narenas_auto) { /* * Use an unloaded arena, or the least loaded arena if * all arenas are already initialized. */ - ret = arenas[choose]; + ret = arena_get(choose, false); } else { /* Initialize a new arena. */ choose = first_null; @@ -675,10 +613,10 @@ arena_choose_hard(tsd_t *tsd) return (NULL); } } - arena_bind_locked(tsd, choose); + arena_bind(tsd, choose); malloc_mutex_unlock(&arenas_lock); } else { - ret = a0get(); + ret = arena_get(0, false); arena_bind(tsd, 0); } @@ -750,7 +688,7 @@ stats_print_atexit(void) * continue to allocate. */ for (i = 0, narenas = narenas_total_get(); i < narenas; i++) { - arena_t *arena = arenas[i]; + arena_t *arena = arena_get(i, false); if (arena != NULL) { tcache_t *tcache; @@ -1309,7 +1247,8 @@ malloc_init_hard_a0_locked(void) * Create enough scaffolding to allow recursive allocation in * malloc_ncpus(). */ - narenas_total = narenas_auto = 1; + narenas_auto = 1; + narenas_total_set(narenas_auto); arenas = &a0; memset(arenas, 0, sizeof(arena_t *) * narenas_auto); /* @@ -1391,28 +1330,22 @@ malloc_init_hard_finish(void) } narenas_auto = opt_narenas; /* - * Make sure that the arenas array can be allocated. In practice, this - * limit is enough to allow the allocator to function, but the ctl - * machinery will fail to allocate memory at far lower limits. + * Limit the number of arenas to the indexing range of MALLOCX_ARENA(). */ - if (narenas_auto > chunksize / sizeof(arena_t *)) { - narenas_auto = (unsigned)(chunksize / sizeof(arena_t *)); + if (narenas_auto > MALLOCX_ARENA_MAX) { + narenas_auto = MALLOCX_ARENA_MAX; malloc_printf(": Reducing narenas to limit (%d)\n", narenas_auto); } - narenas_total = narenas_auto; + narenas_total_set(narenas_auto); /* Allocate and initialize arenas. 
*/ - arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas_total); + arenas = (arena_t **)base_alloc(sizeof(arena_t *) * + (MALLOCX_ARENA_MAX+1)); if (arenas == NULL) return (true); - /* - * Zero the array. In practice, this should always be pre-zeroed, - * since it was just mmap()ed, but let's be sure. - */ - memset(arenas, 0, sizeof(arena_t *) * narenas_total); /* Copy the pointer to the one arena that was already initialized. */ - arenas[0] = a0; + arena_set(0, a0); malloc_init_state = malloc_init_initialized; malloc_slow_flag_init(); @@ -2084,7 +2017,7 @@ imallocx_flags_decode_hard(tsd_t *tsd, size_t size, int flags, size_t *usize, *tcache = tcache_get(tsd, true); if ((flags & MALLOCX_ARENA_MASK) != 0) { unsigned arena_ind = MALLOCX_ARENA_GET(flags); - *arena = arena_get(tsd, arena_ind, true, true); + *arena = arena_get(arena_ind, true); if (unlikely(*arena == NULL)) return (true); } else @@ -2325,7 +2258,7 @@ je_rallocx(void *ptr, size_t size, int flags) if (unlikely((flags & MALLOCX_ARENA_MASK) != 0)) { unsigned arena_ind = MALLOCX_ARENA_GET(flags); - arena = arena_get(tsd, arena_ind, true, true); + arena = arena_get(arena_ind, true); if (unlikely(arena == NULL)) goto label_oom; } else @@ -2677,7 +2610,7 @@ JEMALLOC_EXPORT void _malloc_prefork(void) #endif { - unsigned i; + unsigned i, narenas; #ifdef JEMALLOC_MUTEX_INIT_CB if (!malloc_initialized()) @@ -2689,9 +2622,11 @@ _malloc_prefork(void) ctl_prefork(); prof_prefork(); malloc_mutex_prefork(&arenas_lock); - for (i = 0; i < narenas_total; i++) { - if (arenas[i] != NULL) - arena_prefork(arenas[i]); + for (i = 0, narenas = narenas_total_get(); i < narenas; i++) { + arena_t *arena; + + if ((arena = arena_get(i, false)) != NULL) + arena_prefork(arena); } chunk_prefork(); base_prefork(); @@ -2705,7 +2640,7 @@ JEMALLOC_EXPORT void _malloc_postfork(void) #endif { - unsigned i; + unsigned i, narenas; #ifdef JEMALLOC_MUTEX_INIT_CB if (!malloc_initialized()) @@ -2716,9 +2651,11 @@ _malloc_postfork(void) /* Release all mutexes, now that fork() has completed. */ base_postfork_parent(); chunk_postfork_parent(); - for (i = 0; i < narenas_total; i++) { - if (arenas[i] != NULL) - arena_postfork_parent(arenas[i]); + for (i = 0, narenas = narenas_total_get(); i < narenas; i++) { + arena_t *arena; + + if ((arena = arena_get(i, false)) != NULL) + arena_postfork_parent(arena); } malloc_mutex_postfork_parent(&arenas_lock); prof_postfork_parent(); @@ -2728,16 +2665,18 @@ _malloc_postfork(void) void jemalloc_postfork_child(void) { - unsigned i; + unsigned i, narenas; assert(malloc_initialized()); /* Release all mutexes, now that fork() has completed. */ base_postfork_child(); chunk_postfork_child(); - for (i = 0; i < narenas_total; i++) { - if (arenas[i] != NULL) - arena_postfork_child(arenas[i]); + for (i = 0, narenas = narenas_total_get(); i < narenas; i++) { + arena_t *arena; + + if ((arena = arena_get(i, false)) != NULL) + arena_postfork_child(arena); } malloc_mutex_postfork_child(&arenas_lock); prof_postfork_child(); diff --git a/src/tcache.c b/src/tcache.c index 9f10a745..6e32f404 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -325,7 +325,8 @@ tcache_create(tsd_t *tsd, arena_t *arena) /* Avoid false cacheline sharing. 
*/ size = sa2u(size, CACHELINE); - tcache = ipallocztm(tsd, size, CACHELINE, true, false, true, a0get()); + tcache = ipallocztm(tsd, size, CACHELINE, true, false, true, + arena_get(0, false)); if (tcache == NULL) return (NULL); @@ -453,7 +454,7 @@ tcaches_create(tsd_t *tsd, unsigned *r_ind) if (tcaches_avail == NULL && tcaches_past > MALLOCX_TCACHE_MAX) return (true); - tcache = tcache_create(tsd, a0get()); + tcache = tcache_create(tsd, arena_get(0, false)); if (tcache == NULL) return (true); From 0c516a00c4cb28cff55ce0995f756b5aae074c9e Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 25 Feb 2016 15:29:49 -0800 Subject: [PATCH 74/96] Make *allocx() size class overflow behavior defined. Limit supported size and alignment to HUGE_MAXCLASS, which in turn is now limited to be less than PTRDIFF_MAX. This resolves #278 and #295. --- doc/jemalloc.xml.in | 14 ++-- include/jemalloc/internal/arena.h | 3 +- include/jemalloc/internal/huge.h | 4 +- .../jemalloc/internal/jemalloc_internal.h.in | 15 ++-- include/jemalloc/internal/size_classes.sh | 4 +- include/jemalloc/internal/tcache.h | 10 +-- include/jemalloc/jemalloc_macros.h.in | 3 +- src/arena.c | 36 ++++---- src/ckh.c | 6 +- src/huge.c | 34 ++++---- src/jemalloc.c | 68 +++++++++------ test/integration/mallocx.c | 30 +++++++ test/integration/rallocx.c | 84 ++++++++++++++++++- test/unit/size_classes.c | 25 +++++- 14 files changed, 247 insertions(+), 89 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index d7b33582..bc5dbd1d 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -310,16 +310,14 @@ The mallocx function allocates at least size bytes of memory, and returns a pointer to the base address of the allocation. Behavior is undefined if - size is 0, or if request size - overflows due to size class and/or alignment constraints. + size is 0. The rallocx function resizes the allocation at ptr to be at least size bytes, and returns a pointer to the base address of the resulting allocation, which may or may not have moved from its original location. Behavior is undefined if - size is 0, or if request size - overflows due to size class and/or alignment constraints. + size is 0. The xallocx function resizes the allocation at ptr in place to be at least @@ -354,10 +352,10 @@ memory, but it performs the same size computation as the mallocx function, and returns the real size of the allocation that would result from the equivalent - mallocx function call. Behavior is - undefined if size is 0, or if - request size overflows due to size class and/or alignment - constraints. + mallocx function call, or + 0 if the inputs exceed the maximum supported size + class and/or alignment. Behavior is undefined if + size is 0. 
The mallctl function provides a general interface for introspecting the memory allocator, as well as diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 470eee65..891b9d79 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -536,8 +536,7 @@ extern arena_dalloc_junk_small_t *arena_dalloc_junk_small; void arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info); #endif void arena_quarantine_junk_small(void *ptr, size_t usize); -void *arena_malloc_large(tsd_t *tsd, arena_t *arena, size_t size, - szind_t ind, bool zero); +void *arena_malloc_large(tsd_t *tsd, arena_t *arena, szind_t ind, bool zero); void *arena_malloc_hard(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind, bool zero, tcache_t *tcache); void *arena_palloc(tsd_t *tsd, arena_t *arena, size_t usize, diff --git a/include/jemalloc/internal/huge.h b/include/jemalloc/internal/huge.h index 68d3789f..cb6f69e6 100644 --- a/include/jemalloc/internal/huge.h +++ b/include/jemalloc/internal/huge.h @@ -9,9 +9,9 @@ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -void *huge_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero, +void *huge_malloc(tsd_t *tsd, arena_t *arena, size_t usize, bool zero, tcache_t *tcache); -void *huge_palloc(tsd_t *tsd, arena_t *arena, size_t size, size_t alignment, +void *huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, bool zero, tcache_t *tcache); bool huge_ralloc_no_move(tsd_t *tsd, void *ptr, size_t oldsize, size_t usize_min, size_t usize_max, bool zero); diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 611ed36a..3f54391f 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -642,7 +642,7 @@ JEMALLOC_ALWAYS_INLINE size_t index2size(szind_t index) { - assert(index <= NSIZES); + assert(index < NSIZES); return (index2size_lookup(index)); } @@ -745,17 +745,16 @@ sa2u(size_t size, size_t alignment) return (usize); } - /* Huge size class. Beware of size_t overflow. */ + /* Huge size class. Beware of overflow. */ + + if (unlikely(alignment > HUGE_MAXCLASS)) + return (0); /* * We can't achieve subchunk alignment, so round up alignment to the * minimum that can actually be supported. */ alignment = CHUNK_CEILING(alignment); - if (alignment == 0) { - /* size_t overflow. */ - return (0); - } /* Make sure result is a huge size class. */ if (size <= chunksize) @@ -1106,7 +1105,7 @@ iralloct_realign(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t usize, copysize; usize = sa2u(size + extra, alignment); - if (usize == 0) + if (unlikely(usize == 0 || usize > HUGE_MAXCLASS)) return (NULL); p = ipalloct(tsd, usize, alignment, zero, tcache, arena); if (p == NULL) { @@ -1114,7 +1113,7 @@ iralloct_realign(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, return (NULL); /* Try again, without extra this time. */ usize = sa2u(size, alignment); - if (usize == 0) + if (unlikely(usize == 0 || usize > HUGE_MAXCLASS)) return (NULL); p = ipalloct(tsd, usize, alignment, zero, tcache, arena); if (p == NULL) diff --git a/include/jemalloc/internal/size_classes.sh b/include/jemalloc/internal/size_classes.sh index fc82036d..2b0ca29a 100755 --- a/include/jemalloc/internal/size_classes.sh +++ b/include/jemalloc/internal/size_classes.sh @@ -142,10 +142,10 @@ size_classes() { # All remaining groups. 
lg_grp=$((${lg_grp} + ${lg_g})) - while [ ${lg_grp} -lt ${ptr_bits} ] ; do + while [ ${lg_grp} -lt $((${ptr_bits} - 1)) ] ; do sep_line ndelta=1 - if [ ${lg_grp} -eq $((${ptr_bits} - 1)) ] ; then + if [ ${lg_grp} -eq $((${ptr_bits} - 2)) ] ; then ndelta_limit=$((${g} - 1)) else ndelta_limit=${g} diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index 25eaf142..8357820b 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -344,7 +344,6 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, void *ret; tcache_bin_t *tbin; bool tcache_success; - size_t usize JEMALLOC_CC_SILENCE_INIT(0); assert(binind < nhbins); tbin = &tcache->tbins[binind]; @@ -359,14 +358,15 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, if (unlikely(arena == NULL)) return (NULL); - usize = index2size(binind); - assert(usize <= tcache_maxclass); - ret = arena_malloc_large(tsd, arena, usize, binind, zero); + ret = arena_malloc_large(tsd, arena, binind, zero); if (ret == NULL) return (NULL); } else { + size_t usize JEMALLOC_CC_SILENCE_INIT(0); + /* Only compute usize on demand */ - if (config_prof || (slow_path && config_fill) || unlikely(zero)) { + if (config_prof || (slow_path && config_fill) || + unlikely(zero)) { usize = index2size(binind); assert(usize <= tcache_maxclass); } diff --git a/include/jemalloc/jemalloc_macros.h.in b/include/jemalloc/jemalloc_macros.h.in index d164edac..9f356f98 100644 --- a/include/jemalloc/jemalloc_macros.h.in +++ b/include/jemalloc/jemalloc_macros.h.in @@ -16,7 +16,8 @@ # define MALLOCX_ALIGN(a) ((int)(ffs(a)-1)) # else # define MALLOCX_ALIGN(a) \ - ((int)((a < (size_t)INT_MAX) ? ffs((int)a)-1 : ffs((int)(a>>32))+31)) + ((int)(((a) < (size_t)INT_MAX) ? 
ffs((int)(a))-1 : \ + ffs((int)((a)>>32))+31)) # endif # define MALLOCX_ZERO ((int)0x40) /* diff --git a/src/arena.c b/src/arena.c index 3f394681..1ceb59fd 100644 --- a/src/arena.c +++ b/src/arena.c @@ -2364,16 +2364,16 @@ arena_quarantine_junk_small(void *ptr, size_t usize) } static void * -arena_malloc_small(tsd_t *tsd, arena_t *arena, size_t size, szind_t binind, - bool zero) +arena_malloc_small(tsd_t *tsd, arena_t *arena, szind_t binind, bool zero) { void *ret; arena_bin_t *bin; + size_t usize; arena_run_t *run; assert(binind < NBINS); bin = &arena->bins[binind]; - size = index2size(binind); + usize = index2size(binind); malloc_mutex_lock(&bin->lock); if ((run = bin->runcur) != NULL && run->nfree > 0) @@ -2392,7 +2392,7 @@ arena_malloc_small(tsd_t *tsd, arena_t *arena, size_t size, szind_t binind, bin->stats.curregs++; } malloc_mutex_unlock(&bin->lock); - if (config_prof && !isthreaded && arena_prof_accum(arena, size)) + if (config_prof && !isthreaded && arena_prof_accum(arena, usize)) prof_idump(); if (!zero) { @@ -2401,16 +2401,16 @@ arena_malloc_small(tsd_t *tsd, arena_t *arena, size_t size, szind_t binind, arena_alloc_junk_small(ret, &arena_bin_info[binind], false); } else if (unlikely(opt_zero)) - memset(ret, 0, size); + memset(ret, 0, usize); } - JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, size); + JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, usize); } else { if (config_fill && unlikely(opt_junk_alloc)) { arena_alloc_junk_small(ret, &arena_bin_info[binind], true); } - JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, size); - memset(ret, 0, size); + JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, usize); + memset(ret, 0, usize); } arena_decay_tick(tsd, arena); @@ -2418,8 +2418,7 @@ arena_malloc_small(tsd_t *tsd, arena_t *arena, size_t size, szind_t binind, } void * -arena_malloc_large(tsd_t *tsd, arena_t *arena, size_t size, szind_t binind, - bool zero) +arena_malloc_large(tsd_t *tsd, arena_t *arena, szind_t binind, bool zero) { void *ret; size_t usize; @@ -2490,10 +2489,10 @@ arena_malloc_hard(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind, return (NULL); if (likely(size <= SMALL_MAXCLASS)) - return (arena_malloc_small(tsd, arena, size, ind, zero)); + return (arena_malloc_small(tsd, arena, ind, zero)); if (likely(size <= large_maxclass)) - return (arena_malloc_large(tsd, arena, size, ind, zero)); - return (huge_malloc(tsd, arena, size, zero, tcache)); + return (arena_malloc_large(tsd, arena, ind, zero)); + return (huge_malloc(tsd, arena, index2size(ind), zero, tcache)); } /* Only handles large allocations that require more than page alignment. */ @@ -3047,6 +3046,13 @@ arena_ralloc_no_move(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, { size_t usize_min, usize_max; + /* Calls with non-zero extra had to clamp extra. */ + assert(extra == 0 || size + extra <= HUGE_MAXCLASS); + + /* Prevent exceeding PTRDIFF_MAX. 
*/ + if (unlikely(size > HUGE_MAXCLASS)) + return (true); + usize_min = s2u(size); usize_max = s2u(size + extra); if (likely(oldsize <= large_maxclass && usize_min <= large_maxclass)) { @@ -3089,7 +3095,7 @@ arena_ralloc_move_helper(tsd_t *tsd, arena_t *arena, size_t usize, return (arena_malloc(tsd, arena, usize, size2index(usize), zero, tcache, true)); usize = sa2u(usize, alignment); - if (usize == 0) + if (unlikely(usize == 0 || usize > HUGE_MAXCLASS)) return (NULL); return (ipalloct(tsd, usize, alignment, zero, tcache, arena)); } @@ -3102,7 +3108,7 @@ arena_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, size_t usize; usize = s2u(size); - if (usize == 0) + if (unlikely(usize == 0 || size > HUGE_MAXCLASS)) return (NULL); if (likely(usize <= large_maxclass)) { diff --git a/src/ckh.c b/src/ckh.c index d1cfd234..3b423aa2 100644 --- a/src/ckh.c +++ b/src/ckh.c @@ -266,7 +266,7 @@ ckh_grow(tsd_t *tsd, ckh_t *ckh) lg_curcells++; usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE); - if (usize == 0) { + if (unlikely(usize == 0 || usize > HUGE_MAXCLASS)) { ret = true; goto label_return; } @@ -312,7 +312,7 @@ ckh_shrink(tsd_t *tsd, ckh_t *ckh) lg_prevbuckets = ckh->lg_curbuckets; lg_curcells = ckh->lg_curbuckets + LG_CKH_BUCKET_CELLS - 1; usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE); - if (usize == 0) + if (unlikely(usize == 0 || usize > HUGE_MAXCLASS)) return; tab = (ckhc_t *)ipallocztm(tsd, usize, CACHELINE, true, NULL, true, NULL); @@ -387,7 +387,7 @@ ckh_new(tsd_t *tsd, ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh->keycomp = keycomp; usize = sa2u(sizeof(ckhc_t) << lg_mincells, CACHELINE); - if (usize == 0) { + if (unlikely(usize == 0 || usize > HUGE_MAXCLASS)) { ret = true; goto label_return; } diff --git a/src/huge.c b/src/huge.c index 9f880484..5f7ceaf1 100644 --- a/src/huge.c +++ b/src/huge.c @@ -31,35 +31,30 @@ huge_node_unset(const void *ptr, const extent_node_t *node) } void * -huge_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero, +huge_malloc(tsd_t *tsd, arena_t *arena, size_t usize, bool zero, tcache_t *tcache) { - size_t usize; - usize = s2u(size); - if (usize == 0) { - /* size_t overflow. */ - return (NULL); - } + assert(usize == s2u(usize)); return (huge_palloc(tsd, arena, usize, chunksize, zero, tcache)); } void * -huge_palloc(tsd_t *tsd, arena_t *arena, size_t size, size_t alignment, +huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, bool zero, tcache_t *tcache) { void *ret; - size_t usize; + size_t ausize; extent_node_t *node; bool is_zeroed; /* Allocate one or more contiguous chunks for this request. */ - usize = sa2u(size, alignment); - if (unlikely(usize == 0)) + ausize = sa2u(usize, alignment); + if (unlikely(ausize == 0 || ausize > HUGE_MAXCLASS)) return (NULL); - assert(usize >= chunksize); + assert(ausize >= chunksize); /* Allocate an extent node with which to track the chunk. 
*/ node = ipallocztm(tsd, CACHELINE_CEILING(sizeof(extent_node_t)), @@ -74,15 +69,15 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t size, size_t alignment, is_zeroed = zero; arena = arena_choose(tsd, arena); if (unlikely(arena == NULL) || (ret = arena_chunk_alloc_huge(arena, - size, alignment, &is_zeroed)) == NULL) { + usize, alignment, &is_zeroed)) == NULL) { idalloctm(tsd, node, tcache, true, true); return (NULL); } - extent_node_init(node, arena, ret, size, is_zeroed, true); + extent_node_init(node, arena, ret, usize, is_zeroed, true); if (huge_node_set(ret, node)) { - arena_chunk_dalloc_huge(arena, ret, size); + arena_chunk_dalloc_huge(arena, ret, usize); idalloctm(tsd, node, tcache, true, true); return (NULL); } @@ -95,9 +90,9 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t size, size_t alignment, if (zero || (config_fill && unlikely(opt_zero))) { if (!is_zeroed) - memset(ret, 0, size); + memset(ret, 0, usize); } else if (config_fill && unlikely(opt_junk_alloc)) - memset(ret, 0xa5, size); + memset(ret, 0xa5, usize); arena_decay_tick(tsd, arena); return (ret); @@ -286,6 +281,8 @@ huge_ralloc_no_move(tsd_t *tsd, void *ptr, size_t oldsize, size_t usize_min, { assert(s2u(oldsize) == oldsize); + /* The following should have been caught by callers. */ + assert(usize_min > 0 && usize_max <= HUGE_MAXCLASS); /* Both allocations must be huge to avoid a move. */ if (oldsize < chunksize || usize_max < chunksize) @@ -346,6 +343,9 @@ huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t usize, void *ret; size_t copysize; + /* The following should have been caught by callers. */ + assert(usize > 0 && usize <= HUGE_MAXCLASS); + /* Try to avoid moving the allocation. */ if (!huge_ralloc_no_move(tsd, ptr, oldsize, usize, usize, zero)) return (ptr); diff --git a/src/jemalloc.c b/src/jemalloc.c index 86032a40..d9197e00 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1449,18 +1449,17 @@ imalloc_body(size_t size, tsd_t **tsd, size_t *usize, bool slow_path) return (NULL); *tsd = tsd_fetch(); ind = size2index(size); + if (unlikely(ind >= NSIZES)) + return (NULL); - if (config_stats || - (config_prof && opt_prof) || - (slow_path && config_valgrind && unlikely(in_valgrind))) { + if (config_stats || (config_prof && opt_prof) || (slow_path && + config_valgrind && unlikely(in_valgrind))) { *usize = index2size(ind); + assert(*usize > 0 && *usize <= HUGE_MAXCLASS); } - if (config_prof && opt_prof) { - if (unlikely(*usize == 0)) - return (NULL); + if (config_prof && opt_prof) return (imalloc_prof(*tsd, *usize, ind, slow_path)); - } return (imalloc(*tsd, size, ind, slow_path)); } @@ -1584,7 +1583,7 @@ imemalign(void **memptr, size_t alignment, size_t size, size_t min_alignment) } usize = sa2u(size, alignment); - if (unlikely(usize == 0)) { + if (unlikely(usize == 0 || usize > HUGE_MAXCLASS)) { result = NULL; goto label_oom; } @@ -1722,12 +1721,12 @@ je_calloc(size_t num, size_t size) } ind = size2index(num_size); + if (unlikely(ind >= NSIZES)) { + ret = NULL; + goto label_return; + } if (config_prof && opt_prof) { usize = index2size(ind); - if (unlikely(usize == 0)) { - ret = NULL; - goto label_return; - } ret = icalloc_prof(tsd, usize, ind); } else { if (config_stats || (config_valgrind && unlikely(in_valgrind))) @@ -1874,8 +1873,8 @@ je_realloc(void *ptr, size_t size) if (config_prof && opt_prof) { usize = s2u(size); - ret = unlikely(usize == 0) ? NULL : irealloc_prof(tsd, - ptr, old_usize, usize); + ret = unlikely(usize == 0 || usize > HUGE_MAXCLASS) ? 
+ NULL : irealloc_prof(tsd, ptr, old_usize, usize); } else { if (config_stats || (config_valgrind && unlikely(in_valgrind))) @@ -2006,7 +2005,8 @@ imallocx_flags_decode_hard(tsd_t *tsd, size_t size, int flags, size_t *usize, *alignment = MALLOCX_ALIGN_GET_SPECIFIED(flags); *usize = sa2u(size, *alignment); } - assert(*usize != 0); + if (unlikely(*usize == 0 || *usize > HUGE_MAXCLASS)) + return (true); *zero = MALLOCX_ZERO_GET(flags); if ((flags & MALLOCX_TCACHE_MASK) != 0) { if ((flags & MALLOCX_TCACHE_MASK) == MALLOCX_TCACHE_NONE) @@ -2032,7 +2032,6 @@ imallocx_flags_decode(tsd_t *tsd, size_t size, int flags, size_t *usize, if (likely(flags == 0)) { *usize = s2u(size); - assert(*usize != 0); *alignment = 0; *zero = false; *tcache = tcache_get(tsd, true); @@ -2051,6 +2050,8 @@ imallocx_flags(tsd_t *tsd, size_t usize, size_t alignment, bool zero, szind_t ind; ind = size2index(usize); + if (unlikely(ind >= NSIZES)) + return (NULL); if (unlikely(alignment != 0)) return (ipalloct(tsd, usize, alignment, zero, tcache, arena)); if (unlikely(zero)) @@ -2120,8 +2121,13 @@ imallocx_no_prof(tsd_t *tsd, size_t size, int flags, size_t *usize) if (likely(flags == 0)) { szind_t ind = size2index(size); - if (config_stats || (config_valgrind && unlikely(in_valgrind))) + if (unlikely(ind >= NSIZES)) + return (NULL); + if (config_stats || (config_valgrind && + unlikely(in_valgrind))) { *usize = index2size(ind); + assert(*usize > 0 && *usize <= HUGE_MAXCLASS); + } return (imalloc(tsd, size, ind, true)); } @@ -2278,7 +2284,8 @@ je_rallocx(void *ptr, size_t size, int flags) if (config_prof && opt_prof) { usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment); - assert(usize != 0); + if (unlikely(usize == 0 || usize > HUGE_MAXCLASS)) + goto label_oom; p = irallocx_prof(tsd, ptr, old_usize, size, alignment, &usize, zero, tcache, arena); if (unlikely(p == NULL)) @@ -2392,14 +2399,23 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) old_usize = isalloc(ptr, config_prof); - /* Clamp extra if necessary to avoid (size + extra) overflow. */ - if (unlikely(size + extra > HUGE_MAXCLASS)) { - /* Check for size overflow. */ + if (unlikely(extra > 0)) { + /* + * The API explicitly absolves itself of protecting against + * (size + extra) numerical overflow, but we may need to clamp + * extra to avoid exceeding HUGE_MAXCLASS. + * + * Ordinarily, size limit checking is handled deeper down, but + * here we have to check as part of (size + extra) clamping, + * since we need the clamped value in the above helper + * functions. 
+ */ if (unlikely(size > HUGE_MAXCLASS)) { usize = old_usize; goto label_not_resized; } - extra = HUGE_MAXCLASS - size; + if (unlikely(HUGE_MAXCLASS - size < extra)) + extra = HUGE_MAXCLASS - size; } if (config_valgrind && unlikely(in_valgrind)) @@ -2474,7 +2490,6 @@ inallocx(size_t size, int flags) usize = s2u(size); else usize = sa2u(size, MALLOCX_ALIGN_GET_SPECIFIED(flags)); - assert(usize != 0); return (usize); } @@ -2507,13 +2522,18 @@ JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW JEMALLOC_ATTR(pure) je_nallocx(size_t size, int flags) { + size_t usize; assert(size != 0); if (unlikely(malloc_init())) return (0); - return (inallocx(size, flags)); + usize = inallocx(size, flags); + if (unlikely(usize > HUGE_MAXCLASS)) + return (0); + + return (usize); } JEMALLOC_EXPORT int JEMALLOC_NOTHROW diff --git a/test/integration/mallocx.c b/test/integration/mallocx.c index 6253175d..35c559a4 100644 --- a/test/integration/mallocx.c +++ b/test/integration/mallocx.c @@ -46,6 +46,35 @@ get_huge_size(size_t ind) return (get_size_impl("arenas.hchunk.0.size", ind)); } +TEST_BEGIN(test_overflow) +{ + size_t hugemax, size; + + hugemax = get_huge_size(get_nhuge()-1); + + assert_ptr_null(mallocx(hugemax+1, 0), + "Expected OOM for mallocx(size=%#zx, 0)", hugemax+1); + + assert_ptr_null(mallocx(PTRDIFF_MAX+1, 0), + "Expected OOM for mallocx(size=%#zx, 0)", ZU(PTRDIFF_MAX+1)); + + assert_ptr_null(mallocx(SIZE_T_MAX, 0), + "Expected OOM for mallocx(size=%#zx, 0)", SIZE_T_MAX); + +#if LG_SIZEOF_PTR == 3 + size = ZU(0x600000000000000); +#else + size = ZU(0x6000000); +#endif + assert_ptr_null(mallocx(size, 0), + "Expected OOM for mallocx(size=%#zx, 0", size); + + assert_ptr_null(mallocx(1, MALLOCX_ALIGN(PTRDIFF_MAX+1)), + "Expected OOM for mallocx(size=1, MALLOCX_ALIGN(%#zx))", + ZU(PTRDIFF_MAX+1)); +} +TEST_END + TEST_BEGIN(test_oom) { size_t hugemax, size, alignment; @@ -176,6 +205,7 @@ main(void) { return (test( + test_overflow, test_oom, test_basic, test_alignment_and_size)); diff --git a/test/integration/rallocx.c b/test/integration/rallocx.c index 022e0bf0..3b7d21cf 100644 --- a/test/integration/rallocx.c +++ b/test/integration/rallocx.c @@ -1,5 +1,51 @@ #include "test/jemalloc_test.h" +static unsigned +get_nsizes_impl(const char *cmd) +{ + unsigned ret; + size_t z; + + z = sizeof(unsigned); + assert_d_eq(mallctl(cmd, &ret, &z, NULL, 0), 0, + "Unexpected mallctl(\"%s\", ...) failure", cmd); + + return (ret); +} + +static unsigned +get_nhuge(void) +{ + + return (get_nsizes_impl("arenas.nhchunks")); +} + +static size_t +get_size_impl(const char *cmd, size_t ind) +{ + size_t ret; + size_t z; + size_t mib[4]; + size_t miblen = 4; + + z = sizeof(size_t); + assert_d_eq(mallctlnametomib(cmd, mib, &miblen), + 0, "Unexpected mallctlnametomib(\"%s\", ...) failure", cmd); + mib[2] = ind; + z = sizeof(size_t); + assert_d_eq(mallctlbymib(mib, miblen, &ret, &z, NULL, 0), + 0, "Unexpected mallctlbymib([\"%s\", %zu], ...) 
failure", cmd, ind); + + return (ret); +} + +static size_t +get_huge_size(size_t ind) +{ + + return (get_size_impl("arenas.hchunk.0.size", ind)); +} + TEST_BEGIN(test_grow_and_shrink) { void *p, *q; @@ -173,6 +219,41 @@ TEST_BEGIN(test_lg_align_and_zero) } TEST_END +TEST_BEGIN(test_overflow) +{ + size_t hugemax, size; + void *p; + + hugemax = get_huge_size(get_nhuge()-1); + + p = mallocx(1, 0); + assert_ptr_not_null(p, "Unexpected mallocx() failure"); + + assert_ptr_null(rallocx(p, hugemax+1, 0), + "Expected OOM for rallocx(p, size=%#zx, 0)", hugemax+1); + + assert_ptr_null(rallocx(p, PTRDIFF_MAX+1, 0), + "Expected OOM for rallocx(p, size=%#zx, 0)", ZU(PTRDIFF_MAX+1)); + + assert_ptr_null(rallocx(p, SIZE_T_MAX, 0), + "Expected OOM for rallocx(p, size=%#zx, 0)", SIZE_T_MAX); + +#if LG_SIZEOF_PTR == 3 + size = ZU(0x600000000000000); +#else + size = ZU(0x6000000); +#endif + assert_ptr_null(rallocx(p, size, 0), + "Expected OOM for rallocx(p, size=%#zx, 0", size); + + assert_ptr_null(rallocx(p, 1, MALLOCX_ALIGN(PTRDIFF_MAX+1)), + "Expected OOM for rallocx(p, size=1, MALLOCX_ALIGN(%#zx))", + ZU(PTRDIFF_MAX+1)); + + dallocx(p, 0); +} +TEST_END + int main(void) { @@ -181,5 +262,6 @@ main(void) test_grow_and_shrink, test_zero, test_align, - test_lg_align_and_zero)); + test_lg_align_and_zero, + test_overflow)); } diff --git a/test/unit/size_classes.c b/test/unit/size_classes.c index d3aaebd7..3a2126fc 100644 --- a/test/unit/size_classes.c +++ b/test/unit/size_classes.c @@ -80,10 +80,33 @@ TEST_BEGIN(test_size_classes) } TEST_END +TEST_BEGIN(test_overflow) +{ + size_t max_size_class; + + max_size_class = get_max_size_class(); + + assert_u_ge(size2index(max_size_class+1), NSIZES, + "size2index() should return >= NSIZES on overflow"); + assert_u_ge(size2index(PTRDIFF_MAX+1), NSIZES, + "size2index() should return >= NSIZES on overflow"); + assert_u_ge(size2index(SIZE_T_MAX), NSIZES, + "size2index() should return >= NSIZES on overflow"); + + assert_zu_gt(s2u(max_size_class+1), HUGE_MAXCLASS, + "s2u() should return > HUGE_MAXCLASS for unsupported size"); + assert_zu_gt(s2u(PTRDIFF_MAX+1), HUGE_MAXCLASS, + "s2u() should return > HUGE_MAXCLASS for unsupported size"); + assert_zu_eq(s2u(SIZE_T_MAX), 0, + "s2u() should return 0 on overflow"); +} +TEST_END + int main(void) { return (test( - test_size_classes)); + test_size_classes, + test_overflow)); } From e3195fa4a54344cf707d30e510e91ed43f5a8b84 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 25 Feb 2016 16:40:24 -0800 Subject: [PATCH 75/96] Cast PTRDIFF_MAX to size_t before adding 1. This fixes compilation warnings regarding integer overflow that were introduced by 0c516a00c4cb28cff55ce0995f756b5aae074c9e (Make *allocx() size class overflow behavior defined.). 
--- test/integration/mallocx.c | 8 ++++---- test/integration/rallocx.c | 8 ++++---- test/unit/size_classes.c | 4 ++-- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/test/integration/mallocx.c b/test/integration/mallocx.c index 35c559a4..6ecd636b 100644 --- a/test/integration/mallocx.c +++ b/test/integration/mallocx.c @@ -55,8 +55,8 @@ TEST_BEGIN(test_overflow) assert_ptr_null(mallocx(hugemax+1, 0), "Expected OOM for mallocx(size=%#zx, 0)", hugemax+1); - assert_ptr_null(mallocx(PTRDIFF_MAX+1, 0), - "Expected OOM for mallocx(size=%#zx, 0)", ZU(PTRDIFF_MAX+1)); + assert_ptr_null(mallocx(ZU(PTRDIFF_MAX)+1, 0), + "Expected OOM for mallocx(size=%#zx, 0)", ZU(PTRDIFF_MAX)+1); assert_ptr_null(mallocx(SIZE_T_MAX, 0), "Expected OOM for mallocx(size=%#zx, 0)", SIZE_T_MAX); @@ -69,9 +69,9 @@ TEST_BEGIN(test_overflow) assert_ptr_null(mallocx(size, 0), "Expected OOM for mallocx(size=%#zx, 0", size); - assert_ptr_null(mallocx(1, MALLOCX_ALIGN(PTRDIFF_MAX+1)), + assert_ptr_null(mallocx(1, MALLOCX_ALIGN(ZU(PTRDIFF_MAX)+1)), "Expected OOM for mallocx(size=1, MALLOCX_ALIGN(%#zx))", - ZU(PTRDIFF_MAX+1)); + ZU(PTRDIFF_MAX)+1); } TEST_END diff --git a/test/integration/rallocx.c b/test/integration/rallocx.c index 3b7d21cf..c3c22419 100644 --- a/test/integration/rallocx.c +++ b/test/integration/rallocx.c @@ -232,8 +232,8 @@ TEST_BEGIN(test_overflow) assert_ptr_null(rallocx(p, hugemax+1, 0), "Expected OOM for rallocx(p, size=%#zx, 0)", hugemax+1); - assert_ptr_null(rallocx(p, PTRDIFF_MAX+1, 0), - "Expected OOM for rallocx(p, size=%#zx, 0)", ZU(PTRDIFF_MAX+1)); + assert_ptr_null(rallocx(p, ZU(PTRDIFF_MAX)+1, 0), + "Expected OOM for rallocx(p, size=%#zx, 0)", ZU(PTRDIFF_MAX)+1); assert_ptr_null(rallocx(p, SIZE_T_MAX, 0), "Expected OOM for rallocx(p, size=%#zx, 0)", SIZE_T_MAX); @@ -246,9 +246,9 @@ TEST_BEGIN(test_overflow) assert_ptr_null(rallocx(p, size, 0), "Expected OOM for rallocx(p, size=%#zx, 0", size); - assert_ptr_null(rallocx(p, 1, MALLOCX_ALIGN(PTRDIFF_MAX+1)), + assert_ptr_null(rallocx(p, 1, MALLOCX_ALIGN(ZU(PTRDIFF_MAX)+1)), "Expected OOM for rallocx(p, size=1, MALLOCX_ALIGN(%#zx))", - ZU(PTRDIFF_MAX+1)); + ZU(PTRDIFF_MAX)+1); dallocx(p, 0); } diff --git a/test/unit/size_classes.c b/test/unit/size_classes.c index 3a2126fc..2e2caaf5 100644 --- a/test/unit/size_classes.c +++ b/test/unit/size_classes.c @@ -88,14 +88,14 @@ TEST_BEGIN(test_overflow) assert_u_ge(size2index(max_size_class+1), NSIZES, "size2index() should return >= NSIZES on overflow"); - assert_u_ge(size2index(PTRDIFF_MAX+1), NSIZES, + assert_u_ge(size2index(ZU(PTRDIFF_MAX)+1), NSIZES, "size2index() should return >= NSIZES on overflow"); assert_u_ge(size2index(SIZE_T_MAX), NSIZES, "size2index() should return >= NSIZES on overflow"); assert_zu_gt(s2u(max_size_class+1), HUGE_MAXCLASS, "s2u() should return > HUGE_MAXCLASS for unsupported size"); - assert_zu_gt(s2u(PTRDIFF_MAX+1), HUGE_MAXCLASS, + assert_zu_gt(s2u(ZU(PTRDIFF_MAX)+1), HUGE_MAXCLASS, "s2u() should return > HUGE_MAXCLASS for unsupported size"); assert_zu_eq(s2u(SIZE_T_MAX), 0, "s2u() should return 0 on overflow"); From 9d2c10f2e83e9a357630bebd220e02a2ef95d6fa Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 25 Feb 2016 16:42:15 -0800 Subject: [PATCH 76/96] Add more HUGE_MAXCLASS overflow checks. Add HUGE_MAXCLASS overflow checks that are specific to heap profiling code paths. This fixes test failures that were introduced by 0c516a00c4cb28cff55ce0995f756b5aae074c9e (Make *allocx() size class overflow behavior defined.). 
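The clamping idiom these checks rely on is worth spelling out: rather than
computing size + extra (which can wrap around in size_t) and comparing the
sum against HUGE_MAXCLASS, the code compares extra against
HUGE_MAXCLASS - size, which cannot overflow once size itself is known to be
in range. A toy illustration follows; HUGE_MAXCLASS_TOY is a made-up
stand-in, since the real HUGE_MAXCLASS is derived from the generated
size_classes.h:

    #include <stdint.h>
    #include <stdio.h>

    #define HUGE_MAXCLASS_TOY (((size_t)1) << 62) /* placeholder bound */

    int
    main(void)
    {
        size_t size = ((size_t)1) << 61; /* already known <= the bound */
        size_t extra = SIZE_MAX / 2;     /* absurdly large on purpose */

        /* Overflow-safe clamp: never forms an unclamped size + extra. */
        if (HUGE_MAXCLASS_TOY - size < extra)
            extra = HUGE_MAXCLASS_TOY - size;

        printf("clamped extra = %#zx, size + extra = %#zx\n", extra,
            size + extra);
        return (0);
    }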
--- src/jemalloc.c | 57 ++++++++++++++++++++++++++++++-------------------- 1 file changed, 34 insertions(+), 23 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index d9197e00..c8841783 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2032,6 +2032,8 @@ imallocx_flags_decode(tsd_t *tsd, size_t size, int flags, size_t *usize, if (likely(flags == 0)) { *usize = s2u(size); + if (unlikely(*usize == 0 || *usize > HUGE_MAXCLASS)) + return (true); *alignment = 0; *zero = false; *tcache = tcache_get(tsd, true); @@ -2049,11 +2051,10 @@ imallocx_flags(tsd_t *tsd, size_t usize, size_t alignment, bool zero, { szind_t ind; - ind = size2index(usize); - if (unlikely(ind >= NSIZES)) - return (NULL); if (unlikely(alignment != 0)) return (ipalloct(tsd, usize, alignment, zero, tcache, arena)); + ind = size2index(usize); + assert(ind < NSIZES); if (unlikely(zero)) return (icalloct(tsd, usize, ind, tcache, arena)); return (imalloct(tsd, usize, ind, tcache, arena)); @@ -2360,10 +2361,23 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, * prof_alloc_prep() to decide whether to capture a backtrace. * prof_realloc() will use the actual usize to decide whether to sample. */ - usize_max = (alignment == 0) ? s2u(size+extra) : sa2u(size+extra, - alignment); - assert(usize_max != 0); + if (alignment == 0) { + usize_max = s2u(size+extra); + assert(usize_max > 0 && usize_max <= HUGE_MAXCLASS); + } else { + usize_max = sa2u(size+extra, alignment); + if (unlikely(usize_max == 0 || usize_max > HUGE_MAXCLASS)) { + /* + * usize_max is out of range, and chances are that + * allocation will fail, but use the maximum possible + * value and carry on with prof_alloc_prep(), just in + * case allocation succeeds. + */ + usize_max = HUGE_MAXCLASS; + } + } tctx = prof_alloc_prep(tsd, usize_max, prof_active, false); + if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { usize = ixallocx_prof_sample(tsd, ptr, old_usize, size, extra, alignment, zero, tctx); @@ -2399,24 +2413,21 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) old_usize = isalloc(ptr, config_prof); - if (unlikely(extra > 0)) { - /* - * The API explicitly absolves itself of protecting against - * (size + extra) numerical overflow, but we may need to clamp - * extra to avoid exceeding HUGE_MAXCLASS. - * - * Ordinarily, size limit checking is handled deeper down, but - * here we have to check as part of (size + extra) clamping, - * since we need the clamped value in the above helper - * functions. - */ - if (unlikely(size > HUGE_MAXCLASS)) { - usize = old_usize; - goto label_not_resized; - } - if (unlikely(HUGE_MAXCLASS - size < extra)) - extra = HUGE_MAXCLASS - size; + /* + * The API explicitly absolves itself of protecting against (size + + * extra) numerical overflow, but we may need to clamp extra to avoid + * exceeding HUGE_MAXCLASS. + * + * Ordinarily, size limit checking is handled deeper down, but here we + * have to check as part of (size + extra) clamping, since we need the + * clamped value in the above helper functions. + */ + if (unlikely(size > HUGE_MAXCLASS)) { + usize = old_usize; + goto label_not_resized; } + if (unlikely(HUGE_MAXCLASS - size < extra)) + extra = HUGE_MAXCLASS - size; if (config_valgrind && unlikely(in_valgrind)) old_rzsize = u2rz(old_usize); From 8282a2ad979a9e72ffb645321c8a0b58a09eb9d8 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 25 Feb 2016 16:44:48 -0800 Subject: [PATCH 77/96] Remove a superfluous comment. 
--- src/arena.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/arena.c b/src/arena.c index 1ceb59fd..ec4315a7 100644 --- a/src/arena.c +++ b/src/arena.c @@ -3049,7 +3049,6 @@ arena_ralloc_no_move(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, /* Calls with non-zero extra had to clamp extra. */ assert(extra == 0 || size + extra <= HUGE_MAXCLASS); - /* Prevent exceeding PTRDIFF_MAX. */ if (unlikely(size > HUGE_MAXCLASS)) return (true); From 42ce80e15a5aa2ab6f2ec7e5f7c18164803f3076 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 25 Feb 2016 20:51:00 -0800 Subject: [PATCH 78/96] Silence miscellaneous 64-to-32-bit data loss warnings. This resolves #341. --- include/jemalloc/internal/arena.h | 19 +++++++++---------- src/arena.c | 4 ++-- src/prof.c | 2 +- src/util.c | 4 ++-- 4 files changed, 14 insertions(+), 15 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 891b9d79..c7c18748 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -634,7 +634,7 @@ bool arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes); bool arena_prof_accum(arena_t *arena, uint64_t accumbytes); szind_t arena_ptr_small_binind_get(const void *ptr, size_t mapbits); szind_t arena_bin_index(arena_t *arena, arena_bin_t *bin); -unsigned arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, +size_t arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr); prof_tctx_t *arena_prof_tctx_get(const void *ptr); void arena_prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx); @@ -1076,11 +1076,10 @@ arena_bin_index(arena_t *arena, arena_bin_t *bin) return (binind); } -JEMALLOC_INLINE unsigned +JEMALLOC_INLINE size_t arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr) { - unsigned shift, diff, regind; - size_t interval; + size_t diff, interval, shift, regind; arena_chunk_map_misc_t *miscelm = arena_run_to_miscelm(run); void *rpages = arena_miscelm_to_rpages(miscelm); @@ -1095,7 +1094,7 @@ arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr) * Avoid doing division with a variable divisor if possible. Using * actual division here can reduce allocator throughput by over 20%! */ - diff = (unsigned)((uintptr_t)ptr - (uintptr_t)rpages - + diff = (size_t)((uintptr_t)ptr - (uintptr_t)rpages - bin_info->reg0_offset); /* Rescale (factor powers of 2 out of the numerator and denominator). */ @@ -1122,9 +1121,9 @@ arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr) * divide by 0, and 1 and 2 are both powers of two, which are * handled above. 
*/ -#define SIZE_INV_SHIFT ((sizeof(unsigned) << 3) - LG_RUN_MAXREGS) -#define SIZE_INV(s) (((1U << SIZE_INV_SHIFT) / (s)) + 1) - static const unsigned interval_invs[] = { +#define SIZE_INV_SHIFT ((sizeof(size_t) << 3) - LG_RUN_MAXREGS) +#define SIZE_INV(s) (((ZU(1) << SIZE_INV_SHIFT) / (s)) + 1) + static const size_t interval_invs[] = { SIZE_INV(3), SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7), SIZE_INV(8), SIZE_INV(9), SIZE_INV(10), SIZE_INV(11), @@ -1135,8 +1134,8 @@ arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr) SIZE_INV(28), SIZE_INV(29), SIZE_INV(30), SIZE_INV(31) }; - if (likely(interval <= ((sizeof(interval_invs) / - sizeof(unsigned)) + 2))) { + if (likely(interval <= ((sizeof(interval_invs) / sizeof(size_t)) + + 2))) { regind = (diff * interval_invs[interval - 3]) >> SIZE_INV_SHIFT; } else diff --git a/src/arena.c b/src/arena.c index ec4315a7..5fcecbaf 100644 --- a/src/arena.c +++ b/src/arena.c @@ -301,7 +301,7 @@ JEMALLOC_INLINE_C void * arena_run_reg_alloc(arena_run_t *run, arena_bin_info_t *bin_info) { void *ret; - unsigned regind; + size_t regind; arena_chunk_map_misc_t *miscelm; void *rpages; @@ -325,7 +325,7 @@ arena_run_reg_dalloc(arena_run_t *run, void *ptr) size_t mapbits = arena_mapbits_get(chunk, pageind); szind_t binind = arena_ptr_small_binind_get(ptr, mapbits); arena_bin_info_t *bin_info = &arena_bin_info[binind]; - unsigned regind = arena_run_regind(run, bin_info, ptr); + size_t regind = arena_run_regind(run, bin_info, ptr); assert(run->nfree < bin_info->nregs); /* Freeing an interior pointer can cause assertion failure. */ diff --git a/src/prof.c b/src/prof.c index 93421abb..b3872277 100644 --- a/src/prof.c +++ b/src/prof.c @@ -109,7 +109,7 @@ static char prof_dump_buf[ 1 #endif ]; -static unsigned prof_dump_buf_end; +static size_t prof_dump_buf_end; static int prof_dump_fd; /* Do not dump any profiles until bootstrapping is complete. 
*/ diff --git a/src/util.c b/src/util.c index 9aaa8062..02673c70 100644 --- a/src/util.c +++ b/src/util.c @@ -60,7 +60,7 @@ wrtmessage(void *cbopaque, const char *s) */ UNUSED long result = syscall(SYS_write, STDERR_FILENO, s, strlen(s)); #else - UNUSED int result = write(STDERR_FILENO, s, strlen(s)); + UNUSED ssize_t result = write(STDERR_FILENO, s, strlen(s)); #endif } @@ -90,7 +90,7 @@ buferror(int err, char *buf, size_t buflen) #ifdef _WIN32 FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM, NULL, err, 0, - (LPSTR)buf, buflen, NULL); + (LPSTR)buf, (DWORD)buflen, NULL); return (0); #elif defined(__GLIBC__) && defined(_GNU_SOURCE) char *b = strerror_r(err, buf, buflen); From ebd00e95b863c790d085c906a9aeddd22bd19d69 Mon Sep 17 00:00:00 2001 From: rustyx Date: Fri, 26 Feb 2016 17:18:48 +0100 Subject: [PATCH 79/96] Fix MSVC project --- msvc/projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters | 3 +++ 2 files changed, 4 insertions(+) diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 395837c3..d8ad505b 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -101,6 +101,7 @@ + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 69f64169..89a51f76 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -238,5 +238,8 @@ Source Files + + Source Files + \ No newline at end of file From 4c4ee292e411b0b2381e7b5e8f7c34d480cda99a Mon Sep 17 00:00:00 2001 From: rustyx Date: Fri, 26 Feb 2016 17:18:58 +0100 Subject: [PATCH 80/96] Improve test_threads performance --- msvc/projects/vc2015/test_threads/test_threads.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/msvc/projects/vc2015/test_threads/test_threads.cpp b/msvc/projects/vc2015/test_threads/test_threads.cpp index 046843f3..603bdce7 100644 --- a/msvc/projects/vc2015/test_threads/test_threads.cpp +++ b/msvc/projects/vc2015/test_threads/test_threads.cpp @@ -58,22 +58,22 @@ int test_threads() thread t([&]() { for (int i = 0; i < numIter2; ++i) { const int numAllocs = numAllocsMax - sizeDist(rnd); - for (int j = 0; j < numAllocs; j++) { + for (int j = 0; j < numAllocs; j += 64) { const int x = sizeDist(rnd); const int sz = sizes[x]; ptrsz[j] = sz; ptrs[j] = (uint8_t*)je_malloc(sz); if (!ptrs[j]) { - printf("Unable to allocate %d bytes in thread %d, iter %d, alloc %d. %d", sz, tid, i, j, x); + printf("Unable to allocate %d bytes in thread %d, iter %d, alloc %d. %d\n", sz, tid, i, j, x); exit(1); } for (int k = 0; k < sz; k++) ptrs[j][k] = tid + k; } - for (int j = 0; j < numAllocs; j++) { + for (int j = 0; j < numAllocs; j += 64) { for (int k = 0, sz = ptrsz[j]; k < sz; k++) if (ptrs[j][k] != (uint8_t)(tid + k)) { - printf("Memory error in thread %d, iter %d, alloc %d @ %d : %02X!=%02X", tid, i, j, k, ptrs[j][k], (uint8_t)(tid + k)); + printf("Memory error in thread %d, iter %d, alloc %d @ %d : %02X!=%02X\n", tid, i, j, k, ptrs[j][k], (uint8_t)(tid + k)); exit(1); } je_free(ptrs[j]); From 01ecdf32d657f9e19f84ba9785c9954734666a9c Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 26 Feb 2016 13:59:41 -0800 Subject: [PATCH 81/96] Miscellaneous bitmap refactoring. 
--- include/jemalloc/internal/bitmap.h | 21 ++++++------ include/jemalloc/internal/private_symbols.txt | 1 - src/bitmap.c | 33 +++++++++---------- test/unit/bitmap.c | 22 ++++++++----- 4 files changed, 38 insertions(+), 39 deletions(-) diff --git a/include/jemalloc/internal/bitmap.h b/include/jemalloc/internal/bitmap.h index 8452bfed..a53ac799 100644 --- a/include/jemalloc/internal/bitmap.h +++ b/include/jemalloc/internal/bitmap.h @@ -93,9 +93,8 @@ struct bitmap_info_s { #ifdef JEMALLOC_H_EXTERNS void bitmap_info_init(bitmap_info_t *binfo, size_t nbits); -size_t bitmap_info_ngroups(const bitmap_info_t *binfo); -size_t bitmap_size(size_t nbits); void bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo); +size_t bitmap_size(const bitmap_info_t *binfo); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ @@ -128,7 +127,7 @@ bitmap_get(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) assert(bit < binfo->nbits); goff = bit >> LG_BITMAP_GROUP_NBITS; g = bitmap[goff]; - return (!(g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK)))); + return (!(g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK)))); } JEMALLOC_INLINE void @@ -143,8 +142,8 @@ bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) goff = bit >> LG_BITMAP_GROUP_NBITS; gp = &bitmap[goff]; g = *gp; - assert(g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK))); - g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK); + assert(g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK))); + g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK); *gp = g; assert(bitmap_get(bitmap, binfo, bit)); /* Propagate group state transitions up the tree. */ @@ -155,8 +154,8 @@ bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) goff = bit >> LG_BITMAP_GROUP_NBITS; gp = &bitmap[binfo->levels[i].group_offset + goff]; g = *gp; - assert(g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK))); - g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK); + assert(g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK))); + g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK); *gp = g; if (g != 0) break; @@ -201,8 +200,8 @@ bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) gp = &bitmap[goff]; g = *gp; propagate = (g == 0); - assert((g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK))) == 0); - g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK); + assert((g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK))) == 0); + g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK); *gp = g; assert(!bitmap_get(bitmap, binfo, bit)); /* Propagate group state transitions up the tree. 
*/ @@ -214,9 +213,9 @@ bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) gp = &bitmap[binfo->levels[i].group_offset + goff]; g = *gp; propagate = (g == 0); - assert((g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK))) + assert((g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK))) == 0); - g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK); + g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK); *gp = g; if (!propagate) break; diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index d716b82d..b57cfbcc 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -131,7 +131,6 @@ base_stats_get bitmap_full bitmap_get bitmap_info_init -bitmap_info_ngroups bitmap_init bitmap_set bitmap_sfu diff --git a/src/bitmap.c b/src/bitmap.c index c733372b..22c92fe4 100644 --- a/src/bitmap.c +++ b/src/bitmap.c @@ -32,22 +32,6 @@ bitmap_info_init(bitmap_info_t *binfo, size_t nbits) binfo->nbits = nbits; } -size_t -bitmap_info_ngroups(const bitmap_info_t *binfo) -{ - - return (binfo->levels[binfo->nlevels].group_offset << LG_SIZEOF_BITMAP); -} - -size_t -bitmap_size(size_t nbits) -{ - bitmap_info_t binfo; - - bitmap_info_init(&binfo, nbits); - return (bitmap_info_ngroups(&binfo)); -} - void bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo) { @@ -61,8 +45,7 @@ bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo) * correspond to the first logical bit in the group, so extra bits * are the most significant bits of the last group. */ - memset(bitmap, 0xffU, binfo->levels[binfo->nlevels].group_offset << - LG_SIZEOF_BITMAP); + memset(bitmap, 0xffU, bitmap_size(binfo)); extra = (BITMAP_GROUP_NBITS - (binfo->nbits & BITMAP_GROUP_NBITS_MASK)) & BITMAP_GROUP_NBITS_MASK; if (extra != 0) @@ -76,3 +59,17 @@ bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo) bitmap[binfo->levels[i+1].group_offset - 1] >>= extra; } } + +static size_t +bitmap_info_ngroups(const bitmap_info_t *binfo) +{ + + return (binfo->levels[binfo->nlevels].group_offset); +} + +size_t +bitmap_size(const bitmap_info_t *binfo) +{ + + return (bitmap_info_ngroups(binfo) << LG_SIZEOF_BITMAP); +} diff --git a/test/unit/bitmap.c b/test/unit/bitmap.c index 7da583d8..1ab0bb8e 100644 --- a/test/unit/bitmap.c +++ b/test/unit/bitmap.c @@ -6,7 +6,11 @@ TEST_BEGIN(test_bitmap_size) prev_size = 0; for (i = 1; i <= BITMAP_MAXBITS; i++) { - size_t size = bitmap_size(i); + bitmap_info_t binfo; + size_t size; + + bitmap_info_init(&binfo, i); + size = bitmap_size(&binfo); assert_true(size >= prev_size, "Bitmap size is smaller than expected"); prev_size = size; @@ -23,8 +27,8 @@ TEST_BEGIN(test_bitmap_init) bitmap_info_init(&binfo, i); { size_t j; - bitmap_t *bitmap = (bitmap_t *)malloc(sizeof(bitmap_t) * - bitmap_info_ngroups(&binfo)); + bitmap_t *bitmap = (bitmap_t *)malloc( + bitmap_size(&binfo)); bitmap_init(bitmap, &binfo); for (j = 0; j < i; j++) { @@ -46,8 +50,8 @@ TEST_BEGIN(test_bitmap_set) bitmap_info_init(&binfo, i); { size_t j; - bitmap_t *bitmap = (bitmap_t *)malloc(sizeof(bitmap_t) * - bitmap_info_ngroups(&binfo)); + bitmap_t *bitmap = (bitmap_t *)malloc( + bitmap_size(&binfo)); bitmap_init(bitmap, &binfo); for (j = 0; j < i; j++) @@ -69,8 +73,8 @@ TEST_BEGIN(test_bitmap_unset) bitmap_info_init(&binfo, i); { size_t j; - bitmap_t *bitmap = (bitmap_t *)malloc(sizeof(bitmap_t) * - bitmap_info_ngroups(&binfo)); + bitmap_t *bitmap = (bitmap_t *)malloc( + bitmap_size(&binfo)); bitmap_init(bitmap, &binfo); for (j = 0; j < i; j++) @@ -98,8 +102,8 
@@ TEST_BEGIN(test_bitmap_sfu)
 bitmap_info_init(&binfo, i);
 {
 ssize_t j;
- bitmap_t *bitmap = (bitmap_t *)malloc(sizeof(bitmap_t) *
- bitmap_info_ngroups(&binfo));
+ bitmap_t *bitmap = (bitmap_t *)malloc(
+ bitmap_size(&binfo));
 bitmap_init(bitmap, &binfo);
 /* Iteratively set bits starting at the beginning. */

From b8823ab02607d6f03febd32ac504bb6188c54047 Mon Sep 17 00:00:00 2001
From: Dave Watson
Date: Wed, 24 Feb 2016 08:04:43 -0800
Subject: [PATCH 82/96] Use linear scan for small bitmaps

For small bitmaps, a linear scan of the bitmap is slightly faster than a
tree search: bitmap_t is more compact, and there are fewer writes since we
don't have to propagate state transitions up the tree. On x86_64 with the
current settings, I'm seeing ~0.5%-1% CPU improvement in production
canaries with this change.

The old tree code is left since 32-bit sizes are much larger (and ffsl
smaller), and maybe the run sizes will change in the future.

This resolves #339.
---
 include/jemalloc/internal/bitmap.h | 50 ++++++++++++++++++++++++++++--
 src/bitmap.c                       | 41 +++++++++++++++++++++++-
 2 files changed, 88 insertions(+), 3 deletions(-)

diff --git a/include/jemalloc/internal/bitmap.h b/include/jemalloc/internal/bitmap.h
index a53ac799..ee2e7e9d 100644
--- a/include/jemalloc/internal/bitmap.h
+++ b/include/jemalloc/internal/bitmap.h
@@ -15,6 +15,15 @@ typedef unsigned long bitmap_t;
 #define BITMAP_GROUP_NBITS (ZU(1) << LG_BITMAP_GROUP_NBITS)
 #define BITMAP_GROUP_NBITS_MASK (BITMAP_GROUP_NBITS-1)
+/*
+ * Do some analysis on how big the bitmap is before we use a tree. For a brute
+ * force linear search, if we would have to call ffsl more than 2^3 times, use a
+ * tree instead.
+ */
+#if LG_RUN_MAXREGS - LG_BITMAP_GROUP_NBITS > 3
+# define USE_TREE
+#endif
+
 /* Number of groups required to store a given number of bits. */
 #define BITMAP_BITS2GROUPS(nbits) \
 ((nbits + BITMAP_GROUP_NBITS_MASK) >> LG_BITMAP_GROUP_NBITS)
@@ -48,6 +57,8 @@ typedef unsigned long bitmap_t;
 /*
 * Maximum number of groups required to support LG_BITMAP_MAXBITS.
 */
+#ifdef USE_TREE
+
 #if LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS
 # define BITMAP_GROUPS_MAX BITMAP_GROUPS_1_LEVEL(BITMAP_MAXBITS)
 #elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 2
@@ -65,6 +76,13 @@ typedef unsigned long bitmap_t;
 (LG_BITMAP_MAXBITS / LG_SIZEOF_BITMAP) \
 + !!(LG_BITMAP_MAXBITS % LG_SIZEOF_BITMAP)
+#else /* USE_TREE */
+
+#define BITMAP_GROUPS_MAX \
+ (ZU(1) << (LG_RUN_MAXREGS - LG_SIZEOF_BITMAP - LG_SIZEOF_BITMAP))
+
+#endif /* USE_TREE */
+
 #endif /* JEMALLOC_H_TYPES */
 /******************************************************************************/
 #ifdef JEMALLOC_H_STRUCTS
@@ -78,6 +96,7 @@ struct bitmap_info_s {
 /* Logical number of bits in bitmap (stored at bottom level). */
 size_t nbits;
+#ifdef USE_TREE
 /* Number of levels necessary for nbits. */
 unsigned nlevels;
@@ -86,6 +105,10 @@ struct bitmap_info_s {
 * bottom to top (e.g. the bottom level is stored in levels[0]).
 */
 bitmap_level_t levels[BITMAP_MAX_LEVELS+1];
+#else /* USE_TREE */
+ /* Number of groups necessary for nbits. */
+ size_t ngroups;
+#endif /* USE_TREE */
 };
 #endif /* JEMALLOC_H_STRUCTS */
@@ -112,10 +135,20 @@ void bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit);
 JEMALLOC_INLINE bool
 bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo)
 {
+#ifdef USE_TREE
 size_t rgoff = binfo->levels[binfo->nlevels].group_offset - 1;
 bitmap_t rg = bitmap[rgoff];
 /* The bitmap is full iff the root group is 0.
*/ return (rg == 0); +#else + size_t i; + + for (i = 0; i < binfo->ngroups; i++) { + if (bitmap[i] != 0) + return (false); + } + return (true); +#endif } JEMALLOC_INLINE bool @@ -146,6 +179,7 @@ bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK); *gp = g; assert(bitmap_get(bitmap, binfo, bit)); +#ifdef USE_TREE /* Propagate group state transitions up the tree. */ if (g == 0) { unsigned i; @@ -161,6 +195,7 @@ bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) break; } } +#endif } /* sfu: set first unset. */ @@ -173,6 +208,7 @@ bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo) assert(!bitmap_full(bitmap, binfo)); +#ifdef USE_TREE i = binfo->nlevels - 1; g = bitmap[binfo->levels[i].group_offset]; bit = ffs_lu(g) - 1; @@ -181,7 +217,15 @@ bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo) g = bitmap[binfo->levels[i].group_offset + bit]; bit = (bit << LG_BITMAP_GROUP_NBITS) + (ffs_lu(g) - 1); } - +#else + i = 0; + g = bitmap[0]; + while ((bit = ffs_lu(g)) == 0) { + i++; + g = bitmap[i]; + } + bit = (bit - 1) + (i << 6); +#endif bitmap_set(bitmap, binfo, bit); return (bit); } @@ -192,7 +236,7 @@ bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) size_t goff; bitmap_t *gp; bitmap_t g; - bool propagate; + UNUSED bool propagate; assert(bit < binfo->nbits); assert(bitmap_get(bitmap, binfo, bit)); @@ -204,6 +248,7 @@ bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK); *gp = g; assert(!bitmap_get(bitmap, binfo, bit)); +#ifdef USE_TREE /* Propagate group state transitions up the tree. */ if (propagate) { unsigned i; @@ -221,6 +266,7 @@ bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) break; } } +#endif /* USE_TREE */ } #endif diff --git a/src/bitmap.c b/src/bitmap.c index 22c92fe4..b1e66271 100644 --- a/src/bitmap.c +++ b/src/bitmap.c @@ -3,6 +3,8 @@ /******************************************************************************/ +#ifdef USE_TREE + void bitmap_info_init(bitmap_info_t *binfo, size_t nbits) { @@ -32,6 +34,13 @@ bitmap_info_init(bitmap_info_t *binfo, size_t nbits) binfo->nbits = nbits; } +static size_t +bitmap_info_ngroups(const bitmap_info_t *binfo) +{ + + return (binfo->levels[binfo->nlevels].group_offset); +} + void bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo) { @@ -60,13 +69,43 @@ bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo) } } +#else /* USE_TREE */ + +void +bitmap_info_init(bitmap_info_t *binfo, size_t nbits) +{ + size_t i; + + assert(nbits > 0); + assert(nbits <= (ZU(1) << LG_BITMAP_MAXBITS)); + + i = nbits >> LG_BITMAP_GROUP_NBITS; + if (nbits % BITMAP_GROUP_NBITS != 0) + i++; + binfo->ngroups = i; + binfo->nbits = nbits; +} + static size_t bitmap_info_ngroups(const bitmap_info_t *binfo) { - return (binfo->levels[binfo->nlevels].group_offset); + return (binfo->ngroups); } +void +bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo) +{ + size_t extra; + + memset(bitmap, 0xffU, bitmap_size(binfo)); + extra = (binfo->nbits % (binfo->ngroups * BITMAP_GROUP_NBITS)); + if (extra != 0) + bitmap[binfo->ngroups - 1] >>= (BITMAP_GROUP_NBITS - extra); +} + +#endif /* USE_TREE */ + size_t bitmap_size(const bitmap_info_t *binfo) { From 20fad3430c5fa999fd094199f55a6af962993b51 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 26 Feb 2016 14:43:39 -0800 Subject: [PATCH 83/96] Refactor some bitmap cpp logic. 
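Aside: the linear-scan "set first unset" technique introduced in patch 82 above is easy to study in isolation. Below is a minimal, self-contained sketch of the same idea; it is not jemalloc code, all names are invented, and __builtin_ffsl (GCC/Clang) is assumed to be available. As in jemalloc's inverted bitmaps, 1 means "unset", so finding the first unset bit is just a find-first-set over the groups:

#include <assert.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>

#define SKETCH_NGROUPS 4
#define SKETCH_GROUP_NBITS (sizeof(unsigned long) * 8)

/* Linear-scan "set first unset"; caller must ensure an unset bit exists. */
static size_t
sketch_sfu(unsigned long *groups)
{
	size_t i = 0;
	int bit;

	while ((bit = __builtin_ffsl((long)groups[i])) == 0)
		i++;	/* Group is fully set; scan the next group. */
	groups[i] ^= 1UL << (bit - 1);	/* Clearing the 1 marks it "set". */
	return ((size_t)(bit - 1) + i * SKETCH_GROUP_NBITS);
}

int
main(void)
{
	unsigned long groups[SKETCH_NGROUPS];

	memset(groups, 0xff, sizeof(groups));	/* All bits start unset. */
	assert(sketch_sfu(groups) == 0);
	assert(sketch_sfu(groups) == 1);
	groups[0] = 0;	/* Pretend group 0 is now fully set. */
	/* Prints 64 on LP64: the first bit of the second group. */
	printf("next unset bit: %zu\n", sketch_sfu(groups));
	return (0);
}

With 64-bit groups, a 512-bit run bitmap needs at most eight such probes, which matches the 2^3 cutoff in the patch's USE_TREE test.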
--- include/jemalloc/internal/bitmap.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/include/jemalloc/internal/bitmap.h b/include/jemalloc/internal/bitmap.h index ee2e7e9d..2594e3a4 100644 --- a/include/jemalloc/internal/bitmap.h +++ b/include/jemalloc/internal/bitmap.h @@ -20,7 +20,7 @@ typedef unsigned long bitmap_t; * force linear search, if we would have to call ffsl more than 2^3 times, use a * tree instead. */ -#if LG_RUN_MAXREGS - LG_BITMAP_GROUP_NBITS > 3 +#if LG_BITMAP_MAXBITS - LG_BITMAP_GROUP_NBITS > 3 # define USE_TREE #endif @@ -78,8 +78,7 @@ typedef unsigned long bitmap_t; #else /* USE_TREE */ -#define BITMAP_GROUPS_MAX \ - (ZU(1) << (LG_RUN_MAXREGS - LG_SIZEOF_BITMAP - LG_SIZEOF_BITMAP)) +#define BITMAP_GROUPS_MAX BITMAP_BITS2GROUPS(BITMAP_MAXBITS) #endif /* USE_TREE */ From d412624b25eed2b5c52b7d94a71070d3aab03cb4 Mon Sep 17 00:00:00 2001 From: buchgr Date: Wed, 9 Dec 2015 18:00:57 +0100 Subject: [PATCH 84/96] Move retaining out of default chunk hooks This fixes chunk allocation to reuse retained memory even if an application-provided chunk allocation function is in use. This resolves #307. --- src/chunk.c | 36 +++++++++++++++++++++++++----------- 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/src/chunk.c b/src/chunk.c index 26622ced..b179d213 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -332,19 +332,12 @@ chunk_alloc_core(arena_t *arena, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit, dss_prec_t dss_prec) { void *ret; - chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; assert(size != 0); assert((size & chunksize_mask) == 0); assert(alignment != 0); assert((alignment & chunksize_mask) == 0); - /* Retained. */ - if ((ret = chunk_recycle(arena, &chunk_hooks, - &arena->chunks_szad_retained, &arena->chunks_ad_retained, false, - new_addr, size, alignment, zero, commit, true)) != NULL) - return (ret); - /* "primary" dss. 
*/ if (have_dss && dss_prec == dss_prec_primary && (ret = chunk_alloc_dss(arena, new_addr, size, alignment, zero, commit)) != @@ -442,6 +435,21 @@ chunk_alloc_default(void *new_addr, size_t size, size_t alignment, bool *zero, return (ret); } +static void * +chunk_alloc_retained(arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr, + size_t size, size_t alignment, bool *zero, bool *commit) +{ + + assert(size != 0); + assert((size & chunksize_mask) == 0); + assert(alignment != 0); + assert((alignment & chunksize_mask) == 0); + + return (chunk_recycle(arena, chunk_hooks, &arena->chunks_szad_retained, + &arena->chunks_ad_retained, false, new_addr, size, alignment, zero, + commit, true)); +} + void * chunk_alloc_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit) @@ -449,10 +457,16 @@ chunk_alloc_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr, void *ret; chunk_hooks_assure_initialized(arena, chunk_hooks); - ret = chunk_hooks->alloc(new_addr, size, alignment, zero, commit, - arena->ind); - if (ret == NULL) - return (NULL); + + ret = chunk_alloc_retained(arena, chunk_hooks, new_addr, size, + alignment, zero, commit); + if (ret == NULL) { + ret = chunk_hooks->alloc(new_addr, size, alignment, zero, + commit, arena->ind); + if (ret == NULL) + return (NULL); + } + if (config_valgrind && chunk_hooks->alloc != chunk_alloc_default) JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, chunksize); return (ret); From a62e94cabb349982f3270a2057ab49b975e7cbb7 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 26 Feb 2016 16:27:52 -0800 Subject: [PATCH 85/96] Remove invalid tests. Remove invalid tests that were intended to be tests of (hugemax+1) OOM, for which tests already exist. 
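Returning briefly to the retained-memory fix in patch 84 above: the essential change is one of ordering, i.e. recycle retained memory before invoking the (possibly application-installed) chunk allocation hook. A minimal sketch of that shape follows; the names recycle_retained and alloc_hook_t are invented stand-ins for chunk_recycle() and the chunk_hooks_t alloc member:

#include <stddef.h>
#include <stdlib.h>

typedef void *(alloc_hook_t)(size_t size);

/* Stand-in for chunk_recycle() on the retained extent trees. */
static void *
recycle_retained(size_t size)
{

	(void)size;
	return (NULL);	/* Assume nothing is retained in this sketch. */
}

static void *
alloc_wrapper(alloc_hook_t *hook, size_t size)
{
	void *ret;

	/*
	 * Reuse retained memory first, even when the application installed
	 * its own hook. Before the fix, this step lived inside the default
	 * hook only, so custom hooks caused retained mappings to leak.
	 */
	ret = recycle_retained(size);
	if (ret == NULL)
		ret = hook(size);
	return (ret);
}

int
main(void)
{
	void *p = alloc_wrapper(malloc, 4096);

	free(p);
	return (p != NULL ? 0 : 1);
}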
---
 test/integration/mallocx.c | 10 +---------
 test/integration/rallocx.c | 10 +---------
 2 files changed, 2 insertions(+), 18 deletions(-)

diff --git a/test/integration/mallocx.c b/test/integration/mallocx.c
index 6ecd636b..42eee105 100644
--- a/test/integration/mallocx.c
+++ b/test/integration/mallocx.c
@@ -48,7 +48,7 @@ get_huge_size(size_t ind)
 TEST_BEGIN(test_overflow)
 {
-	size_t hugemax, size;
+	size_t hugemax;
 
 	hugemax = get_huge_size(get_nhuge()-1);
 
@@ -61,14 +61,6 @@ TEST_BEGIN(test_overflow)
 	assert_ptr_null(mallocx(SIZE_T_MAX, 0),
 	    "Expected OOM for mallocx(size=%#zx, 0)", SIZE_T_MAX);
 
-#if LG_SIZEOF_PTR == 3
-	size = ZU(0x600000000000000);
-#else
-	size = ZU(0x6000000);
-#endif
-	assert_ptr_null(mallocx(size, 0),
-	    "Expected OOM for mallocx(size=%#zx, 0", size);
-
 	assert_ptr_null(mallocx(1, MALLOCX_ALIGN(ZU(PTRDIFF_MAX)+1)),
 	    "Expected OOM for mallocx(size=1, MALLOCX_ALIGN(%#zx))",
 	    ZU(PTRDIFF_MAX)+1);
diff --git a/test/integration/rallocx.c b/test/integration/rallocx.c
index c3c22419..66ad8660 100644
--- a/test/integration/rallocx.c
+++ b/test/integration/rallocx.c
@@ -221,7 +221,7 @@ TEST_END
 TEST_BEGIN(test_overflow)
 {
-	size_t hugemax, size;
+	size_t hugemax;
 	void *p;
 
 	hugemax = get_huge_size(get_nhuge()-1);
@@ -238,14 +238,6 @@ TEST_BEGIN(test_overflow)
 	assert_ptr_null(rallocx(p, SIZE_T_MAX, 0),
 	    "Expected OOM for rallocx(p, size=%#zx, 0)", SIZE_T_MAX);
 
-#if LG_SIZEOF_PTR == 3
-	size = ZU(0x600000000000000);
-#else
-	size = ZU(0x6000000);
-#endif
-	assert_ptr_null(rallocx(p, size, 0),
-	    "Expected OOM for rallocx(p, size=%#zx, 0", size);
-
 	assert_ptr_null(rallocx(p, 1, MALLOCX_ALIGN(ZU(PTRDIFF_MAX)+1)),
 	    "Expected OOM for rallocx(p, size=1, MALLOCX_ALIGN(%#zx))",
 	    ZU(PTRDIFF_MAX)+1);

From 3763d3b5f92d855596e111a339c1fa9583c4602a Mon Sep 17 00:00:00 2001
From: Jason Evans
Date: Fri, 26 Feb 2016 17:29:35 -0800
Subject: [PATCH 86/96] Refactor arena_cactive_update() into arena_cactive_{add,sub}().

This removes an implicit conversion from size_t to ssize_t. For cactive
decreases, the size_t value was intentionally underflowed to generate
"negative" values (actually positive values above the positive range of
ssize_t), and the conversion to ssize_t was undefined according to C
language semantics.

This regression was perpetuated by 1522937e9cbcfa24c881dc439cc454f9a34a7e88
(Fix the cactive statistic.) and first released in 4.0.0, which in
retrospect only fixed one of two problems introduced by
aa5113b1fdafd1129c22512837c6c3d66c295fc8 (Refactor overly large/complex
functions) and first released in 3.5.0.
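A small numeric illustration of the fixed pattern may help: each direction gets its own unsigned computation, so no value is ever deliberately underflowed and then converted to ssize_t. This is a standalone sketch, not jemalloc code; SKETCH_CHUNK_CEILING stands in for jemalloc's CHUNK_CEILING, and a 2 MiB chunk size is assumed:

#include <stddef.h>
#include <stdio.h>

#define SKETCH_LG_CHUNK 21	/* Assume 2 MiB chunks. */
#define SKETCH_CHUNK_CEILING(s) \
	(((s) + ((1UL << SKETCH_LG_CHUNK) - 1)) & \
	    ~((1UL << SKETCH_LG_CHUNK) - 1))

int
main(void)
{
	size_t nactive = 3 * 1024 * 1024;	/* 3 MiB currently active. */
	size_t sub = 2 * 1024 * 1024;		/* Deallocate 2 MiB. */
	/*
	 * Fixed pattern: compute the decrease as a plain unsigned
	 * difference of chunk-rounded totals (4 MiB minus 2 MiB here),
	 * then apply it with a dedicated subtract; never round-trip a
	 * "negative" size_t through ssize_t.
	 */
	size_t cactive_sub = SKETCH_CHUNK_CEILING(nactive) -
	    SKETCH_CHUNK_CEILING(nactive - sub);

	printf("cactive decreases by %zu bytes\n", cactive_sub);
	return (0);
}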
---
 src/arena.c | 26 +++++++++++++++++++-------
 1 file changed, 19 insertions(+), 7 deletions(-)

diff --git a/src/arena.c b/src/arena.c
index 5fcecbaf..3163d56e 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -373,15 +373,27 @@ arena_run_page_validate_zeroed(arena_chunk_t *chunk, size_t run_ind)
 }
 
 static void
-arena_cactive_update(arena_t *arena, size_t add_pages, size_t sub_pages)
+arena_cactive_add(arena_t *arena, size_t add_pages)
 {
 
 	if (config_stats) {
-		ssize_t cactive_diff = CHUNK_CEILING((arena->nactive + add_pages
-		    - sub_pages) << LG_PAGE) - CHUNK_CEILING(arena->nactive <<
+		size_t cactive_add = CHUNK_CEILING((arena->nactive +
+		    add_pages) << LG_PAGE) - CHUNK_CEILING(arena->nactive <<
 		    LG_PAGE);
-		if (cactive_diff != 0)
-			stats_cactive_add(cactive_diff);
+		if (cactive_add != 0)
+			stats_cactive_add(cactive_add);
+	}
+}
+
+static void
+arena_cactive_sub(arena_t *arena, size_t sub_pages)
+{
+
+	if (config_stats) {
+		size_t cactive_sub = CHUNK_CEILING(arena->nactive << LG_PAGE) -
+		    CHUNK_CEILING((arena->nactive - sub_pages) << LG_PAGE);
+		if (cactive_sub != 0)
+			stats_cactive_sub(cactive_sub);
 	}
 }
 
@@ -403,7 +415,7 @@ arena_run_split_remove(arena_t *arena, arena_chunk_t *chunk, size_t run_ind,
 	arena_avail_remove(arena, chunk, run_ind, total_pages);
 	if (flag_dirty != 0)
 		arena_run_dirty_remove(arena, chunk, run_ind, total_pages);
-	arena_cactive_update(arena, need_pages, 0);
+	arena_cactive_add(arena, need_pages);
 	arena->nactive += need_pages;
 
 	/* Keep track of trailing unused pages for later use. */
@@ -1915,7 +1927,7 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, bool cleaned,
 	assert(run_ind < chunk_npages);
 	size = arena_run_size_get(arena, chunk, run, run_ind);
 	run_pages = (size >> LG_PAGE);
-	arena_cactive_update(arena, 0, run_pages);
+	arena_cactive_sub(arena, run_pages);
 	arena->nactive -= run_pages;
 
 	/*

From 14be4a7ccad0582ab0427e61273d81ff0a5822e7 Mon Sep 17 00:00:00 2001
From: Jason Evans
Date: Fri, 26 Feb 2016 21:00:02 -0800
Subject: [PATCH 87/96] Update ChangeLog in preparation for 4.1.0.

---
 ChangeLog | 70 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 70 insertions(+)

diff --git a/ChangeLog b/ChangeLog
index 8ed42cbe..92d267eb 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -4,6 +4,76 @@ brevity. Much more detail can be found in the git revision history:
 
     https://github.com/jemalloc/jemalloc
 
+* 4.1.0 (XXX)
+
+  This release is primarily about optimizations, but it also incorporates a lot
+  of portability-motivated refactoring and enhancements. Many people worked on
+  this release, to an extent that even with the omission here of minor changes
+  (see git revision history), and of the people who reported and diagnosed
+  issues, so much of the work was contributed that starting with this release,
+  changes are annotated with author credits to help reflect the collaborative
+  effort involved.
+
+  New features:
+  - Implement decay-based unused dirty page purging, a major optimization with
+    mallctl API impact. This is an alternative to the existing ratio-based
+    unused dirty page purging, and is intended to eventually become the sole
+    purging mechanism. New mallctls:
+    + opt.purge
+    + opt.decay_time
+    + arena.<i>.decay
+    + arena.<i>.decay_time
+    + arenas.decay_time
+    + stats.arenas.<i>.decay_time
+    (@jasone, @cevans87)
+  - Add --with-malloc-conf, which makes it possible to embed a default
+    options string during configuration. This was motivated by the desire to
+    specify --with-malloc-conf=purge:decay , since the default must remain
+    purge:ratio until the 5.0.0 release. (@jasone)
+  - Make *allocx() size class overflow behavior defined. The maximum
+    size class is now less than PTRDIFF_MAX to protect applications against
+    numerical overflow, and all allocation functions are guaranteed to indicate
+    errors rather than potentially crashing if the request size exceeds the
+    maximum size class. (@jasone)
+  - Add MS Visual Studio 2015 support. (@rustyx, @yuslepukhin)
+  - jeprof:
+    + Add raw heap profile support. (@jasone)
+    + Add --retain and --exclude for backtrace symbol filtering. (@jasone)
+
+  Optimizations:
+  - Optimize the fast path to combine various bootstrapping and configuration
+    checks and execute more streamlined code in the common case. (@interwq)
+  - Use linear scan for small bitmaps (used for small object tracking). In
+    addition to speeding up bitmap operations on 64-bit systems, this reduces
+    allocator metadata overhead by approximately 0.2%. (@djwatson)
+  - Separate arena_avail trees, which substantially speeds up run tree
+    operations. (@djwatson)
+  - Use memoization (boot-time-computed table) for run quantization. Separate
+    arena_avail trees reduced the importance of this optimization. (@jasone)
+  - Attempt mmap-based in-place huge reallocation. This can dramatically speed
+    up incremental huge reallocation. (@jasone)
+
+  Incompatible changes:
+  - Make opt.narenas unsigned rather than size_t. (@jasone)
+
+  Bug fixes:
+  - Refactor arenas array. In addition to fixing a fork-related deadlock, this
+    makes arena lookups faster and simpler. (@jasone)
+  - Handle unaligned keys in hash(). This caused problems for some ARM systems.
+    (@jasone)
+  - Fix run quantization. In practice this bug had no impact unless
+    applications requested memory with alignment exceeding one page. (@jasone)
+  - Move retained memory allocation out of the default chunk allocation
+    function, to a location that gets executed even if the application installs
+    a custom chunk allocation function. This resolves a virtual memory leak.
+    (@buchgr)
+  - Resolve undefined unsigned-to-signed conversion that could cause corruption
+    of the stats.cactive statistic. (@jasone)
+  - Fix LinuxThreads-specific bootstrapping deadlock. (Cosmin Paraschiv)
+  - jeprof:
+    + Don't discard curl options if timeout is not defined. (@djwatson)
+    + Detect failed profile fetches. (@djwatson)
+
 * 4.0.4 (October 24, 2015)
 
   This bugfix release fixes another xallocx() regression. No other regressions

From 40ee9aa9577ea5eb6616c10b9e6b0fa7e6796821 Mon Sep 17 00:00:00 2001
From: Jason Evans
Date: Sat, 27 Feb 2016 12:34:50 -0800
Subject: [PATCH 88/96] Fix stats.cactive accounting regression.

Fix stats.cactive accounting to always increase/decrease by multiples of
the chunk size, even for huge size classes that are not multiples of the
chunk size, e.g. {2.5, 3, 3.5, 5, 7} MiB with 2 MiB chunk size. This
regression was introduced by 155bfa7da18cab0d21d87aa2dce4554166836f5d
(Normalize size classes.) and first released in 4.0.0.

This resolves #336.
--- include/jemalloc/internal/stats.h | 14 +++++++-- src/arena.c | 48 +++++++++++-------------------- 2 files changed, 29 insertions(+), 33 deletions(-) diff --git a/include/jemalloc/internal/stats.h b/include/jemalloc/internal/stats.h index c91dba99..705903ad 100644 --- a/include/jemalloc/internal/stats.h +++ b/include/jemalloc/internal/stats.h @@ -167,15 +167,25 @@ stats_cactive_get(void) JEMALLOC_INLINE void stats_cactive_add(size_t size) { + UNUSED size_t cactive; - atomic_add_z(&stats_cactive, size); + assert(size > 0); + assert((size & chunksize_mask) == 0); + + cactive = atomic_add_z(&stats_cactive, size); + assert(cactive - size < cactive); } JEMALLOC_INLINE void stats_cactive_sub(size_t size) { + UNUSED size_t cactive; - atomic_sub_z(&stats_cactive, size); + assert(size > 0); + assert((size & chunksize_mask) == 0); + + cactive = atomic_sub_z(&stats_cactive, size); + assert(cactive + size > cactive); } #endif diff --git a/src/arena.c b/src/arena.c index 3163d56e..c579a582 100644 --- a/src/arena.c +++ b/src/arena.c @@ -373,7 +373,7 @@ arena_run_page_validate_zeroed(arena_chunk_t *chunk, size_t run_ind) } static void -arena_cactive_add(arena_t *arena, size_t add_pages) +arena_nactive_add(arena_t *arena, size_t add_pages) { if (config_stats) { @@ -383,10 +383,11 @@ arena_cactive_add(arena_t *arena, size_t add_pages) if (cactive_add != 0) stats_cactive_add(cactive_add); } + arena->nactive += add_pages; } static void -arena_cactive_sub(arena_t *arena, size_t sub_pages) +arena_nactive_sub(arena_t *arena, size_t sub_pages) { if (config_stats) { @@ -395,6 +396,7 @@ arena_cactive_sub(arena_t *arena, size_t sub_pages) if (cactive_sub != 0) stats_cactive_sub(cactive_sub); } + arena->nactive -= sub_pages; } static void @@ -415,8 +417,7 @@ arena_run_split_remove(arena_t *arena, arena_chunk_t *chunk, size_t run_ind, arena_avail_remove(arena, chunk, run_ind, total_pages); if (flag_dirty != 0) arena_run_dirty_remove(arena, chunk, run_ind, total_pages); - arena_cactive_add(arena, need_pages); - arena->nactive += need_pages; + arena_nactive_add(arena, need_pages); /* Keep track of trailing unused pages for later use. 
*/ if (rem_pages > 0) { @@ -905,7 +906,7 @@ arena_chunk_alloc_huge_hard(arena_t *arena, chunk_hooks_t *chunk_hooks, arena_huge_malloc_stats_update_undo(arena, usize); arena->stats.mapped -= usize; } - arena->nactive -= (usize >> LG_PAGE); + arena_nactive_sub(arena, usize >> LG_PAGE); malloc_mutex_unlock(&arena->lock); } @@ -927,7 +928,7 @@ arena_chunk_alloc_huge(arena_t *arena, size_t usize, size_t alignment, arena_huge_malloc_stats_update(arena, usize); arena->stats.mapped += usize; } - arena->nactive += (usize >> LG_PAGE); + arena_nactive_add(arena, usize >> LG_PAGE); ret = chunk_alloc_cache(arena, &chunk_hooks, NULL, csize, alignment, zero, true); @@ -937,8 +938,6 @@ arena_chunk_alloc_huge(arena_t *arena, size_t usize, size_t alignment, alignment, zero, csize); } - if (config_stats && ret != NULL) - stats_cactive_add(usize); return (ret); } @@ -953,9 +952,8 @@ arena_chunk_dalloc_huge(arena_t *arena, void *chunk, size_t usize) if (config_stats) { arena_huge_dalloc_stats_update(arena, usize); arena->stats.mapped -= usize; - stats_cactive_sub(usize); } - arena->nactive -= (usize >> LG_PAGE); + arena_nactive_sub(arena, usize >> LG_PAGE); chunk_dalloc_cache(arena, &chunk_hooks, chunk, csize, true); malloc_mutex_unlock(&arena->lock); @@ -972,17 +970,10 @@ arena_chunk_ralloc_huge_similar(arena_t *arena, void *chunk, size_t oldsize, malloc_mutex_lock(&arena->lock); if (config_stats) arena_huge_ralloc_stats_update(arena, oldsize, usize); - if (oldsize < usize) { - size_t udiff = usize - oldsize; - arena->nactive += udiff >> LG_PAGE; - if (config_stats) - stats_cactive_add(udiff); - } else { - size_t udiff = oldsize - usize; - arena->nactive -= udiff >> LG_PAGE; - if (config_stats) - stats_cactive_sub(udiff); - } + if (oldsize < usize) + arena_nactive_add(arena, (usize - oldsize) >> LG_PAGE); + else + arena_nactive_sub(arena, (oldsize - usize) >> LG_PAGE); malloc_mutex_unlock(&arena->lock); } @@ -996,12 +987,10 @@ arena_chunk_ralloc_huge_shrink(arena_t *arena, void *chunk, size_t oldsize, malloc_mutex_lock(&arena->lock); if (config_stats) { arena_huge_ralloc_stats_update(arena, oldsize, usize); - if (cdiff != 0) { + if (cdiff != 0) arena->stats.mapped -= cdiff; - stats_cactive_sub(udiff); - } } - arena->nactive -= udiff >> LG_PAGE; + arena_nactive_sub(arena, udiff >> LG_PAGE); if (cdiff != 0) { chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; @@ -1031,7 +1020,7 @@ arena_chunk_ralloc_huge_expand_hard(arena_t *arena, chunk_hooks_t *chunk_hooks, usize); arena->stats.mapped -= cdiff; } - arena->nactive -= (udiff >> LG_PAGE); + arena_nactive_sub(arena, udiff >> LG_PAGE); malloc_mutex_unlock(&arena->lock); } else if (chunk_hooks->merge(chunk, CHUNK_CEILING(oldsize), nchunk, cdiff, true, arena->ind)) { @@ -1059,7 +1048,7 @@ arena_chunk_ralloc_huge_expand(arena_t *arena, void *chunk, size_t oldsize, arena_huge_ralloc_stats_update(arena, oldsize, usize); arena->stats.mapped += cdiff; } - arena->nactive += (udiff >> LG_PAGE); + arena_nactive_add(arena, udiff >> LG_PAGE); err = (chunk_alloc_cache(arena, &arena->chunk_hooks, nchunk, cdiff, chunksize, zero, true) == NULL); @@ -1075,8 +1064,6 @@ arena_chunk_ralloc_huge_expand(arena_t *arena, void *chunk, size_t oldsize, err = true; } - if (config_stats && !err) - stats_cactive_add(udiff); return (err); } @@ -1927,8 +1914,7 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, bool cleaned, assert(run_ind < chunk_npages); size = arena_run_size_get(arena, chunk, run, run_ind); run_pages = (size >> LG_PAGE); - arena_cactive_sub(arena, run_pages); 
- arena->nactive -= run_pages; + arena_nactive_sub(arena, run_pages); /* * The run is dirty if the caller claims to have dirtied it, as well as From 69acd25a64a570ec8987558d149a6730bcf9a83d Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 27 Feb 2016 14:38:03 -0800 Subject: [PATCH 89/96] Add/alphabetize private symbols. --- include/jemalloc/internal/private_symbols.txt | 30 +++++++++---------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index b57cfbcc..54d3807d 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -24,12 +24,12 @@ arena_dalloc_junk_small arena_dalloc_large arena_dalloc_large_junked_locked arena_dalloc_small +arena_decay_tick +arena_decay_ticks arena_decay_time_default_get arena_decay_time_default_set arena_decay_time_get arena_decay_time_set -arena_decay_tick -arena_decay_ticks arena_dss_prec_get arena_dss_prec_set arena_get @@ -51,9 +51,6 @@ arena_mapbits_large_binind_set arena_mapbits_large_get arena_mapbits_large_set arena_mapbits_large_size_get -arena_mapbitsp_get -arena_mapbitsp_read -arena_mapbitsp_write arena_mapbits_size_decode arena_mapbits_size_encode arena_mapbits_small_runind_get @@ -62,6 +59,9 @@ arena_mapbits_unallocated_set arena_mapbits_unallocated_size_get arena_mapbits_unallocated_size_set arena_mapbits_unzeroed_get +arena_mapbitsp_get +arena_mapbitsp_read +arena_mapbitsp_write arena_maxrun arena_maybe_purge arena_metadata_allocated_add @@ -99,14 +99,14 @@ arena_redzone_corruption arena_run_regind arena_run_to_miscelm arena_salloc -arenas_tdata_bypass_cleanup -arenas_tdata_cleanup arena_sdalloc arena_stats_merge arena_tcache_fill_small arena_tdata_get arena_tdata_get_hard arenas +arenas_tdata_bypass_cleanup +arenas_tdata_cleanup atomic_add_p atomic_add_u atomic_add_uint32 @@ -170,9 +170,9 @@ chunk_prefork chunk_purge_arena chunk_purge_wrapper chunk_register +chunks_rtree chunksize chunksize_mask -chunks_rtree ckh_count ckh_delete ckh_insert @@ -280,11 +280,11 @@ idalloct idalloctm imalloc imalloct +in_valgrind index2size index2size_compute index2size_lookup index2size_tab -in_valgrind ipalloc ipalloct ipallocztm @@ -489,13 +489,13 @@ tcache_flush tcache_get tcache_get_hard tcache_maxclass -tcaches tcache_salloc +tcache_stats_merge +tcaches tcaches_create tcaches_destroy tcaches_flush tcaches_get -tcache_stats_merge thread_allocated_cleanup thread_deallocated_cleanup ticker_copy @@ -520,6 +520,8 @@ tsd_init_check_recursion tsd_init_finish tsd_init_head tsd_nominal +tsd_prof_tdata_get +tsd_prof_tdata_set tsd_quarantine_get tsd_quarantine_set tsd_set @@ -527,14 +529,12 @@ tsd_tcache_enabled_get tsd_tcache_enabled_set tsd_tcache_get tsd_tcache_set -tsd_tls -tsd_tsd -tsd_prof_tdata_get -tsd_prof_tdata_set tsd_thread_allocated_get tsd_thread_allocated_set tsd_thread_deallocated_get tsd_thread_deallocated_set +tsd_tls +tsd_tsd u2rz valgrind_freelike_block valgrind_make_mem_defined From fd4858225b84c12e071eeeaea1fa1bce8731e409 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 27 Feb 2016 20:38:29 -0800 Subject: [PATCH 90/96] Fix decay tests for --disable-stats case. 
---
 test/unit/decay.c | 29 ++++++++++++++++++-----------
 1 file changed, 18 insertions(+), 11 deletions(-)

diff --git a/test/unit/decay.c b/test/unit/decay.c
index 66d54dc8..1052f6fb 100644
--- a/test/unit/decay.c
+++ b/test/unit/decay.c
@@ -201,7 +201,9 @@ TEST_BEGIN(test_decay_ticker)
 #define NPS 1024
 	int flags = (MALLOCX_ARENA(0) | MALLOCX_TCACHE_NONE);
 	void *ps[NPS];
-	uint64_t epoch, npurge0, npurge1;
+	uint64_t epoch;
+	uint64_t npurge0 = 0;
+	uint64_t npurge1 = 0;
 	size_t sz, tcache_max, large;
 	unsigned i, nupdates0;
 	nstime_t time, decay_time, deadline;
@@ -224,8 +226,8 @@ TEST_BEGIN(test_decay_ticker)
 	assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(uint64_t)), 0,
 	    "Unexpected mallctl failure");
 	sz = sizeof(uint64_t);
-	assert_d_eq(mallctl("stats.arenas.0.npurge", &npurge0, &sz, NULL, 0), 0,
-	    "Unexpected mallctl failure");
+	assert_d_eq(mallctl("stats.arenas.0.npurge", &npurge0, &sz, NULL, 0),
+	    config_stats ? 0 : ENOENT, "Unexpected mallctl result");
 
 	for (i = 0; i < NPS; i++) {
 		ps[i] = mallocx(large, flags);
@@ -266,12 +268,14 @@ TEST_BEGIN(test_decay_ticker)
 		    sizeof(uint64_t)), 0, "Unexpected mallctl failure");
 		sz = sizeof(uint64_t);
 		assert_d_eq(mallctl("stats.arenas.0.npurge", &npurge1, &sz,
-		    NULL, 0), 0, "Unexpected mallctl failure");
+		    NULL, 0), config_stats ? 0 : ENOENT,
+		    "Unexpected mallctl result");
 
 		nstime_update(&time);
 	} while (nstime_compare(&time, &deadline) <= 0 && npurge1 == npurge0);
 
-	assert_u64_gt(npurge1, npurge0, "Expected purging to occur");
+	if (config_stats)
+		assert_u64_gt(npurge1, npurge0, "Expected purging to occur");
 #undef NPS
 }
 TEST_END
@@ -281,7 +285,9 @@ TEST_BEGIN(test_decay_nonmonotonic)
 #define NPS (SMOOTHSTEP_NSTEPS + 1)
 	int flags = (MALLOCX_ARENA(0) | MALLOCX_TCACHE_NONE);
 	void *ps[NPS];
-	uint64_t epoch, npurge0, npurge1;
+	uint64_t epoch;
+	uint64_t npurge0 = 0;
+	uint64_t npurge1 = 0;
 	size_t sz, large0;
 	unsigned i, nupdates0;
 
@@ -296,8 +302,8 @@ TEST_BEGIN(test_decay_nonmonotonic)
 	assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(uint64_t)), 0,
 	    "Unexpected mallctl failure");
 	sz = sizeof(uint64_t);
-	assert_d_eq(mallctl("stats.arenas.0.npurge", &npurge0, &sz, NULL, 0), 0,
-	    "Unexpected mallctl failure");
+	assert_d_eq(mallctl("stats.arenas.0.npurge", &npurge0, &sz, NULL, 0),
+	    config_stats ? 0 : ENOENT, "Unexpected mallctl result");
 
 	nupdates_mock = 0;
 	nstime_init(&time_mock, 0);
@@ -324,10 +330,11 @@
 	assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(uint64_t)), 0,
 	    "Unexpected mallctl failure");
 	sz = sizeof(uint64_t);
-	assert_d_eq(mallctl("stats.arenas.0.npurge", &npurge1, &sz, NULL, 0), 0,
-	    "Unexpected mallctl failure");
+	assert_d_eq(mallctl("stats.arenas.0.npurge", &npurge1, &sz, NULL, 0),
+	    config_stats ? 0 : ENOENT, "Unexpected mallctl result");
 
-	assert_u64_gt(npurge1, npurge0, "Expected purging to occur");
+	if (config_stats)
+		assert_u64_gt(npurge1, npurge0, "Expected purging to occur");
 
 	nstime_update = nstime_update_orig;
 #undef NPS

From 3c07f803aa282598451eb0664cc94717b769a5e6 Mon Sep 17 00:00:00 2001
From: Jason Evans
Date: Sat, 27 Feb 2016 20:40:13 -0800
Subject: [PATCH 91/96] Fix stats.arenas.<i>.[...] for --disable-stats case.

Add missing stats.arenas.<i>.{dss,lg_dirty_mult,decay_time}
initialization.

Fix stats.arenas.<i>.{pactive,pdirty} to read under the protection of the
arena mutex.
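The refactoring below follows a common shape: hoist the fields every build needs into one helper that runs under the arena mutex, then let both a "basic" entry point and the full stats merge share it. A sketch of that structure using pthreads and invented types (arena_sketch_t is not a jemalloc type):

#include <pthread.h>
#include <stddef.h>

typedef struct {
	pthread_mutex_t	lock;
	size_t		nactive;
	size_t		ndirty;
	/* ...fields that exist only when stats are enabled... */
} arena_sketch_t;

static void
basic_merge_locked(arena_sketch_t *arena, size_t *nactive, size_t *ndirty)
{

	/* Caller holds arena->lock, so plain reads are consistent. */
	*nactive += arena->nactive;
	*ndirty += arena->ndirty;
}

static void
basic_merge(arena_sketch_t *arena, size_t *nactive, size_t *ndirty)
{

	pthread_mutex_lock(&arena->lock);
	basic_merge_locked(arena, nactive, ndirty);
	pthread_mutex_unlock(&arena->lock);
}

static void
full_merge(arena_sketch_t *arena, size_t *nactive, size_t *ndirty)
{

	pthread_mutex_lock(&arena->lock);
	basic_merge_locked(arena, nactive, ndirty);
	/* ...merge the remaining stats while still holding the lock... */
	pthread_mutex_unlock(&arena->lock);
}

int
main(void)
{
	arena_sketch_t arena = { PTHREAD_MUTEX_INITIALIZER, 7, 3 };
	size_t nactive = 0, ndirty = 0;

	full_merge(&arena, &nactive, &ndirty);
	basic_merge(&arena, &nactive, &ndirty);
	return (nactive == 14 && ndirty == 6 ? 0 : 1);
}

Both entry points take the same lock, which is exactly what lets the --disable-stats path read pactive/pdirty consistently without duplicating the locking logic.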
--- include/jemalloc/internal/arena.h | 5 +- include/jemalloc/internal/ctl.h | 3 + include/jemalloc/internal/private_symbols.txt | 1 + src/arena.c | 39 ++++- src/ctl.c | 142 +++++++++--------- test/unit/mallctl.c | 4 +- 6 files changed, 114 insertions(+), 80 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index c7c18748..3519873c 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -572,7 +572,10 @@ ssize_t arena_lg_dirty_mult_default_get(void); bool arena_lg_dirty_mult_default_set(ssize_t lg_dirty_mult); ssize_t arena_decay_time_default_get(void); bool arena_decay_time_default_set(ssize_t decay_time); -void arena_stats_merge(arena_t *arena, const char **dss, +void arena_basic_stats_merge(arena_t *arena, unsigned *nthreads, + const char **dss, ssize_t *lg_dirty_mult, ssize_t *decay_time, + size_t *nactive, size_t *ndirty); +void arena_stats_merge(arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *lg_dirty_mult, ssize_t *decay_time, size_t *nactive, size_t *ndirty, arena_stats_t *astats, malloc_bin_stats_t *bstats, malloc_large_stats_t *lstats, malloc_huge_stats_t *hstats); diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index 9add3ed9..9c5e9328 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -38,6 +38,9 @@ struct ctl_arena_stats_s { ssize_t decay_time; size_t pactive; size_t pdirty; + + /* The remainder are only populated if config_stats is true. */ + arena_stats_t astats; /* Aggregate stats for small size classes, based on bin stats. */ diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 54d3807d..5880996a 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -2,6 +2,7 @@ a0dalloc a0malloc arena_aalloc arena_alloc_junk_small +arena_basic_stats_merge arena_bin_index arena_bin_info arena_bitselm_get diff --git a/src/arena.c b/src/arena.c index c579a582..99e20fde 100644 --- a/src/arena.c +++ b/src/arena.c @@ -3202,20 +3202,45 @@ arena_decay_time_default_set(ssize_t decay_time) return (false); } -void -arena_stats_merge(arena_t *arena, const char **dss, ssize_t *lg_dirty_mult, - ssize_t *decay_time, size_t *nactive, size_t *ndirty, arena_stats_t *astats, - malloc_bin_stats_t *bstats, malloc_large_stats_t *lstats, - malloc_huge_stats_t *hstats) +static void +arena_basic_stats_merge_locked(arena_t *arena, unsigned *nthreads, + const char **dss, ssize_t *lg_dirty_mult, ssize_t *decay_time, + size_t *nactive, size_t *ndirty) { - unsigned i; - malloc_mutex_lock(&arena->lock); + *nthreads += arena_nthreads_get(arena); *dss = dss_prec_names[arena->dss_prec]; *lg_dirty_mult = arena->lg_dirty_mult; *decay_time = arena->decay_time; *nactive += arena->nactive; *ndirty += arena->ndirty; +} + +void +arena_basic_stats_merge(arena_t *arena, unsigned *nthreads, const char **dss, + ssize_t *lg_dirty_mult, ssize_t *decay_time, size_t *nactive, + size_t *ndirty) +{ + + malloc_mutex_lock(&arena->lock); + arena_basic_stats_merge_locked(arena, nthreads, dss, lg_dirty_mult, + decay_time, nactive, ndirty); + malloc_mutex_unlock(&arena->lock); +} + +void +arena_stats_merge(arena_t *arena, unsigned *nthreads, const char **dss, + ssize_t *lg_dirty_mult, ssize_t *decay_time, size_t *nactive, + size_t *ndirty, arena_stats_t *astats, malloc_bin_stats_t *bstats, + malloc_large_stats_t *lstats, malloc_huge_stats_t *hstats) +{ + unsigned i; + 
+ cassert(config_stats); + + malloc_mutex_lock(&arena->lock); + arena_basic_stats_merge_locked(arena, nthreads, dss, lg_dirty_mult, + decay_time, nactive, ndirty); astats->mapped += arena->stats.mapped; astats->npurge += arena->stats.npurge; diff --git a/src/ctl.c b/src/ctl.c index dbf57c36..17bd0719 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -533,6 +533,7 @@ static void ctl_arena_clear(ctl_arena_stats_t *astats) { + astats->nthreads = 0; astats->dss = dss_prec_names[dss_prec_limit]; astats->lg_dirty_mult = -1; astats->decay_time = -1; @@ -557,16 +558,23 @@ ctl_arena_stats_amerge(ctl_arena_stats_t *cstats, arena_t *arena) { unsigned i; - arena_stats_merge(arena, &cstats->dss, &cstats->lg_dirty_mult, - &cstats->decay_time, &cstats->pactive, &cstats->pdirty, - &cstats->astats, cstats->bstats, cstats->lstats, cstats->hstats); + if (config_stats) { + arena_stats_merge(arena, &cstats->nthreads, &cstats->dss, + &cstats->lg_dirty_mult, &cstats->decay_time, + &cstats->pactive, &cstats->pdirty, &cstats->astats, + cstats->bstats, cstats->lstats, cstats->hstats); - for (i = 0; i < NBINS; i++) { - cstats->allocated_small += cstats->bstats[i].curregs * - index2size(i); - cstats->nmalloc_small += cstats->bstats[i].nmalloc; - cstats->ndalloc_small += cstats->bstats[i].ndalloc; - cstats->nrequests_small += cstats->bstats[i].nrequests; + for (i = 0; i < NBINS; i++) { + cstats->allocated_small += cstats->bstats[i].curregs * + index2size(i); + cstats->nmalloc_small += cstats->bstats[i].nmalloc; + cstats->ndalloc_small += cstats->bstats[i].ndalloc; + cstats->nrequests_small += cstats->bstats[i].nrequests; + } + } else { + arena_basic_stats_merge(arena, &cstats->nthreads, &cstats->dss, + &cstats->lg_dirty_mult, &cstats->decay_time, + &cstats->pactive, &cstats->pdirty); } } @@ -575,57 +583,68 @@ ctl_arena_stats_smerge(ctl_arena_stats_t *sstats, ctl_arena_stats_t *astats) { unsigned i; + sstats->nthreads += astats->nthreads; sstats->pactive += astats->pactive; sstats->pdirty += astats->pdirty; - sstats->astats.mapped += astats->astats.mapped; - sstats->astats.npurge += astats->astats.npurge; - sstats->astats.nmadvise += astats->astats.nmadvise; - sstats->astats.purged += astats->astats.purged; + if (config_stats) { + sstats->astats.mapped += astats->astats.mapped; + sstats->astats.npurge += astats->astats.npurge; + sstats->astats.nmadvise += astats->astats.nmadvise; + sstats->astats.purged += astats->astats.purged; - sstats->astats.metadata_mapped += astats->astats.metadata_mapped; - sstats->astats.metadata_allocated += astats->astats.metadata_allocated; + sstats->astats.metadata_mapped += + astats->astats.metadata_mapped; + sstats->astats.metadata_allocated += + astats->astats.metadata_allocated; - sstats->allocated_small += astats->allocated_small; - sstats->nmalloc_small += astats->nmalloc_small; - sstats->ndalloc_small += astats->ndalloc_small; - sstats->nrequests_small += astats->nrequests_small; + sstats->allocated_small += astats->allocated_small; + sstats->nmalloc_small += astats->nmalloc_small; + sstats->ndalloc_small += astats->ndalloc_small; + sstats->nrequests_small += astats->nrequests_small; - sstats->astats.allocated_large += astats->astats.allocated_large; - sstats->astats.nmalloc_large += astats->astats.nmalloc_large; - sstats->astats.ndalloc_large += astats->astats.ndalloc_large; - sstats->astats.nrequests_large += astats->astats.nrequests_large; + sstats->astats.allocated_large += + astats->astats.allocated_large; + sstats->astats.nmalloc_large += astats->astats.nmalloc_large; + 
sstats->astats.ndalloc_large += astats->astats.ndalloc_large; + sstats->astats.nrequests_large += + astats->astats.nrequests_large; - sstats->astats.allocated_huge += astats->astats.allocated_huge; - sstats->astats.nmalloc_huge += astats->astats.nmalloc_huge; - sstats->astats.ndalloc_huge += astats->astats.ndalloc_huge; + sstats->astats.allocated_huge += astats->astats.allocated_huge; + sstats->astats.nmalloc_huge += astats->astats.nmalloc_huge; + sstats->astats.ndalloc_huge += astats->astats.ndalloc_huge; - for (i = 0; i < NBINS; i++) { - sstats->bstats[i].nmalloc += astats->bstats[i].nmalloc; - sstats->bstats[i].ndalloc += astats->bstats[i].ndalloc; - sstats->bstats[i].nrequests += astats->bstats[i].nrequests; - sstats->bstats[i].curregs += astats->bstats[i].curregs; - if (config_tcache) { - sstats->bstats[i].nfills += astats->bstats[i].nfills; - sstats->bstats[i].nflushes += - astats->bstats[i].nflushes; + for (i = 0; i < NBINS; i++) { + sstats->bstats[i].nmalloc += astats->bstats[i].nmalloc; + sstats->bstats[i].ndalloc += astats->bstats[i].ndalloc; + sstats->bstats[i].nrequests += + astats->bstats[i].nrequests; + sstats->bstats[i].curregs += astats->bstats[i].curregs; + if (config_tcache) { + sstats->bstats[i].nfills += + astats->bstats[i].nfills; + sstats->bstats[i].nflushes += + astats->bstats[i].nflushes; + } + sstats->bstats[i].nruns += astats->bstats[i].nruns; + sstats->bstats[i].reruns += astats->bstats[i].reruns; + sstats->bstats[i].curruns += astats->bstats[i].curruns; } - sstats->bstats[i].nruns += astats->bstats[i].nruns; - sstats->bstats[i].reruns += astats->bstats[i].reruns; - sstats->bstats[i].curruns += astats->bstats[i].curruns; - } - for (i = 0; i < nlclasses; i++) { - sstats->lstats[i].nmalloc += astats->lstats[i].nmalloc; - sstats->lstats[i].ndalloc += astats->lstats[i].ndalloc; - sstats->lstats[i].nrequests += astats->lstats[i].nrequests; - sstats->lstats[i].curruns += astats->lstats[i].curruns; - } + for (i = 0; i < nlclasses; i++) { + sstats->lstats[i].nmalloc += astats->lstats[i].nmalloc; + sstats->lstats[i].ndalloc += astats->lstats[i].ndalloc; + sstats->lstats[i].nrequests += + astats->lstats[i].nrequests; + sstats->lstats[i].curruns += astats->lstats[i].curruns; + } - for (i = 0; i < nhclasses; i++) { - sstats->hstats[i].nmalloc += astats->hstats[i].nmalloc; - sstats->hstats[i].ndalloc += astats->hstats[i].ndalloc; - sstats->hstats[i].curhchunks += astats->hstats[i].curhchunks; + for (i = 0; i < nhclasses; i++) { + sstats->hstats[i].nmalloc += astats->hstats[i].nmalloc; + sstats->hstats[i].ndalloc += astats->hstats[i].ndalloc; + sstats->hstats[i].curhchunks += + astats->hstats[i].curhchunks; + } } } @@ -636,19 +655,9 @@ ctl_arena_refresh(arena_t *arena, unsigned i) ctl_arena_stats_t *sstats = &ctl_stats.arenas[ctl_stats.narenas]; ctl_arena_clear(astats); - - sstats->nthreads += astats->nthreads; - if (config_stats) { - ctl_arena_stats_amerge(astats, arena); - /* Merge into sum stats as well. */ - ctl_arena_stats_smerge(sstats, astats); - } else { - astats->pactive += arena->nactive; - astats->pdirty += arena->ndirty; - /* Merge into sum stats as well. */ - sstats->pactive += arena->nactive; - sstats->pdirty += arena->ndirty; - } + ctl_arena_stats_amerge(astats, arena); + /* Merge into sum stats as well. */ + ctl_arena_stats_smerge(sstats, astats); } static bool @@ -701,20 +710,11 @@ ctl_refresh(void) * Clear sum stats, since they will be merged into by * ctl_arena_refresh(). 
*/ - ctl_stats.arenas[ctl_stats.narenas].nthreads = 0; ctl_arena_clear(&ctl_stats.arenas[ctl_stats.narenas]); for (i = 0; i < ctl_stats.narenas; i++) tarenas[i] = arena_get(i, false); - for (i = 0; i < ctl_stats.narenas; i++) { - if (tarenas[i] != NULL) { - ctl_stats.arenas[i].nthreads = - arena_nthreads_get(arena_get(i, false)); - } else - ctl_stats.arenas[i].nthreads = 0; - } - for (i = 0; i < ctl_stats.narenas; i++) { bool initialized = (tarenas[i] != NULL); diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 01333514..69f8c20c 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -687,8 +687,10 @@ TEST_BEGIN(test_stats_arenas) 0), 0, "Unexpected mallctl() failure"); \ } while (0) - TEST_STATS_ARENAS(const char *, dss); TEST_STATS_ARENAS(unsigned, nthreads); + TEST_STATS_ARENAS(const char *, dss); + TEST_STATS_ARENAS(ssize_t, lg_dirty_mult); + TEST_STATS_ARENAS(ssize_t, decay_time); TEST_STATS_ARENAS(size_t, pactive); TEST_STATS_ARENAS(size_t, pdirty); From 39f58755a7c2c5c12c9b732c17fe472c9872ab4b Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 27 Feb 2016 21:18:15 -0800 Subject: [PATCH 92/96] Fix a potential tsd cleanup leak. Prior to 767d85061a6fb88ec977bbcd9b429a43aff391e6 (Refactor arenas array (fixes deadlock).), it was possible under some circumstances for arena_get() to trigger recreation of the arenas cache during tsd cleanup, and the arenas cache would then be leaked. In principle a similar issue could still occur as a side effect of decay-based purging, which calls arena_tdata_get(). Fix arenas_tdata_cleanup() by setting tsd->arenas_tdata_bypass to true, so that arena_tdata_get() will gracefully fail (an expected behavior) rather than recreating tsd->arena_tdata. Reported by Christopher Ferris . --- src/jemalloc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/jemalloc.c b/src/jemalloc.c index c8841783..0735376e 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -652,6 +652,9 @@ arenas_tdata_cleanup(tsd_t *tsd) { arena_tdata_t *arenas_tdata; + /* Prevent tsd->arenas_tdata from being (re)created. */ + *tsd_arenas_tdata_bypassp_get(tsd) = true; + arenas_tdata = tsd_arenas_tdata_get(tsd); if (arenas_tdata != NULL) { tsd_arenas_tdata_set(tsd, NULL); From 7d3055432d303f114d15f67c60bdebcbb4dbd39a Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 27 Feb 2016 23:40:31 -0800 Subject: [PATCH 93/96] Fix decay tests for --disable-tcache case. --- test/unit/decay.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/test/unit/decay.c b/test/unit/decay.c index 1052f6fb..70a2e67a 100644 --- a/test/unit/decay.c +++ b/test/unit/decay.c @@ -160,7 +160,7 @@ TEST_BEGIN(test_decay_ticks) * Test tcache fill/flush interactions for large and small size classes, * using an explicit tcache. */ - { + if (config_tcache) { unsigned tcache_ind, i; size_t tcache_sizes[2]; tcache_sizes[0] = large0; @@ -204,7 +204,7 @@ TEST_BEGIN(test_decay_ticker) uint64_t epoch; uint64_t npurge0 = 0; uint64_t npurge1 = 0; - size_t sz, tcache_max, large; + size_t sz, large; unsigned i, nupdates0; nstime_t time, decay_time, deadline; @@ -216,10 +216,18 @@ TEST_BEGIN(test_decay_ticker) * verify the ticker triggers purging. 
*/ - sz = sizeof(size_t); - assert_d_eq(mallctl("arenas.tcache_max", &tcache_max, &sz, NULL, 0), 0, - "Unexpected mallctl failure"); - large = nallocx(tcache_max + 1, flags); + if (config_tcache) { + size_t tcache_max; + + sz = sizeof(size_t); + assert_d_eq(mallctl("arenas.tcache_max", &tcache_max, &sz, NULL, + 0), 0, "Unexpected mallctl failure"); + large = nallocx(tcache_max + 1, flags); + } else { + sz = sizeof(size_t); + assert_d_eq(mallctl("arenas.lrun.0.size", &large, &sz, NULL, 0), + 0, "Unexpected mallctl failure"); + } assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, "Unexpected mallctl failure"); From e025c5158b2dd524a20ffc8db9d096816f6641fa Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 28 Feb 2016 00:01:13 -0800 Subject: [PATCH 94/96] Update ChangeLog. --- ChangeLog | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/ChangeLog b/ChangeLog index 92d267eb..e35d74cc 100644 --- a/ChangeLog +++ b/ChangeLog @@ -30,12 +30,12 @@ brevity. Much more detail can be found in the git revision history: options string during configuration. This was motivated by the desire to specify --with-malloc-conf=purge:decay , since the default must remain purge:ratio until the 5.0.0 release. (@jasone) + - Add MS Visual Studio 2015 support. (@rustyx, @yuslepukhin) - Make *allocx() size class overflow behavior defined. The maximum size class is now less than PTRDIFF_MAX to protect applications against numerical overflow, and all allocation functions are guaranteed to indicate errors rather than potentially crashing if the request size exceeds the maximum size class. (@jasone) - - Add MS Visual Studio 2015 support. (@rustyx, @yuslepukhin) - jeprof: + Add raw heap profile support. (@jasone) + Add --retain and --exclude for backtrace symbol filtering. (@jasone) @@ -57,22 +57,25 @@ brevity. Much more detail can be found in the git revision history: - Make opt.narenas unsigned rather than size_t. (@jasone) Bug fixes: + - Fix stats.cactive accounting regression. (@rustyx, @jasone) + - Handle unaligned keys in hash(). This caused problems for some ARM systems. + (@jasone, Christopher Ferris) - Refactor arenas array. In addition to fixing a fork-related deadlock, this makes arena lookups faster and simpler. (@jasone) - - Handle unaligned keys in hash(). This caused problems for some ARM systems. - (@jasone) - - Fix run quantization. In practice this bug had no impact unless - applications requested memory with alignment exceeding one page. (@jasone) - Move retained memory allocation out of the default chunk allocation function, to a location that gets executed even if the application installs a custom chunk allocation function. This resolves a virtual memory leak. (@buchgr) - - Resolve undefined unsigned-to-signed conversion that could cause corruption - of the stats.cactive statistic. (@jasone) + - Fix a potential tsd cleanup leak. (Christopher Ferris, @jasone) + - Fix run quantization. In practice this bug had no impact unless + applications requested memory with alignment exceeding one page. + (@jasone, @djwatson) - Fix LinuxThreads-specific bootstrapping deadlock. (Cosmin Paraschiv) - jeprof: + Don't discard curl options if timeout is not defined. (@djwatson) + Detect failed profile fetches. (@djwatson) + - Fix stats.arenas..{dss,lg_dirty_mult,decay_time,pactive,pdirty} for + --disable-stats case. 
(@jasone) * 4.0.4 (October 24, 2015) From e270a8f936d52766557a2ceca8b5e3ad315dc54d Mon Sep 17 00:00:00 2001 From: rustyx Date: Sat, 27 Feb 2016 18:29:31 +0100 Subject: [PATCH 95/96] Make test_threads more generic --- .../vc2015/test_threads/test_threads.cpp | 121 ++++++++---------- 1 file changed, 55 insertions(+), 66 deletions(-) diff --git a/msvc/projects/vc2015/test_threads/test_threads.cpp b/msvc/projects/vc2015/test_threads/test_threads.cpp index 603bdce7..c8cb7d66 100644 --- a/msvc/projects/vc2015/test_threads/test_threads.cpp +++ b/msvc/projects/vc2015/test_threads/test_threads.cpp @@ -10,88 +10,77 @@ #include #include #include -#include using std::vector; using std::thread; using std::uniform_int_distribution; using std::minstd_rand; -#if NDEBUG && JEMALLOC_ISSUE_318_WORKAROUND -extern "C" JEMALLOC_EXPORT void _malloc_thread_cleanup(void); - -static thread_local struct JeMallocThreadHelper { - ~JeMallocThreadHelper() { - _malloc_thread_cleanup(); - } -} tls_jemallocThreadHelper; -#endif - int test_threads() { - je_malloc_conf = "narenas:3"; - int narenas = 0; - size_t sz = sizeof(narenas); - je_mallctl("opt.narenas", &narenas, &sz, NULL, 0); - if (narenas != 3) { - printf("Error: unexpected number of arenas: %d\n", narenas); - return 1; - } - static const int sizes[] = { 7, 16, 32, 60, 91, 100, 120, 144, 169, 199, 255, 400, 670, 900, 917, 1025, 3333, 5190, 13131, 49192, 99999, 123123, 255265, 2333111 }; - static const int numSizes = (int)(sizeof(sizes) / sizeof(sizes[0])); - vector workers; - static const int numThreads = narenas + 1, numAllocsMax = 25, numIter1 = 50, numIter2 = 50; - je_malloc_stats_print(NULL, NULL, NULL); + je_malloc_conf = "narenas:3"; + int narenas = 0; + size_t sz = sizeof(narenas); + je_mallctl("opt.narenas", &narenas, &sz, NULL, 0); + if (narenas != 3) { + printf("Error: unexpected number of arenas: %d\n", narenas); + return 1; + } + static const int sizes[] = { 7, 16, 32, 60, 91, 100, 120, 144, 169, 199, 255, 400, 670, 900, 917, 1025, 3333, 5190, 13131, 49192, 99999, 123123, 255265, 2333111 }; + static const int numSizes = (int)(sizeof(sizes) / sizeof(sizes[0])); + vector workers; + static const int numThreads = narenas + 1, numAllocsMax = 25, numIter1 = 50, numIter2 = 50; + je_malloc_stats_print(NULL, NULL, NULL); size_t allocated1; size_t sz1 = sizeof(allocated1); je_mallctl("stats.active", &allocated1, &sz1, NULL, 0); printf("\nPress Enter to start threads...\n"); - getchar(); - printf("Starting %d threads x %d x %d iterations...\n", numThreads, numIter1, numIter2); - for (int i = 0; i < numThreads; i++) { - workers.emplace_back([tid=i]() { - uniform_int_distribution sizeDist(0, numSizes - 1); - minstd_rand rnd(tid * 17); - uint8_t* ptrs[numAllocsMax]; - int ptrsz[numAllocsMax]; - for (int i = 0; i < numIter1; ++i) { - thread t([&]() { - for (int i = 0; i < numIter2; ++i) { - const int numAllocs = numAllocsMax - sizeDist(rnd); - for (int j = 0; j < numAllocs; j += 64) { - const int x = sizeDist(rnd); - const int sz = sizes[x]; - ptrsz[j] = sz; - ptrs[j] = (uint8_t*)je_malloc(sz); - if (!ptrs[j]) { - printf("Unable to allocate %d bytes in thread %d, iter %d, alloc %d. 
%d\n", sz, tid, i, j, x); - exit(1); - } - for (int k = 0; k < sz; k++) - ptrs[j][k] = tid + k; - } - for (int j = 0; j < numAllocs; j += 64) { - for (int k = 0, sz = ptrsz[j]; k < sz; k++) - if (ptrs[j][k] != (uint8_t)(tid + k)) { - printf("Memory error in thread %d, iter %d, alloc %d @ %d : %02X!=%02X\n", tid, i, j, k, ptrs[j][k], (uint8_t)(tid + k)); - exit(1); - } - je_free(ptrs[j]); - } - } - }); - t.join(); - } - }); - } - for (thread& t : workers) { - t.join(); - } + getchar(); + printf("Starting %d threads x %d x %d iterations...\n", numThreads, numIter1, numIter2); + for (int i = 0; i < numThreads; i++) { + workers.emplace_back([tid=i]() { + uniform_int_distribution sizeDist(0, numSizes - 1); + minstd_rand rnd(tid * 17); + uint8_t* ptrs[numAllocsMax]; + int ptrsz[numAllocsMax]; + for (int i = 0; i < numIter1; ++i) { + thread t([&]() { + for (int i = 0; i < numIter2; ++i) { + const int numAllocs = numAllocsMax - sizeDist(rnd); + for (int j = 0; j < numAllocs; j += 64) { + const int x = sizeDist(rnd); + const int sz = sizes[x]; + ptrsz[j] = sz; + ptrs[j] = (uint8_t*)je_malloc(sz); + if (!ptrs[j]) { + printf("Unable to allocate %d bytes in thread %d, iter %d, alloc %d. %d\n", sz, tid, i, j, x); + exit(1); + } + for (int k = 0; k < sz; k++) + ptrs[j][k] = tid + k; + } + for (int j = 0; j < numAllocs; j += 64) { + for (int k = 0, sz = ptrsz[j]; k < sz; k++) + if (ptrs[j][k] != (uint8_t)(tid + k)) { + printf("Memory error in thread %d, iter %d, alloc %d @ %d : %02X!=%02X\n", tid, i, j, k, ptrs[j][k], (uint8_t)(tid + k)); + exit(1); + } + je_free(ptrs[j]); + } + } + }); + t.join(); + } + }); + } + for (thread& t : workers) { + t.join(); + } je_malloc_stats_print(NULL, NULL, NULL); size_t allocated2; je_mallctl("stats.active", &allocated2, &sz1, NULL, 0); size_t leaked = allocated2 - allocated1; - printf("\nDone. Leaked: %Id bytes\n", leaked); + printf("\nDone. Leaked: %zd bytes\n", leaked); bool failed = leaked > 65536; // in case C++ runtime allocated something (e.g. iostream locale or facet) printf("\nTest %s!\n", (failed ? "FAILED" : "successful")); printf("\nPress Enter to continue...\n"); From 3a342616ffc4992e19fdb57df6d6b85a952718be Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 28 Feb 2016 14:52:17 -0800 Subject: [PATCH 96/96] Update ChangeLog for 4.1.0. --- ChangeLog | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index e35d74cc..9cbfbf96 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,7 +4,7 @@ brevity. Much more detail can be found in the git revision history: https://github.com/jemalloc/jemalloc -* 4.1.0 (XXX) +* 4.1.0 (February 28, 2016) This release is primarily about optimizations, but it also incorporates a lot of portability-motivated refactoring and enhancements. Many people worked on